/* Copyright (C) 2013-2022 Free Software Foundation, Inc.
   Contributed by Jakub Jelinek <jakub@redhat.com>.

   This file is part of the GNU Offloading and Multi Processing Library
   (libgomp).

   Libgomp is free software; you can redistribute it and/or modify it
   under the terms of the GNU General Public License as published by
   the Free Software Foundation; either version 3, or (at your option)
   any later version.

   Libgomp is distributed in the hope that it will be useful, but WITHOUT ANY
   WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
   FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
   more details.

   Under Section 7 of GPL version 3, you are granted additional
   permissions described in the GCC Runtime Library Exception, version
   3.1, as published by the Free Software Foundation.

   You should have received a copy of the GNU General Public License and
   a copy of the GCC Runtime Library Exception along with this program;
   see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
   <http://www.gnu.org/licenses/>.  */
/* This file contains the support of offloading.  */

#include "oacc-plugin.h"
#include "gomp-constants.h"
#ifdef HAVE_INTTYPES_H
# include <inttypes.h>  /* For PRIu64.  */
#endif
#include "plugin-suffix.h"
typedef uintptr_t *hash_entry_type;
static inline void * htab_alloc (size_t size) { return gomp_malloc (size); }
static inline void htab_free (void *ptr) { free (ptr); }
#include "hashtab.h"

ialias_redirect (GOMP_task)

static inline hashval_t
htab_hash (hash_entry_type element)
{
  return hash_pointer ((void *) element);
}

static inline bool
htab_eq (hash_entry_type x, hash_entry_type y)
{
  return x == y;
}

#define FIELD_TGT_EMPTY (~(size_t) 0)
static void gomp_target_init (void);

/* The whole initialization code for offloading plugins is only run once.  */
static pthread_once_t gomp_is_initialized = PTHREAD_ONCE_INIT;

/* Mutex for offload image registration.  */
static gomp_mutex_t register_lock;
/* This structure describes an offload image.
   It contains the type of the target device, a pointer to the host table
   descriptor, and a pointer to the target data.  */
struct offload_image_descr {
  unsigned version;
  enum offload_target_type type;
  const void *host_table;
  const void *target_data;
};
/* Array of descriptors of offload images.  */
static struct offload_image_descr *offload_images;

/* Total number of offload images.  */
static int num_offload_images;

/* Array of descriptors for all available devices.  */
static struct gomp_device_descr *devices;

/* Total number of available devices.  */
static int num_devices;

/* Number of GOMP_OFFLOAD_CAP_OPENMP_400 devices.  */
static int num_devices_openmp;
/* Similar to gomp_realloc, but release register_lock before gomp_fatal.  */

static void *
gomp_realloc_unlock (void *old, size_t size)
{
  void *ret = realloc (old, size);
  if (ret == NULL)
    {
      gomp_mutex_unlock (&register_lock);
      gomp_fatal ("Out of memory allocating %lu bytes", (unsigned long) size);
    }
  return ret;
}
attribute_hidden void
gomp_init_targets_once (void)
{
  (void) pthread_once (&gomp_is_initialized, gomp_target_init);
}

attribute_hidden int
gomp_get_num_devices (void)
{
  gomp_init_targets_once ();
  return num_devices_openmp;
}
static struct gomp_device_descr *
resolve_device (int device_id)
{
  if (device_id == GOMP_DEVICE_ICV)
    {
      struct gomp_task_icv *icv = gomp_icv (false);
      device_id = icv->default_device_var;
    }

  if (device_id < 0 || device_id >= gomp_get_num_devices ())
    {
      if (gomp_target_offload_var == GOMP_TARGET_OFFLOAD_MANDATORY
          && device_id != GOMP_DEVICE_HOST_FALLBACK
          && device_id != num_devices_openmp)
        gomp_fatal ("OMP_TARGET_OFFLOAD is set to MANDATORY, "
                    "but device not found");

      return NULL;
    }

  gomp_mutex_lock (&devices[device_id].lock);
  if (devices[device_id].state == GOMP_DEVICE_UNINITIALIZED)
    gomp_init_device (&devices[device_id]);
  else if (devices[device_id].state == GOMP_DEVICE_FINALIZED)
    {
      gomp_mutex_unlock (&devices[device_id].lock);

      if (gomp_target_offload_var == GOMP_TARGET_OFFLOAD_MANDATORY)
        gomp_fatal ("OMP_TARGET_OFFLOAD is set to MANDATORY, "
                    "but device is finalized");

      return NULL;
    }
  gomp_mutex_unlock (&devices[device_id].lock);

  return &devices[device_id];
}
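/* Illustration (an assumption about typical callers, not code from this
   excerpt): a construct such as

     #pragma omp target device (2)

   reaches resolve_device with DEVICE_ID == 2, while a plain

     #pragma omp target

   passes GOMP_DEVICE_ICV so that the default-device-var ICV (e.g. set via
   OMP_DEFAULT_DEVICE) is used instead.  A NULL return selects host
   fallback.  */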
static inline splay_tree_key
gomp_map_lookup (splay_tree mem_map, splay_tree_key key)
{
  if (key->host_start != key->host_end)
    return splay_tree_lookup (mem_map, key);

  key->host_end++;
  splay_tree_key n = splay_tree_lookup (mem_map, key);
  key->host_end--;
  if (n)
    return n;
  key->host_start--;
  n = splay_tree_lookup (mem_map, key);
  key->host_start++;
  if (n)
    return n;

  return splay_tree_lookup (mem_map, key);
}

static inline splay_tree_key
gomp_map_0len_lookup (splay_tree mem_map, splay_tree_key key)
{
  if (key->host_start != key->host_end)
    return splay_tree_lookup (mem_map, key);

  key->host_end++;
  splay_tree_key n = splay_tree_lookup (mem_map, key);
  key->host_end--;
  return n;
}
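/* Reading aid for the two lookup variants above: a zero-length key
   (host_start == host_end, e.g. from a zero-length array section) would
   never match an exact interval lookup, so the end (and, for
   gomp_map_lookup, also the start) is temporarily nudged so the empty
   interval can still find an existing mapping that ends or begins at that
   address, e.g. an existing mapping [p, p + N) is found for the empty
   interval at p + N.  */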
static void
gomp_device_copy (struct gomp_device_descr *devicep,
                  bool (*copy_func) (int, void *, const void *, size_t),
                  const char *dst, void *dstaddr,
                  const char *src, const void *srcaddr,
                  size_t size)
{
  if (!copy_func (devicep->target_id, dstaddr, srcaddr, size))
    {
      gomp_mutex_unlock (&devicep->lock);
      gomp_fatal ("Copying of %s object [%p..%p) to %s object [%p..%p) failed",
                  src, srcaddr, srcaddr + size, dst, dstaddr, dstaddr + size);
    }
}
static void
goacc_device_copy_async (struct gomp_device_descr *devicep,
                         bool (*copy_func) (int, void *, const void *, size_t,
                                            struct goacc_asyncqueue *),
                         const char *dst, void *dstaddr,
                         const char *src, const void *srcaddr,
                         const void *srcaddr_orig,
                         size_t size, struct goacc_asyncqueue *aq)
{
  if (!copy_func (devicep->target_id, dstaddr, srcaddr, size, aq))
    {
      gomp_mutex_unlock (&devicep->lock);
      if (srcaddr_orig && srcaddr_orig != srcaddr)
        gomp_fatal ("Copying of %s object [%p..%p)"
                    " via buffer %s object [%p..%p)"
                    " to %s object [%p..%p) failed",
                    src, srcaddr_orig, srcaddr_orig + size,
                    src, srcaddr, srcaddr + size,
                    dst, dstaddr, dstaddr + size);
      else
        gomp_fatal ("Copying of %s object [%p..%p)"
                    " to %s object [%p..%p) failed",
                    src, srcaddr, srcaddr + size,
                    dst, dstaddr, dstaddr + size);
    }
}
/* Infrastructure for coalescing adjacent or nearly adjacent (in device
   addresses) host to device memory transfers.  */

struct gomp_coalesce_chunk
{
  /* The starting and ending point of a coalesced chunk of memory.  */
  size_t start, end;
};

struct gomp_coalesce_buf
{
  /* Buffer into which gomp_copy_host2dev will memcpy data and from which
     it will be copied to the device.  */
  void *buf;
  struct target_mem_desc *tgt;
  /* Array with offsets, chunks[i].start is the starting offset and
     chunks[i].end ending offset relative to tgt->tgt_start device address
     of chunks which are to be copied to buf and later copied to device.  */
  struct gomp_coalesce_chunk *chunks;
  /* Number of chunks in chunks array, or -1 if coalesce buffering should not
     be performed.  */
  long chunk_cnt;
  /* During construction of chunks array, how many memory regions are within
     the last chunk.  If there is just one memory region for a chunk, we copy
     it directly to device rather than going through buf.  */
  long use_cnt;
};

/* Maximum size of memory region considered for coalescing.  Larger copies
   are performed directly.  */
#define MAX_COALESCE_BUF_SIZE	(32 * 1024)

/* Maximum size of a gap in between regions to consider them being copied
   within the same chunk.  All the device offsets considered are within
   newly allocated device memory, so it isn't fatal if we copy some padding
   in between from host to device.  The gaps come either from alignment
   padding or from memory regions which are not supposed to be copied from
   host to device (e.g. map(alloc:), map(from:) etc.).  */
#define MAX_COALESCE_BUF_GAP	(4 * 1024)
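/* Worked example (illustrative numbers, not from the sources): suppose two
   map(to:) regions of 1 KiB each end up at device offsets 0..1024 and
   1100..2124.  Both are below MAX_COALESCE_BUF_SIZE and the 76-byte gap
   between them is below MAX_COALESCE_BUF_GAP, so gomp_coalesce_buf_add
   records them as a single chunk [0, 2124) and only one host-to-device
   transfer is issued.  A 64 KiB region, by contrast, bypasses the buffer
   entirely.  */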
/* Add region with device tgt_start relative offset and length to CBUF.

   This must not be used for asynchronous copies, because the host data might
   not be computed yet (by an earlier asynchronous compute region, for
   example).
   TODO ... but we could allow CBUF usage for EPHEMERAL data?  (Open question:
   is it more performant to use libgomp CBUF buffering or individual device
   asynchronous copying?)  */

static inline void
gomp_coalesce_buf_add (struct gomp_coalesce_buf *cbuf, size_t start, size_t len)
{
  if (len > MAX_COALESCE_BUF_SIZE || len == 0)
    return;
  if (cbuf->chunk_cnt)
    {
      if (cbuf->chunk_cnt < 0)
        return;
      if (start < cbuf->chunks[cbuf->chunk_cnt - 1].end)
        {
          cbuf->chunk_cnt = -1;
          return;
        }
      if (start < cbuf->chunks[cbuf->chunk_cnt - 1].end + MAX_COALESCE_BUF_GAP)
        {
          cbuf->chunks[cbuf->chunk_cnt - 1].end = start + len;
          cbuf->use_cnt++;
          return;
        }
      /* If the last chunk is only used by one mapping, discard it,
         as it will be one host to device copy anyway and
         memcpying it around will only waste cycles.  */
      if (cbuf->use_cnt == 1)
        cbuf->chunk_cnt--;
    }
  cbuf->chunks[cbuf->chunk_cnt].start = start;
  cbuf->chunks[cbuf->chunk_cnt].end = start + len;
  cbuf->chunk_cnt++;
  cbuf->use_cnt = 1;
}
/* Return true for mapping kinds which need to copy data from the
   host to device for regions that weren't previously mapped.  */

static inline bool
gomp_to_device_kind_p (int kind)
{
  switch (kind)
    {
    case GOMP_MAP_ALLOC:
    case GOMP_MAP_FROM:
    case GOMP_MAP_FORCE_ALLOC:
    case GOMP_MAP_FORCE_FROM:
    case GOMP_MAP_ALWAYS_FROM:
      return false;
    default:
      return true;
    }
}
/* Copy host memory to an offload device.  In asynchronous mode (if AQ is
   non-NULL), when the source data is stack or may otherwise be deallocated
   before the asynchronous copy takes place, EPHEMERAL must be passed as
   TRUE.  */

attribute_hidden void
gomp_copy_host2dev (struct gomp_device_descr *devicep,
                    struct goacc_asyncqueue *aq,
                    void *d, const void *h, size_t sz,
                    bool ephemeral, struct gomp_coalesce_buf *cbuf)
{
  if (__builtin_expect (aq != NULL, 0))
    {
      /* See 'gomp_coalesce_buf_add'.  */
      assert (!cbuf);

      void *h_buf = (void *) h;
      if (ephemeral)
        {
          /* We're queueing up an asynchronous copy from data that may
             disappear before the transfer takes place (i.e. because it is a
             stack local in a function that is no longer executing).  Make a
             copy of the data into a temporary buffer in those cases.  */
          h_buf = gomp_malloc (sz);
          memcpy (h_buf, h, sz);
        }
      goacc_device_copy_async (devicep, devicep->openacc.async.host2dev_func,
                               "dev", d, "host", h_buf, h, sz, aq);
      if (ephemeral)
        /* Free temporary buffer once the transfer has completed.  */
        devicep->openacc.async.queue_callback_func (aq, free, h_buf);

      return;
    }

  if (cbuf)
    {
      uintptr_t doff = (uintptr_t) d - cbuf->tgt->tgt_start;
      if (doff < cbuf->chunks[cbuf->chunk_cnt - 1].end)
        {
          long first = 0;
          long last = cbuf->chunk_cnt - 1;
          while (first <= last)
            {
              long middle = (first + last) >> 1;
              if (cbuf->chunks[middle].end <= doff)
                first = middle + 1;
              else if (cbuf->chunks[middle].start <= doff)
                {
                  if (doff + sz > cbuf->chunks[middle].end)
                    {
                      gomp_mutex_unlock (&devicep->lock);
                      gomp_fatal ("internal libgomp cbuf error");
                    }
                  memcpy ((char *) cbuf->buf + (doff - cbuf->chunks[0].start),
                          h, sz);
                  return;
                }
              else
                last = middle - 1;
            }
        }
    }

  gomp_device_copy (devicep, devicep->host2dev_func, "dev", d, "host", h, sz);
}
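/* Usage note (illustrative, not code from this excerpt): an OpenACC async
   firstprivate scalar lives on the caller's stack, so a caller would pass
   EPHEMERAL as true, e.g.

     gomp_copy_host2dev (devicep, aq, devptr, &local_scalar,
                         sizeof (local_scalar), true, NULL);

   which makes the code above snapshot the value into a heap buffer before
   queueing the transfer; non-ephemeral data is handed to the plugin
   directly.  The variable names in the call are hypothetical.  */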
attribute_hidden void
gomp_copy_dev2host (struct gomp_device_descr *devicep,
                    struct goacc_asyncqueue *aq,
                    void *h, const void *d, size_t sz)
{
  if (__builtin_expect (aq != NULL, 0))
    goacc_device_copy_async (devicep, devicep->openacc.async.dev2host_func,
                             "host", h, "dev", d, NULL, sz, aq);
  else
    gomp_device_copy (devicep, devicep->dev2host_func, "host", h, "dev", d, sz);
}
static void
gomp_free_device_memory (struct gomp_device_descr *devicep, void *devptr)
{
  if (!devicep->free_func (devicep->target_id, devptr))
    {
      gomp_mutex_unlock (&devicep->lock);
      gomp_fatal ("error in freeing device memory block at %p", devptr);
    }
}
/* Increment reference count of a splay_tree_key region K by 1.
   If REFCOUNT_SET != NULL, use it to track already seen refcounts, and only
   increment the value if refcount is not yet contained in the set (used for
   OpenMP 5.0, which specifies that a region's refcount is adjusted at most
   once for each construct).  */

static inline void
gomp_increment_refcount (splay_tree_key k, htab_t *refcount_set)
{
  if (k == NULL || k->refcount == REFCOUNT_INFINITY)
    return;

  uintptr_t *refcount_ptr = &k->refcount;

  if (REFCOUNT_STRUCTELEM_FIRST_P (k->refcount))
    refcount_ptr = &k->structelem_refcount;
  else if (REFCOUNT_STRUCTELEM_P (k->refcount))
    refcount_ptr = k->structelem_refcount_ptr;

  if (refcount_set)
    {
      if (htab_find (*refcount_set, refcount_ptr))
        return;
      uintptr_t **slot = htab_find_slot (refcount_set, refcount_ptr, INSERT);
      *slot = refcount_ptr;
    }

  *refcount_ptr += 1;
}
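/* Illustrative sketch of the REFCOUNT_SET mechanics (assumption about a
   typical situation): when two map clauses on one construct resolve to the
   same mapped region, for instance a whole array and a section of it, both
   end up at the same splay_tree_key.  The key's refcount pointer is
   recorded in *REFCOUNT_SET on the first hit, so the second call to
   gomp_increment_refcount on the same construct is a no-op, which gives
   the OpenMP 5.0 "adjusted at most once per construct" behaviour.  */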
/* Decrement reference count of a splay_tree_key region K by 1, or if DELETE_P
   is true, set reference count to zero.  If REFCOUNT_SET != NULL, use it to
   track already seen refcounts, and only adjust the value if refcount is not
   yet contained in the set (like gomp_increment_refcount).

   Return out-values: set *DO_COPY to true if we set the refcount to zero, or
   it is already zero and we know we decremented it earlier.  This signals that
   associated maps should be copied back to host.

   *DO_REMOVE is set to true when this is the first handling of this refcount
   and we are setting it to zero.  This signals a removal of this key from the
   splay-tree map.

   Copy and removal are separated due to cases like handling of structure
   elements, e.g. each map of a structure element representing a possible copy
   out of a structure field has to be handled individually, but we only signal
   removal for one (the first encountered) sibling map.  */

static inline void
gomp_decrement_refcount (splay_tree_key k, htab_t *refcount_set, bool delete_p,
                         bool *do_copy, bool *do_remove)
{
  if (k == NULL || k->refcount == REFCOUNT_INFINITY)
    {
      *do_copy = *do_remove = false;
      return;
    }

  uintptr_t *refcount_ptr = &k->refcount;

  if (REFCOUNT_STRUCTELEM_FIRST_P (k->refcount))
    refcount_ptr = &k->structelem_refcount;
  else if (REFCOUNT_STRUCTELEM_P (k->refcount))
    refcount_ptr = k->structelem_refcount_ptr;

  bool new_encountered_refcount;
  bool set_to_zero = false;
  bool is_zero = false;

  uintptr_t orig_refcount = *refcount_ptr;

  if (refcount_set)
    {
      if (htab_find (*refcount_set, refcount_ptr))
        {
          new_encountered_refcount = false;
          goto end;
        }

      uintptr_t **slot = htab_find_slot (refcount_set, refcount_ptr, INSERT);
      *slot = refcount_ptr;
      new_encountered_refcount = true;
    }
  else
    /* If no refcount_set being used, assume all keys are being decremented
       for the first time.  */
    new_encountered_refcount = true;

  if (delete_p)
    *refcount_ptr = 0;
  else if (*refcount_ptr > 0)
    *refcount_ptr -= 1;

 end:
  if (*refcount_ptr == 0)
    {
      if (orig_refcount > 0)
        is_zero = true;

      set_to_zero = true;
    }

  *do_copy = (set_to_zero || (!new_encountered_refcount && is_zero));
  *do_remove = (new_encountered_refcount && set_to_zero);
}
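/* Illustrative reading of the out-values (not additional code): for a
   structure mapped as several GOMP_MAP_STRUCT elements sharing one
   structelem refcount, every sibling element may report *DO_COPY so its
   own field can be copied back, but only the first sibling that drives
   the shared refcount to zero reports *DO_REMOVE, so the key is unlinked
   from the splay tree exactly once.  */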
/* Handle the case where gomp_map_lookup, splay_tree_lookup or
   gomp_map_0len_lookup found oldn for newn.
   Helper function of gomp_map_vars.  */

static inline void
gomp_map_vars_existing (struct gomp_device_descr *devicep,
                        struct goacc_asyncqueue *aq, splay_tree_key oldn,
                        splay_tree_key newn, struct target_var_desc *tgt_var,
                        unsigned char kind, bool always_to_flag, bool implicit,
                        struct gomp_coalesce_buf *cbuf,
                        htab_t *refcount_set)
{
  assert (kind != GOMP_MAP_ATTACH
          || kind != GOMP_MAP_ATTACH_ZERO_LENGTH_ARRAY_SECTION);

  tgt_var->key = oldn;
  tgt_var->copy_from = GOMP_MAP_COPY_FROM_P (kind);
  tgt_var->always_copy_from = GOMP_MAP_ALWAYS_FROM_P (kind);
  tgt_var->is_attach = false;
  tgt_var->offset = newn->host_start - oldn->host_start;

  /* For implicit maps, old contained in new is valid.  */
  bool implicit_subset = (implicit
                          && newn->host_start <= oldn->host_start
                          && oldn->host_end <= newn->host_end);
  if (implicit_subset)
    tgt_var->length = oldn->host_end - oldn->host_start;
  else
    tgt_var->length = newn->host_end - newn->host_start;

  if ((kind & GOMP_MAP_FLAG_FORCE)
      /* For implicit maps, old contained in new is valid.  */
      || !(implicit_subset
           /* Otherwise, new contained inside old is considered valid.  */
           || (oldn->host_start <= newn->host_start
               && newn->host_end <= oldn->host_end)))
    {
      gomp_mutex_unlock (&devicep->lock);
      gomp_fatal ("Trying to map into device [%p..%p) object when "
                  "[%p..%p) is already mapped",
                  (void *) newn->host_start, (void *) newn->host_end,
                  (void *) oldn->host_start, (void *) oldn->host_end);
    }

  if (GOMP_MAP_ALWAYS_TO_P (kind) || always_to_flag)
    {
      /* Implicit + always should not happen.  If this does occur, the
         address/length adjustment below is a TODO.  */
      assert (!implicit_subset);

      if (oldn->aux && oldn->aux->attach_count)
        {
          /* We have to be careful not to overwrite still attached pointers
             during the copyback to host.  */
          uintptr_t addr = newn->host_start;
          while (addr < newn->host_end)
            {
              size_t i = (addr - oldn->host_start) / sizeof (void *);
              if (oldn->aux->attach_count[i] == 0)
                gomp_copy_host2dev (devicep, aq,
                                    (void *) (oldn->tgt->tgt_start
                                              + oldn->tgt_offset
                                              + addr - oldn->host_start),
                                    (void *) addr,
                                    sizeof (void *), false, cbuf);
              addr += sizeof (void *);
            }
        }
      else
        gomp_copy_host2dev (devicep, aq,
                            (void *) (oldn->tgt->tgt_start + oldn->tgt_offset
                                      + newn->host_start - oldn->host_start),
                            (void *) newn->host_start,
                            newn->host_end - newn->host_start, false, cbuf);
    }

  gomp_increment_refcount (oldn, refcount_set);
}
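/* Example of the re-use path above (illustrative): if "a[0:100]" is already
   mapped and a later construct maps "a[10:20]", the new interval is
   contained in the old one, so no new device storage is allocated;
   TGT_VAR simply records an offset of 10 * sizeof (*a) bytes into the
   existing region, the refcount is incremented, and data is only re-copied
   when an 'always' modifier (or ALWAYS_TO_FLAG) applies.  */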
static int
get_kind (bool short_mapkind, void *kinds, int idx)
{
  if (!short_mapkind)
    return ((unsigned char *) kinds)[idx];

  int val = ((unsigned short *) kinds)[idx];
  if (GOMP_MAP_IMPLICIT_P (val))
    val &= ~GOMP_MAP_IMPLICIT;
  return val;
}

static bool
get_implicit (bool short_mapkind, void *kinds, int idx)
{
  if (!short_mapkind)
    return false;

  int val = ((unsigned short *) kinds)[idx];
  return GOMP_MAP_IMPLICIT_P (val);
}
639 gomp_map_pointer (struct target_mem_desc
*tgt
, struct goacc_asyncqueue
*aq
,
640 uintptr_t host_ptr
, uintptr_t target_offset
, uintptr_t bias
,
641 struct gomp_coalesce_buf
*cbuf
,
642 bool allow_zero_length_array_sections
)
644 struct gomp_device_descr
*devicep
= tgt
->device_descr
;
645 struct splay_tree_s
*mem_map
= &devicep
->mem_map
;
646 struct splay_tree_key_s cur_node
;
648 cur_node
.host_start
= host_ptr
;
649 if (cur_node
.host_start
== (uintptr_t) NULL
)
651 cur_node
.tgt_offset
= (uintptr_t) NULL
;
652 gomp_copy_host2dev (devicep
, aq
,
653 (void *) (tgt
->tgt_start
+ target_offset
),
654 (void *) &cur_node
.tgt_offset
, sizeof (void *),
658 /* Add bias to the pointer value. */
659 cur_node
.host_start
+= bias
;
660 cur_node
.host_end
= cur_node
.host_start
;
661 splay_tree_key n
= gomp_map_lookup (mem_map
, &cur_node
);
664 if (allow_zero_length_array_sections
)
665 cur_node
.tgt_offset
= 0;
668 gomp_mutex_unlock (&devicep
->lock
);
669 gomp_fatal ("Pointer target of array section wasn't mapped");
674 cur_node
.host_start
-= n
->host_start
;
676 = n
->tgt
->tgt_start
+ n
->tgt_offset
+ cur_node
.host_start
;
677 /* At this point tgt_offset is target address of the
678 array section. Now subtract bias to get what we want
679 to initialize the pointer with. */
680 cur_node
.tgt_offset
-= bias
;
682 gomp_copy_host2dev (devicep
, aq
, (void *) (tgt
->tgt_start
+ target_offset
),
683 (void *) &cur_node
.tgt_offset
, sizeof (void *),
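/* Sketch of the pointer-translation arithmetic used by gomp_map_pointer
   (derived from the code above, shown only as a reading aid):

     value written at tgt->tgt_start + target_offset
       = n->tgt->tgt_start + n->tgt_offset
         + (host_ptr + bias - n->host_start)
         - bias

   i.e. the host pointer is rebased into the mapped region that contains
   host_ptr + bias, then the bias is subtracted again so that the device
   sees the same "pointer relative to the data" layout (e.g. Fortran array
   descriptors) as the host.  */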
688 gomp_map_fields_existing (struct target_mem_desc
*tgt
,
689 struct goacc_asyncqueue
*aq
, splay_tree_key n
,
690 size_t first
, size_t i
, void **hostaddrs
,
691 size_t *sizes
, void *kinds
,
692 struct gomp_coalesce_buf
*cbuf
, htab_t
*refcount_set
)
694 struct gomp_device_descr
*devicep
= tgt
->device_descr
;
695 struct splay_tree_s
*mem_map
= &devicep
->mem_map
;
696 struct splay_tree_key_s cur_node
;
699 const bool short_mapkind
= true;
700 const int typemask
= short_mapkind
? 0xff : 0x7;
702 cur_node
.host_start
= (uintptr_t) hostaddrs
[i
];
703 cur_node
.host_end
= cur_node
.host_start
+ sizes
[i
];
704 splay_tree_key n2
= splay_tree_lookup (mem_map
, &cur_node
);
705 kind
= get_kind (short_mapkind
, kinds
, i
);
706 implicit
= get_implicit (short_mapkind
, kinds
, i
);
709 && n2
->host_start
- n
->host_start
== n2
->tgt_offset
- n
->tgt_offset
)
711 gomp_map_vars_existing (devicep
, aq
, n2
, &cur_node
, &tgt
->list
[i
],
712 kind
& typemask
, false, implicit
, cbuf
,
718 if (cur_node
.host_start
> (uintptr_t) hostaddrs
[first
- 1])
720 cur_node
.host_start
--;
721 n2
= splay_tree_lookup (mem_map
, &cur_node
);
722 cur_node
.host_start
++;
725 && n2
->host_start
- n
->host_start
726 == n2
->tgt_offset
- n
->tgt_offset
)
728 gomp_map_vars_existing (devicep
, aq
, n2
, &cur_node
, &tgt
->list
[i
],
729 kind
& typemask
, false, implicit
, cbuf
,
735 n2
= splay_tree_lookup (mem_map
, &cur_node
);
739 && n2
->host_start
- n
->host_start
== n2
->tgt_offset
- n
->tgt_offset
)
741 gomp_map_vars_existing (devicep
, aq
, n2
, &cur_node
, &tgt
->list
[i
],
742 kind
& typemask
, false, implicit
, cbuf
,
747 gomp_mutex_unlock (&devicep
->lock
);
748 gomp_fatal ("Trying to map into device [%p..%p) structure element when "
749 "other mapped elements from the same structure weren't mapped "
750 "together with it", (void *) cur_node
.host_start
,
751 (void *) cur_node
.host_end
);
754 attribute_hidden
void
755 gomp_attach_pointer (struct gomp_device_descr
*devicep
,
756 struct goacc_asyncqueue
*aq
, splay_tree mem_map
,
757 splay_tree_key n
, uintptr_t attach_to
, size_t bias
,
758 struct gomp_coalesce_buf
*cbufp
,
759 bool allow_zero_length_array_sections
)
761 struct splay_tree_key_s s
;
766 gomp_mutex_unlock (&devicep
->lock
);
767 gomp_fatal ("enclosing struct not mapped for attach");
770 size
= (n
->host_end
- n
->host_start
+ sizeof (void *) - 1) / sizeof (void *);
771 /* We might have a pointer in a packed struct: however we cannot have more
772 than one such pointer in each pointer-sized portion of the struct, so
774 idx
= (attach_to
- n
->host_start
) / sizeof (void *);
777 n
->aux
= gomp_malloc_cleared (sizeof (struct splay_tree_aux
));
779 if (!n
->aux
->attach_count
)
781 = gomp_malloc_cleared (sizeof (*n
->aux
->attach_count
) * size
);
783 if (n
->aux
->attach_count
[idx
] < UINTPTR_MAX
)
784 n
->aux
->attach_count
[idx
]++;
787 gomp_mutex_unlock (&devicep
->lock
);
788 gomp_fatal ("attach count overflow");
791 if (n
->aux
->attach_count
[idx
] == 1)
793 uintptr_t devptr
= n
->tgt
->tgt_start
+ n
->tgt_offset
+ attach_to
795 uintptr_t target
= (uintptr_t) *(void **) attach_to
;
799 if ((void *) target
== NULL
)
801 gomp_mutex_unlock (&devicep
->lock
);
802 gomp_fatal ("attempt to attach null pointer");
805 s
.host_start
= target
+ bias
;
806 s
.host_end
= s
.host_start
+ 1;
807 tn
= splay_tree_lookup (mem_map
, &s
);
811 if (allow_zero_length_array_sections
)
812 /* When allowing attachment to zero-length array sections, we
813 allow attaching to NULL pointers when the target region is not
818 gomp_mutex_unlock (&devicep
->lock
);
819 gomp_fatal ("pointer target not mapped for attach");
823 data
= tn
->tgt
->tgt_start
+ tn
->tgt_offset
+ target
- tn
->host_start
;
826 "%s: attaching host %p, target %p (struct base %p) to %p\n",
827 __FUNCTION__
, (void *) attach_to
, (void *) devptr
,
828 (void *) (n
->tgt
->tgt_start
+ n
->tgt_offset
), (void *) data
);
830 gomp_copy_host2dev (devicep
, aq
, (void *) devptr
, (void *) &data
,
831 sizeof (void *), true, cbufp
);
834 gomp_debug (1, "%s: attach count for %p -> %u\n", __FUNCTION__
,
835 (void *) attach_to
, (int) n
->aux
->attach_count
[idx
]);
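/* Illustrative OpenACC use of the attach machinery (an assumption about a
   typical caller, not code from this excerpt): for

     #pragma acc enter data copyin(p[0:n]) attach(s->p)

   gomp_attach_pointer is called with N being the mapping of the enclosing
   struct "s" and ATTACH_TO the host address of the member "s->p"; on the
   first attachment it overwrites the device copy of that member with the
   device address of p[0:n], and attach_count[] remembers how many
   attachments must be undone before the host value is restored.  */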
838 attribute_hidden
void
839 gomp_detach_pointer (struct gomp_device_descr
*devicep
,
840 struct goacc_asyncqueue
*aq
, splay_tree_key n
,
841 uintptr_t detach_from
, bool finalize
,
842 struct gomp_coalesce_buf
*cbufp
)
848 gomp_mutex_unlock (&devicep
->lock
);
849 gomp_fatal ("enclosing struct not mapped for detach");
852 idx
= (detach_from
- n
->host_start
) / sizeof (void *);
854 if (!n
->aux
|| !n
->aux
->attach_count
)
856 gomp_mutex_unlock (&devicep
->lock
);
857 gomp_fatal ("no attachment counters for struct");
861 n
->aux
->attach_count
[idx
] = 1;
863 if (n
->aux
->attach_count
[idx
] == 0)
865 gomp_mutex_unlock (&devicep
->lock
);
866 gomp_fatal ("attach count underflow");
869 n
->aux
->attach_count
[idx
]--;
871 if (n
->aux
->attach_count
[idx
] == 0)
873 uintptr_t devptr
= n
->tgt
->tgt_start
+ n
->tgt_offset
+ detach_from
875 uintptr_t target
= (uintptr_t) *(void **) detach_from
;
878 "%s: detaching host %p, target %p (struct base %p) to %p\n",
879 __FUNCTION__
, (void *) detach_from
, (void *) devptr
,
880 (void *) (n
->tgt
->tgt_start
+ n
->tgt_offset
),
883 gomp_copy_host2dev (devicep
, aq
, (void *) devptr
, (void *) &target
,
884 sizeof (void *), true, cbufp
);
887 gomp_debug (1, "%s: attach count for %p -> %u\n", __FUNCTION__
,
888 (void *) detach_from
, (int) n
->aux
->attach_count
[idx
]);
attribute_hidden uintptr_t
gomp_map_val (struct target_mem_desc *tgt, void **hostaddrs, size_t i)
{
  if (tgt->list[i].key != NULL)
    return tgt->list[i].key->tgt->tgt_start
           + tgt->list[i].key->tgt_offset
           + tgt->list[i].offset;

  switch (tgt->list[i].offset)
    {
    case OFFSET_INLINED:
      return (uintptr_t) hostaddrs[i];

    case OFFSET_POINTER:
      return 0;

    case OFFSET_STRUCT:
      return tgt->list[i + 1].key->tgt->tgt_start
             + tgt->list[i + 1].key->tgt_offset
             + tgt->list[i + 1].offset
             + (uintptr_t) hostaddrs[i]
             - (uintptr_t) hostaddrs[i + 1];

    default:
      return tgt->tgt_start + tgt->list[i].offset;
    }
}
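/* Reading aid for the cases above (derived from the code): for a mapped
   entry the device address is key->tgt->tgt_start + key->tgt_offset +
   offset; OFFSET_INLINED means the host value itself is passed through
   (e.g. firstprivate integers); OFFSET_POINTER yields 0; and OFFSET_STRUCT
   derives the address from the following list entry, which maps the first
   field of the same structure.  */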
919 static inline __attribute__((always_inline
)) struct target_mem_desc
*
920 gomp_map_vars_internal (struct gomp_device_descr
*devicep
,
921 struct goacc_asyncqueue
*aq
, size_t mapnum
,
922 void **hostaddrs
, void **devaddrs
, size_t *sizes
,
923 void *kinds
, bool short_mapkind
,
924 htab_t
*refcount_set
,
925 enum gomp_map_vars_kind pragma_kind
)
927 size_t i
, tgt_align
, tgt_size
, not_found_cnt
= 0;
928 bool has_firstprivate
= false;
929 bool has_always_ptrset
= false;
930 bool openmp_p
= (pragma_kind
& GOMP_MAP_VARS_OPENACC
) == 0;
931 const int rshift
= short_mapkind
? 8 : 3;
932 const int typemask
= short_mapkind
? 0xff : 0x7;
933 struct splay_tree_s
*mem_map
= &devicep
->mem_map
;
934 struct splay_tree_key_s cur_node
;
935 struct target_mem_desc
*tgt
936 = gomp_malloc (sizeof (*tgt
) + sizeof (tgt
->list
[0]) * mapnum
);
937 tgt
->list_count
= mapnum
;
938 tgt
->refcount
= (pragma_kind
& GOMP_MAP_VARS_ENTER_DATA
) ? 0 : 1;
939 tgt
->device_descr
= devicep
;
941 struct gomp_coalesce_buf cbuf
, *cbufp
= NULL
;
950 tgt_align
= sizeof (void *);
956 if (mapnum
> 1 || pragma_kind
== GOMP_MAP_VARS_TARGET
)
958 size_t chunks_size
= (mapnum
+ 1) * sizeof (struct gomp_coalesce_chunk
);
959 cbuf
.chunks
= (struct gomp_coalesce_chunk
*) gomp_alloca (chunks_size
);
962 if (pragma_kind
== GOMP_MAP_VARS_TARGET
)
964 size_t align
= 4 * sizeof (void *);
966 tgt_size
= mapnum
* sizeof (void *);
968 cbuf
.use_cnt
= 1 + (mapnum
> 1);
969 cbuf
.chunks
[0].start
= 0;
970 cbuf
.chunks
[0].end
= tgt_size
;
973 gomp_mutex_lock (&devicep
->lock
);
974 if (devicep
->state
== GOMP_DEVICE_FINALIZED
)
976 gomp_mutex_unlock (&devicep
->lock
);
981 for (i
= 0; i
< mapnum
; i
++)
983 int kind
= get_kind (short_mapkind
, kinds
, i
);
984 bool implicit
= get_implicit (short_mapkind
, kinds
, i
);
985 if (hostaddrs
[i
] == NULL
986 || (kind
& typemask
) == GOMP_MAP_FIRSTPRIVATE_INT
)
988 tgt
->list
[i
].key
= NULL
;
989 tgt
->list
[i
].offset
= OFFSET_INLINED
;
992 else if ((kind
& typemask
) == GOMP_MAP_USE_DEVICE_PTR
993 || (kind
& typemask
) == GOMP_MAP_USE_DEVICE_PTR_IF_PRESENT
)
995 tgt
->list
[i
].key
= NULL
;
998 /* In OpenMP < 5.0 and OpenACC the mapping has to be done
999 on a separate construct prior to using use_device_{addr,ptr}.
1000 In OpenMP 5.0, map directives need to be ordered by the
1001 middle-end before the use_device_* clauses. If
1002 !not_found_cnt, all mappings requested (if any) are already
1003 mapped, so use_device_{addr,ptr} can be resolved right away.
1004 Otherwise, if not_found_cnt, gomp_map_lookup might fail
1005 now but would succeed after performing the mappings in the
1006 following loop. We can't defer this always to the second
1007 loop, because it is not even invoked when !not_found_cnt
1008 after the first loop. */
1009 cur_node
.host_start
= (uintptr_t) hostaddrs
[i
];
1010 cur_node
.host_end
= cur_node
.host_start
;
1011 splay_tree_key n
= gomp_map_lookup (mem_map
, &cur_node
);
1014 cur_node
.host_start
-= n
->host_start
;
1016 = (void *) (n
->tgt
->tgt_start
+ n
->tgt_offset
1017 + cur_node
.host_start
);
1019 else if ((kind
& typemask
) == GOMP_MAP_USE_DEVICE_PTR
)
1021 gomp_mutex_unlock (&devicep
->lock
);
1022 gomp_fatal ("use_device_ptr pointer wasn't mapped");
1024 else if ((kind
& typemask
) == GOMP_MAP_USE_DEVICE_PTR_IF_PRESENT
)
1025 /* If not present, continue using the host address. */
1028 __builtin_unreachable ();
1029 tgt
->list
[i
].offset
= OFFSET_INLINED
;
1032 tgt
->list
[i
].offset
= 0;
1035 else if ((kind
& typemask
) == GOMP_MAP_STRUCT
)
1037 size_t first
= i
+ 1;
1038 size_t last
= i
+ sizes
[i
];
1039 cur_node
.host_start
= (uintptr_t) hostaddrs
[i
];
1040 cur_node
.host_end
= (uintptr_t) hostaddrs
[last
]
1042 tgt
->list
[i
].key
= NULL
;
1043 tgt
->list
[i
].offset
= OFFSET_STRUCT
;
1044 splay_tree_key n
= splay_tree_lookup (mem_map
, &cur_node
);
1047 size_t align
= (size_t) 1 << (kind
>> rshift
);
1048 if (tgt_align
< align
)
1050 tgt_size
-= (uintptr_t) hostaddrs
[first
] - cur_node
.host_start
;
1051 tgt_size
= (tgt_size
+ align
- 1) & ~(align
- 1);
1052 tgt_size
+= cur_node
.host_end
- cur_node
.host_start
;
1053 not_found_cnt
+= last
- i
;
1054 for (i
= first
; i
<= last
; i
++)
1056 tgt
->list
[i
].key
= NULL
;
1058 && gomp_to_device_kind_p (get_kind (short_mapkind
, kinds
, i
)
1060 gomp_coalesce_buf_add (&cbuf
,
1061 tgt_size
- cur_node
.host_end
1062 + (uintptr_t) hostaddrs
[i
],
1068 for (i
= first
; i
<= last
; i
++)
1069 gomp_map_fields_existing (tgt
, aq
, n
, first
, i
, hostaddrs
,
1070 sizes
, kinds
, NULL
, refcount_set
);
1074 else if ((kind
& typemask
) == GOMP_MAP_ALWAYS_POINTER
)
1076 tgt
->list
[i
].key
= NULL
;
1077 tgt
->list
[i
].offset
= OFFSET_POINTER
;
1078 has_firstprivate
= true;
1081 else if ((kind
& typemask
) == GOMP_MAP_ATTACH
1082 || ((kind
& typemask
)
1083 == GOMP_MAP_ATTACH_ZERO_LENGTH_ARRAY_SECTION
))
1085 tgt
->list
[i
].key
= NULL
;
1086 has_firstprivate
= true;
1089 cur_node
.host_start
= (uintptr_t) hostaddrs
[i
];
1090 if (!GOMP_MAP_POINTER_P (kind
& typemask
))
1091 cur_node
.host_end
= cur_node
.host_start
+ sizes
[i
];
1093 cur_node
.host_end
= cur_node
.host_start
+ sizeof (void *);
1094 if ((kind
& typemask
) == GOMP_MAP_FIRSTPRIVATE
)
1096 tgt
->list
[i
].key
= NULL
;
1098 size_t align
= (size_t) 1 << (kind
>> rshift
);
1099 if (tgt_align
< align
)
1101 tgt_size
= (tgt_size
+ align
- 1) & ~(align
- 1);
1103 gomp_coalesce_buf_add (&cbuf
, tgt_size
,
1104 cur_node
.host_end
- cur_node
.host_start
);
1105 tgt_size
+= cur_node
.host_end
- cur_node
.host_start
;
1106 has_firstprivate
= true;
1110 if ((kind
& typemask
) == GOMP_MAP_ZERO_LEN_ARRAY_SECTION
)
1112 n
= gomp_map_0len_lookup (mem_map
, &cur_node
);
1115 tgt
->list
[i
].key
= NULL
;
1116 tgt
->list
[i
].offset
= OFFSET_POINTER
;
1121 n
= splay_tree_lookup (mem_map
, &cur_node
);
1122 if (n
&& n
->refcount
!= REFCOUNT_LINK
)
1124 int always_to_cnt
= 0;
1125 if ((kind
& typemask
) == GOMP_MAP_TO_PSET
)
1127 bool has_nullptr
= false;
1129 for (j
= 0; j
< n
->tgt
->list_count
; j
++)
1130 if (n
->tgt
->list
[j
].key
== n
)
1132 has_nullptr
= n
->tgt
->list
[j
].has_null_ptr_assoc
;
1135 if (n
->tgt
->list_count
== 0)
/* 'declare target'; assume has_nullptr; it could also be a
   statically assigned pointer, but then it should point to
   the equivalent variable on the host.  */
1140 assert (n
->refcount
== REFCOUNT_INFINITY
);
1144 assert (j
< n
->tgt
->list_count
);
/* Re-map the data if there is an 'always' modifier or if a null pointer
   was there and now a non-null one has been found; that permits
   transparent re-mapping for Fortran array descriptors which were
   previously mapped unallocated.  */
1149 for (j
= i
+ 1; j
< mapnum
; j
++)
1151 int ptr_kind
= get_kind (short_mapkind
, kinds
, j
) & typemask
;
1152 if (!GOMP_MAP_ALWAYS_POINTER_P (ptr_kind
)
1154 || !GOMP_MAP_POINTER_P (ptr_kind
)
1155 || *(void **) hostaddrs
[j
] == NULL
))
1157 else if ((uintptr_t) hostaddrs
[j
] < cur_node
.host_start
1158 || ((uintptr_t) hostaddrs
[j
] + sizeof (void *)
1159 > cur_node
.host_end
))
1163 has_always_ptrset
= true;
1168 gomp_map_vars_existing (devicep
, aq
, n
, &cur_node
, &tgt
->list
[i
],
1169 kind
& typemask
, always_to_cnt
> 0, implicit
,
1170 NULL
, refcount_set
);
1175 tgt
->list
[i
].key
= NULL
;
1177 if ((kind
& typemask
) == GOMP_MAP_IF_PRESENT
)
1179 /* Not present, hence, skip entry - including its MAP_POINTER,
1181 tgt
->list
[i
].offset
= OFFSET_POINTER
;
1183 && ((typemask
& get_kind (short_mapkind
, kinds
, i
+ 1))
1184 == GOMP_MAP_POINTER
))
1187 tgt
->list
[i
].key
= NULL
;
1188 tgt
->list
[i
].offset
= 0;
1192 size_t align
= (size_t) 1 << (kind
>> rshift
);
1194 if (tgt_align
< align
)
1196 tgt_size
= (tgt_size
+ align
- 1) & ~(align
- 1);
1198 && gomp_to_device_kind_p (kind
& typemask
))
1199 gomp_coalesce_buf_add (&cbuf
, tgt_size
,
1200 cur_node
.host_end
- cur_node
.host_start
);
1201 tgt_size
+= cur_node
.host_end
- cur_node
.host_start
;
1202 if ((kind
& typemask
) == GOMP_MAP_TO_PSET
)
1206 for (j
= i
+ 1; j
< mapnum
; j
++)
1207 if (!GOMP_MAP_POINTER_P ((kind
= (get_kind (short_mapkind
,
1208 kinds
, j
)) & typemask
))
1209 && !GOMP_MAP_ALWAYS_POINTER_P (kind
))
1211 else if ((uintptr_t) hostaddrs
[j
] < cur_node
.host_start
1212 || ((uintptr_t) hostaddrs
[j
] + sizeof (void *)
1213 > cur_node
.host_end
))
1217 tgt
->list
[j
].key
= NULL
;
1228 gomp_mutex_unlock (&devicep
->lock
);
1229 gomp_fatal ("unexpected aggregation");
1231 tgt
->to_free
= devaddrs
[0];
1232 tgt
->tgt_start
= (uintptr_t) tgt
->to_free
;
1233 tgt
->tgt_end
= tgt
->tgt_start
+ sizes
[0];
1235 else if (not_found_cnt
|| pragma_kind
== GOMP_MAP_VARS_TARGET
)
1237 /* Allocate tgt_align aligned tgt_size block of memory. */
1238 /* FIXME: Perhaps change interface to allocate properly aligned
1240 tgt
->to_free
= devicep
->alloc_func (devicep
->target_id
,
1241 tgt_size
+ tgt_align
- 1);
1244 gomp_mutex_unlock (&devicep
->lock
);
1245 gomp_fatal ("device memory allocation fail");
1248 tgt
->tgt_start
= (uintptr_t) tgt
->to_free
;
1249 tgt
->tgt_start
= (tgt
->tgt_start
+ tgt_align
- 1) & ~(tgt_align
- 1);
1250 tgt
->tgt_end
= tgt
->tgt_start
+ tgt_size
;
1252 if (cbuf
.use_cnt
== 1)
1254 if (cbuf
.chunk_cnt
> 0)
1257 = malloc (cbuf
.chunks
[cbuf
.chunk_cnt
- 1].end
- cbuf
.chunks
[0].start
);
1267 tgt
->to_free
= NULL
;
1273 if (pragma_kind
== GOMP_MAP_VARS_TARGET
)
1274 tgt_size
= mapnum
* sizeof (void *);
1277 if (not_found_cnt
|| has_firstprivate
|| has_always_ptrset
)
1280 tgt
->array
= gomp_malloc (not_found_cnt
* sizeof (*tgt
->array
));
1281 splay_tree_node array
= tgt
->array
;
1282 size_t j
, field_tgt_offset
= 0, field_tgt_clear
= FIELD_TGT_EMPTY
;
1283 uintptr_t field_tgt_base
= 0;
1284 splay_tree_key field_tgt_structelem_first
= NULL
;
1286 for (i
= 0; i
< mapnum
; i
++)
1287 if (has_always_ptrset
1289 && (get_kind (short_mapkind
, kinds
, i
) & typemask
)
1290 == GOMP_MAP_TO_PSET
)
1292 splay_tree_key k
= tgt
->list
[i
].key
;
1293 bool has_nullptr
= false;
1295 for (j
= 0; j
< k
->tgt
->list_count
; j
++)
1296 if (k
->tgt
->list
[j
].key
== k
)
1298 has_nullptr
= k
->tgt
->list
[j
].has_null_ptr_assoc
;
1301 if (k
->tgt
->list_count
== 0)
1304 assert (j
< k
->tgt
->list_count
);
1306 tgt
->list
[i
].has_null_ptr_assoc
= false;
1307 for (j
= i
+ 1; j
< mapnum
; j
++)
1309 int ptr_kind
= get_kind (short_mapkind
, kinds
, j
) & typemask
;
1310 if (!GOMP_MAP_ALWAYS_POINTER_P (ptr_kind
)
1312 || !GOMP_MAP_POINTER_P (ptr_kind
)
1313 || *(void **) hostaddrs
[j
] == NULL
))
1315 else if ((uintptr_t) hostaddrs
[j
] < k
->host_start
1316 || ((uintptr_t) hostaddrs
[j
] + sizeof (void *)
1321 if (*(void **) hostaddrs
[j
] == NULL
)
1322 tgt
->list
[i
].has_null_ptr_assoc
= true;
1323 tgt
->list
[j
].key
= k
;
1324 tgt
->list
[j
].copy_from
= false;
1325 tgt
->list
[j
].always_copy_from
= false;
1326 tgt
->list
[j
].is_attach
= false;
1327 gomp_increment_refcount (k
, refcount_set
);
1328 gomp_map_pointer (k
->tgt
, aq
,
1329 (uintptr_t) *(void **) hostaddrs
[j
],
1330 k
->tgt_offset
+ ((uintptr_t) hostaddrs
[j
]
1332 sizes
[j
], cbufp
, false);
1337 else if (tgt
->list
[i
].key
== NULL
)
1339 int kind
= get_kind (short_mapkind
, kinds
, i
);
1340 bool implicit
= get_implicit (short_mapkind
, kinds
, i
);
1341 if (hostaddrs
[i
] == NULL
)
1343 switch (kind
& typemask
)
1345 size_t align
, len
, first
, last
;
1347 case GOMP_MAP_FIRSTPRIVATE
:
1348 align
= (size_t) 1 << (kind
>> rshift
);
1349 tgt_size
= (tgt_size
+ align
- 1) & ~(align
- 1);
1350 tgt
->list
[i
].offset
= tgt_size
;
1352 gomp_copy_host2dev (devicep
, aq
,
1353 (void *) (tgt
->tgt_start
+ tgt_size
),
1354 (void *) hostaddrs
[i
], len
, false, cbufp
);
1357 case GOMP_MAP_FIRSTPRIVATE_INT
:
1358 case GOMP_MAP_ZERO_LEN_ARRAY_SECTION
:
1360 case GOMP_MAP_USE_DEVICE_PTR_IF_PRESENT
:
1361 /* The OpenACC 'host_data' construct only allows 'use_device'
1362 "mapping" clauses, so in the first loop, 'not_found_cnt'
1363 must always have been zero, so all OpenACC 'use_device'
1364 clauses have already been handled. (We can only easily test
1365 'use_device' with 'if_present' clause here.) */
1366 assert (tgt
->list
[i
].offset
== OFFSET_INLINED
);
1367 /* Nevertheless, FALLTHRU to the normal handling, to keep the
1368 code conceptually simple, similar to the first loop. */
1369 case GOMP_MAP_USE_DEVICE_PTR
:
1370 if (tgt
->list
[i
].offset
== 0)
1372 cur_node
.host_start
= (uintptr_t) hostaddrs
[i
];
1373 cur_node
.host_end
= cur_node
.host_start
;
1374 n
= gomp_map_lookup (mem_map
, &cur_node
);
1377 cur_node
.host_start
-= n
->host_start
;
1379 = (void *) (n
->tgt
->tgt_start
+ n
->tgt_offset
1380 + cur_node
.host_start
);
1382 else if ((kind
& typemask
) == GOMP_MAP_USE_DEVICE_PTR
)
1384 gomp_mutex_unlock (&devicep
->lock
);
1385 gomp_fatal ("use_device_ptr pointer wasn't mapped");
1387 else if ((kind
& typemask
)
1388 == GOMP_MAP_USE_DEVICE_PTR_IF_PRESENT
)
1389 /* If not present, continue using the host address. */
1392 __builtin_unreachable ();
1393 tgt
->list
[i
].offset
= OFFSET_INLINED
;
1396 case GOMP_MAP_STRUCT
:
1398 last
= i
+ sizes
[i
];
1399 cur_node
.host_start
= (uintptr_t) hostaddrs
[i
];
1400 cur_node
.host_end
= (uintptr_t) hostaddrs
[last
]
1402 if (tgt
->list
[first
].key
!= NULL
)
1404 n
= splay_tree_lookup (mem_map
, &cur_node
);
1407 size_t align
= (size_t) 1 << (kind
>> rshift
);
1408 tgt_size
-= (uintptr_t) hostaddrs
[first
]
1409 - (uintptr_t) hostaddrs
[i
];
1410 tgt_size
= (tgt_size
+ align
- 1) & ~(align
- 1);
1411 tgt_size
+= (uintptr_t) hostaddrs
[first
]
1412 - (uintptr_t) hostaddrs
[i
];
1413 field_tgt_base
= (uintptr_t) hostaddrs
[first
];
1414 field_tgt_offset
= tgt_size
;
1415 field_tgt_clear
= last
;
1416 field_tgt_structelem_first
= NULL
;
1417 tgt_size
+= cur_node
.host_end
1418 - (uintptr_t) hostaddrs
[first
];
1421 for (i
= first
; i
<= last
; i
++)
1422 gomp_map_fields_existing (tgt
, aq
, n
, first
, i
, hostaddrs
,
1423 sizes
, kinds
, cbufp
, refcount_set
);
1426 case GOMP_MAP_ALWAYS_POINTER
:
1427 cur_node
.host_start
= (uintptr_t) hostaddrs
[i
];
1428 cur_node
.host_end
= cur_node
.host_start
+ sizeof (void *);
1429 n
= splay_tree_lookup (mem_map
, &cur_node
);
1431 || n
->host_start
> cur_node
.host_start
1432 || n
->host_end
< cur_node
.host_end
)
1434 gomp_mutex_unlock (&devicep
->lock
);
1435 gomp_fatal ("always pointer not mapped");
1437 if ((get_kind (short_mapkind
, kinds
, i
- 1) & typemask
)
1438 != GOMP_MAP_ALWAYS_POINTER
)
1439 cur_node
.tgt_offset
= gomp_map_val (tgt
, hostaddrs
, i
- 1);
1440 if (cur_node
.tgt_offset
)
1441 cur_node
.tgt_offset
-= sizes
[i
];
1442 gomp_copy_host2dev (devicep
, aq
,
1443 (void *) (n
->tgt
->tgt_start
1445 + cur_node
.host_start
1447 (void *) &cur_node
.tgt_offset
,
1448 sizeof (void *), true, cbufp
);
1449 cur_node
.tgt_offset
= n
->tgt
->tgt_start
+ n
->tgt_offset
1450 + cur_node
.host_start
- n
->host_start
;
1452 case GOMP_MAP_IF_PRESENT
:
1453 /* Not present - otherwise handled above. Skip over its
1454 MAP_POINTER as well. */
1456 && ((typemask
& get_kind (short_mapkind
, kinds
, i
+ 1))
1457 == GOMP_MAP_POINTER
))
1460 case GOMP_MAP_ATTACH
:
1461 case GOMP_MAP_ATTACH_ZERO_LENGTH_ARRAY_SECTION
:
1463 cur_node
.host_start
= (uintptr_t) hostaddrs
[i
];
1464 cur_node
.host_end
= cur_node
.host_start
+ sizeof (void *);
1465 splay_tree_key n
= splay_tree_lookup (mem_map
, &cur_node
);
1468 tgt
->list
[i
].key
= n
;
1469 tgt
->list
[i
].offset
= cur_node
.host_start
- n
->host_start
;
1470 tgt
->list
[i
].length
= n
->host_end
- n
->host_start
;
1471 tgt
->list
[i
].copy_from
= false;
1472 tgt
->list
[i
].always_copy_from
= false;
1473 tgt
->list
[i
].is_attach
= true;
1474 /* OpenACC 'attach'/'detach' doesn't affect
1475 structured/dynamic reference counts ('n->refcount',
1476 'n->dynamic_refcount'). */
1479 = ((kind
& typemask
)
1480 == GOMP_MAP_ATTACH_ZERO_LENGTH_ARRAY_SECTION
);
1481 gomp_attach_pointer (devicep
, aq
, mem_map
, n
,
1482 (uintptr_t) hostaddrs
[i
], sizes
[i
],
1485 else if ((pragma_kind
& GOMP_MAP_VARS_OPENACC
) != 0)
1487 gomp_mutex_unlock (&devicep
->lock
);
1488 gomp_fatal ("outer struct not mapped for attach");
1495 splay_tree_key k
= &array
->key
;
1496 k
->host_start
= (uintptr_t) hostaddrs
[i
];
1497 if (!GOMP_MAP_POINTER_P (kind
& typemask
))
1498 k
->host_end
= k
->host_start
+ sizes
[i
];
1500 k
->host_end
= k
->host_start
+ sizeof (void *);
1501 splay_tree_key n
= splay_tree_lookup (mem_map
, k
);
1502 if (n
&& n
->refcount
!= REFCOUNT_LINK
)
1503 gomp_map_vars_existing (devicep
, aq
, n
, k
, &tgt
->list
[i
],
1504 kind
& typemask
, false, implicit
, cbufp
,
1509 if (n
&& n
->refcount
== REFCOUNT_LINK
)
1511 /* Replace target address of the pointer with target address
1512 of mapped object in the splay tree. */
1513 splay_tree_remove (mem_map
, n
);
1515 = gomp_malloc_cleared (sizeof (struct splay_tree_aux
));
1516 k
->aux
->link_key
= n
;
1518 size_t align
= (size_t) 1 << (kind
>> rshift
);
1519 tgt
->list
[i
].key
= k
;
1522 k
->dynamic_refcount
= 0;
1523 if (field_tgt_clear
!= FIELD_TGT_EMPTY
)
1525 k
->tgt_offset
= k
->host_start
- field_tgt_base
1529 k
->refcount
= REFCOUNT_STRUCTELEM
;
1530 if (field_tgt_structelem_first
== NULL
)
1532 /* Set to first structure element of sequence. */
1533 k
->refcount
|= REFCOUNT_STRUCTELEM_FLAG_FIRST
;
1534 field_tgt_structelem_first
= k
;
1537 /* Point to refcount of leading element, but do not
1539 k
->structelem_refcount_ptr
1540 = &field_tgt_structelem_first
->structelem_refcount
;
1542 if (i
== field_tgt_clear
)
1544 k
->refcount
|= REFCOUNT_STRUCTELEM_FLAG_LAST
;
1545 field_tgt_structelem_first
= NULL
;
1548 if (i
== field_tgt_clear
)
1549 field_tgt_clear
= FIELD_TGT_EMPTY
;
1553 tgt_size
= (tgt_size
+ align
- 1) & ~(align
- 1);
1554 k
->tgt_offset
= tgt_size
;
1555 tgt_size
+= k
->host_end
- k
->host_start
;
1557 /* First increment, from 0 to 1. gomp_increment_refcount
1558 encapsulates the different increment cases, so use this
1559 instead of directly setting 1 during initialization. */
1560 gomp_increment_refcount (k
, refcount_set
);
1562 tgt
->list
[i
].copy_from
= GOMP_MAP_COPY_FROM_P (kind
& typemask
);
1563 tgt
->list
[i
].always_copy_from
1564 = GOMP_MAP_ALWAYS_FROM_P (kind
& typemask
);
1565 tgt
->list
[i
].is_attach
= false;
1566 tgt
->list
[i
].offset
= 0;
1567 tgt
->list
[i
].length
= k
->host_end
- k
->host_start
;
1570 array
->right
= NULL
;
1571 splay_tree_insert (mem_map
, array
);
1572 switch (kind
& typemask
)
1574 case GOMP_MAP_ALLOC
:
1576 case GOMP_MAP_FORCE_ALLOC
:
1577 case GOMP_MAP_FORCE_FROM
:
1578 case GOMP_MAP_ALWAYS_FROM
:
1581 case GOMP_MAP_TOFROM
:
1582 case GOMP_MAP_FORCE_TO
:
1583 case GOMP_MAP_FORCE_TOFROM
:
1584 case GOMP_MAP_ALWAYS_TO
:
1585 case GOMP_MAP_ALWAYS_TOFROM
:
1586 gomp_copy_host2dev (devicep
, aq
,
1587 (void *) (tgt
->tgt_start
1589 (void *) k
->host_start
,
1590 k
->host_end
- k
->host_start
,
1593 case GOMP_MAP_POINTER
:
1594 case GOMP_MAP_POINTER_TO_ZERO_LENGTH_ARRAY_SECTION
:
1596 (tgt
, aq
, (uintptr_t) *(void **) k
->host_start
,
1597 k
->tgt_offset
, sizes
[i
], cbufp
,
1599 == GOMP_MAP_POINTER_TO_ZERO_LENGTH_ARRAY_SECTION
));
1601 case GOMP_MAP_TO_PSET
:
1602 gomp_copy_host2dev (devicep
, aq
,
1603 (void *) (tgt
->tgt_start
1605 (void *) k
->host_start
,
1606 k
->host_end
- k
->host_start
,
1608 tgt
->list
[i
].has_null_ptr_assoc
= false;
1610 for (j
= i
+ 1; j
< mapnum
; j
++)
1612 int ptr_kind
= (get_kind (short_mapkind
, kinds
, j
)
1614 if (!GOMP_MAP_POINTER_P (ptr_kind
)
1615 && !GOMP_MAP_ALWAYS_POINTER_P (ptr_kind
))
1617 else if ((uintptr_t) hostaddrs
[j
] < k
->host_start
1618 || ((uintptr_t) hostaddrs
[j
] + sizeof (void *)
1623 tgt
->list
[j
].key
= k
;
1624 tgt
->list
[j
].copy_from
= false;
1625 tgt
->list
[j
].always_copy_from
= false;
1626 tgt
->list
[j
].is_attach
= false;
1627 tgt
->list
[i
].has_null_ptr_assoc
|= !(*(void **) hostaddrs
[j
]);
/* For OpenMP, the use of refcount_sets causes errors if we set
   k->refcount = 1 above but also increment it again here, because
   the decrements will not properly match, since we decrement only
   once for each key's refcount.  Therefore avoid this increment
   for OpenMP constructs.  */
1635 gomp_increment_refcount (k
, refcount_set
);
1636 gomp_map_pointer (tgt
, aq
,
1637 (uintptr_t) *(void **) hostaddrs
[j
],
1639 + ((uintptr_t) hostaddrs
[j
]
1641 sizes
[j
], cbufp
, false);
1646 case GOMP_MAP_FORCE_PRESENT
:
1648 /* We already looked up the memory region above and it
1650 size_t size
= k
->host_end
- k
->host_start
;
1651 gomp_mutex_unlock (&devicep
->lock
);
1652 #ifdef HAVE_INTTYPES_H
1653 gomp_fatal ("present clause: !acc_is_present (%p, "
1654 "%"PRIu64
" (0x%"PRIx64
"))",
1655 (void *) k
->host_start
,
1656 (uint64_t) size
, (uint64_t) size
);
1658 gomp_fatal ("present clause: !acc_is_present (%p, "
1659 "%lu (0x%lx))", (void *) k
->host_start
,
1660 (unsigned long) size
, (unsigned long) size
);
1664 case GOMP_MAP_FORCE_DEVICEPTR
:
1665 assert (k
->host_end
- k
->host_start
== sizeof (void *));
1666 gomp_copy_host2dev (devicep
, aq
,
1667 (void *) (tgt
->tgt_start
1669 (void *) k
->host_start
,
1670 sizeof (void *), false, cbufp
);
1673 gomp_mutex_unlock (&devicep
->lock
);
1674 gomp_fatal ("%s: unhandled kind 0x%.2x", __FUNCTION__
,
1678 if (k
->aux
&& k
->aux
->link_key
)
1680 /* Set link pointer on target to the device address of the
1682 void *tgt_addr
= (void *) (tgt
->tgt_start
+ k
->tgt_offset
);
1683 /* We intentionally do not use coalescing here, as it's not
1684 data allocated by the current call to this function. */
1685 gomp_copy_host2dev (devicep
, aq
, (void *) n
->tgt_offset
,
1686 &tgt_addr
, sizeof (void *), true, NULL
);
1693 if (pragma_kind
== GOMP_MAP_VARS_TARGET
)
1695 for (i
= 0; i
< mapnum
; i
++)
1697 cur_node
.tgt_offset
= gomp_map_val (tgt
, hostaddrs
, i
);
1698 gomp_copy_host2dev (devicep
, aq
,
1699 (void *) (tgt
->tgt_start
+ i
* sizeof (void *)),
1700 (void *) &cur_node
.tgt_offset
, sizeof (void *),
1707 /* See 'gomp_coalesce_buf_add'. */
1711 for (c
= 0; c
< cbuf
.chunk_cnt
; ++c
)
1712 gomp_copy_host2dev (devicep
, aq
,
1713 (void *) (tgt
->tgt_start
+ cbuf
.chunks
[c
].start
),
1714 (char *) cbuf
.buf
+ (cbuf
.chunks
[c
].start
1715 - cbuf
.chunks
[0].start
),
1716 cbuf
.chunks
[c
].end
- cbuf
.chunks
[c
].start
,
1723 /* If the variable from "omp target enter data" map-list was already mapped,
1724 tgt is not needed. Otherwise tgt will be freed by gomp_unmap_vars or
1726 if ((pragma_kind
& GOMP_MAP_VARS_ENTER_DATA
) && tgt
->refcount
== 0)
1732 gomp_mutex_unlock (&devicep
->lock
);
static struct target_mem_desc *
gomp_map_vars (struct gomp_device_descr *devicep, size_t mapnum,
               void **hostaddrs, void **devaddrs, size_t *sizes, void *kinds,
               bool short_mapkind, htab_t *refcount_set,
               enum gomp_map_vars_kind pragma_kind)
{
  /* This management of a local refcount_set is for convenience of callers
     who do not share a refcount_set over multiple map/unmap uses.  */
  htab_t local_refcount_set = NULL;
  if (refcount_set == NULL)
    {
      local_refcount_set = htab_create (mapnum);
      refcount_set = &local_refcount_set;
    }

  struct target_mem_desc *tgt;
  tgt = gomp_map_vars_internal (devicep, NULL, mapnum, hostaddrs, devaddrs,
                                sizes, kinds, short_mapkind, refcount_set,
                                pragma_kind);
  if (local_refcount_set)
    htab_free (local_refcount_set);

  return tgt;
}

attribute_hidden struct target_mem_desc *
goacc_map_vars (struct gomp_device_descr *devicep,
                struct goacc_asyncqueue *aq, size_t mapnum,
                void **hostaddrs, void **devaddrs, size_t *sizes,
                void *kinds, bool short_mapkind,
                enum gomp_map_vars_kind pragma_kind)
{
  return gomp_map_vars_internal (devicep, aq, mapnum, hostaddrs, devaddrs,
                                 sizes, kinds, short_mapkind, NULL,
                                 GOMP_MAP_VARS_OPENACC | pragma_kind);
}
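/* Illustrative call pattern (an assumption about a typical caller such as
   GOMP_target_ext, not code shown in this excerpt):

     htab_t refcount_set = htab_create (mapnum);
     struct target_mem_desc *tgt
       = gomp_map_vars (devicep, mapnum, hostaddrs, NULL, sizes, kinds,
                        true, &refcount_set, GOMP_MAP_VARS_TARGET);
     ...
     htab_free (refcount_set);

   Callers that pass refcount_set == NULL instead get the temporary set
   created and destroyed inside gomp_map_vars itself, as above.  */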
1774 gomp_unmap_tgt (struct target_mem_desc
*tgt
)
1776 /* Deallocate on target the tgt->tgt_start .. tgt->tgt_end region. */
1778 gomp_free_device_memory (tgt
->device_descr
, tgt
->to_free
);
1785 gomp_unref_tgt (void *ptr
)
1787 bool is_tgt_unmapped
= false;
1789 struct target_mem_desc
*tgt
= (struct target_mem_desc
*) ptr
;
1791 if (tgt
->refcount
> 1)
1795 gomp_unmap_tgt (tgt
);
1796 is_tgt_unmapped
= true;
1799 return is_tgt_unmapped
;
1803 gomp_unref_tgt_void (void *ptr
)
1805 (void) gomp_unref_tgt (ptr
);
1809 gomp_remove_splay_tree_key (splay_tree sp
, splay_tree_key k
)
1811 splay_tree_remove (sp
, k
);
1814 if (k
->aux
->link_key
)
1815 splay_tree_insert (sp
, (splay_tree_node
) k
->aux
->link_key
);
1816 if (k
->aux
->attach_count
)
1817 free (k
->aux
->attach_count
);
1823 static inline __attribute__((always_inline
)) bool
1824 gomp_remove_var_internal (struct gomp_device_descr
*devicep
, splay_tree_key k
,
1825 struct goacc_asyncqueue
*aq
)
1827 bool is_tgt_unmapped
= false;
1829 if (REFCOUNT_STRUCTELEM_P (k
->refcount
))
1831 if (REFCOUNT_STRUCTELEM_FIRST_P (k
->refcount
) == false)
1832 /* Infer the splay_tree_key of the first structelem key using the
1833 pointer to the first structleme_refcount. */
1834 k
= (splay_tree_key
) ((char *) k
->structelem_refcount_ptr
1835 - offsetof (struct splay_tree_key_s
,
1836 structelem_refcount
));
1837 assert (REFCOUNT_STRUCTELEM_FIRST_P (k
->refcount
));
1839 /* The array created by gomp_map_vars is an array of splay_tree_nodes,
1840 with the splay_tree_keys embedded inside. */
1841 splay_tree_node node
=
1842 (splay_tree_node
) ((char *) k
1843 - offsetof (struct splay_tree_node_s
, key
));
1846 /* Starting from the _FIRST key, and continue for all following
1848 gomp_remove_splay_tree_key (&devicep
->mem_map
, k
);
1849 if (REFCOUNT_STRUCTELEM_LAST_P (k
->refcount
))
1856 gomp_remove_splay_tree_key (&devicep
->mem_map
, k
);
1859 devicep
->openacc
.async
.queue_callback_func (aq
, gomp_unref_tgt_void
,
1862 is_tgt_unmapped
= gomp_unref_tgt ((void *) k
->tgt
);
1863 return is_tgt_unmapped
;
1866 attribute_hidden
bool
1867 gomp_remove_var (struct gomp_device_descr
*devicep
, splay_tree_key k
)
1869 return gomp_remove_var_internal (devicep
, k
, NULL
);
1872 /* Remove a variable asynchronously. This actually removes the variable
1873 mapping immediately, but retains the linked target_mem_desc until the
1874 asynchronous operation has completed (as it may still refer to target
1875 memory). The device lock must be held before entry, and remains locked on
1878 attribute_hidden
void
1879 gomp_remove_var_async (struct gomp_device_descr
*devicep
, splay_tree_key k
,
1880 struct goacc_asyncqueue
*aq
)
1882 (void) gomp_remove_var_internal (devicep
, k
, aq
);
1885 /* Unmap variables described by TGT. If DO_COPYFROM is true, copy relevant
1886 variables back from device to host: if it is false, it is assumed that this
1887 has been done already. */
1889 static inline __attribute__((always_inline
)) void
1890 gomp_unmap_vars_internal (struct target_mem_desc
*tgt
, bool do_copyfrom
,
1891 htab_t
*refcount_set
, struct goacc_asyncqueue
*aq
)
1893 struct gomp_device_descr
*devicep
= tgt
->device_descr
;
1895 if (tgt
->list_count
== 0)
1901 gomp_mutex_lock (&devicep
->lock
);
1902 if (devicep
->state
== GOMP_DEVICE_FINALIZED
)
1904 gomp_mutex_unlock (&devicep
->lock
);
1912 /* We must perform detachments before any copies back to the host. */
1913 for (i
= 0; i
< tgt
->list_count
; i
++)
1915 splay_tree_key k
= tgt
->list
[i
].key
;
1917 if (k
!= NULL
&& tgt
->list
[i
].is_attach
)
1918 gomp_detach_pointer (devicep
, aq
, k
, tgt
->list
[i
].key
->host_start
1919 + tgt
->list
[i
].offset
,
1923 for (i
= 0; i
< tgt
->list_count
; i
++)
1925 splay_tree_key k
= tgt
->list
[i
].key
;
1929 /* OpenACC 'attach'/'detach' doesn't affect structured/dynamic reference
1930 counts ('n->refcount', 'n->dynamic_refcount'). */
1931 if (tgt
->list
[i
].is_attach
)
1934 bool do_copy
, do_remove
;
1935 gomp_decrement_refcount (k
, refcount_set
, false, &do_copy
, &do_remove
);
1937 if ((do_copy
&& do_copyfrom
&& tgt
->list
[i
].copy_from
)
1938 || tgt
->list
[i
].always_copy_from
)
1939 gomp_copy_dev2host (devicep
, aq
,
1940 (void *) (k
->host_start
+ tgt
->list
[i
].offset
),
1941 (void *) (k
->tgt
->tgt_start
+ k
->tgt_offset
1942 + tgt
->list
[i
].offset
),
1943 tgt
->list
[i
].length
);
1946 struct target_mem_desc
*k_tgt
= k
->tgt
;
1947 bool is_tgt_unmapped
= gomp_remove_var (devicep
, k
);
1948 /* It would be bad if TGT got unmapped while we're still iterating
1949 over its LIST_COUNT, and also expect to use it in the following
1951 assert (!is_tgt_unmapped
1957 devicep
->openacc
.async
.queue_callback_func (aq
, gomp_unref_tgt_void
,
1960 gomp_unref_tgt ((void *) tgt
);
1962 gomp_mutex_unlock (&devicep
->lock
);
attribute_hidden void
gomp_unmap_vars (struct target_mem_desc *tgt, bool do_copyfrom,
                 htab_t *refcount_set)
{
  /* This management of a local refcount_set is for convenience of callers
     who do not share a refcount_set over multiple map/unmap uses.  */
  htab_t local_refcount_set = NULL;
  if (refcount_set == NULL)
    {
      local_refcount_set = htab_create (tgt->list_count);
      refcount_set = &local_refcount_set;
    }

  gomp_unmap_vars_internal (tgt, do_copyfrom, refcount_set, NULL);

  if (local_refcount_set)
    htab_free (local_refcount_set);
}

attribute_hidden void
goacc_unmap_vars (struct target_mem_desc *tgt, bool do_copyfrom,
                  struct goacc_asyncqueue *aq)
{
  gomp_unmap_vars_internal (tgt, do_copyfrom, NULL, aq);
}
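/* Illustrative pairing (an assumption about a typical caller): a target
   region created with GOMP_MAP_VARS_TARGET is torn down with

     gomp_unmap_vars (tgt, true, NULL);

   where DO_COPYFROM == true requests the map(from:)/map(tofrom:) copy-back,
   and the NULL refcount_set again makes the function manage a private set
   for this single unmap.  */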
1992 gomp_update (struct gomp_device_descr
*devicep
, size_t mapnum
, void **hostaddrs
,
1993 size_t *sizes
, void *kinds
, bool short_mapkind
)
1996 struct splay_tree_key_s cur_node
;
1997 const int typemask
= short_mapkind
? 0xff : 0x7;
2005 gomp_mutex_lock (&devicep
->lock
);
2006 if (devicep
->state
== GOMP_DEVICE_FINALIZED
)
2008 gomp_mutex_unlock (&devicep
->lock
);
2012 for (i
= 0; i
< mapnum
; i
++)
2015 cur_node
.host_start
= (uintptr_t) hostaddrs
[i
];
2016 cur_node
.host_end
= cur_node
.host_start
+ sizes
[i
];
2017 splay_tree_key n
= splay_tree_lookup (&devicep
->mem_map
, &cur_node
);
2020 int kind
= get_kind (short_mapkind
, kinds
, i
);
2021 if (n
->host_start
> cur_node
.host_start
2022 || n
->host_end
< cur_node
.host_end
)
2024 gomp_mutex_unlock (&devicep
->lock
);
2025 gomp_fatal ("Trying to update [%p..%p) object when "
2026 "only [%p..%p) is mapped",
2027 (void *) cur_node
.host_start
,
2028 (void *) cur_node
.host_end
,
2029 (void *) n
->host_start
,
2030 (void *) n
->host_end
);
2033 if (n
->aux
&& n
->aux
->attach_count
)
2035 uintptr_t addr
= cur_node
.host_start
;
2036 while (addr
< cur_node
.host_end
)
2038 /* We have to be careful not to overwrite still attached
2039 pointers during host<->device updates. */
2040 size_t i
= (addr
- cur_node
.host_start
) / sizeof (void *);
2041 if (n
->aux
->attach_count
[i
] == 0)
2043 void *devaddr
= (void *) (n
->tgt
->tgt_start
2045 + addr
- n
->host_start
);
2046 if (GOMP_MAP_COPY_TO_P (kind
& typemask
))
2047 gomp_copy_host2dev (devicep
, NULL
,
2048 devaddr
, (void *) addr
,
2049 sizeof (void *), false, NULL
);
2050 if (GOMP_MAP_COPY_FROM_P (kind
& typemask
))
2051 gomp_copy_dev2host (devicep
, NULL
,
2052 (void *) addr
, devaddr
,
2055 addr
+= sizeof (void *);
2060 void *hostaddr
= (void *) cur_node
.host_start
;
2061 void *devaddr
= (void *) (n
->tgt
->tgt_start
+ n
->tgt_offset
2062 + cur_node
.host_start
2064 size_t size
= cur_node
.host_end
- cur_node
.host_start
;
2066 if (GOMP_MAP_COPY_TO_P (kind
& typemask
))
2067 gomp_copy_host2dev (devicep
, NULL
, devaddr
, hostaddr
, size
,
2069 if (GOMP_MAP_COPY_FROM_P (kind
& typemask
))
2070 gomp_copy_dev2host (devicep
, NULL
, hostaddr
, devaddr
, size
);
2074 gomp_mutex_unlock (&devicep
->lock
);
/* Load the image pointed to by TARGET_DATA to the device specified by
   DEVICEP, and insert into the splay tree the mapping between addresses
   from HOST_TABLE and from the loaded target image.  We rely on the host
   and device compilers emitting variables and functions in the same
   order.  */
2083 gomp_load_image_to_device (struct gomp_device_descr
*devicep
, unsigned version
,
2084 const void *host_table
, const void *target_data
,
2085 bool is_register_lock
)
2087 void **host_func_table
= ((void ***) host_table
)[0];
2088 void **host_funcs_end
= ((void ***) host_table
)[1];
2089 void **host_var_table
= ((void ***) host_table
)[2];
2090 void **host_vars_end
= ((void ***) host_table
)[3];
2092 /* The func table contains only addresses, the var table contains addresses
2093 and corresponding sizes. */
2094 int num_funcs
= host_funcs_end
- host_func_table
;
2095 int num_vars
= (host_vars_end
- host_var_table
) / 2;
2097 /* Others currently is only 'device_num' */
2100 /* Load image to device and get target addresses for the image. */
2101 struct addr_pair
*target_table
= NULL
;
2102 int i
, num_target_entries
;
2105 = devicep
->load_image_func (devicep
->target_id
, version
,
2106 target_data
, &target_table
);
2108 if (num_target_entries
!= num_funcs
+ num_vars
2109 /* Others (device_num) are included as trailing entries in pair list. */
2110 && num_target_entries
!= num_funcs
+ num_vars
+ num_others
)
2112 gomp_mutex_unlock (&devicep
->lock
);
2113 if (is_register_lock
)
2114 gomp_mutex_unlock (®ister_lock
);
2115 gomp_fatal ("Cannot map target functions or variables"
2116 " (expected %u, have %u)", num_funcs
+ num_vars
,
2117 num_target_entries
);
2120 /* Insert host-target address mapping into splay tree. */
2121 struct target_mem_desc
*tgt
= gomp_malloc (sizeof (*tgt
));
2122 tgt
->array
= gomp_malloc ((num_funcs
+ num_vars
) * sizeof (*tgt
->array
));
2123 tgt
->refcount
= REFCOUNT_INFINITY
;
2126 tgt
->to_free
= NULL
;
2128 tgt
->list_count
= 0;
2129 tgt
->device_descr
= devicep
;
2130 splay_tree_node array
= tgt
->array
;
2132 for (i
= 0; i
< num_funcs
; i
++)
2134 splay_tree_key k
= &array
->key
;
2135 k
->host_start
= (uintptr_t) host_func_table
[i
];
2136 k
->host_end
= k
->host_start
+ 1;
2138 k
->tgt_offset
= target_table
[i
].start
;
2139 k
->refcount
= REFCOUNT_INFINITY
;
2140 k
->dynamic_refcount
= 0;
2143 array
->right
= NULL
;
2144 splay_tree_insert (&devicep
->mem_map
, array
);
2148 /* Most significant bit of the size in host and target tables marks
2149 "omp declare target link" variables. */
2150 const uintptr_t link_bit
= 1ULL << (sizeof (uintptr_t) * __CHAR_BIT__
- 1);
2151 const uintptr_t size_mask
= ~link_bit
;
2153 for (i
= 0; i
< num_vars
; i
++)
2155 struct addr_pair
*target_var
= &target_table
[num_funcs
+ i
];
2156 uintptr_t target_size
= target_var
->end
- target_var
->start
;
2157 bool is_link_var
= link_bit
& (uintptr_t) host_var_table
[i
* 2 + 1];
2159 if (!is_link_var
&& (uintptr_t) host_var_table
[i
* 2 + 1] != target_size
)
2161 gomp_mutex_unlock (&devicep
->lock
);
2162 if (is_register_lock
)
2163 gomp_mutex_unlock (®ister_lock
);
2164 gomp_fatal ("Cannot map target variables (size mismatch)");
2167 splay_tree_key k
= &array
->key
;
2168 k
->host_start
= (uintptr_t) host_var_table
[i
* 2];
2170 = k
->host_start
+ (size_mask
& (uintptr_t) host_var_table
[i
* 2 + 1]);
2172 k
->tgt_offset
= target_var
->start
;
2173 k
->refcount
= is_link_var
? REFCOUNT_LINK
: REFCOUNT_INFINITY
;
2174 k
->dynamic_refcount
= 0;
2177 array
->right
= NULL
;
2178 splay_tree_insert (&devicep
->mem_map
, array
);
2182 /* Last entry is for the on-device 'device_num' variable. Tolerate case
2183 where plugin does not return this entry. */
2184 if (num_funcs
+ num_vars
< num_target_entries
)
2186 struct addr_pair
*device_num_var
= &target_table
[num_funcs
+ num_vars
];
2187 /* Start address will be non-zero for last entry if GOMP_DEVICE_NUM_VAR
2188 was found in this image. */
2189 if (device_num_var
->start
!= 0)
2191 /* The index of the devicep within devices[] is regarded as its
2192 'device number', which is different from the per-device type
2193 devicep->target_id. */
2194 int device_num_val
= (int) (devicep
- &devices
[0]);
2195 if (device_num_var
->end
- device_num_var
->start
!= sizeof (int))
2197 gomp_mutex_unlock (&devicep
->lock
);
2198 if (is_register_lock
)
2199 gomp_mutex_unlock (®ister_lock
);
2200 gomp_fatal ("offload plugin managed 'device_num' not of expected "
2204 /* Copy device_num value to place on device memory, hereby actually
2205 designating its device number into effect. */
2206 gomp_copy_host2dev (devicep
, NULL
, (void *) device_num_var
->start
,
2207 &device_num_val
, sizeof (int), false, NULL
);
2211 free (target_table
);
2214 /* Unload the mappings described by target_data from device DEVICE_P.
2215 The device must be locked. */
2218 gomp_unload_image_from_device (struct gomp_device_descr
*devicep
,
2220 const void *host_table
, const void *target_data
)
2222 void **host_func_table
= ((void ***) host_table
)[0];
2223 void **host_funcs_end
= ((void ***) host_table
)[1];
2224 void **host_var_table
= ((void ***) host_table
)[2];
2225 void **host_vars_end
= ((void ***) host_table
)[3];
2227 /* The func table contains only addresses, the var table contains addresses
2228 and corresponding sizes. */
2229 int num_funcs
= host_funcs_end
- host_func_table
;
2230 int num_vars
= (host_vars_end
- host_var_table
) / 2;
2232 struct splay_tree_key_s k
;
2233 splay_tree_key node
= NULL
;
2235 /* Find mapping at start of node array */
2236 if (num_funcs
|| num_vars
)
2238 k
.host_start
= (num_funcs
? (uintptr_t) host_func_table
[0]
2239 : (uintptr_t) host_var_table
[0]);
2240 k
.host_end
= k
.host_start
+ 1;
2241 node
= splay_tree_lookup (&devicep
->mem_map
, &k
);
2244 if (!devicep
->unload_image_func (devicep
->target_id
, version
, target_data
))
2246 gomp_mutex_unlock (&devicep
->lock
);
2247 gomp_fatal ("image unload fail");
2250 /* Remove mappings from splay tree. */
2252 for (i
= 0; i
< num_funcs
; i
++)
2254 k
.host_start
= (uintptr_t) host_func_table
[i
];
2255 k
.host_end
= k
.host_start
+ 1;
2256 splay_tree_remove (&devicep
->mem_map
, &k
);
2259 /* Most significant bit of the size in host and target tables marks
2260 "omp declare target link" variables. */
2261 const uintptr_t link_bit
= 1ULL << (sizeof (uintptr_t) * __CHAR_BIT__
- 1);
2262 const uintptr_t size_mask
= ~link_bit
;
2263 bool is_tgt_unmapped
= false;
2265 for (i
= 0; i
< num_vars
; i
++)
2267 k
.host_start
= (uintptr_t) host_var_table
[i
* 2];
2269 = k
.host_start
+ (size_mask
& (uintptr_t) host_var_table
[i
* 2 + 1]);
2271 if (!(link_bit
& (uintptr_t) host_var_table
[i
* 2 + 1]))
2272 splay_tree_remove (&devicep
->mem_map
, &k
);
2275 splay_tree_key n
= splay_tree_lookup (&devicep
->mem_map
, &k
);
2276 is_tgt_unmapped
= gomp_remove_var (devicep
, n
);
2280 if (node
&& !is_tgt_unmapped
)
/* This function should be called from every offload image while loading.
   It gets the descriptor of the host func and var tables HOST_TABLE, TYPE of
   the target, and TARGET_DATA needed by target plugin.  */

void
GOMP_offload_register_ver (unsigned version, const void *host_table,
			   int target_type, const void *target_data)
{
  int i;

  if (GOMP_VERSION_LIB (version) > GOMP_VERSION)
    gomp_fatal ("Library too old for offload (version %u < %u)",
		GOMP_VERSION, GOMP_VERSION_LIB (version));

  gomp_mutex_lock (&register_lock);

  /* Load image to all initialized devices.  */
  for (i = 0; i < num_devices; i++)
    {
      struct gomp_device_descr *devicep = &devices[i];
      gomp_mutex_lock (&devicep->lock);
      if (devicep->type == target_type
	  && devicep->state == GOMP_DEVICE_INITIALIZED)
	gomp_load_image_to_device (devicep, version,
				   host_table, target_data, true);
      gomp_mutex_unlock (&devicep->lock);
    }

  /* Insert image to array of pending images.  */
  offload_images
    = gomp_realloc_unlock (offload_images,
			   (num_offload_images + 1)
			   * sizeof (struct offload_image_descr));
  offload_images[num_offload_images].version = version;
  offload_images[num_offload_images].type = target_type;
  offload_images[num_offload_images].host_table = host_table;
  offload_images[num_offload_images].target_data = target_data;

  num_offload_images++;
  gomp_mutex_unlock (&register_lock);
}

void
GOMP_offload_register (const void *host_table, int target_type,
		       const void *target_data)
{
  GOMP_offload_register_ver (0, host_table, target_type, target_data);
}
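/* For illustration only (not part of libgomp): GOMP_offload_register_ver is
   normally called from constructor glue generated by the offloading
   toolchain, once per offload image.  A hedged sketch of what such generated
   code conceptually looks like; all the table symbols and the image blob
   below are hypothetical names, only the host-table layout (function
   addresses followed by variable address/size pairs) mirrors what
   gomp_load_image_to_device expects:

     // Host table layout: [0]/[1] delimit the function-address table,
     // [2]/[3] delimit the variable table of address/size pairs.
     extern void *__offload_func_table[];
     extern void *__offload_funcs_end[];
     extern void *__offload_var_table[];
     extern void *__offload_vars_end[];
     extern const char __offload_image_start[];   // device code blob

     static const void *host_table[] = {
       __offload_func_table, __offload_funcs_end,
       __offload_var_table, __offload_vars_end
     };

     __attribute__((constructor))
     static void
     register_image (void)
     {
       GOMP_offload_register_ver (GOMP_VERSION_PACK (GOMP_VERSION,
						      GOMP_VERSION_NVIDIA_PTX),
				  host_table, GOMP_DEVICE_NVIDIA_PTX,
				  __offload_image_start);
     }
*/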
/* This function should be called from every offload image while unloading.
   It gets the descriptor of the host func and var tables HOST_TABLE, TYPE of
   the target, and TARGET_DATA needed by target plugin.  */

void
GOMP_offload_unregister_ver (unsigned version, const void *host_table,
			     int target_type, const void *target_data)
{
  int i;

  gomp_mutex_lock (&register_lock);

  /* Unload image from all initialized devices.  */
  for (i = 0; i < num_devices; i++)
    {
      struct gomp_device_descr *devicep = &devices[i];
      gomp_mutex_lock (&devicep->lock);
      if (devicep->type == target_type
	  && devicep->state == GOMP_DEVICE_INITIALIZED)
	gomp_unload_image_from_device (devicep, version,
				       host_table, target_data);
      gomp_mutex_unlock (&devicep->lock);
    }

  /* Remove image from array of pending images.  */
  for (i = 0; i < num_offload_images; i++)
    if (offload_images[i].target_data == target_data)
      {
	offload_images[i] = offload_images[--num_offload_images];
	break;
      }

  gomp_mutex_unlock (&register_lock);
}

void
GOMP_offload_unregister (const void *host_table, int target_type,
			 const void *target_data)
{
  GOMP_offload_unregister_ver (0, host_table, target_type, target_data);
}
/* This function initializes the target device, specified by DEVICEP.  DEVICEP
   must be locked on entry, and remains locked on return.  */

attribute_hidden void
gomp_init_device (struct gomp_device_descr *devicep)
{
  int i;
  if (!devicep->init_device_func (devicep->target_id))
    {
      gomp_mutex_unlock (&devicep->lock);
      gomp_fatal ("device initialization failed");
    }

  /* Load to device all images registered by the moment.  */
  for (i = 0; i < num_offload_images; i++)
    {
      struct offload_image_descr *image = &offload_images[i];
      if (image->type == devicep->type)
	gomp_load_image_to_device (devicep, image->version,
				   image->host_table, image->target_data,
				   false);
    }

  /* Initialize OpenACC asynchronous queues.  */
  goacc_init_asyncqueues (devicep);

  devicep->state = GOMP_DEVICE_INITIALIZED;
}

/* This function finalizes the target device, specified by DEVICEP.  DEVICEP
   must be locked on entry, and remains locked on return.  */

attribute_hidden bool
gomp_fini_device (struct gomp_device_descr *devicep)
{
  bool ret = goacc_fini_asyncqueues (devicep);
  ret &= devicep->fini_device_func (devicep->target_id);
  devicep->state = GOMP_DEVICE_FINALIZED;
  return ret;
}

attribute_hidden void
gomp_unload_device (struct gomp_device_descr *devicep)
{
  if (devicep->state == GOMP_DEVICE_INITIALIZED)
    {
      unsigned i;

      /* Unload from device all images registered at the moment.  */
      for (i = 0; i < num_offload_images; i++)
	{
	  struct offload_image_descr *image = &offload_images[i];
	  if (image->type == devicep->type)
	    gomp_unload_image_from_device (devicep, image->version,
					   image->host_table,
					   image->target_data);
	}
    }
}
/* Host fallback for GOMP_target{,_ext} routines.  */

static void
gomp_target_fallback (void (*fn) (void *), void **hostaddrs,
		      struct gomp_device_descr *devicep, void **args)
{
  struct gomp_thread old_thr, *thr = gomp_thread ();

  if (gomp_target_offload_var == GOMP_TARGET_OFFLOAD_MANDATORY
      && devicep != NULL)
    gomp_fatal ("OMP_TARGET_OFFLOAD is set to MANDATORY, but device cannot "
		"be used for offloading");

  old_thr = *thr;
  memset (thr, '\0', sizeof (*thr));
  if (gomp_places_list)
    {
      thr->place = old_thr.place;
      thr->ts.place_partition_len = gomp_places_list_len;
    }
  if (args)
    while (*args)
      {
	intptr_t id = (intptr_t) *args++, val;
	if (id & GOMP_TARGET_ARG_SUBSEQUENT_PARAM)
	  val = (intptr_t) *args++;
	else
	  val = id >> GOMP_TARGET_ARG_VALUE_SHIFT;
	if ((id & GOMP_TARGET_ARG_DEVICE_MASK) != GOMP_TARGET_ARG_DEVICE_ALL)
	  continue;
	id &= GOMP_TARGET_ARG_ID_MASK;
	if (id != GOMP_TARGET_ARG_THREAD_LIMIT)
	  continue;
	val = val > INT_MAX ? INT_MAX : val;
	if (val)
	  gomp_icv (true)->thread_limit_var = val;
	break;
      }

  fn (hostaddrs);
  gomp_free_thread (thr);
  *thr = old_thr;
}
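/* For illustration only (not part of libgomp): the fallback path above is
   taken when no usable device is available, while OMP_TARGET_OFFLOAD=mandatory
   turns the same situation into a fatal error instead.  A minimal sketch that
   reports where a target region actually ran:

     #include <stdio.h>
     #include <omp.h>

     int
     main (void)
     {
       int on_host = -1;

       #pragma omp target map(from: on_host)
       on_host = omp_is_initial_device ();

       printf (on_host ? "ran on the host (fallback)\n"
		       : "ran on a device\n");
       return 0;
     }

   Running e.g. with 'OMP_TARGET_OFFLOAD=disabled ./a.out' forces the
   fallback; 'OMP_TARGET_OFFLOAD=mandatory' makes a missing device fatal.  */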
/* Calculate alignment and size requirements of a private copy of data shared
   as GOMP_MAP_FIRSTPRIVATE and store them to TGT_ALIGN and TGT_SIZE.  */

static inline void
calculate_firstprivate_requirements (size_t mapnum, size_t *sizes,
				     unsigned short *kinds, size_t *tgt_align,
				     size_t *tgt_size)
{
  size_t i;
  for (i = 0; i < mapnum; i++)
    if ((kinds[i] & 0xff) == GOMP_MAP_FIRSTPRIVATE)
      {
	size_t align = (size_t) 1 << (kinds[i] >> 8);
	if (*tgt_align < align)
	  *tgt_align = align;
	*tgt_size = (*tgt_size + align - 1) & ~(align - 1);
	*tgt_size += sizes[i];
      }
}

/* Copy data shared as GOMP_MAP_FIRSTPRIVATE to DST.  */

static inline void
copy_firstprivate_data (char *tgt, size_t mapnum, void **hostaddrs,
			size_t *sizes, unsigned short *kinds, size_t tgt_align,
			size_t tgt_size)
{
  uintptr_t al = (uintptr_t) tgt & (tgt_align - 1);
  if (al)
    tgt += tgt_align - al;
  tgt_size = 0;
  size_t i;
  for (i = 0; i < mapnum; i++)
    if ((kinds[i] & 0xff) == GOMP_MAP_FIRSTPRIVATE && hostaddrs[i] != NULL)
      {
	size_t align = (size_t) 1 << (kinds[i] >> 8);
	tgt_size = (tgt_size + align - 1) & ~(align - 1);
	memcpy (tgt + tgt_size, hostaddrs[i], sizes[i]);
	hostaddrs[i] = tgt + tgt_size;
	tgt_size = tgt_size + sizes[i];
      }
}
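/* For illustration only (not part of libgomp): both helpers above rely on the
   usual power-of-two rounding idiom '(x + align - 1) & ~(align - 1)'.  A
   standalone sketch of the same packing logic:

     #include <assert.h>
     #include <stddef.h>

     static size_t
     round_up (size_t x, size_t align)   // align must be a power of two
     {
       return (x + align - 1) & ~(align - 1);
     }

     int
     main (void)
     {
       size_t off = 0;
       off = round_up (off, 8);  off += 5;    // 5-byte object, 8-aligned
       off = round_up (off, 16); off += 32;   // 32-byte object, 16-aligned
       assert (off == 48);                    // 0 + 5, round to 16, + 32
       return 0;
     }
*/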
/* Helper function of GOMP_target{,_ext} routines.  */

static void *
gomp_get_target_fn_addr (struct gomp_device_descr *devicep,
			 void (*host_fn) (void *))
{
  if (devicep->capabilities & GOMP_OFFLOAD_CAP_NATIVE_EXEC)
    return (void *) host_fn;
  else
    {
      gomp_mutex_lock (&devicep->lock);
      if (devicep->state == GOMP_DEVICE_FINALIZED)
	{
	  gomp_mutex_unlock (&devicep->lock);
	  return NULL;
	}

      struct splay_tree_key_s k;
      k.host_start = (uintptr_t) host_fn;
      k.host_end = k.host_start + 1;
      splay_tree_key tgt_fn = splay_tree_lookup (&devicep->mem_map, &k);
      gomp_mutex_unlock (&devicep->lock);
      if (tgt_fn == NULL)
	return NULL;

      return (void *) tgt_fn->tgt_offset;
    }
}
/* Called when encountering a target directive.  If DEVICE
   is GOMP_DEVICE_ICV, it means use device-var ICV.  If it is
   GOMP_DEVICE_HOST_FALLBACK (or any value
   larger than last available hw device), use host fallback.
   FN is address of host code, UNUSED is part of the current ABI, but
   we're not actually using it.  HOSTADDRS, SIZES and KINDS are arrays
   with MAPNUM entries, with addresses of the host objects,
   sizes of the host objects (resp. for pointer kind pointer bias
   and assumed sizeof (void *) size) and kinds.  */

void
GOMP_target (int device, void (*fn) (void *), const void *unused,
	     size_t mapnum, void **hostaddrs, size_t *sizes,
	     unsigned char *kinds)
{
  struct gomp_device_descr *devicep = resolve_device (device);

  void *fn_addr;
  if (devicep == NULL
      || !(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
      /* All shared memory devices should use the GOMP_target_ext function.  */
      || devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM
      || !(fn_addr = gomp_get_target_fn_addr (devicep, fn)))
    return gomp_target_fallback (fn, hostaddrs, devicep, NULL);

  htab_t refcount_set = htab_create (mapnum);
  struct target_mem_desc *tgt_vars
    = gomp_map_vars (devicep, mapnum, hostaddrs, NULL, sizes, kinds, false,
		     &refcount_set, GOMP_MAP_VARS_TARGET);
  devicep->run_func (devicep->target_id, fn_addr, (void *) tgt_vars->tgt_start,
		     NULL);
  htab_clear (refcount_set);
  gomp_unmap_vars (tgt_vars, true, &refcount_set);
  htab_free (refcount_set);
}

static inline unsigned int
clear_unsupported_flags (struct gomp_device_descr *devicep, unsigned int flags)
{
  /* If we cannot run asynchronously, simply ignore nowait.  */
  if (devicep != NULL && devicep->async_run_func == NULL)
    flags &= ~GOMP_TARGET_FLAG_NOWAIT;

  return flags;
}
/* Like GOMP_target, but KINDS is 16-bit, UNUSED is no longer present,
   and several arguments have been added:
   FLAGS is a bitmask, see GOMP_TARGET_FLAG_* in gomp-constants.h.
   DEPEND is array of dependencies, see GOMP_task for details.

   ARGS is a pointer to an array consisting of a variable number of both
   device-independent and device-specific arguments.  Each argument takes one
   or two elements: the first specifies for which device it is intended, the
   type, and optionally also the value; if the value is not encoded in the
   first element, the whole second element holds the actual value.  The last
   element of the array is a single NULL.  Among the device-independent
   arguments are for example NUM_TEAMS and THREAD_LIMIT.

   NUM_TEAMS is positive if GOMP_teams will be called in the body with
   that value, or 1 if the teams construct is not present, or 0 if the
   teams construct does not have a num_teams clause and so the choice is
   implementation defined, and -1 if it can't be determined on the host
   what value GOMP_teams will have on the device.
   THREAD_LIMIT similarly is positive if GOMP_teams will be called in the
   body with that value, or 0 if the teams construct does not have a
   thread_limit clause or the teams construct is not present, or -1 if it
   can't be determined on the host what value GOMP_teams will have on the
   device.  */
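/* For illustration only (not part of libgomp): a sketch of how an ARGS array
   as described above can be walked, mirroring the decoding done in
   gomp_target_fallback.  The include path for gomp-constants.h and the
   printing are assumptions made for the example:

     #include <stdint.h>
     #include <stdio.h>
     #include "gomp-constants.h"

     static void
     walk_target_args (void **args)
     {
       if (args == NULL)
	 return;
       while (*args)
	 {
	   intptr_t id = (intptr_t) *args++, val;
	   if (id & GOMP_TARGET_ARG_SUBSEQUENT_PARAM)
	     val = (intptr_t) *args++;              // value in its own element
	   else
	     val = id >> GOMP_TARGET_ARG_VALUE_SHIFT;   // value packed in id
	   int device = id & GOMP_TARGET_ARG_DEVICE_MASK;
	   int which = id & GOMP_TARGET_ARG_ID_MASK;
	   printf ("device %d, id %#x, value %ld\n",
		   device, which, (long) val);
	 }
     }
*/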
2624 GOMP_target_ext (int device
, void (*fn
) (void *), size_t mapnum
,
2625 void **hostaddrs
, size_t *sizes
, unsigned short *kinds
,
2626 unsigned int flags
, void **depend
, void **args
)
2628 struct gomp_device_descr
*devicep
= resolve_device (device
);
2629 size_t tgt_align
= 0, tgt_size
= 0;
2630 bool fpc_done
= false;
2632 flags
= clear_unsupported_flags (devicep
, flags
);
2634 if (flags
& GOMP_TARGET_FLAG_NOWAIT
)
2636 struct gomp_thread
*thr
= gomp_thread ();
2637 /* Create a team if we don't have any around, as nowait
2638 target tasks make sense to run asynchronously even when
2639 outside of any parallel. */
2640 if (__builtin_expect (thr
->ts
.team
== NULL
, 0))
2642 struct gomp_team
*team
= gomp_new_team (1);
2643 struct gomp_task
*task
= thr
->task
;
2644 struct gomp_task_icv
*icv
= task
? &task
->icv
: &gomp_global_icv
;
2645 team
->prev_ts
= thr
->ts
;
2646 thr
->ts
.team
= team
;
2647 thr
->ts
.team_id
= 0;
2648 thr
->ts
.work_share
= &team
->work_shares
[0];
2649 thr
->ts
.last_work_share
= NULL
;
2650 #ifdef HAVE_SYNC_BUILTINS
2651 thr
->ts
.single_count
= 0;
2653 thr
->ts
.static_trip
= 0;
2654 thr
->task
= &team
->implicit_task
[0];
2655 gomp_init_task (thr
->task
, NULL
, icv
);
2661 thr
->task
= &team
->implicit_task
[0];
2664 pthread_setspecific (gomp_thread_destructor
, thr
);
2667 && !thr
->task
->final_task
)
2669 gomp_create_target_task (devicep
, fn
, mapnum
, hostaddrs
,
2670 sizes
, kinds
, flags
, depend
, args
,
2671 GOMP_TARGET_TASK_BEFORE_MAP
);
2676 /* If there are depend clauses, but nowait is not present
2677 (or we are in a final task), block the parent task until the
2678 dependencies are resolved and then just continue with the rest
2679 of the function as if it is a merged task. */
2682 struct gomp_thread
*thr
= gomp_thread ();
2683 if (thr
->task
&& thr
->task
->depend_hash
)
2685 /* If we might need to wait, copy firstprivate now. */
2686 calculate_firstprivate_requirements (mapnum
, sizes
, kinds
,
2687 &tgt_align
, &tgt_size
);
2690 char *tgt
= gomp_alloca (tgt_size
+ tgt_align
- 1);
2691 copy_firstprivate_data (tgt
, mapnum
, hostaddrs
, sizes
, kinds
,
2692 tgt_align
, tgt_size
);
2695 gomp_task_maybe_wait_for_dependencies (depend
);
2701 || !(devicep
->capabilities
& GOMP_OFFLOAD_CAP_OPENMP_400
)
2702 || !(fn_addr
= gomp_get_target_fn_addr (devicep
, fn
))
2703 || (devicep
->can_run_func
&& !devicep
->can_run_func (fn_addr
)))
2707 calculate_firstprivate_requirements (mapnum
, sizes
, kinds
,
2708 &tgt_align
, &tgt_size
);
2711 char *tgt
= gomp_alloca (tgt_size
+ tgt_align
- 1);
2712 copy_firstprivate_data (tgt
, mapnum
, hostaddrs
, sizes
, kinds
,
2713 tgt_align
, tgt_size
);
2716 gomp_target_fallback (fn
, hostaddrs
, devicep
, args
);
2720 struct target_mem_desc
*tgt_vars
;
2721 htab_t refcount_set
= NULL
;
2723 if (devicep
->capabilities
& GOMP_OFFLOAD_CAP_SHARED_MEM
)
2727 calculate_firstprivate_requirements (mapnum
, sizes
, kinds
,
2728 &tgt_align
, &tgt_size
);
2731 char *tgt
= gomp_alloca (tgt_size
+ tgt_align
- 1);
2732 copy_firstprivate_data (tgt
, mapnum
, hostaddrs
, sizes
, kinds
,
2733 tgt_align
, tgt_size
);
2740 refcount_set
= htab_create (mapnum
);
2741 tgt_vars
= gomp_map_vars (devicep
, mapnum
, hostaddrs
, NULL
, sizes
, kinds
,
2742 true, &refcount_set
, GOMP_MAP_VARS_TARGET
);
2744 devicep
->run_func (devicep
->target_id
, fn_addr
,
2745 tgt_vars
? (void *) tgt_vars
->tgt_start
: hostaddrs
,
2749 htab_clear (refcount_set
);
2750 gomp_unmap_vars (tgt_vars
, true, &refcount_set
);
2753 htab_free (refcount_set
);
2756 /* Host fallback for GOMP_target_data{,_ext} routines. */
2759 gomp_target_data_fallback (struct gomp_device_descr
*devicep
)
2761 struct gomp_task_icv
*icv
= gomp_icv (false);
2763 if (gomp_target_offload_var
== GOMP_TARGET_OFFLOAD_MANDATORY
2765 gomp_fatal ("OMP_TARGET_OFFLOAD is set to MANDATORY, but device cannot "
2766 "be used for offloading");
2768 if (icv
->target_data
)
2770 /* Even when doing a host fallback, if there are any active
2771 #pragma omp target data constructs, need to remember the
2772 new #pragma omp target data, otherwise GOMP_target_end_data
2773 would get out of sync. */
2774 struct target_mem_desc
*tgt
2775 = gomp_map_vars (NULL
, 0, NULL
, NULL
, NULL
, NULL
, false,
2776 NULL
, GOMP_MAP_VARS_DATA
);
2777 tgt
->prev
= icv
->target_data
;
2778 icv
->target_data
= tgt
;
2783 GOMP_target_data (int device
, const void *unused
, size_t mapnum
,
2784 void **hostaddrs
, size_t *sizes
, unsigned char *kinds
)
2786 struct gomp_device_descr
*devicep
= resolve_device (device
);
2789 || !(devicep
->capabilities
& GOMP_OFFLOAD_CAP_OPENMP_400
)
2790 || (devicep
->capabilities
& GOMP_OFFLOAD_CAP_SHARED_MEM
))
2791 return gomp_target_data_fallback (devicep
);
2793 struct target_mem_desc
*tgt
2794 = gomp_map_vars (devicep
, mapnum
, hostaddrs
, NULL
, sizes
, kinds
, false,
2795 NULL
, GOMP_MAP_VARS_DATA
);
2796 struct gomp_task_icv
*icv
= gomp_icv (true);
2797 tgt
->prev
= icv
->target_data
;
2798 icv
->target_data
= tgt
;
2802 GOMP_target_data_ext (int device
, size_t mapnum
, void **hostaddrs
,
2803 size_t *sizes
, unsigned short *kinds
)
2805 struct gomp_device_descr
*devicep
= resolve_device (device
);
2808 || !(devicep
->capabilities
& GOMP_OFFLOAD_CAP_OPENMP_400
)
2809 || devicep
->capabilities
& GOMP_OFFLOAD_CAP_SHARED_MEM
)
2810 return gomp_target_data_fallback (devicep
);
2812 struct target_mem_desc
*tgt
2813 = gomp_map_vars (devicep
, mapnum
, hostaddrs
, NULL
, sizes
, kinds
, true,
2814 NULL
, GOMP_MAP_VARS_DATA
);
2815 struct gomp_task_icv
*icv
= gomp_icv (true);
2816 tgt
->prev
= icv
->target_data
;
2817 icv
->target_data
= tgt
;
2821 GOMP_target_end_data (void)
2823 struct gomp_task_icv
*icv
= gomp_icv (false);
2824 if (icv
->target_data
)
2826 struct target_mem_desc
*tgt
= icv
->target_data
;
2827 icv
->target_data
= tgt
->prev
;
2828 gomp_unmap_vars (tgt
, true, NULL
);
2833 GOMP_target_update (int device
, const void *unused
, size_t mapnum
,
2834 void **hostaddrs
, size_t *sizes
, unsigned char *kinds
)
2836 struct gomp_device_descr
*devicep
= resolve_device (device
);
2839 || !(devicep
->capabilities
& GOMP_OFFLOAD_CAP_OPENMP_400
)
2840 || devicep
->capabilities
& GOMP_OFFLOAD_CAP_SHARED_MEM
)
2843 gomp_update (devicep
, mapnum
, hostaddrs
, sizes
, kinds
, false);
2847 GOMP_target_update_ext (int device
, size_t mapnum
, void **hostaddrs
,
2848 size_t *sizes
, unsigned short *kinds
,
2849 unsigned int flags
, void **depend
)
2851 struct gomp_device_descr
*devicep
= resolve_device (device
);
2853 /* If there are depend clauses, but nowait is not present,
2854 block the parent task until the dependencies are resolved
2855 and then just continue with the rest of the function as if it
2856 is a merged task. Until we are able to schedule task during
2857 variable mapping or unmapping, ignore nowait if depend clauses
2861 struct gomp_thread
*thr
= gomp_thread ();
2862 if (thr
->task
&& thr
->task
->depend_hash
)
2864 if ((flags
& GOMP_TARGET_FLAG_NOWAIT
)
2866 && !thr
->task
->final_task
)
2868 if (gomp_create_target_task (devicep
, (void (*) (void *)) NULL
,
2869 mapnum
, hostaddrs
, sizes
, kinds
,
2870 flags
| GOMP_TARGET_FLAG_UPDATE
,
2871 depend
, NULL
, GOMP_TARGET_TASK_DATA
))
2876 struct gomp_team
*team
= thr
->ts
.team
;
2877 /* If parallel or taskgroup has been cancelled, don't start new
2879 if (__builtin_expect (gomp_cancel_var
, 0) && team
)
2881 if (gomp_team_barrier_cancelled (&team
->barrier
))
2883 if (thr
->task
->taskgroup
)
2885 if (thr
->task
->taskgroup
->cancelled
)
2887 if (thr
->task
->taskgroup
->workshare
2888 && thr
->task
->taskgroup
->prev
2889 && thr
->task
->taskgroup
->prev
->cancelled
)
2894 gomp_task_maybe_wait_for_dependencies (depend
);
2900 || !(devicep
->capabilities
& GOMP_OFFLOAD_CAP_OPENMP_400
)
2901 || devicep
->capabilities
& GOMP_OFFLOAD_CAP_SHARED_MEM
)
2904 struct gomp_thread
*thr
= gomp_thread ();
2905 struct gomp_team
*team
= thr
->ts
.team
;
2906 /* If parallel or taskgroup has been cancelled, don't start new tasks. */
2907 if (__builtin_expect (gomp_cancel_var
, 0) && team
)
2909 if (gomp_team_barrier_cancelled (&team
->barrier
))
2911 if (thr
->task
->taskgroup
)
2913 if (thr
->task
->taskgroup
->cancelled
)
2915 if (thr
->task
->taskgroup
->workshare
2916 && thr
->task
->taskgroup
->prev
2917 && thr
->task
->taskgroup
->prev
->cancelled
)
2922 gomp_update (devicep
, mapnum
, hostaddrs
, sizes
, kinds
, true);
2926 gomp_exit_data (struct gomp_device_descr
*devicep
, size_t mapnum
,
2927 void **hostaddrs
, size_t *sizes
, unsigned short *kinds
,
2928 htab_t
*refcount_set
)
2930 const int typemask
= 0xff;
2932 gomp_mutex_lock (&devicep
->lock
);
2933 if (devicep
->state
== GOMP_DEVICE_FINALIZED
)
2935 gomp_mutex_unlock (&devicep
->lock
);
2939 for (i
= 0; i
< mapnum
; i
++)
2940 if ((kinds
[i
] & typemask
) == GOMP_MAP_DETACH
)
2942 struct splay_tree_key_s cur_node
;
2943 cur_node
.host_start
= (uintptr_t) hostaddrs
[i
];
2944 cur_node
.host_end
= cur_node
.host_start
+ sizeof (void *);
2945 splay_tree_key n
= splay_tree_lookup (&devicep
->mem_map
, &cur_node
);
2948 gomp_detach_pointer (devicep
, NULL
, n
, (uintptr_t) hostaddrs
[i
],
2953 splay_tree_key remove_vars
[mapnum
];
2955 for (i
= 0; i
< mapnum
; i
++)
2957 struct splay_tree_key_s cur_node
;
2958 unsigned char kind
= kinds
[i
] & typemask
;
2962 case GOMP_MAP_ALWAYS_FROM
:
2963 case GOMP_MAP_DELETE
:
2964 case GOMP_MAP_RELEASE
:
2965 case GOMP_MAP_ZERO_LEN_ARRAY_SECTION
:
2966 case GOMP_MAP_DELETE_ZERO_LEN_ARRAY_SECTION
:
2967 cur_node
.host_start
= (uintptr_t) hostaddrs
[i
];
2968 cur_node
.host_end
= cur_node
.host_start
+ sizes
[i
];
2969 splay_tree_key k
= (kind
== GOMP_MAP_DELETE_ZERO_LEN_ARRAY_SECTION
2970 || kind
== GOMP_MAP_ZERO_LEN_ARRAY_SECTION
)
2971 ? gomp_map_0len_lookup (&devicep
->mem_map
, &cur_node
)
2972 : splay_tree_lookup (&devicep
->mem_map
, &cur_node
);
2976 bool delete_p
= (kind
== GOMP_MAP_DELETE
2977 || kind
== GOMP_MAP_DELETE_ZERO_LEN_ARRAY_SECTION
);
2978 bool do_copy
, do_remove
;
2979 gomp_decrement_refcount (k
, refcount_set
, delete_p
, &do_copy
,
2982 if ((kind
== GOMP_MAP_FROM
&& do_copy
)
2983 || kind
== GOMP_MAP_ALWAYS_FROM
)
2985 if (k
->aux
&& k
->aux
->attach_count
)
2987 /* We have to be careful not to overwrite still attached
2988 pointers during the copyback to host. */
2989 uintptr_t addr
= k
->host_start
;
2990 while (addr
< k
->host_end
)
2992 size_t i
= (addr
- k
->host_start
) / sizeof (void *);
2993 if (k
->aux
->attach_count
[i
] == 0)
2994 gomp_copy_dev2host (devicep
, NULL
, (void *) addr
,
2995 (void *) (k
->tgt
->tgt_start
2997 + addr
- k
->host_start
),
2999 addr
+= sizeof (void *);
3003 gomp_copy_dev2host (devicep
, NULL
, (void *) cur_node
.host_start
,
3004 (void *) (k
->tgt
->tgt_start
+ k
->tgt_offset
3005 + cur_node
.host_start
3007 cur_node
.host_end
- cur_node
.host_start
);
3010 /* Structure elements lists are removed altogether at once, which
3011 may cause immediate deallocation of the target_mem_desc, causing
3012 errors if we still have following element siblings to copy back.
3013 While we're at it, it also seems more disciplined to simply
3014 queue all removals together for processing below.
3016 Structured block unmapping (i.e. gomp_unmap_vars_internal) should
3017 not have this problem, since they maintain an additional
3018 tgt->refcount = 1 reference to the target_mem_desc to start with.
3021 remove_vars
[nrmvars
++] = k
;
3024 case GOMP_MAP_DETACH
:
3027 gomp_mutex_unlock (&devicep
->lock
);
3028 gomp_fatal ("GOMP_target_enter_exit_data unhandled kind 0x%.2x",
3033 for (int i
= 0; i
< nrmvars
; i
++)
3034 gomp_remove_var (devicep
, remove_vars
[i
]);
3036 gomp_mutex_unlock (&devicep
->lock
);
3040 GOMP_target_enter_exit_data (int device
, size_t mapnum
, void **hostaddrs
,
3041 size_t *sizes
, unsigned short *kinds
,
3042 unsigned int flags
, void **depend
)
3044 struct gomp_device_descr
*devicep
= resolve_device (device
);
3046 /* If there are depend clauses, but nowait is not present,
3047 block the parent task until the dependencies are resolved
3048 and then just continue with the rest of the function as if it
3049 is a merged task. Until we are able to schedule task during
3050 variable mapping or unmapping, ignore nowait if depend clauses
3054 struct gomp_thread
*thr
= gomp_thread ();
3055 if (thr
->task
&& thr
->task
->depend_hash
)
3057 if ((flags
& GOMP_TARGET_FLAG_NOWAIT
)
3059 && !thr
->task
->final_task
)
3061 if (gomp_create_target_task (devicep
, (void (*) (void *)) NULL
,
3062 mapnum
, hostaddrs
, sizes
, kinds
,
3063 flags
, depend
, NULL
,
3064 GOMP_TARGET_TASK_DATA
))
3069 struct gomp_team
*team
= thr
->ts
.team
;
3070 /* If parallel or taskgroup has been cancelled, don't start new
3072 if (__builtin_expect (gomp_cancel_var
, 0) && team
)
3074 if (gomp_team_barrier_cancelled (&team
->barrier
))
3076 if (thr
->task
->taskgroup
)
3078 if (thr
->task
->taskgroup
->cancelled
)
3080 if (thr
->task
->taskgroup
->workshare
3081 && thr
->task
->taskgroup
->prev
3082 && thr
->task
->taskgroup
->prev
->cancelled
)
3087 gomp_task_maybe_wait_for_dependencies (depend
);
3093 || !(devicep
->capabilities
& GOMP_OFFLOAD_CAP_OPENMP_400
)
3094 || devicep
->capabilities
& GOMP_OFFLOAD_CAP_SHARED_MEM
)
3097 struct gomp_thread
*thr
= gomp_thread ();
3098 struct gomp_team
*team
= thr
->ts
.team
;
3099 /* If parallel or taskgroup has been cancelled, don't start new tasks. */
3100 if (__builtin_expect (gomp_cancel_var
, 0) && team
)
3102 if (gomp_team_barrier_cancelled (&team
->barrier
))
3104 if (thr
->task
->taskgroup
)
3106 if (thr
->task
->taskgroup
->cancelled
)
3108 if (thr
->task
->taskgroup
->workshare
3109 && thr
->task
->taskgroup
->prev
3110 && thr
->task
->taskgroup
->prev
->cancelled
)
3115 htab_t refcount_set
= htab_create (mapnum
);
3117 /* The variables are mapped separately such that they can be released
3120 if ((flags
& GOMP_TARGET_FLAG_EXIT_DATA
) == 0)
3121 for (i
= 0; i
< mapnum
; i
++)
3122 if ((kinds
[i
] & 0xff) == GOMP_MAP_STRUCT
)
3124 gomp_map_vars (devicep
, sizes
[i
] + 1, &hostaddrs
[i
], NULL
, &sizes
[i
],
3125 &kinds
[i
], true, &refcount_set
,
3126 GOMP_MAP_VARS_ENTER_DATA
);
3129 else if ((kinds
[i
] & 0xff) == GOMP_MAP_TO_PSET
)
3131 for (j
= i
+ 1; j
< mapnum
; j
++)
3132 if (!GOMP_MAP_POINTER_P (get_kind (true, kinds
, j
) & 0xff)
3133 && !GOMP_MAP_ALWAYS_POINTER_P (get_kind (true, kinds
, j
) & 0xff))
3135 gomp_map_vars (devicep
, j
-i
, &hostaddrs
[i
], NULL
, &sizes
[i
],
3136 &kinds
[i
], true, &refcount_set
,
3137 GOMP_MAP_VARS_ENTER_DATA
);
3140 else if (i
+ 1 < mapnum
&& (kinds
[i
+ 1] & 0xff) == GOMP_MAP_ATTACH
)
3142 /* An attach operation must be processed together with the mapped
3143 base-pointer list item. */
3144 gomp_map_vars (devicep
, 2, &hostaddrs
[i
], NULL
, &sizes
[i
], &kinds
[i
],
3145 true, &refcount_set
, GOMP_MAP_VARS_ENTER_DATA
);
3149 gomp_map_vars (devicep
, 1, &hostaddrs
[i
], NULL
, &sizes
[i
], &kinds
[i
],
3150 true, &refcount_set
, GOMP_MAP_VARS_ENTER_DATA
);
3152 gomp_exit_data (devicep
, mapnum
, hostaddrs
, sizes
, kinds
, &refcount_set
);
3153 htab_free (refcount_set
);
3157 gomp_target_task_fn (void *data
)
3159 struct gomp_target_task
*ttask
= (struct gomp_target_task
*) data
;
3160 struct gomp_device_descr
*devicep
= ttask
->devicep
;
3162 if (ttask
->fn
!= NULL
)
3166 || !(devicep
->capabilities
& GOMP_OFFLOAD_CAP_OPENMP_400
)
3167 || !(fn_addr
= gomp_get_target_fn_addr (devicep
, ttask
->fn
))
3168 || (devicep
->can_run_func
&& !devicep
->can_run_func (fn_addr
)))
3170 ttask
->state
= GOMP_TARGET_TASK_FALLBACK
;
3171 gomp_target_fallback (ttask
->fn
, ttask
->hostaddrs
, devicep
,
3176 if (ttask
->state
== GOMP_TARGET_TASK_FINISHED
)
3179 gomp_unmap_vars (ttask
->tgt
, true, NULL
);
3183 void *actual_arguments
;
3184 if (devicep
->capabilities
& GOMP_OFFLOAD_CAP_SHARED_MEM
)
3187 actual_arguments
= ttask
->hostaddrs
;
3191 ttask
->tgt
= gomp_map_vars (devicep
, ttask
->mapnum
, ttask
->hostaddrs
,
3192 NULL
, ttask
->sizes
, ttask
->kinds
, true,
3193 NULL
, GOMP_MAP_VARS_TARGET
);
3194 actual_arguments
= (void *) ttask
->tgt
->tgt_start
;
3196 ttask
->state
= GOMP_TARGET_TASK_READY_TO_RUN
;
3198 assert (devicep
->async_run_func
);
3199 devicep
->async_run_func (devicep
->target_id
, fn_addr
, actual_arguments
,
3200 ttask
->args
, (void *) ttask
);
3203 else if (devicep
== NULL
3204 || !(devicep
->capabilities
& GOMP_OFFLOAD_CAP_OPENMP_400
)
3205 || devicep
->capabilities
& GOMP_OFFLOAD_CAP_SHARED_MEM
)
3209 if (ttask
->flags
& GOMP_TARGET_FLAG_UPDATE
)
3210 gomp_update (devicep
, ttask
->mapnum
, ttask
->hostaddrs
, ttask
->sizes
,
3211 ttask
->kinds
, true);
3214 htab_t refcount_set
= htab_create (ttask
->mapnum
);
3215 if ((ttask
->flags
& GOMP_TARGET_FLAG_EXIT_DATA
) == 0)
3216 for (i
= 0; i
< ttask
->mapnum
; i
++)
3217 if ((ttask
->kinds
[i
] & 0xff) == GOMP_MAP_STRUCT
)
3219 gomp_map_vars (devicep
, ttask
->sizes
[i
] + 1, &ttask
->hostaddrs
[i
],
3220 NULL
, &ttask
->sizes
[i
], &ttask
->kinds
[i
], true,
3221 &refcount_set
, GOMP_MAP_VARS_ENTER_DATA
);
3222 i
+= ttask
->sizes
[i
];
3225 gomp_map_vars (devicep
, 1, &ttask
->hostaddrs
[i
], NULL
, &ttask
->sizes
[i
],
3226 &ttask
->kinds
[i
], true, &refcount_set
,
3227 GOMP_MAP_VARS_ENTER_DATA
);
3229 gomp_exit_data (devicep
, ttask
->mapnum
, ttask
->hostaddrs
, ttask
->sizes
,
3230 ttask
->kinds
, &refcount_set
);
3231 htab_free (refcount_set
);
void
GOMP_teams (unsigned int num_teams, unsigned int thread_limit)
{
  if (thread_limit)
    {
      struct gomp_task_icv *icv = gomp_icv (true);
      icv->thread_limit_var
	= thread_limit > INT_MAX ? UINT_MAX : thread_limit;
    }
  (void) num_teams;
}
3249 GOMP_teams4 (unsigned int num_teams_low
, unsigned int num_teams_high
,
3250 unsigned int thread_limit
, bool first
)
3252 struct gomp_thread
*thr
= gomp_thread ();
3257 struct gomp_task_icv
*icv
= gomp_icv (true);
3258 icv
->thread_limit_var
3259 = thread_limit
> INT_MAX
? UINT_MAX
: thread_limit
;
3261 (void) num_teams_high
;
3262 if (num_teams_low
== 0)
3264 thr
->num_teams
= num_teams_low
- 1;
3267 else if (thr
->team_num
== thr
->num_teams
)
void *
omp_target_alloc (size_t size, int device_num)
{
  if (device_num == gomp_get_num_devices ())
    return malloc (size);

  if (device_num < 0)
    return NULL;

  struct gomp_device_descr *devicep = resolve_device (device_num);
  if (devicep == NULL)
    return NULL;

  if (!(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
      || devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
    return malloc (size);

  gomp_mutex_lock (&devicep->lock);
  void *ret = devicep->alloc_func (devicep->target_id, size);
  gomp_mutex_unlock (&devicep->lock);
  return ret;
}
void
omp_target_free (void *device_ptr, int device_num)
{
  if (device_ptr == NULL)
    return;

  if (device_num == gomp_get_num_devices ())
    {
      free (device_ptr);
      return;
    }

  if (device_num < 0)
    return;

  struct gomp_device_descr *devicep = resolve_device (device_num);
  if (devicep == NULL)
    return;

  if (!(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
      || devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
    {
      free (device_ptr);
      return;
    }

  gomp_mutex_lock (&devicep->lock);
  gomp_free_device_memory (devicep, device_ptr);
  gomp_mutex_unlock (&devicep->lock);
}
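/* For illustration only (not part of libgomp): typical use of
   omp_target_alloc / omp_target_free together with is_device_ptr.
   A minimal sketch:

     #include <omp.h>

     int
     main (void)
     {
       int dev = omp_get_default_device ();
       size_t n = 1000;
       double *d = omp_target_alloc (n * sizeof (double), dev);
       if (d == NULL)
	 return 1;                      // allocation failed

       #pragma omp target is_device_ptr(d) device(dev)
       for (size_t i = 0; i < n; i++)
	 d[i] = (double) i;

       omp_target_free (d, dev);        // must pass the same device number
       return 0;
     }
*/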
int
omp_target_is_present (const void *ptr, int device_num)
{
  if (ptr == NULL)
    return 1;

  if (device_num == gomp_get_num_devices ())
    return 1;

  if (device_num < 0)
    return 0;

  struct gomp_device_descr *devicep = resolve_device (device_num);
  if (devicep == NULL)
    return 0;

  if (!(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
      || devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
    return 1;

  gomp_mutex_lock (&devicep->lock);
  struct splay_tree_s *mem_map = &devicep->mem_map;
  struct splay_tree_key_s cur_node;

  cur_node.host_start = (uintptr_t) ptr;
  cur_node.host_end = cur_node.host_start;
  splay_tree_key n = gomp_map_0len_lookup (mem_map, &cur_node);
  int ret = n != NULL;
  gomp_mutex_unlock (&devicep->lock);
  return ret;
}
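/* For illustration only (not part of libgomp): omp_target_is_present reports
   whether a host address is currently mapped on the given device.  A minimal
   sketch:

     #include <stdio.h>
     #include <omp.h>

     int
     main (void)
     {
       int x[64];
       int dev = omp_get_default_device ();

       printf ("%d\n", omp_target_is_present (x, dev));   // 0 if not mapped

       #pragma omp target enter data map(to: x[0:64]) device(dev)
       printf ("%d\n", omp_target_is_present (x, dev));   // 1 while mapped

       #pragma omp target exit data map(release: x[0:64]) device(dev)
       return 0;
     }

   Note that for the host device or a shared-memory device the call always
   returns non-zero, as the capability check above shows.  */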
static int
omp_target_memcpy_check (int dst_device_num, int src_device_num,
			 struct gomp_device_descr **dst_devicep,
			 struct gomp_device_descr **src_devicep)
{
  if (dst_device_num != gomp_get_num_devices ())
    {
      if (dst_device_num < 0)
	return EINVAL;

      *dst_devicep = resolve_device (dst_device_num);
      if (*dst_devicep == NULL)
	return EINVAL;

      if (!((*dst_devicep)->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
	  || (*dst_devicep)->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
	*dst_devicep = NULL;
    }

  if (src_device_num != num_devices_openmp)
    {
      if (src_device_num < 0)
	return EINVAL;

      *src_devicep = resolve_device (src_device_num);
      if (*src_devicep == NULL)
	return EINVAL;

      if (!((*src_devicep)->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
	  || (*src_devicep)->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
	*src_devicep = NULL;
    }

  return 0;
}
static int
omp_target_memcpy_copy (void *dst, const void *src, size_t length,
			size_t dst_offset, size_t src_offset,
			struct gomp_device_descr *dst_devicep,
			struct gomp_device_descr *src_devicep)
{
  bool ret;
  if (src_devicep == NULL && dst_devicep == NULL)
    {
      memcpy ((char *) dst + dst_offset, (char *) src + src_offset, length);
      return 0;
    }
  if (src_devicep == NULL)
    {
      gomp_mutex_lock (&dst_devicep->lock);
      ret = dst_devicep->host2dev_func (dst_devicep->target_id,
					(char *) dst + dst_offset,
					(char *) src + src_offset, length);
      gomp_mutex_unlock (&dst_devicep->lock);
      return (ret ? 0 : EINVAL);
    }
  if (dst_devicep == NULL)
    {
      gomp_mutex_lock (&src_devicep->lock);
      ret = src_devicep->dev2host_func (src_devicep->target_id,
					(char *) dst + dst_offset,
					(char *) src + src_offset, length);
      gomp_mutex_unlock (&src_devicep->lock);
      return (ret ? 0 : EINVAL);
    }
  if (src_devicep == dst_devicep)
    {
      gomp_mutex_lock (&src_devicep->lock);
      ret = src_devicep->dev2dev_func (src_devicep->target_id,
				       (char *) dst + dst_offset,
				       (char *) src + src_offset, length);
      gomp_mutex_unlock (&src_devicep->lock);
      return (ret ? 0 : EINVAL);
    }

  return EINVAL;
}
int
omp_target_memcpy (void *dst, const void *src, size_t length, size_t dst_offset,
		   size_t src_offset, int dst_device_num, int src_device_num)
{
  struct gomp_device_descr *dst_devicep = NULL, *src_devicep = NULL;
  int ret = omp_target_memcpy_check (dst_device_num, src_device_num,
				     &dst_devicep, &src_devicep);

  if (ret)
    return ret;

  ret = omp_target_memcpy_copy (dst, src, length, dst_offset, src_offset,
				dst_devicep, src_devicep);

  return ret;
}
3462 struct gomp_device_descr
*dst_devicep
;
3463 struct gomp_device_descr
*src_devicep
;
3464 } omp_target_memcpy_data
;
3467 omp_target_memcpy_async_helper (void *args
)
3469 omp_target_memcpy_data
*a
= args
;
3470 if (omp_target_memcpy_copy (a
->dst
, a
->src
, a
->length
, a
->dst_offset
,
3471 a
->src_offset
, a
->dst_devicep
, a
->src_devicep
))
3472 gomp_fatal ("omp_target_memcpy failed");
3476 omp_target_memcpy_async (void *dst
, const void *src
, size_t length
,
3477 size_t dst_offset
, size_t src_offset
,
3478 int dst_device_num
, int src_device_num
,
3479 int depobj_count
, omp_depend_t
*depobj_list
)
3481 struct gomp_device_descr
*dst_devicep
= NULL
, *src_devicep
= NULL
;
3482 unsigned int flags
= 0;
3483 void *depend
[depobj_count
+ 5];
3485 int check
= omp_target_memcpy_check (dst_device_num
, src_device_num
,
3486 &dst_devicep
, &src_devicep
);
3488 omp_target_memcpy_data s
= {
3492 .dst_offset
= dst_offset
,
3493 .src_offset
= src_offset
,
3494 .dst_devicep
= dst_devicep
,
3495 .src_devicep
= src_devicep
3501 if (depobj_count
> 0 && depobj_list
!= NULL
)
3503 flags
|= GOMP_TASK_FLAG_DEPEND
;
3505 depend
[1] = (void *) (uintptr_t) depobj_count
;
3506 depend
[2] = depend
[3] = depend
[4] = 0;
3507 for (i
= 0; i
< depobj_count
; ++i
)
3508 depend
[i
+ 5] = &depobj_list
[i
];
3511 GOMP_task (omp_target_memcpy_async_helper
, &s
, NULL
, sizeof (s
),
3512 __alignof__ (s
), true, flags
, depend
, 0, NULL
);
3518 omp_target_memcpy_rect_worker (void *dst
, const void *src
, size_t element_size
,
3519 int num_dims
, const size_t *volume
,
3520 const size_t *dst_offsets
,
3521 const size_t *src_offsets
,
3522 const size_t *dst_dimensions
,
3523 const size_t *src_dimensions
,
3524 struct gomp_device_descr
*dst_devicep
,
3525 struct gomp_device_descr
*src_devicep
)
3527 size_t dst_slice
= element_size
;
3528 size_t src_slice
= element_size
;
3529 size_t j
, dst_off
, src_off
, length
;
3534 if (__builtin_mul_overflow (element_size
, volume
[0], &length
)
3535 || __builtin_mul_overflow (element_size
, dst_offsets
[0], &dst_off
)
3536 || __builtin_mul_overflow (element_size
, src_offsets
[0], &src_off
))
3538 if (dst_devicep
== NULL
&& src_devicep
== NULL
)
3540 memcpy ((char *) dst
+ dst_off
, (const char *) src
+ src_off
,
3544 else if (src_devicep
== NULL
)
3545 ret
= dst_devicep
->host2dev_func (dst_devicep
->target_id
,
3546 (char *) dst
+ dst_off
,
3547 (const char *) src
+ src_off
,
3549 else if (dst_devicep
== NULL
)
3550 ret
= src_devicep
->dev2host_func (src_devicep
->target_id
,
3551 (char *) dst
+ dst_off
,
3552 (const char *) src
+ src_off
,
3554 else if (src_devicep
== dst_devicep
)
3555 ret
= src_devicep
->dev2dev_func (src_devicep
->target_id
,
3556 (char *) dst
+ dst_off
,
3557 (const char *) src
+ src_off
,
3561 return ret
? 0 : EINVAL
;
3564 /* FIXME: it would be nice to have some plugin function to handle
3565 num_dims == 2 and num_dims == 3 more efficiently. Larger ones can
3566 be handled in the generic recursion below, and for host-host it
3567 should be used even for any num_dims >= 2. */
3569 for (i
= 1; i
< num_dims
; i
++)
3570 if (__builtin_mul_overflow (dst_slice
, dst_dimensions
[i
], &dst_slice
)
3571 || __builtin_mul_overflow (src_slice
, src_dimensions
[i
], &src_slice
))
3573 if (__builtin_mul_overflow (dst_slice
, dst_offsets
[0], &dst_off
)
3574 || __builtin_mul_overflow (src_slice
, src_offsets
[0], &src_off
))
3576 for (j
= 0; j
< volume
[0]; j
++)
3578 ret
= omp_target_memcpy_rect_worker ((char *) dst
+ dst_off
,
3579 (const char *) src
+ src_off
,
3580 element_size
, num_dims
- 1,
3581 volume
+ 1, dst_offsets
+ 1,
3582 src_offsets
+ 1, dst_dimensions
+ 1,
3583 src_dimensions
+ 1, dst_devicep
,
3587 dst_off
+= dst_slice
;
3588 src_off
+= src_slice
;
3594 omp_target_memcpy_rect_check (void *dst
, const void *src
, int dst_device_num
,
3596 struct gomp_device_descr
**dst_devicep
,
3597 struct gomp_device_descr
**src_devicep
)
3602 int ret
= omp_target_memcpy_check (dst_device_num
, src_device_num
,
3603 dst_devicep
, src_devicep
);
3607 if (*src_devicep
!= NULL
&& *dst_devicep
!= NULL
&& *src_devicep
!= *dst_devicep
)
3614 omp_target_memcpy_rect_copy (void *dst
, const void *src
,
3615 size_t element_size
, int num_dims
,
3616 const size_t *volume
, const size_t *dst_offsets
,
3617 const size_t *src_offsets
,
3618 const size_t *dst_dimensions
,
3619 const size_t *src_dimensions
,
3620 struct gomp_device_descr
*dst_devicep
,
3621 struct gomp_device_descr
*src_devicep
)
3624 gomp_mutex_lock (&src_devicep
->lock
);
3625 else if (dst_devicep
)
3626 gomp_mutex_lock (&dst_devicep
->lock
);
3627 int ret
= omp_target_memcpy_rect_worker (dst
, src
, element_size
, num_dims
,
3628 volume
, dst_offsets
, src_offsets
,
3629 dst_dimensions
, src_dimensions
,
3630 dst_devicep
, src_devicep
);
3632 gomp_mutex_unlock (&src_devicep
->lock
);
3633 else if (dst_devicep
)
3634 gomp_mutex_unlock (&dst_devicep
->lock
);
3640 omp_target_memcpy_rect (void *dst
, const void *src
, size_t element_size
,
3641 int num_dims
, const size_t *volume
,
3642 const size_t *dst_offsets
,
3643 const size_t *src_offsets
,
3644 const size_t *dst_dimensions
,
3645 const size_t *src_dimensions
,
3646 int dst_device_num
, int src_device_num
)
3648 struct gomp_device_descr
*dst_devicep
= NULL
, *src_devicep
= NULL
;
3650 int check
= omp_target_memcpy_rect_check (dst
, src
, dst_device_num
,
3651 src_device_num
, &dst_devicep
,
3657 int ret
= omp_target_memcpy_rect_copy (dst
, src
, element_size
, num_dims
,
3658 volume
, dst_offsets
, src_offsets
,
3659 dst_dimensions
, src_dimensions
,
3660 dst_devicep
, src_devicep
);
3669 size_t element_size
;
3670 const size_t *volume
;
3671 const size_t *dst_offsets
;
3672 const size_t *src_offsets
;
3673 const size_t *dst_dimensions
;
3674 const size_t *src_dimensions
;
3675 struct gomp_device_descr
*dst_devicep
;
3676 struct gomp_device_descr
*src_devicep
;
3678 } omp_target_memcpy_rect_data
;
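/* For illustration only (not part of libgomp): a 2-D sub-rectangle copy with
   omp_target_memcpy_rect, done host-to-host here so the result is easy to
   check.  A minimal sketch:

     #include <assert.h>
     #include <omp.h>

     int
     main (void)
     {
       int host = omp_get_initial_device ();
       double src[4][6] = { { 0 } }, dst[4][6] = { { 0 } };
       src[1][2] = 7.0;

       size_t volume[2]      = { 2, 3 };   // copy a 2x3 block
       size_t src_offsets[2] = { 1, 2 };   // starting at src[1][2]
       size_t dst_offsets[2] = { 1, 2 };   // into dst[1][2]
       size_t src_dims[2]    = { 4, 6 };   // full extents of the arrays
       size_t dst_dims[2]    = { 4, 6 };

       int err = omp_target_memcpy_rect (dst, src, sizeof (double), 2,
					 volume, dst_offsets, src_offsets,
					 dst_dims, src_dims, host, host);
       assert (err == 0 && dst[1][2] == 7.0);
       return 0;
     }
*/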
3681 omp_target_memcpy_rect_async_helper (void *args
)
3683 omp_target_memcpy_rect_data
*a
= args
;
3684 int ret
= omp_target_memcpy_rect_copy (a
->dst
, a
->src
, a
->element_size
,
3685 a
->num_dims
, a
->volume
, a
->dst_offsets
,
3686 a
->src_offsets
, a
->dst_dimensions
,
3687 a
->src_dimensions
, a
->dst_devicep
,
3690 gomp_fatal ("omp_target_memcpy_rect failed");
3694 omp_target_memcpy_rect_async (void *dst
, const void *src
, size_t element_size
,
3695 int num_dims
, const size_t *volume
,
3696 const size_t *dst_offsets
,
3697 const size_t *src_offsets
,
3698 const size_t *dst_dimensions
,
3699 const size_t *src_dimensions
,
3700 int dst_device_num
, int src_device_num
,
3701 int depobj_count
, omp_depend_t
*depobj_list
)
3703 struct gomp_device_descr
*dst_devicep
= NULL
, *src_devicep
= NULL
;
3705 int check
= omp_target_memcpy_rect_check (dst
, src
, dst_device_num
,
3706 src_device_num
, &dst_devicep
,
3708 void *depend
[depobj_count
+ 5];
3711 omp_target_memcpy_rect_data s
= {
3714 .element_size
= element_size
,
3715 .num_dims
= num_dims
,
3717 .dst_offsets
= dst_offsets
,
3718 .src_offsets
= src_offsets
,
3719 .dst_dimensions
= dst_dimensions
,
3720 .src_dimensions
= src_dimensions
,
3721 .dst_devicep
= dst_devicep
,
3722 .src_devicep
= src_devicep
3728 if (depobj_count
> 0 && depobj_list
!= NULL
)
3730 flags
|= GOMP_TASK_FLAG_DEPEND
;
3732 depend
[1] = (void *) (uintptr_t) depobj_count
;
3733 depend
[2] = depend
[3] = depend
[4] = 0;
3734 for (i
= 0; i
< depobj_count
; ++i
)
3735 depend
[i
+ 5] = &depobj_list
[i
];
3738 GOMP_task (omp_target_memcpy_rect_async_helper
, &s
, NULL
, sizeof (s
),
3739 __alignof__ (s
), true, flags
, depend
, 0, NULL
);
3745 omp_target_associate_ptr (const void *host_ptr
, const void *device_ptr
,
3746 size_t size
, size_t device_offset
, int device_num
)
3748 if (device_num
== gomp_get_num_devices ())
3754 struct gomp_device_descr
*devicep
= resolve_device (device_num
);
3755 if (devicep
== NULL
)
3758 if (!(devicep
->capabilities
& GOMP_OFFLOAD_CAP_OPENMP_400
)
3759 || devicep
->capabilities
& GOMP_OFFLOAD_CAP_SHARED_MEM
)
3762 gomp_mutex_lock (&devicep
->lock
);
3764 struct splay_tree_s
*mem_map
= &devicep
->mem_map
;
3765 struct splay_tree_key_s cur_node
;
3768 cur_node
.host_start
= (uintptr_t) host_ptr
;
3769 cur_node
.host_end
= cur_node
.host_start
+ size
;
3770 splay_tree_key n
= gomp_map_lookup (mem_map
, &cur_node
);
3773 if (n
->tgt
->tgt_start
+ n
->tgt_offset
3774 == (uintptr_t) device_ptr
+ device_offset
3775 && n
->host_start
<= cur_node
.host_start
3776 && n
->host_end
>= cur_node
.host_end
)
3781 struct target_mem_desc
*tgt
= gomp_malloc (sizeof (*tgt
));
3782 tgt
->array
= gomp_malloc (sizeof (*tgt
->array
));
3786 tgt
->to_free
= NULL
;
3788 tgt
->list_count
= 0;
3789 tgt
->device_descr
= devicep
;
3790 splay_tree_node array
= tgt
->array
;
3791 splay_tree_key k
= &array
->key
;
3792 k
->host_start
= cur_node
.host_start
;
3793 k
->host_end
= cur_node
.host_end
;
3795 k
->tgt_offset
= (uintptr_t) device_ptr
+ device_offset
;
3796 k
->refcount
= REFCOUNT_INFINITY
;
3797 k
->dynamic_refcount
= 0;
3800 array
->right
= NULL
;
3801 splay_tree_insert (&devicep
->mem_map
, array
);
3804 gomp_mutex_unlock (&devicep
->lock
);
3809 omp_target_disassociate_ptr (const void *ptr
, int device_num
)
3811 if (device_num
== gomp_get_num_devices ())
3817 struct gomp_device_descr
*devicep
= resolve_device (device_num
);
3818 if (devicep
== NULL
)
3821 if (!(devicep
->capabilities
& GOMP_OFFLOAD_CAP_OPENMP_400
))
3824 gomp_mutex_lock (&devicep
->lock
);
3826 struct splay_tree_s
*mem_map
= &devicep
->mem_map
;
3827 struct splay_tree_key_s cur_node
;
3830 cur_node
.host_start
= (uintptr_t) ptr
;
3831 cur_node
.host_end
= cur_node
.host_start
;
3832 splay_tree_key n
= gomp_map_lookup (mem_map
, &cur_node
);
3834 && n
->host_start
== cur_node
.host_start
3835 && n
->refcount
== REFCOUNT_INFINITY
3836 && n
->tgt
->tgt_start
== 0
3837 && n
->tgt
->to_free
== NULL
3838 && n
->tgt
->refcount
== 1
3839 && n
->tgt
->list_count
== 0)
3841 splay_tree_remove (&devicep
->mem_map
, n
);
3842 gomp_unmap_tgt (n
->tgt
);
3846 gomp_mutex_unlock (&devicep
->lock
);
3851 omp_get_mapped_ptr (const void *ptr
, int device_num
)
3853 if (device_num
< 0 || device_num
> gomp_get_num_devices ())
3856 if (device_num
== omp_get_initial_device ())
3857 return (void *) ptr
;
3859 struct gomp_device_descr
*devicep
= resolve_device (device_num
);
3860 if (devicep
== NULL
)
3863 if (!(devicep
->capabilities
& GOMP_OFFLOAD_CAP_OPENMP_400
)
3864 || devicep
->capabilities
& GOMP_OFFLOAD_CAP_SHARED_MEM
)
3865 return (void *) ptr
;
3867 gomp_mutex_lock (&devicep
->lock
);
3869 struct splay_tree_s
*mem_map
= &devicep
->mem_map
;
3870 struct splay_tree_key_s cur_node
;
3873 cur_node
.host_start
= (uintptr_t) ptr
;
3874 cur_node
.host_end
= cur_node
.host_start
;
3875 splay_tree_key n
= gomp_map_0len_lookup (mem_map
, &cur_node
);
3879 uintptr_t offset
= cur_node
.host_start
- n
->host_start
;
3880 ret
= (void *) (n
->tgt
->tgt_start
+ n
->tgt_offset
+ offset
);
3883 gomp_mutex_unlock (&devicep
->lock
);
3889 omp_target_is_accessible (const void *ptr
, size_t size
, int device_num
)
3891 if (device_num
< 0 || device_num
> gomp_get_num_devices ())
3894 if (device_num
== gomp_get_num_devices ())
3897 struct gomp_device_descr
*devicep
= resolve_device (device_num
);
3898 if (devicep
== NULL
)
3901 /* TODO: Unified shared memory must be handled when available. */
3903 return devicep
->capabilities
& GOMP_OFFLOAD_CAP_SHARED_MEM
;
int
omp_pause_resource (omp_pause_resource_t kind, int device_num)
{
  (void) kind;
  if (device_num == gomp_get_num_devices ())
    return gomp_pause_host ();
  if (device_num < 0 || device_num >= num_devices_openmp)
    return -1;
  /* Do nothing for target devices for now.  */
  return 0;
}

int
omp_pause_resource_all (omp_pause_resource_t kind)
{
  (void) kind;
  if (gomp_pause_host ())
    return -1;
  /* Do nothing for target devices for now.  */
  return 0;
}

ialias (omp_pause_resource)
ialias (omp_pause_resource_all)
3931 #ifdef PLUGIN_SUPPORT
/* This function tries to load a plugin for DEVICE.  Name of plugin is passed
   in PLUGIN_NAME.
   The handles of the found functions are stored in the corresponding fields
   of DEVICE.  The function returns TRUE on success and FALSE otherwise.  */
3939 gomp_load_plugin_for_device (struct gomp_device_descr
*device
,
3940 const char *plugin_name
)
3942 const char *err
= NULL
, *last_missing
= NULL
;
3944 void *plugin_handle
= dlopen (plugin_name
, RTLD_LAZY
);
3946 #if OFFLOAD_DEFAULTED
3952 /* Check if all required functions are available in the plugin and store
3953 their handlers. None of the symbols can legitimately be NULL,
3954 so we don't need to check dlerror all the time. */
3956 if (!(device->f##_func = dlsym (plugin_handle, "GOMP_OFFLOAD_" #f))) \
3958 /* Similar, but missing functions are not an error. Return false if
3959 failed, true otherwise. */
3960 #define DLSYM_OPT(f, n) \
3961 ((device->f##_func = dlsym (plugin_handle, "GOMP_OFFLOAD_" #n)) \
3962 || (last_missing = #n, 0))
3965 if (device
->version_func () != GOMP_VERSION
)
3967 err
= "plugin version mismatch";
3974 DLSYM (get_num_devices
);
3975 DLSYM (init_device
);
3976 DLSYM (fini_device
);
3978 DLSYM (unload_image
);
3983 device
->capabilities
= device
->get_caps_func ();
3984 if (device
->capabilities
& GOMP_OFFLOAD_CAP_OPENMP_400
)
3987 DLSYM_OPT (async_run
, async_run
);
3988 DLSYM_OPT (can_run
, can_run
);
3991 if (device
->capabilities
& GOMP_OFFLOAD_CAP_OPENACC_200
)
3993 if (!DLSYM_OPT (openacc
.exec
, openacc_exec
)
3994 || !DLSYM_OPT (openacc
.create_thread_data
,
3995 openacc_create_thread_data
)
3996 || !DLSYM_OPT (openacc
.destroy_thread_data
,
3997 openacc_destroy_thread_data
)
3998 || !DLSYM_OPT (openacc
.async
.construct
, openacc_async_construct
)
3999 || !DLSYM_OPT (openacc
.async
.destruct
, openacc_async_destruct
)
4000 || !DLSYM_OPT (openacc
.async
.test
, openacc_async_test
)
4001 || !DLSYM_OPT (openacc
.async
.synchronize
, openacc_async_synchronize
)
4002 || !DLSYM_OPT (openacc
.async
.serialize
, openacc_async_serialize
)
4003 || !DLSYM_OPT (openacc
.async
.queue_callback
,
4004 openacc_async_queue_callback
)
4005 || !DLSYM_OPT (openacc
.async
.exec
, openacc_async_exec
)
4006 || !DLSYM_OPT (openacc
.async
.dev2host
, openacc_async_dev2host
)
4007 || !DLSYM_OPT (openacc
.async
.host2dev
, openacc_async_host2dev
)
4008 || !DLSYM_OPT (openacc
.get_property
, openacc_get_property
))
4010 /* Require all the OpenACC handlers if we have
4011 GOMP_OFFLOAD_CAP_OPENACC_200. */
4012 err
= "plugin missing OpenACC handler function";
4017 cuda
+= DLSYM_OPT (openacc
.cuda
.get_current_device
,
4018 openacc_cuda_get_current_device
);
4019 cuda
+= DLSYM_OPT (openacc
.cuda
.get_current_context
,
4020 openacc_cuda_get_current_context
);
4021 cuda
+= DLSYM_OPT (openacc
.cuda
.get_stream
, openacc_cuda_get_stream
);
4022 cuda
+= DLSYM_OPT (openacc
.cuda
.set_stream
, openacc_cuda_set_stream
);
4023 if (cuda
&& cuda
!= 4)
4025 /* Make sure all the CUDA functions are there if any of them are. */
4026 err
= "plugin missing OpenACC CUDA handler function";
4038 gomp_error ("while loading %s: %s", plugin_name
, err
);
4040 gomp_error ("missing function was %s", last_missing
);
4042 dlclose (plugin_handle
);
/* This function finalizes all initialized devices.  */

static void
gomp_target_fini (void)
{
  int i;
  for (i = 0; i < num_devices; i++)
    {
      bool ret = true;
      struct gomp_device_descr *devicep = &devices[i];
      gomp_mutex_lock (&devicep->lock);
      if (devicep->state == GOMP_DEVICE_INITIALIZED)
	ret = gomp_fini_device (devicep);
      gomp_mutex_unlock (&devicep->lock);
      if (!ret)
	gomp_fatal ("device finalization failed");
    }
}
/* This function initializes the runtime for offloading.
   It parses the list of offload plugins, and tries to load these.
   On return, the variables NUM_DEVICES and NUM_DEVICES_OPENMP
   will be set, and the array DEVICES initialized, containing descriptors for
   corresponding devices, first the GOMP_OFFLOAD_CAP_OPENMP_400 ones, followed
   by the others.  */
4074 gomp_target_init (void)
4076 const char *prefix
="libgomp-plugin-";
4077 const char *suffix
= SONAME_SUFFIX (1);
4078 const char *cur
, *next
;
4080 int i
, new_num_devs
;
4081 int num_devs
= 0, num_devs_openmp
;
4082 struct gomp_device_descr
*devs
= NULL
;
4084 if (gomp_target_offload_var
== GOMP_TARGET_OFFLOAD_DISABLED
)
4087 cur
= OFFLOAD_PLUGINS
;
4091 struct gomp_device_descr current_device
;
4092 size_t prefix_len
, suffix_len
, cur_len
;
4094 next
= strchr (cur
, ',');
4096 prefix_len
= strlen (prefix
);
4097 cur_len
= next
? next
- cur
: strlen (cur
);
4098 suffix_len
= strlen (suffix
);
4100 plugin_name
= (char *) malloc (prefix_len
+ cur_len
+ suffix_len
+ 1);
4107 memcpy (plugin_name
, prefix
, prefix_len
);
4108 memcpy (plugin_name
+ prefix_len
, cur
, cur_len
);
4109 memcpy (plugin_name
+ prefix_len
+ cur_len
, suffix
, suffix_len
+ 1);
4111 if (gomp_load_plugin_for_device (¤t_device
, plugin_name
))
4113 new_num_devs
= current_device
.get_num_devices_func ();
4114 if (new_num_devs
>= 1)
4116 /* Augment DEVICES and NUM_DEVICES. */
4118 devs
= realloc (devs
, (num_devs
+ new_num_devs
)
4119 * sizeof (struct gomp_device_descr
));
4127 current_device
.name
= current_device
.get_name_func ();
4128 /* current_device.capabilities has already been set. */
4129 current_device
.type
= current_device
.get_type_func ();
4130 current_device
.mem_map
.root
= NULL
;
4131 current_device
.state
= GOMP_DEVICE_UNINITIALIZED
;
4132 for (i
= 0; i
< new_num_devs
; i
++)
4134 current_device
.target_id
= i
;
4135 devs
[num_devs
] = current_device
;
4136 gomp_mutex_init (&devs
[num_devs
].lock
);
4147 /* In DEVICES, sort the GOMP_OFFLOAD_CAP_OPENMP_400 ones first, and set
4148 NUM_DEVICES_OPENMP. */
4149 struct gomp_device_descr
*devs_s
4150 = malloc (num_devs
* sizeof (struct gomp_device_descr
));
4157 num_devs_openmp
= 0;
4158 for (i
= 0; i
< num_devs
; i
++)
4159 if (devs
[i
].capabilities
& GOMP_OFFLOAD_CAP_OPENMP_400
)
4160 devs_s
[num_devs_openmp
++] = devs
[i
];
4161 int num_devs_after_openmp
= num_devs_openmp
;
4162 for (i
= 0; i
< num_devs
; i
++)
4163 if (!(devs
[i
].capabilities
& GOMP_OFFLOAD_CAP_OPENMP_400
))
4164 devs_s
[num_devs_after_openmp
++] = devs
[i
];
4168 for (i
= 0; i
< num_devs
; i
++)
4170 /* The 'devices' array can be moved (by the realloc call) until we have
4171 found all the plugins, so registering with the OpenACC runtime (which
4172 takes a copy of the pointer argument) must be delayed until now. */
4173 if (devs
[i
].capabilities
& GOMP_OFFLOAD_CAP_OPENACC_200
)
4174 goacc_register (&devs
[i
]);
4177 num_devices
= num_devs
;
4178 num_devices_openmp
= num_devs_openmp
;
4180 if (atexit (gomp_target_fini
) != 0)
4181 gomp_fatal ("atexit failed");
#else /* PLUGIN_SUPPORT */
/* If dlfcn.h is unavailable we always fall back to host execution.
   GOMP_target* routines are just stubs for this case.  */
static void
gomp_target_init (void)
{
}
#endif /* PLUGIN_SUPPORT */