]>
Commit | Line | Data |
---|---|---|
fbd26352 | 1 | /* Copyright (C) 2013-2019 Free Software Foundation, Inc. |
bc7bff74 | 2 | Contributed by Jakub Jelinek <jakub@redhat.com>. |
3 | ||
c35c9a62 | 4 | This file is part of the GNU Offloading and Multi Processing Library |
5 | (libgomp). | |
bc7bff74 | 6 | |
7 | Libgomp is free software; you can redistribute it and/or modify it | |
8 | under the terms of the GNU General Public License as published by | |
9 | the Free Software Foundation; either version 3, or (at your option) | |
10 | any later version. | |
11 | ||
12 | Libgomp is distributed in the hope that it will be useful, but WITHOUT ANY | |
13 | WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS | |
14 | FOR A PARTICULAR PURPOSE. See the GNU General Public License for | |
15 | more details. | |
16 | ||
17 | Under Section 7 of GPL version 3, you are granted additional | |
18 | permissions described in the GCC Runtime Library Exception, version | |
19 | 3.1, as published by the Free Software Foundation. | |
20 | ||
21 | You should have received a copy of the GNU General Public License and | |
22 | a copy of the GCC Runtime Library Exception along with this program; | |
23 | see the files COPYING3 and COPYING.RUNTIME respectively. If not, see | |
24 | <http://www.gnu.org/licenses/>. */ | |
25 | ||
995b27b9 | 26 | /* This file contains the support of offloading. */ |
bc7bff74 | 27 | |
28 | #include "libgomp.h" | |
ca4c3545 | 29 | #include "oacc-plugin.h" |
30 | #include "oacc-int.h" | |
31 | #include "gomp-constants.h" | |
bc7bff74 | 32 | #include <limits.h> |
33 | #include <stdbool.h> | |
34 | #include <stdlib.h> | |
2634aed9 | 35 | #ifdef HAVE_INTTYPES_H |
36 | # include <inttypes.h> /* For PRIu64. */ | |
37 | #endif | |
bc7bff74 | 38 | #include <string.h> |
ca4c3545 | 39 | #include <assert.h> |
43895be5 | 40 | #include <errno.h> |
bc7bff74 | 41 | |
995b27b9 | 42 | #ifdef PLUGIN_SUPPORT |
43 | #include <dlfcn.h> | |
4fda895a | 44 | #include "plugin-suffix.h" |
995b27b9 | 45 | #endif |
46 | ||
/* Forward declaration; defined together with the plugin loading code.  */
static void gomp_target_init (void);

/* The whole initialization code for offloading plugins is only run once.  */
static pthread_once_t gomp_is_initialized = PTHREAD_ONCE_INIT;

/* Mutex for offload image registration.  */
static gomp_mutex_t register_lock;

/* This structure describes an offload image.
   It contains type of the target device, pointer to host table descriptor,
   and pointer to target data.  */
struct offload_image_descr {
  /* Plugin/image interface version.  */
  unsigned version;
  /* Kind of offload target this image is compiled for.  */
  enum offload_target_type type;
  /* Host-side table descriptor for the image.  */
  const void *host_table;
  /* Opaque target-side data blob.  */
  const void *target_data;
};

/* Array of descriptors of offload images.  */
static struct offload_image_descr *offload_images;

/* Total number of offload images.  */
static int num_offload_images;

/* Array of descriptors for all available devices.  */
static struct gomp_device_descr *devices;

/* Total number of available devices.  */
static int num_devices;

/* Number of GOMP_OFFLOAD_CAP_OPENMP_400 devices.  */
static int num_devices_openmp;
79 | ||
0d8c703d | 80 | /* Similar to gomp_realloc, but release register_lock before gomp_fatal. */ |
81 | ||
82 | static void * | |
83 | gomp_realloc_unlock (void *old, size_t size) | |
84 | { | |
85 | void *ret = realloc (old, size); | |
86 | if (ret == NULL) | |
87 | { | |
88 | gomp_mutex_unlock (®ister_lock); | |
89 | gomp_fatal ("Out of memory allocating %lu bytes", (unsigned long) size); | |
90 | } | |
91 | return ret; | |
92 | } | |
93 | ||
ca4c3545 | 94 | attribute_hidden void |
95 | gomp_init_targets_once (void) | |
995b27b9 | 96 | { |
ca4c3545 | 97 | (void) pthread_once (&gomp_is_initialized, gomp_target_init); |
98 | } | |
995b27b9 | 99 | |
bc7bff74 | 100 | attribute_hidden int |
101 | gomp_get_num_devices (void) | |
102 | { | |
ca4c3545 | 103 | gomp_init_targets_once (); |
104 | return num_devices_openmp; | |
995b27b9 | 105 | } |
106 | ||
107 | static struct gomp_device_descr * | |
108 | resolve_device (int device_id) | |
109 | { | |
ca4c3545 | 110 | if (device_id == GOMP_DEVICE_ICV) |
995b27b9 | 111 | { |
112 | struct gomp_task_icv *icv = gomp_icv (false); | |
113 | device_id = icv->default_device_var; | |
114 | } | |
115 | ||
116 | if (device_id < 0 || device_id >= gomp_get_num_devices ()) | |
117 | return NULL; | |
118 | ||
43895be5 | 119 | gomp_mutex_lock (&devices[device_id].lock); |
f87b2900 | 120 | if (devices[device_id].state == GOMP_DEVICE_UNINITIALIZED) |
43895be5 | 121 | gomp_init_device (&devices[device_id]); |
f87b2900 | 122 | else if (devices[device_id].state == GOMP_DEVICE_FINALIZED) |
123 | { | |
124 | gomp_mutex_unlock (&devices[device_id].lock); | |
125 | return NULL; | |
126 | } | |
43895be5 | 127 | gomp_mutex_unlock (&devices[device_id].lock); |
128 | ||
995b27b9 | 129 | return &devices[device_id]; |
130 | } | |
131 | ||
132 | ||
43895be5 | 133 | static inline splay_tree_key |
134 | gomp_map_lookup (splay_tree mem_map, splay_tree_key key) | |
135 | { | |
136 | if (key->host_start != key->host_end) | |
137 | return splay_tree_lookup (mem_map, key); | |
138 | ||
139 | key->host_end++; | |
140 | splay_tree_key n = splay_tree_lookup (mem_map, key); | |
141 | key->host_end--; | |
142 | if (n) | |
143 | return n; | |
144 | key->host_start--; | |
145 | n = splay_tree_lookup (mem_map, key); | |
146 | key->host_start++; | |
147 | if (n) | |
148 | return n; | |
149 | return splay_tree_lookup (mem_map, key); | |
150 | } | |
151 | ||
9561765e | 152 | static inline splay_tree_key |
153 | gomp_map_0len_lookup (splay_tree mem_map, splay_tree_key key) | |
154 | { | |
155 | if (key->host_start != key->host_end) | |
156 | return splay_tree_lookup (mem_map, key); | |
157 | ||
158 | key->host_end++; | |
159 | splay_tree_key n = splay_tree_lookup (mem_map, key); | |
160 | key->host_end--; | |
161 | return n; | |
162 | } | |
163 | ||
9b50ad1d | 164 | static inline void |
165 | gomp_device_copy (struct gomp_device_descr *devicep, | |
166 | bool (*copy_func) (int, void *, const void *, size_t), | |
167 | const char *dst, void *dstaddr, | |
168 | const char *src, const void *srcaddr, | |
169 | size_t size) | |
170 | { | |
171 | if (!copy_func (devicep->target_id, dstaddr, srcaddr, size)) | |
172 | { | |
173 | gomp_mutex_unlock (&devicep->lock); | |
174 | gomp_fatal ("Copying of %s object [%p..%p) to %s object [%p..%p) failed", | |
175 | src, srcaddr, srcaddr + size, dst, dstaddr, dstaddr + size); | |
176 | } | |
177 | } | |
178 | ||
534b5e00 | 179 | static inline void |
180 | goacc_device_copy_async (struct gomp_device_descr *devicep, | |
181 | bool (*copy_func) (int, void *, const void *, size_t, | |
182 | struct goacc_asyncqueue *), | |
183 | const char *dst, void *dstaddr, | |
184 | const char *src, const void *srcaddr, | |
185 | size_t size, struct goacc_asyncqueue *aq) | |
186 | { | |
187 | if (!copy_func (devicep->target_id, dstaddr, srcaddr, size, aq)) | |
188 | { | |
189 | gomp_mutex_unlock (&devicep->lock); | |
190 | gomp_fatal ("Copying of %s object [%p..%p) to %s object [%p..%p) failed", | |
191 | src, srcaddr, srcaddr + size, dst, dstaddr, dstaddr + size); | |
192 | } | |
193 | } | |
194 | ||
/* Infrastructure for coalescing adjacent or nearly adjacent (in device
   addresses) host to device memory transfers.  */

struct gomp_coalesce_chunk
{
  /* The starting and ending point of a coalesced chunk of memory, as
     offsets relative to the target block's start device address.  */
  size_t start, end;
};
203 | ||
/* State for coalescing several small host-to-device copies into one
   staging buffer and a single combined transfer.  */

struct gomp_coalesce_buf
{
  /* Buffer into which gomp_copy_host2dev will memcpy data and from which
     it will be copied to the device.  */
  void *buf;
  /* Target memory descriptor; chunk offsets below are relative to
     tgt->tgt_start.  */
  struct target_mem_desc *tgt;
  /* Array with offsets, chunks[i].start is the starting offset and
     chunks[i].end ending offset relative to tgt->tgt_start device address
     of chunks which are to be copied to buf and later copied to device.  */
  struct gomp_coalesce_chunk *chunks;
  /* Number of chunks in chunks array, or -1 if coalesce buffering should not
     be performed.  */
  long chunk_cnt;
  /* During construction of chunks array, how many memory regions are within
     the last chunk.  If there is just one memory region for a chunk, we copy
     it directly to device rather than going through buf.  */
  long use_cnt;
};
222 | ||
/* Maximum size of memory region considered for coalescing.  Larger copies
   are performed directly.  */
#define MAX_COALESCE_BUF_SIZE	(32 * 1024)

/* Maximum size of a gap in between regions to consider them being copied
   within the same chunk.  All the device offsets considered are within
   newly allocated device memory, so it isn't fatal if we copy some padding
   in between from host to device.  The gaps come either from alignment
   padding or from memory regions which are not supposed to be copied from
   host to device (e.g. map(alloc:), map(from:) etc.).  */
#define MAX_COALESCE_BUF_GAP	(4 * 1024)
234 | ||
235 | /* Add region with device tgt_start relative offset and length to CBUF. */ | |
236 | ||
237 | static inline void | |
238 | gomp_coalesce_buf_add (struct gomp_coalesce_buf *cbuf, size_t start, size_t len) | |
239 | { | |
240 | if (len > MAX_COALESCE_BUF_SIZE || len == 0) | |
241 | return; | |
242 | if (cbuf->chunk_cnt) | |
243 | { | |
244 | if (cbuf->chunk_cnt < 0) | |
245 | return; | |
65c9e9f9 | 246 | if (start < cbuf->chunks[cbuf->chunk_cnt - 1].end) |
5af416bd | 247 | { |
248 | cbuf->chunk_cnt = -1; | |
249 | return; | |
250 | } | |
65c9e9f9 | 251 | if (start < cbuf->chunks[cbuf->chunk_cnt - 1].end + MAX_COALESCE_BUF_GAP) |
5af416bd | 252 | { |
65c9e9f9 | 253 | cbuf->chunks[cbuf->chunk_cnt - 1].end = start + len; |
5af416bd | 254 | cbuf->use_cnt++; |
255 | return; | |
256 | } | |
257 | /* If the last chunk is only used by one mapping, discard it, | |
258 | as it will be one host to device copy anyway and | |
259 | memcpying it around will only waste cycles. */ | |
260 | if (cbuf->use_cnt == 1) | |
261 | cbuf->chunk_cnt--; | |
262 | } | |
65c9e9f9 | 263 | cbuf->chunks[cbuf->chunk_cnt].start = start; |
264 | cbuf->chunks[cbuf->chunk_cnt].end = start + len; | |
5af416bd | 265 | cbuf->chunk_cnt++; |
266 | cbuf->use_cnt = 1; | |
267 | } | |
268 | ||
269 | /* Return true for mapping kinds which need to copy data from the | |
270 | host to device for regions that weren't previously mapped. */ | |
271 | ||
272 | static inline bool | |
273 | gomp_to_device_kind_p (int kind) | |
274 | { | |
275 | switch (kind) | |
276 | { | |
277 | case GOMP_MAP_ALLOC: | |
278 | case GOMP_MAP_FROM: | |
279 | case GOMP_MAP_FORCE_ALLOC: | |
280 | case GOMP_MAP_ALWAYS_FROM: | |
281 | return false; | |
282 | default: | |
283 | return true; | |
284 | } | |
285 | } | |
286 | ||
/* Copy SZ bytes from host address H to device address D on DEVICEP.
   If CBUF is non-NULL and [D, D+SZ) falls entirely within one of its
   coalesced chunks, the data is only memcpy'd into the host staging buffer
   here; it reaches the device later as part of one combined transfer.
   Otherwise copy directly, enqueueing on AQ when it is non-NULL.  */

attribute_hidden void
gomp_copy_host2dev (struct gomp_device_descr *devicep,
		    struct goacc_asyncqueue *aq,
		    void *d, const void *h, size_t sz,
		    struct gomp_coalesce_buf *cbuf)
{
  if (cbuf)
    {
      /* Chunk offsets are relative to the target block start.  */
      uintptr_t doff = (uintptr_t) d - cbuf->tgt->tgt_start;
      if (doff < cbuf->chunks[cbuf->chunk_cnt - 1].end)
	{
	  /* Binary search for the chunk containing DOFF; chunks are
	     sorted and disjoint.  */
	  long first = 0;
	  long last = cbuf->chunk_cnt - 1;
	  while (first <= last)
	    {
	      long middle = (first + last) >> 1;
	      if (cbuf->chunks[middle].end <= doff)
		first = middle + 1;
	      else if (cbuf->chunks[middle].start <= doff)
		{
		  /* DOFF is inside chunks[middle]; the whole region must
		     fit in it, or the chunk bookkeeping is broken.  */
		  if (doff + sz > cbuf->chunks[middle].end)
		    gomp_fatal ("internal libgomp cbuf error");
		  /* buf[0] corresponds to offset chunks[0].start.  */
		  memcpy ((char *) cbuf->buf + (doff - cbuf->chunks[0].start),
			  h, sz);
		  return;
		}
	      else
		last = middle - 1;
	    }
	}
    }
  if (__builtin_expect (aq != NULL, 0))
    goacc_device_copy_async (devicep, devicep->openacc.async.host2dev_func,
			     "dev", d, "host", h, sz, aq);
  else
    gomp_device_copy (devicep, devicep->host2dev_func, "dev", d, "host", h, sz);
}
324 | ||
/* Copy SZ bytes from device address D to host address H on DEVICEP,
   enqueueing on the OpenACC async queue AQ when it is non-NULL, otherwise
   synchronously.  Device-to-host copies are never coalesced.  */

attribute_hidden void
gomp_copy_dev2host (struct gomp_device_descr *devicep,
		    struct goacc_asyncqueue *aq,
		    void *h, const void *d, size_t sz)
{
  if (__builtin_expect (aq != NULL, 0))
    goacc_device_copy_async (devicep, devicep->openacc.async.dev2host_func,
			     "host", h, "dev", d, sz, aq);
  else
    gomp_device_copy (devicep, devicep->dev2host_func, "host", h, "dev", d, sz);
}
336 | ||
337 | static void | |
338 | gomp_free_device_memory (struct gomp_device_descr *devicep, void *devptr) | |
339 | { | |
340 | if (!devicep->free_func (devicep->target_id, devptr)) | |
341 | { | |
342 | gomp_mutex_unlock (&devicep->lock); | |
343 | gomp_fatal ("error in freeing device memory block at %p", devptr); | |
344 | } | |
345 | } | |
346 | ||
9561765e | 347 | /* Handle the case where gomp_map_lookup, splay_tree_lookup or |
348 | gomp_map_0len_lookup found oldn for newn. | |
995b27b9 | 349 | Helper function of gomp_map_vars. */ |
350 | ||
351 | static inline void | |
534b5e00 | 352 | gomp_map_vars_existing (struct gomp_device_descr *devicep, |
353 | struct goacc_asyncqueue *aq, splay_tree_key oldn, | |
43895be5 | 354 | splay_tree_key newn, struct target_var_desc *tgt_var, |
5af416bd | 355 | unsigned char kind, struct gomp_coalesce_buf *cbuf) |
995b27b9 | 356 | { |
43895be5 | 357 | tgt_var->key = oldn; |
358 | tgt_var->copy_from = GOMP_MAP_COPY_FROM_P (kind); | |
359 | tgt_var->always_copy_from = GOMP_MAP_ALWAYS_FROM_P (kind); | |
360 | tgt_var->offset = newn->host_start - oldn->host_start; | |
361 | tgt_var->length = newn->host_end - newn->host_start; | |
362 | ||
ca4c3545 | 363 | if ((kind & GOMP_MAP_FLAG_FORCE) |
364 | || oldn->host_start > newn->host_start | |
995b27b9 | 365 | || oldn->host_end < newn->host_end) |
0d8c703d | 366 | { |
367 | gomp_mutex_unlock (&devicep->lock); | |
368 | gomp_fatal ("Trying to map into device [%p..%p) object when " | |
369 | "[%p..%p) is already mapped", | |
370 | (void *) newn->host_start, (void *) newn->host_end, | |
371 | (void *) oldn->host_start, (void *) oldn->host_end); | |
372 | } | |
43895be5 | 373 | |
374 | if (GOMP_MAP_ALWAYS_TO_P (kind)) | |
534b5e00 | 375 | gomp_copy_host2dev (devicep, aq, |
9b50ad1d | 376 | (void *) (oldn->tgt->tgt_start + oldn->tgt_offset |
377 | + newn->host_start - oldn->host_start), | |
378 | (void *) newn->host_start, | |
5af416bd | 379 | newn->host_end - newn->host_start, cbuf); |
9b50ad1d | 380 | |
43895be5 | 381 | if (oldn->refcount != REFCOUNT_INFINITY) |
382 | oldn->refcount++; | |
995b27b9 | 383 | } |
384 | ||
/* Fetch the mapping kind at index IDX from KINDS, which is an array of
   unsigned shorts when SHORT_MAPKIND is true and of unsigned chars
   otherwise.  */

static int
get_kind (bool short_mapkind, void *kinds, int idx)
{
  if (short_mapkind)
    return ((unsigned short *) kinds)[idx];
  return ((unsigned char *) kinds)[idx];
}
391 | ||
/* Write, at device address TGT->tgt_start + TARGET_OFFSET, the device
   pointer value corresponding to host address HOST_PTR + BIAS, with BIAS
   subtracted back out — i.e. translate a host pointer value into the
   matching device pointer value.  A zero HOST_PTR yields a null device
   pointer.  Dies if HOST_PTR + BIAS is not already mapped.  CBUF, if
   non-NULL, may stage the write for a coalesced transfer.  */

static void
gomp_map_pointer (struct target_mem_desc *tgt, struct goacc_asyncqueue *aq,
		  uintptr_t host_ptr, uintptr_t target_offset, uintptr_t bias,
		  struct gomp_coalesce_buf *cbuf)
{
  struct gomp_device_descr *devicep = tgt->device_descr;
  struct splay_tree_s *mem_map = &devicep->mem_map;
  struct splay_tree_key_s cur_node;

  cur_node.host_start = host_ptr;
  if (cur_node.host_start == (uintptr_t) NULL)
    {
      /* Null host pointer: store a null device pointer.  */
      cur_node.tgt_offset = (uintptr_t) NULL;
      gomp_copy_host2dev (devicep, aq,
			  (void *) (tgt->tgt_start + target_offset),
			  (void *) &cur_node.tgt_offset,
			  sizeof (void *), cbuf);
      return;
    }
  /* Add bias to the pointer value.  */
  cur_node.host_start += bias;
  cur_node.host_end = cur_node.host_start;
  splay_tree_key n = gomp_map_lookup (mem_map, &cur_node);
  if (n == NULL)
    {
      gomp_mutex_unlock (&devicep->lock);
      gomp_fatal ("Pointer target of array section wasn't mapped");
    }
  /* Translate via the found mapping: offset within the host range maps
     to the same offset within the device range.  */
  cur_node.host_start -= n->host_start;
  cur_node.tgt_offset
    = n->tgt->tgt_start + n->tgt_offset + cur_node.host_start;
  /* At this point tgt_offset is target address of the
     array section.  Now subtract bias to get what we want
     to initialize the pointer with.  */
  cur_node.tgt_offset -= bias;
  gomp_copy_host2dev (devicep, aq, (void *) (tgt->tgt_start + target_offset),
		      (void *) &cur_node.tgt_offset, sizeof (void *), cbuf);
}
430 | ||
/* Map the I-th element of a GOMP_MAP_STRUCT group whose enclosing structure
   mapping N already exists on the device.  FIRST is the index of the first
   element of the group.  The element must resolve to a mapping belonging to
   the same target block as N with a consistent host/device offset (for
   zero-sized elements, the byte before and the byte after are also probed);
   otherwise this is fatal, since elements of one structure must be mapped
   together.  */

static void
gomp_map_fields_existing (struct target_mem_desc *tgt,
			  struct goacc_asyncqueue *aq, splay_tree_key n,
			  size_t first, size_t i, void **hostaddrs,
			  size_t *sizes, void *kinds,
			  struct gomp_coalesce_buf *cbuf)
{
  struct gomp_device_descr *devicep = tgt->device_descr;
  struct splay_tree_s *mem_map = &devicep->mem_map;
  struct splay_tree_key_s cur_node;
  int kind;
  /* GOMP_MAP_STRUCT only occurs with short (unsigned short) map kinds.  */
  const bool short_mapkind = true;
  const int typemask = short_mapkind ? 0xff : 0x7;

  cur_node.host_start = (uintptr_t) hostaddrs[i];
  cur_node.host_end = cur_node.host_start + sizes[i];
  splay_tree_key n2 = splay_tree_lookup (mem_map, &cur_node);
  kind = get_kind (short_mapkind, kinds, i);
  /* Accept N2 only if it lives in the same target block as N and at a
     consistent offset.  */
  if (n2
      && n2->tgt == n->tgt
      && n2->host_start - n->host_start == n2->tgt_offset - n->tgt_offset)
    {
      gomp_map_vars_existing (devicep, aq, n2, &cur_node,
			      &tgt->list[i], kind & typemask, cbuf);
      return;
    }
  if (sizes[i] == 0)
    {
      /* Zero-sized element: retry one byte before (unless that would step
	 in front of the structure's first element)...  */
      if (cur_node.host_start > (uintptr_t) hostaddrs[first - 1])
	{
	  cur_node.host_start--;
	  n2 = splay_tree_lookup (mem_map, &cur_node);
	  cur_node.host_start++;
	  if (n2
	      && n2->tgt == n->tgt
	      && n2->host_start - n->host_start
		 == n2->tgt_offset - n->tgt_offset)
	    {
	      gomp_map_vars_existing (devicep, aq, n2, &cur_node,
				      &tgt->list[i], kind & typemask, cbuf);
	      return;
	    }
	}
      /* ...and one byte after.  */
      cur_node.host_end++;
      n2 = splay_tree_lookup (mem_map, &cur_node);
      cur_node.host_end--;
      if (n2
	  && n2->tgt == n->tgt
	  && n2->host_start - n->host_start == n2->tgt_offset - n->tgt_offset)
	{
	  gomp_map_vars_existing (devicep, aq, n2, &cur_node, &tgt->list[i],
				  kind & typemask, cbuf);
	  return;
	}
    }
  gomp_mutex_unlock (&devicep->lock);
  gomp_fatal ("Trying to map into device [%p..%p) structure element when "
	      "other mapped elements from the same structure weren't mapped "
	      "together with it", (void *) cur_node.host_start,
	      (void *) cur_node.host_end);
}
492 | ||
9561765e | 493 | static inline uintptr_t |
494 | gomp_map_val (struct target_mem_desc *tgt, void **hostaddrs, size_t i) | |
495 | { | |
496 | if (tgt->list[i].key != NULL) | |
497 | return tgt->list[i].key->tgt->tgt_start | |
498 | + tgt->list[i].key->tgt_offset | |
499 | + tgt->list[i].offset; | |
500 | if (tgt->list[i].offset == ~(uintptr_t) 0) | |
501 | return (uintptr_t) hostaddrs[i]; | |
502 | if (tgt->list[i].offset == ~(uintptr_t) 1) | |
503 | return 0; | |
504 | if (tgt->list[i].offset == ~(uintptr_t) 2) | |
505 | return tgt->list[i + 1].key->tgt->tgt_start | |
506 | + tgt->list[i + 1].key->tgt_offset | |
507 | + tgt->list[i + 1].offset | |
508 | + (uintptr_t) hostaddrs[i] | |
509 | - (uintptr_t) hostaddrs[i + 1]; | |
510 | return tgt->tgt_start + tgt->list[i].offset; | |
511 | } | |
512 | ||
534b5e00 | 513 | static inline __attribute__((always_inline)) struct target_mem_desc * |
514 | gomp_map_vars_internal (struct gomp_device_descr *devicep, | |
515 | struct goacc_asyncqueue *aq, size_t mapnum, | |
516 | void **hostaddrs, void **devaddrs, size_t *sizes, | |
517 | void *kinds, bool short_mapkind, | |
518 | enum gomp_map_vars_kind pragma_kind) | |
995b27b9 | 519 | { |
520 | size_t i, tgt_align, tgt_size, not_found_cnt = 0; | |
43895be5 | 521 | bool has_firstprivate = false; |
522 | const int rshift = short_mapkind ? 8 : 3; | |
523 | const int typemask = short_mapkind ? 0xff : 0x7; | |
0d8c703d | 524 | struct splay_tree_s *mem_map = &devicep->mem_map; |
995b27b9 | 525 | struct splay_tree_key_s cur_node; |
526 | struct target_mem_desc *tgt | |
527 | = gomp_malloc (sizeof (*tgt) + sizeof (tgt->list[0]) * mapnum); | |
528 | tgt->list_count = mapnum; | |
43895be5 | 529 | tgt->refcount = pragma_kind == GOMP_MAP_VARS_ENTER_DATA ? 0 : 1; |
995b27b9 | 530 | tgt->device_descr = devicep; |
5af416bd | 531 | struct gomp_coalesce_buf cbuf, *cbufp = NULL; |
995b27b9 | 532 | |
533 | if (mapnum == 0) | |
3bdac239 | 534 | { |
535 | tgt->tgt_start = 0; | |
536 | tgt->tgt_end = 0; | |
537 | return tgt; | |
538 | } | |
995b27b9 | 539 | |
540 | tgt_align = sizeof (void *); | |
541 | tgt_size = 0; | |
5af416bd | 542 | cbuf.chunks = NULL; |
543 | cbuf.chunk_cnt = -1; | |
544 | cbuf.use_cnt = 0; | |
545 | cbuf.buf = NULL; | |
546 | if (mapnum > 1 || pragma_kind == GOMP_MAP_VARS_TARGET) | |
547 | { | |
65c9e9f9 | 548 | size_t chunks_size = (mapnum + 1) * sizeof (struct gomp_coalesce_chunk); |
549 | cbuf.chunks = (struct gomp_coalesce_chunk *) gomp_alloca (chunks_size); | |
5af416bd | 550 | cbuf.chunk_cnt = 0; |
551 | } | |
43895be5 | 552 | if (pragma_kind == GOMP_MAP_VARS_TARGET) |
995b27b9 | 553 | { |
554 | size_t align = 4 * sizeof (void *); | |
555 | tgt_align = align; | |
556 | tgt_size = mapnum * sizeof (void *); | |
5af416bd | 557 | cbuf.chunk_cnt = 1; |
558 | cbuf.use_cnt = 1 + (mapnum > 1); | |
65c9e9f9 | 559 | cbuf.chunks[0].start = 0; |
560 | cbuf.chunks[0].end = tgt_size; | |
995b27b9 | 561 | } |
562 | ||
0d8c703d | 563 | gomp_mutex_lock (&devicep->lock); |
f87b2900 | 564 | if (devicep->state == GOMP_DEVICE_FINALIZED) |
565 | { | |
566 | gomp_mutex_unlock (&devicep->lock); | |
567 | free (tgt); | |
568 | return NULL; | |
569 | } | |
ca4c3545 | 570 | |
995b27b9 | 571 | for (i = 0; i < mapnum; i++) |
572 | { | |
43895be5 | 573 | int kind = get_kind (short_mapkind, kinds, i); |
574 | if (hostaddrs[i] == NULL | |
575 | || (kind & typemask) == GOMP_MAP_FIRSTPRIVATE_INT) | |
995b27b9 | 576 | { |
43895be5 | 577 | tgt->list[i].key = NULL; |
578 | tgt->list[i].offset = ~(uintptr_t) 0; | |
579 | continue; | |
580 | } | |
581 | else if ((kind & typemask) == GOMP_MAP_USE_DEVICE_PTR) | |
582 | { | |
583 | cur_node.host_start = (uintptr_t) hostaddrs[i]; | |
584 | cur_node.host_end = cur_node.host_start; | |
585 | splay_tree_key n = gomp_map_lookup (mem_map, &cur_node); | |
586 | if (n == NULL) | |
587 | { | |
588 | gomp_mutex_unlock (&devicep->lock); | |
589 | gomp_fatal ("use_device_ptr pointer wasn't mapped"); | |
590 | } | |
591 | cur_node.host_start -= n->host_start; | |
592 | hostaddrs[i] | |
593 | = (void *) (n->tgt->tgt_start + n->tgt_offset | |
594 | + cur_node.host_start); | |
595 | tgt->list[i].key = NULL; | |
596 | tgt->list[i].offset = ~(uintptr_t) 0; | |
597 | continue; | |
598 | } | |
599 | else if ((kind & typemask) == GOMP_MAP_STRUCT) | |
600 | { | |
601 | size_t first = i + 1; | |
602 | size_t last = i + sizes[i]; | |
603 | cur_node.host_start = (uintptr_t) hostaddrs[i]; | |
604 | cur_node.host_end = (uintptr_t) hostaddrs[last] | |
605 | + sizes[last]; | |
606 | tgt->list[i].key = NULL; | |
607 | tgt->list[i].offset = ~(uintptr_t) 2; | |
608 | splay_tree_key n = splay_tree_lookup (mem_map, &cur_node); | |
609 | if (n == NULL) | |
610 | { | |
611 | size_t align = (size_t) 1 << (kind >> rshift); | |
612 | if (tgt_align < align) | |
613 | tgt_align = align; | |
5af416bd | 614 | tgt_size -= (uintptr_t) hostaddrs[first] - cur_node.host_start; |
43895be5 | 615 | tgt_size = (tgt_size + align - 1) & ~(align - 1); |
5af416bd | 616 | tgt_size += cur_node.host_end - cur_node.host_start; |
43895be5 | 617 | not_found_cnt += last - i; |
618 | for (i = first; i <= last; i++) | |
5af416bd | 619 | { |
620 | tgt->list[i].key = NULL; | |
621 | if (gomp_to_device_kind_p (get_kind (short_mapkind, kinds, i) | |
622 | & typemask)) | |
623 | gomp_coalesce_buf_add (&cbuf, | |
624 | tgt_size - cur_node.host_end | |
625 | + (uintptr_t) hostaddrs[i], | |
626 | sizes[i]); | |
627 | } | |
43895be5 | 628 | i--; |
629 | continue; | |
630 | } | |
631 | for (i = first; i <= last; i++) | |
534b5e00 | 632 | gomp_map_fields_existing (tgt, aq, n, first, i, hostaddrs, |
5af416bd | 633 | sizes, kinds, NULL); |
43895be5 | 634 | i--; |
995b27b9 | 635 | continue; |
636 | } | |
9561765e | 637 | else if ((kind & typemask) == GOMP_MAP_ALWAYS_POINTER) |
638 | { | |
639 | tgt->list[i].key = NULL; | |
640 | tgt->list[i].offset = ~(uintptr_t) 1; | |
641 | has_firstprivate = true; | |
642 | continue; | |
643 | } | |
995b27b9 | 644 | cur_node.host_start = (uintptr_t) hostaddrs[i]; |
ca4c3545 | 645 | if (!GOMP_MAP_POINTER_P (kind & typemask)) |
995b27b9 | 646 | cur_node.host_end = cur_node.host_start + sizes[i]; |
647 | else | |
648 | cur_node.host_end = cur_node.host_start + sizeof (void *); | |
43895be5 | 649 | if ((kind & typemask) == GOMP_MAP_FIRSTPRIVATE) |
650 | { | |
651 | tgt->list[i].key = NULL; | |
652 | ||
653 | size_t align = (size_t) 1 << (kind >> rshift); | |
654 | if (tgt_align < align) | |
655 | tgt_align = align; | |
656 | tgt_size = (tgt_size + align - 1) & ~(align - 1); | |
5af416bd | 657 | gomp_coalesce_buf_add (&cbuf, tgt_size, |
658 | cur_node.host_end - cur_node.host_start); | |
43895be5 | 659 | tgt_size += cur_node.host_end - cur_node.host_start; |
660 | has_firstprivate = true; | |
661 | continue; | |
662 | } | |
663 | splay_tree_key n; | |
664 | if ((kind & typemask) == GOMP_MAP_ZERO_LEN_ARRAY_SECTION) | |
995b27b9 | 665 | { |
9561765e | 666 | n = gomp_map_0len_lookup (mem_map, &cur_node); |
43895be5 | 667 | if (!n) |
668 | { | |
669 | tgt->list[i].key = NULL; | |
670 | tgt->list[i].offset = ~(uintptr_t) 1; | |
671 | continue; | |
672 | } | |
995b27b9 | 673 | } |
43895be5 | 674 | else |
675 | n = splay_tree_lookup (mem_map, &cur_node); | |
c0998828 | 676 | if (n && n->refcount != REFCOUNT_LINK) |
534b5e00 | 677 | gomp_map_vars_existing (devicep, aq, n, &cur_node, &tgt->list[i], |
5af416bd | 678 | kind & typemask, NULL); |
995b27b9 | 679 | else |
680 | { | |
43895be5 | 681 | tgt->list[i].key = NULL; |
ca4c3545 | 682 | |
683 | size_t align = (size_t) 1 << (kind >> rshift); | |
995b27b9 | 684 | not_found_cnt++; |
685 | if (tgt_align < align) | |
686 | tgt_align = align; | |
687 | tgt_size = (tgt_size + align - 1) & ~(align - 1); | |
5af416bd | 688 | if (gomp_to_device_kind_p (kind & typemask)) |
689 | gomp_coalesce_buf_add (&cbuf, tgt_size, | |
690 | cur_node.host_end - cur_node.host_start); | |
995b27b9 | 691 | tgt_size += cur_node.host_end - cur_node.host_start; |
ca4c3545 | 692 | if ((kind & typemask) == GOMP_MAP_TO_PSET) |
995b27b9 | 693 | { |
694 | size_t j; | |
695 | for (j = i + 1; j < mapnum; j++) | |
43895be5 | 696 | if (!GOMP_MAP_POINTER_P (get_kind (short_mapkind, kinds, j) |
ca4c3545 | 697 | & typemask)) |
995b27b9 | 698 | break; |
699 | else if ((uintptr_t) hostaddrs[j] < cur_node.host_start | |
700 | || ((uintptr_t) hostaddrs[j] + sizeof (void *) | |
701 | > cur_node.host_end)) | |
702 | break; | |
703 | else | |
704 | { | |
43895be5 | 705 | tgt->list[j].key = NULL; |
995b27b9 | 706 | i++; |
707 | } | |
708 | } | |
709 | } | |
710 | } | |
711 | ||
ca4c3545 | 712 | if (devaddrs) |
713 | { | |
714 | if (mapnum != 1) | |
0d8c703d | 715 | { |
716 | gomp_mutex_unlock (&devicep->lock); | |
717 | gomp_fatal ("unexpected aggregation"); | |
718 | } | |
ca4c3545 | 719 | tgt->to_free = devaddrs[0]; |
720 | tgt->tgt_start = (uintptr_t) tgt->to_free; | |
721 | tgt->tgt_end = tgt->tgt_start + sizes[0]; | |
722 | } | |
43895be5 | 723 | else if (not_found_cnt || pragma_kind == GOMP_MAP_VARS_TARGET) |
995b27b9 | 724 | { |
725 | /* Allocate tgt_align aligned tgt_size block of memory. */ | |
726 | /* FIXME: Perhaps change interface to allocate properly aligned | |
727 | memory. */ | |
728 | tgt->to_free = devicep->alloc_func (devicep->target_id, | |
729 | tgt_size + tgt_align - 1); | |
9b50ad1d | 730 | if (!tgt->to_free) |
731 | { | |
732 | gomp_mutex_unlock (&devicep->lock); | |
733 | gomp_fatal ("device memory allocation fail"); | |
734 | } | |
735 | ||
995b27b9 | 736 | tgt->tgt_start = (uintptr_t) tgt->to_free; |
737 | tgt->tgt_start = (tgt->tgt_start + tgt_align - 1) & ~(tgt_align - 1); | |
738 | tgt->tgt_end = tgt->tgt_start + tgt_size; | |
5af416bd | 739 | |
740 | if (cbuf.use_cnt == 1) | |
741 | cbuf.chunk_cnt--; | |
742 | if (cbuf.chunk_cnt > 0) | |
743 | { | |
744 | cbuf.buf | |
65c9e9f9 | 745 | = malloc (cbuf.chunks[cbuf.chunk_cnt - 1].end - cbuf.chunks[0].start); |
5af416bd | 746 | if (cbuf.buf) |
747 | { | |
748 | cbuf.tgt = tgt; | |
749 | cbufp = &cbuf; | |
750 | } | |
751 | } | |
995b27b9 | 752 | } |
753 | else | |
754 | { | |
755 | tgt->to_free = NULL; | |
756 | tgt->tgt_start = 0; | |
757 | tgt->tgt_end = 0; | |
758 | } | |
759 | ||
760 | tgt_size = 0; | |
43895be5 | 761 | if (pragma_kind == GOMP_MAP_VARS_TARGET) |
995b27b9 | 762 | tgt_size = mapnum * sizeof (void *); |
763 | ||
764 | tgt->array = NULL; | |
43895be5 | 765 | if (not_found_cnt || has_firstprivate) |
995b27b9 | 766 | { |
43895be5 | 767 | if (not_found_cnt) |
768 | tgt->array = gomp_malloc (not_found_cnt * sizeof (*tgt->array)); | |
995b27b9 | 769 | splay_tree_node array = tgt->array; |
43895be5 | 770 | size_t j, field_tgt_offset = 0, field_tgt_clear = ~(size_t) 0; |
771 | uintptr_t field_tgt_base = 0; | |
995b27b9 | 772 | |
773 | for (i = 0; i < mapnum; i++) | |
43895be5 | 774 | if (tgt->list[i].key == NULL) |
995b27b9 | 775 | { |
43895be5 | 776 | int kind = get_kind (short_mapkind, kinds, i); |
995b27b9 | 777 | if (hostaddrs[i] == NULL) |
778 | continue; | |
43895be5 | 779 | switch (kind & typemask) |
780 | { | |
781 | size_t align, len, first, last; | |
782 | splay_tree_key n; | |
783 | case GOMP_MAP_FIRSTPRIVATE: | |
784 | align = (size_t) 1 << (kind >> rshift); | |
785 | tgt_size = (tgt_size + align - 1) & ~(align - 1); | |
786 | tgt->list[i].offset = tgt_size; | |
787 | len = sizes[i]; | |
534b5e00 | 788 | gomp_copy_host2dev (devicep, aq, |
9b50ad1d | 789 | (void *) (tgt->tgt_start + tgt_size), |
5af416bd | 790 | (void *) hostaddrs[i], len, cbufp); |
43895be5 | 791 | tgt_size += len; |
792 | continue; | |
793 | case GOMP_MAP_FIRSTPRIVATE_INT: | |
794 | case GOMP_MAP_USE_DEVICE_PTR: | |
795 | case GOMP_MAP_ZERO_LEN_ARRAY_SECTION: | |
796 | continue; | |
797 | case GOMP_MAP_STRUCT: | |
798 | first = i + 1; | |
799 | last = i + sizes[i]; | |
800 | cur_node.host_start = (uintptr_t) hostaddrs[i]; | |
801 | cur_node.host_end = (uintptr_t) hostaddrs[last] | |
802 | + sizes[last]; | |
803 | if (tgt->list[first].key != NULL) | |
804 | continue; | |
805 | n = splay_tree_lookup (mem_map, &cur_node); | |
806 | if (n == NULL) | |
807 | { | |
808 | size_t align = (size_t) 1 << (kind >> rshift); | |
809 | tgt_size -= (uintptr_t) hostaddrs[first] | |
810 | - (uintptr_t) hostaddrs[i]; | |
811 | tgt_size = (tgt_size + align - 1) & ~(align - 1); | |
812 | tgt_size += (uintptr_t) hostaddrs[first] | |
813 | - (uintptr_t) hostaddrs[i]; | |
814 | field_tgt_base = (uintptr_t) hostaddrs[first]; | |
815 | field_tgt_offset = tgt_size; | |
816 | field_tgt_clear = last; | |
817 | tgt_size += cur_node.host_end | |
818 | - (uintptr_t) hostaddrs[first]; | |
819 | continue; | |
820 | } | |
821 | for (i = first; i <= last; i++) | |
534b5e00 | 822 | gomp_map_fields_existing (tgt, aq, n, first, i, hostaddrs, |
5af416bd | 823 | sizes, kinds, cbufp); |
43895be5 | 824 | i--; |
825 | continue; | |
9561765e | 826 | case GOMP_MAP_ALWAYS_POINTER: |
827 | cur_node.host_start = (uintptr_t) hostaddrs[i]; | |
828 | cur_node.host_end = cur_node.host_start + sizeof (void *); | |
829 | n = splay_tree_lookup (mem_map, &cur_node); | |
830 | if (n == NULL | |
831 | || n->host_start > cur_node.host_start | |
832 | || n->host_end < cur_node.host_end) | |
833 | { | |
834 | gomp_mutex_unlock (&devicep->lock); | |
835 | gomp_fatal ("always pointer not mapped"); | |
836 | } | |
837 | if ((get_kind (short_mapkind, kinds, i - 1) & typemask) | |
838 | != GOMP_MAP_ALWAYS_POINTER) | |
839 | cur_node.tgt_offset = gomp_map_val (tgt, hostaddrs, i - 1); | |
840 | if (cur_node.tgt_offset) | |
841 | cur_node.tgt_offset -= sizes[i]; | |
534b5e00 | 842 | gomp_copy_host2dev (devicep, aq, |
9b50ad1d | 843 | (void *) (n->tgt->tgt_start |
844 | + n->tgt_offset | |
845 | + cur_node.host_start | |
846 | - n->host_start), | |
847 | (void *) &cur_node.tgt_offset, | |
5af416bd | 848 | sizeof (void *), cbufp); |
9561765e | 849 | cur_node.tgt_offset = n->tgt->tgt_start + n->tgt_offset |
850 | + cur_node.host_start - n->host_start; | |
851 | continue; | |
43895be5 | 852 | default: |
853 | break; | |
854 | } | |
995b27b9 | 855 | splay_tree_key k = &array->key; |
856 | k->host_start = (uintptr_t) hostaddrs[i]; | |
ca4c3545 | 857 | if (!GOMP_MAP_POINTER_P (kind & typemask)) |
995b27b9 | 858 | k->host_end = k->host_start + sizes[i]; |
859 | else | |
860 | k->host_end = k->host_start + sizeof (void *); | |
0d8c703d | 861 | splay_tree_key n = splay_tree_lookup (mem_map, k); |
c0998828 | 862 | if (n && n->refcount != REFCOUNT_LINK) |
534b5e00 | 863 | gomp_map_vars_existing (devicep, aq, n, k, &tgt->list[i], |
5af416bd | 864 | kind & typemask, cbufp); |
995b27b9 | 865 | else |
866 | { | |
c0998828 | 867 | k->link_key = NULL; |
868 | if (n && n->refcount == REFCOUNT_LINK) | |
869 | { | |
870 | /* Replace target address of the pointer with target address | |
871 | of mapped object in the splay tree. */ | |
872 | splay_tree_remove (mem_map, n); | |
873 | k->link_key = n; | |
874 | } | |
ca4c3545 | 875 | size_t align = (size_t) 1 << (kind >> rshift); |
43895be5 | 876 | tgt->list[i].key = k; |
995b27b9 | 877 | k->tgt = tgt; |
43895be5 | 878 | if (field_tgt_clear != ~(size_t) 0) |
879 | { | |
880 | k->tgt_offset = k->host_start - field_tgt_base | |
881 | + field_tgt_offset; | |
882 | if (i == field_tgt_clear) | |
883 | field_tgt_clear = ~(size_t) 0; | |
884 | } | |
885 | else | |
886 | { | |
887 | tgt_size = (tgt_size + align - 1) & ~(align - 1); | |
888 | k->tgt_offset = tgt_size; | |
889 | tgt_size += k->host_end - k->host_start; | |
890 | } | |
891 | tgt->list[i].copy_from = GOMP_MAP_COPY_FROM_P (kind & typemask); | |
892 | tgt->list[i].always_copy_from | |
893 | = GOMP_MAP_ALWAYS_FROM_P (kind & typemask); | |
894 | tgt->list[i].offset = 0; | |
895 | tgt->list[i].length = k->host_end - k->host_start; | |
995b27b9 | 896 | k->refcount = 1; |
737cc978 | 897 | k->dynamic_refcount = 0; |
995b27b9 | 898 | tgt->refcount++; |
899 | array->left = NULL; | |
900 | array->right = NULL; | |
0d8c703d | 901 | splay_tree_insert (mem_map, array); |
ca4c3545 | 902 | switch (kind & typemask) |
995b27b9 | 903 | { |
ca4c3545 | 904 | case GOMP_MAP_ALLOC: |
905 | case GOMP_MAP_FROM: | |
906 | case GOMP_MAP_FORCE_ALLOC: | |
907 | case GOMP_MAP_FORCE_FROM: | |
43895be5 | 908 | case GOMP_MAP_ALWAYS_FROM: |
995b27b9 | 909 | break; |
ca4c3545 | 910 | case GOMP_MAP_TO: |
911 | case GOMP_MAP_TOFROM: | |
912 | case GOMP_MAP_FORCE_TO: | |
913 | case GOMP_MAP_FORCE_TOFROM: | |
43895be5 | 914 | case GOMP_MAP_ALWAYS_TO: |
915 | case GOMP_MAP_ALWAYS_TOFROM: | |
534b5e00 | 916 | gomp_copy_host2dev (devicep, aq, |
9b50ad1d | 917 | (void *) (tgt->tgt_start |
918 | + k->tgt_offset), | |
919 | (void *) k->host_start, | |
5af416bd | 920 | k->host_end - k->host_start, cbufp); |
995b27b9 | 921 | break; |
ca4c3545 | 922 | case GOMP_MAP_POINTER: |
534b5e00 | 923 | gomp_map_pointer (tgt, aq, |
924 | (uintptr_t) *(void **) k->host_start, | |
5af416bd | 925 | k->tgt_offset, sizes[i], cbufp); |
995b27b9 | 926 | break; |
ca4c3545 | 927 | case GOMP_MAP_TO_PSET: |
534b5e00 | 928 | gomp_copy_host2dev (devicep, aq, |
9b50ad1d | 929 | (void *) (tgt->tgt_start |
930 | + k->tgt_offset), | |
931 | (void *) k->host_start, | |
5af416bd | 932 | k->host_end - k->host_start, cbufp); |
ca4c3545 | 933 | |
995b27b9 | 934 | for (j = i + 1; j < mapnum; j++) |
43895be5 | 935 | if (!GOMP_MAP_POINTER_P (get_kind (short_mapkind, kinds, |
936 | j) | |
ca4c3545 | 937 | & typemask)) |
995b27b9 | 938 | break; |
939 | else if ((uintptr_t) hostaddrs[j] < k->host_start | |
940 | || ((uintptr_t) hostaddrs[j] + sizeof (void *) | |
941 | > k->host_end)) | |
942 | break; | |
943 | else | |
944 | { | |
43895be5 | 945 | tgt->list[j].key = k; |
946 | tgt->list[j].copy_from = false; | |
947 | tgt->list[j].always_copy_from = false; | |
948 | if (k->refcount != REFCOUNT_INFINITY) | |
949 | k->refcount++; | |
534b5e00 | 950 | gomp_map_pointer (tgt, aq, |
84f53106 | 951 | (uintptr_t) *(void **) hostaddrs[j], |
952 | k->tgt_offset | |
953 | + ((uintptr_t) hostaddrs[j] | |
954 | - k->host_start), | |
5af416bd | 955 | sizes[j], cbufp); |
995b27b9 | 956 | i++; |
957 | } | |
ca4c3545 | 958 | break; |
959 | case GOMP_MAP_FORCE_PRESENT: | |
960 | { | |
961 | /* We already looked up the memory region above and it | |
962 | was missing. */ | |
963 | size_t size = k->host_end - k->host_start; | |
0d8c703d | 964 | gomp_mutex_unlock (&devicep->lock); |
2634aed9 | 965 | #ifdef HAVE_INTTYPES_H |
966 | gomp_fatal ("present clause: !acc_is_present (%p, " | |
967 | "%"PRIu64" (0x%"PRIx64"))", | |
968 | (void *) k->host_start, | |
969 | (uint64_t) size, (uint64_t) size); | |
970 | #else | |
ca4c3545 | 971 | gomp_fatal ("present clause: !acc_is_present (%p, " |
2634aed9 | 972 | "%lu (0x%lx))", (void *) k->host_start, |
973 | (unsigned long) size, (unsigned long) size); | |
974 | #endif | |
ca4c3545 | 975 | } |
976 | break; | |
977 | case GOMP_MAP_FORCE_DEVICEPTR: | |
978 | assert (k->host_end - k->host_start == sizeof (void *)); | |
534b5e00 | 979 | gomp_copy_host2dev (devicep, aq, |
9b50ad1d | 980 | (void *) (tgt->tgt_start |
981 | + k->tgt_offset), | |
982 | (void *) k->host_start, | |
5af416bd | 983 | sizeof (void *), cbufp); |
ca4c3545 | 984 | break; |
985 | default: | |
0d8c703d | 986 | gomp_mutex_unlock (&devicep->lock); |
ca4c3545 | 987 | gomp_fatal ("%s: unhandled kind 0x%.2x", __FUNCTION__, |
988 | kind); | |
995b27b9 | 989 | } |
c0998828 | 990 | |
991 | if (k->link_key) | |
992 | { | |
993 | /* Set link pointer on target to the device address of the | |
994 | mapped object. */ | |
995 | void *tgt_addr = (void *) (tgt->tgt_start + k->tgt_offset); | |
9d5a0b9d | 996 | /* We intentionally do not use coalescing here, as it's not |
997 | data allocated by the current call to this function. */ | |
534b5e00 | 998 | gomp_copy_host2dev (devicep, aq, (void *) n->tgt_offset, |
9d5a0b9d | 999 | &tgt_addr, sizeof (void *), NULL); |
c0998828 | 1000 | } |
995b27b9 | 1001 | array++; |
1002 | } | |
1003 | } | |
1004 | } | |
ca4c3545 | 1005 | |
43895be5 | 1006 | if (pragma_kind == GOMP_MAP_VARS_TARGET) |
995b27b9 | 1007 | { |
1008 | for (i = 0; i < mapnum; i++) | |
1009 | { | |
9561765e | 1010 | cur_node.tgt_offset = gomp_map_val (tgt, hostaddrs, i); |
534b5e00 | 1011 | gomp_copy_host2dev (devicep, aq, |
9b50ad1d | 1012 | (void *) (tgt->tgt_start + i * sizeof (void *)), |
5af416bd | 1013 | (void *) &cur_node.tgt_offset, sizeof (void *), |
1014 | cbufp); | |
995b27b9 | 1015 | } |
1016 | } | |
1017 | ||
5af416bd | 1018 | if (cbufp) |
1019 | { | |
1020 | long c = 0; | |
1021 | for (c = 0; c < cbuf.chunk_cnt; ++c) | |
534b5e00 | 1022 | gomp_copy_host2dev (devicep, aq, |
65c9e9f9 | 1023 | (void *) (tgt->tgt_start + cbuf.chunks[c].start), |
1024 | (char *) cbuf.buf + (cbuf.chunks[c].start | |
1025 | - cbuf.chunks[0].start), | |
1026 | cbuf.chunks[c].end - cbuf.chunks[c].start, NULL); | |
5af416bd | 1027 | free (cbuf.buf); |
65c9e9f9 | 1028 | cbuf.buf = NULL; |
1029 | cbufp = NULL; | |
5af416bd | 1030 | } |
1031 | ||
43895be5 | 1032 | /* If the variable from "omp target enter data" map-list was already mapped, |
1033 | tgt is not needed. Otherwise tgt will be freed by gomp_unmap_vars or | |
1034 | gomp_exit_data. */ | |
1035 | if (pragma_kind == GOMP_MAP_VARS_ENTER_DATA && tgt->refcount == 0) | |
1036 | { | |
1037 | free (tgt); | |
1038 | tgt = NULL; | |
1039 | } | |
1040 | ||
0d8c703d | 1041 | gomp_mutex_unlock (&devicep->lock); |
995b27b9 | 1042 | return tgt; |
1043 | } | |
1044 | ||
/* Map MAPNUM host address ranges described by HOSTADDRS/SIZES/KINDS onto
   DEVICEP and return the resulting target memory descriptor.  Synchronous
   variant: all device transfers happen immediately (NULL async queue).  */
attribute_hidden struct target_mem_desc *
gomp_map_vars (struct gomp_device_descr *devicep, size_t mapnum,
	       void **hostaddrs, void **devaddrs, size_t *sizes, void *kinds,
	       bool short_mapkind, enum gomp_map_vars_kind pragma_kind)
{
  return gomp_map_vars_internal (devicep, NULL, mapnum, hostaddrs, devaddrs,
				 sizes, kinds, short_mapkind, pragma_kind);
}
1053 | ||
/* As gomp_map_vars, but device transfers are queued on the OpenACC
   asynchronous queue AQ instead of being performed synchronously.  */
attribute_hidden struct target_mem_desc *
gomp_map_vars_async (struct gomp_device_descr *devicep,
		     struct goacc_asyncqueue *aq, size_t mapnum,
		     void **hostaddrs, void **devaddrs, size_t *sizes,
		     void *kinds, bool short_mapkind,
		     enum gomp_map_vars_kind pragma_kind)
{
  return gomp_map_vars_internal (devicep, aq, mapnum, hostaddrs, devaddrs,
				 sizes, kinds, short_mapkind, pragma_kind);
}
1064 | ||
1065 | attribute_hidden void | |
995b27b9 | 1066 | gomp_unmap_tgt (struct target_mem_desc *tgt) |
1067 | { | |
1068 | /* Deallocate on target the tgt->tgt_start .. tgt->tgt_end region. */ | |
1069 | if (tgt->tgt_end) | |
9b50ad1d | 1070 | gomp_free_device_memory (tgt->device_descr, tgt->to_free); |
995b27b9 | 1071 | |
1072 | free (tgt->array); | |
1073 | free (tgt); | |
1074 | } | |
1075 | ||
737cc978 | 1076 | attribute_hidden bool |
1077 | gomp_remove_var (struct gomp_device_descr *devicep, splay_tree_key k) | |
1078 | { | |
1079 | bool is_tgt_unmapped = false; | |
1080 | splay_tree_remove (&devicep->mem_map, k); | |
1081 | if (k->link_key) | |
1082 | splay_tree_insert (&devicep->mem_map, (splay_tree_node) k->link_key); | |
1083 | if (k->tgt->refcount > 1) | |
1084 | k->tgt->refcount--; | |
1085 | else | |
1086 | { | |
1087 | is_tgt_unmapped = true; | |
1088 | gomp_unmap_tgt (k->tgt); | |
1089 | } | |
1090 | return is_tgt_unmapped; | |
1091 | } | |
1092 | ||
534b5e00 | 1093 | static void |
1094 | gomp_unref_tgt (void *ptr) | |
1095 | { | |
1096 | struct target_mem_desc *tgt = (struct target_mem_desc *) ptr; | |
1097 | ||
1098 | if (tgt->refcount > 1) | |
1099 | tgt->refcount--; | |
1100 | else | |
1101 | gomp_unmap_tgt (tgt); | |
1102 | } | |
1103 | ||
ca4c3545 | 1104 | /* Unmap variables described by TGT. If DO_COPYFROM is true, copy relevant |
1105 | variables back from device to host: if it is false, it is assumed that this | |
2bf775c0 | 1106 | has been done already. */ |
ca4c3545 | 1107 | |
/* Worker shared by gomp_unmap_vars and gomp_unmap_vars_async; AQ is the
   OpenACC async queue to perform device work on, or NULL for synchronous
   operation.  Drops one reference from every key TGT mapped, copying data
   back to the host where requested, then drops TGT itself.  */
static inline __attribute__((always_inline)) void
gomp_unmap_vars_internal (struct target_mem_desc *tgt, bool do_copyfrom,
			  struct goacc_asyncqueue *aq)
{
  struct gomp_device_descr *devicep = tgt->device_descr;

  /* Nothing was mapped; only the descriptor itself needs freeing.  */
  if (tgt->list_count == 0)
    {
      free (tgt);
      return;
    }

  gomp_mutex_lock (&devicep->lock);
  if (devicep->state == GOMP_DEVICE_FINALIZED)
    {
      /* Device already shut down: no copy-back or device-side free is
	 possible, just release host memory.  */
      gomp_mutex_unlock (&devicep->lock);
      free (tgt->array);
      free (tgt);
      return;
    }

  size_t i;
  for (i = 0; i < tgt->list_count; i++)
    {
      splay_tree_key k = tgt->list[i].key;
      if (k == NULL)
	continue;

      /* Decide whether this drops K's last reference; keys marked
	 REFCOUNT_INFINITY are never unmapped.  */
      bool do_unmap = false;
      if (k->refcount > 1 && k->refcount != REFCOUNT_INFINITY)
	k->refcount--;
      else if (k->refcount == 1)
	{
	  k->refcount--;
	  do_unmap = true;
	}

      /* Copy back before removing the mapping: on the final unmap of a
	 copy_from entry (when DO_COPYFROM allows it), or unconditionally
	 for always_copy_from entries.  */
      if ((do_unmap && do_copyfrom && tgt->list[i].copy_from)
	  || tgt->list[i].always_copy_from)
	gomp_copy_dev2host (devicep, aq,
			    (void *) (k->host_start + tgt->list[i].offset),
			    (void *) (k->tgt->tgt_start + k->tgt_offset
				      + tgt->list[i].offset),
			    tgt->list[i].length);
      if (do_unmap)
	gomp_remove_var (devicep, k);
    }

  /* With an async queue, TGT must stay alive until the queued device work
     has completed, so unreference it from a queue callback instead of
     immediately.  */
  if (aq)
    devicep->openacc.async.queue_callback_func (aq, gomp_unref_tgt,
						(void *) tgt);
  else
    gomp_unref_tgt ((void *) tgt);

  gomp_mutex_unlock (&devicep->lock);
}
1164 | ||
/* Unmap variables described by TGT synchronously; see
   gomp_unmap_vars_internal for the DO_COPYFROM semantics.  */
attribute_hidden void
gomp_unmap_vars (struct target_mem_desc *tgt, bool do_copyfrom)
{
  gomp_unmap_vars_internal (tgt, do_copyfrom, NULL);
}
1170 | ||
/* As gomp_unmap_vars, but device work is queued on the OpenACC
   asynchronous queue AQ.  */
attribute_hidden void
gomp_unmap_vars_async (struct target_mem_desc *tgt, bool do_copyfrom,
		       struct goacc_asyncqueue *aq)
{
  gomp_unmap_vars_internal (tgt, do_copyfrom, aq);
}
1177 | ||
/* Implement "target update": for each of the MAPNUM host ranges in
   HOSTADDRS/SIZES that is currently mapped on DEVICEP, copy data to and/or
   from the device as directed by KINDS.  SHORT_MAPKIND selects between the
   16-bit kind encoding (low byte = map type) and the legacy 3-bit one.
   Zero-sized entries are skipped; unmapped ranges are silently ignored,
   but a partially-mapped range is a fatal error.  */
static void
gomp_update (struct gomp_device_descr *devicep, size_t mapnum, void **hostaddrs,
	     size_t *sizes, void *kinds, bool short_mapkind)
{
  size_t i;
  struct splay_tree_key_s cur_node;
  const int typemask = short_mapkind ? 0xff : 0x7;

  if (!devicep)
    return;

  if (mapnum == 0)
    return;

  gomp_mutex_lock (&devicep->lock);
  if (devicep->state == GOMP_DEVICE_FINALIZED)
    {
      gomp_mutex_unlock (&devicep->lock);
      return;
    }

  for (i = 0; i < mapnum; i++)
    if (sizes[i])
      {
	cur_node.host_start = (uintptr_t) hostaddrs[i];
	cur_node.host_end = cur_node.host_start + sizes[i];
	splay_tree_key n = splay_tree_lookup (&devicep->mem_map, &cur_node);
	if (n)
	  {
	    int kind = get_kind (short_mapkind, kinds, i);
	    /* The requested range must lie entirely inside the existing
	       mapping.  */
	    if (n->host_start > cur_node.host_start
		|| n->host_end < cur_node.host_end)
	      {
		gomp_mutex_unlock (&devicep->lock);
		gomp_fatal ("Trying to update [%p..%p) object when "
			    "only [%p..%p) is mapped",
			    (void *) cur_node.host_start,
			    (void *) cur_node.host_end,
			    (void *) n->host_start,
			    (void *) n->host_end);
	      }

	    /* Translate the host range to its device counterpart using the
	       mapping's base offset.  */
	    void *hostaddr = (void *) cur_node.host_start;
	    void *devaddr = (void *) (n->tgt->tgt_start + n->tgt_offset
				      + cur_node.host_start - n->host_start);
	    size_t size = cur_node.host_end - cur_node.host_start;

	    if (GOMP_MAP_COPY_TO_P (kind & typemask))
	      gomp_copy_host2dev (devicep, NULL, devaddr, hostaddr, size,
				  NULL);
	    if (GOMP_MAP_COPY_FROM_P (kind & typemask))
	      gomp_copy_dev2host (devicep, NULL, hostaddr, devaddr, size);
	  }
      }
  gomp_mutex_unlock (&devicep->lock);
}
1235 | ||
1236 | /* Load image pointed by TARGET_DATA to the device, specified by DEVICEP. | |
1237 | And insert to splay tree the mapping between addresses from HOST_TABLE and | |
7de5731e | 1238 | from loaded target image. We rely in the host and device compiler |
1239 | emitting variable and functions in the same order. */ | |
0d8c703d | 1240 | |
/* Load the offload image (VERSION/TARGET_DATA) onto DEVICEP and register
   host-to-target address mappings for its functions and variables.
   HOST_TABLE is four pointers delimiting the host function table
   [start, end) and the host variable table [start, end); the variable
   table holds (address, size) pairs.  DEVICEP->lock must be held by the
   caller; IS_REGISTER_LOCK says whether the global register_lock is also
   held and must be released on a fatal error.  */
static void
gomp_load_image_to_device (struct gomp_device_descr *devicep, unsigned version,
			   const void *host_table, const void *target_data,
			   bool is_register_lock)
{
  void **host_func_table = ((void ***) host_table)[0];
  void **host_funcs_end = ((void ***) host_table)[1];
  void **host_var_table = ((void ***) host_table)[2];
  void **host_vars_end = ((void ***) host_table)[3];

  /* The func table contains only addresses, the var table contains addresses
     and corresponding sizes.  */
  int num_funcs = host_funcs_end - host_func_table;
  int num_vars = (host_vars_end - host_var_table) / 2;

  /* Load image to device and get target addresses for the image.  */
  struct addr_pair *target_table = NULL;
  int i, num_target_entries;

  num_target_entries
    = devicep->load_image_func (devicep->target_id, version,
				target_data, &target_table);

  /* Host and device tables are generated in the same order; a count
     mismatch means the image does not correspond to this host binary.  */
  if (num_target_entries != num_funcs + num_vars)
    {
      gomp_mutex_unlock (&devicep->lock);
      if (is_register_lock)
	gomp_mutex_unlock (&register_lock);
      gomp_fatal ("Cannot map target functions or variables"
		  " (expected %u, have %u)", num_funcs + num_vars,
		  num_target_entries);
    }

  /* Insert host-target address mapping into splay tree.  The whole image
     shares one descriptor that is never reference-counted away.  */
  struct target_mem_desc *tgt = gomp_malloc (sizeof (*tgt));
  tgt->array = gomp_malloc ((num_funcs + num_vars) * sizeof (*tgt->array));
  tgt->refcount = REFCOUNT_INFINITY;
  tgt->tgt_start = 0;
  tgt->tgt_end = 0;
  tgt->to_free = NULL;
  tgt->prev = NULL;
  tgt->list_count = 0;
  tgt->device_descr = devicep;
  splay_tree_node array = tgt->array;

  /* Functions are keyed by host address with a dummy one-byte range.  */
  for (i = 0; i < num_funcs; i++)
    {
      splay_tree_key k = &array->key;
      k->host_start = (uintptr_t) host_func_table[i];
      k->host_end = k->host_start + 1;
      k->tgt = tgt;
      k->tgt_offset = target_table[i].start;
      k->refcount = REFCOUNT_INFINITY;
      k->link_key = NULL;
      array->left = NULL;
      array->right = NULL;
      splay_tree_insert (&devicep->mem_map, array);
      array++;
    }

  /* Most significant bit of the size in host and target tables marks
     "omp declare target link" variables.  */
  const uintptr_t link_bit = 1ULL << (sizeof (uintptr_t) * __CHAR_BIT__ - 1);
  const uintptr_t size_mask = ~link_bit;

  for (i = 0; i < num_vars; i++)
    {
      struct addr_pair *target_var = &target_table[num_funcs + i];
      uintptr_t target_size = target_var->end - target_var->start;

      /* Host and device must agree on each variable's size (including
	 the link bit).  */
      if ((uintptr_t) host_var_table[i * 2 + 1] != target_size)
	{
	  gomp_mutex_unlock (&devicep->lock);
	  if (is_register_lock)
	    gomp_mutex_unlock (&register_lock);
	  gomp_fatal ("Cannot map target variables (size mismatch)");
	}

      splay_tree_key k = &array->key;
      k->host_start = (uintptr_t) host_var_table[i * 2];
      k->host_end
	= k->host_start + (size_mask & (uintptr_t) host_var_table[i * 2 + 1]);
      k->tgt = tgt;
      k->tgt_offset = target_var->start;
      /* Link variables start out as placeholders until remapped.  */
      k->refcount = target_size & link_bit ? REFCOUNT_LINK : REFCOUNT_INFINITY;
      k->link_key = NULL;
      array->left = NULL;
      array->right = NULL;
      splay_tree_insert (&devicep->mem_map, array);
      array++;
    }

  free (target_table);
}
1335 | ||
7de5731e | 1336 | /* Unload the mappings described by target_data from device DEVICE_P. |
1337 | The device must be locked. */ | |
1338 | ||
1339 | static void | |
1340 | gomp_unload_image_from_device (struct gomp_device_descr *devicep, | |
d3d8e632 | 1341 | unsigned version, |
7de5731e | 1342 | const void *host_table, const void *target_data) |
1343 | { | |
1344 | void **host_func_table = ((void ***) host_table)[0]; | |
1345 | void **host_funcs_end = ((void ***) host_table)[1]; | |
1346 | void **host_var_table = ((void ***) host_table)[2]; | |
1347 | void **host_vars_end = ((void ***) host_table)[3]; | |
1348 | ||
1349 | /* The func table contains only addresses, the var table contains addresses | |
1350 | and corresponding sizes. */ | |
1351 | int num_funcs = host_funcs_end - host_func_table; | |
1352 | int num_vars = (host_vars_end - host_var_table) / 2; | |
1353 | ||
7de5731e | 1354 | struct splay_tree_key_s k; |
1355 | splay_tree_key node = NULL; | |
1356 | ||
1357 | /* Find mapping at start of node array */ | |
1358 | if (num_funcs || num_vars) | |
1359 | { | |
1360 | k.host_start = (num_funcs ? (uintptr_t) host_func_table[0] | |
1361 | : (uintptr_t) host_var_table[0]); | |
1362 | k.host_end = k.host_start + 1; | |
1363 | node = splay_tree_lookup (&devicep->mem_map, &k); | |
1364 | } | |
d3d8e632 | 1365 | |
9b50ad1d | 1366 | if (!devicep->unload_image_func (devicep->target_id, version, target_data)) |
1367 | { | |
1368 | gomp_mutex_unlock (&devicep->lock); | |
1369 | gomp_fatal ("image unload fail"); | |
1370 | } | |
7de5731e | 1371 | |
1372 | /* Remove mappings from splay tree. */ | |
c0998828 | 1373 | int i; |
1374 | for (i = 0; i < num_funcs; i++) | |
7de5731e | 1375 | { |
c0998828 | 1376 | k.host_start = (uintptr_t) host_func_table[i]; |
7de5731e | 1377 | k.host_end = k.host_start + 1; |
1378 | splay_tree_remove (&devicep->mem_map, &k); | |
1379 | } | |
1380 | ||
c0998828 | 1381 | /* Most significant bit of the size in host and target tables marks |
1382 | "omp declare target link" variables. */ | |
1383 | const uintptr_t link_bit = 1ULL << (sizeof (uintptr_t) * __CHAR_BIT__ - 1); | |
1384 | const uintptr_t size_mask = ~link_bit; | |
1385 | bool is_tgt_unmapped = false; | |
1386 | ||
1387 | for (i = 0; i < num_vars; i++) | |
7de5731e | 1388 | { |
c0998828 | 1389 | k.host_start = (uintptr_t) host_var_table[i * 2]; |
1390 | k.host_end | |
1391 | = k.host_start + (size_mask & (uintptr_t) host_var_table[i * 2 + 1]); | |
1392 | ||
1393 | if (!(link_bit & (uintptr_t) host_var_table[i * 2 + 1])) | |
1394 | splay_tree_remove (&devicep->mem_map, &k); | |
1395 | else | |
1396 | { | |
1397 | splay_tree_key n = splay_tree_lookup (&devicep->mem_map, &k); | |
737cc978 | 1398 | is_tgt_unmapped = gomp_remove_var (devicep, n); |
c0998828 | 1399 | } |
7de5731e | 1400 | } |
1401 | ||
c0998828 | 1402 | if (node && !is_tgt_unmapped) |
7de5731e | 1403 | { |
1404 | free (node->tgt); | |
1405 | free (node); | |
1406 | } | |
1407 | } | |
1408 | ||
0d8c703d | 1409 | /* This function should be called from every offload image while loading. |
995b27b9 | 1410 | It gets the descriptor of the host func and var tables HOST_TABLE, TYPE of |
1411 | the target, and TARGET_DATA needed by target plugin. */ | |
1412 | ||
/* Entry point called by each offload image on load.  VERSION is the
   image/libgomp interface version, HOST_TABLE the descriptor of the host
   func and var tables, TARGET_TYPE the device type the image targets, and
   TARGET_DATA the plugin-specific image payload.  */
void
GOMP_offload_register_ver (unsigned version, const void *host_table,
			   int target_type, const void *target_data)
{
  int i;

  /* Refuse images built against a newer libgomp interface than ours.  */
  if (GOMP_VERSION_LIB (version) > GOMP_VERSION)
    gomp_fatal ("Library too old for offload (version %u < %u)",
		GOMP_VERSION, GOMP_VERSION_LIB (version));

  gomp_mutex_lock (&register_lock);

  /* Load image to all initialized devices.  */
  for (i = 0; i < num_devices; i++)
    {
      struct gomp_device_descr *devicep = &devices[i];
      gomp_mutex_lock (&devicep->lock);
      if (devicep->type == target_type
	  && devicep->state == GOMP_DEVICE_INITIALIZED)
	gomp_load_image_to_device (devicep, version,
				   host_table, target_data, true);
      gomp_mutex_unlock (&devicep->lock);
    }

  /* Insert image to array of pending images, so devices initialized later
     can pick it up too.  */
  offload_images
    = gomp_realloc_unlock (offload_images,
			   (num_offload_images + 1)
			   * sizeof (struct offload_image_descr));
  offload_images[num_offload_images].version = version;
  offload_images[num_offload_images].type = target_type;
  offload_images[num_offload_images].host_table = host_table;
  offload_images[num_offload_images].target_data = target_data;

  num_offload_images++;
  gomp_mutex_unlock (&register_lock);
}
1450 | ||
/* Legacy registration entry point: register an offload image using
   interface version 0.  */
void
GOMP_offload_register (const void *host_table, int target_type,
		       const void *target_data)
{
  GOMP_offload_register_ver (0, host_table, target_type, target_data);
}
1457 | ||
0d8c703d | 1458 | /* This function should be called from every offload image while unloading. |
1459 | It gets the descriptor of the host func and var tables HOST_TABLE, TYPE of | |
1460 | the target, and TARGET_DATA needed by target plugin. */ | |
995b27b9 | 1461 | |
/* Entry point called by each offload image on unload.  Mirrors
   GOMP_offload_register_ver: unloads the image from every matching
   initialized device and removes it from the pending-images array.  */
void
GOMP_offload_unregister_ver (unsigned version, const void *host_table,
			     int target_type, const void *target_data)
{
  int i;

  gomp_mutex_lock (&register_lock);

  /* Unload image from all initialized devices.  */
  for (i = 0; i < num_devices; i++)
    {
      struct gomp_device_descr *devicep = &devices[i];
      gomp_mutex_lock (&devicep->lock);
      if (devicep->type == target_type
	  && devicep->state == GOMP_DEVICE_INITIALIZED)
	gomp_unload_image_from_device (devicep, version,
				       host_table, target_data);
      gomp_mutex_unlock (&devicep->lock);
    }

  /* Remove image from array of pending images.  TARGET_DATA identifies the
     image; the last entry is swapped into the vacated slot, so order is
     not preserved.  */
  for (i = 0; i < num_offload_images; i++)
    if (offload_images[i].target_data == target_data)
      {
	offload_images[i] = offload_images[--num_offload_images];
	break;
      }

  gomp_mutex_unlock (&register_lock);
}
995b27b9 | 1492 | |
/* Legacy unregistration entry point: unregister an offload image using
   interface version 0.  */
void
GOMP_offload_unregister (const void *host_table, int target_type,
			 const void *target_data)
{
  GOMP_offload_unregister_ver (0, host_table, target_type, target_data);
}
1499 | ||
0d8c703d | 1500 | /* This function initializes the target device, specified by DEVICEP. DEVICEP |
1501 | must be locked on entry, and remains locked on return. */ | |
ca4c3545 | 1502 | |
1503 | attribute_hidden void | |
0d8c703d | 1504 | gomp_init_device (struct gomp_device_descr *devicep) |
ca4c3545 | 1505 | { |
995b27b9 | 1506 | int i; |
9b50ad1d | 1507 | if (!devicep->init_device_func (devicep->target_id)) |
1508 | { | |
1509 | gomp_mutex_unlock (&devicep->lock); | |
1510 | gomp_fatal ("device initialization failed"); | |
1511 | } | |
0d8c703d | 1512 | |
1513 | /* Load to device all images registered by the moment. */ | |
1514 | for (i = 0; i < num_offload_images; i++) | |
995b27b9 | 1515 | { |
0d8c703d | 1516 | struct offload_image_descr *image = &offload_images[i]; |
1517 | if (image->type == devicep->type) | |
d3d8e632 | 1518 | gomp_load_image_to_device (devicep, image->version, |
1519 | image->host_table, image->target_data, | |
1520 | false); | |
995b27b9 | 1521 | } |
1522 | ||
534b5e00 | 1523 | /* Initialize OpenACC asynchronous queues. */ |
1524 | goacc_init_asyncqueues (devicep); | |
1525 | ||
f87b2900 | 1526 | devicep->state = GOMP_DEVICE_INITIALIZED; |
ca4c3545 | 1527 | } |
1528 | ||
534b5e00 | 1529 | /* This function finalizes the target device, specified by DEVICEP. DEVICEP |
1530 | must be locked on entry, and remains locked on return. */ | |
1531 | ||
1532 | attribute_hidden bool | |
1533 | gomp_fini_device (struct gomp_device_descr *devicep) | |
1534 | { | |
1535 | bool ret = goacc_fini_asyncqueues (devicep); | |
1536 | ret &= devicep->fini_device_func (devicep->target_id); | |
1537 | devicep->state = GOMP_DEVICE_FINALIZED; | |
1538 | return ret; | |
1539 | } | |
1540 | ||
7de5731e | 1541 | attribute_hidden void |
1542 | gomp_unload_device (struct gomp_device_descr *devicep) | |
1543 | { | |
f87b2900 | 1544 | if (devicep->state == GOMP_DEVICE_INITIALIZED) |
7de5731e | 1545 | { |
1546 | unsigned i; | |
1547 | ||
1548 | /* Unload from device all images registered at the moment. */ | |
1549 | for (i = 0; i < num_offload_images; i++) | |
1550 | { | |
1551 | struct offload_image_descr *image = &offload_images[i]; | |
1552 | if (image->type == devicep->type) | |
d3d8e632 | 1553 | gomp_unload_image_from_device (devicep, image->version, |
1554 | image->host_table, | |
7de5731e | 1555 | image->target_data); |
1556 | } | |
1557 | } | |
1558 | } | |
1559 | ||
ca4c3545 | 1560 | /* Free address mapping tables. MM must be locked on entry, and remains locked |
1561 | on return. */ | |
1562 | ||
1563 | attribute_hidden void | |
0d8c703d | 1564 | gomp_free_memmap (struct splay_tree_s *mem_map) |
ca4c3545 | 1565 | { |
0d8c703d | 1566 | while (mem_map->root) |
ca4c3545 | 1567 | { |
0d8c703d | 1568 | struct target_mem_desc *tgt = mem_map->root->key.tgt; |
ca4c3545 | 1569 | |
0d8c703d | 1570 | splay_tree_remove (mem_map, &mem_map->root->key); |
ca4c3545 | 1571 | free (tgt->array); |
1572 | free (tgt); | |
1573 | } | |
ca4c3545 | 1574 | } |
1575 | ||
9561765e | 1576 | /* Host fallback for GOMP_target{,_ext} routines. */ |
43895be5 | 1577 | |
/* Host fallback for GOMP_target{,_ext}: run FN (HOSTADDRS) on the host with
   a freshly zeroed thread state, restoring the previous state afterwards.  */
static void
gomp_target_fallback (void (*fn) (void *), void **hostaddrs)
{
  /* Save the whole current thread state before clearing it for FN.  */
  struct gomp_thread old_thr, *thr = gomp_thread ();
  old_thr = *thr;
  memset (thr, '\0', sizeof (*thr));
  if (gomp_places_list)
    {
      /* Preserve the current place so affinity remains meaningful while
	 FN runs.  */
      thr->place = old_thr.place;
      thr->ts.place_partition_len = gomp_places_list_len;
    }
  fn (hostaddrs);
  /* Release any resources FN's team acquired, then restore the saved
     state.  */
  gomp_free_thread (thr);
  *thr = old_thr;
}
1593 | ||
56686608 | 1594 | /* Calculate alignment and size requirements of a private copy of data shared |
1595 | as GOMP_MAP_FIRSTPRIVATE and store them to TGT_ALIGN and TGT_SIZE. */ | |
9561765e | 1596 | |
56686608 | 1597 | static inline void |
1598 | calculate_firstprivate_requirements (size_t mapnum, size_t *sizes, | |
1599 | unsigned short *kinds, size_t *tgt_align, | |
1600 | size_t *tgt_size) | |
9561765e | 1601 | { |
56686608 | 1602 | size_t i; |
1603 | for (i = 0; i < mapnum; i++) | |
1604 | if ((kinds[i] & 0xff) == GOMP_MAP_FIRSTPRIVATE) | |
1605 | { | |
1606 | size_t align = (size_t) 1 << (kinds[i] >> 8); | |
1607 | if (*tgt_align < align) | |
1608 | *tgt_align = align; | |
1609 | *tgt_size = (*tgt_size + align - 1) & ~(align - 1); | |
1610 | *tgt_size += sizes[i]; | |
1611 | } | |
1612 | } | |
1613 | ||
1614 | /* Copy data shared as GOMP_MAP_FIRSTPRIVATE to DST. */ | |
1615 | ||
1616 | static inline void | |
1617 | copy_firstprivate_data (char *tgt, size_t mapnum, void **hostaddrs, | |
1618 | size_t *sizes, unsigned short *kinds, size_t tgt_align, | |
1619 | size_t tgt_size) | |
1620 | { | |
1621 | uintptr_t al = (uintptr_t) tgt & (tgt_align - 1); | |
1622 | if (al) | |
1623 | tgt += tgt_align - al; | |
1624 | tgt_size = 0; | |
1625 | size_t i; | |
9561765e | 1626 | for (i = 0; i < mapnum; i++) |
1627 | if ((kinds[i] & 0xff) == GOMP_MAP_FIRSTPRIVATE) | |
1628 | { | |
1629 | size_t align = (size_t) 1 << (kinds[i] >> 8); | |
9561765e | 1630 | tgt_size = (tgt_size + align - 1) & ~(align - 1); |
56686608 | 1631 | memcpy (tgt + tgt_size, hostaddrs[i], sizes[i]); |
1632 | hostaddrs[i] = tgt + tgt_size; | |
1633 | tgt_size = tgt_size + sizes[i]; | |
9561765e | 1634 | } |
56686608 | 1635 | } |
1636 | ||
/* Helper function of GOMP_target{,_ext} routines.  Resolve the device
   address of host function HOST_FN on DEVICEP, or NULL if the function
   has no device counterpart or the device has been finalized.  Takes and
   releases DEVICEP's lock internally.  */

static void *
gomp_get_target_fn_addr (struct gomp_device_descr *devicep,
			 void (*host_fn) (void *))
{
  /* Devices that execute host code natively can just use HOST_FN.  */
  if (devicep->capabilities & GOMP_OFFLOAD_CAP_NATIVE_EXEC)
    return (void *) host_fn;
  else
    {
      gomp_mutex_lock (&devicep->lock);
      if (devicep->state == GOMP_DEVICE_FINALIZED)
	{
	  gomp_mutex_unlock (&devicep->lock);
	  return NULL;
	}

      /* Function descriptors are registered in the address map as
	 one-byte ranges starting at the host address.  */
      struct splay_tree_key_s k;
      k.host_start = (uintptr_t) host_fn;
      k.host_end = k.host_start + 1;
      splay_tree_key tgt_fn = splay_tree_lookup (&devicep->mem_map, &k);
      gomp_mutex_unlock (&devicep->lock);
      if (tgt_fn == NULL)
	return NULL;

      return (void *) tgt_fn->tgt_offset;
    }
}
1665 | ||
/* Called when encountering a target directive.  If DEVICE
   is GOMP_DEVICE_ICV, it means use device-var ICV.  If it is
   GOMP_DEVICE_HOST_FALLBACK (or any value
   larger than last available hw device), use host fallback.
   FN is address of host code, UNUSED is part of the current ABI, but
   we're not actually using it.  HOSTADDRS, SIZES and KINDS are arrays
   with MAPNUM entries, with addresses of the host objects,
   sizes of the host objects (resp. for pointer kind pointer bias
   and assumed sizeof (void *) size) and kinds.  */

void
GOMP_target (int device, void (*fn) (void *), const void *unused,
	     size_t mapnum, void **hostaddrs, size_t *sizes,
	     unsigned char *kinds)
{
  struct gomp_device_descr *devicep = resolve_device (device);

  /* NOTE: the order of the || operands matters; fn_addr is only
     assigned once the earlier tests have established that DEVICEP is a
     usable non-shared-memory OpenMP device.  */
  void *fn_addr;
  if (devicep == NULL
      || !(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
      /* All shared memory devices should use the GOMP_target_ext function.  */
      || devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM
      || !(fn_addr = gomp_get_target_fn_addr (devicep, fn)))
    return gomp_target_fallback (fn, hostaddrs);

  /* Map the data, run the region synchronously, then unmap (copying
     back any from/tofrom data).  */
  struct target_mem_desc *tgt_vars
    = gomp_map_vars (devicep, mapnum, hostaddrs, NULL, sizes, kinds, false,
		     GOMP_MAP_VARS_TARGET);
  devicep->run_func (devicep->target_id, fn_addr, (void *) tgt_vars->tgt_start,
		     NULL);
  gomp_unmap_vars (tgt_vars, true);
}
995b27b9 | 1698 | |
/* Like GOMP_target, but KINDS is 16-bit, UNUSED is no longer present,
   and several arguments have been added:
   FLAGS is a bitmask, see GOMP_TARGET_FLAG_* in gomp-constants.h.
   DEPEND is array of dependencies, see GOMP_task for details.

   ARGS is a pointer to an array consisting of a variable number of both
   device-independent and device-specific arguments, which can take one or
   two elements where the first specifies for which device it is intended,
   the type and optionally also the value.  If the value is not present in
   the first one, the whole second element is the actual value.  The last
   element of the array is a single NULL.  Among the device independent
   arguments can be for example NUM_TEAMS and THREAD_LIMIT.

   NUM_TEAMS is positive if GOMP_teams will be called in the body with
   that value, or 1 if teams construct is not present, or 0, if
   teams construct does not have num_teams clause and so the choice is
   implementation defined, and -1 if it can't be determined on the host
   what value will GOMP_teams have on the device.
   THREAD_LIMIT similarly is positive if GOMP_teams will be called in the
   body with that value, or 0, if teams construct does not have thread_limit
   clause or the teams construct is not present, or -1 if it can't be
   determined on the host what value will GOMP_teams have on the device.  */

void
GOMP_target_ext (int device, void (*fn) (void *), size_t mapnum,
		 void **hostaddrs, size_t *sizes, unsigned short *kinds,
		 unsigned int flags, void **depend, void **args)
{
  struct gomp_device_descr *devicep = resolve_device (device);
  size_t tgt_align = 0, tgt_size = 0;
  /* Set once firstprivate data has been copied, so it is done at most
     once on the paths below.  */
  bool fpc_done = false;

  if (flags & GOMP_TARGET_FLAG_NOWAIT)
    {
      struct gomp_thread *thr = gomp_thread ();
      /* Create a team if we don't have any around, as nowait
	 target tasks make sense to run asynchronously even when
	 outside of any parallel.  */
      if (__builtin_expect (thr->ts.team == NULL, 0))
	{
	  struct gomp_team *team = gomp_new_team (1);
	  struct gomp_task *task = thr->task;
	  struct gomp_task_icv *icv = task ? &task->icv : &gomp_global_icv;
	  team->prev_ts = thr->ts;
	  thr->ts.team = team;
	  thr->ts.team_id = 0;
	  thr->ts.work_share = &team->work_shares[0];
	  thr->ts.last_work_share = NULL;
#ifdef HAVE_SYNC_BUILTINS
	  thr->ts.single_count = 0;
#endif
	  thr->ts.static_trip = 0;
	  thr->task = &team->implicit_task[0];
	  gomp_init_task (thr->task, NULL, icv);
	  if (task)
	    {
	      /* End the pre-existing task and adopt the implicit one.  */
	      thr->task = task;
	      gomp_end_task ();
	      free (task);
	      thr->task = &team->implicit_task[0];
	    }
	  else
	    pthread_setspecific (gomp_thread_destructor, thr);
	}
      if (thr->ts.team
	  && !thr->task->final_task)
	{
	  /* Hand the whole region off as an asynchronous target task.  */
	  gomp_create_target_task (devicep, fn, mapnum, hostaddrs,
				   sizes, kinds, flags, depend, args,
				   GOMP_TARGET_TASK_BEFORE_MAP);
	  return;
	}
    }

  /* If there are depend clauses, but nowait is not present
     (or we are in a final task), block the parent task until the
     dependencies are resolved and then just continue with the rest
     of the function as if it is a merged task.  */
  if (depend != NULL)
    {
      struct gomp_thread *thr = gomp_thread ();
      if (thr->task && thr->task->depend_hash)
	{
	  /* If we might need to wait, copy firstprivate now.  */
	  calculate_firstprivate_requirements (mapnum, sizes, kinds,
					       &tgt_align, &tgt_size);
	  if (tgt_align)
	    {
	      char *tgt = gomp_alloca (tgt_size + tgt_align - 1);
	      copy_firstprivate_data (tgt, mapnum, hostaddrs, sizes, kinds,
				      tgt_align, tgt_size);
	    }
	  fpc_done = true;
	  gomp_task_maybe_wait_for_dependencies (depend);
	}
    }

  /* NOTE: fn_addr is only assigned once the earlier || operands have
     established DEVICEP is a usable OpenMP device.  */
  void *fn_addr;
  if (devicep == NULL
      || !(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
      || !(fn_addr = gomp_get_target_fn_addr (devicep, fn))
      || (devicep->can_run_func && !devicep->can_run_func (fn_addr)))
    {
      /* Host fallback; still privatize firstprivate data if not done.  */
      if (!fpc_done)
	{
	  calculate_firstprivate_requirements (mapnum, sizes, kinds,
					       &tgt_align, &tgt_size);
	  if (tgt_align)
	    {
	      char *tgt = gomp_alloca (tgt_size + tgt_align - 1);
	      copy_firstprivate_data (tgt, mapnum, hostaddrs, sizes, kinds,
				      tgt_align, tgt_size);
	    }
	}
      gomp_target_fallback (fn, hostaddrs);
      return;
    }

  struct target_mem_desc *tgt_vars;
  if (devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
    {
      /* Shared-memory device: no mapping needed, but firstprivate
	 copies are still required.  */
      if (!fpc_done)
	{
	  calculate_firstprivate_requirements (mapnum, sizes, kinds,
					       &tgt_align, &tgt_size);
	  if (tgt_align)
	    {
	      char *tgt = gomp_alloca (tgt_size + tgt_align - 1);
	      copy_firstprivate_data (tgt, mapnum, hostaddrs, sizes, kinds,
				      tgt_align, tgt_size);
	    }
	}
      tgt_vars = NULL;
    }
  else
    tgt_vars = gomp_map_vars (devicep, mapnum, hostaddrs, NULL, sizes, kinds,
			      true, GOMP_MAP_VARS_TARGET);
  devicep->run_func (devicep->target_id, fn_addr,
		     tgt_vars ? (void *) tgt_vars->tgt_start : hostaddrs,
		     args);
  if (tgt_vars)
    gomp_unmap_vars (tgt_vars, true);
}
1842 | ||
9561765e | 1843 | /* Host fallback for GOMP_target_data{,_ext} routines. */ |
43895be5 | 1844 | |
1845 | static void | |
1846 | gomp_target_data_fallback (void) | |
1847 | { | |
1848 | struct gomp_task_icv *icv = gomp_icv (false); | |
1849 | if (icv->target_data) | |
1850 | { | |
1851 | /* Even when doing a host fallback, if there are any active | |
1852 | #pragma omp target data constructs, need to remember the | |
1853 | new #pragma omp target data, otherwise GOMP_target_end_data | |
1854 | would get out of sync. */ | |
1855 | struct target_mem_desc *tgt | |
1856 | = gomp_map_vars (NULL, 0, NULL, NULL, NULL, NULL, false, | |
1857 | GOMP_MAP_VARS_DATA); | |
1858 | tgt->prev = icv->target_data; | |
1859 | icv->target_data = tgt; | |
1860 | } | |
1861 | } | |
1862 | ||
bc7bff74 | 1863 | void |
dc19c8fd | 1864 | GOMP_target_data (int device, const void *unused, size_t mapnum, |
bc7bff74 | 1865 | void **hostaddrs, size_t *sizes, unsigned char *kinds) |
1866 | { | |
995b27b9 | 1867 | struct gomp_device_descr *devicep = resolve_device (device); |
ca4c3545 | 1868 | |
1869 | if (devicep == NULL | |
56686608 | 1870 | || !(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400) |
1871 | || (devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)) | |
43895be5 | 1872 | return gomp_target_data_fallback (); |
ca4c3545 | 1873 | |
995b27b9 | 1874 | struct target_mem_desc *tgt |
ca4c3545 | 1875 | = gomp_map_vars (devicep, mapnum, hostaddrs, NULL, sizes, kinds, false, |
43895be5 | 1876 | GOMP_MAP_VARS_DATA); |
1877 | struct gomp_task_icv *icv = gomp_icv (true); | |
1878 | tgt->prev = icv->target_data; | |
1879 | icv->target_data = tgt; | |
1880 | } | |
1881 | ||
1882 | void | |
9561765e | 1883 | GOMP_target_data_ext (int device, size_t mapnum, void **hostaddrs, |
1884 | size_t *sizes, unsigned short *kinds) | |
43895be5 | 1885 | { |
1886 | struct gomp_device_descr *devicep = resolve_device (device); | |
1887 | ||
1888 | if (devicep == NULL | |
56686608 | 1889 | || !(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400) |
1890 | || devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM) | |
43895be5 | 1891 | return gomp_target_data_fallback (); |
1892 | ||
1893 | struct target_mem_desc *tgt | |
1894 | = gomp_map_vars (devicep, mapnum, hostaddrs, NULL, sizes, kinds, true, | |
1895 | GOMP_MAP_VARS_DATA); | |
995b27b9 | 1896 | struct gomp_task_icv *icv = gomp_icv (true); |
1897 | tgt->prev = icv->target_data; | |
1898 | icv->target_data = tgt; | |
bc7bff74 | 1899 | } |
1900 | ||
1901 | void | |
1902 | GOMP_target_end_data (void) | |
1903 | { | |
995b27b9 | 1904 | struct gomp_task_icv *icv = gomp_icv (false); |
1905 | if (icv->target_data) | |
1906 | { | |
1907 | struct target_mem_desc *tgt = icv->target_data; | |
1908 | icv->target_data = tgt->prev; | |
ca4c3545 | 1909 | gomp_unmap_vars (tgt, true); |
995b27b9 | 1910 | } |
bc7bff74 | 1911 | } |
1912 | ||
1913 | void | |
dc19c8fd | 1914 | GOMP_target_update (int device, const void *unused, size_t mapnum, |
bc7bff74 | 1915 | void **hostaddrs, size_t *sizes, unsigned char *kinds) |
1916 | { | |
995b27b9 | 1917 | struct gomp_device_descr *devicep = resolve_device (device); |
ca4c3545 | 1918 | |
1919 | if (devicep == NULL | |
56686608 | 1920 | || !(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400) |
1921 | || devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM) | |
995b27b9 | 1922 | return; |
1923 | ||
43895be5 | 1924 | gomp_update (devicep, mapnum, hostaddrs, sizes, kinds, false); |
1925 | } | |
1926 | ||
/* Entry point for #pragma omp target update with 16-bit KINDS, plus
   FLAGS (GOMP_TARGET_FLAG_*) and DEPEND (see GOMP_task) arguments.  */

void
GOMP_target_update_ext (int device, size_t mapnum, void **hostaddrs,
			size_t *sizes, unsigned short *kinds,
			unsigned int flags, void **depend)
{
  struct gomp_device_descr *devicep = resolve_device (device);

  /* If there are depend clauses, but nowait is not present,
     block the parent task until the dependencies are resolved
     and then just continue with the rest of the function as if it
     is a merged task.  Until we are able to schedule task during
     variable mapping or unmapping, ignore nowait if depend clauses
     are not present.  */
  if (depend != NULL)
    {
      struct gomp_thread *thr = gomp_thread ();
      if (thr->task && thr->task->depend_hash)
	{
	  if ((flags & GOMP_TARGET_FLAG_NOWAIT)
	      && thr->ts.team
	      && !thr->task->final_task)
	    {
	      /* Queue the update as an asynchronous target task; fall
		 through to the synchronous path only if that failed.  */
	      if (gomp_create_target_task (devicep, (void (*) (void *)) NULL,
					   mapnum, hostaddrs, sizes, kinds,
					   flags | GOMP_TARGET_FLAG_UPDATE,
					   depend, NULL, GOMP_TARGET_TASK_DATA))
		return;
	    }
	  else
	    {
	      struct gomp_team *team = thr->ts.team;
	      /* If parallel or taskgroup has been cancelled, don't start new
		 tasks.  */
	      if (__builtin_expect (gomp_cancel_var, 0) && team)
		{
		  if (gomp_team_barrier_cancelled (&team->barrier))
		    return;
		  if (thr->task->taskgroup)
		    {
		      if (thr->task->taskgroup->cancelled)
			return;
		      if (thr->task->taskgroup->workshare
			  && thr->task->taskgroup->prev
			  && thr->task->taskgroup->prev->cancelled)
			return;
		    }
		}

	      gomp_task_maybe_wait_for_dependencies (depend);
	    }
	}
    }

  /* Nothing to copy for host fallback or shared-memory devices.  */
  if (devicep == NULL
      || !(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
      || devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
    return;

  struct gomp_thread *thr = gomp_thread ();
  struct gomp_team *team = thr->ts.team;
  /* If parallel or taskgroup has been cancelled, don't start new tasks.  */
  if (__builtin_expect (gomp_cancel_var, 0) && team)
    {
      if (gomp_team_barrier_cancelled (&team->barrier))
	return;
      if (thr->task->taskgroup)
	{
	  if (thr->task->taskgroup->cancelled)
	    return;
	  if (thr->task->taskgroup->workshare
	      && thr->task->taskgroup->prev
	      && thr->task->taskgroup->prev->cancelled)
	    return;
	}
    }

  gomp_update (devicep, mapnum, hostaddrs, sizes, kinds, true);
}
2005 | ||
/* Handle "omp target exit data" style unmapping: decrement reference
   counts for the MAPNUM regions described by HOSTADDRS/SIZES/KINDS,
   copy back "from" data where required and remove mappings whose
   refcount drops to zero.  Takes and releases DEVICEP's lock.  */

static void
gomp_exit_data (struct gomp_device_descr *devicep, size_t mapnum,
		void **hostaddrs, size_t *sizes, unsigned short *kinds)
{
  /* Low byte of each kind selects the map type.  */
  const int typemask = 0xff;
  size_t i;
  gomp_mutex_lock (&devicep->lock);
  if (devicep->state == GOMP_DEVICE_FINALIZED)
    {
      gomp_mutex_unlock (&devicep->lock);
      return;
    }

  for (i = 0; i < mapnum; i++)
    {
      struct splay_tree_key_s cur_node;
      unsigned char kind = kinds[i] & typemask;
      switch (kind)
	{
	case GOMP_MAP_FROM:
	case GOMP_MAP_ALWAYS_FROM:
	case GOMP_MAP_DELETE:
	case GOMP_MAP_RELEASE:
	case GOMP_MAP_ZERO_LEN_ARRAY_SECTION:
	case GOMP_MAP_DELETE_ZERO_LEN_ARRAY_SECTION:
	  cur_node.host_start = (uintptr_t) hostaddrs[i];
	  cur_node.host_end = cur_node.host_start + sizes[i];
	  /* Zero-length sections need the special lookup that also
	     matches a mapping starting exactly at host_start.  */
	  splay_tree_key k = (kind == GOMP_MAP_DELETE_ZERO_LEN_ARRAY_SECTION
			      || kind == GOMP_MAP_ZERO_LEN_ARRAY_SECTION)
	    ? gomp_map_0len_lookup (&devicep->mem_map, &cur_node)
	    : splay_tree_lookup (&devicep->mem_map, &cur_node);
	  /* Not mapped: nothing to do for this entry.  */
	  if (!k)
	    continue;

	  if (k->refcount > 0 && k->refcount != REFCOUNT_INFINITY)
	    k->refcount--;
	  /* "delete" forces the mapping away regardless of refcount,
	     except for always-present (infinite refcount) mappings.  */
	  if ((kind == GOMP_MAP_DELETE
	       || kind == GOMP_MAP_DELETE_ZERO_LEN_ARRAY_SECTION)
	      && k->refcount != REFCOUNT_INFINITY)
	    k->refcount = 0;

	  /* Copy device data back to the host for "from" maps; "always
	     from" copies even if the mapping stays referenced.  */
	  if ((kind == GOMP_MAP_FROM && k->refcount == 0)
	      || kind == GOMP_MAP_ALWAYS_FROM)
	    gomp_copy_dev2host (devicep, NULL, (void *) cur_node.host_start,
				(void *) (k->tgt->tgt_start + k->tgt_offset
					  + cur_node.host_start
					  - k->host_start),
				cur_node.host_end - cur_node.host_start);
	  if (k->refcount == 0)
	    {
	      splay_tree_remove (&devicep->mem_map, k);
	      /* Re-expose a "declare target link" key that this mapping
		 had displaced.  */
	      if (k->link_key)
		splay_tree_insert (&devicep->mem_map,
				   (splay_tree_node) k->link_key);
	      if (k->tgt->refcount > 1)
		k->tgt->refcount--;
	      else
		gomp_unmap_tgt (k->tgt);
	    }

	  break;
	default:
	  gomp_mutex_unlock (&devicep->lock);
	  gomp_fatal ("GOMP_target_enter_exit_data unhandled kind 0x%.2x",
		      kind);
	}
    }

  gomp_mutex_unlock (&devicep->lock);
}
ca4c3545 | 2076 | |
/* Entry point for both #pragma omp target enter data and exit data;
   GOMP_TARGET_FLAG_EXIT_DATA in FLAGS selects which.  DEPEND is an
   array of dependencies as for GOMP_task, or NULL.  */

void
GOMP_target_enter_exit_data (int device, size_t mapnum, void **hostaddrs,
			     size_t *sizes, unsigned short *kinds,
			     unsigned int flags, void **depend)
{
  struct gomp_device_descr *devicep = resolve_device (device);

  /* If there are depend clauses, but nowait is not present,
     block the parent task until the dependencies are resolved
     and then just continue with the rest of the function as if it
     is a merged task.  Until we are able to schedule task during
     variable mapping or unmapping, ignore nowait if depend clauses
     are not present.  */
  if (depend != NULL)
    {
      struct gomp_thread *thr = gomp_thread ();
      if (thr->task && thr->task->depend_hash)
	{
	  if ((flags & GOMP_TARGET_FLAG_NOWAIT)
	      && thr->ts.team
	      && !thr->task->final_task)
	    {
	      /* Queue as an asynchronous target task; fall through to
		 the synchronous path only if that failed.  */
	      if (gomp_create_target_task (devicep, (void (*) (void *)) NULL,
					   mapnum, hostaddrs, sizes, kinds,
					   flags, depend, NULL,
					   GOMP_TARGET_TASK_DATA))
		return;
	    }
	  else
	    {
	      struct gomp_team *team = thr->ts.team;
	      /* If parallel or taskgroup has been cancelled, don't start new
		 tasks.  */
	      if (__builtin_expect (gomp_cancel_var, 0) && team)
		{
		  if (gomp_team_barrier_cancelled (&team->barrier))
		    return;
		  if (thr->task->taskgroup)
		    {
		      if (thr->task->taskgroup->cancelled)
			return;
		      if (thr->task->taskgroup->workshare
			  && thr->task->taskgroup->prev
			  && thr->task->taskgroup->prev->cancelled)
			return;
		    }
		}

	      gomp_task_maybe_wait_for_dependencies (depend);
	    }
	}
    }

  /* Nothing to map for host fallback or shared-memory devices.  */
  if (devicep == NULL
      || !(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
      || devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
    return;

  struct gomp_thread *thr = gomp_thread ();
  struct gomp_team *team = thr->ts.team;
  /* If parallel or taskgroup has been cancelled, don't start new tasks.  */
  if (__builtin_expect (gomp_cancel_var, 0) && team)
    {
      if (gomp_team_barrier_cancelled (&team->barrier))
	return;
      if (thr->task->taskgroup)
	{
	  if (thr->task->taskgroup->cancelled)
	    return;
	  if (thr->task->taskgroup->workshare
	      && thr->task->taskgroup->prev
	      && thr->task->taskgroup->prev->cancelled)
	    return;
	}
    }

  /* enter data: map each entry; GOMP_MAP_STRUCT heads a group of
     SIZES[i] following entries that are mapped together.  */
  size_t i;
  if ((flags & GOMP_TARGET_FLAG_EXIT_DATA) == 0)
    for (i = 0; i < mapnum; i++)
      if ((kinds[i] & 0xff) == GOMP_MAP_STRUCT)
	{
	  gomp_map_vars (devicep, sizes[i] + 1, &hostaddrs[i], NULL, &sizes[i],
			 &kinds[i], true, GOMP_MAP_VARS_ENTER_DATA);
	  i += sizes[i];
	}
      else
	gomp_map_vars (devicep, 1, &hostaddrs[i], NULL, &sizes[i], &kinds[i],
		       true, GOMP_MAP_VARS_ENTER_DATA);
  else
    gomp_exit_data (devicep, mapnum, hostaddrs, sizes, kinds);
}
2168 | ||
/* Body of an asynchronous target task; DATA is a gomp_target_task.
   Returns true if the task is still running asynchronously (the device
   plugin will signal completion via async_run_func's callback argument),
   false if it completed (or fell back to the host) within this call.  */

bool
gomp_target_task_fn (void *data)
{
  struct gomp_target_task *ttask = (struct gomp_target_task *) data;
  struct gomp_device_descr *devicep = ttask->devicep;

  /* A non-NULL fn means this is a target region task; otherwise it is a
     target (enter/exit) data or update task.  */
  if (ttask->fn != NULL)
    {
      void *fn_addr;
      if (devicep == NULL
	  || !(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
	  || !(fn_addr = gomp_get_target_fn_addr (devicep, ttask->fn))
	  || (devicep->can_run_func && !devicep->can_run_func (fn_addr)))
	{
	  /* No usable device: run on the host synchronously.  */
	  ttask->state = GOMP_TARGET_TASK_FALLBACK;
	  gomp_target_fallback (ttask->fn, ttask->hostaddrs);
	  return false;
	}

      if (ttask->state == GOMP_TARGET_TASK_FINISHED)
	{
	  /* Second invocation, after the device signalled completion:
	     just unmap.  */
	  if (ttask->tgt)
	    gomp_unmap_vars (ttask->tgt, true);
	  return false;
	}

      void *actual_arguments;
      if (devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
	{
	  /* Shared memory: the host addresses are usable directly.  */
	  ttask->tgt = NULL;
	  actual_arguments = ttask->hostaddrs;
	}
      else
	{
	  ttask->tgt = gomp_map_vars (devicep, ttask->mapnum, ttask->hostaddrs,
				      NULL, ttask->sizes, ttask->kinds, true,
				      GOMP_MAP_VARS_TARGET);
	  actual_arguments = (void *) ttask->tgt->tgt_start;
	}
      ttask->state = GOMP_TARGET_TASK_READY_TO_RUN;

      /* Launch asynchronously; TTASK is passed through so the plugin can
	 mark it finished.  */
      devicep->async_run_func (devicep->target_id, fn_addr, actual_arguments,
			       ttask->args, (void *) ttask);
      return true;
    }
  else if (devicep == NULL
	   || !(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
	   || devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
    return false;

  /* Data-movement task: update, enter data or exit data, mirroring
     GOMP_target_enter_exit_data's synchronous handling.  */
  size_t i;
  if (ttask->flags & GOMP_TARGET_FLAG_UPDATE)
    gomp_update (devicep, ttask->mapnum, ttask->hostaddrs, ttask->sizes,
		 ttask->kinds, true);
  else if ((ttask->flags & GOMP_TARGET_FLAG_EXIT_DATA) == 0)
    for (i = 0; i < ttask->mapnum; i++)
      if ((ttask->kinds[i] & 0xff) == GOMP_MAP_STRUCT)
	{
	  gomp_map_vars (devicep, ttask->sizes[i] + 1, &ttask->hostaddrs[i],
			 NULL, &ttask->sizes[i], &ttask->kinds[i], true,
			 GOMP_MAP_VARS_ENTER_DATA);
	  i += ttask->sizes[i];
	}
      else
	gomp_map_vars (devicep, 1, &ttask->hostaddrs[i], NULL, &ttask->sizes[i],
		       &ttask->kinds[i], true, GOMP_MAP_VARS_ENTER_DATA);
  else
    gomp_exit_data (devicep, ttask->mapnum, ttask->hostaddrs, ttask->sizes,
		    ttask->kinds);
  return false;
}
2240 | ||
2241 | void | |
2242 | GOMP_teams (unsigned int num_teams, unsigned int thread_limit) | |
2243 | { | |
2244 | if (thread_limit) | |
2245 | { | |
2246 | struct gomp_task_icv *icv = gomp_icv (true); | |
2247 | icv->thread_limit_var | |
2248 | = thread_limit > INT_MAX ? UINT_MAX : thread_limit; | |
2249 | } | |
2250 | (void) num_teams; | |
2251 | } | |
995b27b9 | 2252 | |
43895be5 | 2253 | void * |
2254 | omp_target_alloc (size_t size, int device_num) | |
2255 | { | |
2256 | if (device_num == GOMP_DEVICE_HOST_FALLBACK) | |
2257 | return malloc (size); | |
2258 | ||
2259 | if (device_num < 0) | |
2260 | return NULL; | |
2261 | ||
2262 | struct gomp_device_descr *devicep = resolve_device (device_num); | |
2263 | if (devicep == NULL) | |
2264 | return NULL; | |
2265 | ||
56686608 | 2266 | if (!(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400) |
2267 | || devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM) | |
43895be5 | 2268 | return malloc (size); |
2269 | ||
2270 | gomp_mutex_lock (&devicep->lock); | |
2271 | void *ret = devicep->alloc_func (devicep->target_id, size); | |
2272 | gomp_mutex_unlock (&devicep->lock); | |
2273 | return ret; | |
2274 | } | |
2275 | ||
2276 | void | |
2277 | omp_target_free (void *device_ptr, int device_num) | |
2278 | { | |
2279 | if (device_ptr == NULL) | |
2280 | return; | |
2281 | ||
2282 | if (device_num == GOMP_DEVICE_HOST_FALLBACK) | |
2283 | { | |
2284 | free (device_ptr); | |
2285 | return; | |
2286 | } | |
2287 | ||
2288 | if (device_num < 0) | |
2289 | return; | |
2290 | ||
2291 | struct gomp_device_descr *devicep = resolve_device (device_num); | |
2292 | if (devicep == NULL) | |
2293 | return; | |
2294 | ||
56686608 | 2295 | if (!(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400) |
2296 | || devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM) | |
43895be5 | 2297 | { |
2298 | free (device_ptr); | |
2299 | return; | |
2300 | } | |
2301 | ||
2302 | gomp_mutex_lock (&devicep->lock); | |
9b50ad1d | 2303 | gomp_free_device_memory (devicep, device_ptr); |
43895be5 | 2304 | gomp_mutex_unlock (&devicep->lock); |
2305 | } | |
2306 | ||
2307 | int | |
7e5a76c8 | 2308 | omp_target_is_present (const void *ptr, int device_num) |
43895be5 | 2309 | { |
2310 | if (ptr == NULL) | |
2311 | return 1; | |
2312 | ||
2313 | if (device_num == GOMP_DEVICE_HOST_FALLBACK) | |
2314 | return 1; | |
2315 | ||
2316 | if (device_num < 0) | |
2317 | return 0; | |
2318 | ||
2319 | struct gomp_device_descr *devicep = resolve_device (device_num); | |
2320 | if (devicep == NULL) | |
2321 | return 0; | |
2322 | ||
56686608 | 2323 | if (!(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400) |
2324 | || devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM) | |
43895be5 | 2325 | return 1; |
2326 | ||
2327 | gomp_mutex_lock (&devicep->lock); | |
2328 | struct splay_tree_s *mem_map = &devicep->mem_map; | |
2329 | struct splay_tree_key_s cur_node; | |
2330 | ||
2331 | cur_node.host_start = (uintptr_t) ptr; | |
2332 | cur_node.host_end = cur_node.host_start; | |
9561765e | 2333 | splay_tree_key n = gomp_map_0len_lookup (mem_map, &cur_node); |
43895be5 | 2334 | int ret = n != NULL; |
2335 | gomp_mutex_unlock (&devicep->lock); | |
2336 | return ret; | |
2337 | } | |
2338 | ||
/* OpenMP API routine: copy LENGTH bytes from SRC + SRC_OFFSET in
   SRC_DEVICE_NUM's data environment to DST + DST_OFFSET in
   DST_DEVICE_NUM's.  Returns 0 on success, EINVAL on failure.
   Copies between two distinct non-host devices are not supported.  */

int
omp_target_memcpy (void *dst, const void *src, size_t length,
		   size_t dst_offset, size_t src_offset, int dst_device_num,
		   int src_device_num)
{
  struct gomp_device_descr *dst_devicep = NULL, *src_devicep = NULL;
  bool ret;

  if (dst_device_num != GOMP_DEVICE_HOST_FALLBACK)
    {
      if (dst_device_num < 0)
	return EINVAL;

      dst_devicep = resolve_device (dst_device_num);
      if (dst_devicep == NULL)
	return EINVAL;

      /* Shared-memory devices are handled like the host (NULL).  */
      if (!(dst_devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
	  || dst_devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
	dst_devicep = NULL;
    }
  if (src_device_num != GOMP_DEVICE_HOST_FALLBACK)
    {
      if (src_device_num < 0)
	return EINVAL;

      src_devicep = resolve_device (src_device_num);
      if (src_devicep == NULL)
	return EINVAL;

      /* Shared-memory devices are handled like the host (NULL).  */
      if (!(src_devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
	  || src_devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
	src_devicep = NULL;
    }
  /* host -> host.  */
  if (src_devicep == NULL && dst_devicep == NULL)
    {
      memcpy ((char *) dst + dst_offset, (char *) src + src_offset, length);
      return 0;
    }
  /* host -> device.  */
  if (src_devicep == NULL)
    {
      gomp_mutex_lock (&dst_devicep->lock);
      ret = dst_devicep->host2dev_func (dst_devicep->target_id,
					(char *) dst + dst_offset,
					(char *) src + src_offset, length);
      gomp_mutex_unlock (&dst_devicep->lock);
      return (ret ? 0 : EINVAL);
    }
  /* device -> host.  */
  if (dst_devicep == NULL)
    {
      gomp_mutex_lock (&src_devicep->lock);
      ret = src_devicep->dev2host_func (src_devicep->target_id,
					(char *) dst + dst_offset,
					(char *) src + src_offset, length);
      gomp_mutex_unlock (&src_devicep->lock);
      return (ret ? 0 : EINVAL);
    }
  /* Same device on both sides.  */
  if (src_devicep == dst_devicep)
    {
      gomp_mutex_lock (&src_devicep->lock);
      ret = src_devicep->dev2dev_func (src_devicep->target_id,
				       (char *) dst + dst_offset,
				       (char *) src + src_offset, length);
      gomp_mutex_unlock (&src_devicep->lock);
      return (ret ? 0 : EINVAL);
    }
  /* Copies between two different devices are not supported.  */
  return EINVAL;
}
2407 | ||
/* Worker for omp_target_memcpy_rect: recursively copy a NUM_DIMS-dimensional
   rectangular sub-volume of ELEMENT_SIZE-byte elements from SRC to DST.
   VOLUME[i] is the extent (in elements) to copy in dimension i,
   DST_OFFSETS/SRC_OFFSETS the starting element offsets, and
   DST_DIMENSIONS/SRC_DIMENSIONS the total extents of the two arrays.
   A NULL DST_DEVICEP/SRC_DEVICEP means the corresponding side is host
   memory.  Returns 0 on success, EINVAL on failure (including size
   overflow or an unsupported device-to-device combination).  */

static int
omp_target_memcpy_rect_worker (void *dst, const void *src, size_t element_size,
			       int num_dims, const size_t *volume,
			       const size_t *dst_offsets,
			       const size_t *src_offsets,
			       const size_t *dst_dimensions,
			       const size_t *src_dimensions,
			       struct gomp_device_descr *dst_devicep,
			       struct gomp_device_descr *src_devicep)
{
  size_t dst_slice = element_size;
  size_t src_slice = element_size;
  size_t j, dst_off, src_off, length;
  int i, ret;

  if (num_dims == 1)
    {
      /* Base case: a contiguous run of VOLUME[0] elements.  All byte
	 counts are computed with overflow checking.  */
      if (__builtin_mul_overflow (element_size, volume[0], &length)
	  || __builtin_mul_overflow (element_size, dst_offsets[0], &dst_off)
	  || __builtin_mul_overflow (element_size, src_offsets[0], &src_off))
	return EINVAL;
      if (dst_devicep == NULL && src_devicep == NULL)
	{
	  /* Host-to-host copy.  */
	  memcpy ((char *) dst + dst_off, (const char *) src + src_off,
		  length);
	  ret = 1;
	}
      else if (src_devicep == NULL)
	ret = dst_devicep->host2dev_func (dst_devicep->target_id,
					  (char *) dst + dst_off,
					  (const char *) src + src_off,
					  length);
      else if (dst_devicep == NULL)
	ret = src_devicep->dev2host_func (src_devicep->target_id,
					  (char *) dst + dst_off,
					  (const char *) src + src_off,
					  length);
      else if (src_devicep == dst_devicep)
	ret = src_devicep->dev2dev_func (src_devicep->target_id,
					 (char *) dst + dst_off,
					 (const char *) src + src_off,
					 length);
      else
	/* Copies between two different devices are not supported here.  */
	ret = 0;
      /* Plugin hooks return non-zero on success.  */
      return ret ? 0 : EINVAL;
    }

  /* FIXME: it would be nice to have some plugin function to handle
     num_dims == 2 and num_dims == 3 more efficiently.  Larger ones can
     be handled in the generic recursion below, and for host-host it
     should be used even for any num_dims >= 2.  */

  /* Compute the byte size of one slice (all dimensions but the first)
     in each array, again with overflow checking.  */
  for (i = 1; i < num_dims; i++)
    if (__builtin_mul_overflow (dst_slice, dst_dimensions[i], &dst_slice)
	|| __builtin_mul_overflow (src_slice, src_dimensions[i], &src_slice))
      return EINVAL;
  if (__builtin_mul_overflow (dst_slice, dst_offsets[0], &dst_off)
      || __builtin_mul_overflow (src_slice, src_offsets[0], &src_off))
    return EINVAL;
  /* Recurse once per index in the outermost dimension, advancing both
     sides by one slice each iteration.  */
  for (j = 0; j < volume[0]; j++)
    {
      ret = omp_target_memcpy_rect_worker ((char *) dst + dst_off,
					   (const char *) src + src_off,
					   element_size, num_dims - 1,
					   volume + 1, dst_offsets + 1,
					   src_offsets + 1, dst_dimensions + 1,
					   src_dimensions + 1, dst_devicep,
					   src_devicep);
      if (ret)
	return ret;
      dst_off += dst_slice;
      src_off += src_slice;
    }
  return 0;
}
2483 | ||
2484 | int | |
7e5a76c8 | 2485 | omp_target_memcpy_rect (void *dst, const void *src, size_t element_size, |
43895be5 | 2486 | int num_dims, const size_t *volume, |
2487 | const size_t *dst_offsets, | |
2488 | const size_t *src_offsets, | |
2489 | const size_t *dst_dimensions, | |
2490 | const size_t *src_dimensions, | |
2491 | int dst_device_num, int src_device_num) | |
2492 | { | |
2493 | struct gomp_device_descr *dst_devicep = NULL, *src_devicep = NULL; | |
2494 | ||
2495 | if (!dst && !src) | |
2496 | return INT_MAX; | |
2497 | ||
2498 | if (dst_device_num != GOMP_DEVICE_HOST_FALLBACK) | |
2499 | { | |
2500 | if (dst_device_num < 0) | |
2501 | return EINVAL; | |
2502 | ||
2503 | dst_devicep = resolve_device (dst_device_num); | |
2504 | if (dst_devicep == NULL) | |
2505 | return EINVAL; | |
2506 | ||
56686608 | 2507 | if (!(dst_devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400) |
2508 | || dst_devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM) | |
43895be5 | 2509 | dst_devicep = NULL; |
2510 | } | |
2511 | if (src_device_num != GOMP_DEVICE_HOST_FALLBACK) | |
2512 | { | |
2513 | if (src_device_num < 0) | |
2514 | return EINVAL; | |
2515 | ||
2516 | src_devicep = resolve_device (src_device_num); | |
2517 | if (src_devicep == NULL) | |
2518 | return EINVAL; | |
2519 | ||
56686608 | 2520 | if (!(src_devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400) |
2521 | || src_devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM) | |
43895be5 | 2522 | src_devicep = NULL; |
2523 | } | |
2524 | ||
2525 | if (src_devicep != NULL && dst_devicep != NULL && src_devicep != dst_devicep) | |
2526 | return EINVAL; | |
2527 | ||
2528 | if (src_devicep) | |
2529 | gomp_mutex_lock (&src_devicep->lock); | |
2530 | else if (dst_devicep) | |
2531 | gomp_mutex_lock (&dst_devicep->lock); | |
2532 | int ret = omp_target_memcpy_rect_worker (dst, src, element_size, num_dims, | |
2533 | volume, dst_offsets, src_offsets, | |
2534 | dst_dimensions, src_dimensions, | |
2535 | dst_devicep, src_devicep); | |
2536 | if (src_devicep) | |
2537 | gomp_mutex_unlock (&src_devicep->lock); | |
2538 | else if (dst_devicep) | |
2539 | gomp_mutex_unlock (&dst_devicep->lock); | |
2540 | return ret; | |
2541 | } | |
2542 | ||
/* OpenMP API routine: associate SIZE bytes starting at HOST_PTR with the
   device storage DEVICE_PTR + DEVICE_OFFSET on device DEVICE_NUM, by
   recording the pair in the device's address-translation splay tree.
   Returns 0 on success, EINVAL on failure (invalid device, shared-memory
   device, or a conflicting existing mapping).  */

int
omp_target_associate_ptr (const void *host_ptr, const void *device_ptr,
			  size_t size, size_t device_offset, int device_num)
{
  /* Association with the host itself is meaningless.  */
  if (device_num == GOMP_DEVICE_HOST_FALLBACK)
    return EINVAL;

  if (device_num < 0)
    return EINVAL;

  struct gomp_device_descr *devicep = resolve_device (device_num);
  if (devicep == NULL)
    return EINVAL;

  /* Only OpenMP-capable devices with separate memory keep a mapping
     table worth updating.  */
  if (!(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
      || devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
    return EINVAL;

  gomp_mutex_lock (&devicep->lock);

  struct splay_tree_s *mem_map = &devicep->mem_map;
  struct splay_tree_key_s cur_node;
  int ret = EINVAL;

  cur_node.host_start = (uintptr_t) host_ptr;
  cur_node.host_end = cur_node.host_start + size;
  splay_tree_key n = gomp_map_lookup (mem_map, &cur_node);
  if (n)
    {
      /* An overlapping mapping already exists; succeed only if it covers
	 the requested host range and translates to exactly the requested
	 device address.  */
      if (n->tgt->tgt_start + n->tgt_offset
	  == (uintptr_t) device_ptr + device_offset
	  && n->host_start <= cur_node.host_start
	  && n->host_end >= cur_node.host_end)
	ret = 0;
    }
  else
    {
      /* Create a synthetic target_mem_desc that owns no device memory
	 (tgt_start == 0, to_free == NULL): the device storage belongs to
	 the caller.  omp_target_disassociate_ptr checks for exactly this
	 shape before removing the entry.  */
      struct target_mem_desc *tgt = gomp_malloc (sizeof (*tgt));
      tgt->array = gomp_malloc (sizeof (*tgt->array));
      tgt->refcount = 1;
      tgt->tgt_start = 0;
      tgt->tgt_end = 0;
      tgt->to_free = NULL;
      tgt->prev = NULL;
      tgt->list_count = 0;
      tgt->device_descr = devicep;
      splay_tree_node array = tgt->array;
      splay_tree_key k = &array->key;
      k->host_start = cur_node.host_start;
      k->host_end = cur_node.host_end;
      k->tgt = tgt;
      k->tgt_offset = (uintptr_t) device_ptr + device_offset;
      /* Never unmapped by reference counting; removed only by an explicit
	 omp_target_disassociate_ptr.  */
      k->refcount = REFCOUNT_INFINITY;
      array->left = NULL;
      array->right = NULL;
      splay_tree_insert (&devicep->mem_map, array);
      ret = 0;
    }
  gomp_mutex_unlock (&devicep->lock);
  return ret;
}
2604 | ||
2605 | int | |
7e5a76c8 | 2606 | omp_target_disassociate_ptr (const void *ptr, int device_num) |
43895be5 | 2607 | { |
2608 | if (device_num == GOMP_DEVICE_HOST_FALLBACK) | |
2609 | return EINVAL; | |
2610 | ||
2611 | if (device_num < 0) | |
2612 | return EINVAL; | |
2613 | ||
2614 | struct gomp_device_descr *devicep = resolve_device (device_num); | |
2615 | if (devicep == NULL) | |
2616 | return EINVAL; | |
2617 | ||
2618 | if (!(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)) | |
2619 | return EINVAL; | |
2620 | ||
2621 | gomp_mutex_lock (&devicep->lock); | |
2622 | ||
2623 | struct splay_tree_s *mem_map = &devicep->mem_map; | |
2624 | struct splay_tree_key_s cur_node; | |
2625 | int ret = EINVAL; | |
2626 | ||
2627 | cur_node.host_start = (uintptr_t) ptr; | |
2628 | cur_node.host_end = cur_node.host_start; | |
2629 | splay_tree_key n = gomp_map_lookup (mem_map, &cur_node); | |
2630 | if (n | |
2631 | && n->host_start == cur_node.host_start | |
2632 | && n->refcount == REFCOUNT_INFINITY | |
2633 | && n->tgt->tgt_start == 0 | |
2634 | && n->tgt->to_free == NULL | |
2635 | && n->tgt->refcount == 1 | |
2636 | && n->tgt->list_count == 0) | |
2637 | { | |
2638 | splay_tree_remove (&devicep->mem_map, n); | |
2639 | gomp_unmap_tgt (n->tgt); | |
2640 | ret = 0; | |
2641 | } | |
2642 | ||
2643 | gomp_mutex_unlock (&devicep->lock); | |
2644 | return ret; | |
2645 | } | |
2646 | ||
7e5a76c8 | 2647 | int |
2648 | omp_pause_resource (omp_pause_resource_t kind, int device_num) | |
2649 | { | |
2650 | (void) kind; | |
2651 | if (device_num == GOMP_DEVICE_HOST_FALLBACK) | |
2652 | return gomp_pause_host (); | |
2653 | if (device_num < 0 || device_num >= gomp_get_num_devices ()) | |
2654 | return -1; | |
2655 | /* Do nothing for target devices for now. */ | |
2656 | return 0; | |
2657 | } | |
2658 | ||
2659 | int | |
2660 | omp_pause_resource_all (omp_pause_resource_t kind) | |
2661 | { | |
2662 | (void) kind; | |
2663 | if (gomp_pause_host ()) | |
2664 | return -1; | |
2665 | /* Do nothing for target devices for now. */ | |
2666 | return 0; | |
2667 | } | |
2668 | ||
2669 | ialias (omp_pause_resource) | |
2670 | ialias (omp_pause_resource_all) | |
2671 | ||
995b27b9 | 2672 | #ifdef PLUGIN_SUPPORT |
2673 | ||
2674 | /* This function tries to load a plugin for DEVICE. Name of plugin is passed | |
2675 | in PLUGIN_NAME. | |
2676 | The handles of the found functions are stored in the corresponding fields | |
2677 | of DEVICE. The function returns TRUE on success and FALSE otherwise. */ | |
2678 | ||
static bool
gomp_load_plugin_for_device (struct gomp_device_descr *device,
			     const char *plugin_name)
{
  /* LAST_MISSING records the most recent optional symbol that failed to
     resolve, for the diagnostic emitted on failure.  */
  const char *err = NULL, *last_missing = NULL;

  void *plugin_handle = dlopen (plugin_name, RTLD_LAZY);
  if (!plugin_handle)
    goto dl_fail;

  /* Check if all required functions are available in the plugin and store
     their handlers.  None of the symbols can legitimately be NULL,
     so we don't need to check dlerror all the time.  */
#define DLSYM(f)							\
  if (!(device->f##_func = dlsym (plugin_handle, "GOMP_OFFLOAD_" #f)))	\
    goto dl_fail
  /* Similar, but missing functions are not an error.  Return false if
     failed, true otherwise.  */
#define DLSYM_OPT(f, n)							\
  ((device->f##_func = dlsym (plugin_handle, "GOMP_OFFLOAD_" #n))	\
   || (last_missing = #n, 0))

  /* The version hook must be checked first: a mismatched plugin may not
     even export the remaining entry points.  */
  DLSYM (version);
  if (device->version_func () != GOMP_VERSION)
    {
      err = "plugin version mismatch";
      goto fail;
    }

  /* Entry points every plugin must provide.  */
  DLSYM (get_name);
  DLSYM (get_caps);
  DLSYM (get_type);
  DLSYM (get_num_devices);
  DLSYM (init_device);
  DLSYM (fini_device);
  DLSYM (load_image);
  DLSYM (unload_image);
  DLSYM (alloc);
  DLSYM (free);
  DLSYM (dev2host);
  DLSYM (host2dev);
  /* The capability bits decide which further hook groups are required.  */
  device->capabilities = device->get_caps_func ();
  if (device->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
    {
      DLSYM (run);
      DLSYM (async_run);
      /* can_run is genuinely optional even for OpenMP plugins.  */
      DLSYM_OPT (can_run, can_run);
      DLSYM (dev2dev);
    }
  if (device->capabilities & GOMP_OFFLOAD_CAP_OPENACC_200)
    {
      if (!DLSYM_OPT (openacc.exec, openacc_exec)
	  || !DLSYM_OPT (openacc.create_thread_data,
			 openacc_create_thread_data)
	  || !DLSYM_OPT (openacc.destroy_thread_data,
			 openacc_destroy_thread_data)
	  || !DLSYM_OPT (openacc.async.construct, openacc_async_construct)
	  || !DLSYM_OPT (openacc.async.destruct, openacc_async_destruct)
	  || !DLSYM_OPT (openacc.async.test, openacc_async_test)
	  || !DLSYM_OPT (openacc.async.synchronize, openacc_async_synchronize)
	  || !DLSYM_OPT (openacc.async.serialize, openacc_async_serialize)
	  || !DLSYM_OPT (openacc.async.queue_callback,
			 openacc_async_queue_callback)
	  || !DLSYM_OPT (openacc.async.exec, openacc_async_exec)
	  || !DLSYM_OPT (openacc.async.dev2host, openacc_async_dev2host)
	  || !DLSYM_OPT (openacc.async.host2dev, openacc_async_host2dev))
	{
	  /* Require all the OpenACC handlers if we have
	     GOMP_OFFLOAD_CAP_OPENACC_200.  */
	  err = "plugin missing OpenACC handler function";
	  goto fail;
	}

      /* The CUDA interoperability hooks are all-or-nothing: count how
	 many of the four resolved.  */
      unsigned cuda = 0;
      cuda += DLSYM_OPT (openacc.cuda.get_current_device,
			 openacc_cuda_get_current_device);
      cuda += DLSYM_OPT (openacc.cuda.get_current_context,
			 openacc_cuda_get_current_context);
      cuda += DLSYM_OPT (openacc.cuda.get_stream, openacc_cuda_get_stream);
      cuda += DLSYM_OPT (openacc.cuda.set_stream, openacc_cuda_set_stream);
      if (cuda && cuda != 4)
	{
	  /* Make sure all the CUDA functions are there if any of them are.  */
	  err = "plugin missing OpenACC CUDA handler function";
	  goto fail;
	}
    }
#undef DLSYM
#undef DLSYM_OPT

  return 1;

  /* Shared failure path: dl_fail fetches the dlerror text first, then
     falls through to the generic diagnostic and cleanup.  */
 dl_fail:
  err = dlerror ();
 fail:
  gomp_error ("while loading %s: %s", plugin_name, err);
  if (last_missing)
    gomp_error ("missing function was %s", last_missing);
  if (plugin_handle)
    dlclose (plugin_handle);

  return 0;
}
2782 | ||
f87b2900 | 2783 | /* This function finalizes all initialized devices. */ |
2784 | ||
2785 | static void | |
2786 | gomp_target_fini (void) | |
2787 | { | |
2788 | int i; | |
2789 | for (i = 0; i < num_devices; i++) | |
2790 | { | |
9b50ad1d | 2791 | bool ret = true; |
f87b2900 | 2792 | struct gomp_device_descr *devicep = &devices[i]; |
2793 | gomp_mutex_lock (&devicep->lock); | |
2794 | if (devicep->state == GOMP_DEVICE_INITIALIZED) | |
534b5e00 | 2795 | ret = gomp_fini_device (devicep); |
f87b2900 | 2796 | gomp_mutex_unlock (&devicep->lock); |
9b50ad1d | 2797 | if (!ret) |
2798 | gomp_fatal ("device finalization failed"); | |
f87b2900 | 2799 | } |
2800 | } | |
2801 | ||
3a590203 | 2802 | /* This function initializes the runtime for offloading. |
2803 | It parses the list of offload plugins, and tries to load these. | |
2804 | On return, the variables NUM_DEVICES and NUM_DEVICES_OPENMP | |
ca4c3545 | 2805 | will be set, and the array DEVICES initialized, containing descriptors for |
2806 | corresponding devices, first the GOMP_OFFLOAD_CAP_OPENMP_400 ones, follows | |
2807 | by the others. */ | |
995b27b9 | 2808 | |
2809 | static void | |
2810 | gomp_target_init (void) | |
2811 | { | |
2812 | const char *prefix ="libgomp-plugin-"; | |
4fda895a | 2813 | const char *suffix = SONAME_SUFFIX (1); |
995b27b9 | 2814 | const char *cur, *next; |
2815 | char *plugin_name; | |
2816 | int i, new_num_devices; | |
2817 | ||
2818 | num_devices = 0; | |
2819 | devices = NULL; | |
2820 | ||
3a590203 | 2821 | cur = OFFLOAD_PLUGINS; |
995b27b9 | 2822 | if (*cur) |
2823 | do | |
2824 | { | |
2825 | struct gomp_device_descr current_device; | |
02b7bb30 | 2826 | size_t prefix_len, suffix_len, cur_len; |
995b27b9 | 2827 | |
2828 | next = strchr (cur, ','); | |
2829 | ||
02b7bb30 | 2830 | prefix_len = strlen (prefix); |
2831 | cur_len = next ? next - cur : strlen (cur); | |
2832 | suffix_len = strlen (suffix); | |
2833 | ||
2834 | plugin_name = (char *) malloc (prefix_len + cur_len + suffix_len + 1); | |
995b27b9 | 2835 | if (!plugin_name) |
2836 | { | |
2837 | num_devices = 0; | |
2838 | break; | |
2839 | } | |
2840 | ||
02b7bb30 | 2841 | memcpy (plugin_name, prefix, prefix_len); |
2842 | memcpy (plugin_name + prefix_len, cur, cur_len); | |
2843 | memcpy (plugin_name + prefix_len + cur_len, suffix, suffix_len + 1); | |
995b27b9 | 2844 | |
2845 | if (gomp_load_plugin_for_device (¤t_device, plugin_name)) | |
2846 | { | |
2847 | new_num_devices = current_device.get_num_devices_func (); | |
2848 | if (new_num_devices >= 1) | |
2849 | { | |
ca4c3545 | 2850 | /* Augment DEVICES and NUM_DEVICES. */ |
2851 | ||
995b27b9 | 2852 | devices = realloc (devices, (num_devices + new_num_devices) |
2853 | * sizeof (struct gomp_device_descr)); | |
2854 | if (!devices) | |
2855 | { | |
2856 | num_devices = 0; | |
2857 | free (plugin_name); | |
2858 | break; | |
2859 | } | |
2860 | ||
ca4c3545 | 2861 | current_device.name = current_device.get_name_func (); |
2862 | /* current_device.capabilities has already been set. */ | |
995b27b9 | 2863 | current_device.type = current_device.get_type_func (); |
0d8c703d | 2864 | current_device.mem_map.root = NULL; |
f87b2900 | 2865 | current_device.state = GOMP_DEVICE_UNINITIALIZED; |
ca4c3545 | 2866 | current_device.openacc.data_environ = NULL; |
995b27b9 | 2867 | for (i = 0; i < new_num_devices; i++) |
2868 | { | |
995b27b9 | 2869 | current_device.target_id = i; |
2870 | devices[num_devices] = current_device; | |
ca4c3545 | 2871 | gomp_mutex_init (&devices[num_devices].lock); |
995b27b9 | 2872 | num_devices++; |
2873 | } | |
2874 | } | |
2875 | } | |
2876 | ||
2877 | free (plugin_name); | |
2878 | cur = next + 1; | |
2879 | } | |
2880 | while (next); | |
2881 | ||
ca4c3545 | 2882 | /* In DEVICES, sort the GOMP_OFFLOAD_CAP_OPENMP_400 ones first, and set |
2883 | NUM_DEVICES_OPENMP. */ | |
2884 | struct gomp_device_descr *devices_s | |
2885 | = malloc (num_devices * sizeof (struct gomp_device_descr)); | |
2886 | if (!devices_s) | |
2887 | { | |
2888 | num_devices = 0; | |
2889 | free (devices); | |
2890 | devices = NULL; | |
2891 | } | |
2892 | num_devices_openmp = 0; | |
2893 | for (i = 0; i < num_devices; i++) | |
2894 | if (devices[i].capabilities & GOMP_OFFLOAD_CAP_OPENMP_400) | |
2895 | devices_s[num_devices_openmp++] = devices[i]; | |
2896 | int num_devices_after_openmp = num_devices_openmp; | |
2897 | for (i = 0; i < num_devices; i++) | |
2898 | if (!(devices[i].capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)) | |
2899 | devices_s[num_devices_after_openmp++] = devices[i]; | |
2900 | free (devices); | |
2901 | devices = devices_s; | |
2902 | ||
2903 | for (i = 0; i < num_devices; i++) | |
2904 | { | |
ca4c3545 | 2905 | /* The 'devices' array can be moved (by the realloc call) until we have |
2906 | found all the plugins, so registering with the OpenACC runtime (which | |
2907 | takes a copy of the pointer argument) must be delayed until now. */ | |
2908 | if (devices[i].capabilities & GOMP_OFFLOAD_CAP_OPENACC_200) | |
2909 | goacc_register (&devices[i]); | |
2910 | } | |
f87b2900 | 2911 | |
2912 | if (atexit (gomp_target_fini) != 0) | |
2913 | gomp_fatal ("atexit failed"); | |
995b27b9 | 2914 | } |
2915 | ||
2916 | #else /* PLUGIN_SUPPORT */ | |
2917 | /* If dlfcn.h is unavailable we always fallback to host execution. | |
2918 | GOMP_target* routines are just stubs for this case. */ | |
static void
gomp_target_init (void)
{
  /* Intentionally empty: without plugin support there are no offload
     devices to discover or register.  */
}
2923 | #endif /* PLUGIN_SUPPORT */ |