]>
Commit | Line | Data |
---|---|---|
83ffe9cd | 1 | /* Copyright (C) 2013-2023 Free Software Foundation, Inc. |
acf0174b JJ |
2 | Contributed by Jakub Jelinek <jakub@redhat.com>. |
3 | ||
f1f3453e TS |
4 | This file is part of the GNU Offloading and Multi Processing Library |
5 | (libgomp). | |
acf0174b JJ |
6 | |
7 | Libgomp is free software; you can redistribute it and/or modify it | |
8 | under the terms of the GNU General Public License as published by | |
9 | the Free Software Foundation; either version 3, or (at your option) | |
10 | any later version. | |
11 | ||
12 | Libgomp is distributed in the hope that it will be useful, but WITHOUT ANY | |
13 | WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS | |
14 | FOR A PARTICULAR PURPOSE. See the GNU General Public License for | |
15 | more details. | |
16 | ||
17 | Under Section 7 of GPL version 3, you are granted additional | |
18 | permissions described in the GCC Runtime Library Exception, version | |
19 | 3.1, as published by the Free Software Foundation. | |
20 | ||
21 | You should have received a copy of the GNU General Public License and | |
22 | a copy of the GCC Runtime Library Exception along with this program; | |
23 | see the files COPYING3 and COPYING.RUNTIME respectively. If not, see | |
24 | <http://www.gnu.org/licenses/>. */ | |
25 | ||
1df3f842 | 26 | /* This file contains the support of offloading. */ |
acf0174b JJ |
27 | |
28 | #include "libgomp.h" | |
41dbbb37 TS |
29 | #include "oacc-plugin.h" |
30 | #include "oacc-int.h" | |
31 | #include "gomp-constants.h" | |
acf0174b JJ |
32 | #include <limits.h> |
33 | #include <stdbool.h> | |
34 | #include <stdlib.h> | |
01c0b3b0 KT |
35 | #ifdef HAVE_INTTYPES_H |
36 | # include <inttypes.h> /* For PRIu64. */ | |
37 | #endif | |
acf0174b | 38 | #include <string.h> |
683f1184 | 39 | #include <stdio.h> /* For snprintf. */ |
41dbbb37 | 40 | #include <assert.h> |
d9a6bd32 | 41 | #include <errno.h> |
acf0174b | 42 | |
1df3f842 JJ |
43 | #ifdef PLUGIN_SUPPORT |
44 | #include <dlfcn.h> | |
b5f7a6ca | 45 | #include "plugin-suffix.h" |
1df3f842 JJ |
46 | #endif |
47 | ||
ea4b23d9 TB |
48 | /* Define another splay tree instantiation - for reverse offload. */ |
49 | #define splay_tree_prefix reverse | |
50 | #define splay_tree_c | |
51 | #include "splay-tree.h" | |
52 | ||
53 | ||
/* Hash-table entries are pointers to the refcount words themselves; the
   table tracks which refcounts have already been adjusted for a construct.  */
typedef uintptr_t *hash_entry_type;
/* Allocation hooks required by hashtab.h; route through libgomp's
   allocator so out-of-memory is reported consistently.  */
static inline void * htab_alloc (size_t size) { return gomp_malloc (size); }
static inline void htab_free (void *ptr) { free (ptr); }
#include "hashtab.h"

ialias_redirect (GOMP_task)
275c736e CLT |
/* Hash callback for the refcount-pointer hash table: hash the pointer
   value itself.  */
static inline hashval_t
htab_hash (hash_entry_type element)
{
  return hash_pointer ((void *) element);
}
66 | ||
/* Equality callback: two entries are equal iff they refer to the same
   refcount word.  */
static inline bool
htab_eq (hash_entry_type x, hash_entry_type y)
{
  return x == y;
}
72 | ||
/* Sentinel for target_var_desc fields with no associated structure field.  */
#define FIELD_TGT_EMPTY (~(size_t) 0)

static void gomp_target_init (void);

/* The whole initialization code for offloading plugins is only run once.  */
static pthread_once_t gomp_is_initialized = PTHREAD_ONCE_INIT;

/* Mutex for offload image registration.  */
static gomp_mutex_t register_lock;

/* This structure describes an offload image.
   It contains type of the target device, pointer to host table descriptor, and
   pointer to target data.  */
struct offload_image_descr {
  /* Image interface version, checked against the plugin.  */
  unsigned version;
  enum offload_target_type type;
  const void *host_table;
  const void *target_data;
};

/* Array of descriptors of offload images.  */
static struct offload_image_descr *offload_images;

/* Total number of offload images.  */
static int num_offload_images;

/* Array of descriptors for all available devices.  */
static struct gomp_device_descr *devices;

/* Total number of available devices.  */
static int num_devices;

/* Number of GOMP_OFFLOAD_CAP_OPENMP_400 devices.  */
static int num_devices_openmp;

/* Mask of requirements from OpenMP 'requires' directives.  */
static int omp_requires_mask;
a51df54e IV |
/* Similar to gomp_realloc, but release register_lock before gomp_fatal.
   Callers hold register_lock while (re)building the device/image arrays;
   dropping it before aborting avoids dying with the lock held.  */

static void *
gomp_realloc_unlock (void *old, size_t size)
{
  void *ret = realloc (old, size);
  if (ret == NULL)
    {
      gomp_mutex_unlock (&register_lock);
      gomp_fatal ("Out of memory allocating %lu bytes", (unsigned long) size);
    }
  return ret;
}
124 | ||
41dbbb37 TS |
/* Run gomp_target_init exactly once, even when multiple threads race to
   trigger offloading initialization.  */
attribute_hidden void
gomp_init_targets_once (void)
{
  (void) pthread_once (&gomp_is_initialized, gomp_target_init);
}
1df3f842 | 130 | |
acf0174b JJ |
/* Return the number of OpenMP-capable (GOMP_OFFLOAD_CAP_OPENMP_400)
   devices, initializing the plugins on first use.  */
attribute_hidden int
gomp_get_num_devices (void)
{
  gomp_init_targets_once ();
  return num_devices_openmp;
}
137 | ||
/* Map DEVICE_ID to its gomp_device_descr and make sure the device is
   initialized.  REMAPPED selects the GOMP_DEVICE_* encoding (where
   GOMP_DEVICE_ICV means "use the default-device ICV" and
   GOMP_DEVICE_HOST_FALLBACK means the host) rather than plain OpenMP
   device numbers.  Returns NULL to request host fallback; calls
   gomp_fatal for invalid/unavailable devices when offload is
   mandatory.  */
static struct gomp_device_descr *
resolve_device (int device_id, bool remapped)
{
  /* Get number of devices and thus ensure that 'gomp_init_targets_once' was
     called, which must be done before using default_device_var.  */
  int num_devices = gomp_get_num_devices ();

  if (remapped && device_id == GOMP_DEVICE_ICV)
    {
      struct gomp_task_icv *icv = gomp_icv (false);
      device_id = icv->default_device_var;
      remapped = false;
    }

  if (device_id < 0)
    {
      /* Negative IDs: host fallback, omp_invalid_device, or garbage.  */
      if (device_id == (remapped ? GOMP_DEVICE_HOST_FALLBACK
                                 : omp_initial_device))
        return NULL;
      if (gomp_target_offload_var == GOMP_TARGET_OFFLOAD_MANDATORY
          && num_devices == 0)
        gomp_fatal ("OMP_TARGET_OFFLOAD is set to MANDATORY, "
                    "but only the host device is available");
      else if (device_id == omp_invalid_device)
        gomp_fatal ("omp_invalid_device encountered");
      else if (gomp_target_offload_var == GOMP_TARGET_OFFLOAD_MANDATORY)
        gomp_fatal ("OMP_TARGET_OFFLOAD is set to MANDATORY, "
                    "but device not found");

      return NULL;
    }
  else if (device_id >= num_devices)
    {
      /* NOTE(review): device_id == num_devices is tolerated here
         (presumably the host device number); verify against callers.  */
      if (gomp_target_offload_var == GOMP_TARGET_OFFLOAD_MANDATORY
          && device_id != num_devices)
        gomp_fatal ("OMP_TARGET_OFFLOAD is set to MANDATORY, "
                    "but device not found");

      return NULL;
    }

  /* Lazily initialize the device under its lock; a finalized device can
     no longer be used.  */
  gomp_mutex_lock (&devices[device_id].lock);
  if (devices[device_id].state == GOMP_DEVICE_UNINITIALIZED)
    gomp_init_device (&devices[device_id]);
  else if (devices[device_id].state == GOMP_DEVICE_FINALIZED)
    {
      gomp_mutex_unlock (&devices[device_id].lock);

      if (gomp_target_offload_var == GOMP_TARGET_OFFLOAD_MANDATORY)
        gomp_fatal ("OMP_TARGET_OFFLOAD is set to MANDATORY, "
                    "but device is finalized");

      return NULL;
    }
  gomp_mutex_unlock (&devices[device_id].lock);

  return &devices[device_id];
}
196 | ||
197 | ||
d9a6bd32 JJ |
/* Look KEY up in MEM_MAP.  For a zero-length key (host_start == host_end)
   also probe the adjacent one-byte ranges on either side, so a zero-length
   array section matches an enclosing or abutting mapping.  KEY is modified
   in place for the probes but always restored before returning.  */
static inline splay_tree_key
gomp_map_lookup (splay_tree mem_map, splay_tree_key key)
{
  if (key->host_start != key->host_end)
    return splay_tree_lookup (mem_map, key);

  /* Probe [start, end+1) first ...  */
  key->host_end++;
  splay_tree_key n = splay_tree_lookup (mem_map, key);
  key->host_end--;
  if (n)
    return n;
  /* ... then [start-1, end).  */
  key->host_start--;
  n = splay_tree_lookup (mem_map, key);
  key->host_start++;
  if (n)
    return n;
  /* Finally the exact (empty) range.  */
  return splay_tree_lookup (mem_map, key);
}
216 | ||
ea4b23d9 TB |
/* Reverse-offload lookup: find KEY (a device address range) in the
   device->host reverse map MEM_MAP_REV.  */
static inline reverse_splay_tree_key
gomp_map_lookup_rev (reverse_splay_tree mem_map_rev, reverse_splay_tree_key key)
{
  return reverse_splay_tree_lookup (mem_map_rev, key);
}
222 | ||
e01d41e5 JJ |
223 | static inline splay_tree_key |
224 | gomp_map_0len_lookup (splay_tree mem_map, splay_tree_key key) | |
225 | { | |
226 | if (key->host_start != key->host_end) | |
227 | return splay_tree_lookup (mem_map, key); | |
228 | ||
229 | key->host_end++; | |
230 | splay_tree_key n = splay_tree_lookup (mem_map, key); | |
231 | key->host_end--; | |
232 | return n; | |
233 | } | |
234 | ||
6ce13072 CLT |
235 | static inline void |
236 | gomp_device_copy (struct gomp_device_descr *devicep, | |
237 | bool (*copy_func) (int, void *, const void *, size_t), | |
238 | const char *dst, void *dstaddr, | |
239 | const char *src, const void *srcaddr, | |
240 | size_t size) | |
241 | { | |
242 | if (!copy_func (devicep->target_id, dstaddr, srcaddr, size)) | |
243 | { | |
244 | gomp_mutex_unlock (&devicep->lock); | |
245 | gomp_fatal ("Copying of %s object [%p..%p) to %s object [%p..%p) failed", | |
246 | src, srcaddr, srcaddr + size, dst, dstaddr, dstaddr + size); | |
247 | } | |
248 | } | |
249 | ||
1f4c5b9b CLT |
/* Queue an asynchronous copy of SIZE bytes on AQ via COPY_FUNC.
   SRCADDR_ORIG, when non-NULL and distinct from SRCADDR, is the original
   host address for which SRCADDR is a temporary staging buffer; it is
   used only to produce a more informative error message.  On failure,
   release the device lock and abort.  */
static inline void
goacc_device_copy_async (struct gomp_device_descr *devicep,
                         bool (*copy_func) (int, void *, const void *, size_t,
                                            struct goacc_asyncqueue *),
                         const char *dst, void *dstaddr,
                         const char *src, const void *srcaddr,
                         const void *srcaddr_orig,
                         size_t size, struct goacc_asyncqueue *aq)
{
  if (!copy_func (devicep->target_id, dstaddr, srcaddr, size, aq))
    {
      gomp_mutex_unlock (&devicep->lock);
      if (srcaddr_orig && srcaddr_orig != srcaddr)
        gomp_fatal ("Copying of %s object [%p..%p)"
                    " via buffer %s object [%p..%p)"
                    " to %s object [%p..%p) failed",
                    src, srcaddr_orig, srcaddr_orig + size,
                    src, srcaddr, srcaddr + size,
                    dst, dstaddr, dstaddr + size);
      else
        gomp_fatal ("Copying of %s object [%p..%p)"
                    " to %s object [%p..%p) failed",
                    src, srcaddr, srcaddr + size,
                    dst, dstaddr, dstaddr + size);
    }
}
276 | ||
7324369a JJ |
/* Infrastructure for coalescing adjacent or nearly adjacent (in device
   addresses) host to device memory transfers.  */

struct gomp_coalesce_chunk
{
  /* The starting and ending point of a coalesced chunk of memory,
     as offsets relative to the target region's tgt_start.  */
  size_t start, end;
};
285 | ||
7324369a JJ |
struct gomp_coalesce_buf
{
  /* Buffer into which gomp_copy_host2dev will memcpy data and from which
     it will be copied to the device.  */
  void *buf;
  struct target_mem_desc *tgt;
  /* Array with offsets, chunks[i].start is the starting offset and
     chunks[i].end ending offset relative to tgt->tgt_start device address
     of chunks which are to be copied to buf and later copied to device.  */
  struct gomp_coalesce_chunk *chunks;
  /* Number of chunks in chunks array, or -1 if coalesce buffering should not
     be performed.  */
  long chunk_cnt;
  /* During construction of chunks array, how many memory regions are within
     the last chunk.  If there is just one memory region for a chunk, we copy
     it directly to device rather than going through buf.  */
  long use_cnt;
};

/* Maximum size of memory region considered for coalescing.  Larger copies
   are performed directly.  */
#define MAX_COALESCE_BUF_SIZE	(32 * 1024)

/* Maximum size of a gap in between regions to consider them being copied
   within the same chunk.  All the device offsets considered are within
   newly allocated device memory, so it isn't fatal if we copy some padding
   in between from host to device.  The gaps come either from alignment
   padding or from memory regions which are not supposed to be copied from
   host to device (e.g. map(alloc:), map(from:) etc.).  */
#define MAX_COALESCE_BUF_GAP	(4 * 1024)
316 | ||
d88a6951 TS |
/* Add region with device tgt_start relative offset and length to CBUF.

   This must not be used for asynchronous copies, because the host data might
   not be computed yet (by an earlier asynchronous compute region, for
   example).  The exception is for EPHEMERAL data, that we know is available
   already "by construction".  */

static inline void
gomp_coalesce_buf_add (struct gomp_coalesce_buf *cbuf, size_t start, size_t len)
{
  if (len > MAX_COALESCE_BUF_SIZE || len == 0)
    return;
  if (cbuf->chunk_cnt)
    {
      /* chunk_cnt == -1 means coalescing has been disabled.  */
      if (cbuf->chunk_cnt < 0)
	return;
      if (start < cbuf->chunks[cbuf->chunk_cnt - 1].end)
	{
	  /* Regions are expected in increasing offset order; seeing one
	     out of order disables coalescing for the whole buffer.  */
	  cbuf->chunk_cnt = -1;
	  return;
	}
      if (start < cbuf->chunks[cbuf->chunk_cnt - 1].end + MAX_COALESCE_BUF_GAP)
	{
	  /* Close enough to the previous chunk: extend it to cover this
	     region (any gap becomes harmless padding).  */
	  cbuf->chunks[cbuf->chunk_cnt - 1].end = start + len;
	  cbuf->use_cnt++;
	  return;
	}
      /* If the last chunk is only used by one mapping, discard it,
	 as it will be one host to device copy anyway and
	 memcpying it around will only waste cycles.  */
      if (cbuf->use_cnt == 1)
	cbuf->chunk_cnt--;
    }
  /* Start a new chunk covering exactly this region.  */
  cbuf->chunks[cbuf->chunk_cnt].start = start;
  cbuf->chunks[cbuf->chunk_cnt].end = start + len;
  cbuf->chunk_cnt++;
  cbuf->use_cnt = 1;
}
355 | ||
356 | /* Return true for mapping kinds which need to copy data from the | |
357 | host to device for regions that weren't previously mapped. */ | |
358 | ||
359 | static inline bool | |
360 | gomp_to_device_kind_p (int kind) | |
361 | { | |
362 | switch (kind) | |
363 | { | |
364 | case GOMP_MAP_ALLOC: | |
365 | case GOMP_MAP_FROM: | |
366 | case GOMP_MAP_FORCE_ALLOC: | |
cc3f11f5 | 367 | case GOMP_MAP_FORCE_FROM: |
7324369a | 368 | case GOMP_MAP_ALWAYS_FROM: |
4ede915d | 369 | case GOMP_MAP_ALWAYS_PRESENT_FROM: |
38944ec2 | 370 | case GOMP_MAP_FORCE_PRESENT: |
7324369a JJ |
371 | return false; |
372 | default: | |
373 | return true; | |
374 | } | |
375 | } | |
376 | ||
9c41f5b9 JB |
/* Copy host memory to an offload device.  In asynchronous mode (if AQ is
   non-NULL), when the source data is stack or may otherwise be deallocated
   before the asynchronous copy takes place, EPHEMERAL must be passed as
   TRUE.  */

attribute_hidden void
gomp_copy_host2dev (struct gomp_device_descr *devicep,
		    struct goacc_asyncqueue *aq,
		    void *d, const void *h, size_t sz,
		    bool ephemeral, struct gomp_coalesce_buf *cbuf)
{
  if (cbuf)
    {
      uintptr_t doff = (uintptr_t) d - cbuf->tgt->tgt_start;
      if (doff < cbuf->chunks[cbuf->chunk_cnt - 1].end)
	{
	  /* Binary search for the chunk containing DOFF; chunks are
	     sorted by offset (see gomp_coalesce_buf_add).  */
	  long first = 0;
	  long last = cbuf->chunk_cnt - 1;
	  while (first <= last)
	    {
	      long middle = (first + last) >> 1;
	      if (cbuf->chunks[middle].end <= doff)
		first = middle + 1;
	      else if (cbuf->chunks[middle].start <= doff)
		{
		  /* The region must lie entirely within one chunk.  */
		  if (doff + sz > cbuf->chunks[middle].end)
		    {
		      gomp_mutex_unlock (&devicep->lock);
		      gomp_fatal ("internal libgomp cbuf error");
		    }

		  /* In an asynchronous context, verify that CBUF isn't used
		     with non-EPHEMERAL data; see 'gomp_coalesce_buf_add'.  */
		  if (__builtin_expect (aq != NULL, 0))
		    assert (ephemeral);

		  /* Stage into the coalescing buffer; the combined buffer
		     will be transferred to the device in one copy later.  */
		  memcpy ((char *) cbuf->buf + (doff - cbuf->chunks[0].start),
			  h, sz);
		  return;
		}
	      else
		last = middle - 1;
	    }
	}
    }

  if (__builtin_expect (aq != NULL, 0))
    {
      void *h_buf = (void *) h;
      if (ephemeral)
	{
	  /* We're queueing up an asynchronous copy from data that may
	     disappear before the transfer takes place (i.e. because it is a
	     stack local in a function that is no longer executing).  As we've
	     not been able to use CBUF, make a copy of the data into a
	     temporary buffer.  */
	  h_buf = gomp_malloc (sz);
	  memcpy (h_buf, h, sz);
	}
      goacc_device_copy_async (devicep, devicep->openacc.async.host2dev_func,
			       "dev", d, "host", h_buf, h, sz, aq);
      if (ephemeral)
	/* Free once the transfer has completed.  */
	devicep->openacc.async.queue_callback_func (aq, free, h_buf);
    }
  else
    gomp_device_copy (devicep, devicep->host2dev_func,
		      "dev", d, "host", h, sz);
}
446 | ||
1f4c5b9b | 447 | attribute_hidden void |
6ce13072 | 448 | gomp_copy_dev2host (struct gomp_device_descr *devicep, |
1f4c5b9b | 449 | struct goacc_asyncqueue *aq, |
6ce13072 CLT |
450 | void *h, const void *d, size_t sz) |
451 | { | |
1f4c5b9b CLT |
452 | if (__builtin_expect (aq != NULL, 0)) |
453 | goacc_device_copy_async (devicep, devicep->openacc.async.dev2host_func, | |
9c41f5b9 | 454 | "host", h, "dev", d, NULL, sz, aq); |
1f4c5b9b CLT |
455 | else |
456 | gomp_device_copy (devicep, devicep->dev2host_func, "host", h, "dev", d, sz); | |
6ce13072 CLT |
457 | } |
458 | ||
459 | static void | |
460 | gomp_free_device_memory (struct gomp_device_descr *devicep, void *devptr) | |
461 | { | |
462 | if (!devicep->free_func (devicep->target_id, devptr)) | |
463 | { | |
464 | gomp_mutex_unlock (&devicep->lock); | |
465 | gomp_fatal ("error in freeing device memory block at %p", devptr); | |
466 | } | |
467 | } | |
468 | ||
275c736e CLT |
469 | /* Increment reference count of a splay_tree_key region K by 1. |
470 | If REFCOUNT_SET != NULL, use it to track already seen refcounts, and only | |
471 | increment the value if refcount is not yet contained in the set (used for | |
472 | OpenMP 5.0, which specifies that a region's refcount is adjusted at most | |
473 | once for each construct). */ | |
474 | ||
475 | static inline void | |
476 | gomp_increment_refcount (splay_tree_key k, htab_t *refcount_set) | |
477 | { | |
478 | if (k == NULL || k->refcount == REFCOUNT_INFINITY) | |
479 | return; | |
480 | ||
481 | uintptr_t *refcount_ptr = &k->refcount; | |
482 | ||
483 | if (REFCOUNT_STRUCTELEM_FIRST_P (k->refcount)) | |
484 | refcount_ptr = &k->structelem_refcount; | |
485 | else if (REFCOUNT_STRUCTELEM_P (k->refcount)) | |
486 | refcount_ptr = k->structelem_refcount_ptr; | |
487 | ||
488 | if (refcount_set) | |
489 | { | |
490 | if (htab_find (*refcount_set, refcount_ptr)) | |
491 | return; | |
492 | uintptr_t **slot = htab_find_slot (refcount_set, refcount_ptr, INSERT); | |
493 | *slot = refcount_ptr; | |
494 | } | |
495 | ||
496 | *refcount_ptr += 1; | |
497 | return; | |
498 | } | |
499 | ||
/* Decrement reference count of a splay_tree_key region K by 1, or if DELETE_P
   is true, set reference count to zero.  If REFCOUNT_SET != NULL, use it to
   track already seen refcounts, and only adjust the value if refcount is not
   yet contained in the set (like gomp_increment_refcount).

   Return out-values: set *DO_COPY to true if we set the refcount to zero, or
   it is already zero and we know we decremented it earlier.  This signals that
   associated maps should be copied back to host.

   *DO_REMOVE is set to true when this is the first handling of this refcount
   and we are setting it to zero.  This signals a removal of this key from the
   splay-tree map.

   Copy and removal are separated due to cases like handling of structure
   elements, e.g. each map of a structure element representing a possible copy
   out of a structure field has to be handled individually, but we only signal
   removal for one (the first encountered) sibling map.  */

static inline void
gomp_decrement_refcount (splay_tree_key k, htab_t *refcount_set, bool delete_p,
			 bool *do_copy, bool *do_remove)
{
  if (k == NULL || k->refcount == REFCOUNT_INFINITY)
    {
      *do_copy = *do_remove = false;
      return;
    }

  uintptr_t *refcount_ptr = &k->refcount;

  /* Structure-element maps share a single refcount word.  */
  if (REFCOUNT_STRUCTELEM_FIRST_P (k->refcount))
    refcount_ptr = &k->structelem_refcount;
  else if (REFCOUNT_STRUCTELEM_P (k->refcount))
    refcount_ptr = k->structelem_refcount_ptr;

  bool new_encountered_refcount;
  bool set_to_zero = false;
  bool is_zero = false;

  uintptr_t orig_refcount = *refcount_ptr;

  if (refcount_set)
    {
      if (htab_find (*refcount_set, refcount_ptr))
	{
	  /* Already adjusted for this construct: skip the decrement,
	     but still compute the out-values below.  */
	  new_encountered_refcount = false;
	  goto end;
	}

      uintptr_t **slot = htab_find_slot (refcount_set, refcount_ptr, INSERT);
      *slot = refcount_ptr;
      new_encountered_refcount = true;
    }
  else
    /* If no refcount_set being used, assume all keys are being decremented
       for the first time.  */
    new_encountered_refcount = true;

  if (delete_p)
    *refcount_ptr = 0;
  else if (*refcount_ptr > 0)
    *refcount_ptr -= 1;

 end:
  if (*refcount_ptr == 0)
    {
      if (orig_refcount > 0)
	set_to_zero = true;

      is_zero = true;
    }

  *do_copy = (set_to_zero || (!new_encountered_refcount && is_zero));
  *do_remove = (new_encountered_refcount && set_to_zero);
}
575 | ||
e01d41e5 JJ |
576 | /* Handle the case where gomp_map_lookup, splay_tree_lookup or |
577 | gomp_map_0len_lookup found oldn for newn. | |
1df3f842 JJ |
578 | Helper function of gomp_map_vars. */ |
579 | ||
580 | static inline void | |
1f4c5b9b CLT |
581 | gomp_map_vars_existing (struct gomp_device_descr *devicep, |
582 | struct goacc_asyncqueue *aq, splay_tree_key oldn, | |
d9a6bd32 | 583 | splay_tree_key newn, struct target_var_desc *tgt_var, |
b7e20480 | 584 | unsigned char kind, bool always_to_flag, bool implicit, |
275c736e CLT |
585 | struct gomp_coalesce_buf *cbuf, |
586 | htab_t *refcount_set) | |
1df3f842 | 587 | { |
0ab29cf0 CLT |
588 | assert (kind != GOMP_MAP_ATTACH |
589 | || kind != GOMP_MAP_ATTACH_ZERO_LENGTH_ARRAY_SECTION); | |
aff43ac0 | 590 | |
d9a6bd32 JJ |
591 | tgt_var->key = oldn; |
592 | tgt_var->copy_from = GOMP_MAP_COPY_FROM_P (kind); | |
593 | tgt_var->always_copy_from = GOMP_MAP_ALWAYS_FROM_P (kind); | |
bc4ed079 | 594 | tgt_var->is_attach = false; |
d9a6bd32 | 595 | tgt_var->offset = newn->host_start - oldn->host_start; |
b7e20480 CLT |
596 | |
597 | /* For implicit maps, old contained in new is valid. */ | |
598 | bool implicit_subset = (implicit | |
599 | && newn->host_start <= oldn->host_start | |
600 | && oldn->host_end <= newn->host_end); | |
601 | if (implicit_subset) | |
602 | tgt_var->length = oldn->host_end - oldn->host_start; | |
603 | else | |
604 | tgt_var->length = newn->host_end - newn->host_start; | |
d9a6bd32 | 605 | |
4ede915d | 606 | if (GOMP_MAP_FORCE_P (kind) |
b7e20480 CLT |
607 | /* For implicit maps, old contained in new is valid. */ |
608 | || !(implicit_subset | |
609 | /* Otherwise, new contained inside old is considered valid. */ | |
610 | || (oldn->host_start <= newn->host_start | |
611 | && newn->host_end <= oldn->host_end))) | |
a51df54e IV |
612 | { |
613 | gomp_mutex_unlock (&devicep->lock); | |
614 | gomp_fatal ("Trying to map into device [%p..%p) object when " | |
615 | "[%p..%p) is already mapped", | |
616 | (void *) newn->host_start, (void *) newn->host_end, | |
617 | (void *) oldn->host_start, (void *) oldn->host_end); | |
618 | } | |
d9a6bd32 | 619 | |
972da557 | 620 | if (GOMP_MAP_ALWAYS_TO_P (kind) || always_to_flag) |
b7e20480 CLT |
621 | { |
622 | /* Implicit + always should not happen. If this does occur, below | |
623 | address/length adjustment is a TODO. */ | |
624 | assert (!implicit_subset); | |
625 | ||
6c039937 CLT |
626 | if (oldn->aux && oldn->aux->attach_count) |
627 | { | |
628 | /* We have to be careful not to overwrite still attached pointers | |
629 | during the copyback to host. */ | |
630 | uintptr_t addr = newn->host_start; | |
631 | while (addr < newn->host_end) | |
632 | { | |
633 | size_t i = (addr - oldn->host_start) / sizeof (void *); | |
634 | if (oldn->aux->attach_count[i] == 0) | |
635 | gomp_copy_host2dev (devicep, aq, | |
636 | (void *) (oldn->tgt->tgt_start | |
637 | + oldn->tgt_offset | |
638 | + addr - oldn->host_start), | |
639 | (void *) addr, | |
640 | sizeof (void *), false, cbuf); | |
641 | addr += sizeof (void *); | |
642 | } | |
643 | } | |
644 | else | |
645 | gomp_copy_host2dev (devicep, aq, | |
646 | (void *) (oldn->tgt->tgt_start + oldn->tgt_offset | |
647 | + newn->host_start - oldn->host_start), | |
648 | (void *) newn->host_start, | |
649 | newn->host_end - newn->host_start, false, cbuf); | |
b7e20480 | 650 | } |
6ce13072 | 651 | |
275c736e | 652 | gomp_increment_refcount (oldn, refcount_set); |
1df3f842 JJ |
653 | } |
654 | ||
41dbbb37 | 655 | static int |
d9a6bd32 | 656 | get_kind (bool short_mapkind, void *kinds, int idx) |
41dbbb37 | 657 | { |
b7e20480 CLT |
658 | if (!short_mapkind) |
659 | return ((unsigned char *) kinds)[idx]; | |
660 | ||
661 | int val = ((unsigned short *) kinds)[idx]; | |
662 | if (GOMP_MAP_IMPLICIT_P (val)) | |
663 | val &= ~GOMP_MAP_IMPLICIT; | |
664 | return val; | |
665 | } | |
666 | ||
667 | ||
668 | static bool | |
669 | get_implicit (bool short_mapkind, void *kinds, int idx) | |
670 | { | |
671 | if (!short_mapkind) | |
672 | return false; | |
673 | ||
674 | int val = ((unsigned short *) kinds)[idx]; | |
675 | return GOMP_MAP_IMPLICIT_P (val); | |
41dbbb37 TS |
676 | } |
677 | ||
/* Store at device address TGT->tgt_start + TARGET_OFFSET the device
   pointer corresponding to host pointer HOST_PTR + BIAS (looked up in the
   device's memory map, then re-biased).  A NULL HOST_PTR maps to a NULL
   device pointer.  If the biased target isn't mapped, store 0 when
   ALLOW_ZERO_LENGTH_ARRAY_SECTIONS, otherwise abort.  */
static void
gomp_map_pointer (struct target_mem_desc *tgt, struct goacc_asyncqueue *aq,
		  uintptr_t host_ptr, uintptr_t target_offset, uintptr_t bias,
		  struct gomp_coalesce_buf *cbuf,
		  bool allow_zero_length_array_sections)
{
  struct gomp_device_descr *devicep = tgt->device_descr;
  struct splay_tree_s *mem_map = &devicep->mem_map;
  struct splay_tree_key_s cur_node;

  cur_node.host_start = host_ptr;
  if (cur_node.host_start == (uintptr_t) NULL)
    {
      /* NULL host pointer: write a NULL device pointer, no lookup needed.
	 cur_node.tgt_offset is a stack local, hence EPHEMERAL below.  */
      cur_node.tgt_offset = (uintptr_t) NULL;
      gomp_copy_host2dev (devicep, aq,
			  (void *) (tgt->tgt_start + target_offset),
			  (void *) &cur_node.tgt_offset, sizeof (void *),
			  true, cbuf);
      return;
    }
  /* Add bias to the pointer value.  */
  cur_node.host_start += bias;
  cur_node.host_end = cur_node.host_start;
  splay_tree_key n = gomp_map_lookup (mem_map, &cur_node);
  if (n == NULL)
    {
      if (allow_zero_length_array_sections)
	cur_node.tgt_offset = 0;
      else
	{
	  gomp_mutex_unlock (&devicep->lock);
	  gomp_fatal ("Pointer target of array section wasn't mapped");
	}
    }
  else
    {
      cur_node.host_start -= n->host_start;
      cur_node.tgt_offset
	= n->tgt->tgt_start + n->tgt_offset + cur_node.host_start;
      /* At this point tgt_offset is target address of the
	 array section.  Now subtract bias to get what we want
	 to initialize the pointer with.  */
      cur_node.tgt_offset -= bias;
    }
  gomp_copy_host2dev (devicep, aq, (void *) (tgt->tgt_start + target_offset),
		      (void *) &cur_node.tgt_offset, sizeof (void *),
		      true, cbuf);
}
726 | ||
/* Map structure element I of a structure whose first element mapping
   (at index FIRST) resolved to N: the element must be covered by an
   existing mapping of the same structure (same tgt, consistent offset),
   otherwise abort.  Zero-sized elements additionally probe the byte
   before and a one-byte range at the address.  */
static void
gomp_map_fields_existing (struct target_mem_desc *tgt,
			  struct goacc_asyncqueue *aq, splay_tree_key n,
			  size_t first, size_t i, void **hostaddrs,
			  size_t *sizes, void *kinds,
			  struct gomp_coalesce_buf *cbuf, htab_t *refcount_set)
{
  struct gomp_device_descr *devicep = tgt->device_descr;
  struct splay_tree_s *mem_map = &devicep->mem_map;
  struct splay_tree_key_s cur_node;
  int kind;
  bool implicit;
  const bool short_mapkind = true;
  const int typemask = short_mapkind ? 0xff : 0x7;

  cur_node.host_start = (uintptr_t) hostaddrs[i];
  cur_node.host_end = cur_node.host_start + sizes[i];
  splay_tree_key n2 = splay_tree_lookup (mem_map, &cur_node);
  kind = get_kind (short_mapkind, kinds, i);
  implicit = get_implicit (short_mapkind, kinds, i);
  if (n2
      && n2->tgt == n->tgt
      && n2->host_start - n->host_start == n2->tgt_offset - n->tgt_offset)
    {
      gomp_map_vars_existing (devicep, aq, n2, &cur_node, &tgt->list[i],
			      kind & typemask, false, implicit, cbuf,
			      refcount_set);
      return;
    }
  if (sizes[i] == 0)
    {
      if (cur_node.host_start > (uintptr_t) hostaddrs[first - 1])
	{
	  /* Zero-length element: also try the byte just before, which may
	     be the last byte of an existing mapping.  */
	  cur_node.host_start--;
	  n2 = splay_tree_lookup (mem_map, &cur_node);
	  cur_node.host_start++;
	  if (n2
	      && n2->tgt == n->tgt
	      && n2->host_start - n->host_start
		 == n2->tgt_offset - n->tgt_offset)
	    {
	      gomp_map_vars_existing (devicep, aq, n2, &cur_node, &tgt->list[i],
				      kind & typemask, false, implicit, cbuf,
				      refcount_set);
	      return;
	    }
	}
      /* Likewise try a one-byte range starting at the address.  */
      cur_node.host_end++;
      n2 = splay_tree_lookup (mem_map, &cur_node);
      cur_node.host_end--;
      if (n2
	  && n2->tgt == n->tgt
	  && n2->host_start - n->host_start == n2->tgt_offset - n->tgt_offset)
	{
	  gomp_map_vars_existing (devicep, aq, n2, &cur_node, &tgt->list[i],
				  kind & typemask, false, implicit, cbuf,
				  refcount_set);
	  return;
	}
    }
  gomp_mutex_unlock (&devicep->lock);
  gomp_fatal ("Trying to map into device [%p..%p) structure element when "
	      "other mapped elements from the same structure weren't mapped "
	      "together with it", (void *) cur_node.host_start,
	      (void *) cur_node.host_end);
}
793 | ||
5d5be7bf JB |
/* Record an OpenACC-style pointer attachment for the pointer located at host
   address ATTACH_TO inside the already-mapped region N (an entry of MEM_MAP
   on device DEVICEP).  Each pointer-sized slot of N carries an attachment
   counter in N->aux->attach_count; this function increments the counter for
   ATTACH_TO's slot.  On the first attachment (count 0 -> 1) the host pointer
   value stored at ATTACH_TO (offset by BIAS) is translated to its device
   address and written into the device copy of that pointer field, via AQ
   (asynchronous queue, may be NULL) and optionally coalesced through CBUFP.
   If ALLOW_ZERO_LENGTH_ARRAY_SECTIONS, an unmapped pointer target results in
   a NULL device pointer instead of a fatal error.

   The caller must hold DEVICEP->lock: every fatal-error path below unlocks
   it before calling gomp_fatal.  */

attribute_hidden void
gomp_attach_pointer (struct gomp_device_descr *devicep,
		     struct goacc_asyncqueue *aq, splay_tree mem_map,
		     splay_tree_key n, uintptr_t attach_to, size_t bias,
		     struct gomp_coalesce_buf *cbufp,
		     bool allow_zero_length_array_sections)
{
  struct splay_tree_key_s s;
  size_t size, idx;

  if (n == NULL)
    {
      gomp_mutex_unlock (&devicep->lock);
      gomp_fatal ("enclosing struct not mapped for attach");
    }

  /* Number of pointer-sized slots covering the mapped region, rounded up.  */
  size = (n->host_end - n->host_start + sizeof (void *) - 1) / sizeof (void *);
  /* We might have a pointer in a packed struct: however we cannot have more
     than one such pointer in each pointer-sized portion of the struct, so
     this is safe.  */
  idx = (attach_to - n->host_start) / sizeof (void *);

  /* Lazily allocate the auxiliary data and the per-slot attach counters
     (zero-initialized, i.e. "not attached").  */
  if (!n->aux)
    n->aux = gomp_malloc_cleared (sizeof (struct splay_tree_aux));

  if (!n->aux->attach_count)
    n->aux->attach_count
      = gomp_malloc_cleared (sizeof (*n->aux->attach_count) * size);

  if (n->aux->attach_count[idx] < UINTPTR_MAX)
    n->aux->attach_count[idx]++;
  else
    {
      gomp_mutex_unlock (&devicep->lock);
      gomp_fatal ("attach count overflow");
    }

  if (n->aux->attach_count[idx] == 1)
    {
      /* First attachment: actually rewrite the pointer on the device.
	 DEVPTR is the device-side address of the pointer field itself.  */
      uintptr_t devptr = n->tgt->tgt_start + n->tgt_offset + attach_to
			 - n->host_start;
      /* TARGET is the host pointer value currently stored in that field.  */
      uintptr_t target = (uintptr_t) *(void **) attach_to;
      splay_tree_key tn;
      uintptr_t data;

      if ((void *) target == NULL)
	{
	  gomp_mutex_unlock (&devicep->lock);
	  gomp_fatal ("attempt to attach null pointer");
	}

      /* Look up the mapping that contains the pointer target; BIAS skips
	 past e.g. an array-section offset so the lookup lands inside the
	 intended mapped block.  */
      s.host_start = target + bias;
      s.host_end = s.host_start + 1;
      tn = splay_tree_lookup (mem_map, &s);

      if (!tn)
	{
	  if (allow_zero_length_array_sections)
	    /* When allowing attachment to zero-length array sections, we
	       allow attaching to NULL pointers when the target region is not
	       mapped.  */
	    data = 0;
	  else
	    {
	      gomp_mutex_unlock (&devicep->lock);
	      gomp_fatal ("pointer target not mapped for attach");
	    }
	}
      else
	/* Translate the host pointer value into the corresponding device
	   address within TN's target block.  */
	data = tn->tgt->tgt_start + tn->tgt_offset + target - tn->host_start;

      gomp_debug (1,
		  "%s: attaching host %p, target %p (struct base %p) to %p\n",
		  __FUNCTION__, (void *) attach_to, (void *) devptr,
		  (void *) (n->tgt->tgt_start + n->tgt_offset), (void *) data);

      /* Write the translated pointer into the device copy.  */
      gomp_copy_host2dev (devicep, aq, (void *) devptr, (void *) &data,
			  sizeof (void *), true, cbufp);
    }
  else
    /* Already attached: only the counter was bumped.  */
    gomp_debug (1, "%s: attach count for %p -> %u\n", __FUNCTION__,
		(void *) attach_to, (int) n->aux->attach_count[idx]);
}
877 | ||
/* Undo a pointer attachment made by gomp_attach_pointer for the pointer at
   host address DETACH_FROM inside mapped region N on device DEVICEP.
   Decrements the attach counter for DETACH_FROM's slot; when it drops to
   zero, the device copy of the pointer field is overwritten with the *host*
   pointer value currently stored at DETACH_FROM (restoring the pre-attach
   contents), via AQ and optionally coalesced through CBUFP.  If FINALIZE,
   the counter is forced to 1 first so this detach always completes,
   regardless of how many attachments are outstanding.

   The caller must hold DEVICEP->lock: every fatal-error path below unlocks
   it before calling gomp_fatal.  */

attribute_hidden void
gomp_detach_pointer (struct gomp_device_descr *devicep,
		     struct goacc_asyncqueue *aq, splay_tree_key n,
		     uintptr_t detach_from, bool finalize,
		     struct gomp_coalesce_buf *cbufp)
{
  size_t idx;

  if (n == NULL)
    {
      gomp_mutex_unlock (&devicep->lock);
      gomp_fatal ("enclosing struct not mapped for detach");
    }

  /* Pointer-sized slot index of DETACH_FROM within the mapped region;
     mirrors the indexing used by gomp_attach_pointer.  */
  idx = (detach_from - n->host_start) / sizeof (void *);

  if (!n->aux || !n->aux->attach_count)
    {
      gomp_mutex_unlock (&devicep->lock);
      gomp_fatal ("no attachment counters for struct");
    }

  if (finalize)
    /* Force the upcoming decrement to reach zero.  */
    n->aux->attach_count[idx] = 1;

  if (n->aux->attach_count[idx] == 0)
    {
      gomp_mutex_unlock (&devicep->lock);
      gomp_fatal ("attach count underflow");
    }
  else
    n->aux->attach_count[idx]--;

  if (n->aux->attach_count[idx] == 0)
    {
      /* Last detachment: restore the pointer on the device.  DEVPTR is the
	 device-side address of the pointer field; TARGET is the host value
	 to write back.  */
      uintptr_t devptr = n->tgt->tgt_start + n->tgt_offset + detach_from
			 - n->host_start;
      uintptr_t target = (uintptr_t) *(void **) detach_from;

      gomp_debug (1,
		  "%s: detaching host %p, target %p (struct base %p) to %p\n",
		  __FUNCTION__, (void *) detach_from, (void *) devptr,
		  (void *) (n->tgt->tgt_start + n->tgt_offset),
		  (void *) target);

      gomp_copy_host2dev (devicep, aq, (void *) devptr, (void *) &target,
			  sizeof (void *), true, cbufp);
    }
  else
    /* Still attached elsewhere: only the counter was decremented.  */
    gomp_debug (1, "%s: attach count for %p -> %u\n", __FUNCTION__,
		(void *) detach_from, (int) n->aux->attach_count[idx]);
}
930 | ||
5bcd470b | 931 | attribute_hidden uintptr_t |
e01d41e5 JJ |
932 | gomp_map_val (struct target_mem_desc *tgt, void **hostaddrs, size_t i) |
933 | { | |
934 | if (tgt->list[i].key != NULL) | |
935 | return tgt->list[i].key->tgt->tgt_start | |
936 | + tgt->list[i].key->tgt_offset | |
937 | + tgt->list[i].offset; | |
6c7e076b JB |
938 | |
939 | switch (tgt->list[i].offset) | |
940 | { | |
941 | case OFFSET_INLINED: | |
942 | return (uintptr_t) hostaddrs[i]; | |
943 | ||
944 | case OFFSET_POINTER: | |
945 | return 0; | |
946 | ||
947 | case OFFSET_STRUCT: | |
948 | return tgt->list[i + 1].key->tgt->tgt_start | |
949 | + tgt->list[i + 1].key->tgt_offset | |
950 | + tgt->list[i + 1].offset | |
951 | + (uintptr_t) hostaddrs[i] | |
952 | - (uintptr_t) hostaddrs[i + 1]; | |
953 | ||
954 | default: | |
955 | return tgt->tgt_start + tgt->list[i].offset; | |
956 | } | |
e01d41e5 JJ |
957 | } |
958 | ||
1f4c5b9b CLT |
959 | static inline __attribute__((always_inline)) struct target_mem_desc * |
960 | gomp_map_vars_internal (struct gomp_device_descr *devicep, | |
961 | struct goacc_asyncqueue *aq, size_t mapnum, | |
962 | void **hostaddrs, void **devaddrs, size_t *sizes, | |
963 | void *kinds, bool short_mapkind, | |
275c736e | 964 | htab_t *refcount_set, |
1f4c5b9b | 965 | enum gomp_map_vars_kind pragma_kind) |
1df3f842 JJ |
966 | { |
967 | size_t i, tgt_align, tgt_size, not_found_cnt = 0; | |
d9a6bd32 | 968 | bool has_firstprivate = false; |
972da557 | 969 | bool has_always_ptrset = false; |
275c736e | 970 | bool openmp_p = (pragma_kind & GOMP_MAP_VARS_OPENACC) == 0; |
d9a6bd32 JJ |
971 | const int rshift = short_mapkind ? 8 : 3; |
972 | const int typemask = short_mapkind ? 0xff : 0x7; | |
a51df54e | 973 | struct splay_tree_s *mem_map = &devicep->mem_map; |
1df3f842 JJ |
974 | struct splay_tree_key_s cur_node; |
975 | struct target_mem_desc *tgt | |
976 | = gomp_malloc (sizeof (*tgt) + sizeof (tgt->list[0]) * mapnum); | |
977 | tgt->list_count = mapnum; | |
9e628024 | 978 | tgt->refcount = (pragma_kind & GOMP_MAP_VARS_ENTER_DATA) ? 0 : 1; |
1df3f842 | 979 | tgt->device_descr = devicep; |
378da98f | 980 | tgt->prev = NULL; |
7324369a | 981 | struct gomp_coalesce_buf cbuf, *cbufp = NULL; |
1df3f842 JJ |
982 | |
983 | if (mapnum == 0) | |
3837c6d6 IV |
984 | { |
985 | tgt->tgt_start = 0; | |
986 | tgt->tgt_end = 0; | |
987 | return tgt; | |
988 | } | |
1df3f842 JJ |
989 | |
990 | tgt_align = sizeof (void *); | |
991 | tgt_size = 0; | |
7324369a JJ |
992 | cbuf.chunks = NULL; |
993 | cbuf.chunk_cnt = -1; | |
994 | cbuf.use_cnt = 0; | |
995 | cbuf.buf = NULL; | |
f8332e52 | 996 | if (mapnum > 1 || (pragma_kind & GOMP_MAP_VARS_TARGET)) |
7324369a | 997 | { |
a44c1790 TS |
998 | size_t chunks_size = (mapnum + 1) * sizeof (struct gomp_coalesce_chunk); |
999 | cbuf.chunks = (struct gomp_coalesce_chunk *) gomp_alloca (chunks_size); | |
7324369a JJ |
1000 | cbuf.chunk_cnt = 0; |
1001 | } | |
f8332e52 | 1002 | if (pragma_kind & GOMP_MAP_VARS_TARGET) |
1df3f842 JJ |
1003 | { |
1004 | size_t align = 4 * sizeof (void *); | |
1005 | tgt_align = align; | |
1006 | tgt_size = mapnum * sizeof (void *); | |
7324369a JJ |
1007 | cbuf.chunk_cnt = 1; |
1008 | cbuf.use_cnt = 1 + (mapnum > 1); | |
a44c1790 TS |
1009 | cbuf.chunks[0].start = 0; |
1010 | cbuf.chunks[0].end = tgt_size; | |
1df3f842 JJ |
1011 | } |
1012 | ||
a51df54e | 1013 | gomp_mutex_lock (&devicep->lock); |
d84ffc0a IV |
1014 | if (devicep->state == GOMP_DEVICE_FINALIZED) |
1015 | { | |
1016 | gomp_mutex_unlock (&devicep->lock); | |
1017 | free (tgt); | |
1018 | return NULL; | |
1019 | } | |
41dbbb37 | 1020 | |
1df3f842 JJ |
1021 | for (i = 0; i < mapnum; i++) |
1022 | { | |
d9a6bd32 | 1023 | int kind = get_kind (short_mapkind, kinds, i); |
b7e20480 | 1024 | bool implicit = get_implicit (short_mapkind, kinds, i); |
d9a6bd32 JJ |
1025 | if (hostaddrs[i] == NULL |
1026 | || (kind & typemask) == GOMP_MAP_FIRSTPRIVATE_INT) | |
1df3f842 | 1027 | { |
d9a6bd32 | 1028 | tgt->list[i].key = NULL; |
6c7e076b | 1029 | tgt->list[i].offset = OFFSET_INLINED; |
d9a6bd32 JJ |
1030 | continue; |
1031 | } | |
d5c23c6c TB |
1032 | else if ((kind & typemask) == GOMP_MAP_USE_DEVICE_PTR |
1033 | || (kind & typemask) == GOMP_MAP_USE_DEVICE_PTR_IF_PRESENT) | |
d9a6bd32 | 1034 | { |
8860d270 JJ |
1035 | tgt->list[i].key = NULL; |
1036 | if (!not_found_cnt) | |
d9a6bd32 | 1037 | { |
7d48e14f JJ |
1038 | /* In OpenMP < 5.0 and OpenACC the mapping has to be done |
1039 | on a separate construct prior to using use_device_{addr,ptr}. | |
1040 | In OpenMP 5.0, map directives need to be ordered by the | |
1041 | middle-end before the use_device_* clauses. If | |
1042 | !not_found_cnt, all mappings requested (if any) are already | |
1043 | mapped, so use_device_{addr,ptr} can be resolved right away. | |
1044 | Otherwise, if not_found_cnt, gomp_map_lookup might fail | |
1045 | now but would succeed after performing the mappings in the | |
1046 | following loop. We can't defer this always to the second | |
1047 | loop, because it is not even invoked when !not_found_cnt | |
1048 | after the first loop. */ | |
1049 | cur_node.host_start = (uintptr_t) hostaddrs[i]; | |
1050 | cur_node.host_end = cur_node.host_start; | |
1051 | splay_tree_key n = gomp_map_lookup (mem_map, &cur_node); | |
b3b75e66 TS |
1052 | if (n != NULL) |
1053 | { | |
1054 | cur_node.host_start -= n->host_start; | |
1055 | hostaddrs[i] | |
1056 | = (void *) (n->tgt->tgt_start + n->tgt_offset | |
1057 | + cur_node.host_start); | |
1058 | } | |
1059 | else if ((kind & typemask) == GOMP_MAP_USE_DEVICE_PTR) | |
7d48e14f JJ |
1060 | { |
1061 | gomp_mutex_unlock (&devicep->lock); | |
1062 | gomp_fatal ("use_device_ptr pointer wasn't mapped"); | |
1063 | } | |
b3b75e66 TS |
1064 | else if ((kind & typemask) == GOMP_MAP_USE_DEVICE_PTR_IF_PRESENT) |
1065 | /* If not present, continue using the host address. */ | |
1066 | ; | |
1067 | else | |
1068 | __builtin_unreachable (); | |
1069 | tgt->list[i].offset = OFFSET_INLINED; | |
d9a6bd32 | 1070 | } |
8860d270 JJ |
1071 | else |
1072 | tgt->list[i].offset = 0; | |
d9a6bd32 JJ |
1073 | continue; |
1074 | } | |
1075 | else if ((kind & typemask) == GOMP_MAP_STRUCT) | |
1076 | { | |
1077 | size_t first = i + 1; | |
1078 | size_t last = i + sizes[i]; | |
1079 | cur_node.host_start = (uintptr_t) hostaddrs[i]; | |
1080 | cur_node.host_end = (uintptr_t) hostaddrs[last] | |
1081 | + sizes[last]; | |
1082 | tgt->list[i].key = NULL; | |
6c7e076b | 1083 | tgt->list[i].offset = OFFSET_STRUCT; |
d9a6bd32 JJ |
1084 | splay_tree_key n = splay_tree_lookup (mem_map, &cur_node); |
1085 | if (n == NULL) | |
1086 | { | |
1087 | size_t align = (size_t) 1 << (kind >> rshift); | |
1088 | if (tgt_align < align) | |
1089 | tgt_align = align; | |
7324369a | 1090 | tgt_size -= (uintptr_t) hostaddrs[first] - cur_node.host_start; |
d9a6bd32 | 1091 | tgt_size = (tgt_size + align - 1) & ~(align - 1); |
7324369a | 1092 | tgt_size += cur_node.host_end - cur_node.host_start; |
d9a6bd32 JJ |
1093 | not_found_cnt += last - i; |
1094 | for (i = first; i <= last; i++) | |
7324369a JJ |
1095 | { |
1096 | tgt->list[i].key = NULL; | |
d88a6951 TS |
1097 | if (!aq |
1098 | && gomp_to_device_kind_p (get_kind (short_mapkind, kinds, i) | |
1099 | & typemask)) | |
7324369a JJ |
1100 | gomp_coalesce_buf_add (&cbuf, |
1101 | tgt_size - cur_node.host_end | |
1102 | + (uintptr_t) hostaddrs[i], | |
1103 | sizes[i]); | |
1104 | } | |
d9a6bd32 JJ |
1105 | i--; |
1106 | continue; | |
1107 | } | |
1108 | for (i = first; i <= last; i++) | |
1f4c5b9b | 1109 | gomp_map_fields_existing (tgt, aq, n, first, i, hostaddrs, |
275c736e | 1110 | sizes, kinds, NULL, refcount_set); |
d9a6bd32 | 1111 | i--; |
1df3f842 JJ |
1112 | continue; |
1113 | } | |
e01d41e5 JJ |
1114 | else if ((kind & typemask) == GOMP_MAP_ALWAYS_POINTER) |
1115 | { | |
1116 | tgt->list[i].key = NULL; | |
6c7e076b | 1117 | tgt->list[i].offset = OFFSET_POINTER; |
e01d41e5 JJ |
1118 | has_firstprivate = true; |
1119 | continue; | |
1120 | } | |
0ab29cf0 CLT |
1121 | else if ((kind & typemask) == GOMP_MAP_ATTACH |
1122 | || ((kind & typemask) | |
1123 | == GOMP_MAP_ATTACH_ZERO_LENGTH_ARRAY_SECTION)) | |
8e7e71ff JB |
1124 | { |
1125 | tgt->list[i].key = NULL; | |
1126 | has_firstprivate = true; | |
1127 | continue; | |
1128 | } | |
1df3f842 | 1129 | cur_node.host_start = (uintptr_t) hostaddrs[i]; |
aff43ac0 | 1130 | if (!GOMP_MAP_POINTER_P (kind & typemask)) |
1df3f842 JJ |
1131 | cur_node.host_end = cur_node.host_start + sizes[i]; |
1132 | else | |
1133 | cur_node.host_end = cur_node.host_start + sizeof (void *); | |
d9a6bd32 JJ |
1134 | if ((kind & typemask) == GOMP_MAP_FIRSTPRIVATE) |
1135 | { | |
1136 | tgt->list[i].key = NULL; | |
1137 | ||
1138 | size_t align = (size_t) 1 << (kind >> rshift); | |
1139 | if (tgt_align < align) | |
1140 | tgt_align = align; | |
1141 | tgt_size = (tgt_size + align - 1) & ~(align - 1); | |
d88a6951 TS |
1142 | if (!aq) |
1143 | gomp_coalesce_buf_add (&cbuf, tgt_size, | |
1144 | cur_node.host_end - cur_node.host_start); | |
d9a6bd32 JJ |
1145 | tgt_size += cur_node.host_end - cur_node.host_start; |
1146 | has_firstprivate = true; | |
1147 | continue; | |
1148 | } | |
1149 | splay_tree_key n; | |
1150 | if ((kind & typemask) == GOMP_MAP_ZERO_LEN_ARRAY_SECTION) | |
1df3f842 | 1151 | { |
e01d41e5 | 1152 | n = gomp_map_0len_lookup (mem_map, &cur_node); |
d9a6bd32 JJ |
1153 | if (!n) |
1154 | { | |
1155 | tgt->list[i].key = NULL; | |
b25ea7ab | 1156 | tgt->list[i].offset = OFFSET_INLINED; |
d9a6bd32 JJ |
1157 | continue; |
1158 | } | |
1df3f842 | 1159 | } |
d9a6bd32 JJ |
1160 | else |
1161 | n = splay_tree_lookup (mem_map, &cur_node); | |
4a38b02b | 1162 | if (n && n->refcount != REFCOUNT_LINK) |
972da557 TB |
1163 | { |
1164 | int always_to_cnt = 0; | |
1165 | if ((kind & typemask) == GOMP_MAP_TO_PSET) | |
1166 | { | |
1b9bdd52 | 1167 | bool has_nullptr = false; |
972da557 TB |
1168 | size_t j; |
1169 | for (j = 0; j < n->tgt->list_count; j++) | |
1170 | if (n->tgt->list[j].key == n) | |
1171 | { | |
1172 | has_nullptr = n->tgt->list[j].has_null_ptr_assoc; | |
1173 | break; | |
1174 | } | |
1175 | if (n->tgt->list_count == 0) | |
1176 | { | |
1177 | /* 'declare target'; assume has_nullptr; it could also be | |
1178 | statically assigned pointer, but that it should be to | |
1179 | the equivalent variable on the host. */ | |
1180 | assert (n->refcount == REFCOUNT_INFINITY); | |
1181 | has_nullptr = true; | |
1182 | } | |
1183 | else | |
1184 | assert (j < n->tgt->list_count); | |
1185 | /* Re-map the data if there is an 'always' modifier, or if a | 
1186 | null pointer was there and now a nonnull has been found; that | 
1187 | permits transparent re-mapping for Fortran array descriptors | 
1188 | which were previously mapped unallocated. */ | 
1189 | for (j = i + 1; j < mapnum; j++) | |
1190 | { | |
1191 | int ptr_kind = get_kind (short_mapkind, kinds, j) & typemask; | |
1192 | if (!GOMP_MAP_ALWAYS_POINTER_P (ptr_kind) | |
1193 | && (!has_nullptr | |
1194 | || !GOMP_MAP_POINTER_P (ptr_kind) | |
1195 | || *(void **) hostaddrs[j] == NULL)) | |
1196 | break; | |
1197 | else if ((uintptr_t) hostaddrs[j] < cur_node.host_start | |
1198 | || ((uintptr_t) hostaddrs[j] + sizeof (void *) | |
1199 | > cur_node.host_end)) | |
1200 | break; | |
1201 | else | |
1202 | { | |
1203 | has_always_ptrset = true; | |
1204 | ++always_to_cnt; | |
1205 | } | |
1206 | } | |
1207 | } | |
1208 | gomp_map_vars_existing (devicep, aq, n, &cur_node, &tgt->list[i], | |
b7e20480 CLT |
1209 | kind & typemask, always_to_cnt > 0, implicit, |
1210 | NULL, refcount_set); | |
972da557 TB |
1211 | i += always_to_cnt; |
1212 | } | |
1df3f842 JJ |
1213 | else |
1214 | { | |
d9a6bd32 | 1215 | tgt->list[i].key = NULL; |
41dbbb37 | 1216 | |
a6163563 JB |
1217 | if ((kind & typemask) == GOMP_MAP_IF_PRESENT) |
1218 | { | |
1219 | /* Not present, hence, skip entry - including its MAP_POINTER, | |
1220 | when existing. */ | |
199867d0 | 1221 | tgt->list[i].offset = OFFSET_INLINED; |
a6163563 JB |
1222 | if (i + 1 < mapnum |
1223 | && ((typemask & get_kind (short_mapkind, kinds, i + 1)) | |
1224 | == GOMP_MAP_POINTER)) | |
1225 | { | |
1226 | ++i; | |
1227 | tgt->list[i].key = NULL; | |
1228 | tgt->list[i].offset = 0; | |
1229 | } | |
1230 | continue; | |
1231 | } | |
41dbbb37 | 1232 | size_t align = (size_t) 1 << (kind >> rshift); |
1df3f842 JJ |
1233 | not_found_cnt++; |
1234 | if (tgt_align < align) | |
1235 | tgt_align = align; | |
1236 | tgt_size = (tgt_size + align - 1) & ~(align - 1); | |
d88a6951 TS |
1237 | if (!aq |
1238 | && gomp_to_device_kind_p (kind & typemask)) | |
7324369a JJ |
1239 | gomp_coalesce_buf_add (&cbuf, tgt_size, |
1240 | cur_node.host_end - cur_node.host_start); | |
1df3f842 | 1241 | tgt_size += cur_node.host_end - cur_node.host_start; |
41dbbb37 | 1242 | if ((kind & typemask) == GOMP_MAP_TO_PSET) |
1df3f842 JJ |
1243 | { |
1244 | size_t j; | |
972da557 | 1245 | int kind; |
1df3f842 | 1246 | for (j = i + 1; j < mapnum; j++) |
972da557 TB |
1247 | if (!GOMP_MAP_POINTER_P ((kind = (get_kind (short_mapkind, |
1248 | kinds, j)) & typemask)) | |
1249 | && !GOMP_MAP_ALWAYS_POINTER_P (kind)) | |
1df3f842 JJ |
1250 | break; |
1251 | else if ((uintptr_t) hostaddrs[j] < cur_node.host_start | |
1252 | || ((uintptr_t) hostaddrs[j] + sizeof (void *) | |
1253 | > cur_node.host_end)) | |
1254 | break; | |
1255 | else | |
1256 | { | |
d9a6bd32 | 1257 | tgt->list[j].key = NULL; |
1df3f842 JJ |
1258 | i++; |
1259 | } | |
1260 | } | |
1261 | } | |
1262 | } | |
1263 | ||
41dbbb37 TS |
1264 | if (devaddrs) |
1265 | { | |
1266 | if (mapnum != 1) | |
a51df54e IV |
1267 | { |
1268 | gomp_mutex_unlock (&devicep->lock); | |
1269 | gomp_fatal ("unexpected aggregation"); | |
1270 | } | |
41dbbb37 TS |
1271 | tgt->to_free = devaddrs[0]; |
1272 | tgt->tgt_start = (uintptr_t) tgt->to_free; | |
1273 | tgt->tgt_end = tgt->tgt_start + sizes[0]; | |
1274 | } | |
f8332e52 | 1275 | else if (not_found_cnt || (pragma_kind & GOMP_MAP_VARS_TARGET)) |
1df3f842 JJ |
1276 | { |
1277 | /* Allocate tgt_align aligned tgt_size block of memory. */ | |
1278 | /* FIXME: Perhaps change interface to allocate properly aligned | |
1279 | memory. */ | |
1280 | tgt->to_free = devicep->alloc_func (devicep->target_id, | |
1281 | tgt_size + tgt_align - 1); | |
6ce13072 CLT |
1282 | if (!tgt->to_free) |
1283 | { | |
1284 | gomp_mutex_unlock (&devicep->lock); | |
1285 | gomp_fatal ("device memory allocation fail"); | |
1286 | } | |
1287 | ||
1df3f842 JJ |
1288 | tgt->tgt_start = (uintptr_t) tgt->to_free; |
1289 | tgt->tgt_start = (tgt->tgt_start + tgt_align - 1) & ~(tgt_align - 1); | |
1290 | tgt->tgt_end = tgt->tgt_start + tgt_size; | |
7324369a JJ |
1291 | |
1292 | if (cbuf.use_cnt == 1) | |
1293 | cbuf.chunk_cnt--; | |
1294 | if (cbuf.chunk_cnt > 0) | |
1295 | { | |
1296 | cbuf.buf | |
a44c1790 | 1297 | = malloc (cbuf.chunks[cbuf.chunk_cnt - 1].end - cbuf.chunks[0].start); |
7324369a JJ |
1298 | if (cbuf.buf) |
1299 | { | |
1300 | cbuf.tgt = tgt; | |
1301 | cbufp = &cbuf; | |
1302 | } | |
1303 | } | |
1df3f842 JJ |
1304 | } |
1305 | else | |
1306 | { | |
1307 | tgt->to_free = NULL; | |
1308 | tgt->tgt_start = 0; | |
1309 | tgt->tgt_end = 0; | |
1310 | } | |
1311 | ||
1312 | tgt_size = 0; | |
f8332e52 | 1313 | if (pragma_kind & GOMP_MAP_VARS_TARGET) |
1df3f842 JJ |
1314 | tgt_size = mapnum * sizeof (void *); |
1315 | ||
1316 | tgt->array = NULL; | |
972da557 | 1317 | if (not_found_cnt || has_firstprivate || has_always_ptrset) |
1df3f842 | 1318 | { |
d9a6bd32 JJ |
1319 | if (not_found_cnt) |
1320 | tgt->array = gomp_malloc (not_found_cnt * sizeof (*tgt->array)); | |
1df3f842 | 1321 | splay_tree_node array = tgt->array; |
5cafae2c | 1322 | size_t j, field_tgt_offset = 0, field_tgt_clear = FIELD_TGT_EMPTY; |
d9a6bd32 | 1323 | uintptr_t field_tgt_base = 0; |
275c736e | 1324 | splay_tree_key field_tgt_structelem_first = NULL; |
1df3f842 JJ |
1325 | |
1326 | for (i = 0; i < mapnum; i++) | |
972da557 TB |
1327 | if (has_always_ptrset |
1328 | && tgt->list[i].key | |
1329 | && (get_kind (short_mapkind, kinds, i) & typemask) | |
1330 | == GOMP_MAP_TO_PSET) | |
1331 | { | |
1332 | splay_tree_key k = tgt->list[i].key; | |
1b9bdd52 | 1333 | bool has_nullptr = false; |
972da557 TB |
1334 | size_t j; |
1335 | for (j = 0; j < k->tgt->list_count; j++) | |
1336 | if (k->tgt->list[j].key == k) | |
1337 | { | |
1338 | has_nullptr = k->tgt->list[j].has_null_ptr_assoc; | |
1339 | break; | |
1340 | } | |
1341 | if (k->tgt->list_count == 0) | |
1342 | has_nullptr = true; | |
1343 | else | |
1344 | assert (j < k->tgt->list_count); | |
1345 | ||
1346 | tgt->list[i].has_null_ptr_assoc = false; | |
1347 | for (j = i + 1; j < mapnum; j++) | |
1348 | { | |
1349 | int ptr_kind = get_kind (short_mapkind, kinds, j) & typemask; | |
1350 | if (!GOMP_MAP_ALWAYS_POINTER_P (ptr_kind) | |
1351 | && (!has_nullptr | |
1352 | || !GOMP_MAP_POINTER_P (ptr_kind) | |
1353 | || *(void **) hostaddrs[j] == NULL)) | |
1354 | break; | |
1355 | else if ((uintptr_t) hostaddrs[j] < k->host_start | |
1356 | || ((uintptr_t) hostaddrs[j] + sizeof (void *) | |
1357 | > k->host_end)) | |
1358 | break; | |
1359 | else | |
1360 | { | |
1361 | if (*(void **) hostaddrs[j] == NULL) | |
1362 | tgt->list[i].has_null_ptr_assoc = true; | |
1363 | tgt->list[j].key = k; | |
1364 | tgt->list[j].copy_from = false; | |
1365 | tgt->list[j].always_copy_from = false; | |
1366 | tgt->list[j].is_attach = false; | |
275c736e | 1367 | gomp_increment_refcount (k, refcount_set); |
972da557 TB |
1368 | gomp_map_pointer (k->tgt, aq, |
1369 | (uintptr_t) *(void **) hostaddrs[j], | |
1370 | k->tgt_offset + ((uintptr_t) hostaddrs[j] | |
1371 | - k->host_start), | |
0ab29cf0 | 1372 | sizes[j], cbufp, false); |
972da557 TB |
1373 | } |
1374 | } | |
1375 | i = j - 1; | |
1376 | } | |
1377 | else if (tgt->list[i].key == NULL) | |
1df3f842 | 1378 | { |
d9a6bd32 | 1379 | int kind = get_kind (short_mapkind, kinds, i); |
b7e20480 | 1380 | bool implicit = get_implicit (short_mapkind, kinds, i); |
1df3f842 JJ |
1381 | if (hostaddrs[i] == NULL) |
1382 | continue; | |
d9a6bd32 JJ |
1383 | switch (kind & typemask) |
1384 | { | |
1385 | size_t align, len, first, last; | |
1386 | splay_tree_key n; | |
1387 | case GOMP_MAP_FIRSTPRIVATE: | |
1388 | align = (size_t) 1 << (kind >> rshift); | |
1389 | tgt_size = (tgt_size + align - 1) & ~(align - 1); | |
1390 | tgt->list[i].offset = tgt_size; | |
1391 | len = sizes[i]; | |
1f4c5b9b | 1392 | gomp_copy_host2dev (devicep, aq, |
6ce13072 | 1393 | (void *) (tgt->tgt_start + tgt_size), |
9c41f5b9 | 1394 | (void *) hostaddrs[i], len, false, cbufp); |
49d1a2f9 TB |
1395 | /* Save device address in hostaddr to permit latter availablity |
1396 | when doing a deep-firstprivate with pointer attach. */ | |
1397 | hostaddrs[i] = (void *) (tgt->tgt_start + tgt_size); | |
d9a6bd32 | 1398 | tgt_size += len; |
49d1a2f9 TB |
1399 | |
1400 | /* If followed by GOMP_MAP_ATTACH, pointer assign this | |
1401 | firstprivate to hostaddrs[i+1], which is assumed to contain a | |
1402 | device address. */ | |
1403 | if (i + 1 < mapnum | |
1404 | && (GOMP_MAP_ATTACH | |
1405 | == (typemask & get_kind (short_mapkind, kinds, i+1)))) | |
1406 | { | |
1407 | uintptr_t target = (uintptr_t) hostaddrs[i]; | |
1408 | void *devptr = *(void**) hostaddrs[i+1] + sizes[i+1]; | |
e8fec699 TS |
1409 | /* Per |
1410 | <https://inbox.sourceware.org/gcc-patches/87o7pe12ke.fsf@euler.schwinge.homeip.net> | |
1411 | "OpenMP: Handle descriptors in target's firstprivate [PR104949]" | |
1412 | this probably needs revision for 'aq' usage. */ | |
1413 | assert (!aq); | |
49d1a2f9 TB |
1414 | gomp_copy_host2dev (devicep, aq, devptr, &target, |
1415 | sizeof (void *), false, cbufp); | |
1416 | ++i; | |
1417 | } | |
d9a6bd32 JJ |
1418 | continue; |
1419 | case GOMP_MAP_FIRSTPRIVATE_INT: | |
d9a6bd32 JJ |
1420 | case GOMP_MAP_ZERO_LEN_ARRAY_SECTION: |
1421 | continue; | |
d5c23c6c | 1422 | case GOMP_MAP_USE_DEVICE_PTR_IF_PRESENT: |
b3b75e66 TS |
1423 | /* The OpenACC 'host_data' construct only allows 'use_device' |
1424 | "mapping" clauses, so in the first loop, 'not_found_cnt' | |
1425 | must always have been zero, so all OpenACC 'use_device' | |
1426 | clauses have already been handled. (We can only easily test | |
1427 | 'use_device' with 'if_present' clause here.) */ | |
1428 | assert (tgt->list[i].offset == OFFSET_INLINED); | |
1429 | /* Nevertheless, FALLTHRU to the normal handling, to keep the | |
1430 | code conceptually simple, similar to the first loop. */ | |
1431 | case GOMP_MAP_USE_DEVICE_PTR: | |
8860d270 JJ |
1432 | if (tgt->list[i].offset == 0) |
1433 | { | |
1434 | cur_node.host_start = (uintptr_t) hostaddrs[i]; | |
1435 | cur_node.host_end = cur_node.host_start; | |
1436 | n = gomp_map_lookup (mem_map, &cur_node); | |
b3b75e66 TS |
1437 | if (n != NULL) |
1438 | { | |
1439 | cur_node.host_start -= n->host_start; | |
1440 | hostaddrs[i] | |
1441 | = (void *) (n->tgt->tgt_start + n->tgt_offset | |
1442 | + cur_node.host_start); | |
1443 | } | |
1444 | else if ((kind & typemask) == GOMP_MAP_USE_DEVICE_PTR) | |
8860d270 JJ |
1445 | { |
1446 | gomp_mutex_unlock (&devicep->lock); | |
1447 | gomp_fatal ("use_device_ptr pointer wasn't mapped"); | |
1448 | } | |
b3b75e66 TS |
1449 | else if ((kind & typemask) |
1450 | == GOMP_MAP_USE_DEVICE_PTR_IF_PRESENT) | |
1451 | /* If not present, continue using the host address. */ | |
1452 | ; | |
1453 | else | |
1454 | __builtin_unreachable (); | |
1455 | tgt->list[i].offset = OFFSET_INLINED; | |
8860d270 JJ |
1456 | } |
1457 | continue; | |
d9a6bd32 JJ |
1458 | case GOMP_MAP_STRUCT: |
1459 | first = i + 1; | |
1460 | last = i + sizes[i]; | |
1461 | cur_node.host_start = (uintptr_t) hostaddrs[i]; | |
1462 | cur_node.host_end = (uintptr_t) hostaddrs[last] | |
1463 | + sizes[last]; | |
1464 | if (tgt->list[first].key != NULL) | |
1465 | continue; | |
1466 | n = splay_tree_lookup (mem_map, &cur_node); | |
1467 | if (n == NULL) | |
1468 | { | |
1469 | size_t align = (size_t) 1 << (kind >> rshift); | |
1470 | tgt_size -= (uintptr_t) hostaddrs[first] | |
1471 | - (uintptr_t) hostaddrs[i]; | |
1472 | tgt_size = (tgt_size + align - 1) & ~(align - 1); | |
1473 | tgt_size += (uintptr_t) hostaddrs[first] | |
1474 | - (uintptr_t) hostaddrs[i]; | |
1475 | field_tgt_base = (uintptr_t) hostaddrs[first]; | |
1476 | field_tgt_offset = tgt_size; | |
1477 | field_tgt_clear = last; | |
275c736e | 1478 | field_tgt_structelem_first = NULL; |
d9a6bd32 JJ |
1479 | tgt_size += cur_node.host_end |
1480 | - (uintptr_t) hostaddrs[first]; | |
1481 | continue; | |
1482 | } | |
1483 | for (i = first; i <= last; i++) | |
1f4c5b9b | 1484 | gomp_map_fields_existing (tgt, aq, n, first, i, hostaddrs, |
275c736e | 1485 | sizes, kinds, cbufp, refcount_set); |
d9a6bd32 JJ |
1486 | i--; |
1487 | continue; | |
e01d41e5 JJ |
1488 | case GOMP_MAP_ALWAYS_POINTER: |
1489 | cur_node.host_start = (uintptr_t) hostaddrs[i]; | |
1490 | cur_node.host_end = cur_node.host_start + sizeof (void *); | |
1491 | n = splay_tree_lookup (mem_map, &cur_node); | |
1492 | if (n == NULL | |
1493 | || n->host_start > cur_node.host_start | |
1494 | || n->host_end < cur_node.host_end) | |
1495 | { | |
1496 | gomp_mutex_unlock (&devicep->lock); | |
1497 | gomp_fatal ("always pointer not mapped"); | |
1498 | } | |
c7a9655b TB |
1499 | if (i > 0 |
1500 | && ((get_kind (short_mapkind, kinds, i - 1) & typemask) | |
1501 | != GOMP_MAP_ALWAYS_POINTER)) | |
e01d41e5 JJ |
1502 | cur_node.tgt_offset = gomp_map_val (tgt, hostaddrs, i - 1); |
1503 | if (cur_node.tgt_offset) | |
1504 | cur_node.tgt_offset -= sizes[i]; | |
1f4c5b9b | 1505 | gomp_copy_host2dev (devicep, aq, |
6ce13072 CLT |
1506 | (void *) (n->tgt->tgt_start |
1507 | + n->tgt_offset | |
1508 | + cur_node.host_start | |
1509 | - n->host_start), | |
1510 | (void *) &cur_node.tgt_offset, | |
9c41f5b9 | 1511 | sizeof (void *), true, cbufp); |
e01d41e5 JJ |
1512 | cur_node.tgt_offset = n->tgt->tgt_start + n->tgt_offset |
1513 | + cur_node.host_start - n->host_start; | |
1514 | continue; | |
a6163563 JB |
1515 | case GOMP_MAP_IF_PRESENT: |
1516 | /* Not present - otherwise handled above. Skip over its | |
1517 | MAP_POINTER as well. */ | |
1518 | if (i + 1 < mapnum | |
1519 | && ((typemask & get_kind (short_mapkind, kinds, i + 1)) | |
1520 | == GOMP_MAP_POINTER)) | |
1521 | ++i; | |
1522 | continue; | |
8e7e71ff | 1523 | case GOMP_MAP_ATTACH: |
0ab29cf0 | 1524 | case GOMP_MAP_ATTACH_ZERO_LENGTH_ARRAY_SECTION: |
8e7e71ff JB |
1525 | { |
1526 | cur_node.host_start = (uintptr_t) hostaddrs[i]; | |
1527 | cur_node.host_end = cur_node.host_start + sizeof (void *); | |
1528 | splay_tree_key n = splay_tree_lookup (mem_map, &cur_node); | |
1529 | if (n != NULL) | |
1530 | { | |
1531 | tgt->list[i].key = n; | |
1532 | tgt->list[i].offset = cur_node.host_start - n->host_start; | |
1533 | tgt->list[i].length = n->host_end - n->host_start; | |
1534 | tgt->list[i].copy_from = false; | |
1535 | tgt->list[i].always_copy_from = false; | |
bc4ed079 JB |
1536 | tgt->list[i].is_attach = true; |
1537 | /* OpenACC 'attach'/'detach' doesn't affect | |
1538 | structured/dynamic reference counts ('n->refcount', | |
1539 | 'n->dynamic_refcount'). */ | |
9e628024 | 1540 | |
0ab29cf0 CLT |
1541 | bool zlas |
1542 | = ((kind & typemask) | |
1543 | == GOMP_MAP_ATTACH_ZERO_LENGTH_ARRAY_SECTION); | |
9e628024 CLT |
1544 | gomp_attach_pointer (devicep, aq, mem_map, n, |
1545 | (uintptr_t) hostaddrs[i], sizes[i], | |
0ab29cf0 | 1546 | cbufp, zlas); |
8e7e71ff | 1547 | } |
9e628024 | 1548 | else if ((pragma_kind & GOMP_MAP_VARS_OPENACC) != 0) |
8e7e71ff JB |
1549 | { |
1550 | gomp_mutex_unlock (&devicep->lock); | |
1551 | gomp_fatal ("outer struct not mapped for attach"); | |
1552 | } | |
8e7e71ff JB |
1553 | continue; |
1554 | } | |
d9a6bd32 JJ |
1555 | default: |
1556 | break; | |
1557 | } | |
1df3f842 JJ |
1558 | splay_tree_key k = &array->key; |
1559 | k->host_start = (uintptr_t) hostaddrs[i]; | |
41dbbb37 | 1560 | if (!GOMP_MAP_POINTER_P (kind & typemask)) |
1df3f842 JJ |
1561 | k->host_end = k->host_start + sizes[i]; |
1562 | else | |
1563 | k->host_end = k->host_start + sizeof (void *); | |
a51df54e | 1564 | splay_tree_key n = splay_tree_lookup (mem_map, k); |
4a38b02b | 1565 | if (n && n->refcount != REFCOUNT_LINK) |
1f4c5b9b | 1566 | gomp_map_vars_existing (devicep, aq, n, k, &tgt->list[i], |
b7e20480 | 1567 | kind & typemask, false, implicit, cbufp, |
275c736e | 1568 | refcount_set); |
1df3f842 JJ |
1569 | else |
1570 | { | |
2a656a93 | 1571 | k->aux = NULL; |
4a38b02b IV |
1572 | if (n && n->refcount == REFCOUNT_LINK) |
1573 | { | |
1574 | /* Replace target address of the pointer with target address | |
1575 | of mapped object in the splay tree. */ | |
1576 | splay_tree_remove (mem_map, n); | |
2a656a93 JB |
1577 | k->aux |
1578 | = gomp_malloc_cleared (sizeof (struct splay_tree_aux)); | |
1579 | k->aux->link_key = n; | |
4a38b02b | 1580 | } |
41dbbb37 | 1581 | size_t align = (size_t) 1 << (kind >> rshift); |
d9a6bd32 | 1582 | tgt->list[i].key = k; |
1df3f842 | 1583 | k->tgt = tgt; |
275c736e CLT |
1584 | k->refcount = 0; |
1585 | k->dynamic_refcount = 0; | |
6c7e076b | 1586 | if (field_tgt_clear != FIELD_TGT_EMPTY) |
d9a6bd32 JJ |
1587 | { |
1588 | k->tgt_offset = k->host_start - field_tgt_base | |
1589 | + field_tgt_offset; | |
275c736e CLT |
1590 | if (openmp_p) |
1591 | { | |
1592 | k->refcount = REFCOUNT_STRUCTELEM; | |
1593 | if (field_tgt_structelem_first == NULL) | |
1594 | { | |
1595 | /* Set to first structure element of sequence. */ | |
1596 | k->refcount |= REFCOUNT_STRUCTELEM_FLAG_FIRST; | |
1597 | field_tgt_structelem_first = k; | |
1598 | } | |
1599 | else | |
1600 | /* Point to refcount of leading element, but do not | |
1601 | increment again. */ | |
1602 | k->structelem_refcount_ptr | |
1603 | = &field_tgt_structelem_first->structelem_refcount; | |
1604 | ||
1605 | if (i == field_tgt_clear) | |
1606 | { | |
1607 | k->refcount |= REFCOUNT_STRUCTELEM_FLAG_LAST; | |
1608 | field_tgt_structelem_first = NULL; | |
1609 | } | |
1610 | } | |
d9a6bd32 | 1611 | if (i == field_tgt_clear) |
6c7e076b | 1612 | field_tgt_clear = FIELD_TGT_EMPTY; |
d9a6bd32 JJ |
1613 | } |
1614 | else | |
1615 | { | |
1616 | tgt_size = (tgt_size + align - 1) & ~(align - 1); | |
1617 | k->tgt_offset = tgt_size; | |
1618 | tgt_size += k->host_end - k->host_start; | |
1619 | } | |
275c736e CLT |
1620 | /* First increment, from 0 to 1. gomp_increment_refcount |
1621 | encapsulates the different increment cases, so use this | |
1622 | instead of directly setting 1 during initialization. */ | |
1623 | gomp_increment_refcount (k, refcount_set); | |
1624 | ||
d9a6bd32 JJ |
1625 | tgt->list[i].copy_from = GOMP_MAP_COPY_FROM_P (kind & typemask); |
1626 | tgt->list[i].always_copy_from | |
1627 | = GOMP_MAP_ALWAYS_FROM_P (kind & typemask); | |
bc4ed079 | 1628 | tgt->list[i].is_attach = false; |
d9a6bd32 JJ |
1629 | tgt->list[i].offset = 0; |
1630 | tgt->list[i].length = k->host_end - k->host_start; | |
1df3f842 JJ |
1631 | tgt->refcount++; |
1632 | array->left = NULL; | |
1633 | array->right = NULL; | |
a51df54e | 1634 | splay_tree_insert (mem_map, array); |
41dbbb37 | 1635 | switch (kind & typemask) |
1df3f842 | 1636 | { |
41dbbb37 TS |
1637 | case GOMP_MAP_ALLOC: |
1638 | case GOMP_MAP_FROM: | |
1639 | case GOMP_MAP_FORCE_ALLOC: | |
1640 | case GOMP_MAP_FORCE_FROM: | |
d9a6bd32 | 1641 | case GOMP_MAP_ALWAYS_FROM: |
1df3f842 | 1642 | break; |
41dbbb37 TS |
1643 | case GOMP_MAP_TO: |
1644 | case GOMP_MAP_TOFROM: | |
1645 | case GOMP_MAP_FORCE_TO: | |
1646 | case GOMP_MAP_FORCE_TOFROM: | |
d9a6bd32 JJ |
1647 | case GOMP_MAP_ALWAYS_TO: |
1648 | case GOMP_MAP_ALWAYS_TOFROM: | |
1f4c5b9b | 1649 | gomp_copy_host2dev (devicep, aq, |
6ce13072 CLT |
1650 | (void *) (tgt->tgt_start |
1651 | + k->tgt_offset), | |
1652 | (void *) k->host_start, | |
9c41f5b9 JB |
1653 | k->host_end - k->host_start, |
1654 | false, cbufp); | |
1df3f842 | 1655 | break; |
41dbbb37 | 1656 | case GOMP_MAP_POINTER: |
0ab29cf0 CLT |
1657 | case GOMP_MAP_POINTER_TO_ZERO_LENGTH_ARRAY_SECTION: |
1658 | gomp_map_pointer | |
1659 | (tgt, aq, (uintptr_t) *(void **) k->host_start, | |
1660 | k->tgt_offset, sizes[i], cbufp, | |
1661 | ((kind & typemask) | |
1662 | == GOMP_MAP_POINTER_TO_ZERO_LENGTH_ARRAY_SECTION)); | |
1df3f842 | 1663 | break; |
41dbbb37 | 1664 | case GOMP_MAP_TO_PSET: |
1f4c5b9b | 1665 | gomp_copy_host2dev (devicep, aq, |
6ce13072 CLT |
1666 | (void *) (tgt->tgt_start |
1667 | + k->tgt_offset), | |
1668 | (void *) k->host_start, | |
9c41f5b9 JB |
1669 | k->host_end - k->host_start, |
1670 | false, cbufp); | |
972da557 | 1671 | tgt->list[i].has_null_ptr_assoc = false; |
41dbbb37 | 1672 | |
1df3f842 | 1673 | for (j = i + 1; j < mapnum; j++) |
972da557 TB |
1674 | { |
1675 | int ptr_kind = (get_kind (short_mapkind, kinds, j) | |
1676 | & typemask); | |
1677 | if (!GOMP_MAP_POINTER_P (ptr_kind) | |
1678 | && !GOMP_MAP_ALWAYS_POINTER_P (ptr_kind)) | |
1679 | break; | |
1680 | else if ((uintptr_t) hostaddrs[j] < k->host_start | |
1681 | || ((uintptr_t) hostaddrs[j] + sizeof (void *) | |
1682 | > k->host_end)) | |
1683 | break; | |
1684 | else | |
1685 | { | |
1686 | tgt->list[j].key = k; | |
1687 | tgt->list[j].copy_from = false; | |
1688 | tgt->list[j].always_copy_from = false; | |
1689 | tgt->list[j].is_attach = false; | |
1690 | tgt->list[i].has_null_ptr_assoc |= !(*(void **) hostaddrs[j]); | |
275c736e CLT |
1691 | /* For OpenMP, the use of refcount_sets causes |
1692 | errors if we set k->refcount = 1 above but also | |
1693 | increment it again here, for decrementing will | |
1694 | not properly match, since we decrement only once | |
1695 | for each key's refcount. Therefore avoid this | |
1696 | increment for OpenMP constructs. */ | |
1697 | if (!openmp_p) | |
1698 | gomp_increment_refcount (k, refcount_set); | |
972da557 TB |
1699 | gomp_map_pointer (tgt, aq, |
1700 | (uintptr_t) *(void **) hostaddrs[j], | |
1701 | k->tgt_offset | |
1702 | + ((uintptr_t) hostaddrs[j] | |
1703 | - k->host_start), | |
0ab29cf0 | 1704 | sizes[j], cbufp, false); |
972da557 | 1705 | } |
ea4b23d9 | 1706 | } |
972da557 | 1707 | i = j - 1; |
41dbbb37 TS |
1708 | break; |
1709 | case GOMP_MAP_FORCE_PRESENT: | |
38944ec2 TB |
1710 | case GOMP_MAP_ALWAYS_PRESENT_TO: |
1711 | case GOMP_MAP_ALWAYS_PRESENT_FROM: | |
1712 | case GOMP_MAP_ALWAYS_PRESENT_TOFROM: | |
41dbbb37 TS |
1713 | { |
1714 | /* We already looked up the memory region above and it | |
1715 | was missing. */ | |
1716 | size_t size = k->host_end - k->host_start; | |
a51df54e | 1717 | gomp_mutex_unlock (&devicep->lock); |
01c0b3b0 | 1718 | #ifdef HAVE_INTTYPES_H |
38944ec2 TB |
1719 | gomp_fatal ("present clause: not present on the device " |
1720 | "(addr: %p, size: %"PRIu64" (0x%"PRIx64"), " | |
1721 | "dev: %d)", (void *) k->host_start, | |
1722 | (uint64_t) size, (uint64_t) size, | |
1723 | devicep->target_id); | |
01c0b3b0 | 1724 | #else |
38944ec2 TB |
1725 | gomp_fatal ("present clause: not present on the device " |
1726 | "(addr: %p, size: %lu (0x%lx), dev: %d)", | |
1727 | (void *) k->host_start, | |
1728 | (unsigned long) size, (unsigned long) size, | |
1729 | devicep->target_id); | |
01c0b3b0 | 1730 | #endif |
41dbbb37 TS |
1731 | } |
1732 | break; | |
1733 | case GOMP_MAP_FORCE_DEVICEPTR: | |
1734 | assert (k->host_end - k->host_start == sizeof (void *)); | |
1f4c5b9b | 1735 | gomp_copy_host2dev (devicep, aq, |
6ce13072 CLT |
1736 | (void *) (tgt->tgt_start |
1737 | + k->tgt_offset), | |
1738 | (void *) k->host_start, | |
9c41f5b9 | 1739 | sizeof (void *), false, cbufp); |
41dbbb37 TS |
1740 | break; |
1741 | default: | |
a51df54e | 1742 | gomp_mutex_unlock (&devicep->lock); |
41dbbb37 TS |
1743 | gomp_fatal ("%s: unhandled kind 0x%.2x", __FUNCTION__, |
1744 | kind); | |
1df3f842 | 1745 | } |
4a38b02b | 1746 | |
2a656a93 | 1747 | if (k->aux && k->aux->link_key) |
4a38b02b IV |
1748 | { |
1749 | /* Set link pointer on target to the device address of the | |
1750 | mapped object. */ | |
1751 | void *tgt_addr = (void *) (tgt->tgt_start + k->tgt_offset); | |
84ca08d2 TS |
1752 | /* We intentionally do not use coalescing here, as it's not |
1753 | data allocated by the current call to this function. */ | |
1f4c5b9b | 1754 | gomp_copy_host2dev (devicep, aq, (void *) n->tgt_offset, |
9c41f5b9 | 1755 | &tgt_addr, sizeof (void *), true, NULL); |
4a38b02b | 1756 | } |
1df3f842 JJ |
1757 | array++; |
1758 | } | |
1759 | } | |
1760 | } | |
41dbbb37 | 1761 | |
f8332e52 | 1762 | if (pragma_kind & GOMP_MAP_VARS_TARGET) |
1df3f842 JJ |
1763 | { |
1764 | for (i = 0; i < mapnum; i++) | |
1765 | { | |
e01d41e5 | 1766 | cur_node.tgt_offset = gomp_map_val (tgt, hostaddrs, i); |
1f4c5b9b | 1767 | gomp_copy_host2dev (devicep, aq, |
6ce13072 | 1768 | (void *) (tgt->tgt_start + i * sizeof (void *)), |
7324369a | 1769 | (void *) &cur_node.tgt_offset, sizeof (void *), |
9c41f5b9 | 1770 | true, cbufp); |
1df3f842 JJ |
1771 | } |
1772 | } | |
1773 | ||
7324369a JJ |
1774 | if (cbufp) |
1775 | { | |
1776 | long c = 0; | |
1777 | for (c = 0; c < cbuf.chunk_cnt; ++c) | |
1f4c5b9b | 1778 | gomp_copy_host2dev (devicep, aq, |
a44c1790 TS |
1779 | (void *) (tgt->tgt_start + cbuf.chunks[c].start), |
1780 | (char *) cbuf.buf + (cbuf.chunks[c].start | |
1781 | - cbuf.chunks[0].start), | |
9c41f5b9 | 1782 | cbuf.chunks[c].end - cbuf.chunks[c].start, |
2b2340e2 TS |
1783 | false, NULL); |
1784 | if (aq) | |
1785 | /* Free once the transfer has completed. */ | |
1786 | devicep->openacc.async.queue_callback_func (aq, free, cbuf.buf); | |
1787 | else | |
1788 | free (cbuf.buf); | |
a44c1790 TS |
1789 | cbuf.buf = NULL; |
1790 | cbufp = NULL; | |
7324369a JJ |
1791 | } |
1792 | ||
d9a6bd32 JJ |
1793 | /* If the variable from "omp target enter data" map-list was already mapped, |
1794 | tgt is not needed. Otherwise tgt will be freed by gomp_unmap_vars or | |
1795 | gomp_exit_data. */ | |
9e628024 | 1796 | if ((pragma_kind & GOMP_MAP_VARS_ENTER_DATA) && tgt->refcount == 0) |
6f5b4b64 | 1797 | { |
d9a6bd32 JJ |
1798 | free (tgt); |
1799 | tgt = NULL; | |
1800 | } | |
1801 | ||
a51df54e | 1802 | gomp_mutex_unlock (&devicep->lock); |
1df3f842 JJ |
1803 | return tgt; |
1804 | } | |
1805 | ||
275c736e | 1806 | static struct target_mem_desc * |
1f4c5b9b CLT |
1807 | gomp_map_vars (struct gomp_device_descr *devicep, size_t mapnum, |
1808 | void **hostaddrs, void **devaddrs, size_t *sizes, void *kinds, | |
275c736e CLT |
1809 | bool short_mapkind, htab_t *refcount_set, |
1810 | enum gomp_map_vars_kind pragma_kind) | |
1f4c5b9b | 1811 | { |
275c736e CLT |
1812 | /* This management of a local refcount_set is for convenience of callers |
1813 | who do not share a refcount_set over multiple map/unmap uses. */ | |
1814 | htab_t local_refcount_set = NULL; | |
1815 | if (refcount_set == NULL) | |
1816 | { | |
1817 | local_refcount_set = htab_create (mapnum); | |
1818 | refcount_set = &local_refcount_set; | |
1819 | } | |
1820 | ||
1821 | struct target_mem_desc *tgt; | |
1822 | tgt = gomp_map_vars_internal (devicep, NULL, mapnum, hostaddrs, devaddrs, | |
1823 | sizes, kinds, short_mapkind, refcount_set, | |
1824 | pragma_kind); | |
1825 | if (local_refcount_set) | |
1826 | htab_free (local_refcount_set); | |
1827 | ||
1828 | return tgt; | |
1f4c5b9b CLT |
1829 | } |
1830 | ||
1831 | attribute_hidden struct target_mem_desc * | |
275c736e CLT |
1832 | goacc_map_vars (struct gomp_device_descr *devicep, |
1833 | struct goacc_asyncqueue *aq, size_t mapnum, | |
1834 | void **hostaddrs, void **devaddrs, size_t *sizes, | |
1835 | void *kinds, bool short_mapkind, | |
1836 | enum gomp_map_vars_kind pragma_kind) | |
1f4c5b9b CLT |
1837 | { |
1838 | return gomp_map_vars_internal (devicep, aq, mapnum, hostaddrs, devaddrs, | |
275c736e CLT |
1839 | sizes, kinds, short_mapkind, NULL, |
1840 | GOMP_MAP_VARS_OPENACC | pragma_kind); | |
1f4c5b9b CLT |
1841 | } |
1842 | ||
6278b549 | 1843 | static void |
1df3f842 JJ |
1844 | gomp_unmap_tgt (struct target_mem_desc *tgt) |
1845 | { | |
1846 | /* Deallocate on target the tgt->tgt_start .. tgt->tgt_end region. */ | |
1847 | if (tgt->tgt_end) | |
6ce13072 | 1848 | gomp_free_device_memory (tgt->device_descr, tgt->to_free); |
1df3f842 JJ |
1849 | |
1850 | free (tgt->array); | |
1851 | free (tgt); | |
1852 | } | |
1853 | ||
1cbd94e8 JB |
1854 | static bool |
1855 | gomp_unref_tgt (void *ptr) | |
829c6349 CLT |
1856 | { |
1857 | bool is_tgt_unmapped = false; | |
1cbd94e8 JB |
1858 | |
1859 | struct target_mem_desc *tgt = (struct target_mem_desc *) ptr; | |
1860 | ||
1861 | if (tgt->refcount > 1) | |
1862 | tgt->refcount--; | |
829c6349 CLT |
1863 | else |
1864 | { | |
1cbd94e8 | 1865 | gomp_unmap_tgt (tgt); |
829c6349 | 1866 | is_tgt_unmapped = true; |
829c6349 | 1867 | } |
1cbd94e8 | 1868 | |
829c6349 CLT |
1869 | return is_tgt_unmapped; |
1870 | } | |
1871 | ||
/* Adapter around gomp_unref_tgt with a void return type, as required for
   use as an asynchronous-queue callback (void (*)(void *)).  */

static void
gomp_unref_tgt_void (void *ptr)
{
  (void) gomp_unref_tgt (ptr);
}
1f4c5b9b | 1877 | |
275c736e CLT |
1878 | static void |
1879 | gomp_remove_splay_tree_key (splay_tree sp, splay_tree_key k) | |
1cbd94e8 | 1880 | { |
275c736e | 1881 | splay_tree_remove (sp, k); |
2a656a93 JB |
1882 | if (k->aux) |
1883 | { | |
1884 | if (k->aux->link_key) | |
275c736e | 1885 | splay_tree_insert (sp, (splay_tree_node) k->aux->link_key); |
5d5be7bf JB |
1886 | if (k->aux->attach_count) |
1887 | free (k->aux->attach_count); | |
2a656a93 JB |
1888 | free (k->aux); |
1889 | k->aux = NULL; | |
1890 | } | |
275c736e CLT |
1891 | } |
1892 | ||
/* Remove variable K from DEVICEP's memory map and drop one reference from
   its enclosing target_mem_desc, either synchronously (AQ == NULL; returns
   true iff the descriptor was unmapped) or deferred onto the asynchronous
   queue AQ (always returns false).  For structure-element keys, the whole
   sibling sequence created together by gomp_map_vars is removed.  */

static inline __attribute__((always_inline)) bool
gomp_remove_var_internal (struct gomp_device_descr *devicep, splay_tree_key k,
			  struct goacc_asyncqueue *aq)
{
  bool is_tgt_unmapped = false;

  if (REFCOUNT_STRUCTELEM_P (k->refcount))
    {
      if (REFCOUNT_STRUCTELEM_FIRST_P (k->refcount) == false)
	/* Infer the splay_tree_key of the first structelem key using the
	   pointer to the first structelem_refcount: non-FIRST siblings
	   point their structelem_refcount_ptr at the FIRST key's field.  */
	k = (splay_tree_key) ((char *) k->structelem_refcount_ptr
			      - offsetof (struct splay_tree_key_s,
					  structelem_refcount));
      assert (REFCOUNT_STRUCTELEM_FIRST_P (k->refcount));

      /* The array created by gomp_map_vars is an array of splay_tree_nodes,
	 with the splay_tree_keys embedded inside, so sibling keys are
	 reachable by stepping to the next node in that array.  */
      splay_tree_node node =
	(splay_tree_node) ((char *) k
			   - offsetof (struct splay_tree_node_s, key));
      while (true)
	{
	  /* Starting from the _FIRST key, and continue for all following
	     sibling keys until the one flagged _LAST.  */
	  gomp_remove_splay_tree_key (&devicep->mem_map, k);
	  if (REFCOUNT_STRUCTELEM_LAST_P (k->refcount))
	    break;
	  else
	    k = &(++node)->key;
	}
    }
  else
    gomp_remove_splay_tree_key (&devicep->mem_map, k);

  /* With an async queue the unref must wait until queued operations that
     may still touch the target memory have completed.  */
  if (aq)
    devicep->openacc.async.queue_callback_func (aq, gomp_unref_tgt_void,
						(void *) k->tgt);
  else
    is_tgt_unmapped = gomp_unref_tgt ((void *) k->tgt);
  return is_tgt_unmapped;
}
1935 | ||
/* Remove variable K from DEVICEP's memory map synchronously.  Returns true
   iff the enclosing target_mem_desc was unmapped as a result.  NOTE(review):
   callers in this file hold the device lock around this call — presumably
   that is part of the contract, as documented for gomp_remove_var_async.  */

attribute_hidden bool
gomp_remove_var (struct gomp_device_descr *devicep, splay_tree_key k)
{
  return gomp_remove_var_internal (devicep, k, NULL);
}
1941 | ||
/* Remove a variable asynchronously.  This actually removes the variable
   mapping immediately, but retains the linked target_mem_desc until the
   asynchronous operation has completed (as it may still refer to target
   memory).  The device lock must be held before entry, and remains locked on
   exit.  */

attribute_hidden void
gomp_remove_var_async (struct gomp_device_descr *devicep, splay_tree_key k,
		       struct goacc_asyncqueue *aq)
{
  /* The "was the descriptor unmapped?" result is meaningless here: with a
     non-NULL AQ the actual unref happens later in a queue callback.  */
  (void) gomp_remove_var_internal (devicep, k, aq);
}
1954 | ||
41dbbb37 TS |
/* Unmap variables described by TGT.  If DO_COPYFROM is true, copy relevant
   variables back from device to host: if it is false, it is assumed that this
   has been done already.  REFCOUNT_SET tracks which keys already had their
   structured refcount decremented during this construct; AQ, when non-NULL,
   defers device operations onto that asynchronous queue.  */

static inline __attribute__((always_inline)) void
gomp_unmap_vars_internal (struct target_mem_desc *tgt, bool do_copyfrom,
			  htab_t *refcount_set, struct goacc_asyncqueue *aq)
{
  struct gomp_device_descr *devicep = tgt->device_descr;

  /* An empty list means nothing is mapped through TGT; just release it.  */
  if (tgt->list_count == 0)
    {
      free (tgt);
      return;
    }

  gomp_mutex_lock (&devicep->lock);
  /* A finalized device has no live mappings left to copy back or unref;
     only the host-side bookkeeping needs freeing.  */
  if (devicep->state == GOMP_DEVICE_FINALIZED)
    {
      gomp_mutex_unlock (&devicep->lock);
      free (tgt->array);
      free (tgt);
      return;
    }

  size_t i;

  /* We must perform detachments before any copies back to the host.  */
  for (i = 0; i < tgt->list_count; i++)
    {
      splay_tree_key k = tgt->list[i].key;

      if (k != NULL && tgt->list[i].is_attach)
	gomp_detach_pointer (devicep, aq, k, tgt->list[i].key->host_start
			     + tgt->list[i].offset,
			     false, NULL);
    }

  /* Second pass: decrement refcounts, copy back where requested, and
     remove mappings whose refcount dropped to zero.  */
  for (i = 0; i < tgt->list_count; i++)
    {
      splay_tree_key k = tgt->list[i].key;
      if (k == NULL)
	continue;

      /* OpenACC 'attach'/'detach' doesn't affect structured/dynamic reference
	 counts ('n->refcount', 'n->dynamic_refcount').  */
      if (tgt->list[i].is_attach)
	continue;

      bool do_copy, do_remove;
      gomp_decrement_refcount (k, refcount_set, false, &do_copy, &do_remove);

      if ((do_copy && do_copyfrom && tgt->list[i].copy_from)
	  || tgt->list[i].always_copy_from)
	gomp_copy_dev2host (devicep, aq,
			    (void *) (k->host_start + tgt->list[i].offset),
			    (void *) (k->tgt->tgt_start + k->tgt_offset
				      + tgt->list[i].offset),
			    tgt->list[i].length);
      if (do_remove)
	{
	  struct target_mem_desc *k_tgt = k->tgt;
	  bool is_tgt_unmapped = gomp_remove_var (devicep, k);
	  /* It would be bad if TGT got unmapped while we're still iterating
	     over its LIST_COUNT, and also expect to use it in the following
	     code.  */
	  assert (!is_tgt_unmapped
		  || k_tgt != tgt);
	}
    }

  /* Finally drop TGT's own reference — deferred onto AQ if asynchronous.  */
  if (aq)
    devicep->openacc.async.queue_callback_func (aq, gomp_unref_tgt_void,
						(void *) tgt);
  else
    gomp_unref_tgt ((void *) tgt);

  gomp_mutex_unlock (&devicep->lock);
}
2034 | ||
275c736e CLT |
2035 | static void |
2036 | gomp_unmap_vars (struct target_mem_desc *tgt, bool do_copyfrom, | |
2037 | htab_t *refcount_set) | |
1f4c5b9b | 2038 | { |
275c736e CLT |
2039 | /* This management of a local refcount_set is for convenience of callers |
2040 | who do not share a refcount_set over multiple map/unmap uses. */ | |
2041 | htab_t local_refcount_set = NULL; | |
2042 | if (refcount_set == NULL) | |
2043 | { | |
2044 | local_refcount_set = htab_create (tgt->list_count); | |
2045 | refcount_set = &local_refcount_set; | |
2046 | } | |
2047 | ||
2048 | gomp_unmap_vars_internal (tgt, do_copyfrom, refcount_set, NULL); | |
2049 | ||
2050 | if (local_refcount_set) | |
2051 | htab_free (local_refcount_set); | |
1f4c5b9b CLT |
2052 | } |
2053 | ||
/* OpenACC entry point for unmapping the variables described by TGT: no
   refcount set (an OpenMP-only mechanism), with an optional asynchronous
   queue AQ.  */

attribute_hidden void
goacc_unmap_vars (struct target_mem_desc *tgt, bool do_copyfrom,
		  struct goacc_asyncqueue *aq)
{
  gomp_unmap_vars_internal (tgt, do_copyfrom, NULL, aq);
}
1df3f842 | 2061 | static void |
a51df54e | 2062 | gomp_update (struct gomp_device_descr *devicep, size_t mapnum, void **hostaddrs, |
d9a6bd32 | 2063 | size_t *sizes, void *kinds, bool short_mapkind) |
1df3f842 JJ |
2064 | { |
2065 | size_t i; | |
2066 | struct splay_tree_key_s cur_node; | |
d9a6bd32 | 2067 | const int typemask = short_mapkind ? 0xff : 0x7; |
1df3f842 JJ |
2068 | |
2069 | if (!devicep) | |
2070 | return; | |
2071 | ||
2072 | if (mapnum == 0) | |
2073 | return; | |
2074 | ||
a51df54e | 2075 | gomp_mutex_lock (&devicep->lock); |
d84ffc0a IV |
2076 | if (devicep->state == GOMP_DEVICE_FINALIZED) |
2077 | { | |
2078 | gomp_mutex_unlock (&devicep->lock); | |
2079 | return; | |
2080 | } | |
2081 | ||
1df3f842 JJ |
2082 | for (i = 0; i < mapnum; i++) |
2083 | if (sizes[i]) | |
2084 | { | |
2085 | cur_node.host_start = (uintptr_t) hostaddrs[i]; | |
2086 | cur_node.host_end = cur_node.host_start + sizes[i]; | |
a51df54e | 2087 | splay_tree_key n = splay_tree_lookup (&devicep->mem_map, &cur_node); |
1df3f842 JJ |
2088 | if (n) |
2089 | { | |
d9a6bd32 | 2090 | int kind = get_kind (short_mapkind, kinds, i); |
1df3f842 JJ |
2091 | if (n->host_start > cur_node.host_start |
2092 | || n->host_end < cur_node.host_end) | |
a51df54e IV |
2093 | { |
2094 | gomp_mutex_unlock (&devicep->lock); | |
2095 | gomp_fatal ("Trying to update [%p..%p) object when " | |
2096 | "only [%p..%p) is mapped", | |
2097 | (void *) cur_node.host_start, | |
2098 | (void *) cur_node.host_end, | |
2099 | (void *) n->host_start, | |
2100 | (void *) n->host_end); | |
2101 | } | |
6ce13072 | 2102 | |
6c039937 CLT |
2103 | if (n->aux && n->aux->attach_count) |
2104 | { | |
2105 | uintptr_t addr = cur_node.host_start; | |
2106 | while (addr < cur_node.host_end) | |
2107 | { | |
2108 | /* We have to be careful not to overwrite still attached | |
2109 | pointers during host<->device updates. */ | |
2110 | size_t i = (addr - cur_node.host_start) / sizeof (void *); | |
2111 | if (n->aux->attach_count[i] == 0) | |
2112 | { | |
2113 | void *devaddr = (void *) (n->tgt->tgt_start | |
2114 | + n->tgt_offset | |
2115 | + addr - n->host_start); | |
2116 | if (GOMP_MAP_COPY_TO_P (kind & typemask)) | |
2117 | gomp_copy_host2dev (devicep, NULL, | |
2118 | devaddr, (void *) addr, | |
2119 | sizeof (void *), false, NULL); | |
2120 | if (GOMP_MAP_COPY_FROM_P (kind & typemask)) | |
2121 | gomp_copy_dev2host (devicep, NULL, | |
2122 | (void *) addr, devaddr, | |
2123 | sizeof (void *)); | |
2124 | } | |
2125 | addr += sizeof (void *); | |
2126 | } | |
2127 | } | |
2128 | else | |
2129 | { | |
2130 | void *hostaddr = (void *) cur_node.host_start; | |
2131 | void *devaddr = (void *) (n->tgt->tgt_start + n->tgt_offset | |
2132 | + cur_node.host_start | |
2133 | - n->host_start); | |
2134 | size_t size = cur_node.host_end - cur_node.host_start; | |
2135 | ||
2136 | if (GOMP_MAP_COPY_TO_P (kind & typemask)) | |
2137 | gomp_copy_host2dev (devicep, NULL, devaddr, hostaddr, size, | |
2138 | false, NULL); | |
2139 | if (GOMP_MAP_COPY_FROM_P (kind & typemask)) | |
2140 | gomp_copy_dev2host (devicep, NULL, hostaddr, devaddr, size); | |
2141 | } | |
1df3f842 | 2142 | } |
4ede915d TB |
2143 | else |
2144 | { | |
2145 | int kind = get_kind (short_mapkind, kinds, i); | |
2146 | ||
2147 | if (GOMP_MAP_PRESENT_P (kind)) | |
2148 | { | |
2149 | /* We already looked up the memory region above and it | |
2150 | was missing. */ | |
2151 | gomp_mutex_unlock (&devicep->lock); | |
38944ec2 | 2152 | #ifdef HAVE_INTTYPES_H |
4ede915d | 2153 | gomp_fatal ("present clause: not present on the device " |
38944ec2 TB |
2154 | "(addr: %p, size: %"PRIu64" (0x%"PRIx64"), " |
2155 | "dev: %d)", (void *) hostaddrs[i], | |
2156 | (uint64_t) sizes[i], (uint64_t) sizes[i], | |
2157 | devicep->target_id); | |
2158 | #else | |
2159 | gomp_fatal ("present clause: not present on the device " | |
2160 | "(addr: %p, size: %lu (0x%lx), dev: %d)", | |
2161 | (void *) hostaddrs[i], (unsigned long) sizes[i], | |
2162 | (unsigned long) sizes[i], devicep->target_id); | |
2163 | #endif | |
4ede915d TB |
2164 | } |
2165 | } | |
1df3f842 | 2166 | } |
a51df54e IV |
2167 | gomp_mutex_unlock (&devicep->lock); |
2168 | } | |
2169 | ||
9f2fca56 MV |
2170 | static struct gomp_offload_icv_list * |
2171 | gomp_get_offload_icv_item (int dev_num) | |
2172 | { | |
2173 | struct gomp_offload_icv_list *l = gomp_offload_icv_list; | |
2174 | while (l != NULL && l->device_num != dev_num) | |
2175 | l = l->next; | |
2176 | ||
2177 | return l; | |
2178 | } | |
2179 | ||
/* Helper function for 'gomp_load_image_to_device'.  Returns the ICV values
   depending on the device num and the variable hierarchy
   (_DEV_42, _DEV, _ALL).  If no ICV was initially configured for the given
   device and thus no item with that device number is contained in
   gomp_offload_icv_list, then a new item is created and added to the list.  */

static struct gomp_offload_icvs *
get_gomp_offload_icvs (int dev_num)
{
  /* The three levels of the hierarchy, most specific first when consulted
     below: the device-specific (_DEV_42), device-generic (_DEV) and
     global (_ALL) initial ICV settings.  */
  struct gomp_icv_list *dev
    = gomp_get_initial_icv_item (GOMP_DEVICE_NUM_FOR_DEV);
  struct gomp_icv_list *all
    = gomp_get_initial_icv_item (GOMP_DEVICE_NUM_FOR_ALL);
  struct gomp_icv_list *dev_x = gomp_get_initial_icv_item (dev_num);
  struct gomp_offload_icv_list *offload_icvs
    = gomp_get_offload_icv_item (dev_num);

  /* Reuse an already-materialized entry for this device.  */
  if (offload_icvs != NULL)
    return &offload_icvs->icvs;

  struct gomp_offload_icv_list *new
    = (struct gomp_offload_icv_list *) gomp_malloc (sizeof (struct gomp_offload_icv_list));

  new->device_num = dev_num;
  new->icvs.device_num = dev_num;
  new->next = gomp_offload_icv_list;

  /* For each ICV pick the first level of the hierarchy where the flag was
     explicitly set: _DEV_42 over _DEV over _ALL over the built-in default.  */
  if (dev_x != NULL && gomp_get_icv_flag (dev_x->flags, GOMP_ICV_NTEAMS))
    new->icvs.nteams = dev_x->icvs.nteams_var;
  else if (dev != NULL && gomp_get_icv_flag (dev->flags, GOMP_ICV_NTEAMS))
    new->icvs.nteams = dev->icvs.nteams_var;
  else if (all != NULL && gomp_get_icv_flag (all->flags, GOMP_ICV_NTEAMS))
    new->icvs.nteams = all->icvs.nteams_var;
  else
    new->icvs.nteams = gomp_default_icv_values.nteams_var;

  if (dev_x != NULL
      && gomp_get_icv_flag (dev_x->flags, GOMP_ICV_TEAMS_THREAD_LIMIT))
    new->icvs.teams_thread_limit = dev_x->icvs.teams_thread_limit_var;
  else if (dev != NULL
	   && gomp_get_icv_flag (dev->flags, GOMP_ICV_TEAMS_THREAD_LIMIT))
    new->icvs.teams_thread_limit = dev->icvs.teams_thread_limit_var;
  else if (all != NULL
	   && gomp_get_icv_flag (all->flags, GOMP_ICV_TEAMS_THREAD_LIMIT))
    new->icvs.teams_thread_limit = all->icvs.teams_thread_limit_var;
  else
    new->icvs.teams_thread_limit
      = gomp_default_icv_values.teams_thread_limit_var;

  if (dev_x != NULL
      && gomp_get_icv_flag (dev_x->flags, GOMP_ICV_DEFAULT_DEVICE))
    new->icvs.default_device = dev_x->icvs.default_device_var;
  else if (dev != NULL
	   && gomp_get_icv_flag (dev->flags, GOMP_ICV_DEFAULT_DEVICE))
    new->icvs.default_device = dev->icvs.default_device_var;
  else if (all != NULL
	   && gomp_get_icv_flag (all->flags, GOMP_ICV_DEFAULT_DEVICE))
    new->icvs.default_device = all->icvs.default_device_var;
  else
    new->icvs.default_device = gomp_default_icv_values.default_device_var;

  /* Prepend to the global list so later lookups find this entry.  */
  gomp_offload_icv_list = new;
  return &new->icvs;
}
2244 | ||
a51df54e IV |
2245 | /* Load image pointed by TARGET_DATA to the device, specified by DEVICEP. |
2246 | And insert to splay tree the mapping between addresses from HOST_TABLE and | |
22be2349 NS |
2247 | from loaded target image. We rely in the host and device compiler |
2248 | emitting variable and functions in the same order. */ | |
a51df54e IV |
2249 | |
2250 | static void | |
2a21ff19 | 2251 | gomp_load_image_to_device (struct gomp_device_descr *devicep, unsigned version, |
22be2349 NS |
2252 | const void *host_table, const void *target_data, |
2253 | bool is_register_lock) | |
a51df54e IV |
2254 | { |
2255 | void **host_func_table = ((void ***) host_table)[0]; | |
2256 | void **host_funcs_end = ((void ***) host_table)[1]; | |
2257 | void **host_var_table = ((void ***) host_table)[2]; | |
2258 | void **host_vars_end = ((void ***) host_table)[3]; | |
a49c7d31 KCY |
2259 | void **host_ind_func_table = NULL; |
2260 | void **host_ind_funcs_end = NULL; | |
a51df54e | 2261 | |
a49c7d31 KCY |
2262 | if (GOMP_VERSION_SUPPORTS_INDIRECT_FUNCS (version)) |
2263 | { | |
2264 | host_ind_func_table = ((void ***) host_table)[4]; | |
2265 | host_ind_funcs_end = ((void ***) host_table)[5]; | |
2266 | } | |
2267 | ||
2268 | /* The func and ind_func tables contain only addresses, the var table | |
2269 | contains addresses and corresponding sizes. */ | |
a51df54e IV |
2270 | int num_funcs = host_funcs_end - host_func_table; |
2271 | int num_vars = (host_vars_end - host_var_table) / 2; | |
a49c7d31 | 2272 | int num_ind_funcs = (host_ind_funcs_end - host_ind_func_table); |
a51df54e IV |
2273 | |
2274 | /* Load image to device and get target addresses for the image. */ | |
2275 | struct addr_pair *target_table = NULL; | |
ea4b23d9 | 2276 | uint64_t *rev_target_fn_table = NULL; |
2a21ff19 NS |
2277 | int i, num_target_entries; |
2278 | ||
ea4b23d9 TB |
2279 | /* With reverse offload, insert also target-host addresses. */ |
2280 | bool rev_lookup = omp_requires_mask & GOMP_REQUIRES_REVERSE_OFFLOAD; | |
2281 | ||
2a21ff19 NS |
2282 | num_target_entries |
2283 | = devicep->load_image_func (devicep->target_id, version, | |
ea4b23d9 | 2284 | target_data, &target_table, |
a49c7d31 KCY |
2285 | rev_lookup ? &rev_target_fn_table : NULL, |
2286 | num_ind_funcs | |
2287 | ? (uint64_t *) host_ind_func_table : NULL); | |
a51df54e | 2288 | |
0bac793e | 2289 | if (num_target_entries != num_funcs + num_vars |
9f2fca56 MV |
2290 | /* "+1" due to the additional ICV struct. */ |
2291 | && num_target_entries != num_funcs + num_vars + 1) | |
a51df54e IV |
2292 | { |
2293 | gomp_mutex_unlock (&devicep->lock); | |
2294 | if (is_register_lock) | |
2295 | gomp_mutex_unlock (®ister_lock); | |
2a21ff19 NS |
2296 | gomp_fatal ("Cannot map target functions or variables" |
2297 | " (expected %u, have %u)", num_funcs + num_vars, | |
2298 | num_target_entries); | |
a51df54e IV |
2299 | } |
2300 | ||
2301 | /* Insert host-target address mapping into splay tree. */ | |
2302 | struct target_mem_desc *tgt = gomp_malloc (sizeof (*tgt)); | |
9f2fca56 MV |
2303 | /* "+1" due to the additional ICV struct. */ |
2304 | tgt->array = gomp_malloc ((num_funcs + num_vars + 1) | |
2305 | * sizeof (*tgt->array)); | |
ea4b23d9 TB |
2306 | if (rev_target_fn_table) |
2307 | tgt->rev_array = gomp_malloc (num_funcs * sizeof (*tgt->rev_array)); | |
2308 | else | |
2309 | tgt->rev_array = NULL; | |
d9a6bd32 | 2310 | tgt->refcount = REFCOUNT_INFINITY; |
a51df54e IV |
2311 | tgt->tgt_start = 0; |
2312 | tgt->tgt_end = 0; | |
2313 | tgt->to_free = NULL; | |
2314 | tgt->prev = NULL; | |
2315 | tgt->list_count = 0; | |
2316 | tgt->device_descr = devicep; | |
2317 | splay_tree_node array = tgt->array; | |
ea4b23d9 | 2318 | reverse_splay_tree_node rev_array = tgt->rev_array; |
a51df54e IV |
2319 | |
2320 | for (i = 0; i < num_funcs; i++) | |
2321 | { | |
2322 | splay_tree_key k = &array->key; | |
2323 | k->host_start = (uintptr_t) host_func_table[i]; | |
2324 | k->host_end = k->host_start + 1; | |
2325 | k->tgt = tgt; | |
2326 | k->tgt_offset = target_table[i].start; | |
d9a6bd32 | 2327 | k->refcount = REFCOUNT_INFINITY; |
6f5b4b64 | 2328 | k->dynamic_refcount = 0; |
2a656a93 | 2329 | k->aux = NULL; |
a51df54e IV |
2330 | array->left = NULL; |
2331 | array->right = NULL; | |
2332 | splay_tree_insert (&devicep->mem_map, array); | |
ea4b23d9 TB |
2333 | if (rev_target_fn_table) |
2334 | { | |
2335 | reverse_splay_tree_key k2 = &rev_array->key; | |
2336 | k2->dev = rev_target_fn_table[i]; | |
2337 | k2->k = k; | |
2338 | rev_array->left = NULL; | |
2339 | rev_array->right = NULL; | |
2340 | if (k2->dev != 0) | |
2341 | reverse_splay_tree_insert (&devicep->mem_map_rev, rev_array); | |
2342 | rev_array++; | |
2343 | } | |
a51df54e IV |
2344 | array++; |
2345 | } | |
2346 | ||
4a38b02b IV |
2347 | /* Most significant bit of the size in host and target tables marks |
2348 | "omp declare target link" variables. */ | |
2349 | const uintptr_t link_bit = 1ULL << (sizeof (uintptr_t) * __CHAR_BIT__ - 1); | |
2350 | const uintptr_t size_mask = ~link_bit; | |
2351 | ||
a51df54e IV |
2352 | for (i = 0; i < num_vars; i++) |
2353 | { | |
2354 | struct addr_pair *target_var = &target_table[num_funcs + i]; | |
4a38b02b | 2355 | uintptr_t target_size = target_var->end - target_var->start; |
4897bb00 | 2356 | bool is_link_var = link_bit & (uintptr_t) host_var_table[i * 2 + 1]; |
4a38b02b | 2357 | |
4897bb00 | 2358 | if (!is_link_var && (uintptr_t) host_var_table[i * 2 + 1] != target_size) |
a51df54e IV |
2359 | { |
2360 | gomp_mutex_unlock (&devicep->lock); | |
2361 | if (is_register_lock) | |
2362 | gomp_mutex_unlock (®ister_lock); | |
4a38b02b | 2363 | gomp_fatal ("Cannot map target variables (size mismatch)"); |
a51df54e IV |
2364 | } |
2365 | ||
2366 | splay_tree_key k = &array->key; | |
2367 | k->host_start = (uintptr_t) host_var_table[i * 2]; | |
4a38b02b IV |
2368 | k->host_end |
2369 | = k->host_start + (size_mask & (uintptr_t) host_var_table[i * 2 + 1]); | |
a51df54e IV |
2370 | k->tgt = tgt; |
2371 | k->tgt_offset = target_var->start; | |
4897bb00 | 2372 | k->refcount = is_link_var ? REFCOUNT_LINK : REFCOUNT_INFINITY; |
6f5b4b64 | 2373 | k->dynamic_refcount = 0; |
2a656a93 | 2374 | k->aux = NULL; |
a51df54e IV |
2375 | array->left = NULL; |
2376 | array->right = NULL; | |
2377 | splay_tree_insert (&devicep->mem_map, array); | |
2378 | array++; | |
2379 | } | |
2380 | ||
9f2fca56 MV |
2381 | /* Last entry is for a ICVs variable. |
2382 | Tolerate case where plugin does not return those entries. */ | |
0bac793e CLT |
2383 | if (num_funcs + num_vars < num_target_entries) |
2384 | { | |
9f2fca56 MV |
2385 | struct addr_pair *var = &target_table[num_funcs + num_vars]; |
2386 | ||
2387 | /* Start address will be non-zero for the ICVs variable if | |
2388 | the variable was found in this image. */ | |
2389 | if (var->start != 0) | |
0bac793e CLT |
2390 | { |
2391 | /* The index of the devicep within devices[] is regarded as its | |
2392 | 'device number', which is different from the per-device type | |
2393 | devicep->target_id. */ | |
9f2fca56 MV |
2394 | int dev_num = (int) (devicep - &devices[0]); |
2395 | struct gomp_offload_icvs *icvs = get_gomp_offload_icvs (dev_num); | |
2396 | size_t var_size = var->end - var->start; | |
81476bc4 MV |
2397 | if (var_size != sizeof (struct gomp_offload_icvs)) |
2398 | { | |
2399 | gomp_mutex_unlock (&devicep->lock); | |
2400 | if (is_register_lock) | |
2401 | gomp_mutex_unlock (®ister_lock); | |
2402 | gomp_fatal ("offload plugin managed 'icv struct' not of expected " | |
2403 | "format"); | |
2404 | } | |
9f2fca56 MV |
2405 | /* Copy the ICVs variable to place on device memory, hereby |
2406 | actually designating its device number into effect. */ | |
2407 | gomp_copy_host2dev (devicep, NULL, (void *) var->start, icvs, | |
2408 | var_size, false, NULL); | |
2409 | splay_tree_key k = &array->key; | |
2410 | k->host_start = (uintptr_t) icvs; | |
2411 | k->host_end = | |
2412 | k->host_start + (size_mask & sizeof (struct gomp_offload_icvs)); | |
2413 | k->tgt = tgt; | |
2414 | k->tgt_offset = var->start; | |
2415 | k->refcount = REFCOUNT_INFINITY; | |
2416 | k->dynamic_refcount = 0; | |
2417 | k->aux = NULL; | |
2418 | array->left = NULL; | |
2419 | array->right = NULL; | |
2420 | splay_tree_insert (&devicep->mem_map, array); | |
2421 | array++; | |
0bac793e CLT |
2422 | } |
2423 | } | |
2424 | ||
a51df54e | 2425 | free (target_table); |
1df3f842 JJ |
2426 | } |
2427 | ||
22be2349 NS |
2428 | /* Unload the mappings described by target_data from device DEVICE_P. |
2429 | The device must be locked. */ | |
2430 | ||
2431 | static void | |
2432 | gomp_unload_image_from_device (struct gomp_device_descr *devicep, | |
2a21ff19 | 2433 | unsigned version, |
22be2349 NS |
2434 | const void *host_table, const void *target_data) |
2435 | { | |
2436 | void **host_func_table = ((void ***) host_table)[0]; | |
2437 | void **host_funcs_end = ((void ***) host_table)[1]; | |
2438 | void **host_var_table = ((void ***) host_table)[2]; | |
2439 | void **host_vars_end = ((void ***) host_table)[3]; | |
2440 | ||
2441 | /* The func table contains only addresses, the var table contains addresses | |
2442 | and corresponding sizes. */ | |
2443 | int num_funcs = host_funcs_end - host_func_table; | |
2444 | int num_vars = (host_vars_end - host_var_table) / 2; | |
2445 | ||
22be2349 NS |
2446 | struct splay_tree_key_s k; |
2447 | splay_tree_key node = NULL; | |
2448 | ||
2449 | /* Find mapping at start of node array */ | |
2450 | if (num_funcs || num_vars) | |
2451 | { | |
2452 | k.host_start = (num_funcs ? (uintptr_t) host_func_table[0] | |
2453 | : (uintptr_t) host_var_table[0]); | |
2454 | k.host_end = k.host_start + 1; | |
2455 | node = splay_tree_lookup (&devicep->mem_map, &k); | |
2456 | } | |
2a21ff19 | 2457 | |
6ce13072 CLT |
2458 | if (!devicep->unload_image_func (devicep->target_id, version, target_data)) |
2459 | { | |
2460 | gomp_mutex_unlock (&devicep->lock); | |
2461 | gomp_fatal ("image unload fail"); | |
2462 | } | |
ea4b23d9 TB |
2463 | if (devicep->mem_map_rev.root) |
2464 | { | |
2465 | /* Free reverse offload splay tree + data; 'tgt->rev_array' is the only | |
2466 | real allocation. */ | |
2467 | assert (node && node->tgt && node->tgt->rev_array); | |
2468 | assert (devicep->mem_map_rev.root->key.k->tgt == node->tgt); | |
2469 | free (node->tgt->rev_array); | |
2470 | devicep->mem_map_rev.root = NULL; | |
2471 | } | |
22be2349 NS |
2472 | |
2473 | /* Remove mappings from splay tree. */ | |
4a38b02b IV |
2474 | int i; |
2475 | for (i = 0; i < num_funcs; i++) | |
22be2349 | 2476 | { |
4a38b02b | 2477 | k.host_start = (uintptr_t) host_func_table[i]; |
22be2349 NS |
2478 | k.host_end = k.host_start + 1; |
2479 | splay_tree_remove (&devicep->mem_map, &k); | |
2480 | } | |
2481 | ||
4a38b02b IV |
2482 | /* Most significant bit of the size in host and target tables marks |
2483 | "omp declare target link" variables. */ | |
2484 | const uintptr_t link_bit = 1ULL << (sizeof (uintptr_t) * __CHAR_BIT__ - 1); | |
2485 | const uintptr_t size_mask = ~link_bit; | |
2486 | bool is_tgt_unmapped = false; | |
2487 | ||
2488 | for (i = 0; i < num_vars; i++) | |
22be2349 | 2489 | { |
4a38b02b IV |
2490 | k.host_start = (uintptr_t) host_var_table[i * 2]; |
2491 | k.host_end | |
2492 | = k.host_start + (size_mask & (uintptr_t) host_var_table[i * 2 + 1]); | |
2493 | ||
2494 | if (!(link_bit & (uintptr_t) host_var_table[i * 2 + 1])) | |
2495 | splay_tree_remove (&devicep->mem_map, &k); | |
2496 | else | |
2497 | { | |
2498 | splay_tree_key n = splay_tree_lookup (&devicep->mem_map, &k); | |
829c6349 | 2499 | is_tgt_unmapped = gomp_remove_var (devicep, n); |
4a38b02b | 2500 | } |
22be2349 NS |
2501 | } |
2502 | ||
4a38b02b | 2503 | if (node && !is_tgt_unmapped) |
22be2349 NS |
2504 | { |
2505 | free (node->tgt); | |
2506 | free (node); | |
2507 | } | |
2508 | } | |
2509 | ||
683f1184 TB |
2510 | static void |
2511 | gomp_requires_to_name (char *buf, size_t size, int requires_mask) | |
2512 | { | |
2513 | char *end = buf + size, *p = buf; | |
2514 | if (requires_mask & GOMP_REQUIRES_UNIFIED_ADDRESS) | |
2515 | p += snprintf (p, end - p, "unified_address"); | |
2516 | if (requires_mask & GOMP_REQUIRES_UNIFIED_SHARED_MEMORY) | |
2517 | p += snprintf (p, end - p, "%sunified_shared_memory", | |
2518 | (p == buf ? "" : ", ")); | |
2519 | if (requires_mask & GOMP_REQUIRES_REVERSE_OFFLOAD) | |
2520 | p += snprintf (p, end - p, "%sreverse_offload", | |
2521 | (p == buf ? "" : ", ")); | |
2522 | } | |
2523 | ||
a51df54e | 2524 | /* This function should be called from every offload image while loading. |
1df3f842 | 2525 | It gets the descriptor of the host func and var tables HOST_TABLE, TYPE of |
3f05e03d | 2526 | the target, and DATA. */ |
1df3f842 JJ |
2527 | |
2528 | void | |
2a21ff19 | 2529 | GOMP_offload_register_ver (unsigned version, const void *host_table, |
3f05e03d | 2530 | int target_type, const void *data) |
1df3f842 | 2531 | { |
a51df54e | 2532 | int i; |
2a21ff19 NS |
2533 | |
2534 | if (GOMP_VERSION_LIB (version) > GOMP_VERSION) | |
2535 | gomp_fatal ("Library too old for offload (version %u < %u)", | |
2536 | GOMP_VERSION, GOMP_VERSION_LIB (version)); | |
683f1184 | 2537 | |
3f05e03d TS |
2538 | int omp_req; |
2539 | const void *target_data; | |
683f1184 TB |
2540 | if (GOMP_VERSION_LIB (version) > 1) |
2541 | { | |
3f05e03d TS |
2542 | omp_req = (int) (size_t) ((void **) data)[0]; |
2543 | target_data = &((void **) data)[1]; | |
2544 | } | |
2545 | else | |
2546 | { | |
2547 | omp_req = 0; | |
2548 | target_data = data; | |
683f1184 TB |
2549 | } |
2550 | ||
a51df54e IV |
2551 | gomp_mutex_lock (®ister_lock); |
2552 | ||
683f1184 TB |
2553 | if (omp_req && omp_requires_mask && omp_requires_mask != omp_req) |
2554 | { | |
2555 | char buf1[sizeof ("unified_address, unified_shared_memory, " | |
2556 | "reverse_offload")]; | |
2557 | char buf2[sizeof ("unified_address, unified_shared_memory, " | |
2558 | "reverse_offload")]; | |
2559 | gomp_requires_to_name (buf2, sizeof (buf2), | |
2560 | omp_req != GOMP_REQUIRES_TARGET_USED | |
2561 | ? omp_req : omp_requires_mask); | |
2562 | if (omp_req != GOMP_REQUIRES_TARGET_USED | |
2563 | && omp_requires_mask != GOMP_REQUIRES_TARGET_USED) | |
2564 | { | |
2565 | gomp_requires_to_name (buf1, sizeof (buf1), omp_requires_mask); | |
2566 | gomp_fatal ("OpenMP 'requires' directive with non-identical clauses " | |
2567 | "in multiple compilation units: '%s' vs. '%s'", | |
2568 | buf1, buf2); | |
2569 | } | |
2570 | else | |
2571 | gomp_fatal ("OpenMP 'requires' directive with '%s' specified only in " | |
2572 | "some compilation units", buf2); | |
2573 | } | |
2574 | omp_requires_mask = omp_req; | |
2575 | ||
a51df54e IV |
2576 | /* Load image to all initialized devices. */ |
2577 | for (i = 0; i < num_devices; i++) | |
2578 | { | |
2579 | struct gomp_device_descr *devicep = &devices[i]; | |
2580 | gomp_mutex_lock (&devicep->lock); | |
d84ffc0a IV |
2581 | if (devicep->type == target_type |
2582 | && devicep->state == GOMP_DEVICE_INITIALIZED) | |
2a21ff19 NS |
2583 | gomp_load_image_to_device (devicep, version, |
2584 | host_table, target_data, true); | |
a51df54e IV |
2585 | gomp_mutex_unlock (&devicep->lock); |
2586 | } | |
1df3f842 | 2587 | |
a51df54e IV |
2588 | /* Insert image to array of pending images. */ |
2589 | offload_images | |
2590 | = gomp_realloc_unlock (offload_images, | |
2591 | (num_offload_images + 1) | |
2592 | * sizeof (struct offload_image_descr)); | |
2a21ff19 | 2593 | offload_images[num_offload_images].version = version; |
1df3f842 JJ |
2594 | offload_images[num_offload_images].type = target_type; |
2595 | offload_images[num_offload_images].host_table = host_table; | |
2596 | offload_images[num_offload_images].target_data = target_data; | |
2597 | ||
2598 | num_offload_images++; | |
a51df54e | 2599 | gomp_mutex_unlock (®ister_lock); |
1df3f842 JJ |
2600 | } |
2601 | ||
9ef71453 TS |
/* Legacy entry point.  */

void
GOMP_offload_register (const void *host_table, int target_type,
		       const void *target_data)
{
  /* Version 0 images carry no 'requires' mask and pass the target data
     directly; forward to the versioned entry point.  */
  GOMP_offload_register_ver (0, host_table, target_type, target_data);
}
2610 | ||
a51df54e IV |
2611 | /* This function should be called from every offload image while unloading. |
2612 | It gets the descriptor of the host func and var tables HOST_TABLE, TYPE of | |
3f05e03d | 2613 | the target, and DATA. */ |
1df3f842 | 2614 | |
a51df54e | 2615 | void |
2a21ff19 | 2616 | GOMP_offload_unregister_ver (unsigned version, const void *host_table, |
3f05e03d | 2617 | int target_type, const void *data) |
1df3f842 | 2618 | { |
a51df54e IV |
2619 | int i; |
2620 | ||
3f05e03d TS |
2621 | if (GOMP_VERSION_LIB (version) > GOMP_VERSION) |
2622 | gomp_fatal ("Library too old for offload (version %u < %u)", | |
2623 | GOMP_VERSION, GOMP_VERSION_LIB (version)); | |
2624 | ||
2625 | const void *target_data; | |
2626 | if (GOMP_VERSION_LIB (version) > 1) | |
2627 | target_data = &((void **) data)[1]; | |
2628 | else | |
2629 | target_data = data; | |
2630 | ||
a51df54e IV |
2631 | gomp_mutex_lock (®ister_lock); |
2632 | ||
2633 | /* Unload image from all initialized devices. */ | |
2634 | for (i = 0; i < num_devices; i++) | |
2635 | { | |
a51df54e IV |
2636 | struct gomp_device_descr *devicep = &devices[i]; |
2637 | gomp_mutex_lock (&devicep->lock); | |
d84ffc0a IV |
2638 | if (devicep->type == target_type |
2639 | && devicep->state == GOMP_DEVICE_INITIALIZED) | |
2a21ff19 NS |
2640 | gomp_unload_image_from_device (devicep, version, |
2641 | host_table, target_data); | |
a51df54e IV |
2642 | gomp_mutex_unlock (&devicep->lock); |
2643 | } | |
2644 | ||
2645 | /* Remove image from array of pending images. */ | |
2646 | for (i = 0; i < num_offload_images; i++) | |
2647 | if (offload_images[i].target_data == target_data) | |
2648 | { | |
2649 | offload_images[i] = offload_images[--num_offload_images]; | |
2650 | break; | |
2651 | } | |
2652 | ||
2653 | gomp_mutex_unlock (®ister_lock); | |
41dbbb37 | 2654 | } |
1df3f842 | 2655 | |
9ef71453 TS |
/* Legacy entry point.  */

void
GOMP_offload_unregister (const void *host_table, int target_type,
			 const void *target_data)
{
  /* Mirror of GOMP_offload_register: version-0 images forward here.  */
  GOMP_offload_unregister_ver (0, host_table, target_type, target_data);
}
2664 | ||
a51df54e IV |
2665 | /* This function initializes the target device, specified by DEVICEP. DEVICEP |
2666 | must be locked on entry, and remains locked on return. */ | |
41dbbb37 TS |
2667 | |
2668 | attribute_hidden void | |
a51df54e | 2669 | gomp_init_device (struct gomp_device_descr *devicep) |
41dbbb37 | 2670 | { |
1df3f842 | 2671 | int i; |
6ce13072 CLT |
2672 | if (!devicep->init_device_func (devicep->target_id)) |
2673 | { | |
2674 | gomp_mutex_unlock (&devicep->lock); | |
2675 | gomp_fatal ("device initialization failed"); | |
2676 | } | |
a51df54e IV |
2677 | |
2678 | /* Load to device all images registered by the moment. */ | |
2679 | for (i = 0; i < num_offload_images; i++) | |
1df3f842 | 2680 | { |
a51df54e IV |
2681 | struct offload_image_descr *image = &offload_images[i]; |
2682 | if (image->type == devicep->type) | |
2a21ff19 NS |
2683 | gomp_load_image_to_device (devicep, image->version, |
2684 | image->host_table, image->target_data, | |
2685 | false); | |
1df3f842 JJ |
2686 | } |
2687 | ||
1f4c5b9b CLT |
2688 | /* Initialize OpenACC asynchronous queues. */ |
2689 | goacc_init_asyncqueues (devicep); | |
2690 | ||
d84ffc0a | 2691 | devicep->state = GOMP_DEVICE_INITIALIZED; |
41dbbb37 TS |
2692 | } |
2693 | ||
1f4c5b9b CLT |
2694 | /* This function finalizes the target device, specified by DEVICEP. DEVICEP |
2695 | must be locked on entry, and remains locked on return. */ | |
2696 | ||
2697 | attribute_hidden bool | |
2698 | gomp_fini_device (struct gomp_device_descr *devicep) | |
2699 | { | |
2700 | bool ret = goacc_fini_asyncqueues (devicep); | |
2701 | ret &= devicep->fini_device_func (devicep->target_id); | |
2702 | devicep->state = GOMP_DEVICE_FINALIZED; | |
2703 | return ret; | |
2704 | } | |
2705 | ||
22be2349 NS |
2706 | attribute_hidden void |
2707 | gomp_unload_device (struct gomp_device_descr *devicep) | |
2708 | { | |
d84ffc0a | 2709 | if (devicep->state == GOMP_DEVICE_INITIALIZED) |
22be2349 NS |
2710 | { |
2711 | unsigned i; | |
d22cd774 | 2712 | |
22be2349 NS |
2713 | /* Unload from device all images registered at the moment. */ |
2714 | for (i = 0; i < num_offload_images; i++) | |
2715 | { | |
2716 | struct offload_image_descr *image = &offload_images[i]; | |
2717 | if (image->type == devicep->type) | |
2a21ff19 NS |
2718 | gomp_unload_image_from_device (devicep, image->version, |
2719 | image->host_table, | |
22be2349 NS |
2720 | image->target_data); |
2721 | } | |
2722 | } | |
2723 | } | |
2724 | ||
e01d41e5 | 2725 | /* Host fallback for GOMP_target{,_ext} routines. */ |
d9a6bd32 JJ |
2726 | |
2727 | static void | |
1bfc07d1 | 2728 | gomp_target_fallback (void (*fn) (void *), void **hostaddrs, |
aea72386 | 2729 | struct gomp_device_descr *devicep, void **args) |
d9a6bd32 JJ |
2730 | { |
2731 | struct gomp_thread old_thr, *thr = gomp_thread (); | |
1bfc07d1 KCY |
2732 | |
2733 | if (gomp_target_offload_var == GOMP_TARGET_OFFLOAD_MANDATORY | |
2734 | && devicep != NULL) | |
2735 | gomp_fatal ("OMP_TARGET_OFFLOAD is set to MANDATORY, but device cannot " | |
2736 | "be used for offloading"); | |
2737 | ||
d9a6bd32 JJ |
2738 | old_thr = *thr; |
2739 | memset (thr, '\0', sizeof (*thr)); | |
2740 | if (gomp_places_list) | |
2741 | { | |
2742 | thr->place = old_thr.place; | |
2743 | thr->ts.place_partition_len = gomp_places_list_len; | |
2744 | } | |
aea72386 JJ |
2745 | if (args) |
2746 | while (*args) | |
2747 | { | |
2748 | intptr_t id = (intptr_t) *args++, val; | |
2749 | if (id & GOMP_TARGET_ARG_SUBSEQUENT_PARAM) | |
2750 | val = (intptr_t) *args++; | |
2751 | else | |
2752 | val = id >> GOMP_TARGET_ARG_VALUE_SHIFT; | |
2753 | if ((id & GOMP_TARGET_ARG_DEVICE_MASK) != GOMP_TARGET_ARG_DEVICE_ALL) | |
2754 | continue; | |
2755 | id &= GOMP_TARGET_ARG_ID_MASK; | |
2756 | if (id != GOMP_TARGET_ARG_THREAD_LIMIT) | |
2757 | continue; | |
2758 | val = val > INT_MAX ? INT_MAX : val; | |
2759 | if (val) | |
2760 | gomp_icv (true)->thread_limit_var = val; | |
2761 | break; | |
2762 | } | |
2763 | ||
d9a6bd32 JJ |
2764 | fn (hostaddrs); |
2765 | gomp_free_thread (thr); | |
2766 | *thr = old_thr; | |
2767 | } | |
2768 | ||
b2b40051 MJ |
2769 | /* Calculate alignment and size requirements of a private copy of data shared |
2770 | as GOMP_MAP_FIRSTPRIVATE and store them to TGT_ALIGN and TGT_SIZE. */ | |
e01d41e5 | 2771 | |
b2b40051 MJ |
2772 | static inline void |
2773 | calculate_firstprivate_requirements (size_t mapnum, size_t *sizes, | |
2774 | unsigned short *kinds, size_t *tgt_align, | |
2775 | size_t *tgt_size) | |
e01d41e5 | 2776 | { |
b2b40051 MJ |
2777 | size_t i; |
2778 | for (i = 0; i < mapnum; i++) | |
2779 | if ((kinds[i] & 0xff) == GOMP_MAP_FIRSTPRIVATE) | |
2780 | { | |
2781 | size_t align = (size_t) 1 << (kinds[i] >> 8); | |
2782 | if (*tgt_align < align) | |
2783 | *tgt_align = align; | |
2784 | *tgt_size = (*tgt_size + align - 1) & ~(align - 1); | |
2785 | *tgt_size += sizes[i]; | |
2786 | } | |
2787 | } | |
2788 | ||
2789 | /* Copy data shared as GOMP_MAP_FIRSTPRIVATE to DST. */ | |
2790 | ||
2791 | static inline void | |
2792 | copy_firstprivate_data (char *tgt, size_t mapnum, void **hostaddrs, | |
2793 | size_t *sizes, unsigned short *kinds, size_t tgt_align, | |
2794 | size_t tgt_size) | |
2795 | { | |
2796 | uintptr_t al = (uintptr_t) tgt & (tgt_align - 1); | |
2797 | if (al) | |
2798 | tgt += tgt_align - al; | |
2799 | tgt_size = 0; | |
2800 | size_t i; | |
e01d41e5 | 2801 | for (i = 0; i < mapnum; i++) |
bbb7f860 | 2802 | if ((kinds[i] & 0xff) == GOMP_MAP_FIRSTPRIVATE && hostaddrs[i] != NULL) |
e01d41e5 JJ |
2803 | { |
2804 | size_t align = (size_t) 1 << (kinds[i] >> 8); | |
e01d41e5 | 2805 | tgt_size = (tgt_size + align - 1) & ~(align - 1); |
b2b40051 MJ |
2806 | memcpy (tgt + tgt_size, hostaddrs[i], sizes[i]); |
2807 | hostaddrs[i] = tgt + tgt_size; | |
2808 | tgt_size = tgt_size + sizes[i]; | |
49d1a2f9 TB |
2809 | if (i + 1 < mapnum && (kinds[i+1] & 0xff) == GOMP_MAP_ATTACH) |
2810 | { | |
2811 | *(*(uintptr_t**) hostaddrs[i+1] + sizes[i+1]) = (uintptr_t) hostaddrs[i]; | |
2812 | ++i; | |
2813 | } | |
e01d41e5 | 2814 | } |
b2b40051 MJ |
2815 | } |
2816 | ||
e01d41e5 | 2817 | /* Helper function of GOMP_target{,_ext} routines. */ |
d9a6bd32 JJ |
2818 | |
2819 | static void * | |
2820 | gomp_get_target_fn_addr (struct gomp_device_descr *devicep, | |
2821 | void (*host_fn) (void *)) | |
2822 | { | |
2823 | if (devicep->capabilities & GOMP_OFFLOAD_CAP_NATIVE_EXEC) | |
2824 | return (void *) host_fn; | |
2825 | else | |
2826 | { | |
2827 | gomp_mutex_lock (&devicep->lock); | |
d84ffc0a IV |
2828 | if (devicep->state == GOMP_DEVICE_FINALIZED) |
2829 | { | |
2830 | gomp_mutex_unlock (&devicep->lock); | |
2831 | return NULL; | |
2832 | } | |
2833 | ||
d9a6bd32 JJ |
2834 | struct splay_tree_key_s k; |
2835 | k.host_start = (uintptr_t) host_fn; | |
2836 | k.host_end = k.host_start + 1; | |
2837 | splay_tree_key tgt_fn = splay_tree_lookup (&devicep->mem_map, &k); | |
2838 | gomp_mutex_unlock (&devicep->lock); | |
2839 | if (tgt_fn == NULL) | |
eb4048f2 | 2840 | return NULL; |
d9a6bd32 JJ |
2841 | |
2842 | return (void *) tgt_fn->tgt_offset; | |
2843 | } | |
2844 | } | |
2845 | ||
acf0174b | 2846 | /* Called when encountering a target directive. If DEVICE |
41dbbb37 TS |
2847 | is GOMP_DEVICE_ICV, it means use device-var ICV. If it is |
2848 | GOMP_DEVICE_HOST_FALLBACK (or any value | |
2849 | larger than last available hw device), use host fallback. | |
128b26dc TS |
2850 | FN is address of host code, UNUSED is part of the current ABI, but |
2851 | we're not actually using it. HOSTADDRS, SIZES and KINDS are arrays | |
acf0174b JJ |
2852 | with MAPNUM entries, with addresses of the host objects, |
2853 | sizes of the host objects (resp. for pointer kind pointer bias | |
2854 | and assumed sizeof (void *) size) and kinds. */ | |
2855 | ||
2856 | void | |
128b26dc | 2857 | GOMP_target (int device, void (*fn) (void *), const void *unused, |
acf0174b JJ |
2858 | size_t mapnum, void **hostaddrs, size_t *sizes, |
2859 | unsigned char *kinds) | |
2860 | { | |
1158fe43 | 2861 | struct gomp_device_descr *devicep = resolve_device (device, true); |
41dbbb37 | 2862 | |
d84ffc0a | 2863 | void *fn_addr; |
41dbbb37 | 2864 | if (devicep == NULL |
d84ffc0a | 2865 | || !(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400) |
b2b40051 MJ |
2866 | /* All shared memory devices should use the GOMP_target_ext function. */ |
2867 | || devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM | |
d84ffc0a | 2868 | || !(fn_addr = gomp_get_target_fn_addr (devicep, fn))) |
aea72386 | 2869 | return gomp_target_fallback (fn, hostaddrs, devicep, NULL); |
d9a6bd32 | 2870 | |
275c736e | 2871 | htab_t refcount_set = htab_create (mapnum); |
d9a6bd32 JJ |
2872 | struct target_mem_desc *tgt_vars |
2873 | = gomp_map_vars (devicep, mapnum, hostaddrs, NULL, sizes, kinds, false, | |
275c736e | 2874 | &refcount_set, GOMP_MAP_VARS_TARGET); |
b2b40051 MJ |
2875 | devicep->run_func (devicep->target_id, fn_addr, (void *) tgt_vars->tgt_start, |
2876 | NULL); | |
275c736e CLT |
2877 | htab_clear (refcount_set); |
2878 | gomp_unmap_vars (tgt_vars, true, &refcount_set); | |
2879 | htab_free (refcount_set); | |
d9a6bd32 | 2880 | } |
1df3f842 | 2881 | |
001ab12e FH |
2882 | static inline unsigned int |
2883 | clear_unsupported_flags (struct gomp_device_descr *devicep, unsigned int flags) | |
2884 | { | |
2885 | /* If we cannot run asynchronously, simply ignore nowait. */ | |
2886 | if (devicep != NULL && devicep->async_run_func == NULL) | |
2887 | flags &= ~GOMP_TARGET_FLAG_NOWAIT; | |
2888 | ||
2889 | return flags; | |
2890 | } | |
2891 | ||
81476bc4 MV |
2892 | static void |
2893 | gomp_copy_back_icvs (struct gomp_device_descr *devicep, int device) | |
2894 | { | |
2895 | struct gomp_offload_icv_list *item = gomp_get_offload_icv_item (device); | |
2896 | if (item == NULL) | |
2897 | return; | |
2898 | ||
2899 | void *host_ptr = &item->icvs; | |
2900 | void *dev_ptr = omp_get_mapped_ptr (host_ptr, device); | |
2901 | if (dev_ptr != NULL) | |
2902 | gomp_copy_dev2host (devicep, NULL, host_ptr, dev_ptr, | |
2903 | sizeof (struct gomp_offload_icvs)); | |
2904 | } | |
2905 | ||
e01d41e5 JJ |
2906 | /* Like GOMP_target, but KINDS is 16-bit, UNUSED is no longer present, |
2907 | and several arguments have been added: | |
2908 | FLAGS is a bitmask, see GOMP_TARGET_FLAG_* in gomp-constants.h. | |
2909 | DEPEND is array of dependencies, see GOMP_task for details. | |
b2b40051 MJ |
2910 | |
2911 | ARGS is a pointer to an array consisting of a variable number of both | |
2912 | device-independent and device-specific arguments, which can take one two | |
2913 | elements where the first specifies for which device it is intended, the type | |
2914 | and optionally also the value. If the value is not present in the first | |
2915 | one, the whole second element the actual value. The last element of the | |
2916 | array is a single NULL. Among the device independent can be for example | |
2917 | NUM_TEAMS and THREAD_LIMIT. | |
2918 | ||
e01d41e5 JJ |
2919 | NUM_TEAMS is positive if GOMP_teams will be called in the body with |
2920 | that value, or 1 if teams construct is not present, or 0, if | |
2921 | teams construct does not have num_teams clause and so the choice is | |
2922 | implementation defined, and -1 if it can't be determined on the host | |
2923 | what value will GOMP_teams have on the device. | |
2924 | THREAD_LIMIT similarly is positive if GOMP_teams will be called in the | |
2925 | body with that value, or 0, if teams construct does not have thread_limit | |
2926 | clause or the teams construct is not present, or -1 if it can't be | |
2927 | determined on the host what value will GOMP_teams have on the device. */ | |
2928 | ||
d9a6bd32 | 2929 | void |
e01d41e5 JJ |
2930 | GOMP_target_ext (int device, void (*fn) (void *), size_t mapnum, |
2931 | void **hostaddrs, size_t *sizes, unsigned short *kinds, | |
b2b40051 | 2932 | unsigned int flags, void **depend, void **args) |
d9a6bd32 | 2933 | { |
1158fe43 | 2934 | struct gomp_device_descr *devicep = resolve_device (device, true); |
21f3a236 JJ |
2935 | size_t tgt_align = 0, tgt_size = 0; |
2936 | bool fpc_done = false; | |
41dbbb37 | 2937 | |
81476bc4 MV |
2938 | /* Obtain the original TEAMS and THREADS values from ARGS. */ |
2939 | intptr_t orig_teams = 1, orig_threads = 0; | |
2940 | size_t num_args = 0, len = 1, teams_len = 1, threads_len = 1; | |
2941 | void **tmpargs = args; | |
2942 | while (*tmpargs) | |
2943 | { | |
2944 | intptr_t id = (intptr_t) *tmpargs++, val; | |
2945 | if (id & GOMP_TARGET_ARG_SUBSEQUENT_PARAM) | |
2946 | { | |
2947 | val = (intptr_t) *tmpargs++; | |
2948 | len = 2; | |
2949 | } | |
2950 | else | |
2951 | { | |
2952 | val = id >> GOMP_TARGET_ARG_VALUE_SHIFT; | |
2953 | len = 1; | |
2954 | } | |
2955 | num_args += len; | |
2956 | if ((id & GOMP_TARGET_ARG_DEVICE_MASK) != GOMP_TARGET_ARG_DEVICE_ALL) | |
2957 | continue; | |
2958 | val = val > INT_MAX ? INT_MAX : val; | |
2959 | if ((id & GOMP_TARGET_ARG_ID_MASK) == GOMP_TARGET_ARG_NUM_TEAMS) | |
2960 | { | |
2961 | orig_teams = val; | |
2962 | teams_len = len; | |
2963 | } | |
2964 | else if ((id & GOMP_TARGET_ARG_ID_MASK) == GOMP_TARGET_ARG_THREAD_LIMIT) | |
2965 | { | |
2966 | orig_threads = val; | |
2967 | threads_len = len; | |
2968 | } | |
2969 | } | |
2970 | ||
2971 | intptr_t new_teams = orig_teams, new_threads = orig_threads; | |
2972 | /* ORIG_TEAMS == -2: No explicit teams construct specified. Set to 1. | |
2973 | ORIG_TEAMS == -1: TEAMS construct with NUM_TEAMS clause specified, but the | |
2974 | value could not be determined. No change. | |
2975 | ORIG_TEAMS == 0: TEAMS construct without NUM_TEAMS clause. | |
2976 | Set device-specific value. | |
2977 | ORIG_TEAMS > 0: Value was already set through e.g. NUM_TEAMS clause. | |
2978 | No change. */ | |
2979 | if (orig_teams == -2) | |
2980 | new_teams = 1; | |
2981 | else if (orig_teams == 0) | |
2982 | { | |
2983 | struct gomp_offload_icv_list *item = gomp_get_offload_icv_item (device); | |
2984 | if (item != NULL) | |
2985 | new_teams = item->icvs.nteams; | |
2986 | } | |
2987 | /* The device-specific teams-thread-limit is only set if (a) an explicit TEAMS | |
2988 | region exists, i.e. ORIG_TEAMS > -2, and (b) THREADS was not already set by | |
2989 | e.g. a THREAD_LIMIT clause. */ | |
2990 | if (orig_teams > -2 && orig_threads == 0) | |
2991 | { | |
2992 | struct gomp_offload_icv_list *item = gomp_get_offload_icv_item (device); | |
2993 | if (item != NULL) | |
2994 | new_threads = item->icvs.teams_thread_limit; | |
2995 | } | |
2996 | ||
2997 | /* Copy and change the arguments list only if TEAMS or THREADS need to be | |
2998 | updated. */ | |
2999 | void **new_args = args; | |
3000 | if (orig_teams != new_teams || orig_threads != new_threads) | |
3001 | { | |
3002 | size_t tms_len = (orig_teams == new_teams | |
3003 | ? teams_len | |
3004 | : (new_teams > -(1 << 15) && new_teams < (1 << 15) | |
3005 | ? 1 : 2)); | |
3006 | size_t ths_len = (orig_threads == new_threads | |
3007 | ? threads_len | |
3008 | : (new_threads > -(1 << 15) && new_threads < (1 << 15) | |
3009 | ? 1 : 2)); | |
3010 | /* One additional item after the last arg must be NULL. */ | |
3011 | size_t new_args_cnt = num_args - teams_len - threads_len + tms_len | |
3012 | + ths_len + 1; | |
3013 | new_args = (void **) gomp_alloca (new_args_cnt * sizeof (void*)); | |
3014 | ||
3015 | tmpargs = args; | |
3016 | void **tmp_new_args = new_args; | |
3017 | /* Copy all args except TEAMS and THREADS. TEAMS and THREADS are copied | |
3018 | too if they have not been changed and skipped otherwise. */ | |
3019 | while (*tmpargs) | |
3020 | { | |
3021 | intptr_t id = (intptr_t) *tmpargs; | |
3022 | if (((id & GOMP_TARGET_ARG_ID_MASK) == GOMP_TARGET_ARG_NUM_TEAMS | |
3023 | && orig_teams != new_teams) | |
3024 | || ((id & GOMP_TARGET_ARG_ID_MASK) == GOMP_TARGET_ARG_THREAD_LIMIT | |
3025 | && orig_threads != new_threads)) | |
3026 | { | |
3027 | tmpargs++; | |
3028 | if (id & GOMP_TARGET_ARG_SUBSEQUENT_PARAM) | |
3029 | tmpargs++; | |
3030 | } | |
3031 | else | |
3032 | { | |
3033 | *tmp_new_args++ = *tmpargs++; | |
3034 | if (id & GOMP_TARGET_ARG_SUBSEQUENT_PARAM) | |
3035 | *tmp_new_args++ = *tmpargs++; | |
3036 | } | |
3037 | } | |
3038 | ||
3039 | /* Add the new TEAMS arg to the new args list if it has been changed. */ | |
3040 | if (orig_teams != new_teams) | |
3041 | { | |
3042 | intptr_t new_val = new_teams; | |
3043 | if (tms_len == 1) | |
3044 | { | |
3045 | new_val = (new_val << GOMP_TARGET_ARG_VALUE_SHIFT) | |
3046 | | GOMP_TARGET_ARG_NUM_TEAMS; | |
3047 | *tmp_new_args++ = (void *) new_val; | |
3048 | } | |
3049 | else | |
3050 | { | |
3051 | *tmp_new_args++ = (void *) (GOMP_TARGET_ARG_SUBSEQUENT_PARAM | |
3052 | | GOMP_TARGET_ARG_NUM_TEAMS); | |
3053 | *tmp_new_args++ = (void *) new_val; | |
3054 | } | |
3055 | } | |
3056 | ||
3057 | /* Add the new THREADS arg to the new args list if it has been changed. */ | |
3058 | if (orig_threads != new_threads) | |
3059 | { | |
3060 | intptr_t new_val = new_threads; | |
3061 | if (ths_len == 1) | |
3062 | { | |
3063 | new_val = (new_val << GOMP_TARGET_ARG_VALUE_SHIFT) | |
3064 | | GOMP_TARGET_ARG_THREAD_LIMIT; | |
3065 | *tmp_new_args++ = (void *) new_val; | |
3066 | } | |
3067 | else | |
3068 | { | |
3069 | *tmp_new_args++ = (void *) (GOMP_TARGET_ARG_SUBSEQUENT_PARAM | |
3070 | | GOMP_TARGET_ARG_THREAD_LIMIT); | |
3071 | *tmp_new_args++ = (void *) new_val; | |
3072 | } | |
3073 | } | |
3074 | ||
3075 | *tmp_new_args = NULL; | |
3076 | } | |
3077 | ||
001ab12e FH |
3078 | flags = clear_unsupported_flags (devicep, flags); |
3079 | ||
e4606348 JJ |
3080 | if (flags & GOMP_TARGET_FLAG_NOWAIT) |
3081 | { | |
3082 | struct gomp_thread *thr = gomp_thread (); | |
3083 | /* Create a team if we don't have any around, as nowait | |
3084 | target tasks make sense to run asynchronously even when | |
3085 | outside of any parallel. */ | |
3086 | if (__builtin_expect (thr->ts.team == NULL, 0)) | |
3087 | { | |
3088 | struct gomp_team *team = gomp_new_team (1); | |
3089 | struct gomp_task *task = thr->task; | |
a58a965e | 3090 | struct gomp_task **implicit_task = &task; |
e4606348 JJ |
3091 | struct gomp_task_icv *icv = task ? &task->icv : &gomp_global_icv; |
3092 | team->prev_ts = thr->ts; | |
3093 | thr->ts.team = team; | |
3094 | thr->ts.team_id = 0; | |
3095 | thr->ts.work_share = &team->work_shares[0]; | |
3096 | thr->ts.last_work_share = NULL; | |
3097 | #ifdef HAVE_SYNC_BUILTINS | |
3098 | thr->ts.single_count = 0; | |
3099 | #endif | |
3100 | thr->ts.static_trip = 0; | |
3101 | thr->task = &team->implicit_task[0]; | |
3102 | gomp_init_task (thr->task, NULL, icv); | |
a58a965e JJ |
3103 | while (*implicit_task |
3104 | && (*implicit_task)->kind != GOMP_TASK_IMPLICIT) | |
3105 | implicit_task = &(*implicit_task)->parent; | |
3106 | if (*implicit_task) | |
e4606348 | 3107 | { |
a58a965e | 3108 | thr->task = *implicit_task; |
e4606348 | 3109 | gomp_end_task (); |
a58a965e | 3110 | free (*implicit_task); |
e4606348 JJ |
3111 | thr->task = &team->implicit_task[0]; |
3112 | } | |
3113 | else | |
3114 | pthread_setspecific (gomp_thread_destructor, thr); | |
a58a965e JJ |
3115 | if (implicit_task != &task) |
3116 | { | |
3117 | *implicit_task = thr->task; | |
3118 | thr->task = task; | |
3119 | } | |
e4606348 JJ |
3120 | } |
3121 | if (thr->ts.team | |
3122 | && !thr->task->final_task) | |
3123 | { | |
3124 | gomp_create_target_task (devicep, fn, mapnum, hostaddrs, | |
81476bc4 | 3125 | sizes, kinds, flags, depend, new_args, |
e4606348 JJ |
3126 | GOMP_TARGET_TASK_BEFORE_MAP); |
3127 | return; | |
3128 | } | |
3129 | } | |
3130 | ||
3131 | /* If there are depend clauses, but nowait is not present | |
3132 | (or we are in a final task), block the parent task until the | |
3133 | dependencies are resolved and then just continue with the rest | |
3134 | of the function as if it is a merged task. */ | |
d9a6bd32 JJ |
3135 | if (depend != NULL) |
3136 | { | |
3137 | struct gomp_thread *thr = gomp_thread (); | |
3138 | if (thr->task && thr->task->depend_hash) | |
21f3a236 JJ |
3139 | { |
3140 | /* If we might need to wait, copy firstprivate now. */ | |
3141 | calculate_firstprivate_requirements (mapnum, sizes, kinds, | |
3142 | &tgt_align, &tgt_size); | |
3143 | if (tgt_align) | |
3144 | { | |
3145 | char *tgt = gomp_alloca (tgt_size + tgt_align - 1); | |
3146 | copy_firstprivate_data (tgt, mapnum, hostaddrs, sizes, kinds, | |
3147 | tgt_align, tgt_size); | |
3148 | } | |
3149 | fpc_done = true; | |
3150 | gomp_task_maybe_wait_for_dependencies (depend); | |
3151 | } | |
d9a6bd32 | 3152 | } |
41dbbb37 | 3153 | |
d84ffc0a | 3154 | void *fn_addr; |
d9a6bd32 | 3155 | if (devicep == NULL |
d84ffc0a | 3156 | || !(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400) |
b2b40051 MJ |
3157 | || !(fn_addr = gomp_get_target_fn_addr (devicep, fn)) |
3158 | || (devicep->can_run_func && !devicep->can_run_func (fn_addr))) | |
41dbbb37 | 3159 | { |
21f3a236 JJ |
3160 | if (!fpc_done) |
3161 | { | |
3162 | calculate_firstprivate_requirements (mapnum, sizes, kinds, | |
3163 | &tgt_align, &tgt_size); | |
3164 | if (tgt_align) | |
3165 | { | |
3166 | char *tgt = gomp_alloca (tgt_size + tgt_align - 1); | |
3167 | copy_firstprivate_data (tgt, mapnum, hostaddrs, sizes, kinds, | |
3168 | tgt_align, tgt_size); | |
3169 | } | |
3170 | } | |
81476bc4 | 3171 | gomp_target_fallback (fn, hostaddrs, devicep, new_args); |
d9a6bd32 | 3172 | return; |
41dbbb37 | 3173 | } |
1df3f842 | 3174 | |
b2b40051 | 3175 | struct target_mem_desc *tgt_vars; |
275c736e CLT |
3176 | htab_t refcount_set = NULL; |
3177 | ||
b2b40051 MJ |
3178 | if (devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM) |
3179 | { | |
21f3a236 JJ |
3180 | if (!fpc_done) |
3181 | { | |
3182 | calculate_firstprivate_requirements (mapnum, sizes, kinds, | |
3183 | &tgt_align, &tgt_size); | |
3184 | if (tgt_align) | |
3185 | { | |
3186 | char *tgt = gomp_alloca (tgt_size + tgt_align - 1); | |
3187 | copy_firstprivate_data (tgt, mapnum, hostaddrs, sizes, kinds, | |
3188 | tgt_align, tgt_size); | |
3189 | } | |
3190 | } | |
b2b40051 MJ |
3191 | tgt_vars = NULL; |
3192 | } | |
3193 | else | |
275c736e CLT |
3194 | { |
3195 | refcount_set = htab_create (mapnum); | |
3196 | tgt_vars = gomp_map_vars (devicep, mapnum, hostaddrs, NULL, sizes, kinds, | |
3197 | true, &refcount_set, GOMP_MAP_VARS_TARGET); | |
3198 | } | |
b2b40051 MJ |
3199 | devicep->run_func (devicep->target_id, fn_addr, |
3200 | tgt_vars ? (void *) tgt_vars->tgt_start : hostaddrs, | |
81476bc4 | 3201 | new_args); |
b2b40051 | 3202 | if (tgt_vars) |
275c736e CLT |
3203 | { |
3204 | htab_clear (refcount_set); | |
3205 | gomp_unmap_vars (tgt_vars, true, &refcount_set); | |
3206 | } | |
3207 | if (refcount_set) | |
3208 | htab_free (refcount_set); | |
81476bc4 MV |
3209 | |
3210 | /* Copy back ICVs from device to host. | |
3211 | HOST_PTR is expected to exist since it was added in | |
3212 | gomp_load_image_to_device if not already available. */ | |
3213 | gomp_copy_back_icvs (devicep, device); | |
3214 | ||
acf0174b JJ |
3215 | } |
3216 | ||
ea4b23d9 TB |
3217 | |
/* Reverse lookup (device addr -> host addr) for reverse offload.  We avoid
   keeping track of all variable handling - assuming that reverse offload
   occurs only very rarely.  Downside is that the reverse search is slow.  */

struct gomp_splay_tree_rev_lookup_data {
  uintptr_t tgt_start;	/* Start of the device address range searched for.  */
  uintptr_t tgt_end;	/* One past the end of that device address range.  */
  splay_tree_key key;	/* Out: matching key, or NULL if none was found.  */
};
3227 | ||
/* Callback for splay_tree_foreach_lazy: check whether KEY's device address
   range matches or overlaps the range recorded in *D (a struct
   gomp_splay_tree_rev_lookup_data).  On a hit, store KEY in D->key and
   return 1 to stop the traversal; otherwise return 0 to continue.  */

static int
gomp_splay_tree_rev_lookup (splay_tree_key key, void *d)
{
  struct gomp_splay_tree_rev_lookup_data *data;
  data = (struct gomp_splay_tree_rev_lookup_data *)d;
  uintptr_t tgt_start = key->tgt->tgt_start + key->tgt_offset;

  /* Quick reject: KEY starts past the searched range, or its target
     descriptor has no list entries to take a length from.  */
  if (tgt_start > data->tgt_start || key->tgt->list_count == 0)
    return 0;

  /* Find KEY's own entry in its target descriptor's list to obtain the
     mapped length; every live key is expected to be listed there.  */
  size_t j;
  for (j = 0; j < key->tgt->list_count; j++)
    if (key->tgt->list[j].key == key)
      break;
  assert (j < key->tgt->list_count);
  uintptr_t tgt_end = tgt_start + key->tgt->list[j].length;

  /* Accept an exact range match or any overlap with the searched range.  */
  if ((tgt_start == data->tgt_start && tgt_end == data->tgt_end)
      || (tgt_end > data->tgt_start && tgt_start < data->tgt_end))
    {
      data->key = key;
      return 1;
    }
  return 0;
}
3253 | ||
3254 | static inline splay_tree_key | |
3255 | gomp_map_rev_lookup (splay_tree mem_map, uint64_t tgt_start, uint64_t tgt_end, | |
3256 | bool zero_len) | |
3257 | { | |
3258 | struct gomp_splay_tree_rev_lookup_data data; | |
3259 | data.key = NULL; | |
3260 | data.tgt_start = tgt_start; | |
3261 | data.tgt_end = tgt_end; | |
3262 | ||
3263 | if (tgt_start != tgt_end) | |
3264 | { | |
3265 | splay_tree_foreach_lazy (mem_map, gomp_splay_tree_rev_lookup, &data); | |
3266 | return data.key; | |
3267 | } | |
3268 | ||
3269 | data.tgt_end++; | |
3270 | splay_tree_foreach_lazy (mem_map, gomp_splay_tree_rev_lookup, &data); | |
3271 | if (data.key != NULL || zero_len) | |
3272 | return data.key; | |
3273 | data.tgt_end--; | |
3274 | ||
3275 | data.tgt_start--; | |
3276 | splay_tree_foreach_lazy (mem_map, gomp_splay_tree_rev_lookup, &data); | |
3277 | return data.key; | |
3278 | } | |
3279 | ||
/* Per-mapping bookkeeping used by gomp_target_rev: the original device
   address and how the corresponding host-side temporary (if any) was
   allocated.  */

struct cpy_data
{
  uint64_t devaddr;	  /* Device address as received from the device.  */
  bool present, aligned;  /* PRESENT: a host mapping already existed, so no
			     temporary was allocated.  ALIGNED: the temporary
			     came from gomp_aligned_alloc (free with
			     gomp_aligned_free), not gomp_malloc.  */
};
3285 | ||
3286 | ||
/* Search just mapped reverse-offload data; returns index if found,
   otherwise >= n.  Scans the first N entries of D for one whose device
   address range matches or overlaps [TGT_START, TGT_END).  */

static inline int
gomp_map_cdata_lookup_int (struct cpy_data *d, uint64_t *devaddrs,
			   unsigned short *kinds, uint64_t *sizes, size_t n,
			   uint64_t tgt_start, uint64_t tgt_end)
{
  const bool short_mapkind = true;
  const int typemask = short_mapkind ? 0xff : 0x7;
  size_t i;
  for (i = 0; i < n; i++)
    {
      bool is_struct = ((get_kind (short_mapkind, kinds, i) & typemask)
			== GOMP_MAP_STRUCT);
      uint64_t dev_end;
      if (!is_struct)
	dev_end = d[i].devaddr + sizes[i];
      else
	{
	  /* For GOMP_MAP_STRUCT, sizes[i] is the number of component
	     mappings that follow; the struct ends where its last component
	     ends.  Use the already-processed entry in D when the last
	     component lies within the first N mappings, the raw device
	     address otherwise.  */
	  if (i + sizes[i] < n)
	    dev_end = d[i + sizes[i]].devaddr + sizes[i + sizes[i]];
	  else
	    dev_end = devaddrs[i + sizes[i]] + sizes[i + sizes[i]];
	}
      /* Exact range match or any overlap counts as a hit.  */
      if ((d[i].devaddr == tgt_start && dev_end == tgt_end)
	  || (dev_end > tgt_start && d[i].devaddr < tgt_end))
	break;
      if (is_struct)
	/* Skip over the struct's component mappings.  */
	i += sizes[i];
    }
  return i;
}
3320 | ||
/* Wrapper around gomp_map_cdata_lookup_int that handles empty
   [TGT_START, TGT_END) ranges the same way gomp_map_rev_lookup does:
   first widen the end by one byte, then (unless ZERO_LEN) fall back to
   moving the start back by one byte.  Returns an index < N on success,
   otherwise >= N.  */

static inline int
gomp_map_cdata_lookup (struct cpy_data *d, uint64_t *devaddrs,
		       unsigned short *kinds, uint64_t *sizes,
		       size_t n, uint64_t tgt_start, uint64_t tgt_end,
		       bool zero_len)
{
  /* Non-empty range: a single search suffices.  */
  if (tgt_start != tgt_end)
    return gomp_map_cdata_lookup_int (d, devaddrs, kinds, sizes, n,
				      tgt_start, tgt_end);

  /* Empty range: try [tgt_start, tgt_end + 1) first.  */
  size_t idx = gomp_map_cdata_lookup_int (d, devaddrs, kinds, sizes, n,
					  tgt_start, tgt_end + 1);
  if (idx < n || zero_len)
    return idx;

  /* Finally try [tgt_start - 1, tgt_end).  */
  return gomp_map_cdata_lookup_int (d, devaddrs, kinds, sizes, n,
				    tgt_start - 1, tgt_end);
}
3342 | ||
/* Handle reverse offload.  This is called by the device plugins for a
   reverse offload; it is not called if the outer target runs on the host.
   The mapping is simplified: device-affecting constructs (except for target
   with device(ancestor:1)) must not be encountered; in particular not
   target (enter/exit) data.  */

void
gomp_target_rev (uint64_t fn_ptr, uint64_t mapnum, uint64_t devaddrs_ptr,
		 uint64_t sizes_ptr, uint64_t kinds_ptr, int dev_num,
		 struct goacc_asyncqueue *aq)
{
  /* Return early if there is no offload code.  */
  if (sizeof (OFFLOAD_PLUGINS) == sizeof (""))
    return;
  /* Currently, this fails because of calculate_firstprivate_requirements
     below; it could be fixed but additional code needs to be updated to
     handle 32bit hosts - thus, it is not worthwhile.  */
  if (sizeof (void *) != sizeof (uint64_t))
    gomp_fatal ("Reverse offload of 32bit hosts not supported.");

  struct cpy_data *cdata = NULL;
  uint64_t *devaddrs;
  uint64_t *sizes;
  unsigned short *kinds;
  const bool short_mapkind = true;
  const int typemask = short_mapkind ? 0xff : 0x7;
  struct gomp_device_descr *devicep = resolve_device (dev_num, false);

  /* Translate the device function pointer back to the host function via
     the reverse splay tree filled in by gomp_load_image_to_device.  */
  reverse_splay_tree_key n;
  struct reverse_splay_tree_key_s k;
  k.dev = fn_ptr;

  gomp_mutex_lock (&devicep->lock);
  n = gomp_map_lookup_rev (&devicep->mem_map_rev, &k);
  gomp_mutex_unlock (&devicep->lock);

  if (n == NULL)
    gomp_fatal ("Cannot find reverse-offload function");
  void (*host_fn)() = (void (*)()) n->k->host_start;

  /* With shared memory (or nothing to map) the device arrays can be used
     directly; otherwise copy the map descriptors to the host first.  */
  if ((devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM) || mapnum == 0)
    {
      devaddrs = (uint64_t *) (uintptr_t) devaddrs_ptr;
      sizes = (uint64_t *) (uintptr_t) sizes_ptr;
      kinds = (unsigned short *) (uintptr_t) kinds_ptr;
    }
  else
    {
      devaddrs = (uint64_t *) gomp_malloc (mapnum * sizeof (uint64_t));
      sizes = (uint64_t *) gomp_malloc (mapnum * sizeof (uint64_t));
      kinds = (unsigned short *) gomp_malloc (mapnum * sizeof (unsigned short));
      gomp_copy_dev2host (devicep, aq, devaddrs,
			  (const void *) (uintptr_t) devaddrs_ptr,
			  mapnum * sizeof (uint64_t));
      gomp_copy_dev2host (devicep, aq, sizes,
			  (const void *) (uintptr_t) sizes_ptr,
			  mapnum * sizeof (uint64_t));
      gomp_copy_dev2host (devicep, aq, kinds,
			  (const void *) (uintptr_t) kinds_ptr,
			  mapnum * sizeof (unsigned short));
      if (aq && !devicep->openacc.async.synchronize_func (aq))
	exit (EXIT_FAILURE);
    }

  size_t tgt_align = 0, tgt_size = 0;

  /* If actually executed on 32bit systems, the casts lead to wrong code;
     but 32bit with offloading is not supported; see top of this function.  */
  calculate_firstprivate_requirements (mapnum, (void *) (uintptr_t) sizes,
				       (void *) (uintptr_t) kinds,
				       &tgt_align, &tgt_size);

  /* Gather all firstprivate data into one suitably aligned stack buffer
     and rewrite the corresponding devaddrs entries to point into it.  */
  if (tgt_align)
    {
      char *tgt = gomp_alloca (tgt_size + tgt_align - 1);
      uintptr_t al = (uintptr_t) tgt & (tgt_align - 1);
      if (al)
	tgt += tgt_align - al;
      tgt_size = 0;
      for (uint64_t i = 0; i < mapnum; i++)
	if (get_kind (short_mapkind, kinds, i) == GOMP_MAP_FIRSTPRIVATE
	    && devaddrs[i] != 0)
	  {
	    size_t align = (size_t) 1 << (kinds[i] >> 8);
	    tgt_size = (tgt_size + align - 1) & ~(align - 1);
	    if (devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
	      memcpy (tgt + tgt_size, (void *) (uintptr_t) devaddrs[i],
		      (size_t) sizes[i]);
	    else
	      {
		gomp_copy_dev2host (devicep, aq, tgt + tgt_size,
				    (void *) (uintptr_t) devaddrs[i],
				    (size_t) sizes[i]);
		if (aq && !devicep->openacc.async.synchronize_func (aq))
		  exit (EXIT_FAILURE);
	      }
	    devaddrs[i] = (uint64_t) (uintptr_t) tgt + tgt_size;
	    tgt_size = tgt_size + sizes[i];
	    /* With shared memory, fix up an immediately following ATTACH
	       mapping to point at the relocated firstprivate copy.  */
	    if ((devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
		&& i + 1 < mapnum
		&& ((get_kind (short_mapkind, kinds, i) & typemask)
		    == GOMP_MAP_ATTACH))
	      {
		*(uint64_t*) (uintptr_t) (devaddrs[i+1] + sizes[i+1])
		  = (uint64_t) devaddrs[i];
		++i;
	      }
	  }
    }

  /* Without shared memory, translate every device address into a host
     address: either via an existing mapping (reverse lookup) or by
     allocating a fresh host temporary and copying the data over.  */
  if (!(devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM) && mapnum > 0)
    {
      size_t j, struct_cpy = 0;
      splay_tree_key n2;
      cdata = gomp_alloca (sizeof (*cdata) * mapnum);
      memset (cdata, '\0', sizeof (*cdata) * mapnum);
      gomp_mutex_lock (&devicep->lock);
      for (uint64_t i = 0; i < mapnum; i++)
	{
	  if (devaddrs[i] == 0)
	    continue;
	  n = NULL;
	  int kind = get_kind (short_mapkind, kinds, i) & typemask;
	  switch (kind)
	    {
	      case GOMP_MAP_FIRSTPRIVATE:
	      case GOMP_MAP_FIRSTPRIVATE_INT:
		continue;

	      case GOMP_MAP_DELETE:
	      case GOMP_MAP_RELEASE:
	      case GOMP_MAP_DELETE_ZERO_LEN_ARRAY_SECTION:
		/* Assume it is present; look it up - but ignore unless the
		   present clause is there.  */
	      case GOMP_MAP_ALLOC:
	      case GOMP_MAP_FROM:
	      case GOMP_MAP_FORCE_ALLOC:
	      case GOMP_MAP_FORCE_FROM:
	      case GOMP_MAP_ALWAYS_FROM:
	      case GOMP_MAP_TO:
	      case GOMP_MAP_TOFROM:
	      case GOMP_MAP_FORCE_TO:
	      case GOMP_MAP_FORCE_TOFROM:
	      case GOMP_MAP_ALWAYS_TO:
	      case GOMP_MAP_ALWAYS_TOFROM:
	      case GOMP_MAP_FORCE_PRESENT:
	      case GOMP_MAP_ALWAYS_PRESENT_FROM:
	      case GOMP_MAP_ALWAYS_PRESENT_TO:
	      case GOMP_MAP_ALWAYS_PRESENT_TOFROM:
	      case GOMP_MAP_ZERO_LEN_ARRAY_SECTION:
		cdata[i].devaddr = devaddrs[i];
		bool zero_len = (kind == GOMP_MAP_DELETE_ZERO_LEN_ARRAY_SECTION
				 || kind == GOMP_MAP_ZERO_LEN_ARRAY_SECTION);
		/* First check whether an earlier mapping in this very call
		   already covers this address; otherwise consult the
		   device's memory map.  */
		j = gomp_map_cdata_lookup (cdata, devaddrs, kinds, sizes, i,
					   devaddrs[i],
					   devaddrs[i] + sizes[i], zero_len);
		if (j < i)
		  {
		    n2 = NULL;
		    cdata[i].present = true;
		    devaddrs[i] = devaddrs[j] + devaddrs[i] - cdata[j].devaddr;
		  }
		else
		  {
		    n2 = gomp_map_rev_lookup (&devicep->mem_map,
					      devaddrs[i],
					      devaddrs[i] + sizes[i], zero_len);
		    cdata[i].present = n2 != NULL;
		  }
		if (!cdata[i].present && GOMP_MAP_PRESENT_P (kind))
		  {
		    gomp_mutex_unlock (&devicep->lock);
#ifdef HAVE_INTTYPES_H
		    gomp_fatal ("present clause: no corresponding data on "
				"parent device at %p with size %"PRIu64,
				(void *) (uintptr_t) devaddrs[i],
				(uint64_t) sizes[i]);
#else
		    gomp_fatal ("present clause: no corresponding data on "
				"parent device at %p with size %lu",
				(void *) (uintptr_t) devaddrs[i],
				(unsigned long) sizes[i]);
#endif
		    break;
		  }
		else if (!cdata[i].present
			 && kind != GOMP_MAP_DELETE
			 && kind != GOMP_MAP_RELEASE
			 && kind != GOMP_MAP_DELETE_ZERO_LEN_ARRAY_SECTION)
		  {
		    /* Not mapped: allocate a host temporary.  */
		    cdata[i].aligned = true;
		    size_t align = (size_t) 1 << (kinds[i] >> 8);
		    devaddrs[i]
		      = (uint64_t) (uintptr_t) gomp_aligned_alloc (align,
								   sizes[i]);
		  }
		else if (n2 != NULL)
		  devaddrs[i] = (n2->host_start + cdata[i].devaddr
				 - (n2->tgt->tgt_start + n2->tgt_offset));
		/* Copy device data to the host where the map kind (or a
		   pending struct copy) requires it.  */
		if (((!cdata[i].present || struct_cpy)
		     && (kind == GOMP_MAP_TO || kind == GOMP_MAP_TOFROM))
		    || kind == GOMP_MAP_FORCE_TO
		    || kind == GOMP_MAP_FORCE_TOFROM
		    || GOMP_MAP_ALWAYS_TO_P (kind))
		  {
		    gomp_copy_dev2host (devicep, aq,
					(void *) (uintptr_t) devaddrs[i],
					(void *) (uintptr_t) cdata[i].devaddr,
					sizes[i]);
		    if (aq && !devicep->openacc.async.synchronize_func (aq))
		      {
			gomp_mutex_unlock (&devicep->lock);
			exit (EXIT_FAILURE);
		      }
		  }
		if (struct_cpy)
		  struct_cpy--;
		break;
	      case GOMP_MAP_ATTACH:
	      case GOMP_MAP_POINTER:
	      case GOMP_MAP_ALWAYS_POINTER:
		n2 = gomp_map_rev_lookup (&devicep->mem_map,
					  devaddrs[i] + sizes[i],
					  devaddrs[i] + sizes[i]
					  + sizeof (void*), false);
		cdata[i].present = n2 != NULL;
		cdata[i].devaddr = devaddrs[i];
		if (n2)
		  devaddrs[i] = (n2->host_start + cdata[i].devaddr
				 - (n2->tgt->tgt_start + n2->tgt_offset));
		else
		  {
		    j = gomp_map_cdata_lookup (cdata, devaddrs, kinds, sizes, i,
					       devaddrs[i] + sizes[i],
					       devaddrs[i] + sizes[i]
					       + sizeof (void*), false);
		    if (j < i)
		      {
			cdata[i].present = true;
			devaddrs[i] = (devaddrs[j] + devaddrs[i]
				       - cdata[j].devaddr);
		      }
		  }
		if (!cdata[i].present)
		  devaddrs[i] = (uintptr_t) gomp_malloc (sizeof (void*));
		/* Assume that when present, the pointer is already correct.  */
		if (!n2)
		  *(uint64_t *) (uintptr_t) (devaddrs[i] + sizes[i])
		    = devaddrs[i-1];
		break;
	      case GOMP_MAP_TO_PSET:
		/* Assume that when present, the pointers are fine and no 'to:'
		   is required.  */
		n2 = gomp_map_rev_lookup (&devicep->mem_map,
					  devaddrs[i], devaddrs[i] + sizes[i],
					  false);
		cdata[i].present = n2 != NULL;
		cdata[i].devaddr = devaddrs[i];
		if (n2)
		  devaddrs[i] = (n2->host_start + cdata[i].devaddr
				 - (n2->tgt->tgt_start + n2->tgt_offset));
		else
		  {
		    j = gomp_map_cdata_lookup (cdata, devaddrs, kinds, sizes, i,
					       devaddrs[i],
					       devaddrs[i] + sizes[i], false);
		    if (j < i)
		      {
			cdata[i].present = true;
			devaddrs[i] = (devaddrs[j] + devaddrs[i]
				       - cdata[j].devaddr);
		      }
		  }
		if (!cdata[i].present)
		  {
		    cdata[i].aligned = true;
		    size_t align = (size_t) 1 << (kinds[i] >> 8);
		    devaddrs[i]
		      = (uint64_t) (uintptr_t) gomp_aligned_alloc (align,
								   sizes[i]);
		    gomp_copy_dev2host (devicep, aq,
					(void *) (uintptr_t) devaddrs[i],
					(void *) (uintptr_t) cdata[i].devaddr,
					sizes[i]);
		    if (aq && !devicep->openacc.async.synchronize_func (aq))
		      {
			gomp_mutex_unlock (&devicep->lock);
			exit (EXIT_FAILURE);
		      }
		  }
		/* Translate the pointer members that follow the pointer
		   set; they must point at (already translated) host
		   addresses.  */
		for (j = i + 1; j < mapnum; j++)
		  {
		    kind = get_kind (short_mapkind, kinds, j) & typemask;
		    if (!GOMP_MAP_ALWAYS_POINTER_P (kind)
			&& !GOMP_MAP_POINTER_P (kind))
		      break;
		    if (devaddrs[j] < devaddrs[i])
		      break;
		    if (cdata[i].present)
		      continue;
		    if (devaddrs[j] == 0)
		      {
			*(uint64_t *) (uintptr_t) (devaddrs[i] + sizes[j]) = 0;
			continue;
		      }
		    int k;
		    n2 = NULL;
		    /* Dereference devaddrs[j] to get the device addr.  */
		    assert (devaddrs[j] - sizes[j] == cdata[i].devaddr);
		    devaddrs[j] = *(uint64_t *) (uintptr_t) (devaddrs[i]
							     + sizes[j]);
		    cdata[j].present = true;
		    cdata[j].devaddr = devaddrs[j];
		    if (devaddrs[j] == 0)
		      continue;
		    k = gomp_map_cdata_lookup (cdata, devaddrs, kinds, sizes, j,
					       devaddrs[j],
					       devaddrs[j] + sizeof (void*),
					       false);
		    if (k < j)
		      devaddrs[j] = (devaddrs[k] + devaddrs[j]
				     - cdata[k].devaddr);
		    else
		      {
			n2 = gomp_map_rev_lookup (&devicep->mem_map,
						  devaddrs[j],
						  devaddrs[j] + sizeof (void*),
						  false);
			if (n2 == NULL)
			  {
			    gomp_mutex_unlock (&devicep->lock);
			    gomp_fatal ("Pointer target wasn't mapped");
			  }
			devaddrs[j] = (n2->host_start + cdata[j].devaddr
				       - (n2->tgt->tgt_start + n2->tgt_offset));
		      }
		    *(void **) (uintptr_t) (devaddrs[i] + sizes[j])
		      = (void *) (uintptr_t) devaddrs[j];
		  }
		i = j -1;
		break;
	      case GOMP_MAP_STRUCT:
		/* sizes[i] is the number of following component mappings;
		   devaddrs[i+1] is the first and devaddrs[i + sizes[i]] the
		   last component of the struct.  */
		n2 = gomp_map_rev_lookup (&devicep->mem_map, devaddrs[i+1],
					  devaddrs[i + sizes[i]]
					  + sizes[i + sizes[i]], false);
		cdata[i].present = n2 != NULL;
		cdata[i].devaddr = devaddrs[i];
		struct_cpy = cdata[i].present ? 0 : sizes[i];
		if (!n2)
		  {
		    size_t sz = (size_t) (devaddrs[i + sizes[i]]
					  - devaddrs[i+1]
					  + sizes[i + sizes[i]]);
		    size_t align = (size_t) 1 << (kinds[i] >> 8);
		    cdata[i].aligned = true;
		    devaddrs[i] = (uintptr_t) gomp_aligned_alloc (align, sz);
		    /* Bias so that devaddrs[i] + (component offset) lands in
		       the freshly allocated buffer.  */
		    devaddrs[i] -= devaddrs[i+1] - cdata[i].devaddr;
		  }
		else
		  devaddrs[i] = (n2->host_start + cdata[i].devaddr
				 - (n2->tgt->tgt_start + n2->tgt_offset));
		break;
	      default:
		gomp_mutex_unlock (&devicep->lock);
		gomp_fatal ("gomp_target_rev unhandled kind 0x%.4x", kinds[i]);
	    }
	}
      gomp_mutex_unlock (&devicep->lock);
    }

  /* Run the reverse-offload region on the host.  */
  host_fn (devaddrs);

  /* Copy back 'from'-style data to the device and release the host
     temporaries allocated above.  */
  if (!(devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM) && mapnum > 0)
    {
      uint64_t struct_cpy = 0;
      bool clean_struct = false;
      for (uint64_t i = 0; i < mapnum; i++)
	{
	  if (cdata[i].devaddr == 0)
	    continue;
	  int kind = get_kind (short_mapkind, kinds, i) & typemask;
	  bool copy = !cdata[i].present || struct_cpy;
	  switch (kind)
	    {
	      case GOMP_MAP_FORCE_FROM:
	      case GOMP_MAP_FORCE_TOFROM:
	      case GOMP_MAP_ALWAYS_FROM:
	      case GOMP_MAP_ALWAYS_TOFROM:
	      case GOMP_MAP_ALWAYS_PRESENT_FROM:
	      case GOMP_MAP_ALWAYS_PRESENT_TOFROM:
		copy = true;
		/* FALLTHRU */
	      case GOMP_MAP_FROM:
	      case GOMP_MAP_TOFROM:
		if (copy)
		  {
		    gomp_copy_host2dev (devicep, aq,
					(void *) (uintptr_t) cdata[i].devaddr,
					(void *) (uintptr_t) devaddrs[i],
					sizes[i], false, NULL);
		    if (aq && !devicep->openacc.async.synchronize_func (aq))
		      exit (EXIT_FAILURE);
		  }
		/* FALLTHRU */
	      default:
		break;
	    }
	  if (struct_cpy)
	    {
	      struct_cpy--;
	      continue;
	    }
	  if (kind == GOMP_MAP_STRUCT && !cdata[i].present)
	    {
	      /* Defer freeing struct buffers; their components are still
		 needed while struct_cpy counts down.  */
	      clean_struct = true;
	      struct_cpy = sizes[i];
	    }
	  else if (!cdata[i].present && cdata[i].aligned)
	    gomp_aligned_free ((void *) (uintptr_t) devaddrs[i]);
	  else if (!cdata[i].present)
	    free ((void *) (uintptr_t) devaddrs[i]);
	}
      if (clean_struct)
	for (uint64_t i = 0; i < mapnum; i++)
	  if (!cdata[i].present
	      && ((get_kind (short_mapkind, kinds, i) & typemask)
		  == GOMP_MAP_STRUCT))
	    {
	      /* Undo the bias applied at allocation time before freeing.  */
	      devaddrs[i] += cdata[i+1].devaddr - cdata[i].devaddr;
	      gomp_aligned_free ((void *) (uintptr_t) devaddrs[i]);
	    }

      free (devaddrs);
      free (sizes);
      free (kinds);
    }
}
3779 | ||
e01d41e5 | 3780 | /* Host fallback for GOMP_target_data{,_ext} routines. */ |
d9a6bd32 JJ |
3781 | |
3782 | static void | |
1bfc07d1 | 3783 | gomp_target_data_fallback (struct gomp_device_descr *devicep) |
d9a6bd32 JJ |
3784 | { |
3785 | struct gomp_task_icv *icv = gomp_icv (false); | |
1bfc07d1 KCY |
3786 | |
3787 | if (gomp_target_offload_var == GOMP_TARGET_OFFLOAD_MANDATORY | |
3788 | && devicep != NULL) | |
3789 | gomp_fatal ("OMP_TARGET_OFFLOAD is set to MANDATORY, but device cannot " | |
3790 | "be used for offloading"); | |
3791 | ||
d9a6bd32 JJ |
3792 | if (icv->target_data) |
3793 | { | |
3794 | /* Even when doing a host fallback, if there are any active | |
3795 | #pragma omp target data constructs, need to remember the | |
3796 | new #pragma omp target data, otherwise GOMP_target_end_data | |
3797 | would get out of sync. */ | |
3798 | struct target_mem_desc *tgt | |
3799 | = gomp_map_vars (NULL, 0, NULL, NULL, NULL, NULL, false, | |
275c736e | 3800 | NULL, GOMP_MAP_VARS_DATA); |
d9a6bd32 JJ |
3801 | tgt->prev = icv->target_data; |
3802 | icv->target_data = tgt; | |
3803 | } | |
3804 | } | |
3805 | ||
acf0174b | 3806 | void |
128b26dc | 3807 | GOMP_target_data (int device, const void *unused, size_t mapnum, |
acf0174b JJ |
3808 | void **hostaddrs, size_t *sizes, unsigned char *kinds) |
3809 | { | |
1158fe43 | 3810 | struct gomp_device_descr *devicep = resolve_device (device, true); |
41dbbb37 TS |
3811 | |
3812 | if (devicep == NULL | |
b2b40051 MJ |
3813 | || !(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400) |
3814 | || (devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)) | |
1bfc07d1 | 3815 | return gomp_target_data_fallback (devicep); |
41dbbb37 | 3816 | |
1df3f842 | 3817 | struct target_mem_desc *tgt |
41dbbb37 | 3818 | = gomp_map_vars (devicep, mapnum, hostaddrs, NULL, sizes, kinds, false, |
275c736e | 3819 | NULL, GOMP_MAP_VARS_DATA); |
d9a6bd32 JJ |
3820 | struct gomp_task_icv *icv = gomp_icv (true); |
3821 | tgt->prev = icv->target_data; | |
3822 | icv->target_data = tgt; | |
3823 | } | |
3824 | ||
3825 | void | |
e01d41e5 JJ |
3826 | GOMP_target_data_ext (int device, size_t mapnum, void **hostaddrs, |
3827 | size_t *sizes, unsigned short *kinds) | |
d9a6bd32 | 3828 | { |
1158fe43 | 3829 | struct gomp_device_descr *devicep = resolve_device (device, true); |
d9a6bd32 JJ |
3830 | |
3831 | if (devicep == NULL | |
b2b40051 MJ |
3832 | || !(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400) |
3833 | || devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM) | |
1bfc07d1 | 3834 | return gomp_target_data_fallback (devicep); |
d9a6bd32 JJ |
3835 | |
3836 | struct target_mem_desc *tgt | |
3837 | = gomp_map_vars (devicep, mapnum, hostaddrs, NULL, sizes, kinds, true, | |
275c736e | 3838 | NULL, GOMP_MAP_VARS_DATA); |
1df3f842 JJ |
3839 | struct gomp_task_icv *icv = gomp_icv (true); |
3840 | tgt->prev = icv->target_data; | |
3841 | icv->target_data = tgt; | |
acf0174b JJ |
3842 | } |
3843 | ||
3844 | void | |
3845 | GOMP_target_end_data (void) | |
3846 | { | |
1df3f842 JJ |
3847 | struct gomp_task_icv *icv = gomp_icv (false); |
3848 | if (icv->target_data) | |
3849 | { | |
3850 | struct target_mem_desc *tgt = icv->target_data; | |
3851 | icv->target_data = tgt->prev; | |
275c736e | 3852 | gomp_unmap_vars (tgt, true, NULL); |
1df3f842 | 3853 | } |
acf0174b JJ |
3854 | } |
3855 | ||
3856 | void | |
128b26dc | 3857 | GOMP_target_update (int device, const void *unused, size_t mapnum, |
acf0174b JJ |
3858 | void **hostaddrs, size_t *sizes, unsigned char *kinds) |
3859 | { | |
1158fe43 | 3860 | struct gomp_device_descr *devicep = resolve_device (device, true); |
41dbbb37 TS |
3861 | |
3862 | if (devicep == NULL | |
b2b40051 MJ |
3863 | || !(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400) |
3864 | || devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM) | |
1df3f842 JJ |
3865 | return; |
3866 | ||
d9a6bd32 JJ |
3867 | gomp_update (devicep, mapnum, hostaddrs, sizes, kinds, false); |
3868 | } | |
3869 | ||
/* Current entry point for #pragma omp target update, using short (16-bit)
   map kinds plus FLAGS (nowait) and DEPEND clauses.  */

void
GOMP_target_update_ext (int device, size_t mapnum, void **hostaddrs,
			size_t *sizes, unsigned short *kinds,
			unsigned int flags, void **depend)
{
  struct gomp_device_descr *devicep = resolve_device (device, true);

  /* If there are depend clauses, but nowait is not present,
     block the parent task until the dependencies are resolved
     and then just continue with the rest of the function as if it
     is a merged task.  Until we are able to schedule task during
     variable mapping or unmapping, ignore nowait if depend clauses
     are not present.  */
  if (depend != NULL)
    {
      struct gomp_thread *thr = gomp_thread ();
      if (thr->task && thr->task->depend_hash)
	{
	  if ((flags & GOMP_TARGET_FLAG_NOWAIT)
	      && thr->ts.team
	      && !thr->task->final_task)
	    {
	      /* nowait + depend: hand the update off as a deferred target
		 task; only fall through if that could not be created.  */
	      if (gomp_create_target_task (devicep, (void (*) (void *)) NULL,
					   mapnum, hostaddrs, sizes, kinds,
					   flags | GOMP_TARGET_FLAG_UPDATE,
					   depend, NULL, GOMP_TARGET_TASK_DATA))
		return;
	    }
	  else
	    {
	      struct gomp_team *team = thr->ts.team;
	      /* If parallel or taskgroup has been cancelled, don't start new
		 tasks.  */
	      if (__builtin_expect (gomp_cancel_var, 0) && team)
		{
		  if (gomp_team_barrier_cancelled (&team->barrier))
		    return;
		  if (thr->task->taskgroup)
		    {
		      if (thr->task->taskgroup->cancelled)
			return;
		      if (thr->task->taskgroup->workshare
			  && thr->task->taskgroup->prev
			  && thr->task->taskgroup->prev->cancelled)
			return;
		    }
		}

	      gomp_task_maybe_wait_for_dependencies (depend);
	    }
	}
    }

  /* Host fallback or shared memory: nothing to update.  */
  if (devicep == NULL
      || !(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
      || devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
    return;

  struct gomp_thread *thr = gomp_thread ();
  struct gomp_team *team = thr->ts.team;
  /* If parallel or taskgroup has been cancelled, don't start new tasks.  */
  if (__builtin_expect (gomp_cancel_var, 0) && team)
    {
      if (gomp_team_barrier_cancelled (&team->barrier))
	return;
      if (thr->task->taskgroup)
	{
	  if (thr->task->taskgroup->cancelled)
	    return;
	  if (thr->task->taskgroup->workshare
	      && thr->task->taskgroup->prev
	      && thr->task->taskgroup->prev->cancelled)
	    return;
	}
    }

  gomp_update (devicep, mapnum, hostaddrs, sizes, kinds, true);
}
3948 | ||
/* Implementation of the "target exit data" semantics: for each list item
   decrement its reference count, copy data back to the host where the map
   kind requires it, process detach operations, and finally remove
   unreferenced variables from the device.  Runs entirely under
   DEVICEP->lock.  REFCOUNT_SET tracks keys whose refcount was already
   decremented in this construct (see gomp_decrement_refcount).  */

static void
gomp_exit_data (struct gomp_device_descr *devicep, size_t mapnum,
		void **hostaddrs, size_t *sizes, unsigned short *kinds,
		htab_t *refcount_set)
{
  const int typemask = 0xff;
  size_t i;
  gomp_mutex_lock (&devicep->lock);
  if (devicep->state == GOMP_DEVICE_FINALIZED)
    {
      gomp_mutex_unlock (&devicep->lock);
      return;
    }

  /* Process all detach operations first, before any copyback/removal,
     so that detached pointers are restored while their mappings are
     still guaranteed to exist.  */
  for (i = 0; i < mapnum; i++)
    if ((kinds[i] & typemask) == GOMP_MAP_DETACH)
      {
	struct splay_tree_key_s cur_node;
	cur_node.host_start = (uintptr_t) hostaddrs[i];
	cur_node.host_end = cur_node.host_start + sizeof (void *);
	splay_tree_key n = splay_tree_lookup (&devicep->mem_map, &cur_node);

	if (n)
	  gomp_detach_pointer (devicep, NULL, n, (uintptr_t) hostaddrs[i],
			       false, NULL);
      }

  /* Keys queued for removal; processed after the main loop (see the
     comment below).  At most one entry per list item.  */
  int nrmvars = 0;
  splay_tree_key remove_vars[mapnum];

  for (i = 0; i < mapnum; i++)
    {
      struct splay_tree_key_s cur_node;
      unsigned char kind = kinds[i] & typemask;
      switch (kind)
	{
	case GOMP_MAP_FROM:
	case GOMP_MAP_ALWAYS_FROM:
	case GOMP_MAP_DELETE:
	case GOMP_MAP_RELEASE:
	case GOMP_MAP_ZERO_LEN_ARRAY_SECTION:
	case GOMP_MAP_DELETE_ZERO_LEN_ARRAY_SECTION:
	  cur_node.host_start = (uintptr_t) hostaddrs[i];
	  cur_node.host_end = cur_node.host_start + sizes[i];
	  splay_tree_key k = (kind == GOMP_MAP_DELETE_ZERO_LEN_ARRAY_SECTION
			      || kind == GOMP_MAP_ZERO_LEN_ARRAY_SECTION)
	    ? gomp_map_0len_lookup (&devicep->mem_map, &cur_node)
	    : splay_tree_lookup (&devicep->mem_map, &cur_node);
	  /* Item not mapped on this device: nothing to do.  */
	  if (!k)
	    continue;

	  bool delete_p = (kind == GOMP_MAP_DELETE
			   || kind == GOMP_MAP_DELETE_ZERO_LEN_ARRAY_SECTION);
	  bool do_copy, do_remove;
	  gomp_decrement_refcount (k, refcount_set, delete_p, &do_copy,
				   &do_remove);

	  /* "from" copies back only when the refcount dropped to zero;
	     "always from" copies back unconditionally.  */
	  if ((kind == GOMP_MAP_FROM && do_copy)
	      || kind == GOMP_MAP_ALWAYS_FROM)
	    {
	      if (k->aux && k->aux->attach_count)
		{
		  /* We have to be careful not to overwrite still attached
		     pointers during the copyback to host.  */
		  uintptr_t addr = k->host_start;
		  while (addr < k->host_end)
		    {
		      size_t i = (addr - k->host_start) / sizeof (void *);
		      if (k->aux->attach_count[i] == 0)
			gomp_copy_dev2host (devicep, NULL, (void *) addr,
					    (void *) (k->tgt->tgt_start
						      + k->tgt_offset
						      + addr - k->host_start),
					    sizeof (void *));
		      addr += sizeof (void *);
		    }
		}
	      else
		gomp_copy_dev2host (devicep, NULL, (void *) cur_node.host_start,
				    (void *) (k->tgt->tgt_start + k->tgt_offset
					      + cur_node.host_start
					      - k->host_start),
				    cur_node.host_end - cur_node.host_start);
	    }

	  /* Structure elements lists are removed altogether at once, which
	     may cause immediate deallocation of the target_mem_desc, causing
	     errors if we still have following element siblings to copy back.
	     While we're at it, it also seems more disciplined to simply
	     queue all removals together for processing below.

	     Structured block unmapping (i.e. gomp_unmap_vars_internal) should
	     not have this problem, since they maintain an additional
	     tgt->refcount = 1 reference to the target_mem_desc to start with.
	     */
	  if (do_remove)
	    remove_vars[nrmvars++] = k;
	  break;

	case GOMP_MAP_DETACH:
	  /* Already handled in the first loop above.  */
	  break;
	default:
	  gomp_mutex_unlock (&devicep->lock);
	  gomp_fatal ("GOMP_target_enter_exit_data unhandled kind 0x%.2x",
		      kind);
	}
    }

  for (int i = 0; i < nrmvars; i++)
    gomp_remove_var (devicep, remove_vars[i]);

  gomp_mutex_unlock (&devicep->lock);
}
41dbbb37 | 4062 | |
d9a6bd32 JJ |
/* Entry point for the "target enter data" and "target exit data"
   constructs.  FLAGS' GOMP_TARGET_FLAG_EXIT_DATA bit selects between
   mapping (enter) and unmapping (exit); DEPEND optionally carries task
   dependencies, honored the same way as in GOMP_target_update_ext.  */

void
GOMP_target_enter_exit_data (int device, size_t mapnum, void **hostaddrs,
			     size_t *sizes, unsigned short *kinds,
			     unsigned int flags, void **depend)
{
  struct gomp_device_descr *devicep = resolve_device (device, true);

  /* If there are depend clauses, but nowait is not present,
     block the parent task until the dependencies are resolved
     and then just continue with the rest of the function as if it
     is a merged task.  Until we are able to schedule task during
     variable mapping or unmapping, ignore nowait if depend clauses
     are not present.  */
  if (depend != NULL)
    {
      struct gomp_thread *thr = gomp_thread ();
      if (thr->task && thr->task->depend_hash)
	{
	  if ((flags & GOMP_TARGET_FLAG_NOWAIT)
	      && thr->ts.team
	      && !thr->task->final_task)
	    {
	      /* nowait + depend: defer the work to a target task.  */
	      if (gomp_create_target_task (devicep, (void (*) (void *)) NULL,
					   mapnum, hostaddrs, sizes, kinds,
					   flags, depend, NULL,
					   GOMP_TARGET_TASK_DATA))
		return;
	    }
	  else
	    {
	      struct gomp_team *team = thr->ts.team;
	      /* If parallel or taskgroup has been cancelled, don't start new
		 tasks.  */
	      if (__builtin_expect (gomp_cancel_var, 0) && team)
		{
		  if (gomp_team_barrier_cancelled (&team->barrier))
		    return;
		  if (thr->task->taskgroup)
		    {
		      if (thr->task->taskgroup->cancelled)
			return;
		      if (thr->task->taskgroup->workshare
			  && thr->task->taskgroup->prev
			  && thr->task->taskgroup->prev->cancelled)
			return;
		    }
		}

	      gomp_task_maybe_wait_for_dependencies (depend);
	    }
	}
    }

  /* Host fallback and shared-memory devices need no mapping work.  */
  if (devicep == NULL
      || !(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
      || devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
    return;

  struct gomp_thread *thr = gomp_thread ();
  struct gomp_team *team = thr->ts.team;
  /* If parallel or taskgroup has been cancelled, don't start new tasks.  */
  if (__builtin_expect (gomp_cancel_var, 0) && team)
    {
      if (gomp_team_barrier_cancelled (&team->barrier))
	return;
      if (thr->task->taskgroup)
	{
	  if (thr->task->taskgroup->cancelled)
	    return;
	  if (thr->task->taskgroup->workshare
	      && thr->task->taskgroup->prev
	      && thr->task->taskgroup->prev->cancelled)
	    return;
	}
    }

  htab_t refcount_set = htab_create (mapnum);

  /* The variables are mapped separately such that they can be released
     independently.  */
  size_t i, j;
  if ((flags & GOMP_TARGET_FLAG_EXIT_DATA) == 0)
    for (i = 0; i < mapnum; i++)
      if ((kinds[i] & 0xff) == GOMP_MAP_STRUCT)
	{
	  /* A struct entry is followed by sizes[i] member entries; map
	     them as one group and skip over them.  */
	  gomp_map_vars (devicep, sizes[i] + 1, &hostaddrs[i], NULL, &sizes[i],
			 &kinds[i], true, &refcount_set,
			 GOMP_MAP_VARS_ENTER_DATA);
	  i += sizes[i];
	}
      else if ((kinds[i] & 0xff) == GOMP_MAP_TO_PSET)
	{
	  /* A pointer-set entry is grouped with the run of trailing
	     pointer entries that belong to it.  */
	  for (j = i + 1; j < mapnum; j++)
	    if (!GOMP_MAP_POINTER_P (get_kind (true, kinds, j) & 0xff)
		&& !GOMP_MAP_ALWAYS_POINTER_P (get_kind (true, kinds, j) & 0xff))
	      break;
	  gomp_map_vars (devicep, j-i, &hostaddrs[i], NULL, &sizes[i],
			 &kinds[i], true, &refcount_set,
			 GOMP_MAP_VARS_ENTER_DATA);
	  i += j - i - 1;
	}
      else if (i + 1 < mapnum
	       && ((kinds[i + 1] & 0xff) == GOMP_MAP_ATTACH
		   || ((kinds[i + 1] & 0xff) == GOMP_MAP_ALWAYS_POINTER
		       && (kinds[i] & 0xff) != GOMP_MAP_ALWAYS_POINTER)))
	{
	  /* An attach operation must be processed together with the mapped
	     base-pointer list item.  */
	  gomp_map_vars (devicep, 2, &hostaddrs[i], NULL, &sizes[i], &kinds[i],
			 true, &refcount_set, GOMP_MAP_VARS_ENTER_DATA);
	  i += 1;
	}
      else
	gomp_map_vars (devicep, 1, &hostaddrs[i], NULL, &sizes[i], &kinds[i],
		       true, &refcount_set, GOMP_MAP_VARS_ENTER_DATA);
  else
    gomp_exit_data (devicep, mapnum, hostaddrs, sizes, kinds, &refcount_set);
  htab_free (refcount_set);
}
4182 | ||
e4606348 | 4183 | bool |
d9a6bd32 JJ |
4184 | gomp_target_task_fn (void *data) |
4185 | { | |
4186 | struct gomp_target_task *ttask = (struct gomp_target_task *) data; | |
e4606348 JJ |
4187 | struct gomp_device_descr *devicep = ttask->devicep; |
4188 | ||
d9a6bd32 JJ |
4189 | if (ttask->fn != NULL) |
4190 | { | |
d84ffc0a | 4191 | void *fn_addr; |
e4606348 | 4192 | if (devicep == NULL |
d84ffc0a | 4193 | || !(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400) |
b2b40051 MJ |
4194 | || !(fn_addr = gomp_get_target_fn_addr (devicep, ttask->fn)) |
4195 | || (devicep->can_run_func && !devicep->can_run_func (fn_addr))) | |
e4606348 JJ |
4196 | { |
4197 | ttask->state = GOMP_TARGET_TASK_FALLBACK; | |
aea72386 JJ |
4198 | gomp_target_fallback (ttask->fn, ttask->hostaddrs, devicep, |
4199 | ttask->args); | |
e4606348 JJ |
4200 | return false; |
4201 | } | |
4202 | ||
4203 | if (ttask->state == GOMP_TARGET_TASK_FINISHED) | |
4204 | { | |
b2b40051 | 4205 | if (ttask->tgt) |
275c736e | 4206 | gomp_unmap_vars (ttask->tgt, true, NULL); |
e4606348 JJ |
4207 | return false; |
4208 | } | |
4209 | ||
b2b40051 MJ |
4210 | void *actual_arguments; |
4211 | if (devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM) | |
4212 | { | |
4213 | ttask->tgt = NULL; | |
b2b40051 MJ |
4214 | actual_arguments = ttask->hostaddrs; |
4215 | } | |
4216 | else | |
4217 | { | |
4218 | ttask->tgt = gomp_map_vars (devicep, ttask->mapnum, ttask->hostaddrs, | |
4219 | NULL, ttask->sizes, ttask->kinds, true, | |
275c736e | 4220 | NULL, GOMP_MAP_VARS_TARGET); |
b2b40051 MJ |
4221 | actual_arguments = (void *) ttask->tgt->tgt_start; |
4222 | } | |
e4606348 JJ |
4223 | ttask->state = GOMP_TARGET_TASK_READY_TO_RUN; |
4224 | ||
001ab12e | 4225 | assert (devicep->async_run_func); |
b2b40051 MJ |
4226 | devicep->async_run_func (devicep->target_id, fn_addr, actual_arguments, |
4227 | ttask->args, (void *) ttask); | |
e4606348 | 4228 | return true; |
d9a6bd32 | 4229 | } |
e4606348 | 4230 | else if (devicep == NULL |
b2b40051 MJ |
4231 | || !(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400) |
4232 | || devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM) | |
e4606348 | 4233 | return false; |
d9a6bd32 JJ |
4234 | |
4235 | size_t i; | |
4236 | if (ttask->flags & GOMP_TARGET_FLAG_UPDATE) | |
e4606348 | 4237 | gomp_update (devicep, ttask->mapnum, ttask->hostaddrs, ttask->sizes, |
d9a6bd32 | 4238 | ttask->kinds, true); |
d9a6bd32 | 4239 | else |
275c736e CLT |
4240 | { |
4241 | htab_t refcount_set = htab_create (ttask->mapnum); | |
4242 | if ((ttask->flags & GOMP_TARGET_FLAG_EXIT_DATA) == 0) | |
4243 | for (i = 0; i < ttask->mapnum; i++) | |
4244 | if ((ttask->kinds[i] & 0xff) == GOMP_MAP_STRUCT) | |
4245 | { | |
4246 | gomp_map_vars (devicep, ttask->sizes[i] + 1, &ttask->hostaddrs[i], | |
4247 | NULL, &ttask->sizes[i], &ttask->kinds[i], true, | |
4248 | &refcount_set, GOMP_MAP_VARS_ENTER_DATA); | |
4249 | i += ttask->sizes[i]; | |
4250 | } | |
4251 | else | |
4252 | gomp_map_vars (devicep, 1, &ttask->hostaddrs[i], NULL, &ttask->sizes[i], | |
4253 | &ttask->kinds[i], true, &refcount_set, | |
4254 | GOMP_MAP_VARS_ENTER_DATA); | |
4255 | else | |
4256 | gomp_exit_data (devicep, ttask->mapnum, ttask->hostaddrs, ttask->sizes, | |
4257 | ttask->kinds, &refcount_set); | |
4258 | htab_free (refcount_set); | |
4259 | } | |
e4606348 | 4260 | return false; |
acf0174b JJ |
4261 | } |
4262 | ||
4263 | void | |
4264 | GOMP_teams (unsigned int num_teams, unsigned int thread_limit) | |
4265 | { | |
4266 | if (thread_limit) | |
4267 | { | |
4268 | struct gomp_task_icv *icv = gomp_icv (true); | |
4269 | icv->thread_limit_var | |
4270 | = thread_limit > INT_MAX ? UINT_MAX : thread_limit; | |
4271 | } | |
4272 | (void) num_teams; | |
4273 | } | |
1df3f842 | 4274 | |
7d6da11f JJ |
4275 | bool |
4276 | GOMP_teams4 (unsigned int num_teams_low, unsigned int num_teams_high, | |
4277 | unsigned int thread_limit, bool first) | |
4278 | { | |
4279 | struct gomp_thread *thr = gomp_thread (); | |
4280 | if (first) | |
4281 | { | |
4282 | if (thread_limit) | |
4283 | { | |
4284 | struct gomp_task_icv *icv = gomp_icv (true); | |
4285 | icv->thread_limit_var | |
4286 | = thread_limit > INT_MAX ? UINT_MAX : thread_limit; | |
4287 | } | |
4288 | (void) num_teams_high; | |
4289 | if (num_teams_low == 0) | |
4290 | num_teams_low = 1; | |
4291 | thr->num_teams = num_teams_low - 1; | |
4292 | thr->team_num = 0; | |
4293 | } | |
4294 | else if (thr->team_num == thr->num_teams) | |
4295 | return false; | |
4296 | else | |
4297 | ++thr->team_num; | |
4298 | return true; | |
4299 | } | |
4300 | ||
d9a6bd32 JJ |
4301 | void * |
4302 | omp_target_alloc (size_t size, int device_num) | |
4303 | { | |
1158fe43 JJ |
4304 | if (device_num == omp_initial_device |
4305 | || device_num == gomp_get_num_devices ()) | |
d9a6bd32 JJ |
4306 | return malloc (size); |
4307 | ||
1158fe43 | 4308 | struct gomp_device_descr *devicep = resolve_device (device_num, false); |
d9a6bd32 JJ |
4309 | if (devicep == NULL) |
4310 | return NULL; | |
4311 | ||
b2b40051 MJ |
4312 | if (!(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400) |
4313 | || devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM) | |
d9a6bd32 JJ |
4314 | return malloc (size); |
4315 | ||
4316 | gomp_mutex_lock (&devicep->lock); | |
4317 | void *ret = devicep->alloc_func (devicep->target_id, size); | |
4318 | gomp_mutex_unlock (&devicep->lock); | |
4319 | return ret; | |
4320 | } | |
4321 | ||
4322 | void | |
4323 | omp_target_free (void *device_ptr, int device_num) | |
4324 | { | |
1158fe43 JJ |
4325 | if (device_num == omp_initial_device |
4326 | || device_num == gomp_get_num_devices ()) | |
d9a6bd32 JJ |
4327 | { |
4328 | free (device_ptr); | |
4329 | return; | |
4330 | } | |
4331 | ||
1158fe43 JJ |
4332 | struct gomp_device_descr *devicep = resolve_device (device_num, false); |
4333 | if (devicep == NULL || device_ptr == NULL) | |
d9a6bd32 JJ |
4334 | return; |
4335 | ||
b2b40051 MJ |
4336 | if (!(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400) |
4337 | || devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM) | |
d9a6bd32 JJ |
4338 | { |
4339 | free (device_ptr); | |
4340 | return; | |
4341 | } | |
4342 | ||
4343 | gomp_mutex_lock (&devicep->lock); | |
6ce13072 | 4344 | gomp_free_device_memory (devicep, device_ptr); |
d9a6bd32 JJ |
4345 | gomp_mutex_unlock (&devicep->lock); |
4346 | } | |
4347 | ||
4348 | int | |
28567c40 | 4349 | omp_target_is_present (const void *ptr, int device_num) |
d9a6bd32 | 4350 | { |
1158fe43 JJ |
4351 | if (device_num == omp_initial_device |
4352 | || device_num == gomp_get_num_devices ()) | |
d9a6bd32 JJ |
4353 | return 1; |
4354 | ||
1158fe43 | 4355 | struct gomp_device_descr *devicep = resolve_device (device_num, false); |
d9a6bd32 JJ |
4356 | if (devicep == NULL) |
4357 | return 0; | |
4358 | ||
1158fe43 JJ |
4359 | if (ptr == NULL) |
4360 | return 1; | |
4361 | ||
b2b40051 MJ |
4362 | if (!(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400) |
4363 | || devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM) | |
d9a6bd32 JJ |
4364 | return 1; |
4365 | ||
4366 | gomp_mutex_lock (&devicep->lock); | |
4367 | struct splay_tree_s *mem_map = &devicep->mem_map; | |
4368 | struct splay_tree_key_s cur_node; | |
4369 | ||
4370 | cur_node.host_start = (uintptr_t) ptr; | |
4371 | cur_node.host_end = cur_node.host_start; | |
e01d41e5 | 4372 | splay_tree_key n = gomp_map_0len_lookup (mem_map, &cur_node); |
d9a6bd32 JJ |
4373 | int ret = n != NULL; |
4374 | gomp_mutex_unlock (&devicep->lock); | |
4375 | return ret; | |
4376 | } | |
4377 | ||
6c420193 MV |
/* Validate the device numbers of an omp_target_memcpy family call and
   resolve them into device descriptors.  On success *DST_DEVICEP and
   *SRC_DEVICEP are set to the device, or left/reset to NULL when the
   corresponding side is the host (initial device, or a device whose
   memory is directly host-accessible).  Returns 0 on success or EINVAL
   for an invalid device number.  */

static int
omp_target_memcpy_check (int dst_device_num, int src_device_num,
			 struct gomp_device_descr **dst_devicep,
			 struct gomp_device_descr **src_devicep)
{
  if (dst_device_num != gomp_get_num_devices ()
      /* Above gomp_get_num_devices has to be called unconditionally.  */
      && dst_device_num != omp_initial_device)
    {
      *dst_devicep = resolve_device (dst_device_num, false);
      if (*dst_devicep == NULL)
	return EINVAL;

      /* Shared-memory / non-offload devices behave like the host.  */
      if (!((*dst_devicep)->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
	  || (*dst_devicep)->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
	*dst_devicep = NULL;
    }

  /* num_devices_openmp was set as a side effect of the
     gomp_get_num_devices call above, so it can be used directly here.  */
  if (src_device_num != num_devices_openmp
      && src_device_num != omp_initial_device)
    {
      *src_devicep = resolve_device (src_device_num, false);
      if (*src_devicep == NULL)
	return EINVAL;

      if (!((*src_devicep)->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
	  || (*src_devicep)->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
	*src_devicep = NULL;
    }

  return 0;
}
4410 | ||
4411 | static int | |
4412 | omp_target_memcpy_copy (void *dst, const void *src, size_t length, | |
4413 | size_t dst_offset, size_t src_offset, | |
4414 | struct gomp_device_descr *dst_devicep, | |
4415 | struct gomp_device_descr *src_devicep) | |
4416 | { | |
4417 | bool ret; | |
d9a6bd32 JJ |
4418 | if (src_devicep == NULL && dst_devicep == NULL) |
4419 | { | |
4420 | memcpy ((char *) dst + dst_offset, (char *) src + src_offset, length); | |
4421 | return 0; | |
4422 | } | |
4423 | if (src_devicep == NULL) | |
4424 | { | |
4425 | gomp_mutex_lock (&dst_devicep->lock); | |
6ce13072 CLT |
4426 | ret = dst_devicep->host2dev_func (dst_devicep->target_id, |
4427 | (char *) dst + dst_offset, | |
4428 | (char *) src + src_offset, length); | |
d9a6bd32 | 4429 | gomp_mutex_unlock (&dst_devicep->lock); |
6ce13072 | 4430 | return (ret ? 0 : EINVAL); |
d9a6bd32 JJ |
4431 | } |
4432 | if (dst_devicep == NULL) | |
4433 | { | |
4434 | gomp_mutex_lock (&src_devicep->lock); | |
6ce13072 CLT |
4435 | ret = src_devicep->dev2host_func (src_devicep->target_id, |
4436 | (char *) dst + dst_offset, | |
4437 | (char *) src + src_offset, length); | |
d9a6bd32 | 4438 | gomp_mutex_unlock (&src_devicep->lock); |
6ce13072 | 4439 | return (ret ? 0 : EINVAL); |
d9a6bd32 JJ |
4440 | } |
4441 | if (src_devicep == dst_devicep) | |
4442 | { | |
4443 | gomp_mutex_lock (&src_devicep->lock); | |
6ce13072 CLT |
4444 | ret = src_devicep->dev2dev_func (src_devicep->target_id, |
4445 | (char *) dst + dst_offset, | |
4446 | (char *) src + src_offset, length); | |
d9a6bd32 | 4447 | gomp_mutex_unlock (&src_devicep->lock); |
6ce13072 | 4448 | return (ret ? 0 : EINVAL); |
d9a6bd32 JJ |
4449 | } |
4450 | return EINVAL; | |
4451 | } | |
4452 | ||
6c420193 MV |
4453 | int |
4454 | omp_target_memcpy (void *dst, const void *src, size_t length, size_t dst_offset, | |
4455 | size_t src_offset, int dst_device_num, int src_device_num) | |
4456 | { | |
4457 | struct gomp_device_descr *dst_devicep = NULL, *src_devicep = NULL; | |
4458 | int ret = omp_target_memcpy_check (dst_device_num, src_device_num, | |
4459 | &dst_devicep, &src_devicep); | |
4460 | ||
4461 | if (ret) | |
4462 | return ret; | |
4463 | ||
4464 | ret = omp_target_memcpy_copy (dst, src, length, dst_offset, src_offset, | |
4465 | dst_devicep, src_devicep); | |
4466 | ||
4467 | return ret; | |
4468 | } | |
4469 | ||
/* Argument bundle carrying the omp_target_memcpy parameters into the
   task created by omp_target_memcpy_async.  */
typedef struct
{
  void *dst;			/* Destination base address.  */
  const void *src;		/* Source base address.  */
  size_t length;		/* Number of bytes to copy.  */
  size_t dst_offset;		/* Byte offset applied to dst.  */
  size_t src_offset;		/* Byte offset applied to src.  */
  struct gomp_device_descr *dst_devicep;  /* NULL means host side.  */
  struct gomp_device_descr *src_devicep;  /* NULL means host side.  */
} omp_target_memcpy_data;

/* Task function for omp_target_memcpy_async: perform the copy described
   by ARGS, aborting the program if the copy fails (an asynchronous call
   has no caller left to return an error code to).  */
static void
omp_target_memcpy_async_helper (void *args)
{
  omp_target_memcpy_data *a = args;
  if (omp_target_memcpy_copy (a->dst, a->src, a->length, a->dst_offset,
			      a->src_offset, a->dst_devicep, a->src_devicep))
    gomp_fatal ("omp_target_memcpy failed");
}
4489 | ||
/* OpenMP omp_target_memcpy_async: like omp_target_memcpy but executed
   as a task, optionally dependent on DEPOBJ_COUNT depend objects in
   DEPOBJ_LIST.  Returns 0 when the task was created, or the device
   validation error otherwise.  */

int
omp_target_memcpy_async (void *dst, const void *src, size_t length,
			 size_t dst_offset, size_t src_offset,
			 int dst_device_num, int src_device_num,
			 int depobj_count, omp_depend_t *depobj_list)
{
  struct gomp_device_descr *dst_devicep = NULL, *src_devicep = NULL;
  unsigned int flags = 0;
  /* Dependence vector in the GOMP_task ABI layout: 5 header slots
     followed by one pointer per depend object.  */
  void *depend[depobj_count + 5];
  int i;
  int check = omp_target_memcpy_check (dst_device_num, src_device_num,
				       &dst_devicep, &src_devicep);

  /* GOMP_task copies S by value (sizeof (s) bytes), so a stack-local
     argument block is sufficient even for a deferred task.  */
  omp_target_memcpy_data s = {
    .dst = dst,
    .src = src,
    .length = length,
    .dst_offset = dst_offset,
    .src_offset = src_offset,
    .dst_devicep = dst_devicep,
    .src_devicep = src_devicep
  };

  if (check)
    return check;

  if (depobj_count > 0 && depobj_list != NULL)
    {
      flags |= GOMP_TASK_FLAG_DEPEND;
      /* depend[0] == 0 together with depend[1] == count selects the
	 "all entries are depobj addresses" encoding.  */
      depend[0] = 0;
      depend[1] = (void *) (uintptr_t) depobj_count;
      depend[2] = depend[3] = depend[4] = 0;
      for (i = 0; i < depobj_count; ++i)
	depend[i + 5] = &depobj_list[i];
    }

  GOMP_task (omp_target_memcpy_async_helper, &s, NULL, sizeof (s),
	     __alignof__ (s), true, flags, depend, 0, NULL);

  return 0;
}
4531 | ||
d9a6bd32 | 4532 | static int |
28567c40 | 4533 | omp_target_memcpy_rect_worker (void *dst, const void *src, size_t element_size, |
d9a6bd32 JJ |
4534 | int num_dims, const size_t *volume, |
4535 | const size_t *dst_offsets, | |
4536 | const size_t *src_offsets, | |
4537 | const size_t *dst_dimensions, | |
4538 | const size_t *src_dimensions, | |
4539 | struct gomp_device_descr *dst_devicep, | |
25072a47 TB |
4540 | struct gomp_device_descr *src_devicep, |
4541 | size_t *tmp_size, void **tmp) | |
d9a6bd32 JJ |
4542 | { |
4543 | size_t dst_slice = element_size; | |
4544 | size_t src_slice = element_size; | |
4545 | size_t j, dst_off, src_off, length; | |
4546 | int i, ret; | |
4547 | ||
4548 | if (num_dims == 1) | |
4549 | { | |
4550 | if (__builtin_mul_overflow (element_size, volume[0], &length) | |
4551 | || __builtin_mul_overflow (element_size, dst_offsets[0], &dst_off) | |
4552 | || __builtin_mul_overflow (element_size, src_offsets[0], &src_off)) | |
4553 | return EINVAL; | |
8b9e559f TB |
4554 | if (dst_devicep == NULL && src_devicep == NULL) |
4555 | { | |
4556 | memcpy ((char *) dst + dst_off, (const char *) src + src_off, | |
4557 | length); | |
4558 | ret = 1; | |
4559 | } | |
4560 | else if (src_devicep == NULL) | |
4561 | ret = dst_devicep->host2dev_func (dst_devicep->target_id, | |
6ce13072 | 4562 | (char *) dst + dst_off, |
28567c40 JJ |
4563 | (const char *) src + src_off, |
4564 | length); | |
8b9e559f TB |
4565 | else if (dst_devicep == NULL) |
4566 | ret = src_devicep->dev2host_func (src_devicep->target_id, | |
6ce13072 | 4567 | (char *) dst + dst_off, |
28567c40 JJ |
4568 | (const char *) src + src_off, |
4569 | length); | |
d9a6bd32 | 4570 | else if (src_devicep == dst_devicep) |
6ce13072 CLT |
4571 | ret = src_devicep->dev2dev_func (src_devicep->target_id, |
4572 | (char *) dst + dst_off, | |
28567c40 JJ |
4573 | (const char *) src + src_off, |
4574 | length); | |
d9a6bd32 | 4575 | else |
25072a47 TB |
4576 | { |
4577 | if (*tmp_size == 0) | |
4578 | { | |
4579 | *tmp_size = length; | |
4580 | *tmp = malloc (length); | |
4581 | if (*tmp == NULL) | |
4582 | return ENOMEM; | |
4583 | } | |
4584 | else if (*tmp_size < length) | |
4585 | { | |
4586 | *tmp_size = length; | |
8b9e559f TB |
4587 | free (*tmp); |
4588 | *tmp = malloc (length); | |
25072a47 TB |
4589 | if (*tmp == NULL) |
4590 | return ENOMEM; | |
4591 | } | |
4592 | ret = src_devicep->dev2host_func (src_devicep->target_id, *tmp, | |
4593 | (const char *) src + src_off, | |
4594 | length); | |
4595 | if (ret == 1) | |
4596 | ret = dst_devicep->host2dev_func (dst_devicep->target_id, | |
4597 | (char *) dst + dst_off, *tmp, | |
4598 | length); | |
4599 | } | |
6ce13072 | 4600 | return ret ? 0 : EINVAL; |
d9a6bd32 JJ |
4601 | } |
4602 | ||
8b9e559f | 4603 | /* host->device, device->host and intra device. */ |
25072a47 TB |
4604 | if (num_dims == 2 |
4605 | && ((src_devicep | |
4606 | && src_devicep == dst_devicep | |
4607 | && src_devicep->memcpy2d_func) | |
4608 | || (!src_devicep != !dst_devicep | |
4609 | && ((src_devicep && src_devicep->memcpy2d_func) | |
4610 | || (dst_devicep && dst_devicep->memcpy2d_func))))) | |
4611 | { | |
4612 | size_t vol_sz1, dst_sz1, src_sz1, dst_off_sz1, src_off_sz1; | |
4613 | int dst_id = dst_devicep ? dst_devicep->target_id : -1; | |
4614 | int src_id = src_devicep ? src_devicep->target_id : -1; | |
4615 | struct gomp_device_descr *devp = dst_devicep ? dst_devicep : src_devicep; | |
4616 | ||
4617 | if (__builtin_mul_overflow (volume[1], element_size, &vol_sz1) | |
4618 | || __builtin_mul_overflow (dst_dimensions[1], element_size, &dst_sz1) | |
4619 | || __builtin_mul_overflow (src_dimensions[1], element_size, &src_sz1) | |
4620 | || __builtin_mul_overflow (dst_offsets[1], element_size, &dst_off_sz1) | |
4621 | || __builtin_mul_overflow (src_offsets[1], element_size, | |
4622 | &src_off_sz1)) | |
4623 | return EINVAL; | |
4624 | ret = devp->memcpy2d_func (dst_id, src_id, vol_sz1, volume[0], | |
4625 | dst, dst_off_sz1, dst_offsets[0], dst_sz1, | |
4626 | src, src_off_sz1, src_offsets[0], src_sz1); | |
4627 | if (ret != -1) | |
4628 | return ret ? 0 : EINVAL; | |
4629 | } | |
4630 | else if (num_dims == 3 | |
4631 | && ((src_devicep | |
4632 | && src_devicep == dst_devicep | |
4633 | && src_devicep->memcpy3d_func) | |
4634 | || (!src_devicep != !dst_devicep | |
4635 | && ((src_devicep && src_devicep->memcpy3d_func) | |
4636 | || (dst_devicep && dst_devicep->memcpy3d_func))))) | |
4637 | { | |
4638 | size_t vol_sz2, dst_sz2, src_sz2, dst_off_sz2, src_off_sz2; | |
4639 | int dst_id = dst_devicep ? dst_devicep->target_id : -1; | |
4640 | int src_id = src_devicep ? src_devicep->target_id : -1; | |
4641 | struct gomp_device_descr *devp = dst_devicep ? dst_devicep : src_devicep; | |
4642 | ||
4643 | if (__builtin_mul_overflow (volume[2], element_size, &vol_sz2) | |
4644 | || __builtin_mul_overflow (dst_dimensions[2], element_size, &dst_sz2) | |
4645 | || __builtin_mul_overflow (src_dimensions[2], element_size, &src_sz2) | |
4646 | || __builtin_mul_overflow (dst_offsets[2], element_size, &dst_off_sz2) | |
4647 | || __builtin_mul_overflow (src_offsets[2], element_size, | |
4648 | &src_off_sz2)) | |
4649 | return EINVAL; | |
4650 | ret = devp->memcpy3d_func (dst_id, src_id, vol_sz2, volume[1], volume[0], | |
4651 | dst, dst_off_sz2, dst_offsets[1], | |
4652 | dst_offsets[0], dst_sz2, dst_dimensions[1], | |
4653 | src, src_off_sz2, src_offsets[1], | |
4654 | src_offsets[0], src_sz2, src_dimensions[1]); | |
4655 | if (ret != -1) | |
4656 | return ret ? 0 : EINVAL; | |
4657 | } | |
d9a6bd32 JJ |
4658 | |
4659 | for (i = 1; i < num_dims; i++) | |
4660 | if (__builtin_mul_overflow (dst_slice, dst_dimensions[i], &dst_slice) | |
4661 | || __builtin_mul_overflow (src_slice, src_dimensions[i], &src_slice)) | |
4662 | return EINVAL; | |
4663 | if (__builtin_mul_overflow (dst_slice, dst_offsets[0], &dst_off) | |
4664 | || __builtin_mul_overflow (src_slice, src_offsets[0], &src_off)) | |
4665 | return EINVAL; | |
4666 | for (j = 0; j < volume[0]; j++) | |
4667 | { | |
4668 | ret = omp_target_memcpy_rect_worker ((char *) dst + dst_off, | |
28567c40 | 4669 | (const char *) src + src_off, |
d9a6bd32 JJ |
4670 | element_size, num_dims - 1, |
4671 | volume + 1, dst_offsets + 1, | |
4672 | src_offsets + 1, dst_dimensions + 1, | |
4673 | src_dimensions + 1, dst_devicep, | |
25072a47 | 4674 | src_devicep, tmp_size, tmp); |
d9a6bd32 JJ |
4675 | if (ret) |
4676 | return ret; | |
4677 | dst_off += dst_slice; | |
4678 | src_off += src_slice; | |
4679 | } | |
4680 | return 0; | |
4681 | } | |
4682 | ||
6c420193 MV |
4683 | static int |
4684 | omp_target_memcpy_rect_check (void *dst, const void *src, int dst_device_num, | |
4685 | int src_device_num, | |
4686 | struct gomp_device_descr **dst_devicep, | |
4687 | struct gomp_device_descr **src_devicep) | |
d9a6bd32 | 4688 | { |
d9a6bd32 JJ |
4689 | if (!dst && !src) |
4690 | return INT_MAX; | |
4691 | ||
6c420193 MV |
4692 | int ret = omp_target_memcpy_check (dst_device_num, src_device_num, |
4693 | dst_devicep, src_devicep); | |
4694 | if (ret) | |
4695 | return ret; | |
d9a6bd32 | 4696 | |
6c420193 MV |
4697 | return 0; |
4698 | } | |
4699 | ||
4700 | static int | |
4701 | omp_target_memcpy_rect_copy (void *dst, const void *src, | |
4702 | size_t element_size, int num_dims, | |
4703 | const size_t *volume, const size_t *dst_offsets, | |
4704 | const size_t *src_offsets, | |
4705 | const size_t *dst_dimensions, | |
4706 | const size_t *src_dimensions, | |
4707 | struct gomp_device_descr *dst_devicep, | |
4708 | struct gomp_device_descr *src_devicep) | |
4709 | { | |
25072a47 TB |
4710 | size_t tmp_size = 0; |
4711 | void *tmp = NULL; | |
4712 | bool lock_src; | |
4713 | bool lock_dst; | |
4714 | ||
8b9e559f TB |
4715 | lock_src = src_devicep != NULL; |
4716 | lock_dst = dst_devicep != NULL && src_devicep != dst_devicep; | |
25072a47 | 4717 | if (lock_src) |
d9a6bd32 | 4718 | gomp_mutex_lock (&src_devicep->lock); |
25072a47 | 4719 | if (lock_dst) |
d9a6bd32 JJ |
4720 | gomp_mutex_lock (&dst_devicep->lock); |
4721 | int ret = omp_target_memcpy_rect_worker (dst, src, element_size, num_dims, | |
4722 | volume, dst_offsets, src_offsets, | |
4723 | dst_dimensions, src_dimensions, | |
25072a47 TB |
4724 | dst_devicep, src_devicep, |
4725 | &tmp_size, &tmp); | |
4726 | if (lock_src) | |
d9a6bd32 | 4727 | gomp_mutex_unlock (&src_devicep->lock); |
25072a47 | 4728 | if (lock_dst) |
d9a6bd32 | 4729 | gomp_mutex_unlock (&dst_devicep->lock); |
25072a47 TB |
4730 | if (tmp) |
4731 | free (tmp); | |
6c420193 MV |
4732 | |
4733 | return ret; | |
4734 | } | |
4735 | ||
4736 | int | |
4737 | omp_target_memcpy_rect (void *dst, const void *src, size_t element_size, | |
4738 | int num_dims, const size_t *volume, | |
4739 | const size_t *dst_offsets, | |
4740 | const size_t *src_offsets, | |
4741 | const size_t *dst_dimensions, | |
4742 | const size_t *src_dimensions, | |
4743 | int dst_device_num, int src_device_num) | |
4744 | { | |
4745 | struct gomp_device_descr *dst_devicep = NULL, *src_devicep = NULL; | |
4746 | ||
4747 | int check = omp_target_memcpy_rect_check (dst, src, dst_device_num, | |
4748 | src_device_num, &dst_devicep, | |
4749 | &src_devicep); | |
4750 | ||
4751 | if (check) | |
4752 | return check; | |
4753 | ||
4754 | int ret = omp_target_memcpy_rect_copy (dst, src, element_size, num_dims, | |
4755 | volume, dst_offsets, src_offsets, | |
4756 | dst_dimensions, src_dimensions, | |
4757 | dst_devicep, src_devicep); | |
4758 | ||
d9a6bd32 JJ |
4759 | return ret; |
4760 | } | |
4761 | ||
6c420193 MV |
/* Argument bundle marshalled through GOMP_task for the asynchronous
   rectangular copy: carries all parameters of omp_target_memcpy_rect to
   omp_target_memcpy_rect_async_helper.  */
typedef struct
{
  void *dst;				/* Destination base pointer.  */
  const void *src;			/* Source base pointer.  */
  size_t element_size;			/* Size of one element in bytes.  */
  const size_t *volume;			/* Extent to copy, per dimension.  */
  const size_t *dst_offsets;		/* Start offsets in DST, per dim.  */
  const size_t *src_offsets;		/* Start offsets in SRC, per dim.  */
  const size_t *dst_dimensions;		/* Full extents of DST array.  */
  const size_t *src_dimensions;		/* Full extents of SRC array.  */
  struct gomp_device_descr *dst_devicep; /* NULL means the host.  */
  struct gomp_device_descr *src_devicep; /* NULL means the host.  */
  int num_dims;				/* Number of dimensions.  */
} omp_target_memcpy_rect_data;
4776 | ||
/* Task body for omp_target_memcpy_rect_async: unpack ARGS (an
   omp_target_memcpy_rect_data) and perform the copy.  The asynchronous API
   has no way to report an error back to its caller, so a failed copy is
   fatal.  */
static void
omp_target_memcpy_rect_async_helper (void *args)
{
  omp_target_memcpy_rect_data *a = args;
  int ret = omp_target_memcpy_rect_copy (a->dst, a->src, a->element_size,
					 a->num_dims, a->volume, a->dst_offsets,
					 a->src_offsets, a->dst_dimensions,
					 a->src_dimensions, a->dst_devicep,
					 a->src_devicep);
  if (ret)
    gomp_fatal ("omp_target_memcpy_rect failed");
}
4789 | ||
/* OpenMP 5.1: asynchronous variant of omp_target_memcpy_rect.  The copy is
   enqueued as an included task via GOMP_task, optionally depending on the
   DEPOBJ_COUNT dependence objects in DEPOBJ_LIST.  Returns INT_MAX for the
   NULL dst/src dimension query, 0 on success, or an error value.  */
int
omp_target_memcpy_rect_async (void *dst, const void *src, size_t element_size,
			      int num_dims, const size_t *volume,
			      const size_t *dst_offsets,
			      const size_t *src_offsets,
			      const size_t *dst_dimensions,
			      const size_t *src_dimensions,
			      int dst_device_num, int src_device_num,
			      int depobj_count, omp_depend_t *depobj_list)
{
  struct gomp_device_descr *dst_devicep = NULL, *src_devicep = NULL;
  unsigned flags = 0;
  int check = omp_target_memcpy_rect_check (dst, src, dst_device_num,
					    src_device_num, &dst_devicep,
					    &src_devicep);
  /* Layout expected by GOMP_task with GOMP_TASK_FLAG_DEPEND: five header
     slots followed by one pointer per dependence object.  */
  void *depend[depobj_count + 5];
  int i;

  omp_target_memcpy_rect_data s = {
    .dst = dst,
    .src = src,
    .element_size = element_size,
    .num_dims = num_dims,
    .volume = volume,
    .dst_offsets = dst_offsets,
    .src_offsets = src_offsets,
    .dst_dimensions = dst_dimensions,
    .src_dimensions = src_dimensions,
    .dst_devicep = dst_devicep,
    .src_devicep = src_devicep
  };

  if (check)
    return check;

  if (depobj_count > 0 && depobj_list != NULL)
    {
      flags |= GOMP_TASK_FLAG_DEPEND;
      depend[0] = 0;
      /* Slot 1 carries the dependence count; slots 2-4 are unused here.  */
      depend[1] = (void *) (uintptr_t) depobj_count;
      depend[2] = depend[3] = depend[4] = 0;
      for (i = 0; i < depobj_count; ++i)
	depend[i + 5] = &depobj_list[i];
    }

  /* DEPEND is only inspected by GOMP_task when GOMP_TASK_FLAG_DEPEND is
     set, so leaving it uninitialized otherwise is fine.  */
  GOMP_task (omp_target_memcpy_rect_async_helper, &s, NULL, sizeof (s),
	     __alignof__ (s), true, flags, depend, 0, NULL);

  return 0;
}
4840 | ||
/* OpenMP: associate SIZE bytes starting at HOST_PTR with the device
   address DEVICE_PTR + DEVICE_OFFSET on device DEVICE_NUM, by inserting an
   artificial mapping into the device's memory map.  Returns 0 on success
   or EINVAL on failure (host device, shared-memory device, or a
   conflicting existing mapping).  */
int
omp_target_associate_ptr (const void *host_ptr, const void *device_ptr,
			  size_t size, size_t device_offset, int device_num)
{
  /* Associating memory with the host itself is meaningless.  */
  if (device_num == omp_initial_device
      || device_num == gomp_get_num_devices ())
    return EINVAL;

  struct gomp_device_descr *devicep = resolve_device (device_num, false);
  if (devicep == NULL)
    return EINVAL;

  /* Shared-memory devices need no explicit mapping.  */
  if (!(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
      || devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
    return EINVAL;

  gomp_mutex_lock (&devicep->lock);

  struct splay_tree_s *mem_map = &devicep->mem_map;
  struct splay_tree_key_s cur_node;
  int ret = EINVAL;

  cur_node.host_start = (uintptr_t) host_ptr;
  cur_node.host_end = cur_node.host_start + size;
  splay_tree_key n = gomp_map_lookup (mem_map, &cur_node);
  if (n)
    {
      /* An overlapping mapping already exists; it is only acceptable if it
	 covers the whole requested host range and maps to exactly the
	 requested device address.  */
      if (n->tgt->tgt_start + n->tgt_offset
	  == (uintptr_t) device_ptr + device_offset
	  && n->host_start <= cur_node.host_start
	  && n->host_end >= cur_node.host_end)
	ret = 0;
    }
  else
    {
      /* Create an artificial target_mem_desc: tgt_start/tgt_end of 0,
	 nothing to free, and an infinite refcount on the key so that
	 normal unmapping never removes it — only
	 omp_target_disassociate_ptr can (it checks for exactly these
	 values).  */
      struct target_mem_desc *tgt = gomp_malloc (sizeof (*tgt));
      tgt->array = gomp_malloc (sizeof (*tgt->array));
      tgt->refcount = 1;
      tgt->tgt_start = 0;
      tgt->tgt_end = 0;
      tgt->to_free = NULL;
      tgt->prev = NULL;
      tgt->list_count = 0;
      tgt->device_descr = devicep;
      splay_tree_node array = tgt->array;
      splay_tree_key k = &array->key;
      k->host_start = cur_node.host_start;
      k->host_end = cur_node.host_end;
      k->tgt = tgt;
      /* The "device address" is stored as the offset from tgt_start (0).  */
      k->tgt_offset = (uintptr_t) device_ptr + device_offset;
      k->refcount = REFCOUNT_INFINITY;
      k->dynamic_refcount = 0;
      k->aux = NULL;
      array->left = NULL;
      array->right = NULL;
      splay_tree_insert (&devicep->mem_map, array);
      ret = 0;
    }
  gomp_mutex_unlock (&devicep->lock);
  return ret;
}
4902 | ||
/* OpenMP: remove an association previously created by
   omp_target_associate_ptr for PTR on device DEVICE_NUM.  Returns 0 on
   success or EINVAL if no such association exists.  */
int
omp_target_disassociate_ptr (const void *ptr, int device_num)
{
  struct gomp_device_descr *devicep = resolve_device (device_num, false);
  if (devicep == NULL)
    return EINVAL;

  if (!(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400))
    return EINVAL;

  gomp_mutex_lock (&devicep->lock);

  struct splay_tree_s *mem_map = &devicep->mem_map;
  struct splay_tree_key_s cur_node;
  int ret = EINVAL;

  /* Zero-length lookup: only the start address identifies the mapping.  */
  cur_node.host_start = (uintptr_t) ptr;
  cur_node.host_end = cur_node.host_start;
  splay_tree_key n = gomp_map_lookup (mem_map, &cur_node);
  /* Only remove mappings that look exactly like those created by
     omp_target_associate_ptr: infinite refcount and an artificial
     target_mem_desc (tgt_start 0, nothing to free, no list entries).
     Ordinary mappings must not be removable through this API.  */
  if (n
      && n->host_start == cur_node.host_start
      && n->refcount == REFCOUNT_INFINITY
      && n->tgt->tgt_start == 0
      && n->tgt->to_free == NULL
      && n->tgt->refcount == 1
      && n->tgt->list_count == 0)
    {
      splay_tree_remove (&devicep->mem_map, n);
      gomp_unmap_tgt (n->tgt);
      ret = 0;
    }

  gomp_mutex_unlock (&devicep->lock);
  return ret;
}
4938 | ||
941cdc8b MV |
/* OpenMP 5.1: return the device pointer that PTR is mapped to on device
   DEVICE_NUM, or NULL if it is not mapped.  For the host device (and for
   shared-memory devices, where host pointers are directly usable) PTR
   itself is returned.  */
void *
omp_get_mapped_ptr (const void *ptr, int device_num)
{
  if (device_num == omp_initial_device
      || device_num == omp_get_initial_device ())
    return (void *) ptr;

  struct gomp_device_descr *devicep = resolve_device (device_num, false);
  if (devicep == NULL)
    return NULL;

  if (!(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
      || devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
    return (void *) ptr;

  gomp_mutex_lock (&devicep->lock);

  struct splay_tree_s *mem_map = &devicep->mem_map;
  struct splay_tree_key_s cur_node;
  void *ret = NULL;

  /* Zero-length lookup (gomp_map_0len_lookup) so that a pointer one past
     the end of a mapped block is still found.  */
  cur_node.host_start = (uintptr_t) ptr;
  cur_node.host_end = cur_node.host_start;
  splay_tree_key n = gomp_map_0len_lookup (mem_map, &cur_node);

  if (n)
    {
      /* Translate: device address = mapping base + offset of PTR within
	 the mapped host range.  */
      uintptr_t offset = cur_node.host_start - n->host_start;
      ret = (void *) (n->tgt->tgt_start + n->tgt_offset + offset);
    }

  gomp_mutex_unlock (&devicep->lock);

  return ret;
}
4974 | ||
4043f53c MV |
4975 | int |
4976 | omp_target_is_accessible (const void *ptr, size_t size, int device_num) | |
4977 | { | |
1158fe43 JJ |
4978 | if (device_num == omp_initial_device |
4979 | || device_num == gomp_get_num_devices ()) | |
4043f53c MV |
4980 | return true; |
4981 | ||
1158fe43 | 4982 | struct gomp_device_descr *devicep = resolve_device (device_num, false); |
4043f53c MV |
4983 | if (devicep == NULL) |
4984 | return false; | |
4985 | ||
4986 | /* TODO: Unified shared memory must be handled when available. */ | |
4987 | ||
4988 | return devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM; | |
4989 | } | |
4990 | ||
28567c40 JJ |
4991 | int |
4992 | omp_pause_resource (omp_pause_resource_t kind, int device_num) | |
4993 | { | |
4994 | (void) kind; | |
1158fe43 JJ |
4995 | if (device_num == omp_initial_device |
4996 | || device_num == gomp_get_num_devices ()) | |
28567c40 | 4997 | return gomp_pause_host (); |
1158fe43 JJ |
4998 | |
4999 | struct gomp_device_descr *devicep = resolve_device (device_num, false); | |
5000 | if (devicep == NULL) | |
28567c40 | 5001 | return -1; |
1158fe43 | 5002 | |
28567c40 JJ |
5003 | /* Do nothing for target devices for now. */ |
5004 | return 0; | |
5005 | } | |
5006 | ||
5007 | int | |
5008 | omp_pause_resource_all (omp_pause_resource_t kind) | |
5009 | { | |
5010 | (void) kind; | |
5011 | if (gomp_pause_host ()) | |
5012 | return -1; | |
5013 | /* Do nothing for target devices for now. */ | |
5014 | return 0; | |
5015 | } | |
5016 | ||
5017 | ialias (omp_pause_resource) | |
5018 | ialias (omp_pause_resource_all) | |
5019 | ||
1df3f842 JJ |
5020 | #ifdef PLUGIN_SUPPORT |
5021 | ||
5022 | /* This function tries to load a plugin for DEVICE. Name of plugin is passed | |
5023 | in PLUGIN_NAME. | |
5024 | The handles of the found functions are stored in the corresponding fields | |
5025 | of DEVICE. The function returns TRUE on success and FALSE otherwise. */ | |
5026 | ||
/* Load the shared object PLUGIN_NAME and resolve all GOMP_OFFLOAD_*
   entry points into DEVICE.  Mandatory symbols use DLSYM (failure aborts
   the load); optional ones use DLSYM_OPT, which records the last missing
   name for diagnostics.  Returns true on success, false otherwise.  */
static bool
gomp_load_plugin_for_device (struct gomp_device_descr *device,
			     const char *plugin_name)
{
  const char *err = NULL, *last_missing = NULL;

  void *plugin_handle = dlopen (plugin_name, RTLD_LAZY);
  if (!plugin_handle)
#if OFFLOAD_DEFAULTED
    /* With defaulted offloading an absent plugin is not an error.  */
    return 0;
#else
    goto dl_fail;
#endif

  /* Check if all required functions are available in the plugin and store
     their handlers.  None of the symbols can legitimately be NULL,
     so we don't need to check dlerror all the time.  */
#define DLSYM(f)							\
  if (!(device->f##_func = dlsym (plugin_handle, "GOMP_OFFLOAD_" #f)))	\
    goto dl_fail
  /* Similar, but missing functions are not an error.  Return false if
     failed, true otherwise.  */
#define DLSYM_OPT(f, n)							\
  ((device->f##_func = dlsym (plugin_handle, "GOMP_OFFLOAD_" #n))	\
   || (last_missing = #n, 0))

  /* Reject plugins built against a different libgomp ABI.  */
  DLSYM (version);
  if (device->version_func () != GOMP_VERSION)
    {
      err = "plugin version mismatch";
      goto fail;
    }

  DLSYM (get_name);
  DLSYM (get_caps);
  DLSYM (get_type);
  DLSYM (get_num_devices);
  DLSYM (init_device);
  DLSYM (fini_device);
  DLSYM (load_image);
  DLSYM (unload_image);
  DLSYM (alloc);
  DLSYM (free);
  DLSYM (dev2host);
  DLSYM (host2dev);
  DLSYM_OPT (memcpy2d, memcpy2d);
  DLSYM_OPT (memcpy3d, memcpy3d);
  device->capabilities = device->get_caps_func ();
  if (device->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
    {
      DLSYM (run);
      DLSYM_OPT (async_run, async_run);
      DLSYM_OPT (can_run, can_run);
      DLSYM (dev2dev);
    }
  if (device->capabilities & GOMP_OFFLOAD_CAP_OPENACC_200)
    {
      if (!DLSYM_OPT (openacc.exec, openacc_exec)
	  || !DLSYM_OPT (openacc.create_thread_data,
			 openacc_create_thread_data)
	  || !DLSYM_OPT (openacc.destroy_thread_data,
			 openacc_destroy_thread_data)
	  || !DLSYM_OPT (openacc.async.construct, openacc_async_construct)
	  || !DLSYM_OPT (openacc.async.destruct, openacc_async_destruct)
	  || !DLSYM_OPT (openacc.async.test, openacc_async_test)
	  || !DLSYM_OPT (openacc.async.synchronize, openacc_async_synchronize)
	  || !DLSYM_OPT (openacc.async.serialize, openacc_async_serialize)
	  || !DLSYM_OPT (openacc.async.queue_callback,
			 openacc_async_queue_callback)
	  || !DLSYM_OPT (openacc.async.exec, openacc_async_exec)
	  || !DLSYM_OPT (openacc.async.dev2host, openacc_async_dev2host)
	  || !DLSYM_OPT (openacc.async.host2dev, openacc_async_host2dev)
	  || !DLSYM_OPT (openacc.get_property, openacc_get_property))
	{
	  /* Require all the OpenACC handlers if we have
	     GOMP_OFFLOAD_CAP_OPENACC_200.  */
	  err = "plugin missing OpenACC handler function";
	  goto fail;
	}

      /* The CUDA interop hooks are all-or-nothing: count how many resolve
	 and insist on either none or all four.  */
      unsigned cuda = 0;
      cuda += DLSYM_OPT (openacc.cuda.get_current_device,
			 openacc_cuda_get_current_device);
      cuda += DLSYM_OPT (openacc.cuda.get_current_context,
			 openacc_cuda_get_current_context);
      cuda += DLSYM_OPT (openacc.cuda.get_stream, openacc_cuda_get_stream);
      cuda += DLSYM_OPT (openacc.cuda.set_stream, openacc_cuda_set_stream);
      if (cuda && cuda != 4)
	{
	  /* Make sure all the CUDA functions are there if any of them are.  */
	  err = "plugin missing OpenACC CUDA handler function";
	  goto fail;
	}
    }
#undef DLSYM
#undef DLSYM_OPT

  return 1;

 dl_fail:
  err = dlerror ();
 fail:
  gomp_error ("while loading %s: %s", plugin_name, err);
  if (last_missing)
    gomp_error ("missing function was %s", last_missing);
  if (plugin_handle)
    dlclose (plugin_handle);

  return 0;
}
5137 | ||
d84ffc0a IV |
5138 | /* This function finalizes all initialized devices. */ |
5139 | ||
5140 | static void | |
5141 | gomp_target_fini (void) | |
5142 | { | |
5143 | int i; | |
5144 | for (i = 0; i < num_devices; i++) | |
5145 | { | |
6ce13072 | 5146 | bool ret = true; |
d84ffc0a IV |
5147 | struct gomp_device_descr *devicep = &devices[i]; |
5148 | gomp_mutex_lock (&devicep->lock); | |
5149 | if (devicep->state == GOMP_DEVICE_INITIALIZED) | |
1f4c5b9b | 5150 | ret = gomp_fini_device (devicep); |
d84ffc0a | 5151 | gomp_mutex_unlock (&devicep->lock); |
6ce13072 CLT |
5152 | if (!ret) |
5153 | gomp_fatal ("device finalization failed"); | |
d84ffc0a IV |
5154 | } |
5155 | } | |
5156 | ||
ee332b4a TS |
5157 | /* This function initializes the runtime for offloading. |
5158 | It parses the list of offload plugins, and tries to load these. | |
5159 | On return, the variables NUM_DEVICES and NUM_DEVICES_OPENMP | |
41dbbb37 TS |
5160 | will be set, and the array DEVICES initialized, containing descriptors for |
5161 | corresponding devices, first the GOMP_OFFLOAD_CAP_OPENMP_400 ones, follows | |
5162 | by the others. */ | |
1df3f842 JJ |
5163 | |
static void
gomp_target_init (void)
{
  const char *prefix ="libgomp-plugin-";
  const char *suffix = SONAME_SUFFIX (1);
  const char *cur, *next;
  char *plugin_name;
  int i, new_num_devs;
  int num_devs = 0, num_devs_openmp;
  struct gomp_device_descr *devs = NULL;

  /* OMP_TARGET_OFFLOAD=disabled: leave num_devices at 0.  */
  if (gomp_target_offload_var == GOMP_TARGET_OFFLOAD_DISABLED)
    return;

  /* OFFLOAD_PLUGINS is a comma-separated list of plugin base names
     configured at build time; try "libgomp-plugin-<name><soname>" for
     each.  */
  cur = OFFLOAD_PLUGINS;
  if (*cur)
    do
      {
	struct gomp_device_descr current_device;
	size_t prefix_len, suffix_len, cur_len;

	next = strchr (cur, ',');

	prefix_len = strlen (prefix);
	cur_len = next ? next - cur : strlen (cur);
	suffix_len = strlen (suffix);

	plugin_name = (char *) malloc (prefix_len + cur_len + suffix_len + 1);
	if (!plugin_name)
	  {
	    /* On allocation failure, degrade to zero devices.  */
	    num_devs = 0;
	    break;
	  }

	memcpy (plugin_name, prefix, prefix_len);
	memcpy (plugin_name + prefix_len, cur, cur_len);
	memcpy (plugin_name + prefix_len + cur_len, suffix, suffix_len + 1);

	if (gomp_load_plugin_for_device (&current_device, plugin_name))
	  {
	    int omp_req = omp_requires_mask & ~GOMP_REQUIRES_TARGET_USED;
	    /* A negative count means the plugin cannot satisfy the 'omp
	       requires' clauses used by the program.  */
	    new_num_devs = current_device.get_num_devices_func (omp_req);
	    if (gomp_debug_var > 0 && new_num_devs < 0)
	      {
		/* Only emit the diagnostic if an offload image for this
		   device type is actually present.  */
		bool found = false;
		int type = current_device.get_type_func ();
		for (int img = 0; img < num_offload_images; img++)
		  if (type == offload_images[img].type)
		    found = true;
		if (found)
		  {
		    char buf[sizeof ("unified_address, unified_shared_memory, "
				     "reverse_offload")];
		    gomp_requires_to_name (buf, sizeof (buf), omp_req);
		    char *name = (char *) malloc (cur_len + 1);
		    memcpy (name, cur, cur_len);
		    name[cur_len] = '\0';
		    gomp_debug (1,
				"%s devices present but 'omp requires %s' "
				"cannot be fulfilled\n", name, buf);
		    free (name);
		  }
	      }
	    else if (new_num_devs >= 1)
	      {
		/* Augment DEVICES and NUM_DEVICES.  */

		/* NOTE(review): on realloc failure the previous DEVS block
		   is leaked (the pointer is overwritten); this only
		   happens on OOM during startup — confirm acceptable.  */
		devs = realloc (devs, (num_devs + new_num_devs)
				      * sizeof (struct gomp_device_descr));
		if (!devs)
		  {
		    num_devs = 0;
		    free (plugin_name);
		    break;
		  }

		current_device.name = current_device.get_name_func ();
		/* current_device.capabilities has already been set.  */
		current_device.type = current_device.get_type_func ();
		current_device.mem_map.root = NULL;
		current_device.mem_map_rev.root = NULL;
		current_device.state = GOMP_DEVICE_UNINITIALIZED;
		/* One descriptor per physical device of this plugin.  */
		for (i = 0; i < new_num_devs; i++)
		  {
		    current_device.target_id = i;
		    devs[num_devs] = current_device;
		    gomp_mutex_init (&devs[num_devs].lock);
		    num_devs++;
		  }
	      }
	  }

	free (plugin_name);
	cur = next + 1;
      }
    while (next);

  /* In DEVICES, sort the GOMP_OFFLOAD_CAP_OPENMP_400 ones first, and set
     NUM_DEVICES_OPENMP.  */
  struct gomp_device_descr *devs_s
    = malloc (num_devs * sizeof (struct gomp_device_descr));
  if (!devs_s)
    {
      /* With num_devs forced to 0 the copy loops below do nothing.  */
      num_devs = 0;
      free (devs);
      devs = NULL;
    }
  num_devs_openmp = 0;
  for (i = 0; i < num_devs; i++)
    if (devs[i].capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
      devs_s[num_devs_openmp++] = devs[i];
  int num_devs_after_openmp = num_devs_openmp;
  for (i = 0; i < num_devs; i++)
    if (!(devs[i].capabilities & GOMP_OFFLOAD_CAP_OPENMP_400))
      devs_s[num_devs_after_openmp++] = devs[i];
  free (devs);
  devs = devs_s;

  for (i = 0; i < num_devs; i++)
    {
      /* The 'devices' array can be moved (by the realloc call) until we have
	 found all the plugins, so registering with the OpenACC runtime (which
	 takes a copy of the pointer argument) must be delayed until now.  */
      if (devs[i].capabilities & GOMP_OFFLOAD_CAP_OPENACC_200)
	goacc_register (&devs[i]);
    }
  if (gomp_global_icv.default_device_var == INT_MIN)
    {
      /* This implies OMP_TARGET_OFFLOAD=mandatory.  */
      struct gomp_icv_list *none;
      none = gomp_get_initial_icv_item (GOMP_DEVICE_NUM_FOR_NO_SUFFIX);
      gomp_global_icv.default_device_var = (num_devs_openmp
					    ? 0 : omp_invalid_device);
      none->icvs.default_device_var = gomp_global_icv.default_device_var;
    }

  num_devices = num_devs;
  num_devices_openmp = num_devs_openmp;
  devices = devs;
  if (atexit (gomp_target_fini) != 0)
    gomp_fatal ("atexit failed");
}
5306 | ||
5307 | #else /* PLUGIN_SUPPORT */ | |
5308 | /* If dlfcn.h is unavailable we always fallback to host execution. | |
5309 | GOMP_target* routines are just stubs for this case. */ | |
static void
gomp_target_init (void)
{
  /* No plugin support: nothing to initialize; all GOMP_target* routines
     fall back to host execution.  */
}
5314 | #endif /* PLUGIN_SUPPORT */ |