]>
Commit | Line | Data |
---|---|---|
41dbbb37 TS |
1 | /* OpenACC Runtime initialization routines |
2 | ||
8d9254fc | 3 | Copyright (C) 2013-2020 Free Software Foundation, Inc. |
41dbbb37 TS |
4 | |
5 | Contributed by Mentor Embedded. | |
6 | ||
7 | This file is part of the GNU Offloading and Multi Processing Library | |
8 | (libgomp). | |
9 | ||
10 | Libgomp is free software; you can redistribute it and/or modify it | |
11 | under the terms of the GNU General Public License as published by | |
12 | the Free Software Foundation; either version 3, or (at your option) | |
13 | any later version. | |
14 | ||
15 | Libgomp is distributed in the hope that it will be useful, but WITHOUT ANY | |
16 | WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS | |
17 | FOR A PARTICULAR PURPOSE. See the GNU General Public License for | |
18 | more details. | |
19 | ||
20 | Under Section 7 of GPL version 3, you are granted additional | |
21 | permissions described in the GCC Runtime Library Exception, version | |
22 | 3.1, as published by the Free Software Foundation. | |
23 | ||
24 | You should have received a copy of the GNU General Public License and | |
25 | a copy of the GCC Runtime Library Exception along with this program; | |
26 | see the files COPYING3 and COPYING.RUNTIME respectively. If not, see | |
27 | <http://www.gnu.org/licenses/>. */ | |
28 | ||
29 | #include "openacc.h" | |
41dbbb37 TS |
30 | #include "libgomp.h" |
31 | #include "gomp-constants.h" | |
32 | #include "oacc-int.h" | |
e46c7770 | 33 | #include <string.h> |
41dbbb37 TS |
34 | #include <assert.h> |
35 | ||
e38fdba4 JB |
36 | /* Return block containing [H->S), or NULL if not contained. The device lock |
37 | for DEV must be locked on entry, and remains locked on exit. */ | |
41dbbb37 TS |
38 | |
39 | static splay_tree_key | |
a51df54e | 40 | lookup_host (struct gomp_device_descr *dev, void *h, size_t s) |
41dbbb37 TS |
41 | { |
42 | struct splay_tree_key_s node; | |
43 | splay_tree_key key; | |
44 | ||
45 | node.host_start = (uintptr_t) h; | |
46 | node.host_end = (uintptr_t) h + s; | |
47 | ||
a51df54e | 48 | key = splay_tree_lookup (&dev->mem_map, &node); |
41dbbb37 TS |
49 | |
50 | return key; | |
51 | } | |
52 | ||
47afc7b4 | 53 | /* Helper for lookup_dev. Iterate over splay tree. */ |
41dbbb37 TS |
54 | |
55 | static splay_tree_key | |
47afc7b4 | 56 | lookup_dev_1 (splay_tree_node node, uintptr_t d, size_t s) |
41dbbb37 | 57 | { |
47afc7b4 TS |
58 | splay_tree_key key = &node->key; |
59 | if (d >= key->tgt->tgt_start && d + s <= key->tgt->tgt_end) | |
60 | return key; | |
41dbbb37 | 61 | |
47afc7b4 TS |
62 | key = NULL; |
63 | if (node->left) | |
64 | key = lookup_dev_1 (node->left, d, s); | |
65 | if (!key && node->right) | |
66 | key = lookup_dev_1 (node->right, d, s); | |
41dbbb37 | 67 | |
47afc7b4 TS |
68 | return key; |
69 | } | |
41dbbb37 | 70 | |
47afc7b4 | 71 | /* Return block containing [D->S), or NULL if not contained. |
41dbbb37 | 72 | |
47afc7b4 TS |
73 | This iterates over the splay tree. This is not expected to be a common |
74 | operation. | |
41dbbb37 | 75 | |
47afc7b4 TS |
76 | The device lock associated with MEM_MAP must be locked on entry, and remains |
77 | locked on exit. */ | |
41dbbb37 | 78 | |
47afc7b4 TS |
79 | static splay_tree_key |
80 | lookup_dev (splay_tree mem_map, void *d, size_t s) | |
81 | { | |
82 | if (!mem_map || !mem_map->root) | |
83 | return NULL; | |
41dbbb37 | 84 | |
47afc7b4 | 85 | return lookup_dev_1 (mem_map->root, (uintptr_t) d, s); |
41dbbb37 TS |
86 | } |
87 | ||
47afc7b4 | 88 | |
41dbbb37 TS |
89 | /* OpenACC is silent on how memory exhaustion is indicated. We return |
90 | NULL. */ | |
91 | ||
92 | void * | |
93 | acc_malloc (size_t s) | |
94 | { | |
95 | if (!s) | |
96 | return NULL; | |
97 | ||
98 | goacc_lazy_initialize (); | |
99 | ||
100 | struct goacc_thread *thr = goacc_thread (); | |
101 | ||
d93bdab5 JB |
102 | assert (thr->dev); |
103 | ||
e46c7770 CP |
104 | if (thr->dev->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM) |
105 | return malloc (s); | |
106 | ||
5fae049d TS |
107 | acc_prof_info prof_info; |
108 | acc_api_info api_info; | |
109 | bool profiling_p = GOACC_PROFILING_SETUP_P (thr, &prof_info, &api_info); | |
110 | ||
111 | void *res = thr->dev->alloc_func (thr->dev->target_id, s); | |
112 | ||
113 | if (profiling_p) | |
114 | { | |
115 | thr->prof_info = NULL; | |
116 | thr->api_info = NULL; | |
117 | } | |
118 | ||
119 | return res; | |
41dbbb37 TS |
120 | } |
121 | ||
41dbbb37 TS |
122 | void |
123 | acc_free (void *d) | |
124 | { | |
125 | splay_tree_key k; | |
41dbbb37 TS |
126 | |
127 | if (!d) | |
128 | return; | |
129 | ||
e38fdba4 JB |
130 | struct goacc_thread *thr = goacc_thread (); |
131 | ||
d93bdab5 JB |
132 | assert (thr && thr->dev); |
133 | ||
e38fdba4 JB |
134 | struct gomp_device_descr *acc_dev = thr->dev; |
135 | ||
e46c7770 CP |
136 | if (acc_dev->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM) |
137 | return free (d); | |
138 | ||
5fae049d TS |
139 | acc_prof_info prof_info; |
140 | acc_api_info api_info; | |
141 | bool profiling_p = GOACC_PROFILING_SETUP_P (thr, &prof_info, &api_info); | |
142 | ||
e38fdba4 JB |
143 | gomp_mutex_lock (&acc_dev->lock); |
144 | ||
41dbbb37 TS |
145 | /* We don't have to call lazy open here, as the ptr value must have |
146 | been returned by acc_malloc. It's not permitted to pass NULL in | |
147 | (unless you got that null from acc_malloc). */ | |
47afc7b4 | 148 | if ((k = lookup_dev (&acc_dev->mem_map, d, 1))) |
e38fdba4 | 149 | { |
cec41816 TS |
150 | void *offset = d - k->tgt->tgt_start + k->tgt_offset; |
151 | void *h = k->host_start + offset; | |
152 | size_t h_size = k->host_end - k->host_start; | |
e38fdba4 | 153 | gomp_mutex_unlock (&acc_dev->lock); |
cec41816 TS |
154 | /* PR92503 "[OpenACC] Behavior of 'acc_free' if the memory space is still |
155 | used in a mapping". */ | |
156 | gomp_fatal ("refusing to free device memory space at %p that is still" | |
157 | " mapped at [%p,+%d]", | |
158 | d, h, (int) h_size); | |
e38fdba4 JB |
159 | } |
160 | else | |
161 | gomp_mutex_unlock (&acc_dev->lock); | |
162 | ||
6ce13072 CLT |
163 | if (!acc_dev->free_func (acc_dev->target_id, d)) |
164 | gomp_fatal ("error in freeing device memory in %s", __FUNCTION__); | |
5fae049d TS |
165 | |
166 | if (profiling_p) | |
167 | { | |
168 | thr->prof_info = NULL; | |
169 | thr->api_info = NULL; | |
170 | } | |
41dbbb37 TS |
171 | } |
172 | ||
58168bbf CLT |
173 | static void |
174 | memcpy_tofrom_device (bool from, void *d, void *h, size_t s, int async, | |
175 | const char *libfnname) | |
41dbbb37 TS |
176 | { |
177 | /* No need to call lazy open here, as the device pointer must have | |
178 | been obtained from a routine that did that. */ | |
179 | struct goacc_thread *thr = goacc_thread (); | |
180 | ||
d93bdab5 JB |
181 | assert (thr && thr->dev); |
182 | ||
e46c7770 CP |
183 | if (thr->dev->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM) |
184 | { | |
58168bbf CLT |
185 | if (from) |
186 | memmove (h, d, s); | |
187 | else | |
188 | memmove (d, h, s); | |
e46c7770 CP |
189 | return; |
190 | } | |
191 | ||
5fae049d TS |
192 | acc_prof_info prof_info; |
193 | acc_api_info api_info; | |
194 | bool profiling_p = GOACC_PROFILING_SETUP_P (thr, &prof_info, &api_info); | |
195 | if (profiling_p) | |
196 | { | |
197 | prof_info.async = async; | |
198 | prof_info.async_queue = prof_info.async; | |
199 | } | |
200 | ||
1f4c5b9b CLT |
201 | goacc_aq aq = get_goacc_asyncqueue (async); |
202 | if (from) | |
203 | gomp_copy_dev2host (thr->dev, aq, h, d, s); | |
204 | else | |
205 | gomp_copy_host2dev (thr->dev, aq, d, h, s, /* TODO: cbuf? */ NULL); | |
5fae049d TS |
206 | |
207 | if (profiling_p) | |
208 | { | |
209 | thr->prof_info = NULL; | |
210 | thr->api_info = NULL; | |
211 | } | |
41dbbb37 TS |
212 | } |
213 | ||
214 | void | |
58168bbf | 215 | acc_memcpy_to_device (void *d, void *h, size_t s) |
41dbbb37 | 216 | { |
58168bbf CLT |
217 | memcpy_tofrom_device (false, d, h, s, acc_async_sync, __FUNCTION__); |
218 | } | |
41dbbb37 | 219 | |
/* Copy S bytes from host address H to device address D on the async queue
   selected by ASYNC.  */

void
acc_memcpy_to_device_async (void *d, void *h, size_t s, int async)
{
  const bool from_device = false;

  memcpy_tofrom_device (from_device, d, h, s, async, __FUNCTION__);
}
d93bdab5 | 225 | |
58168bbf CLT |
226 | void |
227 | acc_memcpy_from_device (void *h, void *d, size_t s) | |
228 | { | |
229 | memcpy_tofrom_device (true, d, h, s, acc_async_sync, __FUNCTION__); | |
230 | } | |
e46c7770 | 231 | |
/* Copy S bytes from device address D to host address H on the async queue
   selected by ASYNC.  */

void
acc_memcpy_from_device_async (void *h, void *d, size_t s, int async)
{
  const bool from_device = true;

  memcpy_tofrom_device (from_device, d, h, s, async, __FUNCTION__);
}
237 | ||
238 | /* Return the device pointer that corresponds to host data H. Or NULL | |
239 | if no mapping. */ | |
240 | ||
241 | void * | |
242 | acc_deviceptr (void *h) | |
243 | { | |
244 | splay_tree_key n; | |
245 | void *d; | |
246 | void *offset; | |
247 | ||
248 | goacc_lazy_initialize (); | |
249 | ||
250 | struct goacc_thread *thr = goacc_thread (); | |
e38fdba4 JB |
251 | struct gomp_device_descr *dev = thr->dev; |
252 | ||
e46c7770 CP |
253 | if (thr->dev->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM) |
254 | return h; | |
255 | ||
5fae049d TS |
256 | /* In the following, no OpenACC Profiling Interface events can possibly be |
257 | generated. */ | |
258 | ||
e38fdba4 | 259 | gomp_mutex_lock (&dev->lock); |
41dbbb37 | 260 | |
e38fdba4 | 261 | n = lookup_host (dev, h, 1); |
41dbbb37 TS |
262 | |
263 | if (!n) | |
e38fdba4 JB |
264 | { |
265 | gomp_mutex_unlock (&dev->lock); | |
266 | return NULL; | |
267 | } | |
41dbbb37 TS |
268 | |
269 | offset = h - n->host_start; | |
270 | ||
271 | d = n->tgt->tgt_start + n->tgt_offset + offset; | |
272 | ||
e38fdba4 JB |
273 | gomp_mutex_unlock (&dev->lock); |
274 | ||
41dbbb37 TS |
275 | return d; |
276 | } | |
277 | ||
278 | /* Return the host pointer that corresponds to device data D. Or NULL | |
279 | if no mapping. */ | |
280 | ||
281 | void * | |
282 | acc_hostptr (void *d) | |
283 | { | |
284 | splay_tree_key n; | |
285 | void *h; | |
286 | void *offset; | |
287 | ||
288 | goacc_lazy_initialize (); | |
289 | ||
290 | struct goacc_thread *thr = goacc_thread (); | |
e38fdba4 | 291 | struct gomp_device_descr *acc_dev = thr->dev; |
41dbbb37 | 292 | |
e46c7770 CP |
293 | if (thr->dev->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM) |
294 | return d; | |
295 | ||
5fae049d TS |
296 | /* In the following, no OpenACC Profiling Interface events can possibly be |
297 | generated. */ | |
298 | ||
e38fdba4 JB |
299 | gomp_mutex_lock (&acc_dev->lock); |
300 | ||
47afc7b4 | 301 | n = lookup_dev (&acc_dev->mem_map, d, 1); |
41dbbb37 TS |
302 | |
303 | if (!n) | |
e38fdba4 JB |
304 | { |
305 | gomp_mutex_unlock (&acc_dev->lock); | |
306 | return NULL; | |
307 | } | |
41dbbb37 TS |
308 | |
309 | offset = d - n->tgt->tgt_start + n->tgt_offset; | |
310 | ||
311 | h = n->host_start + offset; | |
312 | ||
e38fdba4 JB |
313 | gomp_mutex_unlock (&acc_dev->lock); |
314 | ||
41dbbb37 TS |
315 | return h; |
316 | } | |
317 | ||
318 | /* Return 1 if host data [H,+S] is present on the device. */ | |
319 | ||
320 | int | |
321 | acc_is_present (void *h, size_t s) | |
322 | { | |
323 | splay_tree_key n; | |
324 | ||
325 | if (!s || !h) | |
326 | return 0; | |
327 | ||
328 | goacc_lazy_initialize (); | |
329 | ||
330 | struct goacc_thread *thr = goacc_thread (); | |
331 | struct gomp_device_descr *acc_dev = thr->dev; | |
332 | ||
e46c7770 CP |
333 | if (thr->dev->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM) |
334 | return h != NULL; | |
335 | ||
5fae049d TS |
336 | /* In the following, no OpenACC Profiling Interface events can possibly be |
337 | generated. */ | |
338 | ||
e38fdba4 JB |
339 | gomp_mutex_lock (&acc_dev->lock); |
340 | ||
a51df54e | 341 | n = lookup_host (acc_dev, h, s); |
41dbbb37 TS |
342 | |
343 | if (n && ((uintptr_t)h < n->host_start | |
344 | || (uintptr_t)h + s > n->host_end | |
345 | || s > n->host_end - n->host_start)) | |
346 | n = NULL; | |
347 | ||
e38fdba4 JB |
348 | gomp_mutex_unlock (&acc_dev->lock); |
349 | ||
41dbbb37 TS |
350 | return n != NULL; |
351 | } | |
352 | ||
353 | /* Create a mapping for host [H,+S] -> device [D,+S] */ | |
354 | ||
355 | void | |
356 | acc_map_data (void *h, void *d, size_t s) | |
357 | { | |
e46c7770 | 358 | struct target_mem_desc *tgt = NULL; |
41dbbb37 TS |
359 | size_t mapnum = 1; |
360 | void *hostaddrs = h; | |
361 | void *devaddrs = d; | |
362 | size_t sizes = s; | |
363 | unsigned short kinds = GOMP_MAP_ALLOC; | |
364 | ||
365 | goacc_lazy_initialize (); | |
366 | ||
367 | struct goacc_thread *thr = goacc_thread (); | |
368 | struct gomp_device_descr *acc_dev = thr->dev; | |
369 | ||
370 | if (acc_dev->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM) | |
371 | { | |
372 | if (d != h) | |
373 | gomp_fatal ("cannot map data on shared-memory system"); | |
41dbbb37 TS |
374 | } |
375 | else | |
376 | { | |
377 | struct goacc_thread *thr = goacc_thread (); | |
378 | ||
379 | if (!d || !h || !s) | |
380 | gomp_fatal ("[%p,+%d]->[%p,+%d] is a bad map", | |
381 | (void *)h, (int)s, (void *)d, (int)s); | |
382 | ||
5fae049d TS |
383 | acc_prof_info prof_info; |
384 | acc_api_info api_info; | |
385 | bool profiling_p = GOACC_PROFILING_SETUP_P (thr, &prof_info, &api_info); | |
386 | ||
e38fdba4 JB |
387 | gomp_mutex_lock (&acc_dev->lock); |
388 | ||
a51df54e | 389 | if (lookup_host (acc_dev, h, s)) |
e38fdba4 JB |
390 | { |
391 | gomp_mutex_unlock (&acc_dev->lock); | |
392 | gomp_fatal ("host address [%p, +%d] is already mapped", (void *)h, | |
393 | (int)s); | |
394 | } | |
41dbbb37 | 395 | |
47afc7b4 | 396 | if (lookup_dev (&thr->dev->mem_map, d, s)) |
e38fdba4 JB |
397 | { |
398 | gomp_mutex_unlock (&acc_dev->lock); | |
399 | gomp_fatal ("device address [%p, +%d] is already mapped", (void *)d, | |
400 | (int)s); | |
401 | } | |
402 | ||
403 | gomp_mutex_unlock (&acc_dev->lock); | |
41dbbb37 TS |
404 | |
405 | tgt = gomp_map_vars (acc_dev, mapnum, &hostaddrs, &devaddrs, &sizes, | |
ba40277f TS |
406 | &kinds, true, GOMP_MAP_VARS_ENTER_DATA); |
407 | assert (tgt); | |
e103542b TS |
408 | splay_tree_key n = tgt->list[0].key; |
409 | assert (n->refcount == 1); | |
378da98f | 410 | assert (n->virtual_refcount == 0); |
e103542b TS |
411 | /* Special reference counting behavior. */ |
412 | n->refcount = REFCOUNT_INFINITY; | |
5fae049d TS |
413 | |
414 | if (profiling_p) | |
415 | { | |
416 | thr->prof_info = NULL; | |
417 | thr->api_info = NULL; | |
418 | } | |
41dbbb37 | 419 | } |
41dbbb37 TS |
420 | } |
421 | ||
422 | void | |
423 | acc_unmap_data (void *h) | |
424 | { | |
425 | struct goacc_thread *thr = goacc_thread (); | |
426 | struct gomp_device_descr *acc_dev = thr->dev; | |
427 | ||
428 | /* No need to call lazy open, as the address must have been mapped. */ | |
429 | ||
e46c7770 CP |
430 | /* This is a no-op on shared-memory targets. */ |
431 | if (acc_dev->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM) | |
432 | return; | |
433 | ||
5fae049d TS |
434 | acc_prof_info prof_info; |
435 | acc_api_info api_info; | |
436 | bool profiling_p = GOACC_PROFILING_SETUP_P (thr, &prof_info, &api_info); | |
437 | ||
e38fdba4 JB |
438 | gomp_mutex_lock (&acc_dev->lock); |
439 | ||
a51df54e | 440 | splay_tree_key n = lookup_host (acc_dev, h, 1); |
41dbbb37 TS |
441 | |
442 | if (!n) | |
e38fdba4 JB |
443 | { |
444 | gomp_mutex_unlock (&acc_dev->lock); | |
445 | gomp_fatal ("%p is not a mapped block", (void *)h); | |
446 | } | |
41dbbb37 | 447 | |
378da98f | 448 | size_t host_size = n->host_end - n->host_start; |
41dbbb37 TS |
449 | |
450 | if (n->host_start != (uintptr_t) h) | |
e38fdba4 JB |
451 | { |
452 | gomp_mutex_unlock (&acc_dev->lock); | |
453 | gomp_fatal ("[%p,%d] surrounds %p", | |
454 | (void *) n->host_start, (int) host_size, (void *) h); | |
455 | } | |
e103542b | 456 | /* TODO This currently doesn't catch 'REFCOUNT_INFINITY' usage different from |
378da98f | 457 | 'acc_map_data'. Maybe 'virtual_refcount' can be used for disambiguating |
e103542b TS |
458 | the different 'REFCOUNT_INFINITY' cases, or simply separate |
459 | 'REFCOUNT_INFINITY' values per different usage ('REFCOUNT_ACC_MAP_DATA' | |
460 | etc.)? */ | |
461 | else if (n->refcount != REFCOUNT_INFINITY) | |
462 | { | |
463 | gomp_mutex_unlock (&acc_dev->lock); | |
464 | gomp_fatal ("refusing to unmap block [%p,+%d] that has not been mapped" | |
465 | " by 'acc_map_data'", | |
466 | (void *) h, (int) host_size); | |
467 | } | |
41dbbb37 | 468 | |
378da98f JB |
469 | splay_tree_remove (&acc_dev->mem_map, n); |
470 | ||
471 | struct target_mem_desc *tgt = n->tgt; | |
41dbbb37 | 472 | |
378da98f | 473 | if (tgt->refcount == REFCOUNT_INFINITY) |
41dbbb37 | 474 | { |
378da98f JB |
475 | gomp_mutex_unlock (&acc_dev->lock); |
476 | gomp_fatal ("cannot unmap target block"); | |
477 | } | |
478 | else if (tgt->refcount > 1) | |
479 | tgt->refcount--; | |
480 | else | |
481 | { | |
482 | free (tgt->array); | |
483 | free (tgt); | |
41dbbb37 | 484 | } |
e38fdba4 | 485 | |
ba40277f | 486 | gomp_mutex_unlock (&acc_dev->lock); |
5fae049d TS |
487 | |
488 | if (profiling_p) | |
489 | { | |
490 | thr->prof_info = NULL; | |
491 | thr->api_info = NULL; | |
492 | } | |
41dbbb37 TS |
493 | } |
494 | ||
aaf0e9d7 | 495 | |
378da98f | 496 | /* Enter dynamic mapping for a single datum. Return the device pointer. */ |
41dbbb37 TS |
497 | |
498 | static void * | |
378da98f | 499 | goacc_enter_datum (void **hostaddrs, size_t *sizes, void *kinds, int async) |
41dbbb37 TS |
500 | { |
501 | void *d; | |
502 | splay_tree_key n; | |
503 | ||
378da98f | 504 | if (!hostaddrs[0] || !sizes[0]) |
9444a299 | 505 | gomp_fatal ("[%p,+%d] is a bad range", hostaddrs[0], (int) sizes[0]); |
41dbbb37 TS |
506 | |
507 | goacc_lazy_initialize (); | |
508 | ||
509 | struct goacc_thread *thr = goacc_thread (); | |
510 | struct gomp_device_descr *acc_dev = thr->dev; | |
511 | ||
e46c7770 | 512 | if (acc_dev->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM) |
378da98f | 513 | return hostaddrs[0]; |
e46c7770 | 514 | |
5fae049d TS |
515 | acc_prof_info prof_info; |
516 | acc_api_info api_info; | |
517 | bool profiling_p = GOACC_PROFILING_SETUP_P (thr, &prof_info, &api_info); | |
518 | if (profiling_p) | |
519 | { | |
520 | prof_info.async = async; | |
521 | prof_info.async_queue = prof_info.async; | |
522 | } | |
523 | ||
e38fdba4 JB |
524 | gomp_mutex_lock (&acc_dev->lock); |
525 | ||
9444a299 | 526 | n = lookup_host (acc_dev, hostaddrs[0], sizes[0]); |
378da98f | 527 | if (n) |
41dbbb37 | 528 | { |
9444a299 TS |
529 | void *h = hostaddrs[0]; |
530 | size_t s = sizes[0]; | |
531 | ||
41dbbb37 | 532 | /* Present. */ |
e307b05f | 533 | d = (void *) (n->tgt->tgt_start + n->tgt_offset + h - n->host_start); |
41dbbb37 | 534 | |
41dbbb37 | 535 | if ((h + s) > (void *)n->host_end) |
e38fdba4 JB |
536 | { |
537 | gomp_mutex_unlock (&acc_dev->lock); | |
538 | gomp_fatal ("[%p,+%d] not mapped", (void *)h, (int)s); | |
539 | } | |
540 | ||
d6e8c01c | 541 | assert (n->refcount != REFCOUNT_LINK); |
829c6349 | 542 | if (n->refcount != REFCOUNT_INFINITY) |
378da98f JB |
543 | { |
544 | n->refcount++; | |
545 | n->virtual_refcount++; | |
546 | } | |
77ce5555 TS |
547 | |
548 | gomp_mutex_unlock (&acc_dev->lock); | |
77ce5555 | 549 | } |
41dbbb37 TS |
550 | else |
551 | { | |
378da98f JB |
552 | const size_t mapnum = 1; |
553 | ||
e38fdba4 JB |
554 | gomp_mutex_unlock (&acc_dev->lock); |
555 | ||
1f4c5b9b | 556 | goacc_aq aq = get_goacc_asyncqueue (async); |
58168bbf | 557 | |
378da98f JB |
558 | gomp_map_vars_async (acc_dev, aq, mapnum, hostaddrs, NULL, sizes, kinds, |
559 | true, GOMP_MAP_VARS_OPENACC_ENTER_DATA); | |
41dbbb37 | 560 | |
378da98f JB |
561 | gomp_mutex_lock (&acc_dev->lock); |
562 | n = lookup_host (acc_dev, hostaddrs[0], sizes[0]); | |
563 | assert (n != NULL); | |
564 | assert (n->tgt_offset == 0); | |
565 | assert ((uintptr_t) hostaddrs[0] == n->host_start); | |
566 | d = (void *) n->tgt->tgt_start; | |
567 | gomp_mutex_unlock (&acc_dev->lock); | |
41dbbb37 TS |
568 | } |
569 | ||
5fae049d TS |
570 | if (profiling_p) |
571 | { | |
572 | thr->prof_info = NULL; | |
573 | thr->api_info = NULL; | |
574 | } | |
575 | ||
41dbbb37 TS |
576 | return d; |
577 | } | |
578 | ||
579 | void * | |
580 | acc_create (void *h, size_t s) | |
581 | { | |
9444a299 | 582 | unsigned short kinds[1] = { GOMP_MAP_ALLOC }; |
378da98f | 583 | return goacc_enter_datum (&h, &s, &kinds, acc_async_sync); |
58168bbf CLT |
584 | } |
585 | ||
586 | void | |
587 | acc_create_async (void *h, size_t s, int async) | |
588 | { | |
9444a299 | 589 | unsigned short kinds[1] = { GOMP_MAP_ALLOC }; |
378da98f | 590 | goacc_enter_datum (&h, &s, &kinds, async); |
41dbbb37 TS |
591 | } |
592 | ||
/* acc_present_or_create and acc_pcreate are two further names for what
   acc_create does now.  With alias support they are strong aliases of
   acc_create; without it they are thin forwarding functions.  */
#ifdef HAVE_ATTRIBUTE_ALIAS
strong_alias (acc_create, acc_present_or_create)
strong_alias (acc_create, acc_pcreate)
#else
/* Forward to acc_create.  */

void *
acc_present_or_create (void *h, size_t s)
{
  void *dev_ptr = acc_create (h, s);

  return dev_ptr;
}

/* Forward to acc_create.  */

void *
acc_pcreate (void *h, size_t s)
{
  void *dev_ptr = acc_create (h, s);

  return dev_ptr;
}
#endif
611 | ||
41dbbb37 | 612 | void * |
c759830b | 613 | acc_copyin (void *h, size_t s) |
41dbbb37 | 614 | { |
9444a299 | 615 | unsigned short kinds[1] = { GOMP_MAP_TO }; |
378da98f | 616 | return goacc_enter_datum (&h, &s, &kinds, acc_async_sync); |
41dbbb37 TS |
617 | } |
618 | ||
c759830b TS |
619 | void |
620 | acc_copyin_async (void *h, size_t s, int async) | |
621 | { | |
9444a299 | 622 | unsigned short kinds[1] = { GOMP_MAP_TO }; |
378da98f | 623 | goacc_enter_datum (&h, &s, &kinds, async); |
c759830b TS |
624 | } |
625 | ||
/* acc_present_or_copyin and acc_pcopyin are two further names for what
   acc_copyin does now.  With alias support they are strong aliases of
   acc_copyin; without it they are thin forwarding functions.  */
#ifdef HAVE_ATTRIBUTE_ALIAS
strong_alias (acc_copyin, acc_present_or_copyin)
strong_alias (acc_copyin, acc_pcopyin)
#else
/* Forward to acc_copyin.  */

void *
acc_present_or_copyin (void *h, size_t s)
{
  void *dev_ptr = acc_copyin (h, s);

  return dev_ptr;
}

/* Forward to acc_copyin.  */

void *
acc_pcopyin (void *h, size_t s)
{
  void *dev_ptr = acc_copyin (h, s);

  return dev_ptr;
}
#endif
644 | ||
34cfe31e | 645 | |
378da98f | 646 | /* Exit a dynamic mapping for a single variable. */ |
41dbbb37 TS |
647 | |
648 | static void | |
378da98f | 649 | goacc_exit_datum (void *h, size_t s, unsigned short kind, int async) |
41dbbb37 | 650 | { |
ddb25eb9 TS |
651 | /* No need to call lazy open, as the data must already have been |
652 | mapped. */ | |
653 | ||
34cfe31e TS |
654 | kind &= 0xff; |
655 | ||
41dbbb37 TS |
656 | struct goacc_thread *thr = goacc_thread (); |
657 | struct gomp_device_descr *acc_dev = thr->dev; | |
658 | ||
e46c7770 CP |
659 | if (acc_dev->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM) |
660 | return; | |
661 | ||
5fae049d TS |
662 | acc_prof_info prof_info; |
663 | acc_api_info api_info; | |
664 | bool profiling_p = GOACC_PROFILING_SETUP_P (thr, &prof_info, &api_info); | |
665 | if (profiling_p) | |
666 | { | |
667 | prof_info.async = async; | |
668 | prof_info.async_queue = prof_info.async; | |
669 | } | |
670 | ||
e38fdba4 JB |
671 | gomp_mutex_lock (&acc_dev->lock); |
672 | ||
ddb25eb9 | 673 | splay_tree_key n = lookup_host (acc_dev, h, s); |
41dbbb37 | 674 | if (!n) |
ddb25eb9 TS |
675 | /* PR92726, RP92970, PR92984: no-op. */ |
676 | goto out; | |
41dbbb37 | 677 | |
e307b05f | 678 | if ((uintptr_t) h < n->host_start || (uintptr_t) h + s > n->host_end) |
e38fdba4 | 679 | { |
e307b05f | 680 | size_t host_size = n->host_end - n->host_start; |
e38fdba4 | 681 | gomp_mutex_unlock (&acc_dev->lock); |
e307b05f JB |
682 | gomp_fatal ("[%p,+%d] outside mapped block [%p,+%d]", |
683 | (void *) h, (int) s, (void *) n->host_start, (int) host_size); | |
e38fdba4 JB |
684 | } |
685 | ||
34cfe31e TS |
686 | bool finalize = (kind == GOMP_MAP_DELETE |
687 | || kind == GOMP_MAP_FORCE_FROM); | |
688 | if (finalize) | |
829c6349 | 689 | { |
d6e8c01c | 690 | if (n->refcount != REFCOUNT_INFINITY) |
378da98f JB |
691 | n->refcount -= n->virtual_refcount; |
692 | n->virtual_refcount = 0; | |
829c6349 | 693 | } |
378da98f JB |
694 | |
695 | if (n->virtual_refcount > 0) | |
829c6349 | 696 | { |
d6e8c01c TS |
697 | if (n->refcount != REFCOUNT_INFINITY) |
698 | n->refcount--; | |
378da98f | 699 | n->virtual_refcount--; |
829c6349 | 700 | } |
378da98f JB |
701 | else if (n->refcount > 0 && n->refcount != REFCOUNT_INFINITY) |
702 | n->refcount--; | |
41dbbb37 | 703 | |
829c6349 CLT |
704 | if (n->refcount == 0) |
705 | { | |
1cbd94e8 JB |
706 | goacc_aq aq = get_goacc_asyncqueue (async); |
707 | ||
34cfe31e TS |
708 | bool copyout = (kind == GOMP_MAP_FROM |
709 | || kind == GOMP_MAP_FORCE_FROM); | |
710 | if (copyout) | |
58168bbf | 711 | { |
1cbd94e8 JB |
712 | void *d = (void *) (n->tgt->tgt_start + n->tgt_offset |
713 | + (uintptr_t) h - n->host_start); | |
1f4c5b9b | 714 | gomp_copy_dev2host (acc_dev, aq, h, d, s); |
58168bbf | 715 | } |
ba40277f TS |
716 | |
717 | if (aq) | |
718 | /* TODO We can't do the 'is_tgt_unmapped' checking -- see the | |
719 | 'gomp_unref_tgt' comment in | |
720 | <http://mid.mail-archive.com/878snl36eu.fsf@euler.schwinge.homeip.net>; | |
721 | PR92881. */ | |
722 | gomp_remove_var_async (acc_dev, n, aq); | |
723 | else | |
724 | { | |
725 | bool is_tgt_unmapped = gomp_remove_var (acc_dev, n); | |
726 | assert (is_tgt_unmapped); | |
727 | } | |
829c6349 CLT |
728 | } |
729 | ||
ddb25eb9 | 730 | out: |
829c6349 | 731 | gomp_mutex_unlock (&acc_dev->lock); |
5fae049d TS |
732 | |
733 | if (profiling_p) | |
734 | { | |
735 | thr->prof_info = NULL; | |
736 | thr->api_info = NULL; | |
737 | } | |
41dbbb37 TS |
738 | } |
739 | ||
740 | void | |
741 | acc_delete (void *h , size_t s) | |
742 | { | |
378da98f | 743 | goacc_exit_datum (h, s, GOMP_MAP_RELEASE, acc_async_sync); |
58168bbf CLT |
744 | } |
745 | ||
746 | void | |
747 | acc_delete_async (void *h , size_t s, int async) | |
748 | { | |
378da98f | 749 | goacc_exit_datum (h, s, GOMP_MAP_RELEASE, async); |
41dbbb37 TS |
750 | } |
751 | ||
829c6349 CLT |
752 | void |
753 | acc_delete_finalize (void *h , size_t s) | |
754 | { | |
378da98f | 755 | goacc_exit_datum (h, s, GOMP_MAP_DELETE, acc_async_sync); |
829c6349 CLT |
756 | } |
757 | ||
758 | void | |
759 | acc_delete_finalize_async (void *h , size_t s, int async) | |
760 | { | |
378da98f | 761 | goacc_exit_datum (h, s, GOMP_MAP_DELETE, async); |
829c6349 CLT |
762 | } |
763 | ||
6ce13072 CLT |
764 | void |
765 | acc_copyout (void *h, size_t s) | |
41dbbb37 | 766 | { |
378da98f | 767 | goacc_exit_datum (h, s, GOMP_MAP_FROM, acc_async_sync); |
58168bbf CLT |
768 | } |
769 | ||
770 | void | |
771 | acc_copyout_async (void *h, size_t s, int async) | |
772 | { | |
378da98f | 773 | goacc_exit_datum (h, s, GOMP_MAP_FROM, async); |
41dbbb37 TS |
774 | } |
775 | ||
829c6349 CLT |
776 | void |
777 | acc_copyout_finalize (void *h, size_t s) | |
778 | { | |
378da98f | 779 | goacc_exit_datum (h, s, GOMP_MAP_FORCE_FROM, acc_async_sync); |
829c6349 CLT |
780 | } |
781 | ||
782 | void | |
783 | acc_copyout_finalize_async (void *h, size_t s, int async) | |
784 | { | |
378da98f | 785 | goacc_exit_datum (h, s, GOMP_MAP_FORCE_FROM, async); |
829c6349 CLT |
786 | } |
787 | ||
41dbbb37 | 788 | static void |
58168bbf | 789 | update_dev_host (int is_dev, void *h, size_t s, int async) |
41dbbb37 TS |
790 | { |
791 | splay_tree_key n; | |
792 | void *d; | |
8baa7864 TS |
793 | |
794 | goacc_lazy_initialize (); | |
795 | ||
41dbbb37 TS |
796 | struct goacc_thread *thr = goacc_thread (); |
797 | struct gomp_device_descr *acc_dev = thr->dev; | |
798 | ||
e46c7770 CP |
799 | if (acc_dev->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM) |
800 | return; | |
801 | ||
6e4d01d6 TB |
802 | /* Fortran optional arguments that are non-present result in a |
803 | NULL host address here. This can safely be ignored as it is | |
804 | not possible to 'update' a non-present optional argument. */ | |
805 | if (h == NULL) | |
806 | return; | |
807 | ||
5fae049d TS |
808 | acc_prof_info prof_info; |
809 | acc_api_info api_info; | |
810 | bool profiling_p = GOACC_PROFILING_SETUP_P (thr, &prof_info, &api_info); | |
811 | if (profiling_p) | |
812 | { | |
813 | prof_info.async = async; | |
814 | prof_info.async_queue = prof_info.async; | |
815 | } | |
816 | ||
e38fdba4 JB |
817 | gomp_mutex_lock (&acc_dev->lock); |
818 | ||
a51df54e | 819 | n = lookup_host (acc_dev, h, s); |
41dbbb37 | 820 | |
41dbbb37 | 821 | if (!n) |
e38fdba4 JB |
822 | { |
823 | gomp_mutex_unlock (&acc_dev->lock); | |
824 | gomp_fatal ("[%p,%d] is not mapped", h, (int)s); | |
825 | } | |
41dbbb37 | 826 | |
b6d1f2b5 JN |
827 | d = (void *) (n->tgt->tgt_start + n->tgt_offset |
828 | + (uintptr_t) h - n->host_start); | |
41dbbb37 | 829 | |
1f4c5b9b | 830 | goacc_aq aq = get_goacc_asyncqueue (async); |
58168bbf | 831 | |
41dbbb37 | 832 | if (is_dev) |
1f4c5b9b | 833 | gomp_copy_host2dev (acc_dev, aq, d, h, s, /* TODO: cbuf? */ NULL); |
41dbbb37 | 834 | else |
1f4c5b9b | 835 | gomp_copy_dev2host (acc_dev, aq, h, d, s); |
58168bbf | 836 | |
6ce13072 | 837 | gomp_mutex_unlock (&acc_dev->lock); |
5fae049d TS |
838 | |
839 | if (profiling_p) | |
840 | { | |
841 | thr->prof_info = NULL; | |
842 | thr->api_info = NULL; | |
843 | } | |
41dbbb37 TS |
844 | } |
845 | ||
846 | void | |
847 | acc_update_device (void *h, size_t s) | |
848 | { | |
58168bbf CLT |
849 | update_dev_host (1, h, s, acc_async_sync); |
850 | } | |
851 | ||
/* Copy host range [H,+S] to its device counterpart on the async queue
   selected by ASYNC.  */

void
acc_update_device_async (void *h, size_t s, int async)
{
  const int to_device = 1;

  update_dev_host (to_device, h, s, async);
}
857 | ||
858 | void | |
859 | acc_update_self (void *h, size_t s) | |
860 | { | |
58168bbf CLT |
861 | update_dev_host (0, h, s, acc_async_sync); |
862 | } | |
863 | ||
/* Refresh host range [H,+S] from its device counterpart on the async queue
   selected by ASYNC.  */

void
acc_update_self_async (void *h, size_t s, int async)
{
  const int to_device = 0;

  update_dev_host (to_device, h, s, async);
}
869 | ||
5d5be7bf JB |
870 | void |
871 | acc_attach_async (void **hostaddr, int async) | |
872 | { | |
873 | struct goacc_thread *thr = goacc_thread (); | |
874 | struct gomp_device_descr *acc_dev = thr->dev; | |
875 | goacc_aq aq = get_goacc_asyncqueue (async); | |
876 | ||
877 | struct splay_tree_key_s cur_node; | |
878 | splay_tree_key n; | |
879 | ||
880 | if (thr->dev->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM) | |
881 | return; | |
882 | ||
883 | gomp_mutex_lock (&acc_dev->lock); | |
884 | ||
885 | cur_node.host_start = (uintptr_t) hostaddr; | |
886 | cur_node.host_end = cur_node.host_start + sizeof (void *); | |
887 | n = splay_tree_lookup (&acc_dev->mem_map, &cur_node); | |
888 | ||
889 | if (n == NULL) | |
890 | gomp_fatal ("struct not mapped for acc_attach"); | |
891 | ||
892 | gomp_attach_pointer (acc_dev, aq, &acc_dev->mem_map, n, (uintptr_t) hostaddr, | |
893 | 0, NULL); | |
894 | ||
895 | gomp_mutex_unlock (&acc_dev->lock); | |
896 | } | |
897 | ||
898 | void | |
899 | acc_attach (void **hostaddr) | |
900 | { | |
901 | acc_attach_async (hostaddr, acc_async_sync); | |
902 | } | |
903 | ||
904 | static void | |
905 | goacc_detach_internal (void **hostaddr, int async, bool finalize) | |
906 | { | |
907 | struct goacc_thread *thr = goacc_thread (); | |
908 | struct gomp_device_descr *acc_dev = thr->dev; | |
909 | struct splay_tree_key_s cur_node; | |
910 | splay_tree_key n; | |
911 | struct goacc_asyncqueue *aq = get_goacc_asyncqueue (async); | |
912 | ||
913 | if (thr->dev->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM) | |
914 | return; | |
915 | ||
916 | gomp_mutex_lock (&acc_dev->lock); | |
917 | ||
918 | cur_node.host_start = (uintptr_t) hostaddr; | |
919 | cur_node.host_end = cur_node.host_start + sizeof (void *); | |
920 | n = splay_tree_lookup (&acc_dev->mem_map, &cur_node); | |
921 | ||
922 | if (n == NULL) | |
923 | gomp_fatal ("struct not mapped for acc_detach"); | |
924 | ||
925 | gomp_detach_pointer (acc_dev, aq, n, (uintptr_t) hostaddr, finalize, NULL); | |
926 | ||
927 | gomp_mutex_unlock (&acc_dev->lock); | |
928 | } | |
929 | ||
/* Detach the pointer stored at HOSTADDR: calls goacc_detach_internal with
   the acc_async_sync queue and FINALIZE false.  */

void
acc_detach (void **hostaddr)
{
  goacc_detach_internal (hostaddr, acc_async_sync, false);
}
935 | ||
/* Detach the pointer stored at HOSTADDR on the queue selected by ASYNC:
   calls goacc_detach_internal with FINALIZE false.  */

void
acc_detach_async (void **hostaddr, int async)
{
  goacc_detach_internal (hostaddr, async, false);
}
941 | ||
/* Finalizing detach of the pointer stored at HOSTADDR: calls
   goacc_detach_internal with the acc_async_sync queue and FINALIZE true.  */

void
acc_detach_finalize (void **hostaddr)
{
  goacc_detach_internal (hostaddr, acc_async_sync, true);
}
947 | ||
/* Finalizing detach of the pointer stored at HOSTADDR on the queue selected
   by ASYNC: calls goacc_detach_internal with FINALIZE true.  */

void
acc_detach_finalize_async (void **hostaddr, int async)
{
  goacc_detach_internal (hostaddr, async, true);
}
953 | ||
378da98f JB |
954 | /* Some types of (pointer) variables use several consecutive mappings, which |
955 | must be treated as a group for enter/exit data directives. This function | |
956 | returns the last mapping in such a group (inclusive), or POS for singleton | |
957 | mappings. */ | |
57963e39 | 958 | |
378da98f | 959 | static int |
8e7e71ff | 960 | find_group_last (int pos, size_t mapnum, size_t *sizes, unsigned short *kinds) |
378da98f JB |
961 | { |
962 | unsigned char kind0 = kinds[pos] & 0xff; | |
8e7e71ff | 963 | int first_pos = pos; |
57963e39 | 964 | |
8e7e71ff | 965 | switch (kind0) |
378da98f | 966 | { |
8e7e71ff | 967 | case GOMP_MAP_TO_PSET: |
378da98f | 968 | while (pos + 1 < mapnum && (kinds[pos + 1] & 0xff) == GOMP_MAP_POINTER) |
8e7e71ff | 969 | pos++; |
378da98f | 970 | /* We expect at least one GOMP_MAP_POINTER after a GOMP_MAP_TO_PSET. */ |
8e7e71ff JB |
971 | assert (pos > first_pos); |
972 | break; | |
973 | ||
974 | case GOMP_MAP_STRUCT: | |
975 | pos += sizes[pos]; | |
976 | break; | |
977 | ||
978 | case GOMP_MAP_POINTER: | |
979 | case GOMP_MAP_ALWAYS_POINTER: | |
980 | /* These mappings are only expected after some other mapping. If we | |
981 | see one by itself, something has gone wrong. */ | |
982 | gomp_fatal ("unexpected mapping"); | |
983 | break; | |
984 | ||
985 | default: | |
378da98f JB |
986 | /* GOMP_MAP_ALWAYS_POINTER can only appear directly after some other |
987 | mapping. */ | |
8e7e71ff JB |
988 | if (pos + 1 < mapnum) |
989 | { | |
990 | unsigned char kind1 = kinds[pos + 1] & 0xff; | |
991 | if (kind1 == GOMP_MAP_ALWAYS_POINTER) | |
992 | return pos + 1; | |
993 | } | |
378da98f | 994 | |
8e7e71ff | 995 | /* We can have zero or more GOMP_MAP_POINTER mappings after a to/from |
378da98f JB |
996 | (etc.) mapping. */ |
997 | while (pos + 1 < mapnum && (kinds[pos + 1] & 0xff) == GOMP_MAP_POINTER) | |
8e7e71ff | 998 | pos++; |
378da98f | 999 | } |
57963e39 | 1000 | |
8e7e71ff | 1001 | return pos; |
378da98f JB |
1002 | } |
1003 | ||
1004 | /* Map variables for OpenACC "enter data". We can't just call | |
1005 | gomp_map_vars_async once, because individual mapped variables might have | |
1006 | "exit data" called for them at different times. */ | |
57963e39 | 1007 | |
57963e39 | 1008 | static void |
378da98f JB |
1009 | goacc_enter_data_internal (struct gomp_device_descr *acc_dev, size_t mapnum, |
1010 | void **hostaddrs, size_t *sizes, | |
1011 | unsigned short *kinds, goacc_aq aq) | |
41dbbb37 | 1012 | { |
378da98f JB |
1013 | for (size_t i = 0; i < mapnum; i++) |
1014 | { | |
8e7e71ff | 1015 | int group_last = find_group_last (i, mapnum, sizes, kinds); |
e38fdba4 | 1016 | |
378da98f JB |
1017 | gomp_map_vars_async (acc_dev, aq, |
1018 | (group_last - i) + 1, | |
1019 | &hostaddrs[i], NULL, | |
1020 | &sizes[i], &kinds[i], true, | |
1021 | GOMP_MAP_VARS_OPENACC_ENTER_DATA); | |
41dbbb37 | 1022 | |
378da98f | 1023 | i = group_last; |
e38fdba4 | 1024 | } |
378da98f | 1025 | } |
41dbbb37 | 1026 | |
378da98f | 1027 | /* Unmap variables for OpenACC "exit data". */ |
41dbbb37 | 1028 | |
378da98f JB |
1029 | static void |
1030 | goacc_exit_data_internal (struct gomp_device_descr *acc_dev, size_t mapnum, | |
1031 | void **hostaddrs, size_t *sizes, | |
1032 | unsigned short *kinds, goacc_aq aq) | |
1033 | { | |
1034 | gomp_mutex_lock (&acc_dev->lock); | |
41dbbb37 | 1035 | |
8e7e71ff JB |
1036 | /* Handle "detach" before copyback/deletion of mapped data. */ |
1037 | for (size_t i = 0; i < mapnum; ++i) | |
1038 | { | |
1039 | unsigned char kind = kinds[i] & 0xff; | |
1040 | bool finalize = false; | |
1041 | switch (kind) | |
1042 | { | |
1043 | case GOMP_MAP_FORCE_DETACH: | |
1044 | finalize = true; | |
1045 | /* Fallthrough. */ | |
1046 | ||
1047 | case GOMP_MAP_DETACH: | |
1048 | { | |
1049 | struct splay_tree_key_s cur_node; | |
1050 | uintptr_t hostaddr = (uintptr_t) hostaddrs[i]; | |
1051 | cur_node.host_start = hostaddr; | |
1052 | cur_node.host_end = cur_node.host_start + sizeof (void *); | |
1053 | splay_tree_key n | |
1054 | = splay_tree_lookup (&acc_dev->mem_map, &cur_node); | |
1055 | ||
1056 | if (n == NULL) | |
1057 | gomp_fatal ("struct not mapped for detach operation"); | |
1058 | ||
1059 | gomp_detach_pointer (acc_dev, aq, n, hostaddr, finalize, NULL); | |
1060 | } | |
1061 | break; | |
1062 | default: | |
1063 | ; | |
1064 | } | |
1065 | } | |
1066 | ||
378da98f | 1067 | for (size_t i = 0; i < mapnum; ++i) |
829c6349 | 1068 | { |
378da98f JB |
1069 | unsigned char kind = kinds[i] & 0xff; |
1070 | bool copyfrom = false; | |
1071 | bool finalize = false; | |
41dbbb37 | 1072 | |
378da98f | 1073 | if (kind == GOMP_MAP_FORCE_FROM |
8e7e71ff JB |
1074 | || kind == GOMP_MAP_DELETE |
1075 | || kind == GOMP_MAP_FORCE_DETACH) | |
378da98f | 1076 | finalize = true; |
41dbbb37 | 1077 | |
378da98f | 1078 | switch (kind) |
ba40277f | 1079 | { |
378da98f JB |
1080 | case GOMP_MAP_FROM: |
1081 | case GOMP_MAP_FORCE_FROM: | |
1082 | case GOMP_MAP_ALWAYS_FROM: | |
1083 | copyfrom = true; | |
1084 | /* Fallthrough. */ | |
1085 | ||
1086 | case GOMP_MAP_TO_PSET: | |
1087 | case GOMP_MAP_POINTER: | |
1088 | case GOMP_MAP_DELETE: | |
1089 | case GOMP_MAP_RELEASE: | |
8e7e71ff JB |
1090 | case GOMP_MAP_DETACH: |
1091 | case GOMP_MAP_FORCE_DETACH: | |
378da98f JB |
1092 | { |
1093 | struct splay_tree_key_s cur_node; | |
1094 | size_t size; | |
8e7e71ff JB |
1095 | if (kind == GOMP_MAP_POINTER |
1096 | || kind == GOMP_MAP_DETACH | |
1097 | || kind == GOMP_MAP_FORCE_DETACH) | |
378da98f JB |
1098 | size = sizeof (void *); |
1099 | else | |
1100 | size = sizes[i]; | |
1101 | cur_node.host_start = (uintptr_t) hostaddrs[i]; | |
1102 | cur_node.host_end = cur_node.host_start + size; | |
1103 | splay_tree_key n | |
1104 | = splay_tree_lookup (&acc_dev->mem_map, &cur_node); | |
1105 | ||
1106 | if (n == NULL) | |
1107 | continue; | |
1108 | ||
1109 | if (finalize) | |
1110 | { | |
1111 | if (n->refcount != REFCOUNT_INFINITY) | |
1112 | n->refcount -= n->virtual_refcount; | |
1113 | n->virtual_refcount = 0; | |
1114 | } | |
ba40277f | 1115 | |
378da98f JB |
1116 | if (n->virtual_refcount > 0) |
1117 | { | |
1118 | if (n->refcount != REFCOUNT_INFINITY) | |
1119 | n->refcount--; | |
1120 | n->virtual_refcount--; | |
1121 | } | |
1122 | else if (n->refcount > 0 && n->refcount != REFCOUNT_INFINITY) | |
1123 | n->refcount--; | |
1124 | ||
1125 | if (copyfrom | |
1126 | && (kind != GOMP_MAP_FROM || n->refcount == 0)) | |
1127 | gomp_copy_dev2host (acc_dev, aq, (void *) cur_node.host_start, | |
1128 | (void *) (n->tgt->tgt_start + n->tgt_offset | |
1129 | + cur_node.host_start | |
1130 | - n->host_start), | |
1131 | cur_node.host_end - cur_node.host_start); | |
1132 | ||
1133 | if (n->refcount == 0) | |
1134 | gomp_remove_var_async (acc_dev, n, aq); | |
1135 | } | |
1136 | break; | |
8e7e71ff JB |
1137 | |
1138 | case GOMP_MAP_STRUCT: | |
1139 | { | |
1140 | int elems = sizes[i]; | |
1141 | for (int j = 1; j <= elems; j++) | |
1142 | { | |
1143 | struct splay_tree_key_s k; | |
1144 | k.host_start = (uintptr_t) hostaddrs[i + j]; | |
1145 | k.host_end = k.host_start + sizes[i + j]; | |
1146 | splay_tree_key str; | |
1147 | str = splay_tree_lookup (&acc_dev->mem_map, &k); | |
1148 | if (str) | |
1149 | { | |
1150 | if (finalize) | |
1151 | { | |
1152 | if (str->refcount != REFCOUNT_INFINITY) | |
1153 | str->refcount -= str->virtual_refcount; | |
1154 | str->virtual_refcount = 0; | |
1155 | } | |
1156 | if (str->virtual_refcount > 0) | |
1157 | { | |
1158 | if (str->refcount != REFCOUNT_INFINITY) | |
1159 | str->refcount--; | |
1160 | str->virtual_refcount--; | |
1161 | } | |
1162 | else if (str->refcount > 0 | |
1163 | && str->refcount != REFCOUNT_INFINITY) | |
1164 | str->refcount--; | |
1165 | if (str->refcount == 0) | |
1166 | gomp_remove_var_async (acc_dev, str, aq); | |
1167 | } | |
1168 | } | |
1169 | i += elems; | |
1170 | } | |
1171 | break; | |
1172 | ||
378da98f JB |
1173 | default: |
1174 | gomp_fatal (">>>> goacc_exit_data_internal UNHANDLED kind 0x%.2x", | |
1175 | kind); | |
1f4c5b9b | 1176 | } |
829c6349 | 1177 | } |
41dbbb37 | 1178 | |
829c6349 | 1179 | gomp_mutex_unlock (&acc_dev->lock); |
57963e39 TS |
1180 | } |
1181 | ||
/* Entry point for the OpenACC "enter data" and "exit data" directives.

   FLAGS_M is decoded with GOACC_FLAGS_UNMARSHAL.  HOSTADDRS, SIZES and KINDS
   describe MAPNUM mappings.  ASYNC selects the asynchronous queue.  When
   NUM_WAITS is nonzero the variadic arguments are forwarded to goacc_wait.

   Whether this is an "enter" or an "exit" is inferred from the first mapping
   kind that is not GOMP_MAP_POINTER, GOMP_MAP_TO_PSET or GOMP_MAP_STRUCT; an
   unrecognized kind terminates the program through gomp_fatal.  Start and end
   profiling events are dispatched around the operation when profiling is
   enabled.  No mapping work is done for shared-memory devices or when host
   fallback is requested.  */

void
GOACC_enter_exit_data (int flags_m, size_t mapnum, void **hostaddrs,
                       size_t *sizes, unsigned short *kinds, int async,
                       int num_waits, ...)
{
  int flags = GOACC_FLAGS_UNMARSHAL (flags_m);

  struct goacc_thread *thr;
  struct gomp_device_descr *acc_dev;
  bool data_enter = false;
  size_t i;

  goacc_lazy_initialize ();

  thr = goacc_thread ();
  acc_dev = thr->dev;

  /* Determine if this is an "acc enter data".  If no mapping decides the
     question, DATA_ENTER stays false and the call is treated as an exit.  */
  for (i = 0; i < mapnum; ++i)
    {
      unsigned char kind = kinds[i] & 0xff;

      /* These kinds occur in both directives and so decide nothing.  */
      if (kind == GOMP_MAP_POINTER
          || kind == GOMP_MAP_TO_PSET
          || kind == GOMP_MAP_STRUCT)
        continue;

      if (kind == GOMP_MAP_FORCE_ALLOC
          || kind == GOMP_MAP_FORCE_PRESENT
          || kind == GOMP_MAP_ATTACH
          || kind == GOMP_MAP_FORCE_TO
          || kind == GOMP_MAP_TO
          || kind == GOMP_MAP_ALLOC)
        {
          data_enter = true;
          break;
        }

      if (kind == GOMP_MAP_RELEASE
          || kind == GOMP_MAP_DELETE
          || kind == GOMP_MAP_DETACH
          || kind == GOMP_MAP_FORCE_DETACH
          || kind == GOMP_MAP_FROM
          || kind == GOMP_MAP_FORCE_FROM)
        break;

      gomp_fatal (">>>> GOACC_enter_exit_data UNHANDLED kind 0x%.2x",
                      kind);
    }

  bool profiling_p = GOACC_PROFILING_DISPATCH_P (true);

  /* The three profiling records below are only filled in, and only
     published through THR, when profiling is active.  */
  acc_prof_info prof_info;
  if (profiling_p)
    {
      thr->prof_info = &prof_info;

      prof_info.event_type
        = data_enter ? acc_ev_enter_data_start : acc_ev_exit_data_start;
      prof_info.valid_bytes = _ACC_PROF_INFO_VALID_BYTES;
      prof_info.version = _ACC_PROF_INFO_VERSION;
      prof_info.device_type = acc_device_type (acc_dev->type);
      prof_info.device_number = acc_dev->target_id;
      prof_info.thread_id = -1;
      prof_info.async = async;
      prof_info.async_queue = prof_info.async;
      prof_info.src_file = NULL;
      prof_info.func_name = NULL;
      prof_info.line_no = -1;
      prof_info.end_line_no = -1;
      prof_info.func_line_no = -1;
      prof_info.func_end_line_no = -1;
    }
  acc_event_info enter_exit_data_event_info;
  if (profiling_p)
    {
      enter_exit_data_event_info.other_event.event_type
        = prof_info.event_type;
      enter_exit_data_event_info.other_event.valid_bytes
        = _ACC_OTHER_EVENT_INFO_VALID_BYTES;
      enter_exit_data_event_info.other_event.parent_construct
        = data_enter ? acc_construct_enter_data : acc_construct_exit_data;
      enter_exit_data_event_info.other_event.implicit = 0;
      enter_exit_data_event_info.other_event.tool_info = NULL;
    }
  acc_api_info api_info;
  if (profiling_p)
    {
      thr->api_info = &api_info;

      api_info.device_api = acc_device_api_none;
      api_info.valid_bytes = _ACC_API_INFO_VALID_BYTES;
      api_info.device_type = prof_info.device_type;
      api_info.vendor = -1;
      api_info.device_handle = NULL;
      api_info.context_handle = NULL;
      api_info.async_handle = NULL;
    }

  /* Dispatch the "start" event.  */
  if (profiling_p)
    goacc_profiling_dispatch (&prof_info, &enter_exit_data_event_info,
                              &api_info);

  /* Nothing to map or unmap on shared-memory devices or on host fallback;
     only the profiling records are retargeted to the host device type.
     NOTE(review): these two stores also execute when PROFILING_P is false,
     in which case the records are otherwise unset and never read here.  */
  if ((acc_dev->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
      || (flags & GOACC_FLAG_HOST_FALLBACK))
    {
      prof_info.device_type = acc_device_host;
      api_info.device_type = prof_info.device_type;

      goto out_prof;
    }

  /* Hand the variadic wait arguments to goacc_wait before doing any
     mapping work.  */
  if (num_waits)
    {
      va_list ap;

      va_start (ap, num_waits);
      goacc_wait (async, num_waits, &ap);
      va_end (ap);
    }

  goacc_aq aq = get_goacc_asyncqueue (async);

  if (data_enter)
    goacc_enter_data_internal (acc_dev, mapnum, hostaddrs, sizes, kinds, aq);
  else
    goacc_exit_data_internal (acc_dev, mapnum, hostaddrs, sizes, kinds, aq);

 out_prof:
  /* Dispatch the matching "end" event and unpublish the stack-allocated
     profiling records before they go out of scope.  */
  if (profiling_p)
    {
      prof_info.event_type
        = data_enter ? acc_ev_enter_data_end : acc_ev_exit_data_end;
      enter_exit_data_event_info.other_event.event_type = prof_info.event_type;
      goacc_profiling_dispatch (&prof_info, &enter_exit_data_event_info,
                                &api_info);

      thr->prof_info = NULL;
      thr->api_info = NULL;
    }
}