]>
Commit | Line | Data |
---|---|---|
1 | /* OpenACC Runtime initialization routines | |
2 | ||
3 | Copyright (C) 2013-2019 Free Software Foundation, Inc. | |
4 | ||
5 | Contributed by Mentor Embedded. | |
6 | ||
7 | This file is part of the GNU Offloading and Multi Processing Library | |
8 | (libgomp). | |
9 | ||
10 | Libgomp is free software; you can redistribute it and/or modify it | |
11 | under the terms of the GNU General Public License as published by | |
12 | the Free Software Foundation; either version 3, or (at your option) | |
13 | any later version. | |
14 | ||
15 | Libgomp is distributed in the hope that it will be useful, but WITHOUT ANY | |
16 | WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS | |
17 | FOR A PARTICULAR PURPOSE. See the GNU General Public License for | |
18 | more details. | |
19 | ||
20 | Under Section 7 of GPL version 3, you are granted additional | |
21 | permissions described in the GCC Runtime Library Exception, version | |
22 | 3.1, as published by the Free Software Foundation. | |
23 | ||
24 | You should have received a copy of the GNU General Public License and | |
25 | a copy of the GCC Runtime Library Exception along with this program; | |
26 | see the files COPYING3 and COPYING.RUNTIME respectively. If not, see | |
27 | <http://www.gnu.org/licenses/>. */ | |
28 | ||
29 | #include "openacc.h" | |
30 | #include "libgomp.h" | |
31 | #include "gomp-constants.h" | |
32 | #include "oacc-int.h" | |
33 | #include <string.h> | |
34 | #include <assert.h> | |
35 | ||
36 | /* Return block containing [H->S), or NULL if not contained. The device lock | |
37 | for DEV must be locked on entry, and remains locked on exit. */ | |
38 | ||
39 | static splay_tree_key | |
40 | lookup_host (struct gomp_device_descr *dev, void *h, size_t s) | |
41 | { | |
42 | struct splay_tree_key_s node; | |
43 | splay_tree_key key; | |
44 | ||
45 | node.host_start = (uintptr_t) h; | |
46 | node.host_end = (uintptr_t) h + s; | |
47 | ||
48 | key = splay_tree_lookup (&dev->mem_map, &node); | |
49 | ||
50 | return key; | |
51 | } | |
52 | ||
53 | /* Return block containing [D->S), or NULL if not contained. | |
54 | The list isn't ordered by device address, so we have to iterate | |
55 | over the whole array. This is not expected to be a common | |
56 | operation. The device lock associated with TGT must be locked on entry, and | |
57 | remains locked on exit. */ | |
58 | ||
59 | static splay_tree_key | |
60 | lookup_dev (struct target_mem_desc *tgt, void *d, size_t s) | |
61 | { | |
62 | int i; | |
63 | struct target_mem_desc *t; | |
64 | ||
65 | if (!tgt) | |
66 | return NULL; | |
67 | ||
68 | for (t = tgt; t != NULL; t = t->prev) | |
69 | { | |
70 | if (t->tgt_start <= (uintptr_t) d && t->tgt_end >= (uintptr_t) d + s) | |
71 | break; | |
72 | } | |
73 | ||
74 | if (!t) | |
75 | return NULL; | |
76 | ||
77 | for (i = 0; i < t->list_count; i++) | |
78 | { | |
79 | void * offset; | |
80 | ||
81 | splay_tree_key k = &t->array[i].key; | |
82 | offset = d - t->tgt_start + k->tgt_offset; | |
83 | ||
84 | if (k->host_start + offset <= (void *) k->host_end) | |
85 | return k; | |
86 | } | |
87 | ||
88 | return NULL; | |
89 | } | |
90 | ||
91 | /* OpenACC is silent on how memory exhaustion is indicated. We return | |
92 | NULL. */ | |
93 | ||
94 | void * | |
95 | acc_malloc (size_t s) | |
96 | { | |
97 | if (!s) | |
98 | return NULL; | |
99 | ||
100 | goacc_lazy_initialize (); | |
101 | ||
102 | struct goacc_thread *thr = goacc_thread (); | |
103 | ||
104 | assert (thr->dev); | |
105 | ||
106 | if (thr->dev->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM) | |
107 | return malloc (s); | |
108 | ||
109 | acc_prof_info prof_info; | |
110 | acc_api_info api_info; | |
111 | bool profiling_p = GOACC_PROFILING_SETUP_P (thr, &prof_info, &api_info); | |
112 | ||
113 | void *res = thr->dev->alloc_func (thr->dev->target_id, s); | |
114 | ||
115 | if (profiling_p) | |
116 | { | |
117 | thr->prof_info = NULL; | |
118 | thr->api_info = NULL; | |
119 | } | |
120 | ||
121 | return res; | |
122 | } | |
123 | ||
124 | /* OpenACC 2.0a (3.2.16) doesn't specify what to do in the event | |
125 | the device address is mapped. We choose to check if it mapped, | |
126 | and if it is, to unmap it. */ | |
127 | void | |
128 | acc_free (void *d) | |
129 | { | |
130 | splay_tree_key k; | |
131 | ||
132 | if (!d) | |
133 | return; | |
134 | ||
135 | struct goacc_thread *thr = goacc_thread (); | |
136 | ||
137 | assert (thr && thr->dev); | |
138 | ||
139 | struct gomp_device_descr *acc_dev = thr->dev; | |
140 | ||
141 | if (acc_dev->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM) | |
142 | return free (d); | |
143 | ||
144 | acc_prof_info prof_info; | |
145 | acc_api_info api_info; | |
146 | bool profiling_p = GOACC_PROFILING_SETUP_P (thr, &prof_info, &api_info); | |
147 | ||
148 | gomp_mutex_lock (&acc_dev->lock); | |
149 | ||
150 | /* We don't have to call lazy open here, as the ptr value must have | |
151 | been returned by acc_malloc. It's not permitted to pass NULL in | |
152 | (unless you got that null from acc_malloc). */ | |
153 | if ((k = lookup_dev (acc_dev->openacc.data_environ, d, 1))) | |
154 | { | |
155 | void *offset; | |
156 | ||
157 | offset = d - k->tgt->tgt_start + k->tgt_offset; | |
158 | ||
159 | gomp_mutex_unlock (&acc_dev->lock); | |
160 | ||
161 | acc_unmap_data ((void *)(k->host_start + offset)); | |
162 | } | |
163 | else | |
164 | gomp_mutex_unlock (&acc_dev->lock); | |
165 | ||
166 | if (!acc_dev->free_func (acc_dev->target_id, d)) | |
167 | gomp_fatal ("error in freeing device memory in %s", __FUNCTION__); | |
168 | ||
169 | if (profiling_p) | |
170 | { | |
171 | thr->prof_info = NULL; | |
172 | thr->api_info = NULL; | |
173 | } | |
174 | } | |
175 | ||
176 | static void | |
177 | memcpy_tofrom_device (bool from, void *d, void *h, size_t s, int async, | |
178 | const char *libfnname) | |
179 | { | |
180 | /* No need to call lazy open here, as the device pointer must have | |
181 | been obtained from a routine that did that. */ | |
182 | struct goacc_thread *thr = goacc_thread (); | |
183 | ||
184 | assert (thr && thr->dev); | |
185 | ||
186 | if (thr->dev->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM) | |
187 | { | |
188 | if (from) | |
189 | memmove (h, d, s); | |
190 | else | |
191 | memmove (d, h, s); | |
192 | return; | |
193 | } | |
194 | ||
195 | acc_prof_info prof_info; | |
196 | acc_api_info api_info; | |
197 | bool profiling_p = GOACC_PROFILING_SETUP_P (thr, &prof_info, &api_info); | |
198 | if (profiling_p) | |
199 | { | |
200 | prof_info.async = async; | |
201 | prof_info.async_queue = prof_info.async; | |
202 | } | |
203 | ||
204 | goacc_aq aq = get_goacc_asyncqueue (async); | |
205 | if (from) | |
206 | gomp_copy_dev2host (thr->dev, aq, h, d, s); | |
207 | else | |
208 | gomp_copy_host2dev (thr->dev, aq, d, h, s, /* TODO: cbuf? */ NULL); | |
209 | ||
210 | if (profiling_p) | |
211 | { | |
212 | thr->prof_info = NULL; | |
213 | thr->api_info = NULL; | |
214 | } | |
215 | } | |
216 | ||
217 | void | |
218 | acc_memcpy_to_device (void *d, void *h, size_t s) | |
219 | { | |
220 | memcpy_tofrom_device (false, d, h, s, acc_async_sync, __FUNCTION__); | |
221 | } | |
222 | ||
/* Copy S bytes from host address H to device address D on async queue
   ASYNC.  */

void
acc_memcpy_to_device_async (void *d, void *h, size_t s, int async)
{
  const bool from_device = false;

  memcpy_tofrom_device (from_device, d, h, s, async, __FUNCTION__);
}
228 | ||
229 | void | |
230 | acc_memcpy_from_device (void *h, void *d, size_t s) | |
231 | { | |
232 | memcpy_tofrom_device (true, d, h, s, acc_async_sync, __FUNCTION__); | |
233 | } | |
234 | ||
/* Copy S bytes from device address D to host address H on async queue
   ASYNC.  */

void
acc_memcpy_from_device_async (void *h, void *d, size_t s, int async)
{
  const bool from_device = true;

  memcpy_tofrom_device (from_device, d, h, s, async, __FUNCTION__);
}
240 | ||
241 | /* Return the device pointer that corresponds to host data H. Or NULL | |
242 | if no mapping. */ | |
243 | ||
244 | void * | |
245 | acc_deviceptr (void *h) | |
246 | { | |
247 | splay_tree_key n; | |
248 | void *d; | |
249 | void *offset; | |
250 | ||
251 | goacc_lazy_initialize (); | |
252 | ||
253 | struct goacc_thread *thr = goacc_thread (); | |
254 | struct gomp_device_descr *dev = thr->dev; | |
255 | ||
256 | if (thr->dev->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM) | |
257 | return h; | |
258 | ||
259 | /* In the following, no OpenACC Profiling Interface events can possibly be | |
260 | generated. */ | |
261 | ||
262 | gomp_mutex_lock (&dev->lock); | |
263 | ||
264 | n = lookup_host (dev, h, 1); | |
265 | ||
266 | if (!n) | |
267 | { | |
268 | gomp_mutex_unlock (&dev->lock); | |
269 | return NULL; | |
270 | } | |
271 | ||
272 | offset = h - n->host_start; | |
273 | ||
274 | d = n->tgt->tgt_start + n->tgt_offset + offset; | |
275 | ||
276 | gomp_mutex_unlock (&dev->lock); | |
277 | ||
278 | return d; | |
279 | } | |
280 | ||
281 | /* Return the host pointer that corresponds to device data D. Or NULL | |
282 | if no mapping. */ | |
283 | ||
284 | void * | |
285 | acc_hostptr (void *d) | |
286 | { | |
287 | splay_tree_key n; | |
288 | void *h; | |
289 | void *offset; | |
290 | ||
291 | goacc_lazy_initialize (); | |
292 | ||
293 | struct goacc_thread *thr = goacc_thread (); | |
294 | struct gomp_device_descr *acc_dev = thr->dev; | |
295 | ||
296 | if (thr->dev->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM) | |
297 | return d; | |
298 | ||
299 | /* In the following, no OpenACC Profiling Interface events can possibly be | |
300 | generated. */ | |
301 | ||
302 | gomp_mutex_lock (&acc_dev->lock); | |
303 | ||
304 | n = lookup_dev (acc_dev->openacc.data_environ, d, 1); | |
305 | ||
306 | if (!n) | |
307 | { | |
308 | gomp_mutex_unlock (&acc_dev->lock); | |
309 | return NULL; | |
310 | } | |
311 | ||
312 | offset = d - n->tgt->tgt_start + n->tgt_offset; | |
313 | ||
314 | h = n->host_start + offset; | |
315 | ||
316 | gomp_mutex_unlock (&acc_dev->lock); | |
317 | ||
318 | return h; | |
319 | } | |
320 | ||
321 | /* Return 1 if host data [H,+S] is present on the device. */ | |
322 | ||
323 | int | |
324 | acc_is_present (void *h, size_t s) | |
325 | { | |
326 | splay_tree_key n; | |
327 | ||
328 | if (!s || !h) | |
329 | return 0; | |
330 | ||
331 | goacc_lazy_initialize (); | |
332 | ||
333 | struct goacc_thread *thr = goacc_thread (); | |
334 | struct gomp_device_descr *acc_dev = thr->dev; | |
335 | ||
336 | if (thr->dev->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM) | |
337 | return h != NULL; | |
338 | ||
339 | /* In the following, no OpenACC Profiling Interface events can possibly be | |
340 | generated. */ | |
341 | ||
342 | gomp_mutex_lock (&acc_dev->lock); | |
343 | ||
344 | n = lookup_host (acc_dev, h, s); | |
345 | ||
346 | if (n && ((uintptr_t)h < n->host_start | |
347 | || (uintptr_t)h + s > n->host_end | |
348 | || s > n->host_end - n->host_start)) | |
349 | n = NULL; | |
350 | ||
351 | gomp_mutex_unlock (&acc_dev->lock); | |
352 | ||
353 | return n != NULL; | |
354 | } | |
355 | ||
356 | /* Create a mapping for host [H,+S] -> device [D,+S] */ | |
357 | ||
358 | void | |
359 | acc_map_data (void *h, void *d, size_t s) | |
360 | { | |
361 | struct target_mem_desc *tgt = NULL; | |
362 | size_t mapnum = 1; | |
363 | void *hostaddrs = h; | |
364 | void *devaddrs = d; | |
365 | size_t sizes = s; | |
366 | unsigned short kinds = GOMP_MAP_ALLOC; | |
367 | ||
368 | goacc_lazy_initialize (); | |
369 | ||
370 | struct goacc_thread *thr = goacc_thread (); | |
371 | struct gomp_device_descr *acc_dev = thr->dev; | |
372 | ||
373 | if (acc_dev->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM) | |
374 | { | |
375 | if (d != h) | |
376 | gomp_fatal ("cannot map data on shared-memory system"); | |
377 | } | |
378 | else | |
379 | { | |
380 | struct goacc_thread *thr = goacc_thread (); | |
381 | ||
382 | if (!d || !h || !s) | |
383 | gomp_fatal ("[%p,+%d]->[%p,+%d] is a bad map", | |
384 | (void *)h, (int)s, (void *)d, (int)s); | |
385 | ||
386 | acc_prof_info prof_info; | |
387 | acc_api_info api_info; | |
388 | bool profiling_p = GOACC_PROFILING_SETUP_P (thr, &prof_info, &api_info); | |
389 | ||
390 | gomp_mutex_lock (&acc_dev->lock); | |
391 | ||
392 | if (lookup_host (acc_dev, h, s)) | |
393 | { | |
394 | gomp_mutex_unlock (&acc_dev->lock); | |
395 | gomp_fatal ("host address [%p, +%d] is already mapped", (void *)h, | |
396 | (int)s); | |
397 | } | |
398 | ||
399 | if (lookup_dev (thr->dev->openacc.data_environ, d, s)) | |
400 | { | |
401 | gomp_mutex_unlock (&acc_dev->lock); | |
402 | gomp_fatal ("device address [%p, +%d] is already mapped", (void *)d, | |
403 | (int)s); | |
404 | } | |
405 | ||
406 | gomp_mutex_unlock (&acc_dev->lock); | |
407 | ||
408 | tgt = gomp_map_vars (acc_dev, mapnum, &hostaddrs, &devaddrs, &sizes, | |
409 | &kinds, true, GOMP_MAP_VARS_OPENACC); | |
410 | tgt->list[0].key->refcount = REFCOUNT_INFINITY; | |
411 | ||
412 | if (profiling_p) | |
413 | { | |
414 | thr->prof_info = NULL; | |
415 | thr->api_info = NULL; | |
416 | } | |
417 | } | |
418 | ||
419 | gomp_mutex_lock (&acc_dev->lock); | |
420 | tgt->prev = acc_dev->openacc.data_environ; | |
421 | acc_dev->openacc.data_environ = tgt; | |
422 | gomp_mutex_unlock (&acc_dev->lock); | |
423 | } | |
424 | ||
425 | void | |
426 | acc_unmap_data (void *h) | |
427 | { | |
428 | struct goacc_thread *thr = goacc_thread (); | |
429 | struct gomp_device_descr *acc_dev = thr->dev; | |
430 | ||
431 | /* No need to call lazy open, as the address must have been mapped. */ | |
432 | ||
433 | /* This is a no-op on shared-memory targets. */ | |
434 | if (acc_dev->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM) | |
435 | return; | |
436 | ||
437 | acc_prof_info prof_info; | |
438 | acc_api_info api_info; | |
439 | bool profiling_p = GOACC_PROFILING_SETUP_P (thr, &prof_info, &api_info); | |
440 | ||
441 | size_t host_size; | |
442 | ||
443 | gomp_mutex_lock (&acc_dev->lock); | |
444 | ||
445 | splay_tree_key n = lookup_host (acc_dev, h, 1); | |
446 | struct target_mem_desc *t; | |
447 | ||
448 | if (!n) | |
449 | { | |
450 | gomp_mutex_unlock (&acc_dev->lock); | |
451 | gomp_fatal ("%p is not a mapped block", (void *)h); | |
452 | } | |
453 | ||
454 | host_size = n->host_end - n->host_start; | |
455 | ||
456 | if (n->host_start != (uintptr_t) h) | |
457 | { | |
458 | gomp_mutex_unlock (&acc_dev->lock); | |
459 | gomp_fatal ("[%p,%d] surrounds %p", | |
460 | (void *) n->host_start, (int) host_size, (void *) h); | |
461 | } | |
462 | ||
463 | /* Mark for removal. */ | |
464 | n->refcount = 1; | |
465 | ||
466 | t = n->tgt; | |
467 | ||
468 | if (t->refcount == 2) | |
469 | { | |
470 | struct target_mem_desc *tp; | |
471 | ||
472 | /* This is the last reference, so pull the descriptor off the | |
473 | chain. This avoids gomp_unmap_vars via gomp_unmap_tgt from | |
474 | freeing the device memory. */ | |
475 | t->tgt_end = 0; | |
476 | t->to_free = 0; | |
477 | ||
478 | for (tp = NULL, t = acc_dev->openacc.data_environ; t != NULL; | |
479 | tp = t, t = t->prev) | |
480 | if (n->tgt == t) | |
481 | { | |
482 | if (tp) | |
483 | tp->prev = t->prev; | |
484 | else | |
485 | acc_dev->openacc.data_environ = t->prev; | |
486 | ||
487 | break; | |
488 | } | |
489 | } | |
490 | ||
491 | gomp_mutex_unlock (&acc_dev->lock); | |
492 | ||
493 | gomp_unmap_vars (t, true); | |
494 | ||
495 | if (profiling_p) | |
496 | { | |
497 | thr->prof_info = NULL; | |
498 | thr->api_info = NULL; | |
499 | } | |
500 | } | |
501 | ||
502 | #define FLAG_PRESENT (1 << 0) | |
503 | #define FLAG_CREATE (1 << 1) | |
504 | #define FLAG_COPY (1 << 2) | |
505 | ||
506 | static void * | |
507 | present_create_copy (unsigned f, void *h, size_t s, int async) | |
508 | { | |
509 | void *d; | |
510 | splay_tree_key n; | |
511 | ||
512 | if (!h || !s) | |
513 | gomp_fatal ("[%p,+%d] is a bad range", (void *)h, (int)s); | |
514 | ||
515 | goacc_lazy_initialize (); | |
516 | ||
517 | struct goacc_thread *thr = goacc_thread (); | |
518 | struct gomp_device_descr *acc_dev = thr->dev; | |
519 | ||
520 | if (acc_dev->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM) | |
521 | return h; | |
522 | ||
523 | acc_prof_info prof_info; | |
524 | acc_api_info api_info; | |
525 | bool profiling_p = GOACC_PROFILING_SETUP_P (thr, &prof_info, &api_info); | |
526 | if (profiling_p) | |
527 | { | |
528 | prof_info.async = async; | |
529 | prof_info.async_queue = prof_info.async; | |
530 | } | |
531 | ||
532 | gomp_mutex_lock (&acc_dev->lock); | |
533 | ||
534 | n = lookup_host (acc_dev, h, s); | |
535 | if (n) | |
536 | { | |
537 | /* Present. */ | |
538 | d = (void *) (n->tgt->tgt_start + n->tgt_offset); | |
539 | ||
540 | if (!(f & FLAG_PRESENT)) | |
541 | { | |
542 | gomp_mutex_unlock (&acc_dev->lock); | |
543 | gomp_fatal ("[%p,+%d] already mapped to [%p,+%d]", | |
544 | (void *)h, (int)s, (void *)d, (int)s); | |
545 | } | |
546 | if ((h + s) > (void *)n->host_end) | |
547 | { | |
548 | gomp_mutex_unlock (&acc_dev->lock); | |
549 | gomp_fatal ("[%p,+%d] not mapped", (void *)h, (int)s); | |
550 | } | |
551 | ||
552 | if (n->refcount != REFCOUNT_INFINITY) | |
553 | { | |
554 | n->refcount++; | |
555 | n->dynamic_refcount++; | |
556 | } | |
557 | gomp_mutex_unlock (&acc_dev->lock); | |
558 | } | |
559 | else if (!(f & FLAG_CREATE)) | |
560 | { | |
561 | gomp_mutex_unlock (&acc_dev->lock); | |
562 | gomp_fatal ("[%p,+%d] not mapped", (void *)h, (int)s); | |
563 | } | |
564 | else | |
565 | { | |
566 | struct target_mem_desc *tgt; | |
567 | size_t mapnum = 1; | |
568 | unsigned short kinds; | |
569 | void *hostaddrs = h; | |
570 | ||
571 | if (f & FLAG_COPY) | |
572 | kinds = GOMP_MAP_TO; | |
573 | else | |
574 | kinds = GOMP_MAP_ALLOC; | |
575 | ||
576 | gomp_mutex_unlock (&acc_dev->lock); | |
577 | ||
578 | goacc_aq aq = get_goacc_asyncqueue (async); | |
579 | ||
580 | tgt = gomp_map_vars_async (acc_dev, aq, mapnum, &hostaddrs, NULL, &s, | |
581 | &kinds, true, GOMP_MAP_VARS_OPENACC); | |
582 | /* Initialize dynamic refcount. */ | |
583 | tgt->list[0].key->dynamic_refcount = 1; | |
584 | ||
585 | gomp_mutex_lock (&acc_dev->lock); | |
586 | ||
587 | d = tgt->to_free; | |
588 | tgt->prev = acc_dev->openacc.data_environ; | |
589 | acc_dev->openacc.data_environ = tgt; | |
590 | ||
591 | gomp_mutex_unlock (&acc_dev->lock); | |
592 | } | |
593 | ||
594 | if (profiling_p) | |
595 | { | |
596 | thr->prof_info = NULL; | |
597 | thr->api_info = NULL; | |
598 | } | |
599 | ||
600 | return d; | |
601 | } | |
602 | ||
603 | void * | |
604 | acc_create (void *h, size_t s) | |
605 | { | |
606 | return present_create_copy (FLAG_PRESENT | FLAG_CREATE, h, s, acc_async_sync); | |
607 | } | |
608 | ||
609 | void | |
610 | acc_create_async (void *h, size_t s, int async) | |
611 | { | |
612 | present_create_copy (FLAG_PRESENT | FLAG_CREATE, h, s, async); | |
613 | } | |
614 | ||
/* acc_present_or_create and its short form acc_pcreate are legacy names
   for what acc_create does now.  Where the toolchain supports alias
   attributes they are emitted as strong aliases; otherwise they are thin
   forwarding functions.  */
#ifndef HAVE_ATTRIBUTE_ALIAS
void *
acc_present_or_create (void *h, size_t s)
{
  void *dev_addr = acc_create (h, s);

  return dev_addr;
}

void *
acc_pcreate (void *h, size_t s)
{
  void *dev_addr = acc_create (h, s);

  return dev_addr;
}
#else
strong_alias (acc_create, acc_present_or_create)
strong_alias (acc_create, acc_pcreate)
#endif
633 | ||
634 | void * | |
635 | acc_copyin (void *h, size_t s) | |
636 | { | |
637 | return present_create_copy (FLAG_PRESENT | FLAG_CREATE | FLAG_COPY, h, s, | |
638 | acc_async_sync); | |
639 | } | |
640 | ||
641 | void | |
642 | acc_copyin_async (void *h, size_t s, int async) | |
643 | { | |
644 | present_create_copy (FLAG_PRESENT | FLAG_CREATE | FLAG_COPY, h, s, async); | |
645 | } | |
646 | ||
/* acc_present_or_copyin and its short form acc_pcopyin are legacy names
   for what acc_copyin does now.  Where the toolchain supports alias
   attributes they are emitted as strong aliases; otherwise they are thin
   forwarding functions.  */
#ifndef HAVE_ATTRIBUTE_ALIAS
void *
acc_present_or_copyin (void *h, size_t s)
{
  void *dev_addr = acc_copyin (h, s);

  return dev_addr;
}

void *
acc_pcopyin (void *h, size_t s)
{
  void *dev_addr = acc_copyin (h, s);

  return dev_addr;
}
#else
strong_alias (acc_copyin, acc_present_or_copyin)
strong_alias (acc_copyin, acc_pcopyin)
#endif
665 | ||
/* Behaviour flags for delete_copyout.  */
/* Copy the device data back to the host before removing the mapping.  */
#define FLAG_COPYOUT (1 << 0)
/* Drop all dynamic references at once instead of a single one.  */
#define FLAG_FINALIZE (1 << 1)

/* Common worker for the acc_delete* and acc_copyout* entry points.

   Releases dynamic references on the mapping of host range [H,+S) as
   directed by the FLAG_* bits in F.  Once the mapping's reference count
   reaches zero the mapping is removed, after copying the device data back
   to H when FLAG_COPYOUT is set.  ASYNC selects the async queue used for
   that copy.  LIBFNNAME is the name of the calling API routine and is not
   used in this function.

   This is a no-op on shared-memory devices.  It is a fatal error if the
   range is not mapped or does not coincide exactly with a mapped block.  */

static void
delete_copyout (unsigned f, void *h, size_t s, int async, const char *libfnname)
{
  size_t host_size;
  splay_tree_key n;
  void *d;
  struct goacc_thread *thr = goacc_thread ();
  struct gomp_device_descr *acc_dev = thr->dev;

  if (acc_dev->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
    return;

  acc_prof_info prof_info;
  acc_api_info api_info;
  bool profiling_p = GOACC_PROFILING_SETUP_P (thr, &prof_info, &api_info);
  if (profiling_p)
    {
      prof_info.async = async;
      prof_info.async_queue = prof_info.async;
    }

  gomp_mutex_lock (&acc_dev->lock);

  n = lookup_host (acc_dev, h, s);

  /* No need to call lazy open, as the data must already have been
     mapped.  */

  if (!n)
    {
      gomp_mutex_unlock (&acc_dev->lock);
      gomp_fatal ("[%p,%d] is not mapped", (void *)h, (int)s);
    }

  /* Device address that corresponds to H inside the mapped block.  */
  d = (void *) (n->tgt->tgt_start + n->tgt_offset
                + (uintptr_t) h - n->host_start);

  host_size = n->host_end - n->host_start;

  /* Only whole blocks can be deleted or copied out.  */
  if (n->host_start != (uintptr_t) h || host_size != s)
    {
      gomp_mutex_unlock (&acc_dev->lock);
      gomp_fatal ("[%p,%d] surrounds2 [%p,+%d]",
                  (void *) n->host_start, (int) host_size, (void *) h, (int) s);
    }

  /* A mapping with an infinite reference count is treated as having no
     references left, so it is removed below.  */
  if (n->refcount == REFCOUNT_INFINITY)
    {
      n->refcount = 0;
      n->dynamic_refcount = 0;
    }
  /* Sanity check: the dynamic count may not exceed the total count.  */
  if (n->refcount < n->dynamic_refcount)
    {
      gomp_mutex_unlock (&acc_dev->lock);
      gomp_fatal ("Dynamic reference counting assert fail\n");
    }

  if (f & FLAG_FINALIZE)
    {
      /* Drop every dynamic reference in one go.  */
      n->refcount -= n->dynamic_refcount;
      n->dynamic_refcount = 0;
    }
  else if (n->dynamic_refcount)
    {
      /* Drop a single dynamic reference.  */
      n->dynamic_refcount--;
      n->refcount--;
    }

  if (n->refcount == 0)
    {
      if (n->tgt->refcount == 2)
        {
          /* Unlink the descriptor from the device's data environment
             chain.  */
          struct target_mem_desc *tp, *t;
          for (tp = NULL, t = acc_dev->openacc.data_environ; t != NULL;
               tp = t, t = t->prev)
            if (n->tgt == t)
              {
                if (tp)
                  tp->prev = t->prev;
                else
                  acc_dev->openacc.data_environ = t->prev;
                break;
              }
        }

      if (f & FLAG_COPYOUT)
        {
          goacc_aq aq = get_goacc_asyncqueue (async);
          gomp_copy_dev2host (acc_dev, aq, h, d, s);
        }
      gomp_remove_var (acc_dev, n);
    }

  gomp_mutex_unlock (&acc_dev->lock);

  /* Detach the stack-allocated profiling records again.  */
  if (profiling_p)
    {
      thr->prof_info = NULL;
      thr->api_info = NULL;
    }
}
770 | ||
771 | void | |
772 | acc_delete (void *h , size_t s) | |
773 | { | |
774 | delete_copyout (0, h, s, acc_async_sync, __FUNCTION__); | |
775 | } | |
776 | ||
/* Like acc_delete, but on async queue ASYNC.  */

void
acc_delete_async (void *h, size_t s, int async)
{
  const unsigned flags = 0;

  delete_copyout (flags, h, s, async, __FUNCTION__);
}
782 | ||
783 | void | |
784 | acc_delete_finalize (void *h , size_t s) | |
785 | { | |
786 | delete_copyout (FLAG_FINALIZE, h, s, acc_async_sync, __FUNCTION__); | |
787 | } | |
788 | ||
789 | void | |
790 | acc_delete_finalize_async (void *h , size_t s, int async) | |
791 | { | |
792 | delete_copyout (FLAG_FINALIZE, h, s, async, __FUNCTION__); | |
793 | } | |
794 | ||
795 | void | |
796 | acc_copyout (void *h, size_t s) | |
797 | { | |
798 | delete_copyout (FLAG_COPYOUT, h, s, acc_async_sync, __FUNCTION__); | |
799 | } | |
800 | ||
801 | void | |
802 | acc_copyout_async (void *h, size_t s, int async) | |
803 | { | |
804 | delete_copyout (FLAG_COPYOUT, h, s, async, __FUNCTION__); | |
805 | } | |
806 | ||
807 | void | |
808 | acc_copyout_finalize (void *h, size_t s) | |
809 | { | |
810 | delete_copyout (FLAG_COPYOUT | FLAG_FINALIZE, h, s, acc_async_sync, | |
811 | __FUNCTION__); | |
812 | } | |
813 | ||
814 | void | |
815 | acc_copyout_finalize_async (void *h, size_t s, int async) | |
816 | { | |
817 | delete_copyout (FLAG_COPYOUT | FLAG_FINALIZE, h, s, async, __FUNCTION__); | |
818 | } | |
819 | ||
820 | static void | |
821 | update_dev_host (int is_dev, void *h, size_t s, int async) | |
822 | { | |
823 | splay_tree_key n; | |
824 | void *d; | |
825 | ||
826 | goacc_lazy_initialize (); | |
827 | ||
828 | struct goacc_thread *thr = goacc_thread (); | |
829 | struct gomp_device_descr *acc_dev = thr->dev; | |
830 | ||
831 | if (acc_dev->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM) | |
832 | return; | |
833 | ||
834 | acc_prof_info prof_info; | |
835 | acc_api_info api_info; | |
836 | bool profiling_p = GOACC_PROFILING_SETUP_P (thr, &prof_info, &api_info); | |
837 | if (profiling_p) | |
838 | { | |
839 | prof_info.async = async; | |
840 | prof_info.async_queue = prof_info.async; | |
841 | } | |
842 | ||
843 | gomp_mutex_lock (&acc_dev->lock); | |
844 | ||
845 | n = lookup_host (acc_dev, h, s); | |
846 | ||
847 | if (!n) | |
848 | { | |
849 | gomp_mutex_unlock (&acc_dev->lock); | |
850 | gomp_fatal ("[%p,%d] is not mapped", h, (int)s); | |
851 | } | |
852 | ||
853 | d = (void *) (n->tgt->tgt_start + n->tgt_offset | |
854 | + (uintptr_t) h - n->host_start); | |
855 | ||
856 | goacc_aq aq = get_goacc_asyncqueue (async); | |
857 | ||
858 | if (is_dev) | |
859 | gomp_copy_host2dev (acc_dev, aq, d, h, s, /* TODO: cbuf? */ NULL); | |
860 | else | |
861 | gomp_copy_dev2host (acc_dev, aq, h, d, s); | |
862 | ||
863 | gomp_mutex_unlock (&acc_dev->lock); | |
864 | ||
865 | if (profiling_p) | |
866 | { | |
867 | thr->prof_info = NULL; | |
868 | thr->api_info = NULL; | |
869 | } | |
870 | } | |
871 | ||
872 | void | |
873 | acc_update_device (void *h, size_t s) | |
874 | { | |
875 | update_dev_host (1, h, s, acc_async_sync); | |
876 | } | |
877 | ||
/* Refresh the device copy of host range [H,+S) from host memory on async
   queue ASYNC.  */

void
acc_update_device_async (void *h, size_t s, int async)
{
  const int to_device = 1;

  update_dev_host (to_device, h, s, async);
}
883 | ||
884 | void | |
885 | acc_update_self (void *h, size_t s) | |
886 | { | |
887 | update_dev_host (0, h, s, acc_async_sync); | |
888 | } | |
889 | ||
/* Refresh host range [H,+S) from its device copy on async queue ASYNC.  */

void
acc_update_self_async (void *h, size_t s, int async)
{
  const int to_device = 0;

  update_dev_host (to_device, h, s, async);
}
895 | ||
896 | void | |
897 | gomp_acc_insert_pointer (size_t mapnum, void **hostaddrs, size_t *sizes, | |
898 | void *kinds, int async) | |
899 | { | |
900 | struct target_mem_desc *tgt; | |
901 | struct goacc_thread *thr = goacc_thread (); | |
902 | struct gomp_device_descr *acc_dev = thr->dev; | |
903 | ||
904 | if (acc_is_present (*hostaddrs, *sizes)) | |
905 | { | |
906 | splay_tree_key n; | |
907 | gomp_mutex_lock (&acc_dev->lock); | |
908 | n = lookup_host (acc_dev, *hostaddrs, *sizes); | |
909 | gomp_mutex_unlock (&acc_dev->lock); | |
910 | ||
911 | tgt = n->tgt; | |
912 | for (size_t i = 0; i < tgt->list_count; i++) | |
913 | if (tgt->list[i].key == n) | |
914 | { | |
915 | for (size_t j = 0; j < mapnum; j++) | |
916 | if (i + j < tgt->list_count && tgt->list[i + j].key) | |
917 | { | |
918 | tgt->list[i + j].key->refcount++; | |
919 | tgt->list[i + j].key->dynamic_refcount++; | |
920 | } | |
921 | return; | |
922 | } | |
923 | /* Should not reach here. */ | |
924 | gomp_fatal ("Dynamic refcount incrementing failed for pointer/pset"); | |
925 | } | |
926 | ||
927 | gomp_debug (0, " %s: prepare mappings\n", __FUNCTION__); | |
928 | goacc_aq aq = get_goacc_asyncqueue (async); | |
929 | tgt = gomp_map_vars_async (acc_dev, aq, mapnum, hostaddrs, | |
930 | NULL, sizes, kinds, true, GOMP_MAP_VARS_OPENACC); | |
931 | gomp_debug (0, " %s: mappings prepared\n", __FUNCTION__); | |
932 | ||
933 | /* Initialize dynamic refcount. */ | |
934 | tgt->list[0].key->dynamic_refcount = 1; | |
935 | ||
936 | gomp_mutex_lock (&acc_dev->lock); | |
937 | tgt->prev = acc_dev->openacc.data_environ; | |
938 | acc_dev->openacc.data_environ = tgt; | |
939 | gomp_mutex_unlock (&acc_dev->lock); | |
940 | } | |
941 | ||
942 | void | |
943 | gomp_acc_remove_pointer (void *h, size_t s, bool force_copyfrom, int async, | |
944 | int finalize, int mapnum) | |
945 | { | |
946 | struct goacc_thread *thr = goacc_thread (); | |
947 | struct gomp_device_descr *acc_dev = thr->dev; | |
948 | splay_tree_key n; | |
949 | struct target_mem_desc *t; | |
950 | int minrefs = (mapnum == 1) ? 2 : 3; | |
951 | ||
952 | if (!acc_is_present (h, s)) | |
953 | return; | |
954 | ||
955 | gomp_mutex_lock (&acc_dev->lock); | |
956 | ||
957 | n = lookup_host (acc_dev, h, 1); | |
958 | ||
959 | if (!n) | |
960 | { | |
961 | gomp_mutex_unlock (&acc_dev->lock); | |
962 | gomp_fatal ("%p is not a mapped block", (void *)h); | |
963 | } | |
964 | ||
965 | gomp_debug (0, " %s: restore mappings\n", __FUNCTION__); | |
966 | ||
967 | t = n->tgt; | |
968 | ||
969 | if (n->refcount < n->dynamic_refcount) | |
970 | { | |
971 | gomp_mutex_unlock (&acc_dev->lock); | |
972 | gomp_fatal ("Dynamic reference counting assert fail\n"); | |
973 | } | |
974 | ||
975 | if (finalize) | |
976 | { | |
977 | n->refcount -= n->dynamic_refcount; | |
978 | n->dynamic_refcount = 0; | |
979 | } | |
980 | else if (n->dynamic_refcount) | |
981 | { | |
982 | n->dynamic_refcount--; | |
983 | n->refcount--; | |
984 | } | |
985 | ||
986 | gomp_mutex_unlock (&acc_dev->lock); | |
987 | ||
988 | if (n->refcount == 0) | |
989 | { | |
990 | if (t->refcount == minrefs) | |
991 | { | |
992 | /* This is the last reference, so pull the descriptor off the | |
993 | chain. This prevents gomp_unmap_vars via gomp_unmap_tgt from | |
994 | freeing the device memory. */ | |
995 | struct target_mem_desc *tp; | |
996 | for (tp = NULL, t = acc_dev->openacc.data_environ; t != NULL; | |
997 | tp = t, t = t->prev) | |
998 | { | |
999 | if (n->tgt == t) | |
1000 | { | |
1001 | if (tp) | |
1002 | tp->prev = t->prev; | |
1003 | else | |
1004 | acc_dev->openacc.data_environ = t->prev; | |
1005 | break; | |
1006 | } | |
1007 | } | |
1008 | } | |
1009 | ||
1010 | /* Set refcount to 1 to allow gomp_unmap_vars to unmap it. */ | |
1011 | n->refcount = 1; | |
1012 | t->refcount = minrefs; | |
1013 | for (size_t i = 0; i < t->list_count; i++) | |
1014 | if (t->list[i].key == n) | |
1015 | { | |
1016 | t->list[i].copy_from = force_copyfrom ? 1 : 0; | |
1017 | break; | |
1018 | } | |
1019 | ||
1020 | /* If running synchronously, unmap immediately. */ | |
1021 | if (async < acc_async_noval) | |
1022 | gomp_unmap_vars (t, true); | |
1023 | else | |
1024 | { | |
1025 | goacc_aq aq = get_goacc_asyncqueue (async); | |
1026 | gomp_unmap_vars_async (t, true, aq); | |
1027 | } | |
1028 | } | |
1029 | ||
1030 | gomp_mutex_unlock (&acc_dev->lock); | |
1031 | ||
1032 | gomp_debug (0, " %s: mappings restored\n", __FUNCTION__); | |
1033 | } |