/* OpenACC Runtime initialization routines

   Copyright (C) 2013-2019 Free Software Foundation, Inc.

   Contributed by Mentor Embedded.

   This file is part of the GNU Offloading and Multi Processing Library
   (libgomp).

   Libgomp is free software; you can redistribute it and/or modify it
   under the terms of the GNU General Public License as published by
   the Free Software Foundation; either version 3, or (at your option)
   any later version.

   Libgomp is distributed in the hope that it will be useful, but WITHOUT ANY
   WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
   FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
   more details.

   Under Section 7 of GPL version 3, you are granted additional
   permissions described in the GCC Runtime Library Exception, version
   3.1, as published by the Free Software Foundation.

   You should have received a copy of the GNU General Public License and
   a copy of the GCC Runtime Library Exception along with this program;
   see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
   <http://www.gnu.org/licenses/>.  */

#include "openacc.h"
#include "libgomp.h"
#include "gomp-constants.h"
#include "oacc-int.h"
#include <string.h>
#include <assert.h>
/* Return the block containing [H,+S), or NULL if it is not contained.  The
   device lock for DEV must be locked on entry, and remains locked on
   exit.  */

static splay_tree_key
lookup_host (struct gomp_device_descr *dev, void *h, size_t s)
{
  struct splay_tree_key_s node;
  splay_tree_key key;

  node.host_start = (uintptr_t) h;
  node.host_end = (uintptr_t) h + s;

  key = splay_tree_lookup (&dev->mem_map, &node);

  return key;
}

/* Return the block containing [D,+S), or NULL if it is not contained.
   The list isn't ordered by device address, so we have to iterate
   over the whole array.  This is not expected to be a common
   operation.  The device lock associated with TGT must be locked on entry,
   and remains locked on exit.  */

static splay_tree_key
lookup_dev (struct target_mem_desc *tgt, void *d, size_t s)
{
  int i;
  struct target_mem_desc *t;

  if (!tgt)
    return NULL;

  for (t = tgt; t != NULL; t = t->prev)
    {
      if (t->tgt_start <= (uintptr_t) d && t->tgt_end >= (uintptr_t) d + s)
	break;
    }

  if (!t)
    return NULL;

  for (i = 0; i < t->list_count; i++)
    {
      void * offset;

      splay_tree_key k = &t->array[i].key;
      offset = d - t->tgt_start + k->tgt_offset;

      if (k->host_start + offset <= (void *) k->host_end)
	return k;
    }

  return NULL;
}

/* OpenACC is silent on how memory exhaustion is indicated.  We return
   NULL.  */

void *
acc_malloc (size_t s)
{
  if (!s)
    return NULL;

  goacc_lazy_initialize ();

  struct goacc_thread *thr = goacc_thread ();

  assert (thr->dev);

  if (thr->dev->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
    return malloc (s);

  acc_prof_info prof_info;
  acc_api_info api_info;
  bool profiling_p = GOACC_PROFILING_SETUP_P (thr, &prof_info, &api_info);

  void *res = thr->dev->alloc_func (thr->dev->target_id, s);

  if (profiling_p)
    {
      thr->prof_info = NULL;
      thr->api_info = NULL;
    }

  return res;
}

void
acc_free (void *d)
{
  splay_tree_key k;

  if (!d)
    return;

  struct goacc_thread *thr = goacc_thread ();

  assert (thr && thr->dev);

  struct gomp_device_descr *acc_dev = thr->dev;

  if (acc_dev->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
    return free (d);

  acc_prof_info prof_info;
  acc_api_info api_info;
  bool profiling_p = GOACC_PROFILING_SETUP_P (thr, &prof_info, &api_info);

  gomp_mutex_lock (&acc_dev->lock);

  /* We don't have to call lazy open here, as the ptr value must have
     been returned by acc_malloc.  It's not permitted to pass NULL in
     (unless you got that NULL from acc_malloc).  */
  if ((k = lookup_dev (acc_dev->openacc.data_environ, d, 1)))
    {
      void *offset = d - k->tgt->tgt_start + k->tgt_offset;
      void *h = k->host_start + offset;
      size_t h_size = k->host_end - k->host_start;
      gomp_mutex_unlock (&acc_dev->lock);
      /* PR92503 "[OpenACC] Behavior of 'acc_free' if the memory space is still
	 used in a mapping".  */
      gomp_fatal ("refusing to free device memory space at %p that is still"
		  " mapped at [%p,+%d]",
		  d, h, (int) h_size);
    }
  else
    gomp_mutex_unlock (&acc_dev->lock);

  if (!acc_dev->free_func (acc_dev->target_id, d))
    gomp_fatal ("error in freeing device memory in %s", __FUNCTION__);

  if (profiling_p)
    {
      thr->prof_info = NULL;
      thr->api_info = NULL;
    }
}
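
/* Usage sketch for the two routines above (an illustrative comment, not
   part of libgomp; the buffer size is a made-up example value).  A NULL
   result from 'acc_malloc' indicates memory exhaustion:

     void *d = acc_malloc (1024);
     if (d == NULL)
       abort ();
     ...
     acc_free (d);

   Passing 'acc_free' an address that is still covered by a mapping aborts;
   see PR92503 above.  */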

static void
memcpy_tofrom_device (bool from, void *d, void *h, size_t s, int async,
		      const char *libfnname)
{
  /* No need to call lazy open here, as the device pointer must have
     been obtained from a routine that did that.  */
  struct goacc_thread *thr = goacc_thread ();

  assert (thr && thr->dev);

  if (thr->dev->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
    {
      if (from)
	memmove (h, d, s);
      else
	memmove (d, h, s);
      return;
    }

  acc_prof_info prof_info;
  acc_api_info api_info;
  bool profiling_p = GOACC_PROFILING_SETUP_P (thr, &prof_info, &api_info);
  if (profiling_p)
    {
      prof_info.async = async;
      prof_info.async_queue = prof_info.async;
    }

  goacc_aq aq = get_goacc_asyncqueue (async);
  if (from)
    gomp_copy_dev2host (thr->dev, aq, h, d, s);
  else
    gomp_copy_host2dev (thr->dev, aq, d, h, s, /* TODO: cbuf? */ NULL);

  if (profiling_p)
    {
      thr->prof_info = NULL;
      thr->api_info = NULL;
    }
}

void
acc_memcpy_to_device (void *d, void *h, size_t s)
{
  memcpy_tofrom_device (false, d, h, s, acc_async_sync, __FUNCTION__);
}

void
acc_memcpy_to_device_async (void *d, void *h, size_t s, int async)
{
  memcpy_tofrom_device (false, d, h, s, async, __FUNCTION__);
}

void
acc_memcpy_from_device (void *h, void *d, size_t s)
{
  memcpy_tofrom_device (true, d, h, s, acc_async_sync, __FUNCTION__);
}

void
acc_memcpy_from_device_async (void *h, void *d, size_t s, int async)
{
  memcpy_tofrom_device (true, d, h, s, async, __FUNCTION__);
}
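
/* Usage sketch for the copy routines above (illustrative, not part of
   libgomp; 'buf' and its size are hypothetical):

     float buf[256];
     void *d_buf = acc_malloc (sizeof buf);
     acc_memcpy_to_device (d_buf, buf, sizeof buf);
     ...
     acc_memcpy_from_device (buf, d_buf, sizeof buf);
     acc_free (d_buf);

   On shared-memory devices these degenerate to 'memmove', as above.  */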

/* Return the device pointer that corresponds to host data H, or NULL
   if there is no mapping.  */

void *
acc_deviceptr (void *h)
{
  splay_tree_key n;
  void *d;
  void *offset;

  goacc_lazy_initialize ();

  struct goacc_thread *thr = goacc_thread ();
  struct gomp_device_descr *dev = thr->dev;

  if (thr->dev->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
    return h;

  /* In the following, no OpenACC Profiling Interface events can possibly be
     generated.  */

  gomp_mutex_lock (&dev->lock);

  n = lookup_host (dev, h, 1);

  if (!n)
    {
      gomp_mutex_unlock (&dev->lock);
      return NULL;
    }

  offset = h - n->host_start;

  d = n->tgt->tgt_start + n->tgt_offset + offset;

  gomp_mutex_unlock (&dev->lock);

  return d;
}

/* Return the host pointer that corresponds to device data D, or NULL
   if there is no mapping.  */

void *
acc_hostptr (void *d)
{
  splay_tree_key n;
  void *h;
  void *offset;

  goacc_lazy_initialize ();

  struct goacc_thread *thr = goacc_thread ();
  struct gomp_device_descr *acc_dev = thr->dev;

  if (thr->dev->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
    return d;

  /* In the following, no OpenACC Profiling Interface events can possibly be
     generated.  */

  gomp_mutex_lock (&acc_dev->lock);

  n = lookup_dev (acc_dev->openacc.data_environ, d, 1);

  if (!n)
    {
      gomp_mutex_unlock (&acc_dev->lock);
      return NULL;
    }

  offset = d - n->tgt->tgt_start + n->tgt_offset;

  h = n->host_start + offset;

  gomp_mutex_unlock (&acc_dev->lock);

  return h;
}
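
/* Usage sketch for the two translation routines above (illustrative, not
   part of libgomp; 'a' is a hypothetical mapped array):

     float a[256];
     acc_copyin (a, sizeof a);
     void *d = acc_deviceptr (a);
     assert (acc_hostptr (d) == a);

   Both directions return NULL if no mapping covers the given address.  */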

/* Return 1 if host data [H,+S] is present on the device.  */

int
acc_is_present (void *h, size_t s)
{
  splay_tree_key n;

  if (!s || !h)
    return 0;

  goacc_lazy_initialize ();

  struct goacc_thread *thr = goacc_thread ();
  struct gomp_device_descr *acc_dev = thr->dev;

  if (thr->dev->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
    return h != NULL;

  /* In the following, no OpenACC Profiling Interface events can possibly be
     generated.  */

  gomp_mutex_lock (&acc_dev->lock);

  n = lookup_host (acc_dev, h, s);

  if (n && ((uintptr_t)h < n->host_start
	    || (uintptr_t)h + s > n->host_end
	    || s > n->host_end - n->host_start))
    n = NULL;

  gomp_mutex_unlock (&acc_dev->lock);

  return n != NULL;
}
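
/* Usage sketch (illustrative, not part of libgomp): a presence test can
   guard operations that would abort on unmapped data:

     if (acc_is_present (a, sizeof a))
       acc_copyout (a, sizeof a);

   On shared-memory devices host data is always considered present.  */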

/* Create a mapping for host [H,+S] -> device [D,+S].  */

void
acc_map_data (void *h, void *d, size_t s)
{
  struct target_mem_desc *tgt = NULL;
  size_t mapnum = 1;
  void *hostaddrs = h;
  void *devaddrs = d;
  size_t sizes = s;
  unsigned short kinds = GOMP_MAP_ALLOC;

  goacc_lazy_initialize ();

  struct goacc_thread *thr = goacc_thread ();
  struct gomp_device_descr *acc_dev = thr->dev;

  if (acc_dev->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
    {
      if (d != h)
	gomp_fatal ("cannot map data on shared-memory system");
    }
  else
    {
      if (!d || !h || !s)
	gomp_fatal ("[%p,+%d]->[%p,+%d] is a bad map",
		    (void *)h, (int)s, (void *)d, (int)s);

      acc_prof_info prof_info;
      acc_api_info api_info;
      bool profiling_p = GOACC_PROFILING_SETUP_P (thr, &prof_info, &api_info);

      gomp_mutex_lock (&acc_dev->lock);

      if (lookup_host (acc_dev, h, s))
	{
	  gomp_mutex_unlock (&acc_dev->lock);
	  gomp_fatal ("host address [%p, +%d] is already mapped", (void *)h,
		      (int)s);
	}

      if (lookup_dev (acc_dev->openacc.data_environ, d, s))
	{
	  gomp_mutex_unlock (&acc_dev->lock);
	  gomp_fatal ("device address [%p, +%d] is already mapped", (void *)d,
		      (int)s);
	}

      gomp_mutex_unlock (&acc_dev->lock);

      tgt = gomp_map_vars (acc_dev, mapnum, &hostaddrs, &devaddrs, &sizes,
			   &kinds, true, GOMP_MAP_VARS_OPENACC);
      splay_tree_key n = tgt->list[0].key;
      assert (n->refcount == 1);
      assert (n->dynamic_refcount == 0);
      /* Special reference counting behavior.  */
      n->refcount = REFCOUNT_INFINITY;

      if (profiling_p)
	{
	  thr->prof_info = NULL;
	  thr->api_info = NULL;
	}
    }

  /* On a shared-memory device no target memory descriptor was created
     above, so there is nothing to link into the data environment.  */
  if (tgt)
    {
      gomp_mutex_lock (&acc_dev->lock);
      tgt->prev = acc_dev->openacc.data_environ;
      acc_dev->openacc.data_environ = tgt;
      gomp_mutex_unlock (&acc_dev->lock);
    }
}

void
acc_unmap_data (void *h)
{
  struct goacc_thread *thr = goacc_thread ();
  struct gomp_device_descr *acc_dev = thr->dev;

  /* No need to call lazy open, as the address must have been mapped.  */

  /* This is a no-op on shared-memory targets.  */
  if (acc_dev->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
    return;

  acc_prof_info prof_info;
  acc_api_info api_info;
  bool profiling_p = GOACC_PROFILING_SETUP_P (thr, &prof_info, &api_info);

  size_t host_size;

  gomp_mutex_lock (&acc_dev->lock);

  splay_tree_key n = lookup_host (acc_dev, h, 1);
  struct target_mem_desc *t;

  if (!n)
    {
      gomp_mutex_unlock (&acc_dev->lock);
      gomp_fatal ("%p is not a mapped block", (void *)h);
    }

  host_size = n->host_end - n->host_start;

  if (n->host_start != (uintptr_t) h)
    {
      gomp_mutex_unlock (&acc_dev->lock);
      gomp_fatal ("[%p,%d] surrounds %p",
		  (void *) n->host_start, (int) host_size, (void *) h);
    }
  /* TODO This currently doesn't catch 'REFCOUNT_INFINITY' usage different from
     'acc_map_data'.  Maybe 'dynamic_refcount' can be used for disambiguating
     the different 'REFCOUNT_INFINITY' cases, or simply separate
     'REFCOUNT_INFINITY' values per different usage ('REFCOUNT_ACC_MAP_DATA'
     etc.)?  */
  else if (n->refcount != REFCOUNT_INFINITY)
    {
      gomp_mutex_unlock (&acc_dev->lock);
      gomp_fatal ("refusing to unmap block [%p,+%d] that has not been mapped"
		  " by 'acc_map_data'",
		  (void *) h, (int) host_size);
    }

  /* Mark for removal.  */
  n->refcount = 1;

  t = n->tgt;

  if (t->refcount == 2)
    {
      struct target_mem_desc *tp;

      /* This is the last reference, so pull the descriptor off the
	 chain.  This prevents gomp_unmap_vars via gomp_unmap_tgt from
	 freeing the device memory.  */
      t->tgt_end = 0;
      t->to_free = 0;

      for (tp = NULL, t = acc_dev->openacc.data_environ; t != NULL;
	   tp = t, t = t->prev)
	if (n->tgt == t)
	  {
	    if (tp)
	      tp->prev = t->prev;
	    else
	      acc_dev->openacc.data_environ = t->prev;

	    break;
	  }
    }

  gomp_mutex_unlock (&acc_dev->lock);

  gomp_unmap_vars (t, true);

  if (profiling_p)
    {
      thr->prof_info = NULL;
      thr->api_info = NULL;
    }
}
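
/* Usage sketch for 'acc_map_data'/'acc_unmap_data' (illustrative, not part
   of libgomp; 'a' is a hypothetical host array):

     void *d = acc_malloc (sizeof a);
     acc_map_data (a, d, sizeof a);
     ...
     acc_unmap_data (a);
     acc_free (d);

   Unmapping only removes the mapping; the device memory itself must still
   be released with 'acc_free'.  */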

#define FLAG_PRESENT (1 << 0)
#define FLAG_CREATE (1 << 1)
#define FLAG_COPY (1 << 2)

static void *
present_create_copy (unsigned f, void *h, size_t s, int async)
{
  void *d;
  splay_tree_key n;

  if (!h || !s)
    gomp_fatal ("[%p,+%d] is a bad range", (void *)h, (int)s);

  goacc_lazy_initialize ();

  struct goacc_thread *thr = goacc_thread ();
  struct gomp_device_descr *acc_dev = thr->dev;

  if (acc_dev->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
    return h;

  acc_prof_info prof_info;
  acc_api_info api_info;
  bool profiling_p = GOACC_PROFILING_SETUP_P (thr, &prof_info, &api_info);
  if (profiling_p)
    {
      prof_info.async = async;
      prof_info.async_queue = prof_info.async;
    }

  gomp_mutex_lock (&acc_dev->lock);

  n = lookup_host (acc_dev, h, s);
  if (n)
    {
      /* Present.  */
      d = (void *) (n->tgt->tgt_start + n->tgt_offset + h - n->host_start);

      if (!(f & FLAG_PRESENT))
	{
	  gomp_mutex_unlock (&acc_dev->lock);
	  gomp_fatal ("[%p,+%d] already mapped to [%p,+%d]",
		      (void *)h, (int)s, (void *)d, (int)s);
	}
      if ((h + s) > (void *)n->host_end)
	{
	  gomp_mutex_unlock (&acc_dev->lock);
	  gomp_fatal ("[%p,+%d] not mapped", (void *)h, (int)s);
	}

      if (n->refcount != REFCOUNT_INFINITY)
	{
	  n->refcount++;
	  n->dynamic_refcount++;
	}
      gomp_mutex_unlock (&acc_dev->lock);
    }
  else if (!(f & FLAG_CREATE))
    {
      gomp_mutex_unlock (&acc_dev->lock);
      gomp_fatal ("[%p,+%d] not mapped", (void *)h, (int)s);
    }
  else
    {
      struct target_mem_desc *tgt;
      size_t mapnum = 1;
      unsigned short kinds;
      void *hostaddrs = h;

      if (f & FLAG_COPY)
	kinds = GOMP_MAP_TO;
      else
	kinds = GOMP_MAP_ALLOC;

      gomp_mutex_unlock (&acc_dev->lock);

      goacc_aq aq = get_goacc_asyncqueue (async);

      tgt = gomp_map_vars_async (acc_dev, aq, mapnum, &hostaddrs, NULL, &s,
				 &kinds, true, GOMP_MAP_VARS_OPENACC);
      /* Initialize dynamic refcount.  */
      tgt->list[0].key->dynamic_refcount = 1;

      gomp_mutex_lock (&acc_dev->lock);

      d = tgt->to_free;
      tgt->prev = acc_dev->openacc.data_environ;
      acc_dev->openacc.data_environ = tgt;

      gomp_mutex_unlock (&acc_dev->lock);
    }

  if (profiling_p)
    {
      thr->prof_info = NULL;
      thr->api_info = NULL;
    }

  return d;
}

void *
acc_create (void *h, size_t s)
{
  return present_create_copy (FLAG_PRESENT | FLAG_CREATE, h, s, acc_async_sync);
}

void
acc_create_async (void *h, size_t s, int async)
{
  present_create_copy (FLAG_PRESENT | FLAG_CREATE, h, s, async);
}

/* acc_present_or_create used to be what acc_create is now.  */
/* acc_pcreate is acc_present_or_create by a different name.  */
#ifdef HAVE_ATTRIBUTE_ALIAS
strong_alias (acc_create, acc_present_or_create)
strong_alias (acc_create, acc_pcreate)
#else
void *
acc_present_or_create (void *h, size_t s)
{
  return acc_create (h, s);
}

void *
acc_pcreate (void *h, size_t s)
{
  return acc_create (h, s);
}
#endif
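
/* Usage sketch for 'acc_create' (illustrative, not part of libgomp; 'a' is
   a hypothetical host array).  Device storage is allocated but not
   initialized, so data is typically pushed later:

     acc_create (a, sizeof a);
     acc_update_device (a, sizeof a);

   Use 'acc_copyin' below to allocate and populate in one step.  */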

void *
acc_copyin (void *h, size_t s)
{
  return present_create_copy (FLAG_PRESENT | FLAG_CREATE | FLAG_COPY, h, s,
			      acc_async_sync);
}

void
acc_copyin_async (void *h, size_t s, int async)
{
  present_create_copy (FLAG_PRESENT | FLAG_CREATE | FLAG_COPY, h, s, async);
}

/* acc_present_or_copyin used to be what acc_copyin is now.  */
/* acc_pcopyin is acc_present_or_copyin by a different name.  */
#ifdef HAVE_ATTRIBUTE_ALIAS
strong_alias (acc_copyin, acc_present_or_copyin)
strong_alias (acc_copyin, acc_pcopyin)
#else
void *
acc_present_or_copyin (void *h, size_t s)
{
  return acc_copyin (h, s);
}

void *
acc_pcopyin (void *h, size_t s)
{
  return acc_copyin (h, s);
}
#endif
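
/* Usage sketch for 'acc_copyin' (illustrative, not part of libgomp; 'a' is
   a hypothetical host array).  Re-copying present data only increments the
   dynamic reference count, so each call must be balanced by a matching
   'acc_delete' or 'acc_copyout':

     acc_copyin (a, sizeof a);
     acc_copyin (a, sizeof a);
     acc_delete (a, sizeof a);
     acc_delete (a, sizeof a);

   After the second 'acc_delete' the data is no longer present.  */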

#define FLAG_COPYOUT (1 << 0)
#define FLAG_FINALIZE (1 << 1)

static void
delete_copyout (unsigned f, void *h, size_t s, int async, const char *libfnname)
{
  splay_tree_key n;
  void *d;
  struct goacc_thread *thr = goacc_thread ();
  struct gomp_device_descr *acc_dev = thr->dev;

  if (acc_dev->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
    return;

  acc_prof_info prof_info;
  acc_api_info api_info;
  bool profiling_p = GOACC_PROFILING_SETUP_P (thr, &prof_info, &api_info);
  if (profiling_p)
    {
      prof_info.async = async;
      prof_info.async_queue = prof_info.async;
    }

  gomp_mutex_lock (&acc_dev->lock);

  n = lookup_host (acc_dev, h, s);

  /* No need to call lazy open, as the data must already have been
     mapped.  */

  if (!n)
    {
      gomp_mutex_unlock (&acc_dev->lock);
      gomp_fatal ("[%p,%d] is not mapped", (void *)h, (int)s);
    }

  d = (void *) (n->tgt->tgt_start + n->tgt_offset
		+ (uintptr_t) h - n->host_start);

  if ((uintptr_t) h < n->host_start || (uintptr_t) h + s > n->host_end)
    {
      size_t host_size = n->host_end - n->host_start;
      gomp_mutex_unlock (&acc_dev->lock);
      gomp_fatal ("[%p,+%d] outside mapped block [%p,+%d]",
		  (void *) h, (int) s, (void *) n->host_start, (int) host_size);
    }

  if (n->refcount == REFCOUNT_INFINITY)
    {
      n->refcount = 0;
      n->dynamic_refcount = 0;
    }
  if (n->refcount < n->dynamic_refcount)
    {
      gomp_mutex_unlock (&acc_dev->lock);
      gomp_fatal ("Dynamic reference counting assert fail\n");
    }

  if (f & FLAG_FINALIZE)
    {
      n->refcount -= n->dynamic_refcount;
      n->dynamic_refcount = 0;
    }
  else if (n->dynamic_refcount)
    {
      n->dynamic_refcount--;
      n->refcount--;
    }

  if (n->refcount == 0)
    {
      if (n->tgt->refcount == 2)
	{
	  struct target_mem_desc *tp, *t;
	  for (tp = NULL, t = acc_dev->openacc.data_environ; t != NULL;
	       tp = t, t = t->prev)
	    if (n->tgt == t)
	      {
		if (tp)
		  tp->prev = t->prev;
		else
		  acc_dev->openacc.data_environ = t->prev;
		break;
	      }
	}

      if (f & FLAG_COPYOUT)
	{
	  goacc_aq aq = get_goacc_asyncqueue (async);
	  gomp_copy_dev2host (acc_dev, aq, h, d, s);
	}
      gomp_remove_var (acc_dev, n);
    }

  gomp_mutex_unlock (&acc_dev->lock);

  if (profiling_p)
    {
      thr->prof_info = NULL;
      thr->api_info = NULL;
    }
}

void
acc_delete (void *h, size_t s)
{
  delete_copyout (0, h, s, acc_async_sync, __FUNCTION__);
}

void
acc_delete_async (void *h, size_t s, int async)
{
  delete_copyout (0, h, s, async, __FUNCTION__);
}

void
acc_delete_finalize (void *h, size_t s)
{
  delete_copyout (FLAG_FINALIZE, h, s, acc_async_sync, __FUNCTION__);
}

void
acc_delete_finalize_async (void *h, size_t s, int async)
{
  delete_copyout (FLAG_FINALIZE, h, s, async, __FUNCTION__);
}

void
acc_copyout (void *h, size_t s)
{
  delete_copyout (FLAG_COPYOUT, h, s, acc_async_sync, __FUNCTION__);
}

void
acc_copyout_async (void *h, size_t s, int async)
{
  delete_copyout (FLAG_COPYOUT, h, s, async, __FUNCTION__);
}

void
acc_copyout_finalize (void *h, size_t s)
{
  delete_copyout (FLAG_COPYOUT | FLAG_FINALIZE, h, s, acc_async_sync,
		  __FUNCTION__);
}

void
acc_copyout_finalize_async (void *h, size_t s, int async)
{
  delete_copyout (FLAG_COPYOUT | FLAG_FINALIZE, h, s, async, __FUNCTION__);
}
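
/* Usage sketch for the '_finalize' variants above (illustrative, not part
   of libgomp; 'a' is a hypothetical host array).  They drop the entire
   dynamic reference count at once, which tears down a mapping made by an
   unknown number of 'acc_copyin' calls:

     acc_copyin (a, sizeof a);
     acc_copyin (a, sizeof a);
     acc_copyout_finalize (a, sizeof a);

   One 'acc_copyout_finalize' copies the data back and removes the mapping,
   where plain 'acc_copyout' would only decrement the count.  */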

static void
update_dev_host (int is_dev, void *h, size_t s, int async)
{
  splay_tree_key n;
  void *d;

  goacc_lazy_initialize ();

  struct goacc_thread *thr = goacc_thread ();
  struct gomp_device_descr *acc_dev = thr->dev;

  if (acc_dev->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
    return;

  /* Fortran optional arguments that are non-present result in a
     NULL host address here.  This can safely be ignored as it is
     not possible to 'update' a non-present optional argument.  */
  if (h == NULL)
    return;

  acc_prof_info prof_info;
  acc_api_info api_info;
  bool profiling_p = GOACC_PROFILING_SETUP_P (thr, &prof_info, &api_info);
  if (profiling_p)
    {
      prof_info.async = async;
      prof_info.async_queue = prof_info.async;
    }

  gomp_mutex_lock (&acc_dev->lock);

  n = lookup_host (acc_dev, h, s);

  if (!n)
    {
      gomp_mutex_unlock (&acc_dev->lock);
      gomp_fatal ("[%p,%d] is not mapped", h, (int)s);
    }

  d = (void *) (n->tgt->tgt_start + n->tgt_offset
		+ (uintptr_t) h - n->host_start);

  goacc_aq aq = get_goacc_asyncqueue (async);

  if (is_dev)
    gomp_copy_host2dev (acc_dev, aq, d, h, s, /* TODO: cbuf? */ NULL);
  else
    gomp_copy_dev2host (acc_dev, aq, h, d, s);

  gomp_mutex_unlock (&acc_dev->lock);

  if (profiling_p)
    {
      thr->prof_info = NULL;
      thr->api_info = NULL;
    }
}

void
acc_update_device (void *h, size_t s)
{
  update_dev_host (1, h, s, acc_async_sync);
}

void
acc_update_device_async (void *h, size_t s, int async)
{
  update_dev_host (1, h, s, async);
}

void
acc_update_self (void *h, size_t s)
{
  update_dev_host (0, h, s, acc_async_sync);
}

void
acc_update_self_async (void *h, size_t s, int async)
{
  update_dev_host (0, h, s, async);
}
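
/* Usage sketch for the update routines above (illustrative, not part of
   libgomp; 'a' is a hypothetical present array):

     acc_copyin (a, sizeof a);
     a[0] = 42;
     acc_update_device (a, sizeof a);
     ...
     acc_update_self (a, sizeof a);

   Updates synchronize the host and device copies of already-present data
   without changing the mapping.  */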

void
gomp_acc_insert_pointer (size_t mapnum, void **hostaddrs, size_t *sizes,
			 void *kinds, int async)
{
  struct target_mem_desc *tgt;
  struct goacc_thread *thr = goacc_thread ();
  struct gomp_device_descr *acc_dev = thr->dev;

  if (*hostaddrs == NULL)
    return;

  if (acc_is_present (*hostaddrs, *sizes))
    {
      splay_tree_key n;
      gomp_mutex_lock (&acc_dev->lock);
      n = lookup_host (acc_dev, *hostaddrs, *sizes);
      gomp_mutex_unlock (&acc_dev->lock);

      tgt = n->tgt;
      for (size_t i = 0; i < tgt->list_count; i++)
	if (tgt->list[i].key == n)
	  {
	    for (size_t j = 0; j < mapnum; j++)
	      if (i + j < tgt->list_count && tgt->list[i + j].key)
		{
		  tgt->list[i + j].key->refcount++;
		  tgt->list[i + j].key->dynamic_refcount++;
		}
	    return;
	  }
      /* Should not reach here.  */
      gomp_fatal ("Dynamic refcount incrementing failed for pointer/pset");
    }

  gomp_debug (0, "  %s: prepare mappings\n", __FUNCTION__);
  goacc_aq aq = get_goacc_asyncqueue (async);
  tgt = gomp_map_vars_async (acc_dev, aq, mapnum, hostaddrs,
			     NULL, sizes, kinds, true, GOMP_MAP_VARS_OPENACC);
  gomp_debug (0, "  %s: mappings prepared\n", __FUNCTION__);

  /* Initialize dynamic refcount.  */
  tgt->list[0].key->dynamic_refcount = 1;

  gomp_mutex_lock (&acc_dev->lock);
  tgt->prev = acc_dev->openacc.data_environ;
  acc_dev->openacc.data_environ = tgt;
  gomp_mutex_unlock (&acc_dev->lock);
}

void
gomp_acc_remove_pointer (void *h, size_t s, bool force_copyfrom, int async,
			 int finalize, int mapnum)
{
  struct goacc_thread *thr = goacc_thread ();
  struct gomp_device_descr *acc_dev = thr->dev;
  splay_tree_key n;
  struct target_mem_desc *t;
  int minrefs = (mapnum == 1) ? 2 : 3;

  if (!acc_is_present (h, s))
    return;

  gomp_mutex_lock (&acc_dev->lock);

  n = lookup_host (acc_dev, h, 1);

  if (!n)
    {
      gomp_mutex_unlock (&acc_dev->lock);
      gomp_fatal ("%p is not a mapped block", (void *)h);
    }

  gomp_debug (0, "  %s: restore mappings\n", __FUNCTION__);

  t = n->tgt;

  if (n->refcount < n->dynamic_refcount)
    {
      gomp_mutex_unlock (&acc_dev->lock);
      gomp_fatal ("Dynamic reference counting assert fail\n");
    }

  if (finalize)
    {
      n->refcount -= n->dynamic_refcount;
      n->dynamic_refcount = 0;
    }
  else if (n->dynamic_refcount)
    {
      n->dynamic_refcount--;
      n->refcount--;
    }

  if (n->refcount == 0)
    {
      if (t->refcount == minrefs)
	{
	  /* This is the last reference, so pull the descriptor off the
	     chain.  This prevents gomp_unmap_vars via gomp_unmap_tgt from
	     freeing the device memory.  */
	  struct target_mem_desc *tp;
	  for (tp = NULL, t = acc_dev->openacc.data_environ; t != NULL;
	       tp = t, t = t->prev)
	    {
	      if (n->tgt == t)
		{
		  if (tp)
		    tp->prev = t->prev;
		  else
		    acc_dev->openacc.data_environ = t->prev;
		  break;
		}
	    }
	}

      /* Set refcount to 1 to allow gomp_unmap_vars to unmap it.  */
      n->refcount = 1;
      t->refcount = minrefs;
      for (size_t i = 0; i < t->list_count; i++)
	if (t->list[i].key == n)
	  {
	    t->list[i].copy_from = force_copyfrom ? 1 : 0;
	    break;
	  }

      /* Release the device lock before unmapping; gomp_unmap_vars acquires
	 it itself.  Unlocking here, rather than before the bookkeeping
	 above, keeps the refcount and chain updates protected and avoids
	 unlocking the lock twice.  */
      gomp_mutex_unlock (&acc_dev->lock);

      /* If running synchronously, unmap immediately.  */
      if (async < acc_async_noval)
	gomp_unmap_vars (t, true);
      else
	{
	  goacc_aq aq = get_goacc_asyncqueue (async);
	  gomp_unmap_vars_async (t, true, aq);
	}
    }
  else
    gomp_mutex_unlock (&acc_dev->lock);

  gomp_debug (0, "  %s: mappings restored\n", __FUNCTION__);
}