/* OpenACC Runtime initialization routines

   Copyright (C) 2013-2019 Free Software Foundation, Inc.

   Contributed by Mentor Embedded.

   This file is part of the GNU Offloading and Multi Processing Library
   (libgomp).

   Libgomp is free software; you can redistribute it and/or modify it
   under the terms of the GNU General Public License as published by
   the Free Software Foundation; either version 3, or (at your option)
   any later version.

   Libgomp is distributed in the hope that it will be useful, but WITHOUT ANY
   WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
   FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
   more details.

   Under Section 7 of GPL version 3, you are granted additional
   permissions described in the GCC Runtime Library Exception, version
   3.1, as published by the Free Software Foundation.

   You should have received a copy of the GNU General Public License and
   a copy of the GCC Runtime Library Exception along with this program;
   see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
   <http://www.gnu.org/licenses/>.  */

#include "openacc.h"
#include "config.h"
#include "libgomp.h"
#include "gomp-constants.h"
#include "oacc-int.h"
#include <stdint.h>
#include <string.h>
#include <assert.h>

/* Return block containing [H,H+S), or NULL if not contained.  The device lock
   for DEV must be locked on entry, and remains locked on exit.  */

static splay_tree_key
lookup_host (struct gomp_device_descr *dev, void *h, size_t s)
{
  struct splay_tree_key_s node;
  splay_tree_key key;

  node.host_start = (uintptr_t) h;
  node.host_end = (uintptr_t) h + s;

  key = splay_tree_lookup (&dev->mem_map, &node);

  return key;
}

/* Return block containing [D,D+S), or NULL if not contained.
   The list isn't ordered by device address, so we have to iterate
   over the whole array.  This is not expected to be a common
   operation.  The device lock associated with TGT must be locked on entry,
   and remains locked on exit.  */

static splay_tree_key
lookup_dev (struct target_mem_desc *tgt, void *d, size_t s)
{
  int i;
  struct target_mem_desc *t;

  if (!tgt)
    return NULL;

  for (t = tgt; t != NULL; t = t->prev)
    {
      if (t->tgt_start <= (uintptr_t) d && t->tgt_end >= (uintptr_t) d + s)
        break;
    }

  if (!t)
    return NULL;

  for (i = 0; i < t->list_count; i++)
    {
      void *offset;

      splay_tree_key k = &t->array[i].key;
      offset = d - t->tgt_start + k->tgt_offset;

      if (k->host_start + offset <= (void *) k->host_end)
        return k;
    }

  return NULL;
}

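/* Illustration (not part of the build): for a block mapped at host address H
   and device address D, both helpers find the containing key, e.g.

     lookup_host (dev, H + 8, 4);
     lookup_dev (dev->openacc.data_environ, D + 8, 4);

   lookup_host is a splay tree search; lookup_dev walks the whole
   data_environ chain linearly, which is why it is expected to be rare.  */
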
/* OpenACC is silent on how memory exhaustion is indicated.  We return
   NULL.  */

void *
acc_malloc (size_t s)
{
  if (!s)
    return NULL;

  goacc_lazy_initialize ();

  struct goacc_thread *thr = goacc_thread ();

  assert (thr->dev);

  if (thr->dev->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
    return malloc (s);

  acc_prof_info prof_info;
  acc_api_info api_info;
  bool profiling_p = GOACC_PROFILING_SETUP_P (thr, &prof_info, &api_info);

  void *res = thr->dev->alloc_func (thr->dev->target_id, s);

  if (profiling_p)
    {
      thr->prof_info = NULL;
      thr->api_info = NULL;
    }

  return res;
}

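/* Usage sketch (illustrative only, not part of libgomp): since exhaustion is
   reported by a NULL return, callers are expected to check the result:

     #include <openacc.h>
     #include <stdio.h>

     void *
     alloc_on_device (size_t n)
     {
       void *d = acc_malloc (n);
       if (d == NULL)
         fprintf (stderr, "acc_malloc (%zu) failed\n", n);
       return d;
     }
*/
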
/* OpenACC 2.0a (3.2.16) doesn't specify what to do in the event
   the device address is mapped.  We choose to check whether it is mapped,
   and if it is, to unmap it.  */
void
acc_free (void *d)
{
  splay_tree_key k;

  if (!d)
    return;

  struct goacc_thread *thr = goacc_thread ();

  assert (thr && thr->dev);

  struct gomp_device_descr *acc_dev = thr->dev;

  if (acc_dev->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
    return free (d);

  acc_prof_info prof_info;
  acc_api_info api_info;
  bool profiling_p = GOACC_PROFILING_SETUP_P (thr, &prof_info, &api_info);

  gomp_mutex_lock (&acc_dev->lock);

  /* We don't have to call lazy open here, as the ptr value must have
     been returned by acc_malloc.  It's not permitted to pass NULL in
     (unless you got that null from acc_malloc).  */
  if ((k = lookup_dev (acc_dev->openacc.data_environ, d, 1)))
    {
      void *offset;

      offset = d - k->tgt->tgt_start + k->tgt_offset;

      gomp_mutex_unlock (&acc_dev->lock);

      acc_unmap_data ((void *)(k->host_start + offset));
    }
  else
    gomp_mutex_unlock (&acc_dev->lock);

  if (!acc_dev->free_func (acc_dev->target_id, d))
    gomp_fatal ("error in freeing device memory in %s", __FUNCTION__);

  if (profiling_p)
    {
      thr->prof_info = NULL;
      thr->api_info = NULL;
    }
}

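/* Usage sketch (illustrative only): because acc_free first unmaps a device
   address that is still mapped, it may be handed a pointer that went through
   acc_map_data:

     #include <openacc.h>

     void
     release (char *h, size_t n, void *d)   -- d obtained from acc_malloc
     {
       acc_map_data (h, d, n);
       acc_free (d);                        -- unmaps h, then frees d
     }
*/
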
static void
memcpy_tofrom_device (bool from, void *d, void *h, size_t s, int async,
                      const char *libfnname)
{
  /* No need to call lazy open here, as the device pointer must have
     been obtained from a routine that did that.  */
  struct goacc_thread *thr = goacc_thread ();

  assert (thr && thr->dev);

  if (thr->dev->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
    {
      if (from)
        memmove (h, d, s);
      else
        memmove (d, h, s);
      return;
    }

  acc_prof_info prof_info;
  acc_api_info api_info;
  bool profiling_p = GOACC_PROFILING_SETUP_P (thr, &prof_info, &api_info);
  if (profiling_p)
    {
      prof_info.async = async;
      prof_info.async_queue = prof_info.async;
    }

  goacc_aq aq = get_goacc_asyncqueue (async);
  if (from)
    gomp_copy_dev2host (thr->dev, aq, h, d, s);
  else
    gomp_copy_host2dev (thr->dev, aq, d, h, s, /* TODO: cbuf? */ NULL);

  if (profiling_p)
    {
      thr->prof_info = NULL;
      thr->api_info = NULL;
    }
}

void
acc_memcpy_to_device (void *d, void *h, size_t s)
{
  memcpy_tofrom_device (false, d, h, s, acc_async_sync, __FUNCTION__);
}

void
acc_memcpy_to_device_async (void *d, void *h, size_t s, int async)
{
  memcpy_tofrom_device (false, d, h, s, async, __FUNCTION__);
}

void
acc_memcpy_from_device (void *h, void *d, size_t s)
{
  memcpy_tofrom_device (true, d, h, s, acc_async_sync, __FUNCTION__);
}

void
acc_memcpy_from_device_async (void *h, void *d, size_t s, int async)
{
  memcpy_tofrom_device (true, d, h, s, async, __FUNCTION__);
}

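/* Usage sketch (illustrative only): a synchronous round trip through device
   memory; on a shared-memory device the copies degrade to memmove:

     #include <openacc.h>
     #include <assert.h>
     #include <string.h>

     void
     round_trip (void)
     {
       char in[8] = "libgomp", out[8];
       void *d = acc_malloc (sizeof in);
       acc_memcpy_to_device (d, in, sizeof in);
       acc_memcpy_from_device (out, d, sizeof out);
       assert (memcmp (in, out, sizeof in) == 0);
       acc_free (d);
     }
*/
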
/* Return the device pointer that corresponds to host data H.  Or NULL
   if no mapping.  */

void *
acc_deviceptr (void *h)
{
  splay_tree_key n;
  void *d;
  void *offset;

  goacc_lazy_initialize ();

  struct goacc_thread *thr = goacc_thread ();
  struct gomp_device_descr *dev = thr->dev;

  if (thr->dev->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
    return h;

  /* In the following, no OpenACC Profiling Interface events can possibly be
     generated.  */

  gomp_mutex_lock (&dev->lock);

  n = lookup_host (dev, h, 1);

  if (!n)
    {
      gomp_mutex_unlock (&dev->lock);
      return NULL;
    }

  offset = h - n->host_start;

  d = n->tgt->tgt_start + n->tgt_offset + offset;

  gomp_mutex_unlock (&dev->lock);

  return d;
}

/* Return the host pointer that corresponds to device data D.  Or NULL
   if no mapping.  */

void *
acc_hostptr (void *d)
{
  splay_tree_key n;
  void *h;
  void *offset;

  goacc_lazy_initialize ();

  struct goacc_thread *thr = goacc_thread ();
  struct gomp_device_descr *acc_dev = thr->dev;

  if (thr->dev->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
    return d;

  /* In the following, no OpenACC Profiling Interface events can possibly be
     generated.  */

  gomp_mutex_lock (&acc_dev->lock);

  n = lookup_dev (acc_dev->openacc.data_environ, d, 1);

  if (!n)
    {
      gomp_mutex_unlock (&acc_dev->lock);
      return NULL;
    }

  offset = d - n->tgt->tgt_start + n->tgt_offset;

  h = n->host_start + offset;

  gomp_mutex_unlock (&acc_dev->lock);

  return h;
}

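/* Usage sketch (illustrative only): for a block mapped by acc_copyin, the two
   translations invert each other, assuming the usual case of the mapping
   starting at h with zero offset into the target block:

     #include <openacc.h>
     #include <assert.h>

     void
     translate (float *h, size_t n)
     {
       acc_copyin (h, n * sizeof *h);
       void *d = acc_deviceptr (h);
       assert (acc_hostptr (d) == (void *) h);
       acc_delete (h, n * sizeof *h);
     }
*/
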
/* Return 1 if host data [H,+S] is present on the device.  */

int
acc_is_present (void *h, size_t s)
{
  splay_tree_key n;

  if (!s || !h)
    return 0;

  goacc_lazy_initialize ();

  struct goacc_thread *thr = goacc_thread ();
  struct gomp_device_descr *acc_dev = thr->dev;

  if (thr->dev->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
    return h != NULL;

  /* In the following, no OpenACC Profiling Interface events can possibly be
     generated.  */

  gomp_mutex_lock (&acc_dev->lock);

  n = lookup_host (acc_dev, h, s);

  if (n && ((uintptr_t)h < n->host_start
            || (uintptr_t)h + s > n->host_end
            || s > n->host_end - n->host_start))
    n = NULL;

  gomp_mutex_unlock (&acc_dev->lock);

  return n != NULL;
}

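/* Usage sketch (illustrative only; assumes a non-shared-memory device, where
   presence is a real containment test rather than trivially true):

     #include <openacc.h>
     #include <assert.h>

     void
     presence (char *h)                -- h points to at least 64 bytes
     {
       acc_copyin (h, 64);
       assert (acc_is_present (h, 64));
       assert (acc_is_present (h + 16, 16));   -- contained subrange
       assert (!acc_is_present (h + 32, 64));  -- extends past the block
       acc_delete (h, 64);
     }
*/
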
/* Create a mapping for host [H,+S] -> device [D,+S].  */

void
acc_map_data (void *h, void *d, size_t s)
{
  struct target_mem_desc *tgt = NULL;
  size_t mapnum = 1;
  void *hostaddrs = h;
  void *devaddrs = d;
  size_t sizes = s;
  unsigned short kinds = GOMP_MAP_ALLOC;

  goacc_lazy_initialize ();

  struct goacc_thread *thr = goacc_thread ();
  struct gomp_device_descr *acc_dev = thr->dev;

  if (acc_dev->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
    {
      if (d != h)
        gomp_fatal ("cannot map data on shared-memory system");
    }
  else
    {
      struct goacc_thread *thr = goacc_thread ();

      if (!d || !h || !s)
        gomp_fatal ("[%p,+%d]->[%p,+%d] is a bad map",
                    (void *)h, (int)s, (void *)d, (int)s);

      acc_prof_info prof_info;
      acc_api_info api_info;
      bool profiling_p = GOACC_PROFILING_SETUP_P (thr, &prof_info, &api_info);

      gomp_mutex_lock (&acc_dev->lock);

      if (lookup_host (acc_dev, h, s))
        {
          gomp_mutex_unlock (&acc_dev->lock);
          gomp_fatal ("host address [%p, +%d] is already mapped", (void *)h,
                      (int)s);
        }

      if (lookup_dev (thr->dev->openacc.data_environ, d, s))
        {
          gomp_mutex_unlock (&acc_dev->lock);
          gomp_fatal ("device address [%p, +%d] is already mapped", (void *)d,
                      (int)s);
        }

      gomp_mutex_unlock (&acc_dev->lock);

      tgt = gomp_map_vars (acc_dev, mapnum, &hostaddrs, &devaddrs, &sizes,
                           &kinds, true, GOMP_MAP_VARS_OPENACC);
      tgt->list[0].key->refcount = REFCOUNT_INFINITY;

      if (profiling_p)
        {
          thr->prof_info = NULL;
          thr->api_info = NULL;
        }
    }

  /* On a shared-memory system no descriptor was created; only register the
     new mapping if there is one.  */
  if (tgt)
    {
      gomp_mutex_lock (&acc_dev->lock);
      tgt->prev = acc_dev->openacc.data_environ;
      acc_dev->openacc.data_environ = tgt;
      gomp_mutex_unlock (&acc_dev->lock);
    }
}

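/* Usage sketch (illustrative only): pairing acc_malloc with acc_map_data
   gives explicit control over the device allocation backing a host block:

     #include <openacc.h>

     void
     map_explicitly (char *h, size_t n)
     {
       void *d = acc_malloc (n);
       acc_map_data (h, d, n);   -- mapping pinned: refcount is infinite
       -- ... use h in compute regions ...
       acc_unmap_data (h);
       acc_free (d);
     }
*/
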
void
acc_unmap_data (void *h)
{
  struct goacc_thread *thr = goacc_thread ();
  struct gomp_device_descr *acc_dev = thr->dev;

  /* No need to call lazy open, as the address must have been mapped.  */

  /* This is a no-op on shared-memory targets.  */
  if (acc_dev->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
    return;

  acc_prof_info prof_info;
  acc_api_info api_info;
  bool profiling_p = GOACC_PROFILING_SETUP_P (thr, &prof_info, &api_info);

  size_t host_size;

  gomp_mutex_lock (&acc_dev->lock);

  splay_tree_key n = lookup_host (acc_dev, h, 1);
  struct target_mem_desc *t;

  if (!n)
    {
      gomp_mutex_unlock (&acc_dev->lock);
      gomp_fatal ("%p is not a mapped block", (void *)h);
    }

  host_size = n->host_end - n->host_start;

  if (n->host_start != (uintptr_t) h)
    {
      gomp_mutex_unlock (&acc_dev->lock);
      gomp_fatal ("[%p,%d] surrounds %p",
                  (void *) n->host_start, (int) host_size, (void *) h);
    }

  /* Mark for removal.  */
  n->refcount = 1;

  t = n->tgt;

  if (t->refcount == 2)
    {
      struct target_mem_desc *tp;

      /* This is the last reference, so pull the descriptor off the
         chain.  This prevents gomp_unmap_vars via gomp_unmap_tgt from
         freeing the device memory.  */
      t->tgt_end = 0;
      t->to_free = 0;

      for (tp = NULL, t = acc_dev->openacc.data_environ; t != NULL;
           tp = t, t = t->prev)
        if (n->tgt == t)
          {
            if (tp)
              tp->prev = t->prev;
            else
              acc_dev->openacc.data_environ = t->prev;

            break;
          }
    }

  gomp_mutex_unlock (&acc_dev->lock);

  gomp_unmap_vars (t, true);

  if (profiling_p)
    {
      thr->prof_info = NULL;
      thr->api_info = NULL;
    }
}

#define FLAG_PRESENT (1 << 0)
#define FLAG_CREATE (1 << 1)
#define FLAG_COPY (1 << 2)

static void *
present_create_copy (unsigned f, void *h, size_t s, int async)
{
  void *d;
  splay_tree_key n;

  if (!h || !s)
    gomp_fatal ("[%p,+%d] is a bad range", (void *)h, (int)s);

  goacc_lazy_initialize ();

  struct goacc_thread *thr = goacc_thread ();
  struct gomp_device_descr *acc_dev = thr->dev;

  if (acc_dev->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
    return h;

  acc_prof_info prof_info;
  acc_api_info api_info;
  bool profiling_p = GOACC_PROFILING_SETUP_P (thr, &prof_info, &api_info);
  if (profiling_p)
    {
      prof_info.async = async;
      prof_info.async_queue = prof_info.async;
    }

  gomp_mutex_lock (&acc_dev->lock);

  n = lookup_host (acc_dev, h, s);
  if (n)
    {
      /* Present.  */
      d = (void *) (n->tgt->tgt_start + n->tgt_offset);

      if (!(f & FLAG_PRESENT))
        {
          gomp_mutex_unlock (&acc_dev->lock);
          gomp_fatal ("[%p,+%d] already mapped to [%p,+%d]",
                      (void *)h, (int)s, (void *)d, (int)s);
        }
      if ((h + s) > (void *)n->host_end)
        {
          gomp_mutex_unlock (&acc_dev->lock);
          gomp_fatal ("[%p,+%d] not mapped", (void *)h, (int)s);
        }

      if (n->refcount != REFCOUNT_INFINITY)
        {
          n->refcount++;
          n->dynamic_refcount++;
        }
      gomp_mutex_unlock (&acc_dev->lock);
    }
  else if (!(f & FLAG_CREATE))
    {
      gomp_mutex_unlock (&acc_dev->lock);
      gomp_fatal ("[%p,+%d] not mapped", (void *)h, (int)s);
    }
  else
    {
      struct target_mem_desc *tgt;
      size_t mapnum = 1;
      unsigned short kinds;
      void *hostaddrs = h;

      if (f & FLAG_COPY)
        kinds = GOMP_MAP_TO;
      else
        kinds = GOMP_MAP_ALLOC;

      gomp_mutex_unlock (&acc_dev->lock);

      goacc_aq aq = get_goacc_asyncqueue (async);

      tgt = gomp_map_vars_async (acc_dev, aq, mapnum, &hostaddrs, NULL, &s,
                                 &kinds, true, GOMP_MAP_VARS_OPENACC);
      /* Initialize dynamic refcount.  */
      tgt->list[0].key->dynamic_refcount = 1;

      gomp_mutex_lock (&acc_dev->lock);

      d = tgt->to_free;
      tgt->prev = acc_dev->openacc.data_environ;
      acc_dev->openacc.data_environ = tgt;

      gomp_mutex_unlock (&acc_dev->lock);
    }

  if (profiling_p)
    {
      thr->prof_info = NULL;
      thr->api_info = NULL;
    }

  return d;
}

void *
acc_create (void *h, size_t s)
{
  return present_create_copy (FLAG_PRESENT | FLAG_CREATE, h, s, acc_async_sync);
}

void
acc_create_async (void *h, size_t s, int async)
{
  present_create_copy (FLAG_PRESENT | FLAG_CREATE, h, s, async);
}

/* acc_present_or_create used to be what acc_create is now.  */
/* acc_pcreate is acc_present_or_create by a different name.  */
#ifdef HAVE_ATTRIBUTE_ALIAS
strong_alias (acc_create, acc_present_or_create)
strong_alias (acc_create, acc_pcreate)
#else
void *
acc_present_or_create (void *h, size_t s)
{
  return acc_create (h, s);
}

void *
acc_pcreate (void *h, size_t s)
{
  return acc_create (h, s);
}
#endif

void *
acc_copyin (void *h, size_t s)
{
  return present_create_copy (FLAG_PRESENT | FLAG_CREATE | FLAG_COPY, h, s,
                              acc_async_sync);
}

void
acc_copyin_async (void *h, size_t s, int async)
{
  present_create_copy (FLAG_PRESENT | FLAG_CREATE | FLAG_COPY, h, s, async);
}

/* acc_present_or_copyin used to be what acc_copyin is now.  */
/* acc_pcopyin is acc_present_or_copyin by a different name.  */
#ifdef HAVE_ATTRIBUTE_ALIAS
strong_alias (acc_copyin, acc_present_or_copyin)
strong_alias (acc_copyin, acc_pcopyin)
#else
void *
acc_present_or_copyin (void *h, size_t s)
{
  return acc_copyin (h, s);
}

void *
acc_pcopyin (void *h, size_t s)
{
  return acc_copyin (h, s);
}
#endif

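/* Usage sketch (illustrative only): on an already-present block, acc_copyin
   only increments the reference counts; the block stays mapped until a
   matching number of acc_delete (or acc_copyout) calls:

     #include <openacc.h>

     void
     refcounting (int *h, size_t bytes)
     {
       acc_copyin (h, bytes);   -- maps and copies to the device
       acc_copyin (h, bytes);   -- already present: refcounts only
       acc_delete (h, bytes);   -- still mapped
       acc_delete (h, bytes);   -- last reference: unmapped
     }
*/
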
#define FLAG_COPYOUT (1 << 0)
#define FLAG_FINALIZE (1 << 1)

static void
delete_copyout (unsigned f, void *h, size_t s, int async, const char *libfnname)
{
  size_t host_size;
  splay_tree_key n;
  void *d;
  struct goacc_thread *thr = goacc_thread ();
  struct gomp_device_descr *acc_dev = thr->dev;

  if (acc_dev->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
    return;

  acc_prof_info prof_info;
  acc_api_info api_info;
  bool profiling_p = GOACC_PROFILING_SETUP_P (thr, &prof_info, &api_info);
  if (profiling_p)
    {
      prof_info.async = async;
      prof_info.async_queue = prof_info.async;
    }

  gomp_mutex_lock (&acc_dev->lock);

  n = lookup_host (acc_dev, h, s);

  /* No need to call lazy open, as the data must already have been
     mapped.  */

  if (!n)
    {
      gomp_mutex_unlock (&acc_dev->lock);
      gomp_fatal ("[%p,%d] is not mapped", (void *)h, (int)s);
    }

  d = (void *) (n->tgt->tgt_start + n->tgt_offset
                + (uintptr_t) h - n->host_start);

  host_size = n->host_end - n->host_start;

  if (n->host_start != (uintptr_t) h || host_size != s)
    {
      gomp_mutex_unlock (&acc_dev->lock);
      gomp_fatal ("[%p,%d] surrounds2 [%p,+%d]",
                  (void *) n->host_start, (int) host_size, (void *) h, (int) s);
    }

  if (n->refcount == REFCOUNT_INFINITY)
    {
      n->refcount = 0;
      n->dynamic_refcount = 0;
    }
  if (n->refcount < n->dynamic_refcount)
    {
      gomp_mutex_unlock (&acc_dev->lock);
      gomp_fatal ("Dynamic reference counting assert fail\n");
    }

  if (f & FLAG_FINALIZE)
    {
      n->refcount -= n->dynamic_refcount;
      n->dynamic_refcount = 0;
    }
  else if (n->dynamic_refcount)
    {
      n->dynamic_refcount--;
      n->refcount--;
    }

  if (n->refcount == 0)
    {
      if (n->tgt->refcount == 2)
        {
          struct target_mem_desc *tp, *t;
          for (tp = NULL, t = acc_dev->openacc.data_environ; t != NULL;
               tp = t, t = t->prev)
            if (n->tgt == t)
              {
                if (tp)
                  tp->prev = t->prev;
                else
                  acc_dev->openacc.data_environ = t->prev;
                break;
              }
        }

      if (f & FLAG_COPYOUT)
        {
          goacc_aq aq = get_goacc_asyncqueue (async);
          gomp_copy_dev2host (acc_dev, aq, h, d, s);
        }
      gomp_remove_var (acc_dev, n);
    }

  gomp_mutex_unlock (&acc_dev->lock);

  if (profiling_p)
    {
      thr->prof_info = NULL;
      thr->api_info = NULL;
    }
}

void
acc_delete (void *h, size_t s)
{
  delete_copyout (0, h, s, acc_async_sync, __FUNCTION__);
}

void
acc_delete_async (void *h, size_t s, int async)
{
  delete_copyout (0, h, s, async, __FUNCTION__);
}

void
acc_delete_finalize (void *h, size_t s)
{
  delete_copyout (FLAG_FINALIZE, h, s, acc_async_sync, __FUNCTION__);
}

void
acc_delete_finalize_async (void *h, size_t s, int async)
{
  delete_copyout (FLAG_FINALIZE, h, s, async, __FUNCTION__);
}

void
acc_copyout (void *h, size_t s)
{
  delete_copyout (FLAG_COPYOUT, h, s, acc_async_sync, __FUNCTION__);
}

void
acc_copyout_async (void *h, size_t s, int async)
{
  delete_copyout (FLAG_COPYOUT, h, s, async, __FUNCTION__);
}

void
acc_copyout_finalize (void *h, size_t s)
{
  delete_copyout (FLAG_COPYOUT | FLAG_FINALIZE, h, s, acc_async_sync,
                  __FUNCTION__);
}

void
acc_copyout_finalize_async (void *h, size_t s, int async)
{
  delete_copyout (FLAG_COPYOUT | FLAG_FINALIZE, h, s, async, __FUNCTION__);
}

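/* Usage sketch (illustrative only): the _finalize variants zero the dynamic
   reference count in one call, so a single acc_copyout_finalize releases a
   block acquired by several acc_copyin calls:

     #include <openacc.h>

     void
     drain (double *h, size_t bytes)
     {
       acc_copyin (h, bytes);
       acc_copyin (h, bytes);
       acc_copyout_finalize (h, bytes);   -- copies back and unmaps
     }
*/
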
static void
update_dev_host (int is_dev, void *h, size_t s, int async)
{
  splay_tree_key n;
  void *d;

  goacc_lazy_initialize ();

  struct goacc_thread *thr = goacc_thread ();
  struct gomp_device_descr *acc_dev = thr->dev;

  if (acc_dev->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
    return;

  acc_prof_info prof_info;
  acc_api_info api_info;
  bool profiling_p = GOACC_PROFILING_SETUP_P (thr, &prof_info, &api_info);
  if (profiling_p)
    {
      prof_info.async = async;
      prof_info.async_queue = prof_info.async;
    }

  gomp_mutex_lock (&acc_dev->lock);

  n = lookup_host (acc_dev, h, s);

  if (!n)
    {
      gomp_mutex_unlock (&acc_dev->lock);
      gomp_fatal ("[%p,%d] is not mapped", h, (int)s);
    }

  d = (void *) (n->tgt->tgt_start + n->tgt_offset
                + (uintptr_t) h - n->host_start);

  goacc_aq aq = get_goacc_asyncqueue (async);

  if (is_dev)
    gomp_copy_host2dev (acc_dev, aq, d, h, s, /* TODO: cbuf? */ NULL);
  else
    gomp_copy_dev2host (acc_dev, aq, h, d, s);

  gomp_mutex_unlock (&acc_dev->lock);

  if (profiling_p)
    {
      thr->prof_info = NULL;
      thr->api_info = NULL;
    }
}

void
acc_update_device (void *h, size_t s)
{
  update_dev_host (1, h, s, acc_async_sync);
}

void
acc_update_device_async (void *h, size_t s, int async)
{
  update_dev_host (1, h, s, async);
}

void
acc_update_self (void *h, size_t s)
{
  update_dev_host (0, h, s, acc_async_sync);
}

void
acc_update_self_async (void *h, size_t s, int async)
{
  update_dev_host (0, h, s, async);
}

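/* Usage sketch (illustrative only): the update routines refresh an existing
   mapping in either direction without remapping it:

     #include <openacc.h>

     void
     refresh (int *h, size_t bytes)
     {
       acc_copyin (h, bytes);
       h[0] = 42;
       acc_update_device (h, bytes);   -- push the host change to the device
       -- ... device code modifies the data ...
       acc_update_self (h, bytes);     -- pull the device copy back
       acc_delete (h, bytes);
     }
*/
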
void
gomp_acc_insert_pointer (size_t mapnum, void **hostaddrs, size_t *sizes,
                         void *kinds, int async)
{
  struct target_mem_desc *tgt;
  struct goacc_thread *thr = goacc_thread ();
  struct gomp_device_descr *acc_dev = thr->dev;

  if (acc_is_present (*hostaddrs, *sizes))
    {
      splay_tree_key n;
      gomp_mutex_lock (&acc_dev->lock);
      n = lookup_host (acc_dev, *hostaddrs, *sizes);
      gomp_mutex_unlock (&acc_dev->lock);

      tgt = n->tgt;
      for (size_t i = 0; i < tgt->list_count; i++)
        if (tgt->list[i].key == n)
          {
            for (size_t j = 0; j < mapnum; j++)
              if (i + j < tgt->list_count && tgt->list[i + j].key)
                {
                  tgt->list[i + j].key->refcount++;
                  tgt->list[i + j].key->dynamic_refcount++;
                }
            return;
          }
      /* Should not reach here.  */
      gomp_fatal ("Dynamic refcount incrementing failed for pointer/pset");
    }

  gomp_debug (0, "  %s: prepare mappings\n", __FUNCTION__);
  goacc_aq aq = get_goacc_asyncqueue (async);
  tgt = gomp_map_vars_async (acc_dev, aq, mapnum, hostaddrs,
                             NULL, sizes, kinds, true, GOMP_MAP_VARS_OPENACC);
  gomp_debug (0, "  %s: mappings prepared\n", __FUNCTION__);

  /* Initialize dynamic refcount.  */
  tgt->list[0].key->dynamic_refcount = 1;

  gomp_mutex_lock (&acc_dev->lock);
  tgt->prev = acc_dev->openacc.data_environ;
  acc_dev->openacc.data_environ = tgt;
  gomp_mutex_unlock (&acc_dev->lock);
}

void
gomp_acc_remove_pointer (void *h, size_t s, bool force_copyfrom, int async,
                         int finalize, int mapnum)
{
  struct goacc_thread *thr = goacc_thread ();
  struct gomp_device_descr *acc_dev = thr->dev;
  splay_tree_key n;
  struct target_mem_desc *t;
  int minrefs = (mapnum == 1) ? 2 : 3;

  if (!acc_is_present (h, s))
    return;

  gomp_mutex_lock (&acc_dev->lock);

  n = lookup_host (acc_dev, h, 1);

  if (!n)
    {
      gomp_mutex_unlock (&acc_dev->lock);
      gomp_fatal ("%p is not a mapped block", (void *)h);
    }

  gomp_debug (0, "  %s: restore mappings\n", __FUNCTION__);

  t = n->tgt;

  if (n->refcount < n->dynamic_refcount)
    {
      gomp_mutex_unlock (&acc_dev->lock);
      gomp_fatal ("Dynamic reference counting assert fail\n");
    }

  if (finalize)
    {
      n->refcount -= n->dynamic_refcount;
      n->dynamic_refcount = 0;
    }
  else if (n->dynamic_refcount)
    {
      n->dynamic_refcount--;
      n->refcount--;
    }

  gomp_mutex_unlock (&acc_dev->lock);

  if (n->refcount == 0)
    {
      if (t->refcount == minrefs)
        {
          /* This is the last reference, so pull the descriptor off the
             chain.  This prevents gomp_unmap_vars via gomp_unmap_tgt from
             freeing the device memory.  */
          struct target_mem_desc *tp;
          for (tp = NULL, t = acc_dev->openacc.data_environ; t != NULL;
               tp = t, t = t->prev)
            {
              if (n->tgt == t)
                {
                  if (tp)
                    tp->prev = t->prev;
                  else
                    acc_dev->openacc.data_environ = t->prev;
                  break;
                }
            }
        }

      /* Set refcount to 1 to allow gomp_unmap_vars to unmap it.  */
      n->refcount = 1;
      t->refcount = minrefs;
      for (size_t i = 0; i < t->list_count; i++)
        if (t->list[i].key == n)
          {
            t->list[i].copy_from = force_copyfrom ? 1 : 0;
            break;
          }

      /* If running synchronously, unmap immediately.  */
      if (async < acc_async_noval)
        gomp_unmap_vars (t, true);
      else
        {
          goacc_aq aq = get_goacc_asyncqueue (async);
          gomp_unmap_vars_async (t, true, aq);
        }
    }

  gomp_debug (0, "  %s: mappings restored\n", __FUNCTION__);
}