ca4c3545 | 1 | /* OpenACC Runtime initialization routines |
2 | ||
fbd26352 | 3 | Copyright (C) 2013-2019 Free Software Foundation, Inc. |
ca4c3545 | 4 | |
5 | Contributed by Mentor Embedded. | |
6 | ||
7 | This file is part of the GNU Offloading and Multi Processing Library | |
8 | (libgomp). | |
9 | ||
10 | Libgomp is free software; you can redistribute it and/or modify it | |
11 | under the terms of the GNU General Public License as published by | |
12 | the Free Software Foundation; either version 3, or (at your option) | |
13 | any later version. | |
14 | ||
15 | Libgomp is distributed in the hope that it will be useful, but WITHOUT ANY | |
16 | WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS | |
17 | FOR A PARTICULAR PURPOSE. See the GNU General Public License for | |
18 | more details. | |
19 | ||
20 | Under Section 7 of GPL version 3, you are granted additional | |
21 | permissions described in the GCC Runtime Library Exception, version | |
22 | 3.1, as published by the Free Software Foundation. | |
23 | ||
24 | You should have received a copy of the GNU General Public License and | |
25 | a copy of the GCC Runtime Library Exception along with this program; | |
26 | see the files COPYING3 and COPYING.RUNTIME respectively. If not, see | |
27 | <http://www.gnu.org/licenses/>. */ | |
28 | ||
29 | #include "openacc.h" | |
ca4c3545 | 30 | #include "libgomp.h" |
31 | #include "gomp-constants.h" | |
32 | #include "oacc-int.h" | |
6d6a3fc3 | 33 | #include <string.h> |
ca4c3545 | 34 | #include <assert.h> |
35 | ||
09f66ac1 | 36 | /* Return block containing [H->S), or NULL if not contained. The device lock |
37 | for DEV must be locked on entry, and remains locked on exit. */ | |
ca4c3545 | 38 | |
39 | static splay_tree_key | |
0d8c703d | 40 | lookup_host (struct gomp_device_descr *dev, void *h, size_t s) |
ca4c3545 | 41 | { |
42 | struct splay_tree_key_s node; | |
43 | splay_tree_key key; | |
44 | ||
45 | node.host_start = (uintptr_t) h; | |
46 | node.host_end = (uintptr_t) h + s; | |
47 | ||
0d8c703d | 48 | key = splay_tree_lookup (&dev->mem_map, &node); |
ca4c3545 | 49 | |
50 | return key; | |
51 | } | |
52 | ||
/* Return block containing [D->S), or NULL if not contained.
   The list isn't ordered by device address, so we have to iterate
   over the whole array.  This is not expected to be a common
   operation.  The device lock associated with TGT must be locked on entry, and
   remains locked on exit.  */

static splay_tree_key
lookup_dev (struct target_mem_desc *tgt, void *d, size_t s)
{
  int i;
  struct target_mem_desc *t;

  if (!tgt)
    return NULL;

  /* First find the target_mem_desc whose device range covers [D, D+S).  */
  for (t = tgt; t != NULL; t = t->prev)
    {
      if (t->tgt_start <= (uintptr_t) d && t->tgt_end >= (uintptr_t) d + s)
	break;
    }

  if (!t)
    return NULL;

  /* Then scan that descriptor's entries for one whose mapping covers D.  */
  for (i = 0; i < t->list_count; i++)
    {
      void * offset;

      splay_tree_key k = &t->array[i].key;
      /* NOTE(review): GNU void * arithmetic; OFFSET mixes the device
	 displacement with the key's target offset.  */
      offset = d - t->tgt_start + k->tgt_offset;

      /* NOTE(review): this accepts the first entry whose host end is not
	 exceeded — verify it really identifies the containing block when a
	 descriptor has multiple entries.  */
      if (k->host_start + offset <= (void *) k->host_end)
	return k;
    }

  return NULL;
}
90 | ||
91 | /* OpenACC is silent on how memory exhaustion is indicated. We return | |
92 | NULL. */ | |
93 | ||
94 | void * | |
95 | acc_malloc (size_t s) | |
96 | { | |
97 | if (!s) | |
98 | return NULL; | |
99 | ||
100 | goacc_lazy_initialize (); | |
101 | ||
102 | struct goacc_thread *thr = goacc_thread (); | |
103 | ||
0a1fe572 | 104 | assert (thr->dev); |
105 | ||
6d6a3fc3 | 106 | if (thr->dev->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM) |
107 | return malloc (s); | |
108 | ||
5feffd51 | 109 | acc_prof_info prof_info; |
110 | acc_api_info api_info; | |
111 | bool profiling_p = GOACC_PROFILING_SETUP_P (thr, &prof_info, &api_info); | |
112 | ||
113 | void *res = thr->dev->alloc_func (thr->dev->target_id, s); | |
114 | ||
115 | if (profiling_p) | |
116 | { | |
117 | thr->prof_info = NULL; | |
118 | thr->api_info = NULL; | |
119 | } | |
120 | ||
121 | return res; | |
ca4c3545 | 122 | } |
123 | ||
124 | /* OpenACC 2.0a (3.2.16) doesn't specify what to do in the event | |
125 | the device address is mapped. We choose to check if it mapped, | |
126 | and if it is, to unmap it. */ | |
127 | void | |
128 | acc_free (void *d) | |
129 | { | |
130 | splay_tree_key k; | |
ca4c3545 | 131 | |
132 | if (!d) | |
133 | return; | |
134 | ||
09f66ac1 | 135 | struct goacc_thread *thr = goacc_thread (); |
136 | ||
0a1fe572 | 137 | assert (thr && thr->dev); |
138 | ||
09f66ac1 | 139 | struct gomp_device_descr *acc_dev = thr->dev; |
140 | ||
6d6a3fc3 | 141 | if (acc_dev->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM) |
142 | return free (d); | |
143 | ||
5feffd51 | 144 | acc_prof_info prof_info; |
145 | acc_api_info api_info; | |
146 | bool profiling_p = GOACC_PROFILING_SETUP_P (thr, &prof_info, &api_info); | |
147 | ||
09f66ac1 | 148 | gomp_mutex_lock (&acc_dev->lock); |
149 | ||
ca4c3545 | 150 | /* We don't have to call lazy open here, as the ptr value must have |
151 | been returned by acc_malloc. It's not permitted to pass NULL in | |
152 | (unless you got that null from acc_malloc). */ | |
09f66ac1 | 153 | if ((k = lookup_dev (acc_dev->openacc.data_environ, d, 1))) |
154 | { | |
155 | void *offset; | |
ca4c3545 | 156 | |
09f66ac1 | 157 | offset = d - k->tgt->tgt_start + k->tgt_offset; |
ca4c3545 | 158 | |
09f66ac1 | 159 | gomp_mutex_unlock (&acc_dev->lock); |
ca4c3545 | 160 | |
09f66ac1 | 161 | acc_unmap_data ((void *)(k->host_start + offset)); |
162 | } | |
163 | else | |
164 | gomp_mutex_unlock (&acc_dev->lock); | |
165 | ||
9b50ad1d | 166 | if (!acc_dev->free_func (acc_dev->target_id, d)) |
167 | gomp_fatal ("error in freeing device memory in %s", __FUNCTION__); | |
5feffd51 | 168 | |
169 | if (profiling_p) | |
170 | { | |
171 | thr->prof_info = NULL; | |
172 | thr->api_info = NULL; | |
173 | } | |
ca4c3545 | 174 | } |
175 | ||
2e51c6a2 | 176 | static void |
177 | memcpy_tofrom_device (bool from, void *d, void *h, size_t s, int async, | |
178 | const char *libfnname) | |
ca4c3545 | 179 | { |
180 | /* No need to call lazy open here, as the device pointer must have | |
181 | been obtained from a routine that did that. */ | |
182 | struct goacc_thread *thr = goacc_thread (); | |
183 | ||
0a1fe572 | 184 | assert (thr && thr->dev); |
185 | ||
6d6a3fc3 | 186 | if (thr->dev->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM) |
187 | { | |
2e51c6a2 | 188 | if (from) |
189 | memmove (h, d, s); | |
190 | else | |
191 | memmove (d, h, s); | |
6d6a3fc3 | 192 | return; |
193 | } | |
194 | ||
5feffd51 | 195 | acc_prof_info prof_info; |
196 | acc_api_info api_info; | |
197 | bool profiling_p = GOACC_PROFILING_SETUP_P (thr, &prof_info, &api_info); | |
198 | if (profiling_p) | |
199 | { | |
200 | prof_info.async = async; | |
201 | prof_info.async_queue = prof_info.async; | |
202 | } | |
203 | ||
534b5e00 | 204 | goacc_aq aq = get_goacc_asyncqueue (async); |
205 | if (from) | |
206 | gomp_copy_dev2host (thr->dev, aq, h, d, s); | |
207 | else | |
208 | gomp_copy_host2dev (thr->dev, aq, d, h, s, /* TODO: cbuf? */ NULL); | |
5feffd51 | 209 | |
210 | if (profiling_p) | |
211 | { | |
212 | thr->prof_info = NULL; | |
213 | thr->api_info = NULL; | |
214 | } | |
ca4c3545 | 215 | } |
216 | ||
void
acc_memcpy_to_device (void *d, void *h, size_t s)
{
  /* Synchronous host-to-device copy.  */
  memcpy_tofrom_device (false, d, h, s, acc_async_sync, __FUNCTION__);
}
ca4c3545 | 222 | |
void
acc_memcpy_to_device_async (void *d, void *h, size_t s, int async)
{
  /* Asynchronous host-to-device copy on queue ASYNC.  */
  memcpy_tofrom_device (false, d, h, s, async, __FUNCTION__);
}
0a1fe572 | 228 | |
void
acc_memcpy_from_device (void *h, void *d, size_t s)
{
  /* Synchronous device-to-host copy.  */
  memcpy_tofrom_device (true, d, h, s, acc_async_sync, __FUNCTION__);
}
6d6a3fc3 | 234 | |
void
acc_memcpy_from_device_async (void *h, void *d, size_t s, int async)
{
  /* Asynchronous device-to-host copy on queue ASYNC.  */
  memcpy_tofrom_device (true, d, h, s, async, __FUNCTION__);
}
240 | ||
241 | /* Return the device pointer that corresponds to host data H. Or NULL | |
242 | if no mapping. */ | |
243 | ||
244 | void * | |
245 | acc_deviceptr (void *h) | |
246 | { | |
247 | splay_tree_key n; | |
248 | void *d; | |
249 | void *offset; | |
250 | ||
251 | goacc_lazy_initialize (); | |
252 | ||
253 | struct goacc_thread *thr = goacc_thread (); | |
09f66ac1 | 254 | struct gomp_device_descr *dev = thr->dev; |
255 | ||
6d6a3fc3 | 256 | if (thr->dev->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM) |
257 | return h; | |
258 | ||
5feffd51 | 259 | /* In the following, no OpenACC Profiling Interface events can possibly be |
260 | generated. */ | |
261 | ||
09f66ac1 | 262 | gomp_mutex_lock (&dev->lock); |
ca4c3545 | 263 | |
09f66ac1 | 264 | n = lookup_host (dev, h, 1); |
ca4c3545 | 265 | |
266 | if (!n) | |
09f66ac1 | 267 | { |
268 | gomp_mutex_unlock (&dev->lock); | |
269 | return NULL; | |
270 | } | |
ca4c3545 | 271 | |
272 | offset = h - n->host_start; | |
273 | ||
274 | d = n->tgt->tgt_start + n->tgt_offset + offset; | |
275 | ||
09f66ac1 | 276 | gomp_mutex_unlock (&dev->lock); |
277 | ||
ca4c3545 | 278 | return d; |
279 | } | |
280 | ||
281 | /* Return the host pointer that corresponds to device data D. Or NULL | |
282 | if no mapping. */ | |
283 | ||
284 | void * | |
285 | acc_hostptr (void *d) | |
286 | { | |
287 | splay_tree_key n; | |
288 | void *h; | |
289 | void *offset; | |
290 | ||
291 | goacc_lazy_initialize (); | |
292 | ||
293 | struct goacc_thread *thr = goacc_thread (); | |
09f66ac1 | 294 | struct gomp_device_descr *acc_dev = thr->dev; |
ca4c3545 | 295 | |
6d6a3fc3 | 296 | if (thr->dev->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM) |
297 | return d; | |
298 | ||
5feffd51 | 299 | /* In the following, no OpenACC Profiling Interface events can possibly be |
300 | generated. */ | |
301 | ||
09f66ac1 | 302 | gomp_mutex_lock (&acc_dev->lock); |
303 | ||
304 | n = lookup_dev (acc_dev->openacc.data_environ, d, 1); | |
ca4c3545 | 305 | |
306 | if (!n) | |
09f66ac1 | 307 | { |
308 | gomp_mutex_unlock (&acc_dev->lock); | |
309 | return NULL; | |
310 | } | |
ca4c3545 | 311 | |
312 | offset = d - n->tgt->tgt_start + n->tgt_offset; | |
313 | ||
314 | h = n->host_start + offset; | |
315 | ||
09f66ac1 | 316 | gomp_mutex_unlock (&acc_dev->lock); |
317 | ||
ca4c3545 | 318 | return h; |
319 | } | |
320 | ||
321 | /* Return 1 if host data [H,+S] is present on the device. */ | |
322 | ||
323 | int | |
324 | acc_is_present (void *h, size_t s) | |
325 | { | |
326 | splay_tree_key n; | |
327 | ||
328 | if (!s || !h) | |
329 | return 0; | |
330 | ||
331 | goacc_lazy_initialize (); | |
332 | ||
333 | struct goacc_thread *thr = goacc_thread (); | |
334 | struct gomp_device_descr *acc_dev = thr->dev; | |
335 | ||
6d6a3fc3 | 336 | if (thr->dev->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM) |
337 | return h != NULL; | |
338 | ||
5feffd51 | 339 | /* In the following, no OpenACC Profiling Interface events can possibly be |
340 | generated. */ | |
341 | ||
09f66ac1 | 342 | gomp_mutex_lock (&acc_dev->lock); |
343 | ||
0d8c703d | 344 | n = lookup_host (acc_dev, h, s); |
ca4c3545 | 345 | |
346 | if (n && ((uintptr_t)h < n->host_start | |
347 | || (uintptr_t)h + s > n->host_end | |
348 | || s > n->host_end - n->host_start)) | |
349 | n = NULL; | |
350 | ||
09f66ac1 | 351 | gomp_mutex_unlock (&acc_dev->lock); |
352 | ||
ca4c3545 | 353 | return n != NULL; |
354 | } | |
355 | ||
356 | /* Create a mapping for host [H,+S] -> device [D,+S] */ | |
357 | ||
358 | void | |
359 | acc_map_data (void *h, void *d, size_t s) | |
360 | { | |
6d6a3fc3 | 361 | struct target_mem_desc *tgt = NULL; |
ca4c3545 | 362 | size_t mapnum = 1; |
363 | void *hostaddrs = h; | |
364 | void *devaddrs = d; | |
365 | size_t sizes = s; | |
366 | unsigned short kinds = GOMP_MAP_ALLOC; | |
367 | ||
368 | goacc_lazy_initialize (); | |
369 | ||
370 | struct goacc_thread *thr = goacc_thread (); | |
371 | struct gomp_device_descr *acc_dev = thr->dev; | |
372 | ||
373 | if (acc_dev->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM) | |
374 | { | |
375 | if (d != h) | |
376 | gomp_fatal ("cannot map data on shared-memory system"); | |
ca4c3545 | 377 | } |
378 | else | |
379 | { | |
380 | struct goacc_thread *thr = goacc_thread (); | |
381 | ||
382 | if (!d || !h || !s) | |
383 | gomp_fatal ("[%p,+%d]->[%p,+%d] is a bad map", | |
384 | (void *)h, (int)s, (void *)d, (int)s); | |
385 | ||
5feffd51 | 386 | acc_prof_info prof_info; |
387 | acc_api_info api_info; | |
388 | bool profiling_p = GOACC_PROFILING_SETUP_P (thr, &prof_info, &api_info); | |
389 | ||
09f66ac1 | 390 | gomp_mutex_lock (&acc_dev->lock); |
391 | ||
0d8c703d | 392 | if (lookup_host (acc_dev, h, s)) |
09f66ac1 | 393 | { |
394 | gomp_mutex_unlock (&acc_dev->lock); | |
395 | gomp_fatal ("host address [%p, +%d] is already mapped", (void *)h, | |
396 | (int)s); | |
397 | } | |
ca4c3545 | 398 | |
399 | if (lookup_dev (thr->dev->openacc.data_environ, d, s)) | |
09f66ac1 | 400 | { |
401 | gomp_mutex_unlock (&acc_dev->lock); | |
402 | gomp_fatal ("device address [%p, +%d] is already mapped", (void *)d, | |
403 | (int)s); | |
404 | } | |
405 | ||
406 | gomp_mutex_unlock (&acc_dev->lock); | |
ca4c3545 | 407 | |
408 | tgt = gomp_map_vars (acc_dev, mapnum, &hostaddrs, &devaddrs, &sizes, | |
43895be5 | 409 | &kinds, true, GOMP_MAP_VARS_OPENACC); |
737cc978 | 410 | tgt->list[0].key->refcount = REFCOUNT_INFINITY; |
5feffd51 | 411 | |
412 | if (profiling_p) | |
413 | { | |
414 | thr->prof_info = NULL; | |
415 | thr->api_info = NULL; | |
416 | } | |
ca4c3545 | 417 | } |
418 | ||
09f66ac1 | 419 | gomp_mutex_lock (&acc_dev->lock); |
ca4c3545 | 420 | tgt->prev = acc_dev->openacc.data_environ; |
421 | acc_dev->openacc.data_environ = tgt; | |
09f66ac1 | 422 | gomp_mutex_unlock (&acc_dev->lock); |
ca4c3545 | 423 | } |
424 | ||
/* Remove the mapping previously established (by acc_map_data or an
   enclosing data construct) for host address H.  Fatal if H is not the
   start of a mapped block.  Device memory itself is not freed here.  */

void
acc_unmap_data (void *h)
{
  struct goacc_thread *thr = goacc_thread ();
  struct gomp_device_descr *acc_dev = thr->dev;

  /* No need to call lazy open, as the address must have been mapped.  */

  /* This is a no-op on shared-memory targets.  */
  if (acc_dev->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
    return;

  acc_prof_info prof_info;
  acc_api_info api_info;
  bool profiling_p = GOACC_PROFILING_SETUP_P (thr, &prof_info, &api_info);

  size_t host_size;

  gomp_mutex_lock (&acc_dev->lock);

  splay_tree_key n = lookup_host (acc_dev, h, 1);
  struct target_mem_desc *t;

  if (!n)
    {
      gomp_mutex_unlock (&acc_dev->lock);
      gomp_fatal ("%p is not a mapped block", (void *)h);
    }

  host_size = n->host_end - n->host_start;

  /* H must name the start of the block, not an interior address.  */
  if (n->host_start != (uintptr_t) h)
    {
      gomp_mutex_unlock (&acc_dev->lock);
      gomp_fatal ("[%p,%d] surrounds %p",
		  (void *) n->host_start, (int) host_size, (void *) h);
    }

  /* Mark for removal.  */
  n->refcount = 1;

  t = n->tgt;

  /* NOTE(review): refcount == 2 appears to mean "data environ chain plus
     this key" — confirm against the descriptor's ownership rules.  */
  if (t->refcount == 2)
    {
      struct target_mem_desc *tp;

      /* This is the last reference, so pull the descriptor off the
	 chain.  This avoids gomp_unmap_vars via gomp_unmap_tgt from
	 freeing the device memory. */
      t->tgt_end = 0;
      t->to_free = 0;

      for (tp = NULL, t = acc_dev->openacc.data_environ; t != NULL;
	   tp = t, t = t->prev)
	if (n->tgt == t)
	  {
	    if (tp)
	      tp->prev = t->prev;
	    else
	      acc_dev->openacc.data_environ = t->prev;

	    break;
	  }
    }

  /* gomp_unmap_vars takes the device lock itself, so release it first.  */
  gomp_mutex_unlock (&acc_dev->lock);

  gomp_unmap_vars (t, true);

  if (profiling_p)
    {
      thr->prof_info = NULL;
      thr->api_info = NULL;
    }
}
501 | ||
/* Flags for present_create_copy: tolerate an existing mapping, create a
   missing one, and copy host data to the device on creation.  */
#define FLAG_PRESENT (1 << 0)
#define FLAG_CREATE (1 << 1)
#define FLAG_COPY (1 << 2)

/* Shared worker for acc_create/acc_copyin and their variants.  Returns
   the device address for host range [H, H+S), creating (and optionally
   populating) the mapping per F.  Fatal on invalid ranges or flag
   violations.  */

static void *
present_create_copy (unsigned f, void *h, size_t s, int async)
{
  void *d;
  splay_tree_key n;

  if (!h || !s)
    gomp_fatal ("[%p,+%d] is a bad range", (void *)h, (int)s);

  goacc_lazy_initialize ();

  struct goacc_thread *thr = goacc_thread ();
  struct gomp_device_descr *acc_dev = thr->dev;

  /* Shared memory: the host pointer already is the device pointer.  */
  if (acc_dev->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
    return h;

  acc_prof_info prof_info;
  acc_api_info api_info;
  bool profiling_p = GOACC_PROFILING_SETUP_P (thr, &prof_info, &api_info);
  if (profiling_p)
    {
      prof_info.async = async;
      prof_info.async_queue = prof_info.async;
    }

  gomp_mutex_lock (&acc_dev->lock);

  n = lookup_host (acc_dev, h, s);
  if (n)
    {
      /* Present. */
      d = (void *) (n->tgt->tgt_start + n->tgt_offset);

      if (!(f & FLAG_PRESENT))
	{
	  gomp_mutex_unlock (&acc_dev->lock);
	  gomp_fatal ("[%p,+%d] already mapped to [%p,+%d]",
		      (void *)h, (int)s, (void *)d, (int)s);
	}
      /* The existing block must cover the whole requested range.  */
      if ((h + s) > (void *)n->host_end)
	{
	  gomp_mutex_unlock (&acc_dev->lock);
	  gomp_fatal ("[%p,+%d] not mapped", (void *)h, (int)s);
	}

      /* Pinned mappings (acc_map_data) are not reference counted.  */
      if (n->refcount != REFCOUNT_INFINITY)
	{
	  n->refcount++;
	  n->dynamic_refcount++;
	}
      gomp_mutex_unlock (&acc_dev->lock);
    }
  else if (!(f & FLAG_CREATE))
    {
      gomp_mutex_unlock (&acc_dev->lock);
      gomp_fatal ("[%p,+%d] not mapped", (void *)h, (int)s);
    }
  else
    {
      struct target_mem_desc *tgt;
      size_t mapnum = 1;
      unsigned short kinds;
      void *hostaddrs = h;

      if (f & FLAG_COPY)
	kinds = GOMP_MAP_TO;
      else
	kinds = GOMP_MAP_ALLOC;

      /* gomp_map_vars_async takes the device lock itself.  */
      gomp_mutex_unlock (&acc_dev->lock);

      goacc_aq aq = get_goacc_asyncqueue (async);

      tgt = gomp_map_vars_async (acc_dev, aq, mapnum, &hostaddrs, NULL, &s,
				 &kinds, true, GOMP_MAP_VARS_OPENACC);
      /* Initialize dynamic refcount.  */
      tgt->list[0].key->dynamic_refcount = 1;

      gomp_mutex_lock (&acc_dev->lock);

      d = tgt->to_free;
      tgt->prev = acc_dev->openacc.data_environ;
      acc_dev->openacc.data_environ = tgt;

      gomp_mutex_unlock (&acc_dev->lock);
    }

  if (profiling_p)
    {
      thr->prof_info = NULL;
      thr->api_info = NULL;
    }

  return d;
}
602 | ||
void *
acc_create (void *h, size_t s)
{
  /* Present-or-create semantics, synchronous, no initial copy.  */
  return present_create_copy (FLAG_PRESENT | FLAG_CREATE, h, s, acc_async_sync);
}
608 | ||
void
acc_create_async (void *h, size_t s, int async)
{
  /* Asynchronous variant of acc_create; device address is discarded.  */
  present_create_copy (FLAG_PRESENT | FLAG_CREATE, h, s, async);
}
614 | ||
/* acc_present_or_create used to be what acc_create is now. */
/* acc_pcreate is acc_present_or_create by a different name. */
/* Prefer real symbol aliases where the toolchain supports them; the
   fallback wrappers below are functionally identical.  */
#ifdef HAVE_ATTRIBUTE_ALIAS
strong_alias (acc_create, acc_present_or_create)
strong_alias (acc_create, acc_pcreate)
#else
void *
acc_present_or_create (void *h, size_t s)
{
  return acc_create (h, s);
}

void *
acc_pcreate (void *h, size_t s)
{
  return acc_create (h, s);
}
#endif
633 | ||
void *
acc_copyin (void *h, size_t s)
{
  /* Like acc_create, but also copies host data to the device.  */
  return present_create_copy (FLAG_PRESENT | FLAG_CREATE | FLAG_COPY, h, s,
			      acc_async_sync);
}
640 | ||
void
acc_copyin_async (void *h, size_t s, int async)
{
  /* Asynchronous variant of acc_copyin; device address is discarded.  */
  present_create_copy (FLAG_PRESENT | FLAG_CREATE | FLAG_COPY, h, s, async);
}
646 | ||
/* acc_present_or_copyin used to be what acc_copyin is now. */
/* acc_pcopyin is acc_present_or_copyin by a different name. */
/* Prefer real symbol aliases where the toolchain supports them; the
   fallback wrappers below are functionally identical.  */
#ifdef HAVE_ATTRIBUTE_ALIAS
strong_alias (acc_copyin, acc_present_or_copyin)
strong_alias (acc_copyin, acc_pcopyin)
#else
void *
acc_present_or_copyin (void *h, size_t s)
{
  return acc_copyin (h, s);
}

void *
acc_pcopyin (void *h, size_t s)
{
  return acc_copyin (h, s);
}
#endif
665 | ||
/* Flags for delete_copyout: copy device data back to the host before
   unmapping, and/or finalize (drop all dynamic references at once).  */
#define FLAG_COPYOUT (1 << 0)
#define FLAG_FINALIZE (1 << 1)

/* Shared worker for acc_delete/acc_copyout and their variants.
   Decrements reference counts for host range [H, H+S) per F and, when
   the last dynamic reference goes away, optionally copies data back and
   removes the mapping.  Fatal on unmapped or mismatched ranges.  */

static void
delete_copyout (unsigned f, void *h, size_t s, int async, const char *libfnname)
{
  size_t host_size;
  splay_tree_key n;
  void *d;
  struct goacc_thread *thr = goacc_thread ();
  struct gomp_device_descr *acc_dev = thr->dev;

  /* Nothing to delete or copy on shared-memory targets.  */
  if (acc_dev->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
    return;

  acc_prof_info prof_info;
  acc_api_info api_info;
  bool profiling_p = GOACC_PROFILING_SETUP_P (thr, &prof_info, &api_info);
  if (profiling_p)
    {
      prof_info.async = async;
      prof_info.async_queue = prof_info.async;
    }

  gomp_mutex_lock (&acc_dev->lock);

  n = lookup_host (acc_dev, h, s);

  /* No need to call lazy open, as the data must already have been
     mapped.  */

  if (!n)
    {
      gomp_mutex_unlock (&acc_dev->lock);
      gomp_fatal ("[%p,%d] is not mapped", (void *)h, (int)s);
    }

  d = (void *) (n->tgt->tgt_start + n->tgt_offset
		+ (uintptr_t) h - n->host_start);

  host_size = n->host_end - n->host_start;

  /* The range must match the mapped block exactly.  */
  if (n->host_start != (uintptr_t) h || host_size != s)
    {
      gomp_mutex_unlock (&acc_dev->lock);
      gomp_fatal ("[%p,%d] surrounds2 [%p,+%d]",
		  (void *) n->host_start, (int) host_size, (void *) h, (int) s);
    }

  /* A pinned (acc_map_data) block is force-released here.  */
  if (n->refcount == REFCOUNT_INFINITY)
    {
      n->refcount = 0;
      n->dynamic_refcount = 0;
    }
  if (n->refcount < n->dynamic_refcount)
    {
      gomp_mutex_unlock (&acc_dev->lock);
      gomp_fatal ("Dynamic reference counting assert fail\n");
    }

  if (f & FLAG_FINALIZE)
    {
      /* Drop every dynamic reference at once.  */
      n->refcount -= n->dynamic_refcount;
      n->dynamic_refcount = 0;
    }
  else if (n->dynamic_refcount)
    {
      n->dynamic_refcount--;
      n->refcount--;
    }

  if (n->refcount == 0)
    {
      /* NOTE(review): refcount == 2 appears to mean "data environ chain
	 plus this key" — confirm against descriptor ownership rules.  */
      if (n->tgt->refcount == 2)
	{
	  struct target_mem_desc *tp, *t;
	  for (tp = NULL, t = acc_dev->openacc.data_environ; t != NULL;
	       tp = t, t = t->prev)
	    if (n->tgt == t)
	      {
		if (tp)
		  tp->prev = t->prev;
		else
		  acc_dev->openacc.data_environ = t->prev;
		break;
	      }
	}

      if (f & FLAG_COPYOUT)
	{
	  goacc_aq aq = get_goacc_asyncqueue (async);
	  gomp_copy_dev2host (acc_dev, aq, h, d, s);
	}
      gomp_remove_var (acc_dev, n);
    }

  gomp_mutex_unlock (&acc_dev->lock);

  if (profiling_p)
    {
      thr->prof_info = NULL;
      thr->api_info = NULL;
    }
}
770 | ||
void
acc_delete (void *h , size_t s)
{
  /* Synchronous delete, no copy-back, single reference decrement.  */
  delete_copyout (0, h, s, acc_async_sync, __FUNCTION__);
}
776 | ||
void
acc_delete_async (void *h , size_t s, int async)
{
  /* Asynchronous delete on queue ASYNC.  */
  delete_copyout (0, h, s, async, __FUNCTION__);
}
782 | ||
void
acc_delete_finalize (void *h , size_t s)
{
  /* Synchronous delete dropping all dynamic references.  */
  delete_copyout (FLAG_FINALIZE, h, s, acc_async_sync, __FUNCTION__);
}
788 | ||
void
acc_delete_finalize_async (void *h , size_t s, int async)
{
  /* Asynchronous finalizing delete on queue ASYNC.  */
  delete_copyout (FLAG_FINALIZE, h, s, async, __FUNCTION__);
}
794 | ||
void
acc_copyout (void *h, size_t s)
{
  /* Synchronous copy-back then single reference decrement.  */
  delete_copyout (FLAG_COPYOUT, h, s, acc_async_sync, __FUNCTION__);
}
800 | ||
void
acc_copyout_async (void *h, size_t s, int async)
{
  /* Asynchronous copy-back on queue ASYNC.  */
  delete_copyout (FLAG_COPYOUT, h, s, async, __FUNCTION__);
}
806 | ||
void
acc_copyout_finalize (void *h, size_t s)
{
  /* Synchronous copy-back dropping all dynamic references.  */
  delete_copyout (FLAG_COPYOUT | FLAG_FINALIZE, h, s, acc_async_sync,
		  __FUNCTION__);
}
813 | ||
void
acc_copyout_finalize_async (void *h, size_t s, int async)
{
  /* Asynchronous finalizing copy-back on queue ASYNC.  */
  delete_copyout (FLAG_COPYOUT | FLAG_FINALIZE, h, s, async, __FUNCTION__);
}
819 | ||
/* Shared worker for acc_update_device/acc_update_self.  Copies host
   range [H, H+S) to the device when IS_DEV is nonzero, or device data
   back to the host otherwise.  Fatal if the range is not mapped.  */

static void
update_dev_host (int is_dev, void *h, size_t s, int async)
{
  splay_tree_key n;
  void *d;

  goacc_lazy_initialize ();

  struct goacc_thread *thr = goacc_thread ();
  struct gomp_device_descr *acc_dev = thr->dev;

  /* Nothing to update when host and device share memory.  */
  if (acc_dev->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
    return;

  acc_prof_info prof_info;
  acc_api_info api_info;
  bool profiling_p = GOACC_PROFILING_SETUP_P (thr, &prof_info, &api_info);
  if (profiling_p)
    {
      prof_info.async = async;
      prof_info.async_queue = prof_info.async;
    }

  gomp_mutex_lock (&acc_dev->lock);

  n = lookup_host (acc_dev, h, s);

  if (!n)
    {
      gomp_mutex_unlock (&acc_dev->lock);
      gomp_fatal ("[%p,%d] is not mapped", h, (int)s);
    }

  d = (void *) (n->tgt->tgt_start + n->tgt_offset
		+ (uintptr_t) h - n->host_start);

  goacc_aq aq = get_goacc_asyncqueue (async);

  /* NOTE(review): the device lock is held across the copy below, unlike
     e.g. memcpy_tofrom_device — verify this is intentional.  */
  if (is_dev)
    gomp_copy_host2dev (acc_dev, aq, d, h, s, /* TODO: cbuf? */ NULL);
  else
    gomp_copy_dev2host (acc_dev, aq, h, d, s);

  gomp_mutex_unlock (&acc_dev->lock);

  if (profiling_p)
    {
      thr->prof_info = NULL;
      thr->api_info = NULL;
    }
}
871 | ||
void
acc_update_device (void *h, size_t s)
{
  /* Synchronous host-to-device refresh.  */
  update_dev_host (1, h, s, acc_async_sync);
}
877 | ||
void
acc_update_device_async (void *h, size_t s, int async)
{
  /* Asynchronous host-to-device refresh on queue ASYNC.  */
  update_dev_host (1, h, s, async);
}
883 | ||
void
acc_update_self (void *h, size_t s)
{
  /* Synchronous device-to-host refresh.  */
  update_dev_host (0, h, s, acc_async_sync);
}
889 | ||
void
acc_update_self_async (void *h, size_t s, int async)
{
  /* Asynchronous device-to-host refresh on queue ASYNC.  */
  update_dev_host (0, h, s, async);
}
895 | ||
/* Map MAPNUM pointer/pset entries described by HOSTADDRS/SIZES/KINDS
   for the "enter data" style constructs.  If the first range is already
   present, only bump the (dynamic) reference counts of the existing
   entries; otherwise create a new mapping and link it into the data
   environment.  */

void
gomp_acc_insert_pointer (size_t mapnum, void **hostaddrs, size_t *sizes,
			 void *kinds, int async)
{
  struct target_mem_desc *tgt;
  struct goacc_thread *thr = goacc_thread ();
  struct gomp_device_descr *acc_dev = thr->dev;

  if (acc_is_present (*hostaddrs, *sizes))
    {
      splay_tree_key n;
      gomp_mutex_lock (&acc_dev->lock);
      n = lookup_host (acc_dev, *hostaddrs, *sizes);
      gomp_mutex_unlock (&acc_dev->lock);

      /* Find the entry for the first range, then bump refcounts on it
	 and the following MAPNUM-1 entries (the pointer/pset group).  */
      tgt = n->tgt;
      for (size_t i = 0; i < tgt->list_count; i++)
	if (tgt->list[i].key == n)
	  {
	    for (size_t j = 0; j < mapnum; j++)
	      if (i + j < tgt->list_count && tgt->list[i + j].key)
		{
		  tgt->list[i + j].key->refcount++;
		  tgt->list[i + j].key->dynamic_refcount++;
		}
	    return;
	  }
      /* Should not reach here.  */
      gomp_fatal ("Dynamic refcount incrementing failed for pointer/pset");
    }

  gomp_debug (0, "  %s: prepare mappings\n", __FUNCTION__);
  goacc_aq aq = get_goacc_asyncqueue (async);
  tgt = gomp_map_vars_async (acc_dev, aq, mapnum, hostaddrs,
			     NULL, sizes, kinds, true, GOMP_MAP_VARS_OPENACC);
  gomp_debug (0, "  %s: mappings prepared\n", __FUNCTION__);

  /* Initialize dynamic refcount.  */
  tgt->list[0].key->dynamic_refcount = 1;

  gomp_mutex_lock (&acc_dev->lock);
  tgt->prev = acc_dev->openacc.data_environ;
  acc_dev->openacc.data_environ = tgt;
  gomp_mutex_unlock (&acc_dev->lock);
}
941 | ||
942 | void | |
737cc978 | 943 | gomp_acc_remove_pointer (void *h, size_t s, bool force_copyfrom, int async, |
944 | int finalize, int mapnum) | |
ca4c3545 | 945 | { |
946 | struct goacc_thread *thr = goacc_thread (); | |
947 | struct gomp_device_descr *acc_dev = thr->dev; | |
948 | splay_tree_key n; | |
949 | struct target_mem_desc *t; | |
950 | int minrefs = (mapnum == 1) ? 2 : 3; | |
951 | ||
737cc978 | 952 | if (!acc_is_present (h, s)) |
953 | return; | |
954 | ||
09f66ac1 | 955 | gomp_mutex_lock (&acc_dev->lock); |
956 | ||
0d8c703d | 957 | n = lookup_host (acc_dev, h, 1); |
ca4c3545 | 958 | |
959 | if (!n) | |
09f66ac1 | 960 | { |
961 | gomp_mutex_unlock (&acc_dev->lock); | |
962 | gomp_fatal ("%p is not a mapped block", (void *)h); | |
963 | } | |
ca4c3545 | 964 | |
965 | gomp_debug (0, " %s: restore mappings\n", __FUNCTION__); | |
966 | ||
967 | t = n->tgt; | |
968 | ||
737cc978 | 969 | if (n->refcount < n->dynamic_refcount) |
970 | { | |
971 | gomp_mutex_unlock (&acc_dev->lock); | |
972 | gomp_fatal ("Dynamic reference counting assert fail\n"); | |
973 | } | |
ca4c3545 | 974 | |
737cc978 | 975 | if (finalize) |
ca4c3545 | 976 | { |
737cc978 | 977 | n->refcount -= n->dynamic_refcount; |
978 | n->dynamic_refcount = 0; | |
979 | } | |
980 | else if (n->dynamic_refcount) | |
981 | { | |
982 | n->dynamic_refcount--; | |
983 | n->refcount--; | |
984 | } | |
ca4c3545 | 985 | |
737cc978 | 986 | gomp_mutex_unlock (&acc_dev->lock); |
987 | ||
988 | if (n->refcount == 0) | |
989 | { | |
990 | if (t->refcount == minrefs) | |
ca4c3545 | 991 | { |
737cc978 | 992 | /* This is the last reference, so pull the descriptor off the |
993 | chain. This prevents gomp_unmap_vars via gomp_unmap_tgt from | |
994 | freeing the device memory. */ | |
995 | struct target_mem_desc *tp; | |
996 | for (tp = NULL, t = acc_dev->openacc.data_environ; t != NULL; | |
997 | tp = t, t = t->prev) | |
ca4c3545 | 998 | { |
737cc978 | 999 | if (n->tgt == t) |
1000 | { | |
1001 | if (tp) | |
1002 | tp->prev = t->prev; | |
1003 | else | |
1004 | acc_dev->openacc.data_environ = t->prev; | |
1005 | break; | |
1006 | } | |
ca4c3545 | 1007 | } |
1008 | } | |
ca4c3545 | 1009 | |
737cc978 | 1010 | /* Set refcount to 1 to allow gomp_unmap_vars to unmap it. */ |
1011 | n->refcount = 1; | |
1012 | t->refcount = minrefs; | |
1013 | for (size_t i = 0; i < t->list_count; i++) | |
1014 | if (t->list[i].key == n) | |
1015 | { | |
1016 | t->list[i].copy_from = force_copyfrom ? 1 : 0; | |
1017 | break; | |
1018 | } | |
ca4c3545 | 1019 | |
737cc978 | 1020 | /* If running synchronously, unmap immediately. */ |
1021 | if (async < acc_async_noval) | |
1022 | gomp_unmap_vars (t, true); | |
1023 | else | |
534b5e00 | 1024 | { |
1025 | goacc_aq aq = get_goacc_asyncqueue (async); | |
1026 | gomp_unmap_vars_async (t, true, aq); | |
1027 | } | |
737cc978 | 1028 | } |
ca4c3545 | 1029 | |
737cc978 | 1030 | gomp_mutex_unlock (&acc_dev->lock); |
ca4c3545 | 1031 | |
1032 | gomp_debug (0, " %s: mappings restored\n", __FUNCTION__); | |
1033 | } |