]>
Commit | Line | Data |
---|---|---|
8e8f6434 | 1 | /* Copyright (C) 2013-2018 Free Software Foundation, Inc. |
ca4c3545 | 2 | |
3 | Contributed by Mentor Embedded. | |
4 | ||
5 | This file is part of the GNU Offloading and Multi Processing Library | |
6 | (libgomp). | |
7 | ||
8 | Libgomp is free software; you can redistribute it and/or modify it | |
9 | under the terms of the GNU General Public License as published by | |
10 | the Free Software Foundation; either version 3, or (at your option) | |
11 | any later version. | |
12 | ||
13 | Libgomp is distributed in the hope that it will be useful, but WITHOUT ANY | |
14 | WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS | |
15 | FOR A PARTICULAR PURPOSE. See the GNU General Public License for | |
16 | more details. | |
17 | ||
18 | Under Section 7 of GPL version 3, you are granted additional | |
19 | permissions described in the GCC Runtime Library Exception, version | |
20 | 3.1, as published by the Free Software Foundation. | |
21 | ||
22 | You should have received a copy of the GNU General Public License and | |
23 | a copy of the GCC Runtime Library Exception along with this program; | |
24 | see the files COPYING3 and COPYING.RUNTIME respectively. If not, see | |
25 | <http://www.gnu.org/licenses/>. */ | |
26 | ||
27 | /* This file handles OpenACC constructs. */ | |
28 | ||
29 | #include "openacc.h" | |
30 | #include "libgomp.h" | |
31 | #include "libgomp_g.h" | |
32 | #include "gomp-constants.h" | |
33 | #include "oacc-int.h" | |
2634aed9 | 34 | #ifdef HAVE_INTTYPES_H |
35 | # include <inttypes.h> /* For PRIu64. */ | |
36 | #endif | |
ca4c3545 | 37 | #include <string.h> |
38 | #include <stdarg.h> | |
39 | #include <assert.h> | |
ca4c3545 | 40 | |
813421cd | 41 | |
42 | /* In the ABI, the GOACC_FLAGs are encoded as an inverted bitmask, so that we | |
43 | continue to support the following two legacy values. */ | |
44 | _Static_assert (GOACC_FLAGS_UNMARSHAL (GOMP_DEVICE_ICV) == 0, | |
45 | "legacy GOMP_DEVICE_ICV broken"); | |
46 | _Static_assert (GOACC_FLAGS_UNMARSHAL (GOMP_DEVICE_HOST_FALLBACK) | |
47 | == GOACC_FLAG_HOST_FALLBACK, | |
48 | "legacy GOMP_DEVICE_HOST_FALLBACK broken"); | |
49 | ||
50 | ||
737cc978 | 51 | /* Returns the number of mappings associated with the pointer or pset. PSET |
52 | have three mappings, whereas pointer have two. */ | |
53 | ||
ca4c3545 | 54 | static int |
737cc978 | 55 | find_pointer (int pos, size_t mapnum, unsigned short *kinds) |
ca4c3545 | 56 | { |
57 | if (pos + 1 >= mapnum) | |
58 | return 0; | |
59 | ||
60 | unsigned char kind = kinds[pos+1] & 0xff; | |
61 | ||
737cc978 | 62 | if (kind == GOMP_MAP_TO_PSET) |
63 | return 3; | |
64 | else if (kind == GOMP_MAP_POINTER) | |
65 | return 2; | |
66 | ||
67 | return 0; | |
68 | } | |
69 | ||
70 | /* Handle the mapping pair that are presented when a | |
71 | deviceptr clause is used with Fortran. */ | |
72 | ||
73 | static void | |
74 | handle_ftn_pointers (size_t mapnum, void **hostaddrs, size_t *sizes, | |
75 | unsigned short *kinds) | |
76 | { | |
77 | int i; | |
78 | ||
79 | for (i = 0; i < mapnum; i++) | |
80 | { | |
81 | unsigned short kind1 = kinds[i] & 0xff; | |
82 | ||
83 | /* Handle Fortran deviceptr clause. */ | |
84 | if (kind1 == GOMP_MAP_FORCE_DEVICEPTR) | |
85 | { | |
86 | unsigned short kind2; | |
87 | ||
88 | if (i < (signed)mapnum - 1) | |
89 | kind2 = kinds[i + 1] & 0xff; | |
90 | else | |
91 | kind2 = 0xffff; | |
92 | ||
93 | if (sizes[i] == sizeof (void *)) | |
94 | continue; | |
95 | ||
96 | /* At this point, we're dealing with a Fortran deviceptr. | |
97 | If the next element is not what we're expecting, then | |
98 | this is an instance of where the deviceptr variable was | |
99 | not used within the region and the pointer was removed | |
100 | by the gimplifier. */ | |
101 | if (kind2 == GOMP_MAP_POINTER | |
102 | && sizes[i + 1] == 0 | |
103 | && hostaddrs[i] == *(void **)hostaddrs[i + 1]) | |
104 | { | |
105 | kinds[i+1] = kinds[i]; | |
106 | sizes[i+1] = sizeof (void *); | |
107 | } | |
108 | ||
109 | /* Invalidate the entry. */ | |
110 | hostaddrs[i] = NULL; | |
111 | } | |
112 | } | |
ca4c3545 | 113 | } |
114 | ||
e561d5e1 | 115 | static void goacc_wait (int async, int num_waits, va_list *ap); |
116 | ||
117 | ||
813421cd | 118 | /* Launch a possibly offloaded function with FLAGS. FN is the host fn |
e561d5e1 | 119 | address. MAPNUM, HOSTADDRS, SIZES & KINDS describe the memory |
120 | blocks to be copied to/from the device. Varadic arguments are | |
121 | keyed optional parameters terminated with a zero. */ | |
ca4c3545 | 122 | |
123 | void | |
813421cd | 124 | GOACC_parallel_keyed (int flags_m, void (*fn) (void *), |
e561d5e1 | 125 | size_t mapnum, void **hostaddrs, size_t *sizes, |
126 | unsigned short *kinds, ...) | |
ca4c3545 | 127 | { |
813421cd | 128 | int flags = GOACC_FLAGS_UNMARSHAL (flags_m); |
129 | ||
ca4c3545 | 130 | va_list ap; |
131 | struct goacc_thread *thr; | |
132 | struct gomp_device_descr *acc_dev; | |
133 | struct target_mem_desc *tgt; | |
134 | void **devaddrs; | |
135 | unsigned int i; | |
136 | struct splay_tree_key_s k; | |
137 | splay_tree_key tgt_fn_key; | |
138 | void (*tgt_fn); | |
e561d5e1 | 139 | int async = GOMP_ASYNC_SYNC; |
140 | unsigned dims[GOMP_DIM_MAX]; | |
141 | unsigned tag; | |
ca4c3545 | 142 | |
2634aed9 | 143 | #ifdef HAVE_INTTYPES_H |
e561d5e1 | 144 | gomp_debug (0, "%s: mapnum=%"PRIu64", hostaddrs=%p, size=%p, kinds=%p\n", |
145 | __FUNCTION__, (uint64_t) mapnum, hostaddrs, sizes, kinds); | |
2634aed9 | 146 | #else |
e561d5e1 | 147 | gomp_debug (0, "%s: mapnum=%lu, hostaddrs=%p, sizes=%p, kinds=%p\n", |
148 | __FUNCTION__, (unsigned long) mapnum, hostaddrs, sizes, kinds); | |
2634aed9 | 149 | #endif |
0a1fe572 | 150 | goacc_lazy_initialize (); |
ca4c3545 | 151 | |
152 | thr = goacc_thread (); | |
153 | acc_dev = thr->dev; | |
154 | ||
737cc978 | 155 | handle_ftn_pointers (mapnum, hostaddrs, sizes, kinds); |
156 | ||
ca4c3545 | 157 | /* Host fallback if "if" clause is false or if the current device is set to |
158 | the host. */ | |
813421cd | 159 | if (flags & GOACC_FLAG_HOST_FALLBACK) |
ca4c3545 | 160 | { |
161 | goacc_save_and_set_bind (acc_device_host); | |
162 | fn (hostaddrs); | |
163 | goacc_restore_bind (); | |
164 | return; | |
165 | } | |
166 | else if (acc_device_type (acc_dev->type) == acc_device_host) | |
167 | { | |
168 | fn (hostaddrs); | |
169 | return; | |
170 | } | |
171 | ||
3a37a410 | 172 | /* Default: let the runtime choose. */ |
173 | for (i = 0; i != GOMP_DIM_MAX; i++) | |
174 | dims[i] = 0; | |
175 | ||
e561d5e1 | 176 | va_start (ap, kinds); |
177 | /* TODO: This will need amending when device_type is implemented. */ | |
178 | while ((tag = va_arg (ap, unsigned)) != 0) | |
933b0582 | 179 | { |
e561d5e1 | 180 | if (GOMP_LAUNCH_DEVICE (tag)) |
181 | gomp_fatal ("device_type '%d' offload parameters, libgomp is too old", | |
182 | GOMP_LAUNCH_DEVICE (tag)); | |
183 | ||
184 | switch (GOMP_LAUNCH_CODE (tag)) | |
185 | { | |
186 | case GOMP_LAUNCH_DIM: | |
187 | { | |
188 | unsigned mask = GOMP_LAUNCH_OP (tag); | |
189 | ||
190 | for (i = 0; i != GOMP_DIM_MAX; i++) | |
191 | if (mask & GOMP_DIM_MASK (i)) | |
192 | dims[i] = va_arg (ap, unsigned); | |
193 | } | |
194 | break; | |
195 | ||
196 | case GOMP_LAUNCH_ASYNC: | |
197 | { | |
198 | /* Small constant values are encoded in the operand. */ | |
199 | async = GOMP_LAUNCH_OP (tag); | |
200 | ||
201 | if (async == GOMP_LAUNCH_OP_MAX) | |
202 | async = va_arg (ap, unsigned); | |
203 | break; | |
204 | } | |
205 | ||
206 | case GOMP_LAUNCH_WAIT: | |
207 | { | |
208 | unsigned num_waits = GOMP_LAUNCH_OP (tag); | |
209 | ||
210 | if (num_waits) | |
211 | goacc_wait (async, num_waits, &ap); | |
212 | break; | |
213 | } | |
214 | ||
215 | default: | |
216 | gomp_fatal ("unrecognized offload code '%d'," | |
217 | " libgomp is too old", GOMP_LAUNCH_CODE (tag)); | |
218 | } | |
933b0582 | 219 | } |
e561d5e1 | 220 | va_end (ap); |
ca4c3545 | 221 | |
ca4c3545 | 222 | acc_dev->openacc.async_set_async_func (async); |
223 | ||
224 | if (!(acc_dev->capabilities & GOMP_OFFLOAD_CAP_NATIVE_EXEC)) | |
225 | { | |
226 | k.host_start = (uintptr_t) fn; | |
227 | k.host_end = k.host_start + 1; | |
0d8c703d | 228 | gomp_mutex_lock (&acc_dev->lock); |
229 | tgt_fn_key = splay_tree_lookup (&acc_dev->mem_map, &k); | |
230 | gomp_mutex_unlock (&acc_dev->lock); | |
ca4c3545 | 231 | |
232 | if (tgt_fn_key == NULL) | |
233 | gomp_fatal ("target function wasn't mapped"); | |
234 | ||
0a1fe572 | 235 | tgt_fn = (void (*)) tgt_fn_key->tgt_offset; |
ca4c3545 | 236 | } |
237 | else | |
238 | tgt_fn = (void (*)) fn; | |
239 | ||
240 | tgt = gomp_map_vars (acc_dev, mapnum, hostaddrs, NULL, sizes, kinds, true, | |
43895be5 | 241 | GOMP_MAP_VARS_OPENACC); |
ca4c3545 | 242 | |
a6f19a7c | 243 | devaddrs = gomp_alloca (sizeof (void *) * mapnum); |
ca4c3545 | 244 | for (i = 0; i < mapnum; i++) |
43895be5 | 245 | devaddrs[i] = (void *) (tgt->list[i].key->tgt->tgt_start |
2110057d | 246 | + tgt->list[i].key->tgt_offset |
247 | + tgt->list[i].offset); | |
ca4c3545 | 248 | |
71024455 | 249 | acc_dev->openacc.exec_func (tgt_fn, mapnum, hostaddrs, devaddrs, |
250 | async, dims, tgt); | |
ca4c3545 | 251 | |
252 | /* If running synchronously, unmap immediately. */ | |
737cc978 | 253 | bool copyfrom = true; |
2e3e0ea7 | 254 | if (async_synchronous_p (async)) |
ca4c3545 | 255 | gomp_unmap_vars (tgt, true); |
256 | else | |
737cc978 | 257 | { |
258 | bool async_unmap = false; | |
259 | for (size_t i = 0; i < tgt->list_count; i++) | |
260 | { | |
261 | splay_tree_key k = tgt->list[i].key; | |
262 | if (k && k->refcount == 1) | |
263 | { | |
264 | async_unmap = true; | |
265 | break; | |
266 | } | |
267 | } | |
268 | if (async_unmap) | |
269 | tgt->device_descr->openacc.register_async_cleanup_func (tgt, async); | |
270 | else | |
271 | { | |
272 | copyfrom = false; | |
273 | gomp_unmap_vars (tgt, copyfrom); | |
274 | } | |
275 | } | |
ca4c3545 | 276 | |
277 | acc_dev->openacc.async_set_async_func (acc_async_sync); | |
278 | } | |
279 | ||
e561d5e1 | 280 | /* Legacy entry point, only provide host execution. */ |
281 | ||
282 | void | |
813421cd | 283 | GOACC_parallel (int flags_m, void (*fn) (void *), |
e561d5e1 | 284 | size_t mapnum, void **hostaddrs, size_t *sizes, |
285 | unsigned short *kinds, | |
286 | int num_gangs, int num_workers, int vector_length, | |
287 | int async, int num_waits, ...) | |
288 | { | |
289 | goacc_save_and_set_bind (acc_device_host); | |
290 | fn (hostaddrs); | |
291 | goacc_restore_bind (); | |
292 | } | |
293 | ||
ca4c3545 | 294 | void |
813421cd | 295 | GOACC_data_start (int flags_m, size_t mapnum, |
ca4c3545 | 296 | void **hostaddrs, size_t *sizes, unsigned short *kinds) |
297 | { | |
813421cd | 298 | int flags = GOACC_FLAGS_UNMARSHAL (flags_m); |
299 | ||
ca4c3545 | 300 | struct target_mem_desc *tgt; |
301 | ||
2634aed9 | 302 | #ifdef HAVE_INTTYPES_H |
303 | gomp_debug (0, "%s: mapnum=%"PRIu64", hostaddrs=%p, size=%p, kinds=%p\n", | |
304 | __FUNCTION__, (uint64_t) mapnum, hostaddrs, sizes, kinds); | |
305 | #else | |
306 | gomp_debug (0, "%s: mapnum=%lu, hostaddrs=%p, sizes=%p, kinds=%p\n", | |
307 | __FUNCTION__, (unsigned long) mapnum, hostaddrs, sizes, kinds); | |
308 | #endif | |
ca4c3545 | 309 | |
0a1fe572 | 310 | goacc_lazy_initialize (); |
ca4c3545 | 311 | |
312 | struct goacc_thread *thr = goacc_thread (); | |
313 | struct gomp_device_descr *acc_dev = thr->dev; | |
314 | ||
315 | /* Host fallback or 'do nothing'. */ | |
316 | if ((acc_dev->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM) | |
813421cd | 317 | || (flags & GOACC_FLAG_HOST_FALLBACK)) |
ca4c3545 | 318 | { |
43895be5 | 319 | tgt = gomp_map_vars (NULL, 0, NULL, NULL, NULL, NULL, true, |
320 | GOMP_MAP_VARS_OPENACC); | |
ca4c3545 | 321 | tgt->prev = thr->mapped_data; |
322 | thr->mapped_data = tgt; | |
323 | ||
324 | return; | |
325 | } | |
326 | ||
327 | gomp_debug (0, " %s: prepare mappings\n", __FUNCTION__); | |
328 | tgt = gomp_map_vars (acc_dev, mapnum, hostaddrs, NULL, sizes, kinds, true, | |
43895be5 | 329 | GOMP_MAP_VARS_OPENACC); |
ca4c3545 | 330 | gomp_debug (0, " %s: mappings prepared\n", __FUNCTION__); |
331 | tgt->prev = thr->mapped_data; | |
332 | thr->mapped_data = tgt; | |
333 | } | |
334 | ||
335 | void | |
336 | GOACC_data_end (void) | |
337 | { | |
338 | struct goacc_thread *thr = goacc_thread (); | |
339 | struct target_mem_desc *tgt = thr->mapped_data; | |
340 | ||
341 | gomp_debug (0, " %s: restore mappings\n", __FUNCTION__); | |
342 | thr->mapped_data = tgt->prev; | |
343 | gomp_unmap_vars (tgt, true); | |
344 | gomp_debug (0, " %s: mappings restored\n", __FUNCTION__); | |
345 | } | |
346 | ||
347 | void | |
813421cd | 348 | GOACC_enter_exit_data (int flags_m, size_t mapnum, |
ca4c3545 | 349 | void **hostaddrs, size_t *sizes, unsigned short *kinds, |
350 | int async, int num_waits, ...) | |
351 | { | |
813421cd | 352 | int flags = GOACC_FLAGS_UNMARSHAL (flags_m); |
353 | ||
ca4c3545 | 354 | struct goacc_thread *thr; |
355 | struct gomp_device_descr *acc_dev; | |
ca4c3545 | 356 | bool data_enter = false; |
357 | size_t i; | |
358 | ||
0a1fe572 | 359 | goacc_lazy_initialize (); |
ca4c3545 | 360 | |
361 | thr = goacc_thread (); | |
362 | acc_dev = thr->dev; | |
363 | ||
364 | if ((acc_dev->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM) | |
813421cd | 365 | || (flags & GOACC_FLAG_HOST_FALLBACK)) |
ca4c3545 | 366 | return; |
367 | ||
933b0582 | 368 | if (num_waits) |
ca4c3545 | 369 | { |
370 | va_list ap; | |
371 | ||
372 | va_start (ap, num_waits); | |
e561d5e1 | 373 | goacc_wait (async, num_waits, &ap); |
ca4c3545 | 374 | va_end (ap); |
375 | } | |
376 | ||
737cc978 | 377 | /* Determine whether "finalize" semantics apply to all mappings of this |
378 | OpenACC directive. */ | |
379 | bool finalize = false; | |
380 | if (mapnum > 0) | |
381 | { | |
382 | unsigned char kind = kinds[0] & 0xff; | |
383 | if (kind == GOMP_MAP_DELETE | |
384 | || kind == GOMP_MAP_FORCE_FROM) | |
385 | finalize = true; | |
386 | } | |
387 | ||
ca4c3545 | 388 | acc_dev->openacc.async_set_async_func (async); |
389 | ||
390 | /* Determine if this is an "acc enter data". */ | |
391 | for (i = 0; i < mapnum; ++i) | |
392 | { | |
393 | unsigned char kind = kinds[i] & 0xff; | |
394 | ||
395 | if (kind == GOMP_MAP_POINTER || kind == GOMP_MAP_TO_PSET) | |
396 | continue; | |
397 | ||
398 | if (kind == GOMP_MAP_FORCE_ALLOC | |
399 | || kind == GOMP_MAP_FORCE_PRESENT | |
737cc978 | 400 | || kind == GOMP_MAP_FORCE_TO |
401 | || kind == GOMP_MAP_TO | |
402 | || kind == GOMP_MAP_ALLOC) | |
ca4c3545 | 403 | { |
404 | data_enter = true; | |
405 | break; | |
406 | } | |
407 | ||
737cc978 | 408 | if (kind == GOMP_MAP_RELEASE |
409 | || kind == GOMP_MAP_DELETE | |
410 | || kind == GOMP_MAP_FROM | |
ca4c3545 | 411 | || kind == GOMP_MAP_FORCE_FROM) |
412 | break; | |
413 | ||
414 | gomp_fatal (">>>> GOACC_enter_exit_data UNHANDLED kind 0x%.2x", | |
415 | kind); | |
416 | } | |
417 | ||
737cc978 | 418 | /* In c, non-pointers and arrays are represented by a single data clause. |
419 | Dynamically allocated arrays and subarrays are represented by a data | |
420 | clause followed by an internal GOMP_MAP_POINTER. | |
421 | ||
422 | In fortran, scalars and not allocated arrays are represented by a | |
423 | single data clause. Allocated arrays and subarrays have three mappings: | |
424 | 1) the original data clause, 2) a PSET 3) a pointer to the array data. | |
425 | */ | |
426 | ||
ca4c3545 | 427 | if (data_enter) |
428 | { | |
429 | for (i = 0; i < mapnum; i++) | |
430 | { | |
431 | unsigned char kind = kinds[i] & 0xff; | |
432 | ||
737cc978 | 433 | /* Scan for pointers and PSETs. */ |
434 | int pointer = find_pointer (i, mapnum, kinds); | |
ca4c3545 | 435 | |
737cc978 | 436 | if (!pointer) |
ca4c3545 | 437 | { |
438 | switch (kind) | |
439 | { | |
737cc978 | 440 | case GOMP_MAP_ALLOC: |
ca4c3545 | 441 | case GOMP_MAP_FORCE_ALLOC: |
442 | acc_create (hostaddrs[i], sizes[i]); | |
443 | break; | |
737cc978 | 444 | case GOMP_MAP_TO: |
ca4c3545 | 445 | case GOMP_MAP_FORCE_TO: |
737cc978 | 446 | acc_copyin (hostaddrs[i], sizes[i]); |
ca4c3545 | 447 | break; |
448 | default: | |
449 | gomp_fatal (">>>> GOACC_enter_exit_data UNHANDLED kind 0x%.2x", | |
450 | kind); | |
451 | break; | |
452 | } | |
453 | } | |
454 | else | |
455 | { | |
737cc978 | 456 | gomp_acc_insert_pointer (pointer, &hostaddrs[i], |
457 | &sizes[i], &kinds[i]); | |
ca4c3545 | 458 | /* Increment 'i' by two because OpenACC requires fortran |
459 | arrays to be contiguous, so each PSET is associated with | |
460 | one of MAP_FORCE_ALLOC/MAP_FORCE_PRESET/MAP_FORCE_TO, and | |
461 | one MAP_POINTER. */ | |
737cc978 | 462 | i += pointer - 1; |
ca4c3545 | 463 | } |
464 | } | |
465 | } | |
466 | else | |
467 | for (i = 0; i < mapnum; ++i) | |
468 | { | |
469 | unsigned char kind = kinds[i] & 0xff; | |
470 | ||
737cc978 | 471 | int pointer = find_pointer (i, mapnum, kinds); |
ca4c3545 | 472 | |
737cc978 | 473 | if (!pointer) |
ca4c3545 | 474 | { |
475 | switch (kind) | |
476 | { | |
737cc978 | 477 | case GOMP_MAP_RELEASE: |
5cb6b0b9 | 478 | case GOMP_MAP_DELETE: |
737cc978 | 479 | if (acc_is_present (hostaddrs[i], sizes[i])) |
480 | { | |
481 | if (finalize) | |
482 | acc_delete_finalize (hostaddrs[i], sizes[i]); | |
483 | else | |
484 | acc_delete (hostaddrs[i], sizes[i]); | |
485 | } | |
ca4c3545 | 486 | break; |
737cc978 | 487 | case GOMP_MAP_FROM: |
ca4c3545 | 488 | case GOMP_MAP_FORCE_FROM: |
737cc978 | 489 | if (finalize) |
490 | acc_copyout_finalize (hostaddrs[i], sizes[i]); | |
491 | else | |
492 | acc_copyout (hostaddrs[i], sizes[i]); | |
ca4c3545 | 493 | break; |
494 | default: | |
495 | gomp_fatal (">>>> GOACC_enter_exit_data UNHANDLED kind 0x%.2x", | |
496 | kind); | |
497 | break; | |
498 | } | |
499 | } | |
500 | else | |
501 | { | |
737cc978 | 502 | bool copyfrom = (kind == GOMP_MAP_FORCE_FROM |
503 | || kind == GOMP_MAP_FROM); | |
504 | gomp_acc_remove_pointer (hostaddrs[i], sizes[i], copyfrom, async, | |
505 | finalize, pointer); | |
ca4c3545 | 506 | /* See the above comment. */ |
737cc978 | 507 | i += pointer - 1; |
ca4c3545 | 508 | } |
509 | } | |
510 | ||
511 | acc_dev->openacc.async_set_async_func (acc_async_sync); | |
512 | } | |
513 | ||
514 | static void | |
e561d5e1 | 515 | goacc_wait (int async, int num_waits, va_list *ap) |
ca4c3545 | 516 | { |
517 | struct goacc_thread *thr = goacc_thread (); | |
518 | struct gomp_device_descr *acc_dev = thr->dev; | |
ca4c3545 | 519 | |
933b0582 | 520 | while (num_waits--) |
ca4c3545 | 521 | { |
e561d5e1 | 522 | int qid = va_arg (*ap, int); |
523 | ||
ca4c3545 | 524 | if (acc_async_test (qid)) |
525 | continue; | |
526 | ||
933b0582 | 527 | if (async == acc_async_sync) |
528 | acc_wait (qid); | |
529 | else if (qid == async) | |
530 | ;/* If we're waiting on the same asynchronous queue as we're | |
531 | launching on, the queue itself will order work as | |
532 | required, so there's no need to wait explicitly. */ | |
533 | else | |
ca4c3545 | 534 | acc_dev->openacc.async_wait_async_func (qid, async); |
535 | } | |
536 | } | |
537 | ||
538 | void | |
813421cd | 539 | GOACC_update (int flags_m, size_t mapnum, |
ca4c3545 | 540 | void **hostaddrs, size_t *sizes, unsigned short *kinds, |
541 | int async, int num_waits, ...) | |
542 | { | |
813421cd | 543 | int flags = GOACC_FLAGS_UNMARSHAL (flags_m); |
544 | ||
ca4c3545 | 545 | size_t i; |
546 | ||
0a1fe572 | 547 | goacc_lazy_initialize (); |
ca4c3545 | 548 | |
549 | struct goacc_thread *thr = goacc_thread (); | |
550 | struct gomp_device_descr *acc_dev = thr->dev; | |
551 | ||
552 | if ((acc_dev->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM) | |
813421cd | 553 | || (flags & GOACC_FLAG_HOST_FALLBACK)) |
ca4c3545 | 554 | return; |
555 | ||
933b0582 | 556 | if (num_waits) |
ca4c3545 | 557 | { |
558 | va_list ap; | |
559 | ||
560 | va_start (ap, num_waits); | |
e561d5e1 | 561 | goacc_wait (async, num_waits, &ap); |
ca4c3545 | 562 | va_end (ap); |
563 | } | |
564 | ||
565 | acc_dev->openacc.async_set_async_func (async); | |
566 | ||
737cc978 | 567 | bool update_device = false; |
ca4c3545 | 568 | for (i = 0; i < mapnum; ++i) |
569 | { | |
570 | unsigned char kind = kinds[i] & 0xff; | |
571 | ||
572 | switch (kind) | |
573 | { | |
574 | case GOMP_MAP_POINTER: | |
575 | case GOMP_MAP_TO_PSET: | |
576 | break; | |
577 | ||
737cc978 | 578 | case GOMP_MAP_ALWAYS_POINTER: |
579 | if (update_device) | |
580 | { | |
581 | /* Save the contents of the host pointer. */ | |
582 | void *dptr = acc_deviceptr (hostaddrs[i-1]); | |
583 | uintptr_t t = *(uintptr_t *) hostaddrs[i]; | |
584 | ||
585 | /* Update the contents of the host pointer to reflect | |
586 | the value of the allocated device memory in the | |
587 | previous pointer. */ | |
588 | *(uintptr_t *) hostaddrs[i] = (uintptr_t)dptr; | |
589 | acc_update_device (hostaddrs[i], sizeof (uintptr_t)); | |
590 | ||
591 | /* Restore the host pointer. */ | |
592 | *(uintptr_t *) hostaddrs[i] = t; | |
593 | update_device = false; | |
594 | } | |
595 | break; | |
596 | ||
597 | case GOMP_MAP_TO: | |
598 | if (!acc_is_present (hostaddrs[i], sizes[i])) | |
599 | { | |
600 | update_device = false; | |
601 | break; | |
602 | } | |
603 | /* Fallthru */ | |
ca4c3545 | 604 | case GOMP_MAP_FORCE_TO: |
737cc978 | 605 | update_device = true; |
ca4c3545 | 606 | acc_update_device (hostaddrs[i], sizes[i]); |
607 | break; | |
608 | ||
737cc978 | 609 | case GOMP_MAP_FROM: |
610 | if (!acc_is_present (hostaddrs[i], sizes[i])) | |
611 | { | |
612 | update_device = false; | |
613 | break; | |
614 | } | |
615 | /* Fallthru */ | |
ca4c3545 | 616 | case GOMP_MAP_FORCE_FROM: |
737cc978 | 617 | update_device = false; |
ca4c3545 | 618 | acc_update_self (hostaddrs[i], sizes[i]); |
619 | break; | |
620 | ||
621 | default: | |
622 | gomp_fatal (">>>> GOACC_update UNHANDLED kind 0x%.2x", kind); | |
623 | break; | |
624 | } | |
625 | } | |
626 | ||
627 | acc_dev->openacc.async_set_async_func (acc_async_sync); | |
628 | } | |
629 | ||
630 | void | |
631 | GOACC_wait (int async, int num_waits, ...) | |
632 | { | |
933b0582 | 633 | if (num_waits) |
634 | { | |
635 | va_list ap; | |
ca4c3545 | 636 | |
933b0582 | 637 | va_start (ap, num_waits); |
e561d5e1 | 638 | goacc_wait (async, num_waits, &ap); |
933b0582 | 639 | va_end (ap); |
640 | } | |
641 | else if (async == acc_async_sync) | |
642 | acc_wait_all (); | |
44b7d2b9 | 643 | else |
644 | acc_wait_all_async (async); | |
ca4c3545 | 645 | } |
646 | ||
/* Report the number of threads.  This implementation always answers 1.  */

int
GOACC_get_num_threads (void)
{
  const int num_threads = 1;

  return num_threads;
}
652 | ||
/* Report the calling thread's number.  This implementation always answers
   0.  */

int
GOACC_get_thread_num (void)
{
  const int thread_num = 0;

  return thread_num;
}
2fc5e987 | 658 | |
659 | void | |
813421cd | 660 | GOACC_declare (int flags_m, size_t mapnum, |
2fc5e987 | 661 | void **hostaddrs, size_t *sizes, unsigned short *kinds) |
662 | { | |
663 | int i; | |
664 | ||
665 | for (i = 0; i < mapnum; i++) | |
666 | { | |
667 | unsigned char kind = kinds[i] & 0xff; | |
668 | ||
669 | if (kind == GOMP_MAP_POINTER || kind == GOMP_MAP_TO_PSET) | |
670 | continue; | |
671 | ||
672 | switch (kind) | |
673 | { | |
674 | case GOMP_MAP_FORCE_ALLOC: | |
2fc5e987 | 675 | case GOMP_MAP_FORCE_FROM: |
676 | case GOMP_MAP_FORCE_TO: | |
677 | case GOMP_MAP_POINTER: | |
737cc978 | 678 | case GOMP_MAP_RELEASE: |
5cb6b0b9 | 679 | case GOMP_MAP_DELETE: |
813421cd | 680 | GOACC_enter_exit_data (flags_m, 1, &hostaddrs[i], &sizes[i], |
dffc468e | 681 | &kinds[i], GOMP_ASYNC_SYNC, 0); |
2fc5e987 | 682 | break; |
683 | ||
684 | case GOMP_MAP_FORCE_DEVICEPTR: | |
685 | break; | |
686 | ||
687 | case GOMP_MAP_ALLOC: | |
688 | if (!acc_is_present (hostaddrs[i], sizes[i])) | |
813421cd | 689 | GOACC_enter_exit_data (flags_m, 1, &hostaddrs[i], &sizes[i], |
dffc468e | 690 | &kinds[i], GOMP_ASYNC_SYNC, 0); |
2fc5e987 | 691 | break; |
692 | ||
693 | case GOMP_MAP_TO: | |
813421cd | 694 | GOACC_enter_exit_data (flags_m, 1, &hostaddrs[i], &sizes[i], |
dffc468e | 695 | &kinds[i], GOMP_ASYNC_SYNC, 0); |
2fc5e987 | 696 | |
697 | break; | |
698 | ||
699 | case GOMP_MAP_FROM: | |
813421cd | 700 | GOACC_enter_exit_data (flags_m, 1, &hostaddrs[i], &sizes[i], |
dffc468e | 701 | &kinds[i], GOMP_ASYNC_SYNC, 0); |
2fc5e987 | 702 | break; |
703 | ||
704 | case GOMP_MAP_FORCE_PRESENT: | |
705 | if (!acc_is_present (hostaddrs[i], sizes[i])) | |
706 | gomp_fatal ("[%p,%ld] is not mapped", hostaddrs[i], | |
707 | (unsigned long) sizes[i]); | |
708 | break; | |
709 | ||
710 | default: | |
711 | assert (0); | |
712 | break; | |
713 | } | |
714 | } | |
715 | } |