]> git.ipfire.org Git - thirdparty/gcc.git/blame - libgomp/oacc-parallel.c
Use plain -fopenacc to enable OpenACC kernels processing
[thirdparty/gcc.git] / libgomp / oacc-parallel.c
CommitLineData
818ab71a 1/* Copyright (C) 2013-2016 Free Software Foundation, Inc.
41dbbb37
TS
2
3 Contributed by Mentor Embedded.
4
5 This file is part of the GNU Offloading and Multi Processing Library
6 (libgomp).
7
8 Libgomp is free software; you can redistribute it and/or modify it
9 under the terms of the GNU General Public License as published by
10 the Free Software Foundation; either version 3, or (at your option)
11 any later version.
12
13 Libgomp is distributed in the hope that it will be useful, but WITHOUT ANY
14 WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
15 FOR A PARTICULAR PURPOSE. See the GNU General Public License for
16 more details.
17
18 Under Section 7 of GPL version 3, you are granted additional
19 permissions described in the GCC Runtime Library Exception, version
20 3.1, as published by the Free Software Foundation.
21
22 You should have received a copy of the GNU General Public License and
23 a copy of the GCC Runtime Library Exception along with this program;
24 see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
25 <http://www.gnu.org/licenses/>. */
26
27/* This file handles OpenACC constructs. */
28
29#include "openacc.h"
30#include "libgomp.h"
31#include "libgomp_g.h"
32#include "gomp-constants.h"
33#include "oacc-int.h"
01c0b3b0
KT
34#ifdef HAVE_INTTYPES_H
35# include <inttypes.h> /* For PRIu64. */
36#endif
41dbbb37
TS
37#include <string.h>
38#include <stdarg.h>
39#include <assert.h>
41dbbb37
TS
40
41static int
42find_pset (int pos, size_t mapnum, unsigned short *kinds)
43{
44 if (pos + 1 >= mapnum)
45 return 0;
46
47 unsigned char kind = kinds[pos+1] & 0xff;
48
49 return kind == GOMP_MAP_TO_PSET;
50}
51
3e32ee19
NS
52static void goacc_wait (int async, int num_waits, va_list *ap);
53
54
55/* Launch a possibly offloaded function on DEVICE. FN is the host fn
56 address. MAPNUM, HOSTADDRS, SIZES & KINDS describe the memory
57 blocks to be copied to/from the device. Varadic arguments are
58 keyed optional parameters terminated with a zero. */
41dbbb37
TS
59
60void
3e32ee19
NS
61GOACC_parallel_keyed (int device, void (*fn) (void *),
62 size_t mapnum, void **hostaddrs, size_t *sizes,
63 unsigned short *kinds, ...)
41dbbb37
TS
64{
65 bool host_fallback = device == GOMP_DEVICE_HOST_FALLBACK;
66 va_list ap;
67 struct goacc_thread *thr;
68 struct gomp_device_descr *acc_dev;
69 struct target_mem_desc *tgt;
70 void **devaddrs;
71 unsigned int i;
72 struct splay_tree_key_s k;
73 splay_tree_key tgt_fn_key;
74 void (*tgt_fn);
3e32ee19
NS
75 int async = GOMP_ASYNC_SYNC;
76 unsigned dims[GOMP_DIM_MAX];
77 unsigned tag;
41dbbb37 78
01c0b3b0 79#ifdef HAVE_INTTYPES_H
3e32ee19
NS
80 gomp_debug (0, "%s: mapnum=%"PRIu64", hostaddrs=%p, size=%p, kinds=%p\n",
81 __FUNCTION__, (uint64_t) mapnum, hostaddrs, sizes, kinds);
01c0b3b0 82#else
3e32ee19
NS
83 gomp_debug (0, "%s: mapnum=%lu, hostaddrs=%p, sizes=%p, kinds=%p\n",
84 __FUNCTION__, (unsigned long) mapnum, hostaddrs, sizes, kinds);
01c0b3b0 85#endif
d93bdab5 86 goacc_lazy_initialize ();
41dbbb37
TS
87
88 thr = goacc_thread ();
89 acc_dev = thr->dev;
90
91 /* Host fallback if "if" clause is false or if the current device is set to
92 the host. */
93 if (host_fallback)
94 {
95 goacc_save_and_set_bind (acc_device_host);
96 fn (hostaddrs);
97 goacc_restore_bind ();
98 return;
99 }
100 else if (acc_device_type (acc_dev->type) == acc_device_host)
101 {
102 fn (hostaddrs);
103 return;
104 }
105
f99c3557
TS
106 /* Default: let the runtime choose. */
107 for (i = 0; i != GOMP_DIM_MAX; i++)
108 dims[i] = 0;
109
3e32ee19
NS
110 va_start (ap, kinds);
111 /* TODO: This will need amending when device_type is implemented. */
112 while ((tag = va_arg (ap, unsigned)) != 0)
a091118d 113 {
3e32ee19
NS
114 if (GOMP_LAUNCH_DEVICE (tag))
115 gomp_fatal ("device_type '%d' offload parameters, libgomp is too old",
116 GOMP_LAUNCH_DEVICE (tag));
117
118 switch (GOMP_LAUNCH_CODE (tag))
119 {
120 case GOMP_LAUNCH_DIM:
121 {
122 unsigned mask = GOMP_LAUNCH_OP (tag);
123
124 for (i = 0; i != GOMP_DIM_MAX; i++)
125 if (mask & GOMP_DIM_MASK (i))
126 dims[i] = va_arg (ap, unsigned);
127 }
128 break;
129
130 case GOMP_LAUNCH_ASYNC:
131 {
132 /* Small constant values are encoded in the operand. */
133 async = GOMP_LAUNCH_OP (tag);
134
135 if (async == GOMP_LAUNCH_OP_MAX)
136 async = va_arg (ap, unsigned);
137 break;
138 }
139
140 case GOMP_LAUNCH_WAIT:
141 {
142 unsigned num_waits = GOMP_LAUNCH_OP (tag);
143
144 if (num_waits)
145 goacc_wait (async, num_waits, &ap);
146 break;
147 }
148
149 default:
150 gomp_fatal ("unrecognized offload code '%d',"
151 " libgomp is too old", GOMP_LAUNCH_CODE (tag));
152 }
a091118d 153 }
3e32ee19 154 va_end (ap);
41dbbb37 155
41dbbb37
TS
156 acc_dev->openacc.async_set_async_func (async);
157
158 if (!(acc_dev->capabilities & GOMP_OFFLOAD_CAP_NATIVE_EXEC))
159 {
160 k.host_start = (uintptr_t) fn;
161 k.host_end = k.host_start + 1;
a51df54e
IV
162 gomp_mutex_lock (&acc_dev->lock);
163 tgt_fn_key = splay_tree_lookup (&acc_dev->mem_map, &k);
164 gomp_mutex_unlock (&acc_dev->lock);
41dbbb37
TS
165
166 if (tgt_fn_key == NULL)
167 gomp_fatal ("target function wasn't mapped");
168
d93bdab5 169 tgt_fn = (void (*)) tgt_fn_key->tgt_offset;
41dbbb37
TS
170 }
171 else
172 tgt_fn = (void (*)) fn;
173
174 tgt = gomp_map_vars (acc_dev, mapnum, hostaddrs, NULL, sizes, kinds, true,
d9a6bd32 175 GOMP_MAP_VARS_OPENACC);
41dbbb37 176
6e36114c 177 devaddrs = gomp_alloca (sizeof (void *) * mapnum);
41dbbb37 178 for (i = 0; i < mapnum; i++)
d9a6bd32
JJ
179 devaddrs[i] = (void *) (tgt->list[i].key->tgt->tgt_start
180 + tgt->list[i].key->tgt_offset);
41dbbb37 181
5c06742f
NS
182 acc_dev->openacc.exec_func (tgt_fn, mapnum, hostaddrs, devaddrs,
183 async, dims, tgt);
41dbbb37
TS
184
185 /* If running synchronously, unmap immediately. */
186 if (async < acc_async_noval)
187 gomp_unmap_vars (tgt, true);
188 else
189 {
190 gomp_copy_from_async (tgt);
191 acc_dev->openacc.register_async_cleanup_func (tgt);
192 }
193
194 acc_dev->openacc.async_set_async_func (acc_async_sync);
195}
196
3e32ee19
NS
197/* Legacy entry point, only provide host execution. */
198
199void
200GOACC_parallel (int device, void (*fn) (void *),
201 size_t mapnum, void **hostaddrs, size_t *sizes,
202 unsigned short *kinds,
203 int num_gangs, int num_workers, int vector_length,
204 int async, int num_waits, ...)
205{
206 goacc_save_and_set_bind (acc_device_host);
207 fn (hostaddrs);
208 goacc_restore_bind ();
209}
210
41dbbb37 211void
128b26dc 212GOACC_data_start (int device, size_t mapnum,
41dbbb37
TS
213 void **hostaddrs, size_t *sizes, unsigned short *kinds)
214{
215 bool host_fallback = device == GOMP_DEVICE_HOST_FALLBACK;
216 struct target_mem_desc *tgt;
217
01c0b3b0
KT
218#ifdef HAVE_INTTYPES_H
219 gomp_debug (0, "%s: mapnum=%"PRIu64", hostaddrs=%p, size=%p, kinds=%p\n",
220 __FUNCTION__, (uint64_t) mapnum, hostaddrs, sizes, kinds);
221#else
222 gomp_debug (0, "%s: mapnum=%lu, hostaddrs=%p, sizes=%p, kinds=%p\n",
223 __FUNCTION__, (unsigned long) mapnum, hostaddrs, sizes, kinds);
224#endif
41dbbb37 225
d93bdab5 226 goacc_lazy_initialize ();
41dbbb37
TS
227
228 struct goacc_thread *thr = goacc_thread ();
229 struct gomp_device_descr *acc_dev = thr->dev;
230
231 /* Host fallback or 'do nothing'. */
232 if ((acc_dev->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
233 || host_fallback)
234 {
d9a6bd32
JJ
235 tgt = gomp_map_vars (NULL, 0, NULL, NULL, NULL, NULL, true,
236 GOMP_MAP_VARS_OPENACC);
41dbbb37
TS
237 tgt->prev = thr->mapped_data;
238 thr->mapped_data = tgt;
239
240 return;
241 }
242
243 gomp_debug (0, " %s: prepare mappings\n", __FUNCTION__);
244 tgt = gomp_map_vars (acc_dev, mapnum, hostaddrs, NULL, sizes, kinds, true,
d9a6bd32 245 GOMP_MAP_VARS_OPENACC);
41dbbb37
TS
246 gomp_debug (0, " %s: mappings prepared\n", __FUNCTION__);
247 tgt->prev = thr->mapped_data;
248 thr->mapped_data = tgt;
249}
250
251void
252GOACC_data_end (void)
253{
254 struct goacc_thread *thr = goacc_thread ();
255 struct target_mem_desc *tgt = thr->mapped_data;
256
257 gomp_debug (0, " %s: restore mappings\n", __FUNCTION__);
258 thr->mapped_data = tgt->prev;
259 gomp_unmap_vars (tgt, true);
260 gomp_debug (0, " %s: mappings restored\n", __FUNCTION__);
261}
262
263void
128b26dc 264GOACC_enter_exit_data (int device, size_t mapnum,
41dbbb37
TS
265 void **hostaddrs, size_t *sizes, unsigned short *kinds,
266 int async, int num_waits, ...)
267{
268 struct goacc_thread *thr;
269 struct gomp_device_descr *acc_dev;
270 bool host_fallback = device == GOMP_DEVICE_HOST_FALLBACK;
271 bool data_enter = false;
272 size_t i;
273
d93bdab5 274 goacc_lazy_initialize ();
41dbbb37
TS
275
276 thr = goacc_thread ();
277 acc_dev = thr->dev;
278
279 if ((acc_dev->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
280 || host_fallback)
281 return;
282
a091118d 283 if (num_waits)
41dbbb37
TS
284 {
285 va_list ap;
286
287 va_start (ap, num_waits);
3e32ee19 288 goacc_wait (async, num_waits, &ap);
41dbbb37
TS
289 va_end (ap);
290 }
291
292 acc_dev->openacc.async_set_async_func (async);
293
294 /* Determine if this is an "acc enter data". */
295 for (i = 0; i < mapnum; ++i)
296 {
297 unsigned char kind = kinds[i] & 0xff;
298
299 if (kind == GOMP_MAP_POINTER || kind == GOMP_MAP_TO_PSET)
300 continue;
301
302 if (kind == GOMP_MAP_FORCE_ALLOC
303 || kind == GOMP_MAP_FORCE_PRESENT
304 || kind == GOMP_MAP_FORCE_TO)
305 {
306 data_enter = true;
307 break;
308 }
309
310 if (kind == GOMP_MAP_FORCE_DEALLOC
311 || kind == GOMP_MAP_FORCE_FROM)
312 break;
313
314 gomp_fatal (">>>> GOACC_enter_exit_data UNHANDLED kind 0x%.2x",
315 kind);
316 }
317
318 if (data_enter)
319 {
320 for (i = 0; i < mapnum; i++)
321 {
322 unsigned char kind = kinds[i] & 0xff;
323
324 /* Scan for PSETs. */
325 int psets = find_pset (i, mapnum, kinds);
326
327 if (!psets)
328 {
329 switch (kind)
330 {
331 case GOMP_MAP_POINTER:
332 gomp_acc_insert_pointer (1, &hostaddrs[i], &sizes[i],
333 &kinds[i]);
334 break;
335 case GOMP_MAP_FORCE_ALLOC:
336 acc_create (hostaddrs[i], sizes[i]);
337 break;
338 case GOMP_MAP_FORCE_PRESENT:
339 acc_present_or_copyin (hostaddrs[i], sizes[i]);
340 break;
341 case GOMP_MAP_FORCE_TO:
342 acc_present_or_copyin (hostaddrs[i], sizes[i]);
343 break;
344 default:
345 gomp_fatal (">>>> GOACC_enter_exit_data UNHANDLED kind 0x%.2x",
346 kind);
347 break;
348 }
349 }
350 else
351 {
352 gomp_acc_insert_pointer (3, &hostaddrs[i], &sizes[i], &kinds[i]);
353 /* Increment 'i' by two because OpenACC requires fortran
354 arrays to be contiguous, so each PSET is associated with
355 one of MAP_FORCE_ALLOC/MAP_FORCE_PRESET/MAP_FORCE_TO, and
356 one MAP_POINTER. */
357 i += 2;
358 }
359 }
360 }
361 else
362 for (i = 0; i < mapnum; ++i)
363 {
364 unsigned char kind = kinds[i] & 0xff;
365
366 int psets = find_pset (i, mapnum, kinds);
367
368 if (!psets)
369 {
370 switch (kind)
371 {
372 case GOMP_MAP_POINTER:
373 gomp_acc_remove_pointer (hostaddrs[i], (kinds[i] & 0xff)
374 == GOMP_MAP_FORCE_FROM,
375 async, 1);
376 break;
377 case GOMP_MAP_FORCE_DEALLOC:
378 acc_delete (hostaddrs[i], sizes[i]);
379 break;
380 case GOMP_MAP_FORCE_FROM:
381 acc_copyout (hostaddrs[i], sizes[i]);
382 break;
383 default:
384 gomp_fatal (">>>> GOACC_enter_exit_data UNHANDLED kind 0x%.2x",
385 kind);
386 break;
387 }
388 }
389 else
390 {
391 gomp_acc_remove_pointer (hostaddrs[i], (kinds[i] & 0xff)
392 == GOMP_MAP_FORCE_FROM, async, 3);
393 /* See the above comment. */
394 i += 2;
395 }
396 }
397
398 acc_dev->openacc.async_set_async_func (acc_async_sync);
399}
400
401static void
3e32ee19 402goacc_wait (int async, int num_waits, va_list *ap)
41dbbb37
TS
403{
404 struct goacc_thread *thr = goacc_thread ();
405 struct gomp_device_descr *acc_dev = thr->dev;
41dbbb37 406
a091118d 407 while (num_waits--)
41dbbb37 408 {
3e32ee19
NS
409 int qid = va_arg (*ap, int);
410
41dbbb37
TS
411 if (acc_async_test (qid))
412 continue;
413
a091118d
NS
414 if (async == acc_async_sync)
415 acc_wait (qid);
416 else if (qid == async)
417 ;/* If we're waiting on the same asynchronous queue as we're
418 launching on, the queue itself will order work as
419 required, so there's no need to wait explicitly. */
420 else
41dbbb37
TS
421 acc_dev->openacc.async_wait_async_func (qid, async);
422 }
423}
424
425void
128b26dc 426GOACC_update (int device, size_t mapnum,
41dbbb37
TS
427 void **hostaddrs, size_t *sizes, unsigned short *kinds,
428 int async, int num_waits, ...)
429{
430 bool host_fallback = device == GOMP_DEVICE_HOST_FALLBACK;
431 size_t i;
432
d93bdab5 433 goacc_lazy_initialize ();
41dbbb37
TS
434
435 struct goacc_thread *thr = goacc_thread ();
436 struct gomp_device_descr *acc_dev = thr->dev;
437
438 if ((acc_dev->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
439 || host_fallback)
440 return;
441
a091118d 442 if (num_waits)
41dbbb37
TS
443 {
444 va_list ap;
445
446 va_start (ap, num_waits);
3e32ee19 447 goacc_wait (async, num_waits, &ap);
41dbbb37
TS
448 va_end (ap);
449 }
450
451 acc_dev->openacc.async_set_async_func (async);
452
453 for (i = 0; i < mapnum; ++i)
454 {
455 unsigned char kind = kinds[i] & 0xff;
456
457 switch (kind)
458 {
459 case GOMP_MAP_POINTER:
460 case GOMP_MAP_TO_PSET:
461 break;
462
463 case GOMP_MAP_FORCE_TO:
464 acc_update_device (hostaddrs[i], sizes[i]);
465 break;
466
467 case GOMP_MAP_FORCE_FROM:
468 acc_update_self (hostaddrs[i], sizes[i]);
469 break;
470
471 default:
472 gomp_fatal (">>>> GOACC_update UNHANDLED kind 0x%.2x", kind);
473 break;
474 }
475 }
476
477 acc_dev->openacc.async_set_async_func (acc_async_sync);
478}
479
480void
481GOACC_wait (int async, int num_waits, ...)
482{
a091118d
NS
483 if (num_waits)
484 {
485 va_list ap;
41dbbb37 486
a091118d 487 va_start (ap, num_waits);
3e32ee19 488 goacc_wait (async, num_waits, &ap);
a091118d
NS
489 va_end (ap);
490 }
491 else if (async == acc_async_sync)
492 acc_wait_all ();
493 else if (async == acc_async_noval)
a051317b 494 goacc_thread ()->dev->openacc.async_wait_all_async_func (acc_async_noval);
41dbbb37
TS
495}
496
/* Return the number of threads; this implementation always reports
   exactly one.  */

int
GOACC_get_num_threads (void)
{
  return 1;
}
502
/* Return the calling thread's number; this implementation always
   reports zero.  */

int
GOACC_get_thread_num (void)
{
  return 0;
}
6e232ba4
JN
508
509void
510GOACC_declare (int device, size_t mapnum,
511 void **hostaddrs, size_t *sizes, unsigned short *kinds)
512{
513 int i;
514
515 for (i = 0; i < mapnum; i++)
516 {
517 unsigned char kind = kinds[i] & 0xff;
518
519 if (kind == GOMP_MAP_POINTER || kind == GOMP_MAP_TO_PSET)
520 continue;
521
522 switch (kind)
523 {
524 case GOMP_MAP_FORCE_ALLOC:
525 case GOMP_MAP_FORCE_DEALLOC:
526 case GOMP_MAP_FORCE_FROM:
527 case GOMP_MAP_FORCE_TO:
528 case GOMP_MAP_POINTER:
529 GOACC_enter_exit_data (device, 1, &hostaddrs[i], &sizes[i],
530 &kinds[i], 0, 0);
531 break;
532
533 case GOMP_MAP_FORCE_DEVICEPTR:
534 break;
535
536 case GOMP_MAP_ALLOC:
537 if (!acc_is_present (hostaddrs[i], sizes[i]))
538 GOACC_enter_exit_data (device, 1, &hostaddrs[i], &sizes[i],
539 &kinds[i], 0, 0);
540 break;
541
542 case GOMP_MAP_TO:
543 GOACC_enter_exit_data (device, 1, &hostaddrs[i], &sizes[i],
544 &kinds[i], 0, 0);
545
546 break;
547
548 case GOMP_MAP_FROM:
549 kinds[i] = GOMP_MAP_FORCE_FROM;
550 GOACC_enter_exit_data (device, 1, &hostaddrs[i], &sizes[i],
551 &kinds[i], 0, 0);
552 break;
553
554 case GOMP_MAP_FORCE_PRESENT:
555 if (!acc_is_present (hostaddrs[i], sizes[i]))
556 gomp_fatal ("[%p,%ld] is not mapped", hostaddrs[i],
557 (unsigned long) sizes[i]);
558 break;
559
560 default:
561 assert (0);
562 break;
563 }
564 }
565}