]> git.ipfire.org Git - thirdparty/gcc.git/blob - libgomp/oacc-parallel.c
libgomp: rework initialization of offloading
[thirdparty/gcc.git] / libgomp / oacc-parallel.c
1 /* Copyright (C) 2013-2015 Free Software Foundation, Inc.
2
3 Contributed by Mentor Embedded.
4
5 This file is part of the GNU Offloading and Multi Processing Library
6 (libgomp).
7
8 Libgomp is free software; you can redistribute it and/or modify it
9 under the terms of the GNU General Public License as published by
10 the Free Software Foundation; either version 3, or (at your option)
11 any later version.
12
13 Libgomp is distributed in the hope that it will be useful, but WITHOUT ANY
14 WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
15 FOR A PARTICULAR PURPOSE. See the GNU General Public License for
16 more details.
17
18 Under Section 7 of GPL version 3, you are granted additional
19 permissions described in the GCC Runtime Library Exception, version
20 3.1, as published by the Free Software Foundation.
21
22 You should have received a copy of the GNU General Public License and
23 a copy of the GCC Runtime Library Exception along with this program;
24 see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
25 <http://www.gnu.org/licenses/>. */
26
27 /* This file handles OpenACC constructs. */
28
29 #include "openacc.h"
30 #include "libgomp.h"
31 #include "libgomp_g.h"
32 #include "gomp-constants.h"
33 #include "oacc-int.h"
34 #ifdef HAVE_INTTYPES_H
35 # include <inttypes.h> /* For PRIu64. */
36 #endif
37 #include <string.h>
38 #include <stdarg.h>
39 #include <assert.h>
40
41 static int
42 find_pset (int pos, size_t mapnum, unsigned short *kinds)
43 {
44 if (pos + 1 >= mapnum)
45 return 0;
46
47 unsigned char kind = kinds[pos+1] & 0xff;
48
49 return kind == GOMP_MAP_TO_PSET;
50 }
51
52
53 /* Ensure that the target device for DEVICE_TYPE is initialised (and that
54 plugins have been loaded if appropriate). The ACC_dev variable for the
55 current thread will be set appropriately for the given device type on
56 return. */
57
58 attribute_hidden void
59 select_acc_device (int device_type)
60 {
61 goacc_lazy_initialize ();
62
63 if (device_type == GOMP_DEVICE_HOST_FALLBACK)
64 return;
65
66 if (device_type == acc_device_none)
67 device_type = acc_device_host;
68
69 if (device_type >= 0)
70 {
71 /* NOTE: this will go badly if the surrounding data environment is set up
72 to use a different device type. We'll just have to trust that users
73 know what they're doing... */
74 acc_set_device_type (device_type);
75 }
76 }
77
78 static void goacc_wait (int async, int num_waits, va_list ap);
79
80 void
81 GOACC_parallel (int device, void (*fn) (void *),
82 size_t mapnum, void **hostaddrs, size_t *sizes,
83 unsigned short *kinds,
84 int num_gangs, int num_workers, int vector_length,
85 int async, int num_waits, ...)
86 {
87 bool host_fallback = device == GOMP_DEVICE_HOST_FALLBACK;
88 va_list ap;
89 struct goacc_thread *thr;
90 struct gomp_device_descr *acc_dev;
91 struct target_mem_desc *tgt;
92 void **devaddrs;
93 unsigned int i;
94 struct splay_tree_key_s k;
95 splay_tree_key tgt_fn_key;
96 void (*tgt_fn);
97
98 if (num_gangs != 1)
99 gomp_fatal ("num_gangs (%d) different from one is not yet supported",
100 num_gangs);
101 if (num_workers != 1)
102 gomp_fatal ("num_workers (%d) different from one is not yet supported",
103 num_workers);
104
105 #ifdef HAVE_INTTYPES_H
106 gomp_debug (0, "%s: mapnum=%"PRIu64", hostaddrs=%p, size=%p, kinds=%p, "
107 "async = %d\n",
108 __FUNCTION__, (uint64_t) mapnum, hostaddrs, sizes, kinds, async);
109 #else
110 gomp_debug (0, "%s: mapnum=%lu, hostaddrs=%p, sizes=%p, kinds=%p, async=%d\n",
111 __FUNCTION__, (unsigned long) mapnum, hostaddrs, sizes, kinds,
112 async);
113 #endif
114 select_acc_device (device);
115
116 thr = goacc_thread ();
117 acc_dev = thr->dev;
118
119 /* Host fallback if "if" clause is false or if the current device is set to
120 the host. */
121 if (host_fallback)
122 {
123 goacc_save_and_set_bind (acc_device_host);
124 fn (hostaddrs);
125 goacc_restore_bind ();
126 return;
127 }
128 else if (acc_device_type (acc_dev->type) == acc_device_host)
129 {
130 fn (hostaddrs);
131 return;
132 }
133
134 va_start (ap, num_waits);
135
136 if (num_waits > 0)
137 goacc_wait (async, num_waits, ap);
138
139 va_end (ap);
140
141 acc_dev->openacc.async_set_async_func (async);
142
143 if (!(acc_dev->capabilities & GOMP_OFFLOAD_CAP_NATIVE_EXEC))
144 {
145 k.host_start = (uintptr_t) fn;
146 k.host_end = k.host_start + 1;
147 gomp_mutex_lock (&acc_dev->lock);
148 tgt_fn_key = splay_tree_lookup (&acc_dev->mem_map, &k);
149 gomp_mutex_unlock (&acc_dev->lock);
150
151 if (tgt_fn_key == NULL)
152 gomp_fatal ("target function wasn't mapped");
153
154 tgt_fn = (void (*)) tgt_fn_key->tgt->tgt_start;
155 }
156 else
157 tgt_fn = (void (*)) fn;
158
159 tgt = gomp_map_vars (acc_dev, mapnum, hostaddrs, NULL, sizes, kinds, true,
160 false);
161
162 devaddrs = gomp_alloca (sizeof (void *) * mapnum);
163 for (i = 0; i < mapnum; i++)
164 devaddrs[i] = (void *) (tgt->list[i]->tgt->tgt_start
165 + tgt->list[i]->tgt_offset);
166
167 acc_dev->openacc.exec_func (tgt_fn, mapnum, hostaddrs, devaddrs, sizes, kinds,
168 num_gangs, num_workers, vector_length, async,
169 tgt);
170
171 /* If running synchronously, unmap immediately. */
172 if (async < acc_async_noval)
173 gomp_unmap_vars (tgt, true);
174 else
175 {
176 gomp_copy_from_async (tgt);
177 acc_dev->openacc.register_async_cleanup_func (tgt);
178 }
179
180 acc_dev->openacc.async_set_async_func (acc_async_sync);
181 }
182
183 void
184 GOACC_data_start (int device, size_t mapnum,
185 void **hostaddrs, size_t *sizes, unsigned short *kinds)
186 {
187 bool host_fallback = device == GOMP_DEVICE_HOST_FALLBACK;
188 struct target_mem_desc *tgt;
189
190 #ifdef HAVE_INTTYPES_H
191 gomp_debug (0, "%s: mapnum=%"PRIu64", hostaddrs=%p, size=%p, kinds=%p\n",
192 __FUNCTION__, (uint64_t) mapnum, hostaddrs, sizes, kinds);
193 #else
194 gomp_debug (0, "%s: mapnum=%lu, hostaddrs=%p, sizes=%p, kinds=%p\n",
195 __FUNCTION__, (unsigned long) mapnum, hostaddrs, sizes, kinds);
196 #endif
197
198 select_acc_device (device);
199
200 struct goacc_thread *thr = goacc_thread ();
201 struct gomp_device_descr *acc_dev = thr->dev;
202
203 /* Host fallback or 'do nothing'. */
204 if ((acc_dev->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
205 || host_fallback)
206 {
207 tgt = gomp_map_vars (NULL, 0, NULL, NULL, NULL, NULL, true, false);
208 tgt->prev = thr->mapped_data;
209 thr->mapped_data = tgt;
210
211 return;
212 }
213
214 gomp_debug (0, " %s: prepare mappings\n", __FUNCTION__);
215 tgt = gomp_map_vars (acc_dev, mapnum, hostaddrs, NULL, sizes, kinds, true,
216 false);
217 gomp_debug (0, " %s: mappings prepared\n", __FUNCTION__);
218 tgt->prev = thr->mapped_data;
219 thr->mapped_data = tgt;
220 }
221
222 void
223 GOACC_data_end (void)
224 {
225 struct goacc_thread *thr = goacc_thread ();
226 struct target_mem_desc *tgt = thr->mapped_data;
227
228 gomp_debug (0, " %s: restore mappings\n", __FUNCTION__);
229 thr->mapped_data = tgt->prev;
230 gomp_unmap_vars (tgt, true);
231 gomp_debug (0, " %s: mappings restored\n", __FUNCTION__);
232 }
233
234 void
235 GOACC_enter_exit_data (int device, size_t mapnum,
236 void **hostaddrs, size_t *sizes, unsigned short *kinds,
237 int async, int num_waits, ...)
238 {
239 struct goacc_thread *thr;
240 struct gomp_device_descr *acc_dev;
241 bool host_fallback = device == GOMP_DEVICE_HOST_FALLBACK;
242 bool data_enter = false;
243 size_t i;
244
245 select_acc_device (device);
246
247 thr = goacc_thread ();
248 acc_dev = thr->dev;
249
250 if ((acc_dev->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
251 || host_fallback)
252 return;
253
254 if (num_waits > 0)
255 {
256 va_list ap;
257
258 va_start (ap, num_waits);
259
260 goacc_wait (async, num_waits, ap);
261
262 va_end (ap);
263 }
264
265 acc_dev->openacc.async_set_async_func (async);
266
267 /* Determine if this is an "acc enter data". */
268 for (i = 0; i < mapnum; ++i)
269 {
270 unsigned char kind = kinds[i] & 0xff;
271
272 if (kind == GOMP_MAP_POINTER || kind == GOMP_MAP_TO_PSET)
273 continue;
274
275 if (kind == GOMP_MAP_FORCE_ALLOC
276 || kind == GOMP_MAP_FORCE_PRESENT
277 || kind == GOMP_MAP_FORCE_TO)
278 {
279 data_enter = true;
280 break;
281 }
282
283 if (kind == GOMP_MAP_FORCE_DEALLOC
284 || kind == GOMP_MAP_FORCE_FROM)
285 break;
286
287 gomp_fatal (">>>> GOACC_enter_exit_data UNHANDLED kind 0x%.2x",
288 kind);
289 }
290
291 if (data_enter)
292 {
293 for (i = 0; i < mapnum; i++)
294 {
295 unsigned char kind = kinds[i] & 0xff;
296
297 /* Scan for PSETs. */
298 int psets = find_pset (i, mapnum, kinds);
299
300 if (!psets)
301 {
302 switch (kind)
303 {
304 case GOMP_MAP_POINTER:
305 gomp_acc_insert_pointer (1, &hostaddrs[i], &sizes[i],
306 &kinds[i]);
307 break;
308 case GOMP_MAP_FORCE_ALLOC:
309 acc_create (hostaddrs[i], sizes[i]);
310 break;
311 case GOMP_MAP_FORCE_PRESENT:
312 acc_present_or_copyin (hostaddrs[i], sizes[i]);
313 break;
314 case GOMP_MAP_FORCE_TO:
315 acc_present_or_copyin (hostaddrs[i], sizes[i]);
316 break;
317 default:
318 gomp_fatal (">>>> GOACC_enter_exit_data UNHANDLED kind 0x%.2x",
319 kind);
320 break;
321 }
322 }
323 else
324 {
325 gomp_acc_insert_pointer (3, &hostaddrs[i], &sizes[i], &kinds[i]);
326 /* Increment 'i' by two because OpenACC requires fortran
327 arrays to be contiguous, so each PSET is associated with
328 one of MAP_FORCE_ALLOC/MAP_FORCE_PRESET/MAP_FORCE_TO, and
329 one MAP_POINTER. */
330 i += 2;
331 }
332 }
333 }
334 else
335 for (i = 0; i < mapnum; ++i)
336 {
337 unsigned char kind = kinds[i] & 0xff;
338
339 int psets = find_pset (i, mapnum, kinds);
340
341 if (!psets)
342 {
343 switch (kind)
344 {
345 case GOMP_MAP_POINTER:
346 gomp_acc_remove_pointer (hostaddrs[i], (kinds[i] & 0xff)
347 == GOMP_MAP_FORCE_FROM,
348 async, 1);
349 break;
350 case GOMP_MAP_FORCE_DEALLOC:
351 acc_delete (hostaddrs[i], sizes[i]);
352 break;
353 case GOMP_MAP_FORCE_FROM:
354 acc_copyout (hostaddrs[i], sizes[i]);
355 break;
356 default:
357 gomp_fatal (">>>> GOACC_enter_exit_data UNHANDLED kind 0x%.2x",
358 kind);
359 break;
360 }
361 }
362 else
363 {
364 gomp_acc_remove_pointer (hostaddrs[i], (kinds[i] & 0xff)
365 == GOMP_MAP_FORCE_FROM, async, 3);
366 /* See the above comment. */
367 i += 2;
368 }
369 }
370
371 acc_dev->openacc.async_set_async_func (acc_async_sync);
372 }
373
374 static void
375 goacc_wait (int async, int num_waits, va_list ap)
376 {
377 struct goacc_thread *thr = goacc_thread ();
378 struct gomp_device_descr *acc_dev = thr->dev;
379 int i;
380
381 assert (num_waits >= 0);
382
383 if (async == acc_async_sync && num_waits == 0)
384 {
385 acc_wait_all ();
386 return;
387 }
388
389 if (async == acc_async_sync && num_waits)
390 {
391 for (i = 0; i < num_waits; i++)
392 {
393 int qid = va_arg (ap, int);
394
395 if (acc_async_test (qid))
396 continue;
397
398 acc_wait (qid);
399 }
400 return;
401 }
402
403 if (async == acc_async_noval && num_waits == 0)
404 {
405 acc_dev->openacc.async_wait_all_async_func (acc_async_noval);
406 return;
407 }
408
409 for (i = 0; i < num_waits; i++)
410 {
411 int qid = va_arg (ap, int);
412
413 if (acc_async_test (qid))
414 continue;
415
416 /* If we're waiting on the same asynchronous queue as we're launching on,
417 the queue itself will order work as required, so there's no need to
418 wait explicitly. */
419 if (qid != async)
420 acc_dev->openacc.async_wait_async_func (qid, async);
421 }
422 }
423
424 void
425 GOACC_update (int device, size_t mapnum,
426 void **hostaddrs, size_t *sizes, unsigned short *kinds,
427 int async, int num_waits, ...)
428 {
429 bool host_fallback = device == GOMP_DEVICE_HOST_FALLBACK;
430 size_t i;
431
432 select_acc_device (device);
433
434 struct goacc_thread *thr = goacc_thread ();
435 struct gomp_device_descr *acc_dev = thr->dev;
436
437 if ((acc_dev->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
438 || host_fallback)
439 return;
440
441 if (num_waits > 0)
442 {
443 va_list ap;
444
445 va_start (ap, num_waits);
446
447 goacc_wait (async, num_waits, ap);
448
449 va_end (ap);
450 }
451
452 acc_dev->openacc.async_set_async_func (async);
453
454 for (i = 0; i < mapnum; ++i)
455 {
456 unsigned char kind = kinds[i] & 0xff;
457
458 switch (kind)
459 {
460 case GOMP_MAP_POINTER:
461 case GOMP_MAP_TO_PSET:
462 break;
463
464 case GOMP_MAP_FORCE_TO:
465 acc_update_device (hostaddrs[i], sizes[i]);
466 break;
467
468 case GOMP_MAP_FORCE_FROM:
469 acc_update_self (hostaddrs[i], sizes[i]);
470 break;
471
472 default:
473 gomp_fatal (">>>> GOACC_update UNHANDLED kind 0x%.2x", kind);
474 break;
475 }
476 }
477
478 acc_dev->openacc.async_set_async_func (acc_async_sync);
479 }
480
/* Implement the OpenACC "wait" directive: forward ASYNC and the NUM_WAITS
   trailing int queue ids to goacc_wait.  */

void
GOACC_wait (int async, int num_waits, ...)
{
  va_list queue_ids;

  va_start (queue_ids, num_waits);
  goacc_wait (async, num_waits, queue_ids);
  va_end (queue_ids);
}
492
/* Return the number of threads; this implementation always reports 1.  */

int
GOACC_get_num_threads (void)
{
  enum { goacc_num_threads = 1 };

  return goacc_num_threads;
}
498
/* Return the calling thread's number; this implementation always
   reports 0.  */

int
GOACC_get_thread_num (void)
{
  enum { goacc_thread_num = 0 };

  return goacc_thread_num;
}