]>
Commit | Line | Data |
---|---|---|
41dbbb37 TS |
1 | /* Copyright (C) 2013-2015 Free Software Foundation, Inc. |
2 | ||
3 | Contributed by Mentor Embedded. | |
4 | ||
5 | This file is part of the GNU Offloading and Multi Processing Library | |
6 | (libgomp). | |
7 | ||
8 | Libgomp is free software; you can redistribute it and/or modify it | |
9 | under the terms of the GNU General Public License as published by | |
10 | the Free Software Foundation; either version 3, or (at your option) | |
11 | any later version. | |
12 | ||
13 | Libgomp is distributed in the hope that it will be useful, but WITHOUT ANY | |
14 | WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS | |
15 | FOR A PARTICULAR PURPOSE. See the GNU General Public License for | |
16 | more details. | |
17 | ||
18 | Under Section 7 of GPL version 3, you are granted additional | |
19 | permissions described in the GCC Runtime Library Exception, version | |
20 | 3.1, as published by the Free Software Foundation. | |
21 | ||
22 | You should have received a copy of the GNU General Public License and | |
23 | a copy of the GCC Runtime Library Exception along with this program; | |
24 | see the files COPYING3 and COPYING.RUNTIME respectively. If not, see | |
25 | <http://www.gnu.org/licenses/>. */ | |
26 | ||
27 | /* This file handles OpenACC constructs. */ | |
28 | ||
29 | #include "openacc.h" | |
30 | #include "libgomp.h" | |
31 | #include "libgomp_g.h" | |
32 | #include "gomp-constants.h" | |
33 | #include "oacc-int.h" | |
34 | #include <string.h> | |
35 | #include <stdarg.h> | |
36 | #include <assert.h> | |
37 | #include <alloca.h> | |
38 | ||
39 | static int | |
40 | find_pset (int pos, size_t mapnum, unsigned short *kinds) | |
41 | { | |
42 | if (pos + 1 >= mapnum) | |
43 | return 0; | |
44 | ||
45 | unsigned char kind = kinds[pos+1] & 0xff; | |
46 | ||
47 | return kind == GOMP_MAP_TO_PSET; | |
48 | } | |
49 | ||
50 | ||
51 | /* Ensure that the target device for DEVICE_TYPE is initialised (and that | |
52 | plugins have been loaded if appropriate). The ACC_dev variable for the | |
53 | current thread will be set appropriately for the given device type on | |
54 | return. */ | |
55 | ||
56 | attribute_hidden void | |
57 | select_acc_device (int device_type) | |
58 | { | |
59 | goacc_lazy_initialize (); | |
60 | ||
61 | if (device_type == GOMP_DEVICE_HOST_FALLBACK) | |
62 | return; | |
63 | ||
64 | if (device_type == acc_device_none) | |
65 | device_type = acc_device_host; | |
66 | ||
67 | if (device_type >= 0) | |
68 | { | |
69 | /* NOTE: this will go badly if the surrounding data environment is set up | |
70 | to use a different device type. We'll just have to trust that users | |
71 | know what they're doing... */ | |
72 | acc_set_device_type (device_type); | |
73 | } | |
74 | } | |
75 | ||
76 | static void goacc_wait (int async, int num_waits, va_list ap); | |
77 | ||
78 | void | |
79 | GOACC_parallel (int device, void (*fn) (void *), const void *offload_table, | |
80 | size_t mapnum, void **hostaddrs, size_t *sizes, | |
81 | unsigned short *kinds, | |
82 | int num_gangs, int num_workers, int vector_length, | |
83 | int async, int num_waits, ...) | |
84 | { | |
85 | bool host_fallback = device == GOMP_DEVICE_HOST_FALLBACK; | |
86 | va_list ap; | |
87 | struct goacc_thread *thr; | |
88 | struct gomp_device_descr *acc_dev; | |
89 | struct target_mem_desc *tgt; | |
90 | void **devaddrs; | |
91 | unsigned int i; | |
92 | struct splay_tree_key_s k; | |
93 | splay_tree_key tgt_fn_key; | |
94 | void (*tgt_fn); | |
95 | ||
96 | if (num_gangs != 1) | |
97 | gomp_fatal ("num_gangs (%d) different from one is not yet supported", | |
98 | num_gangs); | |
99 | if (num_workers != 1) | |
100 | gomp_fatal ("num_workers (%d) different from one is not yet supported", | |
101 | num_workers); | |
102 | ||
103 | gomp_debug (0, "%s: mapnum=%zd, hostaddrs=%p, sizes=%p, kinds=%p, async=%d\n", | |
104 | __FUNCTION__, mapnum, hostaddrs, sizes, kinds, async); | |
105 | ||
106 | select_acc_device (device); | |
107 | ||
108 | thr = goacc_thread (); | |
109 | acc_dev = thr->dev; | |
110 | ||
111 | /* Host fallback if "if" clause is false or if the current device is set to | |
112 | the host. */ | |
113 | if (host_fallback) | |
114 | { | |
115 | goacc_save_and_set_bind (acc_device_host); | |
116 | fn (hostaddrs); | |
117 | goacc_restore_bind (); | |
118 | return; | |
119 | } | |
120 | else if (acc_device_type (acc_dev->type) == acc_device_host) | |
121 | { | |
122 | fn (hostaddrs); | |
123 | return; | |
124 | } | |
125 | ||
126 | va_start (ap, num_waits); | |
127 | ||
128 | if (num_waits > 0) | |
129 | goacc_wait (async, num_waits, ap); | |
130 | ||
131 | va_end (ap); | |
132 | ||
133 | acc_dev->openacc.async_set_async_func (async); | |
134 | ||
135 | if (!(acc_dev->capabilities & GOMP_OFFLOAD_CAP_NATIVE_EXEC)) | |
136 | { | |
137 | k.host_start = (uintptr_t) fn; | |
138 | k.host_end = k.host_start + 1; | |
139 | gomp_mutex_lock (&acc_dev->mem_map.lock); | |
140 | tgt_fn_key = splay_tree_lookup (&acc_dev->mem_map.splay_tree, &k); | |
141 | gomp_mutex_unlock (&acc_dev->mem_map.lock); | |
142 | ||
143 | if (tgt_fn_key == NULL) | |
144 | gomp_fatal ("target function wasn't mapped"); | |
145 | ||
146 | tgt_fn = (void (*)) tgt_fn_key->tgt->tgt_start; | |
147 | } | |
148 | else | |
149 | tgt_fn = (void (*)) fn; | |
150 | ||
151 | tgt = gomp_map_vars (acc_dev, mapnum, hostaddrs, NULL, sizes, kinds, true, | |
152 | false); | |
153 | ||
154 | devaddrs = alloca (sizeof (void *) * mapnum); | |
155 | for (i = 0; i < mapnum; i++) | |
156 | devaddrs[i] = (void *) (tgt->list[i]->tgt->tgt_start | |
157 | + tgt->list[i]->tgt_offset); | |
158 | ||
159 | acc_dev->openacc.exec_func (tgt_fn, mapnum, hostaddrs, devaddrs, sizes, kinds, | |
160 | num_gangs, num_workers, vector_length, async, | |
161 | tgt); | |
162 | ||
163 | /* If running synchronously, unmap immediately. */ | |
164 | if (async < acc_async_noval) | |
165 | gomp_unmap_vars (tgt, true); | |
166 | else | |
167 | { | |
168 | gomp_copy_from_async (tgt); | |
169 | acc_dev->openacc.register_async_cleanup_func (tgt); | |
170 | } | |
171 | ||
172 | acc_dev->openacc.async_set_async_func (acc_async_sync); | |
173 | } | |
174 | ||
175 | void | |
176 | GOACC_data_start (int device, const void *offload_table, size_t mapnum, | |
177 | void **hostaddrs, size_t *sizes, unsigned short *kinds) | |
178 | { | |
179 | bool host_fallback = device == GOMP_DEVICE_HOST_FALLBACK; | |
180 | struct target_mem_desc *tgt; | |
181 | ||
182 | gomp_debug (0, "%s: mapnum=%zd, hostaddrs=%p, sizes=%p, kinds=%p\n", | |
183 | __FUNCTION__, mapnum, hostaddrs, sizes, kinds); | |
184 | ||
185 | select_acc_device (device); | |
186 | ||
187 | struct goacc_thread *thr = goacc_thread (); | |
188 | struct gomp_device_descr *acc_dev = thr->dev; | |
189 | ||
190 | /* Host fallback or 'do nothing'. */ | |
191 | if ((acc_dev->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM) | |
192 | || host_fallback) | |
193 | { | |
194 | tgt = gomp_map_vars (NULL, 0, NULL, NULL, NULL, NULL, true, false); | |
195 | tgt->prev = thr->mapped_data; | |
196 | thr->mapped_data = tgt; | |
197 | ||
198 | return; | |
199 | } | |
200 | ||
201 | gomp_debug (0, " %s: prepare mappings\n", __FUNCTION__); | |
202 | tgt = gomp_map_vars (acc_dev, mapnum, hostaddrs, NULL, sizes, kinds, true, | |
203 | false); | |
204 | gomp_debug (0, " %s: mappings prepared\n", __FUNCTION__); | |
205 | tgt->prev = thr->mapped_data; | |
206 | thr->mapped_data = tgt; | |
207 | } | |
208 | ||
209 | void | |
210 | GOACC_data_end (void) | |
211 | { | |
212 | struct goacc_thread *thr = goacc_thread (); | |
213 | struct target_mem_desc *tgt = thr->mapped_data; | |
214 | ||
215 | gomp_debug (0, " %s: restore mappings\n", __FUNCTION__); | |
216 | thr->mapped_data = tgt->prev; | |
217 | gomp_unmap_vars (tgt, true); | |
218 | gomp_debug (0, " %s: mappings restored\n", __FUNCTION__); | |
219 | } | |
220 | ||
221 | void | |
222 | GOACC_enter_exit_data (int device, const void *offload_table, size_t mapnum, | |
223 | void **hostaddrs, size_t *sizes, unsigned short *kinds, | |
224 | int async, int num_waits, ...) | |
225 | { | |
226 | struct goacc_thread *thr; | |
227 | struct gomp_device_descr *acc_dev; | |
228 | bool host_fallback = device == GOMP_DEVICE_HOST_FALLBACK; | |
229 | bool data_enter = false; | |
230 | size_t i; | |
231 | ||
232 | select_acc_device (device); | |
233 | ||
234 | thr = goacc_thread (); | |
235 | acc_dev = thr->dev; | |
236 | ||
237 | if ((acc_dev->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM) | |
238 | || host_fallback) | |
239 | return; | |
240 | ||
241 | if (num_waits > 0) | |
242 | { | |
243 | va_list ap; | |
244 | ||
245 | va_start (ap, num_waits); | |
246 | ||
247 | goacc_wait (async, num_waits, ap); | |
248 | ||
249 | va_end (ap); | |
250 | } | |
251 | ||
252 | acc_dev->openacc.async_set_async_func (async); | |
253 | ||
254 | /* Determine if this is an "acc enter data". */ | |
255 | for (i = 0; i < mapnum; ++i) | |
256 | { | |
257 | unsigned char kind = kinds[i] & 0xff; | |
258 | ||
259 | if (kind == GOMP_MAP_POINTER || kind == GOMP_MAP_TO_PSET) | |
260 | continue; | |
261 | ||
262 | if (kind == GOMP_MAP_FORCE_ALLOC | |
263 | || kind == GOMP_MAP_FORCE_PRESENT | |
264 | || kind == GOMP_MAP_FORCE_TO) | |
265 | { | |
266 | data_enter = true; | |
267 | break; | |
268 | } | |
269 | ||
270 | if (kind == GOMP_MAP_FORCE_DEALLOC | |
271 | || kind == GOMP_MAP_FORCE_FROM) | |
272 | break; | |
273 | ||
274 | gomp_fatal (">>>> GOACC_enter_exit_data UNHANDLED kind 0x%.2x", | |
275 | kind); | |
276 | } | |
277 | ||
278 | if (data_enter) | |
279 | { | |
280 | for (i = 0; i < mapnum; i++) | |
281 | { | |
282 | unsigned char kind = kinds[i] & 0xff; | |
283 | ||
284 | /* Scan for PSETs. */ | |
285 | int psets = find_pset (i, mapnum, kinds); | |
286 | ||
287 | if (!psets) | |
288 | { | |
289 | switch (kind) | |
290 | { | |
291 | case GOMP_MAP_POINTER: | |
292 | gomp_acc_insert_pointer (1, &hostaddrs[i], &sizes[i], | |
293 | &kinds[i]); | |
294 | break; | |
295 | case GOMP_MAP_FORCE_ALLOC: | |
296 | acc_create (hostaddrs[i], sizes[i]); | |
297 | break; | |
298 | case GOMP_MAP_FORCE_PRESENT: | |
299 | acc_present_or_copyin (hostaddrs[i], sizes[i]); | |
300 | break; | |
301 | case GOMP_MAP_FORCE_TO: | |
302 | acc_present_or_copyin (hostaddrs[i], sizes[i]); | |
303 | break; | |
304 | default: | |
305 | gomp_fatal (">>>> GOACC_enter_exit_data UNHANDLED kind 0x%.2x", | |
306 | kind); | |
307 | break; | |
308 | } | |
309 | } | |
310 | else | |
311 | { | |
312 | gomp_acc_insert_pointer (3, &hostaddrs[i], &sizes[i], &kinds[i]); | |
313 | /* Increment 'i' by two because OpenACC requires fortran | |
314 | arrays to be contiguous, so each PSET is associated with | |
315 | one of MAP_FORCE_ALLOC/MAP_FORCE_PRESET/MAP_FORCE_TO, and | |
316 | one MAP_POINTER. */ | |
317 | i += 2; | |
318 | } | |
319 | } | |
320 | } | |
321 | else | |
322 | for (i = 0; i < mapnum; ++i) | |
323 | { | |
324 | unsigned char kind = kinds[i] & 0xff; | |
325 | ||
326 | int psets = find_pset (i, mapnum, kinds); | |
327 | ||
328 | if (!psets) | |
329 | { | |
330 | switch (kind) | |
331 | { | |
332 | case GOMP_MAP_POINTER: | |
333 | gomp_acc_remove_pointer (hostaddrs[i], (kinds[i] & 0xff) | |
334 | == GOMP_MAP_FORCE_FROM, | |
335 | async, 1); | |
336 | break; | |
337 | case GOMP_MAP_FORCE_DEALLOC: | |
338 | acc_delete (hostaddrs[i], sizes[i]); | |
339 | break; | |
340 | case GOMP_MAP_FORCE_FROM: | |
341 | acc_copyout (hostaddrs[i], sizes[i]); | |
342 | break; | |
343 | default: | |
344 | gomp_fatal (">>>> GOACC_enter_exit_data UNHANDLED kind 0x%.2x", | |
345 | kind); | |
346 | break; | |
347 | } | |
348 | } | |
349 | else | |
350 | { | |
351 | gomp_acc_remove_pointer (hostaddrs[i], (kinds[i] & 0xff) | |
352 | == GOMP_MAP_FORCE_FROM, async, 3); | |
353 | /* See the above comment. */ | |
354 | i += 2; | |
355 | } | |
356 | } | |
357 | ||
358 | acc_dev->openacc.async_set_async_func (acc_async_sync); | |
359 | } | |
360 | ||
361 | static void | |
362 | goacc_wait (int async, int num_waits, va_list ap) | |
363 | { | |
364 | struct goacc_thread *thr = goacc_thread (); | |
365 | struct gomp_device_descr *acc_dev = thr->dev; | |
366 | int i; | |
367 | ||
368 | assert (num_waits >= 0); | |
369 | ||
370 | if (async == acc_async_sync && num_waits == 0) | |
371 | { | |
372 | acc_wait_all (); | |
373 | return; | |
374 | } | |
375 | ||
376 | if (async == acc_async_sync && num_waits) | |
377 | { | |
378 | for (i = 0; i < num_waits; i++) | |
379 | { | |
380 | int qid = va_arg (ap, int); | |
381 | ||
382 | if (acc_async_test (qid)) | |
383 | continue; | |
384 | ||
385 | acc_wait (qid); | |
386 | } | |
387 | return; | |
388 | } | |
389 | ||
390 | if (async == acc_async_noval && num_waits == 0) | |
391 | { | |
392 | acc_dev->openacc.async_wait_all_async_func (acc_async_noval); | |
393 | return; | |
394 | } | |
395 | ||
396 | for (i = 0; i < num_waits; i++) | |
397 | { | |
398 | int qid = va_arg (ap, int); | |
399 | ||
400 | if (acc_async_test (qid)) | |
401 | continue; | |
402 | ||
403 | /* If we're waiting on the same asynchronous queue as we're launching on, | |
404 | the queue itself will order work as required, so there's no need to | |
405 | wait explicitly. */ | |
406 | if (qid != async) | |
407 | acc_dev->openacc.async_wait_async_func (qid, async); | |
408 | } | |
409 | } | |
410 | ||
411 | void | |
412 | GOACC_update (int device, const void *offload_table, size_t mapnum, | |
413 | void **hostaddrs, size_t *sizes, unsigned short *kinds, | |
414 | int async, int num_waits, ...) | |
415 | { | |
416 | bool host_fallback = device == GOMP_DEVICE_HOST_FALLBACK; | |
417 | size_t i; | |
418 | ||
419 | select_acc_device (device); | |
420 | ||
421 | struct goacc_thread *thr = goacc_thread (); | |
422 | struct gomp_device_descr *acc_dev = thr->dev; | |
423 | ||
424 | if ((acc_dev->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM) | |
425 | || host_fallback) | |
426 | return; | |
427 | ||
428 | if (num_waits > 0) | |
429 | { | |
430 | va_list ap; | |
431 | ||
432 | va_start (ap, num_waits); | |
433 | ||
434 | goacc_wait (async, num_waits, ap); | |
435 | ||
436 | va_end (ap); | |
437 | } | |
438 | ||
439 | acc_dev->openacc.async_set_async_func (async); | |
440 | ||
441 | for (i = 0; i < mapnum; ++i) | |
442 | { | |
443 | unsigned char kind = kinds[i] & 0xff; | |
444 | ||
445 | switch (kind) | |
446 | { | |
447 | case GOMP_MAP_POINTER: | |
448 | case GOMP_MAP_TO_PSET: | |
449 | break; | |
450 | ||
451 | case GOMP_MAP_FORCE_TO: | |
452 | acc_update_device (hostaddrs[i], sizes[i]); | |
453 | break; | |
454 | ||
455 | case GOMP_MAP_FORCE_FROM: | |
456 | acc_update_self (hostaddrs[i], sizes[i]); | |
457 | break; | |
458 | ||
459 | default: | |
460 | gomp_fatal (">>>> GOACC_update UNHANDLED kind 0x%.2x", kind); | |
461 | break; | |
462 | } | |
463 | } | |
464 | ||
465 | acc_dev->openacc.async_set_async_func (acc_async_sync); | |
466 | } | |
467 | ||
/* Entry point for the OpenACC wait directive: forward ASYNC and the
   NUM_WAITS trailing int queue ids to goacc_wait.  */

void
GOACC_wait (int async, int num_waits, ...)
{
  va_list queue_ids;

  va_start (queue_ids, num_waits);
  goacc_wait (async, num_waits, queue_ids);
  va_end (queue_ids);
}
479 | ||
/* Return the thread count, which this implementation always reports
   as 1.  */

int
GOACC_get_num_threads (void)
{
  const int num_threads = 1;

  return num_threads;
}
485 | ||
/* Return the thread number, which this implementation always reports
   as 0.  */

int
GOACC_get_thread_num (void)
{
  const int thread_num = 0;

  return thread_num;
}