]>
Commit | Line | Data |
---|---|---|
41dbbb37 TS |
1 | /* Copyright (C) 2013-2015 Free Software Foundation, Inc. |
2 | ||
3 | Contributed by Mentor Embedded. | |
4 | ||
5 | This file is part of the GNU Offloading and Multi Processing Library | |
6 | (libgomp). | |
7 | ||
8 | Libgomp is free software; you can redistribute it and/or modify it | |
9 | under the terms of the GNU General Public License as published by | |
10 | the Free Software Foundation; either version 3, or (at your option) | |
11 | any later version. | |
12 | ||
13 | Libgomp is distributed in the hope that it will be useful, but WITHOUT ANY | |
14 | WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS | |
15 | FOR A PARTICULAR PURPOSE. See the GNU General Public License for | |
16 | more details. | |
17 | ||
18 | Under Section 7 of GPL version 3, you are granted additional | |
19 | permissions described in the GCC Runtime Library Exception, version | |
20 | 3.1, as published by the Free Software Foundation. | |
21 | ||
22 | You should have received a copy of the GNU General Public License and | |
23 | a copy of the GCC Runtime Library Exception along with this program; | |
24 | see the files COPYING3 and COPYING.RUNTIME respectively. If not, see | |
25 | <http://www.gnu.org/licenses/>. */ | |
26 | ||
27 | /* This file handles OpenACC constructs. */ | |
28 | ||
29 | #include "openacc.h" | |
30 | #include "libgomp.h" | |
31 | #include "libgomp_g.h" | |
32 | #include "gomp-constants.h" | |
33 | #include "oacc-int.h" | |
01c0b3b0 KT |
34 | #ifdef HAVE_INTTYPES_H |
35 | # include <inttypes.h> /* For PRIu64. */ | |
36 | #endif | |
41dbbb37 TS |
37 | #include <string.h> |
38 | #include <stdarg.h> | |
39 | #include <assert.h> | |
41dbbb37 TS |
40 | |
41 | static int | |
42 | find_pset (int pos, size_t mapnum, unsigned short *kinds) | |
43 | { | |
44 | if (pos + 1 >= mapnum) | |
45 | return 0; | |
46 | ||
47 | unsigned char kind = kinds[pos+1] & 0xff; | |
48 | ||
49 | return kind == GOMP_MAP_TO_PSET; | |
50 | } | |
51 | ||
41dbbb37 TS |
52 | static void goacc_wait (int async, int num_waits, va_list ap); |
53 | ||
54 | void | |
128b26dc | 55 | GOACC_parallel (int device, void (*fn) (void *), |
41dbbb37 TS |
56 | size_t mapnum, void **hostaddrs, size_t *sizes, |
57 | unsigned short *kinds, | |
58 | int num_gangs, int num_workers, int vector_length, | |
59 | int async, int num_waits, ...) | |
60 | { | |
61 | bool host_fallback = device == GOMP_DEVICE_HOST_FALLBACK; | |
62 | va_list ap; | |
63 | struct goacc_thread *thr; | |
64 | struct gomp_device_descr *acc_dev; | |
65 | struct target_mem_desc *tgt; | |
66 | void **devaddrs; | |
67 | unsigned int i; | |
68 | struct splay_tree_key_s k; | |
69 | splay_tree_key tgt_fn_key; | |
70 | void (*tgt_fn); | |
71 | ||
72 | if (num_gangs != 1) | |
73 | gomp_fatal ("num_gangs (%d) different from one is not yet supported", | |
74 | num_gangs); | |
75 | if (num_workers != 1) | |
76 | gomp_fatal ("num_workers (%d) different from one is not yet supported", | |
77 | num_workers); | |
78 | ||
01c0b3b0 KT |
79 | #ifdef HAVE_INTTYPES_H |
80 | gomp_debug (0, "%s: mapnum=%"PRIu64", hostaddrs=%p, size=%p, kinds=%p, " | |
81 | "async = %d\n", | |
82 | __FUNCTION__, (uint64_t) mapnum, hostaddrs, sizes, kinds, async); | |
83 | #else | |
84 | gomp_debug (0, "%s: mapnum=%lu, hostaddrs=%p, sizes=%p, kinds=%p, async=%d\n", | |
85 | __FUNCTION__, (unsigned long) mapnum, hostaddrs, sizes, kinds, | |
86 | async); | |
87 | #endif | |
d93bdab5 | 88 | goacc_lazy_initialize (); |
41dbbb37 TS |
89 | |
90 | thr = goacc_thread (); | |
91 | acc_dev = thr->dev; | |
92 | ||
93 | /* Host fallback if "if" clause is false or if the current device is set to | |
94 | the host. */ | |
95 | if (host_fallback) | |
96 | { | |
97 | goacc_save_and_set_bind (acc_device_host); | |
98 | fn (hostaddrs); | |
99 | goacc_restore_bind (); | |
100 | return; | |
101 | } | |
102 | else if (acc_device_type (acc_dev->type) == acc_device_host) | |
103 | { | |
104 | fn (hostaddrs); | |
105 | return; | |
106 | } | |
107 | ||
a091118d NS |
108 | if (num_waits) |
109 | { | |
110 | va_start (ap, num_waits); | |
111 | goacc_wait (async, num_waits, ap); | |
112 | va_end (ap); | |
113 | } | |
41dbbb37 | 114 | |
41dbbb37 TS |
115 | acc_dev->openacc.async_set_async_func (async); |
116 | ||
117 | if (!(acc_dev->capabilities & GOMP_OFFLOAD_CAP_NATIVE_EXEC)) | |
118 | { | |
119 | k.host_start = (uintptr_t) fn; | |
120 | k.host_end = k.host_start + 1; | |
a51df54e IV |
121 | gomp_mutex_lock (&acc_dev->lock); |
122 | tgt_fn_key = splay_tree_lookup (&acc_dev->mem_map, &k); | |
123 | gomp_mutex_unlock (&acc_dev->lock); | |
41dbbb37 TS |
124 | |
125 | if (tgt_fn_key == NULL) | |
126 | gomp_fatal ("target function wasn't mapped"); | |
127 | ||
d93bdab5 | 128 | tgt_fn = (void (*)) tgt_fn_key->tgt_offset; |
41dbbb37 TS |
129 | } |
130 | else | |
131 | tgt_fn = (void (*)) fn; | |
132 | ||
133 | tgt = gomp_map_vars (acc_dev, mapnum, hostaddrs, NULL, sizes, kinds, true, | |
134 | false); | |
135 | ||
6e36114c | 136 | devaddrs = gomp_alloca (sizeof (void *) * mapnum); |
41dbbb37 TS |
137 | for (i = 0; i < mapnum; i++) |
138 | devaddrs[i] = (void *) (tgt->list[i]->tgt->tgt_start | |
139 | + tgt->list[i]->tgt_offset); | |
140 | ||
141 | acc_dev->openacc.exec_func (tgt_fn, mapnum, hostaddrs, devaddrs, sizes, kinds, | |
142 | num_gangs, num_workers, vector_length, async, | |
143 | tgt); | |
144 | ||
145 | /* If running synchronously, unmap immediately. */ | |
146 | if (async < acc_async_noval) | |
147 | gomp_unmap_vars (tgt, true); | |
148 | else | |
149 | { | |
150 | gomp_copy_from_async (tgt); | |
151 | acc_dev->openacc.register_async_cleanup_func (tgt); | |
152 | } | |
153 | ||
154 | acc_dev->openacc.async_set_async_func (acc_async_sync); | |
155 | } | |
156 | ||
157 | void | |
128b26dc | 158 | GOACC_data_start (int device, size_t mapnum, |
41dbbb37 TS |
159 | void **hostaddrs, size_t *sizes, unsigned short *kinds) |
160 | { | |
161 | bool host_fallback = device == GOMP_DEVICE_HOST_FALLBACK; | |
162 | struct target_mem_desc *tgt; | |
163 | ||
01c0b3b0 KT |
164 | #ifdef HAVE_INTTYPES_H |
165 | gomp_debug (0, "%s: mapnum=%"PRIu64", hostaddrs=%p, size=%p, kinds=%p\n", | |
166 | __FUNCTION__, (uint64_t) mapnum, hostaddrs, sizes, kinds); | |
167 | #else | |
168 | gomp_debug (0, "%s: mapnum=%lu, hostaddrs=%p, sizes=%p, kinds=%p\n", | |
169 | __FUNCTION__, (unsigned long) mapnum, hostaddrs, sizes, kinds); | |
170 | #endif | |
41dbbb37 | 171 | |
d93bdab5 | 172 | goacc_lazy_initialize (); |
41dbbb37 TS |
173 | |
174 | struct goacc_thread *thr = goacc_thread (); | |
175 | struct gomp_device_descr *acc_dev = thr->dev; | |
176 | ||
177 | /* Host fallback or 'do nothing'. */ | |
178 | if ((acc_dev->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM) | |
179 | || host_fallback) | |
180 | { | |
181 | tgt = gomp_map_vars (NULL, 0, NULL, NULL, NULL, NULL, true, false); | |
182 | tgt->prev = thr->mapped_data; | |
183 | thr->mapped_data = tgt; | |
184 | ||
185 | return; | |
186 | } | |
187 | ||
188 | gomp_debug (0, " %s: prepare mappings\n", __FUNCTION__); | |
189 | tgt = gomp_map_vars (acc_dev, mapnum, hostaddrs, NULL, sizes, kinds, true, | |
190 | false); | |
191 | gomp_debug (0, " %s: mappings prepared\n", __FUNCTION__); | |
192 | tgt->prev = thr->mapped_data; | |
193 | thr->mapped_data = tgt; | |
194 | } | |
195 | ||
196 | void | |
197 | GOACC_data_end (void) | |
198 | { | |
199 | struct goacc_thread *thr = goacc_thread (); | |
200 | struct target_mem_desc *tgt = thr->mapped_data; | |
201 | ||
202 | gomp_debug (0, " %s: restore mappings\n", __FUNCTION__); | |
203 | thr->mapped_data = tgt->prev; | |
204 | gomp_unmap_vars (tgt, true); | |
205 | gomp_debug (0, " %s: mappings restored\n", __FUNCTION__); | |
206 | } | |
207 | ||
208 | void | |
128b26dc | 209 | GOACC_enter_exit_data (int device, size_t mapnum, |
41dbbb37 TS |
210 | void **hostaddrs, size_t *sizes, unsigned short *kinds, |
211 | int async, int num_waits, ...) | |
212 | { | |
213 | struct goacc_thread *thr; | |
214 | struct gomp_device_descr *acc_dev; | |
215 | bool host_fallback = device == GOMP_DEVICE_HOST_FALLBACK; | |
216 | bool data_enter = false; | |
217 | size_t i; | |
218 | ||
d93bdab5 | 219 | goacc_lazy_initialize (); |
41dbbb37 TS |
220 | |
221 | thr = goacc_thread (); | |
222 | acc_dev = thr->dev; | |
223 | ||
224 | if ((acc_dev->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM) | |
225 | || host_fallback) | |
226 | return; | |
227 | ||
a091118d | 228 | if (num_waits) |
41dbbb37 TS |
229 | { |
230 | va_list ap; | |
231 | ||
232 | va_start (ap, num_waits); | |
41dbbb37 | 233 | goacc_wait (async, num_waits, ap); |
41dbbb37 TS |
234 | va_end (ap); |
235 | } | |
236 | ||
237 | acc_dev->openacc.async_set_async_func (async); | |
238 | ||
239 | /* Determine if this is an "acc enter data". */ | |
240 | for (i = 0; i < mapnum; ++i) | |
241 | { | |
242 | unsigned char kind = kinds[i] & 0xff; | |
243 | ||
244 | if (kind == GOMP_MAP_POINTER || kind == GOMP_MAP_TO_PSET) | |
245 | continue; | |
246 | ||
247 | if (kind == GOMP_MAP_FORCE_ALLOC | |
248 | || kind == GOMP_MAP_FORCE_PRESENT | |
249 | || kind == GOMP_MAP_FORCE_TO) | |
250 | { | |
251 | data_enter = true; | |
252 | break; | |
253 | } | |
254 | ||
255 | if (kind == GOMP_MAP_FORCE_DEALLOC | |
256 | || kind == GOMP_MAP_FORCE_FROM) | |
257 | break; | |
258 | ||
259 | gomp_fatal (">>>> GOACC_enter_exit_data UNHANDLED kind 0x%.2x", | |
260 | kind); | |
261 | } | |
262 | ||
263 | if (data_enter) | |
264 | { | |
265 | for (i = 0; i < mapnum; i++) | |
266 | { | |
267 | unsigned char kind = kinds[i] & 0xff; | |
268 | ||
269 | /* Scan for PSETs. */ | |
270 | int psets = find_pset (i, mapnum, kinds); | |
271 | ||
272 | if (!psets) | |
273 | { | |
274 | switch (kind) | |
275 | { | |
276 | case GOMP_MAP_POINTER: | |
277 | gomp_acc_insert_pointer (1, &hostaddrs[i], &sizes[i], | |
278 | &kinds[i]); | |
279 | break; | |
280 | case GOMP_MAP_FORCE_ALLOC: | |
281 | acc_create (hostaddrs[i], sizes[i]); | |
282 | break; | |
283 | case GOMP_MAP_FORCE_PRESENT: | |
284 | acc_present_or_copyin (hostaddrs[i], sizes[i]); | |
285 | break; | |
286 | case GOMP_MAP_FORCE_TO: | |
287 | acc_present_or_copyin (hostaddrs[i], sizes[i]); | |
288 | break; | |
289 | default: | |
290 | gomp_fatal (">>>> GOACC_enter_exit_data UNHANDLED kind 0x%.2x", | |
291 | kind); | |
292 | break; | |
293 | } | |
294 | } | |
295 | else | |
296 | { | |
297 | gomp_acc_insert_pointer (3, &hostaddrs[i], &sizes[i], &kinds[i]); | |
298 | /* Increment 'i' by two because OpenACC requires fortran | |
299 | arrays to be contiguous, so each PSET is associated with | |
300 | one of MAP_FORCE_ALLOC/MAP_FORCE_PRESET/MAP_FORCE_TO, and | |
301 | one MAP_POINTER. */ | |
302 | i += 2; | |
303 | } | |
304 | } | |
305 | } | |
306 | else | |
307 | for (i = 0; i < mapnum; ++i) | |
308 | { | |
309 | unsigned char kind = kinds[i] & 0xff; | |
310 | ||
311 | int psets = find_pset (i, mapnum, kinds); | |
312 | ||
313 | if (!psets) | |
314 | { | |
315 | switch (kind) | |
316 | { | |
317 | case GOMP_MAP_POINTER: | |
318 | gomp_acc_remove_pointer (hostaddrs[i], (kinds[i] & 0xff) | |
319 | == GOMP_MAP_FORCE_FROM, | |
320 | async, 1); | |
321 | break; | |
322 | case GOMP_MAP_FORCE_DEALLOC: | |
323 | acc_delete (hostaddrs[i], sizes[i]); | |
324 | break; | |
325 | case GOMP_MAP_FORCE_FROM: | |
326 | acc_copyout (hostaddrs[i], sizes[i]); | |
327 | break; | |
328 | default: | |
329 | gomp_fatal (">>>> GOACC_enter_exit_data UNHANDLED kind 0x%.2x", | |
330 | kind); | |
331 | break; | |
332 | } | |
333 | } | |
334 | else | |
335 | { | |
336 | gomp_acc_remove_pointer (hostaddrs[i], (kinds[i] & 0xff) | |
337 | == GOMP_MAP_FORCE_FROM, async, 3); | |
338 | /* See the above comment. */ | |
339 | i += 2; | |
340 | } | |
341 | } | |
342 | ||
343 | acc_dev->openacc.async_set_async_func (acc_async_sync); | |
344 | } | |
345 | ||
346 | static void | |
347 | goacc_wait (int async, int num_waits, va_list ap) | |
348 | { | |
349 | struct goacc_thread *thr = goacc_thread (); | |
350 | struct gomp_device_descr *acc_dev = thr->dev; | |
41dbbb37 | 351 | |
a091118d | 352 | while (num_waits--) |
41dbbb37 TS |
353 | { |
354 | int qid = va_arg (ap, int); | |
355 | ||
356 | if (acc_async_test (qid)) | |
357 | continue; | |
358 | ||
a091118d NS |
359 | if (async == acc_async_sync) |
360 | acc_wait (qid); | |
361 | else if (qid == async) | |
362 | ;/* If we're waiting on the same asynchronous queue as we're | |
363 | launching on, the queue itself will order work as | |
364 | required, so there's no need to wait explicitly. */ | |
365 | else | |
41dbbb37 TS |
366 | acc_dev->openacc.async_wait_async_func (qid, async); |
367 | } | |
368 | } | |
369 | ||
370 | void | |
128b26dc | 371 | GOACC_update (int device, size_t mapnum, |
41dbbb37 TS |
372 | void **hostaddrs, size_t *sizes, unsigned short *kinds, |
373 | int async, int num_waits, ...) | |
374 | { | |
375 | bool host_fallback = device == GOMP_DEVICE_HOST_FALLBACK; | |
376 | size_t i; | |
377 | ||
d93bdab5 | 378 | goacc_lazy_initialize (); |
41dbbb37 TS |
379 | |
380 | struct goacc_thread *thr = goacc_thread (); | |
381 | struct gomp_device_descr *acc_dev = thr->dev; | |
382 | ||
383 | if ((acc_dev->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM) | |
384 | || host_fallback) | |
385 | return; | |
386 | ||
a091118d | 387 | if (num_waits) |
41dbbb37 TS |
388 | { |
389 | va_list ap; | |
390 | ||
391 | va_start (ap, num_waits); | |
41dbbb37 | 392 | goacc_wait (async, num_waits, ap); |
41dbbb37 TS |
393 | va_end (ap); |
394 | } | |
395 | ||
396 | acc_dev->openacc.async_set_async_func (async); | |
397 | ||
398 | for (i = 0; i < mapnum; ++i) | |
399 | { | |
400 | unsigned char kind = kinds[i] & 0xff; | |
401 | ||
402 | switch (kind) | |
403 | { | |
404 | case GOMP_MAP_POINTER: | |
405 | case GOMP_MAP_TO_PSET: | |
406 | break; | |
407 | ||
408 | case GOMP_MAP_FORCE_TO: | |
409 | acc_update_device (hostaddrs[i], sizes[i]); | |
410 | break; | |
411 | ||
412 | case GOMP_MAP_FORCE_FROM: | |
413 | acc_update_self (hostaddrs[i], sizes[i]); | |
414 | break; | |
415 | ||
416 | default: | |
417 | gomp_fatal (">>>> GOACC_update UNHANDLED kind 0x%.2x", kind); | |
418 | break; | |
419 | } | |
420 | } | |
421 | ||
422 | acc_dev->openacc.async_set_async_func (acc_async_sync); | |
423 | } | |
424 | ||
425 | void | |
426 | GOACC_wait (int async, int num_waits, ...) | |
427 | { | |
a091118d NS |
428 | if (num_waits) |
429 | { | |
430 | va_list ap; | |
41dbbb37 | 431 | |
a091118d NS |
432 | va_start (ap, num_waits); |
433 | goacc_wait (async, num_waits, ap); | |
434 | va_end (ap); | |
435 | } | |
436 | else if (async == acc_async_sync) | |
437 | acc_wait_all (); | |
438 | else if (async == acc_async_noval) | |
a051317b | 439 | goacc_thread ()->dev->openacc.async_wait_all_async_func (acc_async_noval); |
41dbbb37 TS |
440 | } |
441 | ||
/* Return the number of threads; this implementation always reports
   one.  */

int
GOACC_get_num_threads (void)
{
  enum { reported_thread_count = 1 };

  return reported_thread_count;
}
447 | ||
/* Return the number of the calling thread; this implementation always
   reports zero.  */

int
GOACC_get_thread_num (void)
{
  enum { reported_thread_index = 0 };

  return reported_thread_index;
}