]>
Commit | Line | Data |
---|---|---|
1 | /* Copyright (C) 2013-2021 Free Software Foundation, Inc. | |
2 | ||
3 | Contributed by Mentor Embedded. | |
4 | ||
5 | This file is part of the GNU Offloading and Multi Processing Library | |
6 | (libgomp). | |
7 | ||
8 | Libgomp is free software; you can redistribute it and/or modify it | |
9 | under the terms of the GNU General Public License as published by | |
10 | the Free Software Foundation; either version 3, or (at your option) | |
11 | any later version. | |
12 | ||
13 | Libgomp is distributed in the hope that it will be useful, but WITHOUT ANY | |
14 | WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS | |
15 | FOR A PARTICULAR PURPOSE. See the GNU General Public License for | |
16 | more details. | |
17 | ||
18 | Under Section 7 of GPL version 3, you are granted additional | |
19 | permissions described in the GCC Runtime Library Exception, version | |
20 | 3.1, as published by the Free Software Foundation. | |
21 | ||
22 | You should have received a copy of the GNU General Public License and | |
23 | a copy of the GCC Runtime Library Exception along with this program; | |
24 | see the files COPYING3 and COPYING.RUNTIME respectively. If not, see | |
25 | <http://www.gnu.org/licenses/>. */ | |
26 | ||
27 | /* This file handles OpenACC constructs. */ | |
28 | ||
29 | #include "openacc.h" | |
30 | #include "libgomp.h" | |
31 | #include "gomp-constants.h" | |
32 | #include "oacc-int.h" | |
33 | #ifdef HAVE_INTTYPES_H | |
34 | # include <inttypes.h> /* For PRIu64. */ | |
35 | #endif | |
36 | #include <string.h> | |
37 | #include <stdarg.h> | |
38 | #include <assert.h> | |
39 | ||
40 | ||
41 | /* In the ABI, the GOACC_FLAGs are encoded as an inverted bitmask, so that we | |
42 | continue to support the following two legacy values. */ | |
43 | _Static_assert (GOACC_FLAGS_UNMARSHAL (GOMP_DEVICE_ICV) == 0, | |
44 | "legacy GOMP_DEVICE_ICV broken"); | |
45 | _Static_assert (GOACC_FLAGS_UNMARSHAL (GOMP_DEVICE_HOST_FALLBACK) | |
46 | == GOACC_FLAG_HOST_FALLBACK, | |
47 | "legacy GOMP_DEVICE_HOST_FALLBACK broken"); | |
48 | ||
49 | ||
50 | /* Handle the mapping pair that are presented when a | |
51 | deviceptr clause is used with Fortran. */ | |
52 | ||
53 | static void | |
54 | handle_ftn_pointers (size_t mapnum, void **hostaddrs, size_t *sizes, | |
55 | unsigned short *kinds) | |
56 | { | |
57 | int i; | |
58 | ||
59 | for (i = 0; i < mapnum; i++) | |
60 | { | |
61 | unsigned short kind1 = kinds[i] & 0xff; | |
62 | ||
63 | /* Handle Fortran deviceptr clause. */ | |
64 | if (kind1 == GOMP_MAP_FORCE_DEVICEPTR) | |
65 | { | |
66 | unsigned short kind2; | |
67 | ||
68 | if (i < (signed)mapnum - 1) | |
69 | kind2 = kinds[i + 1] & 0xff; | |
70 | else | |
71 | kind2 = 0xffff; | |
72 | ||
73 | if (sizes[i] == sizeof (void *)) | |
74 | continue; | |
75 | ||
76 | /* At this point, we're dealing with a Fortran deviceptr. | |
77 | If the next element is not what we're expecting, then | |
78 | this is an instance of where the deviceptr variable was | |
79 | not used within the region and the pointer was removed | |
80 | by the gimplifier. */ | |
81 | if (kind2 == GOMP_MAP_POINTER | |
82 | && sizes[i + 1] == 0 | |
83 | && hostaddrs[i] == *(void **)hostaddrs[i + 1]) | |
84 | { | |
85 | kinds[i+1] = kinds[i]; | |
86 | sizes[i+1] = sizeof (void *); | |
87 | } | |
88 | ||
89 | /* Invalidate the entry. */ | |
90 | hostaddrs[i] = NULL; | |
91 | } | |
92 | } | |
93 | } | |
94 | ||
95 | ||
96 | /* Launch a possibly offloaded function with FLAGS. FN is the host fn | |
97 | address. MAPNUM, HOSTADDRS, SIZES & KINDS describe the memory | |
98 | blocks to be copied to/from the device. Varadic arguments are | |
99 | keyed optional parameters terminated with a zero. */ | |
100 | ||
101 | void | |
102 | GOACC_parallel_keyed (int flags_m, void (*fn) (void *), | |
103 | size_t mapnum, void **hostaddrs, size_t *sizes, | |
104 | unsigned short *kinds, ...) | |
105 | { | |
106 | int flags = GOACC_FLAGS_UNMARSHAL (flags_m); | |
107 | ||
108 | va_list ap; | |
109 | struct goacc_thread *thr; | |
110 | struct gomp_device_descr *acc_dev; | |
111 | struct target_mem_desc *tgt; | |
112 | void **devaddrs; | |
113 | unsigned int i; | |
114 | struct splay_tree_key_s k; | |
115 | splay_tree_key tgt_fn_key; | |
116 | void (*tgt_fn); | |
117 | int async = GOMP_ASYNC_SYNC; | |
118 | unsigned dims[GOMP_DIM_MAX]; | |
119 | unsigned tag; | |
120 | ||
121 | #ifdef HAVE_INTTYPES_H | |
122 | gomp_debug (0, "%s: mapnum=%"PRIu64", hostaddrs=%p, size=%p, kinds=%p\n", | |
123 | __FUNCTION__, (uint64_t) mapnum, hostaddrs, sizes, kinds); | |
124 | #else | |
125 | gomp_debug (0, "%s: mapnum=%lu, hostaddrs=%p, sizes=%p, kinds=%p\n", | |
126 | __FUNCTION__, (unsigned long) mapnum, hostaddrs, sizes, kinds); | |
127 | #endif | |
128 | goacc_lazy_initialize (); | |
129 | ||
130 | thr = goacc_thread (); | |
131 | acc_dev = thr->dev; | |
132 | ||
133 | bool profiling_p = GOACC_PROFILING_DISPATCH_P (true); | |
134 | ||
135 | acc_prof_info prof_info; | |
136 | if (profiling_p) | |
137 | { | |
138 | thr->prof_info = &prof_info; | |
139 | ||
140 | prof_info.event_type = acc_ev_compute_construct_start; | |
141 | prof_info.valid_bytes = _ACC_PROF_INFO_VALID_BYTES; | |
142 | prof_info.version = _ACC_PROF_INFO_VERSION; | |
143 | prof_info.device_type = acc_device_type (acc_dev->type); | |
144 | prof_info.device_number = acc_dev->target_id; | |
145 | prof_info.thread_id = -1; | |
146 | prof_info.async = async; | |
147 | prof_info.async_queue = prof_info.async; | |
148 | prof_info.src_file = NULL; | |
149 | prof_info.func_name = NULL; | |
150 | prof_info.line_no = -1; | |
151 | prof_info.end_line_no = -1; | |
152 | prof_info.func_line_no = -1; | |
153 | prof_info.func_end_line_no = -1; | |
154 | } | |
155 | acc_event_info compute_construct_event_info; | |
156 | if (profiling_p) | |
157 | { | |
158 | compute_construct_event_info.other_event.event_type | |
159 | = prof_info.event_type; | |
160 | compute_construct_event_info.other_event.valid_bytes | |
161 | = _ACC_OTHER_EVENT_INFO_VALID_BYTES; | |
162 | compute_construct_event_info.other_event.parent_construct | |
163 | = acc_construct_parallel; | |
164 | compute_construct_event_info.other_event.implicit = 0; | |
165 | compute_construct_event_info.other_event.tool_info = NULL; | |
166 | } | |
167 | acc_api_info api_info; | |
168 | if (profiling_p) | |
169 | { | |
170 | thr->api_info = &api_info; | |
171 | ||
172 | api_info.device_api = acc_device_api_none; | |
173 | api_info.valid_bytes = _ACC_API_INFO_VALID_BYTES; | |
174 | api_info.device_type = prof_info.device_type; | |
175 | api_info.vendor = -1; | |
176 | api_info.device_handle = NULL; | |
177 | api_info.context_handle = NULL; | |
178 | api_info.async_handle = NULL; | |
179 | } | |
180 | ||
181 | if (profiling_p) | |
182 | goacc_profiling_dispatch (&prof_info, &compute_construct_event_info, | |
183 | &api_info); | |
184 | ||
185 | handle_ftn_pointers (mapnum, hostaddrs, sizes, kinds); | |
186 | ||
187 | /* Host fallback if "if" clause is false or if the current device is set to | |
188 | the host. */ | |
189 | if (flags & GOACC_FLAG_HOST_FALLBACK) | |
190 | { | |
191 | prof_info.device_type = acc_device_host; | |
192 | api_info.device_type = prof_info.device_type; | |
193 | goacc_save_and_set_bind (acc_device_host); | |
194 | fn (hostaddrs); | |
195 | goacc_restore_bind (); | |
196 | goto out_prof; | |
197 | } | |
198 | else if (acc_device_type (acc_dev->type) == acc_device_host) | |
199 | { | |
200 | fn (hostaddrs); | |
201 | goto out_prof; | |
202 | } | |
203 | ||
204 | /* Default: let the runtime choose. */ | |
205 | for (i = 0; i != GOMP_DIM_MAX; i++) | |
206 | dims[i] = 0; | |
207 | ||
208 | va_start (ap, kinds); | |
209 | /* TODO: This will need amending when device_type is implemented. */ | |
210 | while ((tag = va_arg (ap, unsigned)) != 0) | |
211 | { | |
212 | if (GOMP_LAUNCH_DEVICE (tag)) | |
213 | gomp_fatal ("device_type '%d' offload parameters, libgomp is too old", | |
214 | GOMP_LAUNCH_DEVICE (tag)); | |
215 | ||
216 | switch (GOMP_LAUNCH_CODE (tag)) | |
217 | { | |
218 | case GOMP_LAUNCH_DIM: | |
219 | { | |
220 | unsigned mask = GOMP_LAUNCH_OP (tag); | |
221 | ||
222 | for (i = 0; i != GOMP_DIM_MAX; i++) | |
223 | if (mask & GOMP_DIM_MASK (i)) | |
224 | dims[i] = va_arg (ap, unsigned); | |
225 | } | |
226 | break; | |
227 | ||
228 | case GOMP_LAUNCH_ASYNC: | |
229 | { | |
230 | /* Small constant values are encoded in the operand. */ | |
231 | async = GOMP_LAUNCH_OP (tag); | |
232 | ||
233 | if (async == GOMP_LAUNCH_OP_MAX) | |
234 | async = va_arg (ap, unsigned); | |
235 | ||
236 | if (profiling_p) | |
237 | { | |
238 | prof_info.async = async; | |
239 | prof_info.async_queue = prof_info.async; | |
240 | } | |
241 | ||
242 | break; | |
243 | } | |
244 | ||
245 | case GOMP_LAUNCH_WAIT: | |
246 | { | |
247 | unsigned num_waits = GOMP_LAUNCH_OP (tag); | |
248 | goacc_wait (async, num_waits, &ap); | |
249 | break; | |
250 | } | |
251 | ||
252 | default: | |
253 | gomp_fatal ("unrecognized offload code '%d'," | |
254 | " libgomp is too old", GOMP_LAUNCH_CODE (tag)); | |
255 | } | |
256 | } | |
257 | va_end (ap); | |
258 | ||
259 | if (!(acc_dev->capabilities & GOMP_OFFLOAD_CAP_NATIVE_EXEC)) | |
260 | { | |
261 | k.host_start = (uintptr_t) fn; | |
262 | k.host_end = k.host_start + 1; | |
263 | gomp_mutex_lock (&acc_dev->lock); | |
264 | tgt_fn_key = splay_tree_lookup (&acc_dev->mem_map, &k); | |
265 | gomp_mutex_unlock (&acc_dev->lock); | |
266 | ||
267 | if (tgt_fn_key == NULL) | |
268 | gomp_fatal ("target function wasn't mapped"); | |
269 | ||
270 | tgt_fn = (void (*)) tgt_fn_key->tgt_offset; | |
271 | } | |
272 | else | |
273 | tgt_fn = (void (*)) fn; | |
274 | ||
275 | acc_event_info enter_exit_data_event_info; | |
276 | if (profiling_p) | |
277 | { | |
278 | prof_info.event_type = acc_ev_enter_data_start; | |
279 | enter_exit_data_event_info.other_event.event_type | |
280 | = prof_info.event_type; | |
281 | enter_exit_data_event_info.other_event.valid_bytes | |
282 | = _ACC_OTHER_EVENT_INFO_VALID_BYTES; | |
283 | enter_exit_data_event_info.other_event.parent_construct | |
284 | = compute_construct_event_info.other_event.parent_construct; | |
285 | enter_exit_data_event_info.other_event.implicit = 1; | |
286 | enter_exit_data_event_info.other_event.tool_info = NULL; | |
287 | goacc_profiling_dispatch (&prof_info, &enter_exit_data_event_info, | |
288 | &api_info); | |
289 | } | |
290 | ||
291 | goacc_aq aq = get_goacc_asyncqueue (async); | |
292 | ||
293 | tgt = goacc_map_vars (acc_dev, aq, mapnum, hostaddrs, NULL, sizes, kinds, | |
294 | true, 0); | |
295 | if (profiling_p) | |
296 | { | |
297 | prof_info.event_type = acc_ev_enter_data_end; | |
298 | enter_exit_data_event_info.other_event.event_type | |
299 | = prof_info.event_type; | |
300 | goacc_profiling_dispatch (&prof_info, &enter_exit_data_event_info, | |
301 | &api_info); | |
302 | } | |
303 | ||
304 | devaddrs = gomp_alloca (sizeof (void *) * mapnum); | |
305 | for (i = 0; i < mapnum; i++) | |
306 | devaddrs[i] = (void *) gomp_map_val (tgt, hostaddrs, i); | |
307 | ||
308 | if (aq == NULL) | |
309 | acc_dev->openacc.exec_func (tgt_fn, mapnum, hostaddrs, devaddrs, dims, | |
310 | tgt); | |
311 | else | |
312 | acc_dev->openacc.async.exec_func (tgt_fn, mapnum, hostaddrs, devaddrs, | |
313 | dims, tgt, aq); | |
314 | ||
315 | if (profiling_p) | |
316 | { | |
317 | prof_info.event_type = acc_ev_exit_data_start; | |
318 | enter_exit_data_event_info.other_event.event_type = prof_info.event_type; | |
319 | enter_exit_data_event_info.other_event.tool_info = NULL; | |
320 | goacc_profiling_dispatch (&prof_info, &enter_exit_data_event_info, | |
321 | &api_info); | |
322 | } | |
323 | ||
324 | /* If running synchronously (aq == NULL), this will unmap immediately. */ | |
325 | goacc_unmap_vars (tgt, true, aq); | |
326 | ||
327 | if (profiling_p) | |
328 | { | |
329 | prof_info.event_type = acc_ev_exit_data_end; | |
330 | enter_exit_data_event_info.other_event.event_type = prof_info.event_type; | |
331 | goacc_profiling_dispatch (&prof_info, &enter_exit_data_event_info, | |
332 | &api_info); | |
333 | } | |
334 | ||
335 | out_prof: | |
336 | if (profiling_p) | |
337 | { | |
338 | prof_info.event_type = acc_ev_compute_construct_end; | |
339 | compute_construct_event_info.other_event.event_type | |
340 | = prof_info.event_type; | |
341 | goacc_profiling_dispatch (&prof_info, &compute_construct_event_info, | |
342 | &api_info); | |
343 | ||
344 | thr->prof_info = NULL; | |
345 | thr->api_info = NULL; | |
346 | } | |
347 | } | |
348 | ||
349 | /* Legacy entry point (GCC 5). Only provide host fallback execution. */ | |
350 | ||
351 | void | |
352 | GOACC_parallel (int flags_m, void (*fn) (void *), | |
353 | size_t mapnum, void **hostaddrs, size_t *sizes, | |
354 | unsigned short *kinds, | |
355 | int num_gangs, int num_workers, int vector_length, | |
356 | int async, int num_waits, ...) | |
357 | { | |
358 | goacc_save_and_set_bind (acc_device_host); | |
359 | fn (hostaddrs); | |
360 | goacc_restore_bind (); | |
361 | } | |
362 | ||
363 | void | |
364 | GOACC_data_start (int flags_m, size_t mapnum, | |
365 | void **hostaddrs, size_t *sizes, unsigned short *kinds) | |
366 | { | |
367 | int flags = GOACC_FLAGS_UNMARSHAL (flags_m); | |
368 | ||
369 | struct target_mem_desc *tgt; | |
370 | ||
371 | #ifdef HAVE_INTTYPES_H | |
372 | gomp_debug (0, "%s: mapnum=%"PRIu64", hostaddrs=%p, size=%p, kinds=%p\n", | |
373 | __FUNCTION__, (uint64_t) mapnum, hostaddrs, sizes, kinds); | |
374 | #else | |
375 | gomp_debug (0, "%s: mapnum=%lu, hostaddrs=%p, sizes=%p, kinds=%p\n", | |
376 | __FUNCTION__, (unsigned long) mapnum, hostaddrs, sizes, kinds); | |
377 | #endif | |
378 | ||
379 | goacc_lazy_initialize (); | |
380 | ||
381 | struct goacc_thread *thr = goacc_thread (); | |
382 | struct gomp_device_descr *acc_dev = thr->dev; | |
383 | ||
384 | bool profiling_p = GOACC_PROFILING_DISPATCH_P (true); | |
385 | ||
386 | acc_prof_info prof_info; | |
387 | if (profiling_p) | |
388 | { | |
389 | thr->prof_info = &prof_info; | |
390 | ||
391 | prof_info.event_type = acc_ev_enter_data_start; | |
392 | prof_info.valid_bytes = _ACC_PROF_INFO_VALID_BYTES; | |
393 | prof_info.version = _ACC_PROF_INFO_VERSION; | |
394 | prof_info.device_type = acc_device_type (acc_dev->type); | |
395 | prof_info.device_number = acc_dev->target_id; | |
396 | prof_info.thread_id = -1; | |
397 | prof_info.async = acc_async_sync; /* Always synchronous. */ | |
398 | prof_info.async_queue = prof_info.async; | |
399 | prof_info.src_file = NULL; | |
400 | prof_info.func_name = NULL; | |
401 | prof_info.line_no = -1; | |
402 | prof_info.end_line_no = -1; | |
403 | prof_info.func_line_no = -1; | |
404 | prof_info.func_end_line_no = -1; | |
405 | } | |
406 | acc_event_info enter_data_event_info; | |
407 | if (profiling_p) | |
408 | { | |
409 | enter_data_event_info.other_event.event_type | |
410 | = prof_info.event_type; | |
411 | enter_data_event_info.other_event.valid_bytes | |
412 | = _ACC_OTHER_EVENT_INFO_VALID_BYTES; | |
413 | enter_data_event_info.other_event.parent_construct = acc_construct_data; | |
414 | for (int i = 0; i < mapnum; ++i) | |
415 | if ((kinds[i] & 0xff) == GOMP_MAP_USE_DEVICE_PTR | |
416 | || (kinds[i] & 0xff) == GOMP_MAP_USE_DEVICE_PTR_IF_PRESENT) | |
417 | { | |
418 | /* If there is one such data mapping kind, then this is actually an | |
419 | OpenACC 'host_data' construct. (GCC maps the OpenACC | |
420 | 'host_data' construct to the OpenACC 'data' construct.) Apart | |
421 | from artificial test cases (such as an OpenACC 'host_data' | |
422 | construct's (implicit) device initialization when there hasn't | |
423 | been any device data be set up before...), there can't really | |
424 | any meaningful events be generated from OpenACC 'host_data' | |
425 | constructs, though. */ | |
426 | enter_data_event_info.other_event.parent_construct | |
427 | = acc_construct_host_data; | |
428 | break; | |
429 | } | |
430 | enter_data_event_info.other_event.implicit = 0; | |
431 | enter_data_event_info.other_event.tool_info = NULL; | |
432 | } | |
433 | acc_api_info api_info; | |
434 | if (profiling_p) | |
435 | { | |
436 | thr->api_info = &api_info; | |
437 | ||
438 | api_info.device_api = acc_device_api_none; | |
439 | api_info.valid_bytes = _ACC_API_INFO_VALID_BYTES; | |
440 | api_info.device_type = prof_info.device_type; | |
441 | api_info.vendor = -1; | |
442 | api_info.device_handle = NULL; | |
443 | api_info.context_handle = NULL; | |
444 | api_info.async_handle = NULL; | |
445 | } | |
446 | ||
447 | if (profiling_p) | |
448 | goacc_profiling_dispatch (&prof_info, &enter_data_event_info, &api_info); | |
449 | ||
450 | /* Host fallback or 'do nothing'. */ | |
451 | if ((acc_dev->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM) | |
452 | || (flags & GOACC_FLAG_HOST_FALLBACK)) | |
453 | { | |
454 | prof_info.device_type = acc_device_host; | |
455 | api_info.device_type = prof_info.device_type; | |
456 | tgt = goacc_map_vars (NULL, NULL, 0, NULL, NULL, NULL, NULL, true, 0); | |
457 | tgt->prev = thr->mapped_data; | |
458 | thr->mapped_data = tgt; | |
459 | ||
460 | goto out_prof; | |
461 | } | |
462 | ||
463 | gomp_debug (0, " %s: prepare mappings\n", __FUNCTION__); | |
464 | tgt = goacc_map_vars (acc_dev, NULL, mapnum, hostaddrs, NULL, sizes, kinds, | |
465 | true, 0); | |
466 | gomp_debug (0, " %s: mappings prepared\n", __FUNCTION__); | |
467 | tgt->prev = thr->mapped_data; | |
468 | thr->mapped_data = tgt; | |
469 | ||
470 | out_prof: | |
471 | if (profiling_p) | |
472 | { | |
473 | prof_info.event_type = acc_ev_enter_data_end; | |
474 | enter_data_event_info.other_event.event_type = prof_info.event_type; | |
475 | goacc_profiling_dispatch (&prof_info, &enter_data_event_info, &api_info); | |
476 | ||
477 | thr->prof_info = NULL; | |
478 | thr->api_info = NULL; | |
479 | } | |
480 | } | |
481 | ||
482 | void | |
483 | GOACC_data_end (void) | |
484 | { | |
485 | struct goacc_thread *thr = goacc_thread (); | |
486 | struct gomp_device_descr *acc_dev = thr->dev; | |
487 | struct target_mem_desc *tgt = thr->mapped_data; | |
488 | ||
489 | bool profiling_p = GOACC_PROFILING_DISPATCH_P (true); | |
490 | ||
491 | acc_prof_info prof_info; | |
492 | if (profiling_p) | |
493 | { | |
494 | thr->prof_info = &prof_info; | |
495 | ||
496 | prof_info.event_type = acc_ev_exit_data_start; | |
497 | prof_info.valid_bytes = _ACC_PROF_INFO_VALID_BYTES; | |
498 | prof_info.version = _ACC_PROF_INFO_VERSION; | |
499 | prof_info.device_type = acc_device_type (acc_dev->type); | |
500 | prof_info.device_number = acc_dev->target_id; | |
501 | prof_info.thread_id = -1; | |
502 | prof_info.async = acc_async_sync; /* Always synchronous. */ | |
503 | prof_info.async_queue = prof_info.async; | |
504 | prof_info.src_file = NULL; | |
505 | prof_info.func_name = NULL; | |
506 | prof_info.line_no = -1; | |
507 | prof_info.end_line_no = -1; | |
508 | prof_info.func_line_no = -1; | |
509 | prof_info.func_end_line_no = -1; | |
510 | } | |
511 | acc_event_info exit_data_event_info; | |
512 | if (profiling_p) | |
513 | { | |
514 | exit_data_event_info.other_event.event_type | |
515 | = prof_info.event_type; | |
516 | exit_data_event_info.other_event.valid_bytes | |
517 | = _ACC_OTHER_EVENT_INFO_VALID_BYTES; | |
518 | exit_data_event_info.other_event.parent_construct = acc_construct_data; | |
519 | exit_data_event_info.other_event.implicit = 0; | |
520 | exit_data_event_info.other_event.tool_info = NULL; | |
521 | } | |
522 | acc_api_info api_info; | |
523 | if (profiling_p) | |
524 | { | |
525 | thr->api_info = &api_info; | |
526 | ||
527 | api_info.device_api = acc_device_api_none; | |
528 | api_info.valid_bytes = _ACC_API_INFO_VALID_BYTES; | |
529 | api_info.device_type = prof_info.device_type; | |
530 | api_info.vendor = -1; | |
531 | api_info.device_handle = NULL; | |
532 | api_info.context_handle = NULL; | |
533 | api_info.async_handle = NULL; | |
534 | } | |
535 | ||
536 | if (profiling_p) | |
537 | goacc_profiling_dispatch (&prof_info, &exit_data_event_info, &api_info); | |
538 | ||
539 | gomp_debug (0, " %s: restore mappings\n", __FUNCTION__); | |
540 | thr->mapped_data = tgt->prev; | |
541 | goacc_unmap_vars (tgt, true, NULL); | |
542 | gomp_debug (0, " %s: mappings restored\n", __FUNCTION__); | |
543 | ||
544 | if (profiling_p) | |
545 | { | |
546 | prof_info.event_type = acc_ev_exit_data_end; | |
547 | exit_data_event_info.other_event.event_type = prof_info.event_type; | |
548 | goacc_profiling_dispatch (&prof_info, &exit_data_event_info, &api_info); | |
549 | ||
550 | thr->prof_info = NULL; | |
551 | thr->api_info = NULL; | |
552 | } | |
553 | } | |
554 | ||
555 | void | |
556 | GOACC_update (int flags_m, size_t mapnum, | |
557 | void **hostaddrs, size_t *sizes, unsigned short *kinds, | |
558 | int async, int num_waits, ...) | |
559 | { | |
560 | int flags = GOACC_FLAGS_UNMARSHAL (flags_m); | |
561 | ||
562 | size_t i; | |
563 | ||
564 | goacc_lazy_initialize (); | |
565 | ||
566 | struct goacc_thread *thr = goacc_thread (); | |
567 | struct gomp_device_descr *acc_dev = thr->dev; | |
568 | ||
569 | bool profiling_p = GOACC_PROFILING_DISPATCH_P (true); | |
570 | ||
571 | acc_prof_info prof_info; | |
572 | if (profiling_p) | |
573 | { | |
574 | thr->prof_info = &prof_info; | |
575 | ||
576 | prof_info.event_type = acc_ev_update_start; | |
577 | prof_info.valid_bytes = _ACC_PROF_INFO_VALID_BYTES; | |
578 | prof_info.version = _ACC_PROF_INFO_VERSION; | |
579 | prof_info.device_type = acc_device_type (acc_dev->type); | |
580 | prof_info.device_number = acc_dev->target_id; | |
581 | prof_info.thread_id = -1; | |
582 | prof_info.async = async; | |
583 | prof_info.async_queue = prof_info.async; | |
584 | prof_info.src_file = NULL; | |
585 | prof_info.func_name = NULL; | |
586 | prof_info.line_no = -1; | |
587 | prof_info.end_line_no = -1; | |
588 | prof_info.func_line_no = -1; | |
589 | prof_info.func_end_line_no = -1; | |
590 | } | |
591 | acc_event_info update_event_info; | |
592 | if (profiling_p) | |
593 | { | |
594 | update_event_info.other_event.event_type | |
595 | = prof_info.event_type; | |
596 | update_event_info.other_event.valid_bytes | |
597 | = _ACC_OTHER_EVENT_INFO_VALID_BYTES; | |
598 | update_event_info.other_event.parent_construct = acc_construct_update; | |
599 | update_event_info.other_event.implicit = 0; | |
600 | update_event_info.other_event.tool_info = NULL; | |
601 | } | |
602 | acc_api_info api_info; | |
603 | if (profiling_p) | |
604 | { | |
605 | thr->api_info = &api_info; | |
606 | ||
607 | api_info.device_api = acc_device_api_none; | |
608 | api_info.valid_bytes = _ACC_API_INFO_VALID_BYTES; | |
609 | api_info.device_type = prof_info.device_type; | |
610 | api_info.vendor = -1; | |
611 | api_info.device_handle = NULL; | |
612 | api_info.context_handle = NULL; | |
613 | api_info.async_handle = NULL; | |
614 | } | |
615 | ||
616 | if (profiling_p) | |
617 | goacc_profiling_dispatch (&prof_info, &update_event_info, &api_info); | |
618 | ||
619 | if ((acc_dev->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM) | |
620 | || (flags & GOACC_FLAG_HOST_FALLBACK)) | |
621 | { | |
622 | prof_info.device_type = acc_device_host; | |
623 | api_info.device_type = prof_info.device_type; | |
624 | ||
625 | goto out_prof; | |
626 | } | |
627 | ||
628 | if (num_waits) | |
629 | { | |
630 | va_list ap; | |
631 | ||
632 | va_start (ap, num_waits); | |
633 | goacc_wait (async, num_waits, &ap); | |
634 | va_end (ap); | |
635 | } | |
636 | ||
637 | bool update_device = false; | |
638 | for (i = 0; i < mapnum; ++i) | |
639 | { | |
640 | unsigned char kind = kinds[i] & 0xff; | |
641 | ||
642 | switch (kind) | |
643 | { | |
644 | case GOMP_MAP_POINTER: | |
645 | case GOMP_MAP_TO_PSET: | |
646 | break; | |
647 | ||
648 | case GOMP_MAP_ALWAYS_POINTER: | |
649 | if (update_device) | |
650 | { | |
651 | /* Save the contents of the host pointer. */ | |
652 | void *dptr = acc_deviceptr (hostaddrs[i-1]); | |
653 | uintptr_t t = *(uintptr_t *) hostaddrs[i]; | |
654 | ||
655 | /* Update the contents of the host pointer to reflect | |
656 | the value of the allocated device memory in the | |
657 | previous pointer. */ | |
658 | *(uintptr_t *) hostaddrs[i] = (uintptr_t)dptr; | |
659 | /* TODO: verify that we really cannot use acc_update_device_async | |
660 | here. */ | |
661 | acc_update_device (hostaddrs[i], sizeof (uintptr_t)); | |
662 | ||
663 | /* Restore the host pointer. */ | |
664 | *(uintptr_t *) hostaddrs[i] = t; | |
665 | update_device = false; | |
666 | } | |
667 | break; | |
668 | ||
669 | case GOMP_MAP_TO: | |
670 | if (!acc_is_present (hostaddrs[i], sizes[i])) | |
671 | { | |
672 | update_device = false; | |
673 | break; | |
674 | } | |
675 | /* Fallthru */ | |
676 | case GOMP_MAP_FORCE_TO: | |
677 | update_device = true; | |
678 | acc_update_device_async (hostaddrs[i], sizes[i], async); | |
679 | break; | |
680 | ||
681 | case GOMP_MAP_FROM: | |
682 | if (!acc_is_present (hostaddrs[i], sizes[i])) | |
683 | { | |
684 | update_device = false; | |
685 | break; | |
686 | } | |
687 | /* Fallthru */ | |
688 | case GOMP_MAP_FORCE_FROM: | |
689 | update_device = false; | |
690 | acc_update_self_async (hostaddrs[i], sizes[i], async); | |
691 | break; | |
692 | ||
693 | default: | |
694 | gomp_fatal (">>>> GOACC_update UNHANDLED kind 0x%.2x", kind); | |
695 | break; | |
696 | } | |
697 | } | |
698 | ||
699 | out_prof: | |
700 | if (profiling_p) | |
701 | { | |
702 | prof_info.event_type = acc_ev_update_end; | |
703 | update_event_info.other_event.event_type = prof_info.event_type; | |
704 | goacc_profiling_dispatch (&prof_info, &update_event_info, &api_info); | |
705 | ||
706 | thr->prof_info = NULL; | |
707 | thr->api_info = NULL; | |
708 | } | |
709 | } | |
710 | ||
711 | ||
/* Legacy entry point (GCC 5).  Always reports a single thread.  */

int
GOACC_get_num_threads (void)
{
  enum { legacy_num_threads = 1 };

  return legacy_num_threads;
}
719 | ||
/* Legacy entry point (GCC 5).  Always reports thread number zero.  */

int
GOACC_get_thread_num (void)
{
  enum { legacy_thread_num = 0 };

  return legacy_thread_num;
}