]>
Commit | Line | Data |
---|---|---|
a945c346 | 1 | /* Copyright (C) 2013-2024 Free Software Foundation, Inc. |
41dbbb37 TS |
2 | |
3 | Contributed by Mentor Embedded. | |
4 | ||
5 | This file is part of the GNU Offloading and Multi Processing Library | |
6 | (libgomp). | |
7 | ||
8 | Libgomp is free software; you can redistribute it and/or modify it | |
9 | under the terms of the GNU General Public License as published by | |
10 | the Free Software Foundation; either version 3, or (at your option) | |
11 | any later version. | |
12 | ||
13 | Libgomp is distributed in the hope that it will be useful, but WITHOUT ANY | |
14 | WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS | |
15 | FOR A PARTICULAR PURPOSE. See the GNU General Public License for | |
16 | more details. | |
17 | ||
18 | Under Section 7 of GPL version 3, you are granted additional | |
19 | permissions described in the GCC Runtime Library Exception, version | |
20 | 3.1, as published by the Free Software Foundation. | |
21 | ||
22 | You should have received a copy of the GNU General Public License and | |
23 | a copy of the GCC Runtime Library Exception along with this program; | |
24 | see the files COPYING3 and COPYING.RUNTIME respectively. If not, see | |
25 | <http://www.gnu.org/licenses/>. */ | |
26 | ||
27 | /* This file handles OpenACC constructs. */ | |
28 | ||
29 | #include "openacc.h" | |
30 | #include "libgomp.h" | |
41dbbb37 TS |
31 | #include "gomp-constants.h" |
32 | #include "oacc-int.h" | |
01c0b3b0 KT |
33 | #ifdef HAVE_INTTYPES_H |
34 | # include <inttypes.h> /* For PRIu64. */ | |
35 | #endif | |
41dbbb37 TS |
36 | #include <string.h> |
37 | #include <stdarg.h> | |
38 | #include <assert.h> | |
41dbbb37 | 39 | |
59d5960c TS |
40 | |
41 | /* In the ABI, the GOACC_FLAGs are encoded as an inverted bitmask, so that we | |
42 | continue to support the following two legacy values. */ | |
43 | _Static_assert (GOACC_FLAGS_UNMARSHAL (GOMP_DEVICE_ICV) == 0, | |
44 | "legacy GOMP_DEVICE_ICV broken"); | |
45 | _Static_assert (GOACC_FLAGS_UNMARSHAL (GOMP_DEVICE_HOST_FALLBACK) | |
46 | == GOACC_FLAG_HOST_FALLBACK, | |
47 | "legacy GOMP_DEVICE_HOST_FALLBACK broken"); | |
48 | ||
49 | ||
829c6349 CLT |
50 | /* Handle the mapping pair that are presented when a |
51 | deviceptr clause is used with Fortran. */ | |
52 | ||
53 | static void | |
54 | handle_ftn_pointers (size_t mapnum, void **hostaddrs, size_t *sizes, | |
55 | unsigned short *kinds) | |
56 | { | |
57 | int i; | |
58 | ||
59 | for (i = 0; i < mapnum; i++) | |
60 | { | |
61 | unsigned short kind1 = kinds[i] & 0xff; | |
62 | ||
63 | /* Handle Fortran deviceptr clause. */ | |
64 | if (kind1 == GOMP_MAP_FORCE_DEVICEPTR) | |
65 | { | |
66 | unsigned short kind2; | |
67 | ||
68 | if (i < (signed)mapnum - 1) | |
69 | kind2 = kinds[i + 1] & 0xff; | |
70 | else | |
71 | kind2 = 0xffff; | |
72 | ||
73 | if (sizes[i] == sizeof (void *)) | |
74 | continue; | |
75 | ||
76 | /* At this point, we're dealing with a Fortran deviceptr. | |
77 | If the next element is not what we're expecting, then | |
78 | this is an instance of where the deviceptr variable was | |
79 | not used within the region and the pointer was removed | |
80 | by the gimplifier. */ | |
81 | if (kind2 == GOMP_MAP_POINTER | |
82 | && sizes[i + 1] == 0 | |
83 | && hostaddrs[i] == *(void **)hostaddrs[i + 1]) | |
84 | { | |
85 | kinds[i+1] = kinds[i]; | |
86 | sizes[i+1] = sizeof (void *); | |
87 | } | |
88 | ||
89 | /* Invalidate the entry. */ | |
90 | hostaddrs[i] = NULL; | |
91 | } | |
92 | } | |
41dbbb37 TS |
93 | } |
94 | ||
3e32ee19 | 95 | |
59d5960c | 96 | /* Launch a possibly offloaded function with FLAGS. FN is the host fn |
3e32ee19 NS |
97 | address. MAPNUM, HOSTADDRS, SIZES & KINDS describe the memory |
98 | blocks to be copied to/from the device. Varadic arguments are | |
99 | keyed optional parameters terminated with a zero. */ | |
41dbbb37 TS |
100 | |
101 | void | |
59d5960c | 102 | GOACC_parallel_keyed (int flags_m, void (*fn) (void *), |
3e32ee19 NS |
103 | size_t mapnum, void **hostaddrs, size_t *sizes, |
104 | unsigned short *kinds, ...) | |
41dbbb37 | 105 | { |
59d5960c TS |
106 | int flags = GOACC_FLAGS_UNMARSHAL (flags_m); |
107 | ||
41dbbb37 TS |
108 | va_list ap; |
109 | struct goacc_thread *thr; | |
110 | struct gomp_device_descr *acc_dev; | |
41dbbb37 TS |
111 | unsigned int i; |
112 | struct splay_tree_key_s k; | |
113 | splay_tree_key tgt_fn_key; | |
114 | void (*tgt_fn); | |
3e32ee19 NS |
115 | int async = GOMP_ASYNC_SYNC; |
116 | unsigned dims[GOMP_DIM_MAX]; | |
117 | unsigned tag; | |
41dbbb37 | 118 | |
01c0b3b0 | 119 | #ifdef HAVE_INTTYPES_H |
3e32ee19 NS |
120 | gomp_debug (0, "%s: mapnum=%"PRIu64", hostaddrs=%p, size=%p, kinds=%p\n", |
121 | __FUNCTION__, (uint64_t) mapnum, hostaddrs, sizes, kinds); | |
01c0b3b0 | 122 | #else |
3e32ee19 NS |
123 | gomp_debug (0, "%s: mapnum=%lu, hostaddrs=%p, sizes=%p, kinds=%p\n", |
124 | __FUNCTION__, (unsigned long) mapnum, hostaddrs, sizes, kinds); | |
01c0b3b0 | 125 | #endif |
d93bdab5 | 126 | goacc_lazy_initialize (); |
41dbbb37 TS |
127 | |
128 | thr = goacc_thread (); | |
129 | acc_dev = thr->dev; | |
130 | ||
5fae049d TS |
131 | bool profiling_p = GOACC_PROFILING_DISPATCH_P (true); |
132 | ||
133 | acc_prof_info prof_info; | |
134 | if (profiling_p) | |
135 | { | |
136 | thr->prof_info = &prof_info; | |
137 | ||
138 | prof_info.event_type = acc_ev_compute_construct_start; | |
139 | prof_info.valid_bytes = _ACC_PROF_INFO_VALID_BYTES; | |
140 | prof_info.version = _ACC_PROF_INFO_VERSION; | |
141 | prof_info.device_type = acc_device_type (acc_dev->type); | |
142 | prof_info.device_number = acc_dev->target_id; | |
143 | prof_info.thread_id = -1; | |
144 | prof_info.async = async; | |
145 | prof_info.async_queue = prof_info.async; | |
146 | prof_info.src_file = NULL; | |
147 | prof_info.func_name = NULL; | |
148 | prof_info.line_no = -1; | |
149 | prof_info.end_line_no = -1; | |
150 | prof_info.func_line_no = -1; | |
151 | prof_info.func_end_line_no = -1; | |
152 | } | |
153 | acc_event_info compute_construct_event_info; | |
154 | if (profiling_p) | |
155 | { | |
156 | compute_construct_event_info.other_event.event_type | |
157 | = prof_info.event_type; | |
158 | compute_construct_event_info.other_event.valid_bytes | |
159 | = _ACC_OTHER_EVENT_INFO_VALID_BYTES; | |
160 | compute_construct_event_info.other_event.parent_construct | |
161 | = acc_construct_parallel; | |
162 | compute_construct_event_info.other_event.implicit = 0; | |
163 | compute_construct_event_info.other_event.tool_info = NULL; | |
164 | } | |
165 | acc_api_info api_info; | |
166 | if (profiling_p) | |
167 | { | |
168 | thr->api_info = &api_info; | |
169 | ||
170 | api_info.device_api = acc_device_api_none; | |
171 | api_info.valid_bytes = _ACC_API_INFO_VALID_BYTES; | |
172 | api_info.device_type = prof_info.device_type; | |
173 | api_info.vendor = -1; | |
174 | api_info.device_handle = NULL; | |
175 | api_info.context_handle = NULL; | |
176 | api_info.async_handle = NULL; | |
177 | } | |
178 | ||
179 | if (profiling_p) | |
180 | goacc_profiling_dispatch (&prof_info, &compute_construct_event_info, | |
181 | &api_info); | |
182 | ||
829c6349 CLT |
183 | handle_ftn_pointers (mapnum, hostaddrs, sizes, kinds); |
184 | ||
41dbbb37 TS |
185 | /* Host fallback if "if" clause is false or if the current device is set to |
186 | the host. */ | |
7b2ae64b | 187 | if ((flags & GOACC_FLAG_HOST_FALLBACK) |
3a359638 CLT |
188 | /* TODO: a proper pthreads based "multi-core CPU" local device |
189 | implementation. Currently, this is still the same as host-fallback. */ | |
7b2ae64b TS |
190 | || (flags & GOACC_FLAG_LOCAL_DEVICE)) |
191 | { | |
3a359638 CLT |
192 | prof_info.device_type = acc_device_host; |
193 | api_info.device_type = prof_info.device_type; | |
194 | goacc_save_and_set_bind (acc_device_host); | |
195 | fn (hostaddrs); | |
196 | goacc_restore_bind (); | |
197 | goto out_prof; | |
41dbbb37 TS |
198 | } |
199 | else if (acc_device_type (acc_dev->type) == acc_device_host) | |
200 | { | |
201 | fn (hostaddrs); | |
5fae049d | 202 | goto out_prof; |
41dbbb37 TS |
203 | } |
204 | ||
f99c3557 TS |
205 | /* Default: let the runtime choose. */ |
206 | for (i = 0; i != GOMP_DIM_MAX; i++) | |
207 | dims[i] = 0; | |
208 | ||
3e32ee19 NS |
209 | va_start (ap, kinds); |
210 | /* TODO: This will need amending when device_type is implemented. */ | |
211 | while ((tag = va_arg (ap, unsigned)) != 0) | |
a091118d | 212 | { |
3e32ee19 NS |
213 | if (GOMP_LAUNCH_DEVICE (tag)) |
214 | gomp_fatal ("device_type '%d' offload parameters, libgomp is too old", | |
215 | GOMP_LAUNCH_DEVICE (tag)); | |
216 | ||
217 | switch (GOMP_LAUNCH_CODE (tag)) | |
218 | { | |
219 | case GOMP_LAUNCH_DIM: | |
220 | { | |
221 | unsigned mask = GOMP_LAUNCH_OP (tag); | |
222 | ||
223 | for (i = 0; i != GOMP_DIM_MAX; i++) | |
224 | if (mask & GOMP_DIM_MASK (i)) | |
225 | dims[i] = va_arg (ap, unsigned); | |
226 | } | |
227 | break; | |
228 | ||
229 | case GOMP_LAUNCH_ASYNC: | |
230 | { | |
231 | /* Small constant values are encoded in the operand. */ | |
232 | async = GOMP_LAUNCH_OP (tag); | |
233 | ||
234 | if (async == GOMP_LAUNCH_OP_MAX) | |
235 | async = va_arg (ap, unsigned); | |
5fae049d TS |
236 | |
237 | if (profiling_p) | |
238 | { | |
239 | prof_info.async = async; | |
240 | prof_info.async_queue = prof_info.async; | |
241 | } | |
242 | ||
3e32ee19 NS |
243 | break; |
244 | } | |
245 | ||
246 | case GOMP_LAUNCH_WAIT: | |
247 | { | |
248 | unsigned num_waits = GOMP_LAUNCH_OP (tag); | |
19695f4d | 249 | goacc_wait (async, num_waits, &ap); |
3e32ee19 NS |
250 | break; |
251 | } | |
252 | ||
253 | default: | |
254 | gomp_fatal ("unrecognized offload code '%d'," | |
255 | " libgomp is too old", GOMP_LAUNCH_CODE (tag)); | |
256 | } | |
a091118d | 257 | } |
3e32ee19 | 258 | va_end (ap); |
41dbbb37 | 259 | |
41dbbb37 TS |
260 | if (!(acc_dev->capabilities & GOMP_OFFLOAD_CAP_NATIVE_EXEC)) |
261 | { | |
262 | k.host_start = (uintptr_t) fn; | |
263 | k.host_end = k.host_start + 1; | |
a51df54e IV |
264 | gomp_mutex_lock (&acc_dev->lock); |
265 | tgt_fn_key = splay_tree_lookup (&acc_dev->mem_map, &k); | |
266 | gomp_mutex_unlock (&acc_dev->lock); | |
41dbbb37 TS |
267 | |
268 | if (tgt_fn_key == NULL) | |
269 | gomp_fatal ("target function wasn't mapped"); | |
270 | ||
d93bdab5 | 271 | tgt_fn = (void (*)) tgt_fn_key->tgt_offset; |
41dbbb37 TS |
272 | } |
273 | else | |
274 | tgt_fn = (void (*)) fn; | |
275 | ||
5fae049d TS |
276 | acc_event_info enter_exit_data_event_info; |
277 | if (profiling_p) | |
278 | { | |
279 | prof_info.event_type = acc_ev_enter_data_start; | |
280 | enter_exit_data_event_info.other_event.event_type | |
281 | = prof_info.event_type; | |
282 | enter_exit_data_event_info.other_event.valid_bytes | |
283 | = _ACC_OTHER_EVENT_INFO_VALID_BYTES; | |
284 | enter_exit_data_event_info.other_event.parent_construct | |
285 | = compute_construct_event_info.other_event.parent_construct; | |
286 | enter_exit_data_event_info.other_event.implicit = 1; | |
287 | enter_exit_data_event_info.other_event.tool_info = NULL; | |
288 | goacc_profiling_dispatch (&prof_info, &enter_exit_data_event_info, | |
289 | &api_info); | |
290 | } | |
291 | ||
1f4c5b9b | 292 | goacc_aq aq = get_goacc_asyncqueue (async); |
41dbbb37 | 293 | |
f8332e52 TS |
294 | struct target_mem_desc *tgt |
295 | = goacc_map_vars (acc_dev, aq, mapnum, hostaddrs, NULL, sizes, kinds, true, | |
296 | GOMP_MAP_VARS_TARGET); | |
297 | ||
5fae049d TS |
298 | if (profiling_p) |
299 | { | |
300 | prof_info.event_type = acc_ev_enter_data_end; | |
301 | enter_exit_data_event_info.other_event.event_type | |
302 | = prof_info.event_type; | |
303 | goacc_profiling_dispatch (&prof_info, &enter_exit_data_event_info, | |
304 | &api_info); | |
305 | } | |
275c736e | 306 | |
f8332e52 | 307 | void **devaddrs = (void **) tgt->tgt_start; |
1f4c5b9b | 308 | if (aq == NULL) |
5fae049d TS |
309 | acc_dev->openacc.exec_func (tgt_fn, mapnum, hostaddrs, devaddrs, dims, |
310 | tgt); | |
311 | else | |
312 | acc_dev->openacc.async.exec_func (tgt_fn, mapnum, hostaddrs, devaddrs, | |
313 | dims, tgt, aq); | |
314 | ||
315 | if (profiling_p) | |
1f4c5b9b | 316 | { |
5fae049d TS |
317 | prof_info.event_type = acc_ev_exit_data_start; |
318 | enter_exit_data_event_info.other_event.event_type = prof_info.event_type; | |
319 | enter_exit_data_event_info.other_event.tool_info = NULL; | |
320 | goacc_profiling_dispatch (&prof_info, &enter_exit_data_event_info, | |
321 | &api_info); | |
1f4c5b9b | 322 | } |
5fae049d | 323 | |
275c736e CLT |
324 | /* If running synchronously (aq == NULL), this will unmap immediately. */ |
325 | goacc_unmap_vars (tgt, true, aq); | |
5fae049d TS |
326 | |
327 | if (profiling_p) | |
829c6349 | 328 | { |
5fae049d TS |
329 | prof_info.event_type = acc_ev_exit_data_end; |
330 | enter_exit_data_event_info.other_event.event_type = prof_info.event_type; | |
331 | goacc_profiling_dispatch (&prof_info, &enter_exit_data_event_info, | |
332 | &api_info); | |
333 | } | |
334 | ||
335 | out_prof: | |
336 | if (profiling_p) | |
337 | { | |
338 | prof_info.event_type = acc_ev_compute_construct_end; | |
339 | compute_construct_event_info.other_event.event_type | |
340 | = prof_info.event_type; | |
341 | goacc_profiling_dispatch (&prof_info, &compute_construct_event_info, | |
342 | &api_info); | |
343 | ||
344 | thr->prof_info = NULL; | |
345 | thr->api_info = NULL; | |
829c6349 | 346 | } |
41dbbb37 TS |
347 | } |
348 | ||
2bbbfa4e | 349 | /* Legacy entry point (GCC 5). Only provide host fallback execution. */ |
3e32ee19 NS |
350 | |
351 | void | |
59d5960c | 352 | GOACC_parallel (int flags_m, void (*fn) (void *), |
3e32ee19 NS |
353 | size_t mapnum, void **hostaddrs, size_t *sizes, |
354 | unsigned short *kinds, | |
355 | int num_gangs, int num_workers, int vector_length, | |
356 | int async, int num_waits, ...) | |
357 | { | |
358 | goacc_save_and_set_bind (acc_device_host); | |
359 | fn (hostaddrs); | |
360 | goacc_restore_bind (); | |
361 | } | |
362 | ||
41dbbb37 | 363 | void |
59d5960c | 364 | GOACC_data_start (int flags_m, size_t mapnum, |
41dbbb37 TS |
365 | void **hostaddrs, size_t *sizes, unsigned short *kinds) |
366 | { | |
59d5960c TS |
367 | int flags = GOACC_FLAGS_UNMARSHAL (flags_m); |
368 | ||
41dbbb37 TS |
369 | struct target_mem_desc *tgt; |
370 | ||
01c0b3b0 KT |
371 | #ifdef HAVE_INTTYPES_H |
372 | gomp_debug (0, "%s: mapnum=%"PRIu64", hostaddrs=%p, size=%p, kinds=%p\n", | |
373 | __FUNCTION__, (uint64_t) mapnum, hostaddrs, sizes, kinds); | |
374 | #else | |
375 | gomp_debug (0, "%s: mapnum=%lu, hostaddrs=%p, sizes=%p, kinds=%p\n", | |
376 | __FUNCTION__, (unsigned long) mapnum, hostaddrs, sizes, kinds); | |
377 | #endif | |
41dbbb37 | 378 | |
d93bdab5 | 379 | goacc_lazy_initialize (); |
41dbbb37 TS |
380 | |
381 | struct goacc_thread *thr = goacc_thread (); | |
382 | struct gomp_device_descr *acc_dev = thr->dev; | |
383 | ||
5fae049d TS |
384 | bool profiling_p = GOACC_PROFILING_DISPATCH_P (true); |
385 | ||
386 | acc_prof_info prof_info; | |
387 | if (profiling_p) | |
388 | { | |
389 | thr->prof_info = &prof_info; | |
390 | ||
391 | prof_info.event_type = acc_ev_enter_data_start; | |
392 | prof_info.valid_bytes = _ACC_PROF_INFO_VALID_BYTES; | |
393 | prof_info.version = _ACC_PROF_INFO_VERSION; | |
394 | prof_info.device_type = acc_device_type (acc_dev->type); | |
395 | prof_info.device_number = acc_dev->target_id; | |
396 | prof_info.thread_id = -1; | |
397 | prof_info.async = acc_async_sync; /* Always synchronous. */ | |
398 | prof_info.async_queue = prof_info.async; | |
399 | prof_info.src_file = NULL; | |
400 | prof_info.func_name = NULL; | |
401 | prof_info.line_no = -1; | |
402 | prof_info.end_line_no = -1; | |
403 | prof_info.func_line_no = -1; | |
404 | prof_info.func_end_line_no = -1; | |
405 | } | |
406 | acc_event_info enter_data_event_info; | |
407 | if (profiling_p) | |
408 | { | |
409 | enter_data_event_info.other_event.event_type | |
410 | = prof_info.event_type; | |
411 | enter_data_event_info.other_event.valid_bytes | |
412 | = _ACC_OTHER_EVENT_INFO_VALID_BYTES; | |
413 | enter_data_event_info.other_event.parent_construct = acc_construct_data; | |
414 | for (int i = 0; i < mapnum; ++i) | |
d5c23c6c TB |
415 | if ((kinds[i] & 0xff) == GOMP_MAP_USE_DEVICE_PTR |
416 | || (kinds[i] & 0xff) == GOMP_MAP_USE_DEVICE_PTR_IF_PRESENT) | |
5fae049d TS |
417 | { |
418 | /* If there is one such data mapping kind, then this is actually an | |
419 | OpenACC 'host_data' construct. (GCC maps the OpenACC | |
420 | 'host_data' construct to the OpenACC 'data' construct.) Apart | |
421 | from artificial test cases (such as an OpenACC 'host_data' | |
422 | construct's (implicit) device initialization when there hasn't | |
423 | been any device data be set up before...), there can't really | |
424 | any meaningful events be generated from OpenACC 'host_data' | |
425 | constructs, though. */ | |
426 | enter_data_event_info.other_event.parent_construct | |
427 | = acc_construct_host_data; | |
428 | break; | |
429 | } | |
430 | enter_data_event_info.other_event.implicit = 0; | |
431 | enter_data_event_info.other_event.tool_info = NULL; | |
432 | } | |
433 | acc_api_info api_info; | |
434 | if (profiling_p) | |
435 | { | |
436 | thr->api_info = &api_info; | |
437 | ||
438 | api_info.device_api = acc_device_api_none; | |
439 | api_info.valid_bytes = _ACC_API_INFO_VALID_BYTES; | |
440 | api_info.device_type = prof_info.device_type; | |
441 | api_info.vendor = -1; | |
442 | api_info.device_handle = NULL; | |
443 | api_info.context_handle = NULL; | |
444 | api_info.async_handle = NULL; | |
445 | } | |
446 | ||
447 | if (profiling_p) | |
448 | goacc_profiling_dispatch (&prof_info, &enter_data_event_info, &api_info); | |
449 | ||
41dbbb37 TS |
450 | /* Host fallback or 'do nothing'. */ |
451 | if ((acc_dev->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM) | |
7b2ae64b TS |
452 | || (flags & GOACC_FLAG_HOST_FALLBACK) |
453 | || (flags & GOACC_FLAG_LOCAL_DEVICE)) | |
41dbbb37 | 454 | { |
5fae049d TS |
455 | prof_info.device_type = acc_device_host; |
456 | api_info.device_type = prof_info.device_type; | |
275c736e | 457 | tgt = goacc_map_vars (NULL, NULL, 0, NULL, NULL, NULL, NULL, true, 0); |
41dbbb37 TS |
458 | tgt->prev = thr->mapped_data; |
459 | thr->mapped_data = tgt; | |
460 | ||
5fae049d | 461 | goto out_prof; |
41dbbb37 TS |
462 | } |
463 | ||
464 | gomp_debug (0, " %s: prepare mappings\n", __FUNCTION__); | |
275c736e CLT |
465 | tgt = goacc_map_vars (acc_dev, NULL, mapnum, hostaddrs, NULL, sizes, kinds, |
466 | true, 0); | |
41dbbb37 TS |
467 | gomp_debug (0, " %s: mappings prepared\n", __FUNCTION__); |
468 | tgt->prev = thr->mapped_data; | |
469 | thr->mapped_data = tgt; | |
5fae049d TS |
470 | |
471 | out_prof: | |
472 | if (profiling_p) | |
473 | { | |
474 | prof_info.event_type = acc_ev_enter_data_end; | |
475 | enter_data_event_info.other_event.event_type = prof_info.event_type; | |
476 | goacc_profiling_dispatch (&prof_info, &enter_data_event_info, &api_info); | |
477 | ||
478 | thr->prof_info = NULL; | |
479 | thr->api_info = NULL; | |
480 | } | |
41dbbb37 TS |
481 | } |
482 | ||
483 | void | |
484 | GOACC_data_end (void) | |
485 | { | |
486 | struct goacc_thread *thr = goacc_thread (); | |
5fae049d | 487 | struct gomp_device_descr *acc_dev = thr->dev; |
41dbbb37 TS |
488 | struct target_mem_desc *tgt = thr->mapped_data; |
489 | ||
5fae049d TS |
490 | bool profiling_p = GOACC_PROFILING_DISPATCH_P (true); |
491 | ||
492 | acc_prof_info prof_info; | |
493 | if (profiling_p) | |
494 | { | |
495 | thr->prof_info = &prof_info; | |
496 | ||
497 | prof_info.event_type = acc_ev_exit_data_start; | |
498 | prof_info.valid_bytes = _ACC_PROF_INFO_VALID_BYTES; | |
499 | prof_info.version = _ACC_PROF_INFO_VERSION; | |
500 | prof_info.device_type = acc_device_type (acc_dev->type); | |
501 | prof_info.device_number = acc_dev->target_id; | |
502 | prof_info.thread_id = -1; | |
503 | prof_info.async = acc_async_sync; /* Always synchronous. */ | |
504 | prof_info.async_queue = prof_info.async; | |
505 | prof_info.src_file = NULL; | |
506 | prof_info.func_name = NULL; | |
507 | prof_info.line_no = -1; | |
508 | prof_info.end_line_no = -1; | |
509 | prof_info.func_line_no = -1; | |
510 | prof_info.func_end_line_no = -1; | |
511 | } | |
512 | acc_event_info exit_data_event_info; | |
513 | if (profiling_p) | |
514 | { | |
515 | exit_data_event_info.other_event.event_type | |
516 | = prof_info.event_type; | |
517 | exit_data_event_info.other_event.valid_bytes | |
518 | = _ACC_OTHER_EVENT_INFO_VALID_BYTES; | |
519 | exit_data_event_info.other_event.parent_construct = acc_construct_data; | |
520 | exit_data_event_info.other_event.implicit = 0; | |
521 | exit_data_event_info.other_event.tool_info = NULL; | |
522 | } | |
523 | acc_api_info api_info; | |
524 | if (profiling_p) | |
525 | { | |
526 | thr->api_info = &api_info; | |
527 | ||
528 | api_info.device_api = acc_device_api_none; | |
529 | api_info.valid_bytes = _ACC_API_INFO_VALID_BYTES; | |
530 | api_info.device_type = prof_info.device_type; | |
531 | api_info.vendor = -1; | |
532 | api_info.device_handle = NULL; | |
533 | api_info.context_handle = NULL; | |
534 | api_info.async_handle = NULL; | |
535 | } | |
536 | ||
537 | if (profiling_p) | |
538 | goacc_profiling_dispatch (&prof_info, &exit_data_event_info, &api_info); | |
539 | ||
41dbbb37 TS |
540 | gomp_debug (0, " %s: restore mappings\n", __FUNCTION__); |
541 | thr->mapped_data = tgt->prev; | |
275c736e | 542 | goacc_unmap_vars (tgt, true, NULL); |
41dbbb37 | 543 | gomp_debug (0, " %s: mappings restored\n", __FUNCTION__); |
5fae049d TS |
544 | |
545 | if (profiling_p) | |
546 | { | |
547 | prof_info.event_type = acc_ev_exit_data_end; | |
548 | exit_data_event_info.other_event.event_type = prof_info.event_type; | |
549 | goacc_profiling_dispatch (&prof_info, &exit_data_event_info, &api_info); | |
550 | ||
551 | thr->prof_info = NULL; | |
552 | thr->api_info = NULL; | |
553 | } | |
41dbbb37 TS |
554 | } |
555 | ||
41dbbb37 | 556 | void |
59d5960c | 557 | GOACC_update (int flags_m, size_t mapnum, |
41dbbb37 TS |
558 | void **hostaddrs, size_t *sizes, unsigned short *kinds, |
559 | int async, int num_waits, ...) | |
560 | { | |
59d5960c TS |
561 | int flags = GOACC_FLAGS_UNMARSHAL (flags_m); |
562 | ||
41dbbb37 TS |
563 | size_t i; |
564 | ||
d93bdab5 | 565 | goacc_lazy_initialize (); |
41dbbb37 TS |
566 | |
567 | struct goacc_thread *thr = goacc_thread (); | |
568 | struct gomp_device_descr *acc_dev = thr->dev; | |
569 | ||
5fae049d TS |
570 | bool profiling_p = GOACC_PROFILING_DISPATCH_P (true); |
571 | ||
572 | acc_prof_info prof_info; | |
573 | if (profiling_p) | |
574 | { | |
575 | thr->prof_info = &prof_info; | |
576 | ||
577 | prof_info.event_type = acc_ev_update_start; | |
578 | prof_info.valid_bytes = _ACC_PROF_INFO_VALID_BYTES; | |
579 | prof_info.version = _ACC_PROF_INFO_VERSION; | |
580 | prof_info.device_type = acc_device_type (acc_dev->type); | |
581 | prof_info.device_number = acc_dev->target_id; | |
582 | prof_info.thread_id = -1; | |
583 | prof_info.async = async; | |
584 | prof_info.async_queue = prof_info.async; | |
585 | prof_info.src_file = NULL; | |
586 | prof_info.func_name = NULL; | |
587 | prof_info.line_no = -1; | |
588 | prof_info.end_line_no = -1; | |
589 | prof_info.func_line_no = -1; | |
590 | prof_info.func_end_line_no = -1; | |
591 | } | |
592 | acc_event_info update_event_info; | |
593 | if (profiling_p) | |
594 | { | |
595 | update_event_info.other_event.event_type | |
596 | = prof_info.event_type; | |
597 | update_event_info.other_event.valid_bytes | |
598 | = _ACC_OTHER_EVENT_INFO_VALID_BYTES; | |
599 | update_event_info.other_event.parent_construct = acc_construct_update; | |
600 | update_event_info.other_event.implicit = 0; | |
601 | update_event_info.other_event.tool_info = NULL; | |
602 | } | |
603 | acc_api_info api_info; | |
604 | if (profiling_p) | |
605 | { | |
606 | thr->api_info = &api_info; | |
607 | ||
608 | api_info.device_api = acc_device_api_none; | |
609 | api_info.valid_bytes = _ACC_API_INFO_VALID_BYTES; | |
610 | api_info.device_type = prof_info.device_type; | |
611 | api_info.vendor = -1; | |
612 | api_info.device_handle = NULL; | |
613 | api_info.context_handle = NULL; | |
614 | api_info.async_handle = NULL; | |
615 | } | |
616 | ||
617 | if (profiling_p) | |
618 | goacc_profiling_dispatch (&prof_info, &update_event_info, &api_info); | |
619 | ||
41dbbb37 | 620 | if ((acc_dev->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM) |
59d5960c | 621 | || (flags & GOACC_FLAG_HOST_FALLBACK)) |
5fae049d TS |
622 | { |
623 | prof_info.device_type = acc_device_host; | |
624 | api_info.device_type = prof_info.device_type; | |
625 | ||
626 | goto out_prof; | |
627 | } | |
41dbbb37 | 628 | |
a091118d | 629 | if (num_waits) |
41dbbb37 TS |
630 | { |
631 | va_list ap; | |
632 | ||
633 | va_start (ap, num_waits); | |
3e32ee19 | 634 | goacc_wait (async, num_waits, &ap); |
41dbbb37 TS |
635 | va_end (ap); |
636 | } | |
637 | ||
829c6349 | 638 | bool update_device = false; |
41dbbb37 TS |
639 | for (i = 0; i < mapnum; ++i) |
640 | { | |
641 | unsigned char kind = kinds[i] & 0xff; | |
642 | ||
643 | switch (kind) | |
644 | { | |
645 | case GOMP_MAP_POINTER: | |
646 | case GOMP_MAP_TO_PSET: | |
647 | break; | |
648 | ||
829c6349 CLT |
649 | case GOMP_MAP_ALWAYS_POINTER: |
650 | if (update_device) | |
651 | { | |
652 | /* Save the contents of the host pointer. */ | |
653 | void *dptr = acc_deviceptr (hostaddrs[i-1]); | |
654 | uintptr_t t = *(uintptr_t *) hostaddrs[i]; | |
655 | ||
656 | /* Update the contents of the host pointer to reflect | |
657 | the value of the allocated device memory in the | |
658 | previous pointer. */ | |
659 | *(uintptr_t *) hostaddrs[i] = (uintptr_t)dptr; | |
1f4c5b9b CLT |
660 | /* TODO: verify that we really cannot use acc_update_device_async |
661 | here. */ | |
829c6349 CLT |
662 | acc_update_device (hostaddrs[i], sizeof (uintptr_t)); |
663 | ||
664 | /* Restore the host pointer. */ | |
665 | *(uintptr_t *) hostaddrs[i] = t; | |
666 | update_device = false; | |
667 | } | |
668 | break; | |
669 | ||
670 | case GOMP_MAP_TO: | |
671 | if (!acc_is_present (hostaddrs[i], sizes[i])) | |
672 | { | |
673 | update_device = false; | |
674 | break; | |
675 | } | |
676 | /* Fallthru */ | |
41dbbb37 | 677 | case GOMP_MAP_FORCE_TO: |
829c6349 | 678 | update_device = true; |
1f4c5b9b | 679 | acc_update_device_async (hostaddrs[i], sizes[i], async); |
41dbbb37 TS |
680 | break; |
681 | ||
829c6349 CLT |
682 | case GOMP_MAP_FROM: |
683 | if (!acc_is_present (hostaddrs[i], sizes[i])) | |
684 | { | |
685 | update_device = false; | |
686 | break; | |
687 | } | |
688 | /* Fallthru */ | |
41dbbb37 | 689 | case GOMP_MAP_FORCE_FROM: |
829c6349 | 690 | update_device = false; |
1f4c5b9b | 691 | acc_update_self_async (hostaddrs[i], sizes[i], async); |
41dbbb37 TS |
692 | break; |
693 | ||
694 | default: | |
695 | gomp_fatal (">>>> GOACC_update UNHANDLED kind 0x%.2x", kind); | |
696 | break; | |
697 | } | |
698 | } | |
5fae049d TS |
699 | |
700 | out_prof: | |
701 | if (profiling_p) | |
702 | { | |
703 | prof_info.event_type = acc_ev_update_end; | |
704 | update_event_info.other_event.event_type = prof_info.event_type; | |
705 | goacc_profiling_dispatch (&prof_info, &update_event_info, &api_info); | |
706 | ||
707 | thr->prof_info = NULL; | |
708 | thr->api_info = NULL; | |
709 | } | |
41dbbb37 TS |
710 | } |
711 | ||
41dbbb37 | 712 | |
2bbbfa4e TS |
/* Legacy entry point (GCC 5).  Takes no arguments and always returns 1.  */

int
GOACC_get_num_threads (void)
{
  const int legacy_num_threads = 1;

  return legacy_num_threads;
}
720 | ||
2bbbfa4e TS |
/* Legacy entry point (GCC 5).  Takes no arguments and always returns 0.  */

int
GOACC_get_thread_num (void)
{
  const int legacy_thread_num = 0;

  return legacy_thread_num;
}