]>
Commit | Line | Data |
---|---|---|
41dbbb37 TS |
1 | /* OpenACC Runtime Library Definitions. |
2 | ||
99dee823 | 3 | Copyright (C) 2013-2021 Free Software Foundation, Inc. |
41dbbb37 TS |
4 | |
5 | Contributed by Mentor Embedded. | |
1df3f842 | 6 | |
f1f3453e TS |
7 | This file is part of the GNU Offloading and Multi Processing Library |
8 | (libgomp). | |
1df3f842 JJ |
9 | |
10 | Libgomp is free software; you can redistribute it and/or modify it | |
11 | under the terms of the GNU General Public License as published by | |
12 | the Free Software Foundation; either version 3, or (at your option) | |
13 | any later version. | |
14 | ||
15 | Libgomp is distributed in the hope that it will be useful, but WITHOUT ANY | |
16 | WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS | |
17 | FOR A PARTICULAR PURPOSE. See the GNU General Public License for | |
18 | more details. | |
19 | ||
20 | Under Section 7 of GPL version 3, you are granted additional | |
21 | permissions described in the GCC Runtime Library Exception, version | |
22 | 3.1, as published by the Free Software Foundation. | |
23 | ||
24 | You should have received a copy of the GNU General Public License and | |
25 | a copy of the GCC Runtime Library Exception along with this program; | |
26 | see the files COPYING3 and COPYING.RUNTIME respectively. If not, see | |
27 | <http://www.gnu.org/licenses/>. */ | |
28 | ||
d93bdab5 | 29 | #include <assert.h> |
1f4c5b9b | 30 | #include <string.h> |
41dbbb37 TS |
31 | #include "openacc.h" |
32 | #include "libgomp.h" | |
33 | #include "oacc-int.h" | |
34 | ||
1f4c5b9b CLT |
35 | static struct goacc_thread * |
36 | get_goacc_thread (void) | |
41dbbb37 | 37 | { |
d93bdab5 JB |
38 | struct goacc_thread *thr = goacc_thread (); |
39 | ||
40 | if (!thr || !thr->dev) | |
41 | gomp_fatal ("no device active"); | |
42 | ||
1f4c5b9b | 43 | return thr; |
41dbbb37 TS |
44 | } |
45 | ||
1f4c5b9b CLT |
46 | static int |
47 | validate_async_val (int async) | |
41dbbb37 | 48 | { |
edbd038a | 49 | if (!async_valid_p (async)) |
1f4c5b9b CLT |
50 | gomp_fatal ("invalid async-argument: %d", async); |
51 | ||
52 | if (async == acc_async_sync) | |
53 | return -1; | |
54 | ||
55 | if (async == acc_async_noval) | |
56 | return 0; | |
57 | ||
58 | if (async >= 0) | |
59 | /* TODO: we reserve 0 for acc_async_noval before we can clarify the | |
60 | semantics of "default_async". */ | |
61 | return 1 + async; | |
62 | else | |
63 | __builtin_unreachable (); | |
64 | } | |
65 | ||
66 | /* Return the asyncqueue to be used for OpenACC async-argument ASYNC. This | |
67 | might return NULL if no asyncqueue is to be used. Otherwise, if CREATE, | |
5fae049d TS |
68 | create the asyncqueue if it doesn't exist yet. |
69 | ||
70 | Unless CREATE, this will not generate any OpenACC Profiling Interface | |
71 | events. */ | |
1f4c5b9b CLT |
72 | |
73 | attribute_hidden struct goacc_asyncqueue * | |
74 | lookup_goacc_asyncqueue (struct goacc_thread *thr, bool create, int async) | |
75 | { | |
76 | async = validate_async_val (async); | |
77 | if (async < 0) | |
78 | return NULL; | |
79 | ||
80 | struct goacc_asyncqueue *ret_aq = NULL; | |
81 | struct gomp_device_descr *dev = thr->dev; | |
82 | ||
83 | gomp_mutex_lock (&dev->openacc.async.lock); | |
41dbbb37 | 84 | |
1f4c5b9b CLT |
85 | if (!create |
86 | && (async >= dev->openacc.async.nasyncqueue | |
87 | || !dev->openacc.async.asyncqueue[async])) | |
88 | goto end; | |
89 | ||
90 | if (async >= dev->openacc.async.nasyncqueue) | |
91 | { | |
92 | int diff = async + 1 - dev->openacc.async.nasyncqueue; | |
93 | dev->openacc.async.asyncqueue | |
94 | = gomp_realloc (dev->openacc.async.asyncqueue, | |
95 | sizeof (goacc_aq) * (async + 1)); | |
96 | memset (dev->openacc.async.asyncqueue + dev->openacc.async.nasyncqueue, | |
97 | 0, sizeof (goacc_aq) * diff); | |
98 | dev->openacc.async.nasyncqueue = async + 1; | |
99 | } | |
100 | ||
101 | if (!dev->openacc.async.asyncqueue[async]) | |
102 | { | |
d2903ce0 AS |
103 | dev->openacc.async.asyncqueue[async] |
104 | = dev->openacc.async.construct_func (dev->target_id); | |
1f4c5b9b CLT |
105 | |
106 | if (!dev->openacc.async.asyncqueue[async]) | |
107 | { | |
108 | gomp_mutex_unlock (&dev->openacc.async.lock); | |
109 | gomp_fatal ("async %d creation failed", async); | |
110 | } | |
111 | ||
112 | /* Link new async queue into active list. */ | |
113 | goacc_aq_list n = gomp_malloc (sizeof (struct goacc_asyncqueue_list)); | |
114 | n->aq = dev->openacc.async.asyncqueue[async]; | |
115 | n->next = dev->openacc.async.active; | |
116 | dev->openacc.async.active = n; | |
117 | } | |
118 | ||
119 | ret_aq = dev->openacc.async.asyncqueue[async]; | |
120 | ||
121 | end: | |
122 | gomp_mutex_unlock (&dev->openacc.async.lock); | |
123 | return ret_aq; | |
124 | } | |
125 | ||
126 | /* Return the asyncqueue to be used for OpenACC async-argument ASYNC. This | |
127 | might return NULL if no asyncqueue is to be used. Otherwise, create the | |
128 | asyncqueue if it doesn't exist yet. */ | |
129 | ||
130 | attribute_hidden struct goacc_asyncqueue * | |
131 | get_goacc_asyncqueue (int async) | |
132 | { | |
133 | struct goacc_thread *thr = get_goacc_thread (); | |
134 | return lookup_goacc_asyncqueue (thr, true, async); | |
135 | } | |
136 | ||
137 | int | |
138 | acc_async_test (int async) | |
139 | { | |
d93bdab5 JB |
140 | struct goacc_thread *thr = goacc_thread (); |
141 | ||
142 | if (!thr || !thr->dev) | |
143 | gomp_fatal ("no device active"); | |
144 | ||
1f4c5b9b CLT |
145 | goacc_aq aq = lookup_goacc_asyncqueue (thr, false, async); |
146 | if (!aq) | |
147 | return 1; | |
5fae049d TS |
148 | |
149 | acc_prof_info prof_info; | |
150 | acc_api_info api_info; | |
151 | bool profiling_p = GOACC_PROFILING_SETUP_P (thr, &prof_info, &api_info); | |
152 | if (profiling_p) | |
153 | { | |
154 | prof_info.async = async; | |
155 | prof_info.async_queue = prof_info.async; | |
156 | } | |
157 | ||
158 | int res = thr->dev->openacc.async.test_func (aq); | |
159 | ||
160 | if (profiling_p) | |
161 | { | |
162 | thr->prof_info = NULL; | |
163 | thr->api_info = NULL; | |
164 | } | |
165 | ||
166 | return res; | |
1f4c5b9b CLT |
167 | } |
168 | ||
169 | int | |
170 | acc_async_test_all (void) | |
171 | { | |
172 | struct goacc_thread *thr = get_goacc_thread (); | |
173 | ||
5fae049d TS |
174 | acc_prof_info prof_info; |
175 | acc_api_info api_info; | |
176 | bool profiling_p = GOACC_PROFILING_SETUP_P (thr, &prof_info, &api_info); | |
177 | ||
1f4c5b9b CLT |
178 | int ret = 1; |
179 | gomp_mutex_lock (&thr->dev->openacc.async.lock); | |
180 | for (goacc_aq_list l = thr->dev->openacc.async.active; l; l = l->next) | |
181 | if (!thr->dev->openacc.async.test_func (l->aq)) | |
182 | { | |
183 | ret = 0; | |
184 | break; | |
185 | } | |
186 | gomp_mutex_unlock (&thr->dev->openacc.async.lock); | |
5fae049d TS |
187 | |
188 | if (profiling_p) | |
189 | { | |
190 | thr->prof_info = NULL; | |
191 | thr->api_info = NULL; | |
192 | } | |
193 | ||
1f4c5b9b CLT |
194 | return ret; |
195 | } | |
196 | ||
197 | void | |
198 | acc_wait (int async) | |
199 | { | |
200 | struct goacc_thread *thr = get_goacc_thread (); | |
201 | ||
202 | goacc_aq aq = lookup_goacc_asyncqueue (thr, false, async); | |
5fae049d TS |
203 | if (!aq) |
204 | return; | |
205 | ||
206 | acc_prof_info prof_info; | |
207 | acc_api_info api_info; | |
208 | bool profiling_p = GOACC_PROFILING_SETUP_P (thr, &prof_info, &api_info); | |
209 | if (profiling_p) | |
210 | { | |
211 | prof_info.async = async; | |
212 | prof_info.async_queue = prof_info.async; | |
213 | } | |
214 | ||
215 | if (!thr->dev->openacc.async.synchronize_func (aq)) | |
1f4c5b9b | 216 | gomp_fatal ("wait on %d failed", async); |
5fae049d TS |
217 | |
218 | if (profiling_p) | |
219 | { | |
220 | thr->prof_info = NULL; | |
221 | thr->api_info = NULL; | |
222 | } | |
41dbbb37 TS |
223 | } |
224 | ||
/* OpenACC 1.0 compatibility: acc_async_wait is another name for acc_wait.
   It is provided as a strong alias when HAVE_ATTRIBUTE_ALIAS is defined,
   and as a forwarding function otherwise.  */
#if defined (HAVE_ATTRIBUTE_ALIAS)
strong_alias (acc_wait, acc_async_wait)
#else
void
acc_async_wait (int async)
{
  acc_wait (async);
}
#endif
235 | ||
41dbbb37 TS |
236 | void |
237 | acc_wait_async (int async1, int async2) | |
238 | { | |
1f4c5b9b | 239 | struct goacc_thread *thr = get_goacc_thread (); |
d93bdab5 | 240 | |
1f4c5b9b CLT |
241 | goacc_aq aq1 = lookup_goacc_asyncqueue (thr, false, async1); |
242 | /* TODO: Is this also correct for acc_async_sync, assuming that in this case, | |
243 | we'll always be synchronous anyways? */ | |
244 | if (!aq1) | |
245 | return; | |
246 | ||
5fae049d TS |
247 | acc_prof_info prof_info; |
248 | acc_api_info api_info; | |
249 | bool profiling_p = GOACC_PROFILING_SETUP_P (thr, &prof_info, &api_info); | |
250 | if (profiling_p) | |
251 | { | |
252 | prof_info.async = async2; | |
253 | prof_info.async_queue = prof_info.async; | |
254 | } | |
255 | ||
1f4c5b9b CLT |
256 | goacc_aq aq2 = lookup_goacc_asyncqueue (thr, true, async2); |
257 | /* An async queue is always synchronized with itself. */ | |
258 | if (aq1 == aq2) | |
5fae049d | 259 | goto out_prof; |
d93bdab5 | 260 | |
1f4c5b9b CLT |
261 | if (aq2) |
262 | { | |
263 | if (!thr->dev->openacc.async.serialize_func (aq1, aq2)) | |
264 | gomp_fatal ("ordering of async ids %d and %d failed", async1, async2); | |
265 | } | |
266 | else | |
267 | { | |
268 | /* TODO: Local thread synchronization. | |
269 | Necessary for the "async2 == acc_async_sync" case, or can just skip? */ | |
270 | if (!thr->dev->openacc.async.synchronize_func (aq1)) | |
271 | gomp_fatal ("wait on %d failed", async1); | |
272 | } | |
5fae049d TS |
273 | |
274 | out_prof: | |
275 | if (profiling_p) | |
276 | { | |
277 | thr->prof_info = NULL; | |
278 | thr->api_info = NULL; | |
279 | } | |
41dbbb37 TS |
280 | } |
281 | ||
282 | void | |
283 | acc_wait_all (void) | |
1df3f842 | 284 | { |
5fae049d TS |
285 | struct goacc_thread *thr = goacc_thread (); |
286 | ||
287 | acc_prof_info prof_info; | |
288 | acc_api_info api_info; | |
289 | bool profiling_p = GOACC_PROFILING_SETUP_P (thr, &prof_info, &api_info); | |
d93bdab5 | 290 | |
1f4c5b9b | 291 | bool ret = true; |
5fae049d TS |
292 | gomp_mutex_lock (&thr->dev->openacc.async.lock); |
293 | for (goacc_aq_list l = thr->dev->openacc.async.active; l; l = l->next) | |
294 | ret &= thr->dev->openacc.async.synchronize_func (l->aq); | |
295 | gomp_mutex_unlock (&thr->dev->openacc.async.lock); | |
296 | ||
297 | if (profiling_p) | |
298 | { | |
299 | thr->prof_info = NULL; | |
300 | thr->api_info = NULL; | |
301 | } | |
d93bdab5 | 302 | |
1f4c5b9b CLT |
303 | if (!ret) |
304 | gomp_fatal ("wait all failed"); | |
41dbbb37 | 305 | } |
1df3f842 | 306 | |
/* OpenACC 1.0 compatibility: acc_async_wait_all is another name for
   acc_wait_all.  It is provided as a strong alias when HAVE_ATTRIBUTE_ALIAS
   is defined, and as a forwarding function otherwise.  */
#if defined (HAVE_ATTRIBUTE_ALIAS)
strong_alias (acc_wait_all, acc_async_wait_all)
#else
void
acc_async_wait_all (void)
{
  acc_wait_all ();
}
#endif
317 | ||
41dbbb37 TS |
318 | void |
319 | acc_wait_all_async (int async) | |
1df3f842 | 320 | { |
1f4c5b9b | 321 | struct goacc_thread *thr = get_goacc_thread (); |
1df3f842 | 322 | |
5fae049d TS |
323 | acc_prof_info prof_info; |
324 | acc_api_info api_info; | |
325 | bool profiling_p = GOACC_PROFILING_SETUP_P (thr, &prof_info, &api_info); | |
326 | if (profiling_p) | |
327 | { | |
328 | prof_info.async = async; | |
329 | prof_info.async_queue = prof_info.async; | |
330 | } | |
331 | ||
1f4c5b9b | 332 | goacc_aq waiting_queue = lookup_goacc_asyncqueue (thr, true, async); |
d93bdab5 | 333 | |
1f4c5b9b CLT |
334 | bool ret = true; |
335 | gomp_mutex_lock (&thr->dev->openacc.async.lock); | |
336 | for (goacc_aq_list l = thr->dev->openacc.async.active; l; l = l->next) | |
337 | { | |
338 | if (waiting_queue) | |
339 | ret &= thr->dev->openacc.async.serialize_func (l->aq, waiting_queue); | |
340 | else | |
341 | /* TODO: Local thread synchronization. | |
342 | Necessary for the "async2 == acc_async_sync" case, or can just skip? */ | |
343 | ret &= thr->dev->openacc.async.synchronize_func (l->aq); | |
344 | } | |
345 | gomp_mutex_unlock (&thr->dev->openacc.async.lock); | |
346 | ||
5fae049d TS |
347 | if (profiling_p) |
348 | { | |
349 | thr->prof_info = NULL; | |
350 | thr->api_info = NULL; | |
351 | } | |
352 | ||
1f4c5b9b CLT |
353 | if (!ret) |
354 | gomp_fatal ("wait all async(%d) failed", async); | |
355 | } | |
356 | ||
c5578b56 TS |
357 | void |
358 | GOACC_wait (int async, int num_waits, ...) | |
359 | { | |
360 | goacc_lazy_initialize (); | |
361 | ||
362 | struct goacc_thread *thr = goacc_thread (); | |
363 | ||
364 | /* No nesting. */ | |
365 | assert (thr->prof_info == NULL); | |
366 | assert (thr->api_info == NULL); | |
367 | acc_prof_info prof_info; | |
368 | acc_api_info api_info; | |
369 | bool profiling_p = GOACC_PROFILING_SETUP_P (thr, &prof_info, &api_info); | |
370 | if (profiling_p) | |
371 | { | |
372 | prof_info.async = async; | |
373 | prof_info.async_queue = prof_info.async; | |
374 | } | |
375 | ||
376 | if (num_waits) | |
377 | { | |
378 | va_list ap; | |
379 | ||
380 | va_start (ap, num_waits); | |
381 | goacc_wait (async, num_waits, &ap); | |
382 | va_end (ap); | |
383 | } | |
384 | else if (async == acc_async_sync) | |
385 | acc_wait_all (); | |
386 | else | |
387 | acc_wait_all_async (async); | |
388 | ||
389 | if (profiling_p) | |
390 | { | |
391 | thr->prof_info = NULL; | |
392 | thr->api_info = NULL; | |
393 | } | |
394 | } | |
395 | ||
396 | attribute_hidden void | |
397 | goacc_wait (int async, int num_waits, va_list *ap) | |
398 | { | |
399 | while (num_waits--) | |
400 | { | |
401 | int qid = va_arg (*ap, int); | |
402 | ||
403 | /* Waiting on ACC_ASYNC_NOVAL maps to 'wait all'. */ | |
404 | if (qid == acc_async_noval) | |
405 | { | |
406 | if (async == acc_async_sync) | |
407 | acc_wait_all (); | |
408 | else | |
409 | acc_wait_all_async (async); | |
410 | break; | |
411 | } | |
412 | ||
413 | if (acc_async_test (qid)) | |
414 | continue; | |
415 | ||
416 | if (async == acc_async_sync) | |
417 | acc_wait (qid); | |
418 | else if (qid == async) | |
419 | /* If we're waiting on the same asynchronous queue as we're | |
420 | launching on, the queue itself will order work as | |
421 | required, so there's no need to wait explicitly. */ | |
422 | ; | |
423 | else | |
424 | acc_wait_async (qid, async); | |
425 | } | |
426 | } | |
427 | ||
1f4c5b9b CLT |
428 | attribute_hidden void |
429 | goacc_async_free (struct gomp_device_descr *devicep, | |
430 | struct goacc_asyncqueue *aq, void *ptr) | |
431 | { | |
432 | if (!aq) | |
433 | free (ptr); | |
434 | else | |
435 | devicep->openacc.async.queue_callback_func (aq, free, ptr); | |
436 | } | |
437 | ||
438 | /* This function initializes the asyncqueues for the device specified by | |
439 | DEVICEP. TODO DEVICEP must be locked on entry, and remains locked on | |
440 | return. */ | |
441 | ||
442 | attribute_hidden void | |
443 | goacc_init_asyncqueues (struct gomp_device_descr *devicep) | |
444 | { | |
445 | devicep->openacc.async.nasyncqueue = 0; | |
446 | devicep->openacc.async.asyncqueue = NULL; | |
447 | devicep->openacc.async.active = NULL; | |
448 | gomp_mutex_init (&devicep->openacc.async.lock); | |
449 | } | |
d93bdab5 | 450 | |
1f4c5b9b CLT |
451 | /* This function finalizes the asyncqueues for the device specified by DEVICEP. |
452 | TODO DEVICEP must be locked on entry, and remains locked on return. */ | |
453 | ||
454 | attribute_hidden bool | |
455 | goacc_fini_asyncqueues (struct gomp_device_descr *devicep) | |
456 | { | |
457 | bool ret = true; | |
458 | gomp_mutex_lock (&devicep->openacc.async.lock); | |
459 | if (devicep->openacc.async.nasyncqueue > 0) | |
460 | { | |
461 | goacc_aq_list next; | |
462 | for (goacc_aq_list l = devicep->openacc.async.active; l; l = next) | |
463 | { | |
464 | ret &= devicep->openacc.async.destruct_func (l->aq); | |
465 | next = l->next; | |
466 | free (l); | |
467 | } | |
468 | free (devicep->openacc.async.asyncqueue); | |
469 | devicep->openacc.async.nasyncqueue = 0; | |
470 | devicep->openacc.async.asyncqueue = NULL; | |
471 | devicep->openacc.async.active = NULL; | |
472 | } | |
473 | gomp_mutex_unlock (&devicep->openacc.async.lock); | |
474 | gomp_mutex_destroy (&devicep->openacc.async.lock); | |
475 | return ret; | |
41dbbb37 | 476 | } |