]>
Commit | Line | Data |
---|---|---|
41dbbb37 TS |
1 | /* OpenACC Runtime initialization routines |
2 | ||
83ffe9cd | 3 | Copyright (C) 2013-2023 Free Software Foundation, Inc. |
41dbbb37 TS |
4 | |
5 | Contributed by Mentor Embedded. | |
6 | ||
7 | This file is part of the GNU Offloading and Multi Processing Library | |
8 | (libgomp). | |
9 | ||
10 | Libgomp is free software; you can redistribute it and/or modify it | |
11 | under the terms of the GNU General Public License as published by | |
12 | the Free Software Foundation; either version 3, or (at your option) | |
13 | any later version. | |
14 | ||
15 | Libgomp is distributed in the hope that it will be useful, but WITHOUT ANY | |
16 | WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS | |
17 | FOR A PARTICULAR PURPOSE. See the GNU General Public License for | |
18 | more details. | |
19 | ||
20 | Under Section 7 of GPL version 3, you are granted additional | |
21 | permissions described in the GCC Runtime Library Exception, version | |
22 | 3.1, as published by the Free Software Foundation. | |
23 | ||
24 | You should have received a copy of the GNU General Public License and | |
25 | a copy of the GCC Runtime Library Exception along with this program; | |
26 | see the files COPYING3 and COPYING.RUNTIME respectively. If not, see | |
27 | <http://www.gnu.org/licenses/>. */ | |
28 | ||
29 | #include "libgomp.h" | |
30 | #include "oacc-int.h" | |
31 | #include "openacc.h" | |
32 | #include <assert.h> | |
33 | #include <stdlib.h> | |
34 | #include <strings.h> | |
35 | #include <stdbool.h> | |
36 | #include <string.h> | |
37 | ||
d2463960 JB |
38 | /* This lock is used to protect access to cached_base_dev, dispatchers and |
39 | the (abstract) initialisation state of attached offloading devices. */ | |
40 | ||
41dbbb37 TS |
41 | static gomp_mutex_t acc_device_lock; |
42 | ||
b52643ab KCY |
43 | static gomp_mutex_t acc_init_state_lock; |
44 | static enum { uninitialized, initializing, initialized } acc_init_state | |
45 | = uninitialized; | |
46 | static pthread_t acc_init_thread; | |
47 | ||
d93bdab5 JB |
48 | /* A cached version of the dispatcher for the global "current" accelerator type, |
49 | e.g. used as the default when creating new host threads. This is the | |
50 | device-type equivalent of goacc_device_num (which specifies which device to | |
51 | use out of potentially several of the same type). If there are several | |
52 | devices of a given type, this points at the first one. */ | |
53 | ||
54 | static struct gomp_device_descr *cached_base_dev = NULL; | |
41dbbb37 TS |
55 | |
56 | #if defined HAVE_TLS || defined USE_EMUTLS | |
57 | __thread struct goacc_thread *goacc_tls_data; | |
58 | #else | |
59 | pthread_key_t goacc_tls_key; | |
60 | #endif | |
61 | static pthread_key_t goacc_cleanup_key; | |
62 | ||
41dbbb37 TS |
63 | static struct goacc_thread *goacc_threads; |
64 | static gomp_mutex_t goacc_thread_lock; | |
65 | ||
66 | /* An array of dispatchers for device types, indexed by the type. This array | |
67 | only references "base" devices, and other instances of the same type are | |
68 | found by simply indexing from each such device (which are stored linearly, | |
69 | grouped by device in target.c:devices). */ | |
70 | static struct gomp_device_descr *dispatchers[_ACC_device_hwm] = { 0 }; | |
71 | ||
72 | attribute_hidden void | |
73 | goacc_register (struct gomp_device_descr *disp) | |
74 | { | |
75 | /* Only register the 0th device here. */ | |
76 | if (disp->target_id != 0) | |
77 | return; | |
78 | ||
79 | gomp_mutex_lock (&acc_device_lock); | |
80 | ||
81 | assert (acc_device_type (disp->type) != acc_device_none | |
82 | && acc_device_type (disp->type) != acc_device_default | |
83 | && acc_device_type (disp->type) != acc_device_not_host); | |
84 | assert (!dispatchers[disp->type]); | |
85 | dispatchers[disp->type] = disp; | |
86 | ||
87 | gomp_mutex_unlock (&acc_device_lock); | |
88 | } | |
89 | ||
d8f0024b FH |
90 | static bool |
91 | known_device_type_p (acc_device_t d) | |
92 | { | |
93 | return d >= 0 && d < _ACC_device_hwm; | |
94 | } | |
95 | ||
96 | static void | |
97 | unknown_device_type_error (acc_device_t invalid_type) | |
98 | { | |
99 | gomp_fatal ("unknown device type %u", invalid_type); | |
100 | } | |
101 | ||
41dbbb37 TS |
/* Map an offload target NAME onto the name OpenACC uses for it: "gcn" becomes
   "radeon" and "nvptx" becomes "nvidia".  Any other NAME is returned
   unchanged (the very same pointer).  */

static const char *
get_openacc_name (const char *name)
{
  if (!strcmp (name, "nvptx"))
    return "nvidia";

  if (!strcmp (name, "gcn"))
    return "radeon";

  return name;
}
114 | ||
d93bdab5 JB |
115 | static const char * |
116 | name_of_acc_device_t (enum acc_device_t type) | |
117 | { | |
118 | switch (type) | |
119 | { | |
120 | case acc_device_none: return "none"; | |
121 | case acc_device_default: return "default"; | |
122 | case acc_device_host: return "host"; | |
d93bdab5 JB |
123 | case acc_device_not_host: return "not_host"; |
124 | case acc_device_nvidia: return "nvidia"; | |
989a5fb3 | 125 | case acc_device_radeon: return "radeon"; |
d8f0024b | 126 | default: unknown_device_type_error (type); |
d93bdab5 | 127 | } |
d8f0024b | 128 | __builtin_unreachable (); |
d93bdab5 JB |
129 | } |
130 | ||
9fb5fd44 JB |
131 | /* ACC_DEVICE_LOCK must be held before calling this function. If FAIL_IS_ERROR |
132 | is true, this function raises an error if there are no devices of type D, | |
133 | otherwise it returns NULL in that case. */ | |
d2463960 | 134 | |
41dbbb37 | 135 | static struct gomp_device_descr * |
9fb5fd44 | 136 | resolve_device (acc_device_t d, bool fail_is_error) |
41dbbb37 TS |
137 | { |
138 | acc_device_t d_arg = d; | |
139 | ||
140 | switch (d) | |
141 | { | |
142 | case acc_device_default: | |
143 | { | |
144 | if (goacc_device_type) | |
145 | { | |
146 | /* Lookup the named device. */ | |
d8f0024b | 147 | while (known_device_type_p (++d)) |
41dbbb37 TS |
148 | if (dispatchers[d] |
149 | && !strcasecmp (goacc_device_type, | |
150 | get_openacc_name (dispatchers[d]->name)) | |
683f1184 | 151 | && dispatchers[d]->get_num_devices_func (0) > 0) |
41dbbb37 TS |
152 | goto found; |
153 | ||
9fb5fd44 JB |
154 | if (fail_is_error) |
155 | { | |
156 | gomp_mutex_unlock (&acc_device_lock); | |
157 | gomp_fatal ("device type %s not supported", goacc_device_type); | |
158 | } | |
159 | else | |
160 | return NULL; | |
41dbbb37 TS |
161 | } |
162 | ||
163 | /* No default device specified, so start scanning for any non-host | |
164 | device that is available. */ | |
165 | d = acc_device_not_host; | |
166 | } | |
167 | /* FALLTHROUGH */ | |
168 | ||
169 | case acc_device_not_host: | |
170 | /* Find the first available device after acc_device_not_host. */ | |
d8f0024b | 171 | while (known_device_type_p (++d)) |
683f1184 | 172 | if (dispatchers[d] && dispatchers[d]->get_num_devices_func (0) > 0) |
41dbbb37 TS |
173 | goto found; |
174 | if (d_arg == acc_device_default) | |
175 | { | |
176 | d = acc_device_host; | |
177 | goto found; | |
178 | } | |
9fb5fd44 JB |
179 | if (fail_is_error) |
180 | { | |
181 | gomp_mutex_unlock (&acc_device_lock); | |
182 | gomp_fatal ("no device found"); | |
183 | } | |
184 | else | |
185 | return NULL; | |
41dbbb37 TS |
186 | break; |
187 | ||
188 | case acc_device_host: | |
189 | break; | |
190 | ||
191 | default: | |
d8f0024b | 192 | if (!known_device_type_p (d)) |
9fb5fd44 JB |
193 | { |
194 | if (fail_is_error) | |
195 | goto unsupported_device; | |
196 | else | |
197 | return NULL; | |
198 | } | |
41dbbb37 TS |
199 | break; |
200 | } | |
201 | found: | |
202 | ||
203 | assert (d != acc_device_none | |
204 | && d != acc_device_default | |
205 | && d != acc_device_not_host); | |
206 | ||
9fb5fd44 JB |
207 | if (dispatchers[d] == NULL && fail_is_error) |
208 | { | |
209 | unsupported_device: | |
210 | gomp_mutex_unlock (&acc_device_lock); | |
211 | gomp_fatal ("device type %s not supported", name_of_acc_device_t (d)); | |
212 | } | |
213 | ||
41dbbb37 TS |
214 | return dispatchers[d]; |
215 | } | |
216 | ||
9fb5fd44 JB |
217 | /* Emit a suitable error if no device of a particular type is available, or |
218 | the given device number is out-of-range. */ | |
219 | static void | |
220 | acc_dev_num_out_of_range (acc_device_t d, int ord, int ndevs) | |
221 | { | |
222 | if (ndevs == 0) | |
223 | gomp_fatal ("no devices of type %s available", name_of_acc_device_t (d)); | |
224 | else | |
225 | gomp_fatal ("device %u out of range", ord); | |
226 | } | |
227 | ||
41dbbb37 TS |
228 | /* This is called when plugins have been initialized, and serves to call |
229 | (indirectly) the target's device_init hook. Calling multiple times without | |
9fb5fd44 | 230 | an intervening acc_shutdown_1 call is an error. ACC_DEVICE_LOCK must be |
d2463960 | 231 | held before calling this function. */ |
41dbbb37 TS |
232 | |
233 | static struct gomp_device_descr * | |
5fae049d | 234 | acc_init_1 (acc_device_t d, acc_construct_t parent_construct, int implicit) |
41dbbb37 | 235 | { |
b52643ab KCY |
236 | gomp_mutex_lock (&acc_init_state_lock); |
237 | acc_init_state = initializing; | |
238 | acc_init_thread = pthread_self (); | |
239 | gomp_mutex_unlock (&acc_init_state_lock); | |
240 | ||
5fae049d TS |
241 | bool check_not_nested_p; |
242 | if (implicit) | |
243 | { | |
244 | /* In the implicit case, there should (TODO: must?) already be something | |
245 | have been set up for an outer construct. */ | |
246 | check_not_nested_p = false; | |
247 | } | |
248 | else | |
249 | { | |
250 | check_not_nested_p = true; | |
251 | /* TODO: should we set 'thr->prof_info' etc. in this case ('acc_init')? | |
252 | The problem is, that we don't have 'thr' yet? (So, | |
253 | 'check_not_nested_p = true' also is pointless actually.) */ | |
254 | } | |
255 | bool profiling_p = GOACC_PROFILING_DISPATCH_P (check_not_nested_p); | |
256 | ||
257 | acc_prof_info prof_info; | |
258 | if (profiling_p) | |
259 | { | |
260 | prof_info.event_type = acc_ev_device_init_start; | |
261 | prof_info.valid_bytes = _ACC_PROF_INFO_VALID_BYTES; | |
262 | prof_info.version = _ACC_PROF_INFO_VERSION; | |
263 | prof_info.device_type = d; | |
264 | prof_info.device_number = goacc_device_num; | |
265 | prof_info.thread_id = -1; | |
266 | prof_info.async = acc_async_sync; | |
267 | prof_info.async_queue = prof_info.async; | |
268 | prof_info.src_file = NULL; | |
269 | prof_info.func_name = NULL; | |
270 | prof_info.line_no = -1; | |
271 | prof_info.end_line_no = -1; | |
272 | prof_info.func_line_no = -1; | |
273 | prof_info.func_end_line_no = -1; | |
274 | } | |
275 | acc_event_info device_init_event_info; | |
276 | if (profiling_p) | |
277 | { | |
278 | device_init_event_info.other_event.event_type = prof_info.event_type; | |
279 | device_init_event_info.other_event.valid_bytes | |
280 | = _ACC_OTHER_EVENT_INFO_VALID_BYTES; | |
281 | device_init_event_info.other_event.parent_construct = parent_construct; | |
282 | device_init_event_info.other_event.implicit = implicit; | |
283 | device_init_event_info.other_event.tool_info = NULL; | |
284 | } | |
285 | acc_api_info api_info; | |
286 | if (profiling_p) | |
287 | { | |
288 | api_info.device_api = acc_device_api_none; | |
289 | api_info.valid_bytes = _ACC_API_INFO_VALID_BYTES; | |
290 | api_info.device_type = prof_info.device_type; | |
291 | api_info.vendor = -1; | |
292 | api_info.device_handle = NULL; | |
293 | api_info.context_handle = NULL; | |
294 | api_info.async_handle = NULL; | |
295 | } | |
296 | ||
297 | if (profiling_p) | |
298 | goacc_profiling_dispatch (&prof_info, &device_init_event_info, &api_info); | |
299 | ||
d93bdab5 JB |
300 | struct gomp_device_descr *base_dev, *acc_dev; |
301 | int ndevs; | |
41dbbb37 | 302 | |
9fb5fd44 | 303 | base_dev = resolve_device (d, true); |
d93bdab5 | 304 | |
683f1184 | 305 | ndevs = base_dev->get_num_devices_func (0); |
d93bdab5 | 306 | |
9fb5fd44 JB |
307 | if (ndevs <= 0 || goacc_device_num >= ndevs) |
308 | acc_dev_num_out_of_range (d, goacc_device_num, ndevs); | |
41dbbb37 | 309 | |
d93bdab5 | 310 | acc_dev = &base_dev[goacc_device_num]; |
41dbbb37 | 311 | |
d2463960 | 312 | gomp_mutex_lock (&acc_dev->lock); |
d84ffc0a | 313 | if (acc_dev->state == GOMP_DEVICE_INITIALIZED) |
d2463960 JB |
314 | { |
315 | gomp_mutex_unlock (&acc_dev->lock); | |
316 | gomp_fatal ("device already active"); | |
317 | } | |
41dbbb37 | 318 | |
41dbbb37 | 319 | gomp_init_device (acc_dev); |
d2463960 | 320 | gomp_mutex_unlock (&acc_dev->lock); |
41dbbb37 | 321 | |
5fae049d TS |
322 | if (profiling_p) |
323 | { | |
324 | prof_info.event_type = acc_ev_device_init_end; | |
325 | device_init_event_info.other_event.event_type = prof_info.event_type; | |
326 | goacc_profiling_dispatch (&prof_info, &device_init_event_info, | |
327 | &api_info); | |
328 | } | |
329 | ||
b52643ab KCY |
330 | /* We're setting 'initialized' *after* 'goacc_profiling_dispatch', so that a |
331 | nested 'acc_get_device_type' called from a profiling callback still sees | |
332 | 'initializing', so that we don't deadlock when it then again tries to lock | |
333 | 'goacc_prof_lock'. See also the discussion in 'acc_get_device_type'. */ | |
334 | gomp_mutex_lock (&acc_init_state_lock); | |
335 | acc_init_state = initialized; | |
336 | gomp_mutex_unlock (&acc_init_state_lock); | |
337 | ||
d93bdab5 JB |
338 | return base_dev; |
339 | } | |
340 | ||
9fb5fd44 | 341 | /* ACC_DEVICE_LOCK must be held before calling this function. */ |
d2463960 | 342 | |
d93bdab5 JB |
343 | static void |
344 | acc_shutdown_1 (acc_device_t d) | |
345 | { | |
346 | struct gomp_device_descr *base_dev; | |
347 | struct goacc_thread *walk; | |
348 | int ndevs, i; | |
349 | bool devices_active = false; | |
350 | ||
351 | /* Get the base device for this device type. */ | |
9fb5fd44 | 352 | base_dev = resolve_device (d, true); |
d93bdab5 | 353 | |
683f1184 | 354 | ndevs = base_dev->get_num_devices_func (0); |
22be2349 NS |
355 | |
356 | /* Unload all the devices of this type that have been opened. */ | |
357 | for (i = 0; i < ndevs; i++) | |
358 | { | |
359 | struct gomp_device_descr *acc_dev = &base_dev[i]; | |
360 | ||
361 | gomp_mutex_lock (&acc_dev->lock); | |
362 | gomp_unload_device (acc_dev); | |
363 | gomp_mutex_unlock (&acc_dev->lock); | |
364 | } | |
365 | ||
d93bdab5 JB |
366 | gomp_mutex_lock (&goacc_thread_lock); |
367 | ||
368 | /* Free target-specific TLS data and close all devices. */ | |
369 | for (walk = goacc_threads; walk != NULL; walk = walk->next) | |
370 | { | |
371 | if (walk->target_tls) | |
372 | base_dev->openacc.destroy_thread_data_func (walk->target_tls); | |
373 | ||
374 | walk->target_tls = NULL; | |
375 | ||
376 | /* This would mean the user is shutting down OpenACC in the middle of an | |
377 | "acc data" pragma. Likely not intentional. */ | |
378 | if (walk->mapped_data) | |
e38fdba4 JB |
379 | { |
380 | gomp_mutex_unlock (&goacc_thread_lock); | |
381 | gomp_fatal ("shutdown in 'acc data' region"); | |
382 | } | |
d93bdab5 JB |
383 | |
384 | /* Similarly, if this happens then user code has done something weird. */ | |
385 | if (walk->saved_bound_dev) | |
e38fdba4 JB |
386 | { |
387 | gomp_mutex_unlock (&goacc_thread_lock); | |
388 | gomp_fatal ("shutdown during host fallback"); | |
389 | } | |
d93bdab5 JB |
390 | |
391 | if (walk->dev) | |
392 | { | |
393 | gomp_mutex_lock (&walk->dev->lock); | |
378da98f JB |
394 | |
395 | while (walk->dev->mem_map.root) | |
396 | { | |
397 | splay_tree_key k = &walk->dev->mem_map.root->key; | |
398 | if (k->aux) | |
399 | k->aux->link_key = NULL; | |
400 | gomp_remove_var (walk->dev, k); | |
401 | } | |
402 | ||
d93bdab5 JB |
403 | gomp_mutex_unlock (&walk->dev->lock); |
404 | ||
405 | walk->dev = NULL; | |
406 | walk->base_dev = NULL; | |
407 | } | |
408 | } | |
409 | ||
410 | gomp_mutex_unlock (&goacc_thread_lock); | |
411 | ||
d93bdab5 | 412 | /* Close all the devices of this type that have been opened. */ |
6ce13072 | 413 | bool ret = true; |
d93bdab5 JB |
414 | for (i = 0; i < ndevs; i++) |
415 | { | |
416 | struct gomp_device_descr *acc_dev = &base_dev[i]; | |
d2463960 | 417 | gomp_mutex_lock (&acc_dev->lock); |
d84ffc0a | 418 | if (acc_dev->state == GOMP_DEVICE_INITIALIZED) |
d93bdab5 JB |
419 | { |
420 | devices_active = true; | |
1f4c5b9b | 421 | ret &= gomp_fini_device (acc_dev); |
d84ffc0a | 422 | acc_dev->state = GOMP_DEVICE_UNINITIALIZED; |
d93bdab5 | 423 | } |
d2463960 | 424 | gomp_mutex_unlock (&acc_dev->lock); |
d93bdab5 JB |
425 | } |
426 | ||
6ce13072 CLT |
427 | if (!ret) |
428 | gomp_fatal ("device finalization failed"); | |
429 | ||
d93bdab5 JB |
430 | if (!devices_active) |
431 | gomp_fatal ("no device initialized"); | |
41dbbb37 TS |
432 | } |
433 | ||
434 | static struct goacc_thread * | |
435 | goacc_new_thread (void) | |
436 | { | |
015c7760 | 437 | struct goacc_thread *thr = gomp_malloc (sizeof (struct goacc_thread)); |
41dbbb37 TS |
438 | |
439 | #if defined HAVE_TLS || defined USE_EMUTLS | |
440 | goacc_tls_data = thr; | |
441 | #else | |
442 | pthread_setspecific (goacc_tls_key, thr); | |
443 | #endif | |
444 | ||
445 | pthread_setspecific (goacc_cleanup_key, thr); | |
446 | ||
447 | gomp_mutex_lock (&goacc_thread_lock); | |
448 | thr->next = goacc_threads; | |
449 | goacc_threads = thr; | |
450 | gomp_mutex_unlock (&goacc_thread_lock); | |
451 | ||
452 | return thr; | |
453 | } | |
454 | ||
455 | static void | |
456 | goacc_destroy_thread (void *data) | |
457 | { | |
458 | struct goacc_thread *thr = data, *walk, *prev; | |
459 | ||
460 | gomp_mutex_lock (&goacc_thread_lock); | |
461 | ||
462 | if (thr) | |
463 | { | |
d93bdab5 JB |
464 | struct gomp_device_descr *acc_dev = thr->dev; |
465 | ||
466 | if (acc_dev && thr->target_tls) | |
41dbbb37 | 467 | { |
d93bdab5 | 468 | acc_dev->openacc.destroy_thread_data_func (thr->target_tls); |
41dbbb37 TS |
469 | thr->target_tls = NULL; |
470 | } | |
471 | ||
472 | assert (!thr->mapped_data); | |
473 | ||
474 | /* Remove from thread list. */ | |
475 | for (prev = NULL, walk = goacc_threads; walk; | |
476 | prev = walk, walk = walk->next) | |
477 | if (walk == thr) | |
478 | { | |
479 | if (prev == NULL) | |
480 | goacc_threads = walk->next; | |
481 | else | |
482 | prev->next = walk->next; | |
483 | ||
484 | free (thr); | |
485 | ||
486 | break; | |
487 | } | |
488 | ||
489 | assert (walk); | |
490 | } | |
491 | ||
492 | gomp_mutex_unlock (&goacc_thread_lock); | |
493 | } | |
494 | ||
d93bdab5 JB |
495 | /* Use the ORD'th device instance for the current host thread (or -1 for the |
496 | current global default). The device (and the runtime) must be initialised | |
497 | before calling this function. */ | |
41dbbb37 | 498 | |
d93bdab5 JB |
499 | void |
500 | goacc_attach_host_thread_to_device (int ord) | |
41dbbb37 TS |
501 | { |
502 | struct goacc_thread *thr = goacc_thread (); | |
d93bdab5 JB |
503 | struct gomp_device_descr *acc_dev = NULL, *base_dev = NULL; |
504 | int num_devices; | |
505 | ||
506 | if (thr && thr->dev && (thr->dev->target_id == ord || ord < 0)) | |
507 | return; | |
508 | ||
41dbbb37 TS |
509 | if (ord < 0) |
510 | ord = goacc_device_num; | |
d93bdab5 JB |
511 | |
512 | /* Decide which type of device to use. If the current thread has a device | |
513 | type already (e.g. set by acc_set_device_type), use that, else use the | |
514 | global default. */ | |
515 | if (thr && thr->base_dev) | |
516 | base_dev = thr->base_dev; | |
517 | else | |
518 | { | |
519 | assert (cached_base_dev); | |
520 | base_dev = cached_base_dev; | |
521 | } | |
522 | ||
683f1184 | 523 | num_devices = base_dev->get_num_devices_func (0); |
d93bdab5 | 524 | if (num_devices <= 0 || ord >= num_devices) |
9fb5fd44 JB |
525 | acc_dev_num_out_of_range (acc_device_type (base_dev->type), ord, |
526 | num_devices); | |
d93bdab5 | 527 | |
41dbbb37 TS |
528 | if (!thr) |
529 | thr = goacc_new_thread (); | |
d93bdab5 JB |
530 | |
531 | thr->base_dev = base_dev; | |
532 | thr->dev = acc_dev = &base_dev[ord]; | |
41dbbb37 TS |
533 | thr->saved_bound_dev = NULL; |
534 | thr->mapped_data = NULL; | |
5fae049d TS |
535 | thr->prof_info = NULL; |
536 | thr->api_info = NULL; | |
537 | /* Initially, all callbacks for all events are enabled. */ | |
538 | thr->prof_callbacks_enabled = true; | |
539 | ||
41dbbb37 | 540 | thr->target_tls |
d93bdab5 | 541 | = acc_dev->openacc.create_thread_data_func (ord); |
41dbbb37 TS |
542 | } |
543 | ||
544 | /* OpenACC 2.0a (3.2.12, 3.2.13) doesn't specify whether the serialization of | |
545 | init/shutdown is per-process or per-thread. We choose per-process. */ | |
546 | ||
547 | void | |
548 | acc_init (acc_device_t d) | |
549 | { | |
d8f0024b FH |
550 | if (!known_device_type_p (d)) |
551 | unknown_device_type_error (d); | |
552 | ||
e37288a6 | 553 | gomp_init_targets_once (); |
41dbbb37 TS |
554 | |
555 | gomp_mutex_lock (&acc_device_lock); | |
5fae049d | 556 | cached_base_dev = acc_init_1 (d, acc_construct_runtime_api, 0); |
41dbbb37 | 557 | gomp_mutex_unlock (&acc_device_lock); |
d93bdab5 JB |
558 | |
559 | goacc_attach_host_thread_to_device (-1); | |
41dbbb37 TS |
560 | } |
561 | ||
562 | ialias (acc_init) | |
563 | ||
41dbbb37 TS |
564 | void |
565 | acc_shutdown (acc_device_t d) | |
566 | { | |
d8f0024b FH |
567 | if (!known_device_type_p (d)) |
568 | unknown_device_type_error (d); | |
569 | ||
aa8b7d30 TS |
570 | gomp_init_targets_once (); |
571 | ||
41dbbb37 TS |
572 | gomp_mutex_lock (&acc_device_lock); |
573 | ||
574 | acc_shutdown_1 (d); | |
575 | ||
576 | gomp_mutex_unlock (&acc_device_lock); | |
577 | } | |
578 | ||
579 | ialias (acc_shutdown) | |
580 | ||
41dbbb37 TS |
581 | int |
582 | acc_get_num_devices (acc_device_t d) | |
583 | { | |
d8f0024b FH |
584 | if (!known_device_type_p (d)) |
585 | unknown_device_type_error (d); | |
586 | ||
41dbbb37 | 587 | int n = 0; |
d93bdab5 | 588 | struct gomp_device_descr *acc_dev; |
41dbbb37 TS |
589 | |
590 | if (d == acc_device_none) | |
591 | return 0; | |
592 | ||
d93bdab5 | 593 | gomp_init_targets_once (); |
41dbbb37 | 594 | |
d2463960 | 595 | gomp_mutex_lock (&acc_device_lock); |
9fb5fd44 | 596 | acc_dev = resolve_device (d, false); |
d2463960 JB |
597 | gomp_mutex_unlock (&acc_device_lock); |
598 | ||
41dbbb37 TS |
599 | if (!acc_dev) |
600 | return 0; | |
601 | ||
683f1184 | 602 | n = acc_dev->get_num_devices_func (0); |
41dbbb37 TS |
603 | if (n < 0) |
604 | n = 0; | |
605 | ||
606 | return n; | |
607 | } | |
608 | ||
609 | ialias (acc_get_num_devices) | |
610 | ||
d93bdab5 JB |
611 | /* Set the device type for the current thread only (using the current global |
612 | default device number), initialising that device if necessary. Also set the | |
613 | default device type for new threads to D. */ | |
614 | ||
41dbbb37 TS |
615 | void |
616 | acc_set_device_type (acc_device_t d) | |
617 | { | |
d8f0024b FH |
618 | if (!known_device_type_p (d)) |
619 | unknown_device_type_error (d); | |
620 | ||
d93bdab5 JB |
621 | struct gomp_device_descr *base_dev, *acc_dev; |
622 | struct goacc_thread *thr = goacc_thread (); | |
623 | ||
5fae049d TS |
624 | acc_prof_info prof_info; |
625 | acc_api_info api_info; | |
626 | bool profiling_p = GOACC_PROFILING_SETUP_P (thr, &prof_info, &api_info); | |
627 | if (profiling_p) | |
628 | prof_info.device_type = d; | |
629 | ||
e37288a6 | 630 | gomp_init_targets_once (); |
d93bdab5 | 631 | |
e37288a6 | 632 | gomp_mutex_lock (&acc_device_lock); |
d93bdab5 | 633 | |
9fb5fd44 | 634 | cached_base_dev = base_dev = resolve_device (d, true); |
d93bdab5 JB |
635 | acc_dev = &base_dev[goacc_device_num]; |
636 | ||
d2463960 | 637 | gomp_mutex_lock (&acc_dev->lock); |
d84ffc0a | 638 | if (acc_dev->state == GOMP_DEVICE_UNINITIALIZED) |
d93bdab5 | 639 | gomp_init_device (acc_dev); |
d2463960 | 640 | gomp_mutex_unlock (&acc_dev->lock); |
d93bdab5 JB |
641 | |
642 | gomp_mutex_unlock (&acc_device_lock); | |
643 | ||
644 | /* We're changing device type: invalidate the current thread's dev and | |
645 | base_dev pointers. */ | |
646 | if (thr && thr->base_dev != base_dev) | |
647 | { | |
648 | thr->base_dev = thr->dev = NULL; | |
649 | if (thr->mapped_data) | |
650 | gomp_fatal ("acc_set_device_type in 'acc data' region"); | |
651 | } | |
652 | ||
653 | goacc_attach_host_thread_to_device (-1); | |
5fae049d TS |
654 | |
655 | if (profiling_p) | |
656 | { | |
657 | thr->prof_info = NULL; | |
658 | thr->api_info = NULL; | |
659 | } | |
41dbbb37 TS |
660 | } |
661 | ||
662 | ialias (acc_set_device_type) | |
663 | ||
b52643ab KCY |
664 | static bool |
665 | self_initializing_p (void) | |
666 | { | |
667 | bool res; | |
668 | gomp_mutex_lock (&acc_init_state_lock); | |
669 | res = (acc_init_state == initializing | |
670 | && pthread_equal (acc_init_thread, pthread_self ())); | |
671 | gomp_mutex_unlock (&acc_init_state_lock); | |
672 | return res; | |
673 | } | |
674 | ||
41dbbb37 TS |
675 | acc_device_t |
676 | acc_get_device_type (void) | |
677 | { | |
678 | acc_device_t res = acc_device_none; | |
d93bdab5 JB |
679 | struct gomp_device_descr *dev; |
680 | struct goacc_thread *thr = goacc_thread (); | |
41dbbb37 | 681 | |
d93bdab5 JB |
682 | if (thr && thr->base_dev) |
683 | res = acc_device_type (thr->base_dev->type); | |
b52643ab KCY |
684 | else if (self_initializing_p ()) |
685 | /* The Cuda libaccinj64.so version 9.0+ calls acc_get_device_type during the | |
686 | acc_ev_device_init_start event callback, which is dispatched during | |
687 | acc_init_1. Trying to lock acc_device_lock during such a call (as we do | |
688 | in the else clause below), will result in deadlock, since the lock has | |
689 | already been taken by the acc_init_1 caller. We work around this problem | |
690 | by using the acc_get_device_type property "If the device type has not yet | |
691 | been selected, the value acc_device_none may be returned". */ | |
692 | ; | |
41dbbb37 TS |
693 | else |
694 | { | |
5fae049d TS |
695 | acc_prof_info prof_info; |
696 | acc_api_info api_info; | |
697 | bool profiling_p = GOACC_PROFILING_SETUP_P (thr, &prof_info, &api_info); | |
698 | ||
41dbbb37 TS |
699 | gomp_init_targets_once (); |
700 | ||
d2463960 | 701 | gomp_mutex_lock (&acc_device_lock); |
9fb5fd44 | 702 | dev = resolve_device (acc_device_default, true); |
d2463960 | 703 | gomp_mutex_unlock (&acc_device_lock); |
41dbbb37 | 704 | res = acc_device_type (dev->type); |
5fae049d TS |
705 | |
706 | if (profiling_p) | |
707 | { | |
708 | thr->prof_info = NULL; | |
709 | thr->api_info = NULL; | |
710 | } | |
41dbbb37 TS |
711 | } |
712 | ||
713 | assert (res != acc_device_default | |
6c84c8bf MR |
714 | && res != acc_device_not_host |
715 | && res != acc_device_current); | |
41dbbb37 TS |
716 | |
717 | return res; | |
718 | } | |
719 | ||
720 | ialias (acc_get_device_type) | |
721 | ||
722 | int | |
723 | acc_get_device_num (acc_device_t d) | |
724 | { | |
d8f0024b FH |
725 | if (!known_device_type_p (d)) |
726 | unknown_device_type_error (d); | |
727 | ||
41dbbb37 | 728 | const struct gomp_device_descr *dev; |
d93bdab5 | 729 | struct goacc_thread *thr = goacc_thread (); |
41dbbb37 | 730 | |
5fae049d TS |
731 | acc_prof_info prof_info; |
732 | acc_api_info api_info; | |
733 | bool profiling_p = GOACC_PROFILING_SETUP_P (thr, &prof_info, &api_info); | |
734 | if (profiling_p) | |
735 | prof_info.device_type = d; | |
736 | ||
e37288a6 | 737 | gomp_init_targets_once (); |
41dbbb37 | 738 | |
d2463960 | 739 | gomp_mutex_lock (&acc_device_lock); |
9fb5fd44 | 740 | dev = resolve_device (d, true); |
d2463960 | 741 | gomp_mutex_unlock (&acc_device_lock); |
41dbbb37 | 742 | |
5fae049d TS |
743 | if (profiling_p) |
744 | { | |
745 | thr->prof_info = NULL; | |
746 | thr->api_info = NULL; | |
747 | } | |
748 | ||
d93bdab5 JB |
749 | if (thr && thr->base_dev == dev && thr->dev) |
750 | return thr->dev->target_id; | |
41dbbb37 | 751 | |
d93bdab5 | 752 | return goacc_device_num; |
41dbbb37 TS |
753 | } |
754 | ||
755 | ialias (acc_get_device_num) | |
756 | ||
757 | void | |
d93bdab5 | 758 | acc_set_device_num (int ord, acc_device_t d) |
41dbbb37 | 759 | { |
d8f0024b FH |
760 | if (!known_device_type_p (d)) |
761 | unknown_device_type_error (d); | |
762 | ||
d93bdab5 | 763 | struct gomp_device_descr *base_dev, *acc_dev; |
41dbbb37 TS |
764 | int num_devices; |
765 | ||
e37288a6 | 766 | gomp_init_targets_once (); |
41dbbb37 | 767 | |
d93bdab5 JB |
768 | if (ord < 0) |
769 | ord = goacc_device_num; | |
41dbbb37 | 770 | |
d93bdab5 JB |
771 | if ((int) d == 0) |
772 | /* Set whatever device is being used by the current host thread to use | |
773 | device instance ORD. It's unclear if this is supposed to affect other | |
774 | host threads too (OpenACC 2.0 (3.2.4) acc_set_device_num). */ | |
775 | goacc_attach_host_thread_to_device (ord); | |
41dbbb37 TS |
776 | else |
777 | { | |
41dbbb37 TS |
778 | gomp_mutex_lock (&acc_device_lock); |
779 | ||
9fb5fd44 | 780 | cached_base_dev = base_dev = resolve_device (d, true); |
41dbbb37 | 781 | |
683f1184 | 782 | num_devices = base_dev->get_num_devices_func (0); |
41dbbb37 | 783 | |
9fb5fd44 JB |
784 | if (num_devices <= 0 || ord >= num_devices) |
785 | acc_dev_num_out_of_range (d, ord, num_devices); | |
41dbbb37 | 786 | |
d93bdab5 | 787 | acc_dev = &base_dev[ord]; |
41dbbb37 | 788 | |
d2463960 | 789 | gomp_mutex_lock (&acc_dev->lock); |
d84ffc0a | 790 | if (acc_dev->state == GOMP_DEVICE_UNINITIALIZED) |
d93bdab5 | 791 | gomp_init_device (acc_dev); |
d2463960 | 792 | gomp_mutex_unlock (&acc_dev->lock); |
41dbbb37 TS |
793 | |
794 | gomp_mutex_unlock (&acc_device_lock); | |
d93bdab5 JB |
795 | |
796 | goacc_attach_host_thread_to_device (ord); | |
41dbbb37 | 797 | } |
d93bdab5 JB |
798 | |
799 | goacc_device_num = ord; | |
41dbbb37 TS |
800 | } |
801 | ||
802 | ialias (acc_set_device_num) | |
803 | ||
6fc0385c | 804 | static union goacc_property_value |
6c84c8bf MR |
805 | get_property_any (int ord, acc_device_t d, acc_device_property_t prop) |
806 | { | |
807 | goacc_lazy_initialize (); | |
808 | struct goacc_thread *thr = goacc_thread (); | |
809 | ||
810 | if (d == acc_device_current && thr && thr->dev) | |
6fc0385c | 811 | return thr->dev->openacc.get_property_func (thr->dev->target_id, prop); |
6c84c8bf MR |
812 | |
813 | gomp_mutex_lock (&acc_device_lock); | |
814 | ||
815 | struct gomp_device_descr *dev = resolve_device (d, true); | |
816 | ||
683f1184 | 817 | int num_devices = dev->get_num_devices_func (0); |
6c84c8bf MR |
818 | |
819 | if (num_devices <= 0 || ord >= num_devices) | |
820 | acc_dev_num_out_of_range (d, ord, num_devices); | |
821 | ||
822 | dev += ord; | |
823 | ||
824 | gomp_mutex_lock (&dev->lock); | |
825 | if (dev->state == GOMP_DEVICE_UNINITIALIZED) | |
826 | gomp_init_device (dev); | |
827 | gomp_mutex_unlock (&dev->lock); | |
828 | ||
829 | gomp_mutex_unlock (&acc_device_lock); | |
830 | ||
831 | assert (dev); | |
832 | ||
6fc0385c | 833 | return dev->openacc.get_property_func (dev->target_id, prop); |
6c84c8bf MR |
834 | } |
835 | ||
836 | size_t | |
837 | acc_get_property (int ord, acc_device_t d, acc_device_property_t prop) | |
838 | { | |
839 | if (!known_device_type_p (d)) | |
840 | unknown_device_type_error(d); | |
841 | ||
6fc0385c | 842 | if (prop & GOACC_PROPERTY_STRING_MASK) |
6c84c8bf MR |
843 | return 0; |
844 | else | |
845 | return get_property_any (ord, d, prop).val; | |
846 | } | |
847 | ||
848 | ialias (acc_get_property) | |
849 | ||
850 | const char * | |
851 | acc_get_property_string (int ord, acc_device_t d, acc_device_property_t prop) | |
852 | { | |
853 | if (!known_device_type_p (d)) | |
854 | unknown_device_type_error(d); | |
855 | ||
6fc0385c | 856 | if (prop & GOACC_PROPERTY_STRING_MASK) |
6c84c8bf MR |
857 | return get_property_any (ord, d, prop).ptr; |
858 | else | |
859 | return NULL; | |
860 | } | |
861 | ||
862 | ialias (acc_get_property_string) | |
863 | ||
113020dc TS |
864 | /* For -O and higher, the compiler always attempts to expand acc_on_device, but |
865 | if the user disables the builtin, or calls it via a pointer, we'll need this | |
866 | version. | |
867 | ||
868 | Compile this with optimization, so that the compiler expands | |
d8f0024b FH |
869 | this, rather than generating infinitely recursive code. |
870 | ||
871 | The function just forwards its argument to __builtin_acc_on_device. It does | |
872 | not verify that the argument is a valid acc_device_t enumeration value. */ | |
164453bb NS |
873 | |
874 | int __attribute__ ((__optimize__ ("O2"))) | |
41dbbb37 TS |
875 | acc_on_device (acc_device_t dev) |
876 | { | |
b97e78b7 | 877 | return __builtin_acc_on_device (dev); |
41dbbb37 TS |
878 | } |
879 | ||
880 | ialias (acc_on_device) | |
881 | ||
882 | attribute_hidden void | |
883 | goacc_runtime_initialize (void) | |
884 | { | |
885 | gomp_mutex_init (&acc_device_lock); | |
886 | ||
887 | #if !(defined HAVE_TLS || defined USE_EMUTLS) | |
888 | pthread_key_create (&goacc_tls_key, NULL); | |
889 | #endif | |
890 | ||
891 | pthread_key_create (&goacc_cleanup_key, goacc_destroy_thread); | |
892 | ||
d93bdab5 | 893 | cached_base_dev = NULL; |
41dbbb37 TS |
894 | |
895 | goacc_threads = NULL; | |
896 | gomp_mutex_init (&goacc_thread_lock); | |
6bb4c3e2 CLT |
897 | |
898 | /* Initialize and register the 'host' device type. */ | |
899 | goacc_host_init (); | |
41dbbb37 TS |
900 | } |
901 | ||
2c829799 AM |
902 | static void __attribute__((destructor)) |
903 | goacc_runtime_deinitialize (void) | |
904 | { | |
905 | #if !(defined HAVE_TLS || defined USE_EMUTLS) | |
906 | pthread_key_delete (goacc_tls_key); | |
907 | #endif | |
908 | pthread_key_delete (goacc_cleanup_key); | |
909 | } | |
910 | ||
41dbbb37 TS |
911 | /* Compiler helper functions */ |
912 | ||
913 | attribute_hidden void | |
914 | goacc_save_and_set_bind (acc_device_t d) | |
915 | { | |
916 | struct goacc_thread *thr = goacc_thread (); | |
917 | ||
918 | assert (!thr->saved_bound_dev); | |
919 | ||
920 | thr->saved_bound_dev = thr->dev; | |
921 | thr->dev = dispatchers[d]; | |
922 | } | |
923 | ||
924 | attribute_hidden void | |
925 | goacc_restore_bind (void) | |
926 | { | |
927 | struct goacc_thread *thr = goacc_thread (); | |
928 | ||
929 | thr->dev = thr->saved_bound_dev; | |
930 | thr->saved_bound_dev = NULL; | |
931 | } | |
932 | ||
933 | /* This is called from any OpenACC support function that may need to implicitly | |
d93bdab5 JB |
934 | initialize the libgomp runtime, either globally or from a new host thread. |
935 | On exit "goacc_thread" will return a valid & populated thread block. */ | |
41dbbb37 TS |
936 | |
937 | attribute_hidden void | |
938 | goacc_lazy_initialize (void) | |
939 | { | |
940 | struct goacc_thread *thr = goacc_thread (); | |
941 | ||
942 | if (thr && thr->dev) | |
943 | return; | |
944 | ||
5fae049d TS |
945 | gomp_init_targets_once (); |
946 | ||
947 | gomp_mutex_lock (&acc_device_lock); | |
d93bdab5 | 948 | if (!cached_base_dev) |
5fae049d TS |
949 | cached_base_dev = acc_init_1 (acc_device_default, |
950 | acc_construct_parallel, 1); | |
951 | gomp_mutex_unlock (&acc_device_lock); | |
952 | ||
953 | goacc_attach_host_thread_to_device (-1); | |
41dbbb37 | 954 | } |