]>
Commit | Line | Data |
---|---|---|
ca4c3545 | 1 | /* OpenACC Runtime initialization routines |
2 | ||
fbd26352 | 3 | Copyright (C) 2013-2019 Free Software Foundation, Inc. |
ca4c3545 | 4 | |
5 | Contributed by Mentor Embedded. | |
6 | ||
7 | This file is part of the GNU Offloading and Multi Processing Library | |
8 | (libgomp). | |
9 | ||
10 | Libgomp is free software; you can redistribute it and/or modify it | |
11 | under the terms of the GNU General Public License as published by | |
12 | the Free Software Foundation; either version 3, or (at your option) | |
13 | any later version. | |
14 | ||
15 | Libgomp is distributed in the hope that it will be useful, but WITHOUT ANY | |
16 | WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS | |
17 | FOR A PARTICULAR PURPOSE. See the GNU General Public License for | |
18 | more details. | |
19 | ||
20 | Under Section 7 of GPL version 3, you are granted additional | |
21 | permissions described in the GCC Runtime Library Exception, version | |
22 | 3.1, as published by the Free Software Foundation. | |
23 | ||
24 | You should have received a copy of the GNU General Public License and | |
25 | a copy of the GCC Runtime Library Exception along with this program; | |
26 | see the files COPYING3 and COPYING.RUNTIME respectively. If not, see | |
27 | <http://www.gnu.org/licenses/>. */ | |
28 | ||
29 | #include "libgomp.h" | |
30 | #include "oacc-int.h" | |
31 | #include "openacc.h" | |
32 | #include <assert.h> | |
33 | #include <stdlib.h> | |
34 | #include <strings.h> | |
35 | #include <stdbool.h> | |
36 | #include <string.h> | |
37 | ||
d39966e5 | 38 | /* This lock is used to protect access to cached_base_dev, dispatchers and |
39 | the (abstract) initialisation state of attached offloading devices. */ | |
40 | ||
ca4c3545 | 41 | static gomp_mutex_t acc_device_lock; |
42 | ||
0a1fe572 | 43 | /* A cached version of the dispatcher for the global "current" accelerator type, |
44 | e.g. used as the default when creating new host threads. This is the | |
45 | device-type equivalent of goacc_device_num (which specifies which device to | |
46 | use out of potentially several of the same type). If there are several | |
47 | devices of a given type, this points at the first one. */ | |
48 | ||
49 | static struct gomp_device_descr *cached_base_dev = NULL; | |
ca4c3545 | 50 | |
51 | #if defined HAVE_TLS || defined USE_EMUTLS | |
52 | __thread struct goacc_thread *goacc_tls_data; | |
53 | #else | |
54 | pthread_key_t goacc_tls_key; | |
55 | #endif | |
56 | static pthread_key_t goacc_cleanup_key; | |
57 | ||
ca4c3545 | 58 | static struct goacc_thread *goacc_threads; |
59 | static gomp_mutex_t goacc_thread_lock; | |
60 | ||
61 | /* An array of dispatchers for device types, indexed by the type. This array | |
62 | only references "base" devices, and other instances of the same type are | |
63 | found by simply indexing from each such device (which are stored linearly, | |
64 | grouped by device in target.c:devices). */ | |
65 | static struct gomp_device_descr *dispatchers[_ACC_device_hwm] = { 0 }; | |
66 | ||
67 | attribute_hidden void | |
68 | goacc_register (struct gomp_device_descr *disp) | |
69 | { | |
70 | /* Only register the 0th device here. */ | |
71 | if (disp->target_id != 0) | |
72 | return; | |
73 | ||
74 | gomp_mutex_lock (&acc_device_lock); | |
75 | ||
76 | assert (acc_device_type (disp->type) != acc_device_none | |
77 | && acc_device_type (disp->type) != acc_device_default | |
78 | && acc_device_type (disp->type) != acc_device_not_host); | |
79 | assert (!dispatchers[disp->type]); | |
80 | dispatchers[disp->type] = disp; | |
81 | ||
82 | gomp_mutex_unlock (&acc_device_lock); | |
83 | } | |
84 | ||
/* Translate a plugin NAME into the spelling OpenACC uses for it: "nvptx" is
   called "nvidia"; any other name is handed back unchanged (the very pointer
   that was passed in).  */

static const char *
get_openacc_name (const char *name)
{
  return strcmp (name, "nvptx") != 0 ? name : "nvidia";
}
95 | ||
0a1fe572 | 96 | static const char * |
97 | name_of_acc_device_t (enum acc_device_t type) | |
98 | { | |
99 | switch (type) | |
100 | { | |
101 | case acc_device_none: return "none"; | |
102 | case acc_device_default: return "default"; | |
103 | case acc_device_host: return "host"; | |
0a1fe572 | 104 | case acc_device_not_host: return "not_host"; |
105 | case acc_device_nvidia: return "nvidia"; | |
106 | default: gomp_fatal ("unknown device type %u", (unsigned) type); | |
107 | } | |
108 | } | |
109 | ||
dd7f667e | 110 | /* ACC_DEVICE_LOCK must be held before calling this function. If FAIL_IS_ERROR |
111 | is true, this function raises an error if there are no devices of type D, | |
112 | otherwise it returns NULL in that case. */ | |
d39966e5 | 113 | |
ca4c3545 | 114 | static struct gomp_device_descr * |
dd7f667e | 115 | resolve_device (acc_device_t d, bool fail_is_error) |
ca4c3545 | 116 | { |
117 | acc_device_t d_arg = d; | |
118 | ||
119 | switch (d) | |
120 | { | |
121 | case acc_device_default: | |
122 | { | |
123 | if (goacc_device_type) | |
124 | { | |
125 | /* Lookup the named device. */ | |
126 | while (++d != _ACC_device_hwm) | |
127 | if (dispatchers[d] | |
128 | && !strcasecmp (goacc_device_type, | |
129 | get_openacc_name (dispatchers[d]->name)) | |
130 | && dispatchers[d]->get_num_devices_func () > 0) | |
131 | goto found; | |
132 | ||
dd7f667e | 133 | if (fail_is_error) |
134 | { | |
135 | gomp_mutex_unlock (&acc_device_lock); | |
136 | gomp_fatal ("device type %s not supported", goacc_device_type); | |
137 | } | |
138 | else | |
139 | return NULL; | |
ca4c3545 | 140 | } |
141 | ||
142 | /* No default device specified, so start scanning for any non-host | |
143 | device that is available. */ | |
144 | d = acc_device_not_host; | |
145 | } | |
146 | /* FALLTHROUGH */ | |
147 | ||
148 | case acc_device_not_host: | |
149 | /* Find the first available device after acc_device_not_host. */ | |
150 | while (++d != _ACC_device_hwm) | |
151 | if (dispatchers[d] && dispatchers[d]->get_num_devices_func () > 0) | |
152 | goto found; | |
153 | if (d_arg == acc_device_default) | |
154 | { | |
155 | d = acc_device_host; | |
156 | goto found; | |
157 | } | |
dd7f667e | 158 | if (fail_is_error) |
159 | { | |
160 | gomp_mutex_unlock (&acc_device_lock); | |
161 | gomp_fatal ("no device found"); | |
162 | } | |
163 | else | |
164 | return NULL; | |
ca4c3545 | 165 | break; |
166 | ||
167 | case acc_device_host: | |
168 | break; | |
169 | ||
170 | default: | |
171 | if (d > _ACC_device_hwm) | |
dd7f667e | 172 | { |
173 | if (fail_is_error) | |
174 | goto unsupported_device; | |
175 | else | |
176 | return NULL; | |
177 | } | |
ca4c3545 | 178 | break; |
179 | } | |
180 | found: | |
181 | ||
182 | assert (d != acc_device_none | |
183 | && d != acc_device_default | |
184 | && d != acc_device_not_host); | |
185 | ||
dd7f667e | 186 | if (dispatchers[d] == NULL && fail_is_error) |
187 | { | |
188 | unsupported_device: | |
189 | gomp_mutex_unlock (&acc_device_lock); | |
190 | gomp_fatal ("device type %s not supported", name_of_acc_device_t (d)); | |
191 | } | |
192 | ||
ca4c3545 | 193 | return dispatchers[d]; |
194 | } | |
195 | ||
dd7f667e | 196 | /* Emit a suitable error if no device of a particular type is available, or |
197 | the given device number is out-of-range. */ | |
198 | static void | |
199 | acc_dev_num_out_of_range (acc_device_t d, int ord, int ndevs) | |
200 | { | |
201 | if (ndevs == 0) | |
202 | gomp_fatal ("no devices of type %s available", name_of_acc_device_t (d)); | |
203 | else | |
204 | gomp_fatal ("device %u out of range", ord); | |
205 | } | |
206 | ||
ca4c3545 | 207 | /* This is called when plugins have been initialized, and serves to call |
208 | (indirectly) the target's device_init hook. Calling multiple times without | |
dd7f667e | 209 | an intervening acc_shutdown_1 call is an error. ACC_DEVICE_LOCK must be |
d39966e5 | 210 | held before calling this function. */ |
ca4c3545 | 211 | |
212 | static struct gomp_device_descr * | |
5feffd51 | 213 | acc_init_1 (acc_device_t d, acc_construct_t parent_construct, int implicit) |
ca4c3545 | 214 | { |
5feffd51 | 215 | bool check_not_nested_p; |
216 | if (implicit) | |
217 | { | |
218 | /* In the implicit case, there should (TODO: must?) already be something | |
219 | have been set up for an outer construct. */ | |
220 | check_not_nested_p = false; | |
221 | } | |
222 | else | |
223 | { | |
224 | check_not_nested_p = true; | |
225 | /* TODO: should we set 'thr->prof_info' etc. in this case ('acc_init')? | |
226 | The problem is, that we don't have 'thr' yet? (So, | |
227 | 'check_not_nested_p = true' also is pointless actually.) */ | |
228 | } | |
229 | bool profiling_p = GOACC_PROFILING_DISPATCH_P (check_not_nested_p); | |
230 | ||
231 | acc_prof_info prof_info; | |
232 | if (profiling_p) | |
233 | { | |
234 | prof_info.event_type = acc_ev_device_init_start; | |
235 | prof_info.valid_bytes = _ACC_PROF_INFO_VALID_BYTES; | |
236 | prof_info.version = _ACC_PROF_INFO_VERSION; | |
237 | prof_info.device_type = d; | |
238 | prof_info.device_number = goacc_device_num; | |
239 | prof_info.thread_id = -1; | |
240 | prof_info.async = acc_async_sync; | |
241 | prof_info.async_queue = prof_info.async; | |
242 | prof_info.src_file = NULL; | |
243 | prof_info.func_name = NULL; | |
244 | prof_info.line_no = -1; | |
245 | prof_info.end_line_no = -1; | |
246 | prof_info.func_line_no = -1; | |
247 | prof_info.func_end_line_no = -1; | |
248 | } | |
249 | acc_event_info device_init_event_info; | |
250 | if (profiling_p) | |
251 | { | |
252 | device_init_event_info.other_event.event_type = prof_info.event_type; | |
253 | device_init_event_info.other_event.valid_bytes | |
254 | = _ACC_OTHER_EVENT_INFO_VALID_BYTES; | |
255 | device_init_event_info.other_event.parent_construct = parent_construct; | |
256 | device_init_event_info.other_event.implicit = implicit; | |
257 | device_init_event_info.other_event.tool_info = NULL; | |
258 | } | |
259 | acc_api_info api_info; | |
260 | if (profiling_p) | |
261 | { | |
262 | api_info.device_api = acc_device_api_none; | |
263 | api_info.valid_bytes = _ACC_API_INFO_VALID_BYTES; | |
264 | api_info.device_type = prof_info.device_type; | |
265 | api_info.vendor = -1; | |
266 | api_info.device_handle = NULL; | |
267 | api_info.context_handle = NULL; | |
268 | api_info.async_handle = NULL; | |
269 | } | |
270 | ||
271 | if (profiling_p) | |
272 | goacc_profiling_dispatch (&prof_info, &device_init_event_info, &api_info); | |
273 | ||
0a1fe572 | 274 | struct gomp_device_descr *base_dev, *acc_dev; |
275 | int ndevs; | |
ca4c3545 | 276 | |
dd7f667e | 277 | base_dev = resolve_device (d, true); |
0a1fe572 | 278 | |
279 | ndevs = base_dev->get_num_devices_func (); | |
280 | ||
dd7f667e | 281 | if (ndevs <= 0 || goacc_device_num >= ndevs) |
282 | acc_dev_num_out_of_range (d, goacc_device_num, ndevs); | |
ca4c3545 | 283 | |
0a1fe572 | 284 | acc_dev = &base_dev[goacc_device_num]; |
ca4c3545 | 285 | |
d39966e5 | 286 | gomp_mutex_lock (&acc_dev->lock); |
f87b2900 | 287 | if (acc_dev->state == GOMP_DEVICE_INITIALIZED) |
d39966e5 | 288 | { |
289 | gomp_mutex_unlock (&acc_dev->lock); | |
290 | gomp_fatal ("device already active"); | |
291 | } | |
ca4c3545 | 292 | |
ca4c3545 | 293 | gomp_init_device (acc_dev); |
d39966e5 | 294 | gomp_mutex_unlock (&acc_dev->lock); |
ca4c3545 | 295 | |
5feffd51 | 296 | if (profiling_p) |
297 | { | |
298 | prof_info.event_type = acc_ev_device_init_end; | |
299 | device_init_event_info.other_event.event_type = prof_info.event_type; | |
300 | goacc_profiling_dispatch (&prof_info, &device_init_event_info, | |
301 | &api_info); | |
302 | } | |
303 | ||
0a1fe572 | 304 | return base_dev; |
305 | } | |
306 | ||
dd7f667e | 307 | /* ACC_DEVICE_LOCK must be held before calling this function. */ |
d39966e5 | 308 | |
0a1fe572 | 309 | static void |
310 | acc_shutdown_1 (acc_device_t d) | |
311 | { | |
312 | struct gomp_device_descr *base_dev; | |
313 | struct goacc_thread *walk; | |
314 | int ndevs, i; | |
315 | bool devices_active = false; | |
316 | ||
317 | /* Get the base device for this device type. */ | |
dd7f667e | 318 | base_dev = resolve_device (d, true); |
0a1fe572 | 319 | |
7de5731e | 320 | ndevs = base_dev->get_num_devices_func (); |
321 | ||
322 | /* Unload all the devices of this type that have been opened. */ | |
323 | for (i = 0; i < ndevs; i++) | |
324 | { | |
325 | struct gomp_device_descr *acc_dev = &base_dev[i]; | |
326 | ||
327 | gomp_mutex_lock (&acc_dev->lock); | |
328 | gomp_unload_device (acc_dev); | |
329 | gomp_mutex_unlock (&acc_dev->lock); | |
330 | } | |
331 | ||
0a1fe572 | 332 | gomp_mutex_lock (&goacc_thread_lock); |
333 | ||
334 | /* Free target-specific TLS data and close all devices. */ | |
335 | for (walk = goacc_threads; walk != NULL; walk = walk->next) | |
336 | { | |
337 | if (walk->target_tls) | |
338 | base_dev->openacc.destroy_thread_data_func (walk->target_tls); | |
339 | ||
340 | walk->target_tls = NULL; | |
341 | ||
342 | /* This would mean the user is shutting down OpenACC in the middle of an | |
343 | "acc data" pragma. Likely not intentional. */ | |
344 | if (walk->mapped_data) | |
09f66ac1 | 345 | { |
346 | gomp_mutex_unlock (&goacc_thread_lock); | |
347 | gomp_fatal ("shutdown in 'acc data' region"); | |
348 | } | |
0a1fe572 | 349 | |
350 | /* Similarly, if this happens then user code has done something weird. */ | |
351 | if (walk->saved_bound_dev) | |
09f66ac1 | 352 | { |
353 | gomp_mutex_unlock (&goacc_thread_lock); | |
354 | gomp_fatal ("shutdown during host fallback"); | |
355 | } | |
0a1fe572 | 356 | |
357 | if (walk->dev) | |
358 | { | |
359 | gomp_mutex_lock (&walk->dev->lock); | |
360 | gomp_free_memmap (&walk->dev->mem_map); | |
361 | gomp_mutex_unlock (&walk->dev->lock); | |
362 | ||
363 | walk->dev = NULL; | |
364 | walk->base_dev = NULL; | |
365 | } | |
366 | } | |
367 | ||
368 | gomp_mutex_unlock (&goacc_thread_lock); | |
369 | ||
0a1fe572 | 370 | /* Close all the devices of this type that have been opened. */ |
9b50ad1d | 371 | bool ret = true; |
0a1fe572 | 372 | for (i = 0; i < ndevs; i++) |
373 | { | |
374 | struct gomp_device_descr *acc_dev = &base_dev[i]; | |
d39966e5 | 375 | gomp_mutex_lock (&acc_dev->lock); |
f87b2900 | 376 | if (acc_dev->state == GOMP_DEVICE_INITIALIZED) |
0a1fe572 | 377 | { |
378 | devices_active = true; | |
534b5e00 | 379 | ret &= gomp_fini_device (acc_dev); |
f87b2900 | 380 | acc_dev->state = GOMP_DEVICE_UNINITIALIZED; |
0a1fe572 | 381 | } |
d39966e5 | 382 | gomp_mutex_unlock (&acc_dev->lock); |
0a1fe572 | 383 | } |
384 | ||
9b50ad1d | 385 | if (!ret) |
386 | gomp_fatal ("device finalization failed"); | |
387 | ||
0a1fe572 | 388 | if (!devices_active) |
389 | gomp_fatal ("no device initialized"); | |
ca4c3545 | 390 | } |
391 | ||
392 | static struct goacc_thread * | |
393 | goacc_new_thread (void) | |
394 | { | |
43f7268a | 395 | struct goacc_thread *thr = gomp_malloc (sizeof (struct goacc_thread)); |
ca4c3545 | 396 | |
397 | #if defined HAVE_TLS || defined USE_EMUTLS | |
398 | goacc_tls_data = thr; | |
399 | #else | |
400 | pthread_setspecific (goacc_tls_key, thr); | |
401 | #endif | |
402 | ||
403 | pthread_setspecific (goacc_cleanup_key, thr); | |
404 | ||
405 | gomp_mutex_lock (&goacc_thread_lock); | |
406 | thr->next = goacc_threads; | |
407 | goacc_threads = thr; | |
408 | gomp_mutex_unlock (&goacc_thread_lock); | |
409 | ||
410 | return thr; | |
411 | } | |
412 | ||
413 | static void | |
414 | goacc_destroy_thread (void *data) | |
415 | { | |
416 | struct goacc_thread *thr = data, *walk, *prev; | |
417 | ||
418 | gomp_mutex_lock (&goacc_thread_lock); | |
419 | ||
420 | if (thr) | |
421 | { | |
0a1fe572 | 422 | struct gomp_device_descr *acc_dev = thr->dev; |
423 | ||
424 | if (acc_dev && thr->target_tls) | |
ca4c3545 | 425 | { |
0a1fe572 | 426 | acc_dev->openacc.destroy_thread_data_func (thr->target_tls); |
ca4c3545 | 427 | thr->target_tls = NULL; |
428 | } | |
429 | ||
430 | assert (!thr->mapped_data); | |
431 | ||
432 | /* Remove from thread list. */ | |
433 | for (prev = NULL, walk = goacc_threads; walk; | |
434 | prev = walk, walk = walk->next) | |
435 | if (walk == thr) | |
436 | { | |
437 | if (prev == NULL) | |
438 | goacc_threads = walk->next; | |
439 | else | |
440 | prev->next = walk->next; | |
441 | ||
442 | free (thr); | |
443 | ||
444 | break; | |
445 | } | |
446 | ||
447 | assert (walk); | |
448 | } | |
449 | ||
450 | gomp_mutex_unlock (&goacc_thread_lock); | |
451 | } | |
452 | ||
0a1fe572 | 453 | /* Use the ORD'th device instance for the current host thread (or -1 for the |
454 | current global default). The device (and the runtime) must be initialised | |
455 | before calling this function. */ | |
ca4c3545 | 456 | |
0a1fe572 | 457 | void |
458 | goacc_attach_host_thread_to_device (int ord) | |
ca4c3545 | 459 | { |
460 | struct goacc_thread *thr = goacc_thread (); | |
0a1fe572 | 461 | struct gomp_device_descr *acc_dev = NULL, *base_dev = NULL; |
462 | int num_devices; | |
463 | ||
464 | if (thr && thr->dev && (thr->dev->target_id == ord || ord < 0)) | |
465 | return; | |
466 | ||
ca4c3545 | 467 | if (ord < 0) |
468 | ord = goacc_device_num; | |
0a1fe572 | 469 | |
470 | /* Decide which type of device to use. If the current thread has a device | |
471 | type already (e.g. set by acc_set_device_type), use that, else use the | |
472 | global default. */ | |
473 | if (thr && thr->base_dev) | |
474 | base_dev = thr->base_dev; | |
475 | else | |
476 | { | |
477 | assert (cached_base_dev); | |
478 | base_dev = cached_base_dev; | |
479 | } | |
480 | ||
481 | num_devices = base_dev->get_num_devices_func (); | |
482 | if (num_devices <= 0 || ord >= num_devices) | |
dd7f667e | 483 | acc_dev_num_out_of_range (acc_device_type (base_dev->type), ord, |
484 | num_devices); | |
0a1fe572 | 485 | |
ca4c3545 | 486 | if (!thr) |
487 | thr = goacc_new_thread (); | |
0a1fe572 | 488 | |
489 | thr->base_dev = base_dev; | |
490 | thr->dev = acc_dev = &base_dev[ord]; | |
ca4c3545 | 491 | thr->saved_bound_dev = NULL; |
492 | thr->mapped_data = NULL; | |
5feffd51 | 493 | thr->prof_info = NULL; |
494 | thr->api_info = NULL; | |
495 | /* Initially, all callbacks for all events are enabled. */ | |
496 | thr->prof_callbacks_enabled = true; | |
497 | ||
ca4c3545 | 498 | thr->target_tls |
0a1fe572 | 499 | = acc_dev->openacc.create_thread_data_func (ord); |
ca4c3545 | 500 | } |
501 | ||
502 | /* OpenACC 2.0a (3.2.12, 3.2.13) doesn't specify whether the serialization of | |
503 | init/shutdown is per-process or per-thread. We choose per-process. */ | |
504 | ||
505 | void | |
506 | acc_init (acc_device_t d) | |
507 | { | |
921d472d | 508 | gomp_init_targets_once (); |
ca4c3545 | 509 | |
510 | gomp_mutex_lock (&acc_device_lock); | |
5feffd51 | 511 | cached_base_dev = acc_init_1 (d, acc_construct_runtime_api, 0); |
ca4c3545 | 512 | gomp_mutex_unlock (&acc_device_lock); |
0a1fe572 | 513 | |
514 | goacc_attach_host_thread_to_device (-1); | |
ca4c3545 | 515 | } |
516 | ||
517 | ialias (acc_init) | |
518 | ||
ca4c3545 | 519 | void |
520 | acc_shutdown (acc_device_t d) | |
521 | { | |
cfe316ad | 522 | gomp_init_targets_once (); |
523 | ||
ca4c3545 | 524 | gomp_mutex_lock (&acc_device_lock); |
525 | ||
526 | acc_shutdown_1 (d); | |
527 | ||
528 | gomp_mutex_unlock (&acc_device_lock); | |
529 | } | |
530 | ||
531 | ialias (acc_shutdown) | |
532 | ||
ca4c3545 | 533 | int |
534 | acc_get_num_devices (acc_device_t d) | |
535 | { | |
536 | int n = 0; | |
0a1fe572 | 537 | struct gomp_device_descr *acc_dev; |
ca4c3545 | 538 | |
539 | if (d == acc_device_none) | |
540 | return 0; | |
541 | ||
0a1fe572 | 542 | gomp_init_targets_once (); |
ca4c3545 | 543 | |
d39966e5 | 544 | gomp_mutex_lock (&acc_device_lock); |
dd7f667e | 545 | acc_dev = resolve_device (d, false); |
d39966e5 | 546 | gomp_mutex_unlock (&acc_device_lock); |
547 | ||
ca4c3545 | 548 | if (!acc_dev) |
549 | return 0; | |
550 | ||
551 | n = acc_dev->get_num_devices_func (); | |
552 | if (n < 0) | |
553 | n = 0; | |
554 | ||
555 | return n; | |
556 | } | |
557 | ||
558 | ialias (acc_get_num_devices) | |
559 | ||
0a1fe572 | 560 | /* Set the device type for the current thread only (using the current global |
561 | default device number), initialising that device if necessary. Also set the | |
562 | default device type for new threads to D. */ | |
563 | ||
ca4c3545 | 564 | void |
565 | acc_set_device_type (acc_device_t d) | |
566 | { | |
0a1fe572 | 567 | struct gomp_device_descr *base_dev, *acc_dev; |
568 | struct goacc_thread *thr = goacc_thread (); | |
569 | ||
5feffd51 | 570 | acc_prof_info prof_info; |
571 | acc_api_info api_info; | |
572 | bool profiling_p = GOACC_PROFILING_SETUP_P (thr, &prof_info, &api_info); | |
573 | if (profiling_p) | |
574 | prof_info.device_type = d; | |
575 | ||
921d472d | 576 | gomp_init_targets_once (); |
0a1fe572 | 577 | |
921d472d | 578 | gomp_mutex_lock (&acc_device_lock); |
0a1fe572 | 579 | |
dd7f667e | 580 | cached_base_dev = base_dev = resolve_device (d, true); |
0a1fe572 | 581 | acc_dev = &base_dev[goacc_device_num]; |
582 | ||
d39966e5 | 583 | gomp_mutex_lock (&acc_dev->lock); |
f87b2900 | 584 | if (acc_dev->state == GOMP_DEVICE_UNINITIALIZED) |
0a1fe572 | 585 | gomp_init_device (acc_dev); |
d39966e5 | 586 | gomp_mutex_unlock (&acc_dev->lock); |
0a1fe572 | 587 | |
588 | gomp_mutex_unlock (&acc_device_lock); | |
589 | ||
590 | /* We're changing device type: invalidate the current thread's dev and | |
591 | base_dev pointers. */ | |
592 | if (thr && thr->base_dev != base_dev) | |
593 | { | |
594 | thr->base_dev = thr->dev = NULL; | |
595 | if (thr->mapped_data) | |
596 | gomp_fatal ("acc_set_device_type in 'acc data' region"); | |
597 | } | |
598 | ||
599 | goacc_attach_host_thread_to_device (-1); | |
5feffd51 | 600 | |
601 | if (profiling_p) | |
602 | { | |
603 | thr->prof_info = NULL; | |
604 | thr->api_info = NULL; | |
605 | } | |
ca4c3545 | 606 | } |
607 | ||
608 | ialias (acc_set_device_type) | |
609 | ||
610 | acc_device_t | |
611 | acc_get_device_type (void) | |
612 | { | |
613 | acc_device_t res = acc_device_none; | |
0a1fe572 | 614 | struct gomp_device_descr *dev; |
615 | struct goacc_thread *thr = goacc_thread (); | |
ca4c3545 | 616 | |
0a1fe572 | 617 | if (thr && thr->base_dev) |
618 | res = acc_device_type (thr->base_dev->type); | |
ca4c3545 | 619 | else |
620 | { | |
5feffd51 | 621 | acc_prof_info prof_info; |
622 | acc_api_info api_info; | |
623 | bool profiling_p = GOACC_PROFILING_SETUP_P (thr, &prof_info, &api_info); | |
624 | ||
ca4c3545 | 625 | gomp_init_targets_once (); |
626 | ||
d39966e5 | 627 | gomp_mutex_lock (&acc_device_lock); |
dd7f667e | 628 | dev = resolve_device (acc_device_default, true); |
d39966e5 | 629 | gomp_mutex_unlock (&acc_device_lock); |
ca4c3545 | 630 | res = acc_device_type (dev->type); |
5feffd51 | 631 | |
632 | if (profiling_p) | |
633 | { | |
634 | thr->prof_info = NULL; | |
635 | thr->api_info = NULL; | |
636 | } | |
ca4c3545 | 637 | } |
638 | ||
639 | assert (res != acc_device_default | |
640 | && res != acc_device_not_host); | |
641 | ||
642 | return res; | |
643 | } | |
644 | ||
645 | ialias (acc_get_device_type) | |
646 | ||
647 | int | |
648 | acc_get_device_num (acc_device_t d) | |
649 | { | |
650 | const struct gomp_device_descr *dev; | |
0a1fe572 | 651 | struct goacc_thread *thr = goacc_thread (); |
ca4c3545 | 652 | |
653 | if (d >= _ACC_device_hwm) | |
dd7f667e | 654 | gomp_fatal ("unknown device type %u", (unsigned) d); |
ca4c3545 | 655 | |
5feffd51 | 656 | acc_prof_info prof_info; |
657 | acc_api_info api_info; | |
658 | bool profiling_p = GOACC_PROFILING_SETUP_P (thr, &prof_info, &api_info); | |
659 | if (profiling_p) | |
660 | prof_info.device_type = d; | |
661 | ||
921d472d | 662 | gomp_init_targets_once (); |
ca4c3545 | 663 | |
d39966e5 | 664 | gomp_mutex_lock (&acc_device_lock); |
dd7f667e | 665 | dev = resolve_device (d, true); |
d39966e5 | 666 | gomp_mutex_unlock (&acc_device_lock); |
ca4c3545 | 667 | |
5feffd51 | 668 | if (profiling_p) |
669 | { | |
670 | thr->prof_info = NULL; | |
671 | thr->api_info = NULL; | |
672 | } | |
673 | ||
0a1fe572 | 674 | if (thr && thr->base_dev == dev && thr->dev) |
675 | return thr->dev->target_id; | |
ca4c3545 | 676 | |
0a1fe572 | 677 | return goacc_device_num; |
ca4c3545 | 678 | } |
679 | ||
680 | ialias (acc_get_device_num) | |
681 | ||
682 | void | |
0a1fe572 | 683 | acc_set_device_num (int ord, acc_device_t d) |
ca4c3545 | 684 | { |
0a1fe572 | 685 | struct gomp_device_descr *base_dev, *acc_dev; |
ca4c3545 | 686 | int num_devices; |
687 | ||
921d472d | 688 | gomp_init_targets_once (); |
ca4c3545 | 689 | |
0a1fe572 | 690 | if (ord < 0) |
691 | ord = goacc_device_num; | |
ca4c3545 | 692 | |
0a1fe572 | 693 | if ((int) d == 0) |
694 | /* Set whatever device is being used by the current host thread to use | |
695 | device instance ORD. It's unclear if this is supposed to affect other | |
696 | host threads too (OpenACC 2.0 (3.2.4) acc_set_device_num). */ | |
697 | goacc_attach_host_thread_to_device (ord); | |
ca4c3545 | 698 | else |
699 | { | |
ca4c3545 | 700 | gomp_mutex_lock (&acc_device_lock); |
701 | ||
dd7f667e | 702 | cached_base_dev = base_dev = resolve_device (d, true); |
ca4c3545 | 703 | |
704 | num_devices = base_dev->get_num_devices_func (); | |
705 | ||
dd7f667e | 706 | if (num_devices <= 0 || ord >= num_devices) |
707 | acc_dev_num_out_of_range (d, ord, num_devices); | |
ca4c3545 | 708 | |
0a1fe572 | 709 | acc_dev = &base_dev[ord]; |
ca4c3545 | 710 | |
d39966e5 | 711 | gomp_mutex_lock (&acc_dev->lock); |
f87b2900 | 712 | if (acc_dev->state == GOMP_DEVICE_UNINITIALIZED) |
0a1fe572 | 713 | gomp_init_device (acc_dev); |
d39966e5 | 714 | gomp_mutex_unlock (&acc_dev->lock); |
ca4c3545 | 715 | |
716 | gomp_mutex_unlock (&acc_device_lock); | |
0a1fe572 | 717 | |
718 | goacc_attach_host_thread_to_device (ord); | |
ca4c3545 | 719 | } |
0a1fe572 | 720 | |
721 | goacc_device_num = ord; | |
ca4c3545 | 722 | } |
723 | ||
724 | ialias (acc_set_device_num) | |
725 | ||
689db5ed | 726 | /* For -O and higher, the compiler always attempts to expand acc_on_device, but |
727 | if the user disables the builtin, or calls it via a pointer, we'll need this | |
728 | version. | |
729 | ||
730 | Compile this with optimization, so that the compiler expands | |
1ae4e7aa | 731 | this, rather than generating infinitely recursive code. */ |
732 | ||
733 | int __attribute__ ((__optimize__ ("O2"))) | |
ca4c3545 | 734 | acc_on_device (acc_device_t dev) |
735 | { | |
f212338e | 736 | return __builtin_acc_on_device (dev); |
ca4c3545 | 737 | } |
738 | ||
739 | ialias (acc_on_device) | |
740 | ||
741 | attribute_hidden void | |
742 | goacc_runtime_initialize (void) | |
743 | { | |
744 | gomp_mutex_init (&acc_device_lock); | |
745 | ||
746 | #if !(defined HAVE_TLS || defined USE_EMUTLS) | |
747 | pthread_key_create (&goacc_tls_key, NULL); | |
748 | #endif | |
749 | ||
750 | pthread_key_create (&goacc_cleanup_key, goacc_destroy_thread); | |
751 | ||
0a1fe572 | 752 | cached_base_dev = NULL; |
ca4c3545 | 753 | |
754 | goacc_threads = NULL; | |
755 | gomp_mutex_init (&goacc_thread_lock); | |
2f6c4c35 | 756 | |
757 | /* Initialize and register the 'host' device type. */ | |
758 | goacc_host_init (); | |
ca4c3545 | 759 | } |
760 | ||
761 | /* Compiler helper functions */ | |
762 | ||
763 | attribute_hidden void | |
764 | goacc_save_and_set_bind (acc_device_t d) | |
765 | { | |
766 | struct goacc_thread *thr = goacc_thread (); | |
767 | ||
768 | assert (!thr->saved_bound_dev); | |
769 | ||
770 | thr->saved_bound_dev = thr->dev; | |
771 | thr->dev = dispatchers[d]; | |
772 | } | |
773 | ||
774 | attribute_hidden void | |
775 | goacc_restore_bind (void) | |
776 | { | |
777 | struct goacc_thread *thr = goacc_thread (); | |
778 | ||
779 | thr->dev = thr->saved_bound_dev; | |
780 | thr->saved_bound_dev = NULL; | |
781 | } | |
782 | ||
783 | /* This is called from any OpenACC support function that may need to implicitly | |
0a1fe572 | 784 | initialize the libgomp runtime, either globally or from a new host thread. |
785 | On exit "goacc_thread" will return a valid & populated thread block. */ | |
ca4c3545 | 786 | |
787 | attribute_hidden void | |
788 | goacc_lazy_initialize (void) | |
789 | { | |
790 | struct goacc_thread *thr = goacc_thread (); | |
791 | ||
792 | if (thr && thr->dev) | |
793 | return; | |
794 | ||
5feffd51 | 795 | gomp_init_targets_once (); |
796 | ||
797 | gomp_mutex_lock (&acc_device_lock); | |
0a1fe572 | 798 | if (!cached_base_dev) |
5feffd51 | 799 | cached_base_dev = acc_init_1 (acc_device_default, |
800 | acc_construct_parallel, 1); | |
801 | gomp_mutex_unlock (&acc_device_lock); | |
802 | ||
803 | goacc_attach_host_thread_to_device (-1); | |
ca4c3545 | 804 | } |