]>
Commit | Line | Data |
---|---|---|
5624e564 | 1 | /* Copyright (C) 2013-2015 Free Software Foundation, Inc. |
acf0174b JJ |
2 | Contributed by Jakub Jelinek <jakub@redhat.com>. |
3 | ||
f1f3453e TS |
4 | This file is part of the GNU Offloading and Multi Processing Library |
5 | (libgomp). | |
acf0174b JJ |
6 | |
7 | Libgomp is free software; you can redistribute it and/or modify it | |
8 | under the terms of the GNU General Public License as published by | |
9 | the Free Software Foundation; either version 3, or (at your option) | |
10 | any later version. | |
11 | ||
12 | Libgomp is distributed in the hope that it will be useful, but WITHOUT ANY | |
13 | WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS | |
14 | FOR A PARTICULAR PURPOSE. See the GNU General Public License for | |
15 | more details. | |
16 | ||
17 | Under Section 7 of GPL version 3, you are granted additional | |
18 | permissions described in the GCC Runtime Library Exception, version | |
19 | 3.1, as published by the Free Software Foundation. | |
20 | ||
21 | You should have received a copy of the GNU General Public License and | |
22 | a copy of the GCC Runtime Library Exception along with this program; | |
23 | see the files COPYING3 and COPYING.RUNTIME respectively. If not, see | |
24 | <http://www.gnu.org/licenses/>. */ | |
25 | ||
1df3f842 | 26 | /* This file contains the support of offloading. */ |
acf0174b | 27 | |
1df3f842 | 28 | #include "config.h" |
acf0174b | 29 | #include "libgomp.h" |
41dbbb37 TS |
30 | #include "oacc-plugin.h" |
31 | #include "oacc-int.h" | |
32 | #include "gomp-constants.h" | |
acf0174b JJ |
33 | #include <limits.h> |
34 | #include <stdbool.h> | |
35 | #include <stdlib.h> | |
36 | #include <string.h> | |
41dbbb37 | 37 | #include <assert.h> |
acf0174b | 38 | |
1df3f842 JJ |
39 | #ifdef PLUGIN_SUPPORT |
40 | #include <dlfcn.h> | |
b5f7a6ca | 41 | #include "plugin-suffix.h" |
1df3f842 JJ |
42 | #endif |
43 | ||
44 | static void gomp_target_init (void); | |
45 | ||
41dbbb37 | 46 | /* The whole initialization code for offloading plugins is only run one. */ |
1df3f842 JJ |
47 | static pthread_once_t gomp_is_initialized = PTHREAD_ONCE_INIT; |
48 | ||
1df3f842 JJ |
49 | /* This structure describes an offload image. |
50 | It contains type of the target device, pointer to host table descriptor, and | |
51 | pointer to target data. */ | |
52 | struct offload_image_descr { | |
53 | enum offload_target_type type; | |
54 | void *host_table; | |
55 | void *target_data; | |
56 | }; | |
57 | ||
58 | /* Array of descriptors of offload images. */ | |
59 | static struct offload_image_descr *offload_images; | |
60 | ||
61 | /* Total number of offload images. */ | |
62 | static int num_offload_images; | |
63 | ||
64 | /* Array of descriptors for all available devices. */ | |
65 | static struct gomp_device_descr *devices; | |
66 | ||
973e9808 | 67 | #ifdef PLUGIN_SUPPORT |
1df3f842 JJ |
68 | /* Total number of available devices. */ |
69 | static int num_devices; | |
973e9808 | 70 | #endif |
1df3f842 | 71 | |
41dbbb37 TS |
72 | /* Number of GOMP_OFFLOAD_CAP_OPENMP_400 devices. */ |
73 | static int num_devices_openmp; | |
74 | ||
1df3f842 JJ |
75 | /* The comparison function. */ |
76 | ||
41dbbb37 | 77 | attribute_hidden int |
1df3f842 JJ |
78 | splay_compare (splay_tree_key x, splay_tree_key y) |
79 | { | |
80 | if (x->host_start == x->host_end | |
81 | && y->host_start == y->host_end) | |
82 | return 0; | |
83 | if (x->host_end <= y->host_start) | |
84 | return -1; | |
85 | if (x->host_start >= y->host_end) | |
86 | return 1; | |
87 | return 0; | |
88 | } | |
89 | ||
90 | #include "splay-tree.h" | |
91 | ||
41dbbb37 TS |
92 | attribute_hidden void |
93 | gomp_init_targets_once (void) | |
1df3f842 | 94 | { |
41dbbb37 TS |
95 | (void) pthread_once (&gomp_is_initialized, gomp_target_init); |
96 | } | |
1df3f842 | 97 | |
acf0174b JJ |
98 | attribute_hidden int |
99 | gomp_get_num_devices (void) | |
100 | { | |
41dbbb37 TS |
101 | gomp_init_targets_once (); |
102 | return num_devices_openmp; | |
1df3f842 JJ |
103 | } |
104 | ||
105 | static struct gomp_device_descr * | |
106 | resolve_device (int device_id) | |
107 | { | |
41dbbb37 | 108 | if (device_id == GOMP_DEVICE_ICV) |
1df3f842 JJ |
109 | { |
110 | struct gomp_task_icv *icv = gomp_icv (false); | |
111 | device_id = icv->default_device_var; | |
112 | } | |
113 | ||
114 | if (device_id < 0 || device_id >= gomp_get_num_devices ()) | |
115 | return NULL; | |
116 | ||
117 | return &devices[device_id]; | |
118 | } | |
119 | ||
120 | ||
121 | /* Handle the case where splay_tree_lookup found oldn for newn. | |
122 | Helper function of gomp_map_vars. */ | |
123 | ||
124 | static inline void | |
125 | gomp_map_vars_existing (splay_tree_key oldn, splay_tree_key newn, | |
126 | unsigned char kind) | |
127 | { | |
41dbbb37 TS |
128 | if ((kind & GOMP_MAP_FLAG_FORCE) |
129 | || oldn->host_start > newn->host_start | |
1df3f842 | 130 | || oldn->host_end < newn->host_end) |
41dbbb37 | 131 | gomp_fatal ("Trying to map into device [%p..%p) object when " |
1df3f842 JJ |
132 | "[%p..%p) is already mapped", |
133 | (void *) newn->host_start, (void *) newn->host_end, | |
134 | (void *) oldn->host_start, (void *) oldn->host_end); | |
135 | oldn->refcount++; | |
136 | } | |
137 | ||
41dbbb37 TS |
138 | static int |
139 | get_kind (bool is_openacc, void *kinds, int idx) | |
140 | { | |
141 | return is_openacc ? ((unsigned short *) kinds)[idx] | |
142 | : ((unsigned char *) kinds)[idx]; | |
143 | } | |
144 | ||
145 | attribute_hidden struct target_mem_desc * | |
1df3f842 | 146 | gomp_map_vars (struct gomp_device_descr *devicep, size_t mapnum, |
41dbbb37 TS |
147 | void **hostaddrs, void **devaddrs, size_t *sizes, void *kinds, |
148 | bool is_openacc, bool is_target) | |
1df3f842 JJ |
149 | { |
150 | size_t i, tgt_align, tgt_size, not_found_cnt = 0; | |
41dbbb37 TS |
151 | const int rshift = is_openacc ? 8 : 3; |
152 | const int typemask = is_openacc ? 0xff : 0x7; | |
153 | struct gomp_memory_mapping *mm = &devicep->mem_map; | |
1df3f842 JJ |
154 | struct splay_tree_key_s cur_node; |
155 | struct target_mem_desc *tgt | |
156 | = gomp_malloc (sizeof (*tgt) + sizeof (tgt->list[0]) * mapnum); | |
157 | tgt->list_count = mapnum; | |
158 | tgt->refcount = 1; | |
159 | tgt->device_descr = devicep; | |
41dbbb37 | 160 | tgt->mem_map = mm; |
1df3f842 JJ |
161 | |
162 | if (mapnum == 0) | |
163 | return tgt; | |
164 | ||
165 | tgt_align = sizeof (void *); | |
166 | tgt_size = 0; | |
167 | if (is_target) | |
168 | { | |
169 | size_t align = 4 * sizeof (void *); | |
170 | tgt_align = align; | |
171 | tgt_size = mapnum * sizeof (void *); | |
172 | } | |
173 | ||
41dbbb37 TS |
174 | gomp_mutex_lock (&mm->lock); |
175 | ||
1df3f842 JJ |
176 | for (i = 0; i < mapnum; i++) |
177 | { | |
41dbbb37 | 178 | int kind = get_kind (is_openacc, kinds, i); |
1df3f842 JJ |
179 | if (hostaddrs[i] == NULL) |
180 | { | |
181 | tgt->list[i] = NULL; | |
182 | continue; | |
183 | } | |
184 | cur_node.host_start = (uintptr_t) hostaddrs[i]; | |
41dbbb37 | 185 | if (!GOMP_MAP_POINTER_P (kind & typemask)) |
1df3f842 JJ |
186 | cur_node.host_end = cur_node.host_start + sizes[i]; |
187 | else | |
188 | cur_node.host_end = cur_node.host_start + sizeof (void *); | |
41dbbb37 | 189 | splay_tree_key n = splay_tree_lookup (&mm->splay_tree, &cur_node); |
1df3f842 JJ |
190 | if (n) |
191 | { | |
192 | tgt->list[i] = n; | |
41dbbb37 | 193 | gomp_map_vars_existing (n, &cur_node, kind & typemask); |
1df3f842 JJ |
194 | } |
195 | else | |
196 | { | |
1df3f842 | 197 | tgt->list[i] = NULL; |
41dbbb37 TS |
198 | |
199 | size_t align = (size_t) 1 << (kind >> rshift); | |
1df3f842 JJ |
200 | not_found_cnt++; |
201 | if (tgt_align < align) | |
202 | tgt_align = align; | |
203 | tgt_size = (tgt_size + align - 1) & ~(align - 1); | |
204 | tgt_size += cur_node.host_end - cur_node.host_start; | |
41dbbb37 | 205 | if ((kind & typemask) == GOMP_MAP_TO_PSET) |
1df3f842 JJ |
206 | { |
207 | size_t j; | |
208 | for (j = i + 1; j < mapnum; j++) | |
41dbbb37 TS |
209 | if (!GOMP_MAP_POINTER_P (get_kind (is_openacc, kinds, j) |
210 | & typemask)) | |
1df3f842 JJ |
211 | break; |
212 | else if ((uintptr_t) hostaddrs[j] < cur_node.host_start | |
213 | || ((uintptr_t) hostaddrs[j] + sizeof (void *) | |
214 | > cur_node.host_end)) | |
215 | break; | |
216 | else | |
217 | { | |
218 | tgt->list[j] = NULL; | |
219 | i++; | |
220 | } | |
221 | } | |
222 | } | |
223 | } | |
224 | ||
41dbbb37 TS |
225 | if (devaddrs) |
226 | { | |
227 | if (mapnum != 1) | |
228 | gomp_fatal ("unexpected aggregation"); | |
229 | tgt->to_free = devaddrs[0]; | |
230 | tgt->tgt_start = (uintptr_t) tgt->to_free; | |
231 | tgt->tgt_end = tgt->tgt_start + sizes[0]; | |
232 | } | |
233 | else if (not_found_cnt || is_target) | |
1df3f842 JJ |
234 | { |
235 | /* Allocate tgt_align aligned tgt_size block of memory. */ | |
236 | /* FIXME: Perhaps change interface to allocate properly aligned | |
237 | memory. */ | |
238 | tgt->to_free = devicep->alloc_func (devicep->target_id, | |
239 | tgt_size + tgt_align - 1); | |
240 | tgt->tgt_start = (uintptr_t) tgt->to_free; | |
241 | tgt->tgt_start = (tgt->tgt_start + tgt_align - 1) & ~(tgt_align - 1); | |
242 | tgt->tgt_end = tgt->tgt_start + tgt_size; | |
243 | } | |
244 | else | |
245 | { | |
246 | tgt->to_free = NULL; | |
247 | tgt->tgt_start = 0; | |
248 | tgt->tgt_end = 0; | |
249 | } | |
250 | ||
251 | tgt_size = 0; | |
252 | if (is_target) | |
253 | tgt_size = mapnum * sizeof (void *); | |
254 | ||
255 | tgt->array = NULL; | |
256 | if (not_found_cnt) | |
257 | { | |
258 | tgt->array = gomp_malloc (not_found_cnt * sizeof (*tgt->array)); | |
259 | splay_tree_node array = tgt->array; | |
260 | size_t j; | |
261 | ||
262 | for (i = 0; i < mapnum; i++) | |
263 | if (tgt->list[i] == NULL) | |
264 | { | |
41dbbb37 | 265 | int kind = get_kind (is_openacc, kinds, i); |
1df3f842 JJ |
266 | if (hostaddrs[i] == NULL) |
267 | continue; | |
268 | splay_tree_key k = &array->key; | |
269 | k->host_start = (uintptr_t) hostaddrs[i]; | |
41dbbb37 | 270 | if (!GOMP_MAP_POINTER_P (kind & typemask)) |
1df3f842 JJ |
271 | k->host_end = k->host_start + sizes[i]; |
272 | else | |
273 | k->host_end = k->host_start + sizeof (void *); | |
41dbbb37 | 274 | splay_tree_key n = splay_tree_lookup (&mm->splay_tree, k); |
1df3f842 JJ |
275 | if (n) |
276 | { | |
277 | tgt->list[i] = n; | |
41dbbb37 | 278 | gomp_map_vars_existing (n, k, kind & typemask); |
1df3f842 JJ |
279 | } |
280 | else | |
281 | { | |
41dbbb37 | 282 | size_t align = (size_t) 1 << (kind >> rshift); |
1df3f842 JJ |
283 | tgt->list[i] = k; |
284 | tgt_size = (tgt_size + align - 1) & ~(align - 1); | |
285 | k->tgt = tgt; | |
286 | k->tgt_offset = tgt_size; | |
287 | tgt_size += k->host_end - k->host_start; | |
41dbbb37 | 288 | k->copy_from = GOMP_MAP_COPY_FROM_P (kind & typemask); |
1df3f842 | 289 | k->refcount = 1; |
41dbbb37 | 290 | k->async_refcount = 0; |
1df3f842 JJ |
291 | tgt->refcount++; |
292 | array->left = NULL; | |
293 | array->right = NULL; | |
41dbbb37 TS |
294 | splay_tree_insert (&mm->splay_tree, array); |
295 | switch (kind & typemask) | |
1df3f842 | 296 | { |
41dbbb37 TS |
297 | case GOMP_MAP_ALLOC: |
298 | case GOMP_MAP_FROM: | |
299 | case GOMP_MAP_FORCE_ALLOC: | |
300 | case GOMP_MAP_FORCE_FROM: | |
1df3f842 | 301 | break; |
41dbbb37 TS |
302 | case GOMP_MAP_TO: |
303 | case GOMP_MAP_TOFROM: | |
304 | case GOMP_MAP_FORCE_TO: | |
305 | case GOMP_MAP_FORCE_TOFROM: | |
1df3f842 JJ |
306 | /* FIXME: Perhaps add some smarts, like if copying |
307 | several adjacent fields from host to target, use some | |
308 | host buffer to avoid sending each var individually. */ | |
309 | devicep->host2dev_func (devicep->target_id, | |
310 | (void *) (tgt->tgt_start | |
311 | + k->tgt_offset), | |
312 | (void *) k->host_start, | |
313 | k->host_end - k->host_start); | |
314 | break; | |
41dbbb37 | 315 | case GOMP_MAP_POINTER: |
1df3f842 JJ |
316 | cur_node.host_start |
317 | = (uintptr_t) *(void **) k->host_start; | |
318 | if (cur_node.host_start == (uintptr_t) NULL) | |
319 | { | |
320 | cur_node.tgt_offset = (uintptr_t) NULL; | |
41dbbb37 | 321 | /* FIXME: see above FIXME comment. */ |
1df3f842 JJ |
322 | devicep->host2dev_func (devicep->target_id, |
323 | (void *) (tgt->tgt_start | |
324 | + k->tgt_offset), | |
325 | (void *) &cur_node.tgt_offset, | |
326 | sizeof (void *)); | |
327 | break; | |
328 | } | |
329 | /* Add bias to the pointer value. */ | |
330 | cur_node.host_start += sizes[i]; | |
331 | cur_node.host_end = cur_node.host_start + 1; | |
41dbbb37 | 332 | n = splay_tree_lookup (&mm->splay_tree, &cur_node); |
1df3f842 JJ |
333 | if (n == NULL) |
334 | { | |
335 | /* Could be possibly zero size array section. */ | |
336 | cur_node.host_end--; | |
41dbbb37 | 337 | n = splay_tree_lookup (&mm->splay_tree, &cur_node); |
1df3f842 JJ |
338 | if (n == NULL) |
339 | { | |
340 | cur_node.host_start--; | |
41dbbb37 | 341 | n = splay_tree_lookup (&mm->splay_tree, &cur_node); |
1df3f842 JJ |
342 | cur_node.host_start++; |
343 | } | |
344 | } | |
345 | if (n == NULL) | |
346 | gomp_fatal ("Pointer target of array section " | |
347 | "wasn't mapped"); | |
348 | cur_node.host_start -= n->host_start; | |
349 | cur_node.tgt_offset = n->tgt->tgt_start + n->tgt_offset | |
350 | + cur_node.host_start; | |
351 | /* At this point tgt_offset is target address of the | |
352 | array section. Now subtract bias to get what we want | |
353 | to initialize the pointer with. */ | |
354 | cur_node.tgt_offset -= sizes[i]; | |
41dbbb37 | 355 | /* FIXME: see above FIXME comment. */ |
1df3f842 JJ |
356 | devicep->host2dev_func (devicep->target_id, |
357 | (void *) (tgt->tgt_start | |
358 | + k->tgt_offset), | |
359 | (void *) &cur_node.tgt_offset, | |
360 | sizeof (void *)); | |
361 | break; | |
41dbbb37 TS |
362 | case GOMP_MAP_TO_PSET: |
363 | /* FIXME: see above FIXME comment. */ | |
1df3f842 JJ |
364 | devicep->host2dev_func (devicep->target_id, |
365 | (void *) (tgt->tgt_start | |
366 | + k->tgt_offset), | |
367 | (void *) k->host_start, | |
368 | k->host_end - k->host_start); | |
41dbbb37 | 369 | |
1df3f842 | 370 | for (j = i + 1; j < mapnum; j++) |
41dbbb37 TS |
371 | if (!GOMP_MAP_POINTER_P (get_kind (is_openacc, kinds, j) |
372 | & typemask)) | |
1df3f842 JJ |
373 | break; |
374 | else if ((uintptr_t) hostaddrs[j] < k->host_start | |
375 | || ((uintptr_t) hostaddrs[j] + sizeof (void *) | |
376 | > k->host_end)) | |
377 | break; | |
378 | else | |
379 | { | |
380 | tgt->list[j] = k; | |
381 | k->refcount++; | |
382 | cur_node.host_start | |
383 | = (uintptr_t) *(void **) hostaddrs[j]; | |
384 | if (cur_node.host_start == (uintptr_t) NULL) | |
385 | { | |
386 | cur_node.tgt_offset = (uintptr_t) NULL; | |
41dbbb37 | 387 | /* FIXME: see above FIXME comment. */ |
1df3f842 JJ |
388 | devicep->host2dev_func (devicep->target_id, |
389 | (void *) (tgt->tgt_start + k->tgt_offset | |
390 | + ((uintptr_t) hostaddrs[j] | |
391 | - k->host_start)), | |
392 | (void *) &cur_node.tgt_offset, | |
393 | sizeof (void *)); | |
394 | i++; | |
395 | continue; | |
396 | } | |
397 | /* Add bias to the pointer value. */ | |
398 | cur_node.host_start += sizes[j]; | |
399 | cur_node.host_end = cur_node.host_start + 1; | |
41dbbb37 | 400 | n = splay_tree_lookup (&mm->splay_tree, &cur_node); |
1df3f842 JJ |
401 | if (n == NULL) |
402 | { | |
403 | /* Could be possibly zero size array section. */ | |
404 | cur_node.host_end--; | |
41dbbb37 | 405 | n = splay_tree_lookup (&mm->splay_tree, |
1df3f842 JJ |
406 | &cur_node); |
407 | if (n == NULL) | |
408 | { | |
409 | cur_node.host_start--; | |
41dbbb37 TS |
410 | n = splay_tree_lookup (&mm->splay_tree, |
411 | &cur_node); | |
1df3f842 JJ |
412 | cur_node.host_start++; |
413 | } | |
414 | } | |
415 | if (n == NULL) | |
416 | gomp_fatal ("Pointer target of array section " | |
417 | "wasn't mapped"); | |
418 | cur_node.host_start -= n->host_start; | |
419 | cur_node.tgt_offset = n->tgt->tgt_start | |
420 | + n->tgt_offset | |
421 | + cur_node.host_start; | |
422 | /* At this point tgt_offset is target address of the | |
423 | array section. Now subtract bias to get what we | |
424 | want to initialize the pointer with. */ | |
425 | cur_node.tgt_offset -= sizes[j]; | |
41dbbb37 | 426 | /* FIXME: see above FIXME comment. */ |
1df3f842 JJ |
427 | devicep->host2dev_func (devicep->target_id, |
428 | (void *) (tgt->tgt_start + k->tgt_offset | |
429 | + ((uintptr_t) hostaddrs[j] | |
430 | - k->host_start)), | |
431 | (void *) &cur_node.tgt_offset, | |
432 | sizeof (void *)); | |
433 | i++; | |
434 | } | |
41dbbb37 TS |
435 | break; |
436 | case GOMP_MAP_FORCE_PRESENT: | |
437 | { | |
438 | /* We already looked up the memory region above and it | |
439 | was missing. */ | |
440 | size_t size = k->host_end - k->host_start; | |
441 | gomp_fatal ("present clause: !acc_is_present (%p, " | |
442 | "%zd (0x%zx))", (void *) k->host_start, | |
443 | size, size); | |
444 | } | |
445 | break; | |
446 | case GOMP_MAP_FORCE_DEVICEPTR: | |
447 | assert (k->host_end - k->host_start == sizeof (void *)); | |
448 | ||
449 | devicep->host2dev_func (devicep->target_id, | |
450 | (void *) (tgt->tgt_start | |
451 | + k->tgt_offset), | |
452 | (void *) k->host_start, | |
453 | sizeof (void *)); | |
454 | break; | |
455 | default: | |
456 | gomp_fatal ("%s: unhandled kind 0x%.2x", __FUNCTION__, | |
457 | kind); | |
1df3f842 JJ |
458 | } |
459 | array++; | |
460 | } | |
461 | } | |
462 | } | |
41dbbb37 | 463 | |
1df3f842 JJ |
464 | if (is_target) |
465 | { | |
466 | for (i = 0; i < mapnum; i++) | |
467 | { | |
468 | if (tgt->list[i] == NULL) | |
469 | cur_node.tgt_offset = (uintptr_t) NULL; | |
470 | else | |
471 | cur_node.tgt_offset = tgt->list[i]->tgt->tgt_start | |
472 | + tgt->list[i]->tgt_offset; | |
41dbbb37 | 473 | /* FIXME: see above FIXME comment. */ |
1df3f842 JJ |
474 | devicep->host2dev_func (devicep->target_id, |
475 | (void *) (tgt->tgt_start | |
476 | + i * sizeof (void *)), | |
477 | (void *) &cur_node.tgt_offset, | |
478 | sizeof (void *)); | |
479 | } | |
480 | } | |
481 | ||
41dbbb37 | 482 | gomp_mutex_unlock (&mm->lock); |
1df3f842 JJ |
483 | return tgt; |
484 | } | |
485 | ||
486 | static void | |
487 | gomp_unmap_tgt (struct target_mem_desc *tgt) | |
488 | { | |
489 | /* Deallocate on target the tgt->tgt_start .. tgt->tgt_end region. */ | |
490 | if (tgt->tgt_end) | |
491 | tgt->device_descr->free_func (tgt->device_descr->target_id, tgt->to_free); | |
492 | ||
493 | free (tgt->array); | |
494 | free (tgt); | |
495 | } | |
496 | ||
41dbbb37 TS |
497 | /* Decrease the refcount for a set of mapped variables, and queue asychronous |
498 | copies from the device back to the host after any work that has been issued. | |
499 | Because the regions are still "live", increment an asynchronous reference | |
500 | count to indicate that they should not be unmapped from host-side data | |
501 | structures until the asynchronous copy has completed. */ | |
502 | ||
503 | attribute_hidden void | |
504 | gomp_copy_from_async (struct target_mem_desc *tgt) | |
505 | { | |
506 | struct gomp_device_descr *devicep = tgt->device_descr; | |
507 | struct gomp_memory_mapping *mm = tgt->mem_map; | |
508 | size_t i; | |
509 | ||
510 | gomp_mutex_lock (&mm->lock); | |
511 | ||
512 | for (i = 0; i < tgt->list_count; i++) | |
513 | if (tgt->list[i] == NULL) | |
514 | ; | |
515 | else if (tgt->list[i]->refcount > 1) | |
516 | { | |
517 | tgt->list[i]->refcount--; | |
518 | tgt->list[i]->async_refcount++; | |
519 | } | |
520 | else | |
521 | { | |
522 | splay_tree_key k = tgt->list[i]; | |
523 | if (k->copy_from) | |
524 | devicep->dev2host_func (devicep->target_id, (void *) k->host_start, | |
525 | (void *) (k->tgt->tgt_start + k->tgt_offset), | |
526 | k->host_end - k->host_start); | |
527 | } | |
528 | ||
529 | gomp_mutex_unlock (&mm->lock); | |
530 | } | |
531 | ||
532 | /* Unmap variables described by TGT. If DO_COPYFROM is true, copy relevant | |
533 | variables back from device to host: if it is false, it is assumed that this | |
534 | has been done already, i.e. by gomp_copy_from_async above. */ | |
535 | ||
536 | attribute_hidden void | |
537 | gomp_unmap_vars (struct target_mem_desc *tgt, bool do_copyfrom) | |
1df3f842 JJ |
538 | { |
539 | struct gomp_device_descr *devicep = tgt->device_descr; | |
41dbbb37 | 540 | struct gomp_memory_mapping *mm = tgt->mem_map; |
1df3f842 JJ |
541 | |
542 | if (tgt->list_count == 0) | |
543 | { | |
544 | free (tgt); | |
545 | return; | |
546 | } | |
547 | ||
41dbbb37 TS |
548 | gomp_mutex_lock (&mm->lock); |
549 | ||
1df3f842 | 550 | size_t i; |
1df3f842 JJ |
551 | for (i = 0; i < tgt->list_count; i++) |
552 | if (tgt->list[i] == NULL) | |
553 | ; | |
554 | else if (tgt->list[i]->refcount > 1) | |
555 | tgt->list[i]->refcount--; | |
41dbbb37 TS |
556 | else if (tgt->list[i]->async_refcount > 0) |
557 | tgt->list[i]->async_refcount--; | |
1df3f842 JJ |
558 | else |
559 | { | |
560 | splay_tree_key k = tgt->list[i]; | |
41dbbb37 | 561 | if (k->copy_from && do_copyfrom) |
1df3f842 JJ |
562 | devicep->dev2host_func (devicep->target_id, (void *) k->host_start, |
563 | (void *) (k->tgt->tgt_start + k->tgt_offset), | |
564 | k->host_end - k->host_start); | |
41dbbb37 | 565 | splay_tree_remove (&mm->splay_tree, k); |
1df3f842 JJ |
566 | if (k->tgt->refcount > 1) |
567 | k->tgt->refcount--; | |
568 | else | |
569 | gomp_unmap_tgt (k->tgt); | |
570 | } | |
571 | ||
572 | if (tgt->refcount > 1) | |
573 | tgt->refcount--; | |
574 | else | |
575 | gomp_unmap_tgt (tgt); | |
41dbbb37 TS |
576 | |
577 | gomp_mutex_unlock (&mm->lock); | |
1df3f842 JJ |
578 | } |
579 | ||
580 | static void | |
41dbbb37 TS |
581 | gomp_update (struct gomp_device_descr *devicep, struct gomp_memory_mapping *mm, |
582 | size_t mapnum, void **hostaddrs, size_t *sizes, void *kinds, | |
583 | bool is_openacc) | |
1df3f842 JJ |
584 | { |
585 | size_t i; | |
586 | struct splay_tree_key_s cur_node; | |
41dbbb37 | 587 | const int typemask = is_openacc ? 0xff : 0x7; |
1df3f842 JJ |
588 | |
589 | if (!devicep) | |
590 | return; | |
591 | ||
592 | if (mapnum == 0) | |
593 | return; | |
594 | ||
41dbbb37 | 595 | gomp_mutex_lock (&mm->lock); |
1df3f842 JJ |
596 | for (i = 0; i < mapnum; i++) |
597 | if (sizes[i]) | |
598 | { | |
599 | cur_node.host_start = (uintptr_t) hostaddrs[i]; | |
600 | cur_node.host_end = cur_node.host_start + sizes[i]; | |
41dbbb37 | 601 | splay_tree_key n = splay_tree_lookup (&mm->splay_tree, |
1df3f842 JJ |
602 | &cur_node); |
603 | if (n) | |
604 | { | |
41dbbb37 | 605 | int kind = get_kind (is_openacc, kinds, i); |
1df3f842 JJ |
606 | if (n->host_start > cur_node.host_start |
607 | || n->host_end < cur_node.host_end) | |
608 | gomp_fatal ("Trying to update [%p..%p) object when" | |
609 | "only [%p..%p) is mapped", | |
610 | (void *) cur_node.host_start, | |
611 | (void *) cur_node.host_end, | |
612 | (void *) n->host_start, | |
613 | (void *) n->host_end); | |
41dbbb37 | 614 | if (GOMP_MAP_COPY_TO_P (kind & typemask)) |
1df3f842 JJ |
615 | devicep->host2dev_func (devicep->target_id, |
616 | (void *) (n->tgt->tgt_start | |
617 | + n->tgt_offset | |
618 | + cur_node.host_start | |
619 | - n->host_start), | |
620 | (void *) cur_node.host_start, | |
621 | cur_node.host_end - cur_node.host_start); | |
41dbbb37 | 622 | if (GOMP_MAP_COPY_FROM_P (kind & typemask)) |
1df3f842 JJ |
623 | devicep->dev2host_func (devicep->target_id, |
624 | (void *) cur_node.host_start, | |
625 | (void *) (n->tgt->tgt_start | |
626 | + n->tgt_offset | |
627 | + cur_node.host_start | |
628 | - n->host_start), | |
629 | cur_node.host_end - cur_node.host_start); | |
630 | } | |
631 | else | |
632 | gomp_fatal ("Trying to update [%p..%p) object that is not mapped", | |
633 | (void *) cur_node.host_start, | |
634 | (void *) cur_node.host_end); | |
635 | } | |
41dbbb37 | 636 | gomp_mutex_unlock (&mm->lock); |
1df3f842 JJ |
637 | } |
638 | ||
639 | /* This function should be called from every offload image. | |
640 | It gets the descriptor of the host func and var tables HOST_TABLE, TYPE of | |
641 | the target, and TARGET_DATA needed by target plugin. */ | |
642 | ||
643 | void | |
644 | GOMP_offload_register (void *host_table, enum offload_target_type target_type, | |
645 | void *target_data) | |
646 | { | |
647 | offload_images = gomp_realloc (offload_images, | |
648 | (num_offload_images + 1) | |
649 | * sizeof (struct offload_image_descr)); | |
650 | ||
651 | offload_images[num_offload_images].type = target_type; | |
652 | offload_images[num_offload_images].host_table = host_table; | |
653 | offload_images[num_offload_images].target_data = target_data; | |
654 | ||
655 | num_offload_images++; | |
656 | } | |
657 | ||
41dbbb37 TS |
658 | /* This function initializes the target device, specified by DEVICEP. DEVICEP |
659 | must be locked on entry, and remains locked on return. */ | |
1df3f842 | 660 | |
41dbbb37 | 661 | attribute_hidden void |
1df3f842 JJ |
662 | gomp_init_device (struct gomp_device_descr *devicep) |
663 | { | |
664 | devicep->init_device_func (devicep->target_id); | |
41dbbb37 TS |
665 | devicep->is_initialized = true; |
666 | } | |
1df3f842 | 667 | |
41dbbb37 TS |
668 | /* Initialize address mapping tables. MM must be locked on entry, and remains |
669 | locked on return. */ | |
670 | ||
671 | attribute_hidden void | |
672 | gomp_init_tables (struct gomp_device_descr *devicep, | |
673 | struct gomp_memory_mapping *mm) | |
674 | { | |
1df3f842 JJ |
675 | /* Get address mapping table for device. */ |
676 | struct mapping_table *table = NULL; | |
677 | int num_entries = devicep->get_table_func (devicep->target_id, &table); | |
678 | ||
679 | /* Insert host-target address mapping into dev_splay_tree. */ | |
680 | int i; | |
681 | for (i = 0; i < num_entries; i++) | |
682 | { | |
683 | struct target_mem_desc *tgt = gomp_malloc (sizeof (*tgt)); | |
684 | tgt->refcount = 1; | |
685 | tgt->array = gomp_malloc (sizeof (*tgt->array)); | |
686 | tgt->tgt_start = table[i].tgt_start; | |
687 | tgt->tgt_end = table[i].tgt_end; | |
688 | tgt->to_free = NULL; | |
689 | tgt->list_count = 0; | |
690 | tgt->device_descr = devicep; | |
691 | splay_tree_node node = tgt->array; | |
692 | splay_tree_key k = &node->key; | |
693 | k->host_start = table[i].host_start; | |
694 | k->host_end = table[i].host_end; | |
695 | k->tgt_offset = 0; | |
696 | k->refcount = 1; | |
697 | k->copy_from = false; | |
698 | k->tgt = tgt; | |
699 | node->left = NULL; | |
700 | node->right = NULL; | |
41dbbb37 | 701 | splay_tree_insert (&mm->splay_tree, node); |
1df3f842 JJ |
702 | } |
703 | ||
704 | free (table); | |
41dbbb37 TS |
705 | mm->is_initialized = true; |
706 | } | |
707 | ||
708 | /* Free address mapping tables. MM must be locked on entry, and remains locked | |
709 | on return. */ | |
710 | ||
711 | attribute_hidden void | |
712 | gomp_free_memmap (struct gomp_memory_mapping *mm) | |
713 | { | |
714 | while (mm->splay_tree.root) | |
715 | { | |
716 | struct target_mem_desc *tgt = mm->splay_tree.root->key.tgt; | |
717 | ||
718 | splay_tree_remove (&mm->splay_tree, &mm->splay_tree.root->key); | |
719 | free (tgt->array); | |
720 | free (tgt); | |
721 | } | |
722 | ||
723 | mm->is_initialized = false; | |
724 | } | |
725 | ||
726 | /* This function de-initializes the target device, specified by DEVICEP. | |
727 | DEVICEP must be locked on entry, and remains locked on return. */ | |
728 | ||
729 | attribute_hidden void | |
730 | gomp_fini_device (struct gomp_device_descr *devicep) | |
731 | { | |
732 | if (devicep->is_initialized) | |
733 | devicep->fini_device_func (devicep->target_id); | |
734 | ||
735 | devicep->is_initialized = false; | |
acf0174b JJ |
736 | } |
737 | ||
738 | /* Called when encountering a target directive. If DEVICE | |
41dbbb37 TS |
739 | is GOMP_DEVICE_ICV, it means use device-var ICV. If it is |
740 | GOMP_DEVICE_HOST_FALLBACK (or any value | |
741 | larger than last available hw device), use host fallback. | |
128b26dc TS |
742 | FN is address of host code, UNUSED is part of the current ABI, but |
743 | we're not actually using it. HOSTADDRS, SIZES and KINDS are arrays | |
acf0174b JJ |
744 | with MAPNUM entries, with addresses of the host objects, |
745 | sizes of the host objects (resp. for pointer kind pointer bias | |
746 | and assumed sizeof (void *) size) and kinds. */ | |
747 | ||
748 | void | |
128b26dc | 749 | GOMP_target (int device, void (*fn) (void *), const void *unused, |
acf0174b JJ |
750 | size_t mapnum, void **hostaddrs, size_t *sizes, |
751 | unsigned char *kinds) | |
752 | { | |
1df3f842 | 753 | struct gomp_device_descr *devicep = resolve_device (device); |
41dbbb37 TS |
754 | |
755 | if (devicep == NULL | |
756 | || !(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)) | |
1df3f842 JJ |
757 | { |
758 | /* Host fallback. */ | |
759 | struct gomp_thread old_thr, *thr = gomp_thread (); | |
760 | old_thr = *thr; | |
761 | memset (thr, '\0', sizeof (*thr)); | |
762 | if (gomp_places_list) | |
763 | { | |
764 | thr->place = old_thr.place; | |
765 | thr->ts.place_partition_len = gomp_places_list_len; | |
766 | } | |
767 | fn (hostaddrs); | |
768 | gomp_free_thread (thr); | |
769 | *thr = old_thr; | |
770 | return; | |
771 | } | |
772 | ||
41dbbb37 | 773 | gomp_mutex_lock (&devicep->lock); |
1df3f842 JJ |
774 | if (!devicep->is_initialized) |
775 | gomp_init_device (devicep); | |
41dbbb37 TS |
776 | gomp_mutex_unlock (&devicep->lock); |
777 | ||
778 | void *fn_addr; | |
779 | ||
780 | if (devicep->capabilities & GOMP_OFFLOAD_CAP_NATIVE_EXEC) | |
781 | fn_addr = (void *) fn; | |
782 | else | |
783 | { | |
784 | struct gomp_memory_mapping *mm = &devicep->mem_map; | |
785 | gomp_mutex_lock (&mm->lock); | |
786 | ||
787 | if (!mm->is_initialized) | |
788 | gomp_init_tables (devicep, mm); | |
789 | ||
790 | struct splay_tree_key_s k; | |
791 | k.host_start = (uintptr_t) fn; | |
792 | k.host_end = k.host_start + 1; | |
793 | splay_tree_key tgt_fn = splay_tree_lookup (&mm->splay_tree, &k); | |
794 | if (tgt_fn == NULL) | |
795 | gomp_fatal ("Target function wasn't mapped"); | |
1df3f842 | 796 | |
41dbbb37 TS |
797 | gomp_mutex_unlock (&mm->lock); |
798 | ||
799 | fn_addr = (void *) tgt_fn->tgt->tgt_start; | |
800 | } | |
1df3f842 JJ |
801 | |
802 | struct target_mem_desc *tgt_vars | |
41dbbb37 TS |
803 | = gomp_map_vars (devicep, mapnum, hostaddrs, NULL, sizes, kinds, false, |
804 | true); | |
acf0174b JJ |
805 | struct gomp_thread old_thr, *thr = gomp_thread (); |
806 | old_thr = *thr; | |
807 | memset (thr, '\0', sizeof (*thr)); | |
808 | if (gomp_places_list) | |
809 | { | |
810 | thr->place = old_thr.place; | |
811 | thr->ts.place_partition_len = gomp_places_list_len; | |
812 | } | |
41dbbb37 | 813 | devicep->run_func (devicep->target_id, fn_addr, (void *) tgt_vars->tgt_start); |
acf0174b JJ |
814 | gomp_free_thread (thr); |
815 | *thr = old_thr; | |
41dbbb37 | 816 | gomp_unmap_vars (tgt_vars, true); |
acf0174b JJ |
817 | } |
818 | ||
819 | void | |
128b26dc | 820 | GOMP_target_data (int device, const void *unused, size_t mapnum, |
acf0174b JJ |
821 | void **hostaddrs, size_t *sizes, unsigned char *kinds) |
822 | { | |
1df3f842 | 823 | struct gomp_device_descr *devicep = resolve_device (device); |
41dbbb37 TS |
824 | |
825 | if (devicep == NULL | |
826 | || !(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)) | |
1df3f842 JJ |
827 | { |
828 | /* Host fallback. */ | |
829 | struct gomp_task_icv *icv = gomp_icv (false); | |
830 | if (icv->target_data) | |
831 | { | |
832 | /* Even when doing a host fallback, if there are any active | |
833 | #pragma omp target data constructs, need to remember the | |
834 | new #pragma omp target data, otherwise GOMP_target_end_data | |
835 | would get out of sync. */ | |
836 | struct target_mem_desc *tgt | |
41dbbb37 | 837 | = gomp_map_vars (NULL, 0, NULL, NULL, NULL, NULL, false, false); |
1df3f842 JJ |
838 | tgt->prev = icv->target_data; |
839 | icv->target_data = tgt; | |
840 | } | |
841 | return; | |
842 | } | |
843 | ||
41dbbb37 | 844 | gomp_mutex_lock (&devicep->lock); |
1df3f842 JJ |
845 | if (!devicep->is_initialized) |
846 | gomp_init_device (devicep); | |
41dbbb37 TS |
847 | gomp_mutex_unlock (&devicep->lock); |
848 | ||
849 | struct gomp_memory_mapping *mm = &devicep->mem_map; | |
850 | gomp_mutex_lock (&mm->lock); | |
851 | if (!mm->is_initialized) | |
852 | gomp_init_tables (devicep, mm); | |
853 | gomp_mutex_unlock (&mm->lock); | |
1df3f842 JJ |
854 | |
855 | struct target_mem_desc *tgt | |
41dbbb37 TS |
856 | = gomp_map_vars (devicep, mapnum, hostaddrs, NULL, sizes, kinds, false, |
857 | false); | |
1df3f842 JJ |
858 | struct gomp_task_icv *icv = gomp_icv (true); |
859 | tgt->prev = icv->target_data; | |
860 | icv->target_data = tgt; | |
acf0174b JJ |
861 | } |
862 | ||
863 | void | |
864 | GOMP_target_end_data (void) | |
865 | { | |
1df3f842 JJ |
866 | struct gomp_task_icv *icv = gomp_icv (false); |
867 | if (icv->target_data) | |
868 | { | |
869 | struct target_mem_desc *tgt = icv->target_data; | |
870 | icv->target_data = tgt->prev; | |
41dbbb37 | 871 | gomp_unmap_vars (tgt, true); |
1df3f842 | 872 | } |
acf0174b JJ |
873 | } |
874 | ||
875 | void | |
128b26dc | 876 | GOMP_target_update (int device, const void *unused, size_t mapnum, |
acf0174b JJ |
877 | void **hostaddrs, size_t *sizes, unsigned char *kinds) |
878 | { | |
1df3f842 | 879 | struct gomp_device_descr *devicep = resolve_device (device); |
41dbbb37 TS |
880 | |
881 | if (devicep == NULL | |
882 | || !(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)) | |
1df3f842 JJ |
883 | return; |
884 | ||
41dbbb37 | 885 | gomp_mutex_lock (&devicep->lock); |
1df3f842 JJ |
886 | if (!devicep->is_initialized) |
887 | gomp_init_device (devicep); | |
41dbbb37 TS |
888 | gomp_mutex_unlock (&devicep->lock); |
889 | ||
890 | struct gomp_memory_mapping *mm = &devicep->mem_map; | |
891 | gomp_mutex_lock (&mm->lock); | |
892 | if (!mm->is_initialized) | |
893 | gomp_init_tables (devicep, mm); | |
894 | gomp_mutex_unlock (&mm->lock); | |
1df3f842 | 895 | |
41dbbb37 | 896 | gomp_update (devicep, mm, mapnum, hostaddrs, sizes, kinds, false); |
acf0174b JJ |
897 | } |
898 | ||
899 | void | |
900 | GOMP_teams (unsigned int num_teams, unsigned int thread_limit) | |
901 | { | |
902 | if (thread_limit) | |
903 | { | |
904 | struct gomp_task_icv *icv = gomp_icv (true); | |
905 | icv->thread_limit_var | |
906 | = thread_limit > INT_MAX ? UINT_MAX : thread_limit; | |
907 | } | |
908 | (void) num_teams; | |
909 | } | |
1df3f842 JJ |
910 | |
911 | #ifdef PLUGIN_SUPPORT | |
912 | ||
913 | /* This function tries to load a plugin for DEVICE. Name of plugin is passed | |
914 | in PLUGIN_NAME. | |
915 | The handles of the found functions are stored in the corresponding fields | |
916 | of DEVICE. The function returns TRUE on success and FALSE otherwise. */ | |
917 | ||
918 | static bool | |
919 | gomp_load_plugin_for_device (struct gomp_device_descr *device, | |
920 | const char *plugin_name) | |
921 | { | |
41dbbb37 TS |
922 | char *err = NULL, *last_missing = NULL; |
923 | int optional_present, optional_total; | |
924 | ||
925 | /* Clear any existing error. */ | |
926 | dlerror (); | |
927 | ||
1df3f842 JJ |
928 | void *plugin_handle = dlopen (plugin_name, RTLD_LAZY); |
929 | if (!plugin_handle) | |
41dbbb37 TS |
930 | { |
931 | err = dlerror (); | |
932 | goto out; | |
933 | } | |
1df3f842 JJ |
934 | |
935 | /* Check if all required functions are available in the plugin and store | |
936 | their handlers. */ | |
41dbbb37 TS |
937 | #define DLSYM(f) \ |
938 | do \ | |
939 | { \ | |
940 | device->f##_func = dlsym (plugin_handle, "GOMP_OFFLOAD_" #f); \ | |
941 | err = dlerror (); \ | |
942 | if (err != NULL) \ | |
943 | goto out; \ | |
944 | } \ | |
1df3f842 | 945 | while (0) |
41dbbb37 TS |
946 | /* Similar, but missing functions are not an error. */ |
947 | #define DLSYM_OPT(f, n) \ | |
948 | do \ | |
949 | { \ | |
950 | char *tmp_err; \ | |
951 | device->f##_func = dlsym (plugin_handle, "GOMP_OFFLOAD_" #n); \ | |
952 | tmp_err = dlerror (); \ | |
953 | if (tmp_err == NULL) \ | |
954 | optional_present++; \ | |
955 | else \ | |
956 | last_missing = #n; \ | |
957 | optional_total++; \ | |
958 | } \ | |
959 | while (0) | |
960 | ||
961 | DLSYM (get_name); | |
962 | DLSYM (get_caps); | |
1df3f842 JJ |
963 | DLSYM (get_type); |
964 | DLSYM (get_num_devices); | |
965 | DLSYM (register_image); | |
966 | DLSYM (init_device); | |
41dbbb37 | 967 | DLSYM (fini_device); |
1df3f842 JJ |
968 | DLSYM (get_table); |
969 | DLSYM (alloc); | |
970 | DLSYM (free); | |
971 | DLSYM (dev2host); | |
972 | DLSYM (host2dev); | |
41dbbb37 TS |
973 | device->capabilities = device->get_caps_func (); |
974 | if (device->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400) | |
975 | DLSYM (run); | |
976 | if (device->capabilities & GOMP_OFFLOAD_CAP_OPENACC_200) | |
977 | { | |
978 | optional_present = optional_total = 0; | |
979 | DLSYM_OPT (openacc.exec, openacc_parallel); | |
980 | DLSYM_OPT (openacc.open_device, openacc_open_device); | |
981 | DLSYM_OPT (openacc.close_device, openacc_close_device); | |
982 | DLSYM_OPT (openacc.get_device_num, openacc_get_device_num); | |
983 | DLSYM_OPT (openacc.set_device_num, openacc_set_device_num); | |
984 | DLSYM_OPT (openacc.register_async_cleanup, | |
985 | openacc_register_async_cleanup); | |
986 | DLSYM_OPT (openacc.async_test, openacc_async_test); | |
987 | DLSYM_OPT (openacc.async_test_all, openacc_async_test_all); | |
988 | DLSYM_OPT (openacc.async_wait, openacc_async_wait); | |
989 | DLSYM_OPT (openacc.async_wait_async, openacc_async_wait_async); | |
990 | DLSYM_OPT (openacc.async_wait_all, openacc_async_wait_all); | |
991 | DLSYM_OPT (openacc.async_wait_all_async, openacc_async_wait_all_async); | |
992 | DLSYM_OPT (openacc.async_set_async, openacc_async_set_async); | |
993 | DLSYM_OPT (openacc.create_thread_data, openacc_create_thread_data); | |
994 | DLSYM_OPT (openacc.destroy_thread_data, openacc_destroy_thread_data); | |
995 | /* Require all the OpenACC handlers if we have | |
996 | GOMP_OFFLOAD_CAP_OPENACC_200. */ | |
997 | if (optional_present != optional_total) | |
998 | { | |
999 | err = "plugin missing OpenACC handler function"; | |
1000 | goto out; | |
1001 | } | |
1002 | optional_present = optional_total = 0; | |
1003 | DLSYM_OPT (openacc.cuda.get_current_device, | |
1004 | openacc_get_current_cuda_device); | |
1005 | DLSYM_OPT (openacc.cuda.get_current_context, | |
1006 | openacc_get_current_cuda_context); | |
1007 | DLSYM_OPT (openacc.cuda.get_stream, openacc_get_cuda_stream); | |
1008 | DLSYM_OPT (openacc.cuda.set_stream, openacc_set_cuda_stream); | |
1009 | /* Make sure all the CUDA functions are there if any of them are. */ | |
1010 | if (optional_present && optional_present != optional_total) | |
1011 | { | |
1012 | err = "plugin missing OpenACC CUDA handler function"; | |
1013 | goto out; | |
1014 | } | |
1015 | } | |
1df3f842 | 1016 | #undef DLSYM |
41dbbb37 | 1017 | #undef DLSYM_OPT |
1df3f842 | 1018 | |
41dbbb37 TS |
1019 | out: |
1020 | if (err != NULL) | |
1021 | { | |
1022 | gomp_error ("while loading %s: %s", plugin_name, err); | |
1023 | if (last_missing) | |
1024 | gomp_error ("missing function was %s", last_missing); | |
1025 | if (plugin_handle) | |
1026 | dlclose (plugin_handle); | |
1027 | } | |
1028 | return err == NULL; | |
1df3f842 JJ |
1029 | } |
1030 | ||
41dbbb37 TS |
1031 | /* This function adds a compatible offload image IMAGE to an accelerator device |
1032 | DEVICE. DEVICE must be locked on entry, and remains locked on return. */ | |
1df3f842 JJ |
1033 | |
1034 | static void | |
41dbbb37 TS |
1035 | gomp_register_image_for_device (struct gomp_device_descr *device, |
1036 | struct offload_image_descr *image) | |
1df3f842 | 1037 | { |
41dbbb37 TS |
1038 | if (!device->offload_regions_registered |
1039 | && (device->type == image->type | |
1040 | || device->type == OFFLOAD_TARGET_TYPE_HOST)) | |
1df3f842 | 1041 | { |
41dbbb37 TS |
1042 | device->register_image_func (image->host_table, image->target_data); |
1043 | device->offload_regions_registered = true; | |
1df3f842 JJ |
1044 | } |
1045 | } | |
1046 | ||
1047 | /* This function initializes the runtime needed for offloading. | |
41dbbb37 TS |
1048 | It parses the list of offload targets and tries to load the plugins for |
1049 | these targets. On return, the variables NUM_DEVICES and NUM_DEVICES_OPENMP | |
1050 | will be set, and the array DEVICES initialized, containing descriptors for | |
1051 | corresponding devices, first the GOMP_OFFLOAD_CAP_OPENMP_400 ones, follows | |
1052 | by the others. */ | |
1df3f842 JJ |
1053 | |
1054 | static void | |
1055 | gomp_target_init (void) | |
1056 | { | |
1057 | const char *prefix ="libgomp-plugin-"; | |
b5f7a6ca | 1058 | const char *suffix = SONAME_SUFFIX (1); |
1df3f842 JJ |
1059 | const char *cur, *next; |
1060 | char *plugin_name; | |
1061 | int i, new_num_devices; | |
1062 | ||
1063 | num_devices = 0; | |
1064 | devices = NULL; | |
1065 | ||
1066 | cur = OFFLOAD_TARGETS; | |
1067 | if (*cur) | |
1068 | do | |
1069 | { | |
1070 | struct gomp_device_descr current_device; | |
1071 | ||
1072 | next = strchr (cur, ','); | |
1073 | ||
1074 | plugin_name = (char *) malloc (1 + (next ? next - cur : strlen (cur)) | |
1075 | + strlen (prefix) + strlen (suffix)); | |
1076 | if (!plugin_name) | |
1077 | { | |
1078 | num_devices = 0; | |
1079 | break; | |
1080 | } | |
1081 | ||
1082 | strcpy (plugin_name, prefix); | |
1083 | strncat (plugin_name, cur, next ? next - cur : strlen (cur)); | |
1084 | strcat (plugin_name, suffix); | |
1085 | ||
1086 | if (gomp_load_plugin_for_device (¤t_device, plugin_name)) | |
1087 | { | |
1088 | new_num_devices = current_device.get_num_devices_func (); | |
1089 | if (new_num_devices >= 1) | |
1090 | { | |
41dbbb37 TS |
1091 | /* Augment DEVICES and NUM_DEVICES. */ |
1092 | ||
1df3f842 JJ |
1093 | devices = realloc (devices, (num_devices + new_num_devices) |
1094 | * sizeof (struct gomp_device_descr)); | |
1095 | if (!devices) | |
1096 | { | |
1097 | num_devices = 0; | |
1098 | free (plugin_name); | |
1099 | break; | |
1100 | } | |
1101 | ||
41dbbb37 TS |
1102 | current_device.name = current_device.get_name_func (); |
1103 | /* current_device.capabilities has already been set. */ | |
1df3f842 | 1104 | current_device.type = current_device.get_type_func (); |
41dbbb37 TS |
1105 | current_device.mem_map.is_initialized = false; |
1106 | current_device.mem_map.splay_tree.root = NULL; | |
1df3f842 | 1107 | current_device.is_initialized = false; |
41dbbb37 TS |
1108 | current_device.offload_regions_registered = false; |
1109 | current_device.openacc.data_environ = NULL; | |
1110 | current_device.openacc.target_data = NULL; | |
1df3f842 JJ |
1111 | for (i = 0; i < new_num_devices; i++) |
1112 | { | |
1df3f842 JJ |
1113 | current_device.target_id = i; |
1114 | devices[num_devices] = current_device; | |
41dbbb37 TS |
1115 | gomp_mutex_init (&devices[num_devices].mem_map.lock); |
1116 | gomp_mutex_init (&devices[num_devices].lock); | |
1df3f842 JJ |
1117 | num_devices++; |
1118 | } | |
1119 | } | |
1120 | } | |
1121 | ||
1122 | free (plugin_name); | |
1123 | cur = next + 1; | |
1124 | } | |
1125 | while (next); | |
1126 | ||
41dbbb37 TS |
1127 | /* In DEVICES, sort the GOMP_OFFLOAD_CAP_OPENMP_400 ones first, and set |
1128 | NUM_DEVICES_OPENMP. */ | |
1129 | struct gomp_device_descr *devices_s | |
1130 | = malloc (num_devices * sizeof (struct gomp_device_descr)); | |
1131 | if (!devices_s) | |
1132 | { | |
1133 | num_devices = 0; | |
1134 | free (devices); | |
1135 | devices = NULL; | |
1136 | } | |
1137 | num_devices_openmp = 0; | |
1138 | for (i = 0; i < num_devices; i++) | |
1139 | if (devices[i].capabilities & GOMP_OFFLOAD_CAP_OPENMP_400) | |
1140 | devices_s[num_devices_openmp++] = devices[i]; | |
1141 | int num_devices_after_openmp = num_devices_openmp; | |
1142 | for (i = 0; i < num_devices; i++) | |
1143 | if (!(devices[i].capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)) | |
1144 | devices_s[num_devices_after_openmp++] = devices[i]; | |
1145 | free (devices); | |
1146 | devices = devices_s; | |
1147 | ||
1148 | for (i = 0; i < num_devices; i++) | |
1149 | { | |
1150 | int j; | |
1151 | ||
1152 | for (j = 0; j < num_offload_images; j++) | |
1153 | gomp_register_image_for_device (&devices[i], &offload_images[j]); | |
1154 | ||
1155 | /* The 'devices' array can be moved (by the realloc call) until we have | |
1156 | found all the plugins, so registering with the OpenACC runtime (which | |
1157 | takes a copy of the pointer argument) must be delayed until now. */ | |
1158 | if (devices[i].capabilities & GOMP_OFFLOAD_CAP_OPENACC_200) | |
1159 | goacc_register (&devices[i]); | |
1160 | } | |
1161 | ||
1df3f842 JJ |
1162 | free (offload_images); |
1163 | offload_images = NULL; | |
1164 | num_offload_images = 0; | |
1165 | } | |
1166 | ||
1167 | #else /* PLUGIN_SUPPORT */ | |
1168 | /* If dlfcn.h is unavailable we always fallback to host execution. | |
1169 | GOMP_target* routines are just stubs for this case. */ | |
1170 | static void | |
1171 | gomp_target_init (void) | |
1172 | { | |
1173 | } | |
1174 | #endif /* PLUGIN_SUPPORT */ |