]>
Commit | Line | Data |
---|---|---|
5624e564 | 1 | /* Copyright (C) 2013-2015 Free Software Foundation, Inc. |
acf0174b JJ |
2 | Contributed by Jakub Jelinek <jakub@redhat.com>. |
3 | ||
f1f3453e TS |
4 | This file is part of the GNU Offloading and Multi Processing Library |
5 | (libgomp). | |
acf0174b JJ |
6 | |
7 | Libgomp is free software; you can redistribute it and/or modify it | |
8 | under the terms of the GNU General Public License as published by | |
9 | the Free Software Foundation; either version 3, or (at your option) | |
10 | any later version. | |
11 | ||
12 | Libgomp is distributed in the hope that it will be useful, but WITHOUT ANY | |
13 | WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS | |
14 | FOR A PARTICULAR PURPOSE. See the GNU General Public License for | |
15 | more details. | |
16 | ||
17 | Under Section 7 of GPL version 3, you are granted additional | |
18 | permissions described in the GCC Runtime Library Exception, version | |
19 | 3.1, as published by the Free Software Foundation. | |
20 | ||
21 | You should have received a copy of the GNU General Public License and | |
22 | a copy of the GCC Runtime Library Exception along with this program; | |
23 | see the files COPYING3 and COPYING.RUNTIME respectively. If not, see | |
24 | <http://www.gnu.org/licenses/>. */ | |
25 | ||
1df3f842 | 26 | /* This file contains the support of offloading. */ |
acf0174b | 27 | |
1df3f842 | 28 | #include "config.h" |
acf0174b | 29 | #include "libgomp.h" |
41dbbb37 TS |
30 | #include "oacc-plugin.h" |
31 | #include "oacc-int.h" | |
32 | #include "gomp-constants.h" | |
acf0174b JJ |
33 | #include <limits.h> |
34 | #include <stdbool.h> | |
35 | #include <stdlib.h> | |
01c0b3b0 KT |
36 | #ifdef HAVE_INTTYPES_H |
37 | # include <inttypes.h> /* For PRIu64. */ | |
38 | #endif | |
acf0174b | 39 | #include <string.h> |
41dbbb37 | 40 | #include <assert.h> |
acf0174b | 41 | |
1df3f842 JJ |
42 | #ifdef PLUGIN_SUPPORT |
43 | #include <dlfcn.h> | |
b5f7a6ca | 44 | #include "plugin-suffix.h" |
1df3f842 JJ |
45 | #endif |
46 | ||
47 | static void gomp_target_init (void); | |
48 | ||
41dbbb37 | 49 | /* The whole initialization code for offloading plugins is only run one. */ |
1df3f842 JJ |
50 | static pthread_once_t gomp_is_initialized = PTHREAD_ONCE_INIT; |
51 | ||
a51df54e IV |
52 | /* Mutex for offload image registration. */ |
53 | static gomp_mutex_t register_lock; | |
54 | ||
1df3f842 JJ |
55 | /* This structure describes an offload image. |
56 | It contains type of the target device, pointer to host table descriptor, and | |
57 | pointer to target data. */ | |
58 | struct offload_image_descr { | |
59 | enum offload_target_type type; | |
60 | void *host_table; | |
61 | void *target_data; | |
62 | }; | |
63 | ||
64 | /* Array of descriptors of offload images. */ | |
65 | static struct offload_image_descr *offload_images; | |
66 | ||
67 | /* Total number of offload images. */ | |
68 | static int num_offload_images; | |
69 | ||
70 | /* Array of descriptors for all available devices. */ | |
71 | static struct gomp_device_descr *devices; | |
72 | ||
73 | /* Total number of available devices. */ | |
74 | static int num_devices; | |
75 | ||
41dbbb37 TS |
76 | /* Number of GOMP_OFFLOAD_CAP_OPENMP_400 devices. */ |
77 | static int num_devices_openmp; | |
78 | ||
a51df54e IV |
79 | /* Similar to gomp_realloc, but release register_lock before gomp_fatal. */ |
80 | ||
81 | static void * | |
82 | gomp_realloc_unlock (void *old, size_t size) | |
83 | { | |
84 | void *ret = realloc (old, size); | |
85 | if (ret == NULL) | |
86 | { | |
87 | gomp_mutex_unlock (®ister_lock); | |
88 | gomp_fatal ("Out of memory allocating %lu bytes", (unsigned long) size); | |
89 | } | |
90 | return ret; | |
91 | } | |
92 | ||
1df3f842 JJ |
93 | /* The comparison function. */ |
94 | ||
41dbbb37 | 95 | attribute_hidden int |
1df3f842 JJ |
96 | splay_compare (splay_tree_key x, splay_tree_key y) |
97 | { | |
98 | if (x->host_start == x->host_end | |
99 | && y->host_start == y->host_end) | |
100 | return 0; | |
101 | if (x->host_end <= y->host_start) | |
102 | return -1; | |
103 | if (x->host_start >= y->host_end) | |
104 | return 1; | |
105 | return 0; | |
106 | } | |
107 | ||
108 | #include "splay-tree.h" | |
109 | ||
41dbbb37 TS |
110 | attribute_hidden void |
111 | gomp_init_targets_once (void) | |
1df3f842 | 112 | { |
41dbbb37 TS |
113 | (void) pthread_once (&gomp_is_initialized, gomp_target_init); |
114 | } | |
1df3f842 | 115 | |
acf0174b JJ |
116 | attribute_hidden int |
117 | gomp_get_num_devices (void) | |
118 | { | |
41dbbb37 TS |
119 | gomp_init_targets_once (); |
120 | return num_devices_openmp; | |
1df3f842 JJ |
121 | } |
122 | ||
123 | static struct gomp_device_descr * | |
124 | resolve_device (int device_id) | |
125 | { | |
41dbbb37 | 126 | if (device_id == GOMP_DEVICE_ICV) |
1df3f842 JJ |
127 | { |
128 | struct gomp_task_icv *icv = gomp_icv (false); | |
129 | device_id = icv->default_device_var; | |
130 | } | |
131 | ||
132 | if (device_id < 0 || device_id >= gomp_get_num_devices ()) | |
133 | return NULL; | |
134 | ||
135 | return &devices[device_id]; | |
136 | } | |
137 | ||
138 | ||
139 | /* Handle the case where splay_tree_lookup found oldn for newn. | |
140 | Helper function of gomp_map_vars. */ | |
141 | ||
142 | static inline void | |
a51df54e IV |
143 | gomp_map_vars_existing (struct gomp_device_descr *devicep, splay_tree_key oldn, |
144 | splay_tree_key newn, unsigned char kind) | |
1df3f842 | 145 | { |
41dbbb37 TS |
146 | if ((kind & GOMP_MAP_FLAG_FORCE) |
147 | || oldn->host_start > newn->host_start | |
1df3f842 | 148 | || oldn->host_end < newn->host_end) |
a51df54e IV |
149 | { |
150 | gomp_mutex_unlock (&devicep->lock); | |
151 | gomp_fatal ("Trying to map into device [%p..%p) object when " | |
152 | "[%p..%p) is already mapped", | |
153 | (void *) newn->host_start, (void *) newn->host_end, | |
154 | (void *) oldn->host_start, (void *) oldn->host_end); | |
155 | } | |
1df3f842 JJ |
156 | oldn->refcount++; |
157 | } | |
158 | ||
41dbbb37 TS |
159 | static int |
160 | get_kind (bool is_openacc, void *kinds, int idx) | |
161 | { | |
162 | return is_openacc ? ((unsigned short *) kinds)[idx] | |
163 | : ((unsigned char *) kinds)[idx]; | |
164 | } | |
165 | ||
166 | attribute_hidden struct target_mem_desc * | |
1df3f842 | 167 | gomp_map_vars (struct gomp_device_descr *devicep, size_t mapnum, |
41dbbb37 TS |
168 | void **hostaddrs, void **devaddrs, size_t *sizes, void *kinds, |
169 | bool is_openacc, bool is_target) | |
1df3f842 JJ |
170 | { |
171 | size_t i, tgt_align, tgt_size, not_found_cnt = 0; | |
41dbbb37 TS |
172 | const int rshift = is_openacc ? 8 : 3; |
173 | const int typemask = is_openacc ? 0xff : 0x7; | |
a51df54e | 174 | struct splay_tree_s *mem_map = &devicep->mem_map; |
1df3f842 JJ |
175 | struct splay_tree_key_s cur_node; |
176 | struct target_mem_desc *tgt | |
177 | = gomp_malloc (sizeof (*tgt) + sizeof (tgt->list[0]) * mapnum); | |
178 | tgt->list_count = mapnum; | |
179 | tgt->refcount = 1; | |
180 | tgt->device_descr = devicep; | |
a51df54e | 181 | tgt->mem_map = mem_map; |
1df3f842 JJ |
182 | |
183 | if (mapnum == 0) | |
184 | return tgt; | |
185 | ||
186 | tgt_align = sizeof (void *); | |
187 | tgt_size = 0; | |
188 | if (is_target) | |
189 | { | |
190 | size_t align = 4 * sizeof (void *); | |
191 | tgt_align = align; | |
192 | tgt_size = mapnum * sizeof (void *); | |
193 | } | |
194 | ||
a51df54e | 195 | gomp_mutex_lock (&devicep->lock); |
41dbbb37 | 196 | |
1df3f842 JJ |
197 | for (i = 0; i < mapnum; i++) |
198 | { | |
41dbbb37 | 199 | int kind = get_kind (is_openacc, kinds, i); |
1df3f842 JJ |
200 | if (hostaddrs[i] == NULL) |
201 | { | |
202 | tgt->list[i] = NULL; | |
203 | continue; | |
204 | } | |
205 | cur_node.host_start = (uintptr_t) hostaddrs[i]; | |
41dbbb37 | 206 | if (!GOMP_MAP_POINTER_P (kind & typemask)) |
1df3f842 JJ |
207 | cur_node.host_end = cur_node.host_start + sizes[i]; |
208 | else | |
209 | cur_node.host_end = cur_node.host_start + sizeof (void *); | |
a51df54e | 210 | splay_tree_key n = splay_tree_lookup (mem_map, &cur_node); |
1df3f842 JJ |
211 | if (n) |
212 | { | |
213 | tgt->list[i] = n; | |
a51df54e | 214 | gomp_map_vars_existing (devicep, n, &cur_node, kind & typemask); |
1df3f842 JJ |
215 | } |
216 | else | |
217 | { | |
1df3f842 | 218 | tgt->list[i] = NULL; |
41dbbb37 TS |
219 | |
220 | size_t align = (size_t) 1 << (kind >> rshift); | |
1df3f842 JJ |
221 | not_found_cnt++; |
222 | if (tgt_align < align) | |
223 | tgt_align = align; | |
224 | tgt_size = (tgt_size + align - 1) & ~(align - 1); | |
225 | tgt_size += cur_node.host_end - cur_node.host_start; | |
41dbbb37 | 226 | if ((kind & typemask) == GOMP_MAP_TO_PSET) |
1df3f842 JJ |
227 | { |
228 | size_t j; | |
229 | for (j = i + 1; j < mapnum; j++) | |
41dbbb37 TS |
230 | if (!GOMP_MAP_POINTER_P (get_kind (is_openacc, kinds, j) |
231 | & typemask)) | |
1df3f842 JJ |
232 | break; |
233 | else if ((uintptr_t) hostaddrs[j] < cur_node.host_start | |
234 | || ((uintptr_t) hostaddrs[j] + sizeof (void *) | |
235 | > cur_node.host_end)) | |
236 | break; | |
237 | else | |
238 | { | |
239 | tgt->list[j] = NULL; | |
240 | i++; | |
241 | } | |
242 | } | |
243 | } | |
244 | } | |
245 | ||
41dbbb37 TS |
246 | if (devaddrs) |
247 | { | |
248 | if (mapnum != 1) | |
a51df54e IV |
249 | { |
250 | gomp_mutex_unlock (&devicep->lock); | |
251 | gomp_fatal ("unexpected aggregation"); | |
252 | } | |
41dbbb37 TS |
253 | tgt->to_free = devaddrs[0]; |
254 | tgt->tgt_start = (uintptr_t) tgt->to_free; | |
255 | tgt->tgt_end = tgt->tgt_start + sizes[0]; | |
256 | } | |
257 | else if (not_found_cnt || is_target) | |
1df3f842 JJ |
258 | { |
259 | /* Allocate tgt_align aligned tgt_size block of memory. */ | |
260 | /* FIXME: Perhaps change interface to allocate properly aligned | |
261 | memory. */ | |
262 | tgt->to_free = devicep->alloc_func (devicep->target_id, | |
263 | tgt_size + tgt_align - 1); | |
264 | tgt->tgt_start = (uintptr_t) tgt->to_free; | |
265 | tgt->tgt_start = (tgt->tgt_start + tgt_align - 1) & ~(tgt_align - 1); | |
266 | tgt->tgt_end = tgt->tgt_start + tgt_size; | |
267 | } | |
268 | else | |
269 | { | |
270 | tgt->to_free = NULL; | |
271 | tgt->tgt_start = 0; | |
272 | tgt->tgt_end = 0; | |
273 | } | |
274 | ||
275 | tgt_size = 0; | |
276 | if (is_target) | |
277 | tgt_size = mapnum * sizeof (void *); | |
278 | ||
279 | tgt->array = NULL; | |
280 | if (not_found_cnt) | |
281 | { | |
282 | tgt->array = gomp_malloc (not_found_cnt * sizeof (*tgt->array)); | |
283 | splay_tree_node array = tgt->array; | |
284 | size_t j; | |
285 | ||
286 | for (i = 0; i < mapnum; i++) | |
287 | if (tgt->list[i] == NULL) | |
288 | { | |
41dbbb37 | 289 | int kind = get_kind (is_openacc, kinds, i); |
1df3f842 JJ |
290 | if (hostaddrs[i] == NULL) |
291 | continue; | |
292 | splay_tree_key k = &array->key; | |
293 | k->host_start = (uintptr_t) hostaddrs[i]; | |
41dbbb37 | 294 | if (!GOMP_MAP_POINTER_P (kind & typemask)) |
1df3f842 JJ |
295 | k->host_end = k->host_start + sizes[i]; |
296 | else | |
297 | k->host_end = k->host_start + sizeof (void *); | |
a51df54e | 298 | splay_tree_key n = splay_tree_lookup (mem_map, k); |
1df3f842 JJ |
299 | if (n) |
300 | { | |
301 | tgt->list[i] = n; | |
a51df54e | 302 | gomp_map_vars_existing (devicep, n, k, kind & typemask); |
1df3f842 JJ |
303 | } |
304 | else | |
305 | { | |
41dbbb37 | 306 | size_t align = (size_t) 1 << (kind >> rshift); |
1df3f842 JJ |
307 | tgt->list[i] = k; |
308 | tgt_size = (tgt_size + align - 1) & ~(align - 1); | |
309 | k->tgt = tgt; | |
310 | k->tgt_offset = tgt_size; | |
311 | tgt_size += k->host_end - k->host_start; | |
41dbbb37 | 312 | k->copy_from = GOMP_MAP_COPY_FROM_P (kind & typemask); |
1df3f842 | 313 | k->refcount = 1; |
41dbbb37 | 314 | k->async_refcount = 0; |
1df3f842 JJ |
315 | tgt->refcount++; |
316 | array->left = NULL; | |
317 | array->right = NULL; | |
a51df54e | 318 | splay_tree_insert (mem_map, array); |
41dbbb37 | 319 | switch (kind & typemask) |
1df3f842 | 320 | { |
41dbbb37 TS |
321 | case GOMP_MAP_ALLOC: |
322 | case GOMP_MAP_FROM: | |
323 | case GOMP_MAP_FORCE_ALLOC: | |
324 | case GOMP_MAP_FORCE_FROM: | |
1df3f842 | 325 | break; |
41dbbb37 TS |
326 | case GOMP_MAP_TO: |
327 | case GOMP_MAP_TOFROM: | |
328 | case GOMP_MAP_FORCE_TO: | |
329 | case GOMP_MAP_FORCE_TOFROM: | |
1df3f842 JJ |
330 | /* FIXME: Perhaps add some smarts, like if copying |
331 | several adjacent fields from host to target, use some | |
332 | host buffer to avoid sending each var individually. */ | |
333 | devicep->host2dev_func (devicep->target_id, | |
334 | (void *) (tgt->tgt_start | |
335 | + k->tgt_offset), | |
336 | (void *) k->host_start, | |
337 | k->host_end - k->host_start); | |
338 | break; | |
41dbbb37 | 339 | case GOMP_MAP_POINTER: |
1df3f842 JJ |
340 | cur_node.host_start |
341 | = (uintptr_t) *(void **) k->host_start; | |
342 | if (cur_node.host_start == (uintptr_t) NULL) | |
343 | { | |
344 | cur_node.tgt_offset = (uintptr_t) NULL; | |
41dbbb37 | 345 | /* FIXME: see above FIXME comment. */ |
1df3f842 JJ |
346 | devicep->host2dev_func (devicep->target_id, |
347 | (void *) (tgt->tgt_start | |
348 | + k->tgt_offset), | |
349 | (void *) &cur_node.tgt_offset, | |
350 | sizeof (void *)); | |
351 | break; | |
352 | } | |
353 | /* Add bias to the pointer value. */ | |
354 | cur_node.host_start += sizes[i]; | |
355 | cur_node.host_end = cur_node.host_start + 1; | |
a51df54e | 356 | n = splay_tree_lookup (mem_map, &cur_node); |
1df3f842 JJ |
357 | if (n == NULL) |
358 | { | |
359 | /* Could be possibly zero size array section. */ | |
360 | cur_node.host_end--; | |
a51df54e | 361 | n = splay_tree_lookup (mem_map, &cur_node); |
1df3f842 JJ |
362 | if (n == NULL) |
363 | { | |
364 | cur_node.host_start--; | |
a51df54e | 365 | n = splay_tree_lookup (mem_map, &cur_node); |
1df3f842 JJ |
366 | cur_node.host_start++; |
367 | } | |
368 | } | |
369 | if (n == NULL) | |
a51df54e IV |
370 | { |
371 | gomp_mutex_unlock (&devicep->lock); | |
372 | gomp_fatal ("Pointer target of array section " | |
373 | "wasn't mapped"); | |
374 | } | |
1df3f842 JJ |
375 | cur_node.host_start -= n->host_start; |
376 | cur_node.tgt_offset = n->tgt->tgt_start + n->tgt_offset | |
377 | + cur_node.host_start; | |
378 | /* At this point tgt_offset is target address of the | |
379 | array section. Now subtract bias to get what we want | |
380 | to initialize the pointer with. */ | |
381 | cur_node.tgt_offset -= sizes[i]; | |
41dbbb37 | 382 | /* FIXME: see above FIXME comment. */ |
1df3f842 JJ |
383 | devicep->host2dev_func (devicep->target_id, |
384 | (void *) (tgt->tgt_start | |
385 | + k->tgt_offset), | |
386 | (void *) &cur_node.tgt_offset, | |
387 | sizeof (void *)); | |
388 | break; | |
41dbbb37 TS |
389 | case GOMP_MAP_TO_PSET: |
390 | /* FIXME: see above FIXME comment. */ | |
1df3f842 JJ |
391 | devicep->host2dev_func (devicep->target_id, |
392 | (void *) (tgt->tgt_start | |
393 | + k->tgt_offset), | |
394 | (void *) k->host_start, | |
395 | k->host_end - k->host_start); | |
41dbbb37 | 396 | |
1df3f842 | 397 | for (j = i + 1; j < mapnum; j++) |
41dbbb37 TS |
398 | if (!GOMP_MAP_POINTER_P (get_kind (is_openacc, kinds, j) |
399 | & typemask)) | |
1df3f842 JJ |
400 | break; |
401 | else if ((uintptr_t) hostaddrs[j] < k->host_start | |
402 | || ((uintptr_t) hostaddrs[j] + sizeof (void *) | |
403 | > k->host_end)) | |
404 | break; | |
405 | else | |
406 | { | |
407 | tgt->list[j] = k; | |
408 | k->refcount++; | |
409 | cur_node.host_start | |
410 | = (uintptr_t) *(void **) hostaddrs[j]; | |
411 | if (cur_node.host_start == (uintptr_t) NULL) | |
412 | { | |
413 | cur_node.tgt_offset = (uintptr_t) NULL; | |
41dbbb37 | 414 | /* FIXME: see above FIXME comment. */ |
1df3f842 JJ |
415 | devicep->host2dev_func (devicep->target_id, |
416 | (void *) (tgt->tgt_start + k->tgt_offset | |
417 | + ((uintptr_t) hostaddrs[j] | |
418 | - k->host_start)), | |
419 | (void *) &cur_node.tgt_offset, | |
420 | sizeof (void *)); | |
421 | i++; | |
422 | continue; | |
423 | } | |
424 | /* Add bias to the pointer value. */ | |
425 | cur_node.host_start += sizes[j]; | |
426 | cur_node.host_end = cur_node.host_start + 1; | |
a51df54e | 427 | n = splay_tree_lookup (mem_map, &cur_node); |
1df3f842 JJ |
428 | if (n == NULL) |
429 | { | |
430 | /* Could be possibly zero size array section. */ | |
431 | cur_node.host_end--; | |
a51df54e | 432 | n = splay_tree_lookup (mem_map, &cur_node); |
1df3f842 JJ |
433 | if (n == NULL) |
434 | { | |
435 | cur_node.host_start--; | |
a51df54e | 436 | n = splay_tree_lookup (mem_map, &cur_node); |
1df3f842 JJ |
437 | cur_node.host_start++; |
438 | } | |
439 | } | |
440 | if (n == NULL) | |
a51df54e IV |
441 | { |
442 | gomp_mutex_unlock (&devicep->lock); | |
443 | gomp_fatal ("Pointer target of array section " | |
444 | "wasn't mapped"); | |
445 | } | |
1df3f842 JJ |
446 | cur_node.host_start -= n->host_start; |
447 | cur_node.tgt_offset = n->tgt->tgt_start | |
448 | + n->tgt_offset | |
449 | + cur_node.host_start; | |
450 | /* At this point tgt_offset is target address of the | |
451 | array section. Now subtract bias to get what we | |
452 | want to initialize the pointer with. */ | |
453 | cur_node.tgt_offset -= sizes[j]; | |
41dbbb37 | 454 | /* FIXME: see above FIXME comment. */ |
1df3f842 JJ |
455 | devicep->host2dev_func (devicep->target_id, |
456 | (void *) (tgt->tgt_start + k->tgt_offset | |
457 | + ((uintptr_t) hostaddrs[j] | |
458 | - k->host_start)), | |
459 | (void *) &cur_node.tgt_offset, | |
460 | sizeof (void *)); | |
461 | i++; | |
462 | } | |
41dbbb37 TS |
463 | break; |
464 | case GOMP_MAP_FORCE_PRESENT: | |
465 | { | |
466 | /* We already looked up the memory region above and it | |
467 | was missing. */ | |
468 | size_t size = k->host_end - k->host_start; | |
a51df54e | 469 | gomp_mutex_unlock (&devicep->lock); |
01c0b3b0 KT |
470 | #ifdef HAVE_INTTYPES_H |
471 | gomp_fatal ("present clause: !acc_is_present (%p, " | |
472 | "%"PRIu64" (0x%"PRIx64"))", | |
473 | (void *) k->host_start, | |
474 | (uint64_t) size, (uint64_t) size); | |
475 | #else | |
41dbbb37 | 476 | gomp_fatal ("present clause: !acc_is_present (%p, " |
01c0b3b0 KT |
477 | "%lu (0x%lx))", (void *) k->host_start, |
478 | (unsigned long) size, (unsigned long) size); | |
479 | #endif | |
41dbbb37 TS |
480 | } |
481 | break; | |
482 | case GOMP_MAP_FORCE_DEVICEPTR: | |
483 | assert (k->host_end - k->host_start == sizeof (void *)); | |
484 | ||
485 | devicep->host2dev_func (devicep->target_id, | |
486 | (void *) (tgt->tgt_start | |
487 | + k->tgt_offset), | |
488 | (void *) k->host_start, | |
489 | sizeof (void *)); | |
490 | break; | |
491 | default: | |
a51df54e | 492 | gomp_mutex_unlock (&devicep->lock); |
41dbbb37 TS |
493 | gomp_fatal ("%s: unhandled kind 0x%.2x", __FUNCTION__, |
494 | kind); | |
1df3f842 JJ |
495 | } |
496 | array++; | |
497 | } | |
498 | } | |
499 | } | |
41dbbb37 | 500 | |
1df3f842 JJ |
501 | if (is_target) |
502 | { | |
503 | for (i = 0; i < mapnum; i++) | |
504 | { | |
505 | if (tgt->list[i] == NULL) | |
506 | cur_node.tgt_offset = (uintptr_t) NULL; | |
507 | else | |
508 | cur_node.tgt_offset = tgt->list[i]->tgt->tgt_start | |
509 | + tgt->list[i]->tgt_offset; | |
41dbbb37 | 510 | /* FIXME: see above FIXME comment. */ |
1df3f842 JJ |
511 | devicep->host2dev_func (devicep->target_id, |
512 | (void *) (tgt->tgt_start | |
513 | + i * sizeof (void *)), | |
514 | (void *) &cur_node.tgt_offset, | |
515 | sizeof (void *)); | |
516 | } | |
517 | } | |
518 | ||
a51df54e | 519 | gomp_mutex_unlock (&devicep->lock); |
1df3f842 JJ |
520 | return tgt; |
521 | } | |
522 | ||
523 | static void | |
524 | gomp_unmap_tgt (struct target_mem_desc *tgt) | |
525 | { | |
526 | /* Deallocate on target the tgt->tgt_start .. tgt->tgt_end region. */ | |
527 | if (tgt->tgt_end) | |
528 | tgt->device_descr->free_func (tgt->device_descr->target_id, tgt->to_free); | |
529 | ||
530 | free (tgt->array); | |
531 | free (tgt); | |
532 | } | |
533 | ||
41dbbb37 TS |
534 | /* Decrease the refcount for a set of mapped variables, and queue asychronous |
535 | copies from the device back to the host after any work that has been issued. | |
536 | Because the regions are still "live", increment an asynchronous reference | |
537 | count to indicate that they should not be unmapped from host-side data | |
538 | structures until the asynchronous copy has completed. */ | |
539 | ||
540 | attribute_hidden void | |
541 | gomp_copy_from_async (struct target_mem_desc *tgt) | |
542 | { | |
543 | struct gomp_device_descr *devicep = tgt->device_descr; | |
41dbbb37 TS |
544 | size_t i; |
545 | ||
a51df54e | 546 | gomp_mutex_lock (&devicep->lock); |
41dbbb37 TS |
547 | |
548 | for (i = 0; i < tgt->list_count; i++) | |
549 | if (tgt->list[i] == NULL) | |
550 | ; | |
551 | else if (tgt->list[i]->refcount > 1) | |
552 | { | |
553 | tgt->list[i]->refcount--; | |
554 | tgt->list[i]->async_refcount++; | |
555 | } | |
556 | else | |
557 | { | |
558 | splay_tree_key k = tgt->list[i]; | |
559 | if (k->copy_from) | |
560 | devicep->dev2host_func (devicep->target_id, (void *) k->host_start, | |
561 | (void *) (k->tgt->tgt_start + k->tgt_offset), | |
562 | k->host_end - k->host_start); | |
563 | } | |
564 | ||
a51df54e | 565 | gomp_mutex_unlock (&devicep->lock); |
41dbbb37 TS |
566 | } |
567 | ||
568 | /* Unmap variables described by TGT. If DO_COPYFROM is true, copy relevant | |
569 | variables back from device to host: if it is false, it is assumed that this | |
570 | has been done already, i.e. by gomp_copy_from_async above. */ | |
571 | ||
572 | attribute_hidden void | |
573 | gomp_unmap_vars (struct target_mem_desc *tgt, bool do_copyfrom) | |
1df3f842 JJ |
574 | { |
575 | struct gomp_device_descr *devicep = tgt->device_descr; | |
576 | ||
577 | if (tgt->list_count == 0) | |
578 | { | |
579 | free (tgt); | |
580 | return; | |
581 | } | |
582 | ||
a51df54e | 583 | gomp_mutex_lock (&devicep->lock); |
41dbbb37 | 584 | |
1df3f842 | 585 | size_t i; |
1df3f842 JJ |
586 | for (i = 0; i < tgt->list_count; i++) |
587 | if (tgt->list[i] == NULL) | |
588 | ; | |
589 | else if (tgt->list[i]->refcount > 1) | |
590 | tgt->list[i]->refcount--; | |
41dbbb37 TS |
591 | else if (tgt->list[i]->async_refcount > 0) |
592 | tgt->list[i]->async_refcount--; | |
1df3f842 JJ |
593 | else |
594 | { | |
595 | splay_tree_key k = tgt->list[i]; | |
41dbbb37 | 596 | if (k->copy_from && do_copyfrom) |
1df3f842 JJ |
597 | devicep->dev2host_func (devicep->target_id, (void *) k->host_start, |
598 | (void *) (k->tgt->tgt_start + k->tgt_offset), | |
599 | k->host_end - k->host_start); | |
a51df54e | 600 | splay_tree_remove (tgt->mem_map, k); |
1df3f842 JJ |
601 | if (k->tgt->refcount > 1) |
602 | k->tgt->refcount--; | |
603 | else | |
604 | gomp_unmap_tgt (k->tgt); | |
605 | } | |
606 | ||
607 | if (tgt->refcount > 1) | |
608 | tgt->refcount--; | |
609 | else | |
610 | gomp_unmap_tgt (tgt); | |
41dbbb37 | 611 | |
a51df54e | 612 | gomp_mutex_unlock (&devicep->lock); |
1df3f842 JJ |
613 | } |
614 | ||
615 | static void | |
a51df54e IV |
616 | gomp_update (struct gomp_device_descr *devicep, size_t mapnum, void **hostaddrs, |
617 | size_t *sizes, void *kinds, bool is_openacc) | |
1df3f842 JJ |
618 | { |
619 | size_t i; | |
620 | struct splay_tree_key_s cur_node; | |
41dbbb37 | 621 | const int typemask = is_openacc ? 0xff : 0x7; |
1df3f842 JJ |
622 | |
623 | if (!devicep) | |
624 | return; | |
625 | ||
626 | if (mapnum == 0) | |
627 | return; | |
628 | ||
a51df54e | 629 | gomp_mutex_lock (&devicep->lock); |
1df3f842 JJ |
630 | for (i = 0; i < mapnum; i++) |
631 | if (sizes[i]) | |
632 | { | |
633 | cur_node.host_start = (uintptr_t) hostaddrs[i]; | |
634 | cur_node.host_end = cur_node.host_start + sizes[i]; | |
a51df54e | 635 | splay_tree_key n = splay_tree_lookup (&devicep->mem_map, &cur_node); |
1df3f842 JJ |
636 | if (n) |
637 | { | |
41dbbb37 | 638 | int kind = get_kind (is_openacc, kinds, i); |
1df3f842 JJ |
639 | if (n->host_start > cur_node.host_start |
640 | || n->host_end < cur_node.host_end) | |
a51df54e IV |
641 | { |
642 | gomp_mutex_unlock (&devicep->lock); | |
643 | gomp_fatal ("Trying to update [%p..%p) object when " | |
644 | "only [%p..%p) is mapped", | |
645 | (void *) cur_node.host_start, | |
646 | (void *) cur_node.host_end, | |
647 | (void *) n->host_start, | |
648 | (void *) n->host_end); | |
649 | } | |
41dbbb37 | 650 | if (GOMP_MAP_COPY_TO_P (kind & typemask)) |
1df3f842 JJ |
651 | devicep->host2dev_func (devicep->target_id, |
652 | (void *) (n->tgt->tgt_start | |
653 | + n->tgt_offset | |
654 | + cur_node.host_start | |
655 | - n->host_start), | |
656 | (void *) cur_node.host_start, | |
657 | cur_node.host_end - cur_node.host_start); | |
41dbbb37 | 658 | if (GOMP_MAP_COPY_FROM_P (kind & typemask)) |
1df3f842 JJ |
659 | devicep->dev2host_func (devicep->target_id, |
660 | (void *) cur_node.host_start, | |
661 | (void *) (n->tgt->tgt_start | |
662 | + n->tgt_offset | |
663 | + cur_node.host_start | |
664 | - n->host_start), | |
665 | cur_node.host_end - cur_node.host_start); | |
666 | } | |
667 | else | |
a51df54e IV |
668 | { |
669 | gomp_mutex_unlock (&devicep->lock); | |
670 | gomp_fatal ("Trying to update [%p..%p) object that is not mapped", | |
671 | (void *) cur_node.host_start, | |
672 | (void *) cur_node.host_end); | |
673 | } | |
1df3f842 | 674 | } |
a51df54e IV |
675 | gomp_mutex_unlock (&devicep->lock); |
676 | } | |
677 | ||
678 | /* Load image pointed by TARGET_DATA to the device, specified by DEVICEP. | |
679 | And insert to splay tree the mapping between addresses from HOST_TABLE and | |
680 | from loaded target image. */ | |
681 | ||
682 | static void | |
683 | gomp_offload_image_to_device (struct gomp_device_descr *devicep, | |
684 | void *host_table, void *target_data, | |
685 | bool is_register_lock) | |
686 | { | |
687 | void **host_func_table = ((void ***) host_table)[0]; | |
688 | void **host_funcs_end = ((void ***) host_table)[1]; | |
689 | void **host_var_table = ((void ***) host_table)[2]; | |
690 | void **host_vars_end = ((void ***) host_table)[3]; | |
691 | ||
692 | /* The func table contains only addresses, the var table contains addresses | |
693 | and corresponding sizes. */ | |
694 | int num_funcs = host_funcs_end - host_func_table; | |
695 | int num_vars = (host_vars_end - host_var_table) / 2; | |
696 | ||
697 | /* Load image to device and get target addresses for the image. */ | |
698 | struct addr_pair *target_table = NULL; | |
699 | int i, num_target_entries | |
700 | = devicep->load_image_func (devicep->target_id, target_data, &target_table); | |
701 | ||
702 | if (num_target_entries != num_funcs + num_vars) | |
703 | { | |
704 | gomp_mutex_unlock (&devicep->lock); | |
705 | if (is_register_lock) | |
706 | gomp_mutex_unlock (®ister_lock); | |
707 | gomp_fatal ("Can't map target functions or variables"); | |
708 | } | |
709 | ||
710 | /* Insert host-target address mapping into splay tree. */ | |
711 | struct target_mem_desc *tgt = gomp_malloc (sizeof (*tgt)); | |
712 | tgt->array = gomp_malloc ((num_funcs + num_vars) * sizeof (*tgt->array)); | |
713 | tgt->refcount = 1; | |
714 | tgt->tgt_start = 0; | |
715 | tgt->tgt_end = 0; | |
716 | tgt->to_free = NULL; | |
717 | tgt->prev = NULL; | |
718 | tgt->list_count = 0; | |
719 | tgt->device_descr = devicep; | |
720 | splay_tree_node array = tgt->array; | |
721 | ||
722 | for (i = 0; i < num_funcs; i++) | |
723 | { | |
724 | splay_tree_key k = &array->key; | |
725 | k->host_start = (uintptr_t) host_func_table[i]; | |
726 | k->host_end = k->host_start + 1; | |
727 | k->tgt = tgt; | |
728 | k->tgt_offset = target_table[i].start; | |
729 | k->refcount = 1; | |
730 | k->async_refcount = 0; | |
731 | k->copy_from = false; | |
732 | array->left = NULL; | |
733 | array->right = NULL; | |
734 | splay_tree_insert (&devicep->mem_map, array); | |
735 | array++; | |
736 | } | |
737 | ||
738 | for (i = 0; i < num_vars; i++) | |
739 | { | |
740 | struct addr_pair *target_var = &target_table[num_funcs + i]; | |
741 | if (target_var->end - target_var->start | |
742 | != (uintptr_t) host_var_table[i * 2 + 1]) | |
743 | { | |
744 | gomp_mutex_unlock (&devicep->lock); | |
745 | if (is_register_lock) | |
746 | gomp_mutex_unlock (®ister_lock); | |
747 | gomp_fatal ("Can't map target variables (size mismatch)"); | |
748 | } | |
749 | ||
750 | splay_tree_key k = &array->key; | |
751 | k->host_start = (uintptr_t) host_var_table[i * 2]; | |
752 | k->host_end = k->host_start + (uintptr_t) host_var_table[i * 2 + 1]; | |
753 | k->tgt = tgt; | |
754 | k->tgt_offset = target_var->start; | |
755 | k->refcount = 1; | |
756 | k->async_refcount = 0; | |
757 | k->copy_from = false; | |
758 | array->left = NULL; | |
759 | array->right = NULL; | |
760 | splay_tree_insert (&devicep->mem_map, array); | |
761 | array++; | |
762 | } | |
763 | ||
764 | free (target_table); | |
1df3f842 JJ |
765 | } |
766 | ||
a51df54e | 767 | /* This function should be called from every offload image while loading. |
1df3f842 JJ |
768 | It gets the descriptor of the host func and var tables HOST_TABLE, TYPE of |
769 | the target, and TARGET_DATA needed by target plugin. */ | |
770 | ||
771 | void | |
772 | GOMP_offload_register (void *host_table, enum offload_target_type target_type, | |
773 | void *target_data) | |
774 | { | |
a51df54e IV |
775 | int i; |
776 | gomp_mutex_lock (®ister_lock); | |
777 | ||
778 | /* Load image to all initialized devices. */ | |
779 | for (i = 0; i < num_devices; i++) | |
780 | { | |
781 | struct gomp_device_descr *devicep = &devices[i]; | |
782 | gomp_mutex_lock (&devicep->lock); | |
783 | if (devicep->type == target_type && devicep->is_initialized) | |
784 | gomp_offload_image_to_device (devicep, host_table, target_data, true); | |
785 | gomp_mutex_unlock (&devicep->lock); | |
786 | } | |
1df3f842 | 787 | |
a51df54e IV |
788 | /* Insert image to array of pending images. */ |
789 | offload_images | |
790 | = gomp_realloc_unlock (offload_images, | |
791 | (num_offload_images + 1) | |
792 | * sizeof (struct offload_image_descr)); | |
1df3f842 JJ |
793 | offload_images[num_offload_images].type = target_type; |
794 | offload_images[num_offload_images].host_table = host_table; | |
795 | offload_images[num_offload_images].target_data = target_data; | |
796 | ||
797 | num_offload_images++; | |
a51df54e | 798 | gomp_mutex_unlock (®ister_lock); |
1df3f842 JJ |
799 | } |
800 | ||
a51df54e IV |
801 | /* This function should be called from every offload image while unloading. |
802 | It gets the descriptor of the host func and var tables HOST_TABLE, TYPE of | |
803 | the target, and TARGET_DATA needed by target plugin. */ | |
1df3f842 | 804 | |
a51df54e IV |
805 | void |
806 | GOMP_offload_unregister (void *host_table, enum offload_target_type target_type, | |
807 | void *target_data) | |
1df3f842 | 808 | { |
a51df54e IV |
809 | void **host_func_table = ((void ***) host_table)[0]; |
810 | void **host_funcs_end = ((void ***) host_table)[1]; | |
811 | void **host_var_table = ((void ***) host_table)[2]; | |
812 | void **host_vars_end = ((void ***) host_table)[3]; | |
813 | int i; | |
814 | ||
815 | /* The func table contains only addresses, the var table contains addresses | |
816 | and corresponding sizes. */ | |
817 | int num_funcs = host_funcs_end - host_func_table; | |
818 | int num_vars = (host_vars_end - host_var_table) / 2; | |
819 | ||
820 | gomp_mutex_lock (®ister_lock); | |
821 | ||
822 | /* Unload image from all initialized devices. */ | |
823 | for (i = 0; i < num_devices; i++) | |
824 | { | |
825 | int j; | |
826 | struct gomp_device_descr *devicep = &devices[i]; | |
827 | gomp_mutex_lock (&devicep->lock); | |
828 | if (devicep->type != target_type || !devicep->is_initialized) | |
829 | { | |
830 | gomp_mutex_unlock (&devicep->lock); | |
831 | continue; | |
832 | } | |
833 | ||
834 | devicep->unload_image_func (devicep->target_id, target_data); | |
835 | ||
836 | /* Remove mapping from splay tree. */ | |
837 | struct splay_tree_key_s k; | |
838 | splay_tree_key node = NULL; | |
839 | if (num_funcs > 0) | |
840 | { | |
841 | k.host_start = (uintptr_t) host_func_table[0]; | |
842 | k.host_end = k.host_start + 1; | |
843 | node = splay_tree_lookup (&devicep->mem_map, &k); | |
844 | } | |
845 | else if (num_vars > 0) | |
846 | { | |
847 | k.host_start = (uintptr_t) host_var_table[0]; | |
848 | k.host_end = k.host_start + (uintptr_t) host_var_table[1]; | |
849 | node = splay_tree_lookup (&devicep->mem_map, &k); | |
850 | } | |
851 | ||
852 | for (j = 0; j < num_funcs; j++) | |
853 | { | |
854 | k.host_start = (uintptr_t) host_func_table[j]; | |
855 | k.host_end = k.host_start + 1; | |
856 | splay_tree_remove (&devicep->mem_map, &k); | |
857 | } | |
858 | ||
859 | for (j = 0; j < num_vars; j++) | |
860 | { | |
861 | k.host_start = (uintptr_t) host_var_table[j * 2]; | |
862 | k.host_end = k.host_start + (uintptr_t) host_var_table[j * 2 + 1]; | |
863 | splay_tree_remove (&devicep->mem_map, &k); | |
864 | } | |
865 | ||
866 | if (node) | |
867 | { | |
868 | free (node->tgt); | |
869 | free (node); | |
870 | } | |
871 | ||
872 | gomp_mutex_unlock (&devicep->lock); | |
873 | } | |
874 | ||
875 | /* Remove image from array of pending images. */ | |
876 | for (i = 0; i < num_offload_images; i++) | |
877 | if (offload_images[i].target_data == target_data) | |
878 | { | |
879 | offload_images[i] = offload_images[--num_offload_images]; | |
880 | break; | |
881 | } | |
882 | ||
883 | gomp_mutex_unlock (®ister_lock); | |
41dbbb37 | 884 | } |
1df3f842 | 885 | |
a51df54e IV |
886 | /* This function initializes the target device, specified by DEVICEP. DEVICEP |
887 | must be locked on entry, and remains locked on return. */ | |
41dbbb37 TS |
888 | |
889 | attribute_hidden void | |
a51df54e | 890 | gomp_init_device (struct gomp_device_descr *devicep) |
41dbbb37 | 891 | { |
1df3f842 | 892 | int i; |
a51df54e IV |
893 | devicep->init_device_func (devicep->target_id); |
894 | ||
895 | /* Load to device all images registered by the moment. */ | |
896 | for (i = 0; i < num_offload_images; i++) | |
1df3f842 | 897 | { |
a51df54e IV |
898 | struct offload_image_descr *image = &offload_images[i]; |
899 | if (image->type == devicep->type) | |
900 | gomp_offload_image_to_device (devicep, image->host_table, | |
901 | image->target_data, false); | |
1df3f842 JJ |
902 | } |
903 | ||
a51df54e | 904 | devicep->is_initialized = true; |
41dbbb37 TS |
905 | } |
906 | ||
907 | /* Free address mapping tables. MM must be locked on entry, and remains locked | |
908 | on return. */ | |
909 | ||
910 | attribute_hidden void | |
a51df54e | 911 | gomp_free_memmap (struct splay_tree_s *mem_map) |
41dbbb37 | 912 | { |
a51df54e | 913 | while (mem_map->root) |
41dbbb37 | 914 | { |
a51df54e | 915 | struct target_mem_desc *tgt = mem_map->root->key.tgt; |
41dbbb37 | 916 | |
a51df54e | 917 | splay_tree_remove (mem_map, &mem_map->root->key); |
41dbbb37 TS |
918 | free (tgt->array); |
919 | free (tgt); | |
920 | } | |
41dbbb37 TS |
921 | } |
922 | ||
923 | /* This function de-initializes the target device, specified by DEVICEP. | |
924 | DEVICEP must be locked on entry, and remains locked on return. */ | |
925 | ||
926 | attribute_hidden void | |
927 | gomp_fini_device (struct gomp_device_descr *devicep) | |
928 | { | |
929 | if (devicep->is_initialized) | |
930 | devicep->fini_device_func (devicep->target_id); | |
931 | ||
932 | devicep->is_initialized = false; | |
acf0174b JJ |
933 | } |
934 | ||
935 | /* Called when encountering a target directive. If DEVICE | |
41dbbb37 TS |
936 | is GOMP_DEVICE_ICV, it means use device-var ICV. If it is |
937 | GOMP_DEVICE_HOST_FALLBACK (or any value | |
938 | larger than last available hw device), use host fallback. | |
128b26dc TS |
939 | FN is address of host code, UNUSED is part of the current ABI, but |
940 | we're not actually using it. HOSTADDRS, SIZES and KINDS are arrays | |
acf0174b JJ |
941 | with MAPNUM entries, with addresses of the host objects, |
942 | sizes of the host objects (resp. for pointer kind pointer bias | |
943 | and assumed sizeof (void *) size) and kinds. */ | |
944 | ||
945 | void | |
128b26dc | 946 | GOMP_target (int device, void (*fn) (void *), const void *unused, |
acf0174b JJ |
947 | size_t mapnum, void **hostaddrs, size_t *sizes, |
948 | unsigned char *kinds) | |
949 | { | |
1df3f842 | 950 | struct gomp_device_descr *devicep = resolve_device (device); |
41dbbb37 TS |
951 | |
952 | if (devicep == NULL | |
953 | || !(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)) | |
1df3f842 JJ |
954 | { |
955 | /* Host fallback. */ | |
956 | struct gomp_thread old_thr, *thr = gomp_thread (); | |
957 | old_thr = *thr; | |
958 | memset (thr, '\0', sizeof (*thr)); | |
959 | if (gomp_places_list) | |
960 | { | |
961 | thr->place = old_thr.place; | |
962 | thr->ts.place_partition_len = gomp_places_list_len; | |
963 | } | |
964 | fn (hostaddrs); | |
965 | gomp_free_thread (thr); | |
966 | *thr = old_thr; | |
967 | return; | |
968 | } | |
969 | ||
41dbbb37 | 970 | gomp_mutex_lock (&devicep->lock); |
1df3f842 JJ |
971 | if (!devicep->is_initialized) |
972 | gomp_init_device (devicep); | |
41dbbb37 TS |
973 | gomp_mutex_unlock (&devicep->lock); |
974 | ||
975 | void *fn_addr; | |
976 | ||
977 | if (devicep->capabilities & GOMP_OFFLOAD_CAP_NATIVE_EXEC) | |
978 | fn_addr = (void *) fn; | |
979 | else | |
980 | { | |
a51df54e | 981 | gomp_mutex_lock (&devicep->lock); |
41dbbb37 TS |
982 | struct splay_tree_key_s k; |
983 | k.host_start = (uintptr_t) fn; | |
984 | k.host_end = k.host_start + 1; | |
a51df54e | 985 | splay_tree_key tgt_fn = splay_tree_lookup (&devicep->mem_map, &k); |
41dbbb37 | 986 | if (tgt_fn == NULL) |
a51df54e IV |
987 | { |
988 | gomp_mutex_unlock (&devicep->lock); | |
989 | gomp_fatal ("Target function wasn't mapped"); | |
990 | } | |
991 | gomp_mutex_unlock (&devicep->lock); | |
41dbbb37 | 992 | |
a51df54e | 993 | fn_addr = (void *) tgt_fn->tgt_offset; |
41dbbb37 | 994 | } |
1df3f842 JJ |
995 | |
996 | struct target_mem_desc *tgt_vars | |
41dbbb37 TS |
997 | = gomp_map_vars (devicep, mapnum, hostaddrs, NULL, sizes, kinds, false, |
998 | true); | |
acf0174b JJ |
999 | struct gomp_thread old_thr, *thr = gomp_thread (); |
1000 | old_thr = *thr; | |
1001 | memset (thr, '\0', sizeof (*thr)); | |
1002 | if (gomp_places_list) | |
1003 | { | |
1004 | thr->place = old_thr.place; | |
1005 | thr->ts.place_partition_len = gomp_places_list_len; | |
1006 | } | |
41dbbb37 | 1007 | devicep->run_func (devicep->target_id, fn_addr, (void *) tgt_vars->tgt_start); |
acf0174b JJ |
1008 | gomp_free_thread (thr); |
1009 | *thr = old_thr; | |
41dbbb37 | 1010 | gomp_unmap_vars (tgt_vars, true); |
acf0174b JJ |
1011 | } |
1012 | ||
1013 | void | |
128b26dc | 1014 | GOMP_target_data (int device, const void *unused, size_t mapnum, |
acf0174b JJ |
1015 | void **hostaddrs, size_t *sizes, unsigned char *kinds) |
1016 | { | |
1df3f842 | 1017 | struct gomp_device_descr *devicep = resolve_device (device); |
41dbbb37 TS |
1018 | |
1019 | if (devicep == NULL | |
1020 | || !(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)) | |
1df3f842 JJ |
1021 | { |
1022 | /* Host fallback. */ | |
1023 | struct gomp_task_icv *icv = gomp_icv (false); | |
1024 | if (icv->target_data) | |
1025 | { | |
1026 | /* Even when doing a host fallback, if there are any active | |
1027 | #pragma omp target data constructs, need to remember the | |
1028 | new #pragma omp target data, otherwise GOMP_target_end_data | |
1029 | would get out of sync. */ | |
1030 | struct target_mem_desc *tgt | |
41dbbb37 | 1031 | = gomp_map_vars (NULL, 0, NULL, NULL, NULL, NULL, false, false); |
1df3f842 JJ |
1032 | tgt->prev = icv->target_data; |
1033 | icv->target_data = tgt; | |
1034 | } | |
1035 | return; | |
1036 | } | |
1037 | ||
41dbbb37 | 1038 | gomp_mutex_lock (&devicep->lock); |
1df3f842 JJ |
1039 | if (!devicep->is_initialized) |
1040 | gomp_init_device (devicep); | |
41dbbb37 TS |
1041 | gomp_mutex_unlock (&devicep->lock); |
1042 | ||
1df3f842 | 1043 | struct target_mem_desc *tgt |
41dbbb37 TS |
1044 | = gomp_map_vars (devicep, mapnum, hostaddrs, NULL, sizes, kinds, false, |
1045 | false); | |
1df3f842 JJ |
1046 | struct gomp_task_icv *icv = gomp_icv (true); |
1047 | tgt->prev = icv->target_data; | |
1048 | icv->target_data = tgt; | |
acf0174b JJ |
1049 | } |
1050 | ||
1051 | void | |
1052 | GOMP_target_end_data (void) | |
1053 | { | |
1df3f842 JJ |
1054 | struct gomp_task_icv *icv = gomp_icv (false); |
1055 | if (icv->target_data) | |
1056 | { | |
1057 | struct target_mem_desc *tgt = icv->target_data; | |
1058 | icv->target_data = tgt->prev; | |
41dbbb37 | 1059 | gomp_unmap_vars (tgt, true); |
1df3f842 | 1060 | } |
acf0174b JJ |
1061 | } |
1062 | ||
1063 | void | |
128b26dc | 1064 | GOMP_target_update (int device, const void *unused, size_t mapnum, |
acf0174b JJ |
1065 | void **hostaddrs, size_t *sizes, unsigned char *kinds) |
1066 | { | |
1df3f842 | 1067 | struct gomp_device_descr *devicep = resolve_device (device); |
41dbbb37 TS |
1068 | |
1069 | if (devicep == NULL | |
1070 | || !(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)) | |
1df3f842 JJ |
1071 | return; |
1072 | ||
41dbbb37 | 1073 | gomp_mutex_lock (&devicep->lock); |
1df3f842 JJ |
1074 | if (!devicep->is_initialized) |
1075 | gomp_init_device (devicep); | |
41dbbb37 TS |
1076 | gomp_mutex_unlock (&devicep->lock); |
1077 | ||
a51df54e | 1078 | gomp_update (devicep, mapnum, hostaddrs, sizes, kinds, false); |
acf0174b JJ |
1079 | } |
1080 | ||
1081 | void | |
1082 | GOMP_teams (unsigned int num_teams, unsigned int thread_limit) | |
1083 | { | |
1084 | if (thread_limit) | |
1085 | { | |
1086 | struct gomp_task_icv *icv = gomp_icv (true); | |
1087 | icv->thread_limit_var | |
1088 | = thread_limit > INT_MAX ? UINT_MAX : thread_limit; | |
1089 | } | |
1090 | (void) num_teams; | |
1091 | } | |
1df3f842 JJ |
1092 | |
1093 | #ifdef PLUGIN_SUPPORT | |
1094 | ||
1095 | /* This function tries to load a plugin for DEVICE. Name of plugin is passed | |
1096 | in PLUGIN_NAME. | |
1097 | The handles of the found functions are stored in the corresponding fields | |
1098 | of DEVICE. The function returns TRUE on success and FALSE otherwise. */ | |
1099 | ||
1100 | static bool | |
1101 | gomp_load_plugin_for_device (struct gomp_device_descr *device, | |
1102 | const char *plugin_name) | |
1103 | { | |
196904d8 | 1104 | const char *err = NULL, *last_missing = NULL; |
41dbbb37 TS |
1105 | int optional_present, optional_total; |
1106 | ||
1107 | /* Clear any existing error. */ | |
1108 | dlerror (); | |
1109 | ||
1df3f842 JJ |
1110 | void *plugin_handle = dlopen (plugin_name, RTLD_LAZY); |
1111 | if (!plugin_handle) | |
41dbbb37 TS |
1112 | { |
1113 | err = dlerror (); | |
1114 | goto out; | |
1115 | } | |
1df3f842 JJ |
1116 | |
1117 | /* Check if all required functions are available in the plugin and store | |
1118 | their handlers. */ | |
41dbbb37 TS |
1119 | #define DLSYM(f) \ |
1120 | do \ | |
1121 | { \ | |
1122 | device->f##_func = dlsym (plugin_handle, "GOMP_OFFLOAD_" #f); \ | |
1123 | err = dlerror (); \ | |
1124 | if (err != NULL) \ | |
1125 | goto out; \ | |
1126 | } \ | |
1df3f842 | 1127 | while (0) |
41dbbb37 TS |
1128 | /* Similar, but missing functions are not an error. */ |
1129 | #define DLSYM_OPT(f, n) \ | |
1130 | do \ | |
1131 | { \ | |
196904d8 | 1132 | const char *tmp_err; \ |
41dbbb37 TS |
1133 | device->f##_func = dlsym (plugin_handle, "GOMP_OFFLOAD_" #n); \ |
1134 | tmp_err = dlerror (); \ | |
1135 | if (tmp_err == NULL) \ | |
1136 | optional_present++; \ | |
1137 | else \ | |
1138 | last_missing = #n; \ | |
1139 | optional_total++; \ | |
1140 | } \ | |
1141 | while (0) | |
1142 | ||
1143 | DLSYM (get_name); | |
1144 | DLSYM (get_caps); | |
1df3f842 JJ |
1145 | DLSYM (get_type); |
1146 | DLSYM (get_num_devices); | |
1df3f842 | 1147 | DLSYM (init_device); |
41dbbb37 | 1148 | DLSYM (fini_device); |
a51df54e IV |
1149 | DLSYM (load_image); |
1150 | DLSYM (unload_image); | |
1df3f842 JJ |
1151 | DLSYM (alloc); |
1152 | DLSYM (free); | |
1153 | DLSYM (dev2host); | |
1154 | DLSYM (host2dev); | |
41dbbb37 TS |
1155 | device->capabilities = device->get_caps_func (); |
1156 | if (device->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400) | |
1157 | DLSYM (run); | |
1158 | if (device->capabilities & GOMP_OFFLOAD_CAP_OPENACC_200) | |
1159 | { | |
1160 | optional_present = optional_total = 0; | |
1161 | DLSYM_OPT (openacc.exec, openacc_parallel); | |
1162 | DLSYM_OPT (openacc.open_device, openacc_open_device); | |
1163 | DLSYM_OPT (openacc.close_device, openacc_close_device); | |
1164 | DLSYM_OPT (openacc.get_device_num, openacc_get_device_num); | |
1165 | DLSYM_OPT (openacc.set_device_num, openacc_set_device_num); | |
1166 | DLSYM_OPT (openacc.register_async_cleanup, | |
1167 | openacc_register_async_cleanup); | |
1168 | DLSYM_OPT (openacc.async_test, openacc_async_test); | |
1169 | DLSYM_OPT (openacc.async_test_all, openacc_async_test_all); | |
1170 | DLSYM_OPT (openacc.async_wait, openacc_async_wait); | |
1171 | DLSYM_OPT (openacc.async_wait_async, openacc_async_wait_async); | |
1172 | DLSYM_OPT (openacc.async_wait_all, openacc_async_wait_all); | |
1173 | DLSYM_OPT (openacc.async_wait_all_async, openacc_async_wait_all_async); | |
1174 | DLSYM_OPT (openacc.async_set_async, openacc_async_set_async); | |
1175 | DLSYM_OPT (openacc.create_thread_data, openacc_create_thread_data); | |
1176 | DLSYM_OPT (openacc.destroy_thread_data, openacc_destroy_thread_data); | |
1177 | /* Require all the OpenACC handlers if we have | |
1178 | GOMP_OFFLOAD_CAP_OPENACC_200. */ | |
1179 | if (optional_present != optional_total) | |
1180 | { | |
1181 | err = "plugin missing OpenACC handler function"; | |
1182 | goto out; | |
1183 | } | |
1184 | optional_present = optional_total = 0; | |
1185 | DLSYM_OPT (openacc.cuda.get_current_device, | |
1186 | openacc_get_current_cuda_device); | |
1187 | DLSYM_OPT (openacc.cuda.get_current_context, | |
1188 | openacc_get_current_cuda_context); | |
1189 | DLSYM_OPT (openacc.cuda.get_stream, openacc_get_cuda_stream); | |
1190 | DLSYM_OPT (openacc.cuda.set_stream, openacc_set_cuda_stream); | |
1191 | /* Make sure all the CUDA functions are there if any of them are. */ | |
1192 | if (optional_present && optional_present != optional_total) | |
1193 | { | |
1194 | err = "plugin missing OpenACC CUDA handler function"; | |
1195 | goto out; | |
1196 | } | |
1197 | } | |
1df3f842 | 1198 | #undef DLSYM |
41dbbb37 | 1199 | #undef DLSYM_OPT |
1df3f842 | 1200 | |
41dbbb37 TS |
1201 | out: |
1202 | if (err != NULL) | |
1203 | { | |
1204 | gomp_error ("while loading %s: %s", plugin_name, err); | |
1205 | if (last_missing) | |
1206 | gomp_error ("missing function was %s", last_missing); | |
1207 | if (plugin_handle) | |
1208 | dlclose (plugin_handle); | |
1209 | } | |
1210 | return err == NULL; | |
1df3f842 JJ |
1211 | } |
1212 | ||
1df3f842 | 1213 | /* This function initializes the runtime needed for offloading. |
41dbbb37 TS |
1214 | It parses the list of offload targets and tries to load the plugins for |
1215 | these targets. On return, the variables NUM_DEVICES and NUM_DEVICES_OPENMP | |
1216 | will be set, and the array DEVICES initialized, containing descriptors for | |
1217 | corresponding devices, first the GOMP_OFFLOAD_CAP_OPENMP_400 ones, follows | |
1218 | by the others. */ | |
1df3f842 JJ |
1219 | |
1220 | static void | |
1221 | gomp_target_init (void) | |
1222 | { | |
1223 | const char *prefix ="libgomp-plugin-"; | |
b5f7a6ca | 1224 | const char *suffix = SONAME_SUFFIX (1); |
1df3f842 JJ |
1225 | const char *cur, *next; |
1226 | char *plugin_name; | |
1227 | int i, new_num_devices; | |
1228 | ||
1229 | num_devices = 0; | |
1230 | devices = NULL; | |
1231 | ||
1232 | cur = OFFLOAD_TARGETS; | |
1233 | if (*cur) | |
1234 | do | |
1235 | { | |
1236 | struct gomp_device_descr current_device; | |
1237 | ||
1238 | next = strchr (cur, ','); | |
1239 | ||
1240 | plugin_name = (char *) malloc (1 + (next ? next - cur : strlen (cur)) | |
1241 | + strlen (prefix) + strlen (suffix)); | |
1242 | if (!plugin_name) | |
1243 | { | |
1244 | num_devices = 0; | |
1245 | break; | |
1246 | } | |
1247 | ||
1248 | strcpy (plugin_name, prefix); | |
1249 | strncat (plugin_name, cur, next ? next - cur : strlen (cur)); | |
1250 | strcat (plugin_name, suffix); | |
1251 | ||
1252 | if (gomp_load_plugin_for_device (¤t_device, plugin_name)) | |
1253 | { | |
1254 | new_num_devices = current_device.get_num_devices_func (); | |
1255 | if (new_num_devices >= 1) | |
1256 | { | |
41dbbb37 TS |
1257 | /* Augment DEVICES and NUM_DEVICES. */ |
1258 | ||
1df3f842 JJ |
1259 | devices = realloc (devices, (num_devices + new_num_devices) |
1260 | * sizeof (struct gomp_device_descr)); | |
1261 | if (!devices) | |
1262 | { | |
1263 | num_devices = 0; | |
1264 | free (plugin_name); | |
1265 | break; | |
1266 | } | |
1267 | ||
41dbbb37 TS |
1268 | current_device.name = current_device.get_name_func (); |
1269 | /* current_device.capabilities has already been set. */ | |
1df3f842 | 1270 | current_device.type = current_device.get_type_func (); |
a51df54e | 1271 | current_device.mem_map.root = NULL; |
1df3f842 | 1272 | current_device.is_initialized = false; |
41dbbb37 TS |
1273 | current_device.openacc.data_environ = NULL; |
1274 | current_device.openacc.target_data = NULL; | |
1df3f842 JJ |
1275 | for (i = 0; i < new_num_devices; i++) |
1276 | { | |
1df3f842 JJ |
1277 | current_device.target_id = i; |
1278 | devices[num_devices] = current_device; | |
41dbbb37 | 1279 | gomp_mutex_init (&devices[num_devices].lock); |
1df3f842 JJ |
1280 | num_devices++; |
1281 | } | |
1282 | } | |
1283 | } | |
1284 | ||
1285 | free (plugin_name); | |
1286 | cur = next + 1; | |
1287 | } | |
1288 | while (next); | |
1289 | ||
41dbbb37 TS |
1290 | /* In DEVICES, sort the GOMP_OFFLOAD_CAP_OPENMP_400 ones first, and set |
1291 | NUM_DEVICES_OPENMP. */ | |
1292 | struct gomp_device_descr *devices_s | |
1293 | = malloc (num_devices * sizeof (struct gomp_device_descr)); | |
1294 | if (!devices_s) | |
1295 | { | |
1296 | num_devices = 0; | |
1297 | free (devices); | |
1298 | devices = NULL; | |
1299 | } | |
1300 | num_devices_openmp = 0; | |
1301 | for (i = 0; i < num_devices; i++) | |
1302 | if (devices[i].capabilities & GOMP_OFFLOAD_CAP_OPENMP_400) | |
1303 | devices_s[num_devices_openmp++] = devices[i]; | |
1304 | int num_devices_after_openmp = num_devices_openmp; | |
1305 | for (i = 0; i < num_devices; i++) | |
1306 | if (!(devices[i].capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)) | |
1307 | devices_s[num_devices_after_openmp++] = devices[i]; | |
1308 | free (devices); | |
1309 | devices = devices_s; | |
1310 | ||
1311 | for (i = 0; i < num_devices; i++) | |
1312 | { | |
41dbbb37 TS |
1313 | /* The 'devices' array can be moved (by the realloc call) until we have |
1314 | found all the plugins, so registering with the OpenACC runtime (which | |
1315 | takes a copy of the pointer argument) must be delayed until now. */ | |
1316 | if (devices[i].capabilities & GOMP_OFFLOAD_CAP_OPENACC_200) | |
1317 | goacc_register (&devices[i]); | |
1318 | } | |
1df3f842 JJ |
1319 | } |
1320 | ||
1321 | #else /* PLUGIN_SUPPORT */ | |
1322 | /* If dlfcn.h is unavailable we always fallback to host execution. | |
1323 | GOMP_target* routines are just stubs for this case. */ | |
1324 | static void | |
1325 | gomp_target_init (void) | |
1326 | { | |
1327 | } | |
1328 | #endif /* PLUGIN_SUPPORT */ |