]>
Commit | Line | Data |
---|---|---|
d353bf18 | 1 | /* Copyright (C) 2013-2015 Free Software Foundation, Inc. |
bc7bff74 | 2 | Contributed by Jakub Jelinek <jakub@redhat.com>. |
3 | ||
c35c9a62 | 4 | This file is part of the GNU Offloading and Multi Processing Library |
5 | (libgomp). | |
bc7bff74 | 6 | |
7 | Libgomp is free software; you can redistribute it and/or modify it | |
8 | under the terms of the GNU General Public License as published by | |
9 | the Free Software Foundation; either version 3, or (at your option) | |
10 | any later version. | |
11 | ||
12 | Libgomp is distributed in the hope that it will be useful, but WITHOUT ANY | |
13 | WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS | |
14 | FOR A PARTICULAR PURPOSE. See the GNU General Public License for | |
15 | more details. | |
16 | ||
17 | Under Section 7 of GPL version 3, you are granted additional | |
18 | permissions described in the GCC Runtime Library Exception, version | |
19 | 3.1, as published by the Free Software Foundation. | |
20 | ||
21 | You should have received a copy of the GNU General Public License and | |
22 | a copy of the GCC Runtime Library Exception along with this program; | |
23 | see the files COPYING3 and COPYING.RUNTIME respectively. If not, see | |
24 | <http://www.gnu.org/licenses/>. */ | |
25 | ||
995b27b9 | 26 | /* This file contains the support of offloading. */ |
bc7bff74 | 27 | |
995b27b9 | 28 | #include "config.h" |
bc7bff74 | 29 | #include "libgomp.h" |
995b27b9 | 30 | #include "libgomp_target.h" |
bc7bff74 | 31 | #include <limits.h> |
32 | #include <stdbool.h> | |
33 | #include <stdlib.h> | |
34 | #include <string.h> | |
35 | ||
995b27b9 | 36 | #ifdef PLUGIN_SUPPORT |
37 | #include <dlfcn.h> | |
38 | #endif | |
39 | ||
40 | static void gomp_target_init (void); | |
41 | ||
42 | static pthread_once_t gomp_is_initialized = PTHREAD_ONCE_INIT; | |
43 | ||
44 | /* Forward declaration for a node in the tree. */ | |
45 | typedef struct splay_tree_node_s *splay_tree_node; | |
46 | typedef struct splay_tree_s *splay_tree; | |
47 | typedef struct splay_tree_key_s *splay_tree_key; | |
48 | ||
49 | struct target_mem_desc { | |
50 | /* Reference count. */ | |
51 | uintptr_t refcount; | |
52 | /* All the splay nodes allocated together. */ | |
53 | splay_tree_node array; | |
54 | /* Start of the target region. */ | |
55 | uintptr_t tgt_start; | |
56 | /* End of the targer region. */ | |
57 | uintptr_t tgt_end; | |
58 | /* Handle to free. */ | |
59 | void *to_free; | |
60 | /* Previous target_mem_desc. */ | |
61 | struct target_mem_desc *prev; | |
62 | /* Number of items in following list. */ | |
63 | size_t list_count; | |
64 | ||
65 | /* Corresponding target device descriptor. */ | |
66 | struct gomp_device_descr *device_descr; | |
67 | ||
68 | /* List of splay keys to remove (or decrease refcount) | |
69 | at the end of region. */ | |
70 | splay_tree_key list[]; | |
71 | }; | |
72 | ||
73 | struct splay_tree_key_s { | |
74 | /* Address of the host object. */ | |
75 | uintptr_t host_start; | |
76 | /* Address immediately after the host object. */ | |
77 | uintptr_t host_end; | |
78 | /* Descriptor of the target memory. */ | |
79 | struct target_mem_desc *tgt; | |
80 | /* Offset from tgt->tgt_start to the start of the target object. */ | |
81 | uintptr_t tgt_offset; | |
82 | /* Reference count. */ | |
83 | uintptr_t refcount; | |
84 | /* True if data should be copied from device to host at the end. */ | |
85 | bool copy_from; | |
86 | }; | |
87 | ||
88 | /* This structure describes an offload image. | |
89 | It contains type of the target device, pointer to host table descriptor, and | |
90 | pointer to target data. */ | |
91 | struct offload_image_descr { | |
92 | enum offload_target_type type; | |
93 | void *host_table; | |
94 | void *target_data; | |
95 | }; | |
96 | ||
97 | /* Array of descriptors of offload images. */ | |
98 | static struct offload_image_descr *offload_images; | |
99 | ||
100 | /* Total number of offload images. */ | |
101 | static int num_offload_images; | |
102 | ||
103 | /* Array of descriptors for all available devices. */ | |
104 | static struct gomp_device_descr *devices; | |
105 | ||
106 | /* Total number of available devices. */ | |
107 | static int num_devices; | |
108 | ||
109 | /* The comparison function. */ | |
110 | ||
111 | static int | |
112 | splay_compare (splay_tree_key x, splay_tree_key y) | |
113 | { | |
114 | if (x->host_start == x->host_end | |
115 | && y->host_start == y->host_end) | |
116 | return 0; | |
117 | if (x->host_end <= y->host_start) | |
118 | return -1; | |
119 | if (x->host_start >= y->host_end) | |
120 | return 1; | |
121 | return 0; | |
122 | } | |
123 | ||
124 | #include "splay-tree.h" | |
125 | ||
126 | /* This structure describes accelerator device. | |
127 | It contains ID-number of the device, its type, function handlers for | |
128 | interaction with the device, and information about mapped memory. */ | |
129 | struct gomp_device_descr | |
130 | { | |
131 | /* This is the ID number of device. It could be specified in DEVICE-clause of | |
132 | TARGET construct. */ | |
133 | int id; | |
134 | ||
135 | /* This is the ID number of device among devices of the same type. */ | |
136 | int target_id; | |
137 | ||
138 | /* This is the TYPE of device. */ | |
139 | enum offload_target_type type; | |
140 | ||
141 | /* Set to true when device is initialized. */ | |
142 | bool is_initialized; | |
143 | ||
144 | /* Function handlers. */ | |
145 | int (*get_type_func) (void); | |
146 | int (*get_num_devices_func) (void); | |
147 | void (*register_image_func) (void *, void *); | |
148 | void (*init_device_func) (int); | |
149 | int (*get_table_func) (int, void *); | |
150 | void *(*alloc_func) (int, size_t); | |
151 | void (*free_func) (int, void *); | |
152 | void *(*host2dev_func) (int, void *, const void *, size_t); | |
153 | void *(*dev2host_func) (int, void *, const void *, size_t); | |
154 | void (*run_func) (int, void *, void *); | |
155 | ||
156 | /* Splay tree containing information about mapped memory regions. */ | |
157 | struct splay_tree_s dev_splay_tree; | |
158 | ||
159 | /* Mutex for operating with the splay tree and other shared structures. */ | |
160 | gomp_mutex_t dev_env_lock; | |
161 | }; | |
162 | ||
bc7bff74 | 163 | attribute_hidden int |
164 | gomp_get_num_devices (void) | |
165 | { | |
995b27b9 | 166 | (void) pthread_once (&gomp_is_initialized, gomp_target_init); |
167 | return num_devices; | |
168 | } | |
169 | ||
170 | static struct gomp_device_descr * | |
171 | resolve_device (int device_id) | |
172 | { | |
173 | if (device_id == -1) | |
174 | { | |
175 | struct gomp_task_icv *icv = gomp_icv (false); | |
176 | device_id = icv->default_device_var; | |
177 | } | |
178 | ||
179 | if (device_id < 0 || device_id >= gomp_get_num_devices ()) | |
180 | return NULL; | |
181 | ||
182 | return &devices[device_id]; | |
183 | } | |
184 | ||
185 | ||
186 | /* Handle the case where splay_tree_lookup found oldn for newn. | |
187 | Helper function of gomp_map_vars. */ | |
188 | ||
189 | static inline void | |
190 | gomp_map_vars_existing (splay_tree_key oldn, splay_tree_key newn, | |
191 | unsigned char kind) | |
192 | { | |
193 | if (oldn->host_start > newn->host_start | |
194 | || oldn->host_end < newn->host_end) | |
195 | gomp_fatal ("Trying to map into device [%p..%p) object when" | |
196 | "[%p..%p) is already mapped", | |
197 | (void *) newn->host_start, (void *) newn->host_end, | |
198 | (void *) oldn->host_start, (void *) oldn->host_end); | |
199 | oldn->refcount++; | |
200 | } | |
201 | ||
202 | static struct target_mem_desc * | |
203 | gomp_map_vars (struct gomp_device_descr *devicep, size_t mapnum, | |
204 | void **hostaddrs, size_t *sizes, unsigned char *kinds, | |
205 | bool is_target) | |
206 | { | |
207 | size_t i, tgt_align, tgt_size, not_found_cnt = 0; | |
208 | struct splay_tree_key_s cur_node; | |
209 | struct target_mem_desc *tgt | |
210 | = gomp_malloc (sizeof (*tgt) + sizeof (tgt->list[0]) * mapnum); | |
211 | tgt->list_count = mapnum; | |
212 | tgt->refcount = 1; | |
213 | tgt->device_descr = devicep; | |
214 | ||
215 | if (mapnum == 0) | |
216 | return tgt; | |
217 | ||
218 | tgt_align = sizeof (void *); | |
219 | tgt_size = 0; | |
220 | if (is_target) | |
221 | { | |
222 | size_t align = 4 * sizeof (void *); | |
223 | tgt_align = align; | |
224 | tgt_size = mapnum * sizeof (void *); | |
225 | } | |
226 | ||
227 | gomp_mutex_lock (&devicep->dev_env_lock); | |
228 | for (i = 0; i < mapnum; i++) | |
229 | { | |
230 | if (hostaddrs[i] == NULL) | |
231 | { | |
232 | tgt->list[i] = NULL; | |
233 | continue; | |
234 | } | |
235 | cur_node.host_start = (uintptr_t) hostaddrs[i]; | |
236 | if ((kinds[i] & 7) != 4) | |
237 | cur_node.host_end = cur_node.host_start + sizes[i]; | |
238 | else | |
239 | cur_node.host_end = cur_node.host_start + sizeof (void *); | |
240 | splay_tree_key n = splay_tree_lookup (&devicep->dev_splay_tree, | |
241 | &cur_node); | |
242 | if (n) | |
243 | { | |
244 | tgt->list[i] = n; | |
245 | gomp_map_vars_existing (n, &cur_node, kinds[i]); | |
246 | } | |
247 | else | |
248 | { | |
249 | size_t align = (size_t) 1 << (kinds[i] >> 3); | |
250 | tgt->list[i] = NULL; | |
251 | not_found_cnt++; | |
252 | if (tgt_align < align) | |
253 | tgt_align = align; | |
254 | tgt_size = (tgt_size + align - 1) & ~(align - 1); | |
255 | tgt_size += cur_node.host_end - cur_node.host_start; | |
256 | if ((kinds[i] & 7) == 5) | |
257 | { | |
258 | size_t j; | |
259 | for (j = i + 1; j < mapnum; j++) | |
260 | if ((kinds[j] & 7) != 4) | |
261 | break; | |
262 | else if ((uintptr_t) hostaddrs[j] < cur_node.host_start | |
263 | || ((uintptr_t) hostaddrs[j] + sizeof (void *) | |
264 | > cur_node.host_end)) | |
265 | break; | |
266 | else | |
267 | { | |
268 | tgt->list[j] = NULL; | |
269 | i++; | |
270 | } | |
271 | } | |
272 | } | |
273 | } | |
274 | ||
275 | if (not_found_cnt || is_target) | |
276 | { | |
277 | /* Allocate tgt_align aligned tgt_size block of memory. */ | |
278 | /* FIXME: Perhaps change interface to allocate properly aligned | |
279 | memory. */ | |
280 | tgt->to_free = devicep->alloc_func (devicep->target_id, | |
281 | tgt_size + tgt_align - 1); | |
282 | tgt->tgt_start = (uintptr_t) tgt->to_free; | |
283 | tgt->tgt_start = (tgt->tgt_start + tgt_align - 1) & ~(tgt_align - 1); | |
284 | tgt->tgt_end = tgt->tgt_start + tgt_size; | |
285 | } | |
286 | else | |
287 | { | |
288 | tgt->to_free = NULL; | |
289 | tgt->tgt_start = 0; | |
290 | tgt->tgt_end = 0; | |
291 | } | |
292 | ||
293 | tgt_size = 0; | |
294 | if (is_target) | |
295 | tgt_size = mapnum * sizeof (void *); | |
296 | ||
297 | tgt->array = NULL; | |
298 | if (not_found_cnt) | |
299 | { | |
300 | tgt->array = gomp_malloc (not_found_cnt * sizeof (*tgt->array)); | |
301 | splay_tree_node array = tgt->array; | |
302 | size_t j; | |
303 | ||
304 | for (i = 0; i < mapnum; i++) | |
305 | if (tgt->list[i] == NULL) | |
306 | { | |
307 | if (hostaddrs[i] == NULL) | |
308 | continue; | |
309 | splay_tree_key k = &array->key; | |
310 | k->host_start = (uintptr_t) hostaddrs[i]; | |
311 | if ((kinds[i] & 7) != 4) | |
312 | k->host_end = k->host_start + sizes[i]; | |
313 | else | |
314 | k->host_end = k->host_start + sizeof (void *); | |
315 | splay_tree_key n | |
316 | = splay_tree_lookup (&devicep->dev_splay_tree, k); | |
317 | if (n) | |
318 | { | |
319 | tgt->list[i] = n; | |
320 | gomp_map_vars_existing (n, k, kinds[i]); | |
321 | } | |
322 | else | |
323 | { | |
324 | size_t align = (size_t) 1 << (kinds[i] >> 3); | |
325 | tgt->list[i] = k; | |
326 | tgt_size = (tgt_size + align - 1) & ~(align - 1); | |
327 | k->tgt = tgt; | |
328 | k->tgt_offset = tgt_size; | |
329 | tgt_size += k->host_end - k->host_start; | |
330 | k->copy_from = false; | |
331 | if ((kinds[i] & 7) == 2 || (kinds[i] & 7) == 3) | |
332 | k->copy_from = true; | |
333 | k->refcount = 1; | |
334 | tgt->refcount++; | |
335 | array->left = NULL; | |
336 | array->right = NULL; | |
337 | splay_tree_insert (&devicep->dev_splay_tree, array); | |
338 | switch (kinds[i] & 7) | |
339 | { | |
340 | case 0: /* ALLOC */ | |
341 | case 2: /* FROM */ | |
342 | break; | |
343 | case 1: /* TO */ | |
344 | case 3: /* TOFROM */ | |
345 | /* FIXME: Perhaps add some smarts, like if copying | |
346 | several adjacent fields from host to target, use some | |
347 | host buffer to avoid sending each var individually. */ | |
348 | devicep->host2dev_func (devicep->target_id, | |
349 | (void *) (tgt->tgt_start | |
350 | + k->tgt_offset), | |
351 | (void *) k->host_start, | |
352 | k->host_end - k->host_start); | |
353 | break; | |
354 | case 4: /* POINTER */ | |
355 | cur_node.host_start | |
356 | = (uintptr_t) *(void **) k->host_start; | |
357 | if (cur_node.host_start == (uintptr_t) NULL) | |
358 | { | |
359 | cur_node.tgt_offset = (uintptr_t) NULL; | |
360 | devicep->host2dev_func (devicep->target_id, | |
361 | (void *) (tgt->tgt_start | |
362 | + k->tgt_offset), | |
363 | (void *) &cur_node.tgt_offset, | |
364 | sizeof (void *)); | |
365 | break; | |
366 | } | |
367 | /* Add bias to the pointer value. */ | |
368 | cur_node.host_start += sizes[i]; | |
369 | cur_node.host_end = cur_node.host_start + 1; | |
370 | n = splay_tree_lookup (&devicep->dev_splay_tree, | |
371 | &cur_node); | |
372 | if (n == NULL) | |
373 | { | |
374 | /* Could be possibly zero size array section. */ | |
375 | cur_node.host_end--; | |
376 | n = splay_tree_lookup (&devicep->dev_splay_tree, | |
377 | &cur_node); | |
378 | if (n == NULL) | |
379 | { | |
380 | cur_node.host_start--; | |
381 | n = splay_tree_lookup (&devicep->dev_splay_tree, | |
382 | &cur_node); | |
383 | cur_node.host_start++; | |
384 | } | |
385 | } | |
386 | if (n == NULL) | |
387 | gomp_fatal ("Pointer target of array section " | |
388 | "wasn't mapped"); | |
389 | cur_node.host_start -= n->host_start; | |
390 | cur_node.tgt_offset = n->tgt->tgt_start + n->tgt_offset | |
391 | + cur_node.host_start; | |
392 | /* At this point tgt_offset is target address of the | |
393 | array section. Now subtract bias to get what we want | |
394 | to initialize the pointer with. */ | |
395 | cur_node.tgt_offset -= sizes[i]; | |
396 | devicep->host2dev_func (devicep->target_id, | |
397 | (void *) (tgt->tgt_start | |
398 | + k->tgt_offset), | |
399 | (void *) &cur_node.tgt_offset, | |
400 | sizeof (void *)); | |
401 | break; | |
402 | case 5: /* TO_PSET */ | |
403 | devicep->host2dev_func (devicep->target_id, | |
404 | (void *) (tgt->tgt_start | |
405 | + k->tgt_offset), | |
406 | (void *) k->host_start, | |
407 | k->host_end - k->host_start); | |
408 | for (j = i + 1; j < mapnum; j++) | |
409 | if ((kinds[j] & 7) != 4) | |
410 | break; | |
411 | else if ((uintptr_t) hostaddrs[j] < k->host_start | |
412 | || ((uintptr_t) hostaddrs[j] + sizeof (void *) | |
413 | > k->host_end)) | |
414 | break; | |
415 | else | |
416 | { | |
417 | tgt->list[j] = k; | |
418 | k->refcount++; | |
419 | cur_node.host_start | |
420 | = (uintptr_t) *(void **) hostaddrs[j]; | |
421 | if (cur_node.host_start == (uintptr_t) NULL) | |
422 | { | |
423 | cur_node.tgt_offset = (uintptr_t) NULL; | |
424 | devicep->host2dev_func (devicep->target_id, | |
425 | (void *) (tgt->tgt_start + k->tgt_offset | |
426 | + ((uintptr_t) hostaddrs[j] | |
427 | - k->host_start)), | |
428 | (void *) &cur_node.tgt_offset, | |
429 | sizeof (void *)); | |
430 | i++; | |
431 | continue; | |
432 | } | |
433 | /* Add bias to the pointer value. */ | |
434 | cur_node.host_start += sizes[j]; | |
435 | cur_node.host_end = cur_node.host_start + 1; | |
436 | n = splay_tree_lookup (&devicep->dev_splay_tree, | |
437 | &cur_node); | |
438 | if (n == NULL) | |
439 | { | |
440 | /* Could be possibly zero size array section. */ | |
441 | cur_node.host_end--; | |
442 | n = splay_tree_lookup (&devicep->dev_splay_tree, | |
443 | &cur_node); | |
444 | if (n == NULL) | |
445 | { | |
446 | cur_node.host_start--; | |
447 | n = splay_tree_lookup | |
448 | (&devicep->dev_splay_tree, &cur_node); | |
449 | cur_node.host_start++; | |
450 | } | |
451 | } | |
452 | if (n == NULL) | |
453 | gomp_fatal ("Pointer target of array section " | |
454 | "wasn't mapped"); | |
455 | cur_node.host_start -= n->host_start; | |
456 | cur_node.tgt_offset = n->tgt->tgt_start | |
457 | + n->tgt_offset | |
458 | + cur_node.host_start; | |
459 | /* At this point tgt_offset is target address of the | |
460 | array section. Now subtract bias to get what we | |
461 | want to initialize the pointer with. */ | |
462 | cur_node.tgt_offset -= sizes[j]; | |
463 | devicep->host2dev_func (devicep->target_id, | |
464 | (void *) (tgt->tgt_start + k->tgt_offset | |
465 | + ((uintptr_t) hostaddrs[j] | |
466 | - k->host_start)), | |
467 | (void *) &cur_node.tgt_offset, | |
468 | sizeof (void *)); | |
469 | i++; | |
470 | } | |
471 | break; | |
472 | } | |
473 | array++; | |
474 | } | |
475 | } | |
476 | } | |
477 | if (is_target) | |
478 | { | |
479 | for (i = 0; i < mapnum; i++) | |
480 | { | |
481 | if (tgt->list[i] == NULL) | |
482 | cur_node.tgt_offset = (uintptr_t) NULL; | |
483 | else | |
484 | cur_node.tgt_offset = tgt->list[i]->tgt->tgt_start | |
485 | + tgt->list[i]->tgt_offset; | |
486 | devicep->host2dev_func (devicep->target_id, | |
487 | (void *) (tgt->tgt_start | |
488 | + i * sizeof (void *)), | |
489 | (void *) &cur_node.tgt_offset, | |
490 | sizeof (void *)); | |
491 | } | |
492 | } | |
493 | ||
494 | gomp_mutex_unlock (&devicep->dev_env_lock); | |
495 | return tgt; | |
496 | } | |
497 | ||
498 | static void | |
499 | gomp_unmap_tgt (struct target_mem_desc *tgt) | |
500 | { | |
501 | /* Deallocate on target the tgt->tgt_start .. tgt->tgt_end region. */ | |
502 | if (tgt->tgt_end) | |
503 | tgt->device_descr->free_func (tgt->device_descr->target_id, tgt->to_free); | |
504 | ||
505 | free (tgt->array); | |
506 | free (tgt); | |
507 | } | |
508 | ||
509 | static void | |
510 | gomp_unmap_vars (struct target_mem_desc *tgt) | |
511 | { | |
512 | struct gomp_device_descr *devicep = tgt->device_descr; | |
513 | ||
514 | if (tgt->list_count == 0) | |
515 | { | |
516 | free (tgt); | |
517 | return; | |
518 | } | |
519 | ||
520 | size_t i; | |
521 | gomp_mutex_lock (&devicep->dev_env_lock); | |
522 | for (i = 0; i < tgt->list_count; i++) | |
523 | if (tgt->list[i] == NULL) | |
524 | ; | |
525 | else if (tgt->list[i]->refcount > 1) | |
526 | tgt->list[i]->refcount--; | |
527 | else | |
528 | { | |
529 | splay_tree_key k = tgt->list[i]; | |
530 | if (k->copy_from) | |
531 | devicep->dev2host_func (devicep->target_id, (void *) k->host_start, | |
532 | (void *) (k->tgt->tgt_start + k->tgt_offset), | |
533 | k->host_end - k->host_start); | |
534 | splay_tree_remove (&devicep->dev_splay_tree, k); | |
535 | if (k->tgt->refcount > 1) | |
536 | k->tgt->refcount--; | |
537 | else | |
538 | gomp_unmap_tgt (k->tgt); | |
539 | } | |
540 | ||
541 | if (tgt->refcount > 1) | |
542 | tgt->refcount--; | |
543 | else | |
544 | gomp_unmap_tgt (tgt); | |
545 | gomp_mutex_unlock (&devicep->dev_env_lock); | |
546 | } | |
547 | ||
548 | static void | |
549 | gomp_update (struct gomp_device_descr *devicep, size_t mapnum, | |
550 | void **hostaddrs, size_t *sizes, unsigned char *kinds) | |
551 | { | |
552 | size_t i; | |
553 | struct splay_tree_key_s cur_node; | |
554 | ||
555 | if (!devicep) | |
556 | return; | |
557 | ||
558 | if (mapnum == 0) | |
559 | return; | |
560 | ||
561 | gomp_mutex_lock (&devicep->dev_env_lock); | |
562 | for (i = 0; i < mapnum; i++) | |
563 | if (sizes[i]) | |
564 | { | |
565 | cur_node.host_start = (uintptr_t) hostaddrs[i]; | |
566 | cur_node.host_end = cur_node.host_start + sizes[i]; | |
567 | splay_tree_key n = splay_tree_lookup (&devicep->dev_splay_tree, | |
568 | &cur_node); | |
569 | if (n) | |
570 | { | |
571 | if (n->host_start > cur_node.host_start | |
572 | || n->host_end < cur_node.host_end) | |
573 | gomp_fatal ("Trying to update [%p..%p) object when" | |
574 | "only [%p..%p) is mapped", | |
575 | (void *) cur_node.host_start, | |
576 | (void *) cur_node.host_end, | |
577 | (void *) n->host_start, | |
578 | (void *) n->host_end); | |
579 | if ((kinds[i] & 7) == 1) | |
580 | devicep->host2dev_func (devicep->target_id, | |
581 | (void *) (n->tgt->tgt_start | |
582 | + n->tgt_offset | |
583 | + cur_node.host_start | |
584 | - n->host_start), | |
585 | (void *) cur_node.host_start, | |
586 | cur_node.host_end - cur_node.host_start); | |
587 | else if ((kinds[i] & 7) == 2) | |
588 | devicep->dev2host_func (devicep->target_id, | |
589 | (void *) cur_node.host_start, | |
590 | (void *) (n->tgt->tgt_start | |
591 | + n->tgt_offset | |
592 | + cur_node.host_start | |
593 | - n->host_start), | |
594 | cur_node.host_end - cur_node.host_start); | |
595 | } | |
596 | else | |
597 | gomp_fatal ("Trying to update [%p..%p) object that is not mapped", | |
598 | (void *) cur_node.host_start, | |
599 | (void *) cur_node.host_end); | |
600 | } | |
601 | gomp_mutex_unlock (&devicep->dev_env_lock); | |
602 | } | |
603 | ||
604 | /* This function should be called from every offload image. | |
605 | It gets the descriptor of the host func and var tables HOST_TABLE, TYPE of | |
606 | the target, and TARGET_DATA needed by target plugin. */ | |
607 | ||
608 | void | |
609 | GOMP_offload_register (void *host_table, enum offload_target_type target_type, | |
610 | void *target_data) | |
611 | { | |
612 | offload_images = gomp_realloc (offload_images, | |
613 | (num_offload_images + 1) | |
614 | * sizeof (struct offload_image_descr)); | |
615 | ||
616 | offload_images[num_offload_images].type = target_type; | |
617 | offload_images[num_offload_images].host_table = host_table; | |
618 | offload_images[num_offload_images].target_data = target_data; | |
619 | ||
620 | num_offload_images++; | |
621 | } | |
622 | ||
623 | /* This function initializes the target device, specified by DEVICEP. */ | |
624 | ||
625 | static void | |
626 | gomp_init_device (struct gomp_device_descr *devicep) | |
627 | { | |
628 | devicep->init_device_func (devicep->target_id); | |
629 | ||
630 | /* Get address mapping table for device. */ | |
631 | struct mapping_table *table = NULL; | |
632 | int num_entries = devicep->get_table_func (devicep->target_id, &table); | |
633 | ||
634 | /* Insert host-target address mapping into dev_splay_tree. */ | |
635 | int i; | |
636 | for (i = 0; i < num_entries; i++) | |
637 | { | |
638 | struct target_mem_desc *tgt = gomp_malloc (sizeof (*tgt)); | |
639 | tgt->refcount = 1; | |
640 | tgt->array = gomp_malloc (sizeof (*tgt->array)); | |
641 | tgt->tgt_start = table[i].tgt_start; | |
642 | tgt->tgt_end = table[i].tgt_end; | |
643 | tgt->to_free = NULL; | |
644 | tgt->list_count = 0; | |
645 | tgt->device_descr = devicep; | |
646 | splay_tree_node node = tgt->array; | |
647 | splay_tree_key k = &node->key; | |
648 | k->host_start = table[i].host_start; | |
649 | k->host_end = table[i].host_end; | |
650 | k->tgt_offset = 0; | |
651 | k->refcount = 1; | |
652 | k->copy_from = false; | |
653 | k->tgt = tgt; | |
654 | node->left = NULL; | |
655 | node->right = NULL; | |
656 | splay_tree_insert (&devicep->dev_splay_tree, node); | |
657 | } | |
658 | ||
659 | free (table); | |
660 | devicep->is_initialized = true; | |
bc7bff74 | 661 | } |
662 | ||
663 | /* Called when encountering a target directive. If DEVICE | |
664 | is -1, it means use device-var ICV. If it is -2 (or any other value | |
665 | larger than last available hw device, use host fallback. | |
666 | FN is address of host code, OPENMP_TARGET contains value of the | |
667 | __OPENMP_TARGET__ symbol in the shared library or binary that invokes | |
668 | GOMP_target. HOSTADDRS, SIZES and KINDS are arrays | |
669 | with MAPNUM entries, with addresses of the host objects, | |
670 | sizes of the host objects (resp. for pointer kind pointer bias | |
671 | and assumed sizeof (void *) size) and kinds. */ | |
672 | ||
673 | void | |
674 | GOMP_target (int device, void (*fn) (void *), const void *openmp_target, | |
675 | size_t mapnum, void **hostaddrs, size_t *sizes, | |
676 | unsigned char *kinds) | |
677 | { | |
995b27b9 | 678 | struct gomp_device_descr *devicep = resolve_device (device); |
679 | if (devicep == NULL) | |
680 | { | |
681 | /* Host fallback. */ | |
682 | struct gomp_thread old_thr, *thr = gomp_thread (); | |
683 | old_thr = *thr; | |
684 | memset (thr, '\0', sizeof (*thr)); | |
685 | if (gomp_places_list) | |
686 | { | |
687 | thr->place = old_thr.place; | |
688 | thr->ts.place_partition_len = gomp_places_list_len; | |
689 | } | |
690 | fn (hostaddrs); | |
691 | gomp_free_thread (thr); | |
692 | *thr = old_thr; | |
693 | return; | |
694 | } | |
695 | ||
696 | gomp_mutex_lock (&devicep->dev_env_lock); | |
697 | if (!devicep->is_initialized) | |
698 | gomp_init_device (devicep); | |
699 | ||
700 | struct splay_tree_key_s k; | |
701 | k.host_start = (uintptr_t) fn; | |
702 | k.host_end = k.host_start + 1; | |
703 | splay_tree_key tgt_fn = splay_tree_lookup (&devicep->dev_splay_tree, &k); | |
704 | if (tgt_fn == NULL) | |
705 | gomp_fatal ("Target function wasn't mapped"); | |
706 | gomp_mutex_unlock (&devicep->dev_env_lock); | |
707 | ||
708 | struct target_mem_desc *tgt_vars | |
709 | = gomp_map_vars (devicep, mapnum, hostaddrs, sizes, kinds, true); | |
bc7bff74 | 710 | struct gomp_thread old_thr, *thr = gomp_thread (); |
711 | old_thr = *thr; | |
712 | memset (thr, '\0', sizeof (*thr)); | |
713 | if (gomp_places_list) | |
714 | { | |
715 | thr->place = old_thr.place; | |
716 | thr->ts.place_partition_len = gomp_places_list_len; | |
717 | } | |
995b27b9 | 718 | devicep->run_func (devicep->target_id, (void *) tgt_fn->tgt->tgt_start, |
719 | (void *) tgt_vars->tgt_start); | |
bc7bff74 | 720 | gomp_free_thread (thr); |
721 | *thr = old_thr; | |
995b27b9 | 722 | gomp_unmap_vars (tgt_vars); |
bc7bff74 | 723 | } |
724 | ||
725 | void | |
726 | GOMP_target_data (int device, const void *openmp_target, size_t mapnum, | |
727 | void **hostaddrs, size_t *sizes, unsigned char *kinds) | |
728 | { | |
995b27b9 | 729 | struct gomp_device_descr *devicep = resolve_device (device); |
730 | if (devicep == NULL) | |
731 | { | |
732 | /* Host fallback. */ | |
733 | struct gomp_task_icv *icv = gomp_icv (false); | |
734 | if (icv->target_data) | |
735 | { | |
736 | /* Even when doing a host fallback, if there are any active | |
737 | #pragma omp target data constructs, need to remember the | |
738 | new #pragma omp target data, otherwise GOMP_target_end_data | |
739 | would get out of sync. */ | |
740 | struct target_mem_desc *tgt | |
741 | = gomp_map_vars (NULL, 0, NULL, NULL, NULL, false); | |
742 | tgt->prev = icv->target_data; | |
743 | icv->target_data = tgt; | |
744 | } | |
745 | return; | |
746 | } | |
747 | ||
748 | gomp_mutex_lock (&devicep->dev_env_lock); | |
749 | if (!devicep->is_initialized) | |
750 | gomp_init_device (devicep); | |
751 | gomp_mutex_unlock (&devicep->dev_env_lock); | |
752 | ||
753 | struct target_mem_desc *tgt | |
754 | = gomp_map_vars (devicep, mapnum, hostaddrs, sizes, kinds, false); | |
755 | struct gomp_task_icv *icv = gomp_icv (true); | |
756 | tgt->prev = icv->target_data; | |
757 | icv->target_data = tgt; | |
bc7bff74 | 758 | } |
759 | ||
760 | void | |
761 | GOMP_target_end_data (void) | |
762 | { | |
995b27b9 | 763 | struct gomp_task_icv *icv = gomp_icv (false); |
764 | if (icv->target_data) | |
765 | { | |
766 | struct target_mem_desc *tgt = icv->target_data; | |
767 | icv->target_data = tgt->prev; | |
768 | gomp_unmap_vars (tgt); | |
769 | } | |
bc7bff74 | 770 | } |
771 | ||
772 | void | |
773 | GOMP_target_update (int device, const void *openmp_target, size_t mapnum, | |
774 | void **hostaddrs, size_t *sizes, unsigned char *kinds) | |
775 | { | |
995b27b9 | 776 | struct gomp_device_descr *devicep = resolve_device (device); |
777 | if (devicep == NULL) | |
778 | return; | |
779 | ||
780 | gomp_mutex_lock (&devicep->dev_env_lock); | |
781 | if (!devicep->is_initialized) | |
782 | gomp_init_device (devicep); | |
783 | gomp_mutex_unlock (&devicep->dev_env_lock); | |
784 | ||
785 | gomp_update (devicep, mapnum, hostaddrs, sizes, kinds); | |
bc7bff74 | 786 | } |
787 | ||
788 | void | |
789 | GOMP_teams (unsigned int num_teams, unsigned int thread_limit) | |
790 | { | |
791 | if (thread_limit) | |
792 | { | |
793 | struct gomp_task_icv *icv = gomp_icv (true); | |
794 | icv->thread_limit_var | |
795 | = thread_limit > INT_MAX ? UINT_MAX : thread_limit; | |
796 | } | |
797 | (void) num_teams; | |
798 | } | |
995b27b9 | 799 | |
800 | #ifdef PLUGIN_SUPPORT | |
801 | ||
802 | /* This function tries to load a plugin for DEVICE. Name of plugin is passed | |
803 | in PLUGIN_NAME. | |
804 | The handles of the found functions are stored in the corresponding fields | |
805 | of DEVICE. The function returns TRUE on success and FALSE otherwise. */ | |
806 | ||
807 | static bool | |
808 | gomp_load_plugin_for_device (struct gomp_device_descr *device, | |
809 | const char *plugin_name) | |
810 | { | |
811 | void *plugin_handle = dlopen (plugin_name, RTLD_LAZY); | |
812 | if (!plugin_handle) | |
813 | return false; | |
814 | ||
815 | /* Check if all required functions are available in the plugin and store | |
816 | their handlers. */ | |
817 | #define DLSYM(f) \ | |
818 | do \ | |
819 | { \ | |
820 | device->f##_func = dlsym (plugin_handle, "GOMP_OFFLOAD_"#f); \ | |
821 | if (!device->f##_func) \ | |
822 | return false; \ | |
823 | } \ | |
824 | while (0) | |
825 | DLSYM (get_type); | |
826 | DLSYM (get_num_devices); | |
827 | DLSYM (register_image); | |
828 | DLSYM (init_device); | |
829 | DLSYM (get_table); | |
830 | DLSYM (alloc); | |
831 | DLSYM (free); | |
832 | DLSYM (dev2host); | |
833 | DLSYM (host2dev); | |
834 | DLSYM (run); | |
835 | #undef DLSYM | |
836 | ||
837 | return true; | |
838 | } | |
839 | ||
840 | /* This function finds OFFLOAD_IMAGES corresponding to DEVICE type, and | |
841 | registers them in the plugin. */ | |
842 | ||
843 | static void | |
844 | gomp_register_images_for_device (struct gomp_device_descr *device) | |
845 | { | |
846 | int i; | |
847 | for (i = 0; i < num_offload_images; i++) | |
848 | { | |
849 | struct offload_image_descr *image = &offload_images[i]; | |
850 | if (image->type == device->type) | |
851 | device->register_image_func (image->host_table, image->target_data); | |
852 | } | |
853 | } | |
854 | ||
855 | /* This function initializes the runtime needed for offloading. | |
856 | It parses the list of offload targets and tries to load the plugins for these | |
857 | targets. Result of the function is properly initialized variable NUM_DEVICES | |
858 | and array DEVICES, containing descriptors for corresponding devices. */ | |
859 | ||
860 | static void | |
861 | gomp_target_init (void) | |
862 | { | |
863 | const char *prefix ="libgomp-plugin-"; | |
864 | const char *suffix = ".so.1"; | |
865 | const char *cur, *next; | |
866 | char *plugin_name; | |
867 | int i, new_num_devices; | |
868 | ||
869 | num_devices = 0; | |
870 | devices = NULL; | |
871 | ||
872 | cur = OFFLOAD_TARGETS; | |
873 | if (*cur) | |
874 | do | |
875 | { | |
876 | struct gomp_device_descr current_device; | |
877 | ||
878 | next = strchr (cur, ','); | |
879 | ||
880 | plugin_name = (char *) malloc (1 + (next ? next - cur : strlen (cur)) | |
881 | + strlen (prefix) + strlen (suffix)); | |
882 | if (!plugin_name) | |
883 | { | |
884 | num_devices = 0; | |
885 | break; | |
886 | } | |
887 | ||
888 | strcpy (plugin_name, prefix); | |
889 | strncat (plugin_name, cur, next ? next - cur : strlen (cur)); | |
890 | strcat (plugin_name, suffix); | |
891 | ||
892 | if (gomp_load_plugin_for_device (¤t_device, plugin_name)) | |
893 | { | |
894 | new_num_devices = current_device.get_num_devices_func (); | |
895 | if (new_num_devices >= 1) | |
896 | { | |
897 | devices = realloc (devices, (num_devices + new_num_devices) | |
898 | * sizeof (struct gomp_device_descr)); | |
899 | if (!devices) | |
900 | { | |
901 | num_devices = 0; | |
902 | free (plugin_name); | |
903 | break; | |
904 | } | |
905 | ||
906 | current_device.type = current_device.get_type_func (); | |
907 | current_device.is_initialized = false; | |
908 | current_device.dev_splay_tree.root = NULL; | |
909 | gomp_register_images_for_device (¤t_device); | |
910 | for (i = 0; i < new_num_devices; i++) | |
911 | { | |
912 | current_device.id = num_devices + 1; | |
913 | current_device.target_id = i; | |
914 | devices[num_devices] = current_device; | |
915 | gomp_mutex_init (&devices[num_devices].dev_env_lock); | |
916 | num_devices++; | |
917 | } | |
918 | } | |
919 | } | |
920 | ||
921 | free (plugin_name); | |
922 | cur = next + 1; | |
923 | } | |
924 | while (next); | |
925 | ||
926 | free (offload_images); | |
927 | offload_images = NULL; | |
928 | num_offload_images = 0; | |
929 | } | |
930 | ||
931 | #else /* PLUGIN_SUPPORT */ | |
932 | /* If dlfcn.h is unavailable we always fallback to host execution. | |
933 | GOMP_target* routines are just stubs for this case. */ | |
934 | static void | |
935 | gomp_target_init (void) | |
936 | { | |
937 | } | |
938 | #endif /* PLUGIN_SUPPORT */ |