]>
Commit | Line | Data |
---|---|---|
d353bf18 | 1 | /* Copyright (C) 2013-2015 Free Software Foundation, Inc. |
bc7bff74 | 2 | Contributed by Jakub Jelinek <jakub@redhat.com>. |
3 | ||
c35c9a62 | 4 | This file is part of the GNU Offloading and Multi Processing Library |
5 | (libgomp). | |
bc7bff74 | 6 | |
7 | Libgomp is free software; you can redistribute it and/or modify it | |
8 | under the terms of the GNU General Public License as published by | |
9 | the Free Software Foundation; either version 3, or (at your option) | |
10 | any later version. | |
11 | ||
12 | Libgomp is distributed in the hope that it will be useful, but WITHOUT ANY | |
13 | WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS | |
14 | FOR A PARTICULAR PURPOSE. See the GNU General Public License for | |
15 | more details. | |
16 | ||
17 | Under Section 7 of GPL version 3, you are granted additional | |
18 | permissions described in the GCC Runtime Library Exception, version | |
19 | 3.1, as published by the Free Software Foundation. | |
20 | ||
21 | You should have received a copy of the GNU General Public License and | |
22 | a copy of the GCC Runtime Library Exception along with this program; | |
23 | see the files COPYING3 and COPYING.RUNTIME respectively. If not, see | |
24 | <http://www.gnu.org/licenses/>. */ | |
25 | ||
995b27b9 | 26 | /* This file contains the support of offloading. */ |
bc7bff74 | 27 | |
995b27b9 | 28 | #include "config.h" |
bc7bff74 | 29 | #include "libgomp.h" |
ca4c3545 | 30 | #include "oacc-plugin.h" |
31 | #include "oacc-int.h" | |
32 | #include "gomp-constants.h" | |
bc7bff74 | 33 | #include <limits.h> |
34 | #include <stdbool.h> | |
35 | #include <stdlib.h> | |
2634aed9 | 36 | #ifdef HAVE_INTTYPES_H |
37 | # include <inttypes.h> /* For PRIu64. */ | |
38 | #endif | |
bc7bff74 | 39 | #include <string.h> |
ca4c3545 | 40 | #include <assert.h> |
bc7bff74 | 41 | |
995b27b9 | 42 | #ifdef PLUGIN_SUPPORT |
43 | #include <dlfcn.h> | |
4fda895a | 44 | #include "plugin-suffix.h" |
995b27b9 | 45 | #endif |
46 | ||
47 | static void gomp_target_init (void); | |
48 | ||
ca4c3545 | 49 | /* The whole initialization code for offloading plugins is only run one. */ |
995b27b9 | 50 | static pthread_once_t gomp_is_initialized = PTHREAD_ONCE_INIT; |
51 | ||
0d8c703d | 52 | /* Mutex for offload image registration. */ |
53 | static gomp_mutex_t register_lock; | |
54 | ||
995b27b9 | 55 | /* This structure describes an offload image. |
56 | It contains type of the target device, pointer to host table descriptor, and | |
57 | pointer to target data. */ | |
58 | struct offload_image_descr { | |
59 | enum offload_target_type type; | |
4e985e0f | 60 | const void *host_table; |
70046055 | 61 | const void *target_data; |
995b27b9 | 62 | }; |
63 | ||
64 | /* Array of descriptors of offload images. */ | |
65 | static struct offload_image_descr *offload_images; | |
66 | ||
67 | /* Total number of offload images. */ | |
68 | static int num_offload_images; | |
69 | ||
70 | /* Array of descriptors for all available devices. */ | |
71 | static struct gomp_device_descr *devices; | |
72 | ||
73 | /* Total number of available devices. */ | |
74 | static int num_devices; | |
75 | ||
ca4c3545 | 76 | /* Number of GOMP_OFFLOAD_CAP_OPENMP_400 devices. */ |
77 | static int num_devices_openmp; | |
78 | ||
0d8c703d | 79 | /* Similar to gomp_realloc, but release register_lock before gomp_fatal. */ |
80 | ||
81 | static void * | |
82 | gomp_realloc_unlock (void *old, size_t size) | |
83 | { | |
84 | void *ret = realloc (old, size); | |
85 | if (ret == NULL) | |
86 | { | |
87 | gomp_mutex_unlock (®ister_lock); | |
88 | gomp_fatal ("Out of memory allocating %lu bytes", (unsigned long) size); | |
89 | } | |
90 | return ret; | |
91 | } | |
92 | ||
995b27b9 | 93 | /* The comparison function. */ |
94 | ||
ca4c3545 | 95 | attribute_hidden int |
995b27b9 | 96 | splay_compare (splay_tree_key x, splay_tree_key y) |
97 | { | |
98 | if (x->host_start == x->host_end | |
99 | && y->host_start == y->host_end) | |
100 | return 0; | |
101 | if (x->host_end <= y->host_start) | |
102 | return -1; | |
103 | if (x->host_start >= y->host_end) | |
104 | return 1; | |
105 | return 0; | |
106 | } | |
107 | ||
108 | #include "splay-tree.h" | |
109 | ||
ca4c3545 | 110 | attribute_hidden void |
111 | gomp_init_targets_once (void) | |
995b27b9 | 112 | { |
ca4c3545 | 113 | (void) pthread_once (&gomp_is_initialized, gomp_target_init); |
114 | } | |
995b27b9 | 115 | |
bc7bff74 | 116 | attribute_hidden int |
117 | gomp_get_num_devices (void) | |
118 | { | |
ca4c3545 | 119 | gomp_init_targets_once (); |
120 | return num_devices_openmp; | |
995b27b9 | 121 | } |
122 | ||
123 | static struct gomp_device_descr * | |
124 | resolve_device (int device_id) | |
125 | { | |
ca4c3545 | 126 | if (device_id == GOMP_DEVICE_ICV) |
995b27b9 | 127 | { |
128 | struct gomp_task_icv *icv = gomp_icv (false); | |
129 | device_id = icv->default_device_var; | |
130 | } | |
131 | ||
132 | if (device_id < 0 || device_id >= gomp_get_num_devices ()) | |
133 | return NULL; | |
134 | ||
135 | return &devices[device_id]; | |
136 | } | |
137 | ||
138 | ||
139 | /* Handle the case where splay_tree_lookup found oldn for newn. | |
140 | Helper function of gomp_map_vars. */ | |
141 | ||
142 | static inline void | |
0d8c703d | 143 | gomp_map_vars_existing (struct gomp_device_descr *devicep, splay_tree_key oldn, |
144 | splay_tree_key newn, unsigned char kind) | |
995b27b9 | 145 | { |
ca4c3545 | 146 | if ((kind & GOMP_MAP_FLAG_FORCE) |
147 | || oldn->host_start > newn->host_start | |
995b27b9 | 148 | || oldn->host_end < newn->host_end) |
0d8c703d | 149 | { |
150 | gomp_mutex_unlock (&devicep->lock); | |
151 | gomp_fatal ("Trying to map into device [%p..%p) object when " | |
152 | "[%p..%p) is already mapped", | |
153 | (void *) newn->host_start, (void *) newn->host_end, | |
154 | (void *) oldn->host_start, (void *) oldn->host_end); | |
155 | } | |
995b27b9 | 156 | oldn->refcount++; |
157 | } | |
158 | ||
ca4c3545 | 159 | static int |
160 | get_kind (bool is_openacc, void *kinds, int idx) | |
161 | { | |
162 | return is_openacc ? ((unsigned short *) kinds)[idx] | |
163 | : ((unsigned char *) kinds)[idx]; | |
164 | } | |
165 | ||
84f53106 | 166 | static void |
167 | gomp_map_pointer (struct target_mem_desc *tgt, uintptr_t host_ptr, | |
168 | uintptr_t target_offset, uintptr_t bias) | |
169 | { | |
170 | struct gomp_device_descr *devicep = tgt->device_descr; | |
171 | struct splay_tree_s *mem_map = &devicep->mem_map; | |
172 | struct splay_tree_key_s cur_node; | |
173 | ||
174 | cur_node.host_start = host_ptr; | |
175 | if (cur_node.host_start == (uintptr_t) NULL) | |
176 | { | |
177 | cur_node.tgt_offset = (uintptr_t) NULL; | |
178 | /* FIXME: see comment about coalescing host/dev transfers below. */ | |
179 | devicep->host2dev_func (devicep->target_id, | |
180 | (void *) (tgt->tgt_start + target_offset), | |
181 | (void *) &cur_node.tgt_offset, | |
182 | sizeof (void *)); | |
183 | return; | |
184 | } | |
185 | /* Add bias to the pointer value. */ | |
186 | cur_node.host_start += bias; | |
187 | cur_node.host_end = cur_node.host_start + 1; | |
188 | splay_tree_key n = splay_tree_lookup (mem_map, &cur_node); | |
189 | if (n == NULL) | |
190 | { | |
191 | /* Could be possibly zero size array section. */ | |
192 | cur_node.host_end--; | |
193 | n = splay_tree_lookup (mem_map, &cur_node); | |
194 | if (n == NULL) | |
195 | { | |
196 | cur_node.host_start--; | |
197 | n = splay_tree_lookup (mem_map, &cur_node); | |
198 | cur_node.host_start++; | |
199 | } | |
200 | } | |
201 | if (n == NULL) | |
202 | { | |
203 | gomp_mutex_unlock (&devicep->lock); | |
204 | gomp_fatal ("Pointer target of array section wasn't mapped"); | |
205 | } | |
206 | cur_node.host_start -= n->host_start; | |
207 | cur_node.tgt_offset | |
208 | = n->tgt->tgt_start + n->tgt_offset + cur_node.host_start; | |
209 | /* At this point tgt_offset is target address of the | |
210 | array section. Now subtract bias to get what we want | |
211 | to initialize the pointer with. */ | |
212 | cur_node.tgt_offset -= bias; | |
213 | /* FIXME: see comment about coalescing host/dev transfers below. */ | |
214 | devicep->host2dev_func (devicep->target_id, | |
215 | (void *) (tgt->tgt_start + target_offset), | |
216 | (void *) &cur_node.tgt_offset, | |
217 | sizeof (void *)); | |
218 | } | |
219 | ||
ca4c3545 | 220 | attribute_hidden struct target_mem_desc * |
995b27b9 | 221 | gomp_map_vars (struct gomp_device_descr *devicep, size_t mapnum, |
ca4c3545 | 222 | void **hostaddrs, void **devaddrs, size_t *sizes, void *kinds, |
223 | bool is_openacc, bool is_target) | |
995b27b9 | 224 | { |
225 | size_t i, tgt_align, tgt_size, not_found_cnt = 0; | |
ca4c3545 | 226 | const int rshift = is_openacc ? 8 : 3; |
227 | const int typemask = is_openacc ? 0xff : 0x7; | |
0d8c703d | 228 | struct splay_tree_s *mem_map = &devicep->mem_map; |
995b27b9 | 229 | struct splay_tree_key_s cur_node; |
230 | struct target_mem_desc *tgt | |
231 | = gomp_malloc (sizeof (*tgt) + sizeof (tgt->list[0]) * mapnum); | |
232 | tgt->list_count = mapnum; | |
233 | tgt->refcount = 1; | |
234 | tgt->device_descr = devicep; | |
235 | ||
236 | if (mapnum == 0) | |
237 | return tgt; | |
238 | ||
239 | tgt_align = sizeof (void *); | |
240 | tgt_size = 0; | |
241 | if (is_target) | |
242 | { | |
243 | size_t align = 4 * sizeof (void *); | |
244 | tgt_align = align; | |
245 | tgt_size = mapnum * sizeof (void *); | |
246 | } | |
247 | ||
0d8c703d | 248 | gomp_mutex_lock (&devicep->lock); |
ca4c3545 | 249 | |
995b27b9 | 250 | for (i = 0; i < mapnum; i++) |
251 | { | |
ca4c3545 | 252 | int kind = get_kind (is_openacc, kinds, i); |
995b27b9 | 253 | if (hostaddrs[i] == NULL) |
254 | { | |
255 | tgt->list[i] = NULL; | |
256 | continue; | |
257 | } | |
258 | cur_node.host_start = (uintptr_t) hostaddrs[i]; | |
ca4c3545 | 259 | if (!GOMP_MAP_POINTER_P (kind & typemask)) |
995b27b9 | 260 | cur_node.host_end = cur_node.host_start + sizes[i]; |
261 | else | |
262 | cur_node.host_end = cur_node.host_start + sizeof (void *); | |
0d8c703d | 263 | splay_tree_key n = splay_tree_lookup (mem_map, &cur_node); |
995b27b9 | 264 | if (n) |
265 | { | |
266 | tgt->list[i] = n; | |
0d8c703d | 267 | gomp_map_vars_existing (devicep, n, &cur_node, kind & typemask); |
995b27b9 | 268 | } |
269 | else | |
270 | { | |
995b27b9 | 271 | tgt->list[i] = NULL; |
ca4c3545 | 272 | |
273 | size_t align = (size_t) 1 << (kind >> rshift); | |
995b27b9 | 274 | not_found_cnt++; |
275 | if (tgt_align < align) | |
276 | tgt_align = align; | |
277 | tgt_size = (tgt_size + align - 1) & ~(align - 1); | |
278 | tgt_size += cur_node.host_end - cur_node.host_start; | |
ca4c3545 | 279 | if ((kind & typemask) == GOMP_MAP_TO_PSET) |
995b27b9 | 280 | { |
281 | size_t j; | |
282 | for (j = i + 1; j < mapnum; j++) | |
ca4c3545 | 283 | if (!GOMP_MAP_POINTER_P (get_kind (is_openacc, kinds, j) |
284 | & typemask)) | |
995b27b9 | 285 | break; |
286 | else if ((uintptr_t) hostaddrs[j] < cur_node.host_start | |
287 | || ((uintptr_t) hostaddrs[j] + sizeof (void *) | |
288 | > cur_node.host_end)) | |
289 | break; | |
290 | else | |
291 | { | |
292 | tgt->list[j] = NULL; | |
293 | i++; | |
294 | } | |
295 | } | |
296 | } | |
297 | } | |
298 | ||
ca4c3545 | 299 | if (devaddrs) |
300 | { | |
301 | if (mapnum != 1) | |
0d8c703d | 302 | { |
303 | gomp_mutex_unlock (&devicep->lock); | |
304 | gomp_fatal ("unexpected aggregation"); | |
305 | } | |
ca4c3545 | 306 | tgt->to_free = devaddrs[0]; |
307 | tgt->tgt_start = (uintptr_t) tgt->to_free; | |
308 | tgt->tgt_end = tgt->tgt_start + sizes[0]; | |
309 | } | |
310 | else if (not_found_cnt || is_target) | |
995b27b9 | 311 | { |
312 | /* Allocate tgt_align aligned tgt_size block of memory. */ | |
313 | /* FIXME: Perhaps change interface to allocate properly aligned | |
314 | memory. */ | |
315 | tgt->to_free = devicep->alloc_func (devicep->target_id, | |
316 | tgt_size + tgt_align - 1); | |
317 | tgt->tgt_start = (uintptr_t) tgt->to_free; | |
318 | tgt->tgt_start = (tgt->tgt_start + tgt_align - 1) & ~(tgt_align - 1); | |
319 | tgt->tgt_end = tgt->tgt_start + tgt_size; | |
320 | } | |
321 | else | |
322 | { | |
323 | tgt->to_free = NULL; | |
324 | tgt->tgt_start = 0; | |
325 | tgt->tgt_end = 0; | |
326 | } | |
327 | ||
328 | tgt_size = 0; | |
329 | if (is_target) | |
330 | tgt_size = mapnum * sizeof (void *); | |
331 | ||
332 | tgt->array = NULL; | |
333 | if (not_found_cnt) | |
334 | { | |
335 | tgt->array = gomp_malloc (not_found_cnt * sizeof (*tgt->array)); | |
336 | splay_tree_node array = tgt->array; | |
337 | size_t j; | |
338 | ||
339 | for (i = 0; i < mapnum; i++) | |
340 | if (tgt->list[i] == NULL) | |
341 | { | |
ca4c3545 | 342 | int kind = get_kind (is_openacc, kinds, i); |
995b27b9 | 343 | if (hostaddrs[i] == NULL) |
344 | continue; | |
345 | splay_tree_key k = &array->key; | |
346 | k->host_start = (uintptr_t) hostaddrs[i]; | |
ca4c3545 | 347 | if (!GOMP_MAP_POINTER_P (kind & typemask)) |
995b27b9 | 348 | k->host_end = k->host_start + sizes[i]; |
349 | else | |
350 | k->host_end = k->host_start + sizeof (void *); | |
0d8c703d | 351 | splay_tree_key n = splay_tree_lookup (mem_map, k); |
995b27b9 | 352 | if (n) |
353 | { | |
354 | tgt->list[i] = n; | |
0d8c703d | 355 | gomp_map_vars_existing (devicep, n, k, kind & typemask); |
995b27b9 | 356 | } |
357 | else | |
358 | { | |
ca4c3545 | 359 | size_t align = (size_t) 1 << (kind >> rshift); |
995b27b9 | 360 | tgt->list[i] = k; |
361 | tgt_size = (tgt_size + align - 1) & ~(align - 1); | |
362 | k->tgt = tgt; | |
363 | k->tgt_offset = tgt_size; | |
364 | tgt_size += k->host_end - k->host_start; | |
ca4c3545 | 365 | k->copy_from = GOMP_MAP_COPY_FROM_P (kind & typemask); |
995b27b9 | 366 | k->refcount = 1; |
ca4c3545 | 367 | k->async_refcount = 0; |
995b27b9 | 368 | tgt->refcount++; |
369 | array->left = NULL; | |
370 | array->right = NULL; | |
0d8c703d | 371 | splay_tree_insert (mem_map, array); |
ca4c3545 | 372 | switch (kind & typemask) |
995b27b9 | 373 | { |
ca4c3545 | 374 | case GOMP_MAP_ALLOC: |
375 | case GOMP_MAP_FROM: | |
376 | case GOMP_MAP_FORCE_ALLOC: | |
377 | case GOMP_MAP_FORCE_FROM: | |
995b27b9 | 378 | break; |
ca4c3545 | 379 | case GOMP_MAP_TO: |
380 | case GOMP_MAP_TOFROM: | |
381 | case GOMP_MAP_FORCE_TO: | |
382 | case GOMP_MAP_FORCE_TOFROM: | |
995b27b9 | 383 | /* FIXME: Perhaps add some smarts, like if copying |
384 | several adjacent fields from host to target, use some | |
385 | host buffer to avoid sending each var individually. */ | |
386 | devicep->host2dev_func (devicep->target_id, | |
387 | (void *) (tgt->tgt_start | |
388 | + k->tgt_offset), | |
389 | (void *) k->host_start, | |
390 | k->host_end - k->host_start); | |
391 | break; | |
ca4c3545 | 392 | case GOMP_MAP_POINTER: |
84f53106 | 393 | gomp_map_pointer (tgt, (uintptr_t) *(void **) k->host_start, |
394 | k->tgt_offset, sizes[i]); | |
995b27b9 | 395 | break; |
ca4c3545 | 396 | case GOMP_MAP_TO_PSET: |
397 | /* FIXME: see above FIXME comment. */ | |
995b27b9 | 398 | devicep->host2dev_func (devicep->target_id, |
399 | (void *) (tgt->tgt_start | |
400 | + k->tgt_offset), | |
401 | (void *) k->host_start, | |
402 | k->host_end - k->host_start); | |
ca4c3545 | 403 | |
995b27b9 | 404 | for (j = i + 1; j < mapnum; j++) |
ca4c3545 | 405 | if (!GOMP_MAP_POINTER_P (get_kind (is_openacc, kinds, j) |
406 | & typemask)) | |
995b27b9 | 407 | break; |
408 | else if ((uintptr_t) hostaddrs[j] < k->host_start | |
409 | || ((uintptr_t) hostaddrs[j] + sizeof (void *) | |
410 | > k->host_end)) | |
411 | break; | |
412 | else | |
413 | { | |
414 | tgt->list[j] = k; | |
415 | k->refcount++; | |
84f53106 | 416 | gomp_map_pointer (tgt, |
417 | (uintptr_t) *(void **) hostaddrs[j], | |
418 | k->tgt_offset | |
419 | + ((uintptr_t) hostaddrs[j] | |
420 | - k->host_start), | |
421 | sizes[j]); | |
995b27b9 | 422 | i++; |
423 | } | |
ca4c3545 | 424 | break; |
425 | case GOMP_MAP_FORCE_PRESENT: | |
426 | { | |
427 | /* We already looked up the memory region above and it | |
428 | was missing. */ | |
429 | size_t size = k->host_end - k->host_start; | |
0d8c703d | 430 | gomp_mutex_unlock (&devicep->lock); |
2634aed9 | 431 | #ifdef HAVE_INTTYPES_H |
432 | gomp_fatal ("present clause: !acc_is_present (%p, " | |
433 | "%"PRIu64" (0x%"PRIx64"))", | |
434 | (void *) k->host_start, | |
435 | (uint64_t) size, (uint64_t) size); | |
436 | #else | |
ca4c3545 | 437 | gomp_fatal ("present clause: !acc_is_present (%p, " |
2634aed9 | 438 | "%lu (0x%lx))", (void *) k->host_start, |
439 | (unsigned long) size, (unsigned long) size); | |
440 | #endif | |
ca4c3545 | 441 | } |
442 | break; | |
443 | case GOMP_MAP_FORCE_DEVICEPTR: | |
444 | assert (k->host_end - k->host_start == sizeof (void *)); | |
445 | ||
446 | devicep->host2dev_func (devicep->target_id, | |
447 | (void *) (tgt->tgt_start | |
448 | + k->tgt_offset), | |
449 | (void *) k->host_start, | |
450 | sizeof (void *)); | |
451 | break; | |
452 | default: | |
0d8c703d | 453 | gomp_mutex_unlock (&devicep->lock); |
ca4c3545 | 454 | gomp_fatal ("%s: unhandled kind 0x%.2x", __FUNCTION__, |
455 | kind); | |
995b27b9 | 456 | } |
457 | array++; | |
458 | } | |
459 | } | |
460 | } | |
ca4c3545 | 461 | |
995b27b9 | 462 | if (is_target) |
463 | { | |
464 | for (i = 0; i < mapnum; i++) | |
465 | { | |
466 | if (tgt->list[i] == NULL) | |
467 | cur_node.tgt_offset = (uintptr_t) NULL; | |
468 | else | |
469 | cur_node.tgt_offset = tgt->list[i]->tgt->tgt_start | |
470 | + tgt->list[i]->tgt_offset; | |
ca4c3545 | 471 | /* FIXME: see above FIXME comment. */ |
995b27b9 | 472 | devicep->host2dev_func (devicep->target_id, |
473 | (void *) (tgt->tgt_start | |
474 | + i * sizeof (void *)), | |
475 | (void *) &cur_node.tgt_offset, | |
476 | sizeof (void *)); | |
477 | } | |
478 | } | |
479 | ||
0d8c703d | 480 | gomp_mutex_unlock (&devicep->lock); |
995b27b9 | 481 | return tgt; |
482 | } | |
483 | ||
484 | static void | |
485 | gomp_unmap_tgt (struct target_mem_desc *tgt) | |
486 | { | |
487 | /* Deallocate on target the tgt->tgt_start .. tgt->tgt_end region. */ | |
488 | if (tgt->tgt_end) | |
489 | tgt->device_descr->free_func (tgt->device_descr->target_id, tgt->to_free); | |
490 | ||
491 | free (tgt->array); | |
492 | free (tgt); | |
493 | } | |
494 | ||
ca4c3545 | 495 | /* Decrease the refcount for a set of mapped variables, and queue asychronous |
496 | copies from the device back to the host after any work that has been issued. | |
497 | Because the regions are still "live", increment an asynchronous reference | |
498 | count to indicate that they should not be unmapped from host-side data | |
499 | structures until the asynchronous copy has completed. */ | |
500 | ||
501 | attribute_hidden void | |
502 | gomp_copy_from_async (struct target_mem_desc *tgt) | |
503 | { | |
504 | struct gomp_device_descr *devicep = tgt->device_descr; | |
ca4c3545 | 505 | size_t i; |
506 | ||
0d8c703d | 507 | gomp_mutex_lock (&devicep->lock); |
ca4c3545 | 508 | |
509 | for (i = 0; i < tgt->list_count; i++) | |
510 | if (tgt->list[i] == NULL) | |
511 | ; | |
512 | else if (tgt->list[i]->refcount > 1) | |
513 | { | |
514 | tgt->list[i]->refcount--; | |
515 | tgt->list[i]->async_refcount++; | |
516 | } | |
517 | else | |
518 | { | |
519 | splay_tree_key k = tgt->list[i]; | |
520 | if (k->copy_from) | |
521 | devicep->dev2host_func (devicep->target_id, (void *) k->host_start, | |
522 | (void *) (k->tgt->tgt_start + k->tgt_offset), | |
523 | k->host_end - k->host_start); | |
524 | } | |
525 | ||
0d8c703d | 526 | gomp_mutex_unlock (&devicep->lock); |
ca4c3545 | 527 | } |
528 | ||
529 | /* Unmap variables described by TGT. If DO_COPYFROM is true, copy relevant | |
530 | variables back from device to host: if it is false, it is assumed that this | |
531 | has been done already, i.e. by gomp_copy_from_async above. */ | |
532 | ||
533 | attribute_hidden void | |
534 | gomp_unmap_vars (struct target_mem_desc *tgt, bool do_copyfrom) | |
995b27b9 | 535 | { |
536 | struct gomp_device_descr *devicep = tgt->device_descr; | |
537 | ||
538 | if (tgt->list_count == 0) | |
539 | { | |
540 | free (tgt); | |
541 | return; | |
542 | } | |
543 | ||
0d8c703d | 544 | gomp_mutex_lock (&devicep->lock); |
ca4c3545 | 545 | |
995b27b9 | 546 | size_t i; |
995b27b9 | 547 | for (i = 0; i < tgt->list_count; i++) |
548 | if (tgt->list[i] == NULL) | |
549 | ; | |
550 | else if (tgt->list[i]->refcount > 1) | |
551 | tgt->list[i]->refcount--; | |
ca4c3545 | 552 | else if (tgt->list[i]->async_refcount > 0) |
553 | tgt->list[i]->async_refcount--; | |
995b27b9 | 554 | else |
555 | { | |
556 | splay_tree_key k = tgt->list[i]; | |
ca4c3545 | 557 | if (k->copy_from && do_copyfrom) |
995b27b9 | 558 | devicep->dev2host_func (devicep->target_id, (void *) k->host_start, |
559 | (void *) (k->tgt->tgt_start + k->tgt_offset), | |
560 | k->host_end - k->host_start); | |
0a1fe572 | 561 | splay_tree_remove (&devicep->mem_map, k); |
995b27b9 | 562 | if (k->tgt->refcount > 1) |
563 | k->tgt->refcount--; | |
564 | else | |
565 | gomp_unmap_tgt (k->tgt); | |
566 | } | |
567 | ||
568 | if (tgt->refcount > 1) | |
569 | tgt->refcount--; | |
570 | else | |
571 | gomp_unmap_tgt (tgt); | |
ca4c3545 | 572 | |
0d8c703d | 573 | gomp_mutex_unlock (&devicep->lock); |
995b27b9 | 574 | } |
575 | ||
576 | static void | |
0d8c703d | 577 | gomp_update (struct gomp_device_descr *devicep, size_t mapnum, void **hostaddrs, |
578 | size_t *sizes, void *kinds, bool is_openacc) | |
995b27b9 | 579 | { |
580 | size_t i; | |
581 | struct splay_tree_key_s cur_node; | |
ca4c3545 | 582 | const int typemask = is_openacc ? 0xff : 0x7; |
995b27b9 | 583 | |
584 | if (!devicep) | |
585 | return; | |
586 | ||
587 | if (mapnum == 0) | |
588 | return; | |
589 | ||
0d8c703d | 590 | gomp_mutex_lock (&devicep->lock); |
995b27b9 | 591 | for (i = 0; i < mapnum; i++) |
592 | if (sizes[i]) | |
593 | { | |
594 | cur_node.host_start = (uintptr_t) hostaddrs[i]; | |
595 | cur_node.host_end = cur_node.host_start + sizes[i]; | |
0d8c703d | 596 | splay_tree_key n = splay_tree_lookup (&devicep->mem_map, &cur_node); |
995b27b9 | 597 | if (n) |
598 | { | |
ca4c3545 | 599 | int kind = get_kind (is_openacc, kinds, i); |
995b27b9 | 600 | if (n->host_start > cur_node.host_start |
601 | || n->host_end < cur_node.host_end) | |
0d8c703d | 602 | { |
603 | gomp_mutex_unlock (&devicep->lock); | |
604 | gomp_fatal ("Trying to update [%p..%p) object when " | |
605 | "only [%p..%p) is mapped", | |
606 | (void *) cur_node.host_start, | |
607 | (void *) cur_node.host_end, | |
608 | (void *) n->host_start, | |
609 | (void *) n->host_end); | |
610 | } | |
ca4c3545 | 611 | if (GOMP_MAP_COPY_TO_P (kind & typemask)) |
995b27b9 | 612 | devicep->host2dev_func (devicep->target_id, |
613 | (void *) (n->tgt->tgt_start | |
614 | + n->tgt_offset | |
615 | + cur_node.host_start | |
616 | - n->host_start), | |
617 | (void *) cur_node.host_start, | |
618 | cur_node.host_end - cur_node.host_start); | |
ca4c3545 | 619 | if (GOMP_MAP_COPY_FROM_P (kind & typemask)) |
995b27b9 | 620 | devicep->dev2host_func (devicep->target_id, |
621 | (void *) cur_node.host_start, | |
622 | (void *) (n->tgt->tgt_start | |
623 | + n->tgt_offset | |
624 | + cur_node.host_start | |
625 | - n->host_start), | |
626 | cur_node.host_end - cur_node.host_start); | |
627 | } | |
628 | else | |
0d8c703d | 629 | { |
630 | gomp_mutex_unlock (&devicep->lock); | |
631 | gomp_fatal ("Trying to update [%p..%p) object that is not mapped", | |
632 | (void *) cur_node.host_start, | |
633 | (void *) cur_node.host_end); | |
634 | } | |
995b27b9 | 635 | } |
0d8c703d | 636 | gomp_mutex_unlock (&devicep->lock); |
637 | } | |
638 | ||
639 | /* Load image pointed by TARGET_DATA to the device, specified by DEVICEP. | |
640 | And insert to splay tree the mapping between addresses from HOST_TABLE and | |
641 | from loaded target image. */ | |
642 | ||
643 | static void | |
644 | gomp_offload_image_to_device (struct gomp_device_descr *devicep, | |
4e985e0f | 645 | const void *host_table, const void *target_data, |
0d8c703d | 646 | bool is_register_lock) |
647 | { | |
648 | void **host_func_table = ((void ***) host_table)[0]; | |
649 | void **host_funcs_end = ((void ***) host_table)[1]; | |
650 | void **host_var_table = ((void ***) host_table)[2]; | |
651 | void **host_vars_end = ((void ***) host_table)[3]; | |
652 | ||
653 | /* The func table contains only addresses, the var table contains addresses | |
654 | and corresponding sizes. */ | |
655 | int num_funcs = host_funcs_end - host_func_table; | |
656 | int num_vars = (host_vars_end - host_var_table) / 2; | |
657 | ||
658 | /* Load image to device and get target addresses for the image. */ | |
659 | struct addr_pair *target_table = NULL; | |
660 | int i, num_target_entries | |
661 | = devicep->load_image_func (devicep->target_id, target_data, &target_table); | |
662 | ||
663 | if (num_target_entries != num_funcs + num_vars) | |
664 | { | |
665 | gomp_mutex_unlock (&devicep->lock); | |
666 | if (is_register_lock) | |
667 | gomp_mutex_unlock (®ister_lock); | |
668 | gomp_fatal ("Can't map target functions or variables"); | |
669 | } | |
670 | ||
671 | /* Insert host-target address mapping into splay tree. */ | |
672 | struct target_mem_desc *tgt = gomp_malloc (sizeof (*tgt)); | |
673 | tgt->array = gomp_malloc ((num_funcs + num_vars) * sizeof (*tgt->array)); | |
674 | tgt->refcount = 1; | |
675 | tgt->tgt_start = 0; | |
676 | tgt->tgt_end = 0; | |
677 | tgt->to_free = NULL; | |
678 | tgt->prev = NULL; | |
679 | tgt->list_count = 0; | |
680 | tgt->device_descr = devicep; | |
681 | splay_tree_node array = tgt->array; | |
682 | ||
683 | for (i = 0; i < num_funcs; i++) | |
684 | { | |
685 | splay_tree_key k = &array->key; | |
686 | k->host_start = (uintptr_t) host_func_table[i]; | |
687 | k->host_end = k->host_start + 1; | |
688 | k->tgt = tgt; | |
689 | k->tgt_offset = target_table[i].start; | |
690 | k->refcount = 1; | |
691 | k->async_refcount = 0; | |
692 | k->copy_from = false; | |
693 | array->left = NULL; | |
694 | array->right = NULL; | |
695 | splay_tree_insert (&devicep->mem_map, array); | |
696 | array++; | |
697 | } | |
698 | ||
699 | for (i = 0; i < num_vars; i++) | |
700 | { | |
701 | struct addr_pair *target_var = &target_table[num_funcs + i]; | |
702 | if (target_var->end - target_var->start | |
703 | != (uintptr_t) host_var_table[i * 2 + 1]) | |
704 | { | |
705 | gomp_mutex_unlock (&devicep->lock); | |
706 | if (is_register_lock) | |
707 | gomp_mutex_unlock (®ister_lock); | |
708 | gomp_fatal ("Can't map target variables (size mismatch)"); | |
709 | } | |
710 | ||
711 | splay_tree_key k = &array->key; | |
712 | k->host_start = (uintptr_t) host_var_table[i * 2]; | |
713 | k->host_end = k->host_start + (uintptr_t) host_var_table[i * 2 + 1]; | |
714 | k->tgt = tgt; | |
715 | k->tgt_offset = target_var->start; | |
716 | k->refcount = 1; | |
717 | k->async_refcount = 0; | |
718 | k->copy_from = false; | |
719 | array->left = NULL; | |
720 | array->right = NULL; | |
721 | splay_tree_insert (&devicep->mem_map, array); | |
722 | array++; | |
723 | } | |
724 | ||
725 | free (target_table); | |
995b27b9 | 726 | } |
727 | ||
0d8c703d | 728 | /* This function should be called from every offload image while loading. |
995b27b9 | 729 | It gets the descriptor of the host func and var tables HOST_TABLE, TYPE of |
730 | the target, and TARGET_DATA needed by target plugin. */ | |
731 | ||
732 | void | |
ff96563a | 733 | GOMP_offload_register (const void *host_table, int target_type, |
70046055 | 734 | const void *target_data) |
995b27b9 | 735 | { |
0d8c703d | 736 | int i; |
737 | gomp_mutex_lock (®ister_lock); | |
738 | ||
739 | /* Load image to all initialized devices. */ | |
740 | for (i = 0; i < num_devices; i++) | |
741 | { | |
742 | struct gomp_device_descr *devicep = &devices[i]; | |
743 | gomp_mutex_lock (&devicep->lock); | |
744 | if (devicep->type == target_type && devicep->is_initialized) | |
745 | gomp_offload_image_to_device (devicep, host_table, target_data, true); | |
746 | gomp_mutex_unlock (&devicep->lock); | |
747 | } | |
995b27b9 | 748 | |
0d8c703d | 749 | /* Insert image to array of pending images. */ |
750 | offload_images | |
751 | = gomp_realloc_unlock (offload_images, | |
752 | (num_offload_images + 1) | |
753 | * sizeof (struct offload_image_descr)); | |
995b27b9 | 754 | offload_images[num_offload_images].type = target_type; |
755 | offload_images[num_offload_images].host_table = host_table; | |
756 | offload_images[num_offload_images].target_data = target_data; | |
757 | ||
758 | num_offload_images++; | |
0d8c703d | 759 | gomp_mutex_unlock (®ister_lock); |
995b27b9 | 760 | } |
761 | ||
0d8c703d | 762 | /* This function should be called from every offload image while unloading. |
763 | It gets the descriptor of the host func and var tables HOST_TABLE, TYPE of | |
764 | the target, and TARGET_DATA needed by target plugin. */ | |
995b27b9 | 765 | |
0d8c703d | 766 | void |
ff96563a | 767 | GOMP_offload_unregister (const void *host_table, int target_type, |
70046055 | 768 | const void *target_data) |
995b27b9 | 769 | { |
0d8c703d | 770 | void **host_func_table = ((void ***) host_table)[0]; |
771 | void **host_funcs_end = ((void ***) host_table)[1]; | |
772 | void **host_var_table = ((void ***) host_table)[2]; | |
773 | void **host_vars_end = ((void ***) host_table)[3]; | |
774 | int i; | |
775 | ||
776 | /* The func table contains only addresses, the var table contains addresses | |
777 | and corresponding sizes. */ | |
778 | int num_funcs = host_funcs_end - host_func_table; | |
779 | int num_vars = (host_vars_end - host_var_table) / 2; | |
780 | ||
781 | gomp_mutex_lock (®ister_lock); | |
782 | ||
783 | /* Unload image from all initialized devices. */ | |
784 | for (i = 0; i < num_devices; i++) | |
785 | { | |
786 | int j; | |
787 | struct gomp_device_descr *devicep = &devices[i]; | |
788 | gomp_mutex_lock (&devicep->lock); | |
789 | if (devicep->type != target_type || !devicep->is_initialized) | |
790 | { | |
791 | gomp_mutex_unlock (&devicep->lock); | |
792 | continue; | |
793 | } | |
794 | ||
795 | devicep->unload_image_func (devicep->target_id, target_data); | |
796 | ||
797 | /* Remove mapping from splay tree. */ | |
798 | struct splay_tree_key_s k; | |
799 | splay_tree_key node = NULL; | |
800 | if (num_funcs > 0) | |
801 | { | |
802 | k.host_start = (uintptr_t) host_func_table[0]; | |
803 | k.host_end = k.host_start + 1; | |
804 | node = splay_tree_lookup (&devicep->mem_map, &k); | |
805 | } | |
806 | else if (num_vars > 0) | |
807 | { | |
808 | k.host_start = (uintptr_t) host_var_table[0]; | |
809 | k.host_end = k.host_start + (uintptr_t) host_var_table[1]; | |
810 | node = splay_tree_lookup (&devicep->mem_map, &k); | |
811 | } | |
812 | ||
813 | for (j = 0; j < num_funcs; j++) | |
814 | { | |
815 | k.host_start = (uintptr_t) host_func_table[j]; | |
816 | k.host_end = k.host_start + 1; | |
817 | splay_tree_remove (&devicep->mem_map, &k); | |
818 | } | |
819 | ||
820 | for (j = 0; j < num_vars; j++) | |
821 | { | |
822 | k.host_start = (uintptr_t) host_var_table[j * 2]; | |
823 | k.host_end = k.host_start + (uintptr_t) host_var_table[j * 2 + 1]; | |
824 | splay_tree_remove (&devicep->mem_map, &k); | |
825 | } | |
826 | ||
827 | if (node) | |
828 | { | |
829 | free (node->tgt); | |
830 | free (node); | |
831 | } | |
832 | ||
833 | gomp_mutex_unlock (&devicep->lock); | |
834 | } | |
835 | ||
836 | /* Remove image from array of pending images. */ | |
837 | for (i = 0; i < num_offload_images; i++) | |
838 | if (offload_images[i].target_data == target_data) | |
839 | { | |
840 | offload_images[i] = offload_images[--num_offload_images]; | |
841 | break; | |
842 | } | |
843 | ||
844 | gomp_mutex_unlock (®ister_lock); | |
ca4c3545 | 845 | } |
995b27b9 | 846 | |
0d8c703d | 847 | /* This function initializes the target device, specified by DEVICEP. DEVICEP |
848 | must be locked on entry, and remains locked on return. */ | |
ca4c3545 | 849 | |
850 | attribute_hidden void | |
0d8c703d | 851 | gomp_init_device (struct gomp_device_descr *devicep) |
ca4c3545 | 852 | { |
995b27b9 | 853 | int i; |
0d8c703d | 854 | devicep->init_device_func (devicep->target_id); |
855 | ||
856 | /* Load to device all images registered by the moment. */ | |
857 | for (i = 0; i < num_offload_images; i++) | |
995b27b9 | 858 | { |
0d8c703d | 859 | struct offload_image_descr *image = &offload_images[i]; |
860 | if (image->type == devicep->type) | |
861 | gomp_offload_image_to_device (devicep, image->host_table, | |
862 | image->target_data, false); | |
995b27b9 | 863 | } |
864 | ||
0d8c703d | 865 | devicep->is_initialized = true; |
ca4c3545 | 866 | } |
867 | ||
868 | /* Free address mapping tables. MM must be locked on entry, and remains locked | |
869 | on return. */ | |
870 | ||
871 | attribute_hidden void | |
0d8c703d | 872 | gomp_free_memmap (struct splay_tree_s *mem_map) |
ca4c3545 | 873 | { |
0d8c703d | 874 | while (mem_map->root) |
ca4c3545 | 875 | { |
0d8c703d | 876 | struct target_mem_desc *tgt = mem_map->root->key.tgt; |
ca4c3545 | 877 | |
0d8c703d | 878 | splay_tree_remove (mem_map, &mem_map->root->key); |
ca4c3545 | 879 | free (tgt->array); |
880 | free (tgt); | |
881 | } | |
ca4c3545 | 882 | } |
883 | ||
884 | /* This function de-initializes the target device, specified by DEVICEP. | |
885 | DEVICEP must be locked on entry, and remains locked on return. */ | |
886 | ||
887 | attribute_hidden void | |
888 | gomp_fini_device (struct gomp_device_descr *devicep) | |
889 | { | |
890 | if (devicep->is_initialized) | |
891 | devicep->fini_device_func (devicep->target_id); | |
892 | ||
893 | devicep->is_initialized = false; | |
bc7bff74 | 894 | } |
895 | ||
896 | /* Called when encountering a target directive. If DEVICE | |
ca4c3545 | 897 | is GOMP_DEVICE_ICV, it means use device-var ICV. If it is |
898 | GOMP_DEVICE_HOST_FALLBACK (or any value | |
899 | larger than last available hw device), use host fallback. | |
dc19c8fd | 900 | FN is address of host code, UNUSED is part of the current ABI, but |
901 | we're not actually using it. HOSTADDRS, SIZES and KINDS are arrays | |
bc7bff74 | 902 | with MAPNUM entries, with addresses of the host objects, |
903 | sizes of the host objects (resp. for pointer kind pointer bias | |
904 | and assumed sizeof (void *) size) and kinds. */ | |
905 | ||
906 | void | |
dc19c8fd | 907 | GOMP_target (int device, void (*fn) (void *), const void *unused, |
bc7bff74 | 908 | size_t mapnum, void **hostaddrs, size_t *sizes, |
909 | unsigned char *kinds) | |
910 | { | |
995b27b9 | 911 | struct gomp_device_descr *devicep = resolve_device (device); |
ca4c3545 | 912 | |
913 | if (devicep == NULL | |
914 | || !(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)) | |
995b27b9 | 915 | { |
916 | /* Host fallback. */ | |
917 | struct gomp_thread old_thr, *thr = gomp_thread (); | |
918 | old_thr = *thr; | |
919 | memset (thr, '\0', sizeof (*thr)); | |
920 | if (gomp_places_list) | |
921 | { | |
922 | thr->place = old_thr.place; | |
923 | thr->ts.place_partition_len = gomp_places_list_len; | |
924 | } | |
925 | fn (hostaddrs); | |
926 | gomp_free_thread (thr); | |
927 | *thr = old_thr; | |
928 | return; | |
929 | } | |
930 | ||
ca4c3545 | 931 | gomp_mutex_lock (&devicep->lock); |
995b27b9 | 932 | if (!devicep->is_initialized) |
933 | gomp_init_device (devicep); | |
ca4c3545 | 934 | gomp_mutex_unlock (&devicep->lock); |
935 | ||
936 | void *fn_addr; | |
937 | ||
938 | if (devicep->capabilities & GOMP_OFFLOAD_CAP_NATIVE_EXEC) | |
939 | fn_addr = (void *) fn; | |
940 | else | |
941 | { | |
0d8c703d | 942 | gomp_mutex_lock (&devicep->lock); |
ca4c3545 | 943 | struct splay_tree_key_s k; |
944 | k.host_start = (uintptr_t) fn; | |
945 | k.host_end = k.host_start + 1; | |
0d8c703d | 946 | splay_tree_key tgt_fn = splay_tree_lookup (&devicep->mem_map, &k); |
ca4c3545 | 947 | if (tgt_fn == NULL) |
0d8c703d | 948 | { |
949 | gomp_mutex_unlock (&devicep->lock); | |
950 | gomp_fatal ("Target function wasn't mapped"); | |
951 | } | |
952 | gomp_mutex_unlock (&devicep->lock); | |
ca4c3545 | 953 | |
0d8c703d | 954 | fn_addr = (void *) tgt_fn->tgt_offset; |
ca4c3545 | 955 | } |
995b27b9 | 956 | |
957 | struct target_mem_desc *tgt_vars | |
ca4c3545 | 958 | = gomp_map_vars (devicep, mapnum, hostaddrs, NULL, sizes, kinds, false, |
959 | true); | |
bc7bff74 | 960 | struct gomp_thread old_thr, *thr = gomp_thread (); |
961 | old_thr = *thr; | |
962 | memset (thr, '\0', sizeof (*thr)); | |
963 | if (gomp_places_list) | |
964 | { | |
965 | thr->place = old_thr.place; | |
966 | thr->ts.place_partition_len = gomp_places_list_len; | |
967 | } | |
ca4c3545 | 968 | devicep->run_func (devicep->target_id, fn_addr, (void *) tgt_vars->tgt_start); |
bc7bff74 | 969 | gomp_free_thread (thr); |
970 | *thr = old_thr; | |
ca4c3545 | 971 | gomp_unmap_vars (tgt_vars, true); |
bc7bff74 | 972 | } |
973 | ||
974 | void | |
dc19c8fd | 975 | GOMP_target_data (int device, const void *unused, size_t mapnum, |
bc7bff74 | 976 | void **hostaddrs, size_t *sizes, unsigned char *kinds) |
977 | { | |
995b27b9 | 978 | struct gomp_device_descr *devicep = resolve_device (device); |
ca4c3545 | 979 | |
980 | if (devicep == NULL | |
981 | || !(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)) | |
995b27b9 | 982 | { |
983 | /* Host fallback. */ | |
984 | struct gomp_task_icv *icv = gomp_icv (false); | |
985 | if (icv->target_data) | |
986 | { | |
987 | /* Even when doing a host fallback, if there are any active | |
988 | #pragma omp target data constructs, need to remember the | |
989 | new #pragma omp target data, otherwise GOMP_target_end_data | |
990 | would get out of sync. */ | |
991 | struct target_mem_desc *tgt | |
ca4c3545 | 992 | = gomp_map_vars (NULL, 0, NULL, NULL, NULL, NULL, false, false); |
995b27b9 | 993 | tgt->prev = icv->target_data; |
994 | icv->target_data = tgt; | |
995 | } | |
996 | return; | |
997 | } | |
998 | ||
ca4c3545 | 999 | gomp_mutex_lock (&devicep->lock); |
995b27b9 | 1000 | if (!devicep->is_initialized) |
1001 | gomp_init_device (devicep); | |
ca4c3545 | 1002 | gomp_mutex_unlock (&devicep->lock); |
1003 | ||
995b27b9 | 1004 | struct target_mem_desc *tgt |
ca4c3545 | 1005 | = gomp_map_vars (devicep, mapnum, hostaddrs, NULL, sizes, kinds, false, |
1006 | false); | |
995b27b9 | 1007 | struct gomp_task_icv *icv = gomp_icv (true); |
1008 | tgt->prev = icv->target_data; | |
1009 | icv->target_data = tgt; | |
bc7bff74 | 1010 | } |
1011 | ||
1012 | void | |
1013 | GOMP_target_end_data (void) | |
1014 | { | |
995b27b9 | 1015 | struct gomp_task_icv *icv = gomp_icv (false); |
1016 | if (icv->target_data) | |
1017 | { | |
1018 | struct target_mem_desc *tgt = icv->target_data; | |
1019 | icv->target_data = tgt->prev; | |
ca4c3545 | 1020 | gomp_unmap_vars (tgt, true); |
995b27b9 | 1021 | } |
bc7bff74 | 1022 | } |
1023 | ||
1024 | void | |
dc19c8fd | 1025 | GOMP_target_update (int device, const void *unused, size_t mapnum, |
bc7bff74 | 1026 | void **hostaddrs, size_t *sizes, unsigned char *kinds) |
1027 | { | |
995b27b9 | 1028 | struct gomp_device_descr *devicep = resolve_device (device); |
ca4c3545 | 1029 | |
1030 | if (devicep == NULL | |
1031 | || !(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)) | |
995b27b9 | 1032 | return; |
1033 | ||
ca4c3545 | 1034 | gomp_mutex_lock (&devicep->lock); |
995b27b9 | 1035 | if (!devicep->is_initialized) |
1036 | gomp_init_device (devicep); | |
ca4c3545 | 1037 | gomp_mutex_unlock (&devicep->lock); |
1038 | ||
0d8c703d | 1039 | gomp_update (devicep, mapnum, hostaddrs, sizes, kinds, false); |
bc7bff74 | 1040 | } |
1041 | ||
1042 | void | |
1043 | GOMP_teams (unsigned int num_teams, unsigned int thread_limit) | |
1044 | { | |
1045 | if (thread_limit) | |
1046 | { | |
1047 | struct gomp_task_icv *icv = gomp_icv (true); | |
1048 | icv->thread_limit_var | |
1049 | = thread_limit > INT_MAX ? UINT_MAX : thread_limit; | |
1050 | } | |
1051 | (void) num_teams; | |
1052 | } | |
995b27b9 | 1053 | |
1054 | #ifdef PLUGIN_SUPPORT | |
1055 | ||
1056 | /* This function tries to load a plugin for DEVICE. Name of plugin is passed | |
1057 | in PLUGIN_NAME. | |
1058 | The handles of the found functions are stored in the corresponding fields | |
1059 | of DEVICE. The function returns TRUE on success and FALSE otherwise. */ | |
1060 | ||
1061 | static bool | |
1062 | gomp_load_plugin_for_device (struct gomp_device_descr *device, | |
1063 | const char *plugin_name) | |
1064 | { | |
c6aa02c6 | 1065 | const char *err = NULL, *last_missing = NULL; |
ca4c3545 | 1066 | int optional_present, optional_total; |
1067 | ||
1068 | /* Clear any existing error. */ | |
1069 | dlerror (); | |
1070 | ||
995b27b9 | 1071 | void *plugin_handle = dlopen (plugin_name, RTLD_LAZY); |
1072 | if (!plugin_handle) | |
ca4c3545 | 1073 | { |
1074 | err = dlerror (); | |
1075 | goto out; | |
1076 | } | |
995b27b9 | 1077 | |
1078 | /* Check if all required functions are available in the plugin and store | |
1079 | their handlers. */ | |
ca4c3545 | 1080 | #define DLSYM(f) \ |
1081 | do \ | |
1082 | { \ | |
1083 | device->f##_func = dlsym (plugin_handle, "GOMP_OFFLOAD_" #f); \ | |
1084 | err = dlerror (); \ | |
1085 | if (err != NULL) \ | |
1086 | goto out; \ | |
1087 | } \ | |
995b27b9 | 1088 | while (0) |
ca4c3545 | 1089 | /* Similar, but missing functions are not an error. */ |
1090 | #define DLSYM_OPT(f, n) \ | |
1091 | do \ | |
1092 | { \ | |
c6aa02c6 | 1093 | const char *tmp_err; \ |
ca4c3545 | 1094 | device->f##_func = dlsym (plugin_handle, "GOMP_OFFLOAD_" #n); \ |
1095 | tmp_err = dlerror (); \ | |
1096 | if (tmp_err == NULL) \ | |
1097 | optional_present++; \ | |
1098 | else \ | |
1099 | last_missing = #n; \ | |
1100 | optional_total++; \ | |
1101 | } \ | |
1102 | while (0) | |
1103 | ||
1104 | DLSYM (get_name); | |
1105 | DLSYM (get_caps); | |
995b27b9 | 1106 | DLSYM (get_type); |
1107 | DLSYM (get_num_devices); | |
995b27b9 | 1108 | DLSYM (init_device); |
ca4c3545 | 1109 | DLSYM (fini_device); |
0d8c703d | 1110 | DLSYM (load_image); |
1111 | DLSYM (unload_image); | |
995b27b9 | 1112 | DLSYM (alloc); |
1113 | DLSYM (free); | |
1114 | DLSYM (dev2host); | |
1115 | DLSYM (host2dev); | |
ca4c3545 | 1116 | device->capabilities = device->get_caps_func (); |
1117 | if (device->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400) | |
1118 | DLSYM (run); | |
1119 | if (device->capabilities & GOMP_OFFLOAD_CAP_OPENACC_200) | |
1120 | { | |
1121 | optional_present = optional_total = 0; | |
1122 | DLSYM_OPT (openacc.exec, openacc_parallel); | |
ca4c3545 | 1123 | DLSYM_OPT (openacc.register_async_cleanup, |
1124 | openacc_register_async_cleanup); | |
1125 | DLSYM_OPT (openacc.async_test, openacc_async_test); | |
1126 | DLSYM_OPT (openacc.async_test_all, openacc_async_test_all); | |
1127 | DLSYM_OPT (openacc.async_wait, openacc_async_wait); | |
1128 | DLSYM_OPT (openacc.async_wait_async, openacc_async_wait_async); | |
1129 | DLSYM_OPT (openacc.async_wait_all, openacc_async_wait_all); | |
1130 | DLSYM_OPT (openacc.async_wait_all_async, openacc_async_wait_all_async); | |
1131 | DLSYM_OPT (openacc.async_set_async, openacc_async_set_async); | |
1132 | DLSYM_OPT (openacc.create_thread_data, openacc_create_thread_data); | |
1133 | DLSYM_OPT (openacc.destroy_thread_data, openacc_destroy_thread_data); | |
1134 | /* Require all the OpenACC handlers if we have | |
1135 | GOMP_OFFLOAD_CAP_OPENACC_200. */ | |
1136 | if (optional_present != optional_total) | |
1137 | { | |
1138 | err = "plugin missing OpenACC handler function"; | |
1139 | goto out; | |
1140 | } | |
1141 | optional_present = optional_total = 0; | |
1142 | DLSYM_OPT (openacc.cuda.get_current_device, | |
1143 | openacc_get_current_cuda_device); | |
1144 | DLSYM_OPT (openacc.cuda.get_current_context, | |
1145 | openacc_get_current_cuda_context); | |
1146 | DLSYM_OPT (openacc.cuda.get_stream, openacc_get_cuda_stream); | |
1147 | DLSYM_OPT (openacc.cuda.set_stream, openacc_set_cuda_stream); | |
1148 | /* Make sure all the CUDA functions are there if any of them are. */ | |
1149 | if (optional_present && optional_present != optional_total) | |
1150 | { | |
1151 | err = "plugin missing OpenACC CUDA handler function"; | |
1152 | goto out; | |
1153 | } | |
1154 | } | |
995b27b9 | 1155 | #undef DLSYM |
ca4c3545 | 1156 | #undef DLSYM_OPT |
995b27b9 | 1157 | |
ca4c3545 | 1158 | out: |
1159 | if (err != NULL) | |
1160 | { | |
1161 | gomp_error ("while loading %s: %s", plugin_name, err); | |
1162 | if (last_missing) | |
1163 | gomp_error ("missing function was %s", last_missing); | |
1164 | if (plugin_handle) | |
1165 | dlclose (plugin_handle); | |
1166 | } | |
1167 | return err == NULL; | |
995b27b9 | 1168 | } |
1169 | ||
995b27b9 | 1170 | /* This function initializes the runtime needed for offloading. |
ca4c3545 | 1171 | It parses the list of offload targets and tries to load the plugins for |
1172 | these targets. On return, the variables NUM_DEVICES and NUM_DEVICES_OPENMP | |
1173 | will be set, and the array DEVICES initialized, containing descriptors for | |
1174 | corresponding devices, first the GOMP_OFFLOAD_CAP_OPENMP_400 ones, follows | |
1175 | by the others. */ | |
995b27b9 | 1176 | |
1177 | static void | |
1178 | gomp_target_init (void) | |
1179 | { | |
1180 | const char *prefix ="libgomp-plugin-"; | |
4fda895a | 1181 | const char *suffix = SONAME_SUFFIX (1); |
995b27b9 | 1182 | const char *cur, *next; |
1183 | char *plugin_name; | |
1184 | int i, new_num_devices; | |
1185 | ||
1186 | num_devices = 0; | |
1187 | devices = NULL; | |
1188 | ||
1189 | cur = OFFLOAD_TARGETS; | |
1190 | if (*cur) | |
1191 | do | |
1192 | { | |
1193 | struct gomp_device_descr current_device; | |
1194 | ||
1195 | next = strchr (cur, ','); | |
1196 | ||
1197 | plugin_name = (char *) malloc (1 + (next ? next - cur : strlen (cur)) | |
1198 | + strlen (prefix) + strlen (suffix)); | |
1199 | if (!plugin_name) | |
1200 | { | |
1201 | num_devices = 0; | |
1202 | break; | |
1203 | } | |
1204 | ||
1205 | strcpy (plugin_name, prefix); | |
1206 | strncat (plugin_name, cur, next ? next - cur : strlen (cur)); | |
1207 | strcat (plugin_name, suffix); | |
1208 | ||
1209 | if (gomp_load_plugin_for_device (¤t_device, plugin_name)) | |
1210 | { | |
1211 | new_num_devices = current_device.get_num_devices_func (); | |
1212 | if (new_num_devices >= 1) | |
1213 | { | |
ca4c3545 | 1214 | /* Augment DEVICES and NUM_DEVICES. */ |
1215 | ||
995b27b9 | 1216 | devices = realloc (devices, (num_devices + new_num_devices) |
1217 | * sizeof (struct gomp_device_descr)); | |
1218 | if (!devices) | |
1219 | { | |
1220 | num_devices = 0; | |
1221 | free (plugin_name); | |
1222 | break; | |
1223 | } | |
1224 | ||
ca4c3545 | 1225 | current_device.name = current_device.get_name_func (); |
1226 | /* current_device.capabilities has already been set. */ | |
995b27b9 | 1227 | current_device.type = current_device.get_type_func (); |
0d8c703d | 1228 | current_device.mem_map.root = NULL; |
995b27b9 | 1229 | current_device.is_initialized = false; |
ca4c3545 | 1230 | current_device.openacc.data_environ = NULL; |
995b27b9 | 1231 | for (i = 0; i < new_num_devices; i++) |
1232 | { | |
995b27b9 | 1233 | current_device.target_id = i; |
1234 | devices[num_devices] = current_device; | |
ca4c3545 | 1235 | gomp_mutex_init (&devices[num_devices].lock); |
995b27b9 | 1236 | num_devices++; |
1237 | } | |
1238 | } | |
1239 | } | |
1240 | ||
1241 | free (plugin_name); | |
1242 | cur = next + 1; | |
1243 | } | |
1244 | while (next); | |
1245 | ||
ca4c3545 | 1246 | /* In DEVICES, sort the GOMP_OFFLOAD_CAP_OPENMP_400 ones first, and set |
1247 | NUM_DEVICES_OPENMP. */ | |
1248 | struct gomp_device_descr *devices_s | |
1249 | = malloc (num_devices * sizeof (struct gomp_device_descr)); | |
1250 | if (!devices_s) | |
1251 | { | |
1252 | num_devices = 0; | |
1253 | free (devices); | |
1254 | devices = NULL; | |
1255 | } | |
1256 | num_devices_openmp = 0; | |
1257 | for (i = 0; i < num_devices; i++) | |
1258 | if (devices[i].capabilities & GOMP_OFFLOAD_CAP_OPENMP_400) | |
1259 | devices_s[num_devices_openmp++] = devices[i]; | |
1260 | int num_devices_after_openmp = num_devices_openmp; | |
1261 | for (i = 0; i < num_devices; i++) | |
1262 | if (!(devices[i].capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)) | |
1263 | devices_s[num_devices_after_openmp++] = devices[i]; | |
1264 | free (devices); | |
1265 | devices = devices_s; | |
1266 | ||
1267 | for (i = 0; i < num_devices; i++) | |
1268 | { | |
ca4c3545 | 1269 | /* The 'devices' array can be moved (by the realloc call) until we have |
1270 | found all the plugins, so registering with the OpenACC runtime (which | |
1271 | takes a copy of the pointer argument) must be delayed until now. */ | |
1272 | if (devices[i].capabilities & GOMP_OFFLOAD_CAP_OPENACC_200) | |
1273 | goacc_register (&devices[i]); | |
1274 | } | |
995b27b9 | 1275 | } |
1276 | ||
1277 | #else /* PLUGIN_SUPPORT */ | |
1278 | /* If dlfcn.h is unavailable we always fallback to host execution. | |
1279 | GOMP_target* routines are just stubs for this case. */ | |
1280 | static void | |
1281 | gomp_target_init (void) | |
1282 | { | |
1283 | } | |
1284 | #endif /* PLUGIN_SUPPORT */ |