]>
Commit | Line | Data |
---|---|---|
b2b40051 MJ |
1 | /* Plugin for HSAIL execution. |
2 | ||
cbe34bb5 | 3 | Copyright (C) 2013-2017 Free Software Foundation, Inc. |
b2b40051 MJ |
4 | |
5 | Contributed by Martin Jambor <mjambor@suse.cz> and | |
6 | Martin Liska <mliska@suse.cz>. | |
7 | ||
8 | This file is part of the GNU Offloading and Multi Processing Library | |
9 | (libgomp). | |
10 | ||
11 | Libgomp is free software; you can redistribute it and/or modify it | |
12 | under the terms of the GNU General Public License as published by | |
13 | the Free Software Foundation; either version 3, or (at your option) | |
14 | any later version. | |
15 | ||
16 | Libgomp is distributed in the hope that it will be useful, but WITHOUT ANY | |
17 | WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS | |
18 | FOR A PARTICULAR PURPOSE. See the GNU General Public License for | |
19 | more details. | |
20 | ||
21 | Under Section 7 of GPL version 3, you are granted additional | |
22 | permissions described in the GCC Runtime Library Exception, version | |
23 | 3.1, as published by the Free Software Foundation. | |
24 | ||
25 | You should have received a copy of the GNU General Public License and | |
26 | a copy of the GCC Runtime Library Exception along with this program; | |
27 | see the files COPYING3 and COPYING.RUNTIME respectively. If not, see | |
28 | <http://www.gnu.org/licenses/>. */ | |
29 | ||
b8d89b03 | 30 | #include "config.h" |
b2b40051 MJ |
31 | #include <stdio.h> |
32 | #include <stdlib.h> | |
33 | #include <string.h> | |
34 | #include <pthread.h> | |
b8d89b03 ML |
35 | #include <inttypes.h> |
36 | #include <stdbool.h> | |
37 | #include <plugin/hsa.h> | |
38 | #include <plugin/hsa_ext_finalize.h> | |
b2b40051 MJ |
39 | #include <dlfcn.h> |
40 | #include "libgomp-plugin.h" | |
41 | #include "gomp-constants.h" | |
42 | ||
b8d89b03 ML |
43 | /* Secure getenv() which returns NULL if running as SUID/SGID. */ |
44 | #ifndef HAVE_SECURE_GETENV | |
45 | #ifdef HAVE___SECURE_GETENV | |
46 | #define secure_getenv __secure_getenv | |
47 | #elif defined (HAVE_UNISTD_H) && defined(HAVE_GETUID) && defined(HAVE_GETEUID) \ | |
48 | && defined(HAVE_GETGID) && defined(HAVE_GETEGID) | |
49 | ||
50 | #include <unistd.h> | |
51 | ||
52 | /* Implementation of secure_getenv() for targets where it is not provided but | |
53 | we have at least means to test real and effective IDs. */ | |
54 | ||
55 | static char * | |
56 | secure_getenv (const char *name) | |
57 | { | |
58 | if ((getuid () == geteuid ()) && (getgid () == getegid ())) | |
59 | return getenv (name); | |
60 | else | |
61 | return NULL; | |
62 | } | |
63 | ||
64 | #else | |
65 | #define secure_getenv getenv | |
66 | #endif | |
67 | #endif | |
68 | ||
69 | /* As an HSA runtime is dlopened, following structure defines function | |
70 | pointers utilized by the HSA plug-in. */ | |
71 | ||
72 | struct hsa_runtime_fn_info | |
73 | { | |
74 | /* HSA runtime. */ | |
75 | hsa_status_t (*hsa_status_string_fn) (hsa_status_t status, | |
76 | const char **status_string); | |
77 | hsa_status_t (*hsa_agent_get_info_fn) (hsa_agent_t agent, | |
78 | hsa_agent_info_t attribute, | |
79 | void *value); | |
80 | hsa_status_t (*hsa_init_fn) (void); | |
81 | hsa_status_t (*hsa_iterate_agents_fn) | |
82 | (hsa_status_t (*callback)(hsa_agent_t agent, void *data), void *data); | |
83 | hsa_status_t (*hsa_region_get_info_fn) (hsa_region_t region, | |
84 | hsa_region_info_t attribute, | |
85 | void *value); | |
86 | hsa_status_t (*hsa_queue_create_fn) | |
87 | (hsa_agent_t agent, uint32_t size, hsa_queue_type_t type, | |
88 | void (*callback)(hsa_status_t status, hsa_queue_t *source, void *data), | |
89 | void *data, uint32_t private_segment_size, | |
90 | uint32_t group_segment_size, hsa_queue_t **queue); | |
91 | hsa_status_t (*hsa_agent_iterate_regions_fn) | |
92 | (hsa_agent_t agent, | |
93 | hsa_status_t (*callback)(hsa_region_t region, void *data), void *data); | |
94 | hsa_status_t (*hsa_executable_destroy_fn) (hsa_executable_t executable); | |
95 | hsa_status_t (*hsa_executable_create_fn) | |
96 | (hsa_profile_t profile, hsa_executable_state_t executable_state, | |
97 | const char *options, hsa_executable_t *executable); | |
98 | hsa_status_t (*hsa_executable_global_variable_define_fn) | |
99 | (hsa_executable_t executable, const char *variable_name, void *address); | |
100 | hsa_status_t (*hsa_executable_load_code_object_fn) | |
101 | (hsa_executable_t executable, hsa_agent_t agent, | |
102 | hsa_code_object_t code_object, const char *options); | |
103 | hsa_status_t (*hsa_executable_freeze_fn)(hsa_executable_t executable, | |
104 | const char *options); | |
105 | hsa_status_t (*hsa_signal_create_fn) (hsa_signal_value_t initial_value, | |
106 | uint32_t num_consumers, | |
107 | const hsa_agent_t *consumers, | |
108 | hsa_signal_t *signal); | |
109 | hsa_status_t (*hsa_memory_allocate_fn) (hsa_region_t region, size_t size, | |
110 | void **ptr); | |
111 | hsa_status_t (*hsa_memory_free_fn) (void *ptr); | |
112 | hsa_status_t (*hsa_signal_destroy_fn) (hsa_signal_t signal); | |
113 | hsa_status_t (*hsa_executable_get_symbol_fn) | |
114 | (hsa_executable_t executable, const char *module_name, | |
115 | const char *symbol_name, hsa_agent_t agent, int32_t call_convention, | |
116 | hsa_executable_symbol_t *symbol); | |
117 | hsa_status_t (*hsa_executable_symbol_get_info_fn) | |
118 | (hsa_executable_symbol_t executable_symbol, | |
119 | hsa_executable_symbol_info_t attribute, void *value); | |
120 | uint64_t (*hsa_queue_add_write_index_release_fn) (const hsa_queue_t *queue, | |
121 | uint64_t value); | |
122 | uint64_t (*hsa_queue_load_read_index_acquire_fn) (const hsa_queue_t *queue); | |
123 | void (*hsa_signal_store_relaxed_fn) (hsa_signal_t signal, | |
124 | hsa_signal_value_t value); | |
125 | void (*hsa_signal_store_release_fn) (hsa_signal_t signal, | |
126 | hsa_signal_value_t value); | |
127 | hsa_signal_value_t (*hsa_signal_wait_acquire_fn) | |
128 | (hsa_signal_t signal, hsa_signal_condition_t condition, | |
129 | hsa_signal_value_t compare_value, uint64_t timeout_hint, | |
130 | hsa_wait_state_t wait_state_hint); | |
131 | hsa_signal_value_t (*hsa_signal_load_acquire_fn) (hsa_signal_t signal); | |
132 | hsa_status_t (*hsa_queue_destroy_fn) (hsa_queue_t *queue); | |
133 | ||
134 | /* HSA finalizer. */ | |
135 | hsa_status_t (*hsa_ext_program_add_module_fn) (hsa_ext_program_t program, | |
136 | hsa_ext_module_t module); | |
137 | hsa_status_t (*hsa_ext_program_create_fn) | |
138 | (hsa_machine_model_t machine_model, hsa_profile_t profile, | |
139 | hsa_default_float_rounding_mode_t default_float_rounding_mode, | |
140 | const char *options, hsa_ext_program_t *program); | |
141 | hsa_status_t (*hsa_ext_program_destroy_fn) (hsa_ext_program_t program); | |
142 | hsa_status_t (*hsa_ext_program_finalize_fn) | |
143 | (hsa_ext_program_t program,hsa_isa_t isa, | |
144 | int32_t call_convention, hsa_ext_control_directives_t control_directives, | |
145 | const char *options, hsa_code_object_type_t code_object_type, | |
146 | hsa_code_object_t *code_object); | |
147 | }; | |
148 | ||
149 | /* HSA runtime functions that are initialized in init_hsa_context. */ | |
150 | ||
151 | static struct hsa_runtime_fn_info hsa_fns; | |
152 | ||
b2b40051 MJ |
153 | /* Keep the following GOMP prefixed structures in sync with respective parts of |
154 | the compiler. */ | |
155 | ||
156 | /* Structure describing the run-time and grid properties of an HSA kernel | |
157 | lauch. */ | |
158 | ||
159 | struct GOMP_kernel_launch_attributes | |
160 | { | |
161 | /* Number of dimensions the workload has. Maximum number is 3. */ | |
162 | uint32_t ndim; | |
163 | /* Size of the grid in the three respective dimensions. */ | |
164 | uint32_t gdims[3]; | |
165 | /* Size of work-groups in the respective dimensions. */ | |
166 | uint32_t wdims[3]; | |
167 | }; | |
168 | ||
169 | /* Collection of information needed for a dispatch of a kernel from a | |
170 | kernel. */ | |
171 | ||
172 | struct GOMP_hsa_kernel_dispatch | |
173 | { | |
174 | /* Pointer to a command queue associated with a kernel dispatch agent. */ | |
175 | void *queue; | |
176 | /* Pointer to reserved memory for OMP data struct copying. */ | |
177 | void *omp_data_memory; | |
178 | /* Pointer to a memory space used for kernel arguments passing. */ | |
179 | void *kernarg_address; | |
180 | /* Kernel object. */ | |
181 | uint64_t object; | |
182 | /* Synchronization signal used for dispatch synchronization. */ | |
183 | uint64_t signal; | |
184 | /* Private segment size. */ | |
185 | uint32_t private_segment_size; | |
186 | /* Group segment size. */ | |
187 | uint32_t group_segment_size; | |
188 | /* Number of children kernel dispatches. */ | |
189 | uint64_t kernel_dispatch_count; | |
190 | /* Debug purpose argument. */ | |
191 | uint64_t debug; | |
192 | /* Levels-var ICV. */ | |
193 | uint64_t omp_level; | |
194 | /* Kernel dispatch structures created for children kernel dispatches. */ | |
195 | struct GOMP_hsa_kernel_dispatch **children_dispatches; | |
196 | /* Number of threads. */ | |
197 | uint32_t omp_num_threads; | |
198 | }; | |
199 | ||
200 | /* Part of the libgomp plugin interface. Return the name of the accelerator, | |
201 | which is "hsa". */ | |
202 | ||
203 | const char * | |
204 | GOMP_OFFLOAD_get_name (void) | |
205 | { | |
206 | return "hsa"; | |
207 | } | |
208 | ||
209 | /* Part of the libgomp plugin interface. Return the specific capabilities the | |
210 | HSA accelerator have. */ | |
211 | ||
212 | unsigned int | |
213 | GOMP_OFFLOAD_get_caps (void) | |
214 | { | |
215 | return GOMP_OFFLOAD_CAP_SHARED_MEM | GOMP_OFFLOAD_CAP_OPENMP_400; | |
216 | } | |
217 | ||
218 | /* Part of the libgomp plugin interface. Identify as HSA accelerator. */ | |
219 | ||
220 | int | |
221 | GOMP_OFFLOAD_get_type (void) | |
222 | { | |
223 | return OFFLOAD_TARGET_TYPE_HSA; | |
224 | } | |
225 | ||
226 | /* Return the libgomp version number we're compatible with. There is | |
227 | no requirement for cross-version compatibility. */ | |
228 | ||
229 | unsigned | |
230 | GOMP_OFFLOAD_version (void) | |
231 | { | |
232 | return GOMP_VERSION; | |
233 | } | |
234 | ||
235 | /* Flag to decide whether print to stderr information about what is going on. | |
236 | Set in init_debug depending on environment variables. */ | |
237 | ||
238 | static bool debug; | |
239 | ||
240 | /* Flag to decide if the runtime should suppress a possible fallback to host | |
241 | execution. */ | |
242 | ||
243 | static bool suppress_host_fallback; | |
244 | ||
b8d89b03 ML |
245 | /* Flag to locate HSA runtime shared library that is dlopened |
246 | by this plug-in. */ | |
247 | ||
248 | static const char *hsa_runtime_lib; | |
249 | ||
250 | /* Flag to decide if the runtime should support also CPU devices (can be | |
251 | a simulator). */ | |
252 | ||
253 | static bool support_cpu_devices; | |
254 | ||
b2b40051 MJ |
255 | /* Initialize debug and suppress_host_fallback according to the environment. */ |
256 | ||
257 | static void | |
258 | init_enviroment_variables (void) | |
259 | { | |
b8d89b03 | 260 | if (secure_getenv ("HSA_DEBUG")) |
b2b40051 MJ |
261 | debug = true; |
262 | else | |
263 | debug = false; | |
264 | ||
b8d89b03 | 265 | if (secure_getenv ("HSA_SUPPRESS_HOST_FALLBACK")) |
b2b40051 MJ |
266 | suppress_host_fallback = true; |
267 | else | |
268 | suppress_host_fallback = false; | |
b8d89b03 ML |
269 | |
270 | hsa_runtime_lib = secure_getenv ("HSA_RUNTIME_LIB"); | |
271 | if (hsa_runtime_lib == NULL) | |
272 | hsa_runtime_lib = HSA_RUNTIME_LIB "libhsa-runtime64.so"; | |
273 | ||
274 | support_cpu_devices = secure_getenv ("HSA_SUPPORT_CPU_DEVICES"); | |
b2b40051 MJ |
275 | } |
276 | ||
277 | /* Print a logging message with PREFIX to stderr if HSA_DEBUG value | |
278 | is set to true. */ | |
279 | ||
280 | #define HSA_LOG(prefix, ...) \ | |
281 | do \ | |
282 | { \ | |
283 | if (debug) \ | |
284 | { \ | |
285 | fprintf (stderr, prefix); \ | |
286 | fprintf (stderr, __VA_ARGS__); \ | |
287 | } \ | |
288 | } \ | |
289 | while (false); | |
290 | ||
291 | /* Print a debugging message to stderr. */ | |
292 | ||
293 | #define HSA_DEBUG(...) HSA_LOG ("HSA debug: ", __VA_ARGS__) | |
294 | ||
295 | /* Print a warning message to stderr. */ | |
296 | ||
297 | #define HSA_WARNING(...) HSA_LOG ("HSA warning: ", __VA_ARGS__) | |
298 | ||
299 | /* Print HSA warning STR with an HSA STATUS code. */ | |
300 | ||
301 | static void | |
302 | hsa_warn (const char *str, hsa_status_t status) | |
303 | { | |
304 | if (!debug) | |
305 | return; | |
306 | ||
6ce13072 | 307 | const char *hsa_error_msg; |
b8d89b03 | 308 | hsa_fns.hsa_status_string_fn (status, &hsa_error_msg); |
b2b40051 | 309 | |
6ce13072 | 310 | fprintf (stderr, "HSA warning: %s\nRuntime message: %s", str, hsa_error_msg); |
b2b40051 MJ |
311 | } |
312 | ||
313 | /* Report a fatal error STR together with the HSA error corresponding to STATUS | |
314 | and terminate execution of the current process. */ | |
315 | ||
316 | static void | |
317 | hsa_fatal (const char *str, hsa_status_t status) | |
318 | { | |
6ce13072 | 319 | const char *hsa_error_msg; |
b8d89b03 | 320 | hsa_fns.hsa_status_string_fn (status, &hsa_error_msg); |
b2b40051 | 321 | GOMP_PLUGIN_fatal ("HSA fatal error: %s\nRuntime message: %s", str, |
6ce13072 CLT |
322 | hsa_error_msg); |
323 | } | |
324 | ||
325 | /* Like hsa_fatal, except only report error message, and return FALSE | |
326 | for propagating error processing to outside of plugin. */ | |
327 | ||
328 | static bool | |
329 | hsa_error (const char *str, hsa_status_t status) | |
330 | { | |
331 | const char *hsa_error_msg; | |
b8d89b03 | 332 | hsa_fns.hsa_status_string_fn (status, &hsa_error_msg); |
6ce13072 CLT |
333 | GOMP_PLUGIN_error ("HSA fatal error: %s\nRuntime message: %s", str, |
334 | hsa_error_msg); | |
335 | return false; | |
b2b40051 MJ |
336 | } |
337 | ||
338 | struct hsa_kernel_description | |
339 | { | |
340 | const char *name; | |
341 | unsigned omp_data_size; | |
342 | bool gridified_kernel_p; | |
343 | unsigned kernel_dependencies_count; | |
344 | const char **kernel_dependencies; | |
345 | }; | |
346 | ||
347 | struct global_var_info | |
348 | { | |
349 | const char *name; | |
350 | void *address; | |
351 | }; | |
352 | ||
353 | /* Data passed by the static initializer of a compilation unit containing BRIG | |
354 | to GOMP_offload_register. */ | |
355 | ||
356 | struct brig_image_desc | |
357 | { | |
358 | hsa_ext_module_t brig_module; | |
359 | const unsigned kernel_count; | |
360 | struct hsa_kernel_description *kernel_infos; | |
361 | const unsigned global_variable_count; | |
362 | struct global_var_info *global_variables; | |
363 | }; | |
364 | ||
365 | struct agent_info; | |
366 | ||
367 | /* Information required to identify, finalize and run any given kernel. */ | |
368 | ||
369 | struct kernel_info | |
370 | { | |
371 | /* Name of the kernel, required to locate it within the brig module. */ | |
372 | const char *name; | |
373 | /* Size of memory space for OMP data. */ | |
374 | unsigned omp_data_size; | |
375 | /* The specific agent the kernel has been or will be finalized for and run | |
376 | on. */ | |
377 | struct agent_info *agent; | |
378 | /* The specific module where the kernel takes place. */ | |
379 | struct module_info *module; | |
380 | /* Mutex enforcing that at most once thread ever initializes a kernel for | |
381 | use. A thread should have locked agent->modules_rwlock for reading before | |
382 | acquiring it. */ | |
383 | pthread_mutex_t init_mutex; | |
384 | /* Flag indicating whether the kernel has been initialized and all fields | |
385 | below it contain valid data. */ | |
386 | bool initialized; | |
387 | /* Flag indicating that the kernel has a problem that blocks an execution. */ | |
388 | bool initialization_failed; | |
389 | /* The object to be put into the dispatch queue. */ | |
390 | uint64_t object; | |
391 | /* Required size of kernel arguments. */ | |
392 | uint32_t kernarg_segment_size; | |
393 | /* Required size of group segment. */ | |
394 | uint32_t group_segment_size; | |
395 | /* Required size of private segment. */ | |
396 | uint32_t private_segment_size; | |
397 | /* List of all kernel dependencies. */ | |
398 | const char **dependencies; | |
399 | /* Number of dependencies. */ | |
400 | unsigned dependencies_count; | |
401 | /* Maximum OMP data size necessary for kernel from kernel dispatches. */ | |
402 | unsigned max_omp_data_size; | |
403 | /* True if the kernel is gridified. */ | |
404 | bool gridified_kernel_p; | |
405 | }; | |
406 | ||
407 | /* Information about a particular brig module, its image and kernels. */ | |
408 | ||
409 | struct module_info | |
410 | { | |
411 | /* The next and previous module in the linked list of modules of an agent. */ | |
412 | struct module_info *next, *prev; | |
413 | /* The description with which the program has registered the image. */ | |
414 | struct brig_image_desc *image_desc; | |
415 | ||
416 | /* Number of kernels in this module. */ | |
417 | int kernel_count; | |
418 | /* An array of kernel_info structures describing each kernel in this | |
419 | module. */ | |
420 | struct kernel_info kernels[]; | |
421 | }; | |
422 | ||
423 | /* Information about shared brig library. */ | |
424 | ||
425 | struct brig_library_info | |
426 | { | |
427 | char *file_name; | |
428 | hsa_ext_module_t image; | |
429 | }; | |
430 | ||
431 | /* Description of an HSA GPU agent and the program associated with it. */ | |
432 | ||
433 | struct agent_info | |
434 | { | |
435 | /* The HSA ID of the agent. Assigned when hsa_context is initialized. */ | |
436 | hsa_agent_t id; | |
437 | /* Whether the agent has been initialized. The fields below are usable only | |
438 | if it has been. */ | |
439 | bool initialized; | |
440 | /* The HSA ISA of this agent. */ | |
441 | hsa_isa_t isa; | |
442 | /* Command queue of the agent. */ | |
443 | hsa_queue_t *command_q; | |
444 | /* Kernel from kernel dispatch command queue. */ | |
445 | hsa_queue_t *kernel_dispatch_command_q; | |
446 | /* The HSA memory region from which to allocate kernel arguments. */ | |
447 | hsa_region_t kernarg_region; | |
448 | ||
449 | /* Read-write lock that protects kernels which are running or about to be run | |
450 | from interference with loading and unloading of images. Needs to be | |
451 | locked for reading while a kernel is being run, and for writing if the | |
452 | list of modules is manipulated (and thus the HSA program invalidated). */ | |
453 | pthread_rwlock_t modules_rwlock; | |
454 | /* The first module in a linked list of modules associated with this | |
455 | kernel. */ | |
456 | struct module_info *first_module; | |
457 | ||
458 | /* Mutex enforcing that only one thread will finalize the HSA program. A | |
459 | thread should have locked agent->modules_rwlock for reading before | |
460 | acquiring it. */ | |
461 | pthread_mutex_t prog_mutex; | |
462 | /* Flag whether the HSA program that consists of all the modules has been | |
463 | finalized. */ | |
464 | bool prog_finalized; | |
465 | /* Flag whether the program was finalized but with a failure. */ | |
466 | bool prog_finalized_error; | |
467 | /* HSA executable - the finalized program that is used to locate kernels. */ | |
468 | hsa_executable_t executable; | |
469 | /* List of BRIG libraries. */ | |
470 | struct brig_library_info **brig_libraries; | |
471 | /* Number of loaded shared BRIG libraries. */ | |
472 | unsigned brig_libraries_count; | |
473 | }; | |
474 | ||
475 | /* Information about the whole HSA environment and all of its agents. */ | |
476 | ||
477 | struct hsa_context_info | |
478 | { | |
479 | /* Whether the structure has been initialized. */ | |
480 | bool initialized; | |
481 | /* Number of usable GPU HSA agents in the system. */ | |
482 | int agent_count; | |
483 | /* Array of agent_info structures describing the individual HSA agents. */ | |
484 | struct agent_info *agents; | |
485 | }; | |
486 | ||
487 | /* Information about the whole HSA environment and all of its agents. */ | |
488 | ||
489 | static struct hsa_context_info hsa_context; | |
490 | ||
b8d89b03 ML |
491 | #define DLSYM_FN(function) \ |
492 | hsa_fns.function##_fn = dlsym (handle, #function); \ | |
493 | if (hsa_fns.function##_fn == NULL) \ | |
494 | return false; | |
495 | ||
496 | static bool | |
497 | init_hsa_runtime_functions (void) | |
498 | { | |
499 | void *handle = dlopen (hsa_runtime_lib, RTLD_LAZY); | |
500 | if (handle == NULL) | |
501 | return false; | |
502 | ||
503 | DLSYM_FN (hsa_status_string) | |
504 | DLSYM_FN (hsa_agent_get_info) | |
505 | DLSYM_FN (hsa_init) | |
506 | DLSYM_FN (hsa_iterate_agents) | |
507 | DLSYM_FN (hsa_region_get_info) | |
508 | DLSYM_FN (hsa_queue_create) | |
509 | DLSYM_FN (hsa_agent_iterate_regions) | |
510 | DLSYM_FN (hsa_executable_destroy) | |
511 | DLSYM_FN (hsa_executable_create) | |
512 | DLSYM_FN (hsa_executable_global_variable_define) | |
513 | DLSYM_FN (hsa_executable_load_code_object) | |
514 | DLSYM_FN (hsa_executable_freeze) | |
515 | DLSYM_FN (hsa_signal_create) | |
516 | DLSYM_FN (hsa_memory_allocate) | |
517 | DLSYM_FN (hsa_memory_free) | |
518 | DLSYM_FN (hsa_signal_destroy) | |
519 | DLSYM_FN (hsa_executable_get_symbol) | |
520 | DLSYM_FN (hsa_executable_symbol_get_info) | |
521 | DLSYM_FN (hsa_queue_add_write_index_release) | |
522 | DLSYM_FN (hsa_queue_load_read_index_acquire) | |
523 | DLSYM_FN (hsa_signal_wait_acquire) | |
524 | DLSYM_FN (hsa_signal_store_relaxed) | |
525 | DLSYM_FN (hsa_signal_store_release) | |
526 | DLSYM_FN (hsa_signal_load_acquire) | |
527 | DLSYM_FN (hsa_queue_destroy) | |
528 | DLSYM_FN (hsa_ext_program_add_module) | |
529 | DLSYM_FN (hsa_ext_program_create) | |
530 | DLSYM_FN (hsa_ext_program_destroy) | |
531 | DLSYM_FN (hsa_ext_program_finalize) | |
532 | return true; | |
533 | } | |
534 | ||
b2b40051 MJ |
535 | /* Find kernel for an AGENT by name provided in KERNEL_NAME. */ |
536 | ||
537 | static struct kernel_info * | |
538 | get_kernel_for_agent (struct agent_info *agent, const char *kernel_name) | |
539 | { | |
540 | struct module_info *module = agent->first_module; | |
541 | ||
542 | while (module) | |
543 | { | |
544 | for (unsigned i = 0; i < module->kernel_count; i++) | |
545 | if (strcmp (module->kernels[i].name, kernel_name) == 0) | |
546 | return &module->kernels[i]; | |
547 | ||
548 | module = module->next; | |
549 | } | |
550 | ||
551 | return NULL; | |
552 | } | |
553 | ||
554 | /* Return true if the agent is a GPU and acceptable of concurrent submissions | |
555 | from different threads. */ | |
556 | ||
557 | static bool | |
558 | suitable_hsa_agent_p (hsa_agent_t agent) | |
559 | { | |
560 | hsa_device_type_t device_type; | |
561 | hsa_status_t status | |
b8d89b03 ML |
562 | = hsa_fns.hsa_agent_get_info_fn (agent, HSA_AGENT_INFO_DEVICE, |
563 | &device_type); | |
564 | if (status != HSA_STATUS_SUCCESS) | |
b2b40051 MJ |
565 | return false; |
566 | ||
b8d89b03 ML |
567 | switch (device_type) |
568 | { | |
569 | case HSA_DEVICE_TYPE_GPU: | |
570 | break; | |
571 | case HSA_DEVICE_TYPE_CPU: | |
572 | if (!support_cpu_devices) | |
573 | return false; | |
574 | break; | |
575 | default: | |
576 | return false; | |
577 | } | |
578 | ||
b2b40051 | 579 | uint32_t features = 0; |
b8d89b03 ML |
580 | status = hsa_fns.hsa_agent_get_info_fn (agent, HSA_AGENT_INFO_FEATURE, |
581 | &features); | |
b2b40051 MJ |
582 | if (status != HSA_STATUS_SUCCESS |
583 | || !(features & HSA_AGENT_FEATURE_KERNEL_DISPATCH)) | |
584 | return false; | |
585 | hsa_queue_type_t queue_type; | |
b8d89b03 ML |
586 | status = hsa_fns.hsa_agent_get_info_fn (agent, HSA_AGENT_INFO_QUEUE_TYPE, |
587 | &queue_type); | |
b2b40051 MJ |
588 | if (status != HSA_STATUS_SUCCESS |
589 | || (queue_type != HSA_QUEUE_TYPE_MULTI)) | |
590 | return false; | |
591 | ||
592 | return true; | |
593 | } | |
594 | ||
595 | /* Callback of hsa_iterate_agents, if AGENT is a GPU device, increment | |
596 | agent_count in hsa_context. */ | |
597 | ||
598 | static hsa_status_t | |
599 | count_gpu_agents (hsa_agent_t agent, void *data __attribute__ ((unused))) | |
600 | { | |
601 | if (suitable_hsa_agent_p (agent)) | |
602 | hsa_context.agent_count++; | |
603 | return HSA_STATUS_SUCCESS; | |
604 | } | |
605 | ||
606 | /* Callback of hsa_iterate_agents, if AGENT is a GPU device, assign the agent | |
607 | id to the describing structure in the hsa context. The index of the | |
608 | structure is pointed to by DATA, increment it afterwards. */ | |
609 | ||
610 | static hsa_status_t | |
611 | assign_agent_ids (hsa_agent_t agent, void *data) | |
612 | { | |
613 | if (suitable_hsa_agent_p (agent)) | |
614 | { | |
615 | int *agent_index = (int *) data; | |
616 | hsa_context.agents[*agent_index].id = agent; | |
617 | ++*agent_index; | |
618 | } | |
619 | return HSA_STATUS_SUCCESS; | |
620 | } | |
621 | ||
6ce13072 CLT |
622 | /* Initialize hsa_context if it has not already been done. |
623 | Return TRUE on success. */ | |
b2b40051 | 624 | |
6ce13072 | 625 | static bool |
b2b40051 MJ |
626 | init_hsa_context (void) |
627 | { | |
628 | hsa_status_t status; | |
629 | int agent_index = 0; | |
630 | ||
631 | if (hsa_context.initialized) | |
6ce13072 | 632 | return true; |
b2b40051 | 633 | init_enviroment_variables (); |
b8d89b03 ML |
634 | if (!init_hsa_runtime_functions ()) |
635 | { | |
636 | HSA_DEBUG ("Run-time could not be dynamically opened\n"); | |
637 | return false; | |
638 | } | |
639 | status = hsa_fns.hsa_init_fn (); | |
b2b40051 | 640 | if (status != HSA_STATUS_SUCCESS) |
6ce13072 | 641 | return hsa_error ("Run-time could not be initialized", status); |
b2b40051 | 642 | HSA_DEBUG ("HSA run-time initialized\n"); |
b8d89b03 | 643 | status = hsa_fns.hsa_iterate_agents_fn (count_gpu_agents, NULL); |
b2b40051 | 644 | if (status != HSA_STATUS_SUCCESS) |
6ce13072 | 645 | return hsa_error ("HSA GPU devices could not be enumerated", status); |
b2b40051 MJ |
646 | HSA_DEBUG ("There are %i HSA GPU devices.\n", hsa_context.agent_count); |
647 | ||
648 | hsa_context.agents | |
649 | = GOMP_PLUGIN_malloc_cleared (hsa_context.agent_count | |
650 | * sizeof (struct agent_info)); | |
b8d89b03 | 651 | status = hsa_fns.hsa_iterate_agents_fn (assign_agent_ids, &agent_index); |
b2b40051 | 652 | if (agent_index != hsa_context.agent_count) |
6ce13072 CLT |
653 | { |
654 | GOMP_PLUGIN_error ("Failed to assign IDs to all HSA agents"); | |
655 | return false; | |
656 | } | |
b2b40051 | 657 | hsa_context.initialized = true; |
6ce13072 | 658 | return true; |
b2b40051 MJ |
659 | } |
660 | ||
661 | /* Callback of dispatch queues to report errors. */ | |
662 | ||
663 | static void | |
664 | queue_callback (hsa_status_t status, | |
665 | hsa_queue_t *queue __attribute__ ((unused)), | |
666 | void *data __attribute__ ((unused))) | |
667 | { | |
668 | hsa_fatal ("Asynchronous queue error", status); | |
669 | } | |
670 | ||
671 | /* Callback of hsa_agent_iterate_regions. Determine if a memory REGION can be | |
672 | used for kernarg allocations and if so write it to the memory pointed to by | |
673 | DATA and break the query. */ | |
674 | ||
675 | static hsa_status_t | |
676 | get_kernarg_memory_region (hsa_region_t region, void *data) | |
677 | { | |
678 | hsa_status_t status; | |
679 | hsa_region_segment_t segment; | |
680 | ||
b8d89b03 ML |
681 | status = hsa_fns.hsa_region_get_info_fn (region, HSA_REGION_INFO_SEGMENT, |
682 | &segment); | |
b2b40051 MJ |
683 | if (status != HSA_STATUS_SUCCESS) |
684 | return status; | |
685 | if (segment != HSA_REGION_SEGMENT_GLOBAL) | |
686 | return HSA_STATUS_SUCCESS; | |
687 | ||
688 | uint32_t flags; | |
b8d89b03 ML |
689 | status = hsa_fns.hsa_region_get_info_fn (region, HSA_REGION_INFO_GLOBAL_FLAGS, |
690 | &flags); | |
b2b40051 MJ |
691 | if (status != HSA_STATUS_SUCCESS) |
692 | return status; | |
693 | if (flags & HSA_REGION_GLOBAL_FLAG_KERNARG) | |
694 | { | |
695 | hsa_region_t *ret = (hsa_region_t *) data; | |
696 | *ret = region; | |
697 | return HSA_STATUS_INFO_BREAK; | |
698 | } | |
699 | return HSA_STATUS_SUCCESS; | |
700 | } | |
701 | ||
702 | /* Part of the libgomp plugin interface. Return the number of HSA devices on | |
703 | the system. */ | |
704 | ||
705 | int | |
706 | GOMP_OFFLOAD_get_num_devices (void) | |
707 | { | |
6ce13072 CLT |
708 | if (!init_hsa_context ()) |
709 | return 0; | |
b2b40051 MJ |
710 | return hsa_context.agent_count; |
711 | } | |
712 | ||
713 | /* Part of the libgomp plugin interface. Initialize agent number N so that it | |
6ce13072 | 714 | can be used for computation. Return TRUE on success. */ |
b2b40051 | 715 | |
6ce13072 | 716 | bool |
b2b40051 MJ |
717 | GOMP_OFFLOAD_init_device (int n) |
718 | { | |
6ce13072 CLT |
719 | if (!init_hsa_context ()) |
720 | return false; | |
b2b40051 | 721 | if (n >= hsa_context.agent_count) |
6ce13072 CLT |
722 | { |
723 | GOMP_PLUGIN_error ("Request to initialize non-existing HSA device %i", n); | |
724 | return false; | |
725 | } | |
b2b40051 MJ |
726 | struct agent_info *agent = &hsa_context.agents[n]; |
727 | ||
728 | if (agent->initialized) | |
6ce13072 | 729 | return true; |
b2b40051 MJ |
730 | |
731 | if (pthread_rwlock_init (&agent->modules_rwlock, NULL)) | |
6ce13072 CLT |
732 | { |
733 | GOMP_PLUGIN_error ("Failed to initialize an HSA agent rwlock"); | |
734 | return false; | |
735 | } | |
b2b40051 | 736 | if (pthread_mutex_init (&agent->prog_mutex, NULL)) |
6ce13072 CLT |
737 | { |
738 | GOMP_PLUGIN_error ("Failed to initialize an HSA agent program mutex"); | |
739 | return false; | |
740 | } | |
b2b40051 MJ |
741 | |
742 | uint32_t queue_size; | |
743 | hsa_status_t status; | |
b8d89b03 ML |
744 | status = hsa_fns.hsa_agent_get_info_fn (agent->id, |
745 | HSA_AGENT_INFO_QUEUE_MAX_SIZE, | |
746 | &queue_size); | |
b2b40051 | 747 | if (status != HSA_STATUS_SUCCESS) |
6ce13072 | 748 | return hsa_error ("Error requesting maximum queue size of the HSA agent", |
b8d89b03 ML |
749 | status); |
750 | status = hsa_fns.hsa_agent_get_info_fn (agent->id, HSA_AGENT_INFO_ISA, | |
751 | &agent->isa); | |
b2b40051 | 752 | if (status != HSA_STATUS_SUCCESS) |
6ce13072 | 753 | return hsa_error ("Error querying the ISA of the agent", status); |
b8d89b03 ML |
754 | status = hsa_fns.hsa_queue_create_fn (agent->id, queue_size, |
755 | HSA_QUEUE_TYPE_MULTI, | |
756 | queue_callback, NULL, UINT32_MAX, | |
757 | UINT32_MAX, | |
758 | &agent->command_q); | |
b2b40051 | 759 | if (status != HSA_STATUS_SUCCESS) |
6ce13072 | 760 | return hsa_error ("Error creating command queue", status); |
b2b40051 | 761 | |
b8d89b03 ML |
762 | status = hsa_fns.hsa_queue_create_fn (agent->id, queue_size, |
763 | HSA_QUEUE_TYPE_MULTI, | |
764 | queue_callback, NULL, UINT32_MAX, | |
765 | UINT32_MAX, | |
766 | &agent->kernel_dispatch_command_q); | |
b2b40051 | 767 | if (status != HSA_STATUS_SUCCESS) |
6ce13072 | 768 | return hsa_error ("Error creating kernel dispatch command queue", status); |
b2b40051 MJ |
769 | |
770 | agent->kernarg_region.handle = (uint64_t) -1; | |
b8d89b03 ML |
771 | status = hsa_fns.hsa_agent_iterate_regions_fn (agent->id, |
772 | get_kernarg_memory_region, | |
773 | &agent->kernarg_region); | |
b2b40051 | 774 | if (agent->kernarg_region.handle == (uint64_t) -1) |
6ce13072 CLT |
775 | { |
776 | GOMP_PLUGIN_error ("Could not find suitable memory region for kernel " | |
777 | "arguments"); | |
778 | return false; | |
779 | } | |
b2b40051 MJ |
780 | HSA_DEBUG ("HSA agent initialized, queue has id %llu\n", |
781 | (long long unsigned) agent->command_q->id); | |
782 | HSA_DEBUG ("HSA agent initialized, kernel dispatch queue has id %llu\n", | |
783 | (long long unsigned) agent->kernel_dispatch_command_q->id); | |
784 | agent->initialized = true; | |
6ce13072 | 785 | return true; |
b2b40051 MJ |
786 | } |
787 | ||
788 | /* Verify that hsa_context has already been initialized and return the | |
6ce13072 | 789 | agent_info structure describing device number N. Return NULL on error. */ |
b2b40051 MJ |
790 | |
791 | static struct agent_info * | |
792 | get_agent_info (int n) | |
793 | { | |
794 | if (!hsa_context.initialized) | |
6ce13072 CLT |
795 | { |
796 | GOMP_PLUGIN_error ("Attempt to use uninitialized HSA context."); | |
797 | return NULL; | |
798 | } | |
b2b40051 | 799 | if (n >= hsa_context.agent_count) |
6ce13072 CLT |
800 | { |
801 | GOMP_PLUGIN_error ("Request to operate on anon-existing HSA device %i", n); | |
802 | return NULL; | |
803 | } | |
b2b40051 | 804 | if (!hsa_context.agents[n].initialized) |
6ce13072 CLT |
805 | { |
806 | GOMP_PLUGIN_error ("Attempt to use an uninitialized HSA agent."); | |
807 | return NULL; | |
808 | } | |
b2b40051 MJ |
809 | return &hsa_context.agents[n]; |
810 | } | |
811 | ||
812 | /* Insert MODULE to the linked list of modules of AGENT. */ | |
813 | ||
814 | static void | |
815 | add_module_to_agent (struct agent_info *agent, struct module_info *module) | |
816 | { | |
817 | if (agent->first_module) | |
818 | agent->first_module->prev = module; | |
819 | module->next = agent->first_module; | |
820 | module->prev = NULL; | |
821 | agent->first_module = module; | |
822 | } | |
823 | ||
824 | /* Remove MODULE from the linked list of modules of AGENT. */ | |
825 | ||
826 | static void | |
827 | remove_module_from_agent (struct agent_info *agent, struct module_info *module) | |
828 | { | |
829 | if (agent->first_module == module) | |
830 | agent->first_module = module->next; | |
831 | if (module->prev) | |
832 | module->prev->next = module->next; | |
833 | if (module->next) | |
834 | module->next->prev = module->prev; | |
835 | } | |
836 | ||
837 | /* Free the HSA program in agent and everything associated with it and set | |
6ce13072 CLT |
838 | agent->prog_finalized and the initialized flags of all kernels to false. |
839 | Return TRUE on success. */ | |
b2b40051 | 840 | |
6ce13072 | 841 | static bool |
b2b40051 MJ |
842 | destroy_hsa_program (struct agent_info *agent) |
843 | { | |
844 | if (!agent->prog_finalized || agent->prog_finalized_error) | |
6ce13072 | 845 | return true; |
b2b40051 MJ |
846 | |
847 | hsa_status_t status; | |
848 | ||
849 | HSA_DEBUG ("Destroying the current HSA program.\n"); | |
850 | ||
b8d89b03 | 851 | status = hsa_fns.hsa_executable_destroy_fn (agent->executable); |
b2b40051 | 852 | if (status != HSA_STATUS_SUCCESS) |
6ce13072 | 853 | return hsa_error ("Could not destroy HSA executable", status); |
b2b40051 MJ |
854 | |
855 | struct module_info *module; | |
856 | for (module = agent->first_module; module; module = module->next) | |
857 | { | |
858 | int i; | |
859 | for (i = 0; i < module->kernel_count; i++) | |
860 | module->kernels[i].initialized = false; | |
861 | } | |
862 | agent->prog_finalized = false; | |
6ce13072 | 863 | return true; |
b2b40051 MJ |
864 | } |
865 | ||
b8d89b03 ML |
866 | /* Initialize KERNEL from D and other parameters. Return true on success. */ |
867 | ||
868 | static bool | |
869 | init_basic_kernel_info (struct kernel_info *kernel, | |
870 | struct hsa_kernel_description *d, | |
871 | struct agent_info *agent, | |
872 | struct module_info *module) | |
873 | { | |
874 | kernel->agent = agent; | |
875 | kernel->module = module; | |
876 | kernel->name = d->name; | |
877 | kernel->omp_data_size = d->omp_data_size; | |
878 | kernel->gridified_kernel_p = d->gridified_kernel_p; | |
879 | kernel->dependencies_count = d->kernel_dependencies_count; | |
880 | kernel->dependencies = d->kernel_dependencies; | |
881 | if (pthread_mutex_init (&kernel->init_mutex, NULL)) | |
882 | { | |
883 | GOMP_PLUGIN_error ("Failed to initialize an HSA kernel mutex"); | |
884 | return false; | |
885 | } | |
886 | return true; | |
887 | } | |
888 | ||
b2b40051 MJ |
889 | /* Part of the libgomp plugin interface. Load BRIG module described by struct |
890 | brig_image_desc in TARGET_DATA and return references to kernel descriptors | |
891 | in TARGET_TABLE. */ | |
892 | ||
893 | int | |
894 | GOMP_OFFLOAD_load_image (int ord, unsigned version, void *target_data, | |
895 | struct addr_pair **target_table) | |
896 | { | |
897 | if (GOMP_VERSION_DEV (version) > GOMP_VERSION_HSA) | |
6ce13072 CLT |
898 | { |
899 | GOMP_PLUGIN_error ("Offload data incompatible with HSA plugin" | |
900 | " (expected %u, received %u)", | |
901 | GOMP_VERSION_HSA, GOMP_VERSION_DEV (version)); | |
902 | return -1; | |
903 | } | |
b2b40051 MJ |
904 | |
905 | struct brig_image_desc *image_desc = (struct brig_image_desc *) target_data; | |
906 | struct agent_info *agent; | |
907 | struct addr_pair *pair; | |
908 | struct module_info *module; | |
909 | struct kernel_info *kernel; | |
910 | int kernel_count = image_desc->kernel_count; | |
911 | ||
912 | agent = get_agent_info (ord); | |
6ce13072 CLT |
913 | if (!agent) |
914 | return -1; | |
915 | ||
b2b40051 | 916 | if (pthread_rwlock_wrlock (&agent->modules_rwlock)) |
6ce13072 CLT |
917 | { |
918 | GOMP_PLUGIN_error ("Unable to write-lock an HSA agent rwlock"); | |
919 | return -1; | |
920 | } | |
921 | if (agent->prog_finalized | |
922 | && !destroy_hsa_program (agent)) | |
923 | return -1; | |
b2b40051 MJ |
924 | |
925 | HSA_DEBUG ("Encountered %d kernels in an image\n", kernel_count); | |
926 | pair = GOMP_PLUGIN_malloc (kernel_count * sizeof (struct addr_pair)); | |
927 | *target_table = pair; | |
928 | module = (struct module_info *) | |
929 | GOMP_PLUGIN_malloc_cleared (sizeof (struct module_info) | |
930 | + kernel_count * sizeof (struct kernel_info)); | |
931 | module->image_desc = image_desc; | |
932 | module->kernel_count = kernel_count; | |
933 | ||
934 | kernel = &module->kernels[0]; | |
935 | ||
936 | /* Allocate memory for kernel dependencies. */ | |
937 | for (unsigned i = 0; i < kernel_count; i++) | |
938 | { | |
939 | pair->start = (uintptr_t) kernel; | |
940 | pair->end = (uintptr_t) (kernel + 1); | |
941 | ||
942 | struct hsa_kernel_description *d = &image_desc->kernel_infos[i]; | |
b8d89b03 ML |
943 | if (!init_basic_kernel_info (kernel, d, agent, module)) |
944 | return -1; | |
b2b40051 MJ |
945 | kernel++; |
946 | pair++; | |
947 | } | |
948 | ||
949 | add_module_to_agent (agent, module); | |
950 | if (pthread_rwlock_unlock (&agent->modules_rwlock)) | |
6ce13072 CLT |
951 | { |
952 | GOMP_PLUGIN_error ("Unable to unlock an HSA agent rwlock"); | |
953 | return -1; | |
954 | } | |
b2b40051 MJ |
955 | return kernel_count; |
956 | } | |
957 | ||
958 | /* Add a shared BRIG library from a FILE_NAME to an AGENT. */ | |
959 | ||
960 | static struct brig_library_info * | |
961 | add_shared_library (const char *file_name, struct agent_info *agent) | |
962 | { | |
963 | struct brig_library_info *library = NULL; | |
964 | ||
965 | void *f = dlopen (file_name, RTLD_NOW); | |
966 | void *start = dlsym (f, "__brig_start"); | |
967 | void *end = dlsym (f, "__brig_end"); | |
968 | ||
969 | if (start == NULL || end == NULL) | |
970 | return NULL; | |
971 | ||
972 | unsigned size = end - start; | |
973 | char *buf = (char *) GOMP_PLUGIN_malloc (size); | |
974 | memcpy (buf, start, size); | |
975 | ||
976 | library = GOMP_PLUGIN_malloc (sizeof (struct agent_info)); | |
977 | library->file_name = (char *) GOMP_PLUGIN_malloc | |
978 | ((strlen (file_name) + 1)); | |
979 | strcpy (library->file_name, file_name); | |
980 | library->image = (hsa_ext_module_t) buf; | |
981 | ||
982 | return library; | |
983 | } | |
984 | ||
985 | /* Release memory used for BRIG shared libraries that correspond | |
986 | to an AGENT. */ | |
987 | ||
988 | static void | |
989 | release_agent_shared_libraries (struct agent_info *agent) | |
990 | { | |
991 | for (unsigned i = 0; i < agent->brig_libraries_count; i++) | |
992 | if (agent->brig_libraries[i]) | |
993 | { | |
994 | free (agent->brig_libraries[i]->file_name); | |
995 | free (agent->brig_libraries[i]->image); | |
996 | free (agent->brig_libraries[i]); | |
997 | } | |
998 | ||
999 | free (agent->brig_libraries); | |
1000 | } | |
1001 | ||
1002 | /* Create and finalize the program consisting of all loaded modules. */ | |
1003 | ||
1004 | static void | |
1005 | create_and_finalize_hsa_program (struct agent_info *agent) | |
1006 | { | |
1007 | hsa_status_t status; | |
1008 | hsa_ext_program_t prog_handle; | |
1009 | int mi = 0; | |
1010 | ||
1011 | if (pthread_mutex_lock (&agent->prog_mutex)) | |
1012 | GOMP_PLUGIN_fatal ("Could not lock an HSA agent program mutex"); | |
1013 | if (agent->prog_finalized) | |
1014 | goto final; | |
1015 | ||
b8d89b03 ML |
1016 | status = hsa_fns.hsa_ext_program_create_fn |
1017 | (HSA_MACHINE_MODEL_LARGE, HSA_PROFILE_FULL, | |
1018 | HSA_DEFAULT_FLOAT_ROUNDING_MODE_DEFAULT, | |
1019 | NULL, &prog_handle); | |
b2b40051 MJ |
1020 | if (status != HSA_STATUS_SUCCESS) |
1021 | hsa_fatal ("Could not create an HSA program", status); | |
1022 | ||
1023 | HSA_DEBUG ("Created a finalized program\n"); | |
1024 | ||
1025 | struct module_info *module = agent->first_module; | |
1026 | while (module) | |
1027 | { | |
b8d89b03 ML |
1028 | status = hsa_fns.hsa_ext_program_add_module_fn |
1029 | (prog_handle, module->image_desc->brig_module); | |
b2b40051 MJ |
1030 | if (status != HSA_STATUS_SUCCESS) |
1031 | hsa_fatal ("Could not add a module to the HSA program", status); | |
1032 | module = module->next; | |
1033 | mi++; | |
1034 | } | |
1035 | ||
1036 | /* Load all shared libraries. */ | |
1037 | const char *libraries[] = { "libhsamath.so", "libhsastd.so" }; | |
1038 | const unsigned libraries_count = sizeof (libraries) / sizeof (const char *); | |
1039 | ||
1040 | agent->brig_libraries_count = libraries_count; | |
1041 | agent->brig_libraries = GOMP_PLUGIN_malloc_cleared | |
1042 | (sizeof (struct brig_library_info) * libraries_count); | |
1043 | ||
1044 | for (unsigned i = 0; i < libraries_count; i++) | |
1045 | { | |
1046 | struct brig_library_info *library = add_shared_library (libraries[i], | |
1047 | agent); | |
1048 | if (library == NULL) | |
1049 | { | |
1050 | HSA_WARNING ("Could not open a shared BRIG library: %s\n", | |
1051 | libraries[i]); | |
1052 | continue; | |
1053 | } | |
1054 | ||
b8d89b03 ML |
1055 | status = hsa_fns.hsa_ext_program_add_module_fn (prog_handle, |
1056 | library->image); | |
b2b40051 MJ |
1057 | if (status != HSA_STATUS_SUCCESS) |
1058 | hsa_warn ("Could not add a shared BRIG library the HSA program", | |
1059 | status); | |
1060 | else | |
1061 | HSA_DEBUG ("a shared BRIG library has been added to a program: %s\n", | |
1062 | libraries[i]); | |
1063 | } | |
1064 | ||
1065 | hsa_ext_control_directives_t control_directives; | |
1066 | memset (&control_directives, 0, sizeof (control_directives)); | |
1067 | hsa_code_object_t code_object; | |
b8d89b03 ML |
1068 | status = hsa_fns.hsa_ext_program_finalize_fn |
1069 | (prog_handle, agent->isa,HSA_EXT_FINALIZER_CALL_CONVENTION_AUTO, | |
1070 | control_directives, "", HSA_CODE_OBJECT_TYPE_PROGRAM, &code_object); | |
b2b40051 MJ |
1071 | if (status != HSA_STATUS_SUCCESS) |
1072 | { | |
1073 | hsa_warn ("Finalization of the HSA program failed", status); | |
1074 | goto failure; | |
1075 | } | |
1076 | ||
1077 | HSA_DEBUG ("Finalization done\n"); | |
b8d89b03 | 1078 | hsa_fns.hsa_ext_program_destroy_fn (prog_handle); |
b2b40051 MJ |
1079 | |
1080 | status | |
b8d89b03 ML |
1081 | = hsa_fns.hsa_executable_create_fn (HSA_PROFILE_FULL, |
1082 | HSA_EXECUTABLE_STATE_UNFROZEN, | |
1083 | "", &agent->executable); | |
b2b40051 MJ |
1084 | if (status != HSA_STATUS_SUCCESS) |
1085 | hsa_fatal ("Could not create HSA executable", status); | |
1086 | ||
1087 | module = agent->first_module; | |
1088 | while (module) | |
1089 | { | |
1090 | /* Initialize all global variables declared in the module. */ | |
1091 | for (unsigned i = 0; i < module->image_desc->global_variable_count; i++) | |
1092 | { | |
1093 | struct global_var_info *var; | |
1094 | var = &module->image_desc->global_variables[i]; | |
b8d89b03 ML |
1095 | status = hsa_fns.hsa_executable_global_variable_define_fn |
1096 | (agent->executable, var->name, var->address); | |
b2b40051 MJ |
1097 | |
1098 | HSA_DEBUG ("Defining global variable: %s, address: %p\n", var->name, | |
1099 | var->address); | |
1100 | ||
1101 | if (status != HSA_STATUS_SUCCESS) | |
1102 | hsa_fatal ("Could not define a global variable in the HSA program", | |
1103 | status); | |
1104 | } | |
1105 | ||
1106 | module = module->next; | |
1107 | } | |
1108 | ||
b8d89b03 ML |
1109 | status = hsa_fns.hsa_executable_load_code_object_fn (agent->executable, |
1110 | agent->id, | |
1111 | code_object, ""); | |
b2b40051 MJ |
1112 | if (status != HSA_STATUS_SUCCESS) |
1113 | hsa_fatal ("Could not add a code object to the HSA executable", status); | |
b8d89b03 | 1114 | status = hsa_fns.hsa_executable_freeze_fn (agent->executable, ""); |
b2b40051 MJ |
1115 | if (status != HSA_STATUS_SUCCESS) |
1116 | hsa_fatal ("Could not freeze the HSA executable", status); | |
1117 | ||
1118 | HSA_DEBUG ("Froze HSA executable with the finalized code object\n"); | |
1119 | ||
1120 | /* If all goes good, jump to final. */ | |
1121 | goto final; | |
1122 | ||
1123 | failure: | |
1124 | agent->prog_finalized_error = true; | |
1125 | ||
1126 | final: | |
1127 | agent->prog_finalized = true; | |
1128 | ||
1129 | if (pthread_mutex_unlock (&agent->prog_mutex)) | |
1130 | GOMP_PLUGIN_fatal ("Could not unlock an HSA agent program mutex"); | |
1131 | } | |
1132 | ||
1133 | /* Create kernel dispatch data structure for given KERNEL. */ | |
1134 | ||
1135 | static struct GOMP_hsa_kernel_dispatch * | |
1136 | create_single_kernel_dispatch (struct kernel_info *kernel, | |
1137 | unsigned omp_data_size) | |
1138 | { | |
1139 | struct agent_info *agent = kernel->agent; | |
1140 | struct GOMP_hsa_kernel_dispatch *shadow | |
1141 | = GOMP_PLUGIN_malloc_cleared (sizeof (struct GOMP_hsa_kernel_dispatch)); | |
1142 | ||
1143 | shadow->queue = agent->command_q; | |
1144 | shadow->omp_data_memory | |
1145 | = omp_data_size > 0 ? GOMP_PLUGIN_malloc (omp_data_size) : NULL; | |
1146 | unsigned dispatch_count = kernel->dependencies_count; | |
1147 | shadow->kernel_dispatch_count = dispatch_count; | |
1148 | ||
1149 | shadow->children_dispatches | |
1150 | = GOMP_PLUGIN_malloc (dispatch_count * sizeof (shadow)); | |
1151 | ||
1152 | shadow->object = kernel->object; | |
1153 | ||
1154 | hsa_signal_t sync_signal; | |
b8d89b03 | 1155 | hsa_status_t status = hsa_fns.hsa_signal_create_fn (1, 0, NULL, &sync_signal); |
b2b40051 MJ |
1156 | if (status != HSA_STATUS_SUCCESS) |
1157 | hsa_fatal ("Error creating the HSA sync signal", status); | |
1158 | ||
1159 | shadow->signal = sync_signal.handle; | |
1160 | shadow->private_segment_size = kernel->private_segment_size; | |
1161 | shadow->group_segment_size = kernel->group_segment_size; | |
1162 | ||
1163 | status | |
b8d89b03 ML |
1164 | = hsa_fns.hsa_memory_allocate_fn (agent->kernarg_region, |
1165 | kernel->kernarg_segment_size, | |
1166 | &shadow->kernarg_address); | |
b2b40051 MJ |
1167 | if (status != HSA_STATUS_SUCCESS) |
1168 | hsa_fatal ("Could not allocate memory for HSA kernel arguments", status); | |
1169 | ||
1170 | return shadow; | |
1171 | } | |
1172 | ||
1173 | /* Release data structure created for a kernel dispatch in SHADOW argument. */ | |
1174 | ||
1175 | static void | |
1176 | release_kernel_dispatch (struct GOMP_hsa_kernel_dispatch *shadow) | |
1177 | { | |
1178 | HSA_DEBUG ("Released kernel dispatch: %p has value: %lu (%p)\n", shadow, | |
1179 | shadow->debug, (void *) shadow->debug); | |
1180 | ||
b8d89b03 | 1181 | hsa_fns.hsa_memory_free_fn (shadow->kernarg_address); |
b2b40051 MJ |
1182 | |
1183 | hsa_signal_t s; | |
1184 | s.handle = shadow->signal; | |
b8d89b03 | 1185 | hsa_fns.hsa_signal_destroy_fn (s); |
b2b40051 MJ |
1186 | |
1187 | free (shadow->omp_data_memory); | |
1188 | ||
1189 | for (unsigned i = 0; i < shadow->kernel_dispatch_count; i++) | |
1190 | release_kernel_dispatch (shadow->children_dispatches[i]); | |
1191 | ||
1192 | free (shadow->children_dispatches); | |
1193 | free (shadow); | |
1194 | } | |
1195 | ||
1196 | /* Initialize a KERNEL without its dependencies. MAX_OMP_DATA_SIZE is used | |
1197 | to calculate maximum necessary memory for OMP data allocation. */ | |
1198 | ||
1199 | static void | |
1200 | init_single_kernel (struct kernel_info *kernel, unsigned *max_omp_data_size) | |
1201 | { | |
1202 | hsa_status_t status; | |
1203 | struct agent_info *agent = kernel->agent; | |
1204 | hsa_executable_symbol_t kernel_symbol; | |
b8d89b03 ML |
1205 | status = hsa_fns.hsa_executable_get_symbol_fn (agent->executable, NULL, |
1206 | kernel->name, agent->id, | |
1207 | 0, &kernel_symbol); | |
b2b40051 MJ |
1208 | if (status != HSA_STATUS_SUCCESS) |
1209 | { | |
1210 | hsa_warn ("Could not find symbol for kernel in the code object", status); | |
1211 | goto failure; | |
1212 | } | |
1213 | HSA_DEBUG ("Located kernel %s\n", kernel->name); | |
b8d89b03 ML |
1214 | status = hsa_fns.hsa_executable_symbol_get_info_fn |
1215 | (kernel_symbol, HSA_EXECUTABLE_SYMBOL_INFO_KERNEL_OBJECT, &kernel->object); | |
b2b40051 MJ |
1216 | if (status != HSA_STATUS_SUCCESS) |
1217 | hsa_fatal ("Could not extract a kernel object from its symbol", status); | |
b8d89b03 | 1218 | status = hsa_fns.hsa_executable_symbol_get_info_fn |
b2b40051 MJ |
1219 | (kernel_symbol, HSA_EXECUTABLE_SYMBOL_INFO_KERNEL_KERNARG_SEGMENT_SIZE, |
1220 | &kernel->kernarg_segment_size); | |
1221 | if (status != HSA_STATUS_SUCCESS) | |
1222 | hsa_fatal ("Could not get info about kernel argument size", status); | |
b8d89b03 | 1223 | status = hsa_fns.hsa_executable_symbol_get_info_fn |
b2b40051 MJ |
1224 | (kernel_symbol, HSA_EXECUTABLE_SYMBOL_INFO_KERNEL_GROUP_SEGMENT_SIZE, |
1225 | &kernel->group_segment_size); | |
1226 | if (status != HSA_STATUS_SUCCESS) | |
1227 | hsa_fatal ("Could not get info about kernel group segment size", status); | |
b8d89b03 | 1228 | status = hsa_fns.hsa_executable_symbol_get_info_fn |
b2b40051 MJ |
1229 | (kernel_symbol, HSA_EXECUTABLE_SYMBOL_INFO_KERNEL_PRIVATE_SEGMENT_SIZE, |
1230 | &kernel->private_segment_size); | |
1231 | if (status != HSA_STATUS_SUCCESS) | |
1232 | hsa_fatal ("Could not get info about kernel private segment size", | |
1233 | status); | |
1234 | ||
1235 | HSA_DEBUG ("Kernel structure for %s fully initialized with " | |
1236 | "following segment sizes: \n", kernel->name); | |
1237 | HSA_DEBUG (" group_segment_size: %u\n", | |
1238 | (unsigned) kernel->group_segment_size); | |
1239 | HSA_DEBUG (" private_segment_size: %u\n", | |
1240 | (unsigned) kernel->private_segment_size); | |
1241 | HSA_DEBUG (" kernarg_segment_size: %u\n", | |
1242 | (unsigned) kernel->kernarg_segment_size); | |
1243 | HSA_DEBUG (" omp_data_size: %u\n", kernel->omp_data_size); | |
1244 | HSA_DEBUG (" gridified_kernel_p: %u\n", kernel->gridified_kernel_p); | |
1245 | ||
1246 | if (kernel->omp_data_size > *max_omp_data_size) | |
1247 | *max_omp_data_size = kernel->omp_data_size; | |
1248 | ||
1249 | for (unsigned i = 0; i < kernel->dependencies_count; i++) | |
1250 | { | |
1251 | struct kernel_info *dependency | |
1252 | = get_kernel_for_agent (agent, kernel->dependencies[i]); | |
1253 | ||
1254 | if (dependency == NULL) | |
1255 | { | |
1256 | HSA_DEBUG ("Could not find a dependency for a kernel: %s, " | |
1257 | "dependency name: %s\n", kernel->name, | |
1258 | kernel->dependencies[i]); | |
1259 | goto failure; | |
1260 | } | |
1261 | ||
1262 | if (dependency->dependencies_count > 0) | |
1263 | { | |
1264 | HSA_DEBUG ("HSA does not allow kernel dispatching code with " | |
1265 | "a depth bigger than one\n") | |
1266 | goto failure; | |
1267 | } | |
1268 | ||
1269 | init_single_kernel (dependency, max_omp_data_size); | |
1270 | } | |
1271 | ||
1272 | return; | |
1273 | ||
1274 | failure: | |
1275 | kernel->initialization_failed = true; | |
1276 | } | |
1277 | ||
1278 | /* Indent stream F by INDENT spaces. */ | |
1279 | ||
1280 | static void | |
1281 | indent_stream (FILE *f, unsigned indent) | |
1282 | { | |
1283 | fprintf (f, "%*s", indent, ""); | |
1284 | } | |
1285 | ||
1286 | /* Dump kernel DISPATCH data structure and indent it by INDENT spaces. */ | |
1287 | ||
1288 | static void | |
1289 | print_kernel_dispatch (struct GOMP_hsa_kernel_dispatch *dispatch, unsigned indent) | |
1290 | { | |
1291 | indent_stream (stderr, indent); | |
1292 | fprintf (stderr, "this: %p\n", dispatch); | |
1293 | indent_stream (stderr, indent); | |
1294 | fprintf (stderr, "queue: %p\n", dispatch->queue); | |
1295 | indent_stream (stderr, indent); | |
1296 | fprintf (stderr, "omp_data_memory: %p\n", dispatch->omp_data_memory); | |
1297 | indent_stream (stderr, indent); | |
1298 | fprintf (stderr, "kernarg_address: %p\n", dispatch->kernarg_address); | |
1299 | indent_stream (stderr, indent); | |
1300 | fprintf (stderr, "object: %lu\n", dispatch->object); | |
1301 | indent_stream (stderr, indent); | |
1302 | fprintf (stderr, "signal: %lu\n", dispatch->signal); | |
1303 | indent_stream (stderr, indent); | |
1304 | fprintf (stderr, "private_segment_size: %u\n", | |
1305 | dispatch->private_segment_size); | |
1306 | indent_stream (stderr, indent); | |
1307 | fprintf (stderr, "group_segment_size: %u\n", | |
1308 | dispatch->group_segment_size); | |
1309 | indent_stream (stderr, indent); | |
1310 | fprintf (stderr, "children dispatches: %lu\n", | |
1311 | dispatch->kernel_dispatch_count); | |
1312 | indent_stream (stderr, indent); | |
1313 | fprintf (stderr, "omp_num_threads: %u\n", | |
1314 | dispatch->omp_num_threads); | |
1315 | fprintf (stderr, "\n"); | |
1316 | ||
1317 | for (unsigned i = 0; i < dispatch->kernel_dispatch_count; i++) | |
1318 | print_kernel_dispatch (dispatch->children_dispatches[i], indent + 2); | |
1319 | } | |
1320 | ||
1321 | /* Create kernel dispatch data structure for a KERNEL and all its | |
1322 | dependencies. */ | |
1323 | ||
1324 | static struct GOMP_hsa_kernel_dispatch * | |
1325 | create_kernel_dispatch (struct kernel_info *kernel, unsigned omp_data_size) | |
1326 | { | |
1327 | struct GOMP_hsa_kernel_dispatch *shadow | |
1328 | = create_single_kernel_dispatch (kernel, omp_data_size); | |
1329 | shadow->omp_num_threads = 64; | |
1330 | shadow->debug = 0; | |
1331 | shadow->omp_level = kernel->gridified_kernel_p ? 1 : 0; | |
1332 | ||
1333 | /* Create kernel dispatch data structures. We do not allow to have | |
1334 | a kernel dispatch with depth bigger than one. */ | |
1335 | for (unsigned i = 0; i < kernel->dependencies_count; i++) | |
1336 | { | |
1337 | struct kernel_info *dependency | |
1338 | = get_kernel_for_agent (kernel->agent, kernel->dependencies[i]); | |
1339 | shadow->children_dispatches[i] | |
1340 | = create_single_kernel_dispatch (dependency, omp_data_size); | |
1341 | shadow->children_dispatches[i]->queue | |
1342 | = kernel->agent->kernel_dispatch_command_q; | |
1343 | shadow->children_dispatches[i]->omp_level = 1; | |
1344 | } | |
1345 | ||
1346 | return shadow; | |
1347 | } | |
1348 | ||
1349 | /* Do all the work that is necessary before running KERNEL for the first time. | |
1350 | The function assumes the program has been created, finalized and frozen by | |
1351 | create_and_finalize_hsa_program. */ | |
1352 | ||
1353 | static void | |
1354 | init_kernel (struct kernel_info *kernel) | |
1355 | { | |
1356 | if (pthread_mutex_lock (&kernel->init_mutex)) | |
1357 | GOMP_PLUGIN_fatal ("Could not lock an HSA kernel initialization mutex"); | |
1358 | if (kernel->initialized) | |
1359 | { | |
1360 | if (pthread_mutex_unlock (&kernel->init_mutex)) | |
1361 | GOMP_PLUGIN_fatal ("Could not unlock an HSA kernel initialization " | |
1362 | "mutex"); | |
1363 | ||
1364 | return; | |
1365 | } | |
1366 | ||
1367 | /* Precomputed maximum size of OMP data necessary for a kernel from kernel | |
1368 | dispatch operation. */ | |
1369 | init_single_kernel (kernel, &kernel->max_omp_data_size); | |
1370 | ||
1371 | if (!kernel->initialization_failed) | |
1372 | HSA_DEBUG ("\n"); | |
1373 | ||
1374 | kernel->initialized = true; | |
1375 | if (pthread_mutex_unlock (&kernel->init_mutex)) | |
1376 | GOMP_PLUGIN_fatal ("Could not unlock an HSA kernel initialization " | |
1377 | "mutex"); | |
1378 | } | |
1379 | ||
1380 | /* Parse the target attributes INPUT provided by the compiler and return true | |
1381 | if we should run anything all. If INPUT is NULL, fill DEF with default | |
1382 | values, then store INPUT or DEF into *RESULT. */ | |
1383 | ||
1384 | static bool | |
1385 | parse_target_attributes (void **input, | |
1386 | struct GOMP_kernel_launch_attributes *def, | |
1387 | struct GOMP_kernel_launch_attributes **result) | |
1388 | { | |
1389 | if (!input) | |
1390 | GOMP_PLUGIN_fatal ("No target arguments provided"); | |
1391 | ||
1392 | bool attrs_found = false; | |
1393 | while (*input) | |
1394 | { | |
1395 | uintptr_t id = (uintptr_t) *input; | |
1396 | if ((id & GOMP_TARGET_ARG_DEVICE_MASK) == GOMP_DEVICE_HSA | |
1397 | && ((id & GOMP_TARGET_ARG_ID_MASK) | |
1398 | == GOMP_TARGET_ARG_HSA_KERNEL_ATTRIBUTES)) | |
1399 | { | |
1400 | input++; | |
1401 | attrs_found = true; | |
1402 | break; | |
1403 | } | |
1404 | ||
1405 | if (id & GOMP_TARGET_ARG_SUBSEQUENT_PARAM) | |
1406 | input++; | |
1407 | input++; | |
1408 | } | |
1409 | ||
1410 | if (!attrs_found) | |
1411 | { | |
1412 | def->ndim = 1; | |
1413 | def->gdims[0] = 1; | |
1414 | def->gdims[1] = 1; | |
1415 | def->gdims[2] = 1; | |
1416 | def->wdims[0] = 1; | |
1417 | def->wdims[1] = 1; | |
1418 | def->wdims[2] = 1; | |
1419 | *result = def; | |
1420 | HSA_DEBUG ("GOMP_OFFLOAD_run called with no launch attributes\n"); | |
1421 | return true; | |
1422 | } | |
1423 | ||
1424 | struct GOMP_kernel_launch_attributes *kla; | |
1425 | kla = (struct GOMP_kernel_launch_attributes *) *input; | |
1426 | *result = kla; | |
b8d89b03 ML |
1427 | if (kla->ndim == 0 || kla->ndim > 3) |
1428 | GOMP_PLUGIN_fatal ("Invalid number of dimensions (%u)", kla->ndim); | |
b2b40051 | 1429 | |
b8d89b03 ML |
1430 | HSA_DEBUG ("GOMP_OFFLOAD_run called with %u dimensions:\n", kla->ndim); |
1431 | unsigned i; | |
1432 | for (i = 0; i < kla->ndim; i++) | |
1433 | { | |
1434 | HSA_DEBUG (" Dimension %u: grid size %u and group size %u\n", i, | |
1435 | kla->gdims[i], kla->wdims[i]); | |
1436 | if (kla->gdims[i] == 0) | |
1437 | return false; | |
1438 | } | |
b2b40051 MJ |
1439 | return true; |
1440 | } | |
1441 | ||
b8d89b03 ML |
1442 | /* Return the group size given the requested GROUP size, GRID size and number |
1443 | of grid dimensions NDIM. */ | |
1444 | ||
1445 | static uint32_t | |
1446 | get_group_size (uint32_t ndim, uint32_t grid, uint32_t group) | |
1447 | { | |
1448 | if (group == 0) | |
1449 | { | |
1450 | /* TODO: Provide a default via environment or device characteristics. */ | |
1451 | if (ndim == 1) | |
1452 | group = 64; | |
1453 | else if (ndim == 2) | |
1454 | group = 8; | |
1455 | else | |
1456 | group = 4; | |
1457 | } | |
1458 | ||
1459 | if (group > grid) | |
1460 | group = grid; | |
1461 | return group; | |
1462 | } | |
1463 | ||
b2b40051 MJ |
1464 | /* Return true if the HSA runtime can run function FN_PTR. */ |
1465 | ||
1466 | bool | |
1467 | GOMP_OFFLOAD_can_run (void *fn_ptr) | |
1468 | { | |
1469 | struct kernel_info *kernel = (struct kernel_info *) fn_ptr; | |
1470 | struct agent_info *agent = kernel->agent; | |
1471 | create_and_finalize_hsa_program (agent); | |
1472 | ||
1473 | if (agent->prog_finalized_error) | |
1474 | goto failure; | |
1475 | ||
1476 | init_kernel (kernel); | |
1477 | if (kernel->initialization_failed) | |
1478 | goto failure; | |
1479 | ||
1480 | return true; | |
1481 | ||
1482 | failure: | |
1483 | if (suppress_host_fallback) | |
1484 | GOMP_PLUGIN_fatal ("HSA host fallback has been suppressed"); | |
1485 | HSA_DEBUG ("HSA target cannot be launched, doing a host fallback\n"); | |
1486 | return false; | |
1487 | } | |
1488 | ||
f9c8babb ML |
1489 | /* Atomically store pair of uint16_t values (HEADER and REST) to a PACKET. */ |
1490 | ||
1491 | void | |
1492 | packet_store_release (uint32_t* packet, uint16_t header, uint16_t rest) | |
1493 | { | |
1494 | __atomic_store_n (packet, header | (rest << 16), __ATOMIC_RELEASE); | |
1495 | } | |
1496 | ||
b8d89b03 ML |
1497 | /* Run KERNEL on its agent, pass VARS to it as arguments and take |
1498 | launchattributes from KLA. */ | |
b2b40051 MJ |
1499 | |
1500 | void | |
b8d89b03 ML |
1501 | run_kernel (struct kernel_info *kernel, void *vars, |
1502 | struct GOMP_kernel_launch_attributes *kla) | |
b2b40051 | 1503 | { |
b2b40051 | 1504 | struct agent_info *agent = kernel->agent; |
b2b40051 MJ |
1505 | if (pthread_rwlock_rdlock (&agent->modules_rwlock)) |
1506 | GOMP_PLUGIN_fatal ("Unable to read-lock an HSA agent rwlock"); | |
1507 | ||
1508 | if (!agent->initialized) | |
1509 | GOMP_PLUGIN_fatal ("Agent must be initialized"); | |
1510 | ||
1511 | if (!kernel->initialized) | |
1512 | GOMP_PLUGIN_fatal ("Called kernel must be initialized"); | |
1513 | ||
1514 | struct GOMP_hsa_kernel_dispatch *shadow | |
1515 | = create_kernel_dispatch (kernel, kernel->max_omp_data_size); | |
1516 | ||
1517 | if (debug) | |
1518 | { | |
1519 | fprintf (stderr, "\nKernel has following dependencies:\n"); | |
1520 | print_kernel_dispatch (shadow, 2); | |
1521 | } | |
1522 | ||
b8d89b03 ML |
1523 | uint64_t index |
1524 | = hsa_fns.hsa_queue_add_write_index_release_fn (agent->command_q, 1); | |
b2b40051 MJ |
1525 | HSA_DEBUG ("Got AQL index %llu\n", (long long int) index); |
1526 | ||
1527 | /* Wait until the queue is not full before writing the packet. */ | |
b8d89b03 | 1528 | while (index - hsa_fns.hsa_queue_load_read_index_acquire_fn (agent->command_q) |
b2b40051 MJ |
1529 | >= agent->command_q->size) |
1530 | ; | |
1531 | ||
1532 | hsa_kernel_dispatch_packet_t *packet; | |
1533 | packet = ((hsa_kernel_dispatch_packet_t *) agent->command_q->base_address) | |
1534 | + index % agent->command_q->size; | |
1535 | ||
1536 | memset (((uint8_t *) packet) + 4, 0, sizeof (*packet) - 4); | |
b2b40051 | 1537 | packet->grid_size_x = kla->gdims[0]; |
b8d89b03 ML |
1538 | packet->workgroup_size_x = get_group_size (kla->ndim, kla->gdims[0], |
1539 | kla->wdims[0]); | |
1540 | ||
1541 | if (kla->ndim >= 2) | |
1542 | { | |
1543 | packet->grid_size_y = kla->gdims[1]; | |
1544 | packet->workgroup_size_y = get_group_size (kla->ndim, kla->gdims[1], | |
1545 | kla->wdims[1]); | |
1546 | } | |
1547 | else | |
1548 | { | |
1549 | packet->grid_size_y = 1; | |
1550 | packet->workgroup_size_y = 1; | |
1551 | } | |
1552 | ||
1553 | if (kla->ndim == 3) | |
1554 | { | |
1555 | packet->grid_size_z = kla->gdims[2]; | |
1556 | packet->workgroup_size_z = get_group_size (kla->ndim, kla->gdims[2], | |
1557 | kla->wdims[2]); | |
1558 | } | |
1559 | else | |
1560 | { | |
1561 | packet->grid_size_z = 1; | |
1562 | packet->workgroup_size_z = 1; | |
1563 | } | |
1564 | ||
b2b40051 MJ |
1565 | packet->private_segment_size = kernel->private_segment_size; |
1566 | packet->group_segment_size = kernel->group_segment_size; | |
1567 | packet->kernel_object = kernel->object; | |
1568 | packet->kernarg_address = shadow->kernarg_address; | |
1569 | hsa_signal_t s; | |
1570 | s.handle = shadow->signal; | |
1571 | packet->completion_signal = s; | |
b8d89b03 | 1572 | hsa_fns.hsa_signal_store_relaxed_fn (s, 1); |
b2b40051 MJ |
1573 | memcpy (shadow->kernarg_address, &vars, sizeof (vars)); |
1574 | ||
7397fce2 ML |
1575 | /* PR hsa/70337. */ |
1576 | size_t vars_size = sizeof (vars); | |
1577 | if (kernel->kernarg_segment_size > vars_size) | |
1578 | { | |
1579 | if (kernel->kernarg_segment_size != vars_size | |
1580 | + sizeof (struct hsa_kernel_runtime *)) | |
1581 | GOMP_PLUGIN_fatal ("Kernel segment size has an unexpected value"); | |
1582 | memcpy (packet->kernarg_address + vars_size, &shadow, | |
1583 | sizeof (struct hsa_kernel_runtime *)); | |
1584 | } | |
b2b40051 MJ |
1585 | |
1586 | HSA_DEBUG ("Copying kernel runtime pointer to kernarg_address\n"); | |
1587 | ||
1588 | uint16_t header; | |
1589 | header = HSA_PACKET_TYPE_KERNEL_DISPATCH << HSA_PACKET_HEADER_TYPE; | |
1590 | header |= HSA_FENCE_SCOPE_SYSTEM << HSA_PACKET_HEADER_ACQUIRE_FENCE_SCOPE; | |
1591 | header |= HSA_FENCE_SCOPE_SYSTEM << HSA_PACKET_HEADER_RELEASE_FENCE_SCOPE; | |
1592 | ||
1593 | HSA_DEBUG ("Going to dispatch kernel %s\n", kernel->name); | |
1594 | ||
f9c8babb | 1595 | packet_store_release ((uint32_t *) packet, header, |
b8d89b03 | 1596 | (uint16_t) kla->ndim << HSA_KERNEL_DISPATCH_PACKET_SETUP_DIMENSIONS); |
f9c8babb | 1597 | |
b8d89b03 ML |
1598 | hsa_fns.hsa_signal_store_release_fn (agent->command_q->doorbell_signal, |
1599 | index); | |
b2b40051 MJ |
1600 | |
1601 | /* TODO: GPU agents in Carrizo APUs cannot properly update L2 cache for | |
1602 | signal wait and signal load operations on their own and we need to | |
1603 | periodically call the hsa_signal_load_acquire on completion signals of | |
1604 | children kernels in the CPU to make that happen. As soon the | |
1605 | limitation will be resolved, this workaround can be removed. */ | |
1606 | ||
1607 | HSA_DEBUG ("Kernel dispatched, waiting for completion\n"); | |
1608 | ||
1609 | /* Root signal waits with 1ms timeout. */ | |
b8d89b03 ML |
1610 | while (hsa_fns.hsa_signal_wait_acquire_fn (s, HSA_SIGNAL_CONDITION_LT, 1, |
1611 | 1000 * 1000, | |
1612 | HSA_WAIT_STATE_BLOCKED) != 0) | |
b2b40051 MJ |
1613 | for (unsigned i = 0; i < shadow->kernel_dispatch_count; i++) |
1614 | { | |
1615 | hsa_signal_t child_s; | |
1616 | child_s.handle = shadow->children_dispatches[i]->signal; | |
1617 | ||
1618 | HSA_DEBUG ("Waiting for children completion signal: %lu\n", | |
1619 | shadow->children_dispatches[i]->signal); | |
b8d89b03 | 1620 | hsa_fns.hsa_signal_load_acquire_fn (child_s); |
b2b40051 MJ |
1621 | } |
1622 | ||
1623 | release_kernel_dispatch (shadow); | |
1624 | ||
1625 | if (pthread_rwlock_unlock (&agent->modules_rwlock)) | |
1626 | GOMP_PLUGIN_fatal ("Unable to unlock an HSA agent rwlock"); | |
1627 | } | |
1628 | ||
b8d89b03 ML |
1629 | /* Part of the libgomp plugin interface. Run a kernel on device N (the number |
1630 | is actually ignored, we assume the FN_PTR has been mapped using the correct | |
1631 | device) and pass it an array of pointers in VARS as a parameter. The kernel | |
1632 | is identified by FN_PTR which must point to a kernel_info structure. */ | |
1633 | ||
1634 | void | |
1635 | GOMP_OFFLOAD_run (int n __attribute__((unused)), | |
1636 | void *fn_ptr, void *vars, void **args) | |
1637 | { | |
1638 | struct kernel_info *kernel = (struct kernel_info *) fn_ptr; | |
1639 | struct GOMP_kernel_launch_attributes def; | |
1640 | struct GOMP_kernel_launch_attributes *kla; | |
1641 | if (!parse_target_attributes (args, &def, &kla)) | |
1642 | { | |
1643 | HSA_DEBUG ("Will not run HSA kernel because the grid size is zero\n"); | |
1644 | return; | |
1645 | } | |
1646 | run_kernel (kernel, vars, kla); | |
1647 | } | |
1648 | ||
b2b40051 MJ |
1649 | /* Information to be passed to a thread running a kernel asycnronously. */ |
1650 | ||
1651 | struct async_run_info | |
1652 | { | |
1653 | int device; | |
1654 | void *tgt_fn; | |
1655 | void *tgt_vars; | |
1656 | void **args; | |
1657 | void *async_data; | |
1658 | }; | |
1659 | ||
1660 | /* Thread routine to run a kernel asynchronously. */ | |
1661 | ||
1662 | static void * | |
1663 | run_kernel_asynchronously (void *thread_arg) | |
1664 | { | |
1665 | struct async_run_info *info = (struct async_run_info *) thread_arg; | |
1666 | int device = info->device; | |
1667 | void *tgt_fn = info->tgt_fn; | |
1668 | void *tgt_vars = info->tgt_vars; | |
1669 | void **args = info->args; | |
1670 | void *async_data = info->async_data; | |
1671 | ||
1672 | free (info); | |
1673 | GOMP_OFFLOAD_run (device, tgt_fn, tgt_vars, args); | |
1674 | GOMP_PLUGIN_target_task_completion (async_data); | |
1675 | return NULL; | |
1676 | } | |
1677 | ||
1678 | /* Part of the libgomp plugin interface. Run a kernel like GOMP_OFFLOAD_run | |
1679 | does, but asynchronously and call GOMP_PLUGIN_target_task_completion when it | |
1680 | has finished. */ | |
1681 | ||
1682 | void | |
1683 | GOMP_OFFLOAD_async_run (int device, void *tgt_fn, void *tgt_vars, | |
1684 | void **args, void *async_data) | |
1685 | { | |
1686 | pthread_t pt; | |
1687 | struct async_run_info *info; | |
1688 | HSA_DEBUG ("GOMP_OFFLOAD_async_run invoked\n") | |
1689 | info = GOMP_PLUGIN_malloc (sizeof (struct async_run_info)); | |
1690 | ||
1691 | info->device = device; | |
1692 | info->tgt_fn = tgt_fn; | |
1693 | info->tgt_vars = tgt_vars; | |
1694 | info->args = args; | |
1695 | info->async_data = async_data; | |
1696 | ||
1697 | int err = pthread_create (&pt, NULL, &run_kernel_asynchronously, info); | |
1698 | if (err != 0) | |
1699 | GOMP_PLUGIN_fatal ("HSA asynchronous thread creation failed: %s", | |
1700 | strerror (err)); | |
1701 | err = pthread_detach (pt); | |
1702 | if (err != 0) | |
1703 | GOMP_PLUGIN_fatal ("Failed to detach a thread to run HSA kernel " | |
1704 | "asynchronously: %s", strerror (err)); | |
1705 | } | |
1706 | ||
1707 | /* Deinitialize all information associated with MODULE and kernels within | |
6ce13072 | 1708 | it. Return TRUE on success. */ |
b2b40051 | 1709 | |
6ce13072 | 1710 | static bool |
b2b40051 MJ |
1711 | destroy_module (struct module_info *module) |
1712 | { | |
1713 | int i; | |
1714 | for (i = 0; i < module->kernel_count; i++) | |
1715 | if (pthread_mutex_destroy (&module->kernels[i].init_mutex)) | |
6ce13072 CLT |
1716 | { |
1717 | GOMP_PLUGIN_error ("Failed to destroy an HSA kernel initialization " | |
1718 | "mutex"); | |
1719 | return false; | |
1720 | } | |
1721 | return true; | |
b2b40051 MJ |
1722 | } |
1723 | ||
1724 | /* Part of the libgomp plugin interface. Unload BRIG module described by | |
6ce13072 CLT |
1725 | struct brig_image_desc in TARGET_DATA from agent number N. Return |
1726 | TRUE on success. */ | |
b2b40051 | 1727 | |
6ce13072 | 1728 | bool |
b2b40051 MJ |
1729 | GOMP_OFFLOAD_unload_image (int n, unsigned version, void *target_data) |
1730 | { | |
1731 | if (GOMP_VERSION_DEV (version) > GOMP_VERSION_HSA) | |
6ce13072 CLT |
1732 | { |
1733 | GOMP_PLUGIN_error ("Offload data incompatible with HSA plugin" | |
1734 | " (expected %u, received %u)", | |
1735 | GOMP_VERSION_HSA, GOMP_VERSION_DEV (version)); | |
1736 | return false; | |
1737 | } | |
b2b40051 MJ |
1738 | |
1739 | struct agent_info *agent; | |
1740 | agent = get_agent_info (n); | |
6ce13072 CLT |
1741 | if (!agent) |
1742 | return false; | |
b2b40051 | 1743 | |
6ce13072 CLT |
1744 | if (pthread_rwlock_wrlock (&agent->modules_rwlock)) |
1745 | { | |
1746 | GOMP_PLUGIN_error ("Unable to write-lock an HSA agent rwlock"); | |
1747 | return false; | |
1748 | } | |
b2b40051 MJ |
1749 | struct module_info *module = agent->first_module; |
1750 | while (module) | |
1751 | { | |
1752 | if (module->image_desc == target_data) | |
1753 | break; | |
1754 | module = module->next; | |
1755 | } | |
1756 | if (!module) | |
6ce13072 CLT |
1757 | { |
1758 | GOMP_PLUGIN_error ("Attempt to unload an image that has never been " | |
1759 | "loaded before"); | |
1760 | return false; | |
1761 | } | |
b2b40051 MJ |
1762 | |
1763 | remove_module_from_agent (agent, module); | |
6ce13072 CLT |
1764 | if (!destroy_module (module)) |
1765 | return false; | |
b2b40051 | 1766 | free (module); |
6ce13072 CLT |
1767 | if (!destroy_hsa_program (agent)) |
1768 | return false; | |
b2b40051 | 1769 | if (pthread_rwlock_unlock (&agent->modules_rwlock)) |
6ce13072 CLT |
1770 | { |
1771 | GOMP_PLUGIN_error ("Unable to unlock an HSA agent rwlock"); | |
1772 | return false; | |
1773 | } | |
1774 | return true; | |
b2b40051 MJ |
1775 | } |
1776 | ||
1777 | /* Part of the libgomp plugin interface. Deinitialize all information and | |
1778 | status associated with agent number N. We do not attempt any | |
1779 | synchronization, assuming the user and libgomp will not attempt | |
1780 | deinitialization of a device that is in any way being used at the same | |
6ce13072 | 1781 | time. Return TRUE on success. */ |
b2b40051 | 1782 | |
6ce13072 | 1783 | bool |
b2b40051 MJ |
1784 | GOMP_OFFLOAD_fini_device (int n) |
1785 | { | |
1786 | struct agent_info *agent = get_agent_info (n); | |
6ce13072 CLT |
1787 | if (!agent) |
1788 | return false; | |
1789 | ||
b2b40051 | 1790 | if (!agent->initialized) |
6ce13072 | 1791 | return true; |
b2b40051 MJ |
1792 | |
1793 | struct module_info *next_module = agent->first_module; | |
1794 | while (next_module) | |
1795 | { | |
1796 | struct module_info *module = next_module; | |
1797 | next_module = module->next; | |
6ce13072 CLT |
1798 | if (!destroy_module (module)) |
1799 | return false; | |
b2b40051 MJ |
1800 | free (module); |
1801 | } | |
1802 | agent->first_module = NULL; | |
6ce13072 CLT |
1803 | if (!destroy_hsa_program (agent)) |
1804 | return false; | |
b2b40051 MJ |
1805 | |
1806 | release_agent_shared_libraries (agent); | |
1807 | ||
b8d89b03 | 1808 | hsa_status_t status = hsa_fns.hsa_queue_destroy_fn (agent->command_q); |
b2b40051 | 1809 | if (status != HSA_STATUS_SUCCESS) |
6ce13072 | 1810 | return hsa_error ("Error destroying command queue", status); |
b8d89b03 | 1811 | status = hsa_fns.hsa_queue_destroy_fn (agent->kernel_dispatch_command_q); |
b2b40051 | 1812 | if (status != HSA_STATUS_SUCCESS) |
6ce13072 | 1813 | return hsa_error ("Error destroying kernel dispatch command queue", status); |
b2b40051 | 1814 | if (pthread_mutex_destroy (&agent->prog_mutex)) |
6ce13072 CLT |
1815 | { |
1816 | GOMP_PLUGIN_error ("Failed to destroy an HSA agent program mutex"); | |
1817 | return false; | |
1818 | } | |
b2b40051 | 1819 | if (pthread_rwlock_destroy (&agent->modules_rwlock)) |
6ce13072 CLT |
1820 | { |
1821 | GOMP_PLUGIN_error ("Failed to destroy an HSA agent rwlock"); | |
1822 | return false; | |
1823 | } | |
b2b40051 | 1824 | agent->initialized = false; |
6ce13072 | 1825 | return true; |
b2b40051 MJ |
1826 | } |
1827 | ||
1828 | /* Part of the libgomp plugin interface. Not implemented as it is not required | |
1829 | for HSA. */ | |
1830 | ||
1831 | void * | |
1832 | GOMP_OFFLOAD_alloc (int ord, size_t size) | |
1833 | { | |
6ce13072 | 1834 | GOMP_PLUGIN_error ("HSA GOMP_OFFLOAD_alloc is not implemented because " |
b2b40051 | 1835 | "it should never be called"); |
6ce13072 | 1836 | return NULL; |
b2b40051 MJ |
1837 | } |
1838 | ||
1839 | /* Part of the libgomp plugin interface. Not implemented as it is not required | |
1840 | for HSA. */ | |
1841 | ||
6ce13072 | 1842 | bool |
b2b40051 MJ |
1843 | GOMP_OFFLOAD_free (int ord, void *ptr) |
1844 | { | |
6ce13072 | 1845 | GOMP_PLUGIN_error ("HSA GOMP_OFFLOAD_free is not implemented because " |
b2b40051 | 1846 | "it should never be called"); |
6ce13072 | 1847 | return false; |
b2b40051 MJ |
1848 | } |
1849 | ||
1850 | /* Part of the libgomp plugin interface. Not implemented as it is not required | |
1851 | for HSA. */ | |
1852 | ||
6ce13072 | 1853 | bool |
b2b40051 MJ |
1854 | GOMP_OFFLOAD_dev2host (int ord, void *dst, const void *src, size_t n) |
1855 | { | |
6ce13072 | 1856 | GOMP_PLUGIN_error ("HSA GOMP_OFFLOAD_dev2host is not implemented because " |
b2b40051 | 1857 | "it should never be called"); |
6ce13072 | 1858 | return false; |
b2b40051 MJ |
1859 | } |
1860 | ||
1861 | /* Part of the libgomp plugin interface. Not implemented as it is not required | |
1862 | for HSA. */ | |
1863 | ||
6ce13072 | 1864 | bool |
b2b40051 MJ |
1865 | GOMP_OFFLOAD_host2dev (int ord, void *dst, const void *src, size_t n) |
1866 | { | |
6ce13072 | 1867 | GOMP_PLUGIN_error ("HSA GOMP_OFFLOAD_host2dev is not implemented because " |
b2b40051 | 1868 | "it should never be called"); |
6ce13072 | 1869 | return false; |
b2b40051 MJ |
1870 | } |
1871 | ||
1872 | /* Part of the libgomp plugin interface. Not implemented as it is not required | |
1873 | for HSA. */ | |
1874 | ||
6ce13072 | 1875 | bool |
b2b40051 MJ |
1876 | GOMP_OFFLOAD_dev2dev (int ord, void *dst, const void *src, size_t n) |
1877 | { | |
6ce13072 | 1878 | GOMP_PLUGIN_error ("HSA GOMP_OFFLOAD_dev2dev is not implemented because " |
b2b40051 | 1879 | "it should never be called"); |
6ce13072 | 1880 | return false; |
b2b40051 | 1881 | } |