1 /* Run a stand-alone AMD GCN kernel.
3 Copyright 2017 Mentor Graphics Corporation
4 Copyright (C) 2018-2021 Free Software Foundation, Inc.
6 This program is free software: you can redistribute it and/or modify
7 it under the terms of the GNU General Public License as published by
8 the Free Software Foundation, either version 3 of the License, or
9 (at your option) any later version.
11 This program is distributed in the hope that it will be useful,
12 but WITHOUT ANY WARRANTY; without even the implied warranty of
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 GNU General Public License for more details.
16 You should have received a copy of the GNU General Public License
17 along with this program. If not, see <http://www.gnu.org/licenses/>. */
19 /* This program will run a compiled stand-alone GCN kernel on a GPU.
21 The kernel entry point's signature must use a standard main signature:
23 int main(int argc, char **argv)
39 #ifndef HSA_RUNTIME_LIB
40 #define HSA_RUNTIME_LIB "libhsa-runtime64.so.1"
43 #ifndef VERSION_STRING
44 #define VERSION_STRING "(version unknown)"
49 hsa_agent_t device
= { 0 };
50 hsa_queue_t
*queue
= NULL
;
51 uint64_t init_array_kernel
= 0;
52 uint64_t fini_array_kernel
= 0;
53 uint64_t main_kernel
= 0;
54 hsa_executable_t executable
= { 0 };
56 hsa_region_t kernargs_region
= { 0 };
57 hsa_region_t heap_region
= { 0 };
58 uint32_t kernarg_segment_size
= 0;
59 uint32_t group_segment_size
= 0;
60 uint32_t private_segment_size
= 0;
63 usage (const char *progname
)
65 printf ("Usage: %s [options] kernel [kernel-args]\n\n"
69 " --debug\n", progname
);
73 version (const char *progname
)
75 printf ("%s " VERSION_STRING
"\n", progname
);
78 /* As an HSA runtime is dlopened, following structure defines the necessary
80 Code adapted from libgomp. */
82 struct hsa_runtime_fn_info
85 hsa_status_t (*hsa_status_string_fn
) (hsa_status_t status
,
86 const char **status_string
);
87 hsa_status_t (*hsa_agent_get_info_fn
) (hsa_agent_t agent
,
88 hsa_agent_info_t attribute
,
90 hsa_status_t (*hsa_init_fn
) (void);
91 hsa_status_t (*hsa_iterate_agents_fn
)
92 (hsa_status_t (*callback
) (hsa_agent_t agent
, void *data
), void *data
);
93 hsa_status_t (*hsa_region_get_info_fn
) (hsa_region_t region
,
94 hsa_region_info_t attribute
,
96 hsa_status_t (*hsa_queue_create_fn
)
97 (hsa_agent_t agent
, uint32_t size
, hsa_queue_type_t type
,
98 void (*callback
) (hsa_status_t status
, hsa_queue_t
*source
, void *data
),
99 void *data
, uint32_t private_segment_size
,
100 uint32_t group_segment_size
, hsa_queue_t
**queue
);
101 hsa_status_t (*hsa_agent_iterate_regions_fn
)
103 hsa_status_t (*callback
) (hsa_region_t region
, void *data
), void *data
);
104 hsa_status_t (*hsa_executable_destroy_fn
) (hsa_executable_t executable
);
105 hsa_status_t (*hsa_executable_create_fn
)
106 (hsa_profile_t profile
, hsa_executable_state_t executable_state
,
107 const char *options
, hsa_executable_t
*executable
);
108 hsa_status_t (*hsa_executable_global_variable_define_fn
)
109 (hsa_executable_t executable
, const char *variable_name
, void *address
);
110 hsa_status_t (*hsa_executable_load_code_object_fn
)
111 (hsa_executable_t executable
, hsa_agent_t agent
,
112 hsa_code_object_t code_object
, const char *options
);
113 hsa_status_t (*hsa_executable_freeze_fn
) (hsa_executable_t executable
,
114 const char *options
);
115 hsa_status_t (*hsa_signal_create_fn
) (hsa_signal_value_t initial_value
,
116 uint32_t num_consumers
,
117 const hsa_agent_t
*consumers
,
118 hsa_signal_t
*signal
);
119 hsa_status_t (*hsa_memory_allocate_fn
) (hsa_region_t region
, size_t size
,
121 hsa_status_t (*hsa_memory_assign_agent_fn
) (void *ptr
, hsa_agent_t agent
,
122 hsa_access_permission_t access
);
123 hsa_status_t (*hsa_memory_copy_fn
) (void *dst
, const void *src
,
125 hsa_status_t (*hsa_memory_free_fn
) (void *ptr
);
126 hsa_status_t (*hsa_signal_destroy_fn
) (hsa_signal_t signal
);
127 hsa_status_t (*hsa_executable_get_symbol_fn
)
128 (hsa_executable_t executable
, const char *module_name
,
129 const char *symbol_name
, hsa_agent_t agent
, int32_t call_convention
,
130 hsa_executable_symbol_t
*symbol
);
131 hsa_status_t (*hsa_executable_symbol_get_info_fn
)
132 (hsa_executable_symbol_t executable_symbol
,
133 hsa_executable_symbol_info_t attribute
, void *value
);
134 void (*hsa_signal_store_relaxed_fn
) (hsa_signal_t signal
,
135 hsa_signal_value_t value
);
136 hsa_signal_value_t (*hsa_signal_wait_acquire_fn
)
137 (hsa_signal_t signal
, hsa_signal_condition_t condition
,
138 hsa_signal_value_t compare_value
, uint64_t timeout_hint
,
139 hsa_wait_state_t wait_state_hint
);
140 hsa_signal_value_t (*hsa_signal_wait_relaxed_fn
)
141 (hsa_signal_t signal
, hsa_signal_condition_t condition
,
142 hsa_signal_value_t compare_value
, uint64_t timeout_hint
,
143 hsa_wait_state_t wait_state_hint
);
144 hsa_status_t (*hsa_queue_destroy_fn
) (hsa_queue_t
*queue
);
145 hsa_status_t (*hsa_code_object_deserialize_fn
)
146 (void *serialized_code_object
, size_t serialized_code_object_size
,
147 const char *options
, hsa_code_object_t
*code_object
);
148 uint64_t (*hsa_queue_load_write_index_relaxed_fn
)
149 (const hsa_queue_t
*queue
);
150 void (*hsa_queue_store_write_index_relaxed_fn
)
151 (const hsa_queue_t
*queue
, uint64_t value
);
152 hsa_status_t (*hsa_shut_down_fn
) ();
155 /* HSA runtime functions that are initialized in init_hsa_context.
156 Code adapted from libgomp. */
158 static struct hsa_runtime_fn_info hsa_fns
;
160 #define DLSYM_FN(function) \
161 *(void**)(&hsa_fns.function##_fn) = dlsym (handle, #function); \
162 if (hsa_fns.function##_fn == NULL) \
166 init_hsa_runtime_functions (void)
168 void *handle
= dlopen (HSA_RUNTIME_LIB
, RTLD_LAZY
);
172 "The HSA runtime is required to run GCN kernels on hardware.\n"
173 "%s: File not found or could not be opened\n",
178 DLSYM_FN (hsa_status_string
)
179 DLSYM_FN (hsa_agent_get_info
)
181 DLSYM_FN (hsa_iterate_agents
)
182 DLSYM_FN (hsa_region_get_info
)
183 DLSYM_FN (hsa_queue_create
)
184 DLSYM_FN (hsa_agent_iterate_regions
)
185 DLSYM_FN (hsa_executable_destroy
)
186 DLSYM_FN (hsa_executable_create
)
187 DLSYM_FN (hsa_executable_global_variable_define
)
188 DLSYM_FN (hsa_executable_load_code_object
)
189 DLSYM_FN (hsa_executable_freeze
)
190 DLSYM_FN (hsa_signal_create
)
191 DLSYM_FN (hsa_memory_allocate
)
192 DLSYM_FN (hsa_memory_assign_agent
)
193 DLSYM_FN (hsa_memory_copy
)
194 DLSYM_FN (hsa_memory_free
)
195 DLSYM_FN (hsa_signal_destroy
)
196 DLSYM_FN (hsa_executable_get_symbol
)
197 DLSYM_FN (hsa_executable_symbol_get_info
)
198 DLSYM_FN (hsa_signal_wait_acquire
)
199 DLSYM_FN (hsa_signal_wait_relaxed
)
200 DLSYM_FN (hsa_signal_store_relaxed
)
201 DLSYM_FN (hsa_queue_destroy
)
202 DLSYM_FN (hsa_code_object_deserialize
)
203 DLSYM_FN (hsa_queue_load_write_index_relaxed
)
204 DLSYM_FN (hsa_queue_store_write_index_relaxed
)
205 DLSYM_FN (hsa_shut_down
)
210 fprintf (stderr
, "Failed to find HSA functions in " HSA_RUNTIME_LIB
"\n");
216 /* Report a fatal error STR together with the HSA error corresponding to
217 STATUS and terminate execution of the current process. */
220 hsa_fatal (const char *str
, hsa_status_t status
)
222 const char *hsa_error_msg
;
223 hsa_fns
.hsa_status_string_fn (status
, &hsa_error_msg
);
224 fprintf (stderr
, "%s: FAILED\nHSA Runtime message: %s\n", str
,
229 /* Helper macros to ensure we check the return values from the HSA Runtime.
230 These just keep the rest of the code a bit cleaner. */
232 #define XHSA_CMP(FN, CMP, MSG) \
234 hsa_status_t status = (FN); \
236 hsa_fatal ((MSG), status); \
238 fprintf (stderr, "%s: OK\n", (MSG)); \
240 #define XHSA(FN, MSG) XHSA_CMP(FN, status == HSA_STATUS_SUCCESS, MSG)
242 /* Callback of hsa_iterate_agents.
243 Called once for each available device, and returns "break" when a
244 suitable one has been found. */
247 get_gpu_agent (hsa_agent_t agent
, void *data
__attribute__ ((unused
)))
249 hsa_device_type_t device_type
;
250 XHSA (hsa_fns
.hsa_agent_get_info_fn (agent
, HSA_AGENT_INFO_DEVICE
,
254 /* Select only GPU devices. */
255 /* TODO: support selecting from multiple GPUs. */
256 if (HSA_DEVICE_TYPE_GPU
== device_type
)
259 return HSA_STATUS_INFO_BREAK
;
262 /* The device was not suitable. */
263 return HSA_STATUS_SUCCESS
;
266 /* Callback of hsa_iterate_regions.
267 Called once for each available memory region, and returns "break" when a
268 suitable one has been found. */
271 get_memory_region (hsa_region_t region
, hsa_region_t
*retval
,
272 hsa_region_global_flag_t kind
)
274 /* Reject non-global regions. */
275 hsa_region_segment_t segment
;
276 hsa_fns
.hsa_region_get_info_fn (region
, HSA_REGION_INFO_SEGMENT
, &segment
);
277 if (HSA_REGION_SEGMENT_GLOBAL
!= segment
)
278 return HSA_STATUS_SUCCESS
;
280 /* Find a region with the KERNARG flag set. */
281 hsa_region_global_flag_t flags
;
282 hsa_fns
.hsa_region_get_info_fn (region
, HSA_REGION_INFO_GLOBAL_FLAGS
,
287 return HSA_STATUS_INFO_BREAK
;
290 /* The region was not suitable. */
291 return HSA_STATUS_SUCCESS
;
295 get_kernarg_region (hsa_region_t region
, void *data
__attribute__((unused
)))
297 return get_memory_region (region
, &kernargs_region
,
298 HSA_REGION_GLOBAL_FLAG_KERNARG
);
302 get_heap_region (hsa_region_t region
, void *data
__attribute__((unused
)))
304 return get_memory_region (region
, &heap_region
,
305 HSA_REGION_GLOBAL_FLAG_COARSE_GRAINED
);
308 /* Initialize the HSA Runtime library and GPU device. */
313 /* Load the shared library and find the API functions. */
314 init_hsa_runtime_functions ();
316 /* Initialize the HSA Runtime. */
317 XHSA (hsa_fns
.hsa_init_fn (),
318 "Initialize run-time");
320 /* Select a suitable device.
321 The call-back function, get_gpu_agent, does the selection. */
322 XHSA_CMP (hsa_fns
.hsa_iterate_agents_fn (get_gpu_agent
, NULL
),
323 status
== HSA_STATUS_SUCCESS
|| status
== HSA_STATUS_INFO_BREAK
,
326 /* Initialize the queue used for launching kernels. */
327 uint32_t queue_size
= 0;
328 XHSA (hsa_fns
.hsa_agent_get_info_fn (device
, HSA_AGENT_INFO_QUEUE_MAX_SIZE
,
330 "Find max queue size");
331 XHSA (hsa_fns
.hsa_queue_create_fn (device
, queue_size
,
332 HSA_QUEUE_TYPE_SINGLE
, NULL
,
333 NULL
, UINT32_MAX
, UINT32_MAX
, &queue
),
334 "Set up a device queue");
336 /* Select a memory region for the kernel arguments.
337 The call-back function, get_kernarg_region, does the selection. */
338 XHSA_CMP (hsa_fns
.hsa_agent_iterate_regions_fn (device
, get_kernarg_region
,
340 status
== HSA_STATUS_SUCCESS
|| status
== HSA_STATUS_INFO_BREAK
,
341 "Locate kernargs memory");
343 /* Select a memory region for the kernel heap.
344 The call-back function, get_heap_region, does the selection. */
345 XHSA_CMP (hsa_fns
.hsa_agent_iterate_regions_fn (device
, get_heap_region
,
347 status
== HSA_STATUS_SUCCESS
|| status
== HSA_STATUS_INFO_BREAK
,
348 "Locate device memory");
352 /* Read a whole input file.
353 Code copied from mkoffload. */
356 read_file (const char *filename
, size_t *plen
)
358 size_t alloc
= 16384;
362 FILE *stream
= fopen (filename
, "rb");
369 if (!fseek (stream
, 0, SEEK_END
))
371 /* Get the file size. */
372 long s
= ftell (stream
);
375 fseek (stream
, 0, SEEK_SET
);
377 buffer
= malloc (alloc
);
381 size_t n
= fread (buffer
+ base
, 1, alloc
- base
- 1, stream
);
386 if (base
+ 1 == alloc
)
389 buffer
= realloc (buffer
, alloc
);
400 /* Read a HSA Code Object (HSACO) from file, and load it into the device. */
403 load_image (const char *filename
)
406 Elf64_Ehdr
*image
= (void *) read_file (filename
, &image_size
);
408 /* An "executable" consists of one or more code objects. */
409 XHSA (hsa_fns
.hsa_executable_create_fn (HSA_PROFILE_FULL
,
410 HSA_EXECUTABLE_STATE_UNFROZEN
, "",
412 "Initialize GCN executable");
414 /* Add the HSACO to the executable. */
415 hsa_code_object_t co
= { 0 };
416 XHSA (hsa_fns
.hsa_code_object_deserialize_fn (image
, image_size
, NULL
, &co
),
417 "Deserialize GCN code object");
418 XHSA (hsa_fns
.hsa_executable_load_code_object_fn (executable
, device
, co
,
420 "Load GCN code object");
422 /* We're done modifying he executable. */
423 XHSA (hsa_fns
.hsa_executable_freeze_fn (executable
, ""),
424 "Freeze GCN executable");
426 /* Locate the "_init_array" function, and read the kernel's properties. */
427 hsa_executable_symbol_t symbol
;
428 XHSA (hsa_fns
.hsa_executable_get_symbol_fn (executable
, NULL
,
429 "_init_array.kd", device
, 0,
431 "Find '_init_array' function");
432 XHSA (hsa_fns
.hsa_executable_symbol_get_info_fn
433 (symbol
, HSA_EXECUTABLE_SYMBOL_INFO_KERNEL_OBJECT
,
435 "Extract '_init_array' kernel object kernel object");
437 /* Locate the "_fini_array" function, and read the kernel's properties. */
438 XHSA (hsa_fns
.hsa_executable_get_symbol_fn (executable
, NULL
,
439 "_fini_array.kd", device
, 0,
441 "Find '_fini_array' function");
442 XHSA (hsa_fns
.hsa_executable_symbol_get_info_fn
443 (symbol
, HSA_EXECUTABLE_SYMBOL_INFO_KERNEL_OBJECT
,
445 "Extract '_fini_array' kernel object kernel object");
447 /* Locate the "main" function, and read the kernel's properties. */
448 XHSA (hsa_fns
.hsa_executable_get_symbol_fn (executable
, NULL
, "main.kd",
450 "Find 'main' function");
451 XHSA (hsa_fns
.hsa_executable_symbol_get_info_fn
452 (symbol
, HSA_EXECUTABLE_SYMBOL_INFO_KERNEL_OBJECT
, &main_kernel
),
453 "Extract 'main' kernel object");
454 XHSA (hsa_fns
.hsa_executable_symbol_get_info_fn
455 (symbol
, HSA_EXECUTABLE_SYMBOL_INFO_KERNEL_KERNARG_SEGMENT_SIZE
,
456 &kernarg_segment_size
),
457 "Extract kernarg segment size");
458 XHSA (hsa_fns
.hsa_executable_symbol_get_info_fn
459 (symbol
, HSA_EXECUTABLE_SYMBOL_INFO_KERNEL_GROUP_SEGMENT_SIZE
,
460 &group_segment_size
),
461 "Extract group segment size");
462 XHSA (hsa_fns
.hsa_executable_symbol_get_info_fn
463 (symbol
, HSA_EXECUTABLE_SYMBOL_INFO_KERNEL_PRIVATE_SEGMENT_SIZE
,
464 &private_segment_size
),
465 "Extract private segment size");
468 /* Allocate some device memory from the kernargs region.
469 The returned address will be 32-bit (with excess zeroed on 64-bit host),
470 and accessible via the same address on both host and target (via
471 __flat_scalar GCN address space). */
474 device_malloc (size_t size
, hsa_region_t region
)
477 XHSA (hsa_fns
.hsa_memory_allocate_fn (region
, size
, &result
),
478 "Allocate device memory");
482 /* These are the device pointers that will be transferred to the target.
483 The HSA Runtime points the kernargs register here.
484 They correspond to function signature:
485 int main (int argc, char *argv[], int *return_value)
486 The compiler expects this, for kernel functions, and will
487 automatically assign the exit value to *return_value. */
500 unsigned int next_output
;
513 unsigned int consumed
;
523 /* Print any console output from the kernel.
524 We print all entries from "consumed" to the next entry without a "written"
525 flag, or "next_output" is reached. The buffer is circular, but the
526 indices are absolute. It is assumed the kernel will stop writing data
527 if "next_output" wraps (becomes smaller than "consumed"). */
529 gomp_print_output (struct kernargs
*kernargs
, bool final
)
531 unsigned int limit
= (sizeof (kernargs
->output_data
.queue
)
532 / sizeof (kernargs
->output_data
.queue
[0]));
534 unsigned int from
= __atomic_load_n (&kernargs
->output_data
.consumed
,
536 unsigned int to
= kernargs
->output_data
.next_output
;
542 printf ("GCN print buffer overflowed.\n");
547 for (i
= from
; i
< to
; i
++)
549 struct printf_data
*data
= &kernargs
->output_data
.queue
[i
%limit
];
551 if (!data
->written
&& !final
)
557 printf ("%.128s%ld\n", data
->msg
, data
->ivalue
);
560 printf ("%.128s%f\n", data
->msg
, data
->dvalue
);
563 printf ("%.128s%.128s\n", data
->msg
, data
->text
);
566 printf ("%.128s%.128s", data
->msg
, data
->text
);
569 printf ("GCN print buffer error!\n");
574 __atomic_store_n (&kernargs
->output_data
.consumed
, i
+1,
580 /* Execute an already-loaded kernel on the device. */
583 run (uint64_t kernel
, void *kernargs
)
585 /* A "signal" is used to launch and monitor the kernel. */
587 XHSA (hsa_fns
.hsa_signal_create_fn (1, 0, NULL
, &signal
),
590 /* Configure for a single-worker kernel. */
591 uint64_t index
= hsa_fns
.hsa_queue_load_write_index_relaxed_fn (queue
);
592 const uint32_t queueMask
= queue
->size
- 1;
593 hsa_kernel_dispatch_packet_t
*dispatch_packet
=
594 &(((hsa_kernel_dispatch_packet_t
*) (queue
->base_address
))[index
&
596 dispatch_packet
->setup
|= 3 << HSA_KERNEL_DISPATCH_PACKET_SETUP_DIMENSIONS
;
597 dispatch_packet
->workgroup_size_x
= (uint16_t) 1;
598 dispatch_packet
->workgroup_size_y
= (uint16_t) 64;
599 dispatch_packet
->workgroup_size_z
= (uint16_t) 1;
600 dispatch_packet
->grid_size_x
= 1;
601 dispatch_packet
->grid_size_y
= 64;
602 dispatch_packet
->grid_size_z
= 1;
603 dispatch_packet
->completion_signal
= signal
;
604 dispatch_packet
->kernel_object
= kernel
;
605 dispatch_packet
->kernarg_address
= (void *) kernargs
;
606 dispatch_packet
->private_segment_size
= private_segment_size
;
607 dispatch_packet
->group_segment_size
= group_segment_size
;
610 header
|= HSA_FENCE_SCOPE_SYSTEM
<< HSA_PACKET_HEADER_ACQUIRE_FENCE_SCOPE
;
611 header
|= HSA_FENCE_SCOPE_SYSTEM
<< HSA_PACKET_HEADER_RELEASE_FENCE_SCOPE
;
612 header
|= HSA_PACKET_TYPE_KERNEL_DISPATCH
<< HSA_PACKET_HEADER_TYPE
;
614 __atomic_store_n ((uint32_t *) dispatch_packet
,
615 header
| (dispatch_packet
->setup
<< 16),
619 fprintf (stderr
, "Launch kernel\n");
621 hsa_fns
.hsa_queue_store_write_index_relaxed_fn (queue
, index
+ 1);
622 hsa_fns
.hsa_signal_store_relaxed_fn (queue
->doorbell_signal
, index
);
623 /* Kernel running ...... */
624 while (hsa_fns
.hsa_signal_wait_relaxed_fn (signal
, HSA_SIGNAL_CONDITION_LT
,
626 HSA_WAIT_STATE_ACTIVE
) != 0)
629 gomp_print_output (kernargs
, false);
632 gomp_print_output (kernargs
, true);
635 fprintf (stderr
, "Kernel exited\n");
637 XHSA (hsa_fns
.hsa_signal_destroy_fn (signal
),
642 main (int argc
, char *argv
[])
645 for (int i
= 1; i
< argc
; i
++)
647 if (!strcmp (argv
[i
], "--help"))
652 else if (!strcmp (argv
[i
], "--version"))
657 else if (!strcmp (argv
[i
], "--debug"))
659 else if (argv
[i
][0] == '-')
673 /* No kernel arguments were found. */
678 /* The remaining arguments are for the GCN kernel. */
679 int kernel_argc
= argc
- kernel_arg
;
680 char **kernel_argv
= &argv
[kernel_arg
];
683 load_image (kernel_argv
[0]);
685 /* Calculate size of function parameters + argv data. */
686 size_t args_size
= 0;
687 for (int i
= 0; i
< kernel_argc
; i
++)
688 args_size
+= strlen (kernel_argv
[i
]) + 1;
690 /* Allocate device memory for both function parameters and the argv
692 struct kernargs
*kernargs
= device_malloc (sizeof (*kernargs
),
696 int64_t argv_data
[kernel_argc
];
697 char strings
[args_size
];
698 } *args
= device_malloc (sizeof (struct argdata
), kernargs_region
);
700 size_t heap_size
= 10 * 1024 * 1024; /* 10MB. */
701 struct heap
*heap
= device_malloc (heap_size
, heap_region
);
702 XHSA (hsa_fns
.hsa_memory_assign_agent_fn (heap
, device
,
703 HSA_ACCESS_PERMISSION_RW
),
704 "Assign heap to device agent");
706 /* Write the data to the target. */
707 kernargs
->argc
= kernel_argc
;
708 kernargs
->argv
= (int64_t) args
->argv_data
;
709 kernargs
->out_ptr
= (int64_t) &kernargs
->output_data
;
710 kernargs
->output_data
.return_value
= 0xcafe0000; /* Default return value. */
711 kernargs
->output_data
.next_output
= 0;
712 for (unsigned i
= 0; i
< (sizeof (kernargs
->output_data
.queue
)
713 / sizeof (kernargs
->output_data
.queue
[0])); i
++)
714 kernargs
->output_data
.queue
[i
].written
= 0;
715 kernargs
->output_data
.consumed
= 0;
717 for (int i
= 0; i
< kernel_argc
; i
++)
719 size_t arg_len
= strlen (kernel_argv
[i
]) + 1;
720 args
->argv_data
[i
] = (int64_t) &args
->strings
[offset
];
721 memcpy (&args
->strings
[offset
], kernel_argv
[i
], arg_len
+ 1);
724 kernargs
->heap_ptr
= (int64_t) heap
;
725 hsa_fns
.hsa_memory_copy_fn (&heap
->size
, &heap_size
, sizeof (heap_size
));
727 /* Run constructors on the GPU. */
728 run (init_array_kernel
, kernargs
);
730 /* Run the kernel on the GPU. */
731 run (main_kernel
, kernargs
);
732 unsigned int return_value
=
733 (unsigned int) kernargs
->output_data
.return_value
;
735 /* Run destructors on the GPU. */
736 run (fini_array_kernel
, kernargs
);
738 unsigned int upper
= (return_value
& ~0xffff) >> 16;
741 printf ("Kernel exit value was never set\n");
744 else if (upper
== 0xffff)
747 ; /* Set by return from main. */
749 printf ("Possible kernel exit value corruption, 2 most significant bytes "
750 "aren't 0xffff, 0xcafe, or 0: 0x%x\n", return_value
);
754 unsigned int signal
= (return_value
>> 8) & 0xff;
755 if (signal
== SIGABRT
)
756 printf ("Kernel aborted\n");
757 else if (signal
!= 0)
758 printf ("Kernel received unkown signal\n");
762 printf ("Kernel exit value: %d\n", return_value
& 0xff);
764 /* Clean shut down. */
765 XHSA (hsa_fns
.hsa_memory_free_fn (kernargs
),
766 "Clean up device memory");
767 XHSA (hsa_fns
.hsa_executable_destroy_fn (executable
),
768 "Clean up GCN executable");
769 XHSA (hsa_fns
.hsa_queue_destroy_fn (queue
),
770 "Clean up device queue");
771 XHSA (hsa_fns
.hsa_shut_down_fn (),
772 "Shut down run-time");
774 return return_value
& 0xff;