]>
Commit | Line | Data |
---|---|---|
5f520819 | 1 | /* |
df26a50d | 2 | Copyright (c) 2014-2016 Intel Corporation. All Rights Reserved. |
5f520819 KY |
3 | |
4 | Redistribution and use in source and binary forms, with or without | |
5 | modification, are permitted provided that the following conditions | |
6 | are met: | |
7 | ||
8 | * Redistributions of source code must retain the above copyright | |
9 | notice, this list of conditions and the following disclaimer. | |
10 | * Redistributions in binary form must reproduce the above copyright | |
11 | notice, this list of conditions and the following disclaimer in the | |
12 | documentation and/or other materials provided with the distribution. | |
13 | * Neither the name of Intel Corporation nor the names of its | |
14 | contributors may be used to endorse or promote products derived | |
15 | from this software without specific prior written permission. | |
16 | ||
17 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS | |
18 | "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT | |
19 | LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR | |
20 | A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT | |
21 | HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, | |
22 | SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT | |
23 | LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, | |
24 | DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY | |
25 | THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT | |
26 | (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE | |
27 | OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |
28 | */ | |
29 | ||
30 | ||
31 | /*! \file | |
32 | \brief The parts of the runtime library used only on the host | |
33 | */ | |
34 | ||
35 | #ifndef OFFLOAD_HOST_H_INCLUDED | |
36 | #define OFFLOAD_HOST_H_INCLUDED | |
37 | ||
38 | #ifndef TARGET_WINNT | |
39 | #include <unistd.h> | |
40 | #endif // TARGET_WINNT | |
41 | #include "offload_common.h" | |
42 | #include "offload_util.h" | |
43 | #include "offload_engine.h" | |
44 | #include "offload_env.h" | |
45 | #include "offload_orsl.h" | |
46 | #include "coi/coi_client.h" | |
47 | ||
48 | // MIC engines. | |
2eab9666 IV |
49 | DLL_LOCAL extern Engine* mic_engines; |
50 | DLL_LOCAL extern uint32_t mic_engines_total; | |
51 | ||
52 | // DMA channel count used by COI and set via | |
53 | // OFFLOAD_DMA_CHANNEL_COUNT environment variable | |
54 | DLL_LOCAL extern uint32_t mic_dma_channel_count; | |
5f520819 KY |
55 | |
/*! \brief Packed layout of a target image.

    The address of symbol __offload_target_image is the address of this
    structure. The bytes are laid out as follows:
      1. 8 bytes holding the size field;
      2. a null-terminated string holding the binary name;
      3. `size` bytes holding the contents of the image.

    NOTE(review): the layout description above says `size` counts only the
    image contents, while the field comment below says it covers the name
    and the contents. Confirm against the code that unpacks the image.
*/
struct Image
{
    //! Size in bytes of the target binary name and contents
    int64_t size;

    //! The name and contents of the target image
    char data[];
};
66 | ||
67 | // The offload descriptor. | |
68 | class OffloadDescriptor | |
69 | { | |
70 | public: | |
2eab9666 IV |
71 | enum OmpAsyncLastEventType { |
72 | c_last_not, // not last event | |
73 | c_last_write, // the last event that is write | |
74 | c_last_read, // the last event that is read | |
75 | c_last_runfunc // the last event that is runfunction | |
76 | }; | |
77 | ||
5f520819 KY |
78 | OffloadDescriptor( |
79 | int index, | |
80 | _Offload_status *status, | |
81 | bool is_mandatory, | |
82 | bool is_openmp, | |
83 | OffloadHostTimerData * timer_data | |
84 | ) : | |
2eab9666 | 85 | m_device(mic_engines[index == -1 ? 0 : index % mic_engines_total]), |
5f520819 KY |
86 | m_is_mandatory(is_mandatory), |
87 | m_is_openmp(is_openmp), | |
88 | m_inout_buf(0), | |
89 | m_func_desc(0), | |
90 | m_func_desc_size(0), | |
df26a50d IV |
91 | m_num_in_dependencies(0), |
92 | m_p_in_dependencies(0), | |
5f520819 KY |
93 | m_in_deps(0), |
94 | m_in_deps_total(0), | |
2eab9666 | 95 | m_in_deps_allocated(0), |
5f520819 KY |
96 | m_out_deps(0), |
97 | m_out_deps_total(0), | |
2eab9666 | 98 | m_out_deps_allocated(0), |
5f520819 KY |
99 | m_vars(0), |
100 | m_vars_extra(0), | |
101 | m_status(status), | |
2eab9666 IV |
102 | m_timer_data(timer_data), |
103 | m_out_with_preallocated(false), | |
104 | m_preallocated_alloc(false), | |
105 | m_traceback_called(false), | |
106 | m_stream(-1), | |
df26a50d IV |
107 | m_signal(0), |
108 | m_has_signal(0), | |
2eab9666 IV |
109 | m_omp_async_last_event_type(c_last_not) |
110 | { | |
111 | m_wait_all_devices = index == -1; | |
112 | } | |
5f520819 KY |
113 | |
114 | ~OffloadDescriptor() | |
115 | { | |
116 | if (m_in_deps != 0) { | |
117 | free(m_in_deps); | |
118 | } | |
119 | if (m_out_deps != 0) { | |
120 | free(m_out_deps); | |
121 | } | |
122 | if (m_func_desc != 0) { | |
123 | free(m_func_desc); | |
124 | } | |
125 | if (m_vars != 0) { | |
126 | free(m_vars); | |
127 | free(m_vars_extra); | |
128 | } | |
129 | } | |
130 | ||
131 | bool offload(const char *name, bool is_empty, | |
132 | VarDesc *vars, VarDesc2 *vars2, int vars_total, | |
133 | const void **waits, int num_waits, const void **signal, | |
2eab9666 IV |
134 | int entry_id, const void *stack_addr, |
135 | OffloadFlags offload_flags); | |
136 | ||
137 | bool offload_finish(bool is_traceback); | |
5f520819 KY |
138 | |
139 | bool is_signaled(); | |
140 | ||
141 | OffloadHostTimerData* get_timer_data() const { | |
142 | return m_timer_data; | |
143 | } | |
144 | ||
2eab9666 IV |
145 | void set_stream(_Offload_stream stream) { |
146 | m_stream = stream; | |
147 | } | |
148 | ||
149 | _Offload_stream get_stream() { | |
150 | return(m_stream); | |
151 | } | |
152 | ||
df26a50d IV |
153 | Engine& get_device() { |
154 | return m_device; | |
155 | } | |
156 | ||
157 | void* get_signal() { | |
158 | return(m_signal); | |
159 | } | |
160 | ||
161 | void set_signal(const void* signal) { | |
162 | m_has_signal = 1; | |
163 | m_signal = const_cast<void*>(signal); | |
164 | } | |
165 | ||
166 | void cleanup(); | |
167 | ||
168 | uint32_t m_event_count; | |
169 | bool m_has_signal; | |
170 | ||
5f520819 | 171 | private: |
2eab9666 IV |
172 | bool offload_wrap(const char *name, bool is_empty, |
173 | VarDesc *vars, VarDesc2 *vars2, int vars_total, | |
174 | const void **waits, int num_waits, const void **signal, | |
175 | int entry_id, const void *stack_addr, | |
176 | OffloadFlags offload_flags); | |
177 | bool wait_dependencies(const void **waits, int num_waits, | |
178 | _Offload_stream stream); | |
5f520819 KY |
179 | bool setup_descriptors(VarDesc *vars, VarDesc2 *vars2, int vars_total, |
180 | int entry_id, const void *stack_addr); | |
181 | bool setup_misc_data(const char *name); | |
2eab9666 | 182 | bool send_pointer_data(bool is_async, void* info); |
5f520819 KY |
183 | bool send_noncontiguous_pointer_data( |
184 | int i, | |
185 | PtrData* src_buf, | |
186 | PtrData* dst_buf, | |
2eab9666 IV |
187 | COIEVENT *event, |
188 | uint64_t &sent_data, | |
189 | uint32_t in_deps_amount, | |
190 | COIEVENT *in_deps | |
191 | ); | |
192 | bool receive_noncontiguous_pointer_data( | |
5f520819 | 193 | int i, |
5f520819 | 194 | COIBUFFER dst_buf, |
2eab9666 IV |
195 | COIEVENT *event, |
196 | uint64_t &received_data, | |
197 | uint32_t in_deps_amount, | |
198 | COIEVENT *in_deps | |
199 | ); | |
5f520819 KY |
200 | |
201 | bool gather_copyin_data(); | |
202 | ||
2eab9666 | 203 | bool compute(void *); |
5f520819 | 204 | |
2eab9666 | 205 | bool receive_pointer_data(bool is_async, bool first_run, void * info); |
5f520819 KY |
206 | bool scatter_copyout_data(); |
207 | ||
5f520819 | 208 | bool find_ptr_data(PtrData* &ptr_data, void *base, int64_t disp, |
2eab9666 IV |
209 | int64_t length, bool is_targptr, |
210 | bool error_does_not_exist = true); | |
df26a50d IV |
211 | |
212 | void find_device_ptr( int64_t* &device_ptr, | |
213 | void *host_ptr); | |
214 | ||
5f520819 | 215 | bool alloc_ptr_data(PtrData* &ptr_data, void *base, int64_t disp, |
2eab9666 IV |
216 | int64_t length, int64_t alloc_disp, int align, |
217 | bool is_targptr, bool is_prealloc, bool pin); | |
218 | bool create_preallocated_buffer(PtrData* ptr_data, void *base); | |
5f520819 KY |
219 | bool init_static_ptr_data(PtrData *ptr_data); |
220 | bool init_mic_address(PtrData *ptr_data); | |
df26a50d IV |
221 | bool offload_stack_memory_manager( |
222 | const void * stack_begin, | |
223 | int routine_id, | |
224 | int buf_size, | |
225 | int align, | |
226 | bool thread_specific_function_locals, | |
227 | bool *is_new); | |
228 | char *get_this_threads_cpu_stack_addr( | |
229 | const void * stack_begin, | |
230 | int routine_id, | |
231 | bool thread_specific_function_locals); | |
232 | PtrData *get_this_threads_mic_stack_addr( | |
233 | const void * stack_begin, | |
234 | int routine_id, | |
235 | bool thread_specific_function_locals); | |
5f520819 KY |
236 | bool nullify_target_stack(COIBUFFER targ_buf, uint64_t size); |
237 | ||
238 | bool gen_var_descs_for_pointer_array(int i); | |
239 | ||
2eab9666 IV |
240 | void get_stream_in_dependencies(uint32_t &in_deps_amount, |
241 | COIEVENT* &in_deps); | |
242 | ||
5f520819 KY |
243 | void report_coi_error(error_types msg, COIRESULT res); |
244 | _Offload_result translate_coi_error(COIRESULT res) const; | |
df26a50d | 245 | |
2eab9666 | 246 | void setup_omp_async_info(); |
df26a50d IV |
247 | |
248 | void setup_use_device_ptr(int i); | |
249 | ||
250 | void register_event_call_back(void (*)( | |
251 | COIEVENT, | |
252 | const COIRESULT, | |
253 | const void*), | |
254 | const COIEVENT *event, | |
255 | const void *info); | |
256 | ||
2eab9666 | 257 | void register_omp_event_call_back(const COIEVENT *event, const void *info); |
df26a50d | 258 | |
5f520819 KY |
259 | private: |
260 | typedef std::list<COIBUFFER> BufferList; | |
261 | ||
262 | // extra data associated with each variable descriptor | |
263 | struct VarExtra { | |
264 | PtrData* src_data; | |
265 | PtrData* dst_data; | |
266 | AutoData* auto_data; | |
267 | int64_t cpu_disp; | |
268 | int64_t cpu_offset; | |
2eab9666 | 269 | void *alloc; |
df26a50d IV |
270 | union { |
271 | CeanReadRanges *read_rng_src; | |
272 | NonContigDesc *noncont_desc; | |
273 | }; | |
5f520819 KY |
274 | CeanReadRanges *read_rng_dst; |
275 | int64_t ptr_arr_offset; | |
276 | bool is_arr_ptr_el; | |
2eab9666 | 277 | OmpAsyncLastEventType omp_last_event_type; |
df26a50d IV |
278 | int64_t pointer_offset; |
279 | uint16_t type_src; | |
280 | uint16_t type_dst; | |
5f520819 KY |
281 | }; |
282 | ||
283 | template<typename T> class ReadArrElements { | |
284 | public: | |
285 | ReadArrElements(): | |
286 | ranges(NULL), | |
287 | el_size(sizeof(T)), | |
288 | offset(0), | |
289 | count(0), | |
290 | is_empty(true), | |
291 | base(NULL) | |
292 | {} | |
293 | ||
294 | bool read_next(bool flag) | |
295 | { | |
296 | if (flag != 0) { | |
297 | if (is_empty) { | |
298 | if (ranges) { | |
299 | if (!get_next_range(ranges, &offset)) { | |
300 | // ranges are over | |
301 | return false; | |
302 | } | |
303 | } | |
304 | // all contiguous elements are over | |
305 | else if (count != 0) { | |
306 | return false; | |
307 | } | |
308 | ||
309 | length_cur = size; | |
310 | } | |
311 | else { | |
312 | offset += el_size; | |
313 | } | |
314 | val = (T)get_el_value(base, offset, el_size); | |
315 | length_cur -= el_size; | |
316 | count++; | |
317 | is_empty = length_cur == 0; | |
318 | } | |
319 | return true; | |
320 | } | |
321 | public: | |
322 | CeanReadRanges * ranges; | |
323 | T val; | |
324 | int el_size; | |
325 | int64_t size, | |
326 | offset, | |
327 | length_cur; | |
328 | bool is_empty; | |
329 | int count; | |
330 | char *base; | |
331 | }; | |
332 | ||
333 | // ptr_data for persistent auto objects | |
334 | PtrData* m_stack_ptr_data; | |
335 | PtrDataList m_destroy_stack; | |
336 | ||
337 | // Engine | |
338 | Engine& m_device; | |
339 | ||
2eab9666 IV |
340 | // true for offload_wait target(mic) stream(0) |
341 | bool m_wait_all_devices; | |
342 | ||
5f520819 KY |
343 | // if true offload is mandatory |
344 | bool m_is_mandatory; | |
345 | ||
346 | // if true offload has openmp origin | |
347 | const bool m_is_openmp; | |
348 | ||
349 | // The Marshaller for the inputs of the offloaded region. | |
350 | Marshaller m_in; | |
351 | ||
352 | // The Marshaller for the outputs of the offloaded region. | |
353 | Marshaller m_out; | |
354 | ||
355 | // List of buffers that are passed to dispatch call | |
356 | BufferList m_compute_buffers; | |
357 | ||
358 | // List of buffers that need to be destroyed at the end of offload | |
359 | BufferList m_destroy_buffers; | |
360 | ||
361 | // Variable descriptors | |
362 | VarDesc* m_vars; | |
363 | VarExtra* m_vars_extra; | |
364 | int m_vars_total; | |
365 | ||
366 | // Pointer to a user-specified status variable | |
367 | _Offload_status *m_status; | |
368 | ||
369 | // Function descriptor | |
370 | FunctionDescriptor* m_func_desc; | |
371 | uint32_t m_func_desc_size; | |
372 | ||
373 | // Buffer for transferring copyin/copyout data | |
374 | COIBUFFER m_inout_buf; | |
375 | ||
df26a50d | 376 | |
5f520819 KY |
377 | // Dependencies |
378 | COIEVENT *m_in_deps; | |
379 | uint32_t m_in_deps_total; | |
2eab9666 | 380 | uint32_t m_in_deps_allocated; |
5f520819 KY |
381 | COIEVENT *m_out_deps; |
382 | uint32_t m_out_deps_total; | |
df26a50d IV |
383 | uint32_t m_out_deps_allocated; |
384 | ||
385 | // 2 variables defines input dependencies for current COI API. | |
386 | // The calls to routines as BufferWrite/PipelineRunFunction/BufferRead | |
387 | // is supposed to have input dependencies. | |
388 | // 2 variables below defines the number and vector of dependencies | |
389 | // in every current moment of offload. | |
390 | // So any phase of offload can use its values as input dependencies | |
391 | // for the COI API that the phase calls. | |
392 | // It means that all phases (of Write, RunFunction,Read) must keep | |
393 | // the variables correct to be used by following phase. | |
394 | // If some consequent offloads are connected (i.e. by the same stream) | |
395 | // the final 2 variables of the offload is used as initial inputs | |
396 | // for the next offload. | |
397 | uint32_t m_num_in_dependencies; | |
398 | COIEVENT *m_p_in_dependencies; | |
2eab9666 IV |
399 | |
400 | // Stream | |
401 | _Offload_stream m_stream; | |
5f520819 | 402 | |
df26a50d IV |
403 | // Signal |
404 | void* m_signal; | |
405 | ||
5f520819 KY |
406 | // Timer data |
407 | OffloadHostTimerData *m_timer_data; | |
408 | ||
409 | // copyin/copyout data length | |
410 | uint64_t m_in_datalen; | |
411 | uint64_t m_out_datalen; | |
412 | ||
413 | // a boolean value calculated in setup_descriptors. If true we need to do | |
414 | // a run function on the target. Otherwise it may be optimized away. | |
415 | bool m_need_runfunction; | |
2eab9666 IV |
416 | |
417 | // initialized value of m_need_runfunction; | |
418 | // is used to recognize offload_transfer | |
419 | bool m_initial_need_runfunction; | |
420 | ||
421 | // a Boolean value set to true when OUT clauses with preallocated targetptr | |
422 | // is encountered to indicate that call receive_pointer_data needs to be | |
423 | // invoked again after call to scatter_copyout_data. | |
424 | bool m_out_with_preallocated; | |
425 | ||
426 | // a Boolean value set to true if an alloc_if(1) is used with preallocated | |
427 | // targetptr to indicate the need to scatter_copyout_data even for | |
428 | // async offload | |
429 | bool m_preallocated_alloc; | |
430 | ||
431 | // a Boolean value set to true if traceback routine is called | |
432 | bool m_traceback_called; | |
433 | ||
434 | OmpAsyncLastEventType m_omp_async_last_event_type; | |
5f520819 KY |
435 | }; |
436 | ||
// Selects when the MIC devices get initialized.
enum OffloadInitType
{
    // all devices before entering main
    c_init_on_start       = 0,
    // single device before starting the first offload
    c_init_on_offload     = 1,
    // all devices before starting the first offload
    c_init_on_offload_all = 2
};
443 | ||
2eab9666 IV |
444 | // Determines if MIC code is an executable or a shared library |
445 | extern "C" bool __offload_target_image_is_executable(const void *target_image); | |
446 | ||
5f520819 | 447 | // Initializes library and registers specified offload image. |
2eab9666 | 448 | extern "C" bool __offload_register_image(const void* image); |
5f520819 KY |
449 | extern "C" void __offload_unregister_image(const void* image); |
450 | ||
e4606348 JJ |
451 | // Registers asynchronous task completion callback |
452 | extern "C" void __offload_register_task_callback(void (*cb)(void *)); | |
453 | ||
5f520819 | 454 | // Initializes offload runtime library. |
2eab9666 | 455 | DLL_LOCAL extern int __offload_init_library(void); |
5f520819 KY |
456 | |
457 | // thread data for associating pipelines with threads | |
2eab9666 IV |
458 | DLL_LOCAL extern pthread_key_t mic_thread_key; |
459 | ||
460 | // location of offload_main executable | |
461 | // To be used if the main application has no offload and is not built | |
462 | // with -offload but dynamic library linked in has offload pragma | |
463 | DLL_LOCAL extern char* mic_device_main; | |
5f520819 KY |
464 | |
465 | // Environment variables for devices | |
2eab9666 | 466 | DLL_LOCAL extern MicEnvVar mic_env_vars; |
5f520819 KY |
467 | |
468 | // CPU frequency | |
2eab9666 | 469 | DLL_LOCAL extern uint64_t cpu_frequency; |
5f520819 | 470 | |
df26a50d IV |
471 | // LD_LIBRARY_PATH for KNC libraries |
472 | DLL_LOCAL extern char* knc_library_path; | |
473 | ||
474 | // LD_LIBRARY_PATH for KNL libraries | |
475 | DLL_LOCAL extern char* knl_library_path; | |
5f520819 KY |
476 | |
477 | // stack size for target | |
2eab9666 | 478 | DLL_LOCAL extern uint32_t mic_stack_size; |
5f520819 KY |
479 | |
480 | // Preallocated memory size for buffers on MIC | |
2eab9666 IV |
481 | DLL_LOCAL extern uint64_t mic_buffer_size; |
482 | ||
483 | // Preallocated 4K page memory size for buffers on MIC | |
484 | DLL_LOCAL extern uint64_t mic_4k_buffer_size; | |
485 | ||
486 | // Preallocated 2M page memory size for buffers on MIC | |
487 | DLL_LOCAL extern uint64_t mic_2m_buffer_size; | |
5f520819 KY |
488 | |
489 | // Setting controlling inout proxy | |
2eab9666 IV |
490 | DLL_LOCAL extern bool mic_proxy_io; |
491 | DLL_LOCAL extern char* mic_proxy_fs_root; | |
5f520819 KY |
492 | |
493 | // Threshold for creating buffers with large pages | |
2eab9666 | 494 | DLL_LOCAL extern uint64_t __offload_use_2mb_buffers; |
5f520819 KY |
495 | |
496 | // offload initialization type | |
2eab9666 | 497 | DLL_LOCAL extern OffloadInitType __offload_init_type; |
5f520819 KY |
498 | |
499 | // Device number to offload to when device is not explicitly specified. | |
2eab9666 | 500 | DLL_LOCAL extern int __omp_device_num; |
5f520819 KY |
501 | |
502 | // target executable | |
2eab9666 | 503 | DLL_LOCAL extern TargetImage* __target_exe; |
5f520819 | 504 | |
df26a50d IV |
505 | // is true if last loaded image is dll |
506 | DLL_LOCAL extern bool __current_image_is_dll; | |
507 | // is true if myo library is loaded when dll is loaded | |
508 | DLL_LOCAL extern bool __myo_init_in_so; | |
509 | ||
5f520819 KY |
// IDB support

extern "C" {

// Called by the offload runtime after initialization of offload infrastructure
// has been completed.
void __dbg_target_so_loaded();

// Called by the offload runtime when the offload infrastructure is about to be
// shut down, currently at application exit.
void __dbg_target_so_unloaded();

// Null-terminated string containing path to the process image of the hosting
// application (offload_main)
#define MAX_TARGET_NAME 512
extern char __dbg_target_exe_name[MAX_TARGET_NAME];

// Integer specifying the process id
extern pid_t __dbg_target_so_pid;

// Integer specifying the 0-based device number
extern int __dbg_target_id;

// Set to non-zero by the host-side debugger to enable offload debugging
// support
extern int __dbg_is_attached;

// Major version of the debugger support API
extern const int __dbg_api_major_version;

// Minor version of the debugger support API
extern const int __dbg_api_minor_version;

} // extern "C"
540 | ||
541 | #endif // OFFLOAD_HOST_H_INCLUDED |