]>
Commit | Line | Data |
---|---|---|
5f520819 | 1 | /* |
2eab9666 | 2 | Copyright (c) 2014-2015 Intel Corporation. All Rights Reserved. |
5f520819 KY |
3 | |
4 | Redistribution and use in source and binary forms, with or without | |
5 | modification, are permitted provided that the following conditions | |
6 | are met: | |
7 | ||
8 | * Redistributions of source code must retain the above copyright | |
9 | notice, this list of conditions and the following disclaimer. | |
10 | * Redistributions in binary form must reproduce the above copyright | |
11 | notice, this list of conditions and the following disclaimer in the | |
12 | documentation and/or other materials provided with the distribution. | |
13 | * Neither the name of Intel Corporation nor the names of its | |
14 | contributors may be used to endorse or promote products derived | |
15 | from this software without specific prior written permission. | |
16 | ||
17 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS | |
18 | "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT | |
19 | LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR | |
20 | A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT | |
21 | HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, | |
22 | SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT | |
23 | LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, | |
24 | DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY | |
25 | THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT | |
26 | (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE | |
27 | OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |
28 | */ | |
29 | ||
30 | ||
31 | /*! \file | |
32 | \brief The parts of the runtime library used only on the host | |
33 | */ | |
34 | ||
35 | #ifndef OFFLOAD_HOST_H_INCLUDED | |
36 | #define OFFLOAD_HOST_H_INCLUDED | |
37 | ||
38 | #ifndef TARGET_WINNT | |
39 | #include <unistd.h> | |
40 | #endif // TARGET_WINNT | |
41 | #include "offload_common.h" | |
42 | #include "offload_util.h" | |
43 | #include "offload_engine.h" | |
44 | #include "offload_env.h" | |
45 | #include "offload_orsl.h" | |
46 | #include "coi/coi_client.h" | |
47 | ||
48 | // MIC engines. | |
2eab9666 IV |
49 | DLL_LOCAL extern Engine* mic_engines; |
50 | DLL_LOCAL extern uint32_t mic_engines_total; | |
51 | ||
52 | // DMA channel count used by COI and set via | |
53 | // OFFLOAD_DMA_CHANNEL_COUNT environment variable | |
54 | DLL_LOCAL extern uint32_t mic_dma_channel_count; | |
5f520819 KY |
55 | |
//! Header of a packed target image.
/*! The target image is packed as follows:
      1. 8 bytes containing the size of the target binary
      2. a null-terminated string which is the binary name
      3. <size> number of bytes that are the contents of the image

    The address of symbol __offload_target_image is the address of this
    structure.

    NOTE(review): item 1 describes the size as that of the target binary,
    while the comment on the `size` field says it covers the name as well;
    confirm against the code that unpacks the image.
*/
struct Image {
    //! Size in bytes of the target binary name and contents
    int64_t size;

    //! The name and contents of the target image (flexible array member)
    char data[];
};
66 | ||
67 | // The offload descriptor. | |
68 | class OffloadDescriptor | |
69 | { | |
70 | public: | |
2eab9666 IV |
71 | enum OmpAsyncLastEventType { |
72 | c_last_not, // not last event | |
73 | c_last_write, // the last event that is write | |
74 | c_last_read, // the last event that is read | |
75 | c_last_runfunc // the last event that is runfunction | |
76 | }; | |
77 | ||
5f520819 KY |
78 | OffloadDescriptor( |
79 | int index, | |
80 | _Offload_status *status, | |
81 | bool is_mandatory, | |
82 | bool is_openmp, | |
83 | OffloadHostTimerData * timer_data | |
84 | ) : | |
2eab9666 | 85 | m_device(mic_engines[index == -1 ? 0 : index % mic_engines_total]), |
5f520819 KY |
86 | m_is_mandatory(is_mandatory), |
87 | m_is_openmp(is_openmp), | |
88 | m_inout_buf(0), | |
89 | m_func_desc(0), | |
90 | m_func_desc_size(0), | |
91 | m_in_deps(0), | |
92 | m_in_deps_total(0), | |
2eab9666 | 93 | m_in_deps_allocated(0), |
5f520819 KY |
94 | m_out_deps(0), |
95 | m_out_deps_total(0), | |
2eab9666 | 96 | m_out_deps_allocated(0), |
5f520819 KY |
97 | m_vars(0), |
98 | m_vars_extra(0), | |
99 | m_status(status), | |
2eab9666 IV |
100 | m_timer_data(timer_data), |
101 | m_out_with_preallocated(false), | |
102 | m_preallocated_alloc(false), | |
103 | m_traceback_called(false), | |
104 | m_stream(-1), | |
105 | m_omp_async_last_event_type(c_last_not) | |
106 | { | |
107 | m_wait_all_devices = index == -1; | |
108 | } | |
5f520819 KY |
109 | |
110 | ~OffloadDescriptor() | |
111 | { | |
112 | if (m_in_deps != 0) { | |
113 | free(m_in_deps); | |
114 | } | |
115 | if (m_out_deps != 0) { | |
116 | free(m_out_deps); | |
117 | } | |
118 | if (m_func_desc != 0) { | |
119 | free(m_func_desc); | |
120 | } | |
121 | if (m_vars != 0) { | |
122 | free(m_vars); | |
123 | free(m_vars_extra); | |
124 | } | |
125 | } | |
126 | ||
127 | bool offload(const char *name, bool is_empty, | |
128 | VarDesc *vars, VarDesc2 *vars2, int vars_total, | |
129 | const void **waits, int num_waits, const void **signal, | |
2eab9666 IV |
130 | int entry_id, const void *stack_addr, |
131 | OffloadFlags offload_flags); | |
132 | ||
133 | bool offload_finish(bool is_traceback); | |
5f520819 KY |
134 | |
135 | bool is_signaled(); | |
136 | ||
137 | OffloadHostTimerData* get_timer_data() const { | |
138 | return m_timer_data; | |
139 | } | |
140 | ||
2eab9666 IV |
141 | void set_stream(_Offload_stream stream) { |
142 | m_stream = stream; | |
143 | } | |
144 | ||
145 | _Offload_stream get_stream() { | |
146 | return(m_stream); | |
147 | } | |
148 | ||
5f520819 | 149 | private: |
2eab9666 IV |
150 | bool offload_wrap(const char *name, bool is_empty, |
151 | VarDesc *vars, VarDesc2 *vars2, int vars_total, | |
152 | const void **waits, int num_waits, const void **signal, | |
153 | int entry_id, const void *stack_addr, | |
154 | OffloadFlags offload_flags); | |
155 | bool wait_dependencies(const void **waits, int num_waits, | |
156 | _Offload_stream stream); | |
5f520819 KY |
157 | bool setup_descriptors(VarDesc *vars, VarDesc2 *vars2, int vars_total, |
158 | int entry_id, const void *stack_addr); | |
159 | bool setup_misc_data(const char *name); | |
2eab9666 | 160 | bool send_pointer_data(bool is_async, void* info); |
5f520819 KY |
161 | bool send_noncontiguous_pointer_data( |
162 | int i, | |
163 | PtrData* src_buf, | |
164 | PtrData* dst_buf, | |
2eab9666 IV |
165 | COIEVENT *event, |
166 | uint64_t &sent_data, | |
167 | uint32_t in_deps_amount, | |
168 | COIEVENT *in_deps | |
169 | ); | |
170 | bool receive_noncontiguous_pointer_data( | |
5f520819 | 171 | int i, |
5f520819 | 172 | COIBUFFER dst_buf, |
2eab9666 IV |
173 | COIEVENT *event, |
174 | uint64_t &received_data, | |
175 | uint32_t in_deps_amount, | |
176 | COIEVENT *in_deps | |
177 | ); | |
5f520819 KY |
178 | |
179 | bool gather_copyin_data(); | |
180 | ||
2eab9666 | 181 | bool compute(void *); |
5f520819 | 182 | |
2eab9666 | 183 | bool receive_pointer_data(bool is_async, bool first_run, void * info); |
5f520819 KY |
184 | bool scatter_copyout_data(); |
185 | ||
186 | void cleanup(); | |
187 | ||
188 | bool find_ptr_data(PtrData* &ptr_data, void *base, int64_t disp, | |
2eab9666 IV |
189 | int64_t length, bool is_targptr, |
190 | bool error_does_not_exist = true); | |
5f520819 | 191 | bool alloc_ptr_data(PtrData* &ptr_data, void *base, int64_t disp, |
2eab9666 IV |
192 | int64_t length, int64_t alloc_disp, int align, |
193 | bool is_targptr, bool is_prealloc, bool pin); | |
194 | bool create_preallocated_buffer(PtrData* ptr_data, void *base); | |
5f520819 KY |
195 | bool init_static_ptr_data(PtrData *ptr_data); |
196 | bool init_mic_address(PtrData *ptr_data); | |
197 | bool offload_stack_memory_manager(const void * stack_begin, int routine_id, | |
198 | int buf_size, int align, bool *is_new); | |
199 | bool nullify_target_stack(COIBUFFER targ_buf, uint64_t size); | |
200 | ||
201 | bool gen_var_descs_for_pointer_array(int i); | |
202 | ||
2eab9666 IV |
203 | void get_stream_in_dependencies(uint32_t &in_deps_amount, |
204 | COIEVENT* &in_deps); | |
205 | ||
5f520819 KY |
206 | void report_coi_error(error_types msg, COIRESULT res); |
207 | _Offload_result translate_coi_error(COIRESULT res) const; | |
2eab9666 IV |
208 | |
209 | void setup_omp_async_info(); | |
210 | void register_omp_event_call_back(const COIEVENT *event, const void *info); | |
211 | ||
5f520819 KY |
212 | private: |
213 | typedef std::list<COIBUFFER> BufferList; | |
214 | ||
215 | // extra data associated with each variable descriptor | |
216 | struct VarExtra { | |
217 | PtrData* src_data; | |
218 | PtrData* dst_data; | |
219 | AutoData* auto_data; | |
220 | int64_t cpu_disp; | |
221 | int64_t cpu_offset; | |
2eab9666 | 222 | void *alloc; |
5f520819 KY |
223 | CeanReadRanges *read_rng_src; |
224 | CeanReadRanges *read_rng_dst; | |
225 | int64_t ptr_arr_offset; | |
226 | bool is_arr_ptr_el; | |
2eab9666 | 227 | OmpAsyncLastEventType omp_last_event_type; |
5f520819 KY |
228 | }; |
229 | ||
230 | template<typename T> class ReadArrElements { | |
231 | public: | |
232 | ReadArrElements(): | |
233 | ranges(NULL), | |
234 | el_size(sizeof(T)), | |
235 | offset(0), | |
236 | count(0), | |
237 | is_empty(true), | |
238 | base(NULL) | |
239 | {} | |
240 | ||
241 | bool read_next(bool flag) | |
242 | { | |
243 | if (flag != 0) { | |
244 | if (is_empty) { | |
245 | if (ranges) { | |
246 | if (!get_next_range(ranges, &offset)) { | |
247 | // ranges are over | |
248 | return false; | |
249 | } | |
250 | } | |
251 | // all contiguous elements are over | |
252 | else if (count != 0) { | |
253 | return false; | |
254 | } | |
255 | ||
256 | length_cur = size; | |
257 | } | |
258 | else { | |
259 | offset += el_size; | |
260 | } | |
261 | val = (T)get_el_value(base, offset, el_size); | |
262 | length_cur -= el_size; | |
263 | count++; | |
264 | is_empty = length_cur == 0; | |
265 | } | |
266 | return true; | |
267 | } | |
268 | public: | |
269 | CeanReadRanges * ranges; | |
270 | T val; | |
271 | int el_size; | |
272 | int64_t size, | |
273 | offset, | |
274 | length_cur; | |
275 | bool is_empty; | |
276 | int count; | |
277 | char *base; | |
278 | }; | |
279 | ||
280 | // ptr_data for persistent auto objects | |
281 | PtrData* m_stack_ptr_data; | |
282 | PtrDataList m_destroy_stack; | |
283 | ||
284 | // Engine | |
285 | Engine& m_device; | |
286 | ||
2eab9666 IV |
287 | // true for offload_wait target(mic) stream(0) |
288 | bool m_wait_all_devices; | |
289 | ||
5f520819 KY |
290 | // if true offload is mandatory |
291 | bool m_is_mandatory; | |
292 | ||
293 | // if true offload has openmp origin | |
294 | const bool m_is_openmp; | |
295 | ||
296 | // The Marshaller for the inputs of the offloaded region. | |
297 | Marshaller m_in; | |
298 | ||
299 | // The Marshaller for the outputs of the offloaded region. | |
300 | Marshaller m_out; | |
301 | ||
302 | // List of buffers that are passed to dispatch call | |
303 | BufferList m_compute_buffers; | |
304 | ||
305 | // List of buffers that need to be destroyed at the end of offload | |
306 | BufferList m_destroy_buffers; | |
307 | ||
308 | // Variable descriptors | |
309 | VarDesc* m_vars; | |
310 | VarExtra* m_vars_extra; | |
311 | int m_vars_total; | |
312 | ||
313 | // Pointer to a user-specified status variable | |
314 | _Offload_status *m_status; | |
315 | ||
316 | // Function descriptor | |
317 | FunctionDescriptor* m_func_desc; | |
318 | uint32_t m_func_desc_size; | |
319 | ||
320 | // Buffer for transferring copyin/copyout data | |
321 | COIBUFFER m_inout_buf; | |
322 | ||
323 | // Dependencies | |
324 | COIEVENT *m_in_deps; | |
325 | uint32_t m_in_deps_total; | |
2eab9666 | 326 | uint32_t m_in_deps_allocated; |
5f520819 KY |
327 | COIEVENT *m_out_deps; |
328 | uint32_t m_out_deps_total; | |
2eab9666 IV |
329 | uint32_t m_out_deps_allocated; |
330 | ||
331 | // Stream | |
332 | _Offload_stream m_stream; | |
5f520819 KY |
333 | |
334 | // Timer data | |
335 | OffloadHostTimerData *m_timer_data; | |
336 | ||
337 | // copyin/copyout data length | |
338 | uint64_t m_in_datalen; | |
339 | uint64_t m_out_datalen; | |
340 | ||
341 | // a boolean value calculated in setup_descriptors. If true we need to do | |
342 | // a run function on the target. Otherwise it may be optimized away. | |
343 | bool m_need_runfunction; | |
2eab9666 IV |
344 | |
345 | // initialized value of m_need_runfunction; | |
346 | // is used to recognize offload_transfer | |
347 | bool m_initial_need_runfunction; | |
348 | ||
349 | // a Boolean value set to true when OUT clauses with preallocated targetptr | |
350 | // is encountered to indicate that call receive_pointer_data needs to be | |
351 | // invoked again after call to scatter_copyout_data. | |
352 | bool m_out_with_preallocated; | |
353 | ||
354 | // a Boolean value set to true if an alloc_if(1) is used with preallocated | |
355 | // targetptr to indicate the need to scatter_copyout_data even for | |
356 | // async offload | |
357 | bool m_preallocated_alloc; | |
358 | ||
359 | // a Boolean value set to true if traceback routine is called | |
360 | bool m_traceback_called; | |
361 | ||
362 | OmpAsyncLastEventType m_omp_async_last_event_type; | |
5f520819 KY |
363 | }; |
364 | ||
// Initialization types for MIC: which devices are initialized, and when.
enum OffloadInitType {
    // all devices before entering main
    c_init_on_start = 0,
    // single device before starting the first offload
    c_init_on_offload,
    // all devices before starting the first offload
    c_init_on_offload_all
};
371 | ||
2eab9666 IV |
372 | // Determines if MIC code is an executable or a shared library |
373 | extern "C" bool __offload_target_image_is_executable(const void *target_image); | |
374 | ||
5f520819 | 375 | // Initializes library and registers specified offload image. |
2eab9666 | 376 | extern "C" bool __offload_register_image(const void* image); |
5f520819 KY |
377 | extern "C" void __offload_unregister_image(const void* image); |
378 | ||
379 | // Initializes offload runtime library. | |
2eab9666 | 380 | DLL_LOCAL extern int __offload_init_library(void); |
5f520819 KY |
381 | |
382 | // thread data for associating pipelines with threads | |
2eab9666 IV |
383 | DLL_LOCAL extern pthread_key_t mic_thread_key; |
384 | ||
385 | // Location of the offload_main executable. | |
386 | // To be used if the main application has no offload and is not built | |
387 | // with -offload, but a dynamic library linked into it has an offload pragma. | |
388 | DLL_LOCAL extern char* mic_device_main; | |
5f520819 KY |
389 | |
390 | // Environment variables for devices | |
2eab9666 | 391 | DLL_LOCAL extern MicEnvVar mic_env_vars; |
5f520819 KY |
392 | |
393 | // CPU frequency | |
2eab9666 | 394 | DLL_LOCAL extern uint64_t cpu_frequency; |
5f520819 KY |
395 | |
396 | // LD_LIBRARY_PATH for MIC libraries | |
2eab9666 | 397 | DLL_LOCAL extern char* mic_library_path; |
5f520819 KY |
398 | |
399 | // stack size for target | |
2eab9666 | 400 | DLL_LOCAL extern uint32_t mic_stack_size; |
5f520819 KY |
401 | |
402 | // Preallocated memory size for buffers on MIC | |
2eab9666 IV |
403 | DLL_LOCAL extern uint64_t mic_buffer_size; |
404 | ||
405 | // Preallocated 4K page memory size for buffers on MIC | |
406 | DLL_LOCAL extern uint64_t mic_4k_buffer_size; | |
407 | ||
408 | // Preallocated 2M page memory size for buffers on MIC | |
409 | DLL_LOCAL extern uint64_t mic_2m_buffer_size; | |
5f520819 KY |
410 | |
411 | // Settings controlling the I/O proxy | |
2eab9666 IV |
412 | DLL_LOCAL extern bool mic_proxy_io; |
413 | DLL_LOCAL extern char* mic_proxy_fs_root; | |
5f520819 KY |
414 | |
415 | // Threshold for creating buffers with large pages | |
2eab9666 | 416 | DLL_LOCAL extern uint64_t __offload_use_2mb_buffers; |
5f520819 KY |
417 | |
418 | // offload initialization type | |
2eab9666 | 419 | DLL_LOCAL extern OffloadInitType __offload_init_type; |
5f520819 KY |
420 | |
421 | // Device number to offload to when device is not explicitly specified. | |
2eab9666 | 422 | DLL_LOCAL extern int __omp_device_num; |
5f520819 KY |
423 | |
424 | // target executable | |
2eab9666 | 425 | DLL_LOCAL extern TargetImage* __target_exe; |
5f520819 KY |
426 | |
427 | // IDB support | |
428 | ||
429 | // Called by the offload runtime after initialization of offload infrastructure | |
430 | // has been completed. | |
431 | extern "C" void __dbg_target_so_loaded(); | |
432 | ||
433 | // Called by the offload runtime when the offload infrastructure is about to be | |
434 | // shut down, currently at application exit. | |
435 | extern "C" void __dbg_target_so_unloaded(); | |
436 | ||
437 | // Null-terminated string containing path to the process image of the hosting | |
438 | // application (offload_main) | |
439 | #define MAX_TARGET_NAME 512 | |
440 | extern "C" char __dbg_target_exe_name[MAX_TARGET_NAME]; | |
441 | ||
442 | // Integer specifying the process id | |
443 | extern "C" pid_t __dbg_target_so_pid; | |
444 | ||
445 | // Integer specifying the 0-based device number | |
446 | extern "C" int __dbg_target_id; | |
447 | ||
448 | // Set to non-zero by the host-side debugger to enable offload debugging | |
449 | // support | |
450 | extern "C" int __dbg_is_attached; | |
451 | ||
452 | // Major version of the debugger support API | |
453 | extern "C" const int __dbg_api_major_version; | |
454 | ||
455 | // Minor version of the debugger support API | |
456 | extern "C" const int __dbg_api_minor_version; | |
457 | ||
458 | #endif // OFFLOAD_HOST_H_INCLUDED |