]>
Commit | Line | Data |
---|---|---|
5f520819 | 1 | /* |
2eab9666 | 2 | Copyright (c) 2014-2015 Intel Corporation. All Rights Reserved. |
5f520819 KY |
3 | |
4 | Redistribution and use in source and binary forms, with or without | |
5 | modification, are permitted provided that the following conditions | |
6 | are met: | |
7 | ||
8 | * Redistributions of source code must retain the above copyright | |
9 | notice, this list of conditions and the following disclaimer. | |
10 | * Redistributions in binary form must reproduce the above copyright | |
11 | notice, this list of conditions and the following disclaimer in the | |
12 | documentation and/or other materials provided with the distribution. | |
13 | * Neither the name of Intel Corporation nor the names of its | |
14 | contributors may be used to endorse or promote products derived | |
15 | from this software without specific prior written permission. | |
16 | ||
17 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS | |
18 | "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT | |
19 | LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR | |
20 | A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT | |
21 | HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, | |
22 | SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT | |
23 | LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, | |
24 | DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY | |
25 | THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT | |
26 | (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE | |
27 | OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |
28 | */ | |
29 | ||
30 | ||
31 | #include "offload_target.h" | |
32 | #include <stdlib.h> | |
33 | #include <unistd.h> | |
34 | #ifdef SEP_SUPPORT | |
35 | #include <fcntl.h> | |
36 | #include <sys/ioctl.h> | |
37 | #endif // SEP_SUPPORT | |
38 | #include <omp.h> | |
39 | #include <map> | |
40 | ||
// offload_func_with_parms
// Signature of an offloaded entry point as invoked by
// OffloadDescriptor::offload(): a function taking one untyped pointer
// and returning nothing. Passing the parameters this way is a temporary
// fix for parameters on the stack.
typedef void (*offload_func_with_parms)(void *descriptor);
45 | ||
// Target-side console and file logging state.
// prefix is filled in by __offload_target_init(); the two levels are
// refreshed from the function descriptor at the start of every offload.
const char *prefix = 0;
int console_enabled = 0;
int offload_report_level = 0;
50 | ||
// Trace information
// Printable names used by the debug traces in merge_var_descs().
// Both the strings and the table slots are const so that no code can
// reseat an entry by accident.

// Indexed by VarDesc::direction.bits.
static const char* const vardesc_direction_as_string[] = {
    "NOCOPY",
    "IN",
    "OUT",
    "INOUT"
};

// Indexed by VarDesc::type.src / VarDesc::type.dst.
static const char* const vardesc_type_as_string[] = {
    "unknown",
    "data",
    "data_ptr",
    "func_ptr",
    "void_ptr",
    "string_ptr",
    "dv",
    "dv_data",
    "dv_data_slice",
    "dv_ptr",
    "dv_ptr_data",
    "dv_ptr_data_slice",
    "cean_var",
    "cean_var_ptr",
    "c_data_ptr_array"
};
75 | ||
76 | int mic_index = -1; | |
77 | int mic_engines_total = -1; | |
78 | uint64_t mic_frequency = 0; | |
79 | int offload_number = 0; | |
80 | static std::map<void*, RefInfo*> ref_data; | |
81 | static mutex_t add_ref_lock; | |
82 | ||
83 | #ifdef SEP_SUPPORT | |
// Names of the environment variables read by __offload_target_init().
static const char* sep_monitor_env = "SEP_MONITOR";
static const char* sep_device_env = "SEP_DEVICE";

// Sampling control state: whether monitoring is on, which device node is
// opened for the pause/resume ioctls (may be overridden from the
// environment), and how many offloads are currently in flight.
static bool sep_monitor = false;
static const char* sep_device = "/dev/sep3.8/c";
static int sep_counter = 0;

// ioctl request codes issued by VTPauseSampling()/VTResumeSampling().
#define SEP_API_IOC_MAGIC 99
#define SEP_IOCTL_PAUSE _IO (SEP_API_IOC_MAGIC, 31)
#define SEP_IOCTL_RESUME _IO (SEP_API_IOC_MAGIC, 32)
93 | ||
94 | static void add_ref_count(void * buf, bool created) | |
95 | { | |
96 | mutex_locker_t locker(add_ref_lock); | |
97 | RefInfo * info = ref_data[buf]; | |
98 | ||
99 | if (info) { | |
100 | info->count++; | |
101 | } | |
102 | else { | |
103 | info = new RefInfo((int)created,(long)1); | |
104 | } | |
105 | info->is_added |= created; | |
106 | ref_data[buf] = info; | |
107 | } | |
108 | ||
109 | static void BufReleaseRef(void * buf) | |
110 | { | |
111 | mutex_locker_t locker(add_ref_lock); | |
112 | RefInfo * info = ref_data[buf]; | |
113 | ||
114 | if (info) { | |
115 | --info->count; | |
116 | if (info->count == 0 && info->is_added) { | |
2eab9666 IV |
117 | OFFLOAD_TRACE(1, "Calling COIBufferReleaseRef AddRef count = %d\n", |
118 | ((RefInfo *) ref_data[buf])->count); | |
5f520819 KY |
119 | BufferReleaseRef(buf); |
120 | info->is_added = 0; | |
121 | } | |
122 | } | |
123 | } | |
124 | ||
125 | static int VTPauseSampling(void) | |
126 | { | |
127 | int ret = -1; | |
128 | int handle = open(sep_device, O_RDWR); | |
129 | if (handle > 0) { | |
130 | ret = ioctl(handle, SEP_IOCTL_PAUSE); | |
131 | close(handle); | |
132 | } | |
133 | return ret; | |
134 | } | |
135 | ||
136 | static int VTResumeSampling(void) | |
137 | { | |
138 | int ret = -1; | |
139 | int handle = open(sep_device, O_RDWR); | |
140 | if (handle > 0) { | |
141 | ret = ioctl(handle, SEP_IOCTL_RESUME); | |
142 | close(handle); | |
143 | } | |
144 | return ret; | |
145 | } | |
146 | #endif // SEP_SUPPORT | |
147 | ||
148 | void OffloadDescriptor::offload( | |
149 | uint32_t buffer_count, | |
150 | void** buffers, | |
151 | void* misc_data, | |
152 | uint16_t misc_data_len, | |
153 | void* return_data, | |
154 | uint16_t return_data_len | |
155 | ) | |
156 | { | |
157 | FunctionDescriptor *func = (FunctionDescriptor*) misc_data; | |
158 | const char *name = func->data; | |
159 | OffloadDescriptor ofld; | |
160 | char *in_data = 0; | |
161 | char *out_data = 0; | |
162 | char *timer_data = 0; | |
163 | ||
164 | console_enabled = func->console_enabled; | |
165 | timer_enabled = func->timer_enabled; | |
166 | offload_report_level = func->offload_report_level; | |
167 | offload_number = func->offload_number; | |
168 | ofld.set_offload_number(func->offload_number); | |
169 | ||
170 | #ifdef SEP_SUPPORT | |
171 | if (sep_monitor) { | |
172 | if (__sync_fetch_and_add(&sep_counter, 1) == 0) { | |
173 | OFFLOAD_DEBUG_TRACE(2, "VTResumeSampling\n"); | |
174 | VTResumeSampling(); | |
175 | } | |
176 | } | |
177 | #endif // SEP_SUPPORT | |
178 | ||
179 | OFFLOAD_DEBUG_TRACE_1(2, ofld.get_offload_number(), | |
180 | c_offload_start_target_func, | |
181 | "Offload \"%s\" started\n", name); | |
182 | ||
183 | // initialize timer data | |
184 | OFFLOAD_TIMER_INIT(); | |
185 | ||
186 | OFFLOAD_TIMER_START(c_offload_target_total_time); | |
187 | ||
188 | OFFLOAD_TIMER_START(c_offload_target_descriptor_setup); | |
189 | ||
190 | // get input/output buffer addresses | |
191 | if (func->in_datalen > 0 || func->out_datalen > 0) { | |
192 | if (func->data_offset != 0) { | |
193 | in_data = (char*) misc_data + func->data_offset; | |
194 | out_data = (char*) return_data; | |
195 | } | |
196 | else { | |
197 | char *inout_buf = (char*) buffers[--buffer_count]; | |
198 | in_data = inout_buf; | |
199 | out_data = inout_buf; | |
200 | } | |
201 | } | |
202 | ||
203 | // assign variable descriptors | |
204 | ofld.m_vars_total = func->vars_num; | |
205 | if (ofld.m_vars_total > 0) { | |
206 | uint64_t var_data_len = ofld.m_vars_total * sizeof(VarDesc); | |
207 | ||
208 | ofld.m_vars = (VarDesc*) malloc(var_data_len); | |
209 | if (ofld.m_vars == NULL) | |
210 | LIBOFFLOAD_ERROR(c_malloc); | |
211 | memcpy(ofld.m_vars, in_data, var_data_len); | |
212 | ||
213 | in_data += var_data_len; | |
214 | func->in_datalen -= var_data_len; | |
215 | } | |
216 | ||
217 | // timer data | |
218 | if (func->timer_enabled) { | |
219 | uint64_t timer_data_len = OFFLOAD_TIMER_DATALEN(); | |
220 | ||
221 | timer_data = out_data; | |
222 | out_data += timer_data_len; | |
223 | func->out_datalen -= timer_data_len; | |
224 | } | |
225 | ||
226 | // init Marshallers | |
227 | ofld.m_in.init_buffer(in_data, func->in_datalen); | |
228 | ofld.m_out.init_buffer(out_data, func->out_datalen); | |
229 | ||
230 | // copy buffers to offload descriptor | |
231 | std::copy(buffers, buffers + buffer_count, | |
232 | std::back_inserter(ofld.m_buffers)); | |
233 | ||
234 | OFFLOAD_TIMER_STOP(c_offload_target_descriptor_setup); | |
235 | ||
236 | // find offload entry address | |
237 | OFFLOAD_TIMER_START(c_offload_target_func_lookup); | |
238 | ||
239 | offload_func_with_parms entry = (offload_func_with_parms) | |
240 | __offload_entries.find_addr(name); | |
241 | ||
242 | if (entry == NULL) { | |
243 | #if OFFLOAD_DEBUG > 0 | |
244 | if (console_enabled > 2) { | |
245 | __offload_entries.dump(); | |
246 | } | |
247 | #endif | |
248 | LIBOFFLOAD_ERROR(c_offload_descriptor_offload, name); | |
249 | exit(1); | |
250 | } | |
251 | ||
252 | OFFLOAD_TIMER_STOP(c_offload_target_func_lookup); | |
253 | ||
254 | OFFLOAD_TIMER_START(c_offload_target_func_time); | |
255 | ||
256 | // execute offload entry | |
257 | entry(&ofld); | |
258 | ||
259 | OFFLOAD_TIMER_STOP(c_offload_target_func_time); | |
260 | ||
261 | OFFLOAD_TIMER_STOP(c_offload_target_total_time); | |
262 | ||
263 | // copy timer data to the buffer | |
264 | OFFLOAD_TIMER_TARGET_DATA(timer_data); | |
265 | ||
266 | OFFLOAD_DEBUG_TRACE(2, "Offload \"%s\" finished\n", name); | |
267 | ||
268 | #ifdef SEP_SUPPORT | |
269 | if (sep_monitor) { | |
270 | if (__sync_sub_and_fetch(&sep_counter, 1) == 0) { | |
271 | OFFLOAD_DEBUG_TRACE(2, "VTPauseSampling\n"); | |
272 | VTPauseSampling(); | |
273 | } | |
274 | } | |
275 | #endif // SEP_SUPPORT | |
276 | } | |
277 | ||
278 | void OffloadDescriptor::merge_var_descs( | |
279 | VarDesc *vars, | |
280 | VarDesc2 *vars2, | |
281 | int vars_total | |
282 | ) | |
283 | { | |
284 | // number of variable descriptors received from host and generated | |
285 | // locally should match | |
286 | if (m_vars_total < vars_total) { | |
287 | LIBOFFLOAD_ERROR(c_merge_var_descs1); | |
288 | exit(1); | |
289 | } | |
290 | ||
291 | for (int i = 0; i < m_vars_total; i++) { | |
292 | if (i < vars_total) { | |
293 | // variable type must match | |
294 | if (m_vars[i].type.bits != vars[i].type.bits) { | |
295 | LIBOFFLOAD_ERROR(c_merge_var_descs2); | |
296 | exit(1); | |
297 | } | |
298 | ||
299 | m_vars[i].ptr = vars[i].ptr; | |
300 | m_vars[i].into = vars[i].into; | |
301 | ||
302 | const char *var_sname = ""; | |
303 | if (vars2 != NULL) { | |
304 | if (vars2[i].sname != NULL) { | |
305 | var_sname = vars2[i].sname; | |
306 | } | |
307 | } | |
308 | OFFLOAD_DEBUG_TRACE_1(2, get_offload_number(), c_offload_var, | |
309 | " VarDesc %d, var=%s, %s, %s\n", | |
310 | i, var_sname, | |
311 | vardesc_direction_as_string[m_vars[i].direction.bits], | |
312 | vardesc_type_as_string[m_vars[i].type.src]); | |
313 | if (vars2 != NULL && vars2[i].dname != NULL) { | |
314 | OFFLOAD_TRACE(2, " into=%s, %s\n", vars2[i].dname, | |
315 | vardesc_type_as_string[m_vars[i].type.dst]); | |
316 | } | |
317 | } | |
318 | OFFLOAD_TRACE(2, | |
319 | " type_src=%d, type_dstn=%d, direction=%d, " | |
320 | "alloc_if=%d, free_if=%d, align=%d, mic_offset=%d, flags=0x%x, " | |
321 | "offset=%lld, size=%lld, count/disp=%lld, ptr=%p into=%p\n", | |
322 | m_vars[i].type.src, | |
323 | m_vars[i].type.dst, | |
324 | m_vars[i].direction.bits, | |
325 | m_vars[i].alloc_if, | |
326 | m_vars[i].free_if, | |
327 | m_vars[i].align, | |
328 | m_vars[i].mic_offset, | |
329 | m_vars[i].flags.bits, | |
330 | m_vars[i].offset, | |
331 | m_vars[i].size, | |
332 | m_vars[i].count, | |
333 | m_vars[i].ptr, | |
334 | m_vars[i].into); | |
335 | } | |
336 | } | |
337 | ||
338 | void OffloadDescriptor::scatter_copyin_data() | |
339 | { | |
340 | OFFLOAD_TIMER_START(c_offload_target_scatter_inputs); | |
341 | ||
342 | OFFLOAD_DEBUG_TRACE(2, "IN buffer @ %p size %lld\n", | |
343 | m_in.get_buffer_start(), | |
344 | m_in.get_buffer_size()); | |
345 | OFFLOAD_DEBUG_DUMP_BYTES(2, m_in.get_buffer_start(), | |
346 | m_in.get_buffer_size()); | |
347 | ||
348 | // receive data | |
349 | for (int i = 0; i < m_vars_total; i++) { | |
350 | bool src_is_for_mic = (m_vars[i].direction.out || | |
351 | m_vars[i].into == NULL); | |
352 | void** ptr_addr = src_is_for_mic ? | |
353 | static_cast<void**>(m_vars[i].ptr) : | |
354 | static_cast<void**>(m_vars[i].into); | |
355 | int type = src_is_for_mic ? m_vars[i].type.src : | |
356 | m_vars[i].type.dst; | |
357 | bool is_static = src_is_for_mic ? | |
358 | m_vars[i].flags.is_static : | |
359 | m_vars[i].flags.is_static_dstn; | |
360 | void *ptr = NULL; | |
361 | ||
362 | if (m_vars[i].flags.alloc_disp) { | |
363 | int64_t offset = 0; | |
364 | m_in.receive_data(&offset, sizeof(offset)); | |
5f520819 KY |
365 | } |
366 | if (VAR_TYPE_IS_DV_DATA_SLICE(type) || | |
367 | VAR_TYPE_IS_DV_DATA(type)) { | |
368 | ArrDesc *dvp = (type == c_dv_data_slice || type == c_dv_data)? | |
369 | reinterpret_cast<ArrDesc*>(ptr_addr) : | |
370 | *reinterpret_cast<ArrDesc**>(ptr_addr); | |
371 | ptr_addr = reinterpret_cast<void**>(&dvp->Base); | |
372 | } | |
5f520819 KY |
373 | // Set pointer values |
374 | switch (type) { | |
375 | case c_data_ptr_array: | |
376 | { | |
377 | int j = m_vars[i].ptr_arr_offset; | |
378 | int max_el = j + m_vars[i].count; | |
379 | char *dst_arr_ptr = (src_is_for_mic)? | |
380 | *(reinterpret_cast<char**>(m_vars[i].ptr)) : | |
381 | reinterpret_cast<char*>(m_vars[i].into); | |
382 | ||
2eab9666 IV |
383 | if (m_vars[i].flags.is_pointer) { |
384 | dst_arr_ptr = *((char**)dst_arr_ptr); | |
385 | } | |
5f520819 KY |
386 | for (; j < max_el; j++) { |
387 | if (src_is_for_mic) { | |
388 | m_vars[j].ptr = | |
389 | dst_arr_ptr + m_vars[j].ptr_arr_offset; | |
390 | } | |
391 | else { | |
392 | m_vars[j].into = | |
393 | dst_arr_ptr + m_vars[j].ptr_arr_offset; | |
394 | } | |
395 | } | |
396 | } | |
397 | break; | |
398 | case c_data: | |
399 | case c_void_ptr: | |
400 | case c_cean_var: | |
401 | case c_dv: | |
402 | break; | |
403 | ||
404 | case c_string_ptr: | |
405 | case c_data_ptr: | |
406 | case c_cean_var_ptr: | |
407 | case c_dv_ptr: | |
2eab9666 IV |
408 | if (m_vars[i].alloc_if && !m_vars[i].flags.preallocated) { |
409 | void *buf = NULL; | |
5f520819 KY |
410 | if (m_vars[i].flags.sink_addr) { |
411 | m_in.receive_data(&buf, sizeof(buf)); | |
412 | } | |
413 | else { | |
414 | buf = m_buffers.front(); | |
415 | m_buffers.pop_front(); | |
416 | } | |
417 | if (buf) { | |
418 | if (!is_static) { | |
419 | if (!m_vars[i].flags.sink_addr) { | |
420 | // increment buffer reference | |
421 | OFFLOAD_TIMER_START(c_offload_target_add_buffer_refs); | |
422 | BufferAddRef(buf); | |
2eab9666 | 423 | OFFLOAD_TRACE(1, "Calling COIBufferAddRef %p\n", buf); |
5f520819 KY |
424 | OFFLOAD_TIMER_STOP(c_offload_target_add_buffer_refs); |
425 | } | |
426 | add_ref_count(buf, 0 == m_vars[i].flags.sink_addr); | |
2eab9666 IV |
427 | OFFLOAD_TRACE(1, " AddRef count = %d\n", |
428 | ((RefInfo *) ref_data[buf])->count); | |
5f520819 KY |
429 | } |
430 | ptr = static_cast<char*>(buf) + | |
431 | m_vars[i].mic_offset + | |
432 | (m_vars[i].flags.is_stack_buf ? | |
433 | 0 : m_vars[i].offset); | |
434 | } | |
435 | *ptr_addr = ptr; | |
436 | } | |
437 | else if (m_vars[i].flags.sink_addr) { | |
438 | void *buf; | |
439 | m_in.receive_data(&buf, sizeof(buf)); | |
440 | void *ptr = static_cast<char*>(buf) + | |
441 | m_vars[i].mic_offset + | |
442 | (m_vars[i].flags.is_stack_buf ? | |
443 | 0 : m_vars[i].offset); | |
444 | *ptr_addr = ptr; | |
445 | } | |
446 | break; | |
447 | ||
448 | case c_func_ptr: | |
449 | break; | |
450 | ||
451 | case c_dv_data: | |
452 | case c_dv_ptr_data: | |
453 | case c_dv_data_slice: | |
454 | case c_dv_ptr_data_slice: | |
455 | if (m_vars[i].alloc_if) { | |
456 | void *buf; | |
457 | if (m_vars[i].flags.sink_addr) { | |
458 | m_in.receive_data(&buf, sizeof(buf)); | |
459 | } | |
460 | else { | |
461 | buf = m_buffers.front(); | |
462 | m_buffers.pop_front(); | |
463 | } | |
464 | if (buf) { | |
465 | if (!is_static) { | |
466 | if (!m_vars[i].flags.sink_addr) { | |
467 | // increment buffer reference | |
468 | OFFLOAD_TIMER_START(c_offload_target_add_buffer_refs); | |
469 | BufferAddRef(buf); | |
470 | OFFLOAD_TIMER_STOP(c_offload_target_add_buffer_refs); | |
471 | } | |
472 | add_ref_count(buf, 0 == m_vars[i].flags.sink_addr); | |
473 | } | |
474 | ptr = static_cast<char*>(buf) + | |
475 | m_vars[i].mic_offset + m_vars[i].offset; | |
476 | } | |
477 | *ptr_addr = ptr; | |
478 | } | |
479 | else if (m_vars[i].flags.sink_addr) { | |
480 | void *buf; | |
481 | m_in.receive_data(&buf, sizeof(buf)); | |
482 | ptr = static_cast<char*>(buf) + | |
483 | m_vars[i].mic_offset + m_vars[i].offset; | |
484 | *ptr_addr = ptr; | |
485 | } | |
486 | break; | |
487 | ||
488 | default: | |
489 | LIBOFFLOAD_ERROR(c_unknown_var_type, type); | |
490 | abort(); | |
491 | } | |
492 | // Release obsolete buffers for stack of persistent objects | |
493 | if (type = c_data_ptr && | |
494 | m_vars[i].flags.is_stack_buf && | |
495 | !m_vars[i].direction.bits && | |
496 | m_vars[i].alloc_if && | |
497 | m_vars[i].size != 0) { | |
498 | for (int j=0; j < m_vars[i].size; j++) { | |
499 | void *buf; | |
500 | m_in.receive_data(&buf, sizeof(buf)); | |
501 | BufferReleaseRef(buf); | |
502 | ref_data.erase(buf); | |
503 | } | |
504 | } | |
505 | // Do copyin | |
506 | switch (m_vars[i].type.dst) { | |
507 | case c_data_ptr_array: | |
508 | break; | |
509 | case c_data: | |
510 | case c_void_ptr: | |
511 | case c_cean_var: | |
512 | if (m_vars[i].direction.in && | |
513 | !m_vars[i].flags.is_static_dstn) { | |
514 | int64_t size; | |
515 | int64_t disp; | |
516 | char* ptr = m_vars[i].into ? | |
517 | static_cast<char*>(m_vars[i].into) : | |
518 | static_cast<char*>(m_vars[i].ptr); | |
519 | if (m_vars[i].type.dst == c_cean_var) { | |
520 | m_in.receive_data((&size), sizeof(int64_t)); | |
521 | m_in.receive_data((&disp), sizeof(int64_t)); | |
522 | } | |
523 | else { | |
524 | size = m_vars[i].size; | |
525 | disp = 0; | |
526 | } | |
527 | m_in.receive_data(ptr + disp, size); | |
528 | } | |
529 | break; | |
530 | ||
531 | case c_dv: | |
532 | if (m_vars[i].direction.bits || | |
533 | m_vars[i].alloc_if || | |
534 | m_vars[i].free_if) { | |
535 | char* ptr = m_vars[i].into ? | |
536 | static_cast<char*>(m_vars[i].into) : | |
537 | static_cast<char*>(m_vars[i].ptr); | |
538 | m_in.receive_data(ptr + sizeof(uint64_t), | |
539 | m_vars[i].size - sizeof(uint64_t)); | |
540 | } | |
541 | break; | |
542 | ||
543 | case c_string_ptr: | |
544 | case c_data_ptr: | |
545 | case c_cean_var_ptr: | |
546 | case c_dv_ptr: | |
547 | case c_dv_data: | |
548 | case c_dv_ptr_data: | |
549 | case c_dv_data_slice: | |
550 | case c_dv_ptr_data_slice: | |
551 | break; | |
552 | ||
553 | case c_func_ptr: | |
554 | if (m_vars[i].direction.in) { | |
555 | m_in.receive_func_ptr((const void**) m_vars[i].ptr); | |
556 | } | |
557 | break; | |
558 | ||
559 | default: | |
560 | LIBOFFLOAD_ERROR(c_unknown_var_type, m_vars[i].type.dst); | |
561 | abort(); | |
562 | } | |
563 | } | |
564 | ||
565 | OFFLOAD_TRACE(1, "Total copyin data received from host: [%lld] bytes\n", | |
566 | m_in.get_tfr_size()); | |
567 | ||
568 | OFFLOAD_TIMER_STOP(c_offload_target_scatter_inputs); | |
569 | ||
570 | OFFLOAD_TIMER_START(c_offload_target_compute); | |
571 | } | |
572 | ||
573 | void OffloadDescriptor::gather_copyout_data() | |
574 | { | |
575 | OFFLOAD_TIMER_STOP(c_offload_target_compute); | |
576 | ||
577 | OFFLOAD_TIMER_START(c_offload_target_gather_outputs); | |
578 | ||
579 | for (int i = 0; i < m_vars_total; i++) { | |
580 | bool src_is_for_mic = (m_vars[i].direction.out || | |
581 | m_vars[i].into == NULL); | |
582 | ||
583 | switch (m_vars[i].type.src) { | |
584 | case c_data_ptr_array: | |
585 | break; | |
586 | case c_data: | |
587 | case c_void_ptr: | |
588 | case c_cean_var: | |
589 | if (m_vars[i].direction.out && | |
590 | !m_vars[i].flags.is_static) { | |
591 | m_out.send_data( | |
592 | static_cast<char*>(m_vars[i].ptr) + m_vars[i].disp, | |
593 | m_vars[i].size); | |
594 | } | |
595 | break; | |
596 | ||
597 | case c_dv: | |
598 | break; | |
599 | ||
600 | case c_string_ptr: | |
601 | case c_data_ptr: | |
602 | case c_cean_var_ptr: | |
603 | case c_dv_ptr: | |
604 | if (m_vars[i].free_if && | |
605 | src_is_for_mic && | |
2eab9666 | 606 | !m_vars[i].flags.preallocated && |
5f520819 KY |
607 | !m_vars[i].flags.is_static) { |
608 | void *buf = *static_cast<char**>(m_vars[i].ptr) - | |
609 | m_vars[i].mic_offset - | |
610 | (m_vars[i].flags.is_stack_buf? | |
611 | 0 : m_vars[i].offset); | |
612 | if (buf == NULL) { | |
613 | break; | |
614 | } | |
615 | // decrement buffer reference count | |
616 | OFFLOAD_TIMER_START(c_offload_target_release_buffer_refs); | |
617 | BufReleaseRef(buf); | |
618 | OFFLOAD_TIMER_STOP(c_offload_target_release_buffer_refs); | |
619 | } | |
2eab9666 IV |
620 | if (m_vars[i].flags.preallocated && m_vars[i].alloc_if) { |
621 | m_out.send_data((void*) m_vars[i].ptr, sizeof(void*)); | |
622 | } | |
5f520819 KY |
623 | break; |
624 | ||
625 | case c_func_ptr: | |
626 | if (m_vars[i].direction.out) { | |
627 | m_out.send_func_ptr(*((void**) m_vars[i].ptr)); | |
628 | } | |
629 | break; | |
630 | ||
631 | case c_dv_data: | |
632 | case c_dv_ptr_data: | |
633 | case c_dv_data_slice: | |
634 | case c_dv_ptr_data_slice: | |
635 | if (src_is_for_mic && | |
636 | m_vars[i].free_if && | |
637 | !m_vars[i].flags.is_static) { | |
638 | ArrDesc *dvp = (m_vars[i].type.src == c_dv_data || | |
639 | m_vars[i].type.src == c_dv_data_slice) ? | |
640 | static_cast<ArrDesc*>(m_vars[i].ptr) : | |
641 | *static_cast<ArrDesc**>(m_vars[i].ptr); | |
642 | ||
643 | void *buf = reinterpret_cast<char*>(dvp->Base) - | |
644 | m_vars[i].mic_offset - | |
645 | m_vars[i].offset; | |
646 | ||
647 | if (buf == NULL) { | |
648 | break; | |
649 | } | |
650 | ||
651 | // decrement buffer reference count | |
652 | OFFLOAD_TIMER_START(c_offload_target_release_buffer_refs); | |
653 | BufReleaseRef(buf); | |
654 | OFFLOAD_TIMER_STOP(c_offload_target_release_buffer_refs); | |
655 | } | |
656 | break; | |
657 | ||
658 | default: | |
659 | LIBOFFLOAD_ERROR(c_unknown_var_type, m_vars[i].type.dst); | |
660 | abort(); | |
661 | } | |
662 | ||
663 | if (m_vars[i].into) { | |
664 | switch (m_vars[i].type.dst) { | |
665 | case c_data_ptr_array: | |
666 | break; | |
667 | case c_data: | |
668 | case c_void_ptr: | |
669 | case c_cean_var: | |
670 | case c_dv: | |
671 | break; | |
672 | ||
673 | case c_string_ptr: | |
674 | case c_data_ptr: | |
675 | case c_cean_var_ptr: | |
676 | case c_dv_ptr: | |
677 | if (m_vars[i].direction.in && | |
678 | m_vars[i].free_if && | |
679 | !m_vars[i].flags.is_static_dstn) { | |
680 | void *buf = *static_cast<char**>(m_vars[i].into) - | |
681 | m_vars[i].mic_offset - | |
682 | (m_vars[i].flags.is_stack_buf? | |
683 | 0 : m_vars[i].offset); | |
684 | ||
685 | if (buf == NULL) { | |
686 | break; | |
687 | } | |
688 | // decrement buffer reference count | |
689 | OFFLOAD_TIMER_START( | |
690 | c_offload_target_release_buffer_refs); | |
691 | BufReleaseRef(buf); | |
692 | OFFLOAD_TIMER_STOP( | |
693 | c_offload_target_release_buffer_refs); | |
694 | } | |
695 | break; | |
696 | ||
697 | case c_func_ptr: | |
698 | break; | |
699 | ||
700 | case c_dv_data: | |
701 | case c_dv_ptr_data: | |
702 | case c_dv_data_slice: | |
703 | case c_dv_ptr_data_slice: | |
704 | if (m_vars[i].free_if && | |
705 | m_vars[i].direction.in && | |
706 | !m_vars[i].flags.is_static_dstn) { | |
707 | ArrDesc *dvp = | |
708 | (m_vars[i].type.dst == c_dv_data_slice || | |
709 | m_vars[i].type.dst == c_dv_data) ? | |
710 | static_cast<ArrDesc*>(m_vars[i].into) : | |
711 | *static_cast<ArrDesc**>(m_vars[i].into); | |
712 | void *buf = reinterpret_cast<char*>(dvp->Base) - | |
713 | m_vars[i].mic_offset - | |
714 | m_vars[i].offset; | |
715 | ||
716 | if (buf == NULL) { | |
717 | break; | |
718 | } | |
719 | // decrement buffer reference count | |
720 | OFFLOAD_TIMER_START( | |
721 | c_offload_target_release_buffer_refs); | |
722 | BufReleaseRef(buf); | |
723 | OFFLOAD_TIMER_STOP( | |
724 | c_offload_target_release_buffer_refs); | |
725 | } | |
726 | break; | |
727 | ||
728 | default: | |
729 | LIBOFFLOAD_ERROR(c_unknown_var_type, m_vars[i].type.dst); | |
730 | abort(); | |
731 | } | |
732 | } | |
733 | } | |
734 | ||
735 | OFFLOAD_DEBUG_TRACE(2, "OUT buffer @ p %p size %lld\n", | |
736 | m_out.get_buffer_start(), | |
737 | m_out.get_buffer_size()); | |
738 | ||
739 | OFFLOAD_DEBUG_DUMP_BYTES(2, | |
740 | m_out.get_buffer_start(), | |
741 | m_out.get_buffer_size()); | |
742 | ||
743 | OFFLOAD_DEBUG_TRACE_1(1, get_offload_number(), c_offload_copyout_data, | |
744 | "Total copyout data sent to host: [%lld] bytes\n", | |
745 | m_out.get_tfr_size()); | |
746 | ||
747 | OFFLOAD_TIMER_STOP(c_offload_target_gather_outputs); | |
748 | } | |
749 | ||
750 | void __offload_target_init(void) | |
751 | { | |
752 | #ifdef SEP_SUPPORT | |
753 | const char* env_var = getenv(sep_monitor_env); | |
754 | if (env_var != 0 && *env_var != '\0') { | |
755 | sep_monitor = atoi(env_var); | |
756 | } | |
757 | env_var = getenv(sep_device_env); | |
758 | if (env_var != 0 && *env_var != '\0') { | |
759 | sep_device = env_var; | |
760 | } | |
761 | #endif // SEP_SUPPORT | |
762 | ||
763 | prefix = report_get_message_str(c_report_mic); | |
764 | ||
765 | // init frequency | |
766 | mic_frequency = COIPerfGetCycleFrequency(); | |
767 | } | |
768 | ||
769 | // User-visible offload API | |
770 | ||
771 | int _Offload_number_of_devices(void) | |
772 | { | |
773 | return mic_engines_total; | |
774 | } | |
775 | ||
776 | int _Offload_get_device_number(void) | |
777 | { | |
778 | return mic_index; | |
779 | } | |
780 | ||
781 | int _Offload_get_physical_device_number(void) | |
782 | { | |
783 | uint32_t index; | |
784 | EngineGetIndex(&index); | |
785 | return index; | |
786 | } |