]>
Commit | Line | Data |
---|---|---|
5f520819 | 1 | /* |
df26a50d | 2 | Copyright (c) 2014-2016 Intel Corporation. All Rights Reserved. |
5f520819 KY |
3 | |
4 | Redistribution and use in source and binary forms, with or without | |
5 | modification, are permitted provided that the following conditions | |
6 | are met: | |
7 | ||
8 | * Redistributions of source code must retain the above copyright | |
9 | notice, this list of conditions and the following disclaimer. | |
10 | * Redistributions in binary form must reproduce the above copyright | |
11 | notice, this list of conditions and the following disclaimer in the | |
12 | documentation and/or other materials provided with the distribution. | |
13 | * Neither the name of Intel Corporation nor the names of its | |
14 | contributors may be used to endorse or promote products derived | |
15 | from this software without specific prior written permission. | |
16 | ||
17 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS | |
18 | "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT | |
19 | LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR | |
20 | A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT | |
21 | HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, | |
22 | SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT | |
23 | LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, | |
24 | DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY | |
25 | THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT | |
26 | (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE | |
27 | OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |
28 | */ | |
29 | ||
30 | ||
31 | #include "offload_target.h" | |
32 | #include <stdlib.h> | |
33 | #include <unistd.h> | |
34 | #ifdef SEP_SUPPORT | |
35 | #include <fcntl.h> | |
36 | #include <sys/ioctl.h> | |
37 | #endif // SEP_SUPPORT | |
38 | #include <omp.h> | |
39 | #include <map> | |
40 | ||
// Signature of an offloaded entry point as it is invoked on the target.
// The single opaque argument is a temporary fix for parameters that live
// on the stack.
typedef void (*offload_func_with_parms)(void *);

// State shared by the target-side console and file logging.
const char *prefix;                 // message prefix used by the report code
int         console_enabled = 0;
int         offload_report_level = 0;
50 | ||
// Human-readable names used by the trace output.
//
// vardesc_direction_as_string is indexed with a descriptor's direction bits
// and vardesc_type_as_string with its (extended) source/destination type,
// so the order of the entries below must not be changed.
static const char* vardesc_direction_as_string[] = {
    "NOCOPY",               // 0
    "IN",                   // 1
    "OUT",                  // 2
    "INOUT"                 // 3
};
static const char* vardesc_type_as_string[] = {
    "unknown",              //  0
    "data",                 //  1
    "data_ptr",             //  2
    "func_ptr",             //  3
    "void_ptr",             //  4
    "string_ptr",           //  5
    "dv",                   //  6
    "dv_data",              //  7
    "dv_data_slice",        //  8
    "dv_ptr",               //  9
    "dv_ptr_data",          // 10
    "dv_ptr_data_slice",    // 11
    "cean_var",             // 12
    "cean_var_ptr",         // 13
    "c_data_ptr_array",     // 14
    "c_extended_type",      // 15
    "c_func_ptr_array",     // 16
    "c_void_ptr_array",     // 17
    "c_string_ptr_array",   // 18
    "c_data_ptr_ptr",       // 19
    "c_func_ptr_ptr",       // 20
    "c_void_ptr_ptr",       // 21
    "c_string_ptr_ptr",     // 22
    "c_cean_var_ptr_ptr",   // 23
};
84 | ||
85 | int mic_index = -1; | |
86 | int mic_engines_total = -1; | |
87 | uint64_t mic_frequency = 0; | |
88 | int offload_number = 0; | |
89 | static std::map<void*, RefInfo*> ref_data; | |
90 | static mutex_t add_ref_lock; | |
91 | ||
92 | #ifdef SEP_SUPPORT | |
93 | static const char* sep_monitor_env = "SEP_MONITOR"; | |
94 | static bool sep_monitor = false; | |
95 | static const char* sep_device_env = "SEP_DEVICE"; | |
96 | static const char* sep_device = "/dev/sep3.8/c"; | |
97 | static int sep_counter = 0; | |
98 | ||
99 | #define SEP_API_IOC_MAGIC 99 | |
100 | #define SEP_IOCTL_PAUSE _IO (SEP_API_IOC_MAGIC, 31) | |
101 | #define SEP_IOCTL_RESUME _IO (SEP_API_IOC_MAGIC, 32) | |
102 | ||
103 | static void add_ref_count(void * buf, bool created) | |
104 | { | |
105 | mutex_locker_t locker(add_ref_lock); | |
106 | RefInfo * info = ref_data[buf]; | |
107 | ||
108 | if (info) { | |
109 | info->count++; | |
110 | } | |
111 | else { | |
112 | info = new RefInfo((int)created,(long)1); | |
113 | } | |
114 | info->is_added |= created; | |
115 | ref_data[buf] = info; | |
116 | } | |
117 | ||
118 | static void BufReleaseRef(void * buf) | |
119 | { | |
120 | mutex_locker_t locker(add_ref_lock); | |
121 | RefInfo * info = ref_data[buf]; | |
122 | ||
123 | if (info) { | |
124 | --info->count; | |
125 | if (info->count == 0 && info->is_added) { | |
2eab9666 | 126 | OFFLOAD_TRACE(1, "Calling COIBufferReleaseRef AddRef count = %d\n", |
df26a50d | 127 | ((RefInfo *) ref_data[buf])->count); |
5f520819 KY |
128 | BufferReleaseRef(buf); |
129 | info->is_added = 0; | |
130 | } | |
131 | } | |
132 | } | |
133 | ||
134 | static int VTPauseSampling(void) | |
135 | { | |
136 | int ret = -1; | |
137 | int handle = open(sep_device, O_RDWR); | |
138 | if (handle > 0) { | |
139 | ret = ioctl(handle, SEP_IOCTL_PAUSE); | |
140 | close(handle); | |
141 | } | |
142 | return ret; | |
143 | } | |
144 | ||
145 | static int VTResumeSampling(void) | |
146 | { | |
147 | int ret = -1; | |
148 | int handle = open(sep_device, O_RDWR); | |
149 | if (handle > 0) { | |
150 | ret = ioctl(handle, SEP_IOCTL_RESUME); | |
151 | close(handle); | |
152 | } | |
153 | return ret; | |
154 | } | |
155 | #endif // SEP_SUPPORT | |
156 | ||
157 | void OffloadDescriptor::offload( | |
158 | uint32_t buffer_count, | |
159 | void** buffers, | |
160 | void* misc_data, | |
161 | uint16_t misc_data_len, | |
162 | void* return_data, | |
163 | uint16_t return_data_len | |
164 | ) | |
165 | { | |
166 | FunctionDescriptor *func = (FunctionDescriptor*) misc_data; | |
167 | const char *name = func->data; | |
168 | OffloadDescriptor ofld; | |
169 | char *in_data = 0; | |
170 | char *out_data = 0; | |
171 | char *timer_data = 0; | |
172 | ||
173 | console_enabled = func->console_enabled; | |
174 | timer_enabled = func->timer_enabled; | |
175 | offload_report_level = func->offload_report_level; | |
176 | offload_number = func->offload_number; | |
177 | ofld.set_offload_number(func->offload_number); | |
178 | ||
179 | #ifdef SEP_SUPPORT | |
180 | if (sep_monitor) { | |
181 | if (__sync_fetch_and_add(&sep_counter, 1) == 0) { | |
182 | OFFLOAD_DEBUG_TRACE(2, "VTResumeSampling\n"); | |
183 | VTResumeSampling(); | |
184 | } | |
185 | } | |
186 | #endif // SEP_SUPPORT | |
187 | ||
188 | OFFLOAD_DEBUG_TRACE_1(2, ofld.get_offload_number(), | |
189 | c_offload_start_target_func, | |
190 | "Offload \"%s\" started\n", name); | |
191 | ||
192 | // initialize timer data | |
193 | OFFLOAD_TIMER_INIT(); | |
194 | ||
195 | OFFLOAD_TIMER_START(c_offload_target_total_time); | |
196 | ||
197 | OFFLOAD_TIMER_START(c_offload_target_descriptor_setup); | |
198 | ||
199 | // get input/output buffer addresses | |
200 | if (func->in_datalen > 0 || func->out_datalen > 0) { | |
201 | if (func->data_offset != 0) { | |
202 | in_data = (char*) misc_data + func->data_offset; | |
203 | out_data = (char*) return_data; | |
204 | } | |
205 | else { | |
206 | char *inout_buf = (char*) buffers[--buffer_count]; | |
207 | in_data = inout_buf; | |
208 | out_data = inout_buf; | |
209 | } | |
210 | } | |
211 | ||
212 | // assign variable descriptors | |
213 | ofld.m_vars_total = func->vars_num; | |
214 | if (ofld.m_vars_total > 0) { | |
215 | uint64_t var_data_len = ofld.m_vars_total * sizeof(VarDesc); | |
216 | ||
217 | ofld.m_vars = (VarDesc*) malloc(var_data_len); | |
218 | if (ofld.m_vars == NULL) | |
219 | LIBOFFLOAD_ERROR(c_malloc); | |
220 | memcpy(ofld.m_vars, in_data, var_data_len); | |
221 | ||
df26a50d IV |
222 | ofld.m_vars_extra = |
223 | (VarExtra*) malloc(ofld.m_vars_total * sizeof(VarExtra)); | |
224 | if (ofld.m_vars == NULL) | |
225 | LIBOFFLOAD_ERROR(c_malloc); | |
226 | ||
5f520819 KY |
227 | in_data += var_data_len; |
228 | func->in_datalen -= var_data_len; | |
229 | } | |
230 | ||
231 | // timer data | |
232 | if (func->timer_enabled) { | |
233 | uint64_t timer_data_len = OFFLOAD_TIMER_DATALEN(); | |
234 | ||
235 | timer_data = out_data; | |
236 | out_data += timer_data_len; | |
237 | func->out_datalen -= timer_data_len; | |
238 | } | |
239 | ||
240 | // init Marshallers | |
241 | ofld.m_in.init_buffer(in_data, func->in_datalen); | |
242 | ofld.m_out.init_buffer(out_data, func->out_datalen); | |
243 | ||
244 | // copy buffers to offload descriptor | |
245 | std::copy(buffers, buffers + buffer_count, | |
246 | std::back_inserter(ofld.m_buffers)); | |
247 | ||
248 | OFFLOAD_TIMER_STOP(c_offload_target_descriptor_setup); | |
249 | ||
250 | // find offload entry address | |
251 | OFFLOAD_TIMER_START(c_offload_target_func_lookup); | |
252 | ||
253 | offload_func_with_parms entry = (offload_func_with_parms) | |
254 | __offload_entries.find_addr(name); | |
255 | ||
256 | if (entry == NULL) { | |
257 | #if OFFLOAD_DEBUG > 0 | |
258 | if (console_enabled > 2) { | |
259 | __offload_entries.dump(); | |
260 | } | |
261 | #endif | |
262 | LIBOFFLOAD_ERROR(c_offload_descriptor_offload, name); | |
263 | exit(1); | |
264 | } | |
265 | ||
266 | OFFLOAD_TIMER_STOP(c_offload_target_func_lookup); | |
267 | ||
268 | OFFLOAD_TIMER_START(c_offload_target_func_time); | |
269 | ||
270 | // execute offload entry | |
271 | entry(&ofld); | |
272 | ||
273 | OFFLOAD_TIMER_STOP(c_offload_target_func_time); | |
274 | ||
275 | OFFLOAD_TIMER_STOP(c_offload_target_total_time); | |
276 | ||
277 | // copy timer data to the buffer | |
278 | OFFLOAD_TIMER_TARGET_DATA(timer_data); | |
279 | ||
280 | OFFLOAD_DEBUG_TRACE(2, "Offload \"%s\" finished\n", name); | |
281 | ||
282 | #ifdef SEP_SUPPORT | |
283 | if (sep_monitor) { | |
284 | if (__sync_sub_and_fetch(&sep_counter, 1) == 0) { | |
285 | OFFLOAD_DEBUG_TRACE(2, "VTPauseSampling\n"); | |
286 | VTPauseSampling(); | |
287 | } | |
288 | } | |
289 | #endif // SEP_SUPPORT | |
290 | } | |
291 | ||
292 | void OffloadDescriptor::merge_var_descs( | |
293 | VarDesc *vars, | |
294 | VarDesc2 *vars2, | |
295 | int vars_total | |
296 | ) | |
297 | { | |
298 | // number of variable descriptors received from host and generated | |
299 | // locally should match | |
300 | if (m_vars_total < vars_total) { | |
301 | LIBOFFLOAD_ERROR(c_merge_var_descs1); | |
302 | exit(1); | |
303 | } | |
304 | ||
305 | for (int i = 0; i < m_vars_total; i++) { | |
df26a50d IV |
306 | // instead of m_vars[i].type.src we will use m_vars_extra[i].type_src |
307 | ||
5f520819 KY |
308 | if (i < vars_total) { |
309 | // variable type must match | |
310 | if (m_vars[i].type.bits != vars[i].type.bits) { | |
df26a50d IV |
311 | OFFLOAD_TRACE(2, |
312 | "m_vars[%d].type.bits=%08x, vars[%d].type.bits=%08x\n", | |
313 | i, m_vars[i].type.bits, i, vars[i].type.bits); | |
5f520819 KY |
314 | LIBOFFLOAD_ERROR(c_merge_var_descs2); |
315 | exit(1); | |
316 | } | |
317 | ||
df26a50d IV |
318 | if (m_vars[i].type.src == c_extended_type) { |
319 | VarDescExtendedType *etype = | |
320 | reinterpret_cast<VarDescExtendedType*>(vars[i].ptr); | |
321 | m_vars_extra[i].type_src = etype->extended_type; | |
322 | m_vars[i].ptr = etype->ptr; | |
323 | } | |
324 | else { | |
325 | m_vars_extra[i].type_src = m_vars[i].type.src; | |
326 | if (!(m_vars[i].flags.use_device_ptr && | |
327 | m_vars[i].type.src == c_dv)) { | |
328 | m_vars[i].ptr = vars[i].ptr; | |
329 | } | |
330 | } | |
331 | // instead of m_vars[i].type.dst we will use m_vars_extra[i].type_dst | |
332 | if (m_vars[i].type.dst == c_extended_type && i < vars_total) { | |
333 | VarDescExtendedType *etype = | |
334 | reinterpret_cast<VarDescExtendedType*>(vars[i].into); | |
335 | m_vars_extra[i].type_dst = etype->extended_type; | |
336 | m_vars[i].into = etype->ptr; | |
337 | } | |
338 | else { | |
339 | m_vars_extra[i].type_dst = m_vars[i].type.dst; | |
340 | m_vars[i].into = vars[i].into; | |
341 | } | |
5f520819 KY |
342 | |
343 | const char *var_sname = ""; | |
344 | if (vars2 != NULL) { | |
345 | if (vars2[i].sname != NULL) { | |
346 | var_sname = vars2[i].sname; | |
347 | } | |
348 | } | |
349 | OFFLOAD_DEBUG_TRACE_1(2, get_offload_number(), c_offload_var, | |
350 | " VarDesc %d, var=%s, %s, %s\n", | |
351 | i, var_sname, | |
352 | vardesc_direction_as_string[m_vars[i].direction.bits], | |
df26a50d | 353 | vardesc_type_as_string[m_vars_extra[i].type_src]); |
5f520819 KY |
354 | if (vars2 != NULL && vars2[i].dname != NULL) { |
355 | OFFLOAD_TRACE(2, " into=%s, %s\n", vars2[i].dname, | |
df26a50d | 356 | vardesc_type_as_string[m_vars_extra[i].type_dst]); |
5f520819 KY |
357 | } |
358 | } | |
df26a50d IV |
359 | else { |
360 | m_vars_extra[i].type_src = m_vars[i].type.src; | |
361 | m_vars_extra[i].type_dst = m_vars[i].type.dst; | |
362 | } | |
363 | ||
5f520819 KY |
364 | OFFLOAD_TRACE(2, |
365 | " type_src=%d, type_dstn=%d, direction=%d, " | |
366 | "alloc_if=%d, free_if=%d, align=%d, mic_offset=%d, flags=0x%x, " | |
367 | "offset=%lld, size=%lld, count/disp=%lld, ptr=%p into=%p\n", | |
df26a50d IV |
368 | m_vars_extra[i].type_src, |
369 | m_vars_extra[i].type_dst, | |
5f520819 KY |
370 | m_vars[i].direction.bits, |
371 | m_vars[i].alloc_if, | |
372 | m_vars[i].free_if, | |
373 | m_vars[i].align, | |
374 | m_vars[i].mic_offset, | |
375 | m_vars[i].flags.bits, | |
376 | m_vars[i].offset, | |
377 | m_vars[i].size, | |
378 | m_vars[i].count, | |
379 | m_vars[i].ptr, | |
380 | m_vars[i].into); | |
381 | } | |
382 | } | |
383 | ||
384 | void OffloadDescriptor::scatter_copyin_data() | |
385 | { | |
386 | OFFLOAD_TIMER_START(c_offload_target_scatter_inputs); | |
387 | ||
388 | OFFLOAD_DEBUG_TRACE(2, "IN buffer @ %p size %lld\n", | |
389 | m_in.get_buffer_start(), | |
390 | m_in.get_buffer_size()); | |
391 | OFFLOAD_DEBUG_DUMP_BYTES(2, m_in.get_buffer_start(), | |
392 | m_in.get_buffer_size()); | |
393 | ||
394 | // receive data | |
395 | for (int i = 0; i < m_vars_total; i++) { | |
396 | bool src_is_for_mic = (m_vars[i].direction.out || | |
397 | m_vars[i].into == NULL); | |
398 | void** ptr_addr = src_is_for_mic ? | |
399 | static_cast<void**>(m_vars[i].ptr) : | |
400 | static_cast<void**>(m_vars[i].into); | |
df26a50d IV |
401 | int type = src_is_for_mic ? m_vars_extra[i].type_src : |
402 | m_vars_extra[i].type_dst; | |
5f520819 KY |
403 | bool is_static = src_is_for_mic ? |
404 | m_vars[i].flags.is_static : | |
405 | m_vars[i].flags.is_static_dstn; | |
406 | void *ptr = NULL; | |
407 | ||
408 | if (m_vars[i].flags.alloc_disp) { | |
409 | int64_t offset = 0; | |
410 | m_in.receive_data(&offset, sizeof(offset)); | |
5f520819 KY |
411 | } |
412 | if (VAR_TYPE_IS_DV_DATA_SLICE(type) || | |
413 | VAR_TYPE_IS_DV_DATA(type)) { | |
414 | ArrDesc *dvp = (type == c_dv_data_slice || type == c_dv_data)? | |
415 | reinterpret_cast<ArrDesc*>(ptr_addr) : | |
416 | *reinterpret_cast<ArrDesc**>(ptr_addr); | |
417 | ptr_addr = reinterpret_cast<void**>(&dvp->Base); | |
418 | } | |
5f520819 KY |
419 | // Set pointer values |
420 | switch (type) { | |
421 | case c_data_ptr_array: | |
422 | { | |
423 | int j = m_vars[i].ptr_arr_offset; | |
424 | int max_el = j + m_vars[i].count; | |
425 | char *dst_arr_ptr = (src_is_for_mic)? | |
426 | *(reinterpret_cast<char**>(m_vars[i].ptr)) : | |
427 | reinterpret_cast<char*>(m_vars[i].into); | |
428 | ||
df26a50d IV |
429 | // if is_pointer is 1 it means that pointer array itself |
430 | // is defined either via pointer or as class member. | |
431 | // i.e. arr_ptr[0:5] or this->ARR[0:5] | |
2eab9666 | 432 | if (m_vars[i].flags.is_pointer) { |
df26a50d IV |
433 | int64_t offset = 0; |
434 | m_in.receive_data(&offset, sizeof(offset)); | |
435 | dst_arr_ptr = *((char**)dst_arr_ptr) + offset; | |
2eab9666 | 436 | } |
5f520819 KY |
437 | for (; j < max_el; j++) { |
438 | if (src_is_for_mic) { | |
439 | m_vars[j].ptr = | |
440 | dst_arr_ptr + m_vars[j].ptr_arr_offset; | |
441 | } | |
442 | else { | |
443 | m_vars[j].into = | |
444 | dst_arr_ptr + m_vars[j].ptr_arr_offset; | |
445 | } | |
446 | } | |
447 | } | |
448 | break; | |
449 | case c_data: | |
450 | case c_void_ptr: | |
df26a50d | 451 | case c_void_ptr_ptr: |
5f520819 KY |
452 | case c_cean_var: |
453 | case c_dv: | |
454 | break; | |
455 | ||
456 | case c_string_ptr: | |
457 | case c_data_ptr: | |
df26a50d IV |
458 | case c_string_ptr_ptr: |
459 | case c_data_ptr_ptr: | |
5f520819 | 460 | case c_cean_var_ptr: |
df26a50d | 461 | case c_cean_var_ptr_ptr: |
5f520819 | 462 | case c_dv_ptr: |
df26a50d IV |
463 | // Don't need ptr_addr value for variables from stack buffer. |
464 | // Stack buffer address is set at var_desc with #0. | |
465 | if (i != 0 && m_vars[i].flags.is_stack_buf) { | |
466 | break; | |
467 | } | |
468 | if (TYPE_IS_PTR_TO_PTR(m_vars_extra[i].type_src) || | |
469 | TYPE_IS_PTR_TO_PTR(m_vars_extra[i].type_dst)) { | |
470 | int64_t offset; | |
471 | ||
472 | m_in.receive_data(&offset, sizeof(offset)); | |
473 | ptr_addr = reinterpret_cast<void**>( | |
474 | reinterpret_cast<char*>(*ptr_addr) + offset); | |
475 | ||
476 | } | |
477 | ||
2eab9666 IV |
478 | if (m_vars[i].alloc_if && !m_vars[i].flags.preallocated) { |
479 | void *buf = NULL; | |
5f520819 KY |
480 | if (m_vars[i].flags.sink_addr) { |
481 | m_in.receive_data(&buf, sizeof(buf)); | |
482 | } | |
483 | else { | |
484 | buf = m_buffers.front(); | |
485 | m_buffers.pop_front(); | |
486 | } | |
487 | if (buf) { | |
488 | if (!is_static) { | |
489 | if (!m_vars[i].flags.sink_addr) { | |
490 | // increment buffer reference | |
491 | OFFLOAD_TIMER_START(c_offload_target_add_buffer_refs); | |
492 | BufferAddRef(buf); | |
2eab9666 | 493 | OFFLOAD_TRACE(1, "Calling COIBufferAddRef %p\n", buf); |
5f520819 KY |
494 | OFFLOAD_TIMER_STOP(c_offload_target_add_buffer_refs); |
495 | } | |
496 | add_ref_count(buf, 0 == m_vars[i].flags.sink_addr); | |
2eab9666 IV |
497 | OFFLOAD_TRACE(1, " AddRef count = %d\n", |
498 | ((RefInfo *) ref_data[buf])->count); | |
5f520819 KY |
499 | } |
500 | ptr = static_cast<char*>(buf) + | |
501 | m_vars[i].mic_offset + | |
502 | (m_vars[i].flags.is_stack_buf ? | |
503 | 0 : m_vars[i].offset); | |
df26a50d | 504 | |
5f520819 KY |
505 | } |
506 | *ptr_addr = ptr; | |
507 | } | |
508 | else if (m_vars[i].flags.sink_addr) { | |
509 | void *buf; | |
510 | m_in.receive_data(&buf, sizeof(buf)); | |
511 | void *ptr = static_cast<char*>(buf) + | |
512 | m_vars[i].mic_offset + | |
513 | (m_vars[i].flags.is_stack_buf ? | |
514 | 0 : m_vars[i].offset); | |
515 | *ptr_addr = ptr; | |
516 | } | |
517 | break; | |
518 | ||
519 | case c_func_ptr: | |
df26a50d | 520 | case c_func_ptr_ptr: |
5f520819 KY |
521 | break; |
522 | ||
523 | case c_dv_data: | |
524 | case c_dv_ptr_data: | |
525 | case c_dv_data_slice: | |
526 | case c_dv_ptr_data_slice: | |
527 | if (m_vars[i].alloc_if) { | |
528 | void *buf; | |
529 | if (m_vars[i].flags.sink_addr) { | |
530 | m_in.receive_data(&buf, sizeof(buf)); | |
531 | } | |
532 | else { | |
533 | buf = m_buffers.front(); | |
534 | m_buffers.pop_front(); | |
535 | } | |
536 | if (buf) { | |
537 | if (!is_static) { | |
538 | if (!m_vars[i].flags.sink_addr) { | |
539 | // increment buffer reference | |
540 | OFFLOAD_TIMER_START(c_offload_target_add_buffer_refs); | |
541 | BufferAddRef(buf); | |
542 | OFFLOAD_TIMER_STOP(c_offload_target_add_buffer_refs); | |
543 | } | |
544 | add_ref_count(buf, 0 == m_vars[i].flags.sink_addr); | |
545 | } | |
546 | ptr = static_cast<char*>(buf) + | |
547 | m_vars[i].mic_offset + m_vars[i].offset; | |
548 | } | |
549 | *ptr_addr = ptr; | |
550 | } | |
551 | else if (m_vars[i].flags.sink_addr) { | |
552 | void *buf; | |
553 | m_in.receive_data(&buf, sizeof(buf)); | |
554 | ptr = static_cast<char*>(buf) + | |
555 | m_vars[i].mic_offset + m_vars[i].offset; | |
556 | *ptr_addr = ptr; | |
557 | } | |
558 | break; | |
559 | ||
560 | default: | |
561 | LIBOFFLOAD_ERROR(c_unknown_var_type, type); | |
562 | abort(); | |
563 | } | |
df26a50d IV |
564 | // Release obsolete buffers for stack of persistent objects. |
565 | // The vardesc with i==0 and flags.is_stack_buf==TRUE is always for | |
566 | // stack buffer pointer. | |
567 | if (i == 0 && | |
5f520819 KY |
568 | m_vars[i].flags.is_stack_buf && |
569 | !m_vars[i].direction.bits && | |
570 | m_vars[i].alloc_if && | |
571 | m_vars[i].size != 0) { | |
572 | for (int j=0; j < m_vars[i].size; j++) { | |
573 | void *buf; | |
574 | m_in.receive_data(&buf, sizeof(buf)); | |
df26a50d | 575 | OFFLOAD_TRACE(4, "Releasing stack buffer %p\n", buf); |
5f520819 KY |
576 | BufferReleaseRef(buf); |
577 | ref_data.erase(buf); | |
578 | } | |
579 | } | |
580 | // Do copyin | |
df26a50d | 581 | switch (m_vars_extra[i].type_dst) { |
5f520819 KY |
582 | case c_data_ptr_array: |
583 | break; | |
584 | case c_data: | |
585 | case c_void_ptr: | |
df26a50d | 586 | case c_void_ptr_ptr: |
5f520819 KY |
587 | case c_cean_var: |
588 | if (m_vars[i].direction.in && | |
589 | !m_vars[i].flags.is_static_dstn) { | |
590 | int64_t size; | |
591 | int64_t disp; | |
592 | char* ptr = m_vars[i].into ? | |
593 | static_cast<char*>(m_vars[i].into) : | |
594 | static_cast<char*>(m_vars[i].ptr); | |
df26a50d | 595 | if (m_vars_extra[i].type_dst == c_cean_var) { |
5f520819 KY |
596 | m_in.receive_data((&size), sizeof(int64_t)); |
597 | m_in.receive_data((&disp), sizeof(int64_t)); | |
598 | } | |
599 | else { | |
600 | size = m_vars[i].size; | |
601 | disp = 0; | |
602 | } | |
603 | m_in.receive_data(ptr + disp, size); | |
604 | } | |
605 | break; | |
606 | ||
607 | case c_dv: | |
608 | if (m_vars[i].direction.bits || | |
609 | m_vars[i].alloc_if || | |
610 | m_vars[i].free_if) { | |
611 | char* ptr = m_vars[i].into ? | |
612 | static_cast<char*>(m_vars[i].into) : | |
613 | static_cast<char*>(m_vars[i].ptr); | |
614 | m_in.receive_data(ptr + sizeof(uint64_t), | |
615 | m_vars[i].size - sizeof(uint64_t)); | |
616 | } | |
617 | break; | |
618 | ||
619 | case c_string_ptr: | |
620 | case c_data_ptr: | |
df26a50d IV |
621 | case c_string_ptr_ptr: |
622 | case c_data_ptr_ptr: | |
5f520819 | 623 | case c_cean_var_ptr: |
df26a50d | 624 | case c_cean_var_ptr_ptr: |
5f520819 KY |
625 | case c_dv_ptr: |
626 | case c_dv_data: | |
627 | case c_dv_ptr_data: | |
628 | case c_dv_data_slice: | |
629 | case c_dv_ptr_data_slice: | |
630 | break; | |
631 | ||
632 | case c_func_ptr: | |
df26a50d | 633 | case c_func_ptr_ptr: |
5f520819 KY |
634 | if (m_vars[i].direction.in) { |
635 | m_in.receive_func_ptr((const void**) m_vars[i].ptr); | |
636 | } | |
637 | break; | |
638 | ||
639 | default: | |
df26a50d | 640 | LIBOFFLOAD_ERROR(c_unknown_var_type, m_vars_extra[i].type_dst); |
5f520819 KY |
641 | abort(); |
642 | } | |
643 | } | |
644 | ||
645 | OFFLOAD_TRACE(1, "Total copyin data received from host: [%lld] bytes\n", | |
646 | m_in.get_tfr_size()); | |
647 | ||
648 | OFFLOAD_TIMER_STOP(c_offload_target_scatter_inputs); | |
649 | ||
650 | OFFLOAD_TIMER_START(c_offload_target_compute); | |
651 | } | |
652 | ||
653 | void OffloadDescriptor::gather_copyout_data() | |
654 | { | |
655 | OFFLOAD_TIMER_STOP(c_offload_target_compute); | |
656 | ||
657 | OFFLOAD_TIMER_START(c_offload_target_gather_outputs); | |
658 | ||
659 | for (int i = 0; i < m_vars_total; i++) { | |
660 | bool src_is_for_mic = (m_vars[i].direction.out || | |
661 | m_vars[i].into == NULL); | |
df26a50d IV |
662 | if (m_vars[i].flags.is_stack_buf) { |
663 | continue; | |
664 | } | |
665 | switch (m_vars_extra[i].type_src) { | |
5f520819 KY |
666 | case c_data_ptr_array: |
667 | break; | |
668 | case c_data: | |
669 | case c_void_ptr: | |
df26a50d | 670 | case c_void_ptr_ptr: |
5f520819 KY |
671 | case c_cean_var: |
672 | if (m_vars[i].direction.out && | |
673 | !m_vars[i].flags.is_static) { | |
674 | m_out.send_data( | |
675 | static_cast<char*>(m_vars[i].ptr) + m_vars[i].disp, | |
676 | m_vars[i].size); | |
677 | } | |
678 | break; | |
679 | ||
680 | case c_dv: | |
681 | break; | |
682 | ||
683 | case c_string_ptr: | |
684 | case c_data_ptr: | |
df26a50d IV |
685 | case c_string_ptr_ptr: |
686 | case c_data_ptr_ptr: | |
5f520819 | 687 | case c_cean_var_ptr: |
df26a50d | 688 | case c_cean_var_ptr_ptr: |
5f520819 KY |
689 | case c_dv_ptr: |
690 | if (m_vars[i].free_if && | |
691 | src_is_for_mic && | |
2eab9666 | 692 | !m_vars[i].flags.preallocated && |
5f520819 KY |
693 | !m_vars[i].flags.is_static) { |
694 | void *buf = *static_cast<char**>(m_vars[i].ptr) - | |
695 | m_vars[i].mic_offset - | |
696 | (m_vars[i].flags.is_stack_buf? | |
697 | 0 : m_vars[i].offset); | |
698 | if (buf == NULL) { | |
699 | break; | |
700 | } | |
701 | // decrement buffer reference count | |
702 | OFFLOAD_TIMER_START(c_offload_target_release_buffer_refs); | |
703 | BufReleaseRef(buf); | |
704 | OFFLOAD_TIMER_STOP(c_offload_target_release_buffer_refs); | |
705 | } | |
2eab9666 IV |
706 | if (m_vars[i].flags.preallocated && m_vars[i].alloc_if) { |
707 | m_out.send_data((void*) m_vars[i].ptr, sizeof(void*)); | |
708 | } | |
5f520819 KY |
709 | break; |
710 | ||
711 | case c_func_ptr: | |
df26a50d | 712 | case c_func_ptr_ptr: |
5f520819 KY |
713 | if (m_vars[i].direction.out) { |
714 | m_out.send_func_ptr(*((void**) m_vars[i].ptr)); | |
715 | } | |
716 | break; | |
717 | ||
718 | case c_dv_data: | |
719 | case c_dv_ptr_data: | |
720 | case c_dv_data_slice: | |
721 | case c_dv_ptr_data_slice: | |
722 | if (src_is_for_mic && | |
723 | m_vars[i].free_if && | |
724 | !m_vars[i].flags.is_static) { | |
df26a50d IV |
725 | ArrDesc *dvp = (m_vars_extra[i].type_src == c_dv_data || |
726 | m_vars_extra[i].type_src == c_dv_data_slice) ? | |
727 | static_cast<ArrDesc*>(m_vars[i].ptr) : | |
728 | *static_cast<ArrDesc**>(m_vars[i].ptr); | |
5f520819 KY |
729 | |
730 | void *buf = reinterpret_cast<char*>(dvp->Base) - | |
731 | m_vars[i].mic_offset - | |
732 | m_vars[i].offset; | |
733 | ||
734 | if (buf == NULL) { | |
735 | break; | |
736 | } | |
737 | ||
738 | // decrement buffer reference count | |
739 | OFFLOAD_TIMER_START(c_offload_target_release_buffer_refs); | |
740 | BufReleaseRef(buf); | |
741 | OFFLOAD_TIMER_STOP(c_offload_target_release_buffer_refs); | |
742 | } | |
743 | break; | |
744 | ||
745 | default: | |
df26a50d | 746 | LIBOFFLOAD_ERROR(c_unknown_var_type, m_vars_extra[i].type_dst); |
5f520819 KY |
747 | abort(); |
748 | } | |
749 | ||
750 | if (m_vars[i].into) { | |
df26a50d | 751 | switch (m_vars_extra[i].type_dst) { |
5f520819 KY |
752 | case c_data_ptr_array: |
753 | break; | |
754 | case c_data: | |
755 | case c_void_ptr: | |
df26a50d | 756 | case c_void_ptr_ptr: |
5f520819 KY |
757 | case c_cean_var: |
758 | case c_dv: | |
759 | break; | |
760 | ||
761 | case c_string_ptr: | |
762 | case c_data_ptr: | |
df26a50d IV |
763 | case c_string_ptr_ptr: |
764 | case c_data_ptr_ptr: | |
5f520819 | 765 | case c_cean_var_ptr: |
df26a50d | 766 | case c_cean_var_ptr_ptr: |
5f520819 KY |
767 | case c_dv_ptr: |
768 | if (m_vars[i].direction.in && | |
769 | m_vars[i].free_if && | |
770 | !m_vars[i].flags.is_static_dstn) { | |
771 | void *buf = *static_cast<char**>(m_vars[i].into) - | |
772 | m_vars[i].mic_offset - | |
773 | (m_vars[i].flags.is_stack_buf? | |
774 | 0 : m_vars[i].offset); | |
775 | ||
776 | if (buf == NULL) { | |
777 | break; | |
778 | } | |
779 | // decrement buffer reference count | |
780 | OFFLOAD_TIMER_START( | |
781 | c_offload_target_release_buffer_refs); | |
782 | BufReleaseRef(buf); | |
783 | OFFLOAD_TIMER_STOP( | |
784 | c_offload_target_release_buffer_refs); | |
785 | } | |
786 | break; | |
787 | ||
788 | case c_func_ptr: | |
df26a50d | 789 | case c_func_ptr_ptr: |
5f520819 KY |
790 | break; |
791 | ||
792 | case c_dv_data: | |
793 | case c_dv_ptr_data: | |
794 | case c_dv_data_slice: | |
795 | case c_dv_ptr_data_slice: | |
796 | if (m_vars[i].free_if && | |
797 | m_vars[i].direction.in && | |
798 | !m_vars[i].flags.is_static_dstn) { | |
799 | ArrDesc *dvp = | |
df26a50d IV |
800 | (m_vars_extra[i].type_dst == c_dv_data_slice || |
801 | m_vars_extra[i].type_dst == c_dv_data) ? | |
5f520819 KY |
802 | static_cast<ArrDesc*>(m_vars[i].into) : |
803 | *static_cast<ArrDesc**>(m_vars[i].into); | |
804 | void *buf = reinterpret_cast<char*>(dvp->Base) - | |
805 | m_vars[i].mic_offset - | |
806 | m_vars[i].offset; | |
807 | ||
808 | if (buf == NULL) { | |
809 | break; | |
810 | } | |
811 | // decrement buffer reference count | |
812 | OFFLOAD_TIMER_START( | |
813 | c_offload_target_release_buffer_refs); | |
814 | BufReleaseRef(buf); | |
815 | OFFLOAD_TIMER_STOP( | |
816 | c_offload_target_release_buffer_refs); | |
817 | } | |
818 | break; | |
819 | ||
820 | default: | |
df26a50d | 821 | LIBOFFLOAD_ERROR(c_unknown_var_type, m_vars_extra[i].type_dst); |
5f520819 KY |
822 | abort(); |
823 | } | |
824 | } | |
825 | } | |
826 | ||
827 | OFFLOAD_DEBUG_TRACE(2, "OUT buffer @ p %p size %lld\n", | |
828 | m_out.get_buffer_start(), | |
829 | m_out.get_buffer_size()); | |
830 | ||
831 | OFFLOAD_DEBUG_DUMP_BYTES(2, | |
832 | m_out.get_buffer_start(), | |
833 | m_out.get_buffer_size()); | |
834 | ||
835 | OFFLOAD_DEBUG_TRACE_1(1, get_offload_number(), c_offload_copyout_data, | |
836 | "Total copyout data sent to host: [%lld] bytes\n", | |
837 | m_out.get_tfr_size()); | |
838 | ||
839 | OFFLOAD_TIMER_STOP(c_offload_target_gather_outputs); | |
840 | } | |
841 | ||
842 | void __offload_target_init(void) | |
843 | { | |
844 | #ifdef SEP_SUPPORT | |
845 | const char* env_var = getenv(sep_monitor_env); | |
846 | if (env_var != 0 && *env_var != '\0') { | |
847 | sep_monitor = atoi(env_var); | |
848 | } | |
849 | env_var = getenv(sep_device_env); | |
850 | if (env_var != 0 && *env_var != '\0') { | |
851 | sep_device = env_var; | |
852 | } | |
853 | #endif // SEP_SUPPORT | |
854 | ||
855 | prefix = report_get_message_str(c_report_mic); | |
856 | ||
857 | // init frequency | |
858 | mic_frequency = COIPerfGetCycleFrequency(); | |
859 | } | |
860 | ||
861 | // User-visible offload API | |
862 | ||
863 | int _Offload_number_of_devices(void) | |
864 | { | |
865 | return mic_engines_total; | |
866 | } | |
867 | ||
868 | int _Offload_get_device_number(void) | |
869 | { | |
870 | return mic_index; | |
871 | } | |
872 | ||
873 | int _Offload_get_physical_device_number(void) | |
874 | { | |
875 | uint32_t index; | |
876 | EngineGetIndex(&index); | |
877 | return index; | |
878 | } |