2 Copyright (c) 2014-2015 Intel Corporation. All Rights Reserved.
4 Redistribution and use in source and binary forms, with or without
5 modification, are permitted provided that the following conditions
8 * Redistributions of source code must retain the above copyright
9 notice, this list of conditions and the following disclaimer.
10 * Redistributions in binary form must reproduce the above copyright
11 notice, this list of conditions and the following disclaimer in the
12 documentation and/or other materials provided with the distribution.
13 * Neither the name of Intel Corporation nor the names of its
14 contributors may be used to endorse or promote products derived
15 from this software without specific prior written permission.
17 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
18 "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
19 LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
20 A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
21 HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
22 SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
23 LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
24 DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
25 THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
26 (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
27 OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
31 #include "offload_target.h"
36 #include <sys/ioctl.h>
41 // typedef offload_func_with_parms.
42 // Pointer to function that represents an offloaded entry point.
43 // The parameters are a temporary fix for parameters on the stack.
44 typedef void (*offload_func_with_parms
)(void *);
46 // Target console and file logging
48 int console_enabled
= 0;
49 int offload_report_level
= 0;
52 static const char* vardesc_direction_as_string
[] = {
58 static const char* vardesc_type_as_string
[] = {
// Value handed back unchanged by _Offload_number_of_devices(); starts at -1.
// NOTE(review): where the real engine count gets stored is not visible in
// this view of the file.
77 int mic_engines_total
= -1;
// Overwritten with COIPerfGetCycleFrequency() in __offload_target_init().
78 uint64_t mic_frequency
= 0;
// Refreshed from the incoming FunctionDescriptor at the start of
// OffloadDescriptor::offload().
79 int offload_number
= 0;
// RefInfo record per buffer address. add_ref_count() and BufReleaseRef()
// access it through operator[] while holding add_ref_lock.
80 static std::map
<void*, RefInfo
*> ref_data
;
// Mutex taken (through a mutex_locker_t local) by add_ref_count() and
// BufReleaseRef() before they touch ref_data.
81 static mutex_t add_ref_lock
;
// Name of the environment variable read in __offload_target_init(); a
// non-empty value is parsed with atoi() and stored in sep_monitor.
84 static const char* sep_monitor_env
= "SEP_MONITOR";
// Result of parsing SEP_MONITOR; being a bool, any non-zero parse yields true.
85 static bool sep_monitor
= false;
// Name of the environment variable whose non-empty value replaces sep_device.
86 static const char* sep_device_env
= "SEP_DEVICE";
// Path opened O_RDWR by VTPauseSampling()/VTResumeSampling(); this is the
// default used when SEP_DEVICE is unset or empty.
87 static const char* sep_device
= "/dev/sep3.8/c";
// Counter updated with __sync_fetch_and_add / __sync_sub_and_fetch in
// OffloadDescriptor::offload(); the "VTResumeSampling"/"VTPauseSampling"
// traces sit behind its comparisons against 0.
88 static int sep_counter
= 0;
// ioctl request codes issued on the sep_device handle: magic 99, command
// numbers 31 (pause) and 32 (resume).
90 #define SEP_API_IOC_MAGIC 99
91 #define SEP_IOCTL_PAUSE _IO (SEP_API_IOC_MAGIC, 31)
92 #define SEP_IOCTL_RESUME _IO (SEP_API_IOC_MAGIC, 32)
// Records a reference to `buf` in ref_data.
//   buf     - buffer address used as the map key.
//   created - OR-ed into the record's is_added flag and passed as the first
//             constructor argument when a new RefInfo is allocated.
// NOTE(review): the lines between the lookup and the `new RefInfo`, and the
// line right after it, are missing from this view. Presumably they select
// between bumping an existing record and allocating a fresh one - confirm
// against the full source.
94 static void add_ref_count(void * buf
, bool created
)
// Scoped lock on add_ref_lock for the rest of the function.
96 mutex_locker_t
locker(add_ref_lock
);
// std::map::operator[] inserts a null RefInfo* if buf has no entry yet.
97 RefInfo
* info
= ref_data
[buf
];
103 info
= new RefInfo((int)created
,(long)1);
// is_added is sticky: once set by a call with created == true it stays set.
105 info
->is_added
|= created
;
// Store the (possibly newly allocated) record back under buf.
106 ref_data
[buf
] = info
;
// Looks up buf's RefInfo under add_ref_lock and, when its count is 0 and
// is_added is set, traces the event and calls BufferReleaseRef(buf).
// NOTE(review): the lines between the lookup and the `count == 0` test are
// missing from this view; any null check or decrement of count would live
// there - confirm against the full source.
109 static void BufReleaseRef(void * buf
)
// Scoped lock on add_ref_lock for the rest of the function.
111 mutex_locker_t
locker(add_ref_lock
);
// std::map::operator[] inserts a null RefInfo* if buf has no entry yet.
112 RefInfo
* info
= ref_data
[buf
];
116 if (info
->count
== 0 && info
->is_added
) {
// The trace re-reads the same record through ref_data[buf].
117 OFFLOAD_TRACE(1, "Calling COIBufferReleaseRef AddRef count = %d\n",
118 ((RefInfo
*) ref_data
[buf
])->count
);
119 BufferReleaseRef(buf
);
// Opens sep_device read/write and issues SEP_IOCTL_PAUSE on it, storing the
// ioctl result in `ret`.
// NOTE(review): the declaration of `ret`, any check of `handle`, the close
// of the descriptor and the return statement are not visible in this view.
125 static int VTPauseSampling(void)
128 int handle
= open(sep_device
, O_RDWR
);
130 ret
= ioctl(handle
, SEP_IOCTL_PAUSE
);
// Opens sep_device read/write and issues SEP_IOCTL_RESUME on it, storing the
// ioctl result in `ret`.
// NOTE(review): the declaration of `ret`, any check of `handle`, the close
// of the descriptor and the return statement are not visible in this view.
136 static int VTResumeSampling(void)
139 int handle
= open(sep_device
, O_RDWR
);
141 ret
= ioctl(handle
, SEP_IOCTL_RESUME
);
146 #endif // SEP_SUPPORT
148 void OffloadDescriptor::offload(
149 uint32_t buffer_count
,
152 uint16_t misc_data_len
,
154 uint16_t return_data_len
157 FunctionDescriptor
*func
= (FunctionDescriptor
*) misc_data
;
158 const char *name
= func
->data
;
159 OffloadDescriptor ofld
;
162 char *timer_data
= 0;
164 console_enabled
= func
->console_enabled
;
165 timer_enabled
= func
->timer_enabled
;
166 offload_report_level
= func
->offload_report_level
;
167 offload_number
= func
->offload_number
;
168 ofld
.set_offload_number(func
->offload_number
);
172 if (__sync_fetch_and_add(&sep_counter
, 1) == 0) {
173 OFFLOAD_DEBUG_TRACE(2, "VTResumeSampling\n");
177 #endif // SEP_SUPPORT
179 OFFLOAD_DEBUG_TRACE_1(2, ofld
.get_offload_number(),
180 c_offload_start_target_func
,
181 "Offload \"%s\" started\n", name
);
183 // initialize timer data
184 OFFLOAD_TIMER_INIT();
186 OFFLOAD_TIMER_START(c_offload_target_total_time
);
188 OFFLOAD_TIMER_START(c_offload_target_descriptor_setup
);
190 // get input/output buffer addresses
191 if (func
->in_datalen
> 0 || func
->out_datalen
> 0) {
192 if (func
->data_offset
!= 0) {
193 in_data
= (char*) misc_data
+ func
->data_offset
;
194 out_data
= (char*) return_data
;
197 char *inout_buf
= (char*) buffers
[--buffer_count
];
199 out_data
= inout_buf
;
203 // assign variable descriptors
204 ofld
.m_vars_total
= func
->vars_num
;
205 if (ofld
.m_vars_total
> 0) {
206 uint64_t var_data_len
= ofld
.m_vars_total
* sizeof(VarDesc
);
208 ofld
.m_vars
= (VarDesc
*) malloc(var_data_len
);
209 if (ofld
.m_vars
== NULL
)
210 LIBOFFLOAD_ERROR(c_malloc
);
211 memcpy(ofld
.m_vars
, in_data
, var_data_len
);
213 in_data
+= var_data_len
;
214 func
->in_datalen
-= var_data_len
;
218 if (func
->timer_enabled
) {
219 uint64_t timer_data_len
= OFFLOAD_TIMER_DATALEN();
221 timer_data
= out_data
;
222 out_data
+= timer_data_len
;
223 func
->out_datalen
-= timer_data_len
;
227 ofld
.m_in
.init_buffer(in_data
, func
->in_datalen
);
228 ofld
.m_out
.init_buffer(out_data
, func
->out_datalen
);
230 // copy buffers to offload descriptor
231 std::copy(buffers
, buffers
+ buffer_count
,
232 std::back_inserter(ofld
.m_buffers
));
234 OFFLOAD_TIMER_STOP(c_offload_target_descriptor_setup
);
236 // find offload entry address
237 OFFLOAD_TIMER_START(c_offload_target_func_lookup
);
239 offload_func_with_parms entry
= (offload_func_with_parms
)
240 __offload_entries
.find_addr(name
);
243 #if OFFLOAD_DEBUG > 0
244 if (console_enabled
> 2) {
245 __offload_entries
.dump();
248 LIBOFFLOAD_ERROR(c_offload_descriptor_offload
, name
);
252 OFFLOAD_TIMER_STOP(c_offload_target_func_lookup
);
254 OFFLOAD_TIMER_START(c_offload_target_func_time
);
256 // execute offload entry
259 OFFLOAD_TIMER_STOP(c_offload_target_func_time
);
261 OFFLOAD_TIMER_STOP(c_offload_target_total_time
);
263 // copy timer data to the buffer
264 OFFLOAD_TIMER_TARGET_DATA(timer_data
);
266 OFFLOAD_DEBUG_TRACE(2, "Offload \"%s\" finished\n", name
);
270 if (__sync_sub_and_fetch(&sep_counter
, 1) == 0) {
271 OFFLOAD_DEBUG_TRACE(2, "VTPauseSampling\n");
275 #endif // SEP_SUPPORT
278 void OffloadDescriptor::merge_var_descs(
284 // number of variable descriptors received from host and generated
285 // locally should match
286 if (m_vars_total
< vars_total
) {
287 LIBOFFLOAD_ERROR(c_merge_var_descs1
);
291 for (int i
= 0; i
< m_vars_total
; i
++) {
292 if (i
< vars_total
) {
293 // variable type must match
294 if (m_vars
[i
].type
.bits
!= vars
[i
].type
.bits
) {
295 LIBOFFLOAD_ERROR(c_merge_var_descs2
);
299 m_vars
[i
].ptr
= vars
[i
].ptr
;
300 m_vars
[i
].into
= vars
[i
].into
;
302 const char *var_sname
= "";
304 if (vars2
[i
].sname
!= NULL
) {
305 var_sname
= vars2
[i
].sname
;
308 OFFLOAD_DEBUG_TRACE_1(2, get_offload_number(), c_offload_var
,
309 " VarDesc %d, var=%s, %s, %s\n",
311 vardesc_direction_as_string
[m_vars
[i
].direction
.bits
],
312 vardesc_type_as_string
[m_vars
[i
].type
.src
]);
313 if (vars2
!= NULL
&& vars2
[i
].dname
!= NULL
) {
314 OFFLOAD_TRACE(2, " into=%s, %s\n", vars2
[i
].dname
,
315 vardesc_type_as_string
[m_vars
[i
].type
.dst
]);
319 " type_src=%d, type_dstn=%d, direction=%d, "
320 "alloc_if=%d, free_if=%d, align=%d, mic_offset=%d, flags=0x%x, "
321 "offset=%lld, size=%lld, count/disp=%lld, ptr=%p into=%p\n",
324 m_vars
[i
].direction
.bits
,
328 m_vars
[i
].mic_offset
,
329 m_vars
[i
].flags
.bits
,
338 void OffloadDescriptor::scatter_copyin_data()
340 OFFLOAD_TIMER_START(c_offload_target_scatter_inputs
);
342 OFFLOAD_DEBUG_TRACE(2, "IN buffer @ %p size %lld\n",
343 m_in
.get_buffer_start(),
344 m_in
.get_buffer_size());
345 OFFLOAD_DEBUG_DUMP_BYTES(2, m_in
.get_buffer_start(),
346 m_in
.get_buffer_size());
349 for (int i
= 0; i
< m_vars_total
; i
++) {
350 bool src_is_for_mic
= (m_vars
[i
].direction
.out
||
351 m_vars
[i
].into
== NULL
);
352 void** ptr_addr
= src_is_for_mic
?
353 static_cast<void**>(m_vars
[i
].ptr
) :
354 static_cast<void**>(m_vars
[i
].into
);
355 int type
= src_is_for_mic
? m_vars
[i
].type
.src
:
357 bool is_static
= src_is_for_mic
?
358 m_vars
[i
].flags
.is_static
:
359 m_vars
[i
].flags
.is_static_dstn
;
362 if (m_vars
[i
].flags
.alloc_disp
) {
364 m_in
.receive_data(&offset
, sizeof(offset
));
366 if (VAR_TYPE_IS_DV_DATA_SLICE(type
) ||
367 VAR_TYPE_IS_DV_DATA(type
)) {
368 ArrDesc
*dvp
= (type
== c_dv_data_slice
|| type
== c_dv_data
)?
369 reinterpret_cast<ArrDesc
*>(ptr_addr
) :
370 *reinterpret_cast<ArrDesc
**>(ptr_addr
);
371 ptr_addr
= reinterpret_cast<void**>(&dvp
->Base
);
373 // Set pointer values
375 case c_data_ptr_array
:
377 int j
= m_vars
[i
].ptr_arr_offset
;
378 int max_el
= j
+ m_vars
[i
].count
;
379 char *dst_arr_ptr
= (src_is_for_mic
)?
380 *(reinterpret_cast<char**>(m_vars
[i
].ptr
)) :
381 reinterpret_cast<char*>(m_vars
[i
].into
);
383 if (m_vars
[i
].flags
.is_pointer
) {
384 dst_arr_ptr
= *((char**)dst_arr_ptr
);
386 for (; j
< max_el
; j
++) {
387 if (src_is_for_mic
) {
389 dst_arr_ptr
+ m_vars
[j
].ptr_arr_offset
;
393 dst_arr_ptr
+ m_vars
[j
].ptr_arr_offset
;
408 if (m_vars
[i
].alloc_if
&& !m_vars
[i
].flags
.preallocated
) {
410 if (m_vars
[i
].flags
.sink_addr
) {
411 m_in
.receive_data(&buf
, sizeof(buf
));
414 buf
= m_buffers
.front();
415 m_buffers
.pop_front();
419 if (!m_vars
[i
].flags
.sink_addr
) {
420 // increment buffer reference
421 OFFLOAD_TIMER_START(c_offload_target_add_buffer_refs
);
423 OFFLOAD_TRACE(1, "Calling COIBufferAddRef %p\n", buf
);
424 OFFLOAD_TIMER_STOP(c_offload_target_add_buffer_refs
);
426 add_ref_count(buf
, 0 == m_vars
[i
].flags
.sink_addr
);
427 OFFLOAD_TRACE(1, " AddRef count = %d\n",
428 ((RefInfo
*) ref_data
[buf
])->count
);
430 ptr
= static_cast<char*>(buf
) +
431 m_vars
[i
].mic_offset
+
432 (m_vars
[i
].flags
.is_stack_buf
?
433 0 : m_vars
[i
].offset
);
437 else if (m_vars
[i
].flags
.sink_addr
) {
439 m_in
.receive_data(&buf
, sizeof(buf
));
440 void *ptr
= static_cast<char*>(buf
) +
441 m_vars
[i
].mic_offset
+
442 (m_vars
[i
].flags
.is_stack_buf
?
443 0 : m_vars
[i
].offset
);
453 case c_dv_data_slice
:
454 case c_dv_ptr_data_slice
:
455 if (m_vars
[i
].alloc_if
) {
457 if (m_vars
[i
].flags
.sink_addr
) {
458 m_in
.receive_data(&buf
, sizeof(buf
));
461 buf
= m_buffers
.front();
462 m_buffers
.pop_front();
466 if (!m_vars
[i
].flags
.sink_addr
) {
467 // increment buffer reference
468 OFFLOAD_TIMER_START(c_offload_target_add_buffer_refs
);
470 OFFLOAD_TIMER_STOP(c_offload_target_add_buffer_refs
);
472 add_ref_count(buf
, 0 == m_vars
[i
].flags
.sink_addr
);
474 ptr
= static_cast<char*>(buf
) +
475 m_vars
[i
].mic_offset
+ m_vars
[i
].offset
;
479 else if (m_vars
[i
].flags
.sink_addr
) {
481 m_in
.receive_data(&buf
, sizeof(buf
));
482 ptr
= static_cast<char*>(buf
) +
483 m_vars
[i
].mic_offset
+ m_vars
[i
].offset
;
489 LIBOFFLOAD_ERROR(c_unknown_var_type
, type
);
492 // Release obsolete buffers for stack of persistent objects
493 if (type
= c_data_ptr
&&
494 m_vars
[i
].flags
.is_stack_buf
&&
495 !m_vars
[i
].direction
.bits
&&
496 m_vars
[i
].alloc_if
&&
497 m_vars
[i
].size
!= 0) {
498 for (int j
=0; j
< m_vars
[i
].size
; j
++) {
500 m_in
.receive_data(&buf
, sizeof(buf
));
501 BufferReleaseRef(buf
);
506 switch (m_vars
[i
].type
.dst
) {
507 case c_data_ptr_array
:
512 if (m_vars
[i
].direction
.in
&&
513 !m_vars
[i
].flags
.is_static_dstn
) {
516 char* ptr
= m_vars
[i
].into
?
517 static_cast<char*>(m_vars
[i
].into
) :
518 static_cast<char*>(m_vars
[i
].ptr
);
519 if (m_vars
[i
].type
.dst
== c_cean_var
) {
520 m_in
.receive_data((&size
), sizeof(int64_t));
521 m_in
.receive_data((&disp
), sizeof(int64_t));
524 size
= m_vars
[i
].size
;
527 m_in
.receive_data(ptr
+ disp
, size
);
532 if (m_vars
[i
].direction
.bits
||
533 m_vars
[i
].alloc_if
||
535 char* ptr
= m_vars
[i
].into
?
536 static_cast<char*>(m_vars
[i
].into
) :
537 static_cast<char*>(m_vars
[i
].ptr
);
538 m_in
.receive_data(ptr
+ sizeof(uint64_t),
539 m_vars
[i
].size
- sizeof(uint64_t));
549 case c_dv_data_slice
:
550 case c_dv_ptr_data_slice
:
554 if (m_vars
[i
].direction
.in
) {
555 m_in
.receive_func_ptr((const void**) m_vars
[i
].ptr
);
560 LIBOFFLOAD_ERROR(c_unknown_var_type
, m_vars
[i
].type
.dst
);
565 OFFLOAD_TRACE(1, "Total copyin data received from host: [%lld] bytes\n",
566 m_in
.get_tfr_size());
568 OFFLOAD_TIMER_STOP(c_offload_target_scatter_inputs
);
570 OFFLOAD_TIMER_START(c_offload_target_compute
);
573 void OffloadDescriptor::gather_copyout_data()
575 OFFLOAD_TIMER_STOP(c_offload_target_compute
);
577 OFFLOAD_TIMER_START(c_offload_target_gather_outputs
);
579 for (int i
= 0; i
< m_vars_total
; i
++) {
580 bool src_is_for_mic
= (m_vars
[i
].direction
.out
||
581 m_vars
[i
].into
== NULL
);
583 switch (m_vars
[i
].type
.src
) {
584 case c_data_ptr_array
:
589 if (m_vars
[i
].direction
.out
&&
590 !m_vars
[i
].flags
.is_static
) {
592 static_cast<char*>(m_vars
[i
].ptr
) + m_vars
[i
].disp
,
604 if (m_vars
[i
].free_if
&&
606 !m_vars
[i
].flags
.preallocated
&&
607 !m_vars
[i
].flags
.is_static
) {
608 void *buf
= *static_cast<char**>(m_vars
[i
].ptr
) -
609 m_vars
[i
].mic_offset
-
610 (m_vars
[i
].flags
.is_stack_buf
?
611 0 : m_vars
[i
].offset
);
615 // decrement buffer reference count
616 OFFLOAD_TIMER_START(c_offload_target_release_buffer_refs
);
618 OFFLOAD_TIMER_STOP(c_offload_target_release_buffer_refs
);
620 if (m_vars
[i
].flags
.preallocated
&& m_vars
[i
].alloc_if
) {
621 m_out
.send_data((void*) m_vars
[i
].ptr
, sizeof(void*));
626 if (m_vars
[i
].direction
.out
) {
627 m_out
.send_func_ptr(*((void**) m_vars
[i
].ptr
));
633 case c_dv_data_slice
:
634 case c_dv_ptr_data_slice
:
635 if (src_is_for_mic
&&
637 !m_vars
[i
].flags
.is_static
) {
638 ArrDesc
*dvp
= (m_vars
[i
].type
.src
== c_dv_data
||
639 m_vars
[i
].type
.src
== c_dv_data_slice
) ?
640 static_cast<ArrDesc
*>(m_vars
[i
].ptr
) :
641 *static_cast<ArrDesc
**>(m_vars
[i
].ptr
);
643 void *buf
= reinterpret_cast<char*>(dvp
->Base
) -
644 m_vars
[i
].mic_offset
-
651 // decrement buffer reference count
652 OFFLOAD_TIMER_START(c_offload_target_release_buffer_refs
);
654 OFFLOAD_TIMER_STOP(c_offload_target_release_buffer_refs
);
659 LIBOFFLOAD_ERROR(c_unknown_var_type
, m_vars
[i
].type
.dst
);
663 if (m_vars
[i
].into
) {
664 switch (m_vars
[i
].type
.dst
) {
665 case c_data_ptr_array
:
677 if (m_vars
[i
].direction
.in
&&
679 !m_vars
[i
].flags
.is_static_dstn
) {
680 void *buf
= *static_cast<char**>(m_vars
[i
].into
) -
681 m_vars
[i
].mic_offset
-
682 (m_vars
[i
].flags
.is_stack_buf
?
683 0 : m_vars
[i
].offset
);
688 // decrement buffer reference count
690 c_offload_target_release_buffer_refs
);
693 c_offload_target_release_buffer_refs
);
702 case c_dv_data_slice
:
703 case c_dv_ptr_data_slice
:
704 if (m_vars
[i
].free_if
&&
705 m_vars
[i
].direction
.in
&&
706 !m_vars
[i
].flags
.is_static_dstn
) {
708 (m_vars
[i
].type
.dst
== c_dv_data_slice
||
709 m_vars
[i
].type
.dst
== c_dv_data
) ?
710 static_cast<ArrDesc
*>(m_vars
[i
].into
) :
711 *static_cast<ArrDesc
**>(m_vars
[i
].into
);
712 void *buf
= reinterpret_cast<char*>(dvp
->Base
) -
713 m_vars
[i
].mic_offset
-
719 // decrement buffer reference count
721 c_offload_target_release_buffer_refs
);
724 c_offload_target_release_buffer_refs
);
729 LIBOFFLOAD_ERROR(c_unknown_var_type
, m_vars
[i
].type
.dst
);
735 OFFLOAD_DEBUG_TRACE(2, "OUT buffer @ p %p size %lld\n",
736 m_out
.get_buffer_start(),
737 m_out
.get_buffer_size());
739 OFFLOAD_DEBUG_DUMP_BYTES(2,
740 m_out
.get_buffer_start(),
741 m_out
.get_buffer_size());
743 OFFLOAD_DEBUG_TRACE_1(1, get_offload_number(), c_offload_copyout_data
,
744 "Total copyout data sent to host: [%lld] bytes\n",
745 m_out
.get_tfr_size());
747 OFFLOAD_TIMER_STOP(c_offload_target_gather_outputs
);
// Target-side initialisation: reads the SEP_* environment overrides, fetches
// the report prefix string and caches the COI cycle frequency.
// NOTE(review): the opening of the SEP_SUPPORT conditional, the closing
// braces and the declaration of `prefix` are not visible in this view.
750 void __offload_target_init(void)
753 const char* env_var
= getenv(sep_monitor_env
);
// Only a set, non-empty value is honoured.
754 if (env_var
!= 0 && *env_var
!= '\0') {
// sep_monitor is a bool, so any non-zero atoi() result becomes true.
755 sep_monitor
= atoi(env_var
);
757 env_var
= getenv(sep_device_env
);
758 if (env_var
!= 0 && *env_var
!= '\0') {
// Keeps the pointer returned by getenv() itself; the string is not copied.
759 sep_device
= env_var
;
761 #endif // SEP_SUPPORT
763 prefix
= report_get_message_str(c_report_mic
);
766 mic_frequency
= COIPerfGetCycleFrequency();
769 // User-visible offload API
// Returns the current value of the global mic_engines_total (initialised
// to -1 at its definition).
771 int _Offload_number_of_devices(void)
773 return mic_engines_total
;
776 int _Offload_get_device_number(void)
781 int _Offload_get_physical_device_number(void)
784 EngineGetIndex(&index
);