]> git.ipfire.org Git - thirdparty/gcc.git/blob - liboffloadmic/runtime/offload_target.cpp
backport: Makefile.am (liboffloadmic_host_la_DEPENDENCIES): Remove libcoi_host and...
[thirdparty/gcc.git] / liboffloadmic / runtime / offload_target.cpp
1 /*
2 Copyright (c) 2014-2015 Intel Corporation. All Rights Reserved.
3
4 Redistribution and use in source and binary forms, with or without
5 modification, are permitted provided that the following conditions
6 are met:
7
8 * Redistributions of source code must retain the above copyright
9 notice, this list of conditions and the following disclaimer.
10 * Redistributions in binary form must reproduce the above copyright
11 notice, this list of conditions and the following disclaimer in the
12 documentation and/or other materials provided with the distribution.
13 * Neither the name of Intel Corporation nor the names of its
14 contributors may be used to endorse or promote products derived
15 from this software without specific prior written permission.
16
17 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
18 "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
19 LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
20 A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
21 HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
22 SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
23 LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
24 DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
25 THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
26 (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
27 OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
28 */
29
30
31 #include "offload_target.h"
32 #include <stdlib.h>
33 #include <unistd.h>
34 #ifdef SEP_SUPPORT
35 #include <fcntl.h>
36 #include <sys/ioctl.h>
37 #endif // SEP_SUPPORT
38 #include <omp.h>
39 #include <map>
40
// offload_func_with_parms:
// signature of an offloaded entry point as it is invoked on the target.
// The single void* argument is a temporary fix for parameters that live
// on the stack.
typedef void (*offload_func_with_parms)(void *);

// Target-side console and file logging state.
const char *prefix;                 // message prefix, set during target init
int         console_enabled = 0;    // refreshed from each incoming offload
int         offload_report_level = 0;
50
// Trace information: printable names used by the variable-descriptor
// trace output. Each table is indexed by the corresponding descriptor
// field value.
static const char* vardesc_direction_as_string[] = {
    "NOCOPY", "IN", "OUT", "INOUT"
};

static const char* vardesc_type_as_string[] = {
    "unknown",
    "data", "data_ptr", "func_ptr", "void_ptr", "string_ptr",
    "dv", "dv_data", "dv_data_slice",
    "dv_ptr", "dv_ptr_data", "dv_ptr_data_slice",
    "cean_var", "cean_var_ptr",
    "c_data_ptr_array"
};
75
76 int mic_index = -1;
77 int mic_engines_total = -1;
78 uint64_t mic_frequency = 0;
79 int offload_number = 0;
80 static std::map<void*, RefInfo*> ref_data;
81 static mutex_t add_ref_lock;
82
83 #ifdef SEP_SUPPORT
// SEP sampling control. The two *_env strings name the environment
// variables read at target init; the other values are the defaults used
// when those variables are absent.
static const char*  sep_monitor_env = "SEP_MONITOR";
static const char*  sep_device_env  = "SEP_DEVICE";
static bool         sep_monitor     = false;
static const char*  sep_device      = "/dev/sep3.8/c";
static int          sep_counter     = 0;   // offloads currently in flight

// ioctl request codes understood by the SEP device.
#define SEP_API_IOC_MAGIC   99
#define SEP_IOCTL_PAUSE     _IO (SEP_API_IOC_MAGIC, 31)
#define SEP_IOCTL_RESUME    _IO (SEP_API_IOC_MAGIC, 32)
93
94 static void add_ref_count(void * buf, bool created)
95 {
96 mutex_locker_t locker(add_ref_lock);
97 RefInfo * info = ref_data[buf];
98
99 if (info) {
100 info->count++;
101 }
102 else {
103 info = new RefInfo((int)created,(long)1);
104 }
105 info->is_added |= created;
106 ref_data[buf] = info;
107 }
108
109 static void BufReleaseRef(void * buf)
110 {
111 mutex_locker_t locker(add_ref_lock);
112 RefInfo * info = ref_data[buf];
113
114 if (info) {
115 --info->count;
116 if (info->count == 0 && info->is_added) {
117 OFFLOAD_TRACE(1, "Calling COIBufferReleaseRef AddRef count = %d\n",
118 ((RefInfo *) ref_data[buf])->count);
119 BufferReleaseRef(buf);
120 info->is_added = 0;
121 }
122 }
123 }
124
125 static int VTPauseSampling(void)
126 {
127 int ret = -1;
128 int handle = open(sep_device, O_RDWR);
129 if (handle > 0) {
130 ret = ioctl(handle, SEP_IOCTL_PAUSE);
131 close(handle);
132 }
133 return ret;
134 }
135
136 static int VTResumeSampling(void)
137 {
138 int ret = -1;
139 int handle = open(sep_device, O_RDWR);
140 if (handle > 0) {
141 ret = ioctl(handle, SEP_IOCTL_RESUME);
142 close(handle);
143 }
144 return ret;
145 }
146 #endif // SEP_SUPPORT
147
148 void OffloadDescriptor::offload(
149 uint32_t buffer_count,
150 void** buffers,
151 void* misc_data,
152 uint16_t misc_data_len,
153 void* return_data,
154 uint16_t return_data_len
155 )
156 {
157 FunctionDescriptor *func = (FunctionDescriptor*) misc_data;
158 const char *name = func->data;
159 OffloadDescriptor ofld;
160 char *in_data = 0;
161 char *out_data = 0;
162 char *timer_data = 0;
163
164 console_enabled = func->console_enabled;
165 timer_enabled = func->timer_enabled;
166 offload_report_level = func->offload_report_level;
167 offload_number = func->offload_number;
168 ofld.set_offload_number(func->offload_number);
169
170 #ifdef SEP_SUPPORT
171 if (sep_monitor) {
172 if (__sync_fetch_and_add(&sep_counter, 1) == 0) {
173 OFFLOAD_DEBUG_TRACE(2, "VTResumeSampling\n");
174 VTResumeSampling();
175 }
176 }
177 #endif // SEP_SUPPORT
178
179 OFFLOAD_DEBUG_TRACE_1(2, ofld.get_offload_number(),
180 c_offload_start_target_func,
181 "Offload \"%s\" started\n", name);
182
183 // initialize timer data
184 OFFLOAD_TIMER_INIT();
185
186 OFFLOAD_TIMER_START(c_offload_target_total_time);
187
188 OFFLOAD_TIMER_START(c_offload_target_descriptor_setup);
189
190 // get input/output buffer addresses
191 if (func->in_datalen > 0 || func->out_datalen > 0) {
192 if (func->data_offset != 0) {
193 in_data = (char*) misc_data + func->data_offset;
194 out_data = (char*) return_data;
195 }
196 else {
197 char *inout_buf = (char*) buffers[--buffer_count];
198 in_data = inout_buf;
199 out_data = inout_buf;
200 }
201 }
202
203 // assign variable descriptors
204 ofld.m_vars_total = func->vars_num;
205 if (ofld.m_vars_total > 0) {
206 uint64_t var_data_len = ofld.m_vars_total * sizeof(VarDesc);
207
208 ofld.m_vars = (VarDesc*) malloc(var_data_len);
209 if (ofld.m_vars == NULL)
210 LIBOFFLOAD_ERROR(c_malloc);
211 memcpy(ofld.m_vars, in_data, var_data_len);
212
213 in_data += var_data_len;
214 func->in_datalen -= var_data_len;
215 }
216
217 // timer data
218 if (func->timer_enabled) {
219 uint64_t timer_data_len = OFFLOAD_TIMER_DATALEN();
220
221 timer_data = out_data;
222 out_data += timer_data_len;
223 func->out_datalen -= timer_data_len;
224 }
225
226 // init Marshallers
227 ofld.m_in.init_buffer(in_data, func->in_datalen);
228 ofld.m_out.init_buffer(out_data, func->out_datalen);
229
230 // copy buffers to offload descriptor
231 std::copy(buffers, buffers + buffer_count,
232 std::back_inserter(ofld.m_buffers));
233
234 OFFLOAD_TIMER_STOP(c_offload_target_descriptor_setup);
235
236 // find offload entry address
237 OFFLOAD_TIMER_START(c_offload_target_func_lookup);
238
239 offload_func_with_parms entry = (offload_func_with_parms)
240 __offload_entries.find_addr(name);
241
242 if (entry == NULL) {
243 #if OFFLOAD_DEBUG > 0
244 if (console_enabled > 2) {
245 __offload_entries.dump();
246 }
247 #endif
248 LIBOFFLOAD_ERROR(c_offload_descriptor_offload, name);
249 exit(1);
250 }
251
252 OFFLOAD_TIMER_STOP(c_offload_target_func_lookup);
253
254 OFFLOAD_TIMER_START(c_offload_target_func_time);
255
256 // execute offload entry
257 entry(&ofld);
258
259 OFFLOAD_TIMER_STOP(c_offload_target_func_time);
260
261 OFFLOAD_TIMER_STOP(c_offload_target_total_time);
262
263 // copy timer data to the buffer
264 OFFLOAD_TIMER_TARGET_DATA(timer_data);
265
266 OFFLOAD_DEBUG_TRACE(2, "Offload \"%s\" finished\n", name);
267
268 #ifdef SEP_SUPPORT
269 if (sep_monitor) {
270 if (__sync_sub_and_fetch(&sep_counter, 1) == 0) {
271 OFFLOAD_DEBUG_TRACE(2, "VTPauseSampling\n");
272 VTPauseSampling();
273 }
274 }
275 #endif // SEP_SUPPORT
276 }
277
278 void OffloadDescriptor::merge_var_descs(
279 VarDesc *vars,
280 VarDesc2 *vars2,
281 int vars_total
282 )
283 {
284 // number of variable descriptors received from host and generated
285 // locally should match
286 if (m_vars_total < vars_total) {
287 LIBOFFLOAD_ERROR(c_merge_var_descs1);
288 exit(1);
289 }
290
291 for (int i = 0; i < m_vars_total; i++) {
292 if (i < vars_total) {
293 // variable type must match
294 if (m_vars[i].type.bits != vars[i].type.bits) {
295 LIBOFFLOAD_ERROR(c_merge_var_descs2);
296 exit(1);
297 }
298
299 m_vars[i].ptr = vars[i].ptr;
300 m_vars[i].into = vars[i].into;
301
302 const char *var_sname = "";
303 if (vars2 != NULL) {
304 if (vars2[i].sname != NULL) {
305 var_sname = vars2[i].sname;
306 }
307 }
308 OFFLOAD_DEBUG_TRACE_1(2, get_offload_number(), c_offload_var,
309 " VarDesc %d, var=%s, %s, %s\n",
310 i, var_sname,
311 vardesc_direction_as_string[m_vars[i].direction.bits],
312 vardesc_type_as_string[m_vars[i].type.src]);
313 if (vars2 != NULL && vars2[i].dname != NULL) {
314 OFFLOAD_TRACE(2, " into=%s, %s\n", vars2[i].dname,
315 vardesc_type_as_string[m_vars[i].type.dst]);
316 }
317 }
318 OFFLOAD_TRACE(2,
319 " type_src=%d, type_dstn=%d, direction=%d, "
320 "alloc_if=%d, free_if=%d, align=%d, mic_offset=%d, flags=0x%x, "
321 "offset=%lld, size=%lld, count/disp=%lld, ptr=%p into=%p\n",
322 m_vars[i].type.src,
323 m_vars[i].type.dst,
324 m_vars[i].direction.bits,
325 m_vars[i].alloc_if,
326 m_vars[i].free_if,
327 m_vars[i].align,
328 m_vars[i].mic_offset,
329 m_vars[i].flags.bits,
330 m_vars[i].offset,
331 m_vars[i].size,
332 m_vars[i].count,
333 m_vars[i].ptr,
334 m_vars[i].into);
335 }
336 }
337
338 void OffloadDescriptor::scatter_copyin_data()
339 {
340 OFFLOAD_TIMER_START(c_offload_target_scatter_inputs);
341
342 OFFLOAD_DEBUG_TRACE(2, "IN buffer @ %p size %lld\n",
343 m_in.get_buffer_start(),
344 m_in.get_buffer_size());
345 OFFLOAD_DEBUG_DUMP_BYTES(2, m_in.get_buffer_start(),
346 m_in.get_buffer_size());
347
348 // receive data
349 for (int i = 0; i < m_vars_total; i++) {
350 bool src_is_for_mic = (m_vars[i].direction.out ||
351 m_vars[i].into == NULL);
352 void** ptr_addr = src_is_for_mic ?
353 static_cast<void**>(m_vars[i].ptr) :
354 static_cast<void**>(m_vars[i].into);
355 int type = src_is_for_mic ? m_vars[i].type.src :
356 m_vars[i].type.dst;
357 bool is_static = src_is_for_mic ?
358 m_vars[i].flags.is_static :
359 m_vars[i].flags.is_static_dstn;
360 void *ptr = NULL;
361
362 if (m_vars[i].flags.alloc_disp) {
363 int64_t offset = 0;
364 m_in.receive_data(&offset, sizeof(offset));
365 }
366 if (VAR_TYPE_IS_DV_DATA_SLICE(type) ||
367 VAR_TYPE_IS_DV_DATA(type)) {
368 ArrDesc *dvp = (type == c_dv_data_slice || type == c_dv_data)?
369 reinterpret_cast<ArrDesc*>(ptr_addr) :
370 *reinterpret_cast<ArrDesc**>(ptr_addr);
371 ptr_addr = reinterpret_cast<void**>(&dvp->Base);
372 }
373 // Set pointer values
374 switch (type) {
375 case c_data_ptr_array:
376 {
377 int j = m_vars[i].ptr_arr_offset;
378 int max_el = j + m_vars[i].count;
379 char *dst_arr_ptr = (src_is_for_mic)?
380 *(reinterpret_cast<char**>(m_vars[i].ptr)) :
381 reinterpret_cast<char*>(m_vars[i].into);
382
383 if (m_vars[i].flags.is_pointer) {
384 dst_arr_ptr = *((char**)dst_arr_ptr);
385 }
386 for (; j < max_el; j++) {
387 if (src_is_for_mic) {
388 m_vars[j].ptr =
389 dst_arr_ptr + m_vars[j].ptr_arr_offset;
390 }
391 else {
392 m_vars[j].into =
393 dst_arr_ptr + m_vars[j].ptr_arr_offset;
394 }
395 }
396 }
397 break;
398 case c_data:
399 case c_void_ptr:
400 case c_cean_var:
401 case c_dv:
402 break;
403
404 case c_string_ptr:
405 case c_data_ptr:
406 case c_cean_var_ptr:
407 case c_dv_ptr:
408 if (m_vars[i].alloc_if && !m_vars[i].flags.preallocated) {
409 void *buf = NULL;
410 if (m_vars[i].flags.sink_addr) {
411 m_in.receive_data(&buf, sizeof(buf));
412 }
413 else {
414 buf = m_buffers.front();
415 m_buffers.pop_front();
416 }
417 if (buf) {
418 if (!is_static) {
419 if (!m_vars[i].flags.sink_addr) {
420 // increment buffer reference
421 OFFLOAD_TIMER_START(c_offload_target_add_buffer_refs);
422 BufferAddRef(buf);
423 OFFLOAD_TRACE(1, "Calling COIBufferAddRef %p\n", buf);
424 OFFLOAD_TIMER_STOP(c_offload_target_add_buffer_refs);
425 }
426 add_ref_count(buf, 0 == m_vars[i].flags.sink_addr);
427 OFFLOAD_TRACE(1, " AddRef count = %d\n",
428 ((RefInfo *) ref_data[buf])->count);
429 }
430 ptr = static_cast<char*>(buf) +
431 m_vars[i].mic_offset +
432 (m_vars[i].flags.is_stack_buf ?
433 0 : m_vars[i].offset);
434 }
435 *ptr_addr = ptr;
436 }
437 else if (m_vars[i].flags.sink_addr) {
438 void *buf;
439 m_in.receive_data(&buf, sizeof(buf));
440 void *ptr = static_cast<char*>(buf) +
441 m_vars[i].mic_offset +
442 (m_vars[i].flags.is_stack_buf ?
443 0 : m_vars[i].offset);
444 *ptr_addr = ptr;
445 }
446 break;
447
448 case c_func_ptr:
449 break;
450
451 case c_dv_data:
452 case c_dv_ptr_data:
453 case c_dv_data_slice:
454 case c_dv_ptr_data_slice:
455 if (m_vars[i].alloc_if) {
456 void *buf;
457 if (m_vars[i].flags.sink_addr) {
458 m_in.receive_data(&buf, sizeof(buf));
459 }
460 else {
461 buf = m_buffers.front();
462 m_buffers.pop_front();
463 }
464 if (buf) {
465 if (!is_static) {
466 if (!m_vars[i].flags.sink_addr) {
467 // increment buffer reference
468 OFFLOAD_TIMER_START(c_offload_target_add_buffer_refs);
469 BufferAddRef(buf);
470 OFFLOAD_TIMER_STOP(c_offload_target_add_buffer_refs);
471 }
472 add_ref_count(buf, 0 == m_vars[i].flags.sink_addr);
473 }
474 ptr = static_cast<char*>(buf) +
475 m_vars[i].mic_offset + m_vars[i].offset;
476 }
477 *ptr_addr = ptr;
478 }
479 else if (m_vars[i].flags.sink_addr) {
480 void *buf;
481 m_in.receive_data(&buf, sizeof(buf));
482 ptr = static_cast<char*>(buf) +
483 m_vars[i].mic_offset + m_vars[i].offset;
484 *ptr_addr = ptr;
485 }
486 break;
487
488 default:
489 LIBOFFLOAD_ERROR(c_unknown_var_type, type);
490 abort();
491 }
492 // Release obsolete buffers for stack of persistent objects
493 if (type = c_data_ptr &&
494 m_vars[i].flags.is_stack_buf &&
495 !m_vars[i].direction.bits &&
496 m_vars[i].alloc_if &&
497 m_vars[i].size != 0) {
498 for (int j=0; j < m_vars[i].size; j++) {
499 void *buf;
500 m_in.receive_data(&buf, sizeof(buf));
501 BufferReleaseRef(buf);
502 ref_data.erase(buf);
503 }
504 }
505 // Do copyin
506 switch (m_vars[i].type.dst) {
507 case c_data_ptr_array:
508 break;
509 case c_data:
510 case c_void_ptr:
511 case c_cean_var:
512 if (m_vars[i].direction.in &&
513 !m_vars[i].flags.is_static_dstn) {
514 int64_t size;
515 int64_t disp;
516 char* ptr = m_vars[i].into ?
517 static_cast<char*>(m_vars[i].into) :
518 static_cast<char*>(m_vars[i].ptr);
519 if (m_vars[i].type.dst == c_cean_var) {
520 m_in.receive_data((&size), sizeof(int64_t));
521 m_in.receive_data((&disp), sizeof(int64_t));
522 }
523 else {
524 size = m_vars[i].size;
525 disp = 0;
526 }
527 m_in.receive_data(ptr + disp, size);
528 }
529 break;
530
531 case c_dv:
532 if (m_vars[i].direction.bits ||
533 m_vars[i].alloc_if ||
534 m_vars[i].free_if) {
535 char* ptr = m_vars[i].into ?
536 static_cast<char*>(m_vars[i].into) :
537 static_cast<char*>(m_vars[i].ptr);
538 m_in.receive_data(ptr + sizeof(uint64_t),
539 m_vars[i].size - sizeof(uint64_t));
540 }
541 break;
542
543 case c_string_ptr:
544 case c_data_ptr:
545 case c_cean_var_ptr:
546 case c_dv_ptr:
547 case c_dv_data:
548 case c_dv_ptr_data:
549 case c_dv_data_slice:
550 case c_dv_ptr_data_slice:
551 break;
552
553 case c_func_ptr:
554 if (m_vars[i].direction.in) {
555 m_in.receive_func_ptr((const void**) m_vars[i].ptr);
556 }
557 break;
558
559 default:
560 LIBOFFLOAD_ERROR(c_unknown_var_type, m_vars[i].type.dst);
561 abort();
562 }
563 }
564
565 OFFLOAD_TRACE(1, "Total copyin data received from host: [%lld] bytes\n",
566 m_in.get_tfr_size());
567
568 OFFLOAD_TIMER_STOP(c_offload_target_scatter_inputs);
569
570 OFFLOAD_TIMER_START(c_offload_target_compute);
571 }
572
573 void OffloadDescriptor::gather_copyout_data()
574 {
575 OFFLOAD_TIMER_STOP(c_offload_target_compute);
576
577 OFFLOAD_TIMER_START(c_offload_target_gather_outputs);
578
579 for (int i = 0; i < m_vars_total; i++) {
580 bool src_is_for_mic = (m_vars[i].direction.out ||
581 m_vars[i].into == NULL);
582
583 switch (m_vars[i].type.src) {
584 case c_data_ptr_array:
585 break;
586 case c_data:
587 case c_void_ptr:
588 case c_cean_var:
589 if (m_vars[i].direction.out &&
590 !m_vars[i].flags.is_static) {
591 m_out.send_data(
592 static_cast<char*>(m_vars[i].ptr) + m_vars[i].disp,
593 m_vars[i].size);
594 }
595 break;
596
597 case c_dv:
598 break;
599
600 case c_string_ptr:
601 case c_data_ptr:
602 case c_cean_var_ptr:
603 case c_dv_ptr:
604 if (m_vars[i].free_if &&
605 src_is_for_mic &&
606 !m_vars[i].flags.preallocated &&
607 !m_vars[i].flags.is_static) {
608 void *buf = *static_cast<char**>(m_vars[i].ptr) -
609 m_vars[i].mic_offset -
610 (m_vars[i].flags.is_stack_buf?
611 0 : m_vars[i].offset);
612 if (buf == NULL) {
613 break;
614 }
615 // decrement buffer reference count
616 OFFLOAD_TIMER_START(c_offload_target_release_buffer_refs);
617 BufReleaseRef(buf);
618 OFFLOAD_TIMER_STOP(c_offload_target_release_buffer_refs);
619 }
620 if (m_vars[i].flags.preallocated && m_vars[i].alloc_if) {
621 m_out.send_data((void*) m_vars[i].ptr, sizeof(void*));
622 }
623 break;
624
625 case c_func_ptr:
626 if (m_vars[i].direction.out) {
627 m_out.send_func_ptr(*((void**) m_vars[i].ptr));
628 }
629 break;
630
631 case c_dv_data:
632 case c_dv_ptr_data:
633 case c_dv_data_slice:
634 case c_dv_ptr_data_slice:
635 if (src_is_for_mic &&
636 m_vars[i].free_if &&
637 !m_vars[i].flags.is_static) {
638 ArrDesc *dvp = (m_vars[i].type.src == c_dv_data ||
639 m_vars[i].type.src == c_dv_data_slice) ?
640 static_cast<ArrDesc*>(m_vars[i].ptr) :
641 *static_cast<ArrDesc**>(m_vars[i].ptr);
642
643 void *buf = reinterpret_cast<char*>(dvp->Base) -
644 m_vars[i].mic_offset -
645 m_vars[i].offset;
646
647 if (buf == NULL) {
648 break;
649 }
650
651 // decrement buffer reference count
652 OFFLOAD_TIMER_START(c_offload_target_release_buffer_refs);
653 BufReleaseRef(buf);
654 OFFLOAD_TIMER_STOP(c_offload_target_release_buffer_refs);
655 }
656 break;
657
658 default:
659 LIBOFFLOAD_ERROR(c_unknown_var_type, m_vars[i].type.dst);
660 abort();
661 }
662
663 if (m_vars[i].into) {
664 switch (m_vars[i].type.dst) {
665 case c_data_ptr_array:
666 break;
667 case c_data:
668 case c_void_ptr:
669 case c_cean_var:
670 case c_dv:
671 break;
672
673 case c_string_ptr:
674 case c_data_ptr:
675 case c_cean_var_ptr:
676 case c_dv_ptr:
677 if (m_vars[i].direction.in &&
678 m_vars[i].free_if &&
679 !m_vars[i].flags.is_static_dstn) {
680 void *buf = *static_cast<char**>(m_vars[i].into) -
681 m_vars[i].mic_offset -
682 (m_vars[i].flags.is_stack_buf?
683 0 : m_vars[i].offset);
684
685 if (buf == NULL) {
686 break;
687 }
688 // decrement buffer reference count
689 OFFLOAD_TIMER_START(
690 c_offload_target_release_buffer_refs);
691 BufReleaseRef(buf);
692 OFFLOAD_TIMER_STOP(
693 c_offload_target_release_buffer_refs);
694 }
695 break;
696
697 case c_func_ptr:
698 break;
699
700 case c_dv_data:
701 case c_dv_ptr_data:
702 case c_dv_data_slice:
703 case c_dv_ptr_data_slice:
704 if (m_vars[i].free_if &&
705 m_vars[i].direction.in &&
706 !m_vars[i].flags.is_static_dstn) {
707 ArrDesc *dvp =
708 (m_vars[i].type.dst == c_dv_data_slice ||
709 m_vars[i].type.dst == c_dv_data) ?
710 static_cast<ArrDesc*>(m_vars[i].into) :
711 *static_cast<ArrDesc**>(m_vars[i].into);
712 void *buf = reinterpret_cast<char*>(dvp->Base) -
713 m_vars[i].mic_offset -
714 m_vars[i].offset;
715
716 if (buf == NULL) {
717 break;
718 }
719 // decrement buffer reference count
720 OFFLOAD_TIMER_START(
721 c_offload_target_release_buffer_refs);
722 BufReleaseRef(buf);
723 OFFLOAD_TIMER_STOP(
724 c_offload_target_release_buffer_refs);
725 }
726 break;
727
728 default:
729 LIBOFFLOAD_ERROR(c_unknown_var_type, m_vars[i].type.dst);
730 abort();
731 }
732 }
733 }
734
735 OFFLOAD_DEBUG_TRACE(2, "OUT buffer @ p %p size %lld\n",
736 m_out.get_buffer_start(),
737 m_out.get_buffer_size());
738
739 OFFLOAD_DEBUG_DUMP_BYTES(2,
740 m_out.get_buffer_start(),
741 m_out.get_buffer_size());
742
743 OFFLOAD_DEBUG_TRACE_1(1, get_offload_number(), c_offload_copyout_data,
744 "Total copyout data sent to host: [%lld] bytes\n",
745 m_out.get_tfr_size());
746
747 OFFLOAD_TIMER_STOP(c_offload_target_gather_outputs);
748 }
749
750 void __offload_target_init(void)
751 {
752 #ifdef SEP_SUPPORT
753 const char* env_var = getenv(sep_monitor_env);
754 if (env_var != 0 && *env_var != '\0') {
755 sep_monitor = atoi(env_var);
756 }
757 env_var = getenv(sep_device_env);
758 if (env_var != 0 && *env_var != '\0') {
759 sep_device = env_var;
760 }
761 #endif // SEP_SUPPORT
762
763 prefix = report_get_message_str(c_report_mic);
764
765 // init frequency
766 mic_frequency = COIPerfGetCycleFrequency();
767 }
768
769 // User-visible offload API
770
771 int _Offload_number_of_devices(void)
772 {
773 return mic_engines_total;
774 }
775
776 int _Offload_get_device_number(void)
777 {
778 return mic_index;
779 }
780
781 int _Offload_get_physical_device_number(void)
782 {
783 uint32_t index;
784 EngineGetIndex(&index);
785 return index;
786 }