]> git.ipfire.org Git - thirdparty/gcc.git/blame - liboffloadmic/runtime/offload_target.cpp
backport: Makefile.am (myo_inc_dir): Remove.
[thirdparty/gcc.git] / liboffloadmic / runtime / offload_target.cpp
CommitLineData
5f520819 1/*
df26a50d 2 Copyright (c) 2014-2016 Intel Corporation. All Rights Reserved.
5f520819
KY
3
4 Redistribution and use in source and binary forms, with or without
5 modification, are permitted provided that the following conditions
6 are met:
7
8 * Redistributions of source code must retain the above copyright
9 notice, this list of conditions and the following disclaimer.
10 * Redistributions in binary form must reproduce the above copyright
11 notice, this list of conditions and the following disclaimer in the
12 documentation and/or other materials provided with the distribution.
13 * Neither the name of Intel Corporation nor the names of its
14 contributors may be used to endorse or promote products derived
15 from this software without specific prior written permission.
16
17 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
18 "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
19 LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
20 A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
21 HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
22 SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
23 LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
24 DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
25 THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
26 (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
27 OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
28*/
29
30
31#include "offload_target.h"
32#include <stdlib.h>
33#include <unistd.h>
34#ifdef SEP_SUPPORT
35#include <fcntl.h>
36#include <sys/ioctl.h>
37#endif // SEP_SUPPORT
38#include <omp.h>
39#include <map>
40
// offload_func_with_parms
// Signature of an offloaded entry point: a function that takes a single
// opaque pointer and returns nothing.  Passing the parameters through one
// pointer is a temporary fix for parameters on the stack.
typedef void (*offload_func_with_parms)(void *);
45
// Target-side console and file logging state.
// prefix is assigned in __offload_target_init(); console_enabled and
// offload_report_level are overwritten from the function descriptor at the
// start of every OffloadDescriptor::offload() call.
const char *prefix;
int console_enabled = 0;
int offload_report_level = 0;
50
// Trace information
//
// Printable names used by the tracing in merge_var_descs().

// Indexed by m_vars[i].direction.bits.
static const char* vardesc_direction_as_string[] = {
    "NOCOPY", "IN", "OUT", "INOUT"
};

// Indexed by m_vars_extra[i].type_src / m_vars_extra[i].type_dst.
// The order of the entries is significant: the position in this table is
// the numeric variable type that gets printed.
static const char* vardesc_type_as_string[] = {
    "unknown",
    "data",
    "data_ptr",
    "func_ptr",
    "void_ptr",
    "string_ptr",
    "dv",
    "dv_data",
    "dv_data_slice",
    "dv_ptr",
    "dv_ptr_data",
    "dv_ptr_data_slice",
    "cean_var",
    "cean_var_ptr",
    "c_data_ptr_array",
    "c_extended_type",
    "c_func_ptr_array",
    "c_void_ptr_array",
    "c_string_ptr_array",
    "c_data_ptr_ptr",
    "c_func_ptr_ptr",
    "c_void_ptr_ptr",
    "c_string_ptr_ptr",
    "c_cean_var_ptr_ptr",
};
84
85int mic_index = -1;
86int mic_engines_total = -1;
87uint64_t mic_frequency = 0;
88int offload_number = 0;
89static std::map<void*, RefInfo*> ref_data;
90static mutex_t add_ref_lock;
91
92#ifdef SEP_SUPPORT
// SEP sampling control.
// sep_monitor and sep_device can be overridden through the environment
// variables named below (see __offload_target_init()).  sep_counter is
// incremented at the start and decremented at the end of every
// OffloadDescriptor::offload() call.
static const char* sep_monitor_env = "SEP_MONITOR";
static bool sep_monitor = false;
static const char* sep_device_env = "SEP_DEVICE";
static const char* sep_device = "/dev/sep3.8/c";
static int sep_counter = 0;

// ioctl requests issued on the SEP device.
#define SEP_API_IOC_MAGIC   99
#define SEP_IOCTL_PAUSE     _IO (SEP_API_IOC_MAGIC, 31)
#define SEP_IOCTL_RESUME    _IO (SEP_API_IOC_MAGIC, 32)
102
103static void add_ref_count(void * buf, bool created)
104{
105 mutex_locker_t locker(add_ref_lock);
106 RefInfo * info = ref_data[buf];
107
108 if (info) {
109 info->count++;
110 }
111 else {
112 info = new RefInfo((int)created,(long)1);
113 }
114 info->is_added |= created;
115 ref_data[buf] = info;
116}
117
118static void BufReleaseRef(void * buf)
119{
120 mutex_locker_t locker(add_ref_lock);
121 RefInfo * info = ref_data[buf];
122
123 if (info) {
124 --info->count;
125 if (info->count == 0 && info->is_added) {
2eab9666 126 OFFLOAD_TRACE(1, "Calling COIBufferReleaseRef AddRef count = %d\n",
df26a50d 127 ((RefInfo *) ref_data[buf])->count);
5f520819
KY
128 BufferReleaseRef(buf);
129 info->is_added = 0;
130 }
131 }
132}
133
134static int VTPauseSampling(void)
135{
136 int ret = -1;
137 int handle = open(sep_device, O_RDWR);
138 if (handle > 0) {
139 ret = ioctl(handle, SEP_IOCTL_PAUSE);
140 close(handle);
141 }
142 return ret;
143}
144
145static int VTResumeSampling(void)
146{
147 int ret = -1;
148 int handle = open(sep_device, O_RDWR);
149 if (handle > 0) {
150 ret = ioctl(handle, SEP_IOCTL_RESUME);
151 close(handle);
152 }
153 return ret;
154}
155#endif // SEP_SUPPORT
156
157void OffloadDescriptor::offload(
158 uint32_t buffer_count,
159 void** buffers,
160 void* misc_data,
161 uint16_t misc_data_len,
162 void* return_data,
163 uint16_t return_data_len
164)
165{
166 FunctionDescriptor *func = (FunctionDescriptor*) misc_data;
167 const char *name = func->data;
168 OffloadDescriptor ofld;
169 char *in_data = 0;
170 char *out_data = 0;
171 char *timer_data = 0;
172
173 console_enabled = func->console_enabled;
174 timer_enabled = func->timer_enabled;
175 offload_report_level = func->offload_report_level;
176 offload_number = func->offload_number;
177 ofld.set_offload_number(func->offload_number);
178
179#ifdef SEP_SUPPORT
180 if (sep_monitor) {
181 if (__sync_fetch_and_add(&sep_counter, 1) == 0) {
182 OFFLOAD_DEBUG_TRACE(2, "VTResumeSampling\n");
183 VTResumeSampling();
184 }
185 }
186#endif // SEP_SUPPORT
187
188 OFFLOAD_DEBUG_TRACE_1(2, ofld.get_offload_number(),
189 c_offload_start_target_func,
190 "Offload \"%s\" started\n", name);
191
192 // initialize timer data
193 OFFLOAD_TIMER_INIT();
194
195 OFFLOAD_TIMER_START(c_offload_target_total_time);
196
197 OFFLOAD_TIMER_START(c_offload_target_descriptor_setup);
198
199 // get input/output buffer addresses
200 if (func->in_datalen > 0 || func->out_datalen > 0) {
201 if (func->data_offset != 0) {
202 in_data = (char*) misc_data + func->data_offset;
203 out_data = (char*) return_data;
204 }
205 else {
206 char *inout_buf = (char*) buffers[--buffer_count];
207 in_data = inout_buf;
208 out_data = inout_buf;
209 }
210 }
211
212 // assign variable descriptors
213 ofld.m_vars_total = func->vars_num;
214 if (ofld.m_vars_total > 0) {
215 uint64_t var_data_len = ofld.m_vars_total * sizeof(VarDesc);
216
217 ofld.m_vars = (VarDesc*) malloc(var_data_len);
218 if (ofld.m_vars == NULL)
219 LIBOFFLOAD_ERROR(c_malloc);
220 memcpy(ofld.m_vars, in_data, var_data_len);
221
df26a50d
IV
222 ofld.m_vars_extra =
223 (VarExtra*) malloc(ofld.m_vars_total * sizeof(VarExtra));
224 if (ofld.m_vars == NULL)
225 LIBOFFLOAD_ERROR(c_malloc);
226
5f520819
KY
227 in_data += var_data_len;
228 func->in_datalen -= var_data_len;
229 }
230
231 // timer data
232 if (func->timer_enabled) {
233 uint64_t timer_data_len = OFFLOAD_TIMER_DATALEN();
234
235 timer_data = out_data;
236 out_data += timer_data_len;
237 func->out_datalen -= timer_data_len;
238 }
239
240 // init Marshallers
241 ofld.m_in.init_buffer(in_data, func->in_datalen);
242 ofld.m_out.init_buffer(out_data, func->out_datalen);
243
244 // copy buffers to offload descriptor
245 std::copy(buffers, buffers + buffer_count,
246 std::back_inserter(ofld.m_buffers));
247
248 OFFLOAD_TIMER_STOP(c_offload_target_descriptor_setup);
249
250 // find offload entry address
251 OFFLOAD_TIMER_START(c_offload_target_func_lookup);
252
253 offload_func_with_parms entry = (offload_func_with_parms)
254 __offload_entries.find_addr(name);
255
256 if (entry == NULL) {
257#if OFFLOAD_DEBUG > 0
258 if (console_enabled > 2) {
259 __offload_entries.dump();
260 }
261#endif
262 LIBOFFLOAD_ERROR(c_offload_descriptor_offload, name);
263 exit(1);
264 }
265
266 OFFLOAD_TIMER_STOP(c_offload_target_func_lookup);
267
268 OFFLOAD_TIMER_START(c_offload_target_func_time);
269
270 // execute offload entry
271 entry(&ofld);
272
273 OFFLOAD_TIMER_STOP(c_offload_target_func_time);
274
275 OFFLOAD_TIMER_STOP(c_offload_target_total_time);
276
277 // copy timer data to the buffer
278 OFFLOAD_TIMER_TARGET_DATA(timer_data);
279
280 OFFLOAD_DEBUG_TRACE(2, "Offload \"%s\" finished\n", name);
281
282#ifdef SEP_SUPPORT
283 if (sep_monitor) {
284 if (__sync_sub_and_fetch(&sep_counter, 1) == 0) {
285 OFFLOAD_DEBUG_TRACE(2, "VTPauseSampling\n");
286 VTPauseSampling();
287 }
288 }
289#endif // SEP_SUPPORT
290}
291
292void OffloadDescriptor::merge_var_descs(
293 VarDesc *vars,
294 VarDesc2 *vars2,
295 int vars_total
296)
297{
298 // number of variable descriptors received from host and generated
299 // locally should match
300 if (m_vars_total < vars_total) {
301 LIBOFFLOAD_ERROR(c_merge_var_descs1);
302 exit(1);
303 }
304
305 for (int i = 0; i < m_vars_total; i++) {
df26a50d
IV
306 // instead of m_vars[i].type.src we will use m_vars_extra[i].type_src
307
5f520819
KY
308 if (i < vars_total) {
309 // variable type must match
310 if (m_vars[i].type.bits != vars[i].type.bits) {
df26a50d
IV
311 OFFLOAD_TRACE(2,
312 "m_vars[%d].type.bits=%08x, vars[%d].type.bits=%08x\n",
313 i, m_vars[i].type.bits, i, vars[i].type.bits);
5f520819
KY
314 LIBOFFLOAD_ERROR(c_merge_var_descs2);
315 exit(1);
316 }
317
df26a50d
IV
318 if (m_vars[i].type.src == c_extended_type) {
319 VarDescExtendedType *etype =
320 reinterpret_cast<VarDescExtendedType*>(vars[i].ptr);
321 m_vars_extra[i].type_src = etype->extended_type;
322 m_vars[i].ptr = etype->ptr;
323 }
324 else {
325 m_vars_extra[i].type_src = m_vars[i].type.src;
326 if (!(m_vars[i].flags.use_device_ptr &&
327 m_vars[i].type.src == c_dv)) {
328 m_vars[i].ptr = vars[i].ptr;
329 }
330 }
331 // instead of m_vars[i].type.dst we will use m_vars_extra[i].type_dst
332 if (m_vars[i].type.dst == c_extended_type && i < vars_total) {
333 VarDescExtendedType *etype =
334 reinterpret_cast<VarDescExtendedType*>(vars[i].into);
335 m_vars_extra[i].type_dst = etype->extended_type;
336 m_vars[i].into = etype->ptr;
337 }
338 else {
339 m_vars_extra[i].type_dst = m_vars[i].type.dst;
340 m_vars[i].into = vars[i].into;
341 }
5f520819
KY
342
343 const char *var_sname = "";
344 if (vars2 != NULL) {
345 if (vars2[i].sname != NULL) {
346 var_sname = vars2[i].sname;
347 }
348 }
349 OFFLOAD_DEBUG_TRACE_1(2, get_offload_number(), c_offload_var,
350 " VarDesc %d, var=%s, %s, %s\n",
351 i, var_sname,
352 vardesc_direction_as_string[m_vars[i].direction.bits],
df26a50d 353 vardesc_type_as_string[m_vars_extra[i].type_src]);
5f520819
KY
354 if (vars2 != NULL && vars2[i].dname != NULL) {
355 OFFLOAD_TRACE(2, " into=%s, %s\n", vars2[i].dname,
df26a50d 356 vardesc_type_as_string[m_vars_extra[i].type_dst]);
5f520819
KY
357 }
358 }
df26a50d
IV
359 else {
360 m_vars_extra[i].type_src = m_vars[i].type.src;
361 m_vars_extra[i].type_dst = m_vars[i].type.dst;
362 }
363
5f520819
KY
364 OFFLOAD_TRACE(2,
365 " type_src=%d, type_dstn=%d, direction=%d, "
366 "alloc_if=%d, free_if=%d, align=%d, mic_offset=%d, flags=0x%x, "
367 "offset=%lld, size=%lld, count/disp=%lld, ptr=%p into=%p\n",
df26a50d
IV
368 m_vars_extra[i].type_src,
369 m_vars_extra[i].type_dst,
5f520819
KY
370 m_vars[i].direction.bits,
371 m_vars[i].alloc_if,
372 m_vars[i].free_if,
373 m_vars[i].align,
374 m_vars[i].mic_offset,
375 m_vars[i].flags.bits,
376 m_vars[i].offset,
377 m_vars[i].size,
378 m_vars[i].count,
379 m_vars[i].ptr,
380 m_vars[i].into);
381 }
382}
383
384void OffloadDescriptor::scatter_copyin_data()
385{
386 OFFLOAD_TIMER_START(c_offload_target_scatter_inputs);
387
388 OFFLOAD_DEBUG_TRACE(2, "IN buffer @ %p size %lld\n",
389 m_in.get_buffer_start(),
390 m_in.get_buffer_size());
391 OFFLOAD_DEBUG_DUMP_BYTES(2, m_in.get_buffer_start(),
392 m_in.get_buffer_size());
393
394 // receive data
395 for (int i = 0; i < m_vars_total; i++) {
396 bool src_is_for_mic = (m_vars[i].direction.out ||
397 m_vars[i].into == NULL);
398 void** ptr_addr = src_is_for_mic ?
399 static_cast<void**>(m_vars[i].ptr) :
400 static_cast<void**>(m_vars[i].into);
df26a50d
IV
401 int type = src_is_for_mic ? m_vars_extra[i].type_src :
402 m_vars_extra[i].type_dst;
5f520819
KY
403 bool is_static = src_is_for_mic ?
404 m_vars[i].flags.is_static :
405 m_vars[i].flags.is_static_dstn;
406 void *ptr = NULL;
407
408 if (m_vars[i].flags.alloc_disp) {
409 int64_t offset = 0;
410 m_in.receive_data(&offset, sizeof(offset));
5f520819
KY
411 }
412 if (VAR_TYPE_IS_DV_DATA_SLICE(type) ||
413 VAR_TYPE_IS_DV_DATA(type)) {
414 ArrDesc *dvp = (type == c_dv_data_slice || type == c_dv_data)?
415 reinterpret_cast<ArrDesc*>(ptr_addr) :
416 *reinterpret_cast<ArrDesc**>(ptr_addr);
417 ptr_addr = reinterpret_cast<void**>(&dvp->Base);
418 }
5f520819
KY
419 // Set pointer values
420 switch (type) {
421 case c_data_ptr_array:
422 {
423 int j = m_vars[i].ptr_arr_offset;
424 int max_el = j + m_vars[i].count;
425 char *dst_arr_ptr = (src_is_for_mic)?
426 *(reinterpret_cast<char**>(m_vars[i].ptr)) :
427 reinterpret_cast<char*>(m_vars[i].into);
428
df26a50d
IV
429 // if is_pointer is 1 it means that pointer array itself
430 // is defined either via pointer or as class member.
431 // i.e. arr_ptr[0:5] or this->ARR[0:5]
2eab9666 432 if (m_vars[i].flags.is_pointer) {
df26a50d
IV
433 int64_t offset = 0;
434 m_in.receive_data(&offset, sizeof(offset));
435 dst_arr_ptr = *((char**)dst_arr_ptr) + offset;
2eab9666 436 }
5f520819
KY
437 for (; j < max_el; j++) {
438 if (src_is_for_mic) {
439 m_vars[j].ptr =
440 dst_arr_ptr + m_vars[j].ptr_arr_offset;
441 }
442 else {
443 m_vars[j].into =
444 dst_arr_ptr + m_vars[j].ptr_arr_offset;
445 }
446 }
447 }
448 break;
449 case c_data:
450 case c_void_ptr:
df26a50d 451 case c_void_ptr_ptr:
5f520819
KY
452 case c_cean_var:
453 case c_dv:
454 break;
455
456 case c_string_ptr:
457 case c_data_ptr:
df26a50d
IV
458 case c_string_ptr_ptr:
459 case c_data_ptr_ptr:
5f520819 460 case c_cean_var_ptr:
df26a50d 461 case c_cean_var_ptr_ptr:
5f520819 462 case c_dv_ptr:
df26a50d
IV
463 // Don't need ptr_addr value for variables from stack buffer.
464 // Stack buffer address is set at var_desc with #0.
465 if (i != 0 && m_vars[i].flags.is_stack_buf) {
466 break;
467 }
468 if (TYPE_IS_PTR_TO_PTR(m_vars_extra[i].type_src) ||
469 TYPE_IS_PTR_TO_PTR(m_vars_extra[i].type_dst)) {
470 int64_t offset;
471
472 m_in.receive_data(&offset, sizeof(offset));
473 ptr_addr = reinterpret_cast<void**>(
474 reinterpret_cast<char*>(*ptr_addr) + offset);
475
476 }
477
2eab9666
IV
478 if (m_vars[i].alloc_if && !m_vars[i].flags.preallocated) {
479 void *buf = NULL;
5f520819
KY
480 if (m_vars[i].flags.sink_addr) {
481 m_in.receive_data(&buf, sizeof(buf));
482 }
483 else {
484 buf = m_buffers.front();
485 m_buffers.pop_front();
486 }
487 if (buf) {
488 if (!is_static) {
489 if (!m_vars[i].flags.sink_addr) {
490 // increment buffer reference
491 OFFLOAD_TIMER_START(c_offload_target_add_buffer_refs);
492 BufferAddRef(buf);
2eab9666 493 OFFLOAD_TRACE(1, "Calling COIBufferAddRef %p\n", buf);
5f520819
KY
494 OFFLOAD_TIMER_STOP(c_offload_target_add_buffer_refs);
495 }
496 add_ref_count(buf, 0 == m_vars[i].flags.sink_addr);
2eab9666
IV
497 OFFLOAD_TRACE(1, " AddRef count = %d\n",
498 ((RefInfo *) ref_data[buf])->count);
5f520819
KY
499 }
500 ptr = static_cast<char*>(buf) +
501 m_vars[i].mic_offset +
502 (m_vars[i].flags.is_stack_buf ?
503 0 : m_vars[i].offset);
df26a50d 504
5f520819
KY
505 }
506 *ptr_addr = ptr;
507 }
508 else if (m_vars[i].flags.sink_addr) {
509 void *buf;
510 m_in.receive_data(&buf, sizeof(buf));
511 void *ptr = static_cast<char*>(buf) +
512 m_vars[i].mic_offset +
513 (m_vars[i].flags.is_stack_buf ?
514 0 : m_vars[i].offset);
515 *ptr_addr = ptr;
516 }
517 break;
518
519 case c_func_ptr:
df26a50d 520 case c_func_ptr_ptr:
5f520819
KY
521 break;
522
523 case c_dv_data:
524 case c_dv_ptr_data:
525 case c_dv_data_slice:
526 case c_dv_ptr_data_slice:
527 if (m_vars[i].alloc_if) {
528 void *buf;
529 if (m_vars[i].flags.sink_addr) {
530 m_in.receive_data(&buf, sizeof(buf));
531 }
532 else {
533 buf = m_buffers.front();
534 m_buffers.pop_front();
535 }
536 if (buf) {
537 if (!is_static) {
538 if (!m_vars[i].flags.sink_addr) {
539 // increment buffer reference
540 OFFLOAD_TIMER_START(c_offload_target_add_buffer_refs);
541 BufferAddRef(buf);
542 OFFLOAD_TIMER_STOP(c_offload_target_add_buffer_refs);
543 }
544 add_ref_count(buf, 0 == m_vars[i].flags.sink_addr);
545 }
546 ptr = static_cast<char*>(buf) +
547 m_vars[i].mic_offset + m_vars[i].offset;
548 }
549 *ptr_addr = ptr;
550 }
551 else if (m_vars[i].flags.sink_addr) {
552 void *buf;
553 m_in.receive_data(&buf, sizeof(buf));
554 ptr = static_cast<char*>(buf) +
555 m_vars[i].mic_offset + m_vars[i].offset;
556 *ptr_addr = ptr;
557 }
558 break;
559
560 default:
561 LIBOFFLOAD_ERROR(c_unknown_var_type, type);
562 abort();
563 }
df26a50d
IV
564 // Release obsolete buffers for stack of persistent objects.
565 // The vardesc with i==0 and flags.is_stack_buf==TRUE is always for
566 // stack buffer pointer.
567 if (i == 0 &&
5f520819
KY
568 m_vars[i].flags.is_stack_buf &&
569 !m_vars[i].direction.bits &&
570 m_vars[i].alloc_if &&
571 m_vars[i].size != 0) {
572 for (int j=0; j < m_vars[i].size; j++) {
573 void *buf;
574 m_in.receive_data(&buf, sizeof(buf));
df26a50d 575 OFFLOAD_TRACE(4, "Releasing stack buffer %p\n", buf);
5f520819
KY
576 BufferReleaseRef(buf);
577 ref_data.erase(buf);
578 }
579 }
580 // Do copyin
df26a50d 581 switch (m_vars_extra[i].type_dst) {
5f520819
KY
582 case c_data_ptr_array:
583 break;
584 case c_data:
585 case c_void_ptr:
df26a50d 586 case c_void_ptr_ptr:
5f520819
KY
587 case c_cean_var:
588 if (m_vars[i].direction.in &&
589 !m_vars[i].flags.is_static_dstn) {
590 int64_t size;
591 int64_t disp;
592 char* ptr = m_vars[i].into ?
593 static_cast<char*>(m_vars[i].into) :
594 static_cast<char*>(m_vars[i].ptr);
df26a50d 595 if (m_vars_extra[i].type_dst == c_cean_var) {
5f520819
KY
596 m_in.receive_data((&size), sizeof(int64_t));
597 m_in.receive_data((&disp), sizeof(int64_t));
598 }
599 else {
600 size = m_vars[i].size;
601 disp = 0;
602 }
603 m_in.receive_data(ptr + disp, size);
604 }
605 break;
606
607 case c_dv:
608 if (m_vars[i].direction.bits ||
609 m_vars[i].alloc_if ||
610 m_vars[i].free_if) {
611 char* ptr = m_vars[i].into ?
612 static_cast<char*>(m_vars[i].into) :
613 static_cast<char*>(m_vars[i].ptr);
614 m_in.receive_data(ptr + sizeof(uint64_t),
615 m_vars[i].size - sizeof(uint64_t));
616 }
617 break;
618
619 case c_string_ptr:
620 case c_data_ptr:
df26a50d
IV
621 case c_string_ptr_ptr:
622 case c_data_ptr_ptr:
5f520819 623 case c_cean_var_ptr:
df26a50d 624 case c_cean_var_ptr_ptr:
5f520819
KY
625 case c_dv_ptr:
626 case c_dv_data:
627 case c_dv_ptr_data:
628 case c_dv_data_slice:
629 case c_dv_ptr_data_slice:
630 break;
631
632 case c_func_ptr:
df26a50d 633 case c_func_ptr_ptr:
5f520819
KY
634 if (m_vars[i].direction.in) {
635 m_in.receive_func_ptr((const void**) m_vars[i].ptr);
636 }
637 break;
638
639 default:
df26a50d 640 LIBOFFLOAD_ERROR(c_unknown_var_type, m_vars_extra[i].type_dst);
5f520819
KY
641 abort();
642 }
643 }
644
645 OFFLOAD_TRACE(1, "Total copyin data received from host: [%lld] bytes\n",
646 m_in.get_tfr_size());
647
648 OFFLOAD_TIMER_STOP(c_offload_target_scatter_inputs);
649
650 OFFLOAD_TIMER_START(c_offload_target_compute);
651}
652
653void OffloadDescriptor::gather_copyout_data()
654{
655 OFFLOAD_TIMER_STOP(c_offload_target_compute);
656
657 OFFLOAD_TIMER_START(c_offload_target_gather_outputs);
658
659 for (int i = 0; i < m_vars_total; i++) {
660 bool src_is_for_mic = (m_vars[i].direction.out ||
661 m_vars[i].into == NULL);
df26a50d
IV
662 if (m_vars[i].flags.is_stack_buf) {
663 continue;
664 }
665 switch (m_vars_extra[i].type_src) {
5f520819
KY
666 case c_data_ptr_array:
667 break;
668 case c_data:
669 case c_void_ptr:
df26a50d 670 case c_void_ptr_ptr:
5f520819
KY
671 case c_cean_var:
672 if (m_vars[i].direction.out &&
673 !m_vars[i].flags.is_static) {
674 m_out.send_data(
675 static_cast<char*>(m_vars[i].ptr) + m_vars[i].disp,
676 m_vars[i].size);
677 }
678 break;
679
680 case c_dv:
681 break;
682
683 case c_string_ptr:
684 case c_data_ptr:
df26a50d
IV
685 case c_string_ptr_ptr:
686 case c_data_ptr_ptr:
5f520819 687 case c_cean_var_ptr:
df26a50d 688 case c_cean_var_ptr_ptr:
5f520819
KY
689 case c_dv_ptr:
690 if (m_vars[i].free_if &&
691 src_is_for_mic &&
2eab9666 692 !m_vars[i].flags.preallocated &&
5f520819
KY
693 !m_vars[i].flags.is_static) {
694 void *buf = *static_cast<char**>(m_vars[i].ptr) -
695 m_vars[i].mic_offset -
696 (m_vars[i].flags.is_stack_buf?
697 0 : m_vars[i].offset);
698 if (buf == NULL) {
699 break;
700 }
701 // decrement buffer reference count
702 OFFLOAD_TIMER_START(c_offload_target_release_buffer_refs);
703 BufReleaseRef(buf);
704 OFFLOAD_TIMER_STOP(c_offload_target_release_buffer_refs);
705 }
2eab9666
IV
706 if (m_vars[i].flags.preallocated && m_vars[i].alloc_if) {
707 m_out.send_data((void*) m_vars[i].ptr, sizeof(void*));
708 }
5f520819
KY
709 break;
710
711 case c_func_ptr:
df26a50d 712 case c_func_ptr_ptr:
5f520819
KY
713 if (m_vars[i].direction.out) {
714 m_out.send_func_ptr(*((void**) m_vars[i].ptr));
715 }
716 break;
717
718 case c_dv_data:
719 case c_dv_ptr_data:
720 case c_dv_data_slice:
721 case c_dv_ptr_data_slice:
722 if (src_is_for_mic &&
723 m_vars[i].free_if &&
724 !m_vars[i].flags.is_static) {
df26a50d
IV
725 ArrDesc *dvp = (m_vars_extra[i].type_src == c_dv_data ||
726 m_vars_extra[i].type_src == c_dv_data_slice) ?
727 static_cast<ArrDesc*>(m_vars[i].ptr) :
728 *static_cast<ArrDesc**>(m_vars[i].ptr);
5f520819
KY
729
730 void *buf = reinterpret_cast<char*>(dvp->Base) -
731 m_vars[i].mic_offset -
732 m_vars[i].offset;
733
734 if (buf == NULL) {
735 break;
736 }
737
738 // decrement buffer reference count
739 OFFLOAD_TIMER_START(c_offload_target_release_buffer_refs);
740 BufReleaseRef(buf);
741 OFFLOAD_TIMER_STOP(c_offload_target_release_buffer_refs);
742 }
743 break;
744
745 default:
df26a50d 746 LIBOFFLOAD_ERROR(c_unknown_var_type, m_vars_extra[i].type_dst);
5f520819
KY
747 abort();
748 }
749
750 if (m_vars[i].into) {
df26a50d 751 switch (m_vars_extra[i].type_dst) {
5f520819
KY
752 case c_data_ptr_array:
753 break;
754 case c_data:
755 case c_void_ptr:
df26a50d 756 case c_void_ptr_ptr:
5f520819
KY
757 case c_cean_var:
758 case c_dv:
759 break;
760
761 case c_string_ptr:
762 case c_data_ptr:
df26a50d
IV
763 case c_string_ptr_ptr:
764 case c_data_ptr_ptr:
5f520819 765 case c_cean_var_ptr:
df26a50d 766 case c_cean_var_ptr_ptr:
5f520819
KY
767 case c_dv_ptr:
768 if (m_vars[i].direction.in &&
769 m_vars[i].free_if &&
770 !m_vars[i].flags.is_static_dstn) {
771 void *buf = *static_cast<char**>(m_vars[i].into) -
772 m_vars[i].mic_offset -
773 (m_vars[i].flags.is_stack_buf?
774 0 : m_vars[i].offset);
775
776 if (buf == NULL) {
777 break;
778 }
779 // decrement buffer reference count
780 OFFLOAD_TIMER_START(
781 c_offload_target_release_buffer_refs);
782 BufReleaseRef(buf);
783 OFFLOAD_TIMER_STOP(
784 c_offload_target_release_buffer_refs);
785 }
786 break;
787
788 case c_func_ptr:
df26a50d 789 case c_func_ptr_ptr:
5f520819
KY
790 break;
791
792 case c_dv_data:
793 case c_dv_ptr_data:
794 case c_dv_data_slice:
795 case c_dv_ptr_data_slice:
796 if (m_vars[i].free_if &&
797 m_vars[i].direction.in &&
798 !m_vars[i].flags.is_static_dstn) {
799 ArrDesc *dvp =
df26a50d
IV
800 (m_vars_extra[i].type_dst == c_dv_data_slice ||
801 m_vars_extra[i].type_dst == c_dv_data) ?
5f520819
KY
802 static_cast<ArrDesc*>(m_vars[i].into) :
803 *static_cast<ArrDesc**>(m_vars[i].into);
804 void *buf = reinterpret_cast<char*>(dvp->Base) -
805 m_vars[i].mic_offset -
806 m_vars[i].offset;
807
808 if (buf == NULL) {
809 break;
810 }
811 // decrement buffer reference count
812 OFFLOAD_TIMER_START(
813 c_offload_target_release_buffer_refs);
814 BufReleaseRef(buf);
815 OFFLOAD_TIMER_STOP(
816 c_offload_target_release_buffer_refs);
817 }
818 break;
819
820 default:
df26a50d 821 LIBOFFLOAD_ERROR(c_unknown_var_type, m_vars_extra[i].type_dst);
5f520819
KY
822 abort();
823 }
824 }
825 }
826
827 OFFLOAD_DEBUG_TRACE(2, "OUT buffer @ p %p size %lld\n",
828 m_out.get_buffer_start(),
829 m_out.get_buffer_size());
830
831 OFFLOAD_DEBUG_DUMP_BYTES(2,
832 m_out.get_buffer_start(),
833 m_out.get_buffer_size());
834
835 OFFLOAD_DEBUG_TRACE_1(1, get_offload_number(), c_offload_copyout_data,
836 "Total copyout data sent to host: [%lld] bytes\n",
837 m_out.get_tfr_size());
838
839 OFFLOAD_TIMER_STOP(c_offload_target_gather_outputs);
840}
841
842void __offload_target_init(void)
843{
844#ifdef SEP_SUPPORT
845 const char* env_var = getenv(sep_monitor_env);
846 if (env_var != 0 && *env_var != '\0') {
847 sep_monitor = atoi(env_var);
848 }
849 env_var = getenv(sep_device_env);
850 if (env_var != 0 && *env_var != '\0') {
851 sep_device = env_var;
852 }
853#endif // SEP_SUPPORT
854
855 prefix = report_get_message_str(c_report_mic);
856
857 // init frequency
858 mic_frequency = COIPerfGetCycleFrequency();
859}
860
861// User-visible offload API
862
863int _Offload_number_of_devices(void)
864{
865 return mic_engines_total;
866}
867
868int _Offload_get_device_number(void)
869{
870 return mic_index;
871}
872
873int _Offload_get_physical_device_number(void)
874{
875 uint32_t index;
876 EngineGetIndex(&index);
877 return index;
878}