1/*
2 Copyright (c) 2014-2015 Intel Corporation. All Rights Reserved.
3
4 Redistribution and use in source and binary forms, with or without
5 modification, are permitted provided that the following conditions
6 are met:
7
8 * Redistributions of source code must retain the above copyright
9 notice, this list of conditions and the following disclaimer.
10 * Redistributions in binary form must reproduce the above copyright
11 notice, this list of conditions and the following disclaimer in the
12 documentation and/or other materials provided with the distribution.
13 * Neither the name of Intel Corporation nor the names of its
14 contributors may be used to endorse or promote products derived
15 from this software without specific prior written permission.
16
17 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
18 "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
19 LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
20 A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
21 HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
22 SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
23 LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
24 DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
25 THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
26 (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
27 OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
28*/
29
30
31// Forward declarations, as the following 2 functions are declared as friends
32// in offload_engine.h.
33// CLANG does not like static to be placed after the friend declaration.
34static void __offload_init_library_once(void);
35static void __offload_fini_library(void);
36
37#include "offload_host.h"
38#ifdef MYO_SUPPORT
39#include "offload_myo_host.h"
40#endif
41
42#include <malloc.h>
43#ifndef TARGET_WINNT
44#include <alloca.h>
45#include <elf.h>
46#endif // TARGET_WINNT
47#include <errno.h>
48#include <fcntl.h>
49#include <stdlib.h>
50#include <string.h>
51#include <sys/stat.h>
52#include <sys/types.h>
53#include <sys/stat.h>
54
55#include <algorithm>
56#include <bitset>
57
58#if defined(HOST_WINNT)
59#define PATH_SEPARATOR ";"
60#else
61#define PATH_SEPARATOR ":"
62#endif
63
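// GET_OFFLOAD_NUMBER returns the offload sequence number recorded in the
// host timer data, or 0 when no timer data is available.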
64#define GET_OFFLOAD_NUMBER(timer_data) \
65 timer_data? timer_data->offload_number : 0
66
67extern "C" {
68#ifdef TARGET_WINNT
69// Windows does not support importing symbols from a library without
70// actually listing it as a dependence. We do not want that dependence,
71// since the library is used only for Fortran when traceback is enabled,
72// so the routine is resolved at run time via GetProcAddress.
73#define FORTRAN_TRACE_BACK win_for__continue_traceback
74int win_for__continue_traceback( _Offload_result coi_offload_result )
75{
76 HINSTANCE hDLL;
77 int (* TraceBackRoutine)(_Offload_result value);
78
79 hDLL = LoadLibrary("libifcoremd.dll");
80 if (hDLL != 0) {
81 TraceBackRoutine = (int (*)(_Offload_result)) GetProcAddress(hDLL,
82 "for__continue_traceback");
83 if (TraceBackRoutine != 0) {
84 return TraceBackRoutine(coi_offload_result);
85 }
86 else {
87 OFFLOAD_TRACE(3,
88 "Cannot find for__continue_traceback routine in libifcorert.dll\n");
89 exit(1);
90 }
91 }
92 else {
93 OFFLOAD_TRACE(3, "Cannot load libifcorert.dll\n");
94 exit(1);
95 }
96 return 0;
97}
98
99#else // TARGET_WINNT
100
101#define FORTRAN_TRACE_BACK for__continue_traceback
102
103// for__continue_traceback is provided as a dummy to resolve link time symbols
104// for C/C++ programs. For Fortran the actual fortran library function in
105// libifcore.so is used.
106#pragma weak for__continue_traceback
107int for__continue_traceback( _Offload_result coi_offload_result )
108{
109 OFFLOAD_TRACE(3,
110 "liboffload function for_continue_traceback should not be called.\n");
111 exit(1);
112}
113#endif //TARGET_WINNT
114} // extern "C"
115
116#ifdef TARGET_WINNT
117// Small subset of ELF declarations for Windows which is needed to compile
118// this file. ELF header is used to understand what binary type is contained
119// in the target image - shared library or executable.
120
121typedef uint16_t Elf64_Half;
122typedef uint32_t Elf64_Word;
123typedef uint64_t Elf64_Addr;
124typedef uint64_t Elf64_Off;
125
126#define EI_NIDENT 16
127
128#define ET_EXEC 2
129#define ET_DYN 3
130
131typedef struct
132{
133 unsigned char e_ident[EI_NIDENT];
134 Elf64_Half e_type;
135 Elf64_Half e_machine;
136 Elf64_Word e_version;
137 Elf64_Addr e_entry;
138 Elf64_Off e_phoff;
139 Elf64_Off e_shoff;
140 Elf64_Word e_flags;
141 Elf64_Half e_ehsize;
142 Elf64_Half e_phentsize;
143 Elf64_Half e_phnum;
144 Elf64_Half e_shentsize;
145 Elf64_Half e_shnum;
146 Elf64_Half e_shstrndx;
147} Elf64_Ehdr;
148#endif // TARGET_WINNT
149
150// Host console and file logging
151const char *prefix;
152int console_enabled = 0;
153int offload_number = 0;
154
155static const char *htrace_envname = "H_TRACE";
156static const char *offload_report_envname = "OFFLOAD_REPORT";
157static const char *timer_envname = "H_TIME";
158
159// Location of the offload_main executable.
160// Used when the main application contains no offload code and is not built
161// with -offload, but a dynamically linked library contains offload pragmas.
162char* mic_device_main = 0;
163
164// DMA channel count used by COI and set via
165// OFFLOAD_DMA_CHANNEL_COUNT environment variable
166uint32_t mic_dma_channel_count;
167
168// Trace information
169static const char* vardesc_direction_as_string[] = {
170 "NOCOPY",
171 "IN",
172 "OUT",
173 "INOUT"
174};
175static const char* vardesc_type_as_string[] = {
176 "unknown",
177 "data",
178 "data_ptr",
179 "func_ptr",
180 "void_ptr",
181 "string_ptr",
182 "dv",
183 "dv_data",
184 "dv_data_slice",
185 "dv_ptr",
186 "dv_ptr_data",
187 "dv_ptr_data_slice",
188 "cean_var",
189 "cean_var_ptr",
190 "c_data_ptr_array",
191 "c_func_ptr_array",
192 "c_void_ptr_array",
193 "c_string_ptr_array"
194};
195
196Engine* mic_engines = 0;
197uint32_t mic_engines_total = 0;
198pthread_key_t mic_thread_key;
199MicEnvVar mic_env_vars;
200uint64_t cpu_frequency = 0;
201
202// MIC_STACKSIZE
203uint32_t mic_stack_size = 12 * 1024 * 1024;
204
205// MIC_BUFFERSIZE
206uint64_t mic_buffer_size = 0;
207
208// Preallocated 4K page memory size for buffers on MIC
209uint64_t mic_4k_buffer_size = 0;
210
211// Preallocated 2M page memory size for buffers on MIC
212uint64_t mic_2m_buffer_size = 0;
213
214
215// MIC_LD_LIBRARY_PATH
216char* mic_library_path = 0;
217
218// MIC_PROXY_IO
219bool mic_proxy_io = true;
220
221// MIC_PROXY_FS_ROOT
222char* mic_proxy_fs_root = 0;
223
224// Threshold for creating buffers with large pages. A buffer is created
225// with the large-page hint if its size exceeds the threshold value.
226// By default large pages are currently disabled (the threshold defaults
227// to UINT64_MAX) due to HSD 4114629.
228uint64_t __offload_use_2mb_buffers = 0xffffffffffffffffULL;
229static const char *mic_use_2mb_buffers_envname =
230 "MIC_USE_2MB_BUFFERS";
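// A minimal usage sketch, assuming the value is interpreted as a plain byte
// count: setting MIC_USE_2MB_BUFFERS=65536 would request huge pages for every
// buffer of 64 KB or larger.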
231
232static uint64_t __offload_use_async_buffer_write = 2 * 1024 * 1024;
233static const char *mic_use_async_buffer_write_envname =
234 "MIC_USE_ASYNC_BUFFER_WRITE";
235
236static uint64_t __offload_use_async_buffer_read = 2 * 1024 * 1024;
237static const char *mic_use_async_buffer_read_envname =
238 "MIC_USE_ASYNC_BUFFER_READ";
239
240// device initialization type
241OffloadInitType __offload_init_type = c_init_on_offload_all;
242static const char *offload_init_envname = "OFFLOAD_INIT";
243
244// active wait
245static bool __offload_active_wait = true;
246static const char *offload_active_wait_envname = "OFFLOAD_ACTIVE_WAIT";
247
248// OMP_DEFAULT_DEVICE
249int __omp_device_num = 0;
250static const char *omp_device_num_envname = "OMP_DEFAULT_DEVICE";
251
252//OFFLOAD_PARALLEL_COPY
253static bool __offload_parallel_copy = false;
254static const char *parallel_copy_envname = "OFFLOAD_PARALLEL_COPY";
255
256//Use COI interface for noncontiguous transfer if it exists.
257static bool __offload_use_coi_noncontiguous_transfer = false;
258static const char *use_coi_noncontiguous_transfer_envname =
259 "MIC_USE_COI_MULTI_D";
260
261// The list of pending target libraries
262static bool __target_libs;
263static TargetImageList __target_libs_list;
264static mutex_t __target_libs_lock;
265static mutex_t stack_alloc_lock;
266
267// Target executable
268TargetImage* __target_exe;
269
270// Print readable offload flags
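// For example, with both bits set this emits a trace line of the form
// " OffloadFlags=(fortran_traceback,omp_async)".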
271static void trace_offload_flags(
272 OffloadHostTimerData* timer_data,
273 OffloadFlags offload_flags
274)
275{
276 // Sized big enough for all flag names
277 char fbuffer[256];
278 bool first = true;
279 if (!OFFLOAD_DO_TRACE && (console_enabled >= 1)) {
280 sprintf(fbuffer, " OffloadFlags=(");
281 if (offload_flags.bits.fortran_traceback) {
282 sprintf(fbuffer+strlen(fbuffer), "fortran_traceback");
283 first = false;
284 }
285 if (offload_flags.bits.omp_async) {
286 sprintf(fbuffer+strlen(fbuffer), first ? "omp_async" : ",omp_async");
287 first = false;
288 }
289 OFFLOAD_DEBUG_TRACE_1(1,
290 GET_OFFLOAD_NUMBER(timer_data), c_offload_init_func,
291 "%s)\n", fbuffer);
292 }
293}
294
295// Print readable varDesc flags
296static void trace_varDesc_flags(
297 OffloadHostTimerData* timer_data,
298 varDescFlags offload_flags
299)
300{
301 // Sized big enough for all flag names
302 char fbuffer[256];
303 bool first = true;
304 if (!OFFLOAD_DO_TRACE && (console_enabled >= 1)) {
305 sprintf(fbuffer, " varDescFlags=(");
306 if (offload_flags.is_static) {
307 sprintf(fbuffer+strlen(fbuffer), "is_static");
308 first = false;
309 }
310 if (offload_flags.is_static_dstn) {
311 sprintf(fbuffer+strlen(fbuffer),
312 first ? "is_static_dstn" : ",is_static_dstn");
313 first = false;
314 }
315 if (offload_flags.has_length) {
316 sprintf(fbuffer+strlen(fbuffer),
317 first ? "has_length" : ",has_length");
318 first = false;
319 }
320 if (offload_flags.is_stack_buf) {
321 sprintf(fbuffer+strlen(fbuffer),
322 first ? "is_stack_buf" : ",is_stack_buf");
323 first = false;
324 }
325 if (offload_flags.targetptr) {
326 sprintf(fbuffer+strlen(fbuffer),
327 first ? "targetptr" : ",targetptr");
328 first = false;
329 }
330 if (offload_flags.preallocated) {
331 sprintf(fbuffer+strlen(fbuffer),
332 first ? "preallocated" : ",preallocated");
333 first = false;
334 }
335 if (offload_flags.is_pointer) {
336 sprintf(fbuffer+strlen(fbuffer),
337 first ? "is_pointer" : ",is_pointer");
338 first = false;
339 }
340 if (offload_flags.sink_addr) {
341 sprintf(fbuffer+strlen(fbuffer),
342 first ? "sink_addr" : ",sink_addr");
343 first = false;
344 }
345 if (offload_flags.alloc_disp) {
346 sprintf(fbuffer+strlen(fbuffer),
347 first ? "alloc_disp" : ",alloc_disp");
348 first = false;
349 }
350 if (offload_flags.is_noncont_src) {
351 sprintf(fbuffer+strlen(fbuffer),
352 first ? "is_noncont_src" : ",is_noncont_src");
353 first = false;
354 }
355 if (offload_flags.is_noncont_dst) {
356 sprintf(fbuffer+strlen(fbuffer),
357 first ? "is_noncont_dst" : ",is_noncont_dst");
358 first = false;
359 }
360 if (offload_flags.always_copy) {
361 sprintf(fbuffer+strlen(fbuffer),
362 first ? "always_copy" : ",always_copy");
363 first = false;
364 }
365 if (offload_flags.always_delete) {
366 sprintf(fbuffer+strlen(fbuffer),
367 first ? "always_delete" : ",always_delete");
368 first = false;
369 }
370 OFFLOAD_DEBUG_TRACE_1(1,
371 GET_OFFLOAD_NUMBER(timer_data), c_offload_init_func,
372 "%s)\n", fbuffer);
373 }
374}
375
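// Return the host-side base address of a variable according to its vardesc
// type: pointer types are dereferenced, scalars are used as-is, and dope
// vector types return the Base field of the underlying ArrDesc; anything
// else yields NULL.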
376static char * offload_get_src_base(void * ptr, uint8_t type)
377{
378 char *base;
379 if (VAR_TYPE_IS_PTR(type)) {
380 base = *static_cast<char**>(ptr);
381 }
382 else if (VAR_TYPE_IS_SCALAR(type)) {
383 base = static_cast<char*>(ptr);
384 }
385 else if (VAR_TYPE_IS_DV_DATA_SLICE(type) || VAR_TYPE_IS_DV_DATA(type)) {
386 ArrDesc *dvp;
387 if (VAR_TYPE_IS_DV_DATA_SLICE(type)) {
388         const Arr_Desc *ap = static_cast<const Arr_Desc*>(ptr);
389 dvp = (type == c_dv_data_slice) ?
390 reinterpret_cast<ArrDesc*>(ap->base) :
391 *reinterpret_cast<ArrDesc**>(ap->base);
392 }
393 else {
394 dvp = (type == c_dv_data) ?
395 static_cast<ArrDesc*>(ptr) :
396 *static_cast<ArrDesc**>(ptr);
397 }
398 base = reinterpret_cast<char*>(dvp->Base);
399 }
400 else {
401 base = NULL;
402 }
403 return base;
404}
405
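// Report a failed COI call through the liboffload diagnostics and terminate.
// A died target process is handled specially: it is finalized before exit.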
406void OffloadDescriptor::report_coi_error(error_types msg, COIRESULT res)
407{
408 // special case for the 'process died' error
409 if (res == COI_PROCESS_DIED) {
410 m_device.fini_process(true);
411 }
412 else {
413 switch (msg) {
414 case c_buf_create:
415 if (res == COI_OUT_OF_MEMORY) {
416 msg = c_buf_create_out_of_mem;
417 }
418 /* fallthru */
419
420 case c_buf_create_from_mem:
421 case c_buf_get_address:
422 case c_pipeline_create:
423 case c_pipeline_run_func:
424 LIBOFFLOAD_ERROR(msg, m_device.get_logical_index(), res);
425 break;
426
427 case c_buf_read:
428 case c_buf_write:
429 case c_buf_copy:
430 case c_buf_map:
431 case c_buf_unmap:
432 case c_buf_destroy:
433 case c_buf_set_state:
434 LIBOFFLOAD_ERROR(msg, res);
435 break;
436
437 default:
438 break;
439 }
440 }
441
442 exit(1);
443}
444
445_Offload_result OffloadDescriptor::translate_coi_error(COIRESULT res) const
446{
447 switch (res) {
448 case COI_SUCCESS:
449 return OFFLOAD_SUCCESS;
450
451 case COI_PROCESS_DIED:
452 return OFFLOAD_PROCESS_DIED;
453
454 case COI_OUT_OF_MEMORY:
455 return OFFLOAD_OUT_OF_MEMORY;
456
457 default:
458 return OFFLOAD_ERROR;
459 }
460}
461
462// is_targetptr == 0 && is_prealloc == 0 - allocation of pointer data;
463// is_targetptr == 1 && is_prealloc == 0 - allocation of target memory:
464// allocate memory at target; use its value as base in target table.
465// is_targetptr == 1 && is_prealloc == 1 - use preallocated target memory:
466// base is the address at the target of the preallocated memory; use its
467// value as base in the target table.
468
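// (Illustration, hedged: an ordinary pointer variable presumably takes the
// first path, while the targetptr and targetptr+preallocated modifiers select
// the second and third.)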
469bool OffloadDescriptor::alloc_ptr_data(
470 PtrData* &ptr_data,
471 void *base,
472 int64_t disp,
473 int64_t size,
474 int64_t alloc_disp,
475 int align,
476 bool is_targptr,
477 bool is_prealloc,
478 bool pin
479)
480{
481 // total length of base
482     int64_t length = size;
483     bool is_new;
484 COIBUFFER targptr_buf;
485 COIRESULT res;
486 uint32_t buffer_flags = 0;
487 char * base_disp = reinterpret_cast<char *>(base) + disp;
488
489 // create buffer with large pages if data length exceeds
490 // large page threshold
491 if (length >= __offload_use_2mb_buffers) {
492 buffer_flags = COI_OPTIMIZE_HUGE_PAGE_SIZE;
493 }
494     // Allocate target memory for targetptr without preallocated, since we
495     // need its address as the base argument for m_device.insert_ptr_data
496 if (is_targptr && !is_prealloc) {
497 length = alloc_disp ? length : size + disp;
498 res = COI::BufferCreate(
499 length,
500 COI_BUFFER_NORMAL,
501 buffer_flags,
502 0,
503 1,
504 &m_device.get_process(),
505 &targptr_buf);
506 if (res != COI_SUCCESS) {
507 if (m_status != 0) {
508 m_status->result = translate_coi_error(res);
509 }
510 else if (m_is_mandatory) {
511 report_coi_error(c_buf_create, res);
512 }
513 return false;
514 }
515
516 res = COI::BufferGetSinkAddress(
517 targptr_buf, reinterpret_cast<uint64_t *>(&base));
518 if (res != COI_SUCCESS) {
519 if (m_status != 0) {
520 m_status->result = translate_coi_error(res);
521 }
522 else if (m_is_mandatory) {
523 report_coi_error(c_buf_get_address, res);
524 }
525 return false;
526 }
527 }
528
529 OFFLOAD_TRACE(3, "Creating association for data: addr %p, length %lld\n",
530 alloc_disp ? base : base_disp,
531 alloc_disp ? length : size + disp);
532
533     // add new entry
534
535 ptr_data = is_targptr ?
536 m_device.find_targetptr_data(base_disp) :
537 m_device.find_ptr_data(base_disp);
538     // if ptr_data is found we just need to check it for overlap
539 if (ptr_data) {
540 is_new = false;
541 base = base_disp;
542 }
543 else {
544 // If association is not found we must create it.
545 length = alloc_disp ? length : size + disp;
546 ptr_data = is_targptr ?
547 m_device.insert_targetptr_data(base, length, is_new) :
548 m_device.insert_ptr_data(base, length, is_new);
549 }
550 if (is_new) {
551
552 OFFLOAD_TRACE(3, "Added new association\n");
553
554 if (length > 0) {
555 OffloadTimer timer(get_timer_data(), c_offload_host_alloc_buffers);
556
557 // align should be a power of 2
558 if (!pin && !is_targptr &&
559 align > 0 && (align & (align - 1)) == 0) {
560 // offset within mic_buffer. Can do offset optimization
561 // only when source address alignment satisfies requested
562 // alignment on the target (cq172736).
563 if ((reinterpret_cast<intptr_t>(base) & (align - 1)) == 0) {
564 ptr_data->mic_offset =
565 reinterpret_cast<intptr_t>(base) & 4095;
566 }
567 }
568
569 // buffer size and flags
570 uint64_t buffer_size = length + ptr_data->mic_offset;
571
572 // For targetptr there is no CPU buffer
573 if (pin || !is_targptr) {
574 // create CPU buffer
575 OFFLOAD_DEBUG_TRACE_1(3,
576 GET_OFFLOAD_NUMBER(get_timer_data()),
577 c_offload_create_buf_host,
578 "Creating buffer from source memory %p, "
579 "length %lld\n", base, length);
580
581 // result is not checked because we can continue without cpu
582 // buffer. In this case we will use COIBufferRead/Write
583 // instead of COIBufferCopy.
584
585 COI::BufferCreateFromMemory(length,
586 COI_BUFFER_NORMAL,
587 0,
588 base,
589 1,
590 &m_device.get_process(),
591 &ptr_data->cpu_buf);
592             }
593
594 // create MIC buffer
595 if (is_prealloc) {
596 OFFLOAD_DEBUG_TRACE_1(3,
597 GET_OFFLOAD_NUMBER(get_timer_data()),
598 c_offload_create_buf_mic,
599 "Creating buffer from sink memory: size %lld, offset %d, "
600 "flags =0x%x\n", buffer_size,
5f520819 601 ptr_data->mic_offset, buffer_flags);
602 res = COI::BufferCreateFromMemory(ptr_data->cpu_addr.length(),
603 COI_BUFFER_NORMAL,
604 COI_SINK_MEMORY,
605 base,
606 1,
607 &m_device.get_process(),
608 &ptr_data->mic_buf);
609 if (res != COI_SUCCESS) {
610 if (m_status != 0) {
611 m_status->result = translate_coi_error(res);
612 }
613 else if (m_is_mandatory) {
614 report_coi_error(c_buf_create, res);
615 }
616 ptr_data->alloc_ptr_data_lock.unlock();
617 return false;
618                 }
619             }
620 else if (is_targptr) {
621 ptr_data->mic_buf = targptr_buf;
622 }
623 else if (!pin) {
624 OFFLOAD_DEBUG_TRACE_1(3,
625 GET_OFFLOAD_NUMBER(get_timer_data()),
626 c_offload_create_buf_mic,
627 "Creating buffer for sink: size %lld, offset %d, "
628 "flags =0x%x\n", buffer_size,
629 ptr_data->mic_offset, buffer_flags);
630 res = COI::BufferCreate(buffer_size,
631 COI_BUFFER_NORMAL,
632 buffer_flags,
633 0,
634 1,
635 &m_device.get_process(),
636 &ptr_data->mic_buf);
637 if (res != COI_SUCCESS) {
638 if (m_status != 0) {
639 m_status->result = translate_coi_error(res);
640 }
641 else if (m_is_mandatory) {
642 report_coi_error(c_buf_create, res);
643 }
644 ptr_data->alloc_ptr_data_lock.unlock();
645 return false;
646                 }
647 }
648
649 if (!pin) {
650 // make buffer valid on the device.
651 res = COI::BufferSetState(ptr_data->mic_buf,
652 m_device.get_process(),
653 COI_BUFFER_VALID,
654 COI_BUFFER_NO_MOVE,
655 0, 0, 0);
656 if (res != COI_SUCCESS) {
657 if (m_status != 0) {
658 m_status->result = translate_coi_error(res);
659 }
660 else if (m_is_mandatory) {
661 report_coi_error(c_buf_set_state, res);
662 }
663 ptr_data->alloc_ptr_data_lock.unlock();
664 return false;
665                 }
666
667 res = COI::BufferSetState(ptr_data->mic_buf,
668 COI_PROCESS_SOURCE,
669 COI_BUFFER_INVALID,
670 COI_BUFFER_NO_MOVE,
671 0, 0, 0);
672 if (res != COI_SUCCESS) {
673 if (m_status != 0) {
674 m_status->result = translate_coi_error(res);
675 }
676 else if (m_is_mandatory) {
677 report_coi_error(c_buf_set_state, res);
678 }
679 ptr_data->alloc_ptr_data_lock.unlock();
680 return false;
681                 }
682 }
683 }
684 ptr_data->alloc_disp = alloc_disp;
685 ptr_data->alloc_ptr_data_lock.unlock();
686 }
687 else {
688 mutex_locker_t locker(ptr_data->alloc_ptr_data_lock);
689
690 OFFLOAD_TRACE(3, "Found existing association: addr %p, length %lld, "
691 "is_static %d\n",
692 ptr_data->cpu_addr.start(), ptr_data->cpu_addr.length(),
693 ptr_data->is_static);
694
695 // This is not a new entry. Make sure that provided address range fits
696 // into existing one.
697         MemRange addr_range(base, length);
698         if (!ptr_data->cpu_addr.contains(addr_range)) {
699 LIBOFFLOAD_ERROR(c_bad_ptr_mem_alloc, base, length,
700 const_cast<void *>(ptr_data->cpu_addr.start()),
701 ptr_data->cpu_addr.length());
702 exit(1);
703 }
704
705 // if the entry is associated with static data it may not have buffers
706 // created because they are created on demand.
707 if (ptr_data->is_static && !init_static_ptr_data(ptr_data)) {
708 return false;
709 }
710 }
711
712 return true;
713}
714
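// Look up an existing association covering [in_base+disp, in_base+disp+size).
// On return ptr_data points at the entry, or is 0 if nothing usable exists
// (a missing or partially overlapping entry is fatal only when report_error
// is set); false is returned only when buffers for a static entry cannot be
// created.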
715bool OffloadDescriptor::find_ptr_data(
716 PtrData* &ptr_data,
717     void *in_base,
718 int64_t disp,
719 int64_t size,
720     bool is_targetptr,
721 bool report_error
722)
723{
724 // total length of base
725 int64_t length = size;
726 char *base = reinterpret_cast<char *>(in_base) + disp;
727
728 OFFLOAD_TRACE(3, "Looking for association for data: addr %p, "
729 "length %lld\n", base, length);
730
731 // find existing association in pointer table
732 ptr_data = is_targetptr ?
733 m_device.find_targetptr_data(base) :
734 m_device.find_ptr_data(base);
735 if (ptr_data == 0) {
736 if (report_error) {
737 LIBOFFLOAD_ERROR(c_no_ptr_data, base);
738 exit(1);
739 }
740 OFFLOAD_TRACE(3, "Association does not exist\n");
741 return true;
742 }
743
744 OFFLOAD_TRACE(3, "Found association: base %p, length %lld, is_static %d\n",
745 ptr_data->cpu_addr.start(), ptr_data->cpu_addr.length(),
746 ptr_data->is_static);
747
748 // make sure that provided address range fits into existing one
749 MemRange addr_range(base, length);
750 if (!ptr_data->cpu_addr.contains(addr_range)) {
751 if (report_error) {
752 LIBOFFLOAD_ERROR(c_bad_ptr_mem_range, base, length,
753 const_cast<void *>(ptr_data->cpu_addr.start()),
754 ptr_data->cpu_addr.length());
755 exit(1);
756 }
757 OFFLOAD_TRACE(3, "Existing association partially overlaps with "
758 "data address range\n");
759 ptr_data = 0;
760 return true;
761 }
762
763 // if the entry is associated with static data it may not have buffers
764 // created because they are created on demand.
765 if (ptr_data->is_static && !init_static_ptr_data(ptr_data)) {
766 return false;
767 }
768
769 return true;
770}
771
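// Static data associations are registered without COI buffers; this creates
// the host-side and sink-side buffers on first use.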
772bool OffloadDescriptor::init_static_ptr_data(PtrData *ptr_data)
773{
774 OffloadTimer timer(get_timer_data(), c_offload_host_alloc_buffers);
775
776 if (ptr_data->cpu_buf == 0) {
777 OFFLOAD_TRACE(3, "Creating buffer from source memory %llx\n",
778 ptr_data->cpu_addr.start());
779
780 COIRESULT res = COI::BufferCreateFromMemory(
781 ptr_data->cpu_addr.length(),
782 COI_BUFFER_NORMAL,
783 0,
784 const_cast<void*>(ptr_data->cpu_addr.start()),
785 1, &m_device.get_process(),
786 &ptr_data->cpu_buf);
787
788 if (res != COI_SUCCESS) {
789 if (m_status != 0) {
790 m_status->result = translate_coi_error(res);
791 return false;
792 }
793 report_coi_error(c_buf_create_from_mem, res);
794 }
795 }
796
797 if (ptr_data->mic_buf == 0) {
798 OFFLOAD_TRACE(3, "Creating buffer from sink memory %llx\n",
799 ptr_data->mic_addr);
800
801 COIRESULT res = COI::BufferCreateFromMemory(
802 ptr_data->cpu_addr.length(),
803 COI_BUFFER_NORMAL,
804 COI_SINK_MEMORY,
805 reinterpret_cast<void*>(ptr_data->mic_addr),
806 1, &m_device.get_process(),
807 &ptr_data->mic_buf);
808
809 if (res != COI_SUCCESS) {
810 if (m_status != 0) {
811 m_status->result = translate_coi_error(res);
812 return false;
813 }
814 report_coi_error(c_buf_create_from_mem, res);
815 }
816 }
817
818 return true;
819}
820
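// Lazily obtain the sink-side address of an already created MIC buffer,
// caching it in ptr_data->mic_addr.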
821bool OffloadDescriptor::init_mic_address(PtrData *ptr_data)
822{
823 if (ptr_data->mic_buf != 0 && ptr_data->mic_addr == 0) {
824 COIRESULT res = COI::BufferGetSinkAddress(ptr_data->mic_buf,
825 &ptr_data->mic_addr);
826 if (res != COI_SUCCESS) {
827 if (m_status != 0) {
828 m_status->result = translate_coi_error(res);
829 }
830 else if (m_is_mandatory) {
831 report_coi_error(c_buf_get_address, res);
832 }
833 return false;
834 }
835 }
836 return true;
837}
838
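// Zero the target-side stack buffer by writing a zero-filled host scratch
// block of the same size into it.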
839bool OffloadDescriptor::nullify_target_stack(
840 COIBUFFER targ_buf,
841 uint64_t size
842)
843{
844 char * ptr = (char*)malloc(size);
845 if (ptr == NULL)
846 LIBOFFLOAD_ERROR(c_malloc);
847 COIRESULT res;
848
849 memset(ptr, 0, size);
850 res = COI::BufferWrite(
851 targ_buf,
852 0,
853 ptr,
854 size,
855 COI_COPY_UNSPECIFIED,
856 0, 0, 0);
857 free(ptr);
858 if (res != COI_SUCCESS) {
859 if (m_status != 0) {
860 m_status->result = translate_coi_error(res);
861 return false;
862 }
863 report_coi_error(c_buf_write, res);
864 }
865 return true;
866}
867
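// Maintain the per-device list of persistent target stack buffers: entries
// belonging to stack frames this thread has already left are scheduled for
// destruction, an entry matching this frame and routine is reused, and
// otherwise a new zeroed buffer is created and pushed onto the list; *is_new
// reports whether a fresh buffer was allocated.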
868bool OffloadDescriptor::offload_stack_memory_manager(
869 const void * stack_begin,
870 int routine_id,
871 int buf_size,
872 int align,
873 bool *is_new)
874{
875 mutex_locker_t locker(stack_alloc_lock);
876
877 PersistData * new_el;
878 PersistDataList::iterator it_begin = m_device.m_persist_list.begin();
879 PersistDataList::iterator it_end;
880 int erase = 0;
881     uint64_t cur_thread_id = m_device.get_thread_id();
882
883 *is_new = false;
884
885 for (PersistDataList::iterator it = m_device.m_persist_list.begin();
886 it != m_device.m_persist_list.end(); it++) {
887 PersistData cur_el = *it;
888
889 if (stack_begin > it->stack_cpu_addr) {
890 // this stack data must be destroyed
891 if (cur_thread_id == cur_el.thread_id) {
892 m_destroy_stack.push_front(cur_el.stack_ptr_data);
893 it_end = it;
894 erase++;
895 }
896 }
897 else if (stack_begin == it->stack_cpu_addr) {
898 if (routine_id != it-> routine_id) {
899 // this stack data must be destroyed
900 m_destroy_stack.push_front(cur_el.stack_ptr_data);
901 it_end = it;
902 erase++;
903 break;
904 }
905 else {
906 // stack data is reused
907 m_stack_ptr_data = it->stack_ptr_data;
908 if (erase > 0) {
909 // all obsolete stack sections must be erased from the list
910 m_device.m_persist_list.erase(it_begin, ++it_end);
911
912 m_in_datalen +=
913 erase * sizeof(new_el->stack_ptr_data->mic_addr);
914 }
915 OFFLOAD_TRACE(3, "Reuse of stack buffer with addr %p\n",
916 m_stack_ptr_data->mic_addr);
917 return true;
918 }
919 }
920 else if (stack_begin < it->stack_cpu_addr &&
921 cur_thread_id == cur_el.thread_id) {
922 break;
923 }
924 }
925
926 if (erase > 0) {
927 // all obsolete stack sections must be erased from the list
928 m_device.m_persist_list.erase(it_begin, ++it_end);
929 m_in_datalen += erase * sizeof(new_el->stack_ptr_data->mic_addr);
930 }
931 // new stack table is created
932     new_el = new PersistData(stack_begin, routine_id, buf_size, cur_thread_id);
933 // create MIC buffer
934 COIRESULT res;
935 uint32_t buffer_flags = 0;
936
937 // create buffer with large pages if data length exceeds
938 // large page threshold
939 if (buf_size >= __offload_use_2mb_buffers) {
940 buffer_flags = COI_OPTIMIZE_HUGE_PAGE_SIZE;
941 }
942 res = COI::BufferCreate(buf_size,
943 COI_BUFFER_NORMAL,
944 buffer_flags,
945 0,
946 1,
947 &m_device.get_process(),
948 &new_el->stack_ptr_data->mic_buf);
949 if (res != COI_SUCCESS) {
950 if (m_status != 0) {
951 m_status->result = translate_coi_error(res);
952 }
953 else if (m_is_mandatory) {
954 report_coi_error(c_buf_create, res);
955 }
956 return false;
957 }
958 // make buffer valid on the device.
959 res = COI::BufferSetState(new_el->stack_ptr_data->mic_buf,
960 m_device.get_process(),
961 COI_BUFFER_VALID,
962 COI_BUFFER_NO_MOVE,
963 0, 0, 0);
964 if (res != COI_SUCCESS) {
965 if (m_status != 0) {
966 m_status->result = translate_coi_error(res);
967 }
968 else if (m_is_mandatory) {
969 report_coi_error(c_buf_set_state, res);
970 }
971 return false;
972 }
973 res = COI::BufferSetState(new_el->stack_ptr_data->mic_buf,
974 COI_PROCESS_SOURCE,
975 COI_BUFFER_INVALID,
976 COI_BUFFER_NO_MOVE,
977 0, 0, 0);
978 if (res != COI_SUCCESS) {
979 if (m_status != 0) {
980 m_status->result = translate_coi_error(res);
981 }
982 else if (m_is_mandatory) {
983 report_coi_error(c_buf_set_state, res);
984 }
985 return false;
986 }
987     // persistence algorithm requires target stack initially to be nullified
988 if (!nullify_target_stack(new_el->stack_ptr_data->mic_buf, buf_size)) {
989 return false;
990 }
991
992 m_stack_ptr_data = new_el->stack_ptr_data;
993 init_mic_address(m_stack_ptr_data);
994 OFFLOAD_TRACE(3, "Allocating stack buffer with addr %p\n",
995 m_stack_ptr_data->mic_addr);
996 m_device.m_persist_list.push_front(*new_el);
997 init_mic_address(new_el->stack_ptr_data);
998 *is_new = true;
999 return true;
1000}
1001
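// Build the runtime view of an offload's variable descriptors: copy them,
// allocate the input/output dependency event arrays, and walk each descriptor
// to compute copyin/copyout sizes and to create or look up the buffers behind
// pointer variables.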
1002bool OffloadDescriptor::setup_descriptors(
1003 VarDesc *vars,
1004 VarDesc2 *vars2,
1005 int vars_total,
1006 int entry_id,
1007 const void *stack_addr
1008)
1009{
1010 COIRESULT res;
1011
1012 OffloadTimer timer(get_timer_data(), c_offload_host_setup_buffers);
1013
1014 // make a copy of variable descriptors
1015 m_vars_total = vars_total;
1016 if (vars_total > 0) {
1017 m_vars = (VarDesc*) malloc(m_vars_total * sizeof(VarDesc));
1018 if (m_vars == NULL)
1019 LIBOFFLOAD_ERROR(c_malloc);
1020 memcpy(m_vars, vars, m_vars_total * sizeof(VarDesc));
1021 m_vars_extra = (VarExtra*) malloc(m_vars_total * sizeof(VarExtra));
1022 if (m_vars_extra == NULL)
1023 LIBOFFLOAD_ERROR(c_malloc);
1024 }
1025
1026 // dependencies
1027 m_in_deps_allocated = m_vars_total + 1;
1028 m_in_deps = (COIEVENT*) malloc(sizeof(COIEVENT) * m_in_deps_allocated);
1029 if (m_in_deps == NULL)
1030 LIBOFFLOAD_ERROR(c_malloc);
1031 if (m_vars_total > 0) {
1032 m_out_deps_allocated = m_vars_total;
1033 m_out_deps = (COIEVENT*) malloc(sizeof(COIEVENT) * m_out_deps_allocated);
1034 if (m_out_deps == NULL)
1035 LIBOFFLOAD_ERROR(c_malloc);
1036 }
1037
1038 // copyin/copyout data length
1039 m_in_datalen = 0;
1040 m_out_datalen = 0;
1041
1042 // First pass over variable descriptors
1043 // - Calculate size of the input and output non-pointer data
1044 // - Allocate buffers for input and output pointers
1045 for (int i = 0; i < m_vars_total; i++) {
1046 void* alloc_base = NULL;
1047 int64_t alloc_disp = 0;
1048        int64_t alloc_size = 0;
1049 bool src_is_for_mic = (m_vars[i].direction.out ||
1050 m_vars[i].into == NULL);
1051
1052 const char *var_sname = "";
1053 if (vars2 != NULL && i < vars_total) {
1054 if (vars2[i].sname != NULL) {
1055 var_sname = vars2[i].sname;
1056 }
1057 }
1058 OFFLOAD_TRACE(2, " VarDesc %d, var=%s, %s, %s\n",
1059 i, var_sname,
1060 vardesc_direction_as_string[m_vars[i].direction.bits],
1061 vardesc_type_as_string[m_vars[i].type.src]);
1062 if (vars2 != NULL && i < vars_total && vars2[i].dname != NULL) {
1063 OFFLOAD_TRACE(2, " into=%s, %s\n", vars2[i].dname,
1064 vardesc_type_as_string[m_vars[i].type.dst]);
1065 }
1066 OFFLOAD_TRACE(2,
1067 " type_src=%d, type_dstn=%d, direction=%d, "
1068 "alloc_if=%d, free_if=%d, align=%d, mic_offset=%d, flags=0x%x, "
1069 "offset=%lld, size=%lld, count/disp=%lld, ptr=%p, into=%p\n",
1070 m_vars[i].type.src,
1071 m_vars[i].type.dst,
1072 m_vars[i].direction.bits,
1073 m_vars[i].alloc_if,
1074 m_vars[i].free_if,
1075 m_vars[i].align,
1076 m_vars[i].mic_offset,
1077 m_vars[i].flags.bits,
1078 m_vars[i].offset,
1079 m_vars[i].size,
1080 m_vars[i].count,
1081 m_vars[i].ptr,
1082 m_vars[i].into);
1083 // If any varDesc flags bits set, show them
1084 if (console_enabled >= 1 && m_vars[i].flags.bits != 0) {
1085 trace_varDesc_flags(get_timer_data(), m_vars[i].flags);
1086 }
1087
1088 // preallocated implies targetptr
1089 if (m_vars[i].flags.preallocated) {
1090 // targetptr preallocated alloc_if(1) may not be used with
1091 // an in clause
1092 if (m_vars[i].direction.in && m_vars[i].alloc_if) {
1093 LIBOFFLOAD_ERROR(c_in_with_preallocated);
1094 exit(1);
1095 }
1096 m_vars[i].flags.targetptr = 1;
1097 }
1098 if (m_vars[i].alloc != NULL) {
1099 // array descriptor
1100 const Arr_Desc *ap =
1101 static_cast<const Arr_Desc*>(m_vars[i].alloc);
1102
1103 // debug dump
1104            ARRAY_DESC_DUMP(" ", "ALLOC", ap, 0, 1);
1105
1106 __arr_data_offset_and_length(ap, alloc_disp, alloc_size);
1107
1108 alloc_base = reinterpret_cast<void*>(ap->base);
1109 }
1110
1111        m_vars_extra[i].alloc = m_vars[i].alloc;
1112 m_vars_extra[i].cpu_disp = 0;
1113 m_vars_extra[i].cpu_offset = 0;
1114 m_vars_extra[i].src_data = 0;
1115 m_vars_extra[i].read_rng_src = 0;
1116 m_vars_extra[i].read_rng_dst = 0;
1117        m_vars_extra[i].omp_last_event_type = c_last_not;
1118 // flag is_arr_ptr_el is 1 only for var_descs generated
1119 // for c_data_ptr_array type
1120 if (i < vars_total) {
1121 m_vars_extra[i].is_arr_ptr_el = 0;
1122 }
1123
1124 switch (m_vars[i].type.src) {
1125 case c_data_ptr_array:
1126 {
1127            const Arr_Desc *ap;
1128 const VarDesc3 *vd3 =
1129 static_cast<const VarDesc3*>(m_vars[i].ptr);
1130 int flags = vd3->array_fields;
1131 OFFLOAD_TRACE(2,
1132 " pointer array flags = %04x\n", flags);
1133 OFFLOAD_TRACE(2,
1134 " pointer array type is %s\n",
1135 vardesc_type_as_string[flags & 0x3f]);
1136 ap = static_cast<const Arr_Desc*>(vd3->ptr_array);
1137 ARRAY_DESC_DUMP(" ", "ptr array", ap,
1138 m_vars[i].flags.is_pointer, 1);
1139            if (m_vars[i].into) {
1140 ap = static_cast<const Arr_Desc*>(m_vars[i].into);
1141 ARRAY_DESC_DUMP(
1142 " ", "into array", ap, 0, 1);
1143 }
1144 if ((flags & (1<<flag_align_is_array)) != 0) {
1145 ap = static_cast<const Arr_Desc*>(vd3->align_array);
1146 ARRAY_DESC_DUMP(
1147 " ", "align array", ap, 0, 1);
1148 }
1149 if ((flags & (1<<flag_alloc_if_is_array)) != 0) {
1150 ap = static_cast<const Arr_Desc*>(vd3->alloc_if_array);
1151 ARRAY_DESC_DUMP(
1152 " ", "alloc_if array", ap, 0, 1);
1153 }
1154 if ((flags & (1<<flag_free_if_is_array)) != 0) {
1155 ap = static_cast<const Arr_Desc*>(vd3->free_if_array);
1156 ARRAY_DESC_DUMP(
1157 " ", "free_if array", ap, 0, 1);
1158 }
1159 if ((flags & (1<<flag_extent_start_is_array)) != 0) {
1160 ap = static_cast<const Arr_Desc*>(vd3->extent_start);
1161 ARRAY_DESC_DUMP(
1162 " ", "extent_start array", ap, 0, 1);
1163 } else if ((flags &
1164 (1<<flag_extent_start_is_scalar)) != 0) {
1165 OFFLOAD_TRACE(2,
1166 " extent_start scalar = %d\n",
1167 (int64_t)vd3->extent_start);
1168 }
1169 if ((flags & (1<<flag_extent_elements_is_array)) != 0) {
1170                ap = static_cast<const Arr_Desc*>
1171                    (vd3->extent_elements);
1172 ARRAY_DESC_DUMP(" ",
1173 "extent_elements array", ap, 0, 1);
1174 } else if ((flags &
1175 (1<<flag_extent_elements_is_scalar)) != 0) {
1176 OFFLOAD_TRACE(2,
1177 " extent_elements scalar = %d\n",
1178 (int64_t)vd3->extent_elements);
1179 }
1180 if ((flags & (1<<flag_into_start_is_array)) != 0) {
1181 ap = static_cast<const Arr_Desc*>(vd3->into_start);
1182 ARRAY_DESC_DUMP(
1183 " ", "into_start array", ap, 0, 1);
1184 } else if ((flags &
1185 (1<<flag_into_start_is_scalar)) != 0) {
1186 OFFLOAD_TRACE(2,
1187 " into_start scalar = %d\n",
1188 (int64_t)vd3->into_start);
1189 }
1190 if ((flags & (1<<flag_into_elements_is_array)) != 0) {
1191 ap = static_cast<const Arr_Desc*>(vd3->into_elements);
1192 ARRAY_DESC_DUMP(
1193 " ", "into_elements array", ap, 0, 1);
1194 } else if ((flags &
1195 (1<<flag_into_elements_is_scalar)) != 0) {
1196 OFFLOAD_TRACE(2,
1197 " into_elements scalar = %d\n",
1198 (int64_t)vd3->into_elements);
1199 }
1200 if ((flags & (1<<flag_alloc_start_is_array)) != 0) {
1201 ap = static_cast<const Arr_Desc*>(vd3->alloc_start);
1202 ARRAY_DESC_DUMP(
1203 " ", "alloc_start array", ap, 0, 1);
1204 } else if ((flags &
1205 (1<<flag_alloc_start_is_scalar)) != 0) {
1206 OFFLOAD_TRACE(2,
1207 " alloc_start scalar = %d\n",
1208 (int64_t)vd3->alloc_start);
1209 }
1210 if ((flags & (1<<flag_alloc_elements_is_array)) != 0) {
1211 ap = static_cast<const Arr_Desc*>(vd3->alloc_elements);
1212 ARRAY_DESC_DUMP(" ",
1213 "alloc_elements array", ap, 0, 1);
1214 } else if ((flags &
1215 (1<<flag_alloc_elements_is_scalar)) != 0) {
1216 OFFLOAD_TRACE(2,
1217 " alloc_elements scalar = %d\n",
1218 (int64_t)vd3->alloc_elements);
1219 }
1220 }
1221 if (!gen_var_descs_for_pointer_array(i)) {
1222 return false;
1223 }
1224 break;
1225
1226 case c_data:
1227 case c_void_ptr:
1228 case c_cean_var:
1229 // In all uses later
1230 // VarDesc.size will have the length of the data to be
1231 // transferred
1232 // VarDesc.disp will have an offset from base
1233 if (m_vars[i].type.src == c_cean_var) {
1234 // array descriptor
1235 const Arr_Desc *ap =
1236 static_cast<const Arr_Desc*>(m_vars[i].ptr);
1237
1238 // debug dump
1239            ARRAY_DESC_DUMP("", "IN/OUT", ap, 0, !src_is_for_mic);
1240
1241 // offset and length are derived from the array descriptor
1242 __arr_data_offset_and_length(ap, m_vars[i].disp,
1243 m_vars[i].size);
1244 if (!is_arr_desc_contiguous(ap)) {
1245 m_vars[i].flags.is_noncont_src = 1;
1246 m_vars_extra[i].read_rng_src =
1247 init_read_ranges_arr_desc(ap);
1248 }
1249 // all necessary information about length and offset is
1250 // transferred in var descriptor. There is no need to send
1251 // array descriptor to the target side.
1252 m_vars[i].ptr = reinterpret_cast<void*>(ap->base);
1253 }
1254 else {
1255 m_vars[i].size *= m_vars[i].count;
1256 m_vars[i].disp = 0;
1257 }
1258
1259 if (m_vars[i].direction.bits) {
1260 // make sure that transfer size > 0
1261 if (m_vars[i].size <= 0) {
1262 LIBOFFLOAD_ERROR(c_zero_or_neg_transfer_size);
1263 exit(1);
1264 }
1265
1266 if (m_vars[i].flags.is_static) {
1267 PtrData *ptr_data;
1268
1269 // find data associated with variable
1270 if (!find_ptr_data(ptr_data,
1271 m_vars[i].ptr,
1272 m_vars[i].disp,
1273 m_vars[i].size,
1274                           false, false)) {
1275 return false;
1276 }
1277
1278 if (ptr_data != 0) {
1279 // offset to base from the beginning of the buffer
1280 // memory
1281 m_vars[i].offset =
1282 (char*) m_vars[i].ptr -
1283 (char*) ptr_data->cpu_addr.start();
1284 }
1285 else {
1286 m_vars[i].flags.is_static = false;
1287 if (m_vars[i].into == NULL) {
1288 m_vars[i].flags.is_static_dstn = false;
1289 }
1290 }
1291 m_vars_extra[i].src_data = ptr_data;
1292 }
1293
1294 if (m_is_openmp) {
1295 if (m_vars[i].flags.is_static) {
1296                    // Static data is transferred either by omp target
1297                    // update construct which passes zeros for
1298 // alloc_if and free_if or by always modifier.
1299 if (!m_vars[i].flags.always_copy &&
1300 (m_vars[i].alloc_if || m_vars[i].free_if)) {
1301 m_vars[i].direction.bits = c_parameter_nocopy;
1302 }
1303 }
1304 else {
1305 AutoData *auto_data;
1306 if (m_vars[i].alloc_if) {
1307 auto_data = m_device.insert_auto_data(
1308 m_vars[i].ptr, m_vars[i].size);
1309 auto_data->add_reference();
1310 }
1311 else {
1312 // TODO: what should be done if var is not in
1313 // the table?
1314 auto_data = m_device.find_auto_data(
1315 m_vars[i].ptr);
1316 }
1317
1318 // For automatic variables data is transferred:
1319 // - if always modifier is used OR
1320 // - if alloc_if == 0 && free_if == 0 OR
1321 // - if reference count is 1
1322 if (!m_vars[i].flags.always_copy &&
1323 (m_vars[i].alloc_if || m_vars[i].free_if) &&
1324 auto_data != 0 &&
1325 auto_data->get_reference() != 1) {
1326 m_vars[i].direction.bits = c_parameter_nocopy;
1327 }
1328
1329 // save data for later use
1330 m_vars_extra[i].auto_data = auto_data;
1331 }
1332 }
1333
1334 if (m_vars[i].direction.in &&
1335 !m_vars[i].flags.is_static) {
1336 m_in_datalen += m_vars[i].size;
1337
1338                    // for a non-static target destination defined as a CEAN
1339                    // expression we pass its size and disp to the target
1340 if (m_vars[i].into == NULL &&
1341 m_vars[i].type.src == c_cean_var) {
1342 m_in_datalen += 2 * sizeof(uint64_t);
1343 }
1344 m_need_runfunction = true;
1345 }
1346 if (m_vars[i].direction.out &&
1347 !m_vars[i].flags.is_static) {
1348 m_out_datalen += m_vars[i].size;
1349 m_need_runfunction = true;
1350 }
1351 }
1352 break;
1353
1354 case c_dv:
1355 if (m_vars[i].direction.bits ||
1356 m_vars[i].alloc_if ||
1357 m_vars[i].free_if) {
1358 ArrDesc *dvp = static_cast<ArrDesc*>(m_vars[i].ptr);
1359
1360 // debug dump
1361 __dv_desc_dump("IN/OUT", dvp);
1362
1363 // send dope vector contents excluding base
1364 m_in_datalen += m_vars[i].size - sizeof(uint64_t);
1365 m_need_runfunction = true;
1366 }
1367 break;
1368
1369 case c_string_ptr:
1370 if ((m_vars[i].direction.bits ||
1371 m_vars[i].alloc_if ||
1372 m_vars[i].free_if) &&
1373 m_vars[i].size == 0) {
1374 m_vars[i].size = 1;
1375 m_vars[i].count =
1376 strlen(*static_cast<char**>(m_vars[i].ptr)) + 1;
1377 }
1378 /* fallthru */
1379
1380 case c_data_ptr:
1381 if (m_vars[i].flags.is_stack_buf &&
1382 !m_vars[i].direction.bits &&
1383 m_vars[i].alloc_if) {
1384 // this var_desc is for stack buffer
1385 bool is_new;
1386
1387 if (!offload_stack_memory_manager(
1388 stack_addr, entry_id,
1389 m_vars[i].count, m_vars[i].align, &is_new)) {
1390 return false;
1391 }
1392 if (is_new) {
1393 m_compute_buffers.push_back(
1394 m_stack_ptr_data->mic_buf);
1395 m_device.m_persist_list.front().cpu_stack_addr =
1396 static_cast<char*>(m_vars[i].ptr);
1397 }
1398 else {
1399 m_vars[i].flags.sink_addr = 1;
1400 m_in_datalen += sizeof(m_stack_ptr_data->mic_addr);
1401 }
1402 m_vars[i].size = m_destroy_stack.size();
1403 m_vars_extra[i].src_data = m_stack_ptr_data;
1404
1405 // need to add or remove references for stack buffer at target
1406 if (is_new || m_destroy_stack.size()) {
1407 m_need_runfunction = true;
1408 }
1409
1410 break;
1411 }
1412 /* fallthru */
1413
1414 case c_cean_var_ptr:
1415 case c_dv_ptr:
1416 if (m_vars[i].type.src == c_cean_var_ptr) {
1417 // array descriptor
1418 const Arr_Desc *ap =
1419 static_cast<const Arr_Desc*>(m_vars[i].ptr);
1420
1421 // debug dump
1422                ARRAY_DESC_DUMP("", "IN/OUT", ap, 1, !src_is_for_mic);
1423
1424 // offset and length are derived from the array descriptor
1425 __arr_data_offset_and_length(ap, m_vars[i].disp,
1426 m_vars[i].size);
1427
1428 if (!is_arr_desc_contiguous(ap)) {
1429 m_vars[i].flags.is_noncont_src = 1;
1430 m_vars_extra[i].read_rng_src =
1431 init_read_ranges_arr_desc(ap);
1432 }
1433 // all necessary information about length and offset is
1434 // transferred in var descriptor. There is no need to send
1435 // array descriptor to the target side.
1436 m_vars[i].ptr = reinterpret_cast<void*>(ap->base);
1437 }
1438 else if (m_vars[i].type.src == c_dv_ptr) {
1439 // need to send DV to the device unless it is 'nocopy'
1440 if (m_vars[i].direction.bits ||
1441 m_vars[i].alloc_if ||
1442 m_vars[i].free_if) {
1443 ArrDesc *dvp = *static_cast<ArrDesc**>(m_vars[i].ptr);
1444
1445 // debug dump
1446 __dv_desc_dump("IN/OUT", dvp);
1447
1448 m_vars[i].direction.bits = c_parameter_in;
1449 }
1450
1451 // no displacement
1452 m_vars[i].disp = 0;
1453 }
1454 else {
1455 // c_data_ptr or c_string_ptr
1456 m_vars[i].size *= m_vars[i].count;
1457 m_vars[i].disp = 0;
1458 }
1459
1460 if (m_vars[i].direction.bits ||
1461 m_vars[i].alloc_if ||
1462 m_vars[i].free_if) {
1463 PtrData *ptr_data;
1464
1465                // check that buffer length > 0
1466                if (m_vars[i].alloc_if &&
1467 m_vars[i].disp + m_vars[i].size <
1468 (m_is_openmp ? 0 : 1)) {
1469 LIBOFFLOAD_ERROR(c_zero_or_neg_ptr_len);
1470 exit(1);
1471 }
1472
1473 // base address
1474 void *base = *static_cast<void**>(m_vars[i].ptr);
1475
1476 // allocate buffer if we have no INTO and don't need
1477 // allocation for the ptr at target
1478 if (src_is_for_mic) {
1479 if (m_vars[i].flags.is_stack_buf) {
1480 // for stack persistent objects ptr data is created
1481 // by var_desc with number 0.
1482 // Its ptr_data is stored at m_stack_ptr_data
1483 ptr_data = m_stack_ptr_data;
1484 m_vars[i].flags.sink_addr = 1;
1485 }
1486 else if (m_vars[i].alloc_if) {
1487 if (m_vars[i].flags.preallocated) {
1488 m_out_datalen += sizeof(void*);
1489 m_need_runfunction = true;
1490 break;
1491 }
1492 // add new entry
1493 if (!alloc_ptr_data(
1494 ptr_data,
1495                            reinterpret_cast<char *>(base) + alloc_disp,
1496 (alloc_base != NULL) ?
1497 alloc_disp : m_vars[i].disp,
1498 (alloc_base != NULL) ?
1499 alloc_size : m_vars[i].size,
1500 alloc_disp,
1501 (alloc_base != NULL) ?
1502 0 : m_vars[i].align,
1503 m_vars[i].flags.targetptr,
1504 0,
1505 m_vars[i].flags.pin)) {
1506 return false;
1507 }
1508 if (m_vars[i].flags.targetptr) {
1509 if (!init_mic_address(ptr_data)) {
1510 return false;
1511 }
1512 *static_cast<void**>(m_vars[i].ptr) = base =
1513 reinterpret_cast<void*>(ptr_data->mic_addr);
1514 }
1515 if (ptr_data->add_reference() == 0 &&
1516 ptr_data->mic_buf != 0) {
1517 // add buffer to the list of buffers that
1518 // are passed to dispatch call
1519 m_compute_buffers.push_back(
1520 ptr_data->mic_buf);
1521 }
1522 else if (!m_vars[i].flags.pin &&
1523 !m_vars[i].flags.preallocated) {
1524 // will send buffer address to device
1525 m_vars[i].flags.sink_addr = 1;
1526 }
1527
1528 if (!m_vars[i].flags.pin &&
1529 !ptr_data->is_static) {
1530 // need to add reference for buffer
1531 m_need_runfunction = true;
1532 }
1533 }
1534 else {
1535 bool error_if_not_found = true;
1536 if (m_is_openmp) {
1537 // For omp target update variable is ignored
1538 // if it does not exist.
1539 if (m_vars[i].flags.always_copy ||
1540 (!m_vars[i].alloc_if &&
1541 !m_vars[i].free_if)) {
1542 error_if_not_found = false;
1543 }
1544 }
1545
1546 // use existing association from pointer table
1547 if (!find_ptr_data(ptr_data,
1548 base,
1549 m_vars[i].disp,
1550 m_vars[i].size,
1551                                   m_vars[i].flags.targetptr,
1552 error_if_not_found)) {
1553 return false;
1554 }
1555
1556 if (m_is_openmp) {
1557 // make var nocopy if it does not exist
1558 if (ptr_data == 0) {
1559 m_vars[i].direction.bits =
1560 c_parameter_nocopy;
1561 }
1562 }
1563
1564 if (ptr_data != 0) {
1565 m_vars[i].flags.sink_addr = 1;
1566 }
1567 }
1568
1569 if (ptr_data != 0) {
1570 if (m_is_openmp) {
1571 // data is transferred only if
1572 // alloc_if == 0 && free_if == 0
1573 // or reference count is 1
1574 if (!m_vars[i].flags.always_copy &&
1575 ((m_vars[i].alloc_if ||
1576 m_vars[i].free_if) &&
1577 ptr_data->get_reference() != 1)) {
1578 m_vars[i].direction.bits =
1579 c_parameter_nocopy;
1580 }
1581 }
1582
1583 if (ptr_data->alloc_disp != 0) {
1584 m_vars[i].flags.alloc_disp = 1;
1585 m_in_datalen += sizeof(alloc_disp);
1586 }
1587
1588 if (m_vars[i].flags.sink_addr) {
1589                            // get buffer's address on the sink
1590 if (!init_mic_address(ptr_data)) {
1591 return false;
1592 }
1593
1594 m_in_datalen += sizeof(ptr_data->mic_addr);
1595 }
1596
1597 if (!m_vars[i].flags.pin &&
1598 !ptr_data->is_static && m_vars[i].free_if) {
1599 // need to decrement buffer reference on target
1600 m_need_runfunction = true;
1601 }
1602
1603 // offset to base from the beginning of the buffer
1604 // memory
1605 m_vars[i].offset = (char*) base -
1606 (char*) ptr_data->cpu_addr.start();
1607
1608 // copy other pointer properties to var descriptor
1609 m_vars[i].mic_offset = ptr_data->mic_offset;
1610 m_vars[i].flags.is_static = ptr_data->is_static;
1611 }
1612 }
1613 else {
1614 if (!find_ptr_data(ptr_data,
1615 base,
1616 m_vars[i].disp,
1617 m_vars[i].size,
1618                           false, false)) {
1619 return false;
1620 }
1621 if (ptr_data) {
1622 m_vars[i].offset =
1623 (char*) base -
1624 (char*) ptr_data->cpu_addr.start();
1625 }
1626 }
1627
1628 // save pointer data
1629 m_vars_extra[i].src_data = ptr_data;
1630 }
1631 break;
1632
1633 case c_func_ptr:
1634 if (m_vars[i].direction.in) {
1635 m_in_datalen += __offload_funcs.max_name_length();
1636 }
1637 if (m_vars[i].direction.out) {
1638 m_out_datalen += __offload_funcs.max_name_length();
1639 }
1640 m_need_runfunction = true;
1641 break;
1642
1643 case c_dv_data:
1644 case c_dv_ptr_data:
1645 case c_dv_data_slice:
1646 case c_dv_ptr_data_slice:
1647 ArrDesc *dvp;
1648 if (VAR_TYPE_IS_DV_DATA_SLICE(m_vars[i].type.src)) {
1649 const Arr_Desc *ap;
1650 ap = static_cast<const Arr_Desc*>(m_vars[i].ptr);
1651
1652 dvp = (m_vars[i].type.src == c_dv_data_slice) ?
1653 reinterpret_cast<ArrDesc*>(ap->base) :
1654 *reinterpret_cast<ArrDesc**>(ap->base);
1655 }
1656 else {
1657 dvp = (m_vars[i].type.src == c_dv_data) ?
1658 static_cast<ArrDesc*>(m_vars[i].ptr) :
1659 *static_cast<ArrDesc**>(m_vars[i].ptr);
1660 }
1661
1662 // if allocatable dope vector isn't allocated don't
1663 // transfer its data
1664 if (!__dv_is_allocated(dvp)) {
1665 m_vars[i].direction.bits = c_parameter_nocopy;
1666 m_vars[i].alloc_if = 0;
1667 m_vars[i].free_if = 0;
1668 }
1669 if (m_vars[i].direction.bits ||
1670 m_vars[i].alloc_if ||
1671 m_vars[i].free_if) {
1672                const Arr_Desc *ap;
1673
1674 if (VAR_TYPE_IS_DV_DATA_SLICE(m_vars[i].type.src)) {
1675                    ap = static_cast<const Arr_Desc*>(m_vars[i].ptr);
1676
1677 // debug dump
1678                    ARRAY_DESC_DUMP("", "IN/OUT", ap, 0, !src_is_for_mic);
1679 }
1680 if (!__dv_is_contiguous(dvp)) {
1681 m_vars[i].flags.is_noncont_src = 1;
1682 m_vars_extra[i].read_rng_src =
1683 init_read_ranges_dv(dvp);
1684 }
1685
1686 // size and displacement
1687 if (VAR_TYPE_IS_DV_DATA_SLICE(m_vars[i].type.src)) {
1688 // offset and length are derived from the
1689 // array descriptor
1690 __arr_data_offset_and_length(ap,
1691 m_vars[i].disp,
1692 m_vars[i].size);
1693 if (m_vars[i].direction.bits) {
1694 if (!is_arr_desc_contiguous(ap)) {
1695 if (m_vars[i].flags.is_noncont_src) {
1696 LIBOFFLOAD_ERROR(c_slice_of_noncont_array);
1697 return false;
1698 }
1699 m_vars[i].flags.is_noncont_src = 1;
1700 m_vars_extra[i].read_rng_src =
1701 init_read_ranges_arr_desc(ap);
1702 }
1703 }
1704 }
1705 else {
1706 if (m_vars[i].flags.has_length) {
1707 m_vars[i].size =
1708 __dv_data_length(dvp, m_vars[i].count);
1709 }
1710 else {
1711 m_vars[i].size = __dv_data_length(dvp);
1712 }
1713 m_vars[i].disp = 0;
1714 }
1715
1716 // check that length >= 0
1717 if (m_vars[i].alloc_if &&
1718 (m_vars[i].disp + m_vars[i].size < 0)) {
1719 LIBOFFLOAD_ERROR(c_zero_or_neg_ptr_len);
1720 exit(1);
1721 }
1722
1723 // base address
1724 void *base = reinterpret_cast<void*>(dvp->Base);
1725 PtrData *ptr_data;
1726
1727 // allocate buffer if we have no INTO and don't need
1728 // allocation for the ptr at target
1729 if (src_is_for_mic) {
1730 if (m_vars[i].alloc_if) {
1731 // add new entry
1732 if (!alloc_ptr_data(
1733 ptr_data,
1734                            reinterpret_cast<char *>(base) + alloc_disp,
1735 (alloc_base != NULL) ?
1736 alloc_disp : m_vars[i].disp,
1737 (alloc_base != NULL) ?
1738 alloc_size : m_vars[i].size,
1739 alloc_disp,
1740 (alloc_base != NULL) ?
1741 0 : m_vars[i].align,
1742 m_vars[i].flags.targetptr,
1743 m_vars[i].flags.preallocated,
1744 m_vars[i].flags.pin)) {
1745 return false;
1746 }
1747
1748 if (ptr_data->add_reference() == 0 &&
1749 ptr_data->mic_buf != 0) {
1750 // add buffer to the list of buffers
1751 // that are passed to dispatch call
1752 m_compute_buffers.push_back(
1753 ptr_data->mic_buf);
1754 }
1755 else {
1756 // will send buffer address to device
1757 m_vars[i].flags.sink_addr = 1;
1758 }
1759
1760 if (!ptr_data->is_static) {
1761 // need to add reference for buffer
1762 m_need_runfunction = true;
1763 }
1764 }
1765 else {
1766 bool error_if_not_found = true;
1767 if (m_is_openmp) {
1768 // For omp target update variable is ignored
1769 // if it does not exist.
1770 if (m_vars[i].flags.always_copy ||
1771 (!m_vars[i].alloc_if &&
1772 !m_vars[i].free_if)) {
1773 error_if_not_found = false;
1774 }
1775 }
1776
1777 // use existing association from pointer table
1778 if (!find_ptr_data(ptr_data,
1779 base,
1780 m_vars[i].disp,
1781 m_vars[i].size,
1782                                   m_vars[i].flags.targetptr,
1783 error_if_not_found)) {
1784 return false;
1785 }
1786
1787 if (m_is_openmp) {
1788 // make var nocopy if it does not exist
1789 if (ptr_data == 0) {
1790 m_vars[i].direction.bits =
1791 c_parameter_nocopy;
1792 }
1793 }
1794
1795 if (ptr_data != 0) {
1796 // need to update base in dope vector on device
1797 m_vars[i].flags.sink_addr = 1;
1798 }
1799 }
1800
1801 if (ptr_data != 0) {
1802 if (m_is_openmp) {
1803 // data is transferred if
1804 // - if always modifier is used OR
1805 // - if alloc_if == 0 && free_if == 0 OR
1806 // - if reference count is 1
1807 if (!m_vars[i].flags.always_copy &&
1808 (m_vars[i].alloc_if ||
1809 m_vars[i].free_if) &&
1810 ptr_data->get_reference() != 1) {
1811 m_vars[i].direction.bits =
1812 c_parameter_nocopy;
1813 }
1814 }
1815
1816 if (ptr_data->alloc_disp != 0) {
1817 m_vars[i].flags.alloc_disp = 1;
1818 m_in_datalen += sizeof(alloc_disp);
1819 }
1820
1821 if (m_vars[i].flags.sink_addr) {
1822                            // get buffer's address on the sink
1823 if (!init_mic_address(ptr_data)) {
1824 return false;
1825 }
1826
1827 m_in_datalen += sizeof(ptr_data->mic_addr);
1828 }
1829
1830 if (!ptr_data->is_static && m_vars[i].free_if) {
1831 // need to decrement buffer reference on target
1832 m_need_runfunction = true;
1833 }
1834
1835 // offset to base from the beginning of the buffer
1836 // memory
1837 m_vars[i].offset =
1838 (char*) base -
1839 (char*) ptr_data->cpu_addr.start();
1840
1841 // copy other pointer properties to var descriptor
1842 m_vars[i].mic_offset = ptr_data->mic_offset;
1843 m_vars[i].flags.is_static = ptr_data->is_static;
1844 }
1845 }
1846 else { // !src_is_for_mic
1847 if (!find_ptr_data(ptr_data,
1848 base,
1849 m_vars[i].disp,
1850 m_vars[i].size,
1851                           false, false)) {
1852 return false;
1853 }
1854 m_vars[i].offset = !ptr_data ? 0 :
1855 (char*) base -
1856 (char*) ptr_data->cpu_addr.start();
1857 }
1858
1859 // save pointer data
1860 m_vars_extra[i].src_data = ptr_data;
1861 }
1862 break;
1863
1864 default:
1865 LIBOFFLOAD_ERROR(c_unknown_var_type, m_vars[i].type.src);
1866 LIBOFFLOAD_ABORT;
1867 }
1868 if (m_vars[i].type.src == c_data_ptr_array) {
1869 continue;
1870 }
1871
1872 if (src_is_for_mic && m_vars[i].flags.is_stack_buf) {
1873 m_vars[i].offset = static_cast<char*>(m_vars[i].ptr) -
1874 m_device.m_persist_list.front().cpu_stack_addr;
1875 }
1876 // if source is used at CPU save its offset and disp
1877 if (m_vars[i].into == NULL || m_vars[i].direction.in) {
1878 m_vars_extra[i].cpu_offset = m_vars[i].offset;
1879 m_vars_extra[i].cpu_disp = m_vars[i].disp;
1880 }
1881
1882 // If "into" is define we need to do the similar work for it
1883 if (!m_vars[i].into) {
1884 continue;
1885 }
1886
1887 int64_t into_disp =0, into_offset = 0;
1888
1889 switch (m_vars[i].type.dst) {
1890 case c_data_ptr_array:
1891 break;
1892 case c_data:
1893 case c_void_ptr:
1894 case c_cean_var: {
1895 int64_t size = m_vars[i].size;
1896
1897 if (m_vars[i].type.dst == c_cean_var) {
1898 // array descriptor
1899 const Arr_Desc *ap =
1900 static_cast<const Arr_Desc*>(m_vars[i].into);
1901
1902 // debug dump
1903                ARRAY_DESC_DUMP(" ", "INTO", ap, 0, src_is_for_mic);
1904
1905 // offset and length are derived from the array descriptor
1906 __arr_data_offset_and_length(ap, into_disp, size);
1907
1908 if (!is_arr_desc_contiguous(ap)) {
1909 m_vars[i].flags.is_noncont_dst = 1;
1910 m_vars_extra[i].read_rng_dst =
1911 init_read_ranges_arr_desc(ap);
1912 if (!cean_ranges_match(
1913 m_vars_extra[i].read_rng_src,
1914 m_vars_extra[i].read_rng_dst)) {
1915 LIBOFFLOAD_ERROR(c_ranges_dont_match);
1916 exit(1);
1917 }
1918 }
1919 m_vars[i].into = reinterpret_cast<void*>(ap->base);
1920 }
1921
1922 int64_t size_src = m_vars_extra[i].read_rng_src ?
1923 cean_get_transf_size(m_vars_extra[i].read_rng_src) :
1924 m_vars[i].size;
1925 int64_t size_dst = m_vars_extra[i].read_rng_dst ?
1926 cean_get_transf_size(m_vars_extra[i].read_rng_dst) :
1927 size;
1928             // It is assumed that the "into" size is not less
1929             // than the src size
1930 if (size_src > size_dst) {
1931 LIBOFFLOAD_ERROR(c_different_src_and_dstn_sizes,
1932 size_src, size_dst);
1933 exit(1);
1934 }
1935
1936 if (m_vars[i].direction.bits) {
1937 if (m_vars[i].flags.is_static_dstn) {
1938 PtrData *ptr_data;
1939
1940 // find data associated with variable
1941 if (!find_ptr_data(ptr_data, m_vars[i].into,
2eab9666 1942 into_disp, size, false, false)) {
5f520819
KY
1943 return false;
1944 }
1945 if (ptr_data != 0) {
1946 // offset to base from the beginning of the buffer
1947 // memory
1948 into_offset =
1949 (char*) m_vars[i].into -
1950 (char*) ptr_data->cpu_addr.start();
1951 }
1952 else {
1953 m_vars[i].flags.is_static_dstn = false;
1954 }
1955 m_vars_extra[i].dst_data = ptr_data;
1956 }
1957 }
1958
1959 if (m_vars[i].direction.in &&
1960 !m_vars[i].flags.is_static_dstn) {
1961 m_in_datalen += m_vars[i].size;
1962
1963                 // for a non-static target destination defined as a CEAN
1964                 // expression we pass its size and disp to the target
1965 if (m_vars[i].type.dst == c_cean_var) {
1966 m_in_datalen += 2 * sizeof(uint64_t);
1967 }
1968 m_need_runfunction = true;
1969 }
1970 break;
1971 }
1972
1973 case c_dv:
1974 if (m_vars[i].direction.bits ||
1975 m_vars[i].alloc_if ||
1976 m_vars[i].free_if) {
1977 ArrDesc *dvp = static_cast<ArrDesc*>(m_vars[i].into);
1978
1979 // debug dump
1980 __dv_desc_dump("INTO", dvp);
1981
1982 // send dope vector contents excluding base
1983 m_in_datalen += m_vars[i].size - sizeof(uint64_t);
1984 m_need_runfunction = true;
1985 }
1986 break;
1987
1988 case c_string_ptr:
1989 case c_data_ptr:
1990 case c_cean_var_ptr:
1991 case c_dv_ptr: {
1992 int64_t size = m_vars[i].size;
1993
1994 if (m_vars[i].type.dst == c_cean_var_ptr) {
1995 // array descriptor
2eab9666
IV
1996 const Arr_Desc *ap =
1997 static_cast<const Arr_Desc*>(m_vars[i].into);
5f520819
KY
1998
1999 // debug dump
2eab9666 2000 ARRAY_DESC_DUMP(" ", "INTO", ap, 1, src_is_for_mic);
5f520819
KY
2001
2002 // offset and length are derived from the array descriptor
2003 __arr_data_offset_and_length(ap, into_disp, size);
2004
2005 if (!is_arr_desc_contiguous(ap)) {
2006 m_vars[i].flags.is_noncont_src = 1;
2007 m_vars_extra[i].read_rng_dst =
2008 init_read_ranges_arr_desc(ap);
2009 if (!cean_ranges_match(
2010 m_vars_extra[i].read_rng_src,
2011 m_vars_extra[i].read_rng_dst)) {
2012 LIBOFFLOAD_ERROR(c_ranges_dont_match);
2013 }
2014 }
2015 m_vars[i].into = reinterpret_cast<char**>(ap->base);
2016 }
2017 else if (m_vars[i].type.dst == c_dv_ptr) {
2018 // need to send DV to the device unless it is 'nocopy'
2019 if (m_vars[i].direction.bits ||
2020 m_vars[i].alloc_if ||
2021 m_vars[i].free_if) {
2022 ArrDesc *dvp = *static_cast<ArrDesc**>(m_vars[i].into);
2023
2024 // debug dump
2025 __dv_desc_dump("INTO", dvp);
2026
2027 m_vars[i].direction.bits = c_parameter_in;
2028 }
2029 }
2030
2031 int64_t size_src = m_vars_extra[i].read_rng_src ?
2032 cean_get_transf_size(m_vars_extra[i].read_rng_src) :
2033 m_vars[i].size;
2034 int64_t size_dst = m_vars_extra[i].read_rng_dst ?
2035 cean_get_transf_size(m_vars_extra[i].read_rng_dst) :
2036 size;
2037             // It is assumed that the "into" size is not less than
2038             // the src size
2039 if (size_src > size_dst) {
2040 LIBOFFLOAD_ERROR(c_different_src_and_dstn_sizes,
2041 size_src, size_dst);
2042 exit(1);
2043 }
2044
2045 if (m_vars[i].direction.bits) {
2046 PtrData *ptr_data;
2047
2048 // base address
2049 void *base = *static_cast<void**>(m_vars[i].into);
2050
2051 if (m_vars[i].direction.in) {
2052 // allocate buffer
2053 if (m_vars[i].flags.is_stack_buf) {
2054                     // for stack persistent objects the ptr data is created
2055                     // by the var_desc with number 0.
2056                     // Its ptr_data is stored at m_stack_ptr_data
2057 ptr_data = m_stack_ptr_data;
2058 m_vars[i].flags.sink_addr = 1;
2059 }
2060 else if (m_vars[i].alloc_if) {
2eab9666
IV
2061 if (m_vars[i].flags.preallocated) {
2062 m_out_datalen += sizeof(void*);
2063 m_need_runfunction = true;
2064 break;
2065 }
5f520819
KY
2066 // add new entry
2067 if (!alloc_ptr_data(
2068 ptr_data,
2eab9666 2069 reinterpret_cast<char *>(base) + alloc_disp,
5f520819
KY
2070 (alloc_base != NULL) ?
2071 alloc_disp : into_disp,
2072 (alloc_base != NULL) ?
2073 alloc_size : size,
2074 alloc_disp,
2075 (alloc_base != NULL) ?
2eab9666
IV
2076 0 : m_vars[i].align,
2077 m_vars[i].flags.targetptr,
2078 m_vars[i].flags.preallocated,
2079 m_vars[i].flags.pin)) {
5f520819
KY
2080 return false;
2081 }
2eab9666
IV
2082 if (m_vars[i].flags.targetptr) {
2083 if (!init_mic_address(ptr_data)) {
2084 return false;
2085 }
2086 *static_cast<void**>(m_vars[i].into) = base =
2087 reinterpret_cast<void*>(ptr_data->mic_addr);
2088 }
5f520819
KY
2089 if (ptr_data->add_reference() == 0 &&
2090 ptr_data->mic_buf != 0) {
2091 // add buffer to the list of buffers that
2092 // are passed to dispatch call
2093 m_compute_buffers.push_back(
2094 ptr_data->mic_buf);
2095 }
2096 else {
2097 // will send buffer address to device
2098 m_vars[i].flags.sink_addr = 1;
2099 }
2100
2101 if (!ptr_data->is_static) {
2102 // need to add reference for buffer
2103 m_need_runfunction = true;
2104 }
2105 }
2106 else {
2107 // use existing association from pointer table
2eab9666
IV
2108 if (!find_ptr_data(ptr_data, base, into_disp,
2109 size, m_vars[i].flags.targetptr, true)) {
5f520819
KY
2110 return false;
2111 }
2112 m_vars[i].flags.sink_addr = 1;
2113 }
2114
2115 if (ptr_data->alloc_disp != 0) {
2116 m_vars[i].flags.alloc_disp = 1;
2117 m_in_datalen += sizeof(alloc_disp);
2118 }
2119
2120 if (m_vars[i].flags.sink_addr) {
2121                     // get buffer's address on the sink
2122 if (!init_mic_address(ptr_data)) {
2123 return false;
2124 }
2125
2126 m_in_datalen += sizeof(ptr_data->mic_addr);
2127 }
2128
2129 if (!ptr_data->is_static && m_vars[i].free_if) {
2130 // need to decrement buffer reference on target
2131 m_need_runfunction = true;
2132 }
2133
2134 // copy other pointer properties to var descriptor
2135 m_vars[i].mic_offset = ptr_data->mic_offset;
2136 m_vars[i].flags.is_static_dstn = ptr_data->is_static;
2137 }
2138 else {
2139 if (!find_ptr_data(ptr_data,
2140 base,
2141 into_disp,
2142 m_vars[i].size,
2eab9666 2143 false, false)) {
5f520819
KY
2144 return false;
2145 }
2146 }
2147 if (ptr_data) {
2148 into_offset = ptr_data ?
2149 (char*) base -
2150 (char*) ptr_data->cpu_addr.start() :
2151 0;
2152 }
2153 // save pointer data
2154 m_vars_extra[i].dst_data = ptr_data;
2155 }
2156 break;
2157 }
2158
2159 case c_func_ptr:
2160 break;
2161
2162 case c_dv_data:
2163 case c_dv_ptr_data:
2164 case c_dv_data_slice:
2165 case c_dv_ptr_data_slice:
2166 if (m_vars[i].direction.bits ||
2167 m_vars[i].alloc_if ||
2168 m_vars[i].free_if) {
2eab9666 2169 const Arr_Desc *ap;
5f520819
KY
2170 ArrDesc *dvp;
2171 PtrData *ptr_data;
2172 int64_t disp;
2173 int64_t size;
2174
2175 if (VAR_TYPE_IS_DV_DATA_SLICE(m_vars[i].type.dst)) {
2eab9666 2176 ap = static_cast<const Arr_Desc*>(m_vars[i].into);
5f520819
KY
2177
2178 // debug dump
2eab9666 2179 ARRAY_DESC_DUMP(" ", "INTO", ap, 0, src_is_for_mic);
5f520819
KY
2180
2181 dvp = (m_vars[i].type.dst == c_dv_data_slice) ?
2182 reinterpret_cast<ArrDesc*>(ap->base) :
2183 *reinterpret_cast<ArrDesc**>(ap->base);
2184 }
2185 else {
2186 dvp = (m_vars[i].type.dst == c_dv_data) ?
2187 static_cast<ArrDesc*>(m_vars[i].into) :
2188 *static_cast<ArrDesc**>(m_vars[i].into);
2189 }
2190 if (!__dv_is_contiguous(dvp)) {
2191 m_vars[i].flags.is_noncont_dst = 1;
2192 m_vars_extra[i].read_rng_dst =
2193 init_read_ranges_dv(dvp);
2194 }
2195 // size and displacement
2196 if (VAR_TYPE_IS_DV_DATA_SLICE(m_vars[i].type.dst)) {
2197 // offset and length are derived from the array
2198 // descriptor
2199 __arr_data_offset_and_length(ap, into_disp, size);
2200 if (m_vars[i].direction.bits) {
2201 if (!is_arr_desc_contiguous(ap)) {
2202 if (m_vars[i].flags.is_noncont_dst) {
2203 LIBOFFLOAD_ERROR(c_slice_of_noncont_array);
2204 return false;
2205 }
2206 m_vars[i].flags.is_noncont_dst = 1;
2207 m_vars_extra[i].read_rng_dst =
2208 init_read_ranges_arr_desc(ap);
2209 if (!cean_ranges_match(
2210 m_vars_extra[i].read_rng_src,
2211 m_vars_extra[i].read_rng_dst)) {
2212 LIBOFFLOAD_ERROR(c_ranges_dont_match);
2213 }
2214 }
2215 }
2216 }
2217 else {
2218 if (m_vars[i].flags.has_length) {
2219 size = __dv_data_length(dvp, m_vars[i].count);
2220 }
2221 else {
2222 size = __dv_data_length(dvp);
2223 }
2224 disp = 0;
2225 }
2226
2227 int64_t size_src =
2228 m_vars_extra[i].read_rng_src ?
2229 cean_get_transf_size(m_vars_extra[i].read_rng_src) :
2230 m_vars[i].size;
2231 int64_t size_dst =
2232 m_vars_extra[i].read_rng_dst ?
2233 cean_get_transf_size(m_vars_extra[i].read_rng_dst) :
2234 size;
2235                 // It is assumed that the "into" size is not less
2236                 // than the src size
2237 if (size_src > size_dst) {
2238 LIBOFFLOAD_ERROR(c_different_src_and_dstn_sizes,
2239 size_src, size_dst);
2240 exit(1);
2241 }
2242
2243 // base address
2244 void *base = reinterpret_cast<void*>(dvp->Base);
2245
2246 // allocate buffer
2247 if (m_vars[i].direction.in) {
2248 if (m_vars[i].alloc_if) {
2249 // add new entry
2250 if (!alloc_ptr_data(
2251 ptr_data,
2eab9666 2252 reinterpret_cast<char *>(base) + alloc_disp,
5f520819
KY
2253 (alloc_base != NULL) ?
2254 alloc_disp : into_disp,
2255 (alloc_base != NULL) ?
2256 alloc_size : size,
2257 alloc_disp,
2258 (alloc_base != NULL) ?
2eab9666
IV
2259 0 : m_vars[i].align,
2260 m_vars[i].flags.targetptr,
2261 m_vars[i].flags.preallocated,
2262 m_vars[i].flags.pin)) {
5f520819
KY
2263 return false;
2264 }
2265 if (ptr_data->add_reference() == 0 &&
2266 ptr_data->mic_buf !=0) {
2267 // add buffer to the list of buffers
2268 // that are passed to dispatch call
2269 m_compute_buffers.push_back(
2270 ptr_data->mic_buf);
2271 }
2272 else {
2273 // will send buffer address to device
2274 m_vars[i].flags.sink_addr = 1;
2275 }
2276
2277 if (!ptr_data->is_static) {
2278 // need to add reference for buffer
2279 m_need_runfunction = true;
2280 }
2281 }
2282 else {
2283 // use existing association from pointer table
2eab9666
IV
2284 if (!find_ptr_data(ptr_data, base, into_disp,
2285 size, m_vars[i].flags.targetptr, true)) {
5f520819
KY
2286 return false;
2287 }
2288
2289 // need to update base in dope vector on device
2290 m_vars[i].flags.sink_addr = 1;
2291 }
2292
2293 if (ptr_data->alloc_disp != 0) {
2294 m_vars[i].flags.alloc_disp = 1;
2295 m_in_datalen += sizeof(alloc_disp);
2296 }
2297
2298 if (m_vars[i].flags.sink_addr) {
2299                     // get buffer's address on the sink
2300 if (!init_mic_address(ptr_data)) {
2301 return false;
2302 }
2303 m_in_datalen += sizeof(ptr_data->mic_addr);
2304 }
2305
2306 if (!ptr_data->is_static && m_vars[i].free_if) {
2307 // need to decrement buffer reference on target
2308 m_need_runfunction = true;
2309 }
2310
2311 // offset to base from the beginning of the buffer
2312 // memory
2313 into_offset =
2314 (char*) base - (char*) ptr_data->cpu_addr.start();
2315
2316 // copy other pointer properties to var descriptor
2317 m_vars[i].mic_offset = ptr_data->mic_offset;
2318 m_vars[i].flags.is_static_dstn = ptr_data->is_static;
2319 }
2320 else { // src_is_for_mic
2321 if (!find_ptr_data(ptr_data,
2322 base,
2323 into_disp,
2324 size,
2eab9666 2325 false, false)) {
5f520819
KY
2326 return false;
2327 }
2328 into_offset = !ptr_data ?
2329 0 :
2330 (char*) base - (char*) ptr_data->cpu_addr.start();
2331 }
2332
2333 // save pointer data
2334 m_vars_extra[i].dst_data = ptr_data;
2335 }
2336 break;
2337
2338 default:
2339 LIBOFFLOAD_ERROR(c_unknown_var_type, m_vars[i].type.src);
2340 LIBOFFLOAD_ABORT;
2341 }
2342         // if "into" is used on the CPU, save its offset and disp
2343 if (m_vars[i].direction.out) {
2344 m_vars_extra[i].cpu_offset = into_offset;
2345 m_vars_extra[i].cpu_disp = into_disp;
2346 }
2347 else {
2348 if (m_vars[i].flags.is_stack_buf) {
2349 into_offset = static_cast<char*>(m_vars[i].into) -
2350 m_device.m_persist_list.front().cpu_stack_addr;
2351 }
2352 m_vars[i].offset = into_offset;
2353 m_vars[i].disp = into_disp;
2354 }
2355 }
2356
2357 return true;
2358}
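
// A note on the bookkeeping above: for each variable, "offset" is the
// distance from the start of the associated COI buffer to the object's base
// address, "disp" is the additional displacement inside the object (nonzero
// for array sections), and cpu_offset/cpu_disp in m_vars_extra keep the
// host-side values before offset/disp are overwritten with the "into" ones.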
2359
2360bool OffloadDescriptor::setup_misc_data(const char *name)
2361{
2362 OffloadTimer timer(get_timer_data(), c_offload_host_setup_misc_data);
2363
2364     // we can skip the run function call together with the wait if the offloaded
2365     // region is empty and there is no user-defined non-pointer IN/OUT data
2366 if (m_need_runfunction) {
2367 // variable descriptors are sent as input data
2368 m_in_datalen += m_vars_total * sizeof(VarDesc);
2369
2370 // timer data is sent as a part of the output data
2371 m_out_datalen += OFFLOAD_TIMER_DATALEN();
2372
2373 // max from input data and output data length
2374 uint64_t data_len = m_in_datalen > m_out_datalen ? m_in_datalen :
2375 m_out_datalen;
2376
2377 // Misc data has the following layout
2378 // <Function Descriptor>
2379 // <Function Name>
2380 // <In/Out Data> (optional)
2381 //
2382         // We can transfer copyin/copyout data in the misc/return data that is
2383         // passed to the run function call if its size does not exceed
2384         // COI_PIPELINE_MAX_IN_MISC_DATA_LEN. Otherwise we have to allocate
2385         // a buffer for it.
2386
2387 m_func_desc_size = sizeof(FunctionDescriptor) + strlen(name) + 1;
2388 m_func_desc_size = (m_func_desc_size + 7) & ~7;
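        // The statement above rounds m_func_desc_size up to the next multiple
        // of 8, presumably so that the in/out data appended right after the
        // descriptor stays 8-byte aligned.  Numerically:
        //     (21 + 7) & ~7 == 24
        //     (24 + 7) & ~7 == 24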
2389
2390 int misc_data_offset = 0;
2391 int misc_data_size = 0;
2392 if (data_len > 0) {
2393 if (m_func_desc_size +
2394 m_in_datalen <= COI_PIPELINE_MAX_IN_MISC_DATA_LEN &&
2395 m_out_datalen <= COI_PIPELINE_MAX_IN_MISC_DATA_LEN) {
2396 // use misc/return data for copyin/copyout
2397 misc_data_offset = m_func_desc_size;
2398 misc_data_size = data_len;
2399 }
2400 else {
2401 OffloadTimer timer_buf(get_timer_data(),
2402 c_offload_host_alloc_data_buffer);
2403
2404 // send/receive data using buffer
2405 COIRESULT res = COI::BufferCreate(data_len,
2406 COI_BUFFER_NORMAL,
2407 0, 0,
2408 1, &m_device.get_process(),
2409 &m_inout_buf);
2410 if (res != COI_SUCCESS) {
2411 if (m_status != 0) {
2412 m_status->result = translate_coi_error(res);
2413 return false;
2414 }
2415 report_coi_error(c_buf_create, res);
2416 }
2417
2418 m_compute_buffers.push_back(m_inout_buf);
2419 m_destroy_buffers.push_back(m_inout_buf);
2420 }
2421 }
2422
2423 // initialize function descriptor
0b7c37ee
IV
2424 m_func_desc = (FunctionDescriptor*) calloc(1, m_func_desc_size
2425 + misc_data_size);
5f520819
KY
2426 if (m_func_desc == NULL)
2427 LIBOFFLOAD_ERROR(c_malloc);
2428 m_func_desc->console_enabled = console_enabled;
2eab9666
IV
2429 m_func_desc->timer_enabled = offload_report_enabled &&
2430 (timer_enabled || offload_report_level);
2431 m_func_desc->offload_report_level = offload_report_enabled ?
2432 offload_report_level : 0;
5f520819
KY
2433 m_func_desc->offload_number = GET_OFFLOAD_NUMBER(get_timer_data());
2434 m_func_desc->in_datalen = m_in_datalen;
2435 m_func_desc->out_datalen = m_out_datalen;
2436 m_func_desc->vars_num = m_vars_total;
2437 m_func_desc->data_offset = misc_data_offset;
2438
2439 // append entry name
2440 strcpy(m_func_desc->data, name);
2441 }
2442
2443 return true;
2444}
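
// Illustration with hypothetical numbers: for a descriptor plus entry name
// occupying 40 bytes, m_func_desc_size stays 40 after alignment; if the
// copyin data (say 256 bytes) then fits together with it under
// COI_PIPELINE_MAX_IN_MISC_DATA_LEN, data_offset becomes 40 and the data
// travels inside the misc block, otherwise a separate COI buffer is created
// for the transfer.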
2445
2eab9666
IV
2446void OffloadDescriptor::setup_omp_async_info()
2447{
2448 OFFLOAD_TRACE(2, "setup_omp_async_info\n");
2449 OmpAsyncLastEventType event_type = m_need_runfunction ?
2450 c_last_runfunc : c_last_write;
2451 int last_in = m_need_runfunction ? 0 : -1;
2452 int i;
2453
2454 for (i = m_vars_total - 1; i >=0; i--) {
2455 switch (m_vars[i].type.dst) {
2456 case c_data:
2457 case c_void_ptr:
2458 case c_cean_var:
2459 if (m_vars[i].direction.out &&
2460 m_vars[i].flags.is_static_dstn) {
2461 event_type = c_last_read;
2462 }
2463 else if (last_in < 0 && m_vars[i].direction.in &&
2464 m_vars[i].flags.is_static_dstn) {
2465 last_in = i;
2466 }
2467 break;
2468 case c_string_ptr:
2469 case c_data_ptr:
2470 case c_cean_var_ptr:
2471 case c_dv_ptr:
2472 case c_dv_data:
2473 case c_dv_ptr_data:
2474 case c_dv_data_slice:
2475 case c_dv_ptr_data_slice:
2476
2477 if (m_vars[i].direction.out) {
2478 event_type = c_last_read;
2479 }
2480 else if (last_in < 0 && m_vars[i].direction.in) {
2481 last_in = i;
2482 }
2483 break;
2484 default:
2485 break;
2486 }
2487 if (event_type == c_last_read) {
2488 break;
2489 }
2490 }
2491
2492 if (event_type == c_last_read) {
2493 m_vars_extra[i].omp_last_event_type = c_last_read;
2494 }
2495 else if (event_type == c_last_write) {
2496 m_vars_extra[last_in].omp_last_event_type = c_last_write;
2497 }
2498 m_omp_async_last_event_type = event_type;
2499 OFFLOAD_TRACE(2, "setup_omp_async_info: event_type=%d\n",
2500 m_omp_async_last_event_type);
2501}
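
// Summarizing the scan above: the last asynchronous event of an OpenMP async
// offload is a buffer read if any variable is copied out, otherwise the run
// function itself when the region is non-empty, otherwise the last buffer
// write; the corresponding m_vars_extra entry is tagged so that the event
// callback can later be registered on exactly that operation.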
2502
2503extern "C" {
2504 void offload_proxy_task_completed_ooo(
2505 COIEVENT e,
2506 const COIRESULT r,
2507 const void *info
2508 )
2509 {
2510 /* TODO: Call callback function, pass info. */
2511 }
2512}
2513
2514void OffloadDescriptor::register_omp_event_call_back(
2515 const COIEVENT *event,
2516 const void *info)
2517{
2518 OFFLOAD_TRACE(2, "register_omp_event_call_back(event=%p, info=%p)\n",
2519 event, info);
2520 if (COI::EventRegisterCallback) {
2521 COI::EventRegisterCallback(
2522 *event,
2523 &offload_proxy_task_completed_ooo,
2524 info, 0);
2525 OFFLOAD_TRACE(2,
2526 "COI::EventRegisterCallback found; callback registered\n");
2527 }
2528}
2529
5f520819 2530bool OffloadDescriptor::wait_dependencies(
2eab9666
IV
2531 const void **waits,
2532 int num_waits,
2533 _Offload_stream handle
5f520819
KY
2534)
2535{
2536 OffloadTimer timer(get_timer_data(), c_offload_host_wait_deps);
2537 bool ret = true;
2eab9666
IV
2538 OffloadDescriptor *task;
2539 if (num_waits == 0) {
2540 return true;
2541 }
5f520819 2542
2eab9666
IV
2543 // wait for streams
2544 if (num_waits == -1) {
2545 Stream * stream;
2546 // some specific stream of the device
2547 if (handle != 0) {
2548 stream = Stream::find_stream(handle, false);
5f520819 2549
2eab9666
IV
2550 // the stream was not created or was destroyed
2551 if (!stream) {
2552 LIBOFFLOAD_ERROR(c_offload_no_stream, m_device.get_logical_index());
2553 LIBOFFLOAD_ABORT;
2554 }
2555 task = stream->get_last_offload();
5f520819 2556
2eab9666
IV
2557 // offload was completed by previous offload_wait pragma
2558 // or wait clause
2559 if (task == 0) {
2560 return true;
2561 }
2562 if (!task->offload_finish(0)) { //arg is 0 for is_traceback
2563 ret = false;
2564 }
2565 task->cleanup();
2566 stream->set_last_offload(NULL);
2567 delete task;
5f520819 2568 }
2eab9666
IV
2569 // all streams of the device or over all devices
2570 else {
2571 StreamMap stream_map = Stream::all_streams;
2572 for (StreamMap::iterator it = stream_map.begin();
2573 it != stream_map.end(); it++) {
2574 Stream * stream = it->second;
5f520819 2575
2eab9666
IV
2576 if (!m_wait_all_devices &&
2577 stream->get_device() != m_device.get_logical_index()) {
2578 continue;
2579 }
2580 // get associated async task
2581 OffloadDescriptor *task = stream->get_last_offload();
5f520819 2582
2eab9666
IV
2583 // offload was completed by offload_wait pragma or wait clause
2584 if (task == 0) {
2585 continue;
2586 }
2587 if (!task->offload_finish(0)) { //arg is 0 for is_traceback
2588 ret = false;
2589 }
2590 task->cleanup();
2591 stream->set_last_offload(NULL);
2592 delete task;
2593 }
2594 // no uncompleted streams
2595 return true;
2596 }
2597 }
2598 else {
2599         // if handle is equal to no_stream it is a wait for signals
2600 for (int i = 0; i < num_waits; i++) {
2601 _Offload_stream stream_handle;
2602 Stream *stream;
2603 task = m_device.find_signal(waits[i], true);
2604 if (task == 0) {
2605 LIBOFFLOAD_ERROR(c_offload1, m_device.get_logical_index(),
2606 waits[i]);
2607 LIBOFFLOAD_ABORT;
2608 }
2609 else if (task == SIGNAL_IS_REMOVED) {
2610 continue;
2611 }
2612 if (!task->offload_finish(0)) { //arg is 0 for is_traceback
2613 ret = false;
2614 }
2615 task->cleanup();
2616             // if the offload both has a signal and is the last offload of its
2617             // stream, we must wipe out the "last_offload" reference as
2618             // the offload is already finished.
2619 stream_handle = task->m_stream;
2620 if (stream_handle != -1) {
2621 stream = Stream::find_stream(stream_handle, false);
2622 if (stream && stream->get_last_offload() == task) {
2623 stream->set_last_offload(NULL);
2624 }
2625 }
2626 delete task;
2627 }
2628 }
5f520819
KY
2629 return ret;
2630}
2631
2eab9666 2632bool OffloadDescriptor::offload_wrap(
5f520819
KY
2633 const char *name,
2634 bool is_empty,
2635 VarDesc *vars,
2636 VarDesc2 *vars2,
2637 int vars_total,
2638 const void **waits,
2639 int num_waits,
2640 const void **signal,
2641 int entry_id,
2eab9666
IV
2642 const void *stack_addr,
2643 OffloadFlags offload_flags
5f520819
KY
2644)
2645{
2eab9666
IV
2646 OffloadWaitKind wait_kind = c_offload_wait_signal;
2647 bool is_traceback = offload_flags.bits.fortran_traceback;
2648
2649     // define the kind of wait, if any;
2650     // it can be one of the following kinds:
2651 // 1. c_offload_wait_signal for "offload_wait wait(signal)"
2652 // 2. c_offload_wait_stream for "offload_wait stream(stream)"
2653 // 3. c_offload_wait_all_streams for "offload_wait stream(0)"
2654 if (num_waits == -1) {
2655 wait_kind = (m_stream == 0) ?
2656 c_offload_wait_all_streams :
2657 c_offload_wait_stream;
2658 }
2659 char buf[35];
2660 const char *stream_str;
2661
2662 if (m_stream == no_stream || num_waits >= 0) {
2663 stream_str = "none";
2664 }
2665 else if (m_stream == 0) {
2666 stream_str = "all";
2667 }
2668 else {
2669 sprintf(buf, "%#llx", m_stream);
2670 stream_str = buf;
2671 }
2672
5f520819
KY
2673 if (signal == 0) {
2674 OFFLOAD_DEBUG_TRACE_1(1,
2675 GET_OFFLOAD_NUMBER(get_timer_data()),
2676 c_offload_init_func,
2677 "Offload function %s, is_empty=%d, #varDescs=%d, "
2eab9666
IV
2678 "signal=none, stream=%s, #waits=%d%c",
2679 name, is_empty, vars_total, stream_str, num_waits,
2680 num_waits == 0 ? '\n' : ' ');
2681 // Breaks the norm of using OFFLOAD_DEBUG_TRACE to print the waits
2682 // since the number of waits is not fixed.
2683 if (!OFFLOAD_DO_TRACE && (console_enabled >= 1)) {
2684 if (num_waits) {
2685 printf("(");
2686 if (m_stream == no_stream) {
2687 printf("%p", waits[0]);
2688 for (int i = 1; i < num_waits; i++) {
2689 printf(", %p", waits[i]);
2690 }
2691 }
2692 else if (m_stream != 0) {
2693 printf("%#x", m_stream);
2694 }
2695 else {
2696 printf(" all streams");
2697 }
2698 printf(")");
2699 }
2700 printf("\n");
2701 fflush(NULL);
2702 }
2703 // stream in wait is reported further in OFFLOAD_REPORT for waits
2704 if (m_stream != no_stream && num_waits == 0) {
2705 OFFLOAD_REPORT(3, GET_OFFLOAD_NUMBER(get_timer_data()),
2706 c_offload_stream,
2707 "%d\n", m_stream);
2708 }
5f520819
KY
2709 OFFLOAD_REPORT(3, GET_OFFLOAD_NUMBER(get_timer_data()),
2710 c_offload_signal,
2711 "none %d\n", 0);
2712 }
2713 else {
2714 OFFLOAD_DEBUG_TRACE_1(1,
2715 GET_OFFLOAD_NUMBER(get_timer_data()),
2716 c_offload_init_func,
2717 "Offload function %s, is_empty=%d, #varDescs=%d, "
2eab9666
IV
2718 "signal=%p, stream=%s, #waits=%d%c",
2719 name, is_empty, vars_total, *signal, stream_str, num_waits,
2720 num_waits == 0 ? '\n' : ' ');
2721 // Breaks the norm of using OFFLOAD_DEBUG_TRACE to print the waits
2722 // since the number of waits is not fixed.
2723 if (!OFFLOAD_DO_TRACE && (console_enabled >= 1)) {
2724 if (num_waits) {
2725 printf("(");
2726 if (m_stream == no_stream) {
2727 printf("%p", waits[0]);
2728 for (int i = 1; i < num_waits; i++) {
2729 printf(", %p", waits[i]);
2730 }
2731 printf(")");
2732 }
2733 else if (m_stream != 0) {
2734 printf("%#x", m_stream);
2735 }
2736 else {
2737 printf(" all streams");
2738 }
2739 printf(")");
2740 }
2741 printf("\n");
2742 fflush(NULL);
2743 }
2744 // stream in wait is reported further in OFFLOAD_REPORT for waits
2745 if (m_stream != no_stream && num_waits == 0) {
2746 OFFLOAD_REPORT(3, GET_OFFLOAD_NUMBER(get_timer_data()),
2747 c_offload_stream,
2748 "%d\n", m_stream);
2749 }
5f520819
KY
2750 OFFLOAD_REPORT(3, GET_OFFLOAD_NUMBER(get_timer_data()),
2751 c_offload_signal,
2752 "%d\n", signal);
2753 }
2eab9666
IV
2754 if (console_enabled >= 1 && offload_flags.flags != 0) {
2755 trace_offload_flags(get_timer_data(), offload_flags);
2756 }
2757
5f520819 2758 OFFLOAD_REPORT(3, GET_OFFLOAD_NUMBER(get_timer_data()),
2eab9666
IV
2759 c_offload_wait, "%d\n",
2760 wait_kind, num_waits,
2761 (wait_kind == c_offload_wait_signal) ?
2762 waits :
2763 reinterpret_cast<const void **>(m_stream));
5f520819
KY
2764
2765 if (m_status != 0) {
2766 m_status->result = OFFLOAD_SUCCESS;
2767 m_status->device_number = m_device.get_logical_index();
2768 }
2769
2eab9666 2770 m_initial_need_runfunction = m_need_runfunction = !is_empty;
5f520819
KY
2771
2772 // wait for dependencies to finish
2eab9666 2773 if (!wait_dependencies(waits, num_waits, m_stream)) {
5f520819
KY
2774 cleanup();
2775 return false;
2776 }
2777
2778 // setup buffers
2779 if (!setup_descriptors(vars, vars2, vars_total, entry_id, stack_addr)) {
2780 cleanup();
2781 return false;
2782 }
2783
2eab9666
IV
2784 if (offload_flags.bits.omp_async) {
2785 setup_omp_async_info();
2786 }
2787
5f520819 2788 // initiate send for pointers. Want to do it as early as possible.
2eab9666
IV
2789 if (!send_pointer_data(signal != 0 || offload_flags.bits.omp_async,
2790 signal)) {
5f520819
KY
2791 cleanup();
2792 return false;
2793 }
2794
2795 // setup misc data for run function
2796 if (!setup_misc_data(name)) {
2797 cleanup();
2798 return false;
2799 }
2800
2801 // gather copyin data into buffer
2802 if (!gather_copyin_data()) {
2803 cleanup();
2804 return false;
2805 }
2806
2807 // Start the computation
2eab9666 2808 if (!compute(signal)) {
5f520819
KY
2809 cleanup();
2810 return false;
2811 }
2812
2813 // initiate receive for pointers
2eab9666
IV
2814 if (!receive_pointer_data(signal != 0 || offload_flags.bits.omp_async,
2815 true, signal)) {
5f520819
KY
2816 cleanup();
2817 return false;
2818 }
2eab9666 2819 if (offload_flags.bits.omp_async) {
5f520819
KY
2820 return true;
2821 }
2eab9666
IV
2822     // if there is a signal or stream, save the descriptor for later use.
2823     // num_waits == -1 is for offload_wait and there is nothing to save
2824 if (num_waits != -1 && (signal != 0 || m_stream != no_stream)) {
2825 if (signal != 0) {
2826 m_device.add_signal(*signal, this);
2827 }
5f520819 2828
2eab9666
IV
2829 if (m_stream != no_stream && m_stream != 0) {
2830 Stream* stream = Stream::find_stream(m_stream, false);
2831 if (stream) {
2832 stream->set_last_offload(this);
2833 }
2834 else {
2835 LIBOFFLOAD_ERROR(c_offload_no_stream, m_device.get_logical_index());
2836 LIBOFFLOAD_ABORT;
2837 }
2838 }
2839         // if there is a clause with alloc_if(1) and preallocated, we need to
2840         // call offload_finish after the run function
2841 if (!m_preallocated_alloc) {
2842 return true;
2843 }
2844 }
2845
2846 // wait for the offload to finish.
2847 if (!offload_finish(is_traceback)) {
2848 cleanup();
2849 return false;
5f520819
KY
2850 }
2851
2852 cleanup();
2853 return true;
2854}
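
// In outline, a successful synchronous offload above runs through
//     wait_dependencies -> setup_descriptors -> [setup_omp_async_info]
//     -> send_pointer_data -> setup_misc_data -> gather_copyin_data
//     -> compute -> receive_pointer_data -> offload_finish -> cleanup,
// and any failing step calls cleanup() and returns false instead.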
2855
2eab9666
IV
2856bool OffloadDescriptor::offload(
2857 const char *name,
2858 bool is_empty,
2859 VarDesc *vars,
2860 VarDesc2 *vars2,
2861 int vars_total,
2862 const void **waits,
2863 int num_waits,
2864 const void **signal,
2865 int entry_id,
2866 const void *stack_addr,
2867 OffloadFlags offload_flags
2868)
2869{
2870 bool res;
2871 res = offload_wrap(name, is_empty, vars, vars2, vars_total,
2872 waits, num_waits, signal, entry_id,
2873 stack_addr, offload_flags);
2874 if (res == false && !m_traceback_called) {
2875 if (offload_flags.bits.fortran_traceback) {
2876 OFFLOAD_TRACE(3,
2877 "Calling Fortran library to continue traceback from MIC\n");
2878 FORTRAN_TRACE_BACK(m_status->result);
2879 m_traceback_called = true;
2880 }
2881 }
2882 return res;
2883}
2884
2885bool OffloadDescriptor::offload_finish(
2886 bool is_traceback
2887)
5f520819
KY
2888{
2889 COIRESULT res;
2890
2891 // wait for compute dependencies to become signaled
2892 if (m_in_deps_total > 0) {
2893 OffloadTimer timer(get_timer_data(), c_offload_host_wait_compute);
2894
2895 if (__offload_active_wait) {
2896 // keep CPU busy
2897 do {
2898 res = COI::EventWait(m_in_deps_total, m_in_deps, 0, 1, 0, 0);
2899 }
2900 while (res == COI_TIME_OUT_REACHED);
2901 }
2902 else {
2903 res = COI::EventWait(m_in_deps_total, m_in_deps, -1, 1, 0, 0);
2904 }
2905
2906 if (res != COI_SUCCESS) {
2eab9666 2907 if (m_status != 0 && !m_traceback_called) {
5f520819 2908 m_status->result = translate_coi_error(res);
2eab9666
IV
2909 if (is_traceback) {
2910 OFFLOAD_TRACE(3,
2911 "Calling Fortran library to continue traceback from MIC\n");
2912 FORTRAN_TRACE_BACK(m_status->result);
2913 m_traceback_called = true;
2914 }
5f520819
KY
2915 return false;
2916 }
2eab9666
IV
2917
2918 if (is_traceback && !m_traceback_called) {
2919 OFFLOAD_TRACE(3,
2920 "Calling Fortran library to continue traceback from MIC\n");
2921 FORTRAN_TRACE_BACK(OFFLOAD_ERROR);
2922 m_traceback_called = true;
2923 }
2924
5f520819
KY
2925 report_coi_error(c_event_wait, res);
2926 }
2927 }
2928
2929 // scatter copyout data received from target
2930 if (!scatter_copyout_data()) {
2931 return false;
2932 }
2eab9666
IV
2933
2934 if (m_out_with_preallocated &&
2935 !receive_pointer_data(m_out_deps_total > 0, false, NULL)) {
2936 cleanup();
2937 return false;
2938 }
2939
5f520819
KY
2940 // wait for receive dependencies to become signaled
2941 if (m_out_deps_total > 0) {
2942 OffloadTimer timer(get_timer_data(), c_offload_host_wait_buffers_reads);
2943
2944 if (__offload_active_wait) {
2945 // keep CPU busy
2946 do {
2947 res = COI::EventWait(m_out_deps_total, m_out_deps, 0, 1, 0, 0);
2948 }
2949 while (res == COI_TIME_OUT_REACHED);
2950 }
2951 else {
2952 res = COI::EventWait(m_out_deps_total, m_out_deps, -1, 1, 0, 0);
2953 }
2954
2955 if (res != COI_SUCCESS) {
2956 if (m_status != 0) {
2957 m_status->result = translate_coi_error(res);
2958 return false;
2959 }
2960 report_coi_error(c_event_wait, res);
2961 }
2962 }
2963
2964 // destroy buffers
2965 {
2966 OffloadTimer timer(get_timer_data(), c_offload_host_destroy_buffers);
2967
2968 for (BufferList::const_iterator it = m_destroy_buffers.begin();
2969 it != m_destroy_buffers.end(); it++) {
2970 res = COI::BufferDestroy(*it);
2971 if (res != COI_SUCCESS) {
2972 if (m_status != 0) {
2973 m_status->result = translate_coi_error(res);
2974 return false;
2975 }
2976 report_coi_error(c_buf_destroy, res);
2977 }
2978 }
2979 }
2980
2981 return true;
2982}
2983
2984void OffloadDescriptor::cleanup()
2985{
2986 // release device in orsl
2987 ORSL::release(m_device.get_logical_index());
2988
2989 OFFLOAD_TIMER_STOP(get_timer_data(), c_offload_host_total_offload);
2990
2991 // report stuff
2992 Offload_Report_Epilog(get_timer_data());
2993}
2994
2995bool OffloadDescriptor::is_signaled()
2996{
2997 bool signaled = true;
2998 COIRESULT res;
2999
3000 // check compute and receive dependencies
3001 if (m_in_deps_total > 0) {
3002 res = COI::EventWait(m_in_deps_total, m_in_deps, 0, 1, 0, 0);
3003 signaled = signaled && (res == COI_SUCCESS);
3004 }
3005 if (m_out_deps_total > 0) {
3006 res = COI::EventWait(m_out_deps_total, m_out_deps, 0, 1, 0, 0);
3007 signaled = signaled && (res == COI_SUCCESS);
3008 }
3009
3010 return signaled;
3011}
3012
2eab9666
IV
3013static Arr_Desc * make_arr_desc(
3014 void* ptr_val,
3015 int64_t extent_start_val,
3016 int64_t extent_elements_val,
3017 int64_t size
3018)
3019{
3020 Arr_Desc *res;
3021 res = (Arr_Desc *)malloc(sizeof(Arr_Desc));
3022 if (res == NULL)
3023 LIBOFFLOAD_ERROR(c_malloc);
3024 res->base = reinterpret_cast<int64_t>(ptr_val);
3025 res->rank = 1;
3026 res->dim[0].size = size;
3027 res->dim[0].lindex = 0;
3028 res->dim[0].lower = extent_start_val;
3029 res->dim[0].upper = extent_elements_val + extent_start_val - 1;
3030 res->dim[0].stride = 1;
3031 return res;
3032}
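
// A short usage sketch with hypothetical values (not taken from this file):
// describing 100 contiguous 8-byte elements starting at index 0 of a buffer
// pointed to by p would be
//
//     Arr_Desc *ad = make_arr_desc(p, /*extent_start_val=*/0,
//                                  /*extent_elements_val=*/100, /*size=*/8);
//     // ad->rank == 1, ad->dim[0].lower == 0, ad->dim[0].upper == 99,
//     // ad->dim[0].size == 8, ad->dim[0].stride == 1
//
// The descriptor itself is heap-allocated with malloc.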
3033
5f520819
KY
3034 // Send pointer data if source or destination or both of them are
3035 // noncontiguous. There is a guarantee that the length of the destination
2eab9666 3036 // is enough for the transferred data.
5f520819
KY
3037bool OffloadDescriptor::send_noncontiguous_pointer_data(
3038 int i,
3039 PtrData* src_data,
3040 PtrData* dst_data,
2eab9666
IV
3041 COIEVENT *event,
3042 uint64_t &data_sent,
3043 uint32_t in_deps_amount,
3044 COIEVENT *in_deps
5f520819
KY
3045 )
3046{
3047 int64_t offset_src, offset_dst;
3048 int64_t length_src, length_dst;
3049 int64_t length_src_cur, length_dst_cur;
2eab9666 3050 int64_t send_size;
5f520819
KY
3051 COIRESULT res;
3052 bool dst_is_empty = true;
3053 bool src_is_empty = true;
3054
2eab9666
IV
3055 data_sent = 0;
3056
5f520819
KY
3057 // Set length_src and length_dst
3058 length_src = (m_vars_extra[i].read_rng_src) ?
3059 m_vars_extra[i].read_rng_src->range_size : m_vars[i].size;
3060 length_dst = !m_vars[i].into ? length_src :
3061 (m_vars_extra[i].read_rng_dst) ?
3062 m_vars_extra[i].read_rng_dst->range_size : m_vars[i].size;
3063 send_size = (length_src < length_dst) ? length_src : length_dst;
3064
2eab9666
IV
3065     // If BufferWriteMultiD is defined we can set the values of the required
3066     // arguments and transfer noncontiguous data via a call to the COI routine.
3067 if (__offload_use_coi_noncontiguous_transfer && COI::BufferWriteMultiD) {
3068 struct Arr_Desc* arr_desc_dst;
3069 struct Arr_Desc* arr_desc_src;
3070 int64_t size_src, size_dst;
3071 char *base = offload_get_src_base(static_cast<char*>(m_vars[i].ptr),
3072 m_vars[i].type.src);
3073 COIBUFFER dst_buf = m_vars[i].into ?
3074 m_vars_extra[i].dst_data->mic_buf :
3075 m_vars_extra[i].src_data->mic_buf;
3076
3077 offset_src = (m_vars_extra[i].read_rng_src)?
3078 m_vars_extra[i].read_rng_src->init_offset : m_vars_extra[i].cpu_disp;
3079 size_src = m_vars_extra[i].read_rng_src ?
3080 cean_get_transf_size(m_vars_extra[i].read_rng_src) :
3081 m_vars[i].size;
3082
3083 offset_dst = (m_vars_extra[i].read_rng_dst)?
3084 m_vars_extra[i].read_rng_dst->init_offset : m_vars[i].disp;
3085 size_dst = m_vars_extra[i].read_rng_dst ?
3086 cean_get_transf_size(m_vars_extra[i].read_rng_dst) : m_vars[i].size;
3087
3088 int64_t el_size = (!m_vars[i].into ||
3089 (m_vars_extra[i].read_rng_src && m_vars_extra[i].read_rng_dst)) ?
3090 1 :
3091 m_vars_extra[i].read_rng_src ?
3092 m_vars_extra[i].read_rng_src->arr_desc->dim[
3093 m_vars_extra[i].read_rng_src->arr_desc->rank - 1].size :
3094 m_vars_extra[i].read_rng_dst->arr_desc->dim[
3095 m_vars_extra[i].read_rng_dst->arr_desc->rank - 1].size;
3096
3097 arr_desc_src = (m_vars_extra[i].read_rng_src) ?
3098 m_vars_extra[i].read_rng_src->arr_desc :
3099                           make_arr_desc(NULL, // not required for source
3100 offset_src/el_size, size_src/el_size, el_size);
3101
3102 arr_desc_dst = !m_vars[i].into ?
3103 arr_desc_src :
3104 (m_vars_extra[i].read_rng_dst) ?
3105 m_vars_extra[i].read_rng_dst->arr_desc :
3106 make_arr_desc(NULL,
3107 offset_dst/el_size, size_src/el_size, el_size);
3108
3109 int64_t alloc_disp = m_vars[i].into ?
3110 m_vars_extra[i].dst_data->alloc_disp :
3111 m_vars_extra[i].src_data->alloc_disp;
3112
3113 arr_desc_src->base = reinterpret_cast<int64_t>(base);
3114 arr_desc_dst->base = 0;
3115
3116 res = COI::BufferWriteMultiD(
3117 dst_buf, // in_DestBuffer,
3118 m_device.get_process(), // DestProcess,
3119 m_vars[i].offset + m_vars[i].mic_offset -
3120 alloc_disp, // Offset
3121 (void*)arr_desc_dst, // descriptor of DestArray
3122 (void*)arr_desc_src, // descriptor of SrcArray
3123 COI_COPY_UNSPECIFIED, // Type
3124 in_deps_amount, // Number of in Dependencies
3125 in_deps, // array of in Dependencies
3126 event); // out Dependency
3127 if (res != COI_SUCCESS) {
3128 if (m_status != 0) {
3129 m_status->result = translate_coi_error(res);
3130 return false;
3131 }
3132 report_coi_error(c_buf_copy, res);
3133 }
3134 return(true);
3135 }
3136
3137     // if an event is defined we must replicate it for all contiguous intervals
3138     // that will be copied/written.
3139     // Take into account that we already have 1 event.
3140 if (event) {
3141 m_in_deps_allocated += (length_src / send_size) *
3142 ((m_vars_extra[i].read_rng_src) ?
3143 m_vars_extra[i].read_rng_src->range_max_number : 1) ;
3144 m_in_deps =
3145 (COIEVENT*)realloc(m_in_deps, sizeof(COIEVENT) * m_in_deps_allocated);
3146 m_in_deps_total--;
3147 }
3148
5f520819
KY
3149     // successively get contiguous ranges,
3150     // compute the corresponding destination offset and send the data
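    // A sketch of the loop below: two cursors walk the source and destination
    // ranges in parallel, send_size bytes at a time (send_size is the smaller
    // of the two contiguous range lengths); whichever side exhausts its
    // current piece fetches its next range, and the loop ends when the source
    // has no ranges left:
    //
    //     while (source has data) {
    //         refill source cursor if empty; refill destination cursor if empty;
    //         copy send_size bytes from (cpu_offset + offset_src)
    //                                to (offset + offset_dst);
    //     }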
3151 do {
3152 if (src_is_empty) {
3153 if (m_vars_extra[i].read_rng_src) {
3154 if (!get_next_range(m_vars_extra[i].read_rng_src,
3155 &offset_src)) {
3156 // source ranges are over - nothing to send
3157 break;
3158 }
3159 }
3160 else if (data_sent == 0) {
3161 offset_src = m_vars_extra[i].cpu_disp;
3162 }
3163 else {
3164 break;
3165 }
3166 length_src_cur = length_src;
3167 }
3168 else {
3169             // if the source is contiguous or its contiguous range is greater
3170             // than the destination one
3171 offset_src += send_size;
3172 }
3173 length_src_cur -= send_size;
3174 src_is_empty = length_src_cur == 0;
3175
3176 if (dst_is_empty) {
3177 if (m_vars[i].into) {
3178 if (m_vars_extra[i].read_rng_dst) {
3179 if (!get_next_range(m_vars_extra[i].read_rng_dst,
3180 &offset_dst)) {
3181 // destination ranges are over
3182 LIBOFFLOAD_ERROR(c_destination_is_over);
3183 return false;
3184 }
3185 }
3186 // into is contiguous.
3187 else {
3188 offset_dst = m_vars[i].disp;
3189 }
3190 length_dst_cur = length_dst;
3191 }
3192 // same as source
3193 else {
3194 offset_dst = offset_src;
3195 length_dst_cur = length_src;
3196 }
3197 }
3198 else {
3199             // if the destination is contiguous or its contiguous range is greater
3200             // than the source one
3201 offset_dst += send_size;
3202 }
3203 length_dst_cur -= send_size;
3204 dst_is_empty = length_dst_cur == 0;
2eab9666
IV
3205
3206 if (event) {
3207 event = &m_in_deps[m_in_deps_total++];
3208 }
5f520819
KY
3209 if (src_data != 0 && src_data->cpu_buf != 0) {
3210 res = COI::BufferCopy(
3211 dst_data->mic_buf,
3212 src_data->cpu_buf,
2eab9666 3213 m_vars[i].mic_offset +
5f520819
KY
3214 m_vars[i].offset + offset_dst,
3215 m_vars_extra[i].cpu_offset + offset_src,
3216 send_size,
3217 COI_COPY_UNSPECIFIED,
2eab9666 3218 in_deps_amount, in_deps,
5f520819
KY
3219 event);
3220 if (res != COI_SUCCESS) {
3221 if (m_status != 0) {
3222 m_status->result = translate_coi_error(res);
3223 return false;
3224 }
3225 report_coi_error(c_buf_copy, res);
3226 }
3227 }
3228 else {
3229 char *base = offload_get_src_base(m_vars[i].ptr,
3230 m_vars[i].type.src);
3231
3232 res = COI::BufferWrite(
3233 dst_data->mic_buf,
2eab9666 3234 m_vars[i].mic_offset +
5f520819
KY
3235 m_vars[i].offset + offset_dst,
3236 base + offset_src,
3237 send_size,
3238 COI_COPY_UNSPECIFIED,
2eab9666 3239 in_deps_amount, in_deps,
5f520819
KY
3240 event);
3241 if (res != COI_SUCCESS) {
3242 if (m_status != 0) {
3243 m_status->result = translate_coi_error(res);
3244 return false;
3245 }
3246 report_coi_error(c_buf_write, res);
3247 }
3248 }
2eab9666 3249 data_sent += send_size;
5f520819
KY
3250 }
3251 while (true);
3252 return true;
3253}
3254
2eab9666 3255bool OffloadDescriptor::send_pointer_data(bool is_async, void* info)
5f520819
KY
3256{
3257 OffloadTimer timer(get_timer_data(), c_offload_host_send_pointers);
3258
2eab9666 3259 bool should_use_async_buffer_write = m_initial_need_runfunction;
5f520819
KY
3260 uint64_t ptr_sent = 0;
3261 COIRESULT res;
2eab9666
IV
3262 uint32_t in_deps_amount = 0;
3263 COIEVENT *in_deps = NULL;
3264
3265     // For offload_transfer and offload with an empty body without a signal:
3266     // - if there is only one buffer copy - send data synchronously
3267     // - if there are multiple buffer copies and
3268     //   __offload_parallel_copy is false - send data synchronously
3269     // - if there are multiple buffer copies and
3270     //   __offload_parallel_copy is true - send data asynchronously
3271     // This concerns only large data - at least __offload_use_async_buffer_write bytes.
3272     // Data smaller than __offload_use_async_buffer_write is sent synchronously.
3273     // Synchronous transfer results in better performance in COI.
3274     // __offload_parallel_copy is false by default but can be changed
3275     // via the environment variable OFFLOAD_PARALLEL_COPY.
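    // For illustration (hypothetical sizes): an offload_transfer with an empty
    // body that sends two arrays, each at least __offload_use_async_buffer_write
    // bytes, makes big_size_count == 2 below, so with OFFLOAD_PARALLEL_COPY set
    // each write gets its own COI completion event and proceeds asynchronously.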
3276 if (!m_initial_need_runfunction && __offload_parallel_copy) {
3277 int big_size_count = 0;
3278 for (int i = 0; i < m_vars_total; i++) {
3279 if (m_vars[i].direction.in &&
3280 m_vars[i].size >= __offload_use_async_buffer_write) {
3281 switch (m_vars[i].type.dst) {
3282 case c_data:
3283 case c_void_ptr:
3284 case c_cean_var:
3285 if (m_vars[i].flags.is_static_dstn) {
3286 big_size_count++;
3287 }
3288 break;
3289 case c_string_ptr:
3290 case c_data_ptr:
3291 case c_cean_var_ptr:
3292 case c_dv_ptr:
3293 case c_dv_data:
3294 case c_dv_ptr_data:
3295 case c_dv_data_slice:
3296 case c_dv_ptr_data_slice:
3297 big_size_count++;
3298 break;
3299 default:
3300 break;
3301 }
3302 }
3303 }
3304 if (big_size_count > 1) {
3305 should_use_async_buffer_write = true;
3306 }
3307 }
3308
3309 if (m_stream != no_stream && m_vars_total != 0) {
3310 get_stream_in_dependencies(in_deps_amount, in_deps);
3311 }
5f520819
KY
3312
3313 // Initiate send for pointer data
3314 for (int i = 0; i < m_vars_total; i++) {
2eab9666
IV
3315 uint64_t sent_data = m_vars[i].size;
3316 uint32_t in_deps_amount_save;
3317 COIEVENT *in_deps_save;
3318
3319 if (m_vars_extra[i].omp_last_event_type == c_last_write) {
3320 in_deps_amount_save = in_deps_amount;
3321 in_deps_save = in_deps;
3322 in_deps_amount = m_in_deps_total;
3323 if (in_deps_amount > 0) {
3324 in_deps = (COIEVENT*) malloc(sizeof(COIEVENT) * in_deps_amount);
3325 if (in_deps == NULL)
3326 LIBOFFLOAD_ERROR(c_malloc);
3327 memcpy(in_deps, m_in_deps,in_deps_amount * sizeof(COIEVENT));
3328 }
3329 }
5f520819
KY
3330 switch (m_vars[i].type.dst) {
3331 case c_data_ptr_array:
3332 break;
3333 case c_data:
3334 case c_void_ptr:
3335 case c_cean_var:
3336 if (m_vars[i].direction.in &&
3337 m_vars[i].flags.is_static_dstn) {
3338 COIEVENT *event =
3339 (is_async ||
2eab9666
IV
3340 (should_use_async_buffer_write &&
3341 m_vars[i].size >= __offload_use_async_buffer_write)) ?
5f520819
KY
3342 &m_in_deps[m_in_deps_total++] : 0;
3343 PtrData* dst_data = m_vars[i].into ?
3344 m_vars_extra[i].dst_data :
3345 m_vars_extra[i].src_data;
3346 PtrData* src_data =
3347 VAR_TYPE_IS_PTR(m_vars[i].type.src) ||
3348 VAR_TYPE_IS_SCALAR(m_vars[i].type.src) &&
3349 m_vars[i].flags.is_static ?
3350 m_vars_extra[i].src_data : 0;
3351
3352 if (m_vars[i].flags.is_noncont_src ||
3353 m_vars[i].flags.is_noncont_dst) {
3354 if (!send_noncontiguous_pointer_data(
2eab9666
IV
3355 i, src_data, dst_data, event, sent_data,
3356 in_deps_amount, in_deps)) {
5f520819
KY
3357 return false;
3358 }
3359 }
3360 else if (src_data != 0 && src_data->cpu_buf != 0) {
3361 res = COI::BufferCopy(
3362 dst_data->mic_buf,
3363 src_data->cpu_buf,
2eab9666 3364 m_vars[i].mic_offset +
5f520819
KY
3365 m_vars[i].offset + m_vars[i].disp,
3366 m_vars_extra[i].cpu_offset +
3367 m_vars_extra[i].cpu_disp,
3368 m_vars[i].size,
3369 COI_COPY_UNSPECIFIED,
2eab9666 3370 in_deps_amount, in_deps,
5f520819
KY
3371 event);
3372 if (res != COI_SUCCESS) {
3373 if (m_status != 0) {
3374 m_status->result = translate_coi_error(res);
3375 return false;
3376 }
3377 report_coi_error(c_buf_copy, res);
3378 }
3379 }
3380 else {
3381 char *base = offload_get_src_base(m_vars[i].ptr,
3382 m_vars[i].type.src);
3383 res = COI::BufferWrite(
3384 dst_data->mic_buf,
2eab9666 3385 m_vars[i].mic_offset +
5f520819
KY
3386 m_vars[i].offset + m_vars[i].disp,
3387 base + m_vars_extra[i].cpu_disp,
3388 m_vars[i].size,
3389 COI_COPY_UNSPECIFIED,
2eab9666 3390 in_deps_amount, in_deps,
5f520819
KY
3391 event);
3392 if (res != COI_SUCCESS) {
3393 if (m_status != 0) {
3394 m_status->result = translate_coi_error(res);
3395 return false;
3396 }
3397 report_coi_error(c_buf_write, res);
3398 }
3399 }
2eab9666 3400 ptr_sent += sent_data;
5f520819
KY
3401 }
3402 break;
3403
3404 case c_string_ptr:
3405 case c_data_ptr:
3406 case c_cean_var_ptr:
3407 case c_dv_ptr:
3408 if (m_vars[i].direction.in && m_vars[i].size > 0) {
3409 COIEVENT *event =
3410 (is_async ||
2eab9666
IV
3411 (should_use_async_buffer_write &&
3412 m_vars[i].size >= __offload_use_async_buffer_write)) ?
5f520819
KY
3413 &m_in_deps[m_in_deps_total++] : 0;
3414 PtrData* dst_data = m_vars[i].into ?
3415 m_vars_extra[i].dst_data :
3416 m_vars_extra[i].src_data;
3417 PtrData* src_data =
3418 VAR_TYPE_IS_PTR(m_vars[i].type.src) ||
3419 VAR_TYPE_IS_SCALAR(m_vars[i].type.src) &&
3420 m_vars[i].flags.is_static ?
3421 m_vars_extra[i].src_data : 0;
3422
3423 if (m_vars[i].flags.is_noncont_src ||
3424 m_vars[i].flags.is_noncont_dst) {
3425 send_noncontiguous_pointer_data(
2eab9666
IV
3426 i, src_data, dst_data, event, sent_data,
3427 in_deps_amount, in_deps);
5f520819
KY
3428 }
3429 else if (src_data != 0 && src_data->cpu_buf != 0) {
3430 res = COI::BufferCopy(
3431 dst_data->mic_buf,
3432 src_data->cpu_buf,
2eab9666 3433 m_vars[i].mic_offset +
5f520819
KY
3434 m_vars[i].offset + m_vars[i].disp,
3435 m_vars_extra[i].cpu_offset +
3436 m_vars_extra[i].cpu_disp,
3437 m_vars[i].size,
3438 COI_COPY_UNSPECIFIED,
2eab9666 3439 in_deps_amount, in_deps,
5f520819
KY
3440 event);
3441 if (res != COI_SUCCESS) {
3442 if (m_status != 0) {
3443 m_status->result = translate_coi_error(res);
3444 return false;
3445 }
3446 report_coi_error(c_buf_copy, res);
3447 }
3448 }
3449 else {
3450 char *base = offload_get_src_base(m_vars[i].ptr,
3451 m_vars[i].type.src);
3452 res = COI::BufferWrite(
3453 dst_data->mic_buf,
2eab9666 3454 m_vars[i].mic_offset +
5f520819
KY
3455 m_vars[i].offset + m_vars[i].disp,
3456 base + m_vars_extra[i].cpu_disp,
3457 m_vars[i].size,
3458 COI_COPY_UNSPECIFIED,
2eab9666 3459 in_deps_amount, in_deps,
5f520819
KY
3460 event);
3461 if (res != COI_SUCCESS) {
3462 if (m_status != 0) {
3463 m_status->result = translate_coi_error(res);
3464 return false;
3465 }
3466 report_coi_error(c_buf_write, res);
3467 }
3468 }
3469
2eab9666 3470 ptr_sent += sent_data;
5f520819
KY
3471 }
3472 break;
3473
3474 case c_dv_data:
3475 case c_dv_ptr_data:
3476 if (m_vars[i].direction.in &&
3477 m_vars[i].size > 0) {
3478 PtrData *ptr_data = m_vars[i].into ?
3479 m_vars_extra[i].dst_data :
3480 m_vars_extra[i].src_data;
3481 PtrData* src_data = m_vars_extra[i].src_data;
3482
3483 COIEVENT *event =
3484 (is_async ||
2eab9666
IV
3485 (should_use_async_buffer_write &&
3486 m_vars[i].size >= __offload_use_async_buffer_write)) ?
5f520819
KY
3487 &m_in_deps[m_in_deps_total++] : 0;
3488
3489 if (m_vars[i].flags.is_noncont_src ||
3490 m_vars[i].flags.is_noncont_dst) {
3491 send_noncontiguous_pointer_data(
2eab9666
IV
3492 i, src_data, ptr_data, event, sent_data,
3493 in_deps_amount, in_deps);
5f520819
KY
3494 }
3495 else if (src_data && src_data->cpu_buf != 0) {
3496 res = COI::BufferCopy(
3497 ptr_data->mic_buf,
3498 src_data->cpu_buf,
2eab9666 3499 m_vars[i].offset + ptr_data->mic_offset +
5f520819
KY
3500 m_vars[i].disp,
3501 m_vars_extra[i].cpu_offset +
3502 m_vars_extra[i].cpu_disp,
3503 m_vars[i].size,
3504 COI_COPY_UNSPECIFIED,
2eab9666 3505 in_deps_amount, in_deps,
5f520819
KY
3506 event);
3507 if (res != COI_SUCCESS) {
3508 if (m_status != 0) {
3509 m_status->result = translate_coi_error(res);
3510 return false;
3511 }
3512 report_coi_error(c_buf_copy, res);
3513 }
3514 }
3515 else {
3516 char *base = offload_get_src_base(m_vars[i].ptr,
3517 m_vars[i].type.src);
3518 res = COI::BufferWrite(
3519 ptr_data->mic_buf,
2eab9666 3520 ptr_data->mic_offset +
5f520819
KY
3521 m_vars[i].offset + m_vars[i].disp,
3522 base + m_vars_extra[i].cpu_disp,
3523 m_vars[i].size,
3524 COI_COPY_UNSPECIFIED,
2eab9666 3525 in_deps_amount, in_deps,
5f520819
KY
3526 event);
3527 if (res != COI_SUCCESS) {
3528 if (m_status != 0) {
3529 m_status->result = translate_coi_error(res);
3530 return false;
3531 }
3532 report_coi_error(c_buf_write, res);
3533 }
3534 }
2eab9666 3535 ptr_sent += sent_data;
5f520819
KY
3536 }
3537 break;
3538
3539 case c_dv_data_slice:
3540 case c_dv_ptr_data_slice:
3541 if (m_vars[i].direction.in &&
3542 m_vars[i].size > 0) {
3543 PtrData *dst_data = m_vars[i].into ?
3544 m_vars_extra[i].dst_data :
3545 m_vars_extra[i].src_data;
3546 PtrData* src_data =
3547 (VAR_TYPE_IS_PTR(m_vars[i].type.src) ||
3548 VAR_TYPE_IS_DV_DATA(m_vars[i].type.src) ||
3549 VAR_TYPE_IS_DV_DATA_SLICE(m_vars[i].type.src) ||
3550 VAR_TYPE_IS_SCALAR(m_vars[i].type.src) &&
3551 m_vars[i].flags.is_static) ?
3552 m_vars_extra[i].src_data : 0;
3553 COIEVENT *event =
3554 (is_async ||
2eab9666
IV
3555 (should_use_async_buffer_write &&
3556 m_vars[i].size >= __offload_use_async_buffer_write)) ?
5f520819
KY
3557 &m_in_deps[m_in_deps_total++] : 0;
3558 if (m_vars[i].flags.is_noncont_src ||
3559 m_vars[i].flags.is_noncont_dst) {
3560 send_noncontiguous_pointer_data(
2eab9666
IV
3561 i, src_data, dst_data, event, sent_data,
3562 in_deps_amount, in_deps);
5f520819
KY
3563 }
3564 else if (src_data && src_data->cpu_buf != 0) {
3565 res = COI::BufferCopy(
3566 dst_data->mic_buf,
3567 src_data->cpu_buf,
2eab9666 3568 m_vars[i].offset +
5f520819
KY
3569 dst_data->mic_offset +
3570 m_vars[i].disp,
3571 m_vars_extra[i].cpu_offset +
3572 m_vars_extra[i].cpu_disp,
3573 m_vars[i].size,
3574 COI_COPY_UNSPECIFIED,
2eab9666 3575 in_deps_amount, in_deps,
5f520819
KY
3576 event);
3577 if (res != COI_SUCCESS) {
3578 if (m_status != 0) {
3579 m_status->result = translate_coi_error(res);
3580 return false;
3581 }
3582 report_coi_error(c_buf_copy, res);
3583 }
3584 }
3585 else {
3586 char *base = offload_get_src_base(m_vars[i].ptr,
3587 m_vars[i].type.src);
3588 res = COI::BufferWrite(
3589 dst_data->mic_buf,
2eab9666 3590 dst_data->mic_offset +
5f520819
KY
3591 m_vars[i].offset + m_vars[i].disp,
3592 base + m_vars_extra[i].cpu_disp,
3593 m_vars[i].size,
3594 COI_COPY_UNSPECIFIED,
2eab9666 3595 in_deps_amount, in_deps,
5f520819
KY
3596 event);
3597 if (res != COI_SUCCESS) {
3598 if (m_status != 0) {
3599 m_status->result = translate_coi_error(res);
3600 return false;
3601 }
3602 report_coi_error(c_buf_write, res);
3603 }
3604 }
3605
2eab9666 3606 ptr_sent += sent_data;
5f520819
KY
3607 }
3608 break;
3609
3610 default:
3611 break;
3612 }
2eab9666
IV
3613 if (m_vars_extra[i].omp_last_event_type == c_last_write) {
3614 in_deps_amount = in_deps_amount_save;
3615 in_deps = in_deps_save;
3616 register_omp_event_call_back(&m_in_deps[m_in_deps_total - 1], info);
3617 }
5f520819
KY
3618         // The alloc field isn't used on the target.
3619         // We can reuse it for the offset of array pointers.
3620 if (m_vars_extra[i].is_arr_ptr_el) {
3621 m_vars[i].ptr_arr_offset = m_vars_extra[i].ptr_arr_offset;
3622 }
3623 }
3624
3625 if (m_status) {
3626 m_status->data_sent += ptr_sent;
3627 }
3628
3629 OFFLOAD_TIMER_HOST_SDATA(get_timer_data(), ptr_sent);
3630 OFFLOAD_DEBUG_TRACE_1(1, GET_OFFLOAD_NUMBER(get_timer_data()),
3631 c_offload_sent_pointer_data,
3632 "Total pointer data sent to target: [%lld] bytes\n",
3633 ptr_sent);
3634
3635 return true;
3636}
3637
3638bool OffloadDescriptor::gather_copyin_data()
3639{
3640 OffloadTimer timer(get_timer_data(), c_offload_host_gather_inputs);
3641
3642 if (m_need_runfunction && m_in_datalen > 0) {
3643 COIMAPINSTANCE map_inst;
3644 char *data;
3645
3646 // init marshaller
3647 if (m_inout_buf != 0) {
3648 OffloadTimer timer_map(get_timer_data(),
3649 c_offload_host_map_in_data_buffer);
3650
3651 COIRESULT res = COI::BufferMap(m_inout_buf, 0, m_in_datalen,
3652 COI_MAP_WRITE_ENTIRE_BUFFER,
3653 0, 0, 0, &map_inst,
3654 reinterpret_cast<void**>(&data));
3655 if (res != COI_SUCCESS) {
3656 if (m_status != 0) {
3657 m_status->result = translate_coi_error(res);
3658 return false;
3659 }
3660 report_coi_error(c_buf_map, res);
3661 }
3662 }
3663 else {
3664 data = (char*) m_func_desc + m_func_desc->data_offset;
3665 }
3666
3667 // send variable descriptors
3668 memcpy(data, m_vars, m_vars_total * sizeof(VarDesc));
3669 data += m_vars_total * sizeof(VarDesc);
3670
3671 // init marshaller
3672 m_in.init_buffer(data, m_in_datalen);
3673
3674 // Gather copy data into buffer
3675 for (int i = 0; i < m_vars_total; i++) {
3676 bool src_is_for_mic = (m_vars[i].direction.out ||
3677 m_vars[i].into == NULL);
3678 PtrData* ptr_data = src_is_for_mic ?
3679 m_vars_extra[i].src_data :
3680 m_vars_extra[i].dst_data;
3681 if (m_vars[i].flags.alloc_disp) {
3682 m_in.send_data(&ptr_data->alloc_disp,
3683 sizeof(ptr_data->alloc_disp));
3684 }
3685
3686 // send sink address to the target
3687 if (m_vars[i].flags.sink_addr) {
3688 m_in.send_data(&ptr_data->mic_addr,
3689 sizeof(ptr_data->mic_addr));
3690 }
3691
3692 switch (m_vars[i].type.dst) {
3693 case c_data_ptr_array:
3694 break;
3695 case c_data:
3696 case c_void_ptr:
3697 case c_cean_var:
3698 if (m_vars[i].direction.in &&
3699 !m_vars[i].flags.is_static_dstn) {
3700
3701 char *ptr = offload_get_src_base(m_vars[i].ptr,
3702 m_vars[i].type.src);
3703 if (m_vars[i].type.dst == c_cean_var) {
3704 // offset and length are derived from the array
3705 // descriptor
3706 int64_t size = m_vars[i].size;
3707 int64_t disp = m_vars[i].disp;
3708 m_in.send_data(reinterpret_cast<char*>(&size),
3709 sizeof(int64_t));
3710 m_in.send_data(reinterpret_cast<char*>(&disp),
3711 sizeof(int64_t));
3712 }
3713
3714 m_in.send_data(ptr + m_vars_extra[i].cpu_disp,
3715 m_vars[i].size);
3716 }
3717 break;
3718
3719 case c_dv:
3720 if (m_vars[i].direction.bits ||
3721 m_vars[i].alloc_if ||
3722 m_vars[i].free_if) {
3723 // send dope vector excluding base
3724 char *ptr = static_cast<char*>(m_vars[i].ptr);
3725 m_in.send_data(ptr + sizeof(uint64_t),
3726 m_vars[i].size - sizeof(uint64_t));
3727 }
3728 break;
3729
3730 case c_data_ptr:
3731 // send to target addresses of obsolete
3732 // stacks to be released
3733 if (m_vars[i].flags.is_stack_buf &&
3734 !m_vars[i].direction.bits &&
3735 m_vars[i].alloc_if &&
3736 m_vars[i].size != 0) {
3737 for (PtrDataList::iterator it =
3738 m_destroy_stack.begin();
3739 it != m_destroy_stack.end(); it++) {
3740 PtrData * ptr_data = *it;
3741 m_in.send_data(&(ptr_data->mic_addr),
3742 sizeof(ptr_data->mic_addr));
3743 }
3744 }
3745 break;
3746 case c_func_ptr:
3747 if (m_vars[i].direction.in) {
3748 m_in.send_func_ptr(*((const void**) m_vars[i].ptr));
3749 }
3750 break;
3751
3752 default:
3753 break;
3754 }
3755 }
3756
3757 if (m_status) {
3758 m_status->data_sent += m_in.get_tfr_size();
3759 }
3760
3761 if (m_func_desc->data_offset == 0) {
3762 OffloadTimer timer_unmap(get_timer_data(),
3763 c_offload_host_unmap_in_data_buffer);
3764 COIRESULT res = COI::BufferUnmap(map_inst, 0, 0, 0);
3765 if (res != COI_SUCCESS) {
3766 if (m_status != 0) {
3767 m_status->result = translate_coi_error(res);
3768 return false;
3769 }
3770 report_coi_error(c_buf_unmap, res);
3771 }
3772 }
3773 }
3774
3775 OFFLOAD_TIMER_HOST_SDATA(get_timer_data(), m_in.get_tfr_size());
3776 OFFLOAD_DEBUG_TRACE_1(1,
3777 GET_OFFLOAD_NUMBER(get_timer_data()), c_offload_copyin_data,
3778 "Total copyin data sent to target: [%lld] bytes\n",
3779 m_in.get_tfr_size());
3780
3781 return true;
3782}
3783
2eab9666 3784bool OffloadDescriptor::compute(void *info)
5f520819
KY
3785{
3786 OffloadTimer timer(get_timer_data(), c_offload_host_start_compute);
3787
3788 if (m_need_runfunction) {
3789 OFFLOAD_DEBUG_TRACE_1(2, GET_OFFLOAD_NUMBER(get_timer_data()),
3790 c_offload_compute, "Compute task on MIC\n");
3791
3792 void* misc = m_func_desc;
3793 int misc_len = m_func_desc_size;
3794 void* ret = 0;
3795 int ret_len = 0;
3796
3797 if (m_func_desc->data_offset != 0) {
3798 misc_len += m_in_datalen;
3799
3800 if (m_out_datalen > 0) {
3801 ret = (char*) m_func_desc + m_func_desc->data_offset;
3802 ret_len = m_out_datalen;
3803 }
3804 }
3805
3806 // dispatch task
3807 COIRESULT res;
3808 COIEVENT event;
2eab9666
IV
3809 uint32_t in_deps_amount = m_in_deps_total;
3810 COIEVENT *in_deps = m_in_deps_total > 0 ? m_in_deps : 0;
3811
3812 if (0 == m_in_deps_total && m_stream != no_stream) {
3813 get_stream_in_dependencies(in_deps_amount, in_deps);
3814 }
3815
3816 res = m_device.compute(m_stream,
3817 m_compute_buffers,
5f520819
KY
3818 misc, misc_len,
3819 ret, ret_len,
2eab9666
IV
3820 in_deps_amount,
3821 in_deps,
5f520819 3822 &event);
2eab9666 3823
5f520819
KY
3824 if (res != COI_SUCCESS) {
3825 if (m_status != 0) {
3826 m_status->result = translate_coi_error(res);
3827 return false;
3828 }
3829 report_coi_error(c_pipeline_run_func, res);
3830 }
3831
3832 if (m_omp_async_last_event_type == c_last_runfunc) {
3833 register_omp_event_call_back(&event, info);
3834 }
3835
3836 m_in_deps_total = 1;
3837 m_in_deps[0] = event;
3838 }
3839
3840 return true;
3841}
3842
2eab9666 3843// receive pointer data if the source or destination (or both) is
5f520819 3844// noncontiguous. It is guaranteed that the destination length is enough
 3845// for the transferred data.
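// Illustrative note (added, not in the original source): for example, an OUT
// transfer of a strided section such as a[0:n:2] decomposes into several
// contiguous ranges; the loop below issues one BufferCopy/BufferRead per range.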
3846bool OffloadDescriptor::receive_noncontiguous_pointer_data(
5f520819 3847 int i,
5f520819 3848 COIBUFFER dst_buf,
3849 COIEVENT *event,
3850 uint64_t &received_data,
3851 uint32_t in_deps_amount,
3852 COIEVENT *in_deps
3853)
3854{
3855 int64_t offset_src, offset_dst;
3856 int64_t length_src, length_dst;
3857 int64_t length_src_cur, length_dst_cur;
2eab9666 3858 int64_t receive_size;
3859 COIRESULT res;
3860 bool dst_is_empty = true;
3861 bool src_is_empty = true;
3862
3863 char *base = offload_get_src_base(
3864 m_vars[i].into ?
3865 static_cast<char*>(m_vars[i].into) :
3866 static_cast<char*>(m_vars[i].ptr),
3867 m_vars[i].type.dst);
3868 received_data = 0;
3869
3870 // Set length_src and length_dst
3871 length_src = (m_vars_extra[i].read_rng_src) ?
3872 m_vars_extra[i].read_rng_src->range_size : m_vars[i].size;
3873 length_dst = !m_vars[i].into ? length_src :
3874 (m_vars_extra[i].read_rng_dst) ?
3875 m_vars_extra[i].read_rng_dst->range_size : m_vars[i].size;
3876 receive_size = (length_src < length_dst) ? length_src : length_dst;
3877
 3878     // If BufferReadMultiD is defined we can set the values of the required
 3879     // arguments and transfer noncontiguous data via a call to the COI routine.
3880 if (__offload_use_coi_noncontiguous_transfer && COI::BufferReadMultiD) {
3881 struct Arr_Desc* arr_desc_dst;
3882 struct Arr_Desc* arr_desc_src;
3883 int64_t size_src, size_dst;
3884
3885 offset_src = (m_vars_extra[i].read_rng_src)?
3886 m_vars_extra[i].read_rng_src->init_offset : m_vars[i].disp;
3887 size_src = m_vars_extra[i].read_rng_src ?
3888 cean_get_transf_size(m_vars_extra[i].read_rng_src) :
3889 m_vars[i].size;
3890
3891 offset_dst = (m_vars_extra[i].read_rng_dst)?
3892 m_vars_extra[i].read_rng_dst->init_offset : m_vars_extra[i].cpu_disp;
3893 size_dst = m_vars_extra[i].read_rng_dst ?
3894 cean_get_transf_size(m_vars_extra[i].read_rng_dst) : m_vars[i].size;
3895
3896 int64_t el_size = (!m_vars[i].into ||
3897 (m_vars_extra[i].read_rng_src &&
3898 m_vars_extra[i].read_rng_dst)) ?
3899 1 :
3900 m_vars_extra[i].read_rng_src ?
3901 m_vars_extra[i].read_rng_src->arr_desc->dim[
3902 m_vars_extra[i].read_rng_src->arr_desc->rank - 1].size :
3903 m_vars_extra[i].read_rng_dst->arr_desc->dim[
3904 m_vars_extra[i].read_rng_dst->arr_desc->rank - 1].size;
3905 arr_desc_src = (m_vars_extra[i].read_rng_src) ?
3906 m_vars_extra[i].read_rng_src->arr_desc :
 3907                       make_arr_desc(NULL, // not required for source
3908 offset_src/el_size, size_src/el_size,
3909 el_size);
3910 arr_desc_dst = !m_vars[i].into ? arr_desc_src :
3911 (m_vars_extra[i].read_rng_dst) ?
3912 m_vars_extra[i].read_rng_dst->arr_desc :
3913 make_arr_desc(NULL,
3914 offset_dst/el_size, size_src/el_size, el_size);
3915
3916 arr_desc_dst->base = reinterpret_cast<int64_t>(base);
3917
3918 res = COI::BufferReadMultiD(
3919 m_vars_extra[i].src_data->mic_buf, // SourceBuffer
3920 m_vars[i].offset + m_vars[i].mic_offset -
3921 m_vars_extra[i].src_data->alloc_disp, // Offset
3922 (void*)arr_desc_dst, // descriptor of DestArray
3923 (void*)arr_desc_src, // descriptor of SrcArray
3924 COI_COPY_UNSPECIFIED, // Type
3925 in_deps_amount, // Number of in Dependencies
3926 in_deps, // array of in Dependencies
3927 event); // out Dependency
3928 if (res != COI_SUCCESS) {
3929 if (m_status != 0) {
3930 m_status->result = translate_coi_error(res);
3931 return false;
3932 }
3933 report_coi_error(c_buf_copy, res);
3934 }
3935 return(true);
3936 }
 3937     // if event is defined we must multiply it for all contiguous intervals
 3938     // that will be copied/read.
 3939     // Take into account that we already have 1 event.
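// Illustrative note (added, not in the original source): if the source section
// splits into 4 contiguous ranges, up to 4 out-events may be recorded for this
// variable, so m_out_deps is grown here before the transfer loop starts.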
3940 if (event) {
3941 m_out_deps_allocated += (length_src / receive_size) *
3942 ((m_vars_extra[i].read_rng_src) ?
3943 m_vars_extra[i].read_rng_src->range_max_number : 1) ;
3944 m_out_deps =
3945 (COIEVENT*)realloc(m_out_deps, sizeof(COIEVENT) * m_out_deps_allocated);
3946 m_out_deps_total--;
3947 }
3948
5f520819 3949     // consecutively get contiguous ranges,
2eab9666 3950     // define the corresponding destination offset and receive the data
3951 do {
 3952         // get source offset
3953 if (src_is_empty) {
3954 if (m_vars_extra[i].read_rng_src) {
3955 if (!get_next_range(m_vars_extra[i].read_rng_src,
3956 &offset_src)) {
3957 // source ranges are over - nothing to send
3958 break;
3959 }
3960 }
3961 else if (received_data == 0) {
3962 offset_src = m_vars[i].disp;
3963 }
3964 else {
3965 break;
3966 }
3967 length_src_cur = length_src;
3968 }
3969 else {
3970 // if source is contiguous or its contiguous range is greater
3971 // than destination one
2eab9666 3972 offset_src += receive_size;
5f520819 3973 }
2eab9666 3974 length_src_cur -= receive_size;
3975 src_is_empty = length_src_cur == 0;
3976
3977 // get destination offset
3978 if (dst_is_empty) {
3979 if (m_vars[i].into) {
3980 if (m_vars_extra[i].read_rng_dst) {
3981 if (!get_next_range(m_vars_extra[i].read_rng_dst,
3982 &offset_dst)) {
3983 // destination ranges are over
3984 LIBOFFLOAD_ERROR(c_destination_is_over);
3985 return false;
3986 }
3987 }
3988 // destination is contiguous.
3989 else {
3990 offset_dst = m_vars_extra[i].cpu_disp;
3991 }
3992 length_dst_cur = length_dst;
3993 }
3994 // same as source
3995 else {
3996 offset_dst = offset_src;
3997 length_dst_cur = length_src;
3998 }
3999 }
4000 else {
4001 // if destination is contiguous or its contiguous range is greater
4002 // than source one
2eab9666 4003 offset_dst += receive_size;
5f520819 4004 }
2eab9666 4005 length_dst_cur -= receive_size;
5f520819 4006 dst_is_empty = length_dst_cur == 0;
4007 if (event) {
4008 event = &m_out_deps[m_out_deps_total++];
4009 }
4010 if (dst_buf != 0) {
4011 res = COI::BufferCopy(
4012 dst_buf,
4013 m_vars_extra[i].src_data->mic_buf,
4014 m_vars_extra[i].cpu_offset + offset_dst,
4015 m_vars[i].offset + offset_src +
4016 m_vars[i].mic_offset,
4017 receive_size,
5f520819 4018 COI_COPY_UNSPECIFIED,
4019 in_deps_amount,
4020 in_deps,
4021 event);
4022 if (res != COI_SUCCESS) {
4023 if (m_status != 0) {
4024 m_status->result = translate_coi_error(res);
4025 return false;
4026 }
4027 report_coi_error(c_buf_copy, res);
4028 }
4029 }
4030 else {
4031 res = COI::BufferRead(
4032 m_vars_extra[i].src_data->mic_buf,
4033 m_vars[i].offset + offset_src +
2eab9666 4034 m_vars[i].mic_offset,
5f520819 4035 base + offset_dst,
2eab9666 4036 receive_size,
5f520819 4037 COI_COPY_UNSPECIFIED,
4038 in_deps_amount,
4039 in_deps,
4040 event);
4041 if (res != COI_SUCCESS) {
4042 if (m_status != 0) {
4043 m_status->result = translate_coi_error(res);
4044 return false;
4045 }
4046 report_coi_error(c_buf_read, res);
4047 }
4048 }
2eab9666 4049 received_data += receive_size;
4050 }
4051 while (true);
4052 return true;
4053}
4054
4055bool OffloadDescriptor::receive_pointer_data(bool is_async,
4056 bool first_run, void *info)
4057{
4058 OffloadTimer timer(get_timer_data(), c_offload_host_start_buffers_reads);
4059
2eab9666 4060 bool should_use_async_buffer_read = m_initial_need_runfunction;
4061 uint64_t ptr_received = 0;
4062 COIRESULT res;
4063
4064 // For offload_transfer and offload with empty body without signal:
 4065     // - if there is only one buffer copy - get the data synchronously
 4066     // - if there are multiple buffer copies and
 4067     //   __offload_parallel_copy is false - get the data synchronously
 4068     // - if there are multiple buffer copies
 4069     //   and __offload_parallel_copy is true - get the data asynchronously
 4070     // This concerns only data of size not less than __offload_use_async_buffer_read.
 4071     // Data smaller than __offload_use_async_buffer_read are received synchronously.
4072 // Synchronous transfer results in better performance in COI.
4073 // __offload_parallel_copy is false by default but can be changed
4074 // via environment variable OFFLOAD_PARALLEL_COPY
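// Illustrative example (added, not in the original source): with
// OFFLOAD_PARALLEL_COPY=1 and two OUT variables each of size not less than
// __offload_use_async_buffer_read, big_size_count reaches 2 below and the
// reads are issued asynchronously; a single large OUT variable is still
// read synchronously.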
4075 if (!m_initial_need_runfunction && __offload_parallel_copy) {
4076 int big_size_count = 0;
4077
4078 for (int i = 0; i < m_vars_total; i++) {
4079 if (m_vars[i].direction.out &&
4080 m_vars[i].size >= __offload_use_async_buffer_read) {
4081 // preallocated OUT only at second run
4082 if (first_run == m_vars[i].flags.preallocated) {
4083 continue;
4084 }
4085 switch (m_vars[i].type.src) {
4086 case c_data:
4087 case c_void_ptr:
4088 case c_cean_var:
4089 if (m_vars[i].flags.is_static) {
4090 big_size_count++;
4091 }
4092 break;
4093 case c_string_ptr:
4094 case c_data_ptr:
4095 case c_cean_var_ptr:
4096 case c_dv_data:
4097 case c_dv_ptr_data:
4098 case c_dv_data_slice:
4099 case c_dv_ptr_data_slice:
4100 case c_dv_ptr:
4101 big_size_count++;
4102 break;
4103 default:
4104 break;
4105 }
4106 }
4107 }
4108 if (big_size_count > 1) {
4109 should_use_async_buffer_read = true;
4110 }
4111 }
4112 uint32_t in_deps_amount = m_in_deps_total;
4113 COIEVENT *in_deps = m_in_deps_total > 0 ? m_in_deps : 0;
4114
4115 if (0 == m_in_deps_total &&
4116 m_stream != no_stream &&
4117 m_vars_total != 0) {
4118 get_stream_in_dependencies(in_deps_amount, in_deps);
4119 }
4120
5f520819 4121 for (int i = 0; i < m_vars_total; i++) {
4122 uint64_t received_data = m_vars[i].size;
4123 uint32_t in_deps_amount_save;
4124 COIEVENT *in_deps_save;
4125
4126 if (m_vars_extra[i].omp_last_event_type == c_last_read) {
4127 in_deps_amount_save = in_deps_amount;
4128 in_deps_save = in_deps;
4129
4130 in_deps_amount += m_out_deps_total;
4131 if (in_deps_amount > 0) {
4132 in_deps = (COIEVENT*) malloc(sizeof(COIEVENT) * in_deps_amount);
4133 if (in_deps == NULL)
4134 LIBOFFLOAD_ERROR(c_malloc);
4135 memcpy(in_deps, in_deps_save,
4136 in_deps_amount_save * sizeof(COIEVENT));
 4137                 memcpy(in_deps + in_deps_amount_save, // element offset, not byte offset
4138 m_out_deps,
4139 m_out_deps_total * sizeof(COIEVENT));
4140 }
4141 }
 4142         // On the first run, don't receive via the preallocated target pointer as
 4143         // the pointer value will only be ready after the call to scatter_copyout_data
4144 if (first_run && m_vars[i].alloc_if && m_vars[i].flags.preallocated) {
4145 m_preallocated_alloc = true;
4146 // need one more call to OffloadDescriptor::receive_pointer_data
4147 if (m_vars[i].direction.out) {
4148 m_out_with_preallocated = true;
4149 }
4150 continue;
4151 }
4152 switch (m_vars[i].type.src) {
4153 case c_data_ptr_array:
4154 break;
4155 case c_data:
4156 case c_void_ptr:
4157 case c_cean_var:
4158 if (m_vars[i].direction.out &&
4159 m_vars[i].flags.is_static) {
4160 COIEVENT *event =
4161 (is_async ||
4162 m_in_deps_total > 0 ||
4163 (should_use_async_buffer_read &&
4164 m_vars[i].size >= __offload_use_async_buffer_read)) ?
4165 &m_out_deps[m_out_deps_total++] : 0;
4166 PtrData *ptr_data = NULL;
4167 COIBUFFER dst_buf = NULL; // buffer at host
4168 char *base;
4169
4170 if (VAR_TYPE_IS_PTR(m_vars[i].type.dst)) {
4171 ptr_data = m_vars[i].into ?
4172 m_vars_extra[i].dst_data :
4173 m_vars_extra[i].src_data;
4174 }
4175 else if (VAR_TYPE_IS_SCALAR(m_vars[i].type.dst)) {
4176 if (m_vars[i].flags.is_static_dstn) {
4177 ptr_data = m_vars[i].into ?
4178 m_vars_extra[i].dst_data :
4179 m_vars_extra[i].src_data;
4180 }
4181 }
4182 dst_buf = ptr_data ? ptr_data->cpu_buf : NULL;
4183 if (dst_buf == NULL) {
4184 base = offload_get_src_base(
4185 m_vars[i].into ?
4186 static_cast<char*>(m_vars[i].into) :
4187 static_cast<char*>(m_vars[i].ptr),
4188 m_vars[i].type.dst);
4189 }
4190
4191 if (m_vars[i].flags.is_noncont_src ||
4192 m_vars[i].flags.is_noncont_dst) {
4193 receive_noncontiguous_pointer_data(
4194 i, dst_buf, event, received_data,
4195 in_deps_amount, in_deps);
4196 }
4197 else if (dst_buf != 0) {
4198 res = COI::BufferCopy(
4199 dst_buf,
4200 m_vars_extra[i].src_data->mic_buf,
4201 m_vars_extra[i].cpu_offset +
4202 m_vars_extra[i].cpu_disp,
4203 m_vars[i].offset + m_vars[i].disp,
4204 m_vars[i].size,
4205 COI_COPY_UNSPECIFIED,
4206 in_deps_amount,
4207 in_deps,
4208 event);
4209 if (res != COI_SUCCESS) {
4210 if (m_status != 0) {
4211 m_status->result = translate_coi_error(res);
4212 return false;
4213 }
4214 report_coi_error(c_buf_copy, res);
4215 }
4216 }
4217 else {
4218 res = COI::BufferRead(
4219 m_vars_extra[i].src_data->mic_buf,
4220 m_vars[i].offset + m_vars[i].disp,
4221 base + m_vars_extra[i].cpu_offset +
4222 m_vars_extra[i].cpu_disp,
4223 m_vars[i].size,
4224 COI_COPY_UNSPECIFIED,
4225 in_deps_amount,
4226 in_deps,
4227 event);
4228 if (res != COI_SUCCESS) {
4229 if (m_status != 0) {
4230 m_status->result = translate_coi_error(res);
4231 return false;
4232 }
4233 report_coi_error(c_buf_read, res);
4234 }
4235 }
2eab9666 4236 ptr_received += received_data;
4237 }
4238 break;
4239
4240 case c_string_ptr:
4241 case c_data_ptr:
4242 case c_cean_var_ptr:
4243 case c_dv_data:
4244 case c_dv_ptr_data:
4245 case c_dv_data_slice:
4246 case c_dv_ptr_data_slice:
4247 case c_dv_ptr: {
4248 COIBUFFER dst_buf = NULL; // buffer on host
4249 if (m_vars[i].direction.out && m_vars[i].size > 0) {
4250 COIEVENT *event =
4251 (is_async ||
4252 m_in_deps_total > 0 ||
4253 (should_use_async_buffer_read &&
4254 m_vars[i].size >= __offload_use_async_buffer_read)) ?
4255 &m_out_deps[m_out_deps_total++] : 0;
4256
4257 uint64_t dst_offset = 0;
4258 char *base = static_cast<char*>(m_vars[i].ptr);
4259
4260 if (VAR_TYPE_IS_PTR(m_vars[i].type.dst)) {
4261 PtrData *ptr_data = m_vars[i].into ?
4262 m_vars_extra[i].dst_data :
4263 m_vars_extra[i].src_data;
4264 dst_buf = ptr_data ? ptr_data->cpu_buf : NULL;
4265 if (dst_buf == NULL) {
4266 base = m_vars[i].into ?
4267 *static_cast<char**>(m_vars[i].into) :
4268 *static_cast<char**>(m_vars[i].ptr);
4269 }
4270 dst_offset = m_vars_extra[i].cpu_offset +
4271 m_vars_extra[i].cpu_disp;
4272 }
4273 else if (VAR_TYPE_IS_SCALAR(m_vars[i].type.dst)) {
4274 if (m_vars[i].flags.is_static_dstn) {
4275 dst_buf = m_vars[i].into ?
4276 m_vars_extra[i].dst_data->cpu_buf :
4277 m_vars_extra[i].src_data->cpu_buf;
4278 }
4279 if (dst_buf == NULL) {
4280 base = offload_get_src_base(
4281 m_vars[i].into ?
4282 static_cast<char*>(m_vars[i].into) :
4283 static_cast<char*>(m_vars[i].ptr),
4284 m_vars[i].type.dst);
4285 }
4286 dst_offset = m_vars_extra[i].cpu_offset +
4287 m_vars_extra[i].cpu_disp;
4288 }
4289 else if (VAR_TYPE_IS_DV_DATA(m_vars[i].type.dst) ||
4290 VAR_TYPE_IS_DV_DATA_SLICE(m_vars[i].type.dst)) {
4291 PtrData *ptr_data = m_vars[i].into != 0 ?
4292 m_vars_extra[i].dst_data :
4293 m_vars_extra[i].src_data;
4294 dst_buf = ptr_data != 0 ? ptr_data->cpu_buf : 0;
4295 if (dst_buf == NULL) {
4296 base = offload_get_src_base(
4297 m_vars[i].into ?
4298 static_cast<char*>(m_vars[i].into) :
4299 static_cast<char*>(m_vars[i].ptr),
4300 m_vars[i].type.dst);
4301
4302 }
4303 dst_offset = m_vars_extra[i].cpu_offset +
4304 m_vars_extra[i].cpu_disp;
4305 }
4306
4307 if (m_vars[i].flags.is_noncont_src ||
4308 m_vars[i].flags.is_noncont_dst) {
4309 receive_noncontiguous_pointer_data(
4310 i, dst_buf, event, received_data,
4311 in_deps_amount,
4312 in_deps);
4313 }
4314 else if (dst_buf != 0) {
4315 res = COI::BufferCopy(
4316 dst_buf,
4317 m_vars_extra[i].src_data->mic_buf,
4318 dst_offset,
4319 m_vars[i].offset + m_vars[i].disp +
2eab9666 4320 m_vars[i].mic_offset,
4321 m_vars[i].size,
4322 COI_COPY_UNSPECIFIED,
4323 in_deps_amount,
4324 in_deps,
4325 event);
4326 if (res != COI_SUCCESS) {
4327 if (m_status != 0) {
4328 m_status->result = translate_coi_error(res);
4329 return false;
4330 }
4331 report_coi_error(c_buf_copy, res);
4332 }
4333 }
4334 else {
4335 res = COI::BufferRead(
4336 m_vars_extra[i].src_data->mic_buf,
4337 m_vars[i].offset + m_vars[i].disp +
2eab9666 4338 m_vars[i].mic_offset,
4339 base + dst_offset,
4340 m_vars[i].size,
4341 COI_COPY_UNSPECIFIED,
4342 in_deps_amount,
4343 in_deps,
4344 event);
4345 if (res != COI_SUCCESS) {
4346 if (m_status != 0) {
4347 m_status->result = translate_coi_error(res);
4348 return false;
4349 }
4350 report_coi_error(c_buf_read, res);
4351 }
4352 }
2eab9666 4353 ptr_received += received_data;
4354 }
4355 break;
4356 }
4357
4358 default:
4359 break;
4360 }
4361
4362 if (m_vars_extra[i].omp_last_event_type == c_last_read) {
4363 in_deps_amount = in_deps_amount_save;
4364 in_deps = in_deps_save;
4365 register_omp_event_call_back(&m_out_deps[m_out_deps_total - 1], info);
4366 }
4367 // destroy buffers for obsolete stacks
4368 if (m_destroy_stack.size() != 0) {
4369 for (PtrDataList::iterator it = m_destroy_stack.begin();
4370 it != m_destroy_stack.end(); it++) {
4371 PtrData *ptr_data = *it;
4372 m_destroy_buffers.push_back(ptr_data->mic_buf);
4373 OFFLOAD_TRACE(3, "Removing stack buffer with addr %p\n",
4374 ptr_data->mic_addr);
4375 }
4376 m_destroy_stack.clear();
4377 }
4378 if (m_vars[i].free_if) {
4379 // remove association for automatic variables
4380 if (m_is_openmp && !m_vars[i].flags.is_static &&
4381 (m_vars[i].type.src == c_data ||
4382 m_vars[i].type.src == c_void_ptr ||
4383 m_vars[i].type.src == c_cean_var)) {
4384 AutoData *auto_data = m_vars_extra[i].auto_data;
4385 if (auto_data != 0) {
4386 if (m_vars[i].flags.always_delete) {
4387 auto_data->nullify_reference();
4388 }
4389 else if(auto_data->remove_reference() == 0) {
4390 m_device.remove_auto_data(auto_data->cpu_addr.start());
4391 }
4392 }
4393 }
4394
4395 // destroy buffers
4396 if (m_vars[i].direction.out || m_vars[i].into == NULL) {
4397 if (!VAR_TYPE_IS_PTR(m_vars[i].type.src) &&
4398 !VAR_TYPE_IS_DV_DATA_SLICE(m_vars[i].type.src) &&
4399 !VAR_TYPE_IS_DV_DATA(m_vars[i].type.src)) {
4400 continue;
4401 }
4402
4403 PtrData *ptr_data = m_vars_extra[i].src_data;
4404 if (ptr_data->remove_reference() == 0) {
4405 // destroy buffers
4406 if (ptr_data->cpu_buf != 0) {
4407 m_destroy_buffers.push_back(ptr_data->cpu_buf);
4408 }
4409 if (ptr_data->mic_buf != 0) {
4410 m_destroy_buffers.push_back(ptr_data->mic_buf);
4411 }
4412 OFFLOAD_TRACE(3, "Removing association for addr %p\n",
4413 ptr_data->cpu_addr.start());
4414
4415 // remove association from map
4416 if (m_vars[i].flags.targetptr) {
4417 m_device.remove_targetptr_data(ptr_data->cpu_addr.start());
4418 }
4419 else {
4420 m_device.remove_ptr_data(ptr_data->cpu_addr.start());
4421 }
4422 }
4423 }
4424 else if (VAR_TYPE_IS_PTR(m_vars[i].type.dst) ||
4425 VAR_TYPE_IS_DV_DATA_SLICE(m_vars[i].type.dst) ||
4426 VAR_TYPE_IS_DV_DATA(m_vars[i].type.dst)) {
4427 PtrData *ptr_data = m_vars_extra[i].dst_data;
4428 if (ptr_data->remove_reference() == 0) {
4429 // destroy buffers
4430 if (ptr_data->cpu_buf != 0) {
4431 m_destroy_buffers.push_back(ptr_data->cpu_buf);
4432 }
4433 if (ptr_data->mic_buf != 0) {
4434 m_destroy_buffers.push_back(ptr_data->mic_buf);
4435 }
4436 OFFLOAD_TRACE(3, "Removing association for addr %p\n",
4437 ptr_data->cpu_addr.start());
4438
4439 // remove association from map
4440 if (m_vars[i].flags.targetptr) {
4441 m_device.remove_targetptr_data(ptr_data->cpu_addr.start());
4442 }
4443 else {
4444 m_device.remove_ptr_data(ptr_data->cpu_addr.start());
4445 }
4446 }
4447 }
4448 }
4449 }
4450
4451 if (m_status) {
4452 m_status->data_received += ptr_received;
4453 }
4454
4455 OFFLOAD_TIMER_HOST_RDATA(get_timer_data(), ptr_received);
4456 OFFLOAD_DEBUG_TRACE_1(1, GET_OFFLOAD_NUMBER(get_timer_data()),
4457 c_offload_received_pointer_data,
4458 "Total pointer data received from target: [%lld] bytes\n",
4459 ptr_received);
4460
4461 return true;
4462}
4463
4464bool OffloadDescriptor::scatter_copyout_data()
4465{
4466 OffloadTimer timer(get_timer_data(), c_offload_host_scatter_outputs);
4467
4468 if (m_need_runfunction && m_out_datalen > 0) {
4469
 4470         // total size that needs to be transferred from target to host
4471 COIMAPINSTANCE map_inst;
4472 COIRESULT res;
4473 char *data;
4474
4475 // output data buffer
4476 if (m_func_desc->data_offset == 0) {
4477 OffloadTimer timer_map(get_timer_data(),
4478 c_offload_host_map_out_data_buffer);
4479
4480 COIRESULT res = COI::BufferMap(m_inout_buf, 0, m_out_datalen,
4481 COI_MAP_READ_ONLY, 0, 0, 0,
4482 &map_inst,
4483 reinterpret_cast<void**>(&data));
4484 if (res != COI_SUCCESS) {
4485 if (m_status != 0) {
4486 m_status->result = translate_coi_error(res);
4487 return false;
4488 }
4489 report_coi_error(c_buf_map, res);
4490 }
4491 }
4492 else {
4493 data = (char*) m_func_desc + m_func_desc->data_offset;
4494 }
4495
4496 // get timing data
4497 OFFLOAD_TIMER_TARGET_DATA(get_timer_data(), data);
4498 data += OFFLOAD_TIMER_DATALEN();
4499
4500 // initialize output marshaller
4501 m_out.init_buffer(data, m_out_datalen);
4502
4503 for (int i = 0; i < m_vars_total; i++) {
4504 bool src_is_for_mic = (m_vars[i].direction.out ||
4505 m_vars[i].into == NULL);
4506
4507 if (m_vars[i].type.src != c_data_ptr_array &&
4508 m_vars[i].flags.preallocated && m_vars[i].alloc_if) {
4509 PtrData *ptr_data;
4510 void *ptr_value;
4511 void ** cpu_ptr = src_is_for_mic ?
4512 reinterpret_cast<void**>(m_vars[i].ptr) :
4513 reinterpret_cast<void**>(m_vars[i].into);
4514 void* alloc_base = NULL;
4515 int64_t alloc_disp = 0;
4516 int64_t alloc_size;
4517 if (m_vars_extra[i].alloc != NULL) {
4518 // array descriptor
4519 const Arr_Desc *ap =
4520 static_cast<const Arr_Desc*>(m_vars_extra[i].alloc);
4521
4522 __arr_data_offset_and_length(ap, alloc_disp, alloc_size);
4523
4524 alloc_base = reinterpret_cast<void*>(ap->base);
4525 }
4526
4527 // get pointer to target memory
4528 m_out.receive_data(&ptr_value, sizeof(void*));
4529
4530 // add new entry
4531 if (!alloc_ptr_data(
4532 ptr_data,
4533 ptr_value,
4534 (alloc_base != NULL) ?
4535 alloc_disp : m_vars[i].disp,
4536 (alloc_base != NULL) ?
4537 alloc_size : m_vars[i].size,
4538 alloc_disp,
4539 0,
4540 m_vars[i].flags.targetptr,
4541 m_vars[i].flags.preallocated,
4542 m_vars[i].flags.pin)) {
4543 return false;
4544 }
4545
4546 ptr_data->add_reference();
4547 *cpu_ptr = ptr_value;
4548 if (src_is_for_mic) {
4549 m_vars_extra[i].src_data = ptr_data;
4550 }
4551 else {
4552 m_vars_extra[i].dst_data = ptr_data;
4553 }
4554 m_vars[i].offset = (char*) ptr_value -
4555 (char*) ptr_data->cpu_addr.start();
4556 }
4557
4558 switch (m_vars[i].type.src) {
4559 case c_data_ptr_array:
4560 break;
4561 case c_data:
4562 case c_void_ptr:
4563 case c_cean_var:
4564 if (m_vars[i].direction.out &&
4565 !m_vars[i].flags.is_static) {
4566
4567 if (m_vars[i].into) {
4568 char *ptr = offload_get_src_base(
4569 static_cast<char*>(m_vars[i].into),
4570 m_vars[i].type.dst);
4571 m_out.receive_data(ptr + m_vars_extra[i].cpu_disp,
4572 m_vars[i].size);
4573 }
4574 else {
4575 m_out.receive_data(
4576 static_cast<char*>(m_vars[i].ptr) +
4577 m_vars_extra[i].cpu_disp,
4578 m_vars[i].size);
4579 }
4580 }
4581 break;
4582
4583 case c_func_ptr:
4584 if (m_vars[i].direction.out) {
4585 m_out.receive_func_ptr((const void**) m_vars[i].ptr);
4586 }
4587 break;
4588
4589 default:
4590 break;
4591 }
4592 }
4593
4594 if (m_status) {
4595 m_status->data_received += m_out.get_tfr_size();
4596 }
4597
4598 if (m_func_desc->data_offset == 0) {
4599 OffloadTimer timer_unmap(get_timer_data(),
4600 c_offload_host_unmap_out_data_buffer);
4601
4602 COIRESULT res = COI::BufferUnmap(map_inst, 0, 0, 0);
4603 if (res != COI_SUCCESS) {
4604 if (m_status != 0) {
4605 m_status->result = translate_coi_error(res);
4606 return false;
4607 }
4608 report_coi_error(c_buf_unmap, res);
4609 }
4610 }
4611 }
4612
4613 OFFLOAD_TIMER_HOST_RDATA(get_timer_data(), m_out.get_tfr_size());
4614 OFFLOAD_TRACE(1, "Total copyout data received from target: [%lld] bytes\n",
4615 m_out.get_tfr_size());
4616
4617 return true;
4618}
4619
4620static void get_arr_desc_numbers(
4621 const Arr_Desc *ap,
4622 int64_t el_size,
4623 int64_t &offset,
4624 int64_t &size,
4625 int &el_number,
4626 CeanReadRanges* &ptr_ranges
4627)
4628{
4629 if (is_arr_desc_contiguous(ap)) {
4630 ptr_ranges = NULL;
4631 __arr_data_offset_and_length(ap, offset, size);
4632 el_number = size / el_size;
4633 }
4634 else {
4635 ptr_ranges = init_read_ranges_arr_desc(ap);
4636 el_number = (ptr_ranges->range_size / el_size) *
4637 ptr_ranges->range_max_number;
4638 size = ptr_ranges->range_size;
4639 }
4640}
4641
4642bool OffloadDescriptor::gen_var_descs_for_pointer_array(int i)
4643{
4644 int pointers_number;
4645 int tmp_val;
4646 int new_index = m_vars_total;
2eab9666 4647 const Arr_Desc *ap;
4648 const VarDesc3 *vd3 = static_cast<const VarDesc3*>(m_vars[i].ptr);
4649 int flags = vd3->array_fields;
4650 bool src_is_for_mic = (m_vars[i].direction.out ||
4651 m_vars[i].into == NULL);
4652
4653 ReadArrElements<void *> ptr;
4654 ReadArrElements<void *> into;
4655 ReadArrElements<int64_t> ext_start;
4656 ReadArrElements<int64_t> ext_elements;
4657 ReadArrElements<int64_t> align;
4658 ReadArrElements<int64_t> alloc_if;
4659 ReadArrElements<int64_t> free_if;
4660 ReadArrElements<int64_t> into_start;
4661 ReadArrElements<int64_t> into_elem;
4662 ReadArrElements<int64_t> alloc_start;
4663 ReadArrElements<int64_t> alloc_elem;
4664
4665
2eab9666 4666 ap = static_cast<const Arr_Desc*>(vd3->ptr_array);
5f520819 4667
2eab9666 4668     // "pointers_number" is the total number of transferred pointers.
 4669     // For each of them we create a new var_desc and put it at the bottom
 4670     // of the var_desc array
4671 get_arr_desc_numbers(ap, sizeof(void *), ptr.offset, ptr.size,
4672 pointers_number, ptr.ranges);
4673 ptr.base = (m_vars[i].flags.is_pointer) ?
4674 *(reinterpret_cast<char**>(ap->base)) :
4675 reinterpret_cast<char*>(ap->base);
4676
4677 // 2. prepare memory for new var_descs
4678 m_vars_total += pointers_number;
4679 m_vars = (VarDesc*)realloc(m_vars, m_vars_total * sizeof(VarDesc));
4680 if (m_vars == NULL)
4681 LIBOFFLOAD_ERROR(c_malloc);
4682 m_vars_extra =
4683 (VarExtra*)realloc(m_vars_extra, m_vars_total * sizeof(VarExtra));
4684 if (m_vars_extra == NULL)
4685 LIBOFFLOAD_ERROR(c_malloc);
4686 m_in_deps =
4687 (COIEVENT*)realloc(m_in_deps, sizeof(COIEVENT) * (m_vars_total + 1));
4688 if (m_in_deps == NULL)
4689 LIBOFFLOAD_ERROR(c_malloc);
4690 m_out_deps =
4691 (COIEVENT*)realloc(m_out_deps, sizeof(COIEVENT) * m_vars_total);
4692 if (m_out_deps == NULL)
4693 LIBOFFLOAD_ERROR(c_malloc);
4694
4695 // 3. Prepare for reading new var_desc's fields
4696 // EXTENT START
4697 if ((flags & (1<<flag_extent_start_is_array)) != 0) {
2eab9666 4698 ap = static_cast<const Arr_Desc*>(vd3->extent_start);
4699 get_arr_desc_numbers(ap, ap->dim[ap->rank - 1].size, ext_start.offset,
4700 ext_start.size, tmp_val, ext_start.ranges);
4701 ext_start.base = reinterpret_cast<char*>(ap->base);
4702 ext_start.el_size = ap->dim[ap->rank - 1].size;
4703
4704 if (tmp_val < pointers_number) {
4705 LIBOFFLOAD_ERROR(c_pointer_array_mismatch, "extent start");
4706 return false;
4707 }
4708 }
4709 else if ((flags & (1<<flag_extent_start_is_scalar)) != 0) {
4710 ext_start.val = (int64_t)vd3->extent_start;
4711 }
4712 else {
4713 ext_start.val = 0;
4714 }
4715
4716 // EXTENT ELEMENTS NUMBER
4717 if ((flags & (1<<flag_extent_elements_is_array)) != 0) {
2eab9666 4718 ap = static_cast<const Arr_Desc*>(vd3->extent_elements);
4719 get_arr_desc_numbers(ap, ap->dim[ap->rank - 1].size,
4720 ext_elements.offset, ext_elements.size,
4721 tmp_val, ext_elements.ranges);
4722 ext_elements.base = reinterpret_cast<char*>(ap->base);
4723 ext_elements.el_size = ap->dim[ap->rank - 1].size;
4724
4725 if (tmp_val < pointers_number) {
4726 LIBOFFLOAD_ERROR(c_pointer_array_mismatch, "extent elements");
4727 return false;
4728 }
4729 }
4730 else if ((flags & (1<<flag_extent_elements_is_scalar)) != 0) {
4731 ext_elements.val = (int64_t)vd3->extent_elements;
4732 }
4733 else {
4734 ext_elements.val = m_vars[i].count;
4735 }
4736
4737 // ALLOC_IF
4738 if ((flags & (1<<flag_alloc_if_is_array)) != 0) {
2eab9666 4739 ap = static_cast<const Arr_Desc*>(vd3->alloc_if_array);
4740 get_arr_desc_numbers(ap, ap->dim[ap->rank - 1].size, alloc_if.offset,
4741 alloc_if.size, tmp_val, alloc_if.ranges);
4742 alloc_if.base = reinterpret_cast<char*>(ap->base);
4743 alloc_if.el_size = ap->dim[ap->rank - 1].size;
4744
4745 if (tmp_val < pointers_number) {
4746 LIBOFFLOAD_ERROR(c_pointer_array_mismatch, "alloc_if");
4747 return false;
4748 }
4749 }
4750 else {
2eab9666 4751 alloc_if.val = m_vars[i].alloc_if;
4752 }
4753
4754 // FREE_IF
4755 if ((flags & (1<<flag_free_if_is_array)) != 0) {
2eab9666 4756 ap = static_cast<const Arr_Desc*>(vd3->free_if_array);
4757 get_arr_desc_numbers(ap, ap->dim[ap->rank - 1].size, free_if.offset,
4758 free_if.size, tmp_val, free_if.ranges);
4759 free_if.base = reinterpret_cast<char*>(ap->base);
4760 free_if.el_size = ap->dim[ap->rank - 1].size;
4761
4762 if (tmp_val < pointers_number) {
4763 LIBOFFLOAD_ERROR(c_pointer_array_mismatch, "free_if");
4764 return false;
4765 }
4766 }
4767 else {
2eab9666 4768 free_if.val = m_vars[i].free_if;
4769 }
4770
4771 // ALIGN
4772
4773 if ((flags & (1<<flag_align_is_array)) != 0) {
2eab9666 4774 ap = static_cast<const Arr_Desc*>(vd3->align_array);
4775 get_arr_desc_numbers(ap, ap->dim[ap->rank - 1].size, align.offset,
4776 align.size, tmp_val, align.ranges);
4777 align.base = reinterpret_cast<char*>(ap->base);
4778 align.el_size = ap->dim[ap->rank - 1].size;
4779
4780 if (tmp_val < pointers_number) {
4781 LIBOFFLOAD_ERROR(c_pointer_array_mismatch, "align");
4782 return false;
4783 }
4784 }
4785 else {
4786 align.val = m_vars[i].align;
4787 }
4788
4789 // 3.1 INTO
4790
4791 if (m_vars[i].into) {
2eab9666 4792 ap = static_cast<const Arr_Desc*>(m_vars[i].into);
4793 get_arr_desc_numbers(ap, ap->dim[ap->rank - 1].size, into.offset,
4794 into.size, tmp_val, into.ranges);
4795 into.base = reinterpret_cast<char*>(ap->base);
4796
4797 if (tmp_val < pointers_number) {
4798 LIBOFFLOAD_ERROR(c_pointer_array_mismatch, "into");
4799 return false;
4800 }
4801 }
4802
4803 // 3.2 INTO_START
4804
4805 if ((flags & (1<<flag_into_start_is_array)) != 0) {
2eab9666 4806 ap = static_cast<const Arr_Desc*>(vd3->into_start);
4807 get_arr_desc_numbers(ap, ap->dim[ap->rank - 1].size, into_start.offset,
4808 into_start.size, tmp_val, into_start.ranges);
4809 into_start.base = reinterpret_cast<char*>(ap->base);
4810 into_start.el_size = ap->dim[ap->rank - 1].size;
4811
4812 if (tmp_val < pointers_number) {
4813 LIBOFFLOAD_ERROR(c_pointer_array_mismatch, "into_extent start");
4814 return false;
4815 }
4816 }
4817 else if ((flags & (1<<flag_into_start_is_scalar)) != 0) {
4818 into_start.val = (int64_t)vd3->into_start;
4819 }
4820 else {
4821 into_start.val = 0;
4822 }
4823
4824 // 3.3 INTO_ELEMENTS
4825
4826 if ((flags & (1<<flag_into_elements_is_array)) != 0) {
2eab9666 4827 ap = static_cast<const Arr_Desc*>(vd3->into_elements);
4828 get_arr_desc_numbers(ap, ap->dim[ap->rank - 1].size, into_elem.offset,
4829 into_elem.size, tmp_val, into_elem.ranges);
4830 into_elem.base = reinterpret_cast<char*>(ap->base);
4831 into_elem.el_size = ap->dim[ap->rank - 1].size;
4832
4833 if (tmp_val < pointers_number) {
4834 LIBOFFLOAD_ERROR(c_pointer_array_mismatch, "into_extent elements");
4835 return false;
4836 }
4837 }
4838 else if ((flags & (1<<flag_into_elements_is_scalar)) != 0) {
4839 into_elem.val = (int64_t)vd3->into_elements;
4840 }
4841 else {
4842 into_elem.val = m_vars[i].count;
4843 }
4844
4845 // alloc_start
4846
4847 if ((flags & (1<<flag_alloc_start_is_array)) != 0) {
2eab9666 4848 ap = static_cast<const Arr_Desc*>(vd3->alloc_start);
4849 get_arr_desc_numbers(ap, ap->dim[ap->rank - 1].size,
4850 alloc_start.offset, alloc_start.size, tmp_val,
4851 alloc_start.ranges);
4852 alloc_start.base = reinterpret_cast<char*>(ap->base);
4853 alloc_start.el_size = ap->dim[ap->rank - 1].size;
4854
4855 if (tmp_val < pointers_number) {
4856 LIBOFFLOAD_ERROR(c_pointer_array_mismatch, "alloc_extent start");
4857 return false;
4858 }
4859 }
4860 else if ((flags & (1<<flag_alloc_start_is_scalar)) != 0) {
4861 alloc_start.val = (int64_t)vd3->alloc_start;
4862 }
4863 else {
4864 alloc_start.val = 0;
4865 }
4866
4867 // alloc_elem
4868
4869 if ((flags & (1<<flag_alloc_elements_is_array)) != 0) {
2eab9666 4870 ap = static_cast<const Arr_Desc*>(vd3->alloc_elements);
4871 get_arr_desc_numbers(ap, ap->dim[ap->rank - 1].size, alloc_elem.offset,
4872 alloc_elem.size, tmp_val, alloc_elem.ranges);
4873 alloc_elem.base = reinterpret_cast<char*>(ap->base);
4874 alloc_elem.el_size = ap->dim[ap->rank - 1].size;
4875 if (tmp_val < pointers_number) {
4876 LIBOFFLOAD_ERROR(c_pointer_array_mismatch,
4877 "alloc_extent elements");
4878 return false;
4879 }
4880 }
4881 else if ((flags & (1<<flag_alloc_elements_is_scalar)) != 0) {
4882 alloc_elem.val = (int64_t)vd3->alloc_elements;
4883 }
4884 else {
4885 alloc_elem.val = 0;
4886 }
4887
4888 for (int k = 0; k < pointers_number; k++) {
4889 int type = flags & 0x3f;
4890 int type_src, type_dst;
4891 // Get new values
4892 // type_src, type_dst
4893 type_src = type_dst = (type == c_data_ptr_array) ?
4894 c_data_ptr : (type == c_func_ptr_array) ?
4895 c_func_ptr : (type == c_void_ptr_array) ?
4896 c_void_ptr : (type == c_string_ptr_array) ?
4897 c_string_ptr : 0;
4898
4899 // Get ptr val
4900 if (!ptr.read_next(true)) {
4901 break;
4902 }
4903 else {
4904 ptr.val = (void*)(ptr.base + ptr.offset);
4905 }
4906
 4907         // !!! If we get an error at the reading phase it's an internal
 4908         // !!! error, as any mismatch must have been detected before
4909
4910 // Get into val
4911 if (m_vars[i].into) {
4912 if (!into.read_next(true)) {
4913 LIBOFFLOAD_ERROR(c_pointer_array_mismatch, "into");
4914 LIBOFFLOAD_ABORT;
4915 }
4916 else {
4917 into.val = (void*)(into.base + into.offset);
4918 }
4919 }
4920
4921 // Get other components of the clause
4922 if (!ext_start.read_next(flags & (1<<flag_extent_start_is_array))) {
4923 LIBOFFLOAD_ERROR(c_pointer_array_mismatch, "extent start");
4924 LIBOFFLOAD_ABORT;
4925 }
4926 if (!ext_elements.read_next(
4927 flags & (1<<flag_extent_elements_is_array))) {
4928 LIBOFFLOAD_ERROR(c_pointer_array_mismatch, "extent elements");
4929 LIBOFFLOAD_ABORT;
4930 }
4931 if (!alloc_if.read_next(flags & (1<<flag_alloc_if_is_array))) {
4932 LIBOFFLOAD_ERROR(c_pointer_array_mismatch, "alloc_if");
4933 LIBOFFLOAD_ABORT;
4934 }
4935 if (!free_if.read_next(flags & (1<<flag_free_if_is_array))) {
4936 LIBOFFLOAD_ERROR(c_pointer_array_mismatch, "free_if");
4937 LIBOFFLOAD_ABORT;
4938 }
4939 if (!align.read_next(flags & (1<<flag_align_is_array))) {
4940 LIBOFFLOAD_ERROR(c_pointer_array_mismatch, "align");
4941 LIBOFFLOAD_ABORT;
4942 }
4943 if (!into_start.read_next(flags & (1<<flag_into_start_is_array))) {
4944 LIBOFFLOAD_ERROR(c_pointer_array_mismatch, "into_extent start");
4945 LIBOFFLOAD_ABORT;
4946 }
4947 if (!into_elem.read_next(flags & (1<<flag_into_elements_is_array))) {
4948 LIBOFFLOAD_ERROR(c_pointer_array_mismatch, "into_extent elements");
4949 LIBOFFLOAD_ABORT;
4950 }
4951 if (!alloc_start.read_next(flags & (1<<flag_alloc_start_is_array))) {
4952 LIBOFFLOAD_ERROR(c_pointer_array_mismatch, "alloc_extent start");
4953 LIBOFFLOAD_ABORT;
4954 }
4955 if (!alloc_elem.read_next(
4956 flags & (1<<flag_alloc_elements_is_array))) {
4957 LIBOFFLOAD_ERROR(c_pointer_array_mismatch, "alloc_extent elements");
4958 LIBOFFLOAD_ABORT;
4959 }
4960
4961 m_vars[new_index + k].direction.bits = m_vars[i].direction.bits;
4962 m_vars[new_index + k].alloc_if = alloc_if.val;
4963 m_vars[new_index + k].free_if = free_if.val;
4964 m_vars[new_index + k].align = align.val;
4965 m_vars[new_index + k].mic_offset = 0;
4966 m_vars[new_index + k].flags.bits = m_vars[i].flags.bits;
4967 m_vars[new_index + k].offset = 0;
4968 m_vars[new_index + k].size = m_vars[i].size;
4969 m_vars[new_index + k].flags.targetptr = m_vars[i].flags.targetptr;
4970 m_vars[new_index + k].flags.preallocated =
4971 m_vars[i].flags.preallocated;
4972
4973 if (ext_start.val == 0) {
4974 m_vars[new_index + k].count = ext_elements.val;
4975 m_vars[new_index + k].ptr = ptr.val;
4976 if (type_src == c_string_ptr) {
4977 m_vars[new_index + k].size = 0;
4978 }
4979 }
4980 else {
4981 m_vars[new_index + k].count = 0;
4982 m_vars[new_index + k].ptr =
4983 static_cast<void*>(make_arr_desc(
4984 ptr.val,
4985 ext_start.val,
4986 ext_elements.val,
4987 m_vars[i].size));
4988
4989 type_src = type_src == c_data_ptr ? c_cean_var_ptr :
 4990                        type_src == c_string_ptr ? c_cean_var_ptr :
4991 type_src;
4992 if (!m_vars[i].into) {
4993 type_dst = type_src;
4994 }
4995 }
4996
4997 if (m_vars[i].into && into_elem.val != 0) {
4998 m_vars[new_index + k].into =
4999 static_cast<void*>(make_arr_desc(
5000 into.val,
5001 into_start.val,
5002 into_elem.val,
5003 m_vars[i].size));
5004 type_dst = (type == c_data_ptr_array) ? c_cean_var_ptr :
5005 (type == c_string_ptr_array) ? c_cean_var_ptr :
5006 type_src;
5007 }
5008 else {
5009 m_vars[new_index + k].into = NULL;
5010 }
5011
5012 if (alloc_elem.val != 0) {
5013 m_vars[new_index + k].alloc =
5014 static_cast<void*>(make_arr_desc(
5015 ptr.val,
5016 alloc_start.val,
5017 alloc_elem.val,
5018 m_vars[i].size));
5019 }
5020 else {
5021 m_vars[new_index + k].alloc = NULL;
5022 }
5023
5024 m_vars[new_index + k].type.src = type_src;
5025 m_vars[new_index + k].type.dst = type_dst;
5026
2eab9666 5027 m_vars_extra[new_index + k].alloc = m_vars[new_index + k].alloc;
5028 m_vars_extra[new_index + k].is_arr_ptr_el = 1;
5029 m_vars_extra[new_index + k].ptr_arr_offset =
5030 src_is_for_mic ? ptr.offset : into.offset;
5031 }
5032 // count and alloc fields are useless at target. They can be reused
5033 // for pointer arrays.
5034 m_vars[i].count = pointers_number;
5035 m_vars[i].ptr_arr_offset = new_index;
5036 return true;
5037}
5038
 5039// Gets the in-dependencies of the previous offload issued via the stream "m_stream".
 5040// Out argument in_deps_amount - number of dependencies.
 5041// Out argument in_deps - array of dependencies.
 5042// Description of the dependency scheme for streams:
 5043// ----------------------------------------------------
 5044// Every offload forms a DAG consisting of 3 nodes:
 5045// in-transfers, runfunction and out-transfers.
 5046// Every node has in-dependencies and out-dependencies.
 5047// The out-dependencies of the previous node form the in-dependencies of the current node.
 5048// Without streams, the in-dependencies of the 1st node (in-transfers) are
 5049// NULL. With streams, the in-dependencies of the 1st node are the list of
 5050// out-dependencies of the last node of the previous offload via this stream.
 5051// So the DAGs of 2 consecutive offloads via the same stream are
 5052// connected in the way described above.
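// Illustrative example (added, not in the original source): for two offloads
// A and B queued on the same stream, the out-events of A's last node (its
// out-transfers, or its in-transfers/runfunction events if A produced no
// out-transfers) are returned here as the in-dependencies of B's first node.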
5053void OffloadDescriptor::get_stream_in_dependencies(
5054 uint32_t &in_deps_amount,
5055 COIEVENT* &in_deps
5056)
5057{
5058 if (m_stream != no_stream && m_stream != 0) {
5059 Stream * stream = Stream::find_stream(m_stream, false);
5060 if (!stream) {
5061 LIBOFFLOAD_ERROR(c_offload_no_stream,
5062 m_device.get_logical_index());
5063 LIBOFFLOAD_ABORT;
5064 }
5065 OffloadDescriptor* offload = stream->get_last_offload();
5066
5067 // if it's the first offload in the stream
5068 if (!offload) {
5069 return;
5070 }
 5071         // if the last offload has out-transfers
5072 if (offload->m_out_deps_total) {
5073 in_deps_amount = offload->m_out_deps_total;
5074 in_deps = offload->m_out_deps;
5075 }
 5076         // the last offload only sends pointer data or runs the function (or both)
 5077         // and has no out-transfers
5078 else if (offload->m_in_deps_total) {
5079 in_deps_amount = offload->m_in_deps_total;
5080 in_deps = offload->m_in_deps;
5081 }
5082 }
5083}
5084
5085static void __offload_fini_library(void)
5086{
5087 OFFLOAD_DEBUG_TRACE(2, "Cleanup offload library ...\n");
5088 if (mic_engines_total > 0) {
5089 delete[] mic_engines;
0b7c37ee 5090 mic_engines_total = 0;
5091
5092 if (mic_proxy_fs_root != 0) {
5093 free(mic_proxy_fs_root);
5094 mic_proxy_fs_root = 0;
5095 }
5096
5097 if (mic_library_path != 0) {
5098 free(mic_library_path);
5099 mic_library_path = 0;
5100 }
5101
5102 // destroy thread key
5103 thread_key_delete(mic_thread_key);
5104 }
5105
5106 // unload COI library
5107 if (COI::is_available) {
5108 COI::fini();
5109 }
5110
5111 OFFLOAD_DEBUG_TRACE(2, "Cleanup offload library ... done\n");
5112}
5113
5114static void __offload_init_library_once(void)
5115{
5116 COIRESULT res;
5117 uint32_t num_devices;
5118 std::bitset<MIC_ENGINES_MAX> devices;
5119 prefix = report_get_message_str(c_report_host);
5120
5121 // initialize trace
5122 const char *env_var = getenv(htrace_envname);
5123 if (env_var != 0 && *env_var != '\0') {
5124 int64_t new_val;
5125 if (__offload_parse_int_string(env_var, new_val)) {
5126 console_enabled = new_val & 0x0f;
5127 }
5128 }
5129
5130 env_var = getenv(offload_report_envname);
5131 if (env_var != 0 && *env_var != '\0') {
5132 int64_t env_val;
5133 if (__offload_parse_int_string(env_var, env_val)) {
5134 if (env_val == OFFLOAD_REPORT_1 ||
5135 env_val == OFFLOAD_REPORT_2 ||
5136 env_val == OFFLOAD_REPORT_3) {
5137 offload_report_level = env_val;
5138 }
5139 else {
5140 LIBOFFLOAD_ERROR(c_invalid_env_report_value,
5141 offload_report_envname);
5142 }
5143 }
5144 else {
5145 LIBOFFLOAD_ERROR(c_invalid_env_var_int_value,
5146 offload_report_envname);
5147 }
5148 }
5149 else if (!offload_report_level) {
5150 env_var = getenv(timer_envname);
5151 if (env_var != 0 && *env_var != '\0') {
5152 timer_enabled = atoi(env_var);
5153 }
5154 }
5155
5156 // initialize COI
5157 if (!COI::init()) {
5158 return;
5159 }
5160
5161 // get number of devices installed in the system
2eab9666 5162 res = COI::EngineGetCount(COI_ISA_MIC, &num_devices);
5163 if (res != COI_SUCCESS) {
5164 return;
5165 }
5166
5167 if (num_devices > MIC_ENGINES_MAX) {
5168 num_devices = MIC_ENGINES_MAX;
5169 }
5170
5171 // fill in the list of devices that can be used for offloading
5172 env_var = getenv("OFFLOAD_DEVICES");
5173 if (env_var != 0) {
5174 if (strcasecmp(env_var, "none") != 0) {
5175 // value is composed of comma separated physical device indexes
5176 char *buf = strdup(env_var);
5177 if (buf == NULL)
5178 LIBOFFLOAD_ERROR(c_malloc);
5179 char *str, *ptr;
5180 for (str = strtok_r(buf, ",", &ptr); str != 0;
5181 str = strtok_r(0, ",", &ptr)) {
5182 // convert string to an int
5183 int64_t num;
5184 if (!__offload_parse_int_string(str, num)) {
5185 LIBOFFLOAD_ERROR(c_mic_init5);
5186
5187 // fallback to using all installed devices
5188 devices.reset();
5189 for (int i = 0; i < num_devices; i++) {
5190 devices.set(i);
5191 }
5192 break;
5193 }
5194 if (num < 0 || num >= num_devices) {
5195 LIBOFFLOAD_ERROR(c_mic_init6, num);
5196 continue;
5197 }
5198 devices.set(num);
5199 }
5200 free(buf);
5201 }
5202 }
5203 else {
5204 // use all available devices
5205 for (int i = 0; i < num_devices; i++) {
5206 COIENGINE engine;
2eab9666 5207 res = COI::EngineGetHandle(COI_ISA_MIC, i, &engine);
5208 if (res == COI_SUCCESS) {
5209 devices.set(i);
5210 }
5211 }
5212 }
5213
5214 mic_engines_total = devices.count();
5215
5216 // no need to continue if there are no devices to offload to
5217 if (mic_engines_total <= 0) {
5218 return;
5219 }
5220
5221 // initialize indexes for available devices
5222 mic_engines = new Engine[mic_engines_total];
5223 for (int p_idx = 0, l_idx = 0; p_idx < num_devices; p_idx++) {
5224 if (devices[p_idx]) {
5225 mic_engines[l_idx].set_indexes(l_idx, p_idx);
5226 l_idx++;
5227 }
5228 }
5229
5230 // Get DMA channel count to pass it to COI
5231 env_var = getenv("OFFLOAD_DMA_CHANNEL_COUNT");
5232 if (env_var != 0) {
5233 int64_t new_val;
5234 if (__offload_parse_int_string(env_var, new_val)) {
5235 mic_dma_channel_count = new_val;
5236 }
5237 else {
5238 LIBOFFLOAD_ERROR(c_invalid_env_var_value,
5239 "OFFLOAD_DMA_CHANNEL_COUNT");
5240 }
5241 }
5242
5243 // Set COI_HOST_THREAD_AFFINITY if OFFLOAD_HOST_THREAD_AFFINITY is set.
5244 // Use putenv instead of setenv as Windows has no setenv.
 5245     // Note: putenv requires that its argument not be freed or modified.
 5246     // So there is no free after the call to putenv or elsewhere.
5247 env_var = getenv("OFFLOAD_HOST_THREAD_AFFINITY");
5248 if (env_var != 0) {
5249 char * new_env_var =
5250 (char*) malloc(sizeof("COI_HOST_THREAD_AFFINITY=") +
5251 strlen(env_var));
5252 if (new_env_var == NULL)
5253 LIBOFFLOAD_ERROR(c_malloc);
5254 sprintf(new_env_var, "COI_HOST_THREAD_AFFINITY=%s", env_var);
5255 putenv(new_env_var);
5256 }
5257
5258 // library search path for device binaries
5259 env_var = getenv("MIC_LD_LIBRARY_PATH");
5260 if (env_var != 0) {
5261 mic_library_path = strdup(env_var);
5262 if (mic_library_path == NULL)
5263 LIBOFFLOAD_ERROR(c_malloc);
5264 }
5265
5266
 5267     // find the target executable to be used if the main application is not
 5268     // built as an offload application.
5269 const char *base_name = "offload_main";
5270 if (mic_library_path != 0) {
5271 char *buf = strdup(mic_library_path);
5272 if (buf == NULL)
5273 LIBOFFLOAD_ERROR(c_malloc);
5274 char *try_name = (char*) alloca(strlen(mic_library_path) +
5275 strlen(base_name) + 2);
5276 char *dir, *ptr;
5277
5278 for (dir = strtok_r(buf, PATH_SEPARATOR, &ptr); dir != 0;
5279 dir = strtok_r(0, PATH_SEPARATOR, &ptr)) {
5280 // compose a full path
5281 sprintf(try_name, "%s/%s", dir, base_name);
5282
5283 // check if such file exists
5284 struct stat st;
5285 if (stat(try_name, &st) == 0 && S_ISREG(st.st_mode)) {
5286 mic_device_main = strdup(try_name);
5287 if (mic_device_main == NULL)
5288 LIBOFFLOAD_ERROR(c_malloc);
5289 break;
5290 }
5291 }
5292
5293 free(buf);
5294 }
5295
5296 // memory size reserved for COI buffers
5297 env_var = getenv("MIC_BUFFERSIZE");
5298 if (env_var != 0) {
5299 uint64_t new_size;
5300 if (__offload_parse_size_string(env_var, new_size)) {
5301 mic_buffer_size = new_size;
5302 }
5303 else {
5304 LIBOFFLOAD_ERROR(c_invalid_env_var_value, "MIC_BUFFERSIZE");
5305 }
5306 }
5307
5308 // memory size reserved for 4K pages for COI buffers
5309 env_var = getenv("MIC_4K_BUFFER_RESERVE_SIZE");
5310 if (env_var != 0) {
5311 uint64_t new_size;
5312 if (__offload_parse_size_string(env_var, new_size)) {
5313 mic_4k_buffer_size = new_size;
5314 }
5315 else {
5316 LIBOFFLOAD_ERROR(c_invalid_env_var_value, "MIC_4K_BUFFER_RESERVE_SIZE");
5317 }
5318 }
5319
5320 // memory size reserved for 2M pages for COI buffers
5321 env_var = getenv("MIC_2M_BUFFER_RESERVE_SIZE");
5322 if (env_var != 0) {
5323 uint64_t new_size;
5324 if (__offload_parse_size_string(env_var, new_size)) {
5325 mic_2m_buffer_size = new_size;
5326 }
5327 else {
5328 LIBOFFLOAD_ERROR(c_invalid_env_var_value, "MIC_2M_BUFFER_RESERVE_SIZE");
5329 }
5330 }
5331
5332 // determine stacksize for the pipeline on the device
5333 env_var = getenv("MIC_STACKSIZE");
5334 if (env_var != 0 && *env_var != '\0') {
5335 uint64_t new_size;
5336 if (__offload_parse_size_string(env_var, new_size) &&
5337 (new_size >= 16384) && ((new_size & 4095) == 0)) {
5338 mic_stack_size = new_size;
5339 }
5340 else {
5341 LIBOFFLOAD_ERROR(c_mic_init3);
5342 }
5343 }
5344
5345 // proxy I/O
5346 env_var = getenv("MIC_PROXY_IO");
5347 if (env_var != 0 && *env_var != '\0') {
5348 int64_t new_val;
5349 if (__offload_parse_int_string(env_var, new_val)) {
5350 mic_proxy_io = new_val;
5351 }
5352 else {
5353 LIBOFFLOAD_ERROR(c_invalid_env_var_int_value, "MIC_PROXY_IO");
5354 }
5355 }
5356 env_var = getenv("MIC_PROXY_FS_ROOT");
5357 if (env_var != 0 && *env_var != '\0') {
5358 mic_proxy_fs_root = strdup(env_var);
5359 if (mic_proxy_fs_root == NULL)
5360 LIBOFFLOAD_ERROR(c_malloc);
5361 }
5362
 5363     // Prepare the environment for the target process using the following
 5364     // rules:
 5365     // - If MIC_ENV_PREFIX is set, then any environment variable on the
 5366     //   host which has that prefix is copied to the device without
 5367     //   the prefix.
 5368     //   All other host environment variables are ignored.
 5369     // - If MIC_ENV_PREFIX is not set or MIC_ENV_PREFIX="", then the host
 5370     //   environment is duplicated.
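// Illustrative example (added, not in the original source): with
// MIC_ENV_PREFIX=MIC_, a hypothetical host variable MIC_OMP_NUM_THREADS=4
// would reach the target as OMP_NUM_THREADS=4, while unprefixed host
// variables would not be forwarded.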
5371 env_var = getenv("MIC_ENV_PREFIX");
5372 if (env_var != 0 && *env_var != '\0') {
5373 mic_env_vars.set_prefix(env_var);
5374
5375 int len = strlen(env_var);
5376 for (int i = 0; environ[i] != 0; i++) {
5377 if (strncmp(environ[i], env_var, len) == 0 &&
5378 strncmp(environ[i], "MIC_LD_LIBRARY_PATH", 19) != 0 &&
5379 environ[i][len] != '=') {
5380 mic_env_vars.analyze_env_var(environ[i]);
5381 }
5382 }
5383 }
5384
5385 // create key for thread data
5386 if (thread_key_create(&mic_thread_key, Engine::destroy_thread_data)) {
5387 LIBOFFLOAD_ERROR(c_mic_init4, errno);
5388 return;
5389 }
5390
5391 // cpu frequency
5392 cpu_frequency = COI::PerfGetCycleFrequency();
5393
5394 env_var = getenv(mic_use_2mb_buffers_envname);
5395 if (env_var != 0 && *env_var != '\0') {
5396 uint64_t new_size;
5397 if (__offload_parse_size_string(env_var, new_size)) {
5398 __offload_use_2mb_buffers = new_size;
5399 }
5400 else {
5401 LIBOFFLOAD_ERROR(c_invalid_env_var_value,
5402 mic_use_2mb_buffers_envname);
5403 }
5404 }
5405
5406 env_var = getenv(mic_use_async_buffer_write_envname);
5407 if (env_var != 0 && *env_var != '\0') {
5408 uint64_t new_size;
5409 if (__offload_parse_size_string(env_var, new_size)) {
5410 __offload_use_async_buffer_write = new_size;
5411 }
5412 }
5413
5414 env_var = getenv(mic_use_async_buffer_read_envname);
5415 if (env_var != 0 && *env_var != '\0') {
5416 uint64_t new_size;
5417 if (__offload_parse_size_string(env_var, new_size)) {
5418 __offload_use_async_buffer_read = new_size;
5419 }
5420 }
5421
5422 // mic initialization type
5423 env_var = getenv(offload_init_envname);
5424 if (env_var != 0 && *env_var != '\0') {
5425 if (strcmp(env_var, "on_offload") == 0) {
5426 __offload_init_type = c_init_on_offload;
5427 }
5428 else if (strcmp(env_var, "on_offload_all") == 0) {
5429 __offload_init_type = c_init_on_offload_all;
5430 }
5431 else if (strcmp(env_var, "on_start") == 0) {
5432 __offload_init_type = c_init_on_start;
5433 }
5434 else {
5435 LIBOFFLOAD_ERROR(c_invalid_env_var_value, offload_init_envname);
5436 }
5437 }
5438
5439 // active wait
5440 env_var = getenv(offload_active_wait_envname);
5441 if (env_var != 0 && *env_var != '\0') {
5442 int64_t new_val;
5443 if (__offload_parse_int_string(env_var, new_val)) {
5444 __offload_active_wait = new_val;
5445 }
5446 else {
5447 LIBOFFLOAD_ERROR(c_invalid_env_var_int_value,
5448 offload_active_wait_envname);
5449 }
5450 }
5451
5452 // omp device num
5453 env_var = getenv(omp_device_num_envname);
5454 if (env_var != 0 && *env_var != '\0') {
5455 int64_t new_val;
5456 if (__offload_parse_int_string(env_var, new_val) && new_val >= 0) {
5457 __omp_device_num = new_val;
5458 }
5459 else {
5460 LIBOFFLOAD_ERROR(c_omp_invalid_device_num_env,
5461 omp_device_num_envname);
5462 }
5463 }
5464
5465 // parallel copy of offload_transfer
5466 env_var = getenv(parallel_copy_envname);
5467 if (env_var != 0 && *env_var != '\0') {
5468 int64_t new_val;
5469 if (__offload_parse_int_string(env_var, new_val) && new_val >= 0) {
5470 __offload_parallel_copy = new_val;
5471 }
5472 else {
5473 LIBOFFLOAD_ERROR(c_invalid_env_var_value,
5474 parallel_copy_envname);
5475 }
5476 }
5477
5478 // use COI interface for noncontiguous arrays transfer
5479 env_var = getenv(use_coi_noncontiguous_transfer_envname);
5480 if (env_var != 0 && *env_var != '\0') {
5481 uint64_t new_size;
5482 if (__offload_parse_size_string(env_var, new_size)) {
5483 __offload_use_coi_noncontiguous_transfer = new_size;
5484 }
5485 else {
5486 LIBOFFLOAD_ERROR(c_invalid_env_var_value,
5487 use_coi_noncontiguous_transfer_envname);
5488 }
5489 }
5490
5491 // init ORSL
5492 ORSL::init();
5493}
5494
5495extern int __offload_init_library(void)
5496{
 5497     // do one-time initialization
5498 static OffloadOnceControl ctrl = OFFLOAD_ONCE_CONTROL_INIT;
5499 __offload_run_once(&ctrl, __offload_init_library_once);
5500
5501 // offload is available if COI is available and the number of devices > 0
5502 bool is_available = COI::is_available && (mic_engines_total > 0);
5503
5504 // register pending libraries if there are any
5505 if (is_available && __target_libs) {
5506 mutex_locker_t locker(__target_libs_lock);
5507
5508 for (TargetImageList::iterator it = __target_libs_list.begin();
5509 it != __target_libs_list.end(); it++) {
5510 // Register library in COI
5511 COI::ProcessRegisterLibraries(1, &it->data, &it->size,
5512 &it->origin, &it->offset);
5513
5514 // add lib to all engines
5515 for (int i = 0; i < mic_engines_total; i++) {
5516 mic_engines[i].add_lib(*it);
5517 }
5518 }
5519
5520 __target_libs = false;
5521 __target_libs_list.clear();
5522 }
5523
5524 return is_available;
5525}
5526
5527extern "C" bool __offload_target_image_is_executable(const void *target_image)
5528{
5529 const struct Image *image = static_cast<const struct Image*>(target_image);
5530
5531 // decode image
5532 const char *name = image->data;
5533 const void *data = image->data + strlen(image->data) + 1;
5534
5535 // determine image type
5536 const Elf64_Ehdr *hdr = static_cast<const Elf64_Ehdr*>(data);
5537 return (hdr->e_type == ET_EXEC);
5538}
5539
5540extern "C" bool __offload_register_image(const void *target_image)
5541{
5542 const struct Image *image = static_cast<const struct Image*>(target_image);
5543
5544 // decode image
5545 const char *name = image->data;
5546 const void *data = image->data + strlen(image->data) + 1;
5547 uint64_t size = image->size;
2eab9666 5548 char *origin = (char *) malloc(strlen(image->data) + 1);
5f520819 5549 uint64_t offset = 0;
5550 const char *host_name = image->data;
5551 int i;
5552
5553 if (origin == NULL)
5554 LIBOFFLOAD_ERROR(c_malloc);
5555
 5556     // The origin name is the name of the file on the host.
 5557     // It is used by VTune; since this is a fat binary we
 5558     // use the host file name of the fat binary.
 5559     // The driver prepends the host file name, ending with "?",
 5560     // to the image->data name, so we need to extract that string.
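// Illustrative example (added, not in the original source): an image->data
// name of the hypothetical form "a.out?offload_target" yields origin "a.out".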
5561 i = 0;
5562 while (*host_name != '\0' && *host_name != '?') {
5563 origin[i] = *host_name;
5564 host_name++;
5565 i++;
5566 }
5567 origin[i] = '\0';
 5568     // This implies the host name does not exist, which really should
 5569     // not occur. Allow it since the only consumer is VTune.
5570 if ((i == 0) || (*host_name != '?')) {
5571 free(origin);
5572 origin = 0;
5573 }
5574
5575 // our actions depend on the image type
5576 const Elf64_Ehdr *hdr = static_cast<const Elf64_Ehdr*>(data);
5577 switch (hdr->e_type) {
5578 case ET_EXEC:
 5579         // Each offload application is supposed to have only one target
 5580         // image representing the target executable.
5581 // No thread synchronization is required here as the initialization
5582 // code is always executed in a single thread.
5583 if (__target_exe != 0) {
5584 LIBOFFLOAD_ERROR(c_multiple_target_exes);
5585 exit(1);
5586 }
5587 __target_exe = new TargetImage(name, data, size, origin, offset);
5588
5589 // Registration code for execs is always called from the context
5590 // of main and thus we can safely call any function here,
5591 // including LoadLibrary API on windows. This is the place where
5592 // we do the offload library initialization.
5593 if (__offload_init_library()) {
5594 // initialize engine if init_type is on_start
5595 if (__offload_init_type == c_init_on_start) {
5596 for (int i = 0; i < mic_engines_total; i++) {
5597 mic_engines[i].init();
5598 }
5599 }
5600 }
2eab9666 5601 return mic_engines_total > 0;
5602
5603 case ET_DYN:
5604 {
5605 char *fullname = origin;
5606 // We add the library to a list of pending libraries
5607 __target_libs_lock.lock();
5608 __target_libs = true;
5609 __target_libs_list.push_back(
5610 TargetImage(name, data, size, fullname, offset));
5611 __target_libs_lock.unlock();
5612 // If __target_exe is set, then main has started running.
5613 // If it has not, we can't do anything useful here
5614 // because this registration code is called from DllMain
5615 // context (on Windows).
5616 if (__target_exe != 0) {
5617 // There is no need to delay loading the library
5618 if (!__offload_init_library()) {
5619 // Couldn't validate library as a fat offload library
5620 LIBOFFLOAD_ERROR(c_unknown_binary_type);
5621 exit(1);
5622 }
5623 }
5624 return true;
5625 }
5626
5627 default:
5628 // something is definitely wrong, issue an error and exit
5629 LIBOFFLOAD_ERROR(c_unknown_binary_type);
5630 exit(1);
5631 }
5632}
5633
5634extern "C" void __offload_unregister_image(const void *target_image)
5635{
5636 // Target image is packed as follows:
5637 // 8 bytes - size of the target binary
5638 // null-terminated string - binary name
5639 // <size> bytes - binary contents
5640 const struct Image {
5641 int64_t size;
5642 char data[];
5643 } *image = static_cast<const struct Image*>(target_image);
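 // Illustrative layout (hypothetical values): a 1024-byte target binary
 // named "app_target.so" would be packed as the int64_t value 1024,
 // the bytes "app_target.so\0", and then the 1024 bytes of binary contents.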
5644
5645 // decode image
5646 const char *name = image->data;
5647 const void *data = image->data + strlen(image->data) + 1;
5648
5649 // our actions depend on the image type
5650 const Elf64_Ehdr *hdr = static_cast<const Elf64_Ehdr*>(data);
5651 if (hdr->e_type == ET_EXEC) {
5652 // We are executing the exec's destructors.
5653 // It is time to do a library cleanup.
5654 if (timer_enabled) {
5655 Offload_Timer_Print();
5656 }
5657
5658#ifdef MYO_SUPPORT
5659 __offload_myoFini();
5660#endif // MYO_SUPPORT
5661
5662 __offload_fini_library();
5663 }
5664 else if (hdr->e_type == ET_DYN) {
5665 for (int i = 0; i < mic_engines_total; i++) {
5666 mic_engines[i].unload_library(data, name);
5667 }
5668
5669 }
5670}
5671
5672// Runtime trace interface for user programs
5673
5674void __offload_console_trace(int level)
5675{
5676 console_enabled = level;
5677}
5678
5679// User-visible offload API
5680
5681int _Offload_number_of_devices(void)
5682{
5683 __offload_init_library();
5684 return mic_engines_total;
5685}
5686
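// The following two queries are host-side stubs: on the host there is no
// current device context, so they simply return -1.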
5687int _Offload_get_device_number(void)
5688{
5689 return -1;
5690}
5691
5692int _Offload_get_physical_device_number(void)
5693{
5694 return -1;
5695}
5696
5697int _Offload_signaled(int index, void *signal)
5698{
5699 __offload_init_library();
5700
5701 // check index value
2eab9666 5702 if (index < 0) {
5703 LIBOFFLOAD_ERROR(c_offload_signaled1, index);
5704 LIBOFFLOAD_ABORT;
5705 }
5706
5707 index %= mic_engines_total;
5708
5709 // find associated async task
5710 OffloadDescriptor *task =
2eab9666 5711 mic_engines[index].find_signal(signal, false);
5712 if (task == 0) {
5713 LIBOFFLOAD_ERROR(c_offload_signaled2, signal);
5714 LIBOFFLOAD_ABORT;
5715 }
5716 // the signal was already removed by a completed wait
5717 else if (task == SIGNAL_IS_REMOVED) {
5718 return (true);
5719 }
5720 return task->is_signaled();
5721}
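// A minimal usage sketch for _Offload_signaled (hypothetical user code, not
// part of this file): poll an asynchronous offload started with a signal
// clause until it completes.
//
//   int done;
//   // ... start an asynchronous offload with signal(&done) on device 0 ...
//   while (!_Offload_signaled(0, &done)) {
//       // overlap useful host work here
//   }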
5722
5723void _Offload_report(int val)
5724{
5725 if (val == OFFLOAD_REPORT_ON ||
5726 val == OFFLOAD_REPORT_OFF) {
5727 offload_report_enabled = val;
5728 }
5729}
5730
5731int _Offload_find_associated_mic_memory(
5732 int target, // MIC device number
5733 const void* cpu_addr, // host address to look up
5734 void** cpu_base_addr, // OUT: base host address of the association
5735 uint64_t* buf_length, // OUT: length of the associated range
5736 void** mic_addr, // OUT: corresponding address on the MIC device
5737 uint64_t* mic_buf_start_offset, // OUT: offset to the start of data in the buffer
5738 int* is_static // OUT: nonzero for statically allocated data
5739)
5740{
5741 __offload_init_library();
5742
5743 // check target value
5744 if (target < 0) {
5745 LIBOFFLOAD_ERROR(c_offload_signaled1, target);
5746 LIBOFFLOAD_ABORT;
5747 }
5748 target %= mic_engines_total;
5749
5750 // find existing association in pointer table
5751 PtrData* ptr_data = mic_engines[target].find_ptr_data(cpu_addr);
5752 if (ptr_data == 0) {
5753 OFFLOAD_TRACE(3, "Association does not exist\n");
5754 return 0;
5755 }
5756
5757 OFFLOAD_TRACE(3, "Found association: base %p, length %lld, is_static %d\n",
5758 ptr_data->cpu_addr.start(), ptr_data->cpu_addr.length(),
5759 ptr_data->is_static);
5760
5761 if (ptr_data->mic_buf != 0 && ptr_data->mic_addr == 0) {
5762 COIRESULT res = COI::BufferGetSinkAddress(ptr_data->mic_buf,
5763 &ptr_data->mic_addr);
5764 if (res != COI_SUCCESS) {
5765 return 0;
5766 }
5767 }
5768 *cpu_base_addr = const_cast<void *>(ptr_data->cpu_addr.start());
5769 *buf_length = ptr_data->cpu_addr.length() - ptr_data->alloc_disp;
5770 *mic_addr = (void *)(ptr_data->mic_addr + ptr_data->mic_offset);
5771 *mic_buf_start_offset = ptr_data->alloc_disp;
5772 *is_static = ptr_data->is_static;
5773 return ptr_data->is_static ? 1 : ptr_data->get_reference();
5774}
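// A minimal usage sketch for the query above (hypothetical user code, not
// part of this file): look up the device-side memory associated with a host
// variable x previously transferred to MIC device 0.
//
//   void *base, *mic;
//   uint64_t len, start_off;
//   int is_static;
//   int refs = _Offload_find_associated_mic_memory(
//       0, &x, &base, &len, &mic, &start_off, &is_static);
//   // refs is 0 if no association exists for &x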
5775
5776_Offload_stream _Offload_stream_create(
5777 int device, // MIC device number
5778 int number_of_cpus // Cores allocated to the stream
5779 )
5780{
5781 __offload_init_library();
5782
5783 // check device value
5784 if (device < 0) {
5785 LIBOFFLOAD_ERROR(c_offload_signaled1, device);
5786 LIBOFFLOAD_ABORT;
5787 }
5788 device %= mic_engines_total;
5789
5790 // Create new stream and get its handle
5791 _Offload_stream handle = Stream::add_stream(device, number_of_cpus);
5792 if (handle == 0) {
5793 OFFLOAD_TRACE(3, "Can't create stream\n");
5794 return 0;
5795 }
5796
5797 // create pipeline associated with the new stream
5798 mic_engines[device].get_pipeline(handle);
5799
5800 return(handle);
5801}
5802
5803int _Offload_stream_destroy(
5804 int device, // MIC device number
5805 _Offload_stream handle // stream to destroy
5806 )
5807{
5808 __offload_init_library();
5809
5810 // check device value
5811 if (device < 0) {
5812 LIBOFFLOAD_ERROR(c_offload_signaled1, device);
5813 LIBOFFLOAD_ABORT;
5814 }
5815 device %= mic_engines_total;
5816
5817 mic_engines[device].stream_destroy(handle);
5818
5819 return(true);
5820}
5821
5822int _Offload_stream_completed(int device, _Offload_stream handler)
5823{
5824 __offload_init_library();
5825
5826 // check device value
5827 if (device < 0) {
5828 LIBOFFLOAD_ERROR(c_offload_signaled1, device);
5829 LIBOFFLOAD_ABORT;
5830 }
5831
5832 device %= mic_engines_total;
5833
5834 // get stream
5835 Stream * stream;
5836
5837 if (handler != 0) {
5838 stream = Stream::find_stream(handler, false);
5839
5840 // the stream was not created or was destroyed
5841 if (!stream) {
5842 LIBOFFLOAD_ERROR(c_offload_no_stream, device);
5843 LIBOFFLOAD_ABORT;
5844 }
5845
5846 // find associated async task
5847 OffloadDescriptor *task = stream->get_last_offload();
5848
5849 // offload was completed by offload_wait pragma or wait clause
5850 if (task == 0) {
5851 return(true);
5852 }
5853 return task->is_signaled();
5854 }
5855 // a zero handler queries all streams on the device
5856 else {
5857 StreamMap stream_map = Stream::all_streams;
5858 for (StreamMap::iterator it = stream_map.begin();
5859 it != stream_map.end(); it++) {
5860 Stream * stream = it->second;
5861 // find associated async task
5862 OffloadDescriptor *task = stream->get_last_offload();
5863
5864 // offload was completed by offload_wait pragma or wait clause
5865 if (task == 0) {
5866 return(true);
5867 }
5868 // if even one stream is not completed, the result is false
5869 if (!task->is_signaled()) {
5870 return false;
5871 }
5872 }
5873 // no uncompleted streams
5874 return true;
5875 }
5876}
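// A minimal usage sketch for the stream API above (hypothetical user code,
// not part of this file):
//
//   if (_Offload_number_of_devices() > 0) {
//       _Offload_stream s = _Offload_stream_create(0, 4); // 4 cores on device 0
//       // ... issue offloads to stream s ...
//       while (!_Offload_stream_completed(0, s)) {
//           // wait for all offloads in the stream to finish
//       }
//       _Offload_stream_destroy(0, s);
//   }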
5877
5878// IDB support
5879int __dbg_is_attached = 0;
5880int __dbg_target_id = -1;
5881pid_t __dbg_target_so_pid = -1;
5882char __dbg_target_exe_name[MAX_TARGET_NAME] = {0};
5883const int __dbg_api_major_version = 1;
5884const int __dbg_api_minor_version = 0;
5885
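// The two functions below are intentionally empty; presumably they exist so
// a debugger can set breakpoints on them to be notified when the target
// shared object is loaded or unloaded.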
5886void __dbg_target_so_loaded()
5887{
5888}
5889void __dbg_target_so_unloaded()
5890{
5891}