]> git.ipfire.org Git - thirdparty/gcc.git/blob - liboffloadmic/runtime/offload_engine.h
backport: Makefile.am (liboffloadmic_host_la_DEPENDENCIES): Remove libcoi_host and...
[thirdparty/gcc.git] / liboffloadmic / runtime / offload_engine.h
1 /*
2 Copyright (c) 2014-2015 Intel Corporation. All Rights Reserved.
3
4 Redistribution and use in source and binary forms, with or without
5 modification, are permitted provided that the following conditions
6 are met:
7
8 * Redistributions of source code must retain the above copyright
9 notice, this list of conditions and the following disclaimer.
10 * Redistributions in binary form must reproduce the above copyright
11 notice, this list of conditions and the following disclaimer in the
12 documentation and/or other materials provided with the distribution.
13 * Neither the name of Intel Corporation nor the names of its
14 contributors may be used to endorse or promote products derived
15 from this software without specific prior written permission.
16
17 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
18 "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
19 LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
20 A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
21 HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
22 SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
23 LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
24 DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
25 THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
26 (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
27 OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
28 */
29
30
31 #ifndef OFFLOAD_ENGINE_H_INCLUDED
32 #define OFFLOAD_ENGINE_H_INCLUDED
33
34 #include <limits.h>
35 #include <bitset>
36 #include <list>
37 #include <set>
38 #include <map>
39 #include "offload_common.h"
40 #include "coi/coi_client.h"
41
// Marker value that Engine::find_signal() writes over a signal map entry
// when it is asked to remove the signal.
#define SIGNAL_IS_REMOVED (reinterpret_cast<OffloadDescriptor *>(-1))

// Stream handle constant; presumably means "no stream was specified"
// (TODO confirm against the callers).
const int64_t no_stream = -1;
44
// Half-open address range [start, start + length).
class MemRange {
public:
    // Empty range located at the null address.
    MemRange() : m_start(0), m_length(0) {}

    // Range of len bytes beginning at addr.
    MemRange(const void *addr, uint64_t len) : m_start(addr), m_length(len) {}

    // First address of the range.
    const void* start() const { return m_start; }

    // Address one past the last byte of the range.
    const void* end() const {
        const char *first_byte = static_cast<const char*>(m_start);
        return first_byte + m_length;
    }

    // Size of the range in bytes.
    uint64_t length() const { return m_length; }

    // Returns true if this range and o share at least one address.
    bool overlaps(const MemRange &o) const {
        // A[start, end) and B[start, end) are disjoint exactly when one of
        // them begins at or after the end of the other.
        const bool starts_after_other = !(start() < o.end());
        const bool ends_before_other = !(end() > o.start());
        return !(starts_after_other || ends_before_other);
    }

    // Returns true if o lies entirely inside this range.
    bool contains(const MemRange &o) const {
        const bool begins_outside = !(start() <= o.start());
        const bool finishes_outside = !(o.end() <= end());
        return !(begins_outside || finishes_outside);
    }

private:
    const void* m_start;
    uint64_t    m_length;
};
79
80 // Data associated with a pointer variable
81 class PtrData {
82 public:
83 PtrData(const void *addr, uint64_t len) :
84 cpu_addr(addr, len), cpu_buf(0),
85 mic_addr(0), alloc_disp(0), mic_buf(0), mic_offset(0),
86 ref_count(0), is_static(false)
87 {}
88
89 //
90 // Copy constructor
91 //
92 PtrData(const PtrData& ptr):
93 cpu_addr(ptr.cpu_addr), cpu_buf(ptr.cpu_buf),
94 mic_addr(ptr.mic_addr), alloc_disp(ptr.alloc_disp),
95 mic_buf(ptr.mic_buf), mic_offset(ptr.mic_offset),
96 ref_count(ptr.ref_count), is_static(ptr.is_static)
97 {}
98
99 bool operator<(const PtrData &o) const {
100 // Variables are sorted by the CPU start address.
101 // Overlapping memory ranges are considered equal.
102 return (cpu_addr.start() < o.cpu_addr.start()) &&
103 !cpu_addr.overlaps(o.cpu_addr);
104 }
105
106 long add_reference() {
107 if (is_static) {
108 return LONG_MAX;
109 }
110 #ifndef TARGET_WINNT
111 return __sync_fetch_and_add(&ref_count, 1);
112 #else // TARGET_WINNT
113 return _InterlockedIncrement(&ref_count) - 1;
114 #endif // TARGET_WINNT
115 }
116
117 long remove_reference() {
118 if (is_static) {
119 return LONG_MAX;
120 }
121 #ifndef TARGET_WINNT
122 return __sync_sub_and_fetch(&ref_count, 1);
123 #else // TARGET_WINNT
124 return _InterlockedDecrement(&ref_count);
125 #endif // TARGET_WINNT
126 }
127
128 long get_reference() const {
129 if (is_static) {
130 return LONG_MAX;
131 }
132 return ref_count;
133 }
134
135 public:
136 // CPU address range
137 const MemRange cpu_addr;
138
139 // CPU and MIC buffers
140 COIBUFFER cpu_buf;
141 COIBUFFER mic_buf;
142
143 // placeholder for buffer address on mic
144 uint64_t mic_addr;
145
146 uint64_t alloc_disp;
147
148 // additional offset to pointer data on MIC for improving bandwidth for
149 // data which is not 4K aligned
150 uint32_t mic_offset;
151
152 // if true buffers are created from static memory
153 bool is_static;
154 mutex_t alloc_ptr_data_lock;
155
156 private:
157 // reference count for the entry
158 long ref_count;
159 };
160
161 typedef std::list<PtrData*> PtrDataList;
162
163 class PtrDataTable {
164 public:
165 typedef std::set<PtrData> PtrSet;
166
167 PtrData* find_ptr_data(const void *ptr) {
168 m_ptr_lock.lock();
169 PtrSet::iterator res = list.find(PtrData(ptr, 0));
170
171 m_ptr_lock.unlock();
172 if (res == list.end()) {
173 return 0;
174 }
175 return const_cast<PtrData*>(res.operator->());
176 }
177
178 PtrData* insert_ptr_data(const void *ptr, uint64_t len, bool &is_new) {
179 m_ptr_lock.lock();
180 std::pair<PtrSet::iterator, bool> res =
181 list.insert(PtrData(ptr, len));
182
183 PtrData* ptr_data = const_cast<PtrData*>(res.first.operator->());
184 m_ptr_lock.unlock();
185
186 is_new = res.second;
187 if (is_new) {
188 // It's necessary to lock as soon as possible.
189 // unlock must be done at call site of insert_ptr_data at
190 // branch for is_new
191 ptr_data->alloc_ptr_data_lock.lock();
192 }
193 return ptr_data;
194 }
195
196 void remove_ptr_data(const void *ptr) {
197 m_ptr_lock.lock();
198 list.erase(PtrData(ptr, 0));
199 m_ptr_lock.unlock();
200 }
201 private:
202
203 PtrSet list;
204 mutex_t m_ptr_lock;
205 };
206
207 // Data associated with automatic variable
208 class AutoData {
209 public:
210 AutoData(const void *addr, uint64_t len) :
211 cpu_addr(addr, len), ref_count(0)
212 {}
213
214 bool operator<(const AutoData &o) const {
215 // Variables are sorted by the CPU start address.
216 // Overlapping memory ranges are considered equal.
217 return (cpu_addr.start() < o.cpu_addr.start()) &&
218 !cpu_addr.overlaps(o.cpu_addr);
219 }
220
221 long add_reference() {
222 #ifndef TARGET_WINNT
223 return __sync_fetch_and_add(&ref_count, 1);
224 #else // TARGET_WINNT
225 return _InterlockedIncrement(&ref_count) - 1;
226 #endif // TARGET_WINNT
227 }
228
229 long remove_reference() {
230 #ifndef TARGET_WINNT
231 return __sync_sub_and_fetch(&ref_count, 1);
232 #else // TARGET_WINNT
233 return _InterlockedDecrement(&ref_count);
234 #endif // TARGET_WINNT
235 }
236
237 long nullify_reference() {
238 #ifndef TARGET_WINNT
239 return __sync_lock_test_and_set(&ref_count, 0);
240 #else // TARGET_WINNT
241 return _InterlockedExchange(&ref_count,0);
242 #endif // TARGET_WINNT
243 }
244
245 long get_reference() const {
246 return ref_count;
247 }
248
249 public:
250 // CPU address range
251 const MemRange cpu_addr;
252
253 private:
254 // reference count for the entry
255 long ref_count;
256 };
257
258 // Set of autimatic variables
259 typedef std::set<AutoData> AutoSet;
260
// Target image data.
// Plain record describing one target image; the pointers are stored as
// given and are not copied or owned by this struct.
struct TargetImage
{
    // library name
    const char* name;

    // contents and size
    const void* data;
    uint64_t    size;

    // file of origin and offset within that file
    const char* origin;
    uint64_t    offset;

    // Stores all five fields verbatim.
    TargetImage(const char *image_name, const void *image_data,
                uint64_t image_size, const char *origin_file,
                uint64_t origin_offset) :
        name(image_name),
        data(image_data),
        size(image_size),
        origin(origin_file),
        offset(origin_offset)
    {}
};

typedef std::list<TargetImage> TargetImageList;
283
284 // dynamic library and Image associated with lib
285 struct DynLib
286 {
287 DynLib(const char *_name, const void *_data,
288 COILIBRARY _lib) :
289 name(_name), data(_data), lib(_lib)
290 {}
291 // library name
292 const char* name;
293
294 // contents
295 const void* data;
296
297 COILIBRARY lib;
298 };
299 typedef std::list<DynLib> DynLibList;
300
301 // Data associated with persistent auto objects
302 struct PersistData
303 {
304 PersistData(const void *addr, uint64_t routine_num,
305 uint64_t size, uint64_t thread) :
306 stack_cpu_addr(addr), routine_id(routine_num), thread_id(thread)
307 {
308 stack_ptr_data = new PtrData(0, size);
309 }
310 // 1-st key value - beginning of the stack at CPU
311 const void * stack_cpu_addr;
312 // 2-nd key value - identifier of routine invocation at CPU
313 uint64_t routine_id;
314 // 3-rd key value - thread identifier
315 uint64_t thread_id;
316
317 // corresponded PtrData; only stack_ptr_data->mic_buf is used
318 PtrData * stack_ptr_data;
319 // used to get offset of the variable in stack buffer
320 char * cpu_stack_addr;
321 };
322
323 typedef std::list<PersistData> PersistDataList;
324
325 // Data associated with stream
326 struct Stream
327 {
328 Stream(int device, int num_of_cpus) :
329 m_number_of_cpus(num_of_cpus), m_pipeline(0), m_last_offload(0),
330 m_device(device)
331 {}
332 ~Stream() {
333 if (m_pipeline) {
334 COI::PipelineDestroy(m_pipeline);
335 }
336 }
337
338 COIPIPELINE get_pipeline(void) {
339 return(m_pipeline);
340 }
341
342 int get_device(void) {
343 return(m_device);
344 }
345
346 int get_cpu_number(void) {
347 return(m_number_of_cpus);
348 }
349
350 void set_pipeline(COIPIPELINE pipeline) {
351 m_pipeline = pipeline;
352 }
353
354 OffloadDescriptor* get_last_offload(void) {
355 return(m_last_offload);
356 }
357
358 void set_last_offload(OffloadDescriptor* last_offload) {
359 m_last_offload = last_offload;
360 }
361
362 static Stream* find_stream(uint64_t handle, bool remove);
363
364 static _Offload_stream add_stream(int device, int number_of_cpus) {
365 m_stream_lock.lock();
366 all_streams[++m_streams_count] = new Stream(device, number_of_cpus);
367 m_stream_lock.unlock();
368 return(m_streams_count);
369 }
370
371 typedef std::map<uint64_t, Stream*> StreamMap;
372
373 static uint64_t m_streams_count;
374 static StreamMap all_streams;
375 static mutex_t m_stream_lock;
376
377 int m_device;
378
379 // number of cpus
380 int m_number_of_cpus;
381
382 // The pipeline associated with the stream
383 COIPIPELINE m_pipeline;
384
385 // The last offload occured via the stream
386 OffloadDescriptor* m_last_offload;
387
388 // Cpus used by the stream
389 std::bitset<COI_MAX_HW_THREADS> m_stream_cpus;
390 };
391
392 typedef std::map<uint64_t, Stream*> StreamMap;
393
394 // class representing a single engine
395 struct Engine {
396 friend void __offload_init_library_once(void);
397 friend void __offload_fini_library(void);
398
399 #define check_result(res, tag, ...) \
400 { \
401 if (res == COI_PROCESS_DIED) { \
402 fini_process(true); \
403 exit(1); \
404 } \
405 if (res != COI_SUCCESS) { \
406 __liboffload_error_support(tag, __VA_ARGS__); \
407 exit(1); \
408 } \
409 }
410
411 int get_logical_index() const {
412 return m_index;
413 }
414
415 int get_physical_index() const {
416 return m_physical_index;
417 }
418
419 const COIPROCESS& get_process() const {
420 return m_process;
421 }
422
423 uint64_t get_thread_id(void);
424
425 // initialize device
426 void init(void);
427
428 // unload library
429 void unload_library(const void *data, const char *name);
430
431 // add new library
432 void add_lib(const TargetImage &lib)
433 {
434 m_lock.lock();
435 m_ready = false;
436 m_images.push_back(lib);
437 m_lock.unlock();
438 }
439
440 COIRESULT compute(
441 _Offload_stream stream,
442 const std::list<COIBUFFER> &buffers,
443 const void* data,
444 uint16_t data_size,
445 void* ret,
446 uint16_t ret_size,
447 uint32_t num_deps,
448 const COIEVENT* deps,
449 COIEVENT* event
450 );
451
452 #ifdef MYO_SUPPORT
453 // temporary workaround for blocking behavior for myoiLibInit/Fini calls
454 void init_myo(COIEVENT *event) {
455 COIRESULT res;
456 res = COI::PipelineRunFunction(get_pipeline(),
457 m_funcs[c_func_myo_init],
458 0, 0, 0, 0, 0, 0, 0, 0, 0,
459 event);
460 check_result(res, c_pipeline_run_func, m_index, res);
461 }
462
463 void fini_myo(COIEVENT *event) {
464 COIRESULT res;
465 res = COI::PipelineRunFunction(get_pipeline(),
466 m_funcs[c_func_myo_fini],
467 0, 0, 0, 0, 0, 0, 0, 0, 0,
468 event);
469 check_result(res, c_pipeline_run_func, m_index, res);
470 }
471 #endif // MYO_SUPPORT
472
473 //
474 // Memory association table
475 //
476 PtrData* find_ptr_data(const void *ptr) {
477 return m_ptr_set.find_ptr_data(ptr);
478 }
479
480 PtrData* find_targetptr_data(const void *ptr) {
481 return m_targetptr_set.find_ptr_data(ptr);
482 }
483
484 PtrData* insert_ptr_data(const void *ptr, uint64_t len, bool &is_new) {
485 return m_ptr_set.insert_ptr_data(ptr, len, is_new);
486 }
487
488 PtrData* insert_targetptr_data(const void *ptr, uint64_t len,
489 bool &is_new) {
490 return m_targetptr_set.insert_ptr_data(ptr, len, is_new);
491 }
492
493 void remove_ptr_data(const void *ptr) {
494 m_ptr_set.remove_ptr_data(ptr);
495 }
496
497 void remove_targetptr_data(const void *ptr) {
498 m_targetptr_set.remove_ptr_data(ptr);
499 }
500
501 //
502 // Automatic variables
503 //
504 AutoData* find_auto_data(const void *ptr) {
505 AutoSet &auto_vars = get_auto_vars();
506 AutoSet::iterator res = auto_vars.find(AutoData(ptr, 0));
507 if (res == auto_vars.end()) {
508 return 0;
509 }
510 return const_cast<AutoData*>(res.operator->());
511 }
512
513 AutoData* insert_auto_data(const void *ptr, uint64_t len) {
514 AutoSet &auto_vars = get_auto_vars();
515 std::pair<AutoSet::iterator, bool> res =
516 auto_vars.insert(AutoData(ptr, len));
517 return const_cast<AutoData*>(res.first.operator->());
518 }
519
520 void remove_auto_data(const void *ptr) {
521 get_auto_vars().erase(AutoData(ptr, 0));
522 }
523
524 //
525 // Signals
526 //
527 void add_signal(const void *signal, OffloadDescriptor *desc) {
528 m_signal_lock.lock();
529 m_signal_map[signal] = desc;
530 m_signal_lock.unlock();
531 }
532
533 OffloadDescriptor* find_signal(const void *signal, bool remove) {
534 OffloadDescriptor *desc = 0;
535
536 m_signal_lock.lock();
537 {
538 SignalMap::iterator it = m_signal_map.find(signal);
539 if (it != m_signal_map.end()) {
540 desc = it->second;
541 if (remove) {
542 it->second = SIGNAL_IS_REMOVED;
543 }
544 }
545 }
546 m_signal_lock.unlock();
547
548 return desc;
549 }
550
551 void stream_destroy(_Offload_stream handle);
552
553 COIPIPELINE get_pipeline(_Offload_stream stream);
554
555 StreamMap get_stream_map() {
556 return m_stream_map;
557 }
558
559 // stop device process
560 void fini_process(bool verbose);
561
562 // list of stacks active at the engine
563 PersistDataList m_persist_list;
564
565 private:
566 Engine() : m_index(-1), m_physical_index(-1), m_process(0), m_ready(false),
567 m_proc_number(0)
568 {}
569
570 ~Engine() {
571 for (StreamMap::iterator it = m_stream_map.begin();
572 it != m_stream_map.end(); it++) {
573 Stream * stream = it->second;
574 delete stream;
575 }
576 if (m_process != 0) {
577 fini_process(false);
578 }
579 }
580
581 // set indexes
582 void set_indexes(int logical_index, int physical_index) {
583 m_index = logical_index;
584 m_physical_index = physical_index;
585 }
586
587 // start process on device
588 void init_process();
589
590 void load_libraries(void);
591 void init_ptr_data(void);
592
593 // performs library intialization on the device side
594 pid_t init_device(void);
595
596 private:
597 // get pipeline associated with a calling thread
598 COIPIPELINE get_pipeline(void);
599
600 // get automatic vars set associated with the calling thread
601 AutoSet& get_auto_vars(void);
602
603 // destructor for thread data
604 static void destroy_thread_data(void *data);
605
606 private:
607 typedef std::set<PtrData> PtrSet;
608 typedef std::map<const void*, OffloadDescriptor*> SignalMap;
609
610 // device indexes
611 int m_index;
612 int m_physical_index;
613
614 // number of COI pipes created for the engine
615 long m_proc_number;
616
617 // process handle
618 COIPROCESS m_process;
619
620 // If false, device either has not been initialized or new libraries
621 // have been added.
622 bool m_ready;
623 mutex_t m_lock;
624
625 // List of libraries to be loaded
626 TargetImageList m_images;
627
628 // var tables
629 PtrDataTable m_ptr_set;
630 PtrDataTable m_targetptr_set;
631
632 // signals
633 SignalMap m_signal_map;
634 mutex_t m_signal_lock;
635
636 // streams
637 StreamMap m_stream_map;
638 mutex_t m_stream_lock;
639 int m_num_cores;
640 int m_num_threads;
641 std::bitset<COI_MAX_HW_THREADS> m_cpus;
642
643 // List of dynamic libraries to be registred
644 DynLibList m_dyn_libs;
645
646 // constants for accessing device function handles
647 enum {
648 c_func_compute = 0,
649 #ifdef MYO_SUPPORT
650 c_func_myo_init,
651 c_func_myo_fini,
652 #endif // MYO_SUPPORT
653 c_func_init,
654 c_func_var_table_size,
655 c_func_var_table_copy,
656 c_func_set_stream_affinity,
657 c_funcs_total
658 };
659 static const char* m_func_names[c_funcs_total];
660
661 // device function handles
662 COIFUNCTION m_funcs[c_funcs_total];
663
664 // int -> name mapping for device signals
665 static const int c_signal_max = 32;
666 static const char* c_signal_names[c_signal_max];
667 };
668
669 #endif // OFFLOAD_ENGINE_H_INCLUDED