]> git.ipfire.org Git - thirdparty/gcc.git/blob - gcc/hsa-brig.c
Move MEMMODEL_* from coretypes.h to memmodel.h
[thirdparty/gcc.git] / gcc / hsa-brig.c
1 /* Producing binary form of HSA BRIG from our internal representation.
2 Copyright (C) 2013-2016 Free Software Foundation, Inc.
3 Contributed by Martin Jambor <mjambor@suse.cz> and
4 Martin Liska <mliska@suse.cz>.
5
6 This file is part of GCC.
7
8 GCC is free software; you can redistribute it and/or modify
9 it under the terms of the GNU General Public License as published by
10 the Free Software Foundation; either version 3, or (at your option)
11 any later version.
12
13 GCC is distributed in the hope that it will be useful,
14 but WITHOUT ANY WARRANTY; without even the implied warranty of
15 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16 GNU General Public License for more details.
17
18 You should have received a copy of the GNU General Public License
19 along with GCC; see the file COPYING3. If not see
20 <http://www.gnu.org/licenses/>. */
21
22 #include "config.h"
23 #include "system.h"
24 #include "coretypes.h"
25 #include "tm.h"
26 #include "target.h"
27 #include "memmodel.h"
28 #include "tm_p.h"
29 #include "is-a.h"
30 #include "vec.h"
31 #include "hash-table.h"
32 #include "hash-map.h"
33 #include "tree.h"
34 #include "tree-iterator.h"
35 #include "stor-layout.h"
36 #include "output.h"
37 #include "cfg.h"
38 #include "function.h"
39 #include "fold-const.h"
40 #include "stringpool.h"
41 #include "gimple-pretty-print.h"
42 #include "diagnostic-core.h"
43 #include "cgraph.h"
44 #include "dumpfile.h"
45 #include "print-tree.h"
46 #include "symbol-summary.h"
47 #include "hsa.h"
48 #include "gomp-constants.h"
49
/* Return the 16-bit value VAL laid out in little-endian byte order.  The
   host byte order is taken from the compiler's __BYTE_ORDER__ macros when
   GCC_VERSION is at least 4008, and from WORDS_BIGENDIAN otherwise.  */

static uint16_t
lendian16 (uint16_t val)
{
#if GCC_VERSION >= 4008
# if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
  const uint16_t little = val;
# elif __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__
  const uint16_t little = __builtin_bswap16 (val);
# else /* __ORDER_PDP_ENDIAN__ */
  const uint16_t little = val;
# endif
#elif !defined (WORDS_BIGENDIAN)
  const uint16_t little = val;
#else
  /* No builtin to rely on: swap the two bytes with shifts.  */
  const uint16_t little = (uint16_t) ((val >> 8) | (val << 8));
#endif

  return little;
}
72
/* Return the 32-bit value VAL laid out in little-endian byte order.  The
   host byte order is taken from the compiler's __BYTE_ORDER__ macros when
   GCC_VERSION is at least 4006, and from WORDS_BIGENDIAN otherwise.  */

static uint32_t
lendian32 (uint32_t val)
{
#if GCC_VERSION >= 4006
# if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
  const uint32_t little = val;
# elif __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__
  const uint32_t little = __builtin_bswap32 (val);
# else /* __ORDER_PDP_ENDIAN__ */
  /* Only the two 16-bit halves trade places.  */
  const uint32_t little = (val >> 16) | (val << 16);
# endif
#elif !defined (WORDS_BIGENDIAN)
  const uint32_t little = val;
#else
  /* Slow but safe default: first swap the bytes inside each 16-bit half,
     then swap the halves themselves.  */
  const uint32_t pairs = ((val & 0xff00ff00) >> 8) | ((val & 0xff00ff) << 8);
  const uint32_t little = (pairs >> 16) | (pairs << 16);
#endif

  return little;
}
96
/* Return the 64-bit value VAL laid out in little-endian byte order.  The
   host byte order is taken from the compiler's __BYTE_ORDER__ macros when
   GCC_VERSION is at least 4006, and from WORDS_BIGENDIAN otherwise.  */

static uint64_t
lendian64 (uint64_t val)
{
#if GCC_VERSION >= 4006
# if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
  const uint64_t little = val;
# elif __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__
  const uint64_t little = __builtin_bswap64 (val);
# else /* __ORDER_PDP_ENDIAN__ */
  /* Reverse the order of the four 16-bit words.  */
  const uint64_t little = (((val & 0xffffll) << 48)
			   | ((val & 0xffff0000ll) << 16)
			   | ((val & 0xffff00000000ll) >> 16)
			   | ((val & 0xffff000000000000ll) >> 48));
# endif
#elif !defined (WORDS_BIGENDIAN)
  const uint64_t little = val;
#else
  /* Slow but safe default: swap neighbouring bytes, then neighbouring
     16-bit words, and finally the two 32-bit halves.  */
  const uint64_t bytes_swapped = (((val & 0xff00ff00ff00ff00ll) >> 8)
				  | ((val & 0x00ff00ff00ff00ffll) << 8));
  const uint64_t words_swapped
    = (((bytes_swapped & 0xffff0000ffff0000ll) >> 16)
       | ((bytes_swapped & 0x0000ffff0000ffffll) << 16));
  const uint64_t little = (words_swapped >> 32) | (words_swapped << 32);
#endif

  return little;
}
126
/* Fixed strings used when writing BRIG out.  The three BRIG_SECTION_*_NAME
   values are the names the data, code and operand sections are initialized
   with.  */

#define BRIG_ELF_SECTION_NAME ".brig"
#define BRIG_LABEL_STRING "hsa_brig"
#define BRIG_SECTION_DATA_NAME "hsa_data"
#define BRIG_SECTION_CODE_NAME "hsa_code"
#define BRIG_SECTION_OPERAND_NAME "hsa_operand"

/* Capacity in bytes of every chunk buffer a section allocates.  */

#define BRIG_CHUNK_MAX_SIZE (64 * 1024)

/* Required HSA section alignment.  */

#define HSA_SECTION_ALIGNMENT 16
138
/* One buffer of BRIG binary data.  A section keeps a vector of these and
   fills them one after another.  */

struct hsa_brig_data_chunk
{
  /* Number of bytes of DATA that are already in use.  */
  unsigned size;

  /* The buffer itself.  */
  char *data;
};
149
150 /* Structure representing a BRIG section, holding and writing its data. */
151
152 class hsa_brig_section
153 {
154 public:
155 /* Section name that will be output to the BRIG. */
156 const char *section_name;
157 /* Size in bytes of all data stored in the section. */
158 unsigned total_size;
159 /* The size of the header of the section including padding. */
160 unsigned header_byte_count;
161 /* The size of the header of the section without any padding. */
162 unsigned header_byte_delta;
163
164 /* Buffers of binary data, each containing BRIG_CHUNK_MAX_SIZE bytes. */
165 vec <struct hsa_brig_data_chunk> chunks;
166
167 /* More convenient access to the last chunk from the vector above. */
168 struct hsa_brig_data_chunk *cur_chunk;
169
170 void allocate_new_chunk ();
171 void init (const char *name);
172 void release ();
173 void output ();
174 unsigned add (const void *data, unsigned len);
175 void round_size_up (int factor);
176 void *get_ptr_by_offset (unsigned int offset);
177 };
178
179 static struct hsa_brig_section brig_data, brig_code, brig_operand;
180 static uint32_t brig_insn_count;
181 static bool brig_initialized = false;
182
183 /* Mapping between emitted HSA functions and their offset in code segment. */
184 static hash_map<tree, BrigCodeOffset32_t> *function_offsets;
185
186 /* Hash map of emitted function declarations. */
187 static hash_map <tree, BrigDirectiveExecutable *> *emitted_declarations;
188
189 /* Hash table of emitted internal function declaration offsets. */
190 hash_table <hsa_internal_fn_hasher> *hsa_emitted_internal_decls;
191
192 /* List of sbr instructions. */
193 static vec <hsa_insn_sbr *> *switch_instructions;
194
195 struct function_linkage_pair
196 {
197 function_linkage_pair (tree decl, unsigned int off)
198 : function_decl (decl), offset (off) {}
199
200 /* Declaration of called function. */
201 tree function_decl;
202
203 /* Offset in operand section. */
204 unsigned int offset;
205 };
206
207 /* Vector of function calls where we need to resolve function offsets. */
208 static auto_vec <function_linkage_pair> function_call_linkage;
209
210 /* Add a new chunk, allocate data for it and initialize it. */
211
212 void
213 hsa_brig_section::allocate_new_chunk ()
214 {
215 struct hsa_brig_data_chunk new_chunk;
216
217 new_chunk.data = XCNEWVEC (char, BRIG_CHUNK_MAX_SIZE);
218 new_chunk.size = 0;
219 cur_chunk = chunks.safe_push (new_chunk);
220 }
221
222 /* Initialize the brig section. */
223
224 void
225 hsa_brig_section::init (const char *name)
226 {
227 section_name = name;
228 /* While the following computation is basically wrong, because the intent
229 certainly wasn't to have the first character of name and padding, which
230 are a part of sizeof (BrigSectionHeader), included in the first addend,
231 this is what the disassembler expects. */
232 total_size = sizeof (BrigSectionHeader) + strlen (section_name);
233 chunks.create (1);
234 allocate_new_chunk ();
235 header_byte_delta = total_size;
236 round_size_up (4);
237 header_byte_count = total_size;
238 }
239
240 /* Free all data in the section. */
241
242 void
243 hsa_brig_section::release ()
244 {
245 for (unsigned i = 0; i < chunks.length (); i++)
246 free (chunks[i].data);
247 chunks.release ();
248 cur_chunk = NULL;
249 }
250
251 /* Write the section to the output file to a section with the name given at
252 initialization. Switches the output section and does not restore it. */
253
254 void
255 hsa_brig_section::output ()
256 {
257 struct BrigSectionHeader section_header;
258 char padding[8];
259
260 section_header.byteCount = lendian64 (total_size);
261 section_header.headerByteCount = lendian32 (header_byte_count);
262 section_header.nameLength = lendian32 (strlen (section_name));
263 assemble_string ((const char *) &section_header, 16);
264 assemble_string (section_name, (section_header.nameLength));
265 memset (&padding, 0, sizeof (padding));
266 /* This is also a consequence of the wrong header size computation described
267 in a comment in hsa_brig_section::init. */
268 assemble_string (padding, 8);
269 for (unsigned i = 0; i < chunks.length (); i++)
270 assemble_string (chunks[i].data, chunks[i].size);
271 }
272
273 /* Add to the stream LEN bytes of opaque binary DATA. Return the offset at
274 which it was stored. */
275
276 unsigned
277 hsa_brig_section::add (const void *data, unsigned len)
278 {
279 unsigned offset = total_size;
280
281 gcc_assert (len <= BRIG_CHUNK_MAX_SIZE);
282 if (cur_chunk->size > (BRIG_CHUNK_MAX_SIZE - len))
283 allocate_new_chunk ();
284
285 memcpy (cur_chunk->data + cur_chunk->size, data, len);
286 cur_chunk->size += len;
287 total_size += len;
288
289 return offset;
290 }
291
292 /* Add padding to section so that its size is divisible by FACTOR. */
293
294 void
295 hsa_brig_section::round_size_up (int factor)
296 {
297 unsigned padding, res = total_size % factor;
298
299 if (res == 0)
300 return;
301
302 padding = factor - res;
303 total_size += padding;
304 if (cur_chunk->size > (BRIG_CHUNK_MAX_SIZE - padding))
305 {
306 padding -= BRIG_CHUNK_MAX_SIZE - cur_chunk->size;
307 cur_chunk->size = BRIG_CHUNK_MAX_SIZE;
308 allocate_new_chunk ();
309 }
310
311 cur_chunk->size += padding;
312 }
313
314 /* Return pointer to data by global OFFSET in the section. */
315
316 void *
317 hsa_brig_section::get_ptr_by_offset (unsigned int offset)
318 {
319 gcc_assert (offset < total_size);
320 offset -= header_byte_delta;
321
322 unsigned i;
323 for (i = 0; offset >= chunks[i].size; i++)
324 offset -= chunks[i].size;
325
326 return chunks[i].data + offset;
327 }
328
/* Entry of the table of strings already emitted to the data section.  */

struct brig_string_slot
{
  /* The characters of the string, without the prefix.  */
  const char *s;

  /* Prefix character emitted in front of the string, zero if none.  */
  char prefix;

  /* Number of characters in S.  */
  int len;

  /* Offset in the data section at which the string was emitted.  */
  uint32_t offset;
};
338
339 /* Hash table helpers. */
340
341 struct brig_string_slot_hasher : pointer_hash <brig_string_slot>
342 {
343 static inline hashval_t hash (const value_type);
344 static inline bool equal (const value_type, const compare_type);
345 static inline void remove (value_type);
346 };
347
348 /* Returns a hash code for DS. Adapted from libiberty's htab_hash_string
349 to support strings that may not end in '\0'. */
350
351 inline hashval_t
352 brig_string_slot_hasher::hash (const value_type ds)
353 {
354 hashval_t r = ds->len;
355 int i;
356
357 for (i = 0; i < ds->len; i++)
358 r = r * 67 + (unsigned) ds->s[i] - 113;
359 r = r * 67 + (unsigned) ds->prefix - 113;
360 return r;
361 }
362
363 /* Returns nonzero if DS1 and DS2 are equal. */
364
365 inline bool
366 brig_string_slot_hasher::equal (const value_type ds1, const compare_type ds2)
367 {
368 if (ds1->len == ds2->len)
369 return ds1->prefix == ds2->prefix
370 && memcmp (ds1->s, ds2->s, ds1->len) == 0;
371
372 return 0;
373 }
374
375 /* Deallocate memory for DS upon its removal. */
376
377 inline void
378 brig_string_slot_hasher::remove (value_type ds)
379 {
380 free (const_cast<char *> (ds->s));
381 free (ds);
382 }
383
384 /* Hash for strings we output in order not to duplicate them needlessly. */
385
386 static hash_table<brig_string_slot_hasher> *brig_string_htab;
387
388 /* Emit a null terminated string STR to the data section and return its
389 offset in it. If PREFIX is non-zero, output it just before STR too.
390 Sanitize the string if SANITIZE option is set to true. */
391
392 static unsigned
393 brig_emit_string (const char *str, char prefix = 0, bool sanitize = true)
394 {
395 unsigned slen = strlen (str);
396 unsigned offset, len = slen + (prefix ? 1 : 0);
397 uint32_t hdr_len = lendian32 (len);
398 brig_string_slot s_slot;
399 brig_string_slot **slot;
400 char *str2;
401
402 str2 = xstrdup (str);
403
404 if (sanitize)
405 hsa_sanitize_name (str2);
406 s_slot.s = str2;
407 s_slot.len = slen;
408 s_slot.prefix = prefix;
409 s_slot.offset = 0;
410
411 slot = brig_string_htab->find_slot (&s_slot, INSERT);
412 if (*slot == NULL)
413 {
414 brig_string_slot *new_slot = XCNEW (brig_string_slot);
415
416 /* In theory we should fill in BrigData but that would mean copying
417 the string to a buffer for no reason, so we just emulate it. */
418 offset = brig_data.add (&hdr_len, sizeof (hdr_len));
419 if (prefix)
420 brig_data.add (&prefix, 1);
421
422 brig_data.add (str2, slen);
423 brig_data.round_size_up (4);
424
425 /* TODO: could use the string we just copied into
426 brig_string->cur_chunk */
427 new_slot->s = str2;
428 new_slot->len = slen;
429 new_slot->prefix = prefix;
430 new_slot->offset = offset;
431 *slot = new_slot;
432 }
433 else
434 {
435 offset = (*slot)->offset;
436 free (str2);
437 }
438
439 return offset;
440 }
441
442 /* Linked list of queued operands. */
443
444 static struct operand_queue
445 {
446 /* First from the chain of queued operands. */
447 hsa_op_base *first_op, *last_op;
448
449 /* The offset at which the next operand will be enqueued. */
450 unsigned projected_size;
451
452 } op_queue;
453
454 /* Unless already initialized, initialize infrastructure to produce BRIG. */
455
456 static void
457 brig_init (void)
458 {
459 brig_insn_count = 0;
460
461 if (brig_initialized)
462 return;
463
464 brig_string_htab = new hash_table<brig_string_slot_hasher> (37);
465 brig_data.init (BRIG_SECTION_DATA_NAME);
466 brig_code.init (BRIG_SECTION_CODE_NAME);
467 brig_operand.init (BRIG_SECTION_OPERAND_NAME);
468 brig_initialized = true;
469
470 struct BrigDirectiveModule moddir;
471 memset (&moddir, 0, sizeof (moddir));
472 moddir.base.byteCount = lendian16 (sizeof (moddir));
473
474 char *modname;
475 if (main_input_filename && *main_input_filename != '\0')
476 {
477 const char *part = strrchr (main_input_filename, '/');
478 if (!part)
479 part = main_input_filename;
480 else
481 part++;
482 modname = concat ("&__hsa_module_", part, NULL);
483 char *extension = strchr (modname, '.');
484 if (extension)
485 *extension = '\0';
486
487 /* As in LTO mode, we have to emit a different module names. */
488 if (flag_ltrans)
489 {
490 part = strrchr (asm_file_name, '/');
491 if (!part)
492 part = asm_file_name;
493 else
494 part++;
495 char *modname2;
496 asprintf (&modname2, "%s_%s", modname, part);
497 free (modname);
498 modname = modname2;
499 }
500
501 hsa_sanitize_name (modname);
502 moddir.name = brig_emit_string (modname);
503 free (modname);
504 }
505 else
506 moddir.name = brig_emit_string ("__hsa_module_unnamed", '&');
507 moddir.base.kind = lendian16 (BRIG_KIND_DIRECTIVE_MODULE);
508 moddir.hsailMajor = lendian32 (BRIG_VERSION_HSAIL_MAJOR);
509 moddir.hsailMinor = lendian32 (BRIG_VERSION_HSAIL_MINOR);
510 moddir.profile = hsa_full_profile_p () ? BRIG_PROFILE_FULL: BRIG_PROFILE_BASE;
511 if (hsa_machine_large_p ())
512 moddir.machineModel = BRIG_MACHINE_LARGE;
513 else
514 moddir.machineModel = BRIG_MACHINE_SMALL;
515 moddir.defaultFloatRound = BRIG_ROUND_FLOAT_DEFAULT;
516 brig_code.add (&moddir, sizeof (moddir));
517 }
518
519 /* Free all BRIG data. */
520
521 static void
522 brig_release_data (void)
523 {
524 delete brig_string_htab;
525 brig_data.release ();
526 brig_code.release ();
527 brig_operand.release ();
528
529 brig_initialized = 0;
530 }
531
532 /* Enqueue operation OP. Return the offset at which it will be stored. */
533
534 static unsigned int
535 enqueue_op (hsa_op_base *op)
536 {
537 unsigned ret;
538
539 if (op->m_brig_op_offset)
540 return op->m_brig_op_offset;
541
542 ret = op_queue.projected_size;
543 op->m_brig_op_offset = op_queue.projected_size;
544
545 if (!op_queue.first_op)
546 op_queue.first_op = op;
547 else
548 op_queue.last_op->m_next = op;
549 op_queue.last_op = op;
550
551 if (is_a <hsa_op_immed *> (op))
552 op_queue.projected_size += sizeof (struct BrigOperandConstantBytes);
553 else if (is_a <hsa_op_reg *> (op))
554 op_queue.projected_size += sizeof (struct BrigOperandRegister);
555 else if (is_a <hsa_op_address *> (op))
556 op_queue.projected_size += sizeof (struct BrigOperandAddress);
557 else if (is_a <hsa_op_code_ref *> (op))
558 op_queue.projected_size += sizeof (struct BrigOperandCodeRef);
559 else if (is_a <hsa_op_code_list *> (op))
560 op_queue.projected_size += sizeof (struct BrigOperandCodeList);
561 else if (is_a <hsa_op_operand_list *> (op))
562 op_queue.projected_size += sizeof (struct BrigOperandOperandList);
563 else
564 gcc_unreachable ();
565 return ret;
566 }
567
568
569 /* Emit directive describing a symbol if it has not been emitted already.
570 Return the offset of the directive. */
571
572 static unsigned
573 emit_directive_variable (struct hsa_symbol *symbol)
574 {
575 struct BrigDirectiveVariable dirvar;
576 unsigned name_offset;
577 static unsigned res_name_offset;
578
579 if (symbol->m_directive_offset)
580 return symbol->m_directive_offset;
581
582 memset (&dirvar, 0, sizeof (dirvar));
583 dirvar.base.byteCount = lendian16 (sizeof (dirvar));
584 dirvar.base.kind = lendian16 (BRIG_KIND_DIRECTIVE_VARIABLE);
585 dirvar.allocation = symbol->m_allocation;
586
587 char prefix = symbol->m_global_scope_p ? '&' : '%';
588
589 if (symbol->m_decl && TREE_CODE (symbol->m_decl) == RESULT_DECL)
590 {
591 if (res_name_offset == 0)
592 res_name_offset = brig_emit_string (symbol->m_name, '%');
593 name_offset = res_name_offset;
594 }
595 else if (symbol->m_name)
596 name_offset = brig_emit_string (symbol->m_name, prefix);
597 else
598 {
599 char buf[64];
600 snprintf (buf, 64, "__%s_%i", hsa_seg_name (symbol->m_segment),
601 symbol->m_name_number);
602 name_offset = brig_emit_string (buf, prefix);
603 }
604
605 dirvar.name = lendian32 (name_offset);
606 dirvar.init = 0;
607 dirvar.type = lendian16 (symbol->m_type);
608 dirvar.segment = symbol->m_segment;
609 dirvar.align = symbol->m_align;
610 dirvar.linkage = symbol->m_linkage;
611 dirvar.dim.lo = symbol->m_dim;
612 dirvar.dim.hi = symbol->m_dim >> 32;
613
614 /* Global variables are just declared and linked via HSA runtime. */
615 if (symbol->m_linkage != BRIG_ALLOCATION_PROGRAM)
616 dirvar.modifier |= BRIG_VARIABLE_DEFINITION;
617 dirvar.reserved = 0;
618
619 if (symbol->m_cst_value)
620 {
621 dirvar.modifier |= BRIG_VARIABLE_CONST;
622 dirvar.init = lendian32 (enqueue_op (symbol->m_cst_value));
623 }
624
625 symbol->m_directive_offset = brig_code.add (&dirvar, sizeof (dirvar));
626 return symbol->m_directive_offset;
627 }
628
629 /* Emit directives describing either a function declaration or
630 definition F. */
631
632 static BrigDirectiveExecutable *
633 emit_function_directives (hsa_function_representation *f, bool is_declaration)
634 {
635 struct BrigDirectiveExecutable fndir;
636 unsigned name_offset, inarg_off, scoped_off, next_toplev_off;
637 int count = 0;
638 BrigDirectiveExecutable *ptr_to_fndir;
639 hsa_symbol *sym;
640
641 if (!f->m_declaration_p)
642 for (int i = 0; f->m_global_symbols.iterate (i, &sym); i++)
643 {
644 gcc_assert (!sym->m_emitted_to_brig);
645 sym->m_emitted_to_brig = true;
646 emit_directive_variable (sym);
647 brig_insn_count++;
648 }
649
650 name_offset = brig_emit_string (f->m_name, '&');
651 inarg_off = brig_code.total_size + sizeof (fndir)
652 + (f->m_output_arg ? sizeof (struct BrigDirectiveVariable) : 0);
653 scoped_off = inarg_off
654 + f->m_input_args.length () * sizeof (struct BrigDirectiveVariable);
655
656 if (!f->m_declaration_p)
657 {
658 count += f->m_spill_symbols.length ();
659 count += f->m_private_variables.length ();
660 }
661
662 next_toplev_off = scoped_off + count * sizeof (struct BrigDirectiveVariable);
663
664 memset (&fndir, 0, sizeof (fndir));
665 fndir.base.byteCount = lendian16 (sizeof (fndir));
666 fndir.base.kind = lendian16 (f->m_kern_p ? BRIG_KIND_DIRECTIVE_KERNEL
667 : BRIG_KIND_DIRECTIVE_FUNCTION);
668 fndir.name = lendian32 (name_offset);
669 fndir.inArgCount = lendian16 (f->m_input_args.length ());
670 fndir.outArgCount = lendian16 (f->m_output_arg ? 1 : 0);
671 fndir.firstInArg = lendian32 (inarg_off);
672 fndir.firstCodeBlockEntry = lendian32 (scoped_off);
673 fndir.nextModuleEntry = lendian32 (next_toplev_off);
674 fndir.linkage = f->get_linkage ();
675 if (!f->m_declaration_p)
676 fndir.modifier |= BRIG_EXECUTABLE_DEFINITION;
677 memset (&fndir.reserved, 0, sizeof (fndir.reserved));
678
679 /* Once we put a definition of function_offsets, we should not overwrite
680 it with a declaration of the function. */
681 if (f->m_internal_fn == NULL)
682 {
683 if (!function_offsets->get (f->m_decl) || !is_declaration)
684 function_offsets->put (f->m_decl, brig_code.total_size);
685 }
686 else
687 {
688 /* Internal function. */
689 hsa_internal_fn **slot
690 = hsa_emitted_internal_decls->find_slot (f->m_internal_fn, INSERT);
691 hsa_internal_fn *int_fn = new hsa_internal_fn (f->m_internal_fn);
692 int_fn->m_offset = brig_code.total_size;
693 *slot = int_fn;
694 }
695
696 brig_code.add (&fndir, sizeof (fndir));
697 /* terrible hack: we need to set instCount after we emit all
698 insns, but we need to emit directive in order, and we emit directives
699 during insn emitting. So we need to emit the FUNCTION directive
700 early, then the insns, and then we need to set instCount, so remember
701 a pointer to it, in some horrible way. cur_chunk.data+size points
702 directly to after fndir here. */
703 ptr_to_fndir
704 = (BrigDirectiveExecutable *)(brig_code.cur_chunk->data
705 + brig_code.cur_chunk->size
706 - sizeof (fndir));
707
708 if (f->m_output_arg)
709 emit_directive_variable (f->m_output_arg);
710 for (unsigned i = 0; i < f->m_input_args.length (); i++)
711 emit_directive_variable (f->m_input_args[i]);
712
713 if (!f->m_declaration_p)
714 {
715 for (int i = 0; f->m_spill_symbols.iterate (i, &sym); i++)
716 {
717 emit_directive_variable (sym);
718 brig_insn_count++;
719 }
720 for (unsigned i = 0; i < f->m_private_variables.length (); i++)
721 {
722 emit_directive_variable (f->m_private_variables[i]);
723 brig_insn_count++;
724 }
725 }
726
727 return ptr_to_fndir;
728 }
729
730 /* Emit a label directive for the given HBB. We assume it is about to start on
731 the current offset in the code section. */
732
733 static void
734 emit_bb_label_directive (hsa_bb *hbb)
735 {
736 struct BrigDirectiveLabel lbldir;
737
738 lbldir.base.byteCount = lendian16 (sizeof (lbldir));
739 lbldir.base.kind = lendian16 (BRIG_KIND_DIRECTIVE_LABEL);
740 char buf[32];
741 snprintf (buf, 32, "BB_%u_%i", DECL_UID (current_function_decl),
742 hbb->m_index);
743 lbldir.name = lendian32 (brig_emit_string (buf, '@'));
744
745 hbb->m_label_ref.m_directive_offset = brig_code.add (&lbldir,
746 sizeof (lbldir));
747 brig_insn_count++;
748 }
749
750 /* Map a normal HSAIL type to the type of the equivalent BRIG operand
751 holding such, for constants and registers. */
752
753 static BrigType16_t
754 regtype_for_type (BrigType16_t t)
755 {
756 switch (t)
757 {
758 case BRIG_TYPE_B1:
759 return BRIG_TYPE_B1;
760
761 case BRIG_TYPE_U8:
762 case BRIG_TYPE_U16:
763 case BRIG_TYPE_U32:
764 case BRIG_TYPE_S8:
765 case BRIG_TYPE_S16:
766 case BRIG_TYPE_S32:
767 case BRIG_TYPE_B8:
768 case BRIG_TYPE_B16:
769 case BRIG_TYPE_B32:
770 case BRIG_TYPE_F16:
771 case BRIG_TYPE_F32:
772 case BRIG_TYPE_U8X4:
773 case BRIG_TYPE_U16X2:
774 case BRIG_TYPE_S8X4:
775 case BRIG_TYPE_S16X2:
776 case BRIG_TYPE_F16X2:
777 return BRIG_TYPE_B32;
778
779 case BRIG_TYPE_U64:
780 case BRIG_TYPE_S64:
781 case BRIG_TYPE_F64:
782 case BRIG_TYPE_B64:
783 case BRIG_TYPE_U8X8:
784 case BRIG_TYPE_U16X4:
785 case BRIG_TYPE_U32X2:
786 case BRIG_TYPE_S8X8:
787 case BRIG_TYPE_S16X4:
788 case BRIG_TYPE_S32X2:
789 case BRIG_TYPE_F16X4:
790 case BRIG_TYPE_F32X2:
791 return BRIG_TYPE_B64;
792
793 case BRIG_TYPE_B128:
794 case BRIG_TYPE_U8X16:
795 case BRIG_TYPE_U16X8:
796 case BRIG_TYPE_U32X4:
797 case BRIG_TYPE_U64X2:
798 case BRIG_TYPE_S8X16:
799 case BRIG_TYPE_S16X8:
800 case BRIG_TYPE_S32X4:
801 case BRIG_TYPE_S64X2:
802 case BRIG_TYPE_F16X8:
803 case BRIG_TYPE_F32X4:
804 case BRIG_TYPE_F64X2:
805 return BRIG_TYPE_B128;
806
807 default:
808 gcc_unreachable ();
809 }
810 }
811
812 /* Return the length of the BRIG type TYPE that is going to be streamed out as
813 an immediate constant (so it must not be B1). */
814
815 unsigned
816 hsa_get_imm_brig_type_len (BrigType16_t type)
817 {
818 BrigType16_t base_type = type & BRIG_TYPE_BASE_MASK;
819 BrigType16_t pack_type = type & BRIG_TYPE_PACK_MASK;
820
821 switch (pack_type)
822 {
823 case BRIG_TYPE_PACK_NONE:
824 break;
825 case BRIG_TYPE_PACK_32:
826 return 4;
827 case BRIG_TYPE_PACK_64:
828 return 8;
829 case BRIG_TYPE_PACK_128:
830 return 16;
831 default:
832 gcc_unreachable ();
833 }
834
835 switch (base_type)
836 {
837 case BRIG_TYPE_U8:
838 case BRIG_TYPE_S8:
839 case BRIG_TYPE_B8:
840 return 1;
841 case BRIG_TYPE_U16:
842 case BRIG_TYPE_S16:
843 case BRIG_TYPE_F16:
844 case BRIG_TYPE_B16:
845 return 2;
846 case BRIG_TYPE_U32:
847 case BRIG_TYPE_S32:
848 case BRIG_TYPE_F32:
849 case BRIG_TYPE_B32:
850 return 4;
851 case BRIG_TYPE_U64:
852 case BRIG_TYPE_S64:
853 case BRIG_TYPE_F64:
854 case BRIG_TYPE_B64:
855 return 8;
856 case BRIG_TYPE_B128:
857 return 16;
858 default:
859 gcc_unreachable ();
860 }
861 }
862
863 /* Emit one scalar VALUE to the buffer DATA intended for BRIG emission.
864 If NEED_LEN is not equal to zero, shrink or extend the value
865 to NEED_LEN bytes. Return how many bytes were written. */
866
867 static int
868 emit_immediate_scalar_to_buffer (tree value, char *data, unsigned need_len)
869 {
870 union hsa_bytes bytes;
871
872 memset (&bytes, 0, sizeof (bytes));
873 tree type = TREE_TYPE (value);
874 gcc_checking_assert (TREE_CODE (type) != VECTOR_TYPE);
875
876 unsigned data_len = tree_to_uhwi (TYPE_SIZE (type)) / BITS_PER_UNIT;
877 if (INTEGRAL_TYPE_P (type)
878 || (POINTER_TYPE_P (type) && TREE_CODE (value) == INTEGER_CST))
879 switch (data_len)
880 {
881 case 1:
882 bytes.b8 = (uint8_t) TREE_INT_CST_LOW (value);
883 break;
884 case 2:
885 bytes.b16 = (uint16_t) TREE_INT_CST_LOW (value);
886 break;
887 case 4:
888 bytes.b32 = (uint32_t) TREE_INT_CST_LOW (value);
889 break;
890 case 8:
891 bytes.b64 = (uint64_t) TREE_INT_CST_LOW (value);
892 break;
893 default:
894 gcc_unreachable ();
895 }
896 else if (SCALAR_FLOAT_TYPE_P (type))
897 {
898 if (data_len == 2)
899 {
900 sorry ("Support for HSA does not implement immediate 16 bit FPU "
901 "operands");
902 return 2;
903 }
904 unsigned int_len = GET_MODE_SIZE (TYPE_MODE (type));
905 /* There are always 32 bits in each long, no matter the size of
906 the hosts long. */
907 long tmp[6];
908
909 real_to_target (tmp, TREE_REAL_CST_PTR (value), TYPE_MODE (type));
910
911 if (int_len == 4)
912 bytes.b32 = (uint32_t) tmp[0];
913 else
914 {
915 bytes.b64 = (uint64_t)(uint32_t) tmp[1];
916 bytes.b64 <<= 32;
917 bytes.b64 |= (uint32_t) tmp[0];
918 }
919 }
920 else
921 gcc_unreachable ();
922
923 int len;
924 if (need_len == 0)
925 len = data_len;
926 else
927 len = need_len;
928
929 memcpy (data, &bytes, len);
930 return len;
931 }
932
933 char *
934 hsa_op_immed::emit_to_buffer (unsigned *brig_repr_size)
935 {
936 char *brig_repr;
937 *brig_repr_size = hsa_get_imm_brig_type_len (m_type);
938
939 if (m_tree_value != NULL_TREE)
940 {
941 /* Update brig_repr_size for special tree values. */
942 if (TREE_CODE (m_tree_value) == STRING_CST)
943 *brig_repr_size = TREE_STRING_LENGTH (m_tree_value);
944 else if (TREE_CODE (m_tree_value) == CONSTRUCTOR)
945 *brig_repr_size
946 = tree_to_uhwi (TYPE_SIZE_UNIT (TREE_TYPE (m_tree_value)));
947
948 unsigned total_len = *brig_repr_size;
949
950 /* As we can have a constructor with fewer elements, fill the memory
951 with zeros. */
952 brig_repr = XCNEWVEC (char, total_len);
953 char *p = brig_repr;
954
955 if (TREE_CODE (m_tree_value) == VECTOR_CST)
956 {
957 int i, num = VECTOR_CST_NELTS (m_tree_value);
958 for (i = 0; i < num; i++)
959 {
960 tree v = VECTOR_CST_ELT (m_tree_value, i);
961 unsigned actual = emit_immediate_scalar_to_buffer (v, p, 0);
962 total_len -= actual;
963 p += actual;
964 }
965 /* Vectors should have the exact size. */
966 gcc_assert (total_len == 0);
967 }
968 else if (TREE_CODE (m_tree_value) == STRING_CST)
969 memcpy (brig_repr, TREE_STRING_POINTER (m_tree_value),
970 TREE_STRING_LENGTH (m_tree_value));
971 else if (TREE_CODE (m_tree_value) == COMPLEX_CST)
972 {
973 gcc_assert (total_len % 2 == 0);
974 unsigned actual;
975 actual
976 = emit_immediate_scalar_to_buffer (TREE_REALPART (m_tree_value), p,
977 total_len / 2);
978
979 gcc_assert (actual == total_len / 2);
980 p += actual;
981
982 actual
983 = emit_immediate_scalar_to_buffer (TREE_IMAGPART (m_tree_value), p,
984 total_len / 2);
985 gcc_assert (actual == total_len / 2);
986 }
987 else if (TREE_CODE (m_tree_value) == CONSTRUCTOR)
988 {
989 unsigned len = CONSTRUCTOR_NELTS (m_tree_value);
990 for (unsigned i = 0; i < len; i++)
991 {
992 tree v = CONSTRUCTOR_ELT (m_tree_value, i)->value;
993 unsigned actual = emit_immediate_scalar_to_buffer (v, p, 0);
994 total_len -= actual;
995 p += actual;
996 }
997 }
998 else
999 emit_immediate_scalar_to_buffer (m_tree_value, p, total_len);
1000 }
1001 else
1002 {
1003 hsa_bytes bytes;
1004
1005 switch (*brig_repr_size)
1006 {
1007 case 1:
1008 bytes.b8 = (uint8_t) m_int_value;
1009 break;
1010 case 2:
1011 bytes.b16 = (uint16_t) m_int_value;
1012 break;
1013 case 4:
1014 bytes.b32 = (uint32_t) m_int_value;
1015 break;
1016 case 8:
1017 bytes.b64 = (uint64_t) m_int_value;
1018 break;
1019 default:
1020 gcc_unreachable ();
1021 }
1022
1023 brig_repr = XNEWVEC (char, *brig_repr_size);
1024 memcpy (brig_repr, &bytes, *brig_repr_size);
1025 }
1026
1027 return brig_repr;
1028 }
1029
1030 /* Emit an immediate BRIG operand IMM. The BRIG type of the immediate might
1031 have been massaged to comply with various HSA/BRIG type requirements, so the
1032 only important aspect of that is the length (because HSAIL might expect
1033 smaller constants or become bit-data). The data should be represented
1034 according to what is in the tree representation. */
1035
1036 static void
1037 emit_immediate_operand (hsa_op_immed *imm)
1038 {
1039 unsigned brig_repr_size;
1040 char *brig_repr = imm->emit_to_buffer (&brig_repr_size);
1041 struct BrigOperandConstantBytes out;
1042
1043 memset (&out, 0, sizeof (out));
1044 out.base.byteCount = lendian16 (sizeof (out));
1045 out.base.kind = lendian16 (BRIG_KIND_OPERAND_CONSTANT_BYTES);
1046 uint32_t byteCount = lendian32 (brig_repr_size);
1047 out.type = lendian16 (imm->m_type);
1048 out.bytes = lendian32 (brig_data.add (&byteCount, sizeof (byteCount)));
1049 brig_operand.add (&out, sizeof (out));
1050 brig_data.add (brig_repr, brig_repr_size);
1051 brig_data.round_size_up (4);
1052
1053 free (brig_repr);
1054 }
1055
1056 /* Emit a register BRIG operand REG. */
1057
1058 static void
1059 emit_register_operand (hsa_op_reg *reg)
1060 {
1061 struct BrigOperandRegister out;
1062
1063 out.base.byteCount = lendian16 (sizeof (out));
1064 out.base.kind = lendian16 (BRIG_KIND_OPERAND_REGISTER);
1065 out.regNum = lendian32 (reg->m_hard_num);
1066
1067 switch (regtype_for_type (reg->m_type))
1068 {
1069 case BRIG_TYPE_B32:
1070 out.regKind = BRIG_REGISTER_KIND_SINGLE;
1071 break;
1072 case BRIG_TYPE_B64:
1073 out.regKind = BRIG_REGISTER_KIND_DOUBLE;
1074 break;
1075 case BRIG_TYPE_B128:
1076 out.regKind = BRIG_REGISTER_KIND_QUAD;
1077 break;
1078 case BRIG_TYPE_B1:
1079 out.regKind = BRIG_REGISTER_KIND_CONTROL;
1080 break;
1081 default:
1082 gcc_unreachable ();
1083 }
1084
1085 brig_operand.add (&out, sizeof (out));
1086 }
1087
1088 /* Emit an address BRIG operand ADDR. */
1089
1090 static void
1091 emit_address_operand (hsa_op_address *addr)
1092 {
1093 struct BrigOperandAddress out;
1094
1095 out.base.byteCount = lendian16 (sizeof (out));
1096 out.base.kind = lendian16 (BRIG_KIND_OPERAND_ADDRESS);
1097 out.symbol = addr->m_symbol
1098 ? lendian32 (emit_directive_variable (addr->m_symbol)) : 0;
1099 out.reg = addr->m_reg ? lendian32 (enqueue_op (addr->m_reg)) : 0;
1100
1101 if (sizeof (addr->m_imm_offset) == 8)
1102 {
1103 out.offset.lo = lendian32 (addr->m_imm_offset);
1104 out.offset.hi = lendian32 (addr->m_imm_offset >> 32);
1105 }
1106 else
1107 {
1108 gcc_assert (sizeof (addr->m_imm_offset) == 4);
1109 out.offset.lo = lendian32 (addr->m_imm_offset);
1110 out.offset.hi = 0;
1111 }
1112
1113 brig_operand.add (&out, sizeof (out));
1114 }
1115
1116 /* Emit a code reference operand REF. */
1117
1118 static void
1119 emit_code_ref_operand (hsa_op_code_ref *ref)
1120 {
1121 struct BrigOperandCodeRef out;
1122
1123 out.base.byteCount = lendian16 (sizeof (out));
1124 out.base.kind = lendian16 (BRIG_KIND_OPERAND_CODE_REF);
1125 out.ref = lendian32 (ref->m_directive_offset);
1126 brig_operand.add (&out, sizeof (out));
1127 }
1128
1129 /* Emit a code list operand CODE_LIST. */
1130
1131 static void
1132 emit_code_list_operand (hsa_op_code_list *code_list)
1133 {
1134 struct BrigOperandCodeList out;
1135 unsigned args = code_list->m_offsets.length ();
1136
1137 for (unsigned i = 0; i < args; i++)
1138 gcc_assert (code_list->m_offsets[i]);
1139
1140 out.base.byteCount = lendian16 (sizeof (out));
1141 out.base.kind = lendian16 (BRIG_KIND_OPERAND_CODE_LIST);
1142
1143 uint32_t byteCount = lendian32 (4 * args);
1144
1145 out.elements = lendian32 (brig_data.add (&byteCount, sizeof (byteCount)));
1146 brig_data.add (code_list->m_offsets.address (), args * sizeof (uint32_t));
1147 brig_data.round_size_up (4);
1148 brig_operand.add (&out, sizeof (out));
1149 }
1150
1151 /* Emit an operand list operand OPERAND_LIST. */
1152
1153 static void
1154 emit_operand_list_operand (hsa_op_operand_list *operand_list)
1155 {
1156 struct BrigOperandOperandList out;
1157 unsigned args = operand_list->m_offsets.length ();
1158
1159 for (unsigned i = 0; i < args; i++)
1160 gcc_assert (operand_list->m_offsets[i]);
1161
1162 out.base.byteCount = lendian16 (sizeof (out));
1163 out.base.kind = lendian16 (BRIG_KIND_OPERAND_OPERAND_LIST);
1164
1165 uint32_t byteCount = lendian32 (4 * args);
1166
1167 out.elements = lendian32 (brig_data.add (&byteCount, sizeof (byteCount)));
1168 brig_data.add (operand_list->m_offsets.address (), args * sizeof (uint32_t));
1169 brig_data.round_size_up (4);
1170 brig_operand.add (&out, sizeof (out));
1171 }
1172
1173 /* Emit all operands queued for writing. */
1174
1175 static void
1176 emit_queued_operands (void)
1177 {
1178 for (hsa_op_base *op = op_queue.first_op; op; op = op->m_next)
1179 {
1180 gcc_assert (op->m_brig_op_offset == brig_operand.total_size);
1181 if (hsa_op_immed *imm = dyn_cast <hsa_op_immed *> (op))
1182 emit_immediate_operand (imm);
1183 else if (hsa_op_reg *reg = dyn_cast <hsa_op_reg *> (op))
1184 emit_register_operand (reg);
1185 else if (hsa_op_address *addr = dyn_cast <hsa_op_address *> (op))
1186 emit_address_operand (addr);
1187 else if (hsa_op_code_ref *ref = dyn_cast <hsa_op_code_ref *> (op))
1188 emit_code_ref_operand (ref);
1189 else if (hsa_op_code_list *code_list = dyn_cast <hsa_op_code_list *> (op))
1190 emit_code_list_operand (code_list);
1191 else if (hsa_op_operand_list *l = dyn_cast <hsa_op_operand_list *> (op))
1192 emit_operand_list_operand (l);
1193 else
1194 gcc_unreachable ();
1195 }
1196 }
1197
1198 /* Emit directives describing the function that is used for
1199 a function declaration. */
1200
1201 static BrigDirectiveExecutable *
1202 emit_function_declaration (tree decl)
1203 {
1204 hsa_function_representation *f = hsa_generate_function_declaration (decl);
1205
1206 BrigDirectiveExecutable *e = emit_function_directives (f, true);
1207 emit_queued_operands ();
1208
1209 delete f;
1210
1211 return e;
1212 }
1213
1214 /* Emit directives describing the function that is used for
1215 an internal function declaration. */
1216
1217 static BrigDirectiveExecutable *
1218 emit_internal_fn_decl (hsa_internal_fn *fn)
1219 {
1220 hsa_function_representation *f = hsa_generate_internal_fn_decl (fn);
1221
1222 BrigDirectiveExecutable *e = emit_function_directives (f, true);
1223 emit_queued_operands ();
1224
1225 delete f;
1226
1227 return e;
1228 }
1229
1230 /* Enqueue all operands of INSN and return offset to BRIG data section
1231 to list of operand offsets. */
1232
1233 static unsigned
1234 emit_insn_operands (hsa_insn_basic *insn)
1235 {
1236 auto_vec<BrigOperandOffset32_t, HSA_BRIG_INT_STORAGE_OPERANDS>
1237 operand_offsets;
1238
1239 unsigned l = insn->operand_count ();
1240 operand_offsets.safe_grow (l);
1241
1242 for (unsigned i = 0; i < l; i++)
1243 operand_offsets[i] = lendian32 (enqueue_op (insn->get_op (i)));
1244
1245 /* We have N operands so use 4 * N for the byte_count. */
1246 uint32_t byte_count = lendian32 (4 * l);
1247
1248 unsigned offset = brig_data.add (&byte_count, sizeof (byte_count));
1249 brig_data.add (operand_offsets.address (),
1250 l * sizeof (BrigOperandOffset32_t));
1251
1252 brig_data.round_size_up (4);
1253
1254 return offset;
1255 }
1256
1257 /* Enqueue operand OP0, OP1, OP2 (if different from NULL) and return offset
1258 to BRIG data section to list of operand offsets. */
1259
1260 static unsigned
1261 emit_operands (hsa_op_base *op0, hsa_op_base *op1 = NULL,
1262 hsa_op_base *op2 = NULL)
1263 {
1264 auto_vec<BrigOperandOffset32_t, HSA_BRIG_INT_STORAGE_OPERANDS>
1265 operand_offsets;
1266
1267 gcc_checking_assert (op0 != NULL);
1268 operand_offsets.safe_push (enqueue_op (op0));
1269
1270 if (op1 != NULL)
1271 {
1272 operand_offsets.safe_push (enqueue_op (op1));
1273 if (op2 != NULL)
1274 operand_offsets.safe_push (enqueue_op (op2));
1275 }
1276
1277 unsigned l = operand_offsets.length ();
1278
1279 /* We have N operands so use 4 * N for the byte_count. */
1280 uint32_t byte_count = lendian32 (4 * l);
1281
1282 unsigned offset = brig_data.add (&byte_count, sizeof (byte_count));
1283 brig_data.add (operand_offsets.address (),
1284 l * sizeof (BrigOperandOffset32_t));
1285
1286 brig_data.round_size_up (4);
1287
1288 return offset;
1289 }
1290
1291 /* Emit an HSA memory instruction and all necessary directives, schedule
1292 necessary operands for writing. */
1293
1294 static void
1295 emit_memory_insn (hsa_insn_mem *mem)
1296 {
1297 struct BrigInstMem repr;
1298 gcc_checking_assert (mem->operand_count () == 2);
1299
1300 hsa_op_address *addr = as_a <hsa_op_address *> (mem->get_op (1));
1301
1302 /* This is necessary because of the erroneous typedef of
1303 BrigMemoryModifier8_t which introduces padding which may then contain
1304 random stuff (which we do not want so that we can test things don't
1305 change). */
1306 memset (&repr, 0, sizeof (repr));
1307 repr.base.base.byteCount = lendian16 (sizeof (repr));
1308 repr.base.base.kind = lendian16 (BRIG_KIND_INST_MEM);
1309 repr.base.opcode = lendian16 (mem->m_opcode);
1310 repr.base.type = lendian16 (mem->m_type);
1311 repr.base.operands = lendian32 (emit_insn_operands (mem));
1312
1313 if (addr->m_symbol)
1314 repr.segment = addr->m_symbol->m_segment;
1315 else
1316 repr.segment = BRIG_SEGMENT_FLAT;
1317 repr.modifier = 0;
1318 repr.equivClass = mem->m_equiv_class;
1319 repr.align = mem->m_align;
1320 if (mem->m_opcode == BRIG_OPCODE_LD)
1321 repr.width = BRIG_WIDTH_1;
1322 else
1323 repr.width = BRIG_WIDTH_NONE;
1324 memset (&repr.reserved, 0, sizeof (repr.reserved));
1325 brig_code.add (&repr, sizeof (repr));
1326 brig_insn_count++;
1327 }
1328
1329 /* Emit an HSA signal memory instruction and all necessary directives, schedule
1330 necessary operands for writing. */
1331
1332 static void
1333 emit_signal_insn (hsa_insn_signal *mem)
1334 {
1335 struct BrigInstSignal repr;
1336
1337 /* This is necessary because of the erroneous typedef of
1338 BrigMemoryModifier8_t which introduces padding which may then contain
1339 random stuff (which we do not want so that we can test things don't
1340 change). */
1341 memset (&repr, 0, sizeof (repr));
1342 repr.base.base.byteCount = lendian16 (sizeof (repr));
1343 repr.base.base.kind = lendian16 (BRIG_KIND_INST_SIGNAL);
1344 repr.base.opcode = lendian16 (mem->m_opcode);
1345 repr.base.type = lendian16 (mem->m_type);
1346 repr.base.operands = lendian32 (emit_insn_operands (mem));
1347
1348 repr.memoryOrder = mem->m_memoryorder;
1349 repr.signalOperation = mem->m_atomicop;
1350 repr.signalType = BRIG_TYPE_SIG64;
1351
1352 brig_code.add (&repr, sizeof (repr));
1353 brig_insn_count++;
1354 }
1355
1356 /* Emit an HSA atomic memory instruction and all necessary directives, schedule
1357 necessary operands for writing. */
1358
1359 static void
1360 emit_atomic_insn (hsa_insn_atomic *mem)
1361 {
1362 struct BrigInstAtomic repr;
1363
1364 /* Either operand[0] or operand[1] must be an address operand. */
1365 hsa_op_address *addr = NULL;
1366 if (is_a <hsa_op_address *> (mem->get_op (0)))
1367 addr = as_a <hsa_op_address *> (mem->get_op (0));
1368 else
1369 addr = as_a <hsa_op_address *> (mem->get_op (1));
1370
1371 /* This is necessary because of the erroneous typedef of
1372 BrigMemoryModifier8_t which introduces padding which may then contain
1373 random stuff (which we do not want so that we can test things don't
1374 change). */
1375 memset (&repr, 0, sizeof (repr));
1376 repr.base.base.byteCount = lendian16 (sizeof (repr));
1377 repr.base.base.kind = lendian16 (BRIG_KIND_INST_ATOMIC);
1378 repr.base.opcode = lendian16 (mem->m_opcode);
1379 repr.base.type = lendian16 (mem->m_type);
1380 repr.base.operands = lendian32 (emit_insn_operands (mem));
1381
1382 if (addr->m_symbol)
1383 repr.segment = addr->m_symbol->m_segment;
1384 else
1385 repr.segment = BRIG_SEGMENT_FLAT;
1386 repr.memoryOrder = mem->m_memoryorder;
1387 repr.memoryScope = mem->m_memoryscope;
1388 repr.atomicOperation = mem->m_atomicop;
1389
1390 brig_code.add (&repr, sizeof (repr));
1391 brig_insn_count++;
1392 }
1393
1394 /* Emit an HSA LDA instruction and all necessary directives, schedule
1395 necessary operands for writing. */
1396
1397 static void
1398 emit_addr_insn (hsa_insn_basic *insn)
1399 {
1400 struct BrigInstAddr repr;
1401
1402 hsa_op_address *addr = as_a <hsa_op_address *> (insn->get_op (1));
1403
1404 repr.base.base.byteCount = lendian16 (sizeof (repr));
1405 repr.base.base.kind = lendian16 (BRIG_KIND_INST_ADDR);
1406 repr.base.opcode = lendian16 (insn->m_opcode);
1407 repr.base.type = lendian16 (insn->m_type);
1408 repr.base.operands = lendian32 (emit_insn_operands (insn));
1409
1410 if (addr->m_symbol)
1411 repr.segment = addr->m_symbol->m_segment;
1412 else
1413 repr.segment = BRIG_SEGMENT_FLAT;
1414 memset (&repr.reserved, 0, sizeof (repr.reserved));
1415
1416 brig_code.add (&repr, sizeof (repr));
1417 brig_insn_count++;
1418 }
1419
1420 /* Emit an HSA segment conversion instruction and all necessary directives,
1421 schedule necessary operands for writing. */
1422
1423 static void
1424 emit_segment_insn (hsa_insn_seg *seg)
1425 {
1426 struct BrigInstSegCvt repr;
1427
1428 repr.base.base.byteCount = lendian16 (sizeof (repr));
1429 repr.base.base.kind = lendian16 (BRIG_KIND_INST_SEG_CVT);
1430 repr.base.opcode = lendian16 (seg->m_opcode);
1431 repr.base.type = lendian16 (seg->m_type);
1432 repr.base.operands = lendian32 (emit_insn_operands (seg));
1433 repr.sourceType = lendian16 (as_a <hsa_op_reg *> (seg->get_op (1))->m_type);
1434 repr.segment = seg->m_segment;
1435 repr.modifier = 0;
1436
1437 brig_code.add (&repr, sizeof (repr));
1438
1439 brig_insn_count++;
1440 }
1441
1442 /* Emit an HSA alloca instruction and all necessary directives,
1443 schedule necessary operands for writing. */
1444
1445 static void
1446 emit_alloca_insn (hsa_insn_alloca *alloca)
1447 {
1448 struct BrigInstMem repr;
1449 gcc_checking_assert (alloca->operand_count () == 2);
1450
1451 /* This is necessary because of the erroneous typedef of
1452 BrigMemoryModifier8_t which introduces padding which may then contain
1453 random stuff (which we do not want so that we can test things don't
1454 change). */
1455 memset (&repr, 0, sizeof (repr));
1456 repr.base.base.byteCount = lendian16 (sizeof (repr));
1457 repr.base.base.kind = lendian16 (BRIG_KIND_INST_MEM);
1458 repr.base.opcode = lendian16 (alloca->m_opcode);
1459 repr.base.type = lendian16 (alloca->m_type);
1460 repr.base.operands = lendian32 (emit_insn_operands (alloca));
1461 repr.segment = BRIG_SEGMENT_PRIVATE;
1462 repr.modifier = 0;
1463 repr.equivClass = 0;
1464 repr.align = alloca->m_align;
1465 repr.width = BRIG_WIDTH_NONE;
1466 memset (&repr.reserved, 0, sizeof (repr.reserved));
1467 brig_code.add (&repr, sizeof (repr));
1468 brig_insn_count++;
1469 }
1470
1471 /* Emit an HSA comparison instruction and all necessary directives,
1472 schedule necessary operands for writing. */
1473
1474 static void
1475 emit_cmp_insn (hsa_insn_cmp *cmp)
1476 {
1477 struct BrigInstCmp repr;
1478
1479 memset (&repr, 0, sizeof (repr));
1480 repr.base.base.byteCount = lendian16 (sizeof (repr));
1481 repr.base.base.kind = lendian16 (BRIG_KIND_INST_CMP);
1482 repr.base.opcode = lendian16 (cmp->m_opcode);
1483 repr.base.type = lendian16 (cmp->m_type);
1484 repr.base.operands = lendian32 (emit_insn_operands (cmp));
1485
1486 if (is_a <hsa_op_reg *> (cmp->get_op (1)))
1487 repr.sourceType
1488 = lendian16 (as_a <hsa_op_reg *> (cmp->get_op (1))->m_type);
1489 else
1490 repr.sourceType
1491 = lendian16 (as_a <hsa_op_immed *> (cmp->get_op (1))->m_type);
1492 repr.modifier = 0;
1493 repr.compare = cmp->m_compare;
1494 repr.pack = 0;
1495
1496 brig_code.add (&repr, sizeof (repr));
1497 brig_insn_count++;
1498 }
1499
1500 /* Emit an HSA branching instruction and all necessary directives, schedule
1501 necessary operands for writing. */
1502
1503 static void
1504 emit_branch_insn (hsa_insn_br *br)
1505 {
1506 struct BrigInstBr repr;
1507
1508 basic_block target = NULL;
1509 edge_iterator ei;
1510 edge e;
1511
1512 /* At the moment we only handle direct conditional jumps. */
1513 gcc_assert (br->m_opcode == BRIG_OPCODE_CBR);
1514 repr.base.base.byteCount = lendian16 (sizeof (repr));
1515 repr.base.base.kind = lendian16 (BRIG_KIND_INST_BR);
1516 repr.base.opcode = lendian16 (br->m_opcode);
1517 repr.width = BRIG_WIDTH_1;
1518 /* For Conditional jumps the type is always B1. */
1519 repr.base.type = lendian16 (BRIG_TYPE_B1);
1520
1521 FOR_EACH_EDGE (e, ei, br->m_bb->succs)
1522 if (e->flags & EDGE_TRUE_VALUE)
1523 {
1524 target = e->dest;
1525 break;
1526 }
1527 gcc_assert (target);
1528
1529 repr.base.operands
1530 = lendian32 (emit_operands (br->get_op (0),
1531 &hsa_bb_for_bb (target)->m_label_ref));
1532 memset (&repr.reserved, 0, sizeof (repr.reserved));
1533
1534 brig_code.add (&repr, sizeof (repr));
1535 brig_insn_count++;
1536 }
1537
1538 /* Emit an HSA unconditional jump branching instruction that points to
1539 a label REFERENCE. */
1540
1541 static void
1542 emit_unconditional_jump (hsa_op_code_ref *reference)
1543 {
1544 struct BrigInstBr repr;
1545
1546 repr.base.base.byteCount = lendian16 (sizeof (repr));
1547 repr.base.base.kind = lendian16 (BRIG_KIND_INST_BR);
1548 repr.base.opcode = lendian16 (BRIG_OPCODE_BR);
1549 repr.base.type = lendian16 (BRIG_TYPE_NONE);
1550 /* Direct branches to labels must be width(all). */
1551 repr.width = BRIG_WIDTH_ALL;
1552
1553 repr.base.operands = lendian32 (emit_operands (reference));
1554 memset (&repr.reserved, 0, sizeof (repr.reserved));
1555 brig_code.add (&repr, sizeof (repr));
1556 brig_insn_count++;
1557 }
1558
1559 /* Emit an HSA switch jump instruction that uses a jump table to
1560 jump to a destination label. */
1561
1562 static void
1563 emit_switch_insn (hsa_insn_sbr *sbr)
1564 {
1565 struct BrigInstBr repr;
1566
1567 gcc_assert (sbr->m_opcode == BRIG_OPCODE_SBR);
1568 repr.base.base.byteCount = lendian16 (sizeof (repr));
1569 repr.base.base.kind = lendian16 (BRIG_KIND_INST_BR);
1570 repr.base.opcode = lendian16 (sbr->m_opcode);
1571 repr.width = BRIG_WIDTH_1;
1572 /* For Conditional jumps the type is always B1. */
1573 hsa_op_reg *index = as_a <hsa_op_reg *> (sbr->get_op (0));
1574 repr.base.type = lendian16 (index->m_type);
1575 repr.base.operands
1576 = lendian32 (emit_operands (sbr->get_op (0), sbr->m_label_code_list));
1577 memset (&repr.reserved, 0, sizeof (repr.reserved));
1578
1579 brig_code.add (&repr, sizeof (repr));
1580 brig_insn_count++;
1581 }
1582
1583 /* Emit a HSA convert instruction and all necessary directives, schedule
1584 necessary operands for writing. */
1585
1586 static void
1587 emit_cvt_insn (hsa_insn_cvt *insn)
1588 {
1589 struct BrigInstCvt repr;
1590 BrigType16_t srctype;
1591
1592 repr.base.base.byteCount = lendian16 (sizeof (repr));
1593 repr.base.base.kind = lendian16 (BRIG_KIND_INST_CVT);
1594 repr.base.opcode = lendian16 (insn->m_opcode);
1595 repr.base.type = lendian16 (insn->m_type);
1596 repr.base.operands = lendian32 (emit_insn_operands (insn));
1597
1598 if (is_a <hsa_op_reg *> (insn->get_op (1)))
1599 srctype = as_a <hsa_op_reg *> (insn->get_op (1))->m_type;
1600 else
1601 srctype = as_a <hsa_op_immed *> (insn->get_op (1))->m_type;
1602 repr.sourceType = lendian16 (srctype);
1603 repr.modifier = 0;
1604 /* float to smaller float requires a rounding setting (we default
1605 to 'near'. */
1606 if (hsa_type_float_p (insn->m_type)
1607 && (!hsa_type_float_p (srctype)
1608 || ((insn->m_type & BRIG_TYPE_BASE_MASK)
1609 < (srctype & BRIG_TYPE_BASE_MASK))))
1610 repr.round = BRIG_ROUND_FLOAT_NEAR_EVEN;
1611 else if (hsa_type_integer_p (insn->m_type) &&
1612 hsa_type_float_p (srctype))
1613 repr.round = BRIG_ROUND_INTEGER_ZERO;
1614 else
1615 repr.round = BRIG_ROUND_NONE;
1616 brig_code.add (&repr, sizeof (repr));
1617 brig_insn_count++;
1618 }
1619
1620 /* Emit call instruction INSN, where this instruction must be closed
1621 within a call block instruction. */
1622
1623 static void
1624 emit_call_insn (hsa_insn_call *call)
1625 {
1626 struct BrigInstBr repr;
1627
1628 repr.base.base.byteCount = lendian16 (sizeof (repr));
1629 repr.base.base.kind = lendian16 (BRIG_KIND_INST_BR);
1630 repr.base.opcode = lendian16 (BRIG_OPCODE_CALL);
1631 repr.base.type = lendian16 (BRIG_TYPE_NONE);
1632
1633 repr.base.operands
1634 = lendian32 (emit_operands (call->m_result_code_list, &call->m_func,
1635 call->m_args_code_list));
1636
1637 /* Internal functions have not set m_called_function. */
1638 if (call->m_called_function)
1639 {
1640 function_linkage_pair pair (call->m_called_function,
1641 call->m_func.m_brig_op_offset);
1642 function_call_linkage.safe_push (pair);
1643 }
1644 else
1645 {
1646 hsa_internal_fn *slot
1647 = hsa_emitted_internal_decls->find (call->m_called_internal_fn);
1648 gcc_assert (slot);
1649 gcc_assert (slot->m_offset > 0);
1650 call->m_func.m_directive_offset = slot->m_offset;
1651 }
1652
1653 repr.width = BRIG_WIDTH_ALL;
1654 memset (&repr.reserved, 0, sizeof (repr.reserved));
1655
1656 brig_code.add (&repr, sizeof (repr));
1657 brig_insn_count++;
1658 }
1659
1660 /* Emit argument block directive. */
1661
1662 static void
1663 emit_arg_block_insn (hsa_insn_arg_block *insn)
1664 {
1665 switch (insn->m_kind)
1666 {
1667 case BRIG_KIND_DIRECTIVE_ARG_BLOCK_START:
1668 {
1669 struct BrigDirectiveArgBlock repr;
1670 repr.base.byteCount = lendian16 (sizeof (repr));
1671 repr.base.kind = lendian16 (insn->m_kind);
1672 brig_code.add (&repr, sizeof (repr));
1673
1674 for (unsigned i = 0; i < insn->m_call_insn->m_input_args.length (); i++)
1675 {
1676 insn->m_call_insn->m_args_code_list->m_offsets[i]
1677 = lendian32 (emit_directive_variable
1678 (insn->m_call_insn->m_input_args[i]));
1679 brig_insn_count++;
1680 }
1681
1682 if (insn->m_call_insn->m_output_arg)
1683 {
1684 insn->m_call_insn->m_result_code_list->m_offsets[0]
1685 = lendian32 (emit_directive_variable
1686 (insn->m_call_insn->m_output_arg));
1687 brig_insn_count++;
1688 }
1689
1690 break;
1691 }
1692 case BRIG_KIND_DIRECTIVE_ARG_BLOCK_END:
1693 {
1694 struct BrigDirectiveArgBlock repr;
1695 repr.base.byteCount = lendian16 (sizeof (repr));
1696 repr.base.kind = lendian16 (insn->m_kind);
1697 brig_code.add (&repr, sizeof (repr));
1698 break;
1699 }
1700 default:
1701 gcc_unreachable ();
1702 }
1703
1704 brig_insn_count++;
1705 }
1706
1707 /* Emit comment directive. */
1708
1709 static void
1710 emit_comment_insn (hsa_insn_comment *insn)
1711 {
1712 struct BrigDirectiveComment repr;
1713 memset (&repr, 0, sizeof (repr));
1714
1715 repr.base.byteCount = lendian16 (sizeof (repr));
1716 repr.base.kind = lendian16 (insn->m_opcode);
1717 repr.name = brig_emit_string (insn->m_comment, '\0', false);
1718 brig_code.add (&repr, sizeof (repr));
1719 }
1720
1721 /* Emit queue instruction INSN. */
1722
1723 static void
1724 emit_queue_insn (hsa_insn_queue *insn)
1725 {
1726 BrigInstQueue repr;
1727 memset (&repr, 0, sizeof (repr));
1728
1729 repr.base.base.byteCount = lendian16 (sizeof (repr));
1730 repr.base.base.kind = lendian16 (BRIG_KIND_INST_QUEUE);
1731 repr.base.opcode = lendian16 (insn->m_opcode);
1732 repr.base.type = lendian16 (insn->m_type);
1733 repr.segment = BRIG_SEGMENT_GLOBAL;
1734 repr.memoryOrder = BRIG_MEMORY_ORDER_SC_RELEASE;
1735 repr.base.operands = lendian32 (emit_insn_operands (insn));
1736 brig_data.round_size_up (4);
1737 brig_code.add (&repr, sizeof (repr));
1738
1739 brig_insn_count++;
1740 }
1741
1742 /* Emit source type instruction INSN. */
1743
1744 static void
1745 emit_srctype_insn (hsa_insn_srctype *insn)
1746 {
1747 /* We assume that BrigInstMod has a BrigInstBasic prefix. */
1748 struct BrigInstSourceType repr;
1749 unsigned operand_count = insn->operand_count ();
1750 gcc_checking_assert (operand_count >= 2);
1751
1752 memset (&repr, 0, sizeof (repr));
1753 repr.sourceType = lendian16 (insn->m_source_type);
1754 repr.base.base.byteCount = lendian16 (sizeof (repr));
1755 repr.base.base.kind = lendian16 (BRIG_KIND_INST_SOURCE_TYPE);
1756 repr.base.opcode = lendian16 (insn->m_opcode);
1757 repr.base.type = lendian16 (insn->m_type);
1758
1759 repr.base.operands = lendian32 (emit_insn_operands (insn));
1760 brig_code.add (&repr, sizeof (struct BrigInstSourceType));
1761 brig_insn_count++;
1762 }
1763
1764 /* Emit packed instruction INSN. */
1765
1766 static void
1767 emit_packed_insn (hsa_insn_packed *insn)
1768 {
1769 /* We assume that BrigInstMod has a BrigInstBasic prefix. */
1770 struct BrigInstSourceType repr;
1771 unsigned operand_count = insn->operand_count ();
1772 gcc_checking_assert (operand_count >= 2);
1773
1774 memset (&repr, 0, sizeof (repr));
1775 repr.sourceType = lendian16 (insn->m_source_type);
1776 repr.base.base.byteCount = lendian16 (sizeof (repr));
1777 repr.base.base.kind = lendian16 (BRIG_KIND_INST_SOURCE_TYPE);
1778 repr.base.opcode = lendian16 (insn->m_opcode);
1779 repr.base.type = lendian16 (insn->m_type);
1780
1781 if (insn->m_opcode == BRIG_OPCODE_COMBINE)
1782 {
1783 /* Create operand list for packed type. */
1784 for (unsigned i = 1; i < operand_count; i++)
1785 {
1786 gcc_checking_assert (insn->get_op (i));
1787 insn->m_operand_list->m_offsets[i - 1]
1788 = lendian32 (enqueue_op (insn->get_op (i)));
1789 }
1790
1791 repr.base.operands = lendian32 (emit_operands (insn->get_op (0),
1792 insn->m_operand_list));
1793 }
1794 else if (insn->m_opcode == BRIG_OPCODE_EXPAND)
1795 {
1796 /* Create operand list for packed type. */
1797 for (unsigned i = 0; i < operand_count - 1; i++)
1798 {
1799 gcc_checking_assert (insn->get_op (i));
1800 insn->m_operand_list->m_offsets[i]
1801 = lendian32 (enqueue_op (insn->get_op (i)));
1802 }
1803
1804 unsigned ops = emit_operands (insn->m_operand_list,
1805 insn->get_op (insn->operand_count () - 1));
1806 repr.base.operands = lendian32 (ops);
1807 }
1808
1809
1810 brig_code.add (&repr, sizeof (struct BrigInstSourceType));
1811 brig_insn_count++;
1812 }
1813
1814 /* Emit a basic HSA instruction and all necessary directives, schedule
1815 necessary operands for writing. */
1816
1817 static void
1818 emit_basic_insn (hsa_insn_basic *insn)
1819 {
1820 /* We assume that BrigInstMod has a BrigInstBasic prefix. */
1821 struct BrigInstMod repr;
1822 BrigType16_t type;
1823
1824 memset (&repr, 0, sizeof (repr));
1825 repr.base.base.byteCount = lendian16 (sizeof (BrigInstBasic));
1826 repr.base.base.kind = lendian16 (BRIG_KIND_INST_BASIC);
1827 repr.base.opcode = lendian16 (insn->m_opcode);
1828 switch (insn->m_opcode)
1829 {
1830 /* And the bit-logical operations need bit types and whine about
1831 arithmetic types :-/ */
1832 case BRIG_OPCODE_AND:
1833 case BRIG_OPCODE_OR:
1834 case BRIG_OPCODE_XOR:
1835 case BRIG_OPCODE_NOT:
1836 type = regtype_for_type (insn->m_type);
1837 break;
1838 default:
1839 type = insn->m_type;
1840 break;
1841 }
1842 repr.base.type = lendian16 (type);
1843 repr.base.operands = lendian32 (emit_insn_operands (insn));
1844
1845 if (hsa_type_packed_p (type))
1846 {
1847 if (hsa_type_float_p (type)
1848 && !hsa_opcode_floating_bit_insn_p (insn->m_opcode))
1849 repr.round = BRIG_ROUND_FLOAT_NEAR_EVEN;
1850 else
1851 repr.round = 0;
1852 /* We assume that destination and sources agree in packing layout. */
1853 if (insn->num_used_ops () >= 2)
1854 repr.pack = BRIG_PACK_PP;
1855 else
1856 repr.pack = BRIG_PACK_P;
1857 repr.reserved = 0;
1858 repr.base.base.byteCount = lendian16 (sizeof (BrigInstMod));
1859 repr.base.base.kind = lendian16 (BRIG_KIND_INST_MOD);
1860 brig_code.add (&repr, sizeof (struct BrigInstMod));
1861 }
1862 else
1863 brig_code.add (&repr, sizeof (struct BrigInstBasic));
1864 brig_insn_count++;
1865 }
1866
1867 /* Emit an HSA instruction and all necessary directives, schedule necessary
1868 operands for writing. */
1869
1870 static void
1871 emit_insn (hsa_insn_basic *insn)
1872 {
1873 gcc_assert (!is_a <hsa_insn_phi *> (insn));
1874
1875 insn->m_brig_offset = brig_code.total_size;
1876
1877 if (hsa_insn_signal *signal = dyn_cast <hsa_insn_signal *> (insn))
1878 emit_signal_insn (signal);
1879 else if (hsa_insn_atomic *atom = dyn_cast <hsa_insn_atomic *> (insn))
1880 emit_atomic_insn (atom);
1881 else if (hsa_insn_mem *mem = dyn_cast <hsa_insn_mem *> (insn))
1882 emit_memory_insn (mem);
1883 else if (insn->m_opcode == BRIG_OPCODE_LDA)
1884 emit_addr_insn (insn);
1885 else if (hsa_insn_seg *seg = dyn_cast <hsa_insn_seg *> (insn))
1886 emit_segment_insn (seg);
1887 else if (hsa_insn_cmp *cmp = dyn_cast <hsa_insn_cmp *> (insn))
1888 emit_cmp_insn (cmp);
1889 else if (hsa_insn_br *br = dyn_cast <hsa_insn_br *> (insn))
1890 emit_branch_insn (br);
1891 else if (hsa_insn_sbr *sbr = dyn_cast <hsa_insn_sbr *> (insn))
1892 {
1893 if (switch_instructions == NULL)
1894 switch_instructions = new vec <hsa_insn_sbr *> ();
1895
1896 switch_instructions->safe_push (sbr);
1897 emit_switch_insn (sbr);
1898 }
1899 else if (hsa_insn_arg_block *block = dyn_cast <hsa_insn_arg_block *> (insn))
1900 emit_arg_block_insn (block);
1901 else if (hsa_insn_call *call = dyn_cast <hsa_insn_call *> (insn))
1902 emit_call_insn (call);
1903 else if (hsa_insn_comment *comment = dyn_cast <hsa_insn_comment *> (insn))
1904 emit_comment_insn (comment);
1905 else if (hsa_insn_queue *queue = dyn_cast <hsa_insn_queue *> (insn))
1906 emit_queue_insn (queue);
1907 else if (hsa_insn_srctype *srctype = dyn_cast <hsa_insn_srctype *> (insn))
1908 emit_srctype_insn (srctype);
1909 else if (hsa_insn_packed *packed = dyn_cast <hsa_insn_packed *> (insn))
1910 emit_packed_insn (packed);
1911 else if (hsa_insn_cvt *cvt = dyn_cast <hsa_insn_cvt *> (insn))
1912 emit_cvt_insn (cvt);
1913 else if (hsa_insn_alloca *alloca = dyn_cast <hsa_insn_alloca *> (insn))
1914 emit_alloca_insn (alloca);
1915 else
1916 emit_basic_insn (insn);
1917 }
1918
1919 /* We have just finished emitting BB and are about to emit NEXT_BB if non-NULL,
1920 or we are about to finish emitting code, if it is NULL. If the fall through
1921 edge from BB does not lead to NEXT_BB, emit an unconditional jump. */
1922
1923 static void
1924 perhaps_emit_branch (basic_block bb, basic_block next_bb)
1925 {
1926 basic_block t_bb = NULL, ff = NULL;
1927
1928 edge_iterator ei;
1929 edge e;
1930
1931 /* If the last instruction of BB is a switch, ignore emission of all
1932 edges. */
1933 if (hsa_bb_for_bb (bb)->m_last_insn
1934 && is_a <hsa_insn_sbr *> (hsa_bb_for_bb (bb)->m_last_insn))
1935 return;
1936
1937 FOR_EACH_EDGE (e, ei, bb->succs)
1938 if (e->flags & EDGE_TRUE_VALUE)
1939 {
1940 gcc_assert (!t_bb);
1941 t_bb = e->dest;
1942 }
1943 else
1944 {
1945 gcc_assert (!ff);
1946 ff = e->dest;
1947 }
1948
1949 if (!ff || ff == next_bb || ff == EXIT_BLOCK_PTR_FOR_FN (cfun))
1950 return;
1951
1952 emit_unconditional_jump (&hsa_bb_for_bb (ff)->m_label_ref);
1953 }
1954
1955 /* Emit the a function with name NAME to the various brig sections. */
1956
1957 void
1958 hsa_brig_emit_function (void)
1959 {
1960 basic_block bb, prev_bb;
1961 hsa_insn_basic *insn;
1962 BrigDirectiveExecutable *ptr_to_fndir;
1963
1964 brig_init ();
1965
1966 brig_insn_count = 0;
1967 memset (&op_queue, 0, sizeof (op_queue));
1968 op_queue.projected_size = brig_operand.total_size;
1969
1970 if (!function_offsets)
1971 function_offsets = new hash_map<tree, BrigCodeOffset32_t> ();
1972
1973 if (!emitted_declarations)
1974 emitted_declarations = new hash_map <tree, BrigDirectiveExecutable *> ();
1975
1976 for (unsigned i = 0; i < hsa_cfun->m_called_functions.length (); i++)
1977 {
1978 tree called = hsa_cfun->m_called_functions[i];
1979
1980 /* If the function has no definition, emit a declaration. */
1981 if (!emitted_declarations->get (called))
1982 {
1983 BrigDirectiveExecutable *e = emit_function_declaration (called);
1984 emitted_declarations->put (called, e);
1985 }
1986 }
1987
1988 for (unsigned i = 0; i < hsa_cfun->m_called_internal_fns.length (); i++)
1989 {
1990 hsa_internal_fn *called = hsa_cfun->m_called_internal_fns[i];
1991 emit_internal_fn_decl (called);
1992 }
1993
1994 ptr_to_fndir = emit_function_directives (hsa_cfun, false);
1995 for (insn = hsa_bb_for_bb (ENTRY_BLOCK_PTR_FOR_FN (cfun))->m_first_insn;
1996 insn;
1997 insn = insn->m_next)
1998 emit_insn (insn);
1999 prev_bb = ENTRY_BLOCK_PTR_FOR_FN (cfun);
2000 FOR_EACH_BB_FN (bb, cfun)
2001 {
2002 perhaps_emit_branch (prev_bb, bb);
2003 emit_bb_label_directive (hsa_bb_for_bb (bb));
2004 for (insn = hsa_bb_for_bb (bb)->m_first_insn; insn; insn = insn->m_next)
2005 emit_insn (insn);
2006 prev_bb = bb;
2007 }
2008 perhaps_emit_branch (prev_bb, NULL);
2009 ptr_to_fndir->nextModuleEntry = brig_code.total_size;
2010
2011 /* Fill up label references for all sbr instructions. */
2012 if (switch_instructions)
2013 {
2014 for (unsigned i = 0; i < switch_instructions->length (); i++)
2015 {
2016 hsa_insn_sbr *sbr = (*switch_instructions)[i];
2017 for (unsigned j = 0; j < sbr->m_jump_table.length (); j++)
2018 {
2019 hsa_bb *hbb = hsa_bb_for_bb (sbr->m_jump_table[j]);
2020 sbr->m_label_code_list->m_offsets[j]
2021 = hbb->m_label_ref.m_directive_offset;
2022 }
2023 }
2024
2025 switch_instructions->release ();
2026 delete switch_instructions;
2027 switch_instructions = NULL;
2028 }
2029
2030 if (dump_file)
2031 {
2032 fprintf (dump_file, "------- After BRIG emission: -------\n");
2033 dump_hsa_cfun (dump_file);
2034 }
2035
2036 emit_queued_operands ();
2037 }
2038
2039 /* Emit all OMP symbols related to OMP. */
2040
2041 void
2042 hsa_brig_emit_omp_symbols (void)
2043 {
2044 brig_init ();
2045 emit_directive_variable (hsa_num_threads);
2046 }
2047
2048 /* Create and return __hsa_global_variables symbol that contains
2049 all informations consumed by libgomp to link global variables
2050 with their string names used by an HSA kernel. */
2051
2052 static tree
2053 hsa_output_global_variables ()
2054 {
2055 unsigned l = hsa_global_variable_symbols->elements ();
2056
2057 tree variable_info_type = make_node (RECORD_TYPE);
2058 tree id_f1 = build_decl (BUILTINS_LOCATION, FIELD_DECL,
2059 get_identifier ("name"), ptr_type_node);
2060 DECL_CHAIN (id_f1) = NULL_TREE;
2061 tree id_f2 = build_decl (BUILTINS_LOCATION, FIELD_DECL,
2062 get_identifier ("omp_data_size"),
2063 ptr_type_node);
2064 DECL_CHAIN (id_f2) = id_f1;
2065 finish_builtin_struct (variable_info_type, "__hsa_variable_info", id_f2,
2066 NULL_TREE);
2067
2068 tree int_num_of_global_vars;
2069 int_num_of_global_vars = build_int_cst (uint32_type_node, l);
2070 tree global_vars_num_index_type = build_index_type (int_num_of_global_vars);
2071 tree global_vars_array_type = build_array_type (variable_info_type,
2072 global_vars_num_index_type);
2073 TYPE_ARTIFICIAL (global_vars_array_type) = 1;
2074
2075 vec<constructor_elt, va_gc> *global_vars_vec = NULL;
2076
2077 for (hash_table <hsa_noop_symbol_hasher>::iterator it
2078 = hsa_global_variable_symbols->begin ();
2079 it != hsa_global_variable_symbols->end (); ++it)
2080 {
2081 unsigned len = strlen ((*it)->m_name);
2082 char *copy = XNEWVEC (char, len + 2);
2083 copy[0] = '&';
2084 memcpy (copy + 1, (*it)->m_name, len);
2085 copy[len + 1] = '\0';
2086 len++;
2087 hsa_sanitize_name (copy);
2088
2089 tree var_name = build_string (len, copy);
2090 TREE_TYPE (var_name)
2091 = build_array_type (char_type_node, build_index_type (size_int (len)));
2092 free (copy);
2093
2094 vec<constructor_elt, va_gc> *variable_info_vec = NULL;
2095 CONSTRUCTOR_APPEND_ELT (variable_info_vec, NULL_TREE,
2096 build1 (ADDR_EXPR,
2097 build_pointer_type (TREE_TYPE (var_name)),
2098 var_name));
2099 CONSTRUCTOR_APPEND_ELT (variable_info_vec, NULL_TREE,
2100 build_fold_addr_expr ((*it)->m_decl));
2101
2102 tree variable_info_ctor = build_constructor (variable_info_type,
2103 variable_info_vec);
2104
2105 CONSTRUCTOR_APPEND_ELT (global_vars_vec, NULL_TREE,
2106 variable_info_ctor);
2107 }
2108
2109 tree global_vars_ctor = build_constructor (global_vars_array_type,
2110 global_vars_vec);
2111
2112 char tmp_name[64];
2113 ASM_GENERATE_INTERNAL_LABEL (tmp_name, "__hsa_global_variables", 1);
2114 tree global_vars_table = build_decl (UNKNOWN_LOCATION, VAR_DECL,
2115 get_identifier (tmp_name),
2116 global_vars_array_type);
2117 TREE_STATIC (global_vars_table) = 1;
2118 TREE_READONLY (global_vars_table) = 1;
2119 TREE_PUBLIC (global_vars_table) = 0;
2120 DECL_ARTIFICIAL (global_vars_table) = 1;
2121 DECL_IGNORED_P (global_vars_table) = 1;
2122 DECL_EXTERNAL (global_vars_table) = 0;
2123 TREE_CONSTANT (global_vars_table) = 1;
2124 DECL_INITIAL (global_vars_table) = global_vars_ctor;
2125 varpool_node::finalize_decl (global_vars_table);
2126
2127 return global_vars_table;
2128 }
2129
2130 /* Create __hsa_host_functions and __hsa_kernels that contain
2131 all informations consumed by libgomp to register all kernels
2132 in the BRIG binary. */
2133
2134 static void
2135 hsa_output_kernels (tree *host_func_table, tree *kernels)
2136 {
2137 unsigned map_count = hsa_get_number_decl_kernel_mappings ();
2138
2139 tree int_num_of_kernels;
2140 int_num_of_kernels = build_int_cst (uint32_type_node, map_count);
2141 tree kernel_num_index_type = build_index_type (int_num_of_kernels);
2142 tree host_functions_array_type = build_array_type (ptr_type_node,
2143 kernel_num_index_type);
2144 TYPE_ARTIFICIAL (host_functions_array_type) = 1;
2145
2146 vec<constructor_elt, va_gc> *host_functions_vec = NULL;
2147 for (unsigned i = 0; i < map_count; ++i)
2148 {
2149 tree decl = hsa_get_decl_kernel_mapping_decl (i);
2150 tree host_fn = build_fold_addr_expr (hsa_get_host_function (decl));
2151 CONSTRUCTOR_APPEND_ELT (host_functions_vec, NULL_TREE, host_fn);
2152 }
2153 tree host_functions_ctor = build_constructor (host_functions_array_type,
2154 host_functions_vec);
2155 char tmp_name[64];
2156 ASM_GENERATE_INTERNAL_LABEL (tmp_name, "__hsa_host_functions", 1);
2157 tree hsa_host_func_table = build_decl (UNKNOWN_LOCATION, VAR_DECL,
2158 get_identifier (tmp_name),
2159 host_functions_array_type);
2160 TREE_STATIC (hsa_host_func_table) = 1;
2161 TREE_READONLY (hsa_host_func_table) = 1;
2162 TREE_PUBLIC (hsa_host_func_table) = 0;
2163 DECL_ARTIFICIAL (hsa_host_func_table) = 1;
2164 DECL_IGNORED_P (hsa_host_func_table) = 1;
2165 DECL_EXTERNAL (hsa_host_func_table) = 0;
2166 TREE_CONSTANT (hsa_host_func_table) = 1;
2167 DECL_INITIAL (hsa_host_func_table) = host_functions_ctor;
2168 varpool_node::finalize_decl (hsa_host_func_table);
2169 *host_func_table = hsa_host_func_table;
2170
2171 /* Following code emits list of kernel_info structures. */
2172
2173 tree kernel_info_type = make_node (RECORD_TYPE);
2174 tree id_f1 = build_decl (BUILTINS_LOCATION, FIELD_DECL,
2175 get_identifier ("name"), ptr_type_node);
2176 DECL_CHAIN (id_f1) = NULL_TREE;
2177 tree id_f2 = build_decl (BUILTINS_LOCATION, FIELD_DECL,
2178 get_identifier ("omp_data_size"),
2179 unsigned_type_node);
2180 DECL_CHAIN (id_f2) = id_f1;
2181 tree id_f3 = build_decl (BUILTINS_LOCATION, FIELD_DECL,
2182 get_identifier ("gridified_kernel_p"),
2183 boolean_type_node);
2184 DECL_CHAIN (id_f3) = id_f2;
2185 tree id_f4 = build_decl (BUILTINS_LOCATION, FIELD_DECL,
2186 get_identifier ("kernel_dependencies_count"),
2187 unsigned_type_node);
2188 DECL_CHAIN (id_f4) = id_f3;
2189 tree id_f5 = build_decl (BUILTINS_LOCATION, FIELD_DECL,
2190 get_identifier ("kernel_dependencies"),
2191 build_pointer_type (build_pointer_type
2192 (char_type_node)));
2193 DECL_CHAIN (id_f5) = id_f4;
2194 finish_builtin_struct (kernel_info_type, "__hsa_kernel_info", id_f5,
2195 NULL_TREE);
2196
2197 int_num_of_kernels = build_int_cstu (uint32_type_node, map_count);
2198 tree kernel_info_vector_type
2199 = build_array_type (kernel_info_type,
2200 build_index_type (int_num_of_kernels));
2201 TYPE_ARTIFICIAL (kernel_info_vector_type) = 1;
2202
2203 vec<constructor_elt, va_gc> *kernel_info_vector_vec = NULL;
2204 tree kernel_dependencies_vector_type = NULL;
2205
2206 for (unsigned i = 0; i < map_count; ++i)
2207 {
2208 tree kernel = hsa_get_decl_kernel_mapping_decl (i);
2209 char *name = hsa_get_decl_kernel_mapping_name (i);
2210 unsigned len = strlen (name);
2211 char *copy = XNEWVEC (char, len + 2);
2212 copy[0] = '&';
2213 memcpy (copy + 1, name, len);
2214 copy[len + 1] = '\0';
2215 len++;
2216
2217 tree kern_name = build_string (len, copy);
2218 TREE_TYPE (kern_name)
2219 = build_array_type (char_type_node, build_index_type (size_int (len)));
2220 free (copy);
2221
2222 unsigned omp_size = hsa_get_decl_kernel_mapping_omp_size (i);
2223 tree omp_data_size = build_int_cstu (unsigned_type_node, omp_size);
2224 bool gridified_kernel_p = hsa_get_decl_kernel_mapping_gridified (i);
2225 tree gridified_kernel_p_tree = build_int_cstu (boolean_type_node,
2226 gridified_kernel_p);
2227 unsigned count = 0;
2228
2229 kernel_dependencies_vector_type
2230 = build_array_type (build_pointer_type (char_type_node),
2231 build_index_type (size_int (0)));
2232
2233 vec<constructor_elt, va_gc> *kernel_dependencies_vec = NULL;
2234 if (hsa_decl_kernel_dependencies)
2235 {
2236 vec<const char *> **slot;
2237 slot = hsa_decl_kernel_dependencies->get (kernel);
2238 if (slot)
2239 {
2240 vec <const char *> *dependencies = *slot;
2241 count = dependencies->length ();
2242
2243 kernel_dependencies_vector_type
2244 = build_array_type (build_pointer_type (char_type_node),
2245 build_index_type (size_int (count)));
2246 TYPE_ARTIFICIAL (kernel_dependencies_vector_type) = 1;
2247
2248 for (unsigned j = 0; j < count; j++)
2249 {
2250 const char *d = (*dependencies)[j];
2251 len = strlen (d);
2252 tree dependency_name = build_string (len, d);
2253 TREE_TYPE (dependency_name)
2254 = build_array_type (char_type_node,
2255 build_index_type (size_int (len)));
2256
2257 CONSTRUCTOR_APPEND_ELT
2258 (kernel_dependencies_vec, NULL_TREE,
2259 build1 (ADDR_EXPR,
2260 build_pointer_type (TREE_TYPE (dependency_name)),
2261 dependency_name));
2262 }
2263 }
2264 }
2265
2266 tree dependencies_count = build_int_cstu (unsigned_type_node, count);
2267
2268 vec<constructor_elt, va_gc> *kernel_info_vec = NULL;
2269 CONSTRUCTOR_APPEND_ELT (kernel_info_vec, NULL_TREE,
2270 build1 (ADDR_EXPR,
2271 build_pointer_type (TREE_TYPE
2272 (kern_name)),
2273 kern_name));
2274 CONSTRUCTOR_APPEND_ELT (kernel_info_vec, NULL_TREE, omp_data_size);
2275 CONSTRUCTOR_APPEND_ELT (kernel_info_vec, NULL_TREE,
2276 gridified_kernel_p_tree);
2277 CONSTRUCTOR_APPEND_ELT (kernel_info_vec, NULL_TREE, dependencies_count);
2278
2279 if (count > 0)
2280 {
2281 ASM_GENERATE_INTERNAL_LABEL (tmp_name, "__hsa_dependencies_list", i);
2282 tree dependencies_list = build_decl (UNKNOWN_LOCATION, VAR_DECL,
2283 get_identifier (tmp_name),
2284 kernel_dependencies_vector_type);
2285
2286 TREE_STATIC (dependencies_list) = 1;
2287 TREE_READONLY (dependencies_list) = 1;
2288 TREE_PUBLIC (dependencies_list) = 0;
2289 DECL_ARTIFICIAL (dependencies_list) = 1;
2290 DECL_IGNORED_P (dependencies_list) = 1;
2291 DECL_EXTERNAL (dependencies_list) = 0;
2292 TREE_CONSTANT (dependencies_list) = 1;
2293 DECL_INITIAL (dependencies_list)
2294 = build_constructor (kernel_dependencies_vector_type,
2295 kernel_dependencies_vec);
2296 varpool_node::finalize_decl (dependencies_list);
2297
2298 CONSTRUCTOR_APPEND_ELT (kernel_info_vec, NULL_TREE,
2299 build1 (ADDR_EXPR,
2300 build_pointer_type
2301 (TREE_TYPE (dependencies_list)),
2302 dependencies_list));
2303 }
2304 else
2305 CONSTRUCTOR_APPEND_ELT (kernel_info_vec, NULL_TREE, null_pointer_node);
2306
2307 tree kernel_info_ctor = build_constructor (kernel_info_type,
2308 kernel_info_vec);
2309
2310 CONSTRUCTOR_APPEND_ELT (kernel_info_vector_vec, NULL_TREE,
2311 kernel_info_ctor);
2312 }
2313
2314 ASM_GENERATE_INTERNAL_LABEL (tmp_name, "__hsa_kernels", 1);
2315 tree hsa_kernels = build_decl (UNKNOWN_LOCATION, VAR_DECL,
2316 get_identifier (tmp_name),
2317 kernel_info_vector_type);
2318
2319 TREE_STATIC (hsa_kernels) = 1;
2320 TREE_READONLY (hsa_kernels) = 1;
2321 TREE_PUBLIC (hsa_kernels) = 0;
2322 DECL_ARTIFICIAL (hsa_kernels) = 1;
2323 DECL_IGNORED_P (hsa_kernels) = 1;
2324 DECL_EXTERNAL (hsa_kernels) = 0;
2325 TREE_CONSTANT (hsa_kernels) = 1;
2326 DECL_INITIAL (hsa_kernels) = build_constructor (kernel_info_vector_type,
2327 kernel_info_vector_vec);
2328 varpool_node::finalize_decl (hsa_kernels);
2329 *kernels = hsa_kernels;
2330 }
2331
2332 /* Create a static constructor that will register out brig stuff with
2333 libgomp. */
2334
2335 static void
2336 hsa_output_libgomp_mapping (tree brig_decl)
2337 {
2338 unsigned kernel_count = hsa_get_number_decl_kernel_mappings ();
2339 unsigned global_variable_count = hsa_global_variable_symbols->elements ();
2340
2341 tree kernels;
2342 tree host_func_table;
2343
2344 hsa_output_kernels (&host_func_table, &kernels);
2345 tree global_vars = hsa_output_global_variables ();
2346
2347 tree hsa_image_desc_type = make_node (RECORD_TYPE);
2348 tree id_f1 = build_decl (BUILTINS_LOCATION, FIELD_DECL,
2349 get_identifier ("brig_module"), ptr_type_node);
2350 DECL_CHAIN (id_f1) = NULL_TREE;
2351 tree id_f2 = build_decl (BUILTINS_LOCATION, FIELD_DECL,
2352 get_identifier ("kernel_count"),
2353 unsigned_type_node);
2354
2355 DECL_CHAIN (id_f2) = id_f1;
2356 tree id_f3 = build_decl (BUILTINS_LOCATION, FIELD_DECL,
2357 get_identifier ("hsa_kernel_infos"),
2358 ptr_type_node);
2359 DECL_CHAIN (id_f3) = id_f2;
2360 tree id_f4 = build_decl (BUILTINS_LOCATION, FIELD_DECL,
2361 get_identifier ("global_variable_count"),
2362 unsigned_type_node);
2363 DECL_CHAIN (id_f4) = id_f3;
2364 tree id_f5 = build_decl (BUILTINS_LOCATION, FIELD_DECL,
2365 get_identifier ("hsa_global_variable_infos"),
2366 ptr_type_node);
2367 DECL_CHAIN (id_f5) = id_f4;
2368 finish_builtin_struct (hsa_image_desc_type, "__hsa_image_desc", id_f5,
2369 NULL_TREE);
2370 TYPE_ARTIFICIAL (hsa_image_desc_type) = 1;
2371
2372 vec<constructor_elt, va_gc> *img_desc_vec = NULL;
2373 CONSTRUCTOR_APPEND_ELT (img_desc_vec, NULL_TREE,
2374 build_fold_addr_expr (brig_decl));
2375 CONSTRUCTOR_APPEND_ELT (img_desc_vec, NULL_TREE,
2376 build_int_cstu (unsigned_type_node, kernel_count));
2377 CONSTRUCTOR_APPEND_ELT (img_desc_vec, NULL_TREE,
2378 build1 (ADDR_EXPR,
2379 build_pointer_type (TREE_TYPE (kernels)),
2380 kernels));
2381 CONSTRUCTOR_APPEND_ELT (img_desc_vec, NULL_TREE,
2382 build_int_cstu (unsigned_type_node,
2383 global_variable_count));
2384 CONSTRUCTOR_APPEND_ELT (img_desc_vec, NULL_TREE,
2385 build1 (ADDR_EXPR,
2386 build_pointer_type (TREE_TYPE (global_vars)),
2387 global_vars));
2388
2389 tree img_desc_ctor = build_constructor (hsa_image_desc_type, img_desc_vec);
2390
2391 char tmp_name[64];
2392 ASM_GENERATE_INTERNAL_LABEL (tmp_name, "__hsa_img_descriptor", 1);
2393 tree hsa_img_descriptor = build_decl (UNKNOWN_LOCATION, VAR_DECL,
2394 get_identifier (tmp_name),
2395 hsa_image_desc_type);
2396 TREE_STATIC (hsa_img_descriptor) = 1;
2397 TREE_READONLY (hsa_img_descriptor) = 1;
2398 TREE_PUBLIC (hsa_img_descriptor) = 0;
2399 DECL_ARTIFICIAL (hsa_img_descriptor) = 1;
2400 DECL_IGNORED_P (hsa_img_descriptor) = 1;
2401 DECL_EXTERNAL (hsa_img_descriptor) = 0;
2402 TREE_CONSTANT (hsa_img_descriptor) = 1;
2403 DECL_INITIAL (hsa_img_descriptor) = img_desc_ctor;
2404 varpool_node::finalize_decl (hsa_img_descriptor);
2405
2406 /* Construct the "host_table" libgomp expects. */
2407 tree index_type = build_index_type (build_int_cst (integer_type_node, 4));
2408 tree libgomp_host_table_type = build_array_type (ptr_type_node, index_type);
2409 TYPE_ARTIFICIAL (libgomp_host_table_type) = 1;
2410 vec<constructor_elt, va_gc> *libgomp_host_table_vec = NULL;
2411 tree host_func_table_addr = build_fold_addr_expr (host_func_table);
2412 CONSTRUCTOR_APPEND_ELT (libgomp_host_table_vec, NULL_TREE,
2413 host_func_table_addr);
2414 offset_int func_table_size
2415 = wi::to_offset (TYPE_SIZE_UNIT (ptr_type_node)) * kernel_count;
2416 CONSTRUCTOR_APPEND_ELT (libgomp_host_table_vec, NULL_TREE,
2417 fold_build2 (POINTER_PLUS_EXPR,
2418 TREE_TYPE (host_func_table_addr),
2419 host_func_table_addr,
2420 build_int_cst (size_type_node,
2421 func_table_size.to_uhwi
2422 ())));
2423 CONSTRUCTOR_APPEND_ELT (libgomp_host_table_vec, NULL_TREE, null_pointer_node);
2424 CONSTRUCTOR_APPEND_ELT (libgomp_host_table_vec, NULL_TREE, null_pointer_node);
2425 tree libgomp_host_table_ctor = build_constructor (libgomp_host_table_type,
2426 libgomp_host_table_vec);
2427 ASM_GENERATE_INTERNAL_LABEL (tmp_name, "__hsa_libgomp_host_table", 1);
2428 tree hsa_libgomp_host_table = build_decl (UNKNOWN_LOCATION, VAR_DECL,
2429 get_identifier (tmp_name),
2430 libgomp_host_table_type);
2431
2432 TREE_STATIC (hsa_libgomp_host_table) = 1;
2433 TREE_READONLY (hsa_libgomp_host_table) = 1;
2434 TREE_PUBLIC (hsa_libgomp_host_table) = 0;
2435 DECL_ARTIFICIAL (hsa_libgomp_host_table) = 1;
2436 DECL_IGNORED_P (hsa_libgomp_host_table) = 1;
2437 DECL_EXTERNAL (hsa_libgomp_host_table) = 0;
2438 TREE_CONSTANT (hsa_libgomp_host_table) = 1;
2439 DECL_INITIAL (hsa_libgomp_host_table) = libgomp_host_table_ctor;
2440 varpool_node::finalize_decl (hsa_libgomp_host_table);
2441
2442 /* Generate an initializer with a call to the registration routine. */
2443
2444 tree offload_register
2445 = builtin_decl_explicit (BUILT_IN_GOMP_OFFLOAD_REGISTER);
2446 gcc_checking_assert (offload_register);
2447
2448 tree *hsa_ctor_stmts = hsa_get_ctor_statements ();
2449 append_to_statement_list
2450 (build_call_expr (offload_register, 4,
2451 build_int_cstu (unsigned_type_node,
2452 GOMP_VERSION_PACK (GOMP_VERSION,
2453 GOMP_VERSION_HSA)),
2454 build_fold_addr_expr (hsa_libgomp_host_table),
2455 build_int_cst (integer_type_node, GOMP_DEVICE_HSA),
2456 build_fold_addr_expr (hsa_img_descriptor)),
2457 hsa_ctor_stmts);
2458
2459 cgraph_build_static_cdtor ('I', *hsa_ctor_stmts, DEFAULT_INIT_PRIORITY);
2460
2461 tree offload_unregister
2462 = builtin_decl_explicit (BUILT_IN_GOMP_OFFLOAD_UNREGISTER);
2463 gcc_checking_assert (offload_unregister);
2464
2465 tree *hsa_dtor_stmts = hsa_get_dtor_statements ();
2466 append_to_statement_list
2467 (build_call_expr (offload_unregister, 4,
2468 build_int_cstu (unsigned_type_node,
2469 GOMP_VERSION_PACK (GOMP_VERSION,
2470 GOMP_VERSION_HSA)),
2471 build_fold_addr_expr (hsa_libgomp_host_table),
2472 build_int_cst (integer_type_node, GOMP_DEVICE_HSA),
2473 build_fold_addr_expr (hsa_img_descriptor)),
2474 hsa_dtor_stmts);
2475 cgraph_build_static_cdtor ('D', *hsa_dtor_stmts, DEFAULT_INIT_PRIORITY);
2476 }
2477
2478 /* Emit the brig module we have compiled to a section in the final assembly and
2479 also create a compile unit static constructor that will register the brig
2480 module with libgomp. */
2481
2482 void
2483 hsa_output_brig (void)
2484 {
2485 section *saved_section;
2486
2487 if (!brig_initialized)
2488 return;
2489
2490 for (unsigned i = 0; i < function_call_linkage.length (); i++)
2491 {
2492 function_linkage_pair p = function_call_linkage[i];
2493
2494 BrigCodeOffset32_t *func_offset = function_offsets->get (p.function_decl);
2495 gcc_assert (*func_offset);
2496 BrigOperandCodeRef *code_ref
2497 = (BrigOperandCodeRef *) (brig_operand.get_ptr_by_offset (p.offset));
2498 gcc_assert (code_ref->base.kind == BRIG_KIND_OPERAND_CODE_REF);
2499 code_ref->ref = lendian32 (*func_offset);
2500 }
2501
2502 /* Iterate all function declarations and if we meet a function that should
2503 have module linkage and we are unable to emit HSAIL for the function,
2504 then change the linkage to program linkage. Doing so, we will emit
2505 a valid BRIG image. */
2506 if (hsa_failed_functions != NULL && emitted_declarations != NULL)
2507 for (hash_map <tree, BrigDirectiveExecutable *>::iterator it
2508 = emitted_declarations->begin ();
2509 it != emitted_declarations->end ();
2510 ++it)
2511 {
2512 if (hsa_failed_functions->contains ((*it).first))
2513 (*it).second->linkage = BRIG_LINKAGE_PROGRAM;
2514 }
2515
2516 saved_section = in_section;
2517
2518 switch_to_section (get_section (BRIG_ELF_SECTION_NAME, SECTION_NOTYPE, NULL));
2519 char tmp_name[64];
2520 ASM_GENERATE_INTERNAL_LABEL (tmp_name, BRIG_LABEL_STRING, 1);
2521 ASM_OUTPUT_LABEL (asm_out_file, tmp_name);
2522 tree brig_id = get_identifier (tmp_name);
2523 tree brig_decl = build_decl (UNKNOWN_LOCATION, VAR_DECL, brig_id,
2524 char_type_node);
2525 SET_DECL_ASSEMBLER_NAME (brig_decl, brig_id);
2526 TREE_ADDRESSABLE (brig_decl) = 1;
2527 TREE_READONLY (brig_decl) = 1;
2528 DECL_ARTIFICIAL (brig_decl) = 1;
2529 DECL_IGNORED_P (brig_decl) = 1;
2530 TREE_STATIC (brig_decl) = 1;
2531 TREE_PUBLIC (brig_decl) = 0;
2532 TREE_USED (brig_decl) = 1;
2533 DECL_INITIAL (brig_decl) = brig_decl;
2534 TREE_ASM_WRITTEN (brig_decl) = 1;
2535
2536 BrigModuleHeader module_header;
2537 memcpy (&module_header.identification, "HSA BRIG",
2538 sizeof (module_header.identification));
2539 module_header.brigMajor = lendian32 (BRIG_VERSION_BRIG_MAJOR);
2540 module_header.brigMinor = lendian32 (BRIG_VERSION_BRIG_MINOR);
2541 uint64_t section_index[3];
2542
2543 int data_padding, code_padding, operand_padding;
2544 data_padding = HSA_SECTION_ALIGNMENT
2545 - brig_data.total_size % HSA_SECTION_ALIGNMENT;
2546 code_padding = HSA_SECTION_ALIGNMENT
2547 - brig_code.total_size % HSA_SECTION_ALIGNMENT;
2548 operand_padding = HSA_SECTION_ALIGNMENT
2549 - brig_operand.total_size % HSA_SECTION_ALIGNMENT;
2550
2551 uint64_t module_size = sizeof (module_header)
2552 + sizeof (section_index)
2553 + brig_data.total_size
2554 + data_padding
2555 + brig_code.total_size
2556 + code_padding
2557 + brig_operand.total_size
2558 + operand_padding;
2559 gcc_assert ((module_size % 16) == 0);
2560 module_header.byteCount = lendian64 (module_size);
2561 memset (&module_header.hash, 0, sizeof (module_header.hash));
2562 module_header.reserved = 0;
2563 module_header.sectionCount = lendian32 (3);
2564 module_header.sectionIndex = lendian64 (sizeof (module_header));
2565 assemble_string ((const char *) &module_header, sizeof (module_header));
2566 uint64_t off = sizeof (module_header) + sizeof (section_index);
2567 section_index[0] = lendian64 (off);
2568 off += brig_data.total_size + data_padding;
2569 section_index[1] = lendian64 (off);
2570 off += brig_code.total_size + code_padding;
2571 section_index[2] = lendian64 (off);
2572 assemble_string ((const char *) &section_index, sizeof (section_index));
2573
2574 char padding[HSA_SECTION_ALIGNMENT];
2575 memset (padding, 0, sizeof (padding));
2576
2577 brig_data.output ();
2578 assemble_string (padding, data_padding);
2579 brig_code.output ();
2580 assemble_string (padding, code_padding);
2581 brig_operand.output ();
2582 assemble_string (padding, operand_padding);
2583
2584 if (saved_section)
2585 switch_to_section (saved_section);
2586
2587 hsa_output_libgomp_mapping (brig_decl);
2588
2589 hsa_free_decl_kernel_mapping ();
2590 brig_release_data ();
2591 hsa_deinit_compilation_unit_data ();
2592
2593 delete emitted_declarations;
2594 emitted_declarations = NULL;
2595 delete function_offsets;
2596 function_offsets = NULL;
2597 }