]> git.ipfire.org Git - thirdparty/gcc.git/blob - gcc/hsa-brig.c
[Ada] Improved support for aspect alignment in CCG
[thirdparty/gcc.git] / gcc / hsa-brig.c
1 /* Producing binary form of HSA BRIG from our internal representation.
2 Copyright (C) 2013-2020 Free Software Foundation, Inc.
3 Contributed by Martin Jambor <mjambor@suse.cz> and
4 Martin Liska <mliska@suse.cz>.
5
6 This file is part of GCC.
7
8 GCC is free software; you can redistribute it and/or modify
9 it under the terms of the GNU General Public License as published by
10 the Free Software Foundation; either version 3, or (at your option)
11 any later version.
12
13 GCC is distributed in the hope that it will be useful,
14 but WITHOUT ANY WARRANTY; without even the implied warranty of
15 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16 GNU General Public License for more details.
17
18 You should have received a copy of the GNU General Public License
19 along with GCC; see the file COPYING3. If not see
20 <http://www.gnu.org/licenses/>. */
21
22 #include "config.h"
23 #include "system.h"
24 #include "coretypes.h"
25 #include "tm.h"
26 #include "target.h"
27 #include "memmodel.h"
28 #include "tm_p.h"
29 #include "is-a.h"
30 #include "vec.h"
31 #include "hash-table.h"
32 #include "hash-map.h"
33 #include "tree.h"
34 #include "tree-iterator.h"
35 #include "stor-layout.h"
36 #include "output.h"
37 #include "basic-block.h"
38 #include "function.h"
39 #include "cfg.h"
40 #include "fold-const.h"
41 #include "stringpool.h"
42 #include "gimple-pretty-print.h"
43 #include "diagnostic-core.h"
44 #include "cgraph.h"
45 #include "dumpfile.h"
46 #include "print-tree.h"
47 #include "alloc-pool.h"
48 #include "symbol-summary.h"
49 #include "hsa-common.h"
50 #include "gomp-constants.h"
51
/* Return the 16-bit VAL in little-endian byte order.  Only big-endian hosts
   need to swap anything; every other configuration passes VAL through.  */

static uint16_t
lendian16 (uint16_t val)
{
#if GCC_VERSION >= 4008 && __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__
  /* Big-endian host and a compiler new enough to offer the builtin.  */
  return __builtin_bswap16 (val);
#elif GCC_VERSION < 4008 && defined (WORDS_BIGENDIAN)
  /* Big-endian host, older compiler: safe but slower shift-based swap.  */
  return (val >> 8) | (val << 8);
#else
  /* Little-endian (and PDP-endian) hosts: nothing to do.  */
  return val;
#endif
}
74
/* Return the 32-bit VAL in little-endian byte order.  */

static uint32_t
lendian32 (uint32_t val)
{
#if GCC_VERSION >= 4006
# if __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__
  return __builtin_bswap32 (val);
# elif __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
  return val;
# else /* __ORDER_PDP_ENDIAN__ */
  /* Only the two 16-bit halves trade places.  */
  return (val << 16) | (val >> 16);
# endif
#elif defined (WORDS_BIGENDIAN)
  /* Safe, slower default: swap the bytes inside each half, then swap the
     halves themselves.  */
  uint32_t swapped = ((val & 0xff00ff00) >> 8) | ((val & 0x00ff00ff) << 8);
  return (swapped << 16) | (swapped >> 16);
#else
  return val;
#endif
}
98
/* Return the 64-bit VAL in little-endian byte order.  */

static uint64_t
lendian64 (uint64_t val)
{
#if GCC_VERSION >= 4006
# if __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__
  return __builtin_bswap64 (val);
# elif __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
  return val;
# else /* __ORDER_PDP_ENDIAN__ */
  /* Reverse the order of the four 16-bit words.  */
  return (((val & 0xffff000000000000ull) >> 48)
	  | ((val & 0x0000ffff00000000ull) >> 16)
	  | ((val & 0x00000000ffff0000ull) << 16)
	  | ((val & 0x000000000000ffffull) << 48));
# endif
#elif defined (WORDS_BIGENDIAN)
  /* Safe, slower default: swap bytes within 16-bit words, then 16-bit words
     within 32-bit words, and finally the two 32-bit halves.  */
  val = (((val & 0xff00ff00ff00ff00ull) >> 8)
	 | ((val & 0x00ff00ff00ff00ffull) << 8));
  val = (((val & 0xffff0000ffff0000ull) >> 16)
	 | ((val & 0x0000ffff0000ffffull) << 16));
  return (val << 32) | (val >> 32);
#else
  return val;
#endif
}
128
/* Names used when emitting BRIG: the ELF section, the label, and the names
   of the three BRIG sections.  */

#define BRIG_ELF_SECTION_NAME      ".brig"
#define BRIG_LABEL_STRING          "hsa_brig"
#define BRIG_SECTION_DATA_NAME     "hsa_data"
#define BRIG_SECTION_CODE_NAME     "hsa_code"
#define BRIG_SECTION_OPERAND_NAME  "hsa_operand"

/* Capacity in bytes (64 KiB) of every chunk buffer that backs a section.  */

#define BRIG_CHUNK_MAX_SIZE (1 << 16)

/* Required HSA section alignment.  */

#define HSA_SECTION_ALIGNMENT 16
140
/* One chunk of BRIG binary data.  A section stores its payload in a vector
   of these; each buffer is allocated with BRIG_CHUNK_MAX_SIZE bytes.  */

struct hsa_brig_data_chunk
{
  /* Size of the data already stored into a chunk.  This also counts padding
     bytes accounted for by hsa_brig_section::round_size_up.  */
  unsigned size;

  /* Pointer to the data.  The buffer is zero-filled on allocation and is
     freed by hsa_brig_section::release.  */
  char *data;
};
151
152 /* Structure representing a BRIG section, holding and writing its data. */
153
154 struct hsa_brig_section
155 {
156 /* Section name that will be output to the BRIG. */
157 const char *section_name;
158 /* Size in bytes of all data stored in the section. */
159 unsigned total_size;
160 /* The size of the header of the section including padding. */
161 unsigned header_byte_count;
162 /* The size of the header of the section without any padding. */
163 unsigned header_byte_delta;
164
165 void init (const char *name);
166 void release ();
167 void output ();
168 unsigned add (const void *data, unsigned len, void **output = NULL);
169 void round_size_up (int factor);
170 void *get_ptr_by_offset (unsigned int offset);
171
172 private:
173 void allocate_new_chunk ();
174
175 /* Buffers of binary data, each containing BRIG_CHUNK_MAX_SIZE bytes. */
176 vec <struct hsa_brig_data_chunk> chunks;
177
178 /* More convenient access to the last chunk from the vector above. */
179 struct hsa_brig_data_chunk *cur_chunk;
180 };
181
182 static struct hsa_brig_section brig_data, brig_code, brig_operand;
183 static uint32_t brig_insn_count;
184 static bool brig_initialized = false;
185
186 /* Mapping between emitted HSA functions and their offset in code segment. */
187 static hash_map<tree, BrigCodeOffset32_t> *function_offsets;
188
189 /* Hash map of emitted function declarations. */
190 static hash_map <tree, BrigDirectiveExecutable *> *emitted_declarations;
191
192 /* Hash table of emitted internal function declaration offsets. */
193 hash_table <hsa_internal_fn_hasher> *hsa_emitted_internal_decls;
194
195 /* List of sbr instructions. */
196 static vec <hsa_insn_sbr *> *switch_instructions;
197
198 class function_linkage_pair
199 {
200 public:
201 function_linkage_pair (tree decl, unsigned int off)
202 : function_decl (decl), offset (off) {}
203
204 /* Declaration of called function. */
205 tree function_decl;
206
207 /* Offset in operand section. */
208 unsigned int offset;
209 };
210
211 /* Vector of function calls where we need to resolve function offsets. */
212 static auto_vec <function_linkage_pair> function_call_linkage;
213
214 /* Add a new chunk, allocate data for it and initialize it. */
215
216 void
217 hsa_brig_section::allocate_new_chunk ()
218 {
219 struct hsa_brig_data_chunk new_chunk;
220
221 new_chunk.data = XCNEWVEC (char, BRIG_CHUNK_MAX_SIZE);
222 new_chunk.size = 0;
223 cur_chunk = chunks.safe_push (new_chunk);
224 }
225
226 /* Initialize the brig section. */
227
228 void
229 hsa_brig_section::init (const char *name)
230 {
231 section_name = name;
232 /* While the following computation is basically wrong, because the intent
233 certainly wasn't to have the first character of name and padding, which
234 are a part of sizeof (BrigSectionHeader), included in the first addend,
235 this is what the disassembler expects. */
236 total_size = sizeof (BrigSectionHeader) + strlen (section_name);
237 chunks.create (1);
238 allocate_new_chunk ();
239 header_byte_delta = total_size;
240 round_size_up (4);
241 header_byte_count = total_size;
242 }
243
244 /* Free all data in the section. */
245
246 void
247 hsa_brig_section::release ()
248 {
249 for (unsigned i = 0; i < chunks.length (); i++)
250 free (chunks[i].data);
251 chunks.release ();
252 cur_chunk = NULL;
253 }
254
255 /* Write the section to the output file to a section with the name given at
256 initialization. Switches the output section and does not restore it. */
257
258 void
259 hsa_brig_section::output ()
260 {
261 struct BrigSectionHeader section_header;
262 char padding[8];
263
264 section_header.byteCount = lendian64 (total_size);
265 section_header.headerByteCount = lendian32 (header_byte_count);
266 section_header.nameLength = lendian32 (strlen (section_name));
267 assemble_string ((const char *) &section_header, 16);
268 assemble_string (section_name, (section_header.nameLength));
269 memset (&padding, 0, sizeof (padding));
270 /* This is also a consequence of the wrong header size computation described
271 in a comment in hsa_brig_section::init. */
272 assemble_string (padding, 8);
273 for (unsigned i = 0; i < chunks.length (); i++)
274 assemble_string (chunks[i].data, chunks[i].size);
275 }
276
277 /* Add to the stream LEN bytes of opaque binary DATA. Return the offset at
278 which it was stored. If OUTPUT is not NULL, store into it the pointer to
279 the place where DATA was actually stored. */
280
281 unsigned
282 hsa_brig_section::add (const void *data, unsigned len, void **output)
283 {
284 unsigned offset = total_size;
285
286 gcc_assert (len <= BRIG_CHUNK_MAX_SIZE);
287 if (cur_chunk->size > (BRIG_CHUNK_MAX_SIZE - len))
288 allocate_new_chunk ();
289
290 char *dst = cur_chunk->data + cur_chunk->size;
291 memcpy (dst, data, len);
292 if (output)
293 *output = dst;
294 cur_chunk->size += len;
295 total_size += len;
296
297 return offset;
298 }
299
300 /* Add padding to section so that its size is divisible by FACTOR. */
301
302 void
303 hsa_brig_section::round_size_up (int factor)
304 {
305 unsigned padding, res = total_size % factor;
306
307 if (res == 0)
308 return;
309
310 padding = factor - res;
311 total_size += padding;
312 if (cur_chunk->size > (BRIG_CHUNK_MAX_SIZE - padding))
313 {
314 padding -= BRIG_CHUNK_MAX_SIZE - cur_chunk->size;
315 cur_chunk->size = BRIG_CHUNK_MAX_SIZE;
316 allocate_new_chunk ();
317 }
318
319 cur_chunk->size += padding;
320 }
321
322 /* Return pointer to data by global OFFSET in the section. */
323
324 void *
325 hsa_brig_section::get_ptr_by_offset (unsigned int offset)
326 {
327 gcc_assert (offset < total_size);
328 offset -= header_byte_delta;
329
330 unsigned i;
331 for (i = 0; offset >= chunks[i].size; i++)
332 offset -= chunks[i].size;
333
334 return chunks[i].data + offset;
335 }
336
/* BRIG string data hashing.  One entry of the table of strings that were
   already emitted to the data section.  */

struct brig_string_slot
{
  /* The characters of the string; LEN gives their number.  */
  const char *s;
  /* Character emitted just before the string, or zero for none.  It takes
     part in both hashing and equality.  */
  char prefix;
  /* Length of S, not counting PREFIX.  */
  int len;
  /* Offset in the data section at which the string was emitted.  */
  uint32_t offset;
};
346
347 /* Hash table helpers. */
348
349 struct brig_string_slot_hasher : pointer_hash <brig_string_slot>
350 {
351 static inline hashval_t hash (const value_type);
352 static inline bool equal (const value_type, const compare_type);
353 static inline void remove (value_type);
354 };
355
356 /* Returns a hash code for DS. Adapted from libiberty's htab_hash_string
357 to support strings that may not end in '\0'. */
358
359 inline hashval_t
360 brig_string_slot_hasher::hash (const value_type ds)
361 {
362 hashval_t r = ds->len;
363 int i;
364
365 for (i = 0; i < ds->len; i++)
366 r = r * 67 + (unsigned) ds->s[i] - 113;
367 r = r * 67 + (unsigned) ds->prefix - 113;
368 return r;
369 }
370
371 /* Returns nonzero if DS1 and DS2 are equal. */
372
373 inline bool
374 brig_string_slot_hasher::equal (const value_type ds1, const compare_type ds2)
375 {
376 if (ds1->len == ds2->len)
377 return ds1->prefix == ds2->prefix
378 && memcmp (ds1->s, ds2->s, ds1->len) == 0;
379
380 return 0;
381 }
382
383 /* Deallocate memory for DS upon its removal. */
384
385 inline void
386 brig_string_slot_hasher::remove (value_type ds)
387 {
388 free (const_cast<char *> (ds->s));
389 free (ds);
390 }
391
392 /* Hash for strings we output in order not to duplicate them needlessly. */
393
394 static hash_table<brig_string_slot_hasher> *brig_string_htab;
395
396 /* Emit a null terminated string STR to the data section and return its
397 offset in it. If PREFIX is non-zero, output it just before STR too.
398 Sanitize the string if SANITIZE option is set to true. */
399
400 static unsigned
401 brig_emit_string (const char *str, char prefix = 0, bool sanitize = true)
402 {
403 unsigned slen = strlen (str);
404 unsigned offset, len = slen + (prefix ? 1 : 0);
405 uint32_t hdr_len = lendian32 (len);
406 brig_string_slot s_slot;
407 brig_string_slot **slot;
408 char *str2;
409
410 str2 = xstrdup (str);
411
412 if (sanitize)
413 hsa_sanitize_name (str2);
414 s_slot.s = str2;
415 s_slot.len = slen;
416 s_slot.prefix = prefix;
417 s_slot.offset = 0;
418
419 slot = brig_string_htab->find_slot (&s_slot, INSERT);
420 if (*slot == NULL)
421 {
422 brig_string_slot *new_slot = XCNEW (brig_string_slot);
423
424 /* In theory we should fill in BrigData but that would mean copying
425 the string to a buffer for no reason, so we just emulate it. */
426 offset = brig_data.add (&hdr_len, sizeof (hdr_len));
427 if (prefix)
428 brig_data.add (&prefix, 1);
429
430 brig_data.add (str2, slen);
431 brig_data.round_size_up (4);
432
433 /* TODO: could use the string we just copied into
434 brig_string->cur_chunk */
435 new_slot->s = str2;
436 new_slot->len = slen;
437 new_slot->prefix = prefix;
438 new_slot->offset = offset;
439 *slot = new_slot;
440 }
441 else
442 {
443 offset = (*slot)->offset;
444 free (str2);
445 }
446
447 return offset;
448 }
449
450 /* Linked list of queued operands. */
451
452 static struct operand_queue
453 {
454 /* First from the chain of queued operands. */
455 hsa_op_base *first_op, *last_op;
456
457 /* The offset at which the next operand will be enqueued. */
458 unsigned projected_size;
459
460 } op_queue;
461
462 /* Unless already initialized, initialize infrastructure to produce BRIG. */
463
464 static void
465 brig_init (void)
466 {
467 brig_insn_count = 0;
468
469 if (brig_initialized)
470 return;
471
472 brig_string_htab = new hash_table<brig_string_slot_hasher> (37);
473 brig_data.init (BRIG_SECTION_DATA_NAME);
474 brig_code.init (BRIG_SECTION_CODE_NAME);
475 brig_operand.init (BRIG_SECTION_OPERAND_NAME);
476 brig_initialized = true;
477
478 struct BrigDirectiveModule moddir;
479 memset (&moddir, 0, sizeof (moddir));
480 moddir.base.byteCount = lendian16 (sizeof (moddir));
481
482 char *modname;
483 if (main_input_filename && *main_input_filename != '\0')
484 {
485 const char *part = strrchr (main_input_filename, '/');
486 if (!part)
487 part = main_input_filename;
488 else
489 part++;
490 modname = concat ("&__hsa_module_", part, NULL);
491 char *extension = strchr (modname, '.');
492 if (extension)
493 *extension = '\0';
494
495 /* As in LTO mode, we have to emit a different module names. */
496 if (flag_ltrans)
497 {
498 part = strrchr (asm_file_name, '/');
499 if (!part)
500 part = asm_file_name;
501 else
502 part++;
503 char *modname2;
504 modname2 = xasprintf ("%s_%s", modname, part);
505 free (modname);
506 modname = modname2;
507 }
508
509 hsa_sanitize_name (modname);
510 moddir.name = brig_emit_string (modname);
511 free (modname);
512 }
513 else
514 moddir.name = brig_emit_string ("__hsa_module_unnamed", '&');
515 moddir.base.kind = lendian16 (BRIG_KIND_DIRECTIVE_MODULE);
516 moddir.hsailMajor = lendian32 (BRIG_VERSION_HSAIL_MAJOR);
517 moddir.hsailMinor = lendian32 (BRIG_VERSION_HSAIL_MINOR);
518 moddir.profile = hsa_full_profile_p () ? BRIG_PROFILE_FULL: BRIG_PROFILE_BASE;
519 if (hsa_machine_large_p ())
520 moddir.machineModel = BRIG_MACHINE_LARGE;
521 else
522 moddir.machineModel = BRIG_MACHINE_SMALL;
523 moddir.defaultFloatRound = BRIG_ROUND_FLOAT_DEFAULT;
524 brig_code.add (&moddir, sizeof (moddir));
525 }
526
527 /* Free all BRIG data. */
528
529 static void
530 brig_release_data (void)
531 {
532 delete brig_string_htab;
533 brig_data.release ();
534 brig_code.release ();
535 brig_operand.release ();
536
537 brig_initialized = 0;
538 }
539
540 /* Enqueue operation OP. Return the offset at which it will be stored. */
541
542 static unsigned int
543 enqueue_op (hsa_op_base *op)
544 {
545 unsigned ret;
546
547 if (op->m_brig_op_offset)
548 return op->m_brig_op_offset;
549
550 ret = op_queue.projected_size;
551 op->m_brig_op_offset = op_queue.projected_size;
552
553 if (!op_queue.first_op)
554 op_queue.first_op = op;
555 else
556 op_queue.last_op->m_next = op;
557 op_queue.last_op = op;
558
559 if (is_a <hsa_op_immed *> (op))
560 op_queue.projected_size += sizeof (struct BrigOperandConstantBytes);
561 else if (is_a <hsa_op_reg *> (op))
562 op_queue.projected_size += sizeof (struct BrigOperandRegister);
563 else if (is_a <hsa_op_address *> (op))
564 op_queue.projected_size += sizeof (struct BrigOperandAddress);
565 else if (is_a <hsa_op_code_ref *> (op))
566 op_queue.projected_size += sizeof (struct BrigOperandCodeRef);
567 else if (is_a <hsa_op_code_list *> (op))
568 op_queue.projected_size += sizeof (struct BrigOperandCodeList);
569 else if (is_a <hsa_op_operand_list *> (op))
570 op_queue.projected_size += sizeof (struct BrigOperandOperandList);
571 else
572 gcc_unreachable ();
573 return ret;
574 }
575
576 static void emit_immediate_operand (hsa_op_immed *imm);
577
578 /* Emit directive describing a symbol if it has not been emitted already.
579 Return the offset of the directive. */
580
581 static unsigned
582 emit_directive_variable (class hsa_symbol *symbol)
583 {
584 struct BrigDirectiveVariable dirvar;
585 unsigned name_offset;
586 static unsigned res_name_offset;
587
588 if (symbol->m_directive_offset)
589 return symbol->m_directive_offset;
590
591 memset (&dirvar, 0, sizeof (dirvar));
592 dirvar.base.byteCount = lendian16 (sizeof (dirvar));
593 dirvar.base.kind = lendian16 (BRIG_KIND_DIRECTIVE_VARIABLE);
594 dirvar.allocation = symbol->m_allocation;
595
596 char prefix = symbol->m_global_scope_p ? '&' : '%';
597
598 if (symbol->m_decl && TREE_CODE (symbol->m_decl) == RESULT_DECL)
599 {
600 if (res_name_offset == 0)
601 res_name_offset = brig_emit_string (symbol->m_name, '%');
602 name_offset = res_name_offset;
603 }
604 else if (symbol->m_name)
605 name_offset = brig_emit_string (symbol->m_name, prefix);
606 else
607 {
608 char buf[64];
609 snprintf (buf, 64, "__%s_%i", hsa_seg_name (symbol->m_segment),
610 symbol->m_name_number);
611 name_offset = brig_emit_string (buf, prefix);
612 }
613
614 dirvar.name = lendian32 (name_offset);
615
616 if (symbol->m_decl && TREE_CODE (symbol->m_decl) == CONST_DECL)
617 {
618 hsa_op_immed *tmp = new hsa_op_immed (DECL_INITIAL (symbol->m_decl));
619 dirvar.init = lendian32 (enqueue_op (tmp));
620 }
621 else
622 dirvar.init = 0;
623 dirvar.type = lendian16 (symbol->m_type);
624 dirvar.segment = symbol->m_segment;
625 dirvar.align = symbol->m_align;
626 dirvar.linkage = symbol->m_linkage;
627 dirvar.dim.lo = symbol->m_dim;
628 dirvar.dim.hi = symbol->m_dim >> 32;
629
630 /* Global variables are just declared and linked via HSA runtime. */
631 if (symbol->m_linkage != BRIG_ALLOCATION_PROGRAM)
632 dirvar.modifier |= BRIG_VARIABLE_DEFINITION;
633 dirvar.reserved = 0;
634
635 if (symbol->m_cst_value)
636 {
637 dirvar.modifier |= BRIG_VARIABLE_CONST;
638 dirvar.init = lendian32 (enqueue_op (symbol->m_cst_value));
639 }
640
641 symbol->m_directive_offset = brig_code.add (&dirvar, sizeof (dirvar));
642 return symbol->m_directive_offset;
643 }
644
/* Emit directives describing either a function declaration or definition F and
   return the produced BrigDirectiveExecutable structure.  The function does
   not take into account any instructions when calculating nextModuleEntry
   field of the produced BrigDirectiveExecutable structure so when emitting
   actual definitions, this field needs to be updated after all of the function
   is actually added to the code section.

   IS_DECLARATION only controls whether an offset already recorded for the
   function in function_offsets may be overwritten.  The returned pointer
   refers to the copy of the directive stored in the code section.  */

static BrigDirectiveExecutable *
emit_function_directives (hsa_function_representation *f, bool is_declaration)
{
  struct BrigDirectiveExecutable fndir;
  unsigned name_offset, inarg_off, scoped_off, next_toplev_off;
  int count = 0;
  void *ptr_to_fndir;
  hsa_symbol *sym;

  /* For definitions, global symbols are emitted first, ahead of the
     executable directive itself.  */
  if (!f->m_declaration_p)
    for (int i = 0; f->m_global_symbols.iterate (i, &sym); i++)
      {
	gcc_assert (!sym->m_emitted_to_brig);
	sym->m_emitted_to_brig = true;
	emit_directive_variable (sym);
	brig_insn_count++;
      }

  name_offset = brig_emit_string (f->m_name, '&');
  /* The offsets below are computed up front from the current size of the
     code section and assume that the directive is followed by the output
     argument (if any), then the input arguments, then the spill and private
     variables -- the order in which they are emitted further down.  */
  inarg_off = brig_code.total_size + sizeof (fndir)
    + (f->m_output_arg ? sizeof (struct BrigDirectiveVariable) : 0);
  scoped_off = inarg_off
    + f->m_input_args.length () * sizeof (struct BrigDirectiveVariable);

  if (!f->m_declaration_p)
    {
      count += f->m_spill_symbols.length ();
      count += f->m_private_variables.length ();
    }

  next_toplev_off = scoped_off + count * sizeof (struct BrigDirectiveVariable);

  memset (&fndir, 0, sizeof (fndir));
  fndir.base.byteCount = lendian16 (sizeof (fndir));
  fndir.base.kind = lendian16 (f->m_kern_p ? BRIG_KIND_DIRECTIVE_KERNEL
			       : BRIG_KIND_DIRECTIVE_FUNCTION);
  fndir.name = lendian32 (name_offset);
  fndir.inArgCount = lendian16 (f->m_input_args.length ());
  fndir.outArgCount = lendian16 (f->m_output_arg ? 1 : 0);
  fndir.firstInArg = lendian32 (inarg_off);
  fndir.firstCodeBlockEntry = lendian32 (scoped_off);
  fndir.nextModuleEntry = lendian32 (next_toplev_off);
  fndir.linkage = f->get_linkage ();
  if (!f->m_declaration_p)
    fndir.modifier |= BRIG_EXECUTABLE_DEFINITION;
  memset (&fndir.reserved, 0, sizeof (fndir.reserved));

  /* Once we put a definition of function_offsets, we should not overwrite
     it with a declaration of the function.  */
  if (f->m_internal_fn == NULL)
    {
      if (!function_offsets->get (f->m_decl) || !is_declaration)
	function_offsets->put (f->m_decl, brig_code.total_size);
    }
  else
    {
      /* Internal function.
	 NOTE(review): the slot is overwritten unconditionally, so an entry
	 that was already present is not freed here -- confirm that the same
	 internal function is never emitted twice.  */
      hsa_internal_fn **slot
	= hsa_emitted_internal_decls->find_slot (f->m_internal_fn, INSERT);
      hsa_internal_fn *int_fn = new hsa_internal_fn (f->m_internal_fn);
      int_fn->m_offset = brig_code.total_size;
      *slot = int_fn;
    }

  /* PTR_TO_FNDIR receives the address of the stored copy so that the caller
     can patch it later.  */
  brig_code.add (&fndir, sizeof (fndir), &ptr_to_fndir);

  if (f->m_output_arg)
    emit_directive_variable (f->m_output_arg);
  for (unsigned i = 0; i < f->m_input_args.length (); i++)
    emit_directive_variable (f->m_input_args[i]);

  if (!f->m_declaration_p)
    {
      for (int i = 0; f->m_spill_symbols.iterate (i, &sym); i++)
	{
	  emit_directive_variable (sym);
	  brig_insn_count++;
	}
      for (unsigned i = 0; i < f->m_private_variables.length (); i++)
	{
	  emit_directive_variable (f->m_private_variables[i]);
	  brig_insn_count++;
	}
    }

  return (BrigDirectiveExecutable *) ptr_to_fndir;
}
739
740 /* Emit a label directive for the given HBB. We assume it is about to start on
741 the current offset in the code section. */
742
743 static void
744 emit_bb_label_directive (hsa_bb *hbb)
745 {
746 struct BrigDirectiveLabel lbldir;
747
748 lbldir.base.byteCount = lendian16 (sizeof (lbldir));
749 lbldir.base.kind = lendian16 (BRIG_KIND_DIRECTIVE_LABEL);
750 char buf[32];
751 snprintf (buf, 32, "BB_%u_%i", DECL_UID (current_function_decl),
752 hbb->m_index);
753 lbldir.name = lendian32 (brig_emit_string (buf, '@'));
754
755 hbb->m_label_ref.m_directive_offset = brig_code.add (&lbldir,
756 sizeof (lbldir));
757 brig_insn_count++;
758 }
759
760 /* Map a normal HSAIL type to the type of the equivalent BRIG operand
761 holding such, for constants and registers. */
762
763 static BrigType16_t
764 regtype_for_type (BrigType16_t t)
765 {
766 switch (t)
767 {
768 case BRIG_TYPE_B1:
769 return BRIG_TYPE_B1;
770
771 case BRIG_TYPE_U8:
772 case BRIG_TYPE_U16:
773 case BRIG_TYPE_U32:
774 case BRIG_TYPE_S8:
775 case BRIG_TYPE_S16:
776 case BRIG_TYPE_S32:
777 case BRIG_TYPE_B8:
778 case BRIG_TYPE_B16:
779 case BRIG_TYPE_B32:
780 case BRIG_TYPE_F16:
781 case BRIG_TYPE_F32:
782 case BRIG_TYPE_U8X4:
783 case BRIG_TYPE_U16X2:
784 case BRIG_TYPE_S8X4:
785 case BRIG_TYPE_S16X2:
786 case BRIG_TYPE_F16X2:
787 return BRIG_TYPE_B32;
788
789 case BRIG_TYPE_U64:
790 case BRIG_TYPE_S64:
791 case BRIG_TYPE_F64:
792 case BRIG_TYPE_B64:
793 case BRIG_TYPE_U8X8:
794 case BRIG_TYPE_U16X4:
795 case BRIG_TYPE_U32X2:
796 case BRIG_TYPE_S8X8:
797 case BRIG_TYPE_S16X4:
798 case BRIG_TYPE_S32X2:
799 case BRIG_TYPE_F16X4:
800 case BRIG_TYPE_F32X2:
801 return BRIG_TYPE_B64;
802
803 case BRIG_TYPE_B128:
804 case BRIG_TYPE_U8X16:
805 case BRIG_TYPE_U16X8:
806 case BRIG_TYPE_U32X4:
807 case BRIG_TYPE_U64X2:
808 case BRIG_TYPE_S8X16:
809 case BRIG_TYPE_S16X8:
810 case BRIG_TYPE_S32X4:
811 case BRIG_TYPE_S64X2:
812 case BRIG_TYPE_F16X8:
813 case BRIG_TYPE_F32X4:
814 case BRIG_TYPE_F64X2:
815 return BRIG_TYPE_B128;
816
817 default:
818 gcc_unreachable ();
819 }
820 }
821
822 /* Return the length of the BRIG type TYPE that is going to be streamed out as
823 an immediate constant (so it must not be B1). */
824
825 unsigned
826 hsa_get_imm_brig_type_len (BrigType16_t type)
827 {
828 BrigType16_t base_type = type & BRIG_TYPE_BASE_MASK;
829 BrigType16_t pack_type = type & BRIG_TYPE_PACK_MASK;
830
831 switch (pack_type)
832 {
833 case BRIG_TYPE_PACK_NONE:
834 break;
835 case BRIG_TYPE_PACK_32:
836 return 4;
837 case BRIG_TYPE_PACK_64:
838 return 8;
839 case BRIG_TYPE_PACK_128:
840 return 16;
841 default:
842 gcc_unreachable ();
843 }
844
845 switch (base_type)
846 {
847 case BRIG_TYPE_U8:
848 case BRIG_TYPE_S8:
849 case BRIG_TYPE_B8:
850 return 1;
851 case BRIG_TYPE_U16:
852 case BRIG_TYPE_S16:
853 case BRIG_TYPE_F16:
854 case BRIG_TYPE_B16:
855 return 2;
856 case BRIG_TYPE_U32:
857 case BRIG_TYPE_S32:
858 case BRIG_TYPE_F32:
859 case BRIG_TYPE_B32:
860 return 4;
861 case BRIG_TYPE_U64:
862 case BRIG_TYPE_S64:
863 case BRIG_TYPE_F64:
864 case BRIG_TYPE_B64:
865 return 8;
866 case BRIG_TYPE_B128:
867 return 16;
868 default:
869 gcc_unreachable ();
870 }
871 }
872
873 /* Emit one scalar VALUE to the buffer DATA intended for BRIG emission.
874 If NEED_LEN is not equal to zero, shrink or extend the value
875 to NEED_LEN bytes. Return how many bytes were written. */
876
877 static int
878 emit_immediate_scalar_to_buffer (tree value, char *data, unsigned need_len)
879 {
880 union hsa_bytes bytes;
881
882 memset (&bytes, 0, sizeof (bytes));
883 tree type = TREE_TYPE (value);
884 gcc_checking_assert (TREE_CODE (type) != VECTOR_TYPE);
885
886 unsigned data_len = tree_to_uhwi (TYPE_SIZE (type)) / BITS_PER_UNIT;
887 if (INTEGRAL_TYPE_P (type)
888 || (POINTER_TYPE_P (type) && TREE_CODE (value) == INTEGER_CST))
889 switch (data_len)
890 {
891 case 1:
892 bytes.b8 = (uint8_t) TREE_INT_CST_LOW (value);
893 break;
894 case 2:
895 bytes.b16 = (uint16_t) TREE_INT_CST_LOW (value);
896 break;
897 case 4:
898 bytes.b32 = (uint32_t) TREE_INT_CST_LOW (value);
899 break;
900 case 8:
901 bytes.b64 = (uint64_t) TREE_INT_CST_LOW (value);
902 break;
903 default:
904 gcc_unreachable ();
905 }
906 else if (SCALAR_FLOAT_TYPE_P (type))
907 {
908 if (data_len == 2)
909 {
910 sorry ("Support for HSA does not implement immediate 16 bit FPU "
911 "operands");
912 return 2;
913 }
914 unsigned int_len = GET_MODE_SIZE (SCALAR_FLOAT_TYPE_MODE (type));
915 /* There are always 32 bits in each long, no matter the size of
916 the hosts long. */
917 long tmp[6];
918
919 real_to_target (tmp, TREE_REAL_CST_PTR (value), TYPE_MODE (type));
920
921 if (int_len == 4)
922 bytes.b32 = (uint32_t) tmp[0];
923 else
924 {
925 bytes.b64 = (uint64_t)(uint32_t) tmp[1];
926 bytes.b64 <<= 32;
927 bytes.b64 |= (uint32_t) tmp[0];
928 }
929 }
930 else
931 gcc_unreachable ();
932
933 int len;
934 if (need_len == 0)
935 len = data_len;
936 else
937 len = need_len;
938
939 memcpy (data, &bytes, len);
940 return len;
941 }
942
/* Produce the bytes representing this immediate in a newly allocated buffer
   and return it; the caller is responsible for freeing the buffer.  The
   number of bytes in the buffer is stored to *BRIG_REPR_SIZE.

   When the operand holds a tree value, the bytes are derived from the tree
   (vector, string, complex, constructor or plain scalar constant); otherwise
   they come from the integer value of the operand.  */

char *
hsa_op_immed::emit_to_buffer (unsigned *brig_repr_size)
{
  char *brig_repr;
  /* Start from the length implied by the BRIG type of the operand.  */
  *brig_repr_size = hsa_get_imm_brig_type_len (m_type);

  if (m_tree_value != NULL_TREE)
    {
      /* Update brig_repr_size for special tree values.  */
      if (TREE_CODE (m_tree_value) == STRING_CST)
	*brig_repr_size = TREE_STRING_LENGTH (m_tree_value);
      else if (TREE_CODE (m_tree_value) == CONSTRUCTOR)
	*brig_repr_size
	  = tree_to_uhwi (TYPE_SIZE_UNIT (TREE_TYPE (m_tree_value)));

      unsigned total_len = *brig_repr_size;

      /* As we can have a constructor with fewer elements, fill the memory
	 with zeros.  */
      brig_repr = XCNEWVEC (char, total_len);
      char *p = brig_repr;

      if (TREE_CODE (m_tree_value) == VECTOR_CST)
	{
	  /* Variable-length vectors aren't supported.  */
	  int i, num = VECTOR_CST_NELTS (m_tree_value).to_constant ();
	  /* Elements are written one after another at their natural
	     size.  */
	  for (i = 0; i < num; i++)
	    {
	      tree v = VECTOR_CST_ELT (m_tree_value, i);
	      unsigned actual = emit_immediate_scalar_to_buffer (v, p, 0);
	      total_len -= actual;
	      p += actual;
	    }
	  /* Vectors should have the exact size.  */
	  gcc_assert (total_len == 0);
	}
      else if (TREE_CODE (m_tree_value) == STRING_CST)
	memcpy (brig_repr, TREE_STRING_POINTER (m_tree_value),
		TREE_STRING_LENGTH (m_tree_value));
      else if (TREE_CODE (m_tree_value) == COMPLEX_CST)
	{
	  /* The real part fills the first half of the buffer and the
	     imaginary part the second half.  */
	  gcc_assert (total_len % 2 == 0);
	  unsigned actual;
	  actual
	    = emit_immediate_scalar_to_buffer (TREE_REALPART (m_tree_value), p,
					       total_len / 2);

	  gcc_assert (actual == total_len / 2);
	  p += actual;

	  actual
	    = emit_immediate_scalar_to_buffer (TREE_IMAGPART (m_tree_value), p,
					       total_len / 2);
	  gcc_assert (actual == total_len / 2);
	}
      else if (TREE_CODE (m_tree_value) == CONSTRUCTOR)
	{
	  /* Constructor elements are written back to back; whatever is not
	     covered by them keeps the zeros the buffer was allocated
	     with.  */
	  unsigned len = CONSTRUCTOR_NELTS (m_tree_value);
	  for (unsigned i = 0; i < len; i++)
	    {
	      tree v = CONSTRUCTOR_ELT (m_tree_value, i)->value;
	      unsigned actual = emit_immediate_scalar_to_buffer (v, p, 0);
	      total_len -= actual;
	      p += actual;
	    }
	}
      else
	/* A plain scalar is written at the length of the BRIG type.  */
	emit_immediate_scalar_to_buffer (m_tree_value, p, total_len);
    }
  else
    {
      /* No tree value: use the integer value, narrowed to the length of the
	 BRIG type.  */
      hsa_bytes bytes;

      switch (*brig_repr_size)
	{
	case 1:
	  bytes.b8 = (uint8_t) m_int_value;
	  break;
	case 2:
	  bytes.b16 = (uint16_t) m_int_value;
	  break;
	case 4:
	  bytes.b32 = (uint32_t) m_int_value;
	  break;
	case 8:
	  bytes.b64 = (uint64_t) m_int_value;
	  break;
	default:
	  gcc_unreachable ();
	}

      brig_repr = XNEWVEC (char, *brig_repr_size);
      memcpy (brig_repr, &bytes, *brig_repr_size);
    }

  return brig_repr;
}
1040
1041 /* Emit an immediate BRIG operand IMM. The BRIG type of the immediate might
1042 have been massaged to comply with various HSA/BRIG type requirements, so the
1043 only important aspect of that is the length (because HSAIL might expect
1044 smaller constants or become bit-data). The data should be represented
1045 according to what is in the tree representation. */
1046
1047 static void
1048 emit_immediate_operand (hsa_op_immed *imm)
1049 {
1050 unsigned brig_repr_size;
1051 char *brig_repr = imm->emit_to_buffer (&brig_repr_size);
1052 struct BrigOperandConstantBytes out;
1053
1054 memset (&out, 0, sizeof (out));
1055 out.base.byteCount = lendian16 (sizeof (out));
1056 out.base.kind = lendian16 (BRIG_KIND_OPERAND_CONSTANT_BYTES);
1057 uint32_t byteCount = lendian32 (brig_repr_size);
1058 out.type = lendian16 (imm->m_type);
1059 out.bytes = lendian32 (brig_data.add (&byteCount, sizeof (byteCount)));
1060 brig_operand.add (&out, sizeof (out));
1061 brig_data.add (brig_repr, brig_repr_size);
1062 brig_data.round_size_up (4);
1063
1064 free (brig_repr);
1065 }
1066
1067 /* Emit a register BRIG operand REG. */
1068
1069 static void
1070 emit_register_operand (hsa_op_reg *reg)
1071 {
1072 struct BrigOperandRegister out;
1073
1074 out.base.byteCount = lendian16 (sizeof (out));
1075 out.base.kind = lendian16 (BRIG_KIND_OPERAND_REGISTER);
1076 out.regNum = lendian32 (reg->m_hard_num);
1077
1078 switch (regtype_for_type (reg->m_type))
1079 {
1080 case BRIG_TYPE_B32:
1081 out.regKind = BRIG_REGISTER_KIND_SINGLE;
1082 break;
1083 case BRIG_TYPE_B64:
1084 out.regKind = BRIG_REGISTER_KIND_DOUBLE;
1085 break;
1086 case BRIG_TYPE_B128:
1087 out.regKind = BRIG_REGISTER_KIND_QUAD;
1088 break;
1089 case BRIG_TYPE_B1:
1090 out.regKind = BRIG_REGISTER_KIND_CONTROL;
1091 break;
1092 default:
1093 gcc_unreachable ();
1094 }
1095
1096 brig_operand.add (&out, sizeof (out));
1097 }
1098
1099 /* Emit an address BRIG operand ADDR. */
1100
1101 static void
1102 emit_address_operand (hsa_op_address *addr)
1103 {
1104 struct BrigOperandAddress out;
1105
1106 out.base.byteCount = lendian16 (sizeof (out));
1107 out.base.kind = lendian16 (BRIG_KIND_OPERAND_ADDRESS);
1108 out.symbol = addr->m_symbol
1109 ? lendian32 (emit_directive_variable (addr->m_symbol)) : 0;
1110 out.reg = addr->m_reg ? lendian32 (enqueue_op (addr->m_reg)) : 0;
1111
1112 if (sizeof (addr->m_imm_offset) == 8)
1113 {
1114 out.offset.lo = lendian32 (addr->m_imm_offset);
1115 out.offset.hi = lendian32 (addr->m_imm_offset >> 32);
1116 }
1117 else
1118 {
1119 gcc_assert (sizeof (addr->m_imm_offset) == 4);
1120 out.offset.lo = lendian32 (addr->m_imm_offset);
1121 out.offset.hi = 0;
1122 }
1123
1124 brig_operand.add (&out, sizeof (out));
1125 }
1126
1127 /* Emit a code reference operand REF. */
1128
1129 static void
1130 emit_code_ref_operand (hsa_op_code_ref *ref)
1131 {
1132 struct BrigOperandCodeRef out;
1133
1134 out.base.byteCount = lendian16 (sizeof (out));
1135 out.base.kind = lendian16 (BRIG_KIND_OPERAND_CODE_REF);
1136 out.ref = lendian32 (ref->m_directive_offset);
1137 brig_operand.add (&out, sizeof (out));
1138 }
1139
1140 /* Emit a code list operand CODE_LIST. */
1141
1142 static void
1143 emit_code_list_operand (hsa_op_code_list *code_list)
1144 {
1145 struct BrigOperandCodeList out;
1146 unsigned args = code_list->m_offsets.length ();
1147
1148 for (unsigned i = 0; i < args; i++)
1149 gcc_assert (code_list->m_offsets[i]);
1150
1151 out.base.byteCount = lendian16 (sizeof (out));
1152 out.base.kind = lendian16 (BRIG_KIND_OPERAND_CODE_LIST);
1153
1154 uint32_t byteCount = lendian32 (4 * args);
1155
1156 out.elements = lendian32 (brig_data.add (&byteCount, sizeof (byteCount)));
1157 brig_data.add (code_list->m_offsets.address (), args * sizeof (uint32_t));
1158 brig_data.round_size_up (4);
1159 brig_operand.add (&out, sizeof (out));
1160 }
1161
1162 /* Emit an operand list operand OPERAND_LIST. */
1163
1164 static void
1165 emit_operand_list_operand (hsa_op_operand_list *operand_list)
1166 {
1167 struct BrigOperandOperandList out;
1168 unsigned args = operand_list->m_offsets.length ();
1169
1170 for (unsigned i = 0; i < args; i++)
1171 gcc_assert (operand_list->m_offsets[i]);
1172
1173 out.base.byteCount = lendian16 (sizeof (out));
1174 out.base.kind = lendian16 (BRIG_KIND_OPERAND_OPERAND_LIST);
1175
1176 uint32_t byteCount = lendian32 (4 * args);
1177
1178 out.elements = lendian32 (brig_data.add (&byteCount, sizeof (byteCount)));
1179 brig_data.add (operand_list->m_offsets.address (), args * sizeof (uint32_t));
1180 brig_data.round_size_up (4);
1181 brig_operand.add (&out, sizeof (out));
1182 }
1183
1184 /* Emit all operands queued for writing. */
1185
1186 static void
1187 emit_queued_operands (void)
1188 {
1189 for (hsa_op_base *op = op_queue.first_op; op; op = op->m_next)
1190 {
1191 gcc_assert (op->m_brig_op_offset == brig_operand.total_size);
1192 if (hsa_op_immed *imm = dyn_cast <hsa_op_immed *> (op))
1193 emit_immediate_operand (imm);
1194 else if (hsa_op_reg *reg = dyn_cast <hsa_op_reg *> (op))
1195 emit_register_operand (reg);
1196 else if (hsa_op_address *addr = dyn_cast <hsa_op_address *> (op))
1197 emit_address_operand (addr);
1198 else if (hsa_op_code_ref *ref = dyn_cast <hsa_op_code_ref *> (op))
1199 emit_code_ref_operand (ref);
1200 else if (hsa_op_code_list *code_list = dyn_cast <hsa_op_code_list *> (op))
1201 emit_code_list_operand (code_list);
1202 else if (hsa_op_operand_list *l = dyn_cast <hsa_op_operand_list *> (op))
1203 emit_operand_list_operand (l);
1204 else
1205 gcc_unreachable ();
1206 }
1207 }
1208
1209 /* Emit directives describing the function that is used for
1210 a function declaration. */
1211
1212 static BrigDirectiveExecutable *
1213 emit_function_declaration (tree decl)
1214 {
1215 hsa_function_representation *f = hsa_generate_function_declaration (decl);
1216
1217 BrigDirectiveExecutable *e = emit_function_directives (f, true);
1218 emit_queued_operands ();
1219
1220 delete f;
1221
1222 return e;
1223 }
1224
1225 /* Emit directives describing the function that is used for
1226 an internal function declaration. */
1227
1228 static BrigDirectiveExecutable *
1229 emit_internal_fn_decl (hsa_internal_fn *fn)
1230 {
1231 hsa_function_representation *f = hsa_generate_internal_fn_decl (fn);
1232
1233 BrigDirectiveExecutable *e = emit_function_directives (f, true);
1234 emit_queued_operands ();
1235
1236 delete f;
1237
1238 return e;
1239 }
1240
1241 /* Enqueue all operands of INSN and return offset to BRIG data section
1242 to list of operand offsets. */
1243
1244 static unsigned
1245 emit_insn_operands (hsa_insn_basic *insn)
1246 {
1247 auto_vec<BrigOperandOffset32_t, HSA_BRIG_INT_STORAGE_OPERANDS>
1248 operand_offsets;
1249
1250 unsigned l = insn->operand_count ();
1251
1252 /* We have N operands so use 4 * N for the byte_count. */
1253 uint32_t byte_count = lendian32 (4 * l);
1254 unsigned offset = brig_data.add (&byte_count, sizeof (byte_count));
1255 if (l > 0)
1256 {
1257 operand_offsets.safe_grow (l);
1258 for (unsigned i = 0; i < l; i++)
1259 operand_offsets[i] = lendian32 (enqueue_op (insn->get_op (i)));
1260
1261 brig_data.add (operand_offsets.address (),
1262 l * sizeof (BrigOperandOffset32_t));
1263 }
1264 brig_data.round_size_up (4);
1265 return offset;
1266 }
1267
1268 /* Enqueue operand OP0, OP1, OP2 (if different from NULL) and return offset
1269 to BRIG data section to list of operand offsets. */
1270
1271 static unsigned
1272 emit_operands (hsa_op_base *op0, hsa_op_base *op1 = NULL,
1273 hsa_op_base *op2 = NULL)
1274 {
1275 auto_vec<BrigOperandOffset32_t, HSA_BRIG_INT_STORAGE_OPERANDS>
1276 operand_offsets;
1277
1278 gcc_checking_assert (op0 != NULL);
1279 operand_offsets.safe_push (enqueue_op (op0));
1280
1281 if (op1 != NULL)
1282 {
1283 operand_offsets.safe_push (enqueue_op (op1));
1284 if (op2 != NULL)
1285 operand_offsets.safe_push (enqueue_op (op2));
1286 }
1287
1288 unsigned l = operand_offsets.length ();
1289
1290 /* We have N operands so use 4 * N for the byte_count. */
1291 uint32_t byte_count = lendian32 (4 * l);
1292
1293 unsigned offset = brig_data.add (&byte_count, sizeof (byte_count));
1294 brig_data.add (operand_offsets.address (),
1295 l * sizeof (BrigOperandOffset32_t));
1296
1297 brig_data.round_size_up (4);
1298
1299 return offset;
1300 }
1301
1302 /* Emit an HSA memory instruction and all necessary directives, schedule
1303 necessary operands for writing. */
1304
1305 static void
1306 emit_memory_insn (hsa_insn_mem *mem)
1307 {
1308 struct BrigInstMem repr;
1309 gcc_checking_assert (mem->operand_count () == 2);
1310
1311 hsa_op_address *addr = as_a <hsa_op_address *> (mem->get_op (1));
1312
1313 /* This is necessary because of the erroneous typedef of
1314 BrigMemoryModifier8_t which introduces padding which may then contain
1315 random stuff (which we do not want so that we can test things don't
1316 change). */
1317 memset (&repr, 0, sizeof (repr));
1318 repr.base.base.byteCount = lendian16 (sizeof (repr));
1319 repr.base.base.kind = lendian16 (BRIG_KIND_INST_MEM);
1320 repr.base.opcode = lendian16 (mem->m_opcode);
1321 repr.base.type = lendian16 (mem->m_type);
1322 repr.base.operands = lendian32 (emit_insn_operands (mem));
1323
1324 if (addr->m_symbol)
1325 repr.segment = addr->m_symbol->m_segment;
1326 else
1327 repr.segment = BRIG_SEGMENT_FLAT;
1328 repr.modifier = 0;
1329 repr.equivClass = mem->m_equiv_class;
1330 repr.align = mem->m_align;
1331 if (mem->m_opcode == BRIG_OPCODE_LD)
1332 repr.width = BRIG_WIDTH_1;
1333 else
1334 repr.width = BRIG_WIDTH_NONE;
1335 memset (&repr.reserved, 0, sizeof (repr.reserved));
1336 brig_code.add (&repr, sizeof (repr));
1337 brig_insn_count++;
1338 }
1339
1340 /* Emit an HSA signal memory instruction and all necessary directives, schedule
1341 necessary operands for writing. */
1342
1343 static void
1344 emit_signal_insn (hsa_insn_signal *mem)
1345 {
1346 struct BrigInstSignal repr;
1347
1348 memset (&repr, 0, sizeof (repr));
1349 repr.base.base.byteCount = lendian16 (sizeof (repr));
1350 repr.base.base.kind = lendian16 (BRIG_KIND_INST_SIGNAL);
1351 repr.base.opcode = lendian16 (mem->m_opcode);
1352 repr.base.type = lendian16 (mem->m_type);
1353 repr.base.operands = lendian32 (emit_insn_operands (mem));
1354
1355 repr.memoryOrder = mem->m_memory_order;
1356 repr.signalOperation = mem->m_signalop;
1357 repr.signalType = hsa_machine_large_p () ? BRIG_TYPE_SIG64 : BRIG_TYPE_SIG32;
1358
1359 brig_code.add (&repr, sizeof (repr));
1360 brig_insn_count++;
1361 }
1362
1363 /* Emit an HSA atomic memory instruction and all necessary directives, schedule
1364 necessary operands for writing. */
1365
1366 static void
1367 emit_atomic_insn (hsa_insn_atomic *mem)
1368 {
1369 struct BrigInstAtomic repr;
1370
1371 /* Either operand[0] or operand[1] must be an address operand. */
1372 hsa_op_address *addr = NULL;
1373 if (is_a <hsa_op_address *> (mem->get_op (0)))
1374 addr = as_a <hsa_op_address *> (mem->get_op (0));
1375 else
1376 addr = as_a <hsa_op_address *> (mem->get_op (1));
1377
1378 memset (&repr, 0, sizeof (repr));
1379 repr.base.base.byteCount = lendian16 (sizeof (repr));
1380 repr.base.base.kind = lendian16 (BRIG_KIND_INST_ATOMIC);
1381 repr.base.opcode = lendian16 (mem->m_opcode);
1382 repr.base.type = lendian16 (mem->m_type);
1383 repr.base.operands = lendian32 (emit_insn_operands (mem));
1384
1385 if (addr->m_symbol)
1386 repr.segment = addr->m_symbol->m_segment;
1387 else
1388 repr.segment = BRIG_SEGMENT_FLAT;
1389 repr.memoryOrder = mem->m_memoryorder;
1390 repr.memoryScope = mem->m_memoryscope;
1391 repr.atomicOperation = mem->m_atomicop;
1392
1393 brig_code.add (&repr, sizeof (repr));
1394 brig_insn_count++;
1395 }
1396
1397 /* Emit an HSA LDA instruction and all necessary directives, schedule
1398 necessary operands for writing. */
1399
1400 static void
1401 emit_addr_insn (hsa_insn_basic *insn)
1402 {
1403 struct BrigInstAddr repr;
1404
1405 hsa_op_address *addr = as_a <hsa_op_address *> (insn->get_op (1));
1406
1407 repr.base.base.byteCount = lendian16 (sizeof (repr));
1408 repr.base.base.kind = lendian16 (BRIG_KIND_INST_ADDR);
1409 repr.base.opcode = lendian16 (insn->m_opcode);
1410 repr.base.type = lendian16 (insn->m_type);
1411 repr.base.operands = lendian32 (emit_insn_operands (insn));
1412
1413 if (addr->m_symbol)
1414 repr.segment = addr->m_symbol->m_segment;
1415 else
1416 repr.segment = BRIG_SEGMENT_FLAT;
1417 memset (&repr.reserved, 0, sizeof (repr.reserved));
1418
1419 brig_code.add (&repr, sizeof (repr));
1420 brig_insn_count++;
1421 }
1422
1423 /* Emit an HSA segment conversion instruction and all necessary directives,
1424 schedule necessary operands for writing. */
1425
1426 static void
1427 emit_segment_insn (hsa_insn_seg *seg)
1428 {
1429 struct BrigInstSegCvt repr;
1430
1431 repr.base.base.byteCount = lendian16 (sizeof (repr));
1432 repr.base.base.kind = lendian16 (BRIG_KIND_INST_SEG_CVT);
1433 repr.base.opcode = lendian16 (seg->m_opcode);
1434 repr.base.type = lendian16 (seg->m_type);
1435 repr.base.operands = lendian32 (emit_insn_operands (seg));
1436 repr.sourceType = lendian16 (as_a <hsa_op_reg *> (seg->get_op (1))->m_type);
1437 repr.segment = seg->m_segment;
1438 repr.modifier = 0;
1439
1440 brig_code.add (&repr, sizeof (repr));
1441
1442 brig_insn_count++;
1443 }
1444
1445 /* Emit an HSA alloca instruction and all necessary directives,
1446 schedule necessary operands for writing. */
1447
1448 static void
1449 emit_alloca_insn (hsa_insn_alloca *alloca)
1450 {
1451 struct BrigInstMem repr;
1452 gcc_checking_assert (alloca->operand_count () == 2);
1453
1454 memset (&repr, 0, sizeof (repr));
1455 repr.base.base.byteCount = lendian16 (sizeof (repr));
1456 repr.base.base.kind = lendian16 (BRIG_KIND_INST_MEM);
1457 repr.base.opcode = lendian16 (alloca->m_opcode);
1458 repr.base.type = lendian16 (alloca->m_type);
1459 repr.base.operands = lendian32 (emit_insn_operands (alloca));
1460 repr.segment = BRIG_SEGMENT_PRIVATE;
1461 repr.modifier = 0;
1462 repr.equivClass = 0;
1463 repr.align = alloca->m_align;
1464 repr.width = BRIG_WIDTH_NONE;
1465 memset (&repr.reserved, 0, sizeof (repr.reserved));
1466 brig_code.add (&repr, sizeof (repr));
1467 brig_insn_count++;
1468 }
1469
1470 /* Emit an HSA comparison instruction and all necessary directives,
1471 schedule necessary operands for writing. */
1472
1473 static void
1474 emit_cmp_insn (hsa_insn_cmp *cmp)
1475 {
1476 struct BrigInstCmp repr;
1477
1478 memset (&repr, 0, sizeof (repr));
1479 repr.base.base.byteCount = lendian16 (sizeof (repr));
1480 repr.base.base.kind = lendian16 (BRIG_KIND_INST_CMP);
1481 repr.base.opcode = lendian16 (cmp->m_opcode);
1482 repr.base.type = lendian16 (cmp->m_type);
1483 repr.base.operands = lendian32 (emit_insn_operands (cmp));
1484
1485 if (is_a <hsa_op_reg *> (cmp->get_op (1)))
1486 repr.sourceType
1487 = lendian16 (as_a <hsa_op_reg *> (cmp->get_op (1))->m_type);
1488 else
1489 repr.sourceType
1490 = lendian16 (as_a <hsa_op_immed *> (cmp->get_op (1))->m_type);
1491 repr.modifier = 0;
1492 repr.compare = cmp->m_compare;
1493 repr.pack = 0;
1494
1495 brig_code.add (&repr, sizeof (repr));
1496 brig_insn_count++;
1497 }
1498
1499 /* Emit an HSA generic branching/sycnronization instruction. */
1500
1501 static void
1502 emit_generic_branch_insn (hsa_insn_br *br)
1503 {
1504 struct BrigInstBr repr;
1505 repr.base.base.byteCount = lendian16 (sizeof (repr));
1506 repr.base.base.kind = lendian16 (BRIG_KIND_INST_BR);
1507 repr.base.opcode = lendian16 (br->m_opcode);
1508 repr.width = br->m_width;
1509 repr.base.type = lendian16 (br->m_type);
1510 repr.base.operands = lendian32 (emit_insn_operands (br));
1511 memset (&repr.reserved, 0, sizeof (repr.reserved));
1512
1513 brig_code.add (&repr, sizeof (repr));
1514 brig_insn_count++;
1515 }
1516
1517 /* Emit an HSA conditional branching instruction and all necessary directives,
1518 schedule necessary operands for writing. */
1519
1520 static void
1521 emit_cond_branch_insn (hsa_insn_cbr *br)
1522 {
1523 struct BrigInstBr repr;
1524
1525 basic_block target = NULL;
1526 edge_iterator ei;
1527 edge e;
1528
1529 /* At the moment we only handle direct conditional jumps. */
1530 gcc_assert (br->m_opcode == BRIG_OPCODE_CBR);
1531 repr.base.base.byteCount = lendian16 (sizeof (repr));
1532 repr.base.base.kind = lendian16 (BRIG_KIND_INST_BR);
1533 repr.base.opcode = lendian16 (br->m_opcode);
1534 repr.width = br->m_width;
1535 /* For Conditional jumps the type is always B1. */
1536 repr.base.type = lendian16 (BRIG_TYPE_B1);
1537
1538 FOR_EACH_EDGE (e, ei, br->m_bb->succs)
1539 if (e->flags & EDGE_TRUE_VALUE)
1540 {
1541 target = e->dest;
1542 break;
1543 }
1544 gcc_assert (target);
1545
1546 repr.base.operands
1547 = lendian32 (emit_operands (br->get_op (0),
1548 &hsa_bb_for_bb (target)->m_label_ref));
1549 memset (&repr.reserved, 0, sizeof (repr.reserved));
1550
1551 brig_code.add (&repr, sizeof (repr));
1552 brig_insn_count++;
1553 }
1554
1555 /* Emit an HSA unconditional jump branching instruction that points to
1556 a label REFERENCE. */
1557
1558 static void
1559 emit_unconditional_jump (hsa_op_code_ref *reference)
1560 {
1561 struct BrigInstBr repr;
1562
1563 repr.base.base.byteCount = lendian16 (sizeof (repr));
1564 repr.base.base.kind = lendian16 (BRIG_KIND_INST_BR);
1565 repr.base.opcode = lendian16 (BRIG_OPCODE_BR);
1566 repr.base.type = lendian16 (BRIG_TYPE_NONE);
1567 /* Direct branches to labels must be width(all). */
1568 repr.width = BRIG_WIDTH_ALL;
1569
1570 repr.base.operands = lendian32 (emit_operands (reference));
1571 memset (&repr.reserved, 0, sizeof (repr.reserved));
1572 brig_code.add (&repr, sizeof (repr));
1573 brig_insn_count++;
1574 }
1575
1576 /* Emit an HSA switch jump instruction that uses a jump table to
1577 jump to a destination label. */
1578
1579 static void
1580 emit_switch_insn (hsa_insn_sbr *sbr)
1581 {
1582 struct BrigInstBr repr;
1583
1584 gcc_assert (sbr->m_opcode == BRIG_OPCODE_SBR);
1585 repr.base.base.byteCount = lendian16 (sizeof (repr));
1586 repr.base.base.kind = lendian16 (BRIG_KIND_INST_BR);
1587 repr.base.opcode = lendian16 (sbr->m_opcode);
1588 repr.width = BRIG_WIDTH_1;
1589 /* For Conditional jumps the type is always B1. */
1590 hsa_op_reg *index = as_a <hsa_op_reg *> (sbr->get_op (0));
1591 repr.base.type = lendian16 (index->m_type);
1592 repr.base.operands
1593 = lendian32 (emit_operands (sbr->get_op (0), sbr->m_label_code_list));
1594 memset (&repr.reserved, 0, sizeof (repr.reserved));
1595
1596 brig_code.add (&repr, sizeof (repr));
1597 brig_insn_count++;
1598 }
1599
1600 /* Emit a HSA convert instruction and all necessary directives, schedule
1601 necessary operands for writing. */
1602
1603 static void
1604 emit_cvt_insn (hsa_insn_cvt *insn)
1605 {
1606 struct BrigInstCvt repr;
1607 BrigType16_t srctype;
1608
1609 repr.base.base.byteCount = lendian16 (sizeof (repr));
1610 repr.base.base.kind = lendian16 (BRIG_KIND_INST_CVT);
1611 repr.base.opcode = lendian16 (insn->m_opcode);
1612 repr.base.type = lendian16 (insn->m_type);
1613 repr.base.operands = lendian32 (emit_insn_operands (insn));
1614
1615 if (is_a <hsa_op_reg *> (insn->get_op (1)))
1616 srctype = as_a <hsa_op_reg *> (insn->get_op (1))->m_type;
1617 else
1618 srctype = as_a <hsa_op_immed *> (insn->get_op (1))->m_type;
1619 repr.sourceType = lendian16 (srctype);
1620 repr.modifier = 0;
1621 /* float to smaller float requires a rounding setting (we default
1622 to 'near'. */
1623 if (hsa_type_float_p (insn->m_type)
1624 && (!hsa_type_float_p (srctype)
1625 || ((insn->m_type & BRIG_TYPE_BASE_MASK)
1626 < (srctype & BRIG_TYPE_BASE_MASK))))
1627 repr.round = BRIG_ROUND_FLOAT_NEAR_EVEN;
1628 else if (hsa_type_integer_p (insn->m_type) &&
1629 hsa_type_float_p (srctype))
1630 repr.round = BRIG_ROUND_INTEGER_ZERO;
1631 else
1632 repr.round = BRIG_ROUND_NONE;
1633 brig_code.add (&repr, sizeof (repr));
1634 brig_insn_count++;
1635 }
1636
1637 /* Emit call instruction INSN, where this instruction must be closed
1638 within a call block instruction. */
1639
1640 static void
1641 emit_call_insn (hsa_insn_call *call)
1642 {
1643 struct BrigInstBr repr;
1644
1645 repr.base.base.byteCount = lendian16 (sizeof (repr));
1646 repr.base.base.kind = lendian16 (BRIG_KIND_INST_BR);
1647 repr.base.opcode = lendian16 (BRIG_OPCODE_CALL);
1648 repr.base.type = lendian16 (BRIG_TYPE_NONE);
1649
1650 repr.base.operands
1651 = lendian32 (emit_operands (call->m_result_code_list, &call->m_func,
1652 call->m_args_code_list));
1653
1654 /* Internal functions have not set m_called_function. */
1655 if (call->m_called_function)
1656 {
1657 function_linkage_pair pair (call->m_called_function,
1658 call->m_func.m_brig_op_offset);
1659 function_call_linkage.safe_push (pair);
1660 }
1661 else
1662 {
1663 hsa_internal_fn *slot
1664 = hsa_emitted_internal_decls->find (call->m_called_internal_fn);
1665 gcc_assert (slot);
1666 gcc_assert (slot->m_offset > 0);
1667 call->m_func.m_directive_offset = slot->m_offset;
1668 }
1669
1670 repr.width = BRIG_WIDTH_ALL;
1671 memset (&repr.reserved, 0, sizeof (repr.reserved));
1672
1673 brig_code.add (&repr, sizeof (repr));
1674 brig_insn_count++;
1675 }
1676
1677 /* Emit argument block directive. */
1678
1679 static void
1680 emit_arg_block_insn (hsa_insn_arg_block *insn)
1681 {
1682 switch (insn->m_kind)
1683 {
1684 case BRIG_KIND_DIRECTIVE_ARG_BLOCK_START:
1685 {
1686 struct BrigDirectiveArgBlock repr;
1687 repr.base.byteCount = lendian16 (sizeof (repr));
1688 repr.base.kind = lendian16 (insn->m_kind);
1689 brig_code.add (&repr, sizeof (repr));
1690
1691 for (unsigned i = 0; i < insn->m_call_insn->m_input_args.length (); i++)
1692 {
1693 insn->m_call_insn->m_args_code_list->m_offsets[i]
1694 = lendian32 (emit_directive_variable
1695 (insn->m_call_insn->m_input_args[i]));
1696 brig_insn_count++;
1697 }
1698
1699 if (insn->m_call_insn->m_output_arg)
1700 {
1701 insn->m_call_insn->m_result_code_list->m_offsets[0]
1702 = lendian32 (emit_directive_variable
1703 (insn->m_call_insn->m_output_arg));
1704 brig_insn_count++;
1705 }
1706
1707 break;
1708 }
1709 case BRIG_KIND_DIRECTIVE_ARG_BLOCK_END:
1710 {
1711 struct BrigDirectiveArgBlock repr;
1712 repr.base.byteCount = lendian16 (sizeof (repr));
1713 repr.base.kind = lendian16 (insn->m_kind);
1714 brig_code.add (&repr, sizeof (repr));
1715 break;
1716 }
1717 default:
1718 gcc_unreachable ();
1719 }
1720
1721 brig_insn_count++;
1722 }
1723
1724 /* Emit comment directive. */
1725
1726 static void
1727 emit_comment_insn (hsa_insn_comment *insn)
1728 {
1729 struct BrigDirectiveComment repr;
1730 memset (&repr, 0, sizeof (repr));
1731
1732 repr.base.byteCount = lendian16 (sizeof (repr));
1733 repr.base.kind = lendian16 (insn->m_opcode);
1734 repr.name = brig_emit_string (insn->m_comment, '\0', false);
1735 brig_code.add (&repr, sizeof (repr));
1736 }
1737
1738 /* Emit queue instruction INSN. */
1739
1740 static void
1741 emit_queue_insn (hsa_insn_queue *insn)
1742 {
1743 BrigInstQueue repr;
1744 memset (&repr, 0, sizeof (repr));
1745
1746 repr.base.base.byteCount = lendian16 (sizeof (repr));
1747 repr.base.base.kind = lendian16 (BRIG_KIND_INST_QUEUE);
1748 repr.base.opcode = lendian16 (insn->m_opcode);
1749 repr.base.type = lendian16 (insn->m_type);
1750 repr.segment = insn->m_segment;
1751 repr.memoryOrder = insn->m_memory_order;
1752 repr.base.operands = lendian32 (emit_insn_operands (insn));
1753 brig_data.round_size_up (4);
1754 brig_code.add (&repr, sizeof (repr));
1755
1756 brig_insn_count++;
1757 }
1758
1759 /* Emit source type instruction INSN. */
1760
1761 static void
1762 emit_srctype_insn (hsa_insn_srctype *insn)
1763 {
1764 /* We assume that BrigInstMod has a BrigInstBasic prefix. */
1765 struct BrigInstSourceType repr;
1766 unsigned operand_count = insn->operand_count ();
1767 gcc_checking_assert (operand_count >= 2);
1768
1769 memset (&repr, 0, sizeof (repr));
1770 repr.sourceType = lendian16 (insn->m_source_type);
1771 repr.base.base.byteCount = lendian16 (sizeof (repr));
1772 repr.base.base.kind = lendian16 (BRIG_KIND_INST_SOURCE_TYPE);
1773 repr.base.opcode = lendian16 (insn->m_opcode);
1774 repr.base.type = lendian16 (insn->m_type);
1775
1776 repr.base.operands = lendian32 (emit_insn_operands (insn));
1777 brig_code.add (&repr, sizeof (struct BrigInstSourceType));
1778 brig_insn_count++;
1779 }
1780
1781 /* Emit packed instruction INSN. */
1782
1783 static void
1784 emit_packed_insn (hsa_insn_packed *insn)
1785 {
1786 /* We assume that BrigInstMod has a BrigInstBasic prefix. */
1787 struct BrigInstSourceType repr;
1788 unsigned operand_count = insn->operand_count ();
1789 gcc_checking_assert (operand_count >= 2);
1790
1791 memset (&repr, 0, sizeof (repr));
1792 repr.sourceType = lendian16 (insn->m_source_type);
1793 repr.base.base.byteCount = lendian16 (sizeof (repr));
1794 repr.base.base.kind = lendian16 (BRIG_KIND_INST_SOURCE_TYPE);
1795 repr.base.opcode = lendian16 (insn->m_opcode);
1796 repr.base.type = lendian16 (insn->m_type);
1797
1798 if (insn->m_opcode == BRIG_OPCODE_COMBINE)
1799 {
1800 /* Create operand list for packed type. */
1801 for (unsigned i = 1; i < operand_count; i++)
1802 {
1803 gcc_checking_assert (insn->get_op (i));
1804 insn->m_operand_list->m_offsets[i - 1]
1805 = lendian32 (enqueue_op (insn->get_op (i)));
1806 }
1807
1808 repr.base.operands = lendian32 (emit_operands (insn->get_op (0),
1809 insn->m_operand_list));
1810 }
1811 else if (insn->m_opcode == BRIG_OPCODE_EXPAND)
1812 {
1813 /* Create operand list for packed type. */
1814 for (unsigned i = 0; i < operand_count - 1; i++)
1815 {
1816 gcc_checking_assert (insn->get_op (i));
1817 insn->m_operand_list->m_offsets[i]
1818 = lendian32 (enqueue_op (insn->get_op (i)));
1819 }
1820
1821 unsigned ops = emit_operands (insn->m_operand_list,
1822 insn->get_op (insn->operand_count () - 1));
1823 repr.base.operands = lendian32 (ops);
1824 }
1825
1826
1827 brig_code.add (&repr, sizeof (struct BrigInstSourceType));
1828 brig_insn_count++;
1829 }
1830
1831 /* Emit a basic HSA instruction and all necessary directives, schedule
1832 necessary operands for writing. */
1833
1834 static void
1835 emit_basic_insn (hsa_insn_basic *insn)
1836 {
1837 /* We assume that BrigInstMod has a BrigInstBasic prefix. */
1838 struct BrigInstMod repr;
1839 BrigType16_t type;
1840
1841 memset (&repr, 0, sizeof (repr));
1842 repr.base.base.byteCount = lendian16 (sizeof (BrigInstBasic));
1843 repr.base.base.kind = lendian16 (BRIG_KIND_INST_BASIC);
1844 repr.base.opcode = lendian16 (insn->m_opcode);
1845 switch (insn->m_opcode)
1846 {
1847 /* And the bit-logical operations need bit types and whine about
1848 arithmetic types :-/ */
1849 case BRIG_OPCODE_AND:
1850 case BRIG_OPCODE_OR:
1851 case BRIG_OPCODE_XOR:
1852 case BRIG_OPCODE_NOT:
1853 type = regtype_for_type (insn->m_type);
1854 break;
1855 default:
1856 type = insn->m_type;
1857 break;
1858 }
1859 repr.base.type = lendian16 (type);
1860 repr.base.operands = lendian32 (emit_insn_operands (insn));
1861
1862 if (hsa_type_packed_p (type))
1863 {
1864 if (hsa_type_float_p (type)
1865 && !hsa_opcode_floating_bit_insn_p (insn->m_opcode))
1866 repr.round = BRIG_ROUND_FLOAT_NEAR_EVEN;
1867 else
1868 repr.round = 0;
1869 /* We assume that destination and sources agree in packing layout. */
1870 if (insn->num_used_ops () >= 2)
1871 repr.pack = BRIG_PACK_PP;
1872 else
1873 repr.pack = BRIG_PACK_P;
1874 repr.reserved = 0;
1875 repr.base.base.byteCount = lendian16 (sizeof (BrigInstMod));
1876 repr.base.base.kind = lendian16 (BRIG_KIND_INST_MOD);
1877 brig_code.add (&repr, sizeof (struct BrigInstMod));
1878 }
1879 else
1880 brig_code.add (&repr, sizeof (struct BrigInstBasic));
1881 brig_insn_count++;
1882 }
1883
1884 /* Emit an HSA instruction and all necessary directives, schedule necessary
1885 operands for writing. */
1886
1887 static void
1888 emit_insn (hsa_insn_basic *insn)
1889 {
1890 gcc_assert (!is_a <hsa_insn_phi *> (insn));
1891
1892 insn->m_brig_offset = brig_code.total_size;
1893
1894 if (hsa_insn_signal *signal = dyn_cast <hsa_insn_signal *> (insn))
1895 emit_signal_insn (signal);
1896 else if (hsa_insn_atomic *atom = dyn_cast <hsa_insn_atomic *> (insn))
1897 emit_atomic_insn (atom);
1898 else if (hsa_insn_mem *mem = dyn_cast <hsa_insn_mem *> (insn))
1899 emit_memory_insn (mem);
1900 else if (insn->m_opcode == BRIG_OPCODE_LDA)
1901 emit_addr_insn (insn);
1902 else if (hsa_insn_seg *seg = dyn_cast <hsa_insn_seg *> (insn))
1903 emit_segment_insn (seg);
1904 else if (hsa_insn_cmp *cmp = dyn_cast <hsa_insn_cmp *> (insn))
1905 emit_cmp_insn (cmp);
1906 else if (hsa_insn_cbr *br = dyn_cast <hsa_insn_cbr *> (insn))
1907 emit_cond_branch_insn (br);
1908 else if (hsa_insn_sbr *sbr = dyn_cast <hsa_insn_sbr *> (insn))
1909 {
1910 if (switch_instructions == NULL)
1911 switch_instructions = new vec <hsa_insn_sbr *> ();
1912
1913 switch_instructions->safe_push (sbr);
1914 emit_switch_insn (sbr);
1915 }
1916 else if (hsa_insn_br *br = dyn_cast <hsa_insn_br *> (insn))
1917 emit_generic_branch_insn (br);
1918 else if (hsa_insn_arg_block *block = dyn_cast <hsa_insn_arg_block *> (insn))
1919 emit_arg_block_insn (block);
1920 else if (hsa_insn_call *call = dyn_cast <hsa_insn_call *> (insn))
1921 emit_call_insn (call);
1922 else if (hsa_insn_comment *comment = dyn_cast <hsa_insn_comment *> (insn))
1923 emit_comment_insn (comment);
1924 else if (hsa_insn_queue *queue = dyn_cast <hsa_insn_queue *> (insn))
1925 emit_queue_insn (queue);
1926 else if (hsa_insn_srctype *srctype = dyn_cast <hsa_insn_srctype *> (insn))
1927 emit_srctype_insn (srctype);
1928 else if (hsa_insn_packed *packed = dyn_cast <hsa_insn_packed *> (insn))
1929 emit_packed_insn (packed);
1930 else if (hsa_insn_cvt *cvt = dyn_cast <hsa_insn_cvt *> (insn))
1931 emit_cvt_insn (cvt);
1932 else if (hsa_insn_alloca *alloca = dyn_cast <hsa_insn_alloca *> (insn))
1933 emit_alloca_insn (alloca);
1934 else
1935 emit_basic_insn (insn);
1936 }
1937
1938 /* We have just finished emitting BB and are about to emit NEXT_BB if non-NULL,
1939 or we are about to finish emitting code, if it is NULL. If the fall through
1940 edge from BB does not lead to NEXT_BB, emit an unconditional jump. */
1941
1942 static void
1943 perhaps_emit_branch (basic_block bb, basic_block next_bb)
1944 {
1945 basic_block t_bb = NULL, ff = NULL;
1946
1947 edge_iterator ei;
1948 edge e;
1949
1950 /* If the last instruction of BB is a switch, ignore emission of all
1951 edges. */
1952 if (hsa_bb_for_bb (bb)->m_last_insn
1953 && is_a <hsa_insn_sbr *> (hsa_bb_for_bb (bb)->m_last_insn))
1954 return;
1955
1956 FOR_EACH_EDGE (e, ei, bb->succs)
1957 if (e->flags & EDGE_TRUE_VALUE)
1958 {
1959 gcc_assert (!t_bb);
1960 t_bb = e->dest;
1961 }
1962 else
1963 {
1964 gcc_assert (!ff);
1965 ff = e->dest;
1966 }
1967
1968 if (!ff || ff == next_bb || ff == EXIT_BLOCK_PTR_FOR_FN (cfun))
1969 return;
1970
1971 emit_unconditional_jump (&hsa_bb_for_bb (ff)->m_label_ref);
1972 }
1973
1974 /* Emit the a function with name NAME to the various brig sections. */
1975
1976 void
1977 hsa_brig_emit_function (void)
1978 {
1979 basic_block bb, prev_bb;
1980 hsa_insn_basic *insn;
1981 BrigDirectiveExecutable *ptr_to_fndir;
1982
1983 brig_init ();
1984
1985 brig_insn_count = 0;
1986 memset (&op_queue, 0, sizeof (op_queue));
1987 op_queue.projected_size = brig_operand.total_size;
1988
1989 if (!function_offsets)
1990 function_offsets = new hash_map<tree, BrigCodeOffset32_t> ();
1991
1992 if (!emitted_declarations)
1993 emitted_declarations = new hash_map <tree, BrigDirectiveExecutable *> ();
1994
1995 for (unsigned i = 0; i < hsa_cfun->m_called_functions.length (); i++)
1996 {
1997 tree called = hsa_cfun->m_called_functions[i];
1998
1999 /* If the function has no definition, emit a declaration. */
2000 if (!emitted_declarations->get (called))
2001 {
2002 BrigDirectiveExecutable *e = emit_function_declaration (called);
2003 emitted_declarations->put (called, e);
2004 }
2005 }
2006
2007 for (unsigned i = 0; i < hsa_cfun->m_called_internal_fns.length (); i++)
2008 {
2009 hsa_internal_fn *called = hsa_cfun->m_called_internal_fns[i];
2010 emit_internal_fn_decl (called);
2011 }
2012
2013 ptr_to_fndir = emit_function_directives (hsa_cfun, false);
2014 for (insn = hsa_bb_for_bb (ENTRY_BLOCK_PTR_FOR_FN (cfun))->m_first_insn;
2015 insn;
2016 insn = insn->m_next)
2017 emit_insn (insn);
2018 prev_bb = ENTRY_BLOCK_PTR_FOR_FN (cfun);
2019 FOR_EACH_BB_FN (bb, cfun)
2020 {
2021 perhaps_emit_branch (prev_bb, bb);
2022 emit_bb_label_directive (hsa_bb_for_bb (bb));
2023 for (insn = hsa_bb_for_bb (bb)->m_first_insn; insn; insn = insn->m_next)
2024 emit_insn (insn);
2025 prev_bb = bb;
2026 }
2027 perhaps_emit_branch (prev_bb, NULL);
2028 ptr_to_fndir->nextModuleEntry = lendian32 (brig_code.total_size);
2029
2030 /* Fill up label references for all sbr instructions. */
2031 if (switch_instructions)
2032 {
2033 for (unsigned i = 0; i < switch_instructions->length (); i++)
2034 {
2035 hsa_insn_sbr *sbr = (*switch_instructions)[i];
2036 for (unsigned j = 0; j < sbr->m_jump_table.length (); j++)
2037 {
2038 hsa_bb *hbb = hsa_bb_for_bb (sbr->m_jump_table[j]);
2039 sbr->m_label_code_list->m_offsets[j]
2040 = hbb->m_label_ref.m_directive_offset;
2041 }
2042 }
2043
2044 switch_instructions->release ();
2045 delete switch_instructions;
2046 switch_instructions = NULL;
2047 }
2048
2049 if (dump_file)
2050 {
2051 fprintf (dump_file, "------- After BRIG emission: -------\n");
2052 dump_hsa_cfun (dump_file);
2053 }
2054
2055 emit_queued_operands ();
2056 }
2057
2058 /* Emit all OMP symbols related to OMP. */
2059
2060 void
2061 hsa_brig_emit_omp_symbols (void)
2062 {
2063 brig_init ();
2064 emit_directive_variable (hsa_num_threads);
2065 }
2066
2067 /* Create and return __hsa_global_variables symbol that contains
2068 all informations consumed by libgomp to link global variables
2069 with their string names used by an HSA kernel. */
2070
2071 static tree
2072 hsa_output_global_variables ()
2073 {
2074 unsigned l = hsa_global_variable_symbols->elements ();
2075
2076 tree variable_info_type = make_node (RECORD_TYPE);
2077 tree id_f1 = build_decl (BUILTINS_LOCATION, FIELD_DECL,
2078 get_identifier ("name"), ptr_type_node);
2079 DECL_CHAIN (id_f1) = NULL_TREE;
2080 tree id_f2 = build_decl (BUILTINS_LOCATION, FIELD_DECL,
2081 get_identifier ("omp_data_size"),
2082 ptr_type_node);
2083 DECL_CHAIN (id_f2) = id_f1;
2084 finish_builtin_struct (variable_info_type, "__hsa_variable_info", id_f2,
2085 NULL_TREE);
2086
2087 tree int_num_of_global_vars;
2088 int_num_of_global_vars = build_int_cst (uint32_type_node, l);
2089 tree global_vars_num_index_type = build_index_type (int_num_of_global_vars);
2090 tree global_vars_array_type = build_array_type (variable_info_type,
2091 global_vars_num_index_type);
2092 TYPE_ARTIFICIAL (global_vars_array_type) = 1;
2093
2094 vec<constructor_elt, va_gc> *global_vars_vec = NULL;
2095
2096 for (hash_table <hsa_noop_symbol_hasher>::iterator it
2097 = hsa_global_variable_symbols->begin ();
2098 it != hsa_global_variable_symbols->end (); ++it)
2099 {
2100 unsigned len = strlen ((*it)->m_name);
2101 char *copy = XNEWVEC (char, len + 2);
2102 copy[0] = '&';
2103 memcpy (copy + 1, (*it)->m_name, len);
2104 copy[len + 1] = '\0';
2105 len++;
2106 hsa_sanitize_name (copy);
2107
2108 tree var_name = build_string (len, copy);
2109 TREE_TYPE (var_name)
2110 = build_array_type (char_type_node, build_index_type (size_int (len)));
2111 free (copy);
2112
2113 vec<constructor_elt, va_gc> *variable_info_vec = NULL;
2114 CONSTRUCTOR_APPEND_ELT (variable_info_vec, NULL_TREE,
2115 build1 (ADDR_EXPR,
2116 build_pointer_type (TREE_TYPE (var_name)),
2117 var_name));
2118 CONSTRUCTOR_APPEND_ELT (variable_info_vec, NULL_TREE,
2119 build_fold_addr_expr ((*it)->m_decl));
2120
2121 tree variable_info_ctor = build_constructor (variable_info_type,
2122 variable_info_vec);
2123
2124 CONSTRUCTOR_APPEND_ELT (global_vars_vec, NULL_TREE,
2125 variable_info_ctor);
2126 }
2127
2128 tree global_vars_ctor = build_constructor (global_vars_array_type,
2129 global_vars_vec);
2130
2131 char tmp_name[64];
2132 ASM_GENERATE_INTERNAL_LABEL (tmp_name, "__hsa_global_variables", 1);
2133 tree global_vars_table = build_decl (UNKNOWN_LOCATION, VAR_DECL,
2134 get_identifier (tmp_name),
2135 global_vars_array_type);
2136 TREE_STATIC (global_vars_table) = 1;
2137 TREE_READONLY (global_vars_table) = 1;
2138 TREE_PUBLIC (global_vars_table) = 0;
2139 DECL_ARTIFICIAL (global_vars_table) = 1;
2140 DECL_IGNORED_P (global_vars_table) = 1;
2141 DECL_EXTERNAL (global_vars_table) = 0;
2142 TREE_CONSTANT (global_vars_table) = 1;
2143 DECL_INITIAL (global_vars_table) = global_vars_ctor;
2144 varpool_node::finalize_decl (global_vars_table);
2145
2146 return global_vars_table;
2147 }
2148
2149 /* Create __hsa_host_functions and __hsa_kernels that contain
2150 all informations consumed by libgomp to register all kernels
2151 in the BRIG binary. */
2152
2153 static void
2154 hsa_output_kernels (tree *host_func_table, tree *kernels)
2155 {
2156 unsigned map_count = hsa_get_number_decl_kernel_mappings ();
2157
2158 tree int_num_of_kernels;
2159 int_num_of_kernels = build_int_cst (uint32_type_node, map_count);
2160 tree kernel_num_index_type = build_index_type (int_num_of_kernels);
2161 tree host_functions_array_type = build_array_type (ptr_type_node,
2162 kernel_num_index_type);
2163 TYPE_ARTIFICIAL (host_functions_array_type) = 1;
2164
2165 vec<constructor_elt, va_gc> *host_functions_vec = NULL;
2166 for (unsigned i = 0; i < map_count; ++i)
2167 {
2168 tree decl = hsa_get_decl_kernel_mapping_decl (i);
2169 tree host_fn = build_fold_addr_expr (hsa_get_host_function (decl));
2170 CONSTRUCTOR_APPEND_ELT (host_functions_vec, NULL_TREE, host_fn);
2171 }
2172 tree host_functions_ctor = build_constructor (host_functions_array_type,
2173 host_functions_vec);
2174 char tmp_name[64];
2175 ASM_GENERATE_INTERNAL_LABEL (tmp_name, "__hsa_host_functions", 1);
2176 tree hsa_host_func_table = build_decl (UNKNOWN_LOCATION, VAR_DECL,
2177 get_identifier (tmp_name),
2178 host_functions_array_type);
2179 TREE_STATIC (hsa_host_func_table) = 1;
2180 TREE_READONLY (hsa_host_func_table) = 1;
2181 TREE_PUBLIC (hsa_host_func_table) = 0;
2182 DECL_ARTIFICIAL (hsa_host_func_table) = 1;
2183 DECL_IGNORED_P (hsa_host_func_table) = 1;
2184 DECL_EXTERNAL (hsa_host_func_table) = 0;
2185 TREE_CONSTANT (hsa_host_func_table) = 1;
2186 DECL_INITIAL (hsa_host_func_table) = host_functions_ctor;
2187 varpool_node::finalize_decl (hsa_host_func_table);
2188 *host_func_table = hsa_host_func_table;
2189
2190 /* Following code emits list of kernel_info structures. */
2191
2192 tree kernel_info_type = make_node (RECORD_TYPE);
2193 tree id_f1 = build_decl (BUILTINS_LOCATION, FIELD_DECL,
2194 get_identifier ("name"), ptr_type_node);
2195 DECL_CHAIN (id_f1) = NULL_TREE;
2196 tree id_f2 = build_decl (BUILTINS_LOCATION, FIELD_DECL,
2197 get_identifier ("omp_data_size"),
2198 unsigned_type_node);
2199 DECL_CHAIN (id_f2) = id_f1;
2200 tree id_f3 = build_decl (BUILTINS_LOCATION, FIELD_DECL,
2201 get_identifier ("gridified_kernel_p"),
2202 boolean_type_node);
2203 DECL_CHAIN (id_f3) = id_f2;
2204 tree id_f4 = build_decl (BUILTINS_LOCATION, FIELD_DECL,
2205 get_identifier ("kernel_dependencies_count"),
2206 unsigned_type_node);
2207 DECL_CHAIN (id_f4) = id_f3;
2208 tree id_f5 = build_decl (BUILTINS_LOCATION, FIELD_DECL,
2209 get_identifier ("kernel_dependencies"),
2210 build_pointer_type (build_pointer_type
2211 (char_type_node)));
2212 DECL_CHAIN (id_f5) = id_f4;
2213 finish_builtin_struct (kernel_info_type, "__hsa_kernel_info", id_f5,
2214 NULL_TREE);
2215
2216 int_num_of_kernels = build_int_cstu (uint32_type_node, map_count);
2217 tree kernel_info_vector_type
2218 = build_array_type (kernel_info_type,
2219 build_index_type (int_num_of_kernels));
2220 TYPE_ARTIFICIAL (kernel_info_vector_type) = 1;
2221
2222 vec<constructor_elt, va_gc> *kernel_info_vector_vec = NULL;
2223 tree kernel_dependencies_vector_type = NULL;
2224
2225 for (unsigned i = 0; i < map_count; ++i)
2226 {
2227 tree kernel = hsa_get_decl_kernel_mapping_decl (i);
2228 char *name = hsa_get_decl_kernel_mapping_name (i);
2229 unsigned len = strlen (name);
2230 char *copy = XNEWVEC (char, len + 2);
2231 copy[0] = '&';
2232 memcpy (copy + 1, name, len);
2233 copy[len + 1] = '\0';
2234 len++;
2235
2236 tree kern_name = build_string (len, copy);
2237 TREE_TYPE (kern_name)
2238 = build_array_type (char_type_node, build_index_type (size_int (len)));
2239 free (copy);
2240
2241 unsigned omp_size = hsa_get_decl_kernel_mapping_omp_size (i);
2242 tree omp_data_size = build_int_cstu (unsigned_type_node, omp_size);
2243 bool gridified_kernel_p = hsa_get_decl_kernel_mapping_gridified (i);
2244 tree gridified_kernel_p_tree = build_int_cstu (boolean_type_node,
2245 gridified_kernel_p);
2246 unsigned count = 0;
2247 vec<constructor_elt, va_gc> *kernel_dependencies_vec = NULL;
2248 if (hsa_decl_kernel_dependencies)
2249 {
2250 vec<const char *> **slot;
2251 slot = hsa_decl_kernel_dependencies->get (kernel);
2252 if (slot)
2253 {
2254 vec <const char *> *dependencies = *slot;
2255 count = dependencies->length ();
2256
2257 kernel_dependencies_vector_type
2258 = build_array_type (build_pointer_type (char_type_node),
2259 build_index_type (size_int (count)));
2260 TYPE_ARTIFICIAL (kernel_dependencies_vector_type) = 1;
2261
2262 for (unsigned j = 0; j < count; j++)
2263 {
2264 const char *d = (*dependencies)[j];
2265 len = strlen (d);
2266 tree dependency_name = build_string (len, d);
2267 TREE_TYPE (dependency_name)
2268 = build_array_type (char_type_node,
2269 build_index_type (size_int (len)));
2270
2271 CONSTRUCTOR_APPEND_ELT
2272 (kernel_dependencies_vec, NULL_TREE,
2273 build1 (ADDR_EXPR,
2274 build_pointer_type (TREE_TYPE (dependency_name)),
2275 dependency_name));
2276 }
2277 }
2278 }
2279
2280 tree dependencies_count = build_int_cstu (unsigned_type_node, count);
2281
2282 vec<constructor_elt, va_gc> *kernel_info_vec = NULL;
2283 CONSTRUCTOR_APPEND_ELT (kernel_info_vec, NULL_TREE,
2284 build1 (ADDR_EXPR,
2285 build_pointer_type (TREE_TYPE
2286 (kern_name)),
2287 kern_name));
2288 CONSTRUCTOR_APPEND_ELT (kernel_info_vec, NULL_TREE, omp_data_size);
2289 CONSTRUCTOR_APPEND_ELT (kernel_info_vec, NULL_TREE,
2290 gridified_kernel_p_tree);
2291 CONSTRUCTOR_APPEND_ELT (kernel_info_vec, NULL_TREE, dependencies_count);
2292
2293 if (count > 0)
2294 {
2295 ASM_GENERATE_INTERNAL_LABEL (tmp_name, "__hsa_dependencies_list", i);
2296 gcc_checking_assert (kernel_dependencies_vector_type);
2297 tree dependencies_list = build_decl (UNKNOWN_LOCATION, VAR_DECL,
2298 get_identifier (tmp_name),
2299 kernel_dependencies_vector_type);
2300
2301 TREE_STATIC (dependencies_list) = 1;
2302 TREE_READONLY (dependencies_list) = 1;
2303 TREE_PUBLIC (dependencies_list) = 0;
2304 DECL_ARTIFICIAL (dependencies_list) = 1;
2305 DECL_IGNORED_P (dependencies_list) = 1;
2306 DECL_EXTERNAL (dependencies_list) = 0;
2307 TREE_CONSTANT (dependencies_list) = 1;
2308 DECL_INITIAL (dependencies_list)
2309 = build_constructor (kernel_dependencies_vector_type,
2310 kernel_dependencies_vec);
2311 varpool_node::finalize_decl (dependencies_list);
2312
2313 CONSTRUCTOR_APPEND_ELT (kernel_info_vec, NULL_TREE,
2314 build1 (ADDR_EXPR,
2315 build_pointer_type
2316 (TREE_TYPE (dependencies_list)),
2317 dependencies_list));
2318 }
2319 else
2320 CONSTRUCTOR_APPEND_ELT (kernel_info_vec, NULL_TREE, null_pointer_node);
2321
2322 tree kernel_info_ctor = build_constructor (kernel_info_type,
2323 kernel_info_vec);
2324
2325 CONSTRUCTOR_APPEND_ELT (kernel_info_vector_vec, NULL_TREE,
2326 kernel_info_ctor);
2327 }
2328
2329 ASM_GENERATE_INTERNAL_LABEL (tmp_name, "__hsa_kernels", 1);
2330 tree hsa_kernels = build_decl (UNKNOWN_LOCATION, VAR_DECL,
2331 get_identifier (tmp_name),
2332 kernel_info_vector_type);
2333
2334 TREE_STATIC (hsa_kernels) = 1;
2335 TREE_READONLY (hsa_kernels) = 1;
2336 TREE_PUBLIC (hsa_kernels) = 0;
2337 DECL_ARTIFICIAL (hsa_kernels) = 1;
2338 DECL_IGNORED_P (hsa_kernels) = 1;
2339 DECL_EXTERNAL (hsa_kernels) = 0;
2340 TREE_CONSTANT (hsa_kernels) = 1;
2341 DECL_INITIAL (hsa_kernels) = build_constructor (kernel_info_vector_type,
2342 kernel_info_vector_vec);
2343 varpool_node::finalize_decl (hsa_kernels);
2344 *kernels = hsa_kernels;
2345 }
2346
2347 /* Create a static constructor that will register out brig stuff with
2348 libgomp. */
2349
2350 static void
2351 hsa_output_libgomp_mapping (tree brig_decl)
2352 {
2353 unsigned kernel_count = hsa_get_number_decl_kernel_mappings ();
2354 unsigned global_variable_count = hsa_global_variable_symbols->elements ();
2355
2356 tree kernels;
2357 tree host_func_table;
2358
2359 hsa_output_kernels (&host_func_table, &kernels);
2360 tree global_vars = hsa_output_global_variables ();
2361
2362 tree hsa_image_desc_type = make_node (RECORD_TYPE);
2363 tree id_f1 = build_decl (BUILTINS_LOCATION, FIELD_DECL,
2364 get_identifier ("brig_module"), ptr_type_node);
2365 DECL_CHAIN (id_f1) = NULL_TREE;
2366 tree id_f2 = build_decl (BUILTINS_LOCATION, FIELD_DECL,
2367 get_identifier ("kernel_count"),
2368 unsigned_type_node);
2369
2370 DECL_CHAIN (id_f2) = id_f1;
2371 tree id_f3 = build_decl (BUILTINS_LOCATION, FIELD_DECL,
2372 get_identifier ("hsa_kernel_infos"),
2373 ptr_type_node);
2374 DECL_CHAIN (id_f3) = id_f2;
2375 tree id_f4 = build_decl (BUILTINS_LOCATION, FIELD_DECL,
2376 get_identifier ("global_variable_count"),
2377 unsigned_type_node);
2378 DECL_CHAIN (id_f4) = id_f3;
2379 tree id_f5 = build_decl (BUILTINS_LOCATION, FIELD_DECL,
2380 get_identifier ("hsa_global_variable_infos"),
2381 ptr_type_node);
2382 DECL_CHAIN (id_f5) = id_f4;
2383 finish_builtin_struct (hsa_image_desc_type, "__hsa_image_desc", id_f5,
2384 NULL_TREE);
2385 TYPE_ARTIFICIAL (hsa_image_desc_type) = 1;
2386
2387 vec<constructor_elt, va_gc> *img_desc_vec = NULL;
2388 CONSTRUCTOR_APPEND_ELT (img_desc_vec, NULL_TREE,
2389 build_fold_addr_expr (brig_decl));
2390 CONSTRUCTOR_APPEND_ELT (img_desc_vec, NULL_TREE,
2391 build_int_cstu (unsigned_type_node, kernel_count));
2392 CONSTRUCTOR_APPEND_ELT (img_desc_vec, NULL_TREE,
2393 build1 (ADDR_EXPR,
2394 build_pointer_type (TREE_TYPE (kernels)),
2395 kernels));
2396 CONSTRUCTOR_APPEND_ELT (img_desc_vec, NULL_TREE,
2397 build_int_cstu (unsigned_type_node,
2398 global_variable_count));
2399 CONSTRUCTOR_APPEND_ELT (img_desc_vec, NULL_TREE,
2400 build1 (ADDR_EXPR,
2401 build_pointer_type (TREE_TYPE (global_vars)),
2402 global_vars));
2403
2404 tree img_desc_ctor = build_constructor (hsa_image_desc_type, img_desc_vec);
2405
2406 char tmp_name[64];
2407 ASM_GENERATE_INTERNAL_LABEL (tmp_name, "__hsa_img_descriptor", 1);
2408 tree hsa_img_descriptor = build_decl (UNKNOWN_LOCATION, VAR_DECL,
2409 get_identifier (tmp_name),
2410 hsa_image_desc_type);
2411 TREE_STATIC (hsa_img_descriptor) = 1;
2412 TREE_READONLY (hsa_img_descriptor) = 1;
2413 TREE_PUBLIC (hsa_img_descriptor) = 0;
2414 DECL_ARTIFICIAL (hsa_img_descriptor) = 1;
2415 DECL_IGNORED_P (hsa_img_descriptor) = 1;
2416 DECL_EXTERNAL (hsa_img_descriptor) = 0;
2417 TREE_CONSTANT (hsa_img_descriptor) = 1;
2418 DECL_INITIAL (hsa_img_descriptor) = img_desc_ctor;
2419 varpool_node::finalize_decl (hsa_img_descriptor);
2420
2421 /* Construct the "host_table" libgomp expects. */
2422 tree index_type = build_index_type (build_int_cst (integer_type_node, 4));
2423 tree libgomp_host_table_type = build_array_type (ptr_type_node, index_type);
2424 TYPE_ARTIFICIAL (libgomp_host_table_type) = 1;
2425 vec<constructor_elt, va_gc> *libgomp_host_table_vec = NULL;
2426 tree host_func_table_addr = build_fold_addr_expr (host_func_table);
2427 CONSTRUCTOR_APPEND_ELT (libgomp_host_table_vec, NULL_TREE,
2428 host_func_table_addr);
2429 offset_int func_table_size
2430 = wi::to_offset (TYPE_SIZE_UNIT (ptr_type_node)) * kernel_count;
2431 CONSTRUCTOR_APPEND_ELT (libgomp_host_table_vec, NULL_TREE,
2432 fold_build2 (POINTER_PLUS_EXPR,
2433 TREE_TYPE (host_func_table_addr),
2434 host_func_table_addr,
2435 build_int_cst (size_type_node,
2436 func_table_size.to_uhwi
2437 ())));
2438 CONSTRUCTOR_APPEND_ELT (libgomp_host_table_vec, NULL_TREE, null_pointer_node);
2439 CONSTRUCTOR_APPEND_ELT (libgomp_host_table_vec, NULL_TREE, null_pointer_node);
2440 tree libgomp_host_table_ctor = build_constructor (libgomp_host_table_type,
2441 libgomp_host_table_vec);
2442 ASM_GENERATE_INTERNAL_LABEL (tmp_name, "__hsa_libgomp_host_table", 1);
2443 tree hsa_libgomp_host_table = build_decl (UNKNOWN_LOCATION, VAR_DECL,
2444 get_identifier (tmp_name),
2445 libgomp_host_table_type);
2446
2447 TREE_STATIC (hsa_libgomp_host_table) = 1;
2448 TREE_READONLY (hsa_libgomp_host_table) = 1;
2449 TREE_PUBLIC (hsa_libgomp_host_table) = 0;
2450 DECL_ARTIFICIAL (hsa_libgomp_host_table) = 1;
2451 DECL_IGNORED_P (hsa_libgomp_host_table) = 1;
2452 DECL_EXTERNAL (hsa_libgomp_host_table) = 0;
2453 TREE_CONSTANT (hsa_libgomp_host_table) = 1;
2454 DECL_INITIAL (hsa_libgomp_host_table) = libgomp_host_table_ctor;
2455 varpool_node::finalize_decl (hsa_libgomp_host_table);
2456
2457 /* Generate an initializer with a call to the registration routine. */
2458
2459 tree offload_register
2460 = builtin_decl_explicit (BUILT_IN_GOMP_OFFLOAD_REGISTER);
2461 gcc_checking_assert (offload_register);
2462
2463 tree *hsa_ctor_stmts = hsa_get_ctor_statements ();
2464 append_to_statement_list
2465 (build_call_expr (offload_register, 4,
2466 build_int_cstu (unsigned_type_node,
2467 GOMP_VERSION_PACK (GOMP_VERSION,
2468 GOMP_VERSION_HSA)),
2469 build_fold_addr_expr (hsa_libgomp_host_table),
2470 build_int_cst (integer_type_node, GOMP_DEVICE_HSA),
2471 build_fold_addr_expr (hsa_img_descriptor)),
2472 hsa_ctor_stmts);
2473
2474 cgraph_build_static_cdtor ('I', *hsa_ctor_stmts, DEFAULT_INIT_PRIORITY);
2475
2476 tree offload_unregister
2477 = builtin_decl_explicit (BUILT_IN_GOMP_OFFLOAD_UNREGISTER);
2478 gcc_checking_assert (offload_unregister);
2479
2480 tree *hsa_dtor_stmts = hsa_get_dtor_statements ();
2481 append_to_statement_list
2482 (build_call_expr (offload_unregister, 4,
2483 build_int_cstu (unsigned_type_node,
2484 GOMP_VERSION_PACK (GOMP_VERSION,
2485 GOMP_VERSION_HSA)),
2486 build_fold_addr_expr (hsa_libgomp_host_table),
2487 build_int_cst (integer_type_node, GOMP_DEVICE_HSA),
2488 build_fold_addr_expr (hsa_img_descriptor)),
2489 hsa_dtor_stmts);
2490 cgraph_build_static_cdtor ('D', *hsa_dtor_stmts, DEFAULT_INIT_PRIORITY);
2491 }
2492
2493 /* Emit the brig module we have compiled to a section in the final assembly and
2494 also create a compile unit static constructor that will register the brig
2495 module with libgomp. */
2496
2497 void
2498 hsa_output_brig (void)
2499 {
2500 section *saved_section;
2501
2502 if (!brig_initialized)
2503 return;
2504
2505 for (unsigned i = 0; i < function_call_linkage.length (); i++)
2506 {
2507 function_linkage_pair p = function_call_linkage[i];
2508
2509 BrigCodeOffset32_t *func_offset = function_offsets->get (p.function_decl);
2510 gcc_assert (*func_offset);
2511 BrigOperandCodeRef *code_ref
2512 = (BrigOperandCodeRef *) (brig_operand.get_ptr_by_offset (p.offset));
2513 gcc_assert (code_ref->base.kind == BRIG_KIND_OPERAND_CODE_REF);
2514 code_ref->ref = lendian32 (*func_offset);
2515 }
2516
2517 /* Iterate all function declarations and if we meet a function that should
2518 have module linkage and we are unable to emit HSAIL for the function,
2519 then change the linkage to program linkage. Doing so, we will emit
2520 a valid BRIG image. */
2521 if (hsa_failed_functions != NULL && emitted_declarations != NULL)
2522 for (hash_map <tree, BrigDirectiveExecutable *>::iterator it
2523 = emitted_declarations->begin ();
2524 it != emitted_declarations->end ();
2525 ++it)
2526 {
2527 if (hsa_failed_functions->contains ((*it).first))
2528 (*it).second->linkage = BRIG_LINKAGE_PROGRAM;
2529 }
2530
2531 saved_section = in_section;
2532
2533 switch_to_section (get_section (BRIG_ELF_SECTION_NAME, SECTION_NOTYPE, NULL));
2534 char tmp_name[64];
2535 ASM_GENERATE_INTERNAL_LABEL (tmp_name, BRIG_LABEL_STRING, 1);
2536 ASM_OUTPUT_LABEL (asm_out_file, tmp_name);
2537 tree brig_id = get_identifier (tmp_name);
2538 tree brig_decl = build_decl (UNKNOWN_LOCATION, VAR_DECL, brig_id,
2539 char_type_node);
2540 SET_DECL_ASSEMBLER_NAME (brig_decl, brig_id);
2541 TREE_ADDRESSABLE (brig_decl) = 1;
2542 TREE_READONLY (brig_decl) = 1;
2543 DECL_ARTIFICIAL (brig_decl) = 1;
2544 DECL_IGNORED_P (brig_decl) = 1;
2545 TREE_STATIC (brig_decl) = 1;
2546 TREE_PUBLIC (brig_decl) = 0;
2547 TREE_USED (brig_decl) = 1;
2548 DECL_INITIAL (brig_decl) = brig_decl;
2549 TREE_ASM_WRITTEN (brig_decl) = 1;
2550
2551 BrigModuleHeader module_header;
2552 memcpy (&module_header.identification, "HSA BRIG",
2553 sizeof (module_header.identification));
2554 module_header.brigMajor = lendian32 (BRIG_VERSION_BRIG_MAJOR);
2555 module_header.brigMinor = lendian32 (BRIG_VERSION_BRIG_MINOR);
2556 uint64_t section_index[3];
2557
2558 int data_padding, code_padding, operand_padding;
2559 data_padding = HSA_SECTION_ALIGNMENT
2560 - brig_data.total_size % HSA_SECTION_ALIGNMENT;
2561 code_padding = HSA_SECTION_ALIGNMENT
2562 - brig_code.total_size % HSA_SECTION_ALIGNMENT;
2563 operand_padding = HSA_SECTION_ALIGNMENT
2564 - brig_operand.total_size % HSA_SECTION_ALIGNMENT;
2565
2566 uint64_t module_size = sizeof (module_header)
2567 + sizeof (section_index)
2568 + brig_data.total_size
2569 + data_padding
2570 + brig_code.total_size
2571 + code_padding
2572 + brig_operand.total_size
2573 + operand_padding;
2574 gcc_assert ((module_size % 16) == 0);
2575 module_header.byteCount = lendian64 (module_size);
2576 memset (&module_header.hash, 0, sizeof (module_header.hash));
2577 module_header.reserved = 0;
2578 module_header.sectionCount = lendian32 (3);
2579 module_header.sectionIndex = lendian64 (sizeof (module_header));
2580 assemble_string ((const char *) &module_header, sizeof (module_header));
2581 uint64_t off = sizeof (module_header) + sizeof (section_index);
2582 section_index[0] = lendian64 (off);
2583 off += brig_data.total_size + data_padding;
2584 section_index[1] = lendian64 (off);
2585 off += brig_code.total_size + code_padding;
2586 section_index[2] = lendian64 (off);
2587 assemble_string ((const char *) &section_index, sizeof (section_index));
2588
2589 char padding[HSA_SECTION_ALIGNMENT];
2590 memset (padding, 0, sizeof (padding));
2591
2592 brig_data.output ();
2593 assemble_string (padding, data_padding);
2594 brig_code.output ();
2595 assemble_string (padding, code_padding);
2596 brig_operand.output ();
2597 assemble_string (padding, operand_padding);
2598
2599 if (saved_section)
2600 switch_to_section (saved_section);
2601
2602 hsa_output_libgomp_mapping (brig_decl);
2603
2604 hsa_free_decl_kernel_mapping ();
2605 brig_release_data ();
2606 hsa_deinit_compilation_unit_data ();
2607
2608 delete emitted_declarations;
2609 emitted_declarations = NULL;
2610 delete function_offsets;
2611 function_offsets = NULL;
2612 }