/* Source file: gcc/brig/brigfrontend/brig-code-entry-handler.cc,
   from the git.ipfire.org mirror of thirdparty/gcc.git (blame view
   captured at commit "re PR lto/79061").  */
/* brig-code-entry-handler.cc -- a gccbrig base class
   Copyright (C) 2016-2017 Free Software Foundation, Inc.
   Contributed by Pekka Jaaskelainen <pekka.jaaskelainen@parmance.com>
   for General Processor Tech.

   This file is part of GCC.

   GCC is free software; you can redistribute it and/or modify it under
   the terms of the GNU General Public License as published by the Free
   Software Foundation; either version 3, or (at your option) any later
   version.

   GCC is distributed in the hope that it will be useful, but WITHOUT ANY
   WARRANTY; without even the implied warranty of MERCHANTABILITY or
   FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
   for more details.

   You should have received a copy of the GNU General Public License
   along with GCC; see the file COPYING3.  If not see
   <http://www.gnu.org/licenses/>.  */

22#include "brig-code-entry-handler.h"
23
24#include "stringpool.h"
25#include "tree-iterator.h"
26#include "toplev.h"
27#include "diagnostic.h"
28#include "brig-machine.h"
29#include "brig-util.h"
30#include "errors.h"
31#include "real.h"
32#include "print-tree.h"
33#include "tree-pretty-print.h"
34#include "target.h"
35#include "langhooks.h"
36#include "gimple-expr.h"
37#include "convert.h"
38#include "brig-util.h"
39#include "builtins.h"
40#include "phsa.h"
41#include "brig-builtins.h"
42#include "fold-const.h"
43
/* Index from an (HSAIL opcode, HSAIL type) pair to the GENERIC builtin
   declaration implementing it.  Shared by all handler instances and
   populated lazily by the first-constructed handler.  */
brig_code_entry_handler::builtin_map brig_code_entry_handler::s_custom_builtins;
45
brig_code_entry_handler::brig_code_entry_handler (brig_to_generic &parent)
  : brig_entry_handler (parent)
{
  /* The builtin index is shared across all handler instances; only the
     first constructed handler needs to populate it.  */
  if (s_custom_builtins.size () > 0) return;

  /* Populate the builtin index.  */
#undef DEF_HSAIL_ATOMIC_BUILTIN
#undef DEF_HSAIL_CVT_ZEROI_SAT_BUILTIN
#undef DEF_HSAIL_INTR_BUILTIN
#undef DEF_HSAIL_SAT_BUILTIN
#undef DEF_HSAIL_BUILTIN
/* Each record maps an (HSAIL opcode, HSAIL type) key to the GENERIC
   decl of the builtin implementing it.  The specialized DEF_* variants
   are #undef'd above so that (presumably) brig-builtins.def falls back
   to this DEF_HSAIL_BUILTIN expansion for all of them — confirm against
   brig-builtins.def.  */
#define DEF_HSAIL_BUILTIN(ENUM, HSAIL_OPCODE, HSAIL_TYPE, NAME, TYPE, ATTRS) \
  s_custom_builtins[std::make_pair (HSAIL_OPCODE, HSAIL_TYPE)]		\
    = builtin_decl_explicit (ENUM);

#include "brig-builtins.def"
}
63
64/* Build a tree operand which is a reference to a piece of code. REF is the
65 original reference as a BRIG object. */
66
67tree
68brig_code_entry_handler::build_code_ref (const BrigBase &ref)
69{
70 if (ref.kind == BRIG_KIND_DIRECTIVE_LABEL)
71 {
72 const BrigDirectiveLabel *brig_label = (const BrigDirectiveLabel *) &ref;
73
74 const BrigData *label_name
75 = m_parent.get_brig_data_entry (brig_label->name);
76
77 std::string label_str ((const char *) (label_name->bytes),
78 label_name->byteCount);
79 return m_parent.m_cf->label (label_str);
80 }
81 else if (ref.kind == BRIG_KIND_DIRECTIVE_FUNCTION)
82 {
83 const BrigDirectiveExecutable *func
84 = (const BrigDirectiveExecutable *) &ref;
85 return m_parent.function_decl (m_parent.get_mangled_name (func));
86 }
87 else if (ref.kind == BRIG_KIND_DIRECTIVE_FBARRIER)
88 {
89 const BrigDirectiveFbarrier* fbar = (const BrigDirectiveFbarrier*)&ref;
90
91 uint64_t offset = m_parent.group_variable_segment_offset
92 (m_parent.get_mangled_name (fbar));
93
94 return build_int_cst (uint32_type_node, offset);
95 }
96 else
97 gcc_unreachable ();
98}
99
100/* Produce a tree operand for the given BRIG_INST and its OPERAND.
101 OPERAND_TYPE should be the operand type in case it should not
102 be dictated by the BrigBase. IS_INPUT indicates if the operand
103 is an input operand or a result. */
104
tree
brig_code_entry_handler::build_tree_operand (const BrigInstBase &brig_inst,
					     const BrigBase &operand,
					     tree operand_type, bool is_input)
{
  switch (operand.kind)
    {
    case BRIG_KIND_OPERAND_OPERAND_LIST:
      {
	/* An operand list becomes a vector CONSTRUCTOR: the list is a
	   BrigData blob of 32-bit operand-section offsets, one per
	   vector element.  */
	vec<constructor_elt, va_gc> *constructor_vals = NULL;
	const BrigOperandOperandList &oplist
	  = (const BrigOperandOperandList &) operand;
	const BrigData *data = m_parent.get_brig_data_entry (oplist.elements);
	size_t bytes = data->byteCount;
	const BrigOperandOffset32_t *operand_ptr
	  = (const BrigOperandOffset32_t *) data->bytes;
	while (bytes > 0)
	  {
	    BrigOperandOffset32_t offset = *operand_ptr;
	    const BrigBase *operand_element
	      = m_parent.get_brig_operand_entry (offset);
	    tree element
	      = build_tree_operand (brig_inst, *operand_element, operand_type);

	    /* In case a vector is used an input, cast the elements to
	       correct size here so we don't need a separate unpack/pack for it.
	       fp16-fp32 conversion is done in build_operands ().  */
	    if (is_input && TREE_TYPE (element) != operand_type)
	      {
		if (int_size_in_bytes (TREE_TYPE (element))
		    == int_size_in_bytes (operand_type)
		    && !INTEGRAL_TYPE_P (operand_type))
		  /* Same size, different type: reinterpret the bits
		     rather than converting the value.  */
		  element = build1 (VIEW_CONVERT_EXPR, operand_type, element);
		else
		  element = convert (operand_type, element);
	      }

	    CONSTRUCTOR_APPEND_ELT (constructor_vals, NULL_TREE, element);
	    ++operand_ptr;
	    /* Each list entry is a 32-bit offset.  */
	    bytes -= 4;
	  }
	size_t element_count = data->byteCount / 4;
	tree vec_type = build_vector_type (operand_type, element_count);

	return build_constructor (vec_type, constructor_vals);
      }
    case BRIG_KIND_OPERAND_CODE_LIST:
      {
	/* Build a TREE_VEC of code expressions.  */

	const BrigOperandCodeList &oplist
	  = (const BrigOperandCodeList &) operand;
	const BrigData *data = m_parent.get_brig_data_entry (oplist.elements);
	size_t bytes = data->byteCount;
	const BrigOperandOffset32_t *operand_ptr
	  = (const BrigOperandOffset32_t *) data->bytes;

	size_t case_index = 0;
	size_t element_count = data->byteCount / 4;

	/* Create a TREE_VEC out of the labels in the list.  */
	tree vec = make_tree_vec (element_count);

	while (bytes > 0)
	  {
	    BrigOperandOffset32_t offset = *operand_ptr;
	    const BrigBase *ref = m_parent.get_brig_code_entry (offset);
	    tree element = build_code_ref (*ref);

	    gcc_assert (case_index < element_count);
	    TREE_VEC_ELT (vec, case_index) = element;
	    case_index++;

	    ++operand_ptr;
	    bytes -= 4;
	  }
	return vec;
      }
    case BRIG_KIND_OPERAND_REGISTER:
      {
	/* HSAIL registers are modeled as GENERIC variables.  */
	const BrigOperandRegister *brig_reg
	  = (const BrigOperandRegister *) &operand;
	return m_parent.m_cf->get_m_var_declfor_reg (brig_reg);
      }
    case BRIG_KIND_OPERAND_CONSTANT_BYTES:
      {
	const BrigOperandConstantBytes *brigConst
	  = (const BrigOperandConstantBytes *) &operand;
	/* The constants can be of different type than the instruction
	   and are implicitly casted to the input operand.  */
	return get_tree_cst_for_hsa_operand (brigConst, NULL_TREE);
      }
    case BRIG_KIND_OPERAND_WAVESIZE:
      {
	/* WAVESIZE materializes to the target's wave size as an integer
	   constant; it is only valid in integer-typed contexts.  */
	if (!INTEGRAL_TYPE_P (operand_type))
	  {
	    gcc_unreachable ();
	    return NULL_TREE;
	  }
	return build_int_cstu (operand_type, gccbrig_get_target_wavesize ());
      }
    case BRIG_KIND_OPERAND_CODE_REF:
      {
	const BrigOperandCodeRef *brig_code_ref
	  = (const BrigOperandCodeRef *) &operand;

	const BrigBase *ref = m_parent.get_brig_code_entry (brig_code_ref->ref);

	return build_code_ref (*ref);
      }
    case BRIG_KIND_OPERAND_ADDRESS:
      {
	return build_address_operand (brig_inst,
				      (const BrigOperandAddress &) operand);
      }
    default:
      gcc_unreachable ();
    }
}
224
225/* Build a tree node representing an address reference from a BRIG_INST and its
226 ADDR_OPERAND. */
227
tree
brig_code_entry_handler::build_address_operand
  (const BrigInstBase &brig_inst, const BrigOperandAddress &addr_operand)
{
  tree instr_type = gccbrig_tree_type_for_hsa_type (brig_inst.type);

  /* Pick the segment from whichever instruction variant carries one;
     default to the global segment.  */
  BrigSegment8_t segment = BRIG_SEGMENT_GLOBAL;
  if (brig_inst.opcode == BRIG_OPCODE_LDA)
    segment = ((const BrigInstAddr &) brig_inst).segment;
  else if (brig_inst.base.kind == BRIG_KIND_INST_MEM)
    segment = ((const BrigInstMem &) brig_inst).segment;
  else if (brig_inst.base.kind == BRIG_KIND_INST_ATOMIC)
    segment = ((const BrigInstAtomic &) brig_inst).segment;

  /* The final address is combined from up to three parts: a symbol
     base, a register (variable) offset and constant offsets.  */
  tree var_offset = NULL_TREE;
  tree const_offset = NULL_TREE;
  tree symbol_base = NULL_TREE;

  if (addr_operand.symbol != 0)
    {
      const BrigDirectiveVariable *arg_symbol
	= (const BrigDirectiveVariable *) m_parent.get_brig_code_entry
	(addr_operand.symbol);

      std::string var_name = m_parent.get_mangled_name (arg_symbol);

      if (segment == BRIG_SEGMENT_KERNARG)
	{
	  /* Find the offset to the kernarg buffer for the given
	     kernel argument variable.  */
	  tree func = m_parent.m_cf->m_func_decl;
	  /* __args is the first parameter in kernel functions.  */
	  symbol_base = DECL_ARGUMENTS (func);
	  uint64_t offset = m_parent.m_cf->kernel_arg_offset (arg_symbol);
	  if (offset > 0)
	    const_offset = build_int_cst (size_type_node, offset);
	}
      else if (segment == BRIG_SEGMENT_GROUP)
	{
	  /* Group variables are addressed via their offset in the
	     group segment; the group base pointer is added below.  */
	  uint64_t offset = m_parent.group_variable_segment_offset (var_name);
	  const_offset = build_int_cst (size_type_node, offset);
	}
      else if (segment == BRIG_SEGMENT_PRIVATE || segment == BRIG_SEGMENT_SPILL)
	{
	  uint32_t offset = m_parent.private_variable_segment_offset (var_name);

	  /* Compute the offset to the work item's copy:

	     single-wi-offset * local_size + wiflatid * varsize

	     This way the work items have the same variable in
	     successive elements to each other in the segment,
	     helping to achieve autovectorization of loads/stores
	     with stride 1.  */

	  tree_stl_vec uint32_0
	    = tree_stl_vec (1, build_int_cst (uint32_type_node, 0));

	  tree_stl_vec uint32_1
	    = tree_stl_vec (1, build_int_cst (uint32_type_node, 1));

	  tree_stl_vec uint32_2
	    = tree_stl_vec (1, build_int_cst (uint32_type_node, 2));

	  /* local_size = workgroupsize (0) * workgroupsize (1)
	     * workgroupsize (2).  */
	  tree local_size
	    = build2 (MULT_EXPR, uint32_type_node,
		      expand_or_call_builtin (BRIG_OPCODE_WORKGROUPSIZE,
					      BRIG_TYPE_U32,
					      uint32_type_node, uint32_0),
		      expand_or_call_builtin (BRIG_OPCODE_WORKGROUPSIZE,
					      BRIG_TYPE_U32,
					      uint32_type_node, uint32_1));

	  local_size
	    = build2 (MULT_EXPR, uint32_type_node,
		      expand_or_call_builtin (BRIG_OPCODE_WORKGROUPSIZE,
					      BRIG_TYPE_U32,
					      uint32_type_node, uint32_2),
		      local_size);

	  tree var_region
	    = build2 (MULT_EXPR, uint32_type_node,
		      build_int_cst (uint32_type_node, offset), local_size);

	  tree_stl_vec operands;
	  tree pos
	    = build2 (MULT_EXPR, uint32_type_node,
		      build_int_cst (uint32_type_node,
				     m_parent.private_variable_size (var_name)),
		      expand_or_call_builtin (BRIG_OPCODE_WORKITEMFLATID,
					      BRIG_TYPE_U32,
					      uint32_type_node, operands));

	  /* NOTE(review): this declaration shadows the outer var_offset;
	     the value flows out only through symbol_base below, so the
	     outer var_offset stays NULL_TREE in this branch.  */
	  tree var_offset
	    = build2 (PLUS_EXPR, uint32_type_node, var_region, pos);

	  /* In case of LDA this is returned directly as an integer value.
	     For other mem-related instructions, we will convert this segment
	     offset to a flat address by adding it as an offset to a (private
	     or group) base pointer later on.  Same applies to group_var_offset.  */
	  symbol_base
	    = add_temp_var ("priv_var_offset",
			    convert (size_type_node, var_offset));
	}
      else if (segment == BRIG_SEGMENT_ARG)
	{
	  tree arg_var_decl;
	  /* The return value symbol maps to the return temp, other arg
	     segment symbols to their arg variables.  */
	  if (m_parent.m_cf->m_ret_value_brig_var == arg_symbol)
	    arg_var_decl = m_parent.m_cf->m_ret_temp;
	  else
	    arg_var_decl = m_parent.m_cf->arg_variable (arg_symbol);

	  gcc_assert (arg_var_decl != NULL_TREE);

	  tree ptype = build_pointer_type (instr_type);

	  if (arg_symbol->type & BRIG_TYPE_ARRAY)
	    {

	      /* Two different type of array references in case of arguments
		 depending where they are referred at.  In the caller (argument
		 segment), the reference is to an array object and
		 in the callee, the array object has been passed as a pointer
		 to the array object.  */

	      if (POINTER_TYPE_P (TREE_TYPE (arg_var_decl)))
		symbol_base = build_reinterpret_cast (ptype, arg_var_decl);
	      else
		{
		  /* In case we are referring to an array (the argument in
		     call site), use its element zero as the base address.  */
		  tree element_zero
		    = build4 (ARRAY_REF, TREE_TYPE (TREE_TYPE (arg_var_decl)),
			      arg_var_decl, integer_zero_node, NULL_TREE,
			      NULL_TREE);
		  symbol_base = build1 (ADDR_EXPR, ptype, element_zero);
		}
	    }
	  else
	    symbol_base = build1 (ADDR_EXPR, ptype, arg_var_decl);
	}
      else
	{
	  tree global_var_decl = m_parent.global_variable (var_name);

	  /* In case the global variable hasn't been defined (yet),
	     use the host def indirection ptr variable.  */
	  if (global_var_decl == NULL_TREE)
	    {
	      std::string host_ptr_name
		= std::string (PHSA_HOST_DEF_PTR_PREFIX) + var_name;
	      tree host_defined_ptr = m_parent.global_variable (host_ptr_name);
	      gcc_assert (host_defined_ptr != NULL_TREE);
	      symbol_base = host_defined_ptr;
	    }
	  else
	    {
	      gcc_assert (global_var_decl != NULL_TREE);

	      tree ptype = build_pointer_type (instr_type);
	      symbol_base = build1 (ADDR_EXPR, ptype, global_var_decl);
	    }
	}
    }

  if (brig_inst.opcode != BRIG_OPCODE_LDA)
    {
      /* In case of lda_* we want to return the segment address because it's
	 used as a value, perhaps in address computation and later converted
	 explicitly to a flat address.

	 In case of other instructions with memory operands we produce the flat
	 address directly here (assuming the target does not have a separate
	 address space for group/private segments for now).  */
      if (segment == BRIG_SEGMENT_GROUP)
	symbol_base = m_parent.m_cf->m_group_base_arg;
      else if (segment == BRIG_SEGMENT_PRIVATE
	       || segment == BRIG_SEGMENT_SPILL)
	{
	  if (symbol_base != NULL_TREE)
	    symbol_base = build2 (POINTER_PLUS_EXPR, ptr_type_node,
				  m_parent.m_cf->m_private_base_arg,
				  symbol_base);
	  else
	    symbol_base = m_parent.m_cf->m_private_base_arg;
	}
    }

  if (addr_operand.reg != 0)
    {
      /* A register-relative address: the register's variable becomes
	 the variable part of the offset.  */
      const BrigOperandRegister *mem_base_reg
	= (const BrigOperandRegister *) m_parent.get_brig_operand_entry
	(addr_operand.reg);
      tree base_reg_var = m_parent.m_cf->get_m_var_declfor_reg (mem_base_reg);
      var_offset = convert_to_pointer (ptr_type_node, base_reg_var);

      gcc_assert (var_offset != NULL_TREE);
    }
  /* The pointer type we use to access the memory.  Should be of the
     width of the load/store instruction, not the target/data
     register.  */
  tree ptype = build_pointer_type (instr_type);

  gcc_assert (ptype != NULL_TREE);

  /* Combine the collected parts: symbol base + register offset, then
     the constant offsets.  */
  tree addr = NULL_TREE;
  if (symbol_base != NULL_TREE && var_offset != NULL_TREE)
    /* The most complex addressing mode: symbol + reg [+ const offset].  */
    addr = build2 (POINTER_PLUS_EXPR, ptr_type_node,
		   convert (ptr_type_node, symbol_base),
		   convert (size_type_node, var_offset));
  else if (var_offset != NULL)
    addr = var_offset;
  else if (symbol_base != NULL)
    addr = symbol_base;

  if (const_offset != NULL_TREE)
    {
      if (addr == NULL_TREE)
	/* At least direct module-scope global group symbol access with LDA
	   has only the const_offset.  Group base ptr is not added as LDA should
	   return the segment address, not the flattened one.  */
	addr = const_offset;
      else
	addr = build2 (POINTER_PLUS_EXPR, ptr_type_node,
		       addr, convert (size_type_node, const_offset));
    }

  /* We might have two const offsets in case of group or private arrays
     which have the first offset to the incoming group/private pointer
     arg, and the second one an offset to it.  */
  uint64_t offs = gccbrig_to_uint64_t (addr_operand.offset);
  if (offs > 0)
    {
      tree const_offset_2 = build_int_cst (size_type_node, offs);
      if (addr == NULL_TREE)
	addr = const_offset_2;
      else
	addr = build2 (POINTER_PLUS_EXPR, ptr_type_node,
		       addr, convert (size_type_node, const_offset_2));

    }

  gcc_assert (addr != NULL_TREE);
  return convert_to_pointer (ptype, addr);
}
475
476/* Builds a tree operand with the given OPERAND_INDEX for the given
477 BRIG_INST with the desired tree OPERAND_TYPE. OPERAND_TYPE can
478 be NULL in case the type is forced by the BRIG_INST type. */
479
480tree
481brig_code_entry_handler::build_tree_operand_from_brig
482 (const BrigInstBase *brig_inst, tree operand_type, size_t operand_index)
483{
484 const BrigData *operand_entries
485 = m_parent.get_brig_data_entry (brig_inst->operands);
486
487 uint32_t operand_offset
488 = ((const uint32_t *) &operand_entries->bytes)[operand_index];
489 const BrigBase *operand_data
490 = m_parent.get_brig_operand_entry (operand_offset);
491 return build_tree_operand (*brig_inst, *operand_data, operand_type);
492}
493
494/* Builds a single (scalar) constant initialized element of type
495 ELEMENT_TYPE from the buffer pointed to by NEXT_DATA. */
496
497tree
498brig_code_entry_handler::build_tree_cst_element
499 (BrigType16_t element_type, const unsigned char *next_data) const
500{
501
502 tree tree_element_type = gccbrig_tree_type_for_hsa_type (element_type);
503
504 tree cst;
505 switch (element_type)
506 {
507 case BRIG_TYPE_F16:
508 {
509 HOST_WIDE_INT low = *(const uint16_t *) next_data;
510 cst = build_int_cst (uint16_type_node, low);
511 break;
512 }
513 case BRIG_TYPE_F32:
514 {
515 REAL_VALUE_TYPE val;
516 ieee_single_format.decode (&ieee_single_format, &val,
517 (const long *) next_data);
518 cst = build_real (tree_element_type, val);
519 break;
520 }
521 case BRIG_TYPE_F64:
522 {
523 long data[2];
524 data[0] = *(const uint32_t *) next_data;
525 data[1] = *(const uint32_t *) (next_data + 4);
526 REAL_VALUE_TYPE val;
527 ieee_double_format.decode (&ieee_double_format, &val, data);
528 cst = build_real (tree_element_type, val);
529 break;
530 }
531 case BRIG_TYPE_S8:
532 case BRIG_TYPE_S16:
533 case BRIG_TYPE_S32:
534 case BRIG_TYPE_S64:
535 {
536 HOST_WIDE_INT low = *(const int64_t *) next_data;
537 cst = build_int_cst (tree_element_type, low);
538 break;
539 }
540 case BRIG_TYPE_U8:
541 case BRIG_TYPE_U16:
542 case BRIG_TYPE_U32:
543 case BRIG_TYPE_U64:
544 {
545 unsigned HOST_WIDE_INT low = *(const uint64_t *) next_data;
546 cst = build_int_cstu (tree_element_type, low);
547 break;
548 }
549 case BRIG_TYPE_SIG64:
550 {
551 unsigned HOST_WIDE_INT low = *(const uint64_t *) next_data;
552 cst = build_int_cstu (uint64_type_node, low);
553 break;
554 }
555 case BRIG_TYPE_SIG32:
556 {
557 unsigned HOST_WIDE_INT low = *(const uint64_t *) next_data;
558 cst = build_int_cstu (uint32_type_node, low);
559 break;
560 }
561 default:
562 gcc_unreachable ();
563 return NULL_TREE;
564 }
565 return cst;
566}
567
568/* Produce a tree constant type for the given BRIG constant (BRIG_CONST).
569 TYPE should be the forced instruction type, otherwise the type is
570 dictated by the BRIG_CONST. */
571
572tree
573brig_code_entry_handler::get_tree_cst_for_hsa_operand
574 (const BrigOperandConstantBytes *brig_const, tree type) const
575{
576 const BrigData *data = m_parent.get_brig_data_entry (brig_const->bytes);
577
578 tree cst = NULL_TREE;
579
580 if (type == NULL_TREE)
581 type = gccbrig_tree_type_for_hsa_type (brig_const->type);
582
583 /* The type of a single (scalar) element inside an array,
584 vector or an array of vectors. */
585 BrigType16_t scalar_element_type
586 = brig_const->type & BRIG_TYPE_BASE_MASK;
587 tree tree_element_type = type;
588
589 vec<constructor_elt, va_gc> *constructor_vals = NULL;
590
591 if (TREE_CODE (type) == ARRAY_TYPE)
592 tree_element_type = TREE_TYPE (type);
593
594 size_t bytes_left = data->byteCount;
595 const unsigned char *next_data = data->bytes;
596 size_t scalar_element_size
597 = gccbrig_hsa_type_bit_size (scalar_element_type) / BITS_PER_UNIT;
598
599 while (bytes_left > 0)
600 {
601 if (VECTOR_TYPE_P (tree_element_type))
602 {
603 /* In case of vector type elements (or sole vectors),
604 create a vector ctor. */
605 size_t element_count = TYPE_VECTOR_SUBPARTS (tree_element_type);
606 if (bytes_left < scalar_element_size * element_count)
607 fatal_error (UNKNOWN_LOCATION,
608 "Not enough bytes left for the initializer "
609 "(%lu need %lu).",
610 bytes_left, scalar_element_size * element_count);
611
612 vec<constructor_elt, va_gc> *vec_els = NULL;
613 for (size_t i = 0; i < element_count; ++i)
614 {
615 tree element
616 = build_tree_cst_element (scalar_element_type, next_data);
617 CONSTRUCTOR_APPEND_ELT (vec_els, NULL_TREE, element);
618 bytes_left -= scalar_element_size;
619 next_data += scalar_element_size;
620 }
621 cst = build_vector_from_ctor (tree_element_type, vec_els);
622 }
623 else
624 {
625 if (bytes_left < scalar_element_size)
626 fatal_error (UNKNOWN_LOCATION,
627 "Not enough bytes left for the initializer "
628 "(%lu need %lu).",
629 bytes_left, scalar_element_size);
630 cst = build_tree_cst_element (scalar_element_type, next_data);
631 bytes_left -= scalar_element_size;
632 next_data += scalar_element_size;
633 }
634 CONSTRUCTOR_APPEND_ELT (constructor_vals, NULL_TREE, cst);
635 }
636
637 if (TREE_CODE (type) == ARRAY_TYPE)
638 return build_constructor (type, constructor_vals);
639 else
640 return cst;
641}
642
643/* Return the matching tree instruction arithmetics type for the
644 given BRIG_TYPE. The aritmethics type is the one with which
645 computation is done (in contrast to the storage type). F16
646 arithmetics type is emulated using F32 for now. */
647
648tree
649brig_code_entry_handler::get_tree_expr_type_for_hsa_type
650 (BrigType16_t brig_type) const
651{
652 BrigType16_t brig_inner_type = brig_type & BRIG_TYPE_BASE_MASK;
653 if (brig_inner_type == BRIG_TYPE_F16)
654 {
655 if (brig_inner_type == brig_type)
656 return m_parent.s_fp32_type;
657 size_t element_count = gccbrig_hsa_type_bit_size (brig_type) / 16;
658 return build_vector_type (m_parent.s_fp32_type, element_count);
659 }
660 else
661 return gccbrig_tree_type_for_hsa_type (brig_type);
662}
663
664/* In case the HSA instruction must be implemented using a builtin,
665 this function is called to get the correct builtin function.
666 TYPE is the instruction tree type, BRIG_OPCODE the opcode of the
667 brig instruction and BRIG_TYPE the brig instruction's type. */
668
tree
brig_code_entry_handler::get_builtin_for_hsa_opcode
  (tree type, BrigOpcode16_t brig_opcode, BrigType16_t brig_type) const
{
  tree builtin = NULL_TREE;
  tree builtin_type = type;

  /* For vector types, first find the scalar version of the builtin.  */
  if (type != NULL_TREE && VECTOR_TYPE_P (type))
    builtin_type = TREE_TYPE (type);
  BrigType16_t brig_inner_type = brig_type & BRIG_TYPE_BASE_MASK;

  /* Some BRIG opcodes can use the same builtins for unsigned and
     signed types.  Force these cases to unsigned types.  */

  if (brig_opcode == BRIG_OPCODE_BORROW
      || brig_opcode == BRIG_OPCODE_CARRY
      || brig_opcode == BRIG_OPCODE_LASTBIT
      || brig_opcode == BRIG_OPCODE_BITINSERT)
    {
      if (brig_type == BRIG_TYPE_S32)
	brig_type = BRIG_TYPE_U32;
      else if (brig_type == BRIG_TYPE_S64)
	brig_type = BRIG_TYPE_U64;
    }

  switch (brig_opcode)
    {
    /* Opcodes with a direct GCC math builtin counterpart.  */
    case BRIG_OPCODE_FLOOR:
      builtin = mathfn_built_in (builtin_type, BUILT_IN_FLOOR);
      break;
    case BRIG_OPCODE_CEIL:
      builtin = mathfn_built_in (builtin_type, BUILT_IN_CEIL);
      break;
    case BRIG_OPCODE_SQRT:
    case BRIG_OPCODE_NSQRT:
      builtin = mathfn_built_in (builtin_type, BUILT_IN_SQRT);
      break;
    case BRIG_OPCODE_RINT:
      builtin = mathfn_built_in (builtin_type, BUILT_IN_RINT);
      break;
    case BRIG_OPCODE_TRUNC:
      builtin = mathfn_built_in (builtin_type, BUILT_IN_TRUNC);
      break;
    case BRIG_OPCODE_COPYSIGN:
      builtin = mathfn_built_in (builtin_type, BUILT_IN_COPYSIGN);
      break;
    /* The native ('n') variants are allowed reduced precision, so the
       exact libm functions are valid implementations.  */
    case BRIG_OPCODE_NSIN:
      builtin = mathfn_built_in (builtin_type, BUILT_IN_SIN);
      break;
    case BRIG_OPCODE_NLOG2:
      builtin = mathfn_built_in (builtin_type, BUILT_IN_LOG2);
      break;
    case BRIG_OPCODE_NEXP2:
      builtin = mathfn_built_in (builtin_type, BUILT_IN_EXP2);
      break;
    case BRIG_OPCODE_NFMA:
      builtin = mathfn_built_in (builtin_type, BUILT_IN_FMA);
      break;
    case BRIG_OPCODE_NCOS:
      builtin = mathfn_built_in (builtin_type, BUILT_IN_COS);
      break;
    case BRIG_OPCODE_POPCOUNT:
      /* Popcount should be typed by its argument type (the return value
	 is always u32).  Let's use a b64 version for also for b32 for now.  */
      return builtin_decl_explicit (BUILT_IN_POPCOUNTL);
    case BRIG_OPCODE_BORROW:
      /* Borrow uses the same builtin for unsigned and signed types.  */
      if (brig_type == BRIG_TYPE_S32 || brig_type == BRIG_TYPE_U32)
	return builtin_decl_explicit (BUILT_IN_HSAIL_BORROW_U32);
      else
	return builtin_decl_explicit (BUILT_IN_HSAIL_BORROW_U64);
    case BRIG_OPCODE_CARRY:
      /* Carry also uses the same builtin for unsigned and signed types.  */
      if (brig_type == BRIG_TYPE_S32 || brig_type == BRIG_TYPE_U32)
	return builtin_decl_explicit (BUILT_IN_HSAIL_CARRY_U32);
      else
	return builtin_decl_explicit (BUILT_IN_HSAIL_CARRY_U64);
    default:

      /* Use our builtin index for finding a proper builtin for the BRIG
	 opcode and BRIG type.  This takes care most of the builtin cases,
	 the special cases are handled in the separate 'case' statements
	 above.  */
      builtin_map::const_iterator i
	= s_custom_builtins.find (std::make_pair (brig_opcode, brig_type));
      if (i != s_custom_builtins.end ())
	return (*i).second;

      if (brig_inner_type != brig_type)
	{
	  /* Try to find a scalar built-in we could use.  */
	  i = s_custom_builtins.find
	    (std::make_pair (brig_opcode, brig_inner_type));
	  if (i != s_custom_builtins.end ())
	    return (*i).second;
	}

      /* In case this is an fp16 operation that is promoted to fp32,
	 try to find a fp32 scalar built-in.  */
      if (brig_inner_type == BRIG_TYPE_F16)
	{
	  i = s_custom_builtins.find
	    (std::make_pair (brig_opcode, BRIG_TYPE_F32));
	  if (i != s_custom_builtins.end ())
	    return (*i).second;
	}
      gcc_unreachable ();
    }

  /* For a vector operation implemented by a scalar mathfn builtin,
     prefer the target's vectorized variant when one exists.  */
  if (VECTOR_TYPE_P (type) && builtin != NULL_TREE)
    {
      /* Try to find a vectorized version of the built-in.
	 TODO: properly assert that builtin is a mathfn builtin? */
      tree vec_builtin
	= targetm.vectorize.builtin_vectorized_function
	(builtin_mathfn_code (builtin), type, type);
      if (vec_builtin != NULL_TREE)
	return vec_builtin;
      else
	return builtin;
    }
  if (builtin == NULL_TREE)
    gcc_unreachable ();
  return builtin;
}
795
796/* Return the correct GENERIC type for storing comparison results
797 of operand with the type given in SOURCE_TYPE. */
798
799tree
800brig_code_entry_handler::get_comparison_result_type (tree source_type)
801{
802 if (VECTOR_TYPE_P (source_type))
803 {
804 size_t element_size = int_size_in_bytes (TREE_TYPE (source_type));
805 return build_vector_type
806 (build_nonstandard_boolean_type (element_size * BITS_PER_UNIT),
807 TYPE_VECTOR_SUBPARTS (source_type));
808 }
809 else
810 return gccbrig_tree_type_for_hsa_type (BRIG_TYPE_B1);
811}
812
813/* Returns true in case the given opcode needs to know about work-item context
814 data. In such case the context data is passed as a pointer to a work-item
815 context object, as the last argument in the builtin call. */
816
bool
brig_code_entry_handler::needs_workitem_context_data
  (BrigOpcode16_t brig_opcode) const
{
  switch (brig_opcode)
    {
    /* Work-item/work-group id and geometry queries.  */
    case BRIG_OPCODE_WORKITEMABSID:
    case BRIG_OPCODE_WORKITEMFLATABSID:
    case BRIG_OPCODE_WORKITEMFLATID:
    case BRIG_OPCODE_CURRENTWORKITEMFLATID:
    case BRIG_OPCODE_WORKITEMID:
    case BRIG_OPCODE_WORKGROUPID:
    case BRIG_OPCODE_WORKGROUPSIZE:
    case BRIG_OPCODE_CURRENTWORKGROUPSIZE:
    case BRIG_OPCODE_GRIDGROUPS:
    case BRIG_OPCODE_GRIDSIZE:
    case BRIG_OPCODE_DIM:
    /* Packet/dispatch queries.  */
    case BRIG_OPCODE_PACKETID:
    case BRIG_OPCODE_PACKETCOMPLETIONSIG:
    /* Synchronization primitives (barriers and fbarriers).  */
    case BRIG_OPCODE_BARRIER:
    case BRIG_OPCODE_WAVEBARRIER:
    case BRIG_OPCODE_ARRIVEFBAR:
    case BRIG_OPCODE_INITFBAR:
    case BRIG_OPCODE_JOINFBAR:
    case BRIG_OPCODE_LEAVEFBAR:
    case BRIG_OPCODE_RELEASEFBAR:
    case BRIG_OPCODE_WAITFBAR:
    /* Miscellaneous context-dependent opcodes.  */
    case BRIG_OPCODE_CUID:
    case BRIG_OPCODE_MAXCUID:
    case BRIG_OPCODE_DEBUGTRAP:
    case BRIG_OPCODE_GROUPBASEPTR:
    case BRIG_OPCODE_KERNARGBASEPTR:
    case BRIG_OPCODE_ALLOCA:
      return true;
    default:
      return false;
    };
}
855
856/* Returns true in case the given opcode that would normally be generated
857 as a builtin call can be expanded to tree nodes. */
858
859bool
860brig_code_entry_handler::can_expand_builtin (BrigOpcode16_t brig_opcode) const
861{
862 switch (brig_opcode)
863 {
864 case BRIG_OPCODE_WORKITEMFLATABSID:
865 case BRIG_OPCODE_WORKITEMFLATID:
866 case BRIG_OPCODE_WORKITEMABSID:
867 case BRIG_OPCODE_WORKGROUPSIZE:
868 case BRIG_OPCODE_CURRENTWORKGROUPSIZE:
869 /* TODO: expand more builtins. */
870 return true;
871 default:
872 return false;
873 };
874}
875
876/* Try to expand the given builtin call to reuse a previously generated
877 variable, if possible. If not, just call the given builtin.
878 BRIG_OPCODE and BRIG_TYPE identify the builtin's BRIG opcode/type,
879 ARITH_TYPE its GENERIC type, and OPERANDS contains the builtin's
880 input operands. */
881
tree
brig_code_entry_handler::expand_or_call_builtin (BrigOpcode16_t brig_opcode,
						 BrigType16_t brig_type,
						 tree arith_type,
						 tree_stl_vec &operands)
{
  /* Inside kernels some builtins can be open coded into reusable
     expressions instead of an opaque call.  */
  if (m_parent.m_cf->m_is_kernel && can_expand_builtin (brig_opcode))
    return expand_builtin (brig_opcode, operands);

  tree built_in
    = get_builtin_for_hsa_opcode (arith_type, brig_opcode, brig_type);

  /* If only a scalar builtin is available for a vector operation,
     scalarize: call it per lane and repack the results.  The listed
     opcodes take packed operands and must not be scalarized.  */
  if (!VECTOR_TYPE_P (TREE_TYPE (TREE_TYPE (built_in)))
      && arith_type != NULL_TREE && VECTOR_TYPE_P (arith_type)
      && brig_opcode != BRIG_OPCODE_LERP
      && brig_opcode != BRIG_OPCODE_PACKCVT
      && brig_opcode != BRIG_OPCODE_SAD
      && brig_opcode != BRIG_OPCODE_SADHI)
    {
      /* Call the scalar built-in for all elements in the vector.  */
      tree_stl_vec operand0_elements;
      if (operands.size () > 0)
	unpack (operands[0], operand0_elements);

      tree_stl_vec operand1_elements;
      if (operands.size () > 1)
	unpack (operands[1], operand1_elements);

      tree_stl_vec result_elements;

      for (size_t i = 0; i < TYPE_VECTOR_SUBPARTS (arith_type); ++i)
	{
	  tree_stl_vec call_operands;
	  if (operand0_elements.size () > 0)
	    call_operands.push_back (operand0_elements.at (i));

	  if (operand1_elements.size () > 0)
	    call_operands.push_back (operand1_elements.at (i));

	  result_elements.push_back
	    (expand_or_call_builtin (brig_opcode, brig_type,
				     TREE_TYPE (arith_type),
				     call_operands));
	}
      return pack (result_elements);
    }

  tree_stl_vec call_operands;
  tree_stl_vec operand_types;

  tree arg_type_chain = TYPE_ARG_TYPES (TREE_TYPE (built_in));

  /* Convert the operands to the builtin's parameter types.  */
  for (size_t i = 0; i < operands.size (); ++i)
    {
      tree operand_type = TREE_VALUE (arg_type_chain);
      call_operands.push_back (convert (operand_type, operands[i]));
      operand_types.push_back (operand_type);
      arg_type_chain = TREE_CHAIN (arg_type_chain);
    }

  if (needs_workitem_context_data (brig_opcode))
    {
      /* Context-dependent builtins receive a pointer to the work-item
	 context object as a trailing argument.  */
      call_operands.push_back (m_parent.m_cf->m_context_arg);
      operand_types.push_back (ptr_type_node);
      m_parent.m_cf->m_has_unexpanded_dp_builtins = true;
    }

  size_t operand_count = call_operands.size ();

  /* NOTE(review): assumes HSAIL builtins take at most four arguments
     (including the trailing context arg) — confirm against
     brig-builtins.def.  */
  call_operands.resize (4, NULL_TREE);
  operand_types.resize (4, NULL_TREE);
  for (size_t i = 0; i < operand_count; ++i)
    call_operands.at (i) = build_reinterpret_cast (operand_types.at (i),
						   call_operands.at (i));

  tree fnptr = build_fold_addr_expr (built_in);
  return build_call_array (TREE_TYPE (TREE_TYPE (built_in)), fnptr,
			   operand_count, &call_operands[0]);
}
961
962/* Instead of calling a built-in, reuse a previously returned value known to
963 be still valid. This is beneficial especially for the work-item
964 identification related builtins as not having them as calls can lead to
965 more easily vectorizable parallel loops for multi work-item work-groups.
966 BRIG_OPCODE identifies the builtin and OPERANDS store the operands. */
967
968tree
969brig_code_entry_handler::expand_builtin (BrigOpcode16_t brig_opcode,
970 tree_stl_vec &operands)
971{
972 tree_stl_vec uint32_0 = tree_stl_vec (1, build_int_cst (uint32_type_node, 0));
973
974 tree_stl_vec uint32_1 = tree_stl_vec (1, build_int_cst (uint32_type_node, 1));
975
976 tree_stl_vec uint32_2 = tree_stl_vec (1, build_int_cst (uint32_type_node, 2));
977
978 if (brig_opcode == BRIG_OPCODE_WORKITEMFLATABSID)
979 {
980 tree id0 = expand_builtin (BRIG_OPCODE_WORKITEMABSID, uint32_0);
981 id0 = convert (uint64_type_node, id0);
982
983 tree id1 = expand_builtin (BRIG_OPCODE_WORKITEMABSID, uint32_1);
984 id1 = convert (uint64_type_node, id1);
985
986 tree id2 = expand_builtin (BRIG_OPCODE_WORKITEMABSID, uint32_2);
987 id2 = convert (uint64_type_node, id2);
988
989 tree max0 = convert (uint64_type_node,
990 m_parent.m_cf->m_grid_size_vars[0]);
991 tree max1 = convert (uint64_type_node,
992 m_parent.m_cf->m_grid_size_vars[1]);
993
994 tree id2_x_max0_x_max1 = build2 (MULT_EXPR, uint64_type_node, id2, max0);
995 id2_x_max0_x_max1
996 = build2 (MULT_EXPR, uint64_type_node, id2_x_max0_x_max1, max1);
997
998 tree id1_x_max0 = build2 (MULT_EXPR, uint64_type_node, id1, max0);
999
1000 tree sum = build2 (PLUS_EXPR, uint64_type_node, id0, id1_x_max0);
1001 sum = build2 (PLUS_EXPR, uint64_type_node, sum, id2_x_max0_x_max1);
1002
1003 return add_temp_var ("workitemflatabsid", sum);
1004 }
1005 else if (brig_opcode == BRIG_OPCODE_WORKITEMABSID)
1006 {
1007 HOST_WIDE_INT dim = int_constant_value (operands[0]);
1008
1009 tree local_id_var = m_parent.m_cf->m_local_id_vars[dim];
1010 tree wg_id_var = m_parent.m_cf->m_wg_id_vars[dim];
1011 tree wg_size_var = m_parent.m_cf->m_wg_size_vars[dim];
1012 tree grid_size_var = m_parent.m_cf->m_grid_size_vars[dim];
1013
1014 tree wg_id_x_wg_size = build2 (MULT_EXPR, uint32_type_node,
1015 convert (uint32_type_node, wg_id_var),
1016 convert (uint32_type_node, wg_size_var));
1017 tree sum
1018 = build2 (PLUS_EXPR, uint32_type_node, wg_id_x_wg_size, local_id_var);
1019
1020 /* We need a modulo here because of work-groups which have dimensions
1021 larger than the grid size :( TO CHECK: is this really allowed in the
1022 specs? */
1023 tree modulo
1024 = build2 (TRUNC_MOD_EXPR, uint32_type_node, sum, grid_size_var);
1025
1026 return add_temp_var (std::string ("workitemabsid_")
1027 + (char) ((int) 'x' + dim),
1028 modulo);
1029 }
1030 else if (brig_opcode == BRIG_OPCODE_WORKITEMFLATID)
1031 {
1032 tree z_x_wgsx_wgsy
1033 = build2 (MULT_EXPR, uint32_type_node,
1034 m_parent.m_cf->m_local_id_vars[2],
1035 m_parent.m_cf->m_wg_size_vars[0]);
1036 z_x_wgsx_wgsy = build2 (MULT_EXPR, uint32_type_node, z_x_wgsx_wgsy,
1037 m_parent.m_cf->m_wg_size_vars[1]);
1038
1039 tree y_x_wgsx
1040 = build2 (MULT_EXPR, uint32_type_node,
1041 m_parent.m_cf->m_local_id_vars[1],
1042 m_parent.m_cf->m_wg_size_vars[0]);
1043
1044 tree sum = build2 (PLUS_EXPR, uint32_type_node, y_x_wgsx, z_x_wgsx_wgsy);
1045 sum = build2 (PLUS_EXPR, uint32_type_node,
1046 m_parent.m_cf->m_local_id_vars[0],
1047 sum);
1048 return add_temp_var ("workitemflatid", sum);
1049 }
1050 else if (brig_opcode == BRIG_OPCODE_WORKGROUPSIZE)
1051 {
1052 HOST_WIDE_INT dim = int_constant_value (operands[0]);
1053 return m_parent.m_cf->m_wg_size_vars[dim];
1054 }
1055 else if (brig_opcode == BRIG_OPCODE_CURRENTWORKGROUPSIZE)
1056 {
1057 HOST_WIDE_INT dim = int_constant_value (operands[0]);
1058 return m_parent.m_cf->m_cur_wg_size_vars[dim];
1059 }
1060 else
1061 gcc_unreachable ();
1062
1063 return NULL_TREE;
1064}
1065
1066/* Appends and returns a new temp variable and an accompanying assignment
1067 statement that stores the value of the given EXPR and has the given NAME. */
1068
1069tree
1070brig_code_entry_handler::add_temp_var (std::string name, tree expr)
1071{
1072 tree temp_var = create_tmp_var (TREE_TYPE (expr), name.c_str ());
1073 tree assign = build2 (MODIFY_EXPR, TREE_TYPE (temp_var), temp_var, expr);
1074 m_parent.m_cf->append_statement (assign);
1075 return temp_var;
1076}
1077
/* Creates a FP32 to FP16 conversion call for SOURCE, which is assumed to
   be an FP32 value.  The result holds the FP16 value in the integer
   storage format produced by the conversion builtin (see
   float_to_half::visit_element).  */

tree
brig_code_entry_handler::build_f2h_conversion (tree source)
{
  return float_to_half () (*this, source);
}
1086
/* Creates a FP16 to FP32 conversion call for SOURCE, which holds the FP16
   value in its integer storage format.  The returned value is an FP32
   variable (see half_to_float::visit_element).  */

tree
brig_code_entry_handler::build_h2f_conversion (tree source)
{
  return half_to_float () (*this, source);
}
1095
/* Builds and "normalizes" the dest and source operands for the instruction
   execution; converts the input operands to the expected instruction type,
   performs half to float conversions, constant to correct type variable,
   and flush to zero (if applicable). */

tree_stl_vec
brig_code_entry_handler::build_operands (const BrigInstBase &brig_inst)
{
  /* Flush to zero.  The FTZ flag lives in the modifier field of the MOD
     and CMP instruction variants. */
  bool ftz = false;
  const BrigBase *base = &brig_inst.base;

  if (base->kind == BRIG_KIND_INST_MOD)
    {
      const BrigInstMod *mod = (const BrigInstMod *) base;
      ftz = mod->modifier & BRIG_ALU_FTZ;
    }
  else if (base->kind == BRIG_KIND_INST_CMP)
    {
      const BrigInstCmp *cmp = (const BrigInstCmp *) base;
      ftz = cmp->modifier & BRIG_ALU_FTZ;
    }

  bool is_vec_instr = hsa_type_packed_p (brig_inst.type);

  /* For packed types, the element count is derived from the ratio of the
     packed type's bit size to its base element's bit size. */
  size_t element_count;
  if (is_vec_instr)
    {
      BrigType16_t brig_element_type = brig_inst.type & BRIG_TYPE_BASE_MASK;
      element_count = gccbrig_hsa_type_bit_size (brig_inst.type)
		      / gccbrig_hsa_type_bit_size (brig_element_type);
    }
  else
    element_count = 1;

  bool is_fp16_arith = false;

  /* Derive the source and destination tree types from the instruction
     kind; several kinds carry a separate sourceType field. */
  tree src_type;
  tree dest_type;
  if (base->kind == BRIG_KIND_INST_CMP)
    {
      const BrigInstCmp *cmp_inst = (const BrigInstCmp *) base;
      src_type = gccbrig_tree_type_for_hsa_type (cmp_inst->sourceType);
      dest_type = gccbrig_tree_type_for_hsa_type (brig_inst.type);
      is_fp16_arith
	= (cmp_inst->sourceType & BRIG_TYPE_BASE_MASK) == BRIG_TYPE_F16;
    }
  else if (base->kind == BRIG_KIND_INST_SOURCE_TYPE)
    {
      const BrigInstSourceType *src_type_inst
	= (const BrigInstSourceType *) base;
      src_type = gccbrig_tree_type_for_hsa_type (src_type_inst->sourceType);
      dest_type = gccbrig_tree_type_for_hsa_type (brig_inst.type);
      is_fp16_arith
	= (src_type_inst->sourceType & BRIG_TYPE_BASE_MASK) == BRIG_TYPE_F16
	&& !gccbrig_is_bit_operation (brig_inst.opcode);
    }
  else if (base->kind == BRIG_KIND_INST_SEG_CVT)
    {
      const BrigInstSegCvt *seg_cvt_inst = (const BrigInstSegCvt *) base;
      src_type = gccbrig_tree_type_for_hsa_type (seg_cvt_inst->sourceType);
      dest_type = gccbrig_tree_type_for_hsa_type (brig_inst.type);
    }
  else if (base->kind == BRIG_KIND_INST_MEM)
    {
      src_type = gccbrig_tree_type_for_hsa_type (brig_inst.type);
      dest_type = src_type;
      /* With mem instructions we don't want to cast the fp16
	 back and forth between fp32, because the load/stores
	 are not specific to the data type. */
      is_fp16_arith = false;
    }
  else if (base->kind == BRIG_KIND_INST_CVT)
    {
      const BrigInstCvt *cvt_inst = (const BrigInstCvt *) base;

      src_type = gccbrig_tree_type_for_hsa_type (cvt_inst->sourceType);
      dest_type = gccbrig_tree_type_for_hsa_type (brig_inst.type);
    }
  else
    {
      switch (brig_inst.opcode)
	{
	case BRIG_OPCODE_INITFBAR:
	case BRIG_OPCODE_JOINFBAR:
	case BRIG_OPCODE_WAITFBAR:
	case BRIG_OPCODE_ARRIVEFBAR:
	case BRIG_OPCODE_LEAVEFBAR:
	case BRIG_OPCODE_RELEASEFBAR:
	  /* The fbar operations take the fbarrier handle as an u32. */
	  src_type = uint32_type_node;
	  break;
	default:
	  src_type = gccbrig_tree_type_for_hsa_type (brig_inst.type);
	  break;
	}
      dest_type = src_type;
      is_fp16_arith
	= !gccbrig_is_bit_operation (brig_inst.opcode)
	&& (brig_inst.type & BRIG_TYPE_BASE_MASK) == BRIG_TYPE_F16;
    }

  /* Halfs are a tricky special case: their "storage format" is u16, but
     scalars are stored in 32b regs while packed f16 are... well packed. */
  tree half_storage_type = element_count > 1
			   ? gccbrig_tree_type_for_hsa_type (brig_inst.type)
			   : uint32_type_node;

  /* Walk the operand offset list (an array of 32b offsets into the BRIG
     operand section) and build a GENERIC node for each operand. */
  const BrigData *operand_entries
    = m_parent.get_brig_data_entry (brig_inst.operands);
  std::vector<tree> operands;
  for (size_t i = 0; i < operand_entries->byteCount / 4; ++i)
    {
      uint32_t operand_offset = ((const uint32_t *) &operand_entries->bytes)[i];
      const BrigBase *operand_data
	= m_parent.get_brig_operand_entry (operand_offset);

      const bool is_output
	= gccbrig_hsa_opcode_op_output_p (brig_inst.opcode, i);

      tree operand_type = is_output ? dest_type : src_type;

      bool half_to_float = is_fp16_arith;

      /* Special cases for operand types. */
      if ((brig_inst.opcode == BRIG_OPCODE_SHL
	   || brig_inst.opcode == BRIG_OPCODE_SHR)
	  && i == 2)
	/* The shift amount is always a scalar. */
	operand_type
	  = VECTOR_TYPE_P (src_type) ? TREE_TYPE (src_type) : src_type;
      else if (brig_inst.opcode == BRIG_OPCODE_SHUFFLE)
	{
	  if (i == 3)
	    /* HSAIL shuffle inputs the MASK vector as tightly packed bits
	       while GENERIC VEC_PERM_EXPR expects the mask elements to be
	       of the same size as the elements in the input vectors. Let's
	       cast to a scalar type here and convert to the VEC_PERM_EXPR
	       format in instruction handling. There are no arbitrary bit
	       width int types in GENERIC so we cannot use the original
	       vector type. */
	    operand_type = uint32_type_node;
	  else
	    /* Always treat the element as unsigned ints to avoid
	       sign extensions/negative offsets with masks, which
	       are expected to be of the same element type as the
	       data in VEC_PERM_EXPR. With shuffles the data type
	       should not matter as it's a "raw operation". */
	    operand_type = get_unsigned_int_type (operand_type);
	}
      else if (brig_inst.opcode == BRIG_OPCODE_PACK)
	{
	  if (i == 1)
	    operand_type = get_unsigned_int_type (dest_type);
	  else if (i == 2)
	    operand_type = get_unsigned_int_type (TREE_TYPE (dest_type));
	  else if (i == 3)
	    operand_type = uint32_type_node;
	}
      else if (brig_inst.opcode == BRIG_OPCODE_UNPACK && i == 2)
	operand_type = uint32_type_node;
      else if (brig_inst.opcode == BRIG_OPCODE_SAD && i == 3)
	operand_type = uint32_type_node;
      else if (brig_inst.opcode == BRIG_OPCODE_CLASS && i == 2)
	{
	  operand_type = uint32_type_node;
	  half_to_float = false;
	}
      else if (half_to_float)
	/* Treat the operands as the storage type at this point. */
	operand_type = half_storage_type;

      tree operand = build_tree_operand (brig_inst, *operand_data, operand_type,
					 !is_output);

      gcc_assert (operand);

      /* Cast/convert the inputs to correct types as expected by the GENERIC
	 opcode instruction. */
      if (!is_output)
	{
	  if (half_to_float)
	    operand = build_h2f_conversion
	      (build_reinterpret_cast (half_storage_type, operand));
	  else if (TREE_CODE (operand) != LABEL_DECL
		   && TREE_CODE (operand) != TREE_VEC
		   && operand_data->kind != BRIG_KIND_OPERAND_ADDRESS
		   && !VECTOR_TYPE_P (TREE_TYPE (operand)))
	    {
	      /* Reconcile differing widths between the HSAIL register
		 variable and the width the instruction expects. */
	      size_t reg_width = int_size_in_bytes (TREE_TYPE (operand));
	      size_t instr_width = int_size_in_bytes (operand_type);
	      if (reg_width == instr_width)
		operand = build_reinterpret_cast (operand_type, operand);
	      else if (reg_width > instr_width)
		{
		  /* Clip the operand because the instruction's bitwidth
		     is smaller than the HSAIL reg width. */
		  if (INTEGRAL_TYPE_P (operand_type))
		    operand
		      = convert_to_integer (signed_or_unsigned_type_for
					    (TYPE_UNSIGNED (operand_type),
					     operand_type), operand);
		  else
		    operand = build_reinterpret_cast (operand_type, operand);
		}
	      else if (reg_width < instr_width)
		/* At least shift amount operands can be read from smaller
		   registers than the data operands. */
		operand = convert (operand_type, operand);
	    }
	  else if (brig_inst.opcode == BRIG_OPCODE_SHUFFLE)
	    /* Force the operand type to be treated as the raw type. */
	    operand = build_reinterpret_cast (operand_type, operand);

	  if (brig_inst.opcode == BRIG_OPCODE_CMOV && i == 1)
	    {
	      /* gcc expects the lower bit to be 1 (or all ones in case of
		 vectors) while CMOV assumes false iff 0. Convert the input
		 here to what gcc likes by generating
		 'operand = operand != 0'. */
	      tree cmp_res_type = get_comparison_result_type (operand_type);
	      operand = build2 (NE_EXPR, cmp_res_type, operand,
				build_zero_cst (TREE_TYPE (operand)));
	    }

	  if (ftz)
	    operand = flush_to_zero (is_fp16_arith) (*this, operand);
	}
      operands.push_back (operand);
    }
  return operands;
}
1327
/* Build the GENERIC for assigning the result of an instruction to the result
   "register" (variable). BRIG_INST is the original brig instruction,
   OUTPUT the result variable/register, INST_EXPR the one producing the
   result. Required bitcasts and fp32 to fp16 conversions are added as
   well. */

tree
brig_code_entry_handler::build_output_assignment (const BrigInstBase &brig_inst,
						  tree output, tree inst_expr)
{
  /* The destination type might be different from the output register
     variable type (which is always an unsigned integer type). */
  tree output_type = TREE_TYPE (output);
  tree input_type = TREE_TYPE (inst_expr);
  bool is_fp16 = (brig_inst.type & BRIG_TYPE_BASE_MASK) == BRIG_TYPE_F16
		 && brig_inst.base.kind != BRIG_KIND_INST_MEM
		 && !gccbrig_is_bit_operation (brig_inst.opcode);

  /* Flush to zero. */
  bool ftz = false;
  const BrigBase *base = &brig_inst.base;

  if (base->kind == BRIG_KIND_INST_MOD)
    {
      const BrigInstMod *mod = (const BrigInstMod *) base;
      ftz = mod->modifier & BRIG_ALU_FTZ;
    }
  else if (base->kind == BRIG_KIND_INST_CMP)
    {
      const BrigInstCmp *cmp = (const BrigInstCmp *) base;
      ftz = cmp->modifier & BRIG_ALU_FTZ;
    }

  /* For calls, use the return type of the called function declaration as
     the input type rather than the type of the CALL_EXPR node. */
  if (TREE_CODE (inst_expr) == CALL_EXPR)
    {
      tree func_decl = TREE_OPERAND (TREE_OPERAND (inst_expr, 1), 0);
      input_type = TREE_TYPE (TREE_TYPE (func_decl));
    }

  if (ftz && (VECTOR_FLOAT_TYPE_P (TREE_TYPE (inst_expr))
	      || SCALAR_FLOAT_TYPE_P (TREE_TYPE (inst_expr)) || is_fp16))
    {
      /* Ensure we don't duplicate the arithmetics to the arguments of the bit
	 field reference operators. */
      inst_expr = add_temp_var ("before_ftz", inst_expr);
      inst_expr = flush_to_zero (is_fp16) (*this, inst_expr);
    }

  if (is_fp16)
    {
      /* Convert the result back to the f16 storage format before storing
	 it to the (integer) register variable. */
      inst_expr = add_temp_var ("before_f2h", inst_expr);
      tree f2h_output = build_f2h_conversion (inst_expr);
      tree conv_int = convert_to_integer (output_type, f2h_output);
      tree assign = build2 (MODIFY_EXPR, output_type, output, conv_int);
      m_parent.m_cf->append_statement (assign);
      return assign;
    }
  else if (VECTOR_TYPE_P (TREE_TYPE (output)))
    {
      /* Expand/unpack the input value to the given vector elements:
	 recurse with a bit field reference for each element of the
	 output constructor. */
      size_t i;
      tree input = inst_expr;
      tree element_type = gccbrig_tree_type_for_hsa_type (brig_inst.type);
      tree element;
      tree last_assign = NULL_TREE;
      FOR_EACH_CONSTRUCTOR_VALUE (CONSTRUCTOR_ELTS (output), i, element)
	{
	  tree element_ref
	    = build3 (BIT_FIELD_REF, element_type, input,
		      TYPE_SIZE (element_type),
		      build_int_cst (uint32_type_node,
				     i * int_size_in_bytes (element_type)
				     * BITS_PER_UNIT));

	  last_assign
	    = build_output_assignment (brig_inst, element, element_ref);
	}
      return last_assign;
    }
  else
    {
      /* All we do here is to bitcast the result and store it to the
	 'register' (variable). Mainly need to take care of differing
	 bitwidths. */
      size_t src_width = int_size_in_bytes (input_type);
      size_t dst_width = int_size_in_bytes (output_type);

      if (src_width == dst_width)
	{
	  /* A simple bitcast should do. */
	  tree bitcast = build_reinterpret_cast (output_type, inst_expr);
	  tree assign = build2 (MODIFY_EXPR, output_type, output, bitcast);
	  m_parent.m_cf->append_statement (assign);
	  return assign;
	}
      else
	{
	  /* Differing widths: convert as an integer instead, which
	     widens/narrows the value to the register width. */
	  tree conv_int = convert_to_integer (output_type, inst_expr);
	  tree assign = build2 (MODIFY_EXPR, output_type, output, conv_int);
	  m_parent.m_cf->append_statement (assign);
	  return assign;
	}
    }
  return NULL_TREE;
}
1433
/* Appends a GENERIC statement (STMT) to the currently constructed function. */

void
brig_code_entry_handler::append_statement (tree stmt)
{
  /* Forwards to the function currently being built (m_parent.m_cf);
     visitor helpers that only hold a handler reference use this.  */
  m_parent.m_cf->append_statement (stmt);
}
1441
1442/* Unpacks the elements of the vector in VALUE to scalars (bit field
1443 references) in ELEMENTS. */
1444
1445void
1446brig_code_entry_handler::unpack (tree value, tree_stl_vec &elements)
1447{
1448 size_t vec_size = int_size_in_bytes (TREE_TYPE (value));
1449 size_t element_size
1450 = int_size_in_bytes (TREE_TYPE (TREE_TYPE (value))) * BITS_PER_UNIT;
1451 size_t element_count
1452 = vec_size * BITS_PER_UNIT / element_size;
1453
1454 tree input_element_type = TREE_TYPE (TREE_TYPE (value));
1455
1456 value = add_temp_var ("unpack_input", value);
1457
1458 for (size_t i = 0; i < element_count; ++i)
1459 {
1460 tree element
1461 = build3 (BIT_FIELD_REF, input_element_type, value,
1462 TYPE_SIZE (input_element_type),
1463 build_int_cst (unsigned_char_type_node, i * element_size));
1464
1465 element = add_temp_var ("scalar", element);
1466 elements.push_back (element);
1467 }
1468}
1469
1470/* Pack the elements of the scalars in ELEMENTS to the returned vector. */
1471
1472tree
1473brig_code_entry_handler::pack (tree_stl_vec &elements)
1474{
1475 size_t element_count = elements.size ();
1476
1477 gcc_assert (element_count > 1);
1478
1479 tree output_element_type = TREE_TYPE (elements.at (0));
1480
1481 vec<constructor_elt, va_gc> *constructor_vals = NULL;
1482 for (size_t i = 0; i < element_count; ++i)
1483 CONSTRUCTOR_APPEND_ELT (constructor_vals, NULL_TREE, elements.at (i));
1484
1485 tree vec_type = build_vector_type (output_element_type, element_count);
1486
1487 /* build_constructor creates a vector type which is not a vector_cst
1488 that requires compile time constant elements. */
1489 tree vec = build_constructor (vec_type, constructor_vals);
1490
1491 /* Add a temp variable for readability. */
1492 tree tmp_var = create_tmp_var (vec_type, "vec_out");
1493 tree vec_tmp_assign = build2 (MODIFY_EXPR, TREE_TYPE (tmp_var), tmp_var, vec);
1494 m_parent.m_cf->append_statement (vec_tmp_assign);
1495 return tmp_var;
1496}
1497
1498/* Visits the element(s) in the OPERAND, calling HANDLER to each of them. */
1499
1500tree
1501tree_element_unary_visitor::operator () (brig_code_entry_handler &handler,
1502 tree operand)
1503{
1504 if (VECTOR_TYPE_P (TREE_TYPE (operand)))
1505 {
1506 size_t vec_size = int_size_in_bytes (TREE_TYPE (operand));
1507 size_t element_size = int_size_in_bytes (TREE_TYPE (TREE_TYPE (operand)));
1508 size_t element_count = vec_size / element_size;
1509
1510 tree input_element_type = TREE_TYPE (TREE_TYPE (operand));
1511 tree output_element_type = NULL_TREE;
1512
1513 vec<constructor_elt, va_gc> *constructor_vals = NULL;
1514 for (size_t i = 0; i < element_count; ++i)
1515 {
1516 tree element = build3 (BIT_FIELD_REF, input_element_type, operand,
1517 TYPE_SIZE (input_element_type),
1518 build_int_cst (unsigned_char_type_node,
1519 i * element_size
1520 * BITS_PER_UNIT));
1521
1522 tree output = visit_element (handler, element);
1523 output_element_type = TREE_TYPE (output);
1524
1525 CONSTRUCTOR_APPEND_ELT (constructor_vals, NULL_TREE, output);
1526 }
1527
1528 tree vec_type = build_vector_type (output_element_type, element_count);
1529
1530 /* build_constructor creates a vector type which is not a vector_cst
1531 that requires compile time constant elements. */
1532 tree vec = build_constructor (vec_type, constructor_vals);
1533
1534 /* Add a temp variable for readability. */
1535 tree tmp_var = create_tmp_var (vec_type, "vec_out");
1536 tree vec_tmp_assign
1537 = build2 (MODIFY_EXPR, TREE_TYPE (tmp_var), tmp_var, vec);
1538 handler.append_statement (vec_tmp_assign);
1539 return tmp_var;
1540 }
1541 else
1542 return visit_element (handler, operand);
1543}
1544
1545/* Visits the element pair(s) in the OPERAND0 and OPERAND1, calling HANDLER
1546 to each of them. */
1547
1548tree
1549tree_element_binary_visitor::operator () (brig_code_entry_handler &handler,
1550 tree operand0, tree operand1)
1551{
1552 if (VECTOR_TYPE_P (TREE_TYPE (operand0)))
1553 {
1554 gcc_assert (VECTOR_TYPE_P (TREE_TYPE (operand1)));
1555 size_t vec_size = int_size_in_bytes (TREE_TYPE (operand0));
1556 size_t element_size
1557 = int_size_in_bytes (TREE_TYPE (TREE_TYPE (operand0)));
1558 size_t element_count = vec_size / element_size;
1559
1560 tree input_element_type = TREE_TYPE (TREE_TYPE (operand0));
1561 tree output_element_type = NULL_TREE;
1562
1563 vec<constructor_elt, va_gc> *constructor_vals = NULL;
1564 for (size_t i = 0; i < element_count; ++i)
1565 {
1566
1567 tree element0 = build3 (BIT_FIELD_REF, input_element_type, operand0,
1568 TYPE_SIZE (input_element_type),
1569 build_int_cst (unsigned_char_type_node,
1570 i * element_size
1571 * BITS_PER_UNIT));
1572
1573 tree element1 = build3 (BIT_FIELD_REF, input_element_type, operand1,
1574 TYPE_SIZE (input_element_type),
1575 build_int_cst (unsigned_char_type_node,
1576 i * element_size
1577 * BITS_PER_UNIT));
1578
1579 tree output = visit_element (handler, element0, element1);
1580 output_element_type = TREE_TYPE (output);
1581
1582 CONSTRUCTOR_APPEND_ELT (constructor_vals, NULL_TREE, output);
1583 }
1584
1585 tree vec_type = build_vector_type (output_element_type, element_count);
1586
1587 /* build_constructor creates a vector type which is not a vector_cst
1588 that requires compile time constant elements. */
1589 tree vec = build_constructor (vec_type, constructor_vals);
1590
1591 /* Add a temp variable for readability. */
1592 tree tmp_var = create_tmp_var (vec_type, "vec_out");
1593 tree vec_tmp_assign
1594 = build2 (MODIFY_EXPR, TREE_TYPE (tmp_var), tmp_var, vec);
1595 handler.append_statement (vec_tmp_assign);
1596 return tmp_var;
1597 }
1598 else
1599 return visit_element (handler, operand0, operand1);
1600}
1601
1602/* Generates GENERIC code that flushes the visited element to zero. */
1603
1604tree
1605flush_to_zero::visit_element (brig_code_entry_handler &, tree operand)
1606{
1607 size_t size = int_size_in_bytes (TREE_TYPE (operand));
1608 if (size == 4)
1609 {
1610 tree built_in
1611 = (m_fp16) ? builtin_decl_explicit (BUILT_IN_HSAIL_FTZ_F32_F16) :
1612 builtin_decl_explicit (BUILT_IN_HSAIL_FTZ_F32);
1613
1614 return call_builtin (built_in, 1, float_type_node, float_type_node,
1615 operand);
1616 }
1617 else if (size == 8)
1618 {
1619 return call_builtin (builtin_decl_explicit (BUILT_IN_HSAIL_FTZ_F64), 1,
1620 double_type_node, double_type_node, operand);
1621 }
1622 else
1623 gcc_unreachable ();
1624 return NULL_TREE;
1625}
1626
1627/* Generates GENERIC code that converts a single precision float to half
1628 precision float. */
1629
1630tree
1631float_to_half::visit_element (brig_code_entry_handler &caller, tree operand)
1632{
1633 tree built_in = builtin_decl_explicit (BUILT_IN_HSAIL_F32_TO_F16);
1634
1635 tree casted_operand = build_reinterpret_cast (uint32_type_node, operand);
1636
1637 tree call = call_builtin (built_in, 1, uint16_type_node, uint32_type_node,
1638 casted_operand);
1639 tree output
1640 = create_tmp_var (TREE_TYPE (TREE_TYPE (built_in)), "fp16out");
1641 tree assign = build2 (MODIFY_EXPR, TREE_TYPE (output), output, call);
1642 caller.append_statement (assign);
1643 return output;
1644}
1645
1646/* Generates GENERIC code that converts a half precision float to single
1647 precision float. */
1648
1649tree
1650half_to_float::visit_element (brig_code_entry_handler &caller, tree operand)
1651{
1652 tree built_in = builtin_decl_explicit (BUILT_IN_HSAIL_F16_TO_F32);
1653 tree truncated_source = convert_to_integer (uint16_type_node, operand);
1654
1655 tree call
1656 = call_builtin (built_in, 1, uint32_type_node, uint16_type_node,
1657 truncated_source);
1658
1659 tree const_fp32_type
1660 = build_type_variant (brig_to_generic::s_fp32_type, 1, 0);
1661
1662 tree output = create_tmp_var (const_fp32_type, "fp32out");
1663 tree casted_result
1664 = build_reinterpret_cast (brig_to_generic::s_fp32_type, call);
1665
1666 tree assign = build2 (MODIFY_EXPR, TREE_TYPE (output), output, casted_result);
1667
1668 caller.append_statement (assign);
1669
1670 return output;
1671}
1672
1673/* Treats the INPUT as SRC_TYPE and sign or zero extends it to DEST_TYPE. */
1674
1675tree
1676brig_code_entry_handler::extend_int (tree input, tree dest_type, tree src_type)
1677{
1678 /* Extend integer conversions according to the destination's
1679 ext mode. First we need to clip the input register to
1680 the possible smaller integer size to ensure the correct sign
1681 bit is extended. */
1682 tree clipped_input = convert_to_integer (src_type, input);
1683 tree conversion_result;
1684
1685 if (TYPE_UNSIGNED (src_type))
1686 conversion_result
1687 = convert_to_integer (unsigned_type_for (dest_type), clipped_input);
1688 else
1689 conversion_result
1690 = convert_to_integer (signed_type_for (dest_type), clipped_input);
1691
1692 /* Treat the result as unsigned so we do not sign extend to the
1693 register width. For some reason this GENERIC sequence sign
1694 extends to the s register:
1695
1696 D.1541 = (signed char) s1;
1697 D.1542 = (signed short) D.1541;
1698 s0 = (unsigned int) D.1542
1699 */
1700
1701 /* The converted result is then extended to the target register
1702 width, using the same sign as the destination. */
1703 return convert_to_integer (dest_type, conversion_result);
1704}
1705
1706/* Returns the integer constant value of the given node.
1707 If it's a cast, looks into the source of the cast. */
1708HOST_WIDE_INT
1709brig_code_entry_handler::int_constant_value (tree node)
1710{
1711 tree n = node;
1712 if (TREE_CODE (n) == VIEW_CONVERT_EXPR)
1713 n = TREE_OPERAND (n, 0);
1714 return int_cst_value (n);
1715}
1716