1 | /* brig-code-entry-handler.cc -- a gccbrig base class |
2 | Copyright (C) 2016 Free Software Foundation, Inc. | |
3 | Contributed by Pekka Jaaskelainen <pekka.jaaskelainen@parmance.com> | |
4 | for General Processor Tech. | |
5 | ||
6 | This file is part of GCC. | |
7 | ||
8 | GCC is free software; you can redistribute it and/or modify it under | |
9 | the terms of the GNU General Public License as published by the Free | |
10 | Software Foundation; either version 3, or (at your option) any later | |
11 | version. | |
12 | ||
13 | GCC is distributed in the hope that it will be useful, but WITHOUT ANY | |
14 | WARRANTY; without even the implied warranty of MERCHANTABILITY or | |
15 | FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License | |
16 | for more details. | |
17 | ||
18 | You should have received a copy of the GNU General Public License | |
19 | along with GCC; see the file COPYING3. If not see | |
20 | <http://www.gnu.org/licenses/>. */ | |
21 | ||
22 | #include "brig-code-entry-handler.h" | |
23 | ||
24 | #include "stringpool.h" | |
25 | #include "tree-iterator.h" | |
26 | #include "toplev.h" | |
27 | #include "diagnostic.h" | |
28 | #include "brig-machine.h" | |
29 | #include "brig-util.h" | |
30 | #include "errors.h" | |
31 | #include "real.h" | |
32 | #include "print-tree.h" | |
33 | #include "tree-pretty-print.h" | |
34 | #include "target.h" | |
35 | #include "langhooks.h" | |
36 | #include "gimple-expr.h" | |
37 | #include "convert.h" | |
38 | #include "brig-util.h" | |
39 | #include "builtins.h" | |
40 | #include "phsa.h" | |
41 | #include "brig-builtins.h" | |
42 | #include "fold-const.h" | |
43 | ||
/* Shared index of explicit HSAIL builtin declarations, keyed by the
   (HSAIL opcode, HSAIL type) pair.  Populated lazily by the first
   constructed handler and consulted by get_builtin_for_hsa_opcode.  */
brig_code_entry_handler::builtin_map brig_code_entry_handler::s_custom_builtins;
45 | ||
/* Constructs a code entry handler for the BRIG-to-GENERIC converter
   PARENT.  On first construction, fills the static builtin index
   (s_custom_builtins) from brig-builtins.def.  */

brig_code_entry_handler::brig_code_entry_handler (brig_to_generic &parent)
  : brig_entry_handler (parent)
{
  /* The index is shared by all handler instances; populate it only once.  */
  if (s_custom_builtins.size () > 0) return;

  /* Populate the builtin index.  Redefine the DEF_HSAIL_* X-macros so that
     including brig-builtins.def expands each entry into an insertion of the
     builtin declaration, keyed by its (opcode, type) pair.  All variant
     macros are undefined first so only DEF_HSAIL_BUILTIN (which the others
     are defined in terms of in the .def file) is in effect here.  */
#undef DEF_HSAIL_ATOMIC_BUILTIN
#undef DEF_HSAIL_CVT_ZEROI_SAT_BUILTIN
#undef DEF_HSAIL_INTR_BUILTIN
#undef DEF_HSAIL_SAT_BUILTIN
#undef DEF_HSAIL_BUILTIN
#define DEF_HSAIL_BUILTIN(ENUM, HSAIL_OPCODE, HSAIL_TYPE, NAME, TYPE, ATTRS) \
  s_custom_builtins[std::make_pair (HSAIL_OPCODE, HSAIL_TYPE)] \
    = builtin_decl_explicit (ENUM);

#include "brig-builtins.def"
}
63 | ||
64 | /* Build a tree operand which is a reference to a piece of code. REF is the | |
65 | original reference as a BRIG object. */ | |
66 | ||
67 | tree | |
68 | brig_code_entry_handler::build_code_ref (const BrigBase &ref) | |
69 | { | |
70 | if (ref.kind == BRIG_KIND_DIRECTIVE_LABEL) | |
71 | { | |
72 | const BrigDirectiveLabel *brig_label = (const BrigDirectiveLabel *) &ref; | |
73 | ||
74 | const BrigData *label_name | |
75 | = m_parent.get_brig_data_entry (brig_label->name); | |
76 | ||
77 | std::string label_str ((const char *) (label_name->bytes), | |
78 | label_name->byteCount); | |
79 | return m_parent.m_cf->label (label_str); | |
80 | } | |
81 | else if (ref.kind == BRIG_KIND_DIRECTIVE_FUNCTION) | |
82 | { | |
83 | const BrigDirectiveExecutable *func | |
84 | = (const BrigDirectiveExecutable *) &ref; | |
85 | return m_parent.function_decl (m_parent.get_mangled_name (func)); | |
86 | } | |
87 | else if (ref.kind == BRIG_KIND_DIRECTIVE_FBARRIER) | |
88 | { | |
89 | const BrigDirectiveFbarrier* fbar = (const BrigDirectiveFbarrier*)&ref; | |
90 | ||
91 | uint64_t offset = m_parent.group_variable_segment_offset | |
92 | (m_parent.get_mangled_name (fbar)); | |
93 | ||
94 | return build_int_cst (uint32_type_node, offset); | |
95 | } | |
96 | else | |
97 | gcc_unreachable (); | |
98 | } | |
99 | ||
/* Produce a tree operand for the given BRIG_INST and its OPERAND.
   OPERAND_TYPE should be the operand type in case it should not
   be dictated by the BrigBase.  IS_INPUT indicates if the operand
   is an input operand or a result.  */

tree
brig_code_entry_handler::build_tree_operand (const BrigInstBase &brig_inst,
					     const BrigBase &operand,
					     tree operand_type, bool is_input)
{
  switch (operand.kind)
    {
    case BRIG_KIND_OPERAND_OPERAND_LIST:
      {
	/* A list of operands becomes a vector constructor with one
	   element per listed operand.  The list is stored as an array
	   of 32-bit offsets into the BRIG operand section.  */
	vec<constructor_elt, va_gc> *constructor_vals = NULL;
	const BrigOperandOperandList &oplist
	  = (const BrigOperandOperandList &) operand;
	const BrigData *data = m_parent.get_brig_data_entry (oplist.elements);
	size_t bytes = data->byteCount;
	const BrigOperandOffset32_t *operand_ptr
	  = (const BrigOperandOffset32_t *) data->bytes;
	while (bytes > 0)
	  {
	    BrigOperandOffset32_t offset = *operand_ptr;
	    const BrigBase *operand_element
	      = m_parent.get_brig_operand_entry (offset);
	    /* Build each element recursively; IS_INPUT takes its default
	       value here.  */
	    tree element
	      = build_tree_operand (brig_inst, *operand_element, operand_type);

	    /* In case a vector is used an input, cast the elements to
	       correct size here so we don't need a separate unpack/pack for it.
	       fp16-fp32 conversion is done in build_operands ().  */
	    if (is_input && TREE_TYPE (element) != operand_type)
	      {
		/* Same-sized non-integral elements are reinterpreted
		   bit-exactly; anything else gets a value conversion.  */
		if (int_size_in_bytes (TREE_TYPE (element))
		      == int_size_in_bytes (operand_type)
		    && !INTEGRAL_TYPE_P (operand_type))
		  element = build1 (VIEW_CONVERT_EXPR, operand_type, element);
		else
		  element = convert (operand_type, element);
	      }

	    CONSTRUCTOR_APPEND_ELT (constructor_vals, NULL_TREE, element);
	    /* Each list entry is a 4-byte offset.  */
	    ++operand_ptr;
	    bytes -= 4;
	  }
	size_t element_count = data->byteCount / 4;
	tree vec_type = build_vector_type (operand_type, element_count);

	return build_constructor (vec_type, constructor_vals);
      }
    case BRIG_KIND_OPERAND_CODE_LIST:
      {
	/* Build a TREE_VEC of code expressions.  */

	const BrigOperandCodeList &oplist
	  = (const BrigOperandCodeList &) operand;
	const BrigData *data = m_parent.get_brig_data_entry (oplist.elements);
	size_t bytes = data->byteCount;
	const BrigOperandOffset32_t *operand_ptr
	  = (const BrigOperandOffset32_t *) data->bytes;

	size_t case_index = 0;
	size_t element_count = data->byteCount / 4;

	/* Create a TREE_VEC out of the labels in the list.  */
	tree vec = make_tree_vec (element_count);

	while (bytes > 0)
	  {
	    BrigOperandOffset32_t offset = *operand_ptr;
	    const BrigBase *ref = m_parent.get_brig_code_entry (offset);
	    tree element = build_code_ref (*ref);

	    gcc_assert (case_index < element_count);
	    TREE_VEC_ELT (vec, case_index) = element;
	    case_index++;

	    ++operand_ptr;
	    bytes -= 4;
	  }
	return vec;
      }
    case BRIG_KIND_OPERAND_REGISTER:
      {
	/* Registers map to the variables created for them in the
	   current function.  */
	const BrigOperandRegister *brig_reg
	  = (const BrigOperandRegister *) &operand;
	return m_parent.m_cf->get_m_var_declfor_reg (brig_reg);
      }
    case BRIG_KIND_OPERAND_CONSTANT_BYTES:
      {
	const BrigOperandConstantBytes *brigConst
	  = (const BrigOperandConstantBytes *) &operand;
	/* The constants can be of different type than the instruction
	   and are implicitly casted to the input operand.  */
	return get_tree_cst_for_hsa_operand (brigConst, NULL_TREE);
      }
    case BRIG_KIND_OPERAND_WAVESIZE:
      {
	/* WAVESIZE folds to a compile-time constant of the target's
	   wave size; only meaningful for integral operand types.  */
	if (!INTEGRAL_TYPE_P (operand_type))
	  {
	    gcc_unreachable ();
	    return NULL_TREE;
	  }
	return build_int_cstu (operand_type, gccbrig_get_target_wavesize ());
      }
    case BRIG_KIND_OPERAND_CODE_REF:
      {
	const BrigOperandCodeRef *brig_code_ref
	  = (const BrigOperandCodeRef *) &operand;

	const BrigBase *ref = m_parent.get_brig_code_entry (brig_code_ref->ref);

	return build_code_ref (*ref);
      }
    case BRIG_KIND_OPERAND_ADDRESS:
      {
	return build_address_operand (brig_inst,
				      (const BrigOperandAddress &) operand);
      }
    default:
      gcc_unreachable ();
    }
}
224 | ||
/* Build a tree node representing an address reference from a BRIG_INST and its
   ADDR_OPERAND.  The address may combine up to three components: a symbol
   base, a register offset and a constant offset.  Returns an expression of
   pointer type, pointing with the width of the instruction's type.  */

tree
brig_code_entry_handler::build_address_operand
  (const BrigInstBase &brig_inst, const BrigOperandAddress &addr_operand)
{
  tree instr_type = gccbrig_tree_type_for_hsa_type (brig_inst.type);

  /* Determine the segment the address refers to from the instruction's
     modifier fields; global is the default.  */
  BrigSegment8_t segment = BRIG_SEGMENT_GLOBAL;
  if (brig_inst.opcode == BRIG_OPCODE_LDA)
    segment = ((const BrigInstAddr &) brig_inst).segment;
  else if (brig_inst.base.kind == BRIG_KIND_INST_MEM)
    segment = ((const BrigInstMem &) brig_inst).segment;
  else if (brig_inst.base.kind == BRIG_KIND_INST_ATOMIC)
    segment = ((const BrigInstAtomic &) brig_inst).segment;

  tree var_offset = NULL_TREE;
  tree const_offset = NULL_TREE;
  tree symbol_base = NULL_TREE;

  if (addr_operand.symbol != 0)
    {
      const BrigDirectiveVariable *arg_symbol
	= (const BrigDirectiveVariable *) m_parent.get_brig_code_entry
	(addr_operand.symbol);

      std::string var_name = m_parent.get_mangled_name (arg_symbol);

      if (segment == BRIG_SEGMENT_KERNARG)
	{
	  /* Find the offset to the kernarg buffer for the given
	     kernel argument variable. */
	  tree func = m_parent.m_cf->m_func_decl;
	  /* __args is the first parameter in kernel functions. */
	  symbol_base = DECL_ARGUMENTS (func);
	  uint64_t offset = m_parent.m_cf->kernel_arg_offset (arg_symbol);
	  if (offset > 0)
	    const_offset = build_int_cst (size_type_node, offset);
	}
      else if (segment == BRIG_SEGMENT_GROUP)
	{

	  /* Group variables are addressed by their offset within the
	     group segment; the group base pointer is added below for
	     non-LDA instructions.  */
	  uint64_t offset = m_parent.group_variable_segment_offset (var_name);
	  const_offset = build_int_cst (size_type_node, offset);
	}
      else if (segment == BRIG_SEGMENT_PRIVATE || segment == BRIG_SEGMENT_SPILL)
	{
	  uint32_t offset = m_parent.private_variable_segment_offset (var_name);

	  /* Compute the offset to the work item's copy:

	     single-wi-offset * local_size + wiflatid * varsize

	     This way the work items have the same variable in
	     successive elements to each other in the segment,
	     helping to achieve autovectorization of loads/stores
	     with stride 1. */

	  /* Single-element argument vectors for the builtin expansions
	     below: dimension indices 0, 1 and 2.  */
	  tree_stl_vec uint32_0
	    = tree_stl_vec (1, build_int_cst (uint32_type_node, 0));

	  tree_stl_vec uint32_1
	    = tree_stl_vec (1, build_int_cst (uint32_type_node, 1));

	  tree_stl_vec uint32_2
	    = tree_stl_vec (1, build_int_cst (uint32_type_node, 2));

	  /* local_size = workgroupsize(0) * workgroupsize(1)
	     * workgroupsize(2), i.e. the flat work-group size.  */
	  tree local_size
	    = build2 (MULT_EXPR, uint32_type_node,
		      expand_or_call_builtin (BRIG_OPCODE_WORKGROUPSIZE,
					      BRIG_TYPE_U32,
					      uint32_type_node, uint32_0),
		      expand_or_call_builtin (BRIG_OPCODE_WORKGROUPSIZE,
					      BRIG_TYPE_U32,
					      uint32_type_node, uint32_1));

	  local_size
	    = build2 (MULT_EXPR, uint32_type_node,
		      expand_or_call_builtin (BRIG_OPCODE_WORKGROUPSIZE,
					      BRIG_TYPE_U32,
					      uint32_type_node, uint32_2),
		      local_size);

	  tree var_region
	    = build2 (MULT_EXPR, uint32_type_node,
		      build_int_cst (uint32_type_node, offset), local_size);

	  tree_stl_vec operands;
	  tree pos
	    = build2 (MULT_EXPR, uint32_type_node,
		      build_int_cst (uint32_type_node,
				     m_parent.private_variable_size (var_name)),
		      expand_or_call_builtin (BRIG_OPCODE_WORKITEMFLATID,
					      BRIG_TYPE_U32,
					      uint32_type_node, operands));

	  /* NOTE(review): this declaration shadows the function-scope
	     var_offset.  It is consumed immediately below, so behavior is
	     unaffected, but the shadowing is worth being aware of.  */
	  tree var_offset
	    = build2 (PLUS_EXPR, uint32_type_node, var_region, pos);

	  /* In case of LDA this is returned directly as an integer value.
	     For other mem-related instructions, we will convert this segment
	     offset to a flat address by adding it as an offset to a (private
	     or group) base pointer later on.  Same applies to group_var_offset. */
	  symbol_base
	    = add_temp_var ("priv_var_offset",
			    convert (size_type_node, var_offset));
	}
      else if (segment == BRIG_SEGMENT_ARG)
	{
	  /* Argument segment references resolve either to the call-site
	     return temp or to an argument variable of the current scope.  */
	  tree arg_var_decl;
	  if (m_parent.m_cf->m_ret_value_brig_var == arg_symbol)
	    arg_var_decl = m_parent.m_cf->m_ret_temp;
	  else
	    arg_var_decl = m_parent.m_cf->arg_variable (arg_symbol);

	  gcc_assert (arg_var_decl != NULL_TREE);

	  tree ptype = build_pointer_type (instr_type);

	  if (arg_symbol->type & BRIG_TYPE_ARRAY)
	    {

	      /* Two different type of array references in case of arguments
		 depending where they are referred at.  In the caller (argument
		 segment), the reference is to an array object and
		 in the callee, the array object has been passed as a pointer
		 to the array object. */

	      if (POINTER_TYPE_P (TREE_TYPE (arg_var_decl)))
		symbol_base = build_reinterpret_cast (ptype, arg_var_decl);
	      else
		{
		  /* In case we are referring to an array (the argument in
		     call site), use its element zero as the base address. */
		  tree element_zero
		    = build4 (ARRAY_REF, TREE_TYPE (TREE_TYPE (arg_var_decl)),
			      arg_var_decl, integer_zero_node, NULL_TREE,
			      NULL_TREE);
		  symbol_base = build1 (ADDR_EXPR, ptype, element_zero);
		}
	    }
	  else
	    symbol_base = build1 (ADDR_EXPR, ptype, arg_var_decl);
	}
      else
	{
	  tree global_var_decl = m_parent.global_variable (var_name);

	  /* In case the global variable hasn't been defined (yet),
	     use the host def indirection ptr variable. */
	  if (global_var_decl == NULL_TREE)
	    {
	      std::string host_ptr_name
		= std::string (PHSA_HOST_DEF_PTR_PREFIX) + var_name;
	      tree host_defined_ptr = m_parent.global_variable (host_ptr_name);
	      gcc_assert (host_defined_ptr != NULL_TREE);
	      symbol_base = host_defined_ptr;
	    }
	  else
	    {
	      gcc_assert (global_var_decl != NULL_TREE);

	      tree ptype = build_pointer_type (instr_type);
	      symbol_base = build1 (ADDR_EXPR, ptype, global_var_decl);
	    }
	}
    }

  if (brig_inst.opcode != BRIG_OPCODE_LDA)
    {
      /* In case of lda_* we want to return the segment address because it's
	 used as a value, perhaps in address computation and later converted
	 explicitly to a flat address.

	 In case of other instructions with memory operands we produce the flat
	 address directly here (assuming the target does not have a separate
	 address space for group/private segments for now). */
      if (segment == BRIG_SEGMENT_GROUP)
	symbol_base = m_parent.m_cf->m_group_base_arg;
      else if (segment == BRIG_SEGMENT_PRIVATE
	       || segment == BRIG_SEGMENT_SPILL)
	{
	  if (symbol_base != NULL_TREE)
	    symbol_base = build2 (POINTER_PLUS_EXPR, ptr_type_node,
				  m_parent.m_cf->m_private_base_arg,
				  symbol_base);
	  else
	    symbol_base = m_parent.m_cf->m_private_base_arg;
	}
    }

  if (addr_operand.reg != 0)
    {
      /* Register-relative addressing: the register's value acts as a
	 (byte) offset or address component.  */
      const BrigOperandRegister *mem_base_reg
	= (const BrigOperandRegister *) m_parent.get_brig_operand_entry
	(addr_operand.reg);
      tree base_reg_var = m_parent.m_cf->get_m_var_declfor_reg (mem_base_reg);
      var_offset = convert_to_pointer (ptr_type_node, base_reg_var);

      gcc_assert (var_offset != NULL_TREE);
    }
  /* The pointer type we use to access the memory.  Should be of the
     width of the load/store instruction, not the target/data
     register. */
  tree ptype = build_pointer_type (instr_type);

  gcc_assert (ptype != NULL_TREE);

  /* Combine whichever of the three components are present.  */
  tree addr = NULL_TREE;
  if (symbol_base != NULL_TREE && var_offset != NULL_TREE)
    /* The most complex addressing mode: symbol + reg [+ const offset]. */
    addr = build2 (POINTER_PLUS_EXPR, ptr_type_node,
		   convert (ptr_type_node, symbol_base),
		   convert (size_type_node, var_offset));
  else if (var_offset != NULL)
    addr = var_offset;
  else if (symbol_base != NULL)
    addr = symbol_base;

  if (const_offset != NULL_TREE)
    {
      if (addr == NULL_TREE)
	/* At least direct module-scope global group symbol access with LDA
	   has only the const_offset.  Group base ptr is not added as LDA should
	   return the segment address, not the flattened one. */
	addr = const_offset;
      else
	addr = build2 (POINTER_PLUS_EXPR, ptr_type_node,
		       addr, convert (size_type_node, const_offset));
    }

  /* We might have two const offsets in case of group or private arrays
     which have the first offset to the incoming group/private pointer
     arg, and the second one an offset to it. */
  uint64_t offs = gccbrig_to_uint64_t (addr_operand.offset);
  if (offs > 0)
    {
      tree const_offset_2 = build_int_cst (size_type_node, offs);
      if (addr == NULL_TREE)
	addr = const_offset_2;
      else
	addr = build2 (POINTER_PLUS_EXPR, ptr_type_node,
		       addr, convert (size_type_node, const_offset_2));

    }

  gcc_assert (addr != NULL_TREE);
  return convert_to_pointer (ptype, addr);
}
475 | ||
476 | /* Builds a tree operand with the given OPERAND_INDEX for the given | |
477 | BRIG_INST with the desired tree OPERAND_TYPE. OPERAND_TYPE can | |
478 | be NULL in case the type is forced by the BRIG_INST type. */ | |
479 | ||
480 | tree | |
481 | brig_code_entry_handler::build_tree_operand_from_brig | |
482 | (const BrigInstBase *brig_inst, tree operand_type, size_t operand_index) | |
483 | { | |
484 | const BrigData *operand_entries | |
485 | = m_parent.get_brig_data_entry (brig_inst->operands); | |
486 | ||
487 | uint32_t operand_offset | |
488 | = ((const uint32_t *) &operand_entries->bytes)[operand_index]; | |
489 | const BrigBase *operand_data | |
490 | = m_parent.get_brig_operand_entry (operand_offset); | |
491 | return build_tree_operand (*brig_inst, *operand_data, operand_type); | |
492 | } | |
493 | ||
494 | /* Builds a single (scalar) constant initialized element of type | |
495 | ELEMENT_TYPE from the buffer pointed to by NEXT_DATA. */ | |
496 | ||
497 | tree | |
498 | brig_code_entry_handler::build_tree_cst_element | |
499 | (BrigType16_t element_type, const unsigned char *next_data) const | |
500 | { | |
501 | ||
502 | tree tree_element_type = gccbrig_tree_type_for_hsa_type (element_type); | |
503 | ||
504 | tree cst; | |
505 | switch (element_type) | |
506 | { | |
507 | case BRIG_TYPE_F16: | |
508 | { | |
509 | HOST_WIDE_INT low = *(const uint16_t *) next_data; | |
510 | cst = build_int_cst (uint16_type_node, low); | |
511 | break; | |
512 | } | |
513 | case BRIG_TYPE_F32: | |
514 | { | |
515 | REAL_VALUE_TYPE val; | |
516 | ieee_single_format.decode (&ieee_single_format, &val, | |
517 | (const long *) next_data); | |
518 | cst = build_real (tree_element_type, val); | |
519 | break; | |
520 | } | |
521 | case BRIG_TYPE_F64: | |
522 | { | |
523 | long data[2]; | |
524 | data[0] = *(const uint32_t *) next_data; | |
525 | data[1] = *(const uint32_t *) (next_data + 4); | |
526 | REAL_VALUE_TYPE val; | |
527 | ieee_double_format.decode (&ieee_double_format, &val, data); | |
528 | cst = build_real (tree_element_type, val); | |
529 | break; | |
530 | } | |
531 | case BRIG_TYPE_S8: | |
532 | case BRIG_TYPE_S16: | |
533 | case BRIG_TYPE_S32: | |
534 | case BRIG_TYPE_S64: | |
535 | { | |
536 | HOST_WIDE_INT low = *(const int64_t *) next_data; | |
537 | cst = build_int_cst (tree_element_type, low); | |
538 | break; | |
539 | } | |
540 | case BRIG_TYPE_U8: | |
541 | case BRIG_TYPE_U16: | |
542 | case BRIG_TYPE_U32: | |
543 | case BRIG_TYPE_U64: | |
544 | { | |
545 | unsigned HOST_WIDE_INT low = *(const uint64_t *) next_data; | |
546 | cst = build_int_cstu (tree_element_type, low); | |
547 | break; | |
548 | } | |
549 | case BRIG_TYPE_SIG64: | |
550 | { | |
551 | unsigned HOST_WIDE_INT low = *(const uint64_t *) next_data; | |
552 | cst = build_int_cstu (uint64_type_node, low); | |
553 | break; | |
554 | } | |
555 | case BRIG_TYPE_SIG32: | |
556 | { | |
557 | unsigned HOST_WIDE_INT low = *(const uint64_t *) next_data; | |
558 | cst = build_int_cstu (uint32_type_node, low); | |
559 | break; | |
560 | } | |
561 | default: | |
562 | gcc_unreachable (); | |
563 | return NULL_TREE; | |
564 | } | |
565 | return cst; | |
566 | } | |
567 | ||
/* Produce a tree constant type for the given BRIG constant (BRIG_CONST).
   TYPE should be the forced instruction type, otherwise the type is
   dictated by the BRIG_CONST.  For array types, returns an array
   constructor; otherwise returns the (last) scalar or vector element
   built from the constant's data bytes.  */

tree
brig_code_entry_handler::get_tree_cst_for_hsa_operand
  (const BrigOperandConstantBytes *brig_const, tree type) const
{
  const BrigData *data = m_parent.get_brig_data_entry (brig_const->bytes);

  tree cst = NULL_TREE;

  if (type == NULL_TREE)
    type = gccbrig_tree_type_for_hsa_type (brig_const->type);

  /* The type of a single (scalar) element inside an array,
     vector or an array of vectors.  */
  BrigType16_t scalar_element_type
    = brig_const->type & BRIG_TYPE_BASE_MASK;
  tree tree_element_type = type;

  vec<constructor_elt, va_gc> *constructor_vals = NULL;

  if (TREE_CODE (type) == ARRAY_TYPE)
    tree_element_type = TREE_TYPE (type);

  /* Walk the raw constant bytes, consuming one element (scalar or
     whole vector) per iteration.  */
  size_t bytes_left = data->byteCount;
  const unsigned char *next_data = data->bytes;
  size_t scalar_element_size
    = gccbrig_hsa_type_bit_size (scalar_element_type) / BITS_PER_UNIT;

  while (bytes_left > 0)
    {
      if (VECTOR_TYPE_P (tree_element_type))
	{
	  /* In case of vector type elements (or sole vectors),
	     create a vector ctor.  */
	  size_t element_count = TYPE_VECTOR_SUBPARTS (tree_element_type);
	  if (bytes_left < scalar_element_size * element_count)
	    fatal_error (UNKNOWN_LOCATION,
			 "Not enough bytes left for the initializer "
			 "(%lu need %lu).",
			 bytes_left, scalar_element_size * element_count);

	  vec<constructor_elt, va_gc> *vec_els = NULL;
	  for (size_t i = 0; i < element_count; ++i)
	    {
	      tree element
		= build_tree_cst_element (scalar_element_type, next_data);
	      CONSTRUCTOR_APPEND_ELT (vec_els, NULL_TREE, element);
	      bytes_left -= scalar_element_size;
	      next_data += scalar_element_size;
	    }
	  cst = build_vector_from_ctor (tree_element_type, vec_els);
	}
      else
	{
	  if (bytes_left < scalar_element_size)
	    fatal_error (UNKNOWN_LOCATION,
			 "Not enough bytes left for the initializer "
			 "(%lu need %lu).",
			 bytes_left, scalar_element_size);
	  cst = build_tree_cst_element (scalar_element_type, next_data);
	  bytes_left -= scalar_element_size;
	  next_data += scalar_element_size;
	}
      /* Collect the elements in case the whole constant is an array;
	 for a non-array type only the final CST is used below.  */
      CONSTRUCTOR_APPEND_ELT (constructor_vals, NULL_TREE, cst);
    }

  if (TREE_CODE (type) == ARRAY_TYPE)
    return build_constructor (type, constructor_vals);
  else
    return cst;
}
642 | ||
643 | /* Return the matching tree instruction arithmetics type for the | |
644 | given BRIG_TYPE. The aritmethics type is the one with which | |
645 | computation is done (in contrast to the storage type). F16 | |
646 | arithmetics type is emulated using F32 for now. */ | |
647 | ||
648 | tree | |
649 | brig_code_entry_handler::get_tree_expr_type_for_hsa_type | |
650 | (BrigType16_t brig_type) const | |
651 | { | |
652 | BrigType16_t brig_inner_type = brig_type & BRIG_TYPE_BASE_MASK; | |
653 | if (brig_inner_type == BRIG_TYPE_F16) | |
654 | { | |
655 | if (brig_inner_type == brig_type) | |
656 | return m_parent.s_fp32_type; | |
657 | size_t element_count = gccbrig_hsa_type_bit_size (brig_type) / 16; | |
658 | return build_vector_type (m_parent.s_fp32_type, element_count); | |
659 | } | |
660 | else | |
661 | return gccbrig_tree_type_for_hsa_type (brig_type); | |
662 | } | |
663 | ||
/* In case the HSA instruction must be implemented using a builtin,
   this function is called to get the correct builtin function.
   TYPE is the instruction tree type, BRIG_OPCODE the opcode of the
   brig instruction and BRIG_TYPE the brig instruction's type.
   Aborts (gcc_unreachable) if no suitable builtin is found.  */

tree
brig_code_entry_handler::get_builtin_for_hsa_opcode
  (tree type, BrigOpcode16_t brig_opcode, BrigType16_t brig_type) const
{
  tree builtin = NULL_TREE;
  tree builtin_type = type;

  /* For vector types, first find the scalar version of the builtin.  */
  if (type != NULL_TREE && VECTOR_TYPE_P (type))
    builtin_type = TREE_TYPE (type);
  BrigType16_t brig_inner_type = brig_type & BRIG_TYPE_BASE_MASK;

  /* Some BRIG opcodes can use the same builtins for unsigned and
     signed types.  Force these cases to unsigned types.  */

  if (brig_opcode == BRIG_OPCODE_BORROW
      || brig_opcode == BRIG_OPCODE_CARRY
      || brig_opcode == BRIG_OPCODE_LASTBIT
      || brig_opcode == BRIG_OPCODE_BITINSERT)
    {
      if (brig_type == BRIG_TYPE_S32)
	brig_type = BRIG_TYPE_U32;
      else if (brig_type == BRIG_TYPE_S64)
	brig_type = BRIG_TYPE_U64;
    }

  switch (brig_opcode)
    {
    /* Opcodes that map to standard math builtins, resolved per scalar
       type via mathfn_built_in.  */
    case BRIG_OPCODE_FLOOR:
      builtin = mathfn_built_in (builtin_type, BUILT_IN_FLOOR);
      break;
    case BRIG_OPCODE_CEIL:
      builtin = mathfn_built_in (builtin_type, BUILT_IN_CEIL);
      break;
    case BRIG_OPCODE_SQRT:
    case BRIG_OPCODE_NSQRT:
      builtin = mathfn_built_in (builtin_type, BUILT_IN_SQRT);
      break;
    case BRIG_OPCODE_RINT:
      builtin = mathfn_built_in (builtin_type, BUILT_IN_RINT);
      break;
    case BRIG_OPCODE_TRUNC:
      builtin = mathfn_built_in (builtin_type, BUILT_IN_TRUNC);
      break;
    case BRIG_OPCODE_COPYSIGN:
      builtin = mathfn_built_in (builtin_type, BUILT_IN_COPYSIGN);
      break;
    case BRIG_OPCODE_NSIN:
      builtin = mathfn_built_in (builtin_type, BUILT_IN_SIN);
      break;
    case BRIG_OPCODE_NLOG2:
      builtin = mathfn_built_in (builtin_type, BUILT_IN_LOG2);
      break;
    case BRIG_OPCODE_NEXP2:
      builtin = mathfn_built_in (builtin_type, BUILT_IN_EXP2);
      break;
    case BRIG_OPCODE_NFMA:
      builtin = mathfn_built_in (builtin_type, BUILT_IN_FMA);
      break;
    case BRIG_OPCODE_NCOS:
      builtin = mathfn_built_in (builtin_type, BUILT_IN_COS);
      break;
    case BRIG_OPCODE_POPCOUNT:
      /* Popcount should be typed by its argument type (the return value
	 is always u32).  Let's use the b64 version also for b32 for now.  */
      return builtin_decl_explicit (BUILT_IN_POPCOUNTL);
    case BRIG_OPCODE_BORROW:
      /* Borrow uses the same builtin for unsigned and signed types.  */
      if (brig_type == BRIG_TYPE_S32 || brig_type == BRIG_TYPE_U32)
	return builtin_decl_explicit (BUILT_IN_HSAIL_BORROW_U32);
      else
	return builtin_decl_explicit (BUILT_IN_HSAIL_BORROW_U64);
    case BRIG_OPCODE_CARRY:
      /* Carry also uses the same builtin for unsigned and signed types.  */
      if (brig_type == BRIG_TYPE_S32 || brig_type == BRIG_TYPE_U32)
	return builtin_decl_explicit (BUILT_IN_HSAIL_CARRY_U32);
      else
	return builtin_decl_explicit (BUILT_IN_HSAIL_CARRY_U64);
    default:

      /* Use our builtin index for finding a proper builtin for the BRIG
	 opcode and BRIG type.  This takes care of most of the builtin cases;
	 the special cases are handled in the separate 'case' statements
	 above.  */
      builtin_map::const_iterator i
	= s_custom_builtins.find (std::make_pair (brig_opcode, brig_type));
      if (i != s_custom_builtins.end ())
	return (*i).second;

      if (brig_inner_type != brig_type)
	{
	  /* Try to find a scalar built-in we could use.  */
	  i = s_custom_builtins.find
	    (std::make_pair (brig_opcode, brig_inner_type));
	  if (i != s_custom_builtins.end ())
	    return (*i).second;
	}

      /* In case this is an fp16 operation that is promoted to fp32,
	 try to find a fp32 scalar built-in.  */
      if (brig_inner_type == BRIG_TYPE_F16)
	{
	  i = s_custom_builtins.find
	    (std::make_pair (brig_opcode, BRIG_TYPE_F32));
	  if (i != s_custom_builtins.end ())
	    return (*i).second;
	}
      gcc_unreachable ();
    }

  if (VECTOR_TYPE_P (type) && builtin != NULL_TREE)
    {
      /* Try to find a vectorized version of the built-in.
	 TODO: properly assert that builtin is a mathfn builtin? */
      tree vec_builtin
	= targetm.vectorize.builtin_vectorized_function
	(builtin_mathfn_code (builtin), type, type);
      if (vec_builtin != NULL_TREE)
	return vec_builtin;
      else
	return builtin;
    }
  if (builtin == NULL_TREE)
    gcc_unreachable ();
  return builtin;
}
795 | ||
796 | /* Return the correct GENERIC type for storing comparison results | |
797 | of operand with the type given in SOURCE_TYPE. */ | |
798 | ||
799 | tree | |
800 | brig_code_entry_handler::get_comparison_result_type (tree source_type) | |
801 | { | |
802 | if (VECTOR_TYPE_P (source_type)) | |
803 | { | |
804 | size_t element_size = int_size_in_bytes (TREE_TYPE (source_type)); | |
805 | return build_vector_type | |
806 | (build_nonstandard_boolean_type (element_size * BITS_PER_UNIT), | |
807 | TYPE_VECTOR_SUBPARTS (source_type)); | |
808 | } | |
809 | else | |
810 | return gccbrig_tree_type_for_hsa_type (BRIG_TYPE_B1); | |
811 | } | |
812 | ||
/* Returns true in case the given opcode needs to know about work-item context
   data.  In such case the context data is passed as a pointer to a work-item
   context object, as the last argument in the builtin call.  */

bool
brig_code_entry_handler::needs_workitem_context_data
  (BrigOpcode16_t brig_opcode) const
{
  switch (brig_opcode)
    {
    /* Work-item and grid geometry queries.  */
    case BRIG_OPCODE_WORKITEMABSID:
    case BRIG_OPCODE_WORKITEMFLATABSID:
    case BRIG_OPCODE_WORKITEMFLATID:
    case BRIG_OPCODE_CURRENTWORKITEMFLATID:
    case BRIG_OPCODE_WORKITEMID:
    case BRIG_OPCODE_WORKGROUPID:
    case BRIG_OPCODE_WORKGROUPSIZE:
    case BRIG_OPCODE_CURRENTWORKGROUPSIZE:
    case BRIG_OPCODE_GRIDGROUPS:
    case BRIG_OPCODE_GRIDSIZE:
    case BRIG_OPCODE_DIM:
    /* Packet / dispatch queries.  */
    case BRIG_OPCODE_PACKETID:
    case BRIG_OPCODE_PACKETCOMPLETIONSIG:
    /* Synchronization and fbarrier operations.  */
    case BRIG_OPCODE_BARRIER:
    case BRIG_OPCODE_WAVEBARRIER:
    case BRIG_OPCODE_ARRIVEFBAR:
    case BRIG_OPCODE_INITFBAR:
    case BRIG_OPCODE_JOINFBAR:
    case BRIG_OPCODE_LEAVEFBAR:
    case BRIG_OPCODE_RELEASEFBAR:
    case BRIG_OPCODE_WAITFBAR:
    /* Miscellaneous context-dependent operations.  */
    case BRIG_OPCODE_CUID:
    case BRIG_OPCODE_MAXCUID:
    case BRIG_OPCODE_DEBUGTRAP:
    case BRIG_OPCODE_GROUPBASEPTR:
    case BRIG_OPCODE_KERNARGBASEPTR:
    case BRIG_OPCODE_ALLOCA:
      return true;
    default:
      return false;
    };
}
855 | ||
856 | /* Returns true in case the given opcode that would normally be generated | |
857 | as a builtin call can be expanded to tree nodes. */ | |
858 | ||
859 | bool | |
860 | brig_code_entry_handler::can_expand_builtin (BrigOpcode16_t brig_opcode) const | |
861 | { | |
862 | switch (brig_opcode) | |
863 | { | |
864 | case BRIG_OPCODE_WORKITEMFLATABSID: | |
865 | case BRIG_OPCODE_WORKITEMFLATID: | |
866 | case BRIG_OPCODE_WORKITEMABSID: | |
867 | case BRIG_OPCODE_WORKGROUPSIZE: | |
868 | case BRIG_OPCODE_CURRENTWORKGROUPSIZE: | |
869 | /* TODO: expand more builtins. */ | |
870 | return true; | |
871 | default: | |
872 | return false; | |
873 | }; | |
874 | } | |
875 | ||
876 | /* Try to expand the given builtin call to reuse a previously generated | |
877 | variable, if possible. If not, just call the given builtin. | |
878 | BRIG_OPCODE and BRIG_TYPE identify the builtin's BRIG opcode/type, | |
879 | ARITH_TYPE its GENERIC type, and OPERANDS contains the builtin's | |
880 | input operands. */ | |
881 | ||
882 | tree | |
883 | brig_code_entry_handler::expand_or_call_builtin (BrigOpcode16_t brig_opcode, | |
884 | BrigType16_t brig_type, | |
885 | tree arith_type, | |
886 | tree_stl_vec &operands) | |
887 | { | |
888 | if (m_parent.m_cf->m_is_kernel && can_expand_builtin (brig_opcode)) | |
889 | return expand_builtin (brig_opcode, operands); | |
890 | ||
891 | tree built_in | |
892 | = get_builtin_for_hsa_opcode (arith_type, brig_opcode, brig_type); | |
893 | ||
894 | if (!VECTOR_TYPE_P (TREE_TYPE (TREE_TYPE (built_in))) | |
895 | && arith_type != NULL_TREE && VECTOR_TYPE_P (arith_type) | |
896 | && brig_opcode != BRIG_OPCODE_LERP | |
897 | && brig_opcode != BRIG_OPCODE_PACKCVT | |
898 | && brig_opcode != BRIG_OPCODE_SAD | |
899 | && brig_opcode != BRIG_OPCODE_SADHI) | |
900 | { | |
901 | /* Call the scalar built-in for all elements in the vector. */ | |
902 | tree_stl_vec operand0_elements; | |
903 | if (operands.size () > 0) | |
904 | unpack (operands[0], operand0_elements); | |
905 | ||
906 | tree_stl_vec operand1_elements; | |
907 | if (operands.size () > 1) | |
908 | unpack (operands[1], operand1_elements); | |
909 | ||
910 | tree_stl_vec result_elements; | |
911 | ||
912 | for (size_t i = 0; i < TYPE_VECTOR_SUBPARTS (arith_type); ++i) | |
913 | { | |
914 | tree_stl_vec call_operands; | |
915 | if (operand0_elements.size () > 0) | |
916 | call_operands.push_back (operand0_elements.at (i)); | |
917 | ||
918 | if (operand1_elements.size () > 0) | |
919 | call_operands.push_back (operand1_elements.at (i)); | |
920 | ||
921 | result_elements.push_back | |
922 | (expand_or_call_builtin (brig_opcode, brig_type, | |
923 | TREE_TYPE (arith_type), | |
924 | call_operands)); | |
925 | } | |
926 | return pack (result_elements); | |
927 | } | |
928 | ||
929 | tree_stl_vec call_operands; | |
930 | tree_stl_vec operand_types; | |
931 | ||
932 | tree arg_type_chain = TYPE_ARG_TYPES (TREE_TYPE (built_in)); | |
933 | ||
934 | for (size_t i = 0; i < operands.size (); ++i) | |
935 | { | |
936 | tree operand_type = TREE_VALUE (arg_type_chain); | |
937 | call_operands.push_back (convert (operand_type, operands[i])); | |
938 | operand_types.push_back (operand_type); | |
939 | arg_type_chain = TREE_CHAIN (arg_type_chain); | |
940 | } | |
941 | ||
942 | if (needs_workitem_context_data (brig_opcode)) | |
943 | { | |
944 | call_operands.push_back (m_parent.m_cf->m_context_arg); | |
945 | operand_types.push_back (ptr_type_node); | |
946 | m_parent.m_cf->m_has_unexpanded_dp_builtins = true; | |
947 | } | |
948 | ||
949 | size_t operand_count = call_operands.size (); | |
950 | ||
951 | call_operands.resize (4, NULL_TREE); | |
952 | operand_types.resize (4, NULL_TREE); | |
953 | for (size_t i = 0; i < operand_count; ++i) | |
954 | call_operands.at (i) = build_reinterpret_cast (operand_types.at (i), | |
955 | call_operands.at (i)); | |
956 | ||
957 | tree fnptr = build_fold_addr_expr (built_in); | |
958 | return build_call_array (TREE_TYPE (TREE_TYPE (built_in)), fnptr, | |
959 | operand_count, &call_operands[0]); | |
960 | } | |
961 | ||
/* Instead of calling a built-in, reuse a previously returned value known to
   be still valid.  This is beneficial especially for the work-item
   identification related builtins as not having them as calls can lead to
   more easily vectorizable parallel loops for multi work-item work-groups.
   BRIG_OPCODE identifies the builtin and OPERANDS store the operands.  */

tree
brig_code_entry_handler::expand_builtin (BrigOpcode16_t brig_opcode,
					 tree_stl_vec &operands)
{
  /* Constant dimension-index operand vectors (x = 0, y = 1, z = 2) used
     for the recursive WORKITEMABSID expansions below.  */
  tree_stl_vec uint32_0 = tree_stl_vec (1, build_int_cst (uint32_type_node, 0));

  tree_stl_vec uint32_1 = tree_stl_vec (1, build_int_cst (uint32_type_node, 1));

  tree_stl_vec uint32_2 = tree_stl_vec (1, build_int_cst (uint32_type_node, 2));

  if (brig_opcode == BRIG_OPCODE_WORKITEMFLATABSID)
    {
      /* flat absolute id = id0 + id1 * max0 + id2 * max0 * max1, computed
	 in 64 bits to avoid overflow for large grids.  */
      tree id0 = expand_builtin (BRIG_OPCODE_WORKITEMABSID, uint32_0);
      id0 = convert (uint64_type_node, id0);

      tree id1 = expand_builtin (BRIG_OPCODE_WORKITEMABSID, uint32_1);
      id1 = convert (uint64_type_node, id1);

      tree id2 = expand_builtin (BRIG_OPCODE_WORKITEMABSID, uint32_2);
      id2 = convert (uint64_type_node, id2);

      tree max0 = convert (uint64_type_node,
			   m_parent.m_cf->m_grid_size_vars[0]);
      tree max1 = convert (uint64_type_node,
			   m_parent.m_cf->m_grid_size_vars[1]);

      tree id2_x_max0_x_max1 = build2 (MULT_EXPR, uint64_type_node, id2, max0);
      id2_x_max0_x_max1
	= build2 (MULT_EXPR, uint64_type_node, id2_x_max0_x_max1, max1);

      tree id1_x_max0 = build2 (MULT_EXPR, uint64_type_node, id1, max0);

      tree sum = build2 (PLUS_EXPR, uint64_type_node, id0, id1_x_max0);
      sum = build2 (PLUS_EXPR, uint64_type_node, sum, id2_x_max0_x_max1);

      return add_temp_var ("workitemflatabsid", sum);
    }
  else if (brig_opcode == BRIG_OPCODE_WORKITEMABSID)
    {
      /* absolute id = (wg_id * wg_size + local_id) % grid_size for the
	 requested dimension, which must be a constant operand.  */
      HOST_WIDE_INT dim = int_constant_value (operands[0]);

      tree local_id_var = m_parent.m_cf->m_local_id_vars[dim];
      tree wg_id_var = m_parent.m_cf->m_wg_id_vars[dim];
      tree wg_size_var = m_parent.m_cf->m_wg_size_vars[dim];
      tree grid_size_var = m_parent.m_cf->m_grid_size_vars[dim];

      tree wg_id_x_wg_size = build2 (MULT_EXPR, uint32_type_node,
				     convert (uint32_type_node, wg_id_var),
				     convert (uint32_type_node, wg_size_var));
      tree sum
	= build2 (PLUS_EXPR, uint32_type_node, wg_id_x_wg_size, local_id_var);

      /* We need a modulo here because of work-groups which have dimensions
	 larger than the grid size :( TO CHECK: is this really allowed in the
	 specs?  */
      tree modulo
	= build2 (TRUNC_MOD_EXPR, uint32_type_node, sum, grid_size_var);

      /* The temp name encodes the dimension as a suffix: x, y or z.  */
      return add_temp_var (std::string ("workitemabsid_")
			   + (char) ((int) 'x' + dim),
			   modulo);
    }
  else if (brig_opcode == BRIG_OPCODE_WORKITEMFLATID)
    {
      /* flat local id = local_x + local_y * wgs_x + local_z * wgs_x * wgs_y,
	 computed in 32 bits (work-group sizes are bounded).  */
      tree z_x_wgsx_wgsy
	= build2 (MULT_EXPR, uint32_type_node,
		  m_parent.m_cf->m_local_id_vars[2],
		  m_parent.m_cf->m_wg_size_vars[0]);
      z_x_wgsx_wgsy = build2 (MULT_EXPR, uint32_type_node, z_x_wgsx_wgsy,
			      m_parent.m_cf->m_wg_size_vars[1]);

      tree y_x_wgsx
	= build2 (MULT_EXPR, uint32_type_node,
		  m_parent.m_cf->m_local_id_vars[1],
		  m_parent.m_cf->m_wg_size_vars[0]);

      tree sum = build2 (PLUS_EXPR, uint32_type_node, y_x_wgsx, z_x_wgsx_wgsy);
      sum = build2 (PLUS_EXPR, uint32_type_node,
		    m_parent.m_cf->m_local_id_vars[0],
		    sum);
      return add_temp_var ("workitemflatid", sum);
    }
  else if (brig_opcode == BRIG_OPCODE_WORKGROUPSIZE)
    {
      /* Reuse the pre-generated size variable directly; no temp needed.
	 NOTE(review): DIM is not range-checked here — presumably the
	 BRIG input guarantees 0..2; verify against callers.  */
      HOST_WIDE_INT dim = int_constant_value (operands[0]);
      return m_parent.m_cf->m_wg_size_vars[dim];
    }
  else if (brig_opcode == BRIG_OPCODE_CURRENTWORKGROUPSIZE)
    {
      HOST_WIDE_INT dim = int_constant_value (operands[0]);
      return m_parent.m_cf->m_cur_wg_size_vars[dim];
    }
  else
    /* Callers must first check can_expand_builtin ().  */
    gcc_unreachable ();

  return NULL_TREE;
}
1065 | ||
1066 | /* Appends and returns a new temp variable and an accompanying assignment | |
1067 | statement that stores the value of the given EXPR and has the given NAME. */ | |
1068 | ||
1069 | tree | |
1070 | brig_code_entry_handler::add_temp_var (std::string name, tree expr) | |
1071 | { | |
1072 | tree temp_var = create_tmp_var (TREE_TYPE (expr), name.c_str ()); | |
1073 | tree assign = build2 (MODIFY_EXPR, TREE_TYPE (temp_var), temp_var, expr); | |
1074 | m_parent.m_cf->append_statement (assign); | |
1075 | return temp_var; | |
1076 | } | |
1077 | ||
1078 | /* Creates a FP32 to FP16 conversion call, assuming the source and destination | |
1079 | are FP32 type variables. */ | |
1080 | ||
1081 | tree | |
1082 | brig_code_entry_handler::build_f2h_conversion (tree source) | |
1083 | { | |
1084 | return float_to_half () (*this, source); | |
1085 | } | |
1086 | ||
1087 | /* Creates a FP16 to FP32 conversion call, assuming the source and destination | |
1088 | are FP32 type variables. */ | |
1089 | ||
1090 | tree | |
1091 | brig_code_entry_handler::build_h2f_conversion (tree source) | |
1092 | { | |
1093 | return half_to_float () (*this, source); | |
1094 | } | |
1095 | ||
/* Builds and "normalizes" the dest and source operands for the instruction
   execution; converts the input operands to the expected instruction type,
   performs half to float conversions, constant to correct type variable,
   and flush to zero (if applicable).  */

tree_stl_vec
brig_code_entry_handler::build_operands (const BrigInstBase &brig_inst)
{
  /* Flush to zero.  */
  bool ftz = false;
  const BrigBase *base = &brig_inst.base;

  /* The FTZ modifier can come from either a mod-format or a cmp-format
     instruction.  */
  if (base->kind == BRIG_KIND_INST_MOD)
    {
      const BrigInstMod *mod = (const BrigInstMod *) base;
      ftz = mod->modifier & BRIG_ALU_FTZ;
    }
  else if (base->kind == BRIG_KIND_INST_CMP)
    {
      const BrigInstCmp *cmp = (const BrigInstCmp *) base;
      ftz = cmp->modifier & BRIG_ALU_FTZ;
    }

  bool is_vec_instr = hsa_type_packed_p (brig_inst.type);

  /* Element count of a packed (vector) type: total bit width divided by
     the base element's bit width.  */
  size_t element_count;
  if (is_vec_instr)
    {
      BrigType16_t brig_element_type = brig_inst.type & BRIG_TYPE_BASE_MASK;
      element_count = gccbrig_hsa_type_bit_size (brig_inst.type)
	/ gccbrig_hsa_type_bit_size (brig_element_type);
    }
  else
    element_count = 1;

  bool is_fp16_arith = false;

  /* Resolve the source and destination GENERIC types; the rule depends on
     the BRIG instruction format kind.  f16 arithmetic is promoted to f32,
     so track whether the inputs need an h2f conversion.  */
  tree src_type;
  tree dest_type;
  if (base->kind == BRIG_KIND_INST_CMP)
    {
      const BrigInstCmp *cmp_inst = (const BrigInstCmp *) base;
      src_type = gccbrig_tree_type_for_hsa_type (cmp_inst->sourceType);
      dest_type = gccbrig_tree_type_for_hsa_type (brig_inst.type);
      is_fp16_arith
	= (cmp_inst->sourceType & BRIG_TYPE_BASE_MASK) == BRIG_TYPE_F16;
    }
  else if (base->kind == BRIG_KIND_INST_SOURCE_TYPE)
    {
      const BrigInstSourceType *src_type_inst
	= (const BrigInstSourceType *) base;
      src_type = gccbrig_tree_type_for_hsa_type (src_type_inst->sourceType);
      dest_type = gccbrig_tree_type_for_hsa_type (brig_inst.type);
      /* Bit operations work on the raw storage format; no f16 promotion.  */
      is_fp16_arith
	= (src_type_inst->sourceType & BRIG_TYPE_BASE_MASK) == BRIG_TYPE_F16
	&& !gccbrig_is_bit_operation (brig_inst.opcode);
    }
  else if (base->kind == BRIG_KIND_INST_SEG_CVT)
    {
      const BrigInstSegCvt *seg_cvt_inst = (const BrigInstSegCvt *) base;
      src_type = gccbrig_tree_type_for_hsa_type (seg_cvt_inst->sourceType);
      dest_type = gccbrig_tree_type_for_hsa_type (brig_inst.type);
    }
  else if (base->kind == BRIG_KIND_INST_MEM)
    {
      src_type = gccbrig_tree_type_for_hsa_type (brig_inst.type);
      dest_type = src_type;
      /* With mem instructions we don't want to cast the fp16
	 back and forth between fp32, because the load/stores
	 are not specific to the data type.  */
      is_fp16_arith = false;
    }
  else if (base->kind == BRIG_KIND_INST_CVT)
    {
      const BrigInstCvt *cvt_inst = (const BrigInstCvt *) base;

      src_type = gccbrig_tree_type_for_hsa_type (cvt_inst->sourceType);
      dest_type = gccbrig_tree_type_for_hsa_type (brig_inst.type);
    }
  else
    {
      /* Basic-format instructions: fbarrier opcodes take an u32 fbarrier
	 handle; everything else uses the instruction type directly.  */
      switch (brig_inst.opcode)
	{
	case BRIG_OPCODE_INITFBAR:
	case BRIG_OPCODE_JOINFBAR:
	case BRIG_OPCODE_WAITFBAR:
	case BRIG_OPCODE_ARRIVEFBAR:
	case BRIG_OPCODE_LEAVEFBAR:
	case BRIG_OPCODE_RELEASEFBAR:
	  src_type = uint32_type_node;
	  break;
	default:
	  src_type = gccbrig_tree_type_for_hsa_type (brig_inst.type);
	  break;
	}
      dest_type = src_type;
      is_fp16_arith
	= !gccbrig_is_bit_operation (brig_inst.opcode)
	&& (brig_inst.type & BRIG_TYPE_BASE_MASK) == BRIG_TYPE_F16;
    }

  /* Halfs are a tricky special case: their "storage format" is u16, but
     scalars are stored in 32b regs while packed f16 are... well packed.  */
  tree half_storage_type = element_count > 1
			     ? gccbrig_tree_type_for_hsa_type (brig_inst.type)
			     : uint32_type_node;

  /* The operand list is a BrigData blob of 32-bit offsets into the
     operand section, one per operand.  */
  const BrigData *operand_entries
    = m_parent.get_brig_data_entry (brig_inst.operands);
  std::vector<tree> operands;
  for (size_t i = 0; i < operand_entries->byteCount / 4; ++i)
    {
      uint32_t operand_offset = ((const uint32_t *) &operand_entries->bytes)[i];
      const BrigBase *operand_data
	= m_parent.get_brig_operand_entry (operand_offset);

      const bool is_output
	= gccbrig_hsa_opcode_op_output_p (brig_inst.opcode, i);

      tree operand_type = is_output ? dest_type : src_type;

      bool half_to_float = is_fp16_arith;

      /* Special cases for operand types.  */
      if ((brig_inst.opcode == BRIG_OPCODE_SHL
	   || brig_inst.opcode == BRIG_OPCODE_SHR)
	  && i == 2)
	/* The shift amount is always a scalar.  */
	operand_type
	  = VECTOR_TYPE_P (src_type) ? TREE_TYPE (src_type) : src_type;
      else if (brig_inst.opcode == BRIG_OPCODE_SHUFFLE)
	{
	  if (i == 3)
	    /* HSAIL shuffle inputs the MASK vector as tightly packed bits
	       while GENERIC VEC_PERM_EXPR expects the mask elements to be
	       of the same size as the elements in the input vectors.  Let's
	       cast to a scalar type here and convert to the VEC_PERM_EXPR
	       format in instruction handling.  There are no arbitrary bit
	       width int types in GENERIC so we cannot use the original
	       vector type.  */
	    operand_type = uint32_type_node;
	  else
	    /* Always treat the element as unsigned ints to avoid
	       sign extensions/negative offsets with masks, which
	       are expected to be of the same element type as the
	       data in VEC_PERM_EXPR.  With shuffles the data type
	       should not matter as it's a "raw operation".  */
	    operand_type = get_unsigned_int_type (operand_type);
	}
      else if (brig_inst.opcode == BRIG_OPCODE_PACK)
	{
	  /* PACK operands: 1 = the input vector (raw bits), 2 = the
	     element to insert (raw bits), 3 = the element index.  */
	  if (i == 1)
	    operand_type = get_unsigned_int_type (dest_type);
	  else if (i == 2)
	    operand_type = get_unsigned_int_type (TREE_TYPE (dest_type));
	  else if (i == 3)
	    operand_type = uint32_type_node;
	}
      else if (brig_inst.opcode == BRIG_OPCODE_UNPACK && i == 2)
	operand_type = uint32_type_node;
      else if (brig_inst.opcode == BRIG_OPCODE_SAD && i == 3)
	operand_type = uint32_type_node;
      else if (brig_inst.opcode == BRIG_OPCODE_CLASS && i == 2)
	{
	  /* The class mask operand is an u32 and never a half.  */
	  operand_type = uint32_type_node;
	  half_to_float = false;
	}
      else if (half_to_float)
	/* Treat the operands as the storage type at this point.  */
	operand_type = half_storage_type;

      tree operand = build_tree_operand (brig_inst, *operand_data, operand_type,
					 !is_output);

      gcc_assert (operand);

      /* Cast/convert the inputs to correct types as expected by the GENERIC
	 opcode instruction.  */
      if (!is_output)
	{
	  if (half_to_float)
	    operand = build_h2f_conversion
	      (build_reinterpret_cast (half_storage_type, operand));
	  else if (TREE_CODE (operand) != LABEL_DECL
		   && TREE_CODE (operand) != TREE_VEC
		   && operand_data->kind != BRIG_KIND_OPERAND_ADDRESS
		   && !VECTOR_TYPE_P (TREE_TYPE (operand)))
	    {
	      /* Scalar register operands may be wider or narrower than
		 the instruction's operand width; reconcile here.  */
	      size_t reg_width = int_size_in_bytes (TREE_TYPE (operand));
	      size_t instr_width = int_size_in_bytes (operand_type);
	      if (reg_width == instr_width)
		operand = build_reinterpret_cast (operand_type, operand);
	      else if (reg_width > instr_width)
		{
		  /* Clip the operand because the instruction's bitwidth
		     is smaller than the HSAIL reg width.  */
		  if (INTEGRAL_TYPE_P (operand_type))
		    operand
		      = convert_to_integer (signed_or_unsigned_type_for
					    (TYPE_UNSIGNED (operand_type),
					     operand_type), operand);
		  else
		    operand = build_reinterpret_cast (operand_type, operand);
		}
	      else if (reg_width < instr_width)
		/* At least shift amount operands can be read from smaller
		   registers than the data operands.  */
		operand = convert (operand_type, operand);
	    }
	  else if (brig_inst.opcode == BRIG_OPCODE_SHUFFLE)
	    /* Force the operand type to be treated as the raw type.  */
	    operand = build_reinterpret_cast (operand_type, operand);

	  if (brig_inst.opcode == BRIG_OPCODE_CMOV && i == 1)
	    {
	      /* gcc expects the lower bit to be 1 (or all ones in case of
		 vectors) while CMOV assumes false iff 0.  Convert the input
		 here to what gcc likes by generating
		 'operand = operand != 0'.  */
	      tree cmp_res_type = get_comparison_result_type (operand_type);
	      operand = build2 (NE_EXPR, cmp_res_type, operand,
				build_zero_cst (TREE_TYPE (operand)));
	    }

	  if (ftz)
	    operand = flush_to_zero (is_fp16_arith) (*this, operand);
	}
      operands.push_back (operand);
    }
  return operands;
}
1327 | ||
/* Build the GENERIC for assigning the result of an instruction to the result
   "register" (variable).  BRIG_INST is the original brig instruction,
   OUTPUT the result variable/register, INST_EXPR the one producing the
   result.  Required bitcasts and fp32 to fp16 conversions are added as
   well.  */

tree
brig_code_entry_handler::build_output_assignment (const BrigInstBase &brig_inst,
						  tree output, tree inst_expr)
{
  /* The destination type might be different from the output register
     variable type (which is always an unsigned integer type).  */
  tree output_type = TREE_TYPE (output);
  tree input_type = TREE_TYPE (inst_expr);
  /* f16 results need an f2h conversion, except for mem instructions and
     bit operations which handle the raw storage format.  */
  bool is_fp16 = (brig_inst.type & BRIG_TYPE_BASE_MASK) == BRIG_TYPE_F16
		 && brig_inst.base.kind != BRIG_KIND_INST_MEM
		 && !gccbrig_is_bit_operation (brig_inst.opcode);

  /* Flush to zero.  */
  bool ftz = false;
  const BrigBase *base = &brig_inst.base;

  if (base->kind == BRIG_KIND_INST_MOD)
    {
      const BrigInstMod *mod = (const BrigInstMod *) base;
      ftz = mod->modifier & BRIG_ALU_FTZ;
    }
  else if (base->kind == BRIG_KIND_INST_CMP)
    {
      const BrigInstCmp *cmp = (const BrigInstCmp *) base;
      ftz = cmp->modifier & BRIG_ALU_FTZ;
    }

  /* For calls, take the input type from the callee's declared return
     type rather than from the CALL_EXPR node.  */
  if (TREE_CODE (inst_expr) == CALL_EXPR)
    {
      tree func_decl = TREE_OPERAND (TREE_OPERAND (inst_expr, 1), 0);
      input_type = TREE_TYPE (TREE_TYPE (func_decl));
    }

  if (ftz && (VECTOR_FLOAT_TYPE_P (TREE_TYPE (inst_expr))
	      || SCALAR_FLOAT_TYPE_P (TREE_TYPE (inst_expr)) || is_fp16))
    {
      /* Ensure we don't duplicate the arithmetics to the arguments of the bit
	 field reference operators.  */
      inst_expr = add_temp_var ("before_ftz", inst_expr);
      inst_expr = flush_to_zero (is_fp16) (*this, inst_expr);
    }

  if (is_fp16)
    {
      /* Convert the fp32 result back to the f16 storage format and
	 store it as an integer in the register variable.  */
      inst_expr = add_temp_var ("before_f2h", inst_expr);
      tree f2h_output = build_f2h_conversion (inst_expr);
      tree conv_int = convert_to_integer (output_type, f2h_output);
      tree assign = build2 (MODIFY_EXPR, output_type, output, conv_int);
      m_parent.m_cf->append_statement (assign);
      return assign;
    }
  else if (VECTOR_TYPE_P (TREE_TYPE (output)))
    {
      /* Expand/unpack the input value to the given vector elements.  */
      size_t i;
      tree input = inst_expr;
      tree element_type = gccbrig_tree_type_for_hsa_type (brig_inst.type);
      tree element;
      tree last_assign = NULL_TREE;
      /* OUTPUT is a CONSTRUCTOR of the destination registers here; assign
	 each element recursively via a bit field ref into the input.  */
      FOR_EACH_CONSTRUCTOR_VALUE (CONSTRUCTOR_ELTS (output), i, element)
	{
	  tree element_ref
	    = build3 (BIT_FIELD_REF, element_type, input,
		      TYPE_SIZE (element_type),
		      build_int_cst (uint32_type_node,
				     i * int_size_in_bytes (element_type)
				     * BITS_PER_UNIT));

	  last_assign
	    = build_output_assignment (brig_inst, element, element_ref);
	}
      return last_assign;
    }
  else
    {
      /* All we do here is to bitcast the result and store it to the
	 'register' (variable).  Mainly need to take care of differing
	 bitwidths.  */
      size_t src_width = int_size_in_bytes (input_type);
      size_t dst_width = int_size_in_bytes (output_type);

      if (src_width == dst_width)
	{
	  /* A simple bitcast should do.  */
	  tree bitcast = build_reinterpret_cast (output_type, inst_expr);
	  tree assign = build2 (MODIFY_EXPR, output_type, output, bitcast);
	  m_parent.m_cf->append_statement (assign);
	  return assign;
	}
      else
	{
	  /* Differing widths: zero/sign-extend or truncate into the
	     register via an integer conversion.  */
	  tree conv_int = convert_to_integer (output_type, inst_expr);
	  tree assign = build2 (MODIFY_EXPR, output_type, output, conv_int);
	  m_parent.m_cf->append_statement (assign);
	  return assign;
	}
    }
  return NULL_TREE;
}
1433 | ||
/* Appends a GENERIC statement (STMT) to the currently constructed function.  */

void
brig_code_entry_handler::append_statement (tree stmt)
{
  /* Delegates to the function currently being built by the parent
     brig_to_generic converter.  */
  m_parent.m_cf->append_statement (stmt);
}
1441 | ||
1442 | /* Unpacks the elements of the vector in VALUE to scalars (bit field | |
1443 | references) in ELEMENTS. */ | |
1444 | ||
1445 | void | |
1446 | brig_code_entry_handler::unpack (tree value, tree_stl_vec &elements) | |
1447 | { | |
1448 | size_t vec_size = int_size_in_bytes (TREE_TYPE (value)); | |
1449 | size_t element_size | |
1450 | = int_size_in_bytes (TREE_TYPE (TREE_TYPE (value))) * BITS_PER_UNIT; | |
1451 | size_t element_count | |
1452 | = vec_size * BITS_PER_UNIT / element_size; | |
1453 | ||
1454 | tree input_element_type = TREE_TYPE (TREE_TYPE (value)); | |
1455 | ||
1456 | value = add_temp_var ("unpack_input", value); | |
1457 | ||
1458 | for (size_t i = 0; i < element_count; ++i) | |
1459 | { | |
1460 | tree element | |
1461 | = build3 (BIT_FIELD_REF, input_element_type, value, | |
1462 | TYPE_SIZE (input_element_type), | |
1463 | build_int_cst (unsigned_char_type_node, i * element_size)); | |
1464 | ||
1465 | element = add_temp_var ("scalar", element); | |
1466 | elements.push_back (element); | |
1467 | } | |
1468 | } | |
1469 | ||
1470 | /* Pack the elements of the scalars in ELEMENTS to the returned vector. */ | |
1471 | ||
1472 | tree | |
1473 | brig_code_entry_handler::pack (tree_stl_vec &elements) | |
1474 | { | |
1475 | size_t element_count = elements.size (); | |
1476 | ||
1477 | gcc_assert (element_count > 1); | |
1478 | ||
1479 | tree output_element_type = TREE_TYPE (elements.at (0)); | |
1480 | ||
1481 | vec<constructor_elt, va_gc> *constructor_vals = NULL; | |
1482 | for (size_t i = 0; i < element_count; ++i) | |
1483 | CONSTRUCTOR_APPEND_ELT (constructor_vals, NULL_TREE, elements.at (i)); | |
1484 | ||
1485 | tree vec_type = build_vector_type (output_element_type, element_count); | |
1486 | ||
1487 | /* build_constructor creates a vector type which is not a vector_cst | |
1488 | that requires compile time constant elements. */ | |
1489 | tree vec = build_constructor (vec_type, constructor_vals); | |
1490 | ||
1491 | /* Add a temp variable for readability. */ | |
1492 | tree tmp_var = create_tmp_var (vec_type, "vec_out"); | |
1493 | tree vec_tmp_assign = build2 (MODIFY_EXPR, TREE_TYPE (tmp_var), tmp_var, vec); | |
1494 | m_parent.m_cf->append_statement (vec_tmp_assign); | |
1495 | return tmp_var; | |
1496 | } | |
1497 | ||
1498 | /* Visits the element(s) in the OPERAND, calling HANDLER to each of them. */ | |
1499 | ||
1500 | tree | |
1501 | tree_element_unary_visitor::operator () (brig_code_entry_handler &handler, | |
1502 | tree operand) | |
1503 | { | |
1504 | if (VECTOR_TYPE_P (TREE_TYPE (operand))) | |
1505 | { | |
1506 | size_t vec_size = int_size_in_bytes (TREE_TYPE (operand)); | |
1507 | size_t element_size = int_size_in_bytes (TREE_TYPE (TREE_TYPE (operand))); | |
1508 | size_t element_count = vec_size / element_size; | |
1509 | ||
1510 | tree input_element_type = TREE_TYPE (TREE_TYPE (operand)); | |
1511 | tree output_element_type = NULL_TREE; | |
1512 | ||
1513 | vec<constructor_elt, va_gc> *constructor_vals = NULL; | |
1514 | for (size_t i = 0; i < element_count; ++i) | |
1515 | { | |
1516 | tree element = build3 (BIT_FIELD_REF, input_element_type, operand, | |
1517 | TYPE_SIZE (input_element_type), | |
1518 | build_int_cst (unsigned_char_type_node, | |
1519 | i * element_size | |
1520 | * BITS_PER_UNIT)); | |
1521 | ||
1522 | tree output = visit_element (handler, element); | |
1523 | output_element_type = TREE_TYPE (output); | |
1524 | ||
1525 | CONSTRUCTOR_APPEND_ELT (constructor_vals, NULL_TREE, output); | |
1526 | } | |
1527 | ||
1528 | tree vec_type = build_vector_type (output_element_type, element_count); | |
1529 | ||
1530 | /* build_constructor creates a vector type which is not a vector_cst | |
1531 | that requires compile time constant elements. */ | |
1532 | tree vec = build_constructor (vec_type, constructor_vals); | |
1533 | ||
1534 | /* Add a temp variable for readability. */ | |
1535 | tree tmp_var = create_tmp_var (vec_type, "vec_out"); | |
1536 | tree vec_tmp_assign | |
1537 | = build2 (MODIFY_EXPR, TREE_TYPE (tmp_var), tmp_var, vec); | |
1538 | handler.append_statement (vec_tmp_assign); | |
1539 | return tmp_var; | |
1540 | } | |
1541 | else | |
1542 | return visit_element (handler, operand); | |
1543 | } | |
1544 | ||
/* Visits the element pair(s) in the OPERAND0 and OPERAND1, calling HANDLER
   to each of them.  */

tree
tree_element_binary_visitor::operator () (brig_code_entry_handler &handler,
					 tree operand0, tree operand1)
{
  /* Vector operands are scalarized: the operation is applied pairwise to
     each element and the per-element results are collected back into a
     new vector.  Scalar operands fall through to a single visit_element
     call at the bottom.  */
  if (VECTOR_TYPE_P (TREE_TYPE (operand0)))
    {
      /* Both operands must have the same (vector) shape; only operand1's
	 vectorness is asserted here, the element type and count are taken
	 from operand0.  */
      gcc_assert (VECTOR_TYPE_P (TREE_TYPE (operand1)));
      size_t vec_size = int_size_in_bytes (TREE_TYPE (operand0));
      size_t element_size
	= int_size_in_bytes (TREE_TYPE (TREE_TYPE (operand0)));
      size_t element_count = vec_size / element_size;

      tree input_element_type = TREE_TYPE (TREE_TYPE (operand0));
      /* The output element type is discovered from the first handled
	 element below; the handler may produce a different element type
	 than the input (e.g. a widening or comparison operation).  */
      tree output_element_type = NULL_TREE;

      vec<constructor_elt, va_gc> *constructor_vals = NULL;
      for (size_t i = 0; i < element_count; ++i)
	{

	  /* Extract the i:th element of each operand as a
	     BIT_FIELD_REF: size is the element's bit size, position is
	     the element's bit offset from the start of the vector.
	     NOTE(review): the bit position constant is built with
	     unsigned_char_type_node, which presumably suffices for the
	     vector widths HSAIL allows — confirm offsets cannot
	     exceed 255 bits.  */
	  tree element0 = build3 (BIT_FIELD_REF, input_element_type, operand0,
				  TYPE_SIZE (input_element_type),
				  build_int_cst (unsigned_char_type_node,
						 i * element_size
						 * BITS_PER_UNIT));

	  tree element1 = build3 (BIT_FIELD_REF, input_element_type, operand1,
				  TYPE_SIZE (input_element_type),
				  build_int_cst (unsigned_char_type_node,
						 i * element_size
						 * BITS_PER_UNIT));

	  /* Delegate the actual per-element operation to the concrete
	     visitor subclass.  */
	  tree output = visit_element (handler, element0, element1);
	  output_element_type = TREE_TYPE (output);

	  CONSTRUCTOR_APPEND_ELT (constructor_vals, NULL_TREE, output);
	}

      tree vec_type = build_vector_type (output_element_type, element_count);

      /* build_constructor creates a vector type which is not a vector_cst
	 that requires compile time constant elements.  */
      tree vec = build_constructor (vec_type, constructor_vals);

      /* Add a temp variable for readability.  */
      tree tmp_var = create_tmp_var (vec_type, "vec_out");
      tree vec_tmp_assign
	= build2 (MODIFY_EXPR, TREE_TYPE (tmp_var), tmp_var, vec);
      handler.append_statement (vec_tmp_assign);
      return tmp_var;
    }
  else
    return visit_element (handler, operand0, operand1);
}
1601 | ||
1602 | /* Generates GENERIC code that flushes the visited element to zero. */ | |
1603 | ||
1604 | tree | |
1605 | flush_to_zero::visit_element (brig_code_entry_handler &, tree operand) | |
1606 | { | |
1607 | size_t size = int_size_in_bytes (TREE_TYPE (operand)); | |
1608 | if (size == 4) | |
1609 | { | |
1610 | tree built_in | |
1611 | = (m_fp16) ? builtin_decl_explicit (BUILT_IN_HSAIL_FTZ_F32_F16) : | |
1612 | builtin_decl_explicit (BUILT_IN_HSAIL_FTZ_F32); | |
1613 | ||
1614 | return call_builtin (built_in, 1, float_type_node, float_type_node, | |
1615 | operand); | |
1616 | } | |
1617 | else if (size == 8) | |
1618 | { | |
1619 | return call_builtin (builtin_decl_explicit (BUILT_IN_HSAIL_FTZ_F64), 1, | |
1620 | double_type_node, double_type_node, operand); | |
1621 | } | |
1622 | else | |
1623 | gcc_unreachable (); | |
1624 | return NULL_TREE; | |
1625 | } | |
1626 | ||
1627 | /* Generates GENERIC code that converts a single precision float to half | |
1628 | precision float. */ | |
1629 | ||
1630 | tree | |
1631 | float_to_half::visit_element (brig_code_entry_handler &caller, tree operand) | |
1632 | { | |
1633 | tree built_in = builtin_decl_explicit (BUILT_IN_HSAIL_F32_TO_F16); | |
1634 | ||
1635 | tree casted_operand = build_reinterpret_cast (uint32_type_node, operand); | |
1636 | ||
1637 | tree call = call_builtin (built_in, 1, uint16_type_node, uint32_type_node, | |
1638 | casted_operand); | |
1639 | tree output | |
1640 | = create_tmp_var (TREE_TYPE (TREE_TYPE (built_in)), "fp16out"); | |
1641 | tree assign = build2 (MODIFY_EXPR, TREE_TYPE (output), output, call); | |
1642 | caller.append_statement (assign); | |
1643 | return output; | |
1644 | } | |
1645 | ||
1646 | /* Generates GENERIC code that converts a half precision float to single | |
1647 | precision float. */ | |
1648 | ||
1649 | tree | |
1650 | half_to_float::visit_element (brig_code_entry_handler &caller, tree operand) | |
1651 | { | |
1652 | tree built_in = builtin_decl_explicit (BUILT_IN_HSAIL_F16_TO_F32); | |
1653 | tree truncated_source = convert_to_integer (uint16_type_node, operand); | |
1654 | ||
1655 | tree call | |
1656 | = call_builtin (built_in, 1, uint32_type_node, uint16_type_node, | |
1657 | truncated_source); | |
1658 | ||
1659 | tree const_fp32_type | |
1660 | = build_type_variant (brig_to_generic::s_fp32_type, 1, 0); | |
1661 | ||
1662 | tree output = create_tmp_var (const_fp32_type, "fp32out"); | |
1663 | tree casted_result | |
1664 | = build_reinterpret_cast (brig_to_generic::s_fp32_type, call); | |
1665 | ||
1666 | tree assign = build2 (MODIFY_EXPR, TREE_TYPE (output), output, casted_result); | |
1667 | ||
1668 | caller.append_statement (assign); | |
1669 | ||
1670 | return output; | |
1671 | } | |
1672 | ||
1673 | /* Treats the INPUT as SRC_TYPE and sign or zero extends it to DEST_TYPE. */ | |
1674 | ||
1675 | tree | |
1676 | brig_code_entry_handler::extend_int (tree input, tree dest_type, tree src_type) | |
1677 | { | |
1678 | /* Extend integer conversions according to the destination's | |
1679 | ext mode. First we need to clip the input register to | |
1680 | the possible smaller integer size to ensure the correct sign | |
1681 | bit is extended. */ | |
1682 | tree clipped_input = convert_to_integer (src_type, input); | |
1683 | tree conversion_result; | |
1684 | ||
1685 | if (TYPE_UNSIGNED (src_type)) | |
1686 | conversion_result | |
1687 | = convert_to_integer (unsigned_type_for (dest_type), clipped_input); | |
1688 | else | |
1689 | conversion_result | |
1690 | = convert_to_integer (signed_type_for (dest_type), clipped_input); | |
1691 | ||
1692 | /* Treat the result as unsigned so we do not sign extend to the | |
1693 | register width. For some reason this GENERIC sequence sign | |
1694 | extends to the s register: | |
1695 | ||
1696 | D.1541 = (signed char) s1; | |
1697 | D.1542 = (signed short) D.1541; | |
1698 | s0 = (unsigned int) D.1542 | |
1699 | */ | |
1700 | ||
1701 | /* The converted result is then extended to the target register | |
1702 | width, using the same sign as the destination. */ | |
1703 | return convert_to_integer (dest_type, conversion_result); | |
1704 | } | |
1705 | ||
1706 | /* Returns the integer constant value of the given node. | |
1707 | If it's a cast, looks into the source of the cast. */ | |
1708 | HOST_WIDE_INT | |
1709 | brig_code_entry_handler::int_constant_value (tree node) | |
1710 | { | |
1711 | tree n = node; | |
1712 | if (TREE_CODE (n) == VIEW_CONVERT_EXPR) | |
1713 | n = TREE_OPERAND (n, 0); | |
1714 | return int_cst_value (n); | |
1715 | } | |
1716 |