]>
Commit | Line | Data |
---|---|---|
5fd1486c | 1 | /* brig-code-entry-handler.cc -- a gccbrig base class |
99dee823 | 2 | Copyright (C) 2016-2021 Free Software Foundation, Inc. |
5fd1486c PJ |
3 | Contributed by Pekka Jaaskelainen <pekka.jaaskelainen@parmance.com> |
4 | for General Processor Tech. | |
5 | ||
6 | This file is part of GCC. | |
7 | ||
8 | GCC is free software; you can redistribute it and/or modify it under | |
9 | the terms of the GNU General Public License as published by the Free | |
10 | Software Foundation; either version 3, or (at your option) any later | |
11 | version. | |
12 | ||
13 | GCC is distributed in the hope that it will be useful, but WITHOUT ANY | |
14 | WARRANTY; without even the implied warranty of MERCHANTABILITY or | |
15 | FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License | |
16 | for more details. | |
17 | ||
18 | You should have received a copy of the GNU General Public License | |
19 | along with GCC; see the file COPYING3. If not see | |
20 | <http://www.gnu.org/licenses/>. */ | |
21 | ||
22 | #include "brig-code-entry-handler.h" | |
23 | ||
24 | #include "stringpool.h" | |
25 | #include "tree-iterator.h" | |
26 | #include "toplev.h" | |
27 | #include "diagnostic.h" | |
28 | #include "brig-machine.h" | |
29 | #include "brig-util.h" | |
30 | #include "errors.h" | |
31 | #include "real.h" | |
32 | #include "print-tree.h" | |
33 | #include "tree-pretty-print.h" | |
34 | #include "target.h" | |
35 | #include "langhooks.h" | |
36 | #include "gimple-expr.h" | |
37 | #include "convert.h" | |
38 | #include "brig-util.h" | |
39 | #include "builtins.h" | |
40 | #include "phsa.h" | |
41 | #include "brig-builtins.h" | |
42 | #include "fold-const.h" | |
43 | ||
5fd1486c PJ |
44 | brig_code_entry_handler::brig_code_entry_handler (brig_to_generic &parent) |
45 | : brig_entry_handler (parent) | |
46 | { | |
5fd1486c PJ |
47 | } |
48 | ||
49 | /* Build a tree operand which is a reference to a piece of code. REF is the | |
50 | original reference as a BRIG object. */ | |
51 | ||
52 | tree | |
53 | brig_code_entry_handler::build_code_ref (const BrigBase &ref) | |
54 | { | |
55 | if (ref.kind == BRIG_KIND_DIRECTIVE_LABEL) | |
56 | { | |
57 | const BrigDirectiveLabel *brig_label = (const BrigDirectiveLabel *) &ref; | |
58 | ||
59 | const BrigData *label_name | |
60 | = m_parent.get_brig_data_entry (brig_label->name); | |
61 | ||
62 | std::string label_str ((const char *) (label_name->bytes), | |
63 | label_name->byteCount); | |
64 | return m_parent.m_cf->label (label_str); | |
65 | } | |
66 | else if (ref.kind == BRIG_KIND_DIRECTIVE_FUNCTION) | |
67 | { | |
68 | const BrigDirectiveExecutable *func | |
69 | = (const BrigDirectiveExecutable *) &ref; | |
70 | return m_parent.function_decl (m_parent.get_mangled_name (func)); | |
71 | } | |
72 | else if (ref.kind == BRIG_KIND_DIRECTIVE_FBARRIER) | |
73 | { | |
74 | const BrigDirectiveFbarrier* fbar = (const BrigDirectiveFbarrier*)&ref; | |
75 | ||
d4b7f2ee PJ |
76 | std::string var_name = m_parent.get_mangled_name (fbar); |
77 | uint64_t offset | |
78 | = m_parent.m_cf->group_variable_segment_offset (var_name); | |
79 | ||
80 | tree local_offset = build_int_cst (uint32_type_node, offset); | |
81 | if (m_parent.m_cf->m_local_group_variables.has_variable (var_name)) | |
82 | local_offset | |
83 | = build2 (PLUS_EXPR, uint64_type_node, local_offset, | |
84 | convert (uint64_type_node, | |
85 | m_parent.m_cf->m_group_local_offset_arg)); | |
86 | return local_offset; | |
5fd1486c PJ |
87 | } |
88 | else | |
89 | gcc_unreachable (); | |
90 | } | |
91 | ||
92 | /* Produce a tree operand for the given BRIG_INST and its OPERAND. | |
93 | OPERAND_TYPE should be the operand type in case it should not | |
94 | be dictated by the BrigBase. IS_INPUT indicates if the operand | |
95 | is an input operand or a result. */ | |
96 | ||
97 | tree | |
98 | brig_code_entry_handler::build_tree_operand (const BrigInstBase &brig_inst, | |
99 | const BrigBase &operand, | |
100 | tree operand_type, bool is_input) | |
101 | { | |
102 | switch (operand.kind) | |
103 | { | |
104 | case BRIG_KIND_OPERAND_OPERAND_LIST: | |
105 | { | |
106 | vec<constructor_elt, va_gc> *constructor_vals = NULL; | |
107 | const BrigOperandOperandList &oplist | |
108 | = (const BrigOperandOperandList &) operand; | |
109 | const BrigData *data = m_parent.get_brig_data_entry (oplist.elements); | |
110 | size_t bytes = data->byteCount; | |
111 | const BrigOperandOffset32_t *operand_ptr | |
112 | = (const BrigOperandOffset32_t *) data->bytes; | |
113 | while (bytes > 0) | |
114 | { | |
115 | BrigOperandOffset32_t offset = *operand_ptr; | |
116 | const BrigBase *operand_element | |
117 | = m_parent.get_brig_operand_entry (offset); | |
118 | tree element | |
119 | = build_tree_operand (brig_inst, *operand_element, operand_type); | |
120 | ||
121 | /* In case a vector is used an input, cast the elements to | |
122 | correct size here so we don't need a separate unpack/pack for it. | |
123 | fp16-fp32 conversion is done in build_operands (). */ | |
124 | if (is_input && TREE_TYPE (element) != operand_type) | |
dc03239c | 125 | element = build_resize_convert_view (operand_type, element); |
5fd1486c PJ |
126 | |
127 | CONSTRUCTOR_APPEND_ELT (constructor_vals, NULL_TREE, element); | |
128 | ++operand_ptr; | |
129 | bytes -= 4; | |
130 | } | |
131 | size_t element_count = data->byteCount / 4; | |
132 | tree vec_type = build_vector_type (operand_type, element_count); | |
133 | ||
134 | return build_constructor (vec_type, constructor_vals); | |
135 | } | |
136 | case BRIG_KIND_OPERAND_CODE_LIST: | |
137 | { | |
138 | /* Build a TREE_VEC of code expressions. */ | |
139 | ||
140 | const BrigOperandCodeList &oplist | |
141 | = (const BrigOperandCodeList &) operand; | |
142 | const BrigData *data = m_parent.get_brig_data_entry (oplist.elements); | |
143 | size_t bytes = data->byteCount; | |
144 | const BrigOperandOffset32_t *operand_ptr | |
145 | = (const BrigOperandOffset32_t *) data->bytes; | |
146 | ||
147 | size_t case_index = 0; | |
148 | size_t element_count = data->byteCount / 4; | |
149 | ||
150 | /* Create a TREE_VEC out of the labels in the list. */ | |
151 | tree vec = make_tree_vec (element_count); | |
152 | ||
153 | while (bytes > 0) | |
154 | { | |
155 | BrigOperandOffset32_t offset = *operand_ptr; | |
156 | const BrigBase *ref = m_parent.get_brig_code_entry (offset); | |
157 | tree element = build_code_ref (*ref); | |
158 | ||
159 | gcc_assert (case_index < element_count); | |
160 | TREE_VEC_ELT (vec, case_index) = element; | |
161 | case_index++; | |
162 | ||
163 | ++operand_ptr; | |
164 | bytes -= 4; | |
165 | } | |
166 | return vec; | |
167 | } | |
168 | case BRIG_KIND_OPERAND_REGISTER: | |
169 | { | |
170 | const BrigOperandRegister *brig_reg | |
171 | = (const BrigOperandRegister *) &operand; | |
172 | return m_parent.m_cf->get_m_var_declfor_reg (brig_reg); | |
173 | } | |
174 | case BRIG_KIND_OPERAND_CONSTANT_BYTES: | |
175 | { | |
176 | const BrigOperandConstantBytes *brigConst | |
177 | = (const BrigOperandConstantBytes *) &operand; | |
178 | /* The constants can be of different type than the instruction | |
179 | and are implicitly casted to the input operand. */ | |
180 | return get_tree_cst_for_hsa_operand (brigConst, NULL_TREE); | |
181 | } | |
182 | case BRIG_KIND_OPERAND_WAVESIZE: | |
183 | { | |
184 | if (!INTEGRAL_TYPE_P (operand_type)) | |
185 | { | |
186 | gcc_unreachable (); | |
187 | return NULL_TREE; | |
188 | } | |
189 | return build_int_cstu (operand_type, gccbrig_get_target_wavesize ()); | |
190 | } | |
191 | case BRIG_KIND_OPERAND_CODE_REF: | |
192 | { | |
193 | const BrigOperandCodeRef *brig_code_ref | |
194 | = (const BrigOperandCodeRef *) &operand; | |
195 | ||
196 | const BrigBase *ref = m_parent.get_brig_code_entry (brig_code_ref->ref); | |
197 | ||
198 | return build_code_ref (*ref); | |
199 | } | |
200 | case BRIG_KIND_OPERAND_ADDRESS: | |
201 | { | |
202 | return build_address_operand (brig_inst, | |
203 | (const BrigOperandAddress &) operand); | |
204 | } | |
205 | default: | |
206 | gcc_unreachable (); | |
207 | } | |
208 | } | |
209 | ||
210 | /* Build a tree node representing an address reference from a BRIG_INST and its | |
211 | ADDR_OPERAND. */ | |
212 | ||
213 | tree | |
214 | brig_code_entry_handler::build_address_operand | |
215 | (const BrigInstBase &brig_inst, const BrigOperandAddress &addr_operand) | |
216 | { | |
217 | tree instr_type = gccbrig_tree_type_for_hsa_type (brig_inst.type); | |
218 | ||
219 | BrigSegment8_t segment = BRIG_SEGMENT_GLOBAL; | |
220 | if (brig_inst.opcode == BRIG_OPCODE_LDA) | |
221 | segment = ((const BrigInstAddr &) brig_inst).segment; | |
222 | else if (brig_inst.base.kind == BRIG_KIND_INST_MEM) | |
223 | segment = ((const BrigInstMem &) brig_inst).segment; | |
224 | else if (brig_inst.base.kind == BRIG_KIND_INST_ATOMIC) | |
225 | segment = ((const BrigInstAtomic &) brig_inst).segment; | |
226 | ||
227 | tree var_offset = NULL_TREE; | |
228 | tree const_offset = NULL_TREE; | |
229 | tree symbol_base = NULL_TREE; | |
230 | ||
231 | if (addr_operand.symbol != 0) | |
232 | { | |
233 | const BrigDirectiveVariable *arg_symbol | |
234 | = (const BrigDirectiveVariable *) m_parent.get_brig_code_entry | |
235 | (addr_operand.symbol); | |
236 | ||
237 | std::string var_name = m_parent.get_mangled_name (arg_symbol); | |
238 | ||
239 | if (segment == BRIG_SEGMENT_KERNARG) | |
240 | { | |
241 | /* Find the offset to the kernarg buffer for the given | |
242 | kernel argument variable. */ | |
243 | tree func = m_parent.m_cf->m_func_decl; | |
244 | /* __args is the first parameter in kernel functions. */ | |
245 | symbol_base = DECL_ARGUMENTS (func); | |
246 | uint64_t offset = m_parent.m_cf->kernel_arg_offset (arg_symbol); | |
247 | if (offset > 0) | |
248 | const_offset = build_int_cst (size_type_node, offset); | |
249 | } | |
250 | else if (segment == BRIG_SEGMENT_GROUP) | |
251 | { | |
d4b7f2ee PJ |
252 | uint64_t offset |
253 | = m_parent.m_cf->group_variable_segment_offset (var_name); | |
5fd1486c | 254 | const_offset = build_int_cst (size_type_node, offset); |
d4b7f2ee PJ |
255 | |
256 | /* If it's a local group variable reference, substract the local | |
257 | group segment offset to get the group base ptr offset. */ | |
258 | if (m_parent.m_cf->m_local_group_variables.has_variable (var_name)) | |
259 | const_offset | |
260 | = build2 (PLUS_EXPR, uint64_type_node, const_offset, | |
261 | convert (uint64_type_node, | |
262 | m_parent.m_cf->m_group_local_offset_arg)); | |
263 | ||
5fd1486c PJ |
264 | } |
265 | else if (segment == BRIG_SEGMENT_PRIVATE || segment == BRIG_SEGMENT_SPILL) | |
266 | { | |
267 | uint32_t offset = m_parent.private_variable_segment_offset (var_name); | |
268 | ||
269 | /* Compute the offset to the work item's copy: | |
270 | ||
271 | single-wi-offset * local_size + wiflatid * varsize | |
272 | ||
273 | This way the work items have the same variable in | |
274 | successive elements to each other in the segment, | |
275 | helping to achieve autovectorization of loads/stores | |
276 | with stride 1. */ | |
277 | ||
278 | tree_stl_vec uint32_0 | |
279 | = tree_stl_vec (1, build_int_cst (uint32_type_node, 0)); | |
280 | ||
281 | tree_stl_vec uint32_1 | |
282 | = tree_stl_vec (1, build_int_cst (uint32_type_node, 1)); | |
283 | ||
284 | tree_stl_vec uint32_2 | |
285 | = tree_stl_vec (1, build_int_cst (uint32_type_node, 2)); | |
286 | ||
287 | tree local_size | |
288 | = build2 (MULT_EXPR, uint32_type_node, | |
080dc243 PJ |
289 | m_parent.m_cf->expand_or_call_builtin |
290 | (BRIG_OPCODE_WORKGROUPSIZE, BRIG_TYPE_U32, | |
291 | uint32_type_node, uint32_0), | |
292 | m_parent.m_cf->expand_or_call_builtin | |
293 | (BRIG_OPCODE_WORKGROUPSIZE, BRIG_TYPE_U32, | |
294 | uint32_type_node, uint32_1)); | |
5fd1486c PJ |
295 | |
296 | local_size | |
297 | = build2 (MULT_EXPR, uint32_type_node, | |
080dc243 PJ |
298 | m_parent.m_cf->expand_or_call_builtin |
299 | (BRIG_OPCODE_WORKGROUPSIZE, BRIG_TYPE_U32, | |
300 | uint32_type_node, uint32_2), | |
5fd1486c PJ |
301 | local_size); |
302 | ||
303 | tree var_region | |
304 | = build2 (MULT_EXPR, uint32_type_node, | |
305 | build_int_cst (uint32_type_node, offset), local_size); | |
306 | ||
307 | tree_stl_vec operands; | |
308 | tree pos | |
309 | = build2 (MULT_EXPR, uint32_type_node, | |
310 | build_int_cst (uint32_type_node, | |
311 | m_parent.private_variable_size (var_name)), | |
080dc243 PJ |
312 | m_parent.m_cf->expand_or_call_builtin |
313 | (BRIG_OPCODE_WORKITEMFLATID, BRIG_TYPE_U32, | |
314 | uint32_type_node, operands)); | |
5fd1486c PJ |
315 | |
316 | tree var_offset | |
317 | = build2 (PLUS_EXPR, uint32_type_node, var_region, pos); | |
318 | ||
319 | /* In case of LDA this is returned directly as an integer value. | |
320 | For other mem-related instructions, we will convert this segment | |
321 | offset to a flat address by adding it as an offset to a (private | |
322 | or group) base pointer later on. Same applies to group_var_offset. */ | |
323 | symbol_base | |
080dc243 PJ |
324 | = m_parent.m_cf->add_temp_var ("priv_var_offset", |
325 | convert (size_type_node, | |
326 | var_offset)); | |
5fd1486c PJ |
327 | } |
328 | else if (segment == BRIG_SEGMENT_ARG) | |
329 | { | |
330 | tree arg_var_decl; | |
331 | if (m_parent.m_cf->m_ret_value_brig_var == arg_symbol) | |
332 | arg_var_decl = m_parent.m_cf->m_ret_temp; | |
333 | else | |
334 | arg_var_decl = m_parent.m_cf->arg_variable (arg_symbol); | |
335 | ||
336 | gcc_assert (arg_var_decl != NULL_TREE); | |
337 | ||
338 | tree ptype = build_pointer_type (instr_type); | |
339 | ||
340 | if (arg_symbol->type & BRIG_TYPE_ARRAY) | |
341 | { | |
342 | ||
343 | /* Two different type of array references in case of arguments | |
344 | depending where they are referred at. In the caller (argument | |
345 | segment), the reference is to an array object and | |
346 | in the callee, the array object has been passed as a pointer | |
347 | to the array object. */ | |
348 | ||
349 | if (POINTER_TYPE_P (TREE_TYPE (arg_var_decl))) | |
dc03239c | 350 | symbol_base = build_resize_convert_view (ptype, arg_var_decl); |
5fd1486c PJ |
351 | else |
352 | { | |
353 | /* In case we are referring to an array (the argument in | |
354 | call site), use its element zero as the base address. */ | |
355 | tree element_zero | |
356 | = build4 (ARRAY_REF, TREE_TYPE (TREE_TYPE (arg_var_decl)), | |
357 | arg_var_decl, integer_zero_node, NULL_TREE, | |
358 | NULL_TREE); | |
359 | symbol_base = build1 (ADDR_EXPR, ptype, element_zero); | |
360 | } | |
361 | } | |
362 | else | |
363 | symbol_base = build1 (ADDR_EXPR, ptype, arg_var_decl); | |
364 | } | |
365 | else | |
366 | { | |
367 | tree global_var_decl = m_parent.global_variable (var_name); | |
368 | ||
369 | /* In case the global variable hasn't been defined (yet), | |
370 | use the host def indirection ptr variable. */ | |
371 | if (global_var_decl == NULL_TREE) | |
372 | { | |
373 | std::string host_ptr_name | |
374 | = std::string (PHSA_HOST_DEF_PTR_PREFIX) + var_name; | |
375 | tree host_defined_ptr = m_parent.global_variable (host_ptr_name); | |
376 | gcc_assert (host_defined_ptr != NULL_TREE); | |
377 | symbol_base = host_defined_ptr; | |
378 | } | |
379 | else | |
380 | { | |
381 | gcc_assert (global_var_decl != NULL_TREE); | |
382 | ||
383 | tree ptype = build_pointer_type (instr_type); | |
384 | symbol_base = build1 (ADDR_EXPR, ptype, global_var_decl); | |
385 | } | |
386 | } | |
387 | } | |
388 | ||
389 | if (brig_inst.opcode != BRIG_OPCODE_LDA) | |
390 | { | |
391 | /* In case of lda_* we want to return the segment address because it's | |
392 | used as a value, perhaps in address computation and later converted | |
393 | explicitly to a flat address. | |
394 | ||
395 | In case of other instructions with memory operands we produce the flat | |
396 | address directly here (assuming the target does not have a separate | |
397 | address space for group/private segments for now). */ | |
398 | if (segment == BRIG_SEGMENT_GROUP) | |
399 | symbol_base = m_parent.m_cf->m_group_base_arg; | |
400 | else if (segment == BRIG_SEGMENT_PRIVATE | |
401 | || segment == BRIG_SEGMENT_SPILL) | |
402 | { | |
403 | if (symbol_base != NULL_TREE) | |
404 | symbol_base = build2 (POINTER_PLUS_EXPR, ptr_type_node, | |
405 | m_parent.m_cf->m_private_base_arg, | |
406 | symbol_base); | |
407 | else | |
408 | symbol_base = m_parent.m_cf->m_private_base_arg; | |
409 | } | |
410 | } | |
411 | ||
412 | if (addr_operand.reg != 0) | |
413 | { | |
414 | const BrigOperandRegister *mem_base_reg | |
415 | = (const BrigOperandRegister *) m_parent.get_brig_operand_entry | |
416 | (addr_operand.reg); | |
417 | tree base_reg_var = m_parent.m_cf->get_m_var_declfor_reg (mem_base_reg); | |
dc03239c HL |
418 | tree as_uint = build_reinterpret_to_uint (base_reg_var); |
419 | var_offset = convert_to_pointer (ptr_type_node, as_uint); | |
5fd1486c PJ |
420 | |
421 | gcc_assert (var_offset != NULL_TREE); | |
422 | } | |
423 | /* The pointer type we use to access the memory. Should be of the | |
424 | width of the load/store instruction, not the target/data | |
425 | register. */ | |
426 | tree ptype = build_pointer_type (instr_type); | |
427 | ||
428 | gcc_assert (ptype != NULL_TREE); | |
429 | ||
430 | tree addr = NULL_TREE; | |
431 | if (symbol_base != NULL_TREE && var_offset != NULL_TREE) | |
432 | /* The most complex addressing mode: symbol + reg [+ const offset]. */ | |
433 | addr = build2 (POINTER_PLUS_EXPR, ptr_type_node, | |
434 | convert (ptr_type_node, symbol_base), | |
435 | convert (size_type_node, var_offset)); | |
436 | else if (var_offset != NULL) | |
437 | addr = var_offset; | |
438 | else if (symbol_base != NULL) | |
439 | addr = symbol_base; | |
440 | ||
441 | if (const_offset != NULL_TREE) | |
442 | { | |
443 | if (addr == NULL_TREE) | |
444 | /* At least direct module-scope global group symbol access with LDA | |
445 | has only the const_offset. Group base ptr is not added as LDA should | |
446 | return the segment address, not the flattened one. */ | |
447 | addr = const_offset; | |
448 | else | |
449 | addr = build2 (POINTER_PLUS_EXPR, ptr_type_node, | |
450 | addr, convert (size_type_node, const_offset)); | |
451 | } | |
452 | ||
453 | /* We might have two const offsets in case of group or private arrays | |
454 | which have the first offset to the incoming group/private pointer | |
a0deb992 PJ |
455 | arg, and the second one an offset to it. It's also legal to have |
456 | a reference with a zero constant offset but no symbol. I've seen | |
457 | codes that reference kernarg segment like this. Thus, if at this | |
458 | point there is no address expression at all we assume it's an | |
459 | access to offset 0. */ | |
5fd1486c | 460 | uint64_t offs = gccbrig_to_uint64_t (addr_operand.offset); |
a0deb992 | 461 | if (offs > 0 || addr == NULL_TREE) |
5fd1486c | 462 | { |
c6e334cd PJ |
463 | /* In large mode, the offset is treated as 32bits unless it's |
464 | global, readonly or kernarg address space. | |
465 | See: | |
466 | http://www.hsafoundation.com/html_spec111/HSA_Library.htm | |
467 | #PRM/Topics/02_ProgModel/small_and_large_machine_models.htm | |
468 | #table_machine_model_data_sizes */ | |
469 | ||
470 | int is64b_offset = segment == BRIG_SEGMENT_GLOBAL | |
471 | || segment == BRIG_SEGMENT_READONLY | |
472 | || segment == BRIG_SEGMENT_KERNARG; | |
473 | ||
474 | /* The original offset is signed and should be sign | |
475 | extended for the pointer arithmetics. */ | |
476 | tree const_offset_2 = is64b_offset | |
477 | ? build_int_cst (size_type_node, offs) | |
478 | : convert (long_integer_type_node, | |
479 | build_int_cst (integer_type_node, offs)); | |
480 | ||
5fd1486c PJ |
481 | if (addr == NULL_TREE) |
482 | addr = const_offset_2; | |
483 | else | |
484 | addr = build2 (POINTER_PLUS_EXPR, ptr_type_node, | |
c7488b4f PJ |
485 | /* Addr can be a constant offset in case this is |
486 | a private array access. */ | |
487 | convert (ptr_type_node, addr), | |
488 | convert (size_type_node, const_offset_2)); | |
5fd1486c PJ |
489 | } |
490 | ||
491 | gcc_assert (addr != NULL_TREE); | |
492 | return convert_to_pointer (ptype, addr); | |
493 | } | |
494 | ||
495 | /* Builds a tree operand with the given OPERAND_INDEX for the given | |
496 | BRIG_INST with the desired tree OPERAND_TYPE. OPERAND_TYPE can | |
497 | be NULL in case the type is forced by the BRIG_INST type. */ | |
498 | ||
499 | tree | |
500 | brig_code_entry_handler::build_tree_operand_from_brig | |
501 | (const BrigInstBase *brig_inst, tree operand_type, size_t operand_index) | |
502 | { | |
503 | const BrigData *operand_entries | |
504 | = m_parent.get_brig_data_entry (brig_inst->operands); | |
505 | ||
506 | uint32_t operand_offset | |
507 | = ((const uint32_t *) &operand_entries->bytes)[operand_index]; | |
508 | const BrigBase *operand_data | |
509 | = m_parent.get_brig_operand_entry (operand_offset); | |
dc03239c HL |
510 | |
511 | bool inputp = !gccbrig_hsa_opcode_op_output_p (brig_inst->opcode, | |
512 | operand_index); | |
513 | return build_tree_operand (*brig_inst, *operand_data, operand_type, inputp); | |
5fd1486c PJ |
514 | } |
515 | ||
516 | /* Builds a single (scalar) constant initialized element of type | |
517 | ELEMENT_TYPE from the buffer pointed to by NEXT_DATA. */ | |
518 | ||
519 | tree | |
520 | brig_code_entry_handler::build_tree_cst_element | |
521 | (BrigType16_t element_type, const unsigned char *next_data) const | |
522 | { | |
523 | ||
524 | tree tree_element_type = gccbrig_tree_type_for_hsa_type (element_type); | |
525 | ||
526 | tree cst; | |
527 | switch (element_type) | |
528 | { | |
529 | case BRIG_TYPE_F16: | |
530 | { | |
531 | HOST_WIDE_INT low = *(const uint16_t *) next_data; | |
532 | cst = build_int_cst (uint16_type_node, low); | |
533 | break; | |
534 | } | |
535 | case BRIG_TYPE_F32: | |
536 | { | |
537 | REAL_VALUE_TYPE val; | |
538 | ieee_single_format.decode (&ieee_single_format, &val, | |
539 | (const long *) next_data); | |
540 | cst = build_real (tree_element_type, val); | |
541 | break; | |
542 | } | |
543 | case BRIG_TYPE_F64: | |
544 | { | |
545 | long data[2]; | |
546 | data[0] = *(const uint32_t *) next_data; | |
547 | data[1] = *(const uint32_t *) (next_data + 4); | |
548 | REAL_VALUE_TYPE val; | |
549 | ieee_double_format.decode (&ieee_double_format, &val, data); | |
550 | cst = build_real (tree_element_type, val); | |
551 | break; | |
552 | } | |
553 | case BRIG_TYPE_S8: | |
554 | case BRIG_TYPE_S16: | |
555 | case BRIG_TYPE_S32: | |
556 | case BRIG_TYPE_S64: | |
557 | { | |
558 | HOST_WIDE_INT low = *(const int64_t *) next_data; | |
559 | cst = build_int_cst (tree_element_type, low); | |
560 | break; | |
561 | } | |
562 | case BRIG_TYPE_U8: | |
563 | case BRIG_TYPE_U16: | |
564 | case BRIG_TYPE_U32: | |
565 | case BRIG_TYPE_U64: | |
566 | { | |
567 | unsigned HOST_WIDE_INT low = *(const uint64_t *) next_data; | |
568 | cst = build_int_cstu (tree_element_type, low); | |
569 | break; | |
570 | } | |
571 | case BRIG_TYPE_SIG64: | |
572 | { | |
573 | unsigned HOST_WIDE_INT low = *(const uint64_t *) next_data; | |
574 | cst = build_int_cstu (uint64_type_node, low); | |
575 | break; | |
576 | } | |
577 | case BRIG_TYPE_SIG32: | |
578 | { | |
579 | unsigned HOST_WIDE_INT low = *(const uint64_t *) next_data; | |
580 | cst = build_int_cstu (uint32_type_node, low); | |
581 | break; | |
582 | } | |
583 | default: | |
584 | gcc_unreachable (); | |
585 | return NULL_TREE; | |
586 | } | |
587 | return cst; | |
588 | } | |
589 | ||
590 | /* Produce a tree constant type for the given BRIG constant (BRIG_CONST). | |
591 | TYPE should be the forced instruction type, otherwise the type is | |
592 | dictated by the BRIG_CONST. */ | |
593 | ||
594 | tree | |
595 | brig_code_entry_handler::get_tree_cst_for_hsa_operand | |
596 | (const BrigOperandConstantBytes *brig_const, tree type) const | |
597 | { | |
598 | const BrigData *data = m_parent.get_brig_data_entry (brig_const->bytes); | |
599 | ||
600 | tree cst = NULL_TREE; | |
601 | ||
602 | if (type == NULL_TREE) | |
603 | type = gccbrig_tree_type_for_hsa_type (brig_const->type); | |
604 | ||
605 | /* The type of a single (scalar) element inside an array, | |
606 | vector or an array of vectors. */ | |
607 | BrigType16_t scalar_element_type | |
608 | = brig_const->type & BRIG_TYPE_BASE_MASK; | |
609 | tree tree_element_type = type; | |
610 | ||
611 | vec<constructor_elt, va_gc> *constructor_vals = NULL; | |
612 | ||
613 | if (TREE_CODE (type) == ARRAY_TYPE) | |
614 | tree_element_type = TREE_TYPE (type); | |
615 | ||
616 | size_t bytes_left = data->byteCount; | |
617 | const unsigned char *next_data = data->bytes; | |
618 | size_t scalar_element_size | |
619 | = gccbrig_hsa_type_bit_size (scalar_element_type) / BITS_PER_UNIT; | |
620 | ||
621 | while (bytes_left > 0) | |
622 | { | |
623 | if (VECTOR_TYPE_P (tree_element_type)) | |
624 | { | |
625 | /* In case of vector type elements (or sole vectors), | |
626 | create a vector ctor. */ | |
e112bba2 RS |
627 | size_t element_count |
628 | = gccbrig_type_vector_subparts (tree_element_type); | |
5fd1486c PJ |
629 | if (bytes_left < scalar_element_size * element_count) |
630 | fatal_error (UNKNOWN_LOCATION, | |
631 | "Not enough bytes left for the initializer " | |
cc0608e7 JJ |
632 | "(%lu need %lu).", (unsigned long) bytes_left, |
633 | (unsigned long) (scalar_element_size | |
634 | * element_count)); | |
5fd1486c PJ |
635 | |
636 | vec<constructor_elt, va_gc> *vec_els = NULL; | |
637 | for (size_t i = 0; i < element_count; ++i) | |
638 | { | |
639 | tree element | |
640 | = build_tree_cst_element (scalar_element_type, next_data); | |
641 | CONSTRUCTOR_APPEND_ELT (vec_els, NULL_TREE, element); | |
642 | bytes_left -= scalar_element_size; | |
643 | next_data += scalar_element_size; | |
644 | } | |
645 | cst = build_vector_from_ctor (tree_element_type, vec_els); | |
646 | } | |
647 | else | |
648 | { | |
649 | if (bytes_left < scalar_element_size) | |
650 | fatal_error (UNKNOWN_LOCATION, | |
651 | "Not enough bytes left for the initializer " | |
cc0608e7 JJ |
652 | "(%lu need %lu).", (unsigned long) bytes_left, |
653 | (unsigned long) scalar_element_size); | |
5fd1486c PJ |
654 | cst = build_tree_cst_element (scalar_element_type, next_data); |
655 | bytes_left -= scalar_element_size; | |
656 | next_data += scalar_element_size; | |
657 | } | |
658 | CONSTRUCTOR_APPEND_ELT (constructor_vals, NULL_TREE, cst); | |
659 | } | |
660 | ||
661 | if (TREE_CODE (type) == ARRAY_TYPE) | |
662 | return build_constructor (type, constructor_vals); | |
663 | else | |
664 | return cst; | |
665 | } | |
666 | ||
667 | /* Return the matching tree instruction arithmetics type for the | |
668 | given BRIG_TYPE. The aritmethics type is the one with which | |
669 | computation is done (in contrast to the storage type). F16 | |
670 | arithmetics type is emulated using F32 for now. */ | |
671 | ||
672 | tree | |
673 | brig_code_entry_handler::get_tree_expr_type_for_hsa_type | |
674 | (BrigType16_t brig_type) const | |
675 | { | |
676 | BrigType16_t brig_inner_type = brig_type & BRIG_TYPE_BASE_MASK; | |
677 | if (brig_inner_type == BRIG_TYPE_F16) | |
678 | { | |
679 | if (brig_inner_type == brig_type) | |
680 | return m_parent.s_fp32_type; | |
681 | size_t element_count = gccbrig_hsa_type_bit_size (brig_type) / 16; | |
682 | return build_vector_type (m_parent.s_fp32_type, element_count); | |
683 | } | |
684 | else | |
685 | return gccbrig_tree_type_for_hsa_type (brig_type); | |
686 | } | |
687 | ||
5fd1486c PJ |
688 | /* Return the correct GENERIC type for storing comparison results |
689 | of operand with the type given in SOURCE_TYPE. */ | |
690 | ||
691 | tree | |
692 | brig_code_entry_handler::get_comparison_result_type (tree source_type) | |
693 | { | |
694 | if (VECTOR_TYPE_P (source_type)) | |
695 | { | |
696 | size_t element_size = int_size_in_bytes (TREE_TYPE (source_type)); | |
697 | return build_vector_type | |
698 | (build_nonstandard_boolean_type (element_size * BITS_PER_UNIT), | |
e112bba2 | 699 | gccbrig_type_vector_subparts (source_type)); |
5fd1486c PJ |
700 | } |
701 | else | |
702 | return gccbrig_tree_type_for_hsa_type (BRIG_TYPE_B1); | |
703 | } | |
704 | ||
5fd1486c PJ |
705 | /* Creates a FP32 to FP16 conversion call, assuming the source and destination |
706 | are FP32 type variables. */ | |
707 | ||
708 | tree | |
709 | brig_code_entry_handler::build_f2h_conversion (tree source) | |
710 | { | |
711 | return float_to_half () (*this, source); | |
712 | } | |
713 | ||
714 | /* Creates a FP16 to FP32 conversion call, assuming the source and destination | |
715 | are FP32 type variables. */ | |
716 | ||
717 | tree | |
718 | brig_code_entry_handler::build_h2f_conversion (tree source) | |
719 | { | |
720 | return half_to_float () (*this, source); | |
721 | } | |
722 | ||
723 | /* Builds and "normalizes" the dest and source operands for the instruction | |
724 | execution; converts the input operands to the expected instruction type, | |
725 | performs half to float conversions, constant to correct type variable, | |
726 | and flush to zero (if applicable). */ | |
727 | ||
728 | tree_stl_vec | |
729 | brig_code_entry_handler::build_operands (const BrigInstBase &brig_inst) | |
dc03239c HL |
730 | { |
731 | return build_or_analyze_operands (brig_inst, false); | |
732 | } | |
733 | ||
734 | void | |
735 | brig_code_entry_handler::analyze_operands (const BrigInstBase &brig_inst) | |
736 | { | |
737 | build_or_analyze_operands (brig_inst, true); | |
738 | } | |
739 | ||
740 | /* Implements both the build_operands () and analyze_operands () call | |
741 | so changes go in tandem. Performs build_operands () when ANALYZE | |
742 | is false. Otherwise, only analyze operands and return empty | |
743 | list. | |
744 | ||
745 | If analyzing record each HSA register operand with the | |
746 | corresponding resolved operand tree type to | |
747 | brig_to_generic::m_fn_regs_use_index. */ | |
748 | ||
749 | tree_stl_vec | |
750 | brig_code_entry_handler:: | |
751 | build_or_analyze_operands (const BrigInstBase &brig_inst, bool analyze) | |
5fd1486c PJ |
752 | { |
753 | /* Flush to zero. */ | |
754 | bool ftz = false; | |
755 | const BrigBase *base = &brig_inst.base; | |
756 | ||
757 | if (base->kind == BRIG_KIND_INST_MOD) | |
758 | { | |
759 | const BrigInstMod *mod = (const BrigInstMod *) base; | |
760 | ftz = mod->modifier & BRIG_ALU_FTZ; | |
761 | } | |
762 | else if (base->kind == BRIG_KIND_INST_CMP) | |
763 | { | |
764 | const BrigInstCmp *cmp = (const BrigInstCmp *) base; | |
765 | ftz = cmp->modifier & BRIG_ALU_FTZ; | |
766 | } | |
767 | ||
768 | bool is_vec_instr = hsa_type_packed_p (brig_inst.type); | |
769 | ||
770 | size_t element_count; | |
771 | if (is_vec_instr) | |
772 | { | |
773 | BrigType16_t brig_element_type = brig_inst.type & BRIG_TYPE_BASE_MASK; | |
774 | element_count = gccbrig_hsa_type_bit_size (brig_inst.type) | |
775 | / gccbrig_hsa_type_bit_size (brig_element_type); | |
776 | } | |
777 | else | |
778 | element_count = 1; | |
779 | ||
780 | bool is_fp16_arith = false; | |
781 | ||
782 | tree src_type; | |
783 | tree dest_type; | |
784 | if (base->kind == BRIG_KIND_INST_CMP) | |
785 | { | |
786 | const BrigInstCmp *cmp_inst = (const BrigInstCmp *) base; | |
787 | src_type = gccbrig_tree_type_for_hsa_type (cmp_inst->sourceType); | |
788 | dest_type = gccbrig_tree_type_for_hsa_type (brig_inst.type); | |
789 | is_fp16_arith | |
790 | = (cmp_inst->sourceType & BRIG_TYPE_BASE_MASK) == BRIG_TYPE_F16; | |
791 | } | |
792 | else if (base->kind == BRIG_KIND_INST_SOURCE_TYPE) | |
793 | { | |
794 | const BrigInstSourceType *src_type_inst | |
795 | = (const BrigInstSourceType *) base; | |
796 | src_type = gccbrig_tree_type_for_hsa_type (src_type_inst->sourceType); | |
797 | dest_type = gccbrig_tree_type_for_hsa_type (brig_inst.type); | |
798 | is_fp16_arith | |
799 | = (src_type_inst->sourceType & BRIG_TYPE_BASE_MASK) == BRIG_TYPE_F16 | |
800 | && !gccbrig_is_bit_operation (brig_inst.opcode); | |
801 | } | |
802 | else if (base->kind == BRIG_KIND_INST_SEG_CVT) | |
803 | { | |
804 | const BrigInstSegCvt *seg_cvt_inst = (const BrigInstSegCvt *) base; | |
805 | src_type = gccbrig_tree_type_for_hsa_type (seg_cvt_inst->sourceType); | |
806 | dest_type = gccbrig_tree_type_for_hsa_type (brig_inst.type); | |
807 | } | |
808 | else if (base->kind == BRIG_KIND_INST_MEM) | |
809 | { | |
810 | src_type = gccbrig_tree_type_for_hsa_type (brig_inst.type); | |
811 | dest_type = src_type; | |
812 | /* With mem instructions we don't want to cast the fp16 | |
813 | back and forth between fp32, because the load/stores | |
814 | are not specific to the data type. */ | |
815 | is_fp16_arith = false; | |
816 | } | |
817 | else if (base->kind == BRIG_KIND_INST_CVT) | |
818 | { | |
819 | const BrigInstCvt *cvt_inst = (const BrigInstCvt *) base; | |
820 | ||
821 | src_type = gccbrig_tree_type_for_hsa_type (cvt_inst->sourceType); | |
822 | dest_type = gccbrig_tree_type_for_hsa_type (brig_inst.type); | |
823 | } | |
824 | else | |
825 | { | |
826 | switch (brig_inst.opcode) | |
827 | { | |
828 | case BRIG_OPCODE_INITFBAR: | |
829 | case BRIG_OPCODE_JOINFBAR: | |
830 | case BRIG_OPCODE_WAITFBAR: | |
831 | case BRIG_OPCODE_ARRIVEFBAR: | |
832 | case BRIG_OPCODE_LEAVEFBAR: | |
833 | case BRIG_OPCODE_RELEASEFBAR: | |
834 | src_type = uint32_type_node; | |
835 | break; | |
836 | default: | |
837 | src_type = gccbrig_tree_type_for_hsa_type (brig_inst.type); | |
838 | break; | |
839 | } | |
840 | dest_type = src_type; | |
841 | is_fp16_arith | |
842 | = !gccbrig_is_bit_operation (brig_inst.opcode) | |
843 | && (brig_inst.type & BRIG_TYPE_BASE_MASK) == BRIG_TYPE_F16; | |
844 | } | |
845 | ||
846 | /* Halfs are a tricky special case: their "storage format" is u16, but | |
847 | scalars are stored in 32b regs while packed f16 are... well packed. */ | |
848 | tree half_storage_type = element_count > 1 | |
849 | ? gccbrig_tree_type_for_hsa_type (brig_inst.type) | |
850 | : uint32_type_node; | |
851 | ||
852 | const BrigData *operand_entries | |
853 | = m_parent.get_brig_data_entry (brig_inst.operands); | |
854 | std::vector<tree> operands; | |
855 | for (size_t i = 0; i < operand_entries->byteCount / 4; ++i) | |
856 | { | |
857 | uint32_t operand_offset = ((const uint32_t *) &operand_entries->bytes)[i]; | |
858 | const BrigBase *operand_data | |
859 | = m_parent.get_brig_operand_entry (operand_offset); | |
860 | ||
861 | const bool is_output | |
862 | = gccbrig_hsa_opcode_op_output_p (brig_inst.opcode, i); | |
863 | ||
864 | tree operand_type = is_output ? dest_type : src_type; | |
865 | ||
866 | bool half_to_float = is_fp16_arith; | |
867 | ||
868 | /* Special cases for operand types. */ | |
869 | if ((brig_inst.opcode == BRIG_OPCODE_SHL | |
870 | || brig_inst.opcode == BRIG_OPCODE_SHR) | |
871 | && i == 2) | |
872 | /* The shift amount is always a scalar. */ | |
873 | operand_type | |
874 | = VECTOR_TYPE_P (src_type) ? TREE_TYPE (src_type) : src_type; | |
875 | else if (brig_inst.opcode == BRIG_OPCODE_SHUFFLE) | |
876 | { | |
877 | if (i == 3) | |
878 | /* HSAIL shuffle inputs the MASK vector as tightly packed bits | |
879 | while GENERIC VEC_PERM_EXPR expects the mask elements to be | |
880 | of the same size as the elements in the input vectors. Let's | |
881 | cast to a scalar type here and convert to the VEC_PERM_EXPR | |
882 | format in instruction handling. There are no arbitrary bit | |
883 | width int types in GENERIC so we cannot use the original | |
884 | vector type. */ | |
885 | operand_type = uint32_type_node; | |
886 | else | |
887 | /* Always treat the element as unsigned ints to avoid | |
888 | sign extensions/negative offsets with masks, which | |
889 | are expected to be of the same element type as the | |
890 | data in VEC_PERM_EXPR. With shuffles the data type | |
891 | should not matter as it's a "raw operation". */ | |
892 | operand_type = get_unsigned_int_type (operand_type); | |
893 | } | |
894 | else if (brig_inst.opcode == BRIG_OPCODE_PACK) | |
895 | { | |
896 | if (i == 1) | |
897 | operand_type = get_unsigned_int_type (dest_type); | |
898 | else if (i == 2) | |
899 | operand_type = get_unsigned_int_type (TREE_TYPE (dest_type)); | |
900 | else if (i == 3) | |
901 | operand_type = uint32_type_node; | |
902 | } | |
903 | else if (brig_inst.opcode == BRIG_OPCODE_UNPACK && i == 2) | |
904 | operand_type = uint32_type_node; | |
905 | else if (brig_inst.opcode == BRIG_OPCODE_SAD && i == 3) | |
906 | operand_type = uint32_type_node; | |
907 | else if (brig_inst.opcode == BRIG_OPCODE_CLASS && i == 2) | |
908 | { | |
909 | operand_type = uint32_type_node; | |
910 | half_to_float = false; | |
911 | } | |
c6e334cd PJ |
912 | else if (brig_inst.opcode == BRIG_OPCODE_ACTIVELANEPERMUTE && i == 4) |
913 | { | |
914 | operand_type = uint32_type_node; | |
915 | } | |
5fd1486c PJ |
916 | else if (half_to_float) |
917 | /* Treat the operands as the storage type at this point. */ | |
918 | operand_type = half_storage_type; | |
919 | ||
dc03239c HL |
920 | if (analyze) |
921 | { | |
922 | if (operand_data->kind == BRIG_KIND_OPERAND_REGISTER) | |
923 | { | |
924 | const BrigOperandRegister &brig_reg | |
925 | = (const BrigOperandRegister &) *operand_data; | |
926 | m_parent.add_reg_used_as_type (brig_reg, operand_type); | |
927 | } | |
928 | continue; | |
929 | } | |
930 | ||
5fd1486c PJ |
931 | tree operand = build_tree_operand (brig_inst, *operand_data, operand_type, |
932 | !is_output); | |
5fd1486c PJ |
933 | gcc_assert (operand); |
934 | ||
935 | /* Cast/convert the inputs to correct types as expected by the GENERIC | |
936 | opcode instruction. */ | |
937 | if (!is_output) | |
938 | { | |
939 | if (half_to_float) | |
940 | operand = build_h2f_conversion | |
dc03239c | 941 | (build_resize_convert_view (half_storage_type, operand)); |
5fd1486c PJ |
942 | else if (TREE_CODE (operand) != LABEL_DECL |
943 | && TREE_CODE (operand) != TREE_VEC | |
944 | && operand_data->kind != BRIG_KIND_OPERAND_ADDRESS | |
dc03239c | 945 | && operand_data->kind != BRIG_KIND_OPERAND_OPERAND_LIST) |
5fd1486c | 946 | { |
dc03239c | 947 | operand = build_resize_convert_view (operand_type, operand); |
5fd1486c PJ |
948 | } |
949 | else if (brig_inst.opcode == BRIG_OPCODE_SHUFFLE) | |
950 | /* Force the operand type to be treated as the raw type. */ | |
dc03239c | 951 | operand = build_resize_convert_view (operand_type, operand); |
5fd1486c PJ |
952 | |
953 | if (brig_inst.opcode == BRIG_OPCODE_CMOV && i == 1) | |
954 | { | |
955 | /* gcc expects the lower bit to be 1 (or all ones in case of | |
956 | vectors) while CMOV assumes false iff 0. Convert the input | |
957 | here to what gcc likes by generating | |
958 | 'operand = operand != 0'. */ | |
959 | tree cmp_res_type = get_comparison_result_type (operand_type); | |
960 | operand = build2 (NE_EXPR, cmp_res_type, operand, | |
961 | build_zero_cst (TREE_TYPE (operand))); | |
962 | } | |
963 | ||
964 | if (ftz) | |
965 | operand = flush_to_zero (is_fp16_arith) (*this, operand); | |
966 | } | |
967 | operands.push_back (operand); | |
968 | } | |
969 | return operands; | |
970 | } | |
971 | ||
972 | /* Build the GENERIC for assigning the result of an instruction to the result | |
973 | "register" (variable). BRIG_INST is the original brig instruction, | |
974 | OUTPUT the result variable/register, INST_EXPR the one producing the | |
975 | result. Required bitcasts and fp32 to fp16 conversions are added as | |
976 | well. */ | |
977 | ||
978 | tree | |
979 | brig_code_entry_handler::build_output_assignment (const BrigInstBase &brig_inst, | |
980 | tree output, tree inst_expr) | |
981 | { | |
dc03239c HL |
982 | /* The result/input type might be different from the output register |
983 | variable type (can be any type; see get_m_var_declfor_reg @ | |
984 | brig-function.cc). */ | |
5fd1486c | 985 | tree output_type = TREE_TYPE (output); |
5fd1486c PJ |
986 | bool is_fp16 = (brig_inst.type & BRIG_TYPE_BASE_MASK) == BRIG_TYPE_F16 |
987 | && brig_inst.base.kind != BRIG_KIND_INST_MEM | |
988 | && !gccbrig_is_bit_operation (brig_inst.opcode); | |
989 | ||
990 | /* Flush to zero. */ | |
991 | bool ftz = false; | |
992 | const BrigBase *base = &brig_inst.base; | |
993 | ||
080dc243 PJ |
994 | if (m_parent.m_cf->is_id_val (inst_expr)) |
995 | inst_expr = m_parent.m_cf->id_val (inst_expr); | |
996 | ||
997 | tree input_type = TREE_TYPE (inst_expr); | |
998 | ||
999 | m_parent.m_cf->add_reg_var_update (output, inst_expr); | |
1000 | ||
5fd1486c PJ |
1001 | if (base->kind == BRIG_KIND_INST_MOD) |
1002 | { | |
1003 | const BrigInstMod *mod = (const BrigInstMod *) base; | |
1004 | ftz = mod->modifier & BRIG_ALU_FTZ; | |
1005 | } | |
1006 | else if (base->kind == BRIG_KIND_INST_CMP) | |
1007 | { | |
1008 | const BrigInstCmp *cmp = (const BrigInstCmp *) base; | |
1009 | ftz = cmp->modifier & BRIG_ALU_FTZ; | |
1010 | } | |
1011 | ||
1012 | if (TREE_CODE (inst_expr) == CALL_EXPR) | |
1013 | { | |
1014 | tree func_decl = TREE_OPERAND (TREE_OPERAND (inst_expr, 1), 0); | |
1015 | input_type = TREE_TYPE (TREE_TYPE (func_decl)); | |
1016 | } | |
1017 | ||
1018 | if (ftz && (VECTOR_FLOAT_TYPE_P (TREE_TYPE (inst_expr)) | |
1019 | || SCALAR_FLOAT_TYPE_P (TREE_TYPE (inst_expr)) || is_fp16)) | |
1020 | { | |
1021 | /* Ensure we don't duplicate the arithmetics to the arguments of the bit | |
1022 | field reference operators. */ | |
080dc243 | 1023 | inst_expr = m_parent.m_cf->add_temp_var ("before_ftz", inst_expr); |
5fd1486c PJ |
1024 | inst_expr = flush_to_zero (is_fp16) (*this, inst_expr); |
1025 | } | |
1026 | ||
1027 | if (is_fp16) | |
1028 | { | |
080dc243 | 1029 | inst_expr = m_parent.m_cf->add_temp_var ("before_f2h", inst_expr); |
5fd1486c | 1030 | tree f2h_output = build_f2h_conversion (inst_expr); |
dc03239c HL |
1031 | tree conv = build_resize_convert_view (output_type, f2h_output); |
1032 | tree assign = build2 (MODIFY_EXPR, output_type, output, conv); | |
5fd1486c PJ |
1033 | m_parent.m_cf->append_statement (assign); |
1034 | return assign; | |
1035 | } | |
dc03239c | 1036 | else if (VECTOR_TYPE_P (output_type) && TREE_CODE (output) == CONSTRUCTOR) |
5fd1486c PJ |
1037 | { |
1038 | /* Expand/unpack the input value to the given vector elements. */ | |
1039 | size_t i; | |
1040 | tree input = inst_expr; | |
1041 | tree element_type = gccbrig_tree_type_for_hsa_type (brig_inst.type); | |
1042 | tree element; | |
1043 | tree last_assign = NULL_TREE; | |
1044 | FOR_EACH_CONSTRUCTOR_VALUE (CONSTRUCTOR_ELTS (output), i, element) | |
1045 | { | |
1046 | tree element_ref | |
1047 | = build3 (BIT_FIELD_REF, element_type, input, | |
1048 | TYPE_SIZE (element_type), | |
8c058905 HL |
1049 | bitsize_int (i * int_size_in_bytes (element_type) |
1050 | * BITS_PER_UNIT)); | |
5fd1486c PJ |
1051 | |
1052 | last_assign | |
1053 | = build_output_assignment (brig_inst, element, element_ref); | |
1054 | } | |
1055 | return last_assign; | |
1056 | } | |
1057 | else | |
1058 | { | |
1059 | /* All we do here is to bitcast the result and store it to the | |
1060 | 'register' (variable). Mainly need to take care of differing | |
1061 | bitwidths. */ | |
1062 | size_t src_width = int_size_in_bytes (input_type); | |
1063 | size_t dst_width = int_size_in_bytes (output_type); | |
dc03239c HL |
1064 | tree input = inst_expr; |
1065 | /* Integer results are extended to the target register width, using | |
1066 | the same sign as the inst_expr. */ | |
1067 | if (INTEGRAL_TYPE_P (TREE_TYPE (input)) && src_width != dst_width) | |
5fd1486c | 1068 | { |
dc03239c HL |
1069 | bool unsigned_p = TYPE_UNSIGNED (TREE_TYPE (input)); |
1070 | tree resized_type | |
1071 | = build_nonstandard_integer_type (dst_width * BITS_PER_UNIT, | |
1072 | unsigned_p); | |
1073 | input = convert_to_integer (resized_type, input); | |
5fd1486c | 1074 | } |
dc03239c HL |
1075 | input = build_resize_convert_view (output_type, input); |
1076 | tree assign = build2 (MODIFY_EXPR, output_type, output, input); | |
1077 | m_parent.m_cf->append_statement (assign); | |
1078 | return assign; | |
5fd1486c PJ |
1079 | } |
1080 | return NULL_TREE; | |
1081 | } | |
1082 | ||
1083 | /* Appends a GENERIC statement (STMT) to the currently constructed function. */ | |
1084 | ||
1085 | void | |
1086 | brig_code_entry_handler::append_statement (tree stmt) | |
1087 | { | |
1088 | m_parent.m_cf->append_statement (stmt); | |
1089 | } | |
1090 | ||
5fd1486c PJ |
1091 | /* Visits the element(s) in the OPERAND, calling HANDLER to each of them. */ |
1092 | ||
1093 | tree | |
1094 | tree_element_unary_visitor::operator () (brig_code_entry_handler &handler, | |
1095 | tree operand) | |
1096 | { | |
1097 | if (VECTOR_TYPE_P (TREE_TYPE (operand))) | |
1098 | { | |
1099 | size_t vec_size = int_size_in_bytes (TREE_TYPE (operand)); | |
1100 | size_t element_size = int_size_in_bytes (TREE_TYPE (TREE_TYPE (operand))); | |
1101 | size_t element_count = vec_size / element_size; | |
1102 | ||
1103 | tree input_element_type = TREE_TYPE (TREE_TYPE (operand)); | |
1104 | tree output_element_type = NULL_TREE; | |
1105 | ||
1106 | vec<constructor_elt, va_gc> *constructor_vals = NULL; | |
1107 | for (size_t i = 0; i < element_count; ++i) | |
1108 | { | |
1109 | tree element = build3 (BIT_FIELD_REF, input_element_type, operand, | |
1110 | TYPE_SIZE (input_element_type), | |
8c058905 HL |
1111 | bitsize_int (i * element_size |
1112 | * BITS_PER_UNIT)); | |
5fd1486c PJ |
1113 | |
1114 | tree output = visit_element (handler, element); | |
1115 | output_element_type = TREE_TYPE (output); | |
1116 | ||
1117 | CONSTRUCTOR_APPEND_ELT (constructor_vals, NULL_TREE, output); | |
1118 | } | |
1119 | ||
1120 | tree vec_type = build_vector_type (output_element_type, element_count); | |
1121 | ||
1122 | /* build_constructor creates a vector type which is not a vector_cst | |
1123 | that requires compile time constant elements. */ | |
1124 | tree vec = build_constructor (vec_type, constructor_vals); | |
1125 | ||
1126 | /* Add a temp variable for readability. */ | |
1127 | tree tmp_var = create_tmp_var (vec_type, "vec_out"); | |
1128 | tree vec_tmp_assign | |
1129 | = build2 (MODIFY_EXPR, TREE_TYPE (tmp_var), tmp_var, vec); | |
1130 | handler.append_statement (vec_tmp_assign); | |
1131 | return tmp_var; | |
1132 | } | |
1133 | else | |
1134 | return visit_element (handler, operand); | |
1135 | } | |
1136 | ||
1137 | /* Visits the element pair(s) in the OPERAND0 and OPERAND1, calling HANDLER | |
1138 | to each of them. */ | |
1139 | ||
1140 | tree | |
1141 | tree_element_binary_visitor::operator () (brig_code_entry_handler &handler, | |
1142 | tree operand0, tree operand1) | |
1143 | { | |
1144 | if (VECTOR_TYPE_P (TREE_TYPE (operand0))) | |
1145 | { | |
1146 | gcc_assert (VECTOR_TYPE_P (TREE_TYPE (operand1))); | |
1147 | size_t vec_size = int_size_in_bytes (TREE_TYPE (operand0)); | |
1148 | size_t element_size | |
1149 | = int_size_in_bytes (TREE_TYPE (TREE_TYPE (operand0))); | |
1150 | size_t element_count = vec_size / element_size; | |
1151 | ||
1152 | tree input_element_type = TREE_TYPE (TREE_TYPE (operand0)); | |
1153 | tree output_element_type = NULL_TREE; | |
1154 | ||
1155 | vec<constructor_elt, va_gc> *constructor_vals = NULL; | |
1156 | for (size_t i = 0; i < element_count; ++i) | |
1157 | { | |
1158 | ||
1159 | tree element0 = build3 (BIT_FIELD_REF, input_element_type, operand0, | |
1160 | TYPE_SIZE (input_element_type), | |
8c058905 HL |
1161 | bitsize_int (i * element_size |
1162 | * BITS_PER_UNIT)); | |
5fd1486c PJ |
1163 | |
1164 | tree element1 = build3 (BIT_FIELD_REF, input_element_type, operand1, | |
1165 | TYPE_SIZE (input_element_type), | |
8c058905 HL |
1166 | bitsize_int (i * element_size |
1167 | * BITS_PER_UNIT)); | |
5fd1486c PJ |
1168 | |
1169 | tree output = visit_element (handler, element0, element1); | |
1170 | output_element_type = TREE_TYPE (output); | |
1171 | ||
1172 | CONSTRUCTOR_APPEND_ELT (constructor_vals, NULL_TREE, output); | |
1173 | } | |
1174 | ||
1175 | tree vec_type = build_vector_type (output_element_type, element_count); | |
1176 | ||
1177 | /* build_constructor creates a vector type which is not a vector_cst | |
1178 | that requires compile time constant elements. */ | |
1179 | tree vec = build_constructor (vec_type, constructor_vals); | |
1180 | ||
1181 | /* Add a temp variable for readability. */ | |
1182 | tree tmp_var = create_tmp_var (vec_type, "vec_out"); | |
1183 | tree vec_tmp_assign | |
1184 | = build2 (MODIFY_EXPR, TREE_TYPE (tmp_var), tmp_var, vec); | |
1185 | handler.append_statement (vec_tmp_assign); | |
1186 | return tmp_var; | |
1187 | } | |
1188 | else | |
1189 | return visit_element (handler, operand0, operand1); | |
1190 | } | |
1191 | ||
1192 | /* Generates GENERIC code that flushes the visited element to zero. */ | |
1193 | ||
1194 | tree | |
1195 | flush_to_zero::visit_element (brig_code_entry_handler &, tree operand) | |
1196 | { | |
1197 | size_t size = int_size_in_bytes (TREE_TYPE (operand)); | |
1198 | if (size == 4) | |
1199 | { | |
1200 | tree built_in | |
1201 | = (m_fp16) ? builtin_decl_explicit (BUILT_IN_HSAIL_FTZ_F32_F16) : | |
1202 | builtin_decl_explicit (BUILT_IN_HSAIL_FTZ_F32); | |
1203 | ||
1204 | return call_builtin (built_in, 1, float_type_node, float_type_node, | |
1205 | operand); | |
1206 | } | |
1207 | else if (size == 8) | |
1208 | { | |
1209 | return call_builtin (builtin_decl_explicit (BUILT_IN_HSAIL_FTZ_F64), 1, | |
1210 | double_type_node, double_type_node, operand); | |
1211 | } | |
1212 | else | |
1213 | gcc_unreachable (); | |
1214 | return NULL_TREE; | |
1215 | } | |
1216 | ||
1217 | /* Generates GENERIC code that converts a single precision float to half | |
1218 | precision float. */ | |
1219 | ||
1220 | tree | |
1221 | float_to_half::visit_element (brig_code_entry_handler &caller, tree operand) | |
1222 | { | |
1223 | tree built_in = builtin_decl_explicit (BUILT_IN_HSAIL_F32_TO_F16); | |
1224 | ||
dc03239c | 1225 | tree casted_operand = build_resize_convert_view (uint32_type_node, operand); |
5fd1486c PJ |
1226 | |
1227 | tree call = call_builtin (built_in, 1, uint16_type_node, uint32_type_node, | |
1228 | casted_operand); | |
1229 | tree output | |
1230 | = create_tmp_var (TREE_TYPE (TREE_TYPE (built_in)), "fp16out"); | |
1231 | tree assign = build2 (MODIFY_EXPR, TREE_TYPE (output), output, call); | |
1232 | caller.append_statement (assign); | |
1233 | return output; | |
1234 | } | |
1235 | ||
1236 | /* Generates GENERIC code that converts a half precision float to single | |
1237 | precision float. */ | |
1238 | ||
1239 | tree | |
1240 | half_to_float::visit_element (brig_code_entry_handler &caller, tree operand) | |
1241 | { | |
1242 | tree built_in = builtin_decl_explicit (BUILT_IN_HSAIL_F16_TO_F32); | |
1243 | tree truncated_source = convert_to_integer (uint16_type_node, operand); | |
1244 | ||
1245 | tree call | |
1246 | = call_builtin (built_in, 1, uint32_type_node, uint16_type_node, | |
1247 | truncated_source); | |
1248 | ||
1249 | tree const_fp32_type | |
1250 | = build_type_variant (brig_to_generic::s_fp32_type, 1, 0); | |
1251 | ||
1252 | tree output = create_tmp_var (const_fp32_type, "fp32out"); | |
1253 | tree casted_result | |
dc03239c | 1254 | = build_resize_convert_view (brig_to_generic::s_fp32_type, call); |
5fd1486c PJ |
1255 | |
1256 | tree assign = build2 (MODIFY_EXPR, TREE_TYPE (output), output, casted_result); | |
1257 | ||
1258 | caller.append_statement (assign); | |
1259 | ||
1260 | return output; | |
1261 | } | |
1262 | ||
1263 | /* Treats the INPUT as SRC_TYPE and sign or zero extends it to DEST_TYPE. */ | |
1264 | ||
1265 | tree | |
1266 | brig_code_entry_handler::extend_int (tree input, tree dest_type, tree src_type) | |
1267 | { | |
1268 | /* Extend integer conversions according to the destination's | |
1269 | ext mode. First we need to clip the input register to | |
1270 | the possible smaller integer size to ensure the correct sign | |
1271 | bit is extended. */ | |
1272 | tree clipped_input = convert_to_integer (src_type, input); | |
1273 | tree conversion_result; | |
1274 | ||
1275 | if (TYPE_UNSIGNED (src_type)) | |
1276 | conversion_result | |
1277 | = convert_to_integer (unsigned_type_for (dest_type), clipped_input); | |
1278 | else | |
1279 | conversion_result | |
1280 | = convert_to_integer (signed_type_for (dest_type), clipped_input); | |
1281 | ||
1282 | /* Treat the result as unsigned so we do not sign extend to the | |
1283 | register width. For some reason this GENERIC sequence sign | |
1284 | extends to the s register: | |
1285 | ||
1286 | D.1541 = (signed char) s1; | |
1287 | D.1542 = (signed short) D.1541; | |
1288 | s0 = (unsigned int) D.1542 | |
1289 | */ | |
1290 | ||
1291 | /* The converted result is then extended to the target register | |
1292 | width, using the same sign as the destination. */ | |
1293 | return convert_to_integer (dest_type, conversion_result); | |
1294 | } | |
1295 | ||
1296 | /* Returns the integer constant value of the given node. | |
1297 | If it's a cast, looks into the source of the cast. */ | |
1298 | HOST_WIDE_INT | |
1299 | brig_code_entry_handler::int_constant_value (tree node) | |
1300 | { | |
1301 | tree n = node; | |
1302 | if (TREE_CODE (n) == VIEW_CONVERT_EXPR) | |
1303 | n = TREE_OPERAND (n, 0); | |
1304 | return int_cst_value (n); | |
1305 | } |