]> git.ipfire.org Git - thirdparty/gcc.git/blob - gcc/brig/brigfrontend/brig-basic-inst-handler.cc
Update copyright years.
[thirdparty/gcc.git] / gcc / brig / brigfrontend / brig-basic-inst-handler.cc
1 /* brig-basic-inst-handler.cc -- brig basic instruction handling
2 Copyright (C) 2016-2019 Free Software Foundation, Inc.
3 Contributed by Pekka Jaaskelainen <pekka.jaaskelainen@parmance.com>
4 for General Processor Tech.
5
6 This file is part of GCC.
7
8 GCC is free software; you can redistribute it and/or modify it under
9 the terms of the GNU General Public License as published by the Free
10 Software Foundation; either version 3, or (at your option) any later
11 version.
12
13 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
14 WARRANTY; without even the implied warranty of MERCHANTABILITY or
15 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
16 for more details.
17
18 You should have received a copy of the GNU General Public License
19 along with GCC; see the file COPYING3. If not see
20 <http://www.gnu.org/licenses/>. */
21
22 #include <sstream>
23
24 #include "brig-code-entry-handler.h"
25 #include "brig-util.h"
26
27 #include "errors.h"
28 #include "gimple-expr.h"
29 #include "convert.h"
30 #include "print-tree.h"
31 #include "tree-pretty-print.h"
32 #include "langhooks.h"
33 #include "stor-layout.h"
34 #include "diagnostic-core.h"
35 #include "brig-builtins.h"
36 #include "fold-const.h"
37
/* Constructs the basic instruction handler, passing the BRIG-to-GENERIC
   conversion context to the common code entry handler base class.  */

brig_basic_inst_handler::brig_basic_inst_handler (brig_to_generic &parent)
  : brig_code_entry_handler (parent)
{
}
42
/* Visitor that implements HSAIL saturating arithmetic instructions by
   processing the (possibly vector) operands elementwise, delegating each
   scalar element pair to a saturating built-in chosen from the BRIG
   opcode and element type.  */

class scalarized_sat_arithmetics : public tree_element_binary_visitor
{
public:
  scalarized_sat_arithmetics (const BrigInstBase &brig_inst)
    : m_brig_inst (brig_inst)
  {
    BrigType16_t element_type = brig_inst.type & BRIG_TYPE_BASE_MASK;

    /* Undefine all DEF_* families first so that only the SAT entries of
       brig-builtins.def expand to code below (the .def file presumably
       supplies empty defaults for the rest — confirm against
       brig-builtins.def).  */
#undef DEF_HSAIL_SAT_BUILTIN
#undef DEF_HSAIL_BUILTIN
#undef DEF_HSAIL_ATOMIC_BUILTIN
#undef DEF_HSAIL_INTR_BUILTIN
#undef DEF_HSAIL_CVT_ZEROI_SAT_BUILTIN

    /* Expand brig-builtins.def into an if-else chain that resolves the
       matching built-in decl; falling off the end of the chain means an
       unsupported opcode/type combination.  */
#define DEF_HSAIL_SAT_BUILTIN(ENUM, BRIG_OPCODE, HSAIL_TYPE,		\
			      NAME, TYPE, ATTRS)			\
  if (brig_inst.opcode == BRIG_OPCODE && element_type == HSAIL_TYPE)	\
    m_builtin = builtin_decl_explicit (ENUM);				\
  else
#include "brig-builtins.def"
    gcc_unreachable ();
  }

  /* Handles one pair of scalar elements by calling the resolved
     saturating built-in on them.  */
  virtual tree
  visit_element (brig_code_entry_handler &, tree operand0, tree operand1)
  {
    /* Implement saturating arithmetics with scalar built-ins for now.
       TODO: emit GENERIC nodes for the simplest cases or at least
       emit vector built-ins.  */
    return call_builtin (m_builtin, 2, TREE_TYPE (operand0),
			 TREE_TYPE (operand0), operand0,
			 TREE_TYPE (operand1), operand1);
  }
  /* The BRIG instruction being scalarized.  */
  const BrigInstBase &m_brig_inst;
  /* The scalar saturating built-in decl resolved in the constructor.  */
  tree m_builtin;
};
79
/* Implements a vector shuffle.  ARITH_TYPE is the type of the vector,
   OPERANDS[0] is the first vector, OPERANDS[1] the second vector and
   OPERANDS[2] the shuffle mask in HSAIL format.  The output is a VEC_PERM_EXPR
   that implements the shuffle as a GENERIC expression.  */
84
tree
brig_basic_inst_handler::build_shuffle (tree arith_type,
					tree_stl_vec &operands)
{
  /* Mask arithmetic is done in the unsigned integer equivalent of the
     vector's element type.  */
  tree element_type
    = get_unsigned_int_type (TREE_TYPE (TREE_TYPE (operands[0])));

  /* Offsets to add to the mask values to convert from the
     HSAIL mask to VEC_PERM_EXPR masks.  VEC_PERM_EXPR mask
     assumes an index spanning from 0 to 2 times the vec
     width while HSAIL refers separately to two different
     input vectors, thus is not a "full shuffle" where all
     output elements can originate from any input element.  */
  vec<constructor_elt, va_gc> *mask_offset_vals = NULL;

  unsigned int element_count = gccbrig_type_vector_subparts (arith_type);

  vec<constructor_elt, va_gc> *input_mask_vals = NULL;
  /* Each packed HSAIL mask entry occupies log2 (element_count) bits.  */
  size_t input_mask_element_size = exact_log2 (element_count);

  /* Unpack the tightly packed mask elements to BIT_FIELD_REFs
     from which to construct the mask vector as understood by
     VEC_PERM_EXPR.  */
  tree mask_operand
    = m_parent.m_cf->add_temp_var ("shuffle_mask", operands[2]);

  tree mask_element_type
    = build_nonstandard_integer_type (input_mask_element_size, true);

  for (size_t i = 0; i < element_count; ++i)
    {
      /* Extract the i'th packed mask entry from the mask operand.  */
      tree mask_element
	= build3 (BIT_FIELD_REF, mask_element_type, mask_operand,
		  bitsize_int (input_mask_element_size),
		  bitsize_int (i * input_mask_element_size));

      mask_element = convert (element_type, mask_element);

      /* The low half of the output selects from the first input, the
	 high half from the second; VEC_PERM_EXPR denotes the second
	 input's elements with indices >= element_count, hence the
	 offset.  */
      tree offset;
      if (i < element_count / 2)
	offset = build_int_cst (element_type, 0);
      else
	offset = build_int_cst (element_type, element_count);

      CONSTRUCTOR_APPEND_ELT (mask_offset_vals, NULL_TREE, offset);
      CONSTRUCTOR_APPEND_ELT (input_mask_vals, NULL_TREE, mask_element);
    }
  tree mask_vec_type = build_vector_type (element_type, element_count);

  tree mask_vec = build_constructor (mask_vec_type, input_mask_vals);
  tree offset_vec = build_constructor (mask_vec_type, mask_offset_vals);

  /* The final VEC_PERM_EXPR mask is the (runtime) HSAIL mask plus the
     per-lane input-selection offsets, computed with a vector add.  */
  tree mask = build2 (PLUS_EXPR, mask_vec_type, mask_vec, offset_vec);

  tree perm = build3 (VEC_PERM_EXPR, TREE_TYPE (operands[0]), operands[0],
		      operands[1], mask);
  return perm;
}
143
144 /* Unpacks (extracts) a scalar element with an index in OPERANDS[1]
145 from the vector expression in OPERANDS[0]. */
146
tree
brig_basic_inst_handler::build_unpack (tree_stl_vec &operands)
{
  /* Implement the unpack with a shuffle that stores the unpacked
     element to the lowest bit positions in the dest.  After that
     a bitwise AND is used to clear the uppermost bits.  */
  tree src_element_type = TREE_TYPE (TREE_TYPE (operands[0]));

  /* Perform the operations with a raw (unsigned int type) type.  */
  tree element_type = get_unsigned_int_type (src_element_type);

  vec<constructor_elt, va_gc> *input_mask_vals = NULL;
  vec<constructor_elt, va_gc> *and_mask_vals = NULL;

  size_t element_count
    = gccbrig_type_vector_subparts (TREE_TYPE (operands[0]));
  tree vec_type = build_vector_type (element_type, element_count);

  for (size_t i = 0; i < element_count; ++i)
    {
      /* Lane 0 of the shuffle reads the requested (runtime) element
	 index; the remaining lanes are don't-cares and just read
	 element 0.  */
      tree mask_element;
      if (i == 0)
	mask_element = convert (element_type, operands[1]);
      else
	mask_element = build_int_cst (element_type, 0);

      CONSTRUCTOR_APPEND_ELT (input_mask_vals, NULL_TREE, mask_element);

      /* AND mask that keeps only the lowest lane of the shuffled
	 vector.  */
      tree and_mask_element;
      if (i == 0)
	and_mask_element = build_int_cst (element_type, -1);
      else
	and_mask_element = build_int_cst (element_type, 0);
      CONSTRUCTOR_APPEND_ELT (and_mask_vals, NULL_TREE, and_mask_element);
    }

  tree mask_vec = build_constructor (vec_type, input_mask_vals);

  tree and_mask_vec = build_constructor (vec_type, and_mask_vals);

  /* Shuffle the source vector (viewed with unsigned elements) against
     itself; only lane 0 of the result is meaningful.  */
  tree perm = build3 (VEC_PERM_EXPR, vec_type,
		      build_resize_convert_view (vec_type, operands[0]),
		      build_resize_convert_view (vec_type, operands[0]),
		      mask_vec);

  tree cleared = build2 (BIT_AND_EXPR, vec_type, perm, and_mask_vec);

  /* Reinterpret the masked vector as one wide integer of the same total
     bit size.  */
  size_t s = int_size_in_bytes (TREE_TYPE (cleared)) * BITS_PER_UNIT;
  tree raw_type = build_nonstandard_integer_type (s, true);

  tree as_int = build_resize_convert_view (raw_type, cleared);

  /* Sub-32b integral elements are extended to the 32b register width;
     other narrow element types (e.g. f16) are returned as the raw
     integer view as-is.  */
  if (int_size_in_bytes (src_element_type) < 4)
    {
      if (INTEGRAL_TYPE_P (src_element_type))
	return extend_int (as_int, uint32_type_node, src_element_type);
    }
  return as_int;
}
206
207 /* Packs (inserts) a scalar element in OPERANDS[1]
208 to the vector in OPERANDS[0] at element position defined by
209 OPERANDS[2]. */
210
tree
brig_basic_inst_handler::build_pack (tree_stl_vec &operands)
{
  /* Implement using a bit level insertion.
     TODO: Reuse this for implementing 'bitinsert'
     without a builtin call.  */

  size_t ecount = gccbrig_type_vector_subparts (TREE_TYPE (operands[0]));
  size_t vecsize = int_size_in_bytes (TREE_TYPE (operands[0])) * BITS_PER_UNIT;
  /* The whole vector is manipulated as a single wide unsigned
     integer.  */
  tree wide_type = build_nonstandard_integer_type (vecsize, 1);

  tree src_vect = build_resize_convert_view (wide_type, operands[0]);
  src_vect = m_parent.m_cf->add_temp_var ("src_vect", src_vect);

  tree scalar = operands[1];
  scalar = m_parent.m_cf->add_temp_var ("scalar",
					convert_to_integer (wide_type, scalar));

  tree pos = operands[2];

  /* The upper bits of the position can contain garbage.
     Zero them for well-defined semantics.  ecount - 1 works as a modulo
     mask here, assuming the element count of packed HSAIL types is
     always a power of two — TODO confirm.  */
  tree t = build2 (BIT_AND_EXPR, TREE_TYPE (pos), operands[2],
		   build_int_cstu (TREE_TYPE (pos), ecount - 1));
  pos = m_parent.m_cf->add_temp_var ("pos", convert (wide_type, t));

  tree element_type = TREE_TYPE (TREE_TYPE (operands[0]));
  size_t element_width = int_size_in_bytes (element_type) * BITS_PER_UNIT;
  tree ewidth = build_int_cstu (wide_type, element_width);

  /* Bit offset of the target element inside the wide integer.  */
  tree bitoffset = build2 (MULT_EXPR, wide_type, ewidth, pos);
  bitoffset = m_parent.m_cf->add_temp_var ("offset", bitoffset);

  /* All-ones mask as wide as one element; 64b elements are special
     cased to avoid an undefined shift by the full operand width.  */
  uint64_t mask_int
    = element_width == 64 ? (uint64_t) -1 : ((uint64_t) 1 << element_width) - 1;

  tree mask = build_int_cstu (wide_type, mask_int);

  mask = m_parent.m_cf->add_temp_var ("mask",
				      convert_to_integer (wide_type, mask));

  /* Clear the bits of the target element in the original vector.  */
  tree clearing_mask
    = build1 (BIT_NOT_EXPR, wide_type,
	      build2 (LSHIFT_EXPR, wide_type, mask, bitoffset));

  tree zeroed_element
    = build2 (BIT_AND_EXPR, wide_type, src_vect, clearing_mask);

  /* TODO: Is the AND necessary: does HSA define what
     happens if the upper bits in the inserted element are not
     zero? */
  tree element_in_position
    = build2 (LSHIFT_EXPR, wide_type,
	      build2 (BIT_AND_EXPR, wide_type, scalar, mask), bitoffset);

  /* OR the (masked, shifted) new element into the cleared slot.  */
  tree inserted
    = build2 (BIT_IOR_EXPR, wide_type, zeroed_element, element_in_position);
  return inserted;
}
270
271 /* Implement the unpack{lo,hi}. BRIG_OPCODE should tell which one and
272 ARITH_TYPE describe the type of the vector arithmetics.
273 OPERANDS[0] and OPERANDS[1] are the input vectors. */
274
275 tree
276 brig_basic_inst_handler::build_unpack_lo_or_hi (BrigOpcode16_t brig_opcode,
277 tree arith_type,
278 tree_stl_vec &operands)
279 {
280 tree element_type = get_unsigned_int_type (TREE_TYPE (arith_type));
281 tree mask_vec_type
282 = build_vector_type (element_type,
283 gccbrig_type_vector_subparts (arith_type));
284
285 size_t element_count = gccbrig_type_vector_subparts (arith_type);
286 vec<constructor_elt, va_gc> *input_mask_vals = NULL;
287
288 size_t offset = (brig_opcode == BRIG_OPCODE_UNPACKLO) ? 0 : element_count / 2;
289
290 for (size_t i = 0; i < element_count / 2; ++i)
291 {
292 CONSTRUCTOR_APPEND_ELT (input_mask_vals, NULL_TREE,
293 build_int_cst (element_type, offset + i));
294 CONSTRUCTOR_APPEND_ELT (input_mask_vals, NULL_TREE,
295 build_int_cst (element_type,
296 offset + i + element_count));
297 }
298
299 tree mask_vec = build_constructor (mask_vec_type, input_mask_vals);
300
301 tree perm = build3 (VEC_PERM_EXPR, TREE_TYPE (operands[0]), operands[0],
302 operands[1], mask_vec);
303 return perm;
304 }
305
306 /* Builds a basic instruction expression from a BRIG instruction. BRIG_OPCODE
307 is the opcode, BRIG_TYPE the brig type of the instruction, ARITH_TYPE the
308 desired tree type for the instruction, and OPERANDS the instruction's
309 input operands already converted to tree nodes. */
310
tree
brig_basic_inst_handler::build_inst_expr (BrigOpcode16_t brig_opcode,
					  BrigType16_t brig_type,
					  tree arith_type,
					  tree_stl_vec &operands)
{
  tree_code opcode
    = brig_function::get_tree_code_for_hsa_opcode (brig_opcode, brig_type);

  /* The element type with the packing bits stripped off.  */
  BrigType16_t inner_type = brig_type & BRIG_TYPE_BASE_MASK;

  tree instr_inner_type
    = VECTOR_TYPE_P (arith_type) ? TREE_TYPE (arith_type) : arith_type;

  if (opcode == RSHIFT_EXPR || opcode == LSHIFT_EXPR)
    {
      /* HSA defines modulo/clipping behavior for shift amounts larger
	 than the bit width, while tree.def leaves it undefined.
	 We need to mask the upper bits to ensure the defined behavior. */
      tree scalar_mask
	= build_int_cst (instr_inner_type,
			 gccbrig_hsa_type_bit_size (inner_type) - 1);

      tree mask = VECTOR_TYPE_P (arith_type)
		    ? build_vector_from_val (arith_type, scalar_mask)
		    : scalar_mask;

      /* The shift amount is a scalar, broadcast it to produce
	 a vector shift.  */
      if (VECTOR_TYPE_P (arith_type))
	operands[1] = build_vector_from_val (arith_type, operands[1]);
      operands[1] = build2 (BIT_AND_EXPR, arith_type, operands[1], mask);
    }

  size_t input_count = operands.size ();
  size_t output_count = gccbrig_hsa_opcode_op_output_p (brig_opcode, 0) ?
    1 : 0;

  /* TREE_LIST is used by get_tree_code_for_hsa_opcode as a placeholder
     meaning "no single GENERIC opcode"; such instructions are emulated
     case by case below.  */
  if (opcode == TREE_LIST)
    {
      /* There was no direct GENERIC opcode for the instruction;
	 try to emulate it with a chain of GENERIC nodes.  */
      if (brig_opcode == BRIG_OPCODE_MAD || brig_opcode == BRIG_OPCODE_MAD24)
	{
	  /* There doesn't seem to be a "standard" MAD built-in in gcc so let's
	     use a chain of multiply + add for now (double rounding method).
	     It should be easier for optimizers than a custom built-in call
	     WIDEN_MULT_EXPR is close, but requires a double size result
	     type.  */
	  tree mult_res
	    = build2 (MULT_EXPR, arith_type, operands[0], operands[1]);
	  return build2 (PLUS_EXPR, arith_type, mult_res, operands[2]);
	}
      else if (brig_opcode == BRIG_OPCODE_MAD24HI)
	{
	  /* mad24hi = high part of the multiply plus the addend.  */
	  tree mult_res
	    = build2 (MULT_HIGHPART_EXPR, arith_type, operands[0], operands[1]);
	  return build2 (PLUS_EXPR, arith_type, mult_res, operands[2]);
	}
      else if (brig_opcode == BRIG_OPCODE_SHUFFLE)
	{
	  return build_shuffle (arith_type, operands);
	}
      else if (brig_opcode == BRIG_OPCODE_UNPACKLO
	       || brig_opcode == BRIG_OPCODE_UNPACKHI)
	{
	  return build_unpack_lo_or_hi (brig_opcode, arith_type, operands);
	}
      else if (brig_opcode == BRIG_OPCODE_UNPACK)
	{
	  return build_unpack (operands);
	}
      else if (brig_opcode == BRIG_OPCODE_PACK)
	{
	  return build_pack (operands);
	}
      else if (brig_opcode == BRIG_OPCODE_NRSQRT)
	{
	  /* Implement as 1.0/sqrt (x) and assume gcc instruction selects to
	     native ISA other than a division, if available.
	     TODO: this will happen only with unsafe math optimizations
	     on which cannot be used in general to remain HSAIL compliant.
	     Perhaps a builtin call would be better option here.  */
	  return build2 (RDIV_EXPR, arith_type, build_one_cst (arith_type),
			 m_parent.m_cf->expand_or_call_builtin
			 (BRIG_OPCODE_SQRT, brig_type, arith_type, operands));
	}
      else if (brig_opcode == BRIG_OPCODE_NRCP)
	{
	  /* Implement as 1.0/x and assume gcc instruction selects to
	     native ISA other than a division, if available.  */
	  return build2 (RDIV_EXPR, arith_type, build_one_cst (arith_type),
			 operands[0]);
	}
      else if (brig_opcode == BRIG_OPCODE_LANEID
	       || brig_opcode == BRIG_OPCODE_MAXWAVEID
	       || brig_opcode == BRIG_OPCODE_WAVEID)
	{
	  /* Assuming WAVESIZE 1 (for now), therefore LANEID, WAVEID and
	     MAXWAVEID always return 0.  */
	  return build_zero_cst (arith_type);
	}
      else
	gcc_unreachable ();
    }
  else if (opcode == CALL_EXPR)
    /* Instructions mapped to built-in (or expanded) calls.  */
    return m_parent.m_cf->expand_or_call_builtin (brig_opcode, brig_type,
						  arith_type, operands);
  else if (output_count == 1)
    {
      /* A direct GENERIC opcode: build a 1-3 operand expression from
	 the input operands.  */
      if (input_count == 1)
	{
	  /* MODIFY_EXPR marks a plain copy; the output assignment is
	     generated by the caller, so just pass the value through.  */
	  if (opcode == MODIFY_EXPR)
	    return operands[0];
	  else
	    return build1 (opcode, arith_type, operands[0]);
	}
      else if (input_count == 2)
	return build2 (opcode, arith_type, operands[0], operands[1]);
      else if (input_count == 3)
	return build3 (opcode, arith_type, operands[0], operands[1],
		       operands[2]);
      else
	gcc_unreachable ();
    }
  else
    gcc_unreachable ();

  return NULL_TREE;
}
441
442 /* Handles the basic instructions, including packed instructions. Deals
443 with the different packing modes by unpacking/packing the wanted
444 elements. Delegates most of the instruction cases to build_inst_expr(). */
445
size_t
brig_basic_inst_handler::operator () (const BrigBase *base)
{
  const BrigInstBase *brig_inst = (const BrigInstBase *) base;
  if (brig_inst->opcode == BRIG_OPCODE_NOP)
    return base->byteCount;

  tree_stl_vec operands = build_operands (*brig_inst);

  /* If the opcode has an output, it is operand 0; the rest are
     inputs.  */
  size_t output_count
    = gccbrig_hsa_opcode_op_output_p (brig_inst->opcode, 0) ? 1 : 0;
  size_t input_count
    = operands.size () == 0 ? 0 : (operands.size () - output_count);

  gcc_assert (output_count == 0 || output_count == 1);

  /* Collect the input operands, skipping the output operand (if any)
     at position 0.  */
  tree_stl_vec::iterator first_input_i = operands.begin ();
  if (output_count > 0 && operands.size () > 0)
    ++first_input_i;

  tree_stl_vec in_operands;
  in_operands.assign (first_input_i, operands.end ());

  BrigType16_t brig_inst_type = brig_inst->type;

  if (brig_inst->opcode == BRIG_OPCODE_FIRSTBIT
      || brig_inst->opcode == BRIG_OPCODE_LASTBIT
      || brig_inst->opcode == BRIG_OPCODE_SAD)
    /* These instructions are reported to be always 32b in HSAIL, but we want
       to treat them according to their input argument's type to select the
       correct instruction/builtin.  */
    brig_inst_type
      = gccbrig_tree_type_to_hsa_type (TREE_TYPE (in_operands[0]));

  tree instr_type = gccbrig_tree_type_for_hsa_type (brig_inst_type);

  if (!instr_type)
    {
      /* Defensive: unreachable in checked builds; skip the instruction
	 otherwise.  */
      gcc_unreachable ();
      return base->byteCount;
    }

  bool is_vec_instr = hsa_type_packed_p (brig_inst_type);

  size_t element_size_bits;
  size_t element_count;

  /* Derive the element size and count from the packed type's total and
     base-type bit sizes; scalars count as one element.  */
  if (is_vec_instr)
    {
      BrigType16_t brig_element_type = brig_inst_type & BRIG_TYPE_BASE_MASK;
      element_size_bits = gccbrig_hsa_type_bit_size (brig_element_type);
      element_count = gccbrig_hsa_type_bit_size (brig_inst_type)
	/ gccbrig_hsa_type_bit_size (brig_element_type);
    }
  else
    {
      element_size_bits = gccbrig_hsa_type_bit_size (brig_inst_type);
      element_count = 1;
    }

  /* The actual arithmetics type that should be performed with the
     operation.  This is not always the same as the original BRIG
     opcode's type due to implicit conversions of storage-only f16.  */
  tree arith_type = gccbrig_is_bit_operation (brig_inst->opcode)
		      ? gccbrig_tree_type_for_hsa_type (brig_inst_type)
		      : get_tree_expr_type_for_hsa_type (brig_inst_type);

  tree instr_expr = NULL_TREE;

  /* The packing control, if the instruction kind carries one.  */
  BrigPack8_t p = BRIG_PACK_NONE;
  if (brig_inst->base.kind == BRIG_KIND_INST_MOD)
    p = ((const BrigInstMod *) brig_inst)->pack;
  else if (brig_inst->base.kind == BRIG_KIND_INST_CMP)
    p = ((const BrigInstCmp *) brig_inst)->pack;

  /* For the scalar-operand variants (_ps_/_sp_), broadcast the lowest
     element of the scalar-side vector so the operation can proceed as
     vector x vector.  */
  if (p == BRIG_PACK_PS || p == BRIG_PACK_PSSAT)
    in_operands[1] = build_lower_element_broadcast (in_operands[1]);
  else if (p == BRIG_PACK_SP || p == BRIG_PACK_SPSAT)
    in_operands[0] = build_lower_element_broadcast (in_operands[0]);

  tree_code opcode
    = brig_function::get_tree_code_for_hsa_opcode (brig_inst->opcode,
						   brig_inst_type);

  /* All the saturating pack variants are handled by scalarizing into
     saturating built-in calls.  */
  if (p >= BRIG_PACK_PPSAT && p <= BRIG_PACK_PSAT)
    {
      scalarized_sat_arithmetics sat_arith (*brig_inst);
      gcc_assert (input_count == 2);
      instr_expr = sat_arith (*this, in_operands[0], in_operands[1]);
    }
  else if (opcode == RETURN_EXPR)
    {
      /* In kernels, 'ret' jumps to the function exit label instead of
	 emitting an actual return.  */
      if (m_parent.m_cf->m_is_kernel)
	{
	  tree goto_stmt
	    = build1 (GOTO_EXPR, void_type_node, m_parent.m_cf->m_exit_label);
	  m_parent.m_cf->append_statement (goto_stmt);
	  return base->byteCount;
	}
      else
	{
	  m_parent.m_cf->append_return_stmt ();
	  return base->byteCount;
	}
    }
  else if (opcode == MULT_HIGHPART_EXPR &&
	   is_vec_instr && element_size_bits < 64)
    {
      /* MULT_HIGHPART_EXPR works only on target dependent vector sizes and
	 even the scalars do not seem to work at least for char elements.

	 Let's fall back to scalarization and promotion of the vector elements
	 to larger types with the MULHI computed as a regular MUL.
	 MULHI for 2x64b seems to work with the Intel CPUs I've tested so
	 that is passed on for vector processing so there is no need for
	 128b scalar arithmetics.

	 This is not modular as these type of things do not belong to the
	 frontend, there should be a legalization phase before the backend
	 that figures out the best way to compute the MULHI for any
	 integer vector datatype.

	 TODO: promote to larger vector types instead.  For example
	 MULT_HIGHPART_EXPR with s8x8 doesn't work, but s16x8 seems to at least
	 with my x86-64.
      */
      tree_stl_vec operand0_elements;
      if (input_count > 0)
	m_parent.m_cf->unpack (in_operands[0], operand0_elements);

      tree_stl_vec operand1_elements;
      if (input_count > 1)
	m_parent.m_cf->unpack (in_operands[1], operand1_elements);

      tree_stl_vec result_elements;

      tree scalar_type = TREE_TYPE (arith_type);
      BrigType16_t element_type = brig_inst_type & BRIG_TYPE_BASE_MASK;
      /* Pick the double-width type matching the element's signedness.  */
      tree promoted_type = short_integer_type_node;
      switch (element_type)
	{
	case BRIG_TYPE_S8:
	  promoted_type = gccbrig_tree_type_for_hsa_type (BRIG_TYPE_S16);
	  break;
	case BRIG_TYPE_U8:
	  promoted_type = gccbrig_tree_type_for_hsa_type (BRIG_TYPE_U16);
	  break;
	case BRIG_TYPE_S16:
	  promoted_type = gccbrig_tree_type_for_hsa_type (BRIG_TYPE_S32);
	  break;
	case BRIG_TYPE_U16:
	  promoted_type = gccbrig_tree_type_for_hsa_type (BRIG_TYPE_U32);
	  break;
	case BRIG_TYPE_S32:
	  promoted_type = gccbrig_tree_type_for_hsa_type (BRIG_TYPE_S64);
	  break;
	case BRIG_TYPE_U32:
	  promoted_type = gccbrig_tree_type_for_hsa_type (BRIG_TYPE_U64);
	  break;
	default:
	  gcc_unreachable ();
	}

      size_t promoted_type_size = int_size_in_bytes (promoted_type) * 8;
      /* Note: shadows the outer element_count with the equivalent value
	 derived from arith_type.  */
      size_t element_count = gccbrig_type_vector_subparts (arith_type);
      for (size_t i = 0; i < element_count; ++i)
	{
	  tree operand0 = convert (promoted_type, operand0_elements.at (i));
	  tree operand1 = convert (promoted_type, operand1_elements.at (i));

	  /* MULHI == full-width multiply in the promoted type, then
	     shift the high half down.  */
	  tree scalar_expr
	    = build2 (MULT_EXPR, promoted_type, operand0, operand1);

	  scalar_expr
	    = build2 (RSHIFT_EXPR, promoted_type, scalar_expr,
		      build_int_cstu (promoted_type, promoted_type_size / 2));

	  result_elements.push_back (convert (scalar_type, scalar_expr));
	}
      instr_expr = m_parent.m_cf->pack (result_elements);
    }
  else
    {
      /* 'class' is always of b1 type, let's consider it by its
	 float type when building the instruction to find the
	 correct builtin.  */
      if (brig_inst->opcode == BRIG_OPCODE_CLASS)
	brig_inst_type = ((const BrigInstSourceType *) base)->sourceType;
      instr_expr = build_inst_expr (brig_inst->opcode, brig_inst_type,
				    arith_type, in_operands);
    }

  if (instr_expr == NULL_TREE)
    {
      /* Defensive: unreachable in checked builds; skip the instruction
	 otherwise.  */
      gcc_unreachable ();
      return base->byteCount;
    }

  if (p == BRIG_PACK_SS || p == BRIG_PACK_S || p == BRIG_PACK_SSSAT
      || p == BRIG_PACK_SSAT)
    {
      /* In case of _s_ or _ss_, select only the lowest element
	 from the new input to the output.  We could extract
	 the element and use a scalar operation, but try
	 to keep data in vector registers as much as possible
	 to avoid copies between scalar and vector datapaths.  */
      tree old_value;
      tree half_storage_type = gccbrig_tree_type_for_hsa_type (brig_inst_type);
      bool is_fp16_operation
	= (brig_inst_type & BRIG_TYPE_BASE_MASK) == BRIG_TYPE_F16
	  && !gccbrig_is_bit_operation (brig_inst->opcode);

      /* f16 arithmetic is performed in f32, so view the old output
	 accordingly.  */
      if (is_fp16_operation)
	old_value = build_h2f_conversion
	  (build_resize_convert_view (half_storage_type, operands[0]));
      else
	old_value
	  = build_resize_convert_view (TREE_TYPE (instr_expr), operands[0]);

      size_t esize = is_fp16_operation ? 32 : element_size_bits;

      /* Construct a permutation mask where other elements than the lowest one
	 is picked from the old_value.  */
      tree mask_inner_type = build_nonstandard_integer_type (esize, 1);
      vec<constructor_elt, va_gc> *constructor_vals = NULL;
      for (size_t i = 0; i < element_count; ++i)
	{
	  tree cst;

	  /* Index element_count is the first lane of the second
	     VEC_PERM_EXPR input, i.e. the new value's lowest element.  */
	  if (i == 0)
	    cst = build_int_cstu (mask_inner_type, element_count);
	  else
	    cst = build_int_cstu (mask_inner_type, i);
	  CONSTRUCTOR_APPEND_ELT (constructor_vals, NULL_TREE, cst);
	}
      tree mask_vec_type = build_vector_type (mask_inner_type, element_count);
      tree mask = build_vector_from_ctor (mask_vec_type, constructor_vals);

      /* Materialize the new output in a temp before permuting it with
	 the old value.  */
      tree new_value = create_tmp_var (TREE_TYPE (instr_expr), "new_output");
      tree assign
	= build2 (MODIFY_EXPR, TREE_TYPE (instr_expr), new_value, instr_expr);
      m_parent.m_cf->append_statement (assign);

      instr_expr
	= build3 (VEC_PERM_EXPR, arith_type, old_value, new_value, mask);

      tree lower_output = create_tmp_var (TREE_TYPE (instr_expr), "s_output");
      tree assign_lower = build2 (MODIFY_EXPR, TREE_TYPE (instr_expr),
				  lower_output, instr_expr);
      m_parent.m_cf->append_statement (assign_lower);
      instr_expr = lower_output;
    }

  /* Either assign the result to the output operand or, for output-less
     instructions, append the expression as a statement.  */
  if (output_count == 1)
    build_output_assignment (*brig_inst, operands[0], instr_expr);
  else
    m_parent.m_cf->append_statement (instr_expr);
  return base->byteCount;
}
705
706 /* Create an expression that broadcasts the lowest element of the
707 vector in VEC_OPERAND to all elements of the returned vector. */
708
709 tree
710 brig_basic_inst_handler::build_lower_element_broadcast (tree vec_operand)
711 {
712 /* Build the broadcast using shuffle because there's no
713 direct broadcast in GENERIC and this way there's no need for
714 a separate extract of the lowest element. */
715 tree element_type = TREE_TYPE (TREE_TYPE (vec_operand));
716 size_t esize = 8 * int_size_in_bytes (element_type);
717
718 size_t element_count
719 = gccbrig_type_vector_subparts (TREE_TYPE (vec_operand));
720 tree mask_inner_type = build_nonstandard_integer_type (esize, 1);
721 vec<constructor_elt, va_gc> *constructor_vals = NULL;
722
723 /* Construct the mask. */
724 for (size_t i = 0; i < element_count; ++i)
725 {
726 tree cst = build_int_cstu (mask_inner_type, element_count);
727 CONSTRUCTOR_APPEND_ELT (constructor_vals, NULL_TREE, cst);
728 }
729 tree mask_vec_type = build_vector_type (mask_inner_type, element_count);
730 tree mask = build_vector_from_ctor (mask_vec_type, constructor_vals);
731
732 return build3 (VEC_PERM_EXPR, TREE_TYPE (vec_operand), vec_operand,
733 vec_operand, mask);
734 }
735