]> git.ipfire.org Git - thirdparty/gcc.git/blame - gcc/tree-vect-generic.c
gimple-walk.h: New File.
[thirdparty/gcc.git] / gcc / tree-vect-generic.c
CommitLineData
2b725155 1/* Lower vector operations to scalar operations.
d1e082c2 2 Copyright (C) 2004-2013 Free Software Foundation, Inc.
2b725155
RH
3
4This file is part of GCC.
b8698a0f 5
2b725155
RH
6GCC is free software; you can redistribute it and/or modify it
7under the terms of the GNU General Public License as published by the
9dcd6f09 8Free Software Foundation; either version 3, or (at your option) any
2b725155 9later version.
b8698a0f 10
2b725155
RH
11GCC is distributed in the hope that it will be useful, but WITHOUT
12ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
13FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
14for more details.
b8698a0f 15
2b725155 16You should have received a copy of the GNU General Public License
9dcd6f09
NC
17along with GCC; see the file COPYING3. If not see
18<http://www.gnu.org/licenses/>. */
2b725155
RH
19
20#include "config.h"
21#include "system.h"
22#include "coretypes.h"
23#include "tree.h"
24#include "tm.h"
2b725155 25#include "langhooks.h"
45b0be94 26#include "gimplify.h"
5be5c238 27#include "gimple-iterator.h"
442b4905
AM
28#include "gimple-ssa.h"
29#include "tree-cfg.h"
30#include "tree-ssanames.h"
2b725155
RH
31#include "tree-iterator.h"
32#include "tree-pass.h"
33#include "flags.h"
34#include "ggc.h"
f90e8e2e 35#include "diagnostic.h"
0fcc85cd 36#include "target.h"
2b725155 37
2eb79bbb
SB
38/* Need to include rtl.h, expr.h, etc. for optabs. */
39#include "expr.h"
40#include "optabs.h"
2b725155 41
d246ab4f
AS
42
43static void expand_vector_operations_1 (gimple_stmt_iterator *);
44
45
2b725155
RH
46/* Build a constant of type TYPE, made of VALUE's bits replicated
47 every TYPE_SIZE (INNER_TYPE) bits to fit TYPE's precision. */
48static tree
49build_replicated_const (tree type, tree inner_type, HOST_WIDE_INT value)
50{
51 int width = tree_low_cst (TYPE_SIZE (inner_type), 1);
52 int n = HOST_BITS_PER_WIDE_INT / width;
53 unsigned HOST_WIDE_INT low, high, mask;
54 tree ret;
55
56 gcc_assert (n);
57
58 if (width == HOST_BITS_PER_WIDE_INT)
59 low = value;
60 else
61 {
62 mask = ((HOST_WIDE_INT)1 << width) - 1;
63 low = (unsigned HOST_WIDE_INT) ~0 / mask * (value & mask);
64 }
65
66 if (TYPE_PRECISION (type) < HOST_BITS_PER_WIDE_INT)
67 low &= ((HOST_WIDE_INT)1 << TYPE_PRECISION (type)) - 1, high = 0;
68 else if (TYPE_PRECISION (type) == HOST_BITS_PER_WIDE_INT)
69 high = 0;
49ab6098 70 else if (TYPE_PRECISION (type) == HOST_BITS_PER_DOUBLE_INT)
2b725155
RH
71 high = low;
72 else
73 gcc_unreachable ();
74
75 ret = build_int_cst_wide (type, low, high);
76 return ret;
77}
78
79static GTY(()) tree vector_inner_type;
80static GTY(()) tree vector_last_type;
81static GTY(()) int vector_last_nunits;
82
83/* Return a suitable vector types made of SUBPARTS units each of mode
84 "word_mode" (the global variable). */
85static tree
86build_word_mode_vector_type (int nunits)
87{
88 if (!vector_inner_type)
89 vector_inner_type = lang_hooks.types.type_for_mode (word_mode, 1);
90 else if (vector_last_nunits == nunits)
91 {
92 gcc_assert (TREE_CODE (vector_last_type) == VECTOR_TYPE);
93 return vector_last_type;
94 }
95
96 /* We build a new type, but we canonicalize it nevertheless,
97 because it still saves some memory. */
98 vector_last_nunits = nunits;
99 vector_last_type = type_hash_canon (nunits,
100 build_vector_type (vector_inner_type,
101 nunits));
102 return vector_last_type;
103}
104
726a989a 105typedef tree (*elem_op_func) (gimple_stmt_iterator *,
2b725155
RH
106 tree, tree, tree, tree, tree, enum tree_code);
107
108static inline tree
726a989a 109tree_vec_extract (gimple_stmt_iterator *gsi, tree type,
2b725155
RH
110 tree t, tree bitsize, tree bitpos)
111{
112 if (bitpos)
726a989a 113 return gimplify_build3 (gsi, BIT_FIELD_REF, type, t, bitsize, bitpos);
2b725155 114 else
726a989a 115 return gimplify_build1 (gsi, VIEW_CONVERT_EXPR, type, t);
2b725155
RH
116}
117
118static tree
726a989a 119do_unop (gimple_stmt_iterator *gsi, tree inner_type, tree a,
2b725155
RH
120 tree b ATTRIBUTE_UNUSED, tree bitpos, tree bitsize,
121 enum tree_code code)
122{
726a989a
RB
123 a = tree_vec_extract (gsi, inner_type, a, bitsize, bitpos);
124 return gimplify_build1 (gsi, code, inner_type, a);
2b725155
RH
125}
126
127static tree
726a989a 128do_binop (gimple_stmt_iterator *gsi, tree inner_type, tree a, tree b,
2b725155
RH
129 tree bitpos, tree bitsize, enum tree_code code)
130{
362235e9
RG
131 if (TREE_CODE (TREE_TYPE (a)) == VECTOR_TYPE)
132 a = tree_vec_extract (gsi, inner_type, a, bitsize, bitpos);
133 if (TREE_CODE (TREE_TYPE (b)) == VECTOR_TYPE)
134 b = tree_vec_extract (gsi, inner_type, b, bitsize, bitpos);
726a989a 135 return gimplify_build2 (gsi, code, inner_type, a, b);
2b725155
RH
136}
137
d246ab4f
AS
138/* Construct expression (A[BITPOS] code B[BITPOS]) ? -1 : 0
139
140 INNER_TYPE is the type of A and B elements
141
142 returned expression is of signed integer type with the
143 size equal to the size of INNER_TYPE. */
144static tree
145do_compare (gimple_stmt_iterator *gsi, tree inner_type, tree a, tree b,
146 tree bitpos, tree bitsize, enum tree_code code)
147{
148 tree comp_type;
149
150 a = tree_vec_extract (gsi, inner_type, a, bitsize, bitpos);
151 b = tree_vec_extract (gsi, inner_type, b, bitsize, bitpos);
152
153 comp_type = build_nonstandard_integer_type
154 (GET_MODE_BITSIZE (TYPE_MODE (inner_type)), 0);
155
156 return gimplify_build3 (gsi, COND_EXPR, comp_type,
157 fold_build2 (code, boolean_type_node, a, b),
158 build_int_cst (comp_type, -1),
159 build_int_cst (comp_type, 0));
160}
161
2b725155
RH
162/* Expand vector addition to scalars. This does bit twiddling
163 in order to increase parallelism:
164
165 a + b = (((int) a & 0x7f7f7f7f) + ((int) b & 0x7f7f7f7f)) ^
166 (a ^ b) & 0x80808080
167
168 a - b = (((int) a | 0x80808080) - ((int) b & 0x7f7f7f7f)) ^
169 (a ^ ~b) & 0x80808080
170
171 -b = (0x80808080 - ((int) b & 0x7f7f7f7f)) ^ (~b & 0x80808080)
172
173 This optimization should be done only if 4 vector items or more
174 fit into a word. */
175static tree
726a989a 176do_plus_minus (gimple_stmt_iterator *gsi, tree word_type, tree a, tree b,
2b725155
RH
177 tree bitpos ATTRIBUTE_UNUSED, tree bitsize ATTRIBUTE_UNUSED,
178 enum tree_code code)
179{
180 tree inner_type = TREE_TYPE (TREE_TYPE (a));
181 unsigned HOST_WIDE_INT max;
182 tree low_bits, high_bits, a_low, b_low, result_low, signs;
183
184 max = GET_MODE_MASK (TYPE_MODE (inner_type));
185 low_bits = build_replicated_const (word_type, inner_type, max >> 1);
186 high_bits = build_replicated_const (word_type, inner_type, max & ~(max >> 1));
187
726a989a
RB
188 a = tree_vec_extract (gsi, word_type, a, bitsize, bitpos);
189 b = tree_vec_extract (gsi, word_type, b, bitsize, bitpos);
2b725155 190
726a989a
RB
191 signs = gimplify_build2 (gsi, BIT_XOR_EXPR, word_type, a, b);
192 b_low = gimplify_build2 (gsi, BIT_AND_EXPR, word_type, b, low_bits);
2b725155 193 if (code == PLUS_EXPR)
726a989a 194 a_low = gimplify_build2 (gsi, BIT_AND_EXPR, word_type, a, low_bits);
2b725155
RH
195 else
196 {
726a989a
RB
197 a_low = gimplify_build2 (gsi, BIT_IOR_EXPR, word_type, a, high_bits);
198 signs = gimplify_build1 (gsi, BIT_NOT_EXPR, word_type, signs);
2b725155
RH
199 }
200
726a989a
RB
201 signs = gimplify_build2 (gsi, BIT_AND_EXPR, word_type, signs, high_bits);
202 result_low = gimplify_build2 (gsi, code, word_type, a_low, b_low);
203 return gimplify_build2 (gsi, BIT_XOR_EXPR, word_type, result_low, signs);
2b725155
RH
204}
205
206static tree
726a989a 207do_negate (gimple_stmt_iterator *gsi, tree word_type, tree b,
2b725155
RH
208 tree unused ATTRIBUTE_UNUSED, tree bitpos ATTRIBUTE_UNUSED,
209 tree bitsize ATTRIBUTE_UNUSED,
210 enum tree_code code ATTRIBUTE_UNUSED)
211{
212 tree inner_type = TREE_TYPE (TREE_TYPE (b));
213 HOST_WIDE_INT max;
214 tree low_bits, high_bits, b_low, result_low, signs;
215
216 max = GET_MODE_MASK (TYPE_MODE (inner_type));
217 low_bits = build_replicated_const (word_type, inner_type, max >> 1);
218 high_bits = build_replicated_const (word_type, inner_type, max & ~(max >> 1));
219
726a989a 220 b = tree_vec_extract (gsi, word_type, b, bitsize, bitpos);
2b725155 221
726a989a
RB
222 b_low = gimplify_build2 (gsi, BIT_AND_EXPR, word_type, b, low_bits);
223 signs = gimplify_build1 (gsi, BIT_NOT_EXPR, word_type, b);
224 signs = gimplify_build2 (gsi, BIT_AND_EXPR, word_type, signs, high_bits);
225 result_low = gimplify_build2 (gsi, MINUS_EXPR, word_type, high_bits, b_low);
226 return gimplify_build2 (gsi, BIT_XOR_EXPR, word_type, result_low, signs);
2b725155
RH
227}
228
229/* Expand a vector operation to scalars, by using many operations
230 whose type is the vector type's inner type. */
231static tree
726a989a 232expand_vector_piecewise (gimple_stmt_iterator *gsi, elem_op_func f,
2b725155
RH
233 tree type, tree inner_type,
234 tree a, tree b, enum tree_code code)
235{
9771b263 236 vec<constructor_elt, va_gc> *v;
2b725155
RH
237 tree part_width = TYPE_SIZE (inner_type);
238 tree index = bitsize_int (0);
239 int nunits = TYPE_VECTOR_SUBPARTS (type);
240 int delta = tree_low_cst (part_width, 1)
241 / tree_low_cst (TYPE_SIZE (TREE_TYPE (type)), 1);
242 int i;
cdbb5ba3
AS
243 location_t loc = gimple_location (gsi_stmt (*gsi));
244
245 if (types_compatible_p (gimple_expr_type (gsi_stmt (*gsi)), type))
246 warning_at (loc, OPT_Wvector_operation_performance,
247 "vector operation will be expanded piecewise");
248 else
249 warning_at (loc, OPT_Wvector_operation_performance,
250 "vector operation will be expanded in parallel");
2b725155 251
9771b263 252 vec_alloc (v, (nunits + delta - 1) / delta);
2b725155 253 for (i = 0; i < nunits;
d35936ab 254 i += delta, index = int_const_binop (PLUS_EXPR, index, part_width))
2b725155 255 {
726a989a 256 tree result = f (gsi, inner_type, a, b, index, part_width, code);
f32682ca 257 constructor_elt ce = {NULL_TREE, result};
9771b263 258 v->quick_push (ce);
2b725155
RH
259 }
260
4038c495 261 return build_constructor (type, v);
2b725155
RH
262}
263
264/* Expand a vector operation to scalars with the freedom to use
265 a scalar integer type, or to use a different size for the items
266 in the vector type. */
267static tree
726a989a 268expand_vector_parallel (gimple_stmt_iterator *gsi, elem_op_func f, tree type,
2b725155
RH
269 tree a, tree b,
270 enum tree_code code)
271{
272 tree result, compute_type;
273 enum machine_mode mode;
274 int n_words = tree_low_cst (TYPE_SIZE_UNIT (type), 1) / UNITS_PER_WORD;
cdbb5ba3 275 location_t loc = gimple_location (gsi_stmt (*gsi));
2b725155
RH
276
277 /* We have three strategies. If the type is already correct, just do
278 the operation an element at a time. Else, if the vector is wider than
279 one word, do it a word at a time; finally, if the vector is smaller
280 than one word, do it as a scalar. */
281 if (TYPE_MODE (TREE_TYPE (type)) == word_mode)
726a989a 282 return expand_vector_piecewise (gsi, f,
2b725155
RH
283 type, TREE_TYPE (type),
284 a, b, code);
285 else if (n_words > 1)
286 {
287 tree word_type = build_word_mode_vector_type (n_words);
726a989a 288 result = expand_vector_piecewise (gsi, f,
2b725155
RH
289 word_type, TREE_TYPE (word_type),
290 a, b, code);
726a989a
RB
291 result = force_gimple_operand_gsi (gsi, result, true, NULL, true,
292 GSI_SAME_STMT);
2b725155
RH
293 }
294 else
295 {
296 /* Use a single scalar operation with a mode no wider than word_mode. */
297 mode = mode_for_size (tree_low_cst (TYPE_SIZE (type), 1), MODE_INT, 0);
298 compute_type = lang_hooks.types.type_for_mode (mode, 1);
726a989a 299 result = f (gsi, compute_type, a, b, NULL_TREE, NULL_TREE, code);
cdbb5ba3
AS
300 warning_at (loc, OPT_Wvector_operation_performance,
301 "vector operation will be expanded with a "
302 "single scalar operation");
2b725155
RH
303 }
304
305 return result;
306}
307
308/* Expand a vector operation to scalars; for integer types we can use
309 special bit twiddling tricks to do the sums a word at a time, using
310 function F_PARALLEL instead of F. These tricks are done only if
311 they can process at least four items, that is, only if the vector
312 holds at least four items and if a word can hold four items. */
313static tree
726a989a 314expand_vector_addition (gimple_stmt_iterator *gsi,
2b725155
RH
315 elem_op_func f, elem_op_func f_parallel,
316 tree type, tree a, tree b, enum tree_code code)
317{
318 int parts_per_word = UNITS_PER_WORD
319 / tree_low_cst (TYPE_SIZE_UNIT (TREE_TYPE (type)), 1);
320
321 if (INTEGRAL_TYPE_P (TREE_TYPE (type))
322 && parts_per_word >= 4
323 && TYPE_VECTOR_SUBPARTS (type) >= 4)
726a989a 324 return expand_vector_parallel (gsi, f_parallel,
2b725155
RH
325 type, a, b, code);
326 else
726a989a 327 return expand_vector_piecewise (gsi, f,
2b725155
RH
328 type, TREE_TYPE (type),
329 a, b, code);
330}
331
d246ab4f
AS
332/* Try to expand vector comparison expression OP0 CODE OP1 by
333 querying optab if the following expression:
334 VEC_COND_EXPR< OP0 CODE OP1, {-1,...}, {0,...}>
335 can be expanded. */
336static tree
337expand_vector_comparison (gimple_stmt_iterator *gsi, tree type, tree op0,
338 tree op1, enum tree_code code)
339{
340 tree t;
341 if (! expand_vec_cond_expr_p (type, TREE_TYPE (op0)))
342 t = expand_vector_piecewise (gsi, do_compare, type,
343 TREE_TYPE (TREE_TYPE (op0)), op0, op1, code);
344 else
345 t = NULL_TREE;
346
347 return t;
348}
349
4ee4c52c
JJ
350/* Helper function of expand_vector_divmod. Gimplify a RSHIFT_EXPR in type
351 of OP0 with shift counts in SHIFTCNTS array and return the temporary holding
352 the result if successful, otherwise return NULL_TREE. */
353static tree
354add_rshift (gimple_stmt_iterator *gsi, tree type, tree op0, int *shiftcnts)
355{
356 optab op;
357 unsigned int i, nunits = TYPE_VECTOR_SUBPARTS (type);
358 bool scalar_shift = true;
359
360 for (i = 1; i < nunits; i++)
361 {
362 if (shiftcnts[i] != shiftcnts[0])
363 scalar_shift = false;
364 }
365
366 if (scalar_shift && shiftcnts[0] == 0)
367 return op0;
368
369 if (scalar_shift)
370 {
371 op = optab_for_tree_code (RSHIFT_EXPR, type, optab_scalar);
2225b9f2 372 if (op != unknown_optab
4ee4c52c
JJ
373 && optab_handler (op, TYPE_MODE (type)) != CODE_FOR_nothing)
374 return gimplify_build2 (gsi, RSHIFT_EXPR, type, op0,
375 build_int_cst (NULL_TREE, shiftcnts[0]));
376 }
377
378 op = optab_for_tree_code (RSHIFT_EXPR, type, optab_vector);
2225b9f2 379 if (op != unknown_optab
4ee4c52c
JJ
380 && optab_handler (op, TYPE_MODE (type)) != CODE_FOR_nothing)
381 {
382 tree *vec = XALLOCAVEC (tree, nunits);
383 for (i = 0; i < nunits; i++)
384 vec[i] = build_int_cst (TREE_TYPE (type), shiftcnts[i]);
385 return gimplify_build2 (gsi, RSHIFT_EXPR, type, op0,
386 build_vector (type, vec));
387 }
388
389 return NULL_TREE;
390}
391
392/* Try to expand integer vector division by constant using
393 widening multiply, shifts and additions. */
394static tree
395expand_vector_divmod (gimple_stmt_iterator *gsi, tree type, tree op0,
396 tree op1, enum tree_code code)
397{
398 bool use_pow2 = true;
399 bool has_vector_shift = true;
400 int mode = -1, this_mode;
401 int pre_shift = -1, post_shift;
402 unsigned int nunits = TYPE_VECTOR_SUBPARTS (type);
403 int *shifts = XALLOCAVEC (int, nunits * 4);
404 int *pre_shifts = shifts + nunits;
405 int *post_shifts = pre_shifts + nunits;
406 int *shift_temps = post_shifts + nunits;
407 unsigned HOST_WIDE_INT *mulc = XALLOCAVEC (unsigned HOST_WIDE_INT, nunits);
408 int prec = TYPE_PRECISION (TREE_TYPE (type));
409 int dummy_int;
410 unsigned int i, unsignedp = TYPE_UNSIGNED (TREE_TYPE (type));
411 unsigned HOST_WIDE_INT mask = GET_MODE_MASK (TYPE_MODE (TREE_TYPE (type)));
4ee4c52c 412 tree *vec;
00f07b86
RH
413 tree cur_op, mulcst, tem;
414 optab op;
4ee4c52c
JJ
415
416 if (prec > HOST_BITS_PER_WIDE_INT)
417 return NULL_TREE;
418
419 op = optab_for_tree_code (RSHIFT_EXPR, type, optab_vector);
2225b9f2 420 if (op == unknown_optab
4ee4c52c
JJ
421 || optab_handler (op, TYPE_MODE (type)) == CODE_FOR_nothing)
422 has_vector_shift = false;
423
424 /* Analysis phase. Determine if all op1 elements are either power
425 of two and it is possible to expand it using shifts (or for remainder
426 using masking). Additionally compute the multiplicative constants
427 and pre and post shifts if the division is to be expanded using
428 widening or high part multiplication plus shifts. */
429 for (i = 0; i < nunits; i++)
430 {
431 tree cst = VECTOR_CST_ELT (op1, i);
432 unsigned HOST_WIDE_INT ml;
433
434 if (!host_integerp (cst, unsignedp) || integer_zerop (cst))
435 return NULL_TREE;
436 pre_shifts[i] = 0;
437 post_shifts[i] = 0;
438 mulc[i] = 0;
439 if (use_pow2
440 && (!integer_pow2p (cst) || tree_int_cst_sgn (cst) != 1))
441 use_pow2 = false;
442 if (use_pow2)
443 {
444 shifts[i] = tree_log2 (cst);
445 if (shifts[i] != shifts[0]
446 && code == TRUNC_DIV_EXPR
447 && !has_vector_shift)
448 use_pow2 = false;
449 }
450 if (mode == -2)
451 continue;
452 if (unsignedp)
453 {
454 unsigned HOST_WIDE_INT mh;
455 unsigned HOST_WIDE_INT d = tree_low_cst (cst, 1) & mask;
456
457 if (d >= ((unsigned HOST_WIDE_INT) 1 << (prec - 1)))
458 /* FIXME: Can transform this into op0 >= op1 ? 1 : 0. */
459 return NULL_TREE;
460
461 if (d <= 1)
462 {
463 mode = -2;
464 continue;
465 }
466
467 /* Find a suitable multiplier and right shift count
468 instead of multiplying with D. */
469 mh = choose_multiplier (d, prec, prec, &ml, &post_shift, &dummy_int);
470
471 /* If the suggested multiplier is more than SIZE bits, we can
472 do better for even divisors, using an initial right shift. */
473 if ((mh != 0 && (d & 1) == 0)
474 || (!has_vector_shift && pre_shift != -1))
475 {
476 if (has_vector_shift)
477 pre_shift = floor_log2 (d & -d);
478 else if (pre_shift == -1)
479 {
480 unsigned int j;
481 for (j = 0; j < nunits; j++)
482 {
483 tree cst2 = VECTOR_CST_ELT (op1, j);
484 unsigned HOST_WIDE_INT d2;
485 int this_pre_shift;
486
487 if (!host_integerp (cst2, 1))
488 return NULL_TREE;
489 d2 = tree_low_cst (cst2, 1) & mask;
490 if (d2 == 0)
491 return NULL_TREE;
492 this_pre_shift = floor_log2 (d2 & -d2);
493 if (pre_shift == -1 || this_pre_shift < pre_shift)
494 pre_shift = this_pre_shift;
495 }
496 if (i != 0 && pre_shift != 0)
497 {
498 /* Restart. */
499 i = -1U;
500 mode = -1;
501 continue;
502 }
503 }
504 if (pre_shift != 0)
505 {
506 if ((d >> pre_shift) <= 1)
507 {
508 mode = -2;
509 continue;
510 }
511 mh = choose_multiplier (d >> pre_shift, prec,
512 prec - pre_shift,
513 &ml, &post_shift, &dummy_int);
514 gcc_assert (!mh);
515 pre_shifts[i] = pre_shift;
516 }
517 }
518 if (!mh)
519 this_mode = 0;
520 else
521 this_mode = 1;
522 }
523 else
524 {
525 HOST_WIDE_INT d = tree_low_cst (cst, 0);
526 unsigned HOST_WIDE_INT abs_d;
527
528 if (d == -1)
529 return NULL_TREE;
530
531 /* Since d might be INT_MIN, we have to cast to
532 unsigned HOST_WIDE_INT before negating to avoid
533 undefined signed overflow. */
534 abs_d = (d >= 0
535 ? (unsigned HOST_WIDE_INT) d
536 : - (unsigned HOST_WIDE_INT) d);
537
538 /* n rem d = n rem -d */
539 if (code == TRUNC_MOD_EXPR && d < 0)
540 d = abs_d;
541 else if (abs_d == (unsigned HOST_WIDE_INT) 1 << (prec - 1))
542 {
543 /* This case is not handled correctly below. */
544 mode = -2;
545 continue;
546 }
547 if (abs_d <= 1)
548 {
549 mode = -2;
550 continue;
551 }
552
553 choose_multiplier (abs_d, prec, prec - 1, &ml,
554 &post_shift, &dummy_int);
555 if (ml >= (unsigned HOST_WIDE_INT) 1 << (prec - 1))
556 {
557 this_mode = 4 + (d < 0);
558 ml |= (~(unsigned HOST_WIDE_INT) 0) << (prec - 1);
559 }
560 else
561 this_mode = 2 + (d < 0);
562 }
563 mulc[i] = ml;
564 post_shifts[i] = post_shift;
565 if ((i && !has_vector_shift && post_shifts[0] != post_shift)
566 || post_shift >= prec
567 || pre_shifts[i] >= prec)
568 this_mode = -2;
569
570 if (i == 0)
571 mode = this_mode;
572 else if (mode != this_mode)
573 mode = -2;
574 }
575
576 vec = XALLOCAVEC (tree, nunits);
577
578 if (use_pow2)
579 {
580 tree addend = NULL_TREE;
581 if (!unsignedp)
582 {
583 tree uns_type;
584
585 /* Both division and remainder sequences need
586 op0 < 0 ? mask : 0 computed. It can be either computed as
587 (type) (((uns_type) (op0 >> (prec - 1))) >> (prec - shifts[i]))
588 if none of the shifts is 0, or as the conditional. */
589 for (i = 0; i < nunits; i++)
590 if (shifts[i] == 0)
591 break;
592 uns_type
593 = build_vector_type (build_nonstandard_integer_type (prec, 1),
594 nunits);
595 if (i == nunits && TYPE_MODE (uns_type) == TYPE_MODE (type))
596 {
597 for (i = 0; i < nunits; i++)
598 shift_temps[i] = prec - 1;
599 cur_op = add_rshift (gsi, type, op0, shift_temps);
600 if (cur_op != NULL_TREE)
601 {
602 cur_op = gimplify_build1 (gsi, VIEW_CONVERT_EXPR,
603 uns_type, cur_op);
604 for (i = 0; i < nunits; i++)
605 shift_temps[i] = prec - shifts[i];
606 cur_op = add_rshift (gsi, uns_type, cur_op, shift_temps);
607 if (cur_op != NULL_TREE)
608 addend = gimplify_build1 (gsi, VIEW_CONVERT_EXPR,
609 type, cur_op);
610 }
611 }
612 if (addend == NULL_TREE
613 && expand_vec_cond_expr_p (type, type))
614 {
615 tree zero, cst, cond;
616 gimple stmt;
617
618 zero = build_zero_cst (type);
619 cond = build2 (LT_EXPR, type, op0, zero);
620 for (i = 0; i < nunits; i++)
621 vec[i] = build_int_cst (TREE_TYPE (type),
622 ((unsigned HOST_WIDE_INT) 1
623 << shifts[i]) - 1);
624 cst = build_vector (type, vec);
83d5977e 625 addend = make_ssa_name (type, NULL);
73804b12
RG
626 stmt = gimple_build_assign_with_ops (VEC_COND_EXPR, addend,
627 cond, cst, zero);
4ee4c52c
JJ
628 gsi_insert_before (gsi, stmt, GSI_SAME_STMT);
629 }
630 }
631 if (code == TRUNC_DIV_EXPR)
632 {
633 if (unsignedp)
634 {
635 /* q = op0 >> shift; */
636 cur_op = add_rshift (gsi, type, op0, shifts);
637 if (cur_op != NULL_TREE)
638 return cur_op;
639 }
640 else if (addend != NULL_TREE)
641 {
642 /* t1 = op0 + addend;
643 q = t1 >> shift; */
644 op = optab_for_tree_code (PLUS_EXPR, type, optab_default);
2225b9f2 645 if (op != unknown_optab
4ee4c52c
JJ
646 && optab_handler (op, TYPE_MODE (type)) != CODE_FOR_nothing)
647 {
648 cur_op = gimplify_build2 (gsi, PLUS_EXPR, type, op0, addend);
649 cur_op = add_rshift (gsi, type, cur_op, shifts);
650 if (cur_op != NULL_TREE)
651 return cur_op;
652 }
653 }
654 }
655 else
656 {
657 tree mask;
658 for (i = 0; i < nunits; i++)
659 vec[i] = build_int_cst (TREE_TYPE (type),
660 ((unsigned HOST_WIDE_INT) 1
661 << shifts[i]) - 1);
662 mask = build_vector (type, vec);
663 op = optab_for_tree_code (BIT_AND_EXPR, type, optab_default);
2225b9f2 664 if (op != unknown_optab
4ee4c52c
JJ
665 && optab_handler (op, TYPE_MODE (type)) != CODE_FOR_nothing)
666 {
667 if (unsignedp)
668 /* r = op0 & mask; */
669 return gimplify_build2 (gsi, BIT_AND_EXPR, type, op0, mask);
670 else if (addend != NULL_TREE)
671 {
672 /* t1 = op0 + addend;
673 t2 = t1 & mask;
674 r = t2 - addend; */
675 op = optab_for_tree_code (PLUS_EXPR, type, optab_default);
2225b9f2 676 if (op != unknown_optab
4ee4c52c
JJ
677 && optab_handler (op, TYPE_MODE (type))
678 != CODE_FOR_nothing)
679 {
680 cur_op = gimplify_build2 (gsi, PLUS_EXPR, type, op0,
681 addend);
682 cur_op = gimplify_build2 (gsi, BIT_AND_EXPR, type,
683 cur_op, mask);
684 op = optab_for_tree_code (MINUS_EXPR, type,
685 optab_default);
2225b9f2 686 if (op != unknown_optab
4ee4c52c
JJ
687 && optab_handler (op, TYPE_MODE (type))
688 != CODE_FOR_nothing)
689 return gimplify_build2 (gsi, MINUS_EXPR, type,
690 cur_op, addend);
691 }
692 }
693 }
694 }
695 }
696
697 if (mode == -2 || BYTES_BIG_ENDIAN != WORDS_BIG_ENDIAN)
698 return NULL_TREE;
699
00f07b86
RH
700 if (!can_mult_highpart_p (TYPE_MODE (type), TYPE_UNSIGNED (type)))
701 return NULL_TREE;
4ee4c52c
JJ
702
703 cur_op = op0;
704
705 switch (mode)
706 {
707 case 0:
708 gcc_assert (unsignedp);
709 /* t1 = oprnd0 >> pre_shift;
c9ba3307 710 t2 = t1 h* ml;
4ee4c52c
JJ
711 q = t2 >> post_shift; */
712 cur_op = add_rshift (gsi, type, cur_op, pre_shifts);
713 if (cur_op == NULL_TREE)
714 return NULL_TREE;
715 break;
716 case 1:
717 gcc_assert (unsignedp);
718 for (i = 0; i < nunits; i++)
719 {
720 shift_temps[i] = 1;
721 post_shifts[i]--;
722 }
723 break;
724 case 2:
725 case 3:
726 case 4:
727 case 5:
728 gcc_assert (!unsignedp);
729 for (i = 0; i < nunits; i++)
730 shift_temps[i] = prec - 1;
731 break;
732 default:
733 return NULL_TREE;
734 }
735
736 for (i = 0; i < nunits; i++)
737 vec[i] = build_int_cst (TREE_TYPE (type), mulc[i]);
738 mulcst = build_vector (type, vec);
0fcc85cd 739
00f07b86 740 cur_op = gimplify_build2 (gsi, MULT_HIGHPART_EXPR, type, cur_op, mulcst);
4ee4c52c
JJ
741
742 switch (mode)
743 {
744 case 0:
745 /* t1 = oprnd0 >> pre_shift;
c9ba3307 746 t2 = t1 h* ml;
4ee4c52c
JJ
747 q = t2 >> post_shift; */
748 cur_op = add_rshift (gsi, type, cur_op, post_shifts);
749 break;
750 case 1:
c9ba3307 751 /* t1 = oprnd0 h* ml;
4ee4c52c
JJ
752 t2 = oprnd0 - t1;
753 t3 = t2 >> 1;
754 t4 = t1 + t3;
755 q = t4 >> (post_shift - 1); */
756 op = optab_for_tree_code (MINUS_EXPR, type, optab_default);
2225b9f2 757 if (op == unknown_optab
4ee4c52c
JJ
758 || optab_handler (op, TYPE_MODE (type)) == CODE_FOR_nothing)
759 return NULL_TREE;
760 tem = gimplify_build2 (gsi, MINUS_EXPR, type, op0, cur_op);
761 tem = add_rshift (gsi, type, tem, shift_temps);
762 op = optab_for_tree_code (PLUS_EXPR, type, optab_default);
2225b9f2 763 if (op == unknown_optab
4ee4c52c
JJ
764 || optab_handler (op, TYPE_MODE (type)) == CODE_FOR_nothing)
765 return NULL_TREE;
766 tem = gimplify_build2 (gsi, PLUS_EXPR, type, cur_op, tem);
767 cur_op = add_rshift (gsi, type, tem, post_shifts);
768 if (cur_op == NULL_TREE)
769 return NULL_TREE;
770 break;
771 case 2:
772 case 3:
773 case 4:
774 case 5:
c9ba3307 775 /* t1 = oprnd0 h* ml;
4ee4c52c
JJ
776 t2 = t1; [ iff (mode & 2) != 0 ]
777 t2 = t1 + oprnd0; [ iff (mode & 2) == 0 ]
778 t3 = t2 >> post_shift;
779 t4 = oprnd0 >> (prec - 1);
780 q = t3 - t4; [ iff (mode & 1) == 0 ]
781 q = t4 - t3; [ iff (mode & 1) != 0 ] */
782 if ((mode & 2) == 0)
783 {
784 op = optab_for_tree_code (PLUS_EXPR, type, optab_default);
2225b9f2 785 if (op == unknown_optab
4ee4c52c
JJ
786 || optab_handler (op, TYPE_MODE (type)) == CODE_FOR_nothing)
787 return NULL_TREE;
788 cur_op = gimplify_build2 (gsi, PLUS_EXPR, type, cur_op, op0);
789 }
790 cur_op = add_rshift (gsi, type, cur_op, post_shifts);
791 if (cur_op == NULL_TREE)
792 return NULL_TREE;
793 tem = add_rshift (gsi, type, op0, shift_temps);
794 if (tem == NULL_TREE)
795 return NULL_TREE;
796 op = optab_for_tree_code (MINUS_EXPR, type, optab_default);
2225b9f2 797 if (op == unknown_optab
4ee4c52c
JJ
798 || optab_handler (op, TYPE_MODE (type)) == CODE_FOR_nothing)
799 return NULL_TREE;
800 if ((mode & 1) == 0)
801 cur_op = gimplify_build2 (gsi, MINUS_EXPR, type, cur_op, tem);
802 else
803 cur_op = gimplify_build2 (gsi, MINUS_EXPR, type, tem, cur_op);
804 break;
805 default:
806 gcc_unreachable ();
807 }
808
809 if (code == TRUNC_DIV_EXPR)
810 return cur_op;
811
812 /* We divided. Now finish by:
813 t1 = q * oprnd1;
814 r = oprnd0 - t1; */
815 op = optab_for_tree_code (MULT_EXPR, type, optab_default);
2225b9f2 816 if (op == unknown_optab
4ee4c52c
JJ
817 || optab_handler (op, TYPE_MODE (type)) == CODE_FOR_nothing)
818 return NULL_TREE;
819 tem = gimplify_build2 (gsi, MULT_EXPR, type, cur_op, op1);
820 op = optab_for_tree_code (MINUS_EXPR, type, optab_default);
2225b9f2 821 if (op == unknown_optab
4ee4c52c
JJ
822 || optab_handler (op, TYPE_MODE (type)) == CODE_FOR_nothing)
823 return NULL_TREE;
824 return gimplify_build2 (gsi, MINUS_EXPR, type, op0, tem);
825}
826
374ab2d7
MG
827/* Expand a vector condition to scalars, by using many conditions
828 on the vector's elements. */
829static void
830expand_vector_condition (gimple_stmt_iterator *gsi)
831{
832 gimple stmt = gsi_stmt (*gsi);
833 tree type = gimple_expr_type (stmt);
834 tree a = gimple_assign_rhs1 (stmt);
835 tree a1 = a;
836 tree a2;
837 bool a_is_comparison = false;
838 tree b = gimple_assign_rhs2 (stmt);
839 tree c = gimple_assign_rhs3 (stmt);
9771b263 840 vec<constructor_elt, va_gc> *v;
374ab2d7
MG
841 tree constr;
842 tree inner_type = TREE_TYPE (type);
843 tree cond_type = TREE_TYPE (TREE_TYPE (a));
844 tree comp_inner_type = cond_type;
845 tree width = TYPE_SIZE (inner_type);
846 tree index = bitsize_int (0);
847 int nunits = TYPE_VECTOR_SUBPARTS (type);
848 int i;
849 location_t loc = gimple_location (gsi_stmt (*gsi));
850
784fb9b3 851 if (!is_gimple_val (a))
374ab2d7
MG
852 {
853 gcc_assert (COMPARISON_CLASS_P (a));
854 a_is_comparison = true;
855 a1 = TREE_OPERAND (a, 0);
856 a2 = TREE_OPERAND (a, 1);
857 comp_inner_type = TREE_TYPE (TREE_TYPE (a1));
858 }
859
860 if (expand_vec_cond_expr_p (type, TREE_TYPE (a1)))
861 return;
862
863 /* TODO: try and find a smaller vector type. */
864
865 warning_at (loc, OPT_Wvector_operation_performance,
866 "vector condition will be expanded piecewise");
867
9771b263 868 vec_alloc (v, nunits);
374ab2d7
MG
869 for (i = 0; i < nunits;
870 i++, index = int_const_binop (PLUS_EXPR, index, width))
871 {
872 tree aa, result;
873 tree bb = tree_vec_extract (gsi, inner_type, b, width, index);
874 tree cc = tree_vec_extract (gsi, inner_type, c, width, index);
875 if (a_is_comparison)
876 {
877 tree aa1 = tree_vec_extract (gsi, comp_inner_type, a1, width, index);
878 tree aa2 = tree_vec_extract (gsi, comp_inner_type, a2, width, index);
879 aa = build2 (TREE_CODE (a), cond_type, aa1, aa2);
880 }
881 else
882 aa = tree_vec_extract (gsi, cond_type, a, width, index);
883 result = gimplify_build3 (gsi, COND_EXPR, inner_type, aa, bb, cc);
884 constructor_elt ce = {NULL_TREE, result};
9771b263 885 v->quick_push (ce);
374ab2d7
MG
886 }
887
888 constr = build_constructor (type, v);
889 gimple_assign_set_rhs_from_tree (gsi, constr);
890 update_stmt (gsi_stmt (*gsi));
891}
892
2b725155 893static tree
726a989a
RB
894expand_vector_operation (gimple_stmt_iterator *gsi, tree type, tree compute_type,
895 gimple assign, enum tree_code code)
2b725155
RH
896{
897 enum machine_mode compute_mode = TYPE_MODE (compute_type);
898
899 /* If the compute mode is not a vector mode (hence we are not decomposing
900 a BLKmode vector to smaller, hardware-supported vectors), we may want
901 to expand the operations in parallel. */
902 if (GET_MODE_CLASS (compute_mode) != MODE_VECTOR_INT
325217ed
CF
903 && GET_MODE_CLASS (compute_mode) != MODE_VECTOR_FLOAT
904 && GET_MODE_CLASS (compute_mode) != MODE_VECTOR_FRACT
905 && GET_MODE_CLASS (compute_mode) != MODE_VECTOR_UFRACT
906 && GET_MODE_CLASS (compute_mode) != MODE_VECTOR_ACCUM
907 && GET_MODE_CLASS (compute_mode) != MODE_VECTOR_UACCUM)
2b725155
RH
908 switch (code)
909 {
910 case PLUS_EXPR:
911 case MINUS_EXPR:
eeef0e45 912 if (!TYPE_OVERFLOW_TRAPS (type))
cdbb5ba3
AS
913 return expand_vector_addition (gsi, do_binop, do_plus_minus, type,
914 gimple_assign_rhs1 (assign),
726a989a 915 gimple_assign_rhs2 (assign), code);
2b725155
RH
916 break;
917
918 case NEGATE_EXPR:
eeef0e45 919 if (!TYPE_OVERFLOW_TRAPS (type))
726a989a
RB
920 return expand_vector_addition (gsi, do_unop, do_negate, type,
921 gimple_assign_rhs1 (assign),
2b725155
RH
922 NULL_TREE, code);
923 break;
924
925 case BIT_AND_EXPR:
926 case BIT_IOR_EXPR:
927 case BIT_XOR_EXPR:
726a989a
RB
928 return expand_vector_parallel (gsi, do_binop, type,
929 gimple_assign_rhs1 (assign),
930 gimple_assign_rhs2 (assign), code);
2b725155
RH
931
932 case BIT_NOT_EXPR:
726a989a
RB
933 return expand_vector_parallel (gsi, do_unop, type,
934 gimple_assign_rhs1 (assign),
d246ab4f
AS
935 NULL_TREE, code);
936 case EQ_EXPR:
937 case NE_EXPR:
938 case GT_EXPR:
939 case LT_EXPR:
940 case GE_EXPR:
941 case LE_EXPR:
942 case UNEQ_EXPR:
943 case UNGT_EXPR:
944 case UNLT_EXPR:
945 case UNGE_EXPR:
946 case UNLE_EXPR:
947 case LTGT_EXPR:
948 case ORDERED_EXPR:
949 case UNORDERED_EXPR:
950 {
951 tree rhs1 = gimple_assign_rhs1 (assign);
952 tree rhs2 = gimple_assign_rhs2 (assign);
2b725155 953
d246ab4f
AS
954 return expand_vector_comparison (gsi, type, rhs1, rhs2, code);
955 }
4ee4c52c
JJ
956
957 case TRUNC_DIV_EXPR:
958 case TRUNC_MOD_EXPR:
959 {
960 tree rhs1 = gimple_assign_rhs1 (assign);
961 tree rhs2 = gimple_assign_rhs2 (assign);
962 tree ret;
963
964 if (!optimize
965 || !VECTOR_INTEGER_TYPE_P (type)
966 || TREE_CODE (rhs2) != VECTOR_CST)
967 break;
968
969 ret = expand_vector_divmod (gsi, type, rhs1, rhs2, code);
970 if (ret != NULL_TREE)
971 return ret;
972 break;
973 }
974
2b725155
RH
975 default:
976 break;
977 }
978
979 if (TREE_CODE_CLASS (code) == tcc_unary)
726a989a
RB
980 return expand_vector_piecewise (gsi, do_unop, type, compute_type,
981 gimple_assign_rhs1 (assign),
2b725155
RH
982 NULL_TREE, code);
983 else
726a989a
RB
984 return expand_vector_piecewise (gsi, do_binop, type, compute_type,
985 gimple_assign_rhs1 (assign),
986 gimple_assign_rhs2 (assign), code);
2b725155
RH
987}
988\f
995ec132
RG
989/* Return a type for the widest vector mode whose components are of type
990 TYPE, or NULL_TREE if none is found. */
325217ed 991
2b725155 992static tree
995ec132 993type_for_widest_vector_mode (tree type, optab op)
2b725155 994{
995ec132 995 enum machine_mode inner_mode = TYPE_MODE (type);
2b725155
RH
996 enum machine_mode best_mode = VOIDmode, mode;
997 int best_nunits = 0;
998
3d8bf70f 999 if (SCALAR_FLOAT_MODE_P (inner_mode))
2b725155 1000 mode = MIN_MODE_VECTOR_FLOAT;
325217ed
CF
1001 else if (SCALAR_FRACT_MODE_P (inner_mode))
1002 mode = MIN_MODE_VECTOR_FRACT;
1003 else if (SCALAR_UFRACT_MODE_P (inner_mode))
1004 mode = MIN_MODE_VECTOR_UFRACT;
1005 else if (SCALAR_ACCUM_MODE_P (inner_mode))
1006 mode = MIN_MODE_VECTOR_ACCUM;
1007 else if (SCALAR_UACCUM_MODE_P (inner_mode))
1008 mode = MIN_MODE_VECTOR_UACCUM;
2b725155
RH
1009 else
1010 mode = MIN_MODE_VECTOR_INT;
1011
1012 for (; mode != VOIDmode; mode = GET_MODE_WIDER_MODE (mode))
1013 if (GET_MODE_INNER (mode) == inner_mode
1014 && GET_MODE_NUNITS (mode) > best_nunits
947131ba 1015 && optab_handler (op, mode) != CODE_FOR_nothing)
2b725155
RH
1016 best_mode = mode, best_nunits = GET_MODE_NUNITS (mode);
1017
1018 if (best_mode == VOIDmode)
1019 return NULL_TREE;
1020 else
995ec132 1021 return build_vector_type_for_mode (type, best_mode);
2b725155
RH
1022}
1023
f90e8e2e
AS
1024
1025/* Build a reference to the element of the vector VECT. Function
1026 returns either the element itself, either BIT_FIELD_REF, or an
1027 ARRAY_REF expression.
1028
073a8998 1029 GSI is required to insert temporary variables while building a
f90e8e2e
AS
1030 refernece to the element of the vector VECT.
1031
1032 PTMPVEC is a pointer to the temporary variable for caching
1033 purposes. In case when PTMPVEC is NULL new temporary variable
1034 will be created. */
1035static tree
1036vector_element (gimple_stmt_iterator *gsi, tree vect, tree idx, tree *ptmpvec)
1037{
067f5960 1038 tree vect_type, vect_elt_type;
f90e8e2e
AS
1039 gimple asgn;
1040 tree tmpvec;
1041 tree arraytype;
1042 bool need_asgn = true;
067f5960 1043 unsigned int elements;
f90e8e2e 1044
067f5960
RH
1045 vect_type = TREE_TYPE (vect);
1046 vect_elt_type = TREE_TYPE (vect_type);
1047 elements = TYPE_VECTOR_SUBPARTS (vect_type);
f90e8e2e 1048
f90e8e2e
AS
1049 if (TREE_CODE (idx) == INTEGER_CST)
1050 {
1051 unsigned HOST_WIDE_INT index;
1052
067f5960
RH
1053 /* Given that we're about to compute a binary modulus,
1054 we don't care about the high bits of the value. */
1055 index = TREE_INT_CST_LOW (idx);
1056 if (!host_integerp (idx, 1) || index >= elements)
1057 {
1058 index &= elements - 1;
1059 idx = build_int_cst (TREE_TYPE (idx), index);
1060 }
f90e8e2e 1061
bc622b2a
RG
1062 /* When lowering a vector statement sequence do some easy
1063 simplification by looking through intermediate vector results. */
1064 if (TREE_CODE (vect) == SSA_NAME)
1065 {
1066 gimple def_stmt = SSA_NAME_DEF_STMT (vect);
1067 if (is_gimple_assign (def_stmt)
1068 && (gimple_assign_rhs_code (def_stmt) == VECTOR_CST
1069 || gimple_assign_rhs_code (def_stmt) == CONSTRUCTOR))
1070 vect = gimple_assign_rhs1 (def_stmt);
1071 }
1072
f90e8e2e 1073 if (TREE_CODE (vect) == VECTOR_CST)
d2a12ae7 1074 return VECTOR_CST_ELT (vect, index);
4a2c20cc
JJ
1075 else if (TREE_CODE (vect) == CONSTRUCTOR
1076 && (CONSTRUCTOR_NELTS (vect) == 0
1077 || TREE_CODE (TREE_TYPE (CONSTRUCTOR_ELT (vect, 0)->value))
1078 != VECTOR_TYPE))
f90e8e2e 1079 {
4a2c20cc
JJ
1080 if (index < CONSTRUCTOR_NELTS (vect))
1081 return CONSTRUCTOR_ELT (vect, index)->value;
067f5960 1082 return build_zero_cst (vect_elt_type);
f90e8e2e 1083 }
067f5960 1084 else
f90e8e2e 1085 {
067f5960 1086 tree size = TYPE_SIZE (vect_elt_type);
26a7fca2
JJ
1087 tree pos = fold_build2 (MULT_EXPR, bitsizetype, bitsize_int (index),
1088 size);
1089 return fold_build3 (BIT_FIELD_REF, vect_elt_type, vect, size, pos);
f90e8e2e 1090 }
f90e8e2e
AS
1091 }
1092
1093 if (!ptmpvec)
067f5960 1094 tmpvec = create_tmp_var (vect_type, "vectmp");
f90e8e2e 1095 else if (!*ptmpvec)
067f5960 1096 tmpvec = *ptmpvec = create_tmp_var (vect_type, "vectmp");
f90e8e2e
AS
1097 else
1098 {
1099 tmpvec = *ptmpvec;
1100 need_asgn = false;
1101 }
1102
1103 if (need_asgn)
1104 {
1105 TREE_ADDRESSABLE (tmpvec) = 1;
1106 asgn = gimple_build_assign (tmpvec, vect);
1107 gsi_insert_before (gsi, asgn, GSI_SAME_STMT);
1108 }
1109
067f5960
RH
1110 arraytype = build_array_type_nelts (vect_elt_type, elements);
1111 return build4 (ARRAY_REF, vect_elt_type,
f90e8e2e
AS
1112 build1 (VIEW_CONVERT_EXPR, arraytype, tmpvec),
1113 idx, NULL_TREE, NULL_TREE);
1114}
1115
2205ed25 1116/* Check if VEC_PERM_EXPR within the given setting is supported
067f5960 1117 by hardware, or lower it piecewise.
f90e8e2e 1118
2205ed25
RH
1119 When VEC_PERM_EXPR has the same first and second operands:
1120 VEC_PERM_EXPR <v0, v0, mask> the lowered version would be
f90e8e2e
AS
1121 {v0[mask[0]], v0[mask[1]], ...}
1122 MASK and V0 must have the same number of elements.
1123
2205ed25 1124 Otherwise VEC_PERM_EXPR <v0, v1, mask> is lowered to
f90e8e2e
AS
1125 {mask[0] < len(v0) ? v0[mask[0]] : v1[mask[0]], ...}
1126 V0 and V1 must have the same type. MASK, V0, V1 must have the
1127 same number of arguments. */
f90e8e2e 1128
067f5960 1129static void
2205ed25 1130lower_vec_perm (gimple_stmt_iterator *gsi)
067f5960 1131{
f90e8e2e
AS
1132 gimple stmt = gsi_stmt (*gsi);
1133 tree mask = gimple_assign_rhs3 (stmt);
1134 tree vec0 = gimple_assign_rhs1 (stmt);
1135 tree vec1 = gimple_assign_rhs2 (stmt);
067f5960
RH
1136 tree vect_type = TREE_TYPE (vec0);
1137 tree mask_type = TREE_TYPE (mask);
1138 tree vect_elt_type = TREE_TYPE (vect_type);
1139 tree mask_elt_type = TREE_TYPE (mask_type);
1140 unsigned int elements = TYPE_VECTOR_SUBPARTS (vect_type);
9771b263 1141 vec<constructor_elt, va_gc> *v;
067f5960
RH
1142 tree constr, t, si, i_val;
1143 tree vec0tmp = NULL_TREE, vec1tmp = NULL_TREE, masktmp = NULL_TREE;
1144 bool two_operand_p = !operand_equal_p (vec0, vec1, 0);
cdbb5ba3 1145 location_t loc = gimple_location (gsi_stmt (*gsi));
067f5960 1146 unsigned i;
f90e8e2e 1147
273d260f
RR
1148 if (TREE_CODE (mask) == SSA_NAME)
1149 {
1150 gimple def_stmt = SSA_NAME_DEF_STMT (mask);
1151 if (is_gimple_assign (def_stmt)
1152 && gimple_assign_rhs_code (def_stmt) == VECTOR_CST)
1153 mask = gimple_assign_rhs1 (def_stmt);
1154 }
1155
22e4dee7
RH
1156 if (TREE_CODE (mask) == VECTOR_CST)
1157 {
1158 unsigned char *sel_int = XALLOCAVEC (unsigned char, elements);
22e4dee7 1159
d2a12ae7
RG
1160 for (i = 0; i < elements; ++i)
1161 sel_int[i] = (TREE_INT_CST_LOW (VECTOR_CST_ELT (mask, i))
1162 & (2 * elements - 1));
22e4dee7
RH
1163
1164 if (can_vec_perm_p (TYPE_MODE (vect_type), false, sel_int))
273d260f
RR
1165 {
1166 gimple_assign_set_rhs3 (stmt, mask);
1167 update_stmt (stmt);
1168 return;
1169 }
22e4dee7
RH
1170 }
1171 else if (can_vec_perm_p (TYPE_MODE (vect_type), true, NULL))
067f5960 1172 return;
cdbb5ba3
AS
1173
1174 warning_at (loc, OPT_Wvector_operation_performance,
1175 "vector shuffling operation will be expanded piecewise");
1176
9771b263 1177 vec_alloc (v, elements);
067f5960 1178 for (i = 0; i < elements; i++)
f90e8e2e 1179 {
067f5960
RH
1180 si = size_int (i);
1181 i_val = vector_element (gsi, mask, si, &masktmp);
f90e8e2e 1182
067f5960 1183 if (TREE_CODE (i_val) == INTEGER_CST)
f90e8e2e 1184 {
067f5960 1185 unsigned HOST_WIDE_INT index;
f90e8e2e 1186
067f5960
RH
1187 index = TREE_INT_CST_LOW (i_val);
1188 if (!host_integerp (i_val, 1) || index >= elements)
1189 i_val = build_int_cst (mask_elt_type, index & (elements - 1));
f90e8e2e 1190
067f5960
RH
1191 if (two_operand_p && (index & elements) != 0)
1192 t = vector_element (gsi, vec1, i_val, &vec1tmp);
1193 else
1194 t = vector_element (gsi, vec0, i_val, &vec0tmp);
f90e8e2e 1195
067f5960
RH
1196 t = force_gimple_operand_gsi (gsi, t, true, NULL_TREE,
1197 true, GSI_SAME_STMT);
f90e8e2e 1198 }
067f5960 1199 else
f90e8e2e 1200 {
067f5960
RH
1201 tree cond = NULL_TREE, v0_val;
1202
1203 if (two_operand_p)
1204 {
1205 cond = fold_build2 (BIT_AND_EXPR, mask_elt_type, i_val,
1206 build_int_cst (mask_elt_type, elements));
1207 cond = force_gimple_operand_gsi (gsi, cond, true, NULL_TREE,
1208 true, GSI_SAME_STMT);
1209 }
1210
1211 i_val = fold_build2 (BIT_AND_EXPR, mask_elt_type, i_val,
1212 build_int_cst (mask_elt_type, elements - 1));
1213 i_val = force_gimple_operand_gsi (gsi, i_val, true, NULL_TREE,
1214 true, GSI_SAME_STMT);
1215
1216 v0_val = vector_element (gsi, vec0, i_val, &vec0tmp);
1217 v0_val = force_gimple_operand_gsi (gsi, v0_val, true, NULL_TREE,
1218 true, GSI_SAME_STMT);
1219
1220 if (two_operand_p)
1221 {
1222 tree v1_val;
1223
1224 v1_val = vector_element (gsi, vec1, i_val, &vec1tmp);
1225 v1_val = force_gimple_operand_gsi (gsi, v1_val, true, NULL_TREE,
1226 true, GSI_SAME_STMT);
1227
1228 cond = fold_build2 (EQ_EXPR, boolean_type_node,
1229 cond, build_zero_cst (mask_elt_type));
1230 cond = fold_build3 (COND_EXPR, vect_elt_type,
1231 cond, v0_val, v1_val);
1232 t = force_gimple_operand_gsi (gsi, cond, true, NULL_TREE,
1233 true, GSI_SAME_STMT);
f90e8e2e 1234 }
067f5960
RH
1235 else
1236 t = v0_val;
f90e8e2e 1237 }
067f5960 1238
4a2c20cc 1239 CONSTRUCTOR_APPEND_ELT (v, NULL_TREE, t);
f90e8e2e
AS
1240 }
1241
067f5960 1242 constr = build_constructor (vect_type, v);
f90e8e2e 1243 gimple_assign_set_rhs_from_tree (gsi, constr);
067f5960 1244 update_stmt (gsi_stmt (*gsi));
f90e8e2e
AS
1245}
1246
2b725155
RH
1247/* Process one statement. If we identify a vector operation, expand it. */
1248
1249static void
726a989a 1250expand_vector_operations_1 (gimple_stmt_iterator *gsi)
2b725155 1251{
726a989a
RB
1252 gimple stmt = gsi_stmt (*gsi);
1253 tree lhs, rhs1, rhs2 = NULL, type, compute_type;
2b725155
RH
1254 enum tree_code code;
1255 enum machine_mode compute_mode;
2225b9f2 1256 optab op = unknown_optab;
726a989a
RB
1257 enum gimple_rhs_class rhs_class;
1258 tree new_rhs;
2b725155 1259
726a989a
RB
1260 if (gimple_code (stmt) != GIMPLE_ASSIGN)
1261 return;
2b725155 1262
726a989a
RB
1263 code = gimple_assign_rhs_code (stmt);
1264 rhs_class = get_gimple_rhs_class (code);
d246ab4f 1265 lhs = gimple_assign_lhs (stmt);
2b725155 1266
2205ed25 1267 if (code == VEC_PERM_EXPR)
f90e8e2e 1268 {
2205ed25 1269 lower_vec_perm (gsi);
067f5960 1270 return;
f90e8e2e
AS
1271 }
1272
374ab2d7
MG
1273 if (code == VEC_COND_EXPR)
1274 {
1275 expand_vector_condition (gsi);
1276 return;
1277 }
726a989a
RB
1278 if (rhs_class != GIMPLE_UNARY_RHS && rhs_class != GIMPLE_BINARY_RHS)
1279 return;
2b725155 1280
726a989a
RB
1281 rhs1 = gimple_assign_rhs1 (stmt);
1282 type = gimple_expr_type (stmt);
1283 if (rhs_class == GIMPLE_BINARY_RHS)
1284 rhs2 = gimple_assign_rhs2 (stmt);
2b725155 1285
2b725155
RH
1286 if (TREE_CODE (type) != VECTOR_TYPE)
1287 return;
1288
b8698a0f 1289 if (code == NOP_EXPR
f57d17f1
TM
1290 || code == FLOAT_EXPR
1291 || code == FIX_TRUNC_EXPR
1292 || code == VIEW_CONVERT_EXPR)
2b725155 1293 return;
b8698a0f 1294
2b725155 1295 gcc_assert (code != CONVERT_EXPR);
9f106823
UB
1296
1297 /* The signedness is determined from input argument. */
1298 if (code == VEC_UNPACK_FLOAT_HI_EXPR
1299 || code == VEC_UNPACK_FLOAT_LO_EXPR)
726a989a 1300 type = TREE_TYPE (rhs1);
9f106823 1301
3f30a9a6
RH
1302 /* For widening/narrowing vector operations, the relevant type is of the
1303 arguments, not the widened result. VEC_UNPACK_FLOAT_*_EXPR is
1304 calculated in the same way above. */
1305 if (code == WIDEN_SUM_EXPR
1306 || code == VEC_WIDEN_MULT_HI_EXPR
1307 || code == VEC_WIDEN_MULT_LO_EXPR
1308 || code == VEC_WIDEN_MULT_EVEN_EXPR
1309 || code == VEC_WIDEN_MULT_ODD_EXPR
1310 || code == VEC_UNPACK_HI_EXPR
1311 || code == VEC_UNPACK_LO_EXPR
1312 || code == VEC_PACK_TRUNC_EXPR
1313 || code == VEC_PACK_SAT_EXPR
1314 || code == VEC_PACK_FIX_TRUNC_EXPR
1315 || code == VEC_WIDEN_LSHIFT_HI_EXPR
1316 || code == VEC_WIDEN_LSHIFT_LO_EXPR)
1317 type = TREE_TYPE (rhs1);
1318
71d46ca5
MM
1319 /* Choose between vector shift/rotate by vector and vector shift/rotate by
1320 scalar */
b8698a0f
L
1321 if (code == LSHIFT_EXPR
1322 || code == RSHIFT_EXPR
726a989a 1323 || code == LROTATE_EXPR
71d46ca5
MM
1324 || code == RROTATE_EXPR)
1325 {
0f3d6c10
RH
1326 optab opv;
1327
bdc3ee5d
RH
1328 /* Check whether we have vector <op> {x,x,x,x} where x
1329 could be a scalar variable or a constant. Transform
1330 vector <op> {x,x,x,x} ==> vector <op> scalar. */
0f3d6c10 1331 if (VECTOR_INTEGER_TYPE_P (TREE_TYPE (rhs2)))
47853c73
AS
1332 {
1333 tree first;
1334 gimple def_stmt;
1335
bdc3ee5d
RH
1336 if ((TREE_CODE (rhs2) == VECTOR_CST
1337 && (first = uniform_vector_p (rhs2)) != NULL_TREE)
1338 || (TREE_CODE (rhs2) == SSA_NAME
1339 && (def_stmt = SSA_NAME_DEF_STMT (rhs2))
1340 && gimple_assign_single_p (def_stmt)
1341 && (first = uniform_vector_p
1342 (gimple_assign_rhs1 (def_stmt))) != NULL_TREE))
47853c73
AS
1343 {
1344 gimple_assign_set_rhs2 (stmt, first);
1345 update_stmt (stmt);
1346 rhs2 = first;
1347 }
47853c73 1348 }
f90e8e2e 1349
0f3d6c10
RH
1350 opv = optab_for_tree_code (code, type, optab_vector);
1351 if (VECTOR_INTEGER_TYPE_P (TREE_TYPE (rhs2)))
1352 op = opv;
bdc3ee5d 1353 else
2fa6eeff 1354 {
bdc3ee5d 1355 op = optab_for_tree_code (code, type, optab_scalar);
2fa6eeff 1356
bdc3ee5d
RH
1357 /* The rtl expander will expand vector/scalar as vector/vector
1358 if necessary. Don't bother converting the stmt here. */
0f3d6c10
RH
1359 if (optab_handler (op, TYPE_MODE (type)) == CODE_FOR_nothing
1360 && optab_handler (opv, TYPE_MODE (type)) != CODE_FOR_nothing)
1361 return;
2fa6eeff 1362 }
71d46ca5
MM
1363 }
1364 else
1365 op = optab_for_tree_code (code, type, optab_default);
2b725155
RH
1366
1367 /* Optabs will try converting a negation into a subtraction, so
1368 look for it as well. TODO: negation of floating-point vectors
1369 might be turned into an exclusive OR toggling the sign bit. */
2225b9f2 1370 if (op == unknown_optab
2b725155
RH
1371 && code == NEGATE_EXPR
1372 && INTEGRAL_TYPE_P (TREE_TYPE (type)))
71d46ca5 1373 op = optab_for_tree_code (MINUS_EXPR, type, optab_default);
2b725155
RH
1374
1375 /* For very wide vectors, try using a smaller vector mode. */
1376 compute_type = type;
0f3d6c10 1377 if (!VECTOR_MODE_P (TYPE_MODE (type)) && op)
2b725155
RH
1378 {
1379 tree vector_compute_type
995ec132 1380 = type_for_widest_vector_mode (TREE_TYPE (type), op);
1e9ae5ab
UB
1381 if (vector_compute_type != NULL_TREE
1382 && (TYPE_VECTOR_SUBPARTS (vector_compute_type)
0f3d6c10
RH
1383 < TYPE_VECTOR_SUBPARTS (compute_type))
1384 && (optab_handler (op, TYPE_MODE (vector_compute_type))
1385 != CODE_FOR_nothing))
1e9ae5ab 1386 compute_type = vector_compute_type;
2b725155
RH
1387 }
1388
1389 /* If we are breaking a BLKmode vector into smaller pieces,
1390 type_for_widest_vector_mode has already looked into the optab,
1391 so skip these checks. */
1392 if (compute_type == type)
1393 {
1394 compute_mode = TYPE_MODE (compute_type);
00f07b86
RH
1395 if (VECTOR_MODE_P (compute_mode))
1396 {
1397 if (op && optab_handler (op, compute_mode) != CODE_FOR_nothing)
1398 return;
1399 if (code == MULT_HIGHPART_EXPR
1400 && can_mult_highpart_p (compute_mode,
1401 TYPE_UNSIGNED (compute_type)))
1402 return;
1403 }
1404 /* There is no operation in hardware, so fall back to scalars. */
1405 compute_type = TREE_TYPE (type);
2b725155
RH
1406 }
1407
a6b46ba2 1408 gcc_assert (code != VEC_LSHIFT_EXPR && code != VEC_RSHIFT_EXPR);
726a989a 1409 new_rhs = expand_vector_operation (gsi, type, compute_type, stmt, code);
d246ab4f
AS
1410
1411 /* Leave expression untouched for later expansion. */
1412 if (new_rhs == NULL_TREE)
1413 return;
1414
726a989a
RB
1415 if (!useless_type_conversion_p (TREE_TYPE (lhs), TREE_TYPE (new_rhs)))
1416 new_rhs = gimplify_build1 (gsi, VIEW_CONVERT_EXPR, TREE_TYPE (lhs),
1417 new_rhs);
1418
1419 /* NOTE: We should avoid using gimple_assign_set_rhs_from_tree. One
1420 way to do it is change expand_vector_operation and its callees to
1421 return a tree_code, RHS1 and RHS2 instead of a tree. */
1422 gimple_assign_set_rhs_from_tree (gsi, new_rhs);
3865a06f 1423 update_stmt (gsi_stmt (*gsi));
2b725155
RH
1424}
1425\f
1426/* Use this to lower vector operations introduced by the vectorizer,
1427 if it may need the bit-twiddling tricks implemented in this file. */
1428
1429static bool
f90e8e2e 1430gate_expand_vector_operations_ssa (void)
2b725155 1431{
6f37411d 1432 return !(cfun->curr_properties & PROP_gimple_lvec);
2b725155
RH
1433}
1434
c2924966 1435static unsigned int
2b725155
RH
1436expand_vector_operations (void)
1437{
726a989a 1438 gimple_stmt_iterator gsi;
2b725155 1439 basic_block bb;
3865a06f 1440 bool cfg_changed = false;
2b725155
RH
1441
1442 FOR_EACH_BB (bb)
1443 {
726a989a 1444 for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
2b725155 1445 {
726a989a 1446 expand_vector_operations_1 (&gsi);
3865a06f
RG
1447 /* ??? If we do not cleanup EH then we will ICE in
1448 verification. But in reality we have created wrong-code
1449 as we did not properly transition EH info and edges to
1450 the piecewise computations. */
1451 if (maybe_clean_eh_stmt (gsi_stmt (gsi))
1452 && gimple_purge_dead_eh_edges (bb))
1453 cfg_changed = true;
2b725155
RH
1454 }
1455 }
3865a06f
RG
1456
1457 return cfg_changed ? TODO_cleanup_cfg : 0;
2b725155
RH
1458}
1459
27a4cd48
DM
1460namespace {
1461
1462const pass_data pass_data_lower_vector =
2b725155 1463{
27a4cd48
DM
1464 GIMPLE_PASS, /* type */
1465 "veclower", /* name */
1466 OPTGROUP_VEC, /* optinfo_flags */
1467 true, /* has_gate */
1468 true, /* has_execute */
1469 TV_NONE, /* tv_id */
1470 PROP_cfg, /* properties_required */
1471 PROP_gimple_lvec, /* properties_provided */
1472 0, /* properties_destroyed */
1473 0, /* todo_flags_start */
1474 ( TODO_update_ssa | TODO_verify_ssa
1475 | TODO_verify_stmts
1476 | TODO_verify_flow
1477 | TODO_cleanup_cfg ), /* todo_flags_finish */
2b725155
RH
1478};
1479
27a4cd48
DM
1480class pass_lower_vector : public gimple_opt_pass
1481{
1482public:
c3284718
RS
1483 pass_lower_vector (gcc::context *ctxt)
1484 : gimple_opt_pass (pass_data_lower_vector, ctxt)
27a4cd48
DM
1485 {}
1486
1487 /* opt_pass methods: */
1488 bool gate () { return gate_expand_vector_operations_ssa (); }
1489 unsigned int execute () { return expand_vector_operations (); }
1490
1491}; // class pass_lower_vector
1492
1493} // anon namespace
1494
1495gimple_opt_pass *
1496make_pass_lower_vector (gcc::context *ctxt)
1497{
1498 return new pass_lower_vector (ctxt);
1499}
1500
1501namespace {
1502
1503const pass_data pass_data_lower_vector_ssa =
2b725155 1504{
27a4cd48
DM
1505 GIMPLE_PASS, /* type */
1506 "veclower2", /* name */
1507 OPTGROUP_VEC, /* optinfo_flags */
1508 false, /* has_gate */
1509 true, /* has_execute */
1510 TV_NONE, /* tv_id */
1511 PROP_cfg, /* properties_required */
1512 PROP_gimple_lvec, /* properties_provided */
1513 0, /* properties_destroyed */
1514 0, /* todo_flags_start */
1515 ( TODO_update_ssa | TODO_verify_ssa
1516 | TODO_verify_stmts
1517 | TODO_verify_flow
1518 | TODO_cleanup_cfg ), /* todo_flags_finish */
2b725155
RH
1519};
1520
27a4cd48
DM
1521class pass_lower_vector_ssa : public gimple_opt_pass
1522{
1523public:
c3284718
RS
1524 pass_lower_vector_ssa (gcc::context *ctxt)
1525 : gimple_opt_pass (pass_data_lower_vector_ssa, ctxt)
27a4cd48
DM
1526 {}
1527
1528 /* opt_pass methods: */
65d3284b 1529 opt_pass * clone () { return new pass_lower_vector_ssa (m_ctxt); }
27a4cd48
DM
1530 unsigned int execute () { return expand_vector_operations (); }
1531
1532}; // class pass_lower_vector_ssa
1533
1534} // anon namespace
1535
1536gimple_opt_pass *
1537make_pass_lower_vector_ssa (gcc::context *ctxt)
1538{
1539 return new pass_lower_vector_ssa (ctxt);
1540}
1541
2b725155 1542#include "gt-tree-vect-generic.h"