]>
Commit | Line | Data |
---|---|---|
2b725155 | 1 | /* Lower vector operations to scalar operations. |
85ec4feb | 2 | Copyright (C) 2004-2018 Free Software Foundation, Inc. |
2b725155 RH |
3 | |
4 | This file is part of GCC. | |
b8698a0f | 5 | |
2b725155 RH |
6 | GCC is free software; you can redistribute it and/or modify it |
7 | under the terms of the GNU General Public License as published by the | |
9dcd6f09 | 8 | Free Software Foundation; either version 3, or (at your option) any |
2b725155 | 9 | later version. |
b8698a0f | 10 | |
2b725155 RH |
11 | GCC is distributed in the hope that it will be useful, but WITHOUT |
12 | ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or | |
13 | FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License | |
14 | for more details. | |
b8698a0f | 15 | |
2b725155 | 16 | You should have received a copy of the GNU General Public License |
9dcd6f09 NC |
17 | along with GCC; see the file COPYING3. If not see |
18 | <http://www.gnu.org/licenses/>. */ | |
2b725155 RH |
19 | |
20 | #include "config.h" | |
21 | #include "system.h" | |
22 | #include "coretypes.h" | |
c7131fb2 | 23 | #include "backend.h" |
957060b5 | 24 | #include "rtl.h" |
2b725155 | 25 | #include "tree.h" |
c7131fb2 | 26 | #include "gimple.h" |
957060b5 | 27 | #include "tree-pass.h" |
c7131fb2 | 28 | #include "ssa.h" |
957060b5 AM |
29 | #include "expmed.h" |
30 | #include "optabs-tree.h" | |
31 | #include "diagnostic.h" | |
40e23961 | 32 | #include "fold-const.h" |
d8a2d370 | 33 | #include "stor-layout.h" |
2b725155 | 34 | #include "langhooks.h" |
2fb9a547 | 35 | #include "tree-eh.h" |
5be5c238 | 36 | #include "gimple-iterator.h" |
18f429e2 | 37 | #include "gimplify-me.h" |
3826795b | 38 | #include "gimplify.h" |
442b4905 | 39 | #include "tree-cfg.h" |
5ebaa477 | 40 | #include "tree-vector-builder.h" |
f151c9e1 | 41 | #include "vec-perm-indices.h" |
2b725155 | 42 | |
d246ab4f AS |
43 | |
44 | static void expand_vector_operations_1 (gimple_stmt_iterator *); | |
45 | ||
22afc2b3 RS |
46 | /* Return the number of elements in a vector type TYPE that we have |
47 | already decided needs to be expanded piecewise. We don't support | |
48 | this kind of expansion for variable-length vectors, since we should | |
49 | always check for target support before introducing uses of those. */ | |
50 | static unsigned int | |
51 | nunits_for_known_piecewise_op (const_tree type) | |
52 | { | |
53 | return TYPE_VECTOR_SUBPARTS (type); | |
54 | } | |
55 | ||
56 | /* Return true if TYPE1 has more elements than TYPE2, where either | |
57 | type may be a vector or a scalar. */ | |
58 | ||
59 | static inline bool | |
60 | subparts_gt (tree type1, tree type2) | |
61 | { | |
62 | poly_uint64 n1 = VECTOR_TYPE_P (type1) ? TYPE_VECTOR_SUBPARTS (type1) : 1; | |
63 | poly_uint64 n2 = VECTOR_TYPE_P (type2) ? TYPE_VECTOR_SUBPARTS (type2) : 1; | |
64 | return known_gt (n1, n2); | |
65 | } | |
d246ab4f | 66 | |
2b725155 RH |
67 | /* Build a constant of type TYPE, made of VALUE's bits replicated |
68 | every TYPE_SIZE (INNER_TYPE) bits to fit TYPE's precision. */ | |
69 | static tree | |
70 | build_replicated_const (tree type, tree inner_type, HOST_WIDE_INT value) | |
71 | { | |
ae7e9ddd | 72 | int width = tree_to_uhwi (TYPE_SIZE (inner_type)); |
807e902e KZ |
73 | int n = (TYPE_PRECISION (type) + HOST_BITS_PER_WIDE_INT - 1) |
74 | / HOST_BITS_PER_WIDE_INT; | |
75 | unsigned HOST_WIDE_INT low, mask; | |
76 | HOST_WIDE_INT a[WIDE_INT_MAX_ELTS]; | |
77 | int i; | |
2b725155 | 78 | |
807e902e | 79 | gcc_assert (n && n <= WIDE_INT_MAX_ELTS); |
2b725155 RH |
80 | |
81 | if (width == HOST_BITS_PER_WIDE_INT) | |
82 | low = value; | |
83 | else | |
84 | { | |
85 | mask = ((HOST_WIDE_INT)1 << width) - 1; | |
86 | low = (unsigned HOST_WIDE_INT) ~0 / mask * (value & mask); | |
87 | } | |
88 | ||
807e902e KZ |
89 | for (i = 0; i < n; i++) |
90 | a[i] = low; | |
2b725155 | 91 | |
807e902e KZ |
92 | gcc_assert (TYPE_PRECISION (type) <= MAX_BITSIZE_MODE_ANY_INT); |
93 | return wide_int_to_tree | |
94 | (type, wide_int::from_array (a, n, TYPE_PRECISION (type))); | |
2b725155 RH |
95 | } |
96 | ||
97 | static GTY(()) tree vector_inner_type; | |
98 | static GTY(()) tree vector_last_type; | |
99 | static GTY(()) int vector_last_nunits; | |
100 | ||
101 | /* Return a suitable vector types made of SUBPARTS units each of mode | |
102 | "word_mode" (the global variable). */ | |
103 | static tree | |
104 | build_word_mode_vector_type (int nunits) | |
105 | { | |
106 | if (!vector_inner_type) | |
107 | vector_inner_type = lang_hooks.types.type_for_mode (word_mode, 1); | |
108 | else if (vector_last_nunits == nunits) | |
109 | { | |
110 | gcc_assert (TREE_CODE (vector_last_type) == VECTOR_TYPE); | |
111 | return vector_last_type; | |
112 | } | |
113 | ||
114 | /* We build a new type, but we canonicalize it nevertheless, | |
115 | because it still saves some memory. */ | |
116 | vector_last_nunits = nunits; | |
117 | vector_last_type = type_hash_canon (nunits, | |
118 | build_vector_type (vector_inner_type, | |
119 | nunits)); | |
120 | return vector_last_type; | |
121 | } | |
122 | ||
726a989a | 123 | typedef tree (*elem_op_func) (gimple_stmt_iterator *, |
9f47c7e5 IE |
124 | tree, tree, tree, tree, tree, enum tree_code, |
125 | tree); | |
2b725155 RH |
126 | |
127 | static inline tree | |
8f66e7dc RH |
128 | tree_vec_extract (gimple_stmt_iterator *gsi, tree type, |
129 | tree t, tree bitsize, tree bitpos) | |
2b725155 | 130 | { |
3826795b RB |
131 | if (TREE_CODE (t) == SSA_NAME) |
132 | { | |
133 | gimple *def_stmt = SSA_NAME_DEF_STMT (t); | |
134 | if (is_gimple_assign (def_stmt) | |
135 | && (gimple_assign_rhs_code (def_stmt) == VECTOR_CST | |
136 | || (bitpos | |
137 | && gimple_assign_rhs_code (def_stmt) == CONSTRUCTOR))) | |
138 | t = gimple_assign_rhs1 (def_stmt); | |
139 | } | |
2b725155 | 140 | if (bitpos) |
9f47c7e5 IE |
141 | { |
142 | if (TREE_CODE (type) == BOOLEAN_TYPE) | |
143 | { | |
144 | tree itype | |
145 | = build_nonstandard_integer_type (tree_to_uhwi (bitsize), 0); | |
8f66e7dc RH |
146 | tree field = gimplify_build3 (gsi, BIT_FIELD_REF, itype, t, |
147 | bitsize, bitpos); | |
148 | return gimplify_build2 (gsi, NE_EXPR, type, field, | |
149 | build_zero_cst (itype)); | |
9f47c7e5 | 150 | } |
8f66e7dc RH |
151 | else |
152 | return gimplify_build3 (gsi, BIT_FIELD_REF, type, t, bitsize, bitpos); | |
9f47c7e5 | 153 | } |
8f66e7dc RH |
154 | else |
155 | return gimplify_build1 (gsi, VIEW_CONVERT_EXPR, type, t); | |
2b725155 RH |
156 | } |
157 | ||
158 | static tree | |
726a989a | 159 | do_unop (gimple_stmt_iterator *gsi, tree inner_type, tree a, |
2b725155 | 160 | tree b ATTRIBUTE_UNUSED, tree bitpos, tree bitsize, |
9f47c7e5 | 161 | enum tree_code code, tree type ATTRIBUTE_UNUSED) |
2b725155 | 162 | { |
8f66e7dc | 163 | a = tree_vec_extract (gsi, inner_type, a, bitsize, bitpos); |
726a989a | 164 | return gimplify_build1 (gsi, code, inner_type, a); |
2b725155 RH |
165 | } |
166 | ||
167 | static tree | |
726a989a | 168 | do_binop (gimple_stmt_iterator *gsi, tree inner_type, tree a, tree b, |
9f47c7e5 IE |
169 | tree bitpos, tree bitsize, enum tree_code code, |
170 | tree type ATTRIBUTE_UNUSED) | |
2b725155 | 171 | { |
362235e9 | 172 | if (TREE_CODE (TREE_TYPE (a)) == VECTOR_TYPE) |
8f66e7dc | 173 | a = tree_vec_extract (gsi, inner_type, a, bitsize, bitpos); |
362235e9 | 174 | if (TREE_CODE (TREE_TYPE (b)) == VECTOR_TYPE) |
8f66e7dc | 175 | b = tree_vec_extract (gsi, inner_type, b, bitsize, bitpos); |
726a989a | 176 | return gimplify_build2 (gsi, code, inner_type, a, b); |
2b725155 RH |
177 | } |
178 | ||
d246ab4f AS |
179 | /* Construct expression (A[BITPOS] code B[BITPOS]) ? -1 : 0 |
180 | ||
181 | INNER_TYPE is the type of A and B elements | |
182 | ||
183 | returned expression is of signed integer type with the | |
184 | size equal to the size of INNER_TYPE. */ | |
185 | static tree | |
186 | do_compare (gimple_stmt_iterator *gsi, tree inner_type, tree a, tree b, | |
9f47c7e5 | 187 | tree bitpos, tree bitsize, enum tree_code code, tree type) |
d246ab4f | 188 | { |
337d2167 IE |
189 | tree stype = TREE_TYPE (type); |
190 | tree cst_false = build_zero_cst (stype); | |
191 | tree cst_true = build_all_ones_cst (stype); | |
192 | tree cmp; | |
193 | ||
8f66e7dc RH |
194 | a = tree_vec_extract (gsi, inner_type, a, bitsize, bitpos); |
195 | b = tree_vec_extract (gsi, inner_type, b, bitsize, bitpos); | |
d246ab4f | 196 | |
337d2167 IE |
197 | cmp = build2 (code, boolean_type_node, a, b); |
198 | return gimplify_build3 (gsi, COND_EXPR, stype, cmp, cst_true, cst_false); | |
d246ab4f AS |
199 | } |
200 | ||
2b725155 RH |
201 | /* Expand vector addition to scalars. This does bit twiddling |
202 | in order to increase parallelism: | |
203 | ||
204 | a + b = (((int) a & 0x7f7f7f7f) + ((int) b & 0x7f7f7f7f)) ^ | |
205 | (a ^ b) & 0x80808080 | |
206 | ||
207 | a - b = (((int) a | 0x80808080) - ((int) b & 0x7f7f7f7f)) ^ | |
208 | (a ^ ~b) & 0x80808080 | |
209 | ||
210 | -b = (0x80808080 - ((int) b & 0x7f7f7f7f)) ^ (~b & 0x80808080) | |
211 | ||
212 | This optimization should be done only if 4 vector items or more | |
213 | fit into a word. */ | |
214 | static tree | |
726a989a | 215 | do_plus_minus (gimple_stmt_iterator *gsi, tree word_type, tree a, tree b, |
2b725155 | 216 | tree bitpos ATTRIBUTE_UNUSED, tree bitsize ATTRIBUTE_UNUSED, |
9f47c7e5 | 217 | enum tree_code code, tree type ATTRIBUTE_UNUSED) |
2b725155 RH |
218 | { |
219 | tree inner_type = TREE_TYPE (TREE_TYPE (a)); | |
220 | unsigned HOST_WIDE_INT max; | |
221 | tree low_bits, high_bits, a_low, b_low, result_low, signs; | |
222 | ||
223 | max = GET_MODE_MASK (TYPE_MODE (inner_type)); | |
224 | low_bits = build_replicated_const (word_type, inner_type, max >> 1); | |
225 | high_bits = build_replicated_const (word_type, inner_type, max & ~(max >> 1)); | |
226 | ||
8f66e7dc RH |
227 | a = tree_vec_extract (gsi, word_type, a, bitsize, bitpos); |
228 | b = tree_vec_extract (gsi, word_type, b, bitsize, bitpos); | |
2b725155 | 229 | |
726a989a RB |
230 | signs = gimplify_build2 (gsi, BIT_XOR_EXPR, word_type, a, b); |
231 | b_low = gimplify_build2 (gsi, BIT_AND_EXPR, word_type, b, low_bits); | |
2b725155 | 232 | if (code == PLUS_EXPR) |
726a989a | 233 | a_low = gimplify_build2 (gsi, BIT_AND_EXPR, word_type, a, low_bits); |
2b725155 RH |
234 | else |
235 | { | |
726a989a RB |
236 | a_low = gimplify_build2 (gsi, BIT_IOR_EXPR, word_type, a, high_bits); |
237 | signs = gimplify_build1 (gsi, BIT_NOT_EXPR, word_type, signs); | |
2b725155 RH |
238 | } |
239 | ||
726a989a RB |
240 | signs = gimplify_build2 (gsi, BIT_AND_EXPR, word_type, signs, high_bits); |
241 | result_low = gimplify_build2 (gsi, code, word_type, a_low, b_low); | |
242 | return gimplify_build2 (gsi, BIT_XOR_EXPR, word_type, result_low, signs); | |
2b725155 RH |
243 | } |
244 | ||
245 | static tree | |
726a989a | 246 | do_negate (gimple_stmt_iterator *gsi, tree word_type, tree b, |
2b725155 RH |
247 | tree unused ATTRIBUTE_UNUSED, tree bitpos ATTRIBUTE_UNUSED, |
248 | tree bitsize ATTRIBUTE_UNUSED, | |
9f47c7e5 IE |
249 | enum tree_code code ATTRIBUTE_UNUSED, |
250 | tree type ATTRIBUTE_UNUSED) | |
2b725155 RH |
251 | { |
252 | tree inner_type = TREE_TYPE (TREE_TYPE (b)); | |
253 | HOST_WIDE_INT max; | |
254 | tree low_bits, high_bits, b_low, result_low, signs; | |
255 | ||
256 | max = GET_MODE_MASK (TYPE_MODE (inner_type)); | |
257 | low_bits = build_replicated_const (word_type, inner_type, max >> 1); | |
258 | high_bits = build_replicated_const (word_type, inner_type, max & ~(max >> 1)); | |
259 | ||
8f66e7dc | 260 | b = tree_vec_extract (gsi, word_type, b, bitsize, bitpos); |
2b725155 | 261 | |
726a989a RB |
262 | b_low = gimplify_build2 (gsi, BIT_AND_EXPR, word_type, b, low_bits); |
263 | signs = gimplify_build1 (gsi, BIT_NOT_EXPR, word_type, b); | |
264 | signs = gimplify_build2 (gsi, BIT_AND_EXPR, word_type, signs, high_bits); | |
265 | result_low = gimplify_build2 (gsi, MINUS_EXPR, word_type, high_bits, b_low); | |
266 | return gimplify_build2 (gsi, BIT_XOR_EXPR, word_type, result_low, signs); | |
2b725155 RH |
267 | } |
268 | ||
269 | /* Expand a vector operation to scalars, by using many operations | |
270 | whose type is the vector type's inner type. */ | |
271 | static tree | |
726a989a | 272 | expand_vector_piecewise (gimple_stmt_iterator *gsi, elem_op_func f, |
2b725155 RH |
273 | tree type, tree inner_type, |
274 | tree a, tree b, enum tree_code code) | |
275 | { | |
9771b263 | 276 | vec<constructor_elt, va_gc> *v; |
2b725155 RH |
277 | tree part_width = TYPE_SIZE (inner_type); |
278 | tree index = bitsize_int (0); | |
22afc2b3 | 279 | int nunits = nunits_for_known_piecewise_op (type); |
ae7e9ddd RS |
280 | int delta = tree_to_uhwi (part_width) |
281 | / tree_to_uhwi (TYPE_SIZE (TREE_TYPE (type))); | |
2b725155 | 282 | int i; |
cdbb5ba3 AS |
283 | location_t loc = gimple_location (gsi_stmt (*gsi)); |
284 | ||
285 | if (types_compatible_p (gimple_expr_type (gsi_stmt (*gsi)), type)) | |
286 | warning_at (loc, OPT_Wvector_operation_performance, | |
287 | "vector operation will be expanded piecewise"); | |
288 | else | |
289 | warning_at (loc, OPT_Wvector_operation_performance, | |
290 | "vector operation will be expanded in parallel"); | |
2b725155 | 291 | |
9771b263 | 292 | vec_alloc (v, (nunits + delta - 1) / delta); |
2b725155 | 293 | for (i = 0; i < nunits; |
d35936ab | 294 | i += delta, index = int_const_binop (PLUS_EXPR, index, part_width)) |
2b725155 | 295 | { |
9f47c7e5 | 296 | tree result = f (gsi, inner_type, a, b, index, part_width, code, type); |
f32682ca | 297 | constructor_elt ce = {NULL_TREE, result}; |
9771b263 | 298 | v->quick_push (ce); |
2b725155 RH |
299 | } |
300 | ||
4038c495 | 301 | return build_constructor (type, v); |
2b725155 RH |
302 | } |
303 | ||
304 | /* Expand a vector operation to scalars with the freedom to use | |
305 | a scalar integer type, or to use a different size for the items | |
306 | in the vector type. */ | |
307 | static tree | |
726a989a | 308 | expand_vector_parallel (gimple_stmt_iterator *gsi, elem_op_func f, tree type, |
2b725155 RH |
309 | tree a, tree b, |
310 | enum tree_code code) | |
311 | { | |
312 | tree result, compute_type; | |
ae7e9ddd | 313 | int n_words = tree_to_uhwi (TYPE_SIZE_UNIT (type)) / UNITS_PER_WORD; |
cdbb5ba3 | 314 | location_t loc = gimple_location (gsi_stmt (*gsi)); |
2b725155 RH |
315 | |
316 | /* We have three strategies. If the type is already correct, just do | |
317 | the operation an element at a time. Else, if the vector is wider than | |
318 | one word, do it a word at a time; finally, if the vector is smaller | |
319 | than one word, do it as a scalar. */ | |
320 | if (TYPE_MODE (TREE_TYPE (type)) == word_mode) | |
726a989a | 321 | return expand_vector_piecewise (gsi, f, |
2b725155 RH |
322 | type, TREE_TYPE (type), |
323 | a, b, code); | |
324 | else if (n_words > 1) | |
325 | { | |
326 | tree word_type = build_word_mode_vector_type (n_words); | |
726a989a | 327 | result = expand_vector_piecewise (gsi, f, |
2b725155 RH |
328 | word_type, TREE_TYPE (word_type), |
329 | a, b, code); | |
726a989a RB |
330 | result = force_gimple_operand_gsi (gsi, result, true, NULL, true, |
331 | GSI_SAME_STMT); | |
2b725155 RH |
332 | } |
333 | else | |
334 | { | |
335 | /* Use a single scalar operation with a mode no wider than word_mode. */ | |
fffbab82 RS |
336 | scalar_int_mode mode |
337 | = int_mode_for_size (tree_to_uhwi (TYPE_SIZE (type)), 0).require (); | |
2b725155 | 338 | compute_type = lang_hooks.types.type_for_mode (mode, 1); |
9f47c7e5 | 339 | result = f (gsi, compute_type, a, b, NULL_TREE, NULL_TREE, code, type); |
cdbb5ba3 AS |
340 | warning_at (loc, OPT_Wvector_operation_performance, |
341 | "vector operation will be expanded with a " | |
342 | "single scalar operation"); | |
2b725155 RH |
343 | } |
344 | ||
345 | return result; | |
346 | } | |
347 | ||
348 | /* Expand a vector operation to scalars; for integer types we can use | |
349 | special bit twiddling tricks to do the sums a word at a time, using | |
350 | function F_PARALLEL instead of F. These tricks are done only if | |
351 | they can process at least four items, that is, only if the vector | |
352 | holds at least four items and if a word can hold four items. */ | |
353 | static tree | |
726a989a | 354 | expand_vector_addition (gimple_stmt_iterator *gsi, |
2b725155 RH |
355 | elem_op_func f, elem_op_func f_parallel, |
356 | tree type, tree a, tree b, enum tree_code code) | |
357 | { | |
358 | int parts_per_word = UNITS_PER_WORD | |
ae7e9ddd | 359 | / tree_to_uhwi (TYPE_SIZE_UNIT (TREE_TYPE (type))); |
2b725155 RH |
360 | |
361 | if (INTEGRAL_TYPE_P (TREE_TYPE (type)) | |
362 | && parts_per_word >= 4 | |
22afc2b3 | 363 | && nunits_for_known_piecewise_op (type) >= 4) |
726a989a | 364 | return expand_vector_parallel (gsi, f_parallel, |
2b725155 RH |
365 | type, a, b, code); |
366 | else | |
726a989a | 367 | return expand_vector_piecewise (gsi, f, |
2b725155 RH |
368 | type, TREE_TYPE (type), |
369 | a, b, code); | |
370 | } | |
371 | ||
d246ab4f AS |
372 | /* Try to expand vector comparison expression OP0 CODE OP1 by |
373 | querying optab if the following expression: | |
374 | VEC_COND_EXPR< OP0 CODE OP1, {-1,...}, {0,...}> | |
375 | can be expanded. */ | |
376 | static tree | |
377 | expand_vector_comparison (gimple_stmt_iterator *gsi, tree type, tree op0, | |
378 | tree op1, enum tree_code code) | |
379 | { | |
380 | tree t; | |
96592eed JJ |
381 | if (!expand_vec_cmp_expr_p (TREE_TYPE (op0), type, code) |
382 | && !expand_vec_cond_expr_p (type, TREE_TYPE (op0), code)) | |
d246ab4f AS |
383 | t = expand_vector_piecewise (gsi, do_compare, type, |
384 | TREE_TYPE (TREE_TYPE (op0)), op0, op1, code); | |
385 | else | |
386 | t = NULL_TREE; | |
387 | ||
388 | return t; | |
389 | } | |
390 | ||
4ee4c52c JJ |
391 | /* Helper function of expand_vector_divmod. Gimplify a RSHIFT_EXPR in type |
392 | of OP0 with shift counts in SHIFTCNTS array and return the temporary holding | |
393 | the result if successful, otherwise return NULL_TREE. */ | |
394 | static tree | |
395 | add_rshift (gimple_stmt_iterator *gsi, tree type, tree op0, int *shiftcnts) | |
396 | { | |
397 | optab op; | |
22afc2b3 | 398 | unsigned int i, nunits = nunits_for_known_piecewise_op (type); |
4ee4c52c JJ |
399 | bool scalar_shift = true; |
400 | ||
401 | for (i = 1; i < nunits; i++) | |
402 | { | |
403 | if (shiftcnts[i] != shiftcnts[0]) | |
404 | scalar_shift = false; | |
405 | } | |
406 | ||
407 | if (scalar_shift && shiftcnts[0] == 0) | |
408 | return op0; | |
409 | ||
410 | if (scalar_shift) | |
411 | { | |
412 | op = optab_for_tree_code (RSHIFT_EXPR, type, optab_scalar); | |
2225b9f2 | 413 | if (op != unknown_optab |
4ee4c52c JJ |
414 | && optab_handler (op, TYPE_MODE (type)) != CODE_FOR_nothing) |
415 | return gimplify_build2 (gsi, RSHIFT_EXPR, type, op0, | |
416 | build_int_cst (NULL_TREE, shiftcnts[0])); | |
417 | } | |
418 | ||
419 | op = optab_for_tree_code (RSHIFT_EXPR, type, optab_vector); | |
2225b9f2 | 420 | if (op != unknown_optab |
4ee4c52c JJ |
421 | && optab_handler (op, TYPE_MODE (type)) != CODE_FOR_nothing) |
422 | { | |
5ebaa477 | 423 | tree_vector_builder vec (type, nunits, 1); |
4ee4c52c | 424 | for (i = 0; i < nunits; i++) |
794e3180 | 425 | vec.quick_push (build_int_cst (TREE_TYPE (type), shiftcnts[i])); |
5ebaa477 | 426 | return gimplify_build2 (gsi, RSHIFT_EXPR, type, op0, vec.build ()); |
4ee4c52c JJ |
427 | } |
428 | ||
429 | return NULL_TREE; | |
430 | } | |
431 | ||
432 | /* Try to expand integer vector division by constant using | |
433 | widening multiply, shifts and additions. */ | |
434 | static tree | |
435 | expand_vector_divmod (gimple_stmt_iterator *gsi, tree type, tree op0, | |
436 | tree op1, enum tree_code code) | |
437 | { | |
438 | bool use_pow2 = true; | |
439 | bool has_vector_shift = true; | |
440 | int mode = -1, this_mode; | |
441 | int pre_shift = -1, post_shift; | |
22afc2b3 | 442 | unsigned int nunits = nunits_for_known_piecewise_op (type); |
4ee4c52c JJ |
443 | int *shifts = XALLOCAVEC (int, nunits * 4); |
444 | int *pre_shifts = shifts + nunits; | |
445 | int *post_shifts = pre_shifts + nunits; | |
446 | int *shift_temps = post_shifts + nunits; | |
447 | unsigned HOST_WIDE_INT *mulc = XALLOCAVEC (unsigned HOST_WIDE_INT, nunits); | |
448 | int prec = TYPE_PRECISION (TREE_TYPE (type)); | |
449 | int dummy_int; | |
807e902e KZ |
450 | unsigned int i; |
451 | signop sign_p = TYPE_SIGN (TREE_TYPE (type)); | |
4ee4c52c | 452 | unsigned HOST_WIDE_INT mask = GET_MODE_MASK (TYPE_MODE (TREE_TYPE (type))); |
00f07b86 RH |
453 | tree cur_op, mulcst, tem; |
454 | optab op; | |
4ee4c52c JJ |
455 | |
456 | if (prec > HOST_BITS_PER_WIDE_INT) | |
457 | return NULL_TREE; | |
458 | ||
459 | op = optab_for_tree_code (RSHIFT_EXPR, type, optab_vector); | |
2225b9f2 | 460 | if (op == unknown_optab |
4ee4c52c JJ |
461 | || optab_handler (op, TYPE_MODE (type)) == CODE_FOR_nothing) |
462 | has_vector_shift = false; | |
463 | ||
464 | /* Analysis phase. Determine if all op1 elements are either power | |
465 | of two and it is possible to expand it using shifts (or for remainder | |
466 | using masking). Additionally compute the multiplicative constants | |
467 | and pre and post shifts if the division is to be expanded using | |
468 | widening or high part multiplication plus shifts. */ | |
469 | for (i = 0; i < nunits; i++) | |
470 | { | |
471 | tree cst = VECTOR_CST_ELT (op1, i); | |
472 | unsigned HOST_WIDE_INT ml; | |
473 | ||
6b58915b | 474 | if (TREE_CODE (cst) != INTEGER_CST || integer_zerop (cst)) |
4ee4c52c JJ |
475 | return NULL_TREE; |
476 | pre_shifts[i] = 0; | |
477 | post_shifts[i] = 0; | |
478 | mulc[i] = 0; | |
479 | if (use_pow2 | |
480 | && (!integer_pow2p (cst) || tree_int_cst_sgn (cst) != 1)) | |
481 | use_pow2 = false; | |
482 | if (use_pow2) | |
483 | { | |
484 | shifts[i] = tree_log2 (cst); | |
485 | if (shifts[i] != shifts[0] | |
486 | && code == TRUNC_DIV_EXPR | |
487 | && !has_vector_shift) | |
488 | use_pow2 = false; | |
489 | } | |
490 | if (mode == -2) | |
491 | continue; | |
807e902e | 492 | if (sign_p == UNSIGNED) |
4ee4c52c JJ |
493 | { |
494 | unsigned HOST_WIDE_INT mh; | |
6b58915b | 495 | unsigned HOST_WIDE_INT d = TREE_INT_CST_LOW (cst) & mask; |
4ee4c52c | 496 | |
fecfbfa4 | 497 | if (d >= (HOST_WIDE_INT_1U << (prec - 1))) |
4ee4c52c JJ |
498 | /* FIXME: Can transform this into op0 >= op1 ? 1 : 0. */ |
499 | return NULL_TREE; | |
500 | ||
501 | if (d <= 1) | |
502 | { | |
503 | mode = -2; | |
504 | continue; | |
505 | } | |
506 | ||
507 | /* Find a suitable multiplier and right shift count | |
508 | instead of multiplying with D. */ | |
509 | mh = choose_multiplier (d, prec, prec, &ml, &post_shift, &dummy_int); | |
510 | ||
511 | /* If the suggested multiplier is more than SIZE bits, we can | |
512 | do better for even divisors, using an initial right shift. */ | |
513 | if ((mh != 0 && (d & 1) == 0) | |
514 | || (!has_vector_shift && pre_shift != -1)) | |
515 | { | |
516 | if (has_vector_shift) | |
146ec50f | 517 | pre_shift = ctz_or_zero (d); |
4ee4c52c JJ |
518 | else if (pre_shift == -1) |
519 | { | |
520 | unsigned int j; | |
521 | for (j = 0; j < nunits; j++) | |
522 | { | |
523 | tree cst2 = VECTOR_CST_ELT (op1, j); | |
524 | unsigned HOST_WIDE_INT d2; | |
525 | int this_pre_shift; | |
526 | ||
cc269bb6 | 527 | if (!tree_fits_uhwi_p (cst2)) |
4ee4c52c | 528 | return NULL_TREE; |
ae7e9ddd | 529 | d2 = tree_to_uhwi (cst2) & mask; |
4ee4c52c JJ |
530 | if (d2 == 0) |
531 | return NULL_TREE; | |
532 | this_pre_shift = floor_log2 (d2 & -d2); | |
533 | if (pre_shift == -1 || this_pre_shift < pre_shift) | |
534 | pre_shift = this_pre_shift; | |
535 | } | |
536 | if (i != 0 && pre_shift != 0) | |
537 | { | |
538 | /* Restart. */ | |
539 | i = -1U; | |
540 | mode = -1; | |
541 | continue; | |
542 | } | |
543 | } | |
544 | if (pre_shift != 0) | |
545 | { | |
546 | if ((d >> pre_shift) <= 1) | |
547 | { | |
548 | mode = -2; | |
549 | continue; | |
550 | } | |
551 | mh = choose_multiplier (d >> pre_shift, prec, | |
552 | prec - pre_shift, | |
553 | &ml, &post_shift, &dummy_int); | |
554 | gcc_assert (!mh); | |
555 | pre_shifts[i] = pre_shift; | |
556 | } | |
557 | } | |
558 | if (!mh) | |
559 | this_mode = 0; | |
560 | else | |
561 | this_mode = 1; | |
562 | } | |
563 | else | |
564 | { | |
6b58915b | 565 | HOST_WIDE_INT d = TREE_INT_CST_LOW (cst); |
4ee4c52c JJ |
566 | unsigned HOST_WIDE_INT abs_d; |
567 | ||
568 | if (d == -1) | |
569 | return NULL_TREE; | |
570 | ||
571 | /* Since d might be INT_MIN, we have to cast to | |
572 | unsigned HOST_WIDE_INT before negating to avoid | |
573 | undefined signed overflow. */ | |
574 | abs_d = (d >= 0 | |
575 | ? (unsigned HOST_WIDE_INT) d | |
576 | : - (unsigned HOST_WIDE_INT) d); | |
577 | ||
578 | /* n rem d = n rem -d */ | |
579 | if (code == TRUNC_MOD_EXPR && d < 0) | |
580 | d = abs_d; | |
fecfbfa4 | 581 | else if (abs_d == HOST_WIDE_INT_1U << (prec - 1)) |
4ee4c52c JJ |
582 | { |
583 | /* This case is not handled correctly below. */ | |
584 | mode = -2; | |
585 | continue; | |
586 | } | |
587 | if (abs_d <= 1) | |
588 | { | |
589 | mode = -2; | |
590 | continue; | |
591 | } | |
592 | ||
593 | choose_multiplier (abs_d, prec, prec - 1, &ml, | |
594 | &post_shift, &dummy_int); | |
fecfbfa4 | 595 | if (ml >= HOST_WIDE_INT_1U << (prec - 1)) |
4ee4c52c JJ |
596 | { |
597 | this_mode = 4 + (d < 0); | |
dd4786fe | 598 | ml |= HOST_WIDE_INT_M1U << (prec - 1); |
4ee4c52c JJ |
599 | } |
600 | else | |
601 | this_mode = 2 + (d < 0); | |
602 | } | |
603 | mulc[i] = ml; | |
604 | post_shifts[i] = post_shift; | |
605 | if ((i && !has_vector_shift && post_shifts[0] != post_shift) | |
606 | || post_shift >= prec | |
607 | || pre_shifts[i] >= prec) | |
608 | this_mode = -2; | |
609 | ||
610 | if (i == 0) | |
611 | mode = this_mode; | |
612 | else if (mode != this_mode) | |
613 | mode = -2; | |
614 | } | |
615 | ||
4ee4c52c JJ |
616 | if (use_pow2) |
617 | { | |
618 | tree addend = NULL_TREE; | |
807e902e | 619 | if (sign_p == SIGNED) |
4ee4c52c JJ |
620 | { |
621 | tree uns_type; | |
622 | ||
623 | /* Both division and remainder sequences need | |
624 | op0 < 0 ? mask : 0 computed. It can be either computed as | |
625 | (type) (((uns_type) (op0 >> (prec - 1))) >> (prec - shifts[i])) | |
626 | if none of the shifts is 0, or as the conditional. */ | |
627 | for (i = 0; i < nunits; i++) | |
628 | if (shifts[i] == 0) | |
629 | break; | |
630 | uns_type | |
631 | = build_vector_type (build_nonstandard_integer_type (prec, 1), | |
632 | nunits); | |
633 | if (i == nunits && TYPE_MODE (uns_type) == TYPE_MODE (type)) | |
634 | { | |
635 | for (i = 0; i < nunits; i++) | |
636 | shift_temps[i] = prec - 1; | |
637 | cur_op = add_rshift (gsi, type, op0, shift_temps); | |
638 | if (cur_op != NULL_TREE) | |
639 | { | |
640 | cur_op = gimplify_build1 (gsi, VIEW_CONVERT_EXPR, | |
641 | uns_type, cur_op); | |
642 | for (i = 0; i < nunits; i++) | |
643 | shift_temps[i] = prec - shifts[i]; | |
644 | cur_op = add_rshift (gsi, uns_type, cur_op, shift_temps); | |
645 | if (cur_op != NULL_TREE) | |
646 | addend = gimplify_build1 (gsi, VIEW_CONVERT_EXPR, | |
647 | type, cur_op); | |
648 | } | |
649 | } | |
650 | if (addend == NULL_TREE | |
96592eed | 651 | && expand_vec_cond_expr_p (type, type, LT_EXPR)) |
4ee4c52c | 652 | { |
9f47c7e5 | 653 | tree zero, cst, cond, mask_type; |
355fe088 | 654 | gimple *stmt; |
4ee4c52c | 655 | |
9f47c7e5 | 656 | mask_type = build_same_sized_truth_vector_type (type); |
4ee4c52c | 657 | zero = build_zero_cst (type); |
9f47c7e5 | 658 | cond = build2 (LT_EXPR, mask_type, op0, zero); |
5ebaa477 | 659 | tree_vector_builder vec (type, nunits, 1); |
4ee4c52c | 660 | for (i = 0; i < nunits; i++) |
794e3180 RS |
661 | vec.quick_push (build_int_cst (TREE_TYPE (type), |
662 | (HOST_WIDE_INT_1U | |
663 | << shifts[i]) - 1)); | |
5ebaa477 | 664 | cst = vec.build (); |
b731b390 | 665 | addend = make_ssa_name (type); |
0d0e4a03 JJ |
666 | stmt = gimple_build_assign (addend, VEC_COND_EXPR, cond, |
667 | cst, zero); | |
4ee4c52c JJ |
668 | gsi_insert_before (gsi, stmt, GSI_SAME_STMT); |
669 | } | |
670 | } | |
671 | if (code == TRUNC_DIV_EXPR) | |
672 | { | |
807e902e | 673 | if (sign_p == UNSIGNED) |
4ee4c52c JJ |
674 | { |
675 | /* q = op0 >> shift; */ | |
676 | cur_op = add_rshift (gsi, type, op0, shifts); | |
677 | if (cur_op != NULL_TREE) | |
678 | return cur_op; | |
679 | } | |
680 | else if (addend != NULL_TREE) | |
681 | { | |
682 | /* t1 = op0 + addend; | |
683 | q = t1 >> shift; */ | |
684 | op = optab_for_tree_code (PLUS_EXPR, type, optab_default); | |
2225b9f2 | 685 | if (op != unknown_optab |
4ee4c52c JJ |
686 | && optab_handler (op, TYPE_MODE (type)) != CODE_FOR_nothing) |
687 | { | |
688 | cur_op = gimplify_build2 (gsi, PLUS_EXPR, type, op0, addend); | |
689 | cur_op = add_rshift (gsi, type, cur_op, shifts); | |
690 | if (cur_op != NULL_TREE) | |
691 | return cur_op; | |
692 | } | |
693 | } | |
694 | } | |
695 | else | |
696 | { | |
697 | tree mask; | |
5ebaa477 | 698 | tree_vector_builder vec (type, nunits, 1); |
4ee4c52c | 699 | for (i = 0; i < nunits; i++) |
794e3180 RS |
700 | vec.quick_push (build_int_cst (TREE_TYPE (type), |
701 | (HOST_WIDE_INT_1U | |
702 | << shifts[i]) - 1)); | |
5ebaa477 | 703 | mask = vec.build (); |
4ee4c52c | 704 | op = optab_for_tree_code (BIT_AND_EXPR, type, optab_default); |
2225b9f2 | 705 | if (op != unknown_optab |
4ee4c52c JJ |
706 | && optab_handler (op, TYPE_MODE (type)) != CODE_FOR_nothing) |
707 | { | |
807e902e | 708 | if (sign_p == UNSIGNED) |
4ee4c52c JJ |
709 | /* r = op0 & mask; */ |
710 | return gimplify_build2 (gsi, BIT_AND_EXPR, type, op0, mask); | |
711 | else if (addend != NULL_TREE) | |
712 | { | |
713 | /* t1 = op0 + addend; | |
714 | t2 = t1 & mask; | |
715 | r = t2 - addend; */ | |
716 | op = optab_for_tree_code (PLUS_EXPR, type, optab_default); | |
2225b9f2 | 717 | if (op != unknown_optab |
4ee4c52c JJ |
718 | && optab_handler (op, TYPE_MODE (type)) |
719 | != CODE_FOR_nothing) | |
720 | { | |
721 | cur_op = gimplify_build2 (gsi, PLUS_EXPR, type, op0, | |
722 | addend); | |
723 | cur_op = gimplify_build2 (gsi, BIT_AND_EXPR, type, | |
724 | cur_op, mask); | |
725 | op = optab_for_tree_code (MINUS_EXPR, type, | |
726 | optab_default); | |
2225b9f2 | 727 | if (op != unknown_optab |
4ee4c52c JJ |
728 | && optab_handler (op, TYPE_MODE (type)) |
729 | != CODE_FOR_nothing) | |
730 | return gimplify_build2 (gsi, MINUS_EXPR, type, | |
731 | cur_op, addend); | |
732 | } | |
733 | } | |
734 | } | |
735 | } | |
736 | } | |
737 | ||
738 | if (mode == -2 || BYTES_BIG_ENDIAN != WORDS_BIG_ENDIAN) | |
739 | return NULL_TREE; | |
740 | ||
00f07b86 RH |
741 | if (!can_mult_highpart_p (TYPE_MODE (type), TYPE_UNSIGNED (type))) |
742 | return NULL_TREE; | |
4ee4c52c JJ |
743 | |
744 | cur_op = op0; | |
745 | ||
746 | switch (mode) | |
747 | { | |
748 | case 0: | |
807e902e | 749 | gcc_assert (sign_p == UNSIGNED); |
4ee4c52c | 750 | /* t1 = oprnd0 >> pre_shift; |
c9ba3307 | 751 | t2 = t1 h* ml; |
4ee4c52c JJ |
752 | q = t2 >> post_shift; */ |
753 | cur_op = add_rshift (gsi, type, cur_op, pre_shifts); | |
754 | if (cur_op == NULL_TREE) | |
755 | return NULL_TREE; | |
756 | break; | |
757 | case 1: | |
807e902e | 758 | gcc_assert (sign_p == UNSIGNED); |
4ee4c52c JJ |
759 | for (i = 0; i < nunits; i++) |
760 | { | |
761 | shift_temps[i] = 1; | |
762 | post_shifts[i]--; | |
763 | } | |
764 | break; | |
765 | case 2: | |
766 | case 3: | |
767 | case 4: | |
768 | case 5: | |
807e902e | 769 | gcc_assert (sign_p == SIGNED); |
4ee4c52c JJ |
770 | for (i = 0; i < nunits; i++) |
771 | shift_temps[i] = prec - 1; | |
772 | break; | |
773 | default: | |
774 | return NULL_TREE; | |
775 | } | |
776 | ||
5ebaa477 | 777 | tree_vector_builder vec (type, nunits, 1); |
4ee4c52c | 778 | for (i = 0; i < nunits; i++) |
794e3180 | 779 | vec.quick_push (build_int_cst (TREE_TYPE (type), mulc[i])); |
5ebaa477 | 780 | mulcst = vec.build (); |
0fcc85cd | 781 | |
00f07b86 | 782 | cur_op = gimplify_build2 (gsi, MULT_HIGHPART_EXPR, type, cur_op, mulcst); |
4ee4c52c JJ |
783 | |
784 | switch (mode) | |
785 | { | |
786 | case 0: | |
787 | /* t1 = oprnd0 >> pre_shift; | |
c9ba3307 | 788 | t2 = t1 h* ml; |
4ee4c52c JJ |
789 | q = t2 >> post_shift; */ |
790 | cur_op = add_rshift (gsi, type, cur_op, post_shifts); | |
791 | break; | |
792 | case 1: | |
c9ba3307 | 793 | /* t1 = oprnd0 h* ml; |
4ee4c52c JJ |
794 | t2 = oprnd0 - t1; |
795 | t3 = t2 >> 1; | |
796 | t4 = t1 + t3; | |
797 | q = t4 >> (post_shift - 1); */ | |
798 | op = optab_for_tree_code (MINUS_EXPR, type, optab_default); | |
2225b9f2 | 799 | if (op == unknown_optab |
4ee4c52c JJ |
800 | || optab_handler (op, TYPE_MODE (type)) == CODE_FOR_nothing) |
801 | return NULL_TREE; | |
802 | tem = gimplify_build2 (gsi, MINUS_EXPR, type, op0, cur_op); | |
803 | tem = add_rshift (gsi, type, tem, shift_temps); | |
804 | op = optab_for_tree_code (PLUS_EXPR, type, optab_default); | |
2225b9f2 | 805 | if (op == unknown_optab |
4ee4c52c JJ |
806 | || optab_handler (op, TYPE_MODE (type)) == CODE_FOR_nothing) |
807 | return NULL_TREE; | |
808 | tem = gimplify_build2 (gsi, PLUS_EXPR, type, cur_op, tem); | |
809 | cur_op = add_rshift (gsi, type, tem, post_shifts); | |
810 | if (cur_op == NULL_TREE) | |
811 | return NULL_TREE; | |
812 | break; | |
813 | case 2: | |
814 | case 3: | |
815 | case 4: | |
816 | case 5: | |
c9ba3307 | 817 | /* t1 = oprnd0 h* ml; |
4ee4c52c JJ |
818 | t2 = t1; [ iff (mode & 2) != 0 ] |
819 | t2 = t1 + oprnd0; [ iff (mode & 2) == 0 ] | |
820 | t3 = t2 >> post_shift; | |
821 | t4 = oprnd0 >> (prec - 1); | |
822 | q = t3 - t4; [ iff (mode & 1) == 0 ] | |
823 | q = t4 - t3; [ iff (mode & 1) != 0 ] */ | |
824 | if ((mode & 2) == 0) | |
825 | { | |
826 | op = optab_for_tree_code (PLUS_EXPR, type, optab_default); | |
2225b9f2 | 827 | if (op == unknown_optab |
4ee4c52c JJ |
828 | || optab_handler (op, TYPE_MODE (type)) == CODE_FOR_nothing) |
829 | return NULL_TREE; | |
830 | cur_op = gimplify_build2 (gsi, PLUS_EXPR, type, cur_op, op0); | |
831 | } | |
832 | cur_op = add_rshift (gsi, type, cur_op, post_shifts); | |
833 | if (cur_op == NULL_TREE) | |
834 | return NULL_TREE; | |
835 | tem = add_rshift (gsi, type, op0, shift_temps); | |
836 | if (tem == NULL_TREE) | |
837 | return NULL_TREE; | |
838 | op = optab_for_tree_code (MINUS_EXPR, type, optab_default); | |
2225b9f2 | 839 | if (op == unknown_optab |
4ee4c52c JJ |
840 | || optab_handler (op, TYPE_MODE (type)) == CODE_FOR_nothing) |
841 | return NULL_TREE; | |
842 | if ((mode & 1) == 0) | |
843 | cur_op = gimplify_build2 (gsi, MINUS_EXPR, type, cur_op, tem); | |
844 | else | |
845 | cur_op = gimplify_build2 (gsi, MINUS_EXPR, type, tem, cur_op); | |
846 | break; | |
847 | default: | |
848 | gcc_unreachable (); | |
849 | } | |
850 | ||
851 | if (code == TRUNC_DIV_EXPR) | |
852 | return cur_op; | |
853 | ||
854 | /* We divided. Now finish by: | |
855 | t1 = q * oprnd1; | |
856 | r = oprnd0 - t1; */ | |
857 | op = optab_for_tree_code (MULT_EXPR, type, optab_default); | |
2225b9f2 | 858 | if (op == unknown_optab |
4ee4c52c JJ |
859 | || optab_handler (op, TYPE_MODE (type)) == CODE_FOR_nothing) |
860 | return NULL_TREE; | |
861 | tem = gimplify_build2 (gsi, MULT_EXPR, type, cur_op, op1); | |
862 | op = optab_for_tree_code (MINUS_EXPR, type, optab_default); | |
2225b9f2 | 863 | if (op == unknown_optab |
4ee4c52c JJ |
864 | || optab_handler (op, TYPE_MODE (type)) == CODE_FOR_nothing) |
865 | return NULL_TREE; | |
866 | return gimplify_build2 (gsi, MINUS_EXPR, type, op0, tem); | |
867 | } | |
868 | ||
374ab2d7 MG |
869 | /* Expand a vector condition to scalars, by using many conditions |
870 | on the vector's elements. */ | |
871 | static void | |
872 | expand_vector_condition (gimple_stmt_iterator *gsi) | |
873 | { | |
538dd0b7 | 874 | gassign *stmt = as_a <gassign *> (gsi_stmt (*gsi)); |
374ab2d7 MG |
875 | tree type = gimple_expr_type (stmt); |
876 | tree a = gimple_assign_rhs1 (stmt); | |
877 | tree a1 = a; | |
486208e7 | 878 | tree a2 = NULL_TREE; |
374ab2d7 MG |
879 | bool a_is_comparison = false; |
880 | tree b = gimple_assign_rhs2 (stmt); | |
881 | tree c = gimple_assign_rhs3 (stmt); | |
9771b263 | 882 | vec<constructor_elt, va_gc> *v; |
374ab2d7 MG |
883 | tree constr; |
884 | tree inner_type = TREE_TYPE (type); | |
885 | tree cond_type = TREE_TYPE (TREE_TYPE (a)); | |
886 | tree comp_inner_type = cond_type; | |
887 | tree width = TYPE_SIZE (inner_type); | |
888 | tree index = bitsize_int (0); | |
0f3f4ffe JJ |
889 | tree comp_width = width; |
890 | tree comp_index = index; | |
374ab2d7 MG |
891 | int i; |
892 | location_t loc = gimple_location (gsi_stmt (*gsi)); | |
893 | ||
784fb9b3 | 894 | if (!is_gimple_val (a)) |
374ab2d7 MG |
895 | { |
896 | gcc_assert (COMPARISON_CLASS_P (a)); | |
897 | a_is_comparison = true; | |
898 | a1 = TREE_OPERAND (a, 0); | |
899 | a2 = TREE_OPERAND (a, 1); | |
900 | comp_inner_type = TREE_TYPE (TREE_TYPE (a1)); | |
0f3f4ffe | 901 | comp_width = TYPE_SIZE (comp_inner_type); |
374ab2d7 MG |
902 | } |
903 | ||
96592eed | 904 | if (expand_vec_cond_expr_p (type, TREE_TYPE (a1), TREE_CODE (a))) |
374ab2d7 MG |
905 | return; |
906 | ||
0f3f4ffe JJ |
907 | /* Handle vector boolean types with bitmasks. If there is a comparison |
908 | and we can expand the comparison into the vector boolean bitmask, | |
909 | or otherwise if it is compatible with type, we can transform | |
910 | vbfld_1 = x_2 < y_3 ? vbfld_4 : vbfld_5; | |
911 | into | |
912 | tmp_6 = x_2 < y_3; | |
913 | tmp_7 = tmp_6 & vbfld_4; | |
914 | tmp_8 = ~tmp_6; | |
915 | tmp_9 = tmp_8 & vbfld_5; | |
916 | vbfld_1 = tmp_7 | tmp_9; | |
917 | Similarly for vbfld_10 instead of x_2 < y_3. */ | |
918 | if (VECTOR_BOOLEAN_TYPE_P (type) | |
919 | && SCALAR_INT_MODE_P (TYPE_MODE (type)) | |
920 | && (GET_MODE_BITSIZE (TYPE_MODE (type)) | |
921 | < (TYPE_VECTOR_SUBPARTS (type) | |
922 | * GET_MODE_BITSIZE (TYPE_MODE (TREE_TYPE (type))))) | |
923 | && (a_is_comparison | |
924 | ? useless_type_conversion_p (type, TREE_TYPE (a)) | |
925 | : expand_vec_cmp_expr_p (TREE_TYPE (a1), type, TREE_CODE (a)))) | |
926 | { | |
927 | if (a_is_comparison) | |
928 | a = gimplify_build2 (gsi, TREE_CODE (a), type, a1, a2); | |
929 | a1 = gimplify_build2 (gsi, BIT_AND_EXPR, type, a, b); | |
930 | a2 = gimplify_build1 (gsi, BIT_NOT_EXPR, type, a); | |
931 | a2 = gimplify_build2 (gsi, BIT_AND_EXPR, type, a2, c); | |
932 | a = gimplify_build2 (gsi, BIT_IOR_EXPR, type, a1, a2); | |
933 | gimple_assign_set_rhs_from_tree (gsi, a); | |
934 | update_stmt (gsi_stmt (*gsi)); | |
935 | return; | |
936 | } | |
937 | ||
374ab2d7 MG |
938 | /* TODO: try and find a smaller vector type. */ |
939 | ||
940 | warning_at (loc, OPT_Wvector_operation_performance, | |
941 | "vector condition will be expanded piecewise"); | |
942 | ||
22afc2b3 | 943 | int nunits = nunits_for_known_piecewise_op (type); |
9771b263 | 944 | vec_alloc (v, nunits); |
0f3f4ffe | 945 | for (i = 0; i < nunits; i++) |
374ab2d7 MG |
946 | { |
947 | tree aa, result; | |
8f66e7dc RH |
948 | tree bb = tree_vec_extract (gsi, inner_type, b, width, index); |
949 | tree cc = tree_vec_extract (gsi, inner_type, c, width, index); | |
374ab2d7 MG |
950 | if (a_is_comparison) |
951 | { | |
0f3f4ffe JJ |
952 | tree aa1 = tree_vec_extract (gsi, comp_inner_type, a1, |
953 | comp_width, comp_index); | |
954 | tree aa2 = tree_vec_extract (gsi, comp_inner_type, a2, | |
955 | comp_width, comp_index); | |
2fac8c14 | 956 | aa = fold_build2 (TREE_CODE (a), cond_type, aa1, aa2); |
374ab2d7 MG |
957 | } |
958 | else | |
8f66e7dc | 959 | aa = tree_vec_extract (gsi, cond_type, a, width, index); |
374ab2d7 MG |
960 | result = gimplify_build3 (gsi, COND_EXPR, inner_type, aa, bb, cc); |
961 | constructor_elt ce = {NULL_TREE, result}; | |
9771b263 | 962 | v->quick_push (ce); |
0f3f4ffe JJ |
963 | index = int_const_binop (PLUS_EXPR, index, width); |
964 | if (width == comp_width) | |
965 | comp_index = index; | |
966 | else | |
967 | comp_index = int_const_binop (PLUS_EXPR, comp_index, comp_width); | |
374ab2d7 MG |
968 | } |
969 | ||
970 | constr = build_constructor (type, v); | |
971 | gimple_assign_set_rhs_from_tree (gsi, constr); | |
972 | update_stmt (gsi_stmt (*gsi)); | |
973 | } | |
974 | ||
2b725155 | 975 | static tree |
726a989a | 976 | expand_vector_operation (gimple_stmt_iterator *gsi, tree type, tree compute_type, |
538dd0b7 | 977 | gassign *assign, enum tree_code code) |
2b725155 | 978 | { |
ef4bddc2 | 979 | machine_mode compute_mode = TYPE_MODE (compute_type); |
2b725155 RH |
980 | |
981 | /* If the compute mode is not a vector mode (hence we are not decomposing | |
982 | a BLKmode vector to smaller, hardware-supported vectors), we may want | |
983 | to expand the operations in parallel. */ | |
984 | if (GET_MODE_CLASS (compute_mode) != MODE_VECTOR_INT | |
325217ed CF |
985 | && GET_MODE_CLASS (compute_mode) != MODE_VECTOR_FLOAT |
986 | && GET_MODE_CLASS (compute_mode) != MODE_VECTOR_FRACT | |
987 | && GET_MODE_CLASS (compute_mode) != MODE_VECTOR_UFRACT | |
988 | && GET_MODE_CLASS (compute_mode) != MODE_VECTOR_ACCUM | |
989 | && GET_MODE_CLASS (compute_mode) != MODE_VECTOR_UACCUM) | |
2b725155 RH |
990 | switch (code) |
991 | { | |
992 | case PLUS_EXPR: | |
993 | case MINUS_EXPR: | |
20bd649a | 994 | if (ANY_INTEGRAL_TYPE_P (type) && !TYPE_OVERFLOW_TRAPS (type)) |
cdbb5ba3 AS |
995 | return expand_vector_addition (gsi, do_binop, do_plus_minus, type, |
996 | gimple_assign_rhs1 (assign), | |
726a989a | 997 | gimple_assign_rhs2 (assign), code); |
2b725155 RH |
998 | break; |
999 | ||
1000 | case NEGATE_EXPR: | |
20bd649a | 1001 | if (ANY_INTEGRAL_TYPE_P (type) && !TYPE_OVERFLOW_TRAPS (type)) |
726a989a RB |
1002 | return expand_vector_addition (gsi, do_unop, do_negate, type, |
1003 | gimple_assign_rhs1 (assign), | |
2b725155 RH |
1004 | NULL_TREE, code); |
1005 | break; | |
1006 | ||
1007 | case BIT_AND_EXPR: | |
1008 | case BIT_IOR_EXPR: | |
1009 | case BIT_XOR_EXPR: | |
726a989a RB |
1010 | return expand_vector_parallel (gsi, do_binop, type, |
1011 | gimple_assign_rhs1 (assign), | |
1012 | gimple_assign_rhs2 (assign), code); | |
2b725155 RH |
1013 | |
1014 | case BIT_NOT_EXPR: | |
726a989a RB |
1015 | return expand_vector_parallel (gsi, do_unop, type, |
1016 | gimple_assign_rhs1 (assign), | |
d246ab4f AS |
1017 | NULL_TREE, code); |
1018 | case EQ_EXPR: | |
1019 | case NE_EXPR: | |
1020 | case GT_EXPR: | |
1021 | case LT_EXPR: | |
1022 | case GE_EXPR: | |
1023 | case LE_EXPR: | |
1024 | case UNEQ_EXPR: | |
1025 | case UNGT_EXPR: | |
1026 | case UNLT_EXPR: | |
1027 | case UNGE_EXPR: | |
1028 | case UNLE_EXPR: | |
1029 | case LTGT_EXPR: | |
1030 | case ORDERED_EXPR: | |
1031 | case UNORDERED_EXPR: | |
1032 | { | |
1033 | tree rhs1 = gimple_assign_rhs1 (assign); | |
1034 | tree rhs2 = gimple_assign_rhs2 (assign); | |
2b725155 | 1035 | |
d246ab4f AS |
1036 | return expand_vector_comparison (gsi, type, rhs1, rhs2, code); |
1037 | } | |
4ee4c52c JJ |
1038 | |
1039 | case TRUNC_DIV_EXPR: | |
1040 | case TRUNC_MOD_EXPR: | |
1041 | { | |
1042 | tree rhs1 = gimple_assign_rhs1 (assign); | |
1043 | tree rhs2 = gimple_assign_rhs2 (assign); | |
1044 | tree ret; | |
1045 | ||
1046 | if (!optimize | |
1047 | || !VECTOR_INTEGER_TYPE_P (type) | |
2b332829 JJ |
1048 | || TREE_CODE (rhs2) != VECTOR_CST |
1049 | || !VECTOR_MODE_P (TYPE_MODE (type))) | |
4ee4c52c JJ |
1050 | break; |
1051 | ||
1052 | ret = expand_vector_divmod (gsi, type, rhs1, rhs2, code); | |
1053 | if (ret != NULL_TREE) | |
1054 | return ret; | |
1055 | break; | |
1056 | } | |
1057 | ||
2b725155 RH |
1058 | default: |
1059 | break; | |
1060 | } | |
1061 | ||
1062 | if (TREE_CODE_CLASS (code) == tcc_unary) | |
726a989a RB |
1063 | return expand_vector_piecewise (gsi, do_unop, type, compute_type, |
1064 | gimple_assign_rhs1 (assign), | |
2b725155 RH |
1065 | NULL_TREE, code); |
1066 | else | |
726a989a RB |
1067 | return expand_vector_piecewise (gsi, do_binop, type, compute_type, |
1068 | gimple_assign_rhs1 (assign), | |
1069 | gimple_assign_rhs2 (assign), code); | |
2b725155 | 1070 | } |
ce7e41fc JJ |
1071 | |
1072 | /* Try to optimize | |
1073 | a_5 = { b_7, b_7 + 3, b_7 + 6, b_7 + 9 }; | |
1074 | style stmts into: | |
1075 | _9 = { b_7, b_7, b_7, b_7 }; | |
1076 | a_5 = _9 + { 0, 3, 6, 9 }; | |
1077 | because vector splat operation is usually more efficient | |
1078 | than piecewise initialization of the vector. */ | |
1079 | ||
1080 | static void | |
1081 | optimize_vector_constructor (gimple_stmt_iterator *gsi) | |
1082 | { | |
538dd0b7 | 1083 | gassign *stmt = as_a <gassign *> (gsi_stmt (*gsi)); |
ce7e41fc JJ |
1084 | tree lhs = gimple_assign_lhs (stmt); |
1085 | tree rhs = gimple_assign_rhs1 (stmt); | |
1086 | tree type = TREE_TYPE (rhs); | |
1087 | unsigned int i, j, nelts = TYPE_VECTOR_SUBPARTS (type); | |
1088 | bool all_same = true; | |
1089 | constructor_elt *elt; | |
355fe088 | 1090 | gimple *g; |
ce7e41fc | 1091 | tree base = NULL_TREE; |
1f254157 | 1092 | optab op; |
ce7e41fc JJ |
1093 | |
1094 | if (nelts <= 2 || CONSTRUCTOR_NELTS (rhs) != nelts) | |
1095 | return; | |
1f254157 JJ |
1096 | op = optab_for_tree_code (PLUS_EXPR, type, optab_default); |
1097 | if (op == unknown_optab | |
1098 | || optab_handler (op, TYPE_MODE (type)) == CODE_FOR_nothing) | |
1099 | return; | |
ce7e41fc JJ |
1100 | FOR_EACH_VEC_SAFE_ELT (CONSTRUCTOR_ELTS (rhs), i, elt) |
1101 | if (TREE_CODE (elt->value) != SSA_NAME | |
1102 | || TREE_CODE (TREE_TYPE (elt->value)) == VECTOR_TYPE) | |
1103 | return; | |
1104 | else | |
1105 | { | |
1106 | tree this_base = elt->value; | |
1107 | if (this_base != CONSTRUCTOR_ELT (rhs, 0)->value) | |
1108 | all_same = false; | |
1109 | for (j = 0; j < nelts + 1; j++) | |
1110 | { | |
1111 | g = SSA_NAME_DEF_STMT (this_base); | |
1112 | if (is_gimple_assign (g) | |
1113 | && gimple_assign_rhs_code (g) == PLUS_EXPR | |
1114 | && TREE_CODE (gimple_assign_rhs2 (g)) == INTEGER_CST | |
1115 | && TREE_CODE (gimple_assign_rhs1 (g)) == SSA_NAME | |
1116 | && !SSA_NAME_OCCURS_IN_ABNORMAL_PHI (gimple_assign_rhs1 (g))) | |
1117 | this_base = gimple_assign_rhs1 (g); | |
1118 | else | |
1119 | break; | |
1120 | } | |
1121 | if (i == 0) | |
1122 | base = this_base; | |
1123 | else if (this_base != base) | |
1124 | return; | |
1125 | } | |
1126 | if (all_same) | |
1127 | return; | |
5ebaa477 | 1128 | tree_vector_builder cst (type, nelts, 1); |
ce7e41fc JJ |
1129 | for (i = 0; i < nelts; i++) |
1130 | { | |
794e3180 RS |
1131 | tree this_base = CONSTRUCTOR_ELT (rhs, i)->value; |
1132 | tree elt = build_zero_cst (TREE_TYPE (base)); | |
ce7e41fc JJ |
1133 | while (this_base != base) |
1134 | { | |
1135 | g = SSA_NAME_DEF_STMT (this_base); | |
794e3180 RS |
1136 | elt = fold_binary (PLUS_EXPR, TREE_TYPE (base), |
1137 | elt, gimple_assign_rhs2 (g)); | |
1138 | if (elt == NULL_TREE | |
1139 | || TREE_CODE (elt) != INTEGER_CST | |
1140 | || TREE_OVERFLOW (elt)) | |
ce7e41fc JJ |
1141 | return; |
1142 | this_base = gimple_assign_rhs1 (g); | |
1143 | } | |
794e3180 | 1144 | cst.quick_push (elt); |
ce7e41fc JJ |
1145 | } |
1146 | for (i = 0; i < nelts; i++) | |
1147 | CONSTRUCTOR_ELT (rhs, i)->value = base; | |
b731b390 | 1148 | g = gimple_build_assign (make_ssa_name (type), rhs); |
ce7e41fc | 1149 | gsi_insert_before (gsi, g, GSI_SAME_STMT); |
0d0e4a03 | 1150 | g = gimple_build_assign (lhs, PLUS_EXPR, gimple_assign_lhs (g), |
5ebaa477 | 1151 | cst.build ()); |
ce7e41fc JJ |
1152 | gsi_replace (gsi, g, false); |
1153 | } | |
2b725155 | 1154 | \f |
995ec132 RG |
1155 | /* Return a type for the widest vector mode whose components are of type |
1156 | TYPE, or NULL_TREE if none is found. */ | |
325217ed | 1157 | |
2b725155 | 1158 | static tree |
995ec132 | 1159 | type_for_widest_vector_mode (tree type, optab op) |
2b725155 | 1160 | { |
ef4bddc2 RS |
1161 | machine_mode inner_mode = TYPE_MODE (type); |
1162 | machine_mode best_mode = VOIDmode, mode; | |
2b725155 RH |
1163 | int best_nunits = 0; |
1164 | ||
3d8bf70f | 1165 | if (SCALAR_FLOAT_MODE_P (inner_mode)) |
2b725155 | 1166 | mode = MIN_MODE_VECTOR_FLOAT; |
325217ed CF |
1167 | else if (SCALAR_FRACT_MODE_P (inner_mode)) |
1168 | mode = MIN_MODE_VECTOR_FRACT; | |
1169 | else if (SCALAR_UFRACT_MODE_P (inner_mode)) | |
1170 | mode = MIN_MODE_VECTOR_UFRACT; | |
1171 | else if (SCALAR_ACCUM_MODE_P (inner_mode)) | |
1172 | mode = MIN_MODE_VECTOR_ACCUM; | |
1173 | else if (SCALAR_UACCUM_MODE_P (inner_mode)) | |
1174 | mode = MIN_MODE_VECTOR_UACCUM; | |
2b725155 RH |
1175 | else |
1176 | mode = MIN_MODE_VECTOR_INT; | |
1177 | ||
c94843d2 | 1178 | FOR_EACH_MODE_FROM (mode, mode) |
2b725155 RH |
1179 | if (GET_MODE_INNER (mode) == inner_mode |
1180 | && GET_MODE_NUNITS (mode) > best_nunits | |
947131ba | 1181 | && optab_handler (op, mode) != CODE_FOR_nothing) |
2b725155 RH |
1182 | best_mode = mode, best_nunits = GET_MODE_NUNITS (mode); |
1183 | ||
1184 | if (best_mode == VOIDmode) | |
1185 | return NULL_TREE; | |
1186 | else | |
995ec132 | 1187 | return build_vector_type_for_mode (type, best_mode); |
2b725155 RH |
1188 | } |
1189 | ||
f90e8e2e AS |
1190 | |
1191 | /* Build a reference to the element of the vector VECT. Function | |
1192 | returns either the element itself, either BIT_FIELD_REF, or an | |
1193 | ARRAY_REF expression. | |
1194 | ||
073a8998 | 1195 | GSI is required to insert temporary variables while building a |
f90e8e2e AS |
1196 | refernece to the element of the vector VECT. |
1197 | ||
1198 | PTMPVEC is a pointer to the temporary variable for caching | |
1199 | purposes. In case when PTMPVEC is NULL new temporary variable | |
1200 | will be created. */ | |
1201 | static tree | |
1202 | vector_element (gimple_stmt_iterator *gsi, tree vect, tree idx, tree *ptmpvec) | |
1203 | { | |
067f5960 | 1204 | tree vect_type, vect_elt_type; |
355fe088 | 1205 | gimple *asgn; |
f90e8e2e AS |
1206 | tree tmpvec; |
1207 | tree arraytype; | |
1208 | bool need_asgn = true; | |
067f5960 | 1209 | unsigned int elements; |
f90e8e2e | 1210 | |
067f5960 RH |
1211 | vect_type = TREE_TYPE (vect); |
1212 | vect_elt_type = TREE_TYPE (vect_type); | |
22afc2b3 | 1213 | elements = nunits_for_known_piecewise_op (vect_type); |
f90e8e2e | 1214 | |
f90e8e2e AS |
1215 | if (TREE_CODE (idx) == INTEGER_CST) |
1216 | { | |
1217 | unsigned HOST_WIDE_INT index; | |
1218 | ||
067f5960 RH |
1219 | /* Given that we're about to compute a binary modulus, |
1220 | we don't care about the high bits of the value. */ | |
1221 | index = TREE_INT_CST_LOW (idx); | |
cc269bb6 | 1222 | if (!tree_fits_uhwi_p (idx) || index >= elements) |
067f5960 RH |
1223 | { |
1224 | index &= elements - 1; | |
1225 | idx = build_int_cst (TREE_TYPE (idx), index); | |
1226 | } | |
f90e8e2e | 1227 | |
bc622b2a RG |
1228 | /* When lowering a vector statement sequence do some easy |
1229 | simplification by looking through intermediate vector results. */ | |
1230 | if (TREE_CODE (vect) == SSA_NAME) | |
1231 | { | |
355fe088 | 1232 | gimple *def_stmt = SSA_NAME_DEF_STMT (vect); |
bc622b2a RG |
1233 | if (is_gimple_assign (def_stmt) |
1234 | && (gimple_assign_rhs_code (def_stmt) == VECTOR_CST | |
1235 | || gimple_assign_rhs_code (def_stmt) == CONSTRUCTOR)) | |
1236 | vect = gimple_assign_rhs1 (def_stmt); | |
1237 | } | |
1238 | ||
f90e8e2e | 1239 | if (TREE_CODE (vect) == VECTOR_CST) |
d2a12ae7 | 1240 | return VECTOR_CST_ELT (vect, index); |
4a2c20cc JJ |
1241 | else if (TREE_CODE (vect) == CONSTRUCTOR |
1242 | && (CONSTRUCTOR_NELTS (vect) == 0 | |
1243 | || TREE_CODE (TREE_TYPE (CONSTRUCTOR_ELT (vect, 0)->value)) | |
1244 | != VECTOR_TYPE)) | |
f90e8e2e | 1245 | { |
4a2c20cc JJ |
1246 | if (index < CONSTRUCTOR_NELTS (vect)) |
1247 | return CONSTRUCTOR_ELT (vect, index)->value; | |
067f5960 | 1248 | return build_zero_cst (vect_elt_type); |
f90e8e2e | 1249 | } |
067f5960 | 1250 | else |
f90e8e2e | 1251 | { |
067f5960 | 1252 | tree size = TYPE_SIZE (vect_elt_type); |
26a7fca2 JJ |
1253 | tree pos = fold_build2 (MULT_EXPR, bitsizetype, bitsize_int (index), |
1254 | size); | |
1255 | return fold_build3 (BIT_FIELD_REF, vect_elt_type, vect, size, pos); | |
f90e8e2e | 1256 | } |
f90e8e2e AS |
1257 | } |
1258 | ||
1259 | if (!ptmpvec) | |
067f5960 | 1260 | tmpvec = create_tmp_var (vect_type, "vectmp"); |
f90e8e2e | 1261 | else if (!*ptmpvec) |
067f5960 | 1262 | tmpvec = *ptmpvec = create_tmp_var (vect_type, "vectmp"); |
f90e8e2e AS |
1263 | else |
1264 | { | |
1265 | tmpvec = *ptmpvec; | |
1266 | need_asgn = false; | |
1267 | } | |
1268 | ||
1269 | if (need_asgn) | |
1270 | { | |
1271 | TREE_ADDRESSABLE (tmpvec) = 1; | |
1272 | asgn = gimple_build_assign (tmpvec, vect); | |
1273 | gsi_insert_before (gsi, asgn, GSI_SAME_STMT); | |
1274 | } | |
1275 | ||
067f5960 RH |
1276 | arraytype = build_array_type_nelts (vect_elt_type, elements); |
1277 | return build4 (ARRAY_REF, vect_elt_type, | |
f90e8e2e AS |
1278 | build1 (VIEW_CONVERT_EXPR, arraytype, tmpvec), |
1279 | idx, NULL_TREE, NULL_TREE); | |
1280 | } | |
1281 | ||
2205ed25 | 1282 | /* Check if VEC_PERM_EXPR within the given setting is supported |
067f5960 | 1283 | by hardware, or lower it piecewise. |
f90e8e2e | 1284 | |
2205ed25 RH |
1285 | When VEC_PERM_EXPR has the same first and second operands: |
1286 | VEC_PERM_EXPR <v0, v0, mask> the lowered version would be | |
f90e8e2e AS |
1287 | {v0[mask[0]], v0[mask[1]], ...} |
1288 | MASK and V0 must have the same number of elements. | |
1289 | ||
2205ed25 | 1290 | Otherwise VEC_PERM_EXPR <v0, v1, mask> is lowered to |
f90e8e2e AS |
1291 | {mask[0] < len(v0) ? v0[mask[0]] : v1[mask[0]], ...} |
1292 | V0 and V1 must have the same type. MASK, V0, V1 must have the | |
1293 | same number of arguments. */ | |
f90e8e2e | 1294 | |
067f5960 | 1295 | static void |
2205ed25 | 1296 | lower_vec_perm (gimple_stmt_iterator *gsi) |
067f5960 | 1297 | { |
538dd0b7 | 1298 | gassign *stmt = as_a <gassign *> (gsi_stmt (*gsi)); |
f90e8e2e AS |
1299 | tree mask = gimple_assign_rhs3 (stmt); |
1300 | tree vec0 = gimple_assign_rhs1 (stmt); | |
1301 | tree vec1 = gimple_assign_rhs2 (stmt); | |
067f5960 RH |
1302 | tree vect_type = TREE_TYPE (vec0); |
1303 | tree mask_type = TREE_TYPE (mask); | |
1304 | tree vect_elt_type = TREE_TYPE (vect_type); | |
1305 | tree mask_elt_type = TREE_TYPE (mask_type); | |
1306 | unsigned int elements = TYPE_VECTOR_SUBPARTS (vect_type); | |
9771b263 | 1307 | vec<constructor_elt, va_gc> *v; |
067f5960 RH |
1308 | tree constr, t, si, i_val; |
1309 | tree vec0tmp = NULL_TREE, vec1tmp = NULL_TREE, masktmp = NULL_TREE; | |
1310 | bool two_operand_p = !operand_equal_p (vec0, vec1, 0); | |
cdbb5ba3 | 1311 | location_t loc = gimple_location (gsi_stmt (*gsi)); |
067f5960 | 1312 | unsigned i; |
f90e8e2e | 1313 | |
273d260f RR |
1314 | if (TREE_CODE (mask) == SSA_NAME) |
1315 | { | |
355fe088 | 1316 | gimple *def_stmt = SSA_NAME_DEF_STMT (mask); |
273d260f RR |
1317 | if (is_gimple_assign (def_stmt) |
1318 | && gimple_assign_rhs_code (def_stmt) == VECTOR_CST) | |
1319 | mask = gimple_assign_rhs1 (def_stmt); | |
1320 | } | |
1321 | ||
e3342de4 | 1322 | vec_perm_builder sel_int; |
22e4dee7 | 1323 | |
e3342de4 RS |
1324 | if (TREE_CODE (mask) == VECTOR_CST |
1325 | && tree_to_vec_perm_builder (&sel_int, mask)) | |
1326 | { | |
1327 | vec_perm_indices indices (sel_int, 2, elements); | |
1328 | if (can_vec_perm_const_p (TYPE_MODE (vect_type), indices)) | |
273d260f RR |
1329 | { |
1330 | gimple_assign_set_rhs3 (stmt, mask); | |
1331 | update_stmt (stmt); | |
1332 | return; | |
1333 | } | |
3788cfb5 JJ |
1334 | /* Also detect vec_shr pattern - VEC_PERM_EXPR with zero |
1335 | vector as VEC1 and a right element shift MASK. */ | |
1336 | if (optab_handler (vec_shr_optab, TYPE_MODE (vect_type)) | |
1337 | != CODE_FOR_nothing | |
1338 | && TREE_CODE (vec1) == VECTOR_CST | |
1339 | && initializer_zerop (vec1) | |
6b0630fb RS |
1340 | && maybe_ne (indices[0], 0) |
1341 | && known_lt (indices[0], elements)) | |
3788cfb5 | 1342 | { |
d3867483 RS |
1343 | bool ok_p = indices.series_p (0, 1, indices[0], 1); |
1344 | if (!ok_p) | |
3788cfb5 | 1345 | { |
d3867483 RS |
1346 | for (i = 1; i < elements; ++i) |
1347 | { | |
6b0630fb | 1348 | poly_int64 expected = i + indices[0]; |
d3867483 | 1349 | /* Indices into the second vector are all equivalent. */ |
6b0630fb RS |
1350 | if (maybe_lt (indices[i], elements) |
1351 | ? maybe_ne (indices[i], expected) | |
1352 | : maybe_lt (expected, elements)) | |
d3867483 RS |
1353 | break; |
1354 | } | |
1355 | ok_p = i == elements; | |
3788cfb5 | 1356 | } |
d3867483 | 1357 | if (ok_p) |
3788cfb5 JJ |
1358 | { |
1359 | gimple_assign_set_rhs3 (stmt, mask); | |
1360 | update_stmt (stmt); | |
1361 | return; | |
1362 | } | |
1363 | } | |
22e4dee7 | 1364 | } |
7ac7e286 | 1365 | else if (can_vec_perm_var_p (TYPE_MODE (vect_type))) |
067f5960 | 1366 | return; |
cdbb5ba3 AS |
1367 | |
1368 | warning_at (loc, OPT_Wvector_operation_performance, | |
1369 | "vector shuffling operation will be expanded piecewise"); | |
1370 | ||
9771b263 | 1371 | vec_alloc (v, elements); |
067f5960 | 1372 | for (i = 0; i < elements; i++) |
f90e8e2e | 1373 | { |
067f5960 RH |
1374 | si = size_int (i); |
1375 | i_val = vector_element (gsi, mask, si, &masktmp); | |
f90e8e2e | 1376 | |
067f5960 | 1377 | if (TREE_CODE (i_val) == INTEGER_CST) |
f90e8e2e | 1378 | { |
067f5960 | 1379 | unsigned HOST_WIDE_INT index; |
f90e8e2e | 1380 | |
067f5960 | 1381 | index = TREE_INT_CST_LOW (i_val); |
cc269bb6 | 1382 | if (!tree_fits_uhwi_p (i_val) || index >= elements) |
067f5960 | 1383 | i_val = build_int_cst (mask_elt_type, index & (elements - 1)); |
f90e8e2e | 1384 | |
067f5960 RH |
1385 | if (two_operand_p && (index & elements) != 0) |
1386 | t = vector_element (gsi, vec1, i_val, &vec1tmp); | |
1387 | else | |
1388 | t = vector_element (gsi, vec0, i_val, &vec0tmp); | |
f90e8e2e | 1389 | |
067f5960 RH |
1390 | t = force_gimple_operand_gsi (gsi, t, true, NULL_TREE, |
1391 | true, GSI_SAME_STMT); | |
f90e8e2e | 1392 | } |
067f5960 | 1393 | else |
f90e8e2e | 1394 | { |
067f5960 RH |
1395 | tree cond = NULL_TREE, v0_val; |
1396 | ||
1397 | if (two_operand_p) | |
1398 | { | |
1399 | cond = fold_build2 (BIT_AND_EXPR, mask_elt_type, i_val, | |
1400 | build_int_cst (mask_elt_type, elements)); | |
1401 | cond = force_gimple_operand_gsi (gsi, cond, true, NULL_TREE, | |
1402 | true, GSI_SAME_STMT); | |
1403 | } | |
1404 | ||
1405 | i_val = fold_build2 (BIT_AND_EXPR, mask_elt_type, i_val, | |
1406 | build_int_cst (mask_elt_type, elements - 1)); | |
1407 | i_val = force_gimple_operand_gsi (gsi, i_val, true, NULL_TREE, | |
1408 | true, GSI_SAME_STMT); | |
1409 | ||
1410 | v0_val = vector_element (gsi, vec0, i_val, &vec0tmp); | |
1411 | v0_val = force_gimple_operand_gsi (gsi, v0_val, true, NULL_TREE, | |
1412 | true, GSI_SAME_STMT); | |
1413 | ||
1414 | if (two_operand_p) | |
1415 | { | |
1416 | tree v1_val; | |
1417 | ||
1418 | v1_val = vector_element (gsi, vec1, i_val, &vec1tmp); | |
1419 | v1_val = force_gimple_operand_gsi (gsi, v1_val, true, NULL_TREE, | |
1420 | true, GSI_SAME_STMT); | |
1421 | ||
1422 | cond = fold_build2 (EQ_EXPR, boolean_type_node, | |
1423 | cond, build_zero_cst (mask_elt_type)); | |
1424 | cond = fold_build3 (COND_EXPR, vect_elt_type, | |
1425 | cond, v0_val, v1_val); | |
1426 | t = force_gimple_operand_gsi (gsi, cond, true, NULL_TREE, | |
1427 | true, GSI_SAME_STMT); | |
f90e8e2e | 1428 | } |
067f5960 RH |
1429 | else |
1430 | t = v0_val; | |
f90e8e2e | 1431 | } |
067f5960 | 1432 | |
4a2c20cc | 1433 | CONSTRUCTOR_APPEND_ELT (v, NULL_TREE, t); |
f90e8e2e AS |
1434 | } |
1435 | ||
067f5960 | 1436 | constr = build_constructor (vect_type, v); |
f90e8e2e | 1437 | gimple_assign_set_rhs_from_tree (gsi, constr); |
067f5960 | 1438 | update_stmt (gsi_stmt (*gsi)); |
f90e8e2e AS |
1439 | } |
1440 | ||
975b6ff3 RB |
1441 | /* If OP is a uniform vector return the element it is a splat from. */ |
1442 | ||
1443 | static tree | |
1444 | ssa_uniform_vector_p (tree op) | |
1445 | { | |
1446 | if (TREE_CODE (op) == VECTOR_CST | |
be4c1d4a | 1447 | || TREE_CODE (op) == VEC_DUPLICATE_EXPR |
975b6ff3 RB |
1448 | || TREE_CODE (op) == CONSTRUCTOR) |
1449 | return uniform_vector_p (op); | |
1450 | if (TREE_CODE (op) == SSA_NAME) | |
1451 | { | |
1452 | gimple *def_stmt = SSA_NAME_DEF_STMT (op); | |
1453 | if (gimple_assign_single_p (def_stmt)) | |
1454 | return uniform_vector_p (gimple_assign_rhs1 (def_stmt)); | |
1455 | } | |
1456 | return NULL_TREE; | |
1457 | } | |
1458 | ||
a4ee446d JJ |
1459 | /* Return type in which CODE operation with optab OP can be |
1460 | computed. */ | |
1461 | ||
1462 | static tree | |
1463 | get_compute_type (enum tree_code code, optab op, tree type) | |
1464 | { | |
1465 | /* For very wide vectors, try using a smaller vector mode. */ | |
1466 | tree compute_type = type; | |
1467 | if (op | |
1468 | && (!VECTOR_MODE_P (TYPE_MODE (type)) | |
1469 | || optab_handler (op, TYPE_MODE (type)) == CODE_FOR_nothing)) | |
1470 | { | |
1471 | tree vector_compute_type | |
1472 | = type_for_widest_vector_mode (TREE_TYPE (type), op); | |
1473 | if (vector_compute_type != NULL_TREE | |
22afc2b3 | 1474 | && subparts_gt (compute_type, vector_compute_type) |
bd522678 | 1475 | && TYPE_VECTOR_SUBPARTS (vector_compute_type) > 1 |
a4ee446d JJ |
1476 | && (optab_handler (op, TYPE_MODE (vector_compute_type)) |
1477 | != CODE_FOR_nothing)) | |
1478 | compute_type = vector_compute_type; | |
1479 | } | |
1480 | ||
1481 | /* If we are breaking a BLKmode vector into smaller pieces, | |
1482 | type_for_widest_vector_mode has already looked into the optab, | |
1483 | so skip these checks. */ | |
1484 | if (compute_type == type) | |
1485 | { | |
ef4bddc2 | 1486 | machine_mode compute_mode = TYPE_MODE (compute_type); |
a4ee446d JJ |
1487 | if (VECTOR_MODE_P (compute_mode)) |
1488 | { | |
1489 | if (op && optab_handler (op, compute_mode) != CODE_FOR_nothing) | |
1490 | return compute_type; | |
1491 | if (code == MULT_HIGHPART_EXPR | |
1492 | && can_mult_highpart_p (compute_mode, | |
1493 | TYPE_UNSIGNED (compute_type))) | |
1494 | return compute_type; | |
1495 | } | |
1496 | /* There is no operation in hardware, so fall back to scalars. */ | |
1497 | compute_type = TREE_TYPE (type); | |
1498 | } | |
1499 | ||
1500 | return compute_type; | |
1501 | } | |
1502 | ||
f8c29d98 JJ |
1503 | static tree |
1504 | do_cond (gimple_stmt_iterator *gsi, tree inner_type, tree a, tree b, | |
9f47c7e5 IE |
1505 | tree bitpos, tree bitsize, enum tree_code code, |
1506 | tree type ATTRIBUTE_UNUSED) | |
f8c29d98 JJ |
1507 | { |
1508 | if (TREE_CODE (TREE_TYPE (a)) == VECTOR_TYPE) | |
8f66e7dc | 1509 | a = tree_vec_extract (gsi, inner_type, a, bitsize, bitpos); |
f8c29d98 | 1510 | if (TREE_CODE (TREE_TYPE (b)) == VECTOR_TYPE) |
8f66e7dc | 1511 | b = tree_vec_extract (gsi, inner_type, b, bitsize, bitpos); |
f8c29d98 | 1512 | tree cond = gimple_assign_rhs1 (gsi_stmt (*gsi)); |
3826795b | 1513 | return gimplify_build3 (gsi, code, inner_type, unshare_expr (cond), a, b); |
f8c29d98 JJ |
1514 | } |
1515 | ||
1516 | /* Expand a vector COND_EXPR to scalars, piecewise. */ | |
1517 | static void | |
1518 | expand_vector_scalar_condition (gimple_stmt_iterator *gsi) | |
1519 | { | |
1520 | gassign *stmt = as_a <gassign *> (gsi_stmt (*gsi)); | |
1521 | tree type = gimple_expr_type (stmt); | |
1522 | tree compute_type = get_compute_type (COND_EXPR, mov_optab, type); | |
1523 | machine_mode compute_mode = TYPE_MODE (compute_type); | |
1524 | gcc_assert (compute_mode != BLKmode); | |
1525 | tree lhs = gimple_assign_lhs (stmt); | |
1526 | tree rhs2 = gimple_assign_rhs2 (stmt); | |
1527 | tree rhs3 = gimple_assign_rhs3 (stmt); | |
1528 | tree new_rhs; | |
1529 | ||
1530 | /* If the compute mode is not a vector mode (hence we are not decomposing | |
1531 | a BLKmode vector to smaller, hardware-supported vectors), we may want | |
1532 | to expand the operations in parallel. */ | |
1533 | if (GET_MODE_CLASS (compute_mode) != MODE_VECTOR_INT | |
1534 | && GET_MODE_CLASS (compute_mode) != MODE_VECTOR_FLOAT | |
1535 | && GET_MODE_CLASS (compute_mode) != MODE_VECTOR_FRACT | |
1536 | && GET_MODE_CLASS (compute_mode) != MODE_VECTOR_UFRACT | |
1537 | && GET_MODE_CLASS (compute_mode) != MODE_VECTOR_ACCUM | |
1538 | && GET_MODE_CLASS (compute_mode) != MODE_VECTOR_UACCUM) | |
1539 | new_rhs = expand_vector_parallel (gsi, do_cond, type, rhs2, rhs3, | |
1540 | COND_EXPR); | |
1541 | else | |
1542 | new_rhs = expand_vector_piecewise (gsi, do_cond, type, compute_type, | |
1543 | rhs2, rhs3, COND_EXPR); | |
1544 | if (!useless_type_conversion_p (TREE_TYPE (lhs), TREE_TYPE (new_rhs))) | |
1545 | new_rhs = gimplify_build1 (gsi, VIEW_CONVERT_EXPR, TREE_TYPE (lhs), | |
1546 | new_rhs); | |
1547 | ||
1548 | /* NOTE: We should avoid using gimple_assign_set_rhs_from_tree. One | |
1549 | way to do it is change expand_vector_operation and its callees to | |
1550 | return a tree_code, RHS1 and RHS2 instead of a tree. */ | |
1551 | gimple_assign_set_rhs_from_tree (gsi, new_rhs); | |
1552 | update_stmt (gsi_stmt (*gsi)); | |
1553 | } | |
1554 | ||
2b725155 RH |
1555 | /* Process one statement. If we identify a vector operation, expand it. */ |
1556 | ||
1557 | static void | |
726a989a | 1558 | expand_vector_operations_1 (gimple_stmt_iterator *gsi) |
2b725155 | 1559 | { |
a4ee446d | 1560 | tree lhs, rhs1, rhs2 = NULL, type, compute_type = NULL_TREE; |
2b725155 | 1561 | enum tree_code code; |
2225b9f2 | 1562 | optab op = unknown_optab; |
726a989a RB |
1563 | enum gimple_rhs_class rhs_class; |
1564 | tree new_rhs; | |
2b725155 | 1565 | |
538dd0b7 DM |
1566 | /* Only consider code == GIMPLE_ASSIGN. */ |
1567 | gassign *stmt = dyn_cast <gassign *> (gsi_stmt (*gsi)); | |
1568 | if (!stmt) | |
726a989a | 1569 | return; |
2b725155 | 1570 | |
726a989a RB |
1571 | code = gimple_assign_rhs_code (stmt); |
1572 | rhs_class = get_gimple_rhs_class (code); | |
d246ab4f | 1573 | lhs = gimple_assign_lhs (stmt); |
2b725155 | 1574 | |
2205ed25 | 1575 | if (code == VEC_PERM_EXPR) |
f90e8e2e | 1576 | { |
2205ed25 | 1577 | lower_vec_perm (gsi); |
067f5960 | 1578 | return; |
f90e8e2e AS |
1579 | } |
1580 | ||
374ab2d7 MG |
1581 | if (code == VEC_COND_EXPR) |
1582 | { | |
1583 | expand_vector_condition (gsi); | |
1584 | return; | |
1585 | } | |
ce7e41fc | 1586 | |
f8c29d98 JJ |
1587 | if (code == COND_EXPR |
1588 | && TREE_CODE (TREE_TYPE (gimple_assign_lhs (stmt))) == VECTOR_TYPE | |
1589 | && TYPE_MODE (TREE_TYPE (gimple_assign_lhs (stmt))) == BLKmode) | |
1590 | { | |
1591 | expand_vector_scalar_condition (gsi); | |
1592 | return; | |
1593 | } | |
1594 | ||
ce7e41fc JJ |
1595 | if (code == CONSTRUCTOR |
1596 | && TREE_CODE (lhs) == SSA_NAME | |
1597 | && VECTOR_MODE_P (TYPE_MODE (TREE_TYPE (lhs))) | |
1598 | && !gimple_clobber_p (stmt) | |
1599 | && optimize) | |
1600 | { | |
1601 | optimize_vector_constructor (gsi); | |
1602 | return; | |
1603 | } | |
1604 | ||
726a989a RB |
1605 | if (rhs_class != GIMPLE_UNARY_RHS && rhs_class != GIMPLE_BINARY_RHS) |
1606 | return; | |
2b725155 | 1607 | |
726a989a RB |
1608 | rhs1 = gimple_assign_rhs1 (stmt); |
1609 | type = gimple_expr_type (stmt); | |
1610 | if (rhs_class == GIMPLE_BINARY_RHS) | |
1611 | rhs2 = gimple_assign_rhs2 (stmt); | |
2b725155 | 1612 | |
9adab579 RS |
1613 | if (!VECTOR_TYPE_P (type) |
1614 | || !VECTOR_TYPE_P (TREE_TYPE (rhs1))) | |
2b725155 RH |
1615 | return; |
1616 | ||
975b6ff3 RB |
1617 | /* If the vector operation is operating on all same vector elements |
1618 | implement it with a scalar operation and a splat if the target | |
1619 | supports the scalar operation. */ | |
1620 | tree srhs1, srhs2 = NULL_TREE; | |
1621 | if ((srhs1 = ssa_uniform_vector_p (rhs1)) != NULL_TREE | |
1622 | && (rhs2 == NULL_TREE | |
0b3ecf75 RB |
1623 | || (! VECTOR_TYPE_P (TREE_TYPE (rhs2)) |
1624 | && (srhs2 = rhs2)) | |
975b6ff3 RB |
1625 | || (srhs2 = ssa_uniform_vector_p (rhs2)) != NULL_TREE) |
1626 | /* As we query direct optabs restrict to non-convert operations. */ | |
1627 | && TYPE_MODE (TREE_TYPE (type)) == TYPE_MODE (TREE_TYPE (srhs1))) | |
1628 | { | |
1629 | op = optab_for_tree_code (code, TREE_TYPE (type), optab_scalar); | |
cbf3bf32 | 1630 | if (op >= FIRST_NORM_OPTAB && op <= LAST_NORM_OPTAB |
bfccadc9 | 1631 | && optab_handler (op, TYPE_MODE (TREE_TYPE (type))) != CODE_FOR_nothing) |
975b6ff3 RB |
1632 | { |
1633 | tree slhs = make_ssa_name (TREE_TYPE (srhs1)); | |
1634 | gimple *repl = gimple_build_assign (slhs, code, srhs1, srhs2); | |
1635 | gsi_insert_before (gsi, repl, GSI_SAME_STMT); | |
1636 | gimple_assign_set_rhs_from_tree (gsi, | |
1637 | build_vector_from_val (type, slhs)); | |
1638 | update_stmt (stmt); | |
1639 | return; | |
1640 | } | |
1641 | } | |
1642 | ||
9f47c7e5 IE |
1643 | /* A scalar operation pretending to be a vector one. */ |
1644 | if (VECTOR_BOOLEAN_TYPE_P (type) | |
1645 | && !VECTOR_MODE_P (TYPE_MODE (type)) | |
1646 | && TYPE_MODE (type) != BLKmode) | |
1647 | return; | |
1648 | ||
625a9766 | 1649 | if (CONVERT_EXPR_CODE_P (code) |
f57d17f1 TM |
1650 | || code == FLOAT_EXPR |
1651 | || code == FIX_TRUNC_EXPR | |
1652 | || code == VIEW_CONVERT_EXPR) | |
2b725155 | 1653 | return; |
b8698a0f | 1654 | |
9f106823 UB |
1655 | /* The signedness is determined from input argument. */ |
1656 | if (code == VEC_UNPACK_FLOAT_HI_EXPR | |
1657 | || code == VEC_UNPACK_FLOAT_LO_EXPR) | |
dee6fc2b RB |
1658 | { |
1659 | type = TREE_TYPE (rhs1); | |
1660 | /* We do not know how to scalarize those. */ | |
1661 | return; | |
1662 | } | |
9f106823 | 1663 | |
3f30a9a6 RH |
1664 | /* For widening/narrowing vector operations, the relevant type is of the |
1665 | arguments, not the widened result. VEC_UNPACK_FLOAT_*_EXPR is | |
1666 | calculated in the same way above. */ | |
1667 | if (code == WIDEN_SUM_EXPR | |
1668 | || code == VEC_WIDEN_MULT_HI_EXPR | |
1669 | || code == VEC_WIDEN_MULT_LO_EXPR | |
1670 | || code == VEC_WIDEN_MULT_EVEN_EXPR | |
1671 | || code == VEC_WIDEN_MULT_ODD_EXPR | |
1672 | || code == VEC_UNPACK_HI_EXPR | |
1673 | || code == VEC_UNPACK_LO_EXPR | |
1674 | || code == VEC_PACK_TRUNC_EXPR | |
1675 | || code == VEC_PACK_SAT_EXPR | |
1676 | || code == VEC_PACK_FIX_TRUNC_EXPR | |
1677 | || code == VEC_WIDEN_LSHIFT_HI_EXPR | |
1678 | || code == VEC_WIDEN_LSHIFT_LO_EXPR) | |
dee6fc2b RB |
1679 | { |
1680 | type = TREE_TYPE (rhs1); | |
1681 | /* We do not know how to scalarize those. */ | |
1682 | return; | |
1683 | } | |
3f30a9a6 | 1684 | |
71d46ca5 MM |
1685 | /* Choose between vector shift/rotate by vector and vector shift/rotate by |
1686 | scalar */ | |
b8698a0f L |
1687 | if (code == LSHIFT_EXPR |
1688 | || code == RSHIFT_EXPR | |
726a989a | 1689 | || code == LROTATE_EXPR |
71d46ca5 MM |
1690 | || code == RROTATE_EXPR) |
1691 | { | |
0f3d6c10 RH |
1692 | optab opv; |
1693 | ||
bdc3ee5d RH |
1694 | /* Check whether we have vector <op> {x,x,x,x} where x |
1695 | could be a scalar variable or a constant. Transform | |
1696 | vector <op> {x,x,x,x} ==> vector <op> scalar. */ | |
0f3d6c10 | 1697 | if (VECTOR_INTEGER_TYPE_P (TREE_TYPE (rhs2))) |
47853c73 AS |
1698 | { |
1699 | tree first; | |
975b6ff3 RB |
1700 | |
1701 | if ((first = ssa_uniform_vector_p (rhs2)) != NULL_TREE) | |
47853c73 AS |
1702 | { |
1703 | gimple_assign_set_rhs2 (stmt, first); | |
1704 | update_stmt (stmt); | |
1705 | rhs2 = first; | |
1706 | } | |
47853c73 | 1707 | } |
f90e8e2e | 1708 | |
0f3d6c10 RH |
1709 | opv = optab_for_tree_code (code, type, optab_vector); |
1710 | if (VECTOR_INTEGER_TYPE_P (TREE_TYPE (rhs2))) | |
1711 | op = opv; | |
bdc3ee5d | 1712 | else |
2fa6eeff | 1713 | { |
bdc3ee5d | 1714 | op = optab_for_tree_code (code, type, optab_scalar); |
2fa6eeff | 1715 | |
a4ee446d JJ |
1716 | compute_type = get_compute_type (code, op, type); |
1717 | if (compute_type == type) | |
1718 | return; | |
bdc3ee5d | 1719 | /* The rtl expander will expand vector/scalar as vector/vector |
a4ee446d JJ |
1720 | if necessary. Pick one with wider vector type. */ |
1721 | tree compute_vtype = get_compute_type (code, opv, type); | |
22afc2b3 | 1722 | if (subparts_gt (compute_vtype, compute_type)) |
a4ee446d JJ |
1723 | { |
1724 | compute_type = compute_vtype; | |
1725 | op = opv; | |
1726 | } | |
1727 | } | |
1728 | ||
1729 | if (code == LROTATE_EXPR || code == RROTATE_EXPR) | |
1730 | { | |
1731 | if (compute_type == NULL_TREE) | |
1732 | compute_type = get_compute_type (code, op, type); | |
1733 | if (compute_type == type) | |
0f3d6c10 | 1734 | return; |
a4ee446d JJ |
1735 | /* Before splitting vector rotates into scalar rotates, |
1736 | see if we can't use vector shifts and BIT_IOR_EXPR | |
1737 | instead. For vector by vector rotates we'd also | |
1738 | need to check BIT_AND_EXPR and NEGATE_EXPR, punt there | |
1739 | for now, fold doesn't seem to create such rotates anyway. */ | |
1740 | if (compute_type == TREE_TYPE (type) | |
1741 | && !VECTOR_INTEGER_TYPE_P (TREE_TYPE (rhs2))) | |
1742 | { | |
1743 | optab oplv = vashl_optab, opl = ashl_optab; | |
1744 | optab oprv = vlshr_optab, opr = lshr_optab, opo = ior_optab; | |
1745 | tree compute_lvtype = get_compute_type (LSHIFT_EXPR, oplv, type); | |
1746 | tree compute_rvtype = get_compute_type (RSHIFT_EXPR, oprv, type); | |
1747 | tree compute_otype = get_compute_type (BIT_IOR_EXPR, opo, type); | |
1748 | tree compute_ltype = get_compute_type (LSHIFT_EXPR, opl, type); | |
1749 | tree compute_rtype = get_compute_type (RSHIFT_EXPR, opr, type); | |
1750 | /* The rtl expander will expand vector/scalar as vector/vector | |
1751 | if necessary. Pick one with wider vector type. */ | |
22afc2b3 | 1752 | if (subparts_gt (compute_lvtype, compute_ltype)) |
a4ee446d JJ |
1753 | { |
1754 | compute_ltype = compute_lvtype; | |
1755 | opl = oplv; | |
1756 | } | |
22afc2b3 | 1757 | if (subparts_gt (compute_rvtype, compute_rtype)) |
a4ee446d JJ |
1758 | { |
1759 | compute_rtype = compute_rvtype; | |
1760 | opr = oprv; | |
1761 | } | |
1762 | /* Pick the narrowest type from LSHIFT_EXPR, RSHIFT_EXPR and | |
1763 | BIT_IOR_EXPR. */ | |
1764 | compute_type = compute_ltype; | |
22afc2b3 | 1765 | if (subparts_gt (compute_type, compute_rtype)) |
a4ee446d | 1766 | compute_type = compute_rtype; |
22afc2b3 | 1767 | if (subparts_gt (compute_type, compute_otype)) |
a4ee446d JJ |
1768 | compute_type = compute_otype; |
1769 | /* Verify all 3 operations can be performed in that type. */ | |
1770 | if (compute_type != TREE_TYPE (type)) | |
1771 | { | |
1772 | if (optab_handler (opl, TYPE_MODE (compute_type)) | |
1773 | == CODE_FOR_nothing | |
1774 | || optab_handler (opr, TYPE_MODE (compute_type)) | |
1775 | == CODE_FOR_nothing | |
1776 | || optab_handler (opo, TYPE_MODE (compute_type)) | |
1777 | == CODE_FOR_nothing) | |
1778 | compute_type = TREE_TYPE (type); | |
1779 | } | |
1780 | } | |
2fa6eeff | 1781 | } |
71d46ca5 MM |
1782 | } |
1783 | else | |
1784 | op = optab_for_tree_code (code, type, optab_default); | |
2b725155 RH |
1785 | |
1786 | /* Optabs will try converting a negation into a subtraction, so | |
1787 | look for it as well. TODO: negation of floating-point vectors | |
1788 | might be turned into an exclusive OR toggling the sign bit. */ | |
2225b9f2 | 1789 | if (op == unknown_optab |
2b725155 RH |
1790 | && code == NEGATE_EXPR |
1791 | && INTEGRAL_TYPE_P (TREE_TYPE (type))) | |
71d46ca5 | 1792 | op = optab_for_tree_code (MINUS_EXPR, type, optab_default); |
2b725155 | 1793 | |
a4ee446d JJ |
1794 | if (compute_type == NULL_TREE) |
1795 | compute_type = get_compute_type (code, op, type); | |
2b725155 | 1796 | if (compute_type == type) |
a4ee446d | 1797 | return; |
2b725155 | 1798 | |
726a989a | 1799 | new_rhs = expand_vector_operation (gsi, type, compute_type, stmt, code); |
d246ab4f AS |
1800 | |
1801 | /* Leave expression untouched for later expansion. */ | |
1802 | if (new_rhs == NULL_TREE) | |
1803 | return; | |
1804 | ||
726a989a RB |
1805 | if (!useless_type_conversion_p (TREE_TYPE (lhs), TREE_TYPE (new_rhs))) |
1806 | new_rhs = gimplify_build1 (gsi, VIEW_CONVERT_EXPR, TREE_TYPE (lhs), | |
1807 | new_rhs); | |
1808 | ||
1809 | /* NOTE: We should avoid using gimple_assign_set_rhs_from_tree. One | |
1810 | way to do it is change expand_vector_operation and its callees to | |
1811 | return a tree_code, RHS1 and RHS2 instead of a tree. */ | |
1812 | gimple_assign_set_rhs_from_tree (gsi, new_rhs); | |
3865a06f | 1813 | update_stmt (gsi_stmt (*gsi)); |
2b725155 RH |
1814 | } |
1815 | \f | |
1816 | /* Use this to lower vector operations introduced by the vectorizer, | |
1817 | if it may need the bit-twiddling tricks implemented in this file. */ | |
1818 | ||
c2924966 | 1819 | static unsigned int |
2b725155 RH |
1820 | expand_vector_operations (void) |
1821 | { | |
726a989a | 1822 | gimple_stmt_iterator gsi; |
2b725155 | 1823 | basic_block bb; |
3865a06f | 1824 | bool cfg_changed = false; |
2b725155 | 1825 | |
11cd3bed | 1826 | FOR_EACH_BB_FN (bb, cfun) |
2b725155 | 1827 | { |
726a989a | 1828 | for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi)) |
2b725155 | 1829 | { |
726a989a | 1830 | expand_vector_operations_1 (&gsi); |
3865a06f RG |
1831 | /* ??? If we do not cleanup EH then we will ICE in |
1832 | verification. But in reality we have created wrong-code | |
1833 | as we did not properly transition EH info and edges to | |
1834 | the piecewise computations. */ | |
1835 | if (maybe_clean_eh_stmt (gsi_stmt (gsi)) | |
1836 | && gimple_purge_dead_eh_edges (bb)) | |
1837 | cfg_changed = true; | |
2b725155 RH |
1838 | } |
1839 | } | |
3865a06f RG |
1840 | |
1841 | return cfg_changed ? TODO_cleanup_cfg : 0; | |
2b725155 RH |
1842 | } |
1843 | ||
17795822 TS |
1844 | namespace { |
1845 | ||
1846 | const pass_data pass_data_lower_vector = | |
2b725155 | 1847 | { |
27a4cd48 DM |
1848 | GIMPLE_PASS, /* type */ |
1849 | "veclower", /* name */ | |
1850 | OPTGROUP_VEC, /* optinfo_flags */ | |
27a4cd48 DM |
1851 | TV_NONE, /* tv_id */ |
1852 | PROP_cfg, /* properties_required */ | |
1853 | PROP_gimple_lvec, /* properties_provided */ | |
1854 | 0, /* properties_destroyed */ | |
1855 | 0, /* todo_flags_start */ | |
9538c95b | 1856 | TODO_update_ssa, /* todo_flags_finish */ |
2b725155 RH |
1857 | }; |
1858 | ||
17795822 | 1859 | class pass_lower_vector : public gimple_opt_pass |
27a4cd48 DM |
1860 | { |
1861 | public: | |
c3284718 RS |
1862 | pass_lower_vector (gcc::context *ctxt) |
1863 | : gimple_opt_pass (pass_data_lower_vector, ctxt) | |
27a4cd48 DM |
1864 | {} |
1865 | ||
1866 | /* opt_pass methods: */ | |
1a3d085c TS |
1867 | virtual bool gate (function *fun) |
1868 | { | |
1869 | return !(fun->curr_properties & PROP_gimple_lvec); | |
1870 | } | |
1871 | ||
be55bfe6 TS |
1872 | virtual unsigned int execute (function *) |
1873 | { | |
1874 | return expand_vector_operations (); | |
1875 | } | |
27a4cd48 DM |
1876 | |
1877 | }; // class pass_lower_vector | |
1878 | ||
17795822 TS |
1879 | } // anon namespace |
1880 | ||
27a4cd48 DM |
1881 | gimple_opt_pass * |
1882 | make_pass_lower_vector (gcc::context *ctxt) | |
1883 | { | |
1884 | return new pass_lower_vector (ctxt); | |
1885 | } | |
1886 | ||
17795822 TS |
1887 | namespace { |
1888 | ||
1889 | const pass_data pass_data_lower_vector_ssa = | |
2b725155 | 1890 | { |
27a4cd48 DM |
1891 | GIMPLE_PASS, /* type */ |
1892 | "veclower2", /* name */ | |
1893 | OPTGROUP_VEC, /* optinfo_flags */ | |
27a4cd48 DM |
1894 | TV_NONE, /* tv_id */ |
1895 | PROP_cfg, /* properties_required */ | |
1896 | PROP_gimple_lvec, /* properties_provided */ | |
1897 | 0, /* properties_destroyed */ | |
1898 | 0, /* todo_flags_start */ | |
3bea341f | 1899 | ( TODO_update_ssa |
27a4cd48 | 1900 | | TODO_cleanup_cfg ), /* todo_flags_finish */ |
2b725155 RH |
1901 | }; |
1902 | ||
17795822 | 1903 | class pass_lower_vector_ssa : public gimple_opt_pass |
27a4cd48 DM |
1904 | { |
1905 | public: | |
c3284718 RS |
1906 | pass_lower_vector_ssa (gcc::context *ctxt) |
1907 | : gimple_opt_pass (pass_data_lower_vector_ssa, ctxt) | |
27a4cd48 DM |
1908 | {} |
1909 | ||
1910 | /* opt_pass methods: */ | |
65d3284b | 1911 | opt_pass * clone () { return new pass_lower_vector_ssa (m_ctxt); } |
be55bfe6 TS |
1912 | virtual unsigned int execute (function *) |
1913 | { | |
1914 | return expand_vector_operations (); | |
1915 | } | |
27a4cd48 DM |
1916 | |
1917 | }; // class pass_lower_vector_ssa | |
1918 | ||
17795822 TS |
1919 | } // anon namespace |
1920 | ||
27a4cd48 DM |
1921 | gimple_opt_pass * |
1922 | make_pass_lower_vector_ssa (gcc::context *ctxt) | |
1923 | { | |
1924 | return new pass_lower_vector_ssa (ctxt); | |
1925 | } | |
1926 | ||
2b725155 | 1927 | #include "gt-tree-vect-generic.h" |