]>
Commit | Line | Data |
---|---|---|
2b725155 | 1 | /* Lower vector operations to scalar operations. |
5624e564 | 2 | Copyright (C) 2004-2015 Free Software Foundation, Inc. |
2b725155 RH |
3 | |
4 | This file is part of GCC. | |
b8698a0f | 5 | |
2b725155 RH |
6 | GCC is free software; you can redistribute it and/or modify it |
7 | under the terms of the GNU General Public License as published by the | |
9dcd6f09 | 8 | Free Software Foundation; either version 3, or (at your option) any |
2b725155 | 9 | later version. |
b8698a0f | 10 | |
2b725155 RH |
11 | GCC is distributed in the hope that it will be useful, but WITHOUT |
12 | ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or | |
13 | FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License | |
14 | for more details. | |
b8698a0f | 15 | |
2b725155 | 16 | You should have received a copy of the GNU General Public License |
9dcd6f09 NC |
17 | along with GCC; see the file COPYING3. If not see |
18 | <http://www.gnu.org/licenses/>. */ | |
2b725155 RH |
19 | |
20 | #include "config.h" | |
21 | #include "system.h" | |
22 | #include "coretypes.h" | |
40e23961 | 23 | #include "alias.h" |
c7131fb2 | 24 | #include "backend.h" |
2b725155 | 25 | #include "tree.h" |
c7131fb2 AM |
26 | #include "gimple.h" |
27 | #include "rtl.h" | |
28 | #include "ssa.h" | |
29 | #include "options.h" | |
40e23961 | 30 | #include "fold-const.h" |
d8a2d370 | 31 | #include "stor-layout.h" |
2b725155 | 32 | #include "langhooks.h" |
2fb9a547 AM |
33 | #include "internal-fn.h" |
34 | #include "tree-eh.h" | |
5be5c238 | 35 | #include "gimple-iterator.h" |
18f429e2 | 36 | #include "gimplify-me.h" |
442b4905 | 37 | #include "tree-cfg.h" |
2b725155 RH |
38 | #include "tree-iterator.h" |
39 | #include "tree-pass.h" | |
40 | #include "flags.h" | |
f90e8e2e | 41 | #include "diagnostic.h" |
0fcc85cd | 42 | #include "target.h" |
2b725155 | 43 | |
2eb79bbb | 44 | /* Need to include rtl.h, expr.h, etc. for optabs. */ |
36566b39 PK |
45 | #include "insn-config.h" |
46 | #include "expmed.h" | |
47 | #include "dojump.h" | |
48 | #include "explow.h" | |
49 | #include "calls.h" | |
50 | #include "emit-rtl.h" | |
51 | #include "varasm.h" | |
52 | #include "stmt.h" | |
2eb79bbb | 53 | #include "expr.h" |
b0710fe1 | 54 | #include "insn-codes.h" |
2eb79bbb | 55 | #include "optabs.h" |
2b725155 | 56 | |
d246ab4f AS |
57 | |
58 | static void expand_vector_operations_1 (gimple_stmt_iterator *); | |
59 | ||
60 | ||
2b725155 RH |
61 | /* Build a constant of type TYPE, made of VALUE's bits replicated |
62 | every TYPE_SIZE (INNER_TYPE) bits to fit TYPE's precision. */ | |
63 | static tree | |
64 | build_replicated_const (tree type, tree inner_type, HOST_WIDE_INT value) | |
65 | { | |
ae7e9ddd | 66 | int width = tree_to_uhwi (TYPE_SIZE (inner_type)); |
807e902e KZ |
67 | int n = (TYPE_PRECISION (type) + HOST_BITS_PER_WIDE_INT - 1) |
68 | / HOST_BITS_PER_WIDE_INT; | |
69 | unsigned HOST_WIDE_INT low, mask; | |
70 | HOST_WIDE_INT a[WIDE_INT_MAX_ELTS]; | |
71 | int i; | |
2b725155 | 72 | |
807e902e | 73 | gcc_assert (n && n <= WIDE_INT_MAX_ELTS); |
2b725155 RH |
74 | |
75 | if (width == HOST_BITS_PER_WIDE_INT) | |
76 | low = value; | |
77 | else | |
78 | { | |
79 | mask = ((HOST_WIDE_INT)1 << width) - 1; | |
80 | low = (unsigned HOST_WIDE_INT) ~0 / mask * (value & mask); | |
81 | } | |
82 | ||
807e902e KZ |
83 | for (i = 0; i < n; i++) |
84 | a[i] = low; | |
2b725155 | 85 | |
807e902e KZ |
86 | gcc_assert (TYPE_PRECISION (type) <= MAX_BITSIZE_MODE_ANY_INT); |
87 | return wide_int_to_tree | |
88 | (type, wide_int::from_array (a, n, TYPE_PRECISION (type))); | |
2b725155 RH |
89 | } |
90 | ||
91 | static GTY(()) tree vector_inner_type; | |
92 | static GTY(()) tree vector_last_type; | |
93 | static GTY(()) int vector_last_nunits; | |
94 | ||
95 | /* Return a suitable vector types made of SUBPARTS units each of mode | |
96 | "word_mode" (the global variable). */ | |
97 | static tree | |
98 | build_word_mode_vector_type (int nunits) | |
99 | { | |
100 | if (!vector_inner_type) | |
101 | vector_inner_type = lang_hooks.types.type_for_mode (word_mode, 1); | |
102 | else if (vector_last_nunits == nunits) | |
103 | { | |
104 | gcc_assert (TREE_CODE (vector_last_type) == VECTOR_TYPE); | |
105 | return vector_last_type; | |
106 | } | |
107 | ||
108 | /* We build a new type, but we canonicalize it nevertheless, | |
109 | because it still saves some memory. */ | |
110 | vector_last_nunits = nunits; | |
111 | vector_last_type = type_hash_canon (nunits, | |
112 | build_vector_type (vector_inner_type, | |
113 | nunits)); | |
114 | return vector_last_type; | |
115 | } | |
116 | ||
726a989a | 117 | typedef tree (*elem_op_func) (gimple_stmt_iterator *, |
2b725155 RH |
118 | tree, tree, tree, tree, tree, enum tree_code); |
119 | ||
120 | static inline tree | |
726a989a | 121 | tree_vec_extract (gimple_stmt_iterator *gsi, tree type, |
2b725155 RH |
122 | tree t, tree bitsize, tree bitpos) |
123 | { | |
124 | if (bitpos) | |
726a989a | 125 | return gimplify_build3 (gsi, BIT_FIELD_REF, type, t, bitsize, bitpos); |
2b725155 | 126 | else |
726a989a | 127 | return gimplify_build1 (gsi, VIEW_CONVERT_EXPR, type, t); |
2b725155 RH |
128 | } |
129 | ||
130 | static tree | |
726a989a | 131 | do_unop (gimple_stmt_iterator *gsi, tree inner_type, tree a, |
2b725155 RH |
132 | tree b ATTRIBUTE_UNUSED, tree bitpos, tree bitsize, |
133 | enum tree_code code) | |
134 | { | |
726a989a RB |
135 | a = tree_vec_extract (gsi, inner_type, a, bitsize, bitpos); |
136 | return gimplify_build1 (gsi, code, inner_type, a); | |
2b725155 RH |
137 | } |
138 | ||
139 | static tree | |
726a989a | 140 | do_binop (gimple_stmt_iterator *gsi, tree inner_type, tree a, tree b, |
2b725155 RH |
141 | tree bitpos, tree bitsize, enum tree_code code) |
142 | { | |
362235e9 RG |
143 | if (TREE_CODE (TREE_TYPE (a)) == VECTOR_TYPE) |
144 | a = tree_vec_extract (gsi, inner_type, a, bitsize, bitpos); | |
145 | if (TREE_CODE (TREE_TYPE (b)) == VECTOR_TYPE) | |
146 | b = tree_vec_extract (gsi, inner_type, b, bitsize, bitpos); | |
726a989a | 147 | return gimplify_build2 (gsi, code, inner_type, a, b); |
2b725155 RH |
148 | } |
149 | ||
d246ab4f AS |
150 | /* Construct expression (A[BITPOS] code B[BITPOS]) ? -1 : 0 |
151 | ||
152 | INNER_TYPE is the type of A and B elements | |
153 | ||
154 | returned expression is of signed integer type with the | |
155 | size equal to the size of INNER_TYPE. */ | |
156 | static tree | |
157 | do_compare (gimple_stmt_iterator *gsi, tree inner_type, tree a, tree b, | |
158 | tree bitpos, tree bitsize, enum tree_code code) | |
159 | { | |
160 | tree comp_type; | |
161 | ||
162 | a = tree_vec_extract (gsi, inner_type, a, bitsize, bitpos); | |
163 | b = tree_vec_extract (gsi, inner_type, b, bitsize, bitpos); | |
164 | ||
165 | comp_type = build_nonstandard_integer_type | |
166 | (GET_MODE_BITSIZE (TYPE_MODE (inner_type)), 0); | |
167 | ||
168 | return gimplify_build3 (gsi, COND_EXPR, comp_type, | |
169 | fold_build2 (code, boolean_type_node, a, b), | |
170 | build_int_cst (comp_type, -1), | |
171 | build_int_cst (comp_type, 0)); | |
172 | } | |
173 | ||
2b725155 RH |
174 | /* Expand vector addition to scalars. This does bit twiddling |
175 | in order to increase parallelism: | |
176 | ||
177 | a + b = (((int) a & 0x7f7f7f7f) + ((int) b & 0x7f7f7f7f)) ^ | |
178 | (a ^ b) & 0x80808080 | |
179 | ||
180 | a - b = (((int) a | 0x80808080) - ((int) b & 0x7f7f7f7f)) ^ | |
181 | (a ^ ~b) & 0x80808080 | |
182 | ||
183 | -b = (0x80808080 - ((int) b & 0x7f7f7f7f)) ^ (~b & 0x80808080) | |
184 | ||
185 | This optimization should be done only if 4 vector items or more | |
186 | fit into a word. */ | |
187 | static tree | |
726a989a | 188 | do_plus_minus (gimple_stmt_iterator *gsi, tree word_type, tree a, tree b, |
2b725155 RH |
189 | tree bitpos ATTRIBUTE_UNUSED, tree bitsize ATTRIBUTE_UNUSED, |
190 | enum tree_code code) | |
191 | { | |
192 | tree inner_type = TREE_TYPE (TREE_TYPE (a)); | |
193 | unsigned HOST_WIDE_INT max; | |
194 | tree low_bits, high_bits, a_low, b_low, result_low, signs; | |
195 | ||
196 | max = GET_MODE_MASK (TYPE_MODE (inner_type)); | |
197 | low_bits = build_replicated_const (word_type, inner_type, max >> 1); | |
198 | high_bits = build_replicated_const (word_type, inner_type, max & ~(max >> 1)); | |
199 | ||
726a989a RB |
200 | a = tree_vec_extract (gsi, word_type, a, bitsize, bitpos); |
201 | b = tree_vec_extract (gsi, word_type, b, bitsize, bitpos); | |
2b725155 | 202 | |
726a989a RB |
203 | signs = gimplify_build2 (gsi, BIT_XOR_EXPR, word_type, a, b); |
204 | b_low = gimplify_build2 (gsi, BIT_AND_EXPR, word_type, b, low_bits); | |
2b725155 | 205 | if (code == PLUS_EXPR) |
726a989a | 206 | a_low = gimplify_build2 (gsi, BIT_AND_EXPR, word_type, a, low_bits); |
2b725155 RH |
207 | else |
208 | { | |
726a989a RB |
209 | a_low = gimplify_build2 (gsi, BIT_IOR_EXPR, word_type, a, high_bits); |
210 | signs = gimplify_build1 (gsi, BIT_NOT_EXPR, word_type, signs); | |
2b725155 RH |
211 | } |
212 | ||
726a989a RB |
213 | signs = gimplify_build2 (gsi, BIT_AND_EXPR, word_type, signs, high_bits); |
214 | result_low = gimplify_build2 (gsi, code, word_type, a_low, b_low); | |
215 | return gimplify_build2 (gsi, BIT_XOR_EXPR, word_type, result_low, signs); | |
2b725155 RH |
216 | } |
217 | ||
218 | static tree | |
726a989a | 219 | do_negate (gimple_stmt_iterator *gsi, tree word_type, tree b, |
2b725155 RH |
220 | tree unused ATTRIBUTE_UNUSED, tree bitpos ATTRIBUTE_UNUSED, |
221 | tree bitsize ATTRIBUTE_UNUSED, | |
222 | enum tree_code code ATTRIBUTE_UNUSED) | |
223 | { | |
224 | tree inner_type = TREE_TYPE (TREE_TYPE (b)); | |
225 | HOST_WIDE_INT max; | |
226 | tree low_bits, high_bits, b_low, result_low, signs; | |
227 | ||
228 | max = GET_MODE_MASK (TYPE_MODE (inner_type)); | |
229 | low_bits = build_replicated_const (word_type, inner_type, max >> 1); | |
230 | high_bits = build_replicated_const (word_type, inner_type, max & ~(max >> 1)); | |
231 | ||
726a989a | 232 | b = tree_vec_extract (gsi, word_type, b, bitsize, bitpos); |
2b725155 | 233 | |
726a989a RB |
234 | b_low = gimplify_build2 (gsi, BIT_AND_EXPR, word_type, b, low_bits); |
235 | signs = gimplify_build1 (gsi, BIT_NOT_EXPR, word_type, b); | |
236 | signs = gimplify_build2 (gsi, BIT_AND_EXPR, word_type, signs, high_bits); | |
237 | result_low = gimplify_build2 (gsi, MINUS_EXPR, word_type, high_bits, b_low); | |
238 | return gimplify_build2 (gsi, BIT_XOR_EXPR, word_type, result_low, signs); | |
2b725155 RH |
239 | } |
240 | ||
241 | /* Expand a vector operation to scalars, by using many operations | |
242 | whose type is the vector type's inner type. */ | |
243 | static tree | |
726a989a | 244 | expand_vector_piecewise (gimple_stmt_iterator *gsi, elem_op_func f, |
2b725155 RH |
245 | tree type, tree inner_type, |
246 | tree a, tree b, enum tree_code code) | |
247 | { | |
9771b263 | 248 | vec<constructor_elt, va_gc> *v; |
2b725155 RH |
249 | tree part_width = TYPE_SIZE (inner_type); |
250 | tree index = bitsize_int (0); | |
251 | int nunits = TYPE_VECTOR_SUBPARTS (type); | |
ae7e9ddd RS |
252 | int delta = tree_to_uhwi (part_width) |
253 | / tree_to_uhwi (TYPE_SIZE (TREE_TYPE (type))); | |
2b725155 | 254 | int i; |
cdbb5ba3 AS |
255 | location_t loc = gimple_location (gsi_stmt (*gsi)); |
256 | ||
257 | if (types_compatible_p (gimple_expr_type (gsi_stmt (*gsi)), type)) | |
258 | warning_at (loc, OPT_Wvector_operation_performance, | |
259 | "vector operation will be expanded piecewise"); | |
260 | else | |
261 | warning_at (loc, OPT_Wvector_operation_performance, | |
262 | "vector operation will be expanded in parallel"); | |
2b725155 | 263 | |
9771b263 | 264 | vec_alloc (v, (nunits + delta - 1) / delta); |
2b725155 | 265 | for (i = 0; i < nunits; |
d35936ab | 266 | i += delta, index = int_const_binop (PLUS_EXPR, index, part_width)) |
2b725155 | 267 | { |
726a989a | 268 | tree result = f (gsi, inner_type, a, b, index, part_width, code); |
f32682ca | 269 | constructor_elt ce = {NULL_TREE, result}; |
9771b263 | 270 | v->quick_push (ce); |
2b725155 RH |
271 | } |
272 | ||
4038c495 | 273 | return build_constructor (type, v); |
2b725155 RH |
274 | } |
275 | ||
276 | /* Expand a vector operation to scalars with the freedom to use | |
277 | a scalar integer type, or to use a different size for the items | |
278 | in the vector type. */ | |
279 | static tree | |
726a989a | 280 | expand_vector_parallel (gimple_stmt_iterator *gsi, elem_op_func f, tree type, |
2b725155 RH |
281 | tree a, tree b, |
282 | enum tree_code code) | |
283 | { | |
284 | tree result, compute_type; | |
ef4bddc2 | 285 | machine_mode mode; |
ae7e9ddd | 286 | int n_words = tree_to_uhwi (TYPE_SIZE_UNIT (type)) / UNITS_PER_WORD; |
cdbb5ba3 | 287 | location_t loc = gimple_location (gsi_stmt (*gsi)); |
2b725155 RH |
288 | |
289 | /* We have three strategies. If the type is already correct, just do | |
290 | the operation an element at a time. Else, if the vector is wider than | |
291 | one word, do it a word at a time; finally, if the vector is smaller | |
292 | than one word, do it as a scalar. */ | |
293 | if (TYPE_MODE (TREE_TYPE (type)) == word_mode) | |
726a989a | 294 | return expand_vector_piecewise (gsi, f, |
2b725155 RH |
295 | type, TREE_TYPE (type), |
296 | a, b, code); | |
297 | else if (n_words > 1) | |
298 | { | |
299 | tree word_type = build_word_mode_vector_type (n_words); | |
726a989a | 300 | result = expand_vector_piecewise (gsi, f, |
2b725155 RH |
301 | word_type, TREE_TYPE (word_type), |
302 | a, b, code); | |
726a989a RB |
303 | result = force_gimple_operand_gsi (gsi, result, true, NULL, true, |
304 | GSI_SAME_STMT); | |
2b725155 RH |
305 | } |
306 | else | |
307 | { | |
308 | /* Use a single scalar operation with a mode no wider than word_mode. */ | |
ae7e9ddd | 309 | mode = mode_for_size (tree_to_uhwi (TYPE_SIZE (type)), MODE_INT, 0); |
2b725155 | 310 | compute_type = lang_hooks.types.type_for_mode (mode, 1); |
726a989a | 311 | result = f (gsi, compute_type, a, b, NULL_TREE, NULL_TREE, code); |
cdbb5ba3 AS |
312 | warning_at (loc, OPT_Wvector_operation_performance, |
313 | "vector operation will be expanded with a " | |
314 | "single scalar operation"); | |
2b725155 RH |
315 | } |
316 | ||
317 | return result; | |
318 | } | |
319 | ||
320 | /* Expand a vector operation to scalars; for integer types we can use | |
321 | special bit twiddling tricks to do the sums a word at a time, using | |
322 | function F_PARALLEL instead of F. These tricks are done only if | |
323 | they can process at least four items, that is, only if the vector | |
324 | holds at least four items and if a word can hold four items. */ | |
325 | static tree | |
726a989a | 326 | expand_vector_addition (gimple_stmt_iterator *gsi, |
2b725155 RH |
327 | elem_op_func f, elem_op_func f_parallel, |
328 | tree type, tree a, tree b, enum tree_code code) | |
329 | { | |
330 | int parts_per_word = UNITS_PER_WORD | |
ae7e9ddd | 331 | / tree_to_uhwi (TYPE_SIZE_UNIT (TREE_TYPE (type))); |
2b725155 RH |
332 | |
333 | if (INTEGRAL_TYPE_P (TREE_TYPE (type)) | |
334 | && parts_per_word >= 4 | |
335 | && TYPE_VECTOR_SUBPARTS (type) >= 4) | |
726a989a | 336 | return expand_vector_parallel (gsi, f_parallel, |
2b725155 RH |
337 | type, a, b, code); |
338 | else | |
726a989a | 339 | return expand_vector_piecewise (gsi, f, |
2b725155 RH |
340 | type, TREE_TYPE (type), |
341 | a, b, code); | |
342 | } | |
343 | ||
d246ab4f AS |
344 | /* Try to expand vector comparison expression OP0 CODE OP1 by |
345 | querying optab if the following expression: | |
346 | VEC_COND_EXPR< OP0 CODE OP1, {-1,...}, {0,...}> | |
347 | can be expanded. */ | |
348 | static tree | |
349 | expand_vector_comparison (gimple_stmt_iterator *gsi, tree type, tree op0, | |
350 | tree op1, enum tree_code code) | |
351 | { | |
352 | tree t; | |
353 | if (! expand_vec_cond_expr_p (type, TREE_TYPE (op0))) | |
354 | t = expand_vector_piecewise (gsi, do_compare, type, | |
355 | TREE_TYPE (TREE_TYPE (op0)), op0, op1, code); | |
356 | else | |
357 | t = NULL_TREE; | |
358 | ||
359 | return t; | |
360 | } | |
361 | ||
4ee4c52c JJ |
362 | /* Helper function of expand_vector_divmod. Gimplify a RSHIFT_EXPR in type |
363 | of OP0 with shift counts in SHIFTCNTS array and return the temporary holding | |
364 | the result if successful, otherwise return NULL_TREE. */ | |
365 | static tree | |
366 | add_rshift (gimple_stmt_iterator *gsi, tree type, tree op0, int *shiftcnts) | |
367 | { | |
368 | optab op; | |
369 | unsigned int i, nunits = TYPE_VECTOR_SUBPARTS (type); | |
370 | bool scalar_shift = true; | |
371 | ||
372 | for (i = 1; i < nunits; i++) | |
373 | { | |
374 | if (shiftcnts[i] != shiftcnts[0]) | |
375 | scalar_shift = false; | |
376 | } | |
377 | ||
378 | if (scalar_shift && shiftcnts[0] == 0) | |
379 | return op0; | |
380 | ||
381 | if (scalar_shift) | |
382 | { | |
383 | op = optab_for_tree_code (RSHIFT_EXPR, type, optab_scalar); | |
2225b9f2 | 384 | if (op != unknown_optab |
4ee4c52c JJ |
385 | && optab_handler (op, TYPE_MODE (type)) != CODE_FOR_nothing) |
386 | return gimplify_build2 (gsi, RSHIFT_EXPR, type, op0, | |
387 | build_int_cst (NULL_TREE, shiftcnts[0])); | |
388 | } | |
389 | ||
390 | op = optab_for_tree_code (RSHIFT_EXPR, type, optab_vector); | |
2225b9f2 | 391 | if (op != unknown_optab |
4ee4c52c JJ |
392 | && optab_handler (op, TYPE_MODE (type)) != CODE_FOR_nothing) |
393 | { | |
394 | tree *vec = XALLOCAVEC (tree, nunits); | |
395 | for (i = 0; i < nunits; i++) | |
396 | vec[i] = build_int_cst (TREE_TYPE (type), shiftcnts[i]); | |
397 | return gimplify_build2 (gsi, RSHIFT_EXPR, type, op0, | |
398 | build_vector (type, vec)); | |
399 | } | |
400 | ||
401 | return NULL_TREE; | |
402 | } | |
403 | ||
404 | /* Try to expand integer vector division by constant using | |
405 | widening multiply, shifts and additions. */ | |
406 | static tree | |
407 | expand_vector_divmod (gimple_stmt_iterator *gsi, tree type, tree op0, | |
408 | tree op1, enum tree_code code) | |
409 | { | |
410 | bool use_pow2 = true; | |
411 | bool has_vector_shift = true; | |
412 | int mode = -1, this_mode; | |
413 | int pre_shift = -1, post_shift; | |
414 | unsigned int nunits = TYPE_VECTOR_SUBPARTS (type); | |
415 | int *shifts = XALLOCAVEC (int, nunits * 4); | |
416 | int *pre_shifts = shifts + nunits; | |
417 | int *post_shifts = pre_shifts + nunits; | |
418 | int *shift_temps = post_shifts + nunits; | |
419 | unsigned HOST_WIDE_INT *mulc = XALLOCAVEC (unsigned HOST_WIDE_INT, nunits); | |
420 | int prec = TYPE_PRECISION (TREE_TYPE (type)); | |
421 | int dummy_int; | |
807e902e KZ |
422 | unsigned int i; |
423 | signop sign_p = TYPE_SIGN (TREE_TYPE (type)); | |
4ee4c52c | 424 | unsigned HOST_WIDE_INT mask = GET_MODE_MASK (TYPE_MODE (TREE_TYPE (type))); |
4ee4c52c | 425 | tree *vec; |
00f07b86 RH |
426 | tree cur_op, mulcst, tem; |
427 | optab op; | |
4ee4c52c JJ |
428 | |
429 | if (prec > HOST_BITS_PER_WIDE_INT) | |
430 | return NULL_TREE; | |
431 | ||
432 | op = optab_for_tree_code (RSHIFT_EXPR, type, optab_vector); | |
2225b9f2 | 433 | if (op == unknown_optab |
4ee4c52c JJ |
434 | || optab_handler (op, TYPE_MODE (type)) == CODE_FOR_nothing) |
435 | has_vector_shift = false; | |
436 | ||
437 | /* Analysis phase. Determine if all op1 elements are either power | |
438 | of two and it is possible to expand it using shifts (or for remainder | |
439 | using masking). Additionally compute the multiplicative constants | |
440 | and pre and post shifts if the division is to be expanded using | |
441 | widening or high part multiplication plus shifts. */ | |
442 | for (i = 0; i < nunits; i++) | |
443 | { | |
444 | tree cst = VECTOR_CST_ELT (op1, i); | |
445 | unsigned HOST_WIDE_INT ml; | |
446 | ||
6b58915b | 447 | if (TREE_CODE (cst) != INTEGER_CST || integer_zerop (cst)) |
4ee4c52c JJ |
448 | return NULL_TREE; |
449 | pre_shifts[i] = 0; | |
450 | post_shifts[i] = 0; | |
451 | mulc[i] = 0; | |
452 | if (use_pow2 | |
453 | && (!integer_pow2p (cst) || tree_int_cst_sgn (cst) != 1)) | |
454 | use_pow2 = false; | |
455 | if (use_pow2) | |
456 | { | |
457 | shifts[i] = tree_log2 (cst); | |
458 | if (shifts[i] != shifts[0] | |
459 | && code == TRUNC_DIV_EXPR | |
460 | && !has_vector_shift) | |
461 | use_pow2 = false; | |
462 | } | |
463 | if (mode == -2) | |
464 | continue; | |
807e902e | 465 | if (sign_p == UNSIGNED) |
4ee4c52c JJ |
466 | { |
467 | unsigned HOST_WIDE_INT mh; | |
6b58915b | 468 | unsigned HOST_WIDE_INT d = TREE_INT_CST_LOW (cst) & mask; |
4ee4c52c JJ |
469 | |
470 | if (d >= ((unsigned HOST_WIDE_INT) 1 << (prec - 1))) | |
471 | /* FIXME: Can transform this into op0 >= op1 ? 1 : 0. */ | |
472 | return NULL_TREE; | |
473 | ||
474 | if (d <= 1) | |
475 | { | |
476 | mode = -2; | |
477 | continue; | |
478 | } | |
479 | ||
480 | /* Find a suitable multiplier and right shift count | |
481 | instead of multiplying with D. */ | |
482 | mh = choose_multiplier (d, prec, prec, &ml, &post_shift, &dummy_int); | |
483 | ||
484 | /* If the suggested multiplier is more than SIZE bits, we can | |
485 | do better for even divisors, using an initial right shift. */ | |
486 | if ((mh != 0 && (d & 1) == 0) | |
487 | || (!has_vector_shift && pre_shift != -1)) | |
488 | { | |
489 | if (has_vector_shift) | |
490 | pre_shift = floor_log2 (d & -d); | |
491 | else if (pre_shift == -1) | |
492 | { | |
493 | unsigned int j; | |
494 | for (j = 0; j < nunits; j++) | |
495 | { | |
496 | tree cst2 = VECTOR_CST_ELT (op1, j); | |
497 | unsigned HOST_WIDE_INT d2; | |
498 | int this_pre_shift; | |
499 | ||
cc269bb6 | 500 | if (!tree_fits_uhwi_p (cst2)) |
4ee4c52c | 501 | return NULL_TREE; |
ae7e9ddd | 502 | d2 = tree_to_uhwi (cst2) & mask; |
4ee4c52c JJ |
503 | if (d2 == 0) |
504 | return NULL_TREE; | |
505 | this_pre_shift = floor_log2 (d2 & -d2); | |
506 | if (pre_shift == -1 || this_pre_shift < pre_shift) | |
507 | pre_shift = this_pre_shift; | |
508 | } | |
509 | if (i != 0 && pre_shift != 0) | |
510 | { | |
511 | /* Restart. */ | |
512 | i = -1U; | |
513 | mode = -1; | |
514 | continue; | |
515 | } | |
516 | } | |
517 | if (pre_shift != 0) | |
518 | { | |
519 | if ((d >> pre_shift) <= 1) | |
520 | { | |
521 | mode = -2; | |
522 | continue; | |
523 | } | |
524 | mh = choose_multiplier (d >> pre_shift, prec, | |
525 | prec - pre_shift, | |
526 | &ml, &post_shift, &dummy_int); | |
527 | gcc_assert (!mh); | |
528 | pre_shifts[i] = pre_shift; | |
529 | } | |
530 | } | |
531 | if (!mh) | |
532 | this_mode = 0; | |
533 | else | |
534 | this_mode = 1; | |
535 | } | |
536 | else | |
537 | { | |
6b58915b | 538 | HOST_WIDE_INT d = TREE_INT_CST_LOW (cst); |
4ee4c52c JJ |
539 | unsigned HOST_WIDE_INT abs_d; |
540 | ||
541 | if (d == -1) | |
542 | return NULL_TREE; | |
543 | ||
544 | /* Since d might be INT_MIN, we have to cast to | |
545 | unsigned HOST_WIDE_INT before negating to avoid | |
546 | undefined signed overflow. */ | |
547 | abs_d = (d >= 0 | |
548 | ? (unsigned HOST_WIDE_INT) d | |
549 | : - (unsigned HOST_WIDE_INT) d); | |
550 | ||
551 | /* n rem d = n rem -d */ | |
552 | if (code == TRUNC_MOD_EXPR && d < 0) | |
553 | d = abs_d; | |
554 | else if (abs_d == (unsigned HOST_WIDE_INT) 1 << (prec - 1)) | |
555 | { | |
556 | /* This case is not handled correctly below. */ | |
557 | mode = -2; | |
558 | continue; | |
559 | } | |
560 | if (abs_d <= 1) | |
561 | { | |
562 | mode = -2; | |
563 | continue; | |
564 | } | |
565 | ||
566 | choose_multiplier (abs_d, prec, prec - 1, &ml, | |
567 | &post_shift, &dummy_int); | |
568 | if (ml >= (unsigned HOST_WIDE_INT) 1 << (prec - 1)) | |
569 | { | |
570 | this_mode = 4 + (d < 0); | |
571 | ml |= (~(unsigned HOST_WIDE_INT) 0) << (prec - 1); | |
572 | } | |
573 | else | |
574 | this_mode = 2 + (d < 0); | |
575 | } | |
576 | mulc[i] = ml; | |
577 | post_shifts[i] = post_shift; | |
578 | if ((i && !has_vector_shift && post_shifts[0] != post_shift) | |
579 | || post_shift >= prec | |
580 | || pre_shifts[i] >= prec) | |
581 | this_mode = -2; | |
582 | ||
583 | if (i == 0) | |
584 | mode = this_mode; | |
585 | else if (mode != this_mode) | |
586 | mode = -2; | |
587 | } | |
588 | ||
589 | vec = XALLOCAVEC (tree, nunits); | |
590 | ||
591 | if (use_pow2) | |
592 | { | |
593 | tree addend = NULL_TREE; | |
807e902e | 594 | if (sign_p == SIGNED) |
4ee4c52c JJ |
595 | { |
596 | tree uns_type; | |
597 | ||
598 | /* Both division and remainder sequences need | |
599 | op0 < 0 ? mask : 0 computed. It can be either computed as | |
600 | (type) (((uns_type) (op0 >> (prec - 1))) >> (prec - shifts[i])) | |
601 | if none of the shifts is 0, or as the conditional. */ | |
602 | for (i = 0; i < nunits; i++) | |
603 | if (shifts[i] == 0) | |
604 | break; | |
605 | uns_type | |
606 | = build_vector_type (build_nonstandard_integer_type (prec, 1), | |
607 | nunits); | |
608 | if (i == nunits && TYPE_MODE (uns_type) == TYPE_MODE (type)) | |
609 | { | |
610 | for (i = 0; i < nunits; i++) | |
611 | shift_temps[i] = prec - 1; | |
612 | cur_op = add_rshift (gsi, type, op0, shift_temps); | |
613 | if (cur_op != NULL_TREE) | |
614 | { | |
615 | cur_op = gimplify_build1 (gsi, VIEW_CONVERT_EXPR, | |
616 | uns_type, cur_op); | |
617 | for (i = 0; i < nunits; i++) | |
618 | shift_temps[i] = prec - shifts[i]; | |
619 | cur_op = add_rshift (gsi, uns_type, cur_op, shift_temps); | |
620 | if (cur_op != NULL_TREE) | |
621 | addend = gimplify_build1 (gsi, VIEW_CONVERT_EXPR, | |
622 | type, cur_op); | |
623 | } | |
624 | } | |
625 | if (addend == NULL_TREE | |
626 | && expand_vec_cond_expr_p (type, type)) | |
627 | { | |
628 | tree zero, cst, cond; | |
629 | gimple stmt; | |
630 | ||
631 | zero = build_zero_cst (type); | |
632 | cond = build2 (LT_EXPR, type, op0, zero); | |
633 | for (i = 0; i < nunits; i++) | |
634 | vec[i] = build_int_cst (TREE_TYPE (type), | |
635 | ((unsigned HOST_WIDE_INT) 1 | |
636 | << shifts[i]) - 1); | |
637 | cst = build_vector (type, vec); | |
b731b390 | 638 | addend = make_ssa_name (type); |
0d0e4a03 JJ |
639 | stmt = gimple_build_assign (addend, VEC_COND_EXPR, cond, |
640 | cst, zero); | |
4ee4c52c JJ |
641 | gsi_insert_before (gsi, stmt, GSI_SAME_STMT); |
642 | } | |
643 | } | |
644 | if (code == TRUNC_DIV_EXPR) | |
645 | { | |
807e902e | 646 | if (sign_p == UNSIGNED) |
4ee4c52c JJ |
647 | { |
648 | /* q = op0 >> shift; */ | |
649 | cur_op = add_rshift (gsi, type, op0, shifts); | |
650 | if (cur_op != NULL_TREE) | |
651 | return cur_op; | |
652 | } | |
653 | else if (addend != NULL_TREE) | |
654 | { | |
655 | /* t1 = op0 + addend; | |
656 | q = t1 >> shift; */ | |
657 | op = optab_for_tree_code (PLUS_EXPR, type, optab_default); | |
2225b9f2 | 658 | if (op != unknown_optab |
4ee4c52c JJ |
659 | && optab_handler (op, TYPE_MODE (type)) != CODE_FOR_nothing) |
660 | { | |
661 | cur_op = gimplify_build2 (gsi, PLUS_EXPR, type, op0, addend); | |
662 | cur_op = add_rshift (gsi, type, cur_op, shifts); | |
663 | if (cur_op != NULL_TREE) | |
664 | return cur_op; | |
665 | } | |
666 | } | |
667 | } | |
668 | else | |
669 | { | |
670 | tree mask; | |
671 | for (i = 0; i < nunits; i++) | |
672 | vec[i] = build_int_cst (TREE_TYPE (type), | |
673 | ((unsigned HOST_WIDE_INT) 1 | |
674 | << shifts[i]) - 1); | |
675 | mask = build_vector (type, vec); | |
676 | op = optab_for_tree_code (BIT_AND_EXPR, type, optab_default); | |
2225b9f2 | 677 | if (op != unknown_optab |
4ee4c52c JJ |
678 | && optab_handler (op, TYPE_MODE (type)) != CODE_FOR_nothing) |
679 | { | |
807e902e | 680 | if (sign_p == UNSIGNED) |
4ee4c52c JJ |
681 | /* r = op0 & mask; */ |
682 | return gimplify_build2 (gsi, BIT_AND_EXPR, type, op0, mask); | |
683 | else if (addend != NULL_TREE) | |
684 | { | |
685 | /* t1 = op0 + addend; | |
686 | t2 = t1 & mask; | |
687 | r = t2 - addend; */ | |
688 | op = optab_for_tree_code (PLUS_EXPR, type, optab_default); | |
2225b9f2 | 689 | if (op != unknown_optab |
4ee4c52c JJ |
690 | && optab_handler (op, TYPE_MODE (type)) |
691 | != CODE_FOR_nothing) | |
692 | { | |
693 | cur_op = gimplify_build2 (gsi, PLUS_EXPR, type, op0, | |
694 | addend); | |
695 | cur_op = gimplify_build2 (gsi, BIT_AND_EXPR, type, | |
696 | cur_op, mask); | |
697 | op = optab_for_tree_code (MINUS_EXPR, type, | |
698 | optab_default); | |
2225b9f2 | 699 | if (op != unknown_optab |
4ee4c52c JJ |
700 | && optab_handler (op, TYPE_MODE (type)) |
701 | != CODE_FOR_nothing) | |
702 | return gimplify_build2 (gsi, MINUS_EXPR, type, | |
703 | cur_op, addend); | |
704 | } | |
705 | } | |
706 | } | |
707 | } | |
708 | } | |
709 | ||
710 | if (mode == -2 || BYTES_BIG_ENDIAN != WORDS_BIG_ENDIAN) | |
711 | return NULL_TREE; | |
712 | ||
00f07b86 RH |
713 | if (!can_mult_highpart_p (TYPE_MODE (type), TYPE_UNSIGNED (type))) |
714 | return NULL_TREE; | |
4ee4c52c JJ |
715 | |
716 | cur_op = op0; | |
717 | ||
718 | switch (mode) | |
719 | { | |
720 | case 0: | |
807e902e | 721 | gcc_assert (sign_p == UNSIGNED); |
4ee4c52c | 722 | /* t1 = oprnd0 >> pre_shift; |
c9ba3307 | 723 | t2 = t1 h* ml; |
4ee4c52c JJ |
724 | q = t2 >> post_shift; */ |
725 | cur_op = add_rshift (gsi, type, cur_op, pre_shifts); | |
726 | if (cur_op == NULL_TREE) | |
727 | return NULL_TREE; | |
728 | break; | |
729 | case 1: | |
807e902e | 730 | gcc_assert (sign_p == UNSIGNED); |
4ee4c52c JJ |
731 | for (i = 0; i < nunits; i++) |
732 | { | |
733 | shift_temps[i] = 1; | |
734 | post_shifts[i]--; | |
735 | } | |
736 | break; | |
737 | case 2: | |
738 | case 3: | |
739 | case 4: | |
740 | case 5: | |
807e902e | 741 | gcc_assert (sign_p == SIGNED); |
4ee4c52c JJ |
742 | for (i = 0; i < nunits; i++) |
743 | shift_temps[i] = prec - 1; | |
744 | break; | |
745 | default: | |
746 | return NULL_TREE; | |
747 | } | |
748 | ||
749 | for (i = 0; i < nunits; i++) | |
750 | vec[i] = build_int_cst (TREE_TYPE (type), mulc[i]); | |
751 | mulcst = build_vector (type, vec); | |
0fcc85cd | 752 | |
00f07b86 | 753 | cur_op = gimplify_build2 (gsi, MULT_HIGHPART_EXPR, type, cur_op, mulcst); |
4ee4c52c JJ |
754 | |
755 | switch (mode) | |
756 | { | |
757 | case 0: | |
758 | /* t1 = oprnd0 >> pre_shift; | |
c9ba3307 | 759 | t2 = t1 h* ml; |
4ee4c52c JJ |
760 | q = t2 >> post_shift; */ |
761 | cur_op = add_rshift (gsi, type, cur_op, post_shifts); | |
762 | break; | |
763 | case 1: | |
c9ba3307 | 764 | /* t1 = oprnd0 h* ml; |
4ee4c52c JJ |
765 | t2 = oprnd0 - t1; |
766 | t3 = t2 >> 1; | |
767 | t4 = t1 + t3; | |
768 | q = t4 >> (post_shift - 1); */ | |
769 | op = optab_for_tree_code (MINUS_EXPR, type, optab_default); | |
2225b9f2 | 770 | if (op == unknown_optab |
4ee4c52c JJ |
771 | || optab_handler (op, TYPE_MODE (type)) == CODE_FOR_nothing) |
772 | return NULL_TREE; | |
773 | tem = gimplify_build2 (gsi, MINUS_EXPR, type, op0, cur_op); | |
774 | tem = add_rshift (gsi, type, tem, shift_temps); | |
775 | op = optab_for_tree_code (PLUS_EXPR, type, optab_default); | |
2225b9f2 | 776 | if (op == unknown_optab |
4ee4c52c JJ |
777 | || optab_handler (op, TYPE_MODE (type)) == CODE_FOR_nothing) |
778 | return NULL_TREE; | |
779 | tem = gimplify_build2 (gsi, PLUS_EXPR, type, cur_op, tem); | |
780 | cur_op = add_rshift (gsi, type, tem, post_shifts); | |
781 | if (cur_op == NULL_TREE) | |
782 | return NULL_TREE; | |
783 | break; | |
784 | case 2: | |
785 | case 3: | |
786 | case 4: | |
787 | case 5: | |
c9ba3307 | 788 | /* t1 = oprnd0 h* ml; |
4ee4c52c JJ |
789 | t2 = t1; [ iff (mode & 2) != 0 ] |
790 | t2 = t1 + oprnd0; [ iff (mode & 2) == 0 ] | |
791 | t3 = t2 >> post_shift; | |
792 | t4 = oprnd0 >> (prec - 1); | |
793 | q = t3 - t4; [ iff (mode & 1) == 0 ] | |
794 | q = t4 - t3; [ iff (mode & 1) != 0 ] */ | |
795 | if ((mode & 2) == 0) | |
796 | { | |
797 | op = optab_for_tree_code (PLUS_EXPR, type, optab_default); | |
2225b9f2 | 798 | if (op == unknown_optab |
4ee4c52c JJ |
799 | || optab_handler (op, TYPE_MODE (type)) == CODE_FOR_nothing) |
800 | return NULL_TREE; | |
801 | cur_op = gimplify_build2 (gsi, PLUS_EXPR, type, cur_op, op0); | |
802 | } | |
803 | cur_op = add_rshift (gsi, type, cur_op, post_shifts); | |
804 | if (cur_op == NULL_TREE) | |
805 | return NULL_TREE; | |
806 | tem = add_rshift (gsi, type, op0, shift_temps); | |
807 | if (tem == NULL_TREE) | |
808 | return NULL_TREE; | |
809 | op = optab_for_tree_code (MINUS_EXPR, type, optab_default); | |
2225b9f2 | 810 | if (op == unknown_optab |
4ee4c52c JJ |
811 | || optab_handler (op, TYPE_MODE (type)) == CODE_FOR_nothing) |
812 | return NULL_TREE; | |
813 | if ((mode & 1) == 0) | |
814 | cur_op = gimplify_build2 (gsi, MINUS_EXPR, type, cur_op, tem); | |
815 | else | |
816 | cur_op = gimplify_build2 (gsi, MINUS_EXPR, type, tem, cur_op); | |
817 | break; | |
818 | default: | |
819 | gcc_unreachable (); | |
820 | } | |
821 | ||
822 | if (code == TRUNC_DIV_EXPR) | |
823 | return cur_op; | |
824 | ||
825 | /* We divided. Now finish by: | |
826 | t1 = q * oprnd1; | |
827 | r = oprnd0 - t1; */ | |
828 | op = optab_for_tree_code (MULT_EXPR, type, optab_default); | |
2225b9f2 | 829 | if (op == unknown_optab |
4ee4c52c JJ |
830 | || optab_handler (op, TYPE_MODE (type)) == CODE_FOR_nothing) |
831 | return NULL_TREE; | |
832 | tem = gimplify_build2 (gsi, MULT_EXPR, type, cur_op, op1); | |
833 | op = optab_for_tree_code (MINUS_EXPR, type, optab_default); | |
2225b9f2 | 834 | if (op == unknown_optab |
4ee4c52c JJ |
835 | || optab_handler (op, TYPE_MODE (type)) == CODE_FOR_nothing) |
836 | return NULL_TREE; | |
837 | return gimplify_build2 (gsi, MINUS_EXPR, type, op0, tem); | |
838 | } | |
839 | ||
374ab2d7 MG |
840 | /* Expand a vector condition to scalars, by using many conditions |
841 | on the vector's elements. */ | |
842 | static void | |
843 | expand_vector_condition (gimple_stmt_iterator *gsi) | |
844 | { | |
538dd0b7 | 845 | gassign *stmt = as_a <gassign *> (gsi_stmt (*gsi)); |
374ab2d7 MG |
846 | tree type = gimple_expr_type (stmt); |
847 | tree a = gimple_assign_rhs1 (stmt); | |
848 | tree a1 = a; | |
849 | tree a2; | |
850 | bool a_is_comparison = false; | |
851 | tree b = gimple_assign_rhs2 (stmt); | |
852 | tree c = gimple_assign_rhs3 (stmt); | |
9771b263 | 853 | vec<constructor_elt, va_gc> *v; |
374ab2d7 MG |
854 | tree constr; |
855 | tree inner_type = TREE_TYPE (type); | |
856 | tree cond_type = TREE_TYPE (TREE_TYPE (a)); | |
857 | tree comp_inner_type = cond_type; | |
858 | tree width = TYPE_SIZE (inner_type); | |
859 | tree index = bitsize_int (0); | |
860 | int nunits = TYPE_VECTOR_SUBPARTS (type); | |
861 | int i; | |
862 | location_t loc = gimple_location (gsi_stmt (*gsi)); | |
863 | ||
784fb9b3 | 864 | if (!is_gimple_val (a)) |
374ab2d7 MG |
865 | { |
866 | gcc_assert (COMPARISON_CLASS_P (a)); | |
867 | a_is_comparison = true; | |
868 | a1 = TREE_OPERAND (a, 0); | |
869 | a2 = TREE_OPERAND (a, 1); | |
870 | comp_inner_type = TREE_TYPE (TREE_TYPE (a1)); | |
871 | } | |
872 | ||
873 | if (expand_vec_cond_expr_p (type, TREE_TYPE (a1))) | |
874 | return; | |
875 | ||
876 | /* TODO: try and find a smaller vector type. */ | |
877 | ||
878 | warning_at (loc, OPT_Wvector_operation_performance, | |
879 | "vector condition will be expanded piecewise"); | |
880 | ||
9771b263 | 881 | vec_alloc (v, nunits); |
374ab2d7 MG |
882 | for (i = 0; i < nunits; |
883 | i++, index = int_const_binop (PLUS_EXPR, index, width)) | |
884 | { | |
885 | tree aa, result; | |
886 | tree bb = tree_vec_extract (gsi, inner_type, b, width, index); | |
887 | tree cc = tree_vec_extract (gsi, inner_type, c, width, index); | |
888 | if (a_is_comparison) | |
889 | { | |
890 | tree aa1 = tree_vec_extract (gsi, comp_inner_type, a1, width, index); | |
891 | tree aa2 = tree_vec_extract (gsi, comp_inner_type, a2, width, index); | |
892 | aa = build2 (TREE_CODE (a), cond_type, aa1, aa2); | |
893 | } | |
894 | else | |
895 | aa = tree_vec_extract (gsi, cond_type, a, width, index); | |
896 | result = gimplify_build3 (gsi, COND_EXPR, inner_type, aa, bb, cc); | |
897 | constructor_elt ce = {NULL_TREE, result}; | |
9771b263 | 898 | v->quick_push (ce); |
374ab2d7 MG |
899 | } |
900 | ||
901 | constr = build_constructor (type, v); | |
902 | gimple_assign_set_rhs_from_tree (gsi, constr); | |
903 | update_stmt (gsi_stmt (*gsi)); | |
904 | } | |
905 | ||
2b725155 | 906 | static tree |
726a989a | 907 | expand_vector_operation (gimple_stmt_iterator *gsi, tree type, tree compute_type, |
538dd0b7 | 908 | gassign *assign, enum tree_code code) |
2b725155 | 909 | { |
ef4bddc2 | 910 | machine_mode compute_mode = TYPE_MODE (compute_type); |
2b725155 RH |
911 | |
912 | /* If the compute mode is not a vector mode (hence we are not decomposing | |
913 | a BLKmode vector to smaller, hardware-supported vectors), we may want | |
914 | to expand the operations in parallel. */ | |
915 | if (GET_MODE_CLASS (compute_mode) != MODE_VECTOR_INT | |
325217ed CF |
916 | && GET_MODE_CLASS (compute_mode) != MODE_VECTOR_FLOAT |
917 | && GET_MODE_CLASS (compute_mode) != MODE_VECTOR_FRACT | |
918 | && GET_MODE_CLASS (compute_mode) != MODE_VECTOR_UFRACT | |
919 | && GET_MODE_CLASS (compute_mode) != MODE_VECTOR_ACCUM | |
920 | && GET_MODE_CLASS (compute_mode) != MODE_VECTOR_UACCUM) | |
2b725155 RH |
921 | switch (code) |
922 | { | |
923 | case PLUS_EXPR: | |
924 | case MINUS_EXPR: | |
20bd649a | 925 | if (ANY_INTEGRAL_TYPE_P (type) && !TYPE_OVERFLOW_TRAPS (type)) |
cdbb5ba3 AS |
926 | return expand_vector_addition (gsi, do_binop, do_plus_minus, type, |
927 | gimple_assign_rhs1 (assign), | |
726a989a | 928 | gimple_assign_rhs2 (assign), code); |
2b725155 RH |
929 | break; |
930 | ||
931 | case NEGATE_EXPR: | |
20bd649a | 932 | if (ANY_INTEGRAL_TYPE_P (type) && !TYPE_OVERFLOW_TRAPS (type)) |
726a989a RB |
933 | return expand_vector_addition (gsi, do_unop, do_negate, type, |
934 | gimple_assign_rhs1 (assign), | |
2b725155 RH |
935 | NULL_TREE, code); |
936 | break; | |
937 | ||
938 | case BIT_AND_EXPR: | |
939 | case BIT_IOR_EXPR: | |
940 | case BIT_XOR_EXPR: | |
726a989a RB |
941 | return expand_vector_parallel (gsi, do_binop, type, |
942 | gimple_assign_rhs1 (assign), | |
943 | gimple_assign_rhs2 (assign), code); | |
2b725155 RH |
944 | |
945 | case BIT_NOT_EXPR: | |
726a989a RB |
946 | return expand_vector_parallel (gsi, do_unop, type, |
947 | gimple_assign_rhs1 (assign), | |
d246ab4f AS |
948 | NULL_TREE, code); |
949 | case EQ_EXPR: | |
950 | case NE_EXPR: | |
951 | case GT_EXPR: | |
952 | case LT_EXPR: | |
953 | case GE_EXPR: | |
954 | case LE_EXPR: | |
955 | case UNEQ_EXPR: | |
956 | case UNGT_EXPR: | |
957 | case UNLT_EXPR: | |
958 | case UNGE_EXPR: | |
959 | case UNLE_EXPR: | |
960 | case LTGT_EXPR: | |
961 | case ORDERED_EXPR: | |
962 | case UNORDERED_EXPR: | |
963 | { | |
964 | tree rhs1 = gimple_assign_rhs1 (assign); | |
965 | tree rhs2 = gimple_assign_rhs2 (assign); | |
2b725155 | 966 | |
d246ab4f AS |
967 | return expand_vector_comparison (gsi, type, rhs1, rhs2, code); |
968 | } | |
4ee4c52c JJ |
969 | |
970 | case TRUNC_DIV_EXPR: | |
971 | case TRUNC_MOD_EXPR: | |
972 | { | |
973 | tree rhs1 = gimple_assign_rhs1 (assign); | |
974 | tree rhs2 = gimple_assign_rhs2 (assign); | |
975 | tree ret; | |
976 | ||
977 | if (!optimize | |
978 | || !VECTOR_INTEGER_TYPE_P (type) | |
2b332829 JJ |
979 | || TREE_CODE (rhs2) != VECTOR_CST |
980 | || !VECTOR_MODE_P (TYPE_MODE (type))) | |
4ee4c52c JJ |
981 | break; |
982 | ||
983 | ret = expand_vector_divmod (gsi, type, rhs1, rhs2, code); | |
984 | if (ret != NULL_TREE) | |
985 | return ret; | |
986 | break; | |
987 | } | |
988 | ||
2b725155 RH |
989 | default: |
990 | break; | |
991 | } | |
992 | ||
993 | if (TREE_CODE_CLASS (code) == tcc_unary) | |
726a989a RB |
994 | return expand_vector_piecewise (gsi, do_unop, type, compute_type, |
995 | gimple_assign_rhs1 (assign), | |
2b725155 RH |
996 | NULL_TREE, code); |
997 | else | |
726a989a RB |
998 | return expand_vector_piecewise (gsi, do_binop, type, compute_type, |
999 | gimple_assign_rhs1 (assign), | |
1000 | gimple_assign_rhs2 (assign), code); | |
2b725155 | 1001 | } |
ce7e41fc JJ |
1002 | |
1003 | /* Try to optimize | |
1004 | a_5 = { b_7, b_7 + 3, b_7 + 6, b_7 + 9 }; | |
1005 | style stmts into: | |
1006 | _9 = { b_7, b_7, b_7, b_7 }; | |
1007 | a_5 = _9 + { 0, 3, 6, 9 }; | |
1008 | because vector splat operation is usually more efficient | |
1009 | than piecewise initialization of the vector. */ | |
1010 | ||
1011 | static void | |
1012 | optimize_vector_constructor (gimple_stmt_iterator *gsi) | |
1013 | { | |
538dd0b7 | 1014 | gassign *stmt = as_a <gassign *> (gsi_stmt (*gsi)); |
ce7e41fc JJ |
1015 | tree lhs = gimple_assign_lhs (stmt); |
1016 | tree rhs = gimple_assign_rhs1 (stmt); | |
1017 | tree type = TREE_TYPE (rhs); | |
1018 | unsigned int i, j, nelts = TYPE_VECTOR_SUBPARTS (type); | |
1019 | bool all_same = true; | |
1020 | constructor_elt *elt; | |
1021 | tree *cst; | |
1022 | gimple g; | |
1023 | tree base = NULL_TREE; | |
1f254157 | 1024 | optab op; |
ce7e41fc JJ |
1025 | |
1026 | if (nelts <= 2 || CONSTRUCTOR_NELTS (rhs) != nelts) | |
1027 | return; | |
1f254157 JJ |
1028 | op = optab_for_tree_code (PLUS_EXPR, type, optab_default); |
1029 | if (op == unknown_optab | |
1030 | || optab_handler (op, TYPE_MODE (type)) == CODE_FOR_nothing) | |
1031 | return; | |
ce7e41fc JJ |
1032 | FOR_EACH_VEC_SAFE_ELT (CONSTRUCTOR_ELTS (rhs), i, elt) |
1033 | if (TREE_CODE (elt->value) != SSA_NAME | |
1034 | || TREE_CODE (TREE_TYPE (elt->value)) == VECTOR_TYPE) | |
1035 | return; | |
1036 | else | |
1037 | { | |
1038 | tree this_base = elt->value; | |
1039 | if (this_base != CONSTRUCTOR_ELT (rhs, 0)->value) | |
1040 | all_same = false; | |
1041 | for (j = 0; j < nelts + 1; j++) | |
1042 | { | |
1043 | g = SSA_NAME_DEF_STMT (this_base); | |
1044 | if (is_gimple_assign (g) | |
1045 | && gimple_assign_rhs_code (g) == PLUS_EXPR | |
1046 | && TREE_CODE (gimple_assign_rhs2 (g)) == INTEGER_CST | |
1047 | && TREE_CODE (gimple_assign_rhs1 (g)) == SSA_NAME | |
1048 | && !SSA_NAME_OCCURS_IN_ABNORMAL_PHI (gimple_assign_rhs1 (g))) | |
1049 | this_base = gimple_assign_rhs1 (g); | |
1050 | else | |
1051 | break; | |
1052 | } | |
1053 | if (i == 0) | |
1054 | base = this_base; | |
1055 | else if (this_base != base) | |
1056 | return; | |
1057 | } | |
1058 | if (all_same) | |
1059 | return; | |
1060 | cst = XALLOCAVEC (tree, nelts); | |
1061 | for (i = 0; i < nelts; i++) | |
1062 | { | |
1063 | tree this_base = CONSTRUCTOR_ELT (rhs, i)->value;; | |
1064 | cst[i] = build_zero_cst (TREE_TYPE (base)); | |
1065 | while (this_base != base) | |
1066 | { | |
1067 | g = SSA_NAME_DEF_STMT (this_base); | |
1068 | cst[i] = fold_binary (PLUS_EXPR, TREE_TYPE (base), | |
1069 | cst[i], gimple_assign_rhs2 (g)); | |
1070 | if (cst[i] == NULL_TREE | |
1071 | || TREE_CODE (cst[i]) != INTEGER_CST | |
1072 | || TREE_OVERFLOW (cst[i])) | |
1073 | return; | |
1074 | this_base = gimple_assign_rhs1 (g); | |
1075 | } | |
1076 | } | |
1077 | for (i = 0; i < nelts; i++) | |
1078 | CONSTRUCTOR_ELT (rhs, i)->value = base; | |
b731b390 | 1079 | g = gimple_build_assign (make_ssa_name (type), rhs); |
ce7e41fc | 1080 | gsi_insert_before (gsi, g, GSI_SAME_STMT); |
0d0e4a03 JJ |
1081 | g = gimple_build_assign (lhs, PLUS_EXPR, gimple_assign_lhs (g), |
1082 | build_vector (type, cst)); | |
ce7e41fc JJ |
1083 | gsi_replace (gsi, g, false); |
1084 | } | |
2b725155 | 1085 | \f |
995ec132 RG |
1086 | /* Return a type for the widest vector mode whose components are of type |
1087 | TYPE, or NULL_TREE if none is found. */ | |
325217ed | 1088 | |
2b725155 | 1089 | static tree |
995ec132 | 1090 | type_for_widest_vector_mode (tree type, optab op) |
2b725155 | 1091 | { |
ef4bddc2 RS |
1092 | machine_mode inner_mode = TYPE_MODE (type); |
1093 | machine_mode best_mode = VOIDmode, mode; | |
2b725155 RH |
1094 | int best_nunits = 0; |
1095 | ||
3d8bf70f | 1096 | if (SCALAR_FLOAT_MODE_P (inner_mode)) |
2b725155 | 1097 | mode = MIN_MODE_VECTOR_FLOAT; |
325217ed CF |
1098 | else if (SCALAR_FRACT_MODE_P (inner_mode)) |
1099 | mode = MIN_MODE_VECTOR_FRACT; | |
1100 | else if (SCALAR_UFRACT_MODE_P (inner_mode)) | |
1101 | mode = MIN_MODE_VECTOR_UFRACT; | |
1102 | else if (SCALAR_ACCUM_MODE_P (inner_mode)) | |
1103 | mode = MIN_MODE_VECTOR_ACCUM; | |
1104 | else if (SCALAR_UACCUM_MODE_P (inner_mode)) | |
1105 | mode = MIN_MODE_VECTOR_UACCUM; | |
2b725155 RH |
1106 | else |
1107 | mode = MIN_MODE_VECTOR_INT; | |
1108 | ||
1109 | for (; mode != VOIDmode; mode = GET_MODE_WIDER_MODE (mode)) | |
1110 | if (GET_MODE_INNER (mode) == inner_mode | |
1111 | && GET_MODE_NUNITS (mode) > best_nunits | |
947131ba | 1112 | && optab_handler (op, mode) != CODE_FOR_nothing) |
2b725155 RH |
1113 | best_mode = mode, best_nunits = GET_MODE_NUNITS (mode); |
1114 | ||
1115 | if (best_mode == VOIDmode) | |
1116 | return NULL_TREE; | |
1117 | else | |
995ec132 | 1118 | return build_vector_type_for_mode (type, best_mode); |
2b725155 RH |
1119 | } |
1120 | ||
f90e8e2e AS |
1121 | |
1122 | /* Build a reference to the element of the vector VECT. Function | |
1123 | returns either the element itself, either BIT_FIELD_REF, or an | |
1124 | ARRAY_REF expression. | |
1125 | ||
073a8998 | 1126 | GSI is required to insert temporary variables while building a |
f90e8e2e AS |
1127 | refernece to the element of the vector VECT. |
1128 | ||
1129 | PTMPVEC is a pointer to the temporary variable for caching | |
1130 | purposes. In case when PTMPVEC is NULL new temporary variable | |
1131 | will be created. */ | |
1132 | static tree | |
1133 | vector_element (gimple_stmt_iterator *gsi, tree vect, tree idx, tree *ptmpvec) | |
1134 | { | |
067f5960 | 1135 | tree vect_type, vect_elt_type; |
f90e8e2e AS |
1136 | gimple asgn; |
1137 | tree tmpvec; | |
1138 | tree arraytype; | |
1139 | bool need_asgn = true; | |
067f5960 | 1140 | unsigned int elements; |
f90e8e2e | 1141 | |
067f5960 RH |
1142 | vect_type = TREE_TYPE (vect); |
1143 | vect_elt_type = TREE_TYPE (vect_type); | |
1144 | elements = TYPE_VECTOR_SUBPARTS (vect_type); | |
f90e8e2e | 1145 | |
f90e8e2e AS |
1146 | if (TREE_CODE (idx) == INTEGER_CST) |
1147 | { | |
1148 | unsigned HOST_WIDE_INT index; | |
1149 | ||
067f5960 RH |
1150 | /* Given that we're about to compute a binary modulus, |
1151 | we don't care about the high bits of the value. */ | |
1152 | index = TREE_INT_CST_LOW (idx); | |
cc269bb6 | 1153 | if (!tree_fits_uhwi_p (idx) || index >= elements) |
067f5960 RH |
1154 | { |
1155 | index &= elements - 1; | |
1156 | idx = build_int_cst (TREE_TYPE (idx), index); | |
1157 | } | |
f90e8e2e | 1158 | |
bc622b2a RG |
1159 | /* When lowering a vector statement sequence do some easy |
1160 | simplification by looking through intermediate vector results. */ | |
1161 | if (TREE_CODE (vect) == SSA_NAME) | |
1162 | { | |
1163 | gimple def_stmt = SSA_NAME_DEF_STMT (vect); | |
1164 | if (is_gimple_assign (def_stmt) | |
1165 | && (gimple_assign_rhs_code (def_stmt) == VECTOR_CST | |
1166 | || gimple_assign_rhs_code (def_stmt) == CONSTRUCTOR)) | |
1167 | vect = gimple_assign_rhs1 (def_stmt); | |
1168 | } | |
1169 | ||
f90e8e2e | 1170 | if (TREE_CODE (vect) == VECTOR_CST) |
d2a12ae7 | 1171 | return VECTOR_CST_ELT (vect, index); |
4a2c20cc JJ |
1172 | else if (TREE_CODE (vect) == CONSTRUCTOR |
1173 | && (CONSTRUCTOR_NELTS (vect) == 0 | |
1174 | || TREE_CODE (TREE_TYPE (CONSTRUCTOR_ELT (vect, 0)->value)) | |
1175 | != VECTOR_TYPE)) | |
f90e8e2e | 1176 | { |
4a2c20cc JJ |
1177 | if (index < CONSTRUCTOR_NELTS (vect)) |
1178 | return CONSTRUCTOR_ELT (vect, index)->value; | |
067f5960 | 1179 | return build_zero_cst (vect_elt_type); |
f90e8e2e | 1180 | } |
067f5960 | 1181 | else |
f90e8e2e | 1182 | { |
067f5960 | 1183 | tree size = TYPE_SIZE (vect_elt_type); |
26a7fca2 JJ |
1184 | tree pos = fold_build2 (MULT_EXPR, bitsizetype, bitsize_int (index), |
1185 | size); | |
1186 | return fold_build3 (BIT_FIELD_REF, vect_elt_type, vect, size, pos); | |
f90e8e2e | 1187 | } |
f90e8e2e AS |
1188 | } |
1189 | ||
1190 | if (!ptmpvec) | |
067f5960 | 1191 | tmpvec = create_tmp_var (vect_type, "vectmp"); |
f90e8e2e | 1192 | else if (!*ptmpvec) |
067f5960 | 1193 | tmpvec = *ptmpvec = create_tmp_var (vect_type, "vectmp"); |
f90e8e2e AS |
1194 | else |
1195 | { | |
1196 | tmpvec = *ptmpvec; | |
1197 | need_asgn = false; | |
1198 | } | |
1199 | ||
1200 | if (need_asgn) | |
1201 | { | |
1202 | TREE_ADDRESSABLE (tmpvec) = 1; | |
1203 | asgn = gimple_build_assign (tmpvec, vect); | |
1204 | gsi_insert_before (gsi, asgn, GSI_SAME_STMT); | |
1205 | } | |
1206 | ||
067f5960 RH |
1207 | arraytype = build_array_type_nelts (vect_elt_type, elements); |
1208 | return build4 (ARRAY_REF, vect_elt_type, | |
f90e8e2e AS |
1209 | build1 (VIEW_CONVERT_EXPR, arraytype, tmpvec), |
1210 | idx, NULL_TREE, NULL_TREE); | |
1211 | } | |
1212 | ||
2205ed25 | 1213 | /* Check if VEC_PERM_EXPR within the given setting is supported |
067f5960 | 1214 | by hardware, or lower it piecewise. |
f90e8e2e | 1215 | |
2205ed25 RH |
1216 | When VEC_PERM_EXPR has the same first and second operands: |
1217 | VEC_PERM_EXPR <v0, v0, mask> the lowered version would be | |
f90e8e2e AS |
1218 | {v0[mask[0]], v0[mask[1]], ...} |
1219 | MASK and V0 must have the same number of elements. | |
1220 | ||
2205ed25 | 1221 | Otherwise VEC_PERM_EXPR <v0, v1, mask> is lowered to |
f90e8e2e AS |
1222 | {mask[0] < len(v0) ? v0[mask[0]] : v1[mask[0]], ...} |
1223 | V0 and V1 must have the same type. MASK, V0, V1 must have the | |
1224 | same number of arguments. */ | |
f90e8e2e | 1225 | |
067f5960 | 1226 | static void |
2205ed25 | 1227 | lower_vec_perm (gimple_stmt_iterator *gsi) |
067f5960 | 1228 | { |
538dd0b7 | 1229 | gassign *stmt = as_a <gassign *> (gsi_stmt (*gsi)); |
f90e8e2e AS |
1230 | tree mask = gimple_assign_rhs3 (stmt); |
1231 | tree vec0 = gimple_assign_rhs1 (stmt); | |
1232 | tree vec1 = gimple_assign_rhs2 (stmt); | |
067f5960 RH |
1233 | tree vect_type = TREE_TYPE (vec0); |
1234 | tree mask_type = TREE_TYPE (mask); | |
1235 | tree vect_elt_type = TREE_TYPE (vect_type); | |
1236 | tree mask_elt_type = TREE_TYPE (mask_type); | |
1237 | unsigned int elements = TYPE_VECTOR_SUBPARTS (vect_type); | |
9771b263 | 1238 | vec<constructor_elt, va_gc> *v; |
067f5960 RH |
1239 | tree constr, t, si, i_val; |
1240 | tree vec0tmp = NULL_TREE, vec1tmp = NULL_TREE, masktmp = NULL_TREE; | |
1241 | bool two_operand_p = !operand_equal_p (vec0, vec1, 0); | |
cdbb5ba3 | 1242 | location_t loc = gimple_location (gsi_stmt (*gsi)); |
067f5960 | 1243 | unsigned i; |
f90e8e2e | 1244 | |
273d260f RR |
1245 | if (TREE_CODE (mask) == SSA_NAME) |
1246 | { | |
1247 | gimple def_stmt = SSA_NAME_DEF_STMT (mask); | |
1248 | if (is_gimple_assign (def_stmt) | |
1249 | && gimple_assign_rhs_code (def_stmt) == VECTOR_CST) | |
1250 | mask = gimple_assign_rhs1 (def_stmt); | |
1251 | } | |
1252 | ||
22e4dee7 RH |
1253 | if (TREE_CODE (mask) == VECTOR_CST) |
1254 | { | |
1255 | unsigned char *sel_int = XALLOCAVEC (unsigned char, elements); | |
22e4dee7 | 1256 | |
d2a12ae7 RG |
1257 | for (i = 0; i < elements; ++i) |
1258 | sel_int[i] = (TREE_INT_CST_LOW (VECTOR_CST_ELT (mask, i)) | |
1259 | & (2 * elements - 1)); | |
22e4dee7 RH |
1260 | |
1261 | if (can_vec_perm_p (TYPE_MODE (vect_type), false, sel_int)) | |
273d260f RR |
1262 | { |
1263 | gimple_assign_set_rhs3 (stmt, mask); | |
1264 | update_stmt (stmt); | |
1265 | return; | |
1266 | } | |
22e4dee7 RH |
1267 | } |
1268 | else if (can_vec_perm_p (TYPE_MODE (vect_type), true, NULL)) | |
067f5960 | 1269 | return; |
cdbb5ba3 AS |
1270 | |
1271 | warning_at (loc, OPT_Wvector_operation_performance, | |
1272 | "vector shuffling operation will be expanded piecewise"); | |
1273 | ||
9771b263 | 1274 | vec_alloc (v, elements); |
067f5960 | 1275 | for (i = 0; i < elements; i++) |
f90e8e2e | 1276 | { |
067f5960 RH |
1277 | si = size_int (i); |
1278 | i_val = vector_element (gsi, mask, si, &masktmp); | |
f90e8e2e | 1279 | |
067f5960 | 1280 | if (TREE_CODE (i_val) == INTEGER_CST) |
f90e8e2e | 1281 | { |
067f5960 | 1282 | unsigned HOST_WIDE_INT index; |
f90e8e2e | 1283 | |
067f5960 | 1284 | index = TREE_INT_CST_LOW (i_val); |
cc269bb6 | 1285 | if (!tree_fits_uhwi_p (i_val) || index >= elements) |
067f5960 | 1286 | i_val = build_int_cst (mask_elt_type, index & (elements - 1)); |
f90e8e2e | 1287 | |
067f5960 RH |
1288 | if (two_operand_p && (index & elements) != 0) |
1289 | t = vector_element (gsi, vec1, i_val, &vec1tmp); | |
1290 | else | |
1291 | t = vector_element (gsi, vec0, i_val, &vec0tmp); | |
f90e8e2e | 1292 | |
067f5960 RH |
1293 | t = force_gimple_operand_gsi (gsi, t, true, NULL_TREE, |
1294 | true, GSI_SAME_STMT); | |
f90e8e2e | 1295 | } |
067f5960 | 1296 | else |
f90e8e2e | 1297 | { |
067f5960 RH |
1298 | tree cond = NULL_TREE, v0_val; |
1299 | ||
1300 | if (two_operand_p) | |
1301 | { | |
1302 | cond = fold_build2 (BIT_AND_EXPR, mask_elt_type, i_val, | |
1303 | build_int_cst (mask_elt_type, elements)); | |
1304 | cond = force_gimple_operand_gsi (gsi, cond, true, NULL_TREE, | |
1305 | true, GSI_SAME_STMT); | |
1306 | } | |
1307 | ||
1308 | i_val = fold_build2 (BIT_AND_EXPR, mask_elt_type, i_val, | |
1309 | build_int_cst (mask_elt_type, elements - 1)); | |
1310 | i_val = force_gimple_operand_gsi (gsi, i_val, true, NULL_TREE, | |
1311 | true, GSI_SAME_STMT); | |
1312 | ||
1313 | v0_val = vector_element (gsi, vec0, i_val, &vec0tmp); | |
1314 | v0_val = force_gimple_operand_gsi (gsi, v0_val, true, NULL_TREE, | |
1315 | true, GSI_SAME_STMT); | |
1316 | ||
1317 | if (two_operand_p) | |
1318 | { | |
1319 | tree v1_val; | |
1320 | ||
1321 | v1_val = vector_element (gsi, vec1, i_val, &vec1tmp); | |
1322 | v1_val = force_gimple_operand_gsi (gsi, v1_val, true, NULL_TREE, | |
1323 | true, GSI_SAME_STMT); | |
1324 | ||
1325 | cond = fold_build2 (EQ_EXPR, boolean_type_node, | |
1326 | cond, build_zero_cst (mask_elt_type)); | |
1327 | cond = fold_build3 (COND_EXPR, vect_elt_type, | |
1328 | cond, v0_val, v1_val); | |
1329 | t = force_gimple_operand_gsi (gsi, cond, true, NULL_TREE, | |
1330 | true, GSI_SAME_STMT); | |
f90e8e2e | 1331 | } |
067f5960 RH |
1332 | else |
1333 | t = v0_val; | |
f90e8e2e | 1334 | } |
067f5960 | 1335 | |
4a2c20cc | 1336 | CONSTRUCTOR_APPEND_ELT (v, NULL_TREE, t); |
f90e8e2e AS |
1337 | } |
1338 | ||
067f5960 | 1339 | constr = build_constructor (vect_type, v); |
f90e8e2e | 1340 | gimple_assign_set_rhs_from_tree (gsi, constr); |
067f5960 | 1341 | update_stmt (gsi_stmt (*gsi)); |
f90e8e2e AS |
1342 | } |
1343 | ||
a4ee446d JJ |
1344 | /* Return type in which CODE operation with optab OP can be |
1345 | computed. */ | |
1346 | ||
1347 | static tree | |
1348 | get_compute_type (enum tree_code code, optab op, tree type) | |
1349 | { | |
1350 | /* For very wide vectors, try using a smaller vector mode. */ | |
1351 | tree compute_type = type; | |
1352 | if (op | |
1353 | && (!VECTOR_MODE_P (TYPE_MODE (type)) | |
1354 | || optab_handler (op, TYPE_MODE (type)) == CODE_FOR_nothing)) | |
1355 | { | |
1356 | tree vector_compute_type | |
1357 | = type_for_widest_vector_mode (TREE_TYPE (type), op); | |
1358 | if (vector_compute_type != NULL_TREE | |
1359 | && (TYPE_VECTOR_SUBPARTS (vector_compute_type) | |
1360 | < TYPE_VECTOR_SUBPARTS (compute_type)) | |
1361 | && (optab_handler (op, TYPE_MODE (vector_compute_type)) | |
1362 | != CODE_FOR_nothing)) | |
1363 | compute_type = vector_compute_type; | |
1364 | } | |
1365 | ||
1366 | /* If we are breaking a BLKmode vector into smaller pieces, | |
1367 | type_for_widest_vector_mode has already looked into the optab, | |
1368 | so skip these checks. */ | |
1369 | if (compute_type == type) | |
1370 | { | |
ef4bddc2 | 1371 | machine_mode compute_mode = TYPE_MODE (compute_type); |
a4ee446d JJ |
1372 | if (VECTOR_MODE_P (compute_mode)) |
1373 | { | |
1374 | if (op && optab_handler (op, compute_mode) != CODE_FOR_nothing) | |
1375 | return compute_type; | |
1376 | if (code == MULT_HIGHPART_EXPR | |
1377 | && can_mult_highpart_p (compute_mode, | |
1378 | TYPE_UNSIGNED (compute_type))) | |
1379 | return compute_type; | |
1380 | } | |
1381 | /* There is no operation in hardware, so fall back to scalars. */ | |
1382 | compute_type = TREE_TYPE (type); | |
1383 | } | |
1384 | ||
1385 | return compute_type; | |
1386 | } | |
1387 | ||
1388 | /* Helper function of expand_vector_operations_1. Return number of | |
1389 | vector elements for vector types or 1 for other types. */ | |
1390 | ||
1391 | static inline int | |
1392 | count_type_subparts (tree type) | |
1393 | { | |
1394 | return VECTOR_TYPE_P (type) ? TYPE_VECTOR_SUBPARTS (type) : 1; | |
1395 | } | |
1396 | ||
f8c29d98 JJ |
1397 | static tree |
1398 | do_cond (gimple_stmt_iterator *gsi, tree inner_type, tree a, tree b, | |
1399 | tree bitpos, tree bitsize, enum tree_code code) | |
1400 | { | |
1401 | if (TREE_CODE (TREE_TYPE (a)) == VECTOR_TYPE) | |
1402 | a = tree_vec_extract (gsi, inner_type, a, bitsize, bitpos); | |
1403 | if (TREE_CODE (TREE_TYPE (b)) == VECTOR_TYPE) | |
1404 | b = tree_vec_extract (gsi, inner_type, b, bitsize, bitpos); | |
1405 | tree cond = gimple_assign_rhs1 (gsi_stmt (*gsi)); | |
1406 | return gimplify_build3 (gsi, code, inner_type, cond, a, b); | |
1407 | } | |
1408 | ||
1409 | /* Expand a vector COND_EXPR to scalars, piecewise. */ | |
1410 | static void | |
1411 | expand_vector_scalar_condition (gimple_stmt_iterator *gsi) | |
1412 | { | |
1413 | gassign *stmt = as_a <gassign *> (gsi_stmt (*gsi)); | |
1414 | tree type = gimple_expr_type (stmt); | |
1415 | tree compute_type = get_compute_type (COND_EXPR, mov_optab, type); | |
1416 | machine_mode compute_mode = TYPE_MODE (compute_type); | |
1417 | gcc_assert (compute_mode != BLKmode); | |
1418 | tree lhs = gimple_assign_lhs (stmt); | |
1419 | tree rhs2 = gimple_assign_rhs2 (stmt); | |
1420 | tree rhs3 = gimple_assign_rhs3 (stmt); | |
1421 | tree new_rhs; | |
1422 | ||
1423 | /* If the compute mode is not a vector mode (hence we are not decomposing | |
1424 | a BLKmode vector to smaller, hardware-supported vectors), we may want | |
1425 | to expand the operations in parallel. */ | |
1426 | if (GET_MODE_CLASS (compute_mode) != MODE_VECTOR_INT | |
1427 | && GET_MODE_CLASS (compute_mode) != MODE_VECTOR_FLOAT | |
1428 | && GET_MODE_CLASS (compute_mode) != MODE_VECTOR_FRACT | |
1429 | && GET_MODE_CLASS (compute_mode) != MODE_VECTOR_UFRACT | |
1430 | && GET_MODE_CLASS (compute_mode) != MODE_VECTOR_ACCUM | |
1431 | && GET_MODE_CLASS (compute_mode) != MODE_VECTOR_UACCUM) | |
1432 | new_rhs = expand_vector_parallel (gsi, do_cond, type, rhs2, rhs3, | |
1433 | COND_EXPR); | |
1434 | else | |
1435 | new_rhs = expand_vector_piecewise (gsi, do_cond, type, compute_type, | |
1436 | rhs2, rhs3, COND_EXPR); | |
1437 | if (!useless_type_conversion_p (TREE_TYPE (lhs), TREE_TYPE (new_rhs))) | |
1438 | new_rhs = gimplify_build1 (gsi, VIEW_CONVERT_EXPR, TREE_TYPE (lhs), | |
1439 | new_rhs); | |
1440 | ||
1441 | /* NOTE: We should avoid using gimple_assign_set_rhs_from_tree. One | |
1442 | way to do it is change expand_vector_operation and its callees to | |
1443 | return a tree_code, RHS1 and RHS2 instead of a tree. */ | |
1444 | gimple_assign_set_rhs_from_tree (gsi, new_rhs); | |
1445 | update_stmt (gsi_stmt (*gsi)); | |
1446 | } | |
1447 | ||
2b725155 RH |
1448 | /* Process one statement. If we identify a vector operation, expand it. */ |
1449 | ||
1450 | static void | |
726a989a | 1451 | expand_vector_operations_1 (gimple_stmt_iterator *gsi) |
2b725155 | 1452 | { |
a4ee446d | 1453 | tree lhs, rhs1, rhs2 = NULL, type, compute_type = NULL_TREE; |
2b725155 | 1454 | enum tree_code code; |
2225b9f2 | 1455 | optab op = unknown_optab; |
726a989a RB |
1456 | enum gimple_rhs_class rhs_class; |
1457 | tree new_rhs; | |
2b725155 | 1458 | |
538dd0b7 DM |
1459 | /* Only consider code == GIMPLE_ASSIGN. */ |
1460 | gassign *stmt = dyn_cast <gassign *> (gsi_stmt (*gsi)); | |
1461 | if (!stmt) | |
726a989a | 1462 | return; |
2b725155 | 1463 | |
726a989a RB |
1464 | code = gimple_assign_rhs_code (stmt); |
1465 | rhs_class = get_gimple_rhs_class (code); | |
d246ab4f | 1466 | lhs = gimple_assign_lhs (stmt); |
2b725155 | 1467 | |
2205ed25 | 1468 | if (code == VEC_PERM_EXPR) |
f90e8e2e | 1469 | { |
2205ed25 | 1470 | lower_vec_perm (gsi); |
067f5960 | 1471 | return; |
f90e8e2e AS |
1472 | } |
1473 | ||
374ab2d7 MG |
1474 | if (code == VEC_COND_EXPR) |
1475 | { | |
1476 | expand_vector_condition (gsi); | |
1477 | return; | |
1478 | } | |
ce7e41fc | 1479 | |
f8c29d98 JJ |
1480 | if (code == COND_EXPR |
1481 | && TREE_CODE (TREE_TYPE (gimple_assign_lhs (stmt))) == VECTOR_TYPE | |
1482 | && TYPE_MODE (TREE_TYPE (gimple_assign_lhs (stmt))) == BLKmode) | |
1483 | { | |
1484 | expand_vector_scalar_condition (gsi); | |
1485 | return; | |
1486 | } | |
1487 | ||
ce7e41fc JJ |
1488 | if (code == CONSTRUCTOR |
1489 | && TREE_CODE (lhs) == SSA_NAME | |
1490 | && VECTOR_MODE_P (TYPE_MODE (TREE_TYPE (lhs))) | |
1491 | && !gimple_clobber_p (stmt) | |
1492 | && optimize) | |
1493 | { | |
1494 | optimize_vector_constructor (gsi); | |
1495 | return; | |
1496 | } | |
1497 | ||
726a989a RB |
1498 | if (rhs_class != GIMPLE_UNARY_RHS && rhs_class != GIMPLE_BINARY_RHS) |
1499 | return; | |
2b725155 | 1500 | |
726a989a RB |
1501 | rhs1 = gimple_assign_rhs1 (stmt); |
1502 | type = gimple_expr_type (stmt); | |
1503 | if (rhs_class == GIMPLE_BINARY_RHS) | |
1504 | rhs2 = gimple_assign_rhs2 (stmt); | |
2b725155 | 1505 | |
2b725155 RH |
1506 | if (TREE_CODE (type) != VECTOR_TYPE) |
1507 | return; | |
1508 | ||
625a9766 | 1509 | if (CONVERT_EXPR_CODE_P (code) |
f57d17f1 TM |
1510 | || code == FLOAT_EXPR |
1511 | || code == FIX_TRUNC_EXPR | |
1512 | || code == VIEW_CONVERT_EXPR) | |
2b725155 | 1513 | return; |
b8698a0f | 1514 | |
9f106823 UB |
1515 | /* The signedness is determined from input argument. */ |
1516 | if (code == VEC_UNPACK_FLOAT_HI_EXPR | |
1517 | || code == VEC_UNPACK_FLOAT_LO_EXPR) | |
726a989a | 1518 | type = TREE_TYPE (rhs1); |
9f106823 | 1519 | |
3f30a9a6 RH |
1520 | /* For widening/narrowing vector operations, the relevant type is of the |
1521 | arguments, not the widened result. VEC_UNPACK_FLOAT_*_EXPR is | |
1522 | calculated in the same way above. */ | |
1523 | if (code == WIDEN_SUM_EXPR | |
1524 | || code == VEC_WIDEN_MULT_HI_EXPR | |
1525 | || code == VEC_WIDEN_MULT_LO_EXPR | |
1526 | || code == VEC_WIDEN_MULT_EVEN_EXPR | |
1527 | || code == VEC_WIDEN_MULT_ODD_EXPR | |
1528 | || code == VEC_UNPACK_HI_EXPR | |
1529 | || code == VEC_UNPACK_LO_EXPR | |
1530 | || code == VEC_PACK_TRUNC_EXPR | |
1531 | || code == VEC_PACK_SAT_EXPR | |
1532 | || code == VEC_PACK_FIX_TRUNC_EXPR | |
1533 | || code == VEC_WIDEN_LSHIFT_HI_EXPR | |
1534 | || code == VEC_WIDEN_LSHIFT_LO_EXPR) | |
1535 | type = TREE_TYPE (rhs1); | |
1536 | ||
71d46ca5 MM |
1537 | /* Choose between vector shift/rotate by vector and vector shift/rotate by |
1538 | scalar */ | |
b8698a0f L |
1539 | if (code == LSHIFT_EXPR |
1540 | || code == RSHIFT_EXPR | |
726a989a | 1541 | || code == LROTATE_EXPR |
71d46ca5 MM |
1542 | || code == RROTATE_EXPR) |
1543 | { | |
0f3d6c10 RH |
1544 | optab opv; |
1545 | ||
bdc3ee5d RH |
1546 | /* Check whether we have vector <op> {x,x,x,x} where x |
1547 | could be a scalar variable or a constant. Transform | |
1548 | vector <op> {x,x,x,x} ==> vector <op> scalar. */ | |
0f3d6c10 | 1549 | if (VECTOR_INTEGER_TYPE_P (TREE_TYPE (rhs2))) |
47853c73 AS |
1550 | { |
1551 | tree first; | |
1552 | gimple def_stmt; | |
1553 | ||
bdc3ee5d RH |
1554 | if ((TREE_CODE (rhs2) == VECTOR_CST |
1555 | && (first = uniform_vector_p (rhs2)) != NULL_TREE) | |
1556 | || (TREE_CODE (rhs2) == SSA_NAME | |
1557 | && (def_stmt = SSA_NAME_DEF_STMT (rhs2)) | |
1558 | && gimple_assign_single_p (def_stmt) | |
1559 | && (first = uniform_vector_p | |
1560 | (gimple_assign_rhs1 (def_stmt))) != NULL_TREE)) | |
47853c73 AS |
1561 | { |
1562 | gimple_assign_set_rhs2 (stmt, first); | |
1563 | update_stmt (stmt); | |
1564 | rhs2 = first; | |
1565 | } | |
47853c73 | 1566 | } |
f90e8e2e | 1567 | |
0f3d6c10 RH |
1568 | opv = optab_for_tree_code (code, type, optab_vector); |
1569 | if (VECTOR_INTEGER_TYPE_P (TREE_TYPE (rhs2))) | |
1570 | op = opv; | |
bdc3ee5d | 1571 | else |
2fa6eeff | 1572 | { |
bdc3ee5d | 1573 | op = optab_for_tree_code (code, type, optab_scalar); |
2fa6eeff | 1574 | |
a4ee446d JJ |
1575 | compute_type = get_compute_type (code, op, type); |
1576 | if (compute_type == type) | |
1577 | return; | |
bdc3ee5d | 1578 | /* The rtl expander will expand vector/scalar as vector/vector |
a4ee446d JJ |
1579 | if necessary. Pick one with wider vector type. */ |
1580 | tree compute_vtype = get_compute_type (code, opv, type); | |
1581 | if (count_type_subparts (compute_vtype) | |
1582 | > count_type_subparts (compute_type)) | |
1583 | { | |
1584 | compute_type = compute_vtype; | |
1585 | op = opv; | |
1586 | } | |
1587 | } | |
1588 | ||
1589 | if (code == LROTATE_EXPR || code == RROTATE_EXPR) | |
1590 | { | |
1591 | if (compute_type == NULL_TREE) | |
1592 | compute_type = get_compute_type (code, op, type); | |
1593 | if (compute_type == type) | |
0f3d6c10 | 1594 | return; |
a4ee446d JJ |
1595 | /* Before splitting vector rotates into scalar rotates, |
1596 | see if we can't use vector shifts and BIT_IOR_EXPR | |
1597 | instead. For vector by vector rotates we'd also | |
1598 | need to check BIT_AND_EXPR and NEGATE_EXPR, punt there | |
1599 | for now, fold doesn't seem to create such rotates anyway. */ | |
1600 | if (compute_type == TREE_TYPE (type) | |
1601 | && !VECTOR_INTEGER_TYPE_P (TREE_TYPE (rhs2))) | |
1602 | { | |
1603 | optab oplv = vashl_optab, opl = ashl_optab; | |
1604 | optab oprv = vlshr_optab, opr = lshr_optab, opo = ior_optab; | |
1605 | tree compute_lvtype = get_compute_type (LSHIFT_EXPR, oplv, type); | |
1606 | tree compute_rvtype = get_compute_type (RSHIFT_EXPR, oprv, type); | |
1607 | tree compute_otype = get_compute_type (BIT_IOR_EXPR, opo, type); | |
1608 | tree compute_ltype = get_compute_type (LSHIFT_EXPR, opl, type); | |
1609 | tree compute_rtype = get_compute_type (RSHIFT_EXPR, opr, type); | |
1610 | /* The rtl expander will expand vector/scalar as vector/vector | |
1611 | if necessary. Pick one with wider vector type. */ | |
1612 | if (count_type_subparts (compute_lvtype) | |
1613 | > count_type_subparts (compute_ltype)) | |
1614 | { | |
1615 | compute_ltype = compute_lvtype; | |
1616 | opl = oplv; | |
1617 | } | |
1618 | if (count_type_subparts (compute_rvtype) | |
1619 | > count_type_subparts (compute_rtype)) | |
1620 | { | |
1621 | compute_rtype = compute_rvtype; | |
1622 | opr = oprv; | |
1623 | } | |
1624 | /* Pick the narrowest type from LSHIFT_EXPR, RSHIFT_EXPR and | |
1625 | BIT_IOR_EXPR. */ | |
1626 | compute_type = compute_ltype; | |
1627 | if (count_type_subparts (compute_type) | |
1628 | > count_type_subparts (compute_rtype)) | |
1629 | compute_type = compute_rtype; | |
1630 | if (count_type_subparts (compute_type) | |
1631 | > count_type_subparts (compute_otype)) | |
1632 | compute_type = compute_otype; | |
1633 | /* Verify all 3 operations can be performed in that type. */ | |
1634 | if (compute_type != TREE_TYPE (type)) | |
1635 | { | |
1636 | if (optab_handler (opl, TYPE_MODE (compute_type)) | |
1637 | == CODE_FOR_nothing | |
1638 | || optab_handler (opr, TYPE_MODE (compute_type)) | |
1639 | == CODE_FOR_nothing | |
1640 | || optab_handler (opo, TYPE_MODE (compute_type)) | |
1641 | == CODE_FOR_nothing) | |
1642 | compute_type = TREE_TYPE (type); | |
1643 | } | |
1644 | } | |
2fa6eeff | 1645 | } |
71d46ca5 MM |
1646 | } |
1647 | else | |
1648 | op = optab_for_tree_code (code, type, optab_default); | |
2b725155 RH |
1649 | |
1650 | /* Optabs will try converting a negation into a subtraction, so | |
1651 | look for it as well. TODO: negation of floating-point vectors | |
1652 | might be turned into an exclusive OR toggling the sign bit. */ | |
2225b9f2 | 1653 | if (op == unknown_optab |
2b725155 RH |
1654 | && code == NEGATE_EXPR |
1655 | && INTEGRAL_TYPE_P (TREE_TYPE (type))) | |
71d46ca5 | 1656 | op = optab_for_tree_code (MINUS_EXPR, type, optab_default); |
2b725155 | 1657 | |
a4ee446d JJ |
1658 | if (compute_type == NULL_TREE) |
1659 | compute_type = get_compute_type (code, op, type); | |
2b725155 | 1660 | if (compute_type == type) |
a4ee446d | 1661 | return; |
2b725155 | 1662 | |
726a989a | 1663 | new_rhs = expand_vector_operation (gsi, type, compute_type, stmt, code); |
d246ab4f AS |
1664 | |
1665 | /* Leave expression untouched for later expansion. */ | |
1666 | if (new_rhs == NULL_TREE) | |
1667 | return; | |
1668 | ||
726a989a RB |
1669 | if (!useless_type_conversion_p (TREE_TYPE (lhs), TREE_TYPE (new_rhs))) |
1670 | new_rhs = gimplify_build1 (gsi, VIEW_CONVERT_EXPR, TREE_TYPE (lhs), | |
1671 | new_rhs); | |
1672 | ||
1673 | /* NOTE: We should avoid using gimple_assign_set_rhs_from_tree. One | |
1674 | way to do it is change expand_vector_operation and its callees to | |
1675 | return a tree_code, RHS1 and RHS2 instead of a tree. */ | |
1676 | gimple_assign_set_rhs_from_tree (gsi, new_rhs); | |
3865a06f | 1677 | update_stmt (gsi_stmt (*gsi)); |
2b725155 RH |
1678 | } |
1679 | \f | |
1680 | /* Use this to lower vector operations introduced by the vectorizer, | |
1681 | if it may need the bit-twiddling tricks implemented in this file. */ | |
1682 | ||
c2924966 | 1683 | static unsigned int |
2b725155 RH |
1684 | expand_vector_operations (void) |
1685 | { | |
726a989a | 1686 | gimple_stmt_iterator gsi; |
2b725155 | 1687 | basic_block bb; |
3865a06f | 1688 | bool cfg_changed = false; |
2b725155 | 1689 | |
11cd3bed | 1690 | FOR_EACH_BB_FN (bb, cfun) |
2b725155 | 1691 | { |
726a989a | 1692 | for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi)) |
2b725155 | 1693 | { |
726a989a | 1694 | expand_vector_operations_1 (&gsi); |
3865a06f RG |
1695 | /* ??? If we do not cleanup EH then we will ICE in |
1696 | verification. But in reality we have created wrong-code | |
1697 | as we did not properly transition EH info and edges to | |
1698 | the piecewise computations. */ | |
1699 | if (maybe_clean_eh_stmt (gsi_stmt (gsi)) | |
1700 | && gimple_purge_dead_eh_edges (bb)) | |
1701 | cfg_changed = true; | |
2b725155 RH |
1702 | } |
1703 | } | |
3865a06f RG |
1704 | |
1705 | return cfg_changed ? TODO_cleanup_cfg : 0; | |
2b725155 RH |
1706 | } |
1707 | ||
27a4cd48 DM |
1708 | namespace { |
1709 | ||
1710 | const pass_data pass_data_lower_vector = | |
2b725155 | 1711 | { |
27a4cd48 DM |
1712 | GIMPLE_PASS, /* type */ |
1713 | "veclower", /* name */ | |
1714 | OPTGROUP_VEC, /* optinfo_flags */ | |
27a4cd48 DM |
1715 | TV_NONE, /* tv_id */ |
1716 | PROP_cfg, /* properties_required */ | |
1717 | PROP_gimple_lvec, /* properties_provided */ | |
1718 | 0, /* properties_destroyed */ | |
1719 | 0, /* todo_flags_start */ | |
9538c95b | 1720 | TODO_update_ssa, /* todo_flags_finish */ |
2b725155 RH |
1721 | }; |
1722 | ||
27a4cd48 DM |
1723 | class pass_lower_vector : public gimple_opt_pass |
1724 | { | |
1725 | public: | |
c3284718 RS |
1726 | pass_lower_vector (gcc::context *ctxt) |
1727 | : gimple_opt_pass (pass_data_lower_vector, ctxt) | |
27a4cd48 DM |
1728 | {} |
1729 | ||
1730 | /* opt_pass methods: */ | |
1a3d085c TS |
1731 | virtual bool gate (function *fun) |
1732 | { | |
1733 | return !(fun->curr_properties & PROP_gimple_lvec); | |
1734 | } | |
1735 | ||
be55bfe6 TS |
1736 | virtual unsigned int execute (function *) |
1737 | { | |
1738 | return expand_vector_operations (); | |
1739 | } | |
27a4cd48 DM |
1740 | |
1741 | }; // class pass_lower_vector | |
1742 | ||
1743 | } // anon namespace | |
1744 | ||
1745 | gimple_opt_pass * | |
1746 | make_pass_lower_vector (gcc::context *ctxt) | |
1747 | { | |
1748 | return new pass_lower_vector (ctxt); | |
1749 | } | |
1750 | ||
1751 | namespace { | |
1752 | ||
1753 | const pass_data pass_data_lower_vector_ssa = | |
2b725155 | 1754 | { |
27a4cd48 DM |
1755 | GIMPLE_PASS, /* type */ |
1756 | "veclower2", /* name */ | |
1757 | OPTGROUP_VEC, /* optinfo_flags */ | |
27a4cd48 DM |
1758 | TV_NONE, /* tv_id */ |
1759 | PROP_cfg, /* properties_required */ | |
1760 | PROP_gimple_lvec, /* properties_provided */ | |
1761 | 0, /* properties_destroyed */ | |
1762 | 0, /* todo_flags_start */ | |
3bea341f | 1763 | ( TODO_update_ssa |
27a4cd48 | 1764 | | TODO_cleanup_cfg ), /* todo_flags_finish */ |
2b725155 RH |
1765 | }; |
1766 | ||
27a4cd48 DM |
1767 | class pass_lower_vector_ssa : public gimple_opt_pass |
1768 | { | |
1769 | public: | |
c3284718 RS |
1770 | pass_lower_vector_ssa (gcc::context *ctxt) |
1771 | : gimple_opt_pass (pass_data_lower_vector_ssa, ctxt) | |
27a4cd48 DM |
1772 | {} |
1773 | ||
1774 | /* opt_pass methods: */ | |
65d3284b | 1775 | opt_pass * clone () { return new pass_lower_vector_ssa (m_ctxt); } |
be55bfe6 TS |
1776 | virtual unsigned int execute (function *) |
1777 | { | |
1778 | return expand_vector_operations (); | |
1779 | } | |
27a4cd48 DM |
1780 | |
1781 | }; // class pass_lower_vector_ssa | |
1782 | ||
1783 | } // anon namespace | |
1784 | ||
1785 | gimple_opt_pass * | |
1786 | make_pass_lower_vector_ssa (gcc::context *ctxt) | |
1787 | { | |
1788 | return new pass_lower_vector_ssa (ctxt); | |
1789 | } | |
1790 | ||
2b725155 | 1791 | #include "gt-tree-vect-generic.h" |