]>
Commit | Line | Data |
---|---|---|
0501cacc | 1 | /* Lower vector operations to scalar operations. |
711789cc | 2 | Copyright (C) 2004-2013 Free Software Foundation, Inc. |
0501cacc | 3 | |
4 | This file is part of GCC. | |
48e1416a | 5 | |
0501cacc | 6 | GCC is free software; you can redistribute it and/or modify it |
7 | under the terms of the GNU General Public License as published by the | |
8c4c00c1 | 8 | Free Software Foundation; either version 3, or (at your option) any |
0501cacc | 9 | later version. |
48e1416a | 10 | |
0501cacc | 11 | GCC is distributed in the hope that it will be useful, but WITHOUT |
12 | ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or | |
13 | FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License | |
14 | for more details. | |
48e1416a | 15 | |
0501cacc | 16 | You should have received a copy of the GNU General Public License |
8c4c00c1 | 17 | along with GCC; see the file COPYING3. If not see |
18 | <http://www.gnu.org/licenses/>. */ | |
0501cacc | 19 | |
20 | #include "config.h" | |
21 | #include "system.h" | |
22 | #include "coretypes.h" | |
23 | #include "tree.h" | |
9ed99284 | 24 | #include "stor-layout.h" |
0501cacc | 25 | #include "tm.h" |
0501cacc | 26 | #include "langhooks.h" |
75a70cf9 | 27 | #include "gimple.h" |
dcf1a1ec | 28 | #include "gimple-iterator.h" |
e795d6e1 | 29 | #include "gimplify-me.h" |
073c1fd5 | 30 | #include "gimple-ssa.h" |
31 | #include "tree-cfg.h" | |
9ed99284 | 32 | #include "stringpool.h" |
073c1fd5 | 33 | #include "tree-ssanames.h" |
0501cacc | 34 | #include "tree-iterator.h" |
35 | #include "tree-pass.h" | |
36 | #include "flags.h" | |
37 | #include "ggc.h" | |
6cf89e04 | 38 | #include "diagnostic.h" |
10dd7335 | 39 | #include "target.h" |
0501cacc | 40 | |
8e3cb73b | 41 | /* Need to include rtl.h, expr.h, etc. for optabs. */ |
42 | #include "expr.h" | |
43 | #include "optabs.h" | |
0501cacc | 44 | |
d7ad16c2 | 45 | |
46 | static void expand_vector_operations_1 (gimple_stmt_iterator *); | |
47 | ||
48 | ||
0501cacc | 49 | /* Build a constant of type TYPE, made of VALUE's bits replicated |
50 | every TYPE_SIZE (INNER_TYPE) bits to fit TYPE's precision. */ | |
51 | static tree | |
52 | build_replicated_const (tree type, tree inner_type, HOST_WIDE_INT value) | |
53 | { | |
e913b5cd | 54 | int width = tree_to_uhwi (TYPE_SIZE (inner_type)); |
55 | int n = TYPE_PRECISION (type) / width; | |
56 | unsigned HOST_WIDE_INT low, mask; | |
57 | HOST_WIDE_INT a[WIDE_INT_MAX_ELTS]; | |
58 | int i; | |
0501cacc | 59 | |
60 | gcc_assert (n); | |
61 | ||
62 | if (width == HOST_BITS_PER_WIDE_INT) | |
63 | low = value; | |
64 | else | |
65 | { | |
66 | mask = ((HOST_WIDE_INT)1 << width) - 1; | |
67 | low = (unsigned HOST_WIDE_INT) ~0 / mask * (value & mask); | |
68 | } | |
69 | ||
e913b5cd | 70 | for (i = 0; i < n; i++) |
71 | a[i] = low; | |
0501cacc | 72 | |
ddb1be65 | 73 | return wide_int_to_tree |
05363b4a | 74 | (type, wide_int::from_array (a, n, TYPE_PRECISION (type))); |
0501cacc | 75 | } |
76 | ||
77 | static GTY(()) tree vector_inner_type; | |
78 | static GTY(()) tree vector_last_type; | |
79 | static GTY(()) int vector_last_nunits; | |
80 | ||
81 | /* Return a suitable vector types made of SUBPARTS units each of mode | |
82 | "word_mode" (the global variable). */ | |
83 | static tree | |
84 | build_word_mode_vector_type (int nunits) | |
85 | { | |
86 | if (!vector_inner_type) | |
87 | vector_inner_type = lang_hooks.types.type_for_mode (word_mode, 1); | |
88 | else if (vector_last_nunits == nunits) | |
89 | { | |
90 | gcc_assert (TREE_CODE (vector_last_type) == VECTOR_TYPE); | |
91 | return vector_last_type; | |
92 | } | |
93 | ||
94 | /* We build a new type, but we canonicalize it nevertheless, | |
95 | because it still saves some memory. */ | |
96 | vector_last_nunits = nunits; | |
97 | vector_last_type = type_hash_canon (nunits, | |
98 | build_vector_type (vector_inner_type, | |
99 | nunits)); | |
100 | return vector_last_type; | |
101 | } | |
102 | ||
75a70cf9 | 103 | typedef tree (*elem_op_func) (gimple_stmt_iterator *, |
0501cacc | 104 | tree, tree, tree, tree, tree, enum tree_code); |
105 | ||
106 | static inline tree | |
75a70cf9 | 107 | tree_vec_extract (gimple_stmt_iterator *gsi, tree type, |
0501cacc | 108 | tree t, tree bitsize, tree bitpos) |
109 | { | |
110 | if (bitpos) | |
75a70cf9 | 111 | return gimplify_build3 (gsi, BIT_FIELD_REF, type, t, bitsize, bitpos); |
0501cacc | 112 | else |
75a70cf9 | 113 | return gimplify_build1 (gsi, VIEW_CONVERT_EXPR, type, t); |
0501cacc | 114 | } |
115 | ||
116 | static tree | |
75a70cf9 | 117 | do_unop (gimple_stmt_iterator *gsi, tree inner_type, tree a, |
0501cacc | 118 | tree b ATTRIBUTE_UNUSED, tree bitpos, tree bitsize, |
119 | enum tree_code code) | |
120 | { | |
75a70cf9 | 121 | a = tree_vec_extract (gsi, inner_type, a, bitsize, bitpos); |
122 | return gimplify_build1 (gsi, code, inner_type, a); | |
0501cacc | 123 | } |
124 | ||
125 | static tree | |
75a70cf9 | 126 | do_binop (gimple_stmt_iterator *gsi, tree inner_type, tree a, tree b, |
0501cacc | 127 | tree bitpos, tree bitsize, enum tree_code code) |
128 | { | |
eab22dca | 129 | if (TREE_CODE (TREE_TYPE (a)) == VECTOR_TYPE) |
130 | a = tree_vec_extract (gsi, inner_type, a, bitsize, bitpos); | |
131 | if (TREE_CODE (TREE_TYPE (b)) == VECTOR_TYPE) | |
132 | b = tree_vec_extract (gsi, inner_type, b, bitsize, bitpos); | |
75a70cf9 | 133 | return gimplify_build2 (gsi, code, inner_type, a, b); |
0501cacc | 134 | } |
135 | ||
d7ad16c2 | 136 | /* Construct expression (A[BITPOS] code B[BITPOS]) ? -1 : 0 |
137 | ||
138 | INNER_TYPE is the type of A and B elements | |
139 | ||
140 | returned expression is of signed integer type with the | |
141 | size equal to the size of INNER_TYPE. */ | |
142 | static tree | |
143 | do_compare (gimple_stmt_iterator *gsi, tree inner_type, tree a, tree b, | |
144 | tree bitpos, tree bitsize, enum tree_code code) | |
145 | { | |
146 | tree comp_type; | |
147 | ||
148 | a = tree_vec_extract (gsi, inner_type, a, bitsize, bitpos); | |
149 | b = tree_vec_extract (gsi, inner_type, b, bitsize, bitpos); | |
150 | ||
151 | comp_type = build_nonstandard_integer_type | |
152 | (GET_MODE_BITSIZE (TYPE_MODE (inner_type)), 0); | |
153 | ||
154 | return gimplify_build3 (gsi, COND_EXPR, comp_type, | |
155 | fold_build2 (code, boolean_type_node, a, b), | |
156 | build_int_cst (comp_type, -1), | |
157 | build_int_cst (comp_type, 0)); | |
158 | } | |
159 | ||
0501cacc | 160 | /* Expand vector addition to scalars. This does bit twiddling |
161 | in order to increase parallelism: | |
162 | ||
163 | a + b = (((int) a & 0x7f7f7f7f) + ((int) b & 0x7f7f7f7f)) ^ | |
164 | (a ^ b) & 0x80808080 | |
165 | ||
166 | a - b = (((int) a | 0x80808080) - ((int) b & 0x7f7f7f7f)) ^ | |
167 | (a ^ ~b) & 0x80808080 | |
168 | ||
169 | -b = (0x80808080 - ((int) b & 0x7f7f7f7f)) ^ (~b & 0x80808080) | |
170 | ||
171 | This optimization should be done only if 4 vector items or more | |
172 | fit into a word. */ | |
173 | static tree | |
75a70cf9 | 174 | do_plus_minus (gimple_stmt_iterator *gsi, tree word_type, tree a, tree b, |
0501cacc | 175 | tree bitpos ATTRIBUTE_UNUSED, tree bitsize ATTRIBUTE_UNUSED, |
176 | enum tree_code code) | |
177 | { | |
178 | tree inner_type = TREE_TYPE (TREE_TYPE (a)); | |
179 | unsigned HOST_WIDE_INT max; | |
180 | tree low_bits, high_bits, a_low, b_low, result_low, signs; | |
181 | ||
182 | max = GET_MODE_MASK (TYPE_MODE (inner_type)); | |
183 | low_bits = build_replicated_const (word_type, inner_type, max >> 1); | |
184 | high_bits = build_replicated_const (word_type, inner_type, max & ~(max >> 1)); | |
185 | ||
75a70cf9 | 186 | a = tree_vec_extract (gsi, word_type, a, bitsize, bitpos); |
187 | b = tree_vec_extract (gsi, word_type, b, bitsize, bitpos); | |
0501cacc | 188 | |
75a70cf9 | 189 | signs = gimplify_build2 (gsi, BIT_XOR_EXPR, word_type, a, b); |
190 | b_low = gimplify_build2 (gsi, BIT_AND_EXPR, word_type, b, low_bits); | |
0501cacc | 191 | if (code == PLUS_EXPR) |
75a70cf9 | 192 | a_low = gimplify_build2 (gsi, BIT_AND_EXPR, word_type, a, low_bits); |
0501cacc | 193 | else |
194 | { | |
75a70cf9 | 195 | a_low = gimplify_build2 (gsi, BIT_IOR_EXPR, word_type, a, high_bits); |
196 | signs = gimplify_build1 (gsi, BIT_NOT_EXPR, word_type, signs); | |
0501cacc | 197 | } |
198 | ||
75a70cf9 | 199 | signs = gimplify_build2 (gsi, BIT_AND_EXPR, word_type, signs, high_bits); |
200 | result_low = gimplify_build2 (gsi, code, word_type, a_low, b_low); | |
201 | return gimplify_build2 (gsi, BIT_XOR_EXPR, word_type, result_low, signs); | |
0501cacc | 202 | } |
203 | ||
204 | static tree | |
75a70cf9 | 205 | do_negate (gimple_stmt_iterator *gsi, tree word_type, tree b, |
0501cacc | 206 | tree unused ATTRIBUTE_UNUSED, tree bitpos ATTRIBUTE_UNUSED, |
207 | tree bitsize ATTRIBUTE_UNUSED, | |
208 | enum tree_code code ATTRIBUTE_UNUSED) | |
209 | { | |
210 | tree inner_type = TREE_TYPE (TREE_TYPE (b)); | |
211 | HOST_WIDE_INT max; | |
212 | tree low_bits, high_bits, b_low, result_low, signs; | |
213 | ||
214 | max = GET_MODE_MASK (TYPE_MODE (inner_type)); | |
215 | low_bits = build_replicated_const (word_type, inner_type, max >> 1); | |
216 | high_bits = build_replicated_const (word_type, inner_type, max & ~(max >> 1)); | |
217 | ||
75a70cf9 | 218 | b = tree_vec_extract (gsi, word_type, b, bitsize, bitpos); |
0501cacc | 219 | |
75a70cf9 | 220 | b_low = gimplify_build2 (gsi, BIT_AND_EXPR, word_type, b, low_bits); |
221 | signs = gimplify_build1 (gsi, BIT_NOT_EXPR, word_type, b); | |
222 | signs = gimplify_build2 (gsi, BIT_AND_EXPR, word_type, signs, high_bits); | |
223 | result_low = gimplify_build2 (gsi, MINUS_EXPR, word_type, high_bits, b_low); | |
224 | return gimplify_build2 (gsi, BIT_XOR_EXPR, word_type, result_low, signs); | |
0501cacc | 225 | } |
226 | ||
227 | /* Expand a vector operation to scalars, by using many operations | |
228 | whose type is the vector type's inner type. */ | |
229 | static tree | |
75a70cf9 | 230 | expand_vector_piecewise (gimple_stmt_iterator *gsi, elem_op_func f, |
0501cacc | 231 | tree type, tree inner_type, |
232 | tree a, tree b, enum tree_code code) | |
233 | { | |
f1f41a6c | 234 | vec<constructor_elt, va_gc> *v; |
0501cacc | 235 | tree part_width = TYPE_SIZE (inner_type); |
236 | tree index = bitsize_int (0); | |
237 | int nunits = TYPE_VECTOR_SUBPARTS (type); | |
e913b5cd | 238 | int delta = tree_to_uhwi (part_width) |
239 | / tree_to_uhwi (TYPE_SIZE (TREE_TYPE (type))); | |
0501cacc | 240 | int i; |
928efcfe | 241 | location_t loc = gimple_location (gsi_stmt (*gsi)); |
242 | ||
243 | if (types_compatible_p (gimple_expr_type (gsi_stmt (*gsi)), type)) | |
244 | warning_at (loc, OPT_Wvector_operation_performance, | |
245 | "vector operation will be expanded piecewise"); | |
246 | else | |
247 | warning_at (loc, OPT_Wvector_operation_performance, | |
248 | "vector operation will be expanded in parallel"); | |
0501cacc | 249 | |
f1f41a6c | 250 | vec_alloc (v, (nunits + delta - 1) / delta); |
0501cacc | 251 | for (i = 0; i < nunits; |
317e2a67 | 252 | i += delta, index = int_const_binop (PLUS_EXPR, index, part_width)) |
0501cacc | 253 | { |
75a70cf9 | 254 | tree result = f (gsi, inner_type, a, b, index, part_width, code); |
e82e4eb5 | 255 | constructor_elt ce = {NULL_TREE, result}; |
f1f41a6c | 256 | v->quick_push (ce); |
0501cacc | 257 | } |
258 | ||
c75b4594 | 259 | return build_constructor (type, v); |
0501cacc | 260 | } |
261 | ||
262 | /* Expand a vector operation to scalars with the freedom to use | |
263 | a scalar integer type, or to use a different size for the items | |
264 | in the vector type. */ | |
265 | static tree | |
75a70cf9 | 266 | expand_vector_parallel (gimple_stmt_iterator *gsi, elem_op_func f, tree type, |
0501cacc | 267 | tree a, tree b, |
268 | enum tree_code code) | |
269 | { | |
270 | tree result, compute_type; | |
271 | enum machine_mode mode; | |
e913b5cd | 272 | int n_words = tree_to_uhwi (TYPE_SIZE_UNIT (type)) / UNITS_PER_WORD; |
928efcfe | 273 | location_t loc = gimple_location (gsi_stmt (*gsi)); |
0501cacc | 274 | |
275 | /* We have three strategies. If the type is already correct, just do | |
276 | the operation an element at a time. Else, if the vector is wider than | |
277 | one word, do it a word at a time; finally, if the vector is smaller | |
278 | than one word, do it as a scalar. */ | |
279 | if (TYPE_MODE (TREE_TYPE (type)) == word_mode) | |
75a70cf9 | 280 | return expand_vector_piecewise (gsi, f, |
0501cacc | 281 | type, TREE_TYPE (type), |
282 | a, b, code); | |
283 | else if (n_words > 1) | |
284 | { | |
285 | tree word_type = build_word_mode_vector_type (n_words); | |
75a70cf9 | 286 | result = expand_vector_piecewise (gsi, f, |
0501cacc | 287 | word_type, TREE_TYPE (word_type), |
288 | a, b, code); | |
75a70cf9 | 289 | result = force_gimple_operand_gsi (gsi, result, true, NULL, true, |
290 | GSI_SAME_STMT); | |
0501cacc | 291 | } |
292 | else | |
293 | { | |
294 | /* Use a single scalar operation with a mode no wider than word_mode. */ | |
e913b5cd | 295 | mode = mode_for_size (tree_to_uhwi (TYPE_SIZE (type)), MODE_INT, 0); |
0501cacc | 296 | compute_type = lang_hooks.types.type_for_mode (mode, 1); |
75a70cf9 | 297 | result = f (gsi, compute_type, a, b, NULL_TREE, NULL_TREE, code); |
928efcfe | 298 | warning_at (loc, OPT_Wvector_operation_performance, |
299 | "vector operation will be expanded with a " | |
300 | "single scalar operation"); | |
0501cacc | 301 | } |
302 | ||
303 | return result; | |
304 | } | |
305 | ||
306 | /* Expand a vector operation to scalars; for integer types we can use | |
307 | special bit twiddling tricks to do the sums a word at a time, using | |
308 | function F_PARALLEL instead of F. These tricks are done only if | |
309 | they can process at least four items, that is, only if the vector | |
310 | holds at least four items and if a word can hold four items. */ | |
311 | static tree | |
75a70cf9 | 312 | expand_vector_addition (gimple_stmt_iterator *gsi, |
0501cacc | 313 | elem_op_func f, elem_op_func f_parallel, |
314 | tree type, tree a, tree b, enum tree_code code) | |
315 | { | |
316 | int parts_per_word = UNITS_PER_WORD | |
e913b5cd | 317 | / tree_to_uhwi (TYPE_SIZE_UNIT (TREE_TYPE (type))); |
0501cacc | 318 | |
319 | if (INTEGRAL_TYPE_P (TREE_TYPE (type)) | |
320 | && parts_per_word >= 4 | |
321 | && TYPE_VECTOR_SUBPARTS (type) >= 4) | |
75a70cf9 | 322 | return expand_vector_parallel (gsi, f_parallel, |
0501cacc | 323 | type, a, b, code); |
324 | else | |
75a70cf9 | 325 | return expand_vector_piecewise (gsi, f, |
0501cacc | 326 | type, TREE_TYPE (type), |
327 | a, b, code); | |
328 | } | |
329 | ||
d7ad16c2 | 330 | /* Try to expand vector comparison expression OP0 CODE OP1 by |
331 | querying optab if the following expression: | |
332 | VEC_COND_EXPR< OP0 CODE OP1, {-1,...}, {0,...}> | |
333 | can be expanded. */ | |
334 | static tree | |
335 | expand_vector_comparison (gimple_stmt_iterator *gsi, tree type, tree op0, | |
336 | tree op1, enum tree_code code) | |
337 | { | |
338 | tree t; | |
339 | if (! expand_vec_cond_expr_p (type, TREE_TYPE (op0))) | |
340 | t = expand_vector_piecewise (gsi, do_compare, type, | |
341 | TREE_TYPE (TREE_TYPE (op0)), op0, op1, code); | |
342 | else | |
343 | t = NULL_TREE; | |
344 | ||
345 | return t; | |
346 | } | |
347 | ||
60420e1c | 348 | /* Helper function of expand_vector_divmod. Gimplify a RSHIFT_EXPR in type |
349 | of OP0 with shift counts in SHIFTCNTS array and return the temporary holding | |
350 | the result if successful, otherwise return NULL_TREE. */ | |
351 | static tree | |
352 | add_rshift (gimple_stmt_iterator *gsi, tree type, tree op0, int *shiftcnts) | |
353 | { | |
354 | optab op; | |
355 | unsigned int i, nunits = TYPE_VECTOR_SUBPARTS (type); | |
356 | bool scalar_shift = true; | |
357 | ||
358 | for (i = 1; i < nunits; i++) | |
359 | { | |
360 | if (shiftcnts[i] != shiftcnts[0]) | |
361 | scalar_shift = false; | |
362 | } | |
363 | ||
364 | if (scalar_shift && shiftcnts[0] == 0) | |
365 | return op0; | |
366 | ||
367 | if (scalar_shift) | |
368 | { | |
369 | op = optab_for_tree_code (RSHIFT_EXPR, type, optab_scalar); | |
6cdd383a | 370 | if (op != unknown_optab |
60420e1c | 371 | && optab_handler (op, TYPE_MODE (type)) != CODE_FOR_nothing) |
372 | return gimplify_build2 (gsi, RSHIFT_EXPR, type, op0, | |
373 | build_int_cst (NULL_TREE, shiftcnts[0])); | |
374 | } | |
375 | ||
376 | op = optab_for_tree_code (RSHIFT_EXPR, type, optab_vector); | |
6cdd383a | 377 | if (op != unknown_optab |
60420e1c | 378 | && optab_handler (op, TYPE_MODE (type)) != CODE_FOR_nothing) |
379 | { | |
380 | tree *vec = XALLOCAVEC (tree, nunits); | |
381 | for (i = 0; i < nunits; i++) | |
382 | vec[i] = build_int_cst (TREE_TYPE (type), shiftcnts[i]); | |
383 | return gimplify_build2 (gsi, RSHIFT_EXPR, type, op0, | |
384 | build_vector (type, vec)); | |
385 | } | |
386 | ||
387 | return NULL_TREE; | |
388 | } | |
389 | ||
390 | /* Try to expand integer vector division by constant using | |
391 | widening multiply, shifts and additions. */ | |
392 | static tree | |
393 | expand_vector_divmod (gimple_stmt_iterator *gsi, tree type, tree op0, | |
394 | tree op1, enum tree_code code) | |
395 | { | |
396 | bool use_pow2 = true; | |
397 | bool has_vector_shift = true; | |
398 | int mode = -1, this_mode; | |
399 | int pre_shift = -1, post_shift; | |
400 | unsigned int nunits = TYPE_VECTOR_SUBPARTS (type); | |
401 | int *shifts = XALLOCAVEC (int, nunits * 4); | |
402 | int *pre_shifts = shifts + nunits; | |
403 | int *post_shifts = pre_shifts + nunits; | |
404 | int *shift_temps = post_shifts + nunits; | |
405 | unsigned HOST_WIDE_INT *mulc = XALLOCAVEC (unsigned HOST_WIDE_INT, nunits); | |
406 | int prec = TYPE_PRECISION (TREE_TYPE (type)); | |
407 | int dummy_int; | |
ddb1be65 | 408 | unsigned int i; |
e913b5cd | 409 | signop sign_p = TYPE_SIGN (TREE_TYPE (type)); |
60420e1c | 410 | unsigned HOST_WIDE_INT mask = GET_MODE_MASK (TYPE_MODE (TREE_TYPE (type))); |
60420e1c | 411 | tree *vec; |
ebf4f764 | 412 | tree cur_op, mulcst, tem; |
413 | optab op; | |
60420e1c | 414 | |
415 | if (prec > HOST_BITS_PER_WIDE_INT) | |
416 | return NULL_TREE; | |
417 | ||
418 | op = optab_for_tree_code (RSHIFT_EXPR, type, optab_vector); | |
6cdd383a | 419 | if (op == unknown_optab |
60420e1c | 420 | || optab_handler (op, TYPE_MODE (type)) == CODE_FOR_nothing) |
421 | has_vector_shift = false; | |
422 | ||
423 | /* Analysis phase. Determine if all op1 elements are either power | |
424 | of two and it is possible to expand it using shifts (or for remainder | |
425 | using masking). Additionally compute the multiplicative constants | |
426 | and pre and post shifts if the division is to be expanded using | |
427 | widening or high part multiplication plus shifts. */ | |
428 | for (i = 0; i < nunits; i++) | |
429 | { | |
430 | tree cst = VECTOR_CST_ELT (op1, i); | |
431 | unsigned HOST_WIDE_INT ml; | |
432 | ||
20448fd9 | 433 | if (TREE_CODE (cst) != INTEGER_CST || integer_zerop (cst)) |
60420e1c | 434 | return NULL_TREE; |
435 | pre_shifts[i] = 0; | |
436 | post_shifts[i] = 0; | |
437 | mulc[i] = 0; | |
438 | if (use_pow2 | |
439 | && (!integer_pow2p (cst) || tree_int_cst_sgn (cst) != 1)) | |
440 | use_pow2 = false; | |
441 | if (use_pow2) | |
442 | { | |
443 | shifts[i] = tree_log2 (cst); | |
444 | if (shifts[i] != shifts[0] | |
445 | && code == TRUNC_DIV_EXPR | |
446 | && !has_vector_shift) | |
447 | use_pow2 = false; | |
448 | } | |
449 | if (mode == -2) | |
450 | continue; | |
e913b5cd | 451 | if (sign_p == UNSIGNED) |
60420e1c | 452 | { |
453 | unsigned HOST_WIDE_INT mh; | |
f9ae6f95 | 454 | unsigned HOST_WIDE_INT d = TREE_INT_CST_LOW (cst) & mask; |
60420e1c | 455 | |
456 | if (d >= ((unsigned HOST_WIDE_INT) 1 << (prec - 1))) | |
457 | /* FIXME: Can transform this into op0 >= op1 ? 1 : 0. */ | |
458 | return NULL_TREE; | |
459 | ||
460 | if (d <= 1) | |
461 | { | |
462 | mode = -2; | |
463 | continue; | |
464 | } | |
465 | ||
466 | /* Find a suitable multiplier and right shift count | |
467 | instead of multiplying with D. */ | |
468 | mh = choose_multiplier (d, prec, prec, &ml, &post_shift, &dummy_int); | |
469 | ||
470 | /* If the suggested multiplier is more than SIZE bits, we can | |
471 | do better for even divisors, using an initial right shift. */ | |
472 | if ((mh != 0 && (d & 1) == 0) | |
473 | || (!has_vector_shift && pre_shift != -1)) | |
474 | { | |
475 | if (has_vector_shift) | |
476 | pre_shift = floor_log2 (d & -d); | |
477 | else if (pre_shift == -1) | |
478 | { | |
479 | unsigned int j; | |
480 | for (j = 0; j < nunits; j++) | |
481 | { | |
482 | tree cst2 = VECTOR_CST_ELT (op1, j); | |
483 | unsigned HOST_WIDE_INT d2; | |
484 | int this_pre_shift; | |
485 | ||
e913b5cd | 486 | if (!tree_fits_uhwi_p (cst2)) |
60420e1c | 487 | return NULL_TREE; |
e913b5cd | 488 | d2 = tree_to_uhwi (cst2) & mask; |
60420e1c | 489 | if (d2 == 0) |
490 | return NULL_TREE; | |
491 | this_pre_shift = floor_log2 (d2 & -d2); | |
492 | if (pre_shift == -1 || this_pre_shift < pre_shift) | |
493 | pre_shift = this_pre_shift; | |
494 | } | |
495 | if (i != 0 && pre_shift != 0) | |
496 | { | |
497 | /* Restart. */ | |
498 | i = -1U; | |
499 | mode = -1; | |
500 | continue; | |
501 | } | |
502 | } | |
503 | if (pre_shift != 0) | |
504 | { | |
505 | if ((d >> pre_shift) <= 1) | |
506 | { | |
507 | mode = -2; | |
508 | continue; | |
509 | } | |
510 | mh = choose_multiplier (d >> pre_shift, prec, | |
511 | prec - pre_shift, | |
512 | &ml, &post_shift, &dummy_int); | |
513 | gcc_assert (!mh); | |
514 | pre_shifts[i] = pre_shift; | |
515 | } | |
516 | } | |
517 | if (!mh) | |
518 | this_mode = 0; | |
519 | else | |
520 | this_mode = 1; | |
521 | } | |
522 | else | |
523 | { | |
f9ae6f95 | 524 | HOST_WIDE_INT d = TREE_INT_CST_LOW (cst); |
60420e1c | 525 | unsigned HOST_WIDE_INT abs_d; |
526 | ||
527 | if (d == -1) | |
528 | return NULL_TREE; | |
529 | ||
530 | /* Since d might be INT_MIN, we have to cast to | |
531 | unsigned HOST_WIDE_INT before negating to avoid | |
532 | undefined signed overflow. */ | |
533 | abs_d = (d >= 0 | |
534 | ? (unsigned HOST_WIDE_INT) d | |
535 | : - (unsigned HOST_WIDE_INT) d); | |
536 | ||
537 | /* n rem d = n rem -d */ | |
538 | if (code == TRUNC_MOD_EXPR && d < 0) | |
539 | d = abs_d; | |
540 | else if (abs_d == (unsigned HOST_WIDE_INT) 1 << (prec - 1)) | |
541 | { | |
542 | /* This case is not handled correctly below. */ | |
543 | mode = -2; | |
544 | continue; | |
545 | } | |
546 | if (abs_d <= 1) | |
547 | { | |
548 | mode = -2; | |
549 | continue; | |
550 | } | |
551 | ||
552 | choose_multiplier (abs_d, prec, prec - 1, &ml, | |
553 | &post_shift, &dummy_int); | |
554 | if (ml >= (unsigned HOST_WIDE_INT) 1 << (prec - 1)) | |
555 | { | |
556 | this_mode = 4 + (d < 0); | |
557 | ml |= (~(unsigned HOST_WIDE_INT) 0) << (prec - 1); | |
558 | } | |
559 | else | |
560 | this_mode = 2 + (d < 0); | |
561 | } | |
562 | mulc[i] = ml; | |
563 | post_shifts[i] = post_shift; | |
564 | if ((i && !has_vector_shift && post_shifts[0] != post_shift) | |
565 | || post_shift >= prec | |
566 | || pre_shifts[i] >= prec) | |
567 | this_mode = -2; | |
568 | ||
569 | if (i == 0) | |
570 | mode = this_mode; | |
571 | else if (mode != this_mode) | |
572 | mode = -2; | |
573 | } | |
574 | ||
575 | vec = XALLOCAVEC (tree, nunits); | |
576 | ||
577 | if (use_pow2) | |
578 | { | |
579 | tree addend = NULL_TREE; | |
e913b5cd | 580 | if (sign_p == SIGNED) |
60420e1c | 581 | { |
582 | tree uns_type; | |
583 | ||
584 | /* Both division and remainder sequences need | |
585 | op0 < 0 ? mask : 0 computed. It can be either computed as | |
586 | (type) (((uns_type) (op0 >> (prec - 1))) >> (prec - shifts[i])) | |
587 | if none of the shifts is 0, or as the conditional. */ | |
588 | for (i = 0; i < nunits; i++) | |
589 | if (shifts[i] == 0) | |
590 | break; | |
591 | uns_type | |
592 | = build_vector_type (build_nonstandard_integer_type (prec, 1), | |
593 | nunits); | |
594 | if (i == nunits && TYPE_MODE (uns_type) == TYPE_MODE (type)) | |
595 | { | |
596 | for (i = 0; i < nunits; i++) | |
597 | shift_temps[i] = prec - 1; | |
598 | cur_op = add_rshift (gsi, type, op0, shift_temps); | |
599 | if (cur_op != NULL_TREE) | |
600 | { | |
601 | cur_op = gimplify_build1 (gsi, VIEW_CONVERT_EXPR, | |
602 | uns_type, cur_op); | |
603 | for (i = 0; i < nunits; i++) | |
604 | shift_temps[i] = prec - shifts[i]; | |
605 | cur_op = add_rshift (gsi, uns_type, cur_op, shift_temps); | |
606 | if (cur_op != NULL_TREE) | |
607 | addend = gimplify_build1 (gsi, VIEW_CONVERT_EXPR, | |
608 | type, cur_op); | |
609 | } | |
610 | } | |
611 | if (addend == NULL_TREE | |
612 | && expand_vec_cond_expr_p (type, type)) | |
613 | { | |
614 | tree zero, cst, cond; | |
615 | gimple stmt; | |
616 | ||
617 | zero = build_zero_cst (type); | |
618 | cond = build2 (LT_EXPR, type, op0, zero); | |
619 | for (i = 0; i < nunits; i++) | |
620 | vec[i] = build_int_cst (TREE_TYPE (type), | |
621 | ((unsigned HOST_WIDE_INT) 1 | |
622 | << shifts[i]) - 1); | |
623 | cst = build_vector (type, vec); | |
03d37e4e | 624 | addend = make_ssa_name (type, NULL); |
446e85eb | 625 | stmt = gimple_build_assign_with_ops (VEC_COND_EXPR, addend, |
626 | cond, cst, zero); | |
60420e1c | 627 | gsi_insert_before (gsi, stmt, GSI_SAME_STMT); |
628 | } | |
629 | } | |
630 | if (code == TRUNC_DIV_EXPR) | |
631 | { | |
e913b5cd | 632 | if (sign_p == UNSIGNED) |
60420e1c | 633 | { |
634 | /* q = op0 >> shift; */ | |
635 | cur_op = add_rshift (gsi, type, op0, shifts); | |
636 | if (cur_op != NULL_TREE) | |
637 | return cur_op; | |
638 | } | |
639 | else if (addend != NULL_TREE) | |
640 | { | |
641 | /* t1 = op0 + addend; | |
642 | q = t1 >> shift; */ | |
643 | op = optab_for_tree_code (PLUS_EXPR, type, optab_default); | |
6cdd383a | 644 | if (op != unknown_optab |
60420e1c | 645 | && optab_handler (op, TYPE_MODE (type)) != CODE_FOR_nothing) |
646 | { | |
647 | cur_op = gimplify_build2 (gsi, PLUS_EXPR, type, op0, addend); | |
648 | cur_op = add_rshift (gsi, type, cur_op, shifts); | |
649 | if (cur_op != NULL_TREE) | |
650 | return cur_op; | |
651 | } | |
652 | } | |
653 | } | |
654 | else | |
655 | { | |
656 | tree mask; | |
657 | for (i = 0; i < nunits; i++) | |
658 | vec[i] = build_int_cst (TREE_TYPE (type), | |
659 | ((unsigned HOST_WIDE_INT) 1 | |
660 | << shifts[i]) - 1); | |
661 | mask = build_vector (type, vec); | |
662 | op = optab_for_tree_code (BIT_AND_EXPR, type, optab_default); | |
6cdd383a | 663 | if (op != unknown_optab |
60420e1c | 664 | && optab_handler (op, TYPE_MODE (type)) != CODE_FOR_nothing) |
665 | { | |
e913b5cd | 666 | if (sign_p == UNSIGNED) |
60420e1c | 667 | /* r = op0 & mask; */ |
668 | return gimplify_build2 (gsi, BIT_AND_EXPR, type, op0, mask); | |
669 | else if (addend != NULL_TREE) | |
670 | { | |
671 | /* t1 = op0 + addend; | |
672 | t2 = t1 & mask; | |
673 | r = t2 - addend; */ | |
674 | op = optab_for_tree_code (PLUS_EXPR, type, optab_default); | |
6cdd383a | 675 | if (op != unknown_optab |
60420e1c | 676 | && optab_handler (op, TYPE_MODE (type)) |
677 | != CODE_FOR_nothing) | |
678 | { | |
679 | cur_op = gimplify_build2 (gsi, PLUS_EXPR, type, op0, | |
680 | addend); | |
681 | cur_op = gimplify_build2 (gsi, BIT_AND_EXPR, type, | |
682 | cur_op, mask); | |
683 | op = optab_for_tree_code (MINUS_EXPR, type, | |
684 | optab_default); | |
6cdd383a | 685 | if (op != unknown_optab |
60420e1c | 686 | && optab_handler (op, TYPE_MODE (type)) |
687 | != CODE_FOR_nothing) | |
688 | return gimplify_build2 (gsi, MINUS_EXPR, type, | |
689 | cur_op, addend); | |
690 | } | |
691 | } | |
692 | } | |
693 | } | |
694 | } | |
695 | ||
696 | if (mode == -2 || BYTES_BIG_ENDIAN != WORDS_BIG_ENDIAN) | |
697 | return NULL_TREE; | |
698 | ||
ebf4f764 | 699 | if (!can_mult_highpart_p (TYPE_MODE (type), TYPE_UNSIGNED (type))) |
700 | return NULL_TREE; | |
60420e1c | 701 | |
702 | cur_op = op0; | |
703 | ||
704 | switch (mode) | |
705 | { | |
706 | case 0: | |
e913b5cd | 707 | gcc_assert (sign_p == UNSIGNED); |
60420e1c | 708 | /* t1 = oprnd0 >> pre_shift; |
99ee4cc8 | 709 | t2 = t1 h* ml; |
60420e1c | 710 | q = t2 >> post_shift; */ |
711 | cur_op = add_rshift (gsi, type, cur_op, pre_shifts); | |
712 | if (cur_op == NULL_TREE) | |
713 | return NULL_TREE; | |
714 | break; | |
715 | case 1: | |
e913b5cd | 716 | gcc_assert (sign_p == UNSIGNED); |
60420e1c | 717 | for (i = 0; i < nunits; i++) |
718 | { | |
719 | shift_temps[i] = 1; | |
720 | post_shifts[i]--; | |
721 | } | |
722 | break; | |
723 | case 2: | |
724 | case 3: | |
725 | case 4: | |
726 | case 5: | |
e913b5cd | 727 | gcc_assert (sign_p == SIGNED); |
60420e1c | 728 | for (i = 0; i < nunits; i++) |
729 | shift_temps[i] = prec - 1; | |
730 | break; | |
731 | default: | |
732 | return NULL_TREE; | |
733 | } | |
734 | ||
735 | for (i = 0; i < nunits; i++) | |
736 | vec[i] = build_int_cst (TREE_TYPE (type), mulc[i]); | |
737 | mulcst = build_vector (type, vec); | |
10dd7335 | 738 | |
ebf4f764 | 739 | cur_op = gimplify_build2 (gsi, MULT_HIGHPART_EXPR, type, cur_op, mulcst); |
60420e1c | 740 | |
741 | switch (mode) | |
742 | { | |
743 | case 0: | |
744 | /* t1 = oprnd0 >> pre_shift; | |
99ee4cc8 | 745 | t2 = t1 h* ml; |
60420e1c | 746 | q = t2 >> post_shift; */ |
747 | cur_op = add_rshift (gsi, type, cur_op, post_shifts); | |
748 | break; | |
749 | case 1: | |
99ee4cc8 | 750 | /* t1 = oprnd0 h* ml; |
60420e1c | 751 | t2 = oprnd0 - t1; |
752 | t3 = t2 >> 1; | |
753 | t4 = t1 + t3; | |
754 | q = t4 >> (post_shift - 1); */ | |
755 | op = optab_for_tree_code (MINUS_EXPR, type, optab_default); | |
6cdd383a | 756 | if (op == unknown_optab |
60420e1c | 757 | || optab_handler (op, TYPE_MODE (type)) == CODE_FOR_nothing) |
758 | return NULL_TREE; | |
759 | tem = gimplify_build2 (gsi, MINUS_EXPR, type, op0, cur_op); | |
760 | tem = add_rshift (gsi, type, tem, shift_temps); | |
761 | op = optab_for_tree_code (PLUS_EXPR, type, optab_default); | |
6cdd383a | 762 | if (op == unknown_optab |
60420e1c | 763 | || optab_handler (op, TYPE_MODE (type)) == CODE_FOR_nothing) |
764 | return NULL_TREE; | |
765 | tem = gimplify_build2 (gsi, PLUS_EXPR, type, cur_op, tem); | |
766 | cur_op = add_rshift (gsi, type, tem, post_shifts); | |
767 | if (cur_op == NULL_TREE) | |
768 | return NULL_TREE; | |
769 | break; | |
770 | case 2: | |
771 | case 3: | |
772 | case 4: | |
773 | case 5: | |
99ee4cc8 | 774 | /* t1 = oprnd0 h* ml; |
60420e1c | 775 | t2 = t1; [ iff (mode & 2) != 0 ] |
776 | t2 = t1 + oprnd0; [ iff (mode & 2) == 0 ] | |
777 | t3 = t2 >> post_shift; | |
778 | t4 = oprnd0 >> (prec - 1); | |
779 | q = t3 - t4; [ iff (mode & 1) == 0 ] | |
780 | q = t4 - t3; [ iff (mode & 1) != 0 ] */ | |
781 | if ((mode & 2) == 0) | |
782 | { | |
783 | op = optab_for_tree_code (PLUS_EXPR, type, optab_default); | |
6cdd383a | 784 | if (op == unknown_optab |
60420e1c | 785 | || optab_handler (op, TYPE_MODE (type)) == CODE_FOR_nothing) |
786 | return NULL_TREE; | |
787 | cur_op = gimplify_build2 (gsi, PLUS_EXPR, type, cur_op, op0); | |
788 | } | |
789 | cur_op = add_rshift (gsi, type, cur_op, post_shifts); | |
790 | if (cur_op == NULL_TREE) | |
791 | return NULL_TREE; | |
792 | tem = add_rshift (gsi, type, op0, shift_temps); | |
793 | if (tem == NULL_TREE) | |
794 | return NULL_TREE; | |
795 | op = optab_for_tree_code (MINUS_EXPR, type, optab_default); | |
6cdd383a | 796 | if (op == unknown_optab |
60420e1c | 797 | || optab_handler (op, TYPE_MODE (type)) == CODE_FOR_nothing) |
798 | return NULL_TREE; | |
799 | if ((mode & 1) == 0) | |
800 | cur_op = gimplify_build2 (gsi, MINUS_EXPR, type, cur_op, tem); | |
801 | else | |
802 | cur_op = gimplify_build2 (gsi, MINUS_EXPR, type, tem, cur_op); | |
803 | break; | |
804 | default: | |
805 | gcc_unreachable (); | |
806 | } | |
807 | ||
808 | if (code == TRUNC_DIV_EXPR) | |
809 | return cur_op; | |
810 | ||
811 | /* We divided. Now finish by: | |
812 | t1 = q * oprnd1; | |
813 | r = oprnd0 - t1; */ | |
814 | op = optab_for_tree_code (MULT_EXPR, type, optab_default); | |
6cdd383a | 815 | if (op == unknown_optab |
60420e1c | 816 | || optab_handler (op, TYPE_MODE (type)) == CODE_FOR_nothing) |
817 | return NULL_TREE; | |
818 | tem = gimplify_build2 (gsi, MULT_EXPR, type, cur_op, op1); | |
819 | op = optab_for_tree_code (MINUS_EXPR, type, optab_default); | |
6cdd383a | 820 | if (op == unknown_optab |
60420e1c | 821 | || optab_handler (op, TYPE_MODE (type)) == CODE_FOR_nothing) |
822 | return NULL_TREE; | |
823 | return gimplify_build2 (gsi, MINUS_EXPR, type, op0, tem); | |
824 | } | |
825 | ||
dd8c5e6c | 826 | /* Expand a vector condition to scalars, by using many conditions |
827 | on the vector's elements. */ | |
828 | static void | |
829 | expand_vector_condition (gimple_stmt_iterator *gsi) | |
830 | { | |
831 | gimple stmt = gsi_stmt (*gsi); | |
832 | tree type = gimple_expr_type (stmt); | |
833 | tree a = gimple_assign_rhs1 (stmt); | |
834 | tree a1 = a; | |
835 | tree a2; | |
836 | bool a_is_comparison = false; | |
837 | tree b = gimple_assign_rhs2 (stmt); | |
838 | tree c = gimple_assign_rhs3 (stmt); | |
f1f41a6c | 839 | vec<constructor_elt, va_gc> *v; |
dd8c5e6c | 840 | tree constr; |
841 | tree inner_type = TREE_TYPE (type); | |
842 | tree cond_type = TREE_TYPE (TREE_TYPE (a)); | |
843 | tree comp_inner_type = cond_type; | |
844 | tree width = TYPE_SIZE (inner_type); | |
845 | tree index = bitsize_int (0); | |
846 | int nunits = TYPE_VECTOR_SUBPARTS (type); | |
847 | int i; | |
848 | location_t loc = gimple_location (gsi_stmt (*gsi)); | |
849 | ||
f72ca119 | 850 | if (!is_gimple_val (a)) |
dd8c5e6c | 851 | { |
852 | gcc_assert (COMPARISON_CLASS_P (a)); | |
853 | a_is_comparison = true; | |
854 | a1 = TREE_OPERAND (a, 0); | |
855 | a2 = TREE_OPERAND (a, 1); | |
856 | comp_inner_type = TREE_TYPE (TREE_TYPE (a1)); | |
857 | } | |
858 | ||
859 | if (expand_vec_cond_expr_p (type, TREE_TYPE (a1))) | |
860 | return; | |
861 | ||
862 | /* TODO: try and find a smaller vector type. */ | |
863 | ||
864 | warning_at (loc, OPT_Wvector_operation_performance, | |
865 | "vector condition will be expanded piecewise"); | |
866 | ||
f1f41a6c | 867 | vec_alloc (v, nunits); |
dd8c5e6c | 868 | for (i = 0; i < nunits; |
869 | i++, index = int_const_binop (PLUS_EXPR, index, width)) | |
870 | { | |
871 | tree aa, result; | |
872 | tree bb = tree_vec_extract (gsi, inner_type, b, width, index); | |
873 | tree cc = tree_vec_extract (gsi, inner_type, c, width, index); | |
874 | if (a_is_comparison) | |
875 | { | |
876 | tree aa1 = tree_vec_extract (gsi, comp_inner_type, a1, width, index); | |
877 | tree aa2 = tree_vec_extract (gsi, comp_inner_type, a2, width, index); | |
878 | aa = build2 (TREE_CODE (a), cond_type, aa1, aa2); | |
879 | } | |
880 | else | |
881 | aa = tree_vec_extract (gsi, cond_type, a, width, index); | |
882 | result = gimplify_build3 (gsi, COND_EXPR, inner_type, aa, bb, cc); | |
883 | constructor_elt ce = {NULL_TREE, result}; | |
f1f41a6c | 884 | v->quick_push (ce); |
dd8c5e6c | 885 | } |
886 | ||
887 | constr = build_constructor (type, v); | |
888 | gimple_assign_set_rhs_from_tree (gsi, constr); | |
889 | update_stmt (gsi_stmt (*gsi)); | |
890 | } | |
891 | ||
0501cacc | 892 | static tree |
75a70cf9 | 893 | expand_vector_operation (gimple_stmt_iterator *gsi, tree type, tree compute_type, |
894 | gimple assign, enum tree_code code) | |
0501cacc | 895 | { |
896 | enum machine_mode compute_mode = TYPE_MODE (compute_type); | |
897 | ||
898 | /* If the compute mode is not a vector mode (hence we are not decomposing | |
899 | a BLKmode vector to smaller, hardware-supported vectors), we may want | |
900 | to expand the operations in parallel. */ | |
901 | if (GET_MODE_CLASS (compute_mode) != MODE_VECTOR_INT | |
06f0b99c | 902 | && GET_MODE_CLASS (compute_mode) != MODE_VECTOR_FLOAT |
903 | && GET_MODE_CLASS (compute_mode) != MODE_VECTOR_FRACT | |
904 | && GET_MODE_CLASS (compute_mode) != MODE_VECTOR_UFRACT | |
905 | && GET_MODE_CLASS (compute_mode) != MODE_VECTOR_ACCUM | |
906 | && GET_MODE_CLASS (compute_mode) != MODE_VECTOR_UACCUM) | |
0501cacc | 907 | switch (code) |
908 | { | |
909 | case PLUS_EXPR: | |
910 | case MINUS_EXPR: | |
981eb798 | 911 | if (!TYPE_OVERFLOW_TRAPS (type)) |
928efcfe | 912 | return expand_vector_addition (gsi, do_binop, do_plus_minus, type, |
913 | gimple_assign_rhs1 (assign), | |
75a70cf9 | 914 | gimple_assign_rhs2 (assign), code); |
0501cacc | 915 | break; |
916 | ||
917 | case NEGATE_EXPR: | |
981eb798 | 918 | if (!TYPE_OVERFLOW_TRAPS (type)) |
75a70cf9 | 919 | return expand_vector_addition (gsi, do_unop, do_negate, type, |
920 | gimple_assign_rhs1 (assign), | |
0501cacc | 921 | NULL_TREE, code); |
922 | break; | |
923 | ||
924 | case BIT_AND_EXPR: | |
925 | case BIT_IOR_EXPR: | |
926 | case BIT_XOR_EXPR: | |
75a70cf9 | 927 | return expand_vector_parallel (gsi, do_binop, type, |
928 | gimple_assign_rhs1 (assign), | |
929 | gimple_assign_rhs2 (assign), code); | |
0501cacc | 930 | |
931 | case BIT_NOT_EXPR: | |
75a70cf9 | 932 | return expand_vector_parallel (gsi, do_unop, type, |
933 | gimple_assign_rhs1 (assign), | |
d7ad16c2 | 934 | NULL_TREE, code); |
935 | case EQ_EXPR: | |
936 | case NE_EXPR: | |
937 | case GT_EXPR: | |
938 | case LT_EXPR: | |
939 | case GE_EXPR: | |
940 | case LE_EXPR: | |
941 | case UNEQ_EXPR: | |
942 | case UNGT_EXPR: | |
943 | case UNLT_EXPR: | |
944 | case UNGE_EXPR: | |
945 | case UNLE_EXPR: | |
946 | case LTGT_EXPR: | |
947 | case ORDERED_EXPR: | |
948 | case UNORDERED_EXPR: | |
949 | { | |
950 | tree rhs1 = gimple_assign_rhs1 (assign); | |
951 | tree rhs2 = gimple_assign_rhs2 (assign); | |
0501cacc | 952 | |
d7ad16c2 | 953 | return expand_vector_comparison (gsi, type, rhs1, rhs2, code); |
954 | } | |
60420e1c | 955 | |
956 | case TRUNC_DIV_EXPR: | |
957 | case TRUNC_MOD_EXPR: | |
958 | { | |
959 | tree rhs1 = gimple_assign_rhs1 (assign); | |
960 | tree rhs2 = gimple_assign_rhs2 (assign); | |
961 | tree ret; | |
962 | ||
963 | if (!optimize | |
964 | || !VECTOR_INTEGER_TYPE_P (type) | |
965 | || TREE_CODE (rhs2) != VECTOR_CST) | |
966 | break; | |
967 | ||
968 | ret = expand_vector_divmod (gsi, type, rhs1, rhs2, code); | |
969 | if (ret != NULL_TREE) | |
970 | return ret; | |
971 | break; | |
972 | } | |
973 | ||
0501cacc | 974 | default: |
975 | break; | |
976 | } | |
977 | ||
978 | if (TREE_CODE_CLASS (code) == tcc_unary) | |
75a70cf9 | 979 | return expand_vector_piecewise (gsi, do_unop, type, compute_type, |
980 | gimple_assign_rhs1 (assign), | |
0501cacc | 981 | NULL_TREE, code); |
982 | else | |
75a70cf9 | 983 | return expand_vector_piecewise (gsi, do_binop, type, compute_type, |
984 | gimple_assign_rhs1 (assign), | |
985 | gimple_assign_rhs2 (assign), code); | |
0501cacc | 986 | } |
987 | \f | |
f1690ec2 | 988 | /* Return a type for the widest vector mode whose components are of type |
989 | TYPE, or NULL_TREE if none is found. */ | |
06f0b99c | 990 | |
0501cacc | 991 | static tree |
f1690ec2 | 992 | type_for_widest_vector_mode (tree type, optab op) |
0501cacc | 993 | { |
f1690ec2 | 994 | enum machine_mode inner_mode = TYPE_MODE (type); |
0501cacc | 995 | enum machine_mode best_mode = VOIDmode, mode; |
996 | int best_nunits = 0; | |
997 | ||
cee7491d | 998 | if (SCALAR_FLOAT_MODE_P (inner_mode)) |
0501cacc | 999 | mode = MIN_MODE_VECTOR_FLOAT; |
06f0b99c | 1000 | else if (SCALAR_FRACT_MODE_P (inner_mode)) |
1001 | mode = MIN_MODE_VECTOR_FRACT; | |
1002 | else if (SCALAR_UFRACT_MODE_P (inner_mode)) | |
1003 | mode = MIN_MODE_VECTOR_UFRACT; | |
1004 | else if (SCALAR_ACCUM_MODE_P (inner_mode)) | |
1005 | mode = MIN_MODE_VECTOR_ACCUM; | |
1006 | else if (SCALAR_UACCUM_MODE_P (inner_mode)) | |
1007 | mode = MIN_MODE_VECTOR_UACCUM; | |
0501cacc | 1008 | else |
1009 | mode = MIN_MODE_VECTOR_INT; | |
1010 | ||
1011 | for (; mode != VOIDmode; mode = GET_MODE_WIDER_MODE (mode)) | |
1012 | if (GET_MODE_INNER (mode) == inner_mode | |
1013 | && GET_MODE_NUNITS (mode) > best_nunits | |
d6bf3b14 | 1014 | && optab_handler (op, mode) != CODE_FOR_nothing) |
0501cacc | 1015 | best_mode = mode, best_nunits = GET_MODE_NUNITS (mode); |
1016 | ||
1017 | if (best_mode == VOIDmode) | |
1018 | return NULL_TREE; | |
1019 | else | |
f1690ec2 | 1020 | return build_vector_type_for_mode (type, best_mode); |
0501cacc | 1021 | } |
1022 | ||
6cf89e04 | 1023 | |
1024 | /* Build a reference to the element of the vector VECT. Function | |
1025 | returns either the element itself, either BIT_FIELD_REF, or an | |
1026 | ARRAY_REF expression. | |
1027 | ||
9d75589a | 1028 | GSI is required to insert temporary variables while building a |
6cf89e04 | 1029 | refernece to the element of the vector VECT. |
1030 | ||
1031 | PTMPVEC is a pointer to the temporary variable for caching | |
1032 | purposes. In case when PTMPVEC is NULL new temporary variable | |
1033 | will be created. */ | |
1034 | static tree | |
1035 | vector_element (gimple_stmt_iterator *gsi, tree vect, tree idx, tree *ptmpvec) | |
1036 | { | |
3c425d7c | 1037 | tree vect_type, vect_elt_type; |
6cf89e04 | 1038 | gimple asgn; |
1039 | tree tmpvec; | |
1040 | tree arraytype; | |
1041 | bool need_asgn = true; | |
3c425d7c | 1042 | unsigned int elements; |
6cf89e04 | 1043 | |
3c425d7c | 1044 | vect_type = TREE_TYPE (vect); |
1045 | vect_elt_type = TREE_TYPE (vect_type); | |
1046 | elements = TYPE_VECTOR_SUBPARTS (vect_type); | |
6cf89e04 | 1047 | |
6cf89e04 | 1048 | if (TREE_CODE (idx) == INTEGER_CST) |
1049 | { | |
1050 | unsigned HOST_WIDE_INT index; | |
1051 | ||
3c425d7c | 1052 | /* Given that we're about to compute a binary modulus, |
1053 | we don't care about the high bits of the value. */ | |
f9ae6f95 | 1054 | index = TREE_INT_CST_LOW (idx); |
e913b5cd | 1055 | if (!tree_fits_uhwi_p (idx) || index >= elements) |
3c425d7c | 1056 | { |
1057 | index &= elements - 1; | |
1058 | idx = build_int_cst (TREE_TYPE (idx), index); | |
1059 | } | |
6cf89e04 | 1060 | |
649aab9e | 1061 | /* When lowering a vector statement sequence do some easy |
1062 | simplification by looking through intermediate vector results. */ | |
1063 | if (TREE_CODE (vect) == SSA_NAME) | |
1064 | { | |
1065 | gimple def_stmt = SSA_NAME_DEF_STMT (vect); | |
1066 | if (is_gimple_assign (def_stmt) | |
1067 | && (gimple_assign_rhs_code (def_stmt) == VECTOR_CST | |
1068 | || gimple_assign_rhs_code (def_stmt) == CONSTRUCTOR)) | |
1069 | vect = gimple_assign_rhs1 (def_stmt); | |
1070 | } | |
1071 | ||
6cf89e04 | 1072 | if (TREE_CODE (vect) == VECTOR_CST) |
fadf62f4 | 1073 | return VECTOR_CST_ELT (vect, index); |
569d18a5 | 1074 | else if (TREE_CODE (vect) == CONSTRUCTOR |
1075 | && (CONSTRUCTOR_NELTS (vect) == 0 | |
1076 | || TREE_CODE (TREE_TYPE (CONSTRUCTOR_ELT (vect, 0)->value)) | |
1077 | != VECTOR_TYPE)) | |
6cf89e04 | 1078 | { |
569d18a5 | 1079 | if (index < CONSTRUCTOR_NELTS (vect)) |
1080 | return CONSTRUCTOR_ELT (vect, index)->value; | |
3c425d7c | 1081 | return build_zero_cst (vect_elt_type); |
6cf89e04 | 1082 | } |
3c425d7c | 1083 | else |
6cf89e04 | 1084 | { |
3c425d7c | 1085 | tree size = TYPE_SIZE (vect_elt_type); |
891f5177 | 1086 | tree pos = fold_build2 (MULT_EXPR, bitsizetype, bitsize_int (index), |
1087 | size); | |
1088 | return fold_build3 (BIT_FIELD_REF, vect_elt_type, vect, size, pos); | |
6cf89e04 | 1089 | } |
6cf89e04 | 1090 | } |
1091 | ||
1092 | if (!ptmpvec) | |
3c425d7c | 1093 | tmpvec = create_tmp_var (vect_type, "vectmp"); |
6cf89e04 | 1094 | else if (!*ptmpvec) |
3c425d7c | 1095 | tmpvec = *ptmpvec = create_tmp_var (vect_type, "vectmp"); |
6cf89e04 | 1096 | else |
1097 | { | |
1098 | tmpvec = *ptmpvec; | |
1099 | need_asgn = false; | |
1100 | } | |
1101 | ||
1102 | if (need_asgn) | |
1103 | { | |
1104 | TREE_ADDRESSABLE (tmpvec) = 1; | |
1105 | asgn = gimple_build_assign (tmpvec, vect); | |
1106 | gsi_insert_before (gsi, asgn, GSI_SAME_STMT); | |
1107 | } | |
1108 | ||
3c425d7c | 1109 | arraytype = build_array_type_nelts (vect_elt_type, elements); |
1110 | return build4 (ARRAY_REF, vect_elt_type, | |
6cf89e04 | 1111 | build1 (VIEW_CONVERT_EXPR, arraytype, tmpvec), |
1112 | idx, NULL_TREE, NULL_TREE); | |
1113 | } | |
1114 | ||
f4803722 | 1115 | /* Check if VEC_PERM_EXPR within the given setting is supported |
3c425d7c | 1116 | by hardware, or lower it piecewise. |
6cf89e04 | 1117 | |
f4803722 | 1118 | When VEC_PERM_EXPR has the same first and second operands: |
1119 | VEC_PERM_EXPR <v0, v0, mask> the lowered version would be | |
6cf89e04 | 1120 | {v0[mask[0]], v0[mask[1]], ...} |
1121 | MASK and V0 must have the same number of elements. | |
1122 | ||
f4803722 | 1123 | Otherwise VEC_PERM_EXPR <v0, v1, mask> is lowered to |
6cf89e04 | 1124 | {mask[0] < len(v0) ? v0[mask[0]] : v1[mask[0]], ...} |
1125 | V0 and V1 must have the same type. MASK, V0, V1 must have the | |
1126 | same number of arguments. */ | |
6cf89e04 | 1127 | |
3c425d7c | 1128 | static void |
f4803722 | 1129 | lower_vec_perm (gimple_stmt_iterator *gsi) |
3c425d7c | 1130 | { |
6cf89e04 | 1131 | gimple stmt = gsi_stmt (*gsi); |
1132 | tree mask = gimple_assign_rhs3 (stmt); | |
1133 | tree vec0 = gimple_assign_rhs1 (stmt); | |
1134 | tree vec1 = gimple_assign_rhs2 (stmt); | |
3c425d7c | 1135 | tree vect_type = TREE_TYPE (vec0); |
1136 | tree mask_type = TREE_TYPE (mask); | |
1137 | tree vect_elt_type = TREE_TYPE (vect_type); | |
1138 | tree mask_elt_type = TREE_TYPE (mask_type); | |
1139 | unsigned int elements = TYPE_VECTOR_SUBPARTS (vect_type); | |
f1f41a6c | 1140 | vec<constructor_elt, va_gc> *v; |
3c425d7c | 1141 | tree constr, t, si, i_val; |
1142 | tree vec0tmp = NULL_TREE, vec1tmp = NULL_TREE, masktmp = NULL_TREE; | |
1143 | bool two_operand_p = !operand_equal_p (vec0, vec1, 0); | |
928efcfe | 1144 | location_t loc = gimple_location (gsi_stmt (*gsi)); |
3c425d7c | 1145 | unsigned i; |
6cf89e04 | 1146 | |
53d84863 | 1147 | if (TREE_CODE (mask) == SSA_NAME) |
1148 | { | |
1149 | gimple def_stmt = SSA_NAME_DEF_STMT (mask); | |
1150 | if (is_gimple_assign (def_stmt) | |
1151 | && gimple_assign_rhs_code (def_stmt) == VECTOR_CST) | |
1152 | mask = gimple_assign_rhs1 (def_stmt); | |
1153 | } | |
1154 | ||
e21c468f | 1155 | if (TREE_CODE (mask) == VECTOR_CST) |
1156 | { | |
1157 | unsigned char *sel_int = XALLOCAVEC (unsigned char, elements); | |
e21c468f | 1158 | |
fadf62f4 | 1159 | for (i = 0; i < elements; ++i) |
f9ae6f95 | 1160 | sel_int[i] = (TREE_INT_CST_LOW (VECTOR_CST_ELT (mask, i)) |
fadf62f4 | 1161 | & (2 * elements - 1)); |
e21c468f | 1162 | |
1163 | if (can_vec_perm_p (TYPE_MODE (vect_type), false, sel_int)) | |
53d84863 | 1164 | { |
1165 | gimple_assign_set_rhs3 (stmt, mask); | |
1166 | update_stmt (stmt); | |
1167 | return; | |
1168 | } | |
e21c468f | 1169 | } |
1170 | else if (can_vec_perm_p (TYPE_MODE (vect_type), true, NULL)) | |
3c425d7c | 1171 | return; |
928efcfe | 1172 | |
1173 | warning_at (loc, OPT_Wvector_operation_performance, | |
1174 | "vector shuffling operation will be expanded piecewise"); | |
1175 | ||
f1f41a6c | 1176 | vec_alloc (v, elements); |
3c425d7c | 1177 | for (i = 0; i < elements; i++) |
6cf89e04 | 1178 | { |
3c425d7c | 1179 | si = size_int (i); |
1180 | i_val = vector_element (gsi, mask, si, &masktmp); | |
6cf89e04 | 1181 | |
3c425d7c | 1182 | if (TREE_CODE (i_val) == INTEGER_CST) |
6cf89e04 | 1183 | { |
3c425d7c | 1184 | unsigned HOST_WIDE_INT index; |
6cf89e04 | 1185 | |
f9ae6f95 | 1186 | index = TREE_INT_CST_LOW (i_val); |
e913b5cd | 1187 | if (!tree_fits_uhwi_p (i_val) || index >= elements) |
3c425d7c | 1188 | i_val = build_int_cst (mask_elt_type, index & (elements - 1)); |
6cf89e04 | 1189 | |
3c425d7c | 1190 | if (two_operand_p && (index & elements) != 0) |
1191 | t = vector_element (gsi, vec1, i_val, &vec1tmp); | |
1192 | else | |
1193 | t = vector_element (gsi, vec0, i_val, &vec0tmp); | |
6cf89e04 | 1194 | |
3c425d7c | 1195 | t = force_gimple_operand_gsi (gsi, t, true, NULL_TREE, |
1196 | true, GSI_SAME_STMT); | |
6cf89e04 | 1197 | } |
3c425d7c | 1198 | else |
6cf89e04 | 1199 | { |
3c425d7c | 1200 | tree cond = NULL_TREE, v0_val; |
1201 | ||
1202 | if (two_operand_p) | |
1203 | { | |
1204 | cond = fold_build2 (BIT_AND_EXPR, mask_elt_type, i_val, | |
1205 | build_int_cst (mask_elt_type, elements)); | |
1206 | cond = force_gimple_operand_gsi (gsi, cond, true, NULL_TREE, | |
1207 | true, GSI_SAME_STMT); | |
1208 | } | |
1209 | ||
1210 | i_val = fold_build2 (BIT_AND_EXPR, mask_elt_type, i_val, | |
1211 | build_int_cst (mask_elt_type, elements - 1)); | |
1212 | i_val = force_gimple_operand_gsi (gsi, i_val, true, NULL_TREE, | |
1213 | true, GSI_SAME_STMT); | |
1214 | ||
1215 | v0_val = vector_element (gsi, vec0, i_val, &vec0tmp); | |
1216 | v0_val = force_gimple_operand_gsi (gsi, v0_val, true, NULL_TREE, | |
1217 | true, GSI_SAME_STMT); | |
1218 | ||
1219 | if (two_operand_p) | |
1220 | { | |
1221 | tree v1_val; | |
1222 | ||
1223 | v1_val = vector_element (gsi, vec1, i_val, &vec1tmp); | |
1224 | v1_val = force_gimple_operand_gsi (gsi, v1_val, true, NULL_TREE, | |
1225 | true, GSI_SAME_STMT); | |
1226 | ||
1227 | cond = fold_build2 (EQ_EXPR, boolean_type_node, | |
1228 | cond, build_zero_cst (mask_elt_type)); | |
1229 | cond = fold_build3 (COND_EXPR, vect_elt_type, | |
1230 | cond, v0_val, v1_val); | |
1231 | t = force_gimple_operand_gsi (gsi, cond, true, NULL_TREE, | |
1232 | true, GSI_SAME_STMT); | |
6cf89e04 | 1233 | } |
3c425d7c | 1234 | else |
1235 | t = v0_val; | |
6cf89e04 | 1236 | } |
3c425d7c | 1237 | |
569d18a5 | 1238 | CONSTRUCTOR_APPEND_ELT (v, NULL_TREE, t); |
6cf89e04 | 1239 | } |
1240 | ||
3c425d7c | 1241 | constr = build_constructor (vect_type, v); |
6cf89e04 | 1242 | gimple_assign_set_rhs_from_tree (gsi, constr); |
3c425d7c | 1243 | update_stmt (gsi_stmt (*gsi)); |
6cf89e04 | 1244 | } |
1245 | ||
0501cacc | 1246 | /* Process one statement. If we identify a vector operation, expand it. */ |
1247 | ||
1248 | static void | |
75a70cf9 | 1249 | expand_vector_operations_1 (gimple_stmt_iterator *gsi) |
0501cacc | 1250 | { |
75a70cf9 | 1251 | gimple stmt = gsi_stmt (*gsi); |
1252 | tree lhs, rhs1, rhs2 = NULL, type, compute_type; | |
0501cacc | 1253 | enum tree_code code; |
1254 | enum machine_mode compute_mode; | |
6cdd383a | 1255 | optab op = unknown_optab; |
75a70cf9 | 1256 | enum gimple_rhs_class rhs_class; |
1257 | tree new_rhs; | |
0501cacc | 1258 | |
75a70cf9 | 1259 | if (gimple_code (stmt) != GIMPLE_ASSIGN) |
1260 | return; | |
0501cacc | 1261 | |
75a70cf9 | 1262 | code = gimple_assign_rhs_code (stmt); |
1263 | rhs_class = get_gimple_rhs_class (code); | |
d7ad16c2 | 1264 | lhs = gimple_assign_lhs (stmt); |
0501cacc | 1265 | |
f4803722 | 1266 | if (code == VEC_PERM_EXPR) |
6cf89e04 | 1267 | { |
f4803722 | 1268 | lower_vec_perm (gsi); |
3c425d7c | 1269 | return; |
6cf89e04 | 1270 | } |
1271 | ||
dd8c5e6c | 1272 | if (code == VEC_COND_EXPR) |
1273 | { | |
1274 | expand_vector_condition (gsi); | |
1275 | return; | |
1276 | } | |
75a70cf9 | 1277 | if (rhs_class != GIMPLE_UNARY_RHS && rhs_class != GIMPLE_BINARY_RHS) |
1278 | return; | |
0501cacc | 1279 | |
75a70cf9 | 1280 | rhs1 = gimple_assign_rhs1 (stmt); |
1281 | type = gimple_expr_type (stmt); | |
1282 | if (rhs_class == GIMPLE_BINARY_RHS) | |
1283 | rhs2 = gimple_assign_rhs2 (stmt); | |
0501cacc | 1284 | |
0501cacc | 1285 | if (TREE_CODE (type) != VECTOR_TYPE) |
1286 | return; | |
1287 | ||
48e1416a | 1288 | if (code == NOP_EXPR |
9d8bf4aa | 1289 | || code == FLOAT_EXPR |
1290 | || code == FIX_TRUNC_EXPR | |
1291 | || code == VIEW_CONVERT_EXPR) | |
0501cacc | 1292 | return; |
48e1416a | 1293 | |
0501cacc | 1294 | gcc_assert (code != CONVERT_EXPR); |
bb6c9541 | 1295 | |
1296 | /* The signedness is determined from input argument. */ | |
1297 | if (code == VEC_UNPACK_FLOAT_HI_EXPR | |
1298 | || code == VEC_UNPACK_FLOAT_LO_EXPR) | |
75a70cf9 | 1299 | type = TREE_TYPE (rhs1); |
bb6c9541 | 1300 | |
79a78f7f | 1301 | /* For widening/narrowing vector operations, the relevant type is of the |
1302 | arguments, not the widened result. VEC_UNPACK_FLOAT_*_EXPR is | |
1303 | calculated in the same way above. */ | |
1304 | if (code == WIDEN_SUM_EXPR | |
1305 | || code == VEC_WIDEN_MULT_HI_EXPR | |
1306 | || code == VEC_WIDEN_MULT_LO_EXPR | |
1307 | || code == VEC_WIDEN_MULT_EVEN_EXPR | |
1308 | || code == VEC_WIDEN_MULT_ODD_EXPR | |
1309 | || code == VEC_UNPACK_HI_EXPR | |
1310 | || code == VEC_UNPACK_LO_EXPR | |
1311 | || code == VEC_PACK_TRUNC_EXPR | |
1312 | || code == VEC_PACK_SAT_EXPR | |
1313 | || code == VEC_PACK_FIX_TRUNC_EXPR | |
1314 | || code == VEC_WIDEN_LSHIFT_HI_EXPR | |
1315 | || code == VEC_WIDEN_LSHIFT_LO_EXPR) | |
1316 | type = TREE_TYPE (rhs1); | |
1317 | ||
4d54df85 | 1318 | /* Choose between vector shift/rotate by vector and vector shift/rotate by |
1319 | scalar */ | |
48e1416a | 1320 | if (code == LSHIFT_EXPR |
1321 | || code == RSHIFT_EXPR | |
75a70cf9 | 1322 | || code == LROTATE_EXPR |
4d54df85 | 1323 | || code == RROTATE_EXPR) |
1324 | { | |
64791788 | 1325 | optab opv; |
1326 | ||
83a28c11 | 1327 | /* Check whether we have vector <op> {x,x,x,x} where x |
1328 | could be a scalar variable or a constant. Transform | |
1329 | vector <op> {x,x,x,x} ==> vector <op> scalar. */ | |
64791788 | 1330 | if (VECTOR_INTEGER_TYPE_P (TREE_TYPE (rhs2))) |
2fee2038 | 1331 | { |
1332 | tree first; | |
1333 | gimple def_stmt; | |
1334 | ||
83a28c11 | 1335 | if ((TREE_CODE (rhs2) == VECTOR_CST |
1336 | && (first = uniform_vector_p (rhs2)) != NULL_TREE) | |
1337 | || (TREE_CODE (rhs2) == SSA_NAME | |
1338 | && (def_stmt = SSA_NAME_DEF_STMT (rhs2)) | |
1339 | && gimple_assign_single_p (def_stmt) | |
1340 | && (first = uniform_vector_p | |
1341 | (gimple_assign_rhs1 (def_stmt))) != NULL_TREE)) | |
2fee2038 | 1342 | { |
1343 | gimple_assign_set_rhs2 (stmt, first); | |
1344 | update_stmt (stmt); | |
1345 | rhs2 = first; | |
1346 | } | |
2fee2038 | 1347 | } |
6cf89e04 | 1348 | |
64791788 | 1349 | opv = optab_for_tree_code (code, type, optab_vector); |
1350 | if (VECTOR_INTEGER_TYPE_P (TREE_TYPE (rhs2))) | |
1351 | op = opv; | |
83a28c11 | 1352 | else |
4d5b2207 | 1353 | { |
83a28c11 | 1354 | op = optab_for_tree_code (code, type, optab_scalar); |
4d5b2207 | 1355 | |
83a28c11 | 1356 | /* The rtl expander will expand vector/scalar as vector/vector |
1357 | if necessary. Don't bother converting the stmt here. */ | |
64791788 | 1358 | if (optab_handler (op, TYPE_MODE (type)) == CODE_FOR_nothing |
1359 | && optab_handler (opv, TYPE_MODE (type)) != CODE_FOR_nothing) | |
1360 | return; | |
4d5b2207 | 1361 | } |
4d54df85 | 1362 | } |
1363 | else | |
1364 | op = optab_for_tree_code (code, type, optab_default); | |
0501cacc | 1365 | |
1366 | /* Optabs will try converting a negation into a subtraction, so | |
1367 | look for it as well. TODO: negation of floating-point vectors | |
1368 | might be turned into an exclusive OR toggling the sign bit. */ | |
6cdd383a | 1369 | if (op == unknown_optab |
0501cacc | 1370 | && code == NEGATE_EXPR |
1371 | && INTEGRAL_TYPE_P (TREE_TYPE (type))) | |
4d54df85 | 1372 | op = optab_for_tree_code (MINUS_EXPR, type, optab_default); |
0501cacc | 1373 | |
1374 | /* For very wide vectors, try using a smaller vector mode. */ | |
1375 | compute_type = type; | |
64791788 | 1376 | if (!VECTOR_MODE_P (TYPE_MODE (type)) && op) |
0501cacc | 1377 | { |
1378 | tree vector_compute_type | |
f1690ec2 | 1379 | = type_for_widest_vector_mode (TREE_TYPE (type), op); |
275b8285 | 1380 | if (vector_compute_type != NULL_TREE |
1381 | && (TYPE_VECTOR_SUBPARTS (vector_compute_type) | |
64791788 | 1382 | < TYPE_VECTOR_SUBPARTS (compute_type)) |
1383 | && (optab_handler (op, TYPE_MODE (vector_compute_type)) | |
1384 | != CODE_FOR_nothing)) | |
275b8285 | 1385 | compute_type = vector_compute_type; |
0501cacc | 1386 | } |
1387 | ||
1388 | /* If we are breaking a BLKmode vector into smaller pieces, | |
1389 | type_for_widest_vector_mode has already looked into the optab, | |
1390 | so skip these checks. */ | |
1391 | if (compute_type == type) | |
1392 | { | |
1393 | compute_mode = TYPE_MODE (compute_type); | |
ebf4f764 | 1394 | if (VECTOR_MODE_P (compute_mode)) |
1395 | { | |
1396 | if (op && optab_handler (op, compute_mode) != CODE_FOR_nothing) | |
1397 | return; | |
1398 | if (code == MULT_HIGHPART_EXPR | |
1399 | && can_mult_highpart_p (compute_mode, | |
1400 | TYPE_UNSIGNED (compute_type))) | |
1401 | return; | |
1402 | } | |
1403 | /* There is no operation in hardware, so fall back to scalars. */ | |
1404 | compute_type = TREE_TYPE (type); | |
0501cacc | 1405 | } |
1406 | ||
925c62d4 | 1407 | gcc_assert (code != VEC_LSHIFT_EXPR && code != VEC_RSHIFT_EXPR); |
75a70cf9 | 1408 | new_rhs = expand_vector_operation (gsi, type, compute_type, stmt, code); |
d7ad16c2 | 1409 | |
1410 | /* Leave expression untouched for later expansion. */ | |
1411 | if (new_rhs == NULL_TREE) | |
1412 | return; | |
1413 | ||
75a70cf9 | 1414 | if (!useless_type_conversion_p (TREE_TYPE (lhs), TREE_TYPE (new_rhs))) |
1415 | new_rhs = gimplify_build1 (gsi, VIEW_CONVERT_EXPR, TREE_TYPE (lhs), | |
1416 | new_rhs); | |
1417 | ||
1418 | /* NOTE: We should avoid using gimple_assign_set_rhs_from_tree. One | |
1419 | way to do it is change expand_vector_operation and its callees to | |
1420 | return a tree_code, RHS1 and RHS2 instead of a tree. */ | |
1421 | gimple_assign_set_rhs_from_tree (gsi, new_rhs); | |
82f9a36f | 1422 | update_stmt (gsi_stmt (*gsi)); |
0501cacc | 1423 | } |
1424 | \f | |
1425 | /* Use this to lower vector operations introduced by the vectorizer, | |
1426 | if it may need the bit-twiddling tricks implemented in this file. */ | |
1427 | ||
1428 | static bool | |
6cf89e04 | 1429 | gate_expand_vector_operations_ssa (void) |
0501cacc | 1430 | { |
7c3b431d | 1431 | return !(cfun->curr_properties & PROP_gimple_lvec); |
0501cacc | 1432 | } |
1433 | ||
2a1990e9 | 1434 | static unsigned int |
0501cacc | 1435 | expand_vector_operations (void) |
1436 | { | |
75a70cf9 | 1437 | gimple_stmt_iterator gsi; |
0501cacc | 1438 | basic_block bb; |
82f9a36f | 1439 | bool cfg_changed = false; |
0501cacc | 1440 | |
1441 | FOR_EACH_BB (bb) | |
1442 | { | |
75a70cf9 | 1443 | for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi)) |
0501cacc | 1444 | { |
75a70cf9 | 1445 | expand_vector_operations_1 (&gsi); |
82f9a36f | 1446 | /* ??? If we do not cleanup EH then we will ICE in |
1447 | verification. But in reality we have created wrong-code | |
1448 | as we did not properly transition EH info and edges to | |
1449 | the piecewise computations. */ | |
1450 | if (maybe_clean_eh_stmt (gsi_stmt (gsi)) | |
1451 | && gimple_purge_dead_eh_edges (bb)) | |
1452 | cfg_changed = true; | |
0501cacc | 1453 | } |
1454 | } | |
82f9a36f | 1455 | |
1456 | return cfg_changed ? TODO_cleanup_cfg : 0; | |
0501cacc | 1457 | } |
1458 | ||
cbe8bda8 | 1459 | namespace { |
1460 | ||
1461 | const pass_data pass_data_lower_vector = | |
0501cacc | 1462 | { |
cbe8bda8 | 1463 | GIMPLE_PASS, /* type */ |
1464 | "veclower", /* name */ | |
1465 | OPTGROUP_VEC, /* optinfo_flags */ | |
1466 | true, /* has_gate */ | |
1467 | true, /* has_execute */ | |
1468 | TV_NONE, /* tv_id */ | |
1469 | PROP_cfg, /* properties_required */ | |
1470 | PROP_gimple_lvec, /* properties_provided */ | |
1471 | 0, /* properties_destroyed */ | |
1472 | 0, /* todo_flags_start */ | |
1473 | ( TODO_update_ssa | TODO_verify_ssa | |
1474 | | TODO_verify_stmts | |
1475 | | TODO_verify_flow | |
1476 | | TODO_cleanup_cfg ), /* todo_flags_finish */ | |
0501cacc | 1477 | }; |
1478 | ||
cbe8bda8 | 1479 | class pass_lower_vector : public gimple_opt_pass |
1480 | { | |
1481 | public: | |
9af5ce0c | 1482 | pass_lower_vector (gcc::context *ctxt) |
1483 | : gimple_opt_pass (pass_data_lower_vector, ctxt) | |
cbe8bda8 | 1484 | {} |
1485 | ||
1486 | /* opt_pass methods: */ | |
1487 | bool gate () { return gate_expand_vector_operations_ssa (); } | |
1488 | unsigned int execute () { return expand_vector_operations (); } | |
1489 | ||
1490 | }; // class pass_lower_vector | |
1491 | ||
1492 | } // anon namespace | |
1493 | ||
1494 | gimple_opt_pass * | |
1495 | make_pass_lower_vector (gcc::context *ctxt) | |
1496 | { | |
1497 | return new pass_lower_vector (ctxt); | |
1498 | } | |
1499 | ||
1500 | namespace { | |
1501 | ||
1502 | const pass_data pass_data_lower_vector_ssa = | |
0501cacc | 1503 | { |
cbe8bda8 | 1504 | GIMPLE_PASS, /* type */ |
1505 | "veclower2", /* name */ | |
1506 | OPTGROUP_VEC, /* optinfo_flags */ | |
1507 | false, /* has_gate */ | |
1508 | true, /* has_execute */ | |
1509 | TV_NONE, /* tv_id */ | |
1510 | PROP_cfg, /* properties_required */ | |
1511 | PROP_gimple_lvec, /* properties_provided */ | |
1512 | 0, /* properties_destroyed */ | |
1513 | 0, /* todo_flags_start */ | |
1514 | ( TODO_update_ssa | TODO_verify_ssa | |
1515 | | TODO_verify_stmts | |
1516 | | TODO_verify_flow | |
1517 | | TODO_cleanup_cfg ), /* todo_flags_finish */ | |
0501cacc | 1518 | }; |
1519 | ||
cbe8bda8 | 1520 | class pass_lower_vector_ssa : public gimple_opt_pass |
1521 | { | |
1522 | public: | |
9af5ce0c | 1523 | pass_lower_vector_ssa (gcc::context *ctxt) |
1524 | : gimple_opt_pass (pass_data_lower_vector_ssa, ctxt) | |
cbe8bda8 | 1525 | {} |
1526 | ||
1527 | /* opt_pass methods: */ | |
ae84f584 | 1528 | opt_pass * clone () { return new pass_lower_vector_ssa (m_ctxt); } |
cbe8bda8 | 1529 | unsigned int execute () { return expand_vector_operations (); } |
1530 | ||
1531 | }; // class pass_lower_vector_ssa | |
1532 | ||
1533 | } // anon namespace | |
1534 | ||
1535 | gimple_opt_pass * | |
1536 | make_pass_lower_vector_ssa (gcc::context *ctxt) | |
1537 | { | |
1538 | return new pass_lower_vector_ssa (ctxt); | |
1539 | } | |
1540 | ||
0501cacc | 1541 | #include "gt-tree-vect-generic.h" |