]>
Commit | Line | Data |
---|---|---|
2b725155 | 1 | /* Lower vector operations to scalar operations. |
7072a650 ILT |
2 | Copyright (C) 2004, 2005, 2006, 2007, 2008, 2009 |
3 | Free Software Foundation, Inc. | |
2b725155 RH |
4 | |
5 | This file is part of GCC. | |
b8698a0f | 6 | |
2b725155 RH |
7 | GCC is free software; you can redistribute it and/or modify it |
8 | under the terms of the GNU General Public License as published by the | |
9dcd6f09 | 9 | Free Software Foundation; either version 3, or (at your option) any |
2b725155 | 10 | later version. |
b8698a0f | 11 | |
2b725155 RH |
12 | GCC is distributed in the hope that it will be useful, but WITHOUT |
13 | ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or | |
14 | FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License | |
15 | for more details. | |
b8698a0f | 16 | |
2b725155 | 17 | You should have received a copy of the GNU General Public License |
9dcd6f09 NC |
18 | along with GCC; see the file COPYING3. If not see |
19 | <http://www.gnu.org/licenses/>. */ | |
2b725155 RH |
20 | |
21 | #include "config.h" | |
22 | #include "system.h" | |
23 | #include "coretypes.h" | |
24 | #include "tree.h" | |
25 | #include "tm.h" | |
26 | #include "rtl.h" | |
27 | #include "expr.h" | |
28 | #include "insn-codes.h" | |
29 | #include "diagnostic.h" | |
30 | #include "optabs.h" | |
31 | #include "machmode.h" | |
32 | #include "langhooks.h" | |
33 | #include "tree-flow.h" | |
726a989a | 34 | #include "gimple.h" |
2b725155 RH |
35 | #include "tree-iterator.h" |
36 | #include "tree-pass.h" | |
37 | #include "flags.h" | |
38 | #include "ggc.h" | |
39 | ||
40 | ||
41 | /* Build a constant of type TYPE, made of VALUE's bits replicated | |
42 | every TYPE_SIZE (INNER_TYPE) bits to fit TYPE's precision. */ | |
43 | static tree | |
44 | build_replicated_const (tree type, tree inner_type, HOST_WIDE_INT value) | |
45 | { | |
46 | int width = tree_low_cst (TYPE_SIZE (inner_type), 1); | |
47 | int n = HOST_BITS_PER_WIDE_INT / width; | |
48 | unsigned HOST_WIDE_INT low, high, mask; | |
49 | tree ret; | |
50 | ||
51 | gcc_assert (n); | |
52 | ||
53 | if (width == HOST_BITS_PER_WIDE_INT) | |
54 | low = value; | |
55 | else | |
56 | { | |
57 | mask = ((HOST_WIDE_INT)1 << width) - 1; | |
58 | low = (unsigned HOST_WIDE_INT) ~0 / mask * (value & mask); | |
59 | } | |
60 | ||
61 | if (TYPE_PRECISION (type) < HOST_BITS_PER_WIDE_INT) | |
62 | low &= ((HOST_WIDE_INT)1 << TYPE_PRECISION (type)) - 1, high = 0; | |
63 | else if (TYPE_PRECISION (type) == HOST_BITS_PER_WIDE_INT) | |
64 | high = 0; | |
65 | else if (TYPE_PRECISION (type) == 2 * HOST_BITS_PER_WIDE_INT) | |
66 | high = low; | |
67 | else | |
68 | gcc_unreachable (); | |
69 | ||
70 | ret = build_int_cst_wide (type, low, high); | |
71 | return ret; | |
72 | } | |
73 | ||
74 | static GTY(()) tree vector_inner_type; | |
75 | static GTY(()) tree vector_last_type; | |
76 | static GTY(()) int vector_last_nunits; | |
77 | ||
78 | /* Return a suitable vector types made of SUBPARTS units each of mode | |
79 | "word_mode" (the global variable). */ | |
80 | static tree | |
81 | build_word_mode_vector_type (int nunits) | |
82 | { | |
83 | if (!vector_inner_type) | |
84 | vector_inner_type = lang_hooks.types.type_for_mode (word_mode, 1); | |
85 | else if (vector_last_nunits == nunits) | |
86 | { | |
87 | gcc_assert (TREE_CODE (vector_last_type) == VECTOR_TYPE); | |
88 | return vector_last_type; | |
89 | } | |
90 | ||
91 | /* We build a new type, but we canonicalize it nevertheless, | |
92 | because it still saves some memory. */ | |
93 | vector_last_nunits = nunits; | |
94 | vector_last_type = type_hash_canon (nunits, | |
95 | build_vector_type (vector_inner_type, | |
96 | nunits)); | |
97 | return vector_last_type; | |
98 | } | |
99 | ||
726a989a | 100 | typedef tree (*elem_op_func) (gimple_stmt_iterator *, |
2b725155 RH |
101 | tree, tree, tree, tree, tree, enum tree_code); |
102 | ||
103 | static inline tree | |
726a989a | 104 | tree_vec_extract (gimple_stmt_iterator *gsi, tree type, |
2b725155 RH |
105 | tree t, tree bitsize, tree bitpos) |
106 | { | |
107 | if (bitpos) | |
726a989a | 108 | return gimplify_build3 (gsi, BIT_FIELD_REF, type, t, bitsize, bitpos); |
2b725155 | 109 | else |
726a989a | 110 | return gimplify_build1 (gsi, VIEW_CONVERT_EXPR, type, t); |
2b725155 RH |
111 | } |
112 | ||
113 | static tree | |
726a989a | 114 | do_unop (gimple_stmt_iterator *gsi, tree inner_type, tree a, |
2b725155 RH |
115 | tree b ATTRIBUTE_UNUSED, tree bitpos, tree bitsize, |
116 | enum tree_code code) | |
117 | { | |
726a989a RB |
118 | a = tree_vec_extract (gsi, inner_type, a, bitsize, bitpos); |
119 | return gimplify_build1 (gsi, code, inner_type, a); | |
2b725155 RH |
120 | } |
121 | ||
122 | static tree | |
726a989a | 123 | do_binop (gimple_stmt_iterator *gsi, tree inner_type, tree a, tree b, |
2b725155 RH |
124 | tree bitpos, tree bitsize, enum tree_code code) |
125 | { | |
726a989a RB |
126 | a = tree_vec_extract (gsi, inner_type, a, bitsize, bitpos); |
127 | b = tree_vec_extract (gsi, inner_type, b, bitsize, bitpos); | |
128 | return gimplify_build2 (gsi, code, inner_type, a, b); | |
2b725155 RH |
129 | } |
130 | ||
131 | /* Expand vector addition to scalars. This does bit twiddling | |
132 | in order to increase parallelism: | |
133 | ||
134 | a + b = (((int) a & 0x7f7f7f7f) + ((int) b & 0x7f7f7f7f)) ^ | |
135 | (a ^ b) & 0x80808080 | |
136 | ||
137 | a - b = (((int) a | 0x80808080) - ((int) b & 0x7f7f7f7f)) ^ | |
138 | (a ^ ~b) & 0x80808080 | |
139 | ||
140 | -b = (0x80808080 - ((int) b & 0x7f7f7f7f)) ^ (~b & 0x80808080) | |
141 | ||
142 | This optimization should be done only if 4 vector items or more | |
143 | fit into a word. */ | |
144 | static tree | |
726a989a | 145 | do_plus_minus (gimple_stmt_iterator *gsi, tree word_type, tree a, tree b, |
2b725155 RH |
146 | tree bitpos ATTRIBUTE_UNUSED, tree bitsize ATTRIBUTE_UNUSED, |
147 | enum tree_code code) | |
148 | { | |
149 | tree inner_type = TREE_TYPE (TREE_TYPE (a)); | |
150 | unsigned HOST_WIDE_INT max; | |
151 | tree low_bits, high_bits, a_low, b_low, result_low, signs; | |
152 | ||
153 | max = GET_MODE_MASK (TYPE_MODE (inner_type)); | |
154 | low_bits = build_replicated_const (word_type, inner_type, max >> 1); | |
155 | high_bits = build_replicated_const (word_type, inner_type, max & ~(max >> 1)); | |
156 | ||
726a989a RB |
157 | a = tree_vec_extract (gsi, word_type, a, bitsize, bitpos); |
158 | b = tree_vec_extract (gsi, word_type, b, bitsize, bitpos); | |
2b725155 | 159 | |
726a989a RB |
160 | signs = gimplify_build2 (gsi, BIT_XOR_EXPR, word_type, a, b); |
161 | b_low = gimplify_build2 (gsi, BIT_AND_EXPR, word_type, b, low_bits); | |
2b725155 | 162 | if (code == PLUS_EXPR) |
726a989a | 163 | a_low = gimplify_build2 (gsi, BIT_AND_EXPR, word_type, a, low_bits); |
2b725155 RH |
164 | else |
165 | { | |
726a989a RB |
166 | a_low = gimplify_build2 (gsi, BIT_IOR_EXPR, word_type, a, high_bits); |
167 | signs = gimplify_build1 (gsi, BIT_NOT_EXPR, word_type, signs); | |
2b725155 RH |
168 | } |
169 | ||
726a989a RB |
170 | signs = gimplify_build2 (gsi, BIT_AND_EXPR, word_type, signs, high_bits); |
171 | result_low = gimplify_build2 (gsi, code, word_type, a_low, b_low); | |
172 | return gimplify_build2 (gsi, BIT_XOR_EXPR, word_type, result_low, signs); | |
2b725155 RH |
173 | } |
174 | ||
175 | static tree | |
726a989a | 176 | do_negate (gimple_stmt_iterator *gsi, tree word_type, tree b, |
2b725155 RH |
177 | tree unused ATTRIBUTE_UNUSED, tree bitpos ATTRIBUTE_UNUSED, |
178 | tree bitsize ATTRIBUTE_UNUSED, | |
179 | enum tree_code code ATTRIBUTE_UNUSED) | |
180 | { | |
181 | tree inner_type = TREE_TYPE (TREE_TYPE (b)); | |
182 | HOST_WIDE_INT max; | |
183 | tree low_bits, high_bits, b_low, result_low, signs; | |
184 | ||
185 | max = GET_MODE_MASK (TYPE_MODE (inner_type)); | |
186 | low_bits = build_replicated_const (word_type, inner_type, max >> 1); | |
187 | high_bits = build_replicated_const (word_type, inner_type, max & ~(max >> 1)); | |
188 | ||
726a989a | 189 | b = tree_vec_extract (gsi, word_type, b, bitsize, bitpos); |
2b725155 | 190 | |
726a989a RB |
191 | b_low = gimplify_build2 (gsi, BIT_AND_EXPR, word_type, b, low_bits); |
192 | signs = gimplify_build1 (gsi, BIT_NOT_EXPR, word_type, b); | |
193 | signs = gimplify_build2 (gsi, BIT_AND_EXPR, word_type, signs, high_bits); | |
194 | result_low = gimplify_build2 (gsi, MINUS_EXPR, word_type, high_bits, b_low); | |
195 | return gimplify_build2 (gsi, BIT_XOR_EXPR, word_type, result_low, signs); | |
2b725155 RH |
196 | } |
197 | ||
198 | /* Expand a vector operation to scalars, by using many operations | |
199 | whose type is the vector type's inner type. */ | |
200 | static tree | |
726a989a | 201 | expand_vector_piecewise (gimple_stmt_iterator *gsi, elem_op_func f, |
2b725155 RH |
202 | tree type, tree inner_type, |
203 | tree a, tree b, enum tree_code code) | |
204 | { | |
4038c495 | 205 | VEC(constructor_elt,gc) *v; |
2b725155 RH |
206 | tree part_width = TYPE_SIZE (inner_type); |
207 | tree index = bitsize_int (0); | |
208 | int nunits = TYPE_VECTOR_SUBPARTS (type); | |
209 | int delta = tree_low_cst (part_width, 1) | |
210 | / tree_low_cst (TYPE_SIZE (TREE_TYPE (type)), 1); | |
211 | int i; | |
212 | ||
4038c495 | 213 | v = VEC_alloc(constructor_elt, gc, (nunits + delta - 1) / delta); |
2b725155 RH |
214 | for (i = 0; i < nunits; |
215 | i += delta, index = int_const_binop (PLUS_EXPR, index, part_width, 0)) | |
216 | { | |
726a989a | 217 | tree result = f (gsi, inner_type, a, b, index, part_width, code); |
4038c495 GB |
218 | constructor_elt *ce = VEC_quick_push (constructor_elt, v, NULL); |
219 | ce->index = NULL_TREE; | |
220 | ce->value = result; | |
2b725155 RH |
221 | } |
222 | ||
4038c495 | 223 | return build_constructor (type, v); |
2b725155 RH |
224 | } |
225 | ||
226 | /* Expand a vector operation to scalars with the freedom to use | |
227 | a scalar integer type, or to use a different size for the items | |
228 | in the vector type. */ | |
229 | static tree | |
726a989a | 230 | expand_vector_parallel (gimple_stmt_iterator *gsi, elem_op_func f, tree type, |
2b725155 RH |
231 | tree a, tree b, |
232 | enum tree_code code) | |
233 | { | |
234 | tree result, compute_type; | |
235 | enum machine_mode mode; | |
236 | int n_words = tree_low_cst (TYPE_SIZE_UNIT (type), 1) / UNITS_PER_WORD; | |
237 | ||
238 | /* We have three strategies. If the type is already correct, just do | |
239 | the operation an element at a time. Else, if the vector is wider than | |
240 | one word, do it a word at a time; finally, if the vector is smaller | |
241 | than one word, do it as a scalar. */ | |
242 | if (TYPE_MODE (TREE_TYPE (type)) == word_mode) | |
726a989a | 243 | return expand_vector_piecewise (gsi, f, |
2b725155 RH |
244 | type, TREE_TYPE (type), |
245 | a, b, code); | |
246 | else if (n_words > 1) | |
247 | { | |
248 | tree word_type = build_word_mode_vector_type (n_words); | |
726a989a | 249 | result = expand_vector_piecewise (gsi, f, |
2b725155 RH |
250 | word_type, TREE_TYPE (word_type), |
251 | a, b, code); | |
726a989a RB |
252 | result = force_gimple_operand_gsi (gsi, result, true, NULL, true, |
253 | GSI_SAME_STMT); | |
2b725155 RH |
254 | } |
255 | else | |
256 | { | |
257 | /* Use a single scalar operation with a mode no wider than word_mode. */ | |
258 | mode = mode_for_size (tree_low_cst (TYPE_SIZE (type), 1), MODE_INT, 0); | |
259 | compute_type = lang_hooks.types.type_for_mode (mode, 1); | |
726a989a | 260 | result = f (gsi, compute_type, a, b, NULL_TREE, NULL_TREE, code); |
2b725155 RH |
261 | } |
262 | ||
263 | return result; | |
264 | } | |
265 | ||
266 | /* Expand a vector operation to scalars; for integer types we can use | |
267 | special bit twiddling tricks to do the sums a word at a time, using | |
268 | function F_PARALLEL instead of F. These tricks are done only if | |
269 | they can process at least four items, that is, only if the vector | |
270 | holds at least four items and if a word can hold four items. */ | |
271 | static tree | |
726a989a | 272 | expand_vector_addition (gimple_stmt_iterator *gsi, |
2b725155 RH |
273 | elem_op_func f, elem_op_func f_parallel, |
274 | tree type, tree a, tree b, enum tree_code code) | |
275 | { | |
276 | int parts_per_word = UNITS_PER_WORD | |
277 | / tree_low_cst (TYPE_SIZE_UNIT (TREE_TYPE (type)), 1); | |
278 | ||
279 | if (INTEGRAL_TYPE_P (TREE_TYPE (type)) | |
280 | && parts_per_word >= 4 | |
281 | && TYPE_VECTOR_SUBPARTS (type) >= 4) | |
726a989a | 282 | return expand_vector_parallel (gsi, f_parallel, |
2b725155 RH |
283 | type, a, b, code); |
284 | else | |
726a989a | 285 | return expand_vector_piecewise (gsi, f, |
2b725155 RH |
286 | type, TREE_TYPE (type), |
287 | a, b, code); | |
288 | } | |
289 | ||
290 | static tree | |
726a989a RB |
291 | expand_vector_operation (gimple_stmt_iterator *gsi, tree type, tree compute_type, |
292 | gimple assign, enum tree_code code) | |
2b725155 RH |
293 | { |
294 | enum machine_mode compute_mode = TYPE_MODE (compute_type); | |
295 | ||
296 | /* If the compute mode is not a vector mode (hence we are not decomposing | |
297 | a BLKmode vector to smaller, hardware-supported vectors), we may want | |
298 | to expand the operations in parallel. */ | |
299 | if (GET_MODE_CLASS (compute_mode) != MODE_VECTOR_INT | |
325217ed CF |
300 | && GET_MODE_CLASS (compute_mode) != MODE_VECTOR_FLOAT |
301 | && GET_MODE_CLASS (compute_mode) != MODE_VECTOR_FRACT | |
302 | && GET_MODE_CLASS (compute_mode) != MODE_VECTOR_UFRACT | |
303 | && GET_MODE_CLASS (compute_mode) != MODE_VECTOR_ACCUM | |
304 | && GET_MODE_CLASS (compute_mode) != MODE_VECTOR_UACCUM) | |
2b725155 RH |
305 | switch (code) |
306 | { | |
307 | case PLUS_EXPR: | |
308 | case MINUS_EXPR: | |
eeef0e45 | 309 | if (!TYPE_OVERFLOW_TRAPS (type)) |
726a989a RB |
310 | return expand_vector_addition (gsi, do_binop, do_plus_minus, type, |
311 | gimple_assign_rhs1 (assign), | |
312 | gimple_assign_rhs2 (assign), code); | |
2b725155 RH |
313 | break; |
314 | ||
315 | case NEGATE_EXPR: | |
eeef0e45 | 316 | if (!TYPE_OVERFLOW_TRAPS (type)) |
726a989a RB |
317 | return expand_vector_addition (gsi, do_unop, do_negate, type, |
318 | gimple_assign_rhs1 (assign), | |
2b725155 RH |
319 | NULL_TREE, code); |
320 | break; | |
321 | ||
322 | case BIT_AND_EXPR: | |
323 | case BIT_IOR_EXPR: | |
324 | case BIT_XOR_EXPR: | |
726a989a RB |
325 | return expand_vector_parallel (gsi, do_binop, type, |
326 | gimple_assign_rhs1 (assign), | |
327 | gimple_assign_rhs2 (assign), code); | |
2b725155 RH |
328 | |
329 | case BIT_NOT_EXPR: | |
726a989a RB |
330 | return expand_vector_parallel (gsi, do_unop, type, |
331 | gimple_assign_rhs1 (assign), | |
2b725155 RH |
332 | NULL_TREE, code); |
333 | ||
334 | default: | |
335 | break; | |
336 | } | |
337 | ||
338 | if (TREE_CODE_CLASS (code) == tcc_unary) | |
726a989a RB |
339 | return expand_vector_piecewise (gsi, do_unop, type, compute_type, |
340 | gimple_assign_rhs1 (assign), | |
2b725155 RH |
341 | NULL_TREE, code); |
342 | else | |
726a989a RB |
343 | return expand_vector_piecewise (gsi, do_binop, type, compute_type, |
344 | gimple_assign_rhs1 (assign), | |
345 | gimple_assign_rhs2 (assign), code); | |
2b725155 RH |
346 | } |
347 | \f | |
348 | /* Return a type for the widest vector mode whose components are of mode | |
325217ed CF |
349 | INNER_MODE, or NULL_TREE if none is found. |
350 | SATP is true for saturating fixed-point types. */ | |
351 | ||
2b725155 | 352 | static tree |
325217ed | 353 | type_for_widest_vector_mode (enum machine_mode inner_mode, optab op, int satp) |
2b725155 RH |
354 | { |
355 | enum machine_mode best_mode = VOIDmode, mode; | |
356 | int best_nunits = 0; | |
357 | ||
3d8bf70f | 358 | if (SCALAR_FLOAT_MODE_P (inner_mode)) |
2b725155 | 359 | mode = MIN_MODE_VECTOR_FLOAT; |
325217ed CF |
360 | else if (SCALAR_FRACT_MODE_P (inner_mode)) |
361 | mode = MIN_MODE_VECTOR_FRACT; | |
362 | else if (SCALAR_UFRACT_MODE_P (inner_mode)) | |
363 | mode = MIN_MODE_VECTOR_UFRACT; | |
364 | else if (SCALAR_ACCUM_MODE_P (inner_mode)) | |
365 | mode = MIN_MODE_VECTOR_ACCUM; | |
366 | else if (SCALAR_UACCUM_MODE_P (inner_mode)) | |
367 | mode = MIN_MODE_VECTOR_UACCUM; | |
2b725155 RH |
368 | else |
369 | mode = MIN_MODE_VECTOR_INT; | |
370 | ||
371 | for (; mode != VOIDmode; mode = GET_MODE_WIDER_MODE (mode)) | |
372 | if (GET_MODE_INNER (mode) == inner_mode | |
373 | && GET_MODE_NUNITS (mode) > best_nunits | |
166cdb08 | 374 | && optab_handler (op, mode)->insn_code != CODE_FOR_nothing) |
2b725155 RH |
375 | best_mode = mode, best_nunits = GET_MODE_NUNITS (mode); |
376 | ||
377 | if (best_mode == VOIDmode) | |
378 | return NULL_TREE; | |
379 | else | |
325217ed CF |
380 | { |
381 | /* For fixed-point modes, we need to pass satp as the 2nd parameter. */ | |
382 | if (ALL_FIXED_POINT_MODE_P (best_mode)) | |
383 | return lang_hooks.types.type_for_mode (best_mode, satp); | |
384 | ||
385 | return lang_hooks.types.type_for_mode (best_mode, 1); | |
386 | } | |
2b725155 RH |
387 | } |
388 | ||
389 | /* Process one statement. If we identify a vector operation, expand it. */ | |
390 | ||
391 | static void | |
726a989a | 392 | expand_vector_operations_1 (gimple_stmt_iterator *gsi) |
2b725155 | 393 | { |
726a989a RB |
394 | gimple stmt = gsi_stmt (*gsi); |
395 | tree lhs, rhs1, rhs2 = NULL, type, compute_type; | |
2b725155 RH |
396 | enum tree_code code; |
397 | enum machine_mode compute_mode; | |
398 | optab op; | |
726a989a RB |
399 | enum gimple_rhs_class rhs_class; |
400 | tree new_rhs; | |
2b725155 | 401 | |
726a989a RB |
402 | if (gimple_code (stmt) != GIMPLE_ASSIGN) |
403 | return; | |
2b725155 | 404 | |
726a989a RB |
405 | code = gimple_assign_rhs_code (stmt); |
406 | rhs_class = get_gimple_rhs_class (code); | |
2b725155 | 407 | |
726a989a RB |
408 | if (rhs_class != GIMPLE_UNARY_RHS && rhs_class != GIMPLE_BINARY_RHS) |
409 | return; | |
2b725155 | 410 | |
726a989a RB |
411 | lhs = gimple_assign_lhs (stmt); |
412 | rhs1 = gimple_assign_rhs1 (stmt); | |
413 | type = gimple_expr_type (stmt); | |
414 | if (rhs_class == GIMPLE_BINARY_RHS) | |
415 | rhs2 = gimple_assign_rhs2 (stmt); | |
2b725155 | 416 | |
2b725155 RH |
417 | if (TREE_CODE (type) != VECTOR_TYPE) |
418 | return; | |
419 | ||
b8698a0f | 420 | if (code == NOP_EXPR |
f57d17f1 TM |
421 | || code == FLOAT_EXPR |
422 | || code == FIX_TRUNC_EXPR | |
423 | || code == VIEW_CONVERT_EXPR) | |
2b725155 | 424 | return; |
b8698a0f | 425 | |
2b725155 | 426 | gcc_assert (code != CONVERT_EXPR); |
9f106823 UB |
427 | |
428 | /* The signedness is determined from input argument. */ | |
429 | if (code == VEC_UNPACK_FLOAT_HI_EXPR | |
430 | || code == VEC_UNPACK_FLOAT_LO_EXPR) | |
726a989a | 431 | type = TREE_TYPE (rhs1); |
9f106823 | 432 | |
71d46ca5 MM |
433 | /* Choose between vector shift/rotate by vector and vector shift/rotate by |
434 | scalar */ | |
b8698a0f L |
435 | if (code == LSHIFT_EXPR |
436 | || code == RSHIFT_EXPR | |
726a989a | 437 | || code == LROTATE_EXPR |
71d46ca5 MM |
438 | || code == RROTATE_EXPR) |
439 | { | |
440 | /* If the 2nd argument is vector, we need a vector/vector shift */ | |
726a989a | 441 | if (VECTOR_MODE_P (TYPE_MODE (TREE_TYPE (rhs2)))) |
71d46ca5 | 442 | op = optab_for_tree_code (code, type, optab_vector); |
71d46ca5 MM |
443 | else |
444 | { | |
445 | /* Try for a vector/scalar shift, and if we don't have one, see if we | |
446 | have a vector/vector shift */ | |
447 | op = optab_for_tree_code (code, type, optab_scalar); | |
448 | if (!op | |
449 | || (op->handlers[(int) TYPE_MODE (type)].insn_code | |
450 | == CODE_FOR_nothing)) | |
451 | op = optab_for_tree_code (code, type, optab_vector); | |
452 | } | |
453 | } | |
454 | else | |
455 | op = optab_for_tree_code (code, type, optab_default); | |
2b725155 | 456 | |
b8698a0f | 457 | /* For widening/narrowing vector operations, the relevant type is of the |
9f106823 UB |
458 | arguments, not the widened result. VEC_UNPACK_FLOAT_*_EXPR is |
459 | calculated in the same way above. */ | |
89d67cca DN |
460 | if (code == WIDEN_SUM_EXPR |
461 | || code == VEC_WIDEN_MULT_HI_EXPR | |
462 | || code == VEC_WIDEN_MULT_LO_EXPR | |
463 | || code == VEC_UNPACK_HI_EXPR | |
464 | || code == VEC_UNPACK_LO_EXPR | |
8115817b | 465 | || code == VEC_PACK_TRUNC_EXPR |
d9987fb4 UB |
466 | || code == VEC_PACK_SAT_EXPR |
467 | || code == VEC_PACK_FIX_TRUNC_EXPR) | |
726a989a | 468 | type = TREE_TYPE (rhs1); |
20f06221 | 469 | |
2b725155 RH |
470 | /* Optabs will try converting a negation into a subtraction, so |
471 | look for it as well. TODO: negation of floating-point vectors | |
472 | might be turned into an exclusive OR toggling the sign bit. */ | |
473 | if (op == NULL | |
474 | && code == NEGATE_EXPR | |
475 | && INTEGRAL_TYPE_P (TREE_TYPE (type))) | |
71d46ca5 | 476 | op = optab_for_tree_code (MINUS_EXPR, type, optab_default); |
2b725155 RH |
477 | |
478 | /* For very wide vectors, try using a smaller vector mode. */ | |
479 | compute_type = type; | |
480 | if (TYPE_MODE (type) == BLKmode && op) | |
481 | { | |
482 | tree vector_compute_type | |
325217ed CF |
483 | = type_for_widest_vector_mode (TYPE_MODE (TREE_TYPE (type)), op, |
484 | TYPE_SATURATING (TREE_TYPE (type))); | |
1e9ae5ab UB |
485 | if (vector_compute_type != NULL_TREE |
486 | && (TYPE_VECTOR_SUBPARTS (vector_compute_type) | |
487 | < TYPE_VECTOR_SUBPARTS (compute_type))) | |
488 | compute_type = vector_compute_type; | |
2b725155 RH |
489 | } |
490 | ||
491 | /* If we are breaking a BLKmode vector into smaller pieces, | |
492 | type_for_widest_vector_mode has already looked into the optab, | |
493 | so skip these checks. */ | |
494 | if (compute_type == type) | |
495 | { | |
496 | compute_mode = TYPE_MODE (compute_type); | |
497 | if ((GET_MODE_CLASS (compute_mode) == MODE_VECTOR_INT | |
325217ed CF |
498 | || GET_MODE_CLASS (compute_mode) == MODE_VECTOR_FLOAT |
499 | || GET_MODE_CLASS (compute_mode) == MODE_VECTOR_FRACT | |
500 | || GET_MODE_CLASS (compute_mode) == MODE_VECTOR_UFRACT | |
501 | || GET_MODE_CLASS (compute_mode) == MODE_VECTOR_ACCUM | |
502 | || GET_MODE_CLASS (compute_mode) == MODE_VECTOR_UACCUM) | |
2b725155 | 503 | && op != NULL |
166cdb08 | 504 | && optab_handler (op, compute_mode)->insn_code != CODE_FOR_nothing) |
2b725155 RH |
505 | return; |
506 | else | |
507 | /* There is no operation in hardware, so fall back to scalars. */ | |
508 | compute_type = TREE_TYPE (type); | |
509 | } | |
510 | ||
a6b46ba2 | 511 | gcc_assert (code != VEC_LSHIFT_EXPR && code != VEC_RSHIFT_EXPR); |
726a989a RB |
512 | new_rhs = expand_vector_operation (gsi, type, compute_type, stmt, code); |
513 | if (!useless_type_conversion_p (TREE_TYPE (lhs), TREE_TYPE (new_rhs))) | |
514 | new_rhs = gimplify_build1 (gsi, VIEW_CONVERT_EXPR, TREE_TYPE (lhs), | |
515 | new_rhs); | |
516 | ||
517 | /* NOTE: We should avoid using gimple_assign_set_rhs_from_tree. One | |
518 | way to do it is change expand_vector_operation and its callees to | |
519 | return a tree_code, RHS1 and RHS2 instead of a tree. */ | |
520 | gimple_assign_set_rhs_from_tree (gsi, new_rhs); | |
2b725155 | 521 | |
726a989a | 522 | gimple_set_modified (gsi_stmt (*gsi), true); |
2b725155 RH |
523 | } |
524 | \f | |
525 | /* Use this to lower vector operations introduced by the vectorizer, | |
526 | if it may need the bit-twiddling tricks implemented in this file. */ | |
527 | ||
528 | static bool | |
529 | gate_expand_vector_operations (void) | |
530 | { | |
531 | return flag_tree_vectorize != 0; | |
532 | } | |
533 | ||
c2924966 | 534 | static unsigned int |
2b725155 RH |
535 | expand_vector_operations (void) |
536 | { | |
726a989a | 537 | gimple_stmt_iterator gsi; |
2b725155 RH |
538 | basic_block bb; |
539 | ||
540 | FOR_EACH_BB (bb) | |
541 | { | |
726a989a | 542 | for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi)) |
2b725155 | 543 | { |
726a989a RB |
544 | expand_vector_operations_1 (&gsi); |
545 | update_stmt_if_modified (gsi_stmt (gsi)); | |
2b725155 RH |
546 | } |
547 | } | |
c2924966 | 548 | return 0; |
2b725155 RH |
549 | } |
550 | ||
b8698a0f | 551 | struct gimple_opt_pass pass_lower_vector = |
2b725155 | 552 | { |
8ddbbcae JH |
553 | { |
554 | GIMPLE_PASS, | |
2b725155 RH |
555 | "veclower", /* name */ |
556 | 0, /* gate */ | |
557 | expand_vector_operations, /* execute */ | |
558 | NULL, /* sub */ | |
559 | NULL, /* next */ | |
560 | 0, /* static_pass_number */ | |
7072a650 | 561 | TV_NONE, /* tv_id */ |
2b725155 RH |
562 | PROP_cfg, /* properties_required */ |
563 | 0, /* properties_provided */ | |
564 | 0, /* properties_destroyed */ | |
565 | 0, /* todo_flags_start */ | |
566 | TODO_dump_func | TODO_ggc_collect | |
8ddbbcae JH |
567 | | TODO_verify_stmts /* todo_flags_finish */ |
568 | } | |
2b725155 RH |
569 | }; |
570 | ||
b8698a0f | 571 | struct gimple_opt_pass pass_lower_vector_ssa = |
2b725155 | 572 | { |
8ddbbcae JH |
573 | { |
574 | GIMPLE_PASS, | |
2b725155 RH |
575 | "veclower2", /* name */ |
576 | gate_expand_vector_operations, /* gate */ | |
577 | expand_vector_operations, /* execute */ | |
578 | NULL, /* sub */ | |
579 | NULL, /* next */ | |
580 | 0, /* static_pass_number */ | |
7072a650 | 581 | TV_NONE, /* tv_id */ |
2b725155 RH |
582 | PROP_cfg, /* properties_required */ |
583 | 0, /* properties_provided */ | |
584 | 0, /* properties_destroyed */ | |
585 | 0, /* todo_flags_start */ | |
586 | TODO_dump_func | TODO_update_ssa /* todo_flags_finish */ | |
587 | | TODO_verify_ssa | |
8ddbbcae JH |
588 | | TODO_verify_stmts | TODO_verify_flow |
589 | } | |
2b725155 RH |
590 | }; |
591 | ||
592 | #include "gt-tree-vect-generic.h" |