]> git.ipfire.org Git - thirdparty/gcc.git/blob - gcc/tree-complex.c
tree-cfg.c (gimplify_val): Move from tree-complex.c.
[thirdparty/gcc.git] / gcc / tree-complex.c
1 /* Lower complex number and vector operations to scalar operations.
2 Copyright (C) 2004 Free Software Foundation, Inc.
3
4 This file is part of GCC.
5
6 GCC is free software; you can redistribute it and/or modify it
7 under the terms of the GNU General Public License as published by the
8 Free Software Foundation; either version 2, or (at your option) any
9 later version.
10
11 GCC is distributed in the hope that it will be useful, but WITHOUT
12 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
13 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
14 for more details.
15
16 You should have received a copy of the GNU General Public License
17 along with GCC; see the file COPYING. If not, write to the Free
18 Software Foundation, 59 Temple Place - Suite 330, Boston, MA
19 02111-1307, USA. */
20
21 #include "config.h"
22 #include "system.h"
23 #include "coretypes.h"
24 #include "tree.h"
25 #include "tm.h"
26 #include "rtl.h"
27 #include "expr.h"
28 #include "insn-codes.h"
29 #include "diagnostic.h"
30 #include "optabs.h"
31 #include "machmode.h"
32 #include "langhooks.h"
33 #include "tree-flow.h"
34 #include "tree-gimple.h"
35 #include "tree-iterator.h"
36 #include "tree-pass.h"
37 #include "flags.h"
38
39
40 /* Extract the real or imaginary part of a complex variable or constant.
41 Make sure that it's a proper gimple_val and gimplify it if not.
42 Emit any new code before BSI. */
43
44 static tree
45 extract_component (block_stmt_iterator *bsi, tree t, bool imagpart_p)
46 {
47 tree ret, inner_type;
48
49 inner_type = TREE_TYPE (TREE_TYPE (t));
50 switch (TREE_CODE (t))
51 {
52 case COMPLEX_CST:
53 ret = (imagpart_p ? TREE_IMAGPART (t) : TREE_REALPART (t));
54 break;
55
56 case COMPLEX_EXPR:
57 ret = TREE_OPERAND (t, imagpart_p);
58 break;
59
60 case VAR_DECL:
61 case PARM_DECL:
62 ret = build1 ((imagpart_p ? IMAGPART_EXPR : REALPART_EXPR),
63 inner_type, t);
64 break;
65
66 default:
67 abort ();
68 }
69
70 return gimplify_val (bsi, inner_type, ret);
71 }
72
73 /* Update an assignment to a complex variable in place. */
74
75 static void
76 update_complex_assignment (block_stmt_iterator *bsi, tree r, tree i)
77 {
78 tree stmt = bsi_stmt (*bsi);
79 tree type;
80
81 modify_stmt (stmt);
82 if (TREE_CODE (stmt) == RETURN_EXPR)
83 stmt = TREE_OPERAND (stmt, 0);
84
85 type = TREE_TYPE (TREE_OPERAND (stmt, 1));
86 TREE_OPERAND (stmt, 1) = build (COMPLEX_EXPR, type, r, i);
87 }
88
/* Expand complex addition to scalars:
	a + b = (ar + br) + i(ai + bi)
	a - b = (ar - br) + i(ai - bi)
   CODE is PLUS_EXPR or MINUS_EXPR; the same scalar operation applies
   componentwise to the real and imaginary parts.  */

static void
expand_complex_addition (block_stmt_iterator *bsi, tree inner_type,
			 tree ar, tree ai, tree br, tree bi,
			 enum tree_code code)
{
  tree rr, ri;

  rr = gimplify_build2 (bsi, code, inner_type, ar, br);
  ri = gimplify_build2 (bsi, code, inner_type, ai, bi);

  update_complex_assignment (bsi, rr, ri);
}
106
107 /* Expand complex multiplication to scalars:
108 a * b = (ar*br - ai*bi) + i(ar*bi + br*ai)
109 */
110
111 static void
112 expand_complex_multiplication (block_stmt_iterator *bsi, tree inner_type,
113 tree ar, tree ai, tree br, tree bi)
114 {
115 tree t1, t2, t3, t4, rr, ri;
116
117 t1 = gimplify_build2 (bsi, MULT_EXPR, inner_type, ar, br);
118 t2 = gimplify_build2 (bsi, MULT_EXPR, inner_type, ai, bi);
119 t3 = gimplify_build2 (bsi, MULT_EXPR, inner_type, ar, bi);
120
121 /* Avoid expanding redundant multiplication for the common
122 case of squaring a complex number. */
123 if (ar == br && ai == bi)
124 t4 = t3;
125 else
126 t4 = gimplify_build2 (bsi, MULT_EXPR, inner_type, ai, br);
127
128 rr = gimplify_build2 (bsi, MINUS_EXPR, inner_type, t1, t2);
129 ri = gimplify_build2 (bsi, PLUS_EXPR, inner_type, t3, t4);
130
131 update_complex_assignment (bsi, rr, ri);
132 }
133
134 /* Expand complex division to scalars, straightforward algorithm.
135 a / b = ((ar*br + ai*bi)/t) + i((ai*br - ar*bi)/t)
136 t = br*br + bi*bi
137 */
138
139 static void
140 expand_complex_div_straight (block_stmt_iterator *bsi, tree inner_type,
141 tree ar, tree ai, tree br, tree bi,
142 enum tree_code code)
143 {
144 tree rr, ri, div, t1, t2, t3;
145
146 t1 = gimplify_build2 (bsi, MULT_EXPR, inner_type, br, br);
147 t2 = gimplify_build2 (bsi, MULT_EXPR, inner_type, bi, bi);
148 div = gimplify_build2 (bsi, PLUS_EXPR, inner_type, t1, t2);
149
150 t1 = gimplify_build2 (bsi, MULT_EXPR, inner_type, ar, br);
151 t2 = gimplify_build2 (bsi, MULT_EXPR, inner_type, ai, bi);
152 t3 = gimplify_build2 (bsi, PLUS_EXPR, inner_type, t1, t2);
153 rr = gimplify_build2 (bsi, code, inner_type, t3, div);
154
155 t1 = gimplify_build2 (bsi, MULT_EXPR, inner_type, ai, br);
156 t2 = gimplify_build2 (bsi, MULT_EXPR, inner_type, ar, bi);
157 t3 = gimplify_build2 (bsi, MINUS_EXPR, inner_type, t1, t2);
158 ri = gimplify_build2 (bsi, code, inner_type, t3, div);
159
160 update_complex_assignment (bsi, rr, ri);
161 }
162
/* Expand complex division to scalars, modified algorithm to minimize
   overflow with wide input ranges.  The divisor is rescaled by the
   ratio of its smaller to its larger component before dividing.  */

static void
expand_complex_div_wide (block_stmt_iterator *bsi, tree inner_type,
			 tree ar, tree ai, tree br, tree bi,
			 enum tree_code code)
{
  tree rr, ri, ratio, div, t1, t2, min, max, cond;

  /* Examine |br| < |bi|, and branch.  */
  t1 = gimplify_build1 (bsi, ABS_EXPR, inner_type, br);
  t2 = gimplify_build1 (bsi, ABS_EXPR, inner_type, bi);
  cond = fold (build (LT_EXPR, boolean_type_node, t1, t2));
  STRIP_NOPS (cond);

  if (TREE_CONSTANT (cond))
    {
      /* The comparison folded to a constant, so MIN and MAX can be
	 chosen statically with no control flow at all.  */
      if (integer_zerop (cond))
	min = bi, max = br;
      else
	min = br, max = bi;
    }
  else
    {
      /* Emit a runtime if/then/else assigning MIN and MAX, building
	 the basic blocks and edges by hand since this pass runs on a
	 fully constructed CFG.  */
      basic_block bb_cond, bb_true, bb_false, bb_join;
      tree l1, l2, l3;
      edge e;

      /* Build COND_EXPR <cond, goto l1, goto l2> before BSI.  */
      l1 = create_artificial_label ();
      t1 = build (GOTO_EXPR, void_type_node, l1);
      l2 = create_artificial_label ();
      t2 = build (GOTO_EXPR, void_type_node, l2);
      cond = build (COND_EXPR, void_type_node, cond, t1, t2);
      bsi_insert_before (bsi, cond, BSI_SAME_STMT);

      /* MIN and MAX are assigned on both arms, so they must be
	 rename temporaries rather than SSA names.  */
      min = make_rename_temp (inner_type, NULL);
      max = make_rename_temp (inner_type, NULL);
      l3 = create_artificial_label ();

      /* Split the original block, and create the TRUE and FALSE blocks.  */
      e = split_block (bsi->bb, cond);
      bb_cond = e->src;
      bb_join = e->dest;
      bb_true = create_empty_bb (bb_cond);
      bb_false = create_empty_bb (bb_true);

      /* Wire the blocks together.  */
      e->flags = EDGE_TRUE_VALUE;
      redirect_edge_succ (e, bb_true);
      make_edge (bb_cond, bb_false, EDGE_FALSE_VALUE);
      make_edge (bb_true, bb_join, 0);
      make_edge (bb_false, bb_join, 0);

      /* Update dominance info.  Note that bb_join's data was
	 updated by split_block.  */
      if (dom_computed[CDI_DOMINATORS] >= DOM_CONS_OK)
	{
	  set_immediate_dominator (CDI_DOMINATORS, bb_true, bb_cond);
	  set_immediate_dominator (CDI_DOMINATORS, bb_false, bb_cond);
	}

      /* Compute min and max for TRUE block (|br| < |bi| held).  */
      *bsi = bsi_start (bb_true);
      t1 = build (LABEL_EXPR, void_type_node, l1);
      bsi_insert_after (bsi, t1, BSI_NEW_STMT);
      t1 = build (MODIFY_EXPR, inner_type, min, br);
      bsi_insert_after (bsi, t1, BSI_NEW_STMT);
      t1 = build (MODIFY_EXPR, inner_type, max, bi);
      bsi_insert_after (bsi, t1, BSI_NEW_STMT);

      /* Compute min and max for FALSE block.  */
      *bsi = bsi_start (bb_false);
      t1 = build (LABEL_EXPR, void_type_node, l2);
      bsi_insert_after (bsi, t1, BSI_NEW_STMT);
      t1 = build (MODIFY_EXPR, inner_type, min, bi);
      bsi_insert_after (bsi, t1, BSI_NEW_STMT);
      t1 = build (MODIFY_EXPR, inner_type, max, br);
      bsi_insert_after (bsi, t1, BSI_NEW_STMT);

      /* Insert the join label into the tail of the original block;
	 BSI now points into bb_join, where the rest of the expansion
	 (and the original statement) lives.  */
      *bsi = bsi_start (bb_join);
      t1 = build (LABEL_EXPR, void_type_node, l3);
      bsi_insert_before (bsi, t1, BSI_SAME_STMT);
    }

  /* Now we have MIN(|br|, |bi|) and MAX(|br|, |bi|).  We now use the
     ratio min/max to scale both the dividend and divisor.  */
  ratio = gimplify_build2 (bsi, code, inner_type, min, max);

  /* Calculate the divisor: min*ratio + max.  */
  t1 = gimplify_build2 (bsi, MULT_EXPR, inner_type, min, ratio);
  div = gimplify_build2 (bsi, PLUS_EXPR, inner_type, t1, max);

  /* Result is now ((ar + ai*ratio)/div) + i((ai - ar*ratio)/div).  */
  t1 = gimplify_build2 (bsi, MULT_EXPR, inner_type, ai, ratio);
  t2 = gimplify_build2 (bsi, PLUS_EXPR, inner_type, ar, t1);
  rr = gimplify_build2 (bsi, code, inner_type, t2, div);

  t1 = gimplify_build2 (bsi, MULT_EXPR, inner_type, ar, ratio);
  t2 = gimplify_build2 (bsi, MINUS_EXPR, inner_type, ai, t1);
  ri = gimplify_build2 (bsi, code, inner_type, t2, div);

  update_complex_assignment (bsi, rr, ri);
}
268
269 /* Expand complex division to scalars. */
270
271 static void
272 expand_complex_division (block_stmt_iterator *bsi, tree inner_type,
273 tree ar, tree ai, tree br, tree bi,
274 enum tree_code code)
275 {
276 switch (flag_complex_divide_method)
277 {
278 case 0:
279 /* straightforward implementation of complex divide acceptable. */
280 expand_complex_div_straight (bsi, inner_type, ar, ai, br, bi, code);
281 break;
282 case 1:
283 /* wide ranges of inputs must work for complex divide. */
284 expand_complex_div_wide (bsi, inner_type, ar, ai, br, bi, code);
285 break;
286 default:
287 /* C99-like requirements for complex divide (not yet implemented). */
288 abort ();
289 }
290 }
291
292 /* Expand complex negation to scalars:
293 -a = (-ar) + i(-ai)
294 */
295
296 static void
297 expand_complex_negation (block_stmt_iterator *bsi, tree inner_type,
298 tree ar, tree ai)
299 {
300 tree rr, ri;
301
302 rr = gimplify_build1 (bsi, NEGATE_EXPR, inner_type, ar);
303 ri = gimplify_build1 (bsi, NEGATE_EXPR, inner_type, ai);
304
305 update_complex_assignment (bsi, rr, ri);
306 }
307
308 /* Expand complex conjugate to scalars:
309 ~a = (ar) + i(-ai)
310 */
311
312 static void
313 expand_complex_conjugate (block_stmt_iterator *bsi, tree inner_type,
314 tree ar, tree ai)
315 {
316 tree ri;
317
318 ri = gimplify_build1 (bsi, NEGATE_EXPR, inner_type, ai);
319
320 update_complex_assignment (bsi, ar, ri);
321 }
322
323 /* Expand complex comparison (EQ or NE only). */
324
325 static void
326 expand_complex_comparison (block_stmt_iterator *bsi, tree ar, tree ai,
327 tree br, tree bi, enum tree_code code)
328 {
329 tree cr, ci, cc, stmt, type;
330
331 cr = gimplify_build2 (bsi, code, boolean_type_node, ar, br);
332 ci = gimplify_build2 (bsi, code, boolean_type_node, ai, bi);
333 cc = gimplify_build2 (bsi,
334 (code == EQ_EXPR ? TRUTH_AND_EXPR : TRUTH_OR_EXPR),
335 boolean_type_node, cr, ci);
336
337 stmt = bsi_stmt (*bsi);
338 modify_stmt (stmt);
339
340 switch (TREE_CODE (stmt))
341 {
342 case RETURN_EXPR:
343 stmt = TREE_OPERAND (stmt, 0);
344 /* FALLTHRU */
345 case MODIFY_EXPR:
346 type = TREE_TYPE (TREE_OPERAND (stmt, 1));
347 TREE_OPERAND (stmt, 1) = fold_convert (type, cc);
348 break;
349 case COND_EXPR:
350 TREE_OPERAND (stmt, 0) = cc;
351 break;
352 default:
353 abort ();
354 }
355 }
356
/* Process one statement.  If we identify a complex operation, expand it.  */

static void
expand_complex_operations_1 (block_stmt_iterator *bsi)
{
  tree stmt = bsi_stmt (*bsi);
  tree rhs, type, inner_type;
  tree ac, ar, ai, bc, br, bi;
  enum tree_code code;

  /* Locate the expression to examine: the RHS of an assignment
     (possibly wrapped in a RETURN_EXPR) or a COND_EXPR predicate.  */
  switch (TREE_CODE (stmt))
    {
    case RETURN_EXPR:
      stmt = TREE_OPERAND (stmt, 0);
      if (!stmt)
	return;
      if (TREE_CODE (stmt) != MODIFY_EXPR)
	return;
      /* FALLTHRU */

    case MODIFY_EXPR:
      rhs = TREE_OPERAND (stmt, 1);
      break;

    case COND_EXPR:
      rhs = TREE_OPERAND (stmt, 0);
      break;

    default:
      return;
    }

  type = TREE_TYPE (rhs);
  code = TREE_CODE (rhs);

  /* Initial filter for operations we handle.  */
  switch (code)
    {
    case PLUS_EXPR:
    case MINUS_EXPR:
    case MULT_EXPR:
    case TRUNC_DIV_EXPR:
    case CEIL_DIV_EXPR:
    case FLOOR_DIV_EXPR:
    case ROUND_DIV_EXPR:
    case RDIV_EXPR:
    case NEGATE_EXPR:
    case CONJ_EXPR:
      if (TREE_CODE (type) != COMPLEX_TYPE)
	return;
      inner_type = TREE_TYPE (type);
      break;

    case EQ_EXPR:
    case NE_EXPR:
      /* The result of a comparison is boolean, so check the type of
	 an operand instead.  */
      inner_type = TREE_TYPE (TREE_OPERAND (rhs, 1));
      if (TREE_CODE (inner_type) != COMPLEX_TYPE)
	return;
      break;

    default:
      return;
    }

  /* Extract the components of the two complex values.  Make sure and
     handle the common case of the same value used twice specially.  */
  ac = TREE_OPERAND (rhs, 0);
  ar = extract_component (bsi, ac, 0);
  ai = extract_component (bsi, ac, 1);

  /* Unary operations ('1' tree-code class) have no second operand.  */
  if (TREE_CODE_CLASS (code) == '1')
    bc = br = bi = NULL;
  else
    {
      bc = TREE_OPERAND (rhs, 1);
      if (ac == bc)
	br = ar, bi = ai;
      else
	{
	  br = extract_component (bsi, bc, 0);
	  bi = extract_component (bsi, bc, 1);
	}
    }

  /* Dispatch to the appropriate expander; each rewrites the statement
     at BSI in place.  */
  switch (code)
    {
    case PLUS_EXPR:
    case MINUS_EXPR:
      expand_complex_addition (bsi, inner_type, ar, ai, br, bi, code);
      break;

    case MULT_EXPR:
      expand_complex_multiplication (bsi, inner_type, ar, ai, br, bi);
      break;

    case TRUNC_DIV_EXPR:
    case CEIL_DIV_EXPR:
    case FLOOR_DIV_EXPR:
    case ROUND_DIV_EXPR:
    case RDIV_EXPR:
      expand_complex_division (bsi, inner_type, ar, ai, br, bi, code);
      break;

    case NEGATE_EXPR:
      expand_complex_negation (bsi, inner_type, ar, ai);
      break;

    case CONJ_EXPR:
      expand_complex_conjugate (bsi, inner_type, ar, ai);
      break;

    case EQ_EXPR:
    case NE_EXPR:
      expand_complex_comparison (bsi, ar, ai, br, bi, code);
      break;

    default:
      abort ();
    }
}
477 \f
/* Build a constant of type TYPE, made of VALUE's bits replicated
   every TYPE_SIZE (INNER_TYPE) bits to fit TYPE's precision.  */
static tree
build_replicated_const (tree type, tree inner_type, HOST_WIDE_INT value)
{
  int width = tree_low_cst (TYPE_SIZE (inner_type), 1);
  /* Number of WIDTH-bit chunks per HOST_WIDE_INT.  */
  int n = HOST_BITS_PER_WIDE_INT / width;
  unsigned HOST_WIDE_INT low, high, mask;
  tree ret;

  /* The inner type must fit in a HOST_WIDE_INT.  */
  if (n == 0)
    abort ();

  if (width == HOST_BITS_PER_WIDE_INT)
    low = value;
  else
    {
      /* ~0 / mask gives the pattern 0x..0101; multiplying by the
	 masked value replicates it into every WIDTH-bit chunk.  */
      mask = ((HOST_WIDE_INT)1 << width) - 1;
      low = (unsigned HOST_WIDE_INT) ~0 / mask * (value & mask);
    }

  /* Trim or extend the replicated pattern to TYPE's precision; only
     up to double-word precision is supported.  */
  if (TYPE_PRECISION (type) < HOST_BITS_PER_WIDE_INT)
    low &= ((HOST_WIDE_INT)1 << TYPE_PRECISION (type)) - 1, high = 0;
  else if (TYPE_PRECISION (type) == HOST_BITS_PER_WIDE_INT)
    high = 0;
  else if (TYPE_PRECISION (type) == 2 * HOST_BITS_PER_WIDE_INT)
    high = low;
  else
    abort ();

  ret = build_int_2 (low, high);
  TREE_TYPE (ret) = type;
  return ret;
}
512
/* Return a suitable vector types made of SUBPARTS units each of mode
   "word_mode" (the global variable).  Caches the most recently built
   type, since consecutive calls commonly share NUNITS.  */
static tree
build_word_mode_vector_type (int nunits)
{
  static tree innertype;	/* Scalar type for word_mode; built once.  */
  static tree last;		/* Most recently built vector type.  */
  static int last_nunits;	/* NUNITS used to build LAST.  */

  if (!innertype)
    innertype = lang_hooks.types.type_for_mode (word_mode, 1);
  else if (last_nunits == nunits)
    return last;

  /* We build a new type, but we canonicalize it nevertheless,
     because it still saves some memory.  */
  last_nunits = nunits;
  last = type_hash_canon (nunits, build_vector_type (innertype, nunits));
  return last;
}
533
534 typedef tree (*elem_op_func) (block_stmt_iterator *,
535 tree, tree, tree, tree, tree, enum tree_code);
536
537 static inline tree
538 tree_vec_extract (block_stmt_iterator *bsi, tree type,
539 tree t, tree bitsize, tree bitpos)
540 {
541 if (bitpos)
542 return gimplify_build3 (bsi, BIT_FIELD_REF, type, t, bitsize, bitpos);
543 else
544 return gimplify_build1 (bsi, VIEW_CONVERT_EXPR, type, t);
545 }
546
547 static tree
548 do_unop (block_stmt_iterator *bsi, tree inner_type, tree a,
549 tree b ATTRIBUTE_UNUSED, tree bitpos, tree bitsize,
550 enum tree_code code)
551 {
552 a = tree_vec_extract (bsi, inner_type, a, bitsize, bitpos);
553 return gimplify_build1 (bsi, code, inner_type, a);
554 }
555
556 static tree
557 do_binop (block_stmt_iterator *bsi, tree inner_type, tree a, tree b,
558 tree bitpos, tree bitsize, enum tree_code code)
559 {
560 a = tree_vec_extract (bsi, inner_type, a, bitsize, bitpos);
561 b = tree_vec_extract (bsi, inner_type, b, bitsize, bitpos);
562 return gimplify_build2 (bsi, code, inner_type, a, b);
563 }
564
565 /* Expand vector addition to scalars. This does bit twiddling
566 in order to increase parallelism:
567
568 a + b = (((int) a & 0x7f7f7f7f) + ((int) b & 0x7f7f7f7f)) ^
569 (a ^ b) & 0x80808080
570
571 a - b = (((int) a | 0x80808080) - ((int) b & 0x7f7f7f7f)) ^
572 (a ^ ~b) & 0x80808080
573
574 -b = (0x80808080 - ((int) b & 0x7f7f7f7f)) ^ (~b & 0x80808080)
575
576 This optimization should be done only if 4 vector items or more
577 fit into a word. */
578 static tree
579 do_plus_minus (block_stmt_iterator *bsi, tree word_type, tree a, tree b,
580 tree bitpos ATTRIBUTE_UNUSED, tree bitsize ATTRIBUTE_UNUSED,
581 enum tree_code code)
582 {
583 tree inner_type = TREE_TYPE (TREE_TYPE (a));
584 unsigned HOST_WIDE_INT max;
585 tree low_bits, high_bits, a_low, b_low, result_low, signs;
586
587 max = GET_MODE_MASK (TYPE_MODE (inner_type));
588 low_bits = build_replicated_const (word_type, inner_type, max >> 1);
589 high_bits = build_replicated_const (word_type, inner_type, max & ~(max >> 1));
590
591 a = tree_vec_extract (bsi, word_type, a, bitsize, bitpos);
592 b = tree_vec_extract (bsi, word_type, b, bitsize, bitpos);
593
594 signs = gimplify_build2 (bsi, BIT_XOR_EXPR, word_type, a, b);
595 b_low = gimplify_build2 (bsi, BIT_AND_EXPR, word_type, b, low_bits);
596 if (code == PLUS_EXPR)
597 a_low = gimplify_build2 (bsi, BIT_AND_EXPR, word_type, a, low_bits);
598 else
599 {
600 a_low = gimplify_build2 (bsi, BIT_IOR_EXPR, word_type, a, high_bits);
601 signs = gimplify_build1 (bsi, BIT_NOT_EXPR, word_type, signs);
602 }
603
604 signs = gimplify_build2 (bsi, BIT_AND_EXPR, word_type, signs, high_bits);
605 result_low = gimplify_build2 (bsi, code, word_type, a_low, b_low);
606 return gimplify_build2 (bsi, BIT_XOR_EXPR, word_type, result_low, signs);
607 }
608
609 static tree
610 do_negate (block_stmt_iterator *bsi, tree word_type, tree b,
611 tree unused ATTRIBUTE_UNUSED, tree bitpos ATTRIBUTE_UNUSED,
612 tree bitsize ATTRIBUTE_UNUSED,
613 enum tree_code code ATTRIBUTE_UNUSED)
614 {
615 tree inner_type = TREE_TYPE (TREE_TYPE (b));
616 HOST_WIDE_INT max;
617 tree low_bits, high_bits, b_low, result_low, signs;
618
619 max = GET_MODE_MASK (TYPE_MODE (inner_type));
620 low_bits = build_replicated_const (word_type, inner_type, max >> 1);
621 high_bits = build_replicated_const (word_type, inner_type, max & ~(max >> 1));
622
623 b = tree_vec_extract (bsi, word_type, b, bitsize, bitpos);
624
625 b_low = gimplify_build2 (bsi, BIT_AND_EXPR, word_type, b, low_bits);
626 signs = gimplify_build1 (bsi, BIT_NOT_EXPR, word_type, b);
627 signs = gimplify_build2 (bsi, BIT_AND_EXPR, word_type, signs, high_bits);
628 result_low = gimplify_build2 (bsi, MINUS_EXPR, word_type, high_bits, b_low);
629 return gimplify_build2 (bsi, BIT_XOR_EXPR, word_type, result_low, signs);
630 }
631
632 /* Expand a vector operation to scalars, by using many operations
633 whose type is the vector type's inner type. */
634 static tree
635 expand_vector_piecewise (block_stmt_iterator *bsi, elem_op_func f,
636 tree type, tree inner_type,
637 tree a, tree b, enum tree_code code)
638 {
639 tree head, *chain = &head;
640 tree part_width = TYPE_SIZE (inner_type);
641 tree index = bitsize_int (0);
642 int nunits = TYPE_VECTOR_SUBPARTS (type);
643 int delta = tree_low_cst (part_width, 1)
644 / tree_low_cst (TYPE_SIZE (TREE_TYPE (type)), 1);
645 int i;
646
647 for (i = 0; i < nunits;
648 i += delta, index = int_const_binop (PLUS_EXPR, index, part_width, 0))
649 {
650 tree result = f (bsi, inner_type, a, b, index, part_width, code);
651 *chain = tree_cons (NULL_TREE, result, NULL_TREE);
652 chain = &TREE_CHAIN (*chain);
653 }
654
655 return build1 (CONSTRUCTOR, type, head);
656 }
657
658 /* Expand a vector operation to scalars with the freedom to use
659 a scalar integer type, or to use a different size for the items
660 in the vector type. */
661 static tree
662 expand_vector_parallel (block_stmt_iterator *bsi, elem_op_func f, tree type,
663 tree a, tree b,
664 enum tree_code code)
665 {
666 tree result, compute_type;
667 enum machine_mode mode;
668 int n_words = tree_low_cst (TYPE_SIZE_UNIT (type), 1) / UNITS_PER_WORD;
669
670 /* We have three strategies. If the type is already correct, just do
671 the operation an element at a time. Else, if the vector is wider than
672 one word, do it a word at a time; finally, if the vector is smaller
673 than one word, do it as a scalar. */
674 if (TYPE_MODE (TREE_TYPE (type)) == word_mode)
675 return expand_vector_piecewise (bsi, f,
676 type, TREE_TYPE (type),
677 a, b, code);
678 else if (n_words > 1)
679 {
680 tree word_type = build_word_mode_vector_type (n_words);
681 result = expand_vector_piecewise (bsi, f,
682 word_type, TREE_TYPE (word_type),
683 a, b, code);
684 result = gimplify_val (bsi, word_type, result);
685 }
686 else
687 {
688 /* Use a single scalar operation with a mode no wider than word_mode. */
689 mode = mode_for_size (tree_low_cst (TYPE_SIZE (type), 1), MODE_INT, 0);
690 compute_type = lang_hooks.types.type_for_mode (mode, 1);
691 result = f (bsi, compute_type, a, b, NULL_TREE, NULL_TREE, code);
692 }
693
694 return build1 (VIEW_CONVERT_EXPR, type, result);
695 }
696
697 /* Expand a vector operation to scalars; for integer types we can use
698 special bit twiddling tricks to do the sums a word at a time, using
699 function F_PARALLEL instead of F. These tricks are done only if
700 they can process at least four items, that is, only if the vector
701 holds at least four items and if a word can hold four items. */
702 static tree
703 expand_vector_addition (block_stmt_iterator *bsi,
704 elem_op_func f, elem_op_func f_parallel,
705 tree type, tree a, tree b, enum tree_code code)
706 {
707 int parts_per_word = UNITS_PER_WORD
708 / tree_low_cst (TYPE_SIZE_UNIT (TREE_TYPE (type)), 1);
709
710 if (INTEGRAL_TYPE_P (TREE_TYPE (type))
711 && parts_per_word >= 4
712 && TYPE_VECTOR_SUBPARTS (type) >= 4)
713 return expand_vector_parallel (bsi, f_parallel,
714 type, a, b, code);
715 else
716 return expand_vector_piecewise (bsi, f,
717 type, TREE_TYPE (type),
718 a, b, code);
719 }
720
721 /* Return a type for the widest vector mode whose components are of mode
722 INNER_MODE, or NULL_TREE if none is found. */
723 static tree
724 type_for_widest_vector_mode (enum machine_mode inner_mode, optab op)
725 {
726 enum machine_mode best_mode = VOIDmode, mode;
727 int best_nunits = 0;
728
729 if (GET_MODE_CLASS (inner_mode) == MODE_FLOAT)
730 mode = MIN_MODE_VECTOR_FLOAT;
731 else
732 mode = MIN_MODE_VECTOR_INT;
733
734 for (; mode != VOIDmode; mode = GET_MODE_WIDER_MODE (mode))
735 if (GET_MODE_INNER (mode) == inner_mode
736 && GET_MODE_NUNITS (mode) > best_nunits
737 && op->handlers[mode].insn_code != CODE_FOR_nothing)
738 best_mode = mode, best_nunits = GET_MODE_NUNITS (mode);
739
740 if (best_mode == VOIDmode)
741 return NULL_TREE;
742 else
743 return lang_hooks.types.type_for_mode (best_mode, 1);
744 }
745
/* Process one statement.  If we identify a vector operation, expand it.  */

static void
expand_vector_operations_1 (block_stmt_iterator *bsi)
{
  tree stmt = bsi_stmt (*bsi);
  tree *p_rhs, rhs, type, compute_type;
  enum tree_code code;
  enum machine_mode compute_mode;
  optab op;

  /* Locate the RHS; only assignments (possibly wrapped in a
     RETURN_EXPR) are candidates.  P_RHS lets us update it in place.  */
  switch (TREE_CODE (stmt))
    {
    case RETURN_EXPR:
      stmt = TREE_OPERAND (stmt, 0);
      if (!stmt || TREE_CODE (stmt) != MODIFY_EXPR)
	return;

      /* FALLTHRU */

    case MODIFY_EXPR:
      p_rhs = &TREE_OPERAND (stmt, 1);
      rhs = *p_rhs;
      break;

    default:
      return;
    }

  type = TREE_TYPE (rhs);
  if (TREE_CODE (type) != VECTOR_TYPE)
    return;

  /* Only unary ('1') and binary ('2') expressions are handled.  */
  code = TREE_CODE (rhs);
  if (TREE_CODE_CLASS (code) != '1'
      && TREE_CODE_CLASS (code) != '2')
    return;

  if (code == NOP_EXPR || code == VIEW_CONVERT_EXPR)
    return;

  /* CONVERT_EXPR is not expected to survive to this point (NOP_EXPR
     and VIEW_CONVERT_EXPR are filtered above).  */
  if (code == CONVERT_EXPR)
    abort ();

  op = optab_for_tree_code (code, type);

  /* Optabs will try converting a negation into a subtraction, so
     look for it as well.  TODO: negation of floating-point vectors
     might be turned into an exclusive OR toggling the sign bit.  */
  if (op == NULL
      && code == NEGATE_EXPR
      && INTEGRAL_TYPE_P (TREE_TYPE (type)))
    op = optab_for_tree_code (MINUS_EXPR, type);

  /* For very wide vectors, try using a smaller vector mode.  */
  compute_type = type;
  if (TYPE_MODE (type) == BLKmode && op)
    {
      tree vector_compute_type
	= type_for_widest_vector_mode (TYPE_MODE (TREE_TYPE (type)), op);
      if (vector_compute_type != NULL_TREE)
	compute_type = vector_compute_type;
    }

  compute_mode = TYPE_MODE (compute_type);

  /* If we are breaking a BLKmode vector into smaller pieces,
     type_for_widest_vector_mode has already looked into the optab,
     so skip these checks.  */
  if (compute_type == type)
    {
      if ((GET_MODE_CLASS (compute_mode) == MODE_VECTOR_INT
	   || GET_MODE_CLASS (compute_mode) == MODE_VECTOR_FLOAT)
	  && op != NULL
	  && op->handlers[compute_mode].insn_code != CODE_FOR_nothing)
	/* The hardware supports the operation directly; leave it be.  */
	return;
      else
	{
	  /* There is no operation in hardware, so fall back to scalars.  */
	  compute_type = TREE_TYPE (type);
	  compute_mode = TYPE_MODE (compute_type);
	}
    }

  /* If the compute mode is not a vector mode (hence we are decomposing
     a BLKmode vector to smaller, hardware-supported vectors), we may
     want to expand the operations in parallel.  */
  if (GET_MODE_CLASS (compute_mode) != MODE_VECTOR_INT
      && GET_MODE_CLASS (compute_mode) != MODE_VECTOR_FLOAT)
    switch (code)
      {
      case PLUS_EXPR:
      case MINUS_EXPR:
	/* Trapping signed types are excluded from the word-parallel
	   path; fall through to the piecewise expansion below.  */
	if (TYPE_TRAP_SIGNED (type))
	  break;

	*p_rhs = expand_vector_addition (bsi, do_binop, do_plus_minus, type,
					 TREE_OPERAND (rhs, 0),
					 TREE_OPERAND (rhs, 1), code);
	modify_stmt (bsi_stmt (*bsi));
	return;

      case NEGATE_EXPR:
	if (TYPE_TRAP_SIGNED (type))
	  break;

	*p_rhs = expand_vector_addition (bsi, do_unop, do_negate, type,
					 TREE_OPERAND (rhs, 0),
					 NULL_TREE, code);
	modify_stmt (bsi_stmt (*bsi));
	return;

      case BIT_AND_EXPR:
      case BIT_IOR_EXPR:
      case BIT_XOR_EXPR:
	/* Bitwise operations need no element boundaries; do them a
	   word at a time unconditionally.  */
	*p_rhs = expand_vector_parallel (bsi, do_binop, type,
					 TREE_OPERAND (rhs, 0),
					 TREE_OPERAND (rhs, 1), code);
	modify_stmt (bsi_stmt (*bsi));
	return;

      case BIT_NOT_EXPR:
	*p_rhs = expand_vector_parallel (bsi, do_unop, type,
					 TREE_OPERAND (rhs, 0),
					 NULL_TREE, code);
	modify_stmt (bsi_stmt (*bsi));
	return;

      default:
	break;
      }

  /* General fallback: expand piece by piece in COMPUTE_TYPE.  */
  if (TREE_CODE_CLASS (code) == '1')
    *p_rhs = expand_vector_piecewise (bsi, do_unop, type, compute_type,
				      TREE_OPERAND (rhs, 0),
				      NULL_TREE, code);
  else
    *p_rhs = expand_vector_piecewise (bsi, do_binop, type, compute_type,
				      TREE_OPERAND (rhs, 0),
				      TREE_OPERAND (rhs, 1), code);

  modify_stmt (bsi_stmt (*bsi));
}
889 \f
890 static void
891 expand_vector_operations (void)
892 {
893 block_stmt_iterator bsi;
894 basic_block bb;
895
896 FOR_EACH_BB (bb)
897 {
898 for (bsi = bsi_start (bb); !bsi_end_p (bsi); bsi_next (&bsi))
899 expand_vector_operations_1 (&bsi);
900 }
901 }
902
/* Execute function for the pre-expand lowering pass: expand both
   complex and vector operations in every statement.  */
static void
tree_lower_operations (void)
{
  /* Expansion can create new basic blocks (e.g. expand_complex_div_wide
     splits blocks); snapshot the count so the new blocks, whose indices
     are >= this value, are not rescanned.  */
  int old_last_basic_block = last_basic_block;
  block_stmt_iterator bsi;
  basic_block bb;

  FOR_EACH_BB (bb)
    {
      if (bb->index >= old_last_basic_block)
	continue;
      for (bsi = bsi_start (bb); !bsi_end_p (bsi); bsi_next (&bsi))
	{
	  expand_complex_operations_1 (&bsi);
	  expand_vector_operations_1 (&bsi);
	}
    }
}
921
922
/* Pass descriptor for the SSA-form vector lowering pass ("vector").
   Runs expand_vector_operations; requires a CFG and re-verifies
   SSA/statements/flow afterwards.  */
struct tree_opt_pass pass_lower_vector_ssa =
{
  "vector",				/* name */
  NULL,					/* gate */
  expand_vector_operations,		/* execute */
  NULL,					/* sub */
  NULL,					/* next */
  0,					/* static_pass_number */
  0,					/* tv_id */
  PROP_cfg,				/* properties_required */
  0,					/* properties_provided */
  0,					/* properties_destroyed */
  0,					/* todo_flags_start */
  TODO_dump_func | TODO_rename_vars	/* todo_flags_finish */
    | TODO_ggc_collect | TODO_verify_ssa
    | TODO_verify_stmts | TODO_verify_flow
};
940
941 struct tree_opt_pass pass_pre_expand =
942 {
943 "oplower", /* name */
944 0, /* gate */
945 tree_lower_operations, /* execute */
946 NULL, /* sub */
947 NULL, /* next */
948 0, /* static_pass_number */
949 0, /* tv_id */
950 PROP_cfg, /* properties_required */
951 0, /* properties_provided */
952 0, /* properties_destroyed */
953 0, /* todo_flags_start */
954 TODO_dump_func | TODO_ggc_collect
955 | TODO_verify_stmts /* todo_flags_finish */
956 };