1 /* Reassociation for trees.
2 Copyright (C) 2005-2013 Free Software Foundation, Inc.
3 Contributed by Daniel Berlin <dan@dberlin.org>
4
5 This file is part of GCC.
6
7 GCC is free software; you can redistribute it and/or modify
8 it under the terms of the GNU General Public License as published by
9 the Free Software Foundation; either version 3, or (at your option)
10 any later version.
11
12 GCC is distributed in the hope that it will be useful,
13 but WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 GNU General Public License for more details.
16
17 You should have received a copy of the GNU General Public License
18 along with GCC; see the file COPYING3. If not see
19 <http://www.gnu.org/licenses/>. */
20
21 #include "config.h"
22 #include "system.h"
23 #include "coretypes.h"
24 #include "hash-table.h"
25 #include "tm.h"
26 #include "rtl.h"
27 #include "tm_p.h"
28 #include "tree.h"
29 #include "basic-block.h"
30 #include "gimple-pretty-print.h"
31 #include "tree-inline.h"
32 #include "gimplify.h"
33 #include "gimple-iterator.h"
34 #include "gimple-ssa.h"
35 #include "tree-cfg.h"
36 #include "tree-phinodes.h"
37 #include "ssa-iterators.h"
38 #include "tree-ssanames.h"
39 #include "tree-ssa-loop-niter.h"
40 #include "tree-ssa-loop.h"
41 #include "tree-dfa.h"
42 #include "tree-ssa.h"
43 #include "tree-iterator.h"
44 #include "tree-pass.h"
45 #include "alloc-pool.h"
46 #include "vec.h"
47 #include "langhooks.h"
48 #include "pointer-set.h"
49 #include "cfgloop.h"
50 #include "flags.h"
51 #include "target.h"
52 #include "params.h"
53 #include "diagnostic-core.h"
54
55 /* This is a simple global reassociation pass. It is, in part, based
56      on the LLVM pass of the same name (they do some things more or less
57      than we do, in different orders, etc.).
58
59 It consists of five steps:
60
61 1. Breaking up subtract operations into addition + negate, where
62 it would promote the reassociation of adds.
63
64 2. Left linearization of the expression trees, so that (A+B)+(C+D)
65 becomes (((A+B)+C)+D), which is easier for us to rewrite later.
66 During linearization, we place the operands of the binary
67 expressions into a vector of operand_entry_t
68
69 3. Optimization of the operand lists, eliminating things like a +
70 -a, a & a, etc.
71
72 3a. Combine repeated factors with the same occurrence counts
73 into a __builtin_powi call that will later be optimized into
74 an optimal number of multiplies.
75
76 4. Rewrite the expression trees we linearized and optimized so
77 they are in proper rank order.
78
79      5. Repropagate negates, as nothing else will clean them up ATM.
80
81 A bit of theory on #4, since nobody seems to write anything down
82 about why it makes sense to do it the way they do it:
83
84 We could do this much nicer theoretically, but don't (for reasons
85 explained after how to do it theoretically nice :P).
86
87 In order to promote the most redundancy elimination, you want
88 binary expressions whose operands are the same rank (or
89 preferably, the same value) exposed to the redundancy eliminator,
90 for possible elimination.
91
92 So the way to do this if we really cared, is to build the new op
93 tree from the leaves to the roots, merging as you go, and putting the
94 new op on the end of the worklist, until you are left with one
95 thing on the worklist.
96
97      I.e., if you have to rewrite the following set of operands (listed with
98 rank in parentheses), with opcode PLUS_EXPR:
99
100 a (1), b (1), c (1), d (2), e (2)
101
102
103 We start with our merge worklist empty, and the ops list with all of
104 those on it.
105
106 You want to first merge all leaves of the same rank, as much as
107 possible.
108
109 So first build a binary op of
110
111 mergetmp = a + b, and put "mergetmp" on the merge worklist.
112
113 Because there is no three operand form of PLUS_EXPR, c is not going to
114 be exposed to redundancy elimination as a rank 1 operand.
115
116 So you might as well throw it on the merge worklist (you could also
117 consider it to now be a rank two operand, and merge it with d and e,
118 but in this case, you then have evicted e from a binary op. So at
119 least in this situation, you can't win.)
120
121 Then build a binary op of d + e
122 mergetmp2 = d + e
123
124 and put mergetmp2 on the merge worklist.
125
126 so merge worklist = {mergetmp, c, mergetmp2}
127
128 Continue building binary ops of these operations until you have only
129 one operation left on the worklist.
130
131 So we have
132
133 build binary op
134 mergetmp3 = mergetmp + c
135
136 worklist = {mergetmp2, mergetmp3}
137
138 mergetmp4 = mergetmp2 + mergetmp3
139
140 worklist = {mergetmp4}
141
142 because we have one operation left, we can now just set the original
143 statement equal to the result of that operation.
144
145 This will at least expose a + b and d + e to redundancy elimination
146 as binary operations.
147
148 For extra points, you can reuse the old statements to build the
149 mergetmps, since you shouldn't run out.
150
151 So why don't we do this?
152
153 Because it's expensive, and rarely will help. Most trees we are
154     reassociating have 3 or fewer ops.  If they have 2 ops, they already
155 will be written into a nice single binary op. If you have 3 ops, a
156 single simple check suffices to tell you whether the first two are of the
157 same rank. If so, you know to order it
158
159 mergetmp = op1 + op2
160 newstmt = mergetmp + op3
161
162 instead of
163 mergetmp = op2 + op3
164 newstmt = mergetmp + op1
165
166 If all three are of the same rank, you can't expose them all in a
167 single binary operator anyway, so the above is *still* the best you
168 can do.
169
170 Thus, this is what we do. When we have three ops left, we check to see
171 what order to put them in, and call it a day. As a nod to vector sum
172 reduction, we check if any of the ops are really a phi node that is a
173 destructive update for the associating op, and keep the destructive
174 update together for vector sum reduction recognition. */
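/* A hypothetical sketch of steps 1 and 5 (not from the sources): for
   a chain like

     t1 = a - b;
     x  = t1 - c;

   step 1 rewrites the subtracts as additions of negates,

     n1 = -b;
     n2 = -c;
     t1 = a + n1;
     x  = t1 + n2;

   which gives steps 2-4 the linearized operand list {a, n1, n2}; step 5
   later turns any surviving "y = d + (-e)" form back into "y = d - e".  */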
175
176
177 /* Statistics */
178 static struct
179 {
180 int linearized;
181 int constants_eliminated;
182 int ops_eliminated;
183 int rewritten;
184 int pows_encountered;
185 int pows_created;
186 } reassociate_stats;
187
188 /* Operator, rank pair. */
189 typedef struct operand_entry
190 {
191 unsigned int rank;
192 int id;
193 tree op;
194 unsigned int count;
195 } *operand_entry_t;
196
197 static alloc_pool operand_entry_pool;
198
199 /* This is used to assign a unique ID to each struct operand_entry
200 so that qsort results are identical on different hosts. */
201 static int next_operand_entry_id;
202
203 /* Starting rank number for a given basic block, so that we can rank
204 operations using unmovable instructions in that BB based on the bb
205 depth. */
206 static long *bb_rank;
207
208 /* Operand->rank hashtable. */
209 static struct pointer_map_t *operand_rank;
210
211 /* Forward decls. */
212 static long get_rank (tree);
213
214
215 /* Bias amount for loop-carried phis. We want this to be larger than
216 the depth of any reassociation tree we can see, but not larger than
217 the rank difference between two blocks. */
218 #define PHI_LOOP_BIAS (1 << 15)
219
220 /* Rank assigned to a phi statement. If STMT is a loop-carried phi of
221 an innermost loop, and the phi has only a single use which is inside
222 the loop, then the rank is the block rank of the loop latch plus an
223 extra bias for the loop-carried dependence. This causes expressions
224 calculated into an accumulator variable to be independent for each
225 iteration of the loop. If STMT is some other phi, the rank is the
226 block rank of its containing block. */
227 static long
228 phi_rank (gimple stmt)
229 {
230 basic_block bb = gimple_bb (stmt);
231 struct loop *father = bb->loop_father;
232 tree res;
233 unsigned i;
234 use_operand_p use;
235 gimple use_stmt;
236
237 /* We only care about real loops (those with a latch). */
238 if (!father->latch)
239 return bb_rank[bb->index];
240
241 /* Interesting phis must be in headers of innermost loops. */
242 if (bb != father->header
243 || father->inner)
244 return bb_rank[bb->index];
245
246 /* Ignore virtual SSA_NAMEs. */
247 res = gimple_phi_result (stmt);
248 if (virtual_operand_p (res))
249 return bb_rank[bb->index];
250
251 /* The phi definition must have a single use, and that use must be
252 within the loop. Otherwise this isn't an accumulator pattern. */
253 if (!single_imm_use (res, &use, &use_stmt)
254 || gimple_bb (use_stmt)->loop_father != father)
255 return bb_rank[bb->index];
256
257 /* Look for phi arguments from within the loop. If found, bias this phi. */
258 for (i = 0; i < gimple_phi_num_args (stmt); i++)
259 {
260 tree arg = gimple_phi_arg_def (stmt, i);
261 if (TREE_CODE (arg) == SSA_NAME
262 && !SSA_NAME_IS_DEFAULT_DEF (arg))
263 {
264 gimple def_stmt = SSA_NAME_DEF_STMT (arg);
265 if (gimple_bb (def_stmt)->loop_father == father)
266 return bb_rank[father->latch->index] + PHI_LOOP_BIAS;
267 }
268 }
269
270 /* Must be an uninteresting phi. */
271 return bb_rank[bb->index];
272 }
273
274 /* If EXP is an SSA_NAME defined by a PHI statement that represents a
275 loop-carried dependence of an innermost loop, return TRUE; else
276 return FALSE. */
277 static bool
278 loop_carried_phi (tree exp)
279 {
280 gimple phi_stmt;
281 long block_rank;
282
283 if (TREE_CODE (exp) != SSA_NAME
284 || SSA_NAME_IS_DEFAULT_DEF (exp))
285 return false;
286
287 phi_stmt = SSA_NAME_DEF_STMT (exp);
288
289 if (gimple_code (SSA_NAME_DEF_STMT (exp)) != GIMPLE_PHI)
290 return false;
291
292 /* Non-loop-carried phis have block rank. Loop-carried phis have
293 an additional bias added in. If this phi doesn't have block rank,
294 it's biased and should not be propagated. */
295 block_rank = bb_rank[gimple_bb (phi_stmt)->index];
296
297 if (phi_rank (phi_stmt) != block_rank)
298 return true;
299
300 return false;
301 }
302
303 /* Return the maximum of RANK and the rank that should be propagated
304 from expression OP. For most operands, this is just the rank of OP.
305 For loop-carried phis, the value is zero to avoid undoing the bias
306 in favor of the phi. */
307 static long
308 propagate_rank (long rank, tree op)
309 {
310 long op_rank;
311
312 if (loop_carried_phi (op))
313 return rank;
314
315 op_rank = get_rank (op);
316
317 return MAX (rank, op_rank);
318 }
319
320 /* Look up the operand rank structure for expression E. */
321
322 static inline long
323 find_operand_rank (tree e)
324 {
325 void **slot = pointer_map_contains (operand_rank, e);
326 return slot ? (long) (intptr_t) *slot : -1;
327 }
328
329 /* Insert {E,RANK} into the operand rank hashtable. */
330
331 static inline void
332 insert_operand_rank (tree e, long rank)
333 {
334 void **slot;
335 gcc_assert (rank > 0);
336 slot = pointer_map_insert (operand_rank, e);
337 gcc_assert (!*slot);
338 *slot = (void *) (intptr_t) rank;
339 }
340
341 /* Given an expression E, return the rank of the expression. */
342
343 static long
344 get_rank (tree e)
345 {
346 /* Constants have rank 0. */
347 if (is_gimple_min_invariant (e))
348 return 0;
349
350 /* SSA_NAME's have the rank of the expression they are the result
351 of.
352 For globals and uninitialized values, the rank is 0.
353 For function arguments, use the pre-setup rank.
354 For PHI nodes, stores, asm statements, etc, we use the rank of
355 the BB.
356 For simple operations, the rank is the maximum rank of any of
357 its operands, or the bb_rank, whichever is less.
358 I make no claims that this is optimal, however, it gives good
359 results. */
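  /* As a rough illustration (hypothetical SSA names): if x_1 and y_2
     already have ranks 3 and 5, then for

       t_3 = x_1 + y_2;

     the rank recorded for t_3 below is MAX (3, 5) + 1 == 6.  Values
     computed "later" thus get higher ranks, and they sort first when
     the operand vector is ordered by decreasing rank.  */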
360
361 /* We make an exception to the normal ranking system to break
362 dependences of accumulator variables in loops. Suppose we
363 have a simple one-block loop containing:
364
365 x_1 = phi(x_0, x_2)
366 b = a + x_1
367 c = b + d
368 x_2 = c + e
369
370 As shown, each iteration of the calculation into x is fully
371 dependent upon the iteration before it. We would prefer to
372 see this in the form:
373
374 x_1 = phi(x_0, x_2)
375 b = a + d
376 c = b + e
377 x_2 = c + x_1
378
379 If the loop is unrolled, the calculations of b and c from
380 different iterations can be interleaved.
381
382 To obtain this result during reassociation, we bias the rank
383 of the phi definition x_1 upward, when it is recognized as an
384 accumulator pattern. The artificial rank causes it to be
385 added last, providing the desired independence. */
386
387 if (TREE_CODE (e) == SSA_NAME)
388 {
389 gimple stmt;
390 long rank;
391 int i, n;
392 tree op;
393
394 if (SSA_NAME_IS_DEFAULT_DEF (e))
395 return find_operand_rank (e);
396
397 stmt = SSA_NAME_DEF_STMT (e);
398 if (gimple_code (stmt) == GIMPLE_PHI)
399 return phi_rank (stmt);
400
401 if (!is_gimple_assign (stmt)
402 || gimple_vdef (stmt))
403 return bb_rank[gimple_bb (stmt)->index];
404
405 /* If we already have a rank for this expression, use that. */
406 rank = find_operand_rank (e);
407 if (rank != -1)
408 return rank;
409
410 /* Otherwise, find the maximum rank for the operands. As an
411 exception, remove the bias from loop-carried phis when propagating
412 the rank so that dependent operations are not also biased. */
413 rank = 0;
414 if (gimple_assign_single_p (stmt))
415 {
416 tree rhs = gimple_assign_rhs1 (stmt);
417 n = TREE_OPERAND_LENGTH (rhs);
418 if (n == 0)
419 rank = propagate_rank (rank, rhs);
420 else
421 {
422 for (i = 0; i < n; i++)
423 {
424 op = TREE_OPERAND (rhs, i);
425
426 if (op != NULL_TREE)
427 rank = propagate_rank (rank, op);
428 }
429 }
430 }
431 else
432 {
433 n = gimple_num_ops (stmt);
434 for (i = 1; i < n; i++)
435 {
436 op = gimple_op (stmt, i);
437 gcc_assert (op);
438 rank = propagate_rank (rank, op);
439 }
440 }
441
442 if (dump_file && (dump_flags & TDF_DETAILS))
443 {
444 fprintf (dump_file, "Rank for ");
445 print_generic_expr (dump_file, e, 0);
446 fprintf (dump_file, " is %ld\n", (rank + 1));
447 }
448
449 /* Note the rank in the hashtable so we don't recompute it. */
450 insert_operand_rank (e, (rank + 1));
451 return (rank + 1);
452 }
453
454 /* Globals, etc, are rank 0 */
455 return 0;
456 }
457
458
459 /* We want integer ones to end up last no matter what, since they are
460 the ones we can do the most with. */
461 #define INTEGER_CONST_TYPE 1 << 3
462 #define FLOAT_CONST_TYPE 1 << 2
463 #define OTHER_CONST_TYPE 1 << 1
464
465 /* Classify an invariant tree into integer, float, or other, so that
466 we can sort them to be near other constants of the same type. */
467 static inline int
468 constant_type (tree t)
469 {
470 if (INTEGRAL_TYPE_P (TREE_TYPE (t)))
471 return INTEGER_CONST_TYPE;
472 else if (SCALAR_FLOAT_TYPE_P (TREE_TYPE (t)))
473 return FLOAT_CONST_TYPE;
474 else
475 return OTHER_CONST_TYPE;
476 }
477
478 /* qsort comparison function to sort operand entries PA and PB by rank
479 so that the sorted array is ordered by rank in decreasing order. */
480 static int
481 sort_by_operand_rank (const void *pa, const void *pb)
482 {
483 const operand_entry_t oea = *(const operand_entry_t *)pa;
484 const operand_entry_t oeb = *(const operand_entry_t *)pb;
485
486   /* It's nicer for optimize_ops_list if constants that are likely
487      to fold when added/multiplied/whatever are put next to each
488 other. Since all constants have rank 0, order them by type. */
489 if (oeb->rank == 0 && oea->rank == 0)
490 {
491 if (constant_type (oeb->op) != constant_type (oea->op))
492 return constant_type (oeb->op) - constant_type (oea->op);
493 else
494 /* To make sorting result stable, we use unique IDs to determine
495 order. */
496 return oeb->id - oea->id;
497 }
498
499 /* Lastly, make sure the versions that are the same go next to each
500 other. We use SSA_NAME_VERSION because it's stable. */
501 if ((oeb->rank - oea->rank == 0)
502 && TREE_CODE (oea->op) == SSA_NAME
503 && TREE_CODE (oeb->op) == SSA_NAME)
504 {
505 if (SSA_NAME_VERSION (oeb->op) != SSA_NAME_VERSION (oea->op))
506 return SSA_NAME_VERSION (oeb->op) - SSA_NAME_VERSION (oea->op);
507 else
508 return oeb->id - oea->id;
509 }
510
511 if (oeb->rank != oea->rank)
512 return oeb->rank - oea->rank;
513 else
514 return oeb->id - oea->id;
515 }
516
517 /* Add an operand entry to *OPS for the tree operand OP. */
518
519 static void
520 add_to_ops_vec (vec<operand_entry_t> *ops, tree op)
521 {
522 operand_entry_t oe = (operand_entry_t) pool_alloc (operand_entry_pool);
523
524 oe->op = op;
525 oe->rank = get_rank (op);
526 oe->id = next_operand_entry_id++;
527 oe->count = 1;
528 ops->safe_push (oe);
529 }
530
531 /* Add an operand entry to *OPS for the tree operand OP with repeat
532 count REPEAT. */
533
534 static void
535 add_repeat_to_ops_vec (vec<operand_entry_t> *ops, tree op,
536 HOST_WIDE_INT repeat)
537 {
538 operand_entry_t oe = (operand_entry_t) pool_alloc (operand_entry_pool);
539
540 oe->op = op;
541 oe->rank = get_rank (op);
542 oe->id = next_operand_entry_id++;
543 oe->count = repeat;
544 ops->safe_push (oe);
545
546 reassociate_stats.pows_encountered++;
547 }
548
549 /* Return true if STMT is a reassociable operation containing a binary
550 operation with tree code CODE, and is inside LOOP. */
551
552 static bool
553 is_reassociable_op (gimple stmt, enum tree_code code, struct loop *loop)
554 {
555 basic_block bb = gimple_bb (stmt);
556
557 if (gimple_bb (stmt) == NULL)
558 return false;
559
560 if (!flow_bb_inside_loop_p (loop, bb))
561 return false;
562
563 if (is_gimple_assign (stmt)
564 && gimple_assign_rhs_code (stmt) == code
565 && has_single_use (gimple_assign_lhs (stmt)))
566 return true;
567
568 return false;
569 }
570
571
572 /* Given NAME, if NAME is defined by a unary operation OPCODE, return the
573    operand of that operation.  Otherwise, return NULL_TREE.  */
574
575 static tree
576 get_unary_op (tree name, enum tree_code opcode)
577 {
578 gimple stmt = SSA_NAME_DEF_STMT (name);
579
580 if (!is_gimple_assign (stmt))
581 return NULL_TREE;
582
583 if (gimple_assign_rhs_code (stmt) == opcode)
584 return gimple_assign_rhs1 (stmt);
585 return NULL_TREE;
586 }
587
588 /* If CURR and LAST are a pair of ops that OPCODE allows us to
589 eliminate through equivalences, do so, remove them from OPS, and
590 return true. Otherwise, return false. */
591
592 static bool
593 eliminate_duplicate_pair (enum tree_code opcode,
594 vec<operand_entry_t> *ops,
595 bool *all_done,
596 unsigned int i,
597 operand_entry_t curr,
598 operand_entry_t last)
599 {
600
601   /* If we have two of the same op, and the opcode is &, |, min, or max,
602 we can eliminate one of them.
603 If we have two of the same op, and the opcode is ^, we can
604 eliminate both of them. */
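  /* For example (hypothetical operands): "a & a & b" becomes "a & b",
     "a ^ a ^ b" drops both copies of a, and a plain "a ^ a" is
     replaced below by a zero constant of the appropriate type.  */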
605
606 if (last && last->op == curr->op)
607 {
608 switch (opcode)
609 {
610 case MAX_EXPR:
611 case MIN_EXPR:
612 case BIT_IOR_EXPR:
613 case BIT_AND_EXPR:
614 if (dump_file && (dump_flags & TDF_DETAILS))
615 {
616 fprintf (dump_file, "Equivalence: ");
617 print_generic_expr (dump_file, curr->op, 0);
618 fprintf (dump_file, " [&|minmax] ");
619 print_generic_expr (dump_file, last->op, 0);
620 fprintf (dump_file, " -> ");
621 print_generic_stmt (dump_file, last->op, 0);
622 }
623
624 ops->ordered_remove (i);
625 reassociate_stats.ops_eliminated ++;
626
627 return true;
628
629 case BIT_XOR_EXPR:
630 if (dump_file && (dump_flags & TDF_DETAILS))
631 {
632 fprintf (dump_file, "Equivalence: ");
633 print_generic_expr (dump_file, curr->op, 0);
634 fprintf (dump_file, " ^ ");
635 print_generic_expr (dump_file, last->op, 0);
636 fprintf (dump_file, " -> nothing\n");
637 }
638
639 reassociate_stats.ops_eliminated += 2;
640
641 if (ops->length () == 2)
642 {
643 ops->create (0);
644 add_to_ops_vec (ops, build_zero_cst (TREE_TYPE (last->op)));
645 *all_done = true;
646 }
647 else
648 {
649 ops->ordered_remove (i-1);
650 ops->ordered_remove (i-1);
651 }
652
653 return true;
654
655 default:
656 break;
657 }
658 }
659 return false;
660 }
661
662 static vec<tree> plus_negates;
663
664 /* If OPCODE is PLUS_EXPR and CURR->OP is a negate expression or a bitwise not
665 expression, look in OPS for a corresponding positive operation to cancel
666 it out. If we find one, remove the other from OPS, replace
667 OPS[CURRINDEX] with 0 or -1, respectively, and return true. Otherwise,
668 return false. */
669
670 static bool
671 eliminate_plus_minus_pair (enum tree_code opcode,
672 vec<operand_entry_t> *ops,
673 unsigned int currindex,
674 operand_entry_t curr)
675 {
676 tree negateop;
677 tree notop;
678 unsigned int i;
679 operand_entry_t oe;
680
681 if (opcode != PLUS_EXPR || TREE_CODE (curr->op) != SSA_NAME)
682 return false;
683
684 negateop = get_unary_op (curr->op, NEGATE_EXPR);
685 notop = get_unary_op (curr->op, BIT_NOT_EXPR);
686 if (negateop == NULL_TREE && notop == NULL_TREE)
687 return false;
688
689 /* Any non-negated version will have a rank that is one less than
690 the current rank. So once we hit those ranks, if we don't find
691 one, we can stop. */
692
693 for (i = currindex + 1;
694 ops->iterate (i, &oe)
695 && oe->rank >= curr->rank - 1 ;
696 i++)
697 {
698 if (oe->op == negateop)
699 {
700
701 if (dump_file && (dump_flags & TDF_DETAILS))
702 {
703 fprintf (dump_file, "Equivalence: ");
704 print_generic_expr (dump_file, negateop, 0);
705 fprintf (dump_file, " + -");
706 print_generic_expr (dump_file, oe->op, 0);
707 fprintf (dump_file, " -> 0\n");
708 }
709
710 ops->ordered_remove (i);
711 add_to_ops_vec (ops, build_zero_cst (TREE_TYPE (oe->op)));
712 ops->ordered_remove (currindex);
713 reassociate_stats.ops_eliminated ++;
714
715 return true;
716 }
717 else if (oe->op == notop)
718 {
719 tree op_type = TREE_TYPE (oe->op);
720
721 if (dump_file && (dump_flags & TDF_DETAILS))
722 {
723 fprintf (dump_file, "Equivalence: ");
724 print_generic_expr (dump_file, notop, 0);
725 fprintf (dump_file, " + ~");
726 print_generic_expr (dump_file, oe->op, 0);
727 fprintf (dump_file, " -> -1\n");
728 }
729
730 ops->ordered_remove (i);
731 add_to_ops_vec (ops, build_int_cst_type (op_type, -1));
732 ops->ordered_remove (currindex);
733 reassociate_stats.ops_eliminated ++;
734
735 return true;
736 }
737 }
738
739 /* CURR->OP is a negate expr in a plus expr: save it for later
740 inspection in repropagate_negates(). */
741 if (negateop != NULL_TREE)
742 plus_negates.safe_push (curr->op);
743
744 return false;
745 }
746
747 /* If OPCODE is BIT_IOR_EXPR or BIT_AND_EXPR, and CURR->OP is really a
748 bitwise not expression, look in OPS for a corresponding operand to
749 cancel it out. If we find one, remove the other from OPS, replace
750 OPS[CURRINDEX] with 0, and return true. Otherwise, return
751 false. */
752
753 static bool
754 eliminate_not_pairs (enum tree_code opcode,
755 vec<operand_entry_t> *ops,
756 unsigned int currindex,
757 operand_entry_t curr)
758 {
759 tree notop;
760 unsigned int i;
761 operand_entry_t oe;
762
763 if ((opcode != BIT_IOR_EXPR && opcode != BIT_AND_EXPR)
764 || TREE_CODE (curr->op) != SSA_NAME)
765 return false;
766
767 notop = get_unary_op (curr->op, BIT_NOT_EXPR);
768 if (notop == NULL_TREE)
769 return false;
770
771 /* Any non-not version will have a rank that is one less than
772 the current rank. So once we hit those ranks, if we don't find
773 one, we can stop. */
774
775 for (i = currindex + 1;
776 ops->iterate (i, &oe)
777 && oe->rank >= curr->rank - 1;
778 i++)
779 {
780 if (oe->op == notop)
781 {
782 if (dump_file && (dump_flags & TDF_DETAILS))
783 {
784 fprintf (dump_file, "Equivalence: ");
785 print_generic_expr (dump_file, notop, 0);
786 if (opcode == BIT_AND_EXPR)
787 fprintf (dump_file, " & ~");
788 else if (opcode == BIT_IOR_EXPR)
789 fprintf (dump_file, " | ~");
790 print_generic_expr (dump_file, oe->op, 0);
791 if (opcode == BIT_AND_EXPR)
792 fprintf (dump_file, " -> 0\n");
793 else if (opcode == BIT_IOR_EXPR)
794 fprintf (dump_file, " -> -1\n");
795 }
796
797 if (opcode == BIT_AND_EXPR)
798 oe->op = build_zero_cst (TREE_TYPE (oe->op));
799 else if (opcode == BIT_IOR_EXPR)
800 oe->op = build_low_bits_mask (TREE_TYPE (oe->op),
801 TYPE_PRECISION (TREE_TYPE (oe->op)));
802
803 reassociate_stats.ops_eliminated += ops->length () - 1;
804 ops->truncate (0);
805 ops->quick_push (oe);
806 return true;
807 }
808 }
809
810 return false;
811 }
812
813 /* Use constant value that may be present in OPS to try to eliminate
814 operands. Note that this function is only really used when we've
815 eliminated ops for other reasons, or merged constants. Across
816 single statements, fold already does all of this, plus more. There
817 is little point in duplicating logic, so I've only included the
818 identities that I could ever construct testcases to trigger. */
819
820 static void
821 eliminate_using_constants (enum tree_code opcode,
822 vec<operand_entry_t> *ops)
823 {
824 operand_entry_t oelast = ops->last ();
825 tree type = TREE_TYPE (oelast->op);
826
827 if (oelast->rank == 0
828 && (INTEGRAL_TYPE_P (type) || FLOAT_TYPE_P (type)))
829 {
830 switch (opcode)
831 {
832 case BIT_AND_EXPR:
833 if (integer_zerop (oelast->op))
834 {
835 if (ops->length () != 1)
836 {
837 if (dump_file && (dump_flags & TDF_DETAILS))
838 fprintf (dump_file, "Found & 0, removing all other ops\n");
839
840 reassociate_stats.ops_eliminated += ops->length () - 1;
841
842 ops->truncate (0);
843 ops->quick_push (oelast);
844 return;
845 }
846 }
847 else if (integer_all_onesp (oelast->op))
848 {
849 if (ops->length () != 1)
850 {
851 if (dump_file && (dump_flags & TDF_DETAILS))
852 fprintf (dump_file, "Found & -1, removing\n");
853 ops->pop ();
854 reassociate_stats.ops_eliminated++;
855 }
856 }
857 break;
858 case BIT_IOR_EXPR:
859 if (integer_all_onesp (oelast->op))
860 {
861 if (ops->length () != 1)
862 {
863 if (dump_file && (dump_flags & TDF_DETAILS))
864 fprintf (dump_file, "Found | -1, removing all other ops\n");
865
866 reassociate_stats.ops_eliminated += ops->length () - 1;
867
868 ops->truncate (0);
869 ops->quick_push (oelast);
870 return;
871 }
872 }
873 else if (integer_zerop (oelast->op))
874 {
875 if (ops->length () != 1)
876 {
877 if (dump_file && (dump_flags & TDF_DETAILS))
878 fprintf (dump_file, "Found | 0, removing\n");
879 ops->pop ();
880 reassociate_stats.ops_eliminated++;
881 }
882 }
883 break;
884 case MULT_EXPR:
885 if (integer_zerop (oelast->op)
886 || (FLOAT_TYPE_P (type)
887 && !HONOR_NANS (TYPE_MODE (type))
888 && !HONOR_SIGNED_ZEROS (TYPE_MODE (type))
889 && real_zerop (oelast->op)))
890 {
891 if (ops->length () != 1)
892 {
893 if (dump_file && (dump_flags & TDF_DETAILS))
894 fprintf (dump_file, "Found * 0, removing all other ops\n");
895
896 reassociate_stats.ops_eliminated += ops->length () - 1;
897 ops->truncate (1);
898 ops->quick_push (oelast);
899 return;
900 }
901 }
902 else if (integer_onep (oelast->op)
903 || (FLOAT_TYPE_P (type)
904 && !HONOR_SNANS (TYPE_MODE (type))
905 && real_onep (oelast->op)))
906 {
907 if (ops->length () != 1)
908 {
909 if (dump_file && (dump_flags & TDF_DETAILS))
910 fprintf (dump_file, "Found * 1, removing\n");
911 ops->pop ();
912 reassociate_stats.ops_eliminated++;
913 return;
914 }
915 }
916 break;
917 case BIT_XOR_EXPR:
918 case PLUS_EXPR:
919 case MINUS_EXPR:
920 if (integer_zerop (oelast->op)
921 || (FLOAT_TYPE_P (type)
922 && (opcode == PLUS_EXPR || opcode == MINUS_EXPR)
923 && fold_real_zero_addition_p (type, oelast->op,
924 opcode == MINUS_EXPR)))
925 {
926 if (ops->length () != 1)
927 {
928 if (dump_file && (dump_flags & TDF_DETAILS))
929 fprintf (dump_file, "Found [|^+] 0, removing\n");
930 ops->pop ();
931 reassociate_stats.ops_eliminated++;
932 return;
933 }
934 }
935 break;
936 default:
937 break;
938 }
939 }
940 }
941
942
943 static void linearize_expr_tree (vec<operand_entry_t> *, gimple,
944 bool, bool);
945
946 /* Structure for tracking and counting operands. */
947 typedef struct oecount_s {
948 int cnt;
949 int id;
950 enum tree_code oecode;
951 tree op;
952 } oecount;
953
954
955 /* The heap for the oecount hashtable and the sorted list of operands. */
956 static vec<oecount> cvec;
957
958
959 /* Oecount hashtable helpers. */
960
961 struct oecount_hasher : typed_noop_remove <void>
962 {
963 /* Note that this hash table stores integers, not pointers.
964 So, observe the casting in the member functions. */
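  /* The stored values are cvec indices biased by 42 (see the
     "idx = cvec.length () + 41" logic in undistribute_ops_list).
     Presumably any bias of two or more would do; the point is to keep
     real entries away from the 0 (empty) and 1 (deleted) slot
     markers.  */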
965 typedef void value_type;
966 typedef void compare_type;
967 static inline hashval_t hash (const value_type *);
968 static inline bool equal (const value_type *, const compare_type *);
969 };
970
971 /* Hash function for oecount. */
972
973 inline hashval_t
974 oecount_hasher::hash (const value_type *p)
975 {
976 const oecount *c = &cvec[(size_t)p - 42];
977 return htab_hash_pointer (c->op) ^ (hashval_t)c->oecode;
978 }
979
980 /* Comparison function for oecount. */
981
982 inline bool
983 oecount_hasher::equal (const value_type *p1, const compare_type *p2)
984 {
985 const oecount *c1 = &cvec[(size_t)p1 - 42];
986 const oecount *c2 = &cvec[(size_t)p2 - 42];
987 return (c1->oecode == c2->oecode
988 && c1->op == c2->op);
989 }
990
991 /* Comparison function for qsort sorting oecount elements by count. */
992
993 static int
994 oecount_cmp (const void *p1, const void *p2)
995 {
996 const oecount *c1 = (const oecount *)p1;
997 const oecount *c2 = (const oecount *)p2;
998 if (c1->cnt != c2->cnt)
999 return c1->cnt - c2->cnt;
1000 else
1001 /* If counts are identical, use unique IDs to stabilize qsort. */
1002 return c1->id - c2->id;
1003 }
1004
1005 /* Return TRUE iff STMT represents a builtin call that raises OP
1006 to some exponent. */
1007
1008 static bool
1009 stmt_is_power_of_op (gimple stmt, tree op)
1010 {
1011 tree fndecl;
1012
1013 if (!is_gimple_call (stmt))
1014 return false;
1015
1016 fndecl = gimple_call_fndecl (stmt);
1017
1018 if (!fndecl
1019 || DECL_BUILT_IN_CLASS (fndecl) != BUILT_IN_NORMAL)
1020 return false;
1021
1022 switch (DECL_FUNCTION_CODE (gimple_call_fndecl (stmt)))
1023 {
1024 CASE_FLT_FN (BUILT_IN_POW):
1025 CASE_FLT_FN (BUILT_IN_POWI):
1026 return (operand_equal_p (gimple_call_arg (stmt, 0), op, 0));
1027
1028 default:
1029 return false;
1030 }
1031 }
1032
1033 /* Given STMT which is a __builtin_pow* call, decrement its exponent
1034 in place and return the result. Assumes that stmt_is_power_of_op
1035 was previously called for STMT and returned TRUE. */
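/* For instance (hypothetical operands), "__builtin_powi (x, 5)" is
   rewritten in place as "__builtin_powi (x, 4)" and 4 is returned.  */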
1036
1037 static HOST_WIDE_INT
1038 decrement_power (gimple stmt)
1039 {
1040 REAL_VALUE_TYPE c, cint;
1041 HOST_WIDE_INT power;
1042 tree arg1;
1043
1044 switch (DECL_FUNCTION_CODE (gimple_call_fndecl (stmt)))
1045 {
1046 CASE_FLT_FN (BUILT_IN_POW):
1047 arg1 = gimple_call_arg (stmt, 1);
1048 c = TREE_REAL_CST (arg1);
1049 power = real_to_integer (&c) - 1;
1050 real_from_integer (&cint, VOIDmode, power, 0, 0);
1051 gimple_call_set_arg (stmt, 1, build_real (TREE_TYPE (arg1), cint));
1052 return power;
1053
1054 CASE_FLT_FN (BUILT_IN_POWI):
1055 arg1 = gimple_call_arg (stmt, 1);
1056 power = TREE_INT_CST_LOW (arg1) - 1;
1057 gimple_call_set_arg (stmt, 1, build_int_cst (TREE_TYPE (arg1), power));
1058 return power;
1059
1060 default:
1061 gcc_unreachable ();
1062 }
1063 }
1064
1065 /* Find the single immediate use of STMT's LHS, and replace it
1066 with OP. Remove STMT. If STMT's LHS is the same as *DEF,
1067 replace *DEF with OP as well. */
1068
1069 static void
1070 propagate_op_to_single_use (tree op, gimple stmt, tree *def)
1071 {
1072 tree lhs;
1073 gimple use_stmt;
1074 use_operand_p use;
1075 gimple_stmt_iterator gsi;
1076
1077 if (is_gimple_call (stmt))
1078 lhs = gimple_call_lhs (stmt);
1079 else
1080 lhs = gimple_assign_lhs (stmt);
1081
1082 gcc_assert (has_single_use (lhs));
1083 single_imm_use (lhs, &use, &use_stmt);
1084 if (lhs == *def)
1085 *def = op;
1086 SET_USE (use, op);
1087 if (TREE_CODE (op) != SSA_NAME)
1088 update_stmt (use_stmt);
1089 gsi = gsi_for_stmt (stmt);
1090 unlink_stmt_vdef (stmt);
1091 gsi_remove (&gsi, true);
1092 release_defs (stmt);
1093 }
1094
1095 /* Walks the linear chain with result *DEF searching for an operation
1096 with operand OP and code OPCODE removing that from the chain. *DEF
1097 is updated if there is only one operand but no operation left. */
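/* A sketch with hypothetical SSA names: starting from

     t1_3 = a_1 * b_2;
     t2_4 = t1_3 * b_2;
     t3_5 = t2_4 * c_6;

   a call with *DEF == t3_5, OPCODE == MULT_EXPR and OP == b_2 walks up
   the chain, finds b_2 as the second operand of t2_4's definition, and
   propagates t1_3 into t2_4's single use, leaving

     t1_3 = a_1 * b_2;
     t3_5 = t1_3 * c_6;

   i.e. one factor of b_2 has been removed from the chain.  */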
1098
1099 static void
1100 zero_one_operation (tree *def, enum tree_code opcode, tree op)
1101 {
1102 gimple stmt = SSA_NAME_DEF_STMT (*def);
1103
1104 do
1105 {
1106 tree name;
1107
1108 if (opcode == MULT_EXPR
1109 && stmt_is_power_of_op (stmt, op))
1110 {
1111 if (decrement_power (stmt) == 1)
1112 propagate_op_to_single_use (op, stmt, def);
1113 return;
1114 }
1115
1116 name = gimple_assign_rhs1 (stmt);
1117
1118 /* If this is the operation we look for and one of the operands
1119 is ours simply propagate the other operand into the stmts
1120 single use. */
1121 if (gimple_assign_rhs_code (stmt) == opcode
1122 && (name == op
1123 || gimple_assign_rhs2 (stmt) == op))
1124 {
1125 if (name == op)
1126 name = gimple_assign_rhs2 (stmt);
1127 propagate_op_to_single_use (name, stmt, def);
1128 return;
1129 }
1130
1131 /* We might have a multiply of two __builtin_pow* calls, and
1132 the operand might be hiding in the rightmost one. */
1133 if (opcode == MULT_EXPR
1134 && gimple_assign_rhs_code (stmt) == opcode
1135 && TREE_CODE (gimple_assign_rhs2 (stmt)) == SSA_NAME
1136 && has_single_use (gimple_assign_rhs2 (stmt)))
1137 {
1138 gimple stmt2 = SSA_NAME_DEF_STMT (gimple_assign_rhs2 (stmt));
1139 if (stmt_is_power_of_op (stmt2, op))
1140 {
1141 if (decrement_power (stmt2) == 1)
1142 propagate_op_to_single_use (op, stmt2, def);
1143 return;
1144 }
1145 }
1146
1147 /* Continue walking the chain. */
1148 gcc_assert (name != op
1149 && TREE_CODE (name) == SSA_NAME);
1150 stmt = SSA_NAME_DEF_STMT (name);
1151 }
1152 while (1);
1153 }
1154
1155 /* Returns true if statement S1 dominates statement S2. Like
1156 stmt_dominates_stmt_p, but uses stmt UIDs to optimize. */
1157
1158 static bool
1159 reassoc_stmt_dominates_stmt_p (gimple s1, gimple s2)
1160 {
1161 basic_block bb1 = gimple_bb (s1), bb2 = gimple_bb (s2);
1162
1163   /* If bb1 is NULL, it should be the GIMPLE_NOP def stmt of a default
1164      definition (D) SSA_NAME.  Assume it lives at the beginning of the function and
1165 thus dominates everything. */
1166 if (!bb1 || s1 == s2)
1167 return true;
1168
1169 /* If bb2 is NULL, it doesn't dominate any stmt with a bb. */
1170 if (!bb2)
1171 return false;
1172
1173 if (bb1 == bb2)
1174 {
1175 /* PHIs in the same basic block are assumed to be
1176 executed all in parallel, if only one stmt is a PHI,
1177 it dominates the other stmt in the same basic block. */
1178 if (gimple_code (s1) == GIMPLE_PHI)
1179 return true;
1180
1181 if (gimple_code (s2) == GIMPLE_PHI)
1182 return false;
1183
1184 gcc_assert (gimple_uid (s1) && gimple_uid (s2));
1185
1186 if (gimple_uid (s1) < gimple_uid (s2))
1187 return true;
1188
1189 if (gimple_uid (s1) > gimple_uid (s2))
1190 return false;
1191
1192 gimple_stmt_iterator gsi = gsi_for_stmt (s1);
1193 unsigned int uid = gimple_uid (s1);
1194 for (gsi_next (&gsi); !gsi_end_p (gsi); gsi_next (&gsi))
1195 {
1196 gimple s = gsi_stmt (gsi);
1197 if (gimple_uid (s) != uid)
1198 break;
1199 if (s == s2)
1200 return true;
1201 }
1202
1203 return false;
1204 }
1205
1206 return dominated_by_p (CDI_DOMINATORS, bb2, bb1);
1207 }
1208
1209 /* Insert STMT after INSERT_POINT. */
1210
1211 static void
1212 insert_stmt_after (gimple stmt, gimple insert_point)
1213 {
1214 gimple_stmt_iterator gsi;
1215 basic_block bb;
1216
1217 if (gimple_code (insert_point) == GIMPLE_PHI)
1218 bb = gimple_bb (insert_point);
1219 else if (!stmt_ends_bb_p (insert_point))
1220 {
1221 gsi = gsi_for_stmt (insert_point);
1222 gimple_set_uid (stmt, gimple_uid (insert_point));
1223 gsi_insert_after (&gsi, stmt, GSI_NEW_STMT);
1224 return;
1225 }
1226 else
1227 /* We assume INSERT_POINT is a SSA_NAME_DEF_STMT of some SSA_NAME,
1228 thus if it must end a basic block, it should be a call that can
1229 throw, or some assignment that can throw. If it throws, the LHS
1230 of it will not be initialized though, so only valid places using
1231 the SSA_NAME should be dominated by the fallthru edge. */
1232 bb = find_fallthru_edge (gimple_bb (insert_point)->succs)->dest;
1233 gsi = gsi_after_labels (bb);
1234 if (gsi_end_p (gsi))
1235 {
1236 gimple_stmt_iterator gsi2 = gsi_last_bb (bb);
1237 gimple_set_uid (stmt,
1238 gsi_end_p (gsi2) ? 1 : gimple_uid (gsi_stmt (gsi2)));
1239 }
1240 else
1241 gimple_set_uid (stmt, gimple_uid (gsi_stmt (gsi)));
1242 gsi_insert_before (&gsi, stmt, GSI_SAME_STMT);
1243 }
1244
1245 /* Builds one statement performing OP1 OPCODE OP2 using TMPVAR for
1246 the result. Places the statement after the definition of either
1247 OP1 or OP2. Returns the new statement. */
1248
1249 static gimple
1250 build_and_add_sum (tree type, tree op1, tree op2, enum tree_code opcode)
1251 {
1252 gimple op1def = NULL, op2def = NULL;
1253 gimple_stmt_iterator gsi;
1254 tree op;
1255 gimple sum;
1256
1257 /* Create the addition statement. */
1258 op = make_ssa_name (type, NULL);
1259 sum = gimple_build_assign_with_ops (opcode, op, op1, op2);
1260
1261 /* Find an insertion place and insert. */
1262 if (TREE_CODE (op1) == SSA_NAME)
1263 op1def = SSA_NAME_DEF_STMT (op1);
1264 if (TREE_CODE (op2) == SSA_NAME)
1265 op2def = SSA_NAME_DEF_STMT (op2);
1266 if ((!op1def || gimple_nop_p (op1def))
1267 && (!op2def || gimple_nop_p (op2def)))
1268 {
1269 gsi = gsi_after_labels (single_succ (ENTRY_BLOCK_PTR));
1270 if (gsi_end_p (gsi))
1271 {
1272 gimple_stmt_iterator gsi2
1273 = gsi_last_bb (single_succ (ENTRY_BLOCK_PTR));
1274 gimple_set_uid (sum,
1275 gsi_end_p (gsi2) ? 1 : gimple_uid (gsi_stmt (gsi2)));
1276 }
1277 else
1278 gimple_set_uid (sum, gimple_uid (gsi_stmt (gsi)));
1279 gsi_insert_before (&gsi, sum, GSI_NEW_STMT);
1280 }
1281 else
1282 {
1283 gimple insert_point;
1284 if ((!op1def || gimple_nop_p (op1def))
1285 || (op2def && !gimple_nop_p (op2def)
1286 && reassoc_stmt_dominates_stmt_p (op1def, op2def)))
1287 insert_point = op2def;
1288 else
1289 insert_point = op1def;
1290 insert_stmt_after (sum, insert_point);
1291 }
1292 update_stmt (sum);
1293
1294 return sum;
1295 }
1296
1297 /* Perform un-distribution of divisions and multiplications.
1298 A * X + B * X is transformed into (A + B) * X and A / X + B / X
1299 to (A + B) / X for real X.
1300
1301 The algorithm is organized as follows.
1302
1303 - First we walk the addition chain *OPS looking for summands that
1304 are defined by a multiplication or a real division. This results
1305 in the candidates bitmap with relevant indices into *OPS.
1306
1307 - Second we build the chains of multiplications or divisions for
1308 these candidates, counting the number of occurrences of (operand, code)
1309 pairs in all of the candidates chains.
1310
1311 - Third we sort the (operand, code) pairs by number of occurrence and
1312 process them starting with the pair with the most uses.
1313
1314 * For each such pair we walk the candidates again to build a
1315 second candidate bitmap noting all multiplication/division chains
1316 that have at least one occurrence of (operand, code).
1317
1318 * We build an alternate addition chain only covering these
1319 candidates with one (operand, code) operation removed from their
1320 multiplication/division chain.
1321
1322 * The first candidate gets replaced by the alternate addition chain
1323 multiplied/divided by the operand.
1324
1325 * All candidate chains get disabled for further processing and
1326 processing of (operand, code) pairs continues.
1327
1328 The alternate addition chains built are re-processed by the main
1329 reassociation algorithm which allows optimizing a * x * y + b * y * x
1330    to (a + b) * x * y in one invocation of the reassociation pass.  */
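/* A small worked example (hypothetical operands): given the addition
   chain {a * x, b * x, c}, the pair (x, MULT_EXPR) occurs twice, so
   one factor of x is removed from both candidate chains, an alternate
   chain "t = a + b" is built, the first candidate is replaced by
   "t * x" and the second by 0, leaving {t * x, 0, c} to be cleaned up
   and rewritten by the normal reassociation machinery.  */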
1331
1332 static bool
1333 undistribute_ops_list (enum tree_code opcode,
1334 vec<operand_entry_t> *ops, struct loop *loop)
1335 {
1336 unsigned int length = ops->length ();
1337 operand_entry_t oe1;
1338 unsigned i, j;
1339 sbitmap candidates, candidates2;
1340 unsigned nr_candidates, nr_candidates2;
1341 sbitmap_iterator sbi0;
1342 vec<operand_entry_t> *subops;
1343 hash_table <oecount_hasher> ctable;
1344 bool changed = false;
1345 int next_oecount_id = 0;
1346
1347 if (length <= 1
1348 || opcode != PLUS_EXPR)
1349 return false;
1350
1351 /* Build a list of candidates to process. */
1352 candidates = sbitmap_alloc (length);
1353 bitmap_clear (candidates);
1354 nr_candidates = 0;
1355 FOR_EACH_VEC_ELT (*ops, i, oe1)
1356 {
1357 enum tree_code dcode;
1358 gimple oe1def;
1359
1360 if (TREE_CODE (oe1->op) != SSA_NAME)
1361 continue;
1362 oe1def = SSA_NAME_DEF_STMT (oe1->op);
1363 if (!is_gimple_assign (oe1def))
1364 continue;
1365 dcode = gimple_assign_rhs_code (oe1def);
1366 if ((dcode != MULT_EXPR
1367 && dcode != RDIV_EXPR)
1368 || !is_reassociable_op (oe1def, dcode, loop))
1369 continue;
1370
1371 bitmap_set_bit (candidates, i);
1372 nr_candidates++;
1373 }
1374
1375 if (nr_candidates < 2)
1376 {
1377 sbitmap_free (candidates);
1378 return false;
1379 }
1380
1381 if (dump_file && (dump_flags & TDF_DETAILS))
1382 {
1383 fprintf (dump_file, "searching for un-distribute opportunities ");
1384 print_generic_expr (dump_file,
1385 (*ops)[bitmap_first_set_bit (candidates)]->op, 0);
1386 fprintf (dump_file, " %d\n", nr_candidates);
1387 }
1388
1389 /* Build linearized sub-operand lists and the counting table. */
1390 cvec.create (0);
1391 ctable.create (15);
1392 /* ??? Macro arguments cannot have multi-argument template types in
1393      them.  This typedef is needed to work around that limitation.  */
1394 typedef vec<operand_entry_t> vec_operand_entry_t_heap;
1395 subops = XCNEWVEC (vec_operand_entry_t_heap, ops->length ());
1396 EXECUTE_IF_SET_IN_BITMAP (candidates, 0, i, sbi0)
1397 {
1398 gimple oedef;
1399 enum tree_code oecode;
1400 unsigned j;
1401
1402 oedef = SSA_NAME_DEF_STMT ((*ops)[i]->op);
1403 oecode = gimple_assign_rhs_code (oedef);
1404 linearize_expr_tree (&subops[i], oedef,
1405 associative_tree_code (oecode), false);
1406
1407 FOR_EACH_VEC_ELT (subops[i], j, oe1)
1408 {
1409 oecount c;
1410 void **slot;
1411 size_t idx;
1412 c.oecode = oecode;
1413 c.cnt = 1;
1414 c.id = next_oecount_id++;
1415 c.op = oe1->op;
1416 cvec.safe_push (c);
1417 idx = cvec.length () + 41;
1418 slot = ctable.find_slot ((void *)idx, INSERT);
1419 if (!*slot)
1420 {
1421 *slot = (void *)idx;
1422 }
1423 else
1424 {
1425 cvec.pop ();
1426 cvec[(size_t)*slot - 42].cnt++;
1427 }
1428 }
1429 }
1430 ctable.dispose ();
1431
1432 /* Sort the counting table. */
1433 cvec.qsort (oecount_cmp);
1434
1435 if (dump_file && (dump_flags & TDF_DETAILS))
1436 {
1437 oecount *c;
1438 fprintf (dump_file, "Candidates:\n");
1439 FOR_EACH_VEC_ELT (cvec, j, c)
1440 {
1441 fprintf (dump_file, " %u %s: ", c->cnt,
1442 c->oecode == MULT_EXPR
1443 ? "*" : c->oecode == RDIV_EXPR ? "/" : "?");
1444 print_generic_expr (dump_file, c->op, 0);
1445 fprintf (dump_file, "\n");
1446 }
1447 }
1448
1449 /* Process the (operand, code) pairs in order of most occurrence. */
1450 candidates2 = sbitmap_alloc (length);
1451 while (!cvec.is_empty ())
1452 {
1453 oecount *c = &cvec.last ();
1454 if (c->cnt < 2)
1455 break;
1456
1457 /* Now collect the operands in the outer chain that contain
1458 the common operand in their inner chain. */
1459 bitmap_clear (candidates2);
1460 nr_candidates2 = 0;
1461 EXECUTE_IF_SET_IN_BITMAP (candidates, 0, i, sbi0)
1462 {
1463 gimple oedef;
1464 enum tree_code oecode;
1465 unsigned j;
1466 tree op = (*ops)[i]->op;
1467
1468 /* If we undistributed in this chain already this may be
1469 a constant. */
1470 if (TREE_CODE (op) != SSA_NAME)
1471 continue;
1472
1473 oedef = SSA_NAME_DEF_STMT (op);
1474 oecode = gimple_assign_rhs_code (oedef);
1475 if (oecode != c->oecode)
1476 continue;
1477
1478 FOR_EACH_VEC_ELT (subops[i], j, oe1)
1479 {
1480 if (oe1->op == c->op)
1481 {
1482 bitmap_set_bit (candidates2, i);
1483 ++nr_candidates2;
1484 break;
1485 }
1486 }
1487 }
1488
1489 if (nr_candidates2 >= 2)
1490 {
1491 operand_entry_t oe1, oe2;
1492 gimple prod;
1493 int first = bitmap_first_set_bit (candidates2);
1494
1495 /* Build the new addition chain. */
1496 oe1 = (*ops)[first];
1497 if (dump_file && (dump_flags & TDF_DETAILS))
1498 {
1499 fprintf (dump_file, "Building (");
1500 print_generic_expr (dump_file, oe1->op, 0);
1501 }
1502 zero_one_operation (&oe1->op, c->oecode, c->op);
1503 EXECUTE_IF_SET_IN_BITMAP (candidates2, first+1, i, sbi0)
1504 {
1505 gimple sum;
1506 oe2 = (*ops)[i];
1507 if (dump_file && (dump_flags & TDF_DETAILS))
1508 {
1509 fprintf (dump_file, " + ");
1510 print_generic_expr (dump_file, oe2->op, 0);
1511 }
1512 zero_one_operation (&oe2->op, c->oecode, c->op);
1513 sum = build_and_add_sum (TREE_TYPE (oe1->op),
1514 oe1->op, oe2->op, opcode);
1515 oe2->op = build_zero_cst (TREE_TYPE (oe2->op));
1516 oe2->rank = 0;
1517 oe1->op = gimple_get_lhs (sum);
1518 }
1519
1520 /* Apply the multiplication/division. */
1521 prod = build_and_add_sum (TREE_TYPE (oe1->op),
1522 oe1->op, c->op, c->oecode);
1523 if (dump_file && (dump_flags & TDF_DETAILS))
1524 {
1525 fprintf (dump_file, ") %s ", c->oecode == MULT_EXPR ? "*" : "/");
1526 print_generic_expr (dump_file, c->op, 0);
1527 fprintf (dump_file, "\n");
1528 }
1529
1530 /* Record it in the addition chain and disable further
1531 undistribution with this op. */
1532 oe1->op = gimple_assign_lhs (prod);
1533 oe1->rank = get_rank (oe1->op);
1534 subops[first].release ();
1535
1536 changed = true;
1537 }
1538
1539 cvec.pop ();
1540 }
1541
1542 for (i = 0; i < ops->length (); ++i)
1543 subops[i].release ();
1544 free (subops);
1545 cvec.release ();
1546 sbitmap_free (candidates);
1547 sbitmap_free (candidates2);
1548
1549 return changed;
1550 }
1551
1552 /* If OPCODE is BIT_IOR_EXPR or BIT_AND_EXPR and CURR is a comparison
1553 expression, examine the other OPS to see if any of them are comparisons
1554 of the same values, which we may be able to combine or eliminate.
1555 For example, we can rewrite (a < b) | (a == b) as (a <= b). */
1556
1557 static bool
1558 eliminate_redundant_comparison (enum tree_code opcode,
1559 vec<operand_entry_t> *ops,
1560 unsigned int currindex,
1561 operand_entry_t curr)
1562 {
1563 tree op1, op2;
1564 enum tree_code lcode, rcode;
1565 gimple def1, def2;
1566 int i;
1567 operand_entry_t oe;
1568
1569 if (opcode != BIT_IOR_EXPR && opcode != BIT_AND_EXPR)
1570 return false;
1571
1572 /* Check that CURR is a comparison. */
1573 if (TREE_CODE (curr->op) != SSA_NAME)
1574 return false;
1575 def1 = SSA_NAME_DEF_STMT (curr->op);
1576 if (!is_gimple_assign (def1))
1577 return false;
1578 lcode = gimple_assign_rhs_code (def1);
1579 if (TREE_CODE_CLASS (lcode) != tcc_comparison)
1580 return false;
1581 op1 = gimple_assign_rhs1 (def1);
1582 op2 = gimple_assign_rhs2 (def1);
1583
1584 /* Now look for a similar comparison in the remaining OPS. */
1585 for (i = currindex + 1; ops->iterate (i, &oe); i++)
1586 {
1587 tree t;
1588
1589 if (TREE_CODE (oe->op) != SSA_NAME)
1590 continue;
1591 def2 = SSA_NAME_DEF_STMT (oe->op);
1592 if (!is_gimple_assign (def2))
1593 continue;
1594 rcode = gimple_assign_rhs_code (def2);
1595 if (TREE_CODE_CLASS (rcode) != tcc_comparison)
1596 continue;
1597
1598 /* If we got here, we have a match. See if we can combine the
1599 two comparisons. */
1600 if (opcode == BIT_IOR_EXPR)
1601 t = maybe_fold_or_comparisons (lcode, op1, op2,
1602 rcode, gimple_assign_rhs1 (def2),
1603 gimple_assign_rhs2 (def2));
1604 else
1605 t = maybe_fold_and_comparisons (lcode, op1, op2,
1606 rcode, gimple_assign_rhs1 (def2),
1607 gimple_assign_rhs2 (def2));
1608 if (!t)
1609 continue;
1610
1611 /* maybe_fold_and_comparisons and maybe_fold_or_comparisons
1612 always give us a boolean_type_node value back. If the original
1613 BIT_AND_EXPR or BIT_IOR_EXPR was of a wider integer type,
1614 we need to convert. */
1615 if (!useless_type_conversion_p (TREE_TYPE (curr->op), TREE_TYPE (t)))
1616 t = fold_convert (TREE_TYPE (curr->op), t);
1617
1618 if (TREE_CODE (t) != INTEGER_CST
1619 && !operand_equal_p (t, curr->op, 0))
1620 {
1621 enum tree_code subcode;
1622 tree newop1, newop2;
1623 if (!COMPARISON_CLASS_P (t))
1624 continue;
1625 extract_ops_from_tree (t, &subcode, &newop1, &newop2);
1626 STRIP_USELESS_TYPE_CONVERSION (newop1);
1627 STRIP_USELESS_TYPE_CONVERSION (newop2);
1628 if (!is_gimple_val (newop1) || !is_gimple_val (newop2))
1629 continue;
1630 }
1631
1632 if (dump_file && (dump_flags & TDF_DETAILS))
1633 {
1634 fprintf (dump_file, "Equivalence: ");
1635 print_generic_expr (dump_file, curr->op, 0);
1636 fprintf (dump_file, " %s ", op_symbol_code (opcode));
1637 print_generic_expr (dump_file, oe->op, 0);
1638 fprintf (dump_file, " -> ");
1639 print_generic_expr (dump_file, t, 0);
1640 fprintf (dump_file, "\n");
1641 }
1642
1643 /* Now we can delete oe, as it has been subsumed by the new combined
1644 expression t. */
1645 ops->ordered_remove (i);
1646 reassociate_stats.ops_eliminated ++;
1647
1648 /* If t is the same as curr->op, we're done. Otherwise we must
1649 replace curr->op with t. Special case is if we got a constant
1650 back, in which case we add it to the end instead of in place of
1651 the current entry. */
1652 if (TREE_CODE (t) == INTEGER_CST)
1653 {
1654 ops->ordered_remove (currindex);
1655 add_to_ops_vec (ops, t);
1656 }
1657 else if (!operand_equal_p (t, curr->op, 0))
1658 {
1659 gimple sum;
1660 enum tree_code subcode;
1661 tree newop1;
1662 tree newop2;
1663 gcc_assert (COMPARISON_CLASS_P (t));
1664 extract_ops_from_tree (t, &subcode, &newop1, &newop2);
1665 STRIP_USELESS_TYPE_CONVERSION (newop1);
1666 STRIP_USELESS_TYPE_CONVERSION (newop2);
1667 gcc_checking_assert (is_gimple_val (newop1)
1668 && is_gimple_val (newop2));
1669 sum = build_and_add_sum (TREE_TYPE (t), newop1, newop2, subcode);
1670 curr->op = gimple_get_lhs (sum);
1671 }
1672 return true;
1673 }
1674
1675 return false;
1676 }
1677
1678 /* Perform various identities and other optimizations on the list of
1679 operand entries, stored in OPS. The tree code for the binary
1680 operation between all the operands is OPCODE. */
1681
1682 static void
1683 optimize_ops_list (enum tree_code opcode,
1684 vec<operand_entry_t> *ops)
1685 {
1686 unsigned int length = ops->length ();
1687 unsigned int i;
1688 operand_entry_t oe;
1689 operand_entry_t oelast = NULL;
1690 bool iterate = false;
1691
1692 if (length == 1)
1693 return;
1694
1695 oelast = ops->last ();
1696
1697 /* If the last two are constants, pop the constants off, merge them
1698 and try the next two. */
1699 if (oelast->rank == 0 && is_gimple_min_invariant (oelast->op))
1700 {
1701 operand_entry_t oelm1 = (*ops)[length - 2];
1702
1703 if (oelm1->rank == 0
1704 && is_gimple_min_invariant (oelm1->op)
1705 && useless_type_conversion_p (TREE_TYPE (oelm1->op),
1706 TREE_TYPE (oelast->op)))
1707 {
1708 tree folded = fold_binary (opcode, TREE_TYPE (oelm1->op),
1709 oelm1->op, oelast->op);
1710
1711 if (folded && is_gimple_min_invariant (folded))
1712 {
1713 if (dump_file && (dump_flags & TDF_DETAILS))
1714 fprintf (dump_file, "Merging constants\n");
1715
1716 ops->pop ();
1717 ops->pop ();
1718
1719 add_to_ops_vec (ops, folded);
1720 reassociate_stats.constants_eliminated++;
1721
1722 optimize_ops_list (opcode, ops);
1723 return;
1724 }
1725 }
1726 }
1727
1728 eliminate_using_constants (opcode, ops);
1729 oelast = NULL;
1730
1731 for (i = 0; ops->iterate (i, &oe);)
1732 {
1733 bool done = false;
1734
1735 if (eliminate_not_pairs (opcode, ops, i, oe))
1736 return;
1737 if (eliminate_duplicate_pair (opcode, ops, &done, i, oe, oelast)
1738 || (!done && eliminate_plus_minus_pair (opcode, ops, i, oe))
1739 || (!done && eliminate_redundant_comparison (opcode, ops, i, oe)))
1740 {
1741 if (done)
1742 return;
1743 iterate = true;
1744 oelast = NULL;
1745 continue;
1746 }
1747 oelast = oe;
1748 i++;
1749 }
1750
1751 length = ops->length ();
1752 oelast = ops->last ();
1753
1754 if (iterate)
1755 optimize_ops_list (opcode, ops);
1756 }
1757
1758 /* The following functions are subroutines to optimize_range_tests and allow
1759 it to try to change a logical combination of comparisons into a range
1760 test.
1761
1762 For example, both
1763 X == 2 || X == 5 || X == 3 || X == 4
1764 and
1765 X >= 2 && X <= 5
1766 are converted to
1767 (unsigned) (X - 2) <= 3
1768
1769    For more information see the comments above fold_range_test in fold-const.c;
1770 this implementation is for GIMPLE. */
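/* The single unsigned comparison above works because subtracting the
   low bound maps every in-range value into [0, high - low], while
   values below the low bound wrap around to very large unsigned
   numbers; e.g. for X in {2, 3, 4, 5}, (unsigned) (X - 2) <= 3 holds,
   whereas X == 1 makes the left-hand side wrap to the maximum
   unsigned value.  */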
1771
1772 struct range_entry
1773 {
1774 tree exp;
1775 tree low;
1776 tree high;
1777 bool in_p;
1778 bool strict_overflow_p;
1779 unsigned int idx, next;
1780 };
1781
1782 /* This is similar to make_range in fold-const.c, but on top of
1783 GIMPLE instead of trees. If EXP is non-NULL, it should be
1784 an SSA_NAME and STMT argument is ignored, otherwise STMT
1785 argument should be a GIMPLE_COND. */
1786
1787 static void
1788 init_range_entry (struct range_entry *r, tree exp, gimple stmt)
1789 {
1790 int in_p;
1791 tree low, high;
1792 bool is_bool, strict_overflow_p;
1793
1794 r->exp = NULL_TREE;
1795 r->in_p = false;
1796 r->strict_overflow_p = false;
1797 r->low = NULL_TREE;
1798 r->high = NULL_TREE;
1799 if (exp != NULL_TREE
1800 && (TREE_CODE (exp) != SSA_NAME || !INTEGRAL_TYPE_P (TREE_TYPE (exp))))
1801 return;
1802
1803 /* Start with simply saying "EXP != 0" and then look at the code of EXP
1804 and see if we can refine the range. Some of the cases below may not
1805 happen, but it doesn't seem worth worrying about this. We "continue"
1806 the outer loop when we've changed something; otherwise we "break"
1807 the switch, which will "break" the while. */
1808 low = exp ? build_int_cst (TREE_TYPE (exp), 0) : boolean_false_node;
1809 high = low;
1810 in_p = 0;
1811 strict_overflow_p = false;
1812 is_bool = false;
1813 if (exp == NULL_TREE)
1814 is_bool = true;
1815 else if (TYPE_PRECISION (TREE_TYPE (exp)) == 1)
1816 {
1817 if (TYPE_UNSIGNED (TREE_TYPE (exp)))
1818 is_bool = true;
1819 else
1820 return;
1821 }
1822 else if (TREE_CODE (TREE_TYPE (exp)) == BOOLEAN_TYPE)
1823 is_bool = true;
1824
1825 while (1)
1826 {
1827 enum tree_code code;
1828 tree arg0, arg1, exp_type;
1829 tree nexp;
1830 location_t loc;
1831
1832 if (exp != NULL_TREE)
1833 {
1834 if (TREE_CODE (exp) != SSA_NAME)
1835 break;
1836
1837 stmt = SSA_NAME_DEF_STMT (exp);
1838 if (!is_gimple_assign (stmt))
1839 break;
1840
1841 code = gimple_assign_rhs_code (stmt);
1842 arg0 = gimple_assign_rhs1 (stmt);
1843 arg1 = gimple_assign_rhs2 (stmt);
1844 exp_type = TREE_TYPE (exp);
1845 }
1846 else
1847 {
1848 code = gimple_cond_code (stmt);
1849 arg0 = gimple_cond_lhs (stmt);
1850 arg1 = gimple_cond_rhs (stmt);
1851 exp_type = boolean_type_node;
1852 }
1853
1854 if (TREE_CODE (arg0) != SSA_NAME)
1855 break;
1856 loc = gimple_location (stmt);
1857 switch (code)
1858 {
1859 case BIT_NOT_EXPR:
1860 if (TREE_CODE (TREE_TYPE (exp)) == BOOLEAN_TYPE
1861 /* Ensure the range is either +[-,0], +[0,0],
1862 -[-,0], -[0,0] or +[1,-], +[1,1], -[1,-] or
1863 -[1,1]. If it is e.g. +[-,-] or -[-,-]
1864 or similar expression of unconditional true or
1865 false, it should not be negated. */
1866 && ((high && integer_zerop (high))
1867 || (low && integer_onep (low))))
1868 {
1869 in_p = !in_p;
1870 exp = arg0;
1871 continue;
1872 }
1873 break;
1874 case SSA_NAME:
1875 exp = arg0;
1876 continue;
1877 CASE_CONVERT:
1878 if (is_bool)
1879 goto do_default;
1880 if (TYPE_PRECISION (TREE_TYPE (arg0)) == 1)
1881 {
1882 if (TYPE_UNSIGNED (TREE_TYPE (arg0)))
1883 is_bool = true;
1884 else
1885 return;
1886 }
1887 else if (TREE_CODE (TREE_TYPE (arg0)) == BOOLEAN_TYPE)
1888 is_bool = true;
1889 goto do_default;
1890 case EQ_EXPR:
1891 case NE_EXPR:
1892 case LT_EXPR:
1893 case LE_EXPR:
1894 case GE_EXPR:
1895 case GT_EXPR:
1896 is_bool = true;
1897 /* FALLTHRU */
1898 default:
1899 if (!is_bool)
1900 return;
1901 do_default:
1902 nexp = make_range_step (loc, code, arg0, arg1, exp_type,
1903 &low, &high, &in_p,
1904 &strict_overflow_p);
1905 if (nexp != NULL_TREE)
1906 {
1907 exp = nexp;
1908 gcc_assert (TREE_CODE (exp) == SSA_NAME);
1909 continue;
1910 }
1911 break;
1912 }
1913 break;
1914 }
1915 if (is_bool)
1916 {
1917 r->exp = exp;
1918 r->in_p = in_p;
1919 r->low = low;
1920 r->high = high;
1921 r->strict_overflow_p = strict_overflow_p;
1922 }
1923 }
1924
1925 /* Comparison function for qsort. Sort entries
1926 without SSA_NAME exp first, then with SSA_NAMEs sorted
1927 by increasing SSA_NAME_VERSION, and for the same SSA_NAMEs
1928 by increasing ->low and if ->low is the same, by increasing
1929 ->high. ->low == NULL_TREE means minimum, ->high == NULL_TREE
1930 maximum. */
1931
1932 static int
1933 range_entry_cmp (const void *a, const void *b)
1934 {
1935 const struct range_entry *p = (const struct range_entry *) a;
1936 const struct range_entry *q = (const struct range_entry *) b;
1937
1938 if (p->exp != NULL_TREE && TREE_CODE (p->exp) == SSA_NAME)
1939 {
1940 if (q->exp != NULL_TREE && TREE_CODE (q->exp) == SSA_NAME)
1941 {
1942 /* Group range_entries for the same SSA_NAME together. */
1943 if (SSA_NAME_VERSION (p->exp) < SSA_NAME_VERSION (q->exp))
1944 return -1;
1945 else if (SSA_NAME_VERSION (p->exp) > SSA_NAME_VERSION (q->exp))
1946 return 1;
1947 /* If ->low is different, NULL low goes first, then by
1948 ascending low. */
1949 if (p->low != NULL_TREE)
1950 {
1951 if (q->low != NULL_TREE)
1952 {
1953 tree tem = fold_binary (LT_EXPR, boolean_type_node,
1954 p->low, q->low);
1955 if (tem && integer_onep (tem))
1956 return -1;
1957 tem = fold_binary (GT_EXPR, boolean_type_node,
1958 p->low, q->low);
1959 if (tem && integer_onep (tem))
1960 return 1;
1961 }
1962 else
1963 return 1;
1964 }
1965 else if (q->low != NULL_TREE)
1966 return -1;
1967 /* If ->high is different, NULL high goes last, before that by
1968 ascending high. */
1969 if (p->high != NULL_TREE)
1970 {
1971 if (q->high != NULL_TREE)
1972 {
1973 tree tem = fold_binary (LT_EXPR, boolean_type_node,
1974 p->high, q->high);
1975 if (tem && integer_onep (tem))
1976 return -1;
1977 tem = fold_binary (GT_EXPR, boolean_type_node,
1978 p->high, q->high);
1979 if (tem && integer_onep (tem))
1980 return 1;
1981 }
1982 else
1983 return -1;
1984 }
1985 else if (p->high != NULL_TREE)
1986 return 1;
1987 /* If both ranges are the same, sort below by ascending idx. */
1988 }
1989 else
1990 return 1;
1991 }
1992 else if (q->exp != NULL_TREE && TREE_CODE (q->exp) == SSA_NAME)
1993 return -1;
1994
1995 if (p->idx < q->idx)
1996 return -1;
1997 else
1998 {
1999 gcc_checking_assert (p->idx > q->idx);
2000 return 1;
2001 }
2002 }
2003
2004 /* Helper routine of optimize_range_tests.
2005 [EXP, IN_P, LOW, HIGH, STRICT_OVERFLOW_P] is a merged range for
2006 RANGE and OTHERRANGE through OTHERRANGE + COUNT - 1 ranges,
2007 OPCODE and OPS are arguments of optimize_range_tests. Return
2008 true if the range merge has been successful.
2009 If OPCODE is ERROR_MARK, this is called from within
2010 maybe_optimize_range_tests and is performing inter-bb range optimization.
2011 In that case, whether an op is BIT_AND_EXPR or BIT_IOR_EXPR is found in
2012 oe->rank. */
2013
2014 static bool
2015 update_range_test (struct range_entry *range, struct range_entry *otherrange,
2016 unsigned int count, enum tree_code opcode,
2017 vec<operand_entry_t> *ops, tree exp, bool in_p,
2018 tree low, tree high, bool strict_overflow_p)
2019 {
2020 operand_entry_t oe = (*ops)[range->idx];
2021 tree op = oe->op;
2022 gimple stmt = op ? SSA_NAME_DEF_STMT (op) : last_stmt (BASIC_BLOCK (oe->id));
2023 location_t loc = gimple_location (stmt);
2024 tree optype = op ? TREE_TYPE (op) : boolean_type_node;
2025 tree tem = build_range_check (loc, optype, exp, in_p, low, high);
2026 enum warn_strict_overflow_code wc = WARN_STRICT_OVERFLOW_COMPARISON;
2027 gimple_stmt_iterator gsi;
2028
2029 if (tem == NULL_TREE)
2030 return false;
2031
2032 if (strict_overflow_p && issue_strict_overflow_warning (wc))
2033 warning_at (loc, OPT_Wstrict_overflow,
2034 "assuming signed overflow does not occur "
2035 "when simplifying range test");
2036
2037 if (dump_file && (dump_flags & TDF_DETAILS))
2038 {
2039 struct range_entry *r;
2040 fprintf (dump_file, "Optimizing range tests ");
2041 print_generic_expr (dump_file, range->exp, 0);
2042 fprintf (dump_file, " %c[", range->in_p ? '+' : '-');
2043 print_generic_expr (dump_file, range->low, 0);
2044 fprintf (dump_file, ", ");
2045 print_generic_expr (dump_file, range->high, 0);
2046 fprintf (dump_file, "]");
2047 for (r = otherrange; r < otherrange + count; r++)
2048 {
2049 fprintf (dump_file, " and %c[", r->in_p ? '+' : '-');
2050 print_generic_expr (dump_file, r->low, 0);
2051 fprintf (dump_file, ", ");
2052 print_generic_expr (dump_file, r->high, 0);
2053 fprintf (dump_file, "]");
2054 }
2055 fprintf (dump_file, "\n into ");
2056 print_generic_expr (dump_file, tem, 0);
2057 fprintf (dump_file, "\n");
2058 }
2059
2060 if (opcode == BIT_IOR_EXPR
2061 || (opcode == ERROR_MARK && oe->rank == BIT_IOR_EXPR))
2062 tem = invert_truthvalue_loc (loc, tem);
2063
2064 tem = fold_convert_loc (loc, optype, tem);
2065 gsi = gsi_for_stmt (stmt);
2066 tem = force_gimple_operand_gsi (&gsi, tem, true, NULL_TREE, true,
2067 GSI_SAME_STMT);
2068 for (gsi_prev (&gsi); !gsi_end_p (gsi); gsi_prev (&gsi))
2069 if (gimple_uid (gsi_stmt (gsi)))
2070 break;
2071 else
2072 gimple_set_uid (gsi_stmt (gsi), gimple_uid (stmt));
2073
2074 oe->op = tem;
2075 range->exp = exp;
2076 range->low = low;
2077 range->high = high;
2078 range->in_p = in_p;
2079 range->strict_overflow_p = false;
2080
2081 for (range = otherrange; range < otherrange + count; range++)
2082 {
2083 oe = (*ops)[range->idx];
2084 /* Now change all the other range test immediate uses, so that
2085 those tests will be optimized away. */
2086 if (opcode == ERROR_MARK)
2087 {
2088 if (oe->op)
2089 oe->op = build_int_cst (TREE_TYPE (oe->op),
2090 oe->rank == BIT_IOR_EXPR ? 0 : 1);
2091 else
2092 oe->op = (oe->rank == BIT_IOR_EXPR
2093 ? boolean_false_node : boolean_true_node);
2094 }
2095 else
2096 oe->op = error_mark_node;
2097 range->exp = NULL_TREE;
2098 }
2099 return true;
2100 }
2101
2102 /* Optimize X == CST1 || X == CST2
2103 if popcount (CST1 ^ CST2) == 1 into
2104 (X & ~(CST1 ^ CST2)) == (CST1 & ~(CST1 ^ CST2)).
2105 Similarly for ranges. E.g.
2106 X != 2 && X != 3 && X != 10 && X != 11
2107 will be transformed by the previous optimization into
2108 !((X - 2U) <= 1U || (X - 10U) <= 1U)
2109 and this loop can transform that into
2110 !(((X & ~8) - 2U) <= 1U). */
2111
2112 static bool
2113 optimize_range_tests_xor (enum tree_code opcode, tree type,
2114 tree lowi, tree lowj, tree highi, tree highj,
2115 vec<operand_entry_t> *ops,
2116 struct range_entry *rangei,
2117 struct range_entry *rangej)
2118 {
2119 tree lowxor, highxor, tem, exp;
2120 /* Check highi ^ lowi == highj ^ lowj and
2121 popcount (highi ^ lowi) == 1. */
2122 lowxor = fold_binary (BIT_XOR_EXPR, type, lowi, lowj);
2123 if (lowxor == NULL_TREE || TREE_CODE (lowxor) != INTEGER_CST)
2124 return false;
2125 if (tree_log2 (lowxor) < 0)
2126 return false;
2127 highxor = fold_binary (BIT_XOR_EXPR, type, highi, highj);
2128 if (!tree_int_cst_equal (lowxor, highxor))
2129 return false;
2130
2131 tem = fold_build1 (BIT_NOT_EXPR, type, lowxor);
2132 exp = fold_build2 (BIT_AND_EXPR, type, rangei->exp, tem);
2133 lowj = fold_build2 (BIT_AND_EXPR, type, lowi, tem);
2134 highj = fold_build2 (BIT_AND_EXPR, type, highi, tem);
2135 if (update_range_test (rangei, rangej, 1, opcode, ops, exp,
2136 rangei->in_p, lowj, highj,
2137 rangei->strict_overflow_p
2138 || rangej->strict_overflow_p))
2139 return true;
2140 return false;
2141 }
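
/* Illustrative sketch only, not used by the pass: with the hypothetical
   constants CST1 == 43 and CST2 == 47, 43 ^ 47 == 4 has a single bit set,
   so the rule implemented above folds X == 43 || X == 47 into one masked
   comparison of the shape (X & ~(CST1 ^ CST2)) == (CST1 & ~(CST1 ^ CST2)).  */
static inline bool
reassoc_example_xor_merged_test (unsigned int x)
{
  /* 43 & ~4 == 43, so exactly the values 43 and 47 satisfy this.  */
  return (x & ~4u) == 43u;
}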
2142
2143 /* Optimize X == CST1 || X == CST2
2144 if popcount (CST2 - CST1) == 1 into
2145 ((X - CST1) & ~(CST2 - CST1)) == 0.
2146 Similarly for ranges. E.g.
2147 X == 43 || X == 76 || X == 44 || X == 78 || X == 77 || X == 46
2148 || X == 75 || X == 45
2149 will be transformed by the previous optimization into
2150 (X - 43U) <= 3U || (X - 75U) <= 3U
2151 and this loop can transform that into
2152 ((X - 43U) & ~(75U - 43U)) <= 3U. */
2153 static bool
2154 optimize_range_tests_diff (enum tree_code opcode, tree type,
2155 tree lowi, tree lowj, tree highi, tree highj,
2156 vec<operand_entry_t> *ops,
2157 struct range_entry *rangei,
2158 struct range_entry *rangej)
2159 {
2160 tree tem1, tem2, mask;
2161 /* Check highi - lowi == highj - lowj. */
2162 tem1 = fold_binary (MINUS_EXPR, type, highi, lowi);
2163 if (tem1 == NULL_TREE || TREE_CODE (tem1) != INTEGER_CST)
2164 return false;
2165 tem2 = fold_binary (MINUS_EXPR, type, highj, lowj);
2166 if (!tree_int_cst_equal (tem1, tem2))
2167 return false;
2168 /* Check popcount (lowj - lowi) == 1. */
2169 tem1 = fold_binary (MINUS_EXPR, type, lowj, lowi);
2170 if (tem1 == NULL_TREE || TREE_CODE (tem1) != INTEGER_CST)
2171 return false;
2172 if (tree_log2 (tem1) < 0)
2173 return false;
2174
2175 mask = fold_build1 (BIT_NOT_EXPR, type, tem1);
2176 tem1 = fold_binary (MINUS_EXPR, type, rangei->exp, lowi);
2177 tem1 = fold_build2 (BIT_AND_EXPR, type, tem1, mask);
2178 lowj = build_int_cst (type, 0);
2179 if (update_range_test (rangei, rangej, 1, opcode, ops, tem1,
2180 rangei->in_p, lowj, tem2,
2181 rangei->strict_overflow_p
2182 || rangej->strict_overflow_p))
2183 return true;
2184 return false;
2185 }
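
/* Illustrative sketch only, not used by the pass: with the hypothetical
   constants CST1 == 5 and CST2 == 13, CST2 - CST1 == 8 has a single bit
   set, so the rule implemented above folds X == 5 || X == 13 into
   ((X - CST1) & ~(CST2 - CST1)) == 0.  */
static inline bool
reassoc_example_diff_merged_test (unsigned int x)
{
  /* X == 5 gives 0, X == 13 gives 8 & ~8 == 0, anything else is nonzero.  */
  return ((x - 5u) & ~8u) == 0;
}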
2186
2187 /* Helper for optimize_range_tests that performs the checks common to
2188    optimize_range_tests_xor and optimize_range_tests_diff.
2189    If OPTIMIZE_XOR is TRUE, it calls optimize_range_tests_xor.
2190    Else it calls optimize_range_tests_diff.  */
2191
2192 static bool
2193 optimize_range_tests_1 (enum tree_code opcode, int first, int length,
2194 bool optimize_xor, vec<operand_entry_t> *ops,
2195 struct range_entry *ranges)
2196 {
2197 int i, j;
2198 bool any_changes = false;
2199 for (i = first; i < length; i++)
2200 {
2201 tree lowi, highi, lowj, highj, type, tem;
2202
2203 if (ranges[i].exp == NULL_TREE || ranges[i].in_p)
2204 continue;
2205 type = TREE_TYPE (ranges[i].exp);
2206 if (!INTEGRAL_TYPE_P (type))
2207 continue;
2208 lowi = ranges[i].low;
2209 if (lowi == NULL_TREE)
2210 lowi = TYPE_MIN_VALUE (type);
2211 highi = ranges[i].high;
2212 if (highi == NULL_TREE)
2213 continue;
2214 for (j = i + 1; j < length && j < i + 64; j++)
2215 {
2216 bool changes;
2217 if (ranges[i].exp != ranges[j].exp || ranges[j].in_p)
2218 continue;
2219 lowj = ranges[j].low;
2220 if (lowj == NULL_TREE)
2221 continue;
2222 highj = ranges[j].high;
2223 if (highj == NULL_TREE)
2224 highj = TYPE_MAX_VALUE (type);
2225 /* Check lowj > highi. */
2226 tem = fold_binary (GT_EXPR, boolean_type_node,
2227 lowj, highi);
2228 if (tem == NULL_TREE || !integer_onep (tem))
2229 continue;
2230 if (optimize_xor)
2231 changes = optimize_range_tests_xor (opcode, type, lowi, lowj,
2232 highi, highj, ops,
2233 ranges + i, ranges + j);
2234 else
2235 changes = optimize_range_tests_diff (opcode, type, lowi, lowj,
2236 highi, highj, ops,
2237 ranges + i, ranges + j);
2238 if (changes)
2239 {
2240 any_changes = true;
2241 break;
2242 }
2243 }
2244 }
2245 return any_changes;
2246 }
2247
2248 /* Optimize range tests, similarly to how fold_range_test optimizes
2249    them on trees.  The tree code for the binary
2250 operation between all the operands is OPCODE.
2251 If OPCODE is ERROR_MARK, optimize_range_tests is called from within
2252 maybe_optimize_range_tests for inter-bb range optimization.
2253 In that case if oe->op is NULL, oe->id is bb->index whose
2254 GIMPLE_COND is && or ||ed into the test, and oe->rank says
2255 the actual opcode. */
2256
2257 static bool
2258 optimize_range_tests (enum tree_code opcode,
2259 vec<operand_entry_t> *ops)
2260 {
2261 unsigned int length = ops->length (), i, j, first;
2262 operand_entry_t oe;
2263 struct range_entry *ranges;
2264 bool any_changes = false;
2265
2266 if (length == 1)
2267 return false;
2268
2269 ranges = XNEWVEC (struct range_entry, length);
2270 for (i = 0; i < length; i++)
2271 {
2272 oe = (*ops)[i];
2273 ranges[i].idx = i;
2274 init_range_entry (ranges + i, oe->op,
2275 oe->op ? NULL : last_stmt (BASIC_BLOCK (oe->id)));
2276    /* For |, invert it now; we will invert it again before emitting
2277 the optimized expression. */
2278 if (opcode == BIT_IOR_EXPR
2279 || (opcode == ERROR_MARK && oe->rank == BIT_IOR_EXPR))
2280 ranges[i].in_p = !ranges[i].in_p;
2281 }
2282
2283 qsort (ranges, length, sizeof (*ranges), range_entry_cmp);
2284 for (i = 0; i < length; i++)
2285 if (ranges[i].exp != NULL_TREE && TREE_CODE (ranges[i].exp) == SSA_NAME)
2286 break;
2287
2288 /* Try to merge ranges. */
2289 for (first = i; i < length; i++)
2290 {
2291 tree low = ranges[i].low;
2292 tree high = ranges[i].high;
2293 int in_p = ranges[i].in_p;
2294 bool strict_overflow_p = ranges[i].strict_overflow_p;
2295 int update_fail_count = 0;
2296
2297 for (j = i + 1; j < length; j++)
2298 {
2299 if (ranges[i].exp != ranges[j].exp)
2300 break;
2301 if (!merge_ranges (&in_p, &low, &high, in_p, low, high,
2302 ranges[j].in_p, ranges[j].low, ranges[j].high))
2303 break;
2304 strict_overflow_p |= ranges[j].strict_overflow_p;
2305 }
2306
2307 if (j == i + 1)
2308 continue;
2309
2310 if (update_range_test (ranges + i, ranges + i + 1, j - i - 1, opcode,
2311 ops, ranges[i].exp, in_p, low, high,
2312 strict_overflow_p))
2313 {
2314 i = j - 1;
2315 any_changes = true;
2316 }
2317 /* Avoid quadratic complexity if all merge_ranges calls would succeed,
2318 while update_range_test would fail. */
2319 else if (update_fail_count == 64)
2320 i = j - 1;
2321 else
2322 ++update_fail_count;
2323 }
2324
2325 any_changes |= optimize_range_tests_1 (opcode, first, length, true,
2326 ops, ranges);
2327
2328 if (BRANCH_COST (optimize_function_for_speed_p (cfun), false) >= 2)
2329 any_changes |= optimize_range_tests_1 (opcode, first, length, false,
2330 ops, ranges);
2331
2332 if (any_changes && opcode != ERROR_MARK)
2333 {
2334 j = 0;
2335 FOR_EACH_VEC_ELT (*ops, i, oe)
2336 {
2337 if (oe->op == error_mark_node)
2338 continue;
2339 else if (i != j)
2340 (*ops)[j] = oe;
2341 j++;
2342 }
2343 ops->truncate (j);
2344 }
2345
2346 XDELETEVEC (ranges);
2347 return any_changes;
2348 }
2349
2350 /* Return true if STMT is a cast like:
2351 <bb N>:
2352 ...
2353 _123 = (int) _234;
2354
2355 <bb M>:
2356 # _345 = PHI <_123(N), 1(...), 1(...)>
2357    where _234 has bool type, _123 has a single use and
2358 bb N has a single successor M. This is commonly used in
2359 the last block of a range test. */
2360
2361 static bool
2362 final_range_test_p (gimple stmt)
2363 {
2364 basic_block bb, rhs_bb;
2365 edge e;
2366 tree lhs, rhs;
2367 use_operand_p use_p;
2368 gimple use_stmt;
2369
2370 if (!gimple_assign_cast_p (stmt))
2371 return false;
2372 bb = gimple_bb (stmt);
2373 if (!single_succ_p (bb))
2374 return false;
2375 e = single_succ_edge (bb);
2376 if (e->flags & EDGE_COMPLEX)
2377 return false;
2378
2379 lhs = gimple_assign_lhs (stmt);
2380 rhs = gimple_assign_rhs1 (stmt);
2381 if (!INTEGRAL_TYPE_P (TREE_TYPE (lhs))
2382 || TREE_CODE (rhs) != SSA_NAME
2383 || TREE_CODE (TREE_TYPE (rhs)) != BOOLEAN_TYPE)
2384 return false;
2385
2386 /* Test whether lhs is consumed only by a PHI in the only successor bb. */
2387 if (!single_imm_use (lhs, &use_p, &use_stmt))
2388 return false;
2389
2390 if (gimple_code (use_stmt) != GIMPLE_PHI
2391 || gimple_bb (use_stmt) != e->dest)
2392 return false;
2393
2394 /* And that the rhs is defined in the same loop. */
2395 rhs_bb = gimple_bb (SSA_NAME_DEF_STMT (rhs));
2396 if (rhs_bb == NULL
2397 || !flow_bb_inside_loop_p (loop_containing_stmt (stmt), rhs_bb))
2398 return false;
2399
2400 return true;
2401 }
2402
2403 /* Return true if BB is suitable basic block for inter-bb range test
2404 optimization. If BACKWARD is true, BB should be the only predecessor
2405    of TEST_BB, and *OTHER_BB is either NULL and filled in by the routine,
2406    or is compared against to find a common basic block to which all
2407    conditions branch if true resp. false.  If BACKWARD is false, TEST_BB should
2408 be the only predecessor of BB. */
2409
2410 static bool
2411 suitable_cond_bb (basic_block bb, basic_block test_bb, basic_block *other_bb,
2412 bool backward)
2413 {
2414 edge_iterator ei, ei2;
2415 edge e, e2;
2416 gimple stmt;
2417 gimple_stmt_iterator gsi;
2418 bool other_edge_seen = false;
2419 bool is_cond;
2420
2421 if (test_bb == bb)
2422 return false;
2423 /* Check last stmt first. */
2424 stmt = last_stmt (bb);
2425 if (stmt == NULL
2426 || (gimple_code (stmt) != GIMPLE_COND
2427 && (backward || !final_range_test_p (stmt)))
2428 || gimple_visited_p (stmt)
2429 || stmt_could_throw_p (stmt)
2430 || *other_bb == bb)
2431 return false;
2432 is_cond = gimple_code (stmt) == GIMPLE_COND;
2433 if (is_cond)
2434 {
2435 /* If last stmt is GIMPLE_COND, verify that one of the succ edges
2436 goes to the next bb (if BACKWARD, it is TEST_BB), and the other
2437 to *OTHER_BB (if not set yet, try to find it out). */
2438 if (EDGE_COUNT (bb->succs) != 2)
2439 return false;
2440 FOR_EACH_EDGE (e, ei, bb->succs)
2441 {
2442 if (!(e->flags & (EDGE_TRUE_VALUE | EDGE_FALSE_VALUE)))
2443 return false;
2444 if (e->dest == test_bb)
2445 {
2446 if (backward)
2447 continue;
2448 else
2449 return false;
2450 }
2451 if (e->dest == bb)
2452 return false;
2453 if (*other_bb == NULL)
2454 {
2455 FOR_EACH_EDGE (e2, ei2, test_bb->succs)
2456 if (!(e2->flags & (EDGE_TRUE_VALUE | EDGE_FALSE_VALUE)))
2457 return false;
2458 else if (e->dest == e2->dest)
2459 *other_bb = e->dest;
2460 if (*other_bb == NULL)
2461 return false;
2462 }
2463 if (e->dest == *other_bb)
2464 other_edge_seen = true;
2465 else if (backward)
2466 return false;
2467 }
2468 if (*other_bb == NULL || !other_edge_seen)
2469 return false;
2470 }
2471 else if (single_succ (bb) != *other_bb)
2472 return false;
2473
2474 /* Now check all PHIs of *OTHER_BB. */
2475 e = find_edge (bb, *other_bb);
2476 e2 = find_edge (test_bb, *other_bb);
2477 for (gsi = gsi_start_phis (e->dest); !gsi_end_p (gsi); gsi_next (&gsi))
2478 {
2479 gimple phi = gsi_stmt (gsi);
2480 /* If both BB and TEST_BB end with GIMPLE_COND, all PHI arguments
2481 corresponding to BB and TEST_BB predecessor must be the same. */
2482 if (!operand_equal_p (gimple_phi_arg_def (phi, e->dest_idx),
2483 gimple_phi_arg_def (phi, e2->dest_idx), 0))
2484 {
2485 /* Otherwise, if one of the blocks doesn't end with GIMPLE_COND,
2486 one of the PHIs should have the lhs of the last stmt in
2487 that block as PHI arg and that PHI should have 0 or 1
2488 corresponding to it in all other range test basic blocks
2489 considered. */
2490 if (!is_cond)
2491 {
2492 if (gimple_phi_arg_def (phi, e->dest_idx)
2493 == gimple_assign_lhs (stmt)
2494 && (integer_zerop (gimple_phi_arg_def (phi, e2->dest_idx))
2495 || integer_onep (gimple_phi_arg_def (phi,
2496 e2->dest_idx))))
2497 continue;
2498 }
2499 else
2500 {
2501 gimple test_last = last_stmt (test_bb);
2502 if (gimple_code (test_last) != GIMPLE_COND
2503 && gimple_phi_arg_def (phi, e2->dest_idx)
2504 == gimple_assign_lhs (test_last)
2505 && (integer_zerop (gimple_phi_arg_def (phi, e->dest_idx))
2506 || integer_onep (gimple_phi_arg_def (phi, e->dest_idx))))
2507 continue;
2508 }
2509
2510 return false;
2511 }
2512 }
2513 return true;
2514 }
2515
2516 /* Return true if BB doesn't have side-effects that would disallow
2517 range test optimization, all SSA_NAMEs set in the bb are consumed
2518 in the bb and there are no PHIs. */
2519
2520 static bool
2521 no_side_effect_bb (basic_block bb)
2522 {
2523 gimple_stmt_iterator gsi;
2524 gimple last;
2525
2526 if (!gimple_seq_empty_p (phi_nodes (bb)))
2527 return false;
2528 last = last_stmt (bb);
2529 for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
2530 {
2531 gimple stmt = gsi_stmt (gsi);
2532 tree lhs;
2533 imm_use_iterator imm_iter;
2534 use_operand_p use_p;
2535
2536 if (is_gimple_debug (stmt))
2537 continue;
2538 if (gimple_has_side_effects (stmt))
2539 return false;
2540 if (stmt == last)
2541 return true;
2542 if (!is_gimple_assign (stmt))
2543 return false;
2544 lhs = gimple_assign_lhs (stmt);
2545 if (TREE_CODE (lhs) != SSA_NAME)
2546 return false;
2547 if (gimple_assign_rhs_could_trap_p (stmt))
2548 return false;
2549 FOR_EACH_IMM_USE_FAST (use_p, imm_iter, lhs)
2550 {
2551 gimple use_stmt = USE_STMT (use_p);
2552 if (is_gimple_debug (use_stmt))
2553 continue;
2554 if (gimple_bb (use_stmt) != bb)
2555 return false;
2556 }
2557 }
2558 return false;
2559 }
2560
2561 /* If VAR is set by CODE (BIT_{AND,IOR}_EXPR) which is reassociable,
2562 return true and fill in *OPS recursively. */
2563
2564 static bool
2565 get_ops (tree var, enum tree_code code, vec<operand_entry_t> *ops,
2566 struct loop *loop)
2567 {
2568 gimple stmt = SSA_NAME_DEF_STMT (var);
2569 tree rhs[2];
2570 int i;
2571
2572 if (!is_reassociable_op (stmt, code, loop))
2573 return false;
2574
2575 rhs[0] = gimple_assign_rhs1 (stmt);
2576 rhs[1] = gimple_assign_rhs2 (stmt);
2577 gimple_set_visited (stmt, true);
2578 for (i = 0; i < 2; i++)
2579 if (TREE_CODE (rhs[i]) == SSA_NAME
2580 && !get_ops (rhs[i], code, ops, loop)
2581 && has_single_use (rhs[i]))
2582 {
2583 operand_entry_t oe = (operand_entry_t) pool_alloc (operand_entry_pool);
2584
2585 oe->op = rhs[i];
2586 oe->rank = code;
2587 oe->id = 0;
2588 oe->count = 1;
2589 ops->safe_push (oe);
2590 }
2591 return true;
2592 }
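
/* Illustrative example (hypothetical GIMPLE, not from a real dump): for the
   single-use chain
     _1 = a_2 & b_3;
     _4 = _1 & c_5;
   a call get_ops (_4, BIT_AND_EXPR, &ops, loop) pushes a_2, b_3 and c_5
   into *OPS, assuming every statement is reassociable within LOOP.  */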
2593
2594 /* Find the ops that were added by get_ops starting from VAR, see if
2595 they were changed during update_range_test and if yes, create new
2596 stmts. */
2597
2598 static tree
2599 update_ops (tree var, enum tree_code code, vec<operand_entry_t> ops,
2600 unsigned int *pidx, struct loop *loop)
2601 {
2602 gimple stmt = SSA_NAME_DEF_STMT (var);
2603 tree rhs[4];
2604 int i;
2605
2606 if (!is_reassociable_op (stmt, code, loop))
2607 return NULL;
2608
2609 rhs[0] = gimple_assign_rhs1 (stmt);
2610 rhs[1] = gimple_assign_rhs2 (stmt);
2611 rhs[2] = rhs[0];
2612 rhs[3] = rhs[1];
2613 for (i = 0; i < 2; i++)
2614 if (TREE_CODE (rhs[i]) == SSA_NAME)
2615 {
2616 rhs[2 + i] = update_ops (rhs[i], code, ops, pidx, loop);
2617 if (rhs[2 + i] == NULL_TREE)
2618 {
2619 if (has_single_use (rhs[i]))
2620 rhs[2 + i] = ops[(*pidx)++]->op;
2621 else
2622 rhs[2 + i] = rhs[i];
2623 }
2624 }
2625 if ((rhs[2] != rhs[0] || rhs[3] != rhs[1])
2626 && (rhs[2] != rhs[1] || rhs[3] != rhs[0]))
2627 {
2628 gimple_stmt_iterator gsi = gsi_for_stmt (stmt);
2629 var = make_ssa_name (TREE_TYPE (var), NULL);
2630 gimple g = gimple_build_assign_with_ops (gimple_assign_rhs_code (stmt),
2631 var, rhs[2], rhs[3]);
2632 gimple_set_uid (g, gimple_uid (stmt));
2633 gimple_set_visited (g, true);
2634 gsi_insert_before (&gsi, g, GSI_SAME_STMT);
2635 }
2636 return var;
2637 }
2638
2639 /* Structure to track the initial value passed to get_ops and
2640 the range in the ops vector for each basic block. */
2641
2642 struct inter_bb_range_test_entry
2643 {
2644 tree op;
2645 unsigned int first_idx, last_idx;
2646 };
2647
2648 /* Inter-bb range test optimization. */
2649
2650 static void
2651 maybe_optimize_range_tests (gimple stmt)
2652 {
2653 basic_block first_bb = gimple_bb (stmt);
2654 basic_block last_bb = first_bb;
2655 basic_block other_bb = NULL;
2656 basic_block bb;
2657 edge_iterator ei;
2658 edge e;
2659 vec<operand_entry_t> ops = vNULL;
2660 vec<inter_bb_range_test_entry> bbinfo = vNULL;
2661 bool any_changes = false;
2662
2663 /* Consider only basic blocks that end with GIMPLE_COND or
2664 a cast statement satisfying final_range_test_p. All
2665 but the last bb in the first_bb .. last_bb range
2666 should end with GIMPLE_COND. */
2667 if (gimple_code (stmt) == GIMPLE_COND)
2668 {
2669 if (EDGE_COUNT (first_bb->succs) != 2)
2670 return;
2671 }
2672 else if (final_range_test_p (stmt))
2673 other_bb = single_succ (first_bb);
2674 else
2675 return;
2676
2677 if (stmt_could_throw_p (stmt))
2678 return;
2679
2680 /* As relative ordering of post-dominator sons isn't fixed,
2681 maybe_optimize_range_tests can be called first on any
2682 bb in the range we want to optimize. So, start searching
2683 backwards, if first_bb can be set to a predecessor. */
2684 while (single_pred_p (first_bb))
2685 {
2686 basic_block pred_bb = single_pred (first_bb);
2687 if (!suitable_cond_bb (pred_bb, first_bb, &other_bb, true))
2688 break;
2689 if (!no_side_effect_bb (first_bb))
2690 break;
2691 first_bb = pred_bb;
2692 }
2693 /* If first_bb is last_bb, other_bb hasn't been computed yet.
2694 Before starting forward search in last_bb successors, find
2695 out the other_bb. */
2696 if (first_bb == last_bb)
2697 {
2698 other_bb = NULL;
2699    /* As a non-GIMPLE_COND last stmt always terminates the range,
2700    the forward search below can't extend it, so just give up. */
2701 if (gimple_code (stmt) != GIMPLE_COND)
2702 return;
2703    /* Look at both successors.  Either a successor ends with a GIMPLE_COND
2704    and satisfies suitable_cond_bb, or it ends with a cast and
2705    other_bb is that cast's successor. */
2706 FOR_EACH_EDGE (e, ei, first_bb->succs)
2707 if (!(e->flags & (EDGE_TRUE_VALUE | EDGE_FALSE_VALUE))
2708 || e->dest == first_bb)
2709 return;
2710 else if (single_pred_p (e->dest))
2711 {
2712 stmt = last_stmt (e->dest);
2713 if (stmt
2714 && gimple_code (stmt) == GIMPLE_COND
2715 && EDGE_COUNT (e->dest->succs) == 2)
2716 {
2717 if (suitable_cond_bb (first_bb, e->dest, &other_bb, true))
2718 break;
2719 else
2720 other_bb = NULL;
2721 }
2722 else if (stmt
2723 && final_range_test_p (stmt)
2724 && find_edge (first_bb, single_succ (e->dest)))
2725 {
2726 other_bb = single_succ (e->dest);
2727 if (other_bb == first_bb)
2728 other_bb = NULL;
2729 }
2730 }
2731 if (other_bb == NULL)
2732 return;
2733 }
2734 /* Now do the forward search, moving last_bb to successor bbs
2735 that aren't other_bb. */
2736 while (EDGE_COUNT (last_bb->succs) == 2)
2737 {
2738 FOR_EACH_EDGE (e, ei, last_bb->succs)
2739 if (e->dest != other_bb)
2740 break;
2741 if (e == NULL)
2742 break;
2743 if (!single_pred_p (e->dest))
2744 break;
2745 if (!suitable_cond_bb (e->dest, last_bb, &other_bb, false))
2746 break;
2747 if (!no_side_effect_bb (e->dest))
2748 break;
2749 last_bb = e->dest;
2750 }
2751 if (first_bb == last_bb)
2752 return;
2753    /* Here basic blocks first_bb through last_bb's predecessor
2754    end with GIMPLE_COND, each of them has one edge to
2755    other_bb and another to another block in the range,
2756    all blocks except first_bb have no side-effects and
2757    last_bb ends with either a GIMPLE_COND, or a cast satisfying
2758    final_range_test_p. */
2759 for (bb = last_bb; ; bb = single_pred (bb))
2760 {
2761 enum tree_code code;
2762 tree lhs, rhs;
2763 inter_bb_range_test_entry bb_ent;
2764
2765 bb_ent.op = NULL_TREE;
2766 bb_ent.first_idx = ops.length ();
2767 bb_ent.last_idx = bb_ent.first_idx;
2768 e = find_edge (bb, other_bb);
2769 stmt = last_stmt (bb);
2770 gimple_set_visited (stmt, true);
2771 if (gimple_code (stmt) != GIMPLE_COND)
2772 {
2773 use_operand_p use_p;
2774 gimple phi;
2775 edge e2;
2776 unsigned int d;
2777
2778 lhs = gimple_assign_lhs (stmt);
2779 rhs = gimple_assign_rhs1 (stmt);
2780 gcc_assert (bb == last_bb);
2781
2782 /* stmt is
2783 _123 = (int) _234;
2784
2785 followed by:
2786 <bb M>:
2787 # _345 = PHI <_123(N), 1(...), 1(...)>
2788
2789 or 0 instead of 1. If it is 0, the _234
2790 range test is anded together with all the
2791 other range tests, if it is 1, it is ored with
2792 them. */
2793 single_imm_use (lhs, &use_p, &phi);
2794 gcc_assert (gimple_code (phi) == GIMPLE_PHI);
2795 e2 = find_edge (first_bb, other_bb);
2796 d = e2->dest_idx;
2797 gcc_assert (gimple_phi_arg_def (phi, e->dest_idx) == lhs);
2798 if (integer_zerop (gimple_phi_arg_def (phi, d)))
2799 code = BIT_AND_EXPR;
2800 else
2801 {
2802 gcc_checking_assert (integer_onep (gimple_phi_arg_def (phi, d)));
2803 code = BIT_IOR_EXPR;
2804 }
2805
2806 /* If _234 SSA_NAME_DEF_STMT is
2807 _234 = _567 | _789;
2808    (or &, corresponding to 1/0 in the phi arguments),
2809 push into ops the individual range test arguments
2810 of the bitwise or resp. and, recursively. */
2811 if (!get_ops (rhs, code, &ops,
2812 loop_containing_stmt (stmt))
2813 && has_single_use (rhs))
2814 {
2815 /* Otherwise, push the _234 range test itself. */
2816 operand_entry_t oe
2817 = (operand_entry_t) pool_alloc (operand_entry_pool);
2818
2819 oe->op = rhs;
2820 oe->rank = code;
2821 oe->id = 0;
2822 oe->count = 1;
2823 ops.safe_push (oe);
2824 bb_ent.last_idx++;
2825 }
2826 else
2827 bb_ent.last_idx = ops.length ();
2828 bb_ent.op = rhs;
2829 bbinfo.safe_push (bb_ent);
2830 continue;
2831 }
2832 /* Otherwise stmt is GIMPLE_COND. */
2833 code = gimple_cond_code (stmt);
2834 lhs = gimple_cond_lhs (stmt);
2835 rhs = gimple_cond_rhs (stmt);
2836 if (TREE_CODE (lhs) == SSA_NAME
2837 && INTEGRAL_TYPE_P (TREE_TYPE (lhs))
2838 && ((code != EQ_EXPR && code != NE_EXPR)
2839 || rhs != boolean_false_node
2840 /* Either push into ops the individual bitwise
2841 or resp. and operands, depending on which
2842 edge is other_bb. */
2843 || !get_ops (lhs, (((e->flags & EDGE_TRUE_VALUE) == 0)
2844 ^ (code == EQ_EXPR))
2845 ? BIT_AND_EXPR : BIT_IOR_EXPR, &ops,
2846 loop_containing_stmt (stmt))))
2847 {
2848 /* Or push the GIMPLE_COND stmt itself. */
2849 operand_entry_t oe
2850 = (operand_entry_t) pool_alloc (operand_entry_pool);
2851
2852 oe->op = NULL;
2853 oe->rank = (e->flags & EDGE_TRUE_VALUE)
2854 ? BIT_IOR_EXPR : BIT_AND_EXPR;
2855    /* oe->op = NULL signals that there is no SSA_NAME
2856    for the range test, and oe->id instead is the
2857    basic block number at whose end the GIMPLE_COND
2858    is found. */
2859 oe->id = bb->index;
2860 oe->count = 1;
2861 ops.safe_push (oe);
2862 bb_ent.op = NULL;
2863 bb_ent.last_idx++;
2864 }
2865 else if (ops.length () > bb_ent.first_idx)
2866 {
2867 bb_ent.op = lhs;
2868 bb_ent.last_idx = ops.length ();
2869 }
2870 bbinfo.safe_push (bb_ent);
2871 if (bb == first_bb)
2872 break;
2873 }
2874 if (ops.length () > 1)
2875 any_changes = optimize_range_tests (ERROR_MARK, &ops);
2876 if (any_changes)
2877 {
2878 unsigned int idx;
2879 /* update_ops relies on has_single_use predicates returning the
2880    same values as they did during get_ops earlier. Additionally it
2881 never removes statements, only adds new ones and it should walk
2882 from the single imm use and check the predicate already before
2883 making those changes.
2884    On the other hand, the handling of GIMPLE_COND directly can turn
2885 previously multiply used SSA_NAMEs into single use SSA_NAMEs, so
2886 it needs to be done in a separate loop afterwards. */
2887 for (bb = last_bb, idx = 0; ; bb = single_pred (bb), idx++)
2888 {
2889 if (bbinfo[idx].first_idx < bbinfo[idx].last_idx
2890 && bbinfo[idx].op != NULL_TREE)
2891 {
2892 tree new_op;
2893
2894 stmt = last_stmt (bb);
2895 new_op = update_ops (bbinfo[idx].op,
2896 (enum tree_code)
2897 ops[bbinfo[idx].first_idx]->rank,
2898 ops, &bbinfo[idx].first_idx,
2899 loop_containing_stmt (stmt));
2900 if (new_op == NULL_TREE)
2901 {
2902 gcc_assert (bb == last_bb);
2903 new_op = ops[bbinfo[idx].first_idx++]->op;
2904 }
2905 if (bbinfo[idx].op != new_op)
2906 {
2907 imm_use_iterator iter;
2908 use_operand_p use_p;
2909 gimple use_stmt, cast_stmt = NULL;
2910
2911 FOR_EACH_IMM_USE_STMT (use_stmt, iter, bbinfo[idx].op)
2912 if (is_gimple_debug (use_stmt))
2913 continue;
2914 else if (gimple_code (use_stmt) == GIMPLE_COND
2915 || gimple_code (use_stmt) == GIMPLE_PHI)
2916 FOR_EACH_IMM_USE_ON_STMT (use_p, iter)
2917 SET_USE (use_p, new_op);
2918 else if (gimple_assign_cast_p (use_stmt))
2919 cast_stmt = use_stmt;
2920 else
2921 gcc_unreachable ();
2922 if (cast_stmt)
2923 {
2924 gcc_assert (bb == last_bb);
2925 tree lhs = gimple_assign_lhs (cast_stmt);
2926 tree new_lhs = make_ssa_name (TREE_TYPE (lhs), NULL);
2927 enum tree_code rhs_code
2928 = gimple_assign_rhs_code (cast_stmt);
2929 gimple g
2930 = gimple_build_assign_with_ops (rhs_code, new_lhs,
2931 new_op, NULL_TREE);
2932 gimple_stmt_iterator gsi = gsi_for_stmt (cast_stmt);
2933 gimple_set_uid (g, gimple_uid (cast_stmt));
2934 gimple_set_visited (g, true);
2935 gsi_insert_before (&gsi, g, GSI_SAME_STMT);
2936 FOR_EACH_IMM_USE_STMT (use_stmt, iter, lhs)
2937 if (is_gimple_debug (use_stmt))
2938 continue;
2939 else if (gimple_code (use_stmt) == GIMPLE_COND
2940 || gimple_code (use_stmt) == GIMPLE_PHI)
2941 FOR_EACH_IMM_USE_ON_STMT (use_p, iter)
2942 SET_USE (use_p, new_lhs);
2943 else
2944 gcc_unreachable ();
2945 }
2946 }
2947 }
2948 if (bb == first_bb)
2949 break;
2950 }
2951 for (bb = last_bb, idx = 0; ; bb = single_pred (bb), idx++)
2952 {
2953 if (bbinfo[idx].first_idx < bbinfo[idx].last_idx
2954 && bbinfo[idx].op == NULL_TREE
2955 && ops[bbinfo[idx].first_idx]->op != NULL_TREE)
2956 {
2957 stmt = last_stmt (bb);
2958 if (integer_zerop (ops[bbinfo[idx].first_idx]->op))
2959 gimple_cond_make_false (stmt);
2960 else if (integer_onep (ops[bbinfo[idx].first_idx]->op))
2961 gimple_cond_make_true (stmt);
2962 else
2963 {
2964 gimple_cond_set_code (stmt, NE_EXPR);
2965 gimple_cond_set_lhs (stmt, ops[bbinfo[idx].first_idx]->op);
2966 gimple_cond_set_rhs (stmt, boolean_false_node);
2967 }
2968 update_stmt (stmt);
2969 }
2970 if (bb == first_bb)
2971 break;
2972 }
2973 }
2974 bbinfo.release ();
2975 ops.release ();
2976 }
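
/* Illustrative example (hypothetical source, not from a real dump): for
     if (a == 1 || a == 3)
       foo ();
   the two comparisons typically end up as GIMPLE_CONDs in consecutive basic
   blocks that both branch to the same join block.  The walk above collects
   one operand entry per such block into OPS, optimize_range_tests (called
   with ERROR_MARK) can merge them, e.g. into (a & ~2) == 1 here, and the
   loops above then fold the now-redundant GIMPLE_COND to a constant.  */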
2977
2978 /* Return true if OPERAND is defined by a PHI node which uses the LHS
2979    of STMT in its operands. This is also known as a "destructive
2980 update" operation. */
2981
2982 static bool
2983 is_phi_for_stmt (gimple stmt, tree operand)
2984 {
2985 gimple def_stmt;
2986 tree lhs;
2987 use_operand_p arg_p;
2988 ssa_op_iter i;
2989
2990 if (TREE_CODE (operand) != SSA_NAME)
2991 return false;
2992
2993 lhs = gimple_assign_lhs (stmt);
2994
2995 def_stmt = SSA_NAME_DEF_STMT (operand);
2996 if (gimple_code (def_stmt) != GIMPLE_PHI)
2997 return false;
2998
2999 FOR_EACH_PHI_ARG (arg_p, def_stmt, i, SSA_OP_USE)
3000 if (lhs == USE_FROM_PTR (arg_p))
3001 return true;
3002 return false;
3003 }
3004
3005 /* Remove def stmt of VAR if VAR has zero uses and recurse
3006 on rhs1 operand if so. */
3007
3008 static void
3009 remove_visited_stmt_chain (tree var)
3010 {
3011 gimple stmt;
3012 gimple_stmt_iterator gsi;
3013
3014 while (1)
3015 {
3016 if (TREE_CODE (var) != SSA_NAME || !has_zero_uses (var))
3017 return;
3018 stmt = SSA_NAME_DEF_STMT (var);
3019 if (is_gimple_assign (stmt) && gimple_visited_p (stmt))
3020 {
3021 var = gimple_assign_rhs1 (stmt);
3022 gsi = gsi_for_stmt (stmt);
3023 gsi_remove (&gsi, true);
3024 release_defs (stmt);
3025 }
3026 else
3027 return;
3028 }
3029 }
3030
3031 /* This function checks three consecutive operands in
3032    the passed operand vector OPS starting from OPINDEX and
3033    swaps two operands if it is profitable for the binary operation
3034    consuming the OPINDEX + 1 and OPINDEX + 2 operands.
3035
3036 We pair ops with the same rank if possible.
3037
3038 The alternative we try is to see if STMT is a destructive
3039 update style statement, which is like:
3040 b = phi (a, ...)
3041 a = c + b;
3042 In that case, we want to use the destructive update form to
3043 expose the possible vectorizer sum reduction opportunity.
3044 In that case, the third operand will be the phi node. This
3045 check is not performed if STMT is null.
3046
3047 We could, of course, try to be better as noted above, and do a
3048 lot of work to try to find these opportunities in >3 operand
3049 cases, but it is unlikely to be worth it. */
3050
3051 static void
3052 swap_ops_for_binary_stmt (vec<operand_entry_t> ops,
3053 unsigned int opindex, gimple stmt)
3054 {
3055 operand_entry_t oe1, oe2, oe3;
3056
3057 oe1 = ops[opindex];
3058 oe2 = ops[opindex + 1];
3059 oe3 = ops[opindex + 2];
3060
3061 if ((oe1->rank == oe2->rank
3062 && oe2->rank != oe3->rank)
3063 || (stmt && is_phi_for_stmt (stmt, oe3->op)
3064 && !is_phi_for_stmt (stmt, oe1->op)
3065 && !is_phi_for_stmt (stmt, oe2->op)))
3066 {
3067 struct operand_entry temp = *oe3;
3068 oe3->op = oe1->op;
3069 oe3->rank = oe1->rank;
3070 oe1->op = temp.op;
3071 oe1->rank= temp.rank;
3072 }
3073 else if ((oe1->rank == oe3->rank
3074 && oe2->rank != oe3->rank)
3075 || (stmt && is_phi_for_stmt (stmt, oe2->op)
3076 && !is_phi_for_stmt (stmt, oe1->op)
3077 && !is_phi_for_stmt (stmt, oe3->op)))
3078 {
3079 struct operand_entry temp = *oe2;
3080 oe2->op = oe1->op;
3081 oe2->rank = oe1->rank;
3082 oe1->op = temp.op;
3083 oe1->rank = temp.rank;
3084 }
3085 }
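
/* Illustrative example (hypothetical ranks): if OPS[OPINDEX..OPINDEX + 2]
   have ranks {5, 5, 2}, the first branch above swaps the first and third
   entries, giving ranks {2, 5, 5}, so the binary statement consuming the
   last two operands pairs the equal-rank values and exposes them to the
   redundancy eliminator.  */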
3086
3087 /* If definition of RHS1 or RHS2 dominates STMT, return the later of those
3088 two definitions, otherwise return STMT. */
3089
3090 static inline gimple
3091 find_insert_point (gimple stmt, tree rhs1, tree rhs2)
3092 {
3093 if (TREE_CODE (rhs1) == SSA_NAME
3094 && reassoc_stmt_dominates_stmt_p (stmt, SSA_NAME_DEF_STMT (rhs1)))
3095 stmt = SSA_NAME_DEF_STMT (rhs1);
3096 if (TREE_CODE (rhs2) == SSA_NAME
3097 && reassoc_stmt_dominates_stmt_p (stmt, SSA_NAME_DEF_STMT (rhs2)))
3098 stmt = SSA_NAME_DEF_STMT (rhs2);
3099 return stmt;
3100 }
3101
3102 /* Recursively rewrite our linearized statements so that the operators
3103 match those in OPS[OPINDEX], putting the computation in rank
3104 order. Return new lhs. */
3105
3106 static tree
3107 rewrite_expr_tree (gimple stmt, unsigned int opindex,
3108 vec<operand_entry_t> ops, bool changed)
3109 {
3110 tree rhs1 = gimple_assign_rhs1 (stmt);
3111 tree rhs2 = gimple_assign_rhs2 (stmt);
3112 tree lhs = gimple_assign_lhs (stmt);
3113 operand_entry_t oe;
3114
3115 /* The final recursion case for this function is that you have
3116 exactly two operations left.
3117    If we had exactly one op in the entire list to start with, we
3118 would have never called this function, and the tail recursion
3119 rewrites them one at a time. */
3120 if (opindex + 2 == ops.length ())
3121 {
3122 operand_entry_t oe1, oe2;
3123
3124 oe1 = ops[opindex];
3125 oe2 = ops[opindex + 1];
3126
3127 if (rhs1 != oe1->op || rhs2 != oe2->op)
3128 {
3129 gimple_stmt_iterator gsi = gsi_for_stmt (stmt);
3130 unsigned int uid = gimple_uid (stmt);
3131
3132 if (dump_file && (dump_flags & TDF_DETAILS))
3133 {
3134 fprintf (dump_file, "Transforming ");
3135 print_gimple_stmt (dump_file, stmt, 0, 0);
3136 }
3137
3138 if (changed)
3139 {
3140 gimple insert_point = find_insert_point (stmt, oe1->op, oe2->op);
3141 lhs = make_ssa_name (TREE_TYPE (lhs), NULL);
3142 stmt
3143 = gimple_build_assign_with_ops (gimple_assign_rhs_code (stmt),
3144 lhs, oe1->op, oe2->op);
3145 gimple_set_uid (stmt, uid);
3146 gimple_set_visited (stmt, true);
3147 if (insert_point == gsi_stmt (gsi))
3148 gsi_insert_before (&gsi, stmt, GSI_SAME_STMT);
3149 else
3150 insert_stmt_after (stmt, insert_point);
3151 }
3152 else
3153 {
3154 gcc_checking_assert (find_insert_point (stmt, oe1->op, oe2->op)
3155 == stmt);
3156 gimple_assign_set_rhs1 (stmt, oe1->op);
3157 gimple_assign_set_rhs2 (stmt, oe2->op);
3158 update_stmt (stmt);
3159 }
3160
3161 if (rhs1 != oe1->op && rhs1 != oe2->op)
3162 remove_visited_stmt_chain (rhs1);
3163
3164 if (dump_file && (dump_flags & TDF_DETAILS))
3165 {
3166 fprintf (dump_file, " into ");
3167 print_gimple_stmt (dump_file, stmt, 0, 0);
3168 }
3169 }
3170 return lhs;
3171 }
3172
3173 /* If we hit here, we should have 3 or more ops left. */
3174 gcc_assert (opindex + 2 < ops.length ());
3175
3176 /* Rewrite the next operator. */
3177 oe = ops[opindex];
3178
3179 /* Recurse on the LHS of the binary operator, which is guaranteed to
3180 be the non-leaf side. */
3181 tree new_rhs1
3182 = rewrite_expr_tree (SSA_NAME_DEF_STMT (rhs1), opindex + 1, ops,
3183 changed || oe->op != rhs2);
3184
3185 if (oe->op != rhs2 || new_rhs1 != rhs1)
3186 {
3187 if (dump_file && (dump_flags & TDF_DETAILS))
3188 {
3189 fprintf (dump_file, "Transforming ");
3190 print_gimple_stmt (dump_file, stmt, 0, 0);
3191 }
3192
3193 /* If changed is false, this is either opindex == 0
3194 or all outer rhs2's were equal to corresponding oe->op,
3195 and powi_result is NULL.
3196 That means lhs is equivalent before and after reassociation.
3197 Otherwise ensure the old lhs SSA_NAME is not reused and
3198 create a new stmt as well, so that any debug stmts will be
3199 properly adjusted. */
3200 if (changed)
3201 {
3202 gimple_stmt_iterator gsi = gsi_for_stmt (stmt);
3203 unsigned int uid = gimple_uid (stmt);
3204 gimple insert_point = find_insert_point (stmt, new_rhs1, oe->op);
3205
3206 lhs = make_ssa_name (TREE_TYPE (lhs), NULL);
3207 stmt = gimple_build_assign_with_ops (gimple_assign_rhs_code (stmt),
3208 lhs, new_rhs1, oe->op);
3209 gimple_set_uid (stmt, uid);
3210 gimple_set_visited (stmt, true);
3211 if (insert_point == gsi_stmt (gsi))
3212 gsi_insert_before (&gsi, stmt, GSI_SAME_STMT);
3213 else
3214 insert_stmt_after (stmt, insert_point);
3215 }
3216 else
3217 {
3218 gcc_checking_assert (find_insert_point (stmt, new_rhs1, oe->op)
3219 == stmt);
3220 gimple_assign_set_rhs1 (stmt, new_rhs1);
3221 gimple_assign_set_rhs2 (stmt, oe->op);
3222 update_stmt (stmt);
3223 }
3224
3225 if (dump_file && (dump_flags & TDF_DETAILS))
3226 {
3227 fprintf (dump_file, " into ");
3228 print_gimple_stmt (dump_file, stmt, 0, 0);
3229 }
3230 }
3231 return lhs;
3232 }
3233
3234 /* Find out how many cycles we need to compute the statement chain.
3235    OPS_NUM holds the number of statements in the chain. CPU_WIDTH is the
3236    maximum number of independent statements we may execute per cycle. */
3237
3238 static int
3239 get_required_cycles (int ops_num, int cpu_width)
3240 {
3241 int res;
3242 int elog;
3243 unsigned int rest;
3244
3245 /* While we have more than 2 * cpu_width operands
3246    we may reduce the number of operands by cpu_width
3247 per cycle. */
3248 res = ops_num / (2 * cpu_width);
3249
3250    /* The remaining operand count can be halved each cycle
3251    until we have only one operand. */
3252 rest = (unsigned)(ops_num - res * cpu_width);
3253 elog = exact_log2 (rest);
3254 if (elog >= 0)
3255 res += elog;
3256 else
3257 res += floor_log2 (rest) + 1;
3258
3259 return res;
3260 }
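
/* Illustrative example (hypothetical operand count): for OPS_NUM == 10 and
   CPU_WIDTH == 2 the code above computes res = 10 / 4 = 2 and rest = 6;
   6 is not a power of two, so floor_log2 (6) + 1 == 3 more cycles are added,
   5 cycles in total.  That matches reducing 10 -> 8 -> 6 -> 4 -> 2 -> 1
   operands with two independent statements per cycle.  */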
3261
3262 /* Returns an optimal number of registers to use for computation of
3263 given statements. */
3264
3265 static int
3266 get_reassociation_width (int ops_num, enum tree_code opc,
3267 enum machine_mode mode)
3268 {
3269 int param_width = PARAM_VALUE (PARAM_TREE_REASSOC_WIDTH);
3270 int width;
3271 int width_min;
3272 int cycles_best;
3273
3274 if (param_width > 0)
3275 width = param_width;
3276 else
3277 width = targetm.sched.reassociation_width (opc, mode);
3278
3279 if (width == 1)
3280 return width;
3281
3282 /* Get the minimal time required for sequence computation. */
3283 cycles_best = get_required_cycles (ops_num, width);
3284
3285    /* Check if we may use a smaller width and still compute the sequence in
3286    the same time.  It will allow us to reduce register usage.
3287 get_required_cycles is monotonically increasing with lower width
3288 so we can perform a binary search for the minimal width that still
3289 results in the optimal cycle count. */
3290 width_min = 1;
3291 while (width > width_min)
3292 {
3293 int width_mid = (width + width_min) / 2;
3294
3295 if (get_required_cycles (ops_num, width_mid) == cycles_best)
3296 width = width_mid;
3297 else if (width_min < width_mid)
3298 width_min = width_mid;
3299 else
3300 break;
3301 }
3302
3303 return width;
3304 }
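
/* Illustrative example (hypothetical numbers): for OPS_NUM == 8 and a target
   reassociation width of 4, get_required_cycles reports 3 cycles, while
   widths 2 and 3 both need 4 cycles, so the binary search above keeps the
   width at 4.  Had width 2 also achieved 3 cycles, the narrower value would
   be returned to reduce register pressure.  */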
3305
3306 /* Recursively rewrite our linearized statements so that the operators
3307 match those in OPS[OPINDEX], putting the computation in rank
3308 order and trying to allow operations to be executed in
3309 parallel. */
3310
3311 static void
3312 rewrite_expr_tree_parallel (gimple stmt, int width,
3313 vec<operand_entry_t> ops)
3314 {
3315 enum tree_code opcode = gimple_assign_rhs_code (stmt);
3316 int op_num = ops.length ();
3317 int stmt_num = op_num - 1;
3318 gimple *stmts = XALLOCAVEC (gimple, stmt_num);
3319 int op_index = op_num - 1;
3320 int stmt_index = 0;
3321 int ready_stmts_end = 0;
3322 int i = 0;
3323 tree last_rhs1 = gimple_assign_rhs1 (stmt);
3324
3325 /* We start expression rewriting from the top statements.
3326 So, in this loop we create a full list of statements
3327 we will work with. */
3328 stmts[stmt_num - 1] = stmt;
3329 for (i = stmt_num - 2; i >= 0; i--)
3330 stmts[i] = SSA_NAME_DEF_STMT (gimple_assign_rhs1 (stmts[i+1]));
3331
3332 for (i = 0; i < stmt_num; i++)
3333 {
3334 tree op1, op2;
3335
3336 /* Determine whether we should use results of
3337 already handled statements or not. */
3338 if (ready_stmts_end == 0
3339 && (i - stmt_index >= width || op_index < 1))
3340 ready_stmts_end = i;
3341
3342    /* Now we choose operands for the next statement.  A nonzero
3343    value in ready_stmts_end means that we should use
3344    the results of already generated statements as new operands. */
3345 if (ready_stmts_end > 0)
3346 {
3347 op1 = gimple_assign_lhs (stmts[stmt_index++]);
3348 if (ready_stmts_end > stmt_index)
3349 op2 = gimple_assign_lhs (stmts[stmt_index++]);
3350 else if (op_index >= 0)
3351 op2 = ops[op_index--]->op;
3352 else
3353 {
3354 gcc_assert (stmt_index < i);
3355 op2 = gimple_assign_lhs (stmts[stmt_index++]);
3356 }
3357
3358 if (stmt_index >= ready_stmts_end)
3359 ready_stmts_end = 0;
3360 }
3361 else
3362 {
3363 if (op_index > 1)
3364 swap_ops_for_binary_stmt (ops, op_index - 2, NULL);
3365 op2 = ops[op_index--]->op;
3366 op1 = ops[op_index--]->op;
3367 }
3368
3369 /* If we emit the last statement then we should put
3370 operands into the last statement. It will also
3371 break the loop. */
3372 if (op_index < 0 && stmt_index == i)
3373 i = stmt_num - 1;
3374
3375 if (dump_file && (dump_flags & TDF_DETAILS))
3376 {
3377 fprintf (dump_file, "Transforming ");
3378 print_gimple_stmt (dump_file, stmts[i], 0, 0);
3379 }
3380
3381 /* We keep original statement only for the last one. All
3382 others are recreated. */
3383 if (i == stmt_num - 1)
3384 {
3385 gimple_assign_set_rhs1 (stmts[i], op1);
3386 gimple_assign_set_rhs2 (stmts[i], op2);
3387 update_stmt (stmts[i]);
3388 }
3389 else
3390 stmts[i] = build_and_add_sum (TREE_TYPE (last_rhs1), op1, op2, opcode);
3391
3392 if (dump_file && (dump_flags & TDF_DETAILS))
3393 {
3394 fprintf (dump_file, " into ");
3395 print_gimple_stmt (dump_file, stmts[i], 0, 0);
3396 }
3397 }
3398
3399 remove_visited_stmt_chain (last_rhs1);
3400 }
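
/* Illustrative example (hypothetical operands): with OPS == {a, b, c, d} and
   WIDTH == 2, the loop above first builds two independent statements,
   _t1 = c + d and _t2 = a + b (possibly reordered by
   swap_ops_for_binary_stmt), and then rewrites STMT itself as _t1 + _t2,
   for a depth of two cycles instead of three for the serial chain
   ((a + b) + c) + d.  */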
3401
3402 /* Transform STMT, which is really (A + B) + (C + D), into the left
3403 linear form, ((A+B)+C)+D.
3404 Recurse on D if necessary. */
3405
3406 static void
3407 linearize_expr (gimple stmt)
3408 {
3409 gimple_stmt_iterator gsi;
3410 gimple binlhs = SSA_NAME_DEF_STMT (gimple_assign_rhs1 (stmt));
3411 gimple binrhs = SSA_NAME_DEF_STMT (gimple_assign_rhs2 (stmt));
3412 gimple oldbinrhs = binrhs;
3413 enum tree_code rhscode = gimple_assign_rhs_code (stmt);
3414 gimple newbinrhs = NULL;
3415 struct loop *loop = loop_containing_stmt (stmt);
3416 tree lhs = gimple_assign_lhs (stmt);
3417
3418 gcc_assert (is_reassociable_op (binlhs, rhscode, loop)
3419 && is_reassociable_op (binrhs, rhscode, loop));
3420
3421 gsi = gsi_for_stmt (stmt);
3422
3423 gimple_assign_set_rhs2 (stmt, gimple_assign_rhs1 (binrhs));
3424 binrhs = gimple_build_assign_with_ops (gimple_assign_rhs_code (binrhs),
3425 make_ssa_name (TREE_TYPE (lhs), NULL),
3426 gimple_assign_lhs (binlhs),
3427 gimple_assign_rhs2 (binrhs));
3428 gimple_assign_set_rhs1 (stmt, gimple_assign_lhs (binrhs));
3429 gsi_insert_before (&gsi, binrhs, GSI_SAME_STMT);
3430 gimple_set_uid (binrhs, gimple_uid (stmt));
3431
3432 if (TREE_CODE (gimple_assign_rhs2 (stmt)) == SSA_NAME)
3433 newbinrhs = SSA_NAME_DEF_STMT (gimple_assign_rhs2 (stmt));
3434
3435 if (dump_file && (dump_flags & TDF_DETAILS))
3436 {
3437 fprintf (dump_file, "Linearized: ");
3438 print_gimple_stmt (dump_file, stmt, 0, 0);
3439 }
3440
3441 reassociate_stats.linearized++;
3442 update_stmt (stmt);
3443
3444 gsi = gsi_for_stmt (oldbinrhs);
3445 gsi_remove (&gsi, true);
3446 release_defs (oldbinrhs);
3447
3448 gimple_set_visited (stmt, true);
3449 gimple_set_visited (binlhs, true);
3450 gimple_set_visited (binrhs, true);
3451
3452 /* Tail recurse on the new rhs if it still needs reassociation. */
3453 if (newbinrhs && is_reassociable_op (newbinrhs, rhscode, loop))
3454 /* ??? This should probably be linearize_expr (newbinrhs) but I don't
3455 want to change the algorithm while converting to tuples. */
3456 linearize_expr (stmt);
3457 }
3458
3459 /* If LHS has a single immediate use that is a GIMPLE_ASSIGN statement, return
3460 it. Otherwise, return NULL. */
3461
3462 static gimple
3463 get_single_immediate_use (tree lhs)
3464 {
3465 use_operand_p immuse;
3466 gimple immusestmt;
3467
3468 if (TREE_CODE (lhs) == SSA_NAME
3469 && single_imm_use (lhs, &immuse, &immusestmt)
3470 && is_gimple_assign (immusestmt))
3471 return immusestmt;
3472
3473 return NULL;
3474 }
3475
3476 /* Recursively negate the value of TONEGATE, and return the SSA_NAME
3477 representing the negated value. Insertions of any necessary
3478 instructions go before GSI.
3479 This function is recursive in that, if you hand it "a_5" as the
3480 value to negate, and a_5 is defined by "a_5 = b_3 + b_4", it will
3481 transform b_3 + b_4 into a_5 = -b_3 + -b_4. */
3482
3483 static tree
3484 negate_value (tree tonegate, gimple_stmt_iterator *gsip)
3485 {
3486 gimple negatedefstmt = NULL;
3487 tree resultofnegate;
3488 gimple_stmt_iterator gsi;
3489 unsigned int uid;
3490
3491 /* If we are trying to negate a name, defined by an add, negate the
3492 add operands instead. */
3493 if (TREE_CODE (tonegate) == SSA_NAME)
3494 negatedefstmt = SSA_NAME_DEF_STMT (tonegate);
3495 if (TREE_CODE (tonegate) == SSA_NAME
3496 && is_gimple_assign (negatedefstmt)
3497 && TREE_CODE (gimple_assign_lhs (negatedefstmt)) == SSA_NAME
3498 && has_single_use (gimple_assign_lhs (negatedefstmt))
3499 && gimple_assign_rhs_code (negatedefstmt) == PLUS_EXPR)
3500 {
3501 tree rhs1 = gimple_assign_rhs1 (negatedefstmt);
3502 tree rhs2 = gimple_assign_rhs2 (negatedefstmt);
3503 tree lhs = gimple_assign_lhs (negatedefstmt);
3504 gimple g;
3505
3506 gsi = gsi_for_stmt (negatedefstmt);
3507 rhs1 = negate_value (rhs1, &gsi);
3508
3509 gsi = gsi_for_stmt (negatedefstmt);
3510 rhs2 = negate_value (rhs2, &gsi);
3511
3512 gsi = gsi_for_stmt (negatedefstmt);
3513 lhs = make_ssa_name (TREE_TYPE (lhs), NULL);
3514 gimple_set_visited (negatedefstmt, true);
3515 g = gimple_build_assign_with_ops (PLUS_EXPR, lhs, rhs1, rhs2);
3516 gimple_set_uid (g, gimple_uid (negatedefstmt));
3517 gsi_insert_before (&gsi, g, GSI_SAME_STMT);
3518 return lhs;
3519 }
3520
3521 tonegate = fold_build1 (NEGATE_EXPR, TREE_TYPE (tonegate), tonegate);
3522 resultofnegate = force_gimple_operand_gsi (gsip, tonegate, true,
3523 NULL_TREE, true, GSI_SAME_STMT);
3524 gsi = *gsip;
3525 uid = gimple_uid (gsi_stmt (gsi));
3526 for (gsi_prev (&gsi); !gsi_end_p (gsi); gsi_prev (&gsi))
3527 {
3528 gimple stmt = gsi_stmt (gsi);
3529 if (gimple_uid (stmt) != 0)
3530 break;
3531 gimple_set_uid (stmt, uid);
3532 }
3533 return resultofnegate;
3534 }
3535
3536 /* Return true if we should break up the subtract in STMT into an add
3537    with negate. This is true when the subtract operands are really
3538 adds, or the subtract itself is used in an add expression. In
3539 either case, breaking up the subtract into an add with negate
3540 exposes the adds to reassociation. */
3541
3542 static bool
3543 should_break_up_subtract (gimple stmt)
3544 {
3545 tree lhs = gimple_assign_lhs (stmt);
3546 tree binlhs = gimple_assign_rhs1 (stmt);
3547 tree binrhs = gimple_assign_rhs2 (stmt);
3548 gimple immusestmt;
3549 struct loop *loop = loop_containing_stmt (stmt);
3550
3551 if (TREE_CODE (binlhs) == SSA_NAME
3552 && is_reassociable_op (SSA_NAME_DEF_STMT (binlhs), PLUS_EXPR, loop))
3553 return true;
3554
3555 if (TREE_CODE (binrhs) == SSA_NAME
3556 && is_reassociable_op (SSA_NAME_DEF_STMT (binrhs), PLUS_EXPR, loop))
3557 return true;
3558
3559 if (TREE_CODE (lhs) == SSA_NAME
3560 && (immusestmt = get_single_immediate_use (lhs))
3561 && is_gimple_assign (immusestmt)
3562 && (gimple_assign_rhs_code (immusestmt) == PLUS_EXPR
3563 || gimple_assign_rhs_code (immusestmt) == MULT_EXPR))
3564 return true;
3565 return false;
3566 }
3567
3568 /* Transform STMT from A - B into A + -B. */
3569
3570 static void
3571 break_up_subtract (gimple stmt, gimple_stmt_iterator *gsip)
3572 {
3573 tree rhs1 = gimple_assign_rhs1 (stmt);
3574 tree rhs2 = gimple_assign_rhs2 (stmt);
3575
3576 if (dump_file && (dump_flags & TDF_DETAILS))
3577 {
3578 fprintf (dump_file, "Breaking up subtract ");
3579 print_gimple_stmt (dump_file, stmt, 0, 0);
3580 }
3581
3582 rhs2 = negate_value (rhs2, gsip);
3583 gimple_assign_set_rhs_with_ops (gsip, PLUS_EXPR, rhs1, rhs2);
3584 update_stmt (stmt);
3585 }
3586
3587 /* Determine whether STMT is a builtin call that raises an SSA name
3588 to an integer power and has only one use. If so, and this is early
3589 reassociation and unsafe math optimizations are permitted, place
3590 the SSA name in *BASE and the exponent in *EXPONENT, and return TRUE.
3591 If any of these conditions does not hold, return FALSE. */
3592
3593 static bool
3594 acceptable_pow_call (gimple stmt, tree *base, HOST_WIDE_INT *exponent)
3595 {
3596 tree fndecl, arg1;
3597 REAL_VALUE_TYPE c, cint;
3598
3599 if (!first_pass_instance
3600 || !flag_unsafe_math_optimizations
3601 || !is_gimple_call (stmt)
3602 || !has_single_use (gimple_call_lhs (stmt)))
3603 return false;
3604
3605 fndecl = gimple_call_fndecl (stmt);
3606
3607 if (!fndecl
3608 || DECL_BUILT_IN_CLASS (fndecl) != BUILT_IN_NORMAL)
3609 return false;
3610
3611 switch (DECL_FUNCTION_CODE (fndecl))
3612 {
3613 CASE_FLT_FN (BUILT_IN_POW):
3614 *base = gimple_call_arg (stmt, 0);
3615 arg1 = gimple_call_arg (stmt, 1);
3616
3617 if (TREE_CODE (arg1) != REAL_CST)
3618 return false;
3619
3620 c = TREE_REAL_CST (arg1);
3621
3622 if (REAL_EXP (&c) > HOST_BITS_PER_WIDE_INT)
3623 return false;
3624
3625 *exponent = real_to_integer (&c);
3626 real_from_integer (&cint, VOIDmode, *exponent,
3627 *exponent < 0 ? -1 : 0, 0);
3628 if (!real_identical (&c, &cint))
3629 return false;
3630
3631 break;
3632
3633 CASE_FLT_FN (BUILT_IN_POWI):
3634 *base = gimple_call_arg (stmt, 0);
3635 arg1 = gimple_call_arg (stmt, 1);
3636
3637 if (!host_integerp (arg1, 0))
3638 return false;
3639
3640 *exponent = TREE_INT_CST_LOW (arg1);
3641 break;
3642
3643 default:
3644 return false;
3645 }
3646
3647 /* Expanding negative exponents is generally unproductive, so we don't
3648 complicate matters with those. Exponents of zero and one should
3649 have been handled by expression folding. */
3650 if (*exponent < 2 || TREE_CODE (*base) != SSA_NAME)
3651 return false;
3652
3653 return true;
3654 }
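
/* Illustrative examples (hypothetical calls): a single-use
   __builtin_pow (x_1, 4.0) is accepted with *BASE = x_1 and *EXPONENT = 4,
   as is __builtin_powi (x_1, 5).  __builtin_pow (x_1, 0.5) is rejected
   because 0.5 is not an integral value, and exponents 0 and 1 are rejected
   by the final check above.  */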
3655
3656 /* Recursively linearize a binary expression that is the RHS of STMT.
3657 Place the operands of the expression tree in the vector named OPS. */
3658
3659 static void
3660 linearize_expr_tree (vec<operand_entry_t> *ops, gimple stmt,
3661 bool is_associative, bool set_visited)
3662 {
3663 tree binlhs = gimple_assign_rhs1 (stmt);
3664 tree binrhs = gimple_assign_rhs2 (stmt);
3665 gimple binlhsdef = NULL, binrhsdef = NULL;
3666 bool binlhsisreassoc = false;
3667 bool binrhsisreassoc = false;
3668 enum tree_code rhscode = gimple_assign_rhs_code (stmt);
3669 struct loop *loop = loop_containing_stmt (stmt);
3670 tree base = NULL_TREE;
3671 HOST_WIDE_INT exponent = 0;
3672
3673 if (set_visited)
3674 gimple_set_visited (stmt, true);
3675
3676 if (TREE_CODE (binlhs) == SSA_NAME)
3677 {
3678 binlhsdef = SSA_NAME_DEF_STMT (binlhs);
3679 binlhsisreassoc = (is_reassociable_op (binlhsdef, rhscode, loop)
3680 && !stmt_could_throw_p (binlhsdef));
3681 }
3682
3683 if (TREE_CODE (binrhs) == SSA_NAME)
3684 {
3685 binrhsdef = SSA_NAME_DEF_STMT (binrhs);
3686 binrhsisreassoc = (is_reassociable_op (binrhsdef, rhscode, loop)
3687 && !stmt_could_throw_p (binrhsdef));
3688 }
3689
3690 /* If the LHS is not reassociable, but the RHS is, we need to swap
3691 them. If neither is reassociable, there is nothing we can do, so
3692 just put them in the ops vector. If the LHS is reassociable,
3693 linearize it. If both are reassociable, then linearize the RHS
3694 and the LHS. */
3695
3696 if (!binlhsisreassoc)
3697 {
3698 tree temp;
3699
3700    /* If this is not an associative operation, like division, give up. */
3701 if (!is_associative)
3702 {
3703 add_to_ops_vec (ops, binrhs);
3704 return;
3705 }
3706
3707 if (!binrhsisreassoc)
3708 {
3709 if (rhscode == MULT_EXPR
3710 && TREE_CODE (binrhs) == SSA_NAME
3711 && acceptable_pow_call (binrhsdef, &base, &exponent))
3712 {
3713 add_repeat_to_ops_vec (ops, base, exponent);
3714 gimple_set_visited (binrhsdef, true);
3715 }
3716 else
3717 add_to_ops_vec (ops, binrhs);
3718
3719 if (rhscode == MULT_EXPR
3720 && TREE_CODE (binlhs) == SSA_NAME
3721 && acceptable_pow_call (binlhsdef, &base, &exponent))
3722 {
3723 add_repeat_to_ops_vec (ops, base, exponent);
3724 gimple_set_visited (binlhsdef, true);
3725 }
3726 else
3727 add_to_ops_vec (ops, binlhs);
3728
3729 return;
3730 }
3731
3732 if (dump_file && (dump_flags & TDF_DETAILS))
3733 {
3734 fprintf (dump_file, "swapping operands of ");
3735 print_gimple_stmt (dump_file, stmt, 0, 0);
3736 }
3737
3738 swap_ssa_operands (stmt,
3739 gimple_assign_rhs1_ptr (stmt),
3740 gimple_assign_rhs2_ptr (stmt));
3741 update_stmt (stmt);
3742
3743 if (dump_file && (dump_flags & TDF_DETAILS))
3744 {
3745 fprintf (dump_file, " is now ");
3746 print_gimple_stmt (dump_file, stmt, 0, 0);
3747 }
3748
3749 /* We want to make it so the lhs is always the reassociative op,
3750 so swap. */
3751 temp = binlhs;
3752 binlhs = binrhs;
3753 binrhs = temp;
3754 }
3755 else if (binrhsisreassoc)
3756 {
3757 linearize_expr (stmt);
3758 binlhs = gimple_assign_rhs1 (stmt);
3759 binrhs = gimple_assign_rhs2 (stmt);
3760 }
3761
3762 gcc_assert (TREE_CODE (binrhs) != SSA_NAME
3763 || !is_reassociable_op (SSA_NAME_DEF_STMT (binrhs),
3764 rhscode, loop));
3765 linearize_expr_tree (ops, SSA_NAME_DEF_STMT (binlhs),
3766 is_associative, set_visited);
3767
3768 if (rhscode == MULT_EXPR
3769 && TREE_CODE (binrhs) == SSA_NAME
3770 && acceptable_pow_call (SSA_NAME_DEF_STMT (binrhs), &base, &exponent))
3771 {
3772 add_repeat_to_ops_vec (ops, base, exponent);
3773 gimple_set_visited (SSA_NAME_DEF_STMT (binrhs), true);
3774 }
3775 else
3776 add_to_ops_vec (ops, binrhs);
3777 }
3778
3779 /* Repropagate the negates back into subtracts, since no other pass
3780 currently does it. */
3781
3782 static void
3783 repropagate_negates (void)
3784 {
3785 unsigned int i = 0;
3786 tree negate;
3787
3788 FOR_EACH_VEC_ELT (plus_negates, i, negate)
3789 {
3790 gimple user = get_single_immediate_use (negate);
3791
3792 if (!user || !is_gimple_assign (user))
3793 continue;
3794
3795 /* The negate operand can be either operand of a PLUS_EXPR
3796 (it can be the LHS if the RHS is a constant for example).
3797
3798 Force the negate operand to the RHS of the PLUS_EXPR, then
3799 transform the PLUS_EXPR into a MINUS_EXPR. */
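/* For example: with x = -a and y = x + b (or y = b + x), the use is
   rewritten to y = b - a; the negate x is left without uses and is
   normally cleaned up later.  */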
3800 if (gimple_assign_rhs_code (user) == PLUS_EXPR)
3801 {
3802 /* If the negated operand appears on the LHS of the
3803 PLUS_EXPR, exchange the operands of the PLUS_EXPR
3804 to force the negated operand to the RHS of the PLUS_EXPR. */
3805 if (gimple_assign_rhs1 (user) == negate)
3806 {
3807 swap_ssa_operands (user,
3808 gimple_assign_rhs1_ptr (user),
3809 gimple_assign_rhs2_ptr (user));
3810 }
3811
3812 /* Now transform the PLUS_EXPR into a MINUS_EXPR and replace
3813 the RHS of the PLUS_EXPR with the operand of the NEGATE_EXPR. */
3814 if (gimple_assign_rhs2 (user) == negate)
3815 {
3816 tree rhs1 = gimple_assign_rhs1 (user);
3817 tree rhs2 = get_unary_op (negate, NEGATE_EXPR);
3818 gimple_stmt_iterator gsi = gsi_for_stmt (user);
3819 gimple_assign_set_rhs_with_ops (&gsi, MINUS_EXPR, rhs1, rhs2);
3820 update_stmt (user);
3821 }
3822 }
3823 else if (gimple_assign_rhs_code (user) == MINUS_EXPR)
3824 {
3825 if (gimple_assign_rhs1 (user) == negate)
3826 {
3827 /* We have
3828 x = -a
3829 y = x - b
3830 which we transform into
3831 x = a + b
3832 y = -x .
3833 This pushes down the negate which we possibly can merge
3834 into some other operation, hence insert it into the
3835 plus_negates vector. */
3836 gimple feed = SSA_NAME_DEF_STMT (negate);
3837 tree a = gimple_assign_rhs1 (feed);
3838 tree b = gimple_assign_rhs2 (user);
3839 gimple_stmt_iterator gsi = gsi_for_stmt (feed);
3840 gimple_stmt_iterator gsi2 = gsi_for_stmt (user);
3841 tree x = make_ssa_name (TREE_TYPE (gimple_assign_lhs (feed)), NULL);
3842 gimple g = gimple_build_assign_with_ops (PLUS_EXPR, x, a, b);
3843 gsi_insert_before (&gsi2, g, GSI_SAME_STMT);
3844 gimple_assign_set_rhs_with_ops (&gsi2, NEGATE_EXPR, x, NULL);
3845 user = gsi_stmt (gsi2);
3846 update_stmt (user);
3847 gsi_remove (&gsi, true);
3848 release_defs (feed);
3849 plus_negates.safe_push (gimple_assign_lhs (user));
3850 }
3851 else
3852 {
3853 /* Transform "x = -a; y = b - x" into "y = b + a", getting
3854 rid of one operation. */
3855 gimple feed = SSA_NAME_DEF_STMT (negate);
3856 tree a = gimple_assign_rhs1 (feed);
3857 tree rhs1 = gimple_assign_rhs1 (user);
3858 gimple_stmt_iterator gsi = gsi_for_stmt (user);
3859 gimple_assign_set_rhs_with_ops (&gsi, PLUS_EXPR, rhs1, a);
3860 update_stmt (gsi_stmt (gsi));
3861 }
3862 }
3863 }
3864 }
3865
3866 /* Returns true if OP is of a type for which we can do reassociation:
3867    integral types with wrapping overflow, non-saturating fixed-point
3868    types, and floating-point types when -fassociative-math is enabled. */
3869
3870 static bool
3871 can_reassociate_p (tree op)
3872 {
3873 tree type = TREE_TYPE (op);
3874 if ((INTEGRAL_TYPE_P (type) && TYPE_OVERFLOW_WRAPS (type))
3875 || NON_SAT_FIXED_POINT_TYPE_P (type)
3876 || (flag_associative_math && FLOAT_TYPE_P (type)))
3877 return true;
3878 return false;
3879 }
3880
3881 /* Break up subtract operations in block BB.
3882
3883 We do this top down because we don't know whether the subtract is
3884 part of a possible chain of reassociation except at the top.
3885
3886 I.e. given
3887 d = f + g
3888 c = a + e
3889 b = c - d
3890 q = b - r
3891 k = t - q
3892
3893 we want to break up k = t - q, but we won't until we've transformed q
3894 = b - r, which won't be broken up until we transform b = c - d.
3895
3896 En passant, clear the GIMPLE visited flag on every statement
3897 and set UIDs within each basic block. */
3898
3899 static void
3900 break_up_subtract_bb (basic_block bb)
3901 {
3902 gimple_stmt_iterator gsi;
3903 basic_block son;
3904 unsigned int uid = 1;
3905
3906 for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
3907 {
3908 gimple stmt = gsi_stmt (gsi);
3909 gimple_set_visited (stmt, false);
3910 gimple_set_uid (stmt, uid++);
3911
3912 if (!is_gimple_assign (stmt)
3913 || !can_reassociate_p (gimple_assign_lhs (stmt)))
3914 continue;
3915
3916 /* Look for simple gimple subtract operations. */
3917 if (gimple_assign_rhs_code (stmt) == MINUS_EXPR)
3918 {
3919 if (!can_reassociate_p (gimple_assign_rhs1 (stmt))
3920 || !can_reassociate_p (gimple_assign_rhs2 (stmt)))
3921 continue;
3922
3923 /* Check for a subtract used only in an addition.  If this
3924    is the case, transform it into an add of a negate for better
3925    reassociation.  I.e. transform C = A - B into C = A + -B if C
3926    is only used in an addition. */
3927 if (should_break_up_subtract (stmt))
3928 break_up_subtract (stmt, &gsi);
3929 }
3930 else if (gimple_assign_rhs_code (stmt) == NEGATE_EXPR
3931 && can_reassociate_p (gimple_assign_rhs1 (stmt)))
3932 plus_negates.safe_push (gimple_assign_lhs (stmt));
3933 }
3934 for (son = first_dom_son (CDI_DOMINATORS, bb);
3935 son;
3936 son = next_dom_son (CDI_DOMINATORS, son))
3937 break_up_subtract_bb (son);
3938 }
3939
3940 /* Used for repeated factor analysis. */
3941 struct repeat_factor_d
3942 {
3943 /* An SSA name that occurs in a multiply chain. */
3944 tree factor;
3945
3946 /* Cached rank of the factor. */
3947 unsigned rank;
3948
3949 /* Number of occurrences of the factor in the chain. */
3950 HOST_WIDE_INT count;
3951
3952 /* An SSA name representing the product of this factor and
3953 all factors appearing later in the repeated factor vector. */
3954 tree repr;
3955 };
3956
3957 typedef struct repeat_factor_d repeat_factor, *repeat_factor_t;
3958 typedef const struct repeat_factor_d *const_repeat_factor_t;
3959
3960
3961 static vec<repeat_factor> repeat_factor_vec;
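/* As a small illustration: for a linearized product x * x * y * y * y the
   vector would roughly end up holding { x, count 2 } and { y, count 3 },
   with repr remaining NULL_TREE until attempt_builtin_powi below starts
   forming partial products.  */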
3962
3963 /* Used for sorting the repeat factor vector. Sort primarily by
3964 ascending occurrence count, secondarily by descending rank. */
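/* For example, entries { z, count 4, rank 3 }, { x, count 2, rank 9 } and
   { y, count 2, rank 5 } would sort as x, y, z: the count-2 entries come
   first, with the higher-ranked x ahead of y.  */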
3965
3966 static int
3967 compare_repeat_factors (const void *x1, const void *x2)
3968 {
3969 const_repeat_factor_t rf1 = (const_repeat_factor_t) x1;
3970 const_repeat_factor_t rf2 = (const_repeat_factor_t) x2;
3971
3972 if (rf1->count != rf2->count)
3973 return rf1->count - rf2->count;
3974
3975 return rf2->rank - rf1->rank;
3976 }
3977
3978 /* Look for repeated operands in OPS in the multiply tree rooted at
3979 STMT. Replace them with an optimal sequence of multiplies and powi
3980 builtin calls, and remove the used operands from OPS. Return an
3981 SSA name representing the value of the replacement sequence. */
3982
3983 static tree
3984 attempt_builtin_powi (gimple stmt, vec<operand_entry_t> *ops)
3985 {
3986 unsigned i, j, vec_len;
3987 int ii;
3988 operand_entry_t oe;
3989 repeat_factor_t rf1, rf2;
3990 repeat_factor rfnew;
3991 tree result = NULL_TREE;
3992 tree target_ssa, iter_result;
3993 tree type = TREE_TYPE (gimple_get_lhs (stmt));
3994 tree powi_fndecl = mathfn_built_in (type, BUILT_IN_POWI);
3995 gimple_stmt_iterator gsi = gsi_for_stmt (stmt);
3996 gimple mul_stmt, pow_stmt;
3997
3998 /* Nothing to do if BUILT_IN_POWI doesn't exist for this type and
3999 target. */
4000 if (!powi_fndecl)
4001 return NULL_TREE;
4002
4003 /* Allocate the repeated factor vector. */
4004 repeat_factor_vec.create (10);
4005
4006 /* Scan the OPS vector for all SSA names in the product and build
4007 up a vector of occurrence counts for each factor. */
4008 FOR_EACH_VEC_ELT (*ops, i, oe)
4009 {
4010 if (TREE_CODE (oe->op) == SSA_NAME)
4011 {
4012 FOR_EACH_VEC_ELT (repeat_factor_vec, j, rf1)
4013 {
4014 if (rf1->factor == oe->op)
4015 {
4016 rf1->count += oe->count;
4017 break;
4018 }
4019 }
4020
4021 if (j >= repeat_factor_vec.length ())
4022 {
4023 rfnew.factor = oe->op;
4024 rfnew.rank = oe->rank;
4025 rfnew.count = oe->count;
4026 rfnew.repr = NULL_TREE;
4027 repeat_factor_vec.safe_push (rfnew);
4028 }
4029 }
4030 }
4031
4032 /* Sort the repeated factor vector by (a) increasing occurrence count,
4033 and (b) decreasing rank. */
4034 repeat_factor_vec.qsort (compare_repeat_factors);
4035
4036 /* It is generally best to combine as many base factors as possible
4037 into a product before applying __builtin_powi to the result.
4038 However, the sort order chosen for the repeated factor vector
4039 allows us to cache partial results for the product of the base
4040 factors for subsequent use. When we already have a cached partial
4041 result from a previous iteration, it is best to make use of it
4042 before looking for another __builtin_powi opportunity.
4043
4044 As an example, consider x * x * y * y * y * z * z * z * z.
4045 We want to first compose the product x * y * z, raise it to the
4046 second power, then multiply this by y * z, and finally multiply
4047 by z. This can be done in 5 multiplies provided we cache y * z
4048 for use in both expressions:
4049
4050 t1 = y * z
4051 t2 = t1 * x
4052 t3 = t2 * t2
4053 t4 = t1 * t3
4054 result = t4 * z
4055
4056 If we instead ignored the cached y * z and first multiplied by
4057 the __builtin_powi opportunity z * z, we would get the inferior:
4058
4059 t1 = y * z
4060 t2 = t1 * x
4061 t3 = t2 * t2
4062 t4 = z * z
4063 t5 = t3 * t4
4064 result = t5 * y */
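/* In terms of the statements the code below actually emits, the plan
   above would look roughly like (temporary names illustrative):

     reassocpow_1 = y * z;
     reassocpow_2 = x * reassocpow_1;
     reassocpow_3 = __builtin_powi (reassocpow_2, 2);
     reassocpow_4 = reassocpow_3 * reassocpow_1;
     result       = reassocpow_4 * z;

   i.e. the squaring is emitted as a __builtin_powi call and the later
   multiplies reuse the cached partial product y * z.  */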
4065
4066 vec_len = repeat_factor_vec.length ();
4067
4068 /* Repeatedly look for opportunities to create a builtin_powi call. */
4069 while (true)
4070 {
4071 HOST_WIDE_INT power;
4072
4073 /* First look for the largest cached product of factors from
4074 preceding iterations. If found, create a builtin_powi for
4075 it if the minimum occurrence count for its factors is at
4076 least 2, or just use this cached product as our next
4077 multiplicand if the minimum occurrence count is 1. */
4078 FOR_EACH_VEC_ELT (repeat_factor_vec, j, rf1)
4079 {
4080 if (rf1->repr && rf1->count > 0)
4081 break;
4082 }
4083
4084 if (j < vec_len)
4085 {
4086 power = rf1->count;
4087
4088 if (power == 1)
4089 {
4090 iter_result = rf1->repr;
4091
4092 if (dump_file && (dump_flags & TDF_DETAILS))
4093 {
4094 unsigned elt;
4095 repeat_factor_t rf;
4096 fputs ("Multiplying by cached product ", dump_file);
4097 for (elt = j; elt < vec_len; elt++)
4098 {
4099 rf = &repeat_factor_vec[elt];
4100 print_generic_expr (dump_file, rf->factor, 0);
4101 if (elt < vec_len - 1)
4102 fputs (" * ", dump_file);
4103 }
4104 fputs ("\n", dump_file);
4105 }
4106 }
4107 else
4108 {
4109 iter_result = make_temp_ssa_name (type, NULL, "reassocpow");
4110 pow_stmt = gimple_build_call (powi_fndecl, 2, rf1->repr,
4111 build_int_cst (integer_type_node,
4112 power));
4113 gimple_call_set_lhs (pow_stmt, iter_result);
4114 gimple_set_location (pow_stmt, gimple_location (stmt));
4115 gsi_insert_before (&gsi, pow_stmt, GSI_SAME_STMT);
4116
4117 if (dump_file && (dump_flags & TDF_DETAILS))
4118 {
4119 unsigned elt;
4120 repeat_factor_t rf;
4121 fputs ("Building __builtin_pow call for cached product (",
4122 dump_file);
4123 for (elt = j; elt < vec_len; elt++)
4124 {
4125 rf = &repeat_factor_vec[elt];
4126 print_generic_expr (dump_file, rf->factor, 0);
4127 if (elt < vec_len - 1)
4128 fputs (" * ", dump_file);
4129 }
4130 fprintf (dump_file, ")^"HOST_WIDE_INT_PRINT_DEC"\n",
4131 power);
4132 }
4133 }
4134 }
4135 else
4136 {
4137 /* Otherwise, find the first factor in the repeated factor
4138 vector whose occurrence count is at least 2. If no such
4139 factor exists, there are no builtin_powi opportunities
4140 remaining. */
4141 FOR_EACH_VEC_ELT (repeat_factor_vec, j, rf1)
4142 {
4143 if (rf1->count >= 2)
4144 break;
4145 }
4146
4147 if (j >= vec_len)
4148 break;
4149
4150 power = rf1->count;
4151
4152 if (dump_file && (dump_flags & TDF_DETAILS))
4153 {
4154 unsigned elt;
4155 repeat_factor_t rf;
4156 fputs ("Building __builtin_pow call for (", dump_file);
4157 for (elt = j; elt < vec_len; elt++)
4158 {
4159 rf = &repeat_factor_vec[elt];
4160 print_generic_expr (dump_file, rf->factor, 0);
4161 if (elt < vec_len - 1)
4162 fputs (" * ", dump_file);
4163 }
4164 fprintf (dump_file, ")^"HOST_WIDE_INT_PRINT_DEC"\n", power);
4165 }
4166
4167 reassociate_stats.pows_created++;
4168
4169 /* Visit each element of the vector in reverse order (so that
4170 high-occurrence elements are visited first, and within the
4171 same occurrence count, lower-ranked elements are visited
4172 first). Form a linear product of all elements in this order
4173 whose occurrence count is at least that of element J.
4174 Record the SSA name representing the product of each element
4175 with all subsequent elements in the vector. */
4176 if (j == vec_len - 1)
4177 rf1->repr = rf1->factor;
4178 else
4179 {
4180 for (ii = vec_len - 2; ii >= (int)j; ii--)
4181 {
4182 tree op1, op2;
4183
4184 rf1 = &repeat_factor_vec[ii];
4185 rf2 = &repeat_factor_vec[ii + 1];
4186
4187 /* Init the last factor's representative to be itself. */
4188 if (!rf2->repr)
4189 rf2->repr = rf2->factor;
4190
4191 op1 = rf1->factor;
4192 op2 = rf2->repr;
4193
4194 target_ssa = make_temp_ssa_name (type, NULL, "reassocpow");
4195 mul_stmt = gimple_build_assign_with_ops (MULT_EXPR,
4196 target_ssa,
4197 op1, op2);
4198 gimple_set_location (mul_stmt, gimple_location (stmt));
4199 gsi_insert_before (&gsi, mul_stmt, GSI_SAME_STMT);
4200 rf1->repr = target_ssa;
4201
4202 /* Don't reprocess the multiply we just introduced. */
4203 gimple_set_visited (mul_stmt, true);
4204 }
4205 }
4206
4207 /* Form a call to __builtin_powi for the maximum product
4208 just formed, raised to the power obtained earlier. */
4209 rf1 = &repeat_factor_vec[j];
4210 iter_result = make_temp_ssa_name (type, NULL, "reassocpow");
4211 pow_stmt = gimple_build_call (powi_fndecl, 2, rf1->repr,
4212 build_int_cst (integer_type_node,
4213 power));
4214 gimple_call_set_lhs (pow_stmt, iter_result);
4215 gimple_set_location (pow_stmt, gimple_location (stmt));
4216 gsi_insert_before (&gsi, pow_stmt, GSI_SAME_STMT);
4217 }
4218
4219 /* If we previously formed at least one other builtin_powi call,
4220 form the product of this one and those others. */
4221 if (result)
4222 {
4223 tree new_result = make_temp_ssa_name (type, NULL, "reassocpow");
4224 mul_stmt = gimple_build_assign_with_ops (MULT_EXPR, new_result,
4225 result, iter_result);
4226 gimple_set_location (mul_stmt, gimple_location (stmt));
4227 gsi_insert_before (&gsi, mul_stmt, GSI_SAME_STMT);
4228 gimple_set_visited (mul_stmt, true);
4229 result = new_result;
4230 }
4231 else
4232 result = iter_result;
4233
4234 /* Decrement the occurrence count of each element in the product
4235 by the count found above, and remove this many copies of each
4236 factor from OPS. */
4237 for (i = j; i < vec_len; i++)
4238 {
4239 unsigned k = power;
4240 unsigned n;
4241
4242 rf1 = &repeat_factor_vec[i];
4243 rf1->count -= power;
4244
4245 FOR_EACH_VEC_ELT_REVERSE (*ops, n, oe)
4246 {
4247 if (oe->op == rf1->factor)
4248 {
4249 if (oe->count <= k)
4250 {
4251 ops->ordered_remove (n);
4252 k -= oe->count;
4253
4254 if (k == 0)
4255 break;
4256 }
4257 else
4258 {
4259 oe->count -= k;
4260 break;
4261 }
4262 }
4263 }
4264 }
4265 }
4266
4267 /* At this point all elements in the repeated factor vector have a
4268 remaining occurrence count of 0 or 1, and those with a count of 1
4269 don't have cached representatives. Re-sort the ops vector and
4270 clean up. */
4271 ops->qsort (sort_by_operand_rank);
4272 repeat_factor_vec.release ();
4273
4274 /* Return the final product computed herein. Note that there may
4275 still be some elements with single occurrence count left in OPS;
4276 those will be handled by the normal reassociation logic. */
4277 return result;
4278 }
4279
4280 /* Transform STMT at *GSI into a copy by replacing its rhs with NEW_RHS. */
4281
4282 static void
4283 transform_stmt_to_copy (gimple_stmt_iterator *gsi, gimple stmt, tree new_rhs)
4284 {
4285 tree rhs1;
4286
4287 if (dump_file && (dump_flags & TDF_DETAILS))
4288 {
4289 fprintf (dump_file, "Transforming ");
4290 print_gimple_stmt (dump_file, stmt, 0, 0);
4291 }
4292
4293 rhs1 = gimple_assign_rhs1 (stmt);
4294 gimple_assign_set_rhs_from_tree (gsi, new_rhs);
4295 update_stmt (stmt);
4296 remove_visited_stmt_chain (rhs1);
4297
4298 if (dump_file && (dump_flags & TDF_DETAILS))
4299 {
4300 fprintf (dump_file, " into ");
4301 print_gimple_stmt (dump_file, stmt, 0, 0);
4302 }
4303 }
4304
4305 /* Transform STMT at *GSI into a multiply of RHS1 and RHS2. */
4306
4307 static void
4308 transform_stmt_to_multiply (gimple_stmt_iterator *gsi, gimple stmt,
4309 tree rhs1, tree rhs2)
4310 {
4311 if (dump_file && (dump_flags & TDF_DETAILS))
4312 {
4313 fprintf (dump_file, "Transforming ");
4314 print_gimple_stmt (dump_file, stmt, 0, 0);
4315 }
4316
4317 gimple_assign_set_rhs_with_ops (gsi, MULT_EXPR, rhs1, rhs2);
4318 update_stmt (gsi_stmt (*gsi));
4319 remove_visited_stmt_chain (rhs1);
4320
4321 if (dump_file && (dump_flags & TDF_DETAILS))
4322 {
4323 fprintf (dump_file, " into ");
4324 print_gimple_stmt (dump_file, stmt, 0, 0);
4325 }
4326 }
4327
4328 /* Reassociate expressions in basic block BB and the blocks that are
4329    its children in the post-dominator tree. */
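/* As a rough end-to-end illustration: for a float computation
   x = a * b * a compiled with -funsafe-math-optimizations, the chain is
   linearized into the operands { a, a, b }, attempt_builtin_powi turns
   the repeated factor into t = __builtin_powi (a, 2), and the statement
   is then rewritten as x = b * t.  */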
4330
4331 static void
4332 reassociate_bb (basic_block bb)
4333 {
4334 gimple_stmt_iterator gsi;
4335 basic_block son;
4336 gimple stmt = last_stmt (bb);
4337
4338 if (stmt && !gimple_visited_p (stmt))
4339 maybe_optimize_range_tests (stmt);
4340
4341 for (gsi = gsi_last_bb (bb); !gsi_end_p (gsi); gsi_prev (&gsi))
4342 {
4343 stmt = gsi_stmt (gsi);
4344
4345 if (is_gimple_assign (stmt)
4346 && !stmt_could_throw_p (stmt))
4347 {
4348 tree lhs, rhs1, rhs2;
4349 enum tree_code rhs_code = gimple_assign_rhs_code (stmt);
4350
4351 /* If this is not a gimple binary expression, there is
4352 nothing for us to do with it. */
4353 if (get_gimple_rhs_class (rhs_code) != GIMPLE_BINARY_RHS)
4354 continue;
4355
4356 /* If this was part of an already processed statement,
4357 we don't need to touch it again. */
4358 if (gimple_visited_p (stmt))
4359 {
4360 /* This statement might have become dead because of previous
4361 reassociations. */
4362 if (has_zero_uses (gimple_get_lhs (stmt)))
4363 {
4364 gsi_remove (&gsi, true);
4365 release_defs (stmt);
4366 /* We might have removed the last stmt above, which
4367    leaves the iterator at the end of the sequence.
4368    In that case reset it to the last stmt of the
4369    block.  If the block has become empty as well,
4370    there is nothing left to iterate over, so bail
4371    out. */
4372 if (gsi_end_p (gsi))
4373 {
4374 gsi = gsi_last_bb (bb);
4375 if (gsi_end_p (gsi))
4376 break;
4377 }
4378 }
4379 continue;
4380 }
4381
4382 lhs = gimple_assign_lhs (stmt);
4383 rhs1 = gimple_assign_rhs1 (stmt);
4384 rhs2 = gimple_assign_rhs2 (stmt);
4385
4386 /* Except for bitwise and min/max operations, not every type
4387    can be reassociated.  Verify that here. */
4388 if (rhs_code != BIT_IOR_EXPR
4389 && rhs_code != BIT_AND_EXPR
4390 && rhs_code != BIT_XOR_EXPR
4391 && rhs_code != MIN_EXPR
4392 && rhs_code != MAX_EXPR
4393 && (!can_reassociate_p (lhs)
4394 || !can_reassociate_p (rhs1)
4395 || !can_reassociate_p (rhs2)))
4396 continue;
4397
4398 if (associative_tree_code (rhs_code))
4399 {
4400 vec<operand_entry_t> ops = vNULL;
4401 tree powi_result = NULL_TREE;
4402
4403 /* There may be no immediate uses left by the time we
4404 get here because we may have eliminated them all. */
4405 if (TREE_CODE (lhs) == SSA_NAME && has_zero_uses (lhs))
4406 continue;
4407
4408 gimple_set_visited (stmt, true);
4409 linearize_expr_tree (&ops, stmt, true, true);
4410 ops.qsort (sort_by_operand_rank);
4411 optimize_ops_list (rhs_code, &ops);
4412 if (undistribute_ops_list (rhs_code, &ops,
4413 loop_containing_stmt (stmt)))
4414 {
4415 ops.qsort (sort_by_operand_rank);
4416 optimize_ops_list (rhs_code, &ops);
4417 }
4418
4419 if (rhs_code == BIT_IOR_EXPR || rhs_code == BIT_AND_EXPR)
4420 optimize_range_tests (rhs_code, &ops);
4421
4422 if (first_pass_instance
4423 && rhs_code == MULT_EXPR
4424 && flag_unsafe_math_optimizations)
4425 powi_result = attempt_builtin_powi (stmt, &ops);
4426
4427 /* If the operand vector is now empty, all operands were
4428 consumed by the __builtin_powi optimization. */
4429 if (ops.length () == 0)
4430 transform_stmt_to_copy (&gsi, stmt, powi_result);
4431 else if (ops.length () == 1)
4432 {
4433 tree last_op = ops.last ()->op;
4434
4435 if (powi_result)
4436 transform_stmt_to_multiply (&gsi, stmt, last_op,
4437 powi_result);
4438 else
4439 transform_stmt_to_copy (&gsi, stmt, last_op);
4440 }
4441 else
4442 {
4443 enum machine_mode mode = TYPE_MODE (TREE_TYPE (lhs));
4444 int ops_num = ops.length ();
4445 int width = get_reassociation_width (ops_num, rhs_code, mode);
4446 tree new_lhs = lhs;
4447
4448 if (dump_file && (dump_flags & TDF_DETAILS))
4449 fprintf (dump_file,
4450 "Width = %d was chosen for reassociation\n", width);
4451
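/* With a width greater than one the chain is emitted as several shorter
   independent sub-chains, e.g. (x1 + x2) + (x3 + x4) rather than
   (((x1 + x2) + x3) + x4), exposing more instruction-level
   parallelism.  */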
4452 if (width > 1
4453 && ops.length () > 3)
4454 rewrite_expr_tree_parallel (stmt, width, ops);
4455 else
4456 {
4457 /* When there are three operands left, we want
4458 to make sure the ones that get the double
4459 binary op are chosen wisely. */
4460 int len = ops.length ();
4461 if (len >= 3)
4462 swap_ops_for_binary_stmt (ops, len - 3, stmt);
4463
4464 new_lhs = rewrite_expr_tree (stmt, 0, ops,
4465 powi_result != NULL);
4466 }
4467
4468 /* If we combined some repeated factors into a
4469 __builtin_powi call, multiply that result by the
4470 reassociated operands. */
4471 if (powi_result)
4472 {
4473 gimple mul_stmt, lhs_stmt = SSA_NAME_DEF_STMT (lhs);
4474 tree type = TREE_TYPE (lhs);
4475 tree target_ssa = make_temp_ssa_name (type, NULL,
4476 "reassocpow");
4477 gimple_set_lhs (lhs_stmt, target_ssa);
4478 update_stmt (lhs_stmt);
4479 if (lhs != new_lhs)
4480 target_ssa = new_lhs;
4481 mul_stmt = gimple_build_assign_with_ops (MULT_EXPR, lhs,
4482 powi_result,
4483 target_ssa);
4484 gimple_set_location (mul_stmt, gimple_location (stmt));
4485 gsi_insert_after (&gsi, mul_stmt, GSI_NEW_STMT);
4486 }
4487 }
4488
4489 ops.release ();
4490 }
4491 }
4492 }
4493 for (son = first_dom_son (CDI_POST_DOMINATORS, bb);
4494 son;
4495 son = next_dom_son (CDI_POST_DOMINATORS, son))
4496 reassociate_bb (son);
4497 }
4498
4499 void dump_ops_vector (FILE *file, vec<operand_entry_t> ops);
4500 void debug_ops_vector (vec<operand_entry_t> ops);
4501
4502 /* Dump the operand entry vector OPS to FILE. */
4503
4504 void
4505 dump_ops_vector (FILE *file, vec<operand_entry_t> ops)
4506 {
4507 operand_entry_t oe;
4508 unsigned int i;
4509
4510 FOR_EACH_VEC_ELT (ops, i, oe)
4511 {
4512 fprintf (file, "Op %d -> rank: %d, tree: ", i, oe->rank);
4513 print_generic_expr (file, oe->op, 0);
4514 }
4515 }
4516
4517 /* Dump the operand entry vector OPS to STDERR. */
4518
4519 DEBUG_FUNCTION void
4520 debug_ops_vector (vec<operand_entry_t> ops)
4521 {
4522 dump_ops_vector (stderr, ops);
4523 }
4524
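/* Walk the whole function: break up subtracts while walking the
   dominator tree from the entry block, then reassociate expression
   chains while walking the post-dominator tree from the exit block.  */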
4525 static void
4526 do_reassoc (void)
4527 {
4528 break_up_subtract_bb (ENTRY_BLOCK_PTR);
4529 reassociate_bb (EXIT_BLOCK_PTR);
4530 }
4531
4532 /* Initialize the reassociation pass. */
4533
4534 static void
4535 init_reassoc (void)
4536 {
4537 int i;
4538 long rank = 2;
4539 int *bbs = XNEWVEC (int, n_basic_blocks - NUM_FIXED_BLOCKS);
4540
4541 /* Find the loops, so that we can prevent moving calculations in
4542 them. */
4543 loop_optimizer_init (AVOID_CFG_MODIFICATIONS);
4544
4545 memset (&reassociate_stats, 0, sizeof (reassociate_stats));
4546
4547 operand_entry_pool = create_alloc_pool ("operand entry pool",
4548 sizeof (struct operand_entry), 30);
4549 next_operand_entry_id = 0;
4550
4551 /* Reverse RPO (Reverse Post Order) gives us an ordering of the
4552    blocks in which deeper loops come later. */
4553 pre_and_rev_post_order_compute (NULL, bbs, false);
4554 bb_rank = XCNEWVEC (long, last_basic_block);
4555 operand_rank = pointer_map_create ();
4556
4557 /* Give each default definition a distinct rank. This includes
4558 parameters and the static chain. Walk backwards over all
4559 SSA names so that we get proper rank ordering according
4560 to tree_swap_operands_p. */
4561 for (i = num_ssa_names - 1; i > 0; --i)
4562 {
4563 tree name = ssa_name (i);
4564 if (name && SSA_NAME_IS_DEFAULT_DEF (name))
4565 insert_operand_rank (name, ++rank);
4566 }
4567
4568 /* Set up rank for each BB */
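/* Shifting spaces the per-block base ranks far apart so that the small
   per-statement rank increments used when ranking expressions within a
   block do not run into the next block's range (see get_rank).  */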
4569 for (i = 0; i < n_basic_blocks - NUM_FIXED_BLOCKS; i++)
4570 bb_rank[bbs[i]] = ++rank << 16;
4571
4572 free (bbs);
4573 calculate_dominance_info (CDI_POST_DOMINATORS);
4574 plus_negates = vNULL;
4575 }
4576
4577 /* Cleanup after the reassociation pass, and print stats if
4578 requested. */
4579
4580 static void
4581 fini_reassoc (void)
4582 {
4583 statistics_counter_event (cfun, "Linearized",
4584 reassociate_stats.linearized);
4585 statistics_counter_event (cfun, "Constants eliminated",
4586 reassociate_stats.constants_eliminated);
4587 statistics_counter_event (cfun, "Ops eliminated",
4588 reassociate_stats.ops_eliminated);
4589 statistics_counter_event (cfun, "Statements rewritten",
4590 reassociate_stats.rewritten);
4591 statistics_counter_event (cfun, "Built-in pow[i] calls encountered",
4592 reassociate_stats.pows_encountered);
4593 statistics_counter_event (cfun, "Built-in powi calls created",
4594 reassociate_stats.pows_created);
4595
4596 pointer_map_destroy (operand_rank);
4597 free_alloc_pool (operand_entry_pool);
4598 free (bb_rank);
4599 plus_negates.release ();
4600 free_dominance_info (CDI_POST_DOMINATORS);
4601 loop_optimizer_finalize ();
4602 }
4603
4604 /* Gate and execute functions for Reassociation. */
4605
4606 static unsigned int
4607 execute_reassoc (void)
4608 {
4609 init_reassoc ();
4610
4611 do_reassoc ();
4612 repropagate_negates ();
4613
4614 fini_reassoc ();
4615 return 0;
4616 }
4617
4618 static bool
4619 gate_tree_ssa_reassoc (void)
4620 {
4621 return flag_tree_reassoc != 0;
4622 }
4623
4624 namespace {
4625
4626 const pass_data pass_data_reassoc =
4627 {
4628 GIMPLE_PASS, /* type */
4629 "reassoc", /* name */
4630 OPTGROUP_NONE, /* optinfo_flags */
4631 true, /* has_gate */
4632 true, /* has_execute */
4633 TV_TREE_REASSOC, /* tv_id */
4634 ( PROP_cfg | PROP_ssa ), /* properties_required */
4635 0, /* properties_provided */
4636 0, /* properties_destroyed */
4637 0, /* todo_flags_start */
4638 ( TODO_verify_ssa
4639 | TODO_update_ssa_only_virtuals
4640 | TODO_verify_flow ), /* todo_flags_finish */
4641 };
4642
4643 class pass_reassoc : public gimple_opt_pass
4644 {
4645 public:
4646 pass_reassoc (gcc::context *ctxt)
4647 : gimple_opt_pass (pass_data_reassoc, ctxt)
4648 {}
4649
4650 /* opt_pass methods: */
4651 opt_pass * clone () { return new pass_reassoc (m_ctxt); }
4652 bool gate () { return gate_tree_ssa_reassoc (); }
4653 unsigned int execute () { return execute_reassoc (); }
4654
4655 }; // class pass_reassoc
4656
4657 } // anon namespace
4658
4659 gimple_opt_pass *
4660 make_pass_reassoc (gcc::context *ctxt)
4661 {
4662 return new pass_reassoc (ctxt);
4663 }