1 /* Reassociation for trees.
2 Copyright (C) 2005-2013 Free Software Foundation, Inc.
3 Contributed by Daniel Berlin <dan@dberlin.org>
5 This file is part of GCC.
7 GCC is free software; you can redistribute it and/or modify
8 it under the terms of the GNU General Public License as published by
9 the Free Software Foundation; either version 3, or (at your option)
12 GCC is distributed in the hope that it will be useful,
13 but WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 GNU General Public License for more details.
17 You should have received a copy of the GNU General Public License
18 along with GCC; see the file COPYING3. If not see
19 <http://www.gnu.org/licenses/>. */
23 #include "coretypes.h"
24 #include "hash-table.h"
29 #include "basic-block.h"
30 #include "gimple-pretty-print.h"
31 #include "tree-inline.h"
33 #include "gimple-iterator.h"
34 #include "gimple-ssa.h"
36 #include "tree-phinodes.h"
37 #include "ssa-iterators.h"
38 #include "tree-ssanames.h"
39 #include "tree-ssa-loop-niter.h"
40 #include "tree-ssa-loop.h"
43 #include "tree-iterator.h"
44 #include "tree-pass.h"
45 #include "alloc-pool.h"
47 #include "langhooks.h"
48 #include "pointer-set.h"
53 #include "diagnostic-core.h"
55 /* This is a simple global reassociation pass. It is, in part, based
56 on the LLVM pass of the same name (They do some things more/less
57 than we do, in different orders, etc).
59 It consists of five steps:
61 1. Breaking up subtract operations into addition + negate, where
62 it would promote the reassociation of adds.
64 2. Left linearization of the expression trees, so that (A+B)+(C+D)
65 becomes (((A+B)+C)+D), which is easier for us to rewrite later.
66 During linearization, we place the operands of the binary
67 expressions into a vector of operand_entry_t
69 3. Optimization of the operand lists, eliminating things like a +
72 3a. Combine repeated factors with the same occurrence counts
73 into a __builtin_powi call that will later be optimized into
74 an optimal number of multiplies.
76 4. Rewrite the expression trees we linearized and optimized so
77 they are in proper rank order.
79 5. Repropagate negates, as nothing else will clean it up ATM.
81 A bit of theory on #4, since nobody seems to write anything down
82 about why it makes sense to do it the way they do it:
84 We could do this much nicer theoretically, but don't (for reasons
85 explained after how to do it theoretically nice :P).
87 In order to promote the most redundancy elimination, you want
88 binary expressions whose operands are the same rank (or
89 preferably, the same value) exposed to the redundancy eliminator,
90 for possible elimination.
92 So the way to do this if we really cared, is to build the new op
93 tree from the leaves to the roots, merging as you go, and putting the
94 new op on the end of the worklist, until you are left with one
95 thing on the worklist.
97 IE if you have to rewrite the following set of operands (listed with
98 rank in parentheses), with opcode PLUS_EXPR:
100 a (1), b (1), c (1), d (2), e (2)
103 We start with our merge worklist empty, and the ops list with all of
106 You want to first merge all leaves of the same rank, as much as
109 So first build a binary op of
111 mergetmp = a + b, and put "mergetmp" on the merge worklist.
113 Because there is no three operand form of PLUS_EXPR, c is not going to
114 be exposed to redundancy elimination as a rank 1 operand.
116 So you might as well throw it on the merge worklist (you could also
117 consider it to now be a rank two operand, and merge it with d and e,
118 but in this case, you then have evicted e from a binary op. So at
119 least in this situation, you can't win.)
121 Then build a binary op of d + e
124 and put mergetmp2 on the merge worklist.
126 so merge worklist = {mergetmp, c, mergetmp2}
128 Continue building binary ops of these operations until you have only
129 one operation left on the worklist.
134 mergetmp3 = mergetmp + c
136 worklist = {mergetmp2, mergetmp3}
138 mergetmp4 = mergetmp2 + mergetmp3
140 worklist = {mergetmp4}
142 because we have one operation left, we can now just set the original
143 statement equal to the result of that operation.
145 This will at least expose a + b and d + e to redundancy elimination
146 as binary operations.
148 For extra points, you can reuse the old statements to build the
149 mergetmps, since you shouldn't run out.
151 So why don't we do this?
153 Because it's expensive, and rarely will help. Most trees we are
154 reassociating have 3 or less ops. If they have 2 ops, they already
155 will be written into a nice single binary op. If you have 3 ops, a
156 single simple check suffices to tell you whether the first two are of the
157 same rank. If so, you know to order it
160 newstmt = mergetmp + op3
164 newstmt = mergetmp + op1
166 If all three are of the same rank, you can't expose them all in a
167 single binary operator anyway, so the above is *still* the best you
170 Thus, this is what we do. When we have three ops left, we check to see
171 what order to put them in, and call it a day. As a nod to vector sum
172 reduction, we check if any of the ops are really a phi node that is a
173 destructive update for the associating op, and keep the destructive
174 update together for vector sum reduction recognition. */
181 int constants_eliminated
;
184 int pows_encountered
;
188 /* Operator, rank pair. */
189 typedef struct operand_entry
197 static alloc_pool operand_entry_pool
;
199 /* This is used to assign a unique ID to each struct operand_entry
200 so that qsort results are identical on different hosts. */
201 static int next_operand_entry_id
;
203 /* Starting rank number for a given basic block, so that we can rank
204 operations using unmovable instructions in that BB based on the bb
206 static long *bb_rank
;
208 /* Operand->rank hashtable. */
209 static struct pointer_map_t
*operand_rank
;
212 static long get_rank (tree
);
215 /* Bias amount for loop-carried phis. We want this to be larger than
216 the depth of any reassociation tree we can see, but not larger than
217 the rank difference between two blocks. */
218 #define PHI_LOOP_BIAS (1 << 15)
220 /* Rank assigned to a phi statement. If STMT is a loop-carried phi of
221 an innermost loop, and the phi has only a single use which is inside
222 the loop, then the rank is the block rank of the loop latch plus an
223 extra bias for the loop-carried dependence. This causes expressions
224 calculated into an accumulator variable to be independent for each
225 iteration of the loop. If STMT is some other phi, the rank is the
226 block rank of its containing block. */
228 phi_rank (gimple stmt
)
230 basic_block bb
= gimple_bb (stmt
);
231 struct loop
*father
= bb
->loop_father
;
237 /* We only care about real loops (those with a latch). */
239 return bb_rank
[bb
->index
];
241 /* Interesting phis must be in headers of innermost loops. */
242 if (bb
!= father
->header
244 return bb_rank
[bb
->index
];
246 /* Ignore virtual SSA_NAMEs. */
247 res
= gimple_phi_result (stmt
);
248 if (virtual_operand_p (res
))
249 return bb_rank
[bb
->index
];
251 /* The phi definition must have a single use, and that use must be
252 within the loop. Otherwise this isn't an accumulator pattern. */
253 if (!single_imm_use (res
, &use
, &use_stmt
)
254 || gimple_bb (use_stmt
)->loop_father
!= father
)
255 return bb_rank
[bb
->index
];
257 /* Look for phi arguments from within the loop. If found, bias this phi. */
258 for (i
= 0; i
< gimple_phi_num_args (stmt
); i
++)
260 tree arg
= gimple_phi_arg_def (stmt
, i
);
261 if (TREE_CODE (arg
) == SSA_NAME
262 && !SSA_NAME_IS_DEFAULT_DEF (arg
))
264 gimple def_stmt
= SSA_NAME_DEF_STMT (arg
);
265 if (gimple_bb (def_stmt
)->loop_father
== father
)
266 return bb_rank
[father
->latch
->index
] + PHI_LOOP_BIAS
;
270 /* Must be an uninteresting phi. */
271 return bb_rank
[bb
->index
];
274 /* If EXP is an SSA_NAME defined by a PHI statement that represents a
275 loop-carried dependence of an innermost loop, return TRUE; else
278 loop_carried_phi (tree exp
)
283 if (TREE_CODE (exp
) != SSA_NAME
284 || SSA_NAME_IS_DEFAULT_DEF (exp
))
287 phi_stmt
= SSA_NAME_DEF_STMT (exp
);
289 if (gimple_code (SSA_NAME_DEF_STMT (exp
)) != GIMPLE_PHI
)
292 /* Non-loop-carried phis have block rank. Loop-carried phis have
293 an additional bias added in. If this phi doesn't have block rank,
294 it's biased and should not be propagated. */
295 block_rank
= bb_rank
[gimple_bb (phi_stmt
)->index
];
297 if (phi_rank (phi_stmt
) != block_rank
)
303 /* Return the maximum of RANK and the rank that should be propagated
304 from expression OP. For most operands, this is just the rank of OP.
305 For loop-carried phis, the value is zero to avoid undoing the bias
306 in favor of the phi. */
308 propagate_rank (long rank
, tree op
)
312 if (loop_carried_phi (op
))
315 op_rank
= get_rank (op
);
317 return MAX (rank
, op_rank
);
320 /* Look up the operand rank structure for expression E. */
323 find_operand_rank (tree e
)
325 void **slot
= pointer_map_contains (operand_rank
, e
);
326 return slot
? (long) (intptr_t) *slot
: -1;
329 /* Insert {E,RANK} into the operand rank hashtable. */
332 insert_operand_rank (tree e
, long rank
)
335 gcc_assert (rank
> 0);
336 slot
= pointer_map_insert (operand_rank
, e
);
338 *slot
= (void *) (intptr_t) rank
;
341 /* Given an expression E, return the rank of the expression. */
346 /* Constants have rank 0. */
347 if (is_gimple_min_invariant (e
))
350 /* SSA_NAME's have the rank of the expression they are the result
352 For globals and uninitialized values, the rank is 0.
353 For function arguments, use the pre-setup rank.
354 For PHI nodes, stores, asm statements, etc, we use the rank of
356 For simple operations, the rank is the maximum rank of any of
357 its operands, or the bb_rank, whichever is less.
358 I make no claims that this is optimal, however, it gives good
361 /* We make an exception to the normal ranking system to break
362 dependences of accumulator variables in loops. Suppose we
363 have a simple one-block loop containing:
370 As shown, each iteration of the calculation into x is fully
371 dependent upon the iteration before it. We would prefer to
372 see this in the form:
379 If the loop is unrolled, the calculations of b and c from
380 different iterations can be interleaved.
382 To obtain this result during reassociation, we bias the rank
383 of the phi definition x_1 upward, when it is recognized as an
384 accumulator pattern. The artificial rank causes it to be
385 added last, providing the desired independence. */
387 if (TREE_CODE (e
) == SSA_NAME
)
394 if (SSA_NAME_IS_DEFAULT_DEF (e
))
395 return find_operand_rank (e
);
397 stmt
= SSA_NAME_DEF_STMT (e
);
398 if (gimple_code (stmt
) == GIMPLE_PHI
)
399 return phi_rank (stmt
);
401 if (!is_gimple_assign (stmt
)
402 || gimple_vdef (stmt
))
403 return bb_rank
[gimple_bb (stmt
)->index
];
405 /* If we already have a rank for this expression, use that. */
406 rank
= find_operand_rank (e
);
410 /* Otherwise, find the maximum rank for the operands. As an
411 exception, remove the bias from loop-carried phis when propagating
412 the rank so that dependent operations are not also biased. */
414 if (gimple_assign_single_p (stmt
))
416 tree rhs
= gimple_assign_rhs1 (stmt
);
417 n
= TREE_OPERAND_LENGTH (rhs
);
419 rank
= propagate_rank (rank
, rhs
);
422 for (i
= 0; i
< n
; i
++)
424 op
= TREE_OPERAND (rhs
, i
);
427 rank
= propagate_rank (rank
, op
);
433 n
= gimple_num_ops (stmt
);
434 for (i
= 1; i
< n
; i
++)
436 op
= gimple_op (stmt
, i
);
438 rank
= propagate_rank (rank
, op
);
442 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
444 fprintf (dump_file
, "Rank for ");
445 print_generic_expr (dump_file
, e
, 0);
446 fprintf (dump_file
, " is %ld\n", (rank
+ 1));
449 /* Note the rank in the hashtable so we don't recompute it. */
450 insert_operand_rank (e
, (rank
+ 1));
454 /* Globals, etc, are rank 0 */
459 /* We want integer ones to end up last no matter what, since they are
460 the ones we can do the most with. */
461 #define INTEGER_CONST_TYPE 1 << 3
462 #define FLOAT_CONST_TYPE 1 << 2
463 #define OTHER_CONST_TYPE 1 << 1
465 /* Classify an invariant tree into integer, float, or other, so that
466 we can sort them to be near other constants of the same type. */
468 constant_type (tree t
)
470 if (INTEGRAL_TYPE_P (TREE_TYPE (t
)))
471 return INTEGER_CONST_TYPE
;
472 else if (SCALAR_FLOAT_TYPE_P (TREE_TYPE (t
)))
473 return FLOAT_CONST_TYPE
;
475 return OTHER_CONST_TYPE
;
478 /* qsort comparison function to sort operand entries PA and PB by rank
479 so that the sorted array is ordered by rank in decreasing order. */
481 sort_by_operand_rank (const void *pa
, const void *pb
)
483 const operand_entry_t oea
= *(const operand_entry_t
*)pa
;
484 const operand_entry_t oeb
= *(const operand_entry_t
*)pb
;
486 /* It's nicer for optimize_expression if constants that are likely
487 to fold when added/multiplied//whatever are put next to each
488 other. Since all constants have rank 0, order them by type. */
489 if (oeb
->rank
== 0 && oea
->rank
== 0)
491 if (constant_type (oeb
->op
) != constant_type (oea
->op
))
492 return constant_type (oeb
->op
) - constant_type (oea
->op
);
494 /* To make sorting result stable, we use unique IDs to determine
496 return oeb
->id
- oea
->id
;
499 /* Lastly, make sure the versions that are the same go next to each
500 other. We use SSA_NAME_VERSION because it's stable. */
501 if ((oeb
->rank
- oea
->rank
== 0)
502 && TREE_CODE (oea
->op
) == SSA_NAME
503 && TREE_CODE (oeb
->op
) == SSA_NAME
)
505 if (SSA_NAME_VERSION (oeb
->op
) != SSA_NAME_VERSION (oea
->op
))
506 return SSA_NAME_VERSION (oeb
->op
) - SSA_NAME_VERSION (oea
->op
);
508 return oeb
->id
- oea
->id
;
511 if (oeb
->rank
!= oea
->rank
)
512 return oeb
->rank
- oea
->rank
;
514 return oeb
->id
- oea
->id
;
517 /* Add an operand entry to *OPS for the tree operand OP. */
520 add_to_ops_vec (vec
<operand_entry_t
> *ops
, tree op
)
522 operand_entry_t oe
= (operand_entry_t
) pool_alloc (operand_entry_pool
);
525 oe
->rank
= get_rank (op
);
526 oe
->id
= next_operand_entry_id
++;
531 /* Add an operand entry to *OPS for the tree operand OP with repeat
535 add_repeat_to_ops_vec (vec
<operand_entry_t
> *ops
, tree op
,
536 HOST_WIDE_INT repeat
)
538 operand_entry_t oe
= (operand_entry_t
) pool_alloc (operand_entry_pool
);
541 oe
->rank
= get_rank (op
);
542 oe
->id
= next_operand_entry_id
++;
546 reassociate_stats
.pows_encountered
++;
549 /* Return true if STMT is reassociable operation containing a binary
550 operation with tree code CODE, and is inside LOOP. */
553 is_reassociable_op (gimple stmt
, enum tree_code code
, struct loop
*loop
)
555 basic_block bb
= gimple_bb (stmt
);
557 if (gimple_bb (stmt
) == NULL
)
560 if (!flow_bb_inside_loop_p (loop
, bb
))
563 if (is_gimple_assign (stmt
)
564 && gimple_assign_rhs_code (stmt
) == code
565 && has_single_use (gimple_assign_lhs (stmt
)))
572 /* Given NAME, if NAME is defined by a unary operation OPCODE, return the
573 operand of the negate operation. Otherwise, return NULL. */
576 get_unary_op (tree name
, enum tree_code opcode
)
578 gimple stmt
= SSA_NAME_DEF_STMT (name
);
580 if (!is_gimple_assign (stmt
))
583 if (gimple_assign_rhs_code (stmt
) == opcode
)
584 return gimple_assign_rhs1 (stmt
);
588 /* If CURR and LAST are a pair of ops that OPCODE allows us to
589 eliminate through equivalences, do so, remove them from OPS, and
590 return true. Otherwise, return false. */
593 eliminate_duplicate_pair (enum tree_code opcode
,
594 vec
<operand_entry_t
> *ops
,
597 operand_entry_t curr
,
598 operand_entry_t last
)
601 /* If we have two of the same op, and the opcode is & |, min, or max,
602 we can eliminate one of them.
603 If we have two of the same op, and the opcode is ^, we can
604 eliminate both of them. */
606 if (last
&& last
->op
== curr
->op
)
614 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
616 fprintf (dump_file
, "Equivalence: ");
617 print_generic_expr (dump_file
, curr
->op
, 0);
618 fprintf (dump_file
, " [&|minmax] ");
619 print_generic_expr (dump_file
, last
->op
, 0);
620 fprintf (dump_file
, " -> ");
621 print_generic_stmt (dump_file
, last
->op
, 0);
624 ops
->ordered_remove (i
);
625 reassociate_stats
.ops_eliminated
++;
630 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
632 fprintf (dump_file
, "Equivalence: ");
633 print_generic_expr (dump_file
, curr
->op
, 0);
634 fprintf (dump_file
, " ^ ");
635 print_generic_expr (dump_file
, last
->op
, 0);
636 fprintf (dump_file
, " -> nothing\n");
639 reassociate_stats
.ops_eliminated
+= 2;
641 if (ops
->length () == 2)
644 add_to_ops_vec (ops
, build_zero_cst (TREE_TYPE (last
->op
)));
649 ops
->ordered_remove (i
-1);
650 ops
->ordered_remove (i
-1);
662 static vec
<tree
> plus_negates
;
664 /* If OPCODE is PLUS_EXPR, CURR->OP is a negate expression or a bitwise not
665 expression, look in OPS for a corresponding positive operation to cancel
666 it out. If we find one, remove the other from OPS, replace
667 OPS[CURRINDEX] with 0 or -1, respectively, and return true. Otherwise,
671 eliminate_plus_minus_pair (enum tree_code opcode
,
672 vec
<operand_entry_t
> *ops
,
673 unsigned int currindex
,
674 operand_entry_t curr
)
681 if (opcode
!= PLUS_EXPR
|| TREE_CODE (curr
->op
) != SSA_NAME
)
684 negateop
= get_unary_op (curr
->op
, NEGATE_EXPR
);
685 notop
= get_unary_op (curr
->op
, BIT_NOT_EXPR
);
686 if (negateop
== NULL_TREE
&& notop
== NULL_TREE
)
689 /* Any non-negated version will have a rank that is one less than
690 the current rank. So once we hit those ranks, if we don't find
693 for (i
= currindex
+ 1;
694 ops
->iterate (i
, &oe
)
695 && oe
->rank
>= curr
->rank
- 1 ;
698 if (oe
->op
== negateop
)
701 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
703 fprintf (dump_file
, "Equivalence: ");
704 print_generic_expr (dump_file
, negateop
, 0);
705 fprintf (dump_file
, " + -");
706 print_generic_expr (dump_file
, oe
->op
, 0);
707 fprintf (dump_file
, " -> 0\n");
710 ops
->ordered_remove (i
);
711 add_to_ops_vec (ops
, build_zero_cst (TREE_TYPE (oe
->op
)));
712 ops
->ordered_remove (currindex
);
713 reassociate_stats
.ops_eliminated
++;
717 else if (oe
->op
== notop
)
719 tree op_type
= TREE_TYPE (oe
->op
);
721 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
723 fprintf (dump_file
, "Equivalence: ");
724 print_generic_expr (dump_file
, notop
, 0);
725 fprintf (dump_file
, " + ~");
726 print_generic_expr (dump_file
, oe
->op
, 0);
727 fprintf (dump_file
, " -> -1\n");
730 ops
->ordered_remove (i
);
731 add_to_ops_vec (ops
, build_int_cst_type (op_type
, -1));
732 ops
->ordered_remove (currindex
);
733 reassociate_stats
.ops_eliminated
++;
739 /* CURR->OP is a negate expr in a plus expr: save it for later
740 inspection in repropagate_negates(). */
741 if (negateop
!= NULL_TREE
)
742 plus_negates
.safe_push (curr
->op
);
747 /* If OPCODE is BIT_IOR_EXPR, BIT_AND_EXPR, and, CURR->OP is really a
748 bitwise not expression, look in OPS for a corresponding operand to
749 cancel it out. If we find one, remove the other from OPS, replace
750 OPS[CURRINDEX] with 0, and return true. Otherwise, return
754 eliminate_not_pairs (enum tree_code opcode
,
755 vec
<operand_entry_t
> *ops
,
756 unsigned int currindex
,
757 operand_entry_t curr
)
763 if ((opcode
!= BIT_IOR_EXPR
&& opcode
!= BIT_AND_EXPR
)
764 || TREE_CODE (curr
->op
) != SSA_NAME
)
767 notop
= get_unary_op (curr
->op
, BIT_NOT_EXPR
);
768 if (notop
== NULL_TREE
)
771 /* Any non-not version will have a rank that is one less than
772 the current rank. So once we hit those ranks, if we don't find
775 for (i
= currindex
+ 1;
776 ops
->iterate (i
, &oe
)
777 && oe
->rank
>= curr
->rank
- 1;
782 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
784 fprintf (dump_file
, "Equivalence: ");
785 print_generic_expr (dump_file
, notop
, 0);
786 if (opcode
== BIT_AND_EXPR
)
787 fprintf (dump_file
, " & ~");
788 else if (opcode
== BIT_IOR_EXPR
)
789 fprintf (dump_file
, " | ~");
790 print_generic_expr (dump_file
, oe
->op
, 0);
791 if (opcode
== BIT_AND_EXPR
)
792 fprintf (dump_file
, " -> 0\n");
793 else if (opcode
== BIT_IOR_EXPR
)
794 fprintf (dump_file
, " -> -1\n");
797 if (opcode
== BIT_AND_EXPR
)
798 oe
->op
= build_zero_cst (TREE_TYPE (oe
->op
));
799 else if (opcode
== BIT_IOR_EXPR
)
800 oe
->op
= build_low_bits_mask (TREE_TYPE (oe
->op
),
801 TYPE_PRECISION (TREE_TYPE (oe
->op
)));
803 reassociate_stats
.ops_eliminated
+= ops
->length () - 1;
805 ops
->quick_push (oe
);
813 /* Use constant value that may be present in OPS to try to eliminate
814 operands. Note that this function is only really used when we've
815 eliminated ops for other reasons, or merged constants. Across
816 single statements, fold already does all of this, plus more. There
817 is little point in duplicating logic, so I've only included the
818 identities that I could ever construct testcases to trigger. */
821 eliminate_using_constants (enum tree_code opcode
,
822 vec
<operand_entry_t
> *ops
)
824 operand_entry_t oelast
= ops
->last ();
825 tree type
= TREE_TYPE (oelast
->op
);
827 if (oelast
->rank
== 0
828 && (INTEGRAL_TYPE_P (type
) || FLOAT_TYPE_P (type
)))
833 if (integer_zerop (oelast
->op
))
835 if (ops
->length () != 1)
837 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
838 fprintf (dump_file
, "Found & 0, removing all other ops\n");
840 reassociate_stats
.ops_eliminated
+= ops
->length () - 1;
843 ops
->quick_push (oelast
);
847 else if (integer_all_onesp (oelast
->op
))
849 if (ops
->length () != 1)
851 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
852 fprintf (dump_file
, "Found & -1, removing\n");
854 reassociate_stats
.ops_eliminated
++;
859 if (integer_all_onesp (oelast
->op
))
861 if (ops
->length () != 1)
863 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
864 fprintf (dump_file
, "Found | -1, removing all other ops\n");
866 reassociate_stats
.ops_eliminated
+= ops
->length () - 1;
869 ops
->quick_push (oelast
);
873 else if (integer_zerop (oelast
->op
))
875 if (ops
->length () != 1)
877 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
878 fprintf (dump_file
, "Found | 0, removing\n");
880 reassociate_stats
.ops_eliminated
++;
885 if (integer_zerop (oelast
->op
)
886 || (FLOAT_TYPE_P (type
)
887 && !HONOR_NANS (TYPE_MODE (type
))
888 && !HONOR_SIGNED_ZEROS (TYPE_MODE (type
))
889 && real_zerop (oelast
->op
)))
891 if (ops
->length () != 1)
893 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
894 fprintf (dump_file
, "Found * 0, removing all other ops\n");
896 reassociate_stats
.ops_eliminated
+= ops
->length () - 1;
898 ops
->quick_push (oelast
);
902 else if (integer_onep (oelast
->op
)
903 || (FLOAT_TYPE_P (type
)
904 && !HONOR_SNANS (TYPE_MODE (type
))
905 && real_onep (oelast
->op
)))
907 if (ops
->length () != 1)
909 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
910 fprintf (dump_file
, "Found * 1, removing\n");
912 reassociate_stats
.ops_eliminated
++;
920 if (integer_zerop (oelast
->op
)
921 || (FLOAT_TYPE_P (type
)
922 && (opcode
== PLUS_EXPR
|| opcode
== MINUS_EXPR
)
923 && fold_real_zero_addition_p (type
, oelast
->op
,
924 opcode
== MINUS_EXPR
)))
926 if (ops
->length () != 1)
928 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
929 fprintf (dump_file
, "Found [|^+] 0, removing\n");
931 reassociate_stats
.ops_eliminated
++;
943 static void linearize_expr_tree (vec
<operand_entry_t
> *, gimple
,
946 /* Structure for tracking and counting operands. */
947 typedef struct oecount_s
{
950 enum tree_code oecode
;
955 /* The heap for the oecount hashtable and the sorted list of operands. */
956 static vec
<oecount
> cvec
;
959 /* Oecount hashtable helpers. */
961 struct oecount_hasher
: typed_noop_remove
<void>
963 /* Note that this hash table stores integers, not pointers.
964 So, observe the casting in the member functions. */
965 typedef void value_type
;
966 typedef void compare_type
;
967 static inline hashval_t
hash (const value_type
*);
968 static inline bool equal (const value_type
*, const compare_type
*);
971 /* Hash function for oecount. */
974 oecount_hasher::hash (const value_type
*p
)
976 const oecount
*c
= &cvec
[(size_t)p
- 42];
977 return htab_hash_pointer (c
->op
) ^ (hashval_t
)c
->oecode
;
980 /* Comparison function for oecount. */
983 oecount_hasher::equal (const value_type
*p1
, const compare_type
*p2
)
985 const oecount
*c1
= &cvec
[(size_t)p1
- 42];
986 const oecount
*c2
= &cvec
[(size_t)p2
- 42];
987 return (c1
->oecode
== c2
->oecode
988 && c1
->op
== c2
->op
);
991 /* Comparison function for qsort sorting oecount elements by count. */
994 oecount_cmp (const void *p1
, const void *p2
)
996 const oecount
*c1
= (const oecount
*)p1
;
997 const oecount
*c2
= (const oecount
*)p2
;
998 if (c1
->cnt
!= c2
->cnt
)
999 return c1
->cnt
- c2
->cnt
;
1001 /* If counts are identical, use unique IDs to stabilize qsort. */
1002 return c1
->id
- c2
->id
;
1005 /* Return TRUE iff STMT represents a builtin call that raises OP
1006 to some exponent. */
1009 stmt_is_power_of_op (gimple stmt
, tree op
)
1013 if (!is_gimple_call (stmt
))
1016 fndecl
= gimple_call_fndecl (stmt
);
1019 || DECL_BUILT_IN_CLASS (fndecl
) != BUILT_IN_NORMAL
)
1022 switch (DECL_FUNCTION_CODE (gimple_call_fndecl (stmt
)))
1024 CASE_FLT_FN (BUILT_IN_POW
):
1025 CASE_FLT_FN (BUILT_IN_POWI
):
1026 return (operand_equal_p (gimple_call_arg (stmt
, 0), op
, 0));
1033 /* Given STMT which is a __builtin_pow* call, decrement its exponent
1034 in place and return the result. Assumes that stmt_is_power_of_op
1035 was previously called for STMT and returned TRUE. */
1037 static HOST_WIDE_INT
1038 decrement_power (gimple stmt
)
1040 REAL_VALUE_TYPE c
, cint
;
1041 HOST_WIDE_INT power
;
1044 switch (DECL_FUNCTION_CODE (gimple_call_fndecl (stmt
)))
1046 CASE_FLT_FN (BUILT_IN_POW
):
1047 arg1
= gimple_call_arg (stmt
, 1);
1048 c
= TREE_REAL_CST (arg1
);
1049 power
= real_to_integer (&c
) - 1;
1050 real_from_integer (&cint
, VOIDmode
, power
, 0, 0);
1051 gimple_call_set_arg (stmt
, 1, build_real (TREE_TYPE (arg1
), cint
));
1054 CASE_FLT_FN (BUILT_IN_POWI
):
1055 arg1
= gimple_call_arg (stmt
, 1);
1056 power
= TREE_INT_CST_LOW (arg1
) - 1;
1057 gimple_call_set_arg (stmt
, 1, build_int_cst (TREE_TYPE (arg1
), power
));
1065 /* Find the single immediate use of STMT's LHS, and replace it
1066 with OP. Remove STMT. If STMT's LHS is the same as *DEF,
1067 replace *DEF with OP as well. */
1070 propagate_op_to_single_use (tree op
, gimple stmt
, tree
*def
)
1075 gimple_stmt_iterator gsi
;
1077 if (is_gimple_call (stmt
))
1078 lhs
= gimple_call_lhs (stmt
);
1080 lhs
= gimple_assign_lhs (stmt
);
1082 gcc_assert (has_single_use (lhs
));
1083 single_imm_use (lhs
, &use
, &use_stmt
);
1087 if (TREE_CODE (op
) != SSA_NAME
)
1088 update_stmt (use_stmt
);
1089 gsi
= gsi_for_stmt (stmt
);
1090 unlink_stmt_vdef (stmt
);
1091 gsi_remove (&gsi
, true);
1092 release_defs (stmt
);
1095 /* Walks the linear chain with result *DEF searching for an operation
1096 with operand OP and code OPCODE removing that from the chain. *DEF
1097 is updated if there is only one operand but no operation left. */
1100 zero_one_operation (tree
*def
, enum tree_code opcode
, tree op
)
1102 gimple stmt
= SSA_NAME_DEF_STMT (*def
);
1108 if (opcode
== MULT_EXPR
1109 && stmt_is_power_of_op (stmt
, op
))
1111 if (decrement_power (stmt
) == 1)
1112 propagate_op_to_single_use (op
, stmt
, def
);
1116 name
= gimple_assign_rhs1 (stmt
);
1118 /* If this is the operation we look for and one of the operands
1119 is ours simply propagate the other operand into the stmts
1121 if (gimple_assign_rhs_code (stmt
) == opcode
1123 || gimple_assign_rhs2 (stmt
) == op
))
1126 name
= gimple_assign_rhs2 (stmt
);
1127 propagate_op_to_single_use (name
, stmt
, def
);
1131 /* We might have a multiply of two __builtin_pow* calls, and
1132 the operand might be hiding in the rightmost one. */
1133 if (opcode
== MULT_EXPR
1134 && gimple_assign_rhs_code (stmt
) == opcode
1135 && TREE_CODE (gimple_assign_rhs2 (stmt
)) == SSA_NAME
1136 && has_single_use (gimple_assign_rhs2 (stmt
)))
1138 gimple stmt2
= SSA_NAME_DEF_STMT (gimple_assign_rhs2 (stmt
));
1139 if (stmt_is_power_of_op (stmt2
, op
))
1141 if (decrement_power (stmt2
) == 1)
1142 propagate_op_to_single_use (op
, stmt2
, def
);
1147 /* Continue walking the chain. */
1148 gcc_assert (name
!= op
1149 && TREE_CODE (name
) == SSA_NAME
);
1150 stmt
= SSA_NAME_DEF_STMT (name
);
1155 /* Returns true if statement S1 dominates statement S2. Like
1156 stmt_dominates_stmt_p, but uses stmt UIDs to optimize. */
1159 reassoc_stmt_dominates_stmt_p (gimple s1
, gimple s2
)
1161 basic_block bb1
= gimple_bb (s1
), bb2
= gimple_bb (s2
);
1163 /* If bb1 is NULL, it should be a GIMPLE_NOP def stmt of an (D)
1164 SSA_NAME. Assume it lives at the beginning of function and
1165 thus dominates everything. */
1166 if (!bb1
|| s1
== s2
)
1169 /* If bb2 is NULL, it doesn't dominate any stmt with a bb. */
1175 /* PHIs in the same basic block are assumed to be
1176 executed all in parallel, if only one stmt is a PHI,
1177 it dominates the other stmt in the same basic block. */
1178 if (gimple_code (s1
) == GIMPLE_PHI
)
1181 if (gimple_code (s2
) == GIMPLE_PHI
)
1184 gcc_assert (gimple_uid (s1
) && gimple_uid (s2
));
1186 if (gimple_uid (s1
) < gimple_uid (s2
))
1189 if (gimple_uid (s1
) > gimple_uid (s2
))
1192 gimple_stmt_iterator gsi
= gsi_for_stmt (s1
);
1193 unsigned int uid
= gimple_uid (s1
);
1194 for (gsi_next (&gsi
); !gsi_end_p (gsi
); gsi_next (&gsi
))
1196 gimple s
= gsi_stmt (gsi
);
1197 if (gimple_uid (s
) != uid
)
1206 return dominated_by_p (CDI_DOMINATORS
, bb2
, bb1
);
1209 /* Insert STMT after INSERT_POINT. */
1212 insert_stmt_after (gimple stmt
, gimple insert_point
)
1214 gimple_stmt_iterator gsi
;
1217 if (gimple_code (insert_point
) == GIMPLE_PHI
)
1218 bb
= gimple_bb (insert_point
);
1219 else if (!stmt_ends_bb_p (insert_point
))
1221 gsi
= gsi_for_stmt (insert_point
);
1222 gimple_set_uid (stmt
, gimple_uid (insert_point
));
1223 gsi_insert_after (&gsi
, stmt
, GSI_NEW_STMT
);
1227 /* We assume INSERT_POINT is a SSA_NAME_DEF_STMT of some SSA_NAME,
1228 thus if it must end a basic block, it should be a call that can
1229 throw, or some assignment that can throw. If it throws, the LHS
1230 of it will not be initialized though, so only valid places using
1231 the SSA_NAME should be dominated by the fallthru edge. */
1232 bb
= find_fallthru_edge (gimple_bb (insert_point
)->succs
)->dest
;
1233 gsi
= gsi_after_labels (bb
);
1234 if (gsi_end_p (gsi
))
1236 gimple_stmt_iterator gsi2
= gsi_last_bb (bb
);
1237 gimple_set_uid (stmt
,
1238 gsi_end_p (gsi2
) ? 1 : gimple_uid (gsi_stmt (gsi2
)));
1241 gimple_set_uid (stmt
, gimple_uid (gsi_stmt (gsi
)));
1242 gsi_insert_before (&gsi
, stmt
, GSI_SAME_STMT
);
1245 /* Builds one statement performing OP1 OPCODE OP2 using TMPVAR for
1246 the result. Places the statement after the definition of either
1247 OP1 or OP2. Returns the new statement. */
1250 build_and_add_sum (tree type
, tree op1
, tree op2
, enum tree_code opcode
)
1252 gimple op1def
= NULL
, op2def
= NULL
;
1253 gimple_stmt_iterator gsi
;
1257 /* Create the addition statement. */
1258 op
= make_ssa_name (type
, NULL
);
1259 sum
= gimple_build_assign_with_ops (opcode
, op
, op1
, op2
);
1261 /* Find an insertion place and insert. */
1262 if (TREE_CODE (op1
) == SSA_NAME
)
1263 op1def
= SSA_NAME_DEF_STMT (op1
);
1264 if (TREE_CODE (op2
) == SSA_NAME
)
1265 op2def
= SSA_NAME_DEF_STMT (op2
);
1266 if ((!op1def
|| gimple_nop_p (op1def
))
1267 && (!op2def
|| gimple_nop_p (op2def
)))
1269 gsi
= gsi_after_labels (single_succ (ENTRY_BLOCK_PTR
));
1270 if (gsi_end_p (gsi
))
1272 gimple_stmt_iterator gsi2
1273 = gsi_last_bb (single_succ (ENTRY_BLOCK_PTR
));
1274 gimple_set_uid (sum
,
1275 gsi_end_p (gsi2
) ? 1 : gimple_uid (gsi_stmt (gsi2
)));
1278 gimple_set_uid (sum
, gimple_uid (gsi_stmt (gsi
)));
1279 gsi_insert_before (&gsi
, sum
, GSI_NEW_STMT
);
1283 gimple insert_point
;
1284 if ((!op1def
|| gimple_nop_p (op1def
))
1285 || (op2def
&& !gimple_nop_p (op2def
)
1286 && reassoc_stmt_dominates_stmt_p (op1def
, op2def
)))
1287 insert_point
= op2def
;
1289 insert_point
= op1def
;
1290 insert_stmt_after (sum
, insert_point
);
1297 /* Perform un-distribution of divisions and multiplications.
1298 A * X + B * X is transformed into (A + B) * X and A / X + B / X
1299 to (A + B) / X for real X.
1301 The algorithm is organized as follows.
1303 - First we walk the addition chain *OPS looking for summands that
1304 are defined by a multiplication or a real division. This results
1305 in the candidates bitmap with relevant indices into *OPS.
1307 - Second we build the chains of multiplications or divisions for
1308 these candidates, counting the number of occurrences of (operand, code)
1309 pairs in all of the candidates chains.
1311 - Third we sort the (operand, code) pairs by number of occurrence and
1312 process them starting with the pair with the most uses.
1314 * For each such pair we walk the candidates again to build a
1315 second candidate bitmap noting all multiplication/division chains
1316 that have at least one occurrence of (operand, code).
1318 * We build an alternate addition chain only covering these
1319 candidates with one (operand, code) operation removed from their
1320 multiplication/division chain.
1322 * The first candidate gets replaced by the alternate addition chain
1323 multiplied/divided by the operand.
1325 * All candidate chains get disabled for further processing and
1326 processing of (operand, code) pairs continues.
1328 The alternate addition chains built are re-processed by the main
1329 reassociation algorithm which allows optimizing a * x * y + b * y * x
1330 to (a + b) * x * y in one invocation of the reassociation pass. */
1333 undistribute_ops_list (enum tree_code opcode
,
1334 vec
<operand_entry_t
> *ops
, struct loop
*loop
)
1336 unsigned int length
= ops
->length ();
1337 operand_entry_t oe1
;
1339 sbitmap candidates
, candidates2
;
1340 unsigned nr_candidates
, nr_candidates2
;
1341 sbitmap_iterator sbi0
;
1342 vec
<operand_entry_t
> *subops
;
1343 hash_table
<oecount_hasher
> ctable
;
1344 bool changed
= false;
1345 int next_oecount_id
= 0;
1348 || opcode
!= PLUS_EXPR
)
1351 /* Build a list of candidates to process. */
1352 candidates
= sbitmap_alloc (length
);
1353 bitmap_clear (candidates
);
1355 FOR_EACH_VEC_ELT (*ops
, i
, oe1
)
1357 enum tree_code dcode
;
1360 if (TREE_CODE (oe1
->op
) != SSA_NAME
)
1362 oe1def
= SSA_NAME_DEF_STMT (oe1
->op
);
1363 if (!is_gimple_assign (oe1def
))
1365 dcode
= gimple_assign_rhs_code (oe1def
);
1366 if ((dcode
!= MULT_EXPR
1367 && dcode
!= RDIV_EXPR
)
1368 || !is_reassociable_op (oe1def
, dcode
, loop
))
1371 bitmap_set_bit (candidates
, i
);
1375 if (nr_candidates
< 2)
1377 sbitmap_free (candidates
);
1381 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
1383 fprintf (dump_file
, "searching for un-distribute opportunities ");
1384 print_generic_expr (dump_file
,
1385 (*ops
)[bitmap_first_set_bit (candidates
)]->op
, 0);
1386 fprintf (dump_file
, " %d\n", nr_candidates
);
1389 /* Build linearized sub-operand lists and the counting table. */
1392 /* ??? Macro arguments cannot have multi-argument template types in
1393 them. This typedef is needed to workaround that limitation. */
1394 typedef vec
<operand_entry_t
> vec_operand_entry_t_heap
;
1395 subops
= XCNEWVEC (vec_operand_entry_t_heap
, ops
->length ());
1396 EXECUTE_IF_SET_IN_BITMAP (candidates
, 0, i
, sbi0
)
1399 enum tree_code oecode
;
1402 oedef
= SSA_NAME_DEF_STMT ((*ops
)[i
]->op
);
1403 oecode
= gimple_assign_rhs_code (oedef
);
1404 linearize_expr_tree (&subops
[i
], oedef
,
1405 associative_tree_code (oecode
), false);
1407 FOR_EACH_VEC_ELT (subops
[i
], j
, oe1
)
1414 c
.id
= next_oecount_id
++;
1417 idx
= cvec
.length () + 41;
1418 slot
= ctable
.find_slot ((void *)idx
, INSERT
);
1421 *slot
= (void *)idx
;
1426 cvec
[(size_t)*slot
- 42].cnt
++;
1432 /* Sort the counting table. */
1433 cvec
.qsort (oecount_cmp
);
1435 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
1438 fprintf (dump_file
, "Candidates:\n");
1439 FOR_EACH_VEC_ELT (cvec
, j
, c
)
1441 fprintf (dump_file
, " %u %s: ", c
->cnt
,
1442 c
->oecode
== MULT_EXPR
1443 ? "*" : c
->oecode
== RDIV_EXPR
? "/" : "?");
1444 print_generic_expr (dump_file
, c
->op
, 0);
1445 fprintf (dump_file
, "\n");
1449 /* Process the (operand, code) pairs in order of most occurrence. */
1450 candidates2
= sbitmap_alloc (length
);
1451 while (!cvec
.is_empty ())
1453 oecount
*c
= &cvec
.last ();
1457 /* Now collect the operands in the outer chain that contain
1458 the common operand in their inner chain. */
1459 bitmap_clear (candidates2
);
1461 EXECUTE_IF_SET_IN_BITMAP (candidates
, 0, i
, sbi0
)
1464 enum tree_code oecode
;
1466 tree op
= (*ops
)[i
]->op
;
1468 /* If we undistributed in this chain already this may be a constant. */
1470 if (TREE_CODE (op
) != SSA_NAME
)
1473 oedef
= SSA_NAME_DEF_STMT (op
);
1474 oecode
= gimple_assign_rhs_code (oedef
);
1475 if (oecode
!= c
->oecode
)
1478 FOR_EACH_VEC_ELT (subops
[i
], j
, oe1
)
1480 if (oe1
->op
== c
->op
)
1482 bitmap_set_bit (candidates2
, i
);
1489 if (nr_candidates2
>= 2)
1491 operand_entry_t oe1
, oe2
;
1493 int first
= bitmap_first_set_bit (candidates2
);
1495 /* Build the new addition chain. */
1496 oe1
= (*ops
)[first
];
1497 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
1499 fprintf (dump_file
, "Building (");
1500 print_generic_expr (dump_file
, oe1
->op
, 0);
1502 zero_one_operation (&oe1
->op
, c
->oecode
, c
->op
);
1503 EXECUTE_IF_SET_IN_BITMAP (candidates2
, first
+1, i
, sbi0
)
1507 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
1509 fprintf (dump_file
, " + ");
1510 print_generic_expr (dump_file
, oe2
->op
, 0);
1512 zero_one_operation (&oe2
->op
, c
->oecode
, c
->op
);
1513 sum
= build_and_add_sum (TREE_TYPE (oe1
->op
),
1514 oe1
->op
, oe2
->op
, opcode
);
1515 oe2
->op
= build_zero_cst (TREE_TYPE (oe2
->op
));
1517 oe1
->op
= gimple_get_lhs (sum
);
1520 /* Apply the multiplication/division. */
1521 prod
= build_and_add_sum (TREE_TYPE (oe1
->op
),
1522 oe1
->op
, c
->op
, c
->oecode
);
1523 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
1525 fprintf (dump_file
, ") %s ", c
->oecode
== MULT_EXPR
? "*" : "/");
1526 print_generic_expr (dump_file
, c
->op
, 0);
1527 fprintf (dump_file
, "\n");
1530 /* Record it in the addition chain and disable further
1531 undistribution with this op. */
1532 oe1
->op
= gimple_assign_lhs (prod
);
1533 oe1
->rank
= get_rank (oe1
->op
);
1534 subops
[first
].release ();
1542 for (i
= 0; i
< ops
->length (); ++i
)
1543 subops
[i
].release ();
1546 sbitmap_free (candidates
);
1547 sbitmap_free (candidates2
);
1552 /* If OPCODE is BIT_IOR_EXPR or BIT_AND_EXPR and CURR is a comparison
1553 expression, examine the other OPS to see if any of them are comparisons
1554 of the same values, which we may be able to combine or eliminate.
1555 For example, we can rewrite (a < b) | (a == b) as (a <= b). */
1558 eliminate_redundant_comparison (enum tree_code opcode
,
1559 vec
<operand_entry_t
> *ops
,
1560 unsigned int currindex
,
1561 operand_entry_t curr
)
1564 enum tree_code lcode
, rcode
;
1569 if (opcode
!= BIT_IOR_EXPR
&& opcode
!= BIT_AND_EXPR
)
1572 /* Check that CURR is a comparison. */
1573 if (TREE_CODE (curr
->op
) != SSA_NAME
)
1575 def1
= SSA_NAME_DEF_STMT (curr
->op
);
1576 if (!is_gimple_assign (def1
))
1578 lcode
= gimple_assign_rhs_code (def1
);
1579 if (TREE_CODE_CLASS (lcode
) != tcc_comparison
)
1581 op1
= gimple_assign_rhs1 (def1
);
1582 op2
= gimple_assign_rhs2 (def1
);
1584 /* Now look for a similar comparison in the remaining OPS. */
1585 for (i
= currindex
+ 1; ops
->iterate (i
, &oe
); i
++)
1589 if (TREE_CODE (oe
->op
) != SSA_NAME
)
1591 def2
= SSA_NAME_DEF_STMT (oe
->op
);
1592 if (!is_gimple_assign (def2
))
1594 rcode
= gimple_assign_rhs_code (def2
);
1595 if (TREE_CODE_CLASS (rcode
) != tcc_comparison
)
1598 /* If we got here, we have a match. See if we can combine the two comparisons. */
1600 if (opcode
== BIT_IOR_EXPR
)
1601 t
= maybe_fold_or_comparisons (lcode
, op1
, op2
,
1602 rcode
, gimple_assign_rhs1 (def2
),
1603 gimple_assign_rhs2 (def2
));
1605 t
= maybe_fold_and_comparisons (lcode
, op1
, op2
,
1606 rcode
, gimple_assign_rhs1 (def2
),
1607 gimple_assign_rhs2 (def2
));
1611 /* maybe_fold_and_comparisons and maybe_fold_or_comparisons
1612 always give us a boolean_type_node value back. If the original
1613 BIT_AND_EXPR or BIT_IOR_EXPR was of a wider integer type,
1614 we need to convert. */
1615 if (!useless_type_conversion_p (TREE_TYPE (curr
->op
), TREE_TYPE (t
)))
1616 t
= fold_convert (TREE_TYPE (curr
->op
), t
);
1618 if (TREE_CODE (t
) != INTEGER_CST
1619 && !operand_equal_p (t
, curr
->op
, 0))
1621 enum tree_code subcode
;
1622 tree newop1
, newop2
;
1623 if (!COMPARISON_CLASS_P (t
))
1625 extract_ops_from_tree (t
, &subcode
, &newop1
, &newop2
);
1626 STRIP_USELESS_TYPE_CONVERSION (newop1
);
1627 STRIP_USELESS_TYPE_CONVERSION (newop2
);
1628 if (!is_gimple_val (newop1
) || !is_gimple_val (newop2
))
1632 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
1634 fprintf (dump_file
, "Equivalence: ");
1635 print_generic_expr (dump_file
, curr
->op
, 0);
1636 fprintf (dump_file
, " %s ", op_symbol_code (opcode
));
1637 print_generic_expr (dump_file
, oe
->op
, 0);
1638 fprintf (dump_file
, " -> ");
1639 print_generic_expr (dump_file
, t
, 0);
1640 fprintf (dump_file
, "\n");
1643 /* Now we can delete oe, as it has been subsumed by the new, combined expression t. */
1645 ops
->ordered_remove (i
);
1646 reassociate_stats
.ops_eliminated
++;
1648 /* If t is the same as curr->op, we're done. Otherwise we must
1649 replace curr->op with t. Special case is if we got a constant
1650 back, in which case we add it to the end instead of in place of
1651 the current entry. */
1652 if (TREE_CODE (t
) == INTEGER_CST
)
1654 ops
->ordered_remove (currindex
);
1655 add_to_ops_vec (ops
, t
);
1657 else if (!operand_equal_p (t
, curr
->op
, 0))
1660 enum tree_code subcode
;
1663 gcc_assert (COMPARISON_CLASS_P (t
));
1664 extract_ops_from_tree (t
, &subcode
, &newop1
, &newop2
);
1665 STRIP_USELESS_TYPE_CONVERSION (newop1
);
1666 STRIP_USELESS_TYPE_CONVERSION (newop2
);
1667 gcc_checking_assert (is_gimple_val (newop1
)
1668 && is_gimple_val (newop2
));
1669 sum
= build_and_add_sum (TREE_TYPE (t
), newop1
, newop2
, subcode
);
1670 curr
->op
= gimple_get_lhs (sum
);
1678 /* Perform various identities and other optimizations on the list of
1679 operand entries, stored in OPS. The tree code for the binary
1680 operation between all the operands is OPCODE. */
1683 optimize_ops_list (enum tree_code opcode
,
1684 vec
<operand_entry_t
> *ops
)
1686 unsigned int length
= ops
->length ();
1689 operand_entry_t oelast
= NULL
;
1690 bool iterate
= false;
1695 oelast
= ops
->last ();
1697 /* If the last two are constants, pop the constants off, merge them
1698 and try the next two. */
1699 if (oelast
->rank
== 0 && is_gimple_min_invariant (oelast
->op
))
1701 operand_entry_t oelm1
= (*ops
)[length
- 2];
1703 if (oelm1
->rank
== 0
1704 && is_gimple_min_invariant (oelm1
->op
)
1705 && useless_type_conversion_p (TREE_TYPE (oelm1
->op
),
1706 TREE_TYPE (oelast
->op
)))
1708 tree folded
= fold_binary (opcode
, TREE_TYPE (oelm1
->op
),
1709 oelm1
->op
, oelast
->op
);
1711 if (folded
&& is_gimple_min_invariant (folded
))
1713 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
1714 fprintf (dump_file
, "Merging constants\n");
1719 add_to_ops_vec (ops
, folded
);
1720 reassociate_stats
.constants_eliminated
++;
1722 optimize_ops_list (opcode
, ops
);
1728 eliminate_using_constants (opcode
, ops
);
1731 for (i
= 0; ops
->iterate (i
, &oe
);)
1735 if (eliminate_not_pairs (opcode
, ops
, i
, oe
))
1737 if (eliminate_duplicate_pair (opcode
, ops
, &done
, i
, oe
, oelast
)
1738 || (!done
&& eliminate_plus_minus_pair (opcode
, ops
, i
, oe
))
1739 || (!done
&& eliminate_redundant_comparison (opcode
, ops
, i
, oe
)))
1751 length
= ops
->length ();
1752 oelast
= ops
->last ();
1755 optimize_ops_list (opcode
, ops
);
1758 /* The following functions are subroutines to optimize_range_tests and allow
1759 it to try to change a logical combination of comparisons into a range
1763 X == 2 || X == 5 || X == 3 || X == 4
1767 (unsigned) (X - 2) <= 3
1769 For more information see comments above fold_test_range in fold-const.c,
1770 this implementation is for GIMPLE. */
1778 bool strict_overflow_p
;
1779 unsigned int idx
, next
;
1782 /* This is similar to make_range in fold-const.c, but on top of
1783 GIMPLE instead of trees. If EXP is non-NULL, it should be
1784 an SSA_NAME and STMT argument is ignored, otherwise STMT
1785 argument should be a GIMPLE_COND. */
1788 init_range_entry (struct range_entry
*r
, tree exp
, gimple stmt
)
1792 bool is_bool
, strict_overflow_p
;
1796 r
->strict_overflow_p
= false;
1798 r
->high
= NULL_TREE
;
1799 if (exp
!= NULL_TREE
1800 && (TREE_CODE (exp
) != SSA_NAME
|| !INTEGRAL_TYPE_P (TREE_TYPE (exp
))))
1803 /* Start with simply saying "EXP != 0" and then look at the code of EXP
1804 and see if we can refine the range. Some of the cases below may not
1805 happen, but it doesn't seem worth worrying about this. We "continue"
1806 the outer loop when we've changed something; otherwise we "break"
1807 the switch, which will "break" the while. */
1808 low
= exp
? build_int_cst (TREE_TYPE (exp
), 0) : boolean_false_node
;
1811 strict_overflow_p
= false;
1813 if (exp
== NULL_TREE
)
1815 else if (TYPE_PRECISION (TREE_TYPE (exp
)) == 1)
1817 if (TYPE_UNSIGNED (TREE_TYPE (exp
)))
1822 else if (TREE_CODE (TREE_TYPE (exp
)) == BOOLEAN_TYPE
)
1827 enum tree_code code
;
1828 tree arg0
, arg1
, exp_type
;
1832 if (exp
!= NULL_TREE
)
1834 if (TREE_CODE (exp
) != SSA_NAME
)
1837 stmt
= SSA_NAME_DEF_STMT (exp
);
1838 if (!is_gimple_assign (stmt
))
1841 code
= gimple_assign_rhs_code (stmt
);
1842 arg0
= gimple_assign_rhs1 (stmt
);
1843 arg1
= gimple_assign_rhs2 (stmt
);
1844 exp_type
= TREE_TYPE (exp
);
1848 code
= gimple_cond_code (stmt
);
1849 arg0
= gimple_cond_lhs (stmt
);
1850 arg1
= gimple_cond_rhs (stmt
);
1851 exp_type
= boolean_type_node
;
1854 if (TREE_CODE (arg0
) != SSA_NAME
)
1856 loc
= gimple_location (stmt
);
1860 if (TREE_CODE (TREE_TYPE (exp
)) == BOOLEAN_TYPE
1861 /* Ensure the range is either +[-,0], +[0,0],
1862 -[-,0], -[0,0] or +[1,-], +[1,1], -[1,-] or
1863 -[1,1]. If it is e.g. +[-,-] or -[-,-]
1864 or similar expression of unconditional true or
1865 false, it should not be negated. */
1866 && ((high
&& integer_zerop (high
))
1867 || (low
&& integer_onep (low
))))
1880 if (TYPE_PRECISION (TREE_TYPE (arg0
)) == 1)
1882 if (TYPE_UNSIGNED (TREE_TYPE (arg0
)))
1887 else if (TREE_CODE (TREE_TYPE (arg0
)) == BOOLEAN_TYPE
)
1902 nexp
= make_range_step (loc
, code
, arg0
, arg1
, exp_type
,
1904 &strict_overflow_p
);
1905 if (nexp
!= NULL_TREE
)
1908 gcc_assert (TREE_CODE (exp
) == SSA_NAME
);
1921 r
->strict_overflow_p
= strict_overflow_p
;
1925 /* Comparison function for qsort. Sort entries
1926 without SSA_NAME exp first, then with SSA_NAMEs sorted
1927 by increasing SSA_NAME_VERSION, and for the same SSA_NAMEs
1928 by increasing ->low and if ->low is the same, by increasing
1929 ->high. ->low == NULL_TREE means minimum, ->high == NULL_TREE
1933 range_entry_cmp (const void *a
, const void *b
)
1935 const struct range_entry
*p
= (const struct range_entry
*) a
;
1936 const struct range_entry
*q
= (const struct range_entry
*) b
;
1938 if (p
->exp
!= NULL_TREE
&& TREE_CODE (p
->exp
) == SSA_NAME
)
1940 if (q
->exp
!= NULL_TREE
&& TREE_CODE (q
->exp
) == SSA_NAME
)
1942 /* Group range_entries for the same SSA_NAME together. */
1943 if (SSA_NAME_VERSION (p
->exp
) < SSA_NAME_VERSION (q
->exp
))
1945 else if (SSA_NAME_VERSION (p
->exp
) > SSA_NAME_VERSION (q
->exp
))
1947 /* If ->low is different, NULL low goes first, then by increasing ->low. */
1949 if (p
->low
!= NULL_TREE
)
1951 if (q
->low
!= NULL_TREE
)
1953 tree tem
= fold_binary (LT_EXPR
, boolean_type_node
,
1955 if (tem
&& integer_onep (tem
))
1957 tem
= fold_binary (GT_EXPR
, boolean_type_node
,
1959 if (tem
&& integer_onep (tem
))
1965 else if (q
->low
!= NULL_TREE
)
1967 /* If ->high is different, NULL high goes last, before that by increasing ->high. */
1969 if (p
->high
!= NULL_TREE
)
1971 if (q
->high
!= NULL_TREE
)
1973 tree tem
= fold_binary (LT_EXPR
, boolean_type_node
,
1975 if (tem
&& integer_onep (tem
))
1977 tem
= fold_binary (GT_EXPR
, boolean_type_node
,
1979 if (tem
&& integer_onep (tem
))
1985 else if (p
->high
!= NULL_TREE
)
1987 /* If both ranges are the same, sort below by ascending idx. */
1992 else if (q
->exp
!= NULL_TREE
&& TREE_CODE (q
->exp
) == SSA_NAME
)
1995 if (p
->idx
< q
->idx
)
1999 gcc_checking_assert (p
->idx
> q
->idx
);
2004 /* Helper routine of optimize_range_test.
2005 [EXP, IN_P, LOW, HIGH, STRICT_OVERFLOW_P] is a merged range for
2006 RANGE and OTHERRANGE through OTHERRANGE + COUNT - 1 ranges,
2007 OPCODE and OPS are arguments of optimize_range_tests. Return
2008 true if the range merge has been successful.
2009 If OPCODE is ERROR_MARK, this is called from within
2010 maybe_optimize_range_tests and is performing inter-bb range optimization.
2011 In that case, whether an op is BIT_AND_EXPR or BIT_IOR_EXPR is found in
2015 update_range_test (struct range_entry
*range
, struct range_entry
*otherrange
,
2016 unsigned int count
, enum tree_code opcode
,
2017 vec
<operand_entry_t
> *ops
, tree exp
, bool in_p
,
2018 tree low
, tree high
, bool strict_overflow_p
)
2020 operand_entry_t oe
= (*ops
)[range
->idx
];
2022 gimple stmt
= op
? SSA_NAME_DEF_STMT (op
) : last_stmt (BASIC_BLOCK (oe
->id
));
2023 location_t loc
= gimple_location (stmt
);
2024 tree optype
= op
? TREE_TYPE (op
) : boolean_type_node
;
2025 tree tem
= build_range_check (loc
, optype
, exp
, in_p
, low
, high
);
2026 enum warn_strict_overflow_code wc
= WARN_STRICT_OVERFLOW_COMPARISON
;
2027 gimple_stmt_iterator gsi
;
2029 if (tem
== NULL_TREE
)
2032 if (strict_overflow_p
&& issue_strict_overflow_warning (wc
))
2033 warning_at (loc
, OPT_Wstrict_overflow
,
2034 "assuming signed overflow does not occur "
2035 "when simplifying range test");
2037 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
2039 struct range_entry
*r
;
2040 fprintf (dump_file
, "Optimizing range tests ");
2041 print_generic_expr (dump_file
, range
->exp
, 0);
2042 fprintf (dump_file
, " %c[", range
->in_p
? '+' : '-');
2043 print_generic_expr (dump_file
, range
->low
, 0);
2044 fprintf (dump_file
, ", ");
2045 print_generic_expr (dump_file
, range
->high
, 0);
2046 fprintf (dump_file
, "]");
2047 for (r
= otherrange
; r
< otherrange
+ count
; r
++)
2049 fprintf (dump_file
, " and %c[", r
->in_p
? '+' : '-');
2050 print_generic_expr (dump_file
, r
->low
, 0);
2051 fprintf (dump_file
, ", ");
2052 print_generic_expr (dump_file
, r
->high
, 0);
2053 fprintf (dump_file
, "]");
2055 fprintf (dump_file
, "\n into ");
2056 print_generic_expr (dump_file
, tem
, 0);
2057 fprintf (dump_file
, "\n");
2060 if (opcode
== BIT_IOR_EXPR
2061 || (opcode
== ERROR_MARK
&& oe
->rank
== BIT_IOR_EXPR
))
2062 tem
= invert_truthvalue_loc (loc
, tem
);
2064 tem
= fold_convert_loc (loc
, optype
, tem
);
2065 gsi
= gsi_for_stmt (stmt
);
2066 tem
= force_gimple_operand_gsi (&gsi
, tem
, true, NULL_TREE
, true,
2068 for (gsi_prev (&gsi
); !gsi_end_p (gsi
); gsi_prev (&gsi
))
2069 if (gimple_uid (gsi_stmt (gsi
)))
2072 gimple_set_uid (gsi_stmt (gsi
), gimple_uid (stmt
));
2079 range
->strict_overflow_p
= false;
2081 for (range
= otherrange
; range
< otherrange
+ count
; range
++)
2083 oe
= (*ops
)[range
->idx
];
2084 /* Now change all the other range test immediate uses, so that
2085 those tests will be optimized away. */
2086 if (opcode
== ERROR_MARK
)
2089 oe
->op
= build_int_cst (TREE_TYPE (oe
->op
),
2090 oe
->rank
== BIT_IOR_EXPR
? 0 : 1);
2092 oe
->op
= (oe
->rank
== BIT_IOR_EXPR
2093 ? boolean_false_node
: boolean_true_node
);
2096 oe
->op
= error_mark_node
;
2097 range
->exp
= NULL_TREE
;
2102 /* Optimize X == CST1 || X == CST2
2103 if popcount (CST1 ^ CST2) == 1 into
2104 (X & ~(CST1 ^ CST2)) == (CST1 & ~(CST1 ^ CST2)).
2105 Similarly for ranges. E.g.
2106 X != 2 && X != 3 && X != 10 && X != 11
2107 will be transformed by the previous optimization into
2108 !((X - 2U) <= 1U || (X - 10U) <= 1U)
2109 and this loop can transform that into
2110 !(((X & ~8) - 2U) <= 1U). */
2113 optimize_range_tests_xor (enum tree_code opcode
, tree type
,
2114 tree lowi
, tree lowj
, tree highi
, tree highj
,
2115 vec
<operand_entry_t
> *ops
,
2116 struct range_entry
*rangei
,
2117 struct range_entry
*rangej
)
2119 tree lowxor
, highxor
, tem
, exp
;
2120 /* Check highi ^ lowi == highj ^ lowj and
2121 popcount (highi ^ lowi) == 1. */
2122 lowxor
= fold_binary (BIT_XOR_EXPR
, type
, lowi
, lowj
);
2123 if (lowxor
== NULL_TREE
|| TREE_CODE (lowxor
) != INTEGER_CST
)
2125 if (tree_log2 (lowxor
) < 0)
2127 highxor
= fold_binary (BIT_XOR_EXPR
, type
, highi
, highj
);
2128 if (!tree_int_cst_equal (lowxor
, highxor
))
2131 tem
= fold_build1 (BIT_NOT_EXPR
, type
, lowxor
);
2132 exp
= fold_build2 (BIT_AND_EXPR
, type
, rangei
->exp
, tem
);
2133 lowj
= fold_build2 (BIT_AND_EXPR
, type
, lowi
, tem
);
2134 highj
= fold_build2 (BIT_AND_EXPR
, type
, highi
, tem
);
2135 if (update_range_test (rangei
, rangej
, 1, opcode
, ops
, exp
,
2136 rangei
->in_p
, lowj
, highj
,
2137 rangei
->strict_overflow_p
2138 || rangej
->strict_overflow_p
))
2143 /* Optimize X == CST1 || X == CST2
2144 if popcount (CST2 - CST1) == 1 into
2145 ((X - CST1) & ~(CST2 - CST1)) == 0.
2146 Similarly for ranges. E.g.
2147 X == 43 || X == 76 || X == 44 || X == 78 || X == 77 || X == 46
2148 || X == 75 || X == 45
2149 will be transformed by the previous optimization into
2150 (X - 43U) <= 3U || (X - 75U) <= 3U
2151 and this loop can transform that into
2152 ((X - 43U) & ~(75U - 43U)) <= 3U. */
2154 optimize_range_tests_diff (enum tree_code opcode
, tree type
,
2155 tree lowi
, tree lowj
, tree highi
, tree highj
,
2156 vec
<operand_entry_t
> *ops
,
2157 struct range_entry
*rangei
,
2158 struct range_entry
*rangej
)
2160 tree tem1
, tem2
, mask
;
2161 /* Check highi - lowi == highj - lowj. */
2162 tem1
= fold_binary (MINUS_EXPR
, type
, highi
, lowi
);
2163 if (tem1
== NULL_TREE
|| TREE_CODE (tem1
) != INTEGER_CST
)
2165 tem2
= fold_binary (MINUS_EXPR
, type
, highj
, lowj
);
2166 if (!tree_int_cst_equal (tem1
, tem2
))
2168 /* Check popcount (lowj - lowi) == 1. */
2169 tem1
= fold_binary (MINUS_EXPR
, type
, lowj
, lowi
);
2170 if (tem1
== NULL_TREE
|| TREE_CODE (tem1
) != INTEGER_CST
)
2172 if (tree_log2 (tem1
) < 0)
2175 mask
= fold_build1 (BIT_NOT_EXPR
, type
, tem1
);
2176 tem1
= fold_binary (MINUS_EXPR
, type
, rangei
->exp
, lowi
);
2177 tem1
= fold_build2 (BIT_AND_EXPR
, type
, tem1
, mask
);
2178 lowj
= build_int_cst (type
, 0);
2179 if (update_range_test (rangei
, rangej
, 1, opcode
, ops
, tem1
,
2180 rangei
->in_p
, lowj
, tem2
,
2181 rangei
->strict_overflow_p
2182 || rangej
->strict_overflow_p
))
2187 /* It does some common checks for function optimize_range_tests_xor and
2188 optimize_range_tests_diff.
2189 If OPTIMIZE_XOR is TRUE, it calls optimize_range_tests_xor.
2190 Else it calls optimize_range_tests_diff. */
2193 optimize_range_tests_1 (enum tree_code opcode
, int first
, int length
,
2194 bool optimize_xor
, vec
<operand_entry_t
> *ops
,
2195 struct range_entry
*ranges
)
2198 bool any_changes
= false;
2199 for (i
= first
; i
< length
; i
++)
2201 tree lowi
, highi
, lowj
, highj
, type
, tem
;
2203 if (ranges
[i
].exp
== NULL_TREE
|| ranges
[i
].in_p
)
2205 type
= TREE_TYPE (ranges
[i
].exp
);
2206 if (!INTEGRAL_TYPE_P (type
))
2208 lowi
= ranges
[i
].low
;
2209 if (lowi
== NULL_TREE
)
2210 lowi
= TYPE_MIN_VALUE (type
);
2211 highi
= ranges
[i
].high
;
2212 if (highi
== NULL_TREE
)
2214 for (j
= i
+ 1; j
< length
&& j
< i
+ 64; j
++)
2217 if (ranges
[i
].exp
!= ranges
[j
].exp
|| ranges
[j
].in_p
)
2219 lowj
= ranges
[j
].low
;
2220 if (lowj
== NULL_TREE
)
2222 highj
= ranges
[j
].high
;
2223 if (highj
== NULL_TREE
)
2224 highj
= TYPE_MAX_VALUE (type
);
2225 /* Check lowj > highi. */
2226 tem
= fold_binary (GT_EXPR
, boolean_type_node
,
2228 if (tem
== NULL_TREE
|| !integer_onep (tem
))
2231 changes
= optimize_range_tests_xor (opcode
, type
, lowi
, lowj
,
2233 ranges
+ i
, ranges
+ j
);
2235 changes
= optimize_range_tests_diff (opcode
, type
, lowi
, lowj
,
2237 ranges
+ i
, ranges
+ j
);
2248 /* Optimize range tests, similarly how fold_range_test optimizes
2249 it on trees. The tree code for the binary
2250 operation between all the operands is OPCODE.
2251 If OPCODE is ERROR_MARK, optimize_range_tests is called from within
2252 maybe_optimize_range_tests for inter-bb range optimization.
2253 In that case if oe->op is NULL, oe->id is bb->index whose
2254 GIMPLE_COND is && or ||ed into the test, and oe->rank says
2255 the actual opcode. */
2258 optimize_range_tests (enum tree_code opcode
,
2259 vec
<operand_entry_t
> *ops
)
2261 unsigned int length
= ops
->length (), i
, j
, first
;
2263 struct range_entry
*ranges
;
2264 bool any_changes
= false;
2269 ranges
= XNEWVEC (struct range_entry
, length
);
2270 for (i
= 0; i
< length
; i
++)
2274 init_range_entry (ranges
+ i
, oe
->op
,
2275 oe
->op
? NULL
: last_stmt (BASIC_BLOCK (oe
->id
)));
2276 /* For | invert it now, we will invert it again before emitting
2277 the optimized expression. */
2278 if (opcode
== BIT_IOR_EXPR
2279 || (opcode
== ERROR_MARK
&& oe
->rank
== BIT_IOR_EXPR
))
2280 ranges
[i
].in_p
= !ranges
[i
].in_p
;
2283 qsort (ranges
, length
, sizeof (*ranges
), range_entry_cmp
);
2284 for (i
= 0; i
< length
; i
++)
2285 if (ranges
[i
].exp
!= NULL_TREE
&& TREE_CODE (ranges
[i
].exp
) == SSA_NAME
)
2288 /* Try to merge ranges. */
2289 for (first
= i
; i
< length
; i
++)
2291 tree low
= ranges
[i
].low
;
2292 tree high
= ranges
[i
].high
;
2293 int in_p
= ranges
[i
].in_p
;
2294 bool strict_overflow_p
= ranges
[i
].strict_overflow_p
;
2295 int update_fail_count
= 0;
2297 for (j
= i
+ 1; j
< length
; j
++)
2299 if (ranges
[i
].exp
!= ranges
[j
].exp
)
2301 if (!merge_ranges (&in_p
, &low
, &high
, in_p
, low
, high
,
2302 ranges
[j
].in_p
, ranges
[j
].low
, ranges
[j
].high
))
2304 strict_overflow_p
|= ranges
[j
].strict_overflow_p
;
2310 if (update_range_test (ranges
+ i
, ranges
+ i
+ 1, j
- i
- 1, opcode
,
2311 ops
, ranges
[i
].exp
, in_p
, low
, high
,
2317 /* Avoid quadratic complexity if all merge_ranges calls would succeed,
2318 while update_range_test would fail. */
2319 else if (update_fail_count
== 64)
2322 ++update_fail_count
;
2325 any_changes
|= optimize_range_tests_1 (opcode
, first
, length
, true,
2328 if (BRANCH_COST (optimize_function_for_speed_p (cfun
), false) >= 2)
2329 any_changes
|= optimize_range_tests_1 (opcode
, first
, length
, false,
2332 if (any_changes
&& opcode
!= ERROR_MARK
)
2335 FOR_EACH_VEC_ELT (*ops
, i
, oe
)
2337 if (oe
->op
== error_mark_node
)
2346 XDELETEVEC (ranges
);
2350 /* Return true if STMT is a cast like:
2356 # _345 = PHI <_123(N), 1(...), 1(...)>
2357 where _234 has bool type, _123 has single use and
2358 bb N has a single successor M. This is commonly used in
2359 the last block of a range test. */
2362 final_range_test_p (gimple stmt
)
2364 basic_block bb
, rhs_bb
;
2367 use_operand_p use_p
;
2370 if (!gimple_assign_cast_p (stmt
))
2372 bb
= gimple_bb (stmt
);
2373 if (!single_succ_p (bb
))
2375 e
= single_succ_edge (bb
);
2376 if (e
->flags
& EDGE_COMPLEX
)
2379 lhs
= gimple_assign_lhs (stmt
);
2380 rhs
= gimple_assign_rhs1 (stmt
);
2381 if (!INTEGRAL_TYPE_P (TREE_TYPE (lhs
))
2382 || TREE_CODE (rhs
) != SSA_NAME
2383 || TREE_CODE (TREE_TYPE (rhs
)) != BOOLEAN_TYPE
)
2386 /* Test whether lhs is consumed only by a PHI in the only successor bb. */
2387 if (!single_imm_use (lhs
, &use_p
, &use_stmt
))
2390 if (gimple_code (use_stmt
) != GIMPLE_PHI
2391 || gimple_bb (use_stmt
) != e
->dest
)
2394 /* And that the rhs is defined in the same loop. */
2395 rhs_bb
= gimple_bb (SSA_NAME_DEF_STMT (rhs
));
2397 || !flow_bb_inside_loop_p (loop_containing_stmt (stmt
), rhs_bb
))
2403 /* Return true if BB is suitable basic block for inter-bb range test
2404 optimization. If BACKWARD is true, BB should be the only predecessor
2405 of TEST_BB, and *OTHER_BB is either NULL and filled by the routine,
2406 or compared with to find a common basic block to which all conditions
2407 branch to if true resp. false. If BACKWARD is false, TEST_BB should
2408 be the only predecessor of BB. */
2411 suitable_cond_bb (basic_block bb
, basic_block test_bb
, basic_block
*other_bb
,
2414 edge_iterator ei
, ei2
;
2417 gimple_stmt_iterator gsi
;
2418 bool other_edge_seen
= false;
2423 /* Check last stmt first. */
2424 stmt
= last_stmt (bb
);
2426 || (gimple_code (stmt
) != GIMPLE_COND
2427 && (backward
|| !final_range_test_p (stmt
)))
2428 || gimple_visited_p (stmt
)
2429 || stmt_could_throw_p (stmt
)
2432 is_cond
= gimple_code (stmt
) == GIMPLE_COND
;
2435 /* If last stmt is GIMPLE_COND, verify that one of the succ edges
2436 goes to the next bb (if BACKWARD, it is TEST_BB), and the other
2437 to *OTHER_BB (if not set yet, try to find it out). */
2438 if (EDGE_COUNT (bb
->succs
) != 2)
2440 FOR_EACH_EDGE (e
, ei
, bb
->succs
)
2442 if (!(e
->flags
& (EDGE_TRUE_VALUE
| EDGE_FALSE_VALUE
)))
2444 if (e
->dest
== test_bb
)
2453 if (*other_bb
== NULL
)
2455 FOR_EACH_EDGE (e2
, ei2
, test_bb
->succs
)
2456 if (!(e2
->flags
& (EDGE_TRUE_VALUE
| EDGE_FALSE_VALUE
)))
2458 else if (e
->dest
== e2
->dest
)
2459 *other_bb
= e
->dest
;
2460 if (*other_bb
== NULL
)
2463 if (e
->dest
== *other_bb
)
2464 other_edge_seen
= true;
2468 if (*other_bb
== NULL
|| !other_edge_seen
)
2471 else if (single_succ (bb
) != *other_bb
)
2474 /* Now check all PHIs of *OTHER_BB. */
2475 e
= find_edge (bb
, *other_bb
);
2476 e2
= find_edge (test_bb
, *other_bb
);
2477 for (gsi
= gsi_start_phis (e
->dest
); !gsi_end_p (gsi
); gsi_next (&gsi
))
2479 gimple phi
= gsi_stmt (gsi
);
2480 /* If both BB and TEST_BB end with GIMPLE_COND, all PHI arguments
2481 corresponding to BB and TEST_BB predecessor must be the same. */
2482 if (!operand_equal_p (gimple_phi_arg_def (phi
, e
->dest_idx
),
2483 gimple_phi_arg_def (phi
, e2
->dest_idx
), 0))
2485 /* Otherwise, if one of the blocks doesn't end with GIMPLE_COND,
2486 one of the PHIs should have the lhs of the last stmt in
2487 that block as PHI arg and that PHI should have 0 or 1
2488 corresponding to it in all other range test basic blocks
2492 if (gimple_phi_arg_def (phi
, e
->dest_idx
)
2493 == gimple_assign_lhs (stmt
)
2494 && (integer_zerop (gimple_phi_arg_def (phi
, e2
->dest_idx
))
2495 || integer_onep (gimple_phi_arg_def (phi
,
2501 gimple test_last
= last_stmt (test_bb
);
2502 if (gimple_code (test_last
) != GIMPLE_COND
2503 && gimple_phi_arg_def (phi
, e2
->dest_idx
)
2504 == gimple_assign_lhs (test_last
)
2505 && (integer_zerop (gimple_phi_arg_def (phi
, e
->dest_idx
))
2506 || integer_onep (gimple_phi_arg_def (phi
, e
->dest_idx
))))
2516 /* Return true if BB doesn't have side-effects that would disallow
2517 range test optimization, all SSA_NAMEs set in the bb are consumed
2518 in the bb and there are no PHIs. */
2521 no_side_effect_bb (basic_block bb
)
2523 gimple_stmt_iterator gsi
;
2526 if (!gimple_seq_empty_p (phi_nodes (bb
)))
2528 last
= last_stmt (bb
);
2529 for (gsi
= gsi_start_bb (bb
); !gsi_end_p (gsi
); gsi_next (&gsi
))
2531 gimple stmt
= gsi_stmt (gsi
);
2533 imm_use_iterator imm_iter
;
2534 use_operand_p use_p
;
2536 if (is_gimple_debug (stmt
))
2538 if (gimple_has_side_effects (stmt
))
2542 if (!is_gimple_assign (stmt
))
2544 lhs
= gimple_assign_lhs (stmt
);
2545 if (TREE_CODE (lhs
) != SSA_NAME
)
2547 if (gimple_assign_rhs_could_trap_p (stmt
))
2549 FOR_EACH_IMM_USE_FAST (use_p
, imm_iter
, lhs
)
2551 gimple use_stmt
= USE_STMT (use_p
);
2552 if (is_gimple_debug (use_stmt
))
2554 if (gimple_bb (use_stmt
) != bb
)
2561 /* If VAR is set by CODE (BIT_{AND,IOR}_EXPR) which is reassociable,
2562 return true and fill in *OPS recursively. */
2565 get_ops (tree var
, enum tree_code code
, vec
<operand_entry_t
> *ops
,
2568 gimple stmt
= SSA_NAME_DEF_STMT (var
);
2572 if (!is_reassociable_op (stmt
, code
, loop
))
2575 rhs
[0] = gimple_assign_rhs1 (stmt
);
2576 rhs
[1] = gimple_assign_rhs2 (stmt
);
2577 gimple_set_visited (stmt
, true);
2578 for (i
= 0; i
< 2; i
++)
2579 if (TREE_CODE (rhs
[i
]) == SSA_NAME
2580 && !get_ops (rhs
[i
], code
, ops
, loop
)
2581 && has_single_use (rhs
[i
]))
2583 operand_entry_t oe
= (operand_entry_t
) pool_alloc (operand_entry_pool
);
2589 ops
->safe_push (oe
);
2594 /* Find the ops that were added by get_ops starting from VAR, see if
2595 they were changed during update_range_test and if yes, create new
2599 update_ops (tree var
, enum tree_code code
, vec
<operand_entry_t
> ops
,
2600 unsigned int *pidx
, struct loop
*loop
)
2602 gimple stmt
= SSA_NAME_DEF_STMT (var
);
2606 if (!is_reassociable_op (stmt
, code
, loop
))
2609 rhs
[0] = gimple_assign_rhs1 (stmt
);
2610 rhs
[1] = gimple_assign_rhs2 (stmt
);
2613 for (i
= 0; i
< 2; i
++)
2614 if (TREE_CODE (rhs
[i
]) == SSA_NAME
)
2616 rhs
[2 + i
] = update_ops (rhs
[i
], code
, ops
, pidx
, loop
);
2617 if (rhs
[2 + i
] == NULL_TREE
)
2619 if (has_single_use (rhs
[i
]))
2620 rhs
[2 + i
] = ops
[(*pidx
)++]->op
;
2622 rhs
[2 + i
] = rhs
[i
];
2625 if ((rhs
[2] != rhs
[0] || rhs
[3] != rhs
[1])
2626 && (rhs
[2] != rhs
[1] || rhs
[3] != rhs
[0]))
2628 gimple_stmt_iterator gsi
= gsi_for_stmt (stmt
);
2629 var
= make_ssa_name (TREE_TYPE (var
), NULL
);
2630 gimple g
= gimple_build_assign_with_ops (gimple_assign_rhs_code (stmt
),
2631 var
, rhs
[2], rhs
[3]);
2632 gimple_set_uid (g
, gimple_uid (stmt
));
2633 gimple_set_visited (g
, true);
2634 gsi_insert_before (&gsi
, g
, GSI_SAME_STMT
);
2639 /* Structure to track the initial value passed to get_ops and
2640 the range in the ops vector for each basic block. */
2642 struct inter_bb_range_test_entry
2645 unsigned int first_idx
, last_idx
;
2648 /* Inter-bb range test optimization. */
2651 maybe_optimize_range_tests (gimple stmt
)
2653 basic_block first_bb
= gimple_bb (stmt
);
2654 basic_block last_bb
= first_bb
;
2655 basic_block other_bb
= NULL
;
2659 vec
<operand_entry_t
> ops
= vNULL
;
2660 vec
<inter_bb_range_test_entry
> bbinfo
= vNULL
;
2661 bool any_changes
= false;
2663 /* Consider only basic blocks that end with GIMPLE_COND or
2664 a cast statement satisfying final_range_test_p. All
2665 but the last bb in the first_bb .. last_bb range
2666 should end with GIMPLE_COND. */
2667 if (gimple_code (stmt
) == GIMPLE_COND
)
2669 if (EDGE_COUNT (first_bb
->succs
) != 2)
2672 else if (final_range_test_p (stmt
))
2673 other_bb
= single_succ (first_bb
);
2677 if (stmt_could_throw_p (stmt
))
2680 /* As relative ordering of post-dominator sons isn't fixed,
2681 maybe_optimize_range_tests can be called first on any
2682 bb in the range we want to optimize. So, start searching
2683 backwards, if first_bb can be set to a predecessor. */
2684 while (single_pred_p (first_bb
))
2686 basic_block pred_bb
= single_pred (first_bb
);
2687 if (!suitable_cond_bb (pred_bb
, first_bb
, &other_bb
, true))
2689 if (!no_side_effect_bb (first_bb
))
2693 /* If first_bb is last_bb, other_bb hasn't been computed yet.
2694 Before starting forward search in last_bb successors, find
2695 out the other_bb. */
2696 if (first_bb
== last_bb
)
2699 /* As non-GIMPLE_COND last stmt always terminates the range,
2700 if forward search didn't discover anything, just give up. */
2701 if (gimple_code (stmt
) != GIMPLE_COND
)
2703 /* Look at both successors. Either it ends with a GIMPLE_COND
2704 and satisfies suitable_cond_bb, or ends with a cast and
2705 other_bb is that cast's successor. */
2706 FOR_EACH_EDGE (e
, ei
, first_bb
->succs
)
2707 if (!(e
->flags
& (EDGE_TRUE_VALUE
| EDGE_FALSE_VALUE
))
2708 || e
->dest
== first_bb
)
2710 else if (single_pred_p (e
->dest
))
2712 stmt
= last_stmt (e
->dest
);
2714 && gimple_code (stmt
) == GIMPLE_COND
2715 && EDGE_COUNT (e
->dest
->succs
) == 2)
2717 if (suitable_cond_bb (first_bb
, e
->dest
, &other_bb
, true))
2723 && final_range_test_p (stmt
)
2724 && find_edge (first_bb
, single_succ (e
->dest
)))
2726 other_bb
= single_succ (e
->dest
);
2727 if (other_bb
== first_bb
)
2731 if (other_bb
== NULL
)
2734 /* Now do the forward search, moving last_bb to successor bbs
2735 that aren't other_bb. */
2736 while (EDGE_COUNT (last_bb
->succs
) == 2)
2738 FOR_EACH_EDGE (e
, ei
, last_bb
->succs
)
2739 if (e
->dest
!= other_bb
)
2743 if (!single_pred_p (e
->dest
))
2745 if (!suitable_cond_bb (e
->dest
, last_bb
, &other_bb
, false))
2747 if (!no_side_effect_bb (e
->dest
))
2751 if (first_bb
== last_bb
)
2753 /* Here basic blocks first_bb through last_bb's predecessor
2754 end with GIMPLE_COND, all of them have one of the edges to
2755 other_bb and another to another block in the range,
2756 all blocks except first_bb don't have side-effects and
2757 last_bb ends with either GIMPLE_COND, or cast satisfying
2758 final_range_test_p. */
2759 for (bb
= last_bb
; ; bb
= single_pred (bb
))
2761 enum tree_code code
;
2763 inter_bb_range_test_entry bb_ent
;
2765 bb_ent
.op
= NULL_TREE
;
2766 bb_ent
.first_idx
= ops
.length ();
2767 bb_ent
.last_idx
= bb_ent
.first_idx
;
2768 e
= find_edge (bb
, other_bb
);
2769 stmt
= last_stmt (bb
);
2770 gimple_set_visited (stmt
, true);
2771 if (gimple_code (stmt
) != GIMPLE_COND
)
2773 use_operand_p use_p
;
2778 lhs
= gimple_assign_lhs (stmt
);
2779 rhs
= gimple_assign_rhs1 (stmt
);
2780 gcc_assert (bb
== last_bb
);
2787 # _345 = PHI <_123(N), 1(...), 1(...)>
2789 or 0 instead of 1. If it is 0, the _234
2790 range test is anded together with all the
2791 other range tests, if it is 1, it is ored with
2793 single_imm_use (lhs
, &use_p
, &phi
);
2794 gcc_assert (gimple_code (phi
) == GIMPLE_PHI
);
2795 e2
= find_edge (first_bb
, other_bb
);
2797 gcc_assert (gimple_phi_arg_def (phi
, e
->dest_idx
) == lhs
);
2798 if (integer_zerop (gimple_phi_arg_def (phi
, d
)))
2799 code
= BIT_AND_EXPR
;
2802 gcc_checking_assert (integer_onep (gimple_phi_arg_def (phi
, d
)));
2803 code
= BIT_IOR_EXPR
;
2806 /* If _234 SSA_NAME_DEF_STMT is
2808 (or &, corresponding to 1/0 in the phi arguments,
2809 push into ops the individual range test arguments
2810 of the bitwise or resp. and, recursively. */
2811 if (!get_ops (rhs
, code
, &ops
,
2812 loop_containing_stmt (stmt
))
2813 && has_single_use (rhs
))
2815 /* Otherwise, push the _234 range test itself. */
2817 = (operand_entry_t
) pool_alloc (operand_entry_pool
);
2827 bb_ent
.last_idx
= ops
.length ();
2829 bbinfo
.safe_push (bb_ent
);
2832 /* Otherwise stmt is GIMPLE_COND. */
2833 code
= gimple_cond_code (stmt
);
2834 lhs
= gimple_cond_lhs (stmt
);
2835 rhs
= gimple_cond_rhs (stmt
);
2836 if (TREE_CODE (lhs
) == SSA_NAME
2837 && INTEGRAL_TYPE_P (TREE_TYPE (lhs
))
2838 && ((code
!= EQ_EXPR
&& code
!= NE_EXPR
)
2839 || rhs
!= boolean_false_node
2840 /* Either push into ops the individual bitwise
2841 or resp. and operands, depending on which
2842 edge is other_bb. */
2843 || !get_ops (lhs
, (((e
->flags
& EDGE_TRUE_VALUE
) == 0)
2844 ^ (code
== EQ_EXPR
))
2845 ? BIT_AND_EXPR
: BIT_IOR_EXPR
, &ops
,
2846 loop_containing_stmt (stmt
))))
2848 /* Or push the GIMPLE_COND stmt itself. */
2850 = (operand_entry_t
) pool_alloc (operand_entry_pool
);
2853 oe
->rank
= (e
->flags
& EDGE_TRUE_VALUE
)
2854 ? BIT_IOR_EXPR
: BIT_AND_EXPR
;
2855 /* oe->op = NULL signs that there is no SSA_NAME
2856 for the range test, and oe->id instead is the
2857 basic block number, at which's end the GIMPLE_COND
2865 else if (ops
.length () > bb_ent
.first_idx
)
2868 bb_ent
.last_idx
= ops
.length ();
2870 bbinfo
.safe_push (bb_ent
);
2874 if (ops
.length () > 1)
2875 any_changes
= optimize_range_tests (ERROR_MARK
, &ops
);
2879 /* update_ops relies on has_single_use predicates returning the
2880 same values as it did during get_ops earlier. Additionally it
2881 never removes statements, only adds new ones and it should walk
2882 from the single imm use and check the predicate already before
2883 making those changes.
2884 On the other side, the handling of GIMPLE_COND directly can turn
2885 previously multiply used SSA_NAMEs into single use SSA_NAMEs, so
2886 it needs to be done in a separate loop afterwards. */
2887 for (bb
= last_bb
, idx
= 0; ; bb
= single_pred (bb
), idx
++)
2889 if (bbinfo
[idx
].first_idx
< bbinfo
[idx
].last_idx
2890 && bbinfo
[idx
].op
!= NULL_TREE
)
2894 stmt
= last_stmt (bb
);
2895 new_op
= update_ops (bbinfo
[idx
].op
,
2897 ops
[bbinfo
[idx
].first_idx
]->rank
,
2898 ops
, &bbinfo
[idx
].first_idx
,
2899 loop_containing_stmt (stmt
));
2900 if (new_op
== NULL_TREE
)
2902 gcc_assert (bb
== last_bb
);
2903 new_op
= ops
[bbinfo
[idx
].first_idx
++]->op
;
2905 if (bbinfo
[idx
].op
!= new_op
)
2907 imm_use_iterator iter
;
2908 use_operand_p use_p
;
2909 gimple use_stmt
, cast_stmt
= NULL
;
2911 FOR_EACH_IMM_USE_STMT (use_stmt
, iter
, bbinfo
[idx
].op
)
2912 if (is_gimple_debug (use_stmt
))
2914 else if (gimple_code (use_stmt
) == GIMPLE_COND
2915 || gimple_code (use_stmt
) == GIMPLE_PHI
)
2916 FOR_EACH_IMM_USE_ON_STMT (use_p
, iter
)
2917 SET_USE (use_p
, new_op
);
2918 else if (gimple_assign_cast_p (use_stmt
))
2919 cast_stmt
= use_stmt
;
2924 gcc_assert (bb
== last_bb
);
2925 tree lhs
= gimple_assign_lhs (cast_stmt
);
2926 tree new_lhs
= make_ssa_name (TREE_TYPE (lhs
), NULL
);
2927 enum tree_code rhs_code
2928 = gimple_assign_rhs_code (cast_stmt
);
2930 = gimple_build_assign_with_ops (rhs_code
, new_lhs
,
2932 gimple_stmt_iterator gsi
= gsi_for_stmt (cast_stmt
);
2933 gimple_set_uid (g
, gimple_uid (cast_stmt
));
2934 gimple_set_visited (g
, true);
2935 gsi_insert_before (&gsi
, g
, GSI_SAME_STMT
);
2936 FOR_EACH_IMM_USE_STMT (use_stmt
, iter
, lhs
)
2937 if (is_gimple_debug (use_stmt
))
2939 else if (gimple_code (use_stmt
) == GIMPLE_COND
2940 || gimple_code (use_stmt
) == GIMPLE_PHI
)
2941 FOR_EACH_IMM_USE_ON_STMT (use_p
, iter
)
2942 SET_USE (use_p
, new_lhs
);
2951 for (bb
= last_bb
, idx
= 0; ; bb
= single_pred (bb
), idx
++)
2953 if (bbinfo
[idx
].first_idx
< bbinfo
[idx
].last_idx
2954 && bbinfo
[idx
].op
== NULL_TREE
2955 && ops
[bbinfo
[idx
].first_idx
]->op
!= NULL_TREE
)
2957 stmt
= last_stmt (bb
);
2958 if (integer_zerop (ops
[bbinfo
[idx
].first_idx
]->op
))
2959 gimple_cond_make_false (stmt
);
2960 else if (integer_onep (ops
[bbinfo
[idx
].first_idx
]->op
))
2961 gimple_cond_make_true (stmt
);
2964 gimple_cond_set_code (stmt
, NE_EXPR
);
2965 gimple_cond_set_lhs (stmt
, ops
[bbinfo
[idx
].first_idx
]->op
);
2966 gimple_cond_set_rhs (stmt
, boolean_false_node
);
2978 /* Return true if OPERAND is defined by a PHI node which uses the LHS
2979 of STMT in it's operands. This is also known as a "destructive
2980 update" operation. */
2983 is_phi_for_stmt (gimple stmt
, tree operand
)
2987 use_operand_p arg_p
;
2990 if (TREE_CODE (operand
) != SSA_NAME
)
2993 lhs
= gimple_assign_lhs (stmt
);
2995 def_stmt
= SSA_NAME_DEF_STMT (operand
);
2996 if (gimple_code (def_stmt
) != GIMPLE_PHI
)
2999 FOR_EACH_PHI_ARG (arg_p
, def_stmt
, i
, SSA_OP_USE
)
3000 if (lhs
== USE_FROM_PTR (arg_p
))
3005 /* Remove def stmt of VAR if VAR has zero uses and recurse
3006 on rhs1 operand if so. */
3009 remove_visited_stmt_chain (tree var
)
3012 gimple_stmt_iterator gsi
;
3016 if (TREE_CODE (var
) != SSA_NAME
|| !has_zero_uses (var
))
3018 stmt
= SSA_NAME_DEF_STMT (var
);
3019 if (is_gimple_assign (stmt
) && gimple_visited_p (stmt
))
3021 var
= gimple_assign_rhs1 (stmt
);
3022 gsi
= gsi_for_stmt (stmt
);
3023 gsi_remove (&gsi
, true);
3024 release_defs (stmt
);
3031 /* This function checks three consequtive operands in
3032 passed operands vector OPS starting from OPINDEX and
3033 swaps two operands if it is profitable for binary operation
3034 consuming OPINDEX + 1 abnd OPINDEX + 2 operands.
3036 We pair ops with the same rank if possible.
3038 The alternative we try is to see if STMT is a destructive
3039 update style statement, which is like:
3042 In that case, we want to use the destructive update form to
3043 expose the possible vectorizer sum reduction opportunity.
3044 In that case, the third operand will be the phi node. This
3045 check is not performed if STMT is null.
3047 We could, of course, try to be better as noted above, and do a
3048 lot of work to try to find these opportunities in >3 operand
3049 cases, but it is unlikely to be worth it. */
3052 swap_ops_for_binary_stmt (vec
<operand_entry_t
> ops
,
3053 unsigned int opindex
, gimple stmt
)
3055 operand_entry_t oe1
, oe2
, oe3
;
3058 oe2
= ops
[opindex
+ 1];
3059 oe3
= ops
[opindex
+ 2];
3061 if ((oe1
->rank
== oe2
->rank
3062 && oe2
->rank
!= oe3
->rank
)
3063 || (stmt
&& is_phi_for_stmt (stmt
, oe3
->op
)
3064 && !is_phi_for_stmt (stmt
, oe1
->op
)
3065 && !is_phi_for_stmt (stmt
, oe2
->op
)))
3067 struct operand_entry temp
= *oe3
;
3069 oe3
->rank
= oe1
->rank
;
3071 oe1
->rank
= temp
.rank
;
3073 else if ((oe1
->rank
== oe3
->rank
3074 && oe2
->rank
!= oe3
->rank
)
3075 || (stmt
&& is_phi_for_stmt (stmt
, oe2
->op
)
3076 && !is_phi_for_stmt (stmt
, oe1
->op
)
3077 && !is_phi_for_stmt (stmt
, oe3
->op
)))
3079 struct operand_entry temp
= *oe2
;
3081 oe2
->rank
= oe1
->rank
;
3083 oe1
->rank
= temp
.rank
;
3087 /* If definition of RHS1 or RHS2 dominates STMT, return the later of those
3088 two definitions, otherwise return STMT. */
3090 static inline gimple
3091 find_insert_point (gimple stmt
, tree rhs1
, tree rhs2
)
3093 if (TREE_CODE (rhs1
) == SSA_NAME
3094 && reassoc_stmt_dominates_stmt_p (stmt
, SSA_NAME_DEF_STMT (rhs1
)))
3095 stmt
= SSA_NAME_DEF_STMT (rhs1
);
3096 if (TREE_CODE (rhs2
) == SSA_NAME
3097 && reassoc_stmt_dominates_stmt_p (stmt
, SSA_NAME_DEF_STMT (rhs2
)))
3098 stmt
= SSA_NAME_DEF_STMT (rhs2
);
3102 /* Recursively rewrite our linearized statements so that the operators
3103 match those in OPS[OPINDEX], putting the computation in rank
3104 order. Return new lhs. */
3107 rewrite_expr_tree (gimple stmt
, unsigned int opindex
,
3108 vec
<operand_entry_t
> ops
, bool changed
)
3110 tree rhs1
= gimple_assign_rhs1 (stmt
);
3111 tree rhs2
= gimple_assign_rhs2 (stmt
);
3112 tree lhs
= gimple_assign_lhs (stmt
);
3115 /* The final recursion case for this function is that you have
3116 exactly two operations left.
3117 If we had one exactly one op in the entire list to start with, we
3118 would have never called this function, and the tail recursion
3119 rewrites them one at a time. */
3120 if (opindex
+ 2 == ops
.length ())
3122 operand_entry_t oe1
, oe2
;
3125 oe2
= ops
[opindex
+ 1];
3127 if (rhs1
!= oe1
->op
|| rhs2
!= oe2
->op
)
3129 gimple_stmt_iterator gsi
= gsi_for_stmt (stmt
);
3130 unsigned int uid
= gimple_uid (stmt
);
3132 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
3134 fprintf (dump_file
, "Transforming ");
3135 print_gimple_stmt (dump_file
, stmt
, 0, 0);
3140 gimple insert_point
= find_insert_point (stmt
, oe1
->op
, oe2
->op
);
3141 lhs
= make_ssa_name (TREE_TYPE (lhs
), NULL
);
3143 = gimple_build_assign_with_ops (gimple_assign_rhs_code (stmt
),
3144 lhs
, oe1
->op
, oe2
->op
);
3145 gimple_set_uid (stmt
, uid
);
3146 gimple_set_visited (stmt
, true);
3147 if (insert_point
== gsi_stmt (gsi
))
3148 gsi_insert_before (&gsi
, stmt
, GSI_SAME_STMT
);
3150 insert_stmt_after (stmt
, insert_point
);
3154 gcc_checking_assert (find_insert_point (stmt
, oe1
->op
, oe2
->op
)
3156 gimple_assign_set_rhs1 (stmt
, oe1
->op
);
3157 gimple_assign_set_rhs2 (stmt
, oe2
->op
);
3161 if (rhs1
!= oe1
->op
&& rhs1
!= oe2
->op
)
3162 remove_visited_stmt_chain (rhs1
);
3164 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
3166 fprintf (dump_file
, " into ");
3167 print_gimple_stmt (dump_file
, stmt
, 0, 0);
3173 /* If we hit here, we should have 3 or more ops left. */
3174 gcc_assert (opindex
+ 2 < ops
.length ());
3176 /* Rewrite the next operator. */
3179 /* Recurse on the LHS of the binary operator, which is guaranteed to
3180 be the non-leaf side. */
3182 = rewrite_expr_tree (SSA_NAME_DEF_STMT (rhs1
), opindex
+ 1, ops
,
3183 changed
|| oe
->op
!= rhs2
);
3185 if (oe
->op
!= rhs2
|| new_rhs1
!= rhs1
)
3187 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
3189 fprintf (dump_file
, "Transforming ");
3190 print_gimple_stmt (dump_file
, stmt
, 0, 0);
3193 /* If changed is false, this is either opindex == 0
3194 or all outer rhs2's were equal to corresponding oe->op,
3195 and powi_result is NULL.
3196 That means lhs is equivalent before and after reassociation.
3197 Otherwise ensure the old lhs SSA_NAME is not reused and
3198 create a new stmt as well, so that any debug stmts will be
3199 properly adjusted. */
3202 gimple_stmt_iterator gsi
= gsi_for_stmt (stmt
);
3203 unsigned int uid
= gimple_uid (stmt
);
3204 gimple insert_point
= find_insert_point (stmt
, new_rhs1
, oe
->op
);
3206 lhs
= make_ssa_name (TREE_TYPE (lhs
), NULL
);
3207 stmt
= gimple_build_assign_with_ops (gimple_assign_rhs_code (stmt
),
3208 lhs
, new_rhs1
, oe
->op
);
3209 gimple_set_uid (stmt
, uid
);
3210 gimple_set_visited (stmt
, true);
3211 if (insert_point
== gsi_stmt (gsi
))
3212 gsi_insert_before (&gsi
, stmt
, GSI_SAME_STMT
);
3214 insert_stmt_after (stmt
, insert_point
);
3218 gcc_checking_assert (find_insert_point (stmt
, new_rhs1
, oe
->op
)
3220 gimple_assign_set_rhs1 (stmt
, new_rhs1
);
3221 gimple_assign_set_rhs2 (stmt
, oe
->op
);
3225 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
3227 fprintf (dump_file
, " into ");
3228 print_gimple_stmt (dump_file
, stmt
, 0, 0);
/* Find out how many cycles we need to compute statements chain.
   OPS_NUM holds number os statements in a chain.  CPU_WIDTH is a
   maximum number of independent statements we may execute per cycle.  */

static int
get_required_cycles (int ops_num, int cpu_width)
{
  int res;
  int elog;
  unsigned int rest;

  /* While we have more than 2 * cpu_width operands
     we may reduce number of operands by cpu_width
     per cycle.  */
  res = ops_num / (2 * cpu_width);

  /* Remained operands count may be reduced twice per cycle
     until we have only one operand.  */
  rest = (unsigned)(ops_num - res * cpu_width);
  elog = exact_log2 (rest);
  if (elog >= 0)
    res += elog;
  else
    res += floor_log2 (rest) + 1;

  return res;
}
3262 /* Returns an optimal number of registers to use for computation of
3263 given statements. */
3266 get_reassociation_width (int ops_num
, enum tree_code opc
,
3267 enum machine_mode mode
)
3269 int param_width
= PARAM_VALUE (PARAM_TREE_REASSOC_WIDTH
);
3274 if (param_width
> 0)
3275 width
= param_width
;
3277 width
= targetm
.sched
.reassociation_width (opc
, mode
);
3282 /* Get the minimal time required for sequence computation. */
3283 cycles_best
= get_required_cycles (ops_num
, width
);
3285 /* Check if we may use less width and still compute sequence for
3286 the same time. It will allow us to reduce registers usage.
3287 get_required_cycles is monotonically increasing with lower width
3288 so we can perform a binary search for the minimal width that still
3289 results in the optimal cycle count. */
3291 while (width
> width_min
)
3293 int width_mid
= (width
+ width_min
) / 2;
3295 if (get_required_cycles (ops_num
, width_mid
) == cycles_best
)
3297 else if (width_min
< width_mid
)
3298 width_min
= width_mid
;
3306 /* Recursively rewrite our linearized statements so that the operators
3307 match those in OPS[OPINDEX], putting the computation in rank
3308 order and trying to allow operations to be executed in
3312 rewrite_expr_tree_parallel (gimple stmt
, int width
,
3313 vec
<operand_entry_t
> ops
)
3315 enum tree_code opcode
= gimple_assign_rhs_code (stmt
);
3316 int op_num
= ops
.length ();
3317 int stmt_num
= op_num
- 1;
3318 gimple
*stmts
= XALLOCAVEC (gimple
, stmt_num
);
3319 int op_index
= op_num
- 1;
3321 int ready_stmts_end
= 0;
3323 tree last_rhs1
= gimple_assign_rhs1 (stmt
);
3325 /* We start expression rewriting from the top statements.
3326 So, in this loop we create a full list of statements
3327 we will work with. */
3328 stmts
[stmt_num
- 1] = stmt
;
3329 for (i
= stmt_num
- 2; i
>= 0; i
--)
3330 stmts
[i
] = SSA_NAME_DEF_STMT (gimple_assign_rhs1 (stmts
[i
+1]));
3332 for (i
= 0; i
< stmt_num
; i
++)
3336 /* Determine whether we should use results of
3337 already handled statements or not. */
3338 if (ready_stmts_end
== 0
3339 && (i
- stmt_index
>= width
|| op_index
< 1))
3340 ready_stmts_end
= i
;
3342 /* Now we choose operands for the next statement. Non zero
3343 value in ready_stmts_end means here that we should use
3344 the result of already generated statements as new operand. */
3345 if (ready_stmts_end
> 0)
3347 op1
= gimple_assign_lhs (stmts
[stmt_index
++]);
3348 if (ready_stmts_end
> stmt_index
)
3349 op2
= gimple_assign_lhs (stmts
[stmt_index
++]);
3350 else if (op_index
>= 0)
3351 op2
= ops
[op_index
--]->op
;
3354 gcc_assert (stmt_index
< i
);
3355 op2
= gimple_assign_lhs (stmts
[stmt_index
++]);
3358 if (stmt_index
>= ready_stmts_end
)
3359 ready_stmts_end
= 0;
3364 swap_ops_for_binary_stmt (ops
, op_index
- 2, NULL
);
3365 op2
= ops
[op_index
--]->op
;
3366 op1
= ops
[op_index
--]->op
;
3369 /* If we emit the last statement then we should put
3370 operands into the last statement. It will also
3372 if (op_index
< 0 && stmt_index
== i
)
3375 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
3377 fprintf (dump_file
, "Transforming ");
3378 print_gimple_stmt (dump_file
, stmts
[i
], 0, 0);
3381 /* We keep original statement only for the last one. All
3382 others are recreated. */
3383 if (i
== stmt_num
- 1)
3385 gimple_assign_set_rhs1 (stmts
[i
], op1
);
3386 gimple_assign_set_rhs2 (stmts
[i
], op2
);
3387 update_stmt (stmts
[i
]);
3390 stmts
[i
] = build_and_add_sum (TREE_TYPE (last_rhs1
), op1
, op2
, opcode
);
3392 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
3394 fprintf (dump_file
, " into ");
3395 print_gimple_stmt (dump_file
, stmts
[i
], 0, 0);
3399 remove_visited_stmt_chain (last_rhs1
);
3402 /* Transform STMT, which is really (A +B) + (C + D) into the left
3403 linear form, ((A+B)+C)+D.
3404 Recurse on D if necessary. */
3407 linearize_expr (gimple stmt
)
3409 gimple_stmt_iterator gsi
;
3410 gimple binlhs
= SSA_NAME_DEF_STMT (gimple_assign_rhs1 (stmt
));
3411 gimple binrhs
= SSA_NAME_DEF_STMT (gimple_assign_rhs2 (stmt
));
3412 gimple oldbinrhs
= binrhs
;
3413 enum tree_code rhscode
= gimple_assign_rhs_code (stmt
);
3414 gimple newbinrhs
= NULL
;
3415 struct loop
*loop
= loop_containing_stmt (stmt
);
3416 tree lhs
= gimple_assign_lhs (stmt
);
3418 gcc_assert (is_reassociable_op (binlhs
, rhscode
, loop
)
3419 && is_reassociable_op (binrhs
, rhscode
, loop
));
3421 gsi
= gsi_for_stmt (stmt
);
3423 gimple_assign_set_rhs2 (stmt
, gimple_assign_rhs1 (binrhs
));
3424 binrhs
= gimple_build_assign_with_ops (gimple_assign_rhs_code (binrhs
),
3425 make_ssa_name (TREE_TYPE (lhs
), NULL
),
3426 gimple_assign_lhs (binlhs
),
3427 gimple_assign_rhs2 (binrhs
));
3428 gimple_assign_set_rhs1 (stmt
, gimple_assign_lhs (binrhs
));
3429 gsi_insert_before (&gsi
, binrhs
, GSI_SAME_STMT
);
3430 gimple_set_uid (binrhs
, gimple_uid (stmt
));
3432 if (TREE_CODE (gimple_assign_rhs2 (stmt
)) == SSA_NAME
)
3433 newbinrhs
= SSA_NAME_DEF_STMT (gimple_assign_rhs2 (stmt
));
3435 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
3437 fprintf (dump_file
, "Linearized: ");
3438 print_gimple_stmt (dump_file
, stmt
, 0, 0);
3441 reassociate_stats
.linearized
++;
3444 gsi
= gsi_for_stmt (oldbinrhs
);
3445 gsi_remove (&gsi
, true);
3446 release_defs (oldbinrhs
);
3448 gimple_set_visited (stmt
, true);
3449 gimple_set_visited (binlhs
, true);
3450 gimple_set_visited (binrhs
, true);
3452 /* Tail recurse on the new rhs if it still needs reassociation. */
3453 if (newbinrhs
&& is_reassociable_op (newbinrhs
, rhscode
, loop
))
3454 /* ??? This should probably be linearize_expr (newbinrhs) but I don't
3455 want to change the algorithm while converting to tuples. */
3456 linearize_expr (stmt
);
3459 /* If LHS has a single immediate use that is a GIMPLE_ASSIGN statement, return
3460 it. Otherwise, return NULL. */
3463 get_single_immediate_use (tree lhs
)
3465 use_operand_p immuse
;
3468 if (TREE_CODE (lhs
) == SSA_NAME
3469 && single_imm_use (lhs
, &immuse
, &immusestmt
)
3470 && is_gimple_assign (immusestmt
))
3476 /* Recursively negate the value of TONEGATE, and return the SSA_NAME
3477 representing the negated value. Insertions of any necessary
3478 instructions go before GSI.
3479 This function is recursive in that, if you hand it "a_5" as the
3480 value to negate, and a_5 is defined by "a_5 = b_3 + b_4", it will
3481 transform b_3 + b_4 into a_5 = -b_3 + -b_4. */
3484 negate_value (tree tonegate
, gimple_stmt_iterator
*gsip
)
3486 gimple negatedefstmt
= NULL
;
3487 tree resultofnegate
;
3488 gimple_stmt_iterator gsi
;
3491 /* If we are trying to negate a name, defined by an add, negate the
3492 add operands instead. */
3493 if (TREE_CODE (tonegate
) == SSA_NAME
)
3494 negatedefstmt
= SSA_NAME_DEF_STMT (tonegate
);
3495 if (TREE_CODE (tonegate
) == SSA_NAME
3496 && is_gimple_assign (negatedefstmt
)
3497 && TREE_CODE (gimple_assign_lhs (negatedefstmt
)) == SSA_NAME
3498 && has_single_use (gimple_assign_lhs (negatedefstmt
))
3499 && gimple_assign_rhs_code (negatedefstmt
) == PLUS_EXPR
)
3501 tree rhs1
= gimple_assign_rhs1 (negatedefstmt
);
3502 tree rhs2
= gimple_assign_rhs2 (negatedefstmt
);
3503 tree lhs
= gimple_assign_lhs (negatedefstmt
);
3506 gsi
= gsi_for_stmt (negatedefstmt
);
3507 rhs1
= negate_value (rhs1
, &gsi
);
3509 gsi
= gsi_for_stmt (negatedefstmt
);
3510 rhs2
= negate_value (rhs2
, &gsi
);
3512 gsi
= gsi_for_stmt (negatedefstmt
);
3513 lhs
= make_ssa_name (TREE_TYPE (lhs
), NULL
);
3514 gimple_set_visited (negatedefstmt
, true);
3515 g
= gimple_build_assign_with_ops (PLUS_EXPR
, lhs
, rhs1
, rhs2
);
3516 gimple_set_uid (g
, gimple_uid (negatedefstmt
));
3517 gsi_insert_before (&gsi
, g
, GSI_SAME_STMT
);
3521 tonegate
= fold_build1 (NEGATE_EXPR
, TREE_TYPE (tonegate
), tonegate
);
3522 resultofnegate
= force_gimple_operand_gsi (gsip
, tonegate
, true,
3523 NULL_TREE
, true, GSI_SAME_STMT
);
3525 uid
= gimple_uid (gsi_stmt (gsi
));
3526 for (gsi_prev (&gsi
); !gsi_end_p (gsi
); gsi_prev (&gsi
))
3528 gimple stmt
= gsi_stmt (gsi
);
3529 if (gimple_uid (stmt
) != 0)
3531 gimple_set_uid (stmt
, uid
);
3533 return resultofnegate
;
3536 /* Return true if we should break up the subtract in STMT into an add
3537 with negate. This is true when we the subtract operands are really
3538 adds, or the subtract itself is used in an add expression. In
3539 either case, breaking up the subtract into an add with negate
3540 exposes the adds to reassociation. */
3543 should_break_up_subtract (gimple stmt
)
3545 tree lhs
= gimple_assign_lhs (stmt
);
3546 tree binlhs
= gimple_assign_rhs1 (stmt
);
3547 tree binrhs
= gimple_assign_rhs2 (stmt
);
3549 struct loop
*loop
= loop_containing_stmt (stmt
);
3551 if (TREE_CODE (binlhs
) == SSA_NAME
3552 && is_reassociable_op (SSA_NAME_DEF_STMT (binlhs
), PLUS_EXPR
, loop
))
3555 if (TREE_CODE (binrhs
) == SSA_NAME
3556 && is_reassociable_op (SSA_NAME_DEF_STMT (binrhs
), PLUS_EXPR
, loop
))
3559 if (TREE_CODE (lhs
) == SSA_NAME
3560 && (immusestmt
= get_single_immediate_use (lhs
))
3561 && is_gimple_assign (immusestmt
)
3562 && (gimple_assign_rhs_code (immusestmt
) == PLUS_EXPR
3563 || gimple_assign_rhs_code (immusestmt
) == MULT_EXPR
))
3568 /* Transform STMT from A - B into A + -B. */
3571 break_up_subtract (gimple stmt
, gimple_stmt_iterator
*gsip
)
3573 tree rhs1
= gimple_assign_rhs1 (stmt
);
3574 tree rhs2
= gimple_assign_rhs2 (stmt
);
3576 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
3578 fprintf (dump_file
, "Breaking up subtract ");
3579 print_gimple_stmt (dump_file
, stmt
, 0, 0);
3582 rhs2
= negate_value (rhs2
, gsip
);
3583 gimple_assign_set_rhs_with_ops (gsip
, PLUS_EXPR
, rhs1
, rhs2
);
3587 /* Determine whether STMT is a builtin call that raises an SSA name
3588 to an integer power and has only one use. If so, and this is early
3589 reassociation and unsafe math optimizations are permitted, place
3590 the SSA name in *BASE and the exponent in *EXPONENT, and return TRUE.
3591 If any of these conditions does not hold, return FALSE. */
3594 acceptable_pow_call (gimple stmt
, tree
*base
, HOST_WIDE_INT
*exponent
)
3597 REAL_VALUE_TYPE c
, cint
;
3599 if (!first_pass_instance
3600 || !flag_unsafe_math_optimizations
3601 || !is_gimple_call (stmt
)
3602 || !has_single_use (gimple_call_lhs (stmt
)))
3605 fndecl
= gimple_call_fndecl (stmt
);
3608 || DECL_BUILT_IN_CLASS (fndecl
) != BUILT_IN_NORMAL
)
3611 switch (DECL_FUNCTION_CODE (fndecl
))
3613 CASE_FLT_FN (BUILT_IN_POW
):
3614 *base
= gimple_call_arg (stmt
, 0);
3615 arg1
= gimple_call_arg (stmt
, 1);
3617 if (TREE_CODE (arg1
) != REAL_CST
)
3620 c
= TREE_REAL_CST (arg1
);
3622 if (REAL_EXP (&c
) > HOST_BITS_PER_WIDE_INT
)
3625 *exponent
= real_to_integer (&c
);
3626 real_from_integer (&cint
, VOIDmode
, *exponent
,
3627 *exponent
< 0 ? -1 : 0, 0);
3628 if (!real_identical (&c
, &cint
))
3633 CASE_FLT_FN (BUILT_IN_POWI
):
3634 *base
= gimple_call_arg (stmt
, 0);
3635 arg1
= gimple_call_arg (stmt
, 1);
3637 if (!host_integerp (arg1
, 0))
3640 *exponent
= TREE_INT_CST_LOW (arg1
);
3647 /* Expanding negative exponents is generally unproductive, so we don't
3648 complicate matters with those. Exponents of zero and one should
3649 have been handled by expression folding. */
3650 if (*exponent
< 2 || TREE_CODE (*base
) != SSA_NAME
)
3656 /* Recursively linearize a binary expression that is the RHS of STMT.
3657 Place the operands of the expression tree in the vector named OPS. */
3660 linearize_expr_tree (vec
<operand_entry_t
> *ops
, gimple stmt
,
3661 bool is_associative
, bool set_visited
)
3663 tree binlhs
= gimple_assign_rhs1 (stmt
);
3664 tree binrhs
= gimple_assign_rhs2 (stmt
);
3665 gimple binlhsdef
= NULL
, binrhsdef
= NULL
;
3666 bool binlhsisreassoc
= false;
3667 bool binrhsisreassoc
= false;
3668 enum tree_code rhscode
= gimple_assign_rhs_code (stmt
);
3669 struct loop
*loop
= loop_containing_stmt (stmt
);
3670 tree base
= NULL_TREE
;
3671 HOST_WIDE_INT exponent
= 0;
3674 gimple_set_visited (stmt
, true);
3676 if (TREE_CODE (binlhs
) == SSA_NAME
)
3678 binlhsdef
= SSA_NAME_DEF_STMT (binlhs
);
3679 binlhsisreassoc
= (is_reassociable_op (binlhsdef
, rhscode
, loop
)
3680 && !stmt_could_throw_p (binlhsdef
));
3683 if (TREE_CODE (binrhs
) == SSA_NAME
)
3685 binrhsdef
= SSA_NAME_DEF_STMT (binrhs
);
3686 binrhsisreassoc
= (is_reassociable_op (binrhsdef
, rhscode
, loop
)
3687 && !stmt_could_throw_p (binrhsdef
));
3690 /* If the LHS is not reassociable, but the RHS is, we need to swap
3691 them. If neither is reassociable, there is nothing we can do, so
3692 just put them in the ops vector. If the LHS is reassociable,
3693 linearize it. If both are reassociable, then linearize the RHS
3696 if (!binlhsisreassoc
)
3700 /* If this is not a associative operation like division, give up. */
3701 if (!is_associative
)
3703 add_to_ops_vec (ops
, binrhs
);
3707 if (!binrhsisreassoc
)
3709 if (rhscode
== MULT_EXPR
3710 && TREE_CODE (binrhs
) == SSA_NAME
3711 && acceptable_pow_call (binrhsdef
, &base
, &exponent
))
3713 add_repeat_to_ops_vec (ops
, base
, exponent
);
3714 gimple_set_visited (binrhsdef
, true);
3717 add_to_ops_vec (ops
, binrhs
);
3719 if (rhscode
== MULT_EXPR
3720 && TREE_CODE (binlhs
) == SSA_NAME
3721 && acceptable_pow_call (binlhsdef
, &base
, &exponent
))
3723 add_repeat_to_ops_vec (ops
, base
, exponent
);
3724 gimple_set_visited (binlhsdef
, true);
3727 add_to_ops_vec (ops
, binlhs
);
3732 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
3734 fprintf (dump_file
, "swapping operands of ");
3735 print_gimple_stmt (dump_file
, stmt
, 0, 0);
3738 swap_ssa_operands (stmt
,
3739 gimple_assign_rhs1_ptr (stmt
),
3740 gimple_assign_rhs2_ptr (stmt
));
3743 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
3745 fprintf (dump_file
, " is now ");
3746 print_gimple_stmt (dump_file
, stmt
, 0, 0);
3749 /* We want to make it so the lhs is always the reassociative op,
3755 else if (binrhsisreassoc
)
3757 linearize_expr (stmt
);
3758 binlhs
= gimple_assign_rhs1 (stmt
);
3759 binrhs
= gimple_assign_rhs2 (stmt
);
3762 gcc_assert (TREE_CODE (binrhs
) != SSA_NAME
3763 || !is_reassociable_op (SSA_NAME_DEF_STMT (binrhs
),
3765 linearize_expr_tree (ops
, SSA_NAME_DEF_STMT (binlhs
),
3766 is_associative
, set_visited
);
3768 if (rhscode
== MULT_EXPR
3769 && TREE_CODE (binrhs
) == SSA_NAME
3770 && acceptable_pow_call (SSA_NAME_DEF_STMT (binrhs
), &base
, &exponent
))
3772 add_repeat_to_ops_vec (ops
, base
, exponent
);
3773 gimple_set_visited (SSA_NAME_DEF_STMT (binrhs
), true);
3776 add_to_ops_vec (ops
, binrhs
);
3779 /* Repropagate the negates back into subtracts, since no other pass
3780 currently does it. */
3783 repropagate_negates (void)
3788 FOR_EACH_VEC_ELT (plus_negates
, i
, negate
)
3790 gimple user
= get_single_immediate_use (negate
);
3792 if (!user
|| !is_gimple_assign (user
))
3795 /* The negate operand can be either operand of a PLUS_EXPR
3796 (it can be the LHS if the RHS is a constant for example).
3798 Force the negate operand to the RHS of the PLUS_EXPR, then
3799 transform the PLUS_EXPR into a MINUS_EXPR. */
3800 if (gimple_assign_rhs_code (user
) == PLUS_EXPR
)
3802 /* If the negated operand appears on the LHS of the
3803 PLUS_EXPR, exchange the operands of the PLUS_EXPR
3804 to force the negated operand to the RHS of the PLUS_EXPR. */
3805 if (gimple_assign_rhs1 (user
) == negate
)
3807 swap_ssa_operands (user
,
3808 gimple_assign_rhs1_ptr (user
),
3809 gimple_assign_rhs2_ptr (user
));
3812 /* Now transform the PLUS_EXPR into a MINUS_EXPR and replace
3813 the RHS of the PLUS_EXPR with the operand of the NEGATE_EXPR. */
3814 if (gimple_assign_rhs2 (user
) == negate
)
3816 tree rhs1
= gimple_assign_rhs1 (user
);
3817 tree rhs2
= get_unary_op (negate
, NEGATE_EXPR
);
3818 gimple_stmt_iterator gsi
= gsi_for_stmt (user
);
3819 gimple_assign_set_rhs_with_ops (&gsi
, MINUS_EXPR
, rhs1
, rhs2
);
3823 else if (gimple_assign_rhs_code (user
) == MINUS_EXPR
)
3825 if (gimple_assign_rhs1 (user
) == negate
)
3830 which we transform into
3833 This pushes down the negate which we possibly can merge
3834 into some other operation, hence insert it into the
3835 plus_negates vector. */
3836 gimple feed
= SSA_NAME_DEF_STMT (negate
);
3837 tree a
= gimple_assign_rhs1 (feed
);
3838 tree b
= gimple_assign_rhs2 (user
);
3839 gimple_stmt_iterator gsi
= gsi_for_stmt (feed
);
3840 gimple_stmt_iterator gsi2
= gsi_for_stmt (user
);
3841 tree x
= make_ssa_name (TREE_TYPE (gimple_assign_lhs (feed
)), NULL
);
3842 gimple g
= gimple_build_assign_with_ops (PLUS_EXPR
, x
, a
, b
);
3843 gsi_insert_before (&gsi2
, g
, GSI_SAME_STMT
);
3844 gimple_assign_set_rhs_with_ops (&gsi2
, NEGATE_EXPR
, x
, NULL
);
3845 user
= gsi_stmt (gsi2
);
3847 gsi_remove (&gsi
, true);
3848 release_defs (feed
);
3849 plus_negates
.safe_push (gimple_assign_lhs (user
));
3853 /* Transform "x = -a; y = b - x" into "y = b + a", getting
3854 rid of one operation. */
3855 gimple feed
= SSA_NAME_DEF_STMT (negate
);
3856 tree a
= gimple_assign_rhs1 (feed
);
3857 tree rhs1
= gimple_assign_rhs1 (user
);
3858 gimple_stmt_iterator gsi
= gsi_for_stmt (user
);
3859 gimple_assign_set_rhs_with_ops (&gsi
, PLUS_EXPR
, rhs1
, a
);
3860 update_stmt (gsi_stmt (gsi
));
3866 /* Returns true if OP is of a type for which we can do reassociation.
3867 That is for integral or non-saturating fixed-point types, and for
3868 floating point type when associative-math is enabled. */
3871 can_reassociate_p (tree op
)
3873 tree type
= TREE_TYPE (op
);
3874 if ((INTEGRAL_TYPE_P (type
) && TYPE_OVERFLOW_WRAPS (type
))
3875 || NON_SAT_FIXED_POINT_TYPE_P (type
)
3876 || (flag_associative_math
&& FLOAT_TYPE_P (type
)))
3881 /* Break up subtract operations in block BB.
3883 We do this top down because we don't know whether the subtract is
3884 part of a possible chain of reassociation except at the top.
3893 we want to break up k = t - q, but we won't until we've transformed q
3894 = b - r, which won't be broken up until we transform b = c - d.
3896 En passant, clear the GIMPLE visited flag on every statement
3897 and set UIDs within each basic block. */
3900 break_up_subtract_bb (basic_block bb
)
3902 gimple_stmt_iterator gsi
;
3904 unsigned int uid
= 1;
3906 for (gsi
= gsi_start_bb (bb
); !gsi_end_p (gsi
); gsi_next (&gsi
))
3908 gimple stmt
= gsi_stmt (gsi
);
3909 gimple_set_visited (stmt
, false);
3910 gimple_set_uid (stmt
, uid
++);
3912 if (!is_gimple_assign (stmt
)
3913 || !can_reassociate_p (gimple_assign_lhs (stmt
)))
3916 /* Look for simple gimple subtract operations. */
3917 if (gimple_assign_rhs_code (stmt
) == MINUS_EXPR
)
3919 if (!can_reassociate_p (gimple_assign_rhs1 (stmt
))
3920 || !can_reassociate_p (gimple_assign_rhs2 (stmt
)))
3923 /* Check for a subtract used only in an addition. If this
3924 is the case, transform it into add of a negate for better
3925 reassociation. IE transform C = A-B into C = A + -B if C
3926 is only used in an addition. */
3927 if (should_break_up_subtract (stmt
))
3928 break_up_subtract (stmt
, &gsi
);
3930 else if (gimple_assign_rhs_code (stmt
) == NEGATE_EXPR
3931 && can_reassociate_p (gimple_assign_rhs1 (stmt
)))
3932 plus_negates
.safe_push (gimple_assign_lhs (stmt
));
3934 for (son
= first_dom_son (CDI_DOMINATORS
, bb
);
3936 son
= next_dom_son (CDI_DOMINATORS
, son
))
3937 break_up_subtract_bb (son
);
3940 /* Used for repeated factor analysis. */
3941 struct repeat_factor_d
3943 /* An SSA name that occurs in a multiply chain. */
3946 /* Cached rank of the factor. */
3949 /* Number of occurrences of the factor in the chain. */
3950 HOST_WIDE_INT count
;
3952 /* An SSA name representing the product of this factor and
3953 all factors appearing later in the repeated factor vector. */
3957 typedef struct repeat_factor_d repeat_factor
, *repeat_factor_t
;
3958 typedef const struct repeat_factor_d
*const_repeat_factor_t
;
3961 static vec
<repeat_factor
> repeat_factor_vec
;
3963 /* Used for sorting the repeat factor vector. Sort primarily by
3964 ascending occurrence count, secondarily by descending rank. */
3967 compare_repeat_factors (const void *x1
, const void *x2
)
3969 const_repeat_factor_t rf1
= (const_repeat_factor_t
) x1
;
3970 const_repeat_factor_t rf2
= (const_repeat_factor_t
) x2
;
3972 if (rf1
->count
!= rf2
->count
)
3973 return rf1
->count
- rf2
->count
;
3975 return rf2
->rank
- rf1
->rank
;
3978 /* Look for repeated operands in OPS in the multiply tree rooted at
3979 STMT. Replace them with an optimal sequence of multiplies and powi
3980 builtin calls, and remove the used operands from OPS. Return an
3981 SSA name representing the value of the replacement sequence. */
3984 attempt_builtin_powi (gimple stmt
, vec
<operand_entry_t
> *ops
)
3986 unsigned i
, j
, vec_len
;
3989 repeat_factor_t rf1
, rf2
;
3990 repeat_factor rfnew
;
3991 tree result
= NULL_TREE
;
3992 tree target_ssa
, iter_result
;
3993 tree type
= TREE_TYPE (gimple_get_lhs (stmt
));
3994 tree powi_fndecl
= mathfn_built_in (type
, BUILT_IN_POWI
);
3995 gimple_stmt_iterator gsi
= gsi_for_stmt (stmt
);
3996 gimple mul_stmt
, pow_stmt
;
3998 /* Nothing to do if BUILT_IN_POWI doesn't exist for this type and
4003 /* Allocate the repeated factor vector. */
4004 repeat_factor_vec
.create (10);
4006 /* Scan the OPS vector for all SSA names in the product and build
4007 up a vector of occurrence counts for each factor. */
4008 FOR_EACH_VEC_ELT (*ops
, i
, oe
)
4010 if (TREE_CODE (oe
->op
) == SSA_NAME
)
4012 FOR_EACH_VEC_ELT (repeat_factor_vec
, j
, rf1
)
4014 if (rf1
->factor
== oe
->op
)
4016 rf1
->count
+= oe
->count
;
4021 if (j
>= repeat_factor_vec
.length ())
4023 rfnew
.factor
= oe
->op
;
4024 rfnew
.rank
= oe
->rank
;
4025 rfnew
.count
= oe
->count
;
4026 rfnew
.repr
= NULL_TREE
;
4027 repeat_factor_vec
.safe_push (rfnew
);
4032 /* Sort the repeated factor vector by (a) increasing occurrence count,
4033 and (b) decreasing rank. */
4034 repeat_factor_vec
.qsort (compare_repeat_factors
);
4036 /* It is generally best to combine as many base factors as possible
4037 into a product before applying __builtin_powi to the result.
4038 However, the sort order chosen for the repeated factor vector
4039 allows us to cache partial results for the product of the base
4040 factors for subsequent use. When we already have a cached partial
4041 result from a previous iteration, it is best to make use of it
4042 before looking for another __builtin_pow opportunity.
4044 As an example, consider x * x * y * y * y * z * z * z * z.
4045 We want to first compose the product x * y * z, raise it to the
4046 second power, then multiply this by y * z, and finally multiply
4047 by z. This can be done in 5 multiplies provided we cache y * z
4048 for use in both expressions:
4056 If we instead ignored the cached y * z and first multiplied by
4057 the __builtin_pow opportunity z * z, we would get the inferior:
4066 vec_len
= repeat_factor_vec
.length ();
4068 /* Repeatedly look for opportunities to create a builtin_powi call. */
4071 HOST_WIDE_INT power
;
4073 /* First look for the largest cached product of factors from
4074 preceding iterations. If found, create a builtin_powi for
4075 it if the minimum occurrence count for its factors is at
4076 least 2, or just use this cached product as our next
4077 multiplicand if the minimum occurrence count is 1. */
4078 FOR_EACH_VEC_ELT (repeat_factor_vec
, j
, rf1
)
4080 if (rf1
->repr
&& rf1
->count
> 0)
4090 iter_result
= rf1
->repr
;
4092 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
4096 fputs ("Multiplying by cached product ", dump_file
);
4097 for (elt
= j
; elt
< vec_len
; elt
++)
4099 rf
= &repeat_factor_vec
[elt
];
4100 print_generic_expr (dump_file
, rf
->factor
, 0);
4101 if (elt
< vec_len
- 1)
4102 fputs (" * ", dump_file
);
4104 fputs ("\n", dump_file
);
4109 iter_result
= make_temp_ssa_name (type
, NULL
, "reassocpow");
4110 pow_stmt
= gimple_build_call (powi_fndecl
, 2, rf1
->repr
,
4111 build_int_cst (integer_type_node
,
4113 gimple_call_set_lhs (pow_stmt
, iter_result
);
4114 gimple_set_location (pow_stmt
, gimple_location (stmt
));
4115 gsi_insert_before (&gsi
, pow_stmt
, GSI_SAME_STMT
);
4117 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
4121 fputs ("Building __builtin_pow call for cached product (",
4123 for (elt
= j
; elt
< vec_len
; elt
++)
4125 rf
= &repeat_factor_vec
[elt
];
4126 print_generic_expr (dump_file
, rf
->factor
, 0);
4127 if (elt
< vec_len
- 1)
4128 fputs (" * ", dump_file
);
4130 fprintf (dump_file
, ")^"HOST_WIDE_INT_PRINT_DEC
"\n",
4137 /* Otherwise, find the first factor in the repeated factor
4138 vector whose occurrence count is at least 2. If no such
4139 factor exists, there are no builtin_powi opportunities
4141 FOR_EACH_VEC_ELT (repeat_factor_vec
, j
, rf1
)
4143 if (rf1
->count
>= 2)
4152 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
4156 fputs ("Building __builtin_pow call for (", dump_file
);
4157 for (elt
= j
; elt
< vec_len
; elt
++)
4159 rf
= &repeat_factor_vec
[elt
];
4160 print_generic_expr (dump_file
, rf
->factor
, 0);
4161 if (elt
< vec_len
- 1)
4162 fputs (" * ", dump_file
);
4164 fprintf (dump_file
, ")^"HOST_WIDE_INT_PRINT_DEC
"\n", power
);
4167 reassociate_stats
.pows_created
++;
4169 /* Visit each element of the vector in reverse order (so that
4170 high-occurrence elements are visited first, and within the
4171 same occurrence count, lower-ranked elements are visited
4172 first). Form a linear product of all elements in this order
4173 whose occurrencce count is at least that of element J.
4174 Record the SSA name representing the product of each element
4175 with all subsequent elements in the vector. */
4176 if (j
== vec_len
- 1)
4177 rf1
->repr
= rf1
->factor
;
4180 for (ii
= vec_len
- 2; ii
>= (int)j
; ii
--)
4184 rf1
= &repeat_factor_vec
[ii
];
4185 rf2
= &repeat_factor_vec
[ii
+ 1];
4187 /* Init the last factor's representative to be itself. */
4189 rf2
->repr
= rf2
->factor
;
4194 target_ssa
= make_temp_ssa_name (type
, NULL
, "reassocpow");
4195 mul_stmt
= gimple_build_assign_with_ops (MULT_EXPR
,
4198 gimple_set_location (mul_stmt
, gimple_location (stmt
));
4199 gsi_insert_before (&gsi
, mul_stmt
, GSI_SAME_STMT
);
4200 rf1
->repr
= target_ssa
;
4202 /* Don't reprocess the multiply we just introduced. */
4203 gimple_set_visited (mul_stmt
, true);
4207 /* Form a call to __builtin_powi for the maximum product
4208 just formed, raised to the power obtained earlier. */
4209 rf1
= &repeat_factor_vec
[j
];
4210 iter_result
= make_temp_ssa_name (type
, NULL
, "reassocpow");
4211 pow_stmt
= gimple_build_call (powi_fndecl
, 2, rf1
->repr
,
4212 build_int_cst (integer_type_node
,
4214 gimple_call_set_lhs (pow_stmt
, iter_result
);
4215 gimple_set_location (pow_stmt
, gimple_location (stmt
));
4216 gsi_insert_before (&gsi
, pow_stmt
, GSI_SAME_STMT
);
4219 /* If we previously formed at least one other builtin_powi call,
4220 form the product of this one and those others. */
4223 tree new_result
= make_temp_ssa_name (type
, NULL
, "reassocpow");
4224 mul_stmt
= gimple_build_assign_with_ops (MULT_EXPR
, new_result
,
4225 result
, iter_result
);
4226 gimple_set_location (mul_stmt
, gimple_location (stmt
));
4227 gsi_insert_before (&gsi
, mul_stmt
, GSI_SAME_STMT
);
4228 gimple_set_visited (mul_stmt
, true);
4229 result
= new_result
;
4232 result
= iter_result
;
4234 /* Decrement the occurrence count of each element in the product
4235 by the count found above, and remove this many copies of each
4237 for (i
= j
; i
< vec_len
; i
++)
4242 rf1
= &repeat_factor_vec
[i
];
4243 rf1
->count
-= power
;
4245 FOR_EACH_VEC_ELT_REVERSE (*ops
, n
, oe
)
4247 if (oe
->op
== rf1
->factor
)
4251 ops
->ordered_remove (n
);
4267 /* At this point all elements in the repeated factor vector have a
4268 remaining occurrence count of 0 or 1, and those with a count of 1
4269 don't have cached representatives. Re-sort the ops vector and
4271 ops
->qsort (sort_by_operand_rank
);
4272 repeat_factor_vec
.release ();
4274 /* Return the final product computed herein. Note that there may
4275 still be some elements with single occurrence count left in OPS;
4276 those will be handled by the normal reassociation logic. */
4280 /* Transform STMT at *GSI into a copy by replacing its rhs with NEW_RHS. */
4283 transform_stmt_to_copy (gimple_stmt_iterator
*gsi
, gimple stmt
, tree new_rhs
)
4287 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
4289 fprintf (dump_file
, "Transforming ");
4290 print_gimple_stmt (dump_file
, stmt
, 0, 0);
4293 rhs1
= gimple_assign_rhs1 (stmt
);
4294 gimple_assign_set_rhs_from_tree (gsi
, new_rhs
);
4296 remove_visited_stmt_chain (rhs1
);
4298 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
4300 fprintf (dump_file
, " into ");
4301 print_gimple_stmt (dump_file
, stmt
, 0, 0);
4305 /* Transform STMT at *GSI into a multiply of RHS1 and RHS2. */
4308 transform_stmt_to_multiply (gimple_stmt_iterator
*gsi
, gimple stmt
,
4309 tree rhs1
, tree rhs2
)
4311 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
4313 fprintf (dump_file
, "Transforming ");
4314 print_gimple_stmt (dump_file
, stmt
, 0, 0);
4317 gimple_assign_set_rhs_with_ops (gsi
, MULT_EXPR
, rhs1
, rhs2
);
4318 update_stmt (gsi_stmt (*gsi
));
4319 remove_visited_stmt_chain (rhs1
);
4321 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
4323 fprintf (dump_file
, " into ");
4324 print_gimple_stmt (dump_file
, stmt
, 0, 0);
4328 /* Reassociate expressions in basic block BB and its post-dominator as
4332 reassociate_bb (basic_block bb
)
4334 gimple_stmt_iterator gsi
;
4336 gimple stmt
= last_stmt (bb
);
4338 if (stmt
&& !gimple_visited_p (stmt
))
4339 maybe_optimize_range_tests (stmt
);
4341 for (gsi
= gsi_last_bb (bb
); !gsi_end_p (gsi
); gsi_prev (&gsi
))
4343 stmt
= gsi_stmt (gsi
);
4345 if (is_gimple_assign (stmt
)
4346 && !stmt_could_throw_p (stmt
))
4348 tree lhs
, rhs1
, rhs2
;
4349 enum tree_code rhs_code
= gimple_assign_rhs_code (stmt
);
4351 /* If this is not a gimple binary expression, there is
4352 nothing for us to do with it. */
4353 if (get_gimple_rhs_class (rhs_code
) != GIMPLE_BINARY_RHS
)
4356 /* If this was part of an already processed statement,
4357 we don't need to touch it again. */
4358 if (gimple_visited_p (stmt
))
4360 /* This statement might have become dead because of previous
4362 if (has_zero_uses (gimple_get_lhs (stmt
)))
4364 gsi_remove (&gsi
, true);
4365 release_defs (stmt
);
4366 /* We might end up removing the last stmt above which
4367 places the iterator to the end of the sequence.
4368 Reset it to the last stmt in this case which might
4369 be the end of the sequence as well if we removed
4370 the last statement of the sequence. In which case
4371 we need to bail out. */
4372 if (gsi_end_p (gsi
))
4374 gsi
= gsi_last_bb (bb
);
4375 if (gsi_end_p (gsi
))
4382 lhs
= gimple_assign_lhs (stmt
);
4383 rhs1
= gimple_assign_rhs1 (stmt
);
4384 rhs2
= gimple_assign_rhs2 (stmt
);
4386 /* For non-bit or min/max operations we can't associate
4387 all types. Verify that here. */
4388 if (rhs_code
!= BIT_IOR_EXPR
4389 && rhs_code
!= BIT_AND_EXPR
4390 && rhs_code
!= BIT_XOR_EXPR
4391 && rhs_code
!= MIN_EXPR
4392 && rhs_code
!= MAX_EXPR
4393 && (!can_reassociate_p (lhs
)
4394 || !can_reassociate_p (rhs1
)
4395 || !can_reassociate_p (rhs2
)))
4398 if (associative_tree_code (rhs_code
))
4400 vec
<operand_entry_t
> ops
= vNULL
;
4401 tree powi_result
= NULL_TREE
;
4403 /* There may be no immediate uses left by the time we
4404 get here because we may have eliminated them all. */
4405 if (TREE_CODE (lhs
) == SSA_NAME
&& has_zero_uses (lhs
))
4408 gimple_set_visited (stmt
, true);
4409 linearize_expr_tree (&ops
, stmt
, true, true);
4410 ops
.qsort (sort_by_operand_rank
);
4411 optimize_ops_list (rhs_code
, &ops
);
4412 if (undistribute_ops_list (rhs_code
, &ops
,
4413 loop_containing_stmt (stmt
)))
4415 ops
.qsort (sort_by_operand_rank
);
4416 optimize_ops_list (rhs_code
, &ops
);
4419 if (rhs_code
== BIT_IOR_EXPR
|| rhs_code
== BIT_AND_EXPR
)
4420 optimize_range_tests (rhs_code
, &ops
);
4422 if (first_pass_instance
4423 && rhs_code
== MULT_EXPR
4424 && flag_unsafe_math_optimizations
)
4425 powi_result
= attempt_builtin_powi (stmt
, &ops
);
4427 /* If the operand vector is now empty, all operands were
4428 consumed by the __builtin_powi optimization. */
4429 if (ops
.length () == 0)
4430 transform_stmt_to_copy (&gsi
, stmt
, powi_result
);
4431 else if (ops
.length () == 1)
4433 tree last_op
= ops
.last ()->op
;
4436 transform_stmt_to_multiply (&gsi
, stmt
, last_op
,
4439 transform_stmt_to_copy (&gsi
, stmt
, last_op
);
4443 enum machine_mode mode
= TYPE_MODE (TREE_TYPE (lhs
));
4444 int ops_num
= ops
.length ();
4445 int width
= get_reassociation_width (ops_num
, rhs_code
, mode
);
4448 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
4450 "Width = %d was chosen for reassociation\n", width
);
4453 && ops
.length () > 3)
4454 rewrite_expr_tree_parallel (stmt
, width
, ops
);
4457 /* When there are three operands left, we want
4458 to make sure the ones that get the double
4459 binary op are chosen wisely. */
4460 int len
= ops
.length ();
4462 swap_ops_for_binary_stmt (ops
, len
- 3, stmt
);
4464 new_lhs
= rewrite_expr_tree (stmt
, 0, ops
,
4465 powi_result
!= NULL
);
4468 /* If we combined some repeated factors into a
4469 __builtin_powi call, multiply that result by the
4470 reassociated operands. */
4473 gimple mul_stmt
, lhs_stmt
= SSA_NAME_DEF_STMT (lhs
);
4474 tree type
= TREE_TYPE (lhs
);
4475 tree target_ssa
= make_temp_ssa_name (type
, NULL
,
4477 gimple_set_lhs (lhs_stmt
, target_ssa
);
4478 update_stmt (lhs_stmt
);
4480 target_ssa
= new_lhs
;
4481 mul_stmt
= gimple_build_assign_with_ops (MULT_EXPR
, lhs
,
4484 gimple_set_location (mul_stmt
, gimple_location (stmt
));
4485 gsi_insert_after (&gsi
, mul_stmt
, GSI_NEW_STMT
);
4493 for (son
= first_dom_son (CDI_POST_DOMINATORS
, bb
);
4495 son
= next_dom_son (CDI_POST_DOMINATORS
, son
))
4496 reassociate_bb (son
);
4499 void dump_ops_vector (FILE *file
, vec
<operand_entry_t
> ops
);
4500 void debug_ops_vector (vec
<operand_entry_t
> ops
);
4502 /* Dump the operand entry vector OPS to FILE. */
4505 dump_ops_vector (FILE *file
, vec
<operand_entry_t
> ops
)
4510 FOR_EACH_VEC_ELT (ops
, i
, oe
)
4512 fprintf (file
, "Op %d -> rank: %d, tree: ", i
, oe
->rank
);
4513 print_generic_expr (file
, oe
->op
, 0);
4517 /* Dump the operand entry vector OPS to STDERR. */
4520 debug_ops_vector (vec
<operand_entry_t
> ops
)
4522 dump_ops_vector (stderr
, ops
);
4528 break_up_subtract_bb (ENTRY_BLOCK_PTR
);
4529 reassociate_bb (EXIT_BLOCK_PTR
);
4532 /* Initialize the reassociation pass. */
4539 int *bbs
= XNEWVEC (int, n_basic_blocks
- NUM_FIXED_BLOCKS
);
4541 /* Find the loops, so that we can prevent moving calculations in
4543 loop_optimizer_init (AVOID_CFG_MODIFICATIONS
);
4545 memset (&reassociate_stats
, 0, sizeof (reassociate_stats
));
4547 operand_entry_pool
= create_alloc_pool ("operand entry pool",
4548 sizeof (struct operand_entry
), 30);
4549 next_operand_entry_id
= 0;
4551 /* Reverse RPO (Reverse Post Order) will give us something where
4552 deeper loops come later. */
4553 pre_and_rev_post_order_compute (NULL
, bbs
, false);
4554 bb_rank
= XCNEWVEC (long, last_basic_block
);
4555 operand_rank
= pointer_map_create ();
4557 /* Give each default definition a distinct rank. This includes
4558 parameters and the static chain. Walk backwards over all
4559 SSA names so that we get proper rank ordering according
4560 to tree_swap_operands_p. */
4561 for (i
= num_ssa_names
- 1; i
> 0; --i
)
4563 tree name
= ssa_name (i
);
4564 if (name
&& SSA_NAME_IS_DEFAULT_DEF (name
))
4565 insert_operand_rank (name
, ++rank
);
4568 /* Set up rank for each BB */
4569 for (i
= 0; i
< n_basic_blocks
- NUM_FIXED_BLOCKS
; i
++)
4570 bb_rank
[bbs
[i
]] = ++rank
<< 16;
4573 calculate_dominance_info (CDI_POST_DOMINATORS
);
4574 plus_negates
= vNULL
;
4577 /* Cleanup after the reassociation pass, and print stats if
4583 statistics_counter_event (cfun
, "Linearized",
4584 reassociate_stats
.linearized
);
4585 statistics_counter_event (cfun
, "Constants eliminated",
4586 reassociate_stats
.constants_eliminated
);
4587 statistics_counter_event (cfun
, "Ops eliminated",
4588 reassociate_stats
.ops_eliminated
);
4589 statistics_counter_event (cfun
, "Statements rewritten",
4590 reassociate_stats
.rewritten
);
4591 statistics_counter_event (cfun
, "Built-in pow[i] calls encountered",
4592 reassociate_stats
.pows_encountered
);
4593 statistics_counter_event (cfun
, "Built-in powi calls created",
4594 reassociate_stats
.pows_created
);
4596 pointer_map_destroy (operand_rank
);
4597 free_alloc_pool (operand_entry_pool
);
4599 plus_negates
.release ();
4600 free_dominance_info (CDI_POST_DOMINATORS
);
4601 loop_optimizer_finalize ();
4604 /* Gate and execute functions for Reassociation. */
4607 execute_reassoc (void)
4612 repropagate_negates ();
4619 gate_tree_ssa_reassoc (void)
4621 return flag_tree_reassoc
!= 0;
4626 const pass_data pass_data_reassoc
=
4628 GIMPLE_PASS
, /* type */
4629 "reassoc", /* name */
4630 OPTGROUP_NONE
, /* optinfo_flags */
4631 true, /* has_gate */
4632 true, /* has_execute */
4633 TV_TREE_REASSOC
, /* tv_id */
4634 ( PROP_cfg
| PROP_ssa
), /* properties_required */
4635 0, /* properties_provided */
4636 0, /* properties_destroyed */
4637 0, /* todo_flags_start */
4639 | TODO_update_ssa_only_virtuals
4640 | TODO_verify_flow
), /* todo_flags_finish */
4643 class pass_reassoc
: public gimple_opt_pass
4646 pass_reassoc (gcc::context
*ctxt
)
4647 : gimple_opt_pass (pass_data_reassoc
, ctxt
)
4650 /* opt_pass methods: */
4651 opt_pass
* clone () { return new pass_reassoc (m_ctxt
); }
4652 bool gate () { return gate_tree_ssa_reassoc (); }
4653 unsigned int execute () { return execute_reassoc (); }
4655 }; // class pass_reassoc
4660 make_pass_reassoc (gcc::context
*ctxt
)
4662 return new pass_reassoc (ctxt
);