1 /* Reassociation for trees.
2 Copyright (C) 2005-2013 Free Software Foundation, Inc.
3 Contributed by Daniel Berlin <dan@dberlin.org>
5 This file is part of GCC.
7 GCC is free software; you can redistribute it and/or modify
8 it under the terms of the GNU General Public License as published by
9 the Free Software Foundation; either version 3, or (at your option)
12 GCC is distributed in the hope that it will be useful,
13 but WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 GNU General Public License for more details.
17 You should have received a copy of the GNU General Public License
18 along with GCC; see the file COPYING3. If not see
19 <http://www.gnu.org/licenses/>. */
23 #include "coretypes.h"
24 #include "hash-table.h"
29 #include "basic-block.h"
30 #include "gimple-pretty-print.h"
31 #include "tree-inline.h"
33 #include "gimple-ssa.h"
35 #include "tree-phinodes.h"
36 #include "ssa-iterators.h"
37 #include "tree-ssanames.h"
38 #include "tree-ssa-loop.h"
41 #include "tree-iterator.h"
42 #include "tree-pass.h"
43 #include "alloc-pool.h"
45 #include "langhooks.h"
46 #include "pointer-set.h"
51 #include "diagnostic-core.h"
53 /* This is a simple global reassociation pass. It is, in part, based
54 on the LLVM pass of the same name (They do some things more/less
55 than we do, in different orders, etc).
57 It consists of five steps:
59 1. Breaking up subtract operations into addition + negate, where
60 it would promote the reassociation of adds.
62 2. Left linearization of the expression trees, so that (A+B)+(C+D)
63 becomes (((A+B)+C)+D), which is easier for us to rewrite later.
64 During linearization, we place the operands of the binary
65 expressions into a vector of operand_entry_t
67 3. Optimization of the operand lists, eliminating things like a +
70 3a. Combine repeated factors with the same occurrence counts
71 into a __builtin_powi call that will later be optimized into
72 an optimal number of multiplies.
74 4. Rewrite the expression trees we linearized and optimized so
75 they are in proper rank order.
77 5. Repropagate negates, as nothing else will clean it up ATM.
79 A bit of theory on #4, since nobody seems to write anything down
80 about why it makes sense to do it the way they do it:
82 We could do this much nicer theoretically, but don't (for reasons
83 explained after how to do it theoretically nice :P).
85 In order to promote the most redundancy elimination, you want
86 binary expressions whose operands are the same rank (or
87 preferably, the same value) exposed to the redundancy eliminator,
88 for possible elimination.
90 So the way to do this if we really cared, is to build the new op
91 tree from the leaves to the roots, merging as you go, and putting the
92 new op on the end of the worklist, until you are left with one
93 thing on the worklist.
95 IE if you have to rewrite the following set of operands (listed with
96 rank in parentheses), with opcode PLUS_EXPR:
98 a (1), b (1), c (1), d (2), e (2)
101 We start with our merge worklist empty, and the ops list with all of
104 You want to first merge all leaves of the same rank, as much as
107 So first build a binary op of
109 mergetmp = a + b, and put "mergetmp" on the merge worklist.
111 Because there is no three operand form of PLUS_EXPR, c is not going to
112 be exposed to redundancy elimination as a rank 1 operand.
114 So you might as well throw it on the merge worklist (you could also
115 consider it to now be a rank two operand, and merge it with d and e,
116 but in this case, you then have evicted e from a binary op. So at
117 least in this situation, you can't win.)
119 Then build a binary op of d + e
122 and put mergetmp2 on the merge worklist.
124 so merge worklist = {mergetmp, c, mergetmp2}
126 Continue building binary ops of these operations until you have only
127 one operation left on the worklist.
132 mergetmp3 = mergetmp + c
134 worklist = {mergetmp2, mergetmp3}
136 mergetmp4 = mergetmp2 + mergetmp3
138 worklist = {mergetmp4}
140 because we have one operation left, we can now just set the original
141 statement equal to the result of that operation.
143 This will at least expose a + b and d + e to redundancy elimination
144 as binary operations.
146 For extra points, you can reuse the old statements to build the
147 mergetmps, since you shouldn't run out.
149 So why don't we do this?
151 Because it's expensive, and rarely will help. Most trees we are
152 reassociating have 3 or less ops. If they have 2 ops, they already
153 will be written into a nice single binary op. If you have 3 ops, a
154 single simple check suffices to tell you whether the first two are of the
155 same rank. If so, you know to order it
158 newstmt = mergetmp + op3
162 newstmt = mergetmp + op1
164 If all three are of the same rank, you can't expose them all in a
165 single binary operator anyway, so the above is *still* the best you
168 Thus, this is what we do. When we have three ops left, we check to see
169 what order to put them in, and call it a day. As a nod to vector sum
170 reduction, we check if any of the ops are really a phi node that is a
171 destructive update for the associating op, and keep the destructive
172 update together for vector sum reduction recognition. */
179 int constants_eliminated
;
182 int pows_encountered
;
186 /* Operator, rank pair. */
187 typedef struct operand_entry
195 static alloc_pool operand_entry_pool
;
197 /* This is used to assign a unique ID to each struct operand_entry
198 so that qsort results are identical on different hosts. */
199 static int next_operand_entry_id
;
201 /* Starting rank number for a given basic block, so that we can rank
202 operations using unmovable instructions in that BB based on the bb
204 static long *bb_rank
;
206 /* Operand->rank hashtable. */
207 static struct pointer_map_t
*operand_rank
;
210 static long get_rank (tree
);
213 /* Bias amount for loop-carried phis. We want this to be larger than
214 the depth of any reassociation tree we can see, but not larger than
215 the rank difference between two blocks. */
216 #define PHI_LOOP_BIAS (1 << 15)
218 /* Rank assigned to a phi statement. If STMT is a loop-carried phi of
219 an innermost loop, and the phi has only a single use which is inside
220 the loop, then the rank is the block rank of the loop latch plus an
221 extra bias for the loop-carried dependence. This causes expressions
222 calculated into an accumulator variable to be independent for each
223 iteration of the loop. If STMT is some other phi, the rank is the
224 block rank of its containing block. */
226 phi_rank (gimple stmt
)
228 basic_block bb
= gimple_bb (stmt
);
229 struct loop
*father
= bb
->loop_father
;
235 /* We only care about real loops (those with a latch). */
237 return bb_rank
[bb
->index
];
239 /* Interesting phis must be in headers of innermost loops. */
240 if (bb
!= father
->header
242 return bb_rank
[bb
->index
];
244 /* Ignore virtual SSA_NAMEs. */
245 res
= gimple_phi_result (stmt
);
246 if (virtual_operand_p (res
))
247 return bb_rank
[bb
->index
];
249 /* The phi definition must have a single use, and that use must be
250 within the loop. Otherwise this isn't an accumulator pattern. */
251 if (!single_imm_use (res
, &use
, &use_stmt
)
252 || gimple_bb (use_stmt
)->loop_father
!= father
)
253 return bb_rank
[bb
->index
];
255 /* Look for phi arguments from within the loop. If found, bias this phi. */
256 for (i
= 0; i
< gimple_phi_num_args (stmt
); i
++)
258 tree arg
= gimple_phi_arg_def (stmt
, i
);
259 if (TREE_CODE (arg
) == SSA_NAME
260 && !SSA_NAME_IS_DEFAULT_DEF (arg
))
262 gimple def_stmt
= SSA_NAME_DEF_STMT (arg
);
263 if (gimple_bb (def_stmt
)->loop_father
== father
)
/* Loop-carried: rank of the latch block plus the fixed bias, so
   the phi result is ranked above everything computed in-loop.  */
264 return bb_rank
[father
->latch
->index
] + PHI_LOOP_BIAS
;
268 /* Must be an uninteresting phi. */
269 return bb_rank
[bb
->index
];
272 /* If EXP is an SSA_NAME defined by a PHI statement that represents a
273 loop-carried dependence of an innermost loop, return TRUE; else
276 loop_carried_phi (tree exp
)
/* Only a non-default-def SSA name can be defined by a phi.  */
281 if (TREE_CODE (exp
) != SSA_NAME
282 || SSA_NAME_IS_DEFAULT_DEF (exp
))
285 phi_stmt
= SSA_NAME_DEF_STMT (exp
);
287 if (gimple_code (SSA_NAME_DEF_STMT (exp
)) != GIMPLE_PHI
)
290 /* Non-loop-carried phis have block rank. Loop-carried phis have
291 an additional bias added in. If this phi doesn't have block rank,
292 it's biased and should not be propagated. */
293 block_rank
= bb_rank
[gimple_bb (phi_stmt
)->index
];
/* A rank differing from the plain block rank means PHI_LOOP_BIAS
   was applied by phi_rank, i.e. this is a loop-carried phi.  */
295 if (phi_rank (phi_stmt
) != block_rank
)
301 /* Return the maximum of RANK and the rank that should be propagated
302 from expression OP. For most operands, this is just the rank of OP.
303 For loop-carried phis, the value is zero to avoid undoing the bias
304 in favor of the phi. */
306 propagate_rank (long rank
, tree op
)
/* Loop-carried phis keep their bias; do not let it leak into the
   ranks of dependent operations.  */
310 if (loop_carried_phi (op
))
313 op_rank
= get_rank (op
);
315 return MAX (rank
, op_rank
);
318 /* Look up the operand rank structure for expression E. */
/* Return the rank previously cached for E in the operand_rank
   pointer map, or -1 if E has no entry.  */
321 find_operand_rank (tree e
)
323 void **slot
= pointer_map_contains (operand_rank
, e
);
/* The rank is stored directly in the slot, encoded as a pointer.  */
324 return slot
? (long) (intptr_t) *slot
: -1;
327 /* Insert {E,RANK} into the operand rank hashtable. */
/* Record RANK as the cached rank for expression E so get_rank need
   not recompute it.  */
330 insert_operand_rank (tree e
, long rank
)
/* Rank 0 would collide with the sentinel used for constants, and
   find_operand_rank returns -1 for "absent".  */
333 gcc_assert (rank
> 0);
334 slot
= pointer_map_insert (operand_rank
, e
);
/* Store the rank in the slot itself, encoded as a pointer.  */
336 *slot
= (void *) (intptr_t) rank
;
339 /* Given an expression E, return the rank of the expression. */
344 /* Constants have rank 0. */
345 if (is_gimple_min_invariant (e
))
348 /* SSA_NAME's have the rank of the expression they are the result
350 For globals and uninitialized values, the rank is 0.
351 For function arguments, use the pre-setup rank.
352 For PHI nodes, stores, asm statements, etc, we use the rank of
354 For simple operations, the rank is the maximum rank of any of
355 its operands, or the bb_rank, whichever is less.
356 I make no claims that this is optimal, however, it gives good
359 /* We make an exception to the normal ranking system to break
360 dependences of accumulator variables in loops. Suppose we
361 have a simple one-block loop containing:
368 As shown, each iteration of the calculation into x is fully
369 dependent upon the iteration before it. We would prefer to
370 see this in the form:
377 If the loop is unrolled, the calculations of b and c from
378 different iterations can be interleaved.
380 To obtain this result during reassociation, we bias the rank
381 of the phi definition x_1 upward, when it is recognized as an
382 accumulator pattern. The artificial rank causes it to be
383 added last, providing the desired independence. */
385 if (TREE_CODE (e
) == SSA_NAME
)
392 if (SSA_NAME_IS_DEFAULT_DEF (e
))
393 return find_operand_rank (e
);
395 stmt
= SSA_NAME_DEF_STMT (e
);
396 if (gimple_code (stmt
) == GIMPLE_PHI
)
397 return phi_rank (stmt
);
399 if (!is_gimple_assign (stmt
)
400 || gimple_vdef (stmt
))
401 return bb_rank
[gimple_bb (stmt
)->index
];
403 /* If we already have a rank for this expression, use that. */
404 rank
= find_operand_rank (e
);
408 /* Otherwise, find the maximum rank for the operands. As an
409 exception, remove the bias from loop-carried phis when propagating
410 the rank so that dependent operations are not also biased. */
412 if (gimple_assign_single_p (stmt
))
414 tree rhs
= gimple_assign_rhs1 (stmt
);
415 n
= TREE_OPERAND_LENGTH (rhs
);
417 rank
= propagate_rank (rank
, rhs
);
420 for (i
= 0; i
< n
; i
++)
422 op
= TREE_OPERAND (rhs
, i
);
425 rank
= propagate_rank (rank
, op
);
431 n
= gimple_num_ops (stmt
);
432 for (i
= 1; i
< n
; i
++)
434 op
= gimple_op (stmt
, i
);
436 rank
= propagate_rank (rank
, op
);
440 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
442 fprintf (dump_file
, "Rank for ");
443 print_generic_expr (dump_file
, e
, 0);
444 fprintf (dump_file
, " is %ld\n", (rank
+ 1));
447 /* Note the rank in the hashtable so we don't recompute it. */
448 insert_operand_rank (e
, (rank
+ 1));
452 /* Globals, etc, are rank 0 */
457 /* We want integer ones to end up last no matter what, since they are
458 the ones we can do the most with. */
/* Distinct powers of two used as sort keys by constant_type /
   sort_by_operand_rank.  Parenthesized so the macros expand safely
   inside larger expressions.  */
459 #define INTEGER_CONST_TYPE (1 << 3)
460 #define FLOAT_CONST_TYPE (1 << 2)
461 #define OTHER_CONST_TYPE (1 << 1)
463 /* Classify an invariant tree into integer, float, or other, so that
464 we can sort them to be near other constants of the same type. */
/* The three *_CONST_TYPE values are distinct powers of two used as
   sort keys by sort_by_operand_rank.  */
466 constant_type (tree t
)
468 if (INTEGRAL_TYPE_P (TREE_TYPE (t
)))
469 return INTEGER_CONST_TYPE
;
470 else if (SCALAR_FLOAT_TYPE_P (TREE_TYPE (t
)))
471 return FLOAT_CONST_TYPE
;
/* Everything else (vectors, pointers, ...) shares one class.  */
473 return OTHER_CONST_TYPE
;
476 /* qsort comparison function to sort operand entries PA and PB by rank
477 so that the sorted array is ordered by rank in decreasing order. */
479 sort_by_operand_rank (const void *pa
, const void *pb
)
481 const operand_entry_t oea
= *(const operand_entry_t
*)pa
;
482 const operand_entry_t oeb
= *(const operand_entry_t
*)pb
;
484 /* It's nicer for optimize_expression if constants that are likely
485 to fold when added/multiplied/whatever are put next to each
486 other. Since all constants have rank 0, order them by type. */
487 if (oeb
->rank
== 0 && oea
->rank
== 0)
489 if (constant_type (oeb
->op
) != constant_type (oea
->op
))
490 return constant_type (oeb
->op
) - constant_type (oea
->op
);
492 /* To make sorting result stable, we use unique IDs to determine
494 return oeb
->id
- oea
->id
;
497 /* Lastly, make sure the versions that are the same go next to each
498 other. We use SSA_NAME_VERSION because it's stable. */
499 if ((oeb
->rank
- oea
->rank
== 0)
500 && TREE_CODE (oea
->op
) == SSA_NAME
501 && TREE_CODE (oeb
->op
) == SSA_NAME
)
503 if (SSA_NAME_VERSION (oeb
->op
) != SSA_NAME_VERSION (oea
->op
))
504 return SSA_NAME_VERSION (oeb
->op
) - SSA_NAME_VERSION (oea
->op
);
506 return oeb
->id
- oea
->id
;
/* General case: compare by rank.  */
509 if (oeb
->rank
!= oea
->rank
)
510 return oeb
->rank
- oea
->rank
;
/* Equal ranks: fall back to the unique id for a host-independent,
   stable order.  */
512 return oeb
->id
- oea
->id
;
515 /* Add an operand entry to *OPS for the tree operand OP. */
518 add_to_ops_vec (vec
<operand_entry_t
> *ops
, tree op
)
/* Entries come from the pass-wide allocation pool.  */
520 operand_entry_t oe
= (operand_entry_t
) pool_alloc (operand_entry_pool
);
523 oe
->rank
= get_rank (op
);
/* Unique id keeps qsort results identical across hosts (see
   next_operand_entry_id).  */
524 oe
->id
= next_operand_entry_id
++;
529 /* Add an operand entry to *OPS for the tree operand OP with repeat
533 add_repeat_to_ops_vec (vec
<operand_entry_t
> *ops
, tree op
,
534 HOST_WIDE_INT repeat
)
/* Entries come from the pass-wide allocation pool.  */
536 operand_entry_t oe
= (operand_entry_t
) pool_alloc (operand_entry_pool
);
539 oe
->rank
= get_rank (op
);
/* Unique id keeps qsort results identical across hosts.  */
540 oe
->id
= next_operand_entry_id
++;
/* Statistics: count repeated-factor (power) operands seen.  */
544 reassociate_stats
.pows_encountered
++;
547 /* Return true if STMT is reassociable operation containing a binary
548 operation with tree code CODE, and is inside LOOP. */
551 is_reassociable_op (gimple stmt
, enum tree_code code
, struct loop
*loop
)
553 basic_block bb
= gimple_bb (stmt
);
/* A statement not placed in any basic block is not reassociable.  */
555 if (gimple_bb (stmt
) == NULL
)
/* Reject statements outside of LOOP.  */
558 if (!flow_bb_inside_loop_p (loop
, bb
))
/* Require an assignment with tree code CODE whose result has exactly
   one use, so the chain can be rewritten safely.  */
561 if (is_gimple_assign (stmt
)
562 && gimple_assign_rhs_code (stmt
) == code
563 && has_single_use (gimple_assign_lhs (stmt
)))
570 /* Given NAME, if NAME is defined by a unary operation OPCODE, return the
571 operand of the negate operation. Otherwise, return NULL. */
574 get_unary_op (tree name
, enum tree_code opcode
)
/* Inspect the statement defining the SSA name.  */
576 gimple stmt
= SSA_NAME_DEF_STMT (name
);
/* Only plain assignments can carry a unary OPCODE.  */
578 if (!is_gimple_assign (stmt
))
581 if (gimple_assign_rhs_code (stmt
) == opcode
)
/* The single operand of the unary operation.  */
582 return gimple_assign_rhs1 (stmt
);
586 /* If CURR and LAST are a pair of ops that OPCODE allows us to
587 eliminate through equivalences, do so, remove them from OPS, and
588 return true. Otherwise, return false. */
591 eliminate_duplicate_pair (enum tree_code opcode
,
592 vec
<operand_entry_t
> *ops
,
595 operand_entry_t curr
,
596 operand_entry_t last
)
599 /* If we have two of the same op, and the opcode is & |, min, or max,
600 we can eliminate one of them.
601 If we have two of the same op, and the opcode is ^, we can
602 eliminate both of them. */
604 if (last
&& last
->op
== curr
->op
)
612 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
614 fprintf (dump_file
, "Equivalence: ");
615 print_generic_expr (dump_file
, curr
->op
, 0);
616 fprintf (dump_file
, " [&|minmax] ");
617 print_generic_expr (dump_file
, last
->op
, 0);
618 fprintf (dump_file
, " -> ");
619 print_generic_stmt (dump_file
, last
->op
, 0);
622 ops
->ordered_remove (i
);
623 reassociate_stats
.ops_eliminated
++;
628 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
630 fprintf (dump_file
, "Equivalence: ");
631 print_generic_expr (dump_file
, curr
->op
, 0);
632 fprintf (dump_file
, " ^ ");
633 print_generic_expr (dump_file
, last
->op
, 0);
634 fprintf (dump_file
, " -> nothing\n");
637 reassociate_stats
.ops_eliminated
+= 2;
639 if (ops
->length () == 2)
642 add_to_ops_vec (ops
, build_zero_cst (TREE_TYPE (last
->op
)));
647 ops
->ordered_remove (i
-1);
648 ops
->ordered_remove (i
-1);
660 static vec
<tree
> plus_negates
;
662 /* If OPCODE is PLUS_EXPR, CURR->OP is a negate expression or a bitwise not
663 expression, look in OPS for a corresponding positive operation to cancel
664 it out. If we find one, remove the other from OPS, replace
665 OPS[CURRINDEX] with 0 or -1, respectively, and return true. Otherwise,
669 eliminate_plus_minus_pair (enum tree_code opcode
,
670 vec
<operand_entry_t
> *ops
,
671 unsigned int currindex
,
672 operand_entry_t curr
)
679 if (opcode
!= PLUS_EXPR
|| TREE_CODE (curr
->op
) != SSA_NAME
)
682 negateop
= get_unary_op (curr
->op
, NEGATE_EXPR
);
683 notop
= get_unary_op (curr
->op
, BIT_NOT_EXPR
);
684 if (negateop
== NULL_TREE
&& notop
== NULL_TREE
)
687 /* Any non-negated version will have a rank that is one less than
688 the current rank. So once we hit those ranks, if we don't find
691 for (i
= currindex
+ 1;
692 ops
->iterate (i
, &oe
)
693 && oe
->rank
>= curr
->rank
- 1 ;
696 if (oe
->op
== negateop
)
699 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
701 fprintf (dump_file
, "Equivalence: ");
702 print_generic_expr (dump_file
, negateop
, 0);
703 fprintf (dump_file
, " + -");
704 print_generic_expr (dump_file
, oe
->op
, 0);
705 fprintf (dump_file
, " -> 0\n");
708 ops
->ordered_remove (i
);
709 add_to_ops_vec (ops
, build_zero_cst (TREE_TYPE (oe
->op
)));
710 ops
->ordered_remove (currindex
);
711 reassociate_stats
.ops_eliminated
++;
715 else if (oe
->op
== notop
)
717 tree op_type
= TREE_TYPE (oe
->op
);
719 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
721 fprintf (dump_file
, "Equivalence: ");
722 print_generic_expr (dump_file
, notop
, 0);
723 fprintf (dump_file
, " + ~");
724 print_generic_expr (dump_file
, oe
->op
, 0);
725 fprintf (dump_file
, " -> -1\n");
728 ops
->ordered_remove (i
);
729 add_to_ops_vec (ops
, build_int_cst_type (op_type
, -1));
730 ops
->ordered_remove (currindex
);
731 reassociate_stats
.ops_eliminated
++;
737 /* CURR->OP is a negate expr in a plus expr: save it for later
738 inspection in repropagate_negates(). */
739 if (negateop
!= NULL_TREE
)
740 plus_negates
.safe_push (curr
->op
);
745 /* If OPCODE is BIT_IOR_EXPR, BIT_AND_EXPR, and, CURR->OP is really a
746 bitwise not expression, look in OPS for a corresponding operand to
747 cancel it out. If we find one, remove the other from OPS, replace
748 OPS[CURRINDEX] with 0, and return true. Otherwise, return
752 eliminate_not_pairs (enum tree_code opcode
,
753 vec
<operand_entry_t
> *ops
,
754 unsigned int currindex
,
755 operand_entry_t curr
)
761 if ((opcode
!= BIT_IOR_EXPR
&& opcode
!= BIT_AND_EXPR
)
762 || TREE_CODE (curr
->op
) != SSA_NAME
)
765 notop
= get_unary_op (curr
->op
, BIT_NOT_EXPR
);
766 if (notop
== NULL_TREE
)
769 /* Any non-not version will have a rank that is one less than
770 the current rank. So once we hit those ranks, if we don't find
773 for (i
= currindex
+ 1;
774 ops
->iterate (i
, &oe
)
775 && oe
->rank
>= curr
->rank
- 1;
780 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
782 fprintf (dump_file
, "Equivalence: ");
783 print_generic_expr (dump_file
, notop
, 0);
784 if (opcode
== BIT_AND_EXPR
)
785 fprintf (dump_file
, " & ~");
786 else if (opcode
== BIT_IOR_EXPR
)
787 fprintf (dump_file
, " | ~");
788 print_generic_expr (dump_file
, oe
->op
, 0);
789 if (opcode
== BIT_AND_EXPR
)
790 fprintf (dump_file
, " -> 0\n");
791 else if (opcode
== BIT_IOR_EXPR
)
792 fprintf (dump_file
, " -> -1\n");
795 if (opcode
== BIT_AND_EXPR
)
796 oe
->op
= build_zero_cst (TREE_TYPE (oe
->op
));
797 else if (opcode
== BIT_IOR_EXPR
)
798 oe
->op
= build_low_bits_mask (TREE_TYPE (oe
->op
),
799 TYPE_PRECISION (TREE_TYPE (oe
->op
)));
801 reassociate_stats
.ops_eliminated
+= ops
->length () - 1;
803 ops
->quick_push (oe
);
811 /* Use constant value that may be present in OPS to try to eliminate
812 operands. Note that this function is only really used when we've
813 eliminated ops for other reasons, or merged constants. Across
814 single statements, fold already does all of this, plus more. There
815 is little point in duplicating logic, so I've only included the
816 identities that I could ever construct testcases to trigger. */
819 eliminate_using_constants (enum tree_code opcode
,
820 vec
<operand_entry_t
> *ops
)
822 operand_entry_t oelast
= ops
->last ();
823 tree type
= TREE_TYPE (oelast
->op
);
825 if (oelast
->rank
== 0
826 && (INTEGRAL_TYPE_P (type
) || FLOAT_TYPE_P (type
)))
831 if (integer_zerop (oelast
->op
))
833 if (ops
->length () != 1)
835 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
836 fprintf (dump_file
, "Found & 0, removing all other ops\n");
838 reassociate_stats
.ops_eliminated
+= ops
->length () - 1;
841 ops
->quick_push (oelast
);
845 else if (integer_all_onesp (oelast
->op
))
847 if (ops
->length () != 1)
849 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
850 fprintf (dump_file
, "Found & -1, removing\n");
852 reassociate_stats
.ops_eliminated
++;
857 if (integer_all_onesp (oelast
->op
))
859 if (ops
->length () != 1)
861 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
862 fprintf (dump_file
, "Found | -1, removing all other ops\n");
864 reassociate_stats
.ops_eliminated
+= ops
->length () - 1;
867 ops
->quick_push (oelast
);
871 else if (integer_zerop (oelast
->op
))
873 if (ops
->length () != 1)
875 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
876 fprintf (dump_file
, "Found | 0, removing\n");
878 reassociate_stats
.ops_eliminated
++;
883 if (integer_zerop (oelast
->op
)
884 || (FLOAT_TYPE_P (type
)
885 && !HONOR_NANS (TYPE_MODE (type
))
886 && !HONOR_SIGNED_ZEROS (TYPE_MODE (type
))
887 && real_zerop (oelast
->op
)))
889 if (ops
->length () != 1)
891 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
892 fprintf (dump_file
, "Found * 0, removing all other ops\n");
894 reassociate_stats
.ops_eliminated
+= ops
->length () - 1;
896 ops
->quick_push (oelast
);
900 else if (integer_onep (oelast
->op
)
901 || (FLOAT_TYPE_P (type
)
902 && !HONOR_SNANS (TYPE_MODE (type
))
903 && real_onep (oelast
->op
)))
905 if (ops
->length () != 1)
907 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
908 fprintf (dump_file
, "Found * 1, removing\n");
910 reassociate_stats
.ops_eliminated
++;
918 if (integer_zerop (oelast
->op
)
919 || (FLOAT_TYPE_P (type
)
920 && (opcode
== PLUS_EXPR
|| opcode
== MINUS_EXPR
)
921 && fold_real_zero_addition_p (type
, oelast
->op
,
922 opcode
== MINUS_EXPR
)))
924 if (ops
->length () != 1)
926 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
927 fprintf (dump_file
, "Found [|^+] 0, removing\n");
929 reassociate_stats
.ops_eliminated
++;
941 static void linearize_expr_tree (vec
<operand_entry_t
> *, gimple
,
944 /* Structure for tracking and counting operands. */
945 typedef struct oecount_s
{
948 enum tree_code oecode
;
953 /* The heap for the oecount hashtable and the sorted list of operands. */
954 static vec
<oecount
> cvec
;
957 /* Oecount hashtable helpers. */
959 struct oecount_hasher
: typed_noop_remove
<void>
961 /* Note that this hash table stores integers, not pointers.
962 So, observe the casting in the member functions. */
963 typedef void value_type
;
964 typedef void compare_type
;
965 static inline hashval_t
hash (const value_type
*);
966 static inline bool equal (const value_type
*, const compare_type
*);
969 /* Hash function for oecount. */
972 oecount_hasher::hash (const value_type
*p
)
974 const oecount
*c
= &cvec
[(size_t)p
- 42];
975 return htab_hash_pointer (c
->op
) ^ (hashval_t
)c
->oecode
;
978 /* Comparison function for oecount. */
981 oecount_hasher::equal (const value_type
*p1
, const compare_type
*p2
)
983 const oecount
*c1
= &cvec
[(size_t)p1
- 42];
984 const oecount
*c2
= &cvec
[(size_t)p2
- 42];
985 return (c1
->oecode
== c2
->oecode
986 && c1
->op
== c2
->op
);
989 /* Comparison function for qsort sorting oecount elements by count. */
/* Ascending by occurrence count; ties broken by the unique id so
   sort results are deterministic across hosts.  */
992 oecount_cmp (const void *p1
, const void *p2
)
994 const oecount
*c1
= (const oecount
*)p1
;
995 const oecount
*c2
= (const oecount
*)p2
;
996 if (c1
->cnt
!= c2
->cnt
)
997 return c1
->cnt
- c2
->cnt
;
999 /* If counts are identical, use unique IDs to stabilize qsort. */
1000 return c1
->id
- c2
->id
;
1003 /* Return TRUE iff STMT represents a builtin call that raises OP
1004 to some exponent. */
1007 stmt_is_power_of_op (gimple stmt
, tree op
)
/* Only call statements can be pow/powi builtins.  */
1011 if (!is_gimple_call (stmt
))
1014 fndecl
= gimple_call_fndecl (stmt
);
/* Require an ordinary (BUILT_IN_NORMAL) builtin declaration.  */
1017 || DECL_BUILT_IN_CLASS (fndecl
) != BUILT_IN_NORMAL
)
1020 switch (DECL_FUNCTION_CODE (gimple_call_fndecl (stmt
)))
/* pow (op, x) and powi (op, n), in any float variant, qualify when
   the first argument equals OP.  */
1022 CASE_FLT_FN (BUILT_IN_POW
):
1023 CASE_FLT_FN (BUILT_IN_POWI
):
1024 return (operand_equal_p (gimple_call_arg (stmt
, 0), op
, 0));
1031 /* Given STMT which is a __builtin_pow* call, decrement its exponent
1032 in place and return the result. Assumes that stmt_is_power_of_op
1033 was previously called for STMT and returned TRUE. */
1035 static HOST_WIDE_INT
1036 decrement_power (gimple stmt
)
1038 REAL_VALUE_TYPE c
, cint
;
1039 HOST_WIDE_INT power
;
1042 switch (DECL_FUNCTION_CODE (gimple_call_fndecl (stmt
)))
/* pow: the exponent is a REAL_CST; convert to an integer, decrement,
   and write it back as a real constant of the same type.  */
1044 CASE_FLT_FN (BUILT_IN_POW
):
1045 arg1
= gimple_call_arg (stmt
, 1);
1046 c
= TREE_REAL_CST (arg1
);
1047 power
= real_to_integer (&c
) - 1;
1048 real_from_integer (&cint
, VOIDmode
, power
, 0, 0);
1049 gimple_call_set_arg (stmt
, 1, build_real (TREE_TYPE (arg1
), cint
));
/* powi: the exponent is an INTEGER_CST; decrement it directly.  */
1052 CASE_FLT_FN (BUILT_IN_POWI
):
1053 arg1
= gimple_call_arg (stmt
, 1);
1054 power
= TREE_INT_CST_LOW (arg1
) - 1;
1055 gimple_call_set_arg (stmt
, 1, build_int_cst (TREE_TYPE (arg1
), power
));
1063 /* Find the single immediate use of STMT's LHS, and replace it
1064 with OP. Remove STMT. If STMT's LHS is the same as *DEF,
1065 replace *DEF with OP as well. */
1068 propagate_op_to_single_use (tree op
, gimple stmt
, tree
*def
)
1073 gimple_stmt_iterator gsi
;
/* The LHS accessor differs for call vs. assignment statements.  */
1075 if (is_gimple_call (stmt
))
1076 lhs
= gimple_call_lhs (stmt
);
1078 lhs
= gimple_assign_lhs (stmt
);
/* Contract: the caller guarantees the LHS has exactly one use.  */
1080 gcc_assert (has_single_use (lhs
));
1081 single_imm_use (lhs
, &use
, &use_stmt
);
1085 if (TREE_CODE (op
) != SSA_NAME
)
1086 update_stmt (use_stmt
);
/* STMT is now dead: drop its virtual def, unlink it from the
   statement list, and release its SSA definitions.  */
1087 gsi
= gsi_for_stmt (stmt
);
1088 unlink_stmt_vdef (stmt
);
1089 gsi_remove (&gsi
, true);
1090 release_defs (stmt
);
1093 /* Walks the linear chain with result *DEF searching for an operation
1094 with operand OP and code OPCODE removing that from the chain. *DEF
1095 is updated if there is only one operand but no operation left. */
1098 zero_one_operation (tree
*def
, enum tree_code opcode
, tree op
)
1100 gimple stmt
= SSA_NAME_DEF_STMT (*def
);
1106 if (opcode
== MULT_EXPR
1107 && stmt_is_power_of_op (stmt
, op
))
1109 if (decrement_power (stmt
) == 1)
1110 propagate_op_to_single_use (op
, stmt
, def
);
1114 name
= gimple_assign_rhs1 (stmt
);
1116 /* If this is the operation we look for and one of the operands
1117 is ours simply propagate the other operand into the stmts
1119 if (gimple_assign_rhs_code (stmt
) == opcode
1121 || gimple_assign_rhs2 (stmt
) == op
))
1124 name
= gimple_assign_rhs2 (stmt
);
1125 propagate_op_to_single_use (name
, stmt
, def
);
1129 /* We might have a multiply of two __builtin_pow* calls, and
1130 the operand might be hiding in the rightmost one. */
1131 if (opcode
== MULT_EXPR
1132 && gimple_assign_rhs_code (stmt
) == opcode
1133 && TREE_CODE (gimple_assign_rhs2 (stmt
)) == SSA_NAME
1134 && has_single_use (gimple_assign_rhs2 (stmt
)))
1136 gimple stmt2
= SSA_NAME_DEF_STMT (gimple_assign_rhs2 (stmt
));
1137 if (stmt_is_power_of_op (stmt2
, op
))
1139 if (decrement_power (stmt2
) == 1)
1140 propagate_op_to_single_use (op
, stmt2
, def
);
1145 /* Continue walking the chain. */
1146 gcc_assert (name
!= op
1147 && TREE_CODE (name
) == SSA_NAME
);
1148 stmt
= SSA_NAME_DEF_STMT (name
);
1153 /* Returns the UID of STMT if it is non-NULL. Otherwise return 1. */
1155 static inline unsigned
1156 get_stmt_uid_with_default (gimple stmt
)
/* UID 1 serves as the fallback when there is no statement.  */
1158 return stmt
? gimple_uid (stmt
) : 1;
1161 /* Builds one statement performing OP1 OPCODE OP2 using TMPVAR for
1162 the result. Places the statement after the definition of either
1163 OP1 or OP2. Returns the new statement. */
1166 build_and_add_sum (tree type
, tree op1
, tree op2
, enum tree_code opcode
)
1168 gimple op1def
= NULL
, op2def
= NULL
;
1169 gimple_stmt_iterator gsi
;
1173 /* Create the addition statement. */
1174 op
= make_ssa_name (type
, NULL
);
1175 sum
= gimple_build_assign_with_ops (opcode
, op
, op1
, op2
);
1177 /* Find an insertion place and insert. */
1178 if (TREE_CODE (op1
) == SSA_NAME
)
1179 op1def
= SSA_NAME_DEF_STMT (op1
);
1180 if (TREE_CODE (op2
) == SSA_NAME
)
1181 op2def
= SSA_NAME_DEF_STMT (op2
);
1182 if ((!op1def
|| gimple_nop_p (op1def
))
1183 && (!op2def
|| gimple_nop_p (op2def
)))
1185 gsi
= gsi_after_labels (single_succ (ENTRY_BLOCK_PTR
));
1186 gimple_set_uid (sum
, get_stmt_uid_with_default (gsi_stmt (gsi
)));
1187 gsi_insert_before (&gsi
, sum
, GSI_NEW_STMT
);
1189 else if ((!op1def
|| gimple_nop_p (op1def
))
1190 || (op2def
&& !gimple_nop_p (op2def
)
1191 && stmt_dominates_stmt_p (op1def
, op2def
)))
1193 if (gimple_code (op2def
) == GIMPLE_PHI
)
1195 gsi
= gsi_after_labels (gimple_bb (op2def
));
1196 gimple_set_uid (sum
, get_stmt_uid_with_default (gsi_stmt (gsi
)));
1197 gsi_insert_before (&gsi
, sum
, GSI_NEW_STMT
);
1201 if (!stmt_ends_bb_p (op2def
))
1203 gsi
= gsi_for_stmt (op2def
);
1204 gimple_set_uid (sum
, gimple_uid (op2def
));
1205 gsi_insert_after (&gsi
, sum
, GSI_NEW_STMT
);
1212 FOR_EACH_EDGE (e
, ei
, gimple_bb (op2def
)->succs
)
1213 if (e
->flags
& EDGE_FALLTHRU
)
1214 gsi_insert_on_edge_immediate (e
, sum
);
1220 if (gimple_code (op1def
) == GIMPLE_PHI
)
1222 gsi
= gsi_after_labels (gimple_bb (op1def
));
1223 gimple_set_uid (sum
, get_stmt_uid_with_default (gsi_stmt (gsi
)));
1224 gsi_insert_before (&gsi
, sum
, GSI_NEW_STMT
);
1228 if (!stmt_ends_bb_p (op1def
))
1230 gsi
= gsi_for_stmt (op1def
);
1231 gimple_set_uid (sum
, gimple_uid (op1def
));
1232 gsi_insert_after (&gsi
, sum
, GSI_NEW_STMT
);
1239 FOR_EACH_EDGE (e
, ei
, gimple_bb (op1def
)->succs
)
1240 if (e
->flags
& EDGE_FALLTHRU
)
1241 gsi_insert_on_edge_immediate (e
, sum
);
1250 /* Perform un-distribution of divisions and multiplications.
1251 A * X + B * X is transformed into (A + B) * X and A / X + B / X
1252 to (A + B) / X for real X.
1254 The algorithm is organized as follows.
1256 - First we walk the addition chain *OPS looking for summands that
1257 are defined by a multiplication or a real division. This results
1258 in the candidates bitmap with relevant indices into *OPS.
1260 - Second we build the chains of multiplications or divisions for
1261 these candidates, counting the number of occurrences of (operand, code)
1262 pairs in all of the candidates chains.
1264 - Third we sort the (operand, code) pairs by number of occurrence and
1265 process them starting with the pair with the most uses.
1267 * For each such pair we walk the candidates again to build a
1268 second candidate bitmap noting all multiplication/division chains
1269 that have at least one occurrence of (operand, code).
1271 * We build an alternate addition chain only covering these
1272 candidates with one (operand, code) operation removed from their
1273 multiplication/division chain.
1275 * The first candidate gets replaced by the alternate addition chain
1276 multiplied/divided by the operand.
1278 * All candidate chains get disabled for further processing and
1279 processing of (operand, code) pairs continues.
1281 The alternate addition chains built are re-processed by the main
1282 reassociation algorithm which allows optimizing a * x * y + b * y * x
1283 to (a + b ) * x * y in one invocation of the reassociation pass. */
1286 undistribute_ops_list (enum tree_code opcode
,
1287 vec
<operand_entry_t
> *ops
, struct loop
*loop
)
1289 unsigned int length
= ops
->length ();
1290 operand_entry_t oe1
;
1292 sbitmap candidates
, candidates2
;
1293 unsigned nr_candidates
, nr_candidates2
;
1294 sbitmap_iterator sbi0
;
1295 vec
<operand_entry_t
> *subops
;
1296 hash_table
<oecount_hasher
> ctable
;
1297 bool changed
= false;
1298 int next_oecount_id
= 0;
1301 || opcode
!= PLUS_EXPR
)
1304 /* Build a list of candidates to process. */
1305 candidates
= sbitmap_alloc (length
);
1306 bitmap_clear (candidates
);
1308 FOR_EACH_VEC_ELT (*ops
, i
, oe1
)
1310 enum tree_code dcode
;
1313 if (TREE_CODE (oe1
->op
) != SSA_NAME
)
1315 oe1def
= SSA_NAME_DEF_STMT (oe1
->op
);
1316 if (!is_gimple_assign (oe1def
))
1318 dcode
= gimple_assign_rhs_code (oe1def
);
1319 if ((dcode
!= MULT_EXPR
1320 && dcode
!= RDIV_EXPR
)
1321 || !is_reassociable_op (oe1def
, dcode
, loop
))
1324 bitmap_set_bit (candidates
, i
);
1328 if (nr_candidates
< 2)
1330 sbitmap_free (candidates
);
1334 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
1336 fprintf (dump_file
, "searching for un-distribute opportunities ");
1337 print_generic_expr (dump_file
,
1338 (*ops
)[bitmap_first_set_bit (candidates
)]->op
, 0);
1339 fprintf (dump_file
, " %d\n", nr_candidates
);
1342 /* Build linearized sub-operand lists and the counting table. */
1345 /* ??? Macro arguments cannot have multi-argument template types in
1346 them. This typedef is needed to workaround that limitation. */
1347 typedef vec
<operand_entry_t
> vec_operand_entry_t_heap
;
1348 subops
= XCNEWVEC (vec_operand_entry_t_heap
, ops
->length ());
1349 EXECUTE_IF_SET_IN_BITMAP (candidates
, 0, i
, sbi0
)
1352 enum tree_code oecode
;
1355 oedef
= SSA_NAME_DEF_STMT ((*ops
)[i
]->op
);
1356 oecode
= gimple_assign_rhs_code (oedef
);
1357 linearize_expr_tree (&subops
[i
], oedef
,
1358 associative_tree_code (oecode
), false);
1360 FOR_EACH_VEC_ELT (subops
[i
], j
, oe1
)
1367 c
.id
= next_oecount_id
++;
1370 idx
= cvec
.length () + 41;
1371 slot
= ctable
.find_slot ((void *)idx
, INSERT
);
1374 *slot
= (void *)idx
;
1379 cvec
[(size_t)*slot
- 42].cnt
++;
1385 /* Sort the counting table. */
1386 cvec
.qsort (oecount_cmp
);
1388 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
1391 fprintf (dump_file
, "Candidates:\n");
1392 FOR_EACH_VEC_ELT (cvec
, j
, c
)
1394 fprintf (dump_file
, " %u %s: ", c
->cnt
,
1395 c
->oecode
== MULT_EXPR
1396 ? "*" : c
->oecode
== RDIV_EXPR
? "/" : "?");
1397 print_generic_expr (dump_file
, c
->op
, 0);
1398 fprintf (dump_file
, "\n");
1402 /* Process the (operand, code) pairs in order of most occurrence. */
1403 candidates2
= sbitmap_alloc (length
);
1404 while (!cvec
.is_empty ())
1406 oecount
*c
= &cvec
.last ();
1410 /* Now collect the operands in the outer chain that contain
1411 the common operand in their inner chain. */
1412 bitmap_clear (candidates2
);
1414 EXECUTE_IF_SET_IN_BITMAP (candidates
, 0, i
, sbi0
)
1417 enum tree_code oecode
;
1419 tree op
= (*ops
)[i
]->op
;
1421 /* If we undistributed in this chain already this may be
1423 if (TREE_CODE (op
) != SSA_NAME
)
1426 oedef
= SSA_NAME_DEF_STMT (op
);
1427 oecode
= gimple_assign_rhs_code (oedef
);
1428 if (oecode
!= c
->oecode
)
1431 FOR_EACH_VEC_ELT (subops
[i
], j
, oe1
)
1433 if (oe1
->op
== c
->op
)
1435 bitmap_set_bit (candidates2
, i
);
1442 if (nr_candidates2
>= 2)
1444 operand_entry_t oe1
, oe2
;
1446 int first
= bitmap_first_set_bit (candidates2
);
1448 /* Build the new addition chain. */
1449 oe1
= (*ops
)[first
];
1450 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
1452 fprintf (dump_file
, "Building (");
1453 print_generic_expr (dump_file
, oe1
->op
, 0);
1455 zero_one_operation (&oe1
->op
, c
->oecode
, c
->op
);
1456 EXECUTE_IF_SET_IN_BITMAP (candidates2
, first
+1, i
, sbi0
)
1460 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
1462 fprintf (dump_file
, " + ");
1463 print_generic_expr (dump_file
, oe2
->op
, 0);
1465 zero_one_operation (&oe2
->op
, c
->oecode
, c
->op
);
1466 sum
= build_and_add_sum (TREE_TYPE (oe1
->op
),
1467 oe1
->op
, oe2
->op
, opcode
);
1468 oe2
->op
= build_zero_cst (TREE_TYPE (oe2
->op
));
1470 oe1
->op
= gimple_get_lhs (sum
);
1473 /* Apply the multiplication/division. */
1474 prod
= build_and_add_sum (TREE_TYPE (oe1
->op
),
1475 oe1
->op
, c
->op
, c
->oecode
);
1476 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
1478 fprintf (dump_file
, ") %s ", c
->oecode
== MULT_EXPR
? "*" : "/");
1479 print_generic_expr (dump_file
, c
->op
, 0);
1480 fprintf (dump_file
, "\n");
1483 /* Record it in the addition chain and disable further
1484 undistribution with this op. */
1485 oe1
->op
= gimple_assign_lhs (prod
);
1486 oe1
->rank
= get_rank (oe1
->op
);
1487 subops
[first
].release ();
1495 for (i
= 0; i
< ops
->length (); ++i
)
1496 subops
[i
].release ();
1499 sbitmap_free (candidates
);
1500 sbitmap_free (candidates2
);
1505 /* If OPCODE is BIT_IOR_EXPR or BIT_AND_EXPR and CURR is a comparison
1506 expression, examine the other OPS to see if any of them are comparisons
1507 of the same values, which we may be able to combine or eliminate.
1508 For example, we can rewrite (a < b) | (a == b) as (a <= b). */
1511 eliminate_redundant_comparison (enum tree_code opcode
,
1512 vec
<operand_entry_t
> *ops
,
1513 unsigned int currindex
,
1514 operand_entry_t curr
)
1517 enum tree_code lcode
, rcode
;
1522 if (opcode
!= BIT_IOR_EXPR
&& opcode
!= BIT_AND_EXPR
)
1525 /* Check that CURR is a comparison. */
1526 if (TREE_CODE (curr
->op
) != SSA_NAME
)
1528 def1
= SSA_NAME_DEF_STMT (curr
->op
);
1529 if (!is_gimple_assign (def1
))
1531 lcode
= gimple_assign_rhs_code (def1
);
1532 if (TREE_CODE_CLASS (lcode
) != tcc_comparison
)
1534 op1
= gimple_assign_rhs1 (def1
);
1535 op2
= gimple_assign_rhs2 (def1
);
1537 /* Now look for a similar comparison in the remaining OPS. */
1538 for (i
= currindex
+ 1; ops
->iterate (i
, &oe
); i
++)
1542 if (TREE_CODE (oe
->op
) != SSA_NAME
)
1544 def2
= SSA_NAME_DEF_STMT (oe
->op
);
1545 if (!is_gimple_assign (def2
))
1547 rcode
= gimple_assign_rhs_code (def2
);
1548 if (TREE_CODE_CLASS (rcode
) != tcc_comparison
)
1551 /* If we got here, we have a match. See if we can combine the
1553 if (opcode
== BIT_IOR_EXPR
)
1554 t
= maybe_fold_or_comparisons (lcode
, op1
, op2
,
1555 rcode
, gimple_assign_rhs1 (def2
),
1556 gimple_assign_rhs2 (def2
));
1558 t
= maybe_fold_and_comparisons (lcode
, op1
, op2
,
1559 rcode
, gimple_assign_rhs1 (def2
),
1560 gimple_assign_rhs2 (def2
));
1564 /* maybe_fold_and_comparisons and maybe_fold_or_comparisons
1565 always give us a boolean_type_node value back. If the original
1566 BIT_AND_EXPR or BIT_IOR_EXPR was of a wider integer type,
1567 we need to convert. */
1568 if (!useless_type_conversion_p (TREE_TYPE (curr
->op
), TREE_TYPE (t
)))
1569 t
= fold_convert (TREE_TYPE (curr
->op
), t
);
1571 if (TREE_CODE (t
) != INTEGER_CST
1572 && !operand_equal_p (t
, curr
->op
, 0))
1574 enum tree_code subcode
;
1575 tree newop1
, newop2
;
1576 if (!COMPARISON_CLASS_P (t
))
1578 extract_ops_from_tree (t
, &subcode
, &newop1
, &newop2
);
1579 STRIP_USELESS_TYPE_CONVERSION (newop1
);
1580 STRIP_USELESS_TYPE_CONVERSION (newop2
);
1581 if (!is_gimple_val (newop1
) || !is_gimple_val (newop2
))
1585 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
1587 fprintf (dump_file
, "Equivalence: ");
1588 print_generic_expr (dump_file
, curr
->op
, 0);
1589 fprintf (dump_file
, " %s ", op_symbol_code (opcode
));
1590 print_generic_expr (dump_file
, oe
->op
, 0);
1591 fprintf (dump_file
, " -> ");
1592 print_generic_expr (dump_file
, t
, 0);
1593 fprintf (dump_file
, "\n");
1596 /* Now we can delete oe, as it has been subsumed by the new combined
1598 ops
->ordered_remove (i
);
1599 reassociate_stats
.ops_eliminated
++;
1601 /* If t is the same as curr->op, we're done. Otherwise we must
1602 replace curr->op with t. Special case is if we got a constant
1603 back, in which case we add it to the end instead of in place of
1604 the current entry. */
1605 if (TREE_CODE (t
) == INTEGER_CST
)
1607 ops
->ordered_remove (currindex
);
1608 add_to_ops_vec (ops
, t
);
1610 else if (!operand_equal_p (t
, curr
->op
, 0))
1613 enum tree_code subcode
;
1616 gcc_assert (COMPARISON_CLASS_P (t
));
1617 extract_ops_from_tree (t
, &subcode
, &newop1
, &newop2
);
1618 STRIP_USELESS_TYPE_CONVERSION (newop1
);
1619 STRIP_USELESS_TYPE_CONVERSION (newop2
);
1620 gcc_checking_assert (is_gimple_val (newop1
)
1621 && is_gimple_val (newop2
));
1622 sum
= build_and_add_sum (TREE_TYPE (t
), newop1
, newop2
, subcode
);
1623 curr
->op
= gimple_get_lhs (sum
);
1631 /* Perform various identities and other optimizations on the list of
1632 operand entries, stored in OPS. The tree code for the binary
1633 operation between all the operands is OPCODE. */
1636 optimize_ops_list (enum tree_code opcode
,
1637 vec
<operand_entry_t
> *ops
)
1639 unsigned int length
= ops
->length ();
1642 operand_entry_t oelast
= NULL
;
1643 bool iterate
= false;
1648 oelast
= ops
->last ();
1650 /* If the last two are constants, pop the constants off, merge them
1651 and try the next two. */
1652 if (oelast
->rank
== 0 && is_gimple_min_invariant (oelast
->op
))
1654 operand_entry_t oelm1
= (*ops
)[length
- 2];
1656 if (oelm1
->rank
== 0
1657 && is_gimple_min_invariant (oelm1
->op
)
1658 && useless_type_conversion_p (TREE_TYPE (oelm1
->op
),
1659 TREE_TYPE (oelast
->op
)))
1661 tree folded
= fold_binary (opcode
, TREE_TYPE (oelm1
->op
),
1662 oelm1
->op
, oelast
->op
);
1664 if (folded
&& is_gimple_min_invariant (folded
))
1666 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
1667 fprintf (dump_file
, "Merging constants\n");
1672 add_to_ops_vec (ops
, folded
);
1673 reassociate_stats
.constants_eliminated
++;
1675 optimize_ops_list (opcode
, ops
);
1681 eliminate_using_constants (opcode
, ops
);
1684 for (i
= 0; ops
->iterate (i
, &oe
);)
1688 if (eliminate_not_pairs (opcode
, ops
, i
, oe
))
1690 if (eliminate_duplicate_pair (opcode
, ops
, &done
, i
, oe
, oelast
)
1691 || (!done
&& eliminate_plus_minus_pair (opcode
, ops
, i
, oe
))
1692 || (!done
&& eliminate_redundant_comparison (opcode
, ops
, i
, oe
)))
1704 length
= ops
->length ();
1705 oelast
= ops
->last ();
1708 optimize_ops_list (opcode
, ops
);
1711 /* The following functions are subroutines to optimize_range_tests and allow
1712 it to try to change a logical combination of comparisons into a range
1716 X == 2 || X == 5 || X == 3 || X == 4
1720 (unsigned) (X - 2) <= 3
1722 For more information see comments above fold_test_range in fold-const.c,
1723 this implementation is for GIMPLE. */
1731 bool strict_overflow_p
;
1732 unsigned int idx
, next
;
1735 /* This is similar to make_range in fold-const.c, but on top of
1736 GIMPLE instead of trees. If EXP is non-NULL, it should be
1737 an SSA_NAME and STMT argument is ignored, otherwise STMT
1738 argument should be a GIMPLE_COND. */
1741 init_range_entry (struct range_entry
*r
, tree exp
, gimple stmt
)
1745 bool is_bool
, strict_overflow_p
;
1749 r
->strict_overflow_p
= false;
1751 r
->high
= NULL_TREE
;
1752 if (exp
!= NULL_TREE
1753 && (TREE_CODE (exp
) != SSA_NAME
|| !INTEGRAL_TYPE_P (TREE_TYPE (exp
))))
1756 /* Start with simply saying "EXP != 0" and then look at the code of EXP
1757 and see if we can refine the range. Some of the cases below may not
1758 happen, but it doesn't seem worth worrying about this. We "continue"
1759 the outer loop when we've changed something; otherwise we "break"
1760 the switch, which will "break" the while. */
1761 low
= exp
? build_int_cst (TREE_TYPE (exp
), 0) : boolean_false_node
;
1764 strict_overflow_p
= false;
1766 if (exp
== NULL_TREE
)
1768 else if (TYPE_PRECISION (TREE_TYPE (exp
)) == 1)
1770 if (TYPE_UNSIGNED (TREE_TYPE (exp
)))
1775 else if (TREE_CODE (TREE_TYPE (exp
)) == BOOLEAN_TYPE
)
1780 enum tree_code code
;
1781 tree arg0
, arg1
, exp_type
;
1785 if (exp
!= NULL_TREE
)
1787 if (TREE_CODE (exp
) != SSA_NAME
)
1790 stmt
= SSA_NAME_DEF_STMT (exp
);
1791 if (!is_gimple_assign (stmt
))
1794 code
= gimple_assign_rhs_code (stmt
);
1795 arg0
= gimple_assign_rhs1 (stmt
);
1796 arg1
= gimple_assign_rhs2 (stmt
);
1797 exp_type
= TREE_TYPE (exp
);
1801 code
= gimple_cond_code (stmt
);
1802 arg0
= gimple_cond_lhs (stmt
);
1803 arg1
= gimple_cond_rhs (stmt
);
1804 exp_type
= boolean_type_node
;
1807 if (TREE_CODE (arg0
) != SSA_NAME
)
1809 loc
= gimple_location (stmt
);
1813 if (TREE_CODE (TREE_TYPE (exp
)) == BOOLEAN_TYPE
1814 /* Ensure the range is either +[-,0], +[0,0],
1815 -[-,0], -[0,0] or +[1,-], +[1,1], -[1,-] or
1816 -[1,1]. If it is e.g. +[-,-] or -[-,-]
1817 or similar expression of unconditional true or
1818 false, it should not be negated. */
1819 && ((high
&& integer_zerop (high
))
1820 || (low
&& integer_onep (low
))))
1833 if (TYPE_PRECISION (TREE_TYPE (arg0
)) == 1)
1835 if (TYPE_UNSIGNED (TREE_TYPE (arg0
)))
1840 else if (TREE_CODE (TREE_TYPE (arg0
)) == BOOLEAN_TYPE
)
1855 nexp
= make_range_step (loc
, code
, arg0
, arg1
, exp_type
,
1857 &strict_overflow_p
);
1858 if (nexp
!= NULL_TREE
)
1861 gcc_assert (TREE_CODE (exp
) == SSA_NAME
);
1874 r
->strict_overflow_p
= strict_overflow_p
;
1878 /* Comparison function for qsort. Sort entries
1879 without SSA_NAME exp first, then with SSA_NAMEs sorted
1880 by increasing SSA_NAME_VERSION, and for the same SSA_NAMEs
1881 by increasing ->low and if ->low is the same, by increasing
1882 ->high. ->low == NULL_TREE means minimum, ->high == NULL_TREE
1886 range_entry_cmp (const void *a
, const void *b
)
1888 const struct range_entry
*p
= (const struct range_entry
*) a
;
1889 const struct range_entry
*q
= (const struct range_entry
*) b
;
1891 if (p
->exp
!= NULL_TREE
&& TREE_CODE (p
->exp
) == SSA_NAME
)
1893 if (q
->exp
!= NULL_TREE
&& TREE_CODE (q
->exp
) == SSA_NAME
)
1895 /* Group range_entries for the same SSA_NAME together. */
1896 if (SSA_NAME_VERSION (p
->exp
) < SSA_NAME_VERSION (q
->exp
))
1898 else if (SSA_NAME_VERSION (p
->exp
) > SSA_NAME_VERSION (q
->exp
))
1900 /* If ->low is different, NULL low goes first, then by
1902 if (p
->low
!= NULL_TREE
)
1904 if (q
->low
!= NULL_TREE
)
1906 tree tem
= fold_binary (LT_EXPR
, boolean_type_node
,
1908 if (tem
&& integer_onep (tem
))
1910 tem
= fold_binary (GT_EXPR
, boolean_type_node
,
1912 if (tem
&& integer_onep (tem
))
1918 else if (q
->low
!= NULL_TREE
)
1920 /* If ->high is different, NULL high goes last, before that by
1922 if (p
->high
!= NULL_TREE
)
1924 if (q
->high
!= NULL_TREE
)
1926 tree tem
= fold_binary (LT_EXPR
, boolean_type_node
,
1928 if (tem
&& integer_onep (tem
))
1930 tem
= fold_binary (GT_EXPR
, boolean_type_node
,
1932 if (tem
&& integer_onep (tem
))
1938 else if (p
->high
!= NULL_TREE
)
1940 /* If both ranges are the same, sort below by ascending idx. */
1945 else if (q
->exp
!= NULL_TREE
&& TREE_CODE (q
->exp
) == SSA_NAME
)
1948 if (p
->idx
< q
->idx
)
1952 gcc_checking_assert (p
->idx
> q
->idx
);
1957 /* Helper routine of optimize_range_test.
1958 [EXP, IN_P, LOW, HIGH, STRICT_OVERFLOW_P] is a merged range for
1959 RANGE and OTHERRANGE through OTHERRANGE + COUNT - 1 ranges,
1960 OPCODE and OPS are arguments of optimize_range_tests. Return
1961 true if the range merge has been successful.
1962 If OPCODE is ERROR_MARK, this is called from within
1963 maybe_optimize_range_tests and is performing inter-bb range optimization.
1964 Changes should be then performed right away, and whether an op is
1965 BIT_AND_EXPR or BIT_IOR_EXPR is found in oe->rank. */
1968 update_range_test (struct range_entry
*range
, struct range_entry
*otherrange
,
1969 unsigned int count
, enum tree_code opcode
,
1970 vec
<operand_entry_t
> *ops
, tree exp
, bool in_p
,
1971 tree low
, tree high
, bool strict_overflow_p
)
1973 operand_entry_t oe
= (*ops
)[range
->idx
];
1975 gimple stmt
= op
? SSA_NAME_DEF_STMT (op
) : last_stmt (BASIC_BLOCK (oe
->id
));
1976 location_t loc
= gimple_location (stmt
);
1977 tree optype
= op
? TREE_TYPE (op
) : boolean_type_node
;
1978 tree tem
= build_range_check (loc
, optype
, exp
, in_p
, low
, high
);
1979 enum warn_strict_overflow_code wc
= WARN_STRICT_OVERFLOW_COMPARISON
;
1980 gimple_stmt_iterator gsi
;
1982 if (tem
== NULL_TREE
)
1985 if (strict_overflow_p
&& issue_strict_overflow_warning (wc
))
1986 warning_at (loc
, OPT_Wstrict_overflow
,
1987 "assuming signed overflow does not occur "
1988 "when simplifying range test");
1990 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
1992 struct range_entry
*r
;
1993 fprintf (dump_file
, "Optimizing range tests ");
1994 print_generic_expr (dump_file
, range
->exp
, 0);
1995 fprintf (dump_file
, " %c[", range
->in_p
? '+' : '-');
1996 print_generic_expr (dump_file
, range
->low
, 0);
1997 fprintf (dump_file
, ", ");
1998 print_generic_expr (dump_file
, range
->high
, 0);
1999 fprintf (dump_file
, "]");
2000 for (r
= otherrange
; r
< otherrange
+ count
; r
++)
2002 fprintf (dump_file
, " and %c[", r
->in_p
? '+' : '-');
2003 print_generic_expr (dump_file
, r
->low
, 0);
2004 fprintf (dump_file
, ", ");
2005 print_generic_expr (dump_file
, r
->high
, 0);
2006 fprintf (dump_file
, "]");
2008 fprintf (dump_file
, "\n into ");
2009 print_generic_expr (dump_file
, tem
, 0);
2010 fprintf (dump_file
, "\n");
2013 if (opcode
== BIT_IOR_EXPR
2014 || (opcode
== ERROR_MARK
&& oe
->rank
== BIT_IOR_EXPR
))
2015 tem
= invert_truthvalue_loc (loc
, tem
);
2017 tem
= fold_convert_loc (loc
, optype
, tem
);
2018 gsi
= gsi_for_stmt (stmt
);
2019 tem
= force_gimple_operand_gsi (&gsi
, tem
, true, NULL_TREE
, true,
2022 /* If doing inter-bb range test optimization, update the
2023 stmts immediately. Start with changing the first range test
2024 immediate use to the new value (TEM), or, if the first range
2025 test is a GIMPLE_COND stmt, change that condition. */
2026 if (opcode
== ERROR_MARK
)
2030 imm_use_iterator iter
;
2031 use_operand_p use_p
;
2034 FOR_EACH_IMM_USE_STMT (use_stmt
, iter
, op
)
2036 if (is_gimple_debug (use_stmt
))
2038 FOR_EACH_IMM_USE_ON_STMT (use_p
, iter
)
2039 SET_USE (use_p
, tem
);
2040 update_stmt (use_stmt
);
2045 gimple_cond_set_code (stmt
, NE_EXPR
);
2046 gimple_cond_set_lhs (stmt
, tem
);
2047 gimple_cond_set_rhs (stmt
, boolean_false_node
);
2056 range
->strict_overflow_p
= false;
2058 for (range
= otherrange
; range
< otherrange
+ count
; range
++)
2060 oe
= (*ops
)[range
->idx
];
2061 /* Now change all the other range test immediate uses, so that
2062 those tests will be optimized away. */
2063 if (opcode
== ERROR_MARK
)
2067 imm_use_iterator iter
;
2068 use_operand_p use_p
;
2071 FOR_EACH_IMM_USE_STMT (use_stmt
, iter
, oe
->op
)
2073 if (is_gimple_debug (use_stmt
))
2075 /* If imm use of _8 is a statement like _7 = _8 | _9;,
2076 adjust it into _7 = _9;. */
2077 if (is_gimple_assign (use_stmt
)
2078 && gimple_assign_rhs_code (use_stmt
) == oe
->rank
)
2080 tree expr
= NULL_TREE
;
2081 if (oe
->op
== gimple_assign_rhs1 (use_stmt
))
2082 expr
= gimple_assign_rhs2 (use_stmt
);
2083 else if (oe
->op
== gimple_assign_rhs2 (use_stmt
))
2084 expr
= gimple_assign_rhs1 (use_stmt
);
2087 && TREE_CODE (expr
) == SSA_NAME
)
2089 gimple_stmt_iterator gsi2
= gsi_for_stmt (use_stmt
);
2090 gimple_assign_set_rhs_with_ops (&gsi2
, SSA_NAME
,
2092 update_stmt (use_stmt
);
2096 /* If imm use of _8 is a statement like _7 = (int) _8;,
2097 adjust it into _7 = 0; or _7 = 1;. */
2098 if (gimple_assign_cast_p (use_stmt
)
2099 && oe
->op
== gimple_assign_rhs1 (use_stmt
))
2101 tree lhs
= gimple_assign_lhs (use_stmt
);
2102 if (INTEGRAL_TYPE_P (TREE_TYPE (lhs
)))
2104 gimple_stmt_iterator gsi2
2105 = gsi_for_stmt (use_stmt
);
2106 tree expr
= build_int_cst (TREE_TYPE (lhs
),
2107 oe
->rank
== BIT_IOR_EXPR
2109 gimple_assign_set_rhs_with_ops (&gsi2
,
2112 update_stmt (use_stmt
);
2116 /* Otherwise replace the use with 0 or 1. */
2117 FOR_EACH_IMM_USE_ON_STMT (use_p
, iter
)
2119 build_int_cst (TREE_TYPE (oe
->op
),
2120 oe
->rank
== BIT_IOR_EXPR
2122 update_stmt (use_stmt
);
2127 /* If range test was a GIMPLE_COND, simply change it
2128 into an always false or always true condition. */
2129 stmt
= last_stmt (BASIC_BLOCK (oe
->id
));
2130 if (oe
->rank
== BIT_IOR_EXPR
)
2131 gimple_cond_make_false (stmt
);
2133 gimple_cond_make_true (stmt
);
2137 oe
->op
= error_mark_node
;
2138 range
->exp
= NULL_TREE
;
2143 /* Optimize X == CST1 || X == CST2
2144 if popcount (CST1 ^ CST2) == 1 into
2145 (X & ~(CST1 ^ CST2)) == (CST1 & ~(CST1 ^ CST2)).
2146 Similarly for ranges. E.g.
2147 X != 2 && X != 3 && X != 10 && X != 11
2148 will be transformed by the previous optimization into
2149 !((X - 2U) <= 1U || (X - 10U) <= 1U)
2150 and this loop can transform that into
2151 !(((X & ~8) - 2U) <= 1U). */
2154 optimize_range_tests_xor (enum tree_code opcode
, tree type
,
2155 tree lowi
, tree lowj
, tree highi
, tree highj
,
2156 vec
<operand_entry_t
> *ops
,
2157 struct range_entry
*rangei
,
2158 struct range_entry
*rangej
)
2160 tree lowxor
, highxor
, tem
, exp
;
2161 /* Check highi ^ lowi == highj ^ lowj and
2162 popcount (highi ^ lowi) == 1. */
2163 lowxor
= fold_binary (BIT_XOR_EXPR
, type
, lowi
, lowj
);
2164 if (lowxor
== NULL_TREE
|| TREE_CODE (lowxor
) != INTEGER_CST
)
2166 if (tree_log2 (lowxor
) < 0)
2168 highxor
= fold_binary (BIT_XOR_EXPR
, type
, highi
, highj
);
2169 if (!tree_int_cst_equal (lowxor
, highxor
))
2172 tem
= fold_build1 (BIT_NOT_EXPR
, type
, lowxor
);
2173 exp
= fold_build2 (BIT_AND_EXPR
, type
, rangei
->exp
, tem
);
2174 lowj
= fold_build2 (BIT_AND_EXPR
, type
, lowi
, tem
);
2175 highj
= fold_build2 (BIT_AND_EXPR
, type
, highi
, tem
);
2176 if (update_range_test (rangei
, rangej
, 1, opcode
, ops
, exp
,
2177 rangei
->in_p
, lowj
, highj
,
2178 rangei
->strict_overflow_p
2179 || rangej
->strict_overflow_p
))
2184 /* Optimize X == CST1 || X == CST2
2185 if popcount (CST2 - CST1) == 1 into
2186 ((X - CST1) & ~(CST2 - CST1)) == 0.
2187 Similarly for ranges. E.g.
2188 X == 43 || X == 76 || X == 44 || X == 78 || X == 77 || X == 46
2189 || X == 75 || X == 45
2190 will be transformed by the previous optimization into
2191 (X - 43U) <= 3U || (X - 75U) <= 3U
2192 and this loop can transform that into
2193 ((X - 43U) & ~(75U - 43U)) <= 3U. */
2195 optimize_range_tests_diff (enum tree_code opcode
, tree type
,
2196 tree lowi
, tree lowj
, tree highi
, tree highj
,
2197 vec
<operand_entry_t
> *ops
,
2198 struct range_entry
*rangei
,
2199 struct range_entry
*rangej
)
2201 tree tem1
, tem2
, mask
;
2202 /* Check highi - lowi == highj - lowj. */
2203 tem1
= fold_binary (MINUS_EXPR
, type
, highi
, lowi
);
2204 if (tem1
== NULL_TREE
|| TREE_CODE (tem1
) != INTEGER_CST
)
2206 tem2
= fold_binary (MINUS_EXPR
, type
, highj
, lowj
);
2207 if (!tree_int_cst_equal (tem1
, tem2
))
2209 /* Check popcount (lowj - lowi) == 1. */
2210 tem1
= fold_binary (MINUS_EXPR
, type
, lowj
, lowi
);
2211 if (tem1
== NULL_TREE
|| TREE_CODE (tem1
) != INTEGER_CST
)
2213 if (tree_log2 (tem1
) < 0)
2216 mask
= fold_build1 (BIT_NOT_EXPR
, type
, tem1
);
2217 tem1
= fold_binary (MINUS_EXPR
, type
, rangei
->exp
, lowi
);
2218 tem1
= fold_build2 (BIT_AND_EXPR
, type
, tem1
, mask
);
2219 lowj
= build_int_cst (type
, 0);
2220 if (update_range_test (rangei
, rangej
, 1, opcode
, ops
, tem1
,
2221 rangei
->in_p
, lowj
, tem2
,
2222 rangei
->strict_overflow_p
2223 || rangej
->strict_overflow_p
))
2228 /* It does some common checks for function optimize_range_tests_xor and
2229 optimize_range_tests_diff.
2230 If OPTIMIZE_XOR is TRUE, it calls optimize_range_tests_xor.
2231 Else it calls optimize_range_tests_diff. */
2234 optimize_range_tests_1 (enum tree_code opcode
, int first
, int length
,
2235 bool optimize_xor
, vec
<operand_entry_t
> *ops
,
2236 struct range_entry
*ranges
)
2239 bool any_changes
= false;
2240 for (i
= first
; i
< length
; i
++)
2242 tree lowi
, highi
, lowj
, highj
, type
, tem
;
2244 if (ranges
[i
].exp
== NULL_TREE
|| ranges
[i
].in_p
)
2246 type
= TREE_TYPE (ranges
[i
].exp
);
2247 if (!INTEGRAL_TYPE_P (type
))
2249 lowi
= ranges
[i
].low
;
2250 if (lowi
== NULL_TREE
)
2251 lowi
= TYPE_MIN_VALUE (type
);
2252 highi
= ranges
[i
].high
;
2253 if (highi
== NULL_TREE
)
2255 for (j
= i
+ 1; j
< length
&& j
< i
+ 64; j
++)
2258 if (ranges
[i
].exp
!= ranges
[j
].exp
|| ranges
[j
].in_p
)
2260 lowj
= ranges
[j
].low
;
2261 if (lowj
== NULL_TREE
)
2263 highj
= ranges
[j
].high
;
2264 if (highj
== NULL_TREE
)
2265 highj
= TYPE_MAX_VALUE (type
);
2266 /* Check lowj > highi. */
2267 tem
= fold_binary (GT_EXPR
, boolean_type_node
,
2269 if (tem
== NULL_TREE
|| !integer_onep (tem
))
2272 changes
= optimize_range_tests_xor (opcode
, type
, lowi
, lowj
,
2274 ranges
+ i
, ranges
+ j
);
2276 changes
= optimize_range_tests_diff (opcode
, type
, lowi
, lowj
,
2278 ranges
+ i
, ranges
+ j
);
2289 /* Optimize range tests, similarly how fold_range_test optimizes
2290 it on trees. The tree code for the binary
2291 operation between all the operands is OPCODE.
2292 If OPCODE is ERROR_MARK, optimize_range_tests is called from within
2293 maybe_optimize_range_tests for inter-bb range optimization.
2294 In that case if oe->op is NULL, oe->id is bb->index whose
2295 GIMPLE_COND is && or ||ed into the test, and oe->rank says
2296 the actual opcode. */
2299 optimize_range_tests (enum tree_code opcode
,
2300 vec
<operand_entry_t
> *ops
)
2302 unsigned int length
= ops
->length (), i
, j
, first
;
2304 struct range_entry
*ranges
;
2305 bool any_changes
= false;
2310 ranges
= XNEWVEC (struct range_entry
, length
);
2311 for (i
= 0; i
< length
; i
++)
2315 init_range_entry (ranges
+ i
, oe
->op
,
2316 oe
->op
? NULL
: last_stmt (BASIC_BLOCK (oe
->id
)));
2317 /* For | invert it now, we will invert it again before emitting
2318 the optimized expression. */
2319 if (opcode
== BIT_IOR_EXPR
2320 || (opcode
== ERROR_MARK
&& oe
->rank
== BIT_IOR_EXPR
))
2321 ranges
[i
].in_p
= !ranges
[i
].in_p
;
2324 qsort (ranges
, length
, sizeof (*ranges
), range_entry_cmp
);
2325 for (i
= 0; i
< length
; i
++)
2326 if (ranges
[i
].exp
!= NULL_TREE
&& TREE_CODE (ranges
[i
].exp
) == SSA_NAME
)
2329 /* Try to merge ranges. */
2330 for (first
= i
; i
< length
; i
++)
2332 tree low
= ranges
[i
].low
;
2333 tree high
= ranges
[i
].high
;
2334 int in_p
= ranges
[i
].in_p
;
2335 bool strict_overflow_p
= ranges
[i
].strict_overflow_p
;
2336 int update_fail_count
= 0;
2338 for (j
= i
+ 1; j
< length
; j
++)
2340 if (ranges
[i
].exp
!= ranges
[j
].exp
)
2342 if (!merge_ranges (&in_p
, &low
, &high
, in_p
, low
, high
,
2343 ranges
[j
].in_p
, ranges
[j
].low
, ranges
[j
].high
))
2345 strict_overflow_p
|= ranges
[j
].strict_overflow_p
;
2351 if (update_range_test (ranges
+ i
, ranges
+ i
+ 1, j
- i
- 1, opcode
,
2352 ops
, ranges
[i
].exp
, in_p
, low
, high
,
2358 /* Avoid quadratic complexity if all merge_ranges calls would succeed,
2359 while update_range_test would fail. */
2360 else if (update_fail_count
== 64)
2363 ++update_fail_count
;
2366 any_changes
|= optimize_range_tests_1 (opcode
, first
, length
, true,
2369 if (BRANCH_COST (optimize_function_for_speed_p (cfun
), false) >= 2)
2370 any_changes
|= optimize_range_tests_1 (opcode
, first
, length
, false,
2373 if (any_changes
&& opcode
!= ERROR_MARK
)
2376 FOR_EACH_VEC_ELT (*ops
, i
, oe
)
2378 if (oe
->op
== error_mark_node
)
2387 XDELETEVEC (ranges
);
2390 /* Return true if STMT is a cast like:
2396 # _345 = PHI <_123(N), 1(...), 1(...)>
2397 where _234 has bool type, _123 has single use and
2398 bb N has a single successor M. This is commonly used in
2399 the last block of a range test. */
2402 final_range_test_p (gimple stmt
)
2404 basic_block bb
, rhs_bb
;
2407 use_operand_p use_p
;
2410 if (!gimple_assign_cast_p (stmt
))
2412 bb
= gimple_bb (stmt
);
2413 if (!single_succ_p (bb
))
2415 e
= single_succ_edge (bb
);
2416 if (e
->flags
& EDGE_COMPLEX
)
2419 lhs
= gimple_assign_lhs (stmt
);
2420 rhs
= gimple_assign_rhs1 (stmt
);
2421 if (!INTEGRAL_TYPE_P (TREE_TYPE (lhs
))
2422 || TREE_CODE (rhs
) != SSA_NAME
2423 || TREE_CODE (TREE_TYPE (rhs
)) != BOOLEAN_TYPE
)
2426 /* Test whether lhs is consumed only by a PHI in the only successor bb. */
2427 if (!single_imm_use (lhs
, &use_p
, &use_stmt
))
2430 if (gimple_code (use_stmt
) != GIMPLE_PHI
2431 || gimple_bb (use_stmt
) != e
->dest
)
2434 /* And that the rhs is defined in the same loop. */
2435 rhs_bb
= gimple_bb (SSA_NAME_DEF_STMT (rhs
));
2437 || !flow_bb_inside_loop_p (loop_containing_stmt (stmt
), rhs_bb
))
2443 /* Return true if BB is suitable basic block for inter-bb range test
2444 optimization. If BACKWARD is true, BB should be the only predecessor
2445 of TEST_BB, and *OTHER_BB is either NULL and filled by the routine,
2446 or compared with to find a common basic block to which all conditions
2447 branch to if true resp. false. If BACKWARD is false, TEST_BB should
2448 be the only predecessor of BB. */
2451 suitable_cond_bb (basic_block bb
, basic_block test_bb
, basic_block
*other_bb
,
2454 edge_iterator ei
, ei2
;
2457 gimple_stmt_iterator gsi
;
2458 bool other_edge_seen
= false;
2463 /* Check last stmt first. */
2464 stmt
= last_stmt (bb
);
2466 || (gimple_code (stmt
) != GIMPLE_COND
2467 && (backward
|| !final_range_test_p (stmt
)))
2468 || gimple_visited_p (stmt
)
2469 || stmt_could_throw_p (stmt
)
2472 is_cond
= gimple_code (stmt
) == GIMPLE_COND
;
2475 /* If last stmt is GIMPLE_COND, verify that one of the succ edges
2476 goes to the next bb (if BACKWARD, it is TEST_BB), and the other
2477 to *OTHER_BB (if not set yet, try to find it out). */
2478 if (EDGE_COUNT (bb
->succs
) != 2)
2480 FOR_EACH_EDGE (e
, ei
, bb
->succs
)
2482 if (!(e
->flags
& (EDGE_TRUE_VALUE
| EDGE_FALSE_VALUE
)))
2484 if (e
->dest
== test_bb
)
2493 if (*other_bb
== NULL
)
2495 FOR_EACH_EDGE (e2
, ei2
, test_bb
->succs
)
2496 if (!(e2
->flags
& (EDGE_TRUE_VALUE
| EDGE_FALSE_VALUE
)))
2498 else if (e
->dest
== e2
->dest
)
2499 *other_bb
= e
->dest
;
2500 if (*other_bb
== NULL
)
2503 if (e
->dest
== *other_bb
)
2504 other_edge_seen
= true;
2508 if (*other_bb
== NULL
|| !other_edge_seen
)
2511 else if (single_succ (bb
) != *other_bb
)
2514 /* Now check all PHIs of *OTHER_BB. */
2515 e
= find_edge (bb
, *other_bb
);
2516 e2
= find_edge (test_bb
, *other_bb
);
2517 for (gsi
= gsi_start_phis (e
->dest
); !gsi_end_p (gsi
); gsi_next (&gsi
))
2519 gimple phi
= gsi_stmt (gsi
);
2520 /* If both BB and TEST_BB end with GIMPLE_COND, all PHI arguments
2521 corresponding to BB and TEST_BB predecessor must be the same. */
2522 if (!operand_equal_p (gimple_phi_arg_def (phi
, e
->dest_idx
),
2523 gimple_phi_arg_def (phi
, e2
->dest_idx
), 0))
2525 /* Otherwise, if one of the blocks doesn't end with GIMPLE_COND,
2526 one of the PHIs should have the lhs of the last stmt in
2527 that block as PHI arg and that PHI should have 0 or 1
2528 corresponding to it in all other range test basic blocks
2532 if (gimple_phi_arg_def (phi
, e
->dest_idx
)
2533 == gimple_assign_lhs (stmt
)
2534 && (integer_zerop (gimple_phi_arg_def (phi
, e2
->dest_idx
))
2535 || integer_onep (gimple_phi_arg_def (phi
,
2541 gimple test_last
= last_stmt (test_bb
);
2542 if (gimple_code (test_last
) != GIMPLE_COND
2543 && gimple_phi_arg_def (phi
, e2
->dest_idx
)
2544 == gimple_assign_lhs (test_last
)
2545 && (integer_zerop (gimple_phi_arg_def (phi
, e
->dest_idx
))
2546 || integer_onep (gimple_phi_arg_def (phi
, e
->dest_idx
))))
2556 /* Return true if BB doesn't have side-effects that would disallow
2557 range test optimization, all SSA_NAMEs set in the bb are consumed
2558 in the bb and there are no PHIs. */
2561 no_side_effect_bb (basic_block bb
)
2563 gimple_stmt_iterator gsi
;
2566 if (!gimple_seq_empty_p (phi_nodes (bb
)))
2568 last
= last_stmt (bb
);
2569 for (gsi
= gsi_start_bb (bb
); !gsi_end_p (gsi
); gsi_next (&gsi
))
2571 gimple stmt
= gsi_stmt (gsi
);
2573 imm_use_iterator imm_iter
;
2574 use_operand_p use_p
;
2576 if (is_gimple_debug (stmt
))
2578 if (gimple_has_side_effects (stmt
))
2582 if (!is_gimple_assign (stmt
))
2584 lhs
= gimple_assign_lhs (stmt
);
2585 if (TREE_CODE (lhs
) != SSA_NAME
)
2587 if (gimple_assign_rhs_could_trap_p (stmt
))
2589 FOR_EACH_IMM_USE_FAST (use_p
, imm_iter
, lhs
)
2591 gimple use_stmt
= USE_STMT (use_p
);
2592 if (is_gimple_debug (use_stmt
))
2594 if (gimple_bb (use_stmt
) != bb
)
2601 /* If VAR is set by CODE (BIT_{AND,IOR}_EXPR) which is reassociable,
2602 return true and fill in *OPS recursively. */
2605 get_ops (tree var
, enum tree_code code
, vec
<operand_entry_t
> *ops
,
2608 gimple stmt
= SSA_NAME_DEF_STMT (var
);
2612 if (!is_reassociable_op (stmt
, code
, loop
))
2615 rhs
[0] = gimple_assign_rhs1 (stmt
);
2616 rhs
[1] = gimple_assign_rhs2 (stmt
);
2617 gimple_set_visited (stmt
, true);
2618 for (i
= 0; i
< 2; i
++)
2619 if (TREE_CODE (rhs
[i
]) == SSA_NAME
2620 && !get_ops (rhs
[i
], code
, ops
, loop
)
2621 && has_single_use (rhs
[i
]))
2623 operand_entry_t oe
= (operand_entry_t
) pool_alloc (operand_entry_pool
);
2629 ops
->safe_push (oe
);
2634 /* Inter-bb range test optimization. */
2637 maybe_optimize_range_tests (gimple stmt
)
2639 basic_block first_bb
= gimple_bb (stmt
);
2640 basic_block last_bb
= first_bb
;
2641 basic_block other_bb
= NULL
;
2645 vec
<operand_entry_t
> ops
= vNULL
;
2647 /* Consider only basic blocks that end with GIMPLE_COND or
2648 a cast statement satisfying final_range_test_p. All
2649 but the last bb in the first_bb .. last_bb range
2650 should end with GIMPLE_COND. */
2651 if (gimple_code (stmt
) == GIMPLE_COND
)
2653 if (EDGE_COUNT (first_bb
->succs
) != 2)
2656 else if (final_range_test_p (stmt
))
2657 other_bb
= single_succ (first_bb
);
2661 if (stmt_could_throw_p (stmt
))
2664 /* As relative ordering of post-dominator sons isn't fixed,
2665 maybe_optimize_range_tests can be called first on any
2666 bb in the range we want to optimize. So, start searching
2667 backwards, if first_bb can be set to a predecessor. */
2668 while (single_pred_p (first_bb
))
2670 basic_block pred_bb
= single_pred (first_bb
);
2671 if (!suitable_cond_bb (pred_bb
, first_bb
, &other_bb
, true))
2673 if (!no_side_effect_bb (first_bb
))
2677 /* If first_bb is last_bb, other_bb hasn't been computed yet.
2678 Before starting forward search in last_bb successors, find
2679 out the other_bb. */
2680 if (first_bb
== last_bb
)
2683 /* As non-GIMPLE_COND last stmt always terminates the range,
2684 if forward search didn't discover anything, just give up. */
2685 if (gimple_code (stmt
) != GIMPLE_COND
)
2687 /* Look at both successors. Either it ends with a GIMPLE_COND
2688 and satisfies suitable_cond_bb, or ends with a cast and
2689 other_bb is that cast's successor. */
2690 FOR_EACH_EDGE (e
, ei
, first_bb
->succs
)
2691 if (!(e
->flags
& (EDGE_TRUE_VALUE
| EDGE_FALSE_VALUE
))
2692 || e
->dest
== first_bb
)
2694 else if (single_pred_p (e
->dest
))
2696 stmt
= last_stmt (e
->dest
);
2698 && gimple_code (stmt
) == GIMPLE_COND
2699 && EDGE_COUNT (e
->dest
->succs
) == 2)
2701 if (suitable_cond_bb (first_bb
, e
->dest
, &other_bb
, true))
2707 && final_range_test_p (stmt
)
2708 && find_edge (first_bb
, single_succ (e
->dest
)))
2710 other_bb
= single_succ (e
->dest
);
2711 if (other_bb
== first_bb
)
2715 if (other_bb
== NULL
)
2718 /* Now do the forward search, moving last_bb to successor bbs
2719 that aren't other_bb. */
2720 while (EDGE_COUNT (last_bb
->succs
) == 2)
2722 FOR_EACH_EDGE (e
, ei
, last_bb
->succs
)
2723 if (e
->dest
!= other_bb
)
2727 if (!single_pred_p (e
->dest
))
2729 if (!suitable_cond_bb (e
->dest
, last_bb
, &other_bb
, false))
2731 if (!no_side_effect_bb (e
->dest
))
2735 if (first_bb
== last_bb
)
2737 /* Here basic blocks first_bb through last_bb's predecessor
2738 end with GIMPLE_COND, all of them have one of the edges to
2739 other_bb and another to another block in the range,
2740 all blocks except first_bb don't have side-effects and
2741 last_bb ends with either GIMPLE_COND, or cast satisfying
2742 final_range_test_p. */
2743 for (bb
= last_bb
; ; bb
= single_pred (bb
))
2745 enum tree_code code
;
2748 e
= find_edge (bb
, other_bb
);
2749 stmt
= last_stmt (bb
);
2750 gimple_set_visited (stmt
, true);
2751 if (gimple_code (stmt
) != GIMPLE_COND
)
2753 use_operand_p use_p
;
2758 lhs
= gimple_assign_lhs (stmt
);
2759 rhs
= gimple_assign_rhs1 (stmt
);
2760 gcc_assert (bb
== last_bb
);
2767 # _345 = PHI <_123(N), 1(...), 1(...)>
2769 or 0 instead of 1. If it is 0, the _234
2770 range test is anded together with all the
2771 other range tests, if it is 1, it is ored with
2773 single_imm_use (lhs
, &use_p
, &phi
);
2774 gcc_assert (gimple_code (phi
) == GIMPLE_PHI
);
2775 e2
= find_edge (first_bb
, other_bb
);
2777 gcc_assert (gimple_phi_arg_def (phi
, e
->dest_idx
) == lhs
);
2778 if (integer_zerop (gimple_phi_arg_def (phi
, d
)))
2779 code
= BIT_AND_EXPR
;
2782 gcc_checking_assert (integer_onep (gimple_phi_arg_def (phi
, d
)));
2783 code
= BIT_IOR_EXPR
;
2786 /* If _234 SSA_NAME_DEF_STMT is
2788 (or &, corresponding to 1/0 in the phi arguments,
2789 push into ops the individual range test arguments
2790 of the bitwise or resp. and, recursively. */
2791 if (!get_ops (rhs
, code
, &ops
,
2792 loop_containing_stmt (stmt
))
2793 && has_single_use (rhs
))
2795 /* Otherwise, push the _234 range test itself. */
2797 = (operand_entry_t
) pool_alloc (operand_entry_pool
);
2807 /* Otherwise stmt is GIMPLE_COND. */
2808 code
= gimple_cond_code (stmt
);
2809 lhs
= gimple_cond_lhs (stmt
);
2810 rhs
= gimple_cond_rhs (stmt
);
2811 if (TREE_CODE (lhs
) == SSA_NAME
2812 && INTEGRAL_TYPE_P (TREE_TYPE (lhs
))
2813 && ((code
!= EQ_EXPR
&& code
!= NE_EXPR
)
2814 || rhs
!= boolean_false_node
2815 /* Either push into ops the individual bitwise
2816 or resp. and operands, depending on which
2817 edge is other_bb. */
2818 || !get_ops (lhs
, (((e
->flags
& EDGE_TRUE_VALUE
) == 0)
2819 ^ (code
== EQ_EXPR
))
2820 ? BIT_AND_EXPR
: BIT_IOR_EXPR
, &ops
,
2821 loop_containing_stmt (stmt
))))
2823 /* Or push the GIMPLE_COND stmt itself. */
2825 = (operand_entry_t
) pool_alloc (operand_entry_pool
);
2828 oe
->rank
= (e
->flags
& EDGE_TRUE_VALUE
)
2829 ? BIT_IOR_EXPR
: BIT_AND_EXPR
;
2830 /* oe->op = NULL signs that there is no SSA_NAME
2831 for the range test, and oe->id instead is the
2832 basic block number, at which's end the GIMPLE_COND
2841 if (ops
.length () > 1)
2842 optimize_range_tests (ERROR_MARK
, &ops
);
2846 /* Return true if OPERAND is defined by a PHI node which uses the LHS
2847 of STMT in it's operands. This is also known as a "destructive
2848 update" operation. */
2851 is_phi_for_stmt (gimple stmt
, tree operand
)
2855 use_operand_p arg_p
;
2858 if (TREE_CODE (operand
) != SSA_NAME
)
2861 lhs
= gimple_assign_lhs (stmt
);
2863 def_stmt
= SSA_NAME_DEF_STMT (operand
);
2864 if (gimple_code (def_stmt
) != GIMPLE_PHI
)
2867 FOR_EACH_PHI_ARG (arg_p
, def_stmt
, i
, SSA_OP_USE
)
2868 if (lhs
== USE_FROM_PTR (arg_p
))
2873 /* Remove def stmt of VAR if VAR has zero uses and recurse
2874 on rhs1 operand if so. */
2877 remove_visited_stmt_chain (tree var
)
2880 gimple_stmt_iterator gsi
;
2884 if (TREE_CODE (var
) != SSA_NAME
|| !has_zero_uses (var
))
2886 stmt
= SSA_NAME_DEF_STMT (var
);
2887 if (is_gimple_assign (stmt
) && gimple_visited_p (stmt
))
2889 var
= gimple_assign_rhs1 (stmt
);
2890 gsi
= gsi_for_stmt (stmt
);
2891 gsi_remove (&gsi
, true);
2892 release_defs (stmt
);
2899 /* This function checks three consequtive operands in
2900 passed operands vector OPS starting from OPINDEX and
2901 swaps two operands if it is profitable for binary operation
2902 consuming OPINDEX + 1 abnd OPINDEX + 2 operands.
2904 We pair ops with the same rank if possible.
2906 The alternative we try is to see if STMT is a destructive
2907 update style statement, which is like:
2910 In that case, we want to use the destructive update form to
2911 expose the possible vectorizer sum reduction opportunity.
2912 In that case, the third operand will be the phi node. This
2913 check is not performed if STMT is null.
2915 We could, of course, try to be better as noted above, and do a
2916 lot of work to try to find these opportunities in >3 operand
2917 cases, but it is unlikely to be worth it. */
2920 swap_ops_for_binary_stmt (vec
<operand_entry_t
> ops
,
2921 unsigned int opindex
, gimple stmt
)
2923 operand_entry_t oe1
, oe2
, oe3
;
2926 oe2
= ops
[opindex
+ 1];
2927 oe3
= ops
[opindex
+ 2];
2929 if ((oe1
->rank
== oe2
->rank
2930 && oe2
->rank
!= oe3
->rank
)
2931 || (stmt
&& is_phi_for_stmt (stmt
, oe3
->op
)
2932 && !is_phi_for_stmt (stmt
, oe1
->op
)
2933 && !is_phi_for_stmt (stmt
, oe2
->op
)))
2935 struct operand_entry temp
= *oe3
;
2937 oe3
->rank
= oe1
->rank
;
2939 oe1
->rank
= temp
.rank
;
2941 else if ((oe1
->rank
== oe3
->rank
2942 && oe2
->rank
!= oe3
->rank
)
2943 || (stmt
&& is_phi_for_stmt (stmt
, oe2
->op
)
2944 && !is_phi_for_stmt (stmt
, oe1
->op
)
2945 && !is_phi_for_stmt (stmt
, oe3
->op
)))
2947 struct operand_entry temp
= *oe2
;
2949 oe2
->rank
= oe1
->rank
;
2951 oe1
->rank
= temp
.rank
;
2955 /* Determine if stmt A is not dominated by stmt B. If A and B are in
2956 same basic block, then A's UID has to be less than B. If they are
2957 in different BB's, then A's BB must not be dominated by B's BB. */
2960 not_dominated_by (gimple a
, gimple b
)
2962 basic_block bb_a
, bb_b
;
2963 bb_a
= gimple_bb (a
);
2964 bb_b
= gimple_bb (b
);
2965 return ((bb_a
== bb_b
&& gimple_uid (a
) < gimple_uid (b
))
2967 && !dominated_by_p (CDI_DOMINATORS
, bb_a
, bb_b
)));
2971 /* Among STMT1 and STMT2, return the statement that appears later. Both
2972 statements are in same BB and have the same UID. */
2975 appears_later_in_bb (gimple stmt1
, gimple stmt2
)
2977 unsigned uid
= gimple_uid (stmt1
);
2978 gimple_stmt_iterator gsi
= gsi_for_stmt (stmt1
);
2979 for (gsi_next (&gsi
); !gsi_end_p (gsi
); gsi_next (&gsi
))
2981 gimple stmt
= gsi_stmt (gsi
);
2983 /* If STMT has a different UID than STMT1 and we haven't seen
2984 STMT2 during traversal, we know STMT1 appears later. */
2985 if (gimple_uid (stmt
) != uid
)
2987 else if (stmt
== stmt2
)
2993 /* Find the statement after which STMT must be moved so that the
2994 dependency from DEP_STMT to STMT is maintained. */
2997 find_insert_point (gimple stmt
, gimple dep_stmt
)
2999 gimple insert_stmt
= stmt
;
3000 if (dep_stmt
== NULL
)
3002 if (gimple_uid (insert_stmt
) == gimple_uid (dep_stmt
)
3003 && gimple_bb (insert_stmt
) == gimple_bb (dep_stmt
)
3004 && insert_stmt
!= dep_stmt
)
3005 insert_stmt
= appears_later_in_bb (insert_stmt
, dep_stmt
);
3006 else if (not_dominated_by (insert_stmt
, dep_stmt
))
3007 insert_stmt
= dep_stmt
;
3011 /* Insert STMT after INSERT_POINT. */
3014 insert_stmt_after (gimple stmt
, gimple insert_point
)
3016 imm_use_iterator iter
;
3019 gimple_stmt_iterator gsistmt
= gsi_for_stmt (stmt
), gsi_insert
;
3020 basic_block insert_bb
= gimple_bb (insert_point
);
3021 bool insert_bb_different
= (insert_bb
!= gimple_bb (stmt
));
3022 lhs
= gimple_assign_lhs (stmt
);
3023 /* If there are any debug uses of LHS, reset them. */
3024 FOR_EACH_IMM_USE_STMT (use_stmt
, iter
, lhs
)
3026 if (is_gimple_debug (use_stmt
)
3027 && not_dominated_by (use_stmt
, insert_point
))
3029 gimple_debug_bind_reset_value (use_stmt
);
3030 update_stmt (use_stmt
);
3033 /* If INSERT_STMT is a phi node, then do not insert just after that statement.
3034 Instead, find the first non-label gimple statement in BB and insert before
3036 if (gimple_code (insert_point
) == GIMPLE_PHI
)
3038 gsi_insert
= gsi_after_labels (insert_bb
);
3039 gsi_move_before (&gsistmt
, &gsi_insert
);
3041 /* Statements marked for throw can not be in the middle of a basic block. So
3042 we can not insert a statement (not marked for throw) immediately after. */
3043 else if (stmt_ends_bb_p (insert_point
))
3045 edge succ_edge
= find_fallthru_edge (insert_bb
->succs
);
3046 insert_bb
= succ_edge
->dest
;
3047 insert_bb_different
= (insert_bb
!= gimple_bb (stmt
));
3048 /* Insert STMT at the beginning of the successor basic block. */
3049 gsi_insert
= gsi_after_labels (insert_bb
);
3050 gsi_move_before (&gsistmt
, &gsi_insert
);
3054 gsi_insert
= gsi_for_stmt (insert_point
);
3055 gsi_move_after (&gsistmt
, &gsi_insert
);
3057 /* Set the UID of STMT to that of INSERT_POINT so that subsequent comparisons
3058 of UIDs to determine dominance within a basic block works. */
3059 gimple_set_uid (stmt
, gimple_uid (insert_point
));
3060 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
3062 fprintf (dump_file
, "Moved stmt ");
3063 print_gimple_stmt (dump_file
, stmt
, 0, 0);
3064 fprintf (dump_file
, " %s to satisfy dependences\n",
3065 insert_bb_different
? "to a different BB" : "within same BB");
3070 /* If OP is a SSA variable and is not the default definition, return the
3071 gimple statement that defines OP. Else return NULL. */
3073 static inline gimple
3074 get_def_stmt (tree op
)
3076 if (TREE_CODE (op
) == SSA_NAME
3077 && !SSA_NAME_IS_DEFAULT_DEF (op
))
3078 return SSA_NAME_DEF_STMT (op
);
3083 /* Ensure that operands in the OPS vector are available for STMT and all
3084 gimple statements on which STMT depends. */
3087 ensure_ops_are_available (gimple stmt
, vec
<operand_entry_t
> ops
, int opindex
)
3089 unsigned int len
= ops
.length ();
3090 gimple insert_stmt
= stmt
;
3091 gimple dep_stmts
[2];
3092 dep_stmts
[0] = get_def_stmt (ops
[opindex
]->op
);
3093 if (len
- opindex
== 2)
3095 dep_stmts
[1] = get_def_stmt (ops
[opindex
+ 1]->op
);
3099 gimple stmt1
= SSA_NAME_DEF_STMT (gimple_assign_rhs1 (stmt
));
3100 ensure_ops_are_available (stmt1
, ops
, opindex
+ 1);
3101 dep_stmts
[1] = stmt1
;
3103 for (int i
= 0; i
< 2; i
++)
3104 insert_stmt
= find_insert_point (insert_stmt
, dep_stmts
[i
]);
3106 if (insert_stmt
!= stmt
)
3107 insert_stmt_after (stmt
, insert_stmt
);
3110 /* Recursively rewrite our linearized statements so that the operators
3111 match those in OPS[OPINDEX], putting the computation in rank
3115 rewrite_expr_tree (gimple stmt
, unsigned int opindex
,
3116 vec
<operand_entry_t
> ops
, bool moved
)
3118 tree rhs1
= gimple_assign_rhs1 (stmt
);
3119 tree rhs2
= gimple_assign_rhs2 (stmt
);
3122 /* The final recursion case for this function is that you have
3123 exactly two operations left.
3124 If we had one exactly one op in the entire list to start with, we
3125 would have never called this function, and the tail recursion
3126 rewrites them one at a time. */
3127 if (opindex
+ 2 == ops
.length ())
3129 operand_entry_t oe1
, oe2
;
3132 oe2
= ops
[opindex
+ 1];
3134 if (rhs1
!= oe1
->op
|| rhs2
!= oe2
->op
)
3136 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
3138 fprintf (dump_file
, "Transforming ");
3139 print_gimple_stmt (dump_file
, stmt
, 0, 0);
3142 gimple_assign_set_rhs1 (stmt
, oe1
->op
);
3143 gimple_assign_set_rhs2 (stmt
, oe2
->op
);
3145 if (rhs1
!= oe1
->op
&& rhs1
!= oe2
->op
)
3146 remove_visited_stmt_chain (rhs1
);
3148 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
3150 fprintf (dump_file
, " into ");
3151 print_gimple_stmt (dump_file
, stmt
, 0, 0);
3157 /* If we hit here, we should have 3 or more ops left. */
3158 gcc_assert (opindex
+ 2 < ops
.length ());
3160 /* Rewrite the next operator. */
3167 ensure_ops_are_available (stmt
, ops
, opindex
);
3171 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
3173 fprintf (dump_file
, "Transforming ");
3174 print_gimple_stmt (dump_file
, stmt
, 0, 0);
3177 gimple_assign_set_rhs2 (stmt
, oe
->op
);
3180 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
3182 fprintf (dump_file
, " into ");
3183 print_gimple_stmt (dump_file
, stmt
, 0, 0);
3186 /* Recurse on the LHS of the binary operator, which is guaranteed to
3187 be the non-leaf side. */
3188 rewrite_expr_tree (SSA_NAME_DEF_STMT (rhs1
), opindex
+ 1, ops
, moved
);
/* Find out how many cycles we need to compute statements chain.
   OPS_NUM holds number os statements in a chain.  CPU_WIDTH is a
   maximum number of independent statements we may execute per cycle.  */

static int
get_required_cycles (int ops_num, int cpu_width)
{
  int res;
  int elog;
  unsigned int rest;

  /* While we have more than 2 * cpu_width operands
     we may reduce number of operands by cpu_width
     per cycle.  */
  res = ops_num / (2 * cpu_width);

  /* Remained operands count may be reduced twice per cycle
     until we have only one operand.  */
  rest = (unsigned)(ops_num - res * cpu_width);
  elog = exact_log2 (rest);
  if (elog >= 0)
    res += elog;
  else
    res += floor_log2 (rest) + 1;

  return res;
}
3219 /* Returns an optimal number of registers to use for computation of
3220 given statements. */
3223 get_reassociation_width (int ops_num
, enum tree_code opc
,
3224 enum machine_mode mode
)
3226 int param_width
= PARAM_VALUE (PARAM_TREE_REASSOC_WIDTH
);
3231 if (param_width
> 0)
3232 width
= param_width
;
3234 width
= targetm
.sched
.reassociation_width (opc
, mode
);
3239 /* Get the minimal time required for sequence computation. */
3240 cycles_best
= get_required_cycles (ops_num
, width
);
3242 /* Check if we may use less width and still compute sequence for
3243 the same time. It will allow us to reduce registers usage.
3244 get_required_cycles is monotonically increasing with lower width
3245 so we can perform a binary search for the minimal width that still
3246 results in the optimal cycle count. */
3248 while (width
> width_min
)
3250 int width_mid
= (width
+ width_min
) / 2;
3252 if (get_required_cycles (ops_num
, width_mid
) == cycles_best
)
3254 else if (width_min
< width_mid
)
3255 width_min
= width_mid
;
3263 /* Recursively rewrite our linearized statements so that the operators
3264 match those in OPS[OPINDEX], putting the computation in rank
3265 order and trying to allow operations to be executed in
3269 rewrite_expr_tree_parallel (gimple stmt
, int width
,
3270 vec
<operand_entry_t
> ops
)
3272 enum tree_code opcode
= gimple_assign_rhs_code (stmt
);
3273 int op_num
= ops
.length ();
3274 int stmt_num
= op_num
- 1;
3275 gimple
*stmts
= XALLOCAVEC (gimple
, stmt_num
);
3276 int op_index
= op_num
- 1;
3278 int ready_stmts_end
= 0;
3280 tree last_rhs1
= gimple_assign_rhs1 (stmt
);
3282 /* We start expression rewriting from the top statements.
3283 So, in this loop we create a full list of statements
3284 we will work with. */
3285 stmts
[stmt_num
- 1] = stmt
;
3286 for (i
= stmt_num
- 2; i
>= 0; i
--)
3287 stmts
[i
] = SSA_NAME_DEF_STMT (gimple_assign_rhs1 (stmts
[i
+1]));
3289 for (i
= 0; i
< stmt_num
; i
++)
3293 /* Determine whether we should use results of
3294 already handled statements or not. */
3295 if (ready_stmts_end
== 0
3296 && (i
- stmt_index
>= width
|| op_index
< 1))
3297 ready_stmts_end
= i
;
3299 /* Now we choose operands for the next statement. Non zero
3300 value in ready_stmts_end means here that we should use
3301 the result of already generated statements as new operand. */
3302 if (ready_stmts_end
> 0)
3304 op1
= gimple_assign_lhs (stmts
[stmt_index
++]);
3305 if (ready_stmts_end
> stmt_index
)
3306 op2
= gimple_assign_lhs (stmts
[stmt_index
++]);
3307 else if (op_index
>= 0)
3308 op2
= ops
[op_index
--]->op
;
3311 gcc_assert (stmt_index
< i
);
3312 op2
= gimple_assign_lhs (stmts
[stmt_index
++]);
3315 if (stmt_index
>= ready_stmts_end
)
3316 ready_stmts_end
= 0;
3321 swap_ops_for_binary_stmt (ops
, op_index
- 2, NULL
);
3322 op2
= ops
[op_index
--]->op
;
3323 op1
= ops
[op_index
--]->op
;
3326 /* If we emit the last statement then we should put
3327 operands into the last statement. It will also
3329 if (op_index
< 0 && stmt_index
== i
)
3332 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
3334 fprintf (dump_file
, "Transforming ");
3335 print_gimple_stmt (dump_file
, stmts
[i
], 0, 0);
3338 /* We keep original statement only for the last one. All
3339 others are recreated. */
3340 if (i
== stmt_num
- 1)
3342 gimple_assign_set_rhs1 (stmts
[i
], op1
);
3343 gimple_assign_set_rhs2 (stmts
[i
], op2
);
3344 update_stmt (stmts
[i
]);
3347 stmts
[i
] = build_and_add_sum (TREE_TYPE (last_rhs1
), op1
, op2
, opcode
);
3349 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
3351 fprintf (dump_file
, " into ");
3352 print_gimple_stmt (dump_file
, stmts
[i
], 0, 0);
3356 remove_visited_stmt_chain (last_rhs1
);
3359 /* Transform STMT, which is really (A +B) + (C + D) into the left
3360 linear form, ((A+B)+C)+D.
3361 Recurse on D if necessary. */
3364 linearize_expr (gimple stmt
)
3366 gimple_stmt_iterator gsinow
, gsirhs
;
3367 gimple binlhs
= SSA_NAME_DEF_STMT (gimple_assign_rhs1 (stmt
));
3368 gimple binrhs
= SSA_NAME_DEF_STMT (gimple_assign_rhs2 (stmt
));
3369 enum tree_code rhscode
= gimple_assign_rhs_code (stmt
);
3370 gimple newbinrhs
= NULL
;
3371 struct loop
*loop
= loop_containing_stmt (stmt
);
3373 gcc_assert (is_reassociable_op (binlhs
, rhscode
, loop
)
3374 && is_reassociable_op (binrhs
, rhscode
, loop
));
3376 gsinow
= gsi_for_stmt (stmt
);
3377 gsirhs
= gsi_for_stmt (binrhs
);
3378 gsi_move_before (&gsirhs
, &gsinow
);
3379 gimple_set_uid (binrhs
, gimple_uid (stmt
));
3381 gimple_assign_set_rhs2 (stmt
, gimple_assign_rhs1 (binrhs
));
3382 gimple_assign_set_rhs1 (binrhs
, gimple_assign_lhs (binlhs
));
3383 gimple_assign_set_rhs1 (stmt
, gimple_assign_lhs (binrhs
));
3385 if (TREE_CODE (gimple_assign_rhs2 (stmt
)) == SSA_NAME
)
3386 newbinrhs
= SSA_NAME_DEF_STMT (gimple_assign_rhs2 (stmt
));
3388 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
3390 fprintf (dump_file
, "Linearized: ");
3391 print_gimple_stmt (dump_file
, stmt
, 0, 0);
3394 reassociate_stats
.linearized
++;
3395 update_stmt (binrhs
);
3396 update_stmt (binlhs
);
3399 gimple_set_visited (stmt
, true);
3400 gimple_set_visited (binlhs
, true);
3401 gimple_set_visited (binrhs
, true);
3403 /* Tail recurse on the new rhs if it still needs reassociation. */
3404 if (newbinrhs
&& is_reassociable_op (newbinrhs
, rhscode
, loop
))
3405 /* ??? This should probably be linearize_expr (newbinrhs) but I don't
3406 want to change the algorithm while converting to tuples. */
3407 linearize_expr (stmt
);
3410 /* If LHS has a single immediate use that is a GIMPLE_ASSIGN statement, return
3411 it. Otherwise, return NULL. */
3414 get_single_immediate_use (tree lhs
)
3416 use_operand_p immuse
;
3419 if (TREE_CODE (lhs
) == SSA_NAME
3420 && single_imm_use (lhs
, &immuse
, &immusestmt
)
3421 && is_gimple_assign (immusestmt
))
3427 /* Recursively negate the value of TONEGATE, and return the SSA_NAME
3428 representing the negated value. Insertions of any necessary
3429 instructions go before GSI.
3430 This function is recursive in that, if you hand it "a_5" as the
3431 value to negate, and a_5 is defined by "a_5 = b_3 + b_4", it will
3432 transform b_3 + b_4 into a_5 = -b_3 + -b_4. */
3435 negate_value (tree tonegate
, gimple_stmt_iterator
*gsi
)
3437 gimple negatedefstmt
= NULL
;
3438 tree resultofnegate
;
3440 /* If we are trying to negate a name, defined by an add, negate the
3441 add operands instead. */
3442 if (TREE_CODE (tonegate
) == SSA_NAME
)
3443 negatedefstmt
= SSA_NAME_DEF_STMT (tonegate
);
3444 if (TREE_CODE (tonegate
) == SSA_NAME
3445 && is_gimple_assign (negatedefstmt
)
3446 && TREE_CODE (gimple_assign_lhs (negatedefstmt
)) == SSA_NAME
3447 && has_single_use (gimple_assign_lhs (negatedefstmt
))
3448 && gimple_assign_rhs_code (negatedefstmt
) == PLUS_EXPR
)
3450 gimple_stmt_iterator gsi
;
3451 tree rhs1
= gimple_assign_rhs1 (negatedefstmt
);
3452 tree rhs2
= gimple_assign_rhs2 (negatedefstmt
);
3454 gsi
= gsi_for_stmt (negatedefstmt
);
3455 rhs1
= negate_value (rhs1
, &gsi
);
3456 gimple_assign_set_rhs1 (negatedefstmt
, rhs1
);
3458 gsi
= gsi_for_stmt (negatedefstmt
);
3459 rhs2
= negate_value (rhs2
, &gsi
);
3460 gimple_assign_set_rhs2 (negatedefstmt
, rhs2
);
3462 update_stmt (negatedefstmt
);
3463 return gimple_assign_lhs (negatedefstmt
);
3466 tonegate
= fold_build1 (NEGATE_EXPR
, TREE_TYPE (tonegate
), tonegate
);
3467 resultofnegate
= force_gimple_operand_gsi (gsi
, tonegate
, true,
3468 NULL_TREE
, true, GSI_SAME_STMT
);
3469 return resultofnegate
;
3472 /* Return true if we should break up the subtract in STMT into an add
3473 with negate. This is true when we the subtract operands are really
3474 adds, or the subtract itself is used in an add expression. In
3475 either case, breaking up the subtract into an add with negate
3476 exposes the adds to reassociation. */
3479 should_break_up_subtract (gimple stmt
)
3481 tree lhs
= gimple_assign_lhs (stmt
);
3482 tree binlhs
= gimple_assign_rhs1 (stmt
);
3483 tree binrhs
= gimple_assign_rhs2 (stmt
);
3485 struct loop
*loop
= loop_containing_stmt (stmt
);
3487 if (TREE_CODE (binlhs
) == SSA_NAME
3488 && is_reassociable_op (SSA_NAME_DEF_STMT (binlhs
), PLUS_EXPR
, loop
))
3491 if (TREE_CODE (binrhs
) == SSA_NAME
3492 && is_reassociable_op (SSA_NAME_DEF_STMT (binrhs
), PLUS_EXPR
, loop
))
3495 if (TREE_CODE (lhs
) == SSA_NAME
3496 && (immusestmt
= get_single_immediate_use (lhs
))
3497 && is_gimple_assign (immusestmt
)
3498 && (gimple_assign_rhs_code (immusestmt
) == PLUS_EXPR
3499 || gimple_assign_rhs_code (immusestmt
) == MULT_EXPR
))
3504 /* Transform STMT from A - B into A + -B. */
3507 break_up_subtract (gimple stmt
, gimple_stmt_iterator
*gsip
)
3509 tree rhs1
= gimple_assign_rhs1 (stmt
);
3510 tree rhs2
= gimple_assign_rhs2 (stmt
);
3512 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
3514 fprintf (dump_file
, "Breaking up subtract ");
3515 print_gimple_stmt (dump_file
, stmt
, 0, 0);
3518 rhs2
= negate_value (rhs2
, gsip
);
3519 gimple_assign_set_rhs_with_ops (gsip
, PLUS_EXPR
, rhs1
, rhs2
);
3523 /* Determine whether STMT is a builtin call that raises an SSA name
3524 to an integer power and has only one use. If so, and this is early
3525 reassociation and unsafe math optimizations are permitted, place
3526 the SSA name in *BASE and the exponent in *EXPONENT, and return TRUE.
3527 If any of these conditions does not hold, return FALSE. */
3530 acceptable_pow_call (gimple stmt
, tree
*base
, HOST_WIDE_INT
*exponent
)
3533 REAL_VALUE_TYPE c
, cint
;
3535 if (!first_pass_instance
3536 || !flag_unsafe_math_optimizations
3537 || !is_gimple_call (stmt
)
3538 || !has_single_use (gimple_call_lhs (stmt
)))
3541 fndecl
= gimple_call_fndecl (stmt
);
3544 || DECL_BUILT_IN_CLASS (fndecl
) != BUILT_IN_NORMAL
)
3547 switch (DECL_FUNCTION_CODE (fndecl
))
3549 CASE_FLT_FN (BUILT_IN_POW
):
3550 *base
= gimple_call_arg (stmt
, 0);
3551 arg1
= gimple_call_arg (stmt
, 1);
3553 if (TREE_CODE (arg1
) != REAL_CST
)
3556 c
= TREE_REAL_CST (arg1
);
3558 if (REAL_EXP (&c
) > HOST_BITS_PER_WIDE_INT
)
3561 *exponent
= real_to_integer (&c
);
3562 real_from_integer (&cint
, VOIDmode
, *exponent
,
3563 *exponent
< 0 ? -1 : 0, 0);
3564 if (!real_identical (&c
, &cint
))
3569 CASE_FLT_FN (BUILT_IN_POWI
):
3570 *base
= gimple_call_arg (stmt
, 0);
3571 arg1
= gimple_call_arg (stmt
, 1);
3573 if (!host_integerp (arg1
, 0))
3576 *exponent
= TREE_INT_CST_LOW (arg1
);
3583 /* Expanding negative exponents is generally unproductive, so we don't
3584 complicate matters with those. Exponents of zero and one should
3585 have been handled by expression folding. */
3586 if (*exponent
< 2 || TREE_CODE (*base
) != SSA_NAME
)
3592 /* Recursively linearize a binary expression that is the RHS of STMT.
3593 Place the operands of the expression tree in the vector named OPS. */
3596 linearize_expr_tree (vec
<operand_entry_t
> *ops
, gimple stmt
,
3597 bool is_associative
, bool set_visited
)
3599 tree binlhs
= gimple_assign_rhs1 (stmt
);
3600 tree binrhs
= gimple_assign_rhs2 (stmt
);
3601 gimple binlhsdef
= NULL
, binrhsdef
= NULL
;
3602 bool binlhsisreassoc
= false;
3603 bool binrhsisreassoc
= false;
3604 enum tree_code rhscode
= gimple_assign_rhs_code (stmt
);
3605 struct loop
*loop
= loop_containing_stmt (stmt
);
3606 tree base
= NULL_TREE
;
3607 HOST_WIDE_INT exponent
= 0;
3610 gimple_set_visited (stmt
, true);
3612 if (TREE_CODE (binlhs
) == SSA_NAME
)
3614 binlhsdef
= SSA_NAME_DEF_STMT (binlhs
);
3615 binlhsisreassoc
= (is_reassociable_op (binlhsdef
, rhscode
, loop
)
3616 && !stmt_could_throw_p (binlhsdef
));
3619 if (TREE_CODE (binrhs
) == SSA_NAME
)
3621 binrhsdef
= SSA_NAME_DEF_STMT (binrhs
);
3622 binrhsisreassoc
= (is_reassociable_op (binrhsdef
, rhscode
, loop
)
3623 && !stmt_could_throw_p (binrhsdef
));
3626 /* If the LHS is not reassociable, but the RHS is, we need to swap
3627 them. If neither is reassociable, there is nothing we can do, so
3628 just put them in the ops vector. If the LHS is reassociable,
3629 linearize it. If both are reassociable, then linearize the RHS
3632 if (!binlhsisreassoc
)
3636 /* If this is not a associative operation like division, give up. */
3637 if (!is_associative
)
3639 add_to_ops_vec (ops
, binrhs
);
3643 if (!binrhsisreassoc
)
3645 if (rhscode
== MULT_EXPR
3646 && TREE_CODE (binrhs
) == SSA_NAME
3647 && acceptable_pow_call (binrhsdef
, &base
, &exponent
))
3649 add_repeat_to_ops_vec (ops
, base
, exponent
);
3650 gimple_set_visited (binrhsdef
, true);
3653 add_to_ops_vec (ops
, binrhs
);
3655 if (rhscode
== MULT_EXPR
3656 && TREE_CODE (binlhs
) == SSA_NAME
3657 && acceptable_pow_call (binlhsdef
, &base
, &exponent
))
3659 add_repeat_to_ops_vec (ops
, base
, exponent
);
3660 gimple_set_visited (binlhsdef
, true);
3663 add_to_ops_vec (ops
, binlhs
);
3668 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
3670 fprintf (dump_file
, "swapping operands of ");
3671 print_gimple_stmt (dump_file
, stmt
, 0, 0);
3674 swap_ssa_operands (stmt
,
3675 gimple_assign_rhs1_ptr (stmt
),
3676 gimple_assign_rhs2_ptr (stmt
));
3679 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
3681 fprintf (dump_file
, " is now ");
3682 print_gimple_stmt (dump_file
, stmt
, 0, 0);
3685 /* We want to make it so the lhs is always the reassociative op,
3691 else if (binrhsisreassoc
)
3693 linearize_expr (stmt
);
3694 binlhs
= gimple_assign_rhs1 (stmt
);
3695 binrhs
= gimple_assign_rhs2 (stmt
);
3698 gcc_assert (TREE_CODE (binrhs
) != SSA_NAME
3699 || !is_reassociable_op (SSA_NAME_DEF_STMT (binrhs
),
3701 linearize_expr_tree (ops
, SSA_NAME_DEF_STMT (binlhs
),
3702 is_associative
, set_visited
);
3704 if (rhscode
== MULT_EXPR
3705 && TREE_CODE (binrhs
) == SSA_NAME
3706 && acceptable_pow_call (SSA_NAME_DEF_STMT (binrhs
), &base
, &exponent
))
3708 add_repeat_to_ops_vec (ops
, base
, exponent
);
3709 gimple_set_visited (SSA_NAME_DEF_STMT (binrhs
), true);
3712 add_to_ops_vec (ops
, binrhs
);
3715 /* Repropagate the negates back into subtracts, since no other pass
3716 currently does it. */
/* Walks the global plus_negates vector (populated by break_up_subtract_bb)
   and folds each NEGATE_EXPR back into its single use, e.g. turning
   "t = -a; x = b + t" into "x = b - a".  */
3719 repropagate_negates (void)
3724 FOR_EACH_VEC_ELT (plus_negates
, i
, negate
)
3726 gimple user
= get_single_immediate_use (negate
);
/* Only handle negates whose single use is a GIMPLE assignment.  */
3728 if (!user
|| !is_gimple_assign (user
))
3731 /* The negate operand can be either operand of a PLUS_EXPR
3732 (it can be the LHS if the RHS is a constant for example).
3734 Force the negate operand to the RHS of the PLUS_EXPR, then
3735 transform the PLUS_EXPR into a MINUS_EXPR. */
3736 if (gimple_assign_rhs_code (user
) == PLUS_EXPR
)
3738 /* If the negated operand appears on the LHS of the
3739 PLUS_EXPR, exchange the operands of the PLUS_EXPR
3740 to force the negated operand to the RHS of the PLUS_EXPR. */
3741 if (gimple_assign_rhs1 (user
) == negate
)
3743 swap_ssa_operands (user
,
3744 gimple_assign_rhs1_ptr (user
),
3745 gimple_assign_rhs2_ptr (user
));
3748 /* Now transform the PLUS_EXPR into a MINUS_EXPR and replace
3749 the RHS of the PLUS_EXPR with the operand of the NEGATE_EXPR. */
3750 if (gimple_assign_rhs2 (user
) == negate
)
3752 tree rhs1
= gimple_assign_rhs1 (user
);
3753 tree rhs2
= get_unary_op (negate
, NEGATE_EXPR
);
3754 gimple_stmt_iterator gsi
= gsi_for_stmt (user
);
3755 gimple_assign_set_rhs_with_ops (&gsi
, MINUS_EXPR
, rhs1
, rhs2
);
3759 else if (gimple_assign_rhs_code (user
) == MINUS_EXPR
)
3761 if (gimple_assign_rhs1 (user
) == negate
)
3766 which we transform into
3769 This pushes down the negate which we possibly can merge
3770 into some other operation, hence insert it into the
3771 plus_negates vector. */
3772 gimple feed
= SSA_NAME_DEF_STMT (negate
);
3773 tree a
= gimple_assign_rhs1 (feed
);
3774 tree rhs2
= gimple_assign_rhs2 (user
);
3775 gimple_stmt_iterator gsi
= gsi_for_stmt (feed
), gsi2
;
/* Reuse the negate's SSA name as the new PLUS result, then
   re-negate it at the user's location.  */
3776 gimple_replace_ssa_lhs (feed
, negate
);
3777 gimple_assign_set_rhs_with_ops (&gsi
, PLUS_EXPR
, a
, rhs2
);
3778 update_stmt (gsi_stmt (gsi
));
3779 gsi2
= gsi_for_stmt (user
);
3780 gimple_assign_set_rhs_with_ops (&gsi2
, NEGATE_EXPR
, negate
, NULL
);
3781 update_stmt (gsi_stmt (gsi2
));
3782 gsi_move_before (&gsi
, &gsi2
);
/* The freshly created negate may itself be mergeable later, so
   queue it for another round.  */
3783 plus_negates
.safe_push (gimple_assign_lhs (gsi_stmt (gsi2
)));
3787 /* Transform "x = -a; y = b - x" into "y = b + a", getting
3788 rid of one operation. */
3789 gimple feed
= SSA_NAME_DEF_STMT (negate
);
3790 tree a
= gimple_assign_rhs1 (feed
);
3791 tree rhs1
= gimple_assign_rhs1 (user
);
3792 gimple_stmt_iterator gsi
= gsi_for_stmt (user
);
3793 gimple_assign_set_rhs_with_ops (&gsi
, PLUS_EXPR
, rhs1
, a
);
3794 update_stmt (gsi_stmt (gsi
));
3800 /* Returns true if OP is of a type for which we can do reassociation.
3801 That is for integral or non-saturating fixed-point types, and for
3802 floating point type when associative-math is enabled. */
3805 can_reassociate_p (tree op
)
3807 tree type
= TREE_TYPE (op
);
3808 if ((INTEGRAL_TYPE_P (type
) && TYPE_OVERFLOW_WRAPS (type
))
3809 || NON_SAT_FIXED_POINT_TYPE_P (type
)
3810 || (flag_associative_math
&& FLOAT_TYPE_P (type
)))
3815 /* Break up subtract operations in block BB.
3817 We do this top down because we don't know whether the subtract is
3818 part of a possible chain of reassociation except at the top.
3827 we want to break up k = t - q, but we won't until we've transformed q
3828 = b - r, which won't be broken up until we transform b = c - d.
3830 En passant, clear the GIMPLE visited flag on every statement. */
3833 break_up_subtract_bb (basic_block bb
)
3835 gimple_stmt_iterator gsi
;
3838 for (gsi
= gsi_start_bb (bb
); !gsi_end_p (gsi
); gsi_next (&gsi
))
3840 gimple stmt
= gsi_stmt (gsi
);
/* Clear the visited flag so the later reassociate_bb walk starts
   from a clean slate.  */
3841 gimple_set_visited (stmt
, false);
3843 if (!is_gimple_assign (stmt
)
3844 || !can_reassociate_p (gimple_assign_lhs (stmt
)))
3847 /* Look for simple gimple subtract operations. */
3848 if (gimple_assign_rhs_code (stmt
) == MINUS_EXPR
)
3850 if (!can_reassociate_p (gimple_assign_rhs1 (stmt
))
3851 || !can_reassociate_p (gimple_assign_rhs2 (stmt
)))
3854 /* Check for a subtract used only in an addition. If this
3855 is the case, transform it into add of a negate for better
3856 reassociation. IE transform C = A-B into C = A + -B if C
3857 is only used in an addition. */
3858 if (should_break_up_subtract (stmt
))
3859 break_up_subtract (stmt
, &gsi
);
/* Record plain negates; repropagate_negates later folds them back
   into subtracts.  */
3861 else if (gimple_assign_rhs_code (stmt
) == NEGATE_EXPR
3862 && can_reassociate_p (gimple_assign_rhs1 (stmt
)))
3863 plus_negates
.safe_push (gimple_assign_lhs (stmt
));
/* Recurse into the blocks dominated by BB (top-down walk).  */
3865 for (son
= first_dom_son (CDI_DOMINATORS
, bb
);
3867 son
= next_dom_son (CDI_DOMINATORS
, son
))
3868 break_up_subtract_bb (son
);
3871 /* Used for repeated factor analysis. */
3872 struct repeat_factor_d
3874 /* An SSA name that occurs in a multiply chain. */
3877 /* Cached rank of the factor. */
3880 /* Number of occurrences of the factor in the chain. */
3881 HOST_WIDE_INT count
;
3883 /* An SSA name representing the product of this factor and
3884 all factors appearing later in the repeated factor vector. */
3888 typedef struct repeat_factor_d repeat_factor
, *repeat_factor_t
;
3889 typedef const struct repeat_factor_d
*const_repeat_factor_t
;
/* Scratch vector of repeated factors; created, sorted, consumed and
   released per multiply chain by attempt_builtin_powi below.  */
3892 static vec
<repeat_factor
> repeat_factor_vec
;
3894 /* Used for sorting the repeat factor vector. Sort primarily by
3895 ascending occurrence count, secondarily by descending rank. */
3898 compare_repeat_factors (const void *x1
, const void *x2
)
3900 const_repeat_factor_t rf1
= (const_repeat_factor_t
) x1
;
3901 const_repeat_factor_t rf2
= (const_repeat_factor_t
) x2
;
3903 if (rf1
->count
!= rf2
->count
)
3904 return rf1
->count
- rf2
->count
;
3906 return rf2
->rank
- rf1
->rank
;
3909 /* Look for repeated operands in OPS in the multiply tree rooted at
3910 STMT. Replace them with an optimal sequence of multiplies and powi
3911 builtin calls, and remove the used operands from OPS. Return an
3912 SSA name representing the value of the replacement sequence. */
/* NOTE(review): RESULT starts as NULL_TREE and accumulates the product
   of all __builtin_powi calls created below; presumably it is returned
   to the caller on an elided line -- confirm against the full source.  */
3915 attempt_builtin_powi (gimple stmt
, vec
<operand_entry_t
> *ops
)
3917 unsigned i
, j
, vec_len
;
3920 repeat_factor_t rf1
, rf2
;
3921 repeat_factor rfnew
;
3922 tree result
= NULL_TREE
;
3923 tree target_ssa
, iter_result
;
3924 tree type
= TREE_TYPE (gimple_get_lhs (stmt
));
3925 tree powi_fndecl
= mathfn_built_in (type
, BUILT_IN_POWI
);
3926 gimple_stmt_iterator gsi
= gsi_for_stmt (stmt
);
3927 gimple mul_stmt
, pow_stmt
;
3929 /* Nothing to do if BUILT_IN_POWI doesn't exist for this type and
3934 /* Allocate the repeated factor vector. */
3935 repeat_factor_vec
.create (10);
3937 /* Scan the OPS vector for all SSA names in the product and build
3938 up a vector of occurrence counts for each factor. */
3939 FOR_EACH_VEC_ELT (*ops
, i
, oe
)
3941 if (TREE_CODE (oe
->op
) == SSA_NAME
)
3943 FOR_EACH_VEC_ELT (repeat_factor_vec
, j
, rf1
)
3945 if (rf1
->factor
== oe
->op
)
3947 rf1
->count
+= oe
->count
;
/* No existing entry matched OE->op: append a fresh factor record.  */
3952 if (j
>= repeat_factor_vec
.length ())
3954 rfnew
.factor
= oe
->op
;
3955 rfnew
.rank
= oe
->rank
;
3956 rfnew
.count
= oe
->count
;
3957 rfnew
.repr
= NULL_TREE
;
3958 repeat_factor_vec
.safe_push (rfnew
);
3963 /* Sort the repeated factor vector by (a) increasing occurrence count,
3964 and (b) decreasing rank. */
3965 repeat_factor_vec
.qsort (compare_repeat_factors
);
3967 /* It is generally best to combine as many base factors as possible
3968 into a product before applying __builtin_powi to the result.
3969 However, the sort order chosen for the repeated factor vector
3970 allows us to cache partial results for the product of the base
3971 factors for subsequent use. When we already have a cached partial
3972 result from a previous iteration, it is best to make use of it
3973 before looking for another __builtin_pow opportunity.
3975 As an example, consider x * x * y * y * y * z * z * z * z.
3976 We want to first compose the product x * y * z, raise it to the
3977 second power, then multiply this by y * z, and finally multiply
3978 by z. This can be done in 5 multiplies provided we cache y * z
3979 for use in both expressions:
3987 If we instead ignored the cached y * z and first multiplied by
3988 the __builtin_pow opportunity z * z, we would get the inferior:
3997 vec_len
= repeat_factor_vec
.length ();
3999 /* Repeatedly look for opportunities to create a builtin_powi call. */
/* POWER is the exponent for the __builtin_powi call built in the
   current iteration (see the ")^" dumps and the count decrement
   below).  */
4002 HOST_WIDE_INT power
;
4004 /* First look for the largest cached product of factors from
4005 preceding iterations. If found, create a builtin_powi for
4006 it if the minimum occurrence count for its factors is at
4007 least 2, or just use this cached product as our next
4008 multiplicand if the minimum occurrence count is 1. */
4009 FOR_EACH_VEC_ELT (repeat_factor_vec
, j
, rf1
)
4011 if (rf1
->repr
&& rf1
->count
> 0)
4021 iter_result
= rf1
->repr
;
4023 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
4027 fputs ("Multiplying by cached product ", dump_file
);
4028 for (elt
= j
; elt
< vec_len
; elt
++)
4030 rf
= &repeat_factor_vec
[elt
];
4031 print_generic_expr (dump_file
, rf
->factor
, 0);
4032 if (elt
< vec_len
- 1)
4033 fputs (" * ", dump_file
);
4035 fputs ("\n", dump_file
);
/* Cached product occurs more than once: raise it to its occurrence
   count with a __builtin_powi call (the exponent argument falls on
   an elided line).  */
4040 iter_result
= make_temp_ssa_name (type
, NULL
, "reassocpow");
4041 pow_stmt
= gimple_build_call (powi_fndecl
, 2, rf1
->repr
,
4042 build_int_cst (integer_type_node
,
4044 gimple_call_set_lhs (pow_stmt
, iter_result
);
4045 gimple_set_location (pow_stmt
, gimple_location (stmt
));
4046 gsi_insert_before (&gsi
, pow_stmt
, GSI_SAME_STMT
);
4048 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
4052 fputs ("Building __builtin_pow call for cached product (",
4054 for (elt
= j
; elt
< vec_len
; elt
++)
4056 rf
= &repeat_factor_vec
[elt
];
4057 print_generic_expr (dump_file
, rf
->factor
, 0);
4058 if (elt
< vec_len
- 1)
4059 fputs (" * ", dump_file
);
4061 fprintf (dump_file
, ")^"HOST_WIDE_INT_PRINT_DEC
"\n",
4068 /* Otherwise, find the first factor in the repeated factor
4069 vector whose occurrence count is at least 2. If no such
4070 factor exists, there are no builtin_powi opportunities
4072 FOR_EACH_VEC_ELT (repeat_factor_vec
, j
, rf1
)
4074 if (rf1
->count
>= 2)
4083 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
4087 fputs ("Building __builtin_pow call for (", dump_file
);
4088 for (elt
= j
; elt
< vec_len
; elt
++)
4090 rf
= &repeat_factor_vec
[elt
];
4091 print_generic_expr (dump_file
, rf
->factor
, 0);
4092 if (elt
< vec_len
- 1)
4093 fputs (" * ", dump_file
);
4095 fprintf (dump_file
, ")^"HOST_WIDE_INT_PRINT_DEC
"\n", power
);
4098 reassociate_stats
.pows_created
++;
4100 /* Visit each element of the vector in reverse order (so that
4101 high-occurrence elements are visited first, and within the
4102 same occurrence count, lower-ranked elements are visited
4103 first). Form a linear product of all elements in this order
4104 whose occurrencce count is at least that of element J.
4105 Record the SSA name representing the product of each element
4106 with all subsequent elements in the vector. */
4107 if (j
== vec_len
- 1)
4108 rf1
->repr
= rf1
->factor
;
4111 for (ii
= vec_len
- 2; ii
>= (int)j
; ii
--)
4115 rf1
= &repeat_factor_vec
[ii
];
4116 rf2
= &repeat_factor_vec
[ii
+ 1];
4118 /* Init the last factor's representative to be itself. */
4120 rf2
->repr
= rf2
->factor
;
4125 target_ssa
= make_temp_ssa_name (type
, NULL
, "reassocpow");
4126 mul_stmt
= gimple_build_assign_with_ops (MULT_EXPR
,
4129 gimple_set_location (mul_stmt
, gimple_location (stmt
));
4130 gsi_insert_before (&gsi
, mul_stmt
, GSI_SAME_STMT
);
4131 rf1
->repr
= target_ssa
;
4133 /* Don't reprocess the multiply we just introduced. */
4134 gimple_set_visited (mul_stmt
, true);
4138 /* Form a call to __builtin_powi for the maximum product
4139 just formed, raised to the power obtained earlier. */
4140 rf1
= &repeat_factor_vec
[j
];
4141 iter_result
= make_temp_ssa_name (type
, NULL
, "reassocpow");
4142 pow_stmt
= gimple_build_call (powi_fndecl
, 2, rf1
->repr
,
4143 build_int_cst (integer_type_node
,
4145 gimple_call_set_lhs (pow_stmt
, iter_result
);
4146 gimple_set_location (pow_stmt
, gimple_location (stmt
));
4147 gsi_insert_before (&gsi
, pow_stmt
, GSI_SAME_STMT
);
4150 /* If we previously formed at least one other builtin_powi call,
4151 form the product of this one and those others. */
4154 tree new_result
= make_temp_ssa_name (type
, NULL
, "reassocpow");
4155 mul_stmt
= gimple_build_assign_with_ops (MULT_EXPR
, new_result
,
4156 result
, iter_result
);
4157 gimple_set_location (mul_stmt
, gimple_location (stmt
));
4158 gsi_insert_before (&gsi
, mul_stmt
, GSI_SAME_STMT
);
4159 gimple_set_visited (mul_stmt
, true);
4160 result
= new_result
;
4163 result
= iter_result
;
4165 /* Decrement the occurrence count of each element in the product
4166 by the count found above, and remove this many copies of each
4168 for (i
= j
; i
< vec_len
; i
++)
4173 rf1
= &repeat_factor_vec
[i
];
4174 rf1
->count
-= power
;
4176 FOR_EACH_VEC_ELT_REVERSE (*ops
, n
, oe
)
4178 if (oe
->op
== rf1
->factor
)
4182 ops
->ordered_remove (n
);
4198 /* At this point all elements in the repeated factor vector have a
4199 remaining occurrence count of 0 or 1, and those with a count of 1
4200 don't have cached representatives. Re-sort the ops vector and
4202 ops
->qsort (sort_by_operand_rank
);
4203 repeat_factor_vec
.release ();
4205 /* Return the final product computed herein. Note that there may
4206 still be some elements with single occurrence count left in OPS;
4207 those will be handled by the normal reassociation logic. */
4211 /* Transform STMT at *GSI into a copy by replacing its rhs with NEW_RHS. */
4214 transform_stmt_to_copy (gimple_stmt_iterator
*gsi
, gimple stmt
, tree new_rhs
)
4218 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
4220 fprintf (dump_file
, "Transforming ");
4221 print_gimple_stmt (dump_file
, stmt
, 0, 0);
/* Remember the old operand chain so the visited statements that fed
   it can be removed once the rhs has been replaced.  */
4224 rhs1
= gimple_assign_rhs1 (stmt
);
4225 gimple_assign_set_rhs_from_tree (gsi
, new_rhs
);
4227 remove_visited_stmt_chain (rhs1
);
4229 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
4231 fprintf (dump_file
, " into ");
4232 print_gimple_stmt (dump_file
, stmt
, 0, 0);
4236 /* Transform STMT at *GSI into a multiply of RHS1 and RHS2. */
4239 transform_stmt_to_multiply (gimple_stmt_iterator
*gsi
, gimple stmt
,
4240 tree rhs1
, tree rhs2
)
4242 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
4244 fprintf (dump_file
, "Transforming ");
4245 print_gimple_stmt (dump_file
, stmt
, 0, 0);
/* Rewrite STMT in place as rhs1 * rhs2, then clean up the now-unused
   visited chain hanging off the old rhs1.  */
4248 gimple_assign_set_rhs_with_ops (gsi
, MULT_EXPR
, rhs1
, rhs2
);
4249 update_stmt (gsi_stmt (*gsi
));
4250 remove_visited_stmt_chain (rhs1
);
4252 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
4254 fprintf (dump_file
, " into ");
4255 print_gimple_stmt (dump_file
, stmt
, 0, 0);
4259 /* Reassociate expressions in basic block BB and its post-dominator as
4263 reassociate_bb (basic_block bb
)
4265 gimple_stmt_iterator gsi
;
4267 gimple stmt
= last_stmt (bb
);
/* Give the range-test optimizer a chance on the block's final
   statement before the statement-by-statement walk.  */
4269 if (stmt
&& !gimple_visited_p (stmt
))
4270 maybe_optimize_range_tests (stmt
);
/* Walk the block backwards so feeding statements are still in place
   when the root of each expression tree is processed.  */
4272 for (gsi
= gsi_last_bb (bb
); !gsi_end_p (gsi
); gsi_prev (&gsi
))
4274 stmt
= gsi_stmt (gsi
);
4276 if (is_gimple_assign (stmt
)
4277 && !stmt_could_throw_p (stmt
))
4279 tree lhs
, rhs1
, rhs2
;
4280 enum tree_code rhs_code
= gimple_assign_rhs_code (stmt
);
4282 /* If this is not a gimple binary expression, there is
4283 nothing for us to do with it. */
4284 if (get_gimple_rhs_class (rhs_code
) != GIMPLE_BINARY_RHS
)
4287 /* If this was part of an already processed statement,
4288 we don't need to touch it again. */
4289 if (gimple_visited_p (stmt
))
4291 /* This statement might have become dead because of previous
4293 if (has_zero_uses (gimple_get_lhs (stmt
)))
4295 gsi_remove (&gsi
, true);
4296 release_defs (stmt
);
4297 /* We might end up removing the last stmt above which
4298 places the iterator to the end of the sequence.
4299 Reset it to the last stmt in this case which might
4300 be the end of the sequence as well if we removed
4301 the last statement of the sequence. In which case
4302 we need to bail out. */
4303 if (gsi_end_p (gsi
))
4305 gsi
= gsi_last_bb (bb
);
4306 if (gsi_end_p (gsi
))
4313 lhs
= gimple_assign_lhs (stmt
);
4314 rhs1
= gimple_assign_rhs1 (stmt
);
4315 rhs2
= gimple_assign_rhs2 (stmt
);
4317 /* For non-bit or min/max operations we can't associate
4318 all types. Verify that here. */
4319 if (rhs_code
!= BIT_IOR_EXPR
4320 && rhs_code
!= BIT_AND_EXPR
4321 && rhs_code
!= BIT_XOR_EXPR
4322 && rhs_code
!= MIN_EXPR
4323 && rhs_code
!= MAX_EXPR
4324 && (!can_reassociate_p (lhs
)
4325 || !can_reassociate_p (rhs1
)
4326 || !can_reassociate_p (rhs2
)))
4329 if (associative_tree_code (rhs_code
))
4331 vec
<operand_entry_t
> ops
= vNULL
;
4332 tree powi_result
= NULL_TREE
;
4334 /* There may be no immediate uses left by the time we
4335 get here because we may have eliminated them all. */
4336 if (TREE_CODE (lhs
) == SSA_NAME
&& has_zero_uses (lhs
))
4339 gimple_set_visited (stmt
, true);
4340 linearize_expr_tree (&ops
, stmt
, true, true);
4341 ops
.qsort (sort_by_operand_rank
);
4342 optimize_ops_list (rhs_code
, &ops
);
/* Undistribution can expose new opportunities, so re-sort and
   re-optimize the operand list when it changed anything.  */
4343 if (undistribute_ops_list (rhs_code
, &ops
,
4344 loop_containing_stmt (stmt
)))
4346 ops
.qsort (sort_by_operand_rank
);
4347 optimize_ops_list (rhs_code
, &ops
);
4350 if (rhs_code
== BIT_IOR_EXPR
|| rhs_code
== BIT_AND_EXPR
)
4351 optimize_range_tests (rhs_code
, &ops
);
4353 if (first_pass_instance
4354 && rhs_code
== MULT_EXPR
4355 && flag_unsafe_math_optimizations
)
4356 powi_result
= attempt_builtin_powi (stmt
, &ops
);
4358 /* If the operand vector is now empty, all operands were
4359 consumed by the __builtin_powi optimization. */
4360 if (ops
.length () == 0)
4361 transform_stmt_to_copy (&gsi
, stmt
, powi_result
);
4362 else if (ops
.length () == 1)
4364 tree last_op
= ops
.last ()->op
;
4367 transform_stmt_to_multiply (&gsi
, stmt
, last_op
,
4370 transform_stmt_to_copy (&gsi
, stmt
, last_op
);
4374 enum machine_mode mode
= TYPE_MODE (TREE_TYPE (lhs
));
4375 int ops_num
= ops
.length ();
4376 int width
= get_reassociation_width (ops_num
, rhs_code
, mode
);
4378 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
4380 "Width = %d was chosen for reassociation\n", width
);
4383 && ops
.length () > 3)
4384 rewrite_expr_tree_parallel (stmt
, width
, ops
);
4387 /* When there are three operands left, we want
4388 to make sure the ones that get the double
4389 binary op are chosen wisely. */
4390 int len
= ops
.length ();
4392 swap_ops_for_binary_stmt (ops
, len
- 3, stmt
);
4394 rewrite_expr_tree (stmt
, 0, ops
, false);
4397 /* If we combined some repeated factors into a
4398 __builtin_powi call, multiply that result by the
4399 reassociated operands. */
4403 tree type
= TREE_TYPE (gimple_get_lhs (stmt
));
4404 tree target_ssa
= make_temp_ssa_name (type
, NULL
,
4406 gimple_set_lhs (stmt
, target_ssa
);
4408 mul_stmt
= gimple_build_assign_with_ops (MULT_EXPR
, lhs
,
4411 gimple_set_location (mul_stmt
, gimple_location (stmt
));
4412 gsi_insert_after (&gsi
, mul_stmt
, GSI_NEW_STMT
);
/* Recurse over the post-dominator tree.  */
4420 for (son
= first_dom_son (CDI_POST_DOMINATORS
, bb
);
4422 son
= next_dom_son (CDI_POST_DOMINATORS
, son
))
4423 reassociate_bb (son
);
/* Forward declarations of the debug helpers below, so they keep
   external linkage and stay callable from a debugger.  */
4426 void dump_ops_vector (FILE *file
, vec
<operand_entry_t
> ops
);
4427 void debug_ops_vector (vec
<operand_entry_t
> ops
);
4429 /* Dump the operand entry vector OPS to FILE. */
4432 dump_ops_vector (FILE *file
, vec
<operand_entry_t
> ops
)
/* One line per operand: its index, its rank, then the tree itself.  */
4437 FOR_EACH_VEC_ELT (ops
, i
, oe
)
4439 fprintf (file
, "Op %d -> rank: %d, tree: ", i
, oe
->rank
);
4440 print_generic_expr (file
, oe
->op
, 0);
4444 /* Dump the operand entry vector OPS to STDERR. */
4447 debug_ops_vector (vec
<operand_entry_t
> ops
)
4449 dump_ops_vector (stderr
, ops
);
/* Break up subtracts over the dominator tree, renumber statement uids
   (used for ranking), then reassociate over the post-dominator tree.
   NOTE(review): the enclosing function header (presumably do_reassoc)
   falls on elided lines.  */
4455 break_up_subtract_bb (ENTRY_BLOCK_PTR
);
4456 renumber_gimple_stmt_uids ();
4457 reassociate_bb (EXIT_BLOCK_PTR
);
4460 /* Initialize the reassociation pass. */
4467 int *bbs
= XNEWVEC (int, n_basic_blocks
- NUM_FIXED_BLOCKS
);
4469 /* Find the loops, so that we can prevent moving calculations in
4471 loop_optimizer_init (AVOID_CFG_MODIFICATIONS
);
4473 memset (&reassociate_stats
, 0, sizeof (reassociate_stats
));
4475 operand_entry_pool
= create_alloc_pool ("operand entry pool",
4476 sizeof (struct operand_entry
), 30);
4477 next_operand_entry_id
= 0;
4479 /* Reverse RPO (Reverse Post Order) will give us something where
4480 deeper loops come later. */
4481 pre_and_rev_post_order_compute (NULL
, bbs
, false);
4482 bb_rank
= XCNEWVEC (long, last_basic_block
);
4483 operand_rank
= pointer_map_create ();
4485 /* Give each default definition a distinct rank. This includes
4486 parameters and the static chain. Walk backwards over all
4487 SSA names so that we get proper rank ordering according
4488 to tree_swap_operands_p. */
4489 for (i
= num_ssa_names
- 1; i
> 0; --i
)
4491 tree name
= ssa_name (i
);
4492 if (name
&& SSA_NAME_IS_DEFAULT_DEF (name
))
4493 insert_operand_rank (name
, ++rank
);
4496 /* Set up rank for each BB */
/* The BB rank sits in the high bits (<< 16), presumably leaving the
   low bits for finer-grained ranks within a block -- confirm against
   the rank-computation code elsewhere in this file.  */
4497 for (i
= 0; i
< n_basic_blocks
- NUM_FIXED_BLOCKS
; i
++)
4498 bb_rank
[bbs
[i
]] = ++rank
<< 16;
4501 calculate_dominance_info (CDI_POST_DOMINATORS
);
4502 plus_negates
= vNULL
;
4505 /* Cleanup after the reassociation pass, and print stats if
4511 statistics_counter_event (cfun
, "Linearized",
4512 reassociate_stats
.linearized
);
4513 statistics_counter_event (cfun
, "Constants eliminated",
4514 reassociate_stats
.constants_eliminated
);
4515 statistics_counter_event (cfun
, "Ops eliminated",
4516 reassociate_stats
.ops_eliminated
);
4517 statistics_counter_event (cfun
, "Statements rewritten",
4518 reassociate_stats
.rewritten
);
4519 statistics_counter_event (cfun
, "Built-in pow[i] calls encountered",
4520 reassociate_stats
.pows_encountered
);
4521 statistics_counter_event (cfun
, "Built-in powi calls created",
4522 reassociate_stats
.pows_created
);
/* Release the resources acquired in init_reassoc (rank map, operand
   pool, negate queue, post-dominator info, loop structures).  */
4524 pointer_map_destroy (operand_rank
);
4525 free_alloc_pool (operand_entry_pool
);
4527 plus_negates
.release ();
4528 free_dominance_info (CDI_POST_DOMINATORS
);
4529 loop_optimizer_finalize ();
4532 /* Gate and execute functions for Reassociation. */
4535 execute_reassoc (void)
4540 repropagate_negates ();
/* The pass runs only when -ftree-reassoc is enabled.  */
4547 gate_tree_ssa_reassoc (void)
4549 return flag_tree_reassoc
!= 0;
/* Static descriptor for the reassoc pass; consumed by the
   pass_reassoc class below.  */
4554 const pass_data pass_data_reassoc
=
4556 GIMPLE_PASS
, /* type */
4557 "reassoc", /* name */
4558 OPTGROUP_NONE
, /* optinfo_flags */
4559 true, /* has_gate */
4560 true, /* has_execute */
4561 TV_TREE_REASSOC
, /* tv_id */
4562 ( PROP_cfg
| PROP_ssa
), /* properties_required */
4563 0, /* properties_provided */
4564 0, /* properties_destroyed */
4565 0, /* todo_flags_start */
4567 | TODO_update_ssa_only_virtuals
4568 | TODO_verify_flow
), /* todo_flags_finish */
/* Pass-manager wrapper: gate and execute simply forward to the
   file-local gate_tree_ssa_reassoc and execute_reassoc above.  */
4571 class pass_reassoc
: public gimple_opt_pass
4574 pass_reassoc (gcc::context
*ctxt
)
4575 : gimple_opt_pass (pass_data_reassoc
, ctxt
)
4578 /* opt_pass methods: */
4579 opt_pass
* clone () { return new pass_reassoc (m_ctxt
); }
4580 bool gate () { return gate_tree_ssa_reassoc (); }
4581 unsigned int execute () { return execute_reassoc (); }
4583 }; // class pass_reassoc
4588 make_pass_reassoc (gcc::context
*ctxt
)
4590 return new pass_reassoc (ctxt
);