1 /* Reassociation for trees.
2 Copyright (C) 2005-2020 Free Software Foundation, Inc.
3 Contributed by Daniel Berlin <dan@dberlin.org>
5 This file is part of GCC.
7 GCC is free software; you can redistribute it and/or modify
8 it under the terms of the GNU General Public License as published by
9 the Free Software Foundation; either version 3, or (at your option)
12 GCC is distributed in the hope that it will be useful,
13 but WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 GNU General Public License for more details.
17 You should have received a copy of the GNU General Public License
18 along with GCC; see the file COPYING3. If not see
19 <http://www.gnu.org/licenses/>. */
23 #include "coretypes.h"
30 #include "alloc-pool.h"
31 #include "tree-pass.h"
35 #include "optabs-tree.h"
36 #include "gimple-pretty-print.h"
37 #include "diagnostic-core.h"
38 #include "fold-const.h"
39 #include "stor-layout.h"
41 #include "gimple-fold.h"
43 #include "gimple-iterator.h"
44 #include "gimplify-me.h"
46 #include "tree-ssa-loop.h"
49 #include "langhooks.h"
53 #include "case-cfn-macros.h"
55 /* This is a simple global reassociation pass. It is, in part, based
56 on the LLVM pass of the same name (They do some things more/less
57 than we do, in different orders, etc).
59 It consists of five steps:
61 1. Breaking up subtract operations into addition + negate, where
62 it would promote the reassociation of adds.
64 2. Left linearization of the expression trees, so that (A+B)+(C+D)
65 becomes (((A+B)+C)+D), which is easier for us to rewrite later.
66 During linearization, we place the operands of the binary
67 expressions into a vector of operand_entry_*
69 3. Optimization of the operand lists, eliminating things like a +
72 3a. Combine repeated factors with the same occurrence counts
73 into a __builtin_powi call that will later be optimized into
74 an optimal number of multiplies.
76 4. Rewrite the expression trees we linearized and optimized so
77 they are in proper rank order.
79 5. Repropagate negates, as nothing else will clean it up ATM.
81 A bit of theory on #4, since nobody seems to write anything down
82 about why it makes sense to do it the way they do it:
84 We could do this much nicer theoretically, but don't (for reasons
85 explained after how to do it theoretically nice :P).
87 In order to promote the most redundancy elimination, you want
88 binary expressions whose operands are the same rank (or
89 preferably, the same value) exposed to the redundancy eliminator,
90 for possible elimination.
92 So the way to do this if we really cared, is to build the new op
93 tree from the leaves to the roots, merging as you go, and putting the
94 new op on the end of the worklist, until you are left with one
95 thing on the worklist.
97 IE if you have to rewrite the following set of operands (listed with
98 rank in parentheses), with opcode PLUS_EXPR:
100 a (1), b (1), c (1), d (2), e (2)
103 We start with our merge worklist empty, and the ops list with all of
106 You want to first merge all leaves of the same rank, as much as
109 So first build a binary op of
111 mergetmp = a + b, and put "mergetmp" on the merge worklist.
113 Because there is no three operand form of PLUS_EXPR, c is not going to
114 be exposed to redundancy elimination as a rank 1 operand.
116 So you might as well throw it on the merge worklist (you could also
117 consider it to now be a rank two operand, and merge it with d and e,
118 but in this case, you then have evicted e from a binary op. So at
119 least in this situation, you can't win.)
121 Then build a binary op of d + e
124 and put mergetmp2 on the merge worklist.
126 so merge worklist = {mergetmp, c, mergetmp2}
128 Continue building binary ops of these operations until you have only
129 one operation left on the worklist.
134 mergetmp3 = mergetmp + c
136 worklist = {mergetmp2, mergetmp3}
138 mergetmp4 = mergetmp2 + mergetmp3
140 worklist = {mergetmp4}
142 because we have one operation left, we can now just set the original
143 statement equal to the result of that operation.
145 This will at least expose a + b and d + e to redundancy elimination
146 as binary operations.
148 For extra points, you can reuse the old statements to build the
149 mergetmps, since you shouldn't run out.
151 So why don't we do this?
153 Because it's expensive, and rarely will help. Most trees we are
154 reassociating have 3 or less ops. If they have 2 ops, they already
155 will be written into a nice single binary op. If you have 3 ops, a
156 single simple check suffices to tell you whether the first two are of the
157 same rank. If so, you know to order it
160 newstmt = mergetmp + op3
164 newstmt = mergetmp + op1
166 If all three are of the same rank, you can't expose them all in a
167 single binary operator anyway, so the above is *still* the best you
170 Thus, this is what we do. When we have three ops left, we check to see
171 what order to put them in, and call it a day. As a nod to vector sum
172 reduction, we check if any of the ops are really a phi node that is a
173 destructive update for the associating op, and keep the destructive
174 update together for vector sum reduction recognition. */
176 /* Enable insertion of __builtin_powi calls during execute_reassoc. See
177 point 3a in the pass header comment. */
178 static bool reassoc_insert_powi_p
;
184 int constants_eliminated
;
187 int pows_encountered
;
191 /* Operator, rank pair. */
198 gimple
*stmt_to_insert
;
201 static object_allocator
<operand_entry
> operand_entry_pool
202 ("operand entry pool");
204 /* This is used to assign a unique ID to each struct operand_entry
205 so that qsort results are identical on different hosts. */
206 static unsigned int next_operand_entry_id
;
208 /* Starting rank number for a given basic block, so that we can rank
209 operations using unmovable instructions in that BB based on the bb
211 static long *bb_rank
;
213 /* Operand->rank hashtable. */
214 static hash_map
<tree
, long> *operand_rank
;
216 /* Vector of SSA_NAMEs on which after reassociate_bb is done with
217 all basic blocks the CFG should be adjusted - basic blocks
218 split right after that SSA_NAME's definition statement and before
219 the only use, which must be a bit ior. */
220 static vec
<tree
> reassoc_branch_fixups
;
223 static long get_rank (tree
);
224 static bool reassoc_stmt_dominates_stmt_p (gimple
*, gimple
*);
226 /* Wrapper around gsi_remove, which adjusts gimple_uid of debug stmts
227 possibly added by gsi_remove. */
230 reassoc_remove_stmt (gimple_stmt_iterator
*gsi
)
232 gimple
*stmt
= gsi_stmt (*gsi
);
234 if (!MAY_HAVE_DEBUG_BIND_STMTS
|| gimple_code (stmt
) == GIMPLE_PHI
)
235 return gsi_remove (gsi
, true);
237 gimple_stmt_iterator prev
= *gsi
;
239 unsigned uid
= gimple_uid (stmt
);
240 basic_block bb
= gimple_bb (stmt
);
241 bool ret
= gsi_remove (gsi
, true);
242 if (!gsi_end_p (prev
))
245 prev
= gsi_start_bb (bb
);
246 gimple
*end_stmt
= gsi_stmt (*gsi
);
247 while ((stmt
= gsi_stmt (prev
)) != end_stmt
)
249 gcc_assert (stmt
&& is_gimple_debug (stmt
) && gimple_uid (stmt
) == 0);
250 gimple_set_uid (stmt
, uid
);
256 /* Bias amount for loop-carried phis. We want this to be larger than
257 the depth of any reassociation tree we can see, but not larger than
258 the rank difference between two blocks. */
259 #define PHI_LOOP_BIAS (1 << 15)
261 /* Rank assigned to a phi statement. If STMT is a loop-carried phi of
262 an innermost loop, and the phi has only a single use which is inside
263 the loop, then the rank is the block rank of the loop latch plus an
264 extra bias for the loop-carried dependence. This causes expressions
265 calculated into an accumulator variable to be independent for each
266 iteration of the loop. If STMT is some other phi, the rank is the
267 block rank of its containing block. */
269 phi_rank (gimple
*stmt
)
271 basic_block bb
= gimple_bb (stmt
);
272 class loop
*father
= bb
->loop_father
;
278 /* We only care about real loops (those with a latch). */
280 return bb_rank
[bb
->index
];
282 /* Interesting phis must be in headers of innermost loops. */
283 if (bb
!= father
->header
285 return bb_rank
[bb
->index
];
287 /* Ignore virtual SSA_NAMEs. */
288 res
= gimple_phi_result (stmt
);
289 if (virtual_operand_p (res
))
290 return bb_rank
[bb
->index
];
292 /* The phi definition must have a single use, and that use must be
293 within the loop. Otherwise this isn't an accumulator pattern. */
294 if (!single_imm_use (res
, &use
, &use_stmt
)
295 || gimple_bb (use_stmt
)->loop_father
!= father
)
296 return bb_rank
[bb
->index
];
298 /* Look for phi arguments from within the loop. If found, bias this phi. */
299 for (i
= 0; i
< gimple_phi_num_args (stmt
); i
++)
301 tree arg
= gimple_phi_arg_def (stmt
, i
);
302 if (TREE_CODE (arg
) == SSA_NAME
303 && !SSA_NAME_IS_DEFAULT_DEF (arg
))
305 gimple
*def_stmt
= SSA_NAME_DEF_STMT (arg
);
306 if (gimple_bb (def_stmt
)->loop_father
== father
)
307 return bb_rank
[father
->latch
->index
] + PHI_LOOP_BIAS
;
311 /* Must be an uninteresting phi. */
312 return bb_rank
[bb
->index
];
315 /* If EXP is an SSA_NAME defined by a PHI statement that represents a
316 loop-carried dependence of an innermost loop, return TRUE; else
319 loop_carried_phi (tree exp
)
324 if (TREE_CODE (exp
) != SSA_NAME
325 || SSA_NAME_IS_DEFAULT_DEF (exp
))
328 phi_stmt
= SSA_NAME_DEF_STMT (exp
);
330 if (gimple_code (SSA_NAME_DEF_STMT (exp
)) != GIMPLE_PHI
)
333 /* Non-loop-carried phis have block rank. Loop-carried phis have
334 an additional bias added in. If this phi doesn't have block rank,
335 it's biased and should not be propagated. */
336 block_rank
= bb_rank
[gimple_bb (phi_stmt
)->index
];
338 if (phi_rank (phi_stmt
) != block_rank
)
344 /* Return the maximum of RANK and the rank that should be propagated
345 from expression OP. For most operands, this is just the rank of OP.
346 For loop-carried phis, the value is zero to avoid undoing the bias
347 in favor of the phi. */
349 propagate_rank (long rank
, tree op
)
353 if (loop_carried_phi (op
))
356 op_rank
= get_rank (op
);
358 return MAX (rank
, op_rank
);
361 /* Look up the operand rank structure for expression E. */
364 find_operand_rank (tree e
)
366 long *slot
= operand_rank
->get (e
);
367 return slot
? *slot
: -1;
370 /* Insert {E,RANK} into the operand rank hashtable. */
373 insert_operand_rank (tree e
, long rank
)
375 gcc_assert (rank
> 0);
376 gcc_assert (!operand_rank
->put (e
, rank
));
379 /* Given an expression E, return the rank of the expression. */
384 /* SSA_NAME's have the rank of the expression they are the result
386 For globals and uninitialized values, the rank is 0.
387 For function arguments, use the pre-setup rank.
388 For PHI nodes, stores, asm statements, etc, we use the rank of
390 For simple operations, the rank is the maximum rank of any of
391 its operands, or the bb_rank, whichever is less.
392 I make no claims that this is optimal, however, it gives good
395 /* We make an exception to the normal ranking system to break
396 dependences of accumulator variables in loops. Suppose we
397 have a simple one-block loop containing:
404 As shown, each iteration of the calculation into x is fully
405 dependent upon the iteration before it. We would prefer to
406 see this in the form:
413 If the loop is unrolled, the calculations of b and c from
414 different iterations can be interleaved.
416 To obtain this result during reassociation, we bias the rank
417 of the phi definition x_1 upward, when it is recognized as an
418 accumulator pattern. The artificial rank causes it to be
419 added last, providing the desired independence. */
421 if (TREE_CODE (e
) == SSA_NAME
)
428 if (SSA_NAME_IS_DEFAULT_DEF (e
))
429 return find_operand_rank (e
);
431 stmt
= SSA_NAME_DEF_STMT (e
);
432 if (gimple_code (stmt
) == GIMPLE_PHI
)
433 return phi_rank (stmt
);
435 if (!is_gimple_assign (stmt
))
436 return bb_rank
[gimple_bb (stmt
)->index
];
438 /* If we already have a rank for this expression, use that. */
439 rank
= find_operand_rank (e
);
443 /* Otherwise, find the maximum rank for the operands. As an
444 exception, remove the bias from loop-carried phis when propagating
445 the rank so that dependent operations are not also biased. */
446 /* Simply walk over all SSA uses - this takes advatage of the
447 fact that non-SSA operands are is_gimple_min_invariant and
450 FOR_EACH_SSA_TREE_OPERAND (op
, stmt
, iter
, SSA_OP_USE
)
451 rank
= propagate_rank (rank
, op
);
453 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
455 fprintf (dump_file
, "Rank for ");
456 print_generic_expr (dump_file
, e
);
457 fprintf (dump_file
, " is %ld\n", (rank
+ 1));
460 /* Note the rank in the hashtable so we don't recompute it. */
461 insert_operand_rank (e
, (rank
+ 1));
465 /* Constants, globals, etc., are rank 0 */
470 /* We want integer ones to end up last no matter what, since they are
471 the ones we can do the most with. */
472 #define INTEGER_CONST_TYPE 1 << 4
473 #define FLOAT_ONE_CONST_TYPE 1 << 3
474 #define FLOAT_CONST_TYPE 1 << 2
475 #define OTHER_CONST_TYPE 1 << 1
477 /* Classify an invariant tree into integer, float, or other, so that
478 we can sort them to be near other constants of the same type. */
480 constant_type (tree t
)
482 if (INTEGRAL_TYPE_P (TREE_TYPE (t
)))
483 return INTEGER_CONST_TYPE
;
484 else if (SCALAR_FLOAT_TYPE_P (TREE_TYPE (t
)))
486 /* Sort -1.0 and 1.0 constants last, while in some cases
487 const_binop can't optimize some inexact operations, multiplication
488 by -1.0 or 1.0 can be always merged with others. */
489 if (real_onep (t
) || real_minus_onep (t
))
490 return FLOAT_ONE_CONST_TYPE
;
491 return FLOAT_CONST_TYPE
;
494 return OTHER_CONST_TYPE
;
497 /* qsort comparison function to sort operand entries PA and PB by rank
498 so that the sorted array is ordered by rank in decreasing order. */
500 sort_by_operand_rank (const void *pa
, const void *pb
)
502 const operand_entry
*oea
= *(const operand_entry
*const *)pa
;
503 const operand_entry
*oeb
= *(const operand_entry
*const *)pb
;
505 if (oeb
->rank
!= oea
->rank
)
506 return oeb
->rank
> oea
->rank
? 1 : -1;
508 /* It's nicer for optimize_expression if constants that are likely
509 to fold when added/multiplied/whatever are put next to each
510 other. Since all constants have rank 0, order them by type. */
513 if (constant_type (oeb
->op
) != constant_type (oea
->op
))
514 return constant_type (oea
->op
) - constant_type (oeb
->op
);
516 /* To make sorting result stable, we use unique IDs to determine
518 return oeb
->id
> oea
->id
? 1 : -1;
521 if (TREE_CODE (oea
->op
) != SSA_NAME
)
523 if (TREE_CODE (oeb
->op
) != SSA_NAME
)
524 return oeb
->id
> oea
->id
? 1 : -1;
528 else if (TREE_CODE (oeb
->op
) != SSA_NAME
)
531 /* Lastly, make sure the versions that are the same go next to each
533 if (SSA_NAME_VERSION (oeb
->op
) != SSA_NAME_VERSION (oea
->op
))
535 /* As SSA_NAME_VERSION is assigned pretty randomly, because we reuse
536 versions of removed SSA_NAMEs, so if possible, prefer to sort
537 based on basic block and gimple_uid of the SSA_NAME_DEF_STMT.
539 gimple
*stmta
= SSA_NAME_DEF_STMT (oea
->op
);
540 gimple
*stmtb
= SSA_NAME_DEF_STMT (oeb
->op
);
541 basic_block bba
= gimple_bb (stmta
);
542 basic_block bbb
= gimple_bb (stmtb
);
545 /* One of the SSA_NAMEs can be defined in oeN->stmt_to_insert
546 but the other might not. */
551 /* If neither is, compare bb_rank. */
552 if (bb_rank
[bbb
->index
] != bb_rank
[bba
->index
])
553 return (bb_rank
[bbb
->index
] >> 16) - (bb_rank
[bba
->index
] >> 16);
556 bool da
= reassoc_stmt_dominates_stmt_p (stmta
, stmtb
);
557 bool db
= reassoc_stmt_dominates_stmt_p (stmtb
, stmta
);
561 return SSA_NAME_VERSION (oeb
->op
) > SSA_NAME_VERSION (oea
->op
) ? 1 : -1;
564 return oeb
->id
> oea
->id
? 1 : -1;
567 /* Add an operand entry to *OPS for the tree operand OP. */
570 add_to_ops_vec (vec
<operand_entry
*> *ops
, tree op
, gimple
*stmt_to_insert
= NULL
)
572 operand_entry
*oe
= operand_entry_pool
.allocate ();
575 oe
->rank
= get_rank (op
);
576 oe
->id
= next_operand_entry_id
++;
578 oe
->stmt_to_insert
= stmt_to_insert
;
582 /* Add an operand entry to *OPS for the tree operand OP with repeat
586 add_repeat_to_ops_vec (vec
<operand_entry
*> *ops
, tree op
,
587 HOST_WIDE_INT repeat
)
589 operand_entry
*oe
= operand_entry_pool
.allocate ();
592 oe
->rank
= get_rank (op
);
593 oe
->id
= next_operand_entry_id
++;
595 oe
->stmt_to_insert
= NULL
;
598 reassociate_stats
.pows_encountered
++;
601 /* Return true if STMT is reassociable operation containing a binary
602 operation with tree code CODE, and is inside LOOP. */
605 is_reassociable_op (gimple
*stmt
, enum tree_code code
, class loop
*loop
)
607 basic_block bb
= gimple_bb (stmt
);
609 if (gimple_bb (stmt
) == NULL
)
612 if (!flow_bb_inside_loop_p (loop
, bb
))
615 if (is_gimple_assign (stmt
)
616 && gimple_assign_rhs_code (stmt
) == code
617 && has_single_use (gimple_assign_lhs (stmt
)))
619 tree rhs1
= gimple_assign_rhs1 (stmt
);
620 tree rhs2
= gimple_assign_rhs2 (stmt
);
621 if (TREE_CODE (rhs1
) == SSA_NAME
622 && SSA_NAME_OCCURS_IN_ABNORMAL_PHI (rhs1
))
625 && TREE_CODE (rhs2
) == SSA_NAME
626 && SSA_NAME_OCCURS_IN_ABNORMAL_PHI (rhs2
))
635 /* Return true if STMT is a nop-conversion. */
638 gimple_nop_conversion_p (gimple
*stmt
)
640 if (gassign
*ass
= dyn_cast
<gassign
*> (stmt
))
642 if (CONVERT_EXPR_CODE_P (gimple_assign_rhs_code (ass
))
643 && tree_nop_conversion_p (TREE_TYPE (gimple_assign_lhs (ass
)),
644 TREE_TYPE (gimple_assign_rhs1 (ass
))))
650 /* Given NAME, if NAME is defined by a unary operation OPCODE, return the
651 operand of the negate operation. Otherwise, return NULL. */
654 get_unary_op (tree name
, enum tree_code opcode
)
656 gimple
*stmt
= SSA_NAME_DEF_STMT (name
);
658 /* Look through nop conversions (sign changes). */
659 if (gimple_nop_conversion_p (stmt
)
660 && TREE_CODE (gimple_assign_rhs1 (stmt
)) == SSA_NAME
)
661 stmt
= SSA_NAME_DEF_STMT (gimple_assign_rhs1 (stmt
));
663 if (!is_gimple_assign (stmt
))
666 if (gimple_assign_rhs_code (stmt
) == opcode
)
667 return gimple_assign_rhs1 (stmt
);
671 /* Return true if OP1 and OP2 have the same value if casted to either type. */
674 ops_equal_values_p (tree op1
, tree op2
)
680 if (TREE_CODE (op1
) == SSA_NAME
)
682 gimple
*stmt
= SSA_NAME_DEF_STMT (op1
);
683 if (gimple_nop_conversion_p (stmt
))
685 op1
= gimple_assign_rhs1 (stmt
);
691 if (TREE_CODE (op2
) == SSA_NAME
)
693 gimple
*stmt
= SSA_NAME_DEF_STMT (op2
);
694 if (gimple_nop_conversion_p (stmt
))
696 op2
= gimple_assign_rhs1 (stmt
);
707 /* If CURR and LAST are a pair of ops that OPCODE allows us to
708 eliminate through equivalences, do so, remove them from OPS, and
709 return true. Otherwise, return false. */
712 eliminate_duplicate_pair (enum tree_code opcode
,
713 vec
<operand_entry
*> *ops
,
720 /* If we have two of the same op, and the opcode is & |, min, or max,
721 we can eliminate one of them.
722 If we have two of the same op, and the opcode is ^, we can
723 eliminate both of them. */
725 if (last
&& last
->op
== curr
->op
)
733 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
735 fprintf (dump_file
, "Equivalence: ");
736 print_generic_expr (dump_file
, curr
->op
);
737 fprintf (dump_file
, " [&|minmax] ");
738 print_generic_expr (dump_file
, last
->op
);
739 fprintf (dump_file
, " -> ");
740 print_generic_stmt (dump_file
, last
->op
);
743 ops
->ordered_remove (i
);
744 reassociate_stats
.ops_eliminated
++;
749 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
751 fprintf (dump_file
, "Equivalence: ");
752 print_generic_expr (dump_file
, curr
->op
);
753 fprintf (dump_file
, " ^ ");
754 print_generic_expr (dump_file
, last
->op
);
755 fprintf (dump_file
, " -> nothing\n");
758 reassociate_stats
.ops_eliminated
+= 2;
760 if (ops
->length () == 2)
763 add_to_ops_vec (ops
, build_zero_cst (TREE_TYPE (last
->op
)));
768 ops
->ordered_remove (i
-1);
769 ops
->ordered_remove (i
-1);
781 static vec
<tree
> plus_negates
;
783 /* If OPCODE is PLUS_EXPR, CURR->OP is a negate expression or a bitwise not
784 expression, look in OPS for a corresponding positive operation to cancel
785 it out. If we find one, remove the other from OPS, replace
786 OPS[CURRINDEX] with 0 or -1, respectively, and return true. Otherwise,
790 eliminate_plus_minus_pair (enum tree_code opcode
,
791 vec
<operand_entry
*> *ops
,
792 unsigned int currindex
,
800 if (opcode
!= PLUS_EXPR
|| TREE_CODE (curr
->op
) != SSA_NAME
)
803 negateop
= get_unary_op (curr
->op
, NEGATE_EXPR
);
804 notop
= get_unary_op (curr
->op
, BIT_NOT_EXPR
);
805 if (negateop
== NULL_TREE
&& notop
== NULL_TREE
)
808 /* Any non-negated version will have a rank that is one less than
809 the current rank. So once we hit those ranks, if we don't find
812 for (i
= currindex
+ 1;
813 ops
->iterate (i
, &oe
)
814 && oe
->rank
>= curr
->rank
- 1 ;
818 && ops_equal_values_p (oe
->op
, negateop
))
820 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
822 fprintf (dump_file
, "Equivalence: ");
823 print_generic_expr (dump_file
, negateop
);
824 fprintf (dump_file
, " + -");
825 print_generic_expr (dump_file
, oe
->op
);
826 fprintf (dump_file
, " -> 0\n");
829 ops
->ordered_remove (i
);
830 add_to_ops_vec (ops
, build_zero_cst (TREE_TYPE (oe
->op
)));
831 ops
->ordered_remove (currindex
);
832 reassociate_stats
.ops_eliminated
++;
837 && ops_equal_values_p (oe
->op
, notop
))
839 tree op_type
= TREE_TYPE (oe
->op
);
841 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
843 fprintf (dump_file
, "Equivalence: ");
844 print_generic_expr (dump_file
, notop
);
845 fprintf (dump_file
, " + ~");
846 print_generic_expr (dump_file
, oe
->op
);
847 fprintf (dump_file
, " -> -1\n");
850 ops
->ordered_remove (i
);
851 add_to_ops_vec (ops
, build_all_ones_cst (op_type
));
852 ops
->ordered_remove (currindex
);
853 reassociate_stats
.ops_eliminated
++;
859 /* If CURR->OP is a negate expr without nop conversion in a plus expr:
860 save it for later inspection in repropagate_negates(). */
861 if (negateop
!= NULL_TREE
862 && gimple_assign_rhs_code (SSA_NAME_DEF_STMT (curr
->op
)) == NEGATE_EXPR
)
863 plus_negates
.safe_push (curr
->op
);
868 /* If OPCODE is BIT_IOR_EXPR, BIT_AND_EXPR, and, CURR->OP is really a
869 bitwise not expression, look in OPS for a corresponding operand to
870 cancel it out. If we find one, remove the other from OPS, replace
871 OPS[CURRINDEX] with 0, and return true. Otherwise, return
875 eliminate_not_pairs (enum tree_code opcode
,
876 vec
<operand_entry
*> *ops
,
877 unsigned int currindex
,
884 if ((opcode
!= BIT_IOR_EXPR
&& opcode
!= BIT_AND_EXPR
)
885 || TREE_CODE (curr
->op
) != SSA_NAME
)
888 notop
= get_unary_op (curr
->op
, BIT_NOT_EXPR
);
889 if (notop
== NULL_TREE
)
892 /* Any non-not version will have a rank that is one less than
893 the current rank. So once we hit those ranks, if we don't find
896 for (i
= currindex
+ 1;
897 ops
->iterate (i
, &oe
)
898 && oe
->rank
>= curr
->rank
- 1;
903 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
905 fprintf (dump_file
, "Equivalence: ");
906 print_generic_expr (dump_file
, notop
);
907 if (opcode
== BIT_AND_EXPR
)
908 fprintf (dump_file
, " & ~");
909 else if (opcode
== BIT_IOR_EXPR
)
910 fprintf (dump_file
, " | ~");
911 print_generic_expr (dump_file
, oe
->op
);
912 if (opcode
== BIT_AND_EXPR
)
913 fprintf (dump_file
, " -> 0\n");
914 else if (opcode
== BIT_IOR_EXPR
)
915 fprintf (dump_file
, " -> -1\n");
918 if (opcode
== BIT_AND_EXPR
)
919 oe
->op
= build_zero_cst (TREE_TYPE (oe
->op
));
920 else if (opcode
== BIT_IOR_EXPR
)
921 oe
->op
= build_all_ones_cst (TREE_TYPE (oe
->op
));
923 reassociate_stats
.ops_eliminated
+= ops
->length () - 1;
925 ops
->quick_push (oe
);
933 /* Use constant value that may be present in OPS to try to eliminate
934 operands. Note that this function is only really used when we've
935 eliminated ops for other reasons, or merged constants. Across
936 single statements, fold already does all of this, plus more. There
937 is little point in duplicating logic, so I've only included the
938 identities that I could ever construct testcases to trigger. */
941 eliminate_using_constants (enum tree_code opcode
,
942 vec
<operand_entry
*> *ops
)
944 operand_entry
*oelast
= ops
->last ();
945 tree type
= TREE_TYPE (oelast
->op
);
947 if (oelast
->rank
== 0
948 && (ANY_INTEGRAL_TYPE_P (type
) || FLOAT_TYPE_P (type
)))
953 if (integer_zerop (oelast
->op
))
955 if (ops
->length () != 1)
957 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
958 fprintf (dump_file
, "Found & 0, removing all other ops\n");
960 reassociate_stats
.ops_eliminated
+= ops
->length () - 1;
963 ops
->quick_push (oelast
);
967 else if (integer_all_onesp (oelast
->op
))
969 if (ops
->length () != 1)
971 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
972 fprintf (dump_file
, "Found & -1, removing\n");
974 reassociate_stats
.ops_eliminated
++;
979 if (integer_all_onesp (oelast
->op
))
981 if (ops
->length () != 1)
983 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
984 fprintf (dump_file
, "Found | -1, removing all other ops\n");
986 reassociate_stats
.ops_eliminated
+= ops
->length () - 1;
989 ops
->quick_push (oelast
);
993 else if (integer_zerop (oelast
->op
))
995 if (ops
->length () != 1)
997 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
998 fprintf (dump_file
, "Found | 0, removing\n");
1000 reassociate_stats
.ops_eliminated
++;
1005 if (integer_zerop (oelast
->op
)
1006 || (FLOAT_TYPE_P (type
)
1007 && !HONOR_NANS (type
)
1008 && !HONOR_SIGNED_ZEROS (type
)
1009 && real_zerop (oelast
->op
)))
1011 if (ops
->length () != 1)
1013 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
1014 fprintf (dump_file
, "Found * 0, removing all other ops\n");
1016 reassociate_stats
.ops_eliminated
+= ops
->length () - 1;
1018 ops
->quick_push (oelast
);
1022 else if (integer_onep (oelast
->op
)
1023 || (FLOAT_TYPE_P (type
)
1024 && !HONOR_SNANS (type
)
1025 && real_onep (oelast
->op
)))
1027 if (ops
->length () != 1)
1029 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
1030 fprintf (dump_file
, "Found * 1, removing\n");
1032 reassociate_stats
.ops_eliminated
++;
1040 if (integer_zerop (oelast
->op
)
1041 || (FLOAT_TYPE_P (type
)
1042 && (opcode
== PLUS_EXPR
|| opcode
== MINUS_EXPR
)
1043 && fold_real_zero_addition_p (type
, oelast
->op
,
1044 opcode
== MINUS_EXPR
)))
1046 if (ops
->length () != 1)
1048 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
1049 fprintf (dump_file
, "Found [|^+] 0, removing\n");
1051 reassociate_stats
.ops_eliminated
++;
1063 static void linearize_expr_tree (vec
<operand_entry
*> *, gimple
*,
1066 /* Structure for tracking and counting operands. */
1070 enum tree_code oecode
;
1075 /* The heap for the oecount hashtable and the sorted list of operands. */
1076 static vec
<oecount
> cvec
;
1079 /* Oecount hashtable helpers. */
1081 struct oecount_hasher
: int_hash
<int, 0, 1>
1083 static inline hashval_t
hash (int);
1084 static inline bool equal (int, int);
1087 /* Hash function for oecount. */
1090 oecount_hasher::hash (int p
)
1092 const oecount
*c
= &cvec
[p
- 42];
1093 return htab_hash_pointer (c
->op
) ^ (hashval_t
)c
->oecode
;
1096 /* Comparison function for oecount. */
1099 oecount_hasher::equal (int p1
, int p2
)
1101 const oecount
*c1
= &cvec
[p1
- 42];
1102 const oecount
*c2
= &cvec
[p2
- 42];
1103 return c1
->oecode
== c2
->oecode
&& c1
->op
== c2
->op
;
1106 /* Comparison function for qsort sorting oecount elements by count. */
1109 oecount_cmp (const void *p1
, const void *p2
)
1111 const oecount
*c1
= (const oecount
*)p1
;
1112 const oecount
*c2
= (const oecount
*)p2
;
1113 if (c1
->cnt
!= c2
->cnt
)
1114 return c1
->cnt
> c2
->cnt
? 1 : -1;
1116 /* If counts are identical, use unique IDs to stabilize qsort. */
1117 return c1
->id
> c2
->id
? 1 : -1;
1120 /* Return TRUE iff STMT represents a builtin call that raises OP
1121 to some exponent. */
1124 stmt_is_power_of_op (gimple
*stmt
, tree op
)
1126 if (!is_gimple_call (stmt
))
1129 switch (gimple_call_combined_fn (stmt
))
1133 return (operand_equal_p (gimple_call_arg (stmt
, 0), op
, 0));
1140 /* Given STMT which is a __builtin_pow* call, decrement its exponent
1141 in place and return the result. Assumes that stmt_is_power_of_op
1142 was previously called for STMT and returned TRUE. */
1144 static HOST_WIDE_INT
1145 decrement_power (gimple
*stmt
)
1147 REAL_VALUE_TYPE c
, cint
;
1148 HOST_WIDE_INT power
;
1151 switch (gimple_call_combined_fn (stmt
))
1154 arg1
= gimple_call_arg (stmt
, 1);
1155 c
= TREE_REAL_CST (arg1
);
1156 power
= real_to_integer (&c
) - 1;
1157 real_from_integer (&cint
, VOIDmode
, power
, SIGNED
);
1158 gimple_call_set_arg (stmt
, 1, build_real (TREE_TYPE (arg1
), cint
));
1162 arg1
= gimple_call_arg (stmt
, 1);
1163 power
= TREE_INT_CST_LOW (arg1
) - 1;
1164 gimple_call_set_arg (stmt
, 1, build_int_cst (TREE_TYPE (arg1
), power
));
1172 /* Replace SSA defined by STMT and replace all its uses with new
1173 SSA. Also return the new SSA. */
1176 make_new_ssa_for_def (gimple
*stmt
, enum tree_code opcode
, tree op
)
1180 imm_use_iterator iter
;
1181 tree new_lhs
, new_debug_lhs
= NULL_TREE
;
1182 tree lhs
= gimple_get_lhs (stmt
);
1184 new_lhs
= make_ssa_name (TREE_TYPE (lhs
));
1185 gimple_set_lhs (stmt
, new_lhs
);
1187 /* Also need to update GIMPLE_DEBUGs. */
1188 FOR_EACH_IMM_USE_STMT (use_stmt
, iter
, lhs
)
1190 tree repl
= new_lhs
;
1191 if (is_gimple_debug (use_stmt
))
1193 if (new_debug_lhs
== NULL_TREE
)
1195 new_debug_lhs
= make_node (DEBUG_EXPR_DECL
);
1197 = gimple_build_debug_bind (new_debug_lhs
,
1198 build2 (opcode
, TREE_TYPE (lhs
),
1201 DECL_ARTIFICIAL (new_debug_lhs
) = 1;
1202 TREE_TYPE (new_debug_lhs
) = TREE_TYPE (lhs
);
1203 SET_DECL_MODE (new_debug_lhs
, TYPE_MODE (TREE_TYPE (lhs
)));
1204 gimple_set_uid (def_temp
, gimple_uid (stmt
));
1205 gimple_stmt_iterator gsi
= gsi_for_stmt (stmt
);
1206 gsi_insert_after (&gsi
, def_temp
, GSI_SAME_STMT
);
1208 repl
= new_debug_lhs
;
1210 FOR_EACH_IMM_USE_ON_STMT (use
, iter
)
1211 SET_USE (use
, repl
);
1212 update_stmt (use_stmt
);
1217 /* Replace all SSAs defined in STMTS_TO_FIX and replace its
1218 uses with new SSAs. Also do this for the stmt that defines DEF
1219 if *DEF is not OP. */
1222 make_new_ssa_for_all_defs (tree
*def
, enum tree_code opcode
, tree op
,
1223 vec
<gimple
*> &stmts_to_fix
)
1229 && TREE_CODE (*def
) == SSA_NAME
1230 && (stmt
= SSA_NAME_DEF_STMT (*def
))
1231 && gimple_code (stmt
) != GIMPLE_NOP
)
1232 *def
= make_new_ssa_for_def (stmt
, opcode
, op
);
1234 FOR_EACH_VEC_ELT (stmts_to_fix
, i
, stmt
)
1235 make_new_ssa_for_def (stmt
, opcode
, op
);
1238 /* Find the single immediate use of STMT's LHS, and replace it
1239 with OP. Remove STMT. If STMT's LHS is the same as *DEF,
1240 replace *DEF with OP as well. */
1243 propagate_op_to_single_use (tree op
, gimple
*stmt
, tree
*def
)
1248 gimple_stmt_iterator gsi
;
1250 if (is_gimple_call (stmt
))
1251 lhs
= gimple_call_lhs (stmt
);
1253 lhs
= gimple_assign_lhs (stmt
);
1255 gcc_assert (has_single_use (lhs
));
1256 single_imm_use (lhs
, &use
, &use_stmt
);
1260 if (TREE_CODE (op
) != SSA_NAME
)
1261 update_stmt (use_stmt
);
1262 gsi
= gsi_for_stmt (stmt
);
1263 unlink_stmt_vdef (stmt
);
1264 reassoc_remove_stmt (&gsi
);
1265 release_defs (stmt
);
1268 /* Walks the linear chain with result *DEF searching for an operation
1269 with operand OP and code OPCODE removing that from the chain. *DEF
1270 is updated if there is only one operand but no operation left. */
1273 zero_one_operation (tree
*def
, enum tree_code opcode
, tree op
)
1275 tree orig_def
= *def
;
1276 gimple
*stmt
= SSA_NAME_DEF_STMT (*def
);
1277 /* PR72835 - Record the stmt chain that has to be updated such that
1278 we dont use the same LHS when the values computed are different. */
1279 auto_vec
<gimple
*, 64> stmts_to_fix
;
1285 if (opcode
== MULT_EXPR
)
1287 if (stmt_is_power_of_op (stmt
, op
))
1289 if (decrement_power (stmt
) == 1)
1291 if (stmts_to_fix
.length () > 0)
1292 stmts_to_fix
.pop ();
1293 propagate_op_to_single_use (op
, stmt
, def
);
1297 else if (gimple_assign_rhs_code (stmt
) == NEGATE_EXPR
)
1299 if (gimple_assign_rhs1 (stmt
) == op
)
1301 tree cst
= build_minus_one_cst (TREE_TYPE (op
));
1302 if (stmts_to_fix
.length () > 0)
1303 stmts_to_fix
.pop ();
1304 propagate_op_to_single_use (cst
, stmt
, def
);
1307 else if (integer_minus_onep (op
)
1308 || real_minus_onep (op
))
1310 gimple_assign_set_rhs_code
1311 (stmt
, TREE_CODE (gimple_assign_rhs1 (stmt
)));
1317 name
= gimple_assign_rhs1 (stmt
);
1319 /* If this is the operation we look for and one of the operands
1320 is ours simply propagate the other operand into the stmts
1322 if (gimple_assign_rhs_code (stmt
) == opcode
1324 || gimple_assign_rhs2 (stmt
) == op
))
1327 name
= gimple_assign_rhs2 (stmt
);
1328 if (stmts_to_fix
.length () > 0)
1329 stmts_to_fix
.pop ();
1330 propagate_op_to_single_use (name
, stmt
, def
);
1334 /* We might have a multiply of two __builtin_pow* calls, and
1335 the operand might be hiding in the rightmost one. Likewise
1336 this can happen for a negate. */
1337 if (opcode
== MULT_EXPR
1338 && gimple_assign_rhs_code (stmt
) == opcode
1339 && TREE_CODE (gimple_assign_rhs2 (stmt
)) == SSA_NAME
1340 && has_single_use (gimple_assign_rhs2 (stmt
)))
1342 gimple
*stmt2
= SSA_NAME_DEF_STMT (gimple_assign_rhs2 (stmt
));
1343 if (stmt_is_power_of_op (stmt2
, op
))
1345 if (decrement_power (stmt2
) == 1)
1346 propagate_op_to_single_use (op
, stmt2
, def
);
1348 stmts_to_fix
.safe_push (stmt2
);
1351 else if (is_gimple_assign (stmt2
)
1352 && gimple_assign_rhs_code (stmt2
) == NEGATE_EXPR
)
1354 if (gimple_assign_rhs1 (stmt2
) == op
)
1356 tree cst
= build_minus_one_cst (TREE_TYPE (op
));
1357 propagate_op_to_single_use (cst
, stmt2
, def
);
1360 else if (integer_minus_onep (op
)
1361 || real_minus_onep (op
))
1363 stmts_to_fix
.safe_push (stmt2
);
1364 gimple_assign_set_rhs_code
1365 (stmt2
, TREE_CODE (gimple_assign_rhs1 (stmt2
)));
1371 /* Continue walking the chain. */
1372 gcc_assert (name
!= op
1373 && TREE_CODE (name
) == SSA_NAME
);
1374 stmt
= SSA_NAME_DEF_STMT (name
);
1375 stmts_to_fix
.safe_push (stmt
);
1379 if (stmts_to_fix
.length () > 0 || *def
== orig_def
)
1380 make_new_ssa_for_all_defs (def
, opcode
, op
, stmts_to_fix
);
1383 /* Returns true if statement S1 dominates statement S2. Like
1384 stmt_dominates_stmt_p, but uses stmt UIDs to optimize. */
1387 reassoc_stmt_dominates_stmt_p (gimple
*s1
, gimple
*s2
)
1389 basic_block bb1
= gimple_bb (s1
), bb2
= gimple_bb (s2
);
1391 /* If bb1 is NULL, it should be a GIMPLE_NOP def stmt of an (D)
1392 SSA_NAME. Assume it lives at the beginning of function and
1393 thus dominates everything. */
1394 if (!bb1
|| s1
== s2
)
1397 /* If bb2 is NULL, it doesn't dominate any stmt with a bb. */
1403 /* PHIs in the same basic block are assumed to be
1404 executed all in parallel, if only one stmt is a PHI,
1405 it dominates the other stmt in the same basic block. */
1406 if (gimple_code (s1
) == GIMPLE_PHI
)
1409 if (gimple_code (s2
) == GIMPLE_PHI
)
1412 gcc_assert (gimple_uid (s1
) && gimple_uid (s2
));
1414 if (gimple_uid (s1
) < gimple_uid (s2
))
1417 if (gimple_uid (s1
) > gimple_uid (s2
))
1420 gimple_stmt_iterator gsi
= gsi_for_stmt (s1
);
1421 unsigned int uid
= gimple_uid (s1
);
1422 for (gsi_next (&gsi
); !gsi_end_p (gsi
); gsi_next (&gsi
))
1424 gimple
*s
= gsi_stmt (gsi
);
1425 if (gimple_uid (s
) != uid
)
1434 return dominated_by_p (CDI_DOMINATORS
, bb2
, bb1
);
1437 /* Insert STMT after INSERT_POINT. */
1440 insert_stmt_after (gimple
*stmt
, gimple
*insert_point
)
1442 gimple_stmt_iterator gsi
;
1445 if (gimple_code (insert_point
) == GIMPLE_PHI
)
1446 bb
= gimple_bb (insert_point
);
1447 else if (!stmt_ends_bb_p (insert_point
))
1449 gsi
= gsi_for_stmt (insert_point
);
1450 gimple_set_uid (stmt
, gimple_uid (insert_point
));
1451 gsi_insert_after (&gsi
, stmt
, GSI_NEW_STMT
);
1455 /* We assume INSERT_POINT is a SSA_NAME_DEF_STMT of some SSA_NAME,
1456 thus if it must end a basic block, it should be a call that can
1457 throw, or some assignment that can throw. If it throws, the LHS
1458 of it will not be initialized though, so only valid places using
1459 the SSA_NAME should be dominated by the fallthru edge. */
1460 bb
= find_fallthru_edge (gimple_bb (insert_point
)->succs
)->dest
;
1461 gsi
= gsi_after_labels (bb
);
1462 if (gsi_end_p (gsi
))
1464 gimple_stmt_iterator gsi2
= gsi_last_bb (bb
);
1465 gimple_set_uid (stmt
,
1466 gsi_end_p (gsi2
) ? 1 : gimple_uid (gsi_stmt (gsi2
)));
1469 gimple_set_uid (stmt
, gimple_uid (gsi_stmt (gsi
)));
1470 gsi_insert_before (&gsi
, stmt
, GSI_SAME_STMT
);
1473 /* Builds one statement performing OP1 OPCODE OP2 using TMPVAR for
1474 the result. Places the statement after the definition of either
1475 OP1 or OP2. Returns the new statement. */
1478 build_and_add_sum (tree type
, tree op1
, tree op2
, enum tree_code opcode
)
1480 gimple
*op1def
= NULL
, *op2def
= NULL
;
1481 gimple_stmt_iterator gsi
;
1485 /* Create the addition statement. */
1486 op
= make_ssa_name (type
);
1487 sum
= gimple_build_assign (op
, opcode
, op1
, op2
);
1489 /* Find an insertion place and insert. */
1490 if (TREE_CODE (op1
) == SSA_NAME
)
1491 op1def
= SSA_NAME_DEF_STMT (op1
);
1492 if (TREE_CODE (op2
) == SSA_NAME
)
1493 op2def
= SSA_NAME_DEF_STMT (op2
);
1494 if ((!op1def
|| gimple_nop_p (op1def
))
1495 && (!op2def
|| gimple_nop_p (op2def
)))
1497 gsi
= gsi_after_labels (single_succ (ENTRY_BLOCK_PTR_FOR_FN (cfun
)));
1498 if (gsi_end_p (gsi
))
1500 gimple_stmt_iterator gsi2
1501 = gsi_last_bb (single_succ (ENTRY_BLOCK_PTR_FOR_FN (cfun
)));
1502 gimple_set_uid (sum
,
1503 gsi_end_p (gsi2
) ? 1 : gimple_uid (gsi_stmt (gsi2
)));
1506 gimple_set_uid (sum
, gimple_uid (gsi_stmt (gsi
)));
1507 gsi_insert_before (&gsi
, sum
, GSI_NEW_STMT
);
1511 gimple
*insert_point
;
1512 if ((!op1def
|| gimple_nop_p (op1def
))
1513 || (op2def
&& !gimple_nop_p (op2def
)
1514 && reassoc_stmt_dominates_stmt_p (op1def
, op2def
)))
1515 insert_point
= op2def
;
1517 insert_point
= op1def
;
1518 insert_stmt_after (sum
, insert_point
);
1525 /* Perform un-distribution of divisions and multiplications.
1526 A * X + B * X is transformed into (A + B) * X and A / X + B / X
1527 to (A + B) / X for real X.
1529 The algorithm is organized as follows.
1531 - First we walk the addition chain *OPS looking for summands that
1532 are defined by a multiplication or a real division. This results
1533 in the candidates bitmap with relevant indices into *OPS.
1535 - Second we build the chains of multiplications or divisions for
1536 these candidates, counting the number of occurrences of (operand, code)
1537 pairs in all of the candidates chains.
1539 - Third we sort the (operand, code) pairs by number of occurrence and
1540 process them starting with the pair with the most uses.
1542 * For each such pair we walk the candidates again to build a
1543 second candidate bitmap noting all multiplication/division chains
1544 that have at least one occurrence of (operand, code).
1546 * We build an alternate addition chain only covering these
1547 candidates with one (operand, code) operation removed from their
1548 multiplication/division chain.
1550 * The first candidate gets replaced by the alternate addition chain
1551 multiplied/divided by the operand.
1553 * All candidate chains get disabled for further processing and
1554 processing of (operand, code) pairs continues.
1556 The alternate addition chains built are re-processed by the main
1557 reassociation algorithm which allows optimizing a * x * y + b * y * x
1558 to (a + b ) * x * y in one invocation of the reassociation pass. */
1561 undistribute_ops_list (enum tree_code opcode
,
1562 vec
<operand_entry
*> *ops
, class loop
*loop
)
1564 unsigned int length
= ops
->length ();
1567 unsigned nr_candidates
, nr_candidates2
;
1568 sbitmap_iterator sbi0
;
1569 vec
<operand_entry
*> *subops
;
1570 bool changed
= false;
1571 unsigned int next_oecount_id
= 0;
1574 || opcode
!= PLUS_EXPR
)
1577 /* Build a list of candidates to process. */
1578 auto_sbitmap
candidates (length
);
1579 bitmap_clear (candidates
);
1581 FOR_EACH_VEC_ELT (*ops
, i
, oe1
)
1583 enum tree_code dcode
;
1586 if (TREE_CODE (oe1
->op
) != SSA_NAME
)
1588 oe1def
= SSA_NAME_DEF_STMT (oe1
->op
);
1589 if (!is_gimple_assign (oe1def
))
1591 dcode
= gimple_assign_rhs_code (oe1def
);
1592 if ((dcode
!= MULT_EXPR
1593 && dcode
!= RDIV_EXPR
)
1594 || !is_reassociable_op (oe1def
, dcode
, loop
))
1597 bitmap_set_bit (candidates
, i
);
1601 if (nr_candidates
< 2)
1604 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
1606 fprintf (dump_file
, "searching for un-distribute opportunities ");
1607 print_generic_expr (dump_file
,
1608 (*ops
)[bitmap_first_set_bit (candidates
)]->op
, TDF_NONE
);
1609 fprintf (dump_file
, " %d\n", nr_candidates
);
1612 /* Build linearized sub-operand lists and the counting table. */
1615 hash_table
<oecount_hasher
> ctable (15);
1617 /* ??? Macro arguments cannot have multi-argument template types in
1618 them. This typedef is needed to workaround that limitation. */
1619 typedef vec
<operand_entry
*> vec_operand_entry_t_heap
;
1620 subops
= XCNEWVEC (vec_operand_entry_t_heap
, ops
->length ());
1621 EXECUTE_IF_SET_IN_BITMAP (candidates
, 0, i
, sbi0
)
1624 enum tree_code oecode
;
1627 oedef
= SSA_NAME_DEF_STMT ((*ops
)[i
]->op
);
1628 oecode
= gimple_assign_rhs_code (oedef
);
1629 linearize_expr_tree (&subops
[i
], oedef
,
1630 associative_tree_code (oecode
), false);
1632 FOR_EACH_VEC_ELT (subops
[i
], j
, oe1
)
1639 c
.id
= next_oecount_id
++;
1642 idx
= cvec
.length () + 41;
1643 slot
= ctable
.find_slot (idx
, INSERT
);
1651 cvec
[*slot
- 42].cnt
++;
1656 /* Sort the counting table. */
1657 cvec
.qsort (oecount_cmp
);
1659 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
1662 fprintf (dump_file
, "Candidates:\n");
1663 FOR_EACH_VEC_ELT (cvec
, j
, c
)
1665 fprintf (dump_file
, " %u %s: ", c
->cnt
,
1666 c
->oecode
== MULT_EXPR
1667 ? "*" : c
->oecode
== RDIV_EXPR
? "/" : "?");
1668 print_generic_expr (dump_file
, c
->op
);
1669 fprintf (dump_file
, "\n");
1673 /* Process the (operand, code) pairs in order of most occurrence. */
1674 auto_sbitmap
candidates2 (length
);
1675 while (!cvec
.is_empty ())
1677 oecount
*c
= &cvec
.last ();
1681 /* Now collect the operands in the outer chain that contain
1682 the common operand in their inner chain. */
1683 bitmap_clear (candidates2
);
1685 EXECUTE_IF_SET_IN_BITMAP (candidates
, 0, i
, sbi0
)
1688 enum tree_code oecode
;
1690 tree op
= (*ops
)[i
]->op
;
1692 /* If we undistributed in this chain already this may be
1694 if (TREE_CODE (op
) != SSA_NAME
)
1697 oedef
= SSA_NAME_DEF_STMT (op
);
1698 oecode
= gimple_assign_rhs_code (oedef
);
1699 if (oecode
!= c
->oecode
)
1702 FOR_EACH_VEC_ELT (subops
[i
], j
, oe1
)
1704 if (oe1
->op
== c
->op
)
1706 bitmap_set_bit (candidates2
, i
);
1713 if (nr_candidates2
>= 2)
1715 operand_entry
*oe1
, *oe2
;
1717 int first
= bitmap_first_set_bit (candidates2
);
1719 /* Build the new addition chain. */
1720 oe1
= (*ops
)[first
];
1721 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
1723 fprintf (dump_file
, "Building (");
1724 print_generic_expr (dump_file
, oe1
->op
);
1726 zero_one_operation (&oe1
->op
, c
->oecode
, c
->op
);
1727 EXECUTE_IF_SET_IN_BITMAP (candidates2
, first
+1, i
, sbi0
)
1731 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
1733 fprintf (dump_file
, " + ");
1734 print_generic_expr (dump_file
, oe2
->op
);
1736 zero_one_operation (&oe2
->op
, c
->oecode
, c
->op
);
1737 sum
= build_and_add_sum (TREE_TYPE (oe1
->op
),
1738 oe1
->op
, oe2
->op
, opcode
);
1739 oe2
->op
= build_zero_cst (TREE_TYPE (oe2
->op
));
1741 oe1
->op
= gimple_get_lhs (sum
);
1744 /* Apply the multiplication/division. */
1745 prod
= build_and_add_sum (TREE_TYPE (oe1
->op
),
1746 oe1
->op
, c
->op
, c
->oecode
);
1747 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
1749 fprintf (dump_file
, ") %s ", c
->oecode
== MULT_EXPR
? "*" : "/");
1750 print_generic_expr (dump_file
, c
->op
);
1751 fprintf (dump_file
, "\n");
1754 /* Record it in the addition chain and disable further
1755 undistribution with this op. */
1756 oe1
->op
= gimple_assign_lhs (prod
);
1757 oe1
->rank
= get_rank (oe1
->op
);
1758 subops
[first
].release ();
1766 for (i
= 0; i
< ops
->length (); ++i
)
1767 subops
[i
].release ();
1774 /* Pair to hold the information of one specific VECTOR_TYPE SSA_NAME:
1775 first: element index for each relevant BIT_FIELD_REF.
1776 second: the index of vec ops* for each relevant BIT_FIELD_REF. */
1777 typedef std::pair
<unsigned, unsigned> v_info_elem
;
1780 auto_vec
<v_info_elem
, 32> vec
;
1782 typedef v_info
*v_info_ptr
;
1784 /* Comparison function for qsort on VECTOR SSA_NAME trees by machine mode. */
1786 sort_by_mach_mode (const void *p_i
, const void *p_j
)
1788 const tree tr1
= *((const tree
*) p_i
);
1789 const tree tr2
= *((const tree
*) p_j
);
1790 unsigned int mode1
= TYPE_MODE (TREE_TYPE (tr1
));
1791 unsigned int mode2
= TYPE_MODE (TREE_TYPE (tr2
));
1794 else if (mode1
< mode2
)
1796 if (SSA_NAME_VERSION (tr1
) < SSA_NAME_VERSION (tr2
))
1798 else if (SSA_NAME_VERSION (tr1
) > SSA_NAME_VERSION (tr2
))
1803 /* Cleanup hash map for VECTOR information. */
1805 cleanup_vinfo_map (hash_map
<tree
, v_info_ptr
> &info_map
)
1807 for (hash_map
<tree
, v_info_ptr
>::iterator it
= info_map
.begin ();
1808 it
!= info_map
.end (); ++it
)
1810 v_info_ptr info
= (*it
).second
;
1812 (*it
).second
= NULL
;
1816 /* Perform un-distribution of BIT_FIELD_REF on VECTOR_TYPE.
1817 V1[0] + V1[1] + ... + V1[k] + V2[0] + V2[1] + ... + V2[k] + ... Vn[k]
1819 Vs = (V1 + V2 + ... + Vn)
1820 Vs[0] + Vs[1] + ... + Vs[k]
1822 The basic steps are listed below:
1824 1) Check the addition chain *OPS by looking those summands coming from
1825 VECTOR bit_field_ref on VECTOR type. Put the information into
1826 v_info_map for each satisfied summand, using VECTOR SSA_NAME as key.
1828 2) For each key (VECTOR SSA_NAME), validate all its BIT_FIELD_REFs are
1829 continuous, they can cover the whole VECTOR perfectly without any holes.
1830 Obtain one VECTOR list which contain candidates to be transformed.
1832 3) Sort the VECTOR list by machine mode of VECTOR type, for each group of
1833 candidates with same mode, build the addition statements for them and
1834 generate BIT_FIELD_REFs accordingly.
1837 The current implementation requires the whole VECTORs should be fully
1838 covered, but it can be extended to support partial, checking adjacent
1839 but not fill the whole, it may need some cost model to define the
1840 boundary to do or not.
1843 undistribute_bitref_for_vector (enum tree_code opcode
,
1844 vec
<operand_entry
*> *ops
, struct loop
*loop
)
1846 if (ops
->length () <= 1)
1849 if (opcode
!= PLUS_EXPR
1850 && opcode
!= MULT_EXPR
1851 && opcode
!= BIT_XOR_EXPR
1852 && opcode
!= BIT_IOR_EXPR
1853 && opcode
!= BIT_AND_EXPR
)
1856 hash_map
<tree
, v_info_ptr
> v_info_map
;
1860 /* Find those summands from VECTOR BIT_FIELD_REF in addition chain, put the
1861 information into map. */
1862 FOR_EACH_VEC_ELT (*ops
, i
, oe1
)
1864 enum tree_code dcode
;
1867 if (TREE_CODE (oe1
->op
) != SSA_NAME
)
1869 oe1def
= SSA_NAME_DEF_STMT (oe1
->op
);
1870 if (!is_gimple_assign (oe1def
))
1872 dcode
= gimple_assign_rhs_code (oe1def
);
1873 if (dcode
!= BIT_FIELD_REF
|| !is_reassociable_op (oe1def
, dcode
, loop
))
1876 tree rhs
= gimple_assign_rhs1 (oe1def
);
1877 tree vec
= TREE_OPERAND (rhs
, 0);
1878 tree vec_type
= TREE_TYPE (vec
);
1880 if (TREE_CODE (vec
) != SSA_NAME
|| !VECTOR_TYPE_P (vec_type
))
1883 /* Ignore it if target machine can't support this VECTOR type. */
1884 if (!VECTOR_MODE_P (TYPE_MODE (vec_type
)))
1887 /* Check const vector type, constrain BIT_FIELD_REF offset and size. */
1888 if (!TYPE_VECTOR_SUBPARTS (vec_type
).is_constant ())
1891 if (VECTOR_TYPE_P (TREE_TYPE (rhs
))
1892 || !is_a
<scalar_mode
> (TYPE_MODE (TREE_TYPE (rhs
))))
1895 /* The type of BIT_FIELD_REF might not be equal to the element type of
1896 the vector. We want to use a vector type with element type the
1897 same as the BIT_FIELD_REF and size the same as TREE_TYPE (vec). */
1898 if (!useless_type_conversion_p (TREE_TYPE (rhs
), TREE_TYPE (vec_type
)))
1900 machine_mode simd_mode
;
1901 unsigned HOST_WIDE_INT size
, nunits
;
1902 unsigned HOST_WIDE_INT elem_size
1903 = tree_to_uhwi (TYPE_SIZE (TREE_TYPE (rhs
)));
1904 if (!GET_MODE_BITSIZE (TYPE_MODE (vec_type
)).is_constant (&size
))
1906 if (size
<= elem_size
|| (size
% elem_size
) != 0)
1908 nunits
= size
/ elem_size
;
1909 if (!mode_for_vector (SCALAR_TYPE_MODE (TREE_TYPE (rhs
)),
1910 nunits
).exists (&simd_mode
))
1912 vec_type
= build_vector_type_for_mode (TREE_TYPE (rhs
), simd_mode
);
1914 /* Ignore it if target machine can't support this VECTOR type. */
1915 if (!VECTOR_MODE_P (TYPE_MODE (vec_type
)))
1918 /* Check const vector type, constrain BIT_FIELD_REF offset and
1920 if (!TYPE_VECTOR_SUBPARTS (vec_type
).is_constant ())
1923 if (maybe_ne (GET_MODE_SIZE (TYPE_MODE (vec_type
)),
1924 GET_MODE_SIZE (TYPE_MODE (TREE_TYPE (vec
)))))
1928 tree elem_type
= TREE_TYPE (vec_type
);
1929 unsigned HOST_WIDE_INT elem_size
= tree_to_uhwi (TYPE_SIZE (elem_type
));
1930 if (maybe_ne (bit_field_size (rhs
), elem_size
))
1934 if (!constant_multiple_p (bit_field_offset (rhs
), elem_size
, &idx
))
1937 /* Ignore it if target machine can't support this type of VECTOR
1939 optab op_tab
= optab_for_tree_code (opcode
, vec_type
, optab_vector
);
1940 if (optab_handler (op_tab
, TYPE_MODE (vec_type
)) == CODE_FOR_nothing
)
1944 v_info_ptr
&info
= v_info_map
.get_or_insert (vec
, &existed
);
1948 info
->vec_type
= vec_type
;
1950 else if (!types_compatible_p (vec_type
, info
->vec_type
))
1952 info
->vec
.safe_push (std::make_pair (idx
, i
));
1955 /* At least two VECTOR to combine. */
1956 if (v_info_map
.elements () <= 1)
1958 cleanup_vinfo_map (v_info_map
);
1962 /* Verify all VECTOR candidates by checking two conditions:
1963 1) sorted offsets are adjacent, no holes.
1964 2) can fill the whole VECTOR perfectly.
1965 And add the valid candidates to a vector for further handling. */
1966 auto_vec
<tree
> valid_vecs (v_info_map
.elements ());
1967 for (hash_map
<tree
, v_info_ptr
>::iterator it
= v_info_map
.begin ();
1968 it
!= v_info_map
.end (); ++it
)
1970 tree cand_vec
= (*it
).first
;
1971 v_info_ptr cand_info
= (*it
).second
;
1972 unsigned int num_elems
1973 = TYPE_VECTOR_SUBPARTS (cand_info
->vec_type
).to_constant ();
1974 if (cand_info
->vec
.length () != num_elems
)
1976 sbitmap holes
= sbitmap_alloc (num_elems
);
1977 bitmap_ones (holes
);
1980 FOR_EACH_VEC_ELT (cand_info
->vec
, i
, curr
)
1982 if (!bitmap_bit_p (holes
, curr
->first
))
1988 bitmap_clear_bit (holes
, curr
->first
);
1990 if (valid
&& bitmap_empty_p (holes
))
1991 valid_vecs
.quick_push (cand_vec
);
1992 sbitmap_free (holes
);
1995 /* At least two VECTOR to combine. */
1996 if (valid_vecs
.length () <= 1)
1998 cleanup_vinfo_map (v_info_map
);
2002 valid_vecs
.qsort (sort_by_mach_mode
);
2003 /* Go through all candidates by machine mode order, query the mode_to_total
2004 to get the total number for each mode and skip the single one. */
2005 for (unsigned i
= 0; i
< valid_vecs
.length () - 1; ++i
)
2007 tree tvec
= valid_vecs
[i
];
2008 enum machine_mode mode
= TYPE_MODE (TREE_TYPE (tvec
));
2010 /* Skip modes with only a single candidate. */
2011 if (TYPE_MODE (TREE_TYPE (valid_vecs
[i
+ 1])) != mode
)
2014 unsigned int idx
, j
;
2016 tree sum_vec
= tvec
;
2017 v_info_ptr info_ptr
= *(v_info_map
.get (tvec
));
2019 tree vec_type
= info_ptr
->vec_type
;
2021 /* Build the sum for all candidates with same mode. */
2024 sum
= build_and_add_sum (vec_type
, sum_vec
,
2025 valid_vecs
[i
+ 1], opcode
);
2026 if (!useless_type_conversion_p (vec_type
,
2027 TREE_TYPE (valid_vecs
[i
+ 1])))
2029 /* Update the operands only after build_and_add_sum,
2030 so that we don't have to repeat the placement algorithm
2031 of build_and_add_sum. */
2032 gimple_stmt_iterator gsi
= gsi_for_stmt (sum
);
2033 tree vce
= build1 (VIEW_CONVERT_EXPR
, vec_type
,
2035 tree lhs
= make_ssa_name (vec_type
);
2036 gimple
*g
= gimple_build_assign (lhs
, VIEW_CONVERT_EXPR
, vce
);
2037 gimple_set_uid (g
, gimple_uid (sum
));
2038 gsi_insert_before (&gsi
, g
, GSI_NEW_STMT
);
2039 gimple_assign_set_rhs2 (sum
, lhs
);
2040 if (sum_vec
== tvec
)
2042 vce
= build1 (VIEW_CONVERT_EXPR
, vec_type
, sum_vec
);
2043 lhs
= make_ssa_name (vec_type
);
2044 g
= gimple_build_assign (lhs
, VIEW_CONVERT_EXPR
, vce
);
2045 gimple_set_uid (g
, gimple_uid (sum
));
2046 gsi_insert_before (&gsi
, g
, GSI_NEW_STMT
);
2047 gimple_assign_set_rhs1 (sum
, lhs
);
2051 sum_vec
= gimple_get_lhs (sum
);
2052 info_ptr
= *(v_info_map
.get (valid_vecs
[i
+ 1]));
2053 gcc_assert (types_compatible_p (vec_type
, info_ptr
->vec_type
));
2054 /* Update those related ops of current candidate VECTOR. */
2055 FOR_EACH_VEC_ELT (info_ptr
->vec
, j
, elem
)
2058 gimple
*def
= SSA_NAME_DEF_STMT ((*ops
)[idx
]->op
);
2059 /* Set this then op definition will get DCEd later. */
2060 gimple_set_visited (def
, true);
2061 if (opcode
== PLUS_EXPR
2062 || opcode
== BIT_XOR_EXPR
2063 || opcode
== BIT_IOR_EXPR
)
2064 (*ops
)[idx
]->op
= build_zero_cst (TREE_TYPE ((*ops
)[idx
]->op
));
2065 else if (opcode
== MULT_EXPR
)
2066 (*ops
)[idx
]->op
= build_one_cst (TREE_TYPE ((*ops
)[idx
]->op
));
2069 gcc_assert (opcode
== BIT_AND_EXPR
);
2071 = build_all_ones_cst (TREE_TYPE ((*ops
)[idx
]->op
));
2073 (*ops
)[idx
]->rank
= 0;
2075 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
2077 fprintf (dump_file
, "Generating addition -> ");
2078 print_gimple_stmt (dump_file
, sum
, 0);
2082 while ((i
< valid_vecs
.length () - 1)
2083 && TYPE_MODE (TREE_TYPE (valid_vecs
[i
+ 1])) == mode
);
2085 /* Referring to first valid VECTOR with this mode, generate the
2086 BIT_FIELD_REF statements accordingly. */
2087 info_ptr
= *(v_info_map
.get (tvec
));
2089 tree elem_type
= TREE_TYPE (vec_type
);
2090 FOR_EACH_VEC_ELT (info_ptr
->vec
, j
, elem
)
2093 tree dst
= make_ssa_name (elem_type
);
2094 tree pos
= bitsize_int (elem
->first
2095 * tree_to_uhwi (TYPE_SIZE (elem_type
)));
2096 tree bfr
= build3 (BIT_FIELD_REF
, elem_type
, sum_vec
,
2097 TYPE_SIZE (elem_type
), pos
);
2098 gimple
*gs
= gimple_build_assign (dst
, BIT_FIELD_REF
, bfr
);
2099 insert_stmt_after (gs
, sum
);
2100 gimple
*def
= SSA_NAME_DEF_STMT ((*ops
)[idx
]->op
);
2101 /* Set this then op definition will get DCEd later. */
2102 gimple_set_visited (def
, true);
2103 (*ops
)[idx
]->op
= gimple_assign_lhs (gs
);
2104 (*ops
)[idx
]->rank
= get_rank ((*ops
)[idx
]->op
);
2105 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
2107 fprintf (dump_file
, "Generating bit_field_ref -> ");
2108 print_gimple_stmt (dump_file
, gs
, 0);
2113 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
2114 fprintf (dump_file
, "undistributiong bit_field_ref for vector done.\n");
2116 cleanup_vinfo_map (v_info_map
);
2121 /* If OPCODE is BIT_IOR_EXPR or BIT_AND_EXPR and CURR is a comparison
2122 expression, examine the other OPS to see if any of them are comparisons
2123 of the same values, which we may be able to combine or eliminate.
2124 For example, we can rewrite (a < b) | (a == b) as (a <= b). */
2127 eliminate_redundant_comparison (enum tree_code opcode
,
2128 vec
<operand_entry
*> *ops
,
2129 unsigned int currindex
,
2130 operand_entry
*curr
)
2133 enum tree_code lcode
, rcode
;
2134 gimple
*def1
, *def2
;
2138 if (opcode
!= BIT_IOR_EXPR
&& opcode
!= BIT_AND_EXPR
)
2141 /* Check that CURR is a comparison. */
2142 if (TREE_CODE (curr
->op
) != SSA_NAME
)
2144 def1
= SSA_NAME_DEF_STMT (curr
->op
);
2145 if (!is_gimple_assign (def1
))
2147 lcode
= gimple_assign_rhs_code (def1
);
2148 if (TREE_CODE_CLASS (lcode
) != tcc_comparison
)
2150 op1
= gimple_assign_rhs1 (def1
);
2151 op2
= gimple_assign_rhs2 (def1
);
2153 /* Now look for a similar comparison in the remaining OPS. */
2154 for (i
= currindex
+ 1; ops
->iterate (i
, &oe
); i
++)
2158 if (TREE_CODE (oe
->op
) != SSA_NAME
)
2160 def2
= SSA_NAME_DEF_STMT (oe
->op
);
2161 if (!is_gimple_assign (def2
))
2163 rcode
= gimple_assign_rhs_code (def2
);
2164 if (TREE_CODE_CLASS (rcode
) != tcc_comparison
)
2167 /* If we got here, we have a match. See if we can combine the
2169 tree type
= TREE_TYPE (gimple_assign_lhs (def1
));
2170 if (opcode
== BIT_IOR_EXPR
)
2171 t
= maybe_fold_or_comparisons (type
,
2173 rcode
, gimple_assign_rhs1 (def2
),
2174 gimple_assign_rhs2 (def2
));
2176 t
= maybe_fold_and_comparisons (type
,
2178 rcode
, gimple_assign_rhs1 (def2
),
2179 gimple_assign_rhs2 (def2
));
2183 /* maybe_fold_and_comparisons and maybe_fold_or_comparisons
2184 always give us a boolean_type_node value back. If the original
2185 BIT_AND_EXPR or BIT_IOR_EXPR was of a wider integer type,
2186 we need to convert. */
2187 if (!useless_type_conversion_p (TREE_TYPE (curr
->op
), TREE_TYPE (t
)))
2188 t
= fold_convert (TREE_TYPE (curr
->op
), t
);
2190 if (TREE_CODE (t
) != INTEGER_CST
2191 && !operand_equal_p (t
, curr
->op
, 0))
2193 enum tree_code subcode
;
2194 tree newop1
, newop2
;
2195 if (!COMPARISON_CLASS_P (t
))
2197 extract_ops_from_tree (t
, &subcode
, &newop1
, &newop2
);
2198 STRIP_USELESS_TYPE_CONVERSION (newop1
);
2199 STRIP_USELESS_TYPE_CONVERSION (newop2
);
2200 if (!is_gimple_val (newop1
) || !is_gimple_val (newop2
))
2204 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
2206 fprintf (dump_file
, "Equivalence: ");
2207 print_generic_expr (dump_file
, curr
->op
);
2208 fprintf (dump_file
, " %s ", op_symbol_code (opcode
));
2209 print_generic_expr (dump_file
, oe
->op
);
2210 fprintf (dump_file
, " -> ");
2211 print_generic_expr (dump_file
, t
);
2212 fprintf (dump_file
, "\n");
2215 /* Now we can delete oe, as it has been subsumed by the new combined
2217 ops
->ordered_remove (i
);
2218 reassociate_stats
.ops_eliminated
++;
2220 /* If t is the same as curr->op, we're done. Otherwise we must
2221 replace curr->op with t. Special case is if we got a constant
2222 back, in which case we add it to the end instead of in place of
2223 the current entry. */
2224 if (TREE_CODE (t
) == INTEGER_CST
)
2226 ops
->ordered_remove (currindex
);
2227 add_to_ops_vec (ops
, t
);
2229 else if (!operand_equal_p (t
, curr
->op
, 0))
2232 enum tree_code subcode
;
2235 gcc_assert (COMPARISON_CLASS_P (t
));
2236 extract_ops_from_tree (t
, &subcode
, &newop1
, &newop2
);
2237 STRIP_USELESS_TYPE_CONVERSION (newop1
);
2238 STRIP_USELESS_TYPE_CONVERSION (newop2
);
2239 gcc_checking_assert (is_gimple_val (newop1
)
2240 && is_gimple_val (newop2
));
2241 sum
= build_and_add_sum (TREE_TYPE (t
), newop1
, newop2
, subcode
);
2242 curr
->op
= gimple_get_lhs (sum
);
2251 /* Transform repeated addition of same values into multiply with
2254 transform_add_to_multiply (vec
<operand_entry
*> *ops
)
2257 tree op
= NULL_TREE
;
2259 int i
, start
= -1, end
= 0, count
= 0;
2260 auto_vec
<std::pair
<int, int> > indxs
;
2261 bool changed
= false;
2263 if (!INTEGRAL_TYPE_P (TREE_TYPE ((*ops
)[0]->op
))
2264 && (!SCALAR_FLOAT_TYPE_P (TREE_TYPE ((*ops
)[0]->op
))
2265 || !flag_unsafe_math_optimizations
))
2268 /* Look for repeated operands. */
2269 FOR_EACH_VEC_ELT (*ops
, i
, oe
)
2277 else if (operand_equal_p (oe
->op
, op
, 0))
2285 indxs
.safe_push (std::make_pair (start
, end
));
2293 indxs
.safe_push (std::make_pair (start
, end
));
2295 for (j
= indxs
.length () - 1; j
>= 0; --j
)
2297 /* Convert repeated operand addition to multiplication. */
2298 start
= indxs
[j
].first
;
2299 end
= indxs
[j
].second
;
2300 op
= (*ops
)[start
]->op
;
2301 count
= end
- start
+ 1;
2302 for (i
= end
; i
>= start
; --i
)
2303 ops
->unordered_remove (i
);
2304 tree tmp
= make_ssa_name (TREE_TYPE (op
));
2305 tree cst
= build_int_cst (integer_type_node
, count
);
2307 = gimple_build_assign (tmp
, MULT_EXPR
,
2308 op
, fold_convert (TREE_TYPE (op
), cst
));
2309 gimple_set_visited (mul_stmt
, true);
2310 add_to_ops_vec (ops
, tmp
, mul_stmt
);
2318 /* Perform various identities and other optimizations on the list of
2319 operand entries, stored in OPS. The tree code for the binary
2320 operation between all the operands is OPCODE. */
2323 optimize_ops_list (enum tree_code opcode
,
2324 vec
<operand_entry
*> *ops
)
2326 unsigned int length
= ops
->length ();
2329 operand_entry
*oelast
= NULL
;
2330 bool iterate
= false;
2335 oelast
= ops
->last ();
2337 /* If the last two are constants, pop the constants off, merge them
2338 and try the next two. */
2339 if (oelast
->rank
== 0 && is_gimple_min_invariant (oelast
->op
))
2341 operand_entry
*oelm1
= (*ops
)[length
- 2];
2343 if (oelm1
->rank
== 0
2344 && is_gimple_min_invariant (oelm1
->op
)
2345 && useless_type_conversion_p (TREE_TYPE (oelm1
->op
),
2346 TREE_TYPE (oelast
->op
)))
2348 tree folded
= fold_binary (opcode
, TREE_TYPE (oelm1
->op
),
2349 oelm1
->op
, oelast
->op
);
2351 if (folded
&& is_gimple_min_invariant (folded
))
2353 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
2354 fprintf (dump_file
, "Merging constants\n");
2359 add_to_ops_vec (ops
, folded
);
2360 reassociate_stats
.constants_eliminated
++;
2362 optimize_ops_list (opcode
, ops
);
2368 eliminate_using_constants (opcode
, ops
);
2371 for (i
= 0; ops
->iterate (i
, &oe
);)
2375 if (eliminate_not_pairs (opcode
, ops
, i
, oe
))
2377 if (eliminate_duplicate_pair (opcode
, ops
, &done
, i
, oe
, oelast
)
2378 || (!done
&& eliminate_plus_minus_pair (opcode
, ops
, i
, oe
))
2379 || (!done
&& eliminate_redundant_comparison (opcode
, ops
, i
, oe
)))
2392 optimize_ops_list (opcode
, ops
);
2395 /* The following functions are subroutines to optimize_range_tests and allow
2396 it to try to change a logical combination of comparisons into a range
2400 X == 2 || X == 5 || X == 3 || X == 4
2404 (unsigned) (X - 2) <= 3
2406 For more information see comments above fold_test_range in fold-const.c,
2407 this implementation is for GIMPLE. */
2415 bool strict_overflow_p
;
2416 unsigned int idx
, next
;
2419 /* This is similar to make_range in fold-const.c, but on top of
2420 GIMPLE instead of trees. If EXP is non-NULL, it should be
2421 an SSA_NAME and STMT argument is ignored, otherwise STMT
2422 argument should be a GIMPLE_COND. */
2425 init_range_entry (struct range_entry
*r
, tree exp
, gimple
*stmt
)
2429 bool is_bool
, strict_overflow_p
;
2433 r
->strict_overflow_p
= false;
2435 r
->high
= NULL_TREE
;
2436 if (exp
!= NULL_TREE
2437 && (TREE_CODE (exp
) != SSA_NAME
|| !INTEGRAL_TYPE_P (TREE_TYPE (exp
))))
2440 /* Start with simply saying "EXP != 0" and then look at the code of EXP
2441 and see if we can refine the range. Some of the cases below may not
2442 happen, but it doesn't seem worth worrying about this. We "continue"
2443 the outer loop when we've changed something; otherwise we "break"
2444 the switch, which will "break" the while. */
2445 low
= exp
? build_int_cst (TREE_TYPE (exp
), 0) : boolean_false_node
;
2448 strict_overflow_p
= false;
2450 if (exp
== NULL_TREE
)
2452 else if (TYPE_PRECISION (TREE_TYPE (exp
)) == 1)
2454 if (TYPE_UNSIGNED (TREE_TYPE (exp
)))
2459 else if (TREE_CODE (TREE_TYPE (exp
)) == BOOLEAN_TYPE
)
2464 enum tree_code code
;
2465 tree arg0
, arg1
, exp_type
;
2469 if (exp
!= NULL_TREE
)
2471 if (TREE_CODE (exp
) != SSA_NAME
2472 || SSA_NAME_OCCURS_IN_ABNORMAL_PHI (exp
))
2475 stmt
= SSA_NAME_DEF_STMT (exp
);
2476 if (!is_gimple_assign (stmt
))
2479 code
= gimple_assign_rhs_code (stmt
);
2480 arg0
= gimple_assign_rhs1 (stmt
);
2481 arg1
= gimple_assign_rhs2 (stmt
);
2482 exp_type
= TREE_TYPE (exp
);
2486 code
= gimple_cond_code (stmt
);
2487 arg0
= gimple_cond_lhs (stmt
);
2488 arg1
= gimple_cond_rhs (stmt
);
2489 exp_type
= boolean_type_node
;
2492 if (TREE_CODE (arg0
) != SSA_NAME
2493 || SSA_NAME_OCCURS_IN_ABNORMAL_PHI (arg0
))
2495 loc
= gimple_location (stmt
);
2499 if (TREE_CODE (TREE_TYPE (exp
)) == BOOLEAN_TYPE
2500 /* Ensure the range is either +[-,0], +[0,0],
2501 -[-,0], -[0,0] or +[1,-], +[1,1], -[1,-] or
2502 -[1,1]. If it is e.g. +[-,-] or -[-,-]
2503 or similar expression of unconditional true or
2504 false, it should not be negated. */
2505 && ((high
&& integer_zerop (high
))
2506 || (low
&& integer_onep (low
))))
2519 if ((TYPE_PRECISION (exp_type
) == 1
2520 || TREE_CODE (exp_type
) == BOOLEAN_TYPE
)
2521 && TYPE_PRECISION (TREE_TYPE (arg0
)) > 1)
2524 else if (TYPE_PRECISION (TREE_TYPE (arg0
)) == 1)
2526 if (TYPE_UNSIGNED (TREE_TYPE (arg0
)))
2531 else if (TREE_CODE (TREE_TYPE (arg0
)) == BOOLEAN_TYPE
)
2546 nexp
= make_range_step (loc
, code
, arg0
, arg1
, exp_type
,
2548 &strict_overflow_p
);
2549 if (nexp
!= NULL_TREE
)
2552 gcc_assert (TREE_CODE (exp
) == SSA_NAME
);
2565 r
->strict_overflow_p
= strict_overflow_p
;
2569 /* Comparison function for qsort. Sort entries
2570 without SSA_NAME exp first, then with SSA_NAMEs sorted
2571 by increasing SSA_NAME_VERSION, and for the same SSA_NAMEs
2572 by increasing ->low and if ->low is the same, by increasing
2573 ->high. ->low == NULL_TREE means minimum, ->high == NULL_TREE
2577 range_entry_cmp (const void *a
, const void *b
)
2579 const struct range_entry
*p
= (const struct range_entry
*) a
;
2580 const struct range_entry
*q
= (const struct range_entry
*) b
;
2582 if (p
->exp
!= NULL_TREE
&& TREE_CODE (p
->exp
) == SSA_NAME
)
2584 if (q
->exp
!= NULL_TREE
&& TREE_CODE (q
->exp
) == SSA_NAME
)
2586 /* Group range_entries for the same SSA_NAME together. */
2587 if (SSA_NAME_VERSION (p
->exp
) < SSA_NAME_VERSION (q
->exp
))
2589 else if (SSA_NAME_VERSION (p
->exp
) > SSA_NAME_VERSION (q
->exp
))
2591 /* If ->low is different, NULL low goes first, then by
2593 if (p
->low
!= NULL_TREE
)
2595 if (q
->low
!= NULL_TREE
)
2597 tree tem
= fold_binary (LT_EXPR
, boolean_type_node
,
2599 if (tem
&& integer_onep (tem
))
2601 tem
= fold_binary (GT_EXPR
, boolean_type_node
,
2603 if (tem
&& integer_onep (tem
))
2609 else if (q
->low
!= NULL_TREE
)
2611 /* If ->high is different, NULL high goes last, before that by
2613 if (p
->high
!= NULL_TREE
)
2615 if (q
->high
!= NULL_TREE
)
2617 tree tem
= fold_binary (LT_EXPR
, boolean_type_node
,
2619 if (tem
&& integer_onep (tem
))
2621 tem
= fold_binary (GT_EXPR
, boolean_type_node
,
2623 if (tem
&& integer_onep (tem
))
2629 else if (q
->high
!= NULL_TREE
)
2631 /* If both ranges are the same, sort below by ascending idx. */
2636 else if (q
->exp
!= NULL_TREE
&& TREE_CODE (q
->exp
) == SSA_NAME
)
2639 if (p
->idx
< q
->idx
)
2643 gcc_checking_assert (p
->idx
> q
->idx
);
2648 /* Helper function for update_range_test. Force EXPR into an SSA_NAME,
2649 insert needed statements BEFORE or after GSI. */
2652 force_into_ssa_name (gimple_stmt_iterator
*gsi
, tree expr
, bool before
)
2654 enum gsi_iterator_update m
= before
? GSI_SAME_STMT
: GSI_CONTINUE_LINKING
;
2655 tree ret
= force_gimple_operand_gsi (gsi
, expr
, true, NULL_TREE
, before
, m
);
2656 if (TREE_CODE (ret
) != SSA_NAME
)
2658 gimple
*g
= gimple_build_assign (make_ssa_name (TREE_TYPE (ret
)), ret
);
2660 gsi_insert_before (gsi
, g
, GSI_SAME_STMT
);
2662 gsi_insert_after (gsi
, g
, GSI_CONTINUE_LINKING
);
2663 ret
= gimple_assign_lhs (g
);
2668 /* Helper routine of optimize_range_test.
2669 [EXP, IN_P, LOW, HIGH, STRICT_OVERFLOW_P] is a merged range for
2670 RANGE and OTHERRANGE through OTHERRANGE + COUNT - 1 ranges,
2671 OPCODE and OPS are arguments of optimize_range_tests. If OTHERRANGE
2672 is NULL, OTHERRANGEP should not be and then OTHERRANGEP points to
2673 an array of COUNT pointers to other ranges. Return
2674 true if the range merge has been successful.
2675 If OPCODE is ERROR_MARK, this is called from within
2676 maybe_optimize_range_tests and is performing inter-bb range optimization.
2677 In that case, whether an op is BIT_AND_EXPR or BIT_IOR_EXPR is found in
2681 update_range_test (struct range_entry
*range
, struct range_entry
*otherrange
,
2682 struct range_entry
**otherrangep
,
2683 unsigned int count
, enum tree_code opcode
,
2684 vec
<operand_entry
*> *ops
, tree exp
, gimple_seq seq
,
2685 bool in_p
, tree low
, tree high
, bool strict_overflow_p
)
2687 operand_entry
*oe
= (*ops
)[range
->idx
];
2689 gimple
*stmt
= op
? SSA_NAME_DEF_STMT (op
)
2690 : last_stmt (BASIC_BLOCK_FOR_FN (cfun
, oe
->id
));
2691 location_t loc
= gimple_location (stmt
);
2692 tree optype
= op
? TREE_TYPE (op
) : boolean_type_node
;
2693 tree tem
= build_range_check (loc
, optype
, unshare_expr (exp
),
2695 enum warn_strict_overflow_code wc
= WARN_STRICT_OVERFLOW_COMPARISON
;
2696 gimple_stmt_iterator gsi
;
2697 unsigned int i
, uid
;
2699 if (tem
== NULL_TREE
)
2702 /* If op is default def SSA_NAME, there is no place to insert the
2703 new comparison. Give up, unless we can use OP itself as the
2705 if (op
&& SSA_NAME_IS_DEFAULT_DEF (op
))
2707 if (op
== range
->exp
2708 && ((TYPE_PRECISION (optype
) == 1 && TYPE_UNSIGNED (optype
))
2709 || TREE_CODE (optype
) == BOOLEAN_TYPE
)
2711 || (TREE_CODE (tem
) == EQ_EXPR
2712 && TREE_OPERAND (tem
, 0) == op
2713 && integer_onep (TREE_OPERAND (tem
, 1))))
2714 && opcode
!= BIT_IOR_EXPR
2715 && (opcode
!= ERROR_MARK
|| oe
->rank
!= BIT_IOR_EXPR
))
2724 if (strict_overflow_p
&& issue_strict_overflow_warning (wc
))
2725 warning_at (loc
, OPT_Wstrict_overflow
,
2726 "assuming signed overflow does not occur "
2727 "when simplifying range test");
2729 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
2731 struct range_entry
*r
;
2732 fprintf (dump_file
, "Optimizing range tests ");
2733 print_generic_expr (dump_file
, range
->exp
);
2734 fprintf (dump_file
, " %c[", range
->in_p
? '+' : '-');
2735 print_generic_expr (dump_file
, range
->low
);
2736 fprintf (dump_file
, ", ");
2737 print_generic_expr (dump_file
, range
->high
);
2738 fprintf (dump_file
, "]");
2739 for (i
= 0; i
< count
; i
++)
2746 && r
->exp
!= range
->exp
2747 && TREE_CODE (r
->exp
) == SSA_NAME
)
2749 fprintf (dump_file
, " and ");
2750 print_generic_expr (dump_file
, r
->exp
);
2753 fprintf (dump_file
, " and");
2754 fprintf (dump_file
, " %c[", r
->in_p
? '+' : '-');
2755 print_generic_expr (dump_file
, r
->low
);
2756 fprintf (dump_file
, ", ");
2757 print_generic_expr (dump_file
, r
->high
);
2758 fprintf (dump_file
, "]");
2760 fprintf (dump_file
, "\n into ");
2761 print_generic_expr (dump_file
, tem
);
2762 fprintf (dump_file
, "\n");
2765 if (opcode
== BIT_IOR_EXPR
2766 || (opcode
== ERROR_MARK
&& oe
->rank
== BIT_IOR_EXPR
))
2767 tem
= invert_truthvalue_loc (loc
, tem
);
2769 tem
= fold_convert_loc (loc
, optype
, tem
);
2772 gsi
= gsi_for_stmt (stmt
);
2773 uid
= gimple_uid (stmt
);
2781 gcc_checking_assert (tem
== op
);
2782 /* In rare cases range->exp can be equal to lhs of stmt.
2783 In that case we have to insert after the stmt rather then before
2784 it. If stmt is a PHI, insert it at the start of the basic block. */
2785 else if (op
!= range
->exp
)
2787 gsi_insert_seq_before (&gsi
, seq
, GSI_SAME_STMT
);
2788 tem
= force_into_ssa_name (&gsi
, tem
, true);
2791 else if (gimple_code (stmt
) != GIMPLE_PHI
)
2793 gsi_insert_seq_after (&gsi
, seq
, GSI_CONTINUE_LINKING
);
2794 tem
= force_into_ssa_name (&gsi
, tem
, false);
2798 gsi
= gsi_after_labels (gimple_bb (stmt
));
2799 if (!gsi_end_p (gsi
))
2800 uid
= gimple_uid (gsi_stmt (gsi
));
2803 gsi
= gsi_start_bb (gimple_bb (stmt
));
2805 while (!gsi_end_p (gsi
))
2807 uid
= gimple_uid (gsi_stmt (gsi
));
2811 gsi_insert_seq_before (&gsi
, seq
, GSI_SAME_STMT
);
2812 tem
= force_into_ssa_name (&gsi
, tem
, true);
2813 if (gsi_end_p (gsi
))
2814 gsi
= gsi_last_bb (gimple_bb (stmt
));
2818 for (; !gsi_end_p (gsi
); gsi_prev (&gsi
))
2819 if (gimple_uid (gsi_stmt (gsi
)))
2822 gimple_set_uid (gsi_stmt (gsi
), uid
);
2829 range
->strict_overflow_p
= false;
2831 for (i
= 0; i
< count
; i
++)
2834 range
= otherrange
+ i
;
2836 range
= otherrangep
[i
];
2837 oe
= (*ops
)[range
->idx
];
2838 /* Now change all the other range test immediate uses, so that
2839 those tests will be optimized away. */
2840 if (opcode
== ERROR_MARK
)
2843 oe
->op
= build_int_cst (TREE_TYPE (oe
->op
),
2844 oe
->rank
== BIT_IOR_EXPR
? 0 : 1);
2846 oe
->op
= (oe
->rank
== BIT_IOR_EXPR
2847 ? boolean_false_node
: boolean_true_node
);
2850 oe
->op
= error_mark_node
;
2851 range
->exp
= NULL_TREE
;
2852 range
->low
= NULL_TREE
;
2853 range
->high
= NULL_TREE
;
2858 /* Optimize X == CST1 || X == CST2
2859 if popcount (CST1 ^ CST2) == 1 into
2860 (X & ~(CST1 ^ CST2)) == (CST1 & ~(CST1 ^ CST2)).
2861 Similarly for ranges. E.g.
2862 X != 2 && X != 3 && X != 10 && X != 11
2863 will be transformed by the previous optimization into
2864 !((X - 2U) <= 1U || (X - 10U) <= 1U)
2865 and this loop can transform that into
2866 !(((X & ~8) - 2U) <= 1U). */
2869 optimize_range_tests_xor (enum tree_code opcode
, tree type
,
2870 tree lowi
, tree lowj
, tree highi
, tree highj
,
2871 vec
<operand_entry
*> *ops
,
2872 struct range_entry
*rangei
,
2873 struct range_entry
*rangej
)
2875 tree lowxor
, highxor
, tem
, exp
;
2876 /* Check lowi ^ lowj == highi ^ highj and
2877 popcount (lowi ^ lowj) == 1. */
2878 lowxor
= fold_binary (BIT_XOR_EXPR
, type
, lowi
, lowj
);
2879 if (lowxor
== NULL_TREE
|| TREE_CODE (lowxor
) != INTEGER_CST
)
2881 if (!integer_pow2p (lowxor
))
2883 highxor
= fold_binary (BIT_XOR_EXPR
, type
, highi
, highj
);
2884 if (!tree_int_cst_equal (lowxor
, highxor
))
2888 scalar_int_mode mode
= as_a
<scalar_int_mode
> (TYPE_MODE (type
));
2889 int prec
= GET_MODE_PRECISION (mode
);
2890 if (TYPE_PRECISION (type
) < prec
2891 || (wi::to_wide (TYPE_MIN_VALUE (type
))
2892 != wi::min_value (prec
, TYPE_SIGN (type
)))
2893 || (wi::to_wide (TYPE_MAX_VALUE (type
))
2894 != wi::max_value (prec
, TYPE_SIGN (type
))))
2896 type
= build_nonstandard_integer_type (prec
, TYPE_UNSIGNED (type
));
2897 exp
= fold_convert (type
, exp
);
2898 lowxor
= fold_convert (type
, lowxor
);
2899 lowi
= fold_convert (type
, lowi
);
2900 highi
= fold_convert (type
, highi
);
2902 tem
= fold_build1 (BIT_NOT_EXPR
, type
, lowxor
);
2903 exp
= fold_build2 (BIT_AND_EXPR
, type
, exp
, tem
);
2904 lowj
= fold_build2 (BIT_AND_EXPR
, type
, lowi
, tem
);
2905 highj
= fold_build2 (BIT_AND_EXPR
, type
, highi
, tem
);
2906 if (update_range_test (rangei
, rangej
, NULL
, 1, opcode
, ops
, exp
,
2907 NULL
, rangei
->in_p
, lowj
, highj
,
2908 rangei
->strict_overflow_p
2909 || rangej
->strict_overflow_p
))
2914 /* Optimize X == CST1 || X == CST2
2915 if popcount (CST2 - CST1) == 1 into
2916 ((X - CST1) & ~(CST2 - CST1)) == 0.
2917 Similarly for ranges. E.g.
2918 X == 43 || X == 76 || X == 44 || X == 78 || X == 77 || X == 46
2919 || X == 75 || X == 45
2920 will be transformed by the previous optimization into
2921 (X - 43U) <= 3U || (X - 75U) <= 3U
2922 and this loop can transform that into
2923 ((X - 43U) & ~(75U - 43U)) <= 3U. */
2925 optimize_range_tests_diff (enum tree_code opcode
, tree type
,
2926 tree lowi
, tree lowj
, tree highi
, tree highj
,
2927 vec
<operand_entry
*> *ops
,
2928 struct range_entry
*rangei
,
2929 struct range_entry
*rangej
)
2931 tree tem1
, tem2
, mask
;
2932 /* Check highi - lowi == highj - lowj. */
2933 tem1
= fold_binary (MINUS_EXPR
, type
, highi
, lowi
);
2934 if (tem1
== NULL_TREE
|| TREE_CODE (tem1
) != INTEGER_CST
)
2936 tem2
= fold_binary (MINUS_EXPR
, type
, highj
, lowj
);
2937 if (!tree_int_cst_equal (tem1
, tem2
))
2939 /* Check popcount (lowj - lowi) == 1. */
2940 tem1
= fold_binary (MINUS_EXPR
, type
, lowj
, lowi
);
2941 if (tem1
== NULL_TREE
|| TREE_CODE (tem1
) != INTEGER_CST
)
2943 if (!integer_pow2p (tem1
))
2946 scalar_int_mode mode
= as_a
<scalar_int_mode
> (TYPE_MODE (type
));
2947 int prec
= GET_MODE_PRECISION (mode
);
2948 if (TYPE_PRECISION (type
) < prec
2949 || (wi::to_wide (TYPE_MIN_VALUE (type
))
2950 != wi::min_value (prec
, TYPE_SIGN (type
)))
2951 || (wi::to_wide (TYPE_MAX_VALUE (type
))
2952 != wi::max_value (prec
, TYPE_SIGN (type
))))
2953 type
= build_nonstandard_integer_type (prec
, 1);
2955 type
= unsigned_type_for (type
);
2956 tem1
= fold_convert (type
, tem1
);
2957 tem2
= fold_convert (type
, tem2
);
2958 lowi
= fold_convert (type
, lowi
);
2959 mask
= fold_build1 (BIT_NOT_EXPR
, type
, tem1
);
2960 tem1
= fold_build2 (MINUS_EXPR
, type
,
2961 fold_convert (type
, rangei
->exp
), lowi
);
2962 tem1
= fold_build2 (BIT_AND_EXPR
, type
, tem1
, mask
);
2963 lowj
= build_int_cst (type
, 0);
2964 if (update_range_test (rangei
, rangej
, NULL
, 1, opcode
, ops
, tem1
,
2965 NULL
, rangei
->in_p
, lowj
, tem2
,
2966 rangei
->strict_overflow_p
2967 || rangej
->strict_overflow_p
))
2972 /* It does some common checks for function optimize_range_tests_xor and
2973 optimize_range_tests_diff.
2974 If OPTIMIZE_XOR is TRUE, it calls optimize_range_tests_xor.
2975 Else it calls optimize_range_tests_diff. */
2978 optimize_range_tests_1 (enum tree_code opcode
, int first
, int length
,
2979 bool optimize_xor
, vec
<operand_entry
*> *ops
,
2980 struct range_entry
*ranges
)
2983 bool any_changes
= false;
2984 for (i
= first
; i
< length
; i
++)
2986 tree lowi
, highi
, lowj
, highj
, type
, tem
;
2988 if (ranges
[i
].exp
== NULL_TREE
|| ranges
[i
].in_p
)
2990 type
= TREE_TYPE (ranges
[i
].exp
);
2991 if (!INTEGRAL_TYPE_P (type
))
2993 lowi
= ranges
[i
].low
;
2994 if (lowi
== NULL_TREE
)
2995 lowi
= TYPE_MIN_VALUE (type
);
2996 highi
= ranges
[i
].high
;
2997 if (highi
== NULL_TREE
)
2999 for (j
= i
+ 1; j
< length
&& j
< i
+ 64; j
++)
3002 if (ranges
[i
].exp
!= ranges
[j
].exp
|| ranges
[j
].in_p
)
3004 lowj
= ranges
[j
].low
;
3005 if (lowj
== NULL_TREE
)
3007 highj
= ranges
[j
].high
;
3008 if (highj
== NULL_TREE
)
3009 highj
= TYPE_MAX_VALUE (type
);
3010 /* Check lowj > highi. */
3011 tem
= fold_binary (GT_EXPR
, boolean_type_node
,
3013 if (tem
== NULL_TREE
|| !integer_onep (tem
))
3016 changes
= optimize_range_tests_xor (opcode
, type
, lowi
, lowj
,
3018 ranges
+ i
, ranges
+ j
);
3020 changes
= optimize_range_tests_diff (opcode
, type
, lowi
, lowj
,
3022 ranges
+ i
, ranges
+ j
);
3033 /* Helper function of optimize_range_tests_to_bit_test. Handle a single
3034 range, EXP, LOW, HIGH, compute bit mask of bits to test and return
3035 EXP on success, NULL otherwise. */
3038 extract_bit_test_mask (tree exp
, int prec
, tree totallow
, tree low
, tree high
,
3039 wide_int
*mask
, tree
*totallowp
)
3041 tree tem
= int_const_binop (MINUS_EXPR
, high
, low
);
3042 if (tem
== NULL_TREE
3043 || TREE_CODE (tem
) != INTEGER_CST
3044 || TREE_OVERFLOW (tem
)
3045 || tree_int_cst_sgn (tem
) == -1
3046 || compare_tree_int (tem
, prec
) != -1)
3049 unsigned HOST_WIDE_INT max
= tree_to_uhwi (tem
) + 1;
3050 *mask
= wi::shifted_mask (0, max
, false, prec
);
3051 if (TREE_CODE (exp
) == BIT_AND_EXPR
3052 && TREE_CODE (TREE_OPERAND (exp
, 1)) == INTEGER_CST
)
3054 widest_int msk
= wi::to_widest (TREE_OPERAND (exp
, 1));
3055 msk
= wi::zext (~msk
, TYPE_PRECISION (TREE_TYPE (exp
)));
3056 if (wi::popcount (msk
) == 1
3057 && wi::ltu_p (msk
, prec
- max
))
3059 *mask
|= wi::shifted_mask (msk
.to_uhwi (), max
, false, prec
);
3060 max
+= msk
.to_uhwi ();
3061 exp
= TREE_OPERAND (exp
, 0);
3062 if (integer_zerop (low
)
3063 && TREE_CODE (exp
) == PLUS_EXPR
3064 && TREE_CODE (TREE_OPERAND (exp
, 1)) == INTEGER_CST
)
3066 tree ret
= TREE_OPERAND (exp
, 0);
3069 = wi::neg (wi::sext (wi::to_widest (TREE_OPERAND (exp
, 1)),
3070 TYPE_PRECISION (TREE_TYPE (low
))));
3071 tree tbias
= wide_int_to_tree (TREE_TYPE (ret
), bias
);
3077 else if (!tree_int_cst_lt (totallow
, tbias
))
3079 bias
= wi::to_widest (tbias
);
3080 bias
-= wi::to_widest (totallow
);
3081 if (bias
>= 0 && bias
< prec
- max
)
3083 *mask
= wi::lshift (*mask
, bias
);
3091 if (!tree_int_cst_lt (totallow
, low
))
3093 tem
= int_const_binop (MINUS_EXPR
, low
, totallow
);
3094 if (tem
== NULL_TREE
3095 || TREE_CODE (tem
) != INTEGER_CST
3096 || TREE_OVERFLOW (tem
)
3097 || compare_tree_int (tem
, prec
- max
) == 1)
3100 *mask
= wi::lshift (*mask
, wi::to_widest (tem
));
3104 /* Attempt to optimize small range tests using bit test.
3106 X != 43 && X != 76 && X != 44 && X != 78 && X != 49
3107 && X != 77 && X != 46 && X != 75 && X != 45 && X != 82
3108 has been by earlier optimizations optimized into:
3109 ((X - 43U) & ~32U) > 3U && X != 49 && X != 82
3110 As all the 43 through 82 range is less than 64 numbers,
3111 for 64-bit word targets optimize that into:
3112 (X - 43U) > 40U && ((1 << (X - 43U)) & 0x8F0000004FULL) == 0 */
3115 optimize_range_tests_to_bit_test (enum tree_code opcode
, int first
, int length
,
3116 vec
<operand_entry
*> *ops
,
3117 struct range_entry
*ranges
)
3120 bool any_changes
= false;
3121 int prec
= GET_MODE_BITSIZE (word_mode
);
3122 auto_vec
<struct range_entry
*, 64> candidates
;
3124 for (i
= first
; i
< length
- 2; i
++)
3126 tree lowi
, highi
, lowj
, highj
, type
;
3128 if (ranges
[i
].exp
== NULL_TREE
|| ranges
[i
].in_p
)
3130 type
= TREE_TYPE (ranges
[i
].exp
);
3131 if (!INTEGRAL_TYPE_P (type
))
3133 lowi
= ranges
[i
].low
;
3134 if (lowi
== NULL_TREE
)
3135 lowi
= TYPE_MIN_VALUE (type
);
3136 highi
= ranges
[i
].high
;
3137 if (highi
== NULL_TREE
)
3140 tree exp
= extract_bit_test_mask (ranges
[i
].exp
, prec
, lowi
, lowi
,
3141 highi
, &mask
, &lowi
);
3142 if (exp
== NULL_TREE
)
3144 bool strict_overflow_p
= ranges
[i
].strict_overflow_p
;
3145 candidates
.truncate (0);
3146 int end
= MIN (i
+ 64, length
);
3147 for (j
= i
+ 1; j
< end
; j
++)
3150 if (ranges
[j
].exp
== NULL_TREE
|| ranges
[j
].in_p
)
3152 if (ranges
[j
].exp
== exp
)
3154 else if (TREE_CODE (ranges
[j
].exp
) == BIT_AND_EXPR
)
3156 exp2
= TREE_OPERAND (ranges
[j
].exp
, 0);
3159 else if (TREE_CODE (exp2
) == PLUS_EXPR
)
3161 exp2
= TREE_OPERAND (exp2
, 0);
3171 lowj
= ranges
[j
].low
;
3172 if (lowj
== NULL_TREE
)
3174 highj
= ranges
[j
].high
;
3175 if (highj
== NULL_TREE
)
3176 highj
= TYPE_MAX_VALUE (type
);
3178 exp2
= extract_bit_test_mask (ranges
[j
].exp
, prec
, lowi
, lowj
,
3179 highj
, &mask2
, NULL
);
3183 strict_overflow_p
|= ranges
[j
].strict_overflow_p
;
3184 candidates
.safe_push (&ranges
[j
]);
3187 /* If we need otherwise 3 or more comparisons, use a bit test. */
3188 if (candidates
.length () >= 2)
3190 tree high
= wide_int_to_tree (TREE_TYPE (lowi
),
3191 wi::to_widest (lowi
)
3192 + prec
- 1 - wi::clz (mask
));
3193 operand_entry
*oe
= (*ops
)[ranges
[i
].idx
];
3195 gimple
*stmt
= op
? SSA_NAME_DEF_STMT (op
)
3196 : last_stmt (BASIC_BLOCK_FOR_FN (cfun
, oe
->id
));
3197 location_t loc
= gimple_location (stmt
);
3198 tree optype
= op
? TREE_TYPE (op
) : boolean_type_node
;
3200 /* See if it isn't cheaper to pretend the minimum value of the
3201 range is 0, if maximum value is small enough.
3202 We can avoid then subtraction of the minimum value, but the
3203 mask constant could be perhaps more expensive. */
3204 if (compare_tree_int (lowi
, 0) > 0
3205 && compare_tree_int (high
, prec
) < 0)
3208 HOST_WIDE_INT m
= tree_to_uhwi (lowi
);
3209 rtx reg
= gen_raw_REG (word_mode
, 10000);
3210 bool speed_p
= optimize_bb_for_speed_p (gimple_bb (stmt
));
3211 cost_diff
= set_src_cost (gen_rtx_PLUS (word_mode
, reg
,
3213 word_mode
, speed_p
);
3214 rtx r
= immed_wide_int_const (mask
, word_mode
);
3215 cost_diff
+= set_src_cost (gen_rtx_AND (word_mode
, reg
, r
),
3216 word_mode
, speed_p
);
3217 r
= immed_wide_int_const (wi::lshift (mask
, m
), word_mode
);
3218 cost_diff
-= set_src_cost (gen_rtx_AND (word_mode
, reg
, r
),
3219 word_mode
, speed_p
);
3222 mask
= wi::lshift (mask
, m
);
3223 lowi
= build_zero_cst (TREE_TYPE (lowi
));
3227 tree tem
= build_range_check (loc
, optype
, unshare_expr (exp
),
3229 if (tem
== NULL_TREE
|| is_gimple_val (tem
))
3231 tree etype
= unsigned_type_for (TREE_TYPE (exp
));
3232 exp
= fold_build2_loc (loc
, MINUS_EXPR
, etype
,
3233 fold_convert_loc (loc
, etype
, exp
),
3234 fold_convert_loc (loc
, etype
, lowi
));
3235 exp
= fold_convert_loc (loc
, integer_type_node
, exp
);
3236 tree word_type
= lang_hooks
.types
.type_for_mode (word_mode
, 1);
3237 exp
= fold_build2_loc (loc
, LSHIFT_EXPR
, word_type
,
3238 build_int_cst (word_type
, 1), exp
);
3239 exp
= fold_build2_loc (loc
, BIT_AND_EXPR
, word_type
, exp
,
3240 wide_int_to_tree (word_type
, mask
));
3241 exp
= fold_build2_loc (loc
, EQ_EXPR
, optype
, exp
,
3242 build_zero_cst (word_type
));
3243 if (is_gimple_val (exp
))
3246 /* The shift might have undefined behavior if TEM is true,
3247 but reassociate_bb isn't prepared to have basic blocks
3248 split when it is running. So, temporarily emit a code
3249 with BIT_IOR_EXPR instead of &&, and fix it up in
3252 tem
= force_gimple_operand (tem
, &seq
, true, NULL_TREE
);
3253 gcc_assert (TREE_CODE (tem
) == SSA_NAME
);
3254 gimple_set_visited (SSA_NAME_DEF_STMT (tem
), true);
3256 exp
= force_gimple_operand (exp
, &seq2
, true, NULL_TREE
);
3257 gimple_seq_add_seq_without_update (&seq
, seq2
);
3258 gcc_assert (TREE_CODE (exp
) == SSA_NAME
);
3259 gimple_set_visited (SSA_NAME_DEF_STMT (exp
), true);
3260 gimple
*g
= gimple_build_assign (make_ssa_name (optype
),
3261 BIT_IOR_EXPR
, tem
, exp
);
3262 gimple_set_location (g
, loc
);
3263 gimple_seq_add_stmt_without_update (&seq
, g
);
3264 exp
= gimple_assign_lhs (g
);
3265 tree val
= build_zero_cst (optype
);
3266 if (update_range_test (&ranges
[i
], NULL
, candidates
.address (),
3267 candidates
.length (), opcode
, ops
, exp
,
3268 seq
, false, val
, val
, strict_overflow_p
))
3271 reassoc_branch_fixups
.safe_push (tem
);
3274 gimple_seq_discard (seq
);
3280 /* Optimize x != 0 && y != 0 && z != 0 into (x | y | z) != 0
3281 and similarly x != -1 && y != -1 && y != -1 into (x & y & z) != -1. */
3284 optimize_range_tests_cmp_bitwise (enum tree_code opcode
, int first
, int length
,
3285 vec
<operand_entry
*> *ops
,
3286 struct range_entry
*ranges
)
3290 bool any_changes
= false;
3291 auto_vec
<int, 128> buckets
;
3292 auto_vec
<int, 32> chains
;
3293 auto_vec
<struct range_entry
*, 32> candidates
;
3295 for (i
= first
; i
< length
; i
++)
3297 if (ranges
[i
].exp
== NULL_TREE
3298 || TREE_CODE (ranges
[i
].exp
) != SSA_NAME
3300 || TYPE_PRECISION (TREE_TYPE (ranges
[i
].exp
)) <= 1
3301 || TREE_CODE (TREE_TYPE (ranges
[i
].exp
)) == BOOLEAN_TYPE
3302 || ranges
[i
].low
== NULL_TREE
3303 || ranges
[i
].low
!= ranges
[i
].high
)
3306 bool zero_p
= integer_zerop (ranges
[i
].low
);
3307 if (!zero_p
&& !integer_all_onesp (ranges
[i
].low
))
3310 b
= TYPE_PRECISION (TREE_TYPE (ranges
[i
].exp
)) * 2 + !zero_p
;
3311 if (buckets
.length () <= b
)
3312 buckets
.safe_grow_cleared (b
+ 1);
3313 if (chains
.length () <= (unsigned) i
)
3314 chains
.safe_grow (i
+ 1);
3315 chains
[i
] = buckets
[b
];
3319 FOR_EACH_VEC_ELT (buckets
, b
, i
)
3320 if (i
&& chains
[i
- 1])
3323 for (j
= chains
[i
- 1]; j
; j
= chains
[j
- 1])
3325 gimple
*gk
= SSA_NAME_DEF_STMT (ranges
[k
- 1].exp
);
3326 gimple
*gj
= SSA_NAME_DEF_STMT (ranges
[j
- 1].exp
);
3327 if (reassoc_stmt_dominates_stmt_p (gk
, gj
))
3330 tree type1
= TREE_TYPE (ranges
[k
- 1].exp
);
3331 tree type2
= NULL_TREE
;
3332 bool strict_overflow_p
= false;
3333 candidates
.truncate (0);
3334 for (j
= i
; j
; j
= chains
[j
- 1])
3336 tree type
= TREE_TYPE (ranges
[j
- 1].exp
);
3337 strict_overflow_p
|= ranges
[j
- 1].strict_overflow_p
;
3339 || useless_type_conversion_p (type1
, type
))
3341 else if (type2
== NULL_TREE
3342 || useless_type_conversion_p (type2
, type
))
3344 if (type2
== NULL_TREE
)
3346 candidates
.safe_push (&ranges
[j
- 1]);
3349 unsigned l
= candidates
.length ();
3350 for (j
= i
; j
; j
= chains
[j
- 1])
3352 tree type
= TREE_TYPE (ranges
[j
- 1].exp
);
3355 if (useless_type_conversion_p (type1
, type
))
3357 else if (type2
== NULL_TREE
3358 || useless_type_conversion_p (type2
, type
))
3360 candidates
.safe_push (&ranges
[j
- 1]);
3362 gimple_seq seq
= NULL
;
3363 tree op
= NULL_TREE
;
3365 struct range_entry
*r
;
3366 candidates
.safe_push (&ranges
[k
- 1]);
3367 FOR_EACH_VEC_ELT (candidates
, id
, r
)
3377 g
= gimple_build_assign (make_ssa_name (type1
), NOP_EXPR
, op
);
3378 gimple_seq_add_stmt_without_update (&seq
, g
);
3379 op
= gimple_assign_lhs (g
);
3381 tree type
= TREE_TYPE (r
->exp
);
3383 if (id
>= l
&& !useless_type_conversion_p (type1
, type
))
3385 g
= gimple_build_assign (make_ssa_name (type1
), NOP_EXPR
, exp
);
3386 gimple_seq_add_stmt_without_update (&seq
, g
);
3387 exp
= gimple_assign_lhs (g
);
3389 g
= gimple_build_assign (make_ssa_name (id
>= l
? type1
: type2
),
3390 (b
& 1) ? BIT_AND_EXPR
: BIT_IOR_EXPR
,
3392 gimple_seq_add_stmt_without_update (&seq
, g
);
3393 op
= gimple_assign_lhs (g
);
3396 if (update_range_test (&ranges
[k
- 1], NULL
, candidates
.address (),
3397 candidates
.length (), opcode
, ops
, op
,
3398 seq
, true, ranges
[k
- 1].low
,
3399 ranges
[k
- 1].low
, strict_overflow_p
))
3402 gimple_seq_discard (seq
);
3408 /* Attempt to optimize for signed a and b where b is known to be >= 0:
3409 a >= 0 && a < b into (unsigned) a < (unsigned) b
3410 a >= 0 && a <= b into (unsigned) a <= (unsigned) b */
3413 optimize_range_tests_var_bound (enum tree_code opcode
, int first
, int length
,
3414 vec
<operand_entry
*> *ops
,
3415 struct range_entry
*ranges
,
3416 basic_block first_bb
)
3419 bool any_changes
= false;
3420 hash_map
<tree
, int> *map
= NULL
;
3422 for (i
= first
; i
< length
; i
++)
3424 if (ranges
[i
].exp
== NULL_TREE
3425 || TREE_CODE (ranges
[i
].exp
) != SSA_NAME
3429 tree type
= TREE_TYPE (ranges
[i
].exp
);
3430 if (!INTEGRAL_TYPE_P (type
)
3431 || TYPE_UNSIGNED (type
)
3432 || ranges
[i
].low
== NULL_TREE
3433 || !integer_zerop (ranges
[i
].low
)
3434 || ranges
[i
].high
!= NULL_TREE
)
3436 /* EXP >= 0 here. */
3438 map
= new hash_map
<tree
, int>;
3439 map
->put (ranges
[i
].exp
, i
);
3445 for (i
= 0; i
< length
; i
++)
3447 bool in_p
= ranges
[i
].in_p
;
3448 if (ranges
[i
].low
== NULL_TREE
3449 || ranges
[i
].high
== NULL_TREE
)
3451 if (!integer_zerop (ranges
[i
].low
)
3452 || !integer_zerop (ranges
[i
].high
))
3455 && TYPE_PRECISION (TREE_TYPE (ranges
[i
].exp
)) == 1
3456 && TYPE_UNSIGNED (TREE_TYPE (ranges
[i
].exp
))
3457 && integer_onep (ranges
[i
].low
)
3458 && integer_onep (ranges
[i
].high
))
3469 if (TREE_CODE (ranges
[i
].exp
) != SSA_NAME
)
3471 stmt
= SSA_NAME_DEF_STMT (ranges
[i
].exp
);
3472 if (!is_gimple_assign (stmt
))
3474 ccode
= gimple_assign_rhs_code (stmt
);
3475 rhs1
= gimple_assign_rhs1 (stmt
);
3476 rhs2
= gimple_assign_rhs2 (stmt
);
3480 operand_entry
*oe
= (*ops
)[ranges
[i
].idx
];
3481 stmt
= last_stmt (BASIC_BLOCK_FOR_FN (cfun
, oe
->id
));
3482 if (gimple_code (stmt
) != GIMPLE_COND
)
3484 ccode
= gimple_cond_code (stmt
);
3485 rhs1
= gimple_cond_lhs (stmt
);
3486 rhs2
= gimple_cond_rhs (stmt
);
3489 if (TREE_CODE (rhs1
) != SSA_NAME
3490 || rhs2
== NULL_TREE
3491 || TREE_CODE (rhs2
) != SSA_NAME
)
3505 ccode
= invert_tree_comparison (ccode
, false);
3510 std::swap (rhs1
, rhs2
);
3511 ccode
= swap_tree_comparison (ccode
);
3520 int *idx
= map
->get (rhs1
);
3524 /* maybe_optimize_range_tests allows statements without side-effects
3525 in the basic blocks as long as they are consumed in the same bb.
3526 Make sure rhs2's def stmt is not among them, otherwise we can't
3527 use safely get_nonzero_bits on it. E.g. in:
3528 # RANGE [-83, 1] NONZERO 173
3529 # k_32 = PHI <k_47(13), k_12(9)>
3532 goto <bb 5>; [26.46%]
3534 goto <bb 9>; [73.54%]
3536 <bb 5> [local count: 140323371]:
3537 # RANGE [0, 1] NONZERO 1
3539 # RANGE [0, 4] NONZERO 4
3541 # RANGE [0, 4] NONZERO 4
3542 iftmp.0_44 = (char) _21;
3543 if (k_32 < iftmp.0_44)
3544 goto <bb 6>; [84.48%]
3546 goto <bb 9>; [15.52%]
3547 the ranges on _5/_21/iftmp.0_44 are flow sensitive, assume that
3548 k_32 >= 0. If we'd optimize k_32 >= 0 to true and k_32 < iftmp.0_44
3549 to (unsigned) k_32 < (unsigned) iftmp.0_44, then we would execute
3550 those stmts even for negative k_32 and the value ranges would be no
3551 longer guaranteed and so the optimization would be invalid. */
3552 while (opcode
== ERROR_MARK
)
3554 gimple
*g
= SSA_NAME_DEF_STMT (rhs2
);
3555 basic_block bb2
= gimple_bb (g
);
3558 && dominated_by_p (CDI_DOMINATORS
, bb2
, first_bb
))
3560 /* As an exception, handle a few common cases. */
3561 if (gimple_assign_cast_p (g
)
3562 && INTEGRAL_TYPE_P (TREE_TYPE (gimple_assign_rhs1 (g
))))
3564 tree op0
= gimple_assign_rhs1 (g
);
3565 if (TYPE_UNSIGNED (TREE_TYPE (op0
))
3566 && (TYPE_PRECISION (TREE_TYPE (rhs2
))
3567 > TYPE_PRECISION (TREE_TYPE (op0
))))
3568 /* Zero-extension is always ok. */
3570 else if (TYPE_PRECISION (TREE_TYPE (rhs2
))
3571 == TYPE_PRECISION (TREE_TYPE (op0
))
3572 && TREE_CODE (op0
) == SSA_NAME
)
3574 /* Cast from signed to unsigned or vice versa. Retry
3575 with the op0 as new rhs2. */
3580 else if (is_gimple_assign (g
)
3581 && gimple_assign_rhs_code (g
) == BIT_AND_EXPR
3582 && TREE_CODE (gimple_assign_rhs2 (g
)) == INTEGER_CST
3583 && !wi::neg_p (wi::to_wide (gimple_assign_rhs2 (g
))))
3584 /* Masking with INTEGER_CST with MSB clear is always ok
3591 if (rhs2
== NULL_TREE
)
3594 wide_int nz
= get_nonzero_bits (rhs2
);
3598 /* We have EXP < RHS2 or EXP <= RHS2 where EXP >= 0
3599 and RHS2 is known to be RHS2 >= 0. */
3600 tree utype
= unsigned_type_for (TREE_TYPE (rhs1
));
3602 enum warn_strict_overflow_code wc
= WARN_STRICT_OVERFLOW_COMPARISON
;
3603 if ((ranges
[*idx
].strict_overflow_p
3604 || ranges
[i
].strict_overflow_p
)
3605 && issue_strict_overflow_warning (wc
))
3606 warning_at (gimple_location (stmt
), OPT_Wstrict_overflow
,
3607 "assuming signed overflow does not occur "
3608 "when simplifying range test");
3610 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
3612 struct range_entry
*r
= &ranges
[*idx
];
3613 fprintf (dump_file
, "Optimizing range test ");
3614 print_generic_expr (dump_file
, r
->exp
);
3615 fprintf (dump_file
, " +[");
3616 print_generic_expr (dump_file
, r
->low
);
3617 fprintf (dump_file
, ", ");
3618 print_generic_expr (dump_file
, r
->high
);
3619 fprintf (dump_file
, "] and comparison ");
3620 print_generic_expr (dump_file
, rhs1
);
3621 fprintf (dump_file
, " %s ", op_symbol_code (ccode
));
3622 print_generic_expr (dump_file
, rhs2
);
3623 fprintf (dump_file
, "\n into (");
3624 print_generic_expr (dump_file
, utype
);
3625 fprintf (dump_file
, ") ");
3626 print_generic_expr (dump_file
, rhs1
);
3627 fprintf (dump_file
, " %s (", op_symbol_code (ccode
));
3628 print_generic_expr (dump_file
, utype
);
3629 fprintf (dump_file
, ") ");
3630 print_generic_expr (dump_file
, rhs2
);
3631 fprintf (dump_file
, "\n");
3634 operand_entry
*oe
= (*ops
)[ranges
[i
].idx
];
3636 if (opcode
== BIT_IOR_EXPR
3637 || (opcode
== ERROR_MARK
&& oe
->rank
== BIT_IOR_EXPR
))
3640 ccode
= invert_tree_comparison (ccode
, false);
3643 unsigned int uid
= gimple_uid (stmt
);
3644 gimple_stmt_iterator gsi
= gsi_for_stmt (stmt
);
3645 gimple
*g
= gimple_build_assign (make_ssa_name (utype
), NOP_EXPR
, rhs1
);
3646 gimple_set_uid (g
, uid
);
3647 rhs1
= gimple_assign_lhs (g
);
3648 gsi_insert_before (&gsi
, g
, GSI_SAME_STMT
);
3649 if (!useless_type_conversion_p (utype
, TREE_TYPE (rhs2
)))
3651 g
= gimple_build_assign (make_ssa_name (utype
), NOP_EXPR
, rhs2
);
3652 gimple_set_uid (g
, uid
);
3653 rhs2
= gimple_assign_lhs (g
);
3654 gsi_insert_before (&gsi
, g
, GSI_SAME_STMT
);
3656 if (tree_swap_operands_p (rhs1
, rhs2
))
3658 std::swap (rhs1
, rhs2
);
3659 ccode
= swap_tree_comparison (ccode
);
3661 if (gimple_code (stmt
) == GIMPLE_COND
)
3663 gcond
*c
= as_a
<gcond
*> (stmt
);
3664 gimple_cond_set_code (c
, ccode
);
3665 gimple_cond_set_lhs (c
, rhs1
);
3666 gimple_cond_set_rhs (c
, rhs2
);
3671 tree ctype
= oe
->op
? TREE_TYPE (oe
->op
) : boolean_type_node
;
3672 if (!INTEGRAL_TYPE_P (ctype
)
3673 || (TREE_CODE (ctype
) != BOOLEAN_TYPE
3674 && TYPE_PRECISION (ctype
) != 1))
3675 ctype
= boolean_type_node
;
3676 g
= gimple_build_assign (make_ssa_name (ctype
), ccode
, rhs1
, rhs2
);
3677 gimple_set_uid (g
, uid
);
3678 gsi_insert_before (&gsi
, g
, GSI_SAME_STMT
);
3679 if (oe
->op
&& ctype
!= TREE_TYPE (oe
->op
))
3681 g
= gimple_build_assign (make_ssa_name (TREE_TYPE (oe
->op
)),
3682 NOP_EXPR
, gimple_assign_lhs (g
));
3683 gimple_set_uid (g
, uid
);
3684 gsi_insert_before (&gsi
, g
, GSI_SAME_STMT
);
3686 ranges
[i
].exp
= gimple_assign_lhs (g
);
3687 oe
->op
= ranges
[i
].exp
;
3688 ranges
[i
].low
= build_zero_cst (TREE_TYPE (ranges
[i
].exp
));
3689 ranges
[i
].high
= ranges
[i
].low
;
3691 ranges
[i
].strict_overflow_p
= false;
3692 oe
= (*ops
)[ranges
[*idx
].idx
];
3693 /* Now change all the other range test immediate uses, so that
3694 those tests will be optimized away. */
3695 if (opcode
== ERROR_MARK
)
3698 oe
->op
= build_int_cst (TREE_TYPE (oe
->op
),
3699 oe
->rank
== BIT_IOR_EXPR
? 0 : 1);
3701 oe
->op
= (oe
->rank
== BIT_IOR_EXPR
3702 ? boolean_false_node
: boolean_true_node
);
3705 oe
->op
= error_mark_node
;
3706 ranges
[*idx
].exp
= NULL_TREE
;
3707 ranges
[*idx
].low
= NULL_TREE
;
3708 ranges
[*idx
].high
= NULL_TREE
;
3716 /* Optimize range tests, similarly how fold_range_test optimizes
3717 it on trees. The tree code for the binary
3718 operation between all the operands is OPCODE.
3719 If OPCODE is ERROR_MARK, optimize_range_tests is called from within
3720 maybe_optimize_range_tests for inter-bb range optimization.
3721 In that case if oe->op is NULL, oe->id is bb->index whose
3722 GIMPLE_COND is && or ||ed into the test, and oe->rank says
3724 FIRST_BB is the first basic block if OPCODE is ERROR_MARK. */
3727 optimize_range_tests (enum tree_code opcode
,
3728 vec
<operand_entry
*> *ops
, basic_block first_bb
)
3730 unsigned int length
= ops
->length (), i
, j
, first
;
3732 struct range_entry
*ranges
;
3733 bool any_changes
= false;
3738 ranges
= XNEWVEC (struct range_entry
, length
);
3739 for (i
= 0; i
< length
; i
++)
3743 init_range_entry (ranges
+ i
, oe
->op
,
3746 : last_stmt (BASIC_BLOCK_FOR_FN (cfun
, oe
->id
)));
3747 /* For | invert it now, we will invert it again before emitting
3748 the optimized expression. */
3749 if (opcode
== BIT_IOR_EXPR
3750 || (opcode
== ERROR_MARK
&& oe
->rank
== BIT_IOR_EXPR
))
3751 ranges
[i
].in_p
= !ranges
[i
].in_p
;
3754 qsort (ranges
, length
, sizeof (*ranges
), range_entry_cmp
);
3755 for (i
= 0; i
< length
; i
++)
3756 if (ranges
[i
].exp
!= NULL_TREE
&& TREE_CODE (ranges
[i
].exp
) == SSA_NAME
)
3759 /* Try to merge ranges. */
3760 for (first
= i
; i
< length
; i
++)
3762 tree low
= ranges
[i
].low
;
3763 tree high
= ranges
[i
].high
;
3764 int in_p
= ranges
[i
].in_p
;
3765 bool strict_overflow_p
= ranges
[i
].strict_overflow_p
;
3766 int update_fail_count
= 0;
3768 for (j
= i
+ 1; j
< length
; j
++)
3770 if (ranges
[i
].exp
!= ranges
[j
].exp
)
3772 if (!merge_ranges (&in_p
, &low
, &high
, in_p
, low
, high
,
3773 ranges
[j
].in_p
, ranges
[j
].low
, ranges
[j
].high
))
3775 strict_overflow_p
|= ranges
[j
].strict_overflow_p
;
3781 if (update_range_test (ranges
+ i
, ranges
+ i
+ 1, NULL
, j
- i
- 1,
3782 opcode
, ops
, ranges
[i
].exp
, NULL
, in_p
,
3783 low
, high
, strict_overflow_p
))
3788 /* Avoid quadratic complexity if all merge_ranges calls would succeed,
3789 while update_range_test would fail. */
3790 else if (update_fail_count
== 64)
3793 ++update_fail_count
;
3796 any_changes
|= optimize_range_tests_1 (opcode
, first
, length
, true,
3799 if (BRANCH_COST (optimize_function_for_speed_p (cfun
), false) >= 2)
3800 any_changes
|= optimize_range_tests_1 (opcode
, first
, length
, false,
3802 if (lshift_cheap_p (optimize_function_for_speed_p (cfun
)))
3803 any_changes
|= optimize_range_tests_to_bit_test (opcode
, first
, length
,
3805 any_changes
|= optimize_range_tests_cmp_bitwise (opcode
, first
, length
,
3807 any_changes
|= optimize_range_tests_var_bound (opcode
, first
, length
, ops
,
3810 if (any_changes
&& opcode
!= ERROR_MARK
)
3813 FOR_EACH_VEC_ELT (*ops
, i
, oe
)
3815 if (oe
->op
== error_mark_node
)
3824 XDELETEVEC (ranges
);
3828 /* A subroutine of optimize_vec_cond_expr to extract and canonicalize
3829 the operands of the VEC_COND_EXPR. Returns ERROR_MARK on failure,
3830 otherwise the comparison code. TYPE is a return value that is set
3831 to type of comparison. */
3834 ovce_extract_ops (tree var
, gassign
**rets
, bool *reti
, tree
*type
)
3836 if (TREE_CODE (var
) != SSA_NAME
)
3839 gassign
*stmt
= dyn_cast
<gassign
*> (SSA_NAME_DEF_STMT (var
));
3843 /* ??? If we start creating more COND_EXPR, we could perform
3844 this same optimization with them. For now, simplify. */
3845 if (gimple_assign_rhs_code (stmt
) != VEC_COND_EXPR
)
3848 tree cond
= gimple_assign_rhs1 (stmt
);
3849 tree_code cmp
= TREE_CODE (cond
);
3850 if (TREE_CODE_CLASS (cmp
) != tcc_comparison
)
3853 /* ??? For now, allow only canonical true and false result vectors.
3854 We could expand this to other constants should the need arise,
3855 but at the moment we don't create them. */
3856 tree t
= gimple_assign_rhs2 (stmt
);
3857 tree f
= gimple_assign_rhs3 (stmt
);
3859 if (integer_all_onesp (t
))
3861 else if (integer_all_onesp (f
))
3863 cmp
= invert_tree_comparison (cmp
, false);
3868 if (!integer_zerop (f
))
3877 *type
= TREE_TYPE (cond
);
3881 /* Optimize the condition of VEC_COND_EXPRs which have been combined
3882 with OPCODE (either BIT_AND_EXPR or BIT_IOR_EXPR). */
3885 optimize_vec_cond_expr (tree_code opcode
, vec
<operand_entry
*> *ops
)
3887 unsigned int length
= ops
->length (), i
, j
;
3888 bool any_changes
= false;
3893 for (i
= 0; i
< length
; ++i
)
3895 tree elt0
= (*ops
)[i
]->op
;
3900 tree_code cmp0
= ovce_extract_ops (elt0
, &stmt0
, &invert
, &type
);
3901 if (cmp0
== ERROR_MARK
)
3904 for (j
= i
+ 1; j
< length
; ++j
)
3906 tree
&elt1
= (*ops
)[j
]->op
;
3909 tree_code cmp1
= ovce_extract_ops (elt1
, &stmt1
, NULL
, NULL
);
3910 if (cmp1
== ERROR_MARK
)
3913 tree cond0
= gimple_assign_rhs1 (stmt0
);
3914 tree x0
= TREE_OPERAND (cond0
, 0);
3915 tree y0
= TREE_OPERAND (cond0
, 1);
3917 tree cond1
= gimple_assign_rhs1 (stmt1
);
3918 tree x1
= TREE_OPERAND (cond1
, 0);
3919 tree y1
= TREE_OPERAND (cond1
, 1);
3922 if (opcode
== BIT_AND_EXPR
)
3923 comb
= maybe_fold_and_comparisons (type
, cmp0
, x0
, y0
, cmp1
, x1
,
3925 else if (opcode
== BIT_IOR_EXPR
)
3926 comb
= maybe_fold_or_comparisons (type
, cmp0
, x0
, y0
, cmp1
, x1
,
3934 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
3936 fprintf (dump_file
, "Transforming ");
3937 print_generic_expr (dump_file
, cond0
);
3938 fprintf (dump_file
, " %c ", opcode
== BIT_AND_EXPR
? '&' : '|');
3939 print_generic_expr (dump_file
, cond1
);
3940 fprintf (dump_file
, " into ");
3941 print_generic_expr (dump_file
, comb
);
3942 fputc ('\n', dump_file
);
3945 gimple_assign_set_rhs1 (stmt0
, comb
);
3947 std::swap (*gimple_assign_rhs2_ptr (stmt0
),
3948 *gimple_assign_rhs3_ptr (stmt0
));
3949 update_stmt (stmt0
);
3951 elt1
= error_mark_node
;
3960 FOR_EACH_VEC_ELT (*ops
, i
, oe
)
3962 if (oe
->op
== error_mark_node
)
3974 /* Return true if STMT is a cast like:
3980 # _345 = PHI <_123(N), 1(...), 1(...)>
3981 where _234 has bool type, _123 has single use and
3982 bb N has a single successor M. This is commonly used in
3983 the last block of a range test.
3985 Also Return true if STMT is tcc_compare like:
3991 # _345 = PHI <_234(N), 1(...), 1(...)>
3993 where _234 has booltype, single use and
3994 bb N has a single successor M. This is commonly used in
3995 the last block of a range test. */
3998 final_range_test_p (gimple
*stmt
)
4000 basic_block bb
, rhs_bb
, lhs_bb
;
4003 use_operand_p use_p
;
4006 if (!gimple_assign_cast_p (stmt
)
4007 && (!is_gimple_assign (stmt
)
4008 || (TREE_CODE_CLASS (gimple_assign_rhs_code (stmt
))
4009 != tcc_comparison
)))
4011 bb
= gimple_bb (stmt
);
4012 if (!single_succ_p (bb
))
4014 e
= single_succ_edge (bb
);
4015 if (e
->flags
& EDGE_COMPLEX
)
4018 lhs
= gimple_assign_lhs (stmt
);
4019 rhs
= gimple_assign_rhs1 (stmt
);
4020 if (gimple_assign_cast_p (stmt
)
4021 && (!INTEGRAL_TYPE_P (TREE_TYPE (lhs
))
4022 || TREE_CODE (rhs
) != SSA_NAME
4023 || TREE_CODE (TREE_TYPE (rhs
)) != BOOLEAN_TYPE
))
4026 if (!gimple_assign_cast_p (stmt
)
4027 && (TREE_CODE (TREE_TYPE (lhs
)) != BOOLEAN_TYPE
))
4030 /* Test whether lhs is consumed only by a PHI in the only successor bb. */
4031 if (!single_imm_use (lhs
, &use_p
, &use_stmt
))
4034 if (gimple_code (use_stmt
) != GIMPLE_PHI
4035 || gimple_bb (use_stmt
) != e
->dest
)
4038 /* And that the rhs is defined in the same loop. */
4039 if (gimple_assign_cast_p (stmt
))
4041 if (TREE_CODE (rhs
) != SSA_NAME
4042 || !(rhs_bb
= gimple_bb (SSA_NAME_DEF_STMT (rhs
)))
4043 || !flow_bb_inside_loop_p (loop_containing_stmt (stmt
), rhs_bb
))
4048 if (TREE_CODE (lhs
) != SSA_NAME
4049 || !(lhs_bb
= gimple_bb (SSA_NAME_DEF_STMT (lhs
)))
4050 || !flow_bb_inside_loop_p (loop_containing_stmt (stmt
), lhs_bb
))
4057 /* Return true if BB is suitable basic block for inter-bb range test
4058 optimization. If BACKWARD is true, BB should be the only predecessor
4059 of TEST_BB, and *OTHER_BB is either NULL and filled by the routine,
4060 or compared with to find a common basic block to which all conditions
4061 branch to if true resp. false. If BACKWARD is false, TEST_BB should
4062 be the only predecessor of BB. */
4065 suitable_cond_bb (basic_block bb
, basic_block test_bb
, basic_block
*other_bb
,
4068 edge_iterator ei
, ei2
;
4072 bool other_edge_seen
= false;
4077 /* Check last stmt first. */
4078 stmt
= last_stmt (bb
);
4080 || (gimple_code (stmt
) != GIMPLE_COND
4081 && (backward
|| !final_range_test_p (stmt
)))
4082 || gimple_visited_p (stmt
)
4083 || stmt_could_throw_p (cfun
, stmt
)
4086 is_cond
= gimple_code (stmt
) == GIMPLE_COND
;
4089 /* If last stmt is GIMPLE_COND, verify that one of the succ edges
4090 goes to the next bb (if BACKWARD, it is TEST_BB), and the other
4091 to *OTHER_BB (if not set yet, try to find it out). */
4092 if (EDGE_COUNT (bb
->succs
) != 2)
4094 FOR_EACH_EDGE (e
, ei
, bb
->succs
)
4096 if (!(e
->flags
& (EDGE_TRUE_VALUE
| EDGE_FALSE_VALUE
)))
4098 if (e
->dest
== test_bb
)
4107 if (*other_bb
== NULL
)
4109 FOR_EACH_EDGE (e2
, ei2
, test_bb
->succs
)
4110 if (!(e2
->flags
& (EDGE_TRUE_VALUE
| EDGE_FALSE_VALUE
)))
4112 else if (e
->dest
== e2
->dest
)
4113 *other_bb
= e
->dest
;
4114 if (*other_bb
== NULL
)
4117 if (e
->dest
== *other_bb
)
4118 other_edge_seen
= true;
4122 if (*other_bb
== NULL
|| !other_edge_seen
)
4125 else if (single_succ (bb
) != *other_bb
)
4128 /* Now check all PHIs of *OTHER_BB. */
4129 e
= find_edge (bb
, *other_bb
);
4130 e2
= find_edge (test_bb
, *other_bb
);
4131 for (gsi
= gsi_start_phis (e
->dest
); !gsi_end_p (gsi
); gsi_next (&gsi
))
4133 gphi
*phi
= gsi
.phi ();
4134 /* If both BB and TEST_BB end with GIMPLE_COND, all PHI arguments
4135 corresponding to BB and TEST_BB predecessor must be the same. */
4136 if (!operand_equal_p (gimple_phi_arg_def (phi
, e
->dest_idx
),
4137 gimple_phi_arg_def (phi
, e2
->dest_idx
), 0))
4139 /* Otherwise, if one of the blocks doesn't end with GIMPLE_COND,
4140 one of the PHIs should have the lhs of the last stmt in
4141 that block as PHI arg and that PHI should have 0 or 1
4142 corresponding to it in all other range test basic blocks
4146 if (gimple_phi_arg_def (phi
, e
->dest_idx
)
4147 == gimple_assign_lhs (stmt
)
4148 && (integer_zerop (gimple_phi_arg_def (phi
, e2
->dest_idx
))
4149 || integer_onep (gimple_phi_arg_def (phi
,
4155 gimple
*test_last
= last_stmt (test_bb
);
4156 if (gimple_code (test_last
) != GIMPLE_COND
4157 && gimple_phi_arg_def (phi
, e2
->dest_idx
)
4158 == gimple_assign_lhs (test_last
)
4159 && (integer_zerop (gimple_phi_arg_def (phi
, e
->dest_idx
))
4160 || integer_onep (gimple_phi_arg_def (phi
, e
->dest_idx
))))
4170 /* Return true if BB doesn't have side-effects that would disallow
4171 range test optimization, all SSA_NAMEs set in the bb are consumed
4172 in the bb and there are no PHIs. */
4175 no_side_effect_bb (basic_block bb
)
4177 gimple_stmt_iterator gsi
;
4180 if (!gimple_seq_empty_p (phi_nodes (bb
)))
4182 last
= last_stmt (bb
);
4183 for (gsi
= gsi_start_bb (bb
); !gsi_end_p (gsi
); gsi_next (&gsi
))
4185 gimple
*stmt
= gsi_stmt (gsi
);
4187 imm_use_iterator imm_iter
;
4188 use_operand_p use_p
;
4190 if (is_gimple_debug (stmt
))
4192 if (gimple_has_side_effects (stmt
))
4196 if (!is_gimple_assign (stmt
))
4198 lhs
= gimple_assign_lhs (stmt
);
4199 if (TREE_CODE (lhs
) != SSA_NAME
)
4201 if (gimple_assign_rhs_could_trap_p (stmt
))
4203 FOR_EACH_IMM_USE_FAST (use_p
, imm_iter
, lhs
)
4205 gimple
*use_stmt
= USE_STMT (use_p
);
4206 if (is_gimple_debug (use_stmt
))
4208 if (gimple_bb (use_stmt
) != bb
)
4215 /* If VAR is set by CODE (BIT_{AND,IOR}_EXPR) which is reassociable,
4216 return true and fill in *OPS recursively. */
4219 get_ops (tree var
, enum tree_code code
, vec
<operand_entry
*> *ops
,
4222 gimple
*stmt
= SSA_NAME_DEF_STMT (var
);
4226 if (!is_reassociable_op (stmt
, code
, loop
))
4229 rhs
[0] = gimple_assign_rhs1 (stmt
);
4230 rhs
[1] = gimple_assign_rhs2 (stmt
);
4231 gimple_set_visited (stmt
, true);
4232 for (i
= 0; i
< 2; i
++)
4233 if (TREE_CODE (rhs
[i
]) == SSA_NAME
4234 && !get_ops (rhs
[i
], code
, ops
, loop
)
4235 && has_single_use (rhs
[i
]))
4237 operand_entry
*oe
= operand_entry_pool
.allocate ();
4243 oe
->stmt_to_insert
= NULL
;
4244 ops
->safe_push (oe
);
4249 /* Find the ops that were added by get_ops starting from VAR, see if
4250 they were changed during update_range_test and if yes, create new
4254 update_ops (tree var
, enum tree_code code
, vec
<operand_entry
*> ops
,
4255 unsigned int *pidx
, class loop
*loop
)
4257 gimple
*stmt
= SSA_NAME_DEF_STMT (var
);
4261 if (!is_reassociable_op (stmt
, code
, loop
))
4264 rhs
[0] = gimple_assign_rhs1 (stmt
);
4265 rhs
[1] = gimple_assign_rhs2 (stmt
);
4268 for (i
= 0; i
< 2; i
++)
4269 if (TREE_CODE (rhs
[i
]) == SSA_NAME
)
4271 rhs
[2 + i
] = update_ops (rhs
[i
], code
, ops
, pidx
, loop
);
4272 if (rhs
[2 + i
] == NULL_TREE
)
4274 if (has_single_use (rhs
[i
]))
4275 rhs
[2 + i
] = ops
[(*pidx
)++]->op
;
4277 rhs
[2 + i
] = rhs
[i
];
4280 if ((rhs
[2] != rhs
[0] || rhs
[3] != rhs
[1])
4281 && (rhs
[2] != rhs
[1] || rhs
[3] != rhs
[0]))
4283 gimple_stmt_iterator gsi
= gsi_for_stmt (stmt
);
4284 var
= make_ssa_name (TREE_TYPE (var
));
4285 gassign
*g
= gimple_build_assign (var
, gimple_assign_rhs_code (stmt
),
4287 gimple_set_uid (g
, gimple_uid (stmt
));
4288 gimple_set_visited (g
, true);
4289 gsi_insert_before (&gsi
, g
, GSI_SAME_STMT
);
4294 /* Structure to track the initial value passed to get_ops and
4295 the range in the ops vector for each basic block. */
4297 struct inter_bb_range_test_entry
4300 unsigned int first_idx
, last_idx
;
4303 /* Inter-bb range test optimization.
4305 Returns TRUE if a gimple conditional is optimized to a true/false,
4306 otherwise return FALSE.
4308 This indicates to the caller that it should run a CFG cleanup pass
4309 once reassociation is completed. */
4312 maybe_optimize_range_tests (gimple
*stmt
)
4314 basic_block first_bb
= gimple_bb (stmt
);
4315 basic_block last_bb
= first_bb
;
4316 basic_block other_bb
= NULL
;
4320 auto_vec
<operand_entry
*> ops
;
4321 auto_vec
<inter_bb_range_test_entry
> bbinfo
;
4322 bool any_changes
= false;
4323 bool cfg_cleanup_needed
= false;
4325 /* Consider only basic blocks that end with GIMPLE_COND or
4326 a cast statement satisfying final_range_test_p. All
4327 but the last bb in the first_bb .. last_bb range
4328 should end with GIMPLE_COND. */
4329 if (gimple_code (stmt
) == GIMPLE_COND
)
4331 if (EDGE_COUNT (first_bb
->succs
) != 2)
4332 return cfg_cleanup_needed
;
4334 else if (final_range_test_p (stmt
))
4335 other_bb
= single_succ (first_bb
);
4337 return cfg_cleanup_needed
;
4339 if (stmt_could_throw_p (cfun
, stmt
))
4340 return cfg_cleanup_needed
;
4342 /* As relative ordering of post-dominator sons isn't fixed,
4343 maybe_optimize_range_tests can be called first on any
4344 bb in the range we want to optimize. So, start searching
4345 backwards, if first_bb can be set to a predecessor. */
4346 while (single_pred_p (first_bb
))
4348 basic_block pred_bb
= single_pred (first_bb
);
4349 if (!suitable_cond_bb (pred_bb
, first_bb
, &other_bb
, true))
4351 if (!no_side_effect_bb (first_bb
))
4355 /* If first_bb is last_bb, other_bb hasn't been computed yet.
4356 Before starting forward search in last_bb successors, find
4357 out the other_bb. */
4358 if (first_bb
== last_bb
)
4361 /* As non-GIMPLE_COND last stmt always terminates the range,
4362 if forward search didn't discover anything, just give up. */
4363 if (gimple_code (stmt
) != GIMPLE_COND
)
4364 return cfg_cleanup_needed
;
4365 /* Look at both successors. Either it ends with a GIMPLE_COND
4366 and satisfies suitable_cond_bb, or ends with a cast and
4367 other_bb is that cast's successor. */
4368 FOR_EACH_EDGE (e
, ei
, first_bb
->succs
)
4369 if (!(e
->flags
& (EDGE_TRUE_VALUE
| EDGE_FALSE_VALUE
))
4370 || e
->dest
== first_bb
)
4371 return cfg_cleanup_needed
;
4372 else if (single_pred_p (e
->dest
))
4374 stmt
= last_stmt (e
->dest
);
4376 && gimple_code (stmt
) == GIMPLE_COND
4377 && EDGE_COUNT (e
->dest
->succs
) == 2)
4379 if (suitable_cond_bb (first_bb
, e
->dest
, &other_bb
, true))
4385 && final_range_test_p (stmt
)
4386 && find_edge (first_bb
, single_succ (e
->dest
)))
4388 other_bb
= single_succ (e
->dest
);
4389 if (other_bb
== first_bb
)
4393 if (other_bb
== NULL
)
4394 return cfg_cleanup_needed
;
4396 /* Now do the forward search, moving last_bb to successor bbs
4397 that aren't other_bb. */
4398 while (EDGE_COUNT (last_bb
->succs
) == 2)
4400 FOR_EACH_EDGE (e
, ei
, last_bb
->succs
)
4401 if (e
->dest
!= other_bb
)
4405 if (!single_pred_p (e
->dest
))
4407 if (!suitable_cond_bb (e
->dest
, last_bb
, &other_bb
, false))
4409 if (!no_side_effect_bb (e
->dest
))
4413 if (first_bb
== last_bb
)
4414 return cfg_cleanup_needed
;
4415 /* Here basic blocks first_bb through last_bb's predecessor
4416 end with GIMPLE_COND, all of them have one of the edges to
4417 other_bb and another to another block in the range,
4418 all blocks except first_bb don't have side-effects and
4419 last_bb ends with either GIMPLE_COND, or cast satisfying
4420 final_range_test_p. */
4421 for (bb
= last_bb
; ; bb
= single_pred (bb
))
4423 enum tree_code code
;
4425 inter_bb_range_test_entry bb_ent
;
4427 bb_ent
.op
= NULL_TREE
;
4428 bb_ent
.first_idx
= ops
.length ();
4429 bb_ent
.last_idx
= bb_ent
.first_idx
;
4430 e
= find_edge (bb
, other_bb
);
4431 stmt
= last_stmt (bb
);
4432 gimple_set_visited (stmt
, true);
4433 if (gimple_code (stmt
) != GIMPLE_COND
)
4435 use_operand_p use_p
;
4440 lhs
= gimple_assign_lhs (stmt
);
4441 rhs
= gimple_assign_rhs1 (stmt
);
4442 gcc_assert (bb
== last_bb
);
4451 # _345 = PHI <_123(N), 1(...), 1(...)>
4453 or 0 instead of 1. If it is 0, the _234
4454 range test is anded together with all the
4455 other range tests, if it is 1, it is ored with
4457 single_imm_use (lhs
, &use_p
, &phi
);
4458 gcc_assert (gimple_code (phi
) == GIMPLE_PHI
);
4459 e2
= find_edge (first_bb
, other_bb
);
4461 gcc_assert (gimple_phi_arg_def (phi
, e
->dest_idx
) == lhs
);
4462 if (integer_zerop (gimple_phi_arg_def (phi
, d
)))
4463 code
= BIT_AND_EXPR
;
4466 gcc_checking_assert (integer_onep (gimple_phi_arg_def (phi
, d
)));
4467 code
= BIT_IOR_EXPR
;
4470 /* If _234 SSA_NAME_DEF_STMT is
4472 (or &, corresponding to 1/0 in the phi arguments,
4473 push into ops the individual range test arguments
4474 of the bitwise or resp. and, recursively. */
4475 if (TREE_CODE (rhs
) == SSA_NAME
4476 && (TREE_CODE_CLASS (gimple_assign_rhs_code (stmt
))
4478 && !get_ops (rhs
, code
, &ops
,
4479 loop_containing_stmt (stmt
))
4480 && has_single_use (rhs
))
4482 /* Otherwise, push the _234 range test itself. */
4483 operand_entry
*oe
= operand_entry_pool
.allocate ();
4489 oe
->stmt_to_insert
= NULL
;
4494 else if (is_gimple_assign (stmt
)
4495 && (TREE_CODE_CLASS (gimple_assign_rhs_code (stmt
))
4497 && !get_ops (lhs
, code
, &ops
,
4498 loop_containing_stmt (stmt
))
4499 && has_single_use (lhs
))
4501 operand_entry
*oe
= operand_entry_pool
.allocate ();
4512 bb_ent
.last_idx
= ops
.length ();
4515 bbinfo
.safe_push (bb_ent
);
4518 /* Otherwise stmt is GIMPLE_COND. */
4519 code
= gimple_cond_code (stmt
);
4520 lhs
= gimple_cond_lhs (stmt
);
4521 rhs
= gimple_cond_rhs (stmt
);
4522 if (TREE_CODE (lhs
) == SSA_NAME
4523 && INTEGRAL_TYPE_P (TREE_TYPE (lhs
))
4524 && ((code
!= EQ_EXPR
&& code
!= NE_EXPR
)
4525 || rhs
!= boolean_false_node
4526 /* Either push into ops the individual bitwise
4527 or resp. and operands, depending on which
4528 edge is other_bb. */
4529 || !get_ops (lhs
, (((e
->flags
& EDGE_TRUE_VALUE
) == 0)
4530 ^ (code
== EQ_EXPR
))
4531 ? BIT_AND_EXPR
: BIT_IOR_EXPR
, &ops
,
4532 loop_containing_stmt (stmt
))))
4534 /* Or push the GIMPLE_COND stmt itself. */
4535 operand_entry
*oe
= operand_entry_pool
.allocate ();
4538 oe
->rank
= (e
->flags
& EDGE_TRUE_VALUE
)
4539 ? BIT_IOR_EXPR
: BIT_AND_EXPR
;
4540 /* oe->op = NULL signs that there is no SSA_NAME
4541 for the range test, and oe->id instead is the
4542 basic block number, at which's end the GIMPLE_COND
4546 oe
->stmt_to_insert
= NULL
;
4551 else if (ops
.length () > bb_ent
.first_idx
)
4554 bb_ent
.last_idx
= ops
.length ();
4556 bbinfo
.safe_push (bb_ent
);
4560 if (ops
.length () > 1)
4561 any_changes
= optimize_range_tests (ERROR_MARK
, &ops
, first_bb
);
4564 unsigned int idx
, max_idx
= 0;
4565 /* update_ops relies on has_single_use predicates returning the
4566 same values as it did during get_ops earlier. Additionally it
4567 never removes statements, only adds new ones and it should walk
4568 from the single imm use and check the predicate already before
4569 making those changes.
4570 On the other side, the handling of GIMPLE_COND directly can turn
4571 previously multiply used SSA_NAMEs into single use SSA_NAMEs, so
4572 it needs to be done in a separate loop afterwards. */
4573 for (bb
= last_bb
, idx
= 0; ; bb
= single_pred (bb
), idx
++)
4575 if (bbinfo
[idx
].first_idx
< bbinfo
[idx
].last_idx
4576 && bbinfo
[idx
].op
!= NULL_TREE
)
4581 stmt
= last_stmt (bb
);
4582 new_op
= update_ops (bbinfo
[idx
].op
,
4584 ops
[bbinfo
[idx
].first_idx
]->rank
,
4585 ops
, &bbinfo
[idx
].first_idx
,
4586 loop_containing_stmt (stmt
));
4587 if (new_op
== NULL_TREE
)
4589 gcc_assert (bb
== last_bb
);
4590 new_op
= ops
[bbinfo
[idx
].first_idx
++]->op
;
4592 if (bbinfo
[idx
].op
!= new_op
)
4594 imm_use_iterator iter
;
4595 use_operand_p use_p
;
4596 gimple
*use_stmt
, *cast_or_tcc_cmp_stmt
= NULL
;
4598 FOR_EACH_IMM_USE_STMT (use_stmt
, iter
, bbinfo
[idx
].op
)
4599 if (is_gimple_debug (use_stmt
))
4601 else if (gimple_code (use_stmt
) == GIMPLE_COND
4602 || gimple_code (use_stmt
) == GIMPLE_PHI
)
4603 FOR_EACH_IMM_USE_ON_STMT (use_p
, iter
)
4604 SET_USE (use_p
, new_op
);
4605 else if ((is_gimple_assign (use_stmt
)
4607 (gimple_assign_rhs_code (use_stmt
))
4608 == tcc_comparison
)))
4609 cast_or_tcc_cmp_stmt
= use_stmt
;
4610 else if (gimple_assign_cast_p (use_stmt
))
4611 cast_or_tcc_cmp_stmt
= use_stmt
;
4615 if (cast_or_tcc_cmp_stmt
)
4617 gcc_assert (bb
== last_bb
);
4618 tree lhs
= gimple_assign_lhs (cast_or_tcc_cmp_stmt
);
4619 tree new_lhs
= make_ssa_name (TREE_TYPE (lhs
));
4620 enum tree_code rhs_code
4621 = gimple_assign_cast_p (cast_or_tcc_cmp_stmt
)
4622 ? gimple_assign_rhs_code (cast_or_tcc_cmp_stmt
)
4625 if (is_gimple_min_invariant (new_op
))
4627 new_op
= fold_convert (TREE_TYPE (lhs
), new_op
);
4628 g
= gimple_build_assign (new_lhs
, new_op
);
4631 g
= gimple_build_assign (new_lhs
, rhs_code
, new_op
);
4632 gimple_stmt_iterator gsi
4633 = gsi_for_stmt (cast_or_tcc_cmp_stmt
);
4634 gimple_set_uid (g
, gimple_uid (cast_or_tcc_cmp_stmt
));
4635 gimple_set_visited (g
, true);
4636 gsi_insert_before (&gsi
, g
, GSI_SAME_STMT
);
4637 FOR_EACH_IMM_USE_STMT (use_stmt
, iter
, lhs
)
4638 if (is_gimple_debug (use_stmt
))
4640 else if (gimple_code (use_stmt
) == GIMPLE_COND
4641 || gimple_code (use_stmt
) == GIMPLE_PHI
)
4642 FOR_EACH_IMM_USE_ON_STMT (use_p
, iter
)
4643 SET_USE (use_p
, new_lhs
);
4652 for (bb
= last_bb
, idx
= 0; ; bb
= single_pred (bb
), idx
++)
4654 if (bbinfo
[idx
].first_idx
< bbinfo
[idx
].last_idx
4655 && bbinfo
[idx
].op
== NULL_TREE
4656 && ops
[bbinfo
[idx
].first_idx
]->op
!= NULL_TREE
)
4658 gcond
*cond_stmt
= as_a
<gcond
*> (last_stmt (bb
));
4663 /* If we collapse the conditional to a true/false
4664 condition, then bubble that knowledge up to our caller. */
4665 if (integer_zerop (ops
[bbinfo
[idx
].first_idx
]->op
))
4667 gimple_cond_make_false (cond_stmt
);
4668 cfg_cleanup_needed
= true;
4670 else if (integer_onep (ops
[bbinfo
[idx
].first_idx
]->op
))
4672 gimple_cond_make_true (cond_stmt
);
4673 cfg_cleanup_needed
= true;
4677 gimple_cond_set_code (cond_stmt
, NE_EXPR
);
4678 gimple_cond_set_lhs (cond_stmt
,
4679 ops
[bbinfo
[idx
].first_idx
]->op
);
4680 gimple_cond_set_rhs (cond_stmt
, boolean_false_node
);
4682 update_stmt (cond_stmt
);
4688 /* The above changes could result in basic blocks after the first
4689 modified one, up to and including last_bb, to be executed even if
4690 they would not be in the original program. If the value ranges of
4691 assignment lhs' in those bbs were dependent on the conditions
4692 guarding those basic blocks which now can change, the VRs might
4693 be incorrect. As no_side_effect_bb should ensure those SSA_NAMEs
4694 are only used within the same bb, it should be not a big deal if
4695 we just reset all the VRs in those bbs. See PR68671. */
4696 for (bb
= last_bb
, idx
= 0; idx
< max_idx
; bb
= single_pred (bb
), idx
++)
4697 reset_flow_sensitive_info_in_bb (bb
);
4699 return cfg_cleanup_needed
;
4702 /* Return true if OPERAND is defined by a PHI node which uses the LHS
4703 of STMT in it's operands. This is also known as a "destructive
4704 update" operation. */
4707 is_phi_for_stmt (gimple
*stmt
, tree operand
)
4712 use_operand_p arg_p
;
4715 if (TREE_CODE (operand
) != SSA_NAME
)
4718 lhs
= gimple_assign_lhs (stmt
);
4720 def_stmt
= SSA_NAME_DEF_STMT (operand
);
4721 def_phi
= dyn_cast
<gphi
*> (def_stmt
);
4725 FOR_EACH_PHI_ARG (arg_p
, def_phi
, i
, SSA_OP_USE
)
4726 if (lhs
== USE_FROM_PTR (arg_p
))
4731 /* Remove def stmt of VAR if VAR has zero uses and recurse
4732 on rhs1 operand if so. */
4735 remove_visited_stmt_chain (tree var
)
4738 gimple_stmt_iterator gsi
;
4742 if (TREE_CODE (var
) != SSA_NAME
|| !has_zero_uses (var
))
4744 stmt
= SSA_NAME_DEF_STMT (var
);
4745 if (is_gimple_assign (stmt
) && gimple_visited_p (stmt
))
4747 var
= gimple_assign_rhs1 (stmt
);
4748 gsi
= gsi_for_stmt (stmt
);
4749 reassoc_remove_stmt (&gsi
);
4750 release_defs (stmt
);
4757 /* This function checks three consequtive operands in
4758 passed operands vector OPS starting from OPINDEX and
4759 swaps two operands if it is profitable for binary operation
4760 consuming OPINDEX + 1 abnd OPINDEX + 2 operands.
4762 We pair ops with the same rank if possible.
4764 The alternative we try is to see if STMT is a destructive
4765 update style statement, which is like:
4768 In that case, we want to use the destructive update form to
4769 expose the possible vectorizer sum reduction opportunity.
4770 In that case, the third operand will be the phi node. This
4771 check is not performed if STMT is null.
4773 We could, of course, try to be better as noted above, and do a
4774 lot of work to try to find these opportunities in >3 operand
4775 cases, but it is unlikely to be worth it. */
4778 swap_ops_for_binary_stmt (vec
<operand_entry
*> ops
,
4779 unsigned int opindex
, gimple
*stmt
)
4781 operand_entry
*oe1
, *oe2
, *oe3
;
4784 oe2
= ops
[opindex
+ 1];
4785 oe3
= ops
[opindex
+ 2];
4787 if ((oe1
->rank
== oe2
->rank
4788 && oe2
->rank
!= oe3
->rank
)
4789 || (stmt
&& is_phi_for_stmt (stmt
, oe3
->op
)
4790 && !is_phi_for_stmt (stmt
, oe1
->op
)
4791 && !is_phi_for_stmt (stmt
, oe2
->op
)))
4792 std::swap (*oe1
, *oe3
);
4793 else if ((oe1
->rank
== oe3
->rank
4794 && oe2
->rank
!= oe3
->rank
)
4795 || (stmt
&& is_phi_for_stmt (stmt
, oe2
->op
)
4796 && !is_phi_for_stmt (stmt
, oe1
->op
)
4797 && !is_phi_for_stmt (stmt
, oe3
->op
)))
4798 std::swap (*oe1
, *oe2
);
4801 /* If definition of RHS1 or RHS2 dominates STMT, return the later of those
4802 two definitions, otherwise return STMT. */
4804 static inline gimple
*
4805 find_insert_point (gimple
*stmt
, tree rhs1
, tree rhs2
)
4807 if (TREE_CODE (rhs1
) == SSA_NAME
4808 && reassoc_stmt_dominates_stmt_p (stmt
, SSA_NAME_DEF_STMT (rhs1
)))
4809 stmt
= SSA_NAME_DEF_STMT (rhs1
);
4810 if (TREE_CODE (rhs2
) == SSA_NAME
4811 && reassoc_stmt_dominates_stmt_p (stmt
, SSA_NAME_DEF_STMT (rhs2
)))
4812 stmt
= SSA_NAME_DEF_STMT (rhs2
);
4816 /* If the stmt that defines operand has to be inserted, insert it
4819 insert_stmt_before_use (gimple
*stmt
, gimple
*stmt_to_insert
)
4821 gcc_assert (is_gimple_assign (stmt_to_insert
));
4822 tree rhs1
= gimple_assign_rhs1 (stmt_to_insert
);
4823 tree rhs2
= gimple_assign_rhs2 (stmt_to_insert
);
4824 gimple
*insert_point
= find_insert_point (stmt
, rhs1
, rhs2
);
4825 gimple_stmt_iterator gsi
= gsi_for_stmt (insert_point
);
4826 gimple_set_uid (stmt_to_insert
, gimple_uid (insert_point
));
4828 /* If the insert point is not stmt, then insert_point would be
4829 the point where operand rhs1 or rhs2 is defined. In this case,
4830 stmt_to_insert has to be inserted afterwards. This would
4831 only happen when the stmt insertion point is flexible. */
4832 if (stmt
== insert_point
)
4833 gsi_insert_before (&gsi
, stmt_to_insert
, GSI_NEW_STMT
);
4835 insert_stmt_after (stmt_to_insert
, insert_point
);
4839 /* Recursively rewrite our linearized statements so that the operators
4840 match those in OPS[OPINDEX], putting the computation in rank
4841 order. Return new lhs.
4842 CHANGED is true if we shouldn't reuse the lhs SSA_NAME both in
4843 the current stmt and during recursive invocations.
4844 NEXT_CHANGED is true if we shouldn't reuse the lhs SSA_NAME in
4845 recursive invocations. */
4848 rewrite_expr_tree (gimple
*stmt
, unsigned int opindex
,
4849 vec
<operand_entry
*> ops
, bool changed
, bool next_changed
)
4851 tree rhs1
= gimple_assign_rhs1 (stmt
);
4852 tree rhs2
= gimple_assign_rhs2 (stmt
);
4853 tree lhs
= gimple_assign_lhs (stmt
);
4856 /* The final recursion case for this function is that you have
4857 exactly two operations left.
4858 If we had exactly one op in the entire list to start with, we
4859 would have never called this function, and the tail recursion
4860 rewrites them one at a time. */
4861 if (opindex
+ 2 == ops
.length ())
4863 operand_entry
*oe1
, *oe2
;
4866 oe2
= ops
[opindex
+ 1];
4868 if (rhs1
!= oe1
->op
|| rhs2
!= oe2
->op
)
4870 gimple_stmt_iterator gsi
= gsi_for_stmt (stmt
);
4871 unsigned int uid
= gimple_uid (stmt
);
4873 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
4875 fprintf (dump_file
, "Transforming ");
4876 print_gimple_stmt (dump_file
, stmt
, 0);
4879 /* If the stmt that defines operand has to be inserted, insert it
4881 if (oe1
->stmt_to_insert
)
4882 insert_stmt_before_use (stmt
, oe1
->stmt_to_insert
);
4883 if (oe2
->stmt_to_insert
)
4884 insert_stmt_before_use (stmt
, oe2
->stmt_to_insert
);
4885 /* Even when changed is false, reassociation could have e.g. removed
4886 some redundant operations, so unless we are just swapping the
4887 arguments or unless there is no change at all (then we just
4888 return lhs), force creation of a new SSA_NAME. */
4889 if (changed
|| ((rhs1
!= oe2
->op
|| rhs2
!= oe1
->op
) && opindex
))
4891 gimple
*insert_point
4892 = find_insert_point (stmt
, oe1
->op
, oe2
->op
);
4893 lhs
= make_ssa_name (TREE_TYPE (lhs
));
4895 = gimple_build_assign (lhs
, gimple_assign_rhs_code (stmt
),
4897 gimple_set_uid (stmt
, uid
);
4898 gimple_set_visited (stmt
, true);
4899 if (insert_point
== gsi_stmt (gsi
))
4900 gsi_insert_before (&gsi
, stmt
, GSI_SAME_STMT
);
4902 insert_stmt_after (stmt
, insert_point
);
4906 gcc_checking_assert (find_insert_point (stmt
, oe1
->op
, oe2
->op
)
4908 gimple_assign_set_rhs1 (stmt
, oe1
->op
);
4909 gimple_assign_set_rhs2 (stmt
, oe2
->op
);
4913 if (rhs1
!= oe1
->op
&& rhs1
!= oe2
->op
)
4914 remove_visited_stmt_chain (rhs1
);
4916 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
4918 fprintf (dump_file
, " into ");
4919 print_gimple_stmt (dump_file
, stmt
, 0);
4925 /* If we hit here, we should have 3 or more ops left. */
4926 gcc_assert (opindex
+ 2 < ops
.length ());
4928 /* Rewrite the next operator. */
4931 /* If the stmt that defines operand has to be inserted, insert it
4933 if (oe
->stmt_to_insert
)
4934 insert_stmt_before_use (stmt
, oe
->stmt_to_insert
);
4936 /* Recurse on the LHS of the binary operator, which is guaranteed to
4937 be the non-leaf side. */
4939 = rewrite_expr_tree (SSA_NAME_DEF_STMT (rhs1
), opindex
+ 1, ops
,
4940 changed
|| oe
->op
!= rhs2
|| next_changed
,
4943 if (oe
->op
!= rhs2
|| new_rhs1
!= rhs1
)
4945 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
4947 fprintf (dump_file
, "Transforming ");
4948 print_gimple_stmt (dump_file
, stmt
, 0);
4951 /* If changed is false, this is either opindex == 0
4952 or all outer rhs2's were equal to corresponding oe->op,
4953 and powi_result is NULL.
4954 That means lhs is equivalent before and after reassociation.
4955 Otherwise ensure the old lhs SSA_NAME is not reused and
4956 create a new stmt as well, so that any debug stmts will be
4957 properly adjusted. */
4960 gimple_stmt_iterator gsi
= gsi_for_stmt (stmt
);
4961 unsigned int uid
= gimple_uid (stmt
);
4962 gimple
*insert_point
= find_insert_point (stmt
, new_rhs1
, oe
->op
);
4964 lhs
= make_ssa_name (TREE_TYPE (lhs
));
4965 stmt
= gimple_build_assign (lhs
, gimple_assign_rhs_code (stmt
),
4967 gimple_set_uid (stmt
, uid
);
4968 gimple_set_visited (stmt
, true);
4969 if (insert_point
== gsi_stmt (gsi
))
4970 gsi_insert_before (&gsi
, stmt
, GSI_SAME_STMT
);
4972 insert_stmt_after (stmt
, insert_point
);
4976 gcc_checking_assert (find_insert_point (stmt
, new_rhs1
, oe
->op
)
4978 gimple_assign_set_rhs1 (stmt
, new_rhs1
);
4979 gimple_assign_set_rhs2 (stmt
, oe
->op
);
4983 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
4985 fprintf (dump_file
, " into ");
4986 print_gimple_stmt (dump_file
, stmt
, 0);
4992 /* Find out how many cycles we need to compute statements chain.
4993 OPS_NUM holds number os statements in a chain. CPU_WIDTH is a
4994 maximum number of independent statements we may execute per cycle. */
4997 get_required_cycles (int ops_num
, int cpu_width
)
5003 /* While we have more than 2 * cpu_width operands
5004 we may reduce number of operands by cpu_width
5006 res
= ops_num
/ (2 * cpu_width
);
5008 /* Remained operands count may be reduced twice per cycle
5009 until we have only one operand. */
5010 rest
= (unsigned)(ops_num
- res
* cpu_width
);
5011 elog
= exact_log2 (rest
);
5015 res
+= floor_log2 (rest
) + 1;
5020 /* Returns an optimal number of registers to use for computation of
5021 given statements. */
5024 get_reassociation_width (int ops_num
, enum tree_code opc
,
5027 int param_width
= param_tree_reassoc_width
;
5032 if (param_width
> 0)
5033 width
= param_width
;
5035 width
= targetm
.sched
.reassociation_width (opc
, mode
);
5040 /* Get the minimal time required for sequence computation. */
5041 cycles_best
= get_required_cycles (ops_num
, width
);
5043 /* Check if we may use less width and still compute sequence for
5044 the same time. It will allow us to reduce registers usage.
5045 get_required_cycles is monotonically increasing with lower width
5046 so we can perform a binary search for the minimal width that still
5047 results in the optimal cycle count. */
5049 while (width
> width_min
)
5051 int width_mid
= (width
+ width_min
) / 2;
5053 if (get_required_cycles (ops_num
, width_mid
) == cycles_best
)
5055 else if (width_min
< width_mid
)
5056 width_min
= width_mid
;
5064 /* Recursively rewrite our linearized statements so that the operators
5065 match those in OPS[OPINDEX], putting the computation in rank
5066 order and trying to allow operations to be executed in
5070 rewrite_expr_tree_parallel (gassign
*stmt
, int width
,
5071 vec
<operand_entry
*> ops
)
5073 enum tree_code opcode
= gimple_assign_rhs_code (stmt
);
5074 int op_num
= ops
.length ();
5075 gcc_assert (op_num
> 0);
5076 int stmt_num
= op_num
- 1;
5077 gimple
**stmts
= XALLOCAVEC (gimple
*, stmt_num
);
5078 int op_index
= op_num
- 1;
5080 int ready_stmts_end
= 0;
5082 gimple
*stmt1
= NULL
, *stmt2
= NULL
;
5083 tree last_rhs1
= gimple_assign_rhs1 (stmt
);
5085 /* We start expression rewriting from the top statements.
5086 So, in this loop we create a full list of statements
5087 we will work with. */
5088 stmts
[stmt_num
- 1] = stmt
;
5089 for (i
= stmt_num
- 2; i
>= 0; i
--)
5090 stmts
[i
] = SSA_NAME_DEF_STMT (gimple_assign_rhs1 (stmts
[i
+1]));
5092 for (i
= 0; i
< stmt_num
; i
++)
5096 /* Determine whether we should use results of
5097 already handled statements or not. */
5098 if (ready_stmts_end
== 0
5099 && (i
- stmt_index
>= width
|| op_index
< 1))
5100 ready_stmts_end
= i
;
5102 /* Now we choose operands for the next statement. Non zero
5103 value in ready_stmts_end means here that we should use
5104 the result of already generated statements as new operand. */
5105 if (ready_stmts_end
> 0)
5107 op1
= gimple_assign_lhs (stmts
[stmt_index
++]);
5108 if (ready_stmts_end
> stmt_index
)
5109 op2
= gimple_assign_lhs (stmts
[stmt_index
++]);
5110 else if (op_index
>= 0)
5112 operand_entry
*oe
= ops
[op_index
--];
5113 stmt2
= oe
->stmt_to_insert
;
5118 gcc_assert (stmt_index
< i
);
5119 op2
= gimple_assign_lhs (stmts
[stmt_index
++]);
5122 if (stmt_index
>= ready_stmts_end
)
5123 ready_stmts_end
= 0;
5128 swap_ops_for_binary_stmt (ops
, op_index
- 2, NULL
);
5129 operand_entry
*oe2
= ops
[op_index
--];
5130 operand_entry
*oe1
= ops
[op_index
--];
5132 stmt2
= oe2
->stmt_to_insert
;
5134 stmt1
= oe1
->stmt_to_insert
;
5137 /* If we emit the last statement then we should put
5138 operands into the last statement. It will also
5140 if (op_index
< 0 && stmt_index
== i
)
5143 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
5145 fprintf (dump_file
, "Transforming ");
5146 print_gimple_stmt (dump_file
, stmts
[i
], 0);
5149 /* If the stmt that defines operand has to be inserted, insert it
5152 insert_stmt_before_use (stmts
[i
], stmt1
);
5154 insert_stmt_before_use (stmts
[i
], stmt2
);
5155 stmt1
= stmt2
= NULL
;
5157 /* We keep original statement only for the last one. All
5158 others are recreated. */
5159 if (i
== stmt_num
- 1)
5161 gimple_assign_set_rhs1 (stmts
[i
], op1
);
5162 gimple_assign_set_rhs2 (stmts
[i
], op2
);
5163 update_stmt (stmts
[i
]);
5167 stmts
[i
] = build_and_add_sum (TREE_TYPE (last_rhs1
), op1
, op2
, opcode
);
5168 gimple_set_visited (stmts
[i
], true);
5170 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
5172 fprintf (dump_file
, " into ");
5173 print_gimple_stmt (dump_file
, stmts
[i
], 0);
5177 remove_visited_stmt_chain (last_rhs1
);
5180 /* Transform STMT, which is really (A +B) + (C + D) into the left
5181 linear form, ((A+B)+C)+D.
5182 Recurse on D if necessary. */
5185 linearize_expr (gimple
*stmt
)
5187 gimple_stmt_iterator gsi
;
5188 gimple
*binlhs
= SSA_NAME_DEF_STMT (gimple_assign_rhs1 (stmt
));
5189 gimple
*binrhs
= SSA_NAME_DEF_STMT (gimple_assign_rhs2 (stmt
));
5190 gimple
*oldbinrhs
= binrhs
;
5191 enum tree_code rhscode
= gimple_assign_rhs_code (stmt
);
5192 gimple
*newbinrhs
= NULL
;
5193 class loop
*loop
= loop_containing_stmt (stmt
);
5194 tree lhs
= gimple_assign_lhs (stmt
);
5196 gcc_assert (is_reassociable_op (binlhs
, rhscode
, loop
)
5197 && is_reassociable_op (binrhs
, rhscode
, loop
));
5199 gsi
= gsi_for_stmt (stmt
);
5201 gimple_assign_set_rhs2 (stmt
, gimple_assign_rhs1 (binrhs
));
5202 binrhs
= gimple_build_assign (make_ssa_name (TREE_TYPE (lhs
)),
5203 gimple_assign_rhs_code (binrhs
),
5204 gimple_assign_lhs (binlhs
),
5205 gimple_assign_rhs2 (binrhs
));
5206 gimple_assign_set_rhs1 (stmt
, gimple_assign_lhs (binrhs
));
5207 gsi_insert_before (&gsi
, binrhs
, GSI_SAME_STMT
);
5208 gimple_set_uid (binrhs
, gimple_uid (stmt
));
5210 if (TREE_CODE (gimple_assign_rhs2 (stmt
)) == SSA_NAME
)
5211 newbinrhs
= SSA_NAME_DEF_STMT (gimple_assign_rhs2 (stmt
));
5213 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
5215 fprintf (dump_file
, "Linearized: ");
5216 print_gimple_stmt (dump_file
, stmt
, 0);
5219 reassociate_stats
.linearized
++;
5222 gsi
= gsi_for_stmt (oldbinrhs
);
5223 reassoc_remove_stmt (&gsi
);
5224 release_defs (oldbinrhs
);
5226 gimple_set_visited (stmt
, true);
5227 gimple_set_visited (binlhs
, true);
5228 gimple_set_visited (binrhs
, true);
5230 /* Tail recurse on the new rhs if it still needs reassociation. */
5231 if (newbinrhs
&& is_reassociable_op (newbinrhs
, rhscode
, loop
))
5232 /* ??? This should probably be linearize_expr (newbinrhs) but I don't
5233 want to change the algorithm while converting to tuples. */
5234 linearize_expr (stmt
);
5237 /* If LHS has a single immediate use that is a GIMPLE_ASSIGN statement, return
5238 it. Otherwise, return NULL. */
5241 get_single_immediate_use (tree lhs
)
5243 use_operand_p immuse
;
5246 if (TREE_CODE (lhs
) == SSA_NAME
5247 && single_imm_use (lhs
, &immuse
, &immusestmt
)
5248 && is_gimple_assign (immusestmt
))
5254 /* Recursively negate the value of TONEGATE, and return the SSA_NAME
5255 representing the negated value. Insertions of any necessary
5256 instructions go before GSI.
5257 This function is recursive in that, if you hand it "a_5" as the
5258 value to negate, and a_5 is defined by "a_5 = b_3 + b_4", it will
5259 transform b_3 + b_4 into a_5 = -b_3 + -b_4. */
5262 negate_value (tree tonegate
, gimple_stmt_iterator
*gsip
)
5264 gimple
*negatedefstmt
= NULL
;
5265 tree resultofnegate
;
5266 gimple_stmt_iterator gsi
;
5269 /* If we are trying to negate a name, defined by an add, negate the
5270 add operands instead. */
5271 if (TREE_CODE (tonegate
) == SSA_NAME
)
5272 negatedefstmt
= SSA_NAME_DEF_STMT (tonegate
);
5273 if (TREE_CODE (tonegate
) == SSA_NAME
5274 && is_gimple_assign (negatedefstmt
)
5275 && TREE_CODE (gimple_assign_lhs (negatedefstmt
)) == SSA_NAME
5276 && has_single_use (gimple_assign_lhs (negatedefstmt
))
5277 && gimple_assign_rhs_code (negatedefstmt
) == PLUS_EXPR
)
5279 tree rhs1
= gimple_assign_rhs1 (negatedefstmt
);
5280 tree rhs2
= gimple_assign_rhs2 (negatedefstmt
);
5281 tree lhs
= gimple_assign_lhs (negatedefstmt
);
5284 gsi
= gsi_for_stmt (negatedefstmt
);
5285 rhs1
= negate_value (rhs1
, &gsi
);
5287 gsi
= gsi_for_stmt (negatedefstmt
);
5288 rhs2
= negate_value (rhs2
, &gsi
);
5290 gsi
= gsi_for_stmt (negatedefstmt
);
5291 lhs
= make_ssa_name (TREE_TYPE (lhs
));
5292 gimple_set_visited (negatedefstmt
, true);
5293 g
= gimple_build_assign (lhs
, PLUS_EXPR
, rhs1
, rhs2
);
5294 gimple_set_uid (g
, gimple_uid (negatedefstmt
));
5295 gsi_insert_before (&gsi
, g
, GSI_SAME_STMT
);
5299 tonegate
= fold_build1 (NEGATE_EXPR
, TREE_TYPE (tonegate
), tonegate
);
5300 resultofnegate
= force_gimple_operand_gsi (gsip
, tonegate
, true,
5301 NULL_TREE
, true, GSI_SAME_STMT
);
5303 uid
= gimple_uid (gsi_stmt (gsi
));
5304 for (gsi_prev (&gsi
); !gsi_end_p (gsi
); gsi_prev (&gsi
))
5306 gimple
*stmt
= gsi_stmt (gsi
);
5307 if (gimple_uid (stmt
) != 0)
5309 gimple_set_uid (stmt
, uid
);
5311 return resultofnegate
;
5314 /* Return true if we should break up the subtract in STMT into an add
5315 with negate. This is true when we the subtract operands are really
5316 adds, or the subtract itself is used in an add expression. In
5317 either case, breaking up the subtract into an add with negate
5318 exposes the adds to reassociation. */
5321 should_break_up_subtract (gimple
*stmt
)
5323 tree lhs
= gimple_assign_lhs (stmt
);
5324 tree binlhs
= gimple_assign_rhs1 (stmt
);
5325 tree binrhs
= gimple_assign_rhs2 (stmt
);
5327 class loop
*loop
= loop_containing_stmt (stmt
);
5329 if (TREE_CODE (binlhs
) == SSA_NAME
5330 && is_reassociable_op (SSA_NAME_DEF_STMT (binlhs
), PLUS_EXPR
, loop
))
5333 if (TREE_CODE (binrhs
) == SSA_NAME
5334 && is_reassociable_op (SSA_NAME_DEF_STMT (binrhs
), PLUS_EXPR
, loop
))
5337 if (TREE_CODE (lhs
) == SSA_NAME
5338 && (immusestmt
= get_single_immediate_use (lhs
))
5339 && is_gimple_assign (immusestmt
)
5340 && (gimple_assign_rhs_code (immusestmt
) == PLUS_EXPR
5341 || (gimple_assign_rhs_code (immusestmt
) == MINUS_EXPR
5342 && gimple_assign_rhs1 (immusestmt
) == lhs
)
5343 || gimple_assign_rhs_code (immusestmt
) == MULT_EXPR
))
5348 /* Transform STMT from A - B into A + -B. */
5351 break_up_subtract (gimple
*stmt
, gimple_stmt_iterator
*gsip
)
5353 tree rhs1
= gimple_assign_rhs1 (stmt
);
5354 tree rhs2
= gimple_assign_rhs2 (stmt
);
5356 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
5358 fprintf (dump_file
, "Breaking up subtract ");
5359 print_gimple_stmt (dump_file
, stmt
, 0);
5362 rhs2
= negate_value (rhs2
, gsip
);
5363 gimple_assign_set_rhs_with_ops (gsip
, PLUS_EXPR
, rhs1
, rhs2
);
5367 /* Determine whether STMT is a builtin call that raises an SSA name
5368 to an integer power and has only one use. If so, and this is early
5369 reassociation and unsafe math optimizations are permitted, place
5370 the SSA name in *BASE and the exponent in *EXPONENT, and return TRUE.
5371 If any of these conditions does not hold, return FALSE. */
5374 acceptable_pow_call (gcall
*stmt
, tree
*base
, HOST_WIDE_INT
*exponent
)
5377 REAL_VALUE_TYPE c
, cint
;
5379 switch (gimple_call_combined_fn (stmt
))
5382 if (flag_errno_math
)
5385 *base
= gimple_call_arg (stmt
, 0);
5386 arg1
= gimple_call_arg (stmt
, 1);
5388 if (TREE_CODE (arg1
) != REAL_CST
)
5391 c
= TREE_REAL_CST (arg1
);
5393 if (REAL_EXP (&c
) > HOST_BITS_PER_WIDE_INT
)
5396 *exponent
= real_to_integer (&c
);
5397 real_from_integer (&cint
, VOIDmode
, *exponent
, SIGNED
);
5398 if (!real_identical (&c
, &cint
))
5404 *base
= gimple_call_arg (stmt
, 0);
5405 arg1
= gimple_call_arg (stmt
, 1);
5407 if (!tree_fits_shwi_p (arg1
))
5410 *exponent
= tree_to_shwi (arg1
);
5417 /* Expanding negative exponents is generally unproductive, so we don't
5418 complicate matters with those. Exponents of zero and one should
5419 have been handled by expression folding. */
5420 if (*exponent
< 2 || TREE_CODE (*base
) != SSA_NAME
)
5426 /* Try to derive and add operand entry for OP to *OPS. Return false if
5430 try_special_add_to_ops (vec
<operand_entry
*> *ops
,
5431 enum tree_code code
,
5432 tree op
, gimple
* def_stmt
)
5434 tree base
= NULL_TREE
;
5435 HOST_WIDE_INT exponent
= 0;
5437 if (TREE_CODE (op
) != SSA_NAME
5438 || ! has_single_use (op
))
5441 if (code
== MULT_EXPR
5442 && reassoc_insert_powi_p
5443 && flag_unsafe_math_optimizations
5444 && is_gimple_call (def_stmt
)
5445 && acceptable_pow_call (as_a
<gcall
*> (def_stmt
), &base
, &exponent
))
5447 add_repeat_to_ops_vec (ops
, base
, exponent
);
5448 gimple_set_visited (def_stmt
, true);
5451 else if (code
== MULT_EXPR
5452 && is_gimple_assign (def_stmt
)
5453 && gimple_assign_rhs_code (def_stmt
) == NEGATE_EXPR
5454 && !HONOR_SNANS (TREE_TYPE (op
))
5455 && (!HONOR_SIGNED_ZEROS (TREE_TYPE (op
))
5456 || !COMPLEX_FLOAT_TYPE_P (TREE_TYPE (op
))))
5458 tree rhs1
= gimple_assign_rhs1 (def_stmt
);
5459 tree cst
= build_minus_one_cst (TREE_TYPE (op
));
5460 add_to_ops_vec (ops
, rhs1
);
5461 add_to_ops_vec (ops
, cst
);
5462 gimple_set_visited (def_stmt
, true);
5469 /* Recursively linearize a binary expression that is the RHS of STMT.
5470 Place the operands of the expression tree in the vector named OPS. */
5473 linearize_expr_tree (vec
<operand_entry
*> *ops
, gimple
*stmt
,
5474 bool is_associative
, bool set_visited
)
5476 tree binlhs
= gimple_assign_rhs1 (stmt
);
5477 tree binrhs
= gimple_assign_rhs2 (stmt
);
5478 gimple
*binlhsdef
= NULL
, *binrhsdef
= NULL
;
5479 bool binlhsisreassoc
= false;
5480 bool binrhsisreassoc
= false;
5481 enum tree_code rhscode
= gimple_assign_rhs_code (stmt
);
5482 class loop
*loop
= loop_containing_stmt (stmt
);
5485 gimple_set_visited (stmt
, true);
5487 if (TREE_CODE (binlhs
) == SSA_NAME
)
5489 binlhsdef
= SSA_NAME_DEF_STMT (binlhs
);
5490 binlhsisreassoc
= (is_reassociable_op (binlhsdef
, rhscode
, loop
)
5491 && !stmt_could_throw_p (cfun
, binlhsdef
));
5494 if (TREE_CODE (binrhs
) == SSA_NAME
)
5496 binrhsdef
= SSA_NAME_DEF_STMT (binrhs
);
5497 binrhsisreassoc
= (is_reassociable_op (binrhsdef
, rhscode
, loop
)
5498 && !stmt_could_throw_p (cfun
, binrhsdef
));
5501 /* If the LHS is not reassociable, but the RHS is, we need to swap
5502 them. If neither is reassociable, there is nothing we can do, so
5503 just put them in the ops vector. If the LHS is reassociable,
5504 linearize it. If both are reassociable, then linearize the RHS
5507 if (!binlhsisreassoc
)
5509 /* If this is not a associative operation like division, give up. */
5510 if (!is_associative
)
5512 add_to_ops_vec (ops
, binrhs
);
5516 if (!binrhsisreassoc
)
5518 if (!try_special_add_to_ops (ops
, rhscode
, binrhs
, binrhsdef
))
5519 add_to_ops_vec (ops
, binrhs
);
5521 if (!try_special_add_to_ops (ops
, rhscode
, binlhs
, binlhsdef
))
5522 add_to_ops_vec (ops
, binlhs
);
5527 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
5529 fprintf (dump_file
, "swapping operands of ");
5530 print_gimple_stmt (dump_file
, stmt
, 0);
5533 swap_ssa_operands (stmt
,
5534 gimple_assign_rhs1_ptr (stmt
),
5535 gimple_assign_rhs2_ptr (stmt
));
5538 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
5540 fprintf (dump_file
, " is now ");
5541 print_gimple_stmt (dump_file
, stmt
, 0);
5544 /* We want to make it so the lhs is always the reassociative op,
5546 std::swap (binlhs
, binrhs
);
5548 else if (binrhsisreassoc
)
5550 linearize_expr (stmt
);
5551 binlhs
= gimple_assign_rhs1 (stmt
);
5552 binrhs
= gimple_assign_rhs2 (stmt
);
5555 gcc_assert (TREE_CODE (binrhs
) != SSA_NAME
5556 || !is_reassociable_op (SSA_NAME_DEF_STMT (binrhs
),
5558 linearize_expr_tree (ops
, SSA_NAME_DEF_STMT (binlhs
),
5559 is_associative
, set_visited
);
5561 if (!try_special_add_to_ops (ops
, rhscode
, binrhs
, binrhsdef
))
5562 add_to_ops_vec (ops
, binrhs
);
5565 /* Repropagate the negates back into subtracts, since no other pass
5566 currently does it. */
5569 repropagate_negates (void)
5574 FOR_EACH_VEC_ELT (plus_negates
, i
, negate
)
5576 gimple
*user
= get_single_immediate_use (negate
);
5578 if (!user
|| !is_gimple_assign (user
))
5581 /* The negate operand can be either operand of a PLUS_EXPR
5582 (it can be the LHS if the RHS is a constant for example).
5584 Force the negate operand to the RHS of the PLUS_EXPR, then
5585 transform the PLUS_EXPR into a MINUS_EXPR. */
5586 if (gimple_assign_rhs_code (user
) == PLUS_EXPR
)
5588 /* If the negated operand appears on the LHS of the
5589 PLUS_EXPR, exchange the operands of the PLUS_EXPR
5590 to force the negated operand to the RHS of the PLUS_EXPR. */
5591 if (gimple_assign_rhs1 (user
) == negate
)
5593 swap_ssa_operands (user
,
5594 gimple_assign_rhs1_ptr (user
),
5595 gimple_assign_rhs2_ptr (user
));
5598 /* Now transform the PLUS_EXPR into a MINUS_EXPR and replace
5599 the RHS of the PLUS_EXPR with the operand of the NEGATE_EXPR. */
5600 if (gimple_assign_rhs2 (user
) == negate
)
5602 tree rhs1
= gimple_assign_rhs1 (user
);
5603 tree rhs2
= gimple_assign_rhs1 (SSA_NAME_DEF_STMT (negate
));
5604 gimple_stmt_iterator gsi
= gsi_for_stmt (user
);
5605 gimple_assign_set_rhs_with_ops (&gsi
, MINUS_EXPR
, rhs1
, rhs2
);
5609 else if (gimple_assign_rhs_code (user
) == MINUS_EXPR
)
5611 if (gimple_assign_rhs1 (user
) == negate
)
5616 which we transform into
5619 This pushes down the negate which we possibly can merge
5620 into some other operation, hence insert it into the
5621 plus_negates vector. */
5622 gimple
*feed
= SSA_NAME_DEF_STMT (negate
);
5623 tree a
= gimple_assign_rhs1 (feed
);
5624 tree b
= gimple_assign_rhs2 (user
);
5625 gimple_stmt_iterator gsi
= gsi_for_stmt (feed
);
5626 gimple_stmt_iterator gsi2
= gsi_for_stmt (user
);
5627 tree x
= make_ssa_name (TREE_TYPE (gimple_assign_lhs (feed
)));
5628 gimple
*g
= gimple_build_assign (x
, PLUS_EXPR
, a
, b
);
5629 gsi_insert_before (&gsi2
, g
, GSI_SAME_STMT
);
5630 gimple_assign_set_rhs_with_ops (&gsi2
, NEGATE_EXPR
, x
);
5631 user
= gsi_stmt (gsi2
);
5633 reassoc_remove_stmt (&gsi
);
5634 release_defs (feed
);
5635 plus_negates
.safe_push (gimple_assign_lhs (user
));
5639 /* Transform "x = -a; y = b - x" into "y = b + a", getting
5640 rid of one operation. */
5641 gimple
*feed
= SSA_NAME_DEF_STMT (negate
);
5642 tree a
= gimple_assign_rhs1 (feed
);
5643 tree rhs1
= gimple_assign_rhs1 (user
);
5644 gimple_stmt_iterator gsi
= gsi_for_stmt (user
);
5645 gimple_assign_set_rhs_with_ops (&gsi
, PLUS_EXPR
, rhs1
, a
);
5646 update_stmt (gsi_stmt (gsi
));
5652 /* Returns true if OP is of a type for which we can do reassociation.
5653 That is for integral or non-saturating fixed-point types, and for
5654 floating point type when associative-math is enabled. */
5657 can_reassociate_p (tree op
)
5659 tree type
= TREE_TYPE (op
);
5660 if (TREE_CODE (op
) == SSA_NAME
&& SSA_NAME_OCCURS_IN_ABNORMAL_PHI (op
))
5662 if ((ANY_INTEGRAL_TYPE_P (type
) && TYPE_OVERFLOW_WRAPS (type
))
5663 || NON_SAT_FIXED_POINT_TYPE_P (type
)
5664 || (flag_associative_math
&& FLOAT_TYPE_P (type
)))
5669 /* Break up subtract operations in block BB.
5671 We do this top down because we don't know whether the subtract is
5672 part of a possible chain of reassociation except at the top.
5681 we want to break up k = t - q, but we won't until we've transformed q
5682 = b - r, which won't be broken up until we transform b = c - d.
5684 En passant, clear the GIMPLE visited flag on every statement
5685 and set UIDs within each basic block. */
5688 break_up_subtract_bb (basic_block bb
)
5690 gimple_stmt_iterator gsi
;
5692 unsigned int uid
= 1;
5694 for (gsi
= gsi_start_bb (bb
); !gsi_end_p (gsi
); gsi_next (&gsi
))
5696 gimple
*stmt
= gsi_stmt (gsi
);
5697 gimple_set_visited (stmt
, false);
5698 gimple_set_uid (stmt
, uid
++);
5700 if (!is_gimple_assign (stmt
)
5701 || !can_reassociate_p (gimple_assign_lhs (stmt
)))
5704 /* Look for simple gimple subtract operations. */
5705 if (gimple_assign_rhs_code (stmt
) == MINUS_EXPR
)
5707 if (!can_reassociate_p (gimple_assign_rhs1 (stmt
))
5708 || !can_reassociate_p (gimple_assign_rhs2 (stmt
)))
5711 /* Check for a subtract used only in an addition. If this
5712 is the case, transform it into add of a negate for better
5713 reassociation. IE transform C = A-B into C = A + -B if C
5714 is only used in an addition. */
5715 if (should_break_up_subtract (stmt
))
5716 break_up_subtract (stmt
, &gsi
);
5718 else if (gimple_assign_rhs_code (stmt
) == NEGATE_EXPR
5719 && can_reassociate_p (gimple_assign_rhs1 (stmt
)))
5720 plus_negates
.safe_push (gimple_assign_lhs (stmt
));
5722 for (son
= first_dom_son (CDI_DOMINATORS
, bb
);
5724 son
= next_dom_son (CDI_DOMINATORS
, son
))
5725 break_up_subtract_bb (son
);
5728 /* Used for repeated factor analysis. */
5729 struct repeat_factor
5731 /* An SSA name that occurs in a multiply chain. */
5734 /* Cached rank of the factor. */
5737 /* Number of occurrences of the factor in the chain. */
5738 HOST_WIDE_INT count
;
5740 /* An SSA name representing the product of this factor and
5741 all factors appearing later in the repeated factor vector. */
5746 static vec
<repeat_factor
> repeat_factor_vec
;
5748 /* Used for sorting the repeat factor vector. Sort primarily by
5749 ascending occurrence count, secondarily by descending rank. */
5752 compare_repeat_factors (const void *x1
, const void *x2
)
5754 const repeat_factor
*rf1
= (const repeat_factor
*) x1
;
5755 const repeat_factor
*rf2
= (const repeat_factor
*) x2
;
5757 if (rf1
->count
!= rf2
->count
)
5758 return rf1
->count
- rf2
->count
;
5760 return rf2
->rank
- rf1
->rank
;
5763 /* Look for repeated operands in OPS in the multiply tree rooted at
5764 STMT. Replace them with an optimal sequence of multiplies and powi
5765 builtin calls, and remove the used operands from OPS. Return an
5766 SSA name representing the value of the replacement sequence. */
5769 attempt_builtin_powi (gimple
*stmt
, vec
<operand_entry
*> *ops
)
5771 unsigned i
, j
, vec_len
;
5774 repeat_factor
*rf1
, *rf2
;
5775 repeat_factor rfnew
;
5776 tree result
= NULL_TREE
;
5777 tree target_ssa
, iter_result
;
5778 tree type
= TREE_TYPE (gimple_get_lhs (stmt
));
5779 tree powi_fndecl
= mathfn_built_in (type
, BUILT_IN_POWI
);
5780 gimple_stmt_iterator gsi
= gsi_for_stmt (stmt
);
5781 gimple
*mul_stmt
, *pow_stmt
;
5783 /* Nothing to do if BUILT_IN_POWI doesn't exist for this type and
5788 /* Allocate the repeated factor vector. */
5789 repeat_factor_vec
.create (10);
5791 /* Scan the OPS vector for all SSA names in the product and build
5792 up a vector of occurrence counts for each factor. */
5793 FOR_EACH_VEC_ELT (*ops
, i
, oe
)
5795 if (TREE_CODE (oe
->op
) == SSA_NAME
)
5797 FOR_EACH_VEC_ELT (repeat_factor_vec
, j
, rf1
)
5799 if (rf1
->factor
== oe
->op
)
5801 rf1
->count
+= oe
->count
;
5806 if (j
>= repeat_factor_vec
.length ())
5808 rfnew
.factor
= oe
->op
;
5809 rfnew
.rank
= oe
->rank
;
5810 rfnew
.count
= oe
->count
;
5811 rfnew
.repr
= NULL_TREE
;
5812 repeat_factor_vec
.safe_push (rfnew
);
5817 /* Sort the repeated factor vector by (a) increasing occurrence count,
5818 and (b) decreasing rank. */
5819 repeat_factor_vec
.qsort (compare_repeat_factors
);
5821 /* It is generally best to combine as many base factors as possible
5822 into a product before applying __builtin_powi to the result.
5823 However, the sort order chosen for the repeated factor vector
5824 allows us to cache partial results for the product of the base
5825 factors for subsequent use. When we already have a cached partial
5826 result from a previous iteration, it is best to make use of it
5827 before looking for another __builtin_pow opportunity.
5829 As an example, consider x * x * y * y * y * z * z * z * z.
5830 We want to first compose the product x * y * z, raise it to the
5831 second power, then multiply this by y * z, and finally multiply
5832 by z. This can be done in 5 multiplies provided we cache y * z
5833 for use in both expressions:
5841 If we instead ignored the cached y * z and first multiplied by
5842 the __builtin_pow opportunity z * z, we would get the inferior:
5851 vec_len
= repeat_factor_vec
.length ();
5853 /* Repeatedly look for opportunities to create a builtin_powi call. */
5856 HOST_WIDE_INT power
;
5858 /* First look for the largest cached product of factors from
5859 preceding iterations. If found, create a builtin_powi for
5860 it if the minimum occurrence count for its factors is at
5861 least 2, or just use this cached product as our next
5862 multiplicand if the minimum occurrence count is 1. */
5863 FOR_EACH_VEC_ELT (repeat_factor_vec
, j
, rf1
)
5865 if (rf1
->repr
&& rf1
->count
> 0)
5875 iter_result
= rf1
->repr
;
5877 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
5881 fputs ("Multiplying by cached product ", dump_file
);
5882 for (elt
= j
; elt
< vec_len
; elt
++)
5884 rf
= &repeat_factor_vec
[elt
];
5885 print_generic_expr (dump_file
, rf
->factor
);
5886 if (elt
< vec_len
- 1)
5887 fputs (" * ", dump_file
);
5889 fputs ("\n", dump_file
);
5894 iter_result
= make_temp_ssa_name (type
, NULL
, "reassocpow");
5895 pow_stmt
= gimple_build_call (powi_fndecl
, 2, rf1
->repr
,
5896 build_int_cst (integer_type_node
,
5898 gimple_call_set_lhs (pow_stmt
, iter_result
);
5899 gimple_set_location (pow_stmt
, gimple_location (stmt
));
5900 gimple_set_uid (pow_stmt
, gimple_uid (stmt
));
5901 gsi_insert_before (&gsi
, pow_stmt
, GSI_SAME_STMT
);
5903 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
5907 fputs ("Building __builtin_pow call for cached product (",
5909 for (elt
= j
; elt
< vec_len
; elt
++)
5911 rf
= &repeat_factor_vec
[elt
];
5912 print_generic_expr (dump_file
, rf
->factor
);
5913 if (elt
< vec_len
- 1)
5914 fputs (" * ", dump_file
);
5916 fprintf (dump_file
, ")^" HOST_WIDE_INT_PRINT_DEC
"\n",
5923 /* Otherwise, find the first factor in the repeated factor
5924 vector whose occurrence count is at least 2. If no such
5925 factor exists, there are no builtin_powi opportunities
5927 FOR_EACH_VEC_ELT (repeat_factor_vec
, j
, rf1
)
5929 if (rf1
->count
>= 2)
5938 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
5942 fputs ("Building __builtin_pow call for (", dump_file
);
5943 for (elt
= j
; elt
< vec_len
; elt
++)
5945 rf
= &repeat_factor_vec
[elt
];
5946 print_generic_expr (dump_file
, rf
->factor
);
5947 if (elt
< vec_len
- 1)
5948 fputs (" * ", dump_file
);
5950 fprintf (dump_file
, ")^" HOST_WIDE_INT_PRINT_DEC
"\n", power
);
5953 reassociate_stats
.pows_created
++;
5955 /* Visit each element of the vector in reverse order (so that
5956 high-occurrence elements are visited first, and within the
5957 same occurrence count, lower-ranked elements are visited
5958 first). Form a linear product of all elements in this order
5959 whose occurrencce count is at least that of element J.
5960 Record the SSA name representing the product of each element
5961 with all subsequent elements in the vector. */
5962 if (j
== vec_len
- 1)
5963 rf1
->repr
= rf1
->factor
;
5966 for (ii
= vec_len
- 2; ii
>= (int)j
; ii
--)
5970 rf1
= &repeat_factor_vec
[ii
];
5971 rf2
= &repeat_factor_vec
[ii
+ 1];
5973 /* Init the last factor's representative to be itself. */
5975 rf2
->repr
= rf2
->factor
;
5980 target_ssa
= make_temp_ssa_name (type
, NULL
, "reassocpow");
5981 mul_stmt
= gimple_build_assign (target_ssa
, MULT_EXPR
,
5983 gimple_set_location (mul_stmt
, gimple_location (stmt
));
5984 gimple_set_uid (mul_stmt
, gimple_uid (stmt
));
5985 gsi_insert_before (&gsi
, mul_stmt
, GSI_SAME_STMT
);
5986 rf1
->repr
= target_ssa
;
5988 /* Don't reprocess the multiply we just introduced. */
5989 gimple_set_visited (mul_stmt
, true);
5993 /* Form a call to __builtin_powi for the maximum product
5994 just formed, raised to the power obtained earlier. */
5995 rf1
= &repeat_factor_vec
[j
];
5996 iter_result
= make_temp_ssa_name (type
, NULL
, "reassocpow");
5997 pow_stmt
= gimple_build_call (powi_fndecl
, 2, rf1
->repr
,
5998 build_int_cst (integer_type_node
,
6000 gimple_call_set_lhs (pow_stmt
, iter_result
);
6001 gimple_set_location (pow_stmt
, gimple_location (stmt
));
6002 gimple_set_uid (pow_stmt
, gimple_uid (stmt
));
6003 gsi_insert_before (&gsi
, pow_stmt
, GSI_SAME_STMT
);
6006 /* If we previously formed at least one other builtin_powi call,
6007 form the product of this one and those others. */
6010 tree new_result
= make_temp_ssa_name (type
, NULL
, "reassocpow");
6011 mul_stmt
= gimple_build_assign (new_result
, MULT_EXPR
,
6012 result
, iter_result
);
6013 gimple_set_location (mul_stmt
, gimple_location (stmt
));
6014 gimple_set_uid (mul_stmt
, gimple_uid (stmt
));
6015 gsi_insert_before (&gsi
, mul_stmt
, GSI_SAME_STMT
);
6016 gimple_set_visited (mul_stmt
, true);
6017 result
= new_result
;
6020 result
= iter_result
;
6022 /* Decrement the occurrence count of each element in the product
6023 by the count found above, and remove this many copies of each
6025 for (i
= j
; i
< vec_len
; i
++)
6030 rf1
= &repeat_factor_vec
[i
];
6031 rf1
->count
-= power
;
6033 FOR_EACH_VEC_ELT_REVERSE (*ops
, n
, oe
)
6035 if (oe
->op
== rf1
->factor
)
6039 ops
->ordered_remove (n
);
6055 /* At this point all elements in the repeated factor vector have a
6056 remaining occurrence count of 0 or 1, and those with a count of 1
6057 don't have cached representatives. Re-sort the ops vector and
6059 ops
->qsort (sort_by_operand_rank
);
6060 repeat_factor_vec
.release ();
6062 /* Return the final product computed herein. Note that there may
6063 still be some elements with single occurrence count left in OPS;
6064 those will be handled by the normal reassociation logic. */
6068 /* Attempt to optimize
6069 CST1 * copysign (CST2, y) -> copysign (CST1 * CST2, y) if CST1 > 0, or
6070 CST1 * copysign (CST2, y) -> -copysign (CST1 * CST2, y) if CST1 < 0. */
6073 attempt_builtin_copysign (vec
<operand_entry
*> *ops
)
6077 unsigned int length
= ops
->length ();
6078 tree cst
= ops
->last ()->op
;
6080 if (length
== 1 || TREE_CODE (cst
) != REAL_CST
)
6083 FOR_EACH_VEC_ELT (*ops
, i
, oe
)
6085 if (TREE_CODE (oe
->op
) == SSA_NAME
6086 && has_single_use (oe
->op
))
6088 gimple
*def_stmt
= SSA_NAME_DEF_STMT (oe
->op
);
6089 if (gcall
*old_call
= dyn_cast
<gcall
*> (def_stmt
))
6092 switch (gimple_call_combined_fn (old_call
))
6095 CASE_CFN_COPYSIGN_FN
:
6096 arg0
= gimple_call_arg (old_call
, 0);
6097 arg1
= gimple_call_arg (old_call
, 1);
6098 /* The first argument of copysign must be a constant,
6099 otherwise there's nothing to do. */
6100 if (TREE_CODE (arg0
) == REAL_CST
)
6102 tree type
= TREE_TYPE (arg0
);
6103 tree mul
= const_binop (MULT_EXPR
, type
, cst
, arg0
);
6104 /* If we couldn't fold to a single constant, skip it.
6105 That happens e.g. for inexact multiplication when
6107 if (mul
== NULL_TREE
)
6109 /* Instead of adjusting OLD_CALL, let's build a new
6110 call to not leak the LHS and prevent keeping bogus
6111 debug statements. DCE will clean up the old call. */
6113 if (gimple_call_internal_p (old_call
))
6114 new_call
= gimple_build_call_internal
6115 (IFN_COPYSIGN
, 2, mul
, arg1
);
6117 new_call
= gimple_build_call
6118 (gimple_call_fndecl (old_call
), 2, mul
, arg1
);
6119 tree lhs
= make_ssa_name (type
);
6120 gimple_call_set_lhs (new_call
, lhs
);
6121 gimple_set_location (new_call
,
6122 gimple_location (old_call
));
6123 insert_stmt_after (new_call
, old_call
);
6124 /* We've used the constant, get rid of it. */
6126 bool cst1_neg
= real_isneg (TREE_REAL_CST_PTR (cst
));
6127 /* Handle the CST1 < 0 case by negating the result. */
6130 tree negrhs
= make_ssa_name (TREE_TYPE (lhs
));
6132 = gimple_build_assign (negrhs
, NEGATE_EXPR
, lhs
);
6133 insert_stmt_after (negate_stmt
, new_call
);
6138 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
6140 fprintf (dump_file
, "Optimizing copysign: ");
6141 print_generic_expr (dump_file
, cst
);
6142 fprintf (dump_file
, " * COPYSIGN (");
6143 print_generic_expr (dump_file
, arg0
);
6144 fprintf (dump_file
, ", ");
6145 print_generic_expr (dump_file
, arg1
);
6146 fprintf (dump_file
, ") into %sCOPYSIGN (",
6147 cst1_neg
? "-" : "");
6148 print_generic_expr (dump_file
, mul
);
6149 fprintf (dump_file
, ", ");
6150 print_generic_expr (dump_file
, arg1
);
6151 fprintf (dump_file
, "\n");
6164 /* Transform STMT at *GSI into a copy by replacing its rhs with NEW_RHS. */
6167 transform_stmt_to_copy (gimple_stmt_iterator
*gsi
, gimple
*stmt
, tree new_rhs
)
6171 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
6173 fprintf (dump_file
, "Transforming ");
6174 print_gimple_stmt (dump_file
, stmt
, 0);
6177 rhs1
= gimple_assign_rhs1 (stmt
);
6178 gimple_assign_set_rhs_from_tree (gsi
, new_rhs
);
6180 remove_visited_stmt_chain (rhs1
);
6182 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
6184 fprintf (dump_file
, " into ");
6185 print_gimple_stmt (dump_file
, stmt
, 0);
6189 /* Transform STMT at *GSI into a multiply of RHS1 and RHS2. */
6192 transform_stmt_to_multiply (gimple_stmt_iterator
*gsi
, gimple
*stmt
,
6193 tree rhs1
, tree rhs2
)
6195 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
6197 fprintf (dump_file
, "Transforming ");
6198 print_gimple_stmt (dump_file
, stmt
, 0);
6201 gimple_assign_set_rhs_with_ops (gsi
, MULT_EXPR
, rhs1
, rhs2
);
6202 update_stmt (gsi_stmt (*gsi
));
6203 remove_visited_stmt_chain (rhs1
);
6205 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
6207 fprintf (dump_file
, " into ");
6208 print_gimple_stmt (dump_file
, stmt
, 0);
6212 /* Reassociate expressions in basic block BB and its post-dominator as
6215 Bubble up return status from maybe_optimize_range_tests. */
6218 reassociate_bb (basic_block bb
)
6220 gimple_stmt_iterator gsi
;
6222 gimple
*stmt
= last_stmt (bb
);
6223 bool cfg_cleanup_needed
= false;
6225 if (stmt
&& !gimple_visited_p (stmt
))
6226 cfg_cleanup_needed
|= maybe_optimize_range_tests (stmt
);
6228 bool do_prev
= false;
6229 for (gsi
= gsi_last_bb (bb
);
6230 !gsi_end_p (gsi
); do_prev
? gsi_prev (&gsi
) : (void) 0)
6233 stmt
= gsi_stmt (gsi
);
6235 if (is_gimple_assign (stmt
)
6236 && !stmt_could_throw_p (cfun
, stmt
))
6238 tree lhs
, rhs1
, rhs2
;
6239 enum tree_code rhs_code
= gimple_assign_rhs_code (stmt
);
6241 /* If this was part of an already processed statement,
6242 we don't need to touch it again. */
6243 if (gimple_visited_p (stmt
))
6245 /* This statement might have become dead because of previous
6247 if (has_zero_uses (gimple_get_lhs (stmt
)))
6249 reassoc_remove_stmt (&gsi
);
6250 release_defs (stmt
);
6251 /* We might end up removing the last stmt above which
6252 places the iterator to the end of the sequence.
6253 Reset it to the last stmt in this case and make sure
6254 we don't do gsi_prev in that case. */
6255 if (gsi_end_p (gsi
))
6257 gsi
= gsi_last_bb (bb
);
6264 /* If this is not a gimple binary expression, there is
6265 nothing for us to do with it. */
6266 if (get_gimple_rhs_class (rhs_code
) != GIMPLE_BINARY_RHS
)
6269 lhs
= gimple_assign_lhs (stmt
);
6270 rhs1
= gimple_assign_rhs1 (stmt
);
6271 rhs2
= gimple_assign_rhs2 (stmt
);
6273 /* For non-bit or min/max operations we can't associate
6274 all types. Verify that here. */
6275 if (rhs_code
!= BIT_IOR_EXPR
6276 && rhs_code
!= BIT_AND_EXPR
6277 && rhs_code
!= BIT_XOR_EXPR
6278 && rhs_code
!= MIN_EXPR
6279 && rhs_code
!= MAX_EXPR
6280 && (!can_reassociate_p (lhs
)
6281 || !can_reassociate_p (rhs1
)
6282 || !can_reassociate_p (rhs2
)))
6285 if (associative_tree_code (rhs_code
))
6287 auto_vec
<operand_entry
*> ops
;
6288 tree powi_result
= NULL_TREE
;
6289 bool is_vector
= VECTOR_TYPE_P (TREE_TYPE (lhs
));
6291 /* There may be no immediate uses left by the time we
6292 get here because we may have eliminated them all. */
6293 if (TREE_CODE (lhs
) == SSA_NAME
&& has_zero_uses (lhs
))
6296 gimple_set_visited (stmt
, true);
6297 linearize_expr_tree (&ops
, stmt
, true, true);
6298 ops
.qsort (sort_by_operand_rank
);
6299 int orig_len
= ops
.length ();
6300 optimize_ops_list (rhs_code
, &ops
);
6301 if (undistribute_ops_list (rhs_code
, &ops
,
6302 loop_containing_stmt (stmt
)))
6304 ops
.qsort (sort_by_operand_rank
);
6305 optimize_ops_list (rhs_code
, &ops
);
6307 if (undistribute_bitref_for_vector (rhs_code
, &ops
,
6308 loop_containing_stmt (stmt
)))
6310 ops
.qsort (sort_by_operand_rank
);
6311 optimize_ops_list (rhs_code
, &ops
);
6313 if (rhs_code
== PLUS_EXPR
6314 && transform_add_to_multiply (&ops
))
6315 ops
.qsort (sort_by_operand_rank
);
6317 if (rhs_code
== BIT_IOR_EXPR
|| rhs_code
== BIT_AND_EXPR
)
6320 optimize_vec_cond_expr (rhs_code
, &ops
);
6322 optimize_range_tests (rhs_code
, &ops
, NULL
);
6325 if (rhs_code
== MULT_EXPR
&& !is_vector
)
6327 attempt_builtin_copysign (&ops
);
6329 if (reassoc_insert_powi_p
6330 && flag_unsafe_math_optimizations
)
6331 powi_result
= attempt_builtin_powi (stmt
, &ops
);
6334 operand_entry
*last
;
6335 bool negate_result
= false;
6336 if (ops
.length () > 1
6337 && rhs_code
== MULT_EXPR
)
6340 if ((integer_minus_onep (last
->op
)
6341 || real_minus_onep (last
->op
))
6342 && !HONOR_SNANS (TREE_TYPE (lhs
))
6343 && (!HONOR_SIGNED_ZEROS (TREE_TYPE (lhs
))
6344 || !COMPLEX_FLOAT_TYPE_P (TREE_TYPE (lhs
))))
6347 negate_result
= true;
6352 /* If the operand vector is now empty, all operands were
6353 consumed by the __builtin_powi optimization. */
6354 if (ops
.length () == 0)
6355 transform_stmt_to_copy (&gsi
, stmt
, powi_result
);
6356 else if (ops
.length () == 1)
6358 tree last_op
= ops
.last ()->op
;
6360 /* If the stmt that defines operand has to be inserted, insert it
6362 if (ops
.last ()->stmt_to_insert
)
6363 insert_stmt_before_use (stmt
, ops
.last ()->stmt_to_insert
);
6365 transform_stmt_to_multiply (&gsi
, stmt
, last_op
,
6368 transform_stmt_to_copy (&gsi
, stmt
, last_op
);
6372 machine_mode mode
= TYPE_MODE (TREE_TYPE (lhs
));
6373 int ops_num
= ops
.length ();
6376 /* For binary bit operations, if there are at least 3
6377 operands and the last operand in OPS is a constant,
6378 move it to the front. This helps ensure that we generate
6379 (X & Y) & C rather than (X & C) & Y. The former will
6380 often match a canonical bit test when we get to RTL. */
6381 if (ops
.length () > 2
6382 && (rhs_code
== BIT_AND_EXPR
6383 || rhs_code
== BIT_IOR_EXPR
6384 || rhs_code
== BIT_XOR_EXPR
)
6385 && TREE_CODE (ops
.last ()->op
) == INTEGER_CST
)
6386 std::swap (*ops
[0], *ops
[ops_num
- 1]);
6388 /* Only rewrite the expression tree to parallel in the
6389 last reassoc pass to avoid useless work back-and-forth
6390 with initial linearization. */
6391 if (!reassoc_insert_powi_p
6392 && ops
.length () > 3
6393 && (width
= get_reassociation_width (ops_num
, rhs_code
,
6396 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
6398 "Width = %d was chosen for reassociation\n",
6400 rewrite_expr_tree_parallel (as_a
<gassign
*> (stmt
),
6405 /* When there are three operands left, we want
6406 to make sure the ones that get the double
6407 binary op are chosen wisely. */
6408 int len
= ops
.length ();
6410 swap_ops_for_binary_stmt (ops
, len
- 3, stmt
);
6412 new_lhs
= rewrite_expr_tree (stmt
, 0, ops
,
6418 /* If we combined some repeated factors into a
6419 __builtin_powi call, multiply that result by the
6420 reassociated operands. */
6423 gimple
*mul_stmt
, *lhs_stmt
= SSA_NAME_DEF_STMT (lhs
);
6424 tree type
= TREE_TYPE (lhs
);
6425 tree target_ssa
= make_temp_ssa_name (type
, NULL
,
6427 gimple_set_lhs (lhs_stmt
, target_ssa
);
6428 update_stmt (lhs_stmt
);
6431 target_ssa
= new_lhs
;
6434 mul_stmt
= gimple_build_assign (lhs
, MULT_EXPR
,
6435 powi_result
, target_ssa
);
6436 gimple_set_location (mul_stmt
, gimple_location (stmt
));
6437 gimple_set_uid (mul_stmt
, gimple_uid (stmt
));
6438 gsi_insert_after (&gsi
, mul_stmt
, GSI_NEW_STMT
);
6444 stmt
= SSA_NAME_DEF_STMT (lhs
);
6445 tree tmp
= make_ssa_name (TREE_TYPE (lhs
));
6446 gimple_set_lhs (stmt
, tmp
);
6449 gassign
*neg_stmt
= gimple_build_assign (lhs
, NEGATE_EXPR
,
6451 gimple_set_uid (neg_stmt
, gimple_uid (stmt
));
6452 gsi_insert_after (&gsi
, neg_stmt
, GSI_NEW_STMT
);
6458 for (son
= first_dom_son (CDI_POST_DOMINATORS
, bb
);
6460 son
= next_dom_son (CDI_POST_DOMINATORS
, son
))
6461 cfg_cleanup_needed
|= reassociate_bb (son
);
6463 return cfg_cleanup_needed
;
6466 /* Add jumps around shifts for range tests turned into bit tests.
6467 For each SSA_NAME VAR we have code like:
6468 VAR = ...; // final stmt of range comparison
6469 // bit test here...;
6470 OTHERVAR = ...; // final stmt of the bit test sequence
6471 RES = VAR | OTHERVAR;
6472 Turn the above into:
6479 // bit test here...;
6482 # RES = PHI<1(l1), OTHERVAR(l2)>; */
6490 FOR_EACH_VEC_ELT (reassoc_branch_fixups
, i
, var
)
6492 gimple
*def_stmt
= SSA_NAME_DEF_STMT (var
);
6495 bool ok
= single_imm_use (var
, &use
, &use_stmt
);
6497 && is_gimple_assign (use_stmt
)
6498 && gimple_assign_rhs_code (use_stmt
) == BIT_IOR_EXPR
6499 && gimple_bb (def_stmt
) == gimple_bb (use_stmt
));
6501 basic_block cond_bb
= gimple_bb (def_stmt
);
6502 basic_block then_bb
= split_block (cond_bb
, def_stmt
)->dest
;
6503 basic_block merge_bb
= split_block (then_bb
, use_stmt
)->dest
;
6505 gimple_stmt_iterator gsi
= gsi_for_stmt (def_stmt
);
6506 gimple
*g
= gimple_build_cond (NE_EXPR
, var
,
6507 build_zero_cst (TREE_TYPE (var
)),
6508 NULL_TREE
, NULL_TREE
);
6509 location_t loc
= gimple_location (use_stmt
);
6510 gimple_set_location (g
, loc
);
6511 gsi_insert_after (&gsi
, g
, GSI_NEW_STMT
);
6513 edge etrue
= make_edge (cond_bb
, merge_bb
, EDGE_TRUE_VALUE
);
6514 etrue
->probability
= profile_probability::even ();
6515 edge efalse
= find_edge (cond_bb
, then_bb
);
6516 efalse
->flags
= EDGE_FALSE_VALUE
;
6517 efalse
->probability
-= etrue
->probability
;
6518 then_bb
->count
-= etrue
->count ();
6520 tree othervar
= NULL_TREE
;
6521 if (gimple_assign_rhs1 (use_stmt
) == var
)
6522 othervar
= gimple_assign_rhs2 (use_stmt
);
6523 else if (gimple_assign_rhs2 (use_stmt
) == var
)
6524 othervar
= gimple_assign_rhs1 (use_stmt
);
6527 tree lhs
= gimple_assign_lhs (use_stmt
);
6528 gphi
*phi
= create_phi_node (lhs
, merge_bb
);
6529 add_phi_arg (phi
, build_one_cst (TREE_TYPE (lhs
)), etrue
, loc
);
6530 add_phi_arg (phi
, othervar
, single_succ_edge (then_bb
), loc
);
6531 gsi
= gsi_for_stmt (use_stmt
);
6532 gsi_remove (&gsi
, true);
6534 set_immediate_dominator (CDI_DOMINATORS
, merge_bb
, cond_bb
);
6535 set_immediate_dominator (CDI_POST_DOMINATORS
, cond_bb
, merge_bb
);
6537 reassoc_branch_fixups
.release ();
6540 void dump_ops_vector (FILE *file
, vec
<operand_entry
*> ops
);
6541 void debug_ops_vector (vec
<operand_entry
*> ops
);
6543 /* Dump the operand entry vector OPS to FILE. */
6546 dump_ops_vector (FILE *file
, vec
<operand_entry
*> ops
)
6551 FOR_EACH_VEC_ELT (ops
, i
, oe
)
6553 fprintf (file
, "Op %d -> rank: %d, tree: ", i
, oe
->rank
);
6554 print_generic_expr (file
, oe
->op
);
6555 fprintf (file
, "\n");
6559 /* Dump the operand entry vector OPS to STDERR. */
6562 debug_ops_vector (vec
<operand_entry
*> ops
)
6564 dump_ops_vector (stderr
, ops
);
6567 /* Bubble up return status from reassociate_bb. */
6572 break_up_subtract_bb (ENTRY_BLOCK_PTR_FOR_FN (cfun
));
6573 return reassociate_bb (EXIT_BLOCK_PTR_FOR_FN (cfun
));
6576 /* Initialize the reassociation pass. */
6583 int *bbs
= XNEWVEC (int, n_basic_blocks_for_fn (cfun
) - NUM_FIXED_BLOCKS
);
6585 /* Find the loops, so that we can prevent moving calculations in
6587 loop_optimizer_init (AVOID_CFG_MODIFICATIONS
);
6589 memset (&reassociate_stats
, 0, sizeof (reassociate_stats
));
6591 next_operand_entry_id
= 0;
6593 /* Reverse RPO (Reverse Post Order) will give us something where
6594 deeper loops come later. */
6595 pre_and_rev_post_order_compute (NULL
, bbs
, false);
6596 bb_rank
= XCNEWVEC (long, last_basic_block_for_fn (cfun
));
6597 operand_rank
= new hash_map
<tree
, long>;
6599 /* Give each default definition a distinct rank. This includes
6600 parameters and the static chain. Walk backwards over all
6601 SSA names so that we get proper rank ordering according
6602 to tree_swap_operands_p. */
6603 for (i
= num_ssa_names
- 1; i
> 0; --i
)
6605 tree name
= ssa_name (i
);
6606 if (name
&& SSA_NAME_IS_DEFAULT_DEF (name
))
6607 insert_operand_rank (name
, ++rank
);
6610 /* Set up rank for each BB */
6611 for (i
= 0; i
< n_basic_blocks_for_fn (cfun
) - NUM_FIXED_BLOCKS
; i
++)
6612 bb_rank
[bbs
[i
]] = ++rank
<< 16;
6615 calculate_dominance_info (CDI_POST_DOMINATORS
);
6616 plus_negates
= vNULL
;
6619 /* Cleanup after the reassociation pass, and print stats if
6625 statistics_counter_event (cfun
, "Linearized",
6626 reassociate_stats
.linearized
);
6627 statistics_counter_event (cfun
, "Constants eliminated",
6628 reassociate_stats
.constants_eliminated
);
6629 statistics_counter_event (cfun
, "Ops eliminated",
6630 reassociate_stats
.ops_eliminated
);
6631 statistics_counter_event (cfun
, "Statements rewritten",
6632 reassociate_stats
.rewritten
);
6633 statistics_counter_event (cfun
, "Built-in pow[i] calls encountered",
6634 reassociate_stats
.pows_encountered
);
6635 statistics_counter_event (cfun
, "Built-in powi calls created",
6636 reassociate_stats
.pows_created
);
6638 delete operand_rank
;
6639 operand_entry_pool
.release ();
6641 plus_negates
.release ();
6642 free_dominance_info (CDI_POST_DOMINATORS
);
6643 loop_optimizer_finalize ();
6646 /* Gate and execute functions for Reassociation. If INSERT_POWI_P, enable
6647 insertion of __builtin_powi calls.
6649 Returns TODO_cfg_cleanup if a CFG cleanup pass is desired due to
6650 optimization of a gimple conditional. Otherwise returns zero. */
6653 execute_reassoc (bool insert_powi_p
)
6655 reassoc_insert_powi_p
= insert_powi_p
;
6659 bool cfg_cleanup_needed
;
6660 cfg_cleanup_needed
= do_reassoc ();
6661 repropagate_negates ();
6665 return cfg_cleanup_needed
? TODO_cleanup_cfg
: 0;
6670 const pass_data pass_data_reassoc
=
6672 GIMPLE_PASS
, /* type */
6673 "reassoc", /* name */
6674 OPTGROUP_NONE
, /* optinfo_flags */
6675 TV_TREE_REASSOC
, /* tv_id */
6676 ( PROP_cfg
| PROP_ssa
), /* properties_required */
6677 0, /* properties_provided */
6678 0, /* properties_destroyed */
6679 0, /* todo_flags_start */
6680 TODO_update_ssa_only_virtuals
, /* todo_flags_finish */
6683 class pass_reassoc
: public gimple_opt_pass
6686 pass_reassoc (gcc::context
*ctxt
)
6687 : gimple_opt_pass (pass_data_reassoc
, ctxt
), insert_powi_p (false)
6690 /* opt_pass methods: */
6691 opt_pass
* clone () { return new pass_reassoc (m_ctxt
); }
6692 void set_pass_param (unsigned int n
, bool param
)
6694 gcc_assert (n
== 0);
6695 insert_powi_p
= param
;
6697 virtual bool gate (function
*) { return flag_tree_reassoc
!= 0; }
6698 virtual unsigned int execute (function
*)
6699 { return execute_reassoc (insert_powi_p
); }
6702 /* Enable insertion of __builtin_powi calls during execute_reassoc. See
6703 point 3a in the pass header comment. */
6705 }; // class pass_reassoc
6710 make_pass_reassoc (gcc::context
*ctxt
)
6712 return new pass_reassoc (ctxt
);