1 /* Induction variable canonicalization and loop peeling.
2 Copyright (C) 2004-2013 Free Software Foundation, Inc.
4 This file is part of GCC.
6 GCC is free software; you can redistribute it and/or modify it
7 under the terms of the GNU General Public License as published by the
8 Free Software Foundation; either version 3, or (at your option) any
11 GCC is distributed in the hope that it will be useful, but WITHOUT
12 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
13 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
16 You should have received a copy of the GNU General Public License
17 along with GCC; see the file COPYING3. If not see
18 <http://www.gnu.org/licenses/>. */
20 /* This pass detects the loops that iterate a constant number of times,
21 adds a canonical induction variable (step -1, tested against 0)
22 and replaces the exit test. This enables the less powerful rtl
23 level analysis to use this information.
25 This might spoil the code in some cases (by increasing register pressure).
26 Note that in the case the new variable is not needed, ivopts will get rid
27 of it, so it might only be a problem when there are no other linear induction
28 variables. In that case the created optimization possibilities are likely
31 Additionally in case we detect that it is beneficial to unroll the
32 loop completely, we do it right here to expose the optimization
33 possibilities to the following passes. */
37 #include "coretypes.h"
41 #include "basic-block.h"
42 #include "gimple-pretty-print.h"
44 #include "gimple-ssa.h"
47 #include "tree-phinodes.h"
48 #include "ssa-iterators.h"
49 #include "tree-ssanames.h"
50 #include "tree-ssa-loop.h"
51 #include "tree-into-ssa.h"
53 #include "tree-pass.h"
54 #include "tree-chrec.h"
55 #include "tree-scalar-evolution.h"
58 #include "tree-inline.h"
60 #include "tree-cfgcleanup.h"
/* NOTE(review): corrupted extraction of GCC's tree-ssa-loop-ivcanon.c.
   Original lines were split at identifier boundaries and the embedded
   line numbers show gaps (63-65, 67, 69, 71-72 are missing), so the
   `enum unroll_level {' opener, the comment terminators and the closing
   `};' are absent.  Comments only below; code text left untouched.
   Recover the full definition from upstream GCC history.  */
62 /* Specifies types of loops that may be unrolled. */
66 UL_SINGLE_ITER
, /* Only loops that exit immediately in the first
68 UL_NO_GROWTH
, /* Only loops whose unrolling will not cause increase
70 UL_ALL
/* All suitable loops. */
/* NOTE(review): corrupted extraction -- embedded line numbers show gaps
   (76, 78-81, 83-84, 86, 90-91, 94, 96, 101, 104, 107, 109, 111 missing),
   so the return type, opening brace, the declarations of `cond', `in',
   `type', `var', the unshare_expr argument and the create_iv call are
   absent.  Comments only; surviving code text is byte-identical.  */
73 /* Adds a canonical induction variable to LOOP iterating NITER times. EXIT
74 is the exit edge whose condition is replaced. */
77 create_canonical_iv (struct loop
*loop
, edge exit
, tree niter
)
82 gimple_stmt_iterator incr_at
;
85 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
87 fprintf (dump_file
, "Added canonical iv to loop %d, ", loop
->num
);
88 print_generic_expr (dump_file
, niter
, TDF_SLIM
);
89 fprintf (dump_file
, " iterations.\n");
92 cond
= last_stmt (exit
->src
);
93 in
= EDGE_SUCC (exit
->src
, 0);
95 in
= EDGE_SUCC (exit
->src
, 1);
97 /* Note that we do not need to worry about overflows, since
98 type of niter is always unsigned and all comparisons are
99 just for equality/nonequality -- i.e. everything works
100 with a modulo arithmetics. */
102 type
= TREE_TYPE (niter
);
103 niter
= fold_build2 (PLUS_EXPR
, type
,
105 build_int_cst (type
, 1));
106 incr_at
= gsi_last_bb (in
->src
);
108 build_int_cst (type
, -1),
110 &incr_at
, false, NULL
, &var
);
112 cmp
= (exit
->flags
& EDGE_TRUE_VALUE
) ? EQ_EXPR
: NE_EXPR
;
113 gimple_cond_set_code (cond
, cmp
);
114 gimple_cond_set_lhs (cond
, var
);
115 gimple_cond_set_rhs (cond
, build_int_cst (type
, 0));
/* NOTE(review): corrupted extraction -- the `struct loop_size {' opener
   (orig. lines 120-121), the `int overall;' field (123-124), the
   `int last_iteration;' field (132), the `bool constant_iv;' field
   (136-137) and the closing `};' (148) are missing.  Comments only;
   surviving text untouched.  */
119 /* Describe size of loop as detected by tree_estimate_loop_size. */
122 /* Number of instructions in the loop. */
125 /* Number of instructions that will be likely optimized out in
126 peeled iterations of loop (i.e. computation based on induction
127 variable where induction variable starts at known constant.) */
128 int eliminated_by_peeling
;
130 /* Same statistics for last iteration of loop: it is smaller because
131 instructions after exit are not executed. */
133 int last_iteration_eliminated_by_peeling
;
135 /* If some IV computation will become constant. */
138 /* Number of call stmts that are not a builtin and are pure or const
139 present on the hot path. */
140 int num_pure_calls_on_hot_path
;
141 /* Number of call stmts that are not a builtin and are not pure nor const
142 present on the hot path. */
143 int num_non_pure_calls_on_hot_path
;
144 /* Number of statements other than calls in the loop. */
145 int non_call_stmts_on_hot_path
;
146 /* Number of branches seen on the hot path. */
147 int num_branches_on_hot_path
;
/* NOTE(review): corrupted extraction -- interior lines are missing
   (e.g. the return type, braces, `tree base', `affine_iv iv' decls and
   the `return' statements), so this body is incomplete.  Comments only;
   surviving code text is byte-identical.  */
150 /* Return true if OP in STMT will be constant after peeling LOOP. */
153 constant_after_peeling (tree op
, gimple stmt
, struct loop
*loop
)
157 if (is_gimple_min_invariant (op
))
160 /* We can still fold accesses to constant arrays when index is known. */
161 if (TREE_CODE (op
) != SSA_NAME
)
165 /* First make fast look if we see constant array inside. */
166 while (handled_component_p (base
))
167 base
= TREE_OPERAND (base
, 0);
169 && ctor_for_folding (base
) != error_mark_node
)
170 || CONSTANT_CLASS_P (base
))
172 /* If so, see if we understand all the indices. */
174 while (handled_component_p (base
))
176 if (TREE_CODE (base
) == ARRAY_REF
177 && !constant_after_peeling (TREE_OPERAND (base
, 1), stmt
, loop
))
179 base
= TREE_OPERAND (base
, 0);
186 /* Induction variables are constants. */
187 if (!simple_iv (loop
, loop_containing_stmt (stmt
), op
, &iv
, false))
189 if (!is_gimple_min_invariant (iv
.base
))
191 if (!is_gimple_min_invariant (iv
.step
))
/* NOTE(review): corrupted extraction -- embedded line numbers show many
   gaps (e.g. 200, 203-207, 210-214, 223, 227, 230-232, 235, 237, 243,
   245, 248-249, 251, 253, 257, 262, 265, 268, 270, 274, 279, 286-287,
   292-293, 299, 303-304, 308-309, 313, 316-322, 324, 327, 330, 333,
   336, 339, 342, 353-355, 360-363 missing), so braces, several local
   declarations (`i', `after_exit', the return statements) and parts of
   expressions are absent.  Comments only; surviving text untouched.  */
196 /* Computes an estimated number of insns in LOOP.
197 EXIT (if non-NULL) is an exit edge that will be eliminated in all but last
198 iteration of the loop.
199 EDGE_TO_CANCEL (if non-NULL) is a non-exit edge eliminated in the last iteration
201 Return results in SIZE, estimate benefits for complete unrolling exiting by EXIT.
202 Stop estimating after UPPER_BOUND is met. Return true in this case. */
205 tree_estimate_loop_size (struct loop
*loop
, edge exit
, edge edge_to_cancel
, struct loop_size
*size
,
208 basic_block
*body
= get_loop_body (loop
);
209 gimple_stmt_iterator gsi
;
212 vec
<basic_block
> path
= get_loop_hot_path (loop
);
215 size
->eliminated_by_peeling
= 0;
216 size
->last_iteration
= 0;
217 size
->last_iteration_eliminated_by_peeling
= 0;
218 size
->num_pure_calls_on_hot_path
= 0;
219 size
->num_non_pure_calls_on_hot_path
= 0;
220 size
->non_call_stmts_on_hot_path
= 0;
221 size
->num_branches_on_hot_path
= 0;
222 size
->constant_iv
= 0;
224 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
225 fprintf (dump_file
, "Estimating sizes for loop %i\n", loop
->num
);
226 for (i
= 0; i
< loop
->num_nodes
; i
++)
228 if (edge_to_cancel
&& body
[i
] != edge_to_cancel
->src
229 && dominated_by_p (CDI_DOMINATORS
, body
[i
], edge_to_cancel
->src
))
233 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
234 fprintf (dump_file
, " BB: %i, after_exit: %i\n", body
[i
]->index
, after_exit
);
236 for (gsi
= gsi_start_bb (body
[i
]); !gsi_end_p (gsi
); gsi_next (&gsi
))
238 gimple stmt
= gsi_stmt (gsi
);
239 int num
= estimate_num_insns (stmt
, &eni_size_weights
);
240 bool likely_eliminated
= false;
241 bool likely_eliminated_last
= false;
242 bool likely_eliminated_peeled
= false;
244 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
246 fprintf (dump_file
, " size: %3i ", num
);
247 print_gimple_stmt (dump_file
, gsi_stmt (gsi
), 0, 0);
250 /* Look for reasons why we might optimize this stmt away. */
252 if (gimple_has_side_effects (stmt
))
254 /* Exit conditional. */
255 else if (exit
&& body
[i
] == exit
->src
256 && stmt
== last_stmt (exit
->src
))
258 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
259 fprintf (dump_file
, " Exit condition will be eliminated "
260 "in peeled copies.\n");
261 likely_eliminated_peeled
= true;
263 else if (edge_to_cancel
&& body
[i
] == edge_to_cancel
->src
264 && stmt
== last_stmt (edge_to_cancel
->src
))
266 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
267 fprintf (dump_file
, " Exit condition will be eliminated "
269 likely_eliminated_last
= true;
271 /* Sets of IV variables */
272 else if (gimple_code (stmt
) == GIMPLE_ASSIGN
273 && constant_after_peeling (gimple_assign_lhs (stmt
), stmt
, loop
))
275 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
276 fprintf (dump_file
, " Induction variable computation will"
277 " be folded away.\n");
278 likely_eliminated
= true;
280 /* Assignments of IV variables. */
281 else if (gimple_code (stmt
) == GIMPLE_ASSIGN
282 && TREE_CODE (gimple_assign_lhs (stmt
)) == SSA_NAME
283 && constant_after_peeling (gimple_assign_rhs1 (stmt
), stmt
, loop
)
284 && (gimple_assign_rhs_class (stmt
) != GIMPLE_BINARY_RHS
285 || constant_after_peeling (gimple_assign_rhs2 (stmt
),
288 size
->constant_iv
= true;
289 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
290 fprintf (dump_file
, " Constant expression will be folded away.\n");
291 likely_eliminated
= true;
294 else if ((gimple_code (stmt
) == GIMPLE_COND
295 && constant_after_peeling (gimple_cond_lhs (stmt
), stmt
, loop
)
296 && constant_after_peeling (gimple_cond_rhs (stmt
), stmt
, loop
))
297 || (gimple_code (stmt
) == GIMPLE_SWITCH
298 && constant_after_peeling (gimple_switch_index (stmt
), stmt
, loop
)))
300 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
301 fprintf (dump_file
, " Constant conditional.\n");
302 likely_eliminated
= true;
305 size
->overall
+= num
;
306 if (likely_eliminated
|| likely_eliminated_peeled
)
307 size
->eliminated_by_peeling
+= num
;
310 size
->last_iteration
+= num
;
311 if (likely_eliminated
|| likely_eliminated_last
)
312 size
->last_iteration_eliminated_by_peeling
+= num
;
314 if ((size
->overall
* 3 / 2 - size
->eliminated_by_peeling
315 - size
->last_iteration_eliminated_by_peeling
) > upper_bound
)
323 while (path
.length ())
325 basic_block bb
= path
.pop ();
326 for (gsi
= gsi_start_bb (bb
); !gsi_end_p (gsi
); gsi_next (&gsi
))
328 gimple stmt
= gsi_stmt (gsi
);
329 if (gimple_code (stmt
) == GIMPLE_CALL
)
331 int flags
= gimple_call_flags (stmt
);
332 tree decl
= gimple_call_fndecl (stmt
);
334 if (decl
&& DECL_IS_BUILTIN (decl
)
335 && is_inexpensive_builtin (decl
))
337 else if (flags
& (ECF_PURE
| ECF_CONST
))
338 size
->num_pure_calls_on_hot_path
++;
340 size
->num_non_pure_calls_on_hot_path
++;
341 size
->num_branches_on_hot_path
++;
343 else if (gimple_code (stmt
) != GIMPLE_CALL
344 && gimple_code (stmt
) != GIMPLE_DEBUG
)
345 size
->non_call_stmts_on_hot_path
++;
346 if (((gimple_code (stmt
) == GIMPLE_COND
347 && (!constant_after_peeling (gimple_cond_lhs (stmt
), stmt
, loop
)
348 || constant_after_peeling (gimple_cond_rhs (stmt
), stmt
, loop
)))
349 || (gimple_code (stmt
) == GIMPLE_SWITCH
350 && !constant_after_peeling (gimple_switch_index (stmt
), stmt
, loop
)))
351 && (!exit
|| bb
!= exit
->src
))
352 size
->num_branches_on_hot_path
++;
356 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
357 fprintf (dump_file
, "size: %i-%i, last_iteration: %i-%i\n", size
->overall
,
358 size
->eliminated_by_peeling
, size
->last_iteration
,
359 size
->last_iteration_eliminated_by_peeling
);
/* NOTE(review): corrupted extraction -- lines 369-370, 373, 377,
   381-382, 384 and the final `return unr_insns;'/closing brace are
   missing.  Comments only; surviving text untouched.  */
365 /* Estimate number of insns of completely unrolled loop.
366 It is (NUNROLL + 1) * size of loop body with taking into account
367 the fact that in last copy everything after exit conditional
368 is dead and that some instructions will be eliminated after
371 Loop body is likely going to simplify further, this is difficult
372 to guess, we just decrease the result by 1/3. */
374 static unsigned HOST_WIDE_INT
375 estimated_unrolled_size (struct loop_size
*size
,
376 unsigned HOST_WIDE_INT nunroll
)
378 HOST_WIDE_INT unr_insns
= ((nunroll
)
379 * (HOST_WIDE_INT
) (size
->overall
380 - size
->eliminated_by_peeling
));
383 unr_insns
+= size
->last_iteration
- size
->last_iteration_eliminated_by_peeling
;
385 unr_insns
= unr_insns
* 2 / 3;
/* NOTE(review): corrupted extraction -- interior lines are missing
   (e.g. the example loop at orig. lines 400-404, the return type,
   declarations of `exits'/`i', `continue'/`return NULL' statements and
   braces), so this body is incomplete.  Comments only below.  */
392 /* Loop LOOP is known to not loop. See if there is an edge in the loop
393 body that can be removed to make the loop to always exit and at
394 the same time it does not make any code potentially executed
395 during the last iteration dead.
397 After complete unrolling we still may get rid of the conditional
398 on the exit in the last copy even if we have no idea what it does.
399 This is quite common case for loops of form
405 Here we prove the loop to iterate 5 times but we do not know
406 it from induction variable.
408 For now we handle only simple case where there is exit condition
409 just before the latch block and the latch block contains no statements
410 with side effect that may otherwise terminate the execution of loop
411 (such as by EH or by terminating the program or longjmp).
413 In the general case we may want to cancel the paths leading to statements
414 loop-niter identified as having undefined effect in the last iteration.
415 The other cases are hopefully rare and will be cleaned up later. */
418 loop_edge_to_cancel (struct loop
*loop
)
423 gimple_stmt_iterator gsi
;
425 /* We want only one predecessor of the loop. */
426 if (EDGE_COUNT (loop
->latch
->preds
) > 1)
429 exits
= get_loop_exit_edges (loop
);
431 FOR_EACH_VEC_ELT (exits
, i
, edge_to_cancel
)
433 /* Find the other edge than the loop exit
434 leaving the conditional. */
435 if (EDGE_COUNT (edge_to_cancel
->src
->succs
) != 2)
437 if (EDGE_SUCC (edge_to_cancel
->src
, 0) == edge_to_cancel
)
438 edge_to_cancel
= EDGE_SUCC (edge_to_cancel
->src
, 1);
440 edge_to_cancel
= EDGE_SUCC (edge_to_cancel
->src
, 0);
442 /* We only can handle conditionals. */
443 if (!(edge_to_cancel
->flags
& (EDGE_TRUE_VALUE
| EDGE_FALSE_VALUE
)))
446 /* We should never have conditionals in the loop latch. */
447 gcc_assert (edge_to_cancel
->dest
!= loop
->header
);
449 /* Check that it leads to loop latch. */
450 if (edge_to_cancel
->dest
!= loop
->latch
)
455 /* Verify that the code in loop latch does nothing that may end program
456 execution without really reaching the exit. This may include
457 non-pure/const function calls, EH statements, volatile ASMs etc. */
458 for (gsi
= gsi_start_bb (loop
->latch
); !gsi_end_p (gsi
); gsi_next (&gsi
))
459 if (gimple_has_side_effects (gsi_stmt (gsi
)))
461 return edge_to_cancel
;
/* NOTE(review): corrupted extraction -- interior lines are missing
   (e.g. orig. 481-482 which terminated the comment below, the braces,
   `changed = true;' assignments and the final `return changed;'), so
   this body is incomplete and some comments below are unterminated in
   the surviving text.  Comments only; code text untouched.  */
467 /* Remove all tests for exits that are known to be taken after LOOP was
468 peeled NPEELED times. Put gcc_unreachable before every statement
469 known to not be executed. */
472 remove_exits_and_undefined_stmts (struct loop
*loop
, unsigned int npeeled
)
474 struct nb_iter_bound
*elt
;
475 bool changed
= false;
477 for (elt
= loop
->bounds
; elt
; elt
= elt
->next
)
479 /* If statement is known to be undefined after peeling, turn it
480 into unreachable (or trap when debugging experience is supposed
483 && elt
->bound
.ult (double_int::from_uhwi (npeeled
)))
485 gimple_stmt_iterator gsi
= gsi_for_stmt (elt
->stmt
);
486 gimple stmt
= gimple_build_call
487 (builtin_decl_implicit (BUILT_IN_UNREACHABLE
), 0);
489 gimple_set_location (stmt
, gimple_location (elt
->stmt
));
490 gsi_insert_before (&gsi
, stmt
, GSI_NEW_STMT
);
492 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
494 fprintf (dump_file
, "Forced statement unreachable: ");
495 print_gimple_stmt (dump_file
, elt
->stmt
, 0, 0);
498 /* If we know the exit will be taken after peeling, update. */
499 else if (elt
->is_exit
500 && elt
->bound
.ule (double_int::from_uhwi (npeeled
)))
502 basic_block bb
= gimple_bb (elt
->stmt
);
503 edge exit_edge
= EDGE_SUCC (bb
, 0);
505 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
507 fprintf (dump_file
, "Forced exit to be taken: ");
508 print_gimple_stmt (dump_file
, elt
->stmt
, 0, 0);
510 if (!loop_exit_edge_p (loop
, exit_edge
))
511 exit_edge
= EDGE_SUCC (bb
, 1);
512 gcc_checking_assert (loop_exit_edge_p (loop
, exit_edge
));
513 if (exit_edge
->flags
& EDGE_TRUE_VALUE
)
514 gimple_cond_make_true (elt
->stmt
);
516 gimple_cond_make_false (elt
->stmt
);
517 update_stmt (elt
->stmt
);
/* NOTE(review): corrupted extraction -- orig. lines 525-527 (the rest
   of the header comment, including its closing delimiter), the return
   type, braces, `continue' statements and `return changed;' are
   missing, leaving the comment below unterminated in the surviving
   text.  Comments only; code text untouched.  */
524 /* Remove all exits that are known to be never taken because of the loop bound
528 remove_redundant_iv_tests (struct loop
*loop
)
530 struct nb_iter_bound
*elt
;
531 bool changed
= false;
533 if (!loop
->any_upper_bound
)
535 for (elt
= loop
->bounds
; elt
; elt
= elt
->next
)
537 /* Exit is pointless if it won't be taken before loop reaches
539 if (elt
->is_exit
&& loop
->any_upper_bound
540 && loop
->nb_iterations_upper_bound
.ult (elt
->bound
))
542 basic_block bb
= gimple_bb (elt
->stmt
);
543 edge exit_edge
= EDGE_SUCC (bb
, 0);
544 struct tree_niter_desc niter
;
546 if (!loop_exit_edge_p (loop
, exit_edge
))
547 exit_edge
= EDGE_SUCC (bb
, 1);
549 /* Only when we know the actual number of iterations, not
550 just a bound, we can remove the exit. */
551 if (!number_of_iterations_exit (loop
, exit_edge
,
552 &niter
, false, false)
553 || !integer_onep (niter
.assumptions
)
554 || !integer_zerop (niter
.may_be_zero
)
556 || TREE_CODE (niter
.niter
) != INTEGER_CST
557 || !loop
->nb_iterations_upper_bound
.ult
558 (tree_to_double_int (niter
.niter
)))
561 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
563 fprintf (dump_file
, "Removed pointless exit: ");
564 print_gimple_stmt (dump_file
, elt
->stmt
, 0, 0);
566 if (exit_edge
->flags
& EDGE_TRUE_VALUE
)
567 gimple_cond_make_false (elt
->stmt
);
569 gimple_cond_make_true (elt
->stmt
);
570 update_stmt (elt
->stmt
);
/* NOTE(review): file-scope worklists filled by try_unroll_loop_completely
   and drained/released by unloop_loops; the two vectors are parallel
   (loop at index i is unrolled loops_to_unloop_nunroll[i] times).  */
577 /* Stores loops that will be unlooped after we process whole loop tree. */
578 static vec
<loop_p
> loops_to_unloop
;
579 static vec
<int> loops_to_unloop_nunroll
;
/* NOTE(review): corrupted extraction -- interior lines are missing
   (e.g. orig. 582, 592-593, 596, 598, 605, 607, 609, 612, 614, 621,
   626, 629: part of the header comment, the return type, braces and
   the `gimple stmt;' declaration), so this body is incomplete.
   Comments only; code text untouched.  */
581 /* Cancel all fully unrolled loops by putting __builtin_unreachable
583 We do it after all unrolling since unlooping moves basic blocks
584 across loop boundaries trashing loop closed SSA form as well
585 as SCEV info needed to be intact during unrolling.
587 IRRED_INVALIDATED is used to bookkeep if information about
588 irreducible regions may become invalid as a result
589 of the transformation.
590 LOOP_CLOSED_SSA_INVALIDATED is used to bookkeep the case
591 when we need to go into loop closed SSA form. */
594 unloop_loops (bitmap loop_closed_ssa_invalidated
,
595 bool *irred_invalidated
)
597 while (loops_to_unloop
.length ())
599 struct loop
*loop
= loops_to_unloop
.pop ();
600 int n_unroll
= loops_to_unloop_nunroll
.pop ();
601 basic_block latch
= loop
->latch
;
602 edge latch_edge
= loop_latch_edge (loop
);
603 int flags
= latch_edge
->flags
;
604 location_t locus
= latch_edge
->goto_locus
;
606 gimple_stmt_iterator gsi
;
608 remove_exits_and_undefined_stmts (loop
, n_unroll
);
610 /* Unloop destroys the latch edge. */
611 unloop (loop
, irred_invalidated
, loop_closed_ssa_invalidated
);
613 /* Create new basic block for the latch edge destination and wire
615 stmt
= gimple_build_call (builtin_decl_implicit (BUILT_IN_UNREACHABLE
), 0);
616 latch_edge
= make_edge (latch
, create_basic_block (NULL
, NULL
, latch
), flags
);
617 latch_edge
->probability
= 0;
618 latch_edge
->count
= 0;
619 latch_edge
->flags
|= flags
;
620 latch_edge
->goto_locus
= locus
;
622 latch_edge
->dest
->loop_father
= current_loops
->tree_root
;
623 latch_edge
->dest
->count
= 0;
624 latch_edge
->dest
->frequency
= 0;
625 set_immediate_dominator (CDI_DOMINATORS
, latch_edge
->dest
, latch_edge
->src
);
627 gsi
= gsi_start_bb (latch_edge
->dest
);
628 gsi_insert_after (&gsi
, stmt
, GSI_NEW_STMT
);
630 loops_to_unloop
.release ();
631 loops_to_unloop_nunroll
.release ();
/* NOTE(review): corrupted extraction -- embedded line numbers show many
   gaps throughout (return type, `location_t locus' parameter, braces,
   `return false;'/`return true;' statements, the `wont_exit' and
   `large'/`e'/`i'/`cond' declarations, several dump arguments and parts
   of conditions are missing), so this body is incomplete.  Comments
   only; surviving code text, including the (intentionally misspelled)
   macro DLTHE_FLAG_COMPLETTE_PEEL and all runtime strings, is
   byte-identical.  */
634 /* Tries to unroll LOOP completely, i.e. NITER times.
635 UL determines which loops we are allowed to unroll.
636 EXIT is the exit of the loop that should be eliminated.
637 MAXITER specifies bound on number of iterations, -1 if it is
638 not known or too large for HOST_WIDE_INT. The location
639 LOCUS corresponding to the loop is used when emitting
640 a summary of the unroll to the dump file. */
643 try_unroll_loop_completely (struct loop
*loop
,
644 edge exit
, tree niter
,
645 enum unroll_level ul
,
646 HOST_WIDE_INT maxiter
,
649 unsigned HOST_WIDE_INT n_unroll
, ninsns
, max_unroll
, unr_insns
;
651 struct loop_size size
;
652 bool n_unroll_found
= false;
653 edge edge_to_cancel
= NULL
;
655 /* See if we proved number of iterations to be low constant.
657 EXIT is an edge that will be removed in all but last iteration of
660 EDGE_TO_CANCEL is an edge that will be removed from the last iteration
661 of the unrolled sequence and is expected to make the final loop not
664 If the number of execution of loop is determined by standard induction
665 variable test, then EXIT and EDGE_TO_CANCEL are the two edges leaving
667 if (host_integerp (niter
, 1))
669 n_unroll
= tree_low_cst (niter
, 1);
670 n_unroll_found
= true;
671 edge_to_cancel
= EDGE_SUCC (exit
->src
, 0);
672 if (edge_to_cancel
== exit
)
673 edge_to_cancel
= EDGE_SUCC (exit
->src
, 1);
675 /* We do not know the number of iterations and thus we can not eliminate
680 /* See if we can improve our estimate by using recorded loop bounds. */
682 && (!n_unroll_found
|| (unsigned HOST_WIDE_INT
)maxiter
< n_unroll
))
685 n_unroll_found
= true;
686 /* Loop terminates before the IV variable test, so we can not
687 remove it in the last iteration. */
688 edge_to_cancel
= NULL
;
694 max_unroll
= PARAM_VALUE (PARAM_MAX_COMPLETELY_PEEL_TIMES
);
695 if (n_unroll
> max_unroll
)
699 edge_to_cancel
= loop_edge_to_cancel (loop
);
707 vec
<edge
> to_remove
= vNULL
;
708 if (ul
== UL_SINGLE_ITER
)
711 large
= tree_estimate_loop_size
712 (loop
, exit
, edge_to_cancel
, &size
,
713 PARAM_VALUE (PARAM_MAX_COMPLETELY_PEELED_INSNS
));
714 ninsns
= size
.overall
;
717 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
718 fprintf (dump_file
, "Not unrolling loop %d: it is too large.\n",
723 unr_insns
= estimated_unrolled_size (&size
, n_unroll
);
724 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
726 fprintf (dump_file
, " Loop size: %d\n", (int) ninsns
);
727 fprintf (dump_file
, " Estimated size after unrolling: %d\n",
731 /* If the code is going to shrink, we don't need to be extra cautious
732 on guessing if the unrolling is going to be profitable. */
734 /* If there is IV variable that will become constant, we save
735 one instruction in the loop prologue we do not account
737 <= ninsns
+ (size
.constant_iv
!= false))
739 /* We unroll only inner loops, because we do not consider it profitable
740 otherwise. We still can cancel loopback edge of not rolling loop;
741 this is always a good idea. */
742 else if (ul
== UL_NO_GROWTH
)
744 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
745 fprintf (dump_file
, "Not unrolling loop %d: size would grow.\n",
749 /* Outer loops tend to be less interesting candidates for complete
750 unrolling unless we can do a lot of propagation into the inner loop
751 body. For now we disable outer loop unrolling when the code would
753 else if (loop
->inner
)
755 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
756 fprintf (dump_file
, "Not unrolling loop %d: "
757 "it is not innermost and code would grow.\n",
761 /* If there is call on a hot path through the loop, then
762 there is most probably not much to optimize. */
763 else if (size
.num_non_pure_calls_on_hot_path
)
765 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
766 fprintf (dump_file
, "Not unrolling loop %d: "
767 "contains call and code would grow.\n",
771 /* If there is pure/const call in the function, then we
772 can still optimize the unrolled loop body if it contains
773 some other interesting code than the calls and code
774 storing or cumulating the return value. */
775 else if (size
.num_pure_calls_on_hot_path
776 /* One IV increment, one test, one ivtmp store
777 and one useful stmt. That is about minimal loop
779 && (size
.non_call_stmts_on_hot_path
780 <= 3 + size
.num_pure_calls_on_hot_path
))
782 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
783 fprintf (dump_file
, "Not unrolling loop %d: "
784 "contains just pure calls and code would grow.\n",
788 /* Complete unrolling is major win when control flow is removed and
789 one big basic block is created. If the loop contains control flow
790 the optimization may still be a win because of eliminating the loop
791 overhead but it also may blow the branch predictor tables.
792 Limit number of branches on the hot path through the peeled
794 else if (size
.num_branches_on_hot_path
* (int)n_unroll
795 > PARAM_VALUE (PARAM_MAX_PEEL_BRANCHES
))
797 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
798 fprintf (dump_file
, "Not unrolling loop %d: "
799 " number of branches on hot path in the unrolled sequence"
800 " reach --param max-peel-branches limit.\n",
805 > (unsigned) PARAM_VALUE (PARAM_MAX_COMPLETELY_PEELED_INSNS
))
807 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
808 fprintf (dump_file
, "Not unrolling loop %d: "
809 "(--param max-completely-peeled-insns limit reached).\n",
814 initialize_original_copy_tables ();
815 wont_exit
= sbitmap_alloc (n_unroll
+ 1);
816 bitmap_ones (wont_exit
);
817 bitmap_clear_bit (wont_exit
, 0);
819 if (!gimple_duplicate_loop_to_header_edge (loop
, loop_preheader_edge (loop
),
822 DLTHE_FLAG_UPDATE_FREQ
823 | DLTHE_FLAG_COMPLETTE_PEEL
))
825 free_original_copy_tables ();
827 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
828 fprintf (dump_file
, "Failed to duplicate the loop\n");
832 FOR_EACH_VEC_ELT (to_remove
, i
, e
)
834 bool ok
= remove_path (e
);
838 to_remove
.release ();
840 free_original_copy_tables ();
844 /* Remove the conditional from the last copy of the loop. */
847 cond
= last_stmt (edge_to_cancel
->src
);
848 if (edge_to_cancel
->flags
& EDGE_TRUE_VALUE
)
849 gimple_cond_make_false (cond
);
851 gimple_cond_make_true (cond
);
853 /* Do not remove the path. Doing so may remove outer loop
854 and confuse bookkeeping code in tree_unroll_loops_completely. */
857 /* Store the loop for later unlooping and exit removal. */
858 loops_to_unloop
.safe_push (loop
);
859 loops_to_unloop_nunroll
.safe_push (n_unroll
);
861 if (dump_enabled_p ())
864 dump_printf_loc (MSG_OPTIMIZED_LOCATIONS
| TDF_DETAILS
, locus
,
865 "loop turned into non-loop; it never loops\n");
868 dump_printf_loc (MSG_OPTIMIZED_LOCATIONS
| TDF_DETAILS
, locus
,
869 "loop with %d iterations completely unrolled",
870 (int) (n_unroll
+ 1));
872 dump_printf (MSG_OPTIMIZED_LOCATIONS
| TDF_DETAILS
,
873 " (header execution count %d)",
874 (int)loop
->header
->count
);
875 dump_printf (MSG_OPTIMIZED_LOCATIONS
| TDF_DETAILS
, "\n");
879 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
882 fprintf (dump_file
, "Exit condition of peeled iterations was "
885 fprintf (dump_file
, "Last iteration exit edge was proved true.\n");
887 fprintf (dump_file
, "Latch of last iteration was marked by "
888 "__builtin_unreachable ().\n");
/* NOTE(review): corrupted extraction -- interior lines are missing
   (return type, the `bool try_eval'/`locus' parameter tail, braces,
   declarations of `niter'/`exit', `return false;'/`return modified;'
   statements and several condition halves), so this body is
   incomplete.  Comments only; surviving code text untouched.  */
894 /* Adds a canonical induction variable to LOOP if suitable.
895 CREATE_IV is true if we may create a new iv. UL determines
896 which loops we are allowed to completely unroll. If TRY_EVAL is true, we try
897 to determine the number of iterations of a loop by direct evaluation.
898 Returns true if cfg is changed. */
901 canonicalize_loop_induction_variables (struct loop
*loop
,
902 bool create_iv
, enum unroll_level ul
,
907 HOST_WIDE_INT maxiter
;
908 bool modified
= false;
909 location_t locus
= UNKNOWN_LOCATION
;
911 niter
= number_of_latch_executions (loop
);
912 exit
= single_exit (loop
);
913 if (TREE_CODE (niter
) == INTEGER_CST
)
914 locus
= gimple_location (last_stmt (exit
->src
));
917 /* If the loop has more than one exit, try checking all of them
918 for # of iterations determinable through scev. */
920 niter
= find_loop_niter (loop
, &exit
);
922 /* Finally if everything else fails, try brute force evaluation. */
924 && (chrec_contains_undetermined (niter
)
925 || TREE_CODE (niter
) != INTEGER_CST
))
926 niter
= find_loop_niter_by_eval (loop
, &exit
);
929 locus
= gimple_location (last_stmt (exit
->src
));
931 if (TREE_CODE (niter
) != INTEGER_CST
)
935 /* We work exceptionally hard here to estimate the bound
936 by find_loop_niter_by_eval. Be sure to keep it for future. */
937 if (niter
&& TREE_CODE (niter
) == INTEGER_CST
)
939 record_niter_bound (loop
, tree_to_double_int (niter
),
940 exit
== single_likely_exit (loop
), true);
943 /* Force re-computation of loop bounds so we can remove redundant exits. */
944 maxiter
= max_loop_iterations_int (loop
);
946 if (dump_file
&& (dump_flags
& TDF_DETAILS
)
947 && TREE_CODE (niter
) == INTEGER_CST
)
949 fprintf (dump_file
, "Loop %d iterates ", loop
->num
);
950 print_generic_expr (dump_file
, niter
, TDF_SLIM
);
951 fprintf (dump_file
, " times.\n");
953 if (dump_file
&& (dump_flags
& TDF_DETAILS
)
956 fprintf (dump_file
, "Loop %d iterates at most %i times.\n", loop
->num
,
960 /* Remove exits that are known to be never taken based on loop bound.
961 Needs to be called after compilation of max_loop_iterations_int that
962 populates the loop bounds. */
963 modified
|= remove_redundant_iv_tests (loop
);
965 if (try_unroll_loop_completely (loop
, exit
, niter
, ul
, maxiter
, locus
))
969 && niter
&& !chrec_contains_undetermined (niter
)
970 && exit
&& just_once_each_iteration_p (loop
, exit
->src
))
971 create_canonical_iv (loop
, exit
, niter
);
/* NOTE(review): corrupted extraction -- interior lines are missing
   (return type, braces, the `loop_iterator li'/`struct loop *loop'
   declarations, the trailing FOR_EACH_LOOP arguments and the
   scev_reset call), so this body is incomplete.  Comments only;
   surviving code text untouched.  */
976 /* The main entry point of the pass. Adds canonical induction variables
977 to the suitable loops. */
980 canonicalize_induction_variables (void)
984 bool changed
= false;
985 bool irred_invalidated
= false;
986 bitmap loop_closed_ssa_invalidated
= BITMAP_ALLOC (NULL
);
988 free_numbers_of_iterations_estimates ();
989 estimate_numbers_of_iterations ();
991 FOR_EACH_LOOP (li
, loop
, LI_FROM_INNERMOST
)
993 changed
|= canonicalize_loop_induction_variables (loop
,
994 true, UL_SINGLE_ITER
,
997 gcc_assert (!need_ssa_update_p (cfun
));
999 unloop_loops (loop_closed_ssa_invalidated
, &irred_invalidated
);
1000 if (irred_invalidated
1001 && loops_state_satisfies_p (LOOPS_HAVE_MARKED_IRREDUCIBLE_REGIONS
))
1002 mark_irreducible_loops ();
1004 /* Clean up the information about numbers of iterations, since brute force
1005 evaluation could reveal new information. */
1008 if (!bitmap_empty_p (loop_closed_ssa_invalidated
))
1010 gcc_checking_assert (loops_state_satisfies_p (LOOP_CLOSED_SSA
));
1011 rewrite_into_loop_closed_ssa (NULL
, TODO_update_ssa
);
1013 BITMAP_FREE (loop_closed_ssa_invalidated
);
1016 return TODO_cleanup_cfg
;
/* NOTE(review): corrupted extraction -- interior lines are missing
   (return type, braces, the `gimple use_stmt'/`use_operand_p use'
   declarations and the SET_USE call inside the inner iterator), so
   this body is incomplete.  Comments only; code text untouched.  */
1020 /* Propagate VAL into all uses of SSA_NAME. */
1023 propagate_into_all_uses (tree ssa_name
, tree val
)
1025 imm_use_iterator iter
;
1028 FOR_EACH_IMM_USE_STMT (use_stmt
, iter
, ssa_name
)
1030 gimple_stmt_iterator use_stmt_gsi
= gsi_for_stmt (use_stmt
);
1033 FOR_EACH_IMM_USE_ON_STMT (use
, iter
)
1036 if (is_gimple_assign (use_stmt
)
1037 && get_gimple_rhs_class (gimple_assign_rhs_code (use_stmt
))
1038 == GIMPLE_SINGLE_RHS
)
1040 tree rhs
= gimple_assign_rhs1 (use_stmt
);
1042 if (TREE_CODE (rhs
) == ADDR_EXPR
)
1043 recompute_tree_invariant_for_addr_expr (rhs
);
1046 fold_stmt_inplace (&use_stmt_gsi
);
1047 update_stmt (use_stmt
);
1048 maybe_clean_or_replace_eh_stmt (use_stmt
, use_stmt
);
/* NOTE(review): corrupted extraction -- interior lines are missing
   (return type, braces, the `else gsi_next (&gsi);' arms of both
   loops and the `tree lhs;' declaration), so this body is incomplete.
   Comments only; code text untouched.  */
1052 /* Propagate constant SSA_NAMEs defined in basic block BB. */
1055 propagate_constants_for_unrolling (basic_block bb
)
1057 gimple_stmt_iterator gsi
;
1059 /* Look for degenerate PHI nodes with constant argument. */
1060 for (gsi
= gsi_start_phis (bb
); !gsi_end_p (gsi
); )
1062 gimple phi
= gsi_stmt (gsi
);
1063 tree result
= gimple_phi_result (phi
);
1064 tree arg
= gimple_phi_arg_def (phi
, 0);
1066 if (gimple_phi_num_args (phi
) == 1 && TREE_CODE (arg
) == INTEGER_CST
)
1068 propagate_into_all_uses (result
, arg
);
1069 gsi_remove (&gsi
, true);
1070 release_ssa_name (result
);
1076 /* Look for assignments to SSA names with constant RHS. */
1077 for (gsi
= gsi_start_bb (bb
); !gsi_end_p (gsi
); )
1079 gimple stmt
= gsi_stmt (gsi
);
1082 if (is_gimple_assign (stmt
)
1083 && gimple_assign_rhs_code (stmt
) == INTEGER_CST
1084 && (lhs
= gimple_assign_lhs (stmt
), TREE_CODE (lhs
) == SSA_NAME
)
1085 && !SSA_NAME_OCCURS_IN_ABNORMAL_PHI (lhs
))
1087 propagate_into_all_uses (lhs
, gimple_assign_rhs1 (stmt
));
1088 gsi_remove (&gsi
, true);
1089 release_ssa_name (lhs
);
/* NOTE(review): corrupted extraction -- interior lines are missing
   (return type, the trailing `struct loop *loop' parameter, braces,
   the `struct loop *inner' declaration, the early-return after inner
   processing, the `ul = ...' assignments and the final
   `return changed;'), so this body is incomplete.  Comments only;
   surviving code text untouched.  */
1096 /* Process loops from innermost to outer, stopping at the innermost
1097 loop we unrolled. */
1100 tree_unroll_loops_completely_1 (bool may_increase_size
, bool unroll_outer
,
1101 vec
<loop_p
, va_stack
>& father_stack
,
1104 struct loop
*loop_father
;
1105 bool changed
= false;
1107 enum unroll_level ul
;
1109 /* Process inner loops first. */
1110 for (inner
= loop
->inner
; inner
!= NULL
; inner
= inner
->next
)
1111 changed
|= tree_unroll_loops_completely_1 (may_increase_size
,
1112 unroll_outer
, father_stack
,
1115 /* If we changed an inner loop we cannot process outer loops in this
1116 iteration because SSA form is not up-to-date. Continue with
1117 siblings of outer loops instead. */
1121 /* Don't unroll #pragma omp simd loops until the vectorizer
1122 attempts to vectorize those. */
1123 if (loop
->force_vect
)
1126 /* Try to unroll this loop. */
1127 loop_father
= loop_outer (loop
)
1131 if (may_increase_size
&& optimize_loop_nest_for_speed_p (loop
)
1132 /* Unroll outermost loops only if asked to do so or they do
1133 not cause code growth. */
1134 && (unroll_outer
|| loop_outer (loop_father
)))
1139 if (canonicalize_loop_induction_variables
1140 (loop
, false, ul
, !flag_tree_loop_ivcanon
))
1142 /* If we'll continue unrolling, we need to propagate constants
1143 within the new basic blocks to fold away induction variable
1144 computations; otherwise, the size might blow up before the
1145 iteration is complete and the IR eventually cleaned up. */
1146 if (loop_outer (loop_father
) && !loop_father
->aux
)
1148 father_stack
.safe_push (loop_father
);
1149 loop_father
->aux
= loop_father
;
1158 /* Unroll LOOPS completely if they iterate just few times. Unless
1159 MAY_INCREASE_SIZE is true, perform the unrolling only if the
1160 size of the code does not increase. */
1163 tree_unroll_loops_completely (bool may_increase_size
, bool unroll_outer
)
1165 vec
<loop_p
, va_stack
> father_stack
;
1168 bool irred_invalidated
= false;
1170 vec_stack_alloc (loop_p
, father_stack
, 16);
1174 bitmap loop_closed_ssa_invalidated
= NULL
;
1176 if (loops_state_satisfies_p (LOOP_CLOSED_SSA
))
1177 loop_closed_ssa_invalidated
= BITMAP_ALLOC (NULL
);
1179 free_numbers_of_iterations_estimates ();
1180 estimate_numbers_of_iterations ();
1182 changed
= tree_unroll_loops_completely_1 (may_increase_size
,
1183 unroll_outer
, father_stack
,
1184 current_loops
->tree_root
);
1190 /* Be sure to skip unlooped loops while procesing father_stack
1192 FOR_EACH_VEC_ELT (loops_to_unloop
, i
, iter
)
1193 (*iter
)->aux
= NULL
;
1194 FOR_EACH_VEC_ELT (father_stack
, i
, iter
)
1197 unloop_loops (loop_closed_ssa_invalidated
, &irred_invalidated
);
1199 /* We can not use TODO_update_ssa_no_phi because VOPS gets confused. */
1200 if (loop_closed_ssa_invalidated
1201 && !bitmap_empty_p (loop_closed_ssa_invalidated
))
1202 rewrite_into_loop_closed_ssa (loop_closed_ssa_invalidated
,
1205 update_ssa (TODO_update_ssa
);
1207 /* Propagate the constants within the new basic blocks. */
1208 FOR_EACH_VEC_ELT (father_stack
, i
, iter
)
1212 basic_block
*body
= get_loop_body_in_dom_order (*iter
);
1213 for (j
= 0; j
< (*iter
)->num_nodes
; j
++)
1214 propagate_constants_for_unrolling (body
[j
]);
1216 (*iter
)->aux
= NULL
;
1218 father_stack
.truncate (0);
1220 /* This will take care of removing completely unrolled loops
1221 from the loop structures so we can continue unrolling now
1223 if (cleanup_tree_cfg ())
1224 update_ssa (TODO_update_ssa_only_virtuals
);
1226 /* Clean up the information about numbers of iterations, since
1227 complete unrolling might have invalidated it. */
1229 #ifdef ENABLE_CHECKING
1230 if (loops_state_satisfies_p (LOOP_CLOSED_SSA
))
1231 verify_loop_closed_ssa (true);
1234 if (loop_closed_ssa_invalidated
)
1235 BITMAP_FREE (loop_closed_ssa_invalidated
);
1238 && ++iteration
<= PARAM_VALUE (PARAM_MAX_UNROLL_ITERATIONS
));
1240 father_stack
.release ();
1242 if (irred_invalidated
1243 && loops_state_satisfies_p (LOOPS_HAVE_MARKED_IRREDUCIBLE_REGIONS
))
1244 mark_irreducible_loops ();
1249 /* Canonical induction variable creation pass. */
1252 tree_ssa_loop_ivcanon (void)
1254 if (number_of_loops (cfun
) <= 1)
1257 return canonicalize_induction_variables ();
1261 gate_tree_ssa_loop_ivcanon (void)
1263 return flag_tree_loop_ivcanon
!= 0;
1268 const pass_data pass_data_iv_canon
=
1270 GIMPLE_PASS
, /* type */
1271 "ivcanon", /* name */
1272 OPTGROUP_LOOP
, /* optinfo_flags */
1273 true, /* has_gate */
1274 true, /* has_execute */
1275 TV_TREE_LOOP_IVCANON
, /* tv_id */
1276 ( PROP_cfg
| PROP_ssa
), /* properties_required */
1277 0, /* properties_provided */
1278 0, /* properties_destroyed */
1279 0, /* todo_flags_start */
1280 0, /* todo_flags_finish */
1283 class pass_iv_canon
: public gimple_opt_pass
1286 pass_iv_canon (gcc::context
*ctxt
)
1287 : gimple_opt_pass (pass_data_iv_canon
, ctxt
)
1290 /* opt_pass methods: */
1291 bool gate () { return gate_tree_ssa_loop_ivcanon (); }
1292 unsigned int execute () { return tree_ssa_loop_ivcanon (); }
1294 }; // class pass_iv_canon
1299 make_pass_iv_canon (gcc::context
*ctxt
)
1301 return new pass_iv_canon (ctxt
);
1304 /* Complete unrolling of loops. */
1307 tree_complete_unroll (void)
1309 if (number_of_loops (cfun
) <= 1)
1312 return tree_unroll_loops_completely (flag_unroll_loops
1314 || optimize
>= 3, true);
/* Gate for the outer complete-unrolling pass.
   NOTE(review): the body is not visible in this chunk; upstream
   GCC 4.9 unconditionally returns true (the cost model inside
   tree_unroll_loops_completely decides) — confirm against history.  */

static bool
gate_tree_complete_unroll (void)
{
  return true;
}
1325 const pass_data pass_data_complete_unroll
=
1327 GIMPLE_PASS
, /* type */
1328 "cunroll", /* name */
1329 OPTGROUP_LOOP
, /* optinfo_flags */
1330 true, /* has_gate */
1331 true, /* has_execute */
1332 TV_COMPLETE_UNROLL
, /* tv_id */
1333 ( PROP_cfg
| PROP_ssa
), /* properties_required */
1334 0, /* properties_provided */
1335 0, /* properties_destroyed */
1336 0, /* todo_flags_start */
1337 0, /* todo_flags_finish */
1340 class pass_complete_unroll
: public gimple_opt_pass
1343 pass_complete_unroll (gcc::context
*ctxt
)
1344 : gimple_opt_pass (pass_data_complete_unroll
, ctxt
)
1347 /* opt_pass methods: */
1348 bool gate () { return gate_tree_complete_unroll (); }
1349 unsigned int execute () { return tree_complete_unroll (); }
1351 }; // class pass_complete_unroll
1356 make_pass_complete_unroll (gcc::context
*ctxt
)
1358 return new pass_complete_unroll (ctxt
);
1361 /* Complete unrolling of inner loops. */
1364 tree_complete_unroll_inner (void)
1368 loop_optimizer_init (LOOPS_NORMAL
1369 | LOOPS_HAVE_RECORDED_EXITS
);
1370 if (number_of_loops (cfun
) > 1)
1373 ret
= tree_unroll_loops_completely (optimize
>= 3, false);
1374 free_numbers_of_iterations_estimates ();
1377 loop_optimizer_finalize ();
1383 gate_tree_complete_unroll_inner (void)
1385 return optimize
>= 2;
1390 const pass_data pass_data_complete_unrolli
=
1392 GIMPLE_PASS
, /* type */
1393 "cunrolli", /* name */
1394 OPTGROUP_LOOP
, /* optinfo_flags */
1395 true, /* has_gate */
1396 true, /* has_execute */
1397 TV_COMPLETE_UNROLL
, /* tv_id */
1398 ( PROP_cfg
| PROP_ssa
), /* properties_required */
1399 0, /* properties_provided */
1400 0, /* properties_destroyed */
1401 0, /* todo_flags_start */
1402 TODO_verify_flow
, /* todo_flags_finish */
1405 class pass_complete_unrolli
: public gimple_opt_pass
1408 pass_complete_unrolli (gcc::context
*ctxt
)
1409 : gimple_opt_pass (pass_data_complete_unrolli
, ctxt
)
1412 /* opt_pass methods: */
1413 bool gate () { return gate_tree_complete_unroll_inner (); }
1414 unsigned int execute () { return tree_complete_unroll_inner (); }
1416 }; // class pass_complete_unrolli
1421 make_pass_complete_unrolli (gcc::context
*ctxt
)
1423 return new pass_complete_unrolli (ctxt
);