/* Induction variable canonicalization and loop peeling.
- Copyright (C) 2004-2016 Free Software Foundation, Inc.
+ Copyright (C) 2004-2020 Free Software Foundation, Inc.
This file is part of GCC.
#include "cfgloop.h"
#include "tree-chrec.h"
#include "tree-scalar-evolution.h"
-#include "params.h"
#include "tree-inline.h"
#include "tree-cfgcleanup.h"
#include "builtins.h"
+#include "tree-ssa-sccvn.h"
+#include "dbgcnt.h"
/* Specifies types of loops that may be unrolled. */
};
/* Adds a canonical induction variable to LOOP iterating NITER times. EXIT
- is the exit edge whose condition is replaced. */
+ is the exit edge whose condition is replaced. The SSA versions of the new
+ IV before and after increment will be stored in VAR_BEFORE and VAR_AFTER
+ if they are not NULL. */
-static void
-create_canonical_iv (struct loop *loop, edge exit, tree niter)
+void
+create_canonical_iv (class loop *loop, edge exit, tree niter,
+ tree *var_before = NULL, tree *var_after = NULL)
{
edge in;
tree type, var;
create_iv (niter,
build_int_cst (type, -1),
NULL_TREE, loop,
- &incr_at, false, NULL, &var);
+ &incr_at, false, var_before, &var);
+ if (var_after)
+ *var_after = var;
cmp = (exit->flags & EDGE_TRUE_VALUE) ? EQ_EXPR : NE_EXPR;
gimple_cond_set_code (cond, cmp);
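/* A minimal usage sketch of the widened interface (hypothetical
   caller, illustrating the defaulted arguments): callers that need
   the SSA names of the new IV can write

     tree var_before, var_after;
     create_canonical_iv (loop, exit, niter, &var_before, &var_after);

   while existing three-argument calls compile unchanged because both
   pointer parameters default to NULL.  */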
/* Return true if OP in STMT will be constant after peeling LOOP. */
static bool
-constant_after_peeling (tree op, gimple *stmt, struct loop *loop)
+constant_after_peeling (tree op, gimple *stmt, class loop *loop)
{
- affine_iv iv;
-
- if (is_gimple_min_invariant (op))
+ if (CONSTANT_CLASS_P (op))
return true;
/* We can still fold accesses to constant arrays when index is known. */
return false;
}
- /* Induction variables are constants. */
- if (!simple_iv (loop, loop_containing_stmt (stmt), op, &iv, false))
+ /* Induction variables are constants when defined in LOOP. */
+ if (loop_containing_stmt (stmt) != loop)
return false;
- if (!is_gimple_min_invariant (iv.base))
- return false;
- if (!is_gimple_min_invariant (iv.step))
+ tree ev = analyze_scalar_evolution (loop, op);
+ if (chrec_contains_undetermined (ev)
+ || chrec_contains_symbols (ev))
return false;
return true;
}
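/* Illustrative note on the scev-based test above: for a simple counter
   in loop 1, analyze_scalar_evolution returns a chrec such as
   {0, +, 1}_1, whose base and step are constants; it contains neither
   undetermined parts nor symbols, so the operand folds to a constant
   in each peeled copy.  An evolution such as {n_5(D), +, 1}_1 carries
   the symbol n_5 and is rejected.  */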
Stop estimating after UPPER_BOUND is met. Return true in this case. */
static bool
-tree_estimate_loop_size (struct loop *loop, edge exit, edge edge_to_cancel,
+tree_estimate_loop_size (class loop *loop, edge exit, edge edge_to_cancel,
struct loop_size *size, int upper_bound)
{
basic_block *body = get_loop_body (loop);
if (dump_file && (dump_flags & TDF_DETAILS))
{
fprintf (dump_file, " size: %3i ", num);
- print_gimple_stmt (dump_file, gsi_stmt (gsi), 0, 0);
+ print_gimple_stmt (dump_file, gsi_stmt (gsi), 0);
}
/* Look for reasons why we might optimize this stmt away. */
stmt, loop)
&& (gimple_assign_rhs_class (stmt) != GIMPLE_BINARY_RHS
|| constant_after_peeling (gimple_assign_rhs2 (stmt),
- stmt, loop)))
+ stmt, loop))
+ && gimple_assign_rhs_class (stmt) != GIMPLE_TERNARY_RHS)
{
size->constant_iv = true;
if (dump_file && (dump_flags & TDF_DETAILS))
size->non_call_stmts_on_hot_path++;
if (((gimple_code (stmt) == GIMPLE_COND
&& (!constant_after_peeling (gimple_cond_lhs (stmt), stmt, loop)
- || constant_after_peeling (gimple_cond_rhs (stmt), stmt,
- loop)))
+ || !constant_after_peeling (gimple_cond_rhs (stmt), stmt,
+ loop)))
|| (gimple_code (stmt) == GIMPLE_SWITCH
&& !constant_after_peeling (gimple_switch_index (
as_a <gswitch *> (stmt)),
The other cases are hopefully rare and will be cleaned up later. */
static edge
-loop_edge_to_cancel (struct loop *loop)
+loop_edge_to_cancel (class loop *loop)
{
vec<edge> exits;
unsigned i;
known to not be executed. */
static bool
-remove_exits_and_undefined_stmts (struct loop *loop, unsigned int npeeled)
+remove_exits_and_undefined_stmts (class loop *loop, unsigned int npeeled)
{
- struct nb_iter_bound *elt;
+ class nb_iter_bound *elt;
bool changed = false;
for (elt = loop->bounds; elt; elt = elt->next)
if (dump_file && (dump_flags & TDF_DETAILS))
{
fprintf (dump_file, "Forced statement unreachable: ");
- print_gimple_stmt (dump_file, elt->stmt, 0, 0);
+ print_gimple_stmt (dump_file, elt->stmt, 0);
}
}
/* If we know the exit will be taken after peeling, update. */
if (dump_file && (dump_flags & TDF_DETAILS))
{
fprintf (dump_file, "Forced exit to be taken: ");
- print_gimple_stmt (dump_file, elt->stmt, 0, 0);
+ print_gimple_stmt (dump_file, elt->stmt, 0);
}
if (!loop_exit_edge_p (loop, exit_edge))
exit_edge = EDGE_SUCC (bb, 1);
+ exit_edge->probability = profile_probability::always ();
gcc_checking_assert (loop_exit_edge_p (loop, exit_edge));
gcond *cond_stmt = as_a <gcond *> (elt->stmt);
if (exit_edge->flags & EDGE_TRUE_VALUE)
discovered. */
static bool
-remove_redundant_iv_tests (struct loop *loop)
+remove_redundant_iv_tests (class loop *loop)
{
- struct nb_iter_bound *elt;
+ class nb_iter_bound *elt;
bool changed = false;
if (!loop->any_upper_bound)
{
basic_block bb = gimple_bb (elt->stmt);
edge exit_edge = EDGE_SUCC (bb, 0);
- struct tree_niter_desc niter;
+ class tree_niter_desc niter;
if (!loop_exit_edge_p (loop, exit_edge))
exit_edge = EDGE_SUCC (bb, 1);
if (dump_file && (dump_flags & TDF_DETAILS))
{
fprintf (dump_file, "Removed pointless exit: ");
- print_gimple_stmt (dump_file, elt->stmt, 0, 0);
+ print_gimple_stmt (dump_file, elt->stmt, 0);
}
gcond *cond_stmt = as_a <gcond *> (elt->stmt);
if (exit_edge->flags & EDGE_TRUE_VALUE)
{
while (loops_to_unloop.length ())
{
- struct loop *loop = loops_to_unloop.pop ();
+ class loop *loop = loops_to_unloop.pop ();
int n_unroll = loops_to_unloop_nunroll.pop ();
basic_block latch = loop->latch;
edge latch_edge = loop_latch_edge (loop);
it in. */
stmt = gimple_build_call (builtin_decl_implicit (BUILT_IN_UNREACHABLE), 0);
latch_edge = make_edge (latch, create_basic_block (NULL, NULL, latch), flags);
- latch_edge->probability = 0;
- latch_edge->count = 0;
+ latch_edge->probability = profile_probability::never ();
latch_edge->flags |= flags;
latch_edge->goto_locus = locus;
add_bb_to_loop (latch_edge->dest, current_loops->tree_root);
- latch_edge->dest->count = 0;
- latch_edge->dest->frequency = 0;
+ latch_edge->dest->count = profile_count::zero ();
set_immediate_dominator (CDI_DOMINATORS, latch_edge->dest, latch_edge->src);
gsi = gsi_start_bb (latch_edge->dest);
loops_to_unloop.release ();
loops_to_unloop_nunroll.release ();
- /* Remove edges in peeled copies. */
+ /* Remove edges in peeled copies. Since remove_path removes dominated
+ regions, we need to cope with paths that have already been removed. */
unsigned i;
edge e;
+ auto_vec<int, 20> src_bbs;
+ src_bbs.reserve_exact (edges_to_remove.length ());
FOR_EACH_VEC_ELT (edges_to_remove, i, e)
- {
- bool ok = remove_path (e, irred_invalidated, loop_closed_ssa_invalidated);
- gcc_assert (ok);
- }
+ src_bbs.quick_push (e->src->index);
+ FOR_EACH_VEC_ELT (edges_to_remove, i, e)
+ if (BASIC_BLOCK_FOR_FN (cfun, src_bbs[i]))
+ {
+ bool ok = remove_path (e, irred_invalidated,
+ loop_closed_ssa_invalidated);
+ gcc_assert (ok);
+ }
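+ /* Sketch of the failure mode the src_bbs check above guards against:
+ when two recorded edges lie in the same dominated region, the first
+ remove_path call may already have deleted the source block of the
+ second edge; BASIC_BLOCK_FOR_FN then returns NULL for the recorded
+ index and the stale path is skipped instead of being removed twice. */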
edges_to_remove.release ();
}
a summary of the unroll to the dump file. */
static bool
-try_unroll_loop_completely (struct loop *loop,
- edge exit, tree niter,
+try_unroll_loop_completely (class loop *loop,
+ edge exit, tree niter, bool may_be_zero,
enum unroll_level ul,
HOST_WIDE_INT maxiter,
- location_t locus)
+ dump_user_location_t locus, bool allow_peel)
{
- unsigned HOST_WIDE_INT n_unroll = 0, ninsns, unr_insns;
- struct loop_size size;
+ unsigned HOST_WIDE_INT n_unroll = 0;
bool n_unroll_found = false;
edge edge_to_cancel = NULL;
- int report_flags = MSG_OPTIMIZED_LOCATIONS | TDF_RTL | TDF_DETAILS;
/* See if we proved number of iterations to be low constant.
if (edge_to_cancel == exit)
edge_to_cancel = EDGE_SUCC (exit->src, 1);
}
- /* We do not know the number of iterations and thus we can not eliminate
+ /* We do not know the number of iterations and thus we cannot eliminate
the EXIT edge. */
else
exit = NULL;
/* See if we can improve our estimate by using recorded loop bounds. */
- if (maxiter >= 0
+ if ((allow_peel || maxiter == 0 || ul == UL_NO_GROWTH)
+ && maxiter >= 0
&& (!n_unroll_found || (unsigned HOST_WIDE_INT)maxiter < n_unroll))
{
n_unroll = maxiter;
n_unroll_found = true;
- /* Loop terminates before the IV variable test, so we can not
+ /* Loop terminates before the IV variable test, so we cannot
remove it in the last iteration. */
edge_to_cancel = NULL;
}
if (!n_unroll_found)
return false;
- if (n_unroll > (unsigned) PARAM_VALUE (PARAM_MAX_COMPLETELY_PEEL_TIMES))
+ if (!loop->unroll
+ && n_unroll > (unsigned) param_max_completely_peel_times)
{
if (dump_file && (dump_flags & TDF_DETAILS))
fprintf (dump_file, "Not unrolling loop %d "
if (n_unroll)
{
- bool large;
if (ul == UL_SINGLE_ITER)
return false;
- /* EXIT can be removed only if we are sure it passes first N_UNROLL
- iterations. */
- bool remove_exit = (exit && niter
- && TREE_CODE (niter) == INTEGER_CST
- && wi::leu_p (n_unroll, wi::to_widest (niter)));
-
- large = tree_estimate_loop_size
- (loop, remove_exit ? exit : NULL, edge_to_cancel, &size,
- PARAM_VALUE (PARAM_MAX_COMPLETELY_PEELED_INSNS));
- ninsns = size.overall;
- if (large)
+ if (loop->unroll)
{
- if (dump_file && (dump_flags & TDF_DETAILS))
- fprintf (dump_file, "Not unrolling loop %d: it is too large.\n",
- loop->num);
- return false;
+ /* If the unrolling factor is too large, bail out. */
+ if (n_unroll > (unsigned)loop->unroll)
+ {
+ if (dump_file && (dump_flags & TDF_DETAILS))
+ fprintf (dump_file,
+ "Not unrolling loop %d: "
+ "user didn't want it unrolled completely.\n",
+ loop->num);
+ return false;
+ }
}
-
- unr_insns = estimated_unrolled_size (&size, n_unroll);
- if (dump_file && (dump_flags & TDF_DETAILS))
+ else
{
- fprintf (dump_file, " Loop size: %d\n", (int) ninsns);
- fprintf (dump_file, " Estimated size after unrolling: %d\n",
- (int) unr_insns);
- }
+ struct loop_size size;
+ /* EXIT can be removed only if we are sure it passes first N_UNROLL
+ iterations. */
+ bool remove_exit = (exit && niter
+ && TREE_CODE (niter) == INTEGER_CST
+ && wi::leu_p (n_unroll, wi::to_widest (niter)));
+ bool large
+ = tree_estimate_loop_size
+ (loop, remove_exit ? exit : NULL, edge_to_cancel, &size,
+ param_max_completely_peeled_insns);
+ if (large)
+ {
+ if (dump_file && (dump_flags & TDF_DETAILS))
+ fprintf (dump_file, "Not unrolling loop %d: it is too large.\n",
+ loop->num);
+ return false;
+ }
- /* If the code is going to shrink, we don't need to be extra cautious
- on guessing if the unrolling is going to be profitable. */
- if (unr_insns
- /* If there is IV variable that will become constant, we save
- one instruction in the loop prologue we do not account
- otherwise. */
- <= ninsns + (size.constant_iv != false))
- ;
- /* We unroll only inner loops, because we do not consider it profitable
- otheriwse. We still can cancel loopback edge of not rolling loop;
- this is always a good idea. */
- else if (ul == UL_NO_GROWTH)
- {
- if (dump_file && (dump_flags & TDF_DETAILS))
- fprintf (dump_file, "Not unrolling loop %d: size would grow.\n",
- loop->num);
- return false;
- }
- /* Outer loops tend to be less interesting candidates for complete
- unrolling unless we can do a lot of propagation into the inner loop
- body. For now we disable outer loop unrolling when the code would
- grow. */
- else if (loop->inner)
- {
- if (dump_file && (dump_flags & TDF_DETAILS))
- fprintf (dump_file, "Not unrolling loop %d: "
- "it is not innermost and code would grow.\n",
- loop->num);
- return false;
- }
- /* If there is call on a hot path through the loop, then
- there is most probably not much to optimize. */
- else if (size.num_non_pure_calls_on_hot_path)
- {
- if (dump_file && (dump_flags & TDF_DETAILS))
- fprintf (dump_file, "Not unrolling loop %d: "
- "contains call and code would grow.\n",
- loop->num);
- return false;
- }
- /* If there is pure/const call in the function, then we
- can still optimize the unrolled loop body if it contains
- some other interesting code than the calls and code
- storing or cumulating the return value. */
- else if (size.num_pure_calls_on_hot_path
- /* One IV increment, one test, one ivtmp store
- and one useful stmt. That is about minimal loop
- doing pure call. */
- && (size.non_call_stmts_on_hot_path
- <= 3 + size.num_pure_calls_on_hot_path))
- {
- if (dump_file && (dump_flags & TDF_DETAILS))
- fprintf (dump_file, "Not unrolling loop %d: "
- "contains just pure calls and code would grow.\n",
- loop->num);
- return false;
- }
- /* Complete unrolling is a major win when control flow is removed and
- one big basic block is created. If the loop contains control flow
- the optimization may still be a win because of eliminating the loop
- overhead but it also may blow the branch predictor tables.
- Limit number of branches on the hot path through the peeled
- sequence. */
- else if (size.num_branches_on_hot_path * (int)n_unroll
- > PARAM_VALUE (PARAM_MAX_PEEL_BRANCHES))
- {
+ unsigned HOST_WIDE_INT ninsns = size.overall;
+ unsigned HOST_WIDE_INT unr_insns
+ = estimated_unrolled_size (&size, n_unroll);
if (dump_file && (dump_flags & TDF_DETAILS))
- fprintf (dump_file, "Not unrolling loop %d: "
- " number of branches on hot path in the unrolled sequence"
- " reach --param max-peel-branches limit.\n",
- loop->num);
- return false;
- }
- else if (unr_insns
- > (unsigned) PARAM_VALUE (PARAM_MAX_COMPLETELY_PEELED_INSNS))
- {
- if (dump_file && (dump_flags & TDF_DETAILS))
- fprintf (dump_file, "Not unrolling loop %d: "
- "(--param max-completely-peeled-insns limit reached).\n",
- loop->num);
- return false;
+ {
+ fprintf (dump_file, " Loop size: %d\n", (int) ninsns);
+ fprintf (dump_file, " Estimated size after unrolling: %d\n",
+ (int) unr_insns);
+ }
+
+ /* If the code is going to shrink, we don't need to be extra
+ cautious in guessing whether the unrolling is going to be
+ profitable. */
+ if (unr_insns
+ /* If there is an IV variable that will become constant, we
+ save one instruction in the loop prologue that we do not
+ account for otherwise. */
+ <= ninsns + (size.constant_iv != false))
+ ;
+ /* We unroll only inner loops, because we do not consider it
+ profitable otherwise.  We can still cancel the loopback edge
+ of a loop that does not roll; this is always a good idea. */
+ else if (ul == UL_NO_GROWTH)
+ {
+ if (dump_file && (dump_flags & TDF_DETAILS))
+ fprintf (dump_file, "Not unrolling loop %d: size would grow.\n",
+ loop->num);
+ return false;
+ }
+ /* Outer loops tend to be less interesting candidates for
+ complete unrolling unless we can do a lot of propagation
+ into the inner loop body. For now we disable outer loop
+ unrolling when the code would grow. */
+ else if (loop->inner)
+ {
+ if (dump_file && (dump_flags & TDF_DETAILS))
+ fprintf (dump_file, "Not unrolling loop %d: "
+ "it is not innermost and code would grow.\n",
+ loop->num);
+ return false;
+ }
+ /* If there is a call on a hot path through the loop, then
+ there is most probably not much to optimize. */
+ else if (size.num_non_pure_calls_on_hot_path)
+ {
+ if (dump_file && (dump_flags & TDF_DETAILS))
+ fprintf (dump_file, "Not unrolling loop %d: "
+ "contains call and code would grow.\n",
+ loop->num);
+ return false;
+ }
+ /* If there is a pure/const call in the function, then we can
+ still optimize the unrolled loop body if it contains some
+ other interesting code than the calls and code storing or
+ accumulating the return value. */
+ else if (size.num_pure_calls_on_hot_path
+ /* One IV increment, one test, one ivtmp store and
+ one useful stmt. That is about the minimal loop
+ doing a pure call. */
+ && (size.non_call_stmts_on_hot_path
+ <= 3 + size.num_pure_calls_on_hot_path))
+ {
+ if (dump_file && (dump_flags & TDF_DETAILS))
+ fprintf (dump_file, "Not unrolling loop %d: "
+ "contains just pure calls and code would grow.\n",
+ loop->num);
+ return false;
+ }
+ /* Complete unrolling is a major win when control flow is
+ removed and one big basic block is created. If the loop
+ contains control flow the optimization may still be a win
+ because of eliminating the loop overhead but it also may
+ blow the branch predictor tables. Limit number of
+ branches on the hot path through the peeled sequence. */
+ else if (size.num_branches_on_hot_path * (int)n_unroll
+ > param_max_peel_branches)
+ {
+ if (dump_file && (dump_flags & TDF_DETAILS))
+ fprintf (dump_file, "Not unrolling loop %d: "
+ "number of branches on hot path in the unrolled "
+ "sequence reaches --param max-peel-branches limit.\n",
+ loop->num);
+ return false;
+ }
+ else if (unr_insns
+ > (unsigned) param_max_completely_peeled_insns)
+ {
+ if (dump_file && (dump_flags & TDF_DETAILS))
+ fprintf (dump_file, "Not unrolling loop %d: "
+ "number of insns in the unrolled sequence reaches "
+ "--param max-completely-peeled-insns limit.\n",
+ loop->num);
+ return false;
+ }
}
- dump_printf_loc (report_flags, locus,
- "loop turned into non-loop; it never loops.\n");
+
+ if (!dbg_cnt (gimple_unroll))
+ return false;
initialize_original_copy_tables ();
auto_sbitmap wont_exit (n_unroll + 1);
exit = NULL;
bitmap_clear (wont_exit);
}
+ if (may_be_zero)
+ bitmap_clear_bit (wont_exit, 1);
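+ /* Note on the bit cleared above: bit I of wont_exit marks copy I as
+ known not to exit. When the iteration count may be zero, the copy
+ guarding the first iteration must keep its exit test, hence its bit
+ (presumably copy 1 in this numbering) stays clear. */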
if (!gimple_duplicate_loop_to_header_edge (loop, loop_preheader_edge (loop),
n_unroll, wont_exit,
else
gimple_cond_make_true (cond);
update_stmt (cond);
- /* Do not remove the path. Doing so may remove outer loop
- and confuse bookkeeping code in tree_unroll_loops_completelly. */
+ /* Do not remove the path, as doing so may remove outer loop and
+ confuse bookkeeping code in tree_unroll_loops_completely. */
}
/* Store the loop for later unlooping and exit removal. */
{
dump_printf_loc (MSG_OPTIMIZED_LOCATIONS | TDF_DETAILS, locus,
"loop with %d iterations completely unrolled",
- (int) (n_unroll + 1));
- if (profile_info)
+ (int) n_unroll);
+ if (loop->header->count.initialized_p ())
dump_printf (MSG_OPTIMIZED_LOCATIONS | TDF_DETAILS,
" (header execution count %d)",
- (int)loop->header->count);
+ (int)loop->header->count.to_gcov_type ());
dump_printf (MSG_OPTIMIZED_LOCATIONS | TDF_DETAILS, "\n");
}
}
Parameters are the same as for try_unroll_loops_completely */
static bool
-try_peel_loop (struct loop *loop,
- edge exit, tree niter,
+try_peel_loop (class loop *loop,
+ edge exit, tree niter, bool may_be_zero,
HOST_WIDE_INT maxiter)
{
HOST_WIDE_INT npeel;
struct loop_size size;
int peeled_size;
- if (!flag_peel_loops || PARAM_VALUE (PARAM_MAX_PEEL_TIMES) <= 0
+ if (!flag_peel_loops
+ || param_max_peel_times <= 0
|| !peeled_loops)
return false;
return false;
}
+ /* We don't peel loops that will be unrolled, as this can duplicate a
+ loop more times than the user requested. */
+ if (loop->unroll)
+ {
+ if (dump_file)
+ fprintf (dump_file, "Not peeling: user didn't want it peeled.\n");
+ return false;
+ }
+
/* Peel only innermost loops.
While the code is perfectly capable of peeling non-innermost loops,
the heuristics would probably need some improvements. */
if (loop->inner)
{
if (dump_file)
- fprintf (dump_file, "Not peeling: outer loop\n");
+ fprintf (dump_file, "Not peeling: outer loop\n");
return false;
}
if (!optimize_loop_for_speed_p (loop))
{
if (dump_file)
- fprintf (dump_file, "Not peeling: cold loop\n");
+ fprintf (dump_file, "Not peeling: cold loop\n");
return false;
}
if (maxiter >= 0 && maxiter <= npeel)
{
if (dump_file)
- fprintf (dump_file, "Not peeling: upper bound is known so can "
+ fprintf (dump_file, "Not peeling: upper bound is known so can "
"unroll completely\n");
return false;
}
/* We want to peel estimated number of iterations + 1 (so we never
enter the loop on quick path). Check against PARAM_MAX_PEEL_TIMES
and be sure to avoid overflows. */
- if (npeel > PARAM_VALUE (PARAM_MAX_PEEL_TIMES) - 1)
+ if (npeel > param_max_peel_times - 1)
{
if (dump_file)
- fprintf (dump_file, "Not peeling: rolls too much "
+ fprintf (dump_file, "Not peeling: rolls too much "
"(%i + 1 > --param max-peel-times)\n", (int) npeel);
return false;
}
/* Check peeled loops size. */
tree_estimate_loop_size (loop, exit, NULL, &size,
- PARAM_VALUE (PARAM_MAX_PEELED_INSNS));
+ param_max_peeled_insns);
if ((peeled_size = estimated_peeled_sequence_size (&size, (int) npeel))
- > PARAM_VALUE (PARAM_MAX_PEELED_INSNS))
+ > param_max_peeled_insns)
{
if (dump_file)
- fprintf (dump_file, "Not peeling: peeled sequence size is too large "
+ fprintf (dump_file, "Not peeling: peeled sequence size is too large "
"(%i insns > --param max-peel-insns)", peeled_size);
return false;
}
+ if (!dbg_cnt (gimple_unroll))
+ return false;
+
/* Duplicate possibly eliminating the exits. */
initialize_original_copy_tables ();
auto_sbitmap wont_exit (npeel + 1);
exit = NULL;
bitmap_clear (wont_exit);
}
+ if (may_be_zero)
+ bitmap_clear_bit (wont_exit, 1);
if (!gimple_duplicate_loop_to_header_edge (loop, loop_preheader_edge (loop),
npeel, wont_exit,
exit, &edges_to_remove,
loop->nb_iterations_likely_upper_bound = 0;
}
}
- gcov_type entry_count = 0;
- int entry_freq = 0;
+ profile_count entry_count = profile_count::zero ();
edge e;
edge_iterator ei;
FOR_EACH_EDGE (e, ei, loop->header->preds)
if (e->src != loop->latch)
{
- entry_count += e->src->count;
- entry_freq += e->src->frequency;
+ if (e->src->count.initialized_p ())
+ entry_count += e->src->count;
gcc_assert (!flow_bb_inside_loop_p (loop, e->src));
}
- int scale = 1;
- if (loop->header->count)
- scale = RDIV (entry_count * REG_BR_PROB_BASE, loop->header->count);
- else if (loop->header->frequency)
- scale = RDIV (entry_freq * REG_BR_PROB_BASE, loop->header->frequency);
- scale_loop_profile (loop, scale, 0);
+ profile_probability p;
+ p = entry_count.probability_in (loop->header->count);
+ scale_loop_profile (loop, p, 0);
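+ /* Worked example of the rescaling above, assuming probability_in
+ simply forms the ratio of the two counts: with an entry count of 100
+ and a header count of 1000, the remaining loop is scaled by
+ p = 100/1000, reflecting that most of the original iterations are
+ now executed in the peeled copies. */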
bitmap_set_bit (peeled_loops, loop->num);
return true;
}
Returns true if cfg is changed. */
static bool
-canonicalize_loop_induction_variables (struct loop *loop,
+canonicalize_loop_induction_variables (class loop *loop,
bool create_iv, enum unroll_level ul,
- bool try_eval)
+ bool try_eval, bool allow_peel)
{
edge exit = NULL;
tree niter;
HOST_WIDE_INT maxiter;
bool modified = false;
- location_t locus = UNKNOWN_LOCATION;
+ dump_user_location_t locus;
+ class tree_niter_desc niter_desc;
+ bool may_be_zero = false;
- niter = number_of_latch_executions (loop);
+ /* For unrolling, allow a conditionally constant or zero iteration
+ count, thus performing loop-header copying on-the-fly. */
exit = single_exit (loop);
+ niter = chrec_dont_know;
+ if (exit && number_of_iterations_exit (loop, exit, &niter_desc, false))
+ {
+ niter = niter_desc.niter;
+ may_be_zero
+ = niter_desc.may_be_zero && !integer_zerop (niter_desc.may_be_zero);
+ }
if (TREE_CODE (niter) == INTEGER_CST)
- locus = gimple_location (last_stmt (exit->src));
+ locus = last_stmt (exit->src);
else
{
+ /* For non-constant niter fold may_be_zero into niter again. */
+ if (may_be_zero)
+ {
+ if (COMPARISON_CLASS_P (niter_desc.may_be_zero))
+ niter = fold_build3 (COND_EXPR, TREE_TYPE (niter),
+ niter_desc.may_be_zero,
+ build_int_cst (TREE_TYPE (niter), 0), niter);
+ else
+ niter = chrec_dont_know;
+ may_be_zero = false;
+ }
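+ /* Concrete shape of the fold above, with illustrative values: for
+ may_be_zero "n_1 < 1" and niter "n_1 - 1" the resulting tree is
+ n_1 < 1 ? 0 : n_1 - 1, so the conditionally-zero case is folded
+ back into a single niter expression. */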
+
/* If the loop has more than one exit, try checking all of them
for # of iterations determinable through scev. */
if (!exit)
niter = find_loop_niter_by_eval (loop, &exit);
if (exit)
- locus = gimple_location (last_stmt (exit->src));
+ locus = last_stmt (exit->src);
if (TREE_CODE (niter) != INTEGER_CST)
exit = NULL;
by find_loop_niter_by_eval. Be sure to keep it for future. */
if (niter && TREE_CODE (niter) == INTEGER_CST)
{
+ vec<edge> exits = get_loop_exit_edges (loop);
record_niter_bound (loop, wi::to_widest (niter),
- exit == single_likely_exit (loop), true);
+ exit == single_likely_exit (loop, exits), true);
+ exits.release ();
}
/* Force re-computation of loop bounds so we can remove redundant exits. */
populates the loop bounds. */
modified |= remove_redundant_iv_tests (loop);
- if (try_unroll_loop_completely (loop, exit, niter, ul, maxiter, locus))
+ if (try_unroll_loop_completely (loop, exit, niter, may_be_zero, ul,
+ maxiter, locus, allow_peel))
return true;
if (create_iv
&& niter && !chrec_contains_undetermined (niter)
&& exit && just_once_each_iteration_p (loop, exit->src))
- create_canonical_iv (loop, exit, niter);
+ {
+ tree iv_niter = niter;
+ if (may_be_zero)
+ {
+ if (COMPARISON_CLASS_P (niter_desc.may_be_zero))
+ iv_niter = fold_build3 (COND_EXPR, TREE_TYPE (iv_niter),
+ niter_desc.may_be_zero,
+ build_int_cst (TREE_TYPE (iv_niter), 0),
+ iv_niter);
+ else
+ iv_niter = NULL_TREE;
+ }
+ if (iv_niter)
+ create_canonical_iv (loop, exit, iv_niter);
+ }
if (ul == UL_ALL)
- modified |= try_peel_loop (loop, exit, niter, maxiter);
+ modified |= try_peel_loop (loop, exit, niter, may_be_zero, maxiter);
return modified;
}
unsigned int
canonicalize_induction_variables (void)
{
- struct loop *loop;
+ class loop *loop;
bool changed = false;
bool irred_invalidated = false;
bitmap loop_closed_ssa_invalidated = BITMAP_ALLOC (NULL);
- free_numbers_of_iterations_estimates (cfun);
- estimate_numbers_of_iterations ();
+ estimate_numbers_of_iterations (cfun);
FOR_EACH_LOOP (loop, LI_FROM_INNERMOST)
{
changed |= canonicalize_loop_induction_variables (loop,
true, UL_SINGLE_ITER,
- true);
+ true, false);
}
gcc_assert (!need_ssa_update_p (cfun));
/* Clean up the information about numbers of iterations, since brute force
evaluation could reveal new information. */
+ free_numbers_of_iterations_estimates (cfun);
scev_reset ();
if (!bitmap_empty_p (loop_closed_ssa_invalidated))
return 0;
}
-/* Propagate constant SSA_NAMEs defined in basic block BB. */
-
-static void
-propagate_constants_for_unrolling (basic_block bb)
-{
- /* Look for degenerate PHI nodes with constant argument. */
- for (gphi_iterator gsi = gsi_start_phis (bb); !gsi_end_p (gsi); )
- {
- gphi *phi = gsi.phi ();
- tree result = gimple_phi_result (phi);
- tree arg = gimple_phi_arg_def (phi, 0);
-
- if (! SSA_NAME_OCCURS_IN_ABNORMAL_PHI (result)
- && gimple_phi_num_args (phi) == 1
- && TREE_CODE (arg) == INTEGER_CST)
- {
- replace_uses_by (result, arg);
- gsi_remove (&gsi, true);
- release_ssa_name (result);
- }
- else
- gsi_next (&gsi);
- }
-
- /* Look for assignments to SSA names with constant RHS. */
- for (gimple_stmt_iterator gsi = gsi_start_bb (bb); !gsi_end_p (gsi); )
- {
- gimple *stmt = gsi_stmt (gsi);
- tree lhs;
-
- if (is_gimple_assign (stmt)
- && gimple_assign_rhs_code (stmt) == INTEGER_CST
- && (lhs = gimple_assign_lhs (stmt), TREE_CODE (lhs) == SSA_NAME)
- && !SSA_NAME_OCCURS_IN_ABNORMAL_PHI (lhs))
- {
- replace_uses_by (lhs, gimple_assign_rhs1 (stmt));
- gsi_remove (&gsi, true);
- release_ssa_name (lhs);
- }
- else
- gsi_next (&gsi);
- }
-}
-
/* Process loops from innermost to outer, stopping at the innermost
loop we unrolled. */
static bool
tree_unroll_loops_completely_1 (bool may_increase_size, bool unroll_outer,
- bitmap father_bbs, struct loop *loop)
+ bitmap father_bbs, class loop *loop)
{
- struct loop *loop_father;
+ class loop *loop_father;
bool changed = false;
- struct loop *inner;
+ class loop *inner;
enum unroll_level ul;
+ unsigned num = number_of_loops (cfun);
- /* Process inner loops first. */
+ /* Process inner loops first. Don't walk loops added by the recursive
+ calls because SSA form is not up-to-date. They can be handled in the
+ next iteration. */
+ bitmap child_father_bbs = NULL;
for (inner = loop->inner; inner != NULL; inner = inner->next)
- changed |= tree_unroll_loops_completely_1 (may_increase_size,
- unroll_outer, father_bbs,
- inner);
-
+ if ((unsigned) inner->num < num)
+ {
+ if (!child_father_bbs)
+ child_father_bbs = BITMAP_ALLOC (NULL);
+ if (tree_unroll_loops_completely_1 (may_increase_size, unroll_outer,
+ child_father_bbs, inner))
+ {
+ bitmap_ior_into (father_bbs, child_father_bbs);
+ bitmap_clear (child_father_bbs);
+ changed = true;
+ }
+ }
+ if (child_father_bbs)
+ BITMAP_FREE (child_father_bbs);
+
/* If we changed an inner loop we cannot process outer loops in this
iteration because SSA form is not up-to-date. Continue with
siblings of outer loops instead. */
if (changed)
- return true;
+ {
+ /* If we are recorded as a father, clear all other fathers that
+ are necessarily covered already to avoid redundant work. */
+ if (bitmap_bit_p (father_bbs, loop->header->index))
+ {
+ bitmap_clear (father_bbs);
+ bitmap_set_bit (father_bbs, loop->header->index);
+ }
+ return true;
+ }
/* Don't unroll #pragma omp simd loops until the vectorizer
attempts to vectorize those. */
if (!loop_father)
return false;
- if (may_increase_size && optimize_loop_nest_for_speed_p (loop)
+ if (loop->unroll > 1)
+ ul = UL_ALL;
+ else if (may_increase_size && optimize_loop_nest_for_speed_p (loop)
/* Unroll outermost loops only if asked to do so or they do
not cause code growth. */
&& (unroll_outer || loop_outer (loop_father)))
ul = UL_NO_GROWTH;
if (canonicalize_loop_induction_variables
- (loop, false, ul, !flag_tree_loop_ivcanon))
+ (loop, false, ul, !flag_tree_loop_ivcanon, unroll_outer))
{
/* If we'll continue unrolling, we need to propagate constants
within the new basic blocks to fold away induction variable
computations; otherwise, the size might blow up before the
iteration is complete and the IR eventually cleaned up. */
if (loop_outer (loop_father))
- bitmap_set_bit (father_bbs, loop_father->header->index);
+ {
+ /* Once we process our father we will have processed
+ the fathers of our children as well, so avoid doing
+ redundant work and clear fathers we've gathered so far. */
+ bitmap_clear (father_bbs);
+ bitmap_set_bit (father_bbs, loop_father->header->index);
+ }
return true;
}
MAY_INCREASE_SIZE is true, perform the unrolling only if the
size of the code does not increase. */
-unsigned int
+static unsigned int
tree_unroll_loops_completely (bool may_increase_size, bool unroll_outer)
{
bitmap father_bbs = BITMAP_ALLOC (NULL);
int iteration = 0;
bool irred_invalidated = false;
+ estimate_numbers_of_iterations (cfun);
+
do
{
changed = false;
loop_closed_ssa_invalidated = BITMAP_ALLOC (NULL);
free_numbers_of_iterations_estimates (cfun);
- estimate_numbers_of_iterations ();
+ estimate_numbers_of_iterations (cfun);
changed = tree_unroll_loops_completely_1 (may_increase_size,
unroll_outer, father_bbs,
unloop_loops (loop_closed_ssa_invalidated, &irred_invalidated);
- /* We can not use TODO_update_ssa_no_phi because VOPS gets confused. */
+ /* We cannot use TODO_update_ssa_no_phi because VOPS gets confused. */
if (loop_closed_ssa_invalidated
&& !bitmap_empty_p (loop_closed_ssa_invalidated))
rewrite_into_loop_closed_ssa (loop_closed_ssa_invalidated,
EXECUTE_IF_SET_IN_BITMAP (fathers, 0, i, bi)
{
loop_p father = get_loop (cfun, i);
- basic_block *body = get_loop_body_in_dom_order (father);
- for (unsigned j = 0; j < father->num_nodes; j++)
- propagate_constants_for_unrolling (body[j]);
- free (body);
+ bitmap exit_bbs = BITMAP_ALLOC (NULL);
+ loop_exit *exit = father->exits->next;
+ while (exit->e)
+ {
+ bitmap_set_bit (exit_bbs, exit->e->dest->index);
+ exit = exit->next;
+ }
+ do_rpo_vn (cfun, loop_preheader_edge (father), exit_bbs);
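+ /* The region handed to value numbering above: entry is the father
+ loop's preheader edge and exit_bbs holds the destination block of
+ each recorded exit, so do_rpo_vn folds the constants exposed by the
+ unrolled bodies, subsuming the removed
+ propagate_constants_for_unrolling helper. */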
}
BITMAP_FREE (fathers);
BITMAP_FREE (loop_closed_ssa_invalidated);
}
while (changed
- && ++iteration <= PARAM_VALUE (PARAM_MAX_UNROLL_ITERATIONS));
+ && ++iteration <= param_max_unroll_iterations);
BITMAP_FREE (father_bbs);
re-peeling the same loop multiple times. */
if (flag_peel_loops)
peeled_loops = BITMAP_ALLOC (NULL);
- int val = tree_unroll_loops_completely (flag_unroll_loops
- || flag_peel_loops
- || optimize >= 3, true);
+ unsigned int val = tree_unroll_loops_completely (flag_unroll_loops
+ || flag_peel_loops
+ || optimize >= 3, true);
if (peeled_loops)
{
BITMAP_FREE (peeled_loops);
{
unsigned ret = 0;
- loop_optimizer_init (LOOPS_NORMAL
- | LOOPS_HAVE_RECORDED_EXITS);
+ loop_optimizer_init (LOOPS_NORMAL | LOOPS_HAVE_RECORDED_EXITS);
if (number_of_loops (fun) > 1)
{
scev_initialize ();
ret = tree_unroll_loops_completely (optimize >= 3, false);
- free_numbers_of_iterations_estimates (fun);
scev_finalize ();
}
loop_optimizer_finalize ();