diff --git a/gcc/tree-ssa-loop-ivcanon.c b/gcc/tree-ssa-loop-ivcanon.c
index 6f164538132e7f20c3d2ca063c2d707f797d95a6..8ab6ab3330c5f7302ffddd7fc47c7b20fbed77fc 100644
--- a/gcc/tree-ssa-loop-ivcanon.c
+++ b/gcc/tree-ssa-loop-ivcanon.c
@@ -1,5 +1,5 @@
 /* Induction variable canonicalization and loop peeling.
-   Copyright (C) 2004-2014 Free Software Foundation, Inc.
+   Copyright (C) 2004-2020 Free Software Foundation, Inc.
 
 This file is part of GCC.
 
@@ -28,46 +28,42 @@ along with GCC; see the file COPYING3.  If not see
    variables.  In that case the created optimization possibilities are likely
    to pay up.
 
-   Additionally in case we detect that it is beneficial to unroll the
-   loop completely, we do it right here to expose the optimization
-   possibilities to the following passes.  */
+   We also perform
+     - complete unrolling (or peeling) when the loop is rolling few enough
+       times
+     - simple peeling (i.e. copying a few initial iterations prior to the
+       loop) when the number of iterations estimate is known (typically by
+       the profile info).  */
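As a rough illustration (not part of the patch): the first loop below has a small constant trip count and is the kind of candidate for complete unrolling, while the second has only a profile-based iteration estimate and is the kind of candidate for peeling a few copies in front of the loop.

    int
    sum4 (const int *a)
    {
      int s = 0;
      for (int i = 0; i < 4; i++)   /* trip count known: may be fully unrolled */
        s += a[i];
      return s;
    }

    int
    sum_n (const int *a, int n)
    {
      int s = 0;
      for (int i = 0; i < n; i++)   /* profile says n is usually small: peel */
        s += a[i];
      return s;
    }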
 
 #include "config.h"
 #include "system.h"
 #include "coretypes.h"
-#include "tm.h"
+#include "backend.h"
 #include "tree.h"
-#include "tm_p.h"
-#include "basic-block.h"
+#include "gimple.h"
+#include "cfghooks.h"
+#include "tree-pass.h"
+#include "ssa.h"
+#include "cgraph.h"
 #include "gimple-pretty-print.h"
-#include "tree-ssa-alias.h"
-#include "internal-fn.h"
+#include "fold-const.h"
+#include "profile.h"
 #include "gimple-fold.h"
 #include "tree-eh.h"
-#include "gimple-expr.h"
-#include "is-a.h"
-#include "gimple.h"
 #include "gimple-iterator.h"
-#include "gimple-ssa.h"
-#include "cgraph.h"
 #include "tree-cfg.h"
-#include "tree-phinodes.h"
-#include "ssa-iterators.h"
-#include "stringpool.h"
-#include "tree-ssanames.h"
 #include "tree-ssa-loop-manip.h"
 #include "tree-ssa-loop-niter.h"
 #include "tree-ssa-loop.h"
 #include "tree-into-ssa.h"
 #include "cfgloop.h"
-#include "tree-pass.h"
 #include "tree-chrec.h"
 #include "tree-scalar-evolution.h"
-#include "params.h"
-#include "flags.h"
 #include "tree-inline.h"
-#include "target.h"
 #include "tree-cfgcleanup.h"
+#include "builtins.h"
+#include "tree-ssa-sccvn.h"
+#include "dbgcnt.h"
 
 /* Specifies types of loops that may be unrolled.  */
 
@@ -81,14 +77,17 @@ enum unroll_level
 };
 
 /* Adds a canonical induction variable to LOOP iterating NITER times.  EXIT
-   is the exit edge whose condition is replaced.  */
+   is the exit edge whose condition is replaced.  The ssa versions of the new
+   IV before and after increment will be stored in VAR_BEFORE and VAR_AFTER
+   if they are not NULL.  */
 
-static void
-create_canonical_iv (struct loop *loop, edge exit, tree niter)
+void
+create_canonical_iv (class loop *loop, edge exit, tree niter,
+                    tree *var_before = NULL, tree *var_after = NULL)
 {
   edge in;
   tree type, var;
-  gimple cond;
+  gcond *cond;
   gimple_stmt_iterator incr_at;
   enum tree_code cmp;
 
@@ -99,7 +98,7 @@ create_canonical_iv (struct loop *loop, edge exit, tree niter)
       fprintf (dump_file, " iterations.\n");
     }
 
-  cond = last_stmt (exit->src);
+  cond = as_a <gcond *> (last_stmt (exit->src));
   in = EDGE_SUCC (exit->src, 0);
   if (in == exit)
     in = EDGE_SUCC (exit->src, 1);
@@ -117,7 +116,9 @@ create_canonical_iv (struct loop *loop, edge exit, tree niter)
   create_iv (niter,
             build_int_cst (type, -1),
             NULL_TREE, loop,
-            &incr_at, false, NULL, &var);
+            &incr_at, false, var_before, &var);
+  if (var_after)
+    *var_after = var;
 
   cmp = (exit->flags & EDGE_TRUE_VALUE) ? EQ_EXPR : NE_EXPR;
   gimple_cond_set_code (cond, cmp);
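A minimal sketch of how a caller might use the new optional out-parameters (hypothetical helper; it assumes the GCC-internal types and functions already used in this file):

    /* Hypothetical caller: add a canonical counter to LOOP and capture the
       SSA names of the IV before and after its decrement.  */
    static void
    add_canonical_counter (class loop *loop, edge exit, tree niter)
    {
      tree var_before = NULL_TREE, var_after = NULL_TREE;
      create_canonical_iv (loop, exit, niter, &var_before, &var_after);
      /* var_before and var_after now name the new IV around the increment.  */
    }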
@@ -160,11 +161,9 @@ struct loop_size
 /* Return true if OP in STMT will be constant after peeling LOOP.  */
 
 static bool
-constant_after_peeling (tree op, gimple stmt, struct loop *loop)
+constant_after_peeling (tree op, gimple *stmt, class loop *loop)
 {
-  affine_iv iv;
-
-  if (is_gimple_min_invariant (op))
+  if (CONSTANT_CLASS_P (op))
     return true;
 
   /* We can still fold accesses to constant arrays when index is known.  */
@@ -193,12 +192,12 @@ constant_after_peeling (tree op, gimple stmt, struct loop *loop)
       return false;
     }
 
-  /* Induction variables are constants.  */
-  if (!simple_iv (loop, loop_containing_stmt (stmt), op, &iv, false))
+  /* Induction variables are constants when defined in loop.  */
+  if (loop_containing_stmt (stmt) != loop)
     return false;
-  if (!is_gimple_min_invariant (iv.base))
-    return false;
-  if (!is_gimple_min_invariant (iv.step))
+  tree ev = analyze_scalar_evolution (loop, op);
+  if (chrec_contains_undetermined (ev)
+      || chrec_contains_symbols (ev))
     return false;
   return true;
 }
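Illustration only (not taken from the patch) of what the predicate above captures: once the loop below is fully peeled, the IV i and the load from the constant array tbl fold to constants in every copy, whereas the value loaded from the ordinary global x does not.

    static const int tbl[4] = { 1, 2, 4, 8 };
    extern int x;

    int
    f (void)
    {
      int s = 0;
      for (int i = 0; i < 4; i++)
        s += tbl[i] + x;   /* i and tbl[i] become constants; x does not.  */
      return s;
    }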
@@ -212,8 +211,8 @@ constant_after_peeling (tree op, gimple stmt, struct loop *loop)
    Stop estimating after UPPER_BOUND is met.  Return true in this case.  */
 
 static bool
-tree_estimate_loop_size (struct loop *loop, edge exit, edge edge_to_cancel, struct loop_size *size,
-                        int upper_bound)
+tree_estimate_loop_size (class loop *loop, edge exit, edge edge_to_cancel,
+                        struct loop_size *size, int upper_bound)
 {
   basic_block *body = get_loop_body (loop);
   gimple_stmt_iterator gsi;
@@ -241,11 +240,12 @@ tree_estimate_loop_size (struct loop *loop, edge exit, edge edge_to_cancel, stru
       else
        after_exit = false;
       if (dump_file && (dump_flags & TDF_DETAILS))
-       fprintf (dump_file, " BB: %i, after_exit: %i\n", body[i]->index, after_exit);
+       fprintf (dump_file, " BB: %i, after_exit: %i\n", body[i]->index,
+                after_exit);
 
       for (gsi = gsi_start_bb (body[i]); !gsi_end_p (gsi); gsi_next (&gsi))
        {
-         gimple stmt = gsi_stmt (gsi);
+         gimple *stmt = gsi_stmt (gsi);
          int num = estimate_num_insns (stmt, &eni_size_weights);
          bool likely_eliminated = false;
          bool likely_eliminated_last = false;
@@ -254,62 +254,79 @@ tree_estimate_loop_size (struct loop *loop, edge exit, edge edge_to_cancel, stru
          if (dump_file && (dump_flags & TDF_DETAILS))
            {
              fprintf (dump_file, "  size: %3i ", num);
-             print_gimple_stmt (dump_file, gsi_stmt (gsi), 0, 0);
+             print_gimple_stmt (dump_file, gsi_stmt (gsi), 0);
            }
 
          /* Look for reasons why we might optimize this stmt away. */
 
-         if (gimple_has_side_effects (stmt))
-           ;
-         /* Exit conditional.  */
-         else if (exit && body[i] == exit->src
-                  && stmt == last_stmt (exit->src))
-           {
-             if (dump_file && (dump_flags & TDF_DETAILS))
-               fprintf (dump_file, "   Exit condition will be eliminated "
-                        "in peeled copies.\n");
-             likely_eliminated_peeled = true;
-           }
-         else if (edge_to_cancel && body[i] == edge_to_cancel->src
-                  && stmt == last_stmt (edge_to_cancel->src))
-           {
-             if (dump_file && (dump_flags & TDF_DETAILS))
-               fprintf (dump_file, "   Exit condition will be eliminated "
-                        "in last copy.\n");
-             likely_eliminated_last = true;
-           }
-         /* Sets of IV variables  */
-         else if (gimple_code (stmt) == GIMPLE_ASSIGN
-             && constant_after_peeling (gimple_assign_lhs (stmt), stmt, loop))
-           {
-             if (dump_file && (dump_flags & TDF_DETAILS))
-               fprintf (dump_file, "   Induction variable computation will"
-                        " be folded away.\n");
-             likely_eliminated = true;
-           }
-         /* Assignments of IV variables.  */
-         else if (gimple_code (stmt) == GIMPLE_ASSIGN
-                  && TREE_CODE (gimple_assign_lhs (stmt)) == SSA_NAME
-                  && constant_after_peeling (gimple_assign_rhs1 (stmt), stmt, loop)
-                  && (gimple_assign_rhs_class (stmt) != GIMPLE_BINARY_RHS
-                      || constant_after_peeling (gimple_assign_rhs2 (stmt),
-                                                 stmt, loop)))
+         if (!gimple_has_side_effects (stmt))
            {
-             size->constant_iv = true;
-             if (dump_file && (dump_flags & TDF_DETAILS))
-               fprintf (dump_file, "   Constant expression will be folded away.\n");
-             likely_eliminated = true;
-           }
-         /* Conditionals.  */
-         else if ((gimple_code (stmt) == GIMPLE_COND
-                   && constant_after_peeling (gimple_cond_lhs (stmt), stmt, loop)
-                   && constant_after_peeling (gimple_cond_rhs (stmt), stmt, loop))
-                  || (gimple_code (stmt) == GIMPLE_SWITCH
-                      && constant_after_peeling (gimple_switch_index (stmt), stmt, loop)))
-           {
-             if (dump_file && (dump_flags & TDF_DETAILS))
-               fprintf (dump_file, "   Constant conditional.\n");
-             likely_eliminated = true;
+             /* Exit conditional.  */
+             if (exit && body[i] == exit->src
+                 && stmt == last_stmt (exit->src))
+               {
+                 if (dump_file && (dump_flags & TDF_DETAILS))
+                   fprintf (dump_file, "   Exit condition will be eliminated "
+                            "in peeled copies.\n");
+                 likely_eliminated_peeled = true;
+               }
+             if (edge_to_cancel && body[i] == edge_to_cancel->src
+                 && stmt == last_stmt (edge_to_cancel->src))
+               {
+                 if (dump_file && (dump_flags & TDF_DETAILS))
+                   fprintf (dump_file, "   Exit condition will be eliminated "
+                            "in last copy.\n");
+                 likely_eliminated_last = true;
+               }
+             /* Sets of IV variables  */
+             if (gimple_code (stmt) == GIMPLE_ASSIGN
+                 && constant_after_peeling (gimple_assign_lhs (stmt), stmt, loop))
+               {
+                 if (dump_file && (dump_flags & TDF_DETAILS))
+                   fprintf (dump_file, "   Induction variable computation will"
+                            " be folded away.\n");
+                 likely_eliminated = true;
+               }
+             /* Assignments of IV variables.  */
+             else if (gimple_code (stmt) == GIMPLE_ASSIGN
+                      && TREE_CODE (gimple_assign_lhs (stmt)) == SSA_NAME
+                      && constant_after_peeling (gimple_assign_rhs1 (stmt),
+                                                 stmt, loop)
+                      && (gimple_assign_rhs_class (stmt) != GIMPLE_BINARY_RHS
+                          || constant_after_peeling (gimple_assign_rhs2 (stmt),
+                                                     stmt, loop))
+                      && gimple_assign_rhs_class (stmt) != GIMPLE_TERNARY_RHS)
+               {
+                 size->constant_iv = true;
+                 if (dump_file && (dump_flags & TDF_DETAILS))
+                   fprintf (dump_file,
+                            "   Constant expression will be folded away.\n");
+                 likely_eliminated = true;
+               }
+             /* Conditionals.  */
+             else if ((gimple_code (stmt) == GIMPLE_COND
+                       && constant_after_peeling (gimple_cond_lhs (stmt), stmt,
+                                                  loop)
+                       && constant_after_peeling (gimple_cond_rhs (stmt), stmt,
+                                                  loop)
+                       /* We don't simplify all constant compares so make sure
+                          they are not both constant already.  See PR70288.  */
+                       && (! is_gimple_min_invariant (gimple_cond_lhs (stmt))
+                           || ! is_gimple_min_invariant
+                                (gimple_cond_rhs (stmt))))
+                      || (gimple_code (stmt) == GIMPLE_SWITCH
+                          && constant_after_peeling (gimple_switch_index (
+                                                       as_a <gswitch *>
+                                                         (stmt)),
+                                                     stmt, loop)
+                          && ! is_gimple_min_invariant
+                                  (gimple_switch_index
+                                     (as_a <gswitch *> (stmt)))))
+               {
+                 if (dump_file && (dump_flags & TDF_DETAILS))
+                   fprintf (dump_file, "   Constant conditional.\n");
+                 likely_eliminated = true;
+               }
            }
 
          size->overall += num;
@@ -335,29 +352,29 @@ tree_estimate_loop_size (struct loop *loop, edge exit, edge edge_to_cancel, stru
       basic_block bb = path.pop ();
       for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
        {
-         gimple stmt = gsi_stmt (gsi);
-         if (gimple_code (stmt) == GIMPLE_CALL)
+         gimple *stmt = gsi_stmt (gsi);
+         if (gimple_code (stmt) == GIMPLE_CALL
+             && !gimple_inexpensive_call_p (as_a <gcall *>  (stmt)))
            {
              int flags = gimple_call_flags (stmt);
-             tree decl = gimple_call_fndecl (stmt);
-
-             if (decl && DECL_IS_BUILTIN (decl)
-                 && is_inexpensive_builtin (decl))
-               ;
-             else if (flags & (ECF_PURE | ECF_CONST))
+             if (flags & (ECF_PURE | ECF_CONST))
                size->num_pure_calls_on_hot_path++;
              else
                size->num_non_pure_calls_on_hot_path++;
              size->num_branches_on_hot_path ++;
            }
-         else if (gimple_code (stmt) != GIMPLE_CALL
-                  && gimple_code (stmt) != GIMPLE_DEBUG)
+         /* Count inexpensive calls as non-calls, because they will likely
+            expand inline.  */
+         else if (gimple_code (stmt) != GIMPLE_DEBUG)
            size->non_call_stmts_on_hot_path++;
          if (((gimple_code (stmt) == GIMPLE_COND
                && (!constant_after_peeling (gimple_cond_lhs (stmt), stmt, loop)
-                   || constant_after_peeling (gimple_cond_rhs (stmt), stmt, loop)))
+                   || !constant_after_peeling (gimple_cond_rhs (stmt), stmt,
+                                               loop)))
               || (gimple_code (stmt) == GIMPLE_SWITCH
-                  && !constant_after_peeling (gimple_switch_index (stmt), stmt, loop)))
+                  && !constant_after_peeling (gimple_switch_index (
+                                                as_a <gswitch *> (stmt)),
+                                              stmt, loop)))
              && (!exit || bb != exit->src))
            size->num_branches_on_hot_path++;
        }
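Sketch of a typical call, mirroring the use in try_peel_loop further down: the return value reports whether UPPER_BOUND was hit, and SIZE is then consulted by the cost heuristics.

    struct loop_size size;
    if (tree_estimate_loop_size (loop, exit, NULL, &size,
                                 param_max_peeled_insns))
      return false;   /* Loop body alone already exceeds the cap.  */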
@@ -404,7 +421,7 @@ estimated_unrolled_size (struct loop_size *size,
    the same time it does not make any code potentially executed 
    during the last iteration dead.  
 
-   After complette unrolling we still may get rid of the conditional
+   After complete unrolling we still may get rid of the conditional
    on the exit in the last copy even if we have no idea what it does.
    This is quite common case for loops of form
 
@@ -425,7 +442,7 @@ estimated_unrolled_size (struct loop_size *size,
    The other cases are hopefully rare and will be cleaned up later.  */
 
 static edge
-loop_edge_to_cancel (struct loop *loop)
+loop_edge_to_cancel (class loop *loop)
 {
   vec<edge> exits;
   unsigned i;
@@ -479,9 +496,9 @@ loop_edge_to_cancel (struct loop *loop)
    known to not be executed.  */
 
 static bool
-remove_exits_and_undefined_stmts (struct loop *loop, unsigned int npeeled)
+remove_exits_and_undefined_stmts (class loop *loop, unsigned int npeeled)
 {
-  struct nb_iter_bound *elt;
+  class nb_iter_bound *elt;
   bool changed = false;
 
   for (elt = loop->bounds; elt; elt = elt->next)
@@ -490,24 +507,24 @@ remove_exits_and_undefined_stmts (struct loop *loop, unsigned int npeeled)
         into unreachable (or trap when debugging experience is supposed
         to be good).  */
       if (!elt->is_exit
-         && elt->bound.ult (double_int::from_uhwi (npeeled)))
+         && wi::ltu_p (elt->bound, npeeled))
        {
          gimple_stmt_iterator gsi = gsi_for_stmt (elt->stmt);
-         gimple stmt = gimple_build_call
+         gcall *stmt = gimple_build_call
              (builtin_decl_implicit (BUILT_IN_UNREACHABLE), 0);
-
          gimple_set_location (stmt, gimple_location (elt->stmt));
          gsi_insert_before (&gsi, stmt, GSI_NEW_STMT);
+         split_block (gimple_bb (stmt), stmt);
          changed = true;
          if (dump_file && (dump_flags & TDF_DETAILS))
            {
              fprintf (dump_file, "Forced statement unreachable: ");
-             print_gimple_stmt (dump_file, elt->stmt, 0, 0);
+             print_gimple_stmt (dump_file, elt->stmt, 0);
            }
        }
       /* If we know the exit will be taken after peeling, update.  */
       else if (elt->is_exit
-              && elt->bound.ule (double_int::from_uhwi (npeeled)))
+              && wi::leu_p (elt->bound, npeeled))
        {
          basic_block bb = gimple_bb (elt->stmt);
          edge exit_edge = EDGE_SUCC (bb, 0);
@@ -515,16 +532,18 @@ remove_exits_and_undefined_stmts (struct loop *loop, unsigned int npeeled)
          if (dump_file && (dump_flags & TDF_DETAILS))
            {
              fprintf (dump_file, "Forced exit to be taken: ");
-             print_gimple_stmt (dump_file, elt->stmt, 0, 0);
+             print_gimple_stmt (dump_file, elt->stmt, 0);
            }
          if (!loop_exit_edge_p (loop, exit_edge))
            exit_edge = EDGE_SUCC (bb, 1);
+         exit_edge->probability = profile_probability::always ();
          gcc_checking_assert (loop_exit_edge_p (loop, exit_edge));
+         gcond *cond_stmt = as_a <gcond *> (elt->stmt);
          if (exit_edge->flags & EDGE_TRUE_VALUE)
-           gimple_cond_make_true (elt->stmt);
+           gimple_cond_make_true (cond_stmt);
          else
-           gimple_cond_make_false (elt->stmt);
-         update_stmt (elt->stmt);
+           gimple_cond_make_false (cond_stmt);
+         update_stmt (cond_stmt);
          changed = true;
        }
     }
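The bound checks above switch from the old double_int methods to the wi:: predicates with unchanged semantics; a minimal sketch of the new spelling (hypothetical helper, assuming the wide-int types already used here):

    /* Unsigned "bound < npeeled", equivalent to the former
       elt->bound.ult (double_int::from_uhwi (npeeled)).  */
    static bool
    bound_below_p (const widest_int &bound, unsigned int npeeled)
    {
      return wi::ltu_p (bound, npeeled);
    }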
@@ -535,9 +554,9 @@ remove_exits_and_undefined_stmts (struct loop *loop, unsigned int npeeled)
    discovered.  */
 
 static bool
-remove_redundant_iv_tests (struct loop *loop)
+remove_redundant_iv_tests (class loop *loop)
 {
-  struct nb_iter_bound *elt;
+  class nb_iter_bound *elt;
   bool changed = false;
 
   if (!loop->any_upper_bound)
@@ -547,11 +566,11 @@ remove_redundant_iv_tests (struct loop *loop)
       /* Exit is pointless if it won't be taken before loop reaches
         upper bound.  */
       if (elt->is_exit && loop->any_upper_bound
-          && loop->nb_iterations_upper_bound.ult (elt->bound))
+          && wi::ltu_p (loop->nb_iterations_upper_bound, elt->bound))
        {
          basic_block bb = gimple_bb (elt->stmt);
          edge exit_edge = EDGE_SUCC (bb, 0);
-         struct tree_niter_desc niter;
+         class tree_niter_desc niter;
 
          if (!loop_exit_edge_p (loop, exit_edge))
            exit_edge = EDGE_SUCC (bb, 1);
@@ -564,29 +583,34 @@ remove_redundant_iv_tests (struct loop *loop)
              || !integer_zerop (niter.may_be_zero)
              || !niter.niter
              || TREE_CODE (niter.niter) != INTEGER_CST
-             || !loop->nb_iterations_upper_bound.ult
-                  (tree_to_double_int (niter.niter)))
+             || !wi::ltu_p (loop->nb_iterations_upper_bound,
+                            wi::to_widest (niter.niter)))
            continue;
          
          if (dump_file && (dump_flags & TDF_DETAILS))
            {
              fprintf (dump_file, "Removed pointless exit: ");
-             print_gimple_stmt (dump_file, elt->stmt, 0, 0);
+             print_gimple_stmt (dump_file, elt->stmt, 0);
            }
+         gcond *cond_stmt = as_a <gcond *> (elt->stmt);
          if (exit_edge->flags & EDGE_TRUE_VALUE)
-           gimple_cond_make_false (elt->stmt);
+           gimple_cond_make_false (cond_stmt);
          else
-           gimple_cond_make_true (elt->stmt);
-         update_stmt (elt->stmt);
+           gimple_cond_make_true (cond_stmt);
+         update_stmt (cond_stmt);
          changed = true;
        }
     }
   return changed;
 }
 
-/* Stores loops that will be unlooped after we process whole loop tree. */
+/* Stores loops that will be unlooped and edges that will be removed
+   after we process the whole loop tree.  */
 static vec<loop_p> loops_to_unloop;
 static vec<int> loops_to_unloop_nunroll;
+static vec<edge> edges_to_remove;
+/* Stores loops that have been peeled.  */
+static bitmap peeled_loops;
 
 /* Cancel all fully unrolled loops by putting __builtin_unreachable
    on the latch edge.  
@@ -606,13 +630,13 @@ unloop_loops (bitmap loop_closed_ssa_invalidated,
 {
   while (loops_to_unloop.length ())
     {
-      struct loop *loop = loops_to_unloop.pop ();
+      class loop *loop = loops_to_unloop.pop ();
       int n_unroll = loops_to_unloop_nunroll.pop ();
       basic_block latch = loop->latch;
       edge latch_edge = loop_latch_edge (loop);
       int flags = latch_edge->flags;
       location_t locus = latch_edge->goto_locus;
-      gimple stmt;
+      gcall *stmt;
       gimple_stmt_iterator gsi;
 
       remove_exits_and_undefined_stmts (loop, n_unroll);
@@ -624,14 +648,12 @@ unloop_loops (bitmap loop_closed_ssa_invalidated,
         it in.  */
       stmt = gimple_build_call (builtin_decl_implicit (BUILT_IN_UNREACHABLE), 0);
       latch_edge = make_edge (latch, create_basic_block (NULL, NULL, latch), flags);
-      latch_edge->probability = 0;
-      latch_edge->count = 0;
+      latch_edge->probability = profile_probability::never ();
       latch_edge->flags |= flags;
       latch_edge->goto_locus = locus;
 
-      latch_edge->dest->loop_father = current_loops->tree_root;
-      latch_edge->dest->count = 0;
-      latch_edge->dest->frequency = 0;
+      add_bb_to_loop (latch_edge->dest, current_loops->tree_root);
+      latch_edge->dest->count = profile_count::zero ();
       set_immediate_dominator (CDI_DOMINATORS, latch_edge->dest, latch_edge->src);
 
       gsi = gsi_start_bb (latch_edge->dest);
@@ -639,6 +661,23 @@ unloop_loops (bitmap loop_closed_ssa_invalidated,
     }
   loops_to_unloop.release ();
   loops_to_unloop_nunroll.release ();
+
+  /* Remove edges in peeled copies.  Since remove_path removes dominated
+     regions, we need to cope with removal of already-removed paths.  */
+  unsigned i;
+  edge e;
+  auto_vec<int, 20> src_bbs;
+  src_bbs.reserve_exact (edges_to_remove.length ());
+  FOR_EACH_VEC_ELT (edges_to_remove, i, e)
+    src_bbs.quick_push (e->src->index);
+  FOR_EACH_VEC_ELT (edges_to_remove, i, e)
+    if (BASIC_BLOCK_FOR_FN (cfun, src_bbs[i]))
+      {
+       bool ok = remove_path (e, irred_invalidated,
+                              loop_closed_ssa_invalidated);
+       gcc_assert (ok);
+      }
+  edges_to_remove.release ();
 }
 
 /* Tries to unroll LOOP completely, i.e. NITER times.
@@ -650,15 +689,13 @@ unloop_loops (bitmap loop_closed_ssa_invalidated,
    a summary of the unroll to the dump file.  */
 
 static bool
-try_unroll_loop_completely (struct loop *loop,
-                           edge exit, tree niter,
+try_unroll_loop_completely (class loop *loop,
+                           edge exit, tree niter, bool may_be_zero,
                            enum unroll_level ul,
                            HOST_WIDE_INT maxiter,
-                           location_t locus)
+                           dump_user_location_t locus, bool allow_peel)
 {
-  unsigned HOST_WIDE_INT n_unroll, ninsns, max_unroll, unr_insns;
-  gimple cond;
-  struct loop_size size;
+  unsigned HOST_WIDE_INT n_unroll = 0;
   bool n_unroll_found = false;
   edge edge_to_cancel = NULL;
 
@@ -682,18 +719,19 @@ try_unroll_loop_completely (struct loop *loop,
       if (edge_to_cancel == exit)
        edge_to_cancel = EDGE_SUCC (exit->src, 1);
     }
-  /* We do not know the number of iterations and thus we can not eliminate
+  /* We do not know the number of iterations and thus we cannot eliminate
      the EXIT edge.  */
   else
     exit = NULL;
 
   /* See if we can improve our estimate by using recorded loop bounds.  */
-  if (maxiter >= 0
+  if ((allow_peel || maxiter == 0 || ul == UL_NO_GROWTH)
+      && maxiter >= 0
       && (!n_unroll_found || (unsigned HOST_WIDE_INT)maxiter < n_unroll))
     {
       n_unroll = maxiter;
       n_unroll_found = true;
-      /* Loop terminates before the IV variable test, so we can not
+      /* Loop terminates before the IV variable test, so we cannot
         remove it in the last iteration.  */
       edge_to_cancel = NULL;
     }
@@ -701,167 +739,202 @@ try_unroll_loop_completely (struct loop *loop,
   if (!n_unroll_found)
     return false;
 
-  max_unroll = PARAM_VALUE (PARAM_MAX_COMPLETELY_PEEL_TIMES);
-  if (n_unroll > max_unroll)
-    return false;
+  if (!loop->unroll
+      && n_unroll > (unsigned) param_max_completely_peel_times)
+    {
+      if (dump_file && (dump_flags & TDF_DETAILS))
+       fprintf (dump_file, "Not unrolling loop %d "
+                "(--param max-completely-peel-times limit reached).\n",
+                loop->num);
+      return false;
+    }
 
   if (!edge_to_cancel)
     edge_to_cancel = loop_edge_to_cancel (loop);
 
   if (n_unroll)
     {
-      sbitmap wont_exit;
-      edge e;
-      unsigned i;
-      bool large;
-      vec<edge> to_remove = vNULL;
       if (ul == UL_SINGLE_ITER)
        return false;
 
-      large = tree_estimate_loop_size
-                (loop, exit, edge_to_cancel, &size,
-                 PARAM_VALUE (PARAM_MAX_COMPLETELY_PEELED_INSNS));
-      ninsns = size.overall;
-      if (large)
+      if (loop->unroll)
        {
-         if (dump_file && (dump_flags & TDF_DETAILS))
-           fprintf (dump_file, "Not unrolling loop %d: it is too large.\n",
-                    loop->num);
-         return false;
+         /* If the unrolling factor is too large, bail out.  */
+         if (n_unroll > (unsigned)loop->unroll)
+           {
+             if (dump_file && (dump_flags & TDF_DETAILS))
+               fprintf (dump_file,
+                        "Not unrolling loop %d: "
+                        "user didn't want it unrolled completely.\n",
+                        loop->num);
+             return false;
+           }
        }
-
-      unr_insns = estimated_unrolled_size (&size, n_unroll);
-      if (dump_file && (dump_flags & TDF_DETAILS))
+      else
        {
-         fprintf (dump_file, "  Loop size: %d\n", (int) ninsns);
-         fprintf (dump_file, "  Estimated size after unrolling: %d\n",
-                  (int) unr_insns);
-       }
+         struct loop_size size;
+         /* EXIT can be removed only if we are sure it passes first N_UNROLL
+            iterations.  */
+         bool remove_exit = (exit && niter
+                             && TREE_CODE (niter) == INTEGER_CST
+                             && wi::leu_p (n_unroll, wi::to_widest (niter)));
+         bool large
+           = tree_estimate_loop_size
+               (loop, remove_exit ? exit : NULL, edge_to_cancel, &size,
+                param_max_completely_peeled_insns);
+         if (large)
+           {
+             if (dump_file && (dump_flags & TDF_DETAILS))
+               fprintf (dump_file, "Not unrolling loop %d: it is too large.\n",
+                        loop->num);
+             return false;
+           }
 
-      /* If the code is going to shrink, we don't need to be extra cautious
-        on guessing if the unrolling is going to be profitable.  */
-      if (unr_insns
-         /* If there is IV variable that will become constant, we save
-            one instruction in the loop prologue we do not account
-            otherwise.  */
-         <= ninsns + (size.constant_iv != false))
-       ;
-      /* We unroll only inner loops, because we do not consider it profitable
-        otheriwse.  We still can cancel loopback edge of not rolling loop;
-        this is always a good idea.  */
-      else if (ul == UL_NO_GROWTH)
-       {
-         if (dump_file && (dump_flags & TDF_DETAILS))
-           fprintf (dump_file, "Not unrolling loop %d: size would grow.\n",
-                    loop->num);
-         return false;
-       }
-      /* Outer loops tend to be less interesting candidates for complette
-        unrolling unless we can do a lot of propagation into the inner loop
-        body.  For now we disable outer loop unrolling when the code would
-        grow.  */
-      else if (loop->inner)
-       {
-         if (dump_file && (dump_flags & TDF_DETAILS))
-           fprintf (dump_file, "Not unrolling loop %d: "
-                    "it is not innermost and code would grow.\n",
-                    loop->num);
-         return false;
-       }
-      /* If there is call on a hot path through the loop, then
-        there is most probably not much to optimize.  */
-      else if (size.num_non_pure_calls_on_hot_path)
-       {
+         unsigned HOST_WIDE_INT ninsns = size.overall;
+         unsigned HOST_WIDE_INT unr_insns
+           = estimated_unrolled_size (&size, n_unroll);
          if (dump_file && (dump_flags & TDF_DETAILS))
-           fprintf (dump_file, "Not unrolling loop %d: "
-                    "contains call and code would grow.\n",
-                    loop->num);
-         return false;
-       }
-      /* If there is pure/const call in the function, then we
-        can still optimize the unrolled loop body if it contains
-        some other interesting code than the calls and code
-        storing or cumulating the return value.  */
-      else if (size.num_pure_calls_on_hot_path
-              /* One IV increment, one test, one ivtmp store
-                 and one useful stmt.  That is about minimal loop
-                 doing pure call.  */
-              && (size.non_call_stmts_on_hot_path
-                  <= 3 + size.num_pure_calls_on_hot_path))
-       {
-         if (dump_file && (dump_flags & TDF_DETAILS))
-           fprintf (dump_file, "Not unrolling loop %d: "
-                    "contains just pure calls and code would grow.\n",
-                    loop->num);
-         return false;
+           {
+             fprintf (dump_file, "  Loop size: %d\n", (int) ninsns);
+             fprintf (dump_file, "  Estimated size after unrolling: %d\n",
+                      (int) unr_insns);
+           }
+
+         /* If the code is going to shrink, we don't need to be extra
+            cautious on guessing if the unrolling is going to be
+            profitable.  */
+         if (unr_insns
+             /* If there is IV variable that will become constant, we
+                save one instruction in the loop prologue we do not
+                account otherwise.  */
+             <= ninsns + (size.constant_iv != false))
+           ;
+         /* We unroll only inner loops, because we do not consider it
+            profitable otherwise.  We can still cancel the loopback edge
+            of a loop that does not roll; this is always a good idea.  */
+         else if (ul == UL_NO_GROWTH)
+           {
+             if (dump_file && (dump_flags & TDF_DETAILS))
+               fprintf (dump_file, "Not unrolling loop %d: size would grow.\n",
+                        loop->num);
+             return false;
+           }
+         /* Outer loops tend to be less interesting candidates for
+            complete unrolling unless we can do a lot of propagation
+            into the inner loop body.  For now we disable outer loop
+            unrolling when the code would grow.  */
+         else if (loop->inner)
+           {
+             if (dump_file && (dump_flags & TDF_DETAILS))
+               fprintf (dump_file, "Not unrolling loop %d: "
+                        "it is not innermost and code would grow.\n",
+                        loop->num);
+             return false;
+           }
+         /* If there is a call on a hot path through the loop, then
+            there is most probably not much to optimize.  */
+         else if (size.num_non_pure_calls_on_hot_path)
+           {
+             if (dump_file && (dump_flags & TDF_DETAILS))
+               fprintf (dump_file, "Not unrolling loop %d: "
+                        "contains call and code would grow.\n",
+                        loop->num);
+             return false;
+           }
+         /* If there is a pure/const call in the function, then we can
+            still optimize the unrolled loop body if it contains some
+            other interesting code than the calls and code storing or
+            accumulating the return value.  */
+         else if (size.num_pure_calls_on_hot_path
+                  /* One IV increment, one test, one ivtmp store and
+                     one useful stmt.  That is about the minimal loop
+                     doing a pure call.  */
+                  && (size.non_call_stmts_on_hot_path
+                      <= 3 + size.num_pure_calls_on_hot_path))
+           {
+             if (dump_file && (dump_flags & TDF_DETAILS))
+               fprintf (dump_file, "Not unrolling loop %d: "
+                        "contains just pure calls and code would grow.\n",
+                        loop->num);
+             return false;
+           }
+         /* Complete unrolling is a major win when control flow is
+            removed and one big basic block is created.  If the loop
+            contains control flow the optimization may still be a win
+            because of eliminating the loop overhead but it also may
+            blow the branch predictor tables.  Limit number of
+            branches on the hot path through the peeled sequence.  */
+         else if (size.num_branches_on_hot_path * (int)n_unroll
+                  > param_max_peel_branches)
+           {
+             if (dump_file && (dump_flags & TDF_DETAILS))
+               fprintf (dump_file, "Not unrolling loop %d: "
+                        "number of branches on hot path in the unrolled "
+                        "sequence reaches --param max-peel-branches limit.\n",
+                        loop->num);
+             return false;
+           }
+         else if (unr_insns
+                  > (unsigned) param_max_completely_peeled_insns)
+           {
+             if (dump_file && (dump_flags & TDF_DETAILS))
+               fprintf (dump_file, "Not unrolling loop %d: "
+                        "number of insns in the unrolled sequence reaches "
+                        "--param max-completely-peeled-insns limit.\n",
+                        loop->num);
+             return false;
+           }
        }
-      /* Complette unrolling is major win when control flow is removed and
-        one big basic block is created.  If the loop contains control flow
-        the optimization may still be a win because of eliminating the loop
-        overhead but it also may blow the branch predictor tables.
-        Limit number of branches on the hot path through the peeled
-        sequence.  */
-      else if (size.num_branches_on_hot_path * (int)n_unroll
-              > PARAM_VALUE (PARAM_MAX_PEEL_BRANCHES))
+
+      if (!dbg_cnt (gimple_unroll))
+       return false;
+
+      initialize_original_copy_tables ();
+      auto_sbitmap wont_exit (n_unroll + 1);
+      if (exit && niter
+         && TREE_CODE (niter) == INTEGER_CST
+         && wi::leu_p (n_unroll, wi::to_widest (niter)))
        {
-         if (dump_file && (dump_flags & TDF_DETAILS))
-           fprintf (dump_file, "Not unrolling loop %d: "
-                    " number of branches on hot path in the unrolled sequence"
-                    " reach --param max-peel-branches limit.\n",
-                    loop->num);
-         return false;
+         bitmap_ones (wont_exit);
+         if (wi::eq_p (wi::to_widest (niter), n_unroll)
+             || edge_to_cancel)
+           bitmap_clear_bit (wont_exit, 0);
        }
-      else if (unr_insns
-              > (unsigned) PARAM_VALUE (PARAM_MAX_COMPLETELY_PEELED_INSNS))
+      else
        {
-         if (dump_file && (dump_flags & TDF_DETAILS))
-           fprintf (dump_file, "Not unrolling loop %d: "
-                    "(--param max-completely-peeled-insns limit reached).\n",
-                    loop->num);
-         return false;
+         exit = NULL;
+         bitmap_clear (wont_exit);
        }
-
-      initialize_original_copy_tables ();
-      wont_exit = sbitmap_alloc (n_unroll + 1);
-      bitmap_ones (wont_exit);
-      bitmap_clear_bit (wont_exit, 0);
+      if (may_be_zero)
+       bitmap_clear_bit (wont_exit, 1);
 
       if (!gimple_duplicate_loop_to_header_edge (loop, loop_preheader_edge (loop),
                                                 n_unroll, wont_exit,
-                                                exit, &to_remove,
+                                                exit, &edges_to_remove,
                                                 DLTHE_FLAG_UPDATE_FREQ
                                                 | DLTHE_FLAG_COMPLETTE_PEEL))
        {
           free_original_copy_tables ();
-         free (wont_exit);
          if (dump_file && (dump_flags & TDF_DETAILS))
            fprintf (dump_file, "Failed to duplicate the loop\n");
          return false;
        }
 
-      FOR_EACH_VEC_ELT (to_remove, i, e)
-       {
-         bool ok = remove_path (e);
-         gcc_assert (ok);
-       }
-
-      to_remove.release ();
-      free (wont_exit);
       free_original_copy_tables ();
     }
 
-
   /* Remove the conditional from the last copy of the loop.  */
   if (edge_to_cancel)
     {
-      cond = last_stmt (edge_to_cancel->src);
+      gcond *cond = as_a <gcond *> (last_stmt (edge_to_cancel->src));
+      force_edge_cold (edge_to_cancel, true);
       if (edge_to_cancel->flags & EDGE_TRUE_VALUE)
        gimple_cond_make_false (cond);
       else
        gimple_cond_make_true (cond);
       update_stmt (cond);
-      /* Do not remove the path. Doing so may remove outer loop
-        and confuse bookkeeping code in tree_unroll_loops_completelly.  */
+      /* Do not remove the path, as doing so may remove outer loop and
+        confuse bookkeeping code in tree_unroll_loops_completely.  */
     }
 
   /* Store the loop for later unlooping and exit removal.  */
@@ -877,11 +950,11 @@ try_unroll_loop_completely (struct loop *loop,
         {
           dump_printf_loc (MSG_OPTIMIZED_LOCATIONS | TDF_DETAILS, locus,
                            "loop with %d iterations completely unrolled",
-                          (int) (n_unroll + 1));
-          if (profile_info)
+                          (int) n_unroll);
+          if (loop->header->count.initialized_p ())
             dump_printf (MSG_OPTIMIZED_LOCATIONS | TDF_DETAILS,
                          " (header execution count %d)",
-                         (int)loop->header->count);
+                         (int)loop->header->count.to_gcov_type ());
           dump_printf (MSG_OPTIMIZED_LOCATIONS | TDF_DETAILS, "\n");
         }
     }
@@ -901,6 +974,187 @@ try_unroll_loop_completely (struct loop *loop,
   return true;
 }
 
+/* Return number of instructions after peeling.  */
+static unsigned HOST_WIDE_INT
+estimated_peeled_sequence_size (struct loop_size *size,
+                               unsigned HOST_WIDE_INT npeel)
+{
+  return MAX (npeel * (HOST_WIDE_INT) (size->overall
+                                      - size->eliminated_by_peeling), 1);
+}
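Worked example with invented numbers: a body of 12 insns of which 5 are likely eliminated by peeling, peeled 3 times, is estimated at 3 * (12 - 5) = 21 insns; the MAX with 1 only matters when everything is expected to fold away.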
+
+/* If the loop is expected to iterate N times and is
+   small enough, duplicate the loop body N+1 times before
+   the loop itself.  This way the hot path will never
+   enter the loop.
+   Parameters are the same as for try_unroll_loop_completely.  */
+
+static bool
+try_peel_loop (class loop *loop,
+              edge exit, tree niter, bool may_be_zero,
+              HOST_WIDE_INT maxiter)
+{
+  HOST_WIDE_INT npeel;
+  struct loop_size size;
+  int peeled_size;
+
+  if (!flag_peel_loops
+      || param_max_peel_times <= 0
+      || !peeled_loops)
+    return false;
+
+  if (bitmap_bit_p (peeled_loops, loop->num))
+    {
+      if (dump_file)
+        fprintf (dump_file, "Not peeling: loop is already peeled\n");
+      return false;
+    }
+
+  /* We don't peel loops that will be unrolled as this can duplicate a
+     loop more times than the user requested.  */
+  if (loop->unroll)
+    {
+      if (dump_file)
+        fprintf (dump_file, "Not peeling: user didn't want it peeled.\n");
+      return false;
+    }
+
+  /* Peel only innermost loops.
+     While the code is perfectly capable of peeling non-innermost loops,
+     the heuristics would probably need some improvements. */
+  if (loop->inner)
+    {
+      if (dump_file)
+       fprintf (dump_file, "Not peeling: outer loop\n");
+      return false;
+    }
+
+  if (!optimize_loop_for_speed_p (loop))
+    {
+      if (dump_file)
+       fprintf (dump_file, "Not peeling: cold loop\n");
+      return false;
+    }
+
+  /* Check if there is an estimate on the number of iterations.  */
+  npeel = estimated_loop_iterations_int (loop);
+  if (npeel < 0)
+    npeel = likely_max_loop_iterations_int (loop);
+  if (npeel < 0)
+    {
+      if (dump_file)
+        fprintf (dump_file, "Not peeling: number of iterations is not "
+                "estimated\n");
+      return false;
+    }
+  if (maxiter >= 0 && maxiter <= npeel)
+    {
+      if (dump_file)
+       fprintf (dump_file, "Not peeling: upper bound is known so can "
+                "unroll completely\n");
+      return false;
+    }
+
+  /* We want to peel the estimated number of iterations + 1 (so we never
+     enter the loop on the quick path).  Check against PARAM_MAX_PEEL_TIMES
+     and be sure to avoid overflows.  */
+  if (npeel > param_max_peel_times - 1)
+    {
+      if (dump_file)
+       fprintf (dump_file, "Not peeling: rolls too much "
+                "(%i + 1 > --param max-peel-times)\n", (int) npeel);
+      return false;
+    }
+  npeel++;
+
+  /* Check peeled loops size.  */
+  tree_estimate_loop_size (loop, exit, NULL, &size,
+                          param_max_peeled_insns);
+  if ((peeled_size = estimated_peeled_sequence_size (&size, (int) npeel))
+      > param_max_peeled_insns)
+    {
+      if (dump_file)
+       fprintf (dump_file, "Not peeling: peeled sequence size is too large "
+                "(%i insns > --param max-peel-insns)", peeled_size);
+      return false;
+    }
+
+  if (!dbg_cnt (gimple_unroll))
+    return false;
+
+  /* Duplicate possibly eliminating the exits.  */
+  initialize_original_copy_tables ();
+  auto_sbitmap wont_exit (npeel + 1);
+  if (exit && niter
+      && TREE_CODE (niter) == INTEGER_CST
+      && wi::leu_p (npeel, wi::to_widest (niter)))
+    {
+      bitmap_ones (wont_exit);
+      bitmap_clear_bit (wont_exit, 0);
+    }
+  else
+    {
+      exit = NULL;
+      bitmap_clear (wont_exit);
+    }
+  if (may_be_zero)
+    bitmap_clear_bit (wont_exit, 1);
+  if (!gimple_duplicate_loop_to_header_edge (loop, loop_preheader_edge (loop),
+                                            npeel, wont_exit,
+                                            exit, &edges_to_remove,
+                                            DLTHE_FLAG_UPDATE_FREQ))
+    {
+      free_original_copy_tables ();
+      return false;
+    }
+  free_original_copy_tables ();
+  if (dump_file && (dump_flags & TDF_DETAILS))
+    {
+      fprintf (dump_file, "Peeled loop %d, %i times.\n",
+              loop->num, (int) npeel);
+    }
+  if (loop->any_estimate)
+    {
+      if (wi::ltu_p (npeel, loop->nb_iterations_estimate))
+        loop->nb_iterations_estimate -= npeel;
+      else
+       loop->nb_iterations_estimate = 0;
+    }
+  if (loop->any_upper_bound)
+    {
+      if (wi::ltu_p (npeel, loop->nb_iterations_upper_bound))
+        loop->nb_iterations_upper_bound -= npeel;
+      else
+        loop->nb_iterations_upper_bound = 0;
+    }
+  if (loop->any_likely_upper_bound)
+    {
+      if (wi::ltu_p (npeel, loop->nb_iterations_likely_upper_bound))
+       loop->nb_iterations_likely_upper_bound -= npeel;
+      else
+       {
+         loop->any_estimate = true;
+         loop->nb_iterations_estimate = 0;
+         loop->nb_iterations_likely_upper_bound = 0;
+       }
+    }
+  profile_count entry_count = profile_count::zero ();
+
+  edge e;
+  edge_iterator ei;
+  FOR_EACH_EDGE (e, ei, loop->header->preds)
+    if (e->src != loop->latch)
+      {
+       if (e->src->count.initialized_p ())
+         entry_count += e->src->count;
+       gcc_assert (!flow_bb_inside_loop_p (loop, e->src));
+      }
+  profile_probability p;
+  p = entry_count.probability_in (loop->header->count);
+  scale_loop_profile (loop, p, 0);
+  bitmap_set_bit (peeled_loops, loop->num);
+  return true;
+}
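A rough before/after sketch (illustration only; p and body stand for an arbitrary exit test and loop body): peeling npeel = 2 copies for an estimate of one iteration means the loop proper is entered only when the estimate is exceeded.

    extern int p (void);
    extern void body (void);

    /* Original loop.  */
    void
    original (void)
    {
      while (p ())
        body ();
    }

    /* Conceptual result of peeling npeel = 2 copies.  */
    void
    peeled (void)
    {
      if (p ())
        {
          body ();
          if (p ())
            {
              body ();
              while (p ())   /* rarely reached when the estimate holds */
                body ();
            }
        }
    }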
 /* Adds a canonical induction variable to LOOP if suitable.
    CREATE_IV is true if we may create a new iv.  UL determines
    which loops we are allowed to completely unroll.  If TRY_EVAL is true, we try
@@ -908,22 +1162,44 @@ try_unroll_loop_completely (struct loop *loop,
    Returns true if cfg is changed.   */
 
 static bool
-canonicalize_loop_induction_variables (struct loop *loop,
+canonicalize_loop_induction_variables (class loop *loop,
                                       bool create_iv, enum unroll_level ul,
-                                      bool try_eval)
+                                      bool try_eval, bool allow_peel)
 {
   edge exit = NULL;
   tree niter;
   HOST_WIDE_INT maxiter;
   bool modified = false;
-  location_t locus = UNKNOWN_LOCATION;
+  dump_user_location_t locus;
+  class tree_niter_desc niter_desc;
+  bool may_be_zero = false;
 
-  niter = number_of_latch_executions (loop);
+  /* For unrolling allow conditional constant or zero iterations, thus
+     perform loop-header copying on-the-fly.  */
   exit = single_exit (loop);
+  niter = chrec_dont_know;
+  if (exit && number_of_iterations_exit (loop, exit, &niter_desc, false))
+    {
+      niter = niter_desc.niter;
+      may_be_zero
+       = niter_desc.may_be_zero && !integer_zerop (niter_desc.may_be_zero);
+    }
   if (TREE_CODE (niter) == INTEGER_CST)
-    locus = gimple_location (last_stmt (exit->src));
+    locus = last_stmt (exit->src);
   else
     {
+      /* For non-constant niter fold may_be_zero into niter again.  */
+      if (may_be_zero)
+       {
+         if (COMPARISON_CLASS_P (niter_desc.may_be_zero))
+           niter = fold_build3 (COND_EXPR, TREE_TYPE (niter),
+                                niter_desc.may_be_zero,
+                                build_int_cst (TREE_TYPE (niter), 0), niter);
+         else
+           niter = chrec_dont_know;
+         may_be_zero = false;
+       }
+
       /* If the loop has more than one exit, try checking all of them
         for # of iterations determinable through scev.  */
       if (!exit)
@@ -936,7 +1212,7 @@ canonicalize_loop_induction_variables (struct loop *loop,
        niter = find_loop_niter_by_eval (loop, &exit);
 
       if (exit)
-        locus = gimple_location (last_stmt (exit->src));
+        locus = last_stmt (exit->src);
 
       if (TREE_CODE (niter) != INTEGER_CST)
        exit = NULL;
@@ -946,8 +1222,10 @@ canonicalize_loop_induction_variables (struct loop *loop,
      by find_loop_niter_by_eval.  Be sure to keep it for future.  */
   if (niter && TREE_CODE (niter) == INTEGER_CST)
     {
-      record_niter_bound (loop, tree_to_double_int (niter),
-                         exit == single_likely_exit (loop), true);
+      vec<edge> exits = get_loop_exit_edges  (loop);
+      record_niter_bound (loop, wi::to_widest (niter),
+                         exit == single_likely_exit (loop, exits), true);
+      exits.release ();
     }
 
   /* Force re-computation of loop bounds so we can remove redundant exits.  */
@@ -966,19 +1244,43 @@ canonicalize_loop_induction_variables (struct loop *loop,
       fprintf (dump_file, "Loop %d iterates at most %i times.\n", loop->num,
               (int)maxiter);
     }
+  if (dump_file && (dump_flags & TDF_DETAILS)
+      && likely_max_loop_iterations_int (loop) >= 0)
+    {
+      fprintf (dump_file, "Loop %d likely iterates at most %i times.\n",
+              loop->num, (int)likely_max_loop_iterations_int (loop));
+    }
 
   /* Remove exits that are known to be never taken based on loop bound.
      Needs to be called after compilation of max_loop_iterations_int that
      populates the loop bounds.  */
   modified |= remove_redundant_iv_tests (loop);
 
-  if (try_unroll_loop_completely (loop, exit, niter, ul, maxiter, locus))
+  if (try_unroll_loop_completely (loop, exit, niter, may_be_zero, ul,
+                                 maxiter, locus, allow_peel))
     return true;
 
   if (create_iv
       && niter && !chrec_contains_undetermined (niter)
       && exit && just_once_each_iteration_p (loop, exit->src))
-    create_canonical_iv (loop, exit, niter);
+    {
+      tree iv_niter = niter;
+      if (may_be_zero)
+       {
+         if (COMPARISON_CLASS_P (niter_desc.may_be_zero))
+           iv_niter = fold_build3 (COND_EXPR, TREE_TYPE (iv_niter),
+                                   niter_desc.may_be_zero,
+                                   build_int_cst (TREE_TYPE (iv_niter), 0),
+                                   iv_niter);
+         else
+           iv_niter = NULL_TREE;
+       }
+      if (iv_niter)
+       create_canonical_iv (loop, exit, iv_niter);
+    }
+
+  if (ul == UL_ALL)
+    modified |= try_peel_loop (loop, exit, niter, may_be_zero, maxiter);
 
   return modified;
 }
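In source terms, the fold_build3 calls above simply guard the iteration count with the may_be_zero condition: the constructed tree is equivalent to the C expression may_be_zero ? 0 : niter, so a loop whose entry test can fail gets a count of zero on that path.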
@@ -989,19 +1291,18 @@ canonicalize_loop_induction_variables (struct loop *loop,
 unsigned int
 canonicalize_induction_variables (void)
 {
-  struct loop *loop;
+  class loop *loop;
   bool changed = false;
   bool irred_invalidated = false;
   bitmap loop_closed_ssa_invalidated = BITMAP_ALLOC (NULL);
 
-  free_numbers_of_iterations_estimates ();
-  estimate_numbers_of_iterations ();
+  estimate_numbers_of_iterations (cfun);
 
   FOR_EACH_LOOP (loop, LI_FROM_INNERMOST)
     {
       changed |= canonicalize_loop_induction_variables (loop,
                                                        true, UL_SINGLE_ITER,
-                                                       true);
+                                                       true, false);
     }
   gcc_assert (!need_ssa_update_p (cfun));
 
@@ -1012,6 +1313,7 @@ canonicalize_induction_variables (void)
 
   /* Clean up the information about numbers of iterations, since brute force
      evaluation could reveal new information.  */
+  free_numbers_of_iterations_estimates (cfun);
   scev_reset ();
 
   if (!bitmap_empty_p (loop_closed_ssa_invalidated))
@@ -1026,106 +1328,53 @@ canonicalize_induction_variables (void)
   return 0;
 }
 
-/* Propagate VAL into all uses of SSA_NAME.  */
-
-static void
-propagate_into_all_uses (tree ssa_name, tree val)
-{
-  imm_use_iterator iter;
-  gimple use_stmt;
-
-  FOR_EACH_IMM_USE_STMT (use_stmt, iter, ssa_name)
-    {
-      gimple_stmt_iterator use_stmt_gsi = gsi_for_stmt (use_stmt);
-      use_operand_p use;
-
-      FOR_EACH_IMM_USE_ON_STMT (use, iter)
-       SET_USE (use, val);
-
-      if (is_gimple_assign (use_stmt)
-         && get_gimple_rhs_class (gimple_assign_rhs_code (use_stmt))
-            == GIMPLE_SINGLE_RHS)
-       {
-         tree rhs = gimple_assign_rhs1 (use_stmt);
-
-         if (TREE_CODE (rhs) == ADDR_EXPR)
-           recompute_tree_invariant_for_addr_expr (rhs);
-       }
-
-      fold_stmt_inplace (&use_stmt_gsi);
-      update_stmt (use_stmt);
-      maybe_clean_or_replace_eh_stmt (use_stmt, use_stmt);
-    }
-}
-
-/* Propagate constant SSA_NAMEs defined in basic block BB.  */
-
-static void
-propagate_constants_for_unrolling (basic_block bb)
-{
-  gimple_stmt_iterator gsi;
-
-  /* Look for degenerate PHI nodes with constant argument.  */
-  for (gsi = gsi_start_phis (bb); !gsi_end_p (gsi); )
-    {
-      gimple phi = gsi_stmt (gsi);
-      tree result = gimple_phi_result (phi);
-      tree arg = gimple_phi_arg_def (phi, 0);
-
-      if (gimple_phi_num_args (phi) == 1 && TREE_CODE (arg) == INTEGER_CST)
-       {
-         propagate_into_all_uses (result, arg);
-         gsi_remove (&gsi, true);
-         release_ssa_name (result);
-       }
-      else
-       gsi_next (&gsi);
-    }
-
-  /* Look for assignments to SSA names with constant RHS.  */
-  for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); )
-    {
-      gimple stmt = gsi_stmt (gsi);
-      tree lhs;
-
-      if (is_gimple_assign (stmt)
-         && gimple_assign_rhs_code (stmt) == INTEGER_CST
-         && (lhs = gimple_assign_lhs (stmt), TREE_CODE (lhs) == SSA_NAME)
-         && !SSA_NAME_OCCURS_IN_ABNORMAL_PHI (lhs))
-       {
-         propagate_into_all_uses (lhs, gimple_assign_rhs1 (stmt));
-         gsi_remove (&gsi, true);
-         release_ssa_name (lhs);
-       }
-      else
-       gsi_next (&gsi);
-    }
-}
-
 /* Process loops from innermost to outer, stopping at the innermost
    loop we unrolled.  */
 
 static bool
 tree_unroll_loops_completely_1 (bool may_increase_size, bool unroll_outer,
-                               vec<loop_p, va_heap>& father_stack,
-                               struct loop *loop)
+                               bitmap father_bbs, class loop *loop)
 {
-  struct loop *loop_father;
+  class loop *loop_father;
   bool changed = false;
-  struct loop *inner;
+  class loop *inner;
   enum unroll_level ul;
+  unsigned num = number_of_loops (cfun);
 
-  /* Process inner loops first.  */
+  /* Process inner loops first.  Don't walk loops added by the recursive
+     calls because SSA form is not up-to-date.  They can be handled in the
+     next iteration.  */
+  bitmap child_father_bbs = NULL;
   for (inner = loop->inner; inner != NULL; inner = inner->next)
-    changed |= tree_unroll_loops_completely_1 (may_increase_size,
-                                              unroll_outer, father_stack,
-                                              inner);
+    if ((unsigned) inner->num < num)
+      {
+       if (!child_father_bbs)
+         child_father_bbs = BITMAP_ALLOC (NULL);
+       if (tree_unroll_loops_completely_1 (may_increase_size, unroll_outer,
+                                           child_father_bbs, inner))
+         {
+           bitmap_ior_into (father_bbs, child_father_bbs);
+           bitmap_clear (child_father_bbs);
+           changed = true;
+         }
+      }
+  if (child_father_bbs)
+    BITMAP_FREE (child_father_bbs);
+
   /* If we changed an inner loop we cannot process outer loops in this
      iteration because SSA form is not up-to-date.  Continue with
      siblings of outer loops instead.  */
   if (changed)
-    return true;
+    {
+      /* If we are recorded as a father, clear all other fathers that
+         are necessarily covered already to avoid redundant work.  */
+      if (bitmap_bit_p (father_bbs, loop->header->index))
+       {
+         bitmap_clear (father_bbs);
+         bitmap_set_bit (father_bbs, loop->header->index);
+       }
+      return true;
+    }
 
   /* Don't unroll #pragma omp simd loops until the vectorizer
      attempts to vectorize those.  */
@@ -1137,7 +1386,9 @@ tree_unroll_loops_completely_1 (bool may_increase_size, bool unroll_outer,
   if (!loop_father)
     return false;
 
-  if (may_increase_size && optimize_loop_nest_for_speed_p (loop)
+  if (loop->unroll > 1)
+    ul = UL_ALL;
+  else if (may_increase_size && optimize_loop_nest_for_speed_p (loop)
       /* Unroll outermost loops only if asked to do so or they do
         not cause code growth.  */
       && (unroll_outer || loop_outer (loop_father)))
@@ -1146,16 +1397,19 @@ tree_unroll_loops_completely_1 (bool may_increase_size, bool unroll_outer,
     ul = UL_NO_GROWTH;
 
   if (canonicalize_loop_induction_variables
-        (loop, false, ul, !flag_tree_loop_ivcanon))
+        (loop, false, ul, !flag_tree_loop_ivcanon, unroll_outer))
     {
       /* If we'll continue unrolling, we need to propagate constants
         within the new basic blocks to fold away induction variable
         computations; otherwise, the size might blow up before the
         iteration is complete and the IR eventually cleaned up.  */
-      if (loop_outer (loop_father) && !loop_father->aux)
+      if (loop_outer (loop_father))
        {
-         father_stack.safe_push (loop_father);
-         loop_father->aux = loop_father;
+         /* Once we process our father we will have processed
+            the fathers of our children as well, so avoid doing
+            redundant work and clear fathers we've gathered so far.  */
+         bitmap_clear (father_bbs);
+         bitmap_set_bit (father_bbs, loop_father->header->index);
        }
 
       return true;
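The unroll-level choice a few lines above gives an explicit request (loop->unroll > 1, e.g. from #pragma GCC unroll) priority, and otherwise permits code growth only when growth is allowed, the nest is optimized for speed, and the loop either is not outermost or outer unrolling was requested.  A compact sketch of that decision ladder, assuming the illustrative predicate names below (the enum is a subset of the one defined earlier in this file):

   /* Subset of this file's unroll_level enum, for the sketch only.  */
   enum unroll_level { UL_NO_GROWTH, UL_ALL };

   static unroll_level
   pick_unroll_level (int requested_unroll, bool may_increase_size,
                      bool nest_is_hot, bool unroll_outer, bool not_outermost)
   {
     if (requested_unroll > 1)      /* Explicit unroll request wins.  */
       return UL_ALL;
     if (may_increase_size && nest_is_hot && (unroll_outer || not_outermost))
       return UL_ALL;
     return UL_NO_GROWTH;
   }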
@@ -1168,14 +1422,16 @@ tree_unroll_loops_completely_1 (bool may_increase_size, bool unroll_outer,
    MAY_INCREASE_SIZE is true, perform the unrolling only if the
    size of the code does not increase.  */
 
-unsigned int
+static unsigned int
 tree_unroll_loops_completely (bool may_increase_size, bool unroll_outer)
 {
-  auto_vec<loop_p, 16> father_stack;
+  bitmap father_bbs = BITMAP_ALLOC (NULL);
   bool changed;
   int iteration = 0;
   bool irred_invalidated = false;
 
+  estimate_numbers_of_iterations (cfun);
+
   do
     {
       changed = false;
@@ -1184,27 +1440,19 @@ tree_unroll_loops_completely (bool may_increase_size, bool unroll_outer)
       if (loops_state_satisfies_p (LOOP_CLOSED_SSA))
        loop_closed_ssa_invalidated = BITMAP_ALLOC (NULL);
 
-      free_numbers_of_iterations_estimates ();
-      estimate_numbers_of_iterations ();
+      free_numbers_of_iterations_estimates (cfun);
+      estimate_numbers_of_iterations (cfun);
 
       changed = tree_unroll_loops_completely_1 (may_increase_size,
-                                               unroll_outer, father_stack,
+                                               unroll_outer, father_bbs,
                                                current_loops->tree_root);
       if (changed)
        {
-         struct loop **iter;
          unsigned i;
 
-         /* Be sure to skip unlooped loops while procesing father_stack
-            array.  */
-         FOR_EACH_VEC_ELT (loops_to_unloop, i, iter)
-           (*iter)->aux = NULL;
-         FOR_EACH_VEC_ELT (father_stack, i, iter)
-           if (!(*iter)->aux)
-             *iter = NULL;
           unloop_loops (loop_closed_ssa_invalidated, &irred_invalidated);
 
-         /* We can not use TODO_update_ssa_no_phi because VOPS gets confused.  */
+         /* We cannot use TODO_update_ssa_no_phi because VOPS gets confused.  */
          if (loop_closed_ssa_invalidated
              && !bitmap_empty_p (loop_closed_ssa_invalidated))
             rewrite_into_loop_closed_ssa (loop_closed_ssa_invalidated,
@@ -1212,18 +1460,34 @@ tree_unroll_loops_completely (bool may_increase_size, bool unroll_outer)
          else
            update_ssa (TODO_update_ssa);
 
+         /* father_bbs is a bitmap of loop father header BB indices.
+            Translate that to what non-root loops these BBs belong to now.  */
+         bitmap_iterator bi;
+         bitmap fathers = BITMAP_ALLOC (NULL);
+         EXECUTE_IF_SET_IN_BITMAP (father_bbs, 0, i, bi)
+           {
+             basic_block unrolled_loop_bb = BASIC_BLOCK_FOR_FN (cfun, i);
+             if (! unrolled_loop_bb)
+               continue;
+             if (loop_outer (unrolled_loop_bb->loop_father))
+               bitmap_set_bit (fathers,
+                               unrolled_loop_bb->loop_father->num);
+           }
+         bitmap_clear (father_bbs);
          /* Propagate the constants within the new basic blocks.  */
-         FOR_EACH_VEC_ELT (father_stack, i, iter)
-           if (*iter)
-             {
-               unsigned j;
-               basic_block *body = get_loop_body_in_dom_order (*iter);
-               for (j = 0; j < (*iter)->num_nodes; j++)
-                 propagate_constants_for_unrolling (body[j]);
-               free (body);
-               (*iter)->aux = NULL;
-             }
-         father_stack.truncate (0);
+         EXECUTE_IF_SET_IN_BITMAP (fathers, 0, i, bi)
+           {
+             loop_p father = get_loop (cfun, i);
+             bitmap exit_bbs = BITMAP_ALLOC (NULL);
+             loop_exit *exit = father->exits->next;
+             while (exit->e)
+               {
+                 bitmap_set_bit (exit_bbs, exit->e->dest->index);
+                 exit = exit->next;
+               }
+             do_rpo_vn (cfun, loop_preheader_edge (father), exit_bbs);
+           }
+         BITMAP_FREE (fathers);
 
          /* This will take care of removing completely unrolled loops
             from the loop structures so we can continue unrolling now
@@ -1234,18 +1498,16 @@ tree_unroll_loops_completely (bool may_increase_size, bool unroll_outer)
          /* Clean up the information about numbers of iterations, since
             complete unrolling might have invalidated it.  */
          scev_reset ();
-#ifdef ENABLE_CHECKING
-         if (loops_state_satisfies_p (LOOP_CLOSED_SSA))
+         if (flag_checking && loops_state_satisfies_p (LOOP_CLOSED_SSA))
            verify_loop_closed_ssa (true);
-#endif
        }
       if (loop_closed_ssa_invalidated)
         BITMAP_FREE (loop_closed_ssa_invalidated);
     }
   while (changed
-        && ++iteration <= PARAM_VALUE (PARAM_MAX_UNROLL_ITERATIONS));
+        && ++iteration <= param_max_unroll_iterations);
 
-  father_stack.release ();
+  BITMAP_FREE (father_bbs);
 
   if (irred_invalidated
       && loops_state_satisfies_p (LOOPS_HAVE_MARKED_IRREDUCIBLE_REGIONS))
@@ -1263,7 +1525,6 @@ const pass_data pass_data_iv_canon =
   GIMPLE_PASS, /* type */
   "ivcanon", /* name */
   OPTGROUP_LOOP, /* optinfo_flags */
-  true, /* has_execute */
   TV_TREE_LOOP_IVCANON, /* tv_id */
   ( PROP_cfg | PROP_ssa ), /* properties_required */
   0, /* properties_provided */
@@ -1311,7 +1572,6 @@ const pass_data pass_data_complete_unroll =
   GIMPLE_PASS, /* type */
   "cunroll", /* name */
   OPTGROUP_LOOP, /* optinfo_flags */
-  true, /* has_execute */
   TV_COMPLETE_UNROLL, /* tv_id */
   ( PROP_cfg | PROP_ssa ), /* properties_required */
   0, /* properties_provided */
@@ -1338,9 +1598,20 @@ pass_complete_unroll::execute (function *fun)
   if (number_of_loops (fun) <= 1)
     return 0;
 
-  return tree_unroll_loops_completely (flag_unroll_loops
-                                      || flag_peel_loops
-                                      || optimize >= 3, true);
+  /* If we ever decide to run loop peeling more than once, we will need to
+     track loops already peeled in loop structures themselves to avoid
+     re-peeling the same loop multiple times.  */
+  if (flag_peel_loops)
+    peeled_loops = BITMAP_ALLOC (NULL);
+  unsigned int val = tree_unroll_loops_completely (flag_unroll_loops
+                                                  || flag_peel_loops
+                                                  || optimize >= 3, true);
+  if (peeled_loops)
+    {
+      BITMAP_FREE (peeled_loops);
+      peeled_loops = NULL;
+    }
+  return val;
 }
 
 } // anon namespace
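execute () above allocates the peeled_loops bitmap only when -fpeel-loops is in effect and both frees it and clears the pointer afterwards, so a later invocation of the pass never sees stale peeling state.  A rough standalone analogue of that allocate/run/release pattern (illustrative names, not GCC's API):

   #include <set>

   static std::set<int> *peeled_loops_sketch;

   static unsigned
   execute_sketch (bool peel_loops, unsigned (*unroll_completely) (void))
   {
     if (peel_loops)
       peeled_loops_sketch = new std::set<int>;
     unsigned val = unroll_completely ();
     if (peeled_loops_sketch)
       {
         delete peeled_loops_sketch;
         peeled_loops_sketch = nullptr;
       }
     return val;
   }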
@@ -1360,7 +1631,6 @@ const pass_data pass_data_complete_unrolli =
   GIMPLE_PASS, /* type */
   "cunrolli", /* name */
   OPTGROUP_LOOP, /* optinfo_flags */
-  true, /* has_execute */
   TV_COMPLETE_UNROLL, /* tv_id */
   ( PROP_cfg | PROP_ssa ), /* properties_required */
   0, /* properties_provided */
@@ -1387,13 +1657,11 @@ pass_complete_unrolli::execute (function *fun)
 {
   unsigned ret = 0;
 
-  loop_optimizer_init (LOOPS_NORMAL
-                      | LOOPS_HAVE_RECORDED_EXITS);
+  loop_optimizer_init (LOOPS_NORMAL | LOOPS_HAVE_RECORDED_EXITS);
   if (number_of_loops (fun) > 1)
     {
       scev_initialize ();
       ret = tree_unroll_loops_completely (optimize >= 3, false);
-      free_numbers_of_iterations_estimates ();
       scev_finalize ();
     }
   loop_optimizer_finalize ();