diff --git a/gcc/tree-ssa-loop-ivcanon.c b/gcc/tree-ssa-loop-ivcanon.c
index 319a4106930ab0b31bd340f38d29085175ae7fd8..8ab6ab3330c5f7302ffddd7fc47c7b20fbed77fc 100644
--- a/gcc/tree-ssa-loop-ivcanon.c
+++ b/gcc/tree-ssa-loop-ivcanon.c
@@ -1,5 +1,5 @@
 /* Induction variable canonicalization and loop peeling.
-   Copyright (C) 2004-2016 Free Software Foundation, Inc.
+   Copyright (C) 2004-2020 Free Software Foundation, Inc.
 
 This file is part of GCC.
 
@@ -59,10 +59,11 @@ along with GCC; see the file COPYING3.  If not see
 #include "cfgloop.h"
 #include "tree-chrec.h"
 #include "tree-scalar-evolution.h"
-#include "params.h"
 #include "tree-inline.h"
 #include "tree-cfgcleanup.h"
 #include "builtins.h"
+#include "tree-ssa-sccvn.h"
+#include "dbgcnt.h"
 
 /* Specifies types of loops that may be unrolled.  */
 
@@ -76,10 +77,13 @@ enum unroll_level
 };
 
 /* Adds a canonical induction variable to LOOP iterating NITER times.  EXIT
-   is the exit edge whose condition is replaced.  */
+   is the exit edge whose condition is replaced.  The SSA versions of the new
+   IV before and after increment will be stored in VAR_BEFORE and VAR_AFTER
+   if they are not NULL.  */
 
-static void
-create_canonical_iv (struct loop *loop, edge exit, tree niter)
+void
+create_canonical_iv (class loop *loop, edge exit, tree niter,
+                    tree *var_before = NULL, tree *var_after = NULL)
 {
   edge in;
   tree type, var;
@@ -112,7 +116,9 @@ create_canonical_iv (struct loop *loop, edge exit, tree niter)
   create_iv (niter,
             build_int_cst (type, -1),
             NULL_TREE, loop,
-            &incr_at, false, NULL, &var);
+            &incr_at, false, var_before, &var);
+  if (var_after)
+    *var_after = var;
 
   cmp = (exit->flags & EDGE_TRUE_VALUE) ? EQ_EXPR : NE_EXPR;
   gimple_cond_set_code (cond, cmp);
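
Note on the interface change above: VAR_BEFORE and VAR_AFTER are new optional out-parameters with NULL defaults, so existing callers compile unchanged. A minimal self-contained sketch of the pattern (toy names, not GCC internals):

    // Toy model (hypothetical names, not GCC code) of the defaulted
    // out-parameter pattern the new create_canonical_iv signature uses:
    // callers that do not need the IV names simply omit the arguments.
    #include <cstdio>

    static void
    make_iv (int niter, int *var_before = nullptr, int *var_after = nullptr)
    {
      int before = niter;      // stand-in for the SSA name before increment
      int after = niter - 1;   // stand-in for the SSA name after increment
      if (var_before)
        *var_before = before;
      if (var_after)
        *var_after = after;
    }

    int
    main ()
    {
      int b, a;
      make_iv (10, &b, &a);    // caller that wants both SSA names
      make_iv (10);            // caller that wants neither
      printf ("%d %d\n", b, a);
      return 0;
    }
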
@@ -155,11 +161,9 @@ struct loop_size
 /* Return true if OP in STMT will be constant after peeling LOOP.  */
 
 static bool
-constant_after_peeling (tree op, gimple *stmt, struct loop *loop)
+constant_after_peeling (tree op, gimple *stmt, class loop *loop)
 {
-  affine_iv iv;
-
-  if (is_gimple_min_invariant (op))
+  if (CONSTANT_CLASS_P (op))
     return true;
 
   /* We can still fold accesses to constant arrays when index is known.  */
@@ -188,12 +192,12 @@ constant_after_peeling (tree op, gimple *stmt, struct loop *loop)
       return false;
     }
 
-  /* Induction variables are constants.  */
-  if (!simple_iv (loop, loop_containing_stmt (stmt), op, &iv, false))
-    return false;
-  if (!is_gimple_min_invariant (iv.base))
+  /* Induction variables are constants when defined in the loop.  */
+  if (loop_containing_stmt (stmt) != loop)
     return false;
-  if (!is_gimple_min_invariant (iv.step))
+  tree ev = analyze_scalar_evolution (loop, op);
+  if (chrec_contains_undetermined (ev)
+      || chrec_contains_symbols (ev))
     return false;
   return true;
 }
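
The rewrite above replaces the simple_iv check with scalar-evolution analysis: OP is constant after peeling only when its evolution in LOOP is fully determined and symbol-free. A toy model of that predicate (hypothetical types, not the real tree-chrec API):

    // Toy model (not GCC code) of the new test: an evolution
    // {base, +, step} is "constant after peeling" only when it is known
    // and both base and step are compile-time constants.
    #include <cstdio>

    struct chrec_model { bool known; bool base_is_cst; bool step_is_cst; };

    static bool
    constant_after_peeling_model (const chrec_model &ev)
    {
      if (!ev.known)                            // chrec_contains_undetermined
        return false;
      if (!ev.base_is_cst || !ev.step_is_cst)   // chrec_contains_symbols
        return false;
      return true;
    }

    int
    main ()
    {
      chrec_model i_counted  = { true, true, true };   // {0, +, 1}
      chrec_model i_symbolic = { true, true, false };  // {0, +, n}
      printf ("%d %d\n", constant_after_peeling_model (i_counted),
              constant_after_peeling_model (i_symbolic));
      return 0;
    }
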
@@ -207,8 +211,8 @@ constant_after_peeling (tree op, gimple *stmt, struct loop *loop)
    Stop estimating after UPPER_BOUND is met.  Return true in this case.  */
 
 static bool
-tree_estimate_loop_size (struct loop *loop, edge exit, edge edge_to_cancel, struct loop_size *size,
-                        int upper_bound)
+tree_estimate_loop_size (class loop *loop, edge exit, edge edge_to_cancel,
+                        struct loop_size *size, int upper_bound)
 {
   basic_block *body = get_loop_body (loop);
   gimple_stmt_iterator gsi;
@@ -236,7 +240,8 @@ tree_estimate_loop_size (struct loop *loop, edge exit, edge edge_to_cancel, stru
       else
        after_exit = false;
       if (dump_file && (dump_flags & TDF_DETAILS))
-       fprintf (dump_file, " BB: %i, after_exit: %i\n", body[i]->index, after_exit);
+       fprintf (dump_file, " BB: %i, after_exit: %i\n", body[i]->index,
+                after_exit);
 
       for (gsi = gsi_start_bb (body[i]); !gsi_end_p (gsi); gsi_next (&gsi))
        {
@@ -249,70 +254,79 @@ tree_estimate_loop_size (struct loop *loop, edge exit, edge edge_to_cancel, stru
          if (dump_file && (dump_flags & TDF_DETAILS))
            {
              fprintf (dump_file, "  size: %3i ", num);
-             print_gimple_stmt (dump_file, gsi_stmt (gsi), 0, 0);
+             print_gimple_stmt (dump_file, gsi_stmt (gsi), 0);
            }
 
          /* Look for reasons why we might optimize this stmt away. */
 
-         if (gimple_has_side_effects (stmt))
-           ;
-         /* Exit conditional.  */
-         else if (exit && body[i] == exit->src
-                  && stmt == last_stmt (exit->src))
-           {
-             if (dump_file && (dump_flags & TDF_DETAILS))
-               fprintf (dump_file, "   Exit condition will be eliminated "
-                        "in peeled copies.\n");
-             likely_eliminated_peeled = true;
-           }
-         else if (edge_to_cancel && body[i] == edge_to_cancel->src
-                  && stmt == last_stmt (edge_to_cancel->src))
-           {
-             if (dump_file && (dump_flags & TDF_DETAILS))
-               fprintf (dump_file, "   Exit condition will be eliminated "
-                        "in last copy.\n");
-             likely_eliminated_last = true;
-           }
-         /* Sets of IV variables  */
-         else if (gimple_code (stmt) == GIMPLE_ASSIGN
-             && constant_after_peeling (gimple_assign_lhs (stmt), stmt, loop))
+         if (!gimple_has_side_effects (stmt))
            {
-             if (dump_file && (dump_flags & TDF_DETAILS))
-               fprintf (dump_file, "   Induction variable computation will"
-                        " be folded away.\n");
-             likely_eliminated = true;
-           }
-         /* Assignments of IV variables.  */
-         else if (gimple_code (stmt) == GIMPLE_ASSIGN
-                  && TREE_CODE (gimple_assign_lhs (stmt)) == SSA_NAME
-                  && constant_after_peeling (gimple_assign_rhs1 (stmt), stmt, loop)
-                  && (gimple_assign_rhs_class (stmt) != GIMPLE_BINARY_RHS
-                      || constant_after_peeling (gimple_assign_rhs2 (stmt),
-                                                 stmt, loop)))
-           {
-             size->constant_iv = true;
-             if (dump_file && (dump_flags & TDF_DETAILS))
-               fprintf (dump_file, "   Constant expression will be folded away.\n");
-             likely_eliminated = true;
-           }
-         /* Conditionals.  */
-         else if ((gimple_code (stmt) == GIMPLE_COND
-                   && constant_after_peeling (gimple_cond_lhs (stmt), stmt, loop)
-                   && constant_after_peeling (gimple_cond_rhs (stmt), stmt, loop)
-                   /* We don't simplify all constant compares so make sure
-                      they are not both constant already.  See PR70288.  */
-                   && (! is_gimple_min_invariant (gimple_cond_lhs (stmt))
-                       || ! is_gimple_min_invariant (gimple_cond_rhs (stmt))))
-                  || (gimple_code (stmt) == GIMPLE_SWITCH
-                      && constant_after_peeling (gimple_switch_index (
-                                                   as_a <gswitch *> (stmt)),
+             /* Exit conditional.  */
+             if (exit && body[i] == exit->src
+                 && stmt == last_stmt (exit->src))
+               {
+                 if (dump_file && (dump_flags & TDF_DETAILS))
+                   fprintf (dump_file, "   Exit condition will be eliminated "
+                            "in peeled copies.\n");
+                 likely_eliminated_peeled = true;
+               }
+             if (edge_to_cancel && body[i] == edge_to_cancel->src
+                 && stmt == last_stmt (edge_to_cancel->src))
+               {
+                 if (dump_file && (dump_flags & TDF_DETAILS))
+                   fprintf (dump_file, "   Exit condition will be eliminated "
+                            "in last copy.\n");
+                 likely_eliminated_last = true;
+               }
+             /* Sets of IV variables.  */
+             if (gimple_code (stmt) == GIMPLE_ASSIGN
+                 && constant_after_peeling (gimple_assign_lhs (stmt), stmt, loop))
+               {
+                 if (dump_file && (dump_flags & TDF_DETAILS))
+                   fprintf (dump_file, "   Induction variable computation will"
+                            " be folded away.\n");
+                 likely_eliminated = true;
+               }
+             /* Assignments of IV variables.  */
+             else if (gimple_code (stmt) == GIMPLE_ASSIGN
+                      && TREE_CODE (gimple_assign_lhs (stmt)) == SSA_NAME
+                      && constant_after_peeling (gimple_assign_rhs1 (stmt),
                                                  stmt, loop)
-                      && ! is_gimple_min_invariant (gimple_switch_index (
-                                                      as_a <gswitch *> (stmt)))))
-           {
-             if (dump_file && (dump_flags & TDF_DETAILS))
-               fprintf (dump_file, "   Constant conditional.\n");
-             likely_eliminated = true;
+                      && (gimple_assign_rhs_class (stmt) != GIMPLE_BINARY_RHS
+                          || constant_after_peeling (gimple_assign_rhs2 (stmt),
+                                                     stmt, loop))
+                      && gimple_assign_rhs_class (stmt) != GIMPLE_TERNARY_RHS)
+               {
+                 size->constant_iv = true;
+                 if (dump_file && (dump_flags & TDF_DETAILS))
+                   fprintf (dump_file,
+                            "   Constant expression will be folded away.\n");
+                 likely_eliminated = true;
+               }
+             /* Conditionals.  */
+             else if ((gimple_code (stmt) == GIMPLE_COND
+                       && constant_after_peeling (gimple_cond_lhs (stmt), stmt,
+                                                  loop)
+                       && constant_after_peeling (gimple_cond_rhs (stmt), stmt,
+                                                  loop)
+                       /* We don't simplify all constant compares so make sure
+                          they are not both constant already.  See PR70288.  */
+                       && (! is_gimple_min_invariant (gimple_cond_lhs (stmt))
+                           || ! is_gimple_min_invariant
+                                (gimple_cond_rhs (stmt))))
+                      || (gimple_code (stmt) == GIMPLE_SWITCH
+                          && constant_after_peeling (gimple_switch_index (
+                                                       as_a <gswitch *>
+                                                         (stmt)),
+                                                     stmt, loop)
+                          && ! is_gimple_min_invariant
+                                  (gimple_switch_index
+                                     (as_a <gswitch *> (stmt)))))
+               {
+                 if (dump_file && (dump_flags & TDF_DETAILS))
+                   fprintf (dump_file, "   Constant conditional.\n");
+                 likely_eliminated = true;
+               }
            }
 
          size->overall += num;
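
The restructuring in this hunk hoists the gimple_has_side_effects test into a guard and splits the first two else-if arms into independent ifs, so one statement can be recognized as eliminable both in the peeled copies and in the last copy. A compilable toy illustration (not GCC code):

    // Toy illustration of the control-flow change in
    // tree_estimate_loop_size: the first two tests are now independent
    // "if"s under one side-effect guard, so both flags can be set for
    // the same statement.
    #include <cstdio>

    int
    main ()
    {
      bool has_side_effects = false;
      bool is_exit_cond = true, is_cancel_cond = true;
      bool elim_peeled = false, elim_last = false;

      if (!has_side_effects)
        {
          if (is_exit_cond)
            elim_peeled = true;
          if (is_cancel_cond)   // no longer "else if"
            elim_last = true;
        }
      printf ("%d %d\n", elim_peeled, elim_last);
      return 0;
    }
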
@@ -339,26 +353,24 @@ tree_estimate_loop_size (struct loop *loop, edge exit, edge edge_to_cancel, stru
       for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
        {
          gimple *stmt = gsi_stmt (gsi);
-         if (gimple_code (stmt) == GIMPLE_CALL)
+         if (gimple_code (stmt) == GIMPLE_CALL
+             && !gimple_inexpensive_call_p (as_a <gcall *> (stmt)))
            {
              int flags = gimple_call_flags (stmt);
-             tree decl = gimple_call_fndecl (stmt);
-
-             if (decl && DECL_IS_BUILTIN (decl)
-                 && is_inexpensive_builtin (decl))
-               ;
-             else if (flags & (ECF_PURE | ECF_CONST))
+             if (flags & (ECF_PURE | ECF_CONST))
                size->num_pure_calls_on_hot_path++;
              else
                size->num_non_pure_calls_on_hot_path++;
              size->num_branches_on_hot_path ++;
            }
-         else if (gimple_code (stmt) != GIMPLE_CALL
-                  && gimple_code (stmt) != GIMPLE_DEBUG)
+         /* Count inexpensive calls as non-calls, because they will likely
+            expand inline.  */
+         else if (gimple_code (stmt) != GIMPLE_DEBUG)
            size->non_call_stmts_on_hot_path++;
          if (((gimple_code (stmt) == GIMPLE_COND
                && (!constant_after_peeling (gimple_cond_lhs (stmt), stmt, loop)
-                   || constant_after_peeling (gimple_cond_rhs (stmt), stmt, loop)))
+                   || !constant_after_peeling (gimple_cond_rhs (stmt), stmt,
+                                               loop)))
               || (gimple_code (stmt) == GIMPLE_SWITCH
                   && !constant_after_peeling (gimple_switch_index (
                                                 as_a <gswitch *> (stmt)),
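
Besides counting inexpensive calls as ordinary statements, this hunk adds the apparently missing negation on the RHS test of the hot-path branch counter. A toy check of the difference (not GCC code):

    // Toy check of the corrected hot-path branch test: a conditional
    // stays a branch only if either operand is *not* constant after
    // peeling; the old expression lacked the second '!'.
    #include <cstdio>

    int
    main ()
    {
      bool lhs_const = true, rhs_const = true;   // both operands fold
      bool old_buggy = !lhs_const || rhs_const;  // true: miscounted as branch
      bool fixed     = !lhs_const || !rhs_const; // false: branch folds away
      printf ("old=%d fixed=%d\n", old_buggy, fixed);
      return 0;
    }
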
@@ -430,7 +442,7 @@ estimated_unrolled_size (struct loop_size *size,
    The other cases are hopefully rare and will be cleaned up later.  */
 
 static edge
-loop_edge_to_cancel (struct loop *loop)
+loop_edge_to_cancel (class loop *loop)
 {
   vec<edge> exits;
   unsigned i;
@@ -484,9 +496,9 @@ loop_edge_to_cancel (struct loop *loop)
    known to not be executed.  */
 
 static bool
-remove_exits_and_undefined_stmts (struct loop *loop, unsigned int npeeled)
+remove_exits_and_undefined_stmts (class loop *loop, unsigned int npeeled)
 {
-  struct nb_iter_bound *elt;
+  class nb_iter_bound *elt;
   bool changed = false;
 
   for (elt = loop->bounds; elt; elt = elt->next)
@@ -507,7 +519,7 @@ remove_exits_and_undefined_stmts (struct loop *loop, unsigned int npeeled)
          if (dump_file && (dump_flags & TDF_DETAILS))
            {
              fprintf (dump_file, "Forced statement unreachable: ");
-             print_gimple_stmt (dump_file, elt->stmt, 0, 0);
+             print_gimple_stmt (dump_file, elt->stmt, 0);
            }
        }
       /* If we know the exit will be taken after peeling, update.  */
@@ -520,10 +532,11 @@ remove_exits_and_undefined_stmts (struct loop *loop, unsigned int npeeled)
          if (dump_file && (dump_flags & TDF_DETAILS))
            {
              fprintf (dump_file, "Forced exit to be taken: ");
-             print_gimple_stmt (dump_file, elt->stmt, 0, 0);
+             print_gimple_stmt (dump_file, elt->stmt, 0);
            }
          if (!loop_exit_edge_p (loop, exit_edge))
            exit_edge = EDGE_SUCC (bb, 1);
+         exit_edge->probability = profile_probability::always ();
          gcc_checking_assert (loop_exit_edge_p (loop, exit_edge));
          gcond *cond_stmt = as_a <gcond *> (elt->stmt);
          if (exit_edge->flags & EDGE_TRUE_VALUE)
@@ -541,9 +554,9 @@ remove_exits_and_undefined_stmts (struct loop *loop, unsigned int npeeled)
    discovered.  */
 
 static bool
-remove_redundant_iv_tests (struct loop *loop)
+remove_redundant_iv_tests (class loop *loop)
 {
-  struct nb_iter_bound *elt;
+  class nb_iter_bound *elt;
   bool changed = false;
 
   if (!loop->any_upper_bound)
@@ -557,7 +570,7 @@ remove_redundant_iv_tests (struct loop *loop)
        {
          basic_block bb = gimple_bb (elt->stmt);
          edge exit_edge = EDGE_SUCC (bb, 0);
-         struct tree_niter_desc niter;
+         class tree_niter_desc niter;
 
          if (!loop_exit_edge_p (loop, exit_edge))
            exit_edge = EDGE_SUCC (bb, 1);
@@ -577,7 +590,7 @@ remove_redundant_iv_tests (struct loop *loop)
          if (dump_file && (dump_flags & TDF_DETAILS))
            {
              fprintf (dump_file, "Removed pointless exit: ");
-             print_gimple_stmt (dump_file, elt->stmt, 0, 0);
+             print_gimple_stmt (dump_file, elt->stmt, 0);
            }
          gcond *cond_stmt = as_a <gcond *> (elt->stmt);
          if (exit_edge->flags & EDGE_TRUE_VALUE)
@@ -591,9 +604,11 @@ remove_redundant_iv_tests (struct loop *loop)
   return changed;
 }
 
-/* Stores loops that will be unlooped after we process whole loop tree. */
+/* Stores loops that will be unlooped and edges that will be removed
+   after we process the whole loop tree.  */
 static vec<loop_p> loops_to_unloop;
 static vec<int> loops_to_unloop_nunroll;
+static vec<edge> edges_to_remove;
 /* Stores loops that have been peeled.  */
 static bitmap peeled_loops;
 
@@ -615,7 +630,7 @@ unloop_loops (bitmap loop_closed_ssa_invalidated,
 {
   while (loops_to_unloop.length ())
     {
-      struct loop *loop = loops_to_unloop.pop ();
+      class loop *loop = loops_to_unloop.pop ();
       int n_unroll = loops_to_unloop_nunroll.pop ();
       basic_block latch = loop->latch;
       edge latch_edge = loop_latch_edge (loop);
@@ -633,14 +648,12 @@ unloop_loops (bitmap loop_closed_ssa_invalidated,
         it in.  */
       stmt = gimple_build_call (builtin_decl_implicit (BUILT_IN_UNREACHABLE), 0);
       latch_edge = make_edge (latch, create_basic_block (NULL, NULL, latch), flags);
-      latch_edge->probability = 0;
-      latch_edge->count = 0;
+      latch_edge->probability = profile_probability::never ();
       latch_edge->flags |= flags;
       latch_edge->goto_locus = locus;
 
-      latch_edge->dest->loop_father = current_loops->tree_root;
-      latch_edge->dest->count = 0;
-      latch_edge->dest->frequency = 0;
+      add_bb_to_loop (latch_edge->dest, current_loops->tree_root);
+      latch_edge->dest->count = profile_count::zero ();
       set_immediate_dominator (CDI_DOMINATORS, latch_edge->dest, latch_edge->src);
 
       gsi = gsi_start_bb (latch_edge->dest);
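
This hunk is part of the profile-representation rewrite: raw probability and count integers become profile_probability and profile_count values. A self-contained model of the idea (toy types mirroring the names used elsewhere in this diff, not GCC's profile-count.h):

    // Toy model of the new profile types: values carry an "initialized"
    // flag instead of magic zeroes, so an unknown profile is
    // distinguishable from genuinely never-executed code.
    #include <cstdio>

    struct prob_model
    {
      bool init; double v;
      static prob_model never ()  { return { true, 0.0 }; }
      static prob_model always () { return { true, 1.0 }; }
    };

    struct count_model
    {
      bool init; long v;
      static count_model zero () { return { true, 0 }; }
      bool initialized_p () const { return init; }
      prob_model probability_in (const count_model &total) const
      {
        return total.v ? prob_model{ true, (double) v / total.v }
                       : prob_model::never ();
      }
    };

    int
    main ()
    {
      count_model entry = count_model::zero ();
      count_model header = { true, 100 };
      entry.v += 25;
      prob_model p = entry.probability_in (header);
      printf ("%.2f initialized=%d\n", p.v, (int) header.initialized_p ());
      return 0;
    }
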
@@ -648,6 +661,23 @@ unloop_loops (bitmap loop_closed_ssa_invalidated,
     }
   loops_to_unloop.release ();
   loops_to_unloop_nunroll.release ();
+
+  /* Remove edges in peeled copies.  Given that remove_path removes dominated
+     regions, we need to cope with removal of already removed paths.  */
+  unsigned i;
+  edge e;
+  auto_vec<int, 20> src_bbs;
+  src_bbs.reserve_exact (edges_to_remove.length ());
+  FOR_EACH_VEC_ELT (edges_to_remove, i, e)
+    src_bbs.quick_push (e->src->index);
+  FOR_EACH_VEC_ELT (edges_to_remove, i, e)
+    if (BASIC_BLOCK_FOR_FN (cfun, src_bbs[i]))
+      {
+       bool ok = remove_path (e, irred_invalidated,
+                              loop_closed_ssa_invalidated);
+       gcc_assert (ok);
+      }
+  edges_to_remove.release ();
 }
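
The new tail of unloop_loops drains the edges_to_remove queue. The source BB indices are snapshotted first because remove_path deletes dominated regions, which may invalidate edges queued later. A toy model of the idiom (not GCC code):

    // Toy model of the deferred remove_path idiom: snapshot a stable key
    // (the source BB index) for every queued edge, then re-check the key
    // before each removal, since removing one path may delete blocks
    // that other queued edges hang off.
    #include <cstdio>
    #include <map>
    #include <vector>

    int
    main ()
    {
      std::map<int, const char *> blocks = { { 3, "bb3" }, { 7, "bb7" } };
      std::vector<int> src_bbs = { 3, 7 };   // snapshot taken up front

      for (int bb : src_bbs)
        if (blocks.count (bb))               // models BASIC_BLOCK_FOR_FN
          {
            printf ("remove_path from bb%d\n", bb);
            blocks.erase (bb);
            blocks.erase (7);                // a removal may take bb7 too
          }
      return 0;
    }
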
 
 /* Tries to unroll LOOP completely, i.e. NITER times.
@@ -659,17 +689,15 @@ unloop_loops (bitmap loop_closed_ssa_invalidated,
    a summary of the unroll to the dump file.  */
 
 static bool
-try_unroll_loop_completely (struct loop *loop,
-                           edge exit, tree niter,
+try_unroll_loop_completely (class loop *loop,
+                           edge exit, tree niter, bool may_be_zero,
                            enum unroll_level ul,
                            HOST_WIDE_INT maxiter,
-                           location_t locus)
+                           dump_user_location_t locus, bool allow_peel)
 {
-  unsigned HOST_WIDE_INT n_unroll = 0, ninsns, unr_insns;
-  struct loop_size size;
+  unsigned HOST_WIDE_INT n_unroll = 0;
   bool n_unroll_found = false;
   edge edge_to_cancel = NULL;
-  int report_flags = MSG_OPTIMIZED_LOCATIONS | TDF_RTL | TDF_DETAILS;
 
   /* See if we proved number of iterations to be low constant.
 
@@ -691,18 +719,19 @@ try_unroll_loop_completely (struct loop *loop,
       if (edge_to_cancel == exit)
        edge_to_cancel = EDGE_SUCC (exit->src, 1);
     }
-  /* We do not know the number of iterations and thus we can not eliminate
+  /* We do not know the number of iterations and thus we cannot eliminate
      the EXIT edge.  */
   else
     exit = NULL;
 
   /* See if we can improve our estimate by using recorded loop bounds.  */
-  if (maxiter >= 0
+  if ((allow_peel || maxiter == 0 || ul == UL_NO_GROWTH)
+      && maxiter >= 0
       && (!n_unroll_found || (unsigned HOST_WIDE_INT)maxiter < n_unroll))
     {
       n_unroll = maxiter;
       n_unroll_found = true;
-      /* Loop terminates before the IV variable test, so we can not
+      /* Loop terminates before the IV variable test, so we cannot
         remove it in the last iteration.  */
       edge_to_cancel = NULL;
     }
@@ -710,7 +739,8 @@ try_unroll_loop_completely (struct loop *loop,
   if (!n_unroll_found)
     return false;
 
-  if (n_unroll > (unsigned) PARAM_VALUE (PARAM_MAX_COMPLETELY_PEEL_TIMES))
+  if (!loop->unroll
+      && n_unroll > (unsigned) param_max_completely_peel_times)
     {
       if (dump_file && (dump_flags & TDF_DETAILS))
        fprintf (dump_file, "Not unrolling loop %d "
@@ -724,127 +754,143 @@ try_unroll_loop_completely (struct loop *loop,
 
   if (n_unroll)
     {
-      sbitmap wont_exit;
-      edge e;
-      unsigned i;
-      bool large;
-      vec<edge> to_remove = vNULL;
       if (ul == UL_SINGLE_ITER)
        return false;
 
-      /* EXIT can be removed only if we are sure it passes first N_UNROLL
-        iterations.  */
-      bool remove_exit = (exit && niter
-                         && TREE_CODE (niter) == INTEGER_CST
-                         && wi::leu_p (n_unroll, wi::to_widest (niter)));
-
-      large = tree_estimate_loop_size
-                (loop, remove_exit ? exit : NULL, edge_to_cancel, &size,
-                 PARAM_VALUE (PARAM_MAX_COMPLETELY_PEELED_INSNS));
-      ninsns = size.overall;
-      if (large)
+      if (loop->unroll)
        {
-         if (dump_file && (dump_flags & TDF_DETAILS))
-           fprintf (dump_file, "Not unrolling loop %d: it is too large.\n",
-                    loop->num);
-         return false;
+         /* If the unrolling factor is too large, bail out.  */
+         if (n_unroll > (unsigned)loop->unroll)
+           {
+             if (dump_file && (dump_flags & TDF_DETAILS))
+               fprintf (dump_file,
+                        "Not unrolling loop %d: "
+                        "user didn't want it unrolled completely.\n",
+                        loop->num);
+             return false;
+           }
        }
-
-      unr_insns = estimated_unrolled_size (&size, n_unroll);
-      if (dump_file && (dump_flags & TDF_DETAILS))
+      else
        {
-         fprintf (dump_file, "  Loop size: %d\n", (int) ninsns);
-         fprintf (dump_file, "  Estimated size after unrolling: %d\n",
-                  (int) unr_insns);
-       }
+         struct loop_size size;
+         /* EXIT can be removed only if we are sure it passes first N_UNROLL
+            iterations.  */
+         bool remove_exit = (exit && niter
+                             && TREE_CODE (niter) == INTEGER_CST
+                             && wi::leu_p (n_unroll, wi::to_widest (niter)));
+         bool large
+           = tree_estimate_loop_size
+               (loop, remove_exit ? exit : NULL, edge_to_cancel, &size,
+                param_max_completely_peeled_insns);
+         if (large)
+           {
+             if (dump_file && (dump_flags & TDF_DETAILS))
+               fprintf (dump_file, "Not unrolling loop %d: it is too large.\n",
+                        loop->num);
+             return false;
+           }
 
-      /* If the code is going to shrink, we don't need to be extra cautious
-        on guessing if the unrolling is going to be profitable.  */
-      if (unr_insns
-         /* If there is IV variable that will become constant, we save
-            one instruction in the loop prologue we do not account
-            otherwise.  */
-         <= ninsns + (size.constant_iv != false))
-       ;
-      /* We unroll only inner loops, because we do not consider it profitable
-        otheriwse.  We still can cancel loopback edge of not rolling loop;
-        this is always a good idea.  */
-      else if (ul == UL_NO_GROWTH)
-       {
+         unsigned HOST_WIDE_INT ninsns = size.overall;
+         unsigned HOST_WIDE_INT unr_insns
+           = estimated_unrolled_size (&size, n_unroll);
          if (dump_file && (dump_flags & TDF_DETAILS))
-           fprintf (dump_file, "Not unrolling loop %d: size would grow.\n",
-                    loop->num);
-         return false;
-       }
-      /* Outer loops tend to be less interesting candidates for complete
-        unrolling unless we can do a lot of propagation into the inner loop
-        body.  For now we disable outer loop unrolling when the code would
-        grow.  */
-      else if (loop->inner)
-       {
-         if (dump_file && (dump_flags & TDF_DETAILS))
-           fprintf (dump_file, "Not unrolling loop %d: "
-                    "it is not innermost and code would grow.\n",
-                    loop->num);
-         return false;
-       }
-      /* If there is call on a hot path through the loop, then
-        there is most probably not much to optimize.  */
-      else if (size.num_non_pure_calls_on_hot_path)
-       {
-         if (dump_file && (dump_flags & TDF_DETAILS))
-           fprintf (dump_file, "Not unrolling loop %d: "
-                    "contains call and code would grow.\n",
-                    loop->num);
-         return false;
-       }
-      /* If there is pure/const call in the function, then we
-        can still optimize the unrolled loop body if it contains
-        some other interesting code than the calls and code
-        storing or cumulating the return value.  */
-      else if (size.num_pure_calls_on_hot_path
-              /* One IV increment, one test, one ivtmp store
-                 and one useful stmt.  That is about minimal loop
-                 doing pure call.  */
-              && (size.non_call_stmts_on_hot_path
-                  <= 3 + size.num_pure_calls_on_hot_path))
-       {
-         if (dump_file && (dump_flags & TDF_DETAILS))
-           fprintf (dump_file, "Not unrolling loop %d: "
-                    "contains just pure calls and code would grow.\n",
-                    loop->num);
-         return false;
-       }
-      /* Complete unrolling is a major win when control flow is removed and
-        one big basic block is created.  If the loop contains control flow
-        the optimization may still be a win because of eliminating the loop
-        overhead but it also may blow the branch predictor tables.
-        Limit number of branches on the hot path through the peeled
-        sequence.  */
-      else if (size.num_branches_on_hot_path * (int)n_unroll
-              > PARAM_VALUE (PARAM_MAX_PEEL_BRANCHES))
-       {
-         if (dump_file && (dump_flags & TDF_DETAILS))
-           fprintf (dump_file, "Not unrolling loop %d: "
-                    " number of branches on hot path in the unrolled sequence"
-                    " reach --param max-peel-branches limit.\n",
-                    loop->num);
-         return false;
-       }
-      else if (unr_insns
-              > (unsigned) PARAM_VALUE (PARAM_MAX_COMPLETELY_PEELED_INSNS))
-       {
-         if (dump_file && (dump_flags & TDF_DETAILS))
-           fprintf (dump_file, "Not unrolling loop %d: "
-                    "(--param max-completely-peeled-insns limit reached).\n",
-                    loop->num);
-         return false;
+           {
+             fprintf (dump_file, "  Loop size: %d\n", (int) ninsns);
+             fprintf (dump_file, "  Estimated size after unrolling: %d\n",
+                      (int) unr_insns);
+           }
+
+         /* If the code is going to shrink, we don't need to be extra
+            cautious on guessing if the unrolling is going to be
+            profitable.  */
+         if (unr_insns
+             /* If there is an IV variable that will become constant, we
+                save one instruction in the loop prologue that we do not
+                account otherwise.  */
+             <= ninsns + (size.constant_iv != false))
+           ;
+         /* We unroll only inner loops, because we do not consider it
+            profitable otherwise.  We can still cancel the loopback edge
+            of a loop that does not roll; this is always a good idea.  */
+         else if (ul == UL_NO_GROWTH)
+           {
+             if (dump_file && (dump_flags & TDF_DETAILS))
+               fprintf (dump_file, "Not unrolling loop %d: size would grow.\n",
+                        loop->num);
+             return false;
+           }
+         /* Outer loops tend to be less interesting candidates for
+            complete unrolling unless we can do a lot of propagation
+            into the inner loop body.  For now we disable outer loop
+            unrolling when the code would grow.  */
+         else if (loop->inner)
+           {
+             if (dump_file && (dump_flags & TDF_DETAILS))
+               fprintf (dump_file, "Not unrolling loop %d: "
+                        "it is not innermost and code would grow.\n",
+                        loop->num);
+             return false;
+           }
+         /* If there is a call on a hot path through the loop, then
+            there is most probably not much to optimize.  */
+         else if (size.num_non_pure_calls_on_hot_path)
+           {
+             if (dump_file && (dump_flags & TDF_DETAILS))
+               fprintf (dump_file, "Not unrolling loop %d: "
+                        "contains call and code would grow.\n",
+                        loop->num);
+             return false;
+           }
+         /* If there is a pure/const call in the function, then we can
+            still optimize the unrolled loop body if it contains some
+            interesting code other than the calls and the code storing or
+            accumulating the return value.  */
+         else if (size.num_pure_calls_on_hot_path
+                  /* One IV increment, one test, one ivtmp store and
+                     one useful stmt.  That is about the minimal loop
+                     doing a pure call.  */
+                  && (size.non_call_stmts_on_hot_path
+                      <= 3 + size.num_pure_calls_on_hot_path))
+           {
+             if (dump_file && (dump_flags & TDF_DETAILS))
+               fprintf (dump_file, "Not unrolling loop %d: "
+                        "contains just pure calls and code would grow.\n",
+                        loop->num);
+             return false;
+           }
+         /* Complete unrolling is a major win when control flow is
+            removed and one big basic block is created.  If the loop
+            contains control flow the optimization may still be a win
+            because of eliminating the loop overhead but it also may
+            blow the branch predictor tables.  Limit number of
+            branches on the hot path through the peeled sequence.  */
+         else if (size.num_branches_on_hot_path * (int)n_unroll
+                  > param_max_peel_branches)
+           {
+             if (dump_file && (dump_flags & TDF_DETAILS))
+               fprintf (dump_file, "Not unrolling loop %d: "
+                        "number of branches on hot path in the unrolled "
+                        "sequence reaches --param max-peel-branches limit.\n",
+                        loop->num);
+             return false;
+           }
+         else if (unr_insns
+                  > (unsigned) param_max_completely_peeled_insns)
+           {
+             if (dump_file && (dump_flags & TDF_DETAILS))
+               fprintf (dump_file, "Not unrolling loop %d: "
+                        "number of insns in the unrolled sequence reaches "
+                        "--param max-completely-peeled-insns limit.\n",
+                        loop->num);
+             return false;
+           }
        }
-      dump_printf_loc (report_flags, locus,
-                       "loop turned into non-loop; it never loops.\n");
+
+      if (!dbg_cnt (gimple_unroll))
+       return false;
 
       initialize_original_copy_tables ();
-      wont_exit = sbitmap_alloc (n_unroll + 1);
+      auto_sbitmap wont_exit (n_unroll + 1);
       if (exit && niter
          && TREE_CODE (niter) == INTEGER_CST
          && wi::leu_p (n_unroll, wi::to_widest (niter)))
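
Within the rewritten body, the manually managed sbitmap (sbitmap_alloc plus a free on every exit path) becomes an auto_sbitmap, and the transform is additionally gated on the gimple_unroll debug counter. A minimal RAII sketch of what that buys (toy type, not GCC's auto_sbitmap):

    // Minimal RAII sketch of why the explicit sbitmap_alloc/free pairs
    // could be dropped: the bitmap frees itself on every early return.
    #include <cstdio>
    #include <cstdlib>

    struct auto_bitmap_model
    {
      unsigned char *bits;
      explicit auto_bitmap_model (unsigned nbits)
        : bits ((unsigned char *) calloc ((nbits + 7) / 8, 1)) {}
      ~auto_bitmap_model () { free (bits); }  // runs on early returns too
    };

    static bool
    try_transform (unsigned n_unroll, bool bail_out)
    {
      auto_bitmap_model wont_exit (n_unroll + 1);
      if (bail_out)
        return false;          // no leak: destructor releases the bitmap
      return true;
    }

    int
    main ()
    {
      printf ("%d\n", try_transform (8, true));
      return 0;
    }
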
@@ -859,32 +905,24 @@ try_unroll_loop_completely (struct loop *loop,
          exit = NULL;
          bitmap_clear (wont_exit);
        }
+      if (may_be_zero)
+       bitmap_clear_bit (wont_exit, 1);
 
       if (!gimple_duplicate_loop_to_header_edge (loop, loop_preheader_edge (loop),
                                                 n_unroll, wont_exit,
-                                                exit, &to_remove,
+                                                exit, &edges_to_remove,
                                                 DLTHE_FLAG_UPDATE_FREQ
                                                 | DLTHE_FLAG_COMPLETTE_PEEL))
        {
           free_original_copy_tables ();
-         free (wont_exit);
          if (dump_file && (dump_flags & TDF_DETAILS))
            fprintf (dump_file, "Failed to duplicate the loop\n");
          return false;
        }
 
-      FOR_EACH_VEC_ELT (to_remove, i, e)
-       {
-         bool ok = remove_path (e);
-         gcc_assert (ok);
-       }
-
-      to_remove.release ();
-      free (wont_exit);
       free_original_copy_tables ();
     }
 
-
   /* Remove the conditional from the last copy of the loop.  */
   if (edge_to_cancel)
     {
@@ -895,8 +933,8 @@ try_unroll_loop_completely (struct loop *loop,
       else
        gimple_cond_make_true (cond);
       update_stmt (cond);
-      /* Do not remove the path. Doing so may remove outer loop
-        and confuse bookkeeping code in tree_unroll_loops_completelly.  */
+      /* Do not remove the path, as doing so may remove outer loop and
+        confuse bookkeeping code in tree_unroll_loops_completely.  */
     }
 
   /* Store the loop for later unlooping and exit removal.  */
@@ -912,11 +950,11 @@ try_unroll_loop_completely (struct loop *loop,
         {
           dump_printf_loc (MSG_OPTIMIZED_LOCATIONS | TDF_DETAILS, locus,
                            "loop with %d iterations completely unrolled",
-                          (int) (n_unroll + 1));
-          if (profile_info)
+                          (int) n_unroll);
+          if (loop->header->count.initialized_p ())
             dump_printf (MSG_OPTIMIZED_LOCATIONS | TDF_DETAILS,
                          " (header execution count %d)",
-                         (int)loop->header->count);
+                         (int)loop->header->count.to_gcov_type ());
           dump_printf (MSG_OPTIMIZED_LOCATIONS | TDF_DETAILS, "\n");
         }
     }
@@ -952,19 +990,16 @@ estimated_peeled_sequence_size (struct loop_size *size,
    Parameters are the same as for try_unroll_loops_completely */
 
 static bool
-try_peel_loop (struct loop *loop,
-              edge exit, tree niter,
+try_peel_loop (class loop *loop,
+              edge exit, tree niter, bool may_be_zero,
               HOST_WIDE_INT maxiter)
 {
   HOST_WIDE_INT npeel;
   struct loop_size size;
   int peeled_size;
-  sbitmap wont_exit;
-  unsigned i;
-  vec<edge> to_remove = vNULL;
-  edge e;
 
-  if (!flag_peel_loops || PARAM_VALUE (PARAM_MAX_PEEL_TIMES) <= 0
+  if (!flag_peel_loops
+      || param_max_peel_times <= 0
       || !peeled_loops)
     return false;
 
@@ -975,20 +1010,29 @@ try_peel_loop (struct loop *loop,
       return false;
     }
 
+  /* We don't peel loops that will be unrolled as this can duplicate a
+     loop more times than the user requested.  */
+  if (loop->unroll)
+    {
+      if (dump_file)
+        fprintf (dump_file, "Not peeling: user didn't want it peeled.\n");
+      return false;
+    }
+
   /* Peel only innermost loops.
      While the code is perfectly capable of peeling non-innermost loops,
      the heuristics would probably need some improvements. */
   if (loop->inner)
     {
       if (dump_file)
-        fprintf (dump_file, "Not peeling: outer loop\n");
+       fprintf (dump_file, "Not peeling: outer loop\n");
       return false;
     }
 
   if (!optimize_loop_for_speed_p (loop))
     {
       if (dump_file)
-        fprintf (dump_file, "Not peeling: cold loop\n");
+       fprintf (dump_file, "Not peeling: cold loop\n");
       return false;
     }
 
@@ -1006,7 +1050,7 @@ try_peel_loop (struct loop *loop,
   if (maxiter >= 0 && maxiter <= npeel)
     {
       if (dump_file)
-        fprintf (dump_file, "Not peeling: upper bound is known so can "
+       fprintf (dump_file, "Not peeling: upper bound is known so can "
                 "unroll completely\n");
       return false;
     }
@@ -1014,10 +1058,10 @@ try_peel_loop (struct loop *loop,
   /* We want to peel estimated number of iterations + 1 (so we never
      enter the loop on quick path).  Check against PARAM_MAX_PEEL_TIMES
      and be sure to avoid overflows.  */
-  if (npeel > PARAM_VALUE (PARAM_MAX_PEEL_TIMES) - 1)
+  if (npeel > param_max_peel_times - 1)
     {
       if (dump_file)
-        fprintf (dump_file, "Not peeling: rolls too much "
+       fprintf (dump_file, "Not peeling: rolls too much "
                 "(%i + 1 > --param max-peel-times)\n", (int) npeel);
       return false;
     }
@@ -1025,19 +1069,22 @@ try_peel_loop (struct loop *loop,
 
   /* Check peeled loops size.  */
   tree_estimate_loop_size (loop, exit, NULL, &size,
-                          PARAM_VALUE (PARAM_MAX_PEELED_INSNS));
+                          param_max_peeled_insns);
   if ((peeled_size = estimated_peeled_sequence_size (&size, (int) npeel))
-      > PARAM_VALUE (PARAM_MAX_PEELED_INSNS))
+      > param_max_peeled_insns)
     {
       if (dump_file)
-        fprintf (dump_file, "Not peeling: peeled sequence size is too large "
+       fprintf (dump_file, "Not peeling: peeled sequence size is too large "
                 "(%i insns > --param max-peel-insns)", peeled_size);
       return false;
     }
 
+  if (!dbg_cnt (gimple_unroll))
+    return false;
+
   /* Duplicate possibly eliminating the exits.  */
   initialize_original_copy_tables ();
-  wont_exit = sbitmap_alloc (npeel + 1);
+  auto_sbitmap wont_exit (npeel + 1);
   if (exit && niter
       && TREE_CODE (niter) == INTEGER_CST
       && wi::leu_p (npeel, wi::to_widest (niter)))
@@ -1050,21 +1097,16 @@ try_peel_loop (struct loop *loop,
       exit = NULL;
       bitmap_clear (wont_exit);
     }
+  if (may_be_zero)
+    bitmap_clear_bit (wont_exit, 1);
   if (!gimple_duplicate_loop_to_header_edge (loop, loop_preheader_edge (loop),
                                             npeel, wont_exit,
-                                            exit, &to_remove,
+                                            exit, &edges_to_remove,
                                             DLTHE_FLAG_UPDATE_FREQ))
     {
       free_original_copy_tables ();
-      free (wont_exit);
       return false;
     }
-  FOR_EACH_VEC_ELT (to_remove, i, e)
-    {
-      bool ok = remove_path (e);
-      gcc_assert (ok);
-    }
-  free (wont_exit);
   free_original_copy_tables ();
   if (dump_file && (dump_flags & TDF_DETAILS))
     {
@@ -1096,23 +1138,20 @@ try_peel_loop (struct loop *loop,
          loop->nb_iterations_likely_upper_bound = 0;
        }
     }
-  gcov_type entry_count = 0;
-  int entry_freq = 0;
+  profile_count entry_count = profile_count::zero ();
 
+  edge e;
   edge_iterator ei;
   FOR_EACH_EDGE (e, ei, loop->header->preds)
     if (e->src != loop->latch)
       {
-       entry_count += e->src->count;
-       entry_freq += e->src->frequency;
+       if (e->src->count.initialized_p ())
+         entry_count += e->src->count;
        gcc_assert (!flow_bb_inside_loop_p (loop, e->src));
       }
-  int scale = 1;
-  if (loop->header->count)
-    scale = RDIV (entry_count * REG_BR_PROB_BASE, loop->header->count);
-  else if (loop->header->frequency)
-    scale = RDIV (entry_freq * REG_BR_PROB_BASE, loop->header->frequency);
-  scale_loop_profile (loop, scale, 0);
+  profile_probability p;
+  p = entry_count.probability_in (loop->header->count);
+  scale_loop_profile (loop, p, 0);
   bitmap_set_bit (peeled_loops, loop->num);
   return true;
 }
@@ -1123,22 +1162,44 @@ try_peel_loop (struct loop *loop,
    Returns true if cfg is changed.   */
 
 static bool
-canonicalize_loop_induction_variables (struct loop *loop,
+canonicalize_loop_induction_variables (class loop *loop,
                                       bool create_iv, enum unroll_level ul,
-                                      bool try_eval)
+                                      bool try_eval, bool allow_peel)
 {
   edge exit = NULL;
   tree niter;
   HOST_WIDE_INT maxiter;
   bool modified = false;
-  location_t locus = UNKNOWN_LOCATION;
+  dump_user_location_t locus;
+  class tree_niter_desc niter_desc;
+  bool may_be_zero = false;
 
-  niter = number_of_latch_executions (loop);
+  /* For unrolling, allow conditional constant or zero iterations, thus
+     performing loop-header copying on the fly.  */
   exit = single_exit (loop);
+  niter = chrec_dont_know;
+  if (exit && number_of_iterations_exit (loop, exit, &niter_desc, false))
+    {
+      niter = niter_desc.niter;
+      may_be_zero
+       = niter_desc.may_be_zero && !integer_zerop (niter_desc.may_be_zero);
+    }
   if (TREE_CODE (niter) == INTEGER_CST)
-    locus = gimple_location (last_stmt (exit->src));
+    locus = last_stmt (exit->src);
   else
     {
+      /* For non-constant niter fold may_be_zero into niter again.  */
+      if (may_be_zero)
+       {
+         if (COMPARISON_CLASS_P (niter_desc.may_be_zero))
+           niter = fold_build3 (COND_EXPR, TREE_TYPE (niter),
+                                niter_desc.may_be_zero,
+                                build_int_cst (TREE_TYPE (niter), 0), niter);
+         else
+           niter = chrec_dont_know;
+         may_be_zero = false;
+       }
+
       /* If the loop has more than one exit, try checking all of them
         for # of iterations determinable through scev.  */
       if (!exit)
@@ -1151,7 +1212,7 @@ canonicalize_loop_induction_variables (struct loop *loop,
        niter = find_loop_niter_by_eval (loop, &exit);
 
       if (exit)
-        locus = gimple_location (last_stmt (exit->src));
+        locus = last_stmt (exit->src);
 
       if (TREE_CODE (niter) != INTEGER_CST)
        exit = NULL;
@@ -1161,8 +1222,10 @@ canonicalize_loop_induction_variables (struct loop *loop,
      by find_loop_niter_by_eval.  Be sure to keep it for future.  */
   if (niter && TREE_CODE (niter) == INTEGER_CST)
     {
+      vec<edge> exits = get_loop_exit_edges (loop);
       record_niter_bound (loop, wi::to_widest (niter),
-                         exit == single_likely_exit (loop), true);
+                         exit == single_likely_exit (loop, exits), true);
+      exits.release ();
     }
 
   /* Force re-computation of loop bounds so we can remove redundant exits.  */
@@ -1193,16 +1256,31 @@ canonicalize_loop_induction_variables (struct loop *loop,
      populates the loop bounds.  */
   modified |= remove_redundant_iv_tests (loop);
 
-  if (try_unroll_loop_completely (loop, exit, niter, ul, maxiter, locus))
+  if (try_unroll_loop_completely (loop, exit, niter, may_be_zero, ul,
+                                 maxiter, locus, allow_peel))
     return true;
 
   if (create_iv
       && niter && !chrec_contains_undetermined (niter)
       && exit && just_once_each_iteration_p (loop, exit->src))
-    create_canonical_iv (loop, exit, niter);
+    {
+      tree iv_niter = niter;
+      if (may_be_zero)
+       {
+         if (COMPARISON_CLASS_P (niter_desc.may_be_zero))
+           iv_niter = fold_build3 (COND_EXPR, TREE_TYPE (iv_niter),
+                                   niter_desc.may_be_zero,
+                                   build_int_cst (TREE_TYPE (iv_niter), 0),
+                                   iv_niter);
+         else
+           iv_niter = NULL_TREE;
+       }
+      if (iv_niter)
+       create_canonical_iv (loop, exit, iv_niter);
+    }
 
   if (ul == UL_ALL)
-    modified |= try_peel_loop (loop, exit, niter, maxiter);
+    modified |= try_peel_loop (loop, exit, niter, may_be_zero, maxiter);
 
   return modified;
 }
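
The may_be_zero handling above folds the "exit taken before the first iteration" condition back into the iteration count via a COND_EXPR. A toy model of the value being built (plain C++, not GIMPLE):

    // Toy model of the folded niter: if the exit can be taken before the
    // first iteration, the effective latch execution count is zero.
    #include <cstdio>

    static long
    effective_niter (bool may_be_zero_cond, long niter)
    {
      return may_be_zero_cond ? 0 : niter;  // fold_build3 (COND_EXPR, ...)
    }

    int
    main ()
    {
      printf ("%ld %ld\n", effective_niter (false, 7),
              effective_niter (true, 7));
      return 0;
    }
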
@@ -1213,19 +1291,18 @@ canonicalize_loop_induction_variables (struct loop *loop,
 unsigned int
 canonicalize_induction_variables (void)
 {
-  struct loop *loop;
+  class loop *loop;
   bool changed = false;
   bool irred_invalidated = false;
   bitmap loop_closed_ssa_invalidated = BITMAP_ALLOC (NULL);
 
-  free_numbers_of_iterations_estimates (cfun);
-  estimate_numbers_of_iterations ();
+  estimate_numbers_of_iterations (cfun);
 
   FOR_EACH_LOOP (loop, LI_FROM_INNERMOST)
     {
       changed |= canonicalize_loop_induction_variables (loop,
                                                        true, UL_SINGLE_ITER,
-                                                       true);
+                                                       true, false);
     }
   gcc_assert (!need_ssa_update_p (cfun));
 
@@ -1236,6 +1313,7 @@ canonicalize_induction_variables (void)
 
   /* Clean up the information about numbers of iterations, since brute force
      evaluation could reveal new information.  */
+  free_numbers_of_iterations_estimates (cfun);
   scev_reset ();
 
   if (!bitmap_empty_p (loop_closed_ssa_invalidated))
@@ -1250,74 +1328,53 @@ canonicalize_induction_variables (void)
   return 0;
 }
 
-/* Propagate constant SSA_NAMEs defined in basic block BB.  */
-
-static void
-propagate_constants_for_unrolling (basic_block bb)
-{
-  /* Look for degenerate PHI nodes with constant argument.  */
-  for (gphi_iterator gsi = gsi_start_phis (bb); !gsi_end_p (gsi); )
-    {
-      gphi *phi = gsi.phi ();
-      tree result = gimple_phi_result (phi);
-      tree arg = gimple_phi_arg_def (phi, 0);
-
-      if (! SSA_NAME_OCCURS_IN_ABNORMAL_PHI (result)
-         && gimple_phi_num_args (phi) == 1
-         && TREE_CODE (arg) == INTEGER_CST)
-       {
-         replace_uses_by (result, arg);
-         gsi_remove (&gsi, true);
-         release_ssa_name (result);
-       }
-      else
-       gsi_next (&gsi);
-    }
-
-  /* Look for assignments to SSA names with constant RHS.  */
-  for (gimple_stmt_iterator gsi = gsi_start_bb (bb); !gsi_end_p (gsi); )
-    {
-      gimple *stmt = gsi_stmt (gsi);
-      tree lhs;
-
-      if (is_gimple_assign (stmt)
-         && gimple_assign_rhs_code (stmt) == INTEGER_CST
-         && (lhs = gimple_assign_lhs (stmt), TREE_CODE (lhs) == SSA_NAME)
-         && !SSA_NAME_OCCURS_IN_ABNORMAL_PHI (lhs))
-       {
-         replace_uses_by (lhs, gimple_assign_rhs1 (stmt));
-         gsi_remove (&gsi, true);
-         release_ssa_name (lhs);
-       }
-      else
-       gsi_next (&gsi);
-    }
-}
-
 /* Process loops from innermost to outer, stopping at the innermost
    loop we unrolled.  */
 
 static bool
 tree_unroll_loops_completely_1 (bool may_increase_size, bool unroll_outer,
-                               vec<loop_p, va_heap>& father_stack,
-                               struct loop *loop)
+                               bitmap father_bbs, class loop *loop)
 {
-  struct loop *loop_father;
+  class loop *loop_father;
   bool changed = false;
-  struct loop *inner;
+  class loop *inner;
   enum unroll_level ul;
+  unsigned num = number_of_loops (cfun);
 
-  /* Process inner loops first.  */
+  /* Process inner loops first.  Don't walk loops added by the recursive
+     calls because SSA form is not up-to-date.  They can be handled in the
+     next iteration.  */
+  bitmap child_father_bbs = NULL;
   for (inner = loop->inner; inner != NULL; inner = inner->next)
-    changed |= tree_unroll_loops_completely_1 (may_increase_size,
-                                              unroll_outer, father_stack,
-                                              inner);
+    if ((unsigned) inner->num < num)
+      {
+       if (!child_father_bbs)
+         child_father_bbs = BITMAP_ALLOC (NULL);
+       if (tree_unroll_loops_completely_1 (may_increase_size, unroll_outer,
+                                           child_father_bbs, inner))
+         {
+           bitmap_ior_into (father_bbs, child_father_bbs);
+           bitmap_clear (child_father_bbs);
+           changed = true;
+         }
+      }
+  if (child_father_bbs)
+    BITMAP_FREE (child_father_bbs);
+
   /* If we changed an inner loop we cannot process outer loops in this
      iteration because SSA form is not up-to-date.  Continue with
      siblings of outer loops instead.  */
   if (changed)
-    return true;
+    {
+      /* If we are recorded as a father, clear all other fathers that
+         are necessarily covered already, to avoid redundant work.  */
+      if (bitmap_bit_p (father_bbs, loop->header->index))
+       {
+         bitmap_clear (father_bbs);
+         bitmap_set_bit (father_bbs, loop->header->index);
+       }
+      return true;
+    }
 
   /* Don't unroll #pragma omp simd loops until the vectorizer
      attempts to vectorize those.  */
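
The rewrite above drops the father_stack/loop->aux bookkeeping in favor of father_bbs, a bitmap of loop-header BB indices. Indices stay meaningful across loop-tree rebuilds, where raw loop pointers would dangle. A toy model (not GCC code):

    // Toy model of the father_stack -> father_bbs switch: fathers are
    // remembered by header basic-block index and re-resolved after the
    // loop structures are rebuilt.
    #include <cstdio>
    #include <map>
    #include <set>

    int
    main ()
    {
      std::set<int> father_bbs = { 4 };             // header BB indices
      // ... unrolling rebuilds the loop structures here ...
      std::map<int, int> bb_to_loop = { { 4, 2 } }; // current BB -> loop num
      for (int bb : father_bbs)
        if (bb_to_loop.count (bb))                  // BB may have vanished
          printf ("revisit loop %d\n", bb_to_loop[bb]);
      return 0;
    }
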
@@ -1329,7 +1386,9 @@ tree_unroll_loops_completely_1 (bool may_increase_size, bool unroll_outer,
   if (!loop_father)
     return false;
 
-  if (may_increase_size && optimize_loop_nest_for_speed_p (loop)
+  if (loop->unroll > 1)
+    ul = UL_ALL;
+  else if (may_increase_size && optimize_loop_nest_for_speed_p (loop)
       /* Unroll outermost loops only if asked to do so or they do
         not cause code growth.  */
       && (unroll_outer || loop_outer (loop_father)))
@@ -1338,16 +1397,19 @@ tree_unroll_loops_completely_1 (bool may_increase_size, bool unroll_outer,
     ul = UL_NO_GROWTH;
 
   if (canonicalize_loop_induction_variables
-        (loop, false, ul, !flag_tree_loop_ivcanon))
+        (loop, false, ul, !flag_tree_loop_ivcanon, unroll_outer))
     {
       /* If we'll continue unrolling, we need to propagate constants
         within the new basic blocks to fold away induction variable
         computations; otherwise, the size might blow up before the
         iteration is complete and the IR eventually cleaned up.  */
-      if (loop_outer (loop_father) && !loop_father->aux)
+      if (loop_outer (loop_father))
        {
-         father_stack.safe_push (loop_father);
-         loop_father->aux = loop_father;
+         /* Once we process our father we will have processed
+            the fathers of our children as well, so avoid doing
+            redundant work and clear fathers we've gathered so far.  */
+         bitmap_clear (father_bbs);
+         bitmap_set_bit (father_bbs, loop_father->header->index);
        }
 
       return true;
@@ -1360,14 +1422,16 @@ tree_unroll_loops_completely_1 (bool may_increase_size, bool unroll_outer,
    MAY_INCREASE_SIZE is true, perform the unrolling only if the
    size of the code does not increase.  */
 
-unsigned int
+static unsigned int
 tree_unroll_loops_completely (bool may_increase_size, bool unroll_outer)
 {
-  auto_vec<loop_p, 16> father_stack;
+  bitmap father_bbs = BITMAP_ALLOC (NULL);
   bool changed;
   int iteration = 0;
   bool irred_invalidated = false;
 
+  estimate_numbers_of_iterations (cfun);
+
   do
     {
       changed = false;
@@ -1377,26 +1441,18 @@ tree_unroll_loops_completely (bool may_increase_size, bool unroll_outer)
        loop_closed_ssa_invalidated = BITMAP_ALLOC (NULL);
 
       free_numbers_of_iterations_estimates (cfun);
-      estimate_numbers_of_iterations ();
+      estimate_numbers_of_iterations (cfun);
 
       changed = tree_unroll_loops_completely_1 (may_increase_size,
-                                               unroll_outer, father_stack,
+                                               unroll_outer, father_bbs,
                                                current_loops->tree_root);
       if (changed)
        {
-         struct loop **iter;
          unsigned i;
 
-         /* Be sure to skip unlooped loops while procesing father_stack
-            array.  */
-         FOR_EACH_VEC_ELT (loops_to_unloop, i, iter)
-           (*iter)->aux = NULL;
-         FOR_EACH_VEC_ELT (father_stack, i, iter)
-           if (!(*iter)->aux)
-             *iter = NULL;
           unloop_loops (loop_closed_ssa_invalidated, &irred_invalidated);
 
-         /* We can not use TODO_update_ssa_no_phi because VOPS gets confused.  */
+         /* We cannot use TODO_update_ssa_no_phi because VOPS gets confused.  */
          if (loop_closed_ssa_invalidated
              && !bitmap_empty_p (loop_closed_ssa_invalidated))
             rewrite_into_loop_closed_ssa (loop_closed_ssa_invalidated,
@@ -1404,18 +1460,34 @@ tree_unroll_loops_completely (bool may_increase_size, bool unroll_outer)
          else
            update_ssa (TODO_update_ssa);
 
+         /* father_bbs is a bitmap of loop father header BB indices.
+            Translate that to what non-root loops these BBs belong to now.  */
+         bitmap_iterator bi;
+         bitmap fathers = BITMAP_ALLOC (NULL);
+         EXECUTE_IF_SET_IN_BITMAP (father_bbs, 0, i, bi)
+           {
+             basic_block unrolled_loop_bb = BASIC_BLOCK_FOR_FN (cfun, i);
+             if (! unrolled_loop_bb)
+               continue;
+             if (loop_outer (unrolled_loop_bb->loop_father))
+               bitmap_set_bit (fathers,
+                               unrolled_loop_bb->loop_father->num);
+           }
+         bitmap_clear (father_bbs);
          /* Propagate the constants within the new basic blocks.  */
-         FOR_EACH_VEC_ELT (father_stack, i, iter)
-           if (*iter)
-             {
-               unsigned j;
-               basic_block *body = get_loop_body_in_dom_order (*iter);
-               for (j = 0; j < (*iter)->num_nodes; j++)
-                 propagate_constants_for_unrolling (body[j]);
-               free (body);
-               (*iter)->aux = NULL;
-             }
-         father_stack.truncate (0);
+         EXECUTE_IF_SET_IN_BITMAP (fathers, 0, i, bi)
+           {
+             loop_p father = get_loop (cfun, i);
+             bitmap exit_bbs = BITMAP_ALLOC (NULL);
+             loop_exit *exit = father->exits->next;
+             while (exit->e)
+               {
+                 bitmap_set_bit (exit_bbs, exit->e->dest->index);
+                 exit = exit->next;
+               }
+             do_rpo_vn (cfun, loop_preheader_edge (father), exit_bbs);
+           }
+         BITMAP_FREE (fathers);
 
          /* This will take care of removing completely unrolled loops
             from the loop structures so we can continue unrolling now
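
The hunk above replaces the hand-rolled propagate_constants_for_unrolling with a value-numbering walk (do_rpo_vn) over each affected loop region, bounded by the loop's exit blocks. An illustrative source-level view of why that cleanup matters between unrolling iterations (example code, not from the patch):

    /* Once this loop is fully unrolled, i becomes the constants 0..3 and
       value numbering folds the four copies down, keeping the IR small
       before the next unrolling iteration re-evaluates loop sizes.  */
    int
    sum4 (void)
    {
      int s = 0;
      for (int i = 0; i < 4; i++)
        s += i;          /* after unrolling: s = 0 + 1 + 2 + 3 -> 6 */
      return s;
    }
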
@@ -1433,9 +1505,9 @@ tree_unroll_loops_completely (bool may_increase_size, bool unroll_outer)
         BITMAP_FREE (loop_closed_ssa_invalidated);
     }
   while (changed
-        && ++iteration <= PARAM_VALUE (PARAM_MAX_UNROLL_ITERATIONS));
+        && ++iteration <= param_max_unroll_iterations);
 
-  father_stack.release ();
+  BITMAP_FREE (father_bbs);
 
   if (irred_invalidated
       && loops_state_satisfies_p (LOOPS_HAVE_MARKED_IRREDUCIBLE_REGIONS))
@@ -1531,9 +1603,9 @@ pass_complete_unroll::execute (function *fun)
      re-peeling the same loop multiple times.  */
   if (flag_peel_loops)
     peeled_loops = BITMAP_ALLOC (NULL);
-  int val = tree_unroll_loops_completely (flag_unroll_loops
-                                         || flag_peel_loops
-                                         || optimize >= 3, true);
+  unsigned int val = tree_unroll_loops_completely (flag_unroll_loops
+                                                  || flag_peel_loops
+                                                  || optimize >= 3, true);
   if (peeled_loops)
     {
       BITMAP_FREE (peeled_loops);
@@ -1585,13 +1657,11 @@ pass_complete_unrolli::execute (function *fun)
 {
   unsigned ret = 0;
 
-  loop_optimizer_init (LOOPS_NORMAL
-                      | LOOPS_HAVE_RECORDED_EXITS);
+  loop_optimizer_init (LOOPS_NORMAL | LOOPS_HAVE_RECORDED_EXITS);
   if (number_of_loops (fun) > 1)
     {
       scev_initialize ();
       ret = tree_unroll_loops_completely (optimize >= 3, false);
-      free_numbers_of_iterations_estimates (fun);
       scev_finalize ();
     }
   loop_optimizer_finalize ();