gcc/tree-ssa-loop-ivcanon.c

   1 /* Induction variable canonicalization and loop peeling.
   2    Copyright (C) 2004-2020 Free Software Foundation, Inc.
   3
   4 This file is part of GCC.
   5
   6 GCC is free software; you can redistribute it and/or modify it
   7 under the terms of the GNU General Public License as published by the
   8 Free Software Foundation; either version 3, or (at your option) any
   9 later version.
  10
  11 GCC is distributed in the hope that it will be useful, but WITHOUT
  12 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
  13 FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  14 for more details.
  15
  16 You should have received a copy of the GNU General Public License
  17 along with GCC; see the file COPYING3.  If not see
  18 <http://www.gnu.org/licenses/>.  */
  19
  20 /* This pass detects the loops that iterate a constant number of times,
  21    adds a canonical induction variable (step -1, tested against 0)
  22    and replaces the exit test.  This enables the less powerful rtl
  23    level analysis to use this information.
  24
  25    This might spoil the code in some cases (by increasing register pressure).
  26    Note that in the case the new variable is not needed, ivopts will get rid
  27    of it, so it might only be a problem when there are no other linear induction
  28    variables.  In that case the created optimization possibilities are likely
  29    to pay up.
  30
  31    We also perform
  32      - complete unrolling (or peeling) when the loops is rolling few enough
  33        times
  34      - simple peeling (i.e. copying few initial iterations prior the loop)
  35        when number of iteration estimate is known (typically by the profile
  36        info).  */
  37
  38 #include "config.h"
  39 #include "system.h"
  40 #include "coretypes.h"
  41 #include "backend.h"
  42 #include "tree.h"
  43 #include "gimple.h"
  44 #include "cfghooks.h"
  45 #include "tree-pass.h"
  46 #include "ssa.h"
  47 #include "cgraph.h"
  48 #include "gimple-pretty-print.h"
  49 #include "fold-const.h"
  50 #include "profile.h"
  51 #include "gimple-fold.h"
  52 #include "tree-eh.h"
  53 #include "gimple-iterator.h"
  54 #include "tree-cfg.h"
  55 #include "tree-ssa-loop-manip.h"
  56 #include "tree-ssa-loop-niter.h"
  57 #include "tree-ssa-loop.h"
  58 #include "tree-into-ssa.h"
  59 #include "cfgloop.h"
  60 #include "tree-chrec.h"
  61 #include "tree-scalar-evolution.h"
  62 #include "tree-inline.h"
  63 #include "tree-cfgcleanup.h"
  64 #include "builtins.h"
  65 #include "tree-ssa-sccvn.h"
  66 #include "dbgcnt.h"
  67
  68 /* Specifies types of loops that may be unrolled.  */
  69
  70 enum unroll_level
  71 {
  72   UL_SINGLE_ITER,       /* Only loops that exit immediately in the first
  73                            iteration.  */
  74   UL_NO_GROWTH,         /* Only loops whose unrolling will not cause increase
  75                            of code size.  */
  76   UL_ALL                /* All suitable loops.  */
  77 };
  78
  79 /* Adds a canonical induction variable to LOOP iterating NITER times.  EXIT
  80    is the exit edge whose condition is replaced.  The ssa versions of the new
  81    IV before and after increment will be stored in VAR_BEFORE and VAR_AFTER
  82    if they are not NULL.  */
  83
  84 void
  85 create_canonical_iv (class loop *loop, edge exit, tree niter,
  86                      tree *var_before = NULL, tree *var_after = NULL)
  87 {
  88   edge in;
  89   tree type, var;
  90   gcond *cond;
  91   gimple_stmt_iterator incr_at;
  92   enum tree_code cmp;
  93
  94   if (dump_file && (dump_flags & TDF_DETAILS))
  95     {
  96       fprintf (dump_file, "Added canonical iv to loop %d, ", loop->num);
  97       print_generic_expr (dump_file, niter, TDF_SLIM);
  98       fprintf (dump_file, " iterations.\n");
  99     }
 100
 101   cond = as_a <gcond *> (last_stmt (exit->src));
 102   in = EDGE_SUCC (exit->src, 0);
 103   if (in == exit)
 104     in = EDGE_SUCC (exit->src, 1);
 105
 106   /* Note that we do not need to worry about overflows, since
 107      type of niter is always unsigned and all comparisons are
 108      just for equality/nonequality -- i.e. everything works
 109      with a modulo arithmetics.  */
 110
 111   type = TREE_TYPE (niter);
 112   niter = fold_build2 (PLUS_EXPR, type,
 113                        niter,
 114                        build_int_cst (type, 1));
 115   incr_at = gsi_last_bb (in->src);
 116   create_iv (niter,
 117              build_int_cst (type, -1),
 118              NULL_TREE, loop,
 119              &incr_at, false, var_before, &var);
 120   if (var_after)
 121     *var_after = var;
 122
 123   cmp = (exit->flags & EDGE_TRUE_VALUE) ? EQ_EXPR : NE_EXPR;
 124   gimple_cond_set_code (cond, cmp);
 125   gimple_cond_set_lhs (cond, var);
 126   gimple_cond_set_rhs (cond, build_int_cst (type, 0));
 127   update_stmt (cond);
 128 }
 129
 130 /* Describe size of loop as detected by tree_estimate_loop_size.  */
 131 struct loop_size
 132 {
 133   /* Number of instructions in the loop.  */
 134   int overall;
 135
 136   /* Number of instructions that will be likely optimized out in
 137      peeled iterations of loop  (i.e. computation based on induction
 138      variable where induction variable starts at known constant.)  */
 139   int eliminated_by_peeling;
 140
 141   /* Same statistics for last iteration of loop: it is smaller because
 142      instructions after exit are not executed.  */
 143   int last_iteration;
 144   int last_iteration_eliminated_by_peeling;
 145
 146   /* If some IV computation will become constant.  */
 147   bool constant_iv;
 148
 149   /* Number of call stmts that are not a builtin and are pure or const
 150      present on the hot path.  */
 151   int num_pure_calls_on_hot_path;
 152   /* Number of call stmts that are not a builtin and are not pure nor const
 153      present on the hot path.  */
 154   int num_non_pure_calls_on_hot_path;
 155   /* Number of statements other than calls in the loop.  */
 156   int non_call_stmts_on_hot_path;
 157   /* Number of branches seen on the hot path.  */
 158   int num_branches_on_hot_path;
 159 };
 160
 161 /* Return true if OP in STMT will be constant after peeling LOOP.  */
 162
 163 static bool
 164 constant_after_peeling (tree op, gimple *stmt, class loop *loop)
 165 {
 166   if (CONSTANT_CLASS_P (op))
 167     return true;
 168
 169   /* We can still fold accesses to constant arrays when index is known.  */
 170   if (TREE_CODE (op) != SSA_NAME)
 171     {
 172       tree base = op;
 173
 174       /* First make fast look if we see constant array inside.  */
 175       while (handled_component_p (base))
 176         base = TREE_OPERAND (base, 0);
 177       if ((DECL_P (base)
 178            && ctor_for_folding (base) != error_mark_node)
 179           || CONSTANT_CLASS_P (base))
 180         {
 181           /* If so, see if we understand all the indices.  */
 182           base = op;
 183           while (handled_component_p (base))
 184             {
 185               if (TREE_CODE (base) == ARRAY_REF
 186                   && !constant_after_peeling (TREE_OPERAND (base, 1), stmt, loop))
 187                 return false;
 188               base = TREE_OPERAND (base, 0);
 189             }
 190           return true;
 191         }
 192       return false;
 193     }
 194
 195   /* Induction variables are constants when defined in loop.  */
 196   if (loop_containing_stmt (stmt) != loop)
 197     return false;
 198   tree ev = analyze_scalar_evolution (loop, op);
 199   if (chrec_contains_undetermined (ev)
 200       || chrec_contains_symbols (ev))
 201     return false;
 202   return true;
 203 }
 204
 205 /* Computes an estimated number of insns in LOOP.
 206    EXIT (if non-NULL) is an exite edge that will be eliminated in all but last
 207    iteration of the loop.
 208    EDGE_TO_CANCEL (if non-NULL) is an non-exit edge eliminated in the last iteration
 209    of loop.
 210    Return results in SIZE, estimate benefits for complete unrolling exiting by EXIT.
 211    Stop estimating after UPPER_BOUND is met.  Return true in this case.  */
 212
 213 static bool
 214 tree_estimate_loop_size (class loop *loop, edge exit, edge edge_to_cancel,
 215                          struct loop_size *size, int upper_bound)
 216 {
 217   basic_block *body = get_loop_body (loop);
 218   gimple_stmt_iterator gsi;
 219   unsigned int i;
 220   bool after_exit;
 221   vec<basic_block> path = get_loop_hot_path (loop);
 222
 223   size->overall = 0;
 224   size->eliminated_by_peeling = 0;
 225   size->last_iteration = 0;
 226   size->last_iteration_eliminated_by_peeling = 0;
 227   size->num_pure_calls_on_hot_path = 0;
 228   size->num_non_pure_calls_on_hot_path = 0;
 229   size->non_call_stmts_on_hot_path = 0;
 230   size->num_branches_on_hot_path = 0;
 231   size->constant_iv = 0;
 232
 233   if (dump_file && (dump_flags & TDF_DETAILS))
 234     fprintf (dump_file, "Estimating sizes for loop %i\n", loop->num);
 235   for (i = 0; i < loop->num_nodes; i++)
 236     {
 237       if (edge_to_cancel && body[i] != edge_to_cancel->src
 238           && dominated_by_p (CDI_DOMINATORS, body[i], edge_to_cancel->src))
 239         after_exit = true;
 240       else
 241         after_exit = false;
 242       if (dump_file && (dump_flags & TDF_DETAILS))
 243         fprintf (dump_file, " BB: %i, after_exit: %i\n", body[i]->index,
 244                  after_exit);
 245
 246       for (gsi = gsi_start_bb (body[i]); !gsi_end_p (gsi); gsi_next (&gsi))
 247         {
 248           gimple *stmt = gsi_stmt (gsi);
 249           int num = estimate_num_insns (stmt, &eni_size_weights);
 250           bool likely_eliminated = false;
 251           bool likely_eliminated_last = false;
 252           bool likely_eliminated_peeled = false;
 253
 254           if (dump_file && (dump_flags & TDF_DETAILS))
 255             {
 256               fprintf (dump_file, "  size: %3i ", num);
 257               print_gimple_stmt (dump_file, gsi_stmt (gsi), 0);
 258             }
 259
 260           /* Look for reasons why we might optimize this stmt away. */
 261
 262           if (!gimple_has_side_effects (stmt))
 263             {
 264               /* Exit conditional.  */
 265               if (exit && body[i] == exit->src
 266                   && stmt == last_stmt (exit->src))
 267                 {
 268                   if (dump_file && (dump_flags & TDF_DETAILS))
 269                     fprintf (dump_file, "   Exit condition will be eliminated "
 270                              "in peeled copies.\n");
 271                   likely_eliminated_peeled = true;
 272                 }
 273               if (edge_to_cancel && body[i] == edge_to_cancel->src
 274                   && stmt == last_stmt (edge_to_cancel->src))
 275                 {
 276                   if (dump_file && (dump_flags & TDF_DETAILS))
 277                     fprintf (dump_file, "   Exit condition will be eliminated "
 278                              "in last copy.\n");
 279                   likely_eliminated_last = true;
 280                 }
 281               /* Sets of IV variables  */
 282               if (gimple_code (stmt) == GIMPLE_ASSIGN
 283                   && constant_after_peeling (gimple_assign_lhs (stmt), stmt, loop))
 284                 {
 285                   if (dump_file && (dump_flags & TDF_DETAILS))
 286                     fprintf (dump_file, "   Induction variable computation will"
 287                              " be folded away.\n");
 288                   likely_eliminated = true;
 289                 }
 290               /* Assignments of IV variables.  */
 291               else if (gimple_code (stmt) == GIMPLE_ASSIGN
 292                        && TREE_CODE (gimple_assign_lhs (stmt)) == SSA_NAME
 293                        && constant_after_peeling (gimple_assign_rhs1 (stmt),
 294                                                   stmt, loop)
 295                        && (gimple_assign_rhs_class (stmt) != GIMPLE_BINARY_RHS
 296                            || constant_after_peeling (gimple_assign_rhs2 (stmt),
 297                                                       stmt, loop))
 298                        && gimple_assign_rhs_class (stmt) != GIMPLE_TERNARY_RHS)
 299                 {
 300                   size->constant_iv = true;
 301                   if (dump_file && (dump_flags & TDF_DETAILS))
 302                     fprintf (dump_file,
 303                              "   Constant expression will be folded away.\n");
 304                   likely_eliminated = true;
 305                 }
 306               /* Conditionals.  */
 307               else if ((gimple_code (stmt) == GIMPLE_COND
 308                         && constant_after_peeling (gimple_cond_lhs (stmt), stmt,
 309                                                    loop)
 310                         && constant_after_peeling (gimple_cond_rhs (stmt), stmt,
 311                                                    loop)
 312                         /* We don't simplify all constant compares so make sure
 313                            they are not both constant already.  See PR70288.  */
 314                         && (! is_gimple_min_invariant (gimple_cond_lhs (stmt))
 315                             || ! is_gimple_min_invariant
 316                                  (gimple_cond_rhs (stmt))))
 317                        || (gimple_code (stmt) == GIMPLE_SWITCH
 318                            && constant_after_peeling (gimple_switch_index (
 319                                                         as_a <gswitch *>
 320                                                           (stmt)),
 321                                                       stmt, loop)
 322                            && ! is_gimple_min_invariant
 323                                    (gimple_switch_index
 324                                       (as_a <gswitch *> (stmt)))))
 325                 {
 326                   if (dump_file && (dump_flags & TDF_DETAILS))
 327                     fprintf (dump_file, "   Constant conditional.\n");
 328                   likely_eliminated = true;
 329                 }
 330             }
 331
 332           size->overall += num;
 333           if (likely_eliminated || likely_eliminated_peeled)
 334             size->eliminated_by_peeling += num;
 335           if (!after_exit)
 336             {
 337               size->last_iteration += num;
 338               if (likely_eliminated || likely_eliminated_last)
 339                 size->last_iteration_eliminated_by_peeling += num;
 340             }
 341           if ((size->overall * 3 / 2 - size->eliminated_by_peeling
 342               - size->last_iteration_eliminated_by_peeling) > upper_bound)
 343             {
 344               free (body);
 345               path.release ();
 346               return true;
 347             }
 348         }
 349     }
 350   while (path.length ())
 351     {
 352       basic_block bb = path.pop ();
 353       for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
 354         {
 355           gimple *stmt = gsi_stmt (gsi);
 356           if (gimple_code (stmt) == GIMPLE_CALL
 357               && !gimple_inexpensive_call_p (as_a <gcall *>  (stmt)))
 358             {
 359               int flags = gimple_call_flags (stmt);
 360               if (flags & (ECF_PURE | ECF_CONST))
 361                 size->num_pure_calls_on_hot_path++;
 362               else
 363                 size->num_non_pure_calls_on_hot_path++;
 364               size->num_branches_on_hot_path ++;
 365             }
 366           /* Count inexpensive calls as non-calls, because they will likely
 367              expand inline.  */
 368           else if (gimple_code (stmt) != GIMPLE_DEBUG)
 369             size->non_call_stmts_on_hot_path++;
 370           if (((gimple_code (stmt) == GIMPLE_COND
 371                 && (!constant_after_peeling (gimple_cond_lhs (stmt), stmt, loop)
 372                     || !constant_after_peeling (gimple_cond_rhs (stmt), stmt,
 373                                                 loop)))
 374                || (gimple_code (stmt) == GIMPLE_SWITCH
 375                    && !constant_after_peeling (gimple_switch_index (
 376                                                  as_a <gswitch *> (stmt)),
 377                                                stmt, loop)))
 378               && (!exit || bb != exit->src))
 379             size->num_branches_on_hot_path++;
 380         }
 381     }
 382   path.release ();
 383   if (dump_file && (dump_flags & TDF_DETAILS))
 384     fprintf (dump_file, "size: %i-%i, last_iteration: %i-%i\n", size->overall,
 385              size->eliminated_by_peeling, size->last_iteration,
 386              size->last_iteration_eliminated_by_peeling);
 387
 388   free (body);
 389   return false;
 390 }
 391
 392 /* Estimate number of insns of completely unrolled loop.
 393    It is (NUNROLL + 1) * size of loop body with taking into account
 394    the fact that in last copy everything after exit conditional
 395    is dead and that some instructions will be eliminated after
 396    peeling.
 397
 398    Loop body is likely going to simplify further, this is difficult
 399    to guess, we just decrease the result by 1/3.  */
 400
 401 static unsigned HOST_WIDE_INT
 402 estimated_unrolled_size (struct loop_size *size,
 403                          unsigned HOST_WIDE_INT nunroll)
 404 {
 405   HOST_WIDE_INT unr_insns = ((nunroll)
 406                              * (HOST_WIDE_INT) (size->overall
 407                                                 - size->eliminated_by_peeling));
 408   if (!nunroll)
 409     unr_insns = 0;
 410   unr_insns += size->last_iteration - size->last_iteration_eliminated_by_peeling;
 411
 412   unr_insns = unr_insns * 2 / 3;
 413   if (unr_insns <= 0)
 414     unr_insns = 1;
 415
 416   return unr_insns;
 417 }
 418
 419 /* Loop LOOP is known to not loop.  See if there is an edge in the loop
 420    body that can be remove to make the loop to always exit and at
 421    the same time it does not make any code potentially executed
 422    during the last iteration dead.
 423
 424    After complete unrolling we still may get rid of the conditional
 425    on the exit in the last copy even if we have no idea what it does.
 426    This is quite common case for loops of form
 427
 428      int a[5];
 429      for (i=0;i<b;i++)
 430        a[i]=0;
 431
 432    Here we prove the loop to iterate 5 times but we do not know
 433    it from induction variable.
 434
 435    For now we handle only simple case where there is exit condition
 436    just before the latch block and the latch block contains no statements
 437    with side effect that may otherwise terminate the execution of loop
 438    (such as by EH or by terminating the program or longjmp).
 439
 440    In the general case we may want to cancel the paths leading to statements
 441    loop-niter identified as having undefined effect in the last iteration.
 442    The other cases are hopefully rare and will be cleaned up later.  */
 443
 444 static edge
 445 loop_edge_to_cancel (class loop *loop)
 446 {
 447   vec<edge> exits;
 448   unsigned i;
 449   edge edge_to_cancel;
 450   gimple_stmt_iterator gsi;
 451
 452   /* We want only one predecestor of the loop.  */
 453   if (EDGE_COUNT (loop->latch->preds) > 1)
 454     return NULL;
 455
 456   exits = get_loop_exit_edges (loop);
 457
 458   FOR_EACH_VEC_ELT (exits, i, edge_to_cancel)
 459     {
 460        /* Find the other edge than the loop exit
 461           leaving the conditoinal.  */
 462        if (EDGE_COUNT (edge_to_cancel->src->succs) != 2)
 463          continue;
 464        if (EDGE_SUCC (edge_to_cancel->src, 0) == edge_to_cancel)
 465          edge_to_cancel = EDGE_SUCC (edge_to_cancel->src, 1);
 466        else
 467          edge_to_cancel = EDGE_SUCC (edge_to_cancel->src, 0);
 468
 469       /* We only can handle conditionals.  */
 470       if (!(edge_to_cancel->flags & (EDGE_TRUE_VALUE | EDGE_FALSE_VALUE)))
 471         continue;
 472
 473       /* We should never have conditionals in the loop latch. */
 474       gcc_assert (edge_to_cancel->dest != loop->header);
 475
 476       /* Check that it leads to loop latch.  */
 477       if (edge_to_cancel->dest != loop->latch)
 478         continue;
 479
 480       exits.release ();
 481
 482       /* Verify that the code in loop latch does nothing that may end program
 483          execution without really reaching the exit.  This may include
 484          non-pure/const function calls, EH statements, volatile ASMs etc.  */
 485       for (gsi = gsi_start_bb (loop->latch); !gsi_end_p (gsi); gsi_next (&gsi))
 486         if (gimple_has_side_effects (gsi_stmt (gsi)))
 487            return NULL;
 488       return edge_to_cancel;
 489     }
 490   exits.release ();
 491   return NULL;
 492 }
 493
 494 /* Remove all tests for exits that are known to be taken after LOOP was
 495    peeled NPEELED times. Put gcc_unreachable before every statement
 496    known to not be executed.  */
 497
 498 static bool
 499 remove_exits_and_undefined_stmts (class loop *loop, unsigned int npeeled)
 500 {
 501   class nb_iter_bound *elt;
 502   bool changed = false;
 503
 504   for (elt = loop->bounds; elt; elt = elt->next)
 505     {
 506       /* If statement is known to be undefined after peeling, turn it
 507          into unreachable (or trap when debugging experience is supposed
 508          to be good).  */
 509       if (!elt->is_exit
 510           && wi::ltu_p (elt->bound, npeeled))
 511         {
 512           gimple_stmt_iterator gsi = gsi_for_stmt (elt->stmt);
 513           gcall *stmt = gimple_build_call
 514               (builtin_decl_implicit (BUILT_IN_UNREACHABLE), 0);
 515           gimple_set_location (stmt, gimple_location (elt->stmt));
 516           gsi_insert_before (&gsi, stmt, GSI_NEW_STMT);
 517           split_block (gimple_bb (stmt), stmt);
 518           changed = true;
 519           if (dump_file && (dump_flags & TDF_DETAILS))
 520             {
 521               fprintf (dump_file, "Forced statement unreachable: ");
 522               print_gimple_stmt (dump_file, elt->stmt, 0);
 523             }
 524         }
 525       /* If we know the exit will be taken after peeling, update.  */
 526       else if (elt->is_exit
 527                && wi::leu_p (elt->bound, npeeled))
 528         {
 529           basic_block bb = gimple_bb (elt->stmt);
 530           edge exit_edge = EDGE_SUCC (bb, 0);
 531
 532           if (dump_file && (dump_flags & TDF_DETAILS))
 533             {
 534               fprintf (dump_file, "Forced exit to be taken: ");
 535               print_gimple_stmt (dump_file, elt->stmt, 0);
 536             }
 537           if (!loop_exit_edge_p (loop, exit_edge))
 538             exit_edge = EDGE_SUCC (bb, 1);
 539           exit_edge->probability = profile_probability::always ();
 540           gcc_checking_assert (loop_exit_edge_p (loop, exit_edge));
 541           gcond *cond_stmt = as_a <gcond *> (elt->stmt);
 542           if (exit_edge->flags & EDGE_TRUE_VALUE)
 543             gimple_cond_make_true (cond_stmt);
 544           else
 545             gimple_cond_make_false (cond_stmt);
 546           update_stmt (cond_stmt);
 547           changed = true;
 548         }
 549     }
 550   return changed;
 551 }
 552
 553 /* Remove all exits that are known to be never taken because of the loop bound
 554    discovered.  */
 555
 556 static bool
 557 remove_redundant_iv_tests (class loop *loop)
 558 {
 559   class nb_iter_bound *elt;
 560   bool changed = false;
 561
 562   if (!loop->any_upper_bound)
 563     return false;
 564   for (elt = loop->bounds; elt; elt = elt->next)
 565     {
 566       /* Exit is pointless if it won't be taken before loop reaches
 567          upper bound.  */
 568       if (elt->is_exit && loop->any_upper_bound
 569           && wi::ltu_p (loop->nb_iterations_upper_bound, elt->bound))
 570         {
 571           basic_block bb = gimple_bb (elt->stmt);
 572           edge exit_edge = EDGE_SUCC (bb, 0);
 573           class tree_niter_desc niter;
 574
 575           if (!loop_exit_edge_p (loop, exit_edge))
 576             exit_edge = EDGE_SUCC (bb, 1);
 577
 578           /* Only when we know the actual number of iterations, not
 579              just a bound, we can remove the exit.  */
 580           if (!number_of_iterations_exit (loop, exit_edge,
 581                                           &niter, false, false)
 582               || !integer_onep (niter.assumptions)
 583               || !integer_zerop (niter.may_be_zero)
 584               || !niter.niter
 585               || TREE_CODE (niter.niter) != INTEGER_CST
 586               || !wi::ltu_p (loop->nb_iterations_upper_bound,
 587                              wi::to_widest (niter.niter)))
 588             continue;
 589
 590           if (dump_file && (dump_flags & TDF_DETAILS))
 591             {
 592               fprintf (dump_file, "Removed pointless exit: ");
 593               print_gimple_stmt (dump_file, elt->stmt, 0);
 594             }
 595           gcond *cond_stmt = as_a <gcond *> (elt->stmt);
 596           if (exit_edge->flags & EDGE_TRUE_VALUE)
 597             gimple_cond_make_false (cond_stmt);
 598           else
 599             gimple_cond_make_true (cond_stmt);
 600           update_stmt (cond_stmt);
 601           changed = true;
 602         }
 603     }
 604   return changed;
 605 }
 606
 607 /* Stores loops that will be unlooped and edges that will be removed
 608    after we process whole loop tree. */
 609 static vec<loop_p> loops_to_unloop;
 610 static vec<int> loops_to_unloop_nunroll;
 611 static vec<edge> edges_to_remove;
 612 /* Stores loops that has been peeled.  */
 613 static bitmap peeled_loops;
 614
 615 /* Cancel all fully unrolled loops by putting __builtin_unreachable
 616    on the latch edge.
 617    We do it after all unrolling since unlooping moves basic blocks
 618    across loop boundaries trashing loop closed SSA form as well
 619    as SCEV info needed to be intact during unrolling.
 620
 621    IRRED_INVALIDATED is used to bookkeep if information about
 622    irreducible regions may become invalid as a result
 623    of the transformation.
 624    LOOP_CLOSED_SSA_INVALIDATED is used to bookkepp the case
 625    when we need to go into loop closed SSA form.  */
 626
 627 static void
 628 unloop_loops (bitmap loop_closed_ssa_invalidated,
 629               bool *irred_invalidated)
 630 {
 631   while (loops_to_unloop.length ())
 632     {
 633       class loop *loop = loops_to_unloop.pop ();
 634       int n_unroll = loops_to_unloop_nunroll.pop ();
 635       basic_block latch = loop->latch;
 636       edge latch_edge = loop_latch_edge (loop);
 637       int flags = latch_edge->flags;
 638       location_t locus = latch_edge->goto_locus;
 639       gcall *stmt;
 640       gimple_stmt_iterator gsi;
 641
 642       remove_exits_and_undefined_stmts (loop, n_unroll);
 643
 644       /* Unloop destroys the latch edge.  */
 645       unloop (loop, irred_invalidated, loop_closed_ssa_invalidated);
 646
 647       /* Create new basic block for the latch edge destination and wire
 648          it in.  */
 649       stmt = gimple_build_call (builtin_decl_implicit (BUILT_IN_UNREACHABLE), 0);
 650       latch_edge = make_edge (latch, create_basic_block (NULL, NULL, latch), flags);
 651       latch_edge->probability = profile_probability::never ();
 652       latch_edge->flags |= flags;
 653       latch_edge->goto_locus = locus;
 654
 655       add_bb_to_loop (latch_edge->dest, current_loops->tree_root);
 656       latch_edge->dest->count = profile_count::zero ();
 657       set_immediate_dominator (CDI_DOMINATORS, latch_edge->dest, latch_edge->src);
 658
 659       gsi = gsi_start_bb (latch_edge->dest);
 660       gsi_insert_after (&gsi, stmt, GSI_NEW_STMT);
 661     }
 662   loops_to_unloop.release ();
 663   loops_to_unloop_nunroll.release ();
 664
 665   /* Remove edges in peeled copies.  Given remove_path removes dominated
 666      regions we need to cope with removal of already removed paths.  */
 667   unsigned i;
 668   edge e;
 669   auto_vec<int, 20> src_bbs;
 670   src_bbs.reserve_exact (edges_to_remove.length ());
 671   FOR_EACH_VEC_ELT (edges_to_remove, i, e)
 672     src_bbs.quick_push (e->src->index);
 673   FOR_EACH_VEC_ELT (edges_to_remove, i, e)
 674     if (BASIC_BLOCK_FOR_FN (cfun, src_bbs[i]))
 675       {
 676         bool ok = remove_path (e, irred_invalidated,
 677                                loop_closed_ssa_invalidated);
 678         gcc_assert (ok);
 679       }
 680   edges_to_remove.release ();
 681 }
 682
 683 /* Tries to unroll LOOP completely, i.e. NITER times.
 684    UL determines which loops we are allowed to unroll.
 685    EXIT is the exit of the loop that should be eliminated.
 686    MAXITER specfy bound on number of iterations, -1 if it is
 687    not known or too large for HOST_WIDE_INT.  The location
 688    LOCUS corresponding to the loop is used when emitting
 689    a summary of the unroll to the dump file.  */
 690
 691 static bool
 692 try_unroll_loop_completely (class loop *loop,
 693                             edge exit, tree niter, bool may_be_zero,
 694                             enum unroll_level ul,
 695                             HOST_WIDE_INT maxiter,
 696                             dump_user_location_t locus, bool allow_peel)
 697 {
 698   unsigned HOST_WIDE_INT n_unroll = 0;
 699   bool n_unroll_found = false;
 700   edge edge_to_cancel = NULL;
 701
 702   /* See if we proved number of iterations to be low constant.
 703
 704      EXIT is an edge that will be removed in all but last iteration of
 705      the loop.
 706
 707      EDGE_TO_CACNEL is an edge that will be removed from the last iteration
 708      of the unrolled sequence and is expected to make the final loop not
 709      rolling.
 710
 711      If the number of execution of loop is determined by standard induction
 712      variable test, then EXIT and EDGE_TO_CANCEL are the two edges leaving
 713      from the iv test.  */
 714   if (tree_fits_uhwi_p (niter))
 715     {
 716       n_unroll = tree_to_uhwi (niter);
 717       n_unroll_found = true;
 718       edge_to_cancel = EDGE_SUCC (exit->src, 0);
 719       if (edge_to_cancel == exit)
 720         edge_to_cancel = EDGE_SUCC (exit->src, 1);
 721     }
 722   /* We do not know the number of iterations and thus we cannot eliminate
 723      the EXIT edge.  */
 724   else
 725     exit = NULL;
 726
 727   /* See if we can improve our estimate by using recorded loop bounds.  */
 728   if ((allow_peel || maxiter == 0 || ul == UL_NO_GROWTH)
 729       && maxiter >= 0
 730       && (!n_unroll_found || (unsigned HOST_WIDE_INT)maxiter < n_unroll))
 731     {
 732       n_unroll = maxiter;
 733       n_unroll_found = true;
 734       /* Loop terminates before the IV variable test, so we cannot
 735          remove it in the last iteration.  */
 736       edge_to_cancel = NULL;
 737     }
 738
 739   if (!n_unroll_found)
 740     return false;
 741
 742   if (!loop->unroll
 743       && n_unroll > (unsigned) param_max_completely_peel_times)
 744     {
 745       if (dump_file && (dump_flags & TDF_DETAILS))
 746         fprintf (dump_file, "Not unrolling loop %d "
 747                  "(--param max-completely-peel-times limit reached).\n",
 748                  loop->num);
 749       return false;
 750     }
 751
 752   if (!edge_to_cancel)
 753     edge_to_cancel = loop_edge_to_cancel (loop);
 754
 755   if (n_unroll)
 756     {
 757       if (ul == UL_SINGLE_ITER)
 758         return false;
 759
 760       if (loop->unroll)
 761         {
 762           /* If the unrolling factor is too large, bail out.  */
 763           if (n_unroll > (unsigned)loop->unroll)
 764             {
 765               if (dump_file && (dump_flags & TDF_DETAILS))
 766                 fprintf (dump_file,
 767                          "Not unrolling loop %d: "
 768                          "user didn't want it unrolled completely.\n",
 769                          loop->num);
 770               return false;
 771             }
 772         }
 773       else
 774         {
 775           struct loop_size size;
 776           /* EXIT can be removed only if we are sure it passes first N_UNROLL
 777              iterations.  */
 778           bool remove_exit = (exit && niter
 779                               && TREE_CODE (niter) == INTEGER_CST
 780                               && wi::leu_p (n_unroll, wi::to_widest (niter)));
 781           bool large
 782             = tree_estimate_loop_size
 783                 (loop, remove_exit ? exit : NULL, edge_to_cancel, &size,
 784                  param_max_completely_peeled_insns);
 785           if (large)
 786             {
 787               if (dump_file && (dump_flags & TDF_DETAILS))
 788                 fprintf (dump_file, "Not unrolling loop %d: it is too large.\n",
 789                          loop->num);
 790               return false;
 791             }
 792
 793           unsigned HOST_WIDE_INT ninsns = size.overall;
 794           unsigned HOST_WIDE_INT unr_insns
 795             = estimated_unrolled_size (&size, n_unroll);
 796           if (dump_file && (dump_flags & TDF_DETAILS))
 797             {
 798               fprintf (dump_file, "  Loop size: %d\n", (int) ninsns);
 799               fprintf (dump_file, "  Estimated size after unrolling: %d\n",
 800                        (int) unr_insns);
 801             }
 802
 803           /* If the code is going to shrink, we don't need to be extra
 804              cautious on guessing if the unrolling is going to be
 805              profitable.  */
 806           if (unr_insns
 807               /* If there is IV variable that will become constant, we
 808                  save one instruction in the loop prologue we do not
 809                  account otherwise.  */
 810               <= ninsns + (size.constant_iv != false))
 811             ;
 812           /* We unroll only inner loops, because we do not consider it
 813              profitable otheriwse.  We still can cancel loopback edge
 814              of not rolling loop; this is always a good idea.  */
 815           else if (ul == UL_NO_GROWTH)
 816             {
 817               if (dump_file && (dump_flags & TDF_DETAILS))
 818                 fprintf (dump_file, "Not unrolling loop %d: size would grow.\n",
 819                          loop->num);
 820               return false;
 821             }
 822           /* Outer loops tend to be less interesting candidates for
 823              complete unrolling unless we can do a lot of propagation
 824              into the inner loop body.  For now we disable outer loop
 825              unrolling when the code would grow.  */
 826           else if (loop->inner)
 827             {
 828               if (dump_file && (dump_flags & TDF_DETAILS))
 829                 fprintf (dump_file, "Not unrolling loop %d: "
 830                          "it is not innermost and code would grow.\n",
 831                          loop->num);
 832               return false;
 833             }
 834           /* If there is call on a hot path through the loop, then
 835              there is most probably not much to optimize.  */
 836           else if (size.num_non_pure_calls_on_hot_path)
 837             {
 838               if (dump_file && (dump_flags & TDF_DETAILS))
 839                 fprintf (dump_file, "Not unrolling loop %d: "
 840                          "contains call and code would grow.\n",
 841                          loop->num);
 842               return false;
 843             }
 844           /* If there is pure/const call in the function, then we can
 845              still optimize the unrolled loop body if it contains some
 846              other interesting code than the calls and code storing or
 847              cumulating the return value.  */
 848           else if (size.num_pure_calls_on_hot_path
 849                    /* One IV increment, one test, one ivtmp store and
 850                       one useful stmt.  That is about minimal loop
 851                       doing pure call.  */
 852                    && (size.non_call_stmts_on_hot_path
 853                        <= 3 + size.num_pure_calls_on_hot_path))
 854             {
 855               if (dump_file && (dump_flags & TDF_DETAILS))
 856                 fprintf (dump_file, "Not unrolling loop %d: "
 857                          "contains just pure calls and code would grow.\n",
 858                          loop->num);
 859               return false;
 860             }
 861           /* Complete unrolling is major win when control flow is
 862              removed and one big basic block is created.  If the loop
 863              contains control flow the optimization may still be a win
 864              because of eliminating the loop overhead but it also may
 865              blow the branch predictor tables.  Limit number of
 866              branches on the hot path through the peeled sequence.  */
 867           else if (size.num_branches_on_hot_path * (int)n_unroll
 868                    > param_max_peel_branches)
 869             {
 870               if (dump_file && (dump_flags & TDF_DETAILS))
 871                 fprintf (dump_file, "Not unrolling loop %d: "
 872                          "number of branches on hot path in the unrolled "
 873                          "sequence reaches --param max-peel-branches limit.\n",
 874                          loop->num);
 875               return false;
 876             }
 877           else if (unr_insns
 878                    > (unsigned) param_max_completely_peeled_insns)
 879             {
 880               if (dump_file && (dump_flags & TDF_DETAILS))
 881                 fprintf (dump_file, "Not unrolling loop %d: "
 882                          "number of insns in the unrolled sequence reaches "
 883                          "--param max-completely-peeled-insns limit.\n",
 884                          loop->num);
 885               return false;
 886             }
 887         }
 888
 889       if (!dbg_cnt (gimple_unroll))
 890         return false;
 891
 892       initialize_original_copy_tables ();
 893       auto_sbitmap wont_exit (n_unroll + 1);
 894       if (exit && niter
 895           && TREE_CODE (niter) == INTEGER_CST
 896           && wi::leu_p (n_unroll, wi::to_widest (niter)))
 897         {
 898           bitmap_ones (wont_exit);
 899           if (wi::eq_p (wi::to_widest (niter), n_unroll)
 900               || edge_to_cancel)
 901             bitmap_clear_bit (wont_exit, 0);
 902         }
 903       else
 904         {
 905           exit = NULL;
 906           bitmap_clear (wont_exit);
 907         }
 908       if (may_be_zero)
 909         bitmap_clear_bit (wont_exit, 1);
 910
 911       if (!gimple_duplicate_loop_to_header_edge (loop, loop_preheader_edge (loop),
 912                                                  n_unroll, wont_exit,
 913                                                  exit, &edges_to_remove,
 914                                                  DLTHE_FLAG_UPDATE_FREQ
 915                                                  | DLTHE_FLAG_COMPLETTE_PEEL))
 916         {
 917           free_original_copy_tables ();
 918           if (dump_file && (dump_flags & TDF_DETAILS))
 919             fprintf (dump_file, "Failed to duplicate the loop\n");
 920           return false;
 921         }
 922
 923       free_original_copy_tables ();
 924     }
 925
 926   /* Remove the conditional from the last copy of the loop.  */
 927   if (edge_to_cancel)
 928     {
 929       gcond *cond = as_a <gcond *> (last_stmt (edge_to_cancel->src));
 930       force_edge_cold (edge_to_cancel, true);
 931       if (edge_to_cancel->flags & EDGE_TRUE_VALUE)
 932         gimple_cond_make_false (cond);
 933       else
 934         gimple_cond_make_true (cond);
 935       update_stmt (cond);
 936       /* Do not remove the path, as doing so may remove outer loop and
 937          confuse bookkeeping code in tree_unroll_loops_completely.  */
 938     }
 939
 940   /* Store the loop for later unlooping and exit removal.  */
 941   loops_to_unloop.safe_push (loop);
 942   loops_to_unloop_nunroll.safe_push (n_unroll);
 943
 944   if (dump_enabled_p ())
 945     {
 946       if (!n_unroll)
 947         dump_printf_loc (MSG_OPTIMIZED_LOCATIONS | TDF_DETAILS, locus,
 948                          "loop turned into non-loop; it never loops\n");
 949       else
 950         {
 951           dump_printf_loc (MSG_OPTIMIZED_LOCATIONS | TDF_DETAILS, locus,
 952                            "loop with %d iterations completely unrolled",
 953                            (int) n_unroll);
 954           if (loop->header->count.initialized_p ())
 955             dump_printf (MSG_OPTIMIZED_LOCATIONS | TDF_DETAILS,
 956                          " (header execution count %d)",
 957                          (int)loop->header->count.to_gcov_type ());
 958           dump_printf (MSG_OPTIMIZED_LOCATIONS | TDF_DETAILS, "\n");
 959         }
 960     }
 961
 962   if (dump_file && (dump_flags & TDF_DETAILS))
 963     {
 964       if (exit)
 965         fprintf (dump_file, "Exit condition of peeled iterations was "
 966                  "eliminated.\n");
 967       if (edge_to_cancel)
 968         fprintf (dump_file, "Last iteration exit edge was proved true.\n");
 969       else
 970         fprintf (dump_file, "Latch of last iteration was marked by "
 971                  "__builtin_unreachable ().\n");
 972     }
 973
 974   return true;
 975 }
 976
 977 /* Return number of instructions after peeling.  */
 978 static unsigned HOST_WIDE_INT
 979 estimated_peeled_sequence_size (struct loop_size *size,
 980                                 unsigned HOST_WIDE_INT npeel)
 981 {
 982   return MAX (npeel * (HOST_WIDE_INT) (size->overall
 983                                        - size->eliminated_by_peeling), 1);
 984 }
 985
 986 /* If the loop is expected to iterate N times and is
 987    small enough, duplicate the loop body N+1 times before
 988    the loop itself.  This way the hot path will never
 989    enter the loop.
 990    Parameters are the same as for try_unroll_loops_completely */
 991
 992 static bool
 993 try_peel_loop (class loop *loop,
 994                edge exit, tree niter, bool may_be_zero,
 995                HOST_WIDE_INT maxiter)
 996 {
 997   HOST_WIDE_INT npeel;
 998   struct loop_size size;
 999   int peeled_size;
1000
1001   if (!flag_peel_loops
1002       || param_max_peel_times <= 0
1003       || !peeled_loops)
1004     return false;
1005
1006   if (bitmap_bit_p (peeled_loops, loop->num))
1007     {
1008       if (dump_file)
1009         fprintf (dump_file, "Not peeling: loop is already peeled\n");
1010       return false;
1011     }
1012
1013   /* We don't peel loops that will be unrolled as this can duplicate a
1014      loop more times than the user requested.  */
1015   if (loop->unroll)
1016     {
1017       if (dump_file)
1018         fprintf (dump_file, "Not peeling: user didn't want it peeled.\n");
1019       return false;
1020     }
1021
1022   /* Peel only innermost loops.
1023      While the code is perfectly capable of peeling non-innermost loops,
1024      the heuristics would probably need some improvements. */
1025   if (loop->inner)
1026     {
1027       if (dump_file)
1028         fprintf (dump_file, "Not peeling: outer loop\n");
1029       return false;
1030     }
1031
1032   if (!optimize_loop_for_speed_p (loop))
1033     {
1034       if (dump_file)
1035         fprintf (dump_file, "Not peeling: cold loop\n");
1036       return false;
1037     }
1038
1039   /* Check if there is an estimate on the number of iterations.  */
1040   npeel = estimated_loop_iterations_int (loop);
1041   if (npeel < 0)
1042     npeel = likely_max_loop_iterations_int (loop);
1043   if (npeel < 0)
1044     {
1045       if (dump_file)
1046         fprintf (dump_file, "Not peeling: number of iterations is not "
1047                  "estimated\n");
1048       return false;
1049     }
1050   if (maxiter >= 0 && maxiter <= npeel)
1051     {
1052       if (dump_file)
1053         fprintf (dump_file, "Not peeling: upper bound is known so can "
1054                  "unroll completely\n");
1055       return false;
1056     }
1057
1058   /* We want to peel estimated number of iterations + 1 (so we never
1059      enter the loop on quick path).  Check against PARAM_MAX_PEEL_TIMES
1060      and be sure to avoid overflows.  */
1061   if (npeel > param_max_peel_times - 1)
1062     {
1063       if (dump_file)
1064         fprintf (dump_file, "Not peeling: rolls too much "
1065                  "(%i + 1 > --param max-peel-times)\n", (int) npeel);
1066       return false;
1067     }
1068   npeel++;
1069
1070   /* Check peeled loops size.  */
1071   tree_estimate_loop_size (loop, exit, NULL, &size,
1072                            param_max_peeled_insns);
1073   if ((peeled_size = estimated_peeled_sequence_size (&size, (int) npeel))
1074       > param_max_peeled_insns)
1075     {
1076       if (dump_file)
1077         fprintf (dump_file, "Not peeling: peeled sequence size is too large "
1078                  "(%i insns > --param max-peel-insns)", peeled_size);
1079       return false;
1080     }
1081
1082   if (!dbg_cnt (gimple_unroll))
1083     return false;
1084
1085   /* Duplicate possibly eliminating the exits.  */
1086   initialize_original_copy_tables ();
1087   auto_sbitmap wont_exit (npeel + 1);
1088   if (exit && niter
1089       && TREE_CODE (niter) == INTEGER_CST
1090       && wi::leu_p (npeel, wi::to_widest (niter)))
1091     {
1092       bitmap_ones (wont_exit);
1093       bitmap_clear_bit (wont_exit, 0);
1094     }
1095   else
1096     {
1097       exit = NULL;
1098       bitmap_clear (wont_exit);
1099     }
1100   if (may_be_zero)
1101     bitmap_clear_bit (wont_exit, 1);
1102   if (!gimple_duplicate_loop_to_header_edge (loop, loop_preheader_edge (loop),
1103                                              npeel, wont_exit,
1104                                              exit, &edges_to_remove,
1105                                              DLTHE_FLAG_UPDATE_FREQ))
1106     {
1107       free_original_copy_tables ();
1108       return false;
1109     }
1110   free_original_copy_tables ();
1111   if (dump_file && (dump_flags & TDF_DETAILS))
1112     {
1113       fprintf (dump_file, "Peeled loop %d, %i times.\n",
1114                loop->num, (int) npeel);
1115     }
1116   if (loop->any_estimate)
1117     {
1118       if (wi::ltu_p (npeel, loop->nb_iterations_estimate))
1119         loop->nb_iterations_estimate -= npeel;
1120       else
1121         loop->nb_iterations_estimate = 0;
1122     }
1123   if (loop->any_upper_bound)
1124     {
1125       if (wi::ltu_p (npeel, loop->nb_iterations_upper_bound))
1126         loop->nb_iterations_upper_bound -= npeel;
1127       else
1128         loop->nb_iterations_upper_bound = 0;
1129     }
1130   if (loop->any_likely_upper_bound)
1131     {
1132       if (wi::ltu_p (npeel, loop->nb_iterations_likely_upper_bound))
1133         loop->nb_iterations_likely_upper_bound -= npeel;
1134       else
1135         {
1136           loop->any_estimate = true;
1137           loop->nb_iterations_estimate = 0;
1138           loop->nb_iterations_likely_upper_bound = 0;
1139         }
1140     }
1141   profile_count entry_count = profile_count::zero ();
1142
1143   edge e;
1144   edge_iterator ei;
1145   FOR_EACH_EDGE (e, ei, loop->header->preds)
1146     if (e->src != loop->latch)
1147       {
1148         if (e->src->count.initialized_p ())
1149           entry_count += e->src->count;
1150         gcc_assert (!flow_bb_inside_loop_p (loop, e->src));
1151       }
1152   profile_probability p;
1153   p = entry_count.probability_in (loop->header->count);
1154   scale_loop_profile (loop, p, 0);
1155   bitmap_set_bit (peeled_loops, loop->num);
1156   return true;
1157 }
1158 /* Adds a canonical induction variable to LOOP if suitable.
1159    CREATE_IV is true if we may create a new iv.  UL determines
1160    which loops we are allowed to completely unroll.  If TRY_EVAL is true, we try
1161    to determine the number of iterations of a loop by direct evaluation.
1162    Returns true if cfg is changed.   */
1163
1164 static bool
1165 canonicalize_loop_induction_variables (class loop *loop,
1166                                        bool create_iv, enum unroll_level ul,
1167                                        bool try_eval, bool allow_peel)
1168 {
1169   edge exit = NULL;
1170   tree niter;
1171   HOST_WIDE_INT maxiter;
1172   bool modified = false;
1173   dump_user_location_t locus;
1174   class tree_niter_desc niter_desc;
1175   bool may_be_zero = false;
1176
1177   /* For unrolling allow conditional constant or zero iterations, thus
1178      perform loop-header copying on-the-fly.  */
1179   exit = single_exit (loop);
1180   niter = chrec_dont_know;
1181   if (exit && number_of_iterations_exit (loop, exit, &niter_desc, false))
1182     {
1183       niter = niter_desc.niter;
1184       may_be_zero
1185         = niter_desc.may_be_zero && !integer_zerop (niter_desc.may_be_zero);
1186     }
1187   if (TREE_CODE (niter) == INTEGER_CST)
1188     locus = last_stmt (exit->src);
1189   else
1190     {
1191       /* For non-constant niter fold may_be_zero into niter again.  */
1192       if (may_be_zero)
1193         {
1194           if (COMPARISON_CLASS_P (niter_desc.may_be_zero))
1195             niter = fold_build3 (COND_EXPR, TREE_TYPE (niter),
1196                                  niter_desc.may_be_zero,
1197                                  build_int_cst (TREE_TYPE (niter), 0), niter);
1198           else
1199             niter = chrec_dont_know;
1200           may_be_zero = false;
1201         }
1202
1203       /* If the loop has more than one exit, try checking all of them
1204          for # of iterations determinable through scev.  */
1205       if (!exit)
1206         niter = find_loop_niter (loop, &exit);
1207
1208       /* Finally if everything else fails, try brute force evaluation.  */
1209       if (try_eval
1210           && (chrec_contains_undetermined (niter)
1211               || TREE_CODE (niter) != INTEGER_CST))
1212         niter = find_loop_niter_by_eval (loop, &exit);
1213
1214       if (exit)
1215         locus = last_stmt (exit->src);
1216
1217       if (TREE_CODE (niter) != INTEGER_CST)
1218         exit = NULL;
1219     }
1220
1221   /* We work exceptionally hard here to estimate the bound
1222      by find_loop_niter_by_eval.  Be sure to keep it for future.  */
1223   if (niter && TREE_CODE (niter) == INTEGER_CST)
1224     {
1225       vec<edge> exits = get_loop_exit_edges  (loop);
1226       record_niter_bound (loop, wi::to_widest (niter),
1227                           exit == single_likely_exit (loop, exits), true);
1228       exits.release ();
1229     }
1230
1231   /* Force re-computation of loop bounds so we can remove redundant exits.  */
1232   maxiter = max_loop_iterations_int (loop);
1233
1234   if (dump_file && (dump_flags & TDF_DETAILS)
1235       && TREE_CODE (niter) == INTEGER_CST)
1236     {
1237       fprintf (dump_file, "Loop %d iterates ", loop->num);
1238       print_generic_expr (dump_file, niter, TDF_SLIM);
1239       fprintf (dump_file, " times.\n");
1240     }
1241   if (dump_file && (dump_flags & TDF_DETAILS)
1242       && maxiter >= 0)
1243     {
1244       fprintf (dump_file, "Loop %d iterates at most %i times.\n", loop->num,
1245                (int)maxiter);
1246     }
1247   if (dump_file && (dump_flags & TDF_DETAILS)
1248       && likely_max_loop_iterations_int (loop) >= 0)
1249     {
1250       fprintf (dump_file, "Loop %d likely iterates at most %i times.\n",
1251                loop->num, (int)likely_max_loop_iterations_int (loop));
1252     }
1253
1254   /* Remove exits that are known to be never taken based on loop bound.
1255      Needs to be called after compilation of max_loop_iterations_int that
1256      populates the loop bounds.  */
1257   modified |= remove_redundant_iv_tests (loop);
1258
1259   if (try_unroll_loop_completely (loop, exit, niter, may_be_zero, ul,
1260                                   maxiter, locus, allow_peel))
1261     return true;
1262
1263   if (create_iv
1264       && niter && !chrec_contains_undetermined (niter)
1265       && exit && just_once_each_iteration_p (loop, exit->src))
1266     {
1267       tree iv_niter = niter;
1268       if (may_be_zero)
1269         {
1270           if (COMPARISON_CLASS_P (niter_desc.may_be_zero))
1271             iv_niter = fold_build3 (COND_EXPR, TREE_TYPE (iv_niter),
1272                                     niter_desc.may_be_zero,
1273                                     build_int_cst (TREE_TYPE (iv_niter), 0),
1274                                     iv_niter);
1275           else
1276             iv_niter = NULL_TREE;
1277         }
1278       if (iv_niter)
1279         create_canonical_iv (loop, exit, iv_niter);
1280     }
1281
1282   if (ul == UL_ALL)
1283     modified |= try_peel_loop (loop, exit, niter, may_be_zero, maxiter);
1284
1285   return modified;
1286 }
1287
1288 /* The main entry point of the pass.  Adds canonical induction variables
1289    to the suitable loops.  */
1290
1291 unsigned int
1292 canonicalize_induction_variables (void)
1293 {
1294   class loop *loop;
1295   bool changed = false;
1296   bool irred_invalidated = false;
1297   bitmap loop_closed_ssa_invalidated = BITMAP_ALLOC (NULL);
1298
1299   estimate_numbers_of_iterations (cfun);
1300
1301   FOR_EACH_LOOP (loop, LI_FROM_INNERMOST)
1302     {
1303       changed |= canonicalize_loop_induction_variables (loop,
1304                                                         true, UL_SINGLE_ITER,
1305                                                         true, false);
1306     }
1307   gcc_assert (!need_ssa_update_p (cfun));
1308
1309   unloop_loops (loop_closed_ssa_invalidated, &irred_invalidated);
1310   if (irred_invalidated
1311       && loops_state_satisfies_p (LOOPS_HAVE_MARKED_IRREDUCIBLE_REGIONS))
1312     mark_irreducible_loops ();
1313
1314   /* Clean up the information about numbers of iterations, since brute force
1315      evaluation could reveal new information.  */
1316   free_numbers_of_iterations_estimates (cfun);
1317   scev_reset ();
1318
1319   if (!bitmap_empty_p (loop_closed_ssa_invalidated))
1320     {
1321       gcc_checking_assert (loops_state_satisfies_p (LOOP_CLOSED_SSA));
1322       rewrite_into_loop_closed_ssa (NULL, TODO_update_ssa);
1323     }
1324   BITMAP_FREE (loop_closed_ssa_invalidated);
1325
1326   if (changed)
1327     return TODO_cleanup_cfg;
1328   return 0;
1329 }
1330
1331 /* Process loops from innermost to outer, stopping at the innermost
1332    loop we unrolled.  */
1333
1334 static bool
1335 tree_unroll_loops_completely_1 (bool may_increase_size, bool unroll_outer,
1336                                 bitmap father_bbs, class loop *loop)
1337 {
1338   class loop *loop_father;
1339   bool changed = false;
1340   class loop *inner;
1341   enum unroll_level ul;
1342   unsigned num = number_of_loops (cfun);
1343
1344   /* Process inner loops first.  Don't walk loops added by the recursive
1345      calls because SSA form is not up-to-date.  They can be handled in the
1346      next iteration.  */
1347   bitmap child_father_bbs = NULL;
1348   for (inner = loop->inner; inner != NULL; inner = inner->next)
1349     if ((unsigned) inner->num < num)
1350       {
1351         if (!child_father_bbs)
1352           child_father_bbs = BITMAP_ALLOC (NULL);
1353         if (tree_unroll_loops_completely_1 (may_increase_size, unroll_outer,
1354                                             child_father_bbs, inner))
1355           {
1356             bitmap_ior_into (father_bbs, child_father_bbs);
1357             bitmap_clear (child_father_bbs);
1358             changed = true;
1359           }
1360       }
1361   if (child_father_bbs)
1362     BITMAP_FREE (child_father_bbs);
1363
1364   /* If we changed an inner loop we cannot process outer loops in this
1365      iteration because SSA form is not up-to-date.  Continue with
1366      siblings of outer loops instead.  */
1367   if (changed)
1368     {
1369       /* If we are recorded as father clear all other fathers that
1370          are necessarily covered already to avoid redundant work.  */
1371       if (bitmap_bit_p (father_bbs, loop->header->index))
1372         {
1373           bitmap_clear (father_bbs);
1374           bitmap_set_bit (father_bbs, loop->header->index);
1375         }
1376       return true;
1377     }
1378
1379   /* Don't unroll #pragma omp simd loops until the vectorizer
1380      attempts to vectorize those.  */
1381   if (loop->force_vectorize)
1382     return false;
1383
1384   /* Try to unroll this loop.  */
1385   loop_father = loop_outer (loop);
1386   if (!loop_father)
1387     return false;
1388
1389   if (loop->unroll > 1)
1390     ul = UL_ALL;
1391   else if (may_increase_size && optimize_loop_nest_for_speed_p (loop)
1392       /* Unroll outermost loops only if asked to do so or they do
1393          not cause code growth.  */
1394       && (unroll_outer || loop_outer (loop_father)))
1395     ul = UL_ALL;
1396   else
1397     ul = UL_NO_GROWTH;
1398
1399   if (canonicalize_loop_induction_variables
1400         (loop, false, ul, !flag_tree_loop_ivcanon, unroll_outer))
1401     {
1402       /* If we'll continue unrolling, we need to propagate constants
1403          within the new basic blocks to fold away induction variable
1404          computations; otherwise, the size might blow up before the
1405          iteration is complete and the IR eventually cleaned up.  */
1406       if (loop_outer (loop_father))
1407         {
1408           /* Once we process our father we will have processed
1409              the fathers of our children as well, so avoid doing
1410              redundant work and clear fathers we've gathered sofar.  */
1411           bitmap_clear (father_bbs);
1412           bitmap_set_bit (father_bbs, loop_father->header->index);
1413         }
1414
1415       return true;
1416     }
1417
1418   return false;
1419 }
1420
1421 /* Unroll LOOPS completely if they iterate just few times.  Unless
1422    MAY_INCREASE_SIZE is true, perform the unrolling only if the
1423    size of the code does not increase.  */
1424
1425 static unsigned int
1426 tree_unroll_loops_completely (bool may_increase_size, bool unroll_outer)
1427 {
1428   bitmap father_bbs = BITMAP_ALLOC (NULL);
1429   bool changed;
1430   int iteration = 0;
1431   bool irred_invalidated = false;
1432
1433   estimate_numbers_of_iterations (cfun);
1434
1435   do
1436     {
1437       changed = false;
1438       bitmap loop_closed_ssa_invalidated = NULL;
1439
1440       if (loops_state_satisfies_p (LOOP_CLOSED_SSA))
1441         loop_closed_ssa_invalidated = BITMAP_ALLOC (NULL);
1442
1443       free_numbers_of_iterations_estimates (cfun);
1444       estimate_numbers_of_iterations (cfun);
1445
1446       changed = tree_unroll_loops_completely_1 (may_increase_size,
1447                                                 unroll_outer, father_bbs,
1448                                                 current_loops->tree_root);
1449       if (changed)
1450         {
1451           unsigned i;
1452
1453           unloop_loops (loop_closed_ssa_invalidated, &irred_invalidated);
1454
1455           /* We cannot use TODO_update_ssa_no_phi because VOPS gets confused.  */
1456           if (loop_closed_ssa_invalidated
1457               && !bitmap_empty_p (loop_closed_ssa_invalidated))
1458             rewrite_into_loop_closed_ssa (loop_closed_ssa_invalidated,
1459                                           TODO_update_ssa);
1460           else
1461             update_ssa (TODO_update_ssa);
1462
1463           /* father_bbs is a bitmap of loop father header BB indices.
1464              Translate that to what non-root loops these BBs belong to now.  */
1465           bitmap_iterator bi;
1466           bitmap fathers = BITMAP_ALLOC (NULL);
1467           EXECUTE_IF_SET_IN_BITMAP (father_bbs, 0, i, bi)
1468             {
1469               basic_block unrolled_loop_bb = BASIC_BLOCK_FOR_FN (cfun, i);
1470               if (! unrolled_loop_bb)
1471                 continue;
1472               if (loop_outer (unrolled_loop_bb->loop_father))
1473                 bitmap_set_bit (fathers,
1474                                 unrolled_loop_bb->loop_father->num);
1475             }
1476           bitmap_clear (father_bbs);
1477           /* Propagate the constants within the new basic blocks.  */
1478           EXECUTE_IF_SET_IN_BITMAP (fathers, 0, i, bi)
1479             {
1480               loop_p father = get_loop (cfun, i);
1481               bitmap exit_bbs = BITMAP_ALLOC (NULL);
1482               loop_exit *exit = father->exits->next;
1483               while (exit->e)
1484                 {
1485                   bitmap_set_bit (exit_bbs, exit->e->dest->index);
1486                   exit = exit->next;
1487                 }
1488               do_rpo_vn (cfun, loop_preheader_edge (father), exit_bbs);
1489             }
1490           BITMAP_FREE (fathers);
1491
1492           /* This will take care of removing completely unrolled loops
1493              from the loop structures so we can continue unrolling now
1494              innermost loops.  */
1495           if (cleanup_tree_cfg ())
1496             update_ssa (TODO_update_ssa_only_virtuals);
1497
1498           /* Clean up the information about numbers of iterations, since
1499              complete unrolling might have invalidated it.  */
1500           scev_reset ();
1501           if (flag_checking && loops_state_satisfies_p (LOOP_CLOSED_SSA))
1502             verify_loop_closed_ssa (true);
1503         }
1504       if (loop_closed_ssa_invalidated)
1505         BITMAP_FREE (loop_closed_ssa_invalidated);
1506     }
1507   while (changed
1508          && ++iteration <= param_max_unroll_iterations);
1509
1510   BITMAP_FREE (father_bbs);
1511
1512   if (irred_invalidated
1513       && loops_state_satisfies_p (LOOPS_HAVE_MARKED_IRREDUCIBLE_REGIONS))
1514     mark_irreducible_loops ();
1515
1516   return 0;
1517 }
1518
1519 /* Canonical induction variable creation pass.  */
1520
1521 namespace {
1522
1523 const pass_data pass_data_iv_canon =
1524 {
1525   GIMPLE_PASS, /* type */
1526   "ivcanon", /* name */
1527   OPTGROUP_LOOP, /* optinfo_flags */
1528   TV_TREE_LOOP_IVCANON, /* tv_id */
1529   ( PROP_cfg | PROP_ssa ), /* properties_required */
1530   0, /* properties_provided */
1531   0, /* properties_destroyed */
1532   0, /* todo_flags_start */
1533   0, /* todo_flags_finish */
1534 };
1535
1536 class pass_iv_canon : public gimple_opt_pass
1537 {
1538 public:
1539   pass_iv_canon (gcc::context *ctxt)
1540     : gimple_opt_pass (pass_data_iv_canon, ctxt)
1541   {}
1542
1543   /* opt_pass methods: */
1544   virtual bool gate (function *) { return flag_tree_loop_ivcanon != 0; }
1545   virtual unsigned int execute (function *fun);
1546
1547 }; // class pass_iv_canon
1548
1549 unsigned int
1550 pass_iv_canon::execute (function *fun)
1551 {
1552   if (number_of_loops (fun) <= 1)
1553     return 0;
1554
1555   return canonicalize_induction_variables ();
1556 }
1557
1558 } // anon namespace
1559
1560 gimple_opt_pass *
1561 make_pass_iv_canon (gcc::context *ctxt)
1562 {
1563   return new pass_iv_canon (ctxt);
1564 }
1565
1566 /* Complete unrolling of loops.  */
1567
1568 namespace {
1569
1570 const pass_data pass_data_complete_unroll =
1571 {
1572   GIMPLE_PASS, /* type */
1573   "cunroll", /* name */
1574   OPTGROUP_LOOP, /* optinfo_flags */
1575   TV_COMPLETE_UNROLL, /* tv_id */
1576   ( PROP_cfg | PROP_ssa ), /* properties_required */
1577   0, /* properties_provided */
1578   0, /* properties_destroyed */
1579   0, /* todo_flags_start */
1580   0, /* todo_flags_finish */
1581 };
1582
1583 class pass_complete_unroll : public gimple_opt_pass
1584 {
1585 public:
1586   pass_complete_unroll (gcc::context *ctxt)
1587     : gimple_opt_pass (pass_data_complete_unroll, ctxt)
1588   {}
1589
1590   /* opt_pass methods: */
1591   virtual unsigned int execute (function *);
1592
1593 }; // class pass_complete_unroll
1594
1595 unsigned int
1596 pass_complete_unroll::execute (function *fun)
1597 {
1598   if (number_of_loops (fun) <= 1)
1599     return 0;
1600
1601   /* If we ever decide to run loop peeling more than once, we will need to
1602      track loops already peeled in loop structures themselves to avoid
1603      re-peeling the same loop multiple times.  */
1604   if (flag_peel_loops)
1605     peeled_loops = BITMAP_ALLOC (NULL);
1606   unsigned int val = tree_unroll_loops_completely (flag_unroll_loops
1607                                                    || flag_peel_loops
1608                                                    || optimize >= 3, true);
1609   if (peeled_loops)
1610     {
1611       BITMAP_FREE (peeled_loops);
1612       peeled_loops = NULL;
1613     }
1614   return val;
1615 }
1616
1617 } // anon namespace
1618
1619 gimple_opt_pass *
1620 make_pass_complete_unroll (gcc::context *ctxt)
1621 {
1622   return new pass_complete_unroll (ctxt);
1623 }
1624
1625 /* Complete unrolling of inner loops.  */
1626
1627 namespace {
1628
1629 const pass_data pass_data_complete_unrolli =
1630 {
1631   GIMPLE_PASS, /* type */
1632   "cunrolli", /* name */
1633   OPTGROUP_LOOP, /* optinfo_flags */
1634   TV_COMPLETE_UNROLL, /* tv_id */
1635   ( PROP_cfg | PROP_ssa ), /* properties_required */
1636   0, /* properties_provided */
1637   0, /* properties_destroyed */
1638   0, /* todo_flags_start */
1639   0, /* todo_flags_finish */
1640 };
1641
1642 class pass_complete_unrolli : public gimple_opt_pass
1643 {
1644 public:
1645   pass_complete_unrolli (gcc::context *ctxt)
1646     : gimple_opt_pass (pass_data_complete_unrolli, ctxt)
1647   {}
1648
1649   /* opt_pass methods: */
1650   virtual bool gate (function *) { return optimize >= 2; }
1651   virtual unsigned int execute (function *);
1652
1653 }; // class pass_complete_unrolli
1654
1655 unsigned int
1656 pass_complete_unrolli::execute (function *fun)
1657 {
1658   unsigned ret = 0;
1659
1660   loop_optimizer_init (LOOPS_NORMAL | LOOPS_HAVE_RECORDED_EXITS);
1661   if (number_of_loops (fun) > 1)
1662     {
1663       scev_initialize ();
1664       ret = tree_unroll_loops_completely (optimize >= 3, false);
1665       scev_finalize ();
1666     }
1667   loop_optimizer_finalize ();
1668
1669   return ret;
1670 }
1671
1672 } // anon namespace
1673
1674 gimple_opt_pass *
1675 make_pass_complete_unrolli (gcc::context *ctxt)
1676 {
1677   return new pass_complete_unrolli (ctxt);
1678 }
1679
1680