gcc/tree-ssa-loop-ivcanon.c

   1 /* Induction variable canonicalization and loop peeling.
   2    Copyright (C) 2004-2013 Free Software Foundation, Inc.
   3
   4 This file is part of GCC.
   5
   6 GCC is free software; you can redistribute it and/or modify it
   7 under the terms of the GNU General Public License as published by the
   8 Free Software Foundation; either version 3, or (at your option) any
   9 later version.
  10
  11 GCC is distributed in the hope that it will be useful, but WITHOUT
  12 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
  13 FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  14 for more details.
  15
  16 You should have received a copy of the GNU General Public License
  17 along with GCC; see the file COPYING3.  If not see
  18 <http://www.gnu.org/licenses/>.  */
  19
  20 /* This pass detects the loops that iterate a constant number of times,
  21    adds a canonical induction variable (step -1, tested against 0)
  22    and replaces the exit test.  This enables the less powerful rtl
  23    level analysis to use this information.
  24
  25    This might spoil the code in some cases (by increasing register pressure).
  26    Note that in the case the new variable is not needed, ivopts will get rid
  27    of it, so it might only be a problem when there are no other linear induction
  28    variables.  In that case the created optimization possibilities are likely
  29    to pay off.
  30
  31    Additionally in case we detect that it is beneficial to unroll the
  32    loop completely, we do it right here to expose the optimization
  33    possibilities to the following passes.  */
  34
  35 #include "config.h"
  36 #include "system.h"
  37 #include "coretypes.h"
  38 #include "tm.h"
  39 #include "tree.h"
  40 #include "tm_p.h"
  41 #include "basic-block.h"
  42 #include "gimple-pretty-print.h"
  43 #include "tree-flow.h"
  44 #include "cfgloop.h"
  45 #include "tree-pass.h"
  46 #include "tree-chrec.h"
  47 #include "tree-scalar-evolution.h"
  48 #include "params.h"
  49 #include "flags.h"
  50 #include "tree-inline.h"
  51 #include "target.h"
  52
  /* Selects which loops complete unrolling is allowed to touch.  */

  enum unroll_level
  {
    /* Only loops that exit immediately in the first iteration.  */
    UL_SINGLE_ITER,

    /* Only loops whose unrolling will not cause an increase of code
       size.  */
    UL_NO_GROWTH,

    /* All suitable loops.  */
    UL_ALL
  };
  63
  64 /* Adds a canonical induction variable to LOOP iterating NITER times.  EXIT
  65    is the exit edge whose condition is replaced.  */
  66
  67 static void
  68 create_canonical_iv (struct loop *loop, edge exit, tree niter)
  69 {
  70   edge in;
  71   tree type, var;
  72   gimple cond;
  73   gimple_stmt_iterator incr_at;
  74   enum tree_code cmp;
  75
  76   if (dump_file && (dump_flags & TDF_DETAILS))
  77     {
  78       fprintf (dump_file, "Added canonical iv to loop %d, ", loop->num);
  79       print_generic_expr (dump_file, niter, TDF_SLIM);
  80       fprintf (dump_file, " iterations.\n");
  81     }
  82
  83   cond = last_stmt (exit->src);
  84   in = EDGE_SUCC (exit->src, 0);
  85   if (in == exit)
  86     in = EDGE_SUCC (exit->src, 1);
  87
  88   /* Note that we do not need to worry about overflows, since
  89      type of niter is always unsigned and all comparisons are
  90      just for equality/nonequality -- i.e. everything works
  91      with a modulo arithmetics.  */
  92
  93   type = TREE_TYPE (niter);
  94   niter = fold_build2 (PLUS_EXPR, type,
  95                        niter,
  96                        build_int_cst (type, 1));
  97   incr_at = gsi_last_bb (in->src);
  98   create_iv (niter,
  99              build_int_cst (type, -1),
 100              NULL_TREE, loop,
 101              &incr_at, false, NULL, &var);
 102
 103   cmp = (exit->flags & EDGE_TRUE_VALUE) ? EQ_EXPR : NE_EXPR;
 104   gimple_cond_set_code (cond, cmp);
 105   gimple_cond_set_lhs (cond, var);
 106   gimple_cond_set_rhs (cond, build_int_cst (type, 0));
 107   update_stmt (cond);
 108 }
 109
 110 /* Computes an estimated number of insns in LOOP, weighted by WEIGHTS.  */
 111
 112 unsigned
 113 tree_num_loop_insns (struct loop *loop, eni_weights *weights)
 114 {
 115   basic_block *body = get_loop_body (loop);
 116   gimple_stmt_iterator gsi;
 117   unsigned size = 0, i;
 118
 119   for (i = 0; i < loop->num_nodes; i++)
 120     for (gsi = gsi_start_bb (body[i]); !gsi_end_p (gsi); gsi_next (&gsi))
 121       size += estimate_num_insns (gsi_stmt (gsi), weights);
 122   free (body);
 123
 124   return size;
 125 }
 126
 /* Size summary of a loop, as filled in by tree_estimate_loop_size.  */
 struct loop_size
 {
   /* Estimated instruction count of the whole loop body.  */
   int overall;

   /* Part of OVERALL that is likely to be optimized out in peeled
      iterations (e.g. computations based on an induction variable that
      starts at a known constant).  */
   int eliminated_by_peeling;

   /* The same two figures for the last iteration, which is smaller
      because instructions after the exit are not executed.  */
   int last_iteration;
   int last_iteration_eliminated_by_peeling;

   /* Set when some IV computation will become constant.  */
   bool constant_iv;

   /* Calls on the hot path that are not inexpensive builtins and are
      pure or const.  */
   int num_pure_calls_on_hot_path;

   /* Calls on the hot path that are not inexpensive builtins and are
      neither pure nor const.  */
   int num_non_pure_calls_on_hot_path;

   /* Statements on the hot path that are neither calls nor debug
      statements.  */
   int non_call_stmts_on_hot_path;

   /* Branches seen on the hot path.  */
   int num_branches_on_hot_path;
 };
 157
 158 /* Return true if OP in STMT will be constant after peeling LOOP.  */
 159
 160 static bool
 161 constant_after_peeling (tree op, gimple stmt, struct loop *loop)
 162 {
 163   affine_iv iv;
 164
 165   if (is_gimple_min_invariant (op))
 166     return true;
 167
 168   /* We can still fold accesses to constant arrays when index is known.  */
 169   if (TREE_CODE (op) != SSA_NAME)
 170     {
 171       tree base = op;
 172
 173       /* First make fast look if we see constant array inside.  */
 174       while (handled_component_p (base))
 175         base = TREE_OPERAND (base, 0);
 176       if ((DECL_P (base)
 177            && const_value_known_p (base))
 178           || CONSTANT_CLASS_P (base))
 179         {
 180           /* If so, see if we understand all the indices.  */
 181           base = op;
 182           while (handled_component_p (base))
 183             {
 184               if (TREE_CODE (base) == ARRAY_REF
 185                   && !constant_after_peeling (TREE_OPERAND (base, 1), stmt, loop))
 186                 return false;
 187               base = TREE_OPERAND (base, 0);
 188             }
 189           return true;
 190         }
 191       return false;
 192     }
 193
 194   /* Induction variables are constants.  */
 195   if (!simple_iv (loop, loop_containing_stmt (stmt), op, &iv, false))
 196     return false;
 197   if (!is_gimple_min_invariant (iv.base))
 198     return false;
 199   if (!is_gimple_min_invariant (iv.step))
 200     return false;
 201   return true;
 202 }
 203
 204 /* Computes an estimated number of insns in LOOP.
 205    EXIT (if non-NULL) is an exite edge that will be eliminated in all but last
 206    iteration of the loop.
 207    EDGE_TO_CANCEL (if non-NULL) is an non-exit edge eliminated in the last iteration
 208    of loop.
 209    Return results in SIZE, estimate benefits for complete unrolling exiting by EXIT.
 210    Stop estimating after UPPER_BOUND is met.  Return true in this case.  */
 211
 212 static bool
 213 tree_estimate_loop_size (struct loop *loop, edge exit, edge edge_to_cancel, struct loop_size *size,
 214                          int upper_bound)
 215 {
 216   basic_block *body = get_loop_body (loop);
 217   gimple_stmt_iterator gsi;
 218   unsigned int i;
 219   bool after_exit;
 220   vec<basic_block> path = get_loop_hot_path (loop);
 221
 222   size->overall = 0;
 223   size->eliminated_by_peeling = 0;
 224   size->last_iteration = 0;
 225   size->last_iteration_eliminated_by_peeling = 0;
 226   size->num_pure_calls_on_hot_path = 0;
 227   size->num_non_pure_calls_on_hot_path = 0;
 228   size->non_call_stmts_on_hot_path = 0;
 229   size->num_branches_on_hot_path = 0;
 230   size->constant_iv = 0;
 231
 232   if (dump_file && (dump_flags & TDF_DETAILS))
 233     fprintf (dump_file, "Estimating sizes for loop %i\n", loop->num);
 234   for (i = 0; i < loop->num_nodes; i++)
 235     {
 236       if (edge_to_cancel && body[i] != edge_to_cancel->src
 237           && dominated_by_p (CDI_DOMINATORS, body[i], edge_to_cancel->src))
 238         after_exit = true;
 239       else
 240         after_exit = false;
 241       if (dump_file && (dump_flags & TDF_DETAILS))
 242         fprintf (dump_file, " BB: %i, after_exit: %i\n", body[i]->index, after_exit);
 243
 244       for (gsi = gsi_start_bb (body[i]); !gsi_end_p (gsi); gsi_next (&gsi))
 245         {
 246           gimple stmt = gsi_stmt (gsi);
 247           int num = estimate_num_insns (stmt, &eni_size_weights);
 248           bool likely_eliminated = false;
 249           bool likely_eliminated_last = false;
 250           bool likely_eliminated_peeled = false;
 251
 252           if (dump_file && (dump_flags & TDF_DETAILS))
 253             {
 254               fprintf (dump_file, "  size: %3i ", num);
 255               print_gimple_stmt (dump_file, gsi_stmt (gsi), 0, 0);
 256             }
 257
 258           /* Look for reasons why we might optimize this stmt away. */
 259
 260           /* Exit conditional.  */
 261           if (exit && body[i] == exit->src
 262                    && stmt == last_stmt (exit->src))
 263             {
 264               if (dump_file && (dump_flags & TDF_DETAILS))
 265                 fprintf (dump_file, "   Exit condition will be eliminated "
 266                          "in peeled copies.\n");
 267               likely_eliminated_peeled = true;
 268             }
 269           else if (edge_to_cancel && body[i] == edge_to_cancel->src
 270                    && stmt == last_stmt (edge_to_cancel->src))
 271             {
 272               if (dump_file && (dump_flags & TDF_DETAILS))
 273                 fprintf (dump_file, "   Exit condition will be eliminated "
 274                          "in last copy.\n");
 275               likely_eliminated_last = true;
 276             }
 277           /* Sets of IV variables  */
 278           else if (gimple_code (stmt) == GIMPLE_ASSIGN
 279               && constant_after_peeling (gimple_assign_lhs (stmt), stmt, loop))
 280             {
 281               if (dump_file && (dump_flags & TDF_DETAILS))
 282                 fprintf (dump_file, "   Induction variable computation will"
 283                          " be folded away.\n");
 284               likely_eliminated = true;
 285             }
 286           /* Assignments of IV variables.  */
 287           else if (gimple_code (stmt) == GIMPLE_ASSIGN
 288                    && TREE_CODE (gimple_assign_lhs (stmt)) == SSA_NAME
 289                    && constant_after_peeling (gimple_assign_rhs1 (stmt), stmt, loop)
 290                    && (gimple_assign_rhs_class (stmt) != GIMPLE_BINARY_RHS
 291                        || constant_after_peeling (gimple_assign_rhs2 (stmt),
 292                                                   stmt, loop)))
 293             {
 294               size->constant_iv = true;
 295               if (dump_file && (dump_flags & TDF_DETAILS))
 296                 fprintf (dump_file, "   Constant expression will be folded away.\n");
 297               likely_eliminated = true;
 298             }
 299           /* Conditionals.  */
 300           else if ((gimple_code (stmt) == GIMPLE_COND
 301                     && constant_after_peeling (gimple_cond_lhs (stmt), stmt, loop)
 302                     && constant_after_peeling (gimple_cond_rhs (stmt), stmt, loop))
 303                    || (gimple_code (stmt) == GIMPLE_SWITCH
 304                        && constant_after_peeling (gimple_switch_index (stmt), stmt, loop)))
 305             {
 306               if (dump_file && (dump_flags & TDF_DETAILS))
 307                 fprintf (dump_file, "   Constant conditional.\n");
 308               likely_eliminated = true;
 309             }
 310
 311           size->overall += num;
 312           if (likely_eliminated || likely_eliminated_peeled)
 313             size->eliminated_by_peeling += num;
 314           if (!after_exit)
 315             {
 316               size->last_iteration += num;
 317               if (likely_eliminated || likely_eliminated_last)
 318                 size->last_iteration_eliminated_by_peeling += num;
 319             }
 320           if ((size->overall * 3 / 2 - size->eliminated_by_peeling
 321               - size->last_iteration_eliminated_by_peeling) > upper_bound)
 322             {
 323               free (body);
 324               path.release ();
 325               return true;
 326             }
 327         }
 328     }
 329   while (path.length ())
 330     {
 331       basic_block bb = path.pop ();
 332       for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
 333         {
 334           gimple stmt = gsi_stmt (gsi);
 335           if (gimple_code (stmt) == GIMPLE_CALL)
 336             {
 337               int flags = gimple_call_flags (stmt);
 338               tree decl = gimple_call_fndecl (stmt);
 339
 340               if (decl && DECL_IS_BUILTIN (decl)
 341                   && is_inexpensive_builtin (decl))
 342                 ;
 343               else if (flags & (ECF_PURE | ECF_CONST))
 344                 size->num_pure_calls_on_hot_path++;
 345               else
 346                 size->num_non_pure_calls_on_hot_path++;
 347               size->num_branches_on_hot_path ++;
 348             }
 349           else if (gimple_code (stmt) != GIMPLE_CALL
 350                    && gimple_code (stmt) != GIMPLE_DEBUG)
 351             size->non_call_stmts_on_hot_path++;
 352           if (((gimple_code (stmt) == GIMPLE_COND
 353                 && (!constant_after_peeling (gimple_cond_lhs (stmt), stmt, loop)
 354                     || constant_after_peeling (gimple_cond_rhs (stmt), stmt, loop)))
 355                || (gimple_code (stmt) == GIMPLE_SWITCH
 356                    && !constant_after_peeling (gimple_switch_index (stmt), stmt, loop)))
 357               && (!exit || bb != exit->src))
 358             size->num_branches_on_hot_path++;
 359         }
 360     }
 361   path.release ();
 362   if (dump_file && (dump_flags & TDF_DETAILS))
 363     fprintf (dump_file, "size: %i-%i, last_iteration: %i-%i\n", size->overall,
 364              size->eliminated_by_peeling, size->last_iteration,
 365              size->last_iteration_eliminated_by_peeling);
 366
 367   free (body);
 368   return false;
 369 }
 370
 371 /* Estimate number of insns of completely unrolled loop.
 372    It is (NUNROLL + 1) * size of loop body with taking into account
 373    the fact that in last copy everything after exit conditional
 374    is dead and that some instructions will be eliminated after
 375    peeling.
 376
 377    Loop body is likely going to simplify futher, this is difficult
 378    to guess, we just decrease the result by 1/3.  */
 379
 380 static unsigned HOST_WIDE_INT
 381 estimated_unrolled_size (struct loop_size *size,
 382                          unsigned HOST_WIDE_INT nunroll)
 383 {
 384   HOST_WIDE_INT unr_insns = ((nunroll)
 385                              * (HOST_WIDE_INT) (size->overall
 386                                                 - size->eliminated_by_peeling));
 387   if (!nunroll)
 388     unr_insns = 0;
 389   unr_insns += size->last_iteration - size->last_iteration_eliminated_by_peeling;
 390
 391   unr_insns = unr_insns * 2 / 3;
 392   if (unr_insns <= 0)
 393     unr_insns = 1;
 394
 395   return unr_insns;
 396 }
 397
 398 /* Loop LOOP is known to not loop.  See if there is an edge in the loop
 399    body that can be remove to make the loop to always exit and at
 400    the same time it does not make any code potentially executed
 401    during the last iteration dead.
 402
 403    After complette unrolling we still may get rid of the conditional
 404    on the exit in the last copy even if we have no idea what it does.
 405    This is quite common case for loops of form
 406
 407      int a[5];
 408      for (i=0;i<b;i++)
 409        a[i]=0;
 410
 411    Here we prove the loop to iterate 5 times but we do not know
 412    it from induction variable.
 413
 414    For now we handle only simple case where there is exit condition
 415    just before the latch block and the latch block contains no statements
 416    with side effect that may otherwise terminate the execution of loop
 417    (such as by EH or by terminating the program or longjmp).
 418
 419    In the general case we may want to cancel the paths leading to statements
 420    loop-niter identified as having undefined effect in the last iteration.
 421    The other cases are hopefully rare and will be cleaned up later.  */
 422
 423 edge
 424 loop_edge_to_cancel (struct loop *loop)
 425 {
 426   vec<edge> exits;
 427   unsigned i;
 428   edge edge_to_cancel;
 429   gimple_stmt_iterator gsi;
 430
 431   /* We want only one predecestor of the loop.  */
 432   if (EDGE_COUNT (loop->latch->preds) > 1)
 433     return NULL;
 434
 435   exits = get_loop_exit_edges (loop);
 436
 437   FOR_EACH_VEC_ELT (exits, i, edge_to_cancel)
 438     {
 439        /* Find the other edge than the loop exit
 440           leaving the conditoinal.  */
 441        if (EDGE_COUNT (edge_to_cancel->src->succs) != 2)
 442          continue;
 443        if (EDGE_SUCC (edge_to_cancel->src, 0) == edge_to_cancel)
 444          edge_to_cancel = EDGE_SUCC (edge_to_cancel->src, 1);
 445        else
 446          edge_to_cancel = EDGE_SUCC (edge_to_cancel->src, 0);
 447
 448       /* We only can handle conditionals.  */
 449       if (!(edge_to_cancel->flags & (EDGE_TRUE_VALUE | EDGE_FALSE_VALUE)))
 450         continue;
 451
 452       /* We should never have conditionals in the loop latch. */
 453       gcc_assert (edge_to_cancel->dest != loop->header);
 454
 455       /* Check that it leads to loop latch.  */
 456       if (edge_to_cancel->dest != loop->latch)
 457         continue;
 458
 459       exits.release ();
 460
 461       /* Verify that the code in loop latch does nothing that may end program
 462          execution without really reaching the exit.  This may include
 463          non-pure/const function calls, EH statements, volatile ASMs etc.  */
 464       for (gsi = gsi_start_bb (loop->latch); !gsi_end_p (gsi); gsi_next (&gsi))
 465         if (gimple_has_side_effects (gsi_stmt (gsi)))
 466            return NULL;
 467       return edge_to_cancel;
 468     }
 469   exits.release ();
 470   return NULL;
 471 }
 472
 473 /* Remove all tests for exits that are known to be taken after LOOP was
 474    peeled NPEELED times. Put gcc_unreachable before every statement
 475    known to not be executed.  */
 476
 477 static bool
 478 remove_exits_and_undefined_stmts (struct loop *loop, unsigned int npeeled)
 479 {
 480   struct nb_iter_bound *elt;
 481   bool changed = false;
 482
 483   for (elt = loop->bounds; elt; elt = elt->next)
 484     {
 485       /* If statement is known to be undefined after peeling, turn it
 486          into unreachable (or trap when debugging experience is supposed
 487          to be good).  */
 488       if (!elt->is_exit
 489           && elt->bound.ult (double_int::from_uhwi (npeeled)))
 490         {
 491           gimple_stmt_iterator gsi = gsi_for_stmt (elt->stmt);
 492           gimple stmt = gimple_build_call
 493               (builtin_decl_implicit (BUILT_IN_UNREACHABLE), 0);
 494
 495           gimple_set_location (stmt, gimple_location (elt->stmt));
 496           gsi_insert_before (&gsi, stmt, GSI_NEW_STMT);
 497           changed = true;
 498           if (dump_file && (dump_flags & TDF_DETAILS))
 499             {
 500               fprintf (dump_file, "Forced statement unreachable: ");
 501               print_gimple_stmt (dump_file, elt->stmt, 0, 0);
 502             }
 503         }
 504       /* If we know the exit will be taken after peeling, update.  */
 505       else if (elt->is_exit
 506                && elt->bound.ule (double_int::from_uhwi (npeeled)))
 507         {
 508           basic_block bb = gimple_bb (elt->stmt);
 509           edge exit_edge = EDGE_SUCC (bb, 0);
 510
 511           if (dump_file && (dump_flags & TDF_DETAILS))
 512             {
 513               fprintf (dump_file, "Forced exit to be taken: ");
 514               print_gimple_stmt (dump_file, elt->stmt, 0, 0);
 515             }
 516           if (!loop_exit_edge_p (loop, exit_edge))
 517             exit_edge = EDGE_SUCC (bb, 1);
 518           gcc_checking_assert (loop_exit_edge_p (loop, exit_edge));
 519           if (exit_edge->flags & EDGE_TRUE_VALUE)
 520             gimple_cond_make_true (elt->stmt);
 521           else
 522             gimple_cond_make_false (elt->stmt);
 523           update_stmt (elt->stmt);
 524           changed = true;
 525         }
 526     }
 527   return changed;
 528 }
 529
 530 /* Remove all exits that are known to be never taken because of the loop bound
 531    discovered.  */
 532
 533 static bool
 534 remove_redundant_iv_tests (struct loop *loop)
 535 {
 536   struct nb_iter_bound *elt;
 537   bool changed = false;
 538
 539   if (!loop->any_upper_bound)
 540     return false;
 541   for (elt = loop->bounds; elt; elt = elt->next)
 542     {
 543       /* Exit is pointless if it won't be taken before loop reaches
 544          upper bound.  */
 545       if (elt->is_exit && loop->any_upper_bound
 546           && loop->nb_iterations_upper_bound.ult (elt->bound))
 547         {
 548           basic_block bb = gimple_bb (elt->stmt);
 549           edge exit_edge = EDGE_SUCC (bb, 0);
 550           struct tree_niter_desc niter;
 551
 552           if (!loop_exit_edge_p (loop, exit_edge))
 553             exit_edge = EDGE_SUCC (bb, 1);
 554
 555           /* Only when we know the actual number of iterations, not
 556              just a bound, we can remove the exit.  */
 557           if (!number_of_iterations_exit (loop, exit_edge,
 558                                           &niter, false, false)
 559               || !integer_onep (niter.assumptions)
 560               || !integer_zerop (niter.may_be_zero)
 561               || !niter.niter
 562               || TREE_CODE (niter.niter) != INTEGER_CST
 563               || !loop->nb_iterations_upper_bound.ult
 564                    (tree_to_double_int (niter.niter)))
 565             continue;
 566
 567           if (dump_file && (dump_flags & TDF_DETAILS))
 568             {
 569               fprintf (dump_file, "Removed pointless exit: ");
 570               print_gimple_stmt (dump_file, elt->stmt, 0, 0);
 571             }
 572           if (exit_edge->flags & EDGE_TRUE_VALUE)
 573             gimple_cond_make_false (elt->stmt);
 574           else
 575             gimple_cond_make_true (elt->stmt);
 576           update_stmt (elt->stmt);
 577           changed = true;
 578         }
 579     }
 580   return changed;
 581 }
 582
 583 /* Stores loops that will be unlooped after we process whole loop tree. */
 584 static vec<loop_p> loops_to_unloop;
 585 static vec<int> loops_to_unloop_nunroll;
 586
 587 /* Cancel all fully unrolled loops by putting __builtin_unreachable
 588    on the latch edge.
 589    We do it after all unrolling since unlooping moves basic blocks
 590    across loop boundaries trashing loop closed SSA form as well
 591    as SCEV info needed to be intact during unrolling.
 592
 593    IRRED_INVALIDATED is used to bookkeep if information about
 594    irreducible regions may become invalid as a result
 595    of the transformation.
 596    LOOP_CLOSED_SSA_INVALIDATED is used to bookkepp the case
 597    when we need to go into loop closed SSA form.  */
 598
 599 void
 600 unloop_loops (bitmap loop_closed_ssa_invalidated,
 601               bool *irred_invalidated)
 602 {
 603   while (loops_to_unloop.length ())
 604     {
 605       struct loop *loop = loops_to_unloop.pop ();
 606       int n_unroll = loops_to_unloop_nunroll.pop ();
 607       basic_block latch = loop->latch;
 608       edge latch_edge = loop_latch_edge (loop);
 609       int flags = latch_edge->flags;
 610       location_t locus = latch_edge->goto_locus;
 611       gimple stmt;
 612       gimple_stmt_iterator gsi;
 613
 614       remove_exits_and_undefined_stmts (loop, n_unroll);
 615
 616       /* Unloop destroys the latch edge.  */
 617       unloop (loop, irred_invalidated, loop_closed_ssa_invalidated);
 618
 619       /* Create new basic block for the latch edge destination and wire
 620          it in.  */
 621       stmt = gimple_build_call (builtin_decl_implicit (BUILT_IN_UNREACHABLE), 0);
 622       latch_edge = make_edge (latch, create_basic_block (NULL, NULL, latch), flags);
 623       latch_edge->probability = 0;
 624       latch_edge->count = 0;
 625       latch_edge->flags |= flags;
 626       latch_edge->goto_locus = locus;
 627
 628       latch_edge->dest->loop_father = current_loops->tree_root;
 629       latch_edge->dest->count = 0;
 630       latch_edge->dest->frequency = 0;
 631       set_immediate_dominator (CDI_DOMINATORS, latch_edge->dest, latch_edge->src);
 632
 633       gsi = gsi_start_bb (latch_edge->dest);
 634       gsi_insert_after (&gsi, stmt, GSI_NEW_STMT);
 635     }
 636   loops_to_unloop.release ();
 637   loops_to_unloop_nunroll.release ();
 638 }
 639
 640 /* Tries to unroll LOOP completely, i.e. NITER times.
 641    UL determines which loops we are allowed to unroll.
 642    EXIT is the exit of the loop that should be eliminated.
 643    MAXITER specifies a bound on the number of iterations, -1 if it is
 644    not known or too large for HOST_WIDE_INT.  The location
 645    LOCUS corresponding to the loop is used when emitting
 646    a summary of the unroll to the dump file.  */
 647
 648 static bool
 649 try_unroll_loop_completely (struct loop *loop,
 650                             edge exit, tree niter,
 651                             enum unroll_level ul,
 652                             HOST_WIDE_INT maxiter,
 653                             location_t locus)
 654 {
 655   unsigned HOST_WIDE_INT n_unroll, ninsns, max_unroll, unr_insns;
 656   gimple cond;
 657   struct loop_size size;
 658   bool n_unroll_found = false;
 659   edge edge_to_cancel = NULL;
 660
 661   /* See if we proved number of iterations to be low constant.
 662
 663      EXIT is an edge that will be removed in all but last iteration of
 664      the loop.
 665
 666      EDGE_TO_CANCEL is an edge that will be removed from the last iteration
 667      of the unrolled sequence and is expected to make the final loop not
 668      rolling.
 669
 670      If the number of execution of loop is determined by standard induction
 671      variable test, then EXIT and EDGE_TO_CANCEL are the two edges leaving
 672      from the iv test.  */
 673   if (host_integerp (niter, 1))
 674     {
 675       n_unroll = tree_low_cst (niter, 1);
 676       n_unroll_found = true;
 677       edge_to_cancel = EDGE_SUCC (exit->src, 0);
 678       if (edge_to_cancel == exit)
 679         edge_to_cancel = EDGE_SUCC (exit->src, 1);
 680     }
 681   /* We do not know the number of iterations and thus we can not eliminate
 682      the EXIT edge.  */
 683   else
 684     exit = NULL;
 685
 686   /* See if we can improve our estimate by using recorded loop bounds.  */
 687   if (maxiter >= 0
 688       && (!n_unroll_found || (unsigned HOST_WIDE_INT)maxiter < n_unroll))
 689     {
 690       n_unroll = maxiter;
 691       n_unroll_found = true;
 692       /* Loop terminates before the IV variable test, so we can not
 693          remove it in the last iteration.  */
 694       edge_to_cancel = NULL;
 695     }
 696
 697   if (!n_unroll_found)
 698     return false;
 699
 700   max_unroll = PARAM_VALUE (PARAM_MAX_COMPLETELY_PEEL_TIMES);
 701   if (n_unroll > max_unroll)
 702     return false;
 703
 704   if (!edge_to_cancel)
 705     edge_to_cancel = loop_edge_to_cancel (loop);
 706
 707   if (n_unroll)
 708     {
 709       sbitmap wont_exit;
 710       edge e;
 711       unsigned i;
 712       bool large;
 713       vec<edge> to_remove = vNULL;
 714       if (ul == UL_SINGLE_ITER)
 715         return false;
 716
 717       large = tree_estimate_loop_size
 718                  (loop, exit, edge_to_cancel, &size,
 719                   PARAM_VALUE (PARAM_MAX_COMPLETELY_PEELED_INSNS));
 720       ninsns = size.overall;
 721       if (large)
 722         {
 723           if (dump_file && (dump_flags & TDF_DETAILS))
 724             fprintf (dump_file, "Not unrolling loop %d: it is too large.\n",
 725                      loop->num);
 726           return false;
 727         }
 728
 729       unr_insns = estimated_unrolled_size (&size, n_unroll);
 730       if (dump_file && (dump_flags & TDF_DETAILS))
 731         {
 732           fprintf (dump_file, "  Loop size: %d\n", (int) ninsns);
 733           fprintf (dump_file, "  Estimated size after unrolling: %d\n",
 734                    (int) unr_insns);
 735         }
 736
 737       /* If the code is going to shrink, we don't need to be extra cautious
 738          on guessing if the unrolling is going to be profitable.  */
 739       if (unr_insns
 740           /* If there is IV variable that will become constant, we save
 741              one instruction in the loop prologue we do not account
 742              otherwise.  */
 743           <= ninsns + (size.constant_iv != false))
 744         ;
 745       /* We unroll only inner loops, because we do not consider it profitable
 746          otherwise.  We can still cancel the loopback edge of a non-rolling loop;
 747          this is always a good idea.  */
 748       else if (ul == UL_NO_GROWTH)
 749         {
 750           if (dump_file && (dump_flags & TDF_DETAILS))
 751             fprintf (dump_file, "Not unrolling loop %d: size would grow.\n",
 752                      loop->num);
 753           return false;
 754         }
 755       /* Outer loops tend to be less interesting candidates for complete
 756          unrolling unless we can do a lot of propagation into the inner loop
 757          body.  For now we disable outer loop unrolling when the code would
 758          grow.  */
 759       else if (loop->inner)
 760         {
 761           if (dump_file && (dump_flags & TDF_DETAILS))
 762             fprintf (dump_file, "Not unrolling loop %d: "
 763                      "it is not innermost and code would grow.\n",
 764                      loop->num);
 765           return false;
 766         }
 767       /* If there is call on a hot path through the loop, then
 768          there is most probably not much to optimize.  */
 769       else if (size.num_non_pure_calls_on_hot_path)
 770         {
 771           if (dump_file && (dump_flags & TDF_DETAILS))
 772             fprintf (dump_file, "Not unrolling loop %d: "
 773                      "contains call and code would grow.\n",
 774                      loop->num);
 775           return false;
 776         }
 777       /* If there is pure/const call in the function, then we
 778          can still optimize the unrolled loop body if it contains
 779          some other interesting code than the calls and code
 780          storing or cumulating the return value.  */
 781       else if (size.num_pure_calls_on_hot_path
 782                /* One IV increment, one test, one ivtmp store
 783                   and one useful stmt.  That is about minimal loop
 784                   doing pure call.  */
 785                && (size.non_call_stmts_on_hot_path
 786                    <= 3 + size.num_pure_calls_on_hot_path))
 787         {
 788           if (dump_file && (dump_flags & TDF_DETAILS))
 789             fprintf (dump_file, "Not unrolling loop %d: "
 790                      "contains just pure calls and code would grow.\n",
 791                      loop->num);
 792           return false;
 793         }
 794       /* Complete unrolling is major win when control flow is removed and
 795          one big basic block is created.  If the loop contains control flow
 796          the optimization may still be a win because of eliminating the loop
 797          overhead but it also may blow the branch predictor tables.
 798          Limit number of branches on the hot path through the peeled
 799          sequence.  */
 800       else if (size.num_branches_on_hot_path * (int)n_unroll
 801                > PARAM_VALUE (PARAM_MAX_PEEL_BRANCHES))
 802         {
 803           if (dump_file && (dump_flags & TDF_DETAILS))
 804             fprintf (dump_file, "Not unrolling loop %d: "
 805                      " number of branches on hot path in the unrolled sequence"
 806                      " reach --param max-peel-branches limit.\n",
 807                      loop->num);
 808           return false;
 809         }
 810       else if (unr_insns
 811                > (unsigned) PARAM_VALUE (PARAM_MAX_COMPLETELY_PEELED_INSNS))
 812         {
 813           if (dump_file && (dump_flags & TDF_DETAILS))
 814             fprintf (dump_file, "Not unrolling loop %d: "
 815                      "(--param max-completely-peeled-insns limit reached).\n",
 816                      loop->num);
 817           return false;
 818         }
 819
 820       initialize_original_copy_tables ();
 821       wont_exit = sbitmap_alloc (n_unroll + 1);
 822       bitmap_ones (wont_exit);
 823       bitmap_clear_bit (wont_exit, 0);
 824
 825       if (!gimple_duplicate_loop_to_header_edge (loop, loop_preheader_edge (loop),
 826                                                  n_unroll, wont_exit,
 827                                                  exit, &to_remove,
 828                                                  DLTHE_FLAG_UPDATE_FREQ
 829                                                  | DLTHE_FLAG_COMPLETTE_PEEL))
 830         {
 831           free_original_copy_tables ();
 832           free (wont_exit);
 833           if (dump_file && (dump_flags & TDF_DETAILS))
 834             fprintf (dump_file, "Failed to duplicate the loop\n");
 835           return false;
 836         }
 837
 838       FOR_EACH_VEC_ELT (to_remove, i, e)
 839         {
 840           bool ok = remove_path (e);
 841           gcc_assert (ok);
 842         }
 843
 844       to_remove.release ();
 845       free (wont_exit);
 846       free_original_copy_tables ();
 847     }
 848
 849
 850   /* Remove the conditional from the last copy of the loop.  */
 851   if (edge_to_cancel)
 852     {
 853       cond = last_stmt (edge_to_cancel->src);
 854       if (edge_to_cancel->flags & EDGE_TRUE_VALUE)
 855         gimple_cond_make_false (cond);
 856       else
 857         gimple_cond_make_true (cond);
 858       update_stmt (cond);
 859       /* Do not remove the path. Doing so may remove outer loop
 860          and confuse bookkeeping code in tree_unroll_loops_completely.  */
 861     }
 862
 863   /* Store the loop for later unlooping and exit removal.  */
 864   loops_to_unloop.safe_push (loop);
 865   loops_to_unloop_nunroll.safe_push (n_unroll);
 866
 867   if (dump_enabled_p ())
 868     {
 869       if (!n_unroll)
 870         dump_printf_loc (MSG_OPTIMIZED_LOCATIONS | TDF_DETAILS, locus,
 871                          "Turned loop into non-loop; it never loops.\n");
 872       else
 873         {
 874           dump_printf_loc (MSG_OPTIMIZED_LOCATIONS | TDF_DETAILS, locus,
 875                            "Completely unroll loop %d times", (int)n_unroll);
 876           if (profile_info)
 877             dump_printf (MSG_OPTIMIZED_LOCATIONS | TDF_DETAILS,
 878                          " (header execution count %d)",
 879                          (int)loop->header->count);
 880           dump_printf (MSG_OPTIMIZED_LOCATIONS | TDF_DETAILS, "\n");
 881         }
 882     }
 883
 884   if (dump_file && (dump_flags & TDF_DETAILS))
 885     {
 886       if (exit)
 887         fprintf (dump_file, "Exit condition of peeled iterations was "
 888                  "eliminated.\n");
 889       if (edge_to_cancel)
 890         fprintf (dump_file, "Last iteration exit edge was proved true.\n");
 891       else
 892         fprintf (dump_file, "Latch of last iteration was marked by "
 893                  "__builtin_unreachable ().\n");
 894     }
 895
 896   return true;
 897 }
 898
 899 /* Adds a canonical induction variable to LOOP if suitable.  CREATE_IV is
 900    true if we may create a new iv.  UL determines which loops we are allowed
 901    to completely unroll.  If TRY_EVAL is true, we try to determine the number
 902    of iterations by direct evaluation when no constant count is known.  Returns
 903    true if try_unroll_loop_completely or remove_redundant_iv_tests returned true.  */
 904
 905 static bool
 906 canonicalize_loop_induction_variables (struct loop *loop,
 907                                        bool create_iv, enum unroll_level ul,
 908                                        bool try_eval)
 909 {
 910   edge exit = NULL;  /* Exit associated with NITER, if any.  */
 911   tree niter;  /* Number of latch executions; may be non-constant.  */
 912   HOST_WIDE_INT maxiter;  /* Result of max_loop_iterations_int.  */
 913   bool modified = false;  /* Set by remove_redundant_iv_tests.  */
 914   location_t locus = UNKNOWN_LOCATION;  /* Location of the exit test.  */
 915
 916   niter = number_of_latch_executions (loop);
 917   exit = single_exit (loop);
 918   if (TREE_CODE (niter) == INTEGER_CST)
 919     locus = gimple_location (last_stmt (exit->src));  /* NOTE(review): EXIT not NULL-checked here.  */
 920   else
 921     {
 922       /* If the loop has more than one exit, try checking all of them
 923          for # of iterations determinable through scev.  */
 924       if (!exit)
 925         niter = find_loop_niter (loop, &exit);
 926
 927       /* Finally if everything else fails, try brute force evaluation.  */
 928       if (try_eval
 929           && (chrec_contains_undetermined (niter)
 930               || TREE_CODE (niter) != INTEGER_CST))
 931         niter = find_loop_niter_by_eval (loop, &exit);
 932
 933       if (exit)
 934         locus = gimple_location (last_stmt (exit->src));
 935
 936       if (TREE_CODE (niter) != INTEGER_CST)
 937         exit = NULL;  /* Drop EXIT when no constant count was found.  */
 938     }
 939
 940   /* We work exceptionally hard here to estimate the bound
 941      by find_loop_niter_by_eval.  Be sure to keep it for future.  */
 942   if (niter && TREE_CODE (niter) == INTEGER_CST)
 943     {
 944       record_niter_bound (loop, tree_to_double_int (niter),
 945                           exit == single_likely_exit (loop), true);
 946     }
 947
 948   /* Force re-computation of loop bounds so we can remove redundant exits.  */
 949   maxiter = max_loop_iterations_int (loop);
 950
 951   if (dump_file && (dump_flags & TDF_DETAILS)
 952       && TREE_CODE (niter) == INTEGER_CST)
 953     {
 954       fprintf (dump_file, "Loop %d iterates ", loop->num);
 955       print_generic_expr (dump_file, niter, TDF_SLIM);
 956       fprintf (dump_file, " times.\n");
 957     }
 958   if (dump_file && (dump_flags & TDF_DETAILS)
 959       && maxiter >= 0)  /* The bound is only dumped when non-negative.  */
 960     {
 961       fprintf (dump_file, "Loop %d iterates at most %i times.\n", loop->num,
 962                (int)maxiter);
 963     }
 964
 965   /* Remove exits that are known to be never taken based on loop bound.
 966      Needs to be called after computation of max_loop_iterations_int that
 967      populates the loop bounds.  */
 968   modified |= remove_redundant_iv_tests (loop);
 969
 970   if (try_unroll_loop_completely (loop, exit, niter, ul, maxiter, locus))
 971     return true;  /* Unrolled; skip iv creation.  */
 972
 973   if (create_iv
 974       && niter && !chrec_contains_undetermined (niter)
 975       && exit && just_once_each_iteration_p (loop, exit->src))
 976     create_canonical_iv (loop, exit, niter);  /* Does not affect the return value.  */
 977
 978   return modified;
 979 }
 980
 981 /* The main entry point of the pass.  Adds canonical induction variables to the
 982    suitable loops (innermost first).  Returns TODO_cleanup_cfg iff anything changed.  */
 983
 984 unsigned int
 985 canonicalize_induction_variables (void)
 986 {
 987   loop_iterator li;
 988   struct loop *loop;
 989   bool changed = false;  /* Set when any loop reports a change.  */
 990   bool irred_invalidated = false;  /* Passed to unloop_loops, which may set it.  */
 991   bitmap loop_closed_ssa_invalidated = BITMAP_ALLOC (NULL);  /* Freed below.  */
 992
 993   free_numbers_of_iterations_estimates ();
 994   estimate_numbers_of_iterations ();
 995
 996   FOR_EACH_LOOP (li, loop, LI_FROM_INNERMOST)
 997     {
 998       changed |= canonicalize_loop_induction_variables (loop,
 999                                                         true, UL_SINGLE_ITER,  /* create_iv, ul.  */
1000                                                         true);  /* try_eval.  */
1001     }
1002   gcc_assert (!need_ssa_update_p (cfun));  /* No SSA update may be pending here.  */
1003
1004   unloop_loops (loop_closed_ssa_invalidated, &irred_invalidated);
1005   if (irred_invalidated
1006       && loops_state_satisfies_p (LOOPS_HAVE_MARKED_IRREDUCIBLE_REGIONS))
1007     mark_irreducible_loops ();  /* Recompute the marks only if they were kept.  */
1008
1009   /* Clean up the information about numbers of iterations, since brute force
1010      evaluation could reveal new information.  */
1011   scev_reset ();
1012
1013   if (!bitmap_empty_p (loop_closed_ssa_invalidated))
1014     {
1015       gcc_checking_assert (loops_state_satisfies_p (LOOP_CLOSED_SSA));
1016       rewrite_into_loop_closed_ssa (NULL, TODO_update_ssa);
1017     }
1018   BITMAP_FREE (loop_closed_ssa_invalidated);
1019
1020   if (changed)
1021     return TODO_cleanup_cfg;
1022   return 0;
1023 }
1024
1025 /* Propagate VAL into all uses of SSA_NAME and refold each using statement.  */
1026
1027 static void
1028 propagate_into_all_uses (tree ssa_name, tree val)
1029 {
1030   imm_use_iterator iter;
1031   gimple use_stmt;
1032
1033   FOR_EACH_IMM_USE_STMT (use_stmt, iter, ssa_name)
1034     {
1035       gimple_stmt_iterator use_stmt_gsi = gsi_for_stmt (use_stmt);
1036       use_operand_p use;
1037
1038       FOR_EACH_IMM_USE_ON_STMT (use, iter)
1039         SET_USE (use, val);  /* Replace this use of SSA_NAME with VAL.  */
1040
1041       if (is_gimple_assign (use_stmt)
1042           && get_gimple_rhs_class (gimple_assign_rhs_code (use_stmt))
1043              == GIMPLE_SINGLE_RHS)
1044         {
1045           tree rhs = gimple_assign_rhs1 (use_stmt);
1046
1047           if (TREE_CODE (rhs) == ADDR_EXPR)  /* Only address expressions need this.  */
1048             recompute_tree_invariant_for_addr_expr (rhs);
1049         }
1050
1051       fold_stmt_inplace (&use_stmt_gsi);  /* Simplify with the new operand.  */
1052       update_stmt (use_stmt);
1053       maybe_clean_or_replace_eh_stmt (use_stmt, use_stmt);  /* Same stmt as old and new.  */
1054     }
1055 }
1056
1057 /* Propagate constant SSA_NAMEs defined in BB into their uses and delete the definitions.  */
1058
1059 static void
1060 propagate_constants_for_unrolling (basic_block bb)
1061 {
1062   gimple_stmt_iterator gsi;
1063
1064   /* Look for degenerate PHI nodes with constant argument.  */
1065   for (gsi = gsi_start_phis (bb); !gsi_end_p (gsi); )  /* GSI is advanced in the body.  */
1066     {
1067       gimple phi = gsi_stmt (gsi);
1068       tree result = gimple_phi_result (phi);
1069       tree arg = gimple_phi_arg_def (phi, 0);  /* Only argument 0 is inspected.  */
1070
1071       if (gimple_phi_num_args (phi) == 1 && TREE_CODE (arg) == INTEGER_CST)
1072         {
1073           propagate_into_all_uses (result, arg);
1074           gsi_remove (&gsi, true);
1075           release_ssa_name (result);  /* All uses were replaced above.  */
1076         }
1077       else
1078         gsi_next (&gsi);
1079     }
1080
1081   /* Look for assignments to SSA names with constant RHS.  */
1082   for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); )  /* GSI is advanced in the body.  */
1083     {
1084       gimple stmt = gsi_stmt (gsi);
1085       tree lhs;
1086
1087       if (is_gimple_assign (stmt)
1088           && (lhs = gimple_assign_lhs (stmt), TREE_CODE (lhs) == SSA_NAME)  /* Comma: fetch LHS, then test it.  */
1089           && gimple_assign_rhs_code (stmt) == INTEGER_CST)  /* RHS is a bare integer constant.  */
1090         {
1091           propagate_into_all_uses (lhs, gimple_assign_rhs1 (stmt));
1092           gsi_remove (&gsi, true);
1093           release_ssa_name (lhs);  /* All uses were replaced above.  */
1094         }
1095       else
1096         gsi_next (&gsi);
1097     }
1098 }
1099
1100 /* Unroll loops completely if they iterate just few times.  Unless
1101    MAY_INCREASE_SIZE is true, perform the unrolling only if the size of the
1102    code does not increase.  Outermost loops may grow only if UNROLL_OUTER.  */
1103
1104 unsigned int
1105 tree_unroll_loops_completely (bool may_increase_size, bool unroll_outer)
1106 {
1107   vec<loop_p, va_stack> father_stack;  /* Parents of loops that reported a change.  */
1108   loop_iterator li;
1109   struct loop *loop;
1110   bool changed;
1111   enum unroll_level ul;
1112   int iteration = 0;  /* Completed rounds of the loop below.  */
1113   bool irred_invalidated = false;  /* Passed to unloop_loops, which may set it.  */
1114
1115   vec_stack_alloc (loop_p, father_stack, 16);
1116   do
1117     {
1118       changed = false;
1119       bitmap loop_closed_ssa_invalidated = NULL;  /* Stays NULL outside LCSSA.  */
1120
1121       if (loops_state_satisfies_p (LOOP_CLOSED_SSA))
1122         loop_closed_ssa_invalidated = BITMAP_ALLOC (NULL);
1123
1124       free_numbers_of_iterations_estimates ();
1125       estimate_numbers_of_iterations ();
1126
1127       FOR_EACH_LOOP (li, loop, LI_FROM_INNERMOST)
1128         {
1129           struct loop *loop_father = loop_outer (loop);
1130
1131           if (may_increase_size && optimize_loop_nest_for_speed_p (loop)
1132               /* Unroll outermost loops only if asked to do so or they do
1133                  not cause code growth.  */
1134               && (unroll_outer || loop_outer (loop_father)))
1135             ul = UL_ALL;
1136           else
1137             ul = UL_NO_GROWTH;
1138
1139           if (canonicalize_loop_induction_variables
1140                  (loop, false, ul, !flag_tree_loop_ivcanon))  /* create_iv is false.  */
1141             {
1142               changed = true;
1143               /* If we'll continue unrolling, we need to propagate constants
1144                  within the new basic blocks to fold away induction variable
1145                  computations; otherwise, the size might blow up before the
1146                  iteration is complete and the IR eventually cleaned up.  */
1147               if (loop_outer (loop_father) && !loop_father->aux)
1148                 {
1149                   father_stack.safe_push (loop_father);
1150                   loop_father->aux = loop_father;  /* AUX marks LOOP_FATHER as queued.  */
1151                 }
1152             }
1153         }
1154
1155       if (changed)
1156         {
1157           struct loop **iter;
1158           unsigned i;
1159
1160           /* Be sure to skip unlooped loops while processing father_stack
1161              array.  */
1162           FOR_EACH_VEC_ELT (loops_to_unloop, i, iter)
1163             (*iter)->aux = NULL;  /* Clear the mark on loops about to be unlooped.  */
1164           FOR_EACH_VEC_ELT (father_stack, i, iter)
1165             if (!(*iter)->aux)
1166               *iter = NULL;  /* This father is itself being unlooped.  */
1167           unloop_loops (loop_closed_ssa_invalidated, &irred_invalidated);
1168
1169           /* We can not use TODO_update_ssa_no_phi because VOPS gets confused.  */
1170           if (loop_closed_ssa_invalidated
1171               && !bitmap_empty_p (loop_closed_ssa_invalidated))
1172             rewrite_into_loop_closed_ssa (loop_closed_ssa_invalidated,
1173                                           TODO_update_ssa);
1174           else
1175             update_ssa (TODO_update_ssa);
1176
1177           /* Propagate the constants within the new basic blocks.  */
1178           FOR_EACH_VEC_ELT (father_stack, i, iter)
1179             if (*iter)
1180               {
1181                 unsigned j;
1182                 basic_block *body = get_loop_body_in_dom_order (*iter);
1183                 for (j = 0; j < (*iter)->num_nodes; j++)
1184                   propagate_constants_for_unrolling (body[j]);
1185                 free (body);
1186                 (*iter)->aux = NULL;  /* Reset the queued mark.  */
1187               }
1188           father_stack.truncate (0);  /* Empty the stack for the next round.  */
1189
1190           /* This will take care of removing completely unrolled loops
1191              from the loop structures so we can continue unrolling now
1192              innermost loops.  */
1193           if (cleanup_tree_cfg ())
1194             update_ssa (TODO_update_ssa_only_virtuals);
1195
1196           /* Clean up the information about numbers of iterations, since
1197              complete unrolling might have invalidated it.  */
1198           scev_reset ();
1199 #ifdef ENABLE_CHECKING
1200           if (loops_state_satisfies_p (LOOP_CLOSED_SSA))
1201             verify_loop_closed_ssa (true);
1202 #endif
1203         }
1204       if (loop_closed_ssa_invalidated)
1205         BITMAP_FREE (loop_closed_ssa_invalidated);
1206     }
1207   while (changed
1208          && ++iteration <= PARAM_VALUE (PARAM_MAX_UNROLL_ITERATIONS));  /* Bounded number of rounds.  */
1209
1210   father_stack.release ();
1211
1212   if (irred_invalidated
1213       && loops_state_satisfies_p (LOOPS_HAVE_MARKED_IRREDUCIBLE_REGIONS))
1214     mark_irreducible_loops ();  /* Recompute the marks only if they were kept.  */
1215
1216   return 0;  /* Always 0: no TODO flags are requested.  */
1217 }