gcc/tree-ssa-loop-ivcanon.c

   1 /* Induction variable canonicalization and loop peeling.
   2    Copyright (C) 2004-2017 Free Software Foundation, Inc.
   3
   4 This file is part of GCC.
   5
   6 GCC is free software; you can redistribute it and/or modify it
   7 under the terms of the GNU General Public License as published by the
   8 Free Software Foundation; either version 3, or (at your option) any
   9 later version.
  10
  11 GCC is distributed in the hope that it will be useful, but WITHOUT
  12 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
  13 FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  14 for more details.
  15
  16 You should have received a copy of the GNU General Public License
  17 along with GCC; see the file COPYING3.  If not see
  18 <http://www.gnu.org/licenses/>.  */
  19
  20 /* This pass detects the loops that iterate a constant number of times,
  21    adds a canonical induction variable (step -1, tested against 0)
  22    and replaces the exit test.  This enables the less powerful rtl
  23    level analysis to use this information.
  24
  25    This might spoil the code in some cases (by increasing register pressure).
  26    Note that in the case the new variable is not needed, ivopts will get rid
  27    of it, so it might only be a problem when there are no other linear induction
  28    variables.  In that case the created optimization possibilities are likely
  29    to pay off.
  30
  31    We also perform
  32      - complete unrolling (or peeling) when the loop rolls few enough
  33        times
  34      - simple peeling (i.e. copying a few initial iterations prior to the loop)
  35        when an estimate of the number of iterations is known (typically from
  36        the profile info).  */
  37
  38 #include "config.h"
  39 #include "system.h"
  40 #include "coretypes.h"
  41 #include "backend.h"
  42 #include "tree.h"
  43 #include "gimple.h"
  44 #include "cfghooks.h"
  45 #include "tree-pass.h"
  46 #include "ssa.h"
  47 #include "cgraph.h"
  48 #include "gimple-pretty-print.h"
  49 #include "fold-const.h"
  50 #include "profile.h"
  51 #include "gimple-fold.h"
  52 #include "tree-eh.h"
  53 #include "gimple-iterator.h"
  54 #include "tree-cfg.h"
  55 #include "tree-ssa-loop-manip.h"
  56 #include "tree-ssa-loop-niter.h"
  57 #include "tree-ssa-loop.h"
  58 #include "tree-into-ssa.h"
  59 #include "cfgloop.h"
  60 #include "tree-chrec.h"
  61 #include "tree-scalar-evolution.h"
  62 #include "params.h"
  63 #include "tree-inline.h"
  64 #include "tree-cfgcleanup.h"
  65 #include "builtins.h"
  66
  67 /* Specifies types of loops that may be unrolled.  */
  68
  69 enum unroll_level
  70 {
  71   UL_SINGLE_ITER,       /* Only loops that exit immediately in the first
  72                            iteration.  */
  73   UL_NO_GROWTH,         /* Only loops whose unrolling will not cause increase
  74                            of code size.  */
  75   UL_ALL                /* All suitable loops.  */
  76 };
  77
  78 /* Adds a canonical induction variable to LOOP iterating NITER times.  EXIT
  79    is the exit edge whose condition is replaced.  */
  80
  81 static void
  82 create_canonical_iv (struct loop *loop, edge exit, tree niter)
  83 {
  84   edge in;
  85   tree type, var;
  86   gcond *cond;
  87   gimple_stmt_iterator incr_at;
  88   enum tree_code cmp;
  89
  90   if (dump_file && (dump_flags & TDF_DETAILS))
  91     {
  92       fprintf (dump_file, "Added canonical iv to loop %d, ", loop->num);
  93       print_generic_expr (dump_file, niter, TDF_SLIM);
  94       fprintf (dump_file, " iterations.\n");
  95     }
  96
  97   cond = as_a <gcond *> (last_stmt (exit->src));
  98   in = EDGE_SUCC (exit->src, 0);
  99   if (in == exit)
 100     in = EDGE_SUCC (exit->src, 1);
 101
 102   /* Note that we do not need to worry about overflows, since
 103      type of niter is always unsigned and all comparisons are
 104      just for equality/nonequality -- i.e. everything works
 105      with a modulo arithmetics.  */
 106
 107   type = TREE_TYPE (niter);
 108   niter = fold_build2 (PLUS_EXPR, type,
 109                        niter,
 110                        build_int_cst (type, 1));
 111   incr_at = gsi_last_bb (in->src);
 112   create_iv (niter,
 113              build_int_cst (type, -1),
 114              NULL_TREE, loop,
 115              &incr_at, false, NULL, &var);
 116
 117   cmp = (exit->flags & EDGE_TRUE_VALUE) ? EQ_EXPR : NE_EXPR;
 118   gimple_cond_set_code (cond, cmp);
 119   gimple_cond_set_lhs (cond, var);
 120   gimple_cond_set_rhs (cond, build_int_cst (type, 0));
 121   update_stmt (cond);
 122 }
 123
 124 /* Describe size of loop as detected by tree_estimate_loop_size.  */
 125 struct loop_size
 126 {
 127   /* Number of instructions in the loop.  */
 128   int overall;
 129
 130   /* Number of instructions that will be likely optimized out in
 131      peeled iterations of loop  (i.e. computation based on induction
 132      variable where induction variable starts at known constant.)  */
 133   int eliminated_by_peeling;
 134
 135   /* Same statistics for last iteration of loop: it is smaller because
 136      instructions after exit are not executed.  */
 137   int last_iteration;
 138   int last_iteration_eliminated_by_peeling;
 139
 140   /* If some IV computation will become constant.  */
 141   bool constant_iv;
 142
 143   /* Number of call stmts that are not a builtin and are pure or const
 144      present on the hot path.  */
 145   int num_pure_calls_on_hot_path;
 146   /* Number of call stmts that are not a builtin and are not pure nor const
 147      present on the hot path.  */
 148   int num_non_pure_calls_on_hot_path;
 149   /* Number of statements other than calls in the loop.  */
 150   int non_call_stmts_on_hot_path;
 151   /* Number of branches seen on the hot path.  */
 152   int num_branches_on_hot_path;
 153 };
 154
 155 /* Return true if OP in STMT will be constant after peeling LOOP.  */
 156
 157 static bool
 158 constant_after_peeling (tree op, gimple *stmt, struct loop *loop)
 159 {
 160   if (is_gimple_min_invariant (op))
 161     return true;
 162
 163   /* We can still fold accesses to constant arrays when index is known.  */
 164   if (TREE_CODE (op) != SSA_NAME)
 165     {
 166       tree base = op;
 167
 168       /* First make fast look if we see constant array inside.  */
 169       while (handled_component_p (base))
 170         base = TREE_OPERAND (base, 0);
 171       if ((DECL_P (base)
 172            && ctor_for_folding (base) != error_mark_node)
 173           || CONSTANT_CLASS_P (base))
 174         {
 175           /* If so, see if we understand all the indices.  */
 176           base = op;
 177           while (handled_component_p (base))
 178             {
 179               if (TREE_CODE (base) == ARRAY_REF
 180                   && !constant_after_peeling (TREE_OPERAND (base, 1), stmt, loop))
 181                 return false;
 182               base = TREE_OPERAND (base, 0);
 183             }
 184           return true;
 185         }
 186       return false;
 187     }
 188
 189   /* Induction variables are constants when defined in loop.  */
 190   if (loop_containing_stmt (stmt) != loop)
 191     return false;
 192   tree ev = analyze_scalar_evolution (loop, op);
 193   if (chrec_contains_undetermined (ev)
 194       || chrec_contains_symbols (ev))
 195     return false;
 196   return true;
 197 }
 198
 199 /* Computes an estimated number of insns in LOOP.
 200    EXIT (if non-NULL) is an exite edge that will be eliminated in all but last
 201    iteration of the loop.
 202    EDGE_TO_CANCEL (if non-NULL) is an non-exit edge eliminated in the last iteration
 203    of loop.
 204    Return results in SIZE, estimate benefits for complete unrolling exiting by EXIT.
 205    Stop estimating after UPPER_BOUND is met.  Return true in this case.  */
 206
 207 static bool
 208 tree_estimate_loop_size (struct loop *loop, edge exit, edge edge_to_cancel,
 209                          struct loop_size *size, int upper_bound)
 210 {
 211   basic_block *body = get_loop_body (loop);
 212   gimple_stmt_iterator gsi;
 213   unsigned int i;
 214   bool after_exit;
 215   vec<basic_block> path = get_loop_hot_path (loop);
 216
 217   size->overall = 0;
 218   size->eliminated_by_peeling = 0;
 219   size->last_iteration = 0;
 220   size->last_iteration_eliminated_by_peeling = 0;
 221   size->num_pure_calls_on_hot_path = 0;
 222   size->num_non_pure_calls_on_hot_path = 0;
 223   size->non_call_stmts_on_hot_path = 0;
 224   size->num_branches_on_hot_path = 0;
 225   size->constant_iv = 0;
 226
 227   if (dump_file && (dump_flags & TDF_DETAILS))
 228     fprintf (dump_file, "Estimating sizes for loop %i\n", loop->num);
 229   for (i = 0; i < loop->num_nodes; i++)
 230     {
 231       if (edge_to_cancel && body[i] != edge_to_cancel->src
 232           && dominated_by_p (CDI_DOMINATORS, body[i], edge_to_cancel->src))
 233         after_exit = true;
 234       else
 235         after_exit = false;
 236       if (dump_file && (dump_flags & TDF_DETAILS))
 237         fprintf (dump_file, " BB: %i, after_exit: %i\n", body[i]->index,
 238                  after_exit);
 239
 240       for (gsi = gsi_start_bb (body[i]); !gsi_end_p (gsi); gsi_next (&gsi))
 241         {
 242           gimple *stmt = gsi_stmt (gsi);
 243           int num = estimate_num_insns (stmt, &eni_size_weights);
 244           bool likely_eliminated = false;
 245           bool likely_eliminated_last = false;
 246           bool likely_eliminated_peeled = false;
 247
 248           if (dump_file && (dump_flags & TDF_DETAILS))
 249             {
 250               fprintf (dump_file, "  size: %3i ", num);
 251               print_gimple_stmt (dump_file, gsi_stmt (gsi), 0);
 252             }
 253
 254           /* Look for reasons why we might optimize this stmt away. */
 255
 256           if (!gimple_has_side_effects (stmt))
 257             {
 258               /* Exit conditional.  */
 259               if (exit && body[i] == exit->src
 260                   && stmt == last_stmt (exit->src))
 261                 {
 262                   if (dump_file && (dump_flags & TDF_DETAILS))
 263                     fprintf (dump_file, "   Exit condition will be eliminated "
 264                              "in peeled copies.\n");
 265                   likely_eliminated_peeled = true;
 266                 }
 267               if (edge_to_cancel && body[i] == edge_to_cancel->src
 268                   && stmt == last_stmt (edge_to_cancel->src))
 269                 {
 270                   if (dump_file && (dump_flags & TDF_DETAILS))
 271                     fprintf (dump_file, "   Exit condition will be eliminated "
 272                              "in last copy.\n");
 273                   likely_eliminated_last = true;
 274                 }
 275               /* Sets of IV variables  */
 276               if (gimple_code (stmt) == GIMPLE_ASSIGN
 277                   && constant_after_peeling (gimple_assign_lhs (stmt), stmt, loop))
 278                 {
 279                   if (dump_file && (dump_flags & TDF_DETAILS))
 280                     fprintf (dump_file, "   Induction variable computation will"
 281                              " be folded away.\n");
 282                   likely_eliminated = true;
 283                 }
 284               /* Assignments of IV variables.  */
 285               else if (gimple_code (stmt) == GIMPLE_ASSIGN
 286                        && TREE_CODE (gimple_assign_lhs (stmt)) == SSA_NAME
 287                        && constant_after_peeling (gimple_assign_rhs1 (stmt),
 288                                                   stmt, loop)
 289                        && (gimple_assign_rhs_class (stmt) != GIMPLE_BINARY_RHS
 290                            || constant_after_peeling (gimple_assign_rhs2 (stmt),
 291                                                       stmt, loop)))
 292                 {
 293                   size->constant_iv = true;
 294                   if (dump_file && (dump_flags & TDF_DETAILS))
 295                     fprintf (dump_file,
 296                              "   Constant expression will be folded away.\n");
 297                   likely_eliminated = true;
 298                 }
 299               /* Conditionals.  */
 300               else if ((gimple_code (stmt) == GIMPLE_COND
 301                         && constant_after_peeling (gimple_cond_lhs (stmt), stmt,
 302                                                    loop)
 303                         && constant_after_peeling (gimple_cond_rhs (stmt), stmt,
 304                                                    loop)
 305                         /* We don't simplify all constant compares so make sure
 306                            they are not both constant already.  See PR70288.  */
 307                         && (! is_gimple_min_invariant (gimple_cond_lhs (stmt))
 308                             || ! is_gimple_min_invariant
 309                                  (gimple_cond_rhs (stmt))))
 310                        || (gimple_code (stmt) == GIMPLE_SWITCH
 311                            && constant_after_peeling (gimple_switch_index (
 312                                                         as_a <gswitch *>
 313                                                           (stmt)),
 314                                                       stmt, loop)
 315                            && ! is_gimple_min_invariant
 316                                    (gimple_switch_index
 317                                       (as_a <gswitch *> (stmt)))))
 318                 {
 319                   if (dump_file && (dump_flags & TDF_DETAILS))
 320                     fprintf (dump_file, "   Constant conditional.\n");
 321                   likely_eliminated = true;
 322                 }
 323             }
 324
 325           size->overall += num;
 326           if (likely_eliminated || likely_eliminated_peeled)
 327             size->eliminated_by_peeling += num;
 328           if (!after_exit)
 329             {
 330               size->last_iteration += num;
 331               if (likely_eliminated || likely_eliminated_last)
 332                 size->last_iteration_eliminated_by_peeling += num;
 333             }
 334           if ((size->overall * 3 / 2 - size->eliminated_by_peeling
 335               - size->last_iteration_eliminated_by_peeling) > upper_bound)
 336             {
 337               free (body);
 338               path.release ();
 339               return true;
 340             }
 341         }
 342     }
 343   while (path.length ())
 344     {
 345       basic_block bb = path.pop ();
 346       for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
 347         {
 348           gimple *stmt = gsi_stmt (gsi);
 349           if (gimple_code (stmt) == GIMPLE_CALL
 350               && !gimple_inexpensive_call_p (as_a <gcall *>  (stmt)))
 351             {
 352               int flags = gimple_call_flags (stmt);
 353               if (flags & (ECF_PURE | ECF_CONST))
 354                 size->num_pure_calls_on_hot_path++;
 355               else
 356                 size->num_non_pure_calls_on_hot_path++;
 357               size->num_branches_on_hot_path ++;
 358             }
 359           /* Count inexpensive calls as non-calls, because they will likely
 360              expand inline.  */
 361           else if (gimple_code (stmt) != GIMPLE_DEBUG)
 362             size->non_call_stmts_on_hot_path++;
 363           if (((gimple_code (stmt) == GIMPLE_COND
 364                 && (!constant_after_peeling (gimple_cond_lhs (stmt), stmt, loop)
 365                     || constant_after_peeling (gimple_cond_rhs (stmt), stmt,
 366                                                loop)))
 367                || (gimple_code (stmt) == GIMPLE_SWITCH
 368                    && !constant_after_peeling (gimple_switch_index (
 369                                                  as_a <gswitch *> (stmt)),
 370                                                stmt, loop)))
 371               && (!exit || bb != exit->src))
 372             size->num_branches_on_hot_path++;
 373         }
 374     }
 375   path.release ();
 376   if (dump_file && (dump_flags & TDF_DETAILS))
 377     fprintf (dump_file, "size: %i-%i, last_iteration: %i-%i\n", size->overall,
 378              size->eliminated_by_peeling, size->last_iteration,
 379              size->last_iteration_eliminated_by_peeling);
 380
 381   free (body);
 382   return false;
 383 }
 384
 385 /* Estimate number of insns of completely unrolled loop.
 386    It is (NUNROLL + 1) * size of loop body with taking into account
 387    the fact that in last copy everything after exit conditional
 388    is dead and that some instructions will be eliminated after
 389    peeling.
 390
 391    Loop body is likely going to simplify further, this is difficult
 392    to guess, we just decrease the result by 1/3.  */
 393
 394 static unsigned HOST_WIDE_INT
 395 estimated_unrolled_size (struct loop_size *size,
 396                          unsigned HOST_WIDE_INT nunroll)
 397 {
 398   HOST_WIDE_INT unr_insns = ((nunroll)
 399                              * (HOST_WIDE_INT) (size->overall
 400                                                 - size->eliminated_by_peeling));
 401   if (!nunroll)
 402     unr_insns = 0;
 403   unr_insns += size->last_iteration - size->last_iteration_eliminated_by_peeling;
 404
 405   unr_insns = unr_insns * 2 / 3;
 406   if (unr_insns <= 0)
 407     unr_insns = 1;
 408
 409   return unr_insns;
 410 }
 411
 412 /* Loop LOOP is known to not loop.  See if there is an edge in the loop
 413    body that can be remove to make the loop to always exit and at
 414    the same time it does not make any code potentially executed
 415    during the last iteration dead.
 416
 417    After complete unrolling we still may get rid of the conditional
 418    on the exit in the last copy even if we have no idea what it does.
 419    This is quite common case for loops of form
 420
 421      int a[5];
 422      for (i=0;i<b;i++)
 423        a[i]=0;
 424
 425    Here we prove the loop to iterate 5 times but we do not know
 426    it from induction variable.
 427
 428    For now we handle only simple case where there is exit condition
 429    just before the latch block and the latch block contains no statements
 430    with side effect that may otherwise terminate the execution of loop
 431    (such as by EH or by terminating the program or longjmp).
 432
 433    In the general case we may want to cancel the paths leading to statements
 434    loop-niter identified as having undefined effect in the last iteration.
 435    The other cases are hopefully rare and will be cleaned up later.  */
 436
 437 static edge
 438 loop_edge_to_cancel (struct loop *loop)
 439 {
 440   vec<edge> exits;
 441   unsigned i;
 442   edge edge_to_cancel;
 443   gimple_stmt_iterator gsi;
 444
 445   /* We want only one predecestor of the loop.  */
 446   if (EDGE_COUNT (loop->latch->preds) > 1)
 447     return NULL;
 448
 449   exits = get_loop_exit_edges (loop);
 450
 451   FOR_EACH_VEC_ELT (exits, i, edge_to_cancel)
 452     {
 453        /* Find the other edge than the loop exit
 454           leaving the conditoinal.  */
 455        if (EDGE_COUNT (edge_to_cancel->src->succs) != 2)
 456          continue;
 457        if (EDGE_SUCC (edge_to_cancel->src, 0) == edge_to_cancel)
 458          edge_to_cancel = EDGE_SUCC (edge_to_cancel->src, 1);
 459        else
 460          edge_to_cancel = EDGE_SUCC (edge_to_cancel->src, 0);
 461
 462       /* We only can handle conditionals.  */
 463       if (!(edge_to_cancel->flags & (EDGE_TRUE_VALUE | EDGE_FALSE_VALUE)))
 464         continue;
 465
 466       /* We should never have conditionals in the loop latch. */
 467       gcc_assert (edge_to_cancel->dest != loop->header);
 468
 469       /* Check that it leads to loop latch.  */
 470       if (edge_to_cancel->dest != loop->latch)
 471         continue;
 472
 473       exits.release ();
 474
 475       /* Verify that the code in loop latch does nothing that may end program
 476          execution without really reaching the exit.  This may include
 477          non-pure/const function calls, EH statements, volatile ASMs etc.  */
 478       for (gsi = gsi_start_bb (loop->latch); !gsi_end_p (gsi); gsi_next (&gsi))
 479         if (gimple_has_side_effects (gsi_stmt (gsi)))
 480            return NULL;
 481       return edge_to_cancel;
 482     }
 483   exits.release ();
 484   return NULL;
 485 }
 486
 487 /* Remove all tests for exits that are known to be taken after LOOP was
 488    peeled NPEELED times. Put gcc_unreachable before every statement
 489    known to not be executed.  */
 490
 491 static bool
 492 remove_exits_and_undefined_stmts (struct loop *loop, unsigned int npeeled)
 493 {
 494   struct nb_iter_bound *elt;
 495   bool changed = false;
 496
 497   for (elt = loop->bounds; elt; elt = elt->next)
 498     {
 499       /* If statement is known to be undefined after peeling, turn it
 500          into unreachable (or trap when debugging experience is supposed
 501          to be good).  */
 502       if (!elt->is_exit
 503           && wi::ltu_p (elt->bound, npeeled))
 504         {
 505           gimple_stmt_iterator gsi = gsi_for_stmt (elt->stmt);
 506           gcall *stmt = gimple_build_call
 507               (builtin_decl_implicit (BUILT_IN_UNREACHABLE), 0);
 508           gimple_set_location (stmt, gimple_location (elt->stmt));
 509           gsi_insert_before (&gsi, stmt, GSI_NEW_STMT);
 510           split_block (gimple_bb (stmt), stmt);
 511           changed = true;
 512           if (dump_file && (dump_flags & TDF_DETAILS))
 513             {
 514               fprintf (dump_file, "Forced statement unreachable: ");
 515               print_gimple_stmt (dump_file, elt->stmt, 0);
 516             }
 517         }
 518       /* If we know the exit will be taken after peeling, update.  */
 519       else if (elt->is_exit
 520                && wi::leu_p (elt->bound, npeeled))
 521         {
 522           basic_block bb = gimple_bb (elt->stmt);
 523           edge exit_edge = EDGE_SUCC (bb, 0);
 524
 525           if (dump_file && (dump_flags & TDF_DETAILS))
 526             {
 527               fprintf (dump_file, "Forced exit to be taken: ");
 528               print_gimple_stmt (dump_file, elt->stmt, 0);
 529             }
 530           if (!loop_exit_edge_p (loop, exit_edge))
 531             exit_edge = EDGE_SUCC (bb, 1);
 532           gcc_checking_assert (loop_exit_edge_p (loop, exit_edge));
 533           gcond *cond_stmt = as_a <gcond *> (elt->stmt);
 534           if (exit_edge->flags & EDGE_TRUE_VALUE)
 535             gimple_cond_make_true (cond_stmt);
 536           else
 537             gimple_cond_make_false (cond_stmt);
 538           update_stmt (cond_stmt);
 539           changed = true;
 540         }
 541     }
 542   return changed;
 543 }
 544
 545 /* Remove all exits that are known to be never taken because of the loop bound
 546    discovered.  */
 547
 548 static bool
 549 remove_redundant_iv_tests (struct loop *loop)
 550 {
 551   struct nb_iter_bound *elt;
 552   bool changed = false;
 553
 554   if (!loop->any_upper_bound)
 555     return false;
 556   for (elt = loop->bounds; elt; elt = elt->next)
 557     {
 558       /* Exit is pointless if it won't be taken before loop reaches
 559          upper bound.  */
 560       if (elt->is_exit && loop->any_upper_bound
 561           && wi::ltu_p (loop->nb_iterations_upper_bound, elt->bound))
 562         {
 563           basic_block bb = gimple_bb (elt->stmt);
 564           edge exit_edge = EDGE_SUCC (bb, 0);
 565           struct tree_niter_desc niter;
 566
 567           if (!loop_exit_edge_p (loop, exit_edge))
 568             exit_edge = EDGE_SUCC (bb, 1);
 569
 570           /* Only when we know the actual number of iterations, not
 571              just a bound, we can remove the exit.  */
 572           if (!number_of_iterations_exit (loop, exit_edge,
 573                                           &niter, false, false)
 574               || !integer_onep (niter.assumptions)
 575               || !integer_zerop (niter.may_be_zero)
 576               || !niter.niter
 577               || TREE_CODE (niter.niter) != INTEGER_CST
 578               || !wi::ltu_p (loop->nb_iterations_upper_bound,
 579                              wi::to_widest (niter.niter)))
 580             continue;
 581
 582           if (dump_file && (dump_flags & TDF_DETAILS))
 583             {
 584               fprintf (dump_file, "Removed pointless exit: ");
 585               print_gimple_stmt (dump_file, elt->stmt, 0);
 586             }
 587           gcond *cond_stmt = as_a <gcond *> (elt->stmt);
 588           if (exit_edge->flags & EDGE_TRUE_VALUE)
 589             gimple_cond_make_false (cond_stmt);
 590           else
 591             gimple_cond_make_true (cond_stmt);
 592           update_stmt (cond_stmt);
 593           changed = true;
 594         }
 595     }
 596   return changed;
 597 }
 598
 599 /* Loops to unloop (their unroll counts are popped from a parallel vector)
 600    and edges to remove after the whole loop tree has been processed.  */
 601 static vec<loop_p> loops_to_unloop;
 602 static vec<int> loops_to_unloop_nunroll;
 603 static vec<edge> edges_to_remove;
 604 /* Stores loops that have been peeled.  */
 605 static bitmap peeled_loops;
 606
 607 /* Cancel all fully unrolled loops by putting __builtin_unreachable
 608    on the latch edge.
 609    We do it after all unrolling since unlooping moves basic blocks
 610    across loop boundaries trashing loop closed SSA form as well
 611    as SCEV info needed to be intact during unrolling.
 612
 613    IRRED_INVALIDATED is used to bookkeep if information about
 614    irreducible regions may become invalid as a result
 615    of the transformation.
 616    LOOP_CLOSED_SSA_INVALIDATED is used to bookkepp the case
 617    when we need to go into loop closed SSA form.  */
 618
 619 static void
 620 unloop_loops (bitmap loop_closed_ssa_invalidated,
 621               bool *irred_invalidated)
 622 {
 623   while (loops_to_unloop.length ())
 624     {
 625       struct loop *loop = loops_to_unloop.pop ();
 626       int n_unroll = loops_to_unloop_nunroll.pop ();
 627       basic_block latch = loop->latch;
 628       edge latch_edge = loop_latch_edge (loop);
 629       int flags = latch_edge->flags;
 630       location_t locus = latch_edge->goto_locus;
 631       gcall *stmt;
 632       gimple_stmt_iterator gsi;
 633
 634       remove_exits_and_undefined_stmts (loop, n_unroll);
 635
 636       /* Unloop destroys the latch edge.  */
 637       unloop (loop, irred_invalidated, loop_closed_ssa_invalidated);
 638
 639       /* Create new basic block for the latch edge destination and wire
 640          it in.  */
 641       stmt = gimple_build_call (builtin_decl_implicit (BUILT_IN_UNREACHABLE), 0);
 642       latch_edge = make_edge (latch, create_basic_block (NULL, NULL, latch), flags);
 643       latch_edge->probability = 0;
 644       latch_edge->count = profile_count::zero ();
 645       latch_edge->flags |= flags;
 646       latch_edge->goto_locus = locus;
 647
 648       add_bb_to_loop (latch_edge->dest, current_loops->tree_root);
 649       latch_edge->dest->count = profile_count::zero ();
 650       latch_edge->dest->frequency = 0;
 651       set_immediate_dominator (CDI_DOMINATORS, latch_edge->dest, latch_edge->src);
 652
 653       gsi = gsi_start_bb (latch_edge->dest);
 654       gsi_insert_after (&gsi, stmt, GSI_NEW_STMT);
 655     }
 656   loops_to_unloop.release ();
 657   loops_to_unloop_nunroll.release ();
 658
 659   /* Remove edges in peeled copies.  */
 660   unsigned i;
 661   edge e;
 662   FOR_EACH_VEC_ELT (edges_to_remove, i, e)
 663     {
 664       bool ok = remove_path (e, irred_invalidated, loop_closed_ssa_invalidated);
 665       gcc_assert (ok);
 666     }
 667   edges_to_remove.release ();
 668 }
 669
 670 /* Tries to unroll LOOP completely, i.e. NITER times.
 671    UL determines which loops we are allowed to unroll.
 672    EXIT is the exit of the loop that should be eliminated.
 673    MAXITER specifies a bound on the number of iterations, -1 if it is
 674    not known or too large for HOST_WIDE_INT.  The location
 675    LOCUS corresponding to the loop is used when emitting
 676    a summary of the unroll to the dump file.  */
 677
 678 static bool
 679 try_unroll_loop_completely (struct loop *loop,
 680                             edge exit, tree niter,
 681                             enum unroll_level ul,
 682                             HOST_WIDE_INT maxiter,
 683                             location_t locus)
 684 {
 685   unsigned HOST_WIDE_INT n_unroll = 0, ninsns, unr_insns;
 686   struct loop_size size;
 687   bool n_unroll_found = false;
 688   edge edge_to_cancel = NULL;
 689   dump_flags_t report_flags = MSG_OPTIMIZED_LOCATIONS | TDF_DETAILS;
 690
 691   /* See if we proved number of iterations to be low constant.
 692
 693      EXIT is an edge that will be removed in all but last iteration of
 694      the loop.
 695
 696      EDGE_TO_CACNEL is an edge that will be removed from the last iteration
 697      of the unrolled sequence and is expected to make the final loop not
 698      rolling.
 699
 700      If the number of execution of loop is determined by standard induction
 701      variable test, then EXIT and EDGE_TO_CANCEL are the two edges leaving
 702      from the iv test.  */
 703   if (tree_fits_uhwi_p (niter))
 704     {
 705       n_unroll = tree_to_uhwi (niter);
 706       n_unroll_found = true;
 707       edge_to_cancel = EDGE_SUCC (exit->src, 0);
 708       if (edge_to_cancel == exit)
 709         edge_to_cancel = EDGE_SUCC (exit->src, 1);
 710     }
 711   /* We do not know the number of iterations and thus we can not eliminate
 712      the EXIT edge.  */
 713   else
 714     exit = NULL;
 715
 716   /* See if we can improve our estimate by using recorded loop bounds.  */
 717   if (maxiter >= 0
 718       && (!n_unroll_found || (unsigned HOST_WIDE_INT)maxiter < n_unroll))
 719     {
 720       n_unroll = maxiter;
 721       n_unroll_found = true;
 722       /* Loop terminates before the IV variable test, so we can not
 723          remove it in the last iteration.  */
 724       edge_to_cancel = NULL;
 725     }
 726
 727   if (!n_unroll_found)
 728     return false;
 729
 730   if (n_unroll > (unsigned) PARAM_VALUE (PARAM_MAX_COMPLETELY_PEEL_TIMES))
 731     {
 732       if (dump_file && (dump_flags & TDF_DETAILS))
 733         fprintf (dump_file, "Not unrolling loop %d "
 734                  "(--param max-completely-peel-times limit reached).\n",
 735                  loop->num);
 736       return false;
 737     }
 738
 739   if (!edge_to_cancel)
 740     edge_to_cancel = loop_edge_to_cancel (loop);
 741
 742   if (n_unroll)
 743     {
 744       bool large;
 745       if (ul == UL_SINGLE_ITER)
 746         return false;
 747
 748       /* EXIT can be removed only if we are sure it passes first N_UNROLL
 749          iterations.  */
 750       bool remove_exit = (exit && niter
 751                           && TREE_CODE (niter) == INTEGER_CST
 752                           && wi::leu_p (n_unroll, wi::to_widest (niter)));
 753
 754       large = tree_estimate_loop_size
 755                  (loop, remove_exit ? exit : NULL, edge_to_cancel, &size,
 756                   PARAM_VALUE (PARAM_MAX_COMPLETELY_PEELED_INSNS));
 757       ninsns = size.overall;
 758       if (large)
 759         {
 760           if (dump_file && (dump_flags & TDF_DETAILS))
 761             fprintf (dump_file, "Not unrolling loop %d: it is too large.\n",
 762                      loop->num);
 763           return false;
 764         }
 765
 766       unr_insns = estimated_unrolled_size (&size, n_unroll);
 767       if (dump_file && (dump_flags & TDF_DETAILS))
 768         {
 769           fprintf (dump_file, "  Loop size: %d\n", (int) ninsns);
 770           fprintf (dump_file, "  Estimated size after unrolling: %d\n",
 771                    (int) unr_insns);
 772         }
 773
 774       /* If the code is going to shrink, we don't need to be extra cautious
 775          on guessing if the unrolling is going to be profitable.  */
 776       if (unr_insns
 777           /* If there is IV variable that will become constant, we save
 778              one instruction in the loop prologue we do not account
 779              otherwise.  */
 780           <= ninsns + (size.constant_iv != false))
 781         ;
 782       /* We unroll only inner loops, because we do not consider it profitable
 783          otheriwse.  We still can cancel loopback edge of not rolling loop;
 784          this is always a good idea.  */
 785       else if (ul == UL_NO_GROWTH)
 786         {
 787           if (dump_file && (dump_flags & TDF_DETAILS))
 788             fprintf (dump_file, "Not unrolling loop %d: size would grow.\n",
 789                      loop->num);
 790           return false;
 791         }
 792       /* Outer loops tend to be less interesting candidates for complete
 793          unrolling unless we can do a lot of propagation into the inner loop
 794          body.  For now we disable outer loop unrolling when the code would
 795          grow.  */
 796       else if (loop->inner)
 797         {
 798           if (dump_file && (dump_flags & TDF_DETAILS))
 799             fprintf (dump_file, "Not unrolling loop %d: "
 800                      "it is not innermost and code would grow.\n",
 801                      loop->num);
 802           return false;
 803         }
 804       /* If there is call on a hot path through the loop, then
 805          there is most probably not much to optimize.  */
 806       else if (size.num_non_pure_calls_on_hot_path)
 807         {
 808           if (dump_file && (dump_flags & TDF_DETAILS))
 809             fprintf (dump_file, "Not unrolling loop %d: "
 810                      "contains call and code would grow.\n",
 811                      loop->num);
 812           return false;
 813         }
 814       /* If there is pure/const call in the function, then we
 815          can still optimize the unrolled loop body if it contains
 816          some other interesting code than the calls and code
 817          storing or cumulating the return value.  */
 818       else if (size.num_pure_calls_on_hot_path
 819                /* One IV increment, one test, one ivtmp store
 820                   and one useful stmt.  That is about minimal loop
 821                   doing pure call.  */
 822                && (size.non_call_stmts_on_hot_path
 823                    <= 3 + size.num_pure_calls_on_hot_path))
 824         {
 825           if (dump_file && (dump_flags & TDF_DETAILS))
 826             fprintf (dump_file, "Not unrolling loop %d: "
 827                      "contains just pure calls and code would grow.\n",
 828                      loop->num);
 829           return false;
 830         }
 831       /* Complete unrolling is a major win when control flow is removed and
 832          one big basic block is created.  If the loop contains control flow
 833          the optimization may still be a win because of eliminating the loop
 834          overhead but it also may blow the branch predictor tables.
 835          Limit number of branches on the hot path through the peeled
 836          sequence.  */
 837       else if (size.num_branches_on_hot_path * (int)n_unroll
 838                > PARAM_VALUE (PARAM_MAX_PEEL_BRANCHES))
 839         {
 840           if (dump_file && (dump_flags & TDF_DETAILS))
 841             fprintf (dump_file, "Not unrolling loop %d: "
 842                      " number of branches on hot path in the unrolled sequence"
 843                      " reach --param max-peel-branches limit.\n",
 844                      loop->num);
 845           return false;
 846         }
 847       else if (unr_insns
 848                > (unsigned) PARAM_VALUE (PARAM_MAX_COMPLETELY_PEELED_INSNS))
 849         {
 850           if (dump_file && (dump_flags & TDF_DETAILS))
 851             fprintf (dump_file, "Not unrolling loop %d: "
 852                      "(--param max-completely-peeled-insns limit reached).\n",
 853                      loop->num);
 854           return false;
 855         }
 856       dump_printf_loc (report_flags, locus,
 857                        "loop turned into non-loop; it never loops.\n");
 858
 859       initialize_original_copy_tables ();
 860       auto_sbitmap wont_exit (n_unroll + 1);
 861       if (exit && niter
 862           && TREE_CODE (niter) == INTEGER_CST
 863           && wi::leu_p (n_unroll, wi::to_widest (niter)))
 864         {
 865           bitmap_ones (wont_exit);
 866           if (wi::eq_p (wi::to_widest (niter), n_unroll)
 867               || edge_to_cancel)
 868             bitmap_clear_bit (wont_exit, 0);
 869         }
 870       else
 871         {
 872           exit = NULL;
 873           bitmap_clear (wont_exit);
 874         }
 875
 876       if (!gimple_duplicate_loop_to_header_edge (loop, loop_preheader_edge (loop),
 877                                                  n_unroll, wont_exit,
 878                                                  exit, &edges_to_remove,
 879                                                  DLTHE_FLAG_UPDATE_FREQ
 880                                                  | DLTHE_FLAG_COMPLETTE_PEEL))
 881         {
 882           free_original_copy_tables ();
 883           if (dump_file && (dump_flags & TDF_DETAILS))
 884             fprintf (dump_file, "Failed to duplicate the loop\n");
 885           return false;
 886         }
 887
 888       free_original_copy_tables ();
 889     }
 890
 891   /* Remove the conditional from the last copy of the loop.  */
 892   if (edge_to_cancel)
 893     {
 894       gcond *cond = as_a <gcond *> (last_stmt (edge_to_cancel->src));
 895       force_edge_cold (edge_to_cancel, true);
 896       if (edge_to_cancel->flags & EDGE_TRUE_VALUE)
 897         gimple_cond_make_false (cond);
 898       else
 899         gimple_cond_make_true (cond);
 900       update_stmt (cond);
 901       /* Do not remove the path. Doing so may remove outer loop
 902          and confuse bookkeeping code in tree_unroll_loops_completelly.  */
 903     }
 904
 905   /* Store the loop for later unlooping and exit removal.  */
 906   loops_to_unloop.safe_push (loop);
 907   loops_to_unloop_nunroll.safe_push (n_unroll);
 908
 909   if (dump_enabled_p ())
 910     {
 911       if (!n_unroll)
 912         dump_printf_loc (MSG_OPTIMIZED_LOCATIONS | TDF_DETAILS, locus,
 913                          "loop turned into non-loop; it never loops\n");
 914       else
 915         {
 916           dump_printf_loc (MSG_OPTIMIZED_LOCATIONS | TDF_DETAILS, locus,
 917                            "loop with %d iterations completely unrolled",
 918                            (int) (n_unroll + 1));
 919           if (loop->header->count.initialized_p ())
 920             dump_printf (MSG_OPTIMIZED_LOCATIONS | TDF_DETAILS,
 921                          " (header execution count %d)",
 922                          (int)loop->header->count.to_gcov_type ());
 923           dump_printf (MSG_OPTIMIZED_LOCATIONS | TDF_DETAILS, "\n");
 924         }
 925     }
 926
 927   if (dump_file && (dump_flags & TDF_DETAILS))
 928     {
 929       if (exit)
 930         fprintf (dump_file, "Exit condition of peeled iterations was "
 931                  "eliminated.\n");
 932       if (edge_to_cancel)
 933         fprintf (dump_file, "Last iteration exit edge was proved true.\n");
 934       else
 935         fprintf (dump_file, "Latch of last iteration was marked by "
 936                  "__builtin_unreachable ().\n");
 937     }
 938
 939   return true;
 940 }
 941
 942 /* Return number of instructions after peeling.  */
 943 static unsigned HOST_WIDE_INT
 944 estimated_peeled_sequence_size (struct loop_size *size,
 945                                 unsigned HOST_WIDE_INT npeel)
 946 {
 947   return MAX (npeel * (HOST_WIDE_INT) (size->overall
 948                                        - size->eliminated_by_peeling), 1);
 949 }
 950
 951 /* If the loop is expected to iterate N times and is
 952    small enough, duplicate the loop body N+1 times before
 953    the loop itself.  This way the hot path will never
 954    enter the loop.
 955    Parameters are the same as for try_unroll_loops_completely */
 956
 957 static bool
 958 try_peel_loop (struct loop *loop,
 959                edge exit, tree niter,
 960                HOST_WIDE_INT maxiter)
 961 {
 962   HOST_WIDE_INT npeel;
 963   struct loop_size size;
 964   int peeled_size;
 965
 966   if (!flag_peel_loops || PARAM_VALUE (PARAM_MAX_PEEL_TIMES) <= 0
 967       || !peeled_loops)
 968     return false;
 969
 970   if (bitmap_bit_p (peeled_loops, loop->num))
 971     {
 972       if (dump_file)
 973         fprintf (dump_file, "Not peeling: loop is already peeled\n");
 974       return false;
 975     }
 976
 977   /* Peel only innermost loops.
 978      While the code is perfectly capable of peeling non-innermost loops,
 979      the heuristics would probably need some improvements. */
 980   if (loop->inner)
 981     {
 982       if (dump_file)
 983         fprintf (dump_file, "Not peeling: outer loop\n");
 984       return false;
 985     }
 986
 987   if (!optimize_loop_for_speed_p (loop))
 988     {
 989       if (dump_file)
 990         fprintf (dump_file, "Not peeling: cold loop\n");
 991       return false;
 992     }
 993
 994   /* Check if there is an estimate on the number of iterations.  */
 995   npeel = estimated_loop_iterations_int (loop);
 996   if (npeel < 0)
 997     npeel = likely_max_loop_iterations_int (loop);
 998   if (npeel < 0)
 999     {
1000       if (dump_file)
1001         fprintf (dump_file, "Not peeling: number of iterations is not "
1002                  "estimated\n");
1003       return false;
1004     }
1005   if (maxiter >= 0 && maxiter <= npeel)
1006     {
1007       if (dump_file)
1008         fprintf (dump_file, "Not peeling: upper bound is known so can "
1009                  "unroll completely\n");
1010       return false;
1011     }
1012
1013   /* We want to peel estimated number of iterations + 1 (so we never
1014      enter the loop on quick path).  Check against PARAM_MAX_PEEL_TIMES
1015      and be sure to avoid overflows.  */
1016   if (npeel > PARAM_VALUE (PARAM_MAX_PEEL_TIMES) - 1)
1017     {
1018       if (dump_file)
1019         fprintf (dump_file, "Not peeling: rolls too much "
1020                  "(%i + 1 > --param max-peel-times)\n", (int) npeel);
1021       return false;
1022     }
1023   npeel++;
1024
1025   /* Check peeled loops size.  */
1026   tree_estimate_loop_size (loop, exit, NULL, &size,
1027                            PARAM_VALUE (PARAM_MAX_PEELED_INSNS));
1028   if ((peeled_size = estimated_peeled_sequence_size (&size, (int) npeel))
1029       > PARAM_VALUE (PARAM_MAX_PEELED_INSNS))
1030     {
1031       if (dump_file)
1032         fprintf (dump_file, "Not peeling: peeled sequence size is too large "
1033                  "(%i insns > --param max-peel-insns)", peeled_size);
1034       return false;
1035     }
1036
1037   /* Duplicate possibly eliminating the exits.  */
1038   initialize_original_copy_tables ();
1039   auto_sbitmap wont_exit (npeel + 1);
1040   if (exit && niter
1041       && TREE_CODE (niter) == INTEGER_CST
1042       && wi::leu_p (npeel, wi::to_widest (niter)))
1043     {
1044       bitmap_ones (wont_exit);
1045       bitmap_clear_bit (wont_exit, 0);
1046     }
1047   else
1048     {
1049       exit = NULL;
1050       bitmap_clear (wont_exit);
1051     }
1052   if (!gimple_duplicate_loop_to_header_edge (loop, loop_preheader_edge (loop),
1053                                              npeel, wont_exit,
1054                                              exit, &edges_to_remove,
1055                                              DLTHE_FLAG_UPDATE_FREQ))
1056     {
1057       free_original_copy_tables ();
1058       return false;
1059     }
1060   free_original_copy_tables ();
1061   if (dump_file && (dump_flags & TDF_DETAILS))
1062     {
1063       fprintf (dump_file, "Peeled loop %d, %i times.\n",
1064                loop->num, (int) npeel);
1065     }
1066   if (loop->any_estimate)
1067     {
1068       if (wi::ltu_p (npeel, loop->nb_iterations_estimate))
1069         loop->nb_iterations_estimate -= npeel;
1070       else
1071         loop->nb_iterations_estimate = 0;
1072     }
1073   if (loop->any_upper_bound)
1074     {
1075       if (wi::ltu_p (npeel, loop->nb_iterations_upper_bound))
1076         loop->nb_iterations_upper_bound -= npeel;
1077       else
1078         loop->nb_iterations_upper_bound = 0;
1079     }
1080   if (loop->any_likely_upper_bound)
1081     {
1082       if (wi::ltu_p (npeel, loop->nb_iterations_likely_upper_bound))
1083         loop->nb_iterations_likely_upper_bound -= npeel;
1084       else
1085         {
1086           loop->any_estimate = true;
1087           loop->nb_iterations_estimate = 0;
1088           loop->nb_iterations_likely_upper_bound = 0;
1089         }
1090     }
1091   profile_count entry_count = profile_count::zero ();
1092   int entry_freq = 0;
1093
1094   edge e;
1095   edge_iterator ei;
1096   FOR_EACH_EDGE (e, ei, loop->header->preds)
1097     if (e->src != loop->latch)
1098       {
1099         if (e->src->count.initialized_p ())
1100           entry_count = e->src->count + e->src->count;
1101         entry_freq += e->src->frequency;
1102         gcc_assert (!flow_bb_inside_loop_p (loop, e->src));
1103       }
1104   int scale = 1;
1105   if (loop->header->count > 0)
1106     scale = entry_count.probability_in (loop->header->count);
1107   else if (loop->header->frequency)
1108     scale = RDIV (entry_freq * REG_BR_PROB_BASE, loop->header->frequency);
1109   scale_loop_profile (loop, scale, 0);
1110   bitmap_set_bit (peeled_loops, loop->num);
1111   return true;
1112 }
1113 /* Adds a canonical induction variable to LOOP if suitable.
1114    CREATE_IV is true if we may create a new iv.  UL determines
1115    which loops we are allowed to completely unroll.  If TRY_EVAL is true, we try
1116    to determine the number of iterations of a loop by direct evaluation.
1117    Returns true if cfg is changed.   */
1118
1119 static bool
1120 canonicalize_loop_induction_variables (struct loop *loop,
1121                                        bool create_iv, enum unroll_level ul,
1122                                        bool try_eval)
1123 {
1124   edge exit = NULL;
1125   tree niter;
1126   HOST_WIDE_INT maxiter;
1127   bool modified = false;
1128   location_t locus = UNKNOWN_LOCATION;
1129
1130   niter = number_of_latch_executions (loop);
1131   exit = single_exit (loop);
1132   if (TREE_CODE (niter) == INTEGER_CST)
1133     locus = gimple_location (last_stmt (exit->src));
1134   else
1135     {
1136       /* If the loop has more than one exit, try checking all of them
1137          for # of iterations determinable through scev.  */
1138       if (!exit)
1139         niter = find_loop_niter (loop, &exit);
1140
1141       /* Finally if everything else fails, try brute force evaluation.  */
1142       if (try_eval
1143           && (chrec_contains_undetermined (niter)
1144               || TREE_CODE (niter) != INTEGER_CST))
1145         niter = find_loop_niter_by_eval (loop, &exit);
1146
1147       if (exit)
1148         locus = gimple_location (last_stmt (exit->src));
1149
1150       if (TREE_CODE (niter) != INTEGER_CST)
1151         exit = NULL;
1152     }
1153
1154   /* We work exceptionally hard here to estimate the bound
1155      by find_loop_niter_by_eval.  Be sure to keep it for future.  */
1156   if (niter && TREE_CODE (niter) == INTEGER_CST)
1157     {
1158       record_niter_bound (loop, wi::to_widest (niter),
1159                           exit == single_likely_exit (loop), true);
1160     }
1161
1162   /* Force re-computation of loop bounds so we can remove redundant exits.  */
1163   maxiter = max_loop_iterations_int (loop);
1164
1165   if (dump_file && (dump_flags & TDF_DETAILS)
1166       && TREE_CODE (niter) == INTEGER_CST)
1167     {
1168       fprintf (dump_file, "Loop %d iterates ", loop->num);
1169       print_generic_expr (dump_file, niter, TDF_SLIM);
1170       fprintf (dump_file, " times.\n");
1171     }
1172   if (dump_file && (dump_flags & TDF_DETAILS)
1173       && maxiter >= 0)
1174     {
1175       fprintf (dump_file, "Loop %d iterates at most %i times.\n", loop->num,
1176                (int)maxiter);
1177     }
1178   if (dump_file && (dump_flags & TDF_DETAILS)
1179       && likely_max_loop_iterations_int (loop) >= 0)
1180     {
1181       fprintf (dump_file, "Loop %d likely iterates at most %i times.\n",
1182                loop->num, (int)likely_max_loop_iterations_int (loop));
1183     }
1184
1185   /* Remove exits that are known to be never taken based on loop bound.
1186      Needs to be called after compilation of max_loop_iterations_int that
1187      populates the loop bounds.  */
1188   modified |= remove_redundant_iv_tests (loop);
1189
1190   if (try_unroll_loop_completely (loop, exit, niter, ul, maxiter, locus))
1191     return true;
1192
1193   if (create_iv
1194       && niter && !chrec_contains_undetermined (niter)
1195       && exit && just_once_each_iteration_p (loop, exit->src))
1196     create_canonical_iv (loop, exit, niter);
1197
1198   if (ul == UL_ALL)
1199     modified |= try_peel_loop (loop, exit, niter, maxiter);
1200
1201   return modified;
1202 }
1203
1204 /* The main entry point of the pass.  Adds canonical induction variables
1205    to the suitable loops.  */
1206
1207 unsigned int
1208 canonicalize_induction_variables (void)
1209 {
1210   struct loop *loop;
1211   bool changed = false;
1212   bool irred_invalidated = false;
1213   bitmap loop_closed_ssa_invalidated = BITMAP_ALLOC (NULL);
1214
1215   estimate_numbers_of_iterations ();
1216
1217   FOR_EACH_LOOP (loop, LI_FROM_INNERMOST)
1218     {
1219       changed |= canonicalize_loop_induction_variables (loop,
1220                                                         true, UL_SINGLE_ITER,
1221                                                         true);
1222     }
1223   gcc_assert (!need_ssa_update_p (cfun));
1224
1225   unloop_loops (loop_closed_ssa_invalidated, &irred_invalidated);
1226   if (irred_invalidated
1227       && loops_state_satisfies_p (LOOPS_HAVE_MARKED_IRREDUCIBLE_REGIONS))
1228     mark_irreducible_loops ();
1229
1230   /* Clean up the information about numbers of iterations, since brute force
1231      evaluation could reveal new information.  */
1232   free_numbers_of_iterations_estimates (cfun);
1233   scev_reset ();
1234
1235   if (!bitmap_empty_p (loop_closed_ssa_invalidated))
1236     {
1237       gcc_checking_assert (loops_state_satisfies_p (LOOP_CLOSED_SSA));
1238       rewrite_into_loop_closed_ssa (NULL, TODO_update_ssa);
1239     }
1240   BITMAP_FREE (loop_closed_ssa_invalidated);
1241
1242   if (changed)
1243     return TODO_cleanup_cfg;
1244   return 0;
1245 }
1246
1247 /* Propagate constant SSA_NAMEs defined in basic block BB.  */
1248
1249 static void
1250 propagate_constants_for_unrolling (basic_block bb)
1251 {
1252   /* Look for degenerate PHI nodes with constant argument.  */
1253   for (gphi_iterator gsi = gsi_start_phis (bb); !gsi_end_p (gsi); )
1254     {
1255       gphi *phi = gsi.phi ();
1256       tree result = gimple_phi_result (phi);
1257       tree arg = gimple_phi_arg_def (phi, 0);
1258
1259       if (! SSA_NAME_OCCURS_IN_ABNORMAL_PHI (result)
1260           && gimple_phi_num_args (phi) == 1
1261           && CONSTANT_CLASS_P (arg))
1262         {
1263           replace_uses_by (result, arg);
1264           gsi_remove (&gsi, true);
1265           release_ssa_name (result);
1266         }
1267       else
1268         gsi_next (&gsi);
1269     }
1270
1271   /* Look for assignments to SSA names with constant RHS.  */
1272   for (gimple_stmt_iterator gsi = gsi_start_bb (bb); !gsi_end_p (gsi); )
1273     {
1274       gimple *stmt = gsi_stmt (gsi);
1275       tree lhs;
1276
1277       if (is_gimple_assign (stmt)
1278           && TREE_CODE_CLASS (gimple_assign_rhs_code (stmt)) == tcc_constant
1279           && (lhs = gimple_assign_lhs (stmt), TREE_CODE (lhs) == SSA_NAME)
1280           && !SSA_NAME_OCCURS_IN_ABNORMAL_PHI (lhs))
1281         {
1282           replace_uses_by (lhs, gimple_assign_rhs1 (stmt));
1283           gsi_remove (&gsi, true);
1284           release_ssa_name (lhs);
1285         }
1286       else
1287         gsi_next (&gsi);
1288     }
1289 }
1290
1291 /* Process loops from innermost to outer, stopping at the innermost
1292    loop we unrolled.  */
1293
1294 static bool
1295 tree_unroll_loops_completely_1 (bool may_increase_size, bool unroll_outer,
1296                                 bitmap father_bbs, struct loop *loop)
1297 {
1298   struct loop *loop_father;
1299   bool changed = false;
1300   struct loop *inner;
1301   enum unroll_level ul;
1302
1303   /* Process inner loops first.  */
1304   for (inner = loop->inner; inner != NULL; inner = inner->next)
1305     changed |= tree_unroll_loops_completely_1 (may_increase_size,
1306                                                unroll_outer, father_bbs,
1307                                                inner);
1308
1309   /* If we changed an inner loop we cannot process outer loops in this
1310      iteration because SSA form is not up-to-date.  Continue with
1311      siblings of outer loops instead.  */
1312   if (changed)
1313     return true;
1314
1315   /* Don't unroll #pragma omp simd loops until the vectorizer
1316      attempts to vectorize those.  */
1317   if (loop->force_vectorize)
1318     return false;
1319
1320   /* Try to unroll this loop.  */
1321   loop_father = loop_outer (loop);
1322   if (!loop_father)
1323     return false;
1324
1325   if (may_increase_size && optimize_loop_nest_for_speed_p (loop)
1326       /* Unroll outermost loops only if asked to do so or they do
1327          not cause code growth.  */
1328       && (unroll_outer || loop_outer (loop_father)))
1329     ul = UL_ALL;
1330   else
1331     ul = UL_NO_GROWTH;
1332
1333   if (canonicalize_loop_induction_variables
1334         (loop, false, ul, !flag_tree_loop_ivcanon))
1335     {
1336       /* If we'll continue unrolling, we need to propagate constants
1337          within the new basic blocks to fold away induction variable
1338          computations; otherwise, the size might blow up before the
1339          iteration is complete and the IR eventually cleaned up.  */
1340       if (loop_outer (loop_father))
1341         bitmap_set_bit (father_bbs, loop_father->header->index);
1342
1343       return true;
1344     }
1345
1346   return false;
1347 }
1348
1349 /* Unroll LOOPS completely if they iterate just few times.  Unless
1350    MAY_INCREASE_SIZE is true, perform the unrolling only if the
1351    size of the code does not increase.  */
1352
1353 unsigned int
1354 tree_unroll_loops_completely (bool may_increase_size, bool unroll_outer)
1355 {
1356   bitmap father_bbs = BITMAP_ALLOC (NULL);
1357   bool changed;
1358   int iteration = 0;
1359   bool irred_invalidated = false;
1360
1361   do
1362     {
1363       changed = false;
1364       bitmap loop_closed_ssa_invalidated = NULL;
1365
1366       if (loops_state_satisfies_p (LOOP_CLOSED_SSA))
1367         loop_closed_ssa_invalidated = BITMAP_ALLOC (NULL);
1368
1369       free_numbers_of_iterations_estimates (cfun);
1370       estimate_numbers_of_iterations ();
1371
1372       changed = tree_unroll_loops_completely_1 (may_increase_size,
1373                                                 unroll_outer, father_bbs,
1374                                                 current_loops->tree_root);
1375       if (changed)
1376         {
1377           unsigned i;
1378
1379           unloop_loops (loop_closed_ssa_invalidated, &irred_invalidated);
1380
1381           /* We can not use TODO_update_ssa_no_phi because VOPS gets confused.  */
1382           if (loop_closed_ssa_invalidated
1383               && !bitmap_empty_p (loop_closed_ssa_invalidated))
1384             rewrite_into_loop_closed_ssa (loop_closed_ssa_invalidated,
1385                                           TODO_update_ssa);
1386           else
1387             update_ssa (TODO_update_ssa);
1388
1389           /* father_bbs is a bitmap of loop father header BB indices.
1390              Translate that to what non-root loops these BBs belong to now.  */
1391           bitmap_iterator bi;
1392           bitmap fathers = BITMAP_ALLOC (NULL);
1393           EXECUTE_IF_SET_IN_BITMAP (father_bbs, 0, i, bi)
1394             {
1395               basic_block unrolled_loop_bb = BASIC_BLOCK_FOR_FN (cfun, i);
1396               if (! unrolled_loop_bb)
1397                 continue;
1398               if (loop_outer (unrolled_loop_bb->loop_father))
1399                 bitmap_set_bit (fathers,
1400                                 unrolled_loop_bb->loop_father->num);
1401             }
1402           bitmap_clear (father_bbs);
1403           /* Propagate the constants within the new basic blocks.  */
1404           EXECUTE_IF_SET_IN_BITMAP (fathers, 0, i, bi)
1405             {
1406               loop_p father = get_loop (cfun, i);
1407               basic_block *body = get_loop_body_in_dom_order (father);
1408               for (unsigned j = 0; j < father->num_nodes; j++)
1409                 propagate_constants_for_unrolling (body[j]);
1410               free (body);
1411             }
1412           BITMAP_FREE (fathers);
1413
1414           /* This will take care of removing completely unrolled loops
1415              from the loop structures so we can continue unrolling now
1416              innermost loops.  */
1417           if (cleanup_tree_cfg ())
1418             update_ssa (TODO_update_ssa_only_virtuals);
1419
1420           /* Clean up the information about numbers of iterations, since
1421              complete unrolling might have invalidated it.  */
1422           scev_reset ();
1423           if (flag_checking && loops_state_satisfies_p (LOOP_CLOSED_SSA))
1424             verify_loop_closed_ssa (true);
1425         }
1426       if (loop_closed_ssa_invalidated)
1427         BITMAP_FREE (loop_closed_ssa_invalidated);
1428     }
1429   while (changed
1430          && ++iteration <= PARAM_VALUE (PARAM_MAX_UNROLL_ITERATIONS));
1431
1432   BITMAP_FREE (father_bbs);
1433
1434   if (irred_invalidated
1435       && loops_state_satisfies_p (LOOPS_HAVE_MARKED_IRREDUCIBLE_REGIONS))
1436     mark_irreducible_loops ();
1437
1438   return 0;
1439 }
1440
1441 /* Canonical induction variable creation pass.  */
1442
1443 namespace {
1444
1445 const pass_data pass_data_iv_canon =
1446 {
1447   GIMPLE_PASS, /* type */
1448   "ivcanon", /* name */
1449   OPTGROUP_LOOP, /* optinfo_flags */
1450   TV_TREE_LOOP_IVCANON, /* tv_id */
1451   ( PROP_cfg | PROP_ssa ), /* properties_required */
1452   0, /* properties_provided */
1453   0, /* properties_destroyed */
1454   0, /* todo_flags_start */
1455   0, /* todo_flags_finish */
1456 };
1457
1458 class pass_iv_canon : public gimple_opt_pass
1459 {
1460 public:
1461   pass_iv_canon (gcc::context *ctxt)
1462     : gimple_opt_pass (pass_data_iv_canon, ctxt)
1463   {}
1464
1465   /* opt_pass methods: */
1466   virtual bool gate (function *) { return flag_tree_loop_ivcanon != 0; }
1467   virtual unsigned int execute (function *fun);
1468
1469 }; // class pass_iv_canon
1470
1471 unsigned int
1472 pass_iv_canon::execute (function *fun)
1473 {
1474   if (number_of_loops (fun) <= 1)
1475     return 0;
1476
1477   return canonicalize_induction_variables ();
1478 }
1479
1480 } // anon namespace
1481
1482 gimple_opt_pass *
1483 make_pass_iv_canon (gcc::context *ctxt)
1484 {
1485   return new pass_iv_canon (ctxt);
1486 }
1487
1488 /* Complete unrolling of loops.  */
1489
1490 namespace {
1491
1492 const pass_data pass_data_complete_unroll =
1493 {
1494   GIMPLE_PASS, /* type */
1495   "cunroll", /* name */
1496   OPTGROUP_LOOP, /* optinfo_flags */
1497   TV_COMPLETE_UNROLL, /* tv_id */
1498   ( PROP_cfg | PROP_ssa ), /* properties_required */
1499   0, /* properties_provided */
1500   0, /* properties_destroyed */
1501   0, /* todo_flags_start */
1502   0, /* todo_flags_finish */
1503 };
1504
1505 class pass_complete_unroll : public gimple_opt_pass
1506 {
1507 public:
1508   pass_complete_unroll (gcc::context *ctxt)
1509     : gimple_opt_pass (pass_data_complete_unroll, ctxt)
1510   {}
1511
1512   /* opt_pass methods: */
1513   virtual unsigned int execute (function *);
1514
1515 }; // class pass_complete_unroll
1516
1517 unsigned int
1518 pass_complete_unroll::execute (function *fun)
1519 {
1520   if (number_of_loops (fun) <= 1)
1521     return 0;
1522
1523   /* If we ever decide to run loop peeling more than once, we will need to
1524      track loops already peeled in loop structures themselves to avoid
1525      re-peeling the same loop multiple times.  */
1526   if (flag_peel_loops)
1527     peeled_loops = BITMAP_ALLOC (NULL);
1528   int val = tree_unroll_loops_completely (flag_unroll_loops
1529                                           || flag_peel_loops
1530                                           || optimize >= 3, true);
1531   if (peeled_loops)
1532     {
1533       BITMAP_FREE (peeled_loops);
1534       peeled_loops = NULL;
1535     }
1536   return val;
1537 }
1538
1539 } // anon namespace
1540
1541 gimple_opt_pass *
1542 make_pass_complete_unroll (gcc::context *ctxt)
1543 {
1544   return new pass_complete_unroll (ctxt);
1545 }
1546
1547 /* Complete unrolling of inner loops.  */
1548
1549 namespace {
1550
1551 const pass_data pass_data_complete_unrolli =
1552 {
1553   GIMPLE_PASS, /* type */
1554   "cunrolli", /* name */
1555   OPTGROUP_LOOP, /* optinfo_flags */
1556   TV_COMPLETE_UNROLL, /* tv_id */
1557   ( PROP_cfg | PROP_ssa ), /* properties_required */
1558   0, /* properties_provided */
1559   0, /* properties_destroyed */
1560   0, /* todo_flags_start */
1561   0, /* todo_flags_finish */
1562 };
1563
1564 class pass_complete_unrolli : public gimple_opt_pass
1565 {
1566 public:
1567   pass_complete_unrolli (gcc::context *ctxt)
1568     : gimple_opt_pass (pass_data_complete_unrolli, ctxt)
1569   {}
1570
1571   /* opt_pass methods: */
1572   virtual bool gate (function *) { return optimize >= 2; }
1573   virtual unsigned int execute (function *);
1574
1575 }; // class pass_complete_unrolli
1576
1577 unsigned int
1578 pass_complete_unrolli::execute (function *fun)
1579 {
1580   unsigned ret = 0;
1581
1582   loop_optimizer_init (LOOPS_NORMAL
1583                        | LOOPS_HAVE_RECORDED_EXITS);
1584   if (number_of_loops (fun) > 1)
1585     {
1586       scev_initialize ();
1587       ret = tree_unroll_loops_completely (optimize >= 3, false);
1588       free_numbers_of_iterations_estimates (fun);
1589       scev_finalize ();
1590     }
1591   loop_optimizer_finalize ();
1592
1593   return ret;
1594 }
1595
1596 } // anon namespace
1597
1598 gimple_opt_pass *
1599 make_pass_complete_unrolli (gcc::context *ctxt)
1600 {
1601   return new pass_complete_unrolli (ctxt);
1602 }
1603
1604