/* Loop splitting.
   Copyright (C) 2015-2023 Free Software Foundation, Inc.

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify it
under the terms of the GNU General Public License as published by the
Free Software Foundation; either version 3, or (at your option) any
later version.

GCC is distributed in the hope that it will be useful, but WITHOUT
ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3.  If not see
<http://www.gnu.org/licenses/>.  */

#include "config.h"
#include "system.h"
#include "coretypes.h"
#include "backend.h"
#include "tree.h"
#include "gimple.h"
#include "tree-pass.h"
#include "ssa.h"
#include "fold-const.h"
#include "tree-cfg.h"
#include "tree-ssa.h"
#include "tree-ssa-loop-niter.h"
#include "tree-ssa-loop.h"
#include "tree-ssa-loop-manip.h"
#include "tree-into-ssa.h"
#include "tree-inline.h"
#include "tree-cfgcleanup.h"
#include "cfgloop.h"
#include "tree-scalar-evolution.h"
#include "gimple-iterator.h"
#include "gimple-pretty-print.h"
#include "cfghooks.h"
#include "gimple-fold.h"
#include "gimplify-me.h"
#include "print-tree.h"

/* This file implements two kinds of loop splitting.

   One transformation of loops like:

   for (i = 0; i < 100; i++)
     {
       if (i < 50)
         A;
       else
         B;
     }

   into:

   for (i = 0; i < 50; i++)
     {
       A;
     }
   for (; i < 100; i++)
     {
       B;
     }

   */

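/* A minimal illustrative example (hypothetical, not taken from the
   testsuite) where the guard IV is not the loop's control IV:

   for (i = 0, j = n; i < 100; i++, j--)
     {
       if (j > 0)
         A;
       else
         B;
     }

   split_at_bb_p below recognizes "j > 0" as a comparison of an affine
   IV against a loop-invariant border, so the loop can be split at the
   iteration where j reaches 0.  */
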
/* Return true when BB inside LOOP is a potential iteration space
   split point, i.e. ends with a condition like "IV < comp", which
   is true on one side of the iteration space and false on the other,
   and the split point can be computed.  If so, also return the border
   point in *BORDER and the comparison induction variable in IV.  */

static tree
split_at_bb_p (class loop *loop, basic_block bb, tree *border, affine_iv *iv)
{
  gcond *stmt;
  affine_iv iv2;

  /* BB must end in a simple conditional jump.  */
  stmt = safe_dyn_cast <gcond *> (*gsi_last_bb (bb));
  if (!stmt)
    return NULL_TREE;

  enum tree_code code = gimple_cond_code (stmt);

  /* Only handle relational comparisons, for equality and non-equality
     we'd have to split the loop into two loops and a middle statement.  */
  switch (code)
    {
    case LT_EXPR:
    case LE_EXPR:
    case GT_EXPR:
    case GE_EXPR:
      break;
    default:
      return NULL_TREE;
    }

  if (loop_exits_from_bb_p (loop, bb))
    return NULL_TREE;

  tree op0 = gimple_cond_lhs (stmt);
  tree op1 = gimple_cond_rhs (stmt);
  class loop *useloop = loop_containing_stmt (stmt);

  if (!simple_iv (loop, useloop, op0, iv, false))
    return NULL_TREE;
  if (!simple_iv (loop, useloop, op1, &iv2, false))
    return NULL_TREE;

  /* Make it so that the first argument of the condition is
     the looping one.  */
  if (!integer_zerop (iv2.step))
    {
      std::swap (op0, op1);
      std::swap (*iv, iv2);
      code = swap_tree_comparison (code);
      gimple_cond_set_condition (stmt, code, op0, op1);
      update_stmt (stmt);
    }
  else if (integer_zerop (iv->step))
    return NULL_TREE;
  if (!integer_zerop (iv2.step))
    return NULL_TREE;
  if (!iv->no_overflow)
    return NULL_TREE;

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      fprintf (dump_file, "Found potential split point: ");
      print_gimple_stmt (dump_file, stmt, 0, TDF_SLIM);
      fprintf (dump_file, " { ");
      print_generic_expr (dump_file, iv->base, TDF_SLIM);
      fprintf (dump_file, " + I*");
      print_generic_expr (dump_file, iv->step, TDF_SLIM);
      fprintf (dump_file, " } %s ", get_tree_code_name (code));
      print_generic_expr (dump_file, iv2.base, TDF_SLIM);
      fprintf (dump_file, "\n");
    }

  *border = iv2.base;
  return op0;
}

/* Given a GUARD conditional stmt inside LOOP, which we want to make always
   true or false depending on INITIAL_TRUE, and adjusted values NEXTVAL
   (a post-increment IV) and NEWBOUND (the comparator), adjust the loop
   exit test statement to loop back only if the GUARD statement will
   also be true/false in the next iteration.  */

static void
patch_loop_exit (class loop *loop, gcond *guard, tree nextval, tree newbound,
                 bool initial_true)
{
  edge exit = single_exit (loop);
  gcond *stmt = as_a <gcond *> (*gsi_last_bb (exit->src));
  gimple_cond_set_condition (stmt, gimple_cond_code (guard),
                             nextval, newbound);
  update_stmt (stmt);

  edge stay = EDGE_SUCC (exit->src, EDGE_SUCC (exit->src, 0) == exit);

  exit->flags &= ~(EDGE_TRUE_VALUE | EDGE_FALSE_VALUE);
  stay->flags &= ~(EDGE_TRUE_VALUE | EDGE_FALSE_VALUE);

  if (initial_true)
    {
      exit->flags |= EDGE_FALSE_VALUE;
      stay->flags |= EDGE_TRUE_VALUE;
    }
  else
    {
      exit->flags |= EDGE_TRUE_VALUE;
      stay->flags |= EDGE_FALSE_VALUE;
    }
}

/* Given an induction variable GUARD_IV, and its affine descriptor IV,
   find the loop phi node in LOOP defining it directly, or create
   such a phi node.  Return that phi node.  */

static gphi *
find_or_create_guard_phi (class loop *loop, tree guard_iv, affine_iv * /*iv*/)
{
  gimple *def = SSA_NAME_DEF_STMT (guard_iv);
  gphi *phi;
  if ((phi = dyn_cast <gphi *> (def))
      && gimple_bb (phi) == loop->header)
    return phi;

  /* XXX Create the PHI instead.  */
  return NULL;
}

/* Returns true if the exit values of all loop phi nodes can be
   determined easily (i.e. that connect_loop_phis can determine them).  */

static bool
easy_exit_values (class loop *loop)
{
  edge exit = single_exit (loop);
  edge latch = loop_latch_edge (loop);
  gphi_iterator psi;

  /* Currently we regard the exit values as easy if they are the same
     as the value over the backedge, which is the case if the definition
     of the backedge value dominates the exit edge.  */
  for (psi = gsi_start_phis (loop->header); !gsi_end_p (psi); gsi_next (&psi))
    {
      gphi *phi = psi.phi ();
      tree next = PHI_ARG_DEF_FROM_EDGE (phi, latch);
      basic_block bb;
      if (TREE_CODE (next) == SSA_NAME
          && (bb = gimple_bb (SSA_NAME_DEF_STMT (next)))
          && !dominated_by_p (CDI_DOMINATORS, exit->src, bb))
        return false;
    }

  return true;
}

/* This function updates the SSA form after connect_loops made a new
   edge NEW_E leading from LOOP1 exit to LOOP2 (via an intermediate
   conditional).  I.e. the second loop can now be entered either
   via the original entry or via NEW_E, so the entry values of LOOP2
   phi nodes are either the original ones or those at the exit
   of LOOP1.  Insert new phi nodes in LOOP2 pre-header reflecting
   this.  The loops need to fulfill easy_exit_values ().  */

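/* A sketch of the effect, using hypothetical SSA names: if LOOP1's header
   has i_1 = PHI <i_0(preheader1), i_2(latch1)>, then LOOP2's copy of it is
   j_1 = PHI <i_0(preheader2), j_2(latch2)>.  This function creates
   i_3 = PHI <i_0(skip_first), i_2(new_e)> in LOOP2's pre-header block and
   rewrites LOOP2's entry argument from i_0 to i_3.  */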
static void
connect_loop_phis (class loop *loop1, class loop *loop2, edge new_e)
{
  basic_block rest = loop_preheader_edge (loop2)->src;
  gcc_assert (new_e->dest == rest);
  edge skip_first = EDGE_PRED (rest, EDGE_PRED (rest, 0) == new_e);

  edge firste = loop_preheader_edge (loop1);
  edge seconde = loop_preheader_edge (loop2);
  edge firstn = loop_latch_edge (loop1);
  gphi_iterator psi_first, psi_second;
  for (psi_first = gsi_start_phis (loop1->header),
       psi_second = gsi_start_phis (loop2->header);
       !gsi_end_p (psi_first);
       gsi_next (&psi_first), gsi_next (&psi_second))
    {
      tree init, next, new_init;
      use_operand_p op;
      gphi *phi_first = psi_first.phi ();
      gphi *phi_second = psi_second.phi ();

      init = PHI_ARG_DEF_FROM_EDGE (phi_first, firste);
      next = PHI_ARG_DEF_FROM_EDGE (phi_first, firstn);
      op = PHI_ARG_DEF_PTR_FROM_EDGE (phi_second, seconde);
      gcc_assert (operand_equal_for_phi_arg_p (init, USE_FROM_PTR (op)));

      /* Prefer using original variable as a base for the new ssa name.
         This is necessary for virtual ops, and useful in order to avoid
         losing debug info for real ops.  */
      if (TREE_CODE (next) == SSA_NAME
          && useless_type_conversion_p (TREE_TYPE (next),
                                        TREE_TYPE (init)))
        new_init = copy_ssa_name (next);
      else if (TREE_CODE (init) == SSA_NAME
               && useless_type_conversion_p (TREE_TYPE (init),
                                             TREE_TYPE (next)))
        new_init = copy_ssa_name (init);
      else if (useless_type_conversion_p (TREE_TYPE (next),
                                          TREE_TYPE (init)))
        new_init = make_temp_ssa_name (TREE_TYPE (next), NULL,
                                       "unrinittmp");
      else
        new_init = make_temp_ssa_name (TREE_TYPE (init), NULL,
                                       "unrinittmp");

      gphi *newphi = create_phi_node (new_init, rest);
      add_phi_arg (newphi, init, skip_first, UNKNOWN_LOCATION);
      add_phi_arg (newphi, next, new_e, UNKNOWN_LOCATION);
      SET_USE (op, new_init);
    }
}

/* The two loops LOOP1 and LOOP2 were just created by loop versioning,
   they are still equivalent and placed in two arms of a diamond, like so:

               .------if (cond)------.
               v                     v
             pre1                   pre2
              |                      |
       .--->h1                      h2<----.
       |     |                       |     |
       |    ex1---.           .---ex2      |
       |    /     |           |     \      |
       '---l1     X           |     l2-----'
                  |           |
                  |           |
                  '--->join<---'

   This function transforms the program such that LOOP1 is conditionally
   falling through to LOOP2, or skipping it.  This is done by splitting
   the ex1->join edge at X in the diagram above, and inserting a condition
   whose one arm goes to pre2, resulting in this situation:

               .------if (cond)------.
               v                     v
             pre1        .--------->pre2
              |          |           |
       .--->h1           |          h2<----.
       |     |           |           |     |
       |    ex1---.      |    .---ex2      |
       |    /     v      |    |      \     |
       '---l1   skip-----'    |     l2-----'
                  |           |
                  |           |
                  '--->join<---'

   The condition used is the exit condition of LOOP1, which effectively means
   that when the first loop exits (for whatever reason) but the real original
   exit expression is still false, the second loop will be entered.
   The function returns the new edge cond->pre2.

   This doesn't update the SSA form, see connect_loop_phis for that.  */

static edge
connect_loops (class loop *loop1, class loop *loop2)
{
  edge exit = single_exit (loop1);
  basic_block skip_bb = split_edge (exit);
  gcond *skip_stmt;
  gimple_stmt_iterator gsi;
  edge new_e, skip_e;

  gcond *stmt = as_a <gcond *> (*gsi_last_bb (exit->src));
  skip_stmt = gimple_build_cond (gimple_cond_code (stmt),
                                 gimple_cond_lhs (stmt),
                                 gimple_cond_rhs (stmt),
                                 NULL_TREE, NULL_TREE);
  gsi = gsi_last_bb (skip_bb);
  gsi_insert_after (&gsi, skip_stmt, GSI_NEW_STMT);

  skip_e = EDGE_SUCC (skip_bb, 0);
  skip_e->flags &= ~EDGE_FALLTHRU;
  new_e = make_edge (skip_bb, loop_preheader_edge (loop2)->src, 0);
  if (exit->flags & EDGE_TRUE_VALUE)
    {
      skip_e->flags |= EDGE_TRUE_VALUE;
      new_e->flags |= EDGE_FALSE_VALUE;
    }
  else
    {
      skip_e->flags |= EDGE_FALSE_VALUE;
      new_e->flags |= EDGE_TRUE_VALUE;
    }

  new_e->probability = profile_probability::very_likely ();
  skip_e->probability = new_e->probability.invert ();

  return new_e;
}

/* This returns the new bound for iterations given the original iteration
   space in NITER, an arbitrary new bound BORDER, assumed to be some
   comparison value with a different IV, the initial value GUARD_INIT of
   that other IV, and the comparison code GUARD_CODE that compares
   that other IV with BORDER.  We return an SSA name, and place any
   necessary statements for that computation into *STMTS.

   For example for such a loop:

     for (i = beg, j = guard_init; i < end; i++, j++)
       if (j < border)  // this is supposed to be true/false
         ...

   we want to return a new bound (on j) that makes the loop iterate
   as long as the condition j < border stays true.  We also don't want
   to iterate more often than the original loop, so we have to introduce
   some cut-off as well (via min/max), effectively resulting in:

     newend = min (end+guard_init-beg, border)
     for (i = beg, j = guard_init; j < newend; i++, j++)
       if (j < border)
         ...

   Depending on the direction of the IVs and if the exit tests
   are strict or non-strict we need to use MIN or MAX,
   and add or subtract 1.  This routine computes newend above.  */

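/* A worked instance of the formula above, with illustrative numbers not
   taken from any testcase: for beg = 0, end = 100, guard_init = 10,
   border = 60 and a strict "<" guard, newend = MIN (100 + 10 - 0, 60)
   = 60, so the first loop runs for j = 10..59 (i.e. i = 0..49) and the
   second loop handles the remaining 50 iterations.  */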
static tree
compute_new_first_bound (gimple_seq *stmts, class tree_niter_desc *niter,
                         tree border,
                         enum tree_code guard_code, tree guard_init)
{
  /* The niter structure contains the after-increment IV; we need
     the loop-enter base, so subtract STEP once.  */
  tree controlbase = force_gimple_operand (niter->control.base,
                                           stmts, true, NULL_TREE);
  tree controlstep = niter->control.step;
  tree enddiff;
  if (POINTER_TYPE_P (TREE_TYPE (controlbase)))
    {
      controlstep = gimple_build (stmts, NEGATE_EXPR,
                                  TREE_TYPE (controlstep), controlstep);
      enddiff = gimple_build (stmts, POINTER_PLUS_EXPR,
                              TREE_TYPE (controlbase),
                              controlbase, controlstep);
    }
  else
    enddiff = gimple_build (stmts, MINUS_EXPR,
                            TREE_TYPE (controlbase),
                            controlbase, controlstep);

  /* Compute end-beg.  */
  gimple_seq stmts2;
  tree end = force_gimple_operand (niter->bound, &stmts2,
                                   true, NULL_TREE);
  gimple_seq_add_seq_without_update (stmts, stmts2);
  if (POINTER_TYPE_P (TREE_TYPE (enddiff)))
    {
      tree tem = gimple_convert (stmts, sizetype, enddiff);
      tem = gimple_build (stmts, NEGATE_EXPR, sizetype, tem);
      enddiff = gimple_build (stmts, POINTER_PLUS_EXPR,
                              TREE_TYPE (enddiff),
                              end, tem);
    }
  else
    enddiff = gimple_build (stmts, MINUS_EXPR, TREE_TYPE (enddiff),
                            end, enddiff);

  /* Compute guard_init + (end-beg).  */
  tree newbound;
  enddiff = gimple_convert (stmts, TREE_TYPE (guard_init), enddiff);
  if (POINTER_TYPE_P (TREE_TYPE (guard_init)))
    {
      enddiff = gimple_convert (stmts, sizetype, enddiff);
      newbound = gimple_build (stmts, POINTER_PLUS_EXPR,
                               TREE_TYPE (guard_init),
                               guard_init, enddiff);
    }
  else
    newbound = gimple_build (stmts, PLUS_EXPR, TREE_TYPE (guard_init),
                             guard_init, enddiff);

  /* Depending on the direction of the IVs the new bound for the first
     loop is the minimum or maximum of old bound and border.
     Also, if the guard condition isn't strictly less or greater,
     we need to adjust the bound.  */
  int addbound = 0;
  enum tree_code minmax;
  if (niter->cmp == LT_EXPR)
    {
      /* GT and LE are the same, inverted.  */
      if (guard_code == GT_EXPR || guard_code == LE_EXPR)
        addbound = -1;
      minmax = MIN_EXPR;
    }
  else
    {
      gcc_assert (niter->cmp == GT_EXPR);
      if (guard_code == GE_EXPR || guard_code == LT_EXPR)
        addbound = 1;
      minmax = MAX_EXPR;
    }

  if (addbound)
    {
      tree type2 = TREE_TYPE (newbound);
      if (POINTER_TYPE_P (type2))
        type2 = sizetype;
      newbound = gimple_build (stmts,
                               POINTER_TYPE_P (TREE_TYPE (newbound))
                               ? POINTER_PLUS_EXPR : PLUS_EXPR,
                               TREE_TYPE (newbound),
                               newbound,
                               build_int_cst (type2, addbound));
    }

  tree newend = gimple_build (stmts, minmax, TREE_TYPE (border),
                              border, newbound);
  return newend;
}

/* Fix the two loops' bb counts after the split, based on the split edge
   probability.  Don't adjust the bbs dominated by the true branches of
   that loop, to avoid dropping 1s down.  */
static void
fix_loop_bb_probability (class loop *loop1, class loop *loop2, edge true_edge,
                         edge false_edge)
{
  /* Proportion the first loop's bb counts, except those dominated by the
     true branch, to avoid dropping 1s down.  */
  basic_block *bbs1, *bbs2;
  bbs1 = get_loop_body (loop1);
  unsigned j;
  for (j = 0; j < loop1->num_nodes; j++)
    if (bbs1[j] == loop1->latch
        /* Watch for case where the true conditional is empty.  */
        || !single_pred_p (true_edge->dest)
        || !dominated_by_p (CDI_DOMINATORS, bbs1[j], true_edge->dest))
      bbs1[j]->count
        = bbs1[j]->count.apply_probability (true_edge->probability);
  free (bbs1);

  /* Proportion the second loop's bb counts, except those dominated by the
     false branch, to avoid dropping 1s down.  */
  basic_block bbi_copy = get_bb_copy (false_edge->dest);
  bbs2 = get_loop_body (loop2);
  for (j = 0; j < loop2->num_nodes; j++)
    if (bbs2[j] == loop2->latch
        /* Watch for case where the false conditional is empty.  */
        || !single_pred_p (bbi_copy)
        || !dominated_by_p (CDI_DOMINATORS, bbs2[j], bbi_copy))
      bbs2[j]->count
        = bbs2[j]->count.apply_probability (true_edge->probability.invert ());
  free (bbs2);
}

/* Checks if LOOP contains a conditional block whose condition
   depends on which side in the iteration space it is, and if so
   splits the iteration space into two loops.  Returns true if the
   loop was split.  The iteration descriptor for the single exit of
   the loop is computed internally.  */

static bool
split_loop (class loop *loop1)
{
  class tree_niter_desc niter;
  basic_block *bbs;
  unsigned i;
  bool changed = false;
  tree guard_iv;
  tree border = NULL_TREE;
  affine_iv iv;
  edge exit1;

  if (!(exit1 = single_exit (loop1))
      || EDGE_COUNT (exit1->src->succs) != 2
      /* ??? We could handle non-empty latches when we split the latch edge
         (not the exit edge), and put the new exit condition in the new block.
         OTOH this executes some code unconditionally that might have been
         skipped by the original exit before.  */
      || !empty_block_p (loop1->latch)
      || !easy_exit_values (loop1)
      || !number_of_iterations_exit (loop1, exit1, &niter, false, true)
      || niter.cmp == ERROR_MARK)
    return false;
  if (niter.cmp == NE_EXPR)
    {
      if (!niter.control.no_overflow)
        return false;
      if (tree_int_cst_sign_bit (niter.control.step) > 0)
        niter.cmp = GT_EXPR;
      else
        niter.cmp = LT_EXPR;
    }

  bbs = get_loop_body (loop1);

  if (!can_copy_bbs_p (bbs, loop1->num_nodes))
    {
      free (bbs);
      return false;
    }

  /* Find a splitting opportunity.  */
  for (i = 0; i < loop1->num_nodes; i++)
    if ((guard_iv = split_at_bb_p (loop1, bbs[i], &border, &iv)))
      {
        profile_count entry_count = loop_preheader_edge (loop1)->count ();
        /* Handling opposite steps is not implemented yet.  Neither
           is handling different step sizes.  */
        if ((tree_int_cst_sign_bit (iv.step)
             != tree_int_cst_sign_bit (niter.control.step))
            || !tree_int_cst_equal (iv.step, niter.control.step))
          continue;

        /* Find a loop PHI node that defines guard_iv directly,
           or create one doing that.  */
        gphi *phi = find_or_create_guard_phi (loop1, guard_iv, &iv);
        if (!phi)
          continue;
        gcond *guard_stmt = as_a<gcond *> (*gsi_last_bb (bbs[i]));
        tree guard_init = PHI_ARG_DEF_FROM_EDGE (phi,
                                                 loop_preheader_edge (loop1));
        enum tree_code guard_code = gimple_cond_code (guard_stmt);

        /* Loop splitting is implemented by versioning the loop, placing
           the new loop after the old loop, making the first loop iterate
           as long as the conditional stays true (or false), and letting
           the second (new) loop handle the rest of the iterations.

           First we need to determine if the condition will start being true
           or false in the first loop.  */
        bool initial_true;
        switch (guard_code)
          {
          case LT_EXPR:
          case LE_EXPR:
            initial_true = !tree_int_cst_sign_bit (iv.step);
            break;
          case GT_EXPR:
          case GE_EXPR:
            initial_true = tree_int_cst_sign_bit (iv.step);
            break;
          default:
            gcc_unreachable ();
          }
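
        /* For instance (an illustrative case, not from the sources): for a
           guard "j < border" with a positive iv.step, the sign bit of
           iv.step is 0, so initial_true is set; the guard can only go from
           true to false as j increases.  */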

        /* Build a condition that will skip the first loop when the
           guard condition won't ever be true (or false).  */
        gimple_seq stmts2;
        border = force_gimple_operand (border, &stmts2, true, NULL_TREE);
        if (stmts2)
          gsi_insert_seq_on_edge_immediate (loop_preheader_edge (loop1),
                                            stmts2);
        tree cond = fold_build2 (guard_code, boolean_type_node,
                                 guard_init, border);
        if (!initial_true)
          cond = fold_build1 (TRUTH_NOT_EXPR, boolean_type_node, cond);

        edge true_edge, false_edge;
        extract_true_false_edges_from_block (bbs[i], &true_edge, &false_edge);

        /* Now version the loop, placing loop2 after loop1 connecting
           them, and fix up SSA form for that.  */
        initialize_original_copy_tables ();
        basic_block cond_bb;

        profile_probability loop1_prob
          = integer_onep (cond) ? profile_probability::always ()
                                : true_edge->probability;
        /* TODO: It is commonly the case that we know both loops will be
           entered.  very_likely below is the probability that the second
           loop will be entered, as given by connect_loops.  We should work
           out the common case where it is always true.  */
        class loop *loop2 = loop_version (loop1, cond, &cond_bb,
                                          loop1_prob,
                                          /* Pass always as we will later
                                             redirect first loop to second
                                             loop.  */
                                          profile_probability::always (),
                                          profile_probability::always (),
                                          profile_probability::very_likely (),
                                          true);
        gcc_assert (loop2);
        /* Correct probability of edge cond_bb->preheader_of_loop2.  */
        single_pred_edge
          (loop_preheader_edge (loop2)->src)->probability
          = loop1_prob.invert ();

        fix_loop_bb_probability (loop1, loop2, true_edge, false_edge);

        /* Fix the loops' exit probabilities after scaling.  */

        if (entry_count.initialized_p () && entry_count.nonzero_p ())
          {
            edge exit_to_latch1;
            if (EDGE_SUCC (exit1->src, 0) == exit1)
              exit_to_latch1 = EDGE_SUCC (exit1->src, 1);
            else
              exit_to_latch1 = EDGE_SUCC (exit1->src, 0);
            if (exit1->src->count.nonzero_p ())
              {
                /* The first loop is entered loop1_prob * entry_count times
                   and it needs to exit the same number of times.  */
                exit1->probability
                  = entry_count.apply_probability
                      (loop1_prob).probability_in (exit1->src->count);
                exit_to_latch1->probability = exit1->probability.invert ();
                scale_dominated_blocks_in_loop (loop1, exit1->src,
                                                exit_to_latch1->count (),
                                                exit_to_latch1->dest->count);
              }

            edge exit_to_latch2, exit2 = single_exit (loop2);
            if (EDGE_SUCC (exit2->src, 0) == exit2)
              exit_to_latch2 = EDGE_SUCC (exit2->src, 1);
            else
              exit_to_latch2 = EDGE_SUCC (exit2->src, 0);
            if (exit2->src->count.nonzero_p ())
              {
                /* The second loop is entered very_likely * entry_count times
                   and it needs to exit the same number of times.  */
                exit2->probability
                  = entry_count.apply_probability
                      (profile_probability::very_likely ())
                      .probability_in (exit2->src->count);
                exit_to_latch2->probability = exit2->probability.invert ();
                scale_dominated_blocks_in_loop (loop2, exit2->src,
                                                exit_to_latch2->count (),
                                                exit_to_latch2->dest->count);
              }
          }

        edge new_e = connect_loops (loop1, loop2);
        connect_loop_phis (loop1, loop2, new_e);

        /* The iterations of the second loop are now exactly
           those that the first loop didn't do, but the
           iteration space of the first loop is still the original one.
           Compute the new bound for the guarding IV and patch the
           loop exit to use it instead of original IV and bound.  */
        gimple_seq stmts = NULL;
        tree newend = compute_new_first_bound (&stmts, &niter, border,
                                               guard_code, guard_init);
        if (stmts)
          gsi_insert_seq_on_edge_immediate (loop_preheader_edge (loop1),
                                            stmts);
        tree guard_next = PHI_ARG_DEF_FROM_EDGE (phi, loop_latch_edge (loop1));
        patch_loop_exit (loop1, guard_stmt, guard_next, newend, initial_true);

        /* TODO: Update any_estimate and upper bounds.  */

        /* Finally patch out the two copies of the condition to be always
           true/false (or opposite).  */
        gcond *force_true = as_a<gcond *> (*gsi_last_bb (bbs[i]));
        gcond *force_false = as_a<gcond *> (*gsi_last_bb (get_bb_copy (bbs[i])));
        if (!initial_true)
          std::swap (force_true, force_false);
        gimple_cond_make_true (force_true);
        gimple_cond_make_false (force_false);
        update_stmt (force_true);
        update_stmt (force_false);

        free_original_copy_tables ();

        changed = true;
        if (dump_file && (dump_flags & TDF_DETAILS))
          fprintf (dump_file, ";; Loop split.\n");

        if (dump_enabled_p ())
          dump_printf_loc (MSG_OPTIMIZED_LOCATIONS, guard_stmt, "loop split\n");

        /* Only deal with the first opportunity.  */
        break;
      }

  free (bbs);
  return changed;
}

/* Another transformation of loops like:

   for (i = INIT (); CHECK (i); i = NEXT ())
     {
       if (expr (a_1, a_2, ..., a_n))  // expr is pure
         a_j = ...;  // changes at least one a_j
       else
         S;          // does not change any a_j
     }

   into:

   for (i = INIT (); CHECK (i); i = NEXT ())
     {
       if (expr (a_1, a_2, ..., a_n))
         a_j = ...;
       else
         {
           S;
           i = NEXT ();
           break;
         }
     }

   for (; CHECK (i); i = NEXT ())
     {
       S;
     }

   */

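/* A concrete shape of such a loop (hypothetical, not from the testsuite):

   for (i = 0; i < n; i++)
     {
       if (budget > 0)       // semi-invariant: once false, stays false
         budget -= cost[i];  // changes an operand of the condition
       else
         out[i] = 0;         // changes no operand of the condition
     }

   Once "budget > 0" becomes false it can never become true again, since
   the else branch modifies no operand of the condition; the remaining
   iterations can thus run in a second loop with the condition folded to
   a constant.  */
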
/* Data structure to hold temporary information during loop split upon
   a semi-invariant conditional statement.  */
class split_info {
public:
  /* Array of all basic blocks in a loop, returned by get_loop_body ().  */
  basic_block *bbs;

  /* All memory store/clobber statements in a loop.  */
  auto_vec<gimple *> memory_stores;

  /* Whether the above memory store vector has been filled.  */
  int need_init;

  /* Control dependencies of basic blocks in a loop.  */
  auto_vec<hash_set<basic_block> *> control_deps;

  split_info () : bbs (NULL), need_init (true) { }

  ~split_info ()
  {
    if (bbs)
      free (bbs);

    for (unsigned i = 0; i < control_deps.length (); i++)
      delete control_deps[i];
  }
};

/* Find all statements with memory-write effect in LOOP, including memory
   stores and non-pure function calls, and keep those in a vector.  This
   work is done only once, since the vector should stay constant during
   the analysis stage of semi-invariant conditions.  */

static void
find_vdef_in_loop (struct loop *loop)
{
  split_info *info = (split_info *) loop->aux;
  gphi *vphi = get_virtual_phi (loop->header);

  /* Indicate that the memory store vector has been filled.  */
  info->need_init = false;

  /* If the loop contains a memory operation, there must be a virtual PHI
     node in the loop header basic block.  */
  if (vphi == NULL)
    return;

  /* All virtual SSA names inside the loop are connected into a cyclic
     graph via virtual PHI nodes.  The virtual PHI node in the loop header
     just links the first and the last virtual SSA names, by using the
     last as a PHI operand to define the first.  */
  const edge latch = loop_latch_edge (loop);
  const tree first = gimple_phi_result (vphi);
  const tree last = PHI_ARG_DEF_FROM_EDGE (vphi, latch);

  /* The virtual SSA cyclic graph might consist of only one SSA name,
     which is defined by itself.

       .MEM_1 = PHI <.MEM_2(loop entry edge), .MEM_1(latch edge)>

     This means the loop contains only memory loads, so we can skip it.  */
  if (first == last)
    return;

  auto_vec<gimple *> other_stores;
  auto_vec<tree> worklist;
  auto_bitmap visited;

  bitmap_set_bit (visited, SSA_NAME_VERSION (first));
  bitmap_set_bit (visited, SSA_NAME_VERSION (last));
  worklist.safe_push (last);

  do
    {
      tree vuse = worklist.pop ();
      gimple *stmt = SSA_NAME_DEF_STMT (vuse);

      /* We mark the first and last SSA names as visited at the beginning,
         and start the process in reverse from the last SSA name towards
         the first, which ensures that this do-while will not touch SSA
         names defined outside the loop.  */
      gcc_assert (gimple_bb (stmt)
                  && flow_bb_inside_loop_p (loop, gimple_bb (stmt)));

      if (gimple_code (stmt) == GIMPLE_PHI)
        {
          gphi *phi = as_a <gphi *> (stmt);

          for (unsigned i = 0; i < gimple_phi_num_args (phi); ++i)
            {
              tree arg = gimple_phi_arg_def (stmt, i);

              if (bitmap_set_bit (visited, SSA_NAME_VERSION (arg)))
                worklist.safe_push (arg);
            }
        }
      else
        {
          tree prev = gimple_vuse (stmt);

          /* A non-pure call statement is conservatively assumed to impact
             all memory locations.  So place call statements ahead of other
             memory stores in the vector, with the idea of using them as
             shortcut terminators for memory alias analysis.  */
          if (gimple_code (stmt) == GIMPLE_CALL)
            info->memory_stores.safe_push (stmt);
          else
            other_stores.safe_push (stmt);

          if (bitmap_set_bit (visited, SSA_NAME_VERSION (prev)))
            worklist.safe_push (prev);
        }
    } while (!worklist.is_empty ());

  info->memory_stores.safe_splice (other_stores);
}

/* Two basic blocks have equivalent control dependency if one dominates
   the other and is post-dominated by it.  Given a basic block BB in LOOP,
   find the farthest equivalent dominating basic block.  For BB, there is
   a constraint that BB does not post-dominate the loop header of LOOP;
   this means BB is control-dependent on at least one basic block in
   LOOP.  */

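/* An illustrative instance of the equivalence: in a straight-line chain
   A -> B with no bypass edge, A dominates B and B post-dominates A, so
   both belong to one equivalence class and the walk below moves from B
   up to A.  In a diamond A -> {B, C} -> D, however, B does not
   post-dominate A, so the walk stops at B.  */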
static basic_block
get_control_equiv_head_block (struct loop *loop, basic_block bb)
{
  while (!bb->aux)
    {
      basic_block dom_bb = get_immediate_dominator (CDI_DOMINATORS, bb);

      gcc_checking_assert (dom_bb && flow_bb_inside_loop_p (loop, dom_bb));

      if (!dominated_by_p (CDI_POST_DOMINATORS, dom_bb, bb))
        break;

      bb = dom_bb;
    }
  return bb;
}

/* Given a BB in LOOP, find out all basic blocks in LOOP that BB is control-
   dependent on.  */

static hash_set<basic_block> *
find_control_dep_blocks (struct loop *loop, basic_block bb)
{
  /* If BB has the same control dependency as the loop header, then it is
     not control-dependent on any basic block in LOOP.  */
  if (dominated_by_p (CDI_POST_DOMINATORS, loop->header, bb))
    return NULL;

  basic_block equiv_head = get_control_equiv_head_block (loop, bb);

  if (equiv_head->aux)
    {
      /* There is a basic block containing control dependency equivalent
         to BB.  No need to recompute that, and also set this information
         to other equivalent basic blocks.  */
      for (; bb != equiv_head;
           bb = get_immediate_dominator (CDI_DOMINATORS, bb))
        bb->aux = equiv_head->aux;
      return (hash_set<basic_block> *) equiv_head->aux;
    }

  /* A basic block X is control-dependent on another Y iff there exists
     a path from X to Y, in which every basic block other than X and Y
     is post-dominated by Y, but X is not post-dominated by Y.

     According to this rule, traverse basic blocks in the loop backwards
     starting from BB; if a basic block is post-dominated by BB, extend
     the current post-dominating path to this block, otherwise it is
     another one that BB is control-dependent on.  */

  auto_vec<basic_block> pdom_worklist;
  hash_set<basic_block> pdom_visited;
  hash_set<basic_block> *dep_bbs = new hash_set<basic_block>;

  pdom_worklist.safe_push (equiv_head);

  do
    {
      basic_block pdom_bb = pdom_worklist.pop ();
      edge_iterator ei;
      edge e;

      if (pdom_visited.add (pdom_bb))
        continue;

      FOR_EACH_EDGE (e, ei, pdom_bb->preds)
        {
          basic_block pred_bb = e->src;

          if (!dominated_by_p (CDI_POST_DOMINATORS, pred_bb, bb))
            {
              dep_bbs->add (pred_bb);
              continue;
            }

          pred_bb = get_control_equiv_head_block (loop, pred_bb);

          if (pdom_visited.contains (pred_bb))
            continue;

          if (!pred_bb->aux)
            {
              pdom_worklist.safe_push (pred_bb);
              continue;
            }

          /* If the control dependency of a basic block is available,
             fast-extend the post-dominating path using that information
             instead of advancing forward step-by-step.  */
          hash_set<basic_block> *pred_dep_bbs
            = (hash_set<basic_block> *) pred_bb->aux;

          for (hash_set<basic_block>::iterator iter = pred_dep_bbs->begin ();
               iter != pred_dep_bbs->end (); ++iter)
            {
              basic_block pred_dep_bb = *iter;

              /* Basic blocks are either in the control dependency of BB,
                 or must be post-dominated by BB; if the latter, extend
                 the path from these basic blocks.  */
              if (!dominated_by_p (CDI_POST_DOMINATORS, pred_dep_bb, bb))
                dep_bbs->add (pred_dep_bb);
              else if (!pdom_visited.contains (pred_dep_bb))
                pdom_worklist.safe_push (pred_dep_bb);
            }
        }
    } while (!pdom_worklist.is_empty ());

  /* Record computed control dependencies in the loop so that we can reach
     them when reclaiming resources.  */
  ((split_info *) loop->aux)->control_deps.safe_push (dep_bbs);

  /* Associate the control dependence with related equivalent basic
     blocks.  */
  for (equiv_head->aux = dep_bbs; bb != equiv_head;
       bb = get_immediate_dominator (CDI_DOMINATORS, bb))
    bb->aux = dep_bbs;

  return dep_bbs;
}

/* Forward declaration.  */

static bool
stmt_semi_invariant_p_1 (struct loop *loop, gimple *stmt,
                         const_basic_block skip_head,
                         hash_map<gimple *, bool> &stmt_stat);

/* Given STMT, a memory load or pure call statement, check whether it is
   impacted by some memory store in LOOP, excluding the trace starting
   from SKIP_HEAD (the trace is composed of SKIP_HEAD and those basic
   blocks dominated by it, and always corresponds to one branch of a
   conditional statement).  If SKIP_HEAD is NULL, all basic blocks of
   LOOP are checked.  */

static bool
vuse_semi_invariant_p (struct loop *loop, gimple *stmt,
                       const_basic_block skip_head)
{
  split_info *info = (split_info *) loop->aux;
  tree rhs = NULL_TREE;
  ao_ref ref;
  gimple *store;
  unsigned i;

  /* Collect memory store/clobber statements if we haven't done so
     already.  */
  if (info->need_init)
    find_vdef_in_loop (loop);

  if (is_gimple_assign (stmt))
    rhs = gimple_assign_rhs1 (stmt);

  ao_ref_init (&ref, rhs);

  FOR_EACH_VEC_ELT (info->memory_stores, i, store)
    {
      /* Skip basic blocks dominated by SKIP_HEAD, if non-NULL.  */
      if (skip_head
          && dominated_by_p (CDI_DOMINATORS, gimple_bb (store), skip_head))
        continue;

      if (!ref.ref || stmt_may_clobber_ref_p_1 (store, &ref))
        return false;
    }

  return true;
}

/* Suppose that one conditional branch, led by SKIP_HEAD, is not executed
   from a certain iteration of LOOP onwards; check whether an SSA name
   (NAME) remains unchanged in the next iteration.  We call this
   characteristic semi-invariantness.  SKIP_HEAD might be NULL; if so,
   nothing is excluded, and all basic blocks and control flows in the
   loop will be considered.  The semi-invariant state of a checked
   statement is cached in the hash map STMT_STAT to avoid redundant
   computation in possible subsequent re-checks.  */

static inline bool
ssa_semi_invariant_p (struct loop *loop, tree name,
                      const_basic_block skip_head,
                      hash_map<gimple *, bool> &stmt_stat)
{
  gimple *def = SSA_NAME_DEF_STMT (name);
  const_basic_block def_bb = gimple_bb (def);

  /* An SSA name defined outside the loop is definitely
     semi-invariant.  */
  if (!def_bb || !flow_bb_inside_loop_p (loop, def_bb))
    return true;

  if (SSA_NAME_OCCURS_IN_ABNORMAL_PHI (name))
    return false;

  return stmt_semi_invariant_p_1 (loop, def, skip_head, stmt_stat);
}

/* Check whether a loop iteration PHI node (LOOP_PHI) defines a value that
   is semi-invariant in LOOP.  Basic blocks dominated by SKIP_HEAD (if
   non-NULL) are excluded from LOOP.  */

static bool
loop_iter_phi_semi_invariant_p (struct loop *loop, gphi *loop_phi,
                                const_basic_block skip_head)
{
  const_edge latch = loop_latch_edge (loop);
  tree name = gimple_phi_result (loop_phi);
  tree from = PHI_ARG_DEF_FROM_EDGE (loop_phi, latch);

  gcc_checking_assert (from);

  /* A loop iteration PHI node is located in the loop header, and has two
     source operands: one is an initial value coming from outside the
     loop, the other is a value through the latch of the loop, which is
     derived in the last iteration; we call the latter the latch value.
     From the PHI node to the definition of the latch value, if, excluding
     the branch trace starting from SKIP_HEAD, there is no kind of value
     redefinition other than copy-assignment or the like, the SSA name
     defined by the PHI node is semi-invariant.

         loop entry
             |      .--- latch ---.
             |      |             |
             v      v             |
           x_1 = PHI <x_0, x_3>   |
                    |             |
                    v             |
      .------- if (cond) ------.  |
      |                        |  |
      |                  [ SKIP ] |
      |                        |  |
      |                x_2 = ...  |
      |                        |  |
      '---- T ---->.<---- F ---'  |
                   |              |
                   v              |
           x_3 = PHI <x_1, x_2>   |
                   |              |
                   '--------------'

     Suppose that in a certain iteration, the execution flow in the above
     graph goes through the true branch, which means that the source value
     defining x_3 in the false branch (x_2) is skipped; x_3 then only
     comes from x_1, and x_1 in the next iterations is defined by x_3.  We
     know that x_1 will never change if COND always chooses the true
     branch from then on.  */

  while (from != name)
    {
      /* A new value comes from a CONSTANT.  */
      if (TREE_CODE (from) != SSA_NAME)
        return false;

      gimple *stmt = SSA_NAME_DEF_STMT (from);
      const_basic_block bb = gimple_bb (stmt);

      /* A new value comes from outside the loop.  */
      if (!bb || !flow_bb_inside_loop_p (loop, bb))
        return false;

      from = NULL_TREE;

      if (gimple_code (stmt) == GIMPLE_PHI)
        {
          gphi *phi = as_a <gphi *> (stmt);

          for (unsigned i = 0; i < gimple_phi_num_args (phi); ++i)
            {
              if (skip_head)
                {
                  const_edge e = gimple_phi_arg_edge (phi, i);

                  /* Don't consider redefinitions in excluded basic
                     blocks.  */
                  if (dominated_by_p (CDI_DOMINATORS, e->src, skip_head))
                    continue;
                }

              tree arg = gimple_phi_arg_def (phi, i);

              if (!from)
                from = arg;
              else if (!operand_equal_p (from, arg, 0))
                /* More than one source operand provides a different value
                   to the SSA name; it is variant.  */
                return false;
            }
        }
      else if (gimple_code (stmt) == GIMPLE_ASSIGN)
        {
          /* For a simple value copy, check its rhs instead.  */
          if (gimple_assign_ssa_name_copy_p (stmt))
            from = gimple_assign_rhs1 (stmt);
        }

      /* Any other kind of definition is deemed to introduce a new value
         to the SSA name.  */
      if (!from)
        return false;
    }
  return true;
}

/* Check whether the conditional predicates that BB is control-dependent
   on are semi-invariant in LOOP.  Basic blocks dominated by SKIP_HEAD
   (if non-NULL) are excluded from LOOP.  The semi-invariant state of a
   checked statement is cached in the hash map STMT_STAT.  */

static bool
control_dep_semi_invariant_p (struct loop *loop, basic_block bb,
                              const_basic_block skip_head,
                              hash_map<gimple *, bool> &stmt_stat)
{
  hash_set<basic_block> *dep_bbs = find_control_dep_blocks (loop, bb);

  if (!dep_bbs)
    return true;

  for (hash_set<basic_block>::iterator iter = dep_bbs->begin ();
       iter != dep_bbs->end (); ++iter)
    {
      gimple *last = *gsi_last_bb (*iter);
      if (!last)
        return false;

      /* Only check condition predicates.  */
      if (gimple_code (last) != GIMPLE_COND
          && gimple_code (last) != GIMPLE_SWITCH)
        return false;

      if (!stmt_semi_invariant_p_1 (loop, last, skip_head, stmt_stat))
        return false;
    }

  return true;
}

/* Check whether STMT is semi-invariant in LOOP, i.e. all its operands are
   semi-invariant, and consequently all the values it defines are
   semi-invariant.  Basic blocks dominated by SKIP_HEAD (if non-NULL) are
   excluded from LOOP.  The semi-invariant state of a checked statement is
   cached in the hash map STMT_STAT.  */

static bool
stmt_semi_invariant_p_1 (struct loop *loop, gimple *stmt,
                         const_basic_block skip_head,
                         hash_map<gimple *, bool> &stmt_stat)
{
  bool existed;
  bool &invar = stmt_stat.get_or_insert (stmt, &existed);

  if (existed)
    return invar;

  /* A statement might depend on itself, which is treated as variant.  So
     set the state of the statement under check to variant to ensure
     that.  */
  invar = false;

  if (gimple_code (stmt) == GIMPLE_PHI)
    {
      gphi *phi = as_a <gphi *> (stmt);

      if (gimple_bb (stmt) == loop->header)
        {
          /* If the entry value is subject to abnormal coalescing
             avoid the transform since we're going to duplicate the
             loop header and thus likely introduce overlapping life-ranges
             between the PHI def and the entry on the path when the
             first loop is skipped.  */
          tree entry_def
            = PHI_ARG_DEF_FROM_EDGE (phi, loop_preheader_edge (loop));
          if (TREE_CODE (entry_def) == SSA_NAME
              && SSA_NAME_OCCURS_IN_ABNORMAL_PHI (entry_def))
            return false;
          invar = loop_iter_phi_semi_invariant_p (loop, phi, skip_head);
          return invar;
        }

      /* A loop PHI node that is not located in the loop header is semi-
         invariant only if two conditions are met.  The first is that its
         source values are derived from CONSTANTs (including loop-invariant
         values) or from SSA names defined by semi-invariant loop iteration
         PHI nodes.  The second is that its source incoming edges are
         control-dependent on semi-invariant conditional predicates.  */
      for (unsigned i = 0; i < gimple_phi_num_args (phi); ++i)
        {
          const_edge e = gimple_phi_arg_edge (phi, i);
          tree arg = gimple_phi_arg_def (phi, i);

          if (TREE_CODE (arg) == SSA_NAME)
            {
              if (!ssa_semi_invariant_p (loop, arg, skip_head, stmt_stat))
                return false;

              /* If the source value is defined in the location from which
                 the source edge comes in, there is no need to check the
                 control dependency again, since this has been done in the
                 SSA name check stage above.  */
              if (e->src == gimple_bb (SSA_NAME_DEF_STMT (arg)))
                continue;
            }

          if (!control_dep_semi_invariant_p (loop, e->src, skip_head,
                                             stmt_stat))
            return false;
        }
    }
  else
    {
      ssa_op_iter iter;
      tree use;

      /* A volatile memory load or the return value of a normal (non-const/
         non-pure) call should not be treated as constant in each iteration
         of the loop.  */
      if (gimple_has_side_effects (stmt))
        return false;

      /* Check if any memory store may kill a memory load at this place.  */
      if (gimple_vuse (stmt) && !vuse_semi_invariant_p (loop, stmt, skip_head))
        return false;

      /* Although an operand of a statement might be an SSA name, a CONSTANT
         or a VARDECL, here we only need to check SSA name operands.  This
         is because a check on VARDECL operands, which involve memory loads,
         must have been done prior to invocation of this function in
         vuse_semi_invariant_p.  */
      FOR_EACH_SSA_TREE_OPERAND (use, stmt, iter, SSA_OP_USE)
        if (!ssa_semi_invariant_p (loop, use, skip_head, stmt_stat))
          return false;
    }

  if (!control_dep_semi_invariant_p (loop, gimple_bb (stmt), skip_head,
                                     stmt_stat))
    return false;

  /* Here we SHOULD NOT use "invar = true", since the hash map might have
     been rehashed by a new insertion, in which case INVAR would point to
     invalid memory.  */
  stmt_stat.put (stmt, true);
  return true;
}

/* A helper function to check whether STMT is semi-invariant in LOOP.
   Basic blocks dominated by SKIP_HEAD (if non-NULL) are excluded from
   LOOP.  */

static bool
stmt_semi_invariant_p (struct loop *loop, gimple *stmt,
                       const_basic_block skip_head)
{
  hash_map<gimple *, bool> stmt_stat;
  return stmt_semi_invariant_p_1 (loop, stmt, skip_head, stmt_stat);
}

/* Determine whether, when a conditional statement never transfers
   execution to one of its branches, we can remove that branch's leading
   basic block (BRANCH_BB) and those basic blocks dominated by
   BRANCH_BB.  */

static bool
branch_removable_p (basic_block branch_bb)
{
  edge_iterator ei;
  edge e;

  if (single_pred_p (branch_bb))
    return true;

  FOR_EACH_EDGE (e, ei, branch_bb->preds)
    {
      if (dominated_by_p (CDI_DOMINATORS, e->src, branch_bb))
        continue;

      if (dominated_by_p (CDI_DOMINATORS, branch_bb, e->src))
        continue;

      /* The branch can be reached from the opposite branch, or from some
         statement not dominated by the conditional statement.  */
      return false;
    }

  return true;
}

/* Find out which branch of a conditional statement (COND) is invariant in
   the execution context of LOOP.  That is: once the branch is selected in
   a certain iteration of the loop, any operand that contributes to
   computation of the conditional statement remains unchanged in all
   following iterations.  */

static edge
get_cond_invariant_branch (struct loop *loop, gcond *cond)
{
  basic_block cond_bb = gimple_bb (cond);
  basic_block targ_bb[2];
  bool invar[2];
  unsigned invar_checks = 0;

  for (unsigned i = 0; i < 2; i++)
    {
      targ_bb[i] = EDGE_SUCC (cond_bb, i)->dest;

      /* If one branch directs to a loop exit, there is no need to perform
         loop split upon this conditional statement.  Firstly, it is
         trivial if the exit branch is semi-invariant, for the statement
         just breaks out of the loop.  Secondly, if the opposite branch is
         semi-invariant, it means that the statement is really
         loop-invariant, which is covered by loop unswitching.  */
      if (!flow_bb_inside_loop_p (loop, targ_bb[i]))
        return NULL;
    }

  for (unsigned i = 0; i < 2; i++)
    {
      invar[!i] = false;

      if (!branch_removable_p (targ_bb[i]))
        continue;

      /* Given a semi-invariant branch, if its opposite branch dominates
         the loop latch, it and its following trace will only be executed
         in the final iteration of the loop, namely it is not part of the
         repeated body of the loop.  Similar to the above case in which the
         branch is a loop exit, there is no need to split the loop.  */
      if (dominated_by_p (CDI_DOMINATORS, loop->latch, targ_bb[i]))
        continue;

      invar[!i] = stmt_semi_invariant_p (loop, cond, targ_bb[i]);
      invar_checks++;
    }

  /* A condition where both branches are invariant (handled by loop
     unswitching) or both are variant is not what we want.  */
  if (invar[0] ^ !invar[1])
    return NULL;

  /* Found a real loop-invariant condition; do nothing.  */
  if (invar_checks < 2 && stmt_semi_invariant_p (loop, cond, NULL))
    return NULL;

  return EDGE_SUCC (cond_bb, invar[0] ? 0 : 1);
}

/* Calculate the increase in code size, measured by estimated insn number,
   if loop split is applied upon a certain branch (BRANCH_EDGE) of a
   conditional statement.  */

static int
compute_added_num_insns (struct loop *loop, const_edge branch_edge)
{
  basic_block cond_bb = branch_edge->src;
  unsigned branch = EDGE_SUCC (cond_bb, 1) == branch_edge;
  basic_block opposite_bb = EDGE_SUCC (cond_bb, !branch)->dest;
  basic_block *bbs = ((split_info *) loop->aux)->bbs;
  int num = 0;

  for (unsigned i = 0; i < loop->num_nodes; i++)
    {
      /* Do not count basic blocks that are only in the opposite
         branch.  */
      if (dominated_by_p (CDI_DOMINATORS, bbs[i], opposite_bb))
        continue;

      num += estimate_num_insns_seq (bb_seq (bbs[i]), &eni_size_weights);
    }

  /* It is unnecessary to evaluate the expression of the conditional
     statement in the new loop that contains only the invariant branch.
     The expression should be a constant value (either true or false).
     Exclude the code size of insns that contribute to computation of the
     expression.  */

  auto_vec<gimple *> worklist;
  hash_set<gimple *> removed;
  gimple *stmt = last_nondebug_stmt (cond_bb);

  worklist.safe_push (stmt);
  removed.add (stmt);
  num -= estimate_num_insns (stmt, &eni_size_weights);

  do
    {
      ssa_op_iter opnd_iter;
      use_operand_p opnd_p;

      stmt = worklist.pop ();
      FOR_EACH_PHI_OR_STMT_USE (opnd_p, stmt, opnd_iter, SSA_OP_USE)
        {
          tree opnd = USE_FROM_PTR (opnd_p);

          if (TREE_CODE (opnd) != SSA_NAME || SSA_NAME_IS_DEFAULT_DEF (opnd))
            continue;

          gimple *opnd_stmt = SSA_NAME_DEF_STMT (opnd);
          use_operand_p use_p;
          imm_use_iterator use_iter;

          if (removed.contains (opnd_stmt)
              || !flow_bb_inside_loop_p (loop, gimple_bb (opnd_stmt)))
            continue;

          FOR_EACH_IMM_USE_FAST (use_p, use_iter, opnd)
            {
              gimple *use_stmt = USE_STMT (use_p);

              if (!is_gimple_debug (use_stmt) && !removed.contains (use_stmt))
                {
                  opnd_stmt = NULL;
                  break;
                }
            }

          if (opnd_stmt)
            {
              worklist.safe_push (opnd_stmt);
              removed.add (opnd_stmt);
              num -= estimate_num_insns (opnd_stmt, &eni_size_weights);
            }
        }
    } while (!worklist.is_empty ());

  gcc_assert (num >= 0);
  return num;
}

/* Find out the loop-invariant branch of a conditional statement (COND),
   if there is one, and check whether it is eligible and profitable to
   perform loop split upon this branch in LOOP.  */

static edge
get_cond_branch_to_split_loop (struct loop *loop, gcond *cond)
{
  edge invar_branch = get_cond_invariant_branch (loop, cond);
  if (!invar_branch)
    return NULL;

  /* When accurate profile information is available, and the execution
     frequency of the branch is too low, just let it go.  */
  profile_probability prob = invar_branch->probability;
  if (prob.reliable_p ())
    {
      int thres = param_min_loop_cond_split_prob;

      if (prob < profile_probability::always ().apply_scale (thres, 100))
        return NULL;
    }

  /* Add a threshold for increased code size to disable loop split.  */
  if (compute_added_num_insns (loop, invar_branch) > param_max_peeled_insns)
    return NULL;

  return invar_branch;
}

/* Given a loop (LOOP1) with a loop-invariant branch (INVAR_BRANCH) of some
   conditional statement, perform loop split transformation illustrated
   as the following graph.

      .-------T------ if (true) ------F------.
      |                  .---------------.   |
      |                  |               |   |
      v                  |               |   v
  pre-header             |               | pre-header
      |  .------------.  |               |     |  .------------.
      |  |            |  |               |     |  |            |
      |  v            |  |               |     |  v            |
    header            |  |               |   header            |
      |               |  |               |     |                |
 .--- if (cond) ---.  |  |               | .--- if (true) ---.  |
 |                 |  |  |               | |                 |  |
 invariant         |  |  |               | invariant         |  |
 |                 |  |  |               | |                 |  |
 '---T--->.<---F---'  |  |               | '---T--->.<---F---'  |
          |           | /                |          |           |
        stmts         |/                 |        stmts         |
          |         F T                  |          |           |
         / \          |/                 |         / \          |
 .------*   *---[ if (cond) ]------------'  .-----*   *         |
 |      |                                   |     |             |
 |    latch                                 |   latch           |
 |      |                                   |     |             |
 |      '------------'                      |     '------------'
 '------------------------.    .-----------'
          loop1           |    |    loop2
                          v    v
                         exits

   In the graph, loop1 represents the part derived from the original one,
   and loop2 is duplicated using loop_version (), which corresponds to the
   part of the original one being split out.  On the original latch edge
   of loop1, we insert a new conditional statement duplicated from the
   semi-invariant cond; one of its branches goes back to the loop1 header
   as a latch edge, while the other branch goes to the loop2 pre-header as
   an entry edge.  Also, in loop2, we abandon the variant branch of the
   conditional statement by setting a constant bool condition, based on
   which branch is semi-invariant.  */

static bool
do_split_loop_on_cond (struct loop *loop1, edge invar_branch)
{
  basic_block cond_bb = invar_branch->src;
  bool true_invar = !!(invar_branch->flags & EDGE_TRUE_VALUE);
  gcond *cond = as_a <gcond *> (*gsi_last_bb (cond_bb));

  gcc_assert (cond_bb->loop_father == loop1);

  if (dump_enabled_p ())
    dump_printf_loc (MSG_OPTIMIZED_LOCATIONS, cond,
                     "loop split on semi-invariant condition at %s branch\n",
                     true_invar ? "true" : "false");

  initialize_original_copy_tables ();

  struct loop *loop2 = loop_version (loop1, boolean_true_node, NULL,
                                     invar_branch->probability.invert (),
                                     invar_branch->probability,
                                     profile_probability::always (),
                                     profile_probability::always (),
                                     true);
  if (!loop2)
    {
      free_original_copy_tables ();
      return false;
    }

  basic_block cond_bb_copy = get_bb_copy (cond_bb);
  gcond *cond_copy = as_a<gcond *> (*gsi_last_bb (cond_bb_copy));

  /* Replace the condition in loop2 with a bool constant to let the pass
     manager remove the variant branch after the current pass
     completes.  */
  if (true_invar)
    gimple_cond_make_true (cond_copy);
  else
    gimple_cond_make_false (cond_copy);

  update_stmt (cond_copy);

  /* Insert a new conditional statement on the latch edge of loop1; its
     condition is duplicated from the semi-invariant one.  This statement
     acts as a switch to transfer execution from loop1 to loop2, when
     loop1 enters the invariant state.  */
  basic_block latch_bb = split_edge (loop_latch_edge (loop1));
  basic_block break_bb = split_edge (single_pred_edge (latch_bb));
  gimple *break_cond = gimple_build_cond (gimple_cond_code (cond),
                                          gimple_cond_lhs (cond),
                                          gimple_cond_rhs (cond),
                                          NULL_TREE, NULL_TREE);

  gimple_stmt_iterator gsi = gsi_last_bb (break_bb);
  gsi_insert_after (&gsi, break_cond, GSI_NEW_STMT);

  edge to_loop1 = single_succ_edge (break_bb);
  edge to_loop2 = make_edge (break_bb, loop_preheader_edge (loop2)->src, 0);

  to_loop1->flags &= ~EDGE_FALLTHRU;
  to_loop1->flags |= true_invar ? EDGE_FALSE_VALUE : EDGE_TRUE_VALUE;
  to_loop2->flags |= true_invar ? EDGE_TRUE_VALUE : EDGE_FALSE_VALUE;

  /* Due to the introduction of a control flow edge from the loop1 latch to
     the loop2 pre-header, we should update PHIs in loop2 to reflect this
     connection between loop1 and loop2.  */
  connect_loop_phis (loop1, loop2, to_loop2);

  edge true_edge, false_edge, skip_edge1, skip_edge2;
  extract_true_false_edges_from_block (cond_bb, &true_edge, &false_edge);

  skip_edge1 = true_invar ? false_edge : true_edge;
  skip_edge2 = true_invar ? true_edge : false_edge;
  fix_loop_bb_probability (loop1, loop2, skip_edge1, skip_edge2);

  /* Fix the first loop's exit probability after scaling.  */
  to_loop1->probability = invar_branch->probability.invert ();
  to_loop2->probability = invar_branch->probability;

  free_original_copy_tables ();

  return true;
}

/* Traverse all conditional statements in LOOP, to find out a good
   candidate upon which we can do loop split.  */

static bool
split_loop_on_cond (struct loop *loop)
{
  split_info *info = new split_info ();
  basic_block *bbs = info->bbs = get_loop_body (loop);
  bool do_split = false;

  /* Allocate an area to keep temporary info, and associate its address
     with the loop aux field.  */
  loop->aux = info;

  for (unsigned i = 0; i < loop->num_nodes; i++)
    bbs[i]->aux = NULL;

  for (unsigned i = 0; i < loop->num_nodes; i++)
    {
      basic_block bb = bbs[i];

      /* We only consider a conditional statement that is executed at most
         once in each iteration of the loop, so skip statements in inner
         loops.  */
      if ((bb->loop_father != loop) || (bb->flags & BB_IRREDUCIBLE_LOOP))
        continue;

      /* Actually this check is not a hard constraint.  With it, we can
         ensure that the conditional statement will always be executed in
         each iteration.  */
      if (!dominated_by_p (CDI_DOMINATORS, loop->latch, bb))
        continue;

      gcond *cond = safe_dyn_cast <gcond *> (*gsi_last_bb (bb));
      if (!cond)
        continue;

      edge branch_edge = get_cond_branch_to_split_loop (loop, cond);

      if (branch_edge)
        {
          do_split_loop_on_cond (loop, branch_edge);
          do_split = true;
          break;
        }
    }

  delete info;
  loop->aux = NULL;

  return do_split;
}

/* Main entry point.  Perform loop splitting on all suitable loops.  */

static unsigned int
tree_ssa_split_loops (void)
{
  bool changed = false;

  gcc_assert (scev_initialized_p ());

  calculate_dominance_info (CDI_POST_DOMINATORS);

  for (auto loop : loops_list (cfun, LI_INCLUDE_ROOT))
    loop->aux = NULL;

  /* Go through all loops starting from innermost.  */
  for (auto loop : loops_list (cfun, LI_FROM_INNERMOST))
    {
      if (loop->aux)
        {
          /* If any of our inner loops was split, don't split us,
             and mark our containing loop as having had splits as well.
             This allows for delaying SSA update.  */
          loop_outer (loop)->aux = loop;
          continue;
        }

      if (optimize_loop_for_size_p (loop))
        continue;

      if (split_loop (loop) || split_loop_on_cond (loop))
        {
          /* Mark our containing loop as having had some split inner
             loops.  */
          loop_outer (loop)->aux = loop;
          changed = true;
        }
    }

  for (auto loop : loops_list (cfun, LI_INCLUDE_ROOT))
    loop->aux = NULL;

  clear_aux_for_blocks ();

  free_dominance_info (CDI_POST_DOMINATORS);

  if (changed)
    {
      rewrite_into_loop_closed_ssa (NULL, TODO_update_ssa);
      return TODO_cleanup_cfg;
    }
  return 0;
}

/* Loop splitting pass.  */

namespace {

const pass_data pass_data_loop_split =
{
  GIMPLE_PASS, /* type */
  "lsplit", /* name */
  OPTGROUP_LOOP, /* optinfo_flags */
  TV_LOOP_SPLIT, /* tv_id */
  PROP_cfg, /* properties_required */
  0, /* properties_provided */
  0, /* properties_destroyed */
  0, /* todo_flags_start */
  0, /* todo_flags_finish */
};
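
/* A quick way to observe this pass in action (an illustrative invocation,
   assuming a typical GCC build; to my knowledge -fsplit-loops is also
   enabled by default at -O3):

     gcc -O2 -fsplit-loops -fdump-tree-lsplit-details test.c

   The resulting dump file test.c.*.lsplit then contains the "Loop split"
   and "loop split on semi-invariant condition" messages emitted above.  */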

class pass_loop_split : public gimple_opt_pass
{
public:
  pass_loop_split (gcc::context *ctxt)
    : gimple_opt_pass (pass_data_loop_split, ctxt)
  {}

  /* opt_pass methods: */
  bool gate (function *) final override { return flag_split_loops != 0; }
  unsigned int execute (function *) final override;

}; // class pass_loop_split

unsigned int
pass_loop_split::execute (function *fun)
{
  if (number_of_loops (fun) <= 1)
    return 0;

  return tree_ssa_split_loops ();
}

} // anon namespace

gimple_opt_pass *
make_pass_loop_split (gcc::context *ctxt)
{
  return new pass_loop_split (ctxt);
}