gcc/tree-vect-loop-manip.c

   1 /* Vectorizer Specific Loop Manipulations
   2    Copyright (C) 2003-2013 Free Software Foundation, Inc.
   3    Contributed by Dorit Naishlos <dorit@il.ibm.com>
   4    and Ira Rosen <irar@il.ibm.com>
   5
   6 This file is part of GCC.
   7
   8 GCC is free software; you can redistribute it and/or modify it under
   9 the terms of the GNU General Public License as published by the Free
  10 Software Foundation; either version 3, or (at your option) any later
  11 version.
  12
  13 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
  14 WARRANTY; without even the implied warranty of MERCHANTABILITY or
  15 FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  16 for more details.
  17
  18 You should have received a copy of the GNU General Public License
  19 along with GCC; see the file COPYING3.  If not see
  20 <http://www.gnu.org/licenses/>.  */
  21
  22 #include "config.h"
  23 #include "system.h"
  24 #include "coretypes.h"
  25 #include "dumpfile.h"
  26 #include "tm.h"
  27 #include "ggc.h"
  28 #include "tree.h"
  29 #include "basic-block.h"
  30 #include "gimple-pretty-print.h"
  31 #include "gimple.h"
  32 #include "gimple-ssa.h"
  33 #include "tree-cfg.h"
  34 #include "tree-phinodes.h"
  35 #include "ssa-iterators.h"
  36 #include "tree-ssanames.h"
  37 #include "tree-ssa-loop.h"
  38 #include "tree-into-ssa.h"
  39 #include "tree-ssa.h"
  40 #include "tree-pass.h"
  41 #include "cfgloop.h"
  42 #include "diagnostic-core.h"
  43 #include "tree-scalar-evolution.h"
  44 #include "tree-vectorizer.h"
  45 #include "langhooks.h"
  46
  47 /*************************************************************************
  48   Simple Loop Peeling Utilities
  49
  50   Utilities to support loop peeling for vectorization purposes.
  51  *************************************************************************/
  52
  53
  54 /* Replace the SSA name used at *OP_P by its current definition, if any.  */
  55
  56 static void
  57 rename_use_op (use_operand_p op_p)
  58 {
  59   tree old_name = USE_FROM_PTR (op_p);
  60   tree replacement;
  61
  62   /* Operands that are not SSA names are never renamed.  */
  63   if (TREE_CODE (old_name) != SSA_NAME)
  64     return;
  65
  66   replacement = get_current_def (old_name);
  67
  68   /* A name without a current definition is something defined outside of
  69      the loop and stays as it is; otherwise it is an ordinary SSA name
  70      defined in the loop and the use is switched over.  */
  71   if (replacement)
  72     SET_USE (op_p, replacement);
  73 }
  74
  75
  76 /* Rename the SSA uses in basic block BB: every use operand of its
  77    statements, and the PHI arguments on edges coming from inside BB's loop.  */
  78
  79 static void
  80 rename_variables_in_bb (basic_block bb)
  81 {
  82   struct loop *father = bb->loop_father;
  83   gimple_stmt_iterator si;
  84   edge_iterator pred_it;
  85   edge pred;
  86
  87   /* Statements: all kinds of use operands are renamed.  */
  88   for (si = gsi_start_bb (bb); !gsi_end_p (si); gsi_next (&si))
  89     {
  90       ssa_op_iter op_it;
  91       use_operand_p use;
  92       FOR_EACH_SSA_USE_OPERAND (use, gsi_stmt (si), op_it, SSA_OP_ALL_USES)
  93         rename_use_op (use);
  94     }
  95
  96   /* PHI nodes: arguments on edges from outside the loop are skipped.  */
  97   FOR_EACH_EDGE (pred, pred_it, bb->preds)
  98     {
  99       if (flow_bb_inside_loop_p (father, pred->src))
 100         for (si = gsi_start_phis (bb); !gsi_end_p (si); gsi_next (&si))
 101           rename_use_op (PHI_ARG_DEF_PTR_FROM_EDGE (gsi_stmt (si), pred));
 102     }
 103 }
 104
 105
 106 /* A pending debug-stmt adjustment: references to FROM that are dominated
 107    by BB are to be replaced by TO.  */
 108 typedef struct
 109 {
 110   tree from;
 111   tree to;
 112   basic_block bb;
 113 } adjust_info;
 114
 115 /* Adjustments waiting to be applied, used as a stack.  They are processed
 116    LIFO so that the closest substitution applies; FIFO order might substitute
 117    uses with a PHI DEF that soon stops dominating them.  */
 118 static vec<adjust_info, va_stack> adjust_vec;
 119
 120 /* Apply the adjustment AI at once: each debug bind stmt that uses AI->from
 121    from a block dominated by AI->bb, but not by the block defining AI->from,
 122    is switched to AI->to, or has its value reset when AI->to is NULL.  */
 123
 124 static void
 125 adjust_debug_stmts_now (adjust_info *ai)
 126 {
 127   basic_block phi_bb = ai->bb;
 128   tree old_name = ai->from;
 129   tree replacement = ai->to;
 130   imm_use_iterator use_iter;
 131   gimple use_stmt;
 132   basic_block def_bb = gimple_bb (SSA_NAME_DEF_STMT (old_name));
 133
 134   gcc_assert (dom_info_available_p (CDI_DOMINATORS));
 135
 136   /* Look for debug stmts still holding non-loop-closed references.  */
 137   FOR_EACH_IMM_USE_STMT (use_stmt, use_iter, old_name)
 138     {
 139       use_operand_p use;
 140       basic_block use_bb;
 141
 142       if (!is_gimple_debug (use_stmt))
 143         continue;
 144
 145       gcc_assert (gimple_debug_bind_p (use_stmt));
 146       use_bb = gimple_bb (use_stmt);
 147
 148       /* The use has to lie in PHI_BB or be dominated by it ...  */
 149       if (use_bb != phi_bb && !dominated_by_p (CDI_DOMINATORS, use_bb, phi_bb))
 150         continue;
 151
 152       /* ... without lying in or below the block of the definition.  */
 153       if (use_bb == def_bb || dominated_by_p (CDI_DOMINATORS, use_bb, def_bb))
 154         continue;
 155
 156       if (!replacement)
 157         {
 158           gimple_debug_bind_reset_value (use_stmt);
 159           update_stmt (use_stmt);
 160         }
 161       else
 162         FOR_EACH_IMM_USE_ON_STMT (use, use_iter)
 163           SET_USE (use, replacement);
 164     }
 165 }
 166
 167 /* Apply the adjustments queued in adjust_vec, most recent first, and
 168    release the vector.  Does nothing when debug stmts cannot exist.  */
 169
 170 static void
 171 adjust_vec_debug_stmts (void)
 172 {
 173   if (MAY_HAVE_DEBUG_STMTS)
 174     {
 175       /* Adjustments can only have been queued on an existing vector.  */
 176       gcc_assert (adjust_vec.exists ());
 177
 178       /* Drain from the top so that the closest substitution applies.  */
 179       for (; !adjust_vec.is_empty (); adjust_vec.pop ())
 180         adjust_debug_stmts_now (&adjust_vec.last ());
 181
 182       adjust_vec.release ();
 183     }
 184 }
 185
 186 /* Make debug stmts that reference FROM use the loop-closed TO instead, if
 187    the reference is dominated by BB and not by the definition of FROM.  Only
 188    non-virtual, non-default-def SSA names are handled.  While adjust_vec
 189    exists, the adjustment is queued for adjust_vec_debug_stmts instead.  */
 190
 191 static void
 192 adjust_debug_stmts (tree from, tree to, basic_block bb)
 193 {
 194   adjust_info request;
 195
 196   if (!MAY_HAVE_DEBUG_STMTS || TREE_CODE (from) != SSA_NAME)
 197     return;
 198   if (SSA_NAME_IS_DEFAULT_DEF (from) || virtual_operand_p (from))
 199     return;
 200
 201   request.from = from;
 202   request.to = to;
 203   request.bb = bb;
 204
 205   /* Queue while a batch is being collected, otherwise apply at once.  */
 206   if (!adjust_vec.exists ())
 207     adjust_debug_stmts_now (&request);
 208   else
 209     adjust_vec.safe_push (request);
 210 }
 211
 212 /* Make NEW_DEF the argument of UPDATE_PHI on edge E.  Debug stmts that
 213    referenced the previous argument, presumably non-loop-closed references
 214    left over from other transformations, are recorded for adjustment to the
 215    result of UPDATE_PHI.  */
 216
 217 static void
 218 adjust_phi_and_debug_stmts (gimple update_phi, edge e, tree new_def)
 219 {
 220   /* Fetch the old argument before it gets overwritten.  */
 221   tree previous_arg = PHI_ARG_DEF_FROM_EDGE (update_phi, e);
 222   SET_PHI_ARG_DEF (update_phi, e->dest_idx, new_def);
 223   if (!MAY_HAVE_DEBUG_STMTS)
 224     return;
 225   adjust_debug_stmts (previous_arg, PHI_RESULT (update_phi),
 226                       gimple_bb (update_phi));
 227 }
 228
 229
 230 /* Update PHI nodes for a guard of the LOOP.
 231
 232    Input:
 233    - LOOP, GUARD_EDGE: LOOP is a loop for which we added guard code that
 234         controls whether LOOP is to be executed.  GUARD_EDGE is the edge that
 235         originates from the guard-bb, skips LOOP and reaches the (unique) exit
 236         bb of LOOP.  This loop-exit-bb is an empty bb with one successor.
 237         We denote this bb NEW_MERGE_BB because before the guard code was added
 238         it had a single predecessor (the LOOP header), and now it became a merge
 239         point of two paths - the path that ends with the LOOP exit-edge, and
 240         the path that ends with GUARD_EDGE.
 241    - NEW_EXIT_BB: New basic block that is added by this function between LOOP
 242         and NEW_MERGE_BB. It is used to place loop-closed-ssa-form exit-phis.
 243
 244    ===> The CFG before the guard-code was added:
 245         LOOP_header_bb:
 246           loop_body
 247           if (exit_loop) goto update_bb
 248           else           goto LOOP_header_bb
 249         update_bb:
 250
 251    ==> The CFG after the guard-code was added:
 252         guard_bb:
 253           if (LOOP_guard_condition) goto new_merge_bb
 254           else                      goto LOOP_header_bb
 255         LOOP_header_bb:
 256           loop_body
 257           if (exit_loop_condition) goto new_merge_bb
 258           else                     goto LOOP_header_bb
 259         new_merge_bb:
 260           goto update_bb
 261         update_bb:
 262
 263    ==> The CFG after this function:
 264         guard_bb:
 265           if (LOOP_guard_condition) goto new_merge_bb
 266           else                      goto LOOP_header_bb
 267         LOOP_header_bb:
 268           loop_body
 269           if (exit_loop_condition) goto new_exit_bb
 270           else                     goto LOOP_header_bb
 271         new_exit_bb:
 272         new_merge_bb:
 273           goto update_bb
 274         update_bb:
 275
 276    This function:
 277    1. creates and updates the relevant phi nodes to account for the new
 278       incoming edge (GUARD_EDGE) into NEW_MERGE_BB. This involves:
 279       1.1. Create phi nodes at NEW_MERGE_BB.
 280       1.2. Update the phi nodes at the successor of NEW_MERGE_BB (denoted
 281            UPDATE_BB), the exit-bb of LOOP before NEW_MERGE_BB was added.
 282    2. preserves loop-closed-ssa-form by creating the required phi nodes
 283       at the exit of LOOP (i.e., in NEW_EXIT_BB).
 284
 285    There are two flavors to this function:
 286
 287    slpeel_update_phi_nodes_for_guard1:
 288      Here the guard controls whether we enter or skip LOOP, where LOOP is a
 289      prolog_loop (loop1 below), and the new phis created in NEW_MERGE_BB are
 290      for variables that have phis in the loop header.
 291
 292    slpeel_update_phi_nodes_for_guard2:
 293      Here the guard controls whether we enter or skip LOOP, where LOOP is an
 294      epilog_loop (loop2 below), and the new phis created in NEW_MERGE_BB are
 295      for variables that have phis in the loop exit.
 296
 297    I.E., the overall structure is:
 298
 299         loop1_preheader_bb:
 300                 guard1 (goto loop1/merge1_bb)
 301         loop1
 302         loop1_exit_bb:
 303                 guard2 (goto merge1_bb/merge2_bb)
 304         merge1_bb
 305         loop2
 306         loop2_exit_bb
 307         merge2_bb
 308         next_bb
 309
 310    slpeel_update_phi_nodes_for_guard1 takes care of creating phis in
 311    loop1_exit_bb and merge1_bb. These are entry phis (phis for the vars
 312    that have phis in loop1->header).
 313
 314    slpeel_update_phi_nodes_for_guard2 takes care of creating phis in
 315    loop2_exit_bb and merge2_bb. These are exit phis (phis for the vars
 316    that have phis in next_bb). It also adds some of these phis to
 317    loop1_exit_bb.
 318
 319    slpeel_update_phi_nodes_for_guard1 is always called before
 320    slpeel_update_phi_nodes_for_guard2. They are both needed in order
 321    to create correct data-flow and loop-closed-ssa-form.
 322
 323    Generally slpeel_update_phi_nodes_for_guard1 creates phis for variables
 324    that change between iterations of a loop (and therefore have a phi-node
 325    at the loop entry), whereas slpeel_update_phi_nodes_for_guard2 creates
 326    phis for variables that are used out of the loop (and therefore have
 327    loop-closed exit phis). Some variables may be both updated between
 328    iterations and used after the loop. This is why in loop1_exit_bb we
 329    may need both entry_phis (created by slpeel_update_phi_nodes_for_guard1)
 330    and exit phis (created by slpeel_update_phi_nodes_for_guard2).
 331
 332    - IS_NEW_LOOP: if IS_NEW_LOOP is true, then LOOP is a newly created copy of
 333      an original loop. i.e., we have:
 334
 335            orig_loop
 336            guard_bb (goto LOOP/new_merge)
 337            new_loop <-- LOOP
 338            new_exit
 339            new_merge
 340            next_bb
 341
 342      If IS_NEW_LOOP is false, then LOOP is an original loop, in which case we
 343      have:
 344
 345            new_loop
 346            guard_bb (goto LOOP/new_merge)
 347            orig_loop <-- LOOP
 348            new_exit
 349            new_merge
 350            next_bb
 351
 352      The SSA names defined in the original loop have a current
 353      reaching definition that records the corresponding new
 354      ssa-name used in the new duplicated loop copy.
 355   */
 356
 357 /* Function slpeel_update_phi_nodes_for_guard1
 358
 359    Input:
 360    - GUARD_EDGE, LOOP, IS_NEW_LOOP, NEW_EXIT_BB - as explained above.
 361      NEW_EXIT_BB is an output: it is set to the block created here by
 362      splitting the single exit edge of LOOP.
 363
 364    In the context of the overall structure, we have:
 365
 366         loop1_preheader_bb:
 367                 guard1 (goto loop1/merge1_bb)
 368 LOOP->  loop1
 369         loop1_exit_bb:
 370                 guard2 (goto merge1_bb/merge2_bb)
 371         merge1_bb
 372         loop2
 373         loop2_exit_bb
 374         merge2_bb
 375         next_bb
 376
 377    For each name updated between loop iterations (i.e., for each name that has
 378    an entry (loop-header) phi in LOOP) we create a new phi in:
 379    1. merge1_bb (to account for the edge from guard1)
 380    2. loop1_exit_bb (an exit-phi to keep LOOP in loop-closed form)
 381 */
 382
 383 static void
 384 slpeel_update_phi_nodes_for_guard1 (edge guard_edge, struct loop *loop,
 385                                     bool is_new_loop, basic_block *new_exit_bb)
 386 {
 387   gimple orig_phi, new_phi;
 388   gimple update_phi, update_phi2;
 389   tree guard_arg, loop_arg;
 390   basic_block new_merge_bb = guard_edge->dest;
 391   edge e = EDGE_SUCC (new_merge_bb, 0);  /* NEW_MERGE_BB -> UPDATE_BB.  */
 392   basic_block update_bb = e->dest;
 393   basic_block orig_bb = loop->header;
 394   edge new_exit_e;
 395   tree current_new_name;
 396   gimple_stmt_iterator gsi_orig, gsi_update;
 397
 398   /* Create new bb between loop and new_merge_bb.  */
 399   *new_exit_bb = split_edge (single_exit (loop));
 400
 401   new_exit_e = EDGE_SUCC (*new_exit_bb, 0);
 402   /* Walk LOOP's header PHIs and UPDATE_BB's PHIs in lockstep, up to the shorter list.  */
 403   for (gsi_orig = gsi_start_phis (orig_bb),
 404        gsi_update = gsi_start_phis (update_bb);
 405        !gsi_end_p (gsi_orig) && !gsi_end_p (gsi_update);
 406        gsi_next (&gsi_orig), gsi_next (&gsi_update))
 407     {
 408       source_location loop_locus, guard_locus;
 409       tree new_res;
 410       orig_phi = gsi_stmt (gsi_orig);
 411       update_phi = gsi_stmt (gsi_update);
 412
 413       /** 1. Handle new-merge-point phis  **/
 414
 415       /* 1.1. Generate new phi node in NEW_MERGE_BB:  */
 416       new_res = copy_ssa_name (PHI_RESULT (orig_phi), NULL);
 417       new_phi = create_phi_node (new_res, new_merge_bb);
 418
 419       /* 1.2. NEW_MERGE_BB has two incoming edges: GUARD_EDGE and the exit-edge
 420             of LOOP. Set the two phi args in NEW_PHI for these edges:  */
 421       loop_arg = PHI_ARG_DEF_FROM_EDGE (orig_phi, EDGE_SUCC (loop->latch, 0));
 422       loop_locus = gimple_phi_arg_location_from_edge (orig_phi,
 423                                                       EDGE_SUCC (loop->latch,
 424                                                                  0));
 425       guard_arg = PHI_ARG_DEF_FROM_EDGE (orig_phi, loop_preheader_edge (loop));
 426       guard_locus
 427         = gimple_phi_arg_location_from_edge (orig_phi,
 428                                              loop_preheader_edge (loop));
 429       /* LOOP_ARG is the latch-edge value, GUARD_ARG the preheader-edge value.  */
 430       add_phi_arg (new_phi, loop_arg, new_exit_e, loop_locus);
 431       add_phi_arg (new_phi, guard_arg, guard_edge, guard_locus);
 432
 433       /* 1.3. Update phi in successor block.  */
 434       gcc_assert (PHI_ARG_DEF_FROM_EDGE (update_phi, e) == loop_arg
 435                   || PHI_ARG_DEF_FROM_EDGE (update_phi, e) == guard_arg);
 436       adjust_phi_and_debug_stmts (update_phi, e, PHI_RESULT (new_phi));
 437       update_phi2 = new_phi;  /* The merge phi is updated again in 2.3.  */
 438
 439
 440       /** 2. Handle loop-closed-ssa-form phis  **/
 441       /* Virtual operands get no exit phi in NEW_EXIT_BB.  */
 442       if (virtual_operand_p (PHI_RESULT (orig_phi)))
 443         continue;
 444
 445       /* 2.1. Generate new phi node in NEW_EXIT_BB:  */
 446       new_res = copy_ssa_name (PHI_RESULT (orig_phi), NULL);
 447       new_phi = create_phi_node (new_res, *new_exit_bb);
 448
 449       /* 2.2. NEW_EXIT_BB has one incoming edge: the exit-edge of the loop.  */
 450       add_phi_arg (new_phi, loop_arg, single_exit (loop), loop_locus);
 451
 452       /* 2.3. Update phi in successor of NEW_EXIT_BB:  */
 453       gcc_assert (PHI_ARG_DEF_FROM_EDGE (update_phi2, new_exit_e) == loop_arg);
 454       adjust_phi_and_debug_stmts (update_phi2, new_exit_e,
 455                                   PHI_RESULT (new_phi));
 456
 457       /* 2.4. Record the newly created name with set_current_def.
 458          We want to find a name such that
 459                 name = get_current_def (orig_loop_name)
 460          and to set its current definition as follows:
 461                 set_current_def (name, new_phi_name)
 462
 463          If LOOP is a new loop then loop_arg is already the name we're
 464          looking for. If LOOP is the original loop, then loop_arg is
 465          the orig_loop_name and the relevant name is recorded in its
 466          current reaching definition.  */
 467       if (is_new_loop)
 468         current_new_name = loop_arg;
 469       else
 470         {
 471           current_new_name = get_current_def (loop_arg);
 472           /* current_def is not available only if the variable does not
 473              change inside the loop, in which case we also don't care
 474              about recording a current_def for it because we won't be
 475              trying to create loop-exit-phis for it.  */
 476           if (!current_new_name)
 477             continue;
 478         }
 479       gcc_assert (get_current_def (current_new_name) == NULL_TREE);
 480       /* Nothing was recorded for this name yet (asserted above); do so now.  */
 481       set_current_def (current_new_name, PHI_RESULT (new_phi));
 482     }
 483 }
 484
 485
 486 /* Function slpeel_update_phi_nodes_for_guard2
 487
 488    Input:
 489    - GUARD_EDGE, LOOP, IS_NEW_LOOP, NEW_EXIT_BB - as explained above; NEW_EXIT_BB
 490      is set here to the block created by splitting LOOP's single exit edge.
 491    In the context of the overall structure, we have:
 492
 493         loop1_preheader_bb:
 494                 guard1 (goto loop1/merge1_bb)
 495         loop1
 496         loop1_exit_bb:
 497                 guard2 (goto merge1_bb/merge2_bb)
 498         merge1_bb
 499 LOOP->  loop2
 500         loop2_exit_bb
 501         merge2_bb
 502         next_bb
 503
 504    For each name used outside the loop (i.e., for each name that has an exit
 505    phi in next_bb) we create a new phi in:
 506    1. merge2_bb (to account for the edge from guard_bb)
 507    2. loop2_exit_bb (an exit-phi to keep LOOP in loop-closed form)
 508    3. guard2 bb (an exit phi to keep the preceding loop in loop-closed form),
 509       if needed (if it wasn't handled by slpeel_update_phi_nodes_for_guard1).
 510 */
 511
 512 static void
 513 slpeel_update_phi_nodes_for_guard2 (edge guard_edge, struct loop *loop,
 514                                     bool is_new_loop, basic_block *new_exit_bb)
 515 {
 516   gimple orig_phi, new_phi;
 517   gimple update_phi, update_phi2;
 518   tree guard_arg, loop_arg;
 519   basic_block new_merge_bb = guard_edge->dest;
 520   edge e = EDGE_SUCC (new_merge_bb, 0);  /* NEW_MERGE_BB -> UPDATE_BB.  */
 521   basic_block update_bb = e->dest;
 522   edge new_exit_e;
 523   tree orig_def, orig_def_new_name;
 524   tree new_name, new_name2;
 525   tree arg;
 526   gimple_stmt_iterator gsi;
 527
 528   /* Create new bb between loop and new_merge_bb.  */
 529   *new_exit_bb = split_edge (single_exit (loop));
 530
 531   new_exit_e = EDGE_SUCC (*new_exit_bb, 0);
 532   /* Visit every phi of UPDATE_BB; its argument on E is the name being handled.  */
 533   for (gsi = gsi_start_phis (update_bb); !gsi_end_p (gsi); gsi_next (&gsi))
 534     {
 535       tree new_res;
 536       update_phi = gsi_stmt (gsi);
 537       orig_phi = update_phi;  /* The same phi under a second name.  */
 538       orig_def = PHI_ARG_DEF_FROM_EDGE (orig_phi, e);
 539       /* This loop-closed-phi actually doesn't represent a use
 540          out of the loop - the phi arg is a constant.  */
 541       if (TREE_CODE (orig_def) != SSA_NAME)
 542         continue;
 543       orig_def_new_name = get_current_def (orig_def);
 544       arg = NULL_TREE;
 545
 546       /** 1. Handle new-merge-point phis  **/
 547
 548       /* 1.1. Generate new phi node in NEW_MERGE_BB:  */
 549       new_res = copy_ssa_name (PHI_RESULT (orig_phi), NULL);
 550       new_phi = create_phi_node (new_res, new_merge_bb);
 551
 552       /* 1.2. NEW_MERGE_BB has two incoming edges: GUARD_EDGE and the exit-edge
 553             of LOOP. Set the two PHI args in NEW_PHI for these edges:  */
 554       new_name = orig_def;
 555       new_name2 = NULL_TREE;
 556       if (orig_def_new_name)
 557         {
 558           new_name = orig_def_new_name;
 559           /* Some variables have both loop-entry-phis and loop-exit-phis.
 560              Such variables were given yet newer names by phis placed in
 561              guard_bb by slpeel_update_phi_nodes_for_guard1. I.e:
 562              new_name2 = get_current_def (get_current_def (orig_name)).  */
 563           new_name2 = get_current_def (new_name);
 564         }
 565
 566       if (is_new_loop)
 567         {
 568           guard_arg = orig_def;
 569           loop_arg = new_name;
 570         }
 571       else
 572         {
 573           guard_arg = new_name;
 574           loop_arg = orig_def;
 575         }
 576       if (new_name2)
 577         guard_arg = new_name2;
 578
 579       add_phi_arg (new_phi, loop_arg, new_exit_e, UNKNOWN_LOCATION);
 580       add_phi_arg (new_phi, guard_arg, guard_edge, UNKNOWN_LOCATION);
 581
 582       /* 1.3. Update phi in successor block.  */
 583       gcc_assert (PHI_ARG_DEF_FROM_EDGE (update_phi, e) == orig_def);
 584       adjust_phi_and_debug_stmts (update_phi, e, PHI_RESULT (new_phi));
 585       update_phi2 = new_phi;  /* The merge phi is updated again in 2.3 and 3.4.  */
 586
 587
 588       /** 2. Handle loop-closed-ssa-form phis  **/
 589
 590       /* 2.1. Generate new phi node in NEW_EXIT_BB:  */
 591       new_res = copy_ssa_name (PHI_RESULT (orig_phi), NULL);
 592       new_phi = create_phi_node (new_res, *new_exit_bb);
 593
 594       /* 2.2. NEW_EXIT_BB has one incoming edge: the exit-edge of the loop.  */
 595       add_phi_arg (new_phi, loop_arg, single_exit (loop), UNKNOWN_LOCATION);
 596
 597       /* 2.3. Update phi in successor of NEW_EXIT_BB:  */
 598       gcc_assert (PHI_ARG_DEF_FROM_EDGE (update_phi2, new_exit_e) == loop_arg);
 599       adjust_phi_and_debug_stmts (update_phi2, new_exit_e,
 600                                   PHI_RESULT (new_phi));
 601
 602
 603       /** 3. Handle loop-closed-ssa-form phis for first loop  **/
 604
 605       /* 3.1. Find the relevant names that need an exit-phi in
 606          GUARD_BB, i.e. names for which
 607          slpeel_update_phi_nodes_for_guard1 had not already created a
 608          phi node. This is the case for names that are used outside
 609          the loop (and therefore need an exit phi) but are not updated
 610          across loop iterations (and therefore don't have a
 611          loop-header-phi).
 612
 613          slpeel_update_phi_nodes_for_guard1 is responsible for
 614          creating loop-exit phis in GUARD_BB for names that have a
 615          loop-header-phi.  When such a phi is created we also record
 616          the new name in its current definition.  If this new name
 617          exists, then guard_arg was set to this new name (see 1.2
 618          above).  Therefore, if guard_arg is not this new name, this
 619          is an indication that an exit-phi in GUARD_BB was not yet
 620          created, so we take care of it here.  */
 621       if (guard_arg == new_name2)
 622         continue;
 623       arg = guard_arg;
 624
 625       /* 3.2. Generate new phi node in GUARD_BB:  */
 626       new_res = copy_ssa_name (PHI_RESULT (orig_phi), NULL);
 627       new_phi = create_phi_node (new_res, guard_edge->src);
 628
 629       /* 3.3. GUARD_BB has one incoming edge:  */
 630       gcc_assert (EDGE_COUNT (guard_edge->src->preds) == 1);
 631       add_phi_arg (new_phi, arg, EDGE_PRED (guard_edge->src, 0),
 632                    UNKNOWN_LOCATION);
 633
 634       /* 3.4. Update phi in successor of GUARD_BB:  */
 635       gcc_assert (PHI_ARG_DEF_FROM_EDGE (update_phi2, guard_edge)
 636                                                                 == guard_arg);
 637       adjust_phi_and_debug_stmts (update_phi2, guard_edge,
 638                                   PHI_RESULT (new_phi));
 639     }
 640 }
 641
 642
 643 /* Make LOOP iterate NITERS times by replacing its exit test with one on a
 644    new IV that starts at zero and is incremented by one until it reaches
 645    NITERS.  Assumption: the exit-condition of LOOP is the last stmt in it.  */
 646
 647 void
 648 slpeel_make_loop_iterate_ntimes (struct loop *loop, tree niters)
 649 {
 650   edge exit_e = single_exit (loop);
 651   tree zero = build_int_cst (TREE_TYPE (niters), 0);
 652   tree one = build_int_cst (TREE_TYPE (niters), 1);
 653   gimple old_cond = get_loop_exit_condition (loop);
 654   gimple new_cond;
 655   gimple_stmt_iterator cond_gsi, iv_gsi;
 656   tree iv_before, iv_after;
 657   bool after_p;
 658   enum tree_code cmp;
 659   LOC locus;
 660
 661   gcc_assert (old_cond);
 662   cond_gsi = gsi_for_stmt (old_cond);
 663
 664   /* Create the counting IV at the standard increment position.  */
 665   standard_iv_increment_position (loop, &iv_gsi, &after_p);
 666   create_iv (zero, one, NULL_TREE, loop, &iv_gsi, after_p, &iv_before,
 667              &iv_after);
 668
 669   /* Turn both comparison operands into gimple operands at the old test.  */
 670   iv_after = force_gimple_operand_gsi (&cond_gsi, iv_after, true, NULL_TREE,
 671                                        true, GSI_SAME_STMT);
 672   niters = force_gimple_operand_gsi (&cond_gsi, niters, true, NULL_TREE,
 673                                      true, GSI_SAME_STMT);
 674
 675   /* Leave on IV >= NITERS when the exit edge is the true edge, otherwise
 676      stay in the loop while IV < NITERS.  */
 677   if (exit_e->flags & EDGE_TRUE_VALUE)
 678     cmp = GE_EXPR;
 679   else
 680     cmp = LT_EXPR;
 681   new_cond = gimple_build_cond (cmp, iv_after, niters, NULL_TREE, NULL_TREE);
 682   /* Put the new test in front of the old one, then remove the old one.  */
 683   gsi_insert_before (&cond_gsi, new_cond, GSI_SAME_STMT);
 684   gsi_remove (&cond_gsi, true);
 685   free_stmt_vec_info (old_cond);
 686
 687   locus = find_loop_location (loop);
 688   if (dump_enabled_p ())
 689     {
 690       if (LOCATION_LOCUS (locus) != UNKNOWN_LOC)
 691         dump_printf (MSG_NOTE, "\nloop at %s:%d: ", LOC_FILE (locus),
 692                      LOC_LINE (locus));
 693       dump_gimple_stmt (MSG_NOTE, TDF_SLIM, new_cond, 0);
 694       dump_printf (MSG_NOTE, "\n");
 695     }
 696   loop->nb_iterations = niters;
 697 }
 698
 699
 700 /* Copy LOOP onto E, its preheader or single exit edge, and return the copy.
 701    NULL is returned if E is neither edge or LOOP's blocks cannot be copied.  */
 702
 703 struct loop *
 704 slpeel_tree_duplicate_loop_to_edge_cfg (struct loop *loop, edge e)
 705 {
 706   struct loop *new_loop;
 707   basic_block *new_bbs, *bbs;
 708   bool at_exit;
 709   bool was_imm_dom;
 710   basic_block exit_dest;
 711   edge exit, new_exit;
 712   /* Only the single exit edge and the preheader edge of LOOP are accepted.  */
 713   exit = single_exit (loop);
 714   at_exit = (e == exit);
 715   if (!at_exit && e != loop_preheader_edge (loop))
 716     return NULL;
 717   /* One extra slot is reserved for the pre-header block stored below.  */
 718   bbs = XNEWVEC (basic_block, loop->num_nodes + 1);
 719   get_loop_body_with_size (loop, bbs, loop->num_nodes);
 720
 721   /* Check whether duplication is possible.  */
 722   if (!can_copy_bbs_p (bbs, loop->num_nodes))
 723     {
 724       free (bbs);
 725       return NULL;
 726     }
 727
 728   /* Generate new loop structure.  */
 729   new_loop = duplicate_loop (loop, loop_outer (loop));
 730   duplicate_subloops (loop, new_loop);
 731   /* Remember whether LOOP's header immediately dominates the exit block.  */
 732   exit_dest = exit->dest;
 733   was_imm_dom = (get_immediate_dominator (CDI_DOMINATORS,
 734                                           exit_dest) == loop->header ?
 735                  true : false);
 736
 737   /* Also copy the pre-header, this avoids jumping through hoops to
 738      duplicate the loop entry PHI arguments.  Create an empty
 739      pre-header unconditionally for this.  */
 740   basic_block preheader = split_edge (loop_preheader_edge (loop));
 741   edge entry_e = single_pred_edge (preheader);
 742   bbs[loop->num_nodes] = preheader;
 743   new_bbs = XNEWVEC (basic_block, loop->num_nodes + 1);
 744   /* Copy the body plus pre-header; the copied pre-header is the last entry.  */
 745   copy_bbs (bbs, loop->num_nodes + 1, new_bbs,
 746             &exit, 1, &new_exit, NULL,
 747             e->src, true);
 748   basic_block new_preheader = new_bbs[loop->num_nodes];
 749
 750   add_phi_args_after_copy (new_bbs, loop->num_nodes + 1, NULL);
 751
 752   if (at_exit) /* Add the loop copy at exit.  */
 753     {
 754       redirect_edge_and_branch_force (e, new_preheader);
 755       flush_pending_stmts (e);
 756       set_immediate_dominator (CDI_DOMINATORS, new_preheader, e->src);
 757       if (was_imm_dom)
 758         set_immediate_dominator (CDI_DOMINATORS, exit_dest, new_loop->header);
 759
 760       /* And remove the non-necessary forwarder (LOOP's split-off pre-header)
 761          again.  Keep the copied one as pre-header of the copy at the exit.  */
 762       redirect_edge_pred (single_succ_edge (preheader), single_pred (preheader));
 763       delete_basic_block (preheader);
 764       set_immediate_dominator (CDI_DOMINATORS, loop->header,
 765                                loop_preheader_edge (loop)->src);
 766     }
 767   else /* Add the copy at entry.  */
 768     {
 769       redirect_edge_and_branch_force (entry_e, new_preheader);
 770       flush_pending_stmts (entry_e);
 771       set_immediate_dominator (CDI_DOMINATORS, new_preheader, entry_e->src);
 772
 773       redirect_edge_and_branch_force (new_exit, preheader);
 774       flush_pending_stmts (new_exit);
 775       set_immediate_dominator (CDI_DOMINATORS, preheader, new_exit->src);
 776
 777       /* And remove the non-necessary forwarder (the copied pre-header) again.
 778          The split-off pre-header stays between the copy's exit and LOOP.  */
 779       redirect_edge_pred (single_succ_edge (new_preheader), single_pred (new_preheader));
 780       delete_basic_block (new_preheader);
 781       set_immediate_dominator (CDI_DOMINATORS, new_loop->header,
 782                                loop_preheader_edge (new_loop)->src);
 783     }
 784   /* Rename the SSA uses in every copied block, the pre-header slot included.  */
 785   for (unsigned i = 0; i < loop->num_nodes+1; i++)
 786     rename_variables_in_bb (new_bbs[i]);
 787
 788   free (new_bbs);
 789   free (bbs);
 790
 791 #ifdef ENABLE_CHECKING
 792   verify_dominators (CDI_DOMINATORS);
 793 #endif
 794
 795   return new_loop;
 796 }
 797
 798
 799 /* Make COND the last statement of GUARD_BB, preceded by the statements of
 800    COND_EXPR_STMT_LIST and those needed to gimplify COND.  The new true edge
 801    to EXIT_BB, the block reached when the loop is skipped, is given
 802    PROBABILITY and returned; DOM_BB becomes EXIT_BB's immediate dominator.
 803    Assumes that this is the single exit of the guarded loop.  */
 804
 805 static edge
 806 slpeel_add_loop_guard (basic_block guard_bb, tree cond,
 807                        gimple_seq cond_expr_stmt_list,
 808                        basic_block exit_bb, basic_block dom_bb,
 809                        int probability)
 810 {
 811   gimple_stmt_iterator gsi;
 812   edge new_e, enter_e;
 813   gimple cond_stmt;
 814   gimple_seq gimplify_stmt_list = NULL;
 815   /* The existing successor edge of GUARD_BB becomes the false edge.  */
 816   enter_e = EDGE_SUCC (guard_bb, 0);
 817   enter_e->flags &= ~EDGE_FALLTHRU;
 818   enter_e->flags |= EDGE_FALSE_VALUE;
 819   gsi = gsi_last_bb (guard_bb);
 820
 821   cond = force_gimple_operand_1 (cond, &gimplify_stmt_list, is_gimple_condexpr,
 822                                  NULL_TREE);
 823   if (gimplify_stmt_list)
 824     gimple_seq_add_seq (&cond_expr_stmt_list, gimplify_stmt_list);
 825   cond_stmt = gimple_build_cond_from_tree (cond, NULL_TREE, NULL_TREE);
 826   if (cond_expr_stmt_list)
 827     gsi_insert_seq_after (&gsi, cond_expr_stmt_list, GSI_NEW_STMT);
 828
 829   gsi = gsi_last_bb (guard_bb);
 830   gsi_insert_after (&gsi, cond_stmt, GSI_NEW_STMT);
 831
 832   /* Add new edge to connect guard block to the merge/loop-exit block.  */
 833   new_e = make_edge (guard_bb, exit_bb, EDGE_TRUE_VALUE);
 834   /* The new edge takes its share of ENTER_E's count; ENTER_E keeps the rest.  */
 835   new_e->probability = probability;
 836   new_e->count = apply_probability (enter_e->count, probability);
 837   enter_e->count -= new_e->count;
 838   enter_e->probability = inverse_probability (probability);
 839   set_immediate_dominator (CDI_DOMINATORS, exit_bb, dom_bb);
 840   return new_e;
 841 }
 842
 843
 844 /* This function verifies that the following restrictions apply to LOOP:
 845    (1) it is innermost
 846    (2) it consists of exactly 2 basic blocks - header, and an empty latch.
 847    (3) it is single entry, single exit
 848    (4) its exit condition is the last stmt in the header
 849    (5) E is the entry/exit edge of LOOP.
 850    Returns true if all of them hold, false otherwise.  */
 851
 852 bool
 853 slpeel_can_duplicate_loop_p (const struct loop *loop, const_edge e)
 854 {
 855   edge exit_e = single_exit (loop);
 856   edge entry_e = loop_preheader_edge (loop);
 857   gimple orig_cond = get_loop_exit_condition (loop);
 858
 859   /* EXIT_E is NULL when LOOP has no single exit; test it before it is
 860      dereferenced to find the last stmt of the exit block.  */
 861   if (loop->inner
 862       /* The only loop without an outer loop is the function itself.  */
 863       || !loop_outer (loop)
 864       || loop->num_nodes != 2
 865       || !empty_block_p (loop->latch)
 866       || !exit_e
 867       /* Verify that new loop exit condition can be trivially modified.  */
 868       || (!orig_cond || orig_cond != gsi_stmt (gsi_last_bb (exit_e->src)))
 869       || (e != exit_e && e != entry_e))
 870     return false;
 871
 872   return true;
 873 }
 874
 875 #ifdef ENABLE_CHECKING
 876 /* Sanity checks on the CFG right after peeling, where FIRST_LOOP is
 877    immediately followed by SECOND_LOOP.  Only compiled when
 878    ENABLE_CHECKING is defined.  */
 879
 880 static void
 881 slpeel_verify_cfg_after_peeling (struct loop *first_loop,
 882                                  struct loop *second_loop)
 883 {
 884   basic_block exit_of_first = single_exit (first_loop)->dest;
 885   basic_block pre_of_second = loop_preheader_edge (second_loop)->src;
 886   basic_block pre_of_first = loop_preheader_edge (first_loop)->src;
 887
 888   /* The exit block of FIRST_LOOP holds the guard that decides whether
 889      SECOND_LOOP is executed or skipped, so it must have exactly two
 890      successors: the preheader of SECOND_LOOP and a block located
 891      after SECOND_LOOP.  */
 892   gcc_assert (EDGE_COUNT (exit_of_first->succs) == 2);
 893
 894   /* Check the first of those successors: the preheader of SECOND_LOOP
 895      must be entered from exactly two blocks, in either order, namely
 896      the exit block of FIRST_LOOP and the block preceding FIRST_LOOP.  */
 897   gcc_assert (EDGE_COUNT (pre_of_second->preds) == 2
 898               && ((EDGE_PRED (pre_of_second, 0)->src == exit_of_first
 899                    && EDGE_PRED (pre_of_second, 1)->src == pre_of_first)
 900                   || (EDGE_PRED (pre_of_second, 1)->src == exit_of_first
 901                       && EDGE_PRED (pre_of_second, 0)->src == pre_of_first)));
 902
 903   /* TODO: Verify the second of those successors as well, i.e. that
 904      the other successor of the exit block of FIRST_LOOP is located
 905      after SECOND_LOOP.  */
 906 }
 907 #endif
 908
 909 /* If the run time cost model check determines that vectorization is
 910    not profitable and hence scalar loop should be generated then set
 911    FIRST_NITERS to prologue peeled iterations. This will allow all the
 912    iterations to be executed in the prologue peeled scalar loop.
     
        Two blocks are created on the single edge entering
        BB_BEFORE_FIRST_LOOP: a block holding the test
        "scalar iterations of LOOP <= TH" and a block, reached when the test
        is true, that computes the unchanged scalar iteration count.  A new
        PHI in BB_BEFORE_FIRST_LOOP selects between that value and the
        incoming *FIRST_NITERS, and *FIRST_NITERS is replaced by the PHI
        result.  PROBABILITY is the probability given to the true edge of
        the test.  */
 913
 914 static void
 915 set_prologue_iterations (basic_block bb_before_first_loop,
 916                          tree *first_niters,
 917                          struct loop *loop,
 918                          unsigned int th,
 919                          int probability)
 920 {
 921   edge e;
 922   basic_block cond_bb, then_bb;
 923   tree var, prologue_after_cost_adjust_name;
 924   gimple_stmt_iterator gsi;
 925   gimple newphi;
 926   edge e_true, e_false, e_fallthru;
 927   gimple cond_stmt;
 928   gimple_seq stmts = NULL;
 929   tree cost_pre_condition = NULL_TREE;
 930   tree scalar_loop_iters =
 931     unshare_expr (LOOP_VINFO_NITERS_UNCHANGED (loop_vec_info_for_loop (loop)));
 932
       /* Split the edge entering BB_BEFORE_FIRST_LOOP twice; the first new
          block becomes COND_BB, the second one THEN_BB.  */
 933   e = single_pred_edge (bb_before_first_loop);
 934   cond_bb = split_edge (e);
 935
 936   e = single_pred_edge (bb_before_first_loop);
 937   then_bb = split_edge (e);
 938   set_immediate_dominator (CDI_DOMINATORS, then_bb, cond_bb);
 939
       /* Add the false edge that goes from COND_BB straight to
          BB_BEFORE_FIRST_LOOP, bypassing THEN_BB.  */
 940   e_false = make_single_succ_edge (cond_bb, bb_before_first_loop,
 941                                    EDGE_FALSE_VALUE);
 942   set_immediate_dominator (CDI_DOMINATORS, bb_before_first_loop, cond_bb);
 943
       /* The edge entering THEN_BB becomes the true edge of the test.  */
 944   e_true = EDGE_PRED (then_bb, 0);
 945   e_true->flags &= ~EDGE_FALLTHRU;
 946   e_true->flags |= EDGE_TRUE_VALUE;
 947
       /* Distribute the profile of COND_BB over the two edges and THEN_BB
          according to PROBABILITY.  */
 948   e_true->probability = probability;
 949   e_false->probability = inverse_probability (probability);
 950   e_true->count = apply_probability (cond_bb->count, probability);
 951   e_false->count = cond_bb->count - e_true->count;
 952   then_bb->frequency = EDGE_FREQUENCY (e_true);
 953   then_bb->count = e_true->count;
 954
 955   e_fallthru = EDGE_SUCC (then_bb, 0);
 956   e_fallthru->count = then_bb->count;
 957
       /* Emit "if (scalar_loop_iters <= TH)" at the end of COND_BB.  */
 958   gsi = gsi_last_bb (cond_bb);
 959   cost_pre_condition =
 960     fold_build2 (LE_EXPR, boolean_type_node, scalar_loop_iters,
 961                  build_int_cst (TREE_TYPE (scalar_loop_iters), th));
 962   cost_pre_condition =
 963     force_gimple_operand_gsi_1 (&gsi, cost_pre_condition, is_gimple_condexpr,
 964                                 NULL_TREE, false, GSI_CONTINUE_LINKING);
 965   cond_stmt = gimple_build_cond_from_tree (cost_pre_condition,
 966                                            NULL_TREE, NULL_TREE);
 967   gsi_insert_after (&gsi, cond_stmt, GSI_NEW_STMT);
 968
       /* Gimplify the scalar iteration count; statements needed for that
          are placed in THEN_BB.  */
 969   var = create_tmp_var (TREE_TYPE (scalar_loop_iters),
 970                         "prologue_after_cost_adjust");
 971   prologue_after_cost_adjust_name =
 972     force_gimple_operand (scalar_loop_iters, &stmts, false, var);
 973
 974   gsi = gsi_last_bb (then_bb);
 975   if (stmts)
 976     gsi_insert_seq_after (&gsi, stmts, GSI_NEW_STMT);
 977
       /* Merge both values in BB_BEFORE_FIRST_LOOP: the scalar iteration
          count when coming from THEN_BB, the incoming *FIRST_NITERS when
          coming over the false edge.  */
 978   newphi = create_phi_node (var, bb_before_first_loop);
 979   add_phi_arg (newphi, prologue_after_cost_adjust_name, e_fallthru,
 980                UNKNOWN_LOCATION);
 981   add_phi_arg (newphi, *first_niters, e_false, UNKNOWN_LOCATION);
 982
 983   *first_niters = PHI_RESULT (newphi);
 984 }
 985
 986 /* Function slpeel_tree_peel_loop_to_edge.
 987
 988    Peel the first (last) iterations of LOOP into a new prolog (epilog) loop
 989    that is placed on the entry (exit) edge E of LOOP. After this transformation
 990    we have two loops one after the other - first-loop iterates FIRST_NITERS
 991    times, and second-loop iterates the remainder NITERS - FIRST_NITERS times.
 992    If the cost model indicates that it is profitable to emit a scalar
 993    loop instead of the vector one, then the prolog (epilog) loop will iterate
 994    for the entire unchanged scalar iterations of the loop.
 995
 996    Input:
 997    - LOOP: the loop to be peeled.
 998    - E: the exit or entry edge of LOOP.
 999         If it is the entry edge, we peel the first iterations of LOOP. In this
1000         case first-loop is LOOP, and second-loop is the newly created loop.
1001         If it is the exit edge, we peel the last iterations of LOOP. In this
1002         case, first-loop is the newly created loop, and second-loop is LOOP.
1003    - NITERS: the number of iterations that LOOP iterates.
1004    - FIRST_NITERS: the number of iterations that the first-loop should iterate.
1005    - UPDATE_FIRST_LOOP_COUNT:  specifies whether this function is responsible
1006         for updating the loop bound of the first-loop to FIRST_NITERS.  If it
1007         is false, the caller of this function may want to take care of this
1008         (this can be useful if we don't want new stmts added to first-loop).
1009    - TH: cost model profitability threshold of iterations for vectorization.
1010    - CHECK_PROFITABILITY: specify whether cost model check has not occurred
1011                           during versioning and hence needs to occur during
1012                           prologue generation or whether cost model check
1013                           has not occurred during prologue generation and hence
1014                           needs to occur during epilogue generation.
1015    - BOUND1 is the upper bound on number of iterations of the first loop (if known)
1016    - BOUND2 is the upper bound on number of iterations of the second loop (if known)
1017
1018
1019    Output:
1020    The function returns a pointer to the new loop-copy, or NULL if it failed
1021    to perform the transformation.
1022
1023    The function generates two if-then-else guards: one before the first loop,
1024    and the other before the second loop:
1025    The first guard is:
1026      if (FIRST_NITERS == 0) then skip the first loop,
1027      and go directly to the second loop.
1028    The second guard is:
1029      if (FIRST_NITERS == NITERS) then skip the second loop.
1030
1031    If the optional COND_EXPR and COND_EXPR_STMT_LIST arguments are given
1032    then the generated condition is combined with COND_EXPR and the
1033    statements in COND_EXPR_STMT_LIST are emitted together with it.
1034
1035    FORNOW only simple loops are supported (see slpeel_can_duplicate_loop_p).
1036    FORNOW the resulting code will not be in loop-closed-ssa form.
1037 */
1038
1039 static struct loop*
1040 slpeel_tree_peel_loop_to_edge (struct loop *loop,
1041                                edge e, tree *first_niters,
1042                                tree niters, bool update_first_loop_count,
1043                                unsigned int th, bool check_profitability,
1044                                tree cond_expr, gimple_seq cond_expr_stmt_list,
1045                                int bound1, int bound2)
1046 {
1047   struct loop *new_loop = NULL, *first_loop, *second_loop;
1048   edge skip_e;
1049   tree pre_condition = NULL_TREE;
1050   basic_block bb_before_second_loop, bb_after_second_loop;
1051   basic_block bb_before_first_loop;
1052   basic_block bb_between_loops;
1053   basic_block new_exit_bb;
1054   gimple_stmt_iterator gsi;
1055   edge exit_e = single_exit (loop);
1056   LOC loop_loc;
1057   tree cost_pre_condition = NULL_TREE;
1058   /* There are many aspects to how likely the first loop is going to be executed.
1059      Without histogram we can't really do good job.  Simply set it to
1060      2/3, so the first loop is not reordered to the end of function and
1061      the hot path through stays short.  */
1062   int first_guard_probability = 2 * REG_BR_PROB_BASE / 3;
1063   int second_guard_probability = 2 * REG_BR_PROB_BASE / 3;
1064   int probability_of_second_loop;
1065
1066   if (!slpeel_can_duplicate_loop_p (loop, e))
1067     return NULL;
1068
1069   /* We might have a queued need to update virtual SSA form.  As we
1070      delete the update SSA machinery below after doing a regular
1071      incremental SSA update during loop copying make sure we don't
1072      lose that fact.
1073      ???  Needing to update virtual SSA form by renaming is unfortunate
1074      but not all of the vectorizer code inserting new loads / stores
1075      properly assigns virtual operands to those statements.  */
1076   update_ssa (TODO_update_ssa_only_virtuals);
1077
1078   /* If the loop has a virtual PHI, but exit bb doesn't, create a virtual PHI
1079      in the exit bb and rename all the uses after the loop.  This simplifies
1080      the *guard[12] routines, which assume loop closed SSA form for all PHIs
1081      (but normally loop closed SSA form doesn't require virtual PHIs to be
1082      in the same form).  Doing this early simplifies the checking what
1083      uses should be renamed.  */
1084   for (gsi = gsi_start_phis (loop->header); !gsi_end_p (gsi); gsi_next (&gsi))
1085     if (virtual_operand_p (gimple_phi_result (gsi_stmt (gsi))))
1086       {
1087         gimple phi = gsi_stmt (gsi);
             /* GSI is reused to scan the PHIs of the exit block.  That is
                harmless because the outer loop is always left through the
                break at the end of this block.  */
1088         for (gsi = gsi_start_phis (exit_e->dest);
1089              !gsi_end_p (gsi); gsi_next (&gsi))
1090           if (virtual_operand_p (gimple_phi_result (gsi_stmt (gsi))))
1091             break;
             /* No virtual PHI was found in the exit block: create one.  */
1092         if (gsi_end_p (gsi))
1093           {
1094             tree new_vop = copy_ssa_name (PHI_RESULT (phi), NULL);
1095             gimple new_phi = create_phi_node (new_vop, exit_e->dest);
                 /* VOP is the argument of PHI on successor edge 0 of the
                    latch; it becomes the argument of the new exit PHI.  */
1096             tree vop = PHI_ARG_DEF_FROM_EDGE (phi, EDGE_SUCC (loop->latch, 0));
1097             imm_use_iterator imm_iter;
1098             gimple stmt;
1099             use_operand_p use_p;
1100
1101             add_phi_arg (new_phi, vop, exit_e, UNKNOWN_LOCATION);
1102             gimple_phi_set_result (new_phi, new_vop);
                 /* Redirect every use of VOP that is neither in the new PHI
                    nor in the loop header to NEW_VOP.  */
1103             FOR_EACH_IMM_USE_STMT (stmt, imm_iter, vop)
1104               if (stmt != new_phi && gimple_bb (stmt) != loop->header)
1105                 FOR_EACH_IMM_USE_ON_STMT (use_p, imm_iter)
1106                   SET_USE (use_p, new_vop);
1107           }
1108         break;
1109       }
1110
1111   /* 1. Generate a copy of LOOP and put it on E (E is the entry/exit of LOOP).
1112         Resulting CFG would be:
1113
1114         first_loop:
1115         do {
1116         } while ...
1117
1118         second_loop:
1119         do {
1120         } while ...
1121
1122         orig_exit_bb:
1123    */
1124
1125   if (!(new_loop = slpeel_tree_duplicate_loop_to_edge_cfg (loop, e)))
1126     {
1127       loop_loc = find_loop_location (loop);
1128       dump_printf_loc (MSG_MISSED_OPTIMIZATION, loop_loc,
1129                        "tree_duplicate_loop_to_edge_cfg failed.\n");
1130       return NULL;
1131     }
1132
       /* When debug statements may be present, allocate ADJUST_VEC; it must
          not exist yet.  adjust_vec_debug_stmts is called at the end of
          this function.  */
1133   if (MAY_HAVE_DEBUG_STMTS)
1134     {
1135       gcc_assert (!adjust_vec.exists ());
1136       vec_stack_alloc (adjust_info, adjust_vec, 32);
1137     }
1138
1139   if (e == exit_e)
1140     {
1141       /* NEW_LOOP was placed after LOOP.  */
1142       first_loop = loop;
1143       second_loop = new_loop;
1144     }
1145   else
1146     {
1147       /* NEW_LOOP was placed before LOOP.  */
1148       first_loop = new_loop;
1149       second_loop = loop;
1150     }
1151
1152   /* 2.  Add the guard code in one of the following ways:
1153
1154      2.a Add the guard that controls whether the first loop is executed.
1155          This occurs when this function is invoked for prologue or epilogue
1156          generation and when the cost model check can be done at compile time.
1157
1158          Resulting CFG would be:
1159
1160          bb_before_first_loop:
1161          if (FIRST_NITERS == 0) GOTO bb_before_second_loop
1162                                 GOTO first-loop
1163
1164          first_loop:
1165          do {
1166          } while ...
1167
1168          bb_before_second_loop:
1169
1170          second_loop:
1171          do {
1172          } while ...
1173
1174          orig_exit_bb:
1175
1176      2.b Add the cost model check that allows the prologue
1177          to iterate for the entire unchanged scalar
1178          iterations of the loop in the event that the cost
1179          model indicates that the scalar loop is more
1180          profitable than the vector one. This occurs when
1181          this function is invoked for prologue generation
1182          and the cost model check needs to be done at run
1183          time.
1184
1185          Resulting CFG after prologue peeling would be:
1186
1187          if (scalar_loop_iterations <= th)
1188            FIRST_NITERS = scalar_loop_iterations
1189
1190          bb_before_first_loop:
1191          if (FIRST_NITERS == 0) GOTO bb_before_second_loop
1192                                 GOTO first-loop
1193
1194          first_loop:
1195          do {
1196          } while ...
1197
1198          bb_before_second_loop:
1199
1200          second_loop:
1201          do {
1202          } while ...
1203
1204          orig_exit_bb:
1205
1206      2.c Add the cost model check that allows the epilogue
1207          to iterate for the entire unchanged scalar
1208          iterations of the loop in the event that the cost
1209          model indicates that the scalar loop is more
1210          profitable than the vector one. This occurs when
1211          this function is invoked for epilogue generation
1212          and the cost model check needs to be done at run
1213          time.  This check is combined with any pre-existing
1214          check in COND_EXPR to avoid versioning.
1215
1216          Resulting CFG after epilogue peeling would be:
1217
1218          bb_before_first_loop:
1219          if ((scalar_loop_iterations <= th)
1220              ||
1221              FIRST_NITERS == 0) GOTO bb_before_second_loop
1222                                 GOTO first-loop
1223
1224          first_loop:
1225          do {
1226          } while ...
1227
1228          bb_before_second_loop:
1229
1230          second_loop:
1231          do {
1232          } while ...
1233
1234          orig_exit_bb:
1235   */
1236
1237   bb_before_first_loop = split_edge (loop_preheader_edge (first_loop));
1238   /* Loop copying inserted a forwarder block for us here.  */
1239   bb_before_second_loop = single_exit (first_loop)->dest;
1240
       /* The second loop is reached either by skipping the first loop, or by
          running the first loop and then not taking the second skip edge.  */
1241   probability_of_second_loop = (inverse_probability (first_guard_probability)
1242                                 + combine_probabilities (second_guard_probability,
1243                                                          first_guard_probability));
1244   /* Theoretically preheader edge of first loop and exit edge should have
1245      same frequencies.  Loop exit probabilities are however easy to get wrong.
1246      It is safer to copy value from original loop entry.  */
1247   bb_before_second_loop->frequency
1248      = combine_probabilities (bb_before_first_loop->frequency,
1249                               probability_of_second_loop);
1250   bb_before_second_loop->count
1251      = apply_probability (bb_before_first_loop->count,
1252                           probability_of_second_loop);
1253   single_succ_edge (bb_before_second_loop)->count
1254      = bb_before_second_loop->count;
1255
       /* Note that the first guard is built as FIRST_NITERS <= 0 in both
          branches below, although the CFG sketches above write it as
          FIRST_NITERS == 0.  */
1256   /* Epilogue peeling.  */
1257   if (!update_first_loop_count)
1258     {
1259       pre_condition =
1260         fold_build2 (LE_EXPR, boolean_type_node, *first_niters,
1261                      build_int_cst (TREE_TYPE (*first_niters), 0));
           /* Also skip the first loop when the scalar iteration count does
              not exceed the profitability threshold TH.  */
1262       if (check_profitability)
1263         {
1264           tree scalar_loop_iters
1265             = unshare_expr (LOOP_VINFO_NITERS_UNCHANGED
1266                                         (loop_vec_info_for_loop (loop)));
1267           cost_pre_condition =
1268             fold_build2 (LE_EXPR, boolean_type_node, scalar_loop_iters,
1269                          build_int_cst (TREE_TYPE (scalar_loop_iters), th));
1270
1271           pre_condition = fold_build2 (TRUTH_OR_EXPR, boolean_type_node,
1272                                        cost_pre_condition, pre_condition);
1273         }
           /* Also skip the first loop when the caller-supplied COND_EXPR is
              false.  */
1274       if (cond_expr)
1275         {
1276           pre_condition =
1277             fold_build2 (TRUTH_OR_EXPR, boolean_type_node,
1278                          pre_condition,
1279                          fold_build1 (TRUTH_NOT_EXPR, boolean_type_node,
1280                                       cond_expr));
1281         }
1282     }
1283
1284   /* Prologue peeling.  */
1285   else
1286     {
           /* set_prologue_iterations may replace *FIRST_NITERS, so it has to
              run before the guard condition is built from it.  */
1287       if (check_profitability)
1288         set_prologue_iterations (bb_before_first_loop, first_niters,
1289                                  loop, th, first_guard_probability);
1290
1291       pre_condition =
1292         fold_build2 (LE_EXPR, boolean_type_node, *first_niters,
1293                      build_int_cst (TREE_TYPE (*first_niters), 0));
1294     }
1295
1296   skip_e = slpeel_add_loop_guard (bb_before_first_loop, pre_condition,
1297                                   cond_expr_stmt_list,
1298                                   bb_before_second_loop, bb_before_first_loop,
1299                                   inverse_probability (first_guard_probability));
       /* The iteration bound passed for the first loop is TH when
          CHECK_PROFITABILITY is set and TH exceeds BOUND1, else BOUND1.  */
1300   scale_loop_profile (first_loop, first_guard_probability,
1301                       check_profitability && (int)th > bound1 ? th : bound1);
1302   slpeel_update_phi_nodes_for_guard1 (skip_e, first_loop,
1303                                       first_loop == new_loop,
1304                                       &new_exit_bb);
1305
1306
1307   /* 3. Add the guard that controls whether the second loop is executed.
1308         Resulting CFG would be:
1309
1310         bb_before_first_loop:
1311         if (FIRST_NITERS == 0) GOTO bb_before_second_loop (skip first loop)
1312                                GOTO first-loop
1313
1314         first_loop:
1315         do {
1316         } while ...
1317
1318         bb_between_loops:
1319         if (FIRST_NITERS == NITERS) GOTO bb_after_second_loop (skip second loop)
1320                                     GOTO bb_before_second_loop
1321
1322         bb_before_second_loop:
1323
1324         second_loop:
1325         do {
1326         } while ...
1327
1328         bb_after_second_loop:
1329
1330         orig_exit_bb:
1331    */
1332
       /* NEW_EXIT_BB was set by slpeel_update_phi_nodes_for_guard1 above.  */
1333   bb_between_loops = new_exit_bb;
1334   bb_after_second_loop = split_edge (single_exit (second_loop));
1335
1336   pre_condition =
1337         fold_build2 (EQ_EXPR, boolean_type_node, *first_niters, niters);
       /* BB_BEFORE_FIRST_LOOP is passed as the immediate dominator of
          BB_AFTER_SECOND_LOOP.  */
1338   skip_e = slpeel_add_loop_guard (bb_between_loops, pre_condition, NULL,
1339                                   bb_after_second_loop, bb_before_first_loop,
1340                                   inverse_probability (second_guard_probability));
1341   scale_loop_profile (second_loop, probability_of_second_loop, bound2);
1342   slpeel_update_phi_nodes_for_guard2 (skip_e, second_loop,
1343                                      second_loop == new_loop, &new_exit_bb);
1344
1345   /* 4. Make first-loop iterate FIRST_NITERS times, if requested.
1346    */
1347   if (update_first_loop_count)
1348     slpeel_make_loop_iterate_ntimes (first_loop, *first_niters);
1349
1350   delete_update_ssa ();
1351
1352   adjust_vec_debug_stmts ();
1353
1354   return new_loop;
1355 }
1356
1357 /* Function find_loop_location.
1358
1359    Return a source location for LOOP: that of its exit condition if the
1360    locus is beyond BUILTINS_LOCATION, otherwise that of the first statement
1361    in the loop header satisfying the same test.  Return UNKNOWN_LOC when
1362    LOOP is NULL, has no header, or no such statement exists.  */
1363
1364 LOC
1365 find_loop_location (struct loop *loop)
1366 {
1367   gimple exit_cond;
1368   gimple_stmt_iterator gsi;
1369
1370   if (loop == NULL)
1371     return UNKNOWN_LOC;
1372
1373   /* Preferred answer: the location of the exit condition of the loop.  */
1374   exit_cond = get_loop_exit_condition (loop);
1375   if (exit_cond != NULL
1376       && LOCATION_LOCUS (gimple_location (exit_cond)) > BUILTINS_LOCATION)
1377     return gimple_location (exit_cond);
1378
1379   /* Otherwise the loop is probably not "well formed"; estimate its
1380      location from the statements of the header, if there is one.  */
1381   if (loop->header == NULL)
1382     return UNKNOWN_LOC;
1383
1384   gsi = gsi_start_bb (loop->header);
1385   while (!gsi_end_p (gsi))
1386     {
1387       gimple cur = gsi_stmt (gsi);
1388
1389       if (LOCATION_LOCUS (gimple_location (cur)) > BUILTINS_LOCATION)
1390         return gimple_location (cur);
1391
1392       gsi_next (&gsi);
1393     }
1394
1395   return UNKNOWN_LOC;
1396 }
1397
1398
1399 /* Build NI_NAME = number of iterations the loop of LOOP_VINFO executes.
1400    Statements needed to compute it are appended to SEQ if SEQ is given,
1401    and are otherwise inserted on the loop preheader edge.  */
1402
1403 static tree
1404 vect_build_loop_niters (loop_vec_info loop_vinfo, gimple_seq seq)
1405 {
1406   struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
1407   tree niters_expr = unshare_expr (LOOP_VINFO_NITERS (loop_vinfo));
1408   tree niters_var = create_tmp_var (TREE_TYPE (niters_expr), "niters");
1409   gimple_seq new_stmts = NULL;
1410   tree niters_name
1411     = force_gimple_operand (niters_expr, &new_stmts, false, niters_var);
1412   edge preheader_e = loop_preheader_edge (loop);
1413
1414   if (!new_stmts)
1415     return niters_name;
1416
1417   if (seq)
1418     gimple_seq_add_seq (&seq, new_stmts);
1419   else
1420     {
1421       /* Inserting on the preheader edge must not create a new block.  */
1422       basic_block split_bb
1423         = gsi_insert_seq_on_edge_immediate (preheader_e, new_stmts);
1424       gcc_assert (!split_bb);
1425     }
1426
1427   return niters_name;
1428 }
1429
1430
1431 /* This function generates the following statements:
1432
1433  ni_name = number of iterations loop executes
1434  ratio = ni_name / vf
1435  ratio_mult_vf_name = ratio * vf
1436
1437  and places them at the loop preheader edge or in COND_EXPR_STMT_LIST
1438  if that is non-NULL.
     
      The division and multiplication are emitted as right and left shifts
      by log2 (vf).  When LOOP_VINFO_PEELING_FOR_GAPS is set, one is
      subtracted from ni_name before ratio is computed.  The three values
      are returned through NI_NAME_PTR, RATIO_NAME_PTR and
      RATIO_MULT_VF_NAME_PTR.  */
1439
1440 static void
1441 vect_generate_tmps_on_preheader (loop_vec_info loop_vinfo,
1442                                  tree *ni_name_ptr,
1443                                  tree *ratio_mult_vf_name_ptr,
1444                                  tree *ratio_name_ptr,
1445                                  gimple_seq cond_expr_stmt_list)
1446 {
1447
1448   edge pe;
1449   basic_block new_bb;
1450   gimple_seq stmts;
1451   tree ni_name, ni_minus_gap_name;
1452   tree var;
1453   tree ratio_name;
1454   tree ratio_mult_vf_name;
1455   struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
1456   tree ni = LOOP_VINFO_NITERS (loop_vinfo);
1457   int vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
1458   tree log_vf;
1459
       /* PE is fetched again before each insertion on the edge below.  */
1460   pe = loop_preheader_edge (loop);
1461
1462   /* Generate temporary variable that contains
1463      number of iterations loop executes.  */
1464
1465   ni_name = vect_build_loop_niters (loop_vinfo, cond_expr_stmt_list);
       /* NOTE(review): the shifts by LOG_VF below only match the division
          and multiplication by VF described above if VF is a power of two;
          presumably the caller guarantees that -- confirm.  */
1466   log_vf = build_int_cst (TREE_TYPE (ni), exact_log2 (vf));
1467
1468   /* If epilogue loop is required because of data accesses with gaps, we
1469      subtract one iteration from the total number of iterations here for
1470      correct calculation of RATIO.  */
1471   if (LOOP_VINFO_PEELING_FOR_GAPS (loop_vinfo))
1472     {
1473       ni_minus_gap_name = fold_build2 (MINUS_EXPR, TREE_TYPE (ni_name),
1474                                        ni_name,
1475                                        build_one_cst (TREE_TYPE (ni_name)));
           /* Only emit statements if folding did not already yield a
              gimple value.  */
1476       if (!is_gimple_val (ni_minus_gap_name))
1477         {
1478           var = create_tmp_var (TREE_TYPE (ni), "ni_gap");
1479
1480           stmts = NULL;
1481           ni_minus_gap_name = force_gimple_operand (ni_minus_gap_name, &stmts,
1482                                                     true, var);
1483           if (cond_expr_stmt_list)
1484             gimple_seq_add_seq (&cond_expr_stmt_list, stmts);
1485           else
1486             {
1487               pe = loop_preheader_edge (loop);
1488               new_bb = gsi_insert_seq_on_edge_immediate (pe, stmts);
1489               gcc_assert (!new_bb);
1490             }
1491         }
1492     }
1493   else
1494     ni_minus_gap_name = ni_name;
1495
1496   /* Create: ratio = ni >> log2(vf) */
1497
1498   ratio_name = fold_build2 (RSHIFT_EXPR, TREE_TYPE (ni_minus_gap_name),
1499                             ni_minus_gap_name, log_vf);
1500   if (!is_gimple_val (ratio_name))
1501     {
1502       var = create_tmp_var (TREE_TYPE (ni), "bnd");
1503
1504       stmts = NULL;
1505       ratio_name = force_gimple_operand (ratio_name, &stmts, true, var);
1506       if (cond_expr_stmt_list)
1507         gimple_seq_add_seq (&cond_expr_stmt_list, stmts);
1508       else
1509         {
1510           pe = loop_preheader_edge (loop);
1511           new_bb = gsi_insert_seq_on_edge_immediate (pe, stmts);
1512           gcc_assert (!new_bb);
1513         }
1514     }
1515
1516   /* Create: ratio_mult_vf = ratio << log2 (vf).  */
1517
1518   ratio_mult_vf_name = fold_build2 (LSHIFT_EXPR, TREE_TYPE (ratio_name),
1519                                     ratio_name, log_vf);
1520   if (!is_gimple_val (ratio_mult_vf_name))
1521     {
1522       var = create_tmp_var (TREE_TYPE (ni), "ratio_mult_vf");
1523
1524       stmts = NULL;
1525       ratio_mult_vf_name = force_gimple_operand (ratio_mult_vf_name, &stmts,
1526                                                  true, var);
1527       if (cond_expr_stmt_list)
1528         gimple_seq_add_seq (&cond_expr_stmt_list, stmts);
1529       else
1530         {
1531           pe = loop_preheader_edge (loop);
1532           new_bb = gsi_insert_seq_on_edge_immediate (pe, stmts);
1533           gcc_assert (!new_bb);
1534         }
1535     }
1536
       /* Hand the three results back to the caller.  */
1537   *ni_name_ptr = ni_name;
1538   *ratio_mult_vf_name_ptr = ratio_mult_vf_name;
1539   *ratio_name_ptr = ratio_name;
1540
1541   return;
1542 }
1543
1544 /* Function vect_can_advance_ivs_p
1545
1546    When the number of iterations of LOOP is not known at compile time, an
1547    epilog loop is generated and the induction variables (IVs) of LOOP
1548    have to be "advanced" to the values they must hold just before the
1549    epilog loop.  Check here that the evolution of every IV is simple
1550    enough for that; return true if so.  These restrictions will be
1551    relaxed in the future.  */
1552
1553 bool
1554 vect_can_advance_ivs_p (loop_vec_info loop_vinfo)
1555 {
1556   struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
1557   basic_block header = loop->header;
1558   gimple_stmt_iterator si;
1559
1560   if (dump_enabled_p ())
1561     dump_printf_loc (MSG_NOTE, vect_location, "vect_can_advance_ivs_p:\n");
1562
1563   /* Walk the PHI nodes of the loop header.  */
1564
1565   for (si = gsi_start_phis (header); !gsi_end_p (si); gsi_next (&si))
1566     {
1567       gimple phi = gsi_stmt (si);
1568       tree evolution;
1569
1570       if (dump_enabled_p ())
1571         {
1572           dump_printf_loc (MSG_NOTE, vect_location, "Analyze phi: ");
1573           dump_gimple_stmt (MSG_NOTE, TDF_SLIM, phi, 0);
1574           dump_printf (MSG_NOTE, "\n");
1575         }
1576
1577       /* Virtual PHIs are skipped: the data dependences tied to virtual
1578          defs/uses (i.e., memory accesses) are analyzed elsewhere.  */
1579
1580       if (virtual_operand_p (PHI_RESULT (phi)))
1581         {
1582           if (dump_enabled_p ())
1583             dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1584                              "virtual phi. skip.\n");
1585           continue;
1586         }
1587
1588       /* Reduction PHIs are skipped as well.  */
1589
1590       if (STMT_VINFO_DEF_TYPE (vinfo_for_stmt (phi)) == vect_reduction_def)
1591         {
1592           if (dump_enabled_p ())
1593             dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1594                              "reduc phi. skip.\n");
1595           continue;
1596         }
1597
1598       /* What is left must have a recorded evolution part.  */
1599
1600       evolution = STMT_VINFO_LOOP_PHI_EVOLUTION_PART (vinfo_for_stmt (phi));
1601       if (!evolution)
1602         {
1603           if (dump_enabled_p ())
1604             dump_printf (MSG_MISSED_OPTIMIZATION,
1605                          "No access function or evolution.\n");
1606           return false;
1607         }
1608
1609       /* FORNOW: initial conditions of IVs whose evolution functions
1610          are polynomials of degree >= 2 are not transformed, so an
1611          evolution part that is itself a chrec is rejected.  */
1612
1613       if (tree_is_chrec (evolution))
1614         return false;
1615     }
1616
1617   return true;
1618 }
1619
1620
1621 /*   Function vect_update_ivs_after_vectorizer.
1622
1623      "Advance" the induction variables of LOOP to the value they should take
1624      after the execution of LOOP.  This is currently necessary because the
1625      vectorizer does not handle induction variables that are used after the
1626      loop.  Such a situation occurs when the last iterations of LOOP are
1627      peeled, because:
1628      1. We introduced new uses after LOOP for IVs that were not originally used
1629         after LOOP: the IVs of LOOP are now used by an epilog loop.
1630      2. LOOP is going to be vectorized; this means that it will iterate N/VF
1631         times, whereas the loop IVs should be bumped N times.
1632
1633      Input:
1634      - LOOP - a loop that is going to be vectorized. The last few iterations
1635               of LOOP were peeled.
1636      - NITERS - the number of iterations that LOOP executes (before it is
1637                 vectorized), i.e., the number of times the ivs should be bumped.
1638      - UPDATE_E - a successor edge of LOOP->exit that is on the (only) path
1639                   coming out from LOOP on which there are uses of the LOOP ivs
1640                   (this is the path from LOOP->exit to epilog_loop->preheader).
1641
1642                   The new definitions of the ivs are placed in LOOP->exit.
1643                   The phi args associated with the edge UPDATE_E in the bb
1644                   UPDATE_E->dest are updated accordingly.
1645
1646      Assumption 1: Like the rest of the vectorizer, this function assumes
1647      a single loop exit that has a single predecessor.
1648
1649      Assumption 2: The phi nodes in the LOOP header and in update_bb are
1650      organized in the same order.
1651
1652      Assumption 3: The access function of the ivs is simple enough (see
1653      vect_can_advance_ivs_p).  This assumption will be relaxed in the future.
1654
1655      Assumption 4: Exactly one of the successors of LOOP exit-bb is on a path
1656      coming out of LOOP on which the ivs of LOOP are used (this is the path
1657      that leads to the epilog loop; other paths skip the epilog loop).  This
1658      path starts with the edge UPDATE_E, and its destination (denoted update_bb)
1659      needs to have its phis updated.
1660  */
1661
1662 static void
1663 vect_update_ivs_after_vectorizer (loop_vec_info loop_vinfo, tree niters,
1664                                   edge update_e)
1665 {
1666   struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
1667   basic_block exit_bb = single_exit (loop)->dest;
1668   gimple phi, phi1;
1669   gimple_stmt_iterator gsi, gsi1;
1670   basic_block update_bb = update_e->dest;
1671   /* For each IV phi: pass INIT + NITERS * STEP to its UPDATE_BB phi.  */
1672   /* Disabled check: gcc_assert (vect_can_advance_ivs_p (loop_vinfo)); */
1673
1674   /* Make sure there exists a single-predecessor exit bb:  */
1675   gcc_assert (single_pred_p (exit_bb));
1676   /* Walk LOOP's header phis and UPDATE_BB's phis pairwise (Assumption 2).  */
1677   for (gsi = gsi_start_phis (loop->header), gsi1 = gsi_start_phis (update_bb);
1678        !gsi_end_p (gsi) && !gsi_end_p (gsi1);
1679        gsi_next (&gsi), gsi_next (&gsi1))
1680     {
1681       tree init_expr;
1682       tree step_expr, off;
1683       tree type;
1684       tree var, ni, ni_name;
1685       gimple_stmt_iterator last_gsi;
1686       stmt_vec_info stmt_info;
1687       /* PHI is the header phi; PHI1 is its counterpart in UPDATE_BB.  */
1688       phi = gsi_stmt (gsi);
1689       phi1 = gsi_stmt (gsi1);
1690       if (dump_enabled_p ())
1691         {
1692           dump_printf_loc (MSG_NOTE, vect_location,
1693                            "vect_update_ivs_after_vectorizer: phi: ");
1694           dump_gimple_stmt (MSG_NOTE, TDF_SLIM, phi, 0);
1695           dump_printf (MSG_NOTE, "\n");
1696         }
1697
1698       /* Skip virtual phi's (`continue' advances both iterators).  */
1699       if (virtual_operand_p (PHI_RESULT (phi)))
1700         {
1701           if (dump_enabled_p ())
1702             dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1703                              "virtual phi. skip.\n");
1704           continue;
1705         }
1706
1707       /* Skip reduction phis.  */
1708       stmt_info = vinfo_for_stmt (phi);
1709       if (STMT_VINFO_DEF_TYPE (stmt_info) == vect_reduction_def)
1710         {
1711           if (dump_enabled_p ())
1712             dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1713                              "reduc phi. skip.\n");
1714           continue;
1715         }
1716       /* The evolution part recorded for PHI serves as the IV step.  */
1717       type = TREE_TYPE (gimple_phi_result (phi));
1718       step_expr = STMT_VINFO_LOOP_PHI_EVOLUTION_PART (stmt_info);
1719       step_expr = unshare_expr (step_expr);
1720
1721       /* FORNOW: We do not support IVs whose evolution function is a polynomial
1722          of degree >= 2 or exponential.  */
1723       gcc_assert (!tree_is_chrec (step_expr));
1724       /* Initial value: PHI's argument on the preheader edge of LOOP.  */
1725       init_expr = PHI_ARG_DEF_FROM_EDGE (phi, loop_preheader_edge (loop));
1726       /* Build NI = INIT_EXPR + NITERS * STEP_EXPR.  */
1727       off = fold_build2 (MULT_EXPR, TREE_TYPE (step_expr),
1728                          fold_convert (TREE_TYPE (step_expr), niters),
1729                          step_expr);
1730       if (POINTER_TYPE_P (type))
1731         ni = fold_build_pointer_plus (init_expr, off);
1732       else
1733         ni = fold_build2 (PLUS_EXPR, type,
1734                           init_expr, fold_convert (type, off));
1735
1736       var = create_tmp_var (type, "tmp");
1737       /* Gimplify NI, emitting any needed stmts in EXIT_BB at LAST_GSI.  */
1738       last_gsi = gsi_last_bb (exit_bb);
1739       ni_name = force_gimple_operand_gsi (&last_gsi, ni, false, var,
1740                                           true, GSI_SAME_STMT);
1741
1742       /* Fix phi expressions in the successor bb.  */
1743       adjust_phi_and_debug_stmts (phi1, update_e, ni_name);
1744     }
1745 }
1746
1747 /* Function vect_do_peeling_for_loop_bound
1748
1749    Peel the last iterations of the loop represented by LOOP_VINFO.
1750    The peeled iterations form a new epilog loop.  Given that the loop now
1751    iterates NITERS times, the new epilog loop iterates
1752    NITERS % VECTORIZATION_FACTOR times.
1753
1754    The original loop will later be made to iterate
1755    NITERS / VECTORIZATION_FACTOR times (this value is placed into RATIO).
1756
1757    COND_EXPR and COND_EXPR_STMT_LIST are combined with a new generated
1758    test.  */
1759
1760 void
1761 vect_do_peeling_for_loop_bound (loop_vec_info loop_vinfo, tree *ratio,
1762                                 unsigned int th, bool check_profitability)
1763 {
1764   tree ni_name, ratio_mult_vf_name;
1765   struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
1766   struct loop *new_loop;
1767   edge update_e;
1768   basic_block preheader;
1769   int loop_num;
1770   int max_iter;
1771   tree cond_expr = NULL_TREE;
1772   gimple_seq cond_expr_stmt_list = NULL;
1773   /* Peels an epilog loop off LOOP and advances LOOP's IVs for its use.  */
1774   if (dump_enabled_p ())
1775     dump_printf_loc (MSG_NOTE, vect_location,
1776                      "=== vect_do_peeling_for_loop_bound ===\n");
1777   /* Note: COND_EXPR and COND_EXPR_STMT_LIST are passed on still empty.  */
1778   initialize_original_copy_tables ();
1779
1780   /* Generate the following variables on the preheader of original loop:
1781
1782      ni_name = number of iteration the original loop executes
1783      ratio = ni_name / vf
1784      ratio_mult_vf_name = ratio * vf  */
1785   vect_generate_tmps_on_preheader (loop_vinfo, &ni_name,
1786                                    &ratio_mult_vf_name, ratio,
1787                                    cond_expr_stmt_list);
1788
1789   loop_num = loop->num;
1790   /* Peel the epilog off LOOP's single exit; LOOP must keep its number.  */
1791   new_loop = slpeel_tree_peel_loop_to_edge (loop, single_exit (loop),
1792                                             &ratio_mult_vf_name, ni_name, false,
1793                                             th, check_profitability,
1794                                             cond_expr, cond_expr_stmt_list,
1795                                             0, LOOP_VINFO_VECT_FACTOR (loop_vinfo));
1796   gcc_assert (new_loop);
1797   gcc_assert (loop_num == loop->num);
1798 #ifdef ENABLE_CHECKING
1799   slpeel_verify_cfg_after_peeling (loop, new_loop);
1800 #endif
1801
1802   /* A guard that controls whether the new_loop is to be executed or skipped
1803      is placed in LOOP->exit.  LOOP->exit therefore has two successors - one
1804      is the preheader of NEW_LOOP, where the IVs from LOOP are used.  The other
1805      is a bb after NEW_LOOP, where these IVs are not used.  Find the edge that
1806      is on the path where the LOOP IVs are used and need to be updated.  */
1807
1808   preheader = loop_preheader_edge (new_loop)->src;
1809   if (EDGE_PRED (preheader, 0)->src == single_exit (loop)->dest)
1810     update_e = EDGE_PRED (preheader, 0);
1811   else
1812     update_e = EDGE_PRED (preheader, 1);
1813
1814   /* Update IVs of original loop as if they were advanced
1815      by ratio_mult_vf_name steps.  */
1816   vect_update_ivs_after_vectorizer (loop_vinfo, ratio_mult_vf_name, update_e);
1817
1818   /* For vectorization factor N, we need to copy last N-1 values in epilogue
1819      and this means N-2 loopback edge executions.
1820
1821      PEELING_FOR_GAPS works by subtracting last iteration and thus the epilogue
1822      will execute at least LOOP_VINFO_VECT_FACTOR times.  */
1823   max_iter = (LOOP_VINFO_PEELING_FOR_GAPS (loop_vinfo)
1824               ? LOOP_VINFO_VECT_FACTOR (loop_vinfo) * 2
1825               : LOOP_VINFO_VECT_FACTOR (loop_vinfo)) - 2;
1826   if (check_profitability)
1827     max_iter = MAX (max_iter, (int) th - 1);
1828   record_niter_bound (new_loop, double_int::from_shwi (max_iter), false, true);
1829   if (dump_enabled_p ())
1830     dump_printf (MSG_NOTE, "Setting upper bound of nb iterations for epilogue "
1831                  "loop to %d\n", max_iter);
1832
1833   /* After peeling we have to reset scalar evolution analyzer.  */
1834   scev_reset ();
1835
1836   free_original_copy_tables ();
1837 }
1838
1839
1840 /* Function vect_gen_niters_for_prolog_loop
1841
1842    Set the number of iterations for the loop represented by LOOP_VINFO
1843    to the minimum between LOOP_NITERS (the original iteration count of the loop)
1844    and the misalignment of DR - the data reference recorded in
1845    LOOP_VINFO_UNALIGNED_DR (LOOP_VINFO).  As a result, after the execution of
1846    this loop, the data reference DR will refer to an aligned location.
1847
1848    The following computation is generated:
1849
1850    If the misalignment of DR is known at compile time:
1851      addr_mis = int mis = DR_MISALIGNMENT (dr);
1852    Else, compute address misalignment in bytes:
1853      addr_mis = addr & (vectype_align - 1)
1854
1855    prolog_niters = min (LOOP_NITERS, ((VF - addr_mis/elem_size)&(VF-1))/step)
1856
1857    (elem_size = element type size; an element is the scalar element whose type
1858    is the inner type of the vectype)
1859
1860    When the step of the data-ref in the loop is not 1 (as in interleaved data
1861    and SLP), the number of iterations of the prolog must be divided by the step
1862    (which is equal to the size of interleaved group).
1863
1864    The above formulas assume that VF == number of elements in the vector. This
1865    may not hold when there are multiple-types in the loop.
1866    In this case, for some data-references in the loop the VF does not represent
1867    the number of elements that fit in the vector.  Therefore, instead of VF we
1868    use TYPE_VECTOR_SUBPARTS.  */
1869
1870 static tree
1871 vect_gen_niters_for_prolog_loop (loop_vec_info loop_vinfo, tree loop_niters,
1872                                  int *bound)
1873 {
1874   struct data_reference *dr = LOOP_VINFO_UNALIGNED_DR (loop_vinfo);
1875   struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
1876   tree var, iters, iters_name;
1877   gimple_seq stmts;
1878   edge pe;
1879   basic_block new_bb;
1880   gimple dr_stmt = DR_STMT (dr);
1881   stmt_vec_info stmt_info = vinfo_for_stmt (dr_stmt);
1882   tree vectype = STMT_VINFO_VECTYPE (stmt_info);
1883   int vectype_align = TYPE_ALIGN (vectype) / BITS_PER_UNIT;
1884   tree niters_type = TREE_TYPE (loop_niters);
1885   int nelements = TYPE_VECTOR_SUBPARTS (vectype);
1886   /* Returns the gimplified prolog count; *BOUND is set to NPEEL or NELEMENTS.  */
1887   pe = loop_preheader_edge (loop);
1888   /* Case 1: the peeling amount is already recorded in LOOP_VINFO.  */
1889   if (LOOP_PEELING_FOR_ALIGNMENT (loop_vinfo) > 0)
1890     {
1891       int npeel = LOOP_PEELING_FOR_ALIGNMENT (loop_vinfo);
1892
1893       if (dump_enabled_p ())
1894         dump_printf_loc (MSG_NOTE, vect_location,
1895                          "known peeling = %d.\n", npeel);
1896
1897       iters = build_int_cst (niters_type, npeel);
1898       *bound = npeel;
1899     }
1900   else
1901     {
1902       gimple_seq new_stmts = NULL;
1903       bool negative = tree_int_cst_compare (DR_STEP (dr), size_zero_node) < 0;
1904       tree offset = negative
1905           ? size_int (-TYPE_VECTOR_SUBPARTS (vectype) + 1) : NULL_TREE;
1906       tree start_addr = vect_create_addr_base_for_vector_ref (dr_stmt,
1907                                                 &new_stmts, offset, loop);
1908       tree type = unsigned_type_for (TREE_TYPE (start_addr));
1909       tree vectype_align_minus_1 = build_int_cst (type, vectype_align - 1);
1910       HOST_WIDE_INT elem_size =
1911                 int_cst_value (TYPE_SIZE_UNIT (TREE_TYPE (vectype)));
1912       tree elem_size_log = build_int_cst (type, exact_log2 (elem_size));
1913       tree nelements_minus_1 = build_int_cst (type, nelements - 1);
1914       tree nelements_tree = build_int_cst (type, nelements);
1915       tree byte_misalign;
1916       tree elem_misalign;
1917       /* Emit the address computation on the preheader edge.  */
1918       new_bb = gsi_insert_seq_on_edge_immediate (pe, new_stmts);
1919       gcc_assert (!new_bb);
1920
1921       /* Create:  byte_misalign = addr & (vectype_align - 1)  */
1922       byte_misalign =
1923         fold_build2 (BIT_AND_EXPR, type, fold_convert (type, start_addr),
1924                      vectype_align_minus_1);
1925
1926       /* Create:  elem_misalign = byte_misalign / element_size  */
1927       elem_misalign =
1928         fold_build2 (RSHIFT_EXPR, type, byte_misalign, elem_size_log);
1929       /* Create:  (niters_type) ((nelements - elem_misalign) & (nelements - 1));
1930          for a negative step the subtraction is elem_misalign - nelements.  */
1931       if (negative)
1932         iters = fold_build2 (MINUS_EXPR, type, elem_misalign, nelements_tree);
1933       else
1934         iters = fold_build2 (MINUS_EXPR, type, nelements_tree, elem_misalign);
1935       iters = fold_build2 (BIT_AND_EXPR, type, iters, nelements_minus_1);
1936       iters = fold_convert (niters_type, iters);
1937       *bound = nelements;
1938     }
1939
1940   /* Create:  prolog_loop_niters = min (iters, loop_niters) */
1941   /* If the loop bound is known at compile time we already verified that it is
1942      greater than vf; since the misalignment ('iters') is at most vf, there's
1943      no need to generate the MIN_EXPR in this case.  */
1944   if (TREE_CODE (loop_niters) != INTEGER_CST)
1945     iters = fold_build2 (MIN_EXPR, niters_type, iters, loop_niters);
1946
1947   if (dump_enabled_p ())
1948     {
1949       dump_printf_loc (MSG_NOTE, vect_location,
1950                        "niters for prolog loop: ");
1951       dump_generic_expr (MSG_NOTE, TDF_SLIM, iters);
1952       dump_printf (MSG_NOTE, "\n");
1953     }
1954
1955   var = create_tmp_var (niters_type, "prolog_loop_niters");
1956   stmts = NULL;
1957   iters_name = force_gimple_operand (iters, &stmts, false, var);
1958
1959   /* Insert stmt on loop preheader edge; reuse NEW_BB rather than shadow it.  */
1960   if (stmts)
1961     {
1962       new_bb = gsi_insert_seq_on_edge_immediate (pe, stmts);
1963       gcc_assert (!new_bb);
1964     }
1965
1966   return iters_name;
1967 }
1968
1969
1970 /* Function vect_update_init_of_dr
1971
1972    NITERS iterations were peeled from LOOP.  DR represents a data reference
1973    in LOOP.  This function updates the information recorded in DR to
1974    account for the fact that the first NITERS iterations had already been
1975    executed.  Specifically, it updates the OFFSET field of DR.  */
1976
1977 static void
1978 vect_update_init_of_dr (struct data_reference *dr, tree niters)
1979 {
1980   /* Amount covered by the peeled iterations: NITERS * DR_STEP, in sizetype.  */
1981   tree n = fold_convert (sizetype, niters);
1982   tree step = fold_convert (sizetype, DR_STEP (dr));
1983   tree skipped = fold_build2 (MULT_EXPR, sizetype, n, step);
1984
1985   /* Add that amount to the (sizetype-converted) offset recorded in DR.  */
1986   tree old_off = fold_convert (sizetype, DR_OFFSET (dr));
1987   DR_OFFSET (dr) = fold_build2 (PLUS_EXPR, sizetype, old_off, skipped);
1988 }
1989
1990
1991 /* Function vect_update_inits_of_drs
1992
1993    NITERS iterations were peeled from the loop represented by LOOP_VINFO.
1994    This function updates the information recorded for the data references in
1995    the loop to account for the fact that the first NITERS iterations had
1996    already been executed.  Specifically, it updates the OFFSET field of
1997    every data reference in the loop (see vect_update_init_of_dr).  */
1998
1999 static void
2000 vect_update_inits_of_drs (loop_vec_info loop_vinfo, tree niters)
2001 {
2002   vec<data_reference_p> drs = LOOP_VINFO_DATAREFS (loop_vinfo);
2003   data_reference_p cur;
2004   unsigned int ix;
2005
2006   if (dump_enabled_p ())
2007     dump_printf_loc (MSG_NOTE, vect_location,
2008                      "=== vect_update_inits_of_dr ===\n");
2009   /* Apply the same NITERS adjustment to every data reference of the loop.  */
2010   for (ix = 0; drs.iterate (ix, &cur); ix++)
2011     vect_update_init_of_dr (cur, niters);
2012 }
2013
2014
2015 /* Function vect_do_peeling_for_alignment
2016
2017    Peel the first 'niters' iterations of the loop represented by LOOP_VINFO.
2018    'niters' is set to the misalignment of one of the data references in the
2019    loop, thereby forcing it to refer to an aligned location at the beginning
2020    of the execution of this loop.  The data reference for which we are
2021    peeling is recorded in LOOP_VINFO_UNALIGNED_DR.  */
2022
2023 void
2024 vect_do_peeling_for_alignment (loop_vec_info loop_vinfo,
2025                                unsigned int th, bool check_profitability)
2026 {
2027   struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
2028   tree niters_of_prolog_loop, ni_name;
2029   tree n_iters;
2030   tree wide_prolog_niters;
2031   struct loop *new_loop;
2032   int max_iter;
2033   int bound = 0;
2034   /* Peels a prolog loop off LOOP, then updates NITERS and the DR offsets.  */
2035   if (dump_enabled_p ())
2036     dump_printf_loc (MSG_OPTIMIZED_LOCATIONS, vect_location,
2037                      "loop peeled for vectorization to enhance"
2038                      " alignment\n");
2039
2040   initialize_original_copy_tables ();
2041   /* Compute how many iterations the prolog loop has to execute.  */
2042   ni_name = vect_build_loop_niters (loop_vinfo, NULL);
2043   niters_of_prolog_loop = vect_gen_niters_for_prolog_loop (loop_vinfo,
2044                                                            ni_name,
2045                                                            &bound);
2046
2047   /* Peel the prolog loop and iterate it niters_of_prolog_loop.  */
2048   new_loop =
2049     slpeel_tree_peel_loop_to_edge (loop, loop_preheader_edge (loop),
2050                                    &niters_of_prolog_loop, ni_name, true,
2051                                    th, check_profitability, NULL_TREE, NULL,
2052                                    bound,
2053                                    0);
2054
2055   gcc_assert (new_loop);
2056 #ifdef ENABLE_CHECKING
2057   slpeel_verify_cfg_after_peeling (new_loop, loop);
2058 #endif
2059   /* For vectorization factor N, we need to copy at most N-1 values
2060      for alignment and this means N-2 loopback edge executions.  */
2061   max_iter = LOOP_VINFO_VECT_FACTOR (loop_vinfo) - 2;
2062   if (check_profitability)
2063     max_iter = MAX (max_iter, (int) th - 1);
2064   record_niter_bound (new_loop, double_int::from_shwi (max_iter), false, true);
2065   if (dump_enabled_p ())
2066     dump_printf (MSG_NOTE, "Setting upper bound of nb iterations for prologue "
2067                  "loop to %d\n", max_iter);
2068
2069   /* Update number of times loop executes.  */
2070   n_iters = LOOP_VINFO_NITERS (loop_vinfo);
2071   LOOP_VINFO_NITERS (loop_vinfo) = fold_build2 (MINUS_EXPR,
2072                 TREE_TYPE (n_iters), n_iters, niters_of_prolog_loop);
2073   /* The DR offsets are updated in sizetype; convert the count if needed.  */
2074   if (types_compatible_p (sizetype, TREE_TYPE (niters_of_prolog_loop)))
2075     wide_prolog_niters = niters_of_prolog_loop;
2076   else
2077     {
2078       gimple_seq seq = NULL;
2079       edge pe = loop_preheader_edge (loop);
2080       tree wide_iters = fold_convert (sizetype, niters_of_prolog_loop);
2081       tree var = create_tmp_var (sizetype, "prolog_loop_adjusted_niters");
2082       wide_prolog_niters = force_gimple_operand (wide_iters, &seq, false,
2083                                                  var);
2084       if (seq)
2085         {
2086           /* Insert stmt on loop preheader edge.  */
2087           basic_block new_bb = gsi_insert_seq_on_edge_immediate (pe, seq);
2088           gcc_assert (!new_bb);
2089         }
2090     }
2091
2092   /* Update the init conditions of the access functions of all data refs.  */
2093   vect_update_inits_of_drs (loop_vinfo, wide_prolog_niters);
2094
2095   /* After peeling we have to reset scalar evolution analyzer.  */
2096   scev_reset ();
2097
2098   free_original_copy_tables ();
2099 }
2100
2101
2102 /* Function vect_create_cond_for_align_checks.
2103
2104    Create a conditional expression that represents the alignment checks for
2105    all of data references (array element references) whose alignment must be
2106    checked at runtime.
2107
2108    Input:
2109    COND_EXPR  - input conditional expression.  New conditions will be chained
2110                 with logical AND operation.
2111    LOOP_VINFO - two fields of the loop information are used.
2112                 LOOP_VINFO_PTR_MASK is the mask used to check the alignment.
2113                 LOOP_VINFO_MAY_MISALIGN_STMTS contains the refs to be checked.
2114
2115    Output:
2116    COND_EXPR_STMT_LIST - statements needed to construct the conditional
2117                          expression.
2118    The returned value is the conditional expression to be used in the if
2119    statement that controls which version of the loop gets executed at runtime.
2120
2121    The algorithm makes two assumptions:
2122      1) The number of bytes "n" in a vector is a power of 2.
2123      2) An address "a" is aligned if a%n is zero and that this
2124         test can be done as a&(n-1) == 0.  For example, for 16
2125         byte vectors the test is a&0xf == 0.  */
2126
2127 static void
2128 vect_create_cond_for_align_checks (loop_vec_info loop_vinfo,
2129                                    tree *cond_expr,
2130                                    gimple_seq *cond_expr_stmt_list)
2131 {
2132   struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
2133   vec<gimple> may_misalign_stmts
2134     = LOOP_VINFO_MAY_MISALIGN_STMTS (loop_vinfo);
2135   gimple ref_stmt;
2136   int mask = LOOP_VINFO_PTR_MASK (loop_vinfo);
2137   tree mask_cst;
2138   unsigned int i;
2139   tree int_ptrsize_type;
2140   char tmp_name[20];
2141   tree or_tmp_name = NULL_TREE;
2142   tree and_tmp_name;
2143   gimple and_stmt;
2144   tree ptrsize_zero;
2145   tree part_cond_expr;
2146   /* Appends ((addr_1 | ... | addr_n) & MASK) == 0 to *COND_EXPR.  */
2147   /* Check that mask is one less than a power of 2, i.e., mask is
2148      all zeros followed by all ones.  */
2149   gcc_assert ((mask != 0) && ((mask & (mask+1)) == 0));
2150   /* The temporaries below use the signed integer type for ptr_type_node.  */
2151   int_ptrsize_type = signed_type_for (ptr_type_node);
2152
2153   /* Create expression (mask & (dr_1 | ... | dr_n)) where dr_i is the address
2154      of the first vector of the i'th data reference. */
2155
2156   FOR_EACH_VEC_ELT (may_misalign_stmts, i, ref_stmt)
2157     {
2158       gimple_seq new_stmt_list = NULL;
2159       tree addr_base;
2160       tree addr_tmp_name;
2161       tree new_or_tmp_name;
2162       gimple addr_stmt, or_stmt;
2163       stmt_vec_info stmt_vinfo = vinfo_for_stmt (ref_stmt);
2164       tree vectype = STMT_VINFO_VECTYPE (stmt_vinfo);
2165       bool negative = tree_int_cst_compare
2166         (DR_STEP (STMT_VINFO_DATA_REF (stmt_vinfo)), size_zero_node) < 0;
2167       tree offset = negative
2168         ? size_int (-TYPE_VECTOR_SUBPARTS (vectype) + 1) : NULL_TREE;
2169
2170       /* create: addr_tmp = (int)(address_of_first_vector) */
2171       addr_base =
2172         vect_create_addr_base_for_vector_ref (ref_stmt, &new_stmt_list,
2173                                               offset, loop);
2174       if (new_stmt_list != NULL)
2175         gimple_seq_add_seq (cond_expr_stmt_list, new_stmt_list);
2176       /* TMP_NAME holds 20 bytes; the prefix plus the digits of I must fit.  */
2177       sprintf (tmp_name, "addr2int%d", i);
2178       addr_tmp_name = make_temp_ssa_name (int_ptrsize_type, NULL, tmp_name);
2179       addr_stmt = gimple_build_assign_with_ops (NOP_EXPR, addr_tmp_name,
2180                                                 addr_base, NULL_TREE);
2181       gimple_seq_add_stmt (cond_expr_stmt_list, addr_stmt);
2182
2183       /* The addresses are OR'ed together.  */
2184
2185       if (or_tmp_name != NULL_TREE)
2186         {
2187           /* create: or_tmp = or_tmp | addr_tmp */
2188           sprintf (tmp_name, "orptrs%d", i);
2189           new_or_tmp_name = make_temp_ssa_name (int_ptrsize_type, NULL, tmp_name);
2190           or_stmt = gimple_build_assign_with_ops (BIT_IOR_EXPR,
2191                                                   new_or_tmp_name,
2192                                                   or_tmp_name, addr_tmp_name);
2193           gimple_seq_add_stmt (cond_expr_stmt_list, or_stmt);
2194           or_tmp_name = new_or_tmp_name;
2195         }
2196       else
2197         or_tmp_name = addr_tmp_name;
2198
2199     } /* end for i */
2200   /* NOTE(review): OR_TMP_NAME stays NULL_TREE if no stmt was visited.  */
2201   mask_cst = build_int_cst (int_ptrsize_type, mask);
2202
2203   /* create: and_tmp = or_tmp & mask  */
2204   and_tmp_name = make_temp_ssa_name (int_ptrsize_type, NULL, "andmask");
2205
2206   and_stmt = gimple_build_assign_with_ops (BIT_AND_EXPR, and_tmp_name,
2207                                            or_tmp_name, mask_cst);
2208   gimple_seq_add_stmt (cond_expr_stmt_list, and_stmt);
2209
2210   /* Make and_tmp the left operand of the conditional test against zero.
2211      if and_tmp has a nonzero bit then some address is unaligned.  */
2212   ptrsize_zero = build_int_cst (int_ptrsize_type, 0);
2213   part_cond_expr = fold_build2 (EQ_EXPR, boolean_type_node,
2214                                 and_tmp_name, ptrsize_zero);
2215   if (*cond_expr)
2216     *cond_expr = fold_build2 (TRUTH_AND_EXPR, boolean_type_node,
2217                               *cond_expr, part_cond_expr);
2218   else
2219     *cond_expr = part_cond_expr;
2220 }
2221
2222
2223 /* Function vect_vfa_segment_size.
2224
2225    Create an expression that computes the size of segment
2226    that will be accessed for a data reference.  The function takes into
2227    account that realignment loads may access one more vector.
2228
2229    Input:
2230      DR: The data reference.
2231      LENGTH_FACTOR: segment length to consider.
2232
2233    Return an expression whose value is the size of segment which will be
2234    accessed by DR.  */
2235
2236 static tree
2237 vect_vfa_segment_size (struct data_reference *dr, tree length_factor)
2238 {
2239   tree step = DR_STEP (dr);
2240   tree len, vectype;
2241
2242   /* Zero step: size of the referenced type.  Else: STEP * LENGTH_FACTOR.  */
2243   if (!integer_zerop (step))
2244     len = size_binop (MULT_EXPR, fold_convert (sizetype, step),
2245                       fold_convert (sizetype, length_factor));
2246   else
2247     len = TYPE_SIZE_UNIT (TREE_TYPE (DR_REF (dr)));
2248
2249   /* Any other alignment scheme needs no adjustment of the length.  */
2250   if (vect_supportable_dr_alignment (dr, false)
2251       != dr_explicit_realign_optimized)
2252     return len;
2253
2254   /* Optimized explicit realignment: add the size of one more vector.  */
2255   vectype = STMT_VINFO_VECTYPE (vinfo_for_stmt (DR_STMT (dr)));
2256   return size_binop (PLUS_EXPR, len, TYPE_SIZE_UNIT (vectype));
2257 }
2258
2259
2260 /* Function vect_create_cond_for_alias_checks.
2261
2262    Create a conditional expression that represents the run-time checks for
2263    overlapping of address ranges represented by a list of data references
2264    relations passed as input.
2265
2266    Input:
2267    COND_EXPR  - input conditional expression.  New conditions will be chained
2268                 with logical AND operation.
2269    LOOP_VINFO - field LOOP_VINFO_MAY_ALIAS_DDRS contains the list of ddrs
2270                 to be checked.
2271
2272    Output:
2273    COND_EXPR - conditional expression.
2274
2275    The returned value is the conditional expression to be used in the if
2276    statement that controls which version of the loop gets executed at runtime.
2277 */
2278
2279 static void
2280 vect_create_cond_for_alias_checks (loop_vec_info loop_vinfo, tree * cond_expr)
2281 {
2282   vec<ddr_p>  may_alias_ddrs =
2283     LOOP_VINFO_MAY_ALIAS_DDRS (loop_vinfo);
2284   int vect_factor = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
2285   tree scalar_loop_iters = LOOP_VINFO_NITERS (loop_vinfo);
2286   /* ANDs one no-overlap test per may-alias DDR into *COND_EXPR.  */
2287   ddr_p ddr;
2288   unsigned int i;
2289   tree part_cond_expr, length_factor;
2290
2291   /* Create expression
2292      (((store_ptr_0 + store_segment_length_0) <= load_ptr_0)
2293       || ((load_ptr_0 + load_segment_length_0) <= store_ptr_0))
2294      &&
2295      ...
2296      &&
2297      (((store_ptr_n + store_segment_length_n) <= load_ptr_n)
2298       || ((load_ptr_n + load_segment_length_n) <= store_ptr_n))  */
2299   /* Nothing to check: leave *COND_EXPR untouched.  */
2300   if (may_alias_ddrs.is_empty ())
2301     return;
2302
2303   FOR_EACH_VEC_ELT (may_alias_ddrs, i, ddr)
2304     {
2305       struct data_reference *dr_a, *dr_b;
2306       gimple dr_group_first_a, dr_group_first_b;
2307       tree addr_base_a, addr_base_b;
2308       tree segment_length_a, segment_length_b;
2309       gimple stmt_a, stmt_b;
2310       tree seg_a_min, seg_a_max, seg_b_min, seg_b_max;
2311       /* If a stmt belongs to a group, use the group's first stmt and its DR.  */
2312       dr_a = DDR_A (ddr);
2313       stmt_a = DR_STMT (DDR_A (ddr));
2314       dr_group_first_a = GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt_a));
2315       if (dr_group_first_a)
2316         {
2317           stmt_a = dr_group_first_a;
2318           dr_a = STMT_VINFO_DATA_REF (vinfo_for_stmt (stmt_a));
2319         }
2320
2321       dr_b = DDR_B (ddr);
2322       stmt_b = DR_STMT (DDR_B (ddr));
2323       dr_group_first_b = GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt_b));
2324       if (dr_group_first_b)
2325         {
2326           stmt_b = dr_group_first_b;
2327           dr_b = STMT_VINFO_DATA_REF (vinfo_for_stmt (stmt_b));
2328         }
2329       /* Start address of each DR: base address + (offset + init).  */
2330       addr_base_a
2331         = fold_build_pointer_plus (DR_BASE_ADDRESS (dr_a),
2332                                    size_binop (PLUS_EXPR, DR_OFFSET (dr_a),
2333                                                DR_INIT (dr_a)));
2334       addr_base_b
2335         = fold_build_pointer_plus (DR_BASE_ADDRESS (dr_b),
2336                                    size_binop (PLUS_EXPR, DR_OFFSET (dr_b),
2337                                                DR_INIT (dr_b)));
2338       /* Different steps: use the scalar iteration count; else the VF.  */
2339       if (!operand_equal_p (DR_STEP (dr_a), DR_STEP (dr_b), 0))
2340         length_factor = scalar_loop_iters;
2341       else
2342         length_factor = size_int (vect_factor);
2343       segment_length_a = vect_vfa_segment_size (dr_a, length_factor);
2344       segment_length_b = vect_vfa_segment_size (dr_b, length_factor);
2345
2346       if (dump_enabled_p ())
2347         {
2348           dump_printf_loc (MSG_NOTE, vect_location,
2349                            "create runtime check for data references ");
2350           dump_generic_expr (MSG_NOTE, TDF_SLIM, DR_REF (dr_a));
2351           dump_printf (MSG_NOTE, " and ");
2352           dump_generic_expr (MSG_NOTE, TDF_SLIM, DR_REF (dr_b));
2353           dump_printf (MSG_NOTE, "\n");
2354         }
2355       /* Each segment runs from MIN to MAX; a negative step swaps the ends.  */
2356       seg_a_min = addr_base_a;
2357       seg_a_max = fold_build_pointer_plus (addr_base_a, segment_length_a);
2358       if (tree_int_cst_compare (DR_STEP (dr_a), size_zero_node) < 0)
2359         seg_a_min = seg_a_max, seg_a_max = addr_base_a;
2360       /* NOTE(review): negative step: max is the bare base; verify.  */
2361       seg_b_min = addr_base_b;
2362       seg_b_max = fold_build_pointer_plus (addr_base_b, segment_length_b);
2363       if (tree_int_cst_compare (DR_STEP (dr_b), size_zero_node) < 0)
2364         seg_b_min = seg_b_max, seg_b_max = addr_base_b;
2365       /* Test passes if either segment's MAX is <= the other's MIN.  */
2366       part_cond_expr =
2367         fold_build2 (TRUTH_OR_EXPR, boolean_type_node,
2368           fold_build2 (LE_EXPR, boolean_type_node, seg_a_max, seg_b_min),
2369           fold_build2 (LE_EXPR, boolean_type_node, seg_b_max, seg_a_min));
2370       /* Chain with any condition already present in *COND_EXPR.  */
2371       if (*cond_expr)
2372         *cond_expr = fold_build2 (TRUTH_AND_EXPR, boolean_type_node,
2373                                   *cond_expr, part_cond_expr);
2374       else
2375         *cond_expr = part_cond_expr;
2376     }
2377
2378   if (dump_enabled_p ())
2379     dump_printf_loc (MSG_NOTE, vect_location,
2380                      "created %u versioning for alias checks.\n",
2381                      may_alias_ddrs.length ());
2382 }
2383
2384
2385 /* Function vect_loop_versioning.
2386
2387    If the loop has data references that may or may not be aligned or/and
2388    has data reference relations whose independence was not proven then
2389    two versions of the loop need to be generated, one which is vectorized
2390    and one which isn't.  A test is then generated to control which of the
2391    loops is executed.  The test checks for the alignment of all of the
2392    data references that may or may not be aligned.  An additional
2393    sequence of runtime tests is generated for each pairs of DDRs whose
2394    independence was not proven.  The vectorized version of loop is
2395    executed only if both alias and alignment tests are passed.
2396
2397    The test generated to check which version of loop is executed
2398    is modified to also check for profitability as indicated by the
2399    cost model initially.
2400
2401    The versioning precondition(s) are placed in *COND_EXPR and
2402    *COND_EXPR_STMT_LIST.  */
2403
2404 void
2405 vect_loop_versioning (loop_vec_info loop_vinfo,
2406                       unsigned int th, bool check_profitability)
2407 {
2408   struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
2409   basic_block condition_bb;
2410   gimple_stmt_iterator gsi, cond_exp_gsi;
2411   basic_block merge_bb;
2412   basic_block new_exit_bb;
2413   edge new_exit_e, e;
2414   gimple orig_phi, new_phi;
2415   tree cond_expr = NULL_TREE;
2416   gimple_seq cond_expr_stmt_list = NULL;
2417   tree arg;
2418   unsigned prob = 4 * REG_BR_PROB_BASE / 5;
2419   gimple_seq gimplify_stmt_list = NULL;
2420   tree scalar_loop_iters = LOOP_VINFO_NITERS (loop_vinfo);
2421   bool version_align = LOOP_REQUIRES_VERSIONING_FOR_ALIGNMENT (loop_vinfo);
2422   bool version_alias = LOOP_REQUIRES_VERSIONING_FOR_ALIAS (loop_vinfo);
2423
2424   if (check_profitability)
2425     {
2426       cond_expr = fold_build2 (GT_EXPR, boolean_type_node, scalar_loop_iters,
2427                                build_int_cst (TREE_TYPE (scalar_loop_iters), th));
2428       cond_expr = force_gimple_operand_1 (cond_expr, &cond_expr_stmt_list,
2429                                           is_gimple_condexpr, NULL_TREE);
2430     }
2431
2432   if (version_align)
2433     vect_create_cond_for_align_checks (loop_vinfo, &cond_expr,
2434                                        &cond_expr_stmt_list);
2435
2436   if (version_alias)
2437     vect_create_cond_for_alias_checks (loop_vinfo, &cond_expr);
2438
2439   cond_expr = force_gimple_operand_1 (cond_expr, &gimplify_stmt_list,
2440                                       is_gimple_condexpr, NULL_TREE);
2441   gimple_seq_add_seq (&cond_expr_stmt_list, gimplify_stmt_list);
2442
2443   initialize_original_copy_tables ();
2444   loop_version (loop, cond_expr, &condition_bb,
2445                 prob, prob, REG_BR_PROB_BASE - prob, true);
2446
2447   if (LOCATION_LOCUS (vect_location) != UNKNOWN_LOC
2448       && dump_enabled_p ())
2449     {
2450       if (version_alias)
2451         dump_printf_loc (MSG_OPTIMIZED_LOCATIONS, vect_location,
2452                          "loop versioned for vectorization because of "
2453                          "possible aliasing\n");
2454       if (version_align)
2455         dump_printf_loc (MSG_OPTIMIZED_LOCATIONS, vect_location,
2456                          "loop versioned for vectorization to enhance "
2457                          "alignment\n");
2458
2459     }
2460   free_original_copy_tables ();
2461
2462   /* Loop versioning violates an assumption we try to maintain during
2463      vectorization - that the loop exit block has a single predecessor.
2464      After versioning, the exit block of both loop versions is the same
2465      basic block (i.e. it has two predecessors). Just in order to simplify
2466      following transformations in the vectorizer, we fix this situation
2467      here by adding a new (empty) block on the exit-edge of the loop,
2468      with the proper loop-exit phis to maintain loop-closed-form.  */
2469
2470   merge_bb = single_exit (loop)->dest;
2471   gcc_assert (EDGE_COUNT (merge_bb->preds) == 2);
2472   new_exit_bb = split_edge (single_exit (loop));
2473   new_exit_e = single_exit (loop);
2474   e = EDGE_SUCC (new_exit_bb, 0);
2475
2476   for (gsi = gsi_start_phis (merge_bb); !gsi_end_p (gsi); gsi_next (&gsi))
2477     {
2478       tree new_res;
2479       orig_phi = gsi_stmt (gsi);
2480       new_res = copy_ssa_name (PHI_RESULT (orig_phi), NULL);
2481       new_phi = create_phi_node (new_res, new_exit_bb);
2482       arg = PHI_ARG_DEF_FROM_EDGE (orig_phi, e);
2483       add_phi_arg (new_phi, arg, new_exit_e,
2484                    gimple_phi_arg_location_from_edge (orig_phi, e));
2485       adjust_phi_and_debug_stmts (orig_phi, e, PHI_RESULT (new_phi));
2486     }
2487
2488
2489   /* Extract load statements on memrefs with zero-stride accesses.  */
2490
2491   if (LOOP_REQUIRES_VERSIONING_FOR_ALIAS (loop_vinfo))
2492     {
2493       /* In the loop body, we iterate each statement to check if it is a load.
2494          Then we check the DR_STEP of the data reference.  If DR_STEP is zero,
2495          then we will hoist the load statement to the loop preheader.  */
2496
2497       basic_block *bbs = LOOP_VINFO_BBS (loop_vinfo);
2498       int nbbs = loop->num_nodes;
2499
2500       for (int i = 0; i < nbbs; ++i)
2501         {
2502           for (gimple_stmt_iterator si = gsi_start_bb (bbs[i]);
2503                !gsi_end_p (si);)
2504             {
2505               gimple stmt = gsi_stmt (si);
2506               stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
2507               struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
2508
2509               if (is_gimple_assign (stmt)
2510                   && (!dr
2511                       || (DR_IS_READ (dr) && integer_zerop (DR_STEP (dr)))))
2512                 {
2513                   bool hoist = true;
2514                   ssa_op_iter iter;
2515                   tree var;
2516
2517                   /* We hoist a statement if all SSA uses in it are defined
2518                      outside of the loop.  */
2519                   FOR_EACH_SSA_TREE_OPERAND (var, stmt, iter, SSA_OP_USE)
2520                     {
2521                       gimple def = SSA_NAME_DEF_STMT (var);
2522                       if (!gimple_nop_p (def)
2523                           && flow_bb_inside_loop_p (loop, gimple_bb (def)))
2524                         {
2525                           hoist = false;
2526                           break;
2527                         }
2528                     }
2529
2530                   if (hoist)
2531                     {
2532                       if (dr)
2533                         gimple_set_vuse (stmt, NULL);
2534
2535                       gsi_remove (&si, false);
2536                       gsi_insert_on_edge_immediate (loop_preheader_edge (loop),
2537                                                     stmt);
2538
2539                       if (dump_enabled_p ())
2540                         {
2541                           dump_printf_loc
2542                               (MSG_NOTE, vect_location,
2543                                "hoisting out of the vectorized loop: ");
2544                           dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
2545                           dump_printf (MSG_NOTE, "\n");
2546                         }
2547                       continue;
2548                     }
2549                 }
2550               gsi_next (&si);
2551             }
2552         }
2553     }
2554
2555   /* End loop-exit-fixes after versioning.  */
2556
2557   if (cond_expr_stmt_list)
2558     {
2559       cond_exp_gsi = gsi_last_bb (condition_bb);
2560       gsi_insert_seq_before (&cond_exp_gsi, cond_expr_stmt_list,
2561                              GSI_SAME_STMT);
2562     }
2563   update_ssa (TODO_update_ssa);
2564 }