/* Loop Vectorization
   Copyright (C) 2003, 2004, 2005 Free Software Foundation, Inc.
   Contributed by Dorit Naishlos <dorit@il.ibm.com>

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free
Software Foundation; either version 2, or (at your option) any later
version.

GCC is distributed in the hope that it will be useful, but WITHOUT ANY
WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING.  If not, write to the Free
Software Foundation, 59 Temple Place - Suite 330, Boston, MA
02111-1307, USA.  */

/* Loop Vectorization Pass.

   This pass tries to vectorize loops.  This first implementation focuses on
   simple inner-most loops, with no conditional control flow, and a set of
   simple operations whose vector form can be expressed using existing
   tree codes (PLUS, MULT etc).

   For example, the vectorizer transforms the following simple loop:

        short a[N]; short b[N]; short c[N]; int i;

        for (i=0; i<N; i++){
          a[i] = b[i] + c[i];
        }

   as if it were manually vectorized by rewriting the source code into:

        typedef int __attribute__((mode(V8HI))) v8hi;
        short a[N]; short b[N]; short c[N]; int i;
        v8hi *pa = (v8hi*)a, *pb = (v8hi*)b, *pc = (v8hi*)c;
        v8hi va, vb, vc;

        for (i=0; i<N/8; i++){
          vb = pb[i];
          vc = pc[i];
          va = vb + vc;
          pa[i] = va;
        }

   The main entry to this pass is vectorize_loops(), in which
   the vectorizer applies a set of analyses on a given set of loops,
   followed by the actual vectorization transformation for the loops that
   had successfully passed the analysis phase.

   Throughout this pass we make a distinction between two types of
   data: scalars (which are represented by SSA_NAMES), and memory references
   ("data-refs").  These two types of data require different handling both
   during analysis and transformation.  The types of data-refs that the
   vectorizer currently supports are ARRAY_REFS whose base is an array DECL
   (not a pointer), and INDIRECT_REFS through pointers; both array and pointer
   accesses are required to have a simple (consecutive) access pattern.

   Analysis phase:
   ===============
   The driver for the analysis phase is vect_analyze_loop_nest().
   It applies a set of analyses, some of which rely on the scalar evolution
   analyzer (scev) developed by Sebastian Pop.

   During the analysis phase the vectorizer records some information
   per stmt in a "stmt_vec_info" struct which is attached to each stmt in the
   loop, as well as general information about the loop as a whole, which is
   recorded in a "loop_vec_info" struct attached to each loop.

   Transformation phase:
   =====================
   The loop transformation phase scans all the stmts in the loop, and
   creates a vector stmt (or a sequence of stmts) for each scalar stmt S in
   the loop that needs to be vectorized.  It inserts the vector code sequence
   just before the scalar stmt S, and records a pointer to the vector code
   in STMT_VINFO_VEC_STMT (stmt_info) (stmt_info is the stmt_vec_info struct
   attached to S).  This pointer will be used for the vectorization of following
   stmts which use the def of stmt S.  Stmt S is removed if it writes to memory;
   otherwise, we rely on dead code elimination for removing it.

   For example, say stmt S1 was vectorized into stmt VS1:

   VS1: vb = px[i];
   S1:  b = x[i];    STMT_VINFO_VEC_STMT (stmt_info (S1)) = VS1
   S2:  a = b;

   To vectorize stmt S2, the vectorizer first finds the stmt that defines
   the operand 'b' (S1), and gets the relevant vector def 'vb' from the
   vector stmt VS1 pointed by STMT_VINFO_VEC_STMT (stmt_info (S1)).  The
   resulting sequence would be:

   VS1: vb = px[i];
   S1:  b = x[i];    STMT_VINFO_VEC_STMT (stmt_info (S1)) = VS1
   VS2: va = vb;
   S2:  a = b;       STMT_VINFO_VEC_STMT (stmt_info (S2)) = VS2

   Operands that are not SSA_NAMEs, are data-refs that appear in
   load/store operations (like 'x[i]' in S1), and are handled differently.

   Target modeling:
   =================
   Currently the only target specific information that is used is the
   size of the vector (in bytes) - "UNITS_PER_SIMD_WORD".  Targets that can
   support different vector sizes will, for now, need to specify one value
   for "UNITS_PER_SIMD_WORD".  More flexibility will be added in the future.

   Since we only vectorize operations whose vector form can be
   expressed using existing tree codes, to verify that an operation is
   supported, the vectorizer checks the relevant optab at the relevant
   machine_mode (e.g., add_optab->handlers[(int) V8HImode].insn_code).  If
   the value found is CODE_FOR_nothing, then there's no target support, and
   we can't vectorize the stmt.
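
   For example, a sketch of such a check, for vector addition on V8HImode
   (illustrative only; it mirrors the optab lookup described above):

        optab op = add_optab;
        if (op->handlers[(int) V8HImode].insn_code == CODE_FOR_nothing)
          return false;    (no target support; the stmt can't be vectorized)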

   For additional information on this project see:
   http://gcc.gnu.org/projects/tree-ssa/vectorization.html
*/

#include "config.h"
#include "system.h"
#include "coretypes.h"
#include "tm.h"
#include "errors.h"
#include "ggc.h"
#include "tree.h"
#include "target.h"

#include "rtl.h"
#include "basic-block.h"
#include "diagnostic.h"
#include "tree-flow.h"
#include "tree-dump.h"
#include "timevar.h"
#include "cfgloop.h"
#include "cfglayout.h"
#include "expr.h"
#include "optabs.h"
#include "toplev.h"
#include "tree-chrec.h"
#include "tree-data-ref.h"
#include "tree-scalar-evolution.h"
#include "tree-vectorizer.h"
#include "tree-pass.h"
#include "langhooks.h"


/*************************************************************************
  Simple Loop Peeling Utilities
 *************************************************************************/

/* Entry point for peeling of simple loops.
   Peel the first/last iterations of a loop.
   It can be used outside of the vectorizer for loops that are simple enough
   (see function documentation).  In the vectorizer it is used to peel the
   last few iterations when the loop bound is unknown or does not evenly
   divide by the vectorization factor, and to peel the first few iterations
   to force the alignment of data references in the loop.  */
struct loop *slpeel_tree_peel_loop_to_edge
  (struct loop *, struct loops *, edge, tree, tree, bool);
static struct loop *slpeel_tree_duplicate_loop_to_edge_cfg
  (struct loop *, struct loops *, edge);
static void slpeel_update_phis_for_duplicate_loop
  (struct loop *, struct loop *, bool after);
static void slpeel_update_phi_nodes_for_guard (edge, struct loop *, bool, bool);
static void slpeel_make_loop_iterate_ntimes (struct loop *, tree);
static edge slpeel_add_loop_guard (basic_block, tree, basic_block, basic_block);
static bool slpeel_can_duplicate_loop_p (struct loop *, edge);
static void allocate_new_names (bitmap);
static void rename_use_op (use_operand_p);
static void rename_def_op (def_operand_p, tree);
static void rename_variables_in_bb (basic_block);
static void free_new_names (bitmap);
static void rename_variables_in_loop (struct loop *);
#ifdef ENABLE_CHECKING
static void slpeel_verify_cfg_after_peeling (struct loop *, struct loop *);
#endif


/*************************************************************************
  Vectorization Utilities.
 *************************************************************************/

/* Main analysis functions.  */
static loop_vec_info vect_analyze_loop (struct loop *);
static loop_vec_info vect_analyze_loop_form (struct loop *);
static bool vect_analyze_data_refs (loop_vec_info);
static bool vect_mark_stmts_to_be_vectorized (loop_vec_info);
static bool vect_analyze_scalar_cycles (loop_vec_info);
static bool vect_analyze_data_ref_accesses (loop_vec_info);
static bool vect_analyze_data_refs_alignment (loop_vec_info);
static bool vect_compute_data_refs_alignment (loop_vec_info);
static bool vect_analyze_operations (loop_vec_info);

/* Main code transformation functions.  */
static void vect_transform_loop (loop_vec_info, struct loops *);
static bool vect_transform_stmt (tree, block_stmt_iterator *);
static bool vectorizable_load (tree, block_stmt_iterator *, tree *);
static bool vectorizable_store (tree, block_stmt_iterator *, tree *);
static bool vectorizable_operation (tree, block_stmt_iterator *, tree *);
static bool vectorizable_assignment (tree, block_stmt_iterator *, tree *);
static enum dr_alignment_support vect_supportable_dr_alignment
  (struct data_reference *);
static void vect_align_data_ref (tree);
static void vect_enhance_data_refs_alignment (loop_vec_info);

/* Utility functions for the analyses.  */
static bool vect_is_simple_use (tree, struct loop *, tree *);
static bool exist_non_indexing_operands_for_use_p (tree, tree);
static bool vect_is_simple_iv_evolution (unsigned, tree, tree *, tree *, bool);
static void vect_mark_relevant (varray_type *, tree);
static bool vect_stmt_relevant_p (tree, loop_vec_info);
static tree vect_get_loop_niters (struct loop *, tree *);
static bool vect_compute_data_ref_alignment (struct data_reference *);
static bool vect_analyze_data_ref_access (struct data_reference *);
static bool vect_can_force_dr_alignment_p (tree, unsigned int);
static struct data_reference * vect_analyze_pointer_ref_access
  (tree, tree, bool);
static bool vect_can_advance_ivs_p (struct loop *);
static tree vect_get_base_and_offset (struct data_reference *, tree, tree,
                                      loop_vec_info, tree *, tree *, tree *,
                                      bool *);
static tree vect_get_ptr_offset (tree, tree, tree *);
static tree vect_get_memtag_and_dr
  (tree, tree, bool, loop_vec_info, tree, struct data_reference **);
static bool vect_analyze_offset_expr (tree, struct loop *, tree, tree *,
                                      tree *, tree *);
static tree vect_strip_conversion (tree);

/* Utility functions for the code transformation.  */
static tree vect_create_destination_var (tree, tree);
static tree vect_create_data_ref_ptr
  (tree, block_stmt_iterator *, tree, tree *, bool);
static tree vect_create_index_for_vector_ref
  (struct loop *, block_stmt_iterator *);
static tree vect_create_addr_base_for_vector_ref (tree, tree *, tree);
static tree get_vectype_for_scalar_type (tree);
static tree vect_get_new_vect_var (tree, enum vect_var_kind, const char *);
static tree vect_get_vec_def_for_operand (tree, tree);
static tree vect_init_vector (tree, tree);
static void vect_finish_stmt_generation
  (tree stmt, tree vec_stmt, block_stmt_iterator *bsi);

/* Utility functions dealing with loop peeling (not peeling itself).  */
static void vect_generate_tmps_on_preheader
  (loop_vec_info, tree *, tree *, tree *);
static tree vect_build_loop_niters (loop_vec_info);
static void vect_update_ivs_after_vectorizer (struct loop *, tree, edge);
static tree vect_gen_niters_for_prolog_loop (loop_vec_info, tree);
static void vect_update_inits_of_dr (struct data_reference *, tree niters);
static void vect_update_inits_of_drs (loop_vec_info, tree);
static void vect_do_peeling_for_alignment (loop_vec_info, struct loops *);
static void vect_do_peeling_for_loop_bound
  (loop_vec_info, tree *, struct loops *);

/* Utilities for creation and deletion of vec_info structs.  */
loop_vec_info new_loop_vec_info (struct loop *loop);
void destroy_loop_vec_info (loop_vec_info);
stmt_vec_info new_stmt_vec_info (tree stmt, struct loop *loop);

static bool vect_debug_stats (struct loop *loop);
static bool vect_debug_details (struct loop *loop);

\f
/*************************************************************************
  Simple Loop Peeling Utilities

  Utilities to support loop peeling for vectorization purposes.
 *************************************************************************/


/* For each definition in DEFINITIONS this function allocates
   a new ssa name.  */

static void
allocate_new_names (bitmap definitions)
{
  unsigned ver;
  bitmap_iterator bi;

  EXECUTE_IF_SET_IN_BITMAP (definitions, 0, ver, bi)
    {
      tree def = ssa_name (ver);
      tree *new_name_ptr = xmalloc (sizeof (tree));

      bool abnormal = SSA_NAME_OCCURS_IN_ABNORMAL_PHI (def);

      *new_name_ptr = duplicate_ssa_name (def, SSA_NAME_DEF_STMT (def));
      SSA_NAME_OCCURS_IN_ABNORMAL_PHI (*new_name_ptr) = abnormal;

      SSA_NAME_AUX (def) = new_name_ptr;
    }
}


/* Renames the use *OP_P.  */

static void
rename_use_op (use_operand_p op_p)
{
  tree *new_name_ptr;

  if (TREE_CODE (USE_FROM_PTR (op_p)) != SSA_NAME)
    return;

  new_name_ptr = SSA_NAME_AUX (USE_FROM_PTR (op_p));

  /* Something defined outside of the loop.  */
  if (!new_name_ptr)
    return;

  /* An ordinary ssa name defined in the loop.  */

  SET_USE (op_p, *new_name_ptr);
}


/* Renames the def *OP_P in statement STMT.  */

static void
rename_def_op (def_operand_p op_p, tree stmt)
{
  tree *new_name_ptr;

  if (TREE_CODE (DEF_FROM_PTR (op_p)) != SSA_NAME)
    return;

  new_name_ptr = SSA_NAME_AUX (DEF_FROM_PTR (op_p));

  /* Something defined outside of the loop.  */
  if (!new_name_ptr)
    return;

  /* An ordinary ssa name defined in the loop.  */

  SET_DEF (op_p, *new_name_ptr);
  SSA_NAME_DEF_STMT (DEF_FROM_PTR (op_p)) = stmt;
}


/* Renames the variables in basic block BB.  */

static void
rename_variables_in_bb (basic_block bb)
{
  tree phi;
  block_stmt_iterator bsi;
  tree stmt;
  stmt_ann_t ann;
  use_optype uses;
  vuse_optype vuses;
  def_optype defs;
  v_may_def_optype v_may_defs;
  v_must_def_optype v_must_defs;
  unsigned i;
  edge e;
  edge_iterator ei;
  struct loop *loop = bb->loop_father;

  for (phi = phi_nodes (bb); phi; phi = PHI_CHAIN (phi))
    rename_def_op (PHI_RESULT_PTR (phi), phi);

  for (bsi = bsi_start (bb); !bsi_end_p (bsi); bsi_next (&bsi))
    {
      stmt = bsi_stmt (bsi);
      get_stmt_operands (stmt);
      ann = stmt_ann (stmt);

      uses = USE_OPS (ann);
      for (i = 0; i < NUM_USES (uses); i++)
        rename_use_op (USE_OP_PTR (uses, i));

      defs = DEF_OPS (ann);
      for (i = 0; i < NUM_DEFS (defs); i++)
        rename_def_op (DEF_OP_PTR (defs, i), stmt);

      vuses = VUSE_OPS (ann);
      for (i = 0; i < NUM_VUSES (vuses); i++)
        rename_use_op (VUSE_OP_PTR (vuses, i));

      v_may_defs = V_MAY_DEF_OPS (ann);
      for (i = 0; i < NUM_V_MAY_DEFS (v_may_defs); i++)
        {
          rename_use_op (V_MAY_DEF_OP_PTR (v_may_defs, i));
          rename_def_op (V_MAY_DEF_RESULT_PTR (v_may_defs, i), stmt);
        }

      v_must_defs = V_MUST_DEF_OPS (ann);
      for (i = 0; i < NUM_V_MUST_DEFS (v_must_defs); i++)
        {
          rename_use_op (V_MUST_DEF_KILL_PTR (v_must_defs, i));
          rename_def_op (V_MUST_DEF_RESULT_PTR (v_must_defs, i), stmt);
        }
    }

  FOR_EACH_EDGE (e, ei, bb->succs)
    {
      if (!flow_bb_inside_loop_p (loop, e->dest))
        continue;
      for (phi = phi_nodes (e->dest); phi; phi = PHI_CHAIN (phi))
        rename_use_op (PHI_ARG_DEF_PTR_FROM_EDGE (phi, e));
    }
}


/* Releases the structures holding the new ssa names.  */

static void
free_new_names (bitmap definitions)
{
  unsigned ver;
  bitmap_iterator bi;

  EXECUTE_IF_SET_IN_BITMAP (definitions, 0, ver, bi)
    {
      tree def = ssa_name (ver);

      if (SSA_NAME_AUX (def))
        {
          free (SSA_NAME_AUX (def));
          SSA_NAME_AUX (def) = NULL;
        }
    }
}


/* Renames the variables in the newly generated LOOP.  */

static void
rename_variables_in_loop (struct loop *loop)
{
  unsigned i;
  basic_block *bbs;

  bbs = get_loop_body (loop);

  for (i = 0; i < loop->num_nodes; i++)
    rename_variables_in_bb (bbs[i]);

  free (bbs);
}
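
/* The renaming utilities above are used together, as in
   slpeel_tree_peel_loop_to_edge below (a sketch; the CFG manipulation
   performed between these calls is omitted):

       definitions = marked_ssa_names ();
       allocate_new_names (definitions);
       ...
       rename_variables_in_loop (new_loop);
       ...
       free_new_names (definitions);
       BITMAP_XFREE (definitions);  */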


/* Update the PHI nodes of NEW_LOOP.

   NEW_LOOP is a duplicate of ORIG_LOOP.
   AFTER indicates whether NEW_LOOP executes before or after ORIG_LOOP:
   AFTER is true if NEW_LOOP executes after ORIG_LOOP, and false if it
   executes before it.  */

static void
slpeel_update_phis_for_duplicate_loop (struct loop *orig_loop,
                                       struct loop *new_loop, bool after)
{
  tree *new_name_ptr, new_ssa_name;
  tree phi_new, phi_orig;
  tree def;
  edge orig_loop_latch = loop_latch_edge (orig_loop);
  edge orig_entry_e = loop_preheader_edge (orig_loop);
  edge new_loop_exit_e = new_loop->exit_edges[0];
  edge new_loop_entry_e = loop_preheader_edge (new_loop);
  edge entry_arg_e = (after ? orig_loop_latch : orig_entry_e);

  /*
     step 1. For each loop-header-phi:
             Add the first phi argument for the phi in NEW_LOOP
             (the one associated with the entry of NEW_LOOP)

     step 2. For each loop-header-phi:
             Add the second phi argument for the phi in NEW_LOOP
             (the one associated with the latch of NEW_LOOP)

     step 3. Update the phis in the successor block of NEW_LOOP.

        case 1: NEW_LOOP was placed before ORIG_LOOP:
                The successor block of NEW_LOOP is the header of ORIG_LOOP.
                Updating the phis in the successor block can therefore be done
                along with the scanning of the loop header phis, because the
                header blocks of ORIG_LOOP and NEW_LOOP have exactly the same
                phi nodes, organized in the same order.

        case 2: NEW_LOOP was placed after ORIG_LOOP:
                The successor block of NEW_LOOP is the original exit block of
                ORIG_LOOP - the phis to be updated are the loop-closed-ssa phis.
                We postpone updating these phis to a later stage (when
                loop guards are added).
   */


  /* Scan the phis in the headers of the old and new loops
     (they are organized in exactly the same order).  */

  for (phi_new = phi_nodes (new_loop->header),
       phi_orig = phi_nodes (orig_loop->header);
       phi_new && phi_orig;
       phi_new = PHI_CHAIN (phi_new), phi_orig = PHI_CHAIN (phi_orig))
    {
      /* step 1.  */
      def = PHI_ARG_DEF_FROM_EDGE (phi_orig, entry_arg_e);
      add_phi_arg (phi_new, def, new_loop_entry_e);

      /* step 2.  */
      def = PHI_ARG_DEF_FROM_EDGE (phi_orig, orig_loop_latch);
      if (TREE_CODE (def) != SSA_NAME)
        continue;

      new_name_ptr = SSA_NAME_AUX (def);
      if (!new_name_ptr)
        /* Something defined outside of the loop.  */
        continue;

      /* An ordinary ssa name defined in the loop.  */
      new_ssa_name = *new_name_ptr;
      add_phi_arg (phi_new, new_ssa_name, loop_latch_edge (new_loop));

      /* step 3 (case 1).  */
      if (!after)
        {
          gcc_assert (new_loop_exit_e == orig_entry_e);
          SET_PHI_ARG_DEF (phi_orig,
                           phi_arg_from_edge (phi_orig, new_loop_exit_e),
                           new_ssa_name);
        }
    }
}


/* Update PHI nodes for a guard of the LOOP.

   Input:
   - LOOP, GUARD_EDGE: LOOP is a loop for which we added guard code that
        controls whether LOOP is to be executed.  GUARD_EDGE is the edge that
        originates from the guard-bb, skips LOOP and reaches the (unique) exit
        bb of LOOP.  This loop-exit-bb is an empty bb with one successor.
        We denote this bb NEW_MERGE_BB because it had a single predecessor (the
        LOOP header) before the guard code was added, and has now become a
        merge point of two paths - the path that ends with the LOOP exit-edge,
        and the path that ends with GUARD_EDGE.

   This function creates and updates the relevant phi nodes to account for
   the new incoming edge (GUARD_EDGE) into NEW_MERGE_BB:
   1. Create phi nodes at NEW_MERGE_BB.
   2. Update the phi nodes at the successor of NEW_MERGE_BB (denoted
      UPDATE_BB).  UPDATE_BB was the exit-bb of LOOP before NEW_MERGE_BB
      was added:

   ===> The CFG before the guard-code was added:
        LOOP_header_bb:
          if (exit_loop) goto update_bb : LOOP_header_bb
        update_bb:

   ==> The CFG after the guard-code was added:
        guard_bb:
          if (LOOP_guard_condition) goto new_merge_bb : LOOP_header_bb
        LOOP_header_bb:
          if (exit_loop_condition) goto new_merge_bb : LOOP_header_bb
        new_merge_bb:
          goto update_bb
        update_bb:

   - ENTRY_PHIS: If ENTRY_PHIS is TRUE, this indicates that the phis in
        UPDATE_BB are loop entry phis, like the phis in the LOOP header,
        organized in the same order.
        If ENTRY_PHIS is FALSE, this indicates that the phis in UPDATE_BB are
        loop exit phis.

   - IS_NEW_LOOP: TRUE if LOOP is a new loop (a duplicated copy of another
        "original" loop).  FALSE if LOOP is an original loop (not a newly
        created copy).  The SSA_NAME_AUX fields of the defs in the original
        loop are the corresponding new ssa-names used in the new duplicated
        loop copy.  IS_NEW_LOOP indicates which of the two args of the phi
        nodes in UPDATE_BB takes the original ssa-name, and which takes the
        new name: If IS_NEW_LOOP is TRUE, the phi-arg that is associated with
        the LOOP-exit-edge takes the new-name, and the phi-arg that is
        associated with GUARD_EDGE takes the original name.  If IS_NEW_LOOP is
        FALSE, it's the other way around.
   */

static void
slpeel_update_phi_nodes_for_guard (edge guard_edge,
                                   struct loop *loop,
                                   bool entry_phis,
                                   bool is_new_loop)
{
  tree orig_phi, new_phi, update_phi;
  tree guard_arg, loop_arg;
  basic_block new_merge_bb = guard_edge->dest;
  edge e = EDGE_SUCC (new_merge_bb, 0);
  basic_block update_bb = e->dest;
  basic_block orig_bb = (entry_phis ? loop->header : update_bb);

  for (orig_phi = phi_nodes (orig_bb), update_phi = phi_nodes (update_bb);
       orig_phi && update_phi;
       orig_phi = PHI_CHAIN (orig_phi), update_phi = PHI_CHAIN (update_phi))
    {
      /* 1. Generate new phi node in NEW_MERGE_BB:  */
      new_phi = create_phi_node (SSA_NAME_VAR (PHI_RESULT (orig_phi)),
                                 new_merge_bb);

      /* 2. NEW_MERGE_BB has two incoming edges: GUARD_EDGE and the exit-edge
            of LOOP.  Set the two phi args in NEW_PHI for these edges:  */
      if (entry_phis)
        {
          loop_arg = PHI_ARG_DEF_FROM_EDGE (orig_phi,
                                            EDGE_SUCC (loop->latch, 0));
          guard_arg = PHI_ARG_DEF_FROM_EDGE (orig_phi, loop->entry_edges[0]);
        }
      else /* exit phis */
        {
          tree orig_def = PHI_ARG_DEF_FROM_EDGE (orig_phi, e);
          tree *new_name_ptr = SSA_NAME_AUX (orig_def);
          tree new_name;

          if (new_name_ptr)
            new_name = *new_name_ptr;
          else
            /* Something defined outside of the loop.  */
            new_name = orig_def;

          if (is_new_loop)
            {
              guard_arg = orig_def;
              loop_arg = new_name;
            }
          else
            {
              guard_arg = new_name;
              loop_arg = orig_def;
            }
        }
      add_phi_arg (new_phi, loop_arg, loop->exit_edges[0]);
      add_phi_arg (new_phi, guard_arg, guard_edge);

      /* 3. Update phi in successor block.  */
      gcc_assert (PHI_ARG_DEF_FROM_EDGE (update_phi, e) == loop_arg
                  || PHI_ARG_DEF_FROM_EDGE (update_phi, e) == guard_arg);
      SET_PHI_ARG_DEF (update_phi, phi_arg_from_edge (update_phi, e),
                       PHI_RESULT (new_phi));
    }

  set_phi_nodes (new_merge_bb, phi_reverse (phi_nodes (new_merge_bb)));
}


/* Make the LOOP iterate NITERS times.  This is done by adding a new IV
   that starts at zero, increases by one in each iteration, and whose limit
   is NITERS.

   Assumption: the exit-condition of LOOP is the last stmt in the loop.  */

static void
slpeel_make_loop_iterate_ntimes (struct loop *loop, tree niters)
{
  tree indx_before_incr, indx_after_incr, cond_stmt, cond;
  tree orig_cond;
  edge exit_edge = loop->exit_edges[0];
  block_stmt_iterator loop_exit_bsi = bsi_last (exit_edge->src);
  tree begin_label = tree_block_label (loop->latch);
  tree exit_label = tree_block_label (loop->single_exit->dest);
  tree init = build_int_cst (TREE_TYPE (niters), 0);
  tree step = build_int_cst (TREE_TYPE (niters), 1);
  tree then_label;
  tree else_label;

  orig_cond = get_loop_exit_condition (loop);
  gcc_assert (orig_cond);
  create_iv (init, step, NULL_TREE, loop,
             &loop_exit_bsi, false, &indx_before_incr, &indx_after_incr);

  /* CREATE_IV uses BSI_INSERT with TSI_NEW_STMT, so we want to get
     back to the exit condition statement.  */
  bsi_next (&loop_exit_bsi);
  gcc_assert (bsi_stmt (loop_exit_bsi) == orig_cond);

  if (exit_edge->flags & EDGE_TRUE_VALUE) /* 'then' edge exits the loop.  */
    {
      cond = build2 (GE_EXPR, boolean_type_node, indx_after_incr, niters);
      then_label = build1 (GOTO_EXPR, void_type_node, exit_label);
      else_label = build1 (GOTO_EXPR, void_type_node, begin_label);
    }
  else /* 'then' edge loops back.  */
    {
      cond = build2 (LT_EXPR, boolean_type_node, indx_after_incr, niters);
      then_label = build1 (GOTO_EXPR, void_type_node, begin_label);
      else_label = build1 (GOTO_EXPR, void_type_node, exit_label);
    }

  cond_stmt = build3 (COND_EXPR, TREE_TYPE (orig_cond), cond,
                      then_label, else_label);
  bsi_insert_before (&loop_exit_bsi, cond_stmt, BSI_SAME_STMT);

  /* Remove old loop exit test:  */
  bsi_remove (&loop_exit_bsi);

  if (vect_debug_stats (loop) || vect_debug_details (loop))
    print_generic_expr (dump_file, cond_stmt, TDF_SLIM);

  loop->nb_iterations = niters;
}
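
/* For example (a sketch of the effect): for a loop whose 'then' edge exits,
   after the transformation the loop ends with

       indx_after_incr = indx_before_incr + 1;
       if (indx_after_incr >= niters) goto exit_label; else goto begin_label;

   so the loop executes exactly NITERS iterations (the new IV starts at
   zero).  */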


/* Given LOOP, this function generates a new copy of it and puts the copy
   on edge E, which is either the entry or the exit edge of LOOP.  */

static struct loop *
slpeel_tree_duplicate_loop_to_edge_cfg (struct loop *loop, struct loops *loops,
                                        edge e)
{
  struct loop *new_loop;
  basic_block *new_bbs, *bbs;
  bool at_exit;
  bool was_imm_dom;
  basic_block exit_dest;
  tree phi, phi_arg;

  at_exit = (e == loop->exit_edges[0]);
  if (!at_exit && e != loop_preheader_edge (loop))
    {
      if (dump_file && (dump_flags & TDF_DETAILS))
        fprintf (dump_file, "Edge is not an entry nor an exit edge.\n");
      return NULL;
    }

  bbs = get_loop_body (loop);

  /* Check whether duplication is possible.  */
  if (!can_copy_bbs_p (bbs, loop->num_nodes))
    {
      if (vect_debug_stats (loop) || vect_debug_details (loop))
        fprintf (dump_file, "Cannot copy basic blocks.\n");
      free (bbs);
      return NULL;
    }

  /* Generate new loop structure.  */
  new_loop = duplicate_loop (loops, loop, loop->outer);
  if (!new_loop)
    {
      if (vect_debug_stats (loop) || vect_debug_details (loop))
        fprintf (dump_file, "duplicate_loop returns NULL.\n");
      free (bbs);
      return NULL;
    }

  exit_dest = loop->exit_edges[0]->dest;
  was_imm_dom = (get_immediate_dominator (CDI_DOMINATORS,
                                          exit_dest) == loop->header ?
                 true : false);

  new_bbs = xmalloc (sizeof (basic_block) * loop->num_nodes);

  copy_bbs (bbs, loop->num_nodes, new_bbs, NULL, 0, NULL, NULL);

  /* Duplicate the phi args at the exit dest, as coming also from the
     exit of the duplicated loop.  */
  for (phi = phi_nodes (exit_dest); phi; phi = PHI_CHAIN (phi))
    {
      phi_arg = PHI_ARG_DEF_FROM_EDGE (phi, loop->exit_edges[0]);
      if (phi_arg)
        {
          edge new_loop_exit_edge;

          if (EDGE_SUCC (new_loop->header, 0)->dest == new_loop->latch)
            new_loop_exit_edge = EDGE_SUCC (new_loop->header, 1);
          else
            new_loop_exit_edge = EDGE_SUCC (new_loop->header, 0);

          add_phi_arg (phi, phi_arg, new_loop_exit_edge);
        }
    }

  if (at_exit) /* Add the loop copy at exit.  */
    {
      redirect_edge_and_branch_force (e, new_loop->header);
      set_immediate_dominator (CDI_DOMINATORS, new_loop->header, e->src);
      if (was_imm_dom)
        set_immediate_dominator (CDI_DOMINATORS, exit_dest, new_loop->header);
    }
  else /* Add the copy at entry.  */
    {
      edge new_exit_e;
      edge entry_e = loop_preheader_edge (loop);
      basic_block preheader = entry_e->src;

      if (!flow_bb_inside_loop_p (new_loop,
                                  EDGE_SUCC (new_loop->header, 0)->dest))
        new_exit_e = EDGE_SUCC (new_loop->header, 0);
      else
        new_exit_e = EDGE_SUCC (new_loop->header, 1);

      redirect_edge_and_branch_force (new_exit_e, loop->header);
      set_immediate_dominator (CDI_DOMINATORS, loop->header,
                               new_exit_e->src);

      /* We have to add phi args to the loop->header here as coming
         from the new_exit_e edge.  */
      for (phi = phi_nodes (loop->header); phi; phi = PHI_CHAIN (phi))
        {
          phi_arg = PHI_ARG_DEF_FROM_EDGE (phi, entry_e);
          if (phi_arg)
            add_phi_arg (phi, phi_arg, new_exit_e);
        }

      redirect_edge_and_branch_force (entry_e, new_loop->header);
      set_immediate_dominator (CDI_DOMINATORS, new_loop->header, preheader);
    }

  flow_loop_scan (new_loop, LOOP_ALL);
  flow_loop_scan (loop, LOOP_ALL);
  free (new_bbs);
  free (bbs);

  return new_loop;
}


/* Given the condition expression COND, put it as the last statement of
   GUARD_BB.  EXIT_BB is the basic block reached when the guard skips the
   loop (assumed to be the single exit of the guarded loop), and DOM_BB
   becomes the immediate dominator of EXIT_BB.
   Returns the skip edge.  */

static edge
slpeel_add_loop_guard (basic_block guard_bb, tree cond, basic_block exit_bb,
                       basic_block dom_bb)
{
  block_stmt_iterator bsi;
  edge new_e, enter_e;
  tree cond_stmt, then_label, else_label;

  enter_e = EDGE_SUCC (guard_bb, 0);
  enter_e->flags &= ~EDGE_FALLTHRU;
  enter_e->flags |= EDGE_FALSE_VALUE;
  bsi = bsi_last (guard_bb);

  then_label = build1 (GOTO_EXPR, void_type_node,
                       tree_block_label (exit_bb));
  else_label = build1 (GOTO_EXPR, void_type_node,
                       tree_block_label (enter_e->dest));
  cond_stmt = build3 (COND_EXPR, void_type_node, cond,
                      then_label, else_label);
  bsi_insert_after (&bsi, cond_stmt, BSI_NEW_STMT);
  /* Add new edge to connect entry block to the second loop.  */
  new_e = make_edge (guard_bb, exit_bb, EDGE_TRUE_VALUE);
  set_immediate_dominator (CDI_DOMINATORS, exit_bb, dom_bb);
  return new_e;
}
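
/* Example usage (a sketch, mirroring the first guard generated in
   slpeel_tree_peel_loop_to_edge below):

       pre_condition =
         build2 (LE_EXPR, boolean_type_node, first_niters, integer_zero_node);
       skip_e = slpeel_add_loop_guard (bb_before_first_loop, pre_condition,
                                       bb_before_second_loop,
                                       bb_before_first_loop);  */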


/* This function verifies that the following restrictions apply to LOOP:
   (1) it is innermost
   (2) it consists of exactly 2 basic blocks - header, and an empty latch.
   (3) it is single entry, single exit
   (4) its exit condition is the last stmt in the header
   (5) E is the entry/exit edge of LOOP.
 */

static bool
slpeel_can_duplicate_loop_p (struct loop *loop, edge e)
{
  edge exit_e = loop->exit_edges[0];
  edge entry_e = loop_preheader_edge (loop);
  tree orig_cond = get_loop_exit_condition (loop);
  block_stmt_iterator loop_exit_bsi = bsi_last (exit_e->src);

  if (any_marked_for_rewrite_p ())
    return false;

  if (loop->inner
      /* All loops have an outer scope; the only case loop->outer is NULL is for
         the function itself.  */
      || !loop->outer
      || loop->num_nodes != 2
      || !empty_block_p (loop->latch)
      || loop->num_exits != 1
      || loop->num_entries != 1
      /* Verify that new loop exit condition can be trivially modified.  */
      || (!orig_cond || orig_cond != bsi_stmt (loop_exit_bsi))
      || (e != exit_e && e != entry_e))
    return false;

  return true;
}

#ifdef ENABLE_CHECKING
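/* Function slpeel_verify_cfg_after_peeling.

   Sanity checks on the CFG created by slpeel_tree_peel_loop_to_edge:
   FIRST_LOOP and SECOND_LOOP are the two loops resulting from the peeling,
   in their order of execution.  */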
static void
slpeel_verify_cfg_after_peeling (struct loop *first_loop,
                                 struct loop *second_loop)
{
  basic_block loop1_exit_bb = first_loop->exit_edges[0]->dest;
  basic_block loop2_entry_bb = second_loop->pre_header;
  basic_block loop1_entry_bb = loop_preheader_edge (first_loop)->src;

  /* A guard that controls whether the second_loop is to be executed or
     skipped is placed in first_loop->exit.  first_loop->exit therefore has
     two successors - one is the preheader of second_loop, and the other is
     a bb after second_loop.  */
  gcc_assert (EDGE_COUNT (loop1_exit_bb->succs) == 2);


  /* 1. Verify that one of the successors of first_loop->exit is the
        preheader of second_loop.  */

  /* The preheader of second_loop is expected to have two predecessors:
     first_loop->exit and the block that precedes first_loop.  */

  gcc_assert (EDGE_COUNT (loop2_entry_bb->preds) == 2
              && ((EDGE_PRED (loop2_entry_bb, 0)->src == loop1_exit_bb
                   && EDGE_PRED (loop2_entry_bb, 1)->src == loop1_entry_bb)
                  || (EDGE_PRED (loop2_entry_bb, 1)->src == loop1_exit_bb
                      && EDGE_PRED (loop2_entry_bb, 0)->src == loop1_entry_bb)));

  /* 2. Verify that the other successor of first_loop->exit is after the
        second_loop.  */
  /* TODO */
}
#endif

/* Function slpeel_tree_peel_loop_to_edge.

   Peel the first (last) iterations of LOOP into a new prolog (epilog) loop
   that is placed on the entry (exit) edge E of LOOP.  After this transformation
   we have two loops, one after the other - first-loop iterates FIRST_NITERS
   times, and second-loop iterates the remainder NITERS - FIRST_NITERS times.

   Input:
   - LOOP: the loop to be peeled.
   - E: the exit or entry edge of LOOP.
        If it is the entry edge, we peel the first iterations of LOOP.  In this
        case first-loop is LOOP, and second-loop is the newly created loop.
        If it is the exit edge, we peel the last iterations of LOOP.  In this
        case, first-loop is the newly created loop, and second-loop is LOOP.
   - NITERS: the number of iterations that LOOP iterates.
   - FIRST_NITERS: the number of iterations that the first-loop should iterate.
   - UPDATE_FIRST_LOOP_COUNT: specifies whether this function is responsible
        for updating the loop bound of the first-loop to FIRST_NITERS.  If it
        is false, the caller of this function may want to take care of this
        (this can be useful if we don't want new stmts added to first-loop).

   Output:
   The function returns a pointer to the new loop-copy, or NULL if it failed
   to perform the transformation.

   The function generates two if-then-else guards: one before the first loop,
   and the other before the second loop:
   The first guard is:
     if (FIRST_NITERS == 0) then skip the first loop,
     and go directly to the second loop.
   The second guard is:
     if (FIRST_NITERS == NITERS) then skip the second loop.

   FORNOW only simple loops are supported (see slpeel_can_duplicate_loop_p).
   FORNOW the resulting code will not be in loop-closed-ssa form.
*/

struct loop *
slpeel_tree_peel_loop_to_edge (struct loop *loop, struct loops *loops,
                               edge e, tree first_niters,
                               tree niters, bool update_first_loop_count)
{
  struct loop *new_loop = NULL, *first_loop, *second_loop;
  edge skip_e;
  tree pre_condition;
  bitmap definitions;
  basic_block bb_before_second_loop, bb_after_second_loop;
  basic_block bb_before_first_loop;
  basic_block bb_between_loops;
  edge exit_e = loop->exit_edges[0];

  if (!slpeel_can_duplicate_loop_p (loop, e))
    return NULL;

  /* We have to initialize cfg_hooks.  Then, when calling
     cfg_hooks->split_edge, the function tree_split_edge
     is actually called and, when calling cfg_hooks->duplicate_block,
     the function tree_duplicate_bb is called.  */
  tree_register_cfg_hooks ();


  /* 1. Generate a copy of LOOP and put it on E (E is the entry/exit of LOOP).
        Resulting CFG would be:

        first_loop:
        do {
        } while ...

        second_loop:
        do {
        } while ...

        orig_exit_bb:
   */

  if (!(new_loop = slpeel_tree_duplicate_loop_to_edge_cfg (loop, loops, e)))
    {
      if (vect_debug_stats (loop) || vect_debug_details (loop))
        fprintf (dump_file, "tree_duplicate_loop_to_edge_cfg failed.\n");
      return NULL;
    }

  if (e == exit_e)
    {
      /* NEW_LOOP was placed after LOOP.  */
      first_loop = loop;
      second_loop = new_loop;
    }
  else
    {
      /* NEW_LOOP was placed before LOOP.  */
      first_loop = new_loop;
      second_loop = loop;
    }

  definitions = marked_ssa_names ();
  allocate_new_names (definitions);
  slpeel_update_phis_for_duplicate_loop (loop, new_loop, e == exit_e);
  rename_variables_in_loop (new_loop);


  /* 2. Add the guard that controls whether the first loop is executed.
        Resulting CFG would be:

        bb_before_first_loop:
        if (FIRST_NITERS == 0) GOTO bb_before_second_loop
                               GOTO first-loop

        first_loop:
        do {
        } while ...

        bb_before_second_loop:

        second_loop:
        do {
        } while ...

        orig_exit_bb:
   */

  bb_before_first_loop = split_edge (loop_preheader_edge (first_loop));
  add_bb_to_loop (bb_before_first_loop, first_loop->outer);
  bb_before_second_loop = split_edge (first_loop->exit_edges[0]);
  add_bb_to_loop (bb_before_second_loop, first_loop->outer);
  flow_loop_scan (first_loop, LOOP_ALL);
  flow_loop_scan (second_loop, LOOP_ALL);

  pre_condition =
    build2 (LE_EXPR, boolean_type_node, first_niters, integer_zero_node);
  skip_e = slpeel_add_loop_guard (bb_before_first_loop, pre_condition,
                                  bb_before_second_loop, bb_before_first_loop);
  slpeel_update_phi_nodes_for_guard (skip_e, first_loop, true /* entry-phis */,
                                     first_loop == new_loop);


  /* 3. Add the guard that controls whether the second loop is executed.
        Resulting CFG would be:

        bb_before_first_loop:
        if (FIRST_NITERS == 0) GOTO bb_before_second_loop (skip first loop)
                               GOTO first-loop

        first_loop:
        do {
        } while ...

        bb_between_loops:
        if (FIRST_NITERS == NITERS) GOTO bb_after_second_loop (skip second loop)
                                    GOTO bb_before_second_loop

        bb_before_second_loop:

        second_loop:
        do {
        } while ...

        bb_after_second_loop:

        orig_exit_bb:
   */

  bb_between_loops = split_edge (first_loop->exit_edges[0]);
  add_bb_to_loop (bb_between_loops, first_loop->outer);
  bb_after_second_loop = split_edge (second_loop->exit_edges[0]);
  add_bb_to_loop (bb_after_second_loop, second_loop->outer);
  flow_loop_scan (first_loop, LOOP_ALL);
  flow_loop_scan (second_loop, LOOP_ALL);

  pre_condition = build2 (EQ_EXPR, boolean_type_node, first_niters, niters);
  skip_e = slpeel_add_loop_guard (bb_between_loops, pre_condition,
                                  bb_after_second_loop, bb_before_first_loop);
  slpeel_update_phi_nodes_for_guard (skip_e, second_loop, false /* exit-phis */,
                                     second_loop == new_loop);

  /* Flow loop scan does not update loop->single_exit field.  */
  first_loop->single_exit = first_loop->exit_edges[0];
  second_loop->single_exit = second_loop->exit_edges[0];

  /* 4. Make first-loop iterate FIRST_NITERS times, if requested.  */
  if (update_first_loop_count)
    slpeel_make_loop_iterate_ntimes (first_loop, first_niters);

  free_new_names (definitions);
  BITMAP_XFREE (definitions);
  unmark_all_for_rewrite ();

  return new_loop;
}
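
/* Example usage (a sketch): this is how the vectorizer is expected to peel
   off the last iterations when the loop bound does not divide by the
   vectorization factor - the epilog copy is placed on LOOP's exit edge.
   NI_NAME and RATIO_MULT_VF_NAME are hypothetical SSA names holding the
   loop bound and the largest multiple of the vectorization factor not
   exceeding it:

       new_loop = slpeel_tree_peel_loop_to_edge (loop, loops,
                                                 loop->exit_edges[0],
                                                 ratio_mult_vf_name,
                                                 ni_name, false);  */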

\f
/* Here the vectorizer proper starts.  */

/*************************************************************************
  Vectorization Utilities.
 *************************************************************************/

/* Function new_stmt_vec_info.

   Create and initialize a new stmt_vec_info struct for STMT.  */

stmt_vec_info
new_stmt_vec_info (tree stmt, struct loop *loop)
{
  stmt_vec_info res;
  res = (stmt_vec_info) xcalloc (1, sizeof (struct _stmt_vec_info));

  STMT_VINFO_TYPE (res) = undef_vec_info_type;
  STMT_VINFO_STMT (res) = stmt;
  STMT_VINFO_LOOP (res) = loop;
  STMT_VINFO_RELEVANT_P (res) = 0;
  STMT_VINFO_VECTYPE (res) = NULL;
  STMT_VINFO_VEC_STMT (res) = NULL;
  STMT_VINFO_DATA_REF (res) = NULL;
  STMT_VINFO_MEMTAG (res) = NULL;
  STMT_VINFO_VECT_DR_BASE (res) = NULL;
  STMT_VINFO_VECT_INIT_OFFSET (res) = NULL_TREE;
  STMT_VINFO_VECT_STEP (res) = NULL_TREE;
  STMT_VINFO_VECT_BASE_ALIGNED_P (res) = false;
  STMT_VINFO_VECT_MISALIGNMENT (res) = NULL_TREE;

  return res;
}


/* Function new_loop_vec_info.

   Create and initialize a new loop_vec_info struct for LOOP, as well as
   stmt_vec_info structs for all the stmts in LOOP.  */

loop_vec_info
new_loop_vec_info (struct loop *loop)
{
  loop_vec_info res;
  basic_block *bbs;
  block_stmt_iterator si;
  unsigned int i;

  res = (loop_vec_info) xcalloc (1, sizeof (struct _loop_vec_info));

  bbs = get_loop_body (loop);

  /* Create stmt_info for all stmts in the loop.  */
  for (i = 0; i < loop->num_nodes; i++)
    {
      basic_block bb = bbs[i];
      for (si = bsi_start (bb); !bsi_end_p (si); bsi_next (&si))
        {
          tree stmt = bsi_stmt (si);
          stmt_ann_t ann;

          get_stmt_operands (stmt);
          ann = stmt_ann (stmt);
          set_stmt_info (ann, new_stmt_vec_info (stmt, loop));
        }
    }

  LOOP_VINFO_LOOP (res) = loop;
  LOOP_VINFO_BBS (res) = bbs;
  LOOP_VINFO_EXIT_COND (res) = NULL;
  LOOP_VINFO_NITERS (res) = NULL;
  LOOP_VINFO_VECTORIZABLE_P (res) = 0;
  LOOP_DO_PEELING_FOR_ALIGNMENT (res) = false;
  LOOP_VINFO_VECT_FACTOR (res) = 0;
  VARRAY_GENERIC_PTR_INIT (LOOP_VINFO_DATAREF_WRITES (res), 20,
                           "loop_write_datarefs");
  VARRAY_GENERIC_PTR_INIT (LOOP_VINFO_DATAREF_READS (res), 20,
                           "loop_read_datarefs");
  LOOP_VINFO_UNALIGNED_DR (res) = NULL;

  return res;
}


/* Function destroy_loop_vec_info.

   Free LOOP_VINFO struct, as well as all the stmt_vec_info structs of all the
   stmts in the loop.  */

void
destroy_loop_vec_info (loop_vec_info loop_vinfo)
{
  struct loop *loop;
  basic_block *bbs;
  int nbbs;
  block_stmt_iterator si;
  int j;

  if (!loop_vinfo)
    return;

  loop = LOOP_VINFO_LOOP (loop_vinfo);

  bbs = LOOP_VINFO_BBS (loop_vinfo);
  nbbs = loop->num_nodes;

  for (j = 0; j < nbbs; j++)
    {
      basic_block bb = bbs[j];
      for (si = bsi_start (bb); !bsi_end_p (si); bsi_next (&si))
        {
          tree stmt = bsi_stmt (si);
          stmt_ann_t ann = stmt_ann (stmt);
          stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
          free (stmt_info);
          set_stmt_info (ann, NULL);
        }
    }

  free (LOOP_VINFO_BBS (loop_vinfo));
  varray_clear (LOOP_VINFO_DATAREF_WRITES (loop_vinfo));
  varray_clear (LOOP_VINFO_DATAREF_READS (loop_vinfo));

  free (loop_vinfo);
}
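
/* A sketch of the intended lifecycle of these structs: the analysis code
   creates a loop_vec_info per candidate loop (which in turn creates the
   stmt_vec_infos), and destroys it if the loop turns out not to be
   vectorizable:

       loop_vec_info loop_vinfo = new_loop_vec_info (loop);
       ... run the analyses, recording their results in loop_vinfo ...
       if (!LOOP_VINFO_VECTORIZABLE_P (loop_vinfo))
         destroy_loop_vec_info (loop_vinfo);  */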


/* Function vect_debug_stats.

   For vectorization statistics dumps.  */

static bool
vect_debug_stats (struct loop *loop)
{
  basic_block bb;
  block_stmt_iterator si;
  tree node = NULL_TREE;

  if (!dump_file || !(dump_flags & TDF_STATS))
    return false;

  if (!loop)
    {
      fprintf (dump_file, "\n");
      return true;
    }

  if (!loop->header)
    return false;

  bb = loop->header;

  for (si = bsi_start (bb); !bsi_end_p (si); bsi_next (&si))
    {
      node = bsi_stmt (si);
      if (node && EXPR_P (node) && EXPR_LOCUS (node))
        break;
    }

  if (node && EXPR_P (node) && EXPR_LOCUS (node)
      && EXPR_FILENAME (node) && EXPR_LINENO (node))
    {
      fprintf (dump_file, "\nloop at %s:%d: ",
               EXPR_FILENAME (node), EXPR_LINENO (node));
      return true;
    }

  return false;
}


/* Function vect_debug_details.

   For vectorization debug dumps.  */

static bool
vect_debug_details (struct loop *loop)
{
  basic_block bb;
  block_stmt_iterator si;
  tree node = NULL_TREE;

  if (!dump_file || !(dump_flags & TDF_DETAILS))
    return false;

  if (!loop)
    {
      fprintf (dump_file, "\n");
      return true;
    }

  if (!loop->header)
    return false;

  bb = loop->header;

  for (si = bsi_start (bb); !bsi_end_p (si); bsi_next (&si))
    {
      node = bsi_stmt (si);
      if (node && EXPR_P (node) && EXPR_LOCUS (node))
        break;
    }

  if (node && EXPR_P (node) && EXPR_LOCUS (node)
      && EXPR_FILENAME (node) && EXPR_LINENO (node))
    {
      fprintf (dump_file, "\nloop at %s:%d: ",
               EXPR_FILENAME (node), EXPR_LINENO (node));
      return true;
    }

  return false;
}


/* Function vect_get_ptr_offset.

   Compute the OFFSET (in bits) of pointer REF modulo the vector-type
   alignment.  */

static tree
vect_get_ptr_offset (tree ref ATTRIBUTE_UNUSED,
                     tree vectype ATTRIBUTE_UNUSED,
                     tree *offset ATTRIBUTE_UNUSED)
{
  /* TODO: Use alignment information.  */
  return NULL_TREE;
}


/* Function vect_strip_conversion.

   Strip conversions that don't narrow the mode.  E.g., for 'short s', the
   widening cast in '(int) s' is stripped; a narrowing conversion such as
   '(short) i' for 'int i' causes NULL_TREE to be returned instead.  */

static tree
vect_strip_conversion (tree expr)
{
  tree to, ti, oprnd0;

  while (TREE_CODE (expr) == NOP_EXPR || TREE_CODE (expr) == CONVERT_EXPR)
    {
      to = TREE_TYPE (expr);
      oprnd0 = TREE_OPERAND (expr, 0);
      ti = TREE_TYPE (oprnd0);

      if (!INTEGRAL_TYPE_P (to) || !INTEGRAL_TYPE_P (ti))
        return NULL_TREE;
      if (GET_MODE_SIZE (TYPE_MODE (to)) < GET_MODE_SIZE (TYPE_MODE (ti)))
        return NULL_TREE;

      expr = oprnd0;
    }
  return expr;
}


/* Function vect_analyze_offset_expr.

   Given an offset expression EXPR received from get_inner_reference, analyze
   it and create an expression for INITIAL_OFFSET by substituting the variables
   of EXPR with the initial_condition of the corresponding access_fn in the
   loop.  E.g.,
     for i
       for (j = 3; j < N; j++)
         a[j].b[i][j] = 0;

   For a[j].b[i][j], EXPR will be 'i * C_i + j * C_j + C'.  'i' cannot be
   substituted, since its access_fn in the inner loop is i.  'j' will be
   substituted with 3.  The INITIAL_OFFSET will be 'i * C_i + C`', where
   C` = 3 * C_j + C.

   Compute MISALIGN (the misalignment of the data reference initial access from
   its base) if possible.  Misalignment can be calculated only if all the
   variables can be substituted with constants, or if a variable is multiplied
   by a multiple of VECTYPE_ALIGNMENT.  In the above example, since 'i' cannot
   be substituted, MISALIGN will be NULL_TREE in case that C_i is not a multiple
   of VECTYPE_ALIGNMENT, and C` otherwise.  (The MISALIGN modulo
   VECTYPE_ALIGNMENT computation is performed in the caller of this function.)

   STEP is the evolution of the data reference in this loop in bytes.
   In the above example, STEP is C_j.

   Return FALSE if the analysis fails, e.g., there is no access_fn for a
   variable.  In this case, all the outputs (INITIAL_OFFSET, MISALIGN and STEP)
   are NULL_TREEs.  Otherwise, return TRUE.  */
static bool
vect_analyze_offset_expr (tree expr,
                          struct loop *loop,
                          tree vectype_alignment,
                          tree *initial_offset,
                          tree *misalign,
                          tree *step)
{
  tree oprnd0;
  tree oprnd1;
  tree left_offset = size_zero_node;
  tree right_offset = size_zero_node;
  tree left_misalign = size_zero_node;
  tree right_misalign = size_zero_node;
  tree left_step = size_zero_node;
  tree right_step = size_zero_node;
  enum tree_code code;
  tree init, evolution;

  *step = NULL_TREE;
  *misalign = NULL_TREE;
  *initial_offset = NULL_TREE;

  /* Strip conversions that don't narrow the mode.  */
  expr = vect_strip_conversion (expr);
  if (!expr)
    return false;

  /* Stop conditions:
     1. Constant.  */
  if (TREE_CODE (expr) == INTEGER_CST)
    {
      *initial_offset = fold_convert (sizetype, expr);
      *misalign = fold_convert (sizetype, expr);
      *step = size_zero_node;
      return true;
    }

  /* 2. Variable.  Try to substitute with initial_condition of the
     corresponding access_fn in the current loop.  */
  if (SSA_VAR_P (expr))
    {
      tree access_fn = analyze_scalar_evolution (loop, expr);

      if (access_fn == chrec_dont_know)
        /* No access_fn.  */
        return false;

      init = initial_condition_in_loop_num (access_fn, loop->num);
      if (init == expr && !expr_invariant_in_loop_p (loop, init))
        /* Not enough information: may be not loop invariant.
           E.g., for a[b[i]], we get a[D], where D=b[i].  EXPR is D, its
           initial_condition is D, but it depends on i - loop's induction
           variable.  */
        return false;

      evolution = evolution_part_in_loop_num (access_fn, loop->num);
      if (evolution && TREE_CODE (evolution) != INTEGER_CST)
        /* Evolution is not constant.  */
        return false;

      if (TREE_CODE (init) == INTEGER_CST)
        *misalign = fold_convert (sizetype, init);
      else
        /* Not constant, misalignment cannot be calculated.  */
        *misalign = NULL_TREE;

      *initial_offset = fold_convert (sizetype, init);

      *step = evolution ? fold_convert (sizetype, evolution) : size_zero_node;
      return true;
    }

  /* Recursive computation.  */
  if (!BINARY_CLASS_P (expr))
    {
      /* We expect to get binary expressions (PLUS/MINUS and MULT).  */
      if (vect_debug_details (NULL))
        {
          fprintf (dump_file, "Not binary expression ");
          print_generic_expr (dump_file, expr, TDF_SLIM);
        }
      return false;
    }
  oprnd0 = TREE_OPERAND (expr, 0);
  oprnd1 = TREE_OPERAND (expr, 1);

  if (!vect_analyze_offset_expr (oprnd0, loop, vectype_alignment, &left_offset,
                                 &left_misalign, &left_step)
      || !vect_analyze_offset_expr (oprnd1, loop, vectype_alignment,
                                    &right_offset, &right_misalign,
                                    &right_step))
    return false;

  /* The type of the operation: plus, minus or mult.  */
  code = TREE_CODE (expr);
  switch (code)
    {
    case MULT_EXPR:
      if (TREE_CODE (right_offset) != INTEGER_CST)
        /* RIGHT_OFFSET can be non-constant, e.g., for arrays of
           variable-sized types.
           FORNOW: We don't support such cases.  */
        return false;

      /* Strip conversions that don't narrow the mode.  */
      left_offset = vect_strip_conversion (left_offset);
      if (!left_offset)
        return false;
      /* Misalignment computation.  */
      if (SSA_VAR_P (left_offset))
        {
          /* If the left side contains a variable that cannot be substituted
             with a constant, we check if the right side is a multiple of
             ALIGNMENT.  */
          if (integer_zerop (size_binop (TRUNC_MOD_EXPR, right_offset,
                                         vectype_alignment)))
            *misalign = size_zero_node;
          else
            /* If the remainder is not zero or the right side isn't constant,
               we can't compute misalignment.  */
            *misalign = NULL_TREE;
        }
      else
        {
          /* The left operand was successfully substituted with a constant.  */
          if (left_misalign)
            /* In case of EXPR '(i * C1 + j) * C2', LEFT_MISALIGN is
               NULL_TREE.  */
            *misalign = size_binop (code, left_misalign, right_misalign);
          else
            *misalign = NULL_TREE;
        }

      /* Step calculation.  */
      /* Multiply the step by the right operand.  */
      *step = size_binop (MULT_EXPR, left_step, right_offset);
      break;

    case PLUS_EXPR:
    case MINUS_EXPR:
      /* Combine the recursive calculations for step and misalignment.  */
      *step = size_binop (code, left_step, right_step);

      if (left_misalign && right_misalign)
        *misalign = size_binop (code, left_misalign, right_misalign);
      else
        *misalign = NULL_TREE;

      break;

    default:
      gcc_unreachable ();
    }

  /* Compute offset.  */
  *initial_offset = fold_convert (sizetype,
                                  fold (build2 (code, TREE_TYPE (left_offset),
                                                left_offset,
                                                right_offset)));
  return true;
}


/* Function vect_get_base_and_offset.

   Return the BASE of the data reference EXPR.
   If VECTYPE is given, also compute the INITIAL_OFFSET from BASE, MISALIGN and
   STEP.
   E.g., for EXPR a.b[i] + 4B, BASE is a, and OFFSET is the overall offset
   'a.b[i] + 4B' from a (can be an expression), MISALIGN is an OFFSET
   instantiated with initial_conditions of access_functions of variables,
   modulo alignment, and STEP is the evolution of the DR_REF in this loop.

   Function get_inner_reference is used for the above in case of ARRAY_REF and
   COMPONENT_REF.

   Input:
   EXPR - the memory reference that is being analyzed
   DR - the data_reference struct of the _original_ memory reference
        (Note: DR_REF (DR) is not necessarily EXPR)
   VECTYPE - the type that defines the alignment (i.e., we compute
             alignment relative to TYPE_ALIGN(VECTYPE))

   Output:
   BASE (returned value) - the base of the data reference EXPR.
                           E.g., if EXPR is a.b[k].c[i][j] the returned
                           base is a.
   INITIAL_OFFSET - initial offset of EXPR from BASE (an expression)
   MISALIGN - offset of EXPR from BASE in bytes (a constant) or NULL_TREE if
              the computation is impossible
   STEP - evolution of the DR_REF in the loop
   BASE_ALIGNED_P - indicates if BASE is aligned

   If something unexpected is encountered (an unsupported form of data-ref),
   then NULL_TREE is returned.  */
1595
1596 static tree
1597 vect_get_base_and_offset (struct data_reference *dr,
1598 tree expr,
1599 tree vectype,
1600 loop_vec_info loop_vinfo,
1601 tree *initial_offset,
1602 tree *misalign,
1603 tree *step,
1604 bool *base_aligned_p)
1605 {
1606 tree this_offset = size_zero_node;
1607 tree this_misalign = size_zero_node;
1608 tree this_step = size_zero_node;
1609 tree base = NULL_TREE;
1610 tree next_ref;
1611 tree oprnd0, oprnd1;
1612 enum tree_code code = TREE_CODE (expr);
1613 HOST_WIDE_INT pbitsize;
1614 HOST_WIDE_INT pbitpos;
1615 tree poffset;
1616 enum machine_mode pmode;
1617 int punsignedp, pvolatilep;
1618 tree bit_pos_in_bytes;
1619 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
1620
1621 *base_aligned_p = false;
1622
1623 switch (code)
1624 {
1625 /* These cases end the recursion: */
1626 case VAR_DECL:
1627 case PARM_DECL:
1628 *initial_offset = size_zero_node;
1629 *step = size_zero_node;
1630 *misalign = size_zero_node;
1631 if (DECL_ALIGN (expr) >= TYPE_ALIGN (vectype))
1632 *base_aligned_p = true;
1633 return expr;
1634
1635 case SSA_NAME:
1636 if (TREE_CODE (TREE_TYPE (expr)) != POINTER_TYPE)
1637 return NULL_TREE;
1638
1639 if (TYPE_ALIGN (TREE_TYPE (TREE_TYPE (expr))) < TYPE_ALIGN (vectype))
1640 {
1641 base = vect_get_ptr_offset (expr, vectype, misalign);
1642 if (base)
1643 *base_aligned_p = true;
1644 }
1645 else
1646 {
1647 *base_aligned_p = true;
1648 *misalign = size_zero_node;
1649 }
1650 *initial_offset = size_zero_node;
1651 *step = size_zero_node;
1652 return expr;
1653
1654 case INTEGER_CST:
1655 *initial_offset = fold_convert (sizetype, expr);
1656 *misalign = fold_convert (sizetype, expr);
1657 *step = size_zero_node;
1658 return expr;
1659
1660 /* These cases continue the recursion: */
1661 case ADDR_EXPR:
1662 oprnd0 = TREE_OPERAND (expr, 0);
1663 next_ref = oprnd0;
1664 break;
1665
1666 case INDIRECT_REF:
1667 oprnd0 = TREE_OPERAND (expr, 0);
1668 next_ref = oprnd0;
1669 break;
1670
1671 case PLUS_EXPR:
1672 case MINUS_EXPR:
1673 oprnd0 = TREE_OPERAND (expr, 0);
1674 oprnd1 = TREE_OPERAND (expr, 1);
1675
1676 /* In case we have a PLUS_EXPR of the form
1677 (oprnd0 + oprnd1), we assume that only oprnd0 determines the base.
1678 This is verified in vect_get_memtag_and_dr. */
1679 base = vect_get_base_and_offset (dr, oprnd1, vectype, loop_vinfo,
1680 &this_offset, &this_misalign,
1681 &this_step, base_aligned_p);
1682 /* Offset was already computed in vect_analyze_pointer_ref_access. */
1683 this_offset = size_zero_node;
1684
1685 if (!base)
1686 this_misalign = NULL_TREE;
1687
1688 next_ref = oprnd0;
1689 break;
1690
1691 default:
1692 if (!handled_component_p (expr))
1693 /* Unsupported expression. */
1694 return NULL_TREE;
1695
1696 /* Find the base and the offset from it. */
1697 next_ref = get_inner_reference (expr, &pbitsize, &pbitpos, &poffset,
1698 &pmode, &punsignedp, &pvolatilep, false);
1699 if (!next_ref)
1700 return NULL_TREE;
1701
1702 if (poffset
1703 && !vect_analyze_offset_expr (poffset, loop, TYPE_SIZE_UNIT (vectype),
1704 &this_offset, &this_misalign,
1705 &this_step))
1706 {
1707 /* Failed to compute offset or step. */
1708 *step = NULL_TREE;
1709 *initial_offset = NULL_TREE;
1710 *misalign = NULL_TREE;
1711 return NULL_TREE;
1712 }
1713
1714 /* Add bit position to OFFSET and MISALIGN. */
1715
1716 bit_pos_in_bytes = size_int (pbitpos / BITS_PER_UNIT);
1717 /* Check that there is no remainder in bits. */
1718 if (pbitpos % BITS_PER_UNIT)
1719 {
1720 if (vect_debug_details (NULL))
1721 fprintf (dump_file, "bit offset not byte-aligned.");
1722 return NULL_TREE;
1723 }
1724 this_offset = fold (size_binop (PLUS_EXPR, bit_pos_in_bytes,
1725 fold_convert (sizetype, this_offset)));
1726 if (this_misalign)
1727 this_misalign = size_binop (PLUS_EXPR, this_misalign, bit_pos_in_bytes);
1728
1729 /* Continue the recursion to refine the base (get_inner_reference returns
1730 &a for &a[i], and not a). */
1731 break;
1732 }
1733
1734 base = vect_get_base_and_offset (dr, next_ref, vectype, loop_vinfo,
1735 initial_offset, misalign, step,
1736 base_aligned_p);
1737 if (base)
1738 {
1739 /* Combine the results. */
1740 if (this_misalign && *misalign)
1741 *misalign = size_binop (PLUS_EXPR, *misalign, this_misalign);
1742 else
1743 *misalign = NULL_TREE;
1744
1745 *step = size_binop (PLUS_EXPR, *step, this_step);
1746
1747 *initial_offset = fold (build2 (PLUS_EXPR, TREE_TYPE (*initial_offset),
1748 *initial_offset, this_offset));
1749
1750 if (vect_debug_details (NULL))
1751 {
1752 print_generic_expr (dump_file, expr, TDF_SLIM);
1753 fprintf (dump_file, "\n --> total offset for ref: ");
1754 print_generic_expr (dump_file, *initial_offset, TDF_SLIM);
1755 fprintf (dump_file, "\n --> total misalign for ref: ");
1756 print_generic_expr (dump_file, *misalign, TDF_SLIM);
1757 fprintf (dump_file, "\n --> total step for ref: ");
1758 print_generic_expr (dump_file, *step, TDF_SLIM);
1759 }
1760 }
1761 return base;
1762 }
1763
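/* Illustrative trace (hypothetical values): for EXPR = a[i] where 'a' is
   a 16-byte-aligned array of ints vectorized as V4SI and 'i' starts at 0
   with step 1, the default case peels the ARRAY_REF via
   get_inner_reference and vect_analyze_offset_expr, contributing
   this_offset = 0, this_misalign = 0 and this_step = 4 (bytes per
   iteration); the recursion then bottoms out at the VAR_DECL 'a', so the
   combined result is BASE = a, INITIAL_OFFSET = 0, MISALIGN = 0,
   STEP = 4 and BASE_ALIGNED_P = true.  */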
1764
1765 /* Function vect_can_force_dr_alignment_p.
1766
1767 Returns whether the alignment of DECL can be forced to an ALIGNMENT-bit
1768 boundary. */
1769
1770 static bool
1771 vect_can_force_dr_alignment_p (tree decl, unsigned int alignment)
1772 {
1773 if (TREE_CODE (decl) != VAR_DECL)
1774 return false;
1775
1776 if (DECL_EXTERNAL (decl))
1777 return false;
1778
1779 if (TREE_ASM_WRITTEN (decl))
1780 return false;
1781
1782 if (TREE_STATIC (decl))
1783 return (alignment <= MAX_OFILE_ALIGNMENT);
1784 else
1785 /* This is not 100% correct. The absolute correct stack alignment
1786 is STACK_BOUNDARY. We're supposed to hope, but not assume, that
1787 PREFERRED_STACK_BOUNDARY is honored by all translation units.
1788 However, until someone implements forced stack alignment, SSE
1789 isn't really usable without this. */
1790 return (alignment <= PREFERRED_STACK_BOUNDARY);
1791 }
1792
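/* For illustration: a 'static short x[256]' that this translation unit
   still owns (not DECL_EXTERNAL, assembly not yet written) can have its
   DECL_ALIGN raised to, say, 128 bits so that V8HI accesses to it become
   aligned; an extern array, or one already emitted, cannot be
   retrofitted this way and must instead go through the misalignment
   handling (peeling, or unaligned-access support).  */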
1793
1794 /* Function vect_get_new_vect_var.
1795
1796 Returns a name for a new variable. The current naming scheme prepends
1797 the prefix "vect_" or "vect_p" (depending on the value of VAR_KIND)
1798 to NAME, if NAME is provided; otherwise the prefix alone is used. */
1800
1801 static tree
1802 vect_get_new_vect_var (tree type, enum vect_var_kind var_kind, const char *name)
1803 {
1804 const char *prefix;
1805 int prefix_len;
1806 tree new_vect_var;
1807
1808 if (var_kind == vect_simple_var)
1809 prefix = "vect_";
1810 else
1811 prefix = "vect_p";
1812
1813 prefix_len = strlen (prefix);
1814
1815 if (name)
1816 new_vect_var = create_tmp_var (type, concat (prefix, name, NULL));
1817 else
1818 new_vect_var = create_tmp_var (type, prefix);
1819
1820 return new_vect_var;
1821 }
1822
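/* E.g. (illustration): with NAME = "b", vect_simple_var yields a
   temporary named "vect_b" and vect_pointer_var yields "vect_pb"
   (create_tmp_var may further uniquify the name).  */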
1823
1824 /* Function vect_create_index_for_vector_ref.
1825
1826 Create (and return) an index variable, along with its update chain in the
1827 loop. This variable will be used to access a memory location in a vector
1828 operation.
1829
1830 Input:
1831 LOOP: The loop being vectorized.
1832 BSI: The block_stmt_iterator where STMT is. Any new stmts created by this
1833 function can be added here, or in the loop pre-header.
1834
1835 Output:
1836 Return an index that will be used to index a vector array. It is expected
1837 that a pointer to the first vector will be used as the base address for the
1838 indexed reference.
1839
1840 FORNOW: we are not trying to be efficient, just creating a new index each
1841 time from scratch. At this time all vector references could use the same
1842 index.
1843
1844 TODO: create only one index to be used by all vector references. Record
1845 the index in the LOOP_VINFO the first time this procedure is called and
1846 return it on subsequent calls. The increment of this index must be placed
1847 just before the conditional expression that ends the single block loop. */
1848
1849 static tree
1850 vect_create_index_for_vector_ref (struct loop *loop, block_stmt_iterator *bsi)
1851 {
1852 tree init, step;
1853 tree indx_before_incr, indx_after_incr;
1854
1855 /* It is assumed that the base pointer used for vectorized access contains
1856 the address of the first vector. Therefore the index used for vectorized
1857 access must be initialized to zero and incremented by 1. */
1858
1859 init = integer_zero_node;
1860 step = integer_one_node;
1861
1862 /* Assuming that bsi_insert is used with BSI_NEW_STMT. */
1863 create_iv (init, step, NULL_TREE, loop, bsi, false,
1864 &indx_before_incr, &indx_after_incr);
1865
1866 return indx_before_incr;
1867 }
1868
1869
1870 /* Function vect_create_addr_base_for_vector_ref.
1871
1872 Create an expression that computes the address of the first memory location
1873 that will be accessed for a data reference.
1874
1875 Input:
1876 STMT: The statement containing the data reference.
1877 NEW_STMT_LIST: Must be initialized to NULL_TREE or a statement list.
1878 OFFSET: Optional. If supplied, it is added to the initial address.
1879
1880 Output:
1881 1. Return an SSA_NAME whose value is the address of the memory location of
1882 the first vector of the data reference.
1883 2. If NEW_STMT_LIST is not NULL_TREE on return, the caller must insert
1884 the statement(s) that define the returned SSA_NAME.
1885
1886 FORNOW: We are only handling array accesses with step 1. */
1887
1888 static tree
1889 vect_create_addr_base_for_vector_ref (tree stmt,
1890 tree *new_stmt_list,
1891 tree offset)
1892 {
1893 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1894 struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
1895 tree data_ref_base = unshare_expr (STMT_VINFO_VECT_DR_BASE (stmt_info));
1896 tree base_name = unshare_expr (DR_BASE_NAME (dr));
1897 tree ref = DR_REF (dr);
1898 tree scalar_type = TREE_TYPE (ref);
1899 tree scalar_ptr_type = build_pointer_type (scalar_type);
1900 tree vec_stmt;
1901 tree new_temp;
1902 tree addr_base, addr_expr;
1903 tree dest, new_stmt;
1904 tree base_offset = unshare_expr (STMT_VINFO_VECT_INIT_OFFSET (stmt_info));
1905
1906 if (TREE_CODE (TREE_TYPE (data_ref_base)) != POINTER_TYPE)
1907 /* After the analysis stage, we expect to get here only with RECORD_TYPE
1908 and ARRAY_TYPE. */
1909 /* Add '&' to ref_base. */
1910 data_ref_base = build_fold_addr_expr (data_ref_base);
1911 else
1912 {
1913 /* Create '(scalar_type*) base' for pointers. */
1914 tree dest, new_stmt, new_temp, vec_stmt, tmp_base;
1915 tree scalar_array_type = build_array_type (scalar_type, 0);
1916 tree scalar_array_ptr_type = build_pointer_type (scalar_array_type);
1917 tree array_ptr = create_tmp_var (scalar_array_ptr_type, "array_ptr");
1918 add_referenced_tmp_var (array_ptr);
1919
1920 dest = create_tmp_var (TREE_TYPE (data_ref_base), "dataref");
1921 add_referenced_tmp_var (dest);
1922 tmp_base = force_gimple_operand (data_ref_base, &new_stmt, false, dest);
1923 append_to_statement_list_force (new_stmt, new_stmt_list);
1924
1925 vec_stmt = fold_convert (scalar_array_ptr_type, tmp_base);
1926 vec_stmt = build2 (MODIFY_EXPR, void_type_node, array_ptr, vec_stmt);
1927 new_temp = make_ssa_name (array_ptr, vec_stmt);
1928 TREE_OPERAND (vec_stmt, 0) = new_temp;
1929 append_to_statement_list_force (vec_stmt, new_stmt_list);
1930 data_ref_base = new_temp;
1931 }
1932
1933 /* Create base_offset */
1934 dest = create_tmp_var (TREE_TYPE (base_offset), "base_off");
1935 add_referenced_tmp_var (dest);
1936 base_offset = force_gimple_operand (base_offset, &new_stmt, false, dest);
1937 append_to_statement_list_force (new_stmt, new_stmt_list);
1938
1939 if (offset)
1940 {
1941 tree tmp = create_tmp_var (TREE_TYPE (base_offset), "offset");
1942 add_referenced_tmp_var (tmp);
1943 offset = fold (build2 (MULT_EXPR, TREE_TYPE (offset), offset,
1944 STMT_VINFO_VECT_STEP (stmt_info)));
1945 base_offset = fold (build2 (PLUS_EXPR, TREE_TYPE (base_offset), base_offset,
1946 offset));
1947 base_offset = force_gimple_operand (base_offset, &new_stmt, false, tmp);
1948 append_to_statement_list_force (new_stmt, new_stmt_list);
1949 }
1950
1951 /* base + base_offset */
1952 addr_base = fold (build2 (PLUS_EXPR, TREE_TYPE (data_ref_base), data_ref_base,
1953 base_offset));
1954
1955 /* addr_expr = addr_base */
1956 addr_expr = vect_get_new_vect_var (scalar_ptr_type, vect_pointer_var,
1957 get_name (base_name));
1958 add_referenced_tmp_var (addr_expr);
1959 vec_stmt = build2 (MODIFY_EXPR, void_type_node, addr_expr, addr_base);
1960 new_temp = make_ssa_name (addr_expr, vec_stmt);
1961 TREE_OPERAND (vec_stmt, 0) = new_temp;
1962 append_to_statement_list_force (vec_stmt, new_stmt_list);
1963
1964 if (vect_debug_details (NULL))
1965 {
1966 fprintf (dump_file, "created ");
1967 print_generic_expr (dump_file, vec_stmt, TDF_SLIM);
1968 fprintf (dump_file, "\n");
1969 }
1970 return new_temp;
1971 }
1972
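/* Sketch of the generated preheader code (hypothetical GIMPLE, names
   invented) for an array reference a[i] with a zero initial offset and
   OFFSET = NULL_TREE:

     base_off.0 = 0;
     vect_pa.1 = &a + base_off.0;

   vect_pa.1 is the SSA_NAME returned; both statements end up in
   NEW_STMT_LIST for the caller to insert.  */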
1973
1974 /* Function get_vectype_for_scalar_type.
1975
1976 Returns the vector type corresponding to SCALAR_TYPE as supported
1977 by the target. */
1978
1979 static tree
1980 get_vectype_for_scalar_type (tree scalar_type)
1981 {
1982 enum machine_mode inner_mode = TYPE_MODE (scalar_type);
1983 int nbytes = GET_MODE_SIZE (inner_mode);
1984 int nunits;
1985 tree vectype;
1986
1987 if (nbytes == 0)
1988 return NULL_TREE;
1989
1990 /* FORNOW: Only a single vector size per target (UNITS_PER_SIMD_WORD)
1991 is expected. */
1992 nunits = UNITS_PER_SIMD_WORD / nbytes;
1993
1994 vectype = build_vector_type (scalar_type, nunits);
1995 if (vect_debug_details (NULL))
1996 {
1997 fprintf (dump_file, "get vectype with %d units of type ", nunits);
1998 print_generic_expr (dump_file, scalar_type, TDF_SLIM);
1999 }
2000
2001 if (!vectype)
2002 return NULL_TREE;
2003
2004 if (vect_debug_details (NULL))
2005 {
2006 fprintf (dump_file, "vectype: ");
2007 print_generic_expr (dump_file, vectype, TDF_SLIM);
2008 }
2009
2010 if (!VECTOR_MODE_P (TYPE_MODE (vectype)))
2011 {
2012 /* TODO: tree-complex.c sometimes can parallelize operations
2013 on generic vectors. We can vectorize the loop in that case,
2014 but then we should re-run the lowering pass. */
2015 if (vect_debug_details (NULL))
2016 fprintf (dump_file, "mode not supported by target.");
2017 return NULL_TREE;
2018 }
2019
2020 return vectype;
2021 }
2022
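/* Worked example (assuming UNITS_PER_SIMD_WORD == 16, as on typical
   128-bit SIMD targets): for 'short' (2 bytes) nunits = 16/2 = 8,
   giving V8HI; for 'int' (4 bytes) nunits = 4, giving V4SI.  If the
   target has no matching vector mode, the VECTOR_MODE_P check above
   rejects the type and NULL_TREE is returned.  */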
2023
2024 /* Function vect_align_data_ref.
2025
2026 Handle misalignment of memory accesses.
2027
2028 FORNOW: Can't handle misaligned accesses.
2029 Make sure that the dataref is aligned. */
2030
2031 static void
2032 vect_align_data_ref (tree stmt)
2033 {
2034 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
2035 struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
2036
2037 /* FORNOW: can't handle misaligned accesses;
2038 all accesses expected to be aligned. */
2039 gcc_assert (aligned_access_p (dr));
2040 }
2041
2042
2043 /* Function vect_create_data_ref_ptr.
2044
2045 Create a memory reference expression for vector access, to be used in a
2046 vector load/store stmt. The reference is based on a new pointer to vector
2047 type (vp).
2048
2049 Input:
2050 1. STMT: a stmt that references memory. Expected to be of the form
2051 MODIFY_EXPR <name, data-ref> or MODIFY_EXPR <data-ref, name>.
2052 2. BSI: block_stmt_iterator where new stmts can be added.
2053 3. OFFSET (optional): an offset to be added to the initial address accessed
2054 by the data-ref in STMT.
2055 4. ONLY_INIT: indicates whether vp is to be updated inside the loop, or
2056 is to remain pointing to the initial address.
2057
2058 Output:
2059 1. Declare a new ptr to vector_type, and have it point to the base of the
2060 data reference (initial address accessed by the data reference).
2061 For example, for a vector of type V8HI, the following code is generated:
2062
2063 v8hi *vp;
2064 vp = (v8hi *)initial_address;
2065
2066 if OFFSET is not supplied:
2067 initial_address = &a[init];
2068 if OFFSET is supplied:
2069 initial_address = &a[init + OFFSET];
2070
2071 Return the initial_address in INITIAL_ADDRESS.
2072
2073 2. Create a data-reference in the loop based on the new vector pointer vp,
2074 and using a new index variable 'idx' as follows:
2075
2076 vp' = vp + update
2077
2078 where if ONLY_INIT is true:
2079 update = zero
2080 and otherwise
2081 update = idx + vector_type_size
2082
2083 Return the pointer vp'.
2084
2085
2086 FORNOW: handle only aligned and consecutive accesses. */
2087
2088 static tree
2089 vect_create_data_ref_ptr (tree stmt, block_stmt_iterator *bsi, tree offset,
2090 tree *initial_address, bool only_init)
2091 {
2092 tree base_name;
2093 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
2094 struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
2095 struct loop *loop = STMT_VINFO_LOOP (stmt_info);
2096 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
2097 tree vect_ptr_type;
2098 tree vect_ptr;
2099 tree tag;
2100 v_may_def_optype v_may_defs = STMT_V_MAY_DEF_OPS (stmt);
2101 v_must_def_optype v_must_defs = STMT_V_MUST_DEF_OPS (stmt);
2102 vuse_optype vuses = STMT_VUSE_OPS (stmt);
2103 int nvuses, nv_may_defs, nv_must_defs;
2104 int i;
2105 tree new_temp;
2106 tree vec_stmt;
2107 tree new_stmt_list = NULL_TREE;
2108 tree idx;
2109 edge pe = loop_preheader_edge (loop);
2110 basic_block new_bb;
2111 tree vect_ptr_init;
2112 tree vectype_size;
2113 tree ptr_update;
2114 tree data_ref_ptr;
2115 tree type, tmp, size;
2116
2117 base_name = unshare_expr (DR_BASE_NAME (dr));
2118 if (vect_debug_details (NULL))
2119 {
2120 tree data_ref_base = base_name;
2121 fprintf (dump_file, "create array_ref of type: ");
2122 print_generic_expr (dump_file, vectype, TDF_SLIM);
2123 if (TREE_CODE (data_ref_base) == VAR_DECL)
2124 fprintf (dump_file, "\nvectorizing a one dimensional array ref: ");
2125 else if (TREE_CODE (data_ref_base) == ARRAY_REF)
2126 fprintf (dump_file, "\nvectorizing a multidimensional array ref: ");
2127 else if (TREE_CODE (data_ref_base) == COMPONENT_REF)
2128 fprintf (dump_file, "\nvectorizing a record based array ref: ");
2129 else if (TREE_CODE (data_ref_base) == SSA_NAME)
2130 fprintf (dump_file, "\nvectorizing a pointer ref: ");
2131 print_generic_expr (dump_file, base_name, TDF_SLIM);
2132 }
2133
2134 /** (1) Create the new vector-pointer variable: **/
2135
2136 vect_ptr_type = build_pointer_type (vectype);
2137 vect_ptr = vect_get_new_vect_var (vect_ptr_type, vect_pointer_var,
2138 get_name (base_name));
2139 add_referenced_tmp_var (vect_ptr);
2140
2141
2142 /** (2) Handle aliasing information of the new vector-pointer: **/
2143
2144 tag = STMT_VINFO_MEMTAG (stmt_info);
2145 gcc_assert (tag);
2146 get_var_ann (vect_ptr)->type_mem_tag = tag;
2147
2148 /* Mark for renaming all aliased variables
2149 (i.e., the may-aliases of the type-mem-tag). */
2150 nvuses = NUM_VUSES (vuses);
2151 nv_may_defs = NUM_V_MAY_DEFS (v_may_defs);
2152 nv_must_defs = NUM_V_MUST_DEFS (v_must_defs);
2153 for (i = 0; i < nvuses; i++)
2154 {
2155 tree use = VUSE_OP (vuses, i);
2156 if (TREE_CODE (use) == SSA_NAME)
2157 bitmap_set_bit (vars_to_rename, var_ann (SSA_NAME_VAR (use))->uid);
2158 }
2159 for (i = 0; i < nv_may_defs; i++)
2160 {
2161 tree def = V_MAY_DEF_RESULT (v_may_defs, i);
2162 if (TREE_CODE (def) == SSA_NAME)
2163 bitmap_set_bit (vars_to_rename, var_ann (SSA_NAME_VAR (def))->uid);
2164 }
2165 for (i = 0; i < nv_must_defs; i++)
2166 {
2167 tree def = V_MUST_DEF_RESULT (v_must_defs, i);
2168 if (TREE_CODE (def) == SSA_NAME)
2169 bitmap_set_bit (vars_to_rename, var_ann (SSA_NAME_VAR (def))->uid);
2170 }
2171
2172
2173 /** (3) Calculate the initial address of the vector-pointer, and set
2174 the vector-pointer to point to it before the loop: **/
2175
2176 /* Create: &(base[init_val + offset]) in the loop preheader. */
2177 new_temp = vect_create_addr_base_for_vector_ref (stmt, &new_stmt_list,
2178 offset);
2179 pe = loop_preheader_edge (loop);
2180 new_bb = bsi_insert_on_edge_immediate (pe, new_stmt_list);
2181 gcc_assert (!new_bb);
2182 *initial_address = new_temp;
2183
2184 /* Create: p = (vectype *) initial_base */
2185 vec_stmt = fold_convert (vect_ptr_type, new_temp);
2186 vec_stmt = build2 (MODIFY_EXPR, void_type_node, vect_ptr, vec_stmt);
2187 new_temp = make_ssa_name (vect_ptr, vec_stmt);
2188 TREE_OPERAND (vec_stmt, 0) = new_temp;
2189 new_bb = bsi_insert_on_edge_immediate (pe, vec_stmt);
2190 gcc_assert (!new_bb);
2191 vect_ptr_init = TREE_OPERAND (vec_stmt, 0);
2192
2193
2194 /** (4) Handle the updating of the vector-pointer inside the loop: **/
2195
2196 if (only_init) /* No update in loop is required. */
2197 return vect_ptr_init;
2198
2199 idx = vect_create_index_for_vector_ref (loop, bsi);
2200
2201 /* Create: update = idx * vectype_size */
2202 tmp = create_tmp_var (integer_type_node, "update");
2203 add_referenced_tmp_var (tmp);
2204 size = TYPE_SIZE (vect_ptr_type);
2205 type = lang_hooks.types.type_for_size (tree_low_cst (size, 1), 1);
2206 ptr_update = create_tmp_var (type, "update");
2207 add_referenced_tmp_var (ptr_update);
2208 vectype_size = TYPE_SIZE_UNIT (vectype);
2209 vec_stmt = build2 (MULT_EXPR, integer_type_node, idx, vectype_size);
2210 vec_stmt = build2 (MODIFY_EXPR, void_type_node, tmp, vec_stmt);
2211 new_temp = make_ssa_name (tmp, vec_stmt);
2212 TREE_OPERAND (vec_stmt, 0) = new_temp;
2213 bsi_insert_before (bsi, vec_stmt, BSI_SAME_STMT);
2214 vec_stmt = fold_convert (type, new_temp);
2215 vec_stmt = build2 (MODIFY_EXPR, void_type_node, ptr_update, vec_stmt);
2216 new_temp = make_ssa_name (ptr_update, vec_stmt);
2217 TREE_OPERAND (vec_stmt, 0) = new_temp;
2218 bsi_insert_before (bsi, vec_stmt, BSI_SAME_STMT);
2219
2220 /* Create: data_ref_ptr = vect_ptr_init + update */
2221 vec_stmt = build2 (PLUS_EXPR, vect_ptr_type, vect_ptr_init, new_temp);
2222 vec_stmt = build2 (MODIFY_EXPR, void_type_node, vect_ptr, vec_stmt);
2223 new_temp = make_ssa_name (vect_ptr, vec_stmt);
2224 TREE_OPERAND (vec_stmt, 0) = new_temp;
2225 bsi_insert_before (bsi, vec_stmt, BSI_SAME_STMT);
2226 data_ref_ptr = TREE_OPERAND (vec_stmt, 0);
2227
2228 return data_ref_ptr;
2229 }
2230
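/* Concretely, the in-loop update above amounts to (hypothetical GIMPLE
   for a V8HI access, names invented):

     update.0 = idx * 16;                 # idx * TYPE_SIZE_UNIT (vectype)
     update.1 = (unsigned) update.0;
     vect_pb.2 = vect_pb_init + update.1;

   i.e. each iteration re-derives the pointer from the loop-invariant
   initial pointer instead of incrementing the pointer in place.  */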
2231
2232 /* Function vect_create_destination_var.
2233
2234 Create a new temporary of type VECTYPE. */
2235
2236 static tree
2237 vect_create_destination_var (tree scalar_dest, tree vectype)
2238 {
2239 tree vec_dest;
2240 const char *new_name;
2241
2242 gcc_assert (TREE_CODE (scalar_dest) == SSA_NAME);
2243
2244 new_name = get_name (scalar_dest);
2245 if (!new_name)
2246 new_name = "var_";
2247 vec_dest = vect_get_new_vect_var (vectype, vect_simple_var, new_name);
2248 add_referenced_tmp_var (vec_dest);
2249
2250 return vec_dest;
2251 }
2252
2253
2254 /* Function vect_init_vector.
2255
2256 Insert a new stmt (INIT_STMT) that initializes a new vector variable with
2257 the vector elements of VECTOR_VAR. Return the DEF of INIT_STMT. It will be
2258 used in the vectorization of STMT. */
2259
2260 static tree
2261 vect_init_vector (tree stmt, tree vector_var)
2262 {
2263 stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt);
2264 struct loop *loop = STMT_VINFO_LOOP (stmt_vinfo);
2265 tree new_var;
2266 tree init_stmt;
2267 tree vectype = STMT_VINFO_VECTYPE (stmt_vinfo);
2268 tree vec_oprnd;
2269 edge pe;
2270 tree new_temp;
2271 basic_block new_bb;
2272
2273 new_var = vect_get_new_vect_var (vectype, vect_simple_var, "cst_");
2274 add_referenced_tmp_var (new_var);
2275
2276 init_stmt = build2 (MODIFY_EXPR, vectype, new_var, vector_var);
2277 new_temp = make_ssa_name (new_var, init_stmt);
2278 TREE_OPERAND (init_stmt, 0) = new_temp;
2279
2280 pe = loop_preheader_edge (loop);
2281 new_bb = bsi_insert_on_edge_immediate (pe, init_stmt);
2282 gcc_assert (!new_bb);
2283
2284 if (vect_debug_details (NULL))
2285 {
2286 fprintf (dump_file, "created new init_stmt: ");
2287 print_generic_expr (dump_file, init_stmt, TDF_SLIM);
2288 }
2289
2290 vec_oprnd = TREE_OPERAND (init_stmt, 0);
2291 return vec_oprnd;
2292 }
2293
2294
2295 /* Function vect_get_vec_def_for_operand.
2296
2297 OP is an operand in STMT. This function returns a (vector) def that will be
2298 used in the vectorized stmt for STMT.
2299
2300 In the case that OP is an SSA_NAME which is defined in the loop, then
2301 STMT_VINFO_VEC_STMT of the defining stmt holds the relevant def.
2302
2303 In case OP is an invariant or constant, a new stmt that creates a vector def
2304 needs to be introduced. */
2305
2306 static tree
2307 vect_get_vec_def_for_operand (tree op, tree stmt)
2308 {
2309 tree vec_oprnd;
2310 tree vec_stmt;
2311 tree def_stmt;
2312 stmt_vec_info def_stmt_info = NULL;
2313 stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt);
2314 tree vectype = STMT_VINFO_VECTYPE (stmt_vinfo);
2315 int nunits = GET_MODE_NUNITS (TYPE_MODE (vectype));
2316 struct loop *loop = STMT_VINFO_LOOP (stmt_vinfo);
2317 basic_block bb;
2318 tree vec_inv;
2319 tree t = NULL_TREE;
2320 tree def;
2321 int i;
2322
2323 if (vect_debug_details (NULL))
2324 {
2325 fprintf (dump_file, "vect_get_vec_def_for_operand: ");
2326 print_generic_expr (dump_file, op, TDF_SLIM);
2327 }
2328
2329 /** ===> Case 1: operand is a constant. **/
2330
2331 if (TREE_CODE (op) == INTEGER_CST || TREE_CODE (op) == REAL_CST)
2332 {
2333 /* Create 'vect_cst_ = {cst,cst,...,cst}' */
2334
2335 tree vec_cst;
2336
2337 /* Build a tree with vector elements. */
2338 if (vect_debug_details (NULL))
2339 fprintf (dump_file, "Create vector_cst. nunits = %d", nunits);
2340
2341 for (i = nunits - 1; i >= 0; --i)
2342 {
2343 t = tree_cons (NULL_TREE, op, t);
2344 }
2345 vec_cst = build_vector (vectype, t);
2346 return vect_init_vector (stmt, vec_cst);
2347 }
2348
2349 gcc_assert (TREE_CODE (op) == SSA_NAME);
2350
2351 /** ===> Case 2: operand is an SSA_NAME - find the stmt that defines it. **/
2352
2353 def_stmt = SSA_NAME_DEF_STMT (op);
2354 def_stmt_info = vinfo_for_stmt (def_stmt);
2355
2356 if (vect_debug_details (NULL))
2357 {
2358 fprintf (dump_file, "vect_get_vec_def_for_operand: def_stmt: ");
2359 print_generic_expr (dump_file, def_stmt, TDF_SLIM);
2360 }
2361
2362
2363 /** ==> Case 2.1: operand is defined inside the loop. **/
2364
2365 if (def_stmt_info)
2366 {
2367 /* Get the def from the vectorized stmt. */
2368
2369 vec_stmt = STMT_VINFO_VEC_STMT (def_stmt_info);
2370 gcc_assert (vec_stmt);
2371 vec_oprnd = TREE_OPERAND (vec_stmt, 0);
2372 return vec_oprnd;
2373 }
2374
2375
2376 /** ==> Case 2.2: operand is defined by the loop-header phi-node -
2377 it is a reduction/induction. **/
2378
2379 bb = bb_for_stmt (def_stmt);
2380 if (TREE_CODE (def_stmt) == PHI_NODE && flow_bb_inside_loop_p (loop, bb))
2381 {
2382 if (vect_debug_details (NULL))
2383 fprintf (dump_file, "reduction/induction - unsupported.");
2384 internal_error ("no support for reduction/induction"); /* FORNOW */
2385 }
2386
2387
2388 /** ==> Case 2.3: operand is defined outside the loop -
2389 it is a loop invariant. */
2390
2391 switch (TREE_CODE (def_stmt))
2392 {
2393 case PHI_NODE:
2394 def = PHI_RESULT (def_stmt);
2395 break;
2396 case MODIFY_EXPR:
2397 def = TREE_OPERAND (def_stmt, 0);
2398 break;
2399 case NOP_EXPR:
2400 def = TREE_OPERAND (def_stmt, 0);
2401 gcc_assert (IS_EMPTY_STMT (def_stmt));
2402 def = op;
2403 break;
2404 default:
2405 if (vect_debug_details (NULL))
2406 {
2407 fprintf (dump_file, "unsupported defining stmt: ");
2408 print_generic_expr (dump_file, def_stmt, TDF_SLIM);
2409 }
2410 internal_error ("unsupported defining stmt");
2411 }
2412
2413 /* Build a tree with vector elements. Create 'vec_inv = {inv,inv,..,inv}' */
2414
2415 if (vect_debug_details (NULL))
2416 fprintf (dump_file, "Create vector_inv.");
2417
2418 for (i = nunits - 1; i >= 0; --i)
2419 {
2420 t = tree_cons (NULL_TREE, def, t);
2421 }
2422
2423 vec_inv = build_constructor (vectype, t);
2424 return vect_init_vector (stmt, vec_inv);
2425 }
2426
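/* Putting the cases together (illustration, V4SI assumed): in
   'x = a[i] + 5', the operand 5 is case 1 and becomes the preheader
   constant {5,5,5,5}; the operand a[i] is case 2.1 and its vector def
   is taken from STMT_VINFO_VEC_STMT of the vectorized load; a
   loop-invariant scalar 'n' defined before the loop would be case 2.3
   and be broadcast via the constructor {n,n,n,n}.  */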
2427
2428 /* Function vect_finish_stmt_generation.
2429
2430 Insert a new stmt. */
2431
2432 static void
2433 vect_finish_stmt_generation (tree stmt, tree vec_stmt, block_stmt_iterator *bsi)
2434 {
2435 bsi_insert_before (bsi, vec_stmt, BSI_SAME_STMT);
2436
2437 if (vect_debug_details (NULL))
2438 {
2439 fprintf (dump_file, "add new stmt: ");
2440 print_generic_expr (dump_file, vec_stmt, TDF_SLIM);
2441 }
2442
2443 /* Make sure bsi points to the stmt that is being vectorized. */
2444
2445 /* Assumption: any stmts created for the vectorization of stmt S were
2446 inserted before S. BSI is expected to point to S or to some new stmt
2447 before S. */
2448
2449 while (stmt != bsi_stmt (*bsi) && !bsi_end_p (*bsi))
2450 bsi_next (bsi);
2451 gcc_assert (stmt == bsi_stmt (*bsi));
2452 }
2453
2454
2455 /* Function vectorizable_assignment.
2456
2457 Check if STMT performs an assignment (copy) that can be vectorized.
2458 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
2459 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
2460 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
2461
2462 static bool
2463 vectorizable_assignment (tree stmt, block_stmt_iterator *bsi, tree *vec_stmt)
2464 {
2465 tree vec_dest;
2466 tree scalar_dest;
2467 tree op;
2468 tree vec_oprnd;
2469 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
2470 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
2471 struct loop *loop = STMT_VINFO_LOOP (stmt_info);
2472 tree new_temp;
2473
2474 /* Is vectorizable assignment? */
2475
2476 if (TREE_CODE (stmt) != MODIFY_EXPR)
2477 return false;
2478
2479 scalar_dest = TREE_OPERAND (stmt, 0);
2480 if (TREE_CODE (scalar_dest) != SSA_NAME)
2481 return false;
2482
2483 op = TREE_OPERAND (stmt, 1);
2484 if (!vect_is_simple_use (op, loop, NULL))
2485 {
2486 if (vect_debug_details (NULL))
2487 fprintf (dump_file, "use not simple.");
2488 return false;
2489 }
2490
2491 if (!vec_stmt) /* transformation not required. */
2492 {
2493 STMT_VINFO_TYPE (stmt_info) = assignment_vec_info_type;
2494 return true;
2495 }
2496
2497 /** Transform. **/
2498 if (vect_debug_details (NULL))
2499 fprintf (dump_file, "transform assignment.");
2500
2501 /* Handle def. */
2502 vec_dest = vect_create_destination_var (scalar_dest, vectype);
2503
2504 /* Handle use. */
2505 op = TREE_OPERAND (stmt, 1);
2506 vec_oprnd = vect_get_vec_def_for_operand (op, stmt);
2507
2508 /* Arguments are ready. Create the new vector stmt. */
2509 *vec_stmt = build2 (MODIFY_EXPR, vectype, vec_dest, vec_oprnd);
2510 new_temp = make_ssa_name (vec_dest, *vec_stmt);
2511 TREE_OPERAND (*vec_stmt, 0) = new_temp;
2512 vect_finish_stmt_generation (stmt, *vec_stmt, bsi);
2513
2514 return true;
2515 }
2516
2517
2518 /* Function vectorizable_operation.
2519
2520 Check if STMT performs a binary or unary operation that can be vectorized.
2521 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
2522 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
2523 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
2524
2525 static bool
2526 vectorizable_operation (tree stmt, block_stmt_iterator *bsi, tree *vec_stmt)
2527 {
2528 tree vec_dest;
2529 tree scalar_dest;
2530 tree operation;
2531 tree op0, op1 = NULL;
2532 tree vec_oprnd0, vec_oprnd1=NULL;
2533 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
2534 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
2535 struct loop *loop = STMT_VINFO_LOOP (stmt_info);
2536 int i;
2537 enum tree_code code;
2538 enum machine_mode vec_mode;
2539 tree new_temp;
2540 int op_type;
2541 tree op;
2542 optab optab;
2543
2544 /* Is STMT a vectorizable binary/unary operation? */
2545 if (TREE_CODE (stmt) != MODIFY_EXPR)
2546 return false;
2547
2548 if (TREE_CODE (TREE_OPERAND (stmt, 0)) != SSA_NAME)
2549 return false;
2550
2551 operation = TREE_OPERAND (stmt, 1);
2552 code = TREE_CODE (operation);
2553 optab = optab_for_tree_code (code, vectype);
2554
2555 /* Support only unary or binary operations. */
2556 op_type = TREE_CODE_LENGTH (code);
2557 if (op_type != unary_op && op_type != binary_op)
2558 {
2559 if (vect_debug_details (NULL))
2560 fprintf (dump_file, "num. args = %d (not unary/binary op).", op_type);
2561 return false;
2562 }
2563
2564 for (i = 0; i < op_type; i++)
2565 {
2566 op = TREE_OPERAND (operation, i);
2567 if (!vect_is_simple_use (op, loop, NULL))
2568 {
2569 if (vect_debug_details (NULL))
2570 fprintf (dump_file, "use not simple.");
2571 return false;
2572 }
2573 }
2574
2575 /* Supportable by target? */
2576 if (!optab)
2577 {
2578 if (vect_debug_details (NULL))
2579 fprintf (dump_file, "no optab.");
2580 return false;
2581 }
2582 vec_mode = TYPE_MODE (vectype);
2583 if (optab->handlers[(int) vec_mode].insn_code == CODE_FOR_nothing)
2584 {
2585 if (vect_debug_details (NULL))
2586 fprintf (dump_file, "op not supported by target.");
2587 return false;
2588 }
2589
2590 if (!vec_stmt) /* transformation not required. */
2591 {
2592 STMT_VINFO_TYPE (stmt_info) = op_vec_info_type;
2593 return true;
2594 }
2595
2596 /** Transform. **/
2597
2598 if (vect_debug_details (NULL))
2599 fprintf (dump_file, "transform binary/unary operation.");
2600
2601 /* Handle def. */
2602 scalar_dest = TREE_OPERAND (stmt, 0);
2603 vec_dest = vect_create_destination_var (scalar_dest, vectype);
2604
2605 /* Handle uses. */
2606 op0 = TREE_OPERAND (operation, 0);
2607 vec_oprnd0 = vect_get_vec_def_for_operand (op0, stmt);
2608
2609 if (op_type == binary_op)
2610 {
2611 op1 = TREE_OPERAND (operation, 1);
2612 vec_oprnd1 = vect_get_vec_def_for_operand (op1, stmt);
2613 }
2614
2615 /* Arguments are ready. Create the new vector stmt. */
2616
2617 if (op_type == binary_op)
2618 *vec_stmt = build2 (MODIFY_EXPR, vectype, vec_dest,
2619 build2 (code, vectype, vec_oprnd0, vec_oprnd1));
2620 else
2621 *vec_stmt = build2 (MODIFY_EXPR, vectype, vec_dest,
2622 build1 (code, vectype, vec_oprnd0));
2623 new_temp = make_ssa_name (vec_dest, *vec_stmt);
2624 TREE_OPERAND (*vec_stmt, 0) = new_temp;
2625 vect_finish_stmt_generation (stmt, *vec_stmt, bsi);
2626
2627 return true;
2628 }
2629
2630
2631 /* Function vectorizable_store.
2632
2633 Check if STMT defines a non-scalar data-ref (array/pointer/structure) that
2634 can be vectorized.
2635 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
2636 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
2637 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
2638
2639 static bool
2640 vectorizable_store (tree stmt, block_stmt_iterator *bsi, tree *vec_stmt)
2641 {
2642 tree scalar_dest;
2643 tree data_ref;
2644 tree op;
2645 tree vec_oprnd1;
2646 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
2647 struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
2648 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
2649 struct loop *loop = STMT_VINFO_LOOP (stmt_info);
2650 enum machine_mode vec_mode;
2651 tree dummy;
2652 enum dr_alignment_support alignment_support_scheme;
2653
2654 /* Is vectorizable store? */
2655
2656 if (TREE_CODE (stmt) != MODIFY_EXPR)
2657 return false;
2658
2659 scalar_dest = TREE_OPERAND (stmt, 0);
2660 if (TREE_CODE (scalar_dest) != ARRAY_REF
2661 && TREE_CODE (scalar_dest) != INDIRECT_REF)
2662 return false;
2663
2664 op = TREE_OPERAND (stmt, 1);
2665 if (!vect_is_simple_use (op, loop, NULL))
2666 {
2667 if (vect_debug_details (NULL))
2668 fprintf (dump_file, "use not simple.");
2669 return false;
2670 }
2671
2672 vec_mode = TYPE_MODE (vectype);
2673 /* FORNOW. In some cases can vectorize even if data-type not supported
2674 (e.g. - array initialization with 0). */
2675 if (mov_optab->handlers[(int)vec_mode].insn_code == CODE_FOR_nothing)
2676 return false;
2677
2678 if (!STMT_VINFO_DATA_REF (stmt_info))
2679 return false;
2680
2681
2682 if (!vec_stmt) /* transformation not required. */
2683 {
2684 STMT_VINFO_TYPE (stmt_info) = store_vec_info_type;
2685 return true;
2686 }
2687
2688 /** Transform. **/
2689
2690 if (vect_debug_details (NULL))
2691 fprintf (dump_file, "transform store");
2692
2693 alignment_support_scheme = vect_supportable_dr_alignment (dr);
2694 gcc_assert (alignment_support_scheme);
2695 gcc_assert (alignment_support_scheme == dr_aligned); /* FORNOW */
2696
2697 /* Handle use - get the vectorized def from the defining stmt. */
2698 vec_oprnd1 = vect_get_vec_def_for_operand (op, stmt);
2699
2700 /* Handle def. */
2701 /* FORNOW: make sure the data reference is aligned. */
2702 vect_align_data_ref (stmt);
2703 data_ref = vect_create_data_ref_ptr (stmt, bsi, NULL_TREE, &dummy, false);
2704 data_ref = build_fold_indirect_ref (data_ref);
2705
2706 /* Arguments are ready. Create the new vector stmt. */
2707 *vec_stmt = build2 (MODIFY_EXPR, vectype, data_ref, vec_oprnd1);
2708 vect_finish_stmt_generation (stmt, *vec_stmt, bsi);
2709
2710 return true;
2711 }
2712
2713
2714 /* Function vectorizable_load.
2715
2716 Check if STMT reads a non-scalar data-ref (array/pointer/structure) that
2717 can be vectorized.
2718 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
2719 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
2720 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
2721
2722 static bool
2723 vectorizable_load (tree stmt, block_stmt_iterator *bsi, tree *vec_stmt)
2724 {
2725 tree scalar_dest;
2726 tree vec_dest = NULL;
2727 tree data_ref = NULL;
2728 tree op;
2729 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
2730 struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
2731 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
2732 tree new_temp;
2733 int mode;
2734 tree init_addr;
2735 tree new_stmt;
2736 tree dummy;
2737 basic_block new_bb;
2738 struct loop *loop = STMT_VINFO_LOOP (stmt_info);
2739 edge pe = loop_preheader_edge (loop);
2740 enum dr_alignment_support alignment_support_scheme;
2741
2742 /* Is vectorizable load? */
2743
2744 if (TREE_CODE (stmt) != MODIFY_EXPR)
2745 return false;
2746
2747 scalar_dest = TREE_OPERAND (stmt, 0);
2748 if (TREE_CODE (scalar_dest) != SSA_NAME)
2749 return false;
2750
2751 op = TREE_OPERAND (stmt, 1);
2752 if (TREE_CODE (op) != ARRAY_REF && TREE_CODE (op) != INDIRECT_REF)
2753 return false;
2754
2755 if (!STMT_VINFO_DATA_REF (stmt_info))
2756 return false;
2757
2758 mode = (int) TYPE_MODE (vectype);
2759
2760 /* FORNOW. In some cases can vectorize even if data-type not supported
2761 (e.g. - data copies). */
2762 if (mov_optab->handlers[mode].insn_code == CODE_FOR_nothing)
2763 {
2764 if (vect_debug_details (loop))
2765 fprintf (dump_file, "Aligned load, but unsupported type.");
2766 return false;
2767 }
2768
2769 if (!vec_stmt) /* transformation not required. */
2770 {
2771 STMT_VINFO_TYPE (stmt_info) = load_vec_info_type;
2772 return true;
2773 }
2774
2775 /** Transform. **/
2776
2777 if (vect_debug_details (NULL))
2778 fprintf (dump_file, "transform load.");
2779
2780 alignment_support_scheme = vect_supportable_dr_alignment (dr);
2781 gcc_assert (alignment_support_scheme);
2782
2783 if (alignment_support_scheme == dr_aligned
2784 || alignment_support_scheme == dr_unaligned_supported)
2785 {
2786 /* Create:
2787 p = initial_addr;
2788 indx = 0;
2789 loop {
2790 vec_dest = *(p);
2791 indx = indx + 1;
2792 }
2793 */
2794
2795 vec_dest = vect_create_destination_var (scalar_dest, vectype);
2796 data_ref = vect_create_data_ref_ptr (stmt, bsi, NULL_TREE, &dummy, false);
2797 if (aligned_access_p (dr))
2798 data_ref = build_fold_indirect_ref (data_ref);
2799 else
2800 {
2801 int mis = DR_MISALIGNMENT (dr);
2802 tree tmis = (mis == -1 ? size_zero_node : size_int (mis));
2803 tmis = size_binop (MULT_EXPR, tmis, size_int (BITS_PER_UNIT));
2804 data_ref = build2 (MISALIGNED_INDIRECT_REF, vectype, data_ref, tmis);
2805 }
2806 new_stmt = build2 (MODIFY_EXPR, vectype, vec_dest, data_ref);
2807 new_temp = make_ssa_name (vec_dest, new_stmt);
2808 TREE_OPERAND (new_stmt, 0) = new_temp;
2809 vect_finish_stmt_generation (stmt, new_stmt, bsi);
2810 }
2811 else if (alignment_support_scheme == dr_unaligned_software_pipeline)
2812 {
2813 /* Create:
2814 p1 = initial_addr;
2815 msq_init = *(floor(p1))
2816 p2 = initial_addr + VS - 1;
2817 magic = have_builtin ? builtin_result : initial_address;
2818 indx = 0;
2819 loop {
2820 p2' = p2 + indx * vectype_size
2821 lsq = *(floor(p2'))
2822 vec_dest = realign_load (msq, lsq, magic)
2823 indx = indx + 1;
2824 msq = lsq;
2825 }
2826 */
2827
2828 tree offset;
2829 tree magic;
2830 tree phi_stmt;
2831 tree msq_init;
2832 tree msq, lsq;
2833 tree dataref_ptr;
2834 tree params;
2835
2836 /* <1> Create msq_init = *(floor(p1)) in the loop preheader */
2837 vec_dest = vect_create_destination_var (scalar_dest, vectype);
2838 data_ref = vect_create_data_ref_ptr (stmt, bsi, NULL_TREE,
2839 &init_addr, true);
2840 data_ref = build1 (ALIGN_INDIRECT_REF, vectype, data_ref);
2841 new_stmt = build2 (MODIFY_EXPR, vectype, vec_dest, data_ref);
2842 new_temp = make_ssa_name (vec_dest, new_stmt);
2843 TREE_OPERAND (new_stmt, 0) = new_temp;
2844 new_bb = bsi_insert_on_edge_immediate (pe, new_stmt);
2845 gcc_assert (!new_bb);
2846 msq_init = TREE_OPERAND (new_stmt, 0);
2847
2848
2849 /* <2> Create lsq = *(floor(p2')) in the loop */
2850 offset = build_int_cst (integer_type_node,
2851 GET_MODE_NUNITS (TYPE_MODE (vectype)));
2852 offset = int_const_binop (MINUS_EXPR, offset, integer_one_node, 1);
2853 vec_dest = vect_create_destination_var (scalar_dest, vectype);
2854 dataref_ptr = vect_create_data_ref_ptr (stmt, bsi, offset, &dummy, false);
2855 data_ref = build1 (ALIGN_INDIRECT_REF, vectype, dataref_ptr);
2856 new_stmt = build2 (MODIFY_EXPR, vectype, vec_dest, data_ref);
2857 new_temp = make_ssa_name (vec_dest, new_stmt);
2858 TREE_OPERAND (new_stmt, 0) = new_temp;
2859 vect_finish_stmt_generation (stmt, new_stmt, bsi);
2860 lsq = TREE_OPERAND (new_stmt, 0);
2861
2862
2863 /* <3> */
2864 if (targetm.vectorize.builtin_mask_for_load)
2865 {
2866 /* Create permutation mask, if required, in loop preheader. */
2867 tree builtin_decl;
2868 params = build_tree_list (NULL_TREE, init_addr);
2869 vec_dest = vect_create_destination_var (scalar_dest, vectype);
2870 builtin_decl = targetm.vectorize.builtin_mask_for_load ();
2871 new_stmt = build_function_call_expr (builtin_decl, params);
2872 new_stmt = build2 (MODIFY_EXPR, vectype, vec_dest, new_stmt);
2873 new_temp = make_ssa_name (vec_dest, new_stmt);
2874 TREE_OPERAND (new_stmt, 0) = new_temp;
2875 new_bb = bsi_insert_on_edge_immediate (pe, new_stmt);
2876 gcc_assert (!new_bb);
2877 magic = TREE_OPERAND (new_stmt, 0);
2878
2879 /* Since we have just created a CALL_EXPR, we may need to
2880 rename call-clobbered variables. */
2881 mark_call_clobbered_vars_to_rename ();
2882 }
2883 else
2884 {
2885 /* Use the current address instead of init_addr, for reduced register
2886 pressure. */
2887 magic = dataref_ptr;
2888 }
2889
2890
2891 /* <4> Create msq = phi <msq_init, lsq> in loop */
2892 vec_dest = vect_create_destination_var (scalar_dest, vectype);
2893 msq = make_ssa_name (vec_dest, NULL_TREE);
2894 phi_stmt = create_phi_node (msq, loop->header); /* CHECKME */
2895 SSA_NAME_DEF_STMT (msq) = phi_stmt;
2896 add_phi_arg (phi_stmt, msq_init, loop_preheader_edge (loop));
2897 add_phi_arg (phi_stmt, lsq, loop_latch_edge (loop));
2898
2899
2900 /* <5> Create <vec_dest = realign_load (msq, lsq, magic)> in loop */
2901 vec_dest = vect_create_destination_var (scalar_dest, vectype);
2902 new_stmt = build3 (REALIGN_LOAD_EXPR, vectype, msq, lsq, magic);
2903 new_stmt = build2 (MODIFY_EXPR, vectype, vec_dest, new_stmt);
2904 new_temp = make_ssa_name (vec_dest, new_stmt);
2905 TREE_OPERAND (new_stmt, 0) = new_temp;
2906 vect_finish_stmt_generation (stmt, new_stmt, bsi);
2907 }
2908 else
2909 gcc_unreachable ();
2910
2911 *vec_stmt = new_stmt;
2912 return true;
2913 }
2914
2915
2916 /* Function vect_supportable_dr_alignment.
2917
2918 Return whether the data reference DR is supported with respect to its
2919 alignment. */
2920
2921 static enum dr_alignment_support
2922 vect_supportable_dr_alignment (struct data_reference *dr)
2923 {
2924 tree vectype = STMT_VINFO_VECTYPE (vinfo_for_stmt (DR_STMT (dr)));
2925 enum machine_mode mode = (int) TYPE_MODE (vectype);
2926
2927 if (aligned_access_p (dr))
2928 return dr_aligned;
2929
2930 /* Possibly unaligned access. */
2931
2932 if (DR_IS_READ (dr))
2933 {
2934 if (vec_realign_load_optab->handlers[mode].insn_code != CODE_FOR_nothing
2935 && (!targetm.vectorize.builtin_mask_for_load
2936 || targetm.vectorize.builtin_mask_for_load ()))
2937 return dr_unaligned_software_pipeline;
2938
2939 if (movmisalign_optab->handlers[mode].insn_code != CODE_FOR_nothing)
2940 /* Can't software pipeline the loads, but can at least do them. */
2941 return dr_unaligned_supported;
2942 }
2943
2944 /* Unsupported. */
2945 return dr_unaligned_unsupported;
2946 }
2947
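/* In short (FORNOW): an aligned ref is always fine; an unaligned read
   can use the realign-load scheme when the target provides
   vec_realign_load (optionally with a mask_for_load builtin), or plain
   misaligned moves via movmisalign; an unaligned write has no fallback
   yet and is reported as unsupported.  */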
2948
2949 /* Function vect_transform_stmt.
2950
2951 Create a vectorized stmt to replace STMT, and insert it at BSI. */
2952
2953 static bool
2954 vect_transform_stmt (tree stmt, block_stmt_iterator *bsi)
2955 {
2956 bool is_store = false;
2957 tree vec_stmt = NULL_TREE;
2958 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
2959 bool done;
2960
2961 switch (STMT_VINFO_TYPE (stmt_info))
2962 {
2963 case op_vec_info_type:
2964 done = vectorizable_operation (stmt, bsi, &vec_stmt);
2965 gcc_assert (done);
2966 break;
2967
2968 case assignment_vec_info_type:
2969 done = vectorizable_assignment (stmt, bsi, &vec_stmt);
2970 gcc_assert (done);
2971 break;
2972
2973 case load_vec_info_type:
2974 done = vectorizable_load (stmt, bsi, &vec_stmt);
2975 gcc_assert (done);
2976 break;
2977
2978 case store_vec_info_type:
2979 done = vectorizable_store (stmt, bsi, &vec_stmt);
2980 gcc_assert (done);
2981 is_store = true;
2982 break;
2983 default:
2984 if (vect_debug_details (NULL))
2985 fprintf (dump_file, "stmt not supported.");
2986 gcc_unreachable ();
2987 }
2988
2989 STMT_VINFO_VEC_STMT (stmt_info) = vec_stmt;
2990
2991 return is_store;
2992 }
2993
2994
2995 /* This function builds ni_name = the number of iterations the loop
2996 executes, and inserts the computation on the loop preheader edge. */
2997
2998 static tree
2999 vect_build_loop_niters (loop_vec_info loop_vinfo)
3000 {
3001 tree ni_name, stmt, var;
3002 edge pe;
3003 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
3004 tree ni = unshare_expr (LOOP_VINFO_NITERS (loop_vinfo));
3005
3006 var = create_tmp_var (TREE_TYPE (ni), "niters");
3007 add_referenced_tmp_var (var);
3008 ni_name = force_gimple_operand (ni, &stmt, false, var);
3009
3010 pe = loop_preheader_edge (loop);
3011 if (stmt)
3012 {
3013 basic_block new_bb = bsi_insert_on_edge_immediate (pe, stmt);
3014 gcc_assert (!new_bb);
3015 }
3016
3017 return ni_name;
3018 }
3019
3020
3021 /* This function generates the following statements:
3022
3023 ni_name = number of iterations loop executes
3024 ratio = ni_name / vf
3025 ratio_mult_vf_name = ratio * vf
3026
3027 and places them at the loop preheader edge. */
3028
3029 static void
3030 vect_generate_tmps_on_preheader (loop_vec_info loop_vinfo,
3031 tree *ni_name_ptr,
3032 tree *ratio_mult_vf_name_ptr,
3033 tree *ratio_name_ptr)
3034 {
3035
3036 edge pe;
3037 basic_block new_bb;
3038 tree stmt, ni_name;
3039 tree var;
3040 tree ratio_name;
3041 tree ratio_mult_vf_name;
3042 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
3043 tree ni = LOOP_VINFO_NITERS (loop_vinfo);
3044 int vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
3045 tree log_vf = build_int_cst (unsigned_type_node, exact_log2 (vf));
3046
3047 pe = loop_preheader_edge (loop);
3048
3049 /* Generate a temporary variable that contains
3050 the number of iterations the loop executes. */
3051
3052 ni_name = vect_build_loop_niters (loop_vinfo);
3053
3054 /* Create: ratio = ni >> log2(vf) */
3055
3056 var = create_tmp_var (TREE_TYPE (ni), "bnd");
3057 add_referenced_tmp_var (var);
3058 ratio_name = make_ssa_name (var, NULL_TREE);
3059 stmt = build2 (MODIFY_EXPR, void_type_node, ratio_name,
3060 build2 (RSHIFT_EXPR, TREE_TYPE (ni_name), ni_name, log_vf));
3061 SSA_NAME_DEF_STMT (ratio_name) = stmt;
3062
3063 pe = loop_preheader_edge (loop);
3064 new_bb = bsi_insert_on_edge_immediate (pe, stmt);
3065 gcc_assert (!new_bb);
3066
3067 /* Create: ratio_mult_vf = ratio << log2 (vf). */
3068
3069 var = create_tmp_var (TREE_TYPE (ni), "ratio_mult_vf");
3070 add_referenced_tmp_var (var);
3071 ratio_mult_vf_name = make_ssa_name (var, NULL_TREE);
3072 stmt = build2 (MODIFY_EXPR, void_type_node, ratio_mult_vf_name,
3073 build2 (LSHIFT_EXPR, TREE_TYPE (ratio_name), ratio_name, log_vf));
3074 SSA_NAME_DEF_STMT (ratio_mult_vf_name) = stmt;
3075
3076 pe = loop_preheader_edge (loop);
3077 new_bb = bsi_insert_on_edge_immediate (pe, stmt);
3078 gcc_assert (!new_bb);
3079
3080 *ni_name_ptr = ni_name;
3081 *ratio_mult_vf_name_ptr = ratio_mult_vf_name;
3082 *ratio_name_ptr = ratio_name;
3083
3084 return;
3085 }
3086
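/* Worked example (illustrative numbers): with ni = 103 and vf = 8,
   log_vf = 3, so ratio = 103 >> 3 = 12 and ratio_mult_vf = 12 << 3 = 96;
   the vectorized loop will run 12 times, covering 96 scalar iterations,
   and the remaining 103 - 96 = 7 iterations fall to the epilog loop.  */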
3087
3088 /* Function vect_update_ivs_after_vectorizer.
3089
3090 "Advance" the induction variables of LOOP to the value they should take
3091 after the execution of LOOP. This is currently necessary because the
3092 vectorizer does not handle induction variables that are used after the
3093 loop. Such a situation occurs when the last iterations of LOOP are
3094 peeled, because:
3095 1. We introduced new uses after LOOP for IVs that were not originally used
3096 after LOOP: the IVs of LOOP are now used by an epilog loop.
3097 2. LOOP is going to be vectorized; this means that it will iterate N/VF
3098 times, whereas the loop IVs should be bumped N times.
3099
3100 Input:
3101 - LOOP - a loop that is going to be vectorized. The last few iterations
3102 of LOOP were peeled.
3103 - NITERS - the number of iterations that LOOP executes (before it is
3104 vectorized), i.e., the number of times the ivs should be bumped.
3105 - UPDATE_E - a successor edge of LOOP->exit that is on the (only) path
3106 coming out from LOOP on which there are uses of the LOOP ivs
3107 (this is the path from LOOP->exit to epilog_loop->preheader).
3108
3109 The new definitions of the ivs are placed in LOOP->exit.
3110 The phi args associated with the edge UPDATE_E in the bb
3111 UPDATE_E->dest are updated accordingly.
3112
3113 Assumption 1: Like the rest of the vectorizer, this function assumes
3114 a single loop exit that has a single predecessor.
3115
3116 Assumption 2: The phi nodes in the LOOP header and in update_bb are
3117 organized in the same order.
3118
3119 Assumption 3: The access function of the ivs is simple enough (see
3120 vect_can_advance_ivs_p). This assumption will be relaxed in the future.
3121
3122 Assumption 4: Exactly one of the successors of LOOP exit-bb is on a path
3123 coming out of LOOP on which the ivs of LOOP are used (this is the path
3124 that leads to the epilog loop; other paths skip the epilog loop). This
3125 path starts with the edge UPDATE_E, and its destination (denoted update_bb)
3126 needs to have its phis updated.
3127 */
3128
3129 static void
3130 vect_update_ivs_after_vectorizer (struct loop *loop, tree niters, edge update_e)
3131 {
3132 basic_block exit_bb = loop->exit_edges[0]->dest;
3133 tree phi, phi1;
3134 basic_block update_bb = update_e->dest;
3135
3136 /* gcc_assert (vect_can_advance_ivs_p (loop)); */
3137
3138 /* Make sure there exists a single-predecessor exit bb: */
3139 gcc_assert (EDGE_COUNT (exit_bb->preds) == 1);
3140
3141 for (phi = phi_nodes (loop->header), phi1 = phi_nodes (update_bb);
3142 phi && phi1;
3143 phi = PHI_CHAIN (phi), phi1 = PHI_CHAIN (phi1))
3144 {
3145 tree access_fn = NULL;
3146 tree evolution_part;
3147 tree init_expr;
3148 tree step_expr;
3149 tree var, stmt, ni, ni_name;
3150 block_stmt_iterator last_bsi;
3151
3152 /* Skip virtual phi's. */
3153 if (!is_gimple_reg (SSA_NAME_VAR (PHI_RESULT (phi))))
3154 {
3155 if (vect_debug_details (NULL))
3156 fprintf (dump_file, "virtual phi. skip.");
3157 continue;
3158 }
3159
3160 access_fn = analyze_scalar_evolution (loop, PHI_RESULT (phi));
3161 gcc_assert (access_fn);
3162 evolution_part =
3163 unshare_expr (evolution_part_in_loop_num (access_fn, loop->num));
3164 gcc_assert (evolution_part != NULL_TREE);
3165
3166 /* FORNOW: We do not support IVs whose evolution function is a polynomial
3167 of degree >= 2 or exponential. */
3168 gcc_assert (!tree_is_chrec (evolution_part));
3169
3170 step_expr = evolution_part;
3171 init_expr = unshare_expr (initial_condition_in_loop_num (access_fn,
3172 loop->num));
3173
3174 ni = build2 (PLUS_EXPR, TREE_TYPE (init_expr),
3175 build2 (MULT_EXPR, TREE_TYPE (niters),
3176 niters, step_expr), init_expr);
3177
3178 var = create_tmp_var (TREE_TYPE (init_expr), "tmp");
3179 add_referenced_tmp_var (var);
3180
3181 ni_name = force_gimple_operand (ni, &stmt, false, var);
3182
3183 /* Insert stmt into exit_bb. */
3184 last_bsi = bsi_last (exit_bb);
3185 if (stmt)
3186 bsi_insert_before (&last_bsi, stmt, BSI_SAME_STMT);
3187
3188 /* Fix phi expressions in the successor bb. */
3189 gcc_assert (PHI_ARG_DEF_FROM_EDGE (phi1, update_e) ==
3190 PHI_ARG_DEF_FROM_EDGE (phi, EDGE_SUCC (loop->latch, 0)));
3191 SET_PHI_ARG_DEF (phi1, phi_arg_from_edge (phi1, update_e), ni_name);
3192 }
3193 }
3194
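/* Example (hypothetical iv): for an iv with init_expr = 0 and
   step_expr = 4, and NITERS = n, the code above computes
   ni = n * 4 + 0 in the exit bb and redirects the corresponding phi
   arg in update_bb to that value, so the epilog loop resumes the iv
   where the scalar loop would have left it.  */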
3195
3196 /* Function vect_do_peeling_for_loop_bound.
3197
3198 Peel the last iterations of the loop represented by LOOP_VINFO.
3199 The peeled iterations form a new epilog loop. Given that the loop now
3200 iterates NITERS times, the new epilog loop iterates
3201 NITERS % VECTORIZATION_FACTOR times.
3202
3203 The original loop will later be made to iterate
3204 NITERS / VECTORIZATION_FACTOR times (this value is placed into RATIO). */
3205
3206 static void
3207 vect_do_peeling_for_loop_bound (loop_vec_info loop_vinfo, tree *ratio,
3208 struct loops *loops)
3209 {
3210
3211 tree ni_name, ratio_mult_vf_name;
3212 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
3213 struct loop *new_loop;
3214 edge update_e;
3215 #ifdef ENABLE_CHECKING
3216 int loop_num;
3217 #endif
3218
3219 if (vect_debug_details (NULL))
3220 fprintf (dump_file, "\n<<vect_do_peeling_for_loop_bound>>\n");
3221
3222 /* Generate the following variables on the preheader of original loop:
3223
3224 ni_name = number of iteration the original loop executes
3225 ratio = ni_name / vf
3226 ratio_mult_vf_name = ratio * vf */
3227 vect_generate_tmps_on_preheader (loop_vinfo, &ni_name,
3228 &ratio_mult_vf_name, ratio);
3229
3230 /* Update loop info. */
3231 loop->pre_header = loop_preheader_edge (loop)->src;
3232 loop->pre_header_edges[0] = loop_preheader_edge (loop);
3233
3234 #ifdef ENABLE_CHECKING
3235 loop_num = loop->num;
3236 #endif
3237 new_loop = slpeel_tree_peel_loop_to_edge (loop, loops, loop->exit_edges[0],
3238 ratio_mult_vf_name, ni_name, false);
3239 #ifdef ENABLE_CHECKING
3240 gcc_assert (new_loop);
3241 gcc_assert (loop_num == loop->num);
3242 slpeel_verify_cfg_after_peeling (loop, new_loop);
3243 #endif
3244
3245 /* A guard that controls whether the new_loop is to be executed or skipped
3246 is placed in LOOP->exit. LOOP->exit therefore has two successors - one
3247 is the preheader of NEW_LOOP, where the IVs from LOOP are used. The other
3248 is a bb after NEW_LOOP, where these IVs are not used. Find the edge that
3249 is on the path where the LOOP IVs are used and need to be updated. */
3250
3251 if (EDGE_PRED (new_loop->pre_header, 0)->src == loop->exit_edges[0]->dest)
3252 update_e = EDGE_PRED (new_loop->pre_header, 0);
3253 else
3254 update_e = EDGE_PRED (new_loop->pre_header, 1);
3255
3256 /* Update IVs of original loop as if they were advanced
3257 by ratio_mult_vf_name steps. */
3258 vect_update_ivs_after_vectorizer (loop, ratio_mult_vf_name, update_e);
3259
3260 /* After peeling we have to reset scalar evolution analyzer. */
3261 scev_reset ();
3262
3263 return;
3264 }
3265
3266
3267 /* Function vect_gen_niters_for_prolog_loop.
3268
3269 Set the number of iterations for the loop represented by LOOP_VINFO
3270 to the minimum between LOOP_NITERS (the original iteration count of the loop)
3271 and the misalignment of DR - the first data reference recorded in
3272 LOOP_VINFO_UNALIGNED_DR (LOOP_VINFO). As a result, after the execution of
3273 this loop, the data reference DR will refer to an aligned location.
3274
3275 The following computation is generated:
3276
3277 compute address misalignment in bytes:
3278 addr_mis = addr & (vectype_size - 1)
3279
3280 prolog_niters = min ( LOOP_NITERS , (VF - addr_mis/elem_size)&(VF-1) )
3281
3282 (elem_size = element type size; an element is the scalar element
3283 whose type is the inner type of the vectype) */
3284
3285 static tree
3286 vect_gen_niters_for_prolog_loop (loop_vec_info loop_vinfo, tree loop_niters)
3287 {
3288 struct data_reference *dr = LOOP_VINFO_UNALIGNED_DR (loop_vinfo);
3289 int vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
3290 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
3291 tree var, stmt;
3292 tree iters, iters_name;
3293 edge pe;
3294 basic_block new_bb;
3295 tree dr_stmt = DR_STMT (dr);
3296 stmt_vec_info stmt_info = vinfo_for_stmt (dr_stmt);
3297 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
3298 int vectype_align = TYPE_ALIGN (vectype) / BITS_PER_UNIT;
3299 tree elem_misalign;
3300 tree byte_misalign;
3301 tree new_stmts = NULL_TREE;
3302 tree start_addr =
3303 vect_create_addr_base_for_vector_ref (dr_stmt, &new_stmts, NULL_TREE);
3304 tree ptr_type = TREE_TYPE (start_addr);
3305 tree size = TYPE_SIZE (ptr_type);
3306 tree type = lang_hooks.types.type_for_size (tree_low_cst (size, 1), 1);
3307 tree vectype_size_minus_1 = build_int_cst (type, vectype_align - 1);
3308 tree vf_minus_1 = build_int_cst (unsigned_type_node, vf - 1);
3309 tree niters_type = TREE_TYPE (loop_niters);
3310 tree elem_size_log =
3311 build_int_cst (unsigned_type_node, exact_log2 (vectype_align/vf));
3312 tree vf_tree = build_int_cst (unsigned_type_node, vf);
3313
3314 pe = loop_preheader_edge (loop);
3315 new_bb = bsi_insert_on_edge_immediate (pe, new_stmts);
3316 gcc_assert (!new_bb);
3317
3318 /* Create: byte_misalign = addr & (vectype_size - 1) */
3319 byte_misalign = build2 (BIT_AND_EXPR, type, start_addr, vectype_size_minus_1);
3320
3321 /* Create: elem_misalign = byte_misalign / element_size */
3322 elem_misalign =
3323 build2 (RSHIFT_EXPR, unsigned_type_node, byte_misalign, elem_size_log);
3324
3325 /* Create: (niters_type) (VF - elem_misalign)&(VF - 1) */
3326 iters = build2 (MINUS_EXPR, unsigned_type_node, vf_tree, elem_misalign);
3327 iters = build2 (BIT_AND_EXPR, unsigned_type_node, iters, vf_minus_1);
3328 iters = fold_convert (niters_type, iters);
3329
3330 /* Create: prolog_loop_niters = min (iters, loop_niters) */
3331 /* If the loop bound is known at compile time we already verified that it is
3332 greater than vf; since the misalignment ('iters') is at most vf, there's
3333 no need to generate the MIN_EXPR in this case. */
3334 if (TREE_CODE (loop_niters) != INTEGER_CST)
3335 iters = build2 (MIN_EXPR, niters_type, iters, loop_niters);
3336
3337 var = create_tmp_var (niters_type, "prolog_loop_niters");
3338 add_referenced_tmp_var (var);
3339 iters_name = force_gimple_operand (iters, &stmt, false, var);
3340
3341 /* Insert stmt on loop preheader edge. */
3342 pe = loop_preheader_edge (loop);
3343 if (stmt)
3344 {
3345 basic_block new_bb = bsi_insert_on_edge_immediate (pe, stmt);
3346 gcc_assert (!new_bb);
3347 }
3348
3349 return iters_name;
3350 }
3351
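/* Worked example (numbers are assumptions): for V4SI we have
   vectype_align = 16, vf = 4 and elem_size = 4, so elem_size_log = 2.
   A start address ending in ...8 gives byte_misalign = 8 and
   elem_misalign = 8 >> 2 = 2, hence the prolog loop runs
   (4 - 2) & 3 = 2 iterations, after which the access is 16-byte
   aligned.  */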
3352
3353 /* Function vect_update_inits_of_dr.
3354
3355 NITERS iterations were peeled from LOOP. DR represents a data reference
3356 in LOOP. This function updates the information recorded in DR to
3357 account for the fact that the first NITERS iterations had already been
3358 executed. Specifically, it updates the OFFSET field of stmt_info. */
3359
3360 static void
3361 vect_update_inits_of_dr (struct data_reference *dr, tree niters)
3362 {
3363 stmt_vec_info stmt_info = vinfo_for_stmt (DR_STMT (dr));
3364 tree offset = STMT_VINFO_VECT_INIT_OFFSET (stmt_info);
3365
3366 niters = fold (build2 (MULT_EXPR, TREE_TYPE (niters), niters,
3367 STMT_VINFO_VECT_STEP (stmt_info)));
3368 offset = fold (build2 (PLUS_EXPR, TREE_TYPE (offset), offset, niters));
3369 STMT_VINFO_VECT_INIT_OFFSET (stmt_info) = offset;
3370 }
3371
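/* E.g. (illustration): if NITERS = 2 iterations were peeled and the
   step of DR is 4 bytes, the recorded init offset grows by
   2 * 4 = 8 bytes.  */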
3372
3373 /* Function vect_update_inits_of_drs.
3374
3375 NITERS iterations were peeled from the loop represented by LOOP_VINFO.
3376 This function updates the information recorded for the data references in
3377 the loop to account for the fact that the first NITERS iterations had
3378 already been executed. Specifically, it updates the initial_condition of the
3379 access_function of all the data_references in the loop. */
3380
3381 static void
3382 vect_update_inits_of_drs (loop_vec_info loop_vinfo, tree niters)
3383 {
3384 unsigned int i;
3385 varray_type loop_write_datarefs = LOOP_VINFO_DATAREF_WRITES (loop_vinfo);
3386 varray_type loop_read_datarefs = LOOP_VINFO_DATAREF_READS (loop_vinfo);
3387
3388 if (dump_file && (dump_flags & TDF_DETAILS))
3389 fprintf (dump_file, "\n<<vect_update_inits_of_drs>>\n");
3390
3391 for (i = 0; i < VARRAY_ACTIVE_SIZE (loop_write_datarefs); i++)
3392 {
3393 struct data_reference *dr = VARRAY_GENERIC_PTR (loop_write_datarefs, i);
3394 vect_update_inits_of_dr (dr, niters);
3395 }
3396
3397 for (i = 0; i < VARRAY_ACTIVE_SIZE (loop_read_datarefs); i++)
3398 {
3399 struct data_reference *dr = VARRAY_GENERIC_PTR (loop_read_datarefs, i);
3400 vect_update_inits_of_dr (dr, niters);
3401 }
3402 }
3403
3404
3405 /* Function vect_do_peeling_for_alignment
3406
3407 Peel the first 'niters' iterations of the loop represented by LOOP_VINFO.
3408 'niters' is set to the misalignment of one of the data references in the
3409 loop, thereby forcing it to refer to an aligned location at the beginning
3410 of the execution of this loop. The data reference for which we are
3411 peeling is recorded in LOOP_VINFO_UNALIGNED_DR. */
3412
3413 static void
3414 vect_do_peeling_for_alignment (loop_vec_info loop_vinfo, struct loops *loops)
3415 {
3416 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
3417 tree niters_of_prolog_loop, ni_name;
3418 tree n_iters;
3419 struct loop *new_loop;
3420
3421 if (vect_debug_details (NULL))
3422 fprintf (dump_file, "\n<<vect_do_peeling_for_alignment>>\n");
3423
3424 ni_name = vect_build_loop_niters (loop_vinfo);
3425 niters_of_prolog_loop = vect_gen_niters_for_prolog_loop (loop_vinfo, ni_name);
3426
3427 /* Peel off a prolog loop and make it iterate niters_of_prolog_loop times. */
3428 new_loop =
3429 slpeel_tree_peel_loop_to_edge (loop, loops, loop_preheader_edge (loop),
3430 niters_of_prolog_loop, ni_name, true);
3431 #ifdef ENABLE_CHECKING
3432 gcc_assert (new_loop);
3433 slpeel_verify_cfg_after_peeling (new_loop, loop);
3434 #endif
3435
3436 /* Update number of times loop executes. */
3437 n_iters = LOOP_VINFO_NITERS (loop_vinfo);
3438 LOOP_VINFO_NITERS (loop_vinfo) =
3439 build2 (MINUS_EXPR, TREE_TYPE (n_iters), n_iters, niters_of_prolog_loop);
3440
3441 /* Update the init conditions of the access functions of all data refs. */
3442 vect_update_inits_of_drs (loop_vinfo, niters_of_prolog_loop);
3443
3444 /* After peeling we have to reset scalar evolution analyzer. */
3445 scev_reset ();
3446
3447 return;
3448 }
3449
3450
3451 /* Function vect_transform_loop.
3452
3453 The analysis phase has determined that the loop is vectorizable.
3454 Vectorize the loop - create vectorized stmts to replace the scalar
3455 stmts in the loop, and update the loop exit condition. */
3456
3457 static void
3458 vect_transform_loop (loop_vec_info loop_vinfo,
3459 struct loops *loops ATTRIBUTE_UNUSED)
3460 {
3461 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
3462 basic_block *bbs = LOOP_VINFO_BBS (loop_vinfo);
3463 int nbbs = loop->num_nodes;
3464 block_stmt_iterator si;
3465 int i;
3466 tree ratio = NULL;
3467 int vectorization_factor = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
3468
3469 if (vect_debug_details (NULL))
3470 fprintf (dump_file, "\n<<vect_transform_loop>>\n");
3471
3472
3473 /* Peel the loop if there are data refs with unknown alignment.
3474 Only one store with unknown alignment - the one we peel for - is allowed. */
3475
3476 if (LOOP_DO_PEELING_FOR_ALIGNMENT (loop_vinfo))
3477 vect_do_peeling_for_alignment (loop_vinfo, loops);
3478
3479 /* If the loop has a symbolic number of iterations 'n' (i.e. it's not a
3480 compile time constant), or it is a constant that is not divisible by the
3481 vectorization factor, then an epilog loop needs to be created.
3482 We therefore duplicate the loop: the original loop will be vectorized,
3483 and will compute the first (n/VF) iterations. The second copy of the loop
3484 will remain scalar and will compute the remaining (n%VF) iterations.
3485 (VF is the vectorization factor). */
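/* Illustrative example (hypothetical values): with n = 10 and VF = 4,
   the vectorized loop executes n/VF = 2 iterations (covering 8
   elements) and the scalar epilog loop executes the remaining
   n%VF = 2 iterations. When n is symbolic the same split is performed
   with a ratio computed at runtime.  */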
3486
3487 if (!LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo)
3488 || (LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo)
3489 && LOOP_VINFO_INT_NITERS (loop_vinfo) % vectorization_factor != 0))
3490 vect_do_peeling_for_loop_bound (loop_vinfo, &ratio, loops);
3491 else
3492 ratio = build_int_cst (TREE_TYPE (LOOP_VINFO_NITERS (loop_vinfo)),
3493 LOOP_VINFO_INT_NITERS (loop_vinfo) / vectorization_factor);
3494
3495 /* 1) Make sure the loop header has exactly two entries
3496 2) Make sure we have a preheader basic block. */
3497
3498 gcc_assert (EDGE_COUNT (loop->header->preds) == 2);
3499
3500 loop_split_edge_with (loop_preheader_edge (loop), NULL);
3501
3502
3503 /* FORNOW: the vectorizer supports only loops whose body consists
3504 of one basic block (header + empty latch). When the vectorizer
3505 supports more involved loop forms, the order in which the BBs are
3506 traversed will need to be reconsidered. */
3507
3508 for (i = 0; i < nbbs; i++)
3509 {
3510 basic_block bb = bbs[i];
3511
3512 for (si = bsi_start (bb); !bsi_end_p (si);)
3513 {
3514 tree stmt = bsi_stmt (si);
3515 stmt_vec_info stmt_info;
3516 bool is_store;
3517
3518 if (vect_debug_details (NULL))
3519 {
3520 fprintf (dump_file, "------>vectorizing statement: ");
3521 print_generic_expr (dump_file, stmt, TDF_SLIM);
3522 }
3523 stmt_info = vinfo_for_stmt (stmt);
3524 gcc_assert (stmt_info);
3525 if (!STMT_VINFO_RELEVANT_P (stmt_info))
3526 {
3527 bsi_next (&si);
3528 continue;
3529 }
3530 #ifdef ENABLE_CHECKING
3531 /* FORNOW: Verify that all stmts operate on the same number of
3532 units and no inner unrolling is necessary. */
3533 gcc_assert
3534 (GET_MODE_NUNITS (TYPE_MODE (STMT_VINFO_VECTYPE (stmt_info)))
3535 == vectorization_factor);
3536 #endif
3537 /* -------- vectorize statement ------------ */
3538 if (vect_debug_details (NULL))
3539 fprintf (dump_file, "transform statement.");
3540
3541 is_store = vect_transform_stmt (stmt, &si);
3542 if (is_store)
3543 {
3544 /* free the attached stmt_vec_info and remove the stmt. */
3545 stmt_ann_t ann = stmt_ann (stmt);
3546 free (stmt_info);
3547 set_stmt_info (ann, NULL);
3548 bsi_remove (&si);
3549 continue;
3550 }
3551
3552 bsi_next (&si);
3553 } /* stmts in BB */
3554 } /* BBs in loop */
3555
3556 slpeel_make_loop_iterate_ntimes (loop, ratio);
3557
3558 if (vect_debug_details (loop))
3559 fprintf (dump_file, "Success! loop vectorized.");
3560 if (vect_debug_stats (loop))
3561 fprintf (dump_file, "LOOP VECTORIZED.");
3562 }
3563
3564
3565 /* Function vect_is_simple_use.
3566
3567 Input:
3568 LOOP - the loop that is being vectorized.
3569 OPERAND - operand of a stmt in LOOP.
3570 DEF - the defining stmt in case OPERAND is an SSA_NAME.
3571
3572 Returns whether a stmt with OPERAND can be vectorized.
3573 Supportable operands are constants, loop invariants, and operands that are
3574 defined by the current iteration of the loop. Unsupportable operands are
3575 those that are defined by a previous iteration of the loop (as is the case
3576 in reduction/induction computations). */
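/* Illustrative examples (not from the original sources): in
     a[i] = b[i] + x;
   a constant or loop-invariant x, and the SSA_NAME holding the value
   loaded from b[i] (defined by a MODIFY_EXPR in the current iteration),
   are both simple uses. In
     sum = sum + a[i];
   the use of sum is defined by a PHI_NODE inside the loop (a reduction
   cycle), so FORNOW it is rejected.  */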
3577
3578 static bool
3579 vect_is_simple_use (tree operand, struct loop *loop, tree *def)
3580 {
3581 tree def_stmt;
3582 basic_block bb;
3583
3584 if (def)
3585 *def = NULL_TREE;
3586
3587 if (TREE_CODE (operand) == INTEGER_CST || TREE_CODE (operand) == REAL_CST)
3588 return true;
3589
3590 if (TREE_CODE (operand) != SSA_NAME)
3591 return false;
3592
3593 def_stmt = SSA_NAME_DEF_STMT (operand);
3594 if (def_stmt == NULL_TREE )
3595 {
3596 if (vect_debug_details (NULL))
3597 fprintf (dump_file, "no def_stmt.");
3598 return false;
3599 }
3600
3601 /* An empty stmt is expected only in case of a function argument.
3602 (Otherwise we expect a phi_node or a modify_expr.) */
3603 if (IS_EMPTY_STMT (def_stmt))
3604 {
3605 tree arg = TREE_OPERAND (def_stmt, 0);
3606 if (TREE_CODE (arg) == INTEGER_CST || TREE_CODE (arg) == REAL_CST)
3607 return true;
3608 if (vect_debug_details (NULL))
3609 {
3610 fprintf (dump_file, "Unexpected empty stmt: ");
3611 print_generic_expr (dump_file, def_stmt, TDF_SLIM);
3612 }
3613 return false;
3614 }
3615
3616 /* phi_node inside the loop indicates an induction/reduction pattern.
3617 This is not supported yet. */
3618 bb = bb_for_stmt (def_stmt);
3619 if (TREE_CODE (def_stmt) == PHI_NODE && flow_bb_inside_loop_p (loop, bb))
3620 {
3621 if (vect_debug_details (NULL))
3622 fprintf (dump_file, "reduction/induction - unsupported.");
3623 return false; /* FORNOW: not supported yet. */
3624 }
3625
3626 /* Expecting a modify_expr or a phi_node. */
3627 if (TREE_CODE (def_stmt) == MODIFY_EXPR
3628 || TREE_CODE (def_stmt) == PHI_NODE)
3629 {
3630 if (def)
3631 *def = def_stmt;
3632 return true;
3633 }
3634
3635 return false;
3636 }
3637
3638
3639 /* Function vect_analyze_operations.
3640
3641 Scan the loop stmts and make sure they are all vectorizable. */
3642
3643 static bool
3644 vect_analyze_operations (loop_vec_info loop_vinfo)
3645 {
3646 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
3647 basic_block *bbs = LOOP_VINFO_BBS (loop_vinfo);
3648 int nbbs = loop->num_nodes;
3649 block_stmt_iterator si;
3650 unsigned int vectorization_factor = 0;
3651 int i;
3652 bool ok;
3653 tree scalar_type;
3654
3655 if (vect_debug_details (NULL))
3656 fprintf (dump_file, "\n<<vect_analyze_operations>>\n");
3657
3658 for (i = 0; i < nbbs; i++)
3659 {
3660 basic_block bb = bbs[i];
3661
3662 for (si = bsi_start (bb); !bsi_end_p (si); bsi_next (&si))
3663 {
3664 tree stmt = bsi_stmt (si);
3665 unsigned int nunits;
3666 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
3667 tree vectype;
3668
3669 if (vect_debug_details (NULL))
3670 {
3671 fprintf (dump_file, "==> examining statement: ");
3672 print_generic_expr (dump_file, stmt, TDF_SLIM);
3673 }
3674
3675 gcc_assert (stmt_info);
3676
3677 /* Skip stmts which do not need to be vectorized.
3678 This is expected to include:
3679 - the COND_EXPR which is the loop exit condition
3680 - any LABEL_EXPRs in the loop
3681 - computations that are used only for array indexing or loop
3682 control */
3683
3684 if (!STMT_VINFO_RELEVANT_P (stmt_info))
3685 {
3686 if (vect_debug_details (NULL))
3687 fprintf (dump_file, "irrelevant.");
3688 continue;
3689 }
3690
3691 if (VECTOR_MODE_P (TYPE_MODE (TREE_TYPE (stmt))))
3692 {
3693 if (vect_debug_stats (loop) || vect_debug_details (loop))
3694 {
3695 fprintf (dump_file, "not vectorized: vector stmt in loop:");
3696 print_generic_expr (dump_file, stmt, TDF_SLIM);
3697 }
3698 return false;
3699 }
3700
3701 if (STMT_VINFO_DATA_REF (stmt_info))
3702 scalar_type = TREE_TYPE (DR_REF (STMT_VINFO_DATA_REF (stmt_info)));
3703 else if (TREE_CODE (stmt) == MODIFY_EXPR)
3704 scalar_type = TREE_TYPE (TREE_OPERAND (stmt, 0));
3705 else
3706 scalar_type = TREE_TYPE (stmt);
3707
3708 if (vect_debug_details (NULL))
3709 {
3710 fprintf (dump_file, "get vectype for scalar type: ");
3711 print_generic_expr (dump_file, scalar_type, TDF_SLIM);
3712 }
3713
3714 vectype = get_vectype_for_scalar_type (scalar_type);
3715 if (!vectype)
3716 {
3717 if (vect_debug_stats (loop) || vect_debug_details (loop))
3718 {
3719 fprintf (dump_file, "not vectorized: unsupported data-type ");
3720 print_generic_expr (dump_file, scalar_type, TDF_SLIM);
3721 }
3722 return false;
3723 }
3724
3725 if (vect_debug_details (NULL))
3726 {
3727 fprintf (dump_file, "vectype: ");
3728 print_generic_expr (dump_file, vectype, TDF_SLIM);
3729 }
3730 STMT_VINFO_VECTYPE (stmt_info) = vectype;
3731
3732 ok = (vectorizable_operation (stmt, NULL, NULL)
3733 || vectorizable_assignment (stmt, NULL, NULL)
3734 || vectorizable_load (stmt, NULL, NULL)
3735 || vectorizable_store (stmt, NULL, NULL));
3736
3737 if (!ok)
3738 {
3739 if (vect_debug_stats (loop) || vect_debug_details (loop))
3740 {
3741 fprintf (dump_file, "not vectorized: stmt not supported: ");
3742 print_generic_expr (dump_file, stmt, TDF_SLIM);
3743 }
3744 return false;
3745 }
3746
3747 nunits = GET_MODE_NUNITS (TYPE_MODE (vectype));
3748 if (vect_debug_details (NULL))
3749 fprintf (dump_file, "nunits = %d", nunits);
3750
3751 if (vectorization_factor)
3752 {
3753 /* FORNOW: don't allow mixed units.
3754 This restriction will be relaxed in the future. */
3755 if (nunits != vectorization_factor)
3756 {
3757 if (vect_debug_stats (loop) || vect_debug_details (loop))
3758 fprintf (dump_file, "not vectorized: mixed data-types");
3759 return false;
3760 }
3761 }
3762 else
3763 vectorization_factor = nunits;
3764
3765 #ifdef ENABLE_CHECKING
3766 gcc_assert (GET_MODE_SIZE (TYPE_MODE (scalar_type))
3767 * vectorization_factor == UNITS_PER_SIMD_WORD);
3768 #endif
3769 }
3770 }
3771
3772 /* TODO: Analyze cost. Decide if worth while to vectorize. */
3773
3774 if (vectorization_factor <= 1)
3775 {
3776 if (vect_debug_stats (loop) || vect_debug_details (loop))
3777 fprintf (dump_file, "not vectorized: unsupported data-type");
3778 return false;
3779 }
3780 LOOP_VINFO_VECT_FACTOR (loop_vinfo) = vectorization_factor;
3781
3782 if (LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo) && vect_debug_details (NULL))
3783 fprintf (dump_file,
3784 "vectorization_factor = %d, niters = " HOST_WIDE_INT_PRINT_DEC,
3785 vectorization_factor, LOOP_VINFO_INT_NITERS (loop_vinfo));
3786
3787 if (LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo)
3788 && LOOP_VINFO_INT_NITERS (loop_vinfo) < vectorization_factor)
3789 {
3790 if (vect_debug_stats (loop) || vect_debug_details (loop))
3791 fprintf (dump_file, "not vectorized: iteration count too small.");
3792 return false;
3793 }
3794
3795 if (!LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo)
3796 || LOOP_VINFO_INT_NITERS (loop_vinfo) % vectorization_factor != 0)
3797 {
3798 if (vect_debug_stats (loop) || vect_debug_details (loop))
3799 fprintf (dump_file, "epilog loop required.");
3800 if (!vect_can_advance_ivs_p (loop))
3801 {
3802 if (vect_debug_stats (loop) || vect_debug_details (loop))
3803 fprintf (dump_file, "not vectorized: can't create epilog loop 1.");
3804 return false;
3805 }
3806 if (!slpeel_can_duplicate_loop_p (loop, loop->exit_edges[0]))
3807 {
3808 if (vect_debug_stats (loop) || vect_debug_details (loop))
3809 fprintf (dump_file, "not vectorized: can't create epilog loop 2.");
3810 return false;
3811 }
3812 }
3813
3814 return true;
3815 }
3816
3817
3818 /* Function exist_non_indexing_operands_for_use_p
3819
3820 USE is one of the uses attached to STMT. Check if USE is
3821 used in STMT for anything other than indexing an array. */
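/* Illustrative example (not from the original sources): in the stmt
     a[i] = x;
   the use 'x' is the stored value, so the function returns true for
   it, whereas the use 'i' only indexes the array, so the function
   returns false for it.  */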
3822
3823 static bool
3824 exist_non_indexing_operands_for_use_p (tree use, tree stmt)
3825 {
3826 tree operand;
3827 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
3828
3829 /* USE corresponds to some operand in STMT. If there is no data
3830 reference in STMT, then any operand that corresponds to USE
3831 is not indexing an array. */
3832 if (!STMT_VINFO_DATA_REF (stmt_info))
3833 return true;
3834
3835 /* STMT has a data_ref. FORNOW this means that it's of one of
3836 the following forms:
3837 -1- ARRAY_REF = var
3838 -2- var = ARRAY_REF
3839 (This should have been verified in analyze_data_refs).
3840
3841 'var' in the second case corresponds to a def, not a use,
3842 so USE cannot correspond to any operands that are not used
3843 for array indexing.
3844
3845 Therefore, all we need to check is if STMT falls into the
3846 first case, and whether var corresponds to USE. */
3847
3848 if (TREE_CODE (TREE_OPERAND (stmt, 0)) == SSA_NAME)
3849 return false;
3850
3851 operand = TREE_OPERAND (stmt, 1);
3852
3853 if (TREE_CODE (operand) != SSA_NAME)
3854 return false;
3855
3856 if (operand == use)
3857 return true;
3858
3859 return false;
3860 }
3861
3862
3863 /* Function vect_is_simple_iv_evolution.
3864
3865 FORNOW: A simple evolution of an induction variable in the loop is
3866 considered a polynomial evolution with constant step. */
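/* Illustrative examples (hypothetical access functions): an evolution
   such as {0, +, 1}_loop yields init 0 and step 1, which is simple
   (and acceptable even under STRICT). An access function like
   {0, +, {0, +, 1}_loop}_loop (a polynomial of degree 2), or one whose
   step is not an INTEGER_CST, is rejected.  */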
3867
3868 static bool
3869 vect_is_simple_iv_evolution (unsigned loop_nb, tree access_fn, tree * init,
3870 tree * step, bool strict)
3871 {
3872 tree init_expr;
3873 tree step_expr;
3874
3875 tree evolution_part = evolution_part_in_loop_num (access_fn, loop_nb);
3876
3877 /* When there is no evolution in this loop, the evolution function
3878 is not "simple". */
3879 if (evolution_part == NULL_TREE)
3880 return false;
3881
3882 /* When the evolution is a polynomial of degree >= 2
3883 the evolution function is not "simple". */
3884 if (tree_is_chrec (evolution_part))
3885 return false;
3886
3887 step_expr = evolution_part;
3888 init_expr = unshare_expr (initial_condition_in_loop_num (access_fn, loop_nb));
3889
3890 if (vect_debug_details (NULL))
3891 {
3892 fprintf (dump_file, "step: ");
3893 print_generic_expr (dump_file, step_expr, TDF_SLIM);
3894 fprintf (dump_file, ", init: ");
3895 print_generic_expr (dump_file, init_expr, TDF_SLIM);
3896 }
3897
3898 *init = init_expr;
3899 *step = step_expr;
3900
3901 if (TREE_CODE (step_expr) != INTEGER_CST)
3902 {
3903 if (vect_debug_details (NULL))
3904 fprintf (dump_file, "step unknown.");
3905 return false;
3906 }
3907
3908 if (strict)
3909 if (!integer_onep (step_expr))
3910 {
3911 if (vect_debug_details (NULL))
3912 print_generic_expr (dump_file, step_expr, TDF_SLIM);
3913 return false;
3914 }
3915
3916 return true;
3917 }
3918
3919
3920 /* Function vect_analyze_scalar_cycles.
3921
3922 Examine the cross iteration def-use cycles of scalar variables, by
3923 analyzing the loop (scalar) PHIs; verify that the cross iteration def-use
3924 cycles that they represent do not impede vectorization.
3925
3926 FORNOW: Reduction, as in the following loop, is not supported yet:
3927 loop1:
3928 for (i=0; i<N; i++)
3929 sum += a[i];
3930 The cross-iteration cycle corresponding to variable 'sum' will be
3931 considered too complicated and will impede vectorization.
3932
3933 FORNOW: Induction, as in the following loop, is not supported yet:
3934 loop2:
3935 for (i=0; i<N; i++)
3936 a[i] = i;
3937
3938 However, the following loop *is* vectorizable:
3939 loop3:
3940 for (i=0; i<N; i++)
3941 a[i] = b[i];
3942
3943 In both loops there exists a def-use cycle for the variable i:
3944 loop: i_2 = PHI (i_0, i_1)
3945 a[i_2] = ...;
3946 i_1 = i_2 + 1;
3947 GOTO loop;
3948
3949 The evolution of the above cycle is considered simple enough;
3950 however, we also check that the cycle does not need to be
3951 vectorized, i.e., we check that the variable that this cycle
3952 defines is only used for array indexing or in stmts that do not
3953 need to be vectorized. This is not the case in loop2, but it
3954 *is* the case in loop3. */
3955
3956 static bool
3957 vect_analyze_scalar_cycles (loop_vec_info loop_vinfo)
3958 {
3959 tree phi;
3960 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
3961 basic_block bb = loop->header;
3962 tree dummy;
3963
3964 if (vect_debug_details (NULL))
3965 fprintf (dump_file, "\n<<vect_analyze_scalar_cycles>>\n");
3966
3967 for (phi = phi_nodes (bb); phi; phi = PHI_CHAIN (phi))
3968 {
3969 tree access_fn = NULL;
3970
3971 if (vect_debug_details (NULL))
3972 {
3973 fprintf (dump_file, "Analyze phi: ");
3974 print_generic_expr (dump_file, phi, TDF_SLIM);
3975 }
3976
3977 /* Skip virtual phi's. The data dependences that are associated with
3978 virtual defs/uses (i.e., memory accesses) are analyzed elsewhere. */
3979
3980 if (!is_gimple_reg (SSA_NAME_VAR (PHI_RESULT (phi))))
3981 {
3982 if (vect_debug_details (NULL))
3983 fprintf (dump_file, "virtual phi. skip.");
3984 continue;
3985 }
3986
3987 /* Analyze the evolution function. */
3988
3989 /* FORNOW: The only scalar cross-iteration cycles that we allow are
3990 those of loop induction variables; This property is verified here.
3991
3992 Furthermore, if that induction variable is used in an operation
3993 that needs to be vectorized (i.e, is not solely used to index
3994 arrays and check the exit condition) - we do not support its
3995 vectorization yet. This property is verified in vect_is_simple_use,
3996 during vect_analyze_operations. */
3997
3998 access_fn = /* instantiate_parameters
3999 (loop,*/
4000 analyze_scalar_evolution (loop, PHI_RESULT (phi));
4001
4002 if (!access_fn)
4003 {
4004 if (vect_debug_stats (loop) || vect_debug_details (loop))
4005 fprintf (dump_file, "not vectorized: unsupported scalar cycle.");
4006 return false;
4007 }
4008
4009 if (vect_debug_details (NULL))
4010 {
4011 fprintf (dump_file, "Access function of PHI: ");
4012 print_generic_expr (dump_file, access_fn, TDF_SLIM);
4013 }
4014
4015 if (!vect_is_simple_iv_evolution (loop->num, access_fn, &dummy,
4016 &dummy, false))
4017 {
4018 if (vect_debug_stats (loop) || vect_debug_details (loop))
4019 fprintf (dump_file, "not vectorized: unsupported scalar cycle.");
4020 return false;
4021 }
4022 }
4023
4024 return true;
4025 }
4026
4027
4028 /* Function vect_analyze_data_ref_dependence.
4029
4030 Return TRUE if there (might) exist a dependence between a memory-reference
4031 DRA and a memory-reference DRB. */
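/* Illustrative examples (not from the original sources): for
     a[i] = b[i];
   the bases 'a' and 'b' are known to differ, so no dependence exists
   and false is returned. For
     a[i+1] = a[i] + x;
   the affine dependence test finds a distance-1 dependence between the
   read and the write, so true is returned and vectorization of the
   loop is abandoned.  */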
4032
4033 static bool
4034 vect_analyze_data_ref_dependence (struct data_reference *dra,
4035 struct data_reference *drb,
4036 struct loop *loop)
4037 {
4038 bool differ_p;
4039 struct data_dependence_relation *ddr;
4040
4041 if (!array_base_name_differ_p (dra, drb, &differ_p))
4042 {
4043 if (vect_debug_stats (loop) || vect_debug_details (loop))
4044 {
4045 fprintf (dump_file,
4046 "not vectorized: can't determine dependence between: ");
4047 print_generic_expr (dump_file, DR_REF (dra), TDF_SLIM);
4048 fprintf (dump_file, " and ");
4049 print_generic_expr (dump_file, DR_REF (drb), TDF_SLIM);
4050 }
4051 return true;
4052 }
4053
4054 if (differ_p)
4055 return false;
4056
4057 ddr = initialize_data_dependence_relation (dra, drb);
4058 compute_affine_dependence (ddr);
4059
4060 if (DDR_ARE_DEPENDENT (ddr) == chrec_known)
4061 return false;
4062
4063 if (vect_debug_stats (loop) || vect_debug_details (loop))
4064 {
4065 fprintf (dump_file,
4066 "not vectorized: possible dependence between data-refs ");
4067 print_generic_expr (dump_file, DR_REF (dra), TDF_SLIM);
4068 fprintf (dump_file, " and ");
4069 print_generic_expr (dump_file, DR_REF (drb), TDF_SLIM);
4070 }
4071
4072 return true;
4073 }
4074
4075
4076 /* Function vect_analyze_data_ref_dependences.
4077
4078 Examine all the data references in the loop, and make sure there do not
4079 exist any data dependences between them.
4080
4081 TODO: dependences whose distance is greater than the vectorization factor
4082 can be ignored. */
4083
4084 static bool
4085 vect_analyze_data_ref_dependences (loop_vec_info loop_vinfo)
4086 {
4087 unsigned int i, j;
4088 varray_type loop_write_refs = LOOP_VINFO_DATAREF_WRITES (loop_vinfo);
4089 varray_type loop_read_refs = LOOP_VINFO_DATAREF_READS (loop_vinfo);
4090 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
4091
4092 /* Examine store-store (output) dependences. */
4093
4094 if (vect_debug_details (NULL))
4095 fprintf (dump_file, "\n<<vect_analyze_data_ref_dependences>>\n");
4096
4097 if (vect_debug_details (NULL))
4098 fprintf (dump_file, "compare all store-store pairs.");
4099
4100 for (i = 0; i < VARRAY_ACTIVE_SIZE (loop_write_refs); i++)
4101 {
4102 for (j = i + 1; j < VARRAY_ACTIVE_SIZE (loop_write_refs); j++)
4103 {
4104 struct data_reference *dra =
4105 VARRAY_GENERIC_PTR (loop_write_refs, i);
4106 struct data_reference *drb =
4107 VARRAY_GENERIC_PTR (loop_write_refs, j);
4108 if (vect_analyze_data_ref_dependence (dra, drb, loop))
4109 return false;
4110 }
4111 }
4112
4113 /* Examine load-store (true/anti) dependences. */
4114
4115 if (vect_debug_details (NULL))
4116 fprintf (dump_file, "compare all load-store pairs.");
4117
4118 for (i = 0; i < VARRAY_ACTIVE_SIZE (loop_read_refs); i++)
4119 {
4120 for (j = 0; j < VARRAY_ACTIVE_SIZE (loop_write_refs); j++)
4121 {
4122 struct data_reference *dra = VARRAY_GENERIC_PTR (loop_read_refs, i);
4123 struct data_reference *drb =
4124 VARRAY_GENERIC_PTR (loop_write_refs, j);
4125 if (vect_analyze_data_ref_dependence (dra, drb, loop))
4126 return false;
4127 }
4128 }
4129
4130 return true;
4131 }
4132
4133
4134 /* Function vect_compute_data_ref_alignment
4135
4136 Compute the misalignment of the data reference DR.
4137
4138 Output:
4139 1. If during the misalignment computation it is found that the data reference
4140 cannot be vectorized then false is returned.
4141 2. DR_MISALIGNMENT (DR) is defined.
4142
4143 FOR NOW: No analysis is actually performed. Misalignment is calculated
4144 only for trivial cases. TODO. */
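/* Illustrative example (hypothetical values): for 'short a[N]' whose
   base is (or can be forced to be) 16-byte aligned, and a 16-byte
   vectype, an access whose initial offset from the base is 4 bytes
   (e.g. starting at a[2]) gets DR_MISALIGNMENT = 4 mod 16 = 4. If the
   offset is not known at compile time, DR_MISALIGNMENT stays -1.  */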
4145
4146 static bool
4147 vect_compute_data_ref_alignment (struct data_reference *dr)
4148 {
4149 tree stmt = DR_STMT (dr);
4150 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
4151 tree ref = DR_REF (dr);
4152 tree vectype;
4153 tree base, alignment;
4154 bool base_aligned_p;
4155 tree misalign;
4156
4157 if (vect_debug_details (NULL))
4158 fprintf (dump_file, "vect_compute_data_ref_alignment:");
4159
4160 /* Initialize misalignment to unknown. */
4161 DR_MISALIGNMENT (dr) = -1;
4162
4163 misalign = STMT_VINFO_VECT_MISALIGNMENT (stmt_info);
4164 base_aligned_p = STMT_VINFO_VECT_BASE_ALIGNED_P (stmt_info);
4165 base = STMT_VINFO_VECT_DR_BASE (stmt_info);
4166 vectype = STMT_VINFO_VECTYPE (stmt_info);
4167
4168 if (!misalign)
4169 {
4170 if (vect_debug_details (NULL))
4171 {
4172 fprintf (dump_file, "Unknown alignment for access: ");
4173 print_generic_expr (dump_file, base, TDF_SLIM);
4174 }
4175 return true;
4176 }
4177
4178 if (!base_aligned_p)
4179 {
4180 if (!vect_can_force_dr_alignment_p (base, TYPE_ALIGN (vectype)))
4181 {
4182 if (vect_debug_details (NULL))
4183 {
4184 fprintf (dump_file, "can't force alignment of ref: ");
4185 print_generic_expr (dump_file, ref, TDF_SLIM);
4186 }
4187 return true;
4188 }
4189
4190 /* Force the alignment of the decl.
4191 NOTE: This is the only change to the code we make during
4192 the analysis phase, before deciding to vectorize the loop. */
4193 if (vect_debug_details (NULL))
4194 fprintf (dump_file, "force alignment");
4195 DECL_ALIGN (base) = TYPE_ALIGN (vectype);
4196 DECL_USER_ALIGN (base) = 1;
4197 }
4198
4199 /* At this point we assume that the base is aligned. */
4200 gcc_assert (base_aligned_p
4201 || (TREE_CODE (base) == VAR_DECL
4202 && DECL_ALIGN (base) >= TYPE_ALIGN (vectype)));
4203
4204 /* Alignment required, in bytes: */
4205 alignment = size_int (TYPE_ALIGN (vectype)/BITS_PER_UNIT);
4206
4207 /* Modulo alignment. */
4208 misalign = size_binop (TRUNC_MOD_EXPR, misalign, alignment);
4209 if (tree_int_cst_sgn (misalign) < 0)
4210 {
4211 /* Negative misalignment value. */
4212 if (vect_debug_details (NULL))
4213 fprintf (dump_file, "unexpected misalign value");
4214 return false;
4215 }
4216
4217 DR_MISALIGNMENT (dr) = tree_low_cst (misalign, 1);
4218
4219 if (vect_debug_details (NULL))
4220 fprintf (dump_file, "misalign = %d", DR_MISALIGNMENT (dr));
4221
4222 return true;
4223 }
4224
4225
4226 /* Function vect_compute_data_refs_alignment
4227
4228 Compute the misalignment of data references in the loop.
4229 This pass may take place at function granularity instead of at loop
4230 granularity.
4231
4232 FOR NOW: No analysis is actually performed. Misalignment is calculated
4233 only for trivial cases. TODO. */
4234
4235 static bool
4236 vect_compute_data_refs_alignment (loop_vec_info loop_vinfo)
4237 {
4238 varray_type loop_write_datarefs = LOOP_VINFO_DATAREF_WRITES (loop_vinfo);
4239 varray_type loop_read_datarefs = LOOP_VINFO_DATAREF_READS (loop_vinfo);
4240 unsigned int i;
4241
4242 for (i = 0; i < VARRAY_ACTIVE_SIZE (loop_write_datarefs); i++)
4243 {
4244 struct data_reference *dr = VARRAY_GENERIC_PTR (loop_write_datarefs, i);
4245 if (!vect_compute_data_ref_alignment (dr))
4246 return false;
4247 }
4248
4249 for (i = 0; i < VARRAY_ACTIVE_SIZE (loop_read_datarefs); i++)
4250 {
4251 struct data_reference *dr = VARRAY_GENERIC_PTR (loop_read_datarefs, i);
4252 if (!vect_compute_data_ref_alignment (dr))
4253 return false;
4254 }
4255
4256 return true;
4257 }
4258
4259
4260 /* Function vect_enhance_data_refs_alignment
4261
4262 This pass will use loop versioning and loop peeling in order to enhance
4263 the alignment of data references in the loop.
4264
4265 FOR NOW: we assume that whatever versioning/peeling takes place, only the
4266 original loop is to be vectorized; any other loops that are created by
4267 the transformations performed in this pass are not supposed to be
4268 vectorized. This restriction will be relaxed. */
4269
4270 static void
4271 vect_enhance_data_refs_alignment (loop_vec_info loop_vinfo)
4272 {
4273 varray_type loop_read_datarefs = LOOP_VINFO_DATAREF_READS (loop_vinfo);
4274 varray_type loop_write_datarefs = LOOP_VINFO_DATAREF_WRITES (loop_vinfo);
4275 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
4276 unsigned int i;
4277
4278 /*
4279 This pass will require a cost model to decide whether to apply peeling
4280 or versioning or a combination of the two. For example, the scheme that
4281 Intel uses when given a loop with several memory accesses, is as follows:
4282 choose one memory access ('p') whose alignment you want to force by doing
4283 peeling. Then, either (1) generate a loop in which 'p' is aligned and all
4284 other accesses are not necessarily aligned, or (2) use loop versioning to
4285 generate one loop in which all accesses are aligned, and another loop in
4286 which only 'p' is necessarily aligned.
4287
4288 ("Automatic Intra-Register Vectorization for the Intel Architecture",
4289 Aart J.C. Bik, Milind Girkar, Paul M. Grey and Xinmin Tian, International
4290 Journal of Parallel Programming, Vol. 30, No. 2, April 2002.)
4291
4292 Devising a cost model is the most critical aspect of this work. It will
4293 guide us on which access to peel for, whether to use loop versioning, how
4294 many versions to create, etc. The cost model will probably consist of
4295 generic considerations as well as target specific considerations (on
4296 powerpc for example, misaligned stores are more painful than misaligned
4297 loads).
4298
4299 Here are the general steps involved in alignment enhancements:
4300
4301 -- original loop, before alignment analysis:
4302 for (i=0; i<N; i++){
4303 x = q[i]; # DR_MISALIGNMENT(q) = unknown
4304 p[i] = y; # DR_MISALIGNMENT(p) = unknown
4305 }
4306
4307 -- After vect_compute_data_refs_alignment:
4308 for (i=0; i<N; i++){
4309 x = q[i]; # DR_MISALIGNMENT(q) = 3
4310 p[i] = y; # DR_MISALIGNMENT(p) = unknown
4311 }
4312
4313 -- Possibility 1: we do loop versioning:
4314 if (p is aligned) {
4315 for (i=0; i<N; i++){ # loop 1A
4316 x = q[i]; # DR_MISALIGNMENT(q) = 3
4317 p[i] = y; # DR_MISALIGNMENT(p) = 0
4318 }
4319 }
4320 else {
4321 for (i=0; i<N; i++){ # loop 1B
4322 x = q[i]; # DR_MISALIGNMENT(q) = 3
4323 p[i] = y; # DR_MISALIGNMENT(p) = unaligned
4324 }
4325 }
4326
4327 -- Possibility 2: we do loop peeling:
4328 for (i = 0; i < 3; i++){ # (scalar loop, not to be vectorized).
4329 x = q[i];
4330 p[i] = y;
4331 }
4332 for (i = 3; i < N; i++){ # loop 2A
4333 x = q[i]; # DR_MISALIGNMENT(q) = 0
4334 p[i] = y; # DR_MISALIGNMENT(p) = unknown
4335 }
4336
4337 -- Possibility 3: combination of loop peeling and versioning:
4338 for (i = 0; i < 3; i++){ # (scalar loop, not to be vectorized).
4339 x = q[i];
4340 p[i] = y;
4341 }
4342 if (p is aligned) {
4343 for (i = 3; i<N; i++){ # loop 3A
4344 x = q[i]; # DR_MISALIGNMENT(q) = 0
4345 p[i] = y; # DR_MISALIGNMENT(p) = 0
4346 }
4347 }
4348 else {
4349 for (i = 3; i<N; i++){ # loop 3B
4350 x = q[i]; # DR_MISALIGNMENT(q) = 0
4351 p[i] = y; # DR_MISALIGNMENT(p) = unaligned
4352 }
4353 }
4354
4355 These loops are later passed to loop_transform to be vectorized. The
4356 vectorizer will use the alignment information to guide the transformation
4357 (whether to generate regular loads/stores, or with special handling for
4358 misalignment).
4359 */
4360
4361 /* (1) Peeling to force alignment. */
4362
4363 /* (1.1) Decide whether to perform peeling, and how many iterations to peel:
4364 Considerations:
4365 + How many accesses will become aligned due to the peeling
4366 - How many accesses will become unaligned due to the peeling,
4367 and the cost of misaligned accesses.
4368 - The cost of peeling (the extra runtime checks, the increase
4369 in code size).
4370
4371 The scheme we use FORNOW: peel to force the alignment of the first
4372 misaligned store in the loop.
4373 Rationale: misaligned stores are not yet supported.
4374
4375 TODO: Use a better cost model. */
4376
4377 for (i = 0; i < VARRAY_ACTIVE_SIZE (loop_write_datarefs); i++)
4378 {
4379 struct data_reference *dr = VARRAY_GENERIC_PTR (loop_write_datarefs, i);
4380 if (!aligned_access_p (dr))
4381 {
4382 LOOP_VINFO_UNALIGNED_DR (loop_vinfo) = dr;
4383 LOOP_DO_PEELING_FOR_ALIGNMENT (loop_vinfo) = true;
4384 break;
4385 }
4386 }
4387
4388 if (!LOOP_VINFO_UNALIGNED_DR (loop_vinfo))
4389 {
4390 if (vect_debug_details (loop))
4391 fprintf (dump_file, "Peeling for alignment will not be applied.");
4392 return;
4393 }
4394 else
4395 if (vect_debug_details (loop))
4396 fprintf (dump_file, "Peeling for alignment will be applied.");
4397
4398
4399 /* (1.2) Update the alignment info according to the peeling factor.
4400 If the misalignment of the DR we peel for is M, then the
4401 peeling factor is VF - M, and the misalignment of each access DR_i
4402 in the loop is DR_MISALIGNMENT (DR_i) + VF - M.
4403 If the misalignment of the DR we peel for is unknown, then the
4404 misalignment of each access DR_i in the loop is also unknown.
4405
4406 FORNOW: set the misalignment of the accesses to unknown even
4407 if the peeling factor is known at compile time.
4408
4409 TODO: - if the peeling factor is known at compile time, use that
4410 when updating the misalignment info of the loop DRs.
4411 - consider accesses that are known to have the same
4412 alignment, even if that alignment is unknown. */
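/* Illustrative example (hypothetical values): with VF = 4 and a peeled
   DR whose misalignment is M = 1 element, the peeling factor is
   VF - M = 3 iterations, and another DR_i with misalignment 2 would
   end up with misalignment (2 + 3) mod 4 = 1. FORNOW, as noted above,
   all DRs other than the one peeled for are simply reset to
   unknown (-1).  */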
4413
4414 for (i = 0; i < VARRAY_ACTIVE_SIZE (loop_write_datarefs); i++)
4415 {
4416 struct data_reference *dr = VARRAY_GENERIC_PTR (loop_write_datarefs, i);
4417 if (dr == LOOP_VINFO_UNALIGNED_DR (loop_vinfo))
4418 {
4419 DR_MISALIGNMENT (dr) = 0;
4420 if (vect_debug_details (loop) || vect_debug_stats (loop))
4421 fprintf (dump_file, "Alignment of access forced using peeling.");
4422 }
4423 else
4424 DR_MISALIGNMENT (dr) = -1;
4425 }
4426 for (i = 0; i < VARRAY_ACTIVE_SIZE (loop_read_datarefs); i++)
4427 {
4428 struct data_reference *dr = VARRAY_GENERIC_PTR (loop_read_datarefs, i);
4429 if (dr == LOOP_VINFO_UNALIGNED_DR (loop_vinfo))
4430 {
4431 DR_MISALIGNMENT (dr) = 0;
4432 if (vect_debug_details (loop) || vect_debug_stats (loop))
4433 fprintf (dump_file, "Alignment of access forced using peeling.");
4434 }
4435 else
4436 DR_MISALIGNMENT (dr) = -1;
4437 }
4438 }
4439
4440
4441 /* Function vect_analyze_data_refs_alignment
4442
4443 Analyze the alignment of the data-references in the loop.
4444 FOR NOW: Until support for misaligned accesses is in place, only if all
4445 accesses are aligned can the loop be vectorized. This restriction will be
4446 relaxed. */
4447
4448 static bool
4449 vect_analyze_data_refs_alignment (loop_vec_info loop_vinfo)
4450 {
4451 varray_type loop_read_datarefs = LOOP_VINFO_DATAREF_READS (loop_vinfo);
4452 varray_type loop_write_datarefs = LOOP_VINFO_DATAREF_WRITES (loop_vinfo);
4453 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
4454 enum dr_alignment_support supportable_dr_alignment;
4455 unsigned int i;
4456
4457 if (vect_debug_details (NULL))
4458 fprintf (dump_file, "\n<<vect_analyze_data_refs_alignment>>\n");
4459
4460
4461 /* This pass may take place at function granularity instead of at loop
4462 granularity. */
4463
4464 if (!vect_compute_data_refs_alignment (loop_vinfo))
4465 {
4466 if (vect_debug_details (loop) || vect_debug_stats (loop))
4467 fprintf (dump_file,
4468 "not vectorized: can't calculate alignment for data ref.");
4469 return false;
4470 }
4471
4472
4473 /* This pass will decide on using loop versioning and/or loop peeling in
4474 order to enhance the alignment of data references in the loop. */
4475
4476 vect_enhance_data_refs_alignment (loop_vinfo);
4477
4478
4479 /* Finally, check that all the data references in the loop can be
4480 handled with respect to their alignment. */
4481
4482 for (i = 0; i < VARRAY_ACTIVE_SIZE (loop_read_datarefs); i++)
4483 {
4484 struct data_reference *dr = VARRAY_GENERIC_PTR (loop_read_datarefs, i);
4485 supportable_dr_alignment = vect_supportable_dr_alignment (dr);
4486 if (!supportable_dr_alignment)
4487 {
4488 if (vect_debug_details (loop) || vect_debug_stats (loop))
4489 fprintf (dump_file, "not vectorized: unsupported unaligned load.");
4490 return false;
4491 }
4492 if (supportable_dr_alignment != dr_aligned
4493 && (vect_debug_details (loop) || vect_debug_stats (loop)))
4494 fprintf (dump_file, "Vectorizing an unaligned access.");
4495 }
4496 for (i = 0; i < VARRAY_ACTIVE_SIZE (loop_write_datarefs); i++)
4497 {
4498 struct data_reference *dr = VARRAY_GENERIC_PTR (loop_write_datarefs, i);
4499 supportable_dr_alignment = vect_supportable_dr_alignment (dr);
4500 if (!supportable_dr_alignment)
4501 {
4502 if (vect_debug_details (loop) || vect_debug_stats (loop))
4503 fprintf (dump_file, "not vectorized: unsupported unaligned store.");
4504 return false;
4505 }
4506 if (supportable_dr_alignment != dr_aligned
4507 && (vect_debug_details (loop) || vect_debug_stats (loop)))
4508 fprintf (dump_file, "Vectorizing an unaligned access.");
4509 }
4510
4511 return true;
4512 }
4513
4514
4515 /* Function vect_analyze_data_ref_access.
4516
4517 Analyze the access pattern of the data-reference DR. For now, a data access
4518 has to be consecutive to be considered vectorizable. */
4519
4520 static bool
4521 vect_analyze_data_ref_access (struct data_reference *dr)
4522 {
4523 tree stmt = DR_STMT (dr);
4524 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
4525 tree step = STMT_VINFO_VECT_STEP (stmt_info);
4526 tree scalar_type = TREE_TYPE (DR_REF (dr));
4527
4528 if (!step || tree_int_cst_compare (step, TYPE_SIZE_UNIT (scalar_type)))
4529 {
4530 if (vect_debug_details (NULL))
4531 fprintf (dump_file, "not consecutive access");
4532 return false;
4533 }
4534 return true;
4535 }
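/* Illustrative examples (not from the original sources): for
   'int a[N]', the access a[i] has a step of 4 bytes per iteration,
   which equals TYPE_SIZE_UNIT of 'int', so it is consecutive. An
   access such as a[2*i] has a step of 8 bytes and is rejected.  */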
4536
4537
4538 /* Function vect_analyze_data_ref_accesses.
4539
4540 Analyze the access pattern of all the data references in the loop.
4541
4542 FORNOW: the only access pattern that is considered vectorizable is a
4543 simple step 1 (consecutive) access.
4544
4545 FORNOW: handle only arrays and pointer accesses. */
4546
4547 static bool
4548 vect_analyze_data_ref_accesses (loop_vec_info loop_vinfo)
4549 {
4550 unsigned int i;
4551 varray_type loop_write_datarefs = LOOP_VINFO_DATAREF_WRITES (loop_vinfo);
4552 varray_type loop_read_datarefs = LOOP_VINFO_DATAREF_READS (loop_vinfo);
4553
4554 if (vect_debug_details (NULL))
4555 fprintf (dump_file, "\n<<vect_analyze_data_ref_accesses>>\n");
4556
4557 for (i = 0; i < VARRAY_ACTIVE_SIZE (loop_write_datarefs); i++)
4558 {
4559 struct data_reference *dr = VARRAY_GENERIC_PTR (loop_write_datarefs, i);
4560 bool ok = vect_analyze_data_ref_access (dr);
4561 if (!ok)
4562 {
4563 if (vect_debug_stats (LOOP_VINFO_LOOP (loop_vinfo))
4564 || vect_debug_details (LOOP_VINFO_LOOP (loop_vinfo)))
4565 fprintf (dump_file, "not vectorized: complicated access pattern.");
4566 return false;
4567 }
4568 }
4569
4570 for (i = 0; i < VARRAY_ACTIVE_SIZE (loop_read_datarefs); i++)
4571 {
4572 struct data_reference *dr = VARRAY_GENERIC_PTR (loop_read_datarefs, i);
4573 bool ok = vect_analyze_data_ref_access (dr);
4574 if (!ok)
4575 {
4576 if (vect_debug_stats (LOOP_VINFO_LOOP (loop_vinfo))
4577 || vect_debug_details (LOOP_VINFO_LOOP (loop_vinfo)))
4578 fprintf (dump_file, "not vectorized: complicated access pattern.");
4579 return false;
4580 }
4581 }
4582
4583 return true;
4584 }
4585
4586
4587 /* Function vect_analyze_pointer_ref_access.
4588
4589 Input:
4590 STMT - a stmt that contains a data-ref
4591 MEMREF - a data-ref in STMT, which is an INDIRECT_REF.
4592
4593 If the data-ref access is vectorizable, return a data_reference structure
4594 that represents it (DR). Otherwise - return NULL. */
4595
4596 static struct data_reference *
4597 vect_analyze_pointer_ref_access (tree memref, tree stmt, bool is_read)
4598 {
4599 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
4600 struct loop *loop = STMT_VINFO_LOOP (stmt_info);
4601 tree access_fn = analyze_scalar_evolution (loop, TREE_OPERAND (memref, 0));
4602 tree init, step;
4603 tree reftype, innertype;
4604 tree indx_access_fn;
4605 int loopnum = loop->num;
4606 struct data_reference *dr;
4607
4608 if (!access_fn)
4609 {
4610 if (vect_debug_stats (loop) || vect_debug_details (loop))
4611 fprintf (dump_file, "not vectorized: complicated pointer access.");
4612 return NULL;
4613 }
4614
4615 if (vect_debug_details (NULL))
4616 {
4617 fprintf (dump_file, "Access function of ptr: ");
4618 print_generic_expr (dump_file, access_fn, TDF_SLIM);
4619 }
4620
4621 if (!vect_is_simple_iv_evolution (loopnum, access_fn, &init, &step, false))
4622 {
4623 if (vect_debug_stats (loop) || vect_debug_details (loop))
4624 fprintf (dump_file, "not vectorized: pointer access is not simple.");
4625 return NULL;
4626 }
4627
4628 STRIP_NOPS (init);
4629
4630 if (!expr_invariant_in_loop_p (loop, init))
4631 {
4632 if (vect_debug_stats (loop) || vect_debug_details (loop))
4633 fprintf (dump_file,
4634 "not vectorized: initial condition is not loop invariant.");
4635 return NULL;
4636 }
4637
4638 if (TREE_CODE (step) != INTEGER_CST)
4639 {
4640 if (vect_debug_stats (loop) || vect_debug_details (loop))
4641 fprintf (dump_file,
4642 "not vectorized: non constant step for pointer access.");
4643 return NULL;
4644 }
4645
4646 reftype = TREE_TYPE (TREE_OPERAND (memref, 0));
4647 if (TREE_CODE (reftype) != POINTER_TYPE)
4648 {
4649 if (vect_debug_stats (loop) || vect_debug_details (loop))
4650 fprintf (dump_file, "not vectorized: unexpected pointer access form.");
4651 return NULL;
4652 }
4653
4654 reftype = TREE_TYPE (init);
4655 if (TREE_CODE (reftype) != POINTER_TYPE)
4656 {
4657 if (vect_debug_stats (loop) || vect_debug_details (loop))
4658 fprintf (dump_file, "not vectorized: unexpected pointer access form.");
4659 return NULL;
4660 }
4661
4662 innertype = TREE_TYPE (reftype);
4663 if (tree_int_cst_compare (TYPE_SIZE_UNIT (innertype), step))
4664 {
4665 /* FORNOW: support only consecutive access */
4666 if (vect_debug_stats (loop) || vect_debug_details (loop))
4667 fprintf (dump_file, "not vectorized: non consecutive access.");
4668 return NULL;
4669 }
4670
4671 STMT_VINFO_VECT_STEP (stmt_info) = fold_convert (sizetype, step);
4672 if (TREE_CODE (init) == PLUS_EXPR
4673 || TREE_CODE (init) == MINUS_EXPR)
4674 STMT_VINFO_VECT_INIT_OFFSET (stmt_info) =
4675 fold (size_binop (TREE_CODE (init), size_zero_node,
4676 fold_convert (sizetype, TREE_OPERAND (init, 1))));
4677 else
4678 STMT_VINFO_VECT_INIT_OFFSET (stmt_info) = size_zero_node;
4679
4680 indx_access_fn =
4681 build_polynomial_chrec (loopnum, integer_zero_node, integer_one_node);
4682 if (vect_debug_details (NULL))
4683 {
4684 fprintf (dump_file, "Access function of ptr indx: ");
4685 print_generic_expr (dump_file, indx_access_fn, TDF_SLIM);
4686 }
4687 dr = init_data_ref (stmt, memref, init, indx_access_fn, is_read);
4688 return dr;
4689 }
4690
4691
4692 /* Function vect_get_memtag_and_dr.
4693
4694 The function returns the relevant variable for memory tag (for aliasing
4695 purposes). Also, the data reference structure DR is created.
4696
4697 This function handles three kinds of MEMREF:
4698
4699 It is called from vect_analyze_data_refs with a MEMREF that is either an
4700 ARRAY_REF or an INDIRECT_REF (this is category 1 - "recursion begins").
4701 It builds a DR for them using vect_get_base_and_offset, and calls itself
4702 recursively to retrieve the relevant memtag for the MEMREF, "peeling" the
4703 MEMREF along the way. During the recursive calls, the function may be called
4704 with a MEMREF for which the recursion has to continue - PLUS_EXPR,
4705 MINUS_EXPR, INDIRECT_REF (category 2 - "recursion continues"),
4706 and/or with a MEMREF for which a memtag can be trivially obtained - VAR_DECL
4707 and SSA_NAME (this is category 3 - "recursion stop condition").
4708
4709 When the MEMREF falls into category 1 there is still no data reference struct
4710 (DR) available. It is created by this function, and then, along the recursion,
4711 MEMREF will fall into category 2 or 3, in which case a DR will have already
4712 been created, but the analysis continues to retrieve the MEMTAG.
4713
4714 Input:
4715 MEMREF - data reference in STMT
4716 IS_READ - TRUE if STMT reads from MEMREF, FALSE if writes to MEMREF
4717
4718 Output:
4719 DR - data_reference struct for MEMREF
4720 return value - the relevant variable for memory tag (for aliasing purposes).
4721
4722 */
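/* Illustrative walk-through (hypothetical names): for a stmt
     *p_1 = x;
   the first call (category 1) builds a DR for the INDIRECT_REF via
   vect_analyze_pointer_ref_access and recurses on the pointer's base
   (say, the SSA_NAME p_0 holding its initial value). The recursive
   call hits the SSA_NAME stop condition (category 3) and returns the
   type memtag of 'p', to be used for aliasing purposes.  */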
4723
4724 static tree
4725 vect_get_memtag_and_dr (tree memref, tree stmt, bool is_read,
4726 loop_vec_info loop_vinfo,
4727 tree vectype, struct data_reference **dr)
4728 {
4729 tree symbl, oprnd0, oprnd1;
4730 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
4731 tree offset, misalign, step;
4732 tree ref_to_be_analyzed, tag, dr_base;
4733 struct data_reference *new_dr;
4734 bool base_aligned_p;
4735
4736 if (*dr)
4737 {
4738 /* Category 3: recursion stop condition. */
4739 /* (1) A DR already exists. We only need to get the relevant memtag for
4740 MEMREF, the rest of the data was already initialized. */
4741
4742 switch (TREE_CODE (memref))
4743 {
4744 /* (1.1) Stop condition: find the relevant memtag and return. */
4745 case SSA_NAME:
4746 symbl = SSA_NAME_VAR (memref);
4747 tag = get_var_ann (symbl)->type_mem_tag;
4748 if (!tag)
4749 {
4750 tree ptr = TREE_OPERAND (DR_REF ((*dr)), 0);
4751 if (TREE_CODE (ptr) == SSA_NAME)
4752 tag = get_var_ann (SSA_NAME_VAR (ptr))->type_mem_tag;
4753 }
4754 if (!tag)
4755 {
4756 if (vect_debug_details (NULL))
4757 fprintf (dump_file, "not vectorized: no memtag for ref.");
4758 return NULL_TREE;
4759 }
4760 return tag;
4761
4762 case VAR_DECL:
4763 case PARM_DECL:
4764 return memref;
4765
4766 /* Category 2: recursion continues. */
4767 /* (1.2) A recursive call to find the relevant memtag is required. */
4768 case INDIRECT_REF:
4769 symbl = TREE_OPERAND (memref, 0);
4770 break; /* For recursive call. */
4771
4772 case COMPONENT_REF:
4773 /* Could have recorded more accurate information -
4774 i.e., the actual FIELD_DECL that is being referenced -
4775 but later passes expect VAR_DECL as the nmt. */
4776 /* Fall through. */
4777
4778 case ADDR_EXPR:
4779 symbl = STMT_VINFO_VECT_DR_BASE (stmt_info);
4780 break; /* For recursive call. */
4781
4782 case PLUS_EXPR:
4783 case MINUS_EXPR:
4784 /* Although DR exists, we have to call the function recursively to
4785 build MEMTAG for such expression. This is handled below. */
4786 oprnd0 = TREE_OPERAND (memref, 0);
4787 oprnd1 = TREE_OPERAND (memref, 1);
4788
4789 STRIP_NOPS (oprnd1);
4790 /* Supported plus/minus expressions are of the form
4791 {address_base + offset}, such that address_base is of type
4792 POINTER/ARRAY, and offset is either an INTEGER_CST of type POINTER,
4793 or it's not of type POINTER/ARRAY.
4794 TODO: swap operands if {offset + address_base}. */
4795 if ((TREE_CODE (TREE_TYPE (oprnd1)) == POINTER_TYPE
4796 && TREE_CODE (oprnd1) != INTEGER_CST)
4797 || TREE_CODE (TREE_TYPE (oprnd1)) == ARRAY_TYPE)
4798 return NULL_TREE;
4799
4800 symbl = oprnd0;
4801 break; /* For recursive call. */
4802
4803 default:
4804 return NULL_TREE;
4805 }
4806 }
4807 else
4808 {
4809 /* Category 1: recursion begins. */
4810 /* (2) A DR does not exist yet and must be built, followed by a
4811 recursive call to get the relevant memtag for MEMREF. */
4812
4813 switch (TREE_CODE (memref))
4814 {
4815 case INDIRECT_REF:
4816 new_dr = vect_analyze_pointer_ref_access (memref, stmt, is_read);
4817 if (!new_dr)
4818 return NULL_TREE;
4819 *dr = new_dr;
4820 symbl = DR_BASE_NAME (new_dr);
4821 ref_to_be_analyzed = DR_BASE_NAME (new_dr);
4822 break;
4823
4824 case ARRAY_REF:
4825 new_dr = analyze_array (stmt, memref, is_read);
4826 *dr = new_dr;
4827 symbl = DR_BASE_NAME (new_dr);
4828 ref_to_be_analyzed = memref;
4829 break;
4830
4831 default:
4832 /* TODO: Support data-refs of form a[i].p for unions and single
4833 field structures. */
4834 return NULL_TREE;
4835 }
4836
4837 offset = size_zero_node;
4838 misalign = size_zero_node;
4839 step = size_zero_node;
4840
4841 /* Analyze data-ref, find its base, initial offset from the base, step,
4842 and alignment. */
4843 dr_base = vect_get_base_and_offset (new_dr, ref_to_be_analyzed,
4844 vectype, loop_vinfo, &offset,
4845 &misalign, &step, &base_aligned_p);
4846 if (!dr_base)
4847 return NULL_TREE;
4848
4849 /* Initialize information according to above analysis. */
4850 /* Since offset and step of a pointer can be also set in
4851 vect_analyze_pointer_ref_access, we combine the values here. */
4852 if (STMT_VINFO_VECT_INIT_OFFSET (stmt_info))
4853 STMT_VINFO_VECT_INIT_OFFSET (stmt_info) =
4854 fold (build2 (PLUS_EXPR, TREE_TYPE (offset), offset,
4855 STMT_VINFO_VECT_INIT_OFFSET (stmt_info)));
4856 else
4857 STMT_VINFO_VECT_INIT_OFFSET (stmt_info) = offset;
4858
4859 if (step && STMT_VINFO_VECT_STEP (stmt_info))
4860 STMT_VINFO_VECT_STEP (stmt_info) =
4861 size_binop (PLUS_EXPR, step, STMT_VINFO_VECT_STEP (stmt_info));
4862 else
4863 STMT_VINFO_VECT_STEP (stmt_info) = step;
4864
4865 STMT_VINFO_VECT_BASE_ALIGNED_P (stmt_info) = base_aligned_p;
4866 STMT_VINFO_VECT_MISALIGNMENT (stmt_info) = misalign;
4867 STMT_VINFO_VECT_DR_BASE (stmt_info) = dr_base;
4868 }
4869
4870 if (!symbl)
4871 return NULL_TREE;
4872 /* Recursive call to retrieve the relevant memtag. */
4873 tag = vect_get_memtag_and_dr (symbl, stmt, is_read, loop_vinfo, vectype, dr);
4874 return tag;
4875 }
4876
4877
4878
4879 /* Function vect_analyze_data_refs.
4880
4881 Find all the data references in the loop.
4882
4883 The general structure of the analysis of data refs in the vectorizer is as
4884 follows:
4885 1- vect_analyze_data_refs(loop):
4886 Find and analyze all data-refs in the loop:
4887 foreach ref
4888 ref_stmt.memtag = vect_get_memtag_and_dr (ref)
4889 1.1- vect_get_memtag_and_dr(ref):
4890 Analyze ref, and build a DR (data_reference struct) for it;
4891 call vect_get_base_and_offset to compute base, initial_offset,
4892 step and alignment. Set ref_stmt.base, ref_stmt.initial_offset,
4893 ref_stmt.alignment, and ref_stmt.step accordingly.
4894 1.1.1- vect_get_base_and_offset():
4895 Calculate base, initial_offset, step and alignment.
4896 For ARRAY_REFs and COMPONENT_REFs use call get_inner_reference.
4897 2- vect_analyze_dependences(): apply dependence testing using ref_stmt.DR
4898 3- vect_analyze_drs_alignment(): check that ref_stmt.alignment is ok.
4899 4- vect_analyze_drs_access(): check that ref_stmt.step is ok.
4900
4901 FORNOW: Handle aligned INDIRECT_REFs and ARRAY_REFs
4902 whose base is really an array (not a pointer) and whose alignment
4903 can be forced. This restriction will be relaxed. */
4904
4905 static bool
4906 vect_analyze_data_refs (loop_vec_info loop_vinfo)
4907 {
4908 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
4909 basic_block *bbs = LOOP_VINFO_BBS (loop_vinfo);
4910 int nbbs = loop->num_nodes;
4911 block_stmt_iterator si;
4912 int j;
4913 struct data_reference *dr;
4914
4915 if (vect_debug_details (NULL))
4916 fprintf (dump_file, "\n<<vect_analyze_data_refs>>\n");
4917
4918 for (j = 0; j < nbbs; j++)
4919 {
4920 basic_block bb = bbs[j];
4921 for (si = bsi_start (bb); !bsi_end_p (si); bsi_next (&si))
4922 {
4923 bool is_read = false;
4924 tree stmt = bsi_stmt (si);
4925 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
4926 v_may_def_optype v_may_defs = STMT_V_MAY_DEF_OPS (stmt);
4927 v_must_def_optype v_must_defs = STMT_V_MUST_DEF_OPS (stmt);
4928 vuse_optype vuses = STMT_VUSE_OPS (stmt);
4929 varray_type *datarefs = NULL;
4930 int nvuses, nv_may_defs, nv_must_defs;
4931 tree memref = NULL;
4932 tree symbl;
4933 tree scalar_type, vectype;
4934
4935 /* Assumption: there exists a data-ref in stmt if and only if
4936 it has vuses/vdefs. */
4937
4938 if (!vuses && !v_may_defs && !v_must_defs)
4939 continue;
4940
4941 nvuses = NUM_VUSES (vuses);
4942 nv_may_defs = NUM_V_MAY_DEFS (v_may_defs);
4943 nv_must_defs = NUM_V_MUST_DEFS (v_must_defs);
4944
4945 if (nvuses && (nv_may_defs || nv_must_defs))
4946 {
4947 if (vect_debug_details (NULL))
4948 {
4949 fprintf (dump_file, "unexpected vdefs and vuses in stmt: ");
4950 print_generic_expr (dump_file, stmt, TDF_SLIM);
4951 }
4952 return false;
4953 }
4954
4955 if (TREE_CODE (stmt) != MODIFY_EXPR)
4956 {
4957 if (vect_debug_details (NULL))
4958 {
4959 fprintf (dump_file, "unexpected vops in stmt: ");
4960 print_generic_expr (dump_file, stmt, TDF_SLIM);
4961 }
4962 return false;
4963 }
4964
4965 if (vuses)
4966 {
4967 memref = TREE_OPERAND (stmt, 1);
4968 datarefs = &(LOOP_VINFO_DATAREF_READS (loop_vinfo));
4969 is_read = true;
4970 }
4971 else /* vdefs */
4972 {
4973 memref = TREE_OPERAND (stmt, 0);
4974 datarefs = &(LOOP_VINFO_DATAREF_WRITES (loop_vinfo));
4975 is_read = false;
4976 }
4977
4978 scalar_type = TREE_TYPE (memref);
4979 vectype = get_vectype_for_scalar_type (scalar_type);
4980 if (!vectype)
4981 {
4982 if (vect_debug_details (NULL))
4983 {
4984 fprintf (dump_file, "no vectype for stmt: ");
4985 print_generic_expr (dump_file, stmt, TDF_SLIM);
4986 fprintf (dump_file, " scalar_type: ");
4987 print_generic_expr (dump_file, scalar_type, TDF_DETAILS);
4988 }
4989 /* It is not possible to vectorize this data reference. */
4990 return false;
4991 }
4992 /* Analyze MEMREF. If it is of a supported form, build data_reference
4993 struct for it (DR) and find memtag for aliasing purposes. */
4994 dr = NULL;
4995 symbl = vect_get_memtag_and_dr (memref, stmt, is_read, loop_vinfo,
4996 vectype, &dr);
4997 if (!symbl)
4998 {
4999 if (vect_debug_stats (loop) || vect_debug_details (loop))
5000 {
5001 fprintf (dump_file, "not vectorized: unhandled data ref: ");
5002 print_generic_expr (dump_file, stmt, TDF_SLIM);
5003 }
5004 return false;
5005 }
5006 STMT_VINFO_MEMTAG (stmt_info) = symbl;
5007 STMT_VINFO_VECTYPE (stmt_info) = vectype;
5008 VARRAY_PUSH_GENERIC_PTR (*datarefs, dr);
5009 STMT_VINFO_DATA_REF (stmt_info) = dr;
5010 }
5011 }
5012
5013 return true;
5014 }
5015
5016
5017 /* Utility functions used by vect_mark_stmts_to_be_vectorized. */
5018
5019 /* Function vect_mark_relevant.
5020
5021 Mark STMT as "relevant for vectorization" and add it to WORKLIST. */
5022
5023 static void
5024 vect_mark_relevant (varray_type *worklist, tree stmt)
5025 {
5026 stmt_vec_info stmt_info;
5027
5028 if (vect_debug_details (NULL))
5029 fprintf (dump_file, "mark relevant.");
5030
5031 if (TREE_CODE (stmt) == PHI_NODE)
5032 {
5033 VARRAY_PUSH_TREE (*worklist, stmt);
5034 return;
5035 }
5036
5037 stmt_info = vinfo_for_stmt (stmt);
5038
5039 if (!stmt_info)
5040 {
5041 if (vect_debug_details (NULL))
5042 {
5043 fprintf (dump_file, "mark relevant: no stmt info!");
5044 print_generic_expr (dump_file, stmt, TDF_SLIM);
5045 }
5046 return;
5047 }
5048
5049 if (STMT_VINFO_RELEVANT_P (stmt_info))
5050 {
5051 if (vect_debug_details (NULL))
5052 fprintf (dump_file, "already marked relevant.");
5053 return;
5054 }
5055
5056 STMT_VINFO_RELEVANT_P (stmt_info) = 1;
5057 VARRAY_PUSH_TREE (*worklist, stmt);
5058 }
5059
5060
5061 /* Function vect_stmt_relevant_p.
5062
5063 Return true if STMT in loop that is represented by LOOP_VINFO is
5064 "relevant for vectorization".
5065
5066 A stmt is considered "relevant for vectorization" if:
5067 - it has uses outside the loop.
5068 - it has vdefs (it alters memory).
5069 - it is a control stmt in the loop (other than the exit condition).
5070
5071 CHECKME: what other side effects would the vectorizer allow? */
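/* Illustrative examples (not from the original sources):
     a[i] = b[i] + x;  # has vdefs (alters memory)    -> relevant
     t_1 = n_2 * 4;    # used only for array indexing -> not relevant
   (unless t_1 is also used outside the loop).  */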

static bool
vect_stmt_relevant_p (tree stmt, loop_vec_info loop_vinfo)
{
  v_may_def_optype v_may_defs;
  v_must_def_optype v_must_defs;
  struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
  int i;
  dataflow_t df;
  int num_uses;

  /* A cond stmt other than the loop exit cond.  */
  if (is_ctrl_stmt (stmt) && (stmt != LOOP_VINFO_EXIT_COND (loop_vinfo)))
    return true;

  /* Changing memory.  */
  v_may_defs = STMT_V_MAY_DEF_OPS (stmt);
  v_must_defs = STMT_V_MUST_DEF_OPS (stmt);
  if (v_may_defs || v_must_defs)
    {
      if (vect_debug_details (NULL))
        fprintf (dump_file, "vect_stmt_relevant_p: stmt has vdefs.");
      return true;
    }

  /* Uses outside the loop.  */
  df = get_immediate_uses (stmt);
  num_uses = num_immediate_uses (df);
  for (i = 0; i < num_uses; i++)
    {
      tree use = immediate_use (df, i);
      basic_block bb = bb_for_stmt (use);
      if (!flow_bb_inside_loop_p (loop, bb))
        {
          if (vect_debug_details (NULL))
            fprintf (dump_file, "vect_stmt_relevant_p: used out of loop.");
          return true;
        }
    }

  return false;
}


/* Function vect_mark_stmts_to_be_vectorized.

   Not all stmts in the loop need to be vectorized.  For example:

     for i...
       for j...
   1.    T0 = i + j
   2.    T1 = a[T0]

   3.    j = j + 1

   Stmts 1 and 3 do not need to be vectorized, because loop control and
   addressing of vectorized data-refs are handled differently.

   This pass detects such stmts.  */
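
/* A sketch of the propagation performed below (illustrative pseudocode,
   not the exact control flow):

        worklist = all stmts that are relevant by themselves
                   (see vect_stmt_relevant_p)
        while worklist is not empty:
          S = pop (worklist)
          for each use U in S that is needed for more than array indexing:
            D = the stmt that defines U
            if D is inside the loop:
              mark D relevant and push it onto the worklist  */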

static bool
vect_mark_stmts_to_be_vectorized (loop_vec_info loop_vinfo)
{
  varray_type worklist;
  struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
  basic_block *bbs = LOOP_VINFO_BBS (loop_vinfo);
  unsigned int nbbs = loop->num_nodes;
  block_stmt_iterator si;
  tree stmt;
  stmt_ann_t ann;
  unsigned int i;
  int j;
  use_optype use_ops;
  stmt_vec_info stmt_info;

  if (vect_debug_details (NULL))
    fprintf (dump_file, "\n<<vect_mark_stmts_to_be_vectorized>>\n");

  VARRAY_TREE_INIT (worklist, 64, "work list");

  /* 1. Init worklist.  */

  for (i = 0; i < nbbs; i++)
    {
      basic_block bb = bbs[i];
      for (si = bsi_start (bb); !bsi_end_p (si); bsi_next (&si))
        {
          stmt = bsi_stmt (si);

          if (vect_debug_details (NULL))
            {
              fprintf (dump_file, "init: stmt relevant? ");
              print_generic_expr (dump_file, stmt, TDF_SLIM);
            }

          stmt_info = vinfo_for_stmt (stmt);
          STMT_VINFO_RELEVANT_P (stmt_info) = 0;

          if (vect_stmt_relevant_p (stmt, loop_vinfo))
            vect_mark_relevant (&worklist, stmt);
        }
    }

  /* 2. Process worklist.  */

  while (VARRAY_ACTIVE_SIZE (worklist) > 0)
    {
      stmt = VARRAY_TOP_TREE (worklist);
      VARRAY_POP (worklist);

      if (vect_debug_details (NULL))
        {
          fprintf (dump_file, "worklist: examine stmt: ");
          print_generic_expr (dump_file, stmt, TDF_SLIM);
        }

      /* Examine the USES in this statement.  Mark all the statements
         which feed this statement's uses as "relevant", unless the USE
         is used as an array index.  */

      if (TREE_CODE (stmt) == PHI_NODE)
        {
          /* Follow the def-use chain inside the loop.  */
          for (j = 0; j < PHI_NUM_ARGS (stmt); j++)
            {
              tree arg = PHI_ARG_DEF (stmt, j);
              tree def_stmt = NULL_TREE;
              basic_block bb;
              if (!vect_is_simple_use (arg, loop, &def_stmt))
                {
                  if (vect_debug_details (NULL))
                    fprintf (dump_file, "worklist: unsupported use.");
                  varray_clear (worklist);
                  return false;
                }
              if (!def_stmt)
                continue;

              if (vect_debug_details (NULL))
                {
                  fprintf (dump_file, "worklist: def_stmt: ");
                  print_generic_expr (dump_file, def_stmt, TDF_SLIM);
                }

              bb = bb_for_stmt (def_stmt);
              if (flow_bb_inside_loop_p (loop, bb))
                vect_mark_relevant (&worklist, def_stmt);
            }
        }

      ann = stmt_ann (stmt);
      use_ops = USE_OPS (ann);

      for (i = 0; i < NUM_USES (use_ops); i++)
        {
          tree use = USE_OP (use_ops, i);

          /* We are only interested in uses that need to be vectorized.
             Uses that are used for address computation are not
             considered relevant.  */
          if (exist_non_indexing_operands_for_use_p (use, stmt))
            {
              tree def_stmt = NULL_TREE;
              basic_block bb;
              if (!vect_is_simple_use (use, loop, &def_stmt))
                {
                  if (vect_debug_details (NULL))
                    fprintf (dump_file, "worklist: unsupported use.");
                  varray_clear (worklist);
                  return false;
                }

              if (!def_stmt)
                continue;

              if (vect_debug_details (NULL))
                {
                  fprintf (dump_file, "worklist: examine use %d: ", i);
                  print_generic_expr (dump_file, use, TDF_SLIM);
                }

              bb = bb_for_stmt (def_stmt);
              if (flow_bb_inside_loop_p (loop, bb))
                vect_mark_relevant (&worklist, def_stmt);
            }
        }
    } /* while worklist */

  varray_clear (worklist);
  return true;
}


/* Function vect_can_advance_ivs_p.

   In case the number of iterations that LOOP iterates is unknown at
   compile time, an epilog loop will be generated, and the loop
   induction variables (IVs) will be "advanced" to the value they are
   supposed to take just before the epilog loop.  Here we check that
   the access functions of the loop IVs and the expression that
   represents the loop bound are simple enough.
   These restrictions will be relaxed in the future.  */
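
/* For example (illustrative), an IV of the form

        i_1 = PHI <0 (preheader), i_2 (latch)>;    i_2 = i_1 + 1;

   has the access function {0, +, 1}_loop; its evolution part in the
   loop is the constant 1, which is not itself a chrec, so the IV can
   be advanced.  An IV whose evolution part is again a chrec, e.g.
   {0, +, {1, +, 1}_loop}_loop (a polynomial of degree 2), is rejected
   FORNOW by the check below.  */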

static bool
vect_can_advance_ivs_p (struct loop *loop)
{
  basic_block bb = loop->header;
  tree phi;

  /* Analyze phi functions of the loop header.  */

  for (phi = phi_nodes (bb); phi; phi = PHI_CHAIN (phi))
    {
      tree access_fn = NULL;
      tree evolution_part;

      if (vect_debug_details (NULL))
        {
          fprintf (dump_file, "Analyze phi: ");
          print_generic_expr (dump_file, phi, TDF_SLIM);
        }

      /* Skip virtual phis.  The data dependences that are associated
         with virtual defs/uses (i.e., memory accesses) are analyzed
         elsewhere.  */

      if (!is_gimple_reg (SSA_NAME_VAR (PHI_RESULT (phi))))
        {
          if (vect_debug_details (NULL))
            fprintf (dump_file, "virtual phi. skip.");
          continue;
        }

      /* Analyze the evolution function.  */

      access_fn = instantiate_parameters
        (loop, analyze_scalar_evolution (loop, PHI_RESULT (phi)));

      if (!access_fn)
        {
          if (vect_debug_details (NULL))
            fprintf (dump_file, "No access function.");
          return false;
        }

      if (vect_debug_details (NULL))
        {
          fprintf (dump_file, "Access function of PHI: ");
          print_generic_expr (dump_file, access_fn, TDF_SLIM);
        }

      evolution_part = evolution_part_in_loop_num (access_fn, loop->num);

      if (evolution_part == NULL_TREE)
        return false;

      /* FORNOW: We do not transform initial conditions of IVs
         whose evolution functions are a polynomial of degree >= 2.  */

      if (tree_is_chrec (evolution_part))
        return false;
    }

  return true;
}


/* Function vect_get_loop_niters.

   Determine the number of iterations the loop executes.
   If an expression that represents the number of iterations
   can be constructed, place it in NUMBER_OF_ITERATIONS.
   Return the loop exit condition.  */
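
/* For example (illustrative), for a loop like

        for (i = 0; i < n; i++)
          ...

   the expression placed in NUMBER_OF_ITERATIONS would be given in
   terms of n (or folded to a constant when n is known at compile
   time), and the returned exit condition would be the COND_EXPR that
   tests i against the bound.  */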

static tree
vect_get_loop_niters (struct loop *loop, tree *number_of_iterations)
{
  tree niters;

  if (vect_debug_details (NULL))
    fprintf (dump_file, "\n<<get_loop_niters>>\n");

  niters = number_of_iterations_in_loop (loop);

  if (niters != NULL_TREE
      && niters != chrec_dont_know)
    {
      *number_of_iterations = niters;

      if (vect_debug_details (NULL))
        {
          fprintf (dump_file, "==> get_loop_niters:");
          print_generic_expr (dump_file, *number_of_iterations, TDF_SLIM);
        }
    }

  return get_loop_exit_condition (loop);
}


/* Function vect_analyze_loop_form.

   Verify the following restrictions (some may be relaxed in the future):
   - it is an innermost loop
   - the number of BBs is 2 (the loop header and the latch)
   - the loop has a pre-header
   - the loop has a single entry and a single exit
   - the loop exit condition is simple enough, and the number of
     iterations can be analyzed (a countable loop).  */
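
/* The expected shape (an illustrative sketch) is essentially a
   do-while loop whose header holds all the executable stmts and whose
   latch is empty:

        preheader:
          ...
        header:
          i_1 = PHI <0 (preheader), i_2 (latch)>
          ... loop body stmts ...
          i_2 = i_1 + 1;
          if (i_2 < n) goto latch; else goto exit_bb;
        latch:
          goto header;
        exit_bb:                <-- single predecessor  */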

static loop_vec_info
vect_analyze_loop_form (struct loop *loop)
{
  loop_vec_info loop_vinfo;
  tree loop_cond;
  tree number_of_iterations = NULL;
  bool rescan = false;

  if (vect_debug_details (loop))
    fprintf (dump_file, "\n<<vect_analyze_loop_form>>\n");

  if (loop->inner
      || !loop->single_exit
      || loop->num_nodes != 2
      || EDGE_COUNT (loop->header->preds) != 2
      || loop->num_entries != 1)
    {
      if (vect_debug_stats (loop) || vect_debug_details (loop))
        {
          fprintf (dump_file, "not vectorized: bad loop form. ");
          if (loop->inner)
            fprintf (dump_file, "nested loop.");
          else if (!loop->single_exit)
            fprintf (dump_file, "multiple exits.");
          else if (loop->num_nodes != 2)
            fprintf (dump_file, "too many BBs in loop.");
          else if (EDGE_COUNT (loop->header->preds) != 2)
            fprintf (dump_file, "too many incoming edges.");
          else if (loop->num_entries != 1)
            fprintf (dump_file, "too many entries.");
        }

      return NULL;
    }

  /* We assume that the loop exit condition is at the end of the loop,
     i.e., that the loop is represented as a do-while (with a proper
     if-guard before the loop if needed), where the loop header contains
     all the executable statements, and the latch is empty.  */
  if (!empty_block_p (loop->latch))
    {
      if (vect_debug_stats (loop) || vect_debug_details (loop))
        fprintf (dump_file, "not vectorized: unexpected loop form.");
      return NULL;
    }

  /* Make sure we have a preheader basic block.  */
  if (!loop->pre_header)
    {
      rescan = true;
      loop_split_edge_with (loop_preheader_edge (loop), NULL);
    }

  /* Make sure there exists a single-predecessor exit bb.  */
  if (EDGE_COUNT (loop->exit_edges[0]->dest->preds) != 1)
    {
      rescan = true;
      loop_split_edge_with (loop->exit_edges[0], NULL);
    }

  if (rescan)
    {
      flow_loop_scan (loop, LOOP_ALL);
      /* Flow loop scan does not update the loop->single_exit field.  */
      loop->single_exit = loop->exit_edges[0];
    }

  if (empty_block_p (loop->header))
    {
      if (vect_debug_stats (loop) || vect_debug_details (loop))
        fprintf (dump_file, "not vectorized: empty loop.");
      return NULL;
    }

  loop_cond = vect_get_loop_niters (loop, &number_of_iterations);
  if (!loop_cond)
    {
      if (vect_debug_stats (loop) || vect_debug_details (loop))
        fprintf (dump_file, "not vectorized: complicated exit condition.");
      return NULL;
    }

  if (!number_of_iterations)
    {
      if (vect_debug_stats (loop) || vect_debug_details (loop))
        fprintf (dump_file,
                 "not vectorized: number of iterations cannot be computed.");
      return NULL;
    }

  if (chrec_contains_undetermined (number_of_iterations))
    {
      if (vect_debug_details (NULL))
        fprintf (dump_file, "Infinite number of iterations.");
      return NULL;
    }

  loop_vinfo = new_loop_vec_info (loop);
  LOOP_VINFO_NITERS (loop_vinfo) = number_of_iterations;

  if (!LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo))
    {
      if (vect_debug_details (loop))
        {
          fprintf (dump_file, "loop bound unknown.\n");
          fprintf (dump_file, "Symbolic number of iterations is ");
          print_generic_expr (dump_file, number_of_iterations, TDF_DETAILS);
        }
    }
  else if (LOOP_VINFO_INT_NITERS (loop_vinfo) == 0)
    {
      if (vect_debug_stats (loop) || vect_debug_details (loop))
        fprintf (dump_file, "not vectorized: number of iterations = 0.");
      return NULL;
    }

  LOOP_VINFO_EXIT_COND (loop_vinfo) = loop_cond;

  return loop_vinfo;
}


/* Function vect_analyze_loop.

   Apply a set of analyses on LOOP, and create a loop_vec_info struct
   for it.  The different analyses will record information in the
   loop_vec_info struct.  */
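
/* The analyses applied below, in the order of the calls that follow:

     1. vect_analyze_loop_form            - CFG shape, countable loop
     2. vect_analyze_data_refs            - find and analyze data-refs
     3. vect_mark_stmts_to_be_vectorized  - relevance data-flow
     4. vect_analyze_scalar_cycles        - cross-iteration scalar cycles
     5. vect_analyze_data_ref_dependences - data dependences
     6. vect_analyze_data_ref_accesses    - access patterns (consecutive)
     7. vect_analyze_data_refs_alignment  - alignment
     8. vect_analyze_operations           - each operation is vectorizable

   Any failure destroys the loop_vec_info and returns NULL.  */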

static loop_vec_info
vect_analyze_loop (struct loop *loop)
{
  bool ok;
  loop_vec_info loop_vinfo;

  if (vect_debug_details (NULL))
    fprintf (dump_file, "\n<<<<<<< analyze_loop_nest >>>>>>>\n");

  /* Check the CFG characteristics of the loop (nesting, entry/exit, etc.).  */

  loop_vinfo = vect_analyze_loop_form (loop);
  if (!loop_vinfo)
    {
      if (vect_debug_details (loop))
        fprintf (dump_file, "bad loop form.");
      return NULL;
    }

  /* Find all data references in the loop (which correspond to
     vdefs/vuses) and analyze their evolution in the loop.

     FORNOW: Handle only simple array references whose alignment can be
     forced, and aligned pointer-references.  */

  ok = vect_analyze_data_refs (loop_vinfo);
  if (!ok)
    {
      if (vect_debug_details (loop))
        fprintf (dump_file, "bad data references.");
      destroy_loop_vec_info (loop_vinfo);
      return NULL;
    }

  /* Data-flow analysis to detect stmts that do not need to be
     vectorized.  */

  ok = vect_mark_stmts_to_be_vectorized (loop_vinfo);
  if (!ok)
    {
      if (vect_debug_stats (loop) || vect_debug_details (loop))
        fprintf (dump_file, "not vectorized: unexpected pattern.");
      destroy_loop_vec_info (loop_vinfo);
      return NULL;
    }

  /* Check that all cross-iteration scalar data-flow cycles are OK.
     Cross-iteration cycles caused by virtual phis are analyzed
     separately.  */

  ok = vect_analyze_scalar_cycles (loop_vinfo);
  if (!ok)
    {
      if (vect_debug_details (loop))
        fprintf (dump_file, "bad scalar cycle.");
      destroy_loop_vec_info (loop_vinfo);
      return NULL;
    }

  /* Analyze data dependences between the data-refs in the loop.
     FORNOW: fail at the first data dependence that we encounter.  */

  ok = vect_analyze_data_ref_dependences (loop_vinfo);
  if (!ok)
    {
      if (vect_debug_details (loop))
        fprintf (dump_file, "bad data dependence.");
      destroy_loop_vec_info (loop_vinfo);
      return NULL;
    }

  /* Analyze the access patterns of the data-refs in the loop
     (consecutive, complex, etc.).  FORNOW: Only handle consecutive
     access patterns.  */

  ok = vect_analyze_data_ref_accesses (loop_vinfo);
  if (!ok)
    {
      if (vect_debug_details (loop))
        fprintf (dump_file, "bad data access.");
      destroy_loop_vec_info (loop_vinfo);
      return NULL;
    }

  /* Analyze the alignment of the data-refs in the loop.
     FORNOW: Only aligned accesses are handled.  */

  ok = vect_analyze_data_refs_alignment (loop_vinfo);
  if (!ok)
    {
      if (vect_debug_details (loop))
        fprintf (dump_file, "bad data alignment.");
      destroy_loop_vec_info (loop_vinfo);
      return NULL;
    }

  /* Scan all the operations in the loop and make sure they are
     vectorizable.  */

  ok = vect_analyze_operations (loop_vinfo);
  if (!ok)
    {
      if (vect_debug_details (loop))
        fprintf (dump_file, "bad operation or unsupported loop bound.");
      destroy_loop_vec_info (loop_vinfo);
      return NULL;
    }

  LOOP_VINFO_VECTORIZABLE_P (loop_vinfo) = 1;

  return loop_vinfo;
}


/* Function need_imm_uses_for.

   Return whether we ought to include information for VAR when
   calculating immediate uses.  For this pass we only want use
   information for non-virtual variables.  */

static bool
need_imm_uses_for (tree var)
{
  return is_gimple_reg (var);
}


/* Function vectorize_loops.

   Entry point to the loop vectorization phase.  */
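
/* A sketch of the flow below: analyze each of the original loops,
   transform those found vectorizable, then release the per-loop info
   and rewrite the function back into (loop-closed) SSA form.  */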

void
vectorize_loops (struct loops *loops)
{
  unsigned int i, loops_num;
  unsigned int num_vectorized_loops = 0;

  /* Does the target support SIMD?
     FORNOW: until more sophisticated machine modelling is in place.  */
  if (!UNITS_PER_SIMD_WORD)
    {
      if (vect_debug_details (NULL))
        fprintf (dump_file, "vectorizer: target vector size is not defined.");
      return;
    }

#ifdef ENABLE_CHECKING
  verify_loop_closed_ssa ();
#endif

  compute_immediate_uses (TDFA_USE_OPS, need_imm_uses_for);

  /* ----------- Analyze loops. ----------- */

  /* If a loop was duplicated, it gets a bigger number than all
     previously defined loops.  This fact allows us to run over only
     the initial loops, skipping newly generated ones.  */
  loops_num = loops->num;
  for (i = 1; i < loops_num; i++)
    {
      loop_vec_info loop_vinfo;
      struct loop *loop = loops->parray[i];

      if (!loop)
        continue;

      loop_vinfo = vect_analyze_loop (loop);
      loop->aux = loop_vinfo;

      if (!loop_vinfo || !LOOP_VINFO_VECTORIZABLE_P (loop_vinfo))
        continue;

      vect_transform_loop (loop_vinfo, loops);
      num_vectorized_loops++;
    }

  if (vect_debug_stats (NULL) || vect_debug_details (NULL))
    fprintf (dump_file, "\nvectorized %u loops in function.\n",
             num_vectorized_loops);

  /* ----------- Finalize. ----------- */

  free_df ();
  for (i = 1; i < loops_num; i++)
    {
      struct loop *loop = loops->parray[i];
      loop_vec_info loop_vinfo;

      if (!loop)
        continue;
      loop_vinfo = loop->aux;
      destroy_loop_vec_info (loop_vinfo);
      loop->aux = NULL;
    }

  rewrite_into_ssa (false);
  rewrite_into_loop_closed_ssa (); /* FORNOW */
  bitmap_clear (vars_to_rename);
}