1 /* Transformation Utilities for Loop Vectorization.
2 Copyright (C) 2003, 2004, 2005, 2006, 2007 Free Software Foundation, Inc.
3 Contributed by Dorit Naishlos <dorit@il.ibm.com>
4
5 This file is part of GCC.
6
7 GCC is free software; you can redistribute it and/or modify it under
8 the terms of the GNU General Public License as published by the Free
9 Software Foundation; either version 2, or (at your option) any later
10 version.
11
12 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
13 WARRANTY; without even the implied warranty of MERCHANTABILITY or
14 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
15 for more details.
16
17 You should have received a copy of the GNU General Public License
18 along with GCC; see the file COPYING. If not, write to the Free
19 Software Foundation, 51 Franklin Street, Fifth Floor, Boston, MA
20 02110-1301, USA. */
21
22 #include "config.h"
23 #include "system.h"
24 #include "coretypes.h"
25 #include "tm.h"
26 #include "ggc.h"
27 #include "tree.h"
28 #include "target.h"
29 #include "rtl.h"
30 #include "basic-block.h"
31 #include "diagnostic.h"
32 #include "tree-flow.h"
33 #include "tree-dump.h"
34 #include "timevar.h"
35 #include "cfgloop.h"
36 #include "expr.h"
37 #include "optabs.h"
38 #include "params.h"
39 #include "recog.h"
40 #include "tree-data-ref.h"
41 #include "tree-chrec.h"
42 #include "tree-scalar-evolution.h"
43 #include "tree-vectorizer.h"
44 #include "langhooks.h"
45 #include "tree-pass.h"
46 #include "toplev.h"
47 #include "real.h"
48
49 /* Utility functions for the code transformation. */
50 static bool vect_transform_stmt (tree, block_stmt_iterator *, bool *);
51 static tree vect_create_destination_var (tree, tree);
52 static tree vect_create_data_ref_ptr
53 (tree, block_stmt_iterator *, tree, tree *, tree *, bool, tree);
54 static tree vect_create_addr_base_for_vector_ref (tree, tree *, tree);
55 static tree vect_setup_realignment (tree, block_stmt_iterator *, tree *);
56 static tree vect_get_new_vect_var (tree, enum vect_var_kind, const char *);
57 static tree vect_get_vec_def_for_operand (tree, tree, tree *);
58 static tree vect_init_vector (tree, tree, tree);
59 static void vect_finish_stmt_generation
60 (tree stmt, tree vec_stmt, block_stmt_iterator *bsi);
61 static bool vect_is_simple_cond (tree, loop_vec_info);
62 static void update_vuses_to_preheader (tree, struct loop*);
63 static void vect_create_epilog_for_reduction (tree, tree, enum tree_code, tree);
64 static tree get_initial_def_for_reduction (tree, tree, tree *);
65
66 /* Utility functions dealing with loop peeling (not peeling itself). */
67 static void vect_generate_tmps_on_preheader
68 (loop_vec_info, tree *, tree *, tree *);
69 static tree vect_build_loop_niters (loop_vec_info);
70 static void vect_update_ivs_after_vectorizer (loop_vec_info, tree, edge);
71 static tree vect_gen_niters_for_prolog_loop (loop_vec_info, tree);
72 static void vect_update_init_of_dr (struct data_reference *, tree niters);
73 static void vect_update_inits_of_drs (loop_vec_info, tree);
74 static int vect_min_worthwhile_factor (enum tree_code);
75
76
77 /* Function vect_estimate_min_profitable_iters
78
79 Return the number of iterations required for the vector version of the
80 loop to be profitable relative to the cost of the scalar version of the
81 loop.
82
83 TODO: Take profile info into account before making vectorization
84 decisions, if available. */
85
86 int
87 vect_estimate_min_profitable_iters (loop_vec_info loop_vinfo)
88 {
89 int i;
90 int min_profitable_iters;
91 int peel_iters_prologue;
92 int peel_iters_epilogue;
93 int vec_inside_cost = 0;
94 int vec_outside_cost = 0;
95 int scalar_single_iter_cost = 0;
96 int vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
97 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
98 basic_block *bbs = LOOP_VINFO_BBS (loop_vinfo);
99 int nbbs = loop->num_nodes;
100
101 /* Cost model disabled. */
102 if (!flag_vect_cost_model)
103 {
104 if (vect_print_dump_info (REPORT_DETAILS))
105 fprintf (vect_dump, "cost model disabled.");
106 return 0;
107 }
108
109 /* Requires loop versioning tests to handle misalignment.
110 FIXME: Make cost depend on number of stmts in may_misalign list. */
111
112 if (LOOP_VINFO_MAY_MISALIGN_STMTS (loop_vinfo))
113 {
114 vec_outside_cost += TARG_COND_BRANCH_COST;
115 if (vect_print_dump_info (REPORT_DETAILS))
116 fprintf (vect_dump, "cost model: Adding cost of checks for loop "
117 "versioning.\n");
118 }
119
120 /* Requires a prologue loop when peeling to handle misalignment. Add cost of
121 two guards, one for the peeled loop and one for the vector loop. */
122
123 peel_iters_prologue = LOOP_PEELING_FOR_ALIGNMENT (loop_vinfo);
124 if (peel_iters_prologue)
125 {
126 vec_outside_cost += 2 * TARG_COND_BRANCH_COST;
127 if (vect_print_dump_info (REPORT_DETAILS))
128 fprintf (vect_dump, "cost model: Adding cost of checks for "
129 "prologue.\n");
130 }
131
132 /* Requires an epilogue loop to finish up remaining iterations after vector
133 loop. Add cost of two guards, one for the peeled loop and one for the
134 vector loop. */
135
136 if ((peel_iters_prologue < 0)
137 || !LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo)
138 || LOOP_VINFO_INT_NITERS (loop_vinfo) % vf)
139 {
140 vec_outside_cost += 2 * TARG_COND_BRANCH_COST;
141 if (vect_print_dump_info (REPORT_DETAILS))
142 fprintf (vect_dump, "cost model : Adding cost of checks for "
143 "epilogue.\n");
144 }
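/* Editorial worked example (not in the original source): a loop that
   needs versioning, a misalignment prologue, and an epilogue starts
   with vec_outside_cost = (1 + 2 + 2) * TARG_COND_BRANCH_COST from
   the three blocks above.  */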
145
146 /* Count statements in scalar loop. Using this as scalar cost for a single
147 iteration for now.
148
149 TODO: Add outer loop support.
150
151 TODO: Consider assigning different costs to different scalar
152 statements. */
153
154 for (i = 0; i < nbbs; i++)
155 {
156 block_stmt_iterator si;
157 basic_block bb = bbs[i];
158
159 for (si = bsi_start (bb); !bsi_end_p (si); bsi_next (&si))
160 {
161 tree stmt = bsi_stmt (si);
162 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
163 if (!STMT_VINFO_RELEVANT_P (stmt_info)
164 && !STMT_VINFO_LIVE_P (stmt_info))
165 continue;
166 scalar_single_iter_cost++;
167 vec_inside_cost += STMT_VINFO_INSIDE_OF_LOOP_COST (stmt_info);
168 vec_outside_cost += STMT_VINFO_OUTSIDE_OF_LOOP_COST (stmt_info);
169 }
170 }
171
172 /* Add additional cost for the peeled instructions in prologue and epilogue
173 loop.
174
175 FORNOW: If we don't know the value of peel_iters for prologue or epilogue
176 at compile-time, we assume the worst.
177
178 TODO: Build an expression that represents peel_iters for prologue and
179 epilogue to be used in a run-time test. */
180
181 peel_iters_prologue = LOOP_PEELING_FOR_ALIGNMENT (loop_vinfo);
182
183 if (peel_iters_prologue < 0)
184 {
185 peel_iters_prologue = vf - 1;
186 if (vect_print_dump_info (REPORT_DETAILS))
187 fprintf (vect_dump, "cost model: "
188 "prologue peel iters set conservatively.");
189
190 /* If peeling for alignment is unknown, loop bound of main loop becomes
191 unknown. */
192 peel_iters_epilogue = vf - 1;
193 if (vect_print_dump_info (REPORT_DETAILS))
194 fprintf (vect_dump, "cost model: "
195 "epilogue peel iters set conservatively because "
196 "peeling for alignment is unknown .");
197 }
198 else
199 {
200 if (!LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo))
201 {
202 peel_iters_epilogue = vf - 1;
203 if (vect_print_dump_info (REPORT_DETAILS))
204 fprintf (vect_dump, "cost model: "
205 "epilogue peel iters set conservatively because "
206 "loop iterations are unknown .");
207 }
208 else
209 peel_iters_epilogue =
210 (LOOP_VINFO_INT_NITERS (loop_vinfo) - peel_iters_prologue)
211 % vf;
212 }
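/* Editorial worked example (not in the original source): with
   LOOP_VINFO_INT_NITERS = 100, VF = 4 and peel_iters_prologue = 3,
   the epilogue gets (100 - 3) % 4 = 1 iteration.  */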
213
214 vec_outside_cost += (peel_iters_prologue * scalar_single_iter_cost)
215 + (peel_iters_epilogue * scalar_single_iter_cost);
216
217 /* Calculate number of iterations required to make the vector version
218 profitable, relative to the loop bodies only. The following condition
219 must hold true: ((SIC*VF)-VIC)*niters > VOC*VF, where
220 SIC = scalar iteration cost, VIC = vector iteration cost,
221 VOC = vector outside cost and VF = vectorization factor. */
222
223 if ((scalar_single_iter_cost * vf) > vec_inside_cost)
224 {
225 if (vec_outside_cost == 0)
226 min_profitable_iters = 1;
227 else
228 {
229 min_profitable_iters = (vec_outside_cost * vf)
230 / ((scalar_single_iter_cost * vf)
231 - vec_inside_cost);
232
233 if ((scalar_single_iter_cost * vf * min_profitable_iters)
234 <= ((vec_inside_cost * min_profitable_iters)
235 + (vec_outside_cost * vf)))
236 min_profitable_iters++;
237 }
238 }
239 /* Vector version will never be profitable. */
240 else
241 {
242 if (vect_print_dump_info (REPORT_DETAILS))
243 fprintf (vect_dump, "cost model: vector iteration cost = %d "
244 "is divisible by scalar iteration cost = %d by a factor "
245 "greater than or equal to the vectorization factor = %d .",
246 vec_inside_cost, scalar_single_iter_cost, vf);
247 return -1;
248 }
249
250 if (vect_print_dump_info (REPORT_DETAILS))
251 {
252 fprintf (vect_dump, "Cost model analysis: \n");
253 fprintf (vect_dump, " Vector inside of loop cost: %d\n",
254 vec_inside_cost);
255 fprintf (vect_dump, " Vector outside of loop cost: %d\n",
256 vec_outside_cost);
257 fprintf (vect_dump, " Scalar cost: %d\n", scalar_single_iter_cost);
258 fprintf (vect_dump, " prologue iterations: %d\n",
259 peel_iters_prologue);
260 fprintf (vect_dump, " epilogue iterations: %d\n",
261 peel_iters_epilogue);
262 fprintf (vect_dump, " Calculated minimum iters for profitability: %d\n",
263 min_profitable_iters);
264 fprintf (vect_dump, " Actual minimum iters for profitability: %d\n",
265 min_profitable_iters < vf ? vf : min_profitable_iters);
266 }
267
268 return min_profitable_iters < vf ? vf : min_profitable_iters;
269 }
270
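/* Editorial sketch, not part of GCC: a self-contained model of the
   profitability computation above.  The helper name and the example
   costs are invented for illustration only.  */
#include <stdio.h>

static int
min_profitable_iters_sketch (int sic, int vic, int voc, int vf)
{
  int iters;

  /* ((SIC * VF) - VIC) must be positive, or the vector loop body
     never beats the scalar one.  */
  if (sic * vf <= vic)
    return -1;
  if (voc == 0)
    return 1;

  /* niters > (VOC * VF) / ((SIC * VF) - VIC); round up on a tie,
     mirroring the explicit re-check in the function above.  */
  iters = (voc * vf) / (sic * vf - vic);
  if (sic * vf * iters <= vic * iters + voc * vf)
    iters++;

  /* The runtime threshold is never below the vectorization factor.  */
  return iters < vf ? vf : iters;
}

int
main (void)
{
  /* SIC = 1, VIC = 2, VOC = 6, VF = 4: (1*4 - 2) * n > 6*4 gives
     n > 12, so this prints 13.  */
  printf ("%d\n", min_profitable_iters_sketch (1, 2, 6, 4));
  return 0;
}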
271
272 /* TODO: Close dependency between vect_model_*_cost and vectorizable_*
273 functions. Design better to avoid maintenance issues. */
274
275 /* Function vect_model_reduction_cost.
276
277 Models cost for a reduction operation, including the vector ops
278 generated within the strip-mine loop, the initial definition before
279 the loop, and the epilogue code that must be generated. */
280
281 static void
282 vect_model_reduction_cost (stmt_vec_info stmt_info, enum tree_code reduc_code,
283 int ncopies)
284 {
285 int outer_cost = 0;
286 enum tree_code code;
287 optab optab;
288 tree vectype;
289 tree orig_stmt;
290 tree reduction_op;
291 enum machine_mode mode;
292 tree operation = GIMPLE_STMT_OPERAND (STMT_VINFO_STMT (stmt_info), 1);
293 int op_type = TREE_CODE_LENGTH (TREE_CODE (operation));
294
295 /* Cost of reduction op inside loop. */
296 STMT_VINFO_INSIDE_OF_LOOP_COST (stmt_info) += ncopies * TARG_VEC_STMT_COST;
297
298 reduction_op = TREE_OPERAND (operation, op_type-1);
299 vectype = get_vectype_for_scalar_type (TREE_TYPE (reduction_op));
300 mode = TYPE_MODE (vectype);
301 orig_stmt = STMT_VINFO_RELATED_STMT (stmt_info);
302
303 if (!orig_stmt)
304 orig_stmt = STMT_VINFO_STMT (stmt_info);
305
306 code = TREE_CODE (GIMPLE_STMT_OPERAND (orig_stmt, 1));
307
308 /* Add in cost for initial definition. */
309 outer_cost += TARG_VEC_STMT_COST;
310
311 /* Determine cost of epilogue code.
312
313 We have a reduction operator that will reduce the vector in one statement.
314 Also requires scalar extract. */
315
316 if (reduc_code < NUM_TREE_CODES)
317 outer_cost += TARG_VEC_STMT_COST + TARG_VEC_TO_SCALAR_COST;
318 else
319 {
320 int vec_size_in_bits = tree_low_cst (TYPE_SIZE (vectype), 1);
321 tree bitsize =
322 TYPE_SIZE (TREE_TYPE ( GIMPLE_STMT_OPERAND (orig_stmt, 0)));
323 int element_bitsize = tree_low_cst (bitsize, 1);
324 int nelements = vec_size_in_bits / element_bitsize;
325
326 optab = optab_for_tree_code (code, vectype);
327
328 /* We have a whole vector shift available. */
329 if (VECTOR_MODE_P (mode)
330     && optab->handlers[mode].insn_code != CODE_FOR_nothing)
331 /* Final reduction via vector shifts and the reduction operator. Also
332 requires scalar extract. */
333 outer_cost += ((exact_log2(nelements) * 2 + 1) * TARG_VEC_STMT_COST);
334 else
335 /* Use extracts and reduction op for final reduction. For N elements,
336 we have N extracts and N-1 reduction ops. */
337 outer_cost += ((nelements + nelements - 1) * TARG_VEC_STMT_COST);
338 }
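/* Editorial worked example (not in the original source): for a vector
   of 4 elements, the shift scheme above costs
   (exact_log2 (4) * 2 + 1) = 5 vector stmts, while the extract scheme
   costs 4 + 3 = 7.  */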
339
340 STMT_VINFO_OUTSIDE_OF_LOOP_COST (stmt_info) = outer_cost;
341
342 if (vect_print_dump_info (REPORT_DETAILS))
343 fprintf (vect_dump, "vect_model_reduction_cost: inside_cost = %d, "
344 "outside_cost = %d .", STMT_VINFO_INSIDE_OF_LOOP_COST (stmt_info),
345 STMT_VINFO_OUTSIDE_OF_LOOP_COST (stmt_info));
346 }
347
348
349 /* Function vect_model_induction_cost.
350
351 Models cost for induction operations. */
352
353 static void
354 vect_model_induction_cost (stmt_vec_info stmt_info, int ncopies)
355 {
356 /* Loop cost for vec_loop. */
357 STMT_VINFO_INSIDE_OF_LOOP_COST (stmt_info) = ncopies * TARG_VEC_STMT_COST;
358 /* Prologue cost for vec_init and vec_step. */
359 STMT_VINFO_OUTSIDE_OF_LOOP_COST (stmt_info) = 2 * TARG_VEC_STMT_COST;
360
361 if (vect_print_dump_info (REPORT_DETAILS))
362 fprintf (vect_dump, "vect_model_induction_cost: inside_cost = %d, "
363 "outside_cost = %d .", STMT_VINFO_INSIDE_OF_LOOP_COST (stmt_info),
364 STMT_VINFO_OUTSIDE_OF_LOOP_COST (stmt_info));
365 }
366
367
368 /* Function vect_model_simple_cost.
369
370 Models cost for simple operations, i.e. those that only emit ncopies of a
371 single op. Right now, this does not account for multiple insns that could
372 be generated for the single vector op. We will handle that shortly. */
373
374 static void
375 vect_model_simple_cost (stmt_vec_info stmt_info, int ncopies)
376 {
377 STMT_VINFO_INSIDE_OF_LOOP_COST (stmt_info) = ncopies * TARG_VEC_STMT_COST;
378
379 if (vect_print_dump_info (REPORT_DETAILS))
380 fprintf (vect_dump, "vect_model_simple_cost: inside_cost = %d, "
381 "outside_cost = %d .", STMT_VINFO_INSIDE_OF_LOOP_COST (stmt_info),
382 STMT_VINFO_OUTSIDE_OF_LOOP_COST (stmt_info));
383 }
384
385
386 /* Function vect_cost_strided_group_size
387
388 For strided load or store, return the group_size only if it is the first
389 load or store of a group, else return 1. This ensures that group size is
390 only returned once per group. */
391
392 static int
393 vect_cost_strided_group_size (stmt_vec_info stmt_info)
394 {
395 tree first_stmt = DR_GROUP_FIRST_DR (stmt_info);
396
397 if (first_stmt == STMT_VINFO_STMT (stmt_info))
398 return DR_GROUP_SIZE (stmt_info);
399
400 return 1;
401 }
402
403
404 /* Function vect_model_store_cost
405
406 Models cost for stores. In the case of strided accesses, one access
407 has the overhead of the strided access attributed to it. */
408
409 static void
410 vect_model_store_cost (stmt_vec_info stmt_info, int ncopies)
411 {
412 int cost = 0;
413 int group_size;
414
415 /* Strided access? */
416 if (DR_GROUP_FIRST_DR (stmt_info))
417 group_size = vect_cost_strided_group_size (stmt_info);
418 /* Not a strided access. */
419 else
420 group_size = 1;
421
422 /* Is this an access in a group of stores, which provide strided access?
423 If so, add in the cost of the permutes. */
424 if (group_size > 1)
425 {
426 /* Uses a high and low interleave operation for each needed permute. */
427 cost = ncopies * exact_log2(group_size) * group_size
428 * TARG_VEC_STMT_COST;
429
430 if (vect_print_dump_info (REPORT_DETAILS))
431 fprintf (vect_dump, "vect_model_store_cost: strided group_size = %d .",
432 group_size);
433
434 }
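/* Editorial worked example (not in the original source): for
   group_size = 4 and ncopies = 1 the formula above charges
   exact_log2 (4) * 4 = 8 vector stmts, i.e. two rounds of four
   high/low interleaves.  */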
435
436 /* Costs of the stores. */
437 cost += ncopies * TARG_VEC_STORE_COST;
438
439 STMT_VINFO_INSIDE_OF_LOOP_COST (stmt_info) = cost;
440
441 if (vect_print_dump_info (REPORT_DETAILS))
442 fprintf (vect_dump, "vect_model_store_cost: inside_cost = %d, "
443 "outside_cost = %d .", STMT_VINFO_INSIDE_OF_LOOP_COST (stmt_info),
444 STMT_VINFO_OUTSIDE_OF_LOOP_COST (stmt_info));
445 }
446
447
448 /* Function vect_model_load_cost
449
450 Models cost for loads. In the case of strided accesses, one access in the
451 group has the overhead of the strided access attributed to it. Since unaligned
452 accesses are supported for loads, we also account for the costs of the
453 access scheme chosen. */
454
455 static void
456 vect_model_load_cost (stmt_vec_info stmt_info, int ncopies)
457
458 {
459 int inner_cost = 0;
460 int group_size;
461 int alignment_support_scheme;
462 tree first_stmt;
463 struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info), *first_dr;
464
465 /* Strided accesses? */
466 first_stmt = DR_GROUP_FIRST_DR (stmt_info);
467 if (first_stmt)
468 {
469 group_size = vect_cost_strided_group_size (stmt_info);
470 first_dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt));
471 }
472 /* Not a strided access. */
473 else
474 {
475 group_size = 1;
476 first_dr = dr;
477 }
478
479 alignment_support_scheme = vect_supportable_dr_alignment (first_dr);
480
481 /* Is this an access in a group of loads providing strided access?
482 If so, add in the cost of the permutes. */
483 if (group_size > 1)
484 {
485 /* Uses even and odd extract operations for each needed permute. */
486 inner_cost = ncopies * exact_log2(group_size) * group_size
487 * TARG_VEC_STMT_COST;
488
489 if (vect_print_dump_info (REPORT_DETAILS))
490 fprintf (vect_dump, "vect_model_load_cost: strided group_size = %d .",
491 group_size);
492
493 }
494
495 /* The loads themselves. */
496 switch (alignment_support_scheme)
497 {
498 case dr_aligned:
499 {
500 inner_cost += ncopies * TARG_VEC_LOAD_COST;
501
502 if (vect_print_dump_info (REPORT_DETAILS))
503 fprintf (vect_dump, "vect_model_load_cost: aligned.");
504
505 break;
506 }
507 case dr_unaligned_supported:
508 {
509 /* Here, we assign an additional cost for the unaligned load. */
510 inner_cost += ncopies * TARG_VEC_UNALIGNED_LOAD_COST;
511
512 if (vect_print_dump_info (REPORT_DETAILS))
513 fprintf (vect_dump, "vect_model_load_cost: unaligned supported by "
514 "hardware.");
515
516 break;
517 }
518 case dr_unaligned_software_pipeline:
519 {
520 int outer_cost = 0;
521
522 if (vect_print_dump_info (REPORT_DETAILS))
523 fprintf (vect_dump, "vect_model_load_cost: unaligned software "
524 "pipelined.");
525
526 /* Unaligned software pipeline has a load of an address, an initial
527 load, and possibly a mask operation to "prime" the loop. However,
528 if this is an access in a group of loads, which provide strided
529 access, then the above cost should only be considered for one
530 access in the group. Inside the loop, there is a load op
531 and a realignment op. */
532
533 if ((!DR_GROUP_FIRST_DR (stmt_info)) || group_size > 1)
534 {
535 outer_cost = 2 * TARG_VEC_STMT_COST;
536 if (targetm.vectorize.builtin_mask_for_load)
537 outer_cost += TARG_VEC_STMT_COST;
538 }
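/* Editorial note (not in the original source): with a
   builtin_mask_for_load hook the prologue thus costs three vector
   stmts (address computation, initial load, mask), and each copy in
   the loop body below costs a load plus a realignment op.  */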
539
540 STMT_VINFO_OUTSIDE_OF_LOOP_COST (stmt_info) = outer_cost;
541
542 inner_cost += ncopies * (TARG_VEC_LOAD_COST + TARG_VEC_STMT_COST);
543
544 break;
545 }
546
547 default:
548 gcc_unreachable ();
549 }
550
551 STMT_VINFO_INSIDE_OF_LOOP_COST (stmt_info) = inner_cost;
552
553 if (vect_print_dump_info (REPORT_DETAILS))
554 fprintf (vect_dump, "vect_model_load_cost: inside_cost = %d, "
555 "outside_cost = %d .", STMT_VINFO_INSIDE_OF_LOOP_COST (stmt_info),
556 STMT_VINFO_OUTSIDE_OF_LOOP_COST (stmt_info));
557
558 }
559
560
561 /* Function vect_get_new_vect_var.
562
563 Returns a name for a new variable. The current naming scheme prepends the
564 prefix "vect_", "stmp_", or "vect_p" (depending on the value of VAR_KIND)
565 to NAME when NAME is provided; otherwise the prefix alone is used (e.g.,
566 NAME "a" with vect_pointer_var yields "vect_pa"). */
567
568 static tree
569 vect_get_new_vect_var (tree type, enum vect_var_kind var_kind, const char *name)
570 {
571 const char *prefix;
572 tree new_vect_var;
573
574 switch (var_kind)
575 {
576 case vect_simple_var:
577 prefix = "vect_";
578 break;
579 case vect_scalar_var:
580 prefix = "stmp_";
581 break;
582 case vect_pointer_var:
583 prefix = "vect_p";
584 break;
585 default:
586 gcc_unreachable ();
587 }
588
589 if (name)
590 new_vect_var = create_tmp_var (type, concat (prefix, name, NULL));
591 else
592 new_vect_var = create_tmp_var (type, prefix);
593
594 /* Mark vector typed variable as a gimple register variable. */
595 if (TREE_CODE (type) == VECTOR_TYPE)
596 DECL_GIMPLE_REG_P (new_vect_var) = true;
597
598 return new_vect_var;
599 }
600
601
602 /* Function vect_create_addr_base_for_vector_ref.
603
604 Create an expression that computes the address of the first memory location
605 that will be accessed for a data reference.
606
607 Input:
608 STMT: The statement containing the data reference.
609 NEW_STMT_LIST: Must be initialized to NULL_TREE or a statement list.
610 OFFSET: Optional. If supplied, it is added to the initial address.
611
612 Output:
613 1. Return an SSA_NAME whose value is the address of the memory location of
614 the first vector of the data reference.
615 2. If NEW_STMT_LIST is not NULL_TREE after return, the caller must insert
616 the statement(s) that define the returned SSA_NAME.
617
618 FORNOW: We are only handling array accesses with step 1. */
619
620 static tree
621 vect_create_addr_base_for_vector_ref (tree stmt,
622 tree *new_stmt_list,
623 tree offset)
624 {
625 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
626 struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
627 tree data_ref_base = unshare_expr (DR_BASE_ADDRESS (dr));
628 tree base_name = build_fold_indirect_ref (data_ref_base);
629 tree vec_stmt;
630 tree addr_base, addr_expr;
631 tree dest, new_stmt;
632 tree base_offset = unshare_expr (DR_OFFSET (dr));
633 tree init = unshare_expr (DR_INIT (dr));
634 tree vect_ptr_type, addr_expr2;
635
636 /* Create base_offset */
637 base_offset = size_binop (PLUS_EXPR, base_offset, init);
638 base_offset = fold_convert (sizetype, base_offset);
639 dest = create_tmp_var (TREE_TYPE (base_offset), "base_off");
640 add_referenced_var (dest);
641 base_offset = force_gimple_operand (base_offset, &new_stmt, false, dest);
642 append_to_statement_list_force (new_stmt, new_stmt_list);
643
644 if (offset)
645 {
646 tree tmp = create_tmp_var (sizetype, "offset");
647 tree step;
648
649 /* For an interleaved access, we divide STEP by the size of the
650 interleaving group. */
651 if (DR_GROUP_SIZE (stmt_info))
652 step = fold_build2 (TRUNC_DIV_EXPR, TREE_TYPE (offset), DR_STEP (dr),
653 build_int_cst (TREE_TYPE (offset),
654 DR_GROUP_SIZE (stmt_info)));
655 else
656 step = DR_STEP (dr);
657
658 add_referenced_var (tmp);
659 offset = fold_build2 (MULT_EXPR, TREE_TYPE (offset), offset, step);
660 base_offset = fold_build2 (PLUS_EXPR, TREE_TYPE (base_offset),
661 base_offset, offset);
662 base_offset = force_gimple_operand (base_offset, &new_stmt, false, tmp);
663 append_to_statement_list_force (new_stmt, new_stmt_list);
664 }
665
666 /* base + base_offset */
667 addr_base = fold_build2 (POINTER_PLUS_EXPR, TREE_TYPE (data_ref_base), data_ref_base,
668 base_offset);
669
670 vect_ptr_type = build_pointer_type (STMT_VINFO_VECTYPE (stmt_info));
671
672 /* addr_expr = addr_base */
673 addr_expr = vect_get_new_vect_var (vect_ptr_type, vect_pointer_var,
674 get_name (base_name));
675 add_referenced_var (addr_expr);
676 vec_stmt = fold_convert (vect_ptr_type, addr_base);
677 addr_expr2 = vect_get_new_vect_var (vect_ptr_type, vect_pointer_var,
678 get_name (base_name));
679 add_referenced_var (addr_expr2);
680 vec_stmt = force_gimple_operand (vec_stmt, &new_stmt, false, addr_expr2);
681 append_to_statement_list_force (new_stmt, new_stmt_list);
682
683 if (vect_print_dump_info (REPORT_DETAILS))
684 {
685 fprintf (vect_dump, "created ");
686 print_generic_expr (vect_dump, vec_stmt, TDF_SLIM);
687 }
688 return vec_stmt;
689 }
690
691
692 /* Function vect_create_data_ref_ptr.
693
694 Create a new pointer to vector type (vp), that points to the first location
695 accessed in the loop by STMT, along with the def-use update chain to
696 appropriately advance the pointer through the loop iterations. Also set
697 aliasing information for the pointer. This vector pointer is used by the
698 callers to this function to create a memory reference expression for vector
699 load/store access.
700
701 Input:
702 1. STMT: a stmt that references memory. Expected to be of the form
703 GIMPLE_MODIFY_STMT <name, data-ref> or
704 GIMPLE_MODIFY_STMT <data-ref, name>.
705 2. BSI: block_stmt_iterator where new stmts can be added.
706 3. OFFSET (optional): an offset to be added to the initial address accessed
707 by the data-ref in STMT.
708 4. ONLY_INIT: indicate if vp is to be updated in the loop, or remain
709 pointing to the initial address.
710 5. TYPE: if not NULL indicates the required type of the data-ref
711
712 Output:
713 1. Declare a new ptr to vector_type, and have it point to the base of the
714 data reference (initial address accessed by the data reference).
715 For example, for vector of type V8HI, the following code is generated:
716
717 v8hi *vp;
718 vp = (v8hi *)initial_address;
719
720 if OFFSET is not supplied:
721 initial_address = &a[init];
722 if OFFSET is supplied:
723 initial_address = &a[init + OFFSET];
724
725 Return the initial_address in INITIAL_ADDRESS.
726
727 2. If ONLY_INIT is true, just return the initial pointer. Otherwise, also
728 update the pointer in each iteration of the loop.
729
730 Return the increment stmt that updates the pointer in PTR_INCR.
731
732 3. Return the pointer. */
733
734 static tree
735 vect_create_data_ref_ptr (tree stmt,
736 block_stmt_iterator *bsi ATTRIBUTE_UNUSED,
737 tree offset, tree *initial_address, tree *ptr_incr,
738 bool only_init, tree type)
739 {
740 tree base_name;
741 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
742 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
743 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
744 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
745 tree vect_ptr_type;
746 tree vect_ptr;
747 tree tag;
748 tree new_temp;
749 tree vec_stmt;
750 tree new_stmt_list = NULL_TREE;
751 edge pe = loop_preheader_edge (loop);
752 basic_block new_bb;
753 tree vect_ptr_init;
754 struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
755
756 base_name = build_fold_indirect_ref (unshare_expr (DR_BASE_ADDRESS (dr)));
757
758 if (vect_print_dump_info (REPORT_DETAILS))
759 {
760 tree data_ref_base = base_name;
761 fprintf (vect_dump, "create vector-pointer variable to type: ");
762 print_generic_expr (vect_dump, vectype, TDF_SLIM);
763 if (TREE_CODE (data_ref_base) == VAR_DECL)
764 fprintf (vect_dump, " vectorizing a one dimensional array ref: ");
765 else if (TREE_CODE (data_ref_base) == ARRAY_REF)
766 fprintf (vect_dump, " vectorizing a multidimensional array ref: ");
767 else if (TREE_CODE (data_ref_base) == COMPONENT_REF)
768 fprintf (vect_dump, " vectorizing a record based array ref: ");
769 else if (TREE_CODE (data_ref_base) == SSA_NAME)
770 fprintf (vect_dump, " vectorizing a pointer ref: ");
771 print_generic_expr (vect_dump, base_name, TDF_SLIM);
772 }
773
774 /** (1) Create the new vector-pointer variable: **/
775 if (type)
776 vect_ptr_type = build_pointer_type (type);
777 else
778 vect_ptr_type = build_pointer_type (vectype);
779 vect_ptr = vect_get_new_vect_var (vect_ptr_type, vect_pointer_var,
780 get_name (base_name));
781 add_referenced_var (vect_ptr);
782
783 /** (2) Add aliasing information to the new vector-pointer:
784 (The points-to info (DR_PTR_INFO) may be defined later.) **/
785
786 tag = DR_SYMBOL_TAG (dr);
787 gcc_assert (tag);
788
789 /* If tag is a variable (and NOT_A_TAG) then a new symbol memory
790 tag must be created, with TAG added to its may-alias list. */
791 if (!MTAG_P (tag))
792 new_type_alias (vect_ptr, tag, DR_REF (dr));
793 else
794 set_symbol_mem_tag (vect_ptr, tag);
795
796 var_ann (vect_ptr)->subvars = DR_SUBVARS (dr);
797
798 /** (3) Calculate the initial address of the vector-pointer, and set
799 the vector-pointer to point to it before the loop: **/
800
801 /* Create: &(base[init_val+offset]) in the loop preheader. */
802 new_temp = vect_create_addr_base_for_vector_ref (stmt, &new_stmt_list,
803 offset);
804 pe = loop_preheader_edge (loop);
805 new_bb = bsi_insert_on_edge_immediate (pe, new_stmt_list);
806 gcc_assert (!new_bb);
807 *initial_address = new_temp;
808
809 /* Create: p = (vectype *) initial_base */
810 vec_stmt = fold_convert (vect_ptr_type, new_temp);
811 vec_stmt = build_gimple_modify_stmt (vect_ptr, vec_stmt);
812 vect_ptr_init = make_ssa_name (vect_ptr, vec_stmt);
813 GIMPLE_STMT_OPERAND (vec_stmt, 0) = vect_ptr_init;
814 new_bb = bsi_insert_on_edge_immediate (pe, vec_stmt);
815 gcc_assert (!new_bb);
816
817
818 /** (4) Handle the updating of the vector-pointer inside the loop: **/
819
820 if (only_init) /* No update in loop is required. */
821 {
822 /* Copy the points-to information if it exists. */
823 if (DR_PTR_INFO (dr))
824 duplicate_ssa_name_ptr_info (vect_ptr_init, DR_PTR_INFO (dr));
825 return vect_ptr_init;
826 }
827 else
828 {
829 block_stmt_iterator incr_bsi;
830 bool insert_after;
831 tree indx_before_incr, indx_after_incr;
832 tree incr;
833
834 standard_iv_increment_position (loop, &incr_bsi, &insert_after);
835 create_iv (vect_ptr_init,
836 fold_convert (vect_ptr_type, TYPE_SIZE_UNIT (vectype)),
837 NULL_TREE, loop, &incr_bsi, insert_after,
838 &indx_before_incr, &indx_after_incr);
839 incr = bsi_stmt (incr_bsi);
840 set_stmt_info (stmt_ann (incr),
841 new_stmt_vec_info (incr, loop_vinfo));
842
843 /* Copy the points-to information if it exists. */
844 if (DR_PTR_INFO (dr))
845 {
846 duplicate_ssa_name_ptr_info (indx_before_incr, DR_PTR_INFO (dr));
847 duplicate_ssa_name_ptr_info (indx_after_incr, DR_PTR_INFO (dr));
848 }
849 merge_alias_info (vect_ptr_init, indx_before_incr);
850 merge_alias_info (vect_ptr_init, indx_after_incr);
851 if (ptr_incr)
852 *ptr_incr = incr;
853
854 return indx_before_incr;
855 }
856 }
857
858
859 /* Function bump_vector_ptr
860
861 Increment a pointer (to a vector type) by vector-size. Connect the new
862 increment stmt to the existing def-use update-chain of the pointer.
863
864 The pointer def-use update-chain before this function:
865 DATAREF_PTR = phi (p_0, p_2)
866 ....
867 PTR_INCR: p_2 = DATAREF_PTR + step
868
869 The pointer def-use update-chain after this function:
870 DATAREF_PTR = phi (p_0, p_2)
871 ....
872 NEW_DATAREF_PTR = DATAREF_PTR + vector_size
873 ....
874 PTR_INCR: p_2 = NEW_DATAREF_PTR + step
875
876 Input:
877 DATAREF_PTR - ssa_name of a pointer (to vector type) that is being updated
878 in the loop.
879 PTR_INCR - the stmt that updates the pointer in each iteration of the loop.
880 The increment amount across iterations is also expected to be
881 vector_size.
882 BSI - location where the new update stmt is to be placed.
883 STMT - the original scalar memory-access stmt that is being vectorized.
884
885 Output: Return NEW_DATAREF_PTR as illustrated above.
886
887 */
888
889 static tree
890 bump_vector_ptr (tree dataref_ptr, tree ptr_incr, block_stmt_iterator *bsi,
891 tree stmt)
892 {
893 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
894 struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
895 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
896 tree vptr_type = TREE_TYPE (dataref_ptr);
897 tree ptr_var = SSA_NAME_VAR (dataref_ptr);
898 tree update = TYPE_SIZE_UNIT (vectype);
899 tree incr_stmt;
900 ssa_op_iter iter;
901 use_operand_p use_p;
902 tree new_dataref_ptr;
903
904 incr_stmt = build_gimple_modify_stmt (ptr_var,
905 build2 (POINTER_PLUS_EXPR, vptr_type,
906 dataref_ptr, update));
907 new_dataref_ptr = make_ssa_name (ptr_var, incr_stmt);
908 GIMPLE_STMT_OPERAND (incr_stmt, 0) = new_dataref_ptr;
909 vect_finish_stmt_generation (stmt, incr_stmt, bsi);
910
911 /* Update the vector-pointer's cross-iteration increment. */
912 FOR_EACH_SSA_USE_OPERAND (use_p, ptr_incr, iter, SSA_OP_USE)
913 {
914 tree use = USE_FROM_PTR (use_p);
915
916 if (use == dataref_ptr)
917 SET_USE (use_p, new_dataref_ptr);
918 else
919 gcc_assert (tree_int_cst_compare (use, update) == 0);
920 }
921
922 /* Copy the points-to information if it exists. */
923 if (DR_PTR_INFO (dr))
924 duplicate_ssa_name_ptr_info (new_dataref_ptr, DR_PTR_INFO (dr));
925 merge_alias_info (new_dataref_ptr, dataref_ptr);
926
927 return new_dataref_ptr;
928 }
929
930
931 /* Function vect_create_destination_var.
932
933 Create a new temporary of type VECTYPE. */
934
935 static tree
936 vect_create_destination_var (tree scalar_dest, tree vectype)
937 {
938 tree vec_dest;
939 const char *new_name;
940 tree type;
941 enum vect_var_kind kind;
942
943 kind = vectype ? vect_simple_var : vect_scalar_var;
944 type = vectype ? vectype : TREE_TYPE (scalar_dest);
945
946 gcc_assert (TREE_CODE (scalar_dest) == SSA_NAME);
947
948 new_name = get_name (scalar_dest);
949 if (!new_name)
950 new_name = "var_";
951 vec_dest = vect_get_new_vect_var (type, kind, new_name);
952 add_referenced_var (vec_dest);
953
954 return vec_dest;
955 }
956
957
958 /* Function vect_init_vector.
959
960 Insert a new stmt (INIT_STMT) that initializes a new vector variable with
961 the vector elements of VECTOR_VAR. Return the DEF of INIT_STMT. It will be
962 used in the vectorization of STMT. */
963
964 static tree
965 vect_init_vector (tree stmt, tree vector_var, tree vector_type)
966 {
967 stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt);
968 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
969 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
970 tree new_var;
971 tree init_stmt;
972 tree vec_oprnd;
973 edge pe;
974 tree new_temp;
975 basic_block new_bb;
976
977 new_var = vect_get_new_vect_var (vector_type, vect_simple_var, "cst_");
978 add_referenced_var (new_var);
979
980 init_stmt = build_gimple_modify_stmt (new_var, vector_var);
981 new_temp = make_ssa_name (new_var, init_stmt);
982 GIMPLE_STMT_OPERAND (init_stmt, 0) = new_temp;
983
984 pe = loop_preheader_edge (loop);
985 new_bb = bsi_insert_on_edge_immediate (pe, init_stmt);
986 gcc_assert (!new_bb);
987
988 if (vect_print_dump_info (REPORT_DETAILS))
989 {
990 fprintf (vect_dump, "created new init_stmt: ");
991 print_generic_expr (vect_dump, init_stmt, TDF_SLIM);
992 }
993
994 vec_oprnd = GIMPLE_STMT_OPERAND (init_stmt, 0);
995 return vec_oprnd;
996 }
997
998
999 /* Function get_initial_def_for_induction
1000
1001 Input:
1002 IV_PHI - the initial value of the induction variable
1003
1004 Output:
1005 Return a vector variable, initialized with the first VF values of
1006 the induction variable. E.g., for an iv with IV_PHI='X' and
1007 evolution S, for a vector of 4 units, we want to return:
1008 [X, X + S, X + 2*S, X + 3*S]. */
1009
1010 static tree
1011 get_initial_def_for_induction (tree iv_phi)
1012 {
1013 stmt_vec_info stmt_vinfo = vinfo_for_stmt (iv_phi);
1014 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
1015 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
1016 tree scalar_type = TREE_TYPE (PHI_RESULT_TREE (iv_phi));
1017 tree vectype = get_vectype_for_scalar_type (scalar_type);
1018 int nunits = TYPE_VECTOR_SUBPARTS (vectype);
1019 edge pe = loop_preheader_edge (loop);
1020 basic_block new_bb;
1021 block_stmt_iterator bsi;
1022 tree vec, vec_init, vec_step, t;
1023 tree access_fn;
1024 tree new_var;
1025 tree new_name;
1026 tree init_stmt;
1027 tree induction_phi, induc_def, new_stmt, vec_def, vec_dest;
1028 tree init_expr, step_expr;
1029 int vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
1030 int i;
1031 bool ok;
1032 int ncopies = vf / nunits;
1033 tree expr;
1034 stmt_vec_info phi_info = vinfo_for_stmt (iv_phi);
1035 tree stmts;
1036 tree stmt = NULL_TREE;
1037 block_stmt_iterator si;
1038 basic_block bb = bb_for_stmt (iv_phi);
1039
1040 gcc_assert (phi_info);
1041 gcc_assert (ncopies >= 1);
1042
1043 /* Find the first insertion point in the BB. */
1044 si = bsi_after_labels (bb);
1045 stmt = bsi_stmt (si);
1046
1047 access_fn = analyze_scalar_evolution (loop, PHI_RESULT (iv_phi));
1048 gcc_assert (access_fn);
1049 ok = vect_is_simple_iv_evolution (loop->num, access_fn,
1050 &init_expr, &step_expr);
1051 gcc_assert (ok);
1052
1053 /* Create the vector that holds the initial_value of the induction. */
1054 new_var = vect_get_new_vect_var (scalar_type, vect_scalar_var, "var_");
1055 add_referenced_var (new_var);
1056
1057 new_name = force_gimple_operand (init_expr, &stmts, false, new_var);
1058 if (stmts)
1059 {
1060 new_bb = bsi_insert_on_edge_immediate (pe, stmts);
1061 gcc_assert (!new_bb);
1062 }
1063
1064 t = NULL_TREE;
1065 t = tree_cons (NULL_TREE, new_name, t);
1066 for (i = 1; i < nunits; i++)
1067 {
1068 tree tmp;
1069
1070 /* Create: new_name = new_name + step_expr */
1071 tmp = fold_build2 (PLUS_EXPR, scalar_type, new_name, step_expr);
1072 init_stmt = build_gimple_modify_stmt (new_var, tmp);
1073 new_name = make_ssa_name (new_var, init_stmt);
1074 GIMPLE_STMT_OPERAND (init_stmt, 0) = new_name;
1075
1076 new_bb = bsi_insert_on_edge_immediate (pe, init_stmt);
1077 gcc_assert (!new_bb);
1078
1079 if (vect_print_dump_info (REPORT_DETAILS))
1080 {
1081 fprintf (vect_dump, "created new init_stmt: ");
1082 print_generic_expr (vect_dump, init_stmt, TDF_SLIM);
1083 }
1084 t = tree_cons (NULL_TREE, new_name, t);
1085 }
1086 vec = build_constructor_from_list (vectype, nreverse (t));
1087 vec_init = vect_init_vector (stmt, vec, vectype);
1088
1089
1090 /* Create the vector that holds the step of the induction. */
1091 expr = build_int_cst (scalar_type, vf);
1092 new_name = fold_build2 (MULT_EXPR, scalar_type, expr, step_expr);
1093 t = NULL_TREE;
1094 for (i = 0; i < nunits; i++)
1095 t = tree_cons (NULL_TREE, unshare_expr (new_name), t);
1096 vec = build_constructor_from_list (vectype, t);
1097 vec_step = vect_init_vector (stmt, vec, vectype);
1098
1099
1100 /* Create the following def-use cycle:
1101 loop prolog:
1102 vec_init = [X, X+S, X+2*S, X+3*S]
1103 vec_step = [VF*S, VF*S, VF*S, VF*S]
1104 loop:
1105 vec_iv = PHI <vec_init, vec_loop>
1106 ...
1107 STMT
1108 ...
1109 vec_loop = vec_iv + vec_step; */
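/* Editorial worked example (not in the original source): with X = 0,
   S = 3 and VF = nunits = 4, vec_init = [0, 3, 6, 9] and
   vec_step = [12, 12, 12, 12], so successive iterations of the cycle
   produce [12, 15, 18, 21], [24, 27, 30, 33], and so on.  */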
1110
1111 /* Create the induction-phi that defines the induction-operand. */
1112 vec_dest = vect_get_new_vect_var (vectype, vect_simple_var, "vec_iv_");
1113 add_referenced_var (vec_dest);
1114 induction_phi = create_phi_node (vec_dest, loop->header);
1115 set_stmt_info (get_stmt_ann (induction_phi),
1116 new_stmt_vec_info (induction_phi, loop_vinfo));
1117 induc_def = PHI_RESULT (induction_phi);
1118
1119 /* Create the iv update inside the loop */
1120 new_stmt = build_gimple_modify_stmt (NULL_TREE,
1121 build2 (PLUS_EXPR, vectype,
1122 induc_def, vec_step));
1123 vec_def = make_ssa_name (vec_dest, new_stmt);
1124 GIMPLE_STMT_OPERAND (new_stmt, 0) = vec_def;
1125 bsi = bsi_for_stmt (stmt);
1126 vect_finish_stmt_generation (stmt, new_stmt, &bsi);
1127
1128 /* Set the arguments of the phi node: */
1129 add_phi_arg (induction_phi, vec_init, loop_preheader_edge (loop));
1130 add_phi_arg (induction_phi, vec_def, loop_latch_edge (loop));
1131
1132
1133 /* In case the vectorization factor (VF) is bigger than the number
1134 of elements that we can fit in a vectype (nunits), we have to generate
1135 more than one vector stmt - i.e., we need to "unroll" the
1136 vector stmt by a factor VF/nunits. For more details see documentation
1137 in vectorizable_operation. */
1138
1139 if (ncopies > 1)
1140 {
1141 stmt_vec_info prev_stmt_vinfo;
1142
1143 /* Create the vector that holds the step of the induction. */
1144 expr = build_int_cst (scalar_type, nunits);
1145 new_name = fold_build2 (MULT_EXPR, scalar_type, expr, step_expr);
1146 t = NULL_TREE;
1147 for (i = 0; i < nunits; i++)
1148 t = tree_cons (NULL_TREE, unshare_expr (new_name), t);
1149 vec = build_constructor_from_list (vectype, t);
1150 vec_step = vect_init_vector (stmt, vec, vectype);
1151
1152 vec_def = induc_def;
1153 prev_stmt_vinfo = vinfo_for_stmt (induction_phi);
1154 for (i = 1; i < ncopies; i++)
1155 {
1156 tree tmp;
1157
1158 /* vec_i = vec_prev + vec_{step*nunits} */
1159 tmp = build2 (PLUS_EXPR, vectype, vec_def, vec_step);
1160 new_stmt = build_gimple_modify_stmt (NULL_TREE, tmp);
1161 vec_def = make_ssa_name (vec_dest, new_stmt);
1162 GIMPLE_STMT_OPERAND (new_stmt, 0) = vec_def;
1163 bsi = bsi_for_stmt (stmt);
1164 vect_finish_stmt_generation (stmt, new_stmt, &bsi);
1165
1166 STMT_VINFO_RELATED_STMT (prev_stmt_vinfo) = new_stmt;
1167 prev_stmt_vinfo = vinfo_for_stmt (new_stmt);
1168 }
1169 }
1170
1171 if (vect_print_dump_info (REPORT_DETAILS))
1172 {
1173 fprintf (vect_dump, "transform induction: created def-use cycle:");
1174 print_generic_expr (vect_dump, induction_phi, TDF_SLIM);
1175 fprintf (vect_dump, "\n");
1176 print_generic_expr (vect_dump, SSA_NAME_DEF_STMT (vec_def), TDF_SLIM);
1177 }
1178
1179 STMT_VINFO_VEC_STMT (phi_info) = induction_phi;
1180 return induc_def;
1181 }
1182
1183
1184 /* Function vect_get_vec_def_for_operand.
1185
1186 OP is an operand in STMT. This function returns a (vector) def that will be
1187 used in the vectorized stmt for STMT.
1188
1189 In the case that OP is an SSA_NAME which is defined in the loop, then
1190 STMT_VINFO_VEC_STMT of the defining stmt holds the relevant def.
1191
1192 In case OP is an invariant or constant, a new stmt that creates a vector def
1193 needs to be introduced. */
1194
1195 static tree
1196 vect_get_vec_def_for_operand (tree op, tree stmt, tree *scalar_def)
1197 {
1198 tree vec_oprnd;
1199 tree vec_stmt;
1200 tree def_stmt;
1201 stmt_vec_info def_stmt_info = NULL;
1202 stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt);
1203 tree vectype = STMT_VINFO_VECTYPE (stmt_vinfo);
1204 int nunits = TYPE_VECTOR_SUBPARTS (vectype);
1205 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
1206 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
1207 tree vec_inv;
1208 tree vec_cst;
1209 tree t = NULL_TREE;
1210 tree def;
1211 int i;
1212 enum vect_def_type dt;
1213 bool is_simple_use;
1214 tree vector_type;
1215
1216 if (vect_print_dump_info (REPORT_DETAILS))
1217 {
1218 fprintf (vect_dump, "vect_get_vec_def_for_operand: ");
1219 print_generic_expr (vect_dump, op, TDF_SLIM);
1220 }
1221
1222 is_simple_use = vect_is_simple_use (op, loop_vinfo, &def_stmt, &def, &dt);
1223 gcc_assert (is_simple_use);
1224 if (vect_print_dump_info (REPORT_DETAILS))
1225 {
1226 if (def)
1227 {
1228 fprintf (vect_dump, "def = ");
1229 print_generic_expr (vect_dump, def, TDF_SLIM);
1230 }
1231 if (def_stmt)
1232 {
1233 fprintf (vect_dump, " def_stmt = ");
1234 print_generic_expr (vect_dump, def_stmt, TDF_SLIM);
1235 }
1236 }
1237
1238 switch (dt)
1239 {
1240 /* Case 1: operand is a constant. */
1241 case vect_constant_def:
1242 {
1243 if (scalar_def)
1244 *scalar_def = op;
1245
1246 /* Create 'vect_cst_ = {cst,cst,...,cst}' */
1247 if (vect_print_dump_info (REPORT_DETAILS))
1248 fprintf (vect_dump, "Create vector_cst. nunits = %d", nunits);
1249
1250 for (i = nunits - 1; i >= 0; --i)
1251 {
1252 t = tree_cons (NULL_TREE, op, t);
1253 }
1254 vector_type = get_vectype_for_scalar_type (TREE_TYPE (op));
1255 vec_cst = build_vector (vector_type, t);
1256
1257 return vect_init_vector (stmt, vec_cst, vector_type);
1258 }
1259
1260 /* Case 2: operand is defined outside the loop - loop invariant. */
1261 case vect_invariant_def:
1262 {
1263 if (scalar_def)
1264 *scalar_def = def;
1265
1266 /* Create 'vec_inv = {inv,inv,..,inv}' */
1267 if (vect_print_dump_info (REPORT_DETAILS))
1268 fprintf (vect_dump, "Create vector_inv.");
1269
1270 for (i = nunits - 1; i >= 0; --i)
1271 {
1272 t = tree_cons (NULL_TREE, def, t);
1273 }
1274
1275 /* FIXME: use build_constructor directly. */
1276 vector_type = get_vectype_for_scalar_type (TREE_TYPE (def));
1277 vec_inv = build_constructor_from_list (vector_type, t);
1278 return vect_init_vector (stmt, vec_inv, vector_type);
1279 }
1280
1281 /* Case 3: operand is defined inside the loop. */
1282 case vect_loop_def:
1283 {
1284 if (scalar_def)
1285 *scalar_def = def_stmt;
1286
1287 /* Get the def from the vectorized stmt. */
1288 def_stmt_info = vinfo_for_stmt (def_stmt);
1289 vec_stmt = STMT_VINFO_VEC_STMT (def_stmt_info);
1290 gcc_assert (vec_stmt);
1291 vec_oprnd = GIMPLE_STMT_OPERAND (vec_stmt, 0);
1292 return vec_oprnd;
1293 }
1294
1295 /* Case 4: operand is defined by a loop header phi - reduction */
1296 case vect_reduction_def:
1297 {
1298 gcc_assert (TREE_CODE (def_stmt) == PHI_NODE);
1299
1300 /* Get the def before the loop */
1301 op = PHI_ARG_DEF_FROM_EDGE (def_stmt, loop_preheader_edge (loop));
1302 return get_initial_def_for_reduction (stmt, op, scalar_def);
1303 }
1304
1305 /* Case 5: operand is defined by loop-header phi - induction. */
1306 case vect_induction_def:
1307 {
1308 gcc_assert (TREE_CODE (def_stmt) == PHI_NODE);
1309
1310 /* Get the def before the loop */
1311 return get_initial_def_for_induction (def_stmt);
1312 }
1313
1314 default:
1315 gcc_unreachable ();
1316 }
1317 }
1318
1319
1320 /* Function vect_get_vec_def_for_stmt_copy
1321
1322 Return a vector-def for an operand. This function is used when the
1323 vectorized stmt to be created (by the caller to this function) is a "copy"
1324 created in case the vectorized result cannot fit in one vector, and several
1325 copies of the vector-stmt are required. In this case the vector-def is
1326 retrieved from the vector stmt recorded in the STMT_VINFO_RELATED_STMT field
1327 of the stmt that defines VEC_OPRND.
1328 DT is the type of the vector def VEC_OPRND.
1329
1330 Context:
1331 In case the vectorization factor (VF) is bigger than the number
1332 of elements that can fit in a vectype (nunits), we have to generate
1333 more than one vector stmt to vectorize the scalar stmt. This situation
1334 arises when there are multiple data-types operated upon in the loop; the
1335 smallest data-type determines the VF, and as a result, when vectorizing
1336 stmts operating on wider types we need to create 'VF/nunits' "copies" of the
1337 vector stmt (each computing a vector of 'nunits' results, and together
1338 computing 'VF' results in each iteration). This function is called when
1339 vectorizing such a stmt (e.g. vectorizing S2 in the illustration below, in
1340 which VF=16 and nunits=4, so the number of copies required is 4):
1341
1342 scalar stmt: vectorized into: STMT_VINFO_RELATED_STMT
1343
1344 S1: x = load VS1.0: vx.0 = memref0 VS1.1
1345 VS1.1: vx.1 = memref1 VS1.2
1346 VS1.2: vx.2 = memref2 VS1.3
1347 VS1.3: vx.3 = memref3
1348
1349 S2: z = x + ... VSnew.0: vz0 = vx.0 + ... VSnew.1
1350 VSnew.1: vz1 = vx.1 + ... VSnew.2
1351 VSnew.2: vz2 = vx.2 + ... VSnew.3
1352 VSnew.3: vz3 = vx.3 + ...
1353
1354 The vectorization of S1 is explained in vectorizable_load.
1355 The vectorization of S2:
1356 To create the first vector-stmt out of the 4 copies - VSnew.0 -
1357 the function 'vect_get_vec_def_for_operand' is called to
1358 get the relevant vector-def for each operand of S2. For operand x it
1359 returns the vector-def 'vx.0'.
1360
1361 To create the remaining copies of the vector-stmt (VSnew.j), this
1362 function is called to get the relevant vector-def for each operand. It is
1363 obtained from the respective VS1.j stmt, which is recorded in the
1364 STMT_VINFO_RELATED_STMT field of the stmt that defines VEC_OPRND.
1365
1366 For example, to obtain the vector-def 'vx.1' in order to create the
1367 vector stmt 'VSnew.1', this function is called with VEC_OPRND='vx.0'.
1368 Given 'vx0' we obtain the stmt that defines it ('VS1.0'); from the
1369 STMT_VINFO_RELATED_STMT field of 'VS1.0' we obtain the next copy - 'VS1.1',
1370 and return its def ('vx.1').
1371 Overall, to create the above sequence this function will be called 3 times:
1372 vx.1 = vect_get_vec_def_for_stmt_copy (dt, vx.0);
1373 vx.2 = vect_get_vec_def_for_stmt_copy (dt, vx.1);
1374 vx.3 = vect_get_vec_def_for_stmt_copy (dt, vx.2); */
1375
1376 static tree
1377 vect_get_vec_def_for_stmt_copy (enum vect_def_type dt, tree vec_oprnd)
1378 {
1379 tree vec_stmt_for_operand;
1380 stmt_vec_info def_stmt_info;
1381
1382 /* Do nothing; can reuse same def. */
1383 if (dt == vect_invariant_def || dt == vect_constant_def)
1384 return vec_oprnd;
1385
1386 vec_stmt_for_operand = SSA_NAME_DEF_STMT (vec_oprnd);
1387 def_stmt_info = vinfo_for_stmt (vec_stmt_for_operand);
1388 gcc_assert (def_stmt_info);
1389 vec_stmt_for_operand = STMT_VINFO_RELATED_STMT (def_stmt_info);
1390 gcc_assert (vec_stmt_for_operand);
1391 vec_oprnd = GIMPLE_STMT_OPERAND (vec_stmt_for_operand, 0);
1392
1393 return vec_oprnd;
1394 }
1395
1396
1397 /* Function vect_finish_stmt_generation.
1398
1399 Insert a new stmt. */
1400
1401 static void
1402 vect_finish_stmt_generation (tree stmt, tree vec_stmt,
1403 block_stmt_iterator *bsi)
1404 {
1405 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1406 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
1407
1408 bsi_insert_before (bsi, vec_stmt, BSI_SAME_STMT);
1409 set_stmt_info (get_stmt_ann (vec_stmt),
1410 new_stmt_vec_info (vec_stmt, loop_vinfo));
1411
1412 if (vect_print_dump_info (REPORT_DETAILS))
1413 {
1414 fprintf (vect_dump, "add new stmt: ");
1415 print_generic_expr (vect_dump, vec_stmt, TDF_SLIM);
1416 }
1417
1418 /* Make sure bsi points to the stmt that is being vectorized. */
1419 gcc_assert (stmt == bsi_stmt (*bsi));
1420
1421 #ifdef USE_MAPPED_LOCATION
1422 SET_EXPR_LOCATION (vec_stmt, EXPR_LOCATION (stmt));
1423 #else
1424 SET_EXPR_LOCUS (vec_stmt, EXPR_LOCUS (stmt));
1425 #endif
1426 }
1427
1428
1429 /* Function get_initial_def_for_reduction
1430
1431 Input:
1432 STMT - a stmt that performs a reduction operation in the loop.
1433 INIT_VAL - the initial value of the reduction variable
1434
1435 Output:
1436 ADJUSTMENT_DEF - a tree that holds a value to be added to the final result
1437 of the reduction (used for adjusting the epilog - see below).
1438 Return a vector variable, initialized according to the operation that STMT
1439 performs. This vector will be used as the initial value of the
1440 vector of partial results.
1441
1442 Option1 (adjust in epilog): Initialize the vector as follows:
1443 add: [0,0,...,0,0]
1444 mult: [1,1,...,1,1]
1445 min/max: [init_val,init_val,..,init_val,init_val]
1446 bit and/or: [init_val,init_val,..,init_val,init_val]
1447 and when necessary (e.g. add/mult case) let the caller know
1448 that it needs to adjust the result by init_val.
1449
1450 Option2: Initialize the vector as follows:
1451 add: [0,0,...,0,init_val]
1452 mult: [1,1,...,1,init_val]
1453 min/max: [init_val,init_val,...,init_val]
1454 bit and/or: [init_val,init_val,...,init_val]
1455 and no adjustments are needed.
1456
1457 For example, for the following code:
1458
1459 s = init_val;
1460 for (i=0;i<n;i++)
1461 s = s + a[i];
1462
1463 STMT is 's = s + a[i]', and the reduction variable is 's'.
1464 For a vector of 4 units, we want to return either [0,0,0,init_val],
1465 or [0,0,0,0] and let the caller know that it needs to adjust
1466 the result at the end by 'init_val'.
1467
1468 FORNOW, we are using the 'adjust in epilog' scheme, because this way the
1469 initialization vector is simpler (same element in all entries).
1470 A cost model should help decide between these two schemes. */
1471
1472 static tree
1473 get_initial_def_for_reduction (tree stmt, tree init_val, tree *adjustment_def)
1474 {
1475 stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt);
1476 tree vectype = STMT_VINFO_VECTYPE (stmt_vinfo);
1477 int nunits = TYPE_VECTOR_SUBPARTS (vectype);
1478 enum tree_code code = TREE_CODE (GIMPLE_STMT_OPERAND (stmt, 1));
1479 tree type = TREE_TYPE (init_val);
1480 tree vecdef;
1481 tree def_for_init;
1482 tree init_def;
1483 tree t = NULL_TREE;
1484 int i;
1485 tree vector_type;
1486
1487 gcc_assert (INTEGRAL_TYPE_P (type) || SCALAR_FLOAT_TYPE_P (type));
1488 vecdef = vect_get_vec_def_for_operand (init_val, stmt, NULL);
1489
1490 switch (code)
1491 {
1492 case WIDEN_SUM_EXPR:
1493 case DOT_PROD_EXPR:
1494 case PLUS_EXPR:
1495 *adjustment_def = init_val;
1496 /* Create a vector of zeros for init_def. */
1497 if (INTEGRAL_TYPE_P (type))
1498 def_for_init = build_int_cst (type, 0);
1499 else
1500 def_for_init = build_real (type, dconst0);
1501 for (i = nunits - 1; i >= 0; --i)
1502 t = tree_cons (NULL_TREE, def_for_init, t);
1503 vector_type = get_vectype_for_scalar_type (TREE_TYPE (def_for_init));
1504 init_def = build_vector (vector_type, t);
1505 break;
1506
1507 case MIN_EXPR:
1508 case MAX_EXPR:
1509 *adjustment_def = NULL_TREE;
1510 init_def = vecdef;
1511 break;
1512
1513 default:
1514 gcc_unreachable ();
1515 }
1516
1517 return init_def;
1518 }
1519
1520
1521 /* Function vect_create_epilog_for_reduction
1522
1523 Create code at the loop-epilog to finalize the result of a reduction
1524 computation.
1525
1526 VECT_DEF is a vector of partial results.
1527 REDUC_CODE is the tree-code for the epilog reduction.
1528 STMT is the scalar reduction stmt that is being vectorized.
1529 REDUCTION_PHI is the phi-node that carries the reduction computation.
1530
1531 This function:
1532 1. Creates the reduction def-use cycle: sets the arguments for
1533 REDUCTION_PHI:
1534 The loop-entry argument is the vectorized initial-value of the reduction.
1535 The loop-latch argument is VECT_DEF - the vector of partial sums.
1536 2. "Reduces" the vector of partial results VECT_DEF into a single result,
1537 by applying the operation specified by REDUC_CODE if available, or by
1538 other means (whole-vector shifts or a scalar loop).
1539 The function also creates a new phi node at the loop exit to preserve
1540 loop-closed form, as illustrated below.
1541
1542 The flow at the entry to this function:
1543
1544 loop:
1545 vec_def = phi <null, null> # REDUCTION_PHI
1546 VECT_DEF = vector_stmt # vectorized form of STMT
1547 s_loop = scalar_stmt # (scalar) STMT
1548 loop_exit:
1549 s_out0 = phi <s_loop> # (scalar) EXIT_PHI
1550 use <s_out0>
1551 use <s_out0>
1552
1553 The above is transformed by this function into:
1554
1555 loop:
1556 vec_def = phi <vec_init, VECT_DEF> # REDUCTION_PHI
1557 VECT_DEF = vector_stmt # vectorized form of STMT
1558 s_loop = scalar_stmt # (scalar) STMT
1559 loop_exit:
1560 s_out0 = phi <s_loop> # (scalar) EXIT_PHI
1561 v_out1 = phi <VECT_DEF> # NEW_EXIT_PHI
1562 v_out2 = reduce <v_out1>
1563 s_out3 = extract_field <v_out2, 0>
1564 s_out4 = adjust_result <s_out3>
1565 use <s_out4>
1566 use <s_out4>
1567 */
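/* Editorial sketch, not part of GCC (the real function follows below):
   "scheme 2" from the comment above, modeled in plain C for a
   4-element additive reduction, with an array standing in for the
   vector register.  The function name is invented for illustration.  */
static int
reduce_by_shifts_sketch (int v[4])
{
  int half, i;

  /* Each round shifts the "vector" right by half its width and adds:
     [a,b,c,d] -> [a+c, b+d, x, x] -> [a+b+c+d, x, x, x].  */
  for (half = 2; half >= 1; half /= 2)
    for (i = 0; i < half; i++)
      v[i] += v[i + half];

  /* Step 2 of the epilog: extract the scalar result from element 0.  */
  return v[0];
}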
1568
1569 static void
1570 vect_create_epilog_for_reduction (tree vect_def, tree stmt,
1571 enum tree_code reduc_code, tree reduction_phi)
1572 {
1573 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1574 tree vectype;
1575 enum machine_mode mode;
1576 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
1577 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
1578 basic_block exit_bb;
1579 tree scalar_dest;
1580 tree scalar_type;
1581 tree new_phi;
1582 block_stmt_iterator exit_bsi;
1583 tree vec_dest;
1584 tree new_temp;
1585 tree new_name;
1586 tree epilog_stmt;
1587 tree new_scalar_dest, exit_phi;
1588 tree bitsize, bitpos, bytesize;
1589 enum tree_code code = TREE_CODE (GIMPLE_STMT_OPERAND (stmt, 1));
1590 tree scalar_initial_def;
1591 tree vec_initial_def;
1592 tree orig_name;
1593 imm_use_iterator imm_iter;
1594 use_operand_p use_p;
1595 bool extract_scalar_result;
1596 tree reduction_op;
1597 tree orig_stmt;
1598 tree use_stmt;
1599 tree operation = GIMPLE_STMT_OPERAND (stmt, 1);
1600 int op_type;
1601
1602 op_type = TREE_OPERAND_LENGTH (operation);
1603 reduction_op = TREE_OPERAND (operation, op_type-1);
1604 vectype = get_vectype_for_scalar_type (TREE_TYPE (reduction_op));
1605 mode = TYPE_MODE (vectype);
1606
1607 /*** 1. Create the reduction def-use cycle ***/
1608
1609 /* 1.1 set the loop-entry arg of the reduction-phi: */
1610 /* For the case of reduction, vect_get_vec_def_for_operand returns
1611 the scalar def before the loop, that defines the initial value
1612 of the reduction variable. */
1613 vec_initial_def = vect_get_vec_def_for_operand (reduction_op, stmt,
1614 &scalar_initial_def);
1615 add_phi_arg (reduction_phi, vec_initial_def, loop_preheader_edge (loop));
1616
1617 /* 1.2 set the loop-latch arg for the reduction-phi: */
1618 add_phi_arg (reduction_phi, vect_def, loop_latch_edge (loop));
1619
1620 if (vect_print_dump_info (REPORT_DETAILS))
1621 {
1622 fprintf (vect_dump, "transform reduction: created def-use cycle:");
1623 print_generic_expr (vect_dump, reduction_phi, TDF_SLIM);
1624 fprintf (vect_dump, "\n");
1625 print_generic_expr (vect_dump, SSA_NAME_DEF_STMT (vect_def), TDF_SLIM);
1626 }
1627
1628
1629 /*** 2. Create epilog code
1630 The reduction epilog code operates across the elements of the vector
1631 of partial results computed by the vectorized loop.
1632 The reduction epilog code consists of:
1633 step 1: compute the scalar result in a vector (v_out2)
1634 step 2: extract the scalar result (s_out3) from the vector (v_out2)
1635 step 3: adjust the scalar result (s_out3) if needed.
1636
1637 Step 1 can be accomplished using one of the following three schemes:
1638 (scheme 1) using reduc_code, if available.
1639 (scheme 2) using whole-vector shifts, if available.
1640 (scheme 3) using a scalar loop. In this case steps 1+2 above are
1641 combined.
1642
1643 The overall epilog code looks like this:
1644
1645 s_out0 = phi <s_loop> # original EXIT_PHI
1646 v_out1 = phi <VECT_DEF> # NEW_EXIT_PHI
1647 v_out2 = reduce <v_out1> # step 1
1648 s_out3 = extract_field <v_out2, 0> # step 2
1649 s_out4 = adjust_result <s_out3> # step 3
1650
1651 (step 3 is optional, and steps 1 and 2 may be combined).
1652 Lastly, the uses of s_out0 are replaced by s_out4.
1653
1654 ***/
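
   /* (The three schemes are tried in the order listed: the target's
      reduc_code when it exists, otherwise log2(nunits) whole-vector
      shift/op pairs, otherwise nunits extracts and nunits-1 scalar
      ops.)  */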
1655
1656 /* 2.1 Create new loop-exit-phi to preserve loop-closed form:
1657 v_out1 = phi <v_loop> */
1658
1659 exit_bb = single_exit (loop)->dest;
1660 new_phi = create_phi_node (SSA_NAME_VAR (vect_def), exit_bb);
1661 SET_PHI_ARG_DEF (new_phi, single_exit (loop)->dest_idx, vect_def);
1662 exit_bsi = bsi_after_labels (exit_bb);
1663
1664 /* 2.2 Get the relevant tree-code to use in the epilog for schemes 2,3
1665 (i.e. when reduc_code is not available) and in the final adjustment
1666 code (if needed). Also get the original scalar reduction variable as
1667 defined in the loop. In case STMT is a "pattern-stmt" (i.e. - it
1668 represents a reduction pattern), the tree-code and scalar-def are
1669 taken from the original stmt that the pattern-stmt (STMT) replaces.
1670 Otherwise (it is a regular reduction) - the tree-code and scalar-def
1671 are taken from STMT. */
1672
1673 orig_stmt = STMT_VINFO_RELATED_STMT (stmt_info);
1674 if (!orig_stmt)
1675 {
1676 /* Regular reduction */
1677 orig_stmt = stmt;
1678 }
1679 else
1680 {
1681 /* Reduction pattern */
1682 stmt_vec_info stmt_vinfo = vinfo_for_stmt (orig_stmt);
1683 gcc_assert (STMT_VINFO_IN_PATTERN_P (stmt_vinfo));
1684 gcc_assert (STMT_VINFO_RELATED_STMT (stmt_vinfo) == stmt);
1685 }
1686 code = TREE_CODE (GIMPLE_STMT_OPERAND (orig_stmt, 1));
1687 scalar_dest = GIMPLE_STMT_OPERAND (orig_stmt, 0);
1688 scalar_type = TREE_TYPE (scalar_dest);
1689 new_scalar_dest = vect_create_destination_var (scalar_dest, NULL);
1690 bitsize = TYPE_SIZE (scalar_type);
1691 bytesize = TYPE_SIZE_UNIT (scalar_type);
1692
1693 /* 2.3 Create the reduction code, using one of the three schemes described
1694 above. */
1695
1696 if (reduc_code < NUM_TREE_CODES)
1697 {
1698 tree tmp;
1699
1700 /*** Case 1: Create:
1701 v_out2 = reduc_expr <v_out1> */
1702
1703 if (vect_print_dump_info (REPORT_DETAILS))
1704 fprintf (vect_dump, "Reduce using direct vector reduction.");
1705
1706 vec_dest = vect_create_destination_var (scalar_dest, vectype);
1707 tmp = build1 (reduc_code, vectype, PHI_RESULT (new_phi));
1708 epilog_stmt = build_gimple_modify_stmt (vec_dest, tmp);
1709 new_temp = make_ssa_name (vec_dest, epilog_stmt);
1710 GIMPLE_STMT_OPERAND (epilog_stmt, 0) = new_temp;
1711 bsi_insert_before (&exit_bsi, epilog_stmt, BSI_SAME_STMT);
1712
1713 extract_scalar_result = true;
1714 }
1715 else
1716 {
1717 enum tree_code shift_code = 0;
1718 bool have_whole_vector_shift = true;
1719 int bit_offset;
1720 int element_bitsize = tree_low_cst (bitsize, 1);
1721 int vec_size_in_bits = tree_low_cst (TYPE_SIZE (vectype), 1);
1722 tree vec_temp;
1723
1724 if (vec_shr_optab->handlers[mode].insn_code != CODE_FOR_nothing)
1725 shift_code = VEC_RSHIFT_EXPR;
1726 else
1727 have_whole_vector_shift = false;
1728
1729 /* Regardless of whether we have a whole vector shift, if we're
1730 emulating the operation via tree-vect-generic, we don't want
1731 to use it. Only the first round of the reduction is likely
1732 to still be profitable via emulation. */
1733 /* ??? It might be better to emit a reduction tree code here, so that
1734 tree-vect-generic can expand the first round via bit tricks. */
1735 if (!VECTOR_MODE_P (mode))
1736 have_whole_vector_shift = false;
1737 else
1738 {
1739 optab optab = optab_for_tree_code (code, vectype);
1740 if (optab->handlers[mode].insn_code == CODE_FOR_nothing)
1741 have_whole_vector_shift = false;
1742 }
1743
1744 if (have_whole_vector_shift)
1745 {
1746 /*** Case 2: Create:
1747 for (offset = VS/2; offset >= element_size; offset/=2)
1748 {
1749 Create: va' = vec_shift <va, offset>
1750 Create: va = vop <va, va'>
1751 } */
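
	  /* For instance (an illustrative sketch, assuming a 128-bit
	     V4SI addition reduction), the loop below emits:
		 va' = vec_shift <va, 64>
		 va  = va + va'
		 va' = vec_shift <va, 32>
		 va  = va + va'
	     after which the element that step 2.4 extracts (element 0
	     on little-endian targets) holds the full sum.  */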
1752
1753 if (vect_print_dump_info (REPORT_DETAILS))
1754 fprintf (vect_dump, "Reduce using vector shifts");
1755
1756 vec_dest = vect_create_destination_var (scalar_dest, vectype);
1757 new_temp = PHI_RESULT (new_phi);
1758
1759 for (bit_offset = vec_size_in_bits/2;
1760 bit_offset >= element_bitsize;
1761 bit_offset /= 2)
1762 {
1763 tree bitpos = size_int (bit_offset);
1764 tree tmp = build2 (shift_code, vectype, new_temp, bitpos);
1765 epilog_stmt = build_gimple_modify_stmt (vec_dest, tmp);
1766 new_name = make_ssa_name (vec_dest, epilog_stmt);
1767 GIMPLE_STMT_OPERAND (epilog_stmt, 0) = new_name;
1768 bsi_insert_before (&exit_bsi, epilog_stmt, BSI_SAME_STMT);
1769
1770 tmp = build2 (code, vectype, new_name, new_temp);
1771 epilog_stmt = build_gimple_modify_stmt (vec_dest, tmp);
1772 new_temp = make_ssa_name (vec_dest, epilog_stmt);
1773 GIMPLE_STMT_OPERAND (epilog_stmt, 0) = new_temp;
1774 bsi_insert_before (&exit_bsi, epilog_stmt, BSI_SAME_STMT);
1775 }
1776
1777 extract_scalar_result = true;
1778 }
1779 else
1780 {
1781 tree rhs;
1782
1783 /*** Case 3: Create:
1784 s = extract_field <v_out2, 0>
1785 for (offset = element_size;
1786 offset < vector_size;
1787 offset += element_size;)
1788 {
1789 Create: s' = extract_field <v_out2, offset>
1790 Create: s = op <s, s'>
1791 } */
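
	  /* For a V4SI vector this expands to the straight-line
	     sequence (illustrative; bit sizes/positions in bits):
		 s  = BIT_FIELD_REF <v_out2, 32, 0>
		 s' = BIT_FIELD_REF <v_out2, 32, 32>;  s = op <s, s'>
		 s' = BIT_FIELD_REF <v_out2, 32, 64>;  s = op <s, s'>
		 s' = BIT_FIELD_REF <v_out2, 32, 96>;  s = op <s, s'>  */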
1792
1793 if (vect_print_dump_info (REPORT_DETAILS))
1794 fprintf (vect_dump, "Reduce using scalar code. ");
1795
1796 vec_temp = PHI_RESULT (new_phi);
1797 vec_size_in_bits = tree_low_cst (TYPE_SIZE (vectype), 1);
1798 rhs = build3 (BIT_FIELD_REF, scalar_type, vec_temp, bitsize,
1799 bitsize_zero_node);
1800 BIT_FIELD_REF_UNSIGNED (rhs) = TYPE_UNSIGNED (scalar_type);
1801 epilog_stmt = build_gimple_modify_stmt (new_scalar_dest, rhs);
1802 new_temp = make_ssa_name (new_scalar_dest, epilog_stmt);
1803 GIMPLE_STMT_OPERAND (epilog_stmt, 0) = new_temp;
1804 bsi_insert_before (&exit_bsi, epilog_stmt, BSI_SAME_STMT);
1805
1806 for (bit_offset = element_bitsize;
1807 bit_offset < vec_size_in_bits;
1808 bit_offset += element_bitsize)
1809 {
1810 tree tmp;
1811 tree bitpos = bitsize_int (bit_offset);
1812 tree rhs = build3 (BIT_FIELD_REF, scalar_type, vec_temp, bitsize,
1813 bitpos);
1814
1815 BIT_FIELD_REF_UNSIGNED (rhs) = TYPE_UNSIGNED (scalar_type);
1816 epilog_stmt = build_gimple_modify_stmt (new_scalar_dest, rhs);
1817 new_name = make_ssa_name (new_scalar_dest, epilog_stmt);
1818 GIMPLE_STMT_OPERAND (epilog_stmt, 0) = new_name;
1819 bsi_insert_before (&exit_bsi, epilog_stmt, BSI_SAME_STMT);
1820
1821 tmp = build2 (code, scalar_type, new_name, new_temp);
1822 epilog_stmt = build_gimple_modify_stmt (new_scalar_dest, tmp);
1823 new_temp = make_ssa_name (new_scalar_dest, epilog_stmt);
1824 GIMPLE_STMT_OPERAND (epilog_stmt, 0) = new_temp;
1825 bsi_insert_before (&exit_bsi, epilog_stmt, BSI_SAME_STMT);
1826 }
1827
1828 extract_scalar_result = false;
1829 }
1830 }
1831
1832 /* 2.4 Extract the final scalar result. Create:
1833 s_out3 = extract_field <v_out2, bitpos> */
1834
1835 if (extract_scalar_result)
1836 {
1837 tree rhs;
1838
1839 if (vect_print_dump_info (REPORT_DETAILS))
1840 fprintf (vect_dump, "extract scalar result");
1841
1842 if (BYTES_BIG_ENDIAN)
1843 bitpos = size_binop (MULT_EXPR,
1844 bitsize_int (TYPE_VECTOR_SUBPARTS (vectype) - 1),
1845 TYPE_SIZE (scalar_type));
1846 else
1847 bitpos = bitsize_zero_node;
1848
1849 rhs = build3 (BIT_FIELD_REF, scalar_type, new_temp, bitsize, bitpos);
1850 BIT_FIELD_REF_UNSIGNED (rhs) = TYPE_UNSIGNED (scalar_type);
1851 epilog_stmt = build_gimple_modify_stmt (new_scalar_dest, rhs);
1852 new_temp = make_ssa_name (new_scalar_dest, epilog_stmt);
1853 GIMPLE_STMT_OPERAND (epilog_stmt, 0) = new_temp;
1854 bsi_insert_before (&exit_bsi, epilog_stmt, BSI_SAME_STMT);
1855 }
1856
1857 /* 2.5 Adjust the final result by the initial value of the reduction
1858 variable. (When such adjustment is not needed,
1859 'scalar_initial_def' is NULL).
1860
1861 Create:
1862 s_out4 = scalar_expr <s_out3, scalar_initial_def> */
1863
1864 if (scalar_initial_def)
1865 {
1866 tree tmp = build2 (code, scalar_type, new_temp, scalar_initial_def);
1867 epilog_stmt = build_gimple_modify_stmt (new_scalar_dest, tmp);
1868 new_temp = make_ssa_name (new_scalar_dest, epilog_stmt);
1869 GIMPLE_STMT_OPERAND (epilog_stmt, 0) = new_temp;
1870 bsi_insert_before (&exit_bsi, epilog_stmt, BSI_SAME_STMT);
1871 }
1872
1873 /* 2.6 Replace uses of s_out0 with uses of s_out4 (s_out3 if no adjustment was needed) */
1874
1875 /* Find the loop-closed-use at the loop exit of the original scalar result.
1876 (The reduction result is expected to have two immediate uses - one at the
1877 latch block, and one at the loop exit). */
1878 exit_phi = NULL;
1879 FOR_EACH_IMM_USE_FAST (use_p, imm_iter, scalar_dest)
1880 {
1881 if (!flow_bb_inside_loop_p (loop, bb_for_stmt (USE_STMT (use_p))))
1882 {
1883 exit_phi = USE_STMT (use_p);
1884 break;
1885 }
1886 }
1887 /* We expect to have found an exit_phi because of loop-closed-ssa form. */
1888 gcc_assert (exit_phi);
1889 /* Replace the uses: */
1890 orig_name = PHI_RESULT (exit_phi);
1891 FOR_EACH_IMM_USE_STMT (use_stmt, imm_iter, orig_name)
1892 FOR_EACH_IMM_USE_ON_STMT (use_p, imm_iter)
1893 SET_USE (use_p, new_temp);
1894 }
1895
1896
1897 /* Function vectorizable_reduction.
1898
1899 Check if STMT performs a reduction operation that can be vectorized.
1900 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
1901 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
1902 Return FALSE if not a vectorizable STMT, TRUE otherwise.
1903
1904 This function also handles reduction idioms (patterns) that have been
1905 recognized in advance during vect_pattern_recog. In this case, STMT may be
1906 of this form:
1907 X = pattern_expr (arg0, arg1, ..., X)
1908 and its STMT_VINFO_RELATED_STMT points to the last stmt in the original
1909 sequence that had been detected and replaced by the pattern-stmt (STMT).
1910
1911 In some cases of reduction patterns, the type of the reduction variable X is
1912 different from the type of the other arguments of STMT.
1913 In such cases, the vectype that is used when transforming STMT into a vector
1914 stmt is different from the vectype that is used to determine the
1915 vectorization factor, because it consists of a different number of elements
1916 than the actual number of elements that are being operated upon in parallel.
1917
1918 For example, consider an accumulation of shorts into an int accumulator.
1919 On some targets it's possible to vectorize this pattern operating on 8
1920 shorts at a time (hence, the vectype for purposes of determining the
1921 vectorization factor should be V8HI); on the other hand, the vectype that
1922 is used to create the vector form is actually V4SI (the type of the result).
1923
1924 Upon entry to this function, STMT_VINFO_VECTYPE records the vectype that
1925 indicates what is the actual level of parallelism (V8HI in the example), so
1926 that the right vectorization factor would be derived. This vectype
1927 corresponds to the type of arguments to the reduction stmt, and should *NOT*
1928 be used to create the vectorized stmt. The right vectype for the vectorized
1929 stmt is obtained from the type of the result X:
1930 get_vectype_for_scalar_type (TREE_TYPE (X))
1931
1932 This means that, contrary to "regular" reductions (or "regular" stmts in
1933 general), the following equation:
1934 STMT_VINFO_VECTYPE == get_vectype_for_scalar_type (TREE_TYPE (X))
1935 does *NOT* necessarily hold for reduction patterns. */
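
/* For instance (illustrative), the V8HI/V4SI case above typically
   arises from source code such as:

     short a[N];
     int sum = 0;
     for (i = 0; i < N; i++)
       sum += a[i];

   TREE_TYPE (X) is int here, so the vectorized stmt is created with
   vectype V4SI, while the vectorization factor is derived from V8HI.  */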
1936
1937 bool
1938 vectorizable_reduction (tree stmt, block_stmt_iterator *bsi, tree *vec_stmt)
1939 {
1940 tree vec_dest;
1941 tree scalar_dest;
1942 tree op;
1943 tree loop_vec_def0 = NULL_TREE, loop_vec_def1 = NULL_TREE;
1944 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1945 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
1946 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
1947 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
1948 tree operation;
1949 enum tree_code code, orig_code, epilog_reduc_code = 0;
1950 enum machine_mode vec_mode;
1951 int op_type;
1952 optab optab, reduc_optab;
1953 tree new_temp = NULL_TREE;
1954 tree def, def_stmt;
1955 enum vect_def_type dt;
1956 tree new_phi;
1957 tree scalar_type;
1958 bool is_simple_use;
1959 tree orig_stmt;
1960 stmt_vec_info orig_stmt_info;
1961 tree expr = NULL_TREE;
1962 int i;
1963 int nunits = TYPE_VECTOR_SUBPARTS (vectype);
1964 int ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits;
1965 stmt_vec_info prev_stmt_info;
1966 tree reduc_def;
1967 tree new_stmt = NULL_TREE;
1968 int j;
1969
1970 gcc_assert (ncopies >= 1);
1971
1972 /* 1. Is vectorizable reduction? */
1973
1974 /* Not supportable if the reduction variable is used in the loop. */
1975 if (STMT_VINFO_RELEVANT_P (stmt_info))
1976 return false;
1977
1978 if (!STMT_VINFO_LIVE_P (stmt_info))
1979 return false;
1980
1981 /* Make sure it was already recognized as a reduction computation. */
1982 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_reduction_def)
1983 return false;
1984
1985 /* 2. Has this been recognized as a reduction pattern?
1986
1987 Check if STMT represents a pattern that has been recognized
1988 in earlier analysis stages. For stmts that represent a pattern,
1989 the STMT_VINFO_RELATED_STMT field records the last stmt in
1990 the original sequence that constitutes the pattern. */
1991
1992 orig_stmt = STMT_VINFO_RELATED_STMT (stmt_info);
1993 if (orig_stmt)
1994 {
1995 orig_stmt_info = vinfo_for_stmt (orig_stmt);
1996 gcc_assert (STMT_VINFO_RELATED_STMT (orig_stmt_info) == stmt);
1997 gcc_assert (STMT_VINFO_IN_PATTERN_P (orig_stmt_info));
1998 gcc_assert (!STMT_VINFO_IN_PATTERN_P (stmt_info));
1999 }
2000
2001 /* 3. Check the operands of the operation. The first operands are defined
2002 inside the loop body. The last operand is the reduction variable,
2003 which is defined by the loop-header-phi. */
2004
2005 gcc_assert (TREE_CODE (stmt) == GIMPLE_MODIFY_STMT);
2006
2007 operation = GIMPLE_STMT_OPERAND (stmt, 1);
2008 code = TREE_CODE (operation);
2009 op_type = TREE_OPERAND_LENGTH (operation);
2010 if (op_type != binary_op && op_type != ternary_op)
2011 return false;
2012 scalar_dest = GIMPLE_STMT_OPERAND (stmt, 0);
2013 scalar_type = TREE_TYPE (scalar_dest);
2014
2015 /* All uses but the last are expected to be defined in the loop.
2016 The last use is the reduction variable. */
2017 for (i = 0; i < op_type-1; i++)
2018 {
2019 op = TREE_OPERAND (operation, i);
2020 is_simple_use = vect_is_simple_use (op, loop_vinfo, &def_stmt, &def, &dt);
2021 gcc_assert (is_simple_use);
2022 if (dt != vect_loop_def
2023 && dt != vect_invariant_def
2024 && dt != vect_constant_def
2025 && dt != vect_induction_def)
2026 return false;
2027 }
2028
2029 op = TREE_OPERAND (operation, i);
2030 is_simple_use = vect_is_simple_use (op, loop_vinfo, &def_stmt, &def, &dt);
2031 gcc_assert (is_simple_use);
2032 gcc_assert (dt == vect_reduction_def);
2033 gcc_assert (TREE_CODE (def_stmt) == PHI_NODE);
2034 if (orig_stmt)
2035 gcc_assert (orig_stmt == vect_is_simple_reduction (loop, def_stmt));
2036 else
2037 gcc_assert (stmt == vect_is_simple_reduction (loop, def_stmt));
2038
2039 if (STMT_VINFO_LIVE_P (vinfo_for_stmt (def_stmt)))
2040 return false;
2041
2042 /* 4. Supportable by target? */
2043
2044 /* 4.1. check support for the operation in the loop */
2045 optab = optab_for_tree_code (code, vectype);
2046 if (!optab)
2047 {
2048 if (vect_print_dump_info (REPORT_DETAILS))
2049 fprintf (vect_dump, "no optab.");
2050 return false;
2051 }
2052 vec_mode = TYPE_MODE (vectype);
2053 if (optab->handlers[(int) vec_mode].insn_code == CODE_FOR_nothing)
2054 {
2055 if (vect_print_dump_info (REPORT_DETAILS))
2056 fprintf (vect_dump, "op not supported by target.");
2057 if (GET_MODE_SIZE (vec_mode) != UNITS_PER_WORD
2058 || LOOP_VINFO_VECT_FACTOR (loop_vinfo)
2059 < vect_min_worthwhile_factor (code))
2060 return false;
2061 if (vect_print_dump_info (REPORT_DETAILS))
2062 fprintf (vect_dump, "proceeding using word mode.");
2063 }
2064
2065 /* Worthwhile without SIMD support? */
2066 if (!VECTOR_MODE_P (TYPE_MODE (vectype))
2067 && LOOP_VINFO_VECT_FACTOR (loop_vinfo)
2068 < vect_min_worthwhile_factor (code))
2069 {
2070 if (vect_print_dump_info (REPORT_DETAILS))
2071 fprintf (vect_dump, "not worthwhile without SIMD support.");
2072 return false;
2073 }
2074
2075 /* 4.2. Check support for the epilog operation.
2076
2077 If STMT represents a reduction pattern, then the type of the
2078 reduction variable may be different than the type of the rest
2079 of the arguments. For example, consider the case of accumulation
2080 of shorts into an int accumulator; The original code:
2081 S1: int_a = (int) short_a;
2082 orig_stmt-> S2: int_acc = plus <int_a ,int_acc>;
2083
2084 was replaced with:
2085 STMT: int_acc = widen_sum <short_a, int_acc>
2086
2087 This means that:
2088 1. The tree-code that is used to create the vector operation in the
2089 epilog code (that reduces the partial results) is not the
2090 tree-code of STMT, but is rather the tree-code of the original
2091 stmt from the pattern that STMT is replacing. I.e, in the example
2092 above we want to use 'widen_sum' in the loop, but 'plus' in the
2093 epilog.
2094 2. The type (mode) we use to check available target support
2095 for the vector operation to be created in the *epilog*, is
2096 determined by the type of the reduction variable (in the example
2097 above we'd check this: plus_optab[vect_int_mode]).
2098 However the type (mode) we use to check available target support
2099 for the vector operation to be created *inside the loop*, is
2100 determined by the type of the other arguments to STMT (in the
2101 example we'd check this: widen_sum_optab[vect_short_mode]).
2102
2103 This is contrary to "regular" reductions, in which the types of all
2104 the arguments are the same as the type of the reduction variable.
2105 For "regular" reductions we can therefore use the same vector type
2106 (and also the same tree-code) when generating the epilog code and
2107 when generating the code inside the loop. */
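
  /* (Illustrative consequence: a target may support widen_sum on V8HI
     inside the loop and yet lack a direct vector reduction for the V4SI
     epilog; in that case epilog_reduc_code is set to NUM_TREE_CODES
     below, and vect_create_epilog_for_reduction falls back to
     whole-vector shifts or scalar code.)  */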
2108
2109 if (orig_stmt)
2110 {
2111 /* This is a reduction pattern: get the vectype from the type of the
2112 reduction variable, and get the tree-code from orig_stmt. */
2113 orig_code = TREE_CODE (GIMPLE_STMT_OPERAND (orig_stmt, 1));
2114 vectype = get_vectype_for_scalar_type (TREE_TYPE (def));
2115 vec_mode = TYPE_MODE (vectype);
2116 }
2117 else
2118 {
2119 /* Regular reduction: the same vectype and tree-code that are used for
2120 the vector code inside the loop can also be used for the epilog code. */
2121 orig_code = code;
2122 }
2123
2124 if (!reduction_code_for_scalar_code (orig_code, &epilog_reduc_code))
2125 return false;
2126 reduc_optab = optab_for_tree_code (epilog_reduc_code, vectype);
2127 if (!reduc_optab)
2128 {
2129 if (vect_print_dump_info (REPORT_DETAILS))
2130 fprintf (vect_dump, "no optab for reduction.");
2131 epilog_reduc_code = NUM_TREE_CODES;
2132 }
2133 if (reduc_optab && reduc_optab->handlers[(int) vec_mode].insn_code == CODE_FOR_nothing)
2134 {
2135 if (vect_print_dump_info (REPORT_DETAILS))
2136 fprintf (vect_dump, "reduc op not supported by target.");
2137 epilog_reduc_code = NUM_TREE_CODES;
2138 }
2139
2140 if (!vec_stmt) /* transformation not required. */
2141 {
2142 STMT_VINFO_TYPE (stmt_info) = reduc_vec_info_type;
2143 vect_model_reduction_cost (stmt_info, epilog_reduc_code, ncopies);
2144 return true;
2145 }
2146
2147 /** Transform. **/
2148
2149 if (vect_print_dump_info (REPORT_DETAILS))
2150 fprintf (vect_dump, "transform reduction.");
2151
2152 /* Create the destination vector */
2153 vec_dest = vect_create_destination_var (scalar_dest, vectype);
2154
2155 /* Create the reduction-phi that defines the reduction-operand. */
2156 new_phi = create_phi_node (vec_dest, loop->header);
2157
2158 /* In case the vectorization factor (VF) is bigger than the number
2159 of elements that we can fit in a vectype (nunits), we have to generate
2160 more than one vector stmt - i.e - we need to "unroll" the
2161 vector stmt by a factor VF/nunits. For more details see documentation
2162 in vectorizable_operation. */
2163
2164 prev_stmt_info = NULL;
2165 for (j = 0; j < ncopies; j++)
2166 {
2167 /* Handle uses. */
2168 if (j == 0)
2169 {
2170 op = TREE_OPERAND (operation, 0);
2171 loop_vec_def0 = vect_get_vec_def_for_operand (op, stmt, NULL);
2172 if (op_type == ternary_op)
2173 {
2174 op = TREE_OPERAND (operation, 1);
2175 loop_vec_def1 = vect_get_vec_def_for_operand (op, stmt, NULL);
2176 }
2177
2178 /* Get the vector def for the reduction variable from the phi node */
2179 reduc_def = PHI_RESULT (new_phi);
2180 }
2181 else
2182 {
2183 enum vect_def_type dt = vect_unknown_def_type; /* Dummy */
2184 loop_vec_def0 = vect_get_vec_def_for_stmt_copy (dt, loop_vec_def0);
2185 if (op_type == ternary_op)
2186 loop_vec_def1 = vect_get_vec_def_for_stmt_copy (dt, loop_vec_def1);
2187
2188 /* Get the vector def for the reduction variable from the vectorized
2189 reduction operation generated in the previous iteration (j-1) */
2190 reduc_def = GIMPLE_STMT_OPERAND (new_stmt ,0);
2191 }
2192
2193 /* Arguments are ready. Create the new vector stmt. */
2194 if (op_type == binary_op)
2195 expr = build2 (code, vectype, loop_vec_def0, reduc_def);
2196 else
2197 expr = build3 (code, vectype, loop_vec_def0, loop_vec_def1,
2198 reduc_def);
2199 new_stmt = build_gimple_modify_stmt (vec_dest, expr);
2200 new_temp = make_ssa_name (vec_dest, new_stmt);
2201 GIMPLE_STMT_OPERAND (new_stmt, 0) = new_temp;
2202 vect_finish_stmt_generation (stmt, new_stmt, bsi);
2203
2204 if (j == 0)
2205 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
2206 else
2207 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
2208 prev_stmt_info = vinfo_for_stmt (new_stmt);
2209 }
2210
2211 /* Finalize the reduction-phi (set its arguments) and create the
2212 epilog reduction code. */
2213 vect_create_epilog_for_reduction (new_temp, stmt, epilog_reduc_code, new_phi);
2214 return true;
2215 }
2216
2217 /* Check if CALL can be vectorized with argument type VECTYPE_IN and
2218 result type VECTYPE_OUT. Returns a function declaration if the target
2219 has a vectorized version of the function, or NULL_TREE otherwise. */
2220
2221 tree
2222 vectorizable_function (tree call, tree vectype_out, tree vectype_in)
2223 {
2224 tree fndecl = get_callee_fndecl (call);
2225 enum built_in_function code;
2226
2227 /* We only handle functions that do not read or clobber memory -- i.e.
2228 const or novops ones. */
2229 if (!(call_expr_flags (call) & (ECF_CONST | ECF_NOVOPS)))
2230 return NULL_TREE;
2231
2232 if (!fndecl
2233 || TREE_CODE (fndecl) != FUNCTION_DECL
2234 || !DECL_BUILT_IN (fndecl))
2235 return NULL_TREE;
2236
2237 code = DECL_FUNCTION_CODE (fndecl);
2238 return targetm.vectorize.builtin_vectorized_function (code, vectype_out,
2239 vectype_in);
2240 }
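
/* For example (illustrative): on an SSE2 target, a call to the sqrt
   builtin with VECTYPE_OUT == VECTYPE_IN == V2DF may be mapped by
   targetm.vectorize.builtin_vectorized_function to a vector sqrt
   builtin; targets without a suitable hook entry return NULL_TREE.  */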
2241
2242 /* Function vectorizable_call.
2243
2244 Check if STMT performs a function call that can be vectorized.
2245 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
2246 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
2247 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
2248
2249 bool
2250 vectorizable_call (tree stmt, block_stmt_iterator *bsi, tree *vec_stmt)
2251 {
2252 tree vec_dest;
2253 tree scalar_dest;
2254 tree operation;
2255 tree op, type;
2256 stmt_vec_info stmt_info = vinfo_for_stmt (stmt), prev_stmt_info;
2257 tree vectype_out, vectype_in;
2258 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
2259 tree fndecl, rhs, new_temp, def, def_stmt, rhs_type, lhs_type;
2260 enum vect_def_type dt[2];
2261 int ncopies, j, nargs;
2262 call_expr_arg_iterator iter;
2263
2264 if (!STMT_VINFO_RELEVANT_P (stmt_info))
2265 return false;
2266
2267 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_loop_def)
2268 return false;
2269
2270 /* FORNOW: not yet supported. */
2271 if (STMT_VINFO_LIVE_P (stmt_info))
2272 {
2273 if (vect_print_dump_info (REPORT_DETAILS))
2274 fprintf (vect_dump, "value used after loop.");
2275 return false;
2276 }
2277
2278 /* Is STMT a vectorizable call? */
2279 if (TREE_CODE (stmt) != GIMPLE_MODIFY_STMT)
2280 return false;
2281
2282 if (TREE_CODE (GIMPLE_STMT_OPERAND (stmt, 0)) != SSA_NAME)
2283 return false;
2284
2285 operation = GIMPLE_STMT_OPERAND (stmt, 1);
2286 if (TREE_CODE (operation) != CALL_EXPR)
2287 return false;
2288
2289 /* Process function arguments. */
2290 rhs_type = NULL_TREE;
2291 nargs = 0;
2292 FOR_EACH_CALL_EXPR_ARG (op, iter, operation)
2293 {
2294 ++nargs;
2295
2296 /* Bail out if the function has more than two arguments; we
2297 do not have interesting builtin functions to vectorize with
2298 more than two arguments. */
2299 if (nargs > 2)
2300 return false;
2301
2302 /* We can only handle calls with arguments of the same type. */
2303 if (rhs_type
2304 && rhs_type != TREE_TYPE (op))
2305 {
2306 if (vect_print_dump_info (REPORT_DETAILS))
2307 fprintf (vect_dump, "argument types differ.");
2308 return false;
2309 }
2310 rhs_type = TREE_TYPE (op);
2311
2312 if (!vect_is_simple_use (op, loop_vinfo, &def_stmt, &def, &dt[nargs-1]))
2313 {
2314 if (vect_print_dump_info (REPORT_DETAILS))
2315 fprintf (vect_dump, "use not simple.");
2316 return false;
2317 }
2318 }
2319
2320 /* No arguments is also not good. */
2321 if (nargs == 0)
2322 return false;
2323
2324 vectype_in = get_vectype_for_scalar_type (rhs_type);
2325
2326 lhs_type = TREE_TYPE (GIMPLE_STMT_OPERAND (stmt, 0));
2327 vectype_out = get_vectype_for_scalar_type (lhs_type);
2328
2329 /* Only handle the case of vectors with the same number of elements.
2330 FIXME: We need a way to handle for example the SSE2 cvtpd2dq
2331 instruction which converts V2DFmode to V4SImode but only
2332 using the lower half of the V4SImode result. */
2333 if (TYPE_VECTOR_SUBPARTS (vectype_in) != TYPE_VECTOR_SUBPARTS (vectype_out))
2334 return false;
2335
2336 /* For now, we only vectorize functions if a target specific builtin
2337 is available. TODO -- in some cases, it might be profitable to
2338 insert the calls for pieces of the vector, in order to be able
2339 to vectorize other operations in the loop. */
2340 fndecl = vectorizable_function (operation, vectype_out, vectype_in);
2341 if (fndecl == NULL_TREE)
2342 {
2343 if (vect_print_dump_info (REPORT_DETAILS))
2344 fprintf (vect_dump, "function is not vectorizable.");
2345
2346 return false;
2347 }
2348
2349 gcc_assert (ZERO_SSA_OPERANDS (stmt, SSA_OP_ALL_VIRTUALS));
2350
2351 ncopies = (LOOP_VINFO_VECT_FACTOR (loop_vinfo)
2352 / TYPE_VECTOR_SUBPARTS (vectype_out));
2353
2354 if (!vec_stmt) /* transformation not required. */
2355 {
2356 STMT_VINFO_TYPE (stmt_info) = call_vec_info_type;
2357 if (vect_print_dump_info (REPORT_DETAILS))
2358 fprintf (vect_dump, "=== vectorizable_call ===");
2359 vect_model_simple_cost (stmt_info, ncopies);
2360 return true;
2361 }
2362
2363 /** Transform. **/
2364
2365 if (vect_print_dump_info (REPORT_DETAILS))
2366 fprintf (vect_dump, "transform operation.");
2367
2368 gcc_assert (ncopies >= 1);
2369
2370 /* Handle def. */
2371 scalar_dest = GIMPLE_STMT_OPERAND (stmt, 0);
2372 vec_dest = vect_create_destination_var (scalar_dest, vectype_out);
2373
2374 prev_stmt_info = NULL;
2375 for (j = 0; j < ncopies; ++j)
2376 {
2377 tree new_stmt, vargs;
2378 tree vec_oprnd[2];
2379 int n;
2380
2381 /* Build argument list for the vectorized call. */
2382 /* FIXME: Rewrite this so that it doesn't construct a temporary
2383 list. */
2384 vargs = NULL_TREE;
2385 n = -1;
2386 FOR_EACH_CALL_EXPR_ARG (op, iter, operation)
2387 {
2388 ++n;
2389
2390 if (j == 0)
2391 vec_oprnd[n] = vect_get_vec_def_for_operand (op, stmt, NULL);
2392 else
2393 vec_oprnd[n] = vect_get_vec_def_for_stmt_copy (dt[n], vec_oprnd[n]);
2394
2395 vargs = tree_cons (NULL_TREE, vec_oprnd[n], vargs);
2396 }
2397 vargs = nreverse (vargs);
2398
2399 rhs = build_function_call_expr (fndecl, vargs);
2400 new_stmt = build_gimple_modify_stmt (vec_dest, rhs);
2401 new_temp = make_ssa_name (vec_dest, new_stmt);
2402 GIMPLE_STMT_OPERAND (new_stmt, 0) = new_temp;
2403
2404 vect_finish_stmt_generation (stmt, new_stmt, bsi);
2405
2406 if (j == 0)
2407 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
2408 else
2409 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
2410 prev_stmt_info = vinfo_for_stmt (new_stmt);
2411 }
2412
2413 /* The call in STMT might prevent it from being removed in dce. We however
2414 cannot remove it here, due to the way the ssa name it defines is mapped
2415 to the new definition. So just replace the rhs of the statement with
2416 something harmless. */
2417 type = TREE_TYPE (scalar_dest);
2418 GIMPLE_STMT_OPERAND (stmt, 1) = fold_convert (type, integer_zero_node);
2419 update_stmt (stmt);
2420
2421 return true;
2422 }
2423
2424
2425 /* Function vect_gen_widened_results_half
2426
2427 Create a vector stmt whose code, type, number of arguments, and result
2428 variable are CODE, VECTYPE, OP_TYPE, and VEC_DEST, and its arguments are
2429 VEC_OPRND0 and VEC_OPRND1. The new vector stmt is to be inserted at BSI.
2430 In the case that CODE is a CALL_EXPR, this means that a call to DECL
2431 needs to be created (DECL is a function-decl of a target-builtin).
2432 STMT is the original scalar stmt that we are vectorizing. */
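
/* For example (illustrative): when widening eight shorts (V8HI) to
   eight ints, each half of the result is a V4SI vector; the caller
   invokes this function twice, once with the "low" code/decl and once
   with the "high" one (e.g. VEC_UNPACK_LO_EXPR / VEC_UNPACK_HI_EXPR,
   or two target builtins).  */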
2433
2434 static tree
2435 vect_gen_widened_results_half (enum tree_code code, tree vectype, tree decl,
2436 tree vec_oprnd0, tree vec_oprnd1, int op_type,
2437 tree vec_dest, block_stmt_iterator *bsi,
2438 tree stmt)
2439 {
2440 tree expr;
2441 tree new_stmt;
2442 tree new_temp;
2443 tree sym;
2444 ssa_op_iter iter;
2445
2446 /* Generate half of the widened result: */
2447 if (code == CALL_EXPR)
2448 {
2449 /* Target specific support */
2450 if (op_type == binary_op)
2451 expr = build_call_expr (decl, 2, vec_oprnd0, vec_oprnd1);
2452 else
2453 expr = build_call_expr (decl, 1, vec_oprnd0);
2454 }
2455 else
2456 {
2457 /* Generic support */
2458 gcc_assert (op_type == TREE_CODE_LENGTH (code));
2459 if (op_type == binary_op)
2460 expr = build2 (code, vectype, vec_oprnd0, vec_oprnd1);
2461 else
2462 expr = build1 (code, vectype, vec_oprnd0);
2463 }
2464 new_stmt = build_gimple_modify_stmt (vec_dest, expr);
2465 new_temp = make_ssa_name (vec_dest, new_stmt);
2466 GIMPLE_STMT_OPERAND (new_stmt, 0) = new_temp;
2467 vect_finish_stmt_generation (stmt, new_stmt, bsi);
2468
2469 if (code == CALL_EXPR)
2470 {
2471 FOR_EACH_SSA_TREE_OPERAND (sym, new_stmt, iter, SSA_OP_ALL_VIRTUALS)
2472 {
2473 if (TREE_CODE (sym) == SSA_NAME)
2474 sym = SSA_NAME_VAR (sym);
2475 mark_sym_for_renaming (sym);
2476 }
2477 }
2478
2479 return new_stmt;
2480 }
2481
2482
2483 /* Function vectorizable_conversion.
2484
2485 Check if STMT performs a conversion operation that can be vectorized.
2486 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
2487 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
2488 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
2489
2490 bool
2491 vectorizable_conversion (tree stmt, block_stmt_iterator * bsi,
2492 tree * vec_stmt)
2493 {
2494 tree vec_dest;
2495 tree scalar_dest;
2496 tree operation;
2497 tree op0;
2498 tree vec_oprnd0 = NULL_TREE, vec_oprnd1 = NULL_TREE;
2499 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
2500 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
2501 enum tree_code code, code1 = ERROR_MARK, code2 = ERROR_MARK;
2502 tree decl1 = NULL_TREE, decl2 = NULL_TREE;
2503 tree new_temp;
2504 tree def, def_stmt;
2505 enum vect_def_type dt0;
2506 tree new_stmt;
2507 stmt_vec_info prev_stmt_info;
2508 int nunits_in;
2509 int nunits_out;
2510 tree vectype_out, vectype_in;
2511 int ncopies, j;
2512 tree expr;
2513 tree rhs_type, lhs_type;
2514 tree builtin_decl;
2515 enum { NARROW, NONE, WIDEN } modifier;
2516
2517 /* Is STMT a vectorizable conversion? */
2518
2519 if (!STMT_VINFO_RELEVANT_P (stmt_info))
2520 return false;
2521
2522 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_loop_def)
2523 return false;
2524
2525 if (STMT_VINFO_LIVE_P (stmt_info))
2526 {
2527 /* FORNOW: not yet supported. */
2528 if (vect_print_dump_info (REPORT_DETAILS))
2529 fprintf (vect_dump, "value used after loop.");
2530 return false;
2531 }
2532
2533 if (TREE_CODE (stmt) != GIMPLE_MODIFY_STMT)
2534 return false;
2535
2536 if (TREE_CODE (GIMPLE_STMT_OPERAND (stmt, 0)) != SSA_NAME)
2537 return false;
2538
2539 operation = GIMPLE_STMT_OPERAND (stmt, 1);
2540 code = TREE_CODE (operation);
2541 if (code != FIX_TRUNC_EXPR && code != FLOAT_EXPR)
2542 return false;
2543
2544 /* Check types of lhs and rhs */
2545 op0 = TREE_OPERAND (operation, 0);
2546 rhs_type = TREE_TYPE (op0);
2547 vectype_in = get_vectype_for_scalar_type (rhs_type);
2548 nunits_in = TYPE_VECTOR_SUBPARTS (vectype_in);
2549
2550 scalar_dest = GIMPLE_STMT_OPERAND (stmt, 0);
2551 lhs_type = TREE_TYPE (scalar_dest);
2552 vectype_out = get_vectype_for_scalar_type (lhs_type);
2553 nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out);
2554
2555 /* FORNOW */
2556 if (nunits_in == nunits_out / 2)
2557 modifier = NARROW;
2558 else if (nunits_out == nunits_in)
2559 modifier = NONE;
2560 else if (nunits_out == nunits_in / 2)
2561 modifier = WIDEN;
2562 else
2563 return false;
2564
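  /* E.g. (illustrative, with 128-bit vectors): FLOAT_EXPR from V4SI to
     V4SF is NONE; FLOAT_EXPR from int (V4SI) to double (V2DF) is WIDEN
     (nunits_out == nunits_in / 2); FIX_TRUNC_EXPR from double (V2DF)
     to int (V4SI) is NARROW (nunits_in == nunits_out / 2).  */
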
2565 if (modifier == NONE)
2566 gcc_assert (STMT_VINFO_VECTYPE (stmt_info) == vectype_out);
2567
2568 /* Bail out if the types are both integral or non-integral */
2569 if ((INTEGRAL_TYPE_P (rhs_type) && INTEGRAL_TYPE_P (lhs_type))
2570 || (!INTEGRAL_TYPE_P (rhs_type) && !INTEGRAL_TYPE_P (lhs_type)))
2571 return false;
2572
2573 if (modifier == NARROW)
2574 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits_out;
2575 else
2576 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits_in;
2577
2578 /* Sanity check: make sure that at least one copy of the vectorized stmt
2579 needs to be generated. */
2580 gcc_assert (ncopies >= 1);
2581
2582 /* Check the operands of the operation. */
2583 if (!vect_is_simple_use (op0, loop_vinfo, &def_stmt, &def, &dt0))
2584 {
2585 if (vect_print_dump_info (REPORT_DETAILS))
2586 fprintf (vect_dump, "use not simple.");
2587 return false;
2588 }
2589
2590 /* Supportable by target? */
2591 if ((modifier == NONE
2592 && !targetm.vectorize.builtin_conversion (code, vectype_in))
2593 || (modifier == WIDEN
2594 && !supportable_widening_operation (code, stmt, vectype_in,
2595 &decl1, &decl2,
2596 &code1, &code2))
2597 || (modifier == NARROW
2598 && !supportable_narrowing_operation (code, stmt, vectype_in,
2599 &code1)))
2600 {
2601 if (vect_print_dump_info (REPORT_DETAILS))
2602 fprintf (vect_dump, "op not supported by target.");
2603 return false;
2604 }
2605
2606 if (modifier != NONE)
2607 STMT_VINFO_VECTYPE (stmt_info) = vectype_in;
2608
2609 if (!vec_stmt) /* transformation not required. */
2610 {
2611 STMT_VINFO_TYPE (stmt_info) = type_conversion_vec_info_type;
2612 return true;
2613 }
2614
2615 /** Transform. **/
2616 if (vect_print_dump_info (REPORT_DETAILS))
2617 fprintf (vect_dump, "transform conversion.");
2618
2619 /* Handle def. */
2620 vec_dest = vect_create_destination_var (scalar_dest, vectype_out);
2621
2622 prev_stmt_info = NULL;
2623 switch (modifier)
2624 {
2625 case NONE:
2626 for (j = 0; j < ncopies; j++)
2627 {
2628 tree sym;
2629 ssa_op_iter iter;
2630
2631 if (j == 0)
2632 vec_oprnd0 = vect_get_vec_def_for_operand (op0, stmt, NULL);
2633 else
2634 vec_oprnd0 = vect_get_vec_def_for_stmt_copy (dt0, vec_oprnd0);
2635
2636 builtin_decl =
2637 targetm.vectorize.builtin_conversion (code, vectype_in);
2638 new_stmt = build_call_expr (builtin_decl, 1, vec_oprnd0);
2639
2640 /* Arguments are ready. Create the new vector stmt. */
2641 new_stmt = build_gimple_modify_stmt (vec_dest, new_stmt);
2642 new_temp = make_ssa_name (vec_dest, new_stmt);
2643 GIMPLE_STMT_OPERAND (new_stmt, 0) = new_temp;
2644 vect_finish_stmt_generation (stmt, new_stmt, bsi);
2645 FOR_EACH_SSA_TREE_OPERAND (sym, new_stmt, iter, SSA_OP_ALL_VIRTUALS)
2646 {
2647 if (TREE_CODE (sym) == SSA_NAME)
2648 sym = SSA_NAME_VAR (sym);
2649 mark_sym_for_renaming (sym);
2650 }
2651
2652 if (j == 0)
2653 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
2654 else
2655 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
2656 prev_stmt_info = vinfo_for_stmt (new_stmt);
2657 }
2658 break;
2659
2660 case WIDEN:
2661 /* In case the vectorization factor (VF) is bigger than the number
2662 of elements that we can fit in a vectype (nunits), we have to
2663 generate more than one vector stmt - i.e - we need to "unroll"
2664 the vector stmt by a factor VF/nunits. */
2665 for (j = 0; j < ncopies; j++)
2666 {
2667 if (j == 0)
2668 vec_oprnd0 = vect_get_vec_def_for_operand (op0, stmt, NULL);
2669 else
2670 vec_oprnd0 = vect_get_vec_def_for_stmt_copy (dt0, vec_oprnd0);
2671
2672 STMT_VINFO_VECTYPE (stmt_info) = vectype_in;
2673
2674 /* Generate first half of the widened result: */
2675 new_stmt
2676 = vect_gen_widened_results_half (code1, vectype_out, decl1,
2677 vec_oprnd0, vec_oprnd1,
2678 unary_op, vec_dest, bsi, stmt);
2679 if (j == 0)
2680 STMT_VINFO_VEC_STMT (stmt_info) = new_stmt;
2681 else
2682 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
2683 prev_stmt_info = vinfo_for_stmt (new_stmt);
2684
2685 /* Generate second half of the widened result: */
2686 new_stmt
2687 = vect_gen_widened_results_half (code2, vectype_out, decl2,
2688 vec_oprnd0, vec_oprnd1,
2689 unary_op, vec_dest, bsi, stmt);
2690 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
2691 prev_stmt_info = vinfo_for_stmt (new_stmt);
2692 }
2693 break;
2694
2695 case NARROW:
2696 /* In case the vectorization factor (VF) is bigger than the number
2697 of elements that we can fit in a vectype (nunits), we have to
2698 generate more than one vector stmt - i.e - we need to "unroll"
2699 the vector stmt by a factor VF/nunits. */
2700 for (j = 0; j < ncopies; j++)
2701 {
2702 /* Handle uses. */
2703 if (j == 0)
2704 {
2705 vec_oprnd0 = vect_get_vec_def_for_operand (op0, stmt, NULL);
2706 vec_oprnd1 = vect_get_vec_def_for_stmt_copy (dt0, vec_oprnd0);
2707 }
2708 else
2709 {
2710 vec_oprnd0 = vect_get_vec_def_for_stmt_copy (dt0, vec_oprnd1);
2711 vec_oprnd1 = vect_get_vec_def_for_stmt_copy (dt0, vec_oprnd0);
2712 }
2713
2714 /* Arguments are ready. Create the new vector stmt. */
2715 expr = build2 (code1, vectype_out, vec_oprnd0, vec_oprnd1);
2716 new_stmt = build_gimple_modify_stmt (vec_dest, expr);
2717 new_temp = make_ssa_name (vec_dest, new_stmt);
2718 GIMPLE_STMT_OPERAND (new_stmt, 0) = new_temp;
2719 vect_finish_stmt_generation (stmt, new_stmt, bsi);
2720
2721 if (j == 0)
2722 STMT_VINFO_VEC_STMT (stmt_info) = new_stmt;
2723 else
2724 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
2725
2726 prev_stmt_info = vinfo_for_stmt (new_stmt);
2727 }
2728
2729 *vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);
2730 }
2731 return true;
2732 }
2733
2734
2735 /* Function vectorizable_assignment.
2736
2737 Check if STMT performs an assignment (copy) that can be vectorized.
2738 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
2739 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
2740 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
2741
2742 bool
2743 vectorizable_assignment (tree stmt, block_stmt_iterator *bsi, tree *vec_stmt)
2744 {
2745 tree vec_dest;
2746 tree scalar_dest;
2747 tree op;
2748 tree vec_oprnd;
2749 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
2750 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
2751 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
2752 tree new_temp;
2753 tree def, def_stmt;
2754 enum vect_def_type dt;
2755 int nunits = TYPE_VECTOR_SUBPARTS (vectype);
2756 int ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits;
2757
2758 gcc_assert (ncopies >= 1);
2759 if (ncopies > 1)
2760 return false; /* FORNOW */
2761
2762 if (!STMT_VINFO_RELEVANT_P (stmt_info))
2763 return false;
2764
2765 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_loop_def)
2766 return false;
2767
2768 /* FORNOW: not yet supported. */
2769 if (STMT_VINFO_LIVE_P (stmt_info))
2770 {
2771 if (vect_print_dump_info (REPORT_DETAILS))
2772 fprintf (vect_dump, "value used after loop.");
2773 return false;
2774 }
2775
2776 /* Is vectorizable assignment? */
2777 if (TREE_CODE (stmt) != GIMPLE_MODIFY_STMT)
2778 return false;
2779
2780 scalar_dest = GIMPLE_STMT_OPERAND (stmt, 0);
2781 if (TREE_CODE (scalar_dest) != SSA_NAME)
2782 return false;
2783
2784 op = GIMPLE_STMT_OPERAND (stmt, 1);
2785 if (!vect_is_simple_use (op, loop_vinfo, &def_stmt, &def, &dt))
2786 {
2787 if (vect_print_dump_info (REPORT_DETAILS))
2788 fprintf (vect_dump, "use not simple.");
2789 return false;
2790 }
2791
2792 if (!vec_stmt) /* transformation not required. */
2793 {
2794 STMT_VINFO_TYPE (stmt_info) = assignment_vec_info_type;
2795 if (vect_print_dump_info (REPORT_DETAILS))
2796 fprintf (vect_dump, "=== vectorizable_assignment ===");
2797 vect_model_simple_cost (stmt_info, ncopies);
2798 return true;
2799 }
2800
2801 /** Transform. **/
2802 if (vect_print_dump_info (REPORT_DETAILS))
2803 fprintf (vect_dump, "transform assignment.");
2804
2805 /* Handle def. */
2806 vec_dest = vect_create_destination_var (scalar_dest, vectype);
2807
2808 /* Handle use. */
2809 op = GIMPLE_STMT_OPERAND (stmt, 1);
2810 vec_oprnd = vect_get_vec_def_for_operand (op, stmt, NULL);
2811
2812 /* Arguments are ready. Create the new vector stmt. */
2813 *vec_stmt = build_gimple_modify_stmt (vec_dest, vec_oprnd);
2814 new_temp = make_ssa_name (vec_dest, *vec_stmt);
2815 GIMPLE_STMT_OPERAND (*vec_stmt, 0) = new_temp;
2816 vect_finish_stmt_generation (stmt, *vec_stmt, bsi);
2817
2818 return true;
2819 }
2820
2821
2822 /* Function vect_min_worthwhile_factor.
2823
2824 For a loop where we could vectorize the operation indicated by CODE,
2825 return the minimum vectorization factor that makes it worthwhile
2826 to use generic vectors. */
2827 static int
2828 vect_min_worthwhile_factor (enum tree_code code)
2829 {
2830 switch (code)
2831 {
2832 case PLUS_EXPR:
2833 case MINUS_EXPR:
2834 case NEGATE_EXPR:
2835 return 4;
2836
2837 case BIT_AND_EXPR:
2838 case BIT_IOR_EXPR:
2839 case BIT_XOR_EXPR:
2840 case BIT_NOT_EXPR:
2841 return 2;
2842
2843 default:
2844 return INT_MAX;
2845 }
2846 }
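
/* For example: with no SIMD support, a PLUS_EXPR loop is only worth
   vectorizing with word-mode generic vectors when at least 4 elements
   are processed per vector iteration; cheap bitwise operations already
   pay off at 2; everything else (INT_MAX) is never considered
   worthwhile.  */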
2847
2848
2849 /* Function vectorizable_induction
2850
2851 Check if PHI performs an induction computation that can be vectorized.
2852 If VEC_STMT is also passed, vectorize the induction PHI: create a vectorized
2853 phi to replace it, put it in VEC_STMT, and add it to the same basic block.
2854 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
2855
2856 bool
2857 vectorizable_induction (tree phi, block_stmt_iterator *bsi ATTRIBUTE_UNUSED,
2858 tree *vec_stmt)
2859 {
2860 stmt_vec_info stmt_info = vinfo_for_stmt (phi);
2861 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
2862 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
2863 int nunits = TYPE_VECTOR_SUBPARTS (vectype);
2864 int ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits;
2865 tree vec_def;
2866
2867 gcc_assert (ncopies >= 1);
2868
2869 if (!STMT_VINFO_RELEVANT_P (stmt_info))
2870 return false;
2871
2872 gcc_assert (STMT_VINFO_DEF_TYPE (stmt_info) == vect_induction_def);
2873
2874 if (STMT_VINFO_LIVE_P (stmt_info))
2875 {
2876 /* FORNOW: not yet supported. */
2877 if (vect_print_dump_info (REPORT_DETAILS))
2878 fprintf (vect_dump, "value used after loop.");
2879 return false;
2880 }
2881
2882 if (TREE_CODE (phi) != PHI_NODE)
2883 return false;
2884
2885 if (!vec_stmt) /* transformation not required. */
2886 {
2887 STMT_VINFO_TYPE (stmt_info) = induc_vec_info_type;
2888 if (vect_print_dump_info (REPORT_DETAILS))
2889 fprintf (vect_dump, "=== vectorizable_induction ===");
2890 vect_model_induction_cost (stmt_info, ncopies);
2891 return true;
2892 }
2893
2894 /** Transform. **/
2895
2896 if (vect_print_dump_info (REPORT_DETAILS))
2897 fprintf (vect_dump, "transform induction phi.");
2898
2899 vec_def = get_initial_def_for_induction (phi);
2900 *vec_stmt = SSA_NAME_DEF_STMT (vec_def);
2901 return true;
2902 }
2903
2904
2905 /* Function vectorizable_operation.
2906
2907 Check if STMT performs a binary or unary operation that can be vectorized.
2908 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
2909 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
2910 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
2911
2912 bool
2913 vectorizable_operation (tree stmt, block_stmt_iterator *bsi, tree *vec_stmt)
2914 {
2915 tree vec_dest;
2916 tree scalar_dest;
2917 tree operation;
2918 tree op0, op1 = NULL;
2919 tree vec_oprnd0 = NULL_TREE, vec_oprnd1 = NULL_TREE;
2920 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
2921 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
2922 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
2923 enum tree_code code;
2924 enum machine_mode vec_mode;
2925 tree new_temp;
2926 int op_type;
2927 optab optab;
2928 int icode;
2929 enum machine_mode optab_op2_mode;
2930 tree def, def_stmt;
2931 enum vect_def_type dt0, dt1;
2932 tree new_stmt;
2933 stmt_vec_info prev_stmt_info;
2934 int nunits_in = TYPE_VECTOR_SUBPARTS (vectype);
2935 int nunits_out;
2936 tree vectype_out;
2937 int ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits_in;
2938 int j;
2939
2940 gcc_assert (ncopies >= 1);
2941
2942 if (!STMT_VINFO_RELEVANT_P (stmt_info))
2943 return false;
2944
2945 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_loop_def)
2946 return false;
2947
2948 /* FORNOW: not yet supported. */
2949 if (STMT_VINFO_LIVE_P (stmt_info))
2950 {
2951 if (vect_print_dump_info (REPORT_DETAILS))
2952 fprintf (vect_dump, "value used after loop.");
2953 return false;
2954 }
2955
2956 /* Is STMT a vectorizable binary/unary operation? */
2957 if (TREE_CODE (stmt) != GIMPLE_MODIFY_STMT)
2958 return false;
2959
2960 if (TREE_CODE (GIMPLE_STMT_OPERAND (stmt, 0)) != SSA_NAME)
2961 return false;
2962
2963 scalar_dest = GIMPLE_STMT_OPERAND (stmt, 0);
2964 vectype_out = get_vectype_for_scalar_type (TREE_TYPE (scalar_dest));
2965 nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out);
2966 if (nunits_out != nunits_in)
2967 return false;
2968
2969 operation = GIMPLE_STMT_OPERAND (stmt, 1);
2970 code = TREE_CODE (operation);
2971 optab = optab_for_tree_code (code, vectype);
2972
2973 /* Support only unary or binary operations. */
2974 op_type = TREE_OPERAND_LENGTH (operation);
2975 if (op_type != unary_op && op_type != binary_op)
2976 {
2977 if (vect_print_dump_info (REPORT_DETAILS))
2978 fprintf (vect_dump, "num. args = %d (not unary/binary op).", op_type);
2979 return false;
2980 }
2981
2982 op0 = TREE_OPERAND (operation, 0);
2983 if (!vect_is_simple_use (op0, loop_vinfo, &def_stmt, &def, &dt0))
2984 {
2985 if (vect_print_dump_info (REPORT_DETAILS))
2986 fprintf (vect_dump, "use not simple.");
2987 return false;
2988 }
2989
2990 if (op_type == binary_op)
2991 {
2992 op1 = TREE_OPERAND (operation, 1);
2993 if (!vect_is_simple_use (op1, loop_vinfo, &def_stmt, &def, &dt1))
2994 {
2995 if (vect_print_dump_info (REPORT_DETAILS))
2996 fprintf (vect_dump, "use not simple.");
2997 return false;
2998 }
2999 }
3000
3001 /* Supportable by target? */
3002 if (!optab)
3003 {
3004 if (vect_print_dump_info (REPORT_DETAILS))
3005 fprintf (vect_dump, "no optab.");
3006 return false;
3007 }
3008 vec_mode = TYPE_MODE (vectype);
3009 icode = (int) optab->handlers[(int) vec_mode].insn_code;
3010 if (icode == CODE_FOR_nothing)
3011 {
3012 if (vect_print_dump_info (REPORT_DETAILS))
3013 fprintf (vect_dump, "op not supported by target.");
3014 if (GET_MODE_SIZE (vec_mode) != UNITS_PER_WORD
3015 || LOOP_VINFO_VECT_FACTOR (loop_vinfo)
3016 < vect_min_worthwhile_factor (code))
3017 return false;
3018 if (vect_print_dump_info (REPORT_DETAILS))
3019 fprintf (vect_dump, "proceeding using word mode.");
3020 }
3021
3022 /* Worthwhile without SIMD support? */
3023 if (!VECTOR_MODE_P (TYPE_MODE (vectype))
3024 && LOOP_VINFO_VECT_FACTOR (loop_vinfo)
3025 < vect_min_worthwhile_factor (code))
3026 {
3027 if (vect_print_dump_info (REPORT_DETAILS))
3028 fprintf (vect_dump, "not worthwhile without SIMD support.");
3029 return false;
3030 }
3031
3032 if (code == LSHIFT_EXPR || code == RSHIFT_EXPR)
3033 {
3034 /* FORNOW: not yet supported. */
3035 if (!VECTOR_MODE_P (vec_mode))
3036 return false;
3037
3038 /* Invariant argument is needed for a vector shift
3039 by a scalar shift operand. */
3040 optab_op2_mode = insn_data[icode].operand[2].mode;
3041 if (! (VECTOR_MODE_P (optab_op2_mode)
3042 || dt1 == vect_constant_def
3043 || dt1 == vect_invariant_def))
3044 {
3045 if (vect_print_dump_info (REPORT_DETAILS))
3046 fprintf (vect_dump, "operand mode requires invariant argument.");
3047 return false;
3048 }
3049 }
3050
3051 if (!vec_stmt) /* transformation not required. */
3052 {
3053 STMT_VINFO_TYPE (stmt_info) = op_vec_info_type;
3054 if (vect_print_dump_info (REPORT_DETAILS))
3055 fprintf (vect_dump, "=== vectorizable_operation ===");
3056 vect_model_simple_cost (stmt_info, ncopies);
3057 return true;
3058 }
3059
3060 /** Transform. **/
3061
3062 if (vect_print_dump_info (REPORT_DETAILS))
3063 fprintf (vect_dump, "transform binary/unary operation.");
3064
3065 /* Handle def. */
3066 vec_dest = vect_create_destination_var (scalar_dest, vectype);
3067
3068 /* In case the vectorization factor (VF) is bigger than the number
3069 of elements that we can fit in a vectype (nunits), we have to generate
3070 more than one vector stmt - i.e - we need to "unroll" the
3071 vector stmt by a factor VF/nunits. In doing so, we record a pointer
3072 from one copy of the vector stmt to the next, in the field
3073 STMT_VINFO_RELATED_STMT. This is necessary in order to allow following
3074 stages to find the correct vector defs to be used when vectorizing
3075 stmts that use the defs of the current stmt. The example below illustrates
3076 the vectorization process when VF=16 and nunits=4 (i.e - we need to create
3077 4 vectorized stmts):
3078
3079 before vectorization:
3080 RELATED_STMT VEC_STMT
3081 S1: x = memref - -
3082 S2: z = x + 1 - -
3083
3084 step 1: vectorize stmt S1 (done in vectorizable_load. See more details
3085 there):
3086 RELATED_STMT VEC_STMT
3087 VS1_0: vx0 = memref0 VS1_1 -
3088 VS1_1: vx1 = memref1 VS1_2 -
3089 VS1_2: vx2 = memref2 VS1_3 -
3090 VS1_3: vx3 = memref3 - -
3091 S1: x = load - VS1_0
3092 S2: z = x + 1 - -
3093
3094 step2: vectorize stmt S2 (done here):
3095 To vectorize stmt S2 we first need to find the relevant vector
3096 def for the first operand 'x'. This is, as usual, obtained from
3097 the vector stmt recorded in the STMT_VINFO_VEC_STMT of the stmt
3098 that defines 'x' (S1). This way we find the stmt VS1_0, and the
3099 relevant vector def 'vx0'. Having found 'vx0' we can generate
3100 the vector stmt VS2_0, and as usual, record it in the
3101 STMT_VINFO_VEC_STMT of stmt S2.
3102 When creating the second copy (VS2_1), we obtain the relevant vector
3103 def from the vector stmt recorded in the STMT_VINFO_RELATED_STMT of
3104 stmt VS1_0. This way we find the stmt VS1_1 and the relevant
3105 vector def 'vx1'. Using 'vx1' we create stmt VS2_1 and record a
3106 pointer to it in the STMT_VINFO_RELATED_STMT of the vector stmt VS2_0.
3107 Similarly when creating stmts VS2_2 and VS2_3. This is the resulting
3108 chain of stmts and pointers:
3109 RELATED_STMT VEC_STMT
3110 VS1_0: vx0 = memref0 VS1_1 -
3111 VS1_1: vx1 = memref1 VS1_2 -
3112 VS1_2: vx2 = memref2 VS1_3 -
3113 VS1_3: vx3 = memref3 - -
3114 S1: x = load - VS1_0
3115 VS2_0: vz0 = vx0 + v1 VS2_1 -
3116 VS2_1: vz1 = vx1 + v1 VS2_2 -
3117 VS2_2: vz2 = vx2 + v1 VS2_3 -
3118 VS2_3: vz3 = vx3 + v1 - -
3119 S2: z = x + 1 - VS2_0 */
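
  /* (Illustrative source for the example above, with nunits == 4 and
     VF == 16:
	 for (i = 0; i < n; i++)
	   z[i] = x[i] + 1;
     each scalar stmt is expanded into four vector stmts.)  */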
3120
3121 prev_stmt_info = NULL;
3122 for (j = 0; j < ncopies; j++)
3123 {
3124 /* Handle uses. */
3125 if (j == 0)
3126 {
3127 vec_oprnd0 = vect_get_vec_def_for_operand (op0, stmt, NULL);
3128 if (op_type == binary_op)
3129 {
3130 if (code == LSHIFT_EXPR || code == RSHIFT_EXPR)
3131 {
3132 /* Vector shl and shr insn patterns can be defined with
3133 scalar operand 2 (shift operand). In this case, use
3134 constant or loop invariant op1 directly, without
3135 extending it to vector mode first. */
3136 optab_op2_mode = insn_data[icode].operand[2].mode;
3137 if (!VECTOR_MODE_P (optab_op2_mode))
3138 {
3139 if (vect_print_dump_info (REPORT_DETAILS))
3140 fprintf (vect_dump, "operand 1 using scalar mode.");
3141 vec_oprnd1 = op1;
3142 }
3143 }
3144 if (!vec_oprnd1)
3145 vec_oprnd1 = vect_get_vec_def_for_operand (op1, stmt, NULL);
3146 }
3147 }
3148 else
3149 {
3150 vec_oprnd0 = vect_get_vec_def_for_stmt_copy (dt0, vec_oprnd0);
3151 if (op_type == binary_op)
3152 vec_oprnd1 = vect_get_vec_def_for_stmt_copy (dt1, vec_oprnd1);
3153 }
3154
3155 /* Arguments are ready. Create the new vector stmt. */
3156
3157 if (op_type == binary_op)
3158 new_stmt = build_gimple_modify_stmt (vec_dest,
3159 build2 (code, vectype, vec_oprnd0, vec_oprnd1));
3160 else
3161 new_stmt = build_gimple_modify_stmt (vec_dest,
3162 build1 (code, vectype, vec_oprnd0));
3163 new_temp = make_ssa_name (vec_dest, new_stmt);
3164 GIMPLE_STMT_OPERAND (new_stmt, 0) = new_temp;
3165 vect_finish_stmt_generation (stmt, new_stmt, bsi);
3166
3167 if (j == 0)
3168 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
3169 else
3170 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
3171 prev_stmt_info = vinfo_for_stmt (new_stmt);
3172 }
3173
3174 return true;
3175 }
3176
3177
3178 /* Function vectorizable_type_demotion
3179
3180 Check if STMT performs a binary or unary operation that involves
3181 type demotion, and if it can be vectorized.
3182 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
3183 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
3184 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
3185
3186 bool
3187 vectorizable_type_demotion (tree stmt, block_stmt_iterator *bsi,
3188 tree *vec_stmt)
3189 {
3190 tree vec_dest;
3191 tree scalar_dest;
3192 tree operation;
3193 tree op0;
3194 tree vec_oprnd0=NULL, vec_oprnd1=NULL;
3195 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
3196 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
3197 enum tree_code code, code1 = ERROR_MARK;
3198 tree new_temp;
3199 tree def, def_stmt;
3200 enum vect_def_type dt0;
3201 tree new_stmt;
3202 stmt_vec_info prev_stmt_info;
3203 int nunits_in;
3204 int nunits_out;
3205 tree vectype_out;
3206 int ncopies;
3207 int j;
3208 tree expr;
3209 tree vectype_in;
3210
3211 if (!STMT_VINFO_RELEVANT_P (stmt_info))
3212 return false;
3213
3214 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_loop_def)
3215 return false;
3216
3217 /* FORNOW: not yet supported. */
3218 if (STMT_VINFO_LIVE_P (stmt_info))
3219 {
3220 if (vect_print_dump_info (REPORT_DETAILS))
3221 fprintf (vect_dump, "value used after loop.");
3222 return false;
3223 }
3224
3225 /* Is STMT a vectorizable type-demotion operation? */
3226 if (TREE_CODE (stmt) != GIMPLE_MODIFY_STMT)
3227 return false;
3228
3229 if (TREE_CODE (GIMPLE_STMT_OPERAND (stmt, 0)) != SSA_NAME)
3230 return false;
3231
3232 operation = GIMPLE_STMT_OPERAND (stmt, 1);
3233 code = TREE_CODE (operation);
3234 if (code != NOP_EXPR && code != CONVERT_EXPR)
3235 return false;
3236
3237 op0 = TREE_OPERAND (operation, 0);
3238 vectype_in = get_vectype_for_scalar_type (TREE_TYPE (op0));
3239 nunits_in = TYPE_VECTOR_SUBPARTS (vectype_in);
3240
3241 scalar_dest = GIMPLE_STMT_OPERAND (stmt, 0);
3242 vectype_out = get_vectype_for_scalar_type (TREE_TYPE (scalar_dest));
3243 nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out);
3244 if (nunits_in != nunits_out / 2) /* FORNOW */
3245 return false;
3246
3247 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits_out;
3248 gcc_assert (ncopies >= 1);
3249
3250 if (! ((INTEGRAL_TYPE_P (TREE_TYPE (scalar_dest))
3251 && INTEGRAL_TYPE_P (TREE_TYPE (op0)))
3252 || (SCALAR_FLOAT_TYPE_P (TREE_TYPE (scalar_dest))
3253 && SCALAR_FLOAT_TYPE_P (TREE_TYPE (op0))
3254 && (code == NOP_EXPR || code == CONVERT_EXPR))))
3255 return false;
3256
3257 /* Check the operands of the operation. */
3258 if (!vect_is_simple_use (op0, loop_vinfo, &def_stmt, &def, &dt0))
3259 {
3260 if (vect_print_dump_info (REPORT_DETAILS))
3261 fprintf (vect_dump, "use not simple.");
3262 return false;
3263 }
3264
3265 /* Supportable by target? */
3266 if (!supportable_narrowing_operation (code, stmt, vectype_in, &code1))
3267 return false;
3268
3269 STMT_VINFO_VECTYPE (stmt_info) = vectype_in;
3270
3271 if (!vec_stmt) /* transformation not required. */
3272 {
3273 STMT_VINFO_TYPE (stmt_info) = type_demotion_vec_info_type;
3274 if (vect_print_dump_info (REPORT_DETAILS))
3275 fprintf (vect_dump, "=== vectorizable_demotion ===");
3276 vect_model_simple_cost (stmt_info, ncopies);
3277 return true;
3278 }
3279
3280 /** Transform. **/
3281 if (vect_print_dump_info (REPORT_DETAILS))
3282 fprintf (vect_dump, "transform type demotion operation. ncopies = %d.",
3283 ncopies);
3284
3285 /* Handle def. */
3286 vec_dest = vect_create_destination_var (scalar_dest, vectype_out);
3287
3288 /* In case the vectorization factor (VF) is bigger than the number
3289 of elements that we can fit in a vectype (nunits), we have to generate
3290 more than one vector stmt - i.e. - we need to "unroll" the
3291 vector stmt by a factor VF/nunits. */
3292 prev_stmt_info = NULL;
3293 for (j = 0; j < ncopies; j++)
3294 {
3295 /* Handle uses. */
3296 if (j == 0)
3297 {
3298 vec_oprnd0 = vect_get_vec_def_for_operand (op0, stmt, NULL);
3299 vec_oprnd1 = vect_get_vec_def_for_stmt_copy (dt0, vec_oprnd0);
3300 }
3301 else
3302 {
3303 vec_oprnd0 = vect_get_vec_def_for_stmt_copy (dt0, vec_oprnd1);
3304 vec_oprnd1 = vect_get_vec_def_for_stmt_copy (dt0, vec_oprnd0);
3305 }
3306
3307 /* Arguments are ready. Create the new vector stmt. */
3308 expr = build2 (code1, vectype_out, vec_oprnd0, vec_oprnd1);
3309 new_stmt = build_gimple_modify_stmt (vec_dest, expr);
3310 new_temp = make_ssa_name (vec_dest, new_stmt);
3311 GIMPLE_STMT_OPERAND (new_stmt, 0) = new_temp;
3312 vect_finish_stmt_generation (stmt, new_stmt, bsi);
3313
3314 if (j == 0)
3315 STMT_VINFO_VEC_STMT (stmt_info) = new_stmt;
3316 else
3317 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
3318
3319 prev_stmt_info = vinfo_for_stmt (new_stmt);
3320 }
3321
3322 *vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);
3323 return true;
3324 }
3325
3326
3327 /* Function vectorizable_type_promotion
3328
3329 Check if STMT performs a binary or unary operation that involves
3330 type promotion, and if it can be vectorized.
3331 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
3332 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
3333 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
3334
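/* Editorial sketch (not part of the pass): a scalar loop of the shape this
   routine vectorizes, using hypothetical arrays.  A NOP_EXPR/CONVERT_EXPR
   promotion widens each element; the widening multiply below is the binary
   case (WIDEN_MULT_EXPR).  One input vector of nunits_in shorts expands
   into two output vectors of nunits_out == nunits_in / 2 ints each, which
   is why two halves of the widened result are generated per copy below.  */

static void
example_promotion (int *dst, const short *a, const short *b, int n)
{
  int i;
  for (i = 0; i < n; i++)
    dst[i] = (int) a[i] * (int) b[i];  /* widening multiply  */
}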
3335 bool
3336 vectorizable_type_promotion (tree stmt, block_stmt_iterator *bsi,
3337 tree *vec_stmt)
3338 {
3339 tree vec_dest;
3340 tree scalar_dest;
3341 tree operation;
3342 tree op0, op1 = NULL;
3343 tree vec_oprnd0 = NULL, vec_oprnd1 = NULL;
3344 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
3345 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
3346 enum tree_code code, code1 = ERROR_MARK, code2 = ERROR_MARK;
3347 tree decl1 = NULL_TREE, decl2 = NULL_TREE;
3348 int op_type;
3349 tree def, def_stmt;
3350 enum vect_def_type dt0, dt1;
3351 tree new_stmt;
3352 stmt_vec_info prev_stmt_info;
3353 int nunits_in;
3354 int nunits_out;
3355 tree vectype_out;
3356 int ncopies;
3357 int j;
3358 tree vectype_in;
3359
3360 if (!STMT_VINFO_RELEVANT_P (stmt_info))
3361 return false;
3362
3363 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_loop_def)
3364 return false;
3365
3366 /* FORNOW: not yet supported. */
3367 if (STMT_VINFO_LIVE_P (stmt_info))
3368 {
3369 if (vect_print_dump_info (REPORT_DETAILS))
3370 fprintf (vect_dump, "value used after loop.");
3371 return false;
3372 }
3373
3374 /* Is STMT a vectorizable type-promotion operation? */
3375 if (TREE_CODE (stmt) != GIMPLE_MODIFY_STMT)
3376 return false;
3377
3378 if (TREE_CODE (GIMPLE_STMT_OPERAND (stmt, 0)) != SSA_NAME)
3379 return false;
3380
3381 operation = GIMPLE_STMT_OPERAND (stmt, 1);
3382 code = TREE_CODE (operation);
3383 if (code != NOP_EXPR && code != CONVERT_EXPR
3384 && code != WIDEN_MULT_EXPR)
3385 return false;
3386
3387 op0 = TREE_OPERAND (operation, 0);
3388 vectype_in = get_vectype_for_scalar_type (TREE_TYPE (op0));
3389 nunits_in = TYPE_VECTOR_SUBPARTS (vectype_in);
3390
3391 scalar_dest = GIMPLE_STMT_OPERAND (stmt, 0);
3392 vectype_out = get_vectype_for_scalar_type (TREE_TYPE (scalar_dest));
3393 nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out);
3394 if (nunits_out != nunits_in / 2) /* FORNOW */
3395 return false;
3396
3397 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits_in;
3398 gcc_assert (ncopies >= 1);
3399
3400 if (! ((INTEGRAL_TYPE_P (TREE_TYPE (scalar_dest))
3401 && INTEGRAL_TYPE_P (TREE_TYPE (op0)))
3402 || (SCALAR_FLOAT_TYPE_P (TREE_TYPE (scalar_dest))
3403 && SCALAR_FLOAT_TYPE_P (TREE_TYPE (op0))
3404 && (code == CONVERT_EXPR || code == NOP_EXPR))))
3405 return false;
3406
3407 /* Check the operands of the operation. */
3408 if (!vect_is_simple_use (op0, loop_vinfo, &def_stmt, &def, &dt0))
3409 {
3410 if (vect_print_dump_info (REPORT_DETAILS))
3411 fprintf (vect_dump, "use not simple.");
3412 return false;
3413 }
3414
3415 op_type = TREE_CODE_LENGTH (code);
3416 if (op_type == binary_op)
3417 {
3418 op1 = TREE_OPERAND (operation, 1);
3419 if (!vect_is_simple_use (op1, loop_vinfo, &def_stmt, &def, &dt1))
3420 {
3421 if (vect_print_dump_info (REPORT_DETAILS))
3422 fprintf (vect_dump, "use not simple.");
3423 return false;
3424 }
3425 }
3426
3427 /* Supportable by target? */
3428 if (!supportable_widening_operation (code, stmt, vectype_in,
3429 &decl1, &decl2, &code1, &code2))
3430 return false;
3431
3432 STMT_VINFO_VECTYPE (stmt_info) = vectype_in;
3433
3434 if (!vec_stmt) /* transformation not required. */
3435 {
3436 STMT_VINFO_TYPE (stmt_info) = type_promotion_vec_info_type;
3437 if (vect_print_dump_info (REPORT_DETAILS))
3438 fprintf (vect_dump, "=== vectorizable_promotion ===");
3439 vect_model_simple_cost (stmt_info, 2*ncopies);
3440 return true;
3441 }
3442
3443 /** Transform. **/
3444
3445 if (vect_print_dump_info (REPORT_DETAILS))
3446 fprintf (vect_dump, "transform type promotion operation. ncopies = %d.",
3447 ncopies);
3448
3449 /* Handle def. */
3450 vec_dest = vect_create_destination_var (scalar_dest, vectype_out);
3451
3452 /* In case the vectorization factor (VF) is bigger than the number
3453 of elements that we can fit in a vectype (nunits), we have to generate
3454 more than one vector stmt - i.e. - we need to "unroll" the
3455 vector stmt by a factor VF/nunits. */
3456
3457 prev_stmt_info = NULL;
3458 for (j = 0; j < ncopies; j++)
3459 {
3460 /* Handle uses. */
3461 if (j == 0)
3462 {
3463 vec_oprnd0 = vect_get_vec_def_for_operand (op0, stmt, NULL);
3464 if (op_type == binary_op)
3465 vec_oprnd1 = vect_get_vec_def_for_operand (op1, stmt, NULL);
3466 }
3467 else
3468 {
3469 vec_oprnd0 = vect_get_vec_def_for_stmt_copy (dt0, vec_oprnd0);
3470 if (op_type == binary_op)
3471 vec_oprnd1 = vect_get_vec_def_for_stmt_copy (dt1, vec_oprnd1);
3472 }
3473
3474 /* Arguments are ready. Create the new vector stmt. We are creating
3475 two vector defs because the widened result does not fit in one vector.
3476 The vectorized stmt can be expressed as a call to a target builtin,
3477 or using a tree-code. */
3478 /* Generate first half of the widened result: */
3479 new_stmt = vect_gen_widened_results_half (code1, vectype_out, decl1,
3480 vec_oprnd0, vec_oprnd1, op_type, vec_dest, bsi, stmt);
3481 if (j == 0)
3482 STMT_VINFO_VEC_STMT (stmt_info) = new_stmt;
3483 else
3484 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
3485 prev_stmt_info = vinfo_for_stmt (new_stmt);
3486
3487 /* Generate second half of the widened result: */
3488 new_stmt = vect_gen_widened_results_half (code2, vectype_out, decl2,
3489 vec_oprnd0, vec_oprnd1, op_type, vec_dest, bsi, stmt);
3490 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
3491 prev_stmt_info = vinfo_for_stmt (new_stmt);
3492
3493 }
3494
3495 *vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);
3496 return true;
3497 }
3498
3499
3500 /* Function vect_strided_store_supported.
3501
3502 Returns TRUE if INTERLEAVE_HIGH and INTERLEAVE_LOW operations are supported,
3503 and FALSE otherwise. */
3504
3505 static bool
3506 vect_strided_store_supported (tree vectype)
3507 {
3508 optab interleave_high_optab, interleave_low_optab;
3509 int mode;
3510
3511 mode = (int) TYPE_MODE (vectype);
3512
3513 /* Check that the operation is supported. */
3514 interleave_high_optab = optab_for_tree_code (VEC_INTERLEAVE_HIGH_EXPR,
3515 vectype);
3516 interleave_low_optab = optab_for_tree_code (VEC_INTERLEAVE_LOW_EXPR,
3517 vectype);
3518 if (!interleave_high_optab || !interleave_low_optab)
3519 {
3520 if (vect_print_dump_info (REPORT_DETAILS))
3521 fprintf (vect_dump, "no optab for interleave.");
3522 return false;
3523 }
3524
3525 if (interleave_high_optab->handlers[(int) mode].insn_code
3526 == CODE_FOR_nothing
3527 || interleave_low_optab->handlers[(int) mode].insn_code
3528 == CODE_FOR_nothing)
3529 {
3530 if (vect_print_dump_info (REPORT_DETAILS))
3531 fprintf (vect_dump, "interleave op not supported by target.");
3532 return false;
3533 }
3534 return true;
3535 }
3536
3537
3538 /* Function vect_permute_store_chain.
3539
3540 Given a chain of interleaved stores in DR_CHAIN, whose LENGTH must be
3541 a power of 2, generate interleave_high/low stmts to reorder the data
3542 correctly for the stores. Return the final references for stores in
3543 RESULT_CHAIN.
3544
3545 E.g., LENGTH is 4 and the scalar type is short, i.e., VF is 8.
3546 The input is 4 vectors each containing 8 elements. We assign a number to each
3547 element; the input sequence is:
3548
3549 1st vec: 0 1 2 3 4 5 6 7
3550 2nd vec: 8 9 10 11 12 13 14 15
3551 3rd vec: 16 17 18 19 20 21 22 23
3552 4th vec: 24 25 26 27 28 29 30 31
3553
3554 The output sequence should be:
3555
3556 1st vec: 0 8 16 24 1 9 17 25
3557 2nd vec: 2 10 18 26 3 11 19 27
3558 3rd vec: 4 12 20 28 5 13 21 29
3559 4th vec: 6 14 22 30 7 15 23 31
3560
3561 i.e., we interleave the contents of the four vectors in their order.
3562
3563 We use interleave_high/low instructions to create such output. The input of
3564 each interleave_high/low operation is two vectors:
3565 1st vec 2nd vec
3566 0 1 2 3 4 5 6 7
3567 the even elements of the result vector are obtained left-to-right from the
3568 high/low elements of the first vector. The odd elements of the result are
3569 obtained left-to-right from the high/low elements of the second vector.
3570 The output of interleave_high will be: 0 4 1 5
3571 and of interleave_low: 2 6 3 7
3572
3573
3574 The permutation is done in log2 (LENGTH) stages. In each stage interleave_high
3575 and interleave_low stmts are created for each pair of vectors in DR_CHAIN,
3576 where the first argument is taken from the first half of DR_CHAIN and the
3577 second argument from its second half.
3578 In our example,
3579
3580 I1: interleave_high (1st vec, 3rd vec)
3581 I2: interleave_low (1st vec, 3rd vec)
3582 I3: interleave_high (2nd vec, 4th vec)
3583 I4: interleave_low (2nd vec, 4th vec)
3584
3585 The output for the first stage is:
3586
3587 I1: 0 16 1 17 2 18 3 19
3588 I2: 4 20 5 21 6 22 7 23
3589 I3: 8 24 9 25 10 26 11 27
3590 I4: 12 28 13 29 14 30 15 31
3591
3592 The output of the second stage, i.e. the final result is:
3593
3594 I1: 0 8 16 24 1 9 17 25
3595 I2: 2 10 18 26 3 11 19 27
3596 I3: 4 12 20 28 5 13 21 29
3597 I4: 6 14 22 30 7 15 23 31. */
3598
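/* Editorial sketch (standalone, not part of the pass): simulate the
   interleave network described above for LENGTH == 4 and 8-element
   vectors, so the stage-by-stage reordering can be checked by hand.
   The "high"/"low" selection follows the big-endian description in the
   comment; memcpy is available via system.h.  */

#define EX_LEN  4
#define EX_ELTS 8

static void
example_interleave (const int *v1, const int *v2, int *out, int high)
{
  int k, base = high ? 0 : EX_ELTS / 2;
  for (k = 0; k < EX_ELTS / 2; k++)
    {
      out[2 * k] = v1[base + k];      /* even slots from the first vector  */
      out[2 * k + 1] = v2[base + k];  /* odd slots from the second vector  */
    }
}

static void
example_permute_store_chain (int chain[EX_LEN][EX_ELTS])
{
  int stage, j;
  int tmp[EX_LEN][EX_ELTS];

  for (stage = 0; stage < 2; stage++)  /* log2 (EX_LEN) stages  */
    {
      for (j = 0; j < EX_LEN / 2; j++)
        {
          example_interleave (chain[j], chain[j + EX_LEN / 2],
                              tmp[2 * j], 1);
          example_interleave (chain[j], chain[j + EX_LEN / 2],
                              tmp[2 * j + 1], 0);
        }
      memcpy (chain, tmp, sizeof (tmp));
    }
}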
3599 static bool
3600 vect_permute_store_chain (VEC(tree,heap) *dr_chain,
3601 unsigned int length,
3602 tree stmt,
3603 block_stmt_iterator *bsi,
3604 VEC(tree,heap) **result_chain)
3605 {
3606 tree perm_dest, perm_stmt, vect1, vect2, high, low;
3607 tree vectype = STMT_VINFO_VECTYPE (vinfo_for_stmt (stmt));
3608 tree scalar_dest, tmp;
3609 int i;
3610 unsigned int j;
3611 VEC(tree,heap) *first, *second;
3612
3613 scalar_dest = GIMPLE_STMT_OPERAND (stmt, 0);
3614 first = VEC_alloc (tree, heap, length/2);
3615 second = VEC_alloc (tree, heap, length/2);
3616
3617 /* Check that the operation is supported. */
3618 if (!vect_strided_store_supported (vectype))
3619 return false;
3620
3621 *result_chain = VEC_copy (tree, heap, dr_chain);
3622
3623 for (i = 0; i < exact_log2 (length); i++)
3624 {
3625 for (j = 0; j < length/2; j++)
3626 {
3627 vect1 = VEC_index (tree, dr_chain, j);
3628 vect2 = VEC_index (tree, dr_chain, j+length/2);
3629
3630 /* Create interleaving stmt:
3631 in the case of big endian:
3632 high = interleave_high (vect1, vect2)
3633 and in the case of little endian:
3634 high = interleave_low (vect1, vect2). */
3635 perm_dest = create_tmp_var (vectype, "vect_inter_high");
3636 DECL_GIMPLE_REG_P (perm_dest) = 1;
3637 add_referenced_var (perm_dest);
3638 if (BYTES_BIG_ENDIAN)
3639 tmp = build2 (VEC_INTERLEAVE_HIGH_EXPR, vectype, vect1, vect2);
3640 else
3641 tmp = build2 (VEC_INTERLEAVE_LOW_EXPR, vectype, vect1, vect2);
3642 perm_stmt = build_gimple_modify_stmt (perm_dest, tmp);
3643 high = make_ssa_name (perm_dest, perm_stmt);
3644 GIMPLE_STMT_OPERAND (perm_stmt, 0) = high;
3645 vect_finish_stmt_generation (stmt, perm_stmt, bsi);
3646 VEC_replace (tree, *result_chain, 2*j, high);
3647
3648 /* Create interleaving stmt:
3649 in the case of big endian:
3650 low = interleave_low (vect1, vect2)
3651 and in the case of little endian:
3652 low = interleave_high (vect1, vect2). */
3653 perm_dest = create_tmp_var (vectype, "vect_inter_low");
3654 DECL_GIMPLE_REG_P (perm_dest) = 1;
3655 add_referenced_var (perm_dest);
3656 if (BYTES_BIG_ENDIAN)
3657 tmp = build2 (VEC_INTERLEAVE_LOW_EXPR, vectype, vect1, vect2);
3658 else
3659 tmp = build2 (VEC_INTERLEAVE_HIGH_EXPR, vectype, vect1, vect2);
3660 perm_stmt = build_gimple_modify_stmt (perm_dest, tmp);
3661 low = make_ssa_name (perm_dest, perm_stmt);
3662 GIMPLE_STMT_OPERAND (perm_stmt, 0) = low;
3663 vect_finish_stmt_generation (stmt, perm_stmt, bsi);
3664 VEC_replace (tree, *result_chain, 2*j+1, low);
3665 }
3666 dr_chain = VEC_copy (tree, heap, *result_chain);
3667 }
3668 return true;
3669 }
3670
3671
3672 /* Function vectorizable_store.
3673
3674 Check if STMT defines a non-scalar data-ref (array/pointer/structure) that
3675 can be vectorized.
3676 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
3677 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
3678 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
3679
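/* Editorial sketch (not part of the pass): an interleaved (strided) store
   loop of the kind handled below, with a DR group of size 2.  Both stores
   belong to one chain; the whole chain is vectorized only when the last
   store of the group is reached.  */

static void
example_strided_store (int *a, const int *x, const int *y, int n)
{
  int i;
  for (i = 0; i < n; i++)
    {
      a[2 * i] = x[i];      /* first store in the DR group  */
      a[2 * i + 1] = y[i];  /* last store: triggers the transformation  */
    }
}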
3680 bool
3681 vectorizable_store (tree stmt, block_stmt_iterator *bsi, tree *vec_stmt)
3682 {
3683 tree scalar_dest;
3684 tree data_ref;
3685 tree op;
3686 tree vec_oprnd = NULL_TREE;
3687 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
3688 struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info), *first_dr = NULL;
3689 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
3690 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
3691 enum machine_mode vec_mode;
3692 tree dummy;
3693 enum dr_alignment_support alignment_support_scheme;
3694 ssa_op_iter iter;
3695 def_operand_p def_p;
3696 tree def, def_stmt;
3697 enum vect_def_type dt;
3698 stmt_vec_info prev_stmt_info = NULL;
3699 tree dataref_ptr = NULL_TREE;
3700 int nunits = TYPE_VECTOR_SUBPARTS (vectype);
3701 int ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits;
3702 int j;
3703 tree next_stmt, first_stmt;
3704 bool strided_store = false;
3705 unsigned int group_size, i;
3706 VEC(tree,heap) *dr_chain = NULL, *oprnds = NULL, *result_chain = NULL;
3707 gcc_assert (ncopies >= 1);
3708
3709 if (!STMT_VINFO_RELEVANT_P (stmt_info))
3710 return false;
3711
3712 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_loop_def)
3713 return false;
3714
3715 if (STMT_VINFO_LIVE_P (stmt_info))
3716 {
3717 if (vect_print_dump_info (REPORT_DETAILS))
3718 fprintf (vect_dump, "value used after loop.");
3719 return false;
3720 }
3721
3722 /* Is vectorizable store? */
3723
3724 if (TREE_CODE (stmt) != GIMPLE_MODIFY_STMT)
3725 return false;
3726
3727 scalar_dest = GIMPLE_STMT_OPERAND (stmt, 0);
3728 if (TREE_CODE (scalar_dest) != ARRAY_REF
3729 && TREE_CODE (scalar_dest) != INDIRECT_REF
3730 && !DR_GROUP_FIRST_DR (stmt_info))
3731 return false;
3732
3733 op = GIMPLE_STMT_OPERAND (stmt, 1);
3734 if (!vect_is_simple_use (op, loop_vinfo, &def_stmt, &def, &dt))
3735 {
3736 if (vect_print_dump_info (REPORT_DETAILS))
3737 fprintf (vect_dump, "use not simple.");
3738 return false;
3739 }
3740
3741 vec_mode = TYPE_MODE (vectype);
3742 /* FORNOW. In some cases we can vectorize even if the data-type is not
3743 supported (e.g. - array initialization with 0). */
3744 if (mov_optab->handlers[(int)vec_mode].insn_code == CODE_FOR_nothing)
3745 return false;
3746
3747 if (!STMT_VINFO_DATA_REF (stmt_info))
3748 return false;
3749
3750 if (DR_GROUP_FIRST_DR (stmt_info))
3751 {
3752 strided_store = true;
3753 if (!vect_strided_store_supported (vectype))
3754 return false;
3755 }
3756
3757 if (!vec_stmt) /* transformation not required. */
3758 {
3759 STMT_VINFO_TYPE (stmt_info) = store_vec_info_type;
3760 vect_model_store_cost (stmt_info, ncopies);
3761 return true;
3762 }
3763
3764 /** Transform. **/
3765
3766 if (strided_store)
3767 {
3768 first_stmt = DR_GROUP_FIRST_DR (stmt_info);
3769 first_dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt));
3770 group_size = DR_GROUP_SIZE (vinfo_for_stmt (first_stmt));
3771
3772 DR_GROUP_STORE_COUNT (vinfo_for_stmt (first_stmt))++;
3773
3774 /* We vectorize all the stmts of the interleaving group when we
3775 reach the last stmt in the group. */
3776 if (DR_GROUP_STORE_COUNT (vinfo_for_stmt (first_stmt))
3777 < DR_GROUP_SIZE (vinfo_for_stmt (first_stmt)))
3778 {
3779 *vec_stmt = NULL_TREE;
3780 return true;
3781 }
3782 }
3783 else
3784 {
3785 first_stmt = stmt;
3786 first_dr = dr;
3787 group_size = 1;
3788 }
3789
3790 if (vect_print_dump_info (REPORT_DETAILS))
3791 fprintf (vect_dump, "transform store. ncopies = %d", ncopies);
3792
3793 dr_chain = VEC_alloc (tree, heap, group_size);
3794 oprnds = VEC_alloc (tree, heap, group_size);
3795
3796 alignment_support_scheme = vect_supportable_dr_alignment (first_dr);
3797 gcc_assert (alignment_support_scheme);
3798 gcc_assert (alignment_support_scheme == dr_aligned); /* FORNOW */
3799
3800 /* In case the vectorization factor (VF) is bigger than the number
3801 of elements that we can fit in a vectype (nunits), we have to generate
3802 more than one vector stmt - i.e. - we need to "unroll" the
3803 vector stmt by a factor VF/nunits. For more details see documentation in
3804 vect_get_vec_def_for_copy_stmt. */
3805
3806 /* In case of interleaving (non-unit strided access):
3807
3808 S1: &base + 2 = x2
3809 S2: &base = x0
3810 S3: &base + 1 = x1
3811 S4: &base + 3 = x3
3812
3813 We create vectorized stores starting from the base address (the access of
3814 the first stmt in the chain, S2 in the above example) when the last store
3815 stmt of the chain (S4) is reached:
3816
3817 VS1: &base = vx2
3818 VS2: &base + vec_size*1 = vx0
3819 VS3: &base + vec_size*2 = vx1
3820 VS4: &base + vec_size*3 = vx3
3821
3822 Then permutation statements are generated:
3823
3824 VS5: vx5 = VEC_INTERLEAVE_HIGH_EXPR < vx0, vx3 >
3825 VS6: vx6 = VEC_INTERLEAVE_LOW_EXPR < vx0, vx3 >
3826 ...
3827
3828 And they are put in STMT_VINFO_VEC_STMT of the corresponding scalar stmts
3829 (the order of the data-refs in the output of vect_permute_store_chain
3830 corresponds to the order of scalar stmts in the interleaving chain - see
3831 the documentation of vect_permute_store_chain()).
3832
3833 In case of both multiple types and interleaving, the above vector stores and
3834 permutation stmts are created for every copy. The result vector stmts are
3835 put in STMT_VINFO_VEC_STMT for the first copy and in the corresponding
3836 STMT_VINFO_RELATED_STMT for the next copies.
3837 */
3838
3839 prev_stmt_info = NULL;
3840 for (j = 0; j < ncopies; j++)
3841 {
3842 tree new_stmt;
3843 tree ptr_incr;
3844
3845 if (j == 0)
3846 {
3847 /* For interleaved stores we collect vectorized defs for all the
3848 stores in the group in DR_CHAIN and OPRNDS. DR_CHAIN is then used
3849 as an input to vect_permute_store_chain(), and OPRNDS as an input
3850 to vect_get_vec_def_for_stmt_copy() for the next copy.
3851 If the store is not strided, GROUP_SIZE is 1, and DR_CHAIN and
3852 OPRNDS are of size 1. */
3853 next_stmt = first_stmt;
3854 for (i = 0; i < group_size; i++)
3855 {
3856 /* Since gaps are not supported for interleaved stores, GROUP_SIZE
3857 is the exact number of stmts in the chain. Therefore, NEXT_STMT
3858 can't be NULL_TREE. In case there is no interleaving,
3859 GROUP_SIZE is 1, and only one iteration of the loop will be
3860 executed. */
3861 gcc_assert (next_stmt);
3862 op = GIMPLE_STMT_OPERAND (next_stmt, 1);
3863 vec_oprnd = vect_get_vec_def_for_operand (op, next_stmt, NULL);
3864 VEC_quick_push (tree, dr_chain, vec_oprnd);
3865 VEC_quick_push (tree, oprnds, vec_oprnd);
3866 next_stmt = DR_GROUP_NEXT_DR (vinfo_for_stmt (next_stmt));
3867 }
3868 dataref_ptr = vect_create_data_ref_ptr (first_stmt, bsi, NULL_TREE,
3869 &dummy, &ptr_incr, false,
3870 TREE_TYPE (vec_oprnd));
3871 }
3872 else
3873 {
3874 /* For interleaved stores we created vectorized defs for all the
3875 defs stored in OPRNDS in the previous iteration (previous copy).
3876 DR_CHAIN is then used as an input to vect_permute_store_chain(),
3877 and OPRNDS as an input to vect_get_vec_def_for_stmt_copy() for the
3878 next copy.
3879 If the store is not strided, GROUP_SIZE is 1, and DR_CHAIN and
3880 OPRNDS are of size 1. */
3881 for (i = 0; i < group_size; i++)
3882 {
3883 vec_oprnd = vect_get_vec_def_for_stmt_copy (dt,
3884 VEC_index (tree, oprnds, i));
3885 VEC_replace (tree, dr_chain, i, vec_oprnd);
3886 VEC_replace (tree, oprnds, i, vec_oprnd);
3887 }
3888 dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, bsi, stmt);
3889 }
3890
3891 if (strided_store)
3892 {
3893 result_chain = VEC_alloc (tree, heap, group_size);
3894 /* Permute. */
3895 if (!vect_permute_store_chain (dr_chain, group_size, stmt, bsi,
3896 &result_chain))
3897 return false;
3898 }
3899
3900 next_stmt = first_stmt;
3901 for (i = 0; i < group_size; i++)
3902 {
3903 /* For strided stores vectorized defs are interleaved in
3904 vect_permute_store_chain(). */
3905 if (strided_store)
3906 vec_oprnd = VEC_index (tree, result_chain, i);
3907
3908 data_ref = build_fold_indirect_ref (dataref_ptr);
3909 /* Arguments are ready. Create the new vector stmt. */
3910 new_stmt = build_gimple_modify_stmt (data_ref, vec_oprnd);
3911 vect_finish_stmt_generation (stmt, new_stmt, bsi);
3912
3913 /* Set the VDEFs for the vector pointer. If this virtual def
3914 has a use outside the loop and a loop peel is performed
3915 then the def may be renamed by the peel. Mark it for
3916 renaming so the later use will also be renamed. */
3917 copy_virtual_operands (new_stmt, next_stmt);
3918 if (j == 0)
3919 {
3920 /* The original store is deleted so the same SSA_NAMEs
3921 can be used. */
3922 FOR_EACH_SSA_TREE_OPERAND (def, next_stmt, iter, SSA_OP_VDEF)
3923 {
3924 SSA_NAME_DEF_STMT (def) = new_stmt;
3925 mark_sym_for_renaming (SSA_NAME_VAR (def));
3926 }
3927
3928 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
3929 }
3930 else
3931 {
3932 /* Create new names for all the definitions created by this
3933 copy of the stmt, and add replacement mappings for each new name. */
3934 FOR_EACH_SSA_DEF_OPERAND (def_p, new_stmt, iter, SSA_OP_VDEF)
3935 {
3936 create_new_def_for (DEF_FROM_PTR (def_p), new_stmt, def_p);
3937 mark_sym_for_renaming (SSA_NAME_VAR (DEF_FROM_PTR (def_p)));
3938 }
3939
3940 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
3941 }
3942
3943 prev_stmt_info = vinfo_for_stmt (new_stmt);
3944 next_stmt = DR_GROUP_NEXT_DR (vinfo_for_stmt (next_stmt));
3945 if (!next_stmt)
3946 break;
3947 /* Bump the vector pointer. */
3948 dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, bsi, stmt);
3949 }
3950 }
3951
3952 return true;
3953 }
3954
3955
3956 /* Function vect_setup_realignment
3957
3958 This function is called when vectorizing an unaligned load using
3959 the dr_unaligned_software_pipeline scheme.
3960 This function generates the following code at the loop prolog:
3961
3962 p = initial_addr;
3963 msq_init = *(floor(p)); # prolog load
3964 realignment_token = call target_builtin;
3965 loop:
3966 msq = phi (msq_init, ---)
3967
3968 The code above sets up a new (vector) pointer, pointing to the first
3969 location accessed by STMT, and a "floor-aligned" load using that pointer.
3970 It also generates code to compute the "realignment-token" (if the relevant
3971 target hook was defined), and creates a phi-node at the loop-header bb
3972 whose arguments are the result of the prolog-load (created by this
3973 function) and the result of a load that takes place in the loop (to be
3974 created by the caller to this function).
3975 The caller to this function uses the phi-result (msq) to create the
3976 realignment code inside the loop, and sets up the missing phi argument,
3977 as follows:
3978
3979 loop:
3980 msq = phi (msq_init, lsq)
3981 lsq = *(floor(p')); # load in loop
3982 result = realign_load (msq, lsq, realignment_token);
3983
3984 Input:
3985 STMT - (scalar) load stmt to be vectorized. This load accesses
3986 a memory location that may be unaligned.
3987 BSI - place where new code is to be inserted.
3988
3989 Output:
3990 REALIGNMENT_TOKEN - the result of a call to the builtin_mask_for_load
3991 target hook, if defined.
3992 Return value - the result of the loop-header phi node. */
3993
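/* Editorial sketch (standalone, not part of the pass): a scalar emulation
   of the realignment scheme for 4-element int vectors, assuming a 16-byte
   vector alignment.  floor(p) rounds P down to a vector boundary; the
   realign_load step selects 4 consecutive elements out of the two aligned
   loads, starting at the original misalignment.  In the loop itself the
   current lsq becomes the msq of the next iteration.  */

static void
example_realign_load (const int *p, int *result)
{
  const int *fp = (const int *) ((size_t) p & ~(size_t) 15);
  int shift = p - fp;  /* misalignment, in elements  */
  int msq[4], lsq[4], k;

  memcpy (msq, fp, sizeof (msq));      /* msq_init = *(floor (p))  */
  memcpy (lsq, fp + 4, sizeof (lsq));  /* lsq = next aligned vector  */
  for (k = 0; k < 4; k++)              /* realign_load (msq, lsq, token)  */
    result[k] = k + shift < 4 ? msq[k + shift] : lsq[k + shift - 4];
}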
3994 static tree
3995 vect_setup_realignment (tree stmt, block_stmt_iterator *bsi,
3996 tree *realignment_token)
3997 {
3998 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
3999 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
4000 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
4001 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
4002 edge pe = loop_preheader_edge (loop);
4003 tree scalar_dest = GIMPLE_STMT_OPERAND (stmt, 0);
4004 tree vec_dest;
4005 tree init_addr;
4006 tree inc;
4007 tree ptr;
4008 tree data_ref;
4009 tree new_stmt;
4010 basic_block new_bb;
4011 tree msq_init;
4012 tree new_temp;
4013 tree phi_stmt;
4014 tree msq;
4015
4016 /* 1. Create msq_init = *(floor(p1)) in the loop preheader */
4017 vec_dest = vect_create_destination_var (scalar_dest, vectype);
4018 ptr = vect_create_data_ref_ptr (stmt, bsi, NULL_TREE, &init_addr, &inc, true,
4019 NULL_TREE);
4020 data_ref = build1 (ALIGN_INDIRECT_REF, vectype, ptr);
4021 new_stmt = build_gimple_modify_stmt (vec_dest, data_ref);
4022 new_temp = make_ssa_name (vec_dest, new_stmt);
4023 GIMPLE_STMT_OPERAND (new_stmt, 0) = new_temp;
4024 new_bb = bsi_insert_on_edge_immediate (pe, new_stmt);
4025 gcc_assert (!new_bb);
4026 msq_init = GIMPLE_STMT_OPERAND (new_stmt, 0);
4027 copy_virtual_operands (new_stmt, stmt);
4028 update_vuses_to_preheader (new_stmt, loop);
4029
4030 /* 2. Create permutation mask, if required, in loop preheader. */
4031 if (targetm.vectorize.builtin_mask_for_load)
4032 {
4033 tree builtin_decl;
4034
4035 builtin_decl = targetm.vectorize.builtin_mask_for_load ();
4036 new_stmt = build_call_expr (builtin_decl, 1, init_addr);
4037 vec_dest = vect_create_destination_var (scalar_dest,
4038 TREE_TYPE (new_stmt));
4039 new_stmt = build_gimple_modify_stmt (vec_dest, new_stmt);
4040 new_temp = make_ssa_name (vec_dest, new_stmt);
4041 GIMPLE_STMT_OPERAND (new_stmt, 0) = new_temp;
4042 new_bb = bsi_insert_on_edge_immediate (pe, new_stmt);
4043 gcc_assert (!new_bb);
4044 *realignment_token = GIMPLE_STMT_OPERAND (new_stmt, 0);
4045
4046 /* The result of the CALL_EXPR to this builtin is determined from
4047 the value of the parameter, and no global variables are touched,
4048 which makes the builtin a "const" function. Requiring the
4049 builtin to have the "const" attribute makes it unnecessary
4050 to call mark_call_clobbered. */
4051 gcc_assert (TREE_READONLY (builtin_decl));
4052 }
4053
4054 /* 3. Create msq = phi <msq_init, lsq> in loop */
4055 vec_dest = vect_create_destination_var (scalar_dest, vectype);
4056 msq = make_ssa_name (vec_dest, NULL_TREE);
4057 phi_stmt = create_phi_node (msq, loop->header);
4058 SSA_NAME_DEF_STMT (msq) = phi_stmt;
4059 add_phi_arg (phi_stmt, msq_init, loop_preheader_edge (loop));
4060
4061 return msq;
4062 }
4063
4064
4065 /* Function vect_strided_load_supported.
4066
4067 Returns TRUE if EXTRACT_EVEN and EXTRACT_ODD operations are supported,
4068 and FALSE otherwise. */
4069
4070 static bool
4071 vect_strided_load_supported (tree vectype)
4072 {
4073 optab perm_even_optab, perm_odd_optab;
4074 int mode;
4075
4076 mode = (int) TYPE_MODE (vectype);
4077
4078 perm_even_optab = optab_for_tree_code (VEC_EXTRACT_EVEN_EXPR, vectype);
4079 if (!perm_even_optab)
4080 {
4081 if (vect_print_dump_info (REPORT_DETAILS))
4082 fprintf (vect_dump, "no optab for perm_even.");
4083 return false;
4084 }
4085
4086 if (perm_even_optab->handlers[mode].insn_code == CODE_FOR_nothing)
4087 {
4088 if (vect_print_dump_info (REPORT_DETAILS))
4089 fprintf (vect_dump, "perm_even op not supported by target.");
4090 return false;
4091 }
4092
4093 perm_odd_optab = optab_for_tree_code (VEC_EXTRACT_ODD_EXPR, vectype);
4094 if (!perm_odd_optab)
4095 {
4096 if (vect_print_dump_info (REPORT_DETAILS))
4097 fprintf (vect_dump, "no optab for perm_odd.");
4098 return false;
4099 }
4100
4101 if (perm_odd_optab->handlers[mode].insn_code == CODE_FOR_nothing)
4102 {
4103 if (vect_print_dump_info (REPORT_DETAILS))
4104 fprintf (vect_dump, "perm_odd op not supported by target.");
4105 return false;
4106 }
4107 return true;
4108 }
4109
4110
4111 /* Function vect_permute_load_chain.
4112
4113 Given a chain of interleaved loads in DR_CHAIN, whose LENGTH must be
4114 a power of 2, generate extract_even/odd stmts to reorder the input data
4115 correctly. Return the final references for loads in RESULT_CHAIN.
4116
4117 E.g., LENGTH is 4 and the scalar type is short, i.e., VF is 8.
4118 The input is 4 vectors each containing 8 elements. We assign a number to each
4119 element; the input sequence is:
4120
4121 1st vec: 0 1 2 3 4 5 6 7
4122 2nd vec: 8 9 10 11 12 13 14 15
4123 3rd vec: 16 17 18 19 20 21 22 23
4124 4th vec: 24 25 26 27 28 29 30 31
4125
4126 The output sequence should be:
4127
4128 1st vec: 0 4 8 12 16 20 24 28
4129 2nd vec: 1 5 9 13 17 21 25 29
4130 3rd vec: 2 6 10 14 18 22 26 30
4131 4th vec: 3 7 11 15 19 23 27 31
4132
4133 i.e., the first output vector should contain the first elements of each
4134 interleaving group, etc.
4135
4136 We use extract_even/odd instructions to create such output. The input of each
4137 extract_even/odd operation is two vectors
4138 1st vec 2nd vec
4139 0 1 2 3 4 5 6 7
4140
4141 and the output is the vector of extracted even/odd elements. The output of
4142 extract_even will be: 0 2 4 6
4143 and of extract_odd: 1 3 5 7
4144
4145
4146 The permutation is done in log2 (LENGTH) stages. In each stage extract_even and
4147 extract_odd stmts are created for each pair of vectors in DR_CHAIN in their
4148 order. In our example,
4149
4150 E1: extract_even (1st vec, 2nd vec)
4151 E2: extract_odd (1st vec, 2nd vec)
4152 E3: extract_even (3rd vec, 4th vec)
4153 E4: extract_odd (3rd vec, 4th vec)
4154
4155 The output for the first stage will be:
4156
4157 E1: 0 2 4 6 8 10 12 14
4158 E2: 1 3 5 7 9 11 13 15
4159 E3: 16 18 20 22 24 26 28 30
4160 E4: 17 19 21 23 25 27 29 31
4161
4162 In order to proceed and create the correct sequence for the next stage (or
4163 for the correct output, if the second stage is the last one, as in our
4164 example), we first put the output of the extract_even operation and then the
4165 output of extract_odd in RESULT_CHAIN (which is then copied to DR_CHAIN).
4166 The input for the second stage is:
4167
4168 1st vec (E1): 0 2 4 6 8 10 12 14
4169 2nd vec (E3): 16 18 20 22 24 26 28 30
4170 3rd vec (E2): 1 3 5 7 9 11 13 15
4171 4th vec (E4): 17 19 21 23 25 27 29 31
4172
4173 The output of the second stage:
4174
4175 E1: 0 4 8 12 16 20 24 28
4176 E2: 2 6 10 14 18 22 26 30
4177 E3: 1 5 9 13 17 21 25 29
4178 E4: 3 7 11 15 19 23 27 31
4179
4180 And RESULT_CHAIN after reordering:
4181
4182 1st vec (E1): 0 4 8 12 16 20 24 28
4183 2nd vec (E3): 1 5 9 13 17 21 25 29
4184 3rd vec (E2): 2 6 10 14 18 22 26 30
4185 4th vec (E4): 3 7 11 15 19 23 27 31. */
4186
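/* Editorial sketch (standalone, not part of the pass): simulate the
   extract_even/odd network described above, reusing EX_LEN and EX_ELTS
   from the store sketch earlier in this file.  The even results land in
   the first half of the chain and the odd results in the second half,
   exactly as RESULT_CHAIN is filled below.  */

static void
example_extract (const int *v1, const int *v2, int *out, int odd)
{
  int k;
  for (k = 0; k < EX_ELTS / 2; k++)
    {
      out[k] = v1[2 * k + odd];
      out[k + EX_ELTS / 2] = v2[2 * k + odd];
    }
}

static void
example_permute_load_chain (int chain[EX_LEN][EX_ELTS])
{
  int stage, j;
  int tmp[EX_LEN][EX_ELTS];

  for (stage = 0; stage < 2; stage++)  /* log2 (EX_LEN) stages  */
    {
      for (j = 0; j < EX_LEN; j += 2)
        {
          example_extract (chain[j], chain[j + 1], tmp[j / 2], 0);
          example_extract (chain[j], chain[j + 1],
                           tmp[j / 2 + EX_LEN / 2], 1);
        }
      memcpy (chain, tmp, sizeof (tmp));
    }
}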
4187 static bool
4188 vect_permute_load_chain (VEC(tree,heap) *dr_chain,
4189 unsigned int length,
4190 tree stmt,
4191 block_stmt_iterator *bsi,
4192 VEC(tree,heap) **result_chain)
4193 {
4194 tree perm_dest, perm_stmt, data_ref, first_vect, second_vect;
4195 tree vectype = STMT_VINFO_VECTYPE (vinfo_for_stmt (stmt));
4196 tree tmp;
4197 int i;
4198 unsigned int j;
4199
4200 /* Check that the operation is supported. */
4201 if (!vect_strided_load_supported (vectype))
4202 return false;
4203
4204 *result_chain = VEC_copy (tree, heap, dr_chain);
4205 for (i = 0; i < exact_log2 (length); i++)
4206 {
4207 for (j = 0; j < length; j += 2)
4208 {
4209 first_vect = VEC_index (tree, dr_chain, j);
4210 second_vect = VEC_index (tree, dr_chain, j+1);
4211
4212 /* data_ref = permute_even (first_data_ref, second_data_ref); */
4213 perm_dest = create_tmp_var (vectype, "vect_perm_even");
4214 DECL_GIMPLE_REG_P (perm_dest) = 1;
4215 add_referenced_var (perm_dest);
4216
4217 tmp = build2 (VEC_EXTRACT_EVEN_EXPR, vectype,
4218 first_vect, second_vect);
4219 perm_stmt = build_gimple_modify_stmt (perm_dest, tmp);
4220
4221 data_ref = make_ssa_name (perm_dest, perm_stmt);
4222 GIMPLE_STMT_OPERAND (perm_stmt, 0) = data_ref;
4223 vect_finish_stmt_generation (stmt, perm_stmt, bsi);
4224 mark_symbols_for_renaming (perm_stmt);
4225
4226 VEC_replace (tree, *result_chain, j/2, data_ref);
4227
4228 /* data_ref = permute_odd (first_data_ref, second_data_ref); */
4229 perm_dest = create_tmp_var (vectype, "vect_perm_odd");
4230 DECL_GIMPLE_REG_P (perm_dest) = 1;
4231 add_referenced_var (perm_dest);
4232
4233 tmp = build2 (VEC_EXTRACT_ODD_EXPR, vectype,
4234 first_vect, second_vect);
4235 perm_stmt = build_gimple_modify_stmt (perm_dest, tmp);
4236 data_ref = make_ssa_name (perm_dest, perm_stmt);
4237 GIMPLE_STMT_OPERAND (perm_stmt, 0) = data_ref;
4238 vect_finish_stmt_generation (stmt, perm_stmt, bsi);
4239 mark_symbols_for_renaming (perm_stmt);
4240
4241 VEC_replace (tree, *result_chain, j/2+length/2, data_ref);
4242 }
4243 dr_chain = VEC_copy (tree, heap, *result_chain);
4244 }
4245 return true;
4246 }
4247
4248
4249 /* Function vect_transform_strided_load.
4250
4251 Given a chain of input interleaved data-refs (in DR_CHAIN), build statements
4252 to perform their permutation and attach the resulting vectorized statements to
4253 the scalar statements.
4254 */
4255
4256 static bool
4257 vect_transform_strided_load (tree stmt, VEC(tree,heap) *dr_chain, int size,
4258 block_stmt_iterator *bsi)
4259 {
4260 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
4261 tree first_stmt = DR_GROUP_FIRST_DR (stmt_info);
4262 tree next_stmt, new_stmt;
4263 VEC(tree,heap) *result_chain = NULL;
4264 unsigned int i, gap_count;
4265 tree tmp_data_ref;
4266
4267 /* DR_CHAIN contains input data-refs that are a part of the interleaving.
4268 RESULT_CHAIN is the output of vect_permute_load_chain; it contains the
4269 permuted vectors, which are ready for vector computation. */
4270 result_chain = VEC_alloc (tree, heap, size);
4271 /* Permute. */
4272 if (!vect_permute_load_chain (dr_chain, size, stmt, bsi, &result_chain))
4273 return false;
4274
4275 /* Put a permuted data-ref in the VECTORIZED_STMT field.
4276 Since we scan the chain starting from its first node, their order
4277 corresponds to the order of data-refs in RESULT_CHAIN. */
4278 next_stmt = first_stmt;
4279 gap_count = 1;
4280 for (i = 0; VEC_iterate (tree, result_chain, i, tmp_data_ref); i++)
4281 {
4282 if (!next_stmt)
4283 break;
4284
4285 /* Skip the gaps. Loads created for the gaps will be removed by the
4286 dead code elimination pass later.
4287 DR_GROUP_GAP is the number of steps in elements from the previous
4288 access (if there is no gap DR_GROUP_GAP is 1). We skip loads that
4289 correspond to the gaps.
4290 */
4291 if (gap_count < DR_GROUP_GAP (vinfo_for_stmt (next_stmt)))
4292 {
4293 gap_count++;
4294 continue;
4295 }
4296
4297 while (next_stmt)
4298 {
4299 new_stmt = SSA_NAME_DEF_STMT (tmp_data_ref);
4300 /* We assume that if VEC_STMT is not NULL, this is a case of multiple
4301 copies, and we put the new vector statement in the first available
4302 RELATED_STMT. */
4303 if (!STMT_VINFO_VEC_STMT (vinfo_for_stmt (next_stmt)))
4304 STMT_VINFO_VEC_STMT (vinfo_for_stmt (next_stmt)) = new_stmt;
4305 else
4306 {
4307 tree prev_stmt = STMT_VINFO_VEC_STMT (vinfo_for_stmt (next_stmt));
4308 tree rel_stmt = STMT_VINFO_RELATED_STMT (
4309 vinfo_for_stmt (prev_stmt));
4310 while (rel_stmt)
4311 {
4312 prev_stmt = rel_stmt;
4313 rel_stmt = STMT_VINFO_RELATED_STMT (vinfo_for_stmt (rel_stmt));
4314 }
4315 STMT_VINFO_RELATED_STMT (vinfo_for_stmt (prev_stmt)) = new_stmt;
4316 }
4317 next_stmt = DR_GROUP_NEXT_DR (vinfo_for_stmt (next_stmt));
4318 gap_count = 1;
4319 /* If NEXT_STMT accesses the same DR as the previous statement,
4320 put the same TMP_DATA_REF as its vectorized statement; otherwise
4321 get the next data-ref from RESULT_CHAIN. */
4322 if (!next_stmt || !DR_GROUP_SAME_DR_STMT (vinfo_for_stmt (next_stmt)))
4323 break;
4324 }
4325 }
4326 return true;
4327 }
4328
4329
4330 /* vectorizable_load.
4331
4332 Check if STMT reads a non-scalar data-ref (array/pointer/structure) that
4333 can be vectorized.
4334 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
4335 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
4336 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
4337
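/* Editorial sketch (not part of the pass): an interleaved (strided) load
   loop of the kind handled below, with a DR group of size 2.  The whole
   chain is vectorized when its first member is transformed; the other
   members then find their vectorized stmt already recorded.  */

static void
example_strided_load (const int *a, int *x, int *y, int n)
{
  int i;
  for (i = 0; i < n; i++)
    {
      x[i] = a[2 * i];      /* first load in the interleaving chain  */
      y[i] = a[2 * i + 1];
    }
}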
4338 bool
4339 vectorizable_load (tree stmt, block_stmt_iterator *bsi, tree *vec_stmt)
4340 {
4341 tree scalar_dest;
4342 tree vec_dest = NULL;
4343 tree data_ref = NULL;
4344 tree op;
4345 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
4346 stmt_vec_info prev_stmt_info;
4347 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
4348 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
4349 struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info), *first_dr;
4350 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
4351 tree new_temp;
4352 int mode;
4353 tree new_stmt = NULL_TREE;
4354 tree dummy;
4355 enum dr_alignment_support alignment_support_scheme;
4356 tree dataref_ptr = NULL_TREE;
4357 tree ptr_incr;
4358 int nunits = TYPE_VECTOR_SUBPARTS (vectype);
4359 int ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits;
4360 int i, j, group_size;
4361 tree msq = NULL_TREE, lsq;
4362 tree offset = NULL_TREE;
4363 tree realignment_token = NULL_TREE;
4364 tree phi_stmt = NULL_TREE;
4365 VEC(tree,heap) *dr_chain = NULL;
4366 bool strided_load = false;
4367 tree first_stmt;
4368
4369 if (!STMT_VINFO_RELEVANT_P (stmt_info))
4370 return false;
4371
4372 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_loop_def)
4373 return false;
4374
4375 /* FORNOW: not yet supported. */
4376 if (STMT_VINFO_LIVE_P (stmt_info))
4377 {
4378 if (vect_print_dump_info (REPORT_DETAILS))
4379 fprintf (vect_dump, "value used after loop.");
4380 return false;
4381 }
4382
4383 /* Is vectorizable load? */
4384 if (TREE_CODE (stmt) != GIMPLE_MODIFY_STMT)
4385 return false;
4386
4387 scalar_dest = GIMPLE_STMT_OPERAND (stmt, 0);
4388 if (TREE_CODE (scalar_dest) != SSA_NAME)
4389 return false;
4390
4391 op = GIMPLE_STMT_OPERAND (stmt, 1);
4392 if (TREE_CODE (op) != ARRAY_REF
4393 && TREE_CODE (op) != INDIRECT_REF
4394 && !DR_GROUP_FIRST_DR (stmt_info))
4395 return false;
4396
4397 if (!STMT_VINFO_DATA_REF (stmt_info))
4398 return false;
4399
4400 mode = (int) TYPE_MODE (vectype);
4401
4402 /* FORNOW. In some cases we can vectorize even if the data-type is not
4403 supported (e.g. - data copies). */
4404 if (mov_optab->handlers[mode].insn_code == CODE_FOR_nothing)
4405 {
4406 if (vect_print_dump_info (REPORT_DETAILS))
4407 fprintf (vect_dump, "Aligned load, but unsupported type.");
4408 return false;
4409 }
4410
4411 /* Check if the load is a part of an interleaving chain. */
4412 if (DR_GROUP_FIRST_DR (stmt_info))
4413 {
4414 strided_load = true;
4415
4416 /* Check if interleaving is supported. */
4417 if (!vect_strided_load_supported (vectype))
4418 return false;
4419 }
4420
4421 if (!vec_stmt) /* transformation not required. */
4422 {
4423 STMT_VINFO_TYPE (stmt_info) = load_vec_info_type;
4424 vect_model_load_cost (stmt_info, ncopies);
4425 return true;
4426 }
4427
4428 if (vect_print_dump_info (REPORT_DETAILS))
4429 fprintf (vect_dump, "transform load.");
4430
4431 /** Transform. **/
4432
4433 if (strided_load)
4434 {
4435 first_stmt = DR_GROUP_FIRST_DR (stmt_info);
4436 /* Check if the chain of loads is already vectorized. */
4437 if (STMT_VINFO_VEC_STMT (vinfo_for_stmt (first_stmt)))
4438 {
4439 *vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);
4440 return true;
4441 }
4442 first_dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt));
4443 group_size = DR_GROUP_SIZE (vinfo_for_stmt (first_stmt));
4444 dr_chain = VEC_alloc (tree, heap, group_size);
4445 }
4446 else
4447 {
4448 first_stmt = stmt;
4449 first_dr = dr;
4450 group_size = 1;
4451 }
4452
4453 alignment_support_scheme = vect_supportable_dr_alignment (first_dr);
4454 gcc_assert (alignment_support_scheme);
4455
4456
4457 /* In case the vectorization factor (VF) is bigger than the number
4458 of elements that we can fit in a vectype (nunits), we have to generate
4459 more than one vector stmt - i.e. - we need to "unroll" the
4460 vector stmt by a factor VF/nunits. In doing so, we record a pointer
4461 from one copy of the vector stmt to the next, in the field
4462 STMT_VINFO_RELATED_STMT. This is necessary in order to allow following
4463 stages to find the correct vector defs to be used when vectorizing
4464 stmts that use the defs of the current stmt. The example below illustrates
4465 the vectorization process when VF=16 and nunits=4 (i.e. - we need to create
4466 4 vectorized stmts):
4467
4468 before vectorization:
4469 RELATED_STMT VEC_STMT
4470 S1: x = memref - -
4471 S2: z = x + 1 - -
4472
4473 step 1: vectorize stmt S1:
4474 We first create the vector stmt VS1_0, and, as usual, record a
4475 pointer to it in the STMT_VINFO_VEC_STMT of the scalar stmt S1.
4476 Next, we create the vector stmt VS1_1, and record a pointer to
4477 it in the STMT_VINFO_RELATED_STMT of the vector stmt VS1_0.
4478 Similarly, for VS1_2 and VS1_3. This is the resulting chain of
4479 stmts and pointers:
4480 RELATED_STMT VEC_STMT
4481 VS1_0: vx0 = memref0 VS1_1 -
4482 VS1_1: vx1 = memref1 VS1_2 -
4483 VS1_2: vx2 = memref2 VS1_3 -
4484 VS1_3: vx3 = memref3 - -
4485 S1: x = load - VS1_0
4486 S2: z = x + 1 - -
4487
4488 See in documentation in vect_get_vec_def_for_stmt_copy for how the
4489 information we recorded in RELATED_STMT field is used to vectorize
4490 stmt S2. */
4491
4492 /* In case of interleaving (non-unit strided access):
4493
4494 S1: x2 = &base + 2
4495 S2: x0 = &base
4496 S3: x1 = &base + 1
4497 S4: x3 = &base + 3
4498
4499 Vectorized loads are created in the order of memory accesses
4500 starting from the access of the first stmt of the chain:
4501
4502 VS1: vx0 = &base
4503 VS2: vx1 = &base + vec_size*1
4504 VS3: vx2 = &base + vec_size*2
4505 VS4: vx3 = &base + vec_size*3
4506
4507 Then permutation statements are generated:
4508
4509 VS5: vx5 = VEC_EXTRACT_EVEN_EXPR < vx0, vx1 >
4510 VS6: vx6 = VEC_EXTRACT_ODD_EXPR < vx0, vx1 >
4511 ...
4512
4513 And they are put in STMT_VINFO_VEC_STMT of the corresponding scalar stmts
4514 (the order of the data-refs in the output of vect_permute_load_chain
4515 corresponds to the order of scalar stmts in the interleaving chain - see
4516 the documentation of vect_permute_load_chain()).
4517 The generation of permutation stmts and recording them in
4518 STMT_VINFO_VEC_STMT is done in vect_transform_strided_load().
4519
4520 In case of both multiple types and interleaving, the vector loads and
4521 permutation stmts above are created for every copy. The result vector stmts
4522 are put in STMT_VINFO_VEC_STMT for the first copy and in the corresponding
4523 STMT_VINFO_RELATED_STMT for the next copies. */
4524
4525 /* If the data reference is aligned (dr_aligned) or potentially unaligned
4526 on a target that supports unaligned accesses (dr_unaligned_supported)
4527 we generate the following code:
4528 p = initial_addr;
4529 indx = 0;
4530 loop {
4531 p = p + indx * vectype_size;
4532 vec_dest = *(p);
4533 indx = indx + 1;
4534 }
4535
4536 Otherwise, the data reference is potentially unaligned on a target that
4537 does not support unaligned accesses (dr_unaligned_software_pipeline) -
4538 then generate the following code, in which the data in each iteration is
4539 obtained by two vector loads, one from the previous iteration, and one
4540 from the current iteration:
4541 p1 = initial_addr;
4542 msq_init = *(floor(p1))
4543 p2 = initial_addr + VS - 1;
4544 realignment_token = call target_builtin;
4545 indx = 0;
4546 loop {
4547 p2 = p2 + indx * vectype_size
4548 lsq = *(floor(p2))
4549 vec_dest = realign_load (msq, lsq, realignment_token)
4550 indx = indx + 1;
4551 msq = lsq;
4552 } */
4553
4554 if (alignment_support_scheme == dr_unaligned_software_pipeline)
4555 {
4556 msq = vect_setup_realignment (first_stmt, bsi, &realignment_token);
4557 phi_stmt = SSA_NAME_DEF_STMT (msq);
4558 offset = size_int (TYPE_VECTOR_SUBPARTS (vectype) - 1);
4559 }
4560
4561 prev_stmt_info = NULL;
4562 for (j = 0; j < ncopies; j++)
4563 {
4564 /* 1. Create the vector pointer update chain. */
4565 if (j == 0)
4566 dataref_ptr = vect_create_data_ref_ptr (first_stmt, bsi, offset, &dummy,
4567 &ptr_incr, false, NULL_TREE);
4568 else
4569 dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, bsi, stmt);
4570
4571 for (i = 0; i < group_size; i++)
4572 {
4573 /* 2. Create the vector-load in the loop. */
4574 switch (alignment_support_scheme)
4575 {
4576 case dr_aligned:
4577 gcc_assert (aligned_access_p (first_dr));
4578 data_ref = build_fold_indirect_ref (dataref_ptr);
4579 break;
4580 case dr_unaligned_supported:
4581 {
4582 int mis = DR_MISALIGNMENT (first_dr);
4583 tree tmis = (mis == -1 ? size_zero_node : size_int (mis));
4584
4585 gcc_assert (!aligned_access_p (first_dr));
4586 tmis = size_binop (MULT_EXPR, tmis, size_int (BITS_PER_UNIT));
4587 data_ref =
4588 build2 (MISALIGNED_INDIRECT_REF, vectype, dataref_ptr, tmis);
4589 break;
4590 }
4591 case dr_unaligned_software_pipeline:
4592 gcc_assert (!aligned_access_p (first_dr));
4593 data_ref = build1 (ALIGN_INDIRECT_REF, vectype, dataref_ptr);
4594 break;
4595 default:
4596 gcc_unreachable ();
4597 }
4598 vec_dest = vect_create_destination_var (scalar_dest, vectype);
4599 new_stmt = build_gimple_modify_stmt (vec_dest, data_ref);
4600 new_temp = make_ssa_name (vec_dest, new_stmt);
4601 GIMPLE_STMT_OPERAND (new_stmt, 0) = new_temp;
4602 vect_finish_stmt_generation (stmt, new_stmt, bsi);
4603 copy_virtual_operands (new_stmt, stmt);
4604 mark_symbols_for_renaming (new_stmt);
4605
4606 /* 3. Handle explicit realignment if necessary/supported. */
4607 if (alignment_support_scheme == dr_unaligned_software_pipeline)
4608 {
4609 /* Create in loop:
4610 <vec_dest = realign_load (msq, lsq, realignment_token)> */
4611 lsq = GIMPLE_STMT_OPERAND (new_stmt, 0);
4612 if (!realignment_token)
4613 realignment_token = dataref_ptr;
4614 vec_dest = vect_create_destination_var (scalar_dest, vectype);
4615 new_stmt =
4616 build3 (REALIGN_LOAD_EXPR, vectype, msq, lsq, realignment_token);
4617 new_stmt = build_gimple_modify_stmt (vec_dest, new_stmt);
4618 new_temp = make_ssa_name (vec_dest, new_stmt);
4619 GIMPLE_STMT_OPERAND (new_stmt, 0) = new_temp;
4620 vect_finish_stmt_generation (stmt, new_stmt, bsi);
4621 if (i == group_size - 1 && j == ncopies - 1)
4622 add_phi_arg (phi_stmt, lsq, loop_latch_edge (loop));
4623 msq = lsq;
4624 }
4625 if (strided_load)
4626 VEC_quick_push (tree, dr_chain, new_temp);
4627 if (i < group_size - 1)
4628 dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, bsi, stmt);
4629 }
4630
4631 if (strided_load)
4632 {
4633 if (!vect_transform_strided_load (stmt, dr_chain, group_size, bsi))
4634 return false;
4635 *vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);
4636 dr_chain = VEC_alloc (tree, heap, group_size);
4637 }
4638 else
4639 {
4640 if (j == 0)
4641 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
4642 else
4643 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
4644 prev_stmt_info = vinfo_for_stmt (new_stmt);
4645 }
4646 }
4647
4648 return true;
4649 }
4650
4651
4652 /* Function vectorizable_live_operation.
4653
4654 STMT computes a value that is used outside the loop. Check if
4655 it can be supported. */
4656
4657 bool
4658 vectorizable_live_operation (tree stmt,
4659 block_stmt_iterator *bsi ATTRIBUTE_UNUSED,
4660 tree *vec_stmt ATTRIBUTE_UNUSED)
4661 {
4662 tree operation;
4663 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
4664 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
4665 int i;
4666 int op_type;
4667 tree op;
4668 tree def, def_stmt;
4669 enum vect_def_type dt;
4670
4671 gcc_assert (STMT_VINFO_LIVE_P (stmt_info));
4672
4673 if (STMT_VINFO_DEF_TYPE (stmt_info) == vect_reduction_def)
4674 return false;
4675
4676 if (TREE_CODE (stmt) != GIMPLE_MODIFY_STMT)
4677 return false;
4678
4679 if (TREE_CODE (GIMPLE_STMT_OPERAND (stmt, 0)) != SSA_NAME)
4680 return false;
4681
4682 operation = GIMPLE_STMT_OPERAND (stmt, 1);
4683 op_type = TREE_OPERAND_LENGTH (operation);
4684
4685 /* FORNOW: support only if all uses are invariant. This means
4686 that the scalar operations can remain in place, unvectorized.
4687 The original last scalar value that they compute will be used. */
4688
4689 for (i = 0; i < op_type; i++)
4690 {
4691 op = TREE_OPERAND (operation, i);
4692 if (!vect_is_simple_use (op, loop_vinfo, &def_stmt, &def, &dt))
4693 {
4694 if (vect_print_dump_info (REPORT_DETAILS))
4695 fprintf (vect_dump, "use not simple.");
4696 return false;
4697 }
4698
4699 if (dt != vect_invariant_def && dt != vect_constant_def)
4700 return false;
4701 }
4702
4703 /* No transformation is required for the cases we currently support. */
4704 return true;
4705 }
4706
4707
4708 /* Function vect_is_simple_cond.
4709
4710 Input:
4711 LOOP_VINFO - information about the loop that is being vectorized.
4712 COND - Condition that is checked for simple use.
4713
4714 Returns whether a COND can be vectorized. Checks whether
4715 condition operands are supportable using vect_is_simple_use.
4716
4717 static bool
4718 vect_is_simple_cond (tree cond, loop_vec_info loop_vinfo)
4719 {
4720 tree lhs, rhs;
4721 tree def;
4722 enum vect_def_type dt;
4723
4724 if (!COMPARISON_CLASS_P (cond))
4725 return false;
4726
4727 lhs = TREE_OPERAND (cond, 0);
4728 rhs = TREE_OPERAND (cond, 1);
4729
4730 if (TREE_CODE (lhs) == SSA_NAME)
4731 {
4732 tree lhs_def_stmt = SSA_NAME_DEF_STMT (lhs);
4733 if (!vect_is_simple_use (lhs, loop_vinfo, &lhs_def_stmt, &def, &dt))
4734 return false;
4735 }
4736 else if (TREE_CODE (lhs) != INTEGER_CST && TREE_CODE (lhs) != REAL_CST)
4737 return false;
4738
4739 if (TREE_CODE (rhs) == SSA_NAME)
4740 {
4741 tree rhs_def_stmt = SSA_NAME_DEF_STMT (rhs);
4742 if (!vect_is_simple_use (rhs, loop_vinfo, &rhs_def_stmt, &def, &dt))
4743 return false;
4744 }
4745 else if (TREE_CODE (rhs) != INTEGER_CST && TREE_CODE (rhs) != REAL_CST)
4746 return false;
4747
4748 return true;
4749 }
4750
4751 /* vectorizable_condition.
4752
4753 Check if STMT is a conditional modify expression that can be vectorized.
4754 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
4755 stmt using VEC_COND_EXPR to replace it, put it in VEC_STMT, and insert it
4756 at BSI.
4757
4758 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
4759
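/* Editorial sketch (not part of the pass): a conditional loop of the shape
   handled below; the COND_EXPR on the rhs becomes a VEC_COND_EXPR over
   whole vectors.  */

static void
example_cond_loop (int *a, const int *b, const int *c, int n)
{
  int i;
  for (i = 0; i < n; i++)
    a[i] = b[i] < c[i] ? b[i] : c[i];  /* minimum via COND_EXPR  */
}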
4760 bool
4761 vectorizable_condition (tree stmt, block_stmt_iterator *bsi, tree *vec_stmt)
4762 {
4763 tree scalar_dest = NULL_TREE;
4764 tree vec_dest = NULL_TREE;
4765 tree op = NULL_TREE;
4766 tree cond_expr, then_clause, else_clause;
4767 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
4768 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
4769 tree vec_cond_lhs, vec_cond_rhs, vec_then_clause, vec_else_clause;
4770 tree vec_compare, vec_cond_expr;
4771 tree new_temp;
4772 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
4773 enum machine_mode vec_mode;
4774 tree def;
4775 enum vect_def_type dt;
4776 int nunits = TYPE_VECTOR_SUBPARTS (vectype);
4777 int ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits;
4778
4779 gcc_assert (ncopies >= 1);
4780 if (ncopies > 1)
4781 return false; /* FORNOW */
4782
4783 if (!STMT_VINFO_RELEVANT_P (stmt_info))
4784 return false;
4785
4786 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_loop_def)
4787 return false;
4788
4789 /* FORNOW: not yet supported. */
4790 if (STMT_VINFO_LIVE_P (stmt_info))
4791 {
4792 if (vect_print_dump_info (REPORT_DETAILS))
4793 fprintf (vect_dump, "value used after loop.");
4794 return false;
4795 }
4796
4797 /* Is vectorizable conditional operation? */
4798 if (TREE_CODE (stmt) != GIMPLE_MODIFY_STMT)
4799 return false;
4800
4801 op = GIMPLE_STMT_OPERAND (stmt, 1);
4802
4803 if (TREE_CODE (op) != COND_EXPR)
4804 return false;
4805
4806 cond_expr = TREE_OPERAND (op, 0);
4807 then_clause = TREE_OPERAND (op, 1);
4808 else_clause = TREE_OPERAND (op, 2);
4809
4810 if (!vect_is_simple_cond (cond_expr, loop_vinfo))
4811 return false;
4812
4813 /* We do not handle two different vector types for the condition
4814 and the values. */
4815 if (TREE_TYPE (TREE_OPERAND (cond_expr, 0)) != TREE_TYPE (vectype))
4816 return false;
4817
4818 if (TREE_CODE (then_clause) == SSA_NAME)
4819 {
4820 tree then_def_stmt = SSA_NAME_DEF_STMT (then_clause);
4821 if (!vect_is_simple_use (then_clause, loop_vinfo,
4822 &then_def_stmt, &def, &dt))
4823 return false;
4824 }
4825 else if (TREE_CODE (then_clause) != INTEGER_CST
4826 && TREE_CODE (then_clause) != REAL_CST)
4827 return false;
4828
4829 if (TREE_CODE (else_clause) == SSA_NAME)
4830 {
4831 tree else_def_stmt = SSA_NAME_DEF_STMT (else_clause);
4832 if (!vect_is_simple_use (else_clause, loop_vinfo,
4833 &else_def_stmt, &def, &dt))
4834 return false;
4835 }
4836 else if (TREE_CODE (else_clause) != INTEGER_CST
4837 && TREE_CODE (else_clause) != REAL_CST)
4838 return false;
4839
4840
4841 vec_mode = TYPE_MODE (vectype);
4842
4843 if (!vec_stmt)
4844 {
4845 STMT_VINFO_TYPE (stmt_info) = condition_vec_info_type;
4846 return expand_vec_cond_expr_p (op, vec_mode);
4847 }
4848
4849 /** Transform. **/
4850
4851 /* Handle def. */
4852 scalar_dest = GIMPLE_STMT_OPERAND (stmt, 0);
4853 vec_dest = vect_create_destination_var (scalar_dest, vectype);
4854
4855 /* Handle cond expr. */
4856 vec_cond_lhs =
4857 vect_get_vec_def_for_operand (TREE_OPERAND (cond_expr, 0), stmt, NULL);
4858 vec_cond_rhs =
4859 vect_get_vec_def_for_operand (TREE_OPERAND (cond_expr, 1), stmt, NULL);
4860 vec_then_clause = vect_get_vec_def_for_operand (then_clause, stmt, NULL);
4861 vec_else_clause = vect_get_vec_def_for_operand (else_clause, stmt, NULL);
4862
4863 /* Arguments are ready. Create the new vector stmt. */
4864 vec_compare = build2 (TREE_CODE (cond_expr), vectype,
4865 vec_cond_lhs, vec_cond_rhs);
4866 vec_cond_expr = build3 (VEC_COND_EXPR, vectype,
4867 vec_compare, vec_then_clause, vec_else_clause);
4868
4869 *vec_stmt = build_gimple_modify_stmt (vec_dest, vec_cond_expr);
4870 new_temp = make_ssa_name (vec_dest, *vec_stmt);
4871 GIMPLE_STMT_OPERAND (*vec_stmt, 0) = new_temp;
4872 vect_finish_stmt_generation (stmt, *vec_stmt, bsi);
4873
4874 return true;
4875 }
4876
4877 /* Function vect_transform_stmt.
4878
4879 Create a vectorized stmt to replace STMT, and insert it at BSI. */
4880
4881 bool
4882 vect_transform_stmt (tree stmt, block_stmt_iterator *bsi, bool *strided_store)
4883 {
4884 bool is_store = false;
4885 tree vec_stmt = NULL_TREE;
4886 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
4887 tree orig_stmt_in_pattern;
4888 bool done;
4889
4890 switch (STMT_VINFO_TYPE (stmt_info))
4891 {
4892 case type_demotion_vec_info_type:
4893 done = vectorizable_type_demotion (stmt, bsi, &vec_stmt);
4894 gcc_assert (done);
4895 break;
4896
4897 case type_promotion_vec_info_type:
4898 done = vectorizable_type_promotion (stmt, bsi, &vec_stmt);
4899 gcc_assert (done);
4900 break;
4901
4902 case type_conversion_vec_info_type:
4903 done = vectorizable_conversion (stmt, bsi, &vec_stmt);
4904 gcc_assert (done);
4905 break;
4906
4907 case induc_vec_info_type:
4908 done = vectorizable_induction (stmt, bsi, &vec_stmt);
4909 gcc_assert (done);
4910 break;
4911
4912 case op_vec_info_type:
4913 done = vectorizable_operation (stmt, bsi, &vec_stmt);
4914 gcc_assert (done);
4915 break;
4916
4917 case assignment_vec_info_type:
4918 done = vectorizable_assignment (stmt, bsi, &vec_stmt);
4919 gcc_assert (done);
4920 break;
4921
4922 case load_vec_info_type:
4923 done = vectorizable_load (stmt, bsi, &vec_stmt);
4924 gcc_assert (done);
4925 break;
4926
4927 case store_vec_info_type:
4928 done = vectorizable_store (stmt, bsi, &vec_stmt);
4929 gcc_assert (done);
4930 if (DR_GROUP_FIRST_DR (stmt_info))
4931 {
4932 /* In case of interleaving, the whole chain is vectorized when the
4933 last store in the chain is reached. Store stmts before the last
4934 one are skipped, and their vec_stmt_info shouldn't be freed
4935 meanwhile. */
4936 *strided_store = true;
4937 if (STMT_VINFO_VEC_STMT (stmt_info))
4938 is_store = true;
4939 }
4940 else
4941 is_store = true;
4942 break;
4943
4944 case condition_vec_info_type:
4945 done = vectorizable_condition (stmt, bsi, &vec_stmt);
4946 gcc_assert (done);
4947 break;
4948
4949 case call_vec_info_type:
4950 done = vectorizable_call (stmt, bsi, &vec_stmt);
4951 break;
4952
4953 case reduc_vec_info_type:
4954 done = vectorizable_reduction (stmt, bsi, &vec_stmt);
4955 gcc_assert (done);
4956 break;
4957
4958 default:
4959 if (!STMT_VINFO_LIVE_P (stmt_info))
4960 {
4961 if (vect_print_dump_info (REPORT_DETAILS))
4962 fprintf (vect_dump, "stmt not supported.");
4963 gcc_unreachable ();
4964 }
4965 }
4966
4967 if (STMT_VINFO_LIVE_P (stmt_info)
4968 && STMT_VINFO_TYPE (stmt_info) != reduc_vec_info_type)
4969 {
4970 done = vectorizable_live_operation (stmt, bsi, &vec_stmt);
4971 gcc_assert (done);
4972 }
4973
4974 if (vec_stmt)
4975 {
4976 STMT_VINFO_VEC_STMT (stmt_info) = vec_stmt;
4977 orig_stmt_in_pattern = STMT_VINFO_RELATED_STMT (stmt_info);
4978 if (orig_stmt_in_pattern)
4979 {
4980 stmt_vec_info stmt_vinfo = vinfo_for_stmt (orig_stmt_in_pattern);
4981 /* STMT was inserted by the vectorizer to replace a computation idiom.
4982 ORIG_STMT_IN_PATTERN is a stmt in the original sequence that
4983 computed this idiom. We need to record a pointer to VEC_STMT in
4984 the stmt_info of ORIG_STMT_IN_PATTERN. See more details in the
4985 documentation of vect_pattern_recog. */
4986 if (STMT_VINFO_IN_PATTERN_P (stmt_vinfo))
4987 {
4988 gcc_assert (STMT_VINFO_RELATED_STMT (stmt_vinfo) == stmt);
4989 STMT_VINFO_VEC_STMT (stmt_vinfo) = vec_stmt;
4990 }
4991 }
4992 }
4993
4994 return is_store;
4995 }
4996
4997
4998 /* This function builds, on the loop preheader, ni_name = the number
4999 of iterations the loop executes. */
5000
5001 static tree
5002 vect_build_loop_niters (loop_vec_info loop_vinfo)
5003 {
5004 tree ni_name, stmt, var;
5005 edge pe;
5006 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
5007 tree ni = unshare_expr (LOOP_VINFO_NITERS (loop_vinfo));
5008
5009 var = create_tmp_var (TREE_TYPE (ni), "niters");
5010 add_referenced_var (var);
5011 ni_name = force_gimple_operand (ni, &stmt, false, var);
5012
5013 pe = loop_preheader_edge (loop);
5014 if (stmt)
5015 {
5016 basic_block new_bb = bsi_insert_on_edge_immediate (pe, stmt);
5017 gcc_assert (!new_bb);
5018 }
5019
5020 return ni_name;
5021 }
5022
5023
5024 /* This function generates the following statements:
5025
5026 ni_name = number of iterations loop executes
5027 ratio = ni_name / vf
5028 ratio_mult_vf_name = ratio * vf
5029
5030 and places them at the loop preheader edge. */
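/* For example, with vf = 4 (so log2 (vf) = 2) and ni_name = 103, the
   code below computes
     ratio              = 103 >> 2 = 25
     ratio_mult_vf_name = 25 << 2  = 100
   leaving 103 - 100 = 3 iterations for the scalar epilog loop.  */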
5031
5032 static void
5033 vect_generate_tmps_on_preheader (loop_vec_info loop_vinfo,
5034 tree *ni_name_ptr,
5035 tree *ratio_mult_vf_name_ptr,
5036 tree *ratio_name_ptr)
5037 {
5038
5039 edge pe;
5040 basic_block new_bb;
5041 tree stmt, ni_name;
5042 tree var;
5043 tree ratio_name;
5044 tree ratio_mult_vf_name;
5045 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
5046 tree ni = LOOP_VINFO_NITERS (loop_vinfo);
5047 int vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
5048 tree log_vf;
5049
5050 pe = loop_preheader_edge (loop);
5051
5052 /* Generate a temporary variable that contains the
5053 number of iterations the loop executes. */
5054
5055 ni_name = vect_build_loop_niters (loop_vinfo);
5056 log_vf = build_int_cst (TREE_TYPE (ni), exact_log2 (vf));
5057
5058 /* Create: ratio = ni >> log2(vf) */
5059
5060 ratio_name = fold_build2 (RSHIFT_EXPR, TREE_TYPE (ni_name), ni_name, log_vf);
5061 if (!is_gimple_val (ratio_name))
5062 {
5063 var = create_tmp_var (TREE_TYPE (ni), "bnd");
5064 add_referenced_var (var);
5065
5066 ratio_name = force_gimple_operand (ratio_name, &stmt, true, var);
5067 pe = loop_preheader_edge (loop);
5068 new_bb = bsi_insert_on_edge_immediate (pe, stmt);
5069 gcc_assert (!new_bb);
5070 }
5071
5072 /* Create: ratio_mult_vf = ratio << log2 (vf). */
5073
5074 ratio_mult_vf_name = fold_build2 (LSHIFT_EXPR, TREE_TYPE (ratio_name),
5075 ratio_name, log_vf);
5076 if (!is_gimple_val (ratio_mult_vf_name))
5077 {
5078 var = create_tmp_var (TREE_TYPE (ni), "ratio_mult_vf");
5079 add_referenced_var (var);
5080
5081 ratio_mult_vf_name = force_gimple_operand (ratio_mult_vf_name, &stmt,
5082 true, var);
5083 pe = loop_preheader_edge (loop);
5084 new_bb = bsi_insert_on_edge_immediate (pe, stmt);
5085 gcc_assert (!new_bb);
5086 }
5087
5088 *ni_name_ptr = ni_name;
5089 *ratio_mult_vf_name_ptr = ratio_mult_vf_name;
5090 *ratio_name_ptr = ratio_name;
5091
5092 return;
5093 }
5094
5095
5096 /* Function update_vuses_to_preheader.
5097
5098 Input:
5099 STMT - a statement with potential VUSEs.
5100 LOOP - the loop whose preheader will contain STMT.
5101
5102 It's possible to vectorize a loop even though an SSA_NAME from a VUSE
5103 appears to be defined in a VDEF in another statement in a loop.
5104 One such case is when the VUSE is at the dereference of a __restrict__
5105 pointer in a load and the VDEF is at the dereference of a different
5106 __restrict__ pointer in a store. Vectorization may result in
5107 copy_virtual_uses being called to copy the problematic VUSE to a new
5108 statement that is being inserted in the loop preheader. This procedure
5109 is called to change the SSA_NAME in the new statement's VUSE from the
5110 SSA_NAME updated in the loop to the related SSA_NAME available on the
5111 path entering the loop.
5112
5113 When this function is called, we have the following situation:
5114
5115 # vuse <name1>
5116 S1: vload
5117 do {
5118 # name1 = phi < name0 , name2>
5119
5120 # vuse <name1>
5121 S2: vload
5122
5123 # name2 = vdef <name1>
5124 S3: vstore
5125
5126 }while...
5127
5128 Stmt S1 was created in the loop preheader block as part of misaligned-load
5129 handling. This function fixes the name of the vuse of S1 from 'name1' to
5130 'name0'. */
5131
5132 static void
5133 update_vuses_to_preheader (tree stmt, struct loop *loop)
5134 {
5135 basic_block header_bb = loop->header;
5136 edge preheader_e = loop_preheader_edge (loop);
5137 ssa_op_iter iter;
5138 use_operand_p use_p;
5139
5140 FOR_EACH_SSA_USE_OPERAND (use_p, stmt, iter, SSA_OP_VUSE)
5141 {
5142 tree ssa_name = USE_FROM_PTR (use_p);
5143 tree def_stmt = SSA_NAME_DEF_STMT (ssa_name);
5144 tree name_var = SSA_NAME_VAR (ssa_name);
5145 basic_block bb = bb_for_stmt (def_stmt);
5146
5147 /* For a use before any definitions, def_stmt is a NOP_EXPR. */
5148 if (!IS_EMPTY_STMT (def_stmt)
5149 && flow_bb_inside_loop_p (loop, bb))
5150 {
5151 /* If the block containing the statement defining the SSA_NAME
5152 is in the loop then it's necessary to find the definition
5153 outside the loop using the PHI nodes of the header. */
5154 tree phi;
5155 bool updated = false;
5156
5157 for (phi = phi_nodes (header_bb); phi; phi = PHI_CHAIN (phi))
5158 {
5159 if (SSA_NAME_VAR (PHI_RESULT (phi)) == name_var)
5160 {
5161 SET_USE (use_p, PHI_ARG_DEF (phi, preheader_e->dest_idx));
5162 updated = true;
5163 break;
5164 }
5165 }
5166 gcc_assert (updated);
5167 }
5168 }
5169 }
5170
5171
5172 /* Function vect_update_ivs_after_vectorizer.
5173
5174 "Advance" the induction variables of LOOP to the value they should take
5175 after the execution of LOOP. This is currently necessary because the
5176 vectorizer does not handle induction variables that are used after the
5177 loop. Such a situation occurs when the last iterations of LOOP are
5178 peeled, because:
5179 1. We introduced new uses after LOOP for IVs that were not originally used
5180 after LOOP: the IVs of LOOP are now used by an epilog loop.
5181 2. LOOP is going to be vectorized; this means that it will iterate N/VF
5182 times, whereas the loop IVs should be bumped N times.
5183
5184 Input:
5185 - LOOP - a loop that is going to be vectorized. The last few iterations
5186 of LOOP were peeled.
5187 - NITERS - the number of iterations that LOOP executes (before it is
5188 vectorized), i.e., the number of times the ivs should be bumped.
5189 - UPDATE_E - a successor edge of LOOP->exit that is on the (only) path
5190 coming out from LOOP on which there are uses of the LOOP ivs
5191 (this is the path from LOOP->exit to epilog_loop->preheader).
5192
5193 The new definitions of the ivs are placed in LOOP->exit.
5194 The phi args associated with the edge UPDATE_E in the bb
5195 UPDATE_E->dest are updated accordingly.
5196
5197 Assumption 1: Like the rest of the vectorizer, this function assumes
5198 a single loop exit that has a single predecessor.
5199
5200 Assumption 2: The phi nodes in the LOOP header and in update_bb are
5201 organized in the same order.
5202
5203 Assumption 3: The access function of the ivs is simple enough (see
5204 vect_can_advance_ivs_p). This assumption will be relaxed in the future.
5205
5206 Assumption 4: Exactly one of the successors of LOOP exit-bb is on a path
5207 coming out of LOOP on which the ivs of LOOP are used (this is the path
5208 that leads to the epilog loop; other paths skip the epilog loop). This
5209 path starts with the edge UPDATE_E, and its destination (denoted update_bb)
5210 needs to have its phis updated.
5211 */
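/* For illustration, an IV with access function {init, +, step} is
   advanced by the code below to
     ni = init + NITERS * step
   (for pointer IVs, ni = init p+ (sizetype) (NITERS * step)),
   and the resulting name becomes the phi argument on UPDATE_E in
   update_bb.  */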
5212
5213 static void
5214 vect_update_ivs_after_vectorizer (loop_vec_info loop_vinfo, tree niters,
5215 edge update_e)
5216 {
5217 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
5218 basic_block exit_bb = single_exit (loop)->dest;
5219 tree phi, phi1;
5220 basic_block update_bb = update_e->dest;
5221
5222 /* gcc_assert (vect_can_advance_ivs_p (loop_vinfo)); */
5223
5224 /* Make sure there exists a single-predecessor exit bb: */
5225 gcc_assert (single_pred_p (exit_bb));
5226
5227 for (phi = phi_nodes (loop->header), phi1 = phi_nodes (update_bb);
5228 phi && phi1;
5229 phi = PHI_CHAIN (phi), phi1 = PHI_CHAIN (phi1))
5230 {
5231 tree access_fn = NULL;
5232 tree evolution_part;
5233 tree init_expr;
5234 tree step_expr;
5235 tree var, stmt, ni, ni_name;
5236 block_stmt_iterator last_bsi;
5237
5238 if (vect_print_dump_info (REPORT_DETAILS))
5239 {
5240 fprintf (vect_dump, "vect_update_ivs_after_vectorizer: phi: ");
5241 print_generic_expr (vect_dump, phi, TDF_SLIM);
5242 }
5243
5244 /* Skip virtual phis. */
5245 if (!is_gimple_reg (SSA_NAME_VAR (PHI_RESULT (phi))))
5246 {
5247 if (vect_print_dump_info (REPORT_DETAILS))
5248 fprintf (vect_dump, "virtual phi. skip.");
5249 continue;
5250 }
5251
5252 /* Skip reduction phis. */
5253 if (STMT_VINFO_DEF_TYPE (vinfo_for_stmt (phi)) == vect_reduction_def)
5254 {
5255 if (vect_print_dump_info (REPORT_DETAILS))
5256 fprintf (vect_dump, "reduc phi. skip.");
5257 continue;
5258 }
5259
5260 access_fn = analyze_scalar_evolution (loop, PHI_RESULT (phi));
5261 gcc_assert (access_fn);
5262 evolution_part =
5263 unshare_expr (evolution_part_in_loop_num (access_fn, loop->num));
5264 gcc_assert (evolution_part != NULL_TREE);
5265
5266 /* FORNOW: We do not support IVs whose evolution function is a polynomial
5267 of degree >= 2 or exponential. */
5268 gcc_assert (!tree_is_chrec (evolution_part));
5269
5270 step_expr = evolution_part;
5271 init_expr = unshare_expr (initial_condition_in_loop_num (access_fn,
5272 loop->num));
5273
5274 if (POINTER_TYPE_P (TREE_TYPE (init_expr)))
5275 ni = fold_build2 (POINTER_PLUS_EXPR, TREE_TYPE (init_expr),
5276 init_expr,
5277 fold_convert (sizetype,
5278 fold_build2 (MULT_EXPR, TREE_TYPE (niters),
5279 niters, step_expr)));
5280 else
5281 ni = fold_build2 (PLUS_EXPR, TREE_TYPE (init_expr),
5282 fold_build2 (MULT_EXPR, TREE_TYPE (init_expr),
5283 fold_convert (TREE_TYPE (init_expr),
5284 niters),
5285 step_expr),
5286 init_expr);
5287
5288
5289
5290 var = create_tmp_var (TREE_TYPE (init_expr), "tmp");
5291 add_referenced_var (var);
5292
5293 ni_name = force_gimple_operand (ni, &stmt, false, var);
5294
5295 /* Insert stmt into exit_bb. */
5296 last_bsi = bsi_last (exit_bb);
5297 if (stmt)
5298 bsi_insert_before (&last_bsi, stmt, BSI_SAME_STMT);
5299
5300 /* Fix phi expressions in the successor bb. */
5301 SET_PHI_ARG_DEF (phi1, update_e->dest_idx, ni_name);
5302 }
5303 }
5304
5305
5306 /* Function vect_do_peeling_for_loop_bound
5307
5308 Peel the last iterations of the loop represented by LOOP_VINFO.
5309 The peeled iterations form a new epilog loop. Given that the loop now
5310 iterates NITERS times, the new epilog loop iterates
5311 NITERS % VECTORIZATION_FACTOR times.
5312
5313 The original loop will later be made to iterate
5314 NITERS / VECTORIZATION_FACTOR times (this value is placed into RATIO). */
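/* For example, with NITERS = 38 and VECTORIZATION_FACTOR = 8, the
   vectorized loop iterates RATIO = 38 / 8 = 4 times and the epilog
   loop executes the remaining 38 % 8 = 6 iterations.  */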
5315
5316 static void
5317 vect_do_peeling_for_loop_bound (loop_vec_info loop_vinfo, tree *ratio)
5318 {
5319 tree ni_name, ratio_mult_vf_name;
5320 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
5321 struct loop *new_loop;
5322 edge update_e;
5323 basic_block preheader;
5324 int loop_num;
5325 unsigned int th;
5326 int min_scalar_loop_bound;
5327 int min_profitable_iters;
5328
5329 if (vect_print_dump_info (REPORT_DETAILS))
5330 fprintf (vect_dump, "=== vect_do_peeling_for_loop_bound ===");
5331
5332 initialize_original_copy_tables ();
5333
5334 /* Generate the following variables on the preheader of the original loop:
5335
5336 ni_name = number of iterations the original loop executes
5337 ratio = ni_name / vf
5338 ratio_mult_vf_name = ratio * vf */
5339 vect_generate_tmps_on_preheader (loop_vinfo, &ni_name,
5340 &ratio_mult_vf_name, ratio);
5341
5342 loop_num = loop->num;
5343
5344 /* Analyze cost to set the threshold for the vectorized loop. */
5345 min_profitable_iters = vect_estimate_min_profitable_iters (loop_vinfo);
5346
5347 min_scalar_loop_bound = (PARAM_VALUE (PARAM_MIN_VECT_LOOP_BOUND))
5348 * LOOP_VINFO_VECT_FACTOR (loop_vinfo);
5349
5350 /* Use the cost model only if it is more conservative than the user-specified
5351 threshold. */
5352
5353 th = (unsigned) min_scalar_loop_bound;
5354 if (min_profitable_iters
5355 && (!min_scalar_loop_bound
5356 || min_profitable_iters > min_scalar_loop_bound))
5357 th = (unsigned) min_profitable_iters;
5358
5359 if (vect_print_dump_info (REPORT_DETAILS))
5360 fprintf (vect_dump, "vectorization may not be profitable.");
5361
5362 new_loop = slpeel_tree_peel_loop_to_edge (loop, single_exit (loop),
5363 ratio_mult_vf_name, ni_name, false,
5364 th);
5365 gcc_assert (new_loop);
5366 gcc_assert (loop_num == loop->num);
5367 #ifdef ENABLE_CHECKING
5368 slpeel_verify_cfg_after_peeling (loop, new_loop);
5369 #endif
5370
5371 /* A guard that controls whether the new_loop is to be executed or skipped
5372 is placed in LOOP->exit. LOOP->exit therefore has two successors - one
5373 is the preheader of NEW_LOOP, where the IVs from LOOP are used. The other
5374 is a bb after NEW_LOOP, where these IVs are not used. Find the edge that
5375 is on the path where the LOOP IVs are used and need to be updated. */
5376
5377 preheader = loop_preheader_edge (new_loop)->src;
5378 if (EDGE_PRED (preheader, 0)->src == single_exit (loop)->dest)
5379 update_e = EDGE_PRED (preheader, 0);
5380 else
5381 update_e = EDGE_PRED (preheader, 1);
5382
5383 /* Update IVs of original loop as if they were advanced
5384 by ratio_mult_vf_name steps. */
5385 vect_update_ivs_after_vectorizer (loop_vinfo, ratio_mult_vf_name, update_e);
5386
5387 /* After peeling we have to reset scalar evolution analyzer. */
5388 scev_reset ();
5389
5390 free_original_copy_tables ();
5391 }
5392
5393
5394 /* Function vect_gen_niters_for_prolog_loop
5395
5396 Set the number of iterations for the loop represented by LOOP_VINFO
5397 to the minimum between LOOP_NITERS (the original iteration count of the loop)
5398 and the misalignment of DR - the data reference recorded in
5399 LOOP_VINFO_UNALIGNED_DR (LOOP_VINFO). As a result, after the execution of
5400 this loop, the data reference DR will refer to an aligned location.
5401
5402 The following computation is generated:
5403
5404 If the misalignment of DR is known at compile time:
5405 addr_mis = int mis = DR_MISALIGNMENT (dr);
5406 Else, compute address misalignment in bytes:
5407 addr_mis = addr & (vectype_size - 1)
5408
5409 prolog_niters = min ( LOOP_NITERS , (VF - addr_mis/elem_size)&(VF-1) )
5410
5411 (elem_size = element type size; an element is the scalar element
5412 whose type is the inner type of the vectype)
5413
5414 For interleaving,
5415
5416 prolog_niters = min ( LOOP_NITERS ,
5417 (VF/group_size - addr_mis/elem_size)&(VF/group_size-1) )
5418 where group_size is the size of the interleaved group.
5419
5420 The above formulas assume that VF == number of elements in the vector. This
5421 may not hold when there are multiple-types in the loop.
5422 In this case, for some data-references in the loop the VF does not represent
5423 the number of elements that fit in the vector. Therefore, instead of VF we
5424 use TYPE_VECTOR_SUBPARTS. */
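/* For example, assuming a V4SF-like vectype (nelements = 4,
   elem_size = 4, vector size 16 bytes, group_size = 1) and a known
   byte misalignment of 8:
     elem_misalign = 8 / 4 = 2
     prolog_niters = (4 - 2) & (4 - 1) = 2
   i.e. two scalar iterations are peeled so that DR becomes
   16-byte aligned.  */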
5425
5426 static tree
5427 vect_gen_niters_for_prolog_loop (loop_vec_info loop_vinfo, tree loop_niters)
5428 {
5429 struct data_reference *dr = LOOP_VINFO_UNALIGNED_DR (loop_vinfo);
5430 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
5431 tree var, stmt;
5432 tree iters, iters_name;
5433 edge pe;
5434 basic_block new_bb;
5435 tree dr_stmt = DR_STMT (dr);
5436 stmt_vec_info stmt_info = vinfo_for_stmt (dr_stmt);
5437 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
5438 int vectype_align = TYPE_ALIGN (vectype) / BITS_PER_UNIT;
5439 tree niters_type = TREE_TYPE (loop_niters);
5440 int group_size = 1;
5441 int element_size = GET_MODE_SIZE (TYPE_MODE (TREE_TYPE (DR_REF (dr))));
5442 int nelements = TYPE_VECTOR_SUBPARTS (vectype);
5443
5444 if (DR_GROUP_FIRST_DR (stmt_info))
5445 {
5446 /* For interleaved accesses, the element size must be multiplied by
5447 the size of the interleaved group. */
5448 group_size = DR_GROUP_SIZE (vinfo_for_stmt (
5449 DR_GROUP_FIRST_DR (stmt_info)));
5450 element_size *= group_size;
5451 }
5452
5453 pe = loop_preheader_edge (loop);
5454
5455 if (LOOP_PEELING_FOR_ALIGNMENT (loop_vinfo) > 0)
5456 {
5457 int byte_misalign = LOOP_PEELING_FOR_ALIGNMENT (loop_vinfo);
5458 int elem_misalign = byte_misalign / element_size;
5459
5460 if (vect_print_dump_info (REPORT_DETAILS))
5461 fprintf (vect_dump, "known misalignment = %d.", byte_misalign);
5462 iters = build_int_cst (niters_type,
5463 (nelements - elem_misalign)&(nelements/group_size-1));
5464 }
5465 else
5466 {
5467 tree new_stmts = NULL_TREE;
5468 tree start_addr =
5469 vect_create_addr_base_for_vector_ref (dr_stmt, &new_stmts, NULL_TREE);
5470 tree ptr_type = TREE_TYPE (start_addr);
5471 tree size = TYPE_SIZE (ptr_type);
5472 tree type = lang_hooks.types.type_for_size (tree_low_cst (size, 1), 1);
5473 tree vectype_size_minus_1 = build_int_cst (type, vectype_align - 1);
5474 tree elem_size_log =
5475 build_int_cst (type, exact_log2 (vectype_align/nelements));
5476 tree nelements_minus_1 = build_int_cst (type, nelements - 1);
5477 tree nelements_tree = build_int_cst (type, nelements);
5478 tree byte_misalign;
5479 tree elem_misalign;
5480
5481 new_bb = bsi_insert_on_edge_immediate (pe, new_stmts);
5482 gcc_assert (!new_bb);
5483
5484 /* Create: byte_misalign = addr & (vectype_size - 1) */
5485 byte_misalign =
5486 fold_build2 (BIT_AND_EXPR, type, fold_convert (type, start_addr), vectype_size_minus_1);
5487
5488 /* Create: elem_misalign = byte_misalign / element_size */
5489 elem_misalign =
5490 fold_build2 (RSHIFT_EXPR, type, byte_misalign, elem_size_log);
5491
5492 /* Create: (niters_type) (nelements - elem_misalign)&(nelements - 1) */
5493 iters = fold_build2 (MINUS_EXPR, type, nelements_tree, elem_misalign);
5494 iters = fold_build2 (BIT_AND_EXPR, type, iters, nelements_minus_1);
5495 iters = fold_convert (niters_type, iters);
5496 }
5497
5498 /* Create: prolog_loop_niters = min (iters, loop_niters) */
5499 /* If the loop bound is known at compile time we already verified that it is
5500 greater than vf; since the prolog iteration count ('iters') is at most vf, there's
5501 no need to generate the MIN_EXPR in this case. */
5502 if (TREE_CODE (loop_niters) != INTEGER_CST)
5503 iters = fold_build2 (MIN_EXPR, niters_type, iters, loop_niters);
5504
5505 if (vect_print_dump_info (REPORT_DETAILS))
5506 {
5507 fprintf (vect_dump, "niters for prolog loop: ");
5508 print_generic_expr (vect_dump, iters, TDF_SLIM);
5509 }
5510
5511 var = create_tmp_var (niters_type, "prolog_loop_niters");
5512 add_referenced_var (var);
5513 iters_name = force_gimple_operand (iters, &stmt, false, var);
5514
5515 /* Insert stmt on loop preheader edge. */
5516 if (stmt)
5517 {
5518 basic_block new_bb = bsi_insert_on_edge_immediate (pe, stmt);
5519 gcc_assert (!new_bb);
5520 }
5521
5522 return iters_name;
5523 }
5524
5525
5526 /* Function vect_update_init_of_dr
5527
5528 NITERS iterations were peeled from LOOP. DR represents a data reference
5529 in LOOP. This function updates the information recorded in DR to
5530 account for the fact that the first NITERS iterations had already been
5531 executed. Specifically, it updates the OFFSET field of DR. */
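/* For example, if DR advances by DR_STEP = 4 (bytes) per iteration
   and NITERS = 3 iterations were peeled, the code below adds
   3 * 4 = 12 to DR_OFFSET, so the data reference describes the
   first access performed by the remaining loop.  */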
5532
5533 static void
5534 vect_update_init_of_dr (struct data_reference *dr, tree niters)
5535 {
5536 tree offset = DR_OFFSET (dr);
5537
5538 niters = fold_build2 (MULT_EXPR, TREE_TYPE (niters), niters, DR_STEP (dr));
5539 offset = fold_build2 (PLUS_EXPR, TREE_TYPE (offset), offset, niters);
5540 DR_OFFSET (dr) = offset;
5541 }
5542
5543
5544 /* Function vect_update_inits_of_drs
5545
5546 NITERS iterations were peeled from the loop represented by LOOP_VINFO.
5547 This function updates the information recorded for the data references in
5548 the loop to account for the fact that the first NITERS iterations had
5549 already been executed. Specifically, it updates the initial_condition of
5550 the access_function of all the data_references in the loop. */
5551
5552 static void
5553 vect_update_inits_of_drs (loop_vec_info loop_vinfo, tree niters)
5554 {
5555 unsigned int i;
5556 VEC (data_reference_p, heap) *datarefs = LOOP_VINFO_DATAREFS (loop_vinfo);
5557 struct data_reference *dr;
5558
5559 if (vect_print_dump_info (REPORT_DETAILS))
5560 fprintf (vect_dump, "=== vect_update_inits_of_dr ===");
5561
5562 for (i = 0; VEC_iterate (data_reference_p, datarefs, i, dr); i++)
5563 vect_update_init_of_dr (dr, niters);
5564 }
5565
5566
5567 /* Function vect_do_peeling_for_alignment
5568
5569 Peel the first 'niters' iterations of the loop represented by LOOP_VINFO.
5570 'niters' is set to the misalignment of one of the data references in the
5571 loop, thereby forcing it to refer to an aligned location at the beginning
5572 of the execution of this loop. The data reference for which we are
5573 peeling is recorded in LOOP_VINFO_UNALIGNED_DR. */
5574
5575 static void
5576 vect_do_peeling_for_alignment (loop_vec_info loop_vinfo)
5577 {
5578 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
5579 tree niters_of_prolog_loop, ni_name;
5580 tree n_iters;
5581 struct loop *new_loop;
5582
5583 if (vect_print_dump_info (REPORT_DETAILS))
5584 fprintf (vect_dump, "=== vect_do_peeling_for_alignment ===");
5585
5586 initialize_original_copy_tables ();
5587
5588 ni_name = vect_build_loop_niters (loop_vinfo);
5589 niters_of_prolog_loop = vect_gen_niters_for_prolog_loop (loop_vinfo, ni_name);
5590
5591 /* Peel the prolog loop and iterate it niters_of_prolog_loop times. */
5592 new_loop =
5593 slpeel_tree_peel_loop_to_edge (loop, loop_preheader_edge (loop),
5594 niters_of_prolog_loop, ni_name, true, 0);
5595 gcc_assert (new_loop);
5596 #ifdef ENABLE_CHECKING
5597 slpeel_verify_cfg_after_peeling (new_loop, loop);
5598 #endif
5599
5600 /* Update number of times loop executes. */
5601 n_iters = LOOP_VINFO_NITERS (loop_vinfo);
5602 LOOP_VINFO_NITERS (loop_vinfo) = fold_build2 (MINUS_EXPR,
5603 TREE_TYPE (n_iters), n_iters, niters_of_prolog_loop);
5604
5605 /* Update the init conditions of the access functions of all data refs. */
5606 vect_update_inits_of_drs (loop_vinfo, niters_of_prolog_loop);
5607
5608 /* After peeling we have to reset scalar evolution analyzer. */
5609 scev_reset ();
5610
5611 free_original_copy_tables ();
5612 }
5613
5614
5615 /* Function vect_create_cond_for_align_checks.
5616
5617 Create a conditional expression that represents the alignment checks for
5618 all of the data references (array element references) whose alignment must be
5619 checked at runtime.
5620
5621 Input:
5622 LOOP_VINFO - two fields of the loop information are used.
5623 LOOP_VINFO_PTR_MASK is the mask used to check the alignment.
5624 LOOP_VINFO_MAY_MISALIGN_STMTS contains the refs to be checked.
5625
5626 Output:
5627 COND_EXPR_STMT_LIST - statements needed to construct the conditional
5628 expression.
5629 The returned value is the conditional expression to be used in the if
5630 statement that controls which version of the loop gets executed at runtime.
5631
5632 The algorithm makes two assumptions:
5633 1) The number of bytes "n" in a vector is a power of 2.
5634 2) An address "a" is aligned if a%n is zero, and this
5635 test can be done as a&(n-1) == 0. For example, for 16
5636 byte vectors the test is a&0xf == 0. */
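/* For example, for two data refs and 16-byte vectors (mask = 0xf),
   the statements built below amount to:
     addr2int0 = (int) addr_of_first_vector_of_dr_1;
     addr2int1 = (int) addr_of_first_vector_of_dr_2;
     orptrs1 = addr2int0 | addr2int1;
     andmask = orptrs1 & 0xf;
   and the returned condition is (andmask == 0).  */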
5637
5638 static tree
5639 vect_create_cond_for_align_checks (loop_vec_info loop_vinfo,
5640 tree *cond_expr_stmt_list)
5641 {
5642 VEC(tree,heap) *may_misalign_stmts
5643 = LOOP_VINFO_MAY_MISALIGN_STMTS (loop_vinfo);
5644 tree ref_stmt, tmp;
5645 int mask = LOOP_VINFO_PTR_MASK (loop_vinfo);
5646 tree mask_cst;
5647 unsigned int i;
5648 tree psize;
5649 tree int_ptrsize_type;
5650 char tmp_name[20];
5651 tree or_tmp_name = NULL_TREE;
5652 tree and_tmp, and_tmp_name, and_stmt;
5653 tree ptrsize_zero;
5654
5655 /* Check that mask is one less than a power of 2, i.e., mask is
5656 all zeros followed by all ones. */
5657 gcc_assert ((mask != 0) && ((mask & (mask+1)) == 0));
5658
5659 /* CHECKME: what is the best integer or unsigned type to use to hold a
5660 cast from a pointer value? */
5661 psize = TYPE_SIZE (ptr_type_node);
5662 int_ptrsize_type
5663 = lang_hooks.types.type_for_size (tree_low_cst (psize, 1), 0);
5664
5665 /* Create expression (mask & (dr_1 | ... | dr_n)), where dr_i is the address
5666 of the first vector of the i'th data reference. */
5667
5668 for (i = 0; VEC_iterate (tree, may_misalign_stmts, i, ref_stmt); i++)
5669 {
5670 tree new_stmt_list = NULL_TREE;
5671 tree addr_base;
5672 tree addr_tmp, addr_tmp_name, addr_stmt;
5673 tree or_tmp, new_or_tmp_name, or_stmt;
5674
5675 /* create: addr_tmp = (int)(address_of_first_vector) */
5676 addr_base = vect_create_addr_base_for_vector_ref (ref_stmt,
5677 &new_stmt_list,
5678 NULL_TREE);
5679
5680 if (new_stmt_list != NULL_TREE)
5681 append_to_statement_list_force (new_stmt_list, cond_expr_stmt_list);
5682
5683 sprintf (tmp_name, "%s%d", "addr2int", i);
5684 addr_tmp = create_tmp_var (int_ptrsize_type, tmp_name);
5685 add_referenced_var (addr_tmp);
5686 addr_tmp_name = make_ssa_name (addr_tmp, NULL_TREE);
5687 addr_stmt = fold_convert (int_ptrsize_type, addr_base);
5688 addr_stmt = build_gimple_modify_stmt (addr_tmp_name, addr_stmt);
5689 SSA_NAME_DEF_STMT (addr_tmp_name) = addr_stmt;
5690 append_to_statement_list_force (addr_stmt, cond_expr_stmt_list);
5691
5692 /* The addresses are ORed together. */
5693
5694 if (or_tmp_name != NULL_TREE)
5695 {
5696 /* create: or_tmp = or_tmp | addr_tmp */
5697 sprintf (tmp_name, "%s%d", "orptrs", i);
5698 or_tmp = create_tmp_var (int_ptrsize_type, tmp_name);
5699 add_referenced_var (or_tmp);
5700 new_or_tmp_name = make_ssa_name (or_tmp, NULL_TREE);
5701 tmp = build2 (BIT_IOR_EXPR, int_ptrsize_type,
5702 or_tmp_name, addr_tmp_name);
5703 or_stmt = build_gimple_modify_stmt (new_or_tmp_name, tmp);
5704 SSA_NAME_DEF_STMT (new_or_tmp_name) = or_stmt;
5705 append_to_statement_list_force (or_stmt, cond_expr_stmt_list);
5706 or_tmp_name = new_or_tmp_name;
5707 }
5708 else
5709 or_tmp_name = addr_tmp_name;
5710
5711 } /* end for i */
5712
5713 mask_cst = build_int_cst (int_ptrsize_type, mask);
5714
5715 /* create: and_tmp = or_tmp & mask */
5716 and_tmp = create_tmp_var (int_ptrsize_type, "andmask" );
5717 add_referenced_var (and_tmp);
5718 and_tmp_name = make_ssa_name (and_tmp, NULL_TREE);
5719
5720 tmp = build2 (BIT_AND_EXPR, int_ptrsize_type, or_tmp_name, mask_cst);
5721 and_stmt = build_gimple_modify_stmt (and_tmp_name, tmp);
5722 SSA_NAME_DEF_STMT (and_tmp_name) = and_stmt;
5723 append_to_statement_list_force (and_stmt, cond_expr_stmt_list);
5724
5725 /* Make and_tmp the left operand of the conditional test against zero.
5726 If and_tmp has a nonzero bit then some address is unaligned. */
5727 ptrsize_zero = build_int_cst (int_ptrsize_type, 0);
5728 return build2 (EQ_EXPR, boolean_type_node,
5729 and_tmp_name, ptrsize_zero);
5730 }
5731
5732
5733 /* Function vect_transform_loop.
5734
5735 The analysis phase has determined that the loop is vectorizable.
5736 Vectorize the loop: create vectorized stmts to replace the scalar
5737 stmts in the loop, and update the loop exit condition. */
5738
5739 void
5740 vect_transform_loop (loop_vec_info loop_vinfo)
5741 {
5742 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
5743 basic_block *bbs = LOOP_VINFO_BBS (loop_vinfo);
5744 int nbbs = loop->num_nodes;
5745 block_stmt_iterator si, next_si;
5746 int i;
5747 tree ratio = NULL;
5748 int vectorization_factor = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
5749 bool strided_store;
5750
5751 if (vect_print_dump_info (REPORT_DETAILS))
5752 fprintf (vect_dump, "=== vect_transform_loop ===");
5753
5754 /* If the loop has data references that may or may not be aligned then
5755 two versions of the loop need to be generated, one which is vectorized
5756 and one which isn't. A test is then generated to control which of the
5757 loops is executed. The test checks for the alignment of all of the
5758 data references that may or may not be aligned. */
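/* Schematically, after versioning the code has the form
     if (alignment_check)    <-- built by vect_create_cond_for_align_checks
       <loop to be vectorized>
     else
       <scalar copy of the loop>  */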
5759
5760 if (VEC_length (tree, LOOP_VINFO_MAY_MISALIGN_STMTS (loop_vinfo)))
5761 {
5762 struct loop *nloop;
5763 tree cond_expr;
5764 tree cond_expr_stmt_list = NULL_TREE;
5765 basic_block condition_bb;
5766 block_stmt_iterator cond_exp_bsi;
5767 basic_block merge_bb;
5768 basic_block new_exit_bb;
5769 edge new_exit_e, e;
5770 tree orig_phi, new_phi, arg;
5771 unsigned prob = 4 * REG_BR_PROB_BASE / 5;
5772
5773 cond_expr = vect_create_cond_for_align_checks (loop_vinfo,
5774 &cond_expr_stmt_list);
5775 initialize_original_copy_tables ();
5776 nloop = loop_version (loop, cond_expr, &condition_bb,
5777 prob, prob, REG_BR_PROB_BASE - prob, true);
5778 free_original_copy_tables();
5779
5780 /** Loop versioning violates an assumption we try to maintain during
5781 vectorization - that the loop exit block has a single predecessor.
5782 After versioning, the exit block of both loop versions is the same
5783 basic block (i.e. it has two predecessors). To simplify the
5784 following transformations in the vectorizer, we fix this situation
5785 here by adding a new (empty) block on the exit-edge of the loop,
5786 with the proper loop-exit phis to maintain loop-closed-form. **/
5787
5788 merge_bb = single_exit (loop)->dest;
5789 gcc_assert (EDGE_COUNT (merge_bb->preds) == 2);
5790 new_exit_bb = split_edge (single_exit (loop));
5791 new_exit_e = single_exit (loop);
5792 e = EDGE_SUCC (new_exit_bb, 0);
5793
5794 for (orig_phi = phi_nodes (merge_bb); orig_phi;
5795 orig_phi = PHI_CHAIN (orig_phi))
5796 {
5797 new_phi = create_phi_node (SSA_NAME_VAR (PHI_RESULT (orig_phi)),
5798 new_exit_bb);
5799 arg = PHI_ARG_DEF_FROM_EDGE (orig_phi, e);
5800 add_phi_arg (new_phi, arg, new_exit_e);
5801 SET_PHI_ARG_DEF (orig_phi, e->dest_idx, PHI_RESULT (new_phi));
5802 }
5803
5804 /** end loop-exit-fixes after versioning **/
5805
5806 update_ssa (TODO_update_ssa);
5807 cond_exp_bsi = bsi_last (condition_bb);
5808 bsi_insert_before (&cond_exp_bsi, cond_expr_stmt_list, BSI_SAME_STMT);
5809 }
5810
5811 /* CHECKME: we wouldn't need this if we called update_ssa once
5812 for all loops. */
5813 bitmap_zero (vect_memsyms_to_rename);
5814
5815 /* Peel the loop if there are data refs with unknown alignment.
5816 Only one data ref with unknown alignment is supported. */
5817
5818 if (LOOP_PEELING_FOR_ALIGNMENT (loop_vinfo))
5819 vect_do_peeling_for_alignment (loop_vinfo);
5820
5821 /* If the loop has a symbolic number of iterations 'n' (i.e. it's not a
5822 compile time constant), or it is a constant that is not divisible by the
5823 vectorization factor, then an epilog loop needs to be created.
5824 We therefore duplicate the loop: the original loop will be vectorized,
5825 and will compute the first (n/VF) iterations. The second copy of the loop
5826 will remain scalar and will compute the remaining (n%VF) iterations.
5827 (VF is the vectorization factor). */
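/* For instance, with n = 17 and VF = 4 the vectorized loop computes
   iterations 1..16 and the scalar epilog loop computes iteration 17;
   with n = 16 no epilog is needed and ratio is simply 16 / 4 = 4.  */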
5828
5829 if (!LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo)
5830 || (LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo)
5831 && LOOP_VINFO_INT_NITERS (loop_vinfo) % vectorization_factor != 0))
5832 vect_do_peeling_for_loop_bound (loop_vinfo, &ratio);
5833 else
5834 ratio = build_int_cst (TREE_TYPE (LOOP_VINFO_NITERS (loop_vinfo)),
5835 LOOP_VINFO_INT_NITERS (loop_vinfo) / vectorization_factor);
5836
5837 /* 1) Make sure the loop header has exactly two entries
5838 2) Make sure we have a preheader basic block. */
5839
5840 gcc_assert (EDGE_COUNT (loop->header->preds) == 2);
5841
5842 split_edge (loop_preheader_edge (loop));
5843
5844 /* FORNOW: the vectorizer supports only loops whose body consists
5845 of one basic block (header + empty latch). When the vectorizer
5846 supports more involved loop forms, the order in which the BBs are
5847 traversed will need to be reconsidered. */
5848
5849 for (i = 0; i < nbbs; i++)
5850 {
5851 basic_block bb = bbs[i];
5852 stmt_vec_info stmt_info;
5853 tree phi;
5854
5855 for (phi = phi_nodes (bb); phi; phi = PHI_CHAIN (phi))
5856 {
5857 if (vect_print_dump_info (REPORT_DETAILS))
5858 {
5859 fprintf (vect_dump, "------>vectorizing phi: ");
5860 print_generic_expr (vect_dump, phi, TDF_SLIM);
5861 }
5862 stmt_info = vinfo_for_stmt (phi);
5863 if (!stmt_info)
5864 continue;
5865 if (!STMT_VINFO_RELEVANT_P (stmt_info)
5866 && !STMT_VINFO_LIVE_P (stmt_info))
5867 continue;
5868
5869 if ((TYPE_VECTOR_SUBPARTS (STMT_VINFO_VECTYPE (stmt_info))
5870 != (unsigned HOST_WIDE_INT) vectorization_factor)
5871 && vect_print_dump_info (REPORT_DETAILS))
5872 fprintf (vect_dump, "multiple-types.");
5873
5874 if (STMT_VINFO_DEF_TYPE (stmt_info) == vect_induction_def)
5875 {
5876 if (vect_print_dump_info (REPORT_DETAILS))
5877 fprintf (vect_dump, "transform phi.");
5878 vect_transform_stmt (phi, NULL, NULL);
5879 }
5880 }
5881
5882 for (si = bsi_start (bb); !bsi_end_p (si);)
5883 {
5884 tree stmt = bsi_stmt (si);
5885 bool is_store;
5886
5887 if (vect_print_dump_info (REPORT_DETAILS))
5888 {
5889 fprintf (vect_dump, "------>vectorizing statement: ");
5890 print_generic_expr (vect_dump, stmt, TDF_SLIM);
5891 }
5892 stmt_info = vinfo_for_stmt (stmt);
5893 gcc_assert (stmt_info);
5894 if (!STMT_VINFO_RELEVANT_P (stmt_info)
5895 && !STMT_VINFO_LIVE_P (stmt_info))
5896 {
5897 bsi_next (&si);
5898 continue;
5899 }
5900
5901 gcc_assert (STMT_VINFO_VECTYPE (stmt_info));
5902 if ((TYPE_VECTOR_SUBPARTS (STMT_VINFO_VECTYPE (stmt_info))
5903 != (unsigned HOST_WIDE_INT) vectorization_factor)
5904 && vect_print_dump_info (REPORT_DETAILS))
5905 fprintf (vect_dump, "multiple-types.");
5906
5907 /* -------- vectorize statement ------------ */
5908 if (vect_print_dump_info (REPORT_DETAILS))
5909 fprintf (vect_dump, "transform statement.");
5910
5911 strided_store = false;
5912 is_store = vect_transform_stmt (stmt, &si, &strided_store);
5913 if (is_store)
5914 {
5915 stmt_ann_t ann;
5916 if (DR_GROUP_FIRST_DR (stmt_info))
5917 {
5918 /* Interleaving. If IS_STORE is TRUE, the vectorization of the
5919 interleaving chain was completed - free all the stores in
5920 the chain. */
5921 tree next = DR_GROUP_FIRST_DR (stmt_info);
5922 tree tmp;
5923 stmt_vec_info next_stmt_info;
5924
5925 while (next)
5926 {
5927 next_si = bsi_for_stmt (next);
5928 next_stmt_info = vinfo_for_stmt (next);
5929 /* Free the attached stmt_vec_info and remove the stmt. */
5930 ann = stmt_ann (next);
5931 tmp = DR_GROUP_NEXT_DR (next_stmt_info);
5932 free (next_stmt_info);
5933 set_stmt_info (ann, NULL);
5934 bsi_remove (&next_si, true);
5935 next = tmp;
5936 }
5937 bsi_remove (&si, true);
5938 continue;
5939 }
5940 else
5941 {
5942 /* Free the attached stmt_vec_info and remove the stmt. */
5943 ann = stmt_ann (stmt);
5944 free (stmt_info);
5945 set_stmt_info (ann, NULL);
5946 bsi_remove (&si, true);
5947 continue;
5948 }
5949 }
5950 bsi_next (&si);
5951 } /* stmts in BB */
5952 } /* BBs in loop */
5953
5954 slpeel_make_loop_iterate_ntimes (loop, ratio);
5955
5956 mark_set_for_renaming (vect_memsyms_to_rename);
5957
5958 /* The memory tags and pointers in vectorized statements need to
5959 have their SSA forms updated. FIXME, why can't this be delayed
5960 until all the loops have been transformed? */
5961 update_ssa (TODO_update_ssa);
5962
5963 if (vect_print_dump_info (REPORT_VECTORIZED_LOOPS))
5964 fprintf (vect_dump, "LOOP VECTORIZED.");
5965 }