/* Transformation Utilities for Loop Vectorization.
   Copyright (C) 2003, 2004, 2005, 2006, 2007, 2008 Free Software Foundation, Inc.
   Contributed by Dorit Naishlos <dorit@il.ibm.com>

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free
Software Foundation; either version 3, or (at your option) any later
version.

GCC is distributed in the hope that it will be useful, but WITHOUT ANY
WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3.  If not see
<http://www.gnu.org/licenses/>.  */

#include "config.h"
#include "system.h"
#include "coretypes.h"
#include "tm.h"
#include "ggc.h"
#include "tree.h"
#include "target.h"
#include "rtl.h"
#include "basic-block.h"
#include "diagnostic.h"
#include "tree-flow.h"
#include "tree-dump.h"
#include "timevar.h"
#include "cfgloop.h"
#include "expr.h"
#include "optabs.h"
#include "params.h"
#include "recog.h"
#include "tree-data-ref.h"
#include "tree-chrec.h"
#include "tree-scalar-evolution.h"
#include "tree-vectorizer.h"
#include "langhooks.h"
#include "tree-pass.h"
#include "toplev.h"
#include "real.h"

/* Utility functions for the code transformation.  */
static bool vect_transform_stmt (tree, block_stmt_iterator *, bool *, slp_tree);
static tree vect_create_destination_var (tree, tree);
static tree vect_create_data_ref_ptr
  (tree, struct loop*, tree, tree *, tree *, bool, tree, bool *);
static tree vect_create_addr_base_for_vector_ref
  (tree, tree *, tree, struct loop *);
static tree vect_get_new_vect_var (tree, enum vect_var_kind, const char *);
static tree vect_get_vec_def_for_operand (tree, tree, tree *);
static tree vect_init_vector (tree, tree, tree, block_stmt_iterator *);
static void vect_finish_stmt_generation
  (tree stmt, tree vec_stmt, block_stmt_iterator *);
static bool vect_is_simple_cond (tree, loop_vec_info);
static void vect_create_epilog_for_reduction (tree, tree, enum tree_code, tree);
static tree get_initial_def_for_reduction (tree, tree, tree *);

/* Utility functions dealing with loop peeling (not peeling itself).  */
static void vect_generate_tmps_on_preheader
  (loop_vec_info, tree *, tree *, tree *);
static tree vect_build_loop_niters (loop_vec_info);
static void vect_update_ivs_after_vectorizer (loop_vec_info, tree, edge);
static tree vect_gen_niters_for_prolog_loop (loop_vec_info, tree);
static void vect_update_init_of_dr (struct data_reference *, tree niters);
static void vect_update_inits_of_drs (loop_vec_info, tree);
static int vect_min_worthwhile_factor (enum tree_code);


static int
cost_for_stmt (tree stmt)
{
  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);

  switch (STMT_VINFO_TYPE (stmt_info))
    {
    case load_vec_info_type:
      return TARG_SCALAR_LOAD_COST;
    case store_vec_info_type:
      return TARG_SCALAR_STORE_COST;
    case op_vec_info_type:
    case condition_vec_info_type:
    case assignment_vec_info_type:
    case reduc_vec_info_type:
    case induc_vec_info_type:
    case type_promotion_vec_info_type:
    case type_demotion_vec_info_type:
    case type_conversion_vec_info_type:
    case call_vec_info_type:
      return TARG_SCALAR_STMT_COST;
    case undef_vec_info_type:
    default:
      gcc_unreachable ();
    }
}


/* Function vect_estimate_min_profitable_iters

   Return the number of iterations required for the vector version of the
   loop to be profitable relative to the cost of the scalar version of the
   loop.

   TODO: Take profile info into account before making vectorization
   decisions, if available.  */

int
vect_estimate_min_profitable_iters (loop_vec_info loop_vinfo)
{
  int i;
  int min_profitable_iters;
  int peel_iters_prologue;
  int peel_iters_epilogue;
  int vec_inside_cost = 0;
  int vec_outside_cost = 0;
  int scalar_single_iter_cost = 0;
  int scalar_outside_cost = 0;
  bool runtime_test = false;
  int vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
  struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
  basic_block *bbs = LOOP_VINFO_BBS (loop_vinfo);
  int nbbs = loop->num_nodes;
  int byte_misalign = LOOP_PEELING_FOR_ALIGNMENT (loop_vinfo);
  int peel_guard_costs = 0;
  int innerloop_iters = 0, factor;
  VEC (slp_instance, heap) *slp_instances;
  slp_instance instance;

  /* Cost model disabled.  */
  if (!flag_vect_cost_model)
    {
      if (vect_print_dump_info (REPORT_COST))
        fprintf (vect_dump, "cost model disabled.");
      return 0;
    }

  /* If the number of iterations is unknown, or the
     peeling-for-misalignment amount is unknown, we will have to generate
     a runtime test to compare the loop count against the threshold.  */
  if (!LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo)
      || (byte_misalign < 0))
    runtime_test = true;

  /* Requires loop versioning tests to handle misalignment.  */
  if (VEC_length (tree, LOOP_VINFO_MAY_MISALIGN_STMTS (loop_vinfo)))
    {
      /* FIXME: Make cost depend on complexity of individual check.  */
      vec_outside_cost +=
        VEC_length (tree, LOOP_VINFO_MAY_MISALIGN_STMTS (loop_vinfo));
      if (vect_print_dump_info (REPORT_COST))
        fprintf (vect_dump, "cost model: Adding cost of checks for loop "
                 "versioning to treat misalignment.\n");
    }

  if (VEC_length (ddr_p, LOOP_VINFO_MAY_ALIAS_DDRS (loop_vinfo)))
    {
      /* FIXME: Make cost depend on complexity of individual check.  */
      vec_outside_cost +=
        VEC_length (ddr_p, LOOP_VINFO_MAY_ALIAS_DDRS (loop_vinfo));
      if (vect_print_dump_info (REPORT_COST))
        fprintf (vect_dump, "cost model: Adding cost of checks for loop "
                 "versioning to treat aliasing.\n");
    }

  if (VEC_length (tree, LOOP_VINFO_MAY_MISALIGN_STMTS (loop_vinfo))
      || VEC_length (ddr_p, LOOP_VINFO_MAY_ALIAS_DDRS (loop_vinfo)))
    {
      vec_outside_cost += TARG_COND_TAKEN_BRANCH_COST;
    }

  /* Count statements in scalar loop.  Using this as scalar cost for a single
     iteration for now.

     TODO: Add outer loop support.

     TODO: Consider assigning different costs to different scalar
     statements.  */

  /* FORNOW.  */
  if (loop->inner)
    innerloop_iters = 50; /* FIXME */

  for (i = 0; i < nbbs; i++)
    {
      block_stmt_iterator si;
      basic_block bb = bbs[i];

      if (bb->loop_father == loop->inner)
        factor = innerloop_iters;
      else
        factor = 1;

      for (si = bsi_start (bb); !bsi_end_p (si); bsi_next (&si))
        {
          tree stmt = bsi_stmt (si);
          stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
          /* Skip stmts that are not vectorized inside the loop.  */
          if (!STMT_VINFO_RELEVANT_P (stmt_info)
              && (!STMT_VINFO_LIVE_P (stmt_info)
                  || STMT_VINFO_DEF_TYPE (stmt_info) != vect_reduction_def))
            continue;
          scalar_single_iter_cost += cost_for_stmt (stmt) * factor;
          vec_inside_cost += STMT_VINFO_INSIDE_OF_LOOP_COST (stmt_info)
                             * factor;
          /* FIXME: for stmts in the inner-loop in outer-loop vectorization,
             some of the "outside" costs are generated inside the
             outer-loop.  */
          vec_outside_cost += STMT_VINFO_OUTSIDE_OF_LOOP_COST (stmt_info);
        }
    }

  /* Add additional cost for the peeled instructions in the prologue and
     epilogue loops.

     FORNOW: If we don't know the value of peel_iters for prologue or epilogue
     at compile-time - we assume it's vf/2 (the worst would be vf-1).

     TODO: Build an expression that represents peel_iters for prologue and
     epilogue to be used in a run-time test.  */

  if (byte_misalign < 0)
    {
      peel_iters_prologue = vf/2;
      if (vect_print_dump_info (REPORT_COST))
        fprintf (vect_dump, "cost model: "
                 "prologue peel iters set to vf/2.");

      /* If peeling for alignment is unknown, the loop bound of the main
         loop becomes unknown.  */
      peel_iters_epilogue = vf/2;
      if (vect_print_dump_info (REPORT_COST))
        fprintf (vect_dump, "cost model: "
                 "epilogue peel iters set to vf/2 because "
                 "peeling for alignment is unknown.");

      /* If peeled iterations are unknown, count a taken branch and a not taken
         branch per peeled loop.  Even if scalar loop iterations are known,
         vector iterations are not known since peeled prologue iterations are
         not known.  Hence guards remain the same.  */
      peel_guard_costs += 2 * (TARG_COND_TAKEN_BRANCH_COST
                               + TARG_COND_NOT_TAKEN_BRANCH_COST);
    }
  else
    {
      if (byte_misalign)
        {
          struct data_reference *dr = LOOP_VINFO_UNALIGNED_DR (loop_vinfo);
          int element_size = GET_MODE_SIZE (TYPE_MODE (TREE_TYPE (DR_REF (dr))));
          tree vectype = STMT_VINFO_VECTYPE (vinfo_for_stmt (DR_STMT (dr)));
          int nelements = TYPE_VECTOR_SUBPARTS (vectype);

          peel_iters_prologue = nelements - (byte_misalign / element_size);
        }
      else
        peel_iters_prologue = 0;

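      /* Illustrative arithmetic (example values, not from the original
         source): for V4SI accesses, nelements = 4 and element_size = 4 bytes,
         so byte_misalign = 8 gives peel_iters_prologue = 4 - 8/4 = 2 scalar
         iterations before the first aligned vector access.  */
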
      if (!LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo))
        {
          peel_iters_epilogue = vf/2;
          if (vect_print_dump_info (REPORT_COST))
            fprintf (vect_dump, "cost model: "
                     "epilogue peel iters set to vf/2 because "
                     "loop iterations are unknown.");

          /* If peeled iterations are known but the number of scalar loop
             iterations is unknown, count a taken branch per peeled loop.  */
          peel_guard_costs += 2 * TARG_COND_TAKEN_BRANCH_COST;
        }
      else
        {
          int niters = LOOP_VINFO_INT_NITERS (loop_vinfo);
          peel_iters_prologue = niters < peel_iters_prologue ?
                                niters : peel_iters_prologue;
          peel_iters_epilogue = (niters - peel_iters_prologue) % vf;
        }
    }

  vec_outside_cost += (peel_iters_prologue * scalar_single_iter_cost)
                      + (peel_iters_epilogue * scalar_single_iter_cost)
                      + peel_guard_costs;

  /* FORNOW: The scalar outside cost is incremented in one of the
     following ways:

     1. The vectorizer checks for alignment and aliasing and generates
     a condition that allows dynamic vectorization.  A cost model
     check is ANDed with the versioning condition.  Hence the scalar code
     path now has the added cost of the versioning check.

       if (cost > th & versioning_check)
         jmp to vector code

     Hence the run-time scalar cost is incremented by a not-taken branch
     cost.

     2. The vectorizer then checks if a prologue is required.  If the
     cost model check was not done before during versioning, it has to
     be done before the prologue check.

       if (cost <= th)
         prologue = scalar_iters
       if (prologue == 0)
         jmp to vector code
       else
         execute prologue
       if (prologue == num_iters)
         go to exit

     Hence the run-time scalar cost is incremented by a taken branch,
     plus a not-taken branch, plus a taken branch cost.

     3. The vectorizer then checks if an epilogue is required.  If the
     cost model check was not done before during the prologue check, it
     has to be done with the epilogue check.

       if (prologue == 0)
         jmp to vector code
       else
         execute prologue
       if (prologue == num_iters)
         go to exit
       vector code:
         if ((cost <= th) | (scalar_iters-prologue-epilogue == 0))
           jmp to epilogue

     Hence the run-time scalar cost should be incremented by 2 taken
     branches.

     TODO: The backend may reorder the BBs differently and reverse
     conditions/branch directions.  Change the estimates below to
     something more reasonable.  */

  if (runtime_test)
    {
      /* Cost model check occurs at versioning.  */
      if (VEC_length (tree, LOOP_VINFO_MAY_MISALIGN_STMTS (loop_vinfo))
          || VEC_length (ddr_p, LOOP_VINFO_MAY_ALIAS_DDRS (loop_vinfo)))
        scalar_outside_cost += TARG_COND_NOT_TAKEN_BRANCH_COST;
      else
        {
          /* Cost model check occurs at prologue generation.  */
          if (LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo))
            scalar_outside_cost += 2 * TARG_COND_TAKEN_BRANCH_COST
                                   + TARG_COND_NOT_TAKEN_BRANCH_COST;
          /* Cost model check occurs at epilogue generation.  */
          else
            scalar_outside_cost += 2 * TARG_COND_TAKEN_BRANCH_COST;
        }
    }

  /* Add SLP costs.  */
  slp_instances = LOOP_VINFO_SLP_INSTANCES (loop_vinfo);
  for (i = 0; VEC_iterate (slp_instance, slp_instances, i, instance); i++)
    {
      vec_outside_cost += SLP_INSTANCE_OUTSIDE_OF_LOOP_COST (instance);
      vec_inside_cost += SLP_INSTANCE_INSIDE_OF_LOOP_COST (instance);
    }

  /* Calculate the number of iterations required to make the vector version
     profitable, relative to the loop bodies only.  The following condition
     must hold true:
       SIC * niters + SOC > VIC * ((niters-PL_ITERS-EP_ITERS)/VF) + VOC
     where
       SIC = scalar iteration cost, VIC = vector iteration cost,
       VOC = vector outside cost, VF = vectorization factor,
       PL_ITERS = prologue iterations, EP_ITERS = epilogue iterations,
       SOC = scalar outside cost for run time cost model check.  */

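  /* Worked example (illustrative numbers, not from the original source):
     with SIC = 4, VIC = 6, SOC = 2, VOC = 20, VF = 4 and
     PL_ITERS = EP_ITERS = 0, the condition 4*niters + 2 > 6*(niters/4) + 20
     first holds at niters = 8; the code below computes 7 from the
     truncating division and then increments it to 8.  */
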
  if ((scalar_single_iter_cost * vf) > vec_inside_cost)
    {
      if (vec_outside_cost <= 0)
        min_profitable_iters = 1;
      else
        {
          min_profitable_iters = ((vec_outside_cost - scalar_outside_cost) * vf
                                  - vec_inside_cost * peel_iters_prologue
                                  - vec_inside_cost * peel_iters_epilogue)
                                 / ((scalar_single_iter_cost * vf)
                                    - vec_inside_cost);

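          /* The truncating division above yields a lower bound; bump it by
             one when the scalar cost at that iteration count does not yet
             strictly exceed the vector cost.  */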
          if ((scalar_single_iter_cost * vf * min_profitable_iters)
              <= ((vec_inside_cost * min_profitable_iters)
                  + ((vec_outside_cost - scalar_outside_cost) * vf)))
            min_profitable_iters++;
        }
    }
  /* The vector version will never be profitable.  */
  else
    {
      if (vect_print_dump_info (REPORT_COST))
        fprintf (vect_dump, "cost model: vector iteration cost = %d "
                 "is divisible by scalar iteration cost = %d by a factor "
                 "greater than or equal to the vectorization factor = %d .",
                 vec_inside_cost, scalar_single_iter_cost, vf);
      return -1;
    }

  if (vect_print_dump_info (REPORT_COST))
    {
      fprintf (vect_dump, "Cost model analysis: \n");
      fprintf (vect_dump, "  Vector inside of loop cost: %d\n",
               vec_inside_cost);
      fprintf (vect_dump, "  Vector outside of loop cost: %d\n",
               vec_outside_cost);
      fprintf (vect_dump, "  Scalar iteration cost: %d\n",
               scalar_single_iter_cost);
      fprintf (vect_dump, "  Scalar outside cost: %d\n", scalar_outside_cost);
      fprintf (vect_dump, "  prologue iterations: %d\n",
               peel_iters_prologue);
      fprintf (vect_dump, "  epilogue iterations: %d\n",
               peel_iters_epilogue);
      fprintf (vect_dump, "  Calculated minimum iters for profitability: %d\n",
               min_profitable_iters);
    }

  min_profitable_iters =
    min_profitable_iters < vf ? vf : min_profitable_iters;

  /* Because the condition we create is:
       if (niters <= min_profitable_iters)
         then skip the vectorized loop.  */
  min_profitable_iters--;

  if (vect_print_dump_info (REPORT_COST))
    fprintf (vect_dump, "  Profitability threshold = %d\n",
             min_profitable_iters);

  return min_profitable_iters;
}


/* TODO: Close dependency between vect_model_*_cost and vectorizable_*
   functions.  Design better to avoid maintenance issues.  */

/* Function vect_model_reduction_cost.

   Models cost for a reduction operation, including the vector ops
   generated within the strip-mine loop, the initial definition before
   the loop, and the epilogue code that must be generated.  */

static bool
vect_model_reduction_cost (stmt_vec_info stmt_info, enum tree_code reduc_code,
                           int ncopies)
{
  int outer_cost = 0;
  enum tree_code code;
  optab optab;
  tree vectype;
  tree orig_stmt;
  tree reduction_op;
  enum machine_mode mode;
  tree operation = GIMPLE_STMT_OPERAND (STMT_VINFO_STMT (stmt_info), 1);
  int op_type = TREE_CODE_LENGTH (TREE_CODE (operation));
  loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
  struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);

  /* Cost of reduction op inside loop.  */
  STMT_VINFO_INSIDE_OF_LOOP_COST (stmt_info) += ncopies * TARG_VEC_STMT_COST;

  reduction_op = TREE_OPERAND (operation, op_type - 1);
  vectype = get_vectype_for_scalar_type (TREE_TYPE (reduction_op));
  if (!vectype)
    {
      if (vect_print_dump_info (REPORT_COST))
        {
          fprintf (vect_dump, "unsupported data-type ");
          print_generic_expr (vect_dump, TREE_TYPE (reduction_op), TDF_SLIM);
        }
      return false;
    }

  mode = TYPE_MODE (vectype);
  orig_stmt = STMT_VINFO_RELATED_STMT (stmt_info);

  if (!orig_stmt)
    orig_stmt = STMT_VINFO_STMT (stmt_info);

  code = TREE_CODE (GIMPLE_STMT_OPERAND (orig_stmt, 1));

  /* Add in cost for initial definition.  */
  outer_cost += TARG_SCALAR_TO_VEC_COST;

  /* Determine cost of epilogue code.

     We have a reduction operator that will reduce the vector in one statement.
     Also requires scalar extract.  */

  if (!nested_in_vect_loop_p (loop, orig_stmt))
    {
      if (reduc_code < NUM_TREE_CODES)
        outer_cost += TARG_VEC_STMT_COST + TARG_VEC_TO_SCALAR_COST;
      else
        {
          int vec_size_in_bits = tree_low_cst (TYPE_SIZE (vectype), 1);
          tree bitsize =
            TYPE_SIZE (TREE_TYPE (GIMPLE_STMT_OPERAND (orig_stmt, 0)));
          int element_bitsize = tree_low_cst (bitsize, 1);
          int nelements = vec_size_in_bits / element_bitsize;

          optab = optab_for_tree_code (code, vectype);

          /* We have a whole vector shift available.  */
          if (VECTOR_MODE_P (mode)
              && optab_handler (optab, mode)->insn_code != CODE_FOR_nothing
              && optab_handler (vec_shr_optab, mode)->insn_code
                 != CODE_FOR_nothing)
            /* Final reduction via vector shifts and the reduction operator.
               Also requires scalar extract.  */
            outer_cost += ((exact_log2 (nelements) * 2) * TARG_VEC_STMT_COST
                           + TARG_VEC_TO_SCALAR_COST);
          else
            /* Use extracts and reduction op for final reduction.  For N
               elements, we have N extracts and N-1 reduction ops.  */
            outer_cost += ((nelements + nelements - 1) * TARG_VEC_STMT_COST);
        }
    }
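
  /* Worked example (illustrative, not from the original source): for a
     V4SI reduction, nelements = 4, so the whole-vector-shift path above
     costs exact_log2 (4) * 2 = 4 vector stmts plus one vec-to-scalar
     extract, while the fallback path costs 4 + 3 = 7 vector stmts.  */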

  STMT_VINFO_OUTSIDE_OF_LOOP_COST (stmt_info) = outer_cost;

  if (vect_print_dump_info (REPORT_COST))
    fprintf (vect_dump, "vect_model_reduction_cost: inside_cost = %d, "
             "outside_cost = %d .", STMT_VINFO_INSIDE_OF_LOOP_COST (stmt_info),
             STMT_VINFO_OUTSIDE_OF_LOOP_COST (stmt_info));

  return true;
}


/* Function vect_model_induction_cost.

   Models cost for induction operations.  */

static void
vect_model_induction_cost (stmt_vec_info stmt_info, int ncopies)
{
  /* loop cost for vec_loop.  */
  STMT_VINFO_INSIDE_OF_LOOP_COST (stmt_info) = ncopies * TARG_VEC_STMT_COST;
  /* prologue cost for vec_init and vec_step.  */
  STMT_VINFO_OUTSIDE_OF_LOOP_COST (stmt_info) = 2 * TARG_SCALAR_TO_VEC_COST;

  if (vect_print_dump_info (REPORT_COST))
    fprintf (vect_dump, "vect_model_induction_cost: inside_cost = %d, "
             "outside_cost = %d .", STMT_VINFO_INSIDE_OF_LOOP_COST (stmt_info),
             STMT_VINFO_OUTSIDE_OF_LOOP_COST (stmt_info));
}


/* Function vect_model_simple_cost.

   Models cost for simple operations, i.e. those that only emit ncopies of a
   single op.  Right now, this does not account for multiple insns that could
   be generated for the single vector op.  We will handle that shortly.  */

void
vect_model_simple_cost (stmt_vec_info stmt_info, int ncopies,
                        enum vect_def_type *dt, slp_tree slp_node)
{
  int i;
  int inside_cost = 0, outside_cost = 0;

  inside_cost = ncopies * TARG_VEC_STMT_COST;

  /* FORNOW: Assuming maximum 2 args per stmts.  */
  for (i = 0; i < 2; i++)
    {
      if (dt[i] == vect_constant_def || dt[i] == vect_invariant_def)
        outside_cost += TARG_SCALAR_TO_VEC_COST;
    }
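
  /* For example (illustrative): for a_i = b_i + c with loop-invariant c,
     the loop above adds one TARG_SCALAR_TO_VEC_COST for broadcasting c
     into a vector outside the loop.  */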

  if (vect_print_dump_info (REPORT_COST))
    fprintf (vect_dump, "vect_model_simple_cost: inside_cost = %d, "
             "outside_cost = %d .", inside_cost, outside_cost);

  /* Set the costs either in STMT_INFO or SLP_NODE (if exists).  */
  stmt_vinfo_set_inside_of_loop_cost (stmt_info, slp_node, inside_cost);
  stmt_vinfo_set_outside_of_loop_cost (stmt_info, slp_node, outside_cost);
}


/* Function vect_cost_strided_group_size

   For strided load or store, return the group_size only if it is the first
   load or store of a group, else return 1.  This ensures that group size is
   only returned once per group.  */

static int
vect_cost_strided_group_size (stmt_vec_info stmt_info)
{
  tree first_stmt = DR_GROUP_FIRST_DR (stmt_info);

  if (first_stmt == STMT_VINFO_STMT (stmt_info))
    return DR_GROUP_SIZE (stmt_info);

  return 1;
}

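/* Illustrative note: in a group of 4 interleaved stores, the first store of
   the group returns 4 here and so absorbs the whole group's permutation cost
   in vect_model_store_cost below, while the remaining three return 1 and add
   none.  */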

/* Function vect_model_store_cost

   Models cost for stores.  In the case of strided accesses, one access
   has the overhead of the strided access attributed to it.  */

void
vect_model_store_cost (stmt_vec_info stmt_info, int ncopies,
                       enum vect_def_type dt, slp_tree slp_node)
{
  int group_size;
  int inside_cost = 0, outside_cost = 0;

  if (dt == vect_constant_def || dt == vect_invariant_def)
    outside_cost = TARG_SCALAR_TO_VEC_COST;

  /* Strided access?  */
  if (DR_GROUP_FIRST_DR (stmt_info))
    group_size = vect_cost_strided_group_size (stmt_info);
  /* Not a strided access.  */
  else
    group_size = 1;

  /* Is this an access in a group of stores, which provide strided access?
     If so, add in the cost of the permutes.  */
  if (group_size > 1)
    {
      /* Uses a high and low interleave operation for each needed permute.  */
      inside_cost = ncopies * exact_log2 (group_size) * group_size
                    * TARG_VEC_STMT_COST;

      if (vect_print_dump_info (REPORT_COST))
        fprintf (vect_dump, "vect_model_store_cost: strided group_size = %d .",
                 group_size);
    }
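
  /* Illustrative arithmetic (example values): group_size = 4 and ncopies = 1
     yield exact_log2 (4) * 4 = 8 vector stmts above, i.e. two rounds of
     high/low interleaving across the four vectors of the group.  */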

  /* Costs of the stores.  */
  inside_cost += ncopies * TARG_VEC_STORE_COST;

  if (vect_print_dump_info (REPORT_COST))
    fprintf (vect_dump, "vect_model_store_cost: inside_cost = %d, "
             "outside_cost = %d .", inside_cost, outside_cost);

  /* Set the costs either in STMT_INFO or SLP_NODE (if exists).  */
  stmt_vinfo_set_inside_of_loop_cost (stmt_info, slp_node, inside_cost);
  stmt_vinfo_set_outside_of_loop_cost (stmt_info, slp_node, outside_cost);
}


/* Function vect_model_load_cost

   Models cost for loads.  In the case of strided accesses, the last access
   has the overhead of the strided access attributed to it.  Since unaligned
   accesses are supported for loads, we also account for the costs of the
   access scheme chosen.  */

void
vect_model_load_cost (stmt_vec_info stmt_info, int ncopies, slp_tree slp_node)
{
  int group_size;
  int alignment_support_scheme;
  tree first_stmt;
  struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info), *first_dr;
  int inside_cost = 0, outside_cost = 0;

  /* Strided accesses?  */
  first_stmt = DR_GROUP_FIRST_DR (stmt_info);
  if (first_stmt && !slp_node)
    {
      group_size = vect_cost_strided_group_size (stmt_info);
      first_dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt));
    }
  /* Not a strided access.  */
  else
    {
      group_size = 1;
      first_dr = dr;
    }

  alignment_support_scheme = vect_supportable_dr_alignment (first_dr);

  /* Is this an access in a group of loads providing strided access?
     If so, add in the cost of the permutes.  */
  if (group_size > 1)
    {
      /* Uses even and odd extract operations for each needed permute.  */
      inside_cost = ncopies * exact_log2 (group_size) * group_size
                    * TARG_VEC_STMT_COST;

      if (vect_print_dump_info (REPORT_COST))
        fprintf (vect_dump, "vect_model_load_cost: strided group_size = %d .",
                 group_size);
    }
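
  /* As with stores (illustrative note): group_size = 4 and ncopies = 1 model
     exact_log2 (4) * 4 = 8 extract-even/extract-odd stmts here.  */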

  /* The loads themselves.  */
  switch (alignment_support_scheme)
    {
    case dr_aligned:
      {
        inside_cost += ncopies * TARG_VEC_LOAD_COST;

        if (vect_print_dump_info (REPORT_COST))
          fprintf (vect_dump, "vect_model_load_cost: aligned.");

        break;
      }
    case dr_unaligned_supported:
      {
        /* Here, we assign an additional cost for the unaligned load.  */
        inside_cost += ncopies * TARG_VEC_UNALIGNED_LOAD_COST;

        if (vect_print_dump_info (REPORT_COST))
          fprintf (vect_dump, "vect_model_load_cost: unaligned supported by "
                   "hardware.");

        break;
      }
    case dr_explicit_realign:
      {
        inside_cost += ncopies * (2 * TARG_VEC_LOAD_COST + TARG_VEC_STMT_COST);

        /* FIXME: If the misalignment remains fixed across the iterations of
           the containing loop, the following cost should be added to the
           outside costs.  */
        if (targetm.vectorize.builtin_mask_for_load)
          inside_cost += TARG_VEC_STMT_COST;

        break;
      }
    case dr_explicit_realign_optimized:
      {
        if (vect_print_dump_info (REPORT_COST))
          fprintf (vect_dump, "vect_model_load_cost: unaligned software "
                   "pipelined.");

        /* Unaligned software pipeline has a load of an address, an initial
           load, and possibly a mask operation to "prime" the loop.  However,
           if this is an access in a group of loads, which provide strided
           access, then the above cost should only be considered for one
           access in the group.  Inside the loop, there is a load op
           and a realignment op.  */

        if ((!DR_GROUP_FIRST_DR (stmt_info)) || group_size > 1 || slp_node)
          {
            outside_cost = 2 * TARG_VEC_STMT_COST;
            if (targetm.vectorize.builtin_mask_for_load)
              outside_cost += TARG_VEC_STMT_COST;
          }

        inside_cost += ncopies * (TARG_VEC_LOAD_COST + TARG_VEC_STMT_COST);

        break;
      }

    default:
      gcc_unreachable ();
    }

  if (vect_print_dump_info (REPORT_COST))
    fprintf (vect_dump, "vect_model_load_cost: inside_cost = %d, "
             "outside_cost = %d .", inside_cost, outside_cost);

  /* Set the costs either in STMT_INFO or SLP_NODE (if exists).  */
  stmt_vinfo_set_inside_of_loop_cost (stmt_info, slp_node, inside_cost);
  stmt_vinfo_set_outside_of_loop_cost (stmt_info, slp_node, outside_cost);
}


/* Function vect_get_new_vect_var.

   Returns a name for a new variable.  The current naming scheme appends the
   prefix "vect_" or "vect_p" (depending on the value of VAR_KIND) to
   the name of vectorizer generated variables, and appends that to NAME if
   provided.  */

static tree
vect_get_new_vect_var (tree type, enum vect_var_kind var_kind, const char *name)
{
  const char *prefix;
  tree new_vect_var;

  switch (var_kind)
    {
    case vect_simple_var:
      prefix = "vect_";
      break;
    case vect_scalar_var:
      prefix = "stmp_";
      break;
    case vect_pointer_var:
      prefix = "vect_p";
      break;
    default:
      gcc_unreachable ();
    }

  if (name)
    {
      char* tmp = concat (prefix, name, NULL);
      new_vect_var = create_tmp_var (type, tmp);
      free (tmp);
    }
  else
    new_vect_var = create_tmp_var (type, prefix);

  /* Mark vector typed variable as a gimple register variable.  */
  if (TREE_CODE (type) == VECTOR_TYPE)
    DECL_GIMPLE_REG_P (new_vect_var) = true;

  return new_vect_var;
}


/* Function vect_create_addr_base_for_vector_ref.

   Create an expression that computes the address of the first memory location
   that will be accessed for a data reference.

   Input:
   STMT: The statement containing the data reference.
   NEW_STMT_LIST: Must be initialized to NULL_TREE or a statement list.
   OFFSET: Optional.  If supplied, it is added to the initial address.
   LOOP: Specify the loop-nest relative to which the address should be
         computed.  For example, when the dataref is in an inner-loop nested
         in an outer-loop that is now being vectorized, LOOP can be either
         the outer-loop, or the inner-loop.  The first memory location
         accessed by the following dataref ('in' points to short):

           for (i=0; i<N; i++)
             for (j=0; j<M; j++)
               s += in[i+j]

         is as follows:
         if LOOP=i_loop: &in             (relative to i_loop)
         if LOOP=j_loop: &in+i*2B        (relative to j_loop)

   Output:
   1. Return an SSA_NAME whose value is the address of the memory location of
      the first vector of the data reference.
   2. If new_stmt_list is not NULL_TREE after return then the caller must
      insert these statement(s) which define the returned SSA_NAME.

   FORNOW: We are only handling array accesses with step 1.  */

static tree
vect_create_addr_base_for_vector_ref (tree stmt,
                                      tree *new_stmt_list,
                                      tree offset,
                                      struct loop *loop)
{
  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
  struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
  struct loop *containing_loop = (bb_for_stmt (stmt))->loop_father;
  tree data_ref_base = unshare_expr (DR_BASE_ADDRESS (dr));
  tree base_name;
  tree data_ref_base_var;
  tree new_base_stmt;
  tree vec_stmt;
  tree addr_base, addr_expr;
  tree dest, new_stmt;
  tree base_offset = unshare_expr (DR_OFFSET (dr));
  tree init = unshare_expr (DR_INIT (dr));
  tree vect_ptr_type, addr_expr2;
  tree step = TYPE_SIZE_UNIT (TREE_TYPE (DR_REF (dr)));

  gcc_assert (loop);
  if (loop != containing_loop)
    {
      loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
      struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);

      gcc_assert (nested_in_vect_loop_p (loop, stmt));

      data_ref_base = unshare_expr (STMT_VINFO_DR_BASE_ADDRESS (stmt_info));
      base_offset = unshare_expr (STMT_VINFO_DR_OFFSET (stmt_info));
      init = unshare_expr (STMT_VINFO_DR_INIT (stmt_info));
    }

  /* Create data_ref_base.  */
  base_name = build_fold_indirect_ref (data_ref_base);
  data_ref_base_var = create_tmp_var (TREE_TYPE (data_ref_base), "batmp");
  add_referenced_var (data_ref_base_var);
  data_ref_base = force_gimple_operand (data_ref_base, &new_base_stmt,
                                        true, data_ref_base_var);
  append_to_statement_list_force (new_base_stmt, new_stmt_list);

  /* Create base_offset.  */
  base_offset = size_binop (PLUS_EXPR, base_offset, init);
  base_offset = fold_convert (sizetype, base_offset);
  dest = create_tmp_var (TREE_TYPE (base_offset), "base_off");
  add_referenced_var (dest);
  base_offset = force_gimple_operand (base_offset, &new_stmt, true, dest);
  append_to_statement_list_force (new_stmt, new_stmt_list);

  if (offset)
    {
      tree tmp = create_tmp_var (sizetype, "offset");

      add_referenced_var (tmp);
      offset = fold_build2 (MULT_EXPR, TREE_TYPE (offset), offset, step);
      base_offset = fold_build2 (PLUS_EXPR, TREE_TYPE (base_offset),
                                 base_offset, offset);
      base_offset = force_gimple_operand (base_offset, &new_stmt, false, tmp);
      append_to_statement_list_force (new_stmt, new_stmt_list);
    }

  /* base + base_offset.  */
  addr_base = fold_build2 (POINTER_PLUS_EXPR, TREE_TYPE (data_ref_base),
                           data_ref_base, base_offset);

  vect_ptr_type = build_pointer_type (STMT_VINFO_VECTYPE (stmt_info));

  /* addr_expr = addr_base.  */
  addr_expr = vect_get_new_vect_var (vect_ptr_type, vect_pointer_var,
                                     get_name (base_name));
  add_referenced_var (addr_expr);
  vec_stmt = fold_convert (vect_ptr_type, addr_base);
  addr_expr2 = vect_get_new_vect_var (vect_ptr_type, vect_pointer_var,
                                      get_name (base_name));
  add_referenced_var (addr_expr2);
  vec_stmt = force_gimple_operand (vec_stmt, &new_stmt, false, addr_expr2);
  append_to_statement_list_force (new_stmt, new_stmt_list);

  if (vect_print_dump_info (REPORT_DETAILS))
    {
      fprintf (vect_dump, "created ");
      print_generic_expr (vect_dump, vec_stmt, TDF_SLIM);
    }
  return vec_stmt;
}


/* Function vect_create_data_ref_ptr.

   Create a new pointer to vector type (vp) that points to the first location
   accessed in the loop by STMT, along with the def-use update chain to
   appropriately advance the pointer through the loop iterations.  Also set
   aliasing information for the pointer.  This vector pointer is used by the
   callers to this function to create a memory reference expression for vector
   load/store access.

   Input:
   1. STMT: a stmt that references memory.  Expected to be of the form
        GIMPLE_MODIFY_STMT <name, data-ref> or
        GIMPLE_MODIFY_STMT <data-ref, name>.
   2. AT_LOOP: the loop where the vector memref is to be created.
   3. OFFSET (optional): an offset to be added to the initial address accessed
        by the data-ref in STMT.
   4. ONLY_INIT: indicate if vp is to be updated in the loop, or remain
        pointing to the initial address.
   5. TYPE: if not NULL indicates the required type of the data-ref.

   Output:
   1. Declare a new ptr to vector_type, and have it point to the base of the
      data reference (initial address accessed by the data reference).
      For example, for vector of type V8HI, the following code is generated:

        v8hi *vp;
        vp = (v8hi *)initial_address;

      if OFFSET is not supplied:
        initial_address = &a[init];
      if OFFSET is supplied:
        initial_address = &a[init + OFFSET];

      Return the initial_address in INITIAL_ADDRESS.

   2. If ONLY_INIT is true, just return the initial pointer.  Otherwise, also
      update the pointer in each iteration of the loop.

      Return the increment stmt that updates the pointer in PTR_INCR.

   3. Set INV_P to true if the access pattern of the data reference in the
      vectorized loop is invariant.  Set it to false otherwise.

   4. Return the pointer.  */

static tree
vect_create_data_ref_ptr (tree stmt, struct loop *at_loop,
                          tree offset, tree *initial_address, tree *ptr_incr,
                          bool only_init, tree type, bool *inv_p)
{
  tree base_name;
  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
  loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
  struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
  bool nested_in_vect_loop = nested_in_vect_loop_p (loop, stmt);
  struct loop *containing_loop = (bb_for_stmt (stmt))->loop_father;
  tree vectype = STMT_VINFO_VECTYPE (stmt_info);
  tree vect_ptr_type;
  tree vect_ptr;
  tree tag;
  tree new_temp;
  tree vec_stmt;
  tree new_stmt_list = NULL_TREE;
  edge pe;
  basic_block new_bb;
  tree vect_ptr_init;
  struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
  tree vptr;
  block_stmt_iterator incr_bsi;
  bool insert_after;
  tree indx_before_incr, indx_after_incr;
  tree incr;
  tree step;

  /* Check the step (evolution) of the load in LOOP, and record
     whether it's invariant.  */
  if (nested_in_vect_loop)
    step = STMT_VINFO_DR_STEP (stmt_info);
  else
    step = DR_STEP (STMT_VINFO_DATA_REF (stmt_info));

  if (tree_int_cst_compare (step, size_zero_node) == 0)
    *inv_p = true;
  else
    *inv_p = false;

  /* Create an expression for the first address accessed by this load
     in LOOP.  */
  base_name = build_fold_indirect_ref (unshare_expr (DR_BASE_ADDRESS (dr)));

  if (vect_print_dump_info (REPORT_DETAILS))
    {
      tree data_ref_base = base_name;
      fprintf (vect_dump, "create vector-pointer variable to type: ");
      print_generic_expr (vect_dump, vectype, TDF_SLIM);
      if (TREE_CODE (data_ref_base) == VAR_DECL)
        fprintf (vect_dump, " vectorizing a one dimensional array ref: ");
      else if (TREE_CODE (data_ref_base) == ARRAY_REF)
        fprintf (vect_dump, " vectorizing a multidimensional array ref: ");
      else if (TREE_CODE (data_ref_base) == COMPONENT_REF)
        fprintf (vect_dump, " vectorizing a record based array ref: ");
      else if (TREE_CODE (data_ref_base) == SSA_NAME)
        fprintf (vect_dump, " vectorizing a pointer ref: ");
      print_generic_expr (vect_dump, base_name, TDF_SLIM);
    }

  /** (1) Create the new vector-pointer variable:  **/
  if (type)
    vect_ptr_type = build_pointer_type (type);
  else
    vect_ptr_type = build_pointer_type (vectype);
  vect_ptr = vect_get_new_vect_var (vect_ptr_type, vect_pointer_var,
                                    get_name (base_name));
  add_referenced_var (vect_ptr);

  /** (2) Add aliasing information to the new vector-pointer:
          (The points-to info (DR_PTR_INFO) may be defined later.)  **/

  tag = DR_SYMBOL_TAG (dr);
  gcc_assert (tag);

  /* If tag is a variable (and NOT_A_TAG) then a new symbol memory
     tag must be created with tag added to its may alias list.  */
  if (!MTAG_P (tag))
    new_type_alias (vect_ptr, tag, DR_REF (dr));
  else
    set_symbol_mem_tag (vect_ptr, tag);

  var_ann (vect_ptr)->subvars = DR_SUBVARS (dr);

  /** Note: If the dataref is in an inner-loop nested in LOOP, and we are
      vectorizing LOOP (i.e. outer-loop vectorization), we need to create two
      def-use update cycles for the pointer: one relative to the outer-loop
      (LOOP), which is what steps (3) and (4) below do.  The other is relative
      to the inner-loop (which is the inner-most loop containing the dataref),
      and this is done by step (5) below.

      When vectorizing inner-most loops, the vectorized loop (LOOP) is also
      the inner-most loop, and so steps (3),(4) work the same, and step (5) is
      redundant.  Steps (3),(4) create the following:

        vp0 = &base_addr;
        LOOP:  vp1 = phi(vp0,vp2)
               ...
               ...
               vp2 = vp1 + step
               goto LOOP

      If there is an inner-loop nested in loop, then step (5) will also be
      applied, and an additional update in the inner-loop will be created:

        vp0 = &base_addr;
        LOOP:   vp1 = phi(vp0,vp2)
                ...
        inner:     vp3 = phi(vp1,vp4)
                   vp4 = vp3 + inner_step
                   if () goto inner
                ...
                vp2 = vp1 + step
                if () goto LOOP   */

  /** (3) Calculate the initial address of the vector-pointer, and set
          the vector-pointer to point to it before the loop:  **/

  /* Create: (&(base[init_val+offset])) in the loop preheader.  */

  new_temp = vect_create_addr_base_for_vector_ref (stmt, &new_stmt_list,
                                                   offset, loop);
  pe = loop_preheader_edge (loop);
  new_bb = bsi_insert_on_edge_immediate (pe, new_stmt_list);
  gcc_assert (!new_bb);
  *initial_address = new_temp;

  /* Create: p = (vectype *) initial_base  */
  vec_stmt = fold_convert (vect_ptr_type, new_temp);
  vec_stmt = build_gimple_modify_stmt (vect_ptr, vec_stmt);
  vect_ptr_init = make_ssa_name (vect_ptr, vec_stmt);
  GIMPLE_STMT_OPERAND (vec_stmt, 0) = vect_ptr_init;
  new_bb = bsi_insert_on_edge_immediate (pe, vec_stmt);
  gcc_assert (!new_bb);


  /** (4) Handle the updating of the vector-pointer inside the loop.
          This is needed when ONLY_INIT is false, and also when AT_LOOP
          is the inner-loop nested in LOOP (during outer-loop
          vectorization).  **/

  if (only_init && at_loop == loop) /* No update in loop is required.  */
    {
      /* Copy the points-to information if it exists.  */
      if (DR_PTR_INFO (dr))
        duplicate_ssa_name_ptr_info (vect_ptr_init, DR_PTR_INFO (dr));
      vptr = vect_ptr_init;
    }
  else
    {
      /* The step of the vector pointer is the Vector Size.  */
      tree step = TYPE_SIZE_UNIT (vectype);
      /* One exception to the above is when the scalar step of the load in
         LOOP is zero.  In this case the step here is also zero.  */
      if (*inv_p)
        step = size_zero_node;

      standard_iv_increment_position (loop, &incr_bsi, &insert_after);

      create_iv (vect_ptr_init,
                 fold_convert (vect_ptr_type, step),
                 NULL_TREE, loop, &incr_bsi, insert_after,
                 &indx_before_incr, &indx_after_incr);
      incr = bsi_stmt (incr_bsi);
      set_stmt_info (stmt_ann (incr),
                     new_stmt_vec_info (incr, loop_vinfo));

      /* Copy the points-to information if it exists.  */
      if (DR_PTR_INFO (dr))
        {
          duplicate_ssa_name_ptr_info (indx_before_incr, DR_PTR_INFO (dr));
          duplicate_ssa_name_ptr_info (indx_after_incr, DR_PTR_INFO (dr));
        }
      merge_alias_info (vect_ptr_init, indx_before_incr);
      merge_alias_info (vect_ptr_init, indx_after_incr);
      if (ptr_incr)
        *ptr_incr = incr;

      vptr = indx_before_incr;
    }

  if (!nested_in_vect_loop || only_init)
    return vptr;


  /** (5) Handle the updating of the vector-pointer inside the inner-loop
          nested in LOOP, if it exists:  **/

  gcc_assert (nested_in_vect_loop);
  if (!only_init)
    {
      standard_iv_increment_position (containing_loop, &incr_bsi,
                                      &insert_after);
      create_iv (vptr, fold_convert (vect_ptr_type, DR_STEP (dr)), NULL_TREE,
                 containing_loop, &incr_bsi, insert_after, &indx_before_incr,
                 &indx_after_incr);
      incr = bsi_stmt (incr_bsi);
      set_stmt_info (stmt_ann (incr), new_stmt_vec_info (incr, loop_vinfo));

      /* Copy the points-to information if it exists.  */
      if (DR_PTR_INFO (dr))
        {
          duplicate_ssa_name_ptr_info (indx_before_incr, DR_PTR_INFO (dr));
          duplicate_ssa_name_ptr_info (indx_after_incr, DR_PTR_INFO (dr));
        }
      merge_alias_info (vect_ptr_init, indx_before_incr);
      merge_alias_info (vect_ptr_init, indx_after_incr);
      if (ptr_incr)
        *ptr_incr = incr;

      return indx_before_incr;
    }
  else
    gcc_unreachable ();
}


/* Function bump_vector_ptr

   Increment a pointer (to a vector type) by vector-size.  If requested,
   i.e. if PTR_INCR is given, then also connect the new increment stmt
   to the existing def-use update-chain of the pointer, by modifying
   the PTR_INCR as illustrated below:

   The pointer def-use update-chain before this function:
                        DATAREF_PTR = phi (p_0, p_2)
                        ....
        PTR_INCR:       p_2 = DATAREF_PTR + step

   The pointer def-use update-chain after this function:
                        DATAREF_PTR = phi (p_0, p_2)
                        ....
                        NEW_DATAREF_PTR = DATAREF_PTR + BUMP
                        ....
        PTR_INCR:       p_2 = NEW_DATAREF_PTR + step

   Input:
   DATAREF_PTR - ssa_name of a pointer (to vector type) that is being updated
                 in the loop.
   PTR_INCR - optional.  The stmt that updates the pointer in each iteration
              of the loop.  The increment amount across iterations is
              expected to be vector_size.
   BSI - location where the new update stmt is to be placed.
   STMT - the original scalar memory-access stmt that is being vectorized.
   BUMP - optional.  The offset by which to bump the pointer.  If not given,
          the offset is assumed to be vector_size.

   Output: Return NEW_DATAREF_PTR as illustrated above.  */

static tree
bump_vector_ptr (tree dataref_ptr, tree ptr_incr, block_stmt_iterator *bsi,
                 tree stmt, tree bump)
{
  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
  struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
  tree vectype = STMT_VINFO_VECTYPE (stmt_info);
  tree vptr_type = TREE_TYPE (dataref_ptr);
  tree ptr_var = SSA_NAME_VAR (dataref_ptr);
  tree update = TYPE_SIZE_UNIT (vectype);
  tree incr_stmt;
  ssa_op_iter iter;
  use_operand_p use_p;
  tree new_dataref_ptr;

  if (bump)
    update = bump;

  incr_stmt = build_gimple_modify_stmt (ptr_var,
                                        build2 (POINTER_PLUS_EXPR, vptr_type,
                                                dataref_ptr, update));
  new_dataref_ptr = make_ssa_name (ptr_var, incr_stmt);
  GIMPLE_STMT_OPERAND (incr_stmt, 0) = new_dataref_ptr;
  vect_finish_stmt_generation (stmt, incr_stmt, bsi);

  /* Copy the points-to information if it exists.  */
  if (DR_PTR_INFO (dr))
    duplicate_ssa_name_ptr_info (new_dataref_ptr, DR_PTR_INFO (dr));
  merge_alias_info (new_dataref_ptr, dataref_ptr);

  if (!ptr_incr)
    return new_dataref_ptr;

  /* Update the vector-pointer's cross-iteration increment.  */
  FOR_EACH_SSA_USE_OPERAND (use_p, ptr_incr, iter, SSA_OP_USE)
    {
      tree use = USE_FROM_PTR (use_p);

      if (use == dataref_ptr)
        SET_USE (use_p, new_dataref_ptr);
      else
        gcc_assert (tree_int_cst_compare (use, update) == 0);
    }

  return new_dataref_ptr;
}

/* Function vect_create_destination_var.

   Create a new temporary of type VECTYPE.  */

static tree
vect_create_destination_var (tree scalar_dest, tree vectype)
{
  tree vec_dest;
  const char *new_name;
  tree type;
  enum vect_var_kind kind;

  kind = vectype ? vect_simple_var : vect_scalar_var;
  type = vectype ? vectype : TREE_TYPE (scalar_dest);

  gcc_assert (TREE_CODE (scalar_dest) == SSA_NAME);

  new_name = get_name (scalar_dest);
  if (!new_name)
    new_name = "var_";
  vec_dest = vect_get_new_vect_var (type, kind, new_name);
  add_referenced_var (vec_dest);

  return vec_dest;
}


/* Function vect_init_vector.

   Insert a new stmt (INIT_STMT) that initializes a new vector variable with
   the vector elements of VECTOR_VAR.  Place the initialization at BSI if it
   is not NULL.  Otherwise, place the initialization at the loop preheader.
   Return the DEF of INIT_STMT.
   It will be used in the vectorization of STMT.  */
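
/* For example (illustrative): when the caller has built the vector constant
   {c,c,c,c} for a V4SI use of an invariant c, this function emits

     vect_cst_.N = {c,c,c,c};

   (N is a made-up uniquifying suffix) and returns the new SSA name.  */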

static tree
vect_init_vector (tree stmt, tree vector_var, tree vector_type,
                  block_stmt_iterator *bsi)
{
  stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt);
  tree new_var;
  tree init_stmt;
  tree vec_oprnd;
  edge pe;
  tree new_temp;
  basic_block new_bb;

  new_var = vect_get_new_vect_var (vector_type, vect_simple_var, "cst_");
  add_referenced_var (new_var);
  init_stmt = build_gimple_modify_stmt (new_var, vector_var);
  new_temp = make_ssa_name (new_var, init_stmt);
  GIMPLE_STMT_OPERAND (init_stmt, 0) = new_temp;

  if (bsi)
    vect_finish_stmt_generation (stmt, init_stmt, bsi);
  else
    {
      loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
      struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);

      if (nested_in_vect_loop_p (loop, stmt))
        loop = loop->inner;
      pe = loop_preheader_edge (loop);
      new_bb = bsi_insert_on_edge_immediate (pe, init_stmt);
      gcc_assert (!new_bb);
    }

  if (vect_print_dump_info (REPORT_DETAILS))
    {
      fprintf (vect_dump, "created new init_stmt: ");
      print_generic_expr (vect_dump, init_stmt, TDF_SLIM);
    }

  vec_oprnd = GIMPLE_STMT_OPERAND (init_stmt, 0);
  return vec_oprnd;
}


/* For constant and loop invariant defs of SLP_NODE this function returns
   (vector) defs (VEC_OPRNDS) that will be used in the vectorized stmts.
   OP_NUM determines if we gather defs for operand 0 or operand 1 of the
   scalar stmts.  */

static void
vect_get_constant_vectors (slp_tree slp_node, VEC(tree,heap) **vec_oprnds,
                           unsigned int op_num)
{
  VEC (tree, heap) *stmts = SLP_TREE_SCALAR_STMTS (slp_node);
  tree stmt = VEC_index (tree, stmts, 0);
  stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt);
  tree vectype = STMT_VINFO_VECTYPE (stmt_vinfo);
  int nunits = TYPE_VECTOR_SUBPARTS (vectype);
  tree vec_cst;
  tree t = NULL_TREE;
  int j, number_of_places_left_in_vector;
  tree vector_type;
  tree op, vop, operation;
  int group_size = VEC_length (tree, stmts);
  unsigned int vec_num, i;
  int number_of_copies = 1;
  bool is_store = false;
  unsigned int number_of_vectors = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
  VEC (tree, heap) *voprnds = VEC_alloc (tree, heap, number_of_vectors);
  bool constant_p;

  if (STMT_VINFO_DATA_REF (stmt_vinfo))
    is_store = true;

  /* NUMBER_OF_COPIES is the number of times we need to use the same values in
     created vectors.  It is greater than 1 if unrolling is performed.

     For example, we have two scalar operands, s1 and s2 (e.g., group of
     strided accesses of size two), while NUNITS is four (i.e., four scalars
     of this type can be packed in a vector).  The output vector will contain
     two copies of each scalar operand: {s1, s2, s1, s2}.  (NUMBER_OF_COPIES
     will be 2).

     If GROUP_SIZE > NUNITS, the scalars will be split into several vectors
     containing the operands.

     For example, NUNITS is four as before, and the group size is 8
     (s1, s2, ..., s8).  We will create two vectors {s1, s2, s3, s4} and
     {s5, s6, s7, s8}.  */

  number_of_copies = least_common_multiple (nunits, group_size) / group_size;

  number_of_places_left_in_vector = nunits;
  constant_p = true;
  for (j = 0; j < number_of_copies; j++)
    {
      for (i = group_size - 1; VEC_iterate (tree, stmts, i, stmt); i--)
        {
          operation = GIMPLE_STMT_OPERAND (stmt, 1);
          if (is_store)
            op = operation;
          else
            op = TREE_OPERAND (operation, op_num);
          if (!CONSTANT_CLASS_P (op))
            constant_p = false;

          /* Create 'vect_ = {op0,op1,...,opn}'.  */
          t = tree_cons (NULL_TREE, op, t);

          number_of_places_left_in_vector--;

          if (number_of_places_left_in_vector == 0)
            {
              number_of_places_left_in_vector = nunits;

              vector_type = get_vectype_for_scalar_type (TREE_TYPE (op));
              gcc_assert (vector_type);
              if (constant_p)
                vec_cst = build_vector (vector_type, t);
              else
                vec_cst = build_constructor_from_list (vector_type, t);
              constant_p = true;
              VEC_quick_push (tree, voprnds,
                              vect_init_vector (stmt, vec_cst, vector_type,
                                                NULL));
              t = NULL_TREE;
            }
        }
    }

  /* Since the vectors are created in the reverse order, we should invert
     them.  */
  vec_num = VEC_length (tree, voprnds);
  for (j = vec_num - 1; j >= 0; j--)
    {
      vop = VEC_index (tree, voprnds, j);
      VEC_quick_push (tree, *vec_oprnds, vop);
    }

  VEC_free (tree, heap, voprnds);

  /* In case that VF is greater than the unrolling factor needed for the SLP
     group of stmts, NUMBER_OF_VECTORS to be created is greater than
     NUMBER_OF_SCALARS/NUNITS or NUNITS/NUMBER_OF_SCALARS, and hence we have
     to replicate the vectors.  */
  while (number_of_vectors > VEC_length (tree, *vec_oprnds))
    {
      for (i = 0; VEC_iterate (tree, *vec_oprnds, i, vop) && i < vec_num; i++)
        VEC_quick_push (tree, *vec_oprnds, vop);
    }
}


/* Get vectorized definitions from SLP_NODE that contains the corresponding
   vectorized def-stmts.  */

static void
vect_get_slp_vect_defs (slp_tree slp_node, VEC (tree,heap) **vec_oprnds)
{
  tree vec_oprnd;
  tree vec_def_stmt;
  unsigned int i;

  gcc_assert (SLP_TREE_VEC_STMTS (slp_node));

  for (i = 0;
       VEC_iterate (tree, SLP_TREE_VEC_STMTS (slp_node), i, vec_def_stmt);
       i++)
    {
      gcc_assert (vec_def_stmt);
      vec_oprnd = GIMPLE_STMT_OPERAND (vec_def_stmt, 0);
      VEC_quick_push (tree, *vec_oprnds, vec_oprnd);
    }
}


1491/* Get vectorized definitions for SLP_NODE.
1492 If the scalar definitions are loop invariants or constants, collect them and
1493 call vect_get_constant_vectors() to create vector stmts.
1494 Otherwise, the def-stmts must be already vectorized and the vectorized stmts
1495 must be stored in the LEFT/RIGHT node of SLP_NODE, and we call
f8f8fee8
IR
1496 vect_get_slp_vect_defs() to retrieve them.
1497 If VEC_OPRNDS1 is NULL, don't get vector defs for the second operand (from
1498 the right node. This is used when the second operand must remain scalar. */
805e2059
IR
1499
1500static void
1501vect_get_slp_defs (slp_tree slp_node, VEC (tree,heap) **vec_oprnds0,
1502 VEC (tree,heap) **vec_oprnds1)
1503{
1504 tree operation, first_stmt;
1505
1506 /* Allocate memory for vectorized defs. */
1507 *vec_oprnds0 = VEC_alloc (tree, heap,
1508 SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node));
1509
1510 /* SLP_NODE corresponds either to a group of stores or to a group of
1511 unary/binary operations. We don't call this function for loads. */
1512 if (SLP_TREE_LEFT (slp_node))
1513 /* The defs are already vectorized. */
1514 vect_get_slp_vect_defs (SLP_TREE_LEFT (slp_node), vec_oprnds0);
1515 else
1516 /* Build vectors from scalar defs. */
1517 vect_get_constant_vectors (slp_node, vec_oprnds0, 0);
1518
1519 first_stmt = VEC_index (tree, SLP_TREE_SCALAR_STMTS (slp_node), 0);
1520 if (STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt)))
1521 /* Since we don't call this function with loads, this is a group of
1522 stores. */
1523 return;
1524
1525 operation = GIMPLE_STMT_OPERAND (first_stmt, 1);
f8f8fee8 1526 if (TREE_OPERAND_LENGTH (operation) == unary_op || !vec_oprnds1)
1527 return;
1528
1529 *vec_oprnds1 = VEC_alloc (tree, heap,
1530 SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node));
1531
1532 if (SLP_TREE_RIGHT (slp_node))
1533 /* The defs are already vectorized. */
1534 vect_get_slp_vect_defs (SLP_TREE_RIGHT (slp_node), vec_oprnds1);
1535 else
1536 /* Build vectors from scalar defs. */
1537 vect_get_constant_vectors (slp_node, vec_oprnds1, 1);
1538}
1539
1540
fbf798fc 1541 /* Function get_initial_def_for_induction
1542
1543 Input:
d29de1bf 1544 STMT - a stmt that performs an induction operation in the loop.
1545 IV_PHI - the initial value of the induction variable
1546
1547 Output:
1548 Return a vector variable, initialized with the first VF values of
1549 the induction variable. E.g., for an iv with IV_PHI='X' and
1550 evolution S, for a vector of 4 units, we want to return:
1551 [X, X + S, X + 2*S, X + 3*S]. */
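/* Illustrative sketch (plain C stand-in, not part of GCC): for nunits == 4,
   the prolog values built by this function are vec_init = [X, X+S, X+2*S,
   X+3*S] and, in the non-nested case, vec_step = [VF*S, VF*S, VF*S, VF*S].  */
#if 0
static void
example_induction_prolog (int x, int s, int vf,
                          int vec_init[4], int vec_step[4])
{
  int i;

  for (i = 0; i < 4; i++)
    {
      vec_init[i] = x + i * s;   /* first nunits values of the induction */
      vec_step[i] = vf * s;      /* advance by one whole vector iteration */
    }
}
#endif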
1552
 1553 static tree
cd38ca7f 1554 get_initial_def_for_induction (tree iv_phi)
fbf798fc 1555 {
cd38ca7f 1556 stmt_vec_info stmt_vinfo = vinfo_for_stmt (iv_phi);
1557 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
1558 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
3d95caa4 1559 tree scalar_type = TREE_TYPE (PHI_RESULT_TREE (iv_phi));
1560 tree vectype;
1561 int nunits;
fbf798fc 1562 edge pe = loop_preheader_edge (loop);
d29de1bf 1563 struct loop *iv_loop;
fbf798fc 1564 basic_block new_bb;
1565 tree vec, vec_init, vec_step, t;
1566 tree access_fn;
1567 tree new_var;
1568 tree new_name;
1569 tree init_stmt;
1570 tree induction_phi, induc_def, new_stmt, vec_def, vec_dest;
1571 tree init_expr, step_expr;
1572 int vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
1573 int i;
1574 bool ok;
20e545c3 1575 int ncopies;
1576 tree expr;
1577 stmt_vec_info phi_info = vinfo_for_stmt (iv_phi);
d29de1bf 1578 bool nested_in_vect_loop = false;
c492dc9a 1579 tree stmts;
1580 imm_use_iterator imm_iter;
1581 use_operand_p use_p;
1582 tree exit_phi;
1583 edge latch_e;
1584 tree loop_arg;
1585 block_stmt_iterator si;
1586 basic_block bb = bb_for_stmt (iv_phi);
fbf798fc 1587
1588 vectype = get_vectype_for_scalar_type (scalar_type);
1589 gcc_assert (vectype);
1590 nunits = TYPE_VECTOR_SUBPARTS (vectype);
1591 ncopies = vf / nunits;
1592
fbf798fc 1593 gcc_assert (phi_info);
cd38ca7f 1594 gcc_assert (ncopies >= 1);
fbf798fc 1595
1596 /* Find the first insertion point in the BB. */
1597 si = bsi_after_labels (bb);
fbf798fc 1598
1599 if (INTEGRAL_TYPE_P (scalar_type))
1600 step_expr = build_int_cst (scalar_type, 0);
1601 else
1602 step_expr = build_real (scalar_type, dconst0);
1603
1604 /* Is phi in an inner-loop, while vectorizing an enclosing outer-loop? */
1605 if (nested_in_vect_loop_p (loop, iv_phi))
1606 {
1607 nested_in_vect_loop = true;
1608 iv_loop = loop->inner;
1609 }
1610 else
1611 iv_loop = loop;
1612 gcc_assert (iv_loop == (bb_for_stmt (iv_phi))->loop_father);
1613
1614 latch_e = loop_latch_edge (iv_loop);
1615 loop_arg = PHI_ARG_DEF_FROM_EDGE (iv_phi, latch_e);
1616
1617 access_fn = analyze_scalar_evolution (iv_loop, PHI_RESULT (iv_phi));
fbf798fc 1618 gcc_assert (access_fn);
1619 ok = vect_is_simple_iv_evolution (iv_loop->num, access_fn,
1620 &init_expr, &step_expr);
fbf798fc 1621 gcc_assert (ok);
d29de1bf 1622 pe = loop_preheader_edge (iv_loop);
1623
1624 /* Create the vector that holds the initial_value of the induction. */
d29de1bf 1625 if (nested_in_vect_loop)
c492dc9a 1626 {
 1627 /* iv_loop is nested in the loop to be vectorized. init_expr has already
 1628 been created during vectorization of previous stmts; we obtain it from
1629 the STMT_VINFO_VEC_STMT of the defining stmt. */
1630 tree iv_def = PHI_ARG_DEF_FROM_EDGE (iv_phi, loop_preheader_edge (iv_loop));
1631 vec_init = vect_get_vec_def_for_operand (iv_def, iv_phi, NULL);
c492dc9a 1632 }
d29de1bf 1633 else
fbf798fc 1634 {
1635 /* iv_loop is the loop to be vectorized. Create:
1636 vec_init = [X, X+S, X+2*S, X+3*S] (S = step_expr, X = init_expr) */
1637 new_var = vect_get_new_vect_var (scalar_type, vect_scalar_var, "var_");
1638 add_referenced_var (new_var);
ebb07520 1639
1640 new_name = force_gimple_operand (init_expr, &stmts, false, new_var);
1641 if (stmts)
1642 {
1643 new_bb = bsi_insert_on_edge_immediate (pe, stmts);
1644 gcc_assert (!new_bb);
1645 }
fbf798fc 1646
1647 t = NULL_TREE;
1648 t = tree_cons (NULL_TREE, init_expr, t);
1649 for (i = 1; i < nunits; i++)
1650 {
1651 tree tmp;
fbf798fc 1652
1653 /* Create: new_name_i = new_name + step_expr */
1654 tmp = fold_build2 (PLUS_EXPR, scalar_type, new_name, step_expr);
1655 init_stmt = build_gimple_modify_stmt (new_var, tmp);
1656 new_name = make_ssa_name (new_var, init_stmt);
1657 GIMPLE_STMT_OPERAND (init_stmt, 0) = new_name;
1658
1659 new_bb = bsi_insert_on_edge_immediate (pe, init_stmt);
1660 gcc_assert (!new_bb);
1661
1662 if (vect_print_dump_info (REPORT_DETAILS))
1663 {
1664 fprintf (vect_dump, "created new init_stmt: ");
1665 print_generic_expr (vect_dump, init_stmt, TDF_SLIM);
1666 }
1667 t = tree_cons (NULL_TREE, new_name, t);
1668 }
1669 /* Create a vector from [new_name_0, new_name_1, ..., new_name_nunits-1] */
1670 vec = build_constructor_from_list (vectype, nreverse (t));
468c2ac0 1671 vec_init = vect_init_vector (iv_phi, vec, vectype, NULL);
fbf798fc 1672 }
1673
1674
1675 /* Create the vector that holds the step of the induction. */
1676 if (nested_in_vect_loop)
1677 /* iv_loop is nested in the loop to be vectorized. Generate:
1678 vec_step = [S, S, S, S] */
1679 new_name = step_expr;
1680 else
1681 {
1682 /* iv_loop is the loop to be vectorized. Generate:
1683 vec_step = [VF*S, VF*S, VF*S, VF*S] */
1684 expr = build_int_cst (scalar_type, vf);
1685 new_name = fold_build2 (MULT_EXPR, scalar_type, expr, step_expr);
1686 }
1687
1688 t = NULL_TREE;
1689 for (i = 0; i < nunits; i++)
1690 t = tree_cons (NULL_TREE, unshare_expr (new_name), t);
1691 gcc_assert (CONSTANT_CLASS_P (new_name));
1692 vec = build_vector (vectype, t);
468c2ac0 1693 vec_step = vect_init_vector (iv_phi, vec, vectype, NULL);
1694
1695
1696 /* Create the following def-use cycle:
1697 loop prolog:
1698 vec_init = ...
1699 vec_step = ...
1700 loop:
1701 vec_iv = PHI <vec_init, vec_loop>
1702 ...
1703 STMT
1704 ...
1705 vec_loop = vec_iv + vec_step; */
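 /* For example, with X = 0, S = 1 and VF = nunits = 4, the cycle uses
 vec_init = [0,1,2,3] and vec_step = [4,4,4,4], so vec_iv takes the
 values [0,1,2,3], [4,5,6,7], [8,9,10,11], ... on successive iterations. */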
1706
1707 /* Create the induction-phi that defines the induction-operand. */
1708 vec_dest = vect_get_new_vect_var (vectype, vect_simple_var, "vec_iv_");
1709 add_referenced_var (vec_dest);
d29de1bf 1710 induction_phi = create_phi_node (vec_dest, iv_loop->header);
1711 set_stmt_info (get_stmt_ann (induction_phi),
1712 new_stmt_vec_info (induction_phi, loop_vinfo));
1713 induc_def = PHI_RESULT (induction_phi);
1714
1715 /* Create the iv update inside the loop */
1716 new_stmt = build_gimple_modify_stmt (NULL_TREE,
1717 build2 (PLUS_EXPR, vectype,
1718 induc_def, vec_step));
1719 vec_def = make_ssa_name (vec_dest, new_stmt);
1720 GIMPLE_STMT_OPERAND (new_stmt, 0) = vec_def;
1721 bsi_insert_before (&si, new_stmt, BSI_SAME_STMT);
1722 set_stmt_info (get_stmt_ann (new_stmt),
1723 new_stmt_vec_info (new_stmt, loop_vinfo));
1724
1725 /* Set the arguments of the phi node: */
1726 add_phi_arg (induction_phi, vec_init, pe);
1727 add_phi_arg (induction_phi, vec_def, loop_latch_edge (iv_loop));
1728
1729
d29de1bf 1730 /* In case the vectorization factor (VF) is bigger than the number
1731 of elements that we can fit in a vectype (nunits), we have to generate
1732 more than one vector stmt - i.e - we need to "unroll" the
1733 vector stmt by a factor VF/nunits. For more details see documentation
1734 in vectorizable_operation. */
1735
1736 if (ncopies > 1)
1737 {
1738 stmt_vec_info prev_stmt_vinfo;
1739 /* FORNOW. This restriction should be relaxed. */
1740 gcc_assert (!nested_in_vect_loop);
1741
1742 /* Create the vector that holds the step of the induction. */
1743 expr = build_int_cst (scalar_type, nunits);
1744 new_name = fold_build2 (MULT_EXPR, scalar_type, expr, step_expr);
1745 t = NULL_TREE;
1746 for (i = 0; i < nunits; i++)
1747 t = tree_cons (NULL_TREE, unshare_expr (new_name), t);
1748 gcc_assert (CONSTANT_CLASS_P (new_name));
1749 vec = build_vector (vectype, t);
468c2ac0 1750 vec_step = vect_init_vector (iv_phi, vec, vectype, NULL);
1751
1752 vec_def = induc_def;
1753 prev_stmt_vinfo = vinfo_for_stmt (induction_phi);
1754 for (i = 1; i < ncopies; i++)
1755 {
1756 tree tmp;
1757
d29de1bf 1758 /* vec_i = vec_prev + vec_step */
1759 tmp = build2 (PLUS_EXPR, vectype, vec_def, vec_step);
1760 new_stmt = build_gimple_modify_stmt (NULL_TREE, tmp);
1761 vec_def = make_ssa_name (vec_dest, new_stmt);
1762 GIMPLE_STMT_OPERAND (new_stmt, 0) = vec_def;
1763 bsi_insert_before (&si, new_stmt, BSI_SAME_STMT);
1764 set_stmt_info (get_stmt_ann (new_stmt),
1765 new_stmt_vec_info (new_stmt, loop_vinfo));
1766 STMT_VINFO_RELATED_STMT (prev_stmt_vinfo) = new_stmt;
1767 prev_stmt_vinfo = vinfo_for_stmt (new_stmt);
1768 }
1769 }
1770
1771 if (nested_in_vect_loop)
1772 {
1773 /* Find the loop-closed exit-phi of the induction, and record
1774 the final vector of induction results: */
1775 exit_phi = NULL;
1776 FOR_EACH_IMM_USE_FAST (use_p, imm_iter, loop_arg)
1777 {
1778 if (!flow_bb_inside_loop_p (iv_loop, bb_for_stmt (USE_STMT (use_p))))
1779 {
1780 exit_phi = USE_STMT (use_p);
1781 break;
1782 }
1783 }
1784 if (exit_phi)
1785 {
1786 stmt_vec_info stmt_vinfo = vinfo_for_stmt (exit_phi);
 1787 /* FORNOW. We do not yet support the case in which an inner-loop induction
 1788 is used only outside the outer-loop (i.e. not in the outer-loop itself). */
1789 gcc_assert (STMT_VINFO_RELEVANT_P (stmt_vinfo)
1790 && !STMT_VINFO_LIVE_P (stmt_vinfo));
1791
1792 STMT_VINFO_VEC_STMT (stmt_vinfo) = new_stmt;
1793 if (vect_print_dump_info (REPORT_DETAILS))
1794 {
1795 fprintf (vect_dump, "vector of inductions after inner-loop:");
1796 print_generic_expr (vect_dump, new_stmt, TDF_SLIM);
1797 }
1798 }
1799 }
1800
1801
1802 if (vect_print_dump_info (REPORT_DETAILS))
1803 {
1804 fprintf (vect_dump, "transform induction: created def-use cycle:");
1805 print_generic_expr (vect_dump, induction_phi, TDF_SLIM);
1806 fprintf (vect_dump, "\n");
1807 print_generic_expr (vect_dump, SSA_NAME_DEF_STMT (vec_def), TDF_SLIM);
1808 }
1809
1810 STMT_VINFO_VEC_STMT (phi_info) = induction_phi;
1811 return induc_def;
1812}
1813
1814
f7064d11 1815 /* Function vect_get_vec_def_for_operand.
1816
1817 OP is an operand in STMT. This function returns a (vector) def that will be
1818 used in the vectorized stmt for STMT.
1819
1820 In the case that OP is an SSA_NAME which is defined in the loop, then
1821 STMT_VINFO_VEC_STMT of the defining stmt holds the relevant def.
1822
1823 In case OP is an invariant or constant, a new stmt that creates a vector def
1824 needs to be introduced. */
1825
 1826 static tree
61d3cdbb 1827 vect_get_vec_def_for_operand (tree op, tree stmt, tree *scalar_def)
 1828 {
1829 tree vec_oprnd;
1830 tree vec_stmt;
1831 tree def_stmt;
1832 stmt_vec_info def_stmt_info = NULL;
1833 stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt);
1834 tree vectype = STMT_VINFO_VECTYPE (stmt_vinfo);
57d1677d 1835 int nunits = TYPE_VECTOR_SUBPARTS (vectype);
f7064d11 1836 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
f7064d11 1837 tree vec_inv;
88088c03 1838 tree vec_cst;
1839 tree t = NULL_TREE;
1840 tree def;
1841 int i;
1842 enum vect_def_type dt;
1843 bool is_simple_use;
4090db01 1844 tree vector_type;
f7064d11 1845
00518cb1 1846 if (vect_print_dump_info (REPORT_DETAILS))
1847 {
1848 fprintf (vect_dump, "vect_get_vec_def_for_operand: ");
1849 print_generic_expr (vect_dump, op, TDF_SLIM);
1850 }
1851
1852 is_simple_use = vect_is_simple_use (op, loop_vinfo, &def_stmt, &def, &dt);
1853 gcc_assert (is_simple_use);
00518cb1 1854 if (vect_print_dump_info (REPORT_DETAILS))
f7064d11 1855 {
88088c03 1856 if (def)
f7064d11 1857 {
1858 fprintf (vect_dump, "def = ");
1859 print_generic_expr (vect_dump, def, TDF_SLIM);
1860 }
1861 if (def_stmt)
1862 {
1863 fprintf (vect_dump, " def_stmt = ");
1864 print_generic_expr (vect_dump, def_stmt, TDF_SLIM);
f7064d11 1865 }
1866 }
1867
88088c03 1868 switch (dt)
f7064d11 1869 {
1870 /* Case 1: operand is a constant. */
1871 case vect_constant_def:
1872 {
1873 if (scalar_def)
1874 *scalar_def = op;
1875
88088c03 1876 /* Create 'vect_cst_ = {cst,cst,...,cst}' */
00518cb1 1877 if (vect_print_dump_info (REPORT_DETAILS))
1878 fprintf (vect_dump, "Create vector_cst. nunits = %d", nunits);
1879
1880 for (i = nunits - 1; i >= 0; --i)
1881 {
1882 t = tree_cons (NULL_TREE, op, t);
1883 }
4090db01 1884 vector_type = get_vectype_for_scalar_type (TREE_TYPE (op));
20e545c3 1885 gcc_assert (vector_type);
1886 vec_cst = build_vector (vector_type, t);
1887
468c2ac0 1888 return vect_init_vector (stmt, vec_cst, vector_type, NULL);
1889 }
1890
1891 /* Case 2: operand is defined outside the loop - loop invariant. */
1892 case vect_invariant_def:
1893 {
1894 if (scalar_def)
1895 *scalar_def = def;
1896
88088c03 1897 /* Create 'vec_inv = {inv,inv,..,inv}' */
00518cb1 1898 if (vect_print_dump_info (REPORT_DETAILS))
1899 fprintf (vect_dump, "Create vector_inv.");
1900
1901 for (i = nunits - 1; i >= 0; --i)
1902 {
1903 t = tree_cons (NULL_TREE, def, t);
1904 }
1905
4038c495 1906 /* FIXME: use build_constructor directly. */
4090db01 1907 vector_type = get_vectype_for_scalar_type (TREE_TYPE (def));
20e545c3 1908 gcc_assert (vector_type);
4090db01 1909 vec_inv = build_constructor_from_list (vector_type, t);
468c2ac0 1910 return vect_init_vector (stmt, vec_inv, vector_type, NULL);
1911 }
1912
1913 /* Case 3: operand is defined inside the loop. */
1914 case vect_loop_def:
1915 {
1916 if (scalar_def)
1917 *scalar_def = def_stmt;
1918
1919 /* Get the def from the vectorized stmt. */
1920 def_stmt_info = vinfo_for_stmt (def_stmt);
1921 vec_stmt = STMT_VINFO_VEC_STMT (def_stmt_info);
1922 gcc_assert (vec_stmt);
1923 if (TREE_CODE (vec_stmt) == PHI_NODE)
1924 vec_oprnd = PHI_RESULT (vec_stmt);
1925 else
1926 vec_oprnd = GIMPLE_STMT_OPERAND (vec_stmt, 0);
1927 return vec_oprnd;
1928 }
1929
1930 /* Case 4: operand is defined by a loop header phi - reduction */
1931 case vect_reduction_def:
1932 {
1933 struct loop *loop;
1934
61d3cdbb 1935 gcc_assert (TREE_CODE (def_stmt) == PHI_NODE);
d29de1bf 1936 loop = (bb_for_stmt (def_stmt))->loop_father;
1937
1938 /* Get the def before the loop */
1939 op = PHI_ARG_DEF_FROM_EDGE (def_stmt, loop_preheader_edge (loop));
1940 return get_initial_def_for_reduction (stmt, op, scalar_def);
1941 }
1942
1943 /* Case 5: operand is defined by loop-header phi - induction. */
88088c03
DN
1944 case vect_induction_def:
1945 {
1946 gcc_assert (TREE_CODE (def_stmt) == PHI_NODE);
1947
1948 /* Get the def from the vectorized stmt. */
1949 def_stmt_info = vinfo_for_stmt (def_stmt);
1950 vec_stmt = STMT_VINFO_VEC_STMT (def_stmt_info);
1951 gcc_assert (vec_stmt && (TREE_CODE (vec_stmt) == PHI_NODE));
1952 vec_oprnd = PHI_RESULT (vec_stmt);
1953 return vec_oprnd;
88088c03 1954 }
f7064d11 1955
f7064d11 1956 default:
88088c03 1957 gcc_unreachable ();
f7064d11 1958 }
1959}
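/* Illustrative sketch (plain C stand-in, not part of GCC): cases 1 and 2
   above both splat one scalar into all NUNITS elements of the vector
   operand; only the node kind (VECTOR_CST vs. CONSTRUCTOR) differs.  */
#if 0
#define EXAMPLE_NUNITS 4
static void
example_splat (int scalar, int vec[EXAMPLE_NUNITS])
{
  int i;

  for (i = EXAMPLE_NUNITS - 1; i >= 0; --i)   /* same reverse fill order */
    vec[i] = scalar;                          /* 'vect_cst_ = {cst,...,cst}' */
}
#endif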
1960
1961
89d67cca 1962 /* Function vect_get_vec_def_for_stmt_copy
1963
1964 Return a vector-def for an operand. This function is used when the
1965 vectorized stmt to be created (by the caller to this function) is a "copy"
1966 created in case the vectorized result cannot fit in one vector, and several
1967 copies of the vector-stmt are required. In this case the vector-def is
8115817b 1968 retrieved from the vector stmt recorded in the STMT_VINFO_RELATED_STMT field
1969 of the stmt that defines VEC_OPRND.
1970 DT is the type of the vector def VEC_OPRND.
1971
1972 Context:
1973 In case the vectorization factor (VF) is bigger than the number
1974 of elements that can fit in a vectype (nunits), we have to generate
1975 more than one vector stmt to vectorize the scalar stmt. This situation
1976 arises when there are multiple data-types operated upon in the loop; the
1977 smallest data-type determines the VF, and as a result, when vectorizing
1978 stmts operating on wider types we need to create 'VF/nunits' "copies" of the
1979 vector stmt (each computing a vector of 'nunits' results, and together
1980 computing 'VF' results in each iteration). This function is called when
2f8e468b 1981 vectorizing such a stmt (e.g. vectorizing S2 in the illustration below, in
878aa817 1982 which VF=16 and nunits=4, so the number of copies required is 4):
89d67cca
DN
1983
1984 scalar stmt: vectorized into: STMT_VINFO_RELATED_STMT
1985
1986 S1: x = load VS1.0: vx.0 = memref0 VS1.1
1987 VS1.1: vx.1 = memref1 VS1.2
1988 VS1.2: vx.2 = memref2 VS1.3
1989 VS1.3: vx.3 = memref3
1990
1991 S2: z = x + ... VSnew.0: vz0 = vx.0 + ... VSnew.1
1992 VSnew.1: vz1 = vx.1 + ... VSnew.2
1993 VSnew.2: vz2 = vx.2 + ... VSnew.3
1994 VSnew.3: vz3 = vx.3 + ...
1995
1996 The vectorization of S1 is explained in vectorizable_load.
1997 The vectorization of S2:
1998 To create the first vector-stmt out of the 4 copies - VSnew.0 -
1999 the function 'vect_get_vec_def_for_operand' is called to
2000 get the relevant vector-def for each operand of S2. For operand x it
2001 returns the vector-def 'vx.0'.
2002
2003 To create the remaining copies of the vector-stmt (VSnew.j), this
2004 function is called to get the relevant vector-def for each operand. It is
2005 obtained from the respective VS1.j stmt, which is recorded in the
2006 STMT_VINFO_RELATED_STMT field of the stmt that defines VEC_OPRND.
2007
2008 For example, to obtain the vector-def 'vx.1' in order to create the
2009 vector stmt 'VSnew.1', this function is called with VEC_OPRND='vx.0'.
2010 Given 'vx0' we obtain the stmt that defines it ('VS1.0'); from the
2011 STMT_VINFO_RELATED_STMT field of 'VS1.0' we obtain the next copy - 'VS1.1',
2012 and return its def ('vx.1').
2013 Overall, to create the above sequence this function will be called 3 times:
2014 vx.1 = vect_get_vec_def_for_stmt_copy (dt, vx.0);
2015 vx.2 = vect_get_vec_def_for_stmt_copy (dt, vx.1);
2016 vx.3 = vect_get_vec_def_for_stmt_copy (dt, vx.2); */
2017
 2018 static tree
 2019 vect_get_vec_def_for_stmt_copy (enum vect_def_type dt, tree vec_oprnd)
 2020 {
2021 tree vec_stmt_for_operand;
2022 stmt_vec_info def_stmt_info;
2023
2024 /* Do nothing; can reuse same def. */
2025 if (dt == vect_invariant_def || dt == vect_constant_def )
2026 return vec_oprnd;
2027
2028 vec_stmt_for_operand = SSA_NAME_DEF_STMT (vec_oprnd);
2029 def_stmt_info = vinfo_for_stmt (vec_stmt_for_operand);
2030 gcc_assert (def_stmt_info);
2031 vec_stmt_for_operand = STMT_VINFO_RELATED_STMT (def_stmt_info);
2032 gcc_assert (vec_stmt_for_operand);
07beea0d 2033 vec_oprnd = GIMPLE_STMT_OPERAND (vec_stmt_for_operand, 0);
2034 return vec_oprnd;
2035}
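/* Illustrative sketch (hypothetical struct and names, not part of GCC):
   obtaining the operand for the J-th stmt copy amounts to following the
   STMT_VINFO_RELATED_STMT chain J times, as in the vx.0 -> vx.3 example.  */
#if 0
struct example_copy { struct example_copy *related; };

static struct example_copy *
example_def_for_copy (struct example_copy *first, int j)
{
  struct example_copy *d = first;
  int i;

  for (i = 0; i < j; i++)
    d = d->related;   /* VS1.i -> VS1.(i+1) */
  return d;
}
#endif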
2036
2037
805e2059 2038 /* Get vectorized definitions for the operands to create a copy of an original
2039 stmt. See vect_get_vec_def_for_stmt_copy() for details. */
2040
 2041 static void
 2042 vect_get_vec_defs_for_stmt_copy (enum vect_def_type *dt,
 2043 VEC(tree,heap) **vec_oprnds0,
 2044 VEC(tree,heap) **vec_oprnds1)
 2045 {
2046 tree vec_oprnd = VEC_pop (tree, *vec_oprnds0);
2047
2048 vec_oprnd = vect_get_vec_def_for_stmt_copy (dt[0], vec_oprnd);
2049 VEC_quick_push (tree, *vec_oprnds0, vec_oprnd);
2050
4934454b 2051 if (vec_oprnds1 && *vec_oprnds1)
2052 {
2053 vec_oprnd = VEC_pop (tree, *vec_oprnds1);
2054 vec_oprnd = vect_get_vec_def_for_stmt_copy (dt[1], vec_oprnd);
2055 VEC_quick_push (tree, *vec_oprnds1, vec_oprnd);
2056 }
2057}
2058
2059
 2060 /* Get vectorized definitions for OP0 and OP1, or SLP_NODE if it is not NULL. */
 2061
 2062 static void
 2063 vect_get_vec_defs (tree op0, tree op1, tree stmt, VEC(tree,heap) **vec_oprnds0,
 2064 VEC(tree,heap) **vec_oprnds1, slp_tree slp_node)
 2065 {
2066 if (slp_node)
2067 vect_get_slp_defs (slp_node, vec_oprnds0, vec_oprnds1);
2068 else
2069 {
2070 tree vec_oprnd;
2071
2072 *vec_oprnds0 = VEC_alloc (tree, heap, 1);
2073 vec_oprnd = vect_get_vec_def_for_operand (op0, stmt, NULL);
2074 VEC_quick_push (tree, *vec_oprnds0, vec_oprnd);
2075
2076 if (op1)
2077 {
2078 *vec_oprnds1 = VEC_alloc (tree, heap, 1);
2079 vec_oprnd = vect_get_vec_def_for_operand (op1, stmt, NULL);
2080 VEC_quick_push (tree, *vec_oprnds1, vec_oprnd);
2081 }
2082 }
2083}
2084
2085
f7064d11 2086 /* Function vect_finish_stmt_generation.
2087
2088 Insert a new stmt. */
2089
 2090 static void
89d67cca 2091 vect_finish_stmt_generation (tree stmt, tree vec_stmt,
 2092 block_stmt_iterator *bsi)
f7064d11 2093 {
2094 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
2095 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
2096
2097 gcc_assert (stmt == bsi_stmt (*bsi));
2098 gcc_assert (TREE_CODE (stmt) != LABEL_EXPR);
2099
f7064d11 2100 bsi_insert_before (bsi, vec_stmt, BSI_SAME_STMT);
d29de1bf 2101
2102 set_stmt_info (get_stmt_ann (vec_stmt),
2103 new_stmt_vec_info (vec_stmt, loop_vinfo));
f7064d11 2104
00518cb1 2105 if (vect_print_dump_info (REPORT_DETAILS))
2106 {
2107 fprintf (vect_dump, "add new stmt: ");
2108 print_generic_expr (vect_dump, vec_stmt, TDF_SLIM);
2109 }
2110
2111 /* Make sure bsi points to the stmt that is being vectorized. */
2112 gcc_assert (stmt == bsi_stmt (*bsi));
f7064d11 2113
dbce1570 2114 SET_EXPR_LOCATION (vec_stmt, EXPR_LOCATION (stmt));
2115}
2116
2117
61d3cdbb 2118 /* Function get_initial_def_for_reduction
2119
2120 Input:
2121 STMT - a stmt that performs a reduction operation in the loop.
2122 INIT_VAL - the initial value of the reduction variable
2123
2124 Output:
2125 ADJUSTMENT_DEF - a tree that holds a value to be added to the final result
2126 of the reduction (used for adjusting the epilog - see below).
61d3cdbb 2127 Return a vector variable, initialized according to the operation that STMT
2128 performs. This vector will be used as the initial value of the
2129 vector of partial results.
61d3cdbb 2130
f7c1d73d 2131 Option1 (adjust in epilog): Initialize the vector as follows:
2132 add: [0,0,...,0,0]
2133 mult: [1,1,...,1,1]
2134 min/max: [init_val,init_val,..,init_val,init_val]
2135 bit and/or: [init_val,init_val,..,init_val,init_val]
f7c1d73d 2136 and when necessary (e.g. add/mult case) let the caller know
2137 that it needs to adjust the result by init_val.
2138
2139 Option2: Initialize the vector as follows:
2140 add: [0,0,...,0,init_val]
2141 mult: [1,1,...,1,init_val]
2142 min/max: [init_val,init_val,...,init_val]
2143 bit and/or: [init_val,init_val,...,init_val]
2144 and no adjustments are needed.
2145
2146 For example, for the following code:
2147
2148 s = init_val;
2149 for (i=0;i<n;i++)
2150 s = s + a[i];
2151
2152 STMT is 's = s + a[i]', and the reduction variable is 's'.
2153 For a vector of 4 units, we want to return either [0,0,0,init_val],
2154 or [0,0,0,0] and let the caller know that it needs to adjust
2155 the result at the end by 'init_val'.
2156
2157 FORNOW, we are using the 'adjust in epilog' scheme, because this way the
2158 initialization vector is simpler (same element in all entries).
2159 A cost model should help decide between these two schemes. */
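/* Illustrative sketch (plain C stand-in, not part of GCC; assumes N is a
   multiple of 4): the 'adjust in epilog' scheme for the sum example above,
   using a [0,0,0,0] initial vector and a final adjustment by init_val.  */
#if 0
static int
example_adjust_in_epilog (const int *a, int n, int init_val)
{
  int vacc[4] = { 0, 0, 0, 0 };   /* add: [0,0,...,0] */
  int i;

  for (i = 0; i < n; i += 4)
    {
      vacc[0] += a[i];
      vacc[1] += a[i + 1];
      vacc[2] += a[i + 2];
      vacc[3] += a[i + 3];
    }
  /* Reduce the partial sums, then adjust by init_val in the epilog.  */
  return vacc[0] + vacc[1] + vacc[2] + vacc[3] + init_val;
}
#endif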
2160
 2161 static tree
f7c1d73d 2162 get_initial_def_for_reduction (tree stmt, tree init_val, tree *adjustment_def)
 2163 {
2164 stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt);
2165 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
2166 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
61d3cdbb 2167 tree vectype = STMT_VINFO_VECTYPE (stmt_vinfo);
f58e9734 2168 int nunits = TYPE_VECTOR_SUBPARTS (vectype);
07beea0d 2169 enum tree_code code = TREE_CODE (GIMPLE_STMT_OPERAND (stmt, 1));
61d3cdbb 2170 tree type = TREE_TYPE (init_val);
2171 tree vecdef;
2172 tree def_for_init;
2173 tree init_def;
2174 tree t = NULL_TREE;
61d3cdbb 2175 int i;
4090db01 2176 tree vector_type;
d29de1bf 2177 bool nested_in_vect_loop = false;
61d3cdbb 2178
a0aa00d7 2179 gcc_assert (POINTER_TYPE_P (type) || INTEGRAL_TYPE_P (type) || SCALAR_FLOAT_TYPE_P (type));
2180 if (nested_in_vect_loop_p (loop, stmt))
2181 nested_in_vect_loop = true;
2182 else
2183 gcc_assert (loop == (bb_for_stmt (stmt))->loop_father);
2184
f7c1d73d 2185 vecdef = vect_get_vec_def_for_operand (init_val, stmt, NULL);
2186
2187 switch (code)
2188 {
2189 case WIDEN_SUM_EXPR:
2190 case DOT_PROD_EXPR:
61d3cdbb 2191 case PLUS_EXPR:
2192 if (nested_in_vect_loop)
2193 *adjustment_def = vecdef;
9009820b 2194 else
2195 *adjustment_def = init_val;
2196 /* Create a vector of zeros for init_def. */
2197 if (SCALAR_FLOAT_TYPE_P (type))
f7c1d73d 2198 def_for_init = build_real (type, dconst0);
2199 else
2200 def_for_init = build_int_cst (type, 0);
2201 for (i = nunits - 1; i >= 0; --i)
2202 t = tree_cons (NULL_TREE, def_for_init, t);
f7c1d73d 2203 vector_type = get_vectype_for_scalar_type (TREE_TYPE (def_for_init));
20e545c3 2204 gcc_assert (vector_type);
f7c1d73d 2205 init_def = build_vector (vector_type, t);
2206 break;
2207
2208 case MIN_EXPR:
2209 case MAX_EXPR:
2210 *adjustment_def = NULL_TREE;
2211 init_def = vecdef;
2212 break;
2213
2214 default:
2215 gcc_unreachable ();
2216 }
2217
f7c1d73d 2218 return init_def;
2219}
2220
2221
20f06221 2222 /* Function vect_create_epilog_for_reduction
2223
2224 Create code at the loop-epilog to finalize the result of a reduction
20f06221 2225 computation.
61d3cdbb 2226
2227 VECT_DEF is a vector of partial results.
2228 REDUC_CODE is the tree-code for the epilog reduction.
2229 STMT is the scalar reduction stmt that is being vectorized.
61d3cdbb 2230 REDUCTION_PHI is the phi-node that carries the reduction computation.
61d3cdbb 2231
20f06221 2232 This function:
ea2c620c 2233 1. Creates the reduction def-use cycle: sets the arguments for
2234 REDUCTION_PHI:
2235 The loop-entry argument is the vectorized initial-value of the reduction.
2236 The loop-latch argument is VECT_DEF - the vector of partial sums.
2237 2. "Reduces" the vector of partial results VECT_DEF into a single result,
2238 by applying the operation specified by REDUC_CODE if available, or by
2239 other means (whole-vector shifts or a scalar loop).
2240 The function also creates a new phi node at the loop exit to preserve
2241 loop-closed form, as illustrated below.
2242
2243 The flow at the entry to this function:
2244
2245 loop:
20f06221 2246 vec_def = phi <null, null> # REDUCTION_PHI
8115817b 2247 VECT_DEF = vector_stmt # vectorized form of STMT
20f06221 2248 s_loop = scalar_stmt # (scalar) STMT
61d3cdbb 2249 loop_exit:
20f06221 2250 s_out0 = phi <s_loop> # (scalar) EXIT_PHI
2251 use <s_out0>
2252 use <s_out0>
2253
20f06221 2254 The above is transformed by this function into:
2255
2256 loop:
2257 vec_def = phi <vec_init, VECT_DEF> # REDUCTION_PHI
2258 VECT_DEF = vector_stmt # vectorized form of STMT
2259 s_loop = scalar_stmt # (scalar) STMT
61d3cdbb 2260 loop_exit:
2261 s_out0 = phi <s_loop> # (scalar) EXIT_PHI
2262 v_out1 = phi <VECT_DEF> # NEW_EXIT_PHI
2263 v_out2 = reduce <v_out1>
61d3cdbb 2264 s_out3 = extract_field <v_out2, 0>
2265 s_out4 = adjust_result <s_out3>
2266 use <s_out4>
2267 use <s_out4>
 2268 */
2269
 2270 static void
20f06221 2271 vect_create_epilog_for_reduction (tree vect_def, tree stmt,
 2272 enum tree_code reduc_code, tree reduction_phi)
 2273 {
2274 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
2275 tree vectype;
2276 enum machine_mode mode;
2277 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
2278 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
2279 basic_block exit_bb;
2280 tree scalar_dest;
2281 tree scalar_type;
2282 tree new_phi;
2283 block_stmt_iterator exit_bsi;
2284 tree vec_dest;
d29de1bf 2285 tree new_temp = NULL_TREE;
a6b46ba2 2286 tree new_name;
2287 tree epilog_stmt = NULL_TREE;
2288 tree new_scalar_dest, exit_phi, new_dest;
a6b46ba2 2289 tree bitsize, bitpos, bytesize;
07beea0d 2290 enum tree_code code = TREE_CODE (GIMPLE_STMT_OPERAND (stmt, 1));
d29de1bf 2291 tree adjustment_def;
2292 tree vec_initial_def;
2293 tree orig_name;
2294 imm_use_iterator imm_iter;
2295 use_operand_p use_p;
2296 bool extract_scalar_result = false;
2297 tree reduction_op, expr;
20f06221 2298 tree orig_stmt;
6c00f606 2299 tree use_stmt;
07beea0d 2300 tree operation = GIMPLE_STMT_OPERAND (stmt, 1);
d29de1bf 2301 bool nested_in_vect_loop = false;
20f06221 2302 int op_type;
2303 VEC(tree,heap) *phis = NULL;
2304 int i;
61d3cdbb 2305
2306 if (nested_in_vect_loop_p (loop, stmt))
2307 {
2308 loop = loop->inner;
2309 nested_in_vect_loop = true;
2310 }
2311
5039610b 2312 op_type = TREE_OPERAND_LENGTH (operation);
2313 reduction_op = TREE_OPERAND (operation, op_type-1);
2314 vectype = get_vectype_for_scalar_type (TREE_TYPE (reduction_op));
20e545c3 2315 gcc_assert (vectype);
2316 mode = TYPE_MODE (vectype);
2317
2318 /*** 1. Create the reduction def-use cycle ***/
2319
2320 /* 1.1 set the loop-entry arg of the reduction-phi: */
2321 /* For the case of reduction, vect_get_vec_def_for_operand returns
2322 the scalar def before the loop, that defines the initial value
2323 of the reduction variable. */
2324 vec_initial_def = vect_get_vec_def_for_operand (reduction_op, stmt,
d29de1bf 2325 &adjustment_def);
2326 add_phi_arg (reduction_phi, vec_initial_def, loop_preheader_edge (loop));
2327
2328 /* 1.2 set the loop-latch arg for the reduction-phi: */
2329 add_phi_arg (reduction_phi, vect_def, loop_latch_edge (loop));
2330
00518cb1 2331 if (vect_print_dump_info (REPORT_DETAILS))
2332 {
2333 fprintf (vect_dump, "transform reduction: created def-use cycle:");
2334 print_generic_expr (vect_dump, reduction_phi, TDF_SLIM);
2335 fprintf (vect_dump, "\n");
2336 print_generic_expr (vect_dump, SSA_NAME_DEF_STMT (vect_def), TDF_SLIM);
2337 }
2338
2339
2340 /*** 2. Create epilog code
2341 The reduction epilog code operates across the elements of the vector
2342 of partial results computed by the vectorized loop.
2343 The reduction epilog code consists of:
2344 step 1: compute the scalar result in a vector (v_out2)
2345 step 2: extract the scalar result (s_out3) from the vector (v_out2)
2346 step 3: adjust the scalar result (s_out3) if needed.
2347
 2348 Step 1 can be accomplished using one of the following three schemes:
2349 (scheme 1) using reduc_code, if available.
2350 (scheme 2) using whole-vector shifts, if available.
2351 (scheme 3) using a scalar loop. In this case steps 1+2 above are
2352 combined.
2353
2354 The overall epilog code looks like this:
2355
2356 s_out0 = phi <s_loop> # original EXIT_PHI
2357 v_out1 = phi <VECT_DEF> # NEW_EXIT_PHI
2358 v_out2 = reduce <v_out1> # step 1
2359 s_out3 = extract_field <v_out2, 0> # step 2
2360 s_out4 = adjust_result <s_out3> # step 3
2361
 2362 (step 3 is optional, and steps 1 and 2 may be combined).
2363 Lastly, the uses of s_out0 are replaced by s_out4.
2364
2365 ***/
2366
2367 /* 2.1 Create new loop-exit-phi to preserve loop-closed form:
2368 v_out1 = phi <v_loop> */
2369
ac8f6c69 2370 exit_bb = single_exit (loop)->dest;
61d3cdbb 2371 new_phi = create_phi_node (SSA_NAME_VAR (vect_def), exit_bb);
ac8f6c69 2372 SET_PHI_ARG_DEF (new_phi, single_exit (loop)->dest_idx, vect_def);
8b11009b 2373 exit_bsi = bsi_after_labels (exit_bb);
61d3cdbb 2374
20f06221 2375 /* 2.2 Get the relevant tree-code to use in the epilog for schemes 2,3
2376 (i.e. when reduc_code is not available) and in the final adjustment
2377 code (if needed). Also get the original scalar reduction variable as
2378 defined in the loop. In case STMT is a "pattern-stmt" (i.e. - it
2379 represents a reduction pattern), the tree-code and scalar-def are
2380 taken from the original stmt that the pattern-stmt (STMT) replaces.
2381 Otherwise (it is a regular reduction) - the tree-code and scalar-def
2382 are taken from STMT. */
2383
2384 orig_stmt = STMT_VINFO_RELATED_STMT (stmt_info);
2385 if (!orig_stmt)
2386 {
2387 /* Regular reduction */
2388 orig_stmt = stmt;
2389 }
2390 else
2391 {
2392 /* Reduction pattern */
2393 stmt_vec_info stmt_vinfo = vinfo_for_stmt (orig_stmt);
2394 gcc_assert (STMT_VINFO_IN_PATTERN_P (stmt_vinfo));
2395 gcc_assert (STMT_VINFO_RELATED_STMT (stmt_vinfo) == stmt);
2396 }
2397 code = TREE_CODE (GIMPLE_STMT_OPERAND (orig_stmt, 1));
2398 scalar_dest = GIMPLE_STMT_OPERAND (orig_stmt, 0);
20f06221 2399 scalar_type = TREE_TYPE (scalar_dest);
2400 new_scalar_dest = vect_create_destination_var (scalar_dest, NULL);
2401 bitsize = TYPE_SIZE (scalar_type);
2402 bytesize = TYPE_SIZE_UNIT (scalar_type);
61d3cdbb 2403
2404
2405 /* In case this is a reduction in an inner-loop while vectorizing an outer
2406 loop - we don't need to extract a single scalar result at the end of the
2407 inner-loop. The final vector of partial results will be used in the
2408 vectorized outer-loop, or reduced to a scalar result at the end of the
2409 outer-loop. */
2410 if (nested_in_vect_loop)
2411 goto vect_finalize_reduction;
2412
2413 /* 2.3 Create the reduction code, using one of the three schemes described
2414 above. */
61d3cdbb 2415
a6b46ba2 2416 if (reduc_code < NUM_TREE_CODES)
61d3cdbb 2417 {
2418 tree tmp;
2419
2420 /*** Case 1: Create:
2421 v_out2 = reduc_expr <v_out1> */
61d3cdbb 2422
00518cb1 2423 if (vect_print_dump_info (REPORT_DETAILS))
a6b46ba2 2424 fprintf (vect_dump, "Reduce using direct vector reduction.");
61d3cdbb 2425
a6b46ba2 2426 vec_dest = vect_create_destination_var (scalar_dest, vectype);
2427 tmp = build1 (reduc_code, vectype, PHI_RESULT (new_phi));
2428 epilog_stmt = build_gimple_modify_stmt (vec_dest, tmp);
a6b46ba2 2429 new_temp = make_ssa_name (vec_dest, epilog_stmt);
07beea0d 2430 GIMPLE_STMT_OPERAND (epilog_stmt, 0) = new_temp;
8b11009b 2431 bsi_insert_before (&exit_bsi, epilog_stmt, BSI_SAME_STMT);
61d3cdbb 2432
a6b46ba2 2433 extract_scalar_result = true;
2434 }
2435 else
2436 {
dfea6c85 2437 enum tree_code shift_code = 0;
a6b46ba2 2438 bool have_whole_vector_shift = true;
2439 int bit_offset;
2440 int element_bitsize = tree_low_cst (bitsize, 1);
2441 int vec_size_in_bits = tree_low_cst (TYPE_SIZE (vectype), 1);
2442 tree vec_temp;
2443
166cdb08 2444 if (optab_handler (vec_shr_optab, mode)->insn_code != CODE_FOR_nothing)
a6b46ba2 2445 shift_code = VEC_RSHIFT_EXPR;
2446 else
2447 have_whole_vector_shift = false;
2448
2449 /* Regardless of whether we have a whole vector shift, if we're
2450 emulating the operation via tree-vect-generic, we don't want
2451 to use it. Only the first round of the reduction is likely
2452 to still be profitable via emulation. */
2453 /* ??? It might be better to emit a reduction tree code here, so that
2454 tree-vect-generic can expand the first round via bit tricks. */
2455 if (!VECTOR_MODE_P (mode))
2456 have_whole_vector_shift = false;
2457 else
2458 {
2459 optab optab = optab_for_tree_code (code, vectype);
166cdb08 2460 if (optab_handler (optab, mode)->insn_code == CODE_FOR_nothing)
2461 have_whole_vector_shift = false;
2462 }
2463
2464 if (have_whole_vector_shift)
2465 {
20f06221 2466 /*** Case 2: Create:
2467 for (offset = VS/2; offset >= element_size; offset/=2)
2468 {
2469 Create: va' = vec_shift <va, offset>
2470 Create: va = vop <va, va'>
2471 } */
2472
00518cb1 2473 if (vect_print_dump_info (REPORT_DETAILS))
2474 fprintf (vect_dump, "Reduce using vector shifts");
2475
2476 vec_dest = vect_create_destination_var (scalar_dest, vectype);
2477 new_temp = PHI_RESULT (new_phi);
2478
2479 for (bit_offset = vec_size_in_bits/2;
2480 bit_offset >= element_bitsize;
2481 bit_offset /= 2)
2482 {
2483 tree bitpos = size_int (bit_offset);
2484 tree tmp = build2 (shift_code, vectype, new_temp, bitpos);
2485 epilog_stmt = build_gimple_modify_stmt (vec_dest, tmp);
a6b46ba2 2486 new_name = make_ssa_name (vec_dest, epilog_stmt);
07beea0d 2487 GIMPLE_STMT_OPERAND (epilog_stmt, 0) = new_name;
8b11009b 2488 bsi_insert_before (&exit_bsi, epilog_stmt, BSI_SAME_STMT);
a6b46ba2 2489
2490 tmp = build2 (code, vectype, new_name, new_temp);
2491 epilog_stmt = build_gimple_modify_stmt (vec_dest, tmp);
a6b46ba2 2492 new_temp = make_ssa_name (vec_dest, epilog_stmt);
07beea0d 2493 GIMPLE_STMT_OPERAND (epilog_stmt, 0) = new_temp;
8b11009b 2494 bsi_insert_before (&exit_bsi, epilog_stmt, BSI_SAME_STMT);
2495 }
2496
2497 extract_scalar_result = true;
2498 }
2499 else
2500 {
2501 tree rhs;
2502
20f06221 2503 /*** Case 3: Create:
429268fc 2504 s = extract_field <v_out2, 0>
2505 for (offset = element_size;
2506 offset < vector_size;
2507 offset += element_size;)
2508 {
2509 Create: s' = extract_field <v_out2, offset>
2510 Create: s = op <s, s'>
2511 } */
2512
00518cb1 2513 if (vect_print_dump_info (REPORT_DETAILS))
a6b46ba2
DN
2514 fprintf (vect_dump, "Reduce using scalar code. ");
2515
2516 vec_temp = PHI_RESULT (new_phi);
2517 vec_size_in_bits = tree_low_cst (TYPE_SIZE (vectype), 1);
2518 rhs = build3 (BIT_FIELD_REF, scalar_type, vec_temp, bitsize,
2519 bitsize_zero_node);
429268fc 2520 BIT_FIELD_REF_UNSIGNED (rhs) = TYPE_UNSIGNED (scalar_type);
ebb07520 2521 epilog_stmt = build_gimple_modify_stmt (new_scalar_dest, rhs);
429268fc 2522 new_temp = make_ssa_name (new_scalar_dest, epilog_stmt);
07beea0d 2523 GIMPLE_STMT_OPERAND (epilog_stmt, 0) = new_temp;
8b11009b 2524 bsi_insert_before (&exit_bsi, epilog_stmt, BSI_SAME_STMT);
a6b46ba2 2525
429268fc 2526 for (bit_offset = element_bitsize;
a6b46ba2
DN
2527 bit_offset < vec_size_in_bits;
2528 bit_offset += element_bitsize)
2529 {
ebb07520 2530 tree tmp;
a6b46ba2 2531 tree bitpos = bitsize_int (bit_offset);
2532 tree rhs = build3 (BIT_FIELD_REF, scalar_type, vec_temp, bitsize,
2533 bitpos);
2534
2535 BIT_FIELD_REF_UNSIGNED (rhs) = TYPE_UNSIGNED (scalar_type);
8115817b 2536 epilog_stmt = build_gimple_modify_stmt (new_scalar_dest, rhs);
a6b46ba2 2537 new_name = make_ssa_name (new_scalar_dest, epilog_stmt);
07beea0d 2538 GIMPLE_STMT_OPERAND (epilog_stmt, 0) = new_name;
8b11009b 2539 bsi_insert_before (&exit_bsi, epilog_stmt, BSI_SAME_STMT);
a6b46ba2 2540
2541 tmp = build2 (code, scalar_type, new_name, new_temp);
2542 epilog_stmt = build_gimple_modify_stmt (new_scalar_dest, tmp);
a6b46ba2 2543 new_temp = make_ssa_name (new_scalar_dest, epilog_stmt);
07beea0d 2544 GIMPLE_STMT_OPERAND (epilog_stmt, 0) = new_temp;
8b11009b 2545 bsi_insert_before (&exit_bsi, epilog_stmt, BSI_SAME_STMT);
2546 }
2547
2548 extract_scalar_result = false;
2549 }
2550 }
61d3cdbb 2551
20f06221 2552 /* 2.4 Extract the final scalar result. Create:
a6b46ba2 2553 s_out3 = extract_field <v_out2, bitpos> */
61d3cdbb 2554
2555 if (extract_scalar_result)
2556 {
2557 tree rhs;
2558
d29de1bf 2559 gcc_assert (!nested_in_vect_loop);
00518cb1 2560 if (vect_print_dump_info (REPORT_DETAILS))
2561 fprintf (vect_dump, "extract scalar result");
2562
578578a5 2563 if (BYTES_BIG_ENDIAN)
2564 bitpos = size_binop (MULT_EXPR,
2565 bitsize_int (TYPE_VECTOR_SUBPARTS (vectype) - 1),
2566 TYPE_SIZE (scalar_type));
2567 else
2568 bitpos = bitsize_zero_node;
2569
2570 rhs = build3 (BIT_FIELD_REF, scalar_type, new_temp, bitsize, bitpos);
2571 BIT_FIELD_REF_UNSIGNED (rhs) = TYPE_UNSIGNED (scalar_type);
ebb07520 2572 epilog_stmt = build_gimple_modify_stmt (new_scalar_dest, rhs);
a6b46ba2 2573 new_temp = make_ssa_name (new_scalar_dest, epilog_stmt);
07beea0d 2574 GIMPLE_STMT_OPERAND (epilog_stmt, 0) = new_temp;
8b11009b 2575 bsi_insert_before (&exit_bsi, epilog_stmt, BSI_SAME_STMT);
a6b46ba2 2576 }
61d3cdbb 2577
d29de1bf 2578 vect_finalize_reduction:
2579
2580 /* 2.5 Adjust the final result by the initial value of the reduction
20f06221 2581 variable. (When such adjustment is not needed, then
d29de1bf
DN
2582 'adjustment_def' is zero). For example, if code is PLUS we create:
2583 new_temp = loop_exit_def + adjustment_def */
61d3cdbb 2584
d29de1bf 2585 if (adjustment_def)
a6b46ba2 2586 {
2587 if (nested_in_vect_loop)
2588 {
2589 gcc_assert (TREE_CODE (TREE_TYPE (adjustment_def)) == VECTOR_TYPE);
2590 expr = build2 (code, vectype, PHI_RESULT (new_phi), adjustment_def);
2591 new_dest = vect_create_destination_var (scalar_dest, vectype);
2592 }
2593 else
2594 {
2595 gcc_assert (TREE_CODE (TREE_TYPE (adjustment_def)) != VECTOR_TYPE);
2596 expr = build2 (code, scalar_type, new_temp, adjustment_def);
2597 new_dest = vect_create_destination_var (scalar_dest, scalar_type);
2598 }
2599 epilog_stmt = build_gimple_modify_stmt (new_dest, expr);
2600 new_temp = make_ssa_name (new_dest, epilog_stmt);
07beea0d 2601 GIMPLE_STMT_OPERAND (epilog_stmt, 0) = new_temp;
8b11009b 2602 bsi_insert_before (&exit_bsi, epilog_stmt, BSI_SAME_STMT);
a6b46ba2 2603 }
61d3cdbb 2604
a6b46ba2 2605
2606 /* 2.6 Handle the loop-exit phi */
2607
2608 /* Replace uses of s_out0 with uses of s_out3:
2609 Find the loop-closed-use at the loop exit of the original scalar result.
2610 (The reduction result is expected to have two immediate uses - one at the
2611 latch block, and one at the loop exit). */
71f4a023 2612 phis = VEC_alloc (tree, heap, 10);
2613 FOR_EACH_IMM_USE_FAST (use_p, imm_iter, scalar_dest)
2614 {
2615 if (!flow_bb_inside_loop_p (loop, bb_for_stmt (USE_STMT (use_p))))
2616 {
2617 exit_phi = USE_STMT (use_p);
71f4a023 2618 VEC_quick_push (tree, phis, exit_phi);
a6b46ba2 2619 }
61d3cdbb 2620 }
20f06221 2621 /* We expect to have found an exit_phi because of loop-closed-ssa form. */
71f4a023 2622 gcc_assert (!VEC_empty (tree, phis));
d29de1bf 2623
71f4a023 2624 for (i = 0; VEC_iterate (tree, phis, i, exit_phi); i++)
d29de1bf 2625 {
2626 if (nested_in_vect_loop)
2627 {
2628 stmt_vec_info stmt_vinfo = vinfo_for_stmt (exit_phi);
d29de1bf 2629
 2630 /* FORNOW. We do not yet support the case in which an inner-loop reduction
 2631 is used only outside the outer-loop (i.e. not in the outer-loop itself). */
2632 gcc_assert (STMT_VINFO_RELEVANT_P (stmt_vinfo)
2633 && !STMT_VINFO_LIVE_P (stmt_vinfo));
d29de1bf 2634
2635 epilog_stmt = adjustment_def ? epilog_stmt : new_phi;
2636 STMT_VINFO_VEC_STMT (stmt_vinfo) = epilog_stmt;
2637 set_stmt_info (get_stmt_ann (epilog_stmt),
2638 new_stmt_vec_info (epilog_stmt, loop_vinfo));
2639 continue;
2640 }
d29de1bf 2641
2642 /* Replace the uses: */
2643 orig_name = PHI_RESULT (exit_phi);
2644 FOR_EACH_IMM_USE_STMT (use_stmt, imm_iter, orig_name)
2645 FOR_EACH_IMM_USE_ON_STMT (use_p, imm_iter)
2646 SET_USE (use_p, new_temp);
d29de1bf 2647 }
71f4a023 2648 VEC_free (tree, heap, phis);
2649}
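/* Illustrative sketch (plain C stand-in, not part of GCC): the
   whole-vector-shift scheme (case 2 above) for a 4-element sum,
   halving the shift offset until one element holds the result.  */
#if 0
static int
example_shift_reduce (const int v[4])
{
  int va[4], shifted[4], i, off;

  for (i = 0; i < 4; i++)
    va[i] = v[i];
  for (off = 2; off >= 1; off /= 2)          /* offset = VS/2; offset /= 2 */
    {
      for (i = 0; i < 4; i++)                /* va' = vec_shift <va, offset> */
        shifted[i] = (i + off < 4) ? va[i + off] : 0;
      for (i = 0; i < 4; i++)                /* va = vop <va, va'> */
        va[i] += shifted[i];
    }
  return va[0];                              /* s = extract_field <v_out2, 0> */
}
#endif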
2650
2651
 2652 /* Function vectorizable_reduction.
2653
2654 Check if STMT performs a reduction operation that can be vectorized.
2655 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
2656 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
2657 Return FALSE if not a vectorizable STMT, TRUE otherwise.
2658
2659 This function also handles reduction idioms (patterns) that have been
2660 recognized in advance during vect_pattern_recog. In this case, STMT may be
2661 of this form:
2662 X = pattern_expr (arg0, arg1, ..., X)
 2663 and its STMT_VINFO_RELATED_STMT points to the last stmt in the original
2664 sequence that had been detected and replaced by the pattern-stmt (STMT).
2665
8115817b 2666 In some cases of reduction patterns, the type of the reduction variable X is
2667 different than the type of the other arguments of STMT.
2668 In such cases, the vectype that is used when transforming STMT into a vector
8115817b 2669 stmt is different than the vectype that is used to determine the
2670 vectorization factor, because it consists of a different number of elements
2671 than the actual number of elements that are being operated upon in parallel.
2672
8115817b 2673 For example, consider an accumulation of shorts into an int accumulator.
2674 On some targets it's possible to vectorize this pattern operating on 8
2675 shorts at a time (hence, the vectype for purposes of determining the
2676 vectorization factor should be V8HI); on the other hand, the vectype that
8115817b 2677 is used to create the vector form is actually V4SI (the type of the result).
20f06221 2678
2679 Upon entry to this function, STMT_VINFO_VECTYPE records the vectype that
2680 indicates what is the actual level of parallelism (V8HI in the example), so
2681 that the right vectorization factor would be derived. This vectype
2682 corresponds to the type of arguments to the reduction stmt, and should *NOT*
20f06221 2683 be used to create the vectorized stmt. The right vectype for the vectorized
8115817b 2684 stmt is obtained from the type of the result X:
2685 get_vectype_for_scalar_type (TREE_TYPE (X))
2686
8115817b 2687 This means that, contrary to "regular" reductions (or "regular" stmts in
2688 general), the following equation:
2689 STMT_VINFO_VECTYPE == get_vectype_for_scalar_type (TREE_TYPE (X))
2690 does *NOT* necessarily hold for reduction patterns. */
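/* Illustrative sketch (plain C stand-in, not part of GCC; the pairwise
   lane mapping is an assumption for illustration): the widen_sum pattern
   above, with 8 shorts (V8HI) feeding 4 int partial sums (V4SI).  */
#if 0
static void
example_widen_sum (const short a[8], int acc[4])
{
  int i;

  for (i = 0; i < 8; i++)
    acc[i / 2] += a[i];   /* two short elements accumulate into one int lane */
}
#endif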
2691
 2692 bool
 2693 vectorizable_reduction (tree stmt, block_stmt_iterator *bsi, tree *vec_stmt)
 2694 {
2695 tree vec_dest;
2696 tree scalar_dest;
20f06221 2697 tree op;
89d67cca 2698 tree loop_vec_def0 = NULL_TREE, loop_vec_def1 = NULL_TREE;
2699 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
2700 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
2701 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
2702 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
2703 tree operation;
20f06221 2704 enum tree_code code, orig_code, epilog_reduc_code = 0;
2705 enum machine_mode vec_mode;
2706 int op_type;
2707 optab optab, reduc_optab;
89d67cca 2708 tree new_temp = NULL_TREE;
2709 tree def, def_stmt;
2710 enum vect_def_type dt;
2711 tree new_phi;
2712 tree scalar_type;
2713 bool is_simple_use;
2714 tree orig_stmt;
2715 stmt_vec_info orig_stmt_info;
2716 tree expr = NULL_TREE;
2717 int i;
2718 int nunits = TYPE_VECTOR_SUBPARTS (vectype);
2719 int ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits;
2720 stmt_vec_info prev_stmt_info;
2721 tree reduc_def;
2722 tree new_stmt = NULL_TREE;
2723 int j;
2724
2725 if (nested_in_vect_loop_p (loop, stmt))
2726 {
2727 loop = loop->inner;
2728 /* FORNOW. This restriction should be relaxed. */
2729 if (ncopies > 1)
2730 {
2731 if (vect_print_dump_info (REPORT_DETAILS))
2732 fprintf (vect_dump, "multiple types in nested loop.");
2733 return false;
2734 }
2735 }
2736
89d67cca 2737 gcc_assert (ncopies >= 1);
61d3cdbb 2738
2739 /* FORNOW: SLP not supported. */
2740 if (STMT_SLP_TYPE (stmt_info))
2741 return false;
2742
20f06221 2743 /* 1. Is vectorizable reduction? */
2744
2745 /* Not supportable if the reduction variable is used in the loop. */
d29de1bf 2746 if (STMT_VINFO_RELEVANT (stmt_info) > vect_used_in_outer)
2747 return false;
2748
 2749 /* Reductions that are not used even in an enclosing outer-loop are
 2750 expected to be "live" (used out of the loop). */
2751 if (STMT_VINFO_RELEVANT (stmt_info) == vect_unused_in_loop
2752 && !STMT_VINFO_LIVE_P (stmt_info))
2753 return false;
2754
20f06221 2755 /* Make sure it was already recognized as a reduction computation. */
2756 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_reduction_def)
2757 return false;
2758
2759 /* 2. Has this been recognized as a reduction pattern?
2760
2761 Check if STMT represents a pattern that has been recognized
2762 in earlier analysis stages. For stmts that represent a pattern,
2763 the STMT_VINFO_RELATED_STMT field records the last stmt in
2764 the original sequence that constitutes the pattern. */
2765
2766 orig_stmt = STMT_VINFO_RELATED_STMT (stmt_info);
2767 if (orig_stmt)
2768 {
2769 orig_stmt_info = vinfo_for_stmt (orig_stmt);
2770 gcc_assert (STMT_VINFO_RELATED_STMT (orig_stmt_info) == stmt);
2771 gcc_assert (STMT_VINFO_IN_PATTERN_P (orig_stmt_info));
2772 gcc_assert (!STMT_VINFO_IN_PATTERN_P (stmt_info));
2773 }
2774
2775 /* 3. Check the operands of the operation. The first operands are defined
2776 inside the loop body. The last operand is the reduction variable,
2777 which is defined by the loop-header-phi. */
2778
07beea0d 2779 gcc_assert (TREE_CODE (stmt) == GIMPLE_MODIFY_STMT);
61d3cdbb 2780
07beea0d 2781 operation = GIMPLE_STMT_OPERAND (stmt, 1);
61d3cdbb 2782 code = TREE_CODE (operation);
5039610b 2783 op_type = TREE_OPERAND_LENGTH (operation);
20f06221 2784 if (op_type != binary_op && op_type != ternary_op)
61d3cdbb 2785 return false;
07beea0d 2786 scalar_dest = GIMPLE_STMT_OPERAND (stmt, 0);
61d3cdbb 2787 scalar_type = TREE_TYPE (scalar_dest);
2788 if (!POINTER_TYPE_P (scalar_type) && !INTEGRAL_TYPE_P (scalar_type)
2789 && !SCALAR_FLOAT_TYPE_P (scalar_type))
2790 return false;
61d3cdbb 2791
2792 /* All uses but the last are expected to be defined in the loop.
2793 The last use is the reduction variable. */
2794 for (i = 0; i < op_type-1; i++)
2795 {
2796 op = TREE_OPERAND (operation, i);
2797 is_simple_use = vect_is_simple_use (op, loop_vinfo, &def_stmt, &def, &dt);
2798 gcc_assert (is_simple_use);
2799 if (dt != vect_loop_def
2800 && dt != vect_invariant_def
2801 && dt != vect_constant_def
2802 && dt != vect_induction_def)
2803 return false;
20f06221 2804 }
61d3cdbb 2805
2806 op = TREE_OPERAND (operation, i);
2807 is_simple_use = vect_is_simple_use (op, loop_vinfo, &def_stmt, &def, &dt);
2808 gcc_assert (is_simple_use);
2809 gcc_assert (dt == vect_reduction_def);
2810 gcc_assert (TREE_CODE (def_stmt) == PHI_NODE);
2811 if (orig_stmt)
d29de1bf 2812 gcc_assert (orig_stmt == vect_is_simple_reduction (loop_vinfo, def_stmt));
20f06221 2813 else
d29de1bf 2814 gcc_assert (stmt == vect_is_simple_reduction (loop_vinfo, def_stmt));
2815
2816 if (STMT_VINFO_LIVE_P (vinfo_for_stmt (def_stmt)))
2817 return false;
61d3cdbb 2818
20f06221 2819 /* 4. Supportable by target? */
61d3cdbb 2820
20f06221 2821 /* 4.1. check support for the operation in the loop */
2822 optab = optab_for_tree_code (code, vectype);
2823 if (!optab)
2824 {
00518cb1 2825 if (vect_print_dump_info (REPORT_DETAILS))
2826 fprintf (vect_dump, "no optab.");
2827 return false;
2828 }
2829 vec_mode = TYPE_MODE (vectype);
166cdb08 2830 if (optab_handler (optab, vec_mode)->insn_code == CODE_FOR_nothing)
61d3cdbb 2831 {
00518cb1 2832 if (vect_print_dump_info (REPORT_DETAILS))
61d3cdbb 2833 fprintf (vect_dump, "op not supported by target.");
2834 if (GET_MODE_SIZE (vec_mode) != UNITS_PER_WORD
2835 || LOOP_VINFO_VECT_FACTOR (loop_vinfo)
2836 < vect_min_worthwhile_factor (code))
2837 return false;
00518cb1 2838 if (vect_print_dump_info (REPORT_DETAILS))
2839 fprintf (vect_dump, "proceeding using word mode.");
2840 }
2841
2842 /* Worthwhile without SIMD support? */
2843 if (!VECTOR_MODE_P (TYPE_MODE (vectype))
2844 && LOOP_VINFO_VECT_FACTOR (loop_vinfo)
2845 < vect_min_worthwhile_factor (code))
2846 {
00518cb1 2847 if (vect_print_dump_info (REPORT_DETAILS))
afc1ab61 2848 fprintf (vect_dump, "not worthwhile without SIMD support.");
2849 return false;
2850 }
2851
2852 /* 4.2. Check support for the epilog operation.
2853
2854 If STMT represents a reduction pattern, then the type of the
2855 reduction variable may be different than the type of the rest
2856 of the arguments. For example, consider the case of accumulation
 2857 of shorts into an int accumulator; the original code:
2858 S1: int_a = (int) short_a;
2859 orig_stmt-> S2: int_acc = plus <int_a ,int_acc>;
2860
2861 was replaced with:
2862 STMT: int_acc = widen_sum <short_a, int_acc>
2863
2864 This means that:
2865 1. The tree-code that is used to create the vector operation in the
2866 epilog code (that reduces the partial results) is not the
2867 tree-code of STMT, but is rather the tree-code of the original
2868 stmt from the pattern that STMT is replacing. I.e, in the example
2869 above we want to use 'widen_sum' in the loop, but 'plus' in the
2870 epilog.
2871 2. The type (mode) we use to check available target support
2872 for the vector operation to be created in the *epilog*, is
2873 determined by the type of the reduction variable (in the example
2874 above we'd check this: plus_optab[vect_int_mode]).
2875 However the type (mode) we use to check available target support
2876 for the vector operation to be created *inside the loop*, is
2877 determined by the type of the other arguments to STMT (in the
2878 example we'd check this: widen_sum_optab[vect_short_mode]).
2879
2880 This is contrary to "regular" reductions, in which the types of all
2881 the arguments are the same as the type of the reduction variable.
2882 For "regular" reductions we can therefore use the same vector type
2883 (and also the same tree-code) when generating the epilog code and
2884 when generating the code inside the loop. */
2885
2886 if (orig_stmt)
2887 {
2888 /* This is a reduction pattern: get the vectype from the type of the
2889 reduction variable, and get the tree-code from orig_stmt. */
07beea0d 2890 orig_code = TREE_CODE (GIMPLE_STMT_OPERAND (orig_stmt, 1));
20f06221 2891 vectype = get_vectype_for_scalar_type (TREE_TYPE (def));
2892 if (!vectype)
2893 {
2894 if (vect_print_dump_info (REPORT_DETAILS))
2895 {
2896 fprintf (vect_dump, "unsupported data-type ");
2897 print_generic_expr (vect_dump, TREE_TYPE (def), TDF_SLIM);
2898 }
2899 return false;
2900 }
2901
2902 vec_mode = TYPE_MODE (vectype);
2903 }
2904 else
2905 {
2906 /* Regular reduction: use the same vectype and tree-code as used for
2907 the vector code inside the loop can be used for the epilog code. */
2908 orig_code = code;
2909 }
2910
2911 if (!reduction_code_for_scalar_code (orig_code, &epilog_reduc_code))
61d3cdbb 2912 return false;
20f06221 2913 reduc_optab = optab_for_tree_code (epilog_reduc_code, vectype);
2914 if (!reduc_optab)
2915 {
00518cb1 2916 if (vect_print_dump_info (REPORT_DETAILS))
61d3cdbb 2917 fprintf (vect_dump, "no optab for reduction.");
20f06221 2918 epilog_reduc_code = NUM_TREE_CODES;
61d3cdbb 2919 }
166cdb08 2920 if (optab_handler (reduc_optab, vec_mode)->insn_code == CODE_FOR_nothing)
61d3cdbb 2921 {
00518cb1 2922 if (vect_print_dump_info (REPORT_DETAILS))
a6b46ba2 2923 fprintf (vect_dump, "reduc op not supported by target.");
20f06221 2924 epilog_reduc_code = NUM_TREE_CODES;
2925 }
2926
2927 if (!vec_stmt) /* transformation not required. */
2928 {
2929 STMT_VINFO_TYPE (stmt_info) = reduc_vec_info_type;
2930 if (!vect_model_reduction_cost (stmt_info, epilog_reduc_code, ncopies))
2931 return false;
2932 return true;
2933 }
2934
2935 /** Transform. **/
2936
00518cb1 2937 if (vect_print_dump_info (REPORT_DETAILS))
61d3cdbb
DN
2938 fprintf (vect_dump, "transform reduction.");
2939
2940 /* Create the destination vector. */
2941 vec_dest = vect_create_destination_var (scalar_dest, vectype);
2942
61d3cdbb
DN
2943 /* Create the reduction-phi that defines the reduction-operand. */
2944 new_phi = create_phi_node (vec_dest, loop->header);
2945
89d67cca
DN
2946 /* In case the vectorization factor (VF) is bigger than the number
2947 of elements that we can fit in a vectype (nunits), we have to generate
2948 more than one vector stmt - i.e., we need to "unroll" the
2949 vector stmt by a factor VF/nunits. For more details see documentation
2950 in vectorizable_operation. */
2951
2952 prev_stmt_info = NULL;
2953 for (j = 0; j < ncopies; j++)
20f06221 2954 {
89d67cca
DN
2955 /* Handle uses. */
2956 if (j == 0)
2957 {
2958 op = TREE_OPERAND (operation, 0);
2959 loop_vec_def0 = vect_get_vec_def_for_operand (op, stmt, NULL);
2960 if (op_type == ternary_op)
2961 {
2962 op = TREE_OPERAND (operation, 1);
2963 loop_vec_def1 = vect_get_vec_def_for_operand (op, stmt, NULL);
2964 }
8115817b 2965
89d67cca
DN
2966 /* Get the vector def for the reduction variable from the phi node */
2967 reduc_def = PHI_RESULT (new_phi);
2968 }
2969 else
2970 {
2971 enum vect_def_type dt = vect_unknown_def_type; /* Dummy */
2972 loop_vec_def0 = vect_get_vec_def_for_stmt_copy (dt, loop_vec_def0);
2973 if (op_type == ternary_op)
2974 loop_vec_def1 = vect_get_vec_def_for_stmt_copy (dt, loop_vec_def1);
8115817b 2975
89d67cca
DN
2976 /* Get the vector def for the reduction variable from the vectorized
2977 reduction operation generated in the previous iteration (j-1) */
07beea0d 2978 reduc_def = GIMPLE_STMT_OPERAND (new_stmt, 0);
89d67cca 2979 }
8115817b 2980
89d67cca 2981 /* Arguments are ready. Create the new vector stmt. */
89d67cca
DN
2982 if (op_type == binary_op)
2983 expr = build2 (code, vectype, loop_vec_def0, reduc_def);
2984 else
2985 expr = build3 (code, vectype, loop_vec_def0, loop_vec_def1,
8115817b 2986 reduc_def);
ebb07520 2987 new_stmt = build_gimple_modify_stmt (vec_dest, expr);
89d67cca 2988 new_temp = make_ssa_name (vec_dest, new_stmt);
07beea0d 2989 GIMPLE_STMT_OPERAND (new_stmt, 0) = new_temp;
89d67cca 2990 vect_finish_stmt_generation (stmt, new_stmt, bsi);
8115817b 2991
89d67cca
DN
2992 if (j == 0)
2993 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
2994 else
2995 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
2996 prev_stmt_info = vinfo_for_stmt (new_stmt);
20f06221 2997 }
8115817b 2998
61d3cdbb
DN
2999 /* Finalize the reduction-phi (set its arguments) and create the
3000 epilog reduction code. */
8115817b 3001 vect_create_epilog_for_reduction (new_temp, stmt, epilog_reduc_code, new_phi);
61d3cdbb
DN
3002 return true;
3003}
3004
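/* For illustration, one possible end-to-end picture of the reduction
   transformation above (assuming VF == 4, a V4SI vector type, and a
   plain sum; the exact stmts depend on the target):

     scalar loop:                    vector loop:
     s_1 = phi <s_0, s_2>            vs_1 = phi <vinit, vs_2>
     x = a[i]                        vx = <va[i], ..., va[i+3]>
     s_2 = x + s_1                   vs_2 = vx + vs_1

   where vinit would be {0,0,0,0} for a sum, followed by the epilog
   created by vect_create_epilog_for_reduction, which reduces the four
   partial sums in vs_2 into a single scalar (e.g. with a whole-vector
   reduc_plus operation, when the target supports one).  */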
2505a3f2 3005/* Checks if CALL can be vectorized with vector types VECTYPE_OUT and VECTYPE_IN. Returns
b95becfc
RG
3006 a function declaration if the target has a vectorized version
3007 of the function, or NULL_TREE if the function cannot be vectorized. */
2505a3f2 3008
b95becfc
RG
3009tree
3010vectorizable_function (tree call, tree vectype_out, tree vectype_in)
2505a3f2
RG
3011{
3012 tree fndecl = get_callee_fndecl (call);
b95becfc 3013 enum built_in_function code;
2505a3f2
RG
3014
3015 /* We only handle functions that do not read or clobber memory -- i.e.
3016 const or novops ones. */
3017 if (!(call_expr_flags (call) & (ECF_CONST | ECF_NOVOPS)))
b95becfc 3018 return NULL_TREE;
2505a3f2
RG
3019
3020 if (!fndecl
3021 || TREE_CODE (fndecl) != FUNCTION_DECL
3022 || !DECL_BUILT_IN (fndecl))
b95becfc 3023 return NULL_TREE;
2505a3f2 3024
b95becfc
RG
3025 code = DECL_FUNCTION_CODE (fndecl);
3026 return targetm.vectorize.builtin_vectorized_function (code, vectype_out,
3027 vectype_in);
2505a3f2
RG
3028}
3029
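/* As an illustration (assuming a target that provides a vectorized
   sqrtf): for a call to sqrtf with vectype_out == vectype_in == V4SF,
   the hook targetm.vectorize.builtin_vectorized_function is asked for
   (BUILT_IN_SQRTF, V4SF, V4SF) and returns the decl of a builtin that
   computes four square roots at once; when no such builtin exists the
   hook returns NULL_TREE and the call is not vectorized.  */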
3030/* Function vectorizable_call.
3031
3032 Check if STMT performs a function call that can be vectorized.
3033 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
3034 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
3035 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
3036
3037bool
3038vectorizable_call (tree stmt, block_stmt_iterator *bsi, tree *vec_stmt)
3039{
3040 tree vec_dest;
3041 tree scalar_dest;
3042 tree operation;
5039610b 3043 tree op, type;
b40c4f68 3044 tree vec_oprnd0 = NULL_TREE, vec_oprnd1 = NULL_TREE;
b95becfc
RG
3045 stmt_vec_info stmt_info = vinfo_for_stmt (stmt), prev_stmt_info;
3046 tree vectype_out, vectype_in;
b40c4f68
UB
3047 int nunits_in;
3048 int nunits_out;
2505a3f2 3049 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
d29de1bf 3050 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
b95becfc 3051 tree fndecl, rhs, new_temp, def, def_stmt, rhs_type, lhs_type;
3a70f3ef 3052 enum vect_def_type dt[2] = {vect_unknown_def_type, vect_unknown_def_type};
b40c4f68 3053 tree new_stmt;
b95becfc 3054 int ncopies, j, nargs;
5039610b 3055 call_expr_arg_iterator iter;
b40c4f68
UB
3056 tree vargs;
3057 enum { NARROW, NONE, WIDEN } modifier;
2505a3f2 3058
60555ced
DN
3059 if (!STMT_VINFO_RELEVANT_P (stmt_info))
3060 return false;
3061
3062 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_loop_def)
3063 return false;
3064
805e2059
IR
3065 /* FORNOW: SLP not supported. */
3066 if (STMT_SLP_TYPE (stmt_info))
3067 return false;
3068
2505a3f2 3069 /* Is STMT a vectorizable call? */
07beea0d 3070 if (TREE_CODE (stmt) != GIMPLE_MODIFY_STMT)
2505a3f2
RG
3071 return false;
3072
07beea0d 3073 if (TREE_CODE (GIMPLE_STMT_OPERAND (stmt, 0)) != SSA_NAME)
2505a3f2
RG
3074 return false;
3075
07beea0d 3076 operation = GIMPLE_STMT_OPERAND (stmt, 1);
2505a3f2
RG
3077 if (TREE_CODE (operation) != CALL_EXPR)
3078 return false;
b95becfc
RG
3079
3080 /* Process function arguments. */
3081 rhs_type = NULL_TREE;
5039610b
SL
3082 nargs = 0;
3083 FOR_EACH_CALL_EXPR_ARG (op, iter, operation)
b95becfc 3084 {
b95becfc
RG
3085 /* Bail out if the function has more than two arguments; we
3086 do not have interesting builtin functions to vectorize with
3087 more than two arguments. */
b40c4f68 3088 if (nargs >= 2)
b95becfc
RG
3089 return false;
3090
3091 /* We can only handle calls with arguments of the same type. */
3092 if (rhs_type
3093 && rhs_type != TREE_TYPE (op))
3094 {
3095 if (vect_print_dump_info (REPORT_DETAILS))
3096 fprintf (vect_dump, "argument types differ.");
3097 return false;
3098 }
3099 rhs_type = TREE_TYPE (op);
3100
b40c4f68 3101 if (!vect_is_simple_use (op, loop_vinfo, &def_stmt, &def, &dt[nargs]))
b95becfc
RG
3102 {
3103 if (vect_print_dump_info (REPORT_DETAILS))
3104 fprintf (vect_dump, "use not simple.");
3105 return false;
3106 }
b40c4f68
UB
3107
3108 ++nargs;
b95becfc
RG
3109 }
3110
3111 /* No arguments is also not good. */
3112 if (nargs == 0)
3113 return false;
3114
3115 vectype_in = get_vectype_for_scalar_type (rhs_type);
6d3bf849
UB
3116 if (!vectype_in)
3117 return false;
b40c4f68 3118 nunits_in = TYPE_VECTOR_SUBPARTS (vectype_in);
b95becfc
RG
3119
3120 lhs_type = TREE_TYPE (GIMPLE_STMT_OPERAND (stmt, 0));
3121 vectype_out = get_vectype_for_scalar_type (lhs_type);
6d3bf849
UB
3122 if (!vectype_out)
3123 return false;
b40c4f68
UB
3124 nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out);
3125
3126 /* FORNOW */
3127 if (nunits_in == nunits_out / 2)
3128 modifier = NARROW;
3129 else if (nunits_out == nunits_in)
3130 modifier = NONE;
3131 else if (nunits_out == nunits_in / 2)
3132 modifier = WIDEN;
3133 else
b95becfc
RG
3134 return false;
3135
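  /* E.g. (the modes below are just an illustration): a call on float
     operands returning float, with V4SF in and out, has
     nunits_in == nunits_out and thus modifier == NONE; a call on
     double operands returning int, with V2DF in and V4SI out, has
     nunits_in == nunits_out / 2 and thus modifier == NARROW - every
     vectorized call will then consume two input vectors per output
     vector (see the NARROW case below).  */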
2505a3f2
RG
3136 /* For now, we only vectorize functions if a target specific builtin
3137 is available. TODO -- in some cases, it might be profitable to
3138 insert the calls for pieces of the vector, in order to be able
3139 to vectorize other operations in the loop. */
b95becfc
RG
3140 fndecl = vectorizable_function (operation, vectype_out, vectype_in);
3141 if (fndecl == NULL_TREE)
2505a3f2
RG
3142 {
3143 if (vect_print_dump_info (REPORT_DETAILS))
3144 fprintf (vect_dump, "function is not vectorizable.");
3145
3146 return false;
3147 }
2505a3f2 3148
b95becfc 3149 gcc_assert (ZERO_SSA_OPERANDS (stmt, SSA_OP_ALL_VIRTUALS));
2505a3f2 3150
b40c4f68
UB
3151 if (modifier == NARROW)
3152 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits_out;
3153 else
3154 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits_in;
3155
3156 /* Sanity check: make sure that at least one copy of the vectorized stmt
3157 needs to be generated. */
3158 gcc_assert (ncopies >= 1);
792ed98b 3159
d29de1bf
DN
3160 /* FORNOW. This restriction should be relaxed. */
3161 if (nested_in_vect_loop_p (loop, stmt) && ncopies > 1)
3162 {
3163 if (vect_print_dump_info (REPORT_DETAILS))
3164 fprintf (vect_dump, "multiple types in nested loop.");
3165 return false;
3166 }
3167
2505a3f2
RG
3168 if (!vec_stmt) /* transformation not required. */
3169 {
3170 STMT_VINFO_TYPE (stmt_info) = call_vec_info_type;
792ed98b
HJ
3171 if (vect_print_dump_info (REPORT_DETAILS))
3172 fprintf (vect_dump, "=== vectorizable_call ===");
805e2059 3173 vect_model_simple_cost (stmt_info, ncopies, dt, NULL);
2505a3f2
RG
3174 return true;
3175 }
3176
3177 /** Transform. **/
3178
3179 if (vect_print_dump_info (REPORT_DETAILS))
3180 fprintf (vect_dump, "transform operation.");
3181
d29de1bf
DN
3182 /* FORNOW. This restriction should be relaxed. */
3183 if (nested_in_vect_loop_p (loop, stmt) && ncopies > 1)
3184 {
3185 if (vect_print_dump_info (REPORT_DETAILS))
3186 fprintf (vect_dump, "multiple types in nested loop.");
3187 return false;
3188 }
3189
2505a3f2 3190 /* Handle def. */
07beea0d 3191 scalar_dest = GIMPLE_STMT_OPERAND (stmt, 0);
b95becfc 3192 vec_dest = vect_create_destination_var (scalar_dest, vectype_out);
2505a3f2 3193
b95becfc 3194 prev_stmt_info = NULL;
b40c4f68 3195 switch (modifier)
2505a3f2 3196 {
b40c4f68
UB
3197 case NONE:
3198 for (j = 0; j < ncopies; ++j)
b95becfc 3199 {
b40c4f68
UB
3200 /* Build argument list for the vectorized call. */
3201 /* FIXME: Rewrite this so that it doesn't
3202 construct a temporary list. */
3203 vargs = NULL_TREE;
3204 nargs = 0;
3205 FOR_EACH_CALL_EXPR_ARG (op, iter, operation)
3206 {
3207 if (j == 0)
3208 vec_oprnd0
3209 = vect_get_vec_def_for_operand (op, stmt, NULL);
3210 else
3211 vec_oprnd0
3212 = vect_get_vec_def_for_stmt_copy (dt[nargs], vec_oprnd0);
3213
3214 vargs = tree_cons (NULL_TREE, vec_oprnd0, vargs);
3215
3216 ++nargs;
3217 }
3218 vargs = nreverse (vargs);
3219
3220 rhs = build_function_call_expr (fndecl, vargs);
3221 new_stmt = build_gimple_modify_stmt (vec_dest, rhs);
3222 new_temp = make_ssa_name (vec_dest, new_stmt);
3223 GIMPLE_STMT_OPERAND (new_stmt, 0) = new_temp;
3224
3225 vect_finish_stmt_generation (stmt, new_stmt, bsi);
b95becfc
RG
3226
3227 if (j == 0)
b40c4f68 3228 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
b95becfc 3229 else
b40c4f68 3230 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
b95becfc 3231
b40c4f68 3232 prev_stmt_info = vinfo_for_stmt (new_stmt);
b95becfc 3233 }
b95becfc 3234
b40c4f68 3235 break;
2505a3f2 3236
b40c4f68
UB
3237 case NARROW:
3238 for (j = 0; j < ncopies; ++j)
3239 {
3240 /* Build argument list for the vectorized call. */
3241 /* FIXME: Rewrite this so that it doesn't
3242 construct a temporary list. */
3243 vargs = NULL_TREE;
3244 nargs = 0;
3245 FOR_EACH_CALL_EXPR_ARG (op, iter, operation)
3246 {
3247 if (j == 0)
3248 {
3249 vec_oprnd0
3250 = vect_get_vec_def_for_operand (op, stmt, NULL);
3251 vec_oprnd1
3252 = vect_get_vec_def_for_stmt_copy (dt[nargs], vec_oprnd0);
3253 }
3254 else
3255 {
3256 vec_oprnd0
3257 = vect_get_vec_def_for_stmt_copy (dt[nargs], vec_oprnd1);
3258 vec_oprnd1
3259 = vect_get_vec_def_for_stmt_copy (dt[nargs], vec_oprnd0);
3260 }
3261
3262 vargs = tree_cons (NULL_TREE, vec_oprnd0, vargs);
3263 vargs = tree_cons (NULL_TREE, vec_oprnd1, vargs);
3264
3265 ++nargs;
3266 }
3267 vargs = nreverse (vargs);
2505a3f2 3268
b40c4f68
UB
3269 rhs = build_function_call_expr (fndecl, vargs);
3270 new_stmt = build_gimple_modify_stmt (vec_dest, rhs);
3271 new_temp = make_ssa_name (vec_dest, new_stmt);
3272 GIMPLE_STMT_OPERAND (new_stmt, 0) = new_temp;
3273
3274 vect_finish_stmt_generation (stmt, new_stmt, bsi);
3275
3276 if (j == 0)
3277 STMT_VINFO_VEC_STMT (stmt_info) = new_stmt;
3278 else
3279 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
3280
3281 prev_stmt_info = vinfo_for_stmt (new_stmt);
3282 }
3283
3284 *vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);
3285
3286 break;
3287
3288 case WIDEN:
3289 /* No current target implements this case. */
3290 return false;
b95becfc 3291 }
2505a3f2 3292
b40c4f68
UB
3293 /* The call in STMT might prevent it from being removed in dce.
3294 We however cannot remove it here, due to the way the ssa name
3295 it defines is mapped to the new definition. So just replace
3296 rhs of the statement with something harmless. */
2505a3f2 3297 type = TREE_TYPE (scalar_dest);
9f919563 3298 GIMPLE_STMT_OPERAND (stmt, 1) = fold_convert (type, integer_zero_node);
1344284e 3299 update_stmt (stmt);
2505a3f2
RG
3300
3301 return true;
3302}
3303
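/* A sketch of the NARROW case above, with ncopies == 1 and
   illustrative V2DF arguments narrowed into a V4SI result:

     vd0 = vect_get_vec_def_for_operand (op, ...)       elements 0,1
     vd1 = vect_get_vec_def_for_stmt_copy (...)         elements 2,3
     vres = CALL <vd0, vd1>                             4 results

   i.e. each vectorized call consumes two consecutive vector defs of
   every scalar argument, which is why both vec_oprnd0 and vec_oprnd1
   are pushed onto VARGS.  */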
61d3cdbb 3304
d9987fb4
UB
3305/* Function vect_gen_widened_results_half
3306
3307 Create a vector stmt whose code, type, number of arguments, and result
3308 variable are CODE, VECTYPE, OP_TYPE, and VEC_DEST, and its arguments are
3309 VEC_OPRND0 and VEC_OPRND1. The new vector stmt is to be inserted at BSI.
3310 In the case that CODE is a CALL_EXPR, this means that a call to DECL
3311 needs to be created (DECL is a function-decl of a target-builtin).
3312 STMT is the original scalar stmt that we are vectorizing. */
3313
3314static tree
3315vect_gen_widened_results_half (enum tree_code code, tree vectype, tree decl,
3316 tree vec_oprnd0, tree vec_oprnd1, int op_type,
3317 tree vec_dest, block_stmt_iterator *bsi,
3318 tree stmt)
3319{
3320 tree expr;
3321 tree new_stmt;
3322 tree new_temp;
3323 tree sym;
3324 ssa_op_iter iter;
3325
3326 /* Generate half of the widened result: */
3327 if (code == CALL_EXPR)
3328 {
3329 /* Target specific support */
3330 if (op_type == binary_op)
3331 expr = build_call_expr (decl, 2, vec_oprnd0, vec_oprnd1);
3332 else
3333 expr = build_call_expr (decl, 1, vec_oprnd0);
3334 }
3335 else
3336 {
3337 /* Generic support */
3338 gcc_assert (op_type == TREE_CODE_LENGTH (code));
3339 if (op_type == binary_op)
3340 expr = build2 (code, vectype, vec_oprnd0, vec_oprnd1);
3341 else
3342 expr = build1 (code, vectype, vec_oprnd0);
3343 }
3344 new_stmt = build_gimple_modify_stmt (vec_dest, expr);
3345 new_temp = make_ssa_name (vec_dest, new_stmt);
3346 GIMPLE_STMT_OPERAND (new_stmt, 0) = new_temp;
3347 vect_finish_stmt_generation (stmt, new_stmt, bsi);
3348
3349 if (code == CALL_EXPR)
3350 {
3351 FOR_EACH_SSA_TREE_OPERAND (sym, new_stmt, iter, SSA_OP_ALL_VIRTUALS)
3352 {
3353 if (TREE_CODE (sym) == SSA_NAME)
3354 sym = SSA_NAME_VAR (sym);
3355 mark_sym_for_renaming (sym);
3356 }
3357 }
3358
3359 return new_stmt;
3360}
3361
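/* For instance, widening eight shorts into eight ints requires two
   V4SI result vectors, so the callers below invoke this function
   twice - once with CODE1 and once with CODE2 (e.g. the pair
   VEC_UNPACK_HI_EXPR / VEC_UNPACK_LO_EXPR, or two target builtin
   decls), each call producing one half:

     vx = {s0, s1, s2, s3, s4, s5, s6, s7}      V8HI
     half1 = VEC_UNPACK_HI_EXPR <vx>            V4SI
     half2 = VEC_UNPACK_LO_EXPR <vx>            V4SI

   The codes shown are one possibility; supportable_widening_operation
   decides what is actually used.  */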
3362
805e2059
IR
3363/* Check if STMT performs a conversion operation, that can be vectorized.
3364 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
3365 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
3366 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
f57d17f1
TM
3367
3368bool
805e2059
IR
3369vectorizable_conversion (tree stmt, block_stmt_iterator *bsi,
3370 tree *vec_stmt, slp_tree slp_node)
f57d17f1
TM
3371{
3372 tree vec_dest;
3373 tree scalar_dest;
3374 tree operation;
3375 tree op0;
d9987fb4 3376 tree vec_oprnd0 = NULL_TREE, vec_oprnd1 = NULL_TREE;
f57d17f1
TM
3377 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
3378 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
d29de1bf 3379 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
8ff43db0 3380 enum tree_code code, code1 = ERROR_MARK, code2 = ERROR_MARK;
d9987fb4 3381 tree decl1 = NULL_TREE, decl2 = NULL_TREE;
f57d17f1
TM
3382 tree new_temp;
3383 tree def, def_stmt;
805e2059
IR
3384 enum vect_def_type dt[2] = {vect_unknown_def_type, vect_unknown_def_type};
3385 tree new_stmt = NULL_TREE;
d9987fb4 3386 stmt_vec_info prev_stmt_info;
f57d17f1
TM
3387 int nunits_in;
3388 int nunits_out;
f57d17f1 3389 tree vectype_out, vectype_in;
d9987fb4
UB
3390 int ncopies, j;
3391 tree expr;
f57d17f1 3392 tree rhs_type, lhs_type;
5039610b 3393 tree builtin_decl;
d9987fb4 3394 enum { NARROW, NONE, WIDEN } modifier;
805e2059
IR
3395 int i;
3396 VEC(tree,heap) *vec_oprnds0 = NULL;
3397 tree vop0;
f57d17f1
TM
3398
3399 /* Is STMT a vectorizable conversion? */
3400
3401 if (!STMT_VINFO_RELEVANT_P (stmt_info))
3402 return false;
3403
60555ced
DN
3404 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_loop_def)
3405 return false;
f57d17f1 3406
f57d17f1
TM
3407 if (TREE_CODE (stmt) != GIMPLE_MODIFY_STMT)
3408 return false;
3409
3410 if (TREE_CODE (GIMPLE_STMT_OPERAND (stmt, 0)) != SSA_NAME)
3411 return false;
3412
3413 operation = GIMPLE_STMT_OPERAND (stmt, 1);
3414 code = TREE_CODE (operation);
3415 if (code != FIX_TRUNC_EXPR && code != FLOAT_EXPR)
3416 return false;
3417
805e2059 3418 /* Check types of lhs and rhs. */
f57d17f1
TM
3419 op0 = TREE_OPERAND (operation, 0);
3420 rhs_type = TREE_TYPE (op0);
3421 vectype_in = get_vectype_for_scalar_type (rhs_type);
4934454b
DN
3422 if (!vectype_in)
3423 return false;
f57d17f1
TM
3424 nunits_in = TYPE_VECTOR_SUBPARTS (vectype_in);
3425
3426 scalar_dest = GIMPLE_STMT_OPERAND (stmt, 0);
3427 lhs_type = TREE_TYPE (scalar_dest);
3428 vectype_out = get_vectype_for_scalar_type (lhs_type);
4934454b
DN
3429 if (!vectype_out)
3430 return false;
f57d17f1
TM
3431 nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out);
3432
d9987fb4
UB
3433 /* FORNOW */
3434 if (nunits_in == nunits_out / 2)
3435 modifier = NARROW;
3436 else if (nunits_out == nunits_in)
3437 modifier = NONE;
3438 else if (nunits_out == nunits_in / 2)
3439 modifier = WIDEN;
3440 else
f57d17f1
TM
3441 return false;
3442
d9987fb4
UB
3443 if (modifier == NONE)
3444 gcc_assert (STMT_VINFO_VECTYPE (stmt_info) == vectype_out);
3445
805e2059 3446 /* Bail out if the types are both integral or non-integral. */
f57d17f1
TM
3447 if ((INTEGRAL_TYPE_P (rhs_type) && INTEGRAL_TYPE_P (lhs_type))
3448 || (!INTEGRAL_TYPE_P (rhs_type) && !INTEGRAL_TYPE_P (lhs_type)))
3449 return false;
3450
d9987fb4
UB
3451 if (modifier == NARROW)
3452 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits_out;
3453 else
3454 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits_in;
3455
805e2059
IR
3456 /* FORNOW: SLP with multiple types is not supported. The SLP analysis verifies
3457 this, so we can safely override NCOPIES with 1 here. */
3458 if (slp_node)
3459 ncopies = 1;
3460
f57d17f1
TM
3461 /* Sanity check: make sure that at least one copy of the vectorized stmt
3462 needs to be generated. */
f57d17f1
TM
3463 gcc_assert (ncopies >= 1);
3464
d29de1bf
DN
3465 /* FORNOW. This restriction should be relaxed. */
3466 if (nested_in_vect_loop_p (loop, stmt) && ncopies > 1)
3467 {
3468 if (vect_print_dump_info (REPORT_DETAILS))
3469 fprintf (vect_dump, "multiple types in nested loop.");
3470 return false;
3471 }
3472
d9987fb4 3473 /* Check the operands of the operation. */
805e2059 3474 if (!vect_is_simple_use (op0, loop_vinfo, &def_stmt, &def, &dt[0]))
f57d17f1
TM
3475 {
3476 if (vect_print_dump_info (REPORT_DETAILS))
3477 fprintf (vect_dump, "use not simple.");
3478 return false;
3479 }
3480
3481 /* Supportable by target? */
d9987fb4
UB
3482 if ((modifier == NONE
3483 && !targetm.vectorize.builtin_conversion (code, vectype_in))
3484 || (modifier == WIDEN
3485 && !supportable_widening_operation (code, stmt, vectype_in,
3486 &decl1, &decl2,
3487 &code1, &code2))
3488 || (modifier == NARROW
3489 && !supportable_narrowing_operation (code, stmt, vectype_in,
3490 &code1)))
f57d17f1
TM
3491 {
3492 if (vect_print_dump_info (REPORT_DETAILS))
3493 fprintf (vect_dump, "op not supported by target.");
3494 return false;
3495 }
3496
d9987fb4 3497 if (modifier != NONE)
805e2059
IR
3498 {
3499 STMT_VINFO_VECTYPE (stmt_info) = vectype_in;
3500 /* FORNOW: SLP not supported. */
3501 if (STMT_SLP_TYPE (stmt_info))
3502 return false;
3503 }
d9987fb4 3504
f57d17f1
TM
3505 if (!vec_stmt) /* transformation not required. */
3506 {
3507 STMT_VINFO_TYPE (stmt_info) = type_conversion_vec_info_type;
3508 return true;
3509 }
3510
d9987fb4 3511 /** Transform. **/
f57d17f1
TM
3512 if (vect_print_dump_info (REPORT_DETAILS))
3513 fprintf (vect_dump, "transform conversion.");
3514
3515 /* Handle def. */
3516 vec_dest = vect_create_destination_var (scalar_dest, vectype_out);
3517
805e2059
IR
3518 if (modifier == NONE && !slp_node)
3519 vec_oprnds0 = VEC_alloc (tree, heap, 1);
3520
f57d17f1 3521 prev_stmt_info = NULL;
d9987fb4 3522 switch (modifier)
f57d17f1 3523 {
d9987fb4
UB
3524 case NONE:
3525 for (j = 0; j < ncopies; j++)
3526 {
3527 tree sym;
3528 ssa_op_iter iter;
f57d17f1 3529
d9987fb4 3530 if (j == 0)
805e2059 3531 vect_get_vec_defs (op0, NULL, stmt, &vec_oprnds0, NULL, slp_node);
d9987fb4 3532 else
805e2059 3533 vect_get_vec_defs_for_stmt_copy (dt, &vec_oprnds0, NULL);
d9987fb4
UB
3534
3535 builtin_decl =
3536 targetm.vectorize.builtin_conversion (code, vectype_in);
805e2059
IR
3537 for (i = 0; VEC_iterate (tree, vec_oprnds0, i, vop0); i++)
3538 {
3539 new_stmt = build_call_expr (builtin_decl, 1, vop0);
d9987fb4 3540
805e2059
IR
3541 /* Arguments are ready. Create the new vector stmt. */
3542 new_stmt = build_gimple_modify_stmt (vec_dest, new_stmt);
3543 new_temp = make_ssa_name (vec_dest, new_stmt);
3544 GIMPLE_STMT_OPERAND (new_stmt, 0) = new_temp;
3545 vect_finish_stmt_generation (stmt, new_stmt, bsi);
3546 FOR_EACH_SSA_TREE_OPERAND (sym, new_stmt, iter,
3547 SSA_OP_ALL_VIRTUALS)
3548 {
3549 if (TREE_CODE (sym) == SSA_NAME)
3550 sym = SSA_NAME_VAR (sym);
3551 mark_sym_for_renaming (sym);
3552 }
3553 if (slp_node)
3554 VEC_quick_push (tree, SLP_TREE_VEC_STMTS (slp_node), new_stmt);
d9987fb4 3555 }
f57d17f1 3556
d9987fb4
UB
3557 if (j == 0)
3558 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
3559 else
3560 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
3561 prev_stmt_info = vinfo_for_stmt (new_stmt);
3562 }
3563 break;
3564
3565 case WIDEN:
3566 /* In case the vectorization factor (VF) is bigger than the number
3567 of elements that we can fit in a vectype (nunits), we have to
3568 generate more than one vector stmt - i.e., we need to "unroll"
3569 the vector stmt by a factor VF/nunits. */
3570 for (j = 0; j < ncopies; j++)
3571 {
3572 if (j == 0)
3573 vec_oprnd0 = vect_get_vec_def_for_operand (op0, stmt, NULL);
3574 else
805e2059 3575 vec_oprnd0 = vect_get_vec_def_for_stmt_copy (dt[0], vec_oprnd0);
f57d17f1 3576
d9987fb4 3577 STMT_VINFO_VECTYPE (stmt_info) = vectype_in;
f57d17f1 3578
d9987fb4
UB
3579 /* Generate first half of the widened result: */
3580 new_stmt
3581 = vect_gen_widened_results_half (code1, vectype_out, decl1,
3582 vec_oprnd0, vec_oprnd1,
3583 unary_op, vec_dest, bsi, stmt);
3584 if (j == 0)
3585 STMT_VINFO_VEC_STMT (stmt_info) = new_stmt;
3586 else
3587 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
3588 prev_stmt_info = vinfo_for_stmt (new_stmt);
3589
3590 /* Generate second half of the widened result: */
3591 new_stmt
3592 = vect_gen_widened_results_half (code2, vectype_out, decl2,
3593 vec_oprnd0, vec_oprnd1,
3594 unary_op, vec_dest, bsi, stmt);
3595 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
3596 prev_stmt_info = vinfo_for_stmt (new_stmt);
3597 }
3598 break;
3599
3600 case NARROW:
3601 /* In case the vectorization factor (VF) is bigger than the number
3602 of elements that we can fit in a vectype (nunits), we have to
3603 generate more than one vector stmt - i.e., we need to "unroll"
3604 the vector stmt by a factor VF/nunits. */
3605 for (j = 0; j < ncopies; j++)
3606 {
3607 /* Handle uses. */
3608 if (j == 0)
3609 {
3610 vec_oprnd0 = vect_get_vec_def_for_operand (op0, stmt, NULL);
805e2059 3611 vec_oprnd1 = vect_get_vec_def_for_stmt_copy (dt[0], vec_oprnd0);
d9987fb4
UB
3612 }
3613 else
3614 {
805e2059
IR
3615 vec_oprnd0 = vect_get_vec_def_for_stmt_copy (dt[0], vec_oprnd1);
3616 vec_oprnd1 = vect_get_vec_def_for_stmt_copy (dt[0], vec_oprnd0);
d9987fb4
UB
3617 }
3618
3619 /* Arguments are ready. Create the new vector stmt. */
3620 expr = build2 (code1, vectype_out, vec_oprnd0, vec_oprnd1);
3621 new_stmt = build_gimple_modify_stmt (vec_dest, expr);
3622 new_temp = make_ssa_name (vec_dest, new_stmt);
3623 GIMPLE_STMT_OPERAND (new_stmt, 0) = new_temp;
3624 vect_finish_stmt_generation (stmt, new_stmt, bsi);
3625
3626 if (j == 0)
3627 STMT_VINFO_VEC_STMT (stmt_info) = new_stmt;
3628 else
3629 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
3630
3631 prev_stmt_info = vinfo_for_stmt (new_stmt);
3632 }
3633
3634 *vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);
f57d17f1 3635 }
805e2059 3636
f57d17f1
TM
3637 return true;
3638}
3639
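/* As an example of the NONE case above (modes illustrative): a
   float -> int conversion with V4SF in and V4SI out is handed to
   targetm.vectorize.builtin_conversion (FIX_TRUNC_EXPR, V4SF-vectype);
   on a target with a packed-conversion instruction the hook returns
   the decl of a builtin that converts all four lanes at once, and the
   loop above emits one call per vector def.  */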
3640
f7064d11
DN
3641/* Function vectorizable_assignment.
3642
3643 Check if STMT performs an assignment (copy) that can be vectorized.
3644 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
3645 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
3646 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
3647
3648bool
805e2059
IR
3649vectorizable_assignment (tree stmt, block_stmt_iterator *bsi, tree *vec_stmt,
3650 slp_tree slp_node)
f7064d11
DN
3651{
3652 tree vec_dest;
3653 tree scalar_dest;
3654 tree op;
f7064d11
DN
3655 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
3656 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
3657 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
3658 tree new_temp;
88088c03 3659 tree def, def_stmt;
3a70f3ef 3660 enum vect_def_type dt[2] = {vect_unknown_def_type, vect_unknown_def_type};
89d67cca
DN
3661 int nunits = TYPE_VECTOR_SUBPARTS (vectype);
3662 int ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits;
805e2059
IR
3663 int i;
3664 VEC(tree,heap) *vec_oprnds = NULL;
3665 tree vop;
89d67cca
DN
3666
3667 gcc_assert (ncopies >= 1);
3668 if (ncopies > 1)
3669 return false; /* FORNOW */
f7064d11 3670
88088c03
DN
3671 if (!STMT_VINFO_RELEVANT_P (stmt_info))
3672 return false;
3673
60555ced
DN
3674 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_loop_def)
3675 return false;
3676
60555ced 3677 /* Is vectorizable assignment? */
07beea0d 3678 if (TREE_CODE (stmt) != GIMPLE_MODIFY_STMT)
f7064d11
DN
3679 return false;
3680
07beea0d 3681 scalar_dest = GIMPLE_STMT_OPERAND (stmt, 0);
f7064d11
DN
3682 if (TREE_CODE (scalar_dest) != SSA_NAME)
3683 return false;
3684
07beea0d 3685 op = GIMPLE_STMT_OPERAND (stmt, 1);
3a70f3ef 3686 if (!vect_is_simple_use (op, loop_vinfo, &def_stmt, &def, &dt[0]))
f7064d11 3687 {
00518cb1 3688 if (vect_print_dump_info (REPORT_DETAILS))
f7064d11
DN
3689 fprintf (vect_dump, "use not simple.");
3690 return false;
3691 }
3692
3693 if (!vec_stmt) /* transformation not required. */
3694 {
3695 STMT_VINFO_TYPE (stmt_info) = assignment_vec_info_type;
792ed98b
HJ
3696 if (vect_print_dump_info (REPORT_DETAILS))
3697 fprintf (vect_dump, "=== vectorizable_assignment ===");
805e2059 3698 vect_model_simple_cost (stmt_info, ncopies, dt, NULL);
f7064d11
DN
3699 return true;
3700 }
3701
3702 /** Transform. **/
00518cb1 3703 if (vect_print_dump_info (REPORT_DETAILS))
f7064d11
DN
3704 fprintf (vect_dump, "transform assignment.");
3705
3706 /* Handle def. */
3707 vec_dest = vect_create_destination_var (scalar_dest, vectype);
3708
3709 /* Handle use. */
805e2059 3710 vect_get_vec_defs (op, NULL, stmt, &vec_oprnds, NULL, slp_node);
f7064d11
DN
3711
3712 /* Arguments are ready. Create the new vector stmt. */
805e2059
IR
3713 for (i = 0; VEC_iterate (tree, vec_oprnds, i, vop); i++)
3714 {
3715 *vec_stmt = build_gimple_modify_stmt (vec_dest, vop);
3716 new_temp = make_ssa_name (vec_dest, *vec_stmt);
3717 GIMPLE_STMT_OPERAND (*vec_stmt, 0) = new_temp;
3718 vect_finish_stmt_generation (stmt, *vec_stmt, bsi);
3719 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt;
3720
3721 if (slp_node)
3722 VEC_quick_push (tree, SLP_TREE_VEC_STMTS (slp_node), *vec_stmt);
3723 }
f7064d11 3724
805e2059 3725 VEC_free (tree, heap, vec_oprnds);
f7064d11
DN
3726 return true;
3727}
3728
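/* E.g., the scalar copy
     S: a = b
   is simply replaced by the vector copy
     VS: va = vb
   with vb obtained through vect_get_vec_defs as usual.  */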
3729
c4336539
PB
3730/* Function vect_min_worthwhile_factor.
3731
3732 For a loop where we could vectorize the operation indicated by CODE,
3733 return the minimum vectorization factor that makes it worthwhile
3734 to use generic vectors. */
3735static int
3736vect_min_worthwhile_factor (enum tree_code code)
3737{
3738 switch (code)
3739 {
3740 case PLUS_EXPR:
3741 case MINUS_EXPR:
3742 case NEGATE_EXPR:
3743 return 4;
3744
3745 case BIT_AND_EXPR:
3746 case BIT_IOR_EXPR:
3747 case BIT_XOR_EXPR:
3748 case BIT_NOT_EXPR:
3749 return 2;
3750
3751 default:
3752 return INT_MAX;
3753 }
3754}
3755
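/* So, for example, emulating a PLUS_EXPR on word-sized vectors of four
   chars is considered worthwhile (factor 4 >= 4), whereas the same
   emulation on vectors of two shorts (factor 2) is rejected by its
   callers unless real SIMD support exists.  */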
88088c03 3756
cd38ca7f
DN
3757/* Function vectorizable_induction
3758
3759 Check if PHI performs an induction computation that can be vectorized.
3760 If VEC_STMT is also passed, vectorize the induction PHI: create a vectorized
3761 phi to replace it, put it in VEC_STMT, and add it to the same basic block.
3762 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
3763
3764bool
3765vectorizable_induction (tree phi, block_stmt_iterator *bsi ATTRIBUTE_UNUSED,
3766 tree *vec_stmt)
3767{
3768 stmt_vec_info stmt_info = vinfo_for_stmt (phi);
3769 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
3770 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
3771 int nunits = TYPE_VECTOR_SUBPARTS (vectype);
3772 int ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits;
3773 tree vec_def;
3774
3775 gcc_assert (ncopies >= 1);
3776
3777 if (!STMT_VINFO_RELEVANT_P (stmt_info))
3778 return false;
3779
805e2059
IR
3780 /* FORNOW: SLP not supported. */
3781 if (STMT_SLP_TYPE (stmt_info))
3782 return false;
3783
cd38ca7f
DN
3784 gcc_assert (STMT_VINFO_DEF_TYPE (stmt_info) == vect_induction_def);
3785
cd38ca7f
DN
3786 if (TREE_CODE (phi) != PHI_NODE)
3787 return false;
3788
3789 if (!vec_stmt) /* transformation not required. */
3790 {
3791 STMT_VINFO_TYPE (stmt_info) = induc_vec_info_type;
792ed98b
HJ
3792 if (vect_print_dump_info (REPORT_DETAILS))
3793 fprintf (vect_dump, "=== vectorizable_induction ===");
3794 vect_model_induction_cost (stmt_info, ncopies);
cd38ca7f
DN
3795 return true;
3796 }
3797
3798 /** Transform. **/
3799
3800 if (vect_print_dump_info (REPORT_DETAILS))
3801 fprintf (vect_dump, "transform induction phi.");
3802
3803 vec_def = get_initial_def_for_induction (phi);
3804 *vec_stmt = SSA_NAME_DEF_STMT (vec_def);
3805 return true;
3806}
3807
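/* For example, with VF == 4 and a simple IV with step 1 (the case
   handled by get_initial_def_for_induction), the scalar

     i_1 = phi <0, i_2>
     i_2 = i_1 + 1

   is vectorized as

     vi_1 = phi <{0,1,2,3}, vi_2>
     vi_2 = vi_1 + {4,4,4,4}  */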
3808
f7064d11
DN
3809/* Function vectorizable_operation.
3810
3811 Check if STMT performs a binary or unary operation that can be vectorized.
3812 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
3813 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
3814 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
3815
3816bool
805e2059
IR
3817vectorizable_operation (tree stmt, block_stmt_iterator *bsi, tree *vec_stmt,
3818 slp_tree slp_node)
f7064d11
DN
3819{
3820 tree vec_dest;
3821 tree scalar_dest;
3822 tree operation;
3823 tree op0, op1 = NULL;
805e2059 3824 tree vec_oprnd1 = NULL_TREE;
f7064d11
DN
3825 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
3826 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
3827 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
d29de1bf 3828 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
f7064d11
DN
3829 enum tree_code code;
3830 enum machine_mode vec_mode;
3831 tree new_temp;
3832 int op_type;
f7064d11 3833 optab optab;
b2d16a23
UB
3834 int icode;
3835 enum machine_mode optab_op2_mode;
88088c03 3836 tree def, def_stmt;
3a70f3ef 3837 enum vect_def_type dt[2] = {vect_unknown_def_type, vect_unknown_def_type};
805e2059 3838 tree new_stmt = NULL_TREE;
89d67cca
DN
3839 stmt_vec_info prev_stmt_info;
3840 int nunits_in = TYPE_VECTOR_SUBPARTS (vectype);
3841 int nunits_out;
3842 tree vectype_out;
3843 int ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits_in;
805e2059
IR
3844 int j, i;
3845 VEC(tree,heap) *vec_oprnds0 = NULL, *vec_oprnds1 = NULL;
3846 tree vop0, vop1;
50d76c24
IR
3847 unsigned int k;
3848 bool scalar_shift_arg = false;
805e2059
IR
3849
3850 /* FORNOW: SLP with multiple types is not supported. The SLP analysis verifies
3851 this, so we can safely override NCOPIES with 1 here. */
3852 if (slp_node)
3853 ncopies = 1;
89d67cca 3854 gcc_assert (ncopies >= 1);
d29de1bf
DN
3855 /* FORNOW. This restriction should be relaxed. */
3856 if (nested_in_vect_loop_p (loop, stmt) && ncopies > 1)
3857 {
3858 if (vect_print_dump_info (REPORT_DETAILS))
3859 fprintf (vect_dump, "multiple types in nested loop.");
3860 return false;
3861 }
f7064d11 3862
88088c03
DN
3863 if (!STMT_VINFO_RELEVANT_P (stmt_info))
3864 return false;
3865
60555ced
DN
3866 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_loop_def)
3867 return false;
88088c03 3868
60555ced 3869 /* Is STMT a vectorizable binary/unary operation? */
07beea0d 3870 if (TREE_CODE (stmt) != GIMPLE_MODIFY_STMT)
f7064d11
DN
3871 return false;
3872
07beea0d 3873 if (TREE_CODE (GIMPLE_STMT_OPERAND (stmt, 0)) != SSA_NAME)
f7064d11
DN
3874 return false;
3875
07beea0d 3876 scalar_dest = GIMPLE_STMT_OPERAND (stmt, 0);
89d67cca 3877 vectype_out = get_vectype_for_scalar_type (TREE_TYPE (scalar_dest));
6d3bf849
UB
3878 if (!vectype_out)
3879 return false;
89d67cca
DN
3880 nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out);
3881 if (nunits_out != nunits_in)
3882 return false;
3883
07beea0d 3884 operation = GIMPLE_STMT_OPERAND (stmt, 1);
f7064d11 3885 code = TREE_CODE (operation);
2caf766b
AP
3886
3887 /* For pointer addition, we should use the normal plus for
3888 the vector addition. */
3889 if (code == POINTER_PLUS_EXPR)
3890 code = PLUS_EXPR;
3891
f7064d11
DN
3892 optab = optab_for_tree_code (code, vectype);
3893
3894 /* Support only unary or binary operations. */
5039610b 3895 op_type = TREE_OPERAND_LENGTH (operation);
f7064d11
DN
3896 if (op_type != unary_op && op_type != binary_op)
3897 {
00518cb1 3898 if (vect_print_dump_info (REPORT_DETAILS))
f7064d11
DN
3899 fprintf (vect_dump, "num. args = %d (not unary/binary op).", op_type);
3900 return false;
3901 }
3902
89d67cca 3903 op0 = TREE_OPERAND (operation, 0);
3a70f3ef 3904 if (!vect_is_simple_use (op0, loop_vinfo, &def_stmt, &def, &dt[0]))
f7064d11 3905 {
89d67cca
DN
3906 if (vect_print_dump_info (REPORT_DETAILS))
3907 fprintf (vect_dump, "use not simple.");
3908 return false;
3909 }
8115817b 3910
89d67cca
DN
3911 if (op_type == binary_op)
3912 {
3913 op1 = TREE_OPERAND (operation, 1);
3a70f3ef 3914 if (!vect_is_simple_use (op1, loop_vinfo, &def_stmt, &def, &dt[1]))
f7064d11 3915 {
00518cb1 3916 if (vect_print_dump_info (REPORT_DETAILS))
f7064d11
DN
3917 fprintf (vect_dump, "use not simple.");
3918 return false;
89d67cca
DN
3919 }
3920 }
f7064d11
DN
3921
3922 /* Supportable by target? */
3923 if (!optab)
3924 {
00518cb1 3925 if (vect_print_dump_info (REPORT_DETAILS))
f7064d11
DN
3926 fprintf (vect_dump, "no optab.");
3927 return false;
3928 }
3929 vec_mode = TYPE_MODE (vectype);
166cdb08 3930 icode = (int) optab_handler (optab, vec_mode)->insn_code;
b2d16a23 3931 if (icode == CODE_FOR_nothing)
f7064d11 3932 {
00518cb1 3933 if (vect_print_dump_info (REPORT_DETAILS))
f7064d11 3934 fprintf (vect_dump, "op not supported by target.");
712f1172 3935 /* Check only during analysis. */
598b2024 3936 if (GET_MODE_SIZE (vec_mode) != UNITS_PER_WORD
712f1172
IR
3937 || (LOOP_VINFO_VECT_FACTOR (loop_vinfo)
3938 < vect_min_worthwhile_factor (code)
3939 && !vec_stmt))
598b2024 3940 return false;
00518cb1 3941 if (vect_print_dump_info (REPORT_DETAILS))
598b2024 3942 fprintf (vect_dump, "proceeding using word mode.");
f7064d11
DN
3943 }
3944
712f1172 3945 /* Worthwhile without SIMD support? Check only during analysis. */
c4336539
PB
3946 if (!VECTOR_MODE_P (TYPE_MODE (vectype))
3947 && LOOP_VINFO_VECT_FACTOR (loop_vinfo)
712f1172
IR
3948 < vect_min_worthwhile_factor (code)
3949 && !vec_stmt)
c4336539 3950 {
00518cb1 3951 if (vect_print_dump_info (REPORT_DETAILS))
c4336539
PB
3952 fprintf (vect_dump, "not worthwhile without SIMD support.");
3953 return false;
3954 }
3955
b2d16a23
UB
3956 if (code == LSHIFT_EXPR || code == RSHIFT_EXPR)
3957 {
3958 /* FORNOW: not yet supported. */
3959 if (!VECTOR_MODE_P (vec_mode))
3960 return false;
3961
3962 /* Invariant argument is needed for a vector shift
3963 by a scalar shift operand. */
3964 optab_op2_mode = insn_data[icode].operand[2].mode;
50d76c24 3965 if (!VECTOR_MODE_P (optab_op2_mode))
b2d16a23 3966 {
50d76c24
IR
3967 if (dt[1] != vect_constant_def && dt[1] != vect_invariant_def)
3968 {
3969 if (vect_print_dump_info (REPORT_DETAILS))
3970 fprintf (vect_dump, "operand mode requires invariant"
3971 " argument.");
3972 return false;
3973 }
3974
3975 scalar_shift_arg = true;
3976 }
b2d16a23
UB
3977 }
3978
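  /* E.g., for  a[i] = b[i] << 3  the shift amount is invariant and may
     stay scalar when the target's shift pattern takes a scalar operand
     2, whereas  a[i] = b[i] << c[i]  needs a vector shift-amount
     operand; with a scalar-operand-2 pattern the variable case is
     rejected by the check above.  */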
f7064d11
DN
3979 if (!vec_stmt) /* transformation not required. */
3980 {
3981 STMT_VINFO_TYPE (stmt_info) = op_vec_info_type;
792ed98b
HJ
3982 if (vect_print_dump_info (REPORT_DETAILS))
3983 fprintf (vect_dump, "=== vectorizable_operation ===");
805e2059 3984 vect_model_simple_cost (stmt_info, ncopies, dt, NULL);
f7064d11
DN
3985 return true;
3986 }
3987
3988 /** Transform. **/
3989
00518cb1 3990 if (vect_print_dump_info (REPORT_DETAILS))
f7064d11
DN
3991 fprintf (vect_dump, "transform binary/unary operation.");
3992
3993 /* Handle def. */
f7064d11
DN
3994 vec_dest = vect_create_destination_var (scalar_dest, vectype);
3995
50d76c24
IR
3996 /* Allocate VECs for vector operands. In case of SLP, vector operands are
3997 created in the previous stages of the recursion, so no allocation is
3998 needed, except for the case of shift with scalar shift argument. In that
3999 case we store the scalar operand in VEC_OPRNDS1 for every vector stmt to
4000 be created to vectorize the SLP group, i.e., SLP_NODE->VEC_STMTS_SIZE.
4001 In case of loop-based vectorization we allocate VECs of size 1. We
4002 allocate VEC_OPRNDS1 only in case of binary operation. */
805e2059 4003 if (!slp_node)
50d76c24
IR
4004 {
4005 vec_oprnds0 = VEC_alloc (tree, heap, 1);
4006 if (op_type == binary_op)
4007 vec_oprnds1 = VEC_alloc (tree, heap, 1);
4008 }
4009 else if (scalar_shift_arg)
4010 vec_oprnds1 = VEC_alloc (tree, heap, slp_node->vec_stmts_size);
805e2059 4011
89d67cca
DN
4012 /* In case the vectorization factor (VF) is bigger than the number
4013 of elements that we can fit in a vectype (nunits), we have to generate
4014 more than one vector stmt - i.e - we need to "unroll" the
4015 vector stmt by a factor VF/nunits. In doing so, we record a pointer
4016 from one copy of the vector stmt to the next, in the field
4017 STMT_VINFO_RELATED_STMT. This is necessary in order to allow following
4018 stages to find the correct vector defs to be used when vectorizing
4019 stmts that use the defs of the current stmt. The example below illustrates
4020 the vectorization process when VF=16 and nunits=4 (i.e., we need to create
4021 4 vectorized stmts):
8115817b 4022
89d67cca
DN
4023 before vectorization:
4024 RELATED_STMT VEC_STMT
4025 S1: x = memref - -
4026 S2: z = x + 1 - -
8115817b 4027
89d67cca
DN
4028 step 1: vectorize stmt S1 (done in vectorizable_load. See more details
4029 there):
4030 RELATED_STMT VEC_STMT
4031 VS1_0: vx0 = memref0 VS1_1 -
4032 VS1_1: vx1 = memref1 VS1_2 -
4033 VS1_2: vx2 = memref2 VS1_3 -
4034 VS1_3: vx3 = memref3 - -
4035 S1: x = load - VS1_0
4036 S2: z = x + 1 - -
8115817b 4037
89d67cca
DN
4038 step2: vectorize stmt S2 (done here):
4039 To vectorize stmt S2 we first need to find the relevant vector
4040 def for the first operand 'x'. This is, as usual, obtained from
4041 the vector stmt recorded in the STMT_VINFO_VEC_STMT of the stmt
4042 that defines 'x' (S1). This way we find the stmt VS1_0, and the
4043 relevant vector def 'vx0'. Having found 'vx0' we can generate
4044 the vector stmt VS2_0, and as usual, record it in the
4045 STMT_VINFO_VEC_STMT of stmt S2.
4046 When creating the second copy (VS2_1), we obtain the relevant vector
4047 def from the vector stmt recorded in the STMT_VINFO_RELATED_STMT of
4048 stmt VS1_0. This way we find the stmt VS1_1 and the relevant
4049 vector def 'vx1'. Using 'vx1' we create stmt VS2_1 and record a
4050 pointer to it in the STMT_VINFO_RELATED_STMT of the vector stmt VS2_0.
4051 Similarly when creating stmts VS2_2 and VS2_3. This is the resulting
4052 chain of stmts and pointers:
4053 RELATED_STMT VEC_STMT
4054 VS1_0: vx0 = memref0 VS1_1 -
4055 VS1_1: vx1 = memref1 VS1_2 -
4056 VS1_2: vx2 = memref2 VS1_3 -
4057 VS1_3: vx3 = memref3 - -
4058 S1: x = load - VS1_0
4059 VS2_0: vz0 = vx0 + v1 VS2_1 -
4060 VS2_1: vz1 = vx1 + v1 VS2_2 -
4061 VS2_2: vz2 = vx2 + v1 VS2_3 -
4062 VS2_3: vz3 = vx3 + v1 - -
4063 S2: z = x + 1 - VS2_0 */
8115817b 4064
89d67cca
DN
4065 prev_stmt_info = NULL;
4066 for (j = 0; j < ncopies; j++)
4067 {
4068 /* Handle uses. */
4069 if (j == 0)
4070 {
805e2059 4071 if (op_type == binary_op
f8f8fee8 4072 && (code == LSHIFT_EXPR || code == RSHIFT_EXPR))
89d67cca 4073 {
805e2059
IR
4074 /* Vector shl and shr insn patterns can be defined with scalar
4075 operand 2 (shift operand). In this case, use constant or loop
4076 invariant op1 directly, without extending it to vector mode
4077 first. */
4078 optab_op2_mode = insn_data[icode].operand[2].mode;
4079 if (!VECTOR_MODE_P (optab_op2_mode))
4080 {
4081 if (vect_print_dump_info (REPORT_DETAILS))
4082 fprintf (vect_dump, "operand 1 using scalar mode.");
4083 vec_oprnd1 = op1;
4084 VEC_quick_push (tree, vec_oprnds1, vec_oprnd1);
50d76c24
IR
4085 if (slp_node)
4086 {
4087 /* Store vec_oprnd1 for every vector stmt to be created
4088 for SLP_NODE. We check during the analysis that all the
4089 shift arguments are the same.
4090 TODO: Allow different constants for different vector
4091 stmts generated for an SLP instance. */
4092 for (k = 0; k < slp_node->vec_stmts_size - 1; k++)
4093 VEC_quick_push (tree, vec_oprnds1, vec_oprnd1);
4094 }
805e2059 4095 }
89d67cca 4096 }
805e2059 4097
50d76c24 4098 /* vec_oprnd1 is available if operand 1 should be of a scalar-type
f8f8fee8
IR
4099 (a special case for certain kinds of vector shifts); otherwise,
4100 operand 1 should be of a vector type (the usual case). */
805e2059
IR
4101 if (op_type == binary_op && !vec_oprnd1)
4102 vect_get_vec_defs (op0, op1, stmt, &vec_oprnds0, &vec_oprnds1,
4103 slp_node);
4104 else
f8f8fee8 4105 vect_get_vec_defs (op0, NULL_TREE, stmt, &vec_oprnds0, NULL,
805e2059 4106 slp_node);
89d67cca
DN
4107 }
4108 else
805e2059 4109 vect_get_vec_defs_for_stmt_copy (dt, &vec_oprnds0, &vec_oprnds1);
89d67cca 4110
805e2059
IR
4111 /* Arguments are ready. Create the new vector stmt. */
4112 for (i = 0; VEC_iterate (tree, vec_oprnds0, i, vop0); i++)
4113 {
4114 if (op_type == binary_op)
4115 {
4116 vop1 = VEC_index (tree, vec_oprnds1, i);
4117 new_stmt = build_gimple_modify_stmt (vec_dest,
4118 build2 (code, vectype, vop0, vop1));
4119 }
4120 else
4121 new_stmt = build_gimple_modify_stmt (vec_dest,
4122 build1 (code, vectype, vop0));
8115817b 4123
805e2059
IR
4124 new_temp = make_ssa_name (vec_dest, new_stmt);
4125 GIMPLE_STMT_OPERAND (new_stmt, 0) = new_temp;
4126 vect_finish_stmt_generation (stmt, new_stmt, bsi);
4127 if (slp_node)
4128 VEC_quick_push (tree, SLP_TREE_VEC_STMTS (slp_node), new_stmt);
4129 }
8115817b 4130
89d67cca
DN
4131 if (j == 0)
4132 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
4133 else
4134 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
4135 prev_stmt_info = vinfo_for_stmt (new_stmt);
4136 }
4137
805e2059
IR
4138 VEC_free (tree, heap, vec_oprnds0);
4139 if (vec_oprnds1)
4140 VEC_free (tree, heap, vec_oprnds1);
4141
89d67cca
DN
4142 return true;
4143}
4144
4145
4146/* Function vectorizable_type_demotion
8115817b 4147
89d67cca
DN
4148 Check if STMT performs a binary or unary operation that involves
4149 type demotion, and if it can be vectorized.
4150 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
4151 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
4152 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
8115817b 4153
89d67cca
DN
4154bool
4155vectorizable_type_demotion (tree stmt, block_stmt_iterator *bsi,
d9987fb4 4156 tree *vec_stmt)
89d67cca
DN
4157{
4158 tree vec_dest;
4159 tree scalar_dest;
4160 tree operation;
4161 tree op0;
4162 tree vec_oprnd0=NULL, vec_oprnd1=NULL;
4163 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
4164 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
d29de1bf 4165 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
8ff43db0 4166 enum tree_code code, code1 = ERROR_MARK;
89d67cca
DN
4167 tree new_temp;
4168 tree def, def_stmt;
3a70f3ef 4169 enum vect_def_type dt[2] = {vect_unknown_def_type, vect_unknown_def_type};
89d67cca
DN
4170 tree new_stmt;
4171 stmt_vec_info prev_stmt_info;
4172 int nunits_in;
4173 int nunits_out;
4174 tree vectype_out;
4175 int ncopies;
4176 int j;
4177 tree expr;
4178 tree vectype_in;
8115817b 4179
89d67cca
DN
4180 if (!STMT_VINFO_RELEVANT_P (stmt_info))
4181 return false;
60555ced
DN
4182
4183 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_loop_def)
4184 return false;
4185
60555ced 4186 /* Is STMT a vectorizable type-demotion operation? */
07beea0d 4187 if (TREE_CODE (stmt) != GIMPLE_MODIFY_STMT)
89d67cca 4188 return false;
8115817b 4189
07beea0d 4190 if (TREE_CODE (GIMPLE_STMT_OPERAND (stmt, 0)) != SSA_NAME)
89d67cca 4191 return false;
8115817b 4192
07beea0d 4193 operation = GIMPLE_STMT_OPERAND (stmt, 1);
89d67cca
DN
4194 code = TREE_CODE (operation);
4195 if (code != NOP_EXPR && code != CONVERT_EXPR)
4196 return false;
8115817b 4197
89d67cca
DN
4198 op0 = TREE_OPERAND (operation, 0);
4199 vectype_in = get_vectype_for_scalar_type (TREE_TYPE (op0));
4934454b
DN
4200 if (!vectype_in)
4201 return false;
89d67cca 4202 nunits_in = TYPE_VECTOR_SUBPARTS (vectype_in);
8115817b 4203
07beea0d 4204 scalar_dest = GIMPLE_STMT_OPERAND (stmt, 0);
459e691a 4205 vectype_out = get_vectype_for_scalar_type (TREE_TYPE (scalar_dest));
4934454b
DN
4206 if (!vectype_out)
4207 return false;
89d67cca
DN
4208 nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out);
4209 if (nunits_in != nunits_out / 2) /* FORNOW */
4210 return false;
8115817b 4211
89d67cca
DN
4212 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits_out;
4213 gcc_assert (ncopies >= 1);
d29de1bf
DN
4214 /* FORNOW. This restriction should be relaxed. */
4215 if (nested_in_vect_loop_p (loop, stmt) && ncopies > 1)
4216 {
4217 if (vect_print_dump_info (REPORT_DETAILS))
4218 fprintf (vect_dump, "multiple types in nested loop.");
4219 return false;
4220 }
878aa817 4221
8115817b
UB
4222 if (! ((INTEGRAL_TYPE_P (TREE_TYPE (scalar_dest))
4223 && INTEGRAL_TYPE_P (TREE_TYPE (op0)))
4224 || (SCALAR_FLOAT_TYPE_P (TREE_TYPE (scalar_dest))
4225 && SCALAR_FLOAT_TYPE_P (TREE_TYPE (op0))
4226 && (code == NOP_EXPR || code == CONVERT_EXPR))))
878aa817 4227 return false;
8115817b 4228
89d67cca 4229 /* Check the operands of the operation. */
3a70f3ef 4230 if (!vect_is_simple_use (op0, loop_vinfo, &def_stmt, &def, &dt[0]))
89d67cca
DN
4231 {
4232 if (vect_print_dump_info (REPORT_DETAILS))
4233 fprintf (vect_dump, "use not simple.");
4234 return false;
4235 }
8115817b 4236
89d67cca 4237 /* Supportable by target? */
d9987fb4 4238 if (!supportable_narrowing_operation (code, stmt, vectype_in, &code1))
89d67cca 4239 return false;
8115817b 4240
89d67cca 4241 STMT_VINFO_VECTYPE (stmt_info) = vectype_in;
8115817b 4242
89d67cca
DN
4243 if (!vec_stmt) /* transformation not required. */
4244 {
4245 STMT_VINFO_TYPE (stmt_info) = type_demotion_vec_info_type;
792ed98b
HJ
4246 if (vect_print_dump_info (REPORT_DETAILS))
4247 fprintf (vect_dump, "=== vectorizable_demotion ===");
805e2059 4248 vect_model_simple_cost (stmt_info, ncopies, dt, NULL);
89d67cca
DN
4249 return true;
4250 }
8115817b 4251
89d67cca 4252 /** Transform. **/
89d67cca
DN
4253 if (vect_print_dump_info (REPORT_DETAILS))
4254 fprintf (vect_dump, "transform type demotion operation. ncopies = %d.",
8115817b
UB
4255 ncopies);
4256
89d67cca
DN
4257 /* Handle def. */
4258 vec_dest = vect_create_destination_var (scalar_dest, vectype_out);
4259
4260 /* In case the vectorization factor (VF) is bigger than the number
4261 of elements that we can fit in a vectype (nunits), we have to generate
4262 more than one vector stmt - i.e., we need to "unroll" the
4263 vector stmt by a factor VF/nunits. */
4264 prev_stmt_info = NULL;
4265 for (j = 0; j < ncopies; j++)
4266 {
4267 /* Handle uses. */
4268 if (j == 0)
4269 {
89d67cca 4270 vec_oprnd0 = vect_get_vec_def_for_operand (op0, stmt, NULL);
3a70f3ef 4271 vec_oprnd1 = vect_get_vec_def_for_stmt_copy (dt[0], vec_oprnd0);
89d67cca
DN
4272 }
4273 else
4274 {
3a70f3ef
DN
4275 vec_oprnd0 = vect_get_vec_def_for_stmt_copy (dt[0], vec_oprnd1);
4276 vec_oprnd1 = vect_get_vec_def_for_stmt_copy (dt[0], vec_oprnd0);
89d67cca 4277 }
8115817b 4278
89d67cca 4279 /* Arguments are ready. Create the new vector stmt. */
d9987fb4 4280 expr = build2 (code1, vectype_out, vec_oprnd0, vec_oprnd1);
ebb07520 4281 new_stmt = build_gimple_modify_stmt (vec_dest, expr);
89d67cca 4282 new_temp = make_ssa_name (vec_dest, new_stmt);
07beea0d 4283 GIMPLE_STMT_OPERAND (new_stmt, 0) = new_temp;
89d67cca 4284 vect_finish_stmt_generation (stmt, new_stmt, bsi);
8115817b 4285
89d67cca
DN
4286 if (j == 0)
4287 STMT_VINFO_VEC_STMT (stmt_info) = new_stmt;
4288 else
4289 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
8115817b 4290
89d67cca
DN
4291 prev_stmt_info = vinfo_for_stmt (new_stmt);
4292 }
8115817b 4293
89d67cca
DN
4294 *vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);
4295 return true;
4296}
4297
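/* E.g., demoting eight ints to eight shorts with VF == 8 consumes two
   V4SI input vectors per V8HI result, which is why every copy above
   uses both vec_oprnd0 and vec_oprnd1; with a pack operation such as
   VEC_PACK_TRUNC_EXPR (one code supportable_narrowing_operation may
   return) the generated stmt is

     vres = VEC_PACK_TRUNC_EXPR <vx0, vx1>  */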
4298
89d67cca
DN
4299/* Function vectorizable_type_promotion
4300
4301 Check if STMT performs a binary or unary operation that involves
4302 type promotion, and if it can be vectorized.
4303 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
4304 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
4305 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
4306
4307bool
4308vectorizable_type_promotion (tree stmt, block_stmt_iterator *bsi,
4309 tree *vec_stmt)
4310{
4311 tree vec_dest;
4312 tree scalar_dest;
4313 tree operation;
4314 tree op0, op1 = NULL;
4315 tree vec_oprnd0=NULL, vec_oprnd1=NULL;
4316 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
4317 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
d29de1bf 4318 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
8ff43db0 4319 enum tree_code code, code1 = ERROR_MARK, code2 = ERROR_MARK;
89d67cca
DN
4320 tree decl1 = NULL_TREE, decl2 = NULL_TREE;
4321 int op_type;
4322 tree def, def_stmt;
3a70f3ef 4323 enum vect_def_type dt[2] = {vect_unknown_def_type, vect_unknown_def_type};
89d67cca
DN
4324 tree new_stmt;
4325 stmt_vec_info prev_stmt_info;
4326 int nunits_in;
4327 int nunits_out;
4328 tree vectype_out;
4329 int ncopies;
4330 int j;
4331 tree vectype_in;
4332
89d67cca
DN
4333 if (!STMT_VINFO_RELEVANT_P (stmt_info))
4334 return false;
4335
60555ced
DN
4336 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_loop_def)
4337 return false;
89d67cca 4338
60555ced 4339 /* Is STMT a vectorizable type-promotion operation? */
07beea0d 4340 if (TREE_CODE (stmt) != GIMPLE_MODIFY_STMT)
89d67cca
DN
4341 return false;
4342
07beea0d 4343 if (TREE_CODE (GIMPLE_STMT_OPERAND (stmt, 0)) != SSA_NAME)
89d67cca
DN
4344 return false;
4345
07beea0d 4346 operation = GIMPLE_STMT_OPERAND (stmt, 1);
89d67cca 4347 code = TREE_CODE (operation);
d9987fb4
UB
4348 if (code != NOP_EXPR && code != CONVERT_EXPR
4349 && code != WIDEN_MULT_EXPR)
89d67cca
DN
4350 return false;
4351
f7064d11 4352 op0 = TREE_OPERAND (operation, 0);
89d67cca 4353 vectype_in = get_vectype_for_scalar_type (TREE_TYPE (op0));
4934454b
DN
4354 if (!vectype_in)
4355 return false;
89d67cca 4356 nunits_in = TYPE_VECTOR_SUBPARTS (vectype_in);
89d67cca 4357
07beea0d 4358 scalar_dest = GIMPLE_STMT_OPERAND (stmt, 0);
89d67cca 4359 vectype_out = get_vectype_for_scalar_type (TREE_TYPE (scalar_dest));
4934454b
DN
4360 if (!vectype_out)
4361 return false;
89d67cca
DN
4362 nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out);
4363 if (nunits_out != nunits_in / 2) /* FORNOW */
4364 return false;
4365
459e691a
UB
4366 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits_in;
4367 gcc_assert (ncopies >= 1);
d29de1bf
DN
4368 /* FORNOW. This restriction should be relaxed. */
4369 if (nested_in_vect_loop_p (loop, stmt) && ncopies > 1)
4370 {
4371 if (vect_print_dump_info (REPORT_DETAILS))
4372 fprintf (vect_dump, "multiple types in nested loop.");
4373 return false;
4374 }
459e691a 4375
8115817b
UB
4376 if (! ((INTEGRAL_TYPE_P (TREE_TYPE (scalar_dest))
4377 && INTEGRAL_TYPE_P (TREE_TYPE (op0)))
4378 || (SCALAR_FLOAT_TYPE_P (TREE_TYPE (scalar_dest))
4379 && SCALAR_FLOAT_TYPE_P (TREE_TYPE (op0))
4380 && (code == CONVERT_EXPR || code == NOP_EXPR))))
878aa817
DN
4381 return false;
4382
89d67cca 4383 /* Check the operands of the operation. */
3a70f3ef 4384 if (!vect_is_simple_use (op0, loop_vinfo, &def_stmt, &def, &dt[0]))
89d67cca
DN
4385 {
4386 if (vect_print_dump_info (REPORT_DETAILS))
4387 fprintf (vect_dump, "use not simple.");
4388 return false;
4389 }
f7064d11 4390
89d67cca 4391 op_type = TREE_CODE_LENGTH (code);
f7064d11
DN
4392 if (op_type == binary_op)
4393 {
4394 op1 = TREE_OPERAND (operation, 1);
3a70f3ef 4395 if (!vect_is_simple_use (op1, loop_vinfo, &def_stmt, &def, &dt[1]))
89d67cca
DN
4396 {
4397 if (vect_print_dump_info (REPORT_DETAILS))
4398 fprintf (vect_dump, "use not simple.");
4399 return false;
4400 }
4401 }
b2d16a23 4402
89d67cca
DN
4403 /* Supportable by target? */
4404 if (!supportable_widening_operation (code, stmt, vectype_in,
4405 &decl1, &decl2, &code1, &code2))
4406 return false;
b2d16a23 4407
89d67cca 4408 STMT_VINFO_VECTYPE (stmt_info) = vectype_in;
b2d16a23 4409
89d67cca
DN
4410 if (!vec_stmt) /* transformation not required. */
4411 {
4412 STMT_VINFO_TYPE (stmt_info) = type_promotion_vec_info_type;
792ed98b
HJ
4413 if (vect_print_dump_info (REPORT_DETAILS))
4414 fprintf (vect_dump, "=== vectorizable_promotion ===");
805e2059 4415 vect_model_simple_cost (stmt_info, 2*ncopies, dt, NULL);
89d67cca 4416 return true;
f7064d11
DN
4417 }
4418
89d67cca 4419 /** Transform. **/
f7064d11 4420
89d67cca
DN
4421 if (vect_print_dump_info (REPORT_DETAILS))
4422 fprintf (vect_dump, "transform type promotion operation. ncopies = %d.",
4423 ncopies);
4424
4425 /* Handle def. */
4426 vec_dest = vect_create_destination_var (scalar_dest, vectype_out);
4427
4428 /* In case the vectorization factor (VF) is bigger than the number
4429 of elements that we can fit in a vectype (nunits), we have to generate
4430 more than one vector stmt - i.e., we need to "unroll" the
4431 vector stmt by a factor VF/nunits. */
f7064d11 4432
89d67cca
DN
4433 prev_stmt_info = NULL;
4434 for (j = 0; j < ncopies; j++)
4435 {
4436 /* Handle uses. */
4437 if (j == 0)
4438 {
4439 vec_oprnd0 = vect_get_vec_def_for_operand (op0, stmt, NULL);
4440 if (op_type == binary_op)
4441 vec_oprnd1 = vect_get_vec_def_for_operand (op1, stmt, NULL);
4442 }
4443 else
4444 {
3a70f3ef 4445 vec_oprnd0 = vect_get_vec_def_for_stmt_copy (dt[0], vec_oprnd0);
89d67cca 4446 if (op_type == binary_op)
3a70f3ef 4447 vec_oprnd1 = vect_get_vec_def_for_stmt_copy (dt[1], vec_oprnd1);
89d67cca
DN
4448 }
4449
4450 /* Arguments are ready. Create the new vector stmt. We are creating
4451 two vector defs because the widened result does not fit in one vector.
4452 The vectorized stmt can be expressed as a call to a target builtin,
4453 or using a tree-code. */
4454 /* Generate first half of the widened result: */
4455 new_stmt = vect_gen_widened_results_half (code1, vectype_out, decl1,
4456 vec_oprnd0, vec_oprnd1, op_type, vec_dest, bsi, stmt);
4457 if (j == 0)
4458 STMT_VINFO_VEC_STMT (stmt_info) = new_stmt;
4459 else
4460 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
4461 prev_stmt_info = vinfo_for_stmt (new_stmt);
4462
4463 /* Generate second half of the widened result: */
4464 new_stmt = vect_gen_widened_results_half (code2, vectype_out, decl2,
4465 vec_oprnd0, vec_oprnd1, op_type, vec_dest, bsi, stmt);
4466 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
4467 prev_stmt_info = vinfo_for_stmt (new_stmt);
4468
4469 }
4470
4471 *vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);
f7064d11
DN
4472 return true;
4473}
4474
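/* E.g., promoting eight shorts to eight ints produces two V4SI result
   vectors per V8HI input, hence the two halves generated above; for a
   widening multiply (WIDEN_MULT_EXPR) supportable_widening_operation
   may return the pair VEC_WIDEN_MULT_HI_EXPR / VEC_WIDEN_MULT_LO_EXPR
   as CODE1 and CODE2.  */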
4475
98b44b0e
IR
4476/* Function vect_strided_store_supported.
4477
4478 Returns TRUE if INTERLEAVE_HIGH and INTERLEAVE_LOW operations are supported,
4479 and FALSE otherwise. */
4480
4481static bool
4482vect_strided_store_supported (tree vectype)
4483{
4484 optab interleave_high_optab, interleave_low_optab;
4485 int mode;
4486
4487 mode = (int) TYPE_MODE (vectype);
4488
4489 /* Check that the operation is supported. */
4490 interleave_high_optab = optab_for_tree_code (VEC_INTERLEAVE_HIGH_EXPR,
4491 vectype);
4492 interleave_low_optab = optab_for_tree_code (VEC_INTERLEAVE_LOW_EXPR,
4493 vectype);
4494 if (!interleave_high_optab || !interleave_low_optab)
4495 {
4496 if (vect_print_dump_info (REPORT_DETAILS))
4497 fprintf (vect_dump, "no optab for interleave.");
4498 return false;
4499 }
4500
166cdb08 4501 if (optab_handler (interleave_high_optab, mode)->insn_code
98b44b0e 4502 == CODE_FOR_nothing
166cdb08 4503 || optab_handler (interleave_low_optab, mode)->insn_code
98b44b0e
IR
4504 == CODE_FOR_nothing)
4505 {
4506 if (vect_print_dump_info (REPORT_DETAILS))
4507 fprintf (vect_dump, "interleave op not supported by target.");
4508 return false;
4509 }
805e2059 4510
98b44b0e
IR
4511 return true;
4512}
4513
4514
4515/* Function vect_permute_store_chain.
4516
2f8e468b 4517 Given a chain of interleaved stores in DR_CHAIN of LENGTH that must be
98b44b0e
IR
4518 a power of 2, generate interleave_high/low stmts to reorder the data
4519 correctly for the stores. Return the final references for stores in
4520 RESULT_CHAIN.
4521
4522 E.g., LENGTH is 4 and the scalar type is short, i.e., VF is 8.
2f8e468b 4523 The input is 4 vectors each containing 8 elements. We assign a number to each
98b44b0e
IR
4524 element; the input sequence is:
4525
4526 1st vec: 0 1 2 3 4 5 6 7
4527 2nd vec: 8 9 10 11 12 13 14 15
4528 3rd vec: 16 17 18 19 20 21 22 23
4529 4th vec: 24 25 26 27 28 29 30 31
4530
4531 The output sequence should be:
4532
4533 1st vec: 0 8 16 24 1 9 17 25
4534 2nd vec: 2 10 18 26 3 11 19 27
4535 3rd vec: 4 12 20 28 5 13 21 29
4536 4th vec: 6 14 22 30 7 15 23 31
4537
4538 i.e., we interleave the contents of the four vectors in their order.
4539
4540 We use interleave_high/low instructions to create such output. The input of
4541 each interleave_high/low operation is two vectors:
4542 1st vec 2nd vec
4543 0 1 2 3 4 5 6 7
4544 the even elements of the result vector are obtained left-to-right from the
4545 high/low elements of the first vector. The odd elements of the result are
4546 obtained left-to-right from the high/low elements of the second vector.
4547 The output of interleave_high will be: 0 4 1 5
4548 and of interleave_low: 2 6 3 7
4549
4550
2f8e468b 4551 The permutation is done in log2 (LENGTH) stages. In each stage interleave_high
98b44b0e
IR
4552 and interleave_low stmts are created for each pair of vectors in DR_CHAIN,
4553 where the first argument is taken from the first half of DR_CHAIN and the
4554 second argument from its second half.
4555 In our example,
4556
4557 I1: interleave_high (1st vec, 3rd vec)
4558 I2: interleave_low (1st vec, 3rd vec)
4559 I3: interleave_high (2nd vec, 4th vec)
4560 I4: interleave_low (2nd vec, 4th vec)
4561
4562 The output for the first stage is:
4563
4564 I1: 0 16 1 17 2 18 3 19
4565 I2: 4 20 5 21 6 22 7 23
4566 I3: 8 24 9 25 10 26 11 27
4567 I4: 12 28 13 29 14 30 15 31
4568
4569 The output of the second stage, i.e. the final result is:
4570
4571 I1: 0 8 16 24 1 9 17 25
4572 I2: 2 10 18 26 3 11 19 27
4573 I3: 4 12 20 28 5 13 21 29
4574 I4: 6 14 22 30 7 15 23 31. */
4575
4576static bool
4577vect_permute_store_chain (VEC(tree,heap) *dr_chain,
4578 unsigned int length,
4579 tree stmt,
4580 block_stmt_iterator *bsi,
4581 VEC(tree,heap) **result_chain)
4582{
4583 tree perm_dest, perm_stmt, vect1, vect2, high, low;
4584 tree vectype = STMT_VINFO_VECTYPE (vinfo_for_stmt (stmt));
ebb07520 4585 tree scalar_dest, tmp;
98b44b0e
IR
4586 int i;
4587 unsigned int j;
4588 VEC(tree,heap) *first, *second;
4589
ce133c3f 4590 scalar_dest = GIMPLE_STMT_OPERAND (stmt, 0);
98b44b0e
IR
4591 first = VEC_alloc (tree, heap, length/2);
4592 second = VEC_alloc (tree, heap, length/2);
4593
4594 /* Check that the operation is supported. */
4595 if (!vect_strided_store_supported (vectype))
4596 return false;
4597
4598 *result_chain = VEC_copy (tree, heap, dr_chain);
4599
4600 for (i = 0; i < exact_log2 (length); i++)
4601 {
4602 for (j = 0; j < length/2; j++)
4603 {
4604 vect1 = VEC_index (tree, dr_chain, j);
4605 vect2 = VEC_index (tree, dr_chain, j+length/2);
4606
a3895f55
IR
4607 /* Create interleaving stmt:
4608 in the case of big endian:
4609 high = interleave_high (vect1, vect2)
4610 and in the case of little endian:
4611 high = interleave_low (vect1, vect2). */
98b44b0e 4612 perm_dest = create_tmp_var (vectype, "vect_inter_high");
0890b981 4613 DECL_GIMPLE_REG_P (perm_dest) = 1;
98b44b0e 4614 add_referenced_var (perm_dest);
a3895f55 4615 if (BYTES_BIG_ENDIAN)
ebb07520 4616 tmp = build2 (VEC_INTERLEAVE_HIGH_EXPR, vectype, vect1, vect2);
a3895f55 4617 else
ebb07520
RS
4618 tmp = build2 (VEC_INTERLEAVE_LOW_EXPR, vectype, vect1, vect2);
4619 perm_stmt = build_gimple_modify_stmt (perm_dest, tmp);
98b44b0e 4620 high = make_ssa_name (perm_dest, perm_stmt);
07beea0d 4621 GIMPLE_STMT_OPERAND (perm_stmt, 0) = high;
98b44b0e
IR
4622 vect_finish_stmt_generation (stmt, perm_stmt, bsi);
4623 VEC_replace (tree, *result_chain, 2*j, high);
4624
a3895f55
IR
4625 /* Create interleaving stmt:
4626 in the case of big endian:
4627 low = interleave_low (vect1, vect2)
4628 and in the case of little endian:
4629 low = interleave_high (vect1, vect2). */
98b44b0e 4630 perm_dest = create_tmp_var (vectype, "vect_inter_low");
0890b981 4631 DECL_GIMPLE_REG_P (perm_dest) = 1;
98b44b0e 4632 add_referenced_var (perm_dest);
a3895f55 4633 if (BYTES_BIG_ENDIAN)
ebb07520 4634 tmp = build2 (VEC_INTERLEAVE_LOW_EXPR, vectype, vect1, vect2);
a3895f55 4635 else
ebb07520
RS
4636 tmp = build2 (VEC_INTERLEAVE_HIGH_EXPR, vectype, vect1, vect2);
4637 perm_stmt = build_gimple_modify_stmt (perm_dest, tmp);
98b44b0e 4638 low = make_ssa_name (perm_dest, perm_stmt);
07beea0d 4639 GIMPLE_STMT_OPERAND (perm_stmt, 0) = low;
98b44b0e
IR
4640 vect_finish_stmt_generation (stmt, perm_stmt, bsi);
4641 VEC_replace (tree, *result_chain, 2*j+1, low);
4642 }
4643 dr_chain = VEC_copy (tree, heap, *result_chain);
4644 }
4645 return true;
4646}
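
/* A minimal, self-contained scalar model of the permutation above -- an
   illustration only, not part of the vectorizer.  It reproduces the
   4-vector, 8-element example from the comment: log2 (LENGTH) stages of
   interleave_high/low over pairs whose first argument comes from the first
   half of the chain and second argument from its second half.  The element
   order inside interleave_high/low follows the big-endian semantics
   described above.  */

#include <assert.h>
#include <string.h>

#define NELT 8   /* elements per vector (VF)   */
#define LEN  4   /* vectors in the store chain */

/* interleave_high: pair up the first-half elements of A and B.  */
static void
interleave_high (const int *a, const int *b, int *out)
{
  int i;
  for (i = 0; i < NELT / 2; i++)
    {
      out[2 * i] = a[i];
      out[2 * i + 1] = b[i];
    }
}

/* interleave_low: pair up the second-half elements of A and B.  */
static void
interleave_low (const int *a, const int *b, int *out)
{
  int i;
  for (i = 0; i < NELT / 2; i++)
    {
      out[2 * i] = a[NELT / 2 + i];
      out[2 * i + 1] = b[NELT / 2 + i];
    }
}

int
main (void)
{
  int chain[LEN][NELT], result[LEN][NELT];
  int i, j, stage;

  for (i = 0; i < LEN; i++)              /* vectors 0..7, 8..15, ... */
    for (j = 0; j < NELT; j++)
      chain[i][j] = i * NELT + j;

  for (stage = 0; stage < 2; stage++)    /* log2 (LEN) == 2 stages   */
    {
      for (j = 0; j < LEN / 2; j++)
        {
          interleave_high (chain[j], chain[j + LEN / 2], result[2 * j]);
          interleave_low (chain[j], chain[j + LEN / 2], result[2 * j + 1]);
        }
      memcpy (chain, result, sizeof (chain));
    }

  /* Matches the final table in the comment: 0 8 16 24 1 9 17 25 ...  */
  assert (result[0][0] == 0 && result[0][1] == 8
          && result[0][2] == 16 && result[0][3] == 24);
  assert (result[2][7] == 29 && result[3][7] == 31);
  return 0;
}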
4647
4648
f7064d11
DN
4649/* Function vectorizable_store.
4650
4651 Check if STMT defines a non-scalar data-ref (array/pointer/structure) that
4652 can be vectorized.
4653 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
4654 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
4655 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
4656
4657bool
805e2059
IR
4658vectorizable_store (tree stmt, block_stmt_iterator *bsi, tree *vec_stmt,
4659 slp_tree slp_node)
f7064d11
DN
4660{
4661 tree scalar_dest;
4662 tree data_ref;
4663 tree op;
89d67cca 4664 tree vec_oprnd = NULL_TREE;
f7064d11 4665 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
98b44b0e 4666 struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info), *first_dr = NULL;
f7064d11
DN
4667 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
4668 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
d29de1bf 4669 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
f7064d11
DN
4670 enum machine_mode vec_mode;
4671 tree dummy;
468c2ac0 4672 enum dr_alignment_support alignment_support_scheme;
88088c03
DN
4673 tree def, def_stmt;
4674 enum vect_def_type dt;
98b44b0e 4675 stmt_vec_info prev_stmt_info = NULL;
89d67cca
DN
4676 tree dataref_ptr = NULL_TREE;
4677 int nunits = TYPE_VECTOR_SUBPARTS (vectype);
4678 int ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits;
4679 int j;
0bf2cf89 4680 tree next_stmt, first_stmt = NULL_TREE;
98b44b0e
IR
4681 bool strided_store = false;
4682 unsigned int group_size, i;
4683 VEC(tree,heap) *dr_chain = NULL, *oprnds = NULL, *result_chain = NULL;
468c2ac0 4684 bool inv_p;
805e2059
IR
4685 VEC(tree,heap) *vec_oprnds = NULL;
4686 bool slp = (slp_node != NULL);
4687 stmt_vec_info first_stmt_vinfo;
4688 unsigned int vec_num;
4689
4690 /* FORNOW: SLP with multiple types is not supported. The SLP analysis verifies
4691 this, so we can safely override NCOPIES with 1 here. */
4692 if (slp)
4693 ncopies = 1;
468c2ac0 4694
89d67cca 4695 gcc_assert (ncopies >= 1);
468c2ac0 4696
d29de1bf
DN
4697 /* FORNOW. This restriction should be relaxed. */
4698 if (nested_in_vect_loop_p (loop, stmt) && ncopies > 1)
4699 {
4700 if (vect_print_dump_info (REPORT_DETAILS))
4701 fprintf (vect_dump, "multiple types in nested loop.");
4702 return false;
4703 }
f7064d11 4704
60555ced
DN
4705 if (!STMT_VINFO_RELEVANT_P (stmt_info))
4706 return false;
4707
4708 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_loop_def)
4709 return false;
4710
f7064d11
DN
4711 /* Is vectorizable store? */
4712
07beea0d 4713 if (TREE_CODE (stmt) != GIMPLE_MODIFY_STMT)
f7064d11
DN
4714 return false;
4715
07beea0d 4716 scalar_dest = GIMPLE_STMT_OPERAND (stmt, 0);
f7064d11 4717 if (TREE_CODE (scalar_dest) != ARRAY_REF
98b44b0e 4718 && TREE_CODE (scalar_dest) != INDIRECT_REF
805e2059 4719 && !STMT_VINFO_STRIDED_ACCESS (stmt_info))
f7064d11
DN
4720 return false;
4721
07beea0d 4722 op = GIMPLE_STMT_OPERAND (stmt, 1);
88088c03 4723 if (!vect_is_simple_use (op, loop_vinfo, &def_stmt, &def, &dt))
f7064d11 4724 {
00518cb1 4725 if (vect_print_dump_info (REPORT_DETAILS))
f7064d11
DN
4726 fprintf (vect_dump, "use not simple.");
4727 return false;
4728 }
4729
4730 vec_mode = TYPE_MODE (vectype);
4731 /* FORNOW. In some cases can vectorize even if data-type not supported
4732 (e.g. - array initialization with 0). */
166cdb08 4733 if (optab_handler (mov_optab, (int)vec_mode)->insn_code == CODE_FOR_nothing)
f7064d11
DN
4734 return false;
4735
4736 if (!STMT_VINFO_DATA_REF (stmt_info))
4737 return false;
4738
805e2059 4739 if (STMT_VINFO_STRIDED_ACCESS (stmt_info))
98b44b0e
IR
4740 {
4741 strided_store = true;
0bf2cf89 4742 first_stmt = DR_GROUP_FIRST_DR (stmt_info);
805e2059
IR
4743 if (!vect_strided_store_supported (vectype)
4744 && !PURE_SLP_STMT (stmt_info) && !slp)
0bf2cf89
IR
4745 return false;
4746
4747 if (first_stmt == stmt)
4748 {
4749 /* STMT is the leader of the group. Check the operands of all the
4750 stmts of the group. */
4751 next_stmt = DR_GROUP_NEXT_DR (stmt_info);
4752 while (next_stmt)
4753 {
4754 op = GIMPLE_STMT_OPERAND (next_stmt, 1);
4755 if (!vect_is_simple_use (op, loop_vinfo, &def_stmt, &def, &dt))
4756 {
4757 if (vect_print_dump_info (REPORT_DETAILS))
4758 fprintf (vect_dump, "use not simple.");
4759 return false;
4760 }
4761 next_stmt = DR_GROUP_NEXT_DR (vinfo_for_stmt (next_stmt));
4762 }
4763 }
98b44b0e 4764 }
f7064d11
DN
4765
4766 if (!vec_stmt) /* transformation not required. */
4767 {
4768 STMT_VINFO_TYPE (stmt_info) = store_vec_info_type;
805e2059
IR
4769 if (!PURE_SLP_STMT (stmt_info))
4770 vect_model_store_cost (stmt_info, ncopies, dt, NULL);
f7064d11
DN
4771 return true;
4772 }
4773
4774 /** Transform. **/
4775
98b44b0e
IR
4776 if (strided_store)
4777 {
98b44b0e
IR
4778 first_dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt));
4779 group_size = DR_GROUP_SIZE (vinfo_for_stmt (first_stmt));
4780
4781 DR_GROUP_STORE_COUNT (vinfo_for_stmt (first_stmt))++;
4782
468c2ac0
DN
4783 /* FORNOW */
4784 gcc_assert (!nested_in_vect_loop_p (loop, stmt));
4785
98b44b0e
IR
4786 /* We vectorize all the stmts of the interleaving group when we
4787 reach the last stmt in the group. */
4788 if (DR_GROUP_STORE_COUNT (vinfo_for_stmt (first_stmt))
805e2059
IR
4789 < DR_GROUP_SIZE (vinfo_for_stmt (first_stmt))
4790 && !slp)
98b44b0e
IR
4791 {
4792 *vec_stmt = NULL_TREE;
4793 return true;
4794 }
805e2059
IR
4795
4796 if (slp)
4797 strided_store = false;
4798
4799 /* VEC_NUM is the number of vect stmts to be created for this group. */
4800 if (slp && SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node) < group_size)
4801 vec_num = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
4802 else
4803 vec_num = group_size;
98b44b0e
IR
4804 }
4805 else
4806 {
4807 first_stmt = stmt;
4808 first_dr = dr;
805e2059
IR
4809 group_size = vec_num = 1;
4810 first_stmt_vinfo = stmt_info;
98b44b0e
IR
4811 }
4812
792ed98b
HJ
4813 if (vect_print_dump_info (REPORT_DETAILS))
4814 fprintf (vect_dump, "transform store. ncopies = %d",ncopies);
4815
98b44b0e
IR
4816 dr_chain = VEC_alloc (tree, heap, group_size);
4817 oprnds = VEC_alloc (tree, heap, group_size);
4818
468c2ac0
DN
4819 alignment_support_scheme = vect_supportable_dr_alignment (first_dr);
4820 gcc_assert (alignment_support_scheme);
4821 gcc_assert (alignment_support_scheme == dr_aligned); /* FORNOW */
f7064d11 4822
89d67cca
DN
4823 /* In case the vectorization factor (VF) is bigger than the number
4824 of elements that we can fit in a vectype (nunits), we have to generate
4825 more than one vector stmt - i.e - we need to "unroll" the
4826 vector stmt by a factor VF/nunits. For more details see documentation in
4827 vect_get_vec_def_for_copy_stmt. */
f7064d11 4828
98b44b0e
IR
4829 /* In case of interleaving (non-unit strided access):
4830
4831 S1: &base + 2 = x2
4832 S2: &base = x0
4833 S3: &base + 1 = x1
4834 S4: &base + 3 = x3
4835
878aa817 4836 We create vectorized stores starting from the base address (the access of
98b44b0e
IR
4837 the first stmt in the chain (S2 in the above example)), when the last store
4838 stmt of the chain (S4) is reached:
4839
4840 VS1: &base = vx2
4841 VS2: &base + vec_size*1 = vx0
4842 VS3: &base + vec_size*2 = vx1
4843 VS4: &base + vec_size*3 = vx3
4844
4845 Then permutation statements are generated:
4846
4847 VS5: vx5 = VEC_INTERLEAVE_HIGH_EXPR < vx0, vx3 >
4848 VS6: vx6 = VEC_INTERLEAVE_LOW_EXPR < vx0, vx3 >
4849 ...
4850
4851 And they are put in STMT_VINFO_VEC_STMT of the corresponding scalar stmts
4852 (the order of the data-refs in the output of vect_permute_store_chain
4853 corresponds to the order of scalar stmts in the interleaving chain - see
2f8e468b 4854 the documentation of vect_permute_store_chain()).
98b44b0e
IR
4855
4856 In case of both multiple types and interleaving, the above vector stores and
4857 permutation stmts are created for every copy. The result vector stmts are
4858 put in STMT_VINFO_VEC_STMT for the first copy and in the corresponding
4859 STMT_VINFO_RELATED_STMT for the next copies.
4860 */
4861
89d67cca
DN
4862 prev_stmt_info = NULL;
4863 for (j = 0; j < ncopies; j++)
4864 {
4865 tree new_stmt;
4866 tree ptr_incr;
f7064d11 4867
89d67cca
DN
4868 if (j == 0)
4869 {
805e2059
IR
4870 if (slp)
4871 {
4872 /* Get vectorized arguments for SLP_NODE. */
4873 vect_get_slp_defs (slp_node, &vec_oprnds, NULL);
4874
4875 vec_oprnd = VEC_index (tree, vec_oprnds, 0);
4876 }
4877 else
4878 {
4879 /* For interleaved stores we collect vectorized defs for all the
4880 stores in the group in DR_CHAIN and OPRNDS. DR_CHAIN is then
4881 used as an input to vect_permute_store_chain(), and OPRNDS as
4882 an input to vect_get_vec_def_for_stmt_copy() for the next copy.
4883
4884 If the store is not strided, GROUP_SIZE is 1, and DR_CHAIN and
4885 OPRNDS are of size 1. */
4886 next_stmt = first_stmt;
4887 for (i = 0; i < group_size; i++)
4888 {
4889 /* Since gaps are not supported for interleaved stores,
4890 GROUP_SIZE is the exact number of stmts in the chain.
4891 Therefore, NEXT_STMT can't be NULL_TREE. In case that
4892 there is no interleaving, GROUP_SIZE is 1, and only one
4893 iteration of the loop will be executed. */
4894 gcc_assert (next_stmt);
4895 op = GIMPLE_STMT_OPERAND (next_stmt, 1);
4896
4897 vec_oprnd = vect_get_vec_def_for_operand (op, next_stmt,
4898 NULL);
4899 VEC_quick_push (tree, dr_chain, vec_oprnd);
4900 VEC_quick_push (tree, oprnds, vec_oprnd);
4901 next_stmt = DR_GROUP_NEXT_DR (vinfo_for_stmt (next_stmt));
4902 }
98b44b0e 4903 }
468c2ac0 4904 dataref_ptr = vect_create_data_ref_ptr (first_stmt, NULL, NULL_TREE,
4090db01 4905 &dummy, &ptr_incr, false,
468c2ac0
DN
4906 TREE_TYPE (vec_oprnd), &inv_p);
4907 gcc_assert (!inv_p);
89d67cca
DN
4908 }
4909 else
4910 {
805e2059
IR
4911 /* FORNOW SLP doesn't work for multiple types. */
4912 gcc_assert (!slp);
4913
98b44b0e
IR
4914 /* For interleaved stores we created vectorized defs for all the
4915 defs stored in OPRNDS in the previous iteration (previous copy).
4916 DR_CHAIN is then used as an input to vect_permute_store_chain(),
8115817b 4917 and OPRNDS as an input to vect_get_vec_def_for_stmt_copy() for the
98b44b0e
IR
4918 next copy.
4919 If the store is not strided, GROUP_SIZE is 1, and DR_CHAIN and
878aa817 4920 OPRNDS are of size 1. */
98b44b0e
IR
4921 for (i = 0; i < group_size; i++)
4922 {
0bf2cf89
IR
4923 op = VEC_index (tree, oprnds, i);
4924 vect_is_simple_use (op, loop_vinfo, &def_stmt, &def, &dt);
4925 vec_oprnd = vect_get_vec_def_for_stmt_copy (dt, op);
98b44b0e
IR
4926 VEC_replace (tree, dr_chain, i, vec_oprnd);
4927 VEC_replace (tree, oprnds, i, vec_oprnd);
4928 }
468c2ac0
DN
4929 dataref_ptr =
4930 bump_vector_ptr (dataref_ptr, ptr_incr, bsi, stmt, NULL_TREE);
89d67cca 4931 }
f7064d11 4932
98b44b0e
IR
4933 if (strided_store)
4934 {
4935 result_chain = VEC_alloc (tree, heap, group_size);
4936 /* Permute. */
4937 if (!vect_permute_store_chain (dr_chain, group_size, stmt, bsi,
4938 &result_chain))
4939 return false;
4940 }
9cf5a7e3 4941
98b44b0e 4942 next_stmt = first_stmt;
805e2059 4943 for (i = 0; i < vec_num; i++)
89d67cca 4944 {
805e2059
IR
4945 if (i > 0)
4946 /* Bump the vector pointer. */
4947 dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, bsi, stmt,
4948 NULL_TREE);
4949
4950 if (slp)
4951 vec_oprnd = VEC_index (tree, vec_oprnds, i);
4952 else if (strided_store)
4953 /* For strided stores vectorized defs are interleaved in
4954 vect_permute_store_chain(). */
4955 vec_oprnd = VEC_index (tree, result_chain, i);
98b44b0e
IR
4956
4957 data_ref = build_fold_indirect_ref (dataref_ptr);
4958 /* Arguments are ready. Create the new vector stmt. */
ebb07520 4959 new_stmt = build_gimple_modify_stmt (data_ref, vec_oprnd);
98b44b0e 4960 vect_finish_stmt_generation (stmt, new_stmt, bsi);
cd7ae74d
IR
4961 mark_symbols_for_renaming (new_stmt);
4962
4963 if (j == 0)
4964 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
98b44b0e 4965 else
cd7ae74d 4966 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
89d67cca 4967
98b44b0e 4968 prev_stmt_info = vinfo_for_stmt (new_stmt);
878aa817 4969 next_stmt = DR_GROUP_NEXT_DR (vinfo_for_stmt (next_stmt));
98b44b0e
IR
4970 if (!next_stmt)
4971 break;
89d67cca 4972 }
9cf5a7e3
KB
4973 }
4974
f7064d11
DN
4975 return true;
4976}
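
/* A minimal end-to-end scalar model of the VS1..VS6 scheme documented in
   vectorizable_store -- illustration only: the group's defs are collected
   in chain order, permuted as in vect_permute_store_chain, and written
   with consecutive vector stores from the base address; the resulting
   memory image then equals that of the scalar interleaved stores.  A group
   size of 2 is assumed so that a single interleave stage suffices.  */

#include <assert.h>

#define NELT 4

int
main (void)
{
  int x0[NELT] = { 0, 2, 4, 6 };   /* def of the 1st stmt in the chain */
  int x1[NELT] = { 1, 3, 5, 7 };   /* def of the 2nd stmt in the chain */
  int mem[2 * NELT];
  int high[NELT], low[NELT];
  int i;

  /* One permute stage (group_size == 2): interleave_high then _low.  */
  for (i = 0; i < NELT / 2; i++)
    {
      high[2 * i] = x0[i];
      high[2 * i + 1] = x1[i];
      low[2 * i] = x0[NELT / 2 + i];
      low[2 * i + 1] = x1[NELT / 2 + i];
    }

  /* Two contiguous vector stores from the base address.  */
  for (i = 0; i < NELT; i++)
    {
      mem[i] = high[i];          /* VS: &base              = high */
      mem[NELT + i] = low[i];    /* VS: &base + vec_size*1 = low  */
    }

  /* Same image as the scalar stores a[2i] = x0[i]; a[2i+1] = x1[i].  */
  for (i = 0; i < NELT; i++)
    assert (mem[2 * i] == x0[i] && mem[2 * i + 1] == x1[i]);
  return 0;
}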
4977
4978
89d67cca
DN
4979/* Function vect_setup_realignment
4980
4981 This function is called when vectorizing an unaligned load using
468c2ac0 4982 the dr_explicit_realign[_optimized] scheme.
89d67cca
DN
4983 This function generates the following code at the loop prolog:
4984
4985 p = initial_addr;
468c2ac0 4986 x msq_init = *(floor(p)); # prolog load
89d67cca
DN
4987 realignment_token = call target_builtin;
4988 loop:
468c2ac0
DN
4989 x msq = phi (msq_init, ---)
4990
4991 The stmts marked with x are generated only for the case of
4992 dr_explicit_realign_optimized.
89d67cca
DN
4993
4994 The code above sets up a new (vector) pointer, pointing to the first
4995 location accessed by STMT, and a "floor-aligned" load using that pointer.
4996 It also generates code to compute the "realignment-token" (if the relevant
4997 target hook was defined), and creates a phi-node at the loop-header bb
4998 whose arguments are the result of the prolog-load (created by this
4999 function) and the result of a load that takes place in the loop (to be
5000 created by the caller to this function).
468c2ac0
DN
5001
5002 For the case of dr_explicit_realign_optimized:
89d67cca
DN
5003 The caller to this function uses the phi-result (msq) to create the
5004 realignment code inside the loop, and sets up the missing phi argument,
5005 as follows:
89d67cca
DN
5006 loop:
5007 msq = phi (msq_init, lsq)
5008 lsq = *(floor(p')); # load in loop
5009 result = realign_load (msq, lsq, realignment_token);
5010
468c2ac0
DN
5011 For the case of dr_explicit_realign:
5012 loop:
5013 msq = *(floor(p)); # load in loop
5014 p' = p + (VS-1);
5015 lsq = *(floor(p')); # load in loop
5016 result = realign_load (msq, lsq, realignment_token);
5017
89d67cca
DN
5018 Input:
5019 STMT - (scalar) load stmt to be vectorized. This load accesses
5020 a memory location that may be unaligned.
5021 BSI - place where new code is to be inserted.
468c2ac0
DN
5022 ALIGNMENT_SUPPORT_SCHEME - which of the two misalignment handling schemes
5023 is used.
89d67cca
DN
5024
5025 Output:
5026 REALIGNMENT_TOKEN - the result of a call to the builtin_mask_for_load
5027 target hook, if defined.
98b44b0e 5028 Return value - the result of the loop-header phi node. */
89d67cca
DN
5029
5030static tree
5031vect_setup_realignment (tree stmt, block_stmt_iterator *bsi,
468c2ac0
DN
5032 tree *realignment_token,
5033 enum dr_alignment_support alignment_support_scheme,
5034 tree init_addr,
5035 struct loop **at_loop)
89d67cca
DN
5036{
5037 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
5038 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
5039 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
5040 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
468c2ac0 5041 edge pe;
ce133c3f 5042 tree scalar_dest = GIMPLE_STMT_OPERAND (stmt, 0);
89d67cca 5043 tree vec_dest;
89d67cca
DN
5044 tree inc;
5045 tree ptr;
5046 tree data_ref;
5047 tree new_stmt;
5048 basic_block new_bb;
468c2ac0 5049 tree msq_init = NULL_TREE;
89d67cca
DN
5050 tree new_temp;
5051 tree phi_stmt;
468c2ac0
DN
5052 tree msq = NULL_TREE;
5053 tree stmts = NULL_TREE;
5054 bool inv_p;
5055 bool compute_in_loop = false;
5056 bool nested_in_vect_loop = nested_in_vect_loop_p (loop, stmt);
5057 struct loop *containing_loop = (bb_for_stmt (stmt))->loop_father;
5058 struct loop *loop_for_initial_load;
5059
5060 gcc_assert (alignment_support_scheme == dr_explicit_realign
5061 || alignment_support_scheme == dr_explicit_realign_optimized);
5062
5063 /* We need to generate three things:
5064 1. the misalignment computation
5065 2. the extra vector load (for the optimized realignment scheme).
5066 3. the phi node for the two vectors from which the realignment is
5067 done (for the optimized realignment scheme).
5068 */
5069
5070 /* 1. Determine where to generate the misalignment computation.
5071
5072 If INIT_ADDR is NULL_TREE, this indicates that the misalignment
5073 calculation will be generated by this function, outside the loop (in the
5074 preheader). Otherwise, INIT_ADDR had already been computed for us by the
5075 caller, inside the loop.
5076
5077 Background: If the misalignment remains fixed throughout the iterations of
5078 the loop, then both realignment schemes are applicable, and also the
5079 misalignment computation can be done outside LOOP. This is because we are
5080 vectorizing LOOP, and so the memory accesses in LOOP advance in steps that
5081 are a multiple of VS (the Vector Size), and therefore the misalignment in
5082 different vectorized LOOP iterations is always the same.
5083 The problem arises only if the memory access is in an inner-loop nested
5084 inside LOOP, which is now being vectorized using outer-loop vectorization.
5085 This is the only case when the misalignment of the memory access may not
15dc95cb 5086 remain fixed throughout the iterations of the inner-loop (as explained in
468c2ac0
DN
5087 detail in vect_supportable_dr_alignment). In this case, not only is the
5088 optimized realignment scheme not applicable, but also the misalignment
5089 computation (and generation of the realignment token that is passed to
5090 REALIGN_LOAD) have to be done inside the loop.
5091
5092 In short, INIT_ADDR indicates whether we are in a COMPUTE_IN_LOOP mode
5093 or not, which in turn determines if the misalignment is computed inside
5094 the inner-loop, or outside LOOP. */
5095
5096 if (init_addr != NULL_TREE)
5097 {
5098 compute_in_loop = true;
5099 gcc_assert (alignment_support_scheme == dr_explicit_realign);
5100 }
5101
5102
5103 /* 2. Determine where to generate the extra vector load.
5104
5105 For the optimized realignment scheme, instead of generating two vector
5106 loads in each iteration, we generate a single extra vector load in the
5107 preheader of the loop, and in each iteration reuse the result of the
5108 vector load from the previous iteration. In case the memory access is in
5109 an inner-loop nested inside LOOP, which is now being vectorized using
5110 outer-loop vectorization, we need to determine whether this initial vector
5111 load should be generated at the preheader of the inner-loop, or can be
5112 generated at the preheader of LOOP. If the memory access has no evolution
5113 in LOOP, it can be generated in the preheader of LOOP. Otherwise, it has
5114 to be generated inside LOOP (in the preheader of the inner-loop). */
89d67cca 5115
468c2ac0
DN
5116 if (nested_in_vect_loop)
5117 {
5118 tree outerloop_step = STMT_VINFO_DR_STEP (stmt_info);
5119 bool invariant_in_outerloop =
5120 (tree_int_cst_compare (outerloop_step, size_zero_node) == 0);
5121 loop_for_initial_load = (invariant_in_outerloop ? loop : loop->inner);
5122 }
5123 else
5124 loop_for_initial_load = loop;
5125 if (at_loop)
5126 *at_loop = loop_for_initial_load;
5127
5128 /* 3. For the case of the optimized realignment, create the first vector
5129 load at the loop preheader. */
5130
5131 if (alignment_support_scheme == dr_explicit_realign_optimized)
5132 {
5133 /* Create msq_init = *(floor(p1)) in the loop preheader */
5134
5135 gcc_assert (!compute_in_loop);
5136 pe = loop_preheader_edge (loop_for_initial_load);
5137 vec_dest = vect_create_destination_var (scalar_dest, vectype);
5138 ptr = vect_create_data_ref_ptr (stmt, loop_for_initial_load, NULL_TREE,
5139 &init_addr, &inc, true, NULL_TREE, &inv_p);
5140 data_ref = build1 (ALIGN_INDIRECT_REF, vectype, ptr);
5141 new_stmt = build_gimple_modify_stmt (vec_dest, data_ref);
5142 new_temp = make_ssa_name (vec_dest, new_stmt);
5143 GIMPLE_STMT_OPERAND (new_stmt, 0) = new_temp;
f10d132b 5144 mark_symbols_for_renaming (new_stmt);
468c2ac0
DN
5145 new_bb = bsi_insert_on_edge_immediate (pe, new_stmt);
5146 gcc_assert (!new_bb);
5147 msq_init = GIMPLE_STMT_OPERAND (new_stmt, 0);
5148 }
5149
5150 /* 4. Create realignment token using a target builtin, if available.
5151 It is done either inside the containing loop, or before LOOP (as
5152 determined above). */
89d67cca 5153
89d67cca
DN
5154 if (targetm.vectorize.builtin_mask_for_load)
5155 {
5156 tree builtin_decl;
89d67cca 5157
468c2ac0
DN
5158 /* Compute INIT_ADDR - the initial address accessed by this memref. */
5159 if (compute_in_loop)
5160 gcc_assert (init_addr); /* already computed by the caller. */
5161 else
5162 {
5163 /* Generate the INIT_ADDR computation outside LOOP. */
5164 init_addr = vect_create_addr_base_for_vector_ref (stmt, &stmts,
5165 NULL_TREE, loop);
5166 pe = loop_preheader_edge (loop);
5167 new_bb = bsi_insert_on_edge_immediate (pe, stmts);
5168 gcc_assert (!new_bb);
5169 }
5170
89d67cca 5171 builtin_decl = targetm.vectorize.builtin_mask_for_load ();
5039610b 5172 new_stmt = build_call_expr (builtin_decl, 1, init_addr);
4090db01
IR
5173 vec_dest = vect_create_destination_var (scalar_dest,
5174 TREE_TYPE (new_stmt));
ebb07520 5175 new_stmt = build_gimple_modify_stmt (vec_dest, new_stmt);
89d67cca 5176 new_temp = make_ssa_name (vec_dest, new_stmt);
07beea0d 5177 GIMPLE_STMT_OPERAND (new_stmt, 0) = new_temp;
468c2ac0
DN
5178
5179 if (compute_in_loop)
5180 bsi_insert_before (bsi, new_stmt, BSI_SAME_STMT);
5181 else
5182 {
5183 /* Generate the misalignment computation outside LOOP. */
5184 pe = loop_preheader_edge (loop);
5185 new_bb = bsi_insert_on_edge_immediate (pe, new_stmt);
5186 gcc_assert (!new_bb);
5187 }
5188
07beea0d 5189 *realignment_token = GIMPLE_STMT_OPERAND (new_stmt, 0);
89d67cca
DN
5190
5191 /* The result of the CALL_EXPR to this builtin is determined from
5192 the value of the parameter and no global variables are touched
5193 which makes the builtin a "const" function. Requiring the
5194 builtin to have the "const" attribute makes it unnecessary
5195 to call mark_call_clobbered. */
5196 gcc_assert (TREE_READONLY (builtin_decl));
5197 }
5198
468c2ac0
DN
5199 if (alignment_support_scheme == dr_explicit_realign)
5200 return msq;
5201
5202 gcc_assert (!compute_in_loop);
5203 gcc_assert (alignment_support_scheme == dr_explicit_realign_optimized);
5204
5205
5206 /* 5. Create msq = phi <msq_init, lsq> in loop */
5207
5208 pe = loop_preheader_edge (containing_loop);
89d67cca
DN
5209 vec_dest = vect_create_destination_var (scalar_dest, vectype);
5210 msq = make_ssa_name (vec_dest, NULL_TREE);
468c2ac0 5211 phi_stmt = create_phi_node (msq, containing_loop->header);
89d67cca 5212 SSA_NAME_DEF_STMT (msq) = phi_stmt;
468c2ac0 5213 add_phi_arg (phi_stmt, msq_init, pe);
89d67cca
DN
5214
5215 return msq;
5216}
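
/* A minimal scalar model of the explicit realignment scheme set up above --
   illustration only: two "floor"-aligned loads, msq and lsq, straddle the
   unaligned address, and realign_load picks VS consecutive elements
   starting at the misalignment offset.  The realignment token is reduced
   here to that element offset; on a real target it is the mask computed by
   the builtin_mask_for_load hook and consumed by REALIGN_LOAD.  */

#include <assert.h>

#define VS 4   /* vector size, in elements */

/* realign_load: conceptually concatenate MSQ|LSQ and take VS elements
   starting at OFF -- a scalar stand-in for the target instruction.  */
static void
realign_load (const int *msq, const int *lsq, int off, int *out)
{
  int i;
  for (i = 0; i < VS; i++)
    out[i] = (off + i < VS) ? msq[off + i] : lsq[off + i - VS];
}

int
main (void)
{
  int mem[12], out[VS], i;
  int misalign = 3;   /* p is 3 elements past an aligned boundary */
  const int *p, *floor_p, *msq, *lsq;

  for (i = 0; i < 12; i++)
    mem[i] = i;

  p = &mem[misalign];   /* the unaligned access                    */
  floor_p = &mem[0];    /* floor (p): round down to a VS boundary  */
  msq = floor_p;        /* msq = *(floor (p))                      */
  lsq = floor_p + VS;   /* lsq = *(floor (p + VS - 1))             */
  realign_load (msq, lsq, misalign, out);

  for (i = 0; i < VS; i++)
    assert (out[i] == p[i]);   /* equals the unaligned vector      */
  return 0;
}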
5217
5218
98b44b0e
IR
5219/* Function vect_strided_load_supported.
5220
5221 Returns TRUE if EXTRACT_EVEN and EXTRACT_ODD operations are supported,
5222 and FALSE otherwise. */
5223
5224static bool
5225vect_strided_load_supported (tree vectype)
5226{
5227 optab perm_even_optab, perm_odd_optab;
5228 int mode;
5229
5230 mode = (int) TYPE_MODE (vectype);
5231
5232 perm_even_optab = optab_for_tree_code (VEC_EXTRACT_EVEN_EXPR, vectype);
5233 if (!perm_even_optab)
5234 {
5235 if (vect_print_dump_info (REPORT_DETAILS))
5236 fprintf (vect_dump, "no optab for perm_even.");
5237 return false;
5238 }
5239
166cdb08 5240 if (optab_handler (perm_even_optab, mode)->insn_code == CODE_FOR_nothing)
98b44b0e
IR
5241 {
5242 if (vect_print_dump_info (REPORT_DETAILS))
5243 fprintf (vect_dump, "perm_even op not supported by target.");
5244 return false;
5245 }
5246
5247 perm_odd_optab = optab_for_tree_code (VEC_EXTRACT_ODD_EXPR, vectype);
5248 if (!perm_odd_optab)
5249 {
5250 if (vect_print_dump_info (REPORT_DETAILS))
5251 fprintf (vect_dump, "no optab for perm_odd.");
5252 return false;
5253 }
5254
166cdb08 5255 if (optab_handler (perm_odd_optab, mode)->insn_code == CODE_FOR_nothing)
98b44b0e
IR
5256 {
5257 if (vect_print_dump_info (REPORT_DETAILS))
5258 fprintf (vect_dump, "perm_odd op not supported by target.");
5259 return false;
5260 }
5261 return true;
5262}
5263
5264
5265/* Function vect_permute_load_chain.
5266
5267 Given a chain of interleaved loads in DR_CHAIN of LENGTH that must be
5268 a power of 2, generate extract_even/odd stmts to reorder the input data
5269 correctly. Return the final references for loads in RESULT_CHAIN.
5270
5271 E.g., LENGTH is 4 and the scalar type is short, i.e., VF is 8.
2f8e468b 5272 The input is 4 vectors each containing 8 elements. We assign a number to each
98b44b0e
IR
5273 element; the input sequence is:
5274
5275 1st vec: 0 1 2 3 4 5 6 7
5276 2nd vec: 8 9 10 11 12 13 14 15
5277 3rd vec: 16 17 18 19 20 21 22 23
5278 4th vec: 24 25 26 27 28 29 30 31
5279
5280 The output sequence should be:
5281
5282 1st vec: 0 4 8 12 16 20 24 28
5283 2nd vec: 1 5 9 13 17 21 25 29
5284 3rd vec: 2 6 10 14 18 22 26 30
5285 4th vec: 3 7 11 15 19 23 27 31
5286
5287 i.e., the first output vector should contain the first elements of each
5288 interleaving group, etc.
5289
5290 We use extract_even/odd instructions to create such output. The input of each
5291 extract_even/odd operation is two vectors
5292 1st vec 2nd vec
5293 0 1 2 3 4 5 6 7
5294
5295 and the output is the vector of extracted even/odd elements. The output of
5296 extract_even will be: 0 2 4 6
5297 and of extract_odd: 1 3 5 7
5298
5299
2f8e468b 5300 The permutation is done in log2 (LENGTH) stages. In each stage extract_even and
98b44b0e
IR
5301 extract_odd stmts are created for each pair of vectors in DR_CHAIN in their
5302 order. In our example,
5303
5304 E1: extract_even (1st vec, 2nd vec)
5305 E2: extract_odd (1st vec, 2nd vec)
5306 E3: extract_even (3rd vec, 4th vec)
5307 E4: extract_odd (3rd vec, 4th vec)
5308
5309 The output for the first stage will be:
5310
5311 E1: 0 2 4 6 8 10 12 14
5312 E2: 1 3 5 7 9 11 13 15
5313 E3: 16 18 20 22 24 26 28 30
5314 E4: 17 19 21 23 25 27 29 31
5315
5316 In order to proceed and create the correct sequence for the next stage (or
5317 for the correct output, if the second stage is the last one, as in our
5318 example), we first put the output of extract_even operation and then the
5319 output of extract_odd in RESULT_CHAIN (which is then copied to DR_CHAIN).
5320 The input for the second stage is:
5321
5322 1st vec (E1): 0 2 4 6 8 10 12 14
5323 2nd vec (E3): 16 18 20 22 24 26 28 30
5324 3rd vec (E2): 1 3 5 7 9 11 13 15
5325 4th vec (E4): 17 19 21 23 25 27 29 31
5326
5327 The output of the second stage:
5328
5329 E1: 0 4 8 12 16 20 24 28
5330 E2: 2 6 10 14 18 22 26 30
5331 E3: 1 5 9 13 17 21 25 29
5332 E4: 3 7 11 15 19 23 27 31
5333
5334 And RESULT_CHAIN after reordering:
5335
5336 1st vec (E1): 0 4 8 12 16 20 24 28
5337 2nd vec (E3): 1 5 9 13 17 21 25 29
5338 3rd vec (E2): 2 6 10 14 18 22 26 30
5339 4th vec (E4): 3 7 11 15 19 23 27 31. */
5340
5341static bool
5342vect_permute_load_chain (VEC(tree,heap) *dr_chain,
5343 unsigned int length,
5344 tree stmt,
5345 block_stmt_iterator *bsi,
5346 VEC(tree,heap) **result_chain)
5347{
5348 tree perm_dest, perm_stmt, data_ref, first_vect, second_vect;
5349 tree vectype = STMT_VINFO_VECTYPE (vinfo_for_stmt (stmt));
ebb07520 5350 tree tmp;
98b44b0e
IR
5351 int i;
5352 unsigned int j;
5353
5354 /* Check that the operation is supported. */
5355 if (!vect_strided_load_supported (vectype))
5356 return false;
5357
5358 *result_chain = VEC_copy (tree, heap, dr_chain);
5359 for (i = 0; i < exact_log2 (length); i++)
5360 {
5361 for (j = 0; j < length; j += 2)
5362 {
5363 first_vect = VEC_index (tree, dr_chain, j);
5364 second_vect = VEC_index (tree, dr_chain, j+1);
5365
5366 /* data_ref = permute_even (first_data_ref, second_data_ref); */
5367 perm_dest = create_tmp_var (vectype, "vect_perm_even");
fc98ed56 5368 DECL_GIMPLE_REG_P (perm_dest) = 1;
98b44b0e 5369 add_referenced_var (perm_dest);
ebb07520
RS
5370
5371 tmp = build2 (VEC_EXTRACT_EVEN_EXPR, vectype,
5372 first_vect, second_vect);
5373 perm_stmt = build_gimple_modify_stmt (perm_dest, tmp);
98b44b0e
IR
5374
5375 data_ref = make_ssa_name (perm_dest, perm_stmt);
07beea0d 5376 GIMPLE_STMT_OPERAND (perm_stmt, 0) = data_ref;
98b44b0e 5377 vect_finish_stmt_generation (stmt, perm_stmt, bsi);
cfaab3a9 5378 mark_symbols_for_renaming (perm_stmt);
98b44b0e
IR
5379
5380 VEC_replace (tree, *result_chain, j/2, data_ref);
5381
5382 /* data_ref = permute_odd (first_data_ref, second_data_ref); */
5383 perm_dest = create_tmp_var (vectype, "vect_perm_odd");
fc98ed56 5384 DECL_GIMPLE_REG_P (perm_dest) = 1;
98b44b0e
IR
5385 add_referenced_var (perm_dest);
5386
ebb07520
RS
5387 tmp = build2 (VEC_EXTRACT_ODD_EXPR, vectype,
5388 first_vect, second_vect);
5389 perm_stmt = build_gimple_modify_stmt (perm_dest, tmp);
98b44b0e 5390 data_ref = make_ssa_name (perm_dest, perm_stmt);
07beea0d 5391 GIMPLE_STMT_OPERAND (perm_stmt, 0) = data_ref;
98b44b0e 5392 vect_finish_stmt_generation (stmt, perm_stmt, bsi);
cfaab3a9 5393 mark_symbols_for_renaming (perm_stmt);
98b44b0e
IR
5394
5395 VEC_replace (tree, *result_chain, j/2+length/2, data_ref);
5396 }
5397 dr_chain = VEC_copy (tree, heap, *result_chain);
5398 }
5399 return true;
5400}
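
/* A minimal, self-contained scalar model of the permutation above -- an
   illustration only, not part of the vectorizer.  It reproduces the
   4-vector, 8-element example from the comment: log2 (LENGTH) stages of
   extract_even/odd over adjacent pairs, with the even results stored in
   the first half of the chain and the odd results in the second half,
   exactly the RESULT_CHAIN reordering of vect_permute_load_chain.  */

#include <assert.h>
#include <string.h>

#define NELT 8   /* elements per vector (VF)  */
#define LEN  4   /* vectors in the load chain */

static void
extract_even (const int *a, const int *b, int *out)
{
  int i;
  for (i = 0; i < NELT / 2; i++)
    {
      out[i] = a[2 * i];
      out[NELT / 2 + i] = b[2 * i];
    }
}

static void
extract_odd (const int *a, const int *b, int *out)
{
  int i;
  for (i = 0; i < NELT / 2; i++)
    {
      out[i] = a[2 * i + 1];
      out[NELT / 2 + i] = b[2 * i + 1];
    }
}

int
main (void)
{
  int chain[LEN][NELT], result[LEN][NELT];
  int i, j, stage;

  for (i = 0; i < LEN; i++)              /* vectors 0..7, 8..15, ... */
    for (j = 0; j < NELT; j++)
      chain[i][j] = i * NELT + j;

  for (stage = 0; stage < 2; stage++)    /* log2 (LEN) == 2 stages   */
    {
      for (j = 0; j < LEN; j += 2)
        {
          /* Evens go to slot j/2, odds to slot j/2 + LEN/2.  */
          extract_even (chain[j], chain[j + 1], result[j / 2]);
          extract_odd (chain[j], chain[j + 1], result[j / 2 + LEN / 2]);
        }
      memcpy (chain, result, sizeof (chain));
    }

  /* result[k] now holds elements k, k + 4, k + 8, ...  */
  for (i = 0; i < LEN; i++)
    for (j = 0; j < NELT; j++)
      assert (result[i][j] == j * LEN + i);
  return 0;
}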
5401
5402
5403/* Function vect_transform_strided_load.
5404
5405 Given a chain of input interleaved data-refs (in DR_CHAIN), build statements
5406 to perform their permutation and attach the resulting vectorized statements
5407 to the scalar statements. */
5409
5410static bool
5411vect_transform_strided_load (tree stmt, VEC(tree,heap) *dr_chain, int size,
5412 block_stmt_iterator *bsi)
5413{
5414 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
5415 tree first_stmt = DR_GROUP_FIRST_DR (stmt_info);
5416 tree next_stmt, new_stmt;
5417 VEC(tree,heap) *result_chain = NULL;
5418 unsigned int i, gap_count;
5419 tree tmp_data_ref;
5420
5421 /* DR_CHAIN contains input data-refs that are a part of the interleaving.
5422 RESULT_CHAIN is the output of vect_permute_load_chain, it contains permuted
5423 vectors, that are ready for vector computation. */
5424 result_chain = VEC_alloc (tree, heap, size);
5425 /* Permute. */
5426 if (!vect_permute_load_chain (dr_chain, size, stmt, bsi, &result_chain))
5427 return false;
5428
5429 /* Put a permuted data-ref in the VECTORIZED_STMT field.
5430 Since we scan the chain starting from its first node, their order
5431 corresponds to the order of data-refs in RESULT_CHAIN. */
5432 next_stmt = first_stmt;
5433 gap_count = 1;
639d3040 5434 for (i = 0; VEC_iterate (tree, result_chain, i, tmp_data_ref); i++)
98b44b0e
IR
5435 {
5436 if (!next_stmt)
5437 break;
5438
5439 /* Skip the gaps. Loads created for the gaps will be removed by the dead
5440 code elimination pass later.
5441 DR_GROUP_GAP is the number of steps in elements from the previous
5442 access (if there is no gap DR_GROUP_GAP is 1). We skip loads that
5443 correspond to the gaps. */
5445 if (gap_count < DR_GROUP_GAP (vinfo_for_stmt (next_stmt)))
5446 {
5447 gap_count++;
5448 continue;
5449 }
5450
5451 while (next_stmt)
5452 {
5453 new_stmt = SSA_NAME_DEF_STMT (tmp_data_ref);
5454 /* We assume that if VEC_STMT is not NULL, this is a case of multiple
5455 copies, and we put the new vector statement in the first available
5456 RELATED_STMT. */
5457 if (!STMT_VINFO_VEC_STMT (vinfo_for_stmt (next_stmt)))
5458 STMT_VINFO_VEC_STMT (vinfo_for_stmt (next_stmt)) = new_stmt;
5459 else
5460 {
5461 tree prev_stmt = STMT_VINFO_VEC_STMT (vinfo_for_stmt (next_stmt));
5462 tree rel_stmt = STMT_VINFO_RELATED_STMT (
5463 vinfo_for_stmt (prev_stmt));
5464 while (rel_stmt)
5465 {
5466 prev_stmt = rel_stmt;
5467 rel_stmt = STMT_VINFO_RELATED_STMT (vinfo_for_stmt (rel_stmt));
5468 }
5469 STMT_VINFO_RELATED_STMT (vinfo_for_stmt (prev_stmt)) = new_stmt;
5470 }
5471 next_stmt = DR_GROUP_NEXT_DR (vinfo_for_stmt (next_stmt));
5472 gap_count = 1;
5473 /* If NEXT_STMT accesses the same DR as the previous statement,
5474 put the same TMP_DATA_REF as its vectorized statement; otherwise
5475 get the next data-ref from RESULT_CHAIN. */
5476 if (!next_stmt || !DR_GROUP_SAME_DR_STMT (vinfo_for_stmt (next_stmt)))
5477 break;
5478 }
5479 }
5480 return true;
5481}
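
/* A minimal scalar model of the gap handling above -- illustration only:
   a group of 4 interleaved fields of which the scalar code reads only
   fields 0 and 2, so the chain holds two stmts with DR_GROUP_GAP 1 and 2.
   Walking RESULT_CHAIN with the same gap_count logic assigns permuted
   refs 0 and 2 and leaves refs 1 and 3 for dead code elimination.  The
   DR_GROUP_SAME_DR_STMT case is omitted for brevity.  */

#include <assert.h>

/* One scalar load stmt in an interleaving chain: GAP is the distance in
   elements from the previous group member (1 means no gap).  */
struct chain_stmt
{
  int gap;
  int vec_ref;   /* index of the permuted data-ref assigned to it */
};

int
main (void)
{
  struct chain_stmt chain[2] = { { 1, -1 }, { 2, -1 } };
  int n_stmts = 2, size = 4;
  int i, next = 0, gap_count = 1;

  for (i = 0; i < size; i++)       /* walk RESULT_CHAIN              */
    {
      if (next >= n_stmts)
        break;
      if (gap_count < chain[next].gap)
        {
          gap_count++;             /* dead load: skip this data-ref  */
          continue;
        }
      chain[next].vec_ref = i;     /* ascribe permuted ref I         */
      next++;
      gap_count = 1;
    }

  assert (chain[0].vec_ref == 0);  /* field 0 gets the 1st permuted ref */
  assert (chain[1].vec_ref == 2);  /* field 2 gets the 3rd; 1 and 3 die */
  return 0;
}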
5482
5483
f7064d11
DN
5484/* vectorizable_load.
5485
5486 Check if STMT reads a non-scalar data-ref (array/pointer/structure) that
5487 can be vectorized.
5488 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
5489 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
5490 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
5491
5492bool
805e2059
IR
5493vectorizable_load (tree stmt, block_stmt_iterator *bsi, tree *vec_stmt,
5494 slp_tree slp_node)
f7064d11
DN
5495{
5496 tree scalar_dest;
5497 tree vec_dest = NULL;
5498 tree data_ref = NULL;
5499 tree op;
5500 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
89d67cca
DN
5501 stmt_vec_info prev_stmt_info;
5502 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
5503 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
468c2ac0
DN
5504 struct loop *containing_loop = (bb_for_stmt (stmt))->loop_father;
5505 bool nested_in_vect_loop = nested_in_vect_loop_p (loop, stmt);
98b44b0e 5506 struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info), *first_dr;
f7064d11
DN
5507 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
5508 tree new_temp;
5509 int mode;
98b44b0e 5510 tree new_stmt = NULL_TREE;
f7064d11 5511 tree dummy;
468c2ac0 5512 enum dr_alignment_support alignment_support_scheme;
89d67cca
DN
5513 tree dataref_ptr = NULL_TREE;
5514 tree ptr_incr;
5515 int nunits = TYPE_VECTOR_SUBPARTS (vectype);
5516 int ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits;
98b44b0e 5517 int i, j, group_size;
89d67cca
DN
5518 tree msq = NULL_TREE, lsq;
5519 tree offset = NULL_TREE;
5520 tree realignment_token = NULL_TREE;
468c2ac0 5521 tree phi = NULL_TREE;
98b44b0e
IR
5522 VEC(tree,heap) *dr_chain = NULL;
5523 bool strided_load = false;
5524 tree first_stmt;
468c2ac0
DN
5525 tree scalar_type;
5526 bool inv_p;
5527 bool compute_in_loop = false;
5528 struct loop *at_loop;
805e2059
IR
5529 int vec_num;
5530 bool slp = (slp_node != NULL);
5531
5532 /* FORNOW: SLP with multiple types is not supported. The SLP analysis verifies
5533 this, so we can safely override NCOPIES with 1 here. */
5534 if (slp)
5535 ncopies = 1;
f7064d11 5536
d29de1bf 5537 gcc_assert (ncopies >= 1);
468c2ac0 5538
d29de1bf 5539 /* FORNOW. This restriction should be relaxed. */
468c2ac0 5540 if (nested_in_vect_loop && ncopies > 1)
d29de1bf
DN
5541 {
5542 if (vect_print_dump_info (REPORT_DETAILS))
5543 fprintf (vect_dump, "multiple types in nested loop.");
5544 return false;
5545 }
5546
88088c03
DN
5547 if (!STMT_VINFO_RELEVANT_P (stmt_info))
5548 return false;
5549
60555ced
DN
5550 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_loop_def)
5551 return false;
88088c03 5552
60555ced 5553 /* Is vectorizable load? */
07beea0d 5554 if (TREE_CODE (stmt) != GIMPLE_MODIFY_STMT)
f7064d11
DN
5555 return false;
5556
07beea0d 5557 scalar_dest = GIMPLE_STMT_OPERAND (stmt, 0);
f7064d11
DN
5558 if (TREE_CODE (scalar_dest) != SSA_NAME)
5559 return false;
5560
07beea0d 5561 op = GIMPLE_STMT_OPERAND (stmt, 1);
98b44b0e
IR
5562 if (TREE_CODE (op) != ARRAY_REF
5563 && TREE_CODE (op) != INDIRECT_REF
805e2059 5564 && !STMT_VINFO_STRIDED_ACCESS (stmt_info))
f7064d11
DN
5565 return false;
5566
5567 if (!STMT_VINFO_DATA_REF (stmt_info))
5568 return false;
5569
468c2ac0 5570 scalar_type = TREE_TYPE (DR_REF (dr));
f7064d11
DN
5571 mode = (int) TYPE_MODE (vectype);
5572
5573 /* FORNOW. In some cases can vectorize even if data-type not supported
5574 (e.g. - data copies). */
166cdb08 5575 if (optab_handler (mov_optab, mode)->insn_code == CODE_FOR_nothing)
f7064d11 5576 {
00518cb1 5577 if (vect_print_dump_info (REPORT_DETAILS))
f7064d11
DN
5578 fprintf (vect_dump, "Aligned load, but unsupported type.");
5579 return false;
5580 }
5581
98b44b0e 5582 /* Check if the load is a part of an interleaving chain. */
805e2059 5583 if (STMT_VINFO_STRIDED_ACCESS (stmt_info))
98b44b0e
IR
5584 {
5585 strided_load = true;
468c2ac0
DN
5586 /* FORNOW */
5587 gcc_assert (!nested_in_vect_loop);
98b44b0e
IR
5588
5589 /* Check if interleaving is supported. */
805e2059
IR
5590 if (!vect_strided_load_supported (vectype)
5591 && !PURE_SLP_STMT (stmt_info) && !slp)
98b44b0e
IR
5592 return false;
5593 }
5594
f7064d11
DN
5595 if (!vec_stmt) /* transformation not required. */
5596 {
5597 STMT_VINFO_TYPE (stmt_info) = load_vec_info_type;
805e2059 5598 vect_model_load_cost (stmt_info, ncopies, NULL);
f7064d11
DN
5599 return true;
5600 }
5601
00518cb1 5602 if (vect_print_dump_info (REPORT_DETAILS))
f7064d11
DN
5603 fprintf (vect_dump, "transform load.");
5604
792ed98b
HJ
5605 /** Transform. **/
5606
98b44b0e
IR
5607 if (strided_load)
5608 {
5609 first_stmt = DR_GROUP_FIRST_DR (stmt_info);
5610 /* Check if the chain of loads is already vectorized. */
5611 if (STMT_VINFO_VEC_STMT (vinfo_for_stmt (first_stmt)))
5612 {
5613 *vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);
5614 return true;
5615 }
5616 first_dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt));
5617 group_size = DR_GROUP_SIZE (vinfo_for_stmt (first_stmt));
5618 dr_chain = VEC_alloc (tree, heap, group_size);
805e2059
IR
5619
5620 /* VEC_NUM is the number of vect stmts to be created for this group. */
5621 if (slp)
5622 {
5623 strided_load = false;
5624 vec_num = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
5625 }
5626 else
5627 vec_num = group_size;
98b44b0e
IR
5628 }
5629 else
5630 {
5631 first_stmt = stmt;
5632 first_dr = dr;
805e2059 5633 group_size = vec_num = 1;
98b44b0e
IR
5634 }
5635
468c2ac0
DN
5636 alignment_support_scheme = vect_supportable_dr_alignment (first_dr);
5637 gcc_assert (alignment_support_scheme);
98b44b0e 5638
89d67cca
DN
5639 /* In case the vectorization factor (VF) is bigger than the number
5640 of elements that we can fit in a vectype (nunits), we have to generate
5641 more than one vector stmt - i.e - we need to "unroll" the
5642 vector stmt by a factor VF/nunits. In doing so, we record a pointer
5643 from one copy of the vector stmt to the next, in the field
5644 STMT_VINFO_RELATED_STMT. This is necessary in order to allow following
5645 stages to find the correct vector defs to be used when vectorizing
5646 stmts that use the defs of the current stmt. The example below illustrates
5647 the vectorization process when VF=16 and nunits=4 (i.e - we need to create
5648 4 vectorized stmts):
5649
5650 before vectorization:
5651 RELATED_STMT VEC_STMT
5652 S1: x = memref - -
5653 S2: z = x + 1 - -
5654
5655 step 1: vectorize stmt S1:
5656 We first create the vector stmt VS1_0, and, as usual, record a
5657 pointer to it in the STMT_VINFO_VEC_STMT of the scalar stmt S1.
5658 Next, we create the vector stmt VS1_1, and record a pointer to
5659 it in the STMT_VINFO_RELATED_STMT of the vector stmt VS1_0.
5660 Similarly, for VS1_2 and VS1_3. This is the resulting chain of
5661 stmts and pointers:
5662 RELATED_STMT VEC_STMT
5663 VS1_0: vx0 = memref0 VS1_1 -
5664 VS1_1: vx1 = memref1 VS1_2 -
5665 VS1_2: vx2 = memref2 VS1_3 -
5666 VS1_3: vx3 = memref3 - -
5667 S1: x = memref - VS1_0
5668 S2: z = x + 1 - -
5669
5670 See in documentation in vect_get_vec_def_for_stmt_copy for how the
5671 information we recorded in RELATED_STMT field is used to vectorize
5672 stmt S2. */
5673
98b44b0e
IR
5674 /* In case of interleaving (non-unit strided access):
5675
5676 S1: x2 = &base + 2
5677 S2: x0 = &base
5678 S3: x1 = &base + 1
5679 S4: x3 = &base + 3
5680
5681 Vectorized loads are created in the order of memory accesses
5682 starting from the access of the first stmt of the chain:
5683
5684 VS1: vx0 = &base
5685 VS2: vx1 = &base + vec_size*1
5686 VS3: vx3 = &base + vec_size*2
5687 VS4: vx4 = &base + vec_size*3
5688
5689 Then permutation statements are generated:
5690
5691 VS5: vx5 = VEC_EXTRACT_EVEN_EXPR < vx0, vx1 >
5692 VS6: vx6 = VEC_EXTRACT_ODD_EXPR < vx0, vx1 >
5693 ...
5694
5695 And they are put in STMT_VINFO_VEC_STMT of the corresponding scalar stmts
5696 (the order of the data-refs in the output of vect_permute_load_chain
5697 corresponds to the order of scalar stmts in the interleaving chain - see
2f8e468b 5698 the documentation of vect_permute_load_chain()).
98b44b0e
IR
5699 The generation of permutation stmts and recording them in
5700 STMT_VINFO_VEC_STMT is done in vect_transform_strided_load().
5701
5702 In case of both multiple types and interleaving, the vector loads and
5703 permutation stmts above are created for every copy. The result vector stmts
5704 are put in STMT_VINFO_VEC_STMT for the first copy and in the corresponding
5705 STMT_VINFO_RELATED_STMT for the next copies. */
5706
89d67cca
DN
5707 /* If the data reference is aligned (dr_aligned) or potentially unaligned
5708 on a target that supports unaligned accesses (dr_unaligned_supported)
5709 we generate the following code:
f7064d11
DN
5710 p = initial_addr;
5711 indx = 0;
5712 loop {
89d67cca 5713 p = p + indx * vectype_size;
f7064d11
DN
5714 vec_dest = *(p);
5715 indx = indx + 1;
5716 }
f7064d11 5717
89d67cca 5718 Otherwise, the data reference is potentially unaligned on a target that
468c2ac0 5719 does not support unaligned accesses (dr_explicit_realign_optimized) -
89d67cca
DN
5720 then generate the following code, in which the data in each iteration is
5721 obtained by two vector loads, one from the previous iteration, and one
5722 from the current iteration:
5723 p1 = initial_addr;
5724 msq_init = *(floor(p1))
5725 p2 = initial_addr + VS - 1;
5726 realignment_token = call target_builtin;
5727 indx = 0;
5728 loop {
5729 p2 = p2 + indx * vectype_size
5730 lsq = *(floor(p2))
5731 vec_dest = realign_load (msq, lsq, realignment_token)
5732 indx = indx + 1;
5733 msq = lsq;
98b44b0e 5734 } */
89d67cca 5735
468c2ac0
DN
5736 /* If the misalignment remains the same throughout the execution of the
5737 loop, we can create the init_addr and permutation mask at the loop
5738 preheader. Otherwise, it needs to be created inside the loop.
5739 This can only occur when vectorizing memory accesses in the inner-loop
5740 nested within an outer-loop that is being vectorized. */
5741
5742 if (nested_in_vect_loop_p (loop, stmt)
5743 && (TREE_INT_CST_LOW (DR_STEP (dr)) % UNITS_PER_SIMD_WORD != 0))
5744 {
5745 gcc_assert (alignment_support_scheme != dr_explicit_realign_optimized);
5746 compute_in_loop = true;
5747 }
5748
5749 if ((alignment_support_scheme == dr_explicit_realign_optimized
5750 || alignment_support_scheme == dr_explicit_realign)
5751 && !compute_in_loop)
89d67cca 5752 {
468c2ac0
DN
5753 msq = vect_setup_realignment (first_stmt, bsi, &realignment_token,
5754 alignment_support_scheme, NULL_TREE,
5755 &at_loop);
5756 if (alignment_support_scheme == dr_explicit_realign_optimized)
5757 {
5758 phi = SSA_NAME_DEF_STMT (msq);
5759 offset = size_int (TYPE_VECTOR_SUBPARTS (vectype) - 1);
5760 }
f7064d11 5761 }
468c2ac0
DN
5762 else
5763 at_loop = loop;
f7064d11 5764
89d67cca
DN
5765 prev_stmt_info = NULL;
5766 for (j = 0; j < ncopies; j++)
5767 {
5768 /* 1. Create the vector pointer update chain. */
5769 if (j == 0)
468c2ac0
DN
5770 dataref_ptr = vect_create_data_ref_ptr (first_stmt,
5771 at_loop, offset,
5772 &dummy, &ptr_incr, false,
5773 NULL_TREE, &inv_p);
89d67cca 5774 else
468c2ac0
DN
5775 dataref_ptr =
5776 bump_vector_ptr (dataref_ptr, ptr_incr, bsi, stmt, NULL_TREE);
f7064d11 5777
805e2059 5778 for (i = 0; i < vec_num; i++)
98b44b0e 5779 {
805e2059
IR
5780 if (i > 0)
5781 dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, bsi, stmt,
5782 NULL_TREE);
5783
98b44b0e 5784 /* 2. Create the vector-load in the loop. */
468c2ac0 5785 switch (alignment_support_scheme)
98b44b0e
IR
5786 {
5787 case dr_aligned:
5788 gcc_assert (aligned_access_p (first_dr));
5789 data_ref = build_fold_indirect_ref (dataref_ptr);
5790 break;
5791 case dr_unaligned_supported:
5792 {
5793 int mis = DR_MISALIGNMENT (first_dr);
5794 tree tmis = (mis == -1 ? size_zero_node : size_int (mis));
5795
98b44b0e
IR
5796 tmis = size_binop (MULT_EXPR, tmis, size_int (BITS_PER_UNIT));
5797 data_ref =
5798 build2 (MISALIGNED_INDIRECT_REF, vectype, dataref_ptr, tmis);
5799 break;
5800 }
468c2ac0
DN
5801 case dr_explicit_realign:
5802 {
5803 tree ptr, bump;
5804 tree vs_minus_1 = size_int (TYPE_VECTOR_SUBPARTS (vectype) - 1);
5805
5806 if (compute_in_loop)
5807 msq = vect_setup_realignment (first_stmt, bsi,
5808 &realignment_token,
5809 dr_explicit_realign,
5810 dataref_ptr, NULL);
5811
5812 data_ref = build1 (ALIGN_INDIRECT_REF, vectype, dataref_ptr);
5813 vec_dest = vect_create_destination_var (scalar_dest, vectype);
5814 new_stmt = build_gimple_modify_stmt (vec_dest, data_ref);
5815 new_temp = make_ssa_name (vec_dest, new_stmt);
5816 GIMPLE_STMT_OPERAND (new_stmt, 0) = new_temp;
5817 vect_finish_stmt_generation (stmt, new_stmt, bsi);
5818 copy_virtual_operands (new_stmt, stmt);
5819 mark_symbols_for_renaming (new_stmt);
5820 msq = new_temp;
5821
5822 bump = size_binop (MULT_EXPR, vs_minus_1,
5823 TYPE_SIZE_UNIT (scalar_type));
5824 ptr = bump_vector_ptr (dataref_ptr, NULL_TREE, bsi, stmt, bump);
5825 data_ref = build1 (ALIGN_INDIRECT_REF, vectype, ptr);
5826 break;
5827 }
5828 case dr_explicit_realign_optimized:
98b44b0e
IR
5829 data_ref = build1 (ALIGN_INDIRECT_REF, vectype, dataref_ptr);
5830 break;
5831 default:
5832 gcc_unreachable ();
5833 }
5834 vec_dest = vect_create_destination_var (scalar_dest, vectype);
ebb07520 5835 new_stmt = build_gimple_modify_stmt (vec_dest, data_ref);
98b44b0e 5836 new_temp = make_ssa_name (vec_dest, new_stmt);
07beea0d 5837 GIMPLE_STMT_OPERAND (new_stmt, 0) = new_temp;
98b44b0e 5838 vect_finish_stmt_generation (stmt, new_stmt, bsi);
cfaab3a9 5839 mark_symbols_for_renaming (new_stmt);
98b44b0e 5840
468c2ac0
DN
5841 /* 3. Handle explicit realignment if necessary/supported. Create in
5842 loop: vec_dest = realign_load (msq, lsq, realignment_token) */
5843 if (alignment_support_scheme == dr_explicit_realign_optimized
5844 || alignment_support_scheme == dr_explicit_realign)
98b44b0e 5845 {
07beea0d 5846 lsq = GIMPLE_STMT_OPERAND (new_stmt, 0);
98b44b0e
IR
5847 if (!realignment_token)
5848 realignment_token = dataref_ptr;
5849 vec_dest = vect_create_destination_var (scalar_dest, vectype);
468c2ac0
DN
5850 new_stmt = build3 (REALIGN_LOAD_EXPR, vectype, msq, lsq,
5851 realignment_token);
ebb07520 5852 new_stmt = build_gimple_modify_stmt (vec_dest, new_stmt);
98b44b0e 5853 new_temp = make_ssa_name (vec_dest, new_stmt);
07beea0d 5854 GIMPLE_STMT_OPERAND (new_stmt, 0) = new_temp;
98b44b0e 5855 vect_finish_stmt_generation (stmt, new_stmt, bsi);
468c2ac0
DN
5856
5857 if (alignment_support_scheme == dr_explicit_realign_optimized)
5858 {
805e2059 5859 if (i == vec_num - 1 && j == ncopies - 1)
468c2ac0
DN
5860 add_phi_arg (phi, lsq, loop_latch_edge (containing_loop));
5861 msq = lsq;
5862 }
5863 }
5864
5865 /* 4. Handle invariant-load. */
5866 if (inv_p)
5867 {
5868 gcc_assert (!strided_load);
5869 gcc_assert (nested_in_vect_loop_p (loop, stmt));
5870 if (j == 0)
5871 {
5872 int k;
5873 tree t = NULL_TREE;
5874 tree vec_inv, bitpos, bitsize = TYPE_SIZE (scalar_type);
5875
5876 /* CHECKME: bitpos depends on endianness? */
5877 bitpos = bitsize_zero_node;
5878 vec_inv = build3 (BIT_FIELD_REF, scalar_type, new_temp,
5879 bitsize, bitpos);
5880 BIT_FIELD_REF_UNSIGNED (vec_inv) =
5881 TYPE_UNSIGNED (scalar_type);
5882 vec_dest =
5883 vect_create_destination_var (scalar_dest, NULL_TREE);
5884 new_stmt = build_gimple_modify_stmt (vec_dest, vec_inv);
5885 new_temp = make_ssa_name (vec_dest, new_stmt);
5886 GIMPLE_STMT_OPERAND (new_stmt, 0) = new_temp;
5887 vect_finish_stmt_generation (stmt, new_stmt, bsi);
5888
5889 for (k = nunits - 1; k >= 0; --k)
5890 t = tree_cons (NULL_TREE, new_temp, t);
5891 /* FIXME: use build_constructor directly. */
5892 vec_inv = build_constructor_from_list (vectype, t);
5893 new_temp = vect_init_vector (stmt, vec_inv, vectype, bsi);
5894 new_stmt = SSA_NAME_DEF_STMT (new_temp);
5895 }
5896 else
5897 gcc_unreachable (); /* FORNOW. */
98b44b0e 5898 }
468c2ac0 5899
805e2059
IR
5900 /* Collect vector loads and later create their permutation in
5901 vect_transform_strided_load (). */
5902 if (strided_load)
5903 VEC_quick_push (tree, dr_chain, new_temp);
5904
5905 /* Store vector loads in the corresponding SLP_NODE. */
5906 if (slp)
5907 VEC_quick_push (tree, SLP_TREE_VEC_STMTS (slp_node), new_stmt);
98b44b0e 5908 }
f7064d11 5909
805e2059
IR
5910 /* FORNOW: SLP with multiple types is unsupported. */
5911 if (slp)
5912 return true;
5913
98b44b0e
IR
5914 if (strided_load)
5915 {
5916 if (!vect_transform_strided_load (stmt, dr_chain, group_size, bsi))
5917 return false;
5918 *vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);
5919 dr_chain = VEC_alloc (tree, heap, group_size);
5920 }
89d67cca 5921 else
98b44b0e
IR
5922 {
5923 if (j == 0)
5924 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
5925 else
5926 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
5927 prev_stmt_info = vinfo_for_stmt (new_stmt);
5928 }
f7064d11 5929 }
f7064d11 5930
f7064d11
DN
5931 return true;
5932}
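
/* A minimal scalar model of the VF/nunits "unrolling" documented above in
   vectorizable_load -- illustration only: with VF == 16 and nunits == 4,
   a single scalar load is emitted as ncopies == 4 vector loads
   VS1_0..VS1_3, the data-ref pointer being bumped by one vector between
   copies (bump_vector_ptr).  The RELATED_STMT chaining is represented
   here simply by the copy index.  */

#include <assert.h>

#define VF      16   /* vectorization factor */
#define NUNITS  4    /* elements per vector  */
#define NCOPIES (VF / NUNITS)

int
main (void)
{
  int mem[VF], out[NCOPIES][NUNITS];
  int i, j;
  const int *dataref_ptr = mem;

  for (i = 0; i < VF; i++)
    mem[i] = i * i;

  /* One scalar load "x = memref" becomes NCOPIES vector loads.  */
  for (j = 0; j < NCOPIES; j++)
    {
      for (i = 0; i < NUNITS; i++)
        out[j][i] = dataref_ptr[i];   /* VS1_j: vxj = memrefj */
      dataref_ptr += NUNITS;          /* bump by one vector   */
    }

  for (j = 0; j < NCOPIES; j++)
    for (i = 0; i < NUNITS; i++)
      assert (out[j][i] == mem[j * NUNITS + i]);
  return 0;
}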
5933
88088c03
DN
5934
5935/* Function vectorizable_live_operation.
5936
5937 STMT computes a value that is used outside the loop. Check if
5938 it can be supported. */
5939
5940bool
5941vectorizable_live_operation (tree stmt,
5942 block_stmt_iterator *bsi ATTRIBUTE_UNUSED,
5943 tree *vec_stmt ATTRIBUTE_UNUSED)
5944{
5945 tree operation;
5946 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
5947 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
d29de1bf 5948 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
88088c03 5949 int i;
88088c03
DN
5950 int op_type;
5951 tree op;
5952 tree def, def_stmt;
5953 enum vect_def_type dt;
5954
60555ced
DN
5955 gcc_assert (STMT_VINFO_LIVE_P (stmt_info));
5956
5957 if (STMT_VINFO_DEF_TYPE (stmt_info) == vect_reduction_def)
88088c03
DN
5958 return false;
5959
07beea0d 5960 if (TREE_CODE (stmt) != GIMPLE_MODIFY_STMT)
88088c03
DN
5961 return false;
5962
07beea0d 5963 if (TREE_CODE (GIMPLE_STMT_OPERAND (stmt, 0)) != SSA_NAME)
88088c03
DN
5964 return false;
5965
d29de1bf
DN
5966 /* FORNOW. CHECKME. */
5967 if (nested_in_vect_loop_p (loop, stmt))
5968 return false;
5969
07beea0d 5970 operation = GIMPLE_STMT_OPERAND (stmt, 1);
5039610b 5971 op_type = TREE_OPERAND_LENGTH (operation);
88088c03
DN
5972
5973 /* FORNOW: support only if all uses are invariant. This means
5974 that the scalar operations can remain in place, unvectorized.
5975 The original last scalar value that they compute will be used. */
5976
5977 for (i = 0; i < op_type; i++)
5978 {
5979 op = TREE_OPERAND (operation, i);
7de5c6a4 5980 if (op && !vect_is_simple_use (op, loop_vinfo, &def_stmt, &def, &dt))
88088c03 5981 {
00518cb1 5982 if (vect_print_dump_info (REPORT_DETAILS))
88088c03
DN
5983 fprintf (vect_dump, "use not simple.");
5984 return false;
5985 }
5986
5987 if (dt != vect_invariant_def && dt != vect_constant_def)
5988 return false;
5989 }
5990
5991 /* No transformation is required for the cases we currently support. */
5992 return true;
5993}
5994
5995
b52485c6
DP
5996/* Function vect_is_simple_cond.
5997
5998 Input:
5999 LOOP - the loop that is being vectorized.
6000 COND - Condition that is checked for simple use.
6001
a82635d0 6002 Returns whether a COND can be vectorized. Checks whether
b52485c6
DP
6003 condition operands are supportable using vect_is_simple_use. */
6004
6005static bool
6006vect_is_simple_cond (tree cond, loop_vec_info loop_vinfo)
6007{
6008 tree lhs, rhs;
88088c03
DN
6009 tree def;
6010 enum vect_def_type dt;
b52485c6 6011
7da4bf7d 6012 if (!COMPARISON_CLASS_P (cond))
b52485c6
DP
6013 return false;
6014
6015 lhs = TREE_OPERAND (cond, 0);
6016 rhs = TREE_OPERAND (cond, 1);
6017
6018 if (TREE_CODE (lhs) == SSA_NAME)
6019 {
6020 tree lhs_def_stmt = SSA_NAME_DEF_STMT (lhs);
88088c03 6021 if (!vect_is_simple_use (lhs, loop_vinfo, &lhs_def_stmt, &def, &dt))
b52485c6
DP
6022 return false;
6023 }
325217ed
CF
6024 else if (TREE_CODE (lhs) != INTEGER_CST && TREE_CODE (lhs) != REAL_CST
6025 && TREE_CODE (lhs) != FIXED_CST)
b52485c6
DP
6026 return false;
6027
6028 if (TREE_CODE (rhs) == SSA_NAME)
6029 {
6030 tree rhs_def_stmt = SSA_NAME_DEF_STMT (rhs);
88088c03 6031 if (!vect_is_simple_use (rhs, loop_vinfo, &rhs_def_stmt, &def, &dt))
b52485c6
DP
6032 return false;
6033 }
325217ed
CF
6034 else if (TREE_CODE (rhs) != INTEGER_CST && TREE_CODE (rhs) != REAL_CST
6035 && TREE_CODE (rhs) != FIXED_CST)
b52485c6
DP
6036 return false;
6037
6038 return true;
6039}
6040
6041/* vectorizable_condition.
6042
 6043 Check if STMT is a conditional modify expression that can be vectorized.
6044 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
6045 stmt using VEC_COND_EXPR to replace it, put it in VEC_STMT, and insert it
6046 at BSI.
6047
6048 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
6049
6050bool
6051vectorizable_condition (tree stmt, block_stmt_iterator *bsi, tree *vec_stmt)
6052{
6053 tree scalar_dest = NULL_TREE;
6054 tree vec_dest = NULL_TREE;
6055 tree op = NULL_TREE;
6056 tree cond_expr, then_clause, else_clause;
6057 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
6058 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
6059 tree vec_cond_lhs, vec_cond_rhs, vec_then_clause, vec_else_clause;
6060 tree vec_compare, vec_cond_expr;
6061 tree new_temp;
6062 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
6063 enum machine_mode vec_mode;
88088c03
DN
6064 tree def;
6065 enum vect_def_type dt;
89d67cca
DN
6066 int nunits = TYPE_VECTOR_SUBPARTS (vectype);
6067 int ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits;
6068
6069 gcc_assert (ncopies >= 1);
6070 if (ncopies > 1)
6071 return false; /* FORNOW */
b52485c6
DP
6072
6073 if (!STMT_VINFO_RELEVANT_P (stmt_info))
6074 return false;
6075
60555ced
DN
6076 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_loop_def)
6077 return false;
88088c03 6078
805e2059
IR
6079 /* FORNOW: SLP not supported. */
6080 if (STMT_SLP_TYPE (stmt_info))
6081 return false;
6082
60555ced 6083 /* FORNOW: not yet supported. */
88088c03
DN
6084 if (STMT_VINFO_LIVE_P (stmt_info))
6085 {
00518cb1 6086 if (vect_print_dump_info (REPORT_DETAILS))
88088c03
DN
6087 fprintf (vect_dump, "value used after loop.");
6088 return false;
6089 }
6090
60555ced 6091 /* Is this a vectorizable conditional operation? */
07beea0d 6092 if (TREE_CODE (stmt) != GIMPLE_MODIFY_STMT)
b52485c6
DP
6093 return false;
6094
07beea0d 6095 op = GIMPLE_STMT_OPERAND (stmt, 1);
b52485c6
DP
6096
6097 if (TREE_CODE (op) != COND_EXPR)
6098 return false;
6099
6100 cond_expr = TREE_OPERAND (op, 0);
6101 then_clause = TREE_OPERAND (op, 1);
6102 else_clause = TREE_OPERAND (op, 2);
6103
10b96810
AP
6104 if (!vect_is_simple_cond (cond_expr, loop_vinfo))
6105 return false;
6106
75bfa678
RG
6107 /* We do not handle two different vector types for the condition
6108 and the values. */
6109 if (TREE_TYPE (TREE_OPERAND (cond_expr, 0)) != TREE_TYPE (vectype))
6110 return false;
6111
b52485c6
DP
6112 if (TREE_CODE (then_clause) == SSA_NAME)
6113 {
6114 tree then_def_stmt = SSA_NAME_DEF_STMT (then_clause);
88088c03
DN
6115 if (!vect_is_simple_use (then_clause, loop_vinfo,
6116 &then_def_stmt, &def, &dt))
b52485c6
DP
6117 return false;
6118 }
6119 else if (TREE_CODE (then_clause) != INTEGER_CST
325217ed
CF
6120 && TREE_CODE (then_clause) != REAL_CST
6121 && TREE_CODE (then_clause) != FIXED_CST)
b52485c6
DP
6122 return false;
6123
6124 if (TREE_CODE (else_clause) == SSA_NAME)
6125 {
6126 tree else_def_stmt = SSA_NAME_DEF_STMT (else_clause);
88088c03
DN
6127 if (!vect_is_simple_use (else_clause, loop_vinfo,
6128 &else_def_stmt, &def, &dt))
b52485c6
DP
6129 return false;
6130 }
6131 else if (TREE_CODE (else_clause) != INTEGER_CST
325217ed
CF
6132 && TREE_CODE (else_clause) != REAL_CST
6133 && TREE_CODE (else_clause) != FIXED_CST)
b52485c6
DP
6134 return false;
6135
6136
6137 vec_mode = TYPE_MODE (vectype);
6138
6139 if (!vec_stmt)
6140 {
6141 STMT_VINFO_TYPE (stmt_info) = condition_vec_info_type;
6142 return expand_vec_cond_expr_p (op, vec_mode);
6143 }
6144
6145 /* Transform */
6146
6147 /* Handle def. */
07beea0d 6148 scalar_dest = GIMPLE_STMT_OPERAND (stmt, 0);
b52485c6
DP
6149 vec_dest = vect_create_destination_var (scalar_dest, vectype);
6150
6151 /* Handle cond expr. */
6152 vec_cond_lhs =
61d3cdbb 6153 vect_get_vec_def_for_operand (TREE_OPERAND (cond_expr, 0), stmt, NULL);
b52485c6 6154 vec_cond_rhs =
61d3cdbb
DN
6155 vect_get_vec_def_for_operand (TREE_OPERAND (cond_expr, 1), stmt, NULL);
6156 vec_then_clause = vect_get_vec_def_for_operand (then_clause, stmt, NULL);
6157 vec_else_clause = vect_get_vec_def_for_operand (else_clause, stmt, NULL);
b52485c6
DP
6158
 6159 /* Arguments are ready. Create the new vector stmt. */
6160 vec_compare = build2 (TREE_CODE (cond_expr), vectype,
6161 vec_cond_lhs, vec_cond_rhs);
b4257cfc
RG
6162 vec_cond_expr = build3 (VEC_COND_EXPR, vectype,
6163 vec_compare, vec_then_clause, vec_else_clause);
b52485c6 6164
ebb07520 6165 *vec_stmt = build_gimple_modify_stmt (vec_dest, vec_cond_expr);
b52485c6 6166 new_temp = make_ssa_name (vec_dest, *vec_stmt);
07beea0d 6167 GIMPLE_STMT_OPERAND (*vec_stmt, 0) = new_temp;
b52485c6
DP
6168 vect_finish_stmt_generation (stmt, *vec_stmt, bsi);
6169
6170 return true;
6171}
f7064d11 6172
805e2059 6173
f7064d11
DN
6174/* Function vect_transform_stmt.
6175
6176 Create a vectorized stmt to replace STMT, and insert it at BSI. */
6177
805e2059
IR
6178static bool
6179vect_transform_stmt (tree stmt, block_stmt_iterator *bsi, bool *strided_store,
6180 slp_tree slp_node)
f7064d11
DN
6181{
6182 bool is_store = false;
6183 tree vec_stmt = NULL_TREE;
6184 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
20f06221 6185 tree orig_stmt_in_pattern;
f7064d11
DN
6186 bool done;
6187
60555ced 6188 switch (STMT_VINFO_TYPE (stmt_info))
f7064d11 6189 {
60555ced 6190 case type_demotion_vec_info_type:
805e2059 6191 gcc_assert (!slp_node);
60555ced
DN
6192 done = vectorizable_type_demotion (stmt, bsi, &vec_stmt);
6193 gcc_assert (done);
6194 break;
8115817b 6195
60555ced 6196 case type_promotion_vec_info_type:
805e2059 6197 gcc_assert (!slp_node);
60555ced
DN
6198 done = vectorizable_type_promotion (stmt, bsi, &vec_stmt);
6199 gcc_assert (done);
6200 break;
6201
6202 case type_conversion_vec_info_type:
805e2059 6203 done = vectorizable_conversion (stmt, bsi, &vec_stmt, slp_node);
60555ced
DN
6204 gcc_assert (done);
6205 break;
6206
cd38ca7f 6207 case induc_vec_info_type:
805e2059 6208 gcc_assert (!slp_node);
cd38ca7f
DN
6209 done = vectorizable_induction (stmt, bsi, &vec_stmt);
6210 gcc_assert (done);
6211 break;
6212
60555ced 6213 case op_vec_info_type:
805e2059 6214 done = vectorizable_operation (stmt, bsi, &vec_stmt, slp_node);
60555ced
DN
6215 gcc_assert (done);
6216 break;
6217
6218 case assignment_vec_info_type:
805e2059 6219 done = vectorizable_assignment (stmt, bsi, &vec_stmt, slp_node);
60555ced
DN
6220 gcc_assert (done);
6221 break;
6222
6223 case load_vec_info_type:
805e2059 6224 done = vectorizable_load (stmt, bsi, &vec_stmt, slp_node);
60555ced
DN
6225 gcc_assert (done);
6226 break;
6227
6228 case store_vec_info_type:
805e2059 6229 done = vectorizable_store (stmt, bsi, &vec_stmt, slp_node);
60555ced 6230 gcc_assert (done);
805e2059 6231 if (STMT_VINFO_STRIDED_ACCESS (stmt_info))
60555ced
DN
6232 {
6233 /* In case of interleaving, the whole chain is vectorized when the
6234 last store in the chain is reached. Store stmts before the last
 6235 one are skipped, and their stmt_vec_info shouldn't be freed
6236 meanwhile. */
6237 *strided_store = true;
6238 if (STMT_VINFO_VEC_STMT (stmt_info))
6239 is_store = true;
98b44b0e 6240 }
60555ced
DN
6241 else
6242 is_store = true;
6243 break;
88088c03 6244
60555ced 6245 case condition_vec_info_type:
805e2059 6246 gcc_assert (!slp_node);
60555ced
DN
6247 done = vectorizable_condition (stmt, bsi, &vec_stmt);
6248 gcc_assert (done);
6249 break;
88088c03 6250
60555ced 6251 case call_vec_info_type:
805e2059 6252 gcc_assert (!slp_node);
60555ced
DN
6253 done = vectorizable_call (stmt, bsi, &vec_stmt);
6254 break;
2505a3f2 6255
60555ced 6256 case reduc_vec_info_type:
805e2059 6257 gcc_assert (!slp_node);
60555ced
DN
6258 done = vectorizable_reduction (stmt, bsi, &vec_stmt);
6259 gcc_assert (done);
6260 break;
88088c03 6261
60555ced
DN
6262 default:
6263 if (!STMT_VINFO_LIVE_P (stmt_info))
98b44b0e 6264 {
60555ced
DN
6265 if (vect_print_dump_info (REPORT_DETAILS))
6266 fprintf (vect_dump, "stmt not supported.");
6267 gcc_unreachable ();
98b44b0e 6268 }
88088c03 6269 }
b52485c6 6270
60555ced
DN
6271 if (STMT_VINFO_LIVE_P (stmt_info)
6272 && STMT_VINFO_TYPE (stmt_info) != reduc_vec_info_type)
88088c03 6273 {
60555ced
DN
6274 done = vectorizable_live_operation (stmt, bsi, &vec_stmt);
6275 gcc_assert (done);
6276 }
6277
6278 if (vec_stmt)
6279 {
6280 STMT_VINFO_VEC_STMT (stmt_info) = vec_stmt;
6281 orig_stmt_in_pattern = STMT_VINFO_RELATED_STMT (stmt_info);
6282 if (orig_stmt_in_pattern)
6283 {
6284 stmt_vec_info stmt_vinfo = vinfo_for_stmt (orig_stmt_in_pattern);
6285 /* STMT was inserted by the vectorizer to replace a computation idiom.
6286 ORIG_STMT_IN_PATTERN is a stmt in the original sequence that
6287 computed this idiom. We need to record a pointer to VEC_STMT in
6288 the stmt_info of ORIG_STMT_IN_PATTERN. See more details in the
6289 documentation of vect_pattern_recog. */
6290 if (STMT_VINFO_IN_PATTERN_P (stmt_vinfo))
6291 {
6292 gcc_assert (STMT_VINFO_RELATED_STMT (stmt_vinfo) == stmt);
6293 STMT_VINFO_VEC_STMT (stmt_vinfo) = vec_stmt;
6294 }
6295 }
f7064d11
DN
6296 }
6297
88088c03 6298 return is_store;
f7064d11
DN
6299}
6300
6301
 6302/* This function builds ni_name = number of iterations the loop executes,
 6303 placing the computation on the loop preheader. */
6304
6305static tree
6306vect_build_loop_niters (loop_vec_info loop_vinfo)
6307{
6308 tree ni_name, stmt, var;
6309 edge pe;
6310 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
6311 tree ni = unshare_expr (LOOP_VINFO_NITERS (loop_vinfo));
6312
6313 var = create_tmp_var (TREE_TYPE (ni), "niters");
f004ab02 6314 add_referenced_var (var);
f7064d11
DN
6315 ni_name = force_gimple_operand (ni, &stmt, false, var);
6316
6317 pe = loop_preheader_edge (loop);
6318 if (stmt)
6319 {
6320 basic_block new_bb = bsi_insert_on_edge_immediate (pe, stmt);
6321 gcc_assert (!new_bb);
6322 }
6323
6324 return ni_name;
6325}
6326
6327
6328/* This function generates the following statements:
6329
 6330 ni_name = number of iterations the loop executes
6331 ratio = ni_name / vf
6332 ratio_mult_vf_name = ratio * vf
6333
6334 and places them at the loop preheader edge. */
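/* A worked example (hypothetical numbers): since vf is a power of 2,
   both quantities are computed with shifts. For vf == 4:

     ratio         = ni_name >> 2;   (== ni / 4)
     ratio_mult_vf = ratio << 2;     (== (ni / 4) * 4)

   so for ni == 103 the vectorized loop runs ratio == 25 times and
   covers ratio_mult_vf == 100 scalar iterations. */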
6335
6336static void
6337vect_generate_tmps_on_preheader (loop_vec_info loop_vinfo,
6338 tree *ni_name_ptr,
6339 tree *ratio_mult_vf_name_ptr,
6340 tree *ratio_name_ptr)
6341{
6342
6343 edge pe;
6344 basic_block new_bb;
6345 tree stmt, ni_name;
6346 tree var;
6347 tree ratio_name;
6348 tree ratio_mult_vf_name;
6349 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
6350 tree ni = LOOP_VINFO_NITERS (loop_vinfo);
6351 int vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
99c09897 6352 tree log_vf;
f7064d11
DN
6353
6354 pe = loop_preheader_edge (loop);
6355
6356 /* Generate temporary variable that contains
6357 number of iterations loop executes. */
6358
6359 ni_name = vect_build_loop_niters (loop_vinfo);
99c09897 6360 log_vf = build_int_cst (TREE_TYPE (ni), exact_log2 (vf));
f7064d11
DN
6361
6362 /* Create: ratio = ni >> log2(vf) */
6363
80b4a8d9
ZD
6364 ratio_name = fold_build2 (RSHIFT_EXPR, TREE_TYPE (ni_name), ni_name, log_vf);
6365 if (!is_gimple_val (ratio_name))
6366 {
6367 var = create_tmp_var (TREE_TYPE (ni), "bnd");
6368 add_referenced_var (var);
f7064d11 6369
80b4a8d9
ZD
6370 ratio_name = force_gimple_operand (ratio_name, &stmt, true, var);
6371 pe = loop_preheader_edge (loop);
6372 new_bb = bsi_insert_on_edge_immediate (pe, stmt);
6373 gcc_assert (!new_bb);
6374 }
f7064d11
DN
6375
6376 /* Create: ratio_mult_vf = ratio << log2 (vf). */
6377
80b4a8d9
ZD
6378 ratio_mult_vf_name = fold_build2 (LSHIFT_EXPR, TREE_TYPE (ratio_name),
6379 ratio_name, log_vf);
6380 if (!is_gimple_val (ratio_mult_vf_name))
6381 {
6382 var = create_tmp_var (TREE_TYPE (ni), "ratio_mult_vf");
6383 add_referenced_var (var);
f7064d11 6384
80b4a8d9
ZD
6385 ratio_mult_vf_name = force_gimple_operand (ratio_mult_vf_name, &stmt,
6386 true, var);
6387 pe = loop_preheader_edge (loop);
6388 new_bb = bsi_insert_on_edge_immediate (pe, stmt);
6389 gcc_assert (!new_bb);
6390 }
f7064d11
DN
6391
6392 *ni_name_ptr = ni_name;
6393 *ratio_mult_vf_name_ptr = ratio_mult_vf_name;
6394 *ratio_name_ptr = ratio_name;
6395
6396 return;
6397}
6398
6399
6400/* Function vect_update_ivs_after_vectorizer.
6401
6402 "Advance" the induction variables of LOOP to the value they should take
6403 after the execution of LOOP. This is currently necessary because the
6404 vectorizer does not handle induction variables that are used after the
6405 loop. Such a situation occurs when the last iterations of LOOP are
6406 peeled, because:
6407 1. We introduced new uses after LOOP for IVs that were not originally used
6408 after LOOP: the IVs of LOOP are now used by an epilog loop.
6409 2. LOOP is going to be vectorized; this means that it will iterate N/VF
6410 times, whereas the loop IVs should be bumped N times.
6411
6412 Input:
6413 - LOOP - a loop that is going to be vectorized. The last few iterations
6414 of LOOP were peeled.
6415 - NITERS - the number of iterations that LOOP executes (before it is
 6416 vectorized), i.e., the number of times the ivs should be bumped.
6417 - UPDATE_E - a successor edge of LOOP->exit that is on the (only) path
6418 coming out from LOOP on which there are uses of the LOOP ivs
6419 (this is the path from LOOP->exit to epilog_loop->preheader).
6420
6421 The new definitions of the ivs are placed in LOOP->exit.
6422 The phi args associated with the edge UPDATE_E in the bb
6423 UPDATE_E->dest are updated accordingly.
6424
6425 Assumption 1: Like the rest of the vectorizer, this function assumes
6426 a single loop exit that has a single predecessor.
6427
6428 Assumption 2: The phi nodes in the LOOP header and in update_bb are
6429 organized in the same order.
6430
6431 Assumption 3: The access function of the ivs is simple enough (see
6432 vect_can_advance_ivs_p). This assumption will be relaxed in the future.
6433
6434 Assumption 4: Exactly one of the successors of LOOP exit-bb is on a path
6435 coming out of LOOP on which the ivs of LOOP are used (this is the path
6436 that leads to the epilog loop; other paths skip the epilog loop). This
6437 path starts with the edge UPDATE_E, and its destination (denoted update_bb)
6438 needs to have its phis updated.
6439 */
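/* A minimal sketch with hypothetical names: for an IV with access
   function {init_3, +, step_5}_loop and NITERS == n_7, the code below
   emits in LOOP->exit

     tmp_9 = init_3 + n_7 * step_5;

   (POINTER_PLUS_EXPR is used instead for pointer-typed IVs) and
   rewrites the phi argument on UPDATE_E in update_bb to use tmp_9. */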
6440
6441static void
6442vect_update_ivs_after_vectorizer (loop_vec_info loop_vinfo, tree niters,
6443 edge update_e)
6444{
6445 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
ac8f6c69 6446 basic_block exit_bb = single_exit (loop)->dest;
f7064d11
DN
6447 tree phi, phi1;
6448 basic_block update_bb = update_e->dest;
6449
6450 /* gcc_assert (vect_can_advance_ivs_p (loop_vinfo)); */
6451
6452 /* Make sure there exists a single-predecessor exit bb: */
c5cbcccf 6453 gcc_assert (single_pred_p (exit_bb));
f7064d11
DN
6454
6455 for (phi = phi_nodes (loop->header), phi1 = phi_nodes (update_bb);
6456 phi && phi1;
6457 phi = PHI_CHAIN (phi), phi1 = PHI_CHAIN (phi1))
6458 {
6459 tree access_fn = NULL;
6460 tree evolution_part;
6461 tree init_expr;
6462 tree step_expr;
c6540bde 6463 tree var, ni, ni_name;
f7064d11
DN
6464 block_stmt_iterator last_bsi;
6465
00518cb1 6466 if (vect_print_dump_info (REPORT_DETAILS))
88088c03
DN
6467 {
6468 fprintf (vect_dump, "vect_update_ivs_after_vectorizer: phi: ");
6469 print_generic_expr (vect_dump, phi, TDF_SLIM);
6470 }
6471
f7064d11
DN
 6472 /* Skip virtual phis. */
6473 if (!is_gimple_reg (SSA_NAME_VAR (PHI_RESULT (phi))))
6474 {
00518cb1 6475 if (vect_print_dump_info (REPORT_DETAILS))
f7064d11
DN
6476 fprintf (vect_dump, "virtual phi. skip.");
6477 continue;
6478 }
6479
61d3cdbb
DN
6480 /* Skip reduction phis. */
6481 if (STMT_VINFO_DEF_TYPE (vinfo_for_stmt (phi)) == vect_reduction_def)
6482 {
00518cb1 6483 if (vect_print_dump_info (REPORT_DETAILS))
61d3cdbb
DN
6484 fprintf (vect_dump, "reduc phi. skip.");
6485 continue;
6486 }
6487
f7064d11
DN
6488 access_fn = analyze_scalar_evolution (loop, PHI_RESULT (phi));
6489 gcc_assert (access_fn);
6490 evolution_part =
6491 unshare_expr (evolution_part_in_loop_num (access_fn, loop->num));
6492 gcc_assert (evolution_part != NULL_TREE);
6493
6494 /* FORNOW: We do not support IVs whose evolution function is a polynomial
6495 of degree >= 2 or exponential. */
6496 gcc_assert (!tree_is_chrec (evolution_part));
6497
6498 step_expr = evolution_part;
6499 init_expr = unshare_expr (initial_condition_in_loop_num (access_fn,
6500 loop->num));
6501
5be014d5
AP
6502 if (POINTER_TYPE_P (TREE_TYPE (init_expr)))
6503 ni = fold_build2 (POINTER_PLUS_EXPR, TREE_TYPE (init_expr),
6504 init_expr,
6505 fold_convert (sizetype,
6506 fold_build2 (MULT_EXPR, TREE_TYPE (niters),
6507 niters, step_expr)));
6508 else
6509 ni = fold_build2 (PLUS_EXPR, TREE_TYPE (init_expr),
6510 fold_build2 (MULT_EXPR, TREE_TYPE (init_expr),
6511 fold_convert (TREE_TYPE (init_expr),
6512 niters),
6513 step_expr),
6514 init_expr);
6515
6516
f7064d11
DN
6517
6518 var = create_tmp_var (TREE_TYPE (init_expr), "tmp");
f004ab02 6519 add_referenced_var (var);
f7064d11 6520
f7064d11 6521 last_bsi = bsi_last (exit_bb);
c6540bde
ZD
6522 ni_name = force_gimple_operand_bsi (&last_bsi, ni, false, var,
6523 true, BSI_SAME_STMT);
6524
f7064d11 6525 /* Fix phi expressions in the successor bb. */
f7064d11
DN
6526 SET_PHI_ARG_DEF (phi1, update_e->dest_idx, ni_name);
6527 }
6528}
6529
749cc4b1
HJ
6530/* Return the more conservative threshold between the
6531 min_profitable_iters returned by the cost model and the user
6532 specified threshold, if provided. */
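/* For example (hypothetical values): with --param min-vect-loop-bound=2
   and VF == 4, min_scalar_loop_bound is 2*4 - 1 == 7; if the cost model
   returns min_profitable_iters == 10, the returned threshold is 10,
   otherwise 7 is kept. */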
6533
6534static unsigned int
6535conservative_cost_threshold (loop_vec_info loop_vinfo,
6536 int min_profitable_iters)
6537{
6538 unsigned int th;
6539 int min_scalar_loop_bound;
6540
6541 min_scalar_loop_bound = ((PARAM_VALUE (PARAM_MIN_VECT_LOOP_BOUND)
6542 * LOOP_VINFO_VECT_FACTOR (loop_vinfo)) - 1);
6543
6544 /* Use the cost model only if it is more conservative than user specified
6545 threshold. */
6546 th = (unsigned) min_scalar_loop_bound;
6547 if (min_profitable_iters
6548 && (!min_scalar_loop_bound
6549 || min_profitable_iters > min_scalar_loop_bound))
6550 th = (unsigned) min_profitable_iters;
6551
f5adacc5 6552 if (th && vect_print_dump_info (REPORT_COST))
749cc4b1
HJ
6553 fprintf (vect_dump, "Vectorization may not be profitable.");
6554
6555 return th;
6556}
f7064d11
DN
6557
6558/* Function vect_do_peeling_for_loop_bound
6559
6560 Peel the last iterations of the loop represented by LOOP_VINFO.
6561 The peeled iterations form a new epilog loop. Given that the loop now
6562 iterates NITERS times, the new epilog loop iterates
6563 NITERS % VECTORIZATION_FACTOR times.
6564
6565 The original loop will later be made to iterate
6566 NITERS / VECTORIZATION_FACTOR times (this value is placed into RATIO). */
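/* For example (hypothetical numbers): with NITERS == 13 and VF == 4,
   the vectorized loop iterates RATIO == 3 times and the epilog loop
   executes the remaining 13 % 4 == 1 scalar iteration. */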
6567
6568static void
d73be268 6569vect_do_peeling_for_loop_bound (loop_vec_info loop_vinfo, tree *ratio)
f7064d11 6570{
f7064d11
DN
6571 tree ni_name, ratio_mult_vf_name;
6572 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
6573 struct loop *new_loop;
6574 edge update_e;
70388d94 6575 basic_block preheader;
f7064d11 6576 int loop_num;
749cc4b1
HJ
6577 bool check_profitability = false;
6578 unsigned int th = 0;
792ed98b 6579 int min_profitable_iters;
f7064d11 6580
00518cb1 6581 if (vect_print_dump_info (REPORT_DETAILS))
bb748329 6582 fprintf (vect_dump, "=== vect_do_peeling_for_loop_bound ===");
f7064d11 6583
9498a22f
RH
6584 initialize_original_copy_tables ();
6585
f7064d11
DN
 6586 /* Generate the following variables on the preheader of the original loop:
 6587
 6588 ni_name = number of iterations the original loop executes
6589 ratio = ni_name / vf
6590 ratio_mult_vf_name = ratio * vf */
6591 vect_generate_tmps_on_preheader (loop_vinfo, &ni_name,
6592 &ratio_mult_vf_name, ratio);
6593
f7064d11 6594 loop_num = loop->num;
792ed98b 6595
749cc4b1
HJ
 6596 /* Perform the cost model check here only if it was not already done
 6597 during versioning or peeling for alignment. */
6598 if (!VEC_length (tree, LOOP_VINFO_MAY_MISALIGN_STMTS (loop_vinfo))
6599 && !VEC_length (ddr_p, LOOP_VINFO_MAY_ALIAS_DDRS (loop_vinfo))
6600 && !LOOP_PEELING_FOR_ALIGNMENT (loop_vinfo))
6601 {
6602 check_profitability = true;
792ed98b 6603
749cc4b1
HJ
6604 /* Get profitability threshold for vectorized loop. */
6605 min_profitable_iters = LOOP_VINFO_COST_MODEL_MIN_ITERS (loop_vinfo);
792ed98b 6606
749cc4b1
HJ
6607 th = conservative_cost_threshold (loop_vinfo,
6608 min_profitable_iters);
6609 }
792ed98b 6610
d73be268 6611 new_loop = slpeel_tree_peel_loop_to_edge (loop, single_exit (loop),
792ed98b 6612 ratio_mult_vf_name, ni_name, false,
749cc4b1 6613 th, check_profitability);
f7064d11
DN
6614 gcc_assert (new_loop);
6615 gcc_assert (loop_num == loop->num);
61d3cdbb 6616#ifdef ENABLE_CHECKING
f7064d11
DN
6617 slpeel_verify_cfg_after_peeling (loop, new_loop);
6618#endif
6619
6620 /* A guard that controls whether the new_loop is to be executed or skipped
6621 is placed in LOOP->exit. LOOP->exit therefore has two successors - one
6622 is the preheader of NEW_LOOP, where the IVs from LOOP are used. The other
6623 is a bb after NEW_LOOP, where these IVs are not used. Find the edge that
6624 is on the path where the LOOP IVs are used and need to be updated. */
6625
70388d94 6626 preheader = loop_preheader_edge (new_loop)->src;
ac8f6c69 6627 if (EDGE_PRED (preheader, 0)->src == single_exit (loop)->dest)
70388d94 6628 update_e = EDGE_PRED (preheader, 0);
f7064d11 6629 else
70388d94 6630 update_e = EDGE_PRED (preheader, 1);
f7064d11
DN
6631
6632 /* Update IVs of original loop as if they were advanced
6633 by ratio_mult_vf_name steps. */
6634 vect_update_ivs_after_vectorizer (loop_vinfo, ratio_mult_vf_name, update_e);
6635
6636 /* After peeling we have to reset scalar evolution analyzer. */
6637 scev_reset ();
6638
9498a22f 6639 free_original_copy_tables ();
f7064d11
DN
6640}
6641
6642
6643/* Function vect_gen_niters_for_prolog_loop
6644
6645 Set the number of iterations for the loop represented by LOOP_VINFO
6646 to the minimum between LOOP_NITERS (the original iteration count of the loop)
5f55a1ba 6647 and the misalignment of DR - the data reference recorded in
f7064d11
DN
6648 LOOP_VINFO_UNALIGNED_DR (LOOP_VINFO). As a result, after the execution of
6649 this loop, the data reference DR will refer to an aligned location.
6650
6651 The following computation is generated:
6652
5f55a1ba
DN
6653 If the misalignment of DR is known at compile time:
6654 addr_mis = int mis = DR_MISALIGNMENT (dr);
6655 Else, compute address misalignment in bytes:
6656 addr_mis = addr & (vectype_size - 1)
f7064d11
DN
6657
6658 prolog_niters = min ( LOOP_NITERS , (VF - addr_mis/elem_size)&(VF-1) )
6659
6660 (elem_size = element type size; an element is the scalar element
98b44b0e
IR
6661 whose type is the inner type of the vectype)
6662
6663 For interleaving,
6664
6665 prolog_niters = min ( LOOP_NITERS ,
6666 (VF/group_size - addr_mis/elem_size)&(VF/group_size-1) )
6667 where group_size is the size of the interleaved group.
cb9ed5d7
DN
6668
6669 The above formulas assume that VF == number of elements in the vector. This
6670 may not hold when there are multiple-types in the loop.
6671 In this case, for some data-references in the loop the VF does not represent
6672 the number of elements that fit in the vector. Therefore, instead of VF we
6673 use TYPE_VECTOR_SUBPARTS. */
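/* A worked example (hypothetical numbers): for 4-byte elements, a
   16-byte vector (nelements == 4, group_size == 1) and a known byte
   misalignment of 8, elem_misalign == 8/4 == 2, so

     prolog_niters = (4 - 2) & (4/1 - 1) = 2

   i.e. two scalar iterations are peeled before the access becomes
   16-byte aligned. */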
f7064d11
DN
6674
6675static tree
6676vect_gen_niters_for_prolog_loop (loop_vec_info loop_vinfo, tree loop_niters)
6677{
6678 struct data_reference *dr = LOOP_VINFO_UNALIGNED_DR (loop_vinfo);
f7064d11
DN
6679 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
6680 tree var, stmt;
6681 tree iters, iters_name;
6682 edge pe;
6683 basic_block new_bb;
6684 tree dr_stmt = DR_STMT (dr);
6685 stmt_vec_info stmt_info = vinfo_for_stmt (dr_stmt);
6686 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
6687 int vectype_align = TYPE_ALIGN (vectype) / BITS_PER_UNIT;
f7064d11 6688 tree niters_type = TREE_TYPE (loop_niters);
98b44b0e
IR
6689 int group_size = 1;
6690 int element_size = GET_MODE_SIZE (TYPE_MODE (TREE_TYPE (DR_REF (dr))));
cb9ed5d7 6691 int nelements = TYPE_VECTOR_SUBPARTS (vectype);
98b44b0e 6692
805e2059 6693 if (STMT_VINFO_STRIDED_ACCESS (stmt_info))
98b44b0e 6694 {
2f8e468b 6695 /* For interleaved access, the element size must be multiplied by the size of
98b44b0e
IR
6696 the interleaved group. */
6697 group_size = DR_GROUP_SIZE (vinfo_for_stmt (
6698 DR_GROUP_FIRST_DR (stmt_info)));
6699 element_size *= group_size;
6700 }
f7064d11
DN
6701
6702 pe = loop_preheader_edge (loop);
f7064d11 6703
5f55a1ba
DN
6704 if (LOOP_PEELING_FOR_ALIGNMENT (loop_vinfo) > 0)
6705 {
6706 int byte_misalign = LOOP_PEELING_FOR_ALIGNMENT (loop_vinfo);
5f55a1ba 6707 int elem_misalign = byte_misalign / element_size;
f7064d11 6708
00518cb1 6709 if (vect_print_dump_info (REPORT_DETAILS))
5f55a1ba 6710 fprintf (vect_dump, "known alignment = %d.", byte_misalign);
98b44b0e 6711 iters = build_int_cst (niters_type,
cb9ed5d7 6712 (nelements - elem_misalign)&(nelements/group_size-1));
5f55a1ba
DN
6713 }
6714 else
6715 {
6716 tree new_stmts = NULL_TREE;
468c2ac0
DN
6717 tree start_addr = vect_create_addr_base_for_vector_ref (dr_stmt,
6718 &new_stmts, NULL_TREE, loop);
5f55a1ba
DN
6719 tree ptr_type = TREE_TYPE (start_addr);
6720 tree size = TYPE_SIZE (ptr_type);
6721 tree type = lang_hooks.types.type_for_size (tree_low_cst (size, 1), 1);
6722 tree vectype_size_minus_1 = build_int_cst (type, vectype_align - 1);
6723 tree elem_size_log =
cb9ed5d7
DN
6724 build_int_cst (type, exact_log2 (vectype_align/nelements));
6725 tree nelements_minus_1 = build_int_cst (type, nelements - 1);
6726 tree nelements_tree = build_int_cst (type, nelements);
5f55a1ba
DN
6727 tree byte_misalign;
6728 tree elem_misalign;
6729
6730 new_bb = bsi_insert_on_edge_immediate (pe, new_stmts);
6731 gcc_assert (!new_bb);
f7064d11 6732
5f55a1ba
DN
6733 /* Create: byte_misalign = addr & (vectype_size - 1) */
6734 byte_misalign =
5be014d5 6735 fold_build2 (BIT_AND_EXPR, type, fold_convert (type, start_addr), vectype_size_minus_1);
f7064d11 6736
5f55a1ba
DN
6737 /* Create: elem_misalign = byte_misalign / element_size */
6738 elem_misalign =
80b4a8d9 6739 fold_build2 (RSHIFT_EXPR, type, byte_misalign, elem_size_log);
5f55a1ba 6740
cb9ed5d7
DN
6741 /* Create: (niters_type) (nelements - elem_misalign)&(nelements - 1) */
6742 iters = fold_build2 (MINUS_EXPR, type, nelements_tree, elem_misalign);
6743 iters = fold_build2 (BIT_AND_EXPR, type, iters, nelements_minus_1);
5f55a1ba
DN
6744 iters = fold_convert (niters_type, iters);
6745 }
6746
f7064d11
DN
6747 /* Create: prolog_loop_niters = min (iters, loop_niters) */
6748 /* If the loop bound is known at compile time we already verified that it is
6749 greater than vf; since the misalignment ('iters') is at most vf, there's
6750 no need to generate the MIN_EXPR in this case. */
6751 if (TREE_CODE (loop_niters) != INTEGER_CST)
80b4a8d9 6752 iters = fold_build2 (MIN_EXPR, niters_type, iters, loop_niters);
f7064d11 6753
00518cb1 6754 if (vect_print_dump_info (REPORT_DETAILS))
5f55a1ba
DN
6755 {
6756 fprintf (vect_dump, "niters for prolog loop: ");
6757 print_generic_expr (vect_dump, iters, TDF_SLIM);
6758 }
6759
f7064d11 6760 var = create_tmp_var (niters_type, "prolog_loop_niters");
f004ab02 6761 add_referenced_var (var);
f7064d11
DN
6762 iters_name = force_gimple_operand (iters, &stmt, false, var);
6763
6764 /* Insert stmt on loop preheader edge. */
f7064d11
DN
6765 if (stmt)
6766 {
6767 basic_block new_bb = bsi_insert_on_edge_immediate (pe, stmt);
6768 gcc_assert (!new_bb);
6769 }
6770
6771 return iters_name;
6772}
6773
6774
5f55a1ba 6775/* Function vect_update_init_of_dr
f7064d11
DN
6776
6777 NITERS iterations were peeled from LOOP. DR represents a data reference
6778 in LOOP. This function updates the information recorded in DR to
6779 account for the fact that the first NITERS iterations had already been
86a07404 6780 executed. Specifically, it updates the OFFSET field of DR. */
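/* A minimal sketch: the function below computes

     DR_OFFSET (dr) += NITERS * DR_STEP (dr);

   e.g. (hypothetical numbers) peeling NITERS == 2 iterations of a
   reference with a 4-byte step advances its offset by 8 bytes. */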
f7064d11
DN
6781
6782static void
5f55a1ba 6783vect_update_init_of_dr (struct data_reference *dr, tree niters)
f7064d11 6784{
86a07404 6785 tree offset = DR_OFFSET (dr);
f7064d11 6786
86a07404 6787 niters = fold_build2 (MULT_EXPR, TREE_TYPE (niters), niters, DR_STEP (dr));
987b67bc 6788 offset = fold_build2 (PLUS_EXPR, TREE_TYPE (offset), offset, niters);
86a07404 6789 DR_OFFSET (dr) = offset;
f7064d11
DN
6790}
6791
6792
6793/* Function vect_update_inits_of_drs
6794
6795 NITERS iterations were peeled from the loop represented by LOOP_VINFO.
6796 This function updates the information recorded for the data references in
6797 the loop to account for the fact that the first NITERS iterations had
98120f62
UB
6798 already been executed. Specifically, it updates the initial_condition of
6799 the access_function of all the data_references in the loop. */
f7064d11
DN
6800
6801static void
6802vect_update_inits_of_drs (loop_vec_info loop_vinfo, tree niters)
6803{
6804 unsigned int i;
ebf78a47
SP
6805 VEC (data_reference_p, heap) *datarefs = LOOP_VINFO_DATAREFS (loop_vinfo);
6806 struct data_reference *dr;
f7064d11 6807
98120f62 6808 if (vect_print_dump_info (REPORT_DETAILS))
f7064d11
DN
6809 fprintf (vect_dump, "=== vect_update_inits_of_dr ===");
6810
ebf78a47
SP
6811 for (i = 0; VEC_iterate (data_reference_p, datarefs, i, dr); i++)
6812 vect_update_init_of_dr (dr, niters);
f7064d11
DN
6813}
6814
6815
6816/* Function vect_do_peeling_for_alignment
6817
6818 Peel the first 'niters' iterations of the loop represented by LOOP_VINFO.
6819 'niters' is set to the misalignment of one of the data references in the
6820 loop, thereby forcing it to refer to an aligned location at the beginning
6821 of the execution of this loop. The data reference for which we are
6822 peeling is recorded in LOOP_VINFO_UNALIGNED_DR. */
6823
6824static void
d73be268 6825vect_do_peeling_for_alignment (loop_vec_info loop_vinfo)
f7064d11
DN
6826{
6827 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
6828 tree niters_of_prolog_loop, ni_name;
6829 tree n_iters;
6830 struct loop *new_loop;
749cc4b1
HJ
6831 bool check_profitability = false;
6832 unsigned int th = 0;
6833 int min_profitable_iters;
f7064d11 6834
00518cb1 6835 if (vect_print_dump_info (REPORT_DETAILS))
f7064d11
DN
6836 fprintf (vect_dump, "=== vect_do_peeling_for_alignment ===");
6837
9498a22f
RH
6838 initialize_original_copy_tables ();
6839
f7064d11
DN
6840 ni_name = vect_build_loop_niters (loop_vinfo);
6841 niters_of_prolog_loop = vect_gen_niters_for_prolog_loop (loop_vinfo, ni_name);
6842
749cc4b1
HJ
6843
 6844 /* Perform the cost model check here if it was not done during versioning. */
6845 if (!VEC_length (tree, LOOP_VINFO_MAY_MISALIGN_STMTS (loop_vinfo))
6846 && !VEC_length (ddr_p, LOOP_VINFO_MAY_ALIAS_DDRS (loop_vinfo)))
6847 {
6848 check_profitability = true;
6849
6850 /* Get profitability threshold for vectorized loop. */
6851 min_profitable_iters = LOOP_VINFO_COST_MODEL_MIN_ITERS (loop_vinfo);
6852
6853 th = conservative_cost_threshold (loop_vinfo,
6854 min_profitable_iters);
6855 }
6856
f7064d11 6857 /* Peel the prolog loop and iterate it niters_of_prolog_loop. */
749cc4b1
HJ
6858 new_loop =
6859 slpeel_tree_peel_loop_to_edge (loop, loop_preheader_edge (loop),
6860 niters_of_prolog_loop, ni_name, true,
6861 th, check_profitability);
6862
f7064d11 6863 gcc_assert (new_loop);
61d3cdbb 6864#ifdef ENABLE_CHECKING
f7064d11
DN
6865 slpeel_verify_cfg_after_peeling (new_loop, loop);
6866#endif
6867
6868 /* Update number of times loop executes. */
6869 n_iters = LOOP_VINFO_NITERS (loop_vinfo);
987b67bc
KH
6870 LOOP_VINFO_NITERS (loop_vinfo) = fold_build2 (MINUS_EXPR,
6871 TREE_TYPE (n_iters), n_iters, niters_of_prolog_loop);
f7064d11
DN
6872
6873 /* Update the init conditions of the access functions of all data refs. */
6874 vect_update_inits_of_drs (loop_vinfo, niters_of_prolog_loop);
6875
6876 /* After peeling we have to reset scalar evolution analyzer. */
6877 scev_reset ();
6878
9498a22f 6879 free_original_copy_tables ();
f7064d11
DN
6880}
6881
6882
c12cc930
KB
6883/* Function vect_create_cond_for_align_checks.
6884
6885 Create a conditional expression that represents the alignment checks for
6886 all of data references (array element references) whose alignment must be
6887 checked at runtime.
6888
6889 Input:
749cc4b1
HJ
6890 COND_EXPR - input conditional expression. New conditions will be chained
6891 with logical AND operation.
c12cc930
KB
6892 LOOP_VINFO - two fields of the loop information are used.
6893 LOOP_VINFO_PTR_MASK is the mask used to check the alignment.
6894 LOOP_VINFO_MAY_MISALIGN_STMTS contains the refs to be checked.
6895
6896 Output:
6897 COND_EXPR_STMT_LIST - statements needed to construct the conditional
6898 expression.
6899 The returned value is the conditional expression to be used in the if
6900 statement that controls which version of the loop gets executed at runtime.
6901
6902 The algorithm makes two assumptions:
6903 1) The number of bytes "n" in a vector is a power of 2.
6904 2) An address "a" is aligned if a%n is zero and that this
6905 test can be done as a&(n-1) == 0. For example, for 16
6906 byte vectors the test is a&0xf == 0. */
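/* A sketch of the generated sequence for two references (temporary
   names follow the sprintf patterns below; values are hypothetical,
   mask == 0xf for 16-byte vectors):

     addr2int0 = (int_ptrsize_type) &a[first_vector];
     addr2int1 = (int_ptrsize_type) &b[first_vector];
     orptrs1   = addr2int0 | addr2int1;
     andmask   = orptrs1 & 0xf;
     cond      = (andmask == 0);   <-- true iff both are 16-byte aligned */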
6907
749cc4b1 6908static void
c12cc930 6909vect_create_cond_for_align_checks (loop_vec_info loop_vinfo,
749cc4b1 6910 tree *cond_expr,
c12cc930
KB
6911 tree *cond_expr_stmt_list)
6912{
468c2ac0 6913 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
c12cc930
KB
6914 VEC(tree,heap) *may_misalign_stmts
6915 = LOOP_VINFO_MAY_MISALIGN_STMTS (loop_vinfo);
ebb07520 6916 tree ref_stmt, tmp;
c12cc930
KB
6917 int mask = LOOP_VINFO_PTR_MASK (loop_vinfo);
6918 tree mask_cst;
6919 unsigned int i;
6920 tree psize;
6921 tree int_ptrsize_type;
6922 char tmp_name[20];
6923 tree or_tmp_name = NULL_TREE;
6924 tree and_tmp, and_tmp_name, and_stmt;
6925 tree ptrsize_zero;
749cc4b1 6926 tree part_cond_expr;
c12cc930
KB
6927
6928 /* Check that mask is one less than a power of 2, i.e., mask is
6929 all zeros followed by all ones. */
6930 gcc_assert ((mask != 0) && ((mask & (mask+1)) == 0));
6931
6932 /* CHECKME: what is the best integer or unsigned type to use to hold a
6933 cast from a pointer value? */
6934 psize = TYPE_SIZE (ptr_type_node);
6935 int_ptrsize_type
6936 = lang_hooks.types.type_for_size (tree_low_cst (psize, 1), 0);
6937
 6938 /* Create expression (mask & (dr_1 | ... | dr_n)) where dr_i is the address
6939 of the first vector of the i'th data reference. */
6940
6941 for (i = 0; VEC_iterate (tree, may_misalign_stmts, i, ref_stmt); i++)
6942 {
6943 tree new_stmt_list = NULL_TREE;
6944 tree addr_base;
6945 tree addr_tmp, addr_tmp_name, addr_stmt;
6946 tree or_tmp, new_or_tmp_name, or_stmt;
6947
6948 /* create: addr_tmp = (int)(address_of_first_vector) */
6949 addr_base = vect_create_addr_base_for_vector_ref (ref_stmt,
468c2ac0 6950 &new_stmt_list, NULL_TREE, loop);
c12cc930
KB
6951
6952 if (new_stmt_list != NULL_TREE)
6953 append_to_statement_list_force (new_stmt_list, cond_expr_stmt_list);
6954
6955 sprintf (tmp_name, "%s%d", "addr2int", i);
6956 addr_tmp = create_tmp_var (int_ptrsize_type, tmp_name);
f004ab02 6957 add_referenced_var (addr_tmp);
c12cc930
KB
6958 addr_tmp_name = make_ssa_name (addr_tmp, NULL_TREE);
6959 addr_stmt = fold_convert (int_ptrsize_type, addr_base);
ebb07520 6960 addr_stmt = build_gimple_modify_stmt (addr_tmp_name, addr_stmt);
c12cc930
KB
6961 SSA_NAME_DEF_STMT (addr_tmp_name) = addr_stmt;
6962 append_to_statement_list_force (addr_stmt, cond_expr_stmt_list);
6963
 6964 /* The addresses are ORed together. */
6965
6966 if (or_tmp_name != NULL_TREE)
6967 {
6968 /* create: or_tmp = or_tmp | addr_tmp */
6969 sprintf (tmp_name, "%s%d", "orptrs", i);
6970 or_tmp = create_tmp_var (int_ptrsize_type, tmp_name);
f004ab02 6971 add_referenced_var (or_tmp);
c12cc930 6972 new_or_tmp_name = make_ssa_name (or_tmp, NULL_TREE);
ebb07520
RS
6973 tmp = build2 (BIT_IOR_EXPR, int_ptrsize_type,
6974 or_tmp_name, addr_tmp_name);
6975 or_stmt = build_gimple_modify_stmt (new_or_tmp_name, tmp);
c12cc930
KB
6976 SSA_NAME_DEF_STMT (new_or_tmp_name) = or_stmt;
6977 append_to_statement_list_force (or_stmt, cond_expr_stmt_list);
6978 or_tmp_name = new_or_tmp_name;
6979 }
6980 else
6981 or_tmp_name = addr_tmp_name;
6982
6983 } /* end for i */
6984
6985 mask_cst = build_int_cst (int_ptrsize_type, mask);
6986
6987 /* create: and_tmp = or_tmp & mask */
6988 and_tmp = create_tmp_var (int_ptrsize_type, "andmask" );
f004ab02 6989 add_referenced_var (and_tmp);
c12cc930
KB
6990 and_tmp_name = make_ssa_name (and_tmp, NULL_TREE);
6991
ebb07520
RS
6992 tmp = build2 (BIT_AND_EXPR, int_ptrsize_type, or_tmp_name, mask_cst);
6993 and_stmt = build_gimple_modify_stmt (and_tmp_name, tmp);
c12cc930
KB
6994 SSA_NAME_DEF_STMT (and_tmp_name) = and_stmt;
6995 append_to_statement_list_force (and_stmt, cond_expr_stmt_list);
6996
6997 /* Make and_tmp the left operand of the conditional test against zero.
c0220ea4 6998 If and_tmp has a nonzero bit then some address is unaligned. */
c12cc930 6999 ptrsize_zero = build_int_cst (int_ptrsize_type, 0);
749cc4b1
HJ
7000 part_cond_expr = fold_build2 (EQ_EXPR, boolean_type_node,
7001 and_tmp_name, ptrsize_zero);
7002 if (*cond_expr)
7003 *cond_expr = fold_build2 (TRUTH_AND_EXPR, boolean_type_node,
7004 *cond_expr, part_cond_expr);
7005 else
7006 *cond_expr = part_cond_expr;
c12cc930
KB
7007}
7008
bc1edb77
VK
7009/* Function vect_vfa_segment_size.
7010
 7011 Create an expression that computes the size of the segment
 7012 that will be accessed for a data reference. The function takes into
 7013 account that realignment loads may access one more vector.
7014
7015 Input:
7016 DR: The data reference.
7017 VECT_FACTOR: vectorization factor.
7018
15dc95cb 7019 Return an expression whose value is the size of the segment which will be
bc1edb77
VK
7020 accessed by DR. */
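/* For example (hypothetical numbers): with DR_STEP == 4 and
   VECT_FACTOR == 4 the segment is 16 bytes; if the access uses the
   optimized explicit-realignment scheme, one extra vector (e.g. 16
   more bytes) is added, giving 32. */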
7021
7022static tree
7023vect_vfa_segment_size (struct data_reference *dr, tree vect_factor)
7024{
42cbdeac
VK
7025 tree segment_length = fold_build2 (MULT_EXPR, integer_type_node,
7026 DR_STEP (dr), vect_factor);
bc1edb77 7027
468c2ac0 7028 if (vect_supportable_dr_alignment (dr) == dr_explicit_realign_optimized)
bc1edb77 7029 {
42cbdeac
VK
7030 tree vector_size = TYPE_SIZE_UNIT
7031 (STMT_VINFO_VECTYPE (vinfo_for_stmt (DR_STMT (dr))));
bc1edb77 7032
42cbdeac
VK
7033 segment_length = fold_build2 (PLUS_EXPR, integer_type_node,
7034 segment_length, vector_size);
bc1edb77 7035 }
42cbdeac 7036 return fold_convert (sizetype, segment_length);
bc1edb77
VK
7037}
7038
7039/* Function vect_create_cond_for_alias_checks.
7040
7041 Create a conditional expression that represents the run-time checks for
7042 overlapping of address ranges represented by a list of data references
7043 relations passed as input.
7044
7045 Input:
7046 COND_EXPR - input conditional expression. New conditions will be chained
749cc4b1 7047 with logical AND operation.
bc1edb77
VK
7048 LOOP_VINFO - field LOOP_VINFO_MAY_ALIAS_STMTS contains the list of ddrs
7049 to be checked.
7050
7051 Output:
7052 COND_EXPR - conditional expression.
7053 COND_EXPR_STMT_LIST - statements needed to construct the conditional
7054 expression.
42cbdeac
VK
7055
7056
bc1edb77
VK
7057 The returned value is the conditional expression to be used in the if
7058 statement that controls which version of the loop gets executed at runtime.
7059*/
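/* An illustrative instance (hypothetical addresses and lengths): for a
   store to a and a load from b, each with a 16-byte segment, the
   generated test for the pair is

     ((a + 16) < b) || ((b + 16) < a)

   which holds iff the two segments do not overlap. */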
7060
7061static void
7062vect_create_cond_for_alias_checks (loop_vec_info loop_vinfo,
7063 tree * cond_expr,
7064 tree * cond_expr_stmt_list)
7065{
468c2ac0 7066 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
bc1edb77
VK
7067 VEC (ddr_p, heap) * may_alias_ddrs =
7068 LOOP_VINFO_MAY_ALIAS_DDRS (loop_vinfo);
7069 tree vect_factor =
7070 build_int_cst (integer_type_node, LOOP_VINFO_VECT_FACTOR (loop_vinfo));
7071
7072 ddr_p ddr;
7073 unsigned int i;
7074 tree part_cond_expr;
7075
7076 /* Create expression
 7077 (((store_ptr_0 + store_segment_length_0) < load_ptr_0)
 7078 || ((load_ptr_0 + load_segment_length_0) < store_ptr_0))
 7079 &&
 7080 ...
 7081 &&
 7082 (((store_ptr_n + store_segment_length_n) < load_ptr_n)
 7083 || ((load_ptr_n + load_segment_length_n) < store_ptr_n)) */
7084
7085 if (VEC_empty (ddr_p, may_alias_ddrs))
7086 return;
7087
7088 for (i = 0; VEC_iterate (ddr_p, may_alias_ddrs, i, ddr); i++)
7089 {
42cbdeac
VK
7090 struct data_reference *dr_a, *dr_b;
7091 tree dr_group_first_a, dr_group_first_b;
7092 tree addr_base_a, addr_base_b;
7093 tree segment_length_a, segment_length_b;
7094 tree stmt_a, stmt_b;
bc1edb77 7095
42cbdeac
VK
7096 dr_a = DDR_A (ddr);
7097 stmt_a = DR_STMT (DDR_A (ddr));
7098 dr_group_first_a = DR_GROUP_FIRST_DR (vinfo_for_stmt (stmt_a));
7099 if (dr_group_first_a)
7100 {
7101 stmt_a = dr_group_first_a;
7102 dr_a = STMT_VINFO_DATA_REF (vinfo_for_stmt (stmt_a));
7103 }
7104
7105 dr_b = DDR_B (ddr);
7106 stmt_b = DR_STMT (DDR_B (ddr));
7107 dr_group_first_b = DR_GROUP_FIRST_DR (vinfo_for_stmt (stmt_b));
7108 if (dr_group_first_b)
7109 {
7110 stmt_b = dr_group_first_b;
7111 dr_b = STMT_VINFO_DATA_REF (vinfo_for_stmt (stmt_b));
7112 }
7113
7114 addr_base_a =
bc1edb77 7115 vect_create_addr_base_for_vector_ref (stmt_a, cond_expr_stmt_list,
468c2ac0 7116 NULL_TREE, loop);
42cbdeac 7117 addr_base_b =
bc1edb77 7118 vect_create_addr_base_for_vector_ref (stmt_b, cond_expr_stmt_list,
468c2ac0 7119 NULL_TREE, loop);
bc1edb77 7120
42cbdeac
VK
7121 segment_length_a = vect_vfa_segment_size (dr_a, vect_factor);
7122 segment_length_b = vect_vfa_segment_size (dr_b, vect_factor);
bc1edb77
VK
7123
7124 if (vect_print_dump_info (REPORT_DR_DETAILS))
7125 {
7126 fprintf (vect_dump,
7127 "create runtime check for data references ");
42cbdeac 7128 print_generic_expr (vect_dump, DR_REF (dr_a), TDF_SLIM);
bc1edb77 7129 fprintf (vect_dump, " and ");
42cbdeac 7130 print_generic_expr (vect_dump, DR_REF (dr_b), TDF_SLIM);
bc1edb77
VK
7131 }
7132
7133
7134 part_cond_expr =
7135 fold_build2 (TRUTH_OR_EXPR, boolean_type_node,
7136 fold_build2 (LT_EXPR, boolean_type_node,
7137 fold_build2 (POINTER_PLUS_EXPR, TREE_TYPE (addr_base_a),
7138 addr_base_a,
7139 segment_length_a),
7140 addr_base_b),
7141 fold_build2 (LT_EXPR, boolean_type_node,
7142 fold_build2 (POINTER_PLUS_EXPR, TREE_TYPE (addr_base_b),
7143 addr_base_b,
7144 segment_length_b),
7145 addr_base_a));
7146
7147 if (*cond_expr)
7148 *cond_expr = fold_build2 (TRUTH_AND_EXPR, boolean_type_node,
7149 *cond_expr, part_cond_expr);
7150 else
7151 *cond_expr = part_cond_expr;
7152 }
7153 if (vect_print_dump_info (REPORT_VECTORIZED_LOOPS))
7154 fprintf (vect_dump, "created %u versioning for alias checks.\n",
7155 VEC_length (ddr_p, may_alias_ddrs));
7156
7157}
c12cc930 7158
42cbdeac
VK
7159/* Function vect_loop_versioning.
7160
 7161 If the loop has data references that may or may not be aligned and/or
 7162 has data reference relations whose independence was not proven, then
 7163 two versions of the loop need to be generated, one which is vectorized
 7164 and one which isn't. A test is then generated to control which of the
 7165 loops is executed. The test checks for the alignment of all of the
 7166 data references that may or may not be aligned. An additional
 7167 sequence of runtime tests is generated for each pair of DDRs whose
 7168 independence was not proven. The vectorized version of the loop is
749cc4b1
HJ
7169 executed only if both alias and alignment tests are passed.
7170
 7171 The test generated to check which version of the loop is executed
7172 is modified to also check for profitability as indicated by the
7173 cost model initially. */
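/* Schematically, the generated guard looks like (names illustrative,
   not from the source):

     if (scalar_loop_iters > th
         && all may-misalign addresses are aligned
         && no may-alias segment pair overlaps)
       vectorized loop;
     else
       scalar loop;  */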
42cbdeac
VK
7174
7175static void
7176vect_loop_versioning (loop_vec_info loop_vinfo)
7177{
7178 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
7179 struct loop *nloop;
7180 tree cond_expr = NULL_TREE;
7181 tree cond_expr_stmt_list = NULL_TREE;
7182 basic_block condition_bb;
7183 block_stmt_iterator cond_exp_bsi;
7184 basic_block merge_bb;
7185 basic_block new_exit_bb;
7186 edge new_exit_e, e;
7187 tree orig_phi, new_phi, arg;
7188 unsigned prob = 4 * REG_BR_PROB_BASE / 5;
7189 tree gimplify_stmt_list;
749cc4b1
HJ
7190 tree scalar_loop_iters = LOOP_VINFO_NITERS (loop_vinfo);
7191 int min_profitable_iters = 0;
7192 unsigned int th;
42cbdeac 7193
749cc4b1
HJ
7194 /* Get profitability threshold for vectorized loop. */
7195 min_profitable_iters = LOOP_VINFO_COST_MODEL_MIN_ITERS (loop_vinfo);
7196
7197 th = conservative_cost_threshold (loop_vinfo,
7198 min_profitable_iters);
7199
7200 cond_expr =
7201 build2 (GT_EXPR, boolean_type_node, scalar_loop_iters,
7202 build_int_cst (TREE_TYPE (scalar_loop_iters), th));
7203
7204 cond_expr = force_gimple_operand (cond_expr, &cond_expr_stmt_list,
7205 false, NULL_TREE);
42cbdeac
VK
7206
7207 if (VEC_length (tree, LOOP_VINFO_MAY_MISALIGN_STMTS (loop_vinfo)))
749cc4b1
HJ
7208 vect_create_cond_for_align_checks (loop_vinfo, &cond_expr,
7209 &cond_expr_stmt_list);
42cbdeac
VK
7210
7211 if (VEC_length (ddr_p, LOOP_VINFO_MAY_ALIAS_DDRS (loop_vinfo)))
749cc4b1
HJ
7212 vect_create_cond_for_alias_checks (loop_vinfo, &cond_expr,
7213 &cond_expr_stmt_list);
42cbdeac
VK
7214
7215 cond_expr =
7216 fold_build2 (NE_EXPR, boolean_type_node, cond_expr, integer_zero_node);
7217 cond_expr =
7218 force_gimple_operand (cond_expr, &gimplify_stmt_list, true,
7219 NULL_TREE);
7220 append_to_statement_list (gimplify_stmt_list, &cond_expr_stmt_list);
7221
7222 initialize_original_copy_tables ();
7223 nloop = loop_version (loop, cond_expr, &condition_bb,
7224 prob, prob, REG_BR_PROB_BASE - prob, true);
 7225 free_original_copy_tables ();
7226
7227 /* Loop versioning violates an assumption we try to maintain during
7228 vectorization - that the loop exit block has a single predecessor.
7229 After versioning, the exit block of both loop versions is the same
7230 basic block (i.e. it has two predecessors). Just in order to simplify
7231 following transformations in the vectorizer, we fix this situation
7232 here by adding a new (empty) block on the exit-edge of the loop,
7233 with the proper loop-exit phis to maintain loop-closed-form. */
7234
7235 merge_bb = single_exit (loop)->dest;
7236 gcc_assert (EDGE_COUNT (merge_bb->preds) == 2);
7237 new_exit_bb = split_edge (single_exit (loop));
7238 new_exit_e = single_exit (loop);
7239 e = EDGE_SUCC (new_exit_bb, 0);
7240
7241 for (orig_phi = phi_nodes (merge_bb); orig_phi;
7242 orig_phi = PHI_CHAIN (orig_phi))
7243 {
7244 new_phi = create_phi_node (SSA_NAME_VAR (PHI_RESULT (orig_phi)),
7245 new_exit_bb);
7246 arg = PHI_ARG_DEF_FROM_EDGE (orig_phi, e);
7247 add_phi_arg (new_phi, arg, new_exit_e);
7248 SET_PHI_ARG_DEF (orig_phi, e->dest_idx, PHI_RESULT (new_phi));
7249 }
7250
7251 /* End loop-exit-fixes after versioning. */
7252
7253 update_ssa (TODO_update_ssa);
7254 if (cond_expr_stmt_list)
7255 {
7256 cond_exp_bsi = bsi_last (condition_bb);
7257 bsi_insert_before (&cond_exp_bsi, cond_expr_stmt_list, BSI_SAME_STMT);
7258 }
7259}
7260
805e2059
IR
 7261/* Remove a group of stores (for SLP or interleaving) and free their
7262 stmt_vec_info. */
7263
7264static void
7265vect_remove_stores (tree first_stmt)
7266{
7267 stmt_ann_t ann;
7268 tree next = first_stmt;
7269 tree tmp;
7270 stmt_vec_info next_stmt_info;
7271 block_stmt_iterator next_si;
7272
7273 while (next)
7274 {
7275 /* Free the attached stmt_vec_info and remove the stmt. */
7276 next_si = bsi_for_stmt (next);
7277 bsi_remove (&next_si, true);
7278 next_stmt_info = vinfo_for_stmt (next);
7279 ann = stmt_ann (next);
7280 tmp = DR_GROUP_NEXT_DR (next_stmt_info);
7281 free (next_stmt_info);
7282 set_stmt_info (ann, NULL);
7283 next = tmp;
7284 }
7285}
7286
7287
7288/* Vectorize SLP instance tree in postorder. */
7289
7290static bool
7291vect_schedule_slp_instance (slp_tree node, unsigned int vec_stmts_size)
7292{
7293 tree stmt;
7294 bool strided_store, is_store;
7295 block_stmt_iterator si;
7296 stmt_vec_info stmt_info;
7297
7298 if (!node)
7299 return false;
7300
7301 vect_schedule_slp_instance (SLP_TREE_LEFT (node), vec_stmts_size);
7302 vect_schedule_slp_instance (SLP_TREE_RIGHT (node), vec_stmts_size);
7303
 7304 stmt = VEC_index (tree, SLP_TREE_SCALAR_STMTS (node), 0);
7305 stmt_info = vinfo_for_stmt (stmt);
7306 SLP_TREE_VEC_STMTS (node) = VEC_alloc (tree, heap, vec_stmts_size);
7307 SLP_TREE_NUMBER_OF_VEC_STMTS (node) = vec_stmts_size;
7308
7309 if (vect_print_dump_info (REPORT_DETAILS))
7310 {
7311 fprintf (vect_dump, "------>vectorizing SLP node starting from: ");
7312 print_generic_expr (vect_dump, stmt, TDF_SLIM);
7313 }
7314
7315 si = bsi_for_stmt (stmt);
7316 is_store = vect_transform_stmt (stmt, &si, &strided_store, node);
7317 if (is_store)
7318 {
7319 if (DR_GROUP_FIRST_DR (stmt_info))
7320 /* If IS_STORE is TRUE, the vectorization of the
7321 interleaving chain was completed - free all the stores in
7322 the chain. */
7323 vect_remove_stores (DR_GROUP_FIRST_DR (stmt_info));
7324 else
7325 /* FORNOW: SLP originates only from strided stores. */
7326 gcc_unreachable ();
7327
7328 return true;
7329 }
7330
7331 /* FORNOW: SLP originates only from strided stores. */
7332 return false;
7333}
7334
7335
7336static bool
7337vect_schedule_slp (loop_vec_info loop_vinfo, unsigned int nunits)
7338{
7339 VEC (slp_instance, heap) *slp_instances =
7340 LOOP_VINFO_SLP_INSTANCES (loop_vinfo);
7341 slp_instance instance;
7342 unsigned int vec_stmts_size;
7343 unsigned int group_size, i;
7344 unsigned int vectorization_factor = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
7345 bool is_store = false;
7346
7347 for (i = 0; VEC_iterate (slp_instance, slp_instances, i, instance); i++)
7348 {
7349 group_size = SLP_INSTANCE_GROUP_SIZE (instance);
 7350 /* For each SLP instance calculate the number of vector stmts to be created
 7351 for the scalar stmts in each node of the SLP tree. The number of vector
 7352 elements in one vector iteration is the number of scalar elements in
 7353 one scalar iteration (GROUP_SIZE) multiplied by VF divided by the vector
 7354 size. */
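 /* E.g. (hypothetical numbers): GROUP_SIZE == 8, VF == 4 and
    nunits == 4 give vec_stmts_size == 4 * 8 / 4 == 8 vector stmts
    per SLP node. */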
7355 vec_stmts_size = vectorization_factor * group_size / nunits;
7356
7357 /* Schedule the tree of INSTANCE. */
7358 is_store = vect_schedule_slp_instance (SLP_INSTANCE_TREE (instance),
7359 vec_stmts_size);
7360
7361 if (vect_print_dump_info (REPORT_VECTORIZED_LOOPS)
7362 || vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
7363 fprintf (vect_dump, "vectorizing stmts using SLP.");
7364 }
7365
7366 return is_store;
7367}
7368
f7064d11
DN
7369/* Function vect_transform_loop.
7370
7371 The analysis phase has determined that the loop is vectorizable.
 7372 Vectorize the loop - create vectorized stmts to replace the scalar
7373 stmts in the loop, and update the loop exit condition. */
7374
7375void
d73be268 7376vect_transform_loop (loop_vec_info loop_vinfo)
f7064d11
DN
7377{
7378 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
7379 basic_block *bbs = LOOP_VINFO_BBS (loop_vinfo);
7380 int nbbs = loop->num_nodes;
8fca6de5 7381 block_stmt_iterator si, next_si;
f7064d11
DN
7382 int i;
7383 tree ratio = NULL;
7384 int vectorization_factor = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
98b44b0e 7385 bool strided_store;
805e2059
IR
7386 bool slp_scheduled = false;
7387 unsigned int nunits;
f7064d11 7388
00518cb1 7389 if (vect_print_dump_info (REPORT_DETAILS))
f7064d11 7390 fprintf (vect_dump, "=== vec_transform_loop ===");
749cc4b1
HJ
7391
7392 if (VEC_length (tree, LOOP_VINFO_MAY_MISALIGN_STMTS (loop_vinfo))
7393 || VEC_length (ddr_p, LOOP_VINFO_MAY_ALIAS_DDRS (loop_vinfo)))
7394 vect_loop_versioning (loop_vinfo);
c12cc930 7395
6fc0bb99 7396 /* CHECKME: we wouldn't need this if we called update_ssa once
90ff949f 7397 for all loops. */
38635499 7398 bitmap_zero (vect_memsyms_to_rename);
90ff949f 7399
f7064d11
DN
7400 /* Peel the loop if there are data refs with unknown alignment.
 7401 Only one data ref with unknown alignment is allowed. */
7402
5f55a1ba 7403 if (LOOP_PEELING_FOR_ALIGNMENT (loop_vinfo))
d73be268 7404 vect_do_peeling_for_alignment (loop_vinfo);
f7064d11
DN
7405
7406 /* If the loop has a symbolic number of iterations 'n' (i.e. it's not a
7407 compile time constant), or it is a constant that doesn't divide by the
7408 vectorization factor, then an epilog loop needs to be created.
7409 We therefore duplicate the loop: the original loop will be vectorized,
7410 and will compute the first (n/VF) iterations. The second copy of the loop
7411 will remain scalar and will compute the remaining (n%VF) iterations.
7412 (VF is the vectorization factor). */
7413
7414 if (!LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo)
7415 || (LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo)
7416 && LOOP_VINFO_INT_NITERS (loop_vinfo) % vectorization_factor != 0))
d73be268 7417 vect_do_peeling_for_loop_bound (loop_vinfo, &ratio);
f7064d11
DN
7418 else
7419 ratio = build_int_cst (TREE_TYPE (LOOP_VINFO_NITERS (loop_vinfo)),
7420 LOOP_VINFO_INT_NITERS (loop_vinfo) / vectorization_factor);
7421
7422 /* 1) Make sure the loop header has exactly two entries
7423 2) Make sure we have a preheader basic block. */
7424
7425 gcc_assert (EDGE_COUNT (loop->header->preds) == 2);
7426
598ec7bd 7427 split_edge (loop_preheader_edge (loop));
f7064d11
DN
7428
 7429 /* FORNOW: the vectorizer supports only loops whose body consists
 7430 of one basic block (header + empty latch). When the vectorizer
 7431 supports more involved loop forms, the order in which the BBs are
 7432 traversed will need to be reconsidered. */
7433
7434 for (i = 0; i < nbbs; i++)
7435 {
7436 basic_block bb = bbs[i];
cd38ca7f
DN
7437 stmt_vec_info stmt_info;
7438 tree phi;
7439
7440 for (phi = phi_nodes (bb); phi; phi = PHI_CHAIN (phi))
7441 {
7442 if (vect_print_dump_info (REPORT_DETAILS))
7443 {
7444 fprintf (vect_dump, "------>vectorizing phi: ");
7445 print_generic_expr (vect_dump, phi, TDF_SLIM);
7446 }
7447 stmt_info = vinfo_for_stmt (phi);
7448 if (!stmt_info)
7449 continue;
805e2059 7450
cd38ca7f
DN
7451 if (!STMT_VINFO_RELEVANT_P (stmt_info)
7452 && !STMT_VINFO_LIVE_P (stmt_info))
7453 continue;
7454
7455 if ((TYPE_VECTOR_SUBPARTS (STMT_VINFO_VECTYPE (stmt_info))
7456 != (unsigned HOST_WIDE_INT) vectorization_factor)
7457 && vect_print_dump_info (REPORT_DETAILS))
7458 fprintf (vect_dump, "multiple-types.");
7459
7460 if (STMT_VINFO_DEF_TYPE (stmt_info) == vect_induction_def)
7461 {
7462 if (vect_print_dump_info (REPORT_DETAILS))
7463 fprintf (vect_dump, "transform phi.");
805e2059 7464 vect_transform_stmt (phi, NULL, NULL, NULL);
cd38ca7f
DN
7465 }
7466 }
f7064d11
DN
7467
7468 for (si = bsi_start (bb); !bsi_end_p (si);)
7469 {
7470 tree stmt = bsi_stmt (si);
f7064d11
DN
7471 bool is_store;
7472
00518cb1 7473 if (vect_print_dump_info (REPORT_DETAILS))
f7064d11
DN
7474 {
7475 fprintf (vect_dump, "------>vectorizing statement: ");
7476 print_generic_expr (vect_dump, stmt, TDF_SLIM);
7477 }
d29de1bf 7478
f7064d11 7479 stmt_info = vinfo_for_stmt (stmt);
d29de1bf
DN
7480
7481 /* vector stmts created in the outer-loop during vectorization of
7482 stmts in an inner-loop may not have a stmt_info, and do not
7483 need to be vectorized. */
7484 if (!stmt_info)
7485 {
7486 bsi_next (&si);
7487 continue;
7488 }
7489
61d3cdbb
DN
7490 if (!STMT_VINFO_RELEVANT_P (stmt_info)
7491 && !STMT_VINFO_LIVE_P (stmt_info))
f7064d11
DN
7492 {
7493 bsi_next (&si);
7494 continue;
7495 }
89d67cca 7496
28e44f4f 7497 gcc_assert (STMT_VINFO_VECTYPE (stmt_info));
805e2059
IR
7498 nunits =
7499 (unsigned int) TYPE_VECTOR_SUBPARTS (STMT_VINFO_VECTYPE (stmt_info));
7500 if (!STMT_SLP_TYPE (stmt_info)
7501 && nunits != (unsigned int) vectorization_factor
7502 && vect_print_dump_info (REPORT_DETAILS))
 7503 /* For SLP, VF is set according to the unrolling factor, and not
 7504 to the vector size, hence for SLP this print is not valid. */
7505 fprintf (vect_dump, "multiple-types.");
7506
7507 /* SLP. Schedule all the SLP instances when the first SLP stmt is
7508 reached. */
7509 if (STMT_SLP_TYPE (stmt_info))
7510 {
7511 if (!slp_scheduled)
7512 {
7513 slp_scheduled = true;
7514
7515 if (vect_print_dump_info (REPORT_DETAILS))
7516 fprintf (vect_dump, "=== scheduling SLP instances ===");
61d3cdbb 7517
805e2059
IR
7518 is_store = vect_schedule_slp (loop_vinfo, nunits);
7519
7520 /* IS_STORE is true if STMT is a store. Stores cannot be of
7521 hybrid SLP type. They are removed in
7522 vect_schedule_slp_instance and their vinfo is destroyed. */
7523 if (is_store)
7524 {
7525 bsi_next (&si);
7526 continue;
7527 }
7528 }
7529
7530 /* Hybrid SLP stmts must be vectorized in addition to SLP. */
7531 if (PURE_SLP_STMT (stmt_info))
7532 {
7533 bsi_next (&si);
7534 continue;
7535 }
7536 }
7537
f7064d11 7538 /* -------- vectorize statement ------------ */
00518cb1 7539 if (vect_print_dump_info (REPORT_DETAILS))
f7064d11
DN
7540 fprintf (vect_dump, "transform statement.");
7541
98b44b0e 7542 strided_store = false;
805e2059 7543 is_store = vect_transform_stmt (stmt, &si, &strided_store, NULL);
98b44b0e
IR
7544 if (is_store)
7545 {
7546 stmt_ann_t ann;
805e2059 7547 if (STMT_VINFO_STRIDED_ACCESS (stmt_info))
98b44b0e
IR
7548 {
7549 /* Interleaving. If IS_STORE is TRUE, the vectorization of the
7550 interleaving chain was completed - free all the stores in
7551 the chain. */
7552 tree next = DR_GROUP_FIRST_DR (stmt_info);
7553 tree tmp;
7554 stmt_vec_info next_stmt_info;
7555
7556 while (next)
7557 {
8fca6de5 7558 next_si = bsi_for_stmt (next);
98b44b0e
IR
7559 next_stmt_info = vinfo_for_stmt (next);
7560 /* Free the attached stmt_vec_info and remove the stmt. */
7561 ann = stmt_ann (next);
7562 tmp = DR_GROUP_NEXT_DR (next_stmt_info);
7563 free (next_stmt_info);
7564 set_stmt_info (ann, NULL);
8fca6de5 7565 bsi_remove (&next_si, true);
98b44b0e
IR
7566 next = tmp;
7567 }
7568 bsi_remove (&si, true);
7569 continue;
7570 }
7571 else
7572 {
7573 /* Free the attached stmt_vec_info and remove the stmt. */
7574 ann = stmt_ann (stmt);
7575 free (stmt_info);
7576 set_stmt_info (ann, NULL);
7577 bsi_remove (&si, true);
7578 continue;
7579 }
f7064d11 7580 }
f7064d11
DN
7581 bsi_next (&si);
7582 } /* stmts in BB */
7583 } /* BBs in loop */
7584
7585 slpeel_make_loop_iterate_ntimes (loop, ratio);
7586
38635499 7587 mark_set_for_renaming (vect_memsyms_to_rename);
90ff949f 7588
84d65814
DN
7589 /* The memory tags and pointers in vectorized statements need to
7590 have their SSA forms updated. FIXME, why can't this be delayed
7591 until all the loops have been transformed? */
7592 update_ssa (TODO_update_ssa);
7593
00518cb1 7594 if (vect_print_dump_info (REPORT_VECTORIZED_LOOPS))
f7064d11 7595 fprintf (vect_dump, "LOOP VECTORIZED.");
d29de1bf
DN
7596 if (loop->inner && vect_print_dump_info (REPORT_VECTORIZED_LOOPS))
7597 fprintf (vect_dump, "OUTER LOOP VECTORIZED.");
f7064d11 7598}