1 /* Transformation Utilities for Loop Vectorization.
2 Copyright (C) 2003, 2004, 2005, 2006, 2007 Free Software Foundation, Inc.
3 Contributed by Dorit Naishlos <dorit@il.ibm.com>
4
5 This file is part of GCC.
6
7 GCC is free software; you can redistribute it and/or modify it under
8 the terms of the GNU General Public License as published by the Free
9 Software Foundation; either version 2, or (at your option) any later
10 version.
11
12 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
13 WARRANTY; without even the implied warranty of MERCHANTABILITY or
14 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
15 for more details.
16
17 You should have received a copy of the GNU General Public License
18 along with GCC; see the file COPYING. If not, write to the Free
19 Software Foundation, 51 Franklin Street, Fifth Floor, Boston, MA
20 02110-1301, USA. */
21
22 #include "config.h"
23 #include "system.h"
24 #include "coretypes.h"
25 #include "tm.h"
26 #include "ggc.h"
27 #include "tree.h"
28 #include "target.h"
29 #include "rtl.h"
30 #include "basic-block.h"
31 #include "diagnostic.h"
32 #include "tree-flow.h"
33 #include "tree-dump.h"
34 #include "timevar.h"
35 #include "cfgloop.h"
36 #include "expr.h"
37 #include "optabs.h"
38 #include "params.h"
39 #include "recog.h"
40 #include "tree-data-ref.h"
41 #include "tree-chrec.h"
42 #include "tree-scalar-evolution.h"
43 #include "tree-vectorizer.h"
44 #include "langhooks.h"
45 #include "tree-pass.h"
46 #include "toplev.h"
47 #include "real.h"
48
49 /* Utility functions for the code transformation. */
50 static bool vect_transform_stmt (tree, block_stmt_iterator *, bool *);
51 static tree vect_create_destination_var (tree, tree);
52 static tree vect_create_data_ref_ptr
53 (tree, block_stmt_iterator *, tree, tree *, tree *, bool, tree);
54 static tree vect_create_addr_base_for_vector_ref (tree, tree *, tree);
55 static tree vect_setup_realignment (tree, block_stmt_iterator *, tree *);
56 static tree vect_get_new_vect_var (tree, enum vect_var_kind, const char *);
57 static tree vect_get_vec_def_for_operand (tree, tree, tree *);
58 static tree vect_init_vector (tree, tree, tree);
59 static void vect_finish_stmt_generation
60 (tree stmt, tree vec_stmt, block_stmt_iterator *bsi);
61 static bool vect_is_simple_cond (tree, loop_vec_info);
62 static void update_vuses_to_preheader (tree, struct loop*);
63 static void vect_create_epilog_for_reduction (tree, tree, enum tree_code, tree);
64 static tree get_initial_def_for_reduction (tree, tree, tree *);
65
66 /* Utility functions dealing with loop peeling (not peeling itself). */
67 static void vect_generate_tmps_on_preheader
68 (loop_vec_info, tree *, tree *, tree *);
69 static tree vect_build_loop_niters (loop_vec_info);
70 static void vect_update_ivs_after_vectorizer (loop_vec_info, tree, edge);
71 static tree vect_gen_niters_for_prolog_loop (loop_vec_info, tree);
72 static void vect_update_init_of_dr (struct data_reference *, tree niters);
73 static void vect_update_inits_of_drs (loop_vec_info, tree);
74 static int vect_min_worthwhile_factor (enum tree_code);
75
76
77 /* Function vect_estimate_min_profitable_iters
78
79 Return the number of iterations required for the vector version of the
80 loop to be profitable relative to the cost of the scalar version of the
81 loop.
82
83 TODO: Take profile info into account before making vectorization
84 decisions, if available. */
85
86 int
87 vect_estimate_min_profitable_iters (loop_vec_info loop_vinfo)
88 {
89 int i;
90 int min_profitable_iters;
91 int peel_iters_prologue;
92 int peel_iters_epilogue;
93 int vec_inside_cost = 0;
94 int vec_outside_cost = 0;
95 int scalar_single_iter_cost = 0;
96 int vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
97 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
98 basic_block *bbs = LOOP_VINFO_BBS (loop_vinfo);
99 int nbbs = loop->num_nodes;
100
101 /* Cost model disabled. */
102 if (!flag_vect_cost_model)
103 {
104 if (vect_print_dump_info (REPORT_DETAILS))
105 fprintf (vect_dump, "cost model disabled.");
106 return 0;
107 }
108
109 /* Requires loop versioning tests to handle misalignment.
110 FIXME: Make cost depend on number of stmts in may_misalign list. */
111
112 if (LOOP_VINFO_MAY_MISALIGN_STMTS (loop_vinfo))
113 {
114 vec_outside_cost += TARG_COND_BRANCH_COST;
115 if (vect_print_dump_info (REPORT_DETAILS))
116 fprintf (vect_dump, "cost model: Adding cost of checks for loop "
117 "versioning.\n");
118 }
119
120 /* Requires a prologue loop when peeling to handle misalignment. Add cost of
121 two guards, one for the peeled loop and one for the vector loop. */
122
123 peel_iters_prologue = LOOP_PEELING_FOR_ALIGNMENT (loop_vinfo);
124 if (peel_iters_prologue)
125 {
126 vec_outside_cost += 2 * TARG_COND_BRANCH_COST;
127 if (vect_print_dump_info (REPORT_DETAILS))
128 fprintf (vect_dump, "cost model: Adding cost of checks for "
129 "prologue.\n");
130 }
131
132 /* Requires an epilogue loop to finish up remaining iterations after vector
133 loop. Add cost of two guards, one for the peeled loop and one for the
134 vector loop. */
135
136 if ((peel_iters_prologue < 0)
137 || !LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo)
138 || LOOP_VINFO_INT_NITERS (loop_vinfo) % vf)
139 {
140 vec_outside_cost += 2 * TARG_COND_BRANCH_COST;
141 if (vect_print_dump_info (REPORT_DETAILS))
142 fprintf (vect_dump, "cost model : Adding cost of checks for "
143 "epilogue.\n");
144 }
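/* Editorial worked example (not in the original source): a loop that
   needs versioning, a misalignment prologue, and an epilogue starts
   with vec_outside_cost = (1 + 2 + 2) * TARG_COND_BRANCH_COST from
   the three blocks above.  */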
145
146 /* Count statements in scalar loop. Using this as scalar cost for a single
147 iteration for now.
148
149 TODO: Add outer loop support.
150
151 TODO: Consider assigning different costs to different scalar
152 statements. */
153
154 for (i = 0; i < nbbs; i++)
155 {
156 block_stmt_iterator si;
157 basic_block bb = bbs[i];
158
159 for (si = bsi_start (bb); !bsi_end_p (si); bsi_next (&si))
160 {
161 tree stmt = bsi_stmt (si);
162 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
163 if (!STMT_VINFO_RELEVANT_P (stmt_info)
164 && !STMT_VINFO_LIVE_P (stmt_info))
165 continue;
166 scalar_single_iter_cost++;
167 vec_inside_cost += STMT_VINFO_INSIDE_OF_LOOP_COST (stmt_info);
168 vec_outside_cost += STMT_VINFO_OUTSIDE_OF_LOOP_COST (stmt_info);
169 }
170 }
171
172 /* Add additional cost for the peeled instructions in prologue and epilogue
173 loop.
174
175 FORNOW: If we don't know the value of peel_iters for prologue or epilogue
176 at compile-time, we assume the worst.
177
178 TODO: Build an expression that represents peel_iters for prologue and
179 epilogue to be used in a run-time test. */
180
181 peel_iters_prologue = LOOP_PEELING_FOR_ALIGNMENT (loop_vinfo);
182
183 if (peel_iters_prologue < 0)
184 {
185 peel_iters_prologue = vf - 1;
186 if (vect_print_dump_info (REPORT_DETAILS))
187 fprintf (vect_dump, "cost model: "
188 "prologue peel iters set conservatively.");
189
190 /* If peeling for alignment is unknown, loop bound of main loop becomes
191 unknown. */
192 peel_iters_epilogue = vf - 1;
193 if (vect_print_dump_info (REPORT_DETAILS))
194 fprintf (vect_dump, "cost model: "
195 "epilogue peel iters set conservatively because "
196 "peeling for alignment is unknown .");
197 }
198 else
199 {
200 if (!LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo))
201 {
202 peel_iters_epilogue = vf - 1;
203 if (vect_print_dump_info (REPORT_DETAILS))
204 fprintf (vect_dump, "cost model: "
205 "epilogue peel iters set conservatively because "
206 "loop iterations are unknown .");
207 }
208 else
209 peel_iters_epilogue =
210 (LOOP_VINFO_INT_NITERS (loop_vinfo) - peel_iters_prologue)
211 % vf;
212 }
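/* Editorial worked example (not in the original source): with
   LOOP_VINFO_INT_NITERS = 100, VF = 4 and peel_iters_prologue = 3,
   the epilogue gets (100 - 3) % 4 = 1 iteration.  */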
213
214 vec_outside_cost += (peel_iters_prologue * scalar_single_iter_cost)
215 + (peel_iters_epilogue * scalar_single_iter_cost);
216
217 /* Calculate number of iterations required to make the vector version
218 profitable, relative to the loop bodies only. The following condition
219 must hold true: ((SIC*VF)-VIC)*niters > VOC*VF, where
220 SIC = scalar iteration cost, VIC = vector iteration cost,
221 VOC = vector outside cost and VF = vectorization factor. */
222
223 if ((scalar_single_iter_cost * vf) > vec_inside_cost)
224 {
225 if (vec_outside_cost == 0)
226 min_profitable_iters = 1;
227 else
228 {
229 min_profitable_iters = (vec_outside_cost * vf)
230 / ((scalar_single_iter_cost * vf)
231 - vec_inside_cost);
232
233 if ((scalar_single_iter_cost * vf * min_profitable_iters)
234 <= ((vec_inside_cost * min_profitable_iters)
235 + (vec_outside_cost * vf)))
236 min_profitable_iters++;
237 }
238 }
239 /* Vector version will never be profitable. */
240 else
241 {
242 if (vect_print_dump_info (REPORT_DETAILS))
243 fprintf (vect_dump, "cost model: vector iteration cost = %d "
244 "is divisible by scalar iteration cost = %d by a factor "
245 "greater than or equal to the vectorization factor = %d .",
246 vec_inside_cost, scalar_single_iter_cost, vf);
247 return -1;
248 }
249
250 if (vect_print_dump_info (REPORT_DETAILS))
251 {
252 fprintf (vect_dump, "Cost model analysis: \n");
253 fprintf (vect_dump, " Vector inside of loop cost: %d\n",
254 vec_inside_cost);
255 fprintf (vect_dump, " Vector outside of loop cost: %d\n",
256 vec_outside_cost);
257 fprintf (vect_dump, " Scalar cost: %d\n", scalar_single_iter_cost);
258 fprintf (vect_dump, " prologue iterations: %d\n",
259 peel_iters_prologue);
260 fprintf (vect_dump, " epilogue iterations: %d\n",
261 peel_iters_epilogue);
262 fprintf (vect_dump, " Calculated minimum iters for profitability: %d\n",
263 min_profitable_iters);
264 fprintf (vect_dump, " Actual minimum iters for profitability: %d\n",
265 min_profitable_iters < vf ? vf : min_profitable_iters);
266 }
267
268 return min_profitable_iters < vf ? vf : min_profitable_iters;
269 }
270
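/* Editorial sketch, not part of GCC: a self-contained model of the
   profitability computation above.  The helper name and the example
   costs are invented for illustration only.  */
#include <stdio.h>

static int
min_profitable_iters_sketch (int sic, int vic, int voc, int vf)
{
  int iters;

  /* ((SIC * VF) - VIC) must be positive, or the vector loop body
     never beats the scalar one.  */
  if (sic * vf <= vic)
    return -1;
  if (voc == 0)
    return 1;

  /* niters > (VOC * VF) / ((SIC * VF) - VIC); round up on a tie,
     mirroring the explicit re-check in the function above.  */
  iters = (voc * vf) / (sic * vf - vic);
  if (sic * vf * iters <= vic * iters + voc * vf)
    iters++;

  /* The runtime threshold is never below the vectorization factor.  */
  return iters < vf ? vf : iters;
}

int
main (void)
{
  /* SIC = 1, VIC = 2, VOC = 6, VF = 4: (1*4 - 2) * n > 6*4 gives
     n > 12, so this prints 13.  */
  printf ("%d\n", min_profitable_iters_sketch (1, 2, 6, 4));
  return 0;
}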
271
272 /* TODO: Close dependency between vect_model_*_cost and vectorizable_*
273 functions. Design better to avoid maintenance issues. */
274
275 /* Function vect_model_reduction_cost.
276
277 Models cost for a reduction operation, including the vector ops
278 generated within the strip-mine loop, the initial definition before
279 the loop, and the epilogue code that must be generated. */
280
281 static void
282 vect_model_reduction_cost (stmt_vec_info stmt_info, enum tree_code reduc_code,
283 int ncopies)
284 {
285 int outer_cost = 0;
286 enum tree_code code;
287 optab optab;
288 tree vectype;
289 tree orig_stmt;
290 tree reduction_op;
291 enum machine_mode mode;
292 tree operation = GIMPLE_STMT_OPERAND (STMT_VINFO_STMT (stmt_info), 1);
293 int op_type = TREE_CODE_LENGTH (TREE_CODE (operation));
294
295 /* Cost of reduction op inside loop. */
296 STMT_VINFO_INSIDE_OF_LOOP_COST (stmt_info) += ncopies * TARG_VEC_STMT_COST;
297
298 reduction_op = TREE_OPERAND (operation, op_type-1);
299 vectype = get_vectype_for_scalar_type (TREE_TYPE (reduction_op));
300 mode = TYPE_MODE (vectype);
301 orig_stmt = STMT_VINFO_RELATED_STMT (stmt_info);
302
303 if (!orig_stmt)
304 orig_stmt = STMT_VINFO_STMT (stmt_info);
305
306 code = TREE_CODE (GIMPLE_STMT_OPERAND (orig_stmt, 1));
307
308 /* Add in cost for initial definition. */
309 outer_cost += TARG_VEC_STMT_COST;
310
311 /* Determine cost of epilogue code.
312
313 We have a reduction operator that will reduce the vector in one statement.
314 Also requires scalar extract. */
315
316 if (reduc_code < NUM_TREE_CODES)
317 outer_cost += TARG_VEC_STMT_COST + TARG_VEC_TO_SCALAR_COST;
318 else
319 {
320 int vec_size_in_bits = tree_low_cst (TYPE_SIZE (vectype), 1);
321 tree bitsize =
322 TYPE_SIZE (TREE_TYPE ( GIMPLE_STMT_OPERAND (orig_stmt, 0)));
323 int element_bitsize = tree_low_cst (bitsize, 1);
324 int nelements = vec_size_in_bits / element_bitsize;
325
326 optab = optab_for_tree_code (code, vectype);
327
328 /* We have a whole vector shift available. */
329 if (VECTOR_MODE_P (mode)
330     && optab->handlers[mode].insn_code != CODE_FOR_nothing)
331 /* Final reduction via vector shifts and the reduction operator. Also
332 requires scalar extract. */
333 outer_cost += ((exact_log2(nelements) * 2 + 1) * TARG_VEC_STMT_COST);
334 else
335 /* Use extracts and reduction op for final reduction. For N elements,
336 we have N extracts and N-1 reduction ops. */
337 outer_cost += ((nelements + nelements - 1) * TARG_VEC_STMT_COST);
338 }
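/* Editorial worked example (not in the original source): for a vector
   of 4 elements, the shift scheme above costs
   (exact_log2 (4) * 2 + 1) = 5 vector stmts, while the extract scheme
   costs 4 + 3 = 7.  */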
339
340 STMT_VINFO_OUTSIDE_OF_LOOP_COST (stmt_info) = outer_cost;
341
342 if (vect_print_dump_info (REPORT_DETAILS))
343 fprintf (vect_dump, "vect_model_reduction_cost: inside_cost = %d, "
344 "outside_cost = %d .", STMT_VINFO_INSIDE_OF_LOOP_COST (stmt_info),
345 STMT_VINFO_OUTSIDE_OF_LOOP_COST (stmt_info));
346 }
347
348
349 /* Function vect_model_induction_cost.
350
351 Models cost for induction operations. */
352
353 static void
354 vect_model_induction_cost (stmt_vec_info stmt_info, int ncopies)
355 {
356 /* Loop cost for vec_loop. */
357 STMT_VINFO_INSIDE_OF_LOOP_COST (stmt_info) = ncopies * TARG_VEC_STMT_COST;
358 /* Prologue cost for vec_init and vec_step. */
359 STMT_VINFO_OUTSIDE_OF_LOOP_COST (stmt_info) = 2 * TARG_VEC_STMT_COST;
360
361 if (vect_print_dump_info (REPORT_DETAILS))
362 fprintf (vect_dump, "vect_model_induction_cost: inside_cost = %d, "
363 "outside_cost = %d .", STMT_VINFO_INSIDE_OF_LOOP_COST (stmt_info),
364 STMT_VINFO_OUTSIDE_OF_LOOP_COST (stmt_info));
365 }
366
367
368 /* Function vect_model_simple_cost.
369
370 Models cost for simple operations, i.e. those that only emit ncopies of a
371 single op. Right now, this does not account for multiple insns that could
372 be generated for the single vector op. We will handle that shortly. */
373
374 static void
375 vect_model_simple_cost (stmt_vec_info stmt_info, int ncopies)
376 {
377 STMT_VINFO_INSIDE_OF_LOOP_COST (stmt_info) = ncopies * TARG_VEC_STMT_COST;
378
379 if (vect_print_dump_info (REPORT_DETAILS))
380 fprintf (vect_dump, "vect_model_simple_cost: inside_cost = %d, "
381 "outside_cost = %d .", STMT_VINFO_INSIDE_OF_LOOP_COST (stmt_info),
382 STMT_VINFO_OUTSIDE_OF_LOOP_COST (stmt_info));
383 }
384
385
386 /* Function vect_cost_strided_group_size
387
388 For strided load or store, return the group_size only if it is the first
389 load or store of a group, else return 1. This ensures that group size is
390 only returned once per group. */
391
392 static int
393 vect_cost_strided_group_size (stmt_vec_info stmt_info)
394 {
395 tree first_stmt = DR_GROUP_FIRST_DR (stmt_info);
396
397 if (first_stmt == STMT_VINFO_STMT (stmt_info))
398 return DR_GROUP_SIZE (stmt_info);
399
400 return 1;
401 }
402
403
404 /* Function vect_model_store_cost
405
406 Models cost for stores. In the case of strided accesses, one access
407 has the overhead of the strided access attributed to it. */
408
409 static void
410 vect_model_store_cost (stmt_vec_info stmt_info, int ncopies)
411 {
412 int cost = 0;
413 int group_size;
414
415 /* Strided access? */
416 if (DR_GROUP_FIRST_DR (stmt_info))
417 group_size = vect_cost_strided_group_size (stmt_info);
418 /* Not a strided access. */
419 else
420 group_size = 1;
421
422 /* Is this an access in a group of stores, which provide strided access?
423 If so, add in the cost of the permutes. */
424 if (group_size > 1)
425 {
426 /* Uses a high and low interleave operation for each needed permute. */
427 cost = ncopies * exact_log2(group_size) * group_size
428 * TARG_VEC_STMT_COST;
429
430 if (vect_print_dump_info (REPORT_DETAILS))
431 fprintf (vect_dump, "vect_model_store_cost: strided group_size = %d .",
432 group_size);
433
434 }
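/* Editorial worked example (not in the original source): for
   group_size = 4 and ncopies = 1 the formula above charges
   exact_log2 (4) * 4 = 8 vector stmts, i.e. two rounds of four
   high/low interleaves.  */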
435
436 /* Costs of the stores. */
437 cost += ncopies * TARG_VEC_STORE_COST;
438
439 STMT_VINFO_INSIDE_OF_LOOP_COST (stmt_info) = cost;
440
441 if (vect_print_dump_info (REPORT_DETAILS))
442 fprintf (vect_dump, "vect_model_store_cost: inside_cost = %d, "
443 "outside_cost = %d .", STMT_VINFO_INSIDE_OF_LOOP_COST (stmt_info),
444 STMT_VINFO_OUTSIDE_OF_LOOP_COST (stmt_info));
445 }
446
447
448 /* Function vect_model_load_cost
449
450 Models cost for loads. In the case of strided accesses, one access in the
451 group has the overhead of the strided access attributed to it. Since unaligned
452 accesses are supported for loads, we also account for the costs of the
453 access scheme chosen. */
454
455 static void
456 vect_model_load_cost (stmt_vec_info stmt_info, int ncopies)
457
458 {
459 int inner_cost = 0;
460 int group_size;
461 int alignment_support_scheme;
462 tree first_stmt;
463 struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info), *first_dr;
464
465 /* Strided accesses? */
466 first_stmt = DR_GROUP_FIRST_DR (stmt_info);
467 if (first_stmt)
468 {
469 group_size = vect_cost_strided_group_size (stmt_info);
470 first_dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt));
471 }
472 /* Not a strided access. */
473 else
474 {
475 group_size = 1;
476 first_dr = dr;
477 }
478
479 alignment_support_scheme = vect_supportable_dr_alignment (first_dr);
480
481 /* Is this an access in a group of loads providing strided access?
482 If so, add in the cost of the permutes. */
483 if (group_size > 1)
484 {
485 /* Uses even and odd extract operations for each needed permute. */
486 inner_cost = ncopies * exact_log2(group_size) * group_size
487 * TARG_VEC_STMT_COST;
488
489 if (vect_print_dump_info (REPORT_DETAILS))
490 fprintf (vect_dump, "vect_model_load_cost: strided group_size = %d .",
491 group_size);
492
493 }
494
495 /* The loads themselves. */
496 switch (alignment_support_scheme)
497 {
498 case dr_aligned:
499 {
500 inner_cost += ncopies * TARG_VEC_LOAD_COST;
501
502 if (vect_print_dump_info (REPORT_DETAILS))
503 fprintf (vect_dump, "vect_model_load_cost: aligned.");
504
505 break;
506 }
507 case dr_unaligned_supported:
508 {
509 /* Here, we assign an additional cost for the unaligned load. */
510 inner_cost += ncopies * TARG_VEC_UNALIGNED_LOAD_COST;
511
512 if (vect_print_dump_info (REPORT_DETAILS))
513 fprintf (vect_dump, "vect_model_load_cost: unaligned supported by "
514 "hardware.");
515
516 break;
517 }
518 case dr_unaligned_software_pipeline:
519 {
520 int outer_cost = 0;
521
522 if (vect_print_dump_info (REPORT_DETAILS))
523 fprintf (vect_dump, "vect_model_load_cost: unaligned software "
524 "pipelined.");
525
526 /* Unaligned software pipeline has a load of an address, an initial
527 load, and possibly a mask operation to "prime" the loop. However,
528 if this is an access in a group of loads, which provide strided
529 access, then the above cost should only be considered for one
530 access in the group. Inside the loop, there is a load op
531 and a realignment op. */
532
533 if ((!DR_GROUP_FIRST_DR (stmt_info)) || group_size > 1)
534 {
535 outer_cost = 2 * TARG_VEC_STMT_COST;
536 if (targetm.vectorize.builtin_mask_for_load)
537 outer_cost += TARG_VEC_STMT_COST;
538 }
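/* Editorial note (not in the original source): with a
   builtin_mask_for_load hook the prologue thus costs three vector
   stmts (address computation, initial load, mask), and each copy in
   the loop body below costs a load plus a realignment op.  */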
539
540 STMT_VINFO_OUTSIDE_OF_LOOP_COST (stmt_info) = outer_cost;
541
542 inner_cost += ncopies * (TARG_VEC_LOAD_COST + TARG_VEC_STMT_COST);
543
544 break;
545 }
546
547 default:
548 gcc_unreachable ();
549 }
550
551 STMT_VINFO_INSIDE_OF_LOOP_COST (stmt_info) = inner_cost;
552
553 if (vect_print_dump_info (REPORT_DETAILS))
554 fprintf (vect_dump, "vect_model_load_cost: inside_cost = %d, "
555 "outside_cost = %d .", STMT_VINFO_INSIDE_OF_LOOP_COST (stmt_info),
556 STMT_VINFO_OUTSIDE_OF_LOOP_COST (stmt_info));
557
558 }
559
560
561 /* Function vect_get_new_vect_var.
562
563 Returns a name for a new variable. The current naming scheme prepends the
564 prefix "vect_", "stmp_", or "vect_p" (depending on the value of VAR_KIND)
565 to NAME when NAME is provided; otherwise the prefix alone is used (e.g.,
566 NAME "a" with vect_pointer_var yields "vect_pa"). */
567
568 static tree
569 vect_get_new_vect_var (tree type, enum vect_var_kind var_kind, const char *name)
570 {
571 const char *prefix;
572 tree new_vect_var;
573
574 switch (var_kind)
575 {
576 case vect_simple_var:
577 prefix = "vect_";
578 break;
579 case vect_scalar_var:
580 prefix = "stmp_";
581 break;
582 case vect_pointer_var:
583 prefix = "vect_p";
584 break;
585 default:
586 gcc_unreachable ();
587 }
588
589 if (name)
590 new_vect_var = create_tmp_var (type, concat (prefix, name, NULL));
591 else
592 new_vect_var = create_tmp_var (type, prefix);
593
594 /* Mark vector typed variable as a gimple register variable. */
595 if (TREE_CODE (type) == VECTOR_TYPE)
596 DECL_GIMPLE_REG_P (new_vect_var) = true;
597
598 return new_vect_var;
599 }
600
601
602 /* Function vect_create_addr_base_for_vector_ref.
603
604 Create an expression that computes the address of the first memory location
605 that will be accessed for a data reference.
606
607 Input:
608 STMT: The statement containing the data reference.
609 NEW_STMT_LIST: Must be initialized to NULL_TREE or a statement list.
610 OFFSET: Optional. If supplied, it is added to the initial address.
611
612 Output:
613 1. Return an SSA_NAME whose value is the address of the memory location of
614 the first vector of the data reference.
615 2. If NEW_STMT_LIST is not NULL_TREE after return, the caller must insert
616 the statement(s) that define the returned SSA_NAME.
617
618 FORNOW: We are only handling array accesses with step 1. */
619
620 static tree
621 vect_create_addr_base_for_vector_ref (tree stmt,
622 tree *new_stmt_list,
623 tree offset)
624 {
625 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
626 struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
627 tree data_ref_base = unshare_expr (DR_BASE_ADDRESS (dr));
628 tree base_name = build_fold_indirect_ref (data_ref_base);
629 tree vec_stmt;
630 tree addr_base, addr_expr;
631 tree dest, new_stmt;
632 tree base_offset = unshare_expr (DR_OFFSET (dr));
633 tree init = unshare_expr (DR_INIT (dr));
634 tree vect_ptr_type, addr_expr2;
635
636 /* Create base_offset */
637 base_offset = size_binop (PLUS_EXPR, base_offset, init);
638 base_offset = fold_convert (sizetype, base_offset);
639 dest = create_tmp_var (TREE_TYPE (base_offset), "base_off");
640 add_referenced_var (dest);
641 base_offset = force_gimple_operand (base_offset, &new_stmt, false, dest);
642 append_to_statement_list_force (new_stmt, new_stmt_list);
643
644 if (offset)
645 {
646 tree tmp = create_tmp_var (sizetype, "offset");
647 tree step;
648
649 /* For an interleaved access, we divide STEP by the size of the
650 interleaving group. */
651 if (DR_GROUP_SIZE (stmt_info))
652 step = fold_build2 (TRUNC_DIV_EXPR, TREE_TYPE (offset), DR_STEP (dr),
653 build_int_cst (TREE_TYPE (offset),
654 DR_GROUP_SIZE (stmt_info)));
655 else
656 step = DR_STEP (dr);
657
658 add_referenced_var (tmp);
659 offset = fold_build2 (MULT_EXPR, TREE_TYPE (offset), offset, step);
660 base_offset = fold_build2 (PLUS_EXPR, TREE_TYPE (base_offset),
661 base_offset, offset);
662 base_offset = force_gimple_operand (base_offset, &new_stmt, false, tmp);
663 append_to_statement_list_force (new_stmt, new_stmt_list);
664 }
665
666 /* base + base_offset */
667 addr_base = fold_build2 (POINTER_PLUS_EXPR, TREE_TYPE (data_ref_base), data_ref_base,
668 base_offset);
669
670 vect_ptr_type = build_pointer_type (STMT_VINFO_VECTYPE (stmt_info));
671
672 /* addr_expr = addr_base */
673 addr_expr = vect_get_new_vect_var (vect_ptr_type, vect_pointer_var,
674 get_name (base_name));
675 add_referenced_var (addr_expr);
676 vec_stmt = fold_convert (vect_ptr_type, addr_base);
677 addr_expr2 = vect_get_new_vect_var (vect_ptr_type, vect_pointer_var,
678 get_name (base_name));
679 add_referenced_var (addr_expr2);
680 vec_stmt = force_gimple_operand (vec_stmt, &new_stmt, false, addr_expr2);
681 append_to_statement_list_force (new_stmt, new_stmt_list);
682
683 if (vect_print_dump_info (REPORT_DETAILS))
684 {
685 fprintf (vect_dump, "created ");
686 print_generic_expr (vect_dump, vec_stmt, TDF_SLIM);
687 }
688 return vec_stmt;
689 }
690
691
692 /* Function vect_create_data_ref_ptr.
693
694 Create a new pointer to vector type (vp), that points to the first location
695 accessed in the loop by STMT, along with the def-use update chain to
696 appropriately advance the pointer through the loop iterations. Also set
697 aliasing information for the pointer. This vector pointer is used by the
698 callers to this function to create a memory reference expression for vector
699 load/store access.
700
701 Input:
702 1. STMT: a stmt that references memory. Expected to be of the form
703 GIMPLE_MODIFY_STMT <name, data-ref> or
704 GIMPLE_MODIFY_STMT <data-ref, name>.
705 2. BSI: block_stmt_iterator where new stmts can be added.
706 3. OFFSET (optional): an offset to be added to the initial address accessed
707 by the data-ref in STMT.
708 4. ONLY_INIT: indicate if vp is to be updated in the loop, or remain
709 pointing to the initial address.
710 5. TYPE: if not NULL indicates the required type of the data-ref
711
712 Output:
713 1. Declare a new ptr to vector_type, and have it point to the base of the
714 data reference (initial address accessed by the data reference).
715 For example, for vector of type V8HI, the following code is generated:
716
717 v8hi *vp;
718 vp = (v8hi *)initial_address;
719
720 if OFFSET is not supplied:
721 initial_address = &a[init];
722 if OFFSET is supplied:
723 initial_address = &a[init + OFFSET];
724
725 Return the initial_address in INITIAL_ADDRESS.
726
727 2. If ONLY_INIT is true, just return the initial pointer. Otherwise, also
728 update the pointer in each iteration of the loop.
729
730 Return the increment stmt that updates the pointer in PTR_INCR.
731
732 3. Return the pointer. */
733
734 static tree
735 vect_create_data_ref_ptr (tree stmt,
736 block_stmt_iterator *bsi ATTRIBUTE_UNUSED,
737 tree offset, tree *initial_address, tree *ptr_incr,
738 bool only_init, tree type)
739 {
740 tree base_name;
741 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
742 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
743 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
744 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
745 tree vect_ptr_type;
746 tree vect_ptr;
747 tree tag;
748 tree new_temp;
749 tree vec_stmt;
750 tree new_stmt_list = NULL_TREE;
751 edge pe = loop_preheader_edge (loop);
752 basic_block new_bb;
753 tree vect_ptr_init;
754 struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
755
756 base_name = build_fold_indirect_ref (unshare_expr (DR_BASE_ADDRESS (dr)));
757
758 if (vect_print_dump_info (REPORT_DETAILS))
759 {
760 tree data_ref_base = base_name;
761 fprintf (vect_dump, "create vector-pointer variable to type: ");
762 print_generic_expr (vect_dump, vectype, TDF_SLIM);
763 if (TREE_CODE (data_ref_base) == VAR_DECL)
764 fprintf (vect_dump, " vectorizing a one dimensional array ref: ");
765 else if (TREE_CODE (data_ref_base) == ARRAY_REF)
766 fprintf (vect_dump, " vectorizing a multidimensional array ref: ");
767 else if (TREE_CODE (data_ref_base) == COMPONENT_REF)
768 fprintf (vect_dump, " vectorizing a record based array ref: ");
769 else if (TREE_CODE (data_ref_base) == SSA_NAME)
770 fprintf (vect_dump, " vectorizing a pointer ref: ");
771 print_generic_expr (vect_dump, base_name, TDF_SLIM);
772 }
773
774 /** (1) Create the new vector-pointer variable: **/
775 if (type)
776 vect_ptr_type = build_pointer_type (type);
777 else
778 vect_ptr_type = build_pointer_type (vectype);
779 vect_ptr = vect_get_new_vect_var (vect_ptr_type, vect_pointer_var,
780 get_name (base_name));
781 add_referenced_var (vect_ptr);
782
783 /** (2) Add aliasing information to the new vector-pointer:
784 (The points-to info (DR_PTR_INFO) may be defined later.) **/
785
786 tag = DR_SYMBOL_TAG (dr);
787 gcc_assert (tag);
788
789 /* If tag is a variable (and NOT_A_TAG) then a new symbol memory
790 tag must be created, with TAG added to its may-alias list. */
791 if (!MTAG_P (tag))
792 new_type_alias (vect_ptr, tag, DR_REF (dr));
793 else
794 set_symbol_mem_tag (vect_ptr, tag);
795
796 var_ann (vect_ptr)->subvars = DR_SUBVARS (dr);
797
798 /** (3) Calculate the initial address of the vector-pointer, and set
799 the vector-pointer to point to it before the loop: **/
800
801 /* Create: &(base[init_val+offset]) in the loop preheader. */
802 new_temp = vect_create_addr_base_for_vector_ref (stmt, &new_stmt_list,
803 offset);
804 pe = loop_preheader_edge (loop);
805 new_bb = bsi_insert_on_edge_immediate (pe, new_stmt_list);
806 gcc_assert (!new_bb);
807 *initial_address = new_temp;
808
809 /* Create: p = (vectype *) initial_base */
810 vec_stmt = fold_convert (vect_ptr_type, new_temp);
811 vec_stmt = build_gimple_modify_stmt (vect_ptr, vec_stmt);
812 vect_ptr_init = make_ssa_name (vect_ptr, vec_stmt);
813 GIMPLE_STMT_OPERAND (vec_stmt, 0) = vect_ptr_init;
814 new_bb = bsi_insert_on_edge_immediate (pe, vec_stmt);
815 gcc_assert (!new_bb);
816
817
818 /** (4) Handle the updating of the vector-pointer inside the loop: **/
819
820 if (only_init) /* No update in loop is required. */
821 {
822 /* Copy the points-to information if it exists. */
823 if (DR_PTR_INFO (dr))
824 duplicate_ssa_name_ptr_info (vect_ptr_init, DR_PTR_INFO (dr));
825 return vect_ptr_init;
826 }
827 else
828 {
829 block_stmt_iterator incr_bsi;
830 bool insert_after;
831 tree indx_before_incr, indx_after_incr;
832 tree incr;
833
834 standard_iv_increment_position (loop, &incr_bsi, &insert_after);
835 create_iv (vect_ptr_init,
836 fold_convert (vect_ptr_type, TYPE_SIZE_UNIT (vectype)),
837 NULL_TREE, loop, &incr_bsi, insert_after,
838 &indx_before_incr, &indx_after_incr);
839 incr = bsi_stmt (incr_bsi);
840 set_stmt_info (stmt_ann (incr),
841 new_stmt_vec_info (incr, loop_vinfo));
842
843 /* Copy the points-to information if it exists. */
844 if (DR_PTR_INFO (dr))
845 {
846 duplicate_ssa_name_ptr_info (indx_before_incr, DR_PTR_INFO (dr));
847 duplicate_ssa_name_ptr_info (indx_after_incr, DR_PTR_INFO (dr));
848 }
849 merge_alias_info (vect_ptr_init, indx_before_incr);
850 merge_alias_info (vect_ptr_init, indx_after_incr);
851 if (ptr_incr)
852 *ptr_incr = incr;
853
854 return indx_before_incr;
855 }
856 }
857
858
859 /* Function bump_vector_ptr
860
861 Increment a pointer (to a vector type) by vector-size. Connect the new
862 increment stmt to the existing def-use update-chain of the pointer.
863
864 The pointer def-use update-chain before this function:
865 DATAREF_PTR = phi (p_0, p_2)
866 ....
867 PTR_INCR: p_2 = DATAREF_PTR + step
868
869 The pointer def-use update-chain after this function:
870 DATAREF_PTR = phi (p_0, p_2)
871 ....
872 NEW_DATAREF_PTR = DATAREF_PTR + vector_size
873 ....
874 PTR_INCR: p_2 = NEW_DATAREF_PTR + step
875
876 Input:
877 DATAREF_PTR - ssa_name of a pointer (to vector type) that is being updated
878 in the loop.
879 PTR_INCR - the stmt that updates the pointer in each iteration of the loop.
880 The increment amount across iterations is also expected to be
881 vector_size.
882 BSI - location where the new update stmt is to be placed.
883 STMT - the original scalar memory-access stmt that is being vectorized.
884
885 Output: Return NEW_DATAREF_PTR as illustrated above.
886
887 */
888
889 static tree
890 bump_vector_ptr (tree dataref_ptr, tree ptr_incr, block_stmt_iterator *bsi,
891 tree stmt)
892 {
893 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
894 struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
895 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
896 tree vptr_type = TREE_TYPE (dataref_ptr);
897 tree ptr_var = SSA_NAME_VAR (dataref_ptr);
898 tree update = TYPE_SIZE_UNIT (vectype);
899 tree incr_stmt;
900 ssa_op_iter iter;
901 use_operand_p use_p;
902 tree new_dataref_ptr;
903
904 incr_stmt = build_gimple_modify_stmt (ptr_var,
905 build2 (POINTER_PLUS_EXPR, vptr_type,
906 dataref_ptr, update));
907 new_dataref_ptr = make_ssa_name (ptr_var, incr_stmt);
908 GIMPLE_STMT_OPERAND (incr_stmt, 0) = new_dataref_ptr;
909 vect_finish_stmt_generation (stmt, incr_stmt, bsi);
910
911 /* Update the vector-pointer's cross-iteration increment. */
912 FOR_EACH_SSA_USE_OPERAND (use_p, ptr_incr, iter, SSA_OP_USE)
913 {
914 tree use = USE_FROM_PTR (use_p);
915
916 if (use == dataref_ptr)
917 SET_USE (use_p, new_dataref_ptr);
918 else
919 gcc_assert (tree_int_cst_compare (use, update) == 0);
920 }
921
922 /* Copy the points-to information if it exists. */
923 if (DR_PTR_INFO (dr))
924 duplicate_ssa_name_ptr_info (new_dataref_ptr, DR_PTR_INFO (dr));
925 merge_alias_info (new_dataref_ptr, dataref_ptr);
926
927 return new_dataref_ptr;
928 }
929
930
931 /* Function vect_create_destination_var.
932
933 Create a new temporary of type VECTYPE. */
934
935 static tree
936 vect_create_destination_var (tree scalar_dest, tree vectype)
937 {
938 tree vec_dest;
939 const char *new_name;
940 tree type;
941 enum vect_var_kind kind;
942
943 kind = vectype ? vect_simple_var : vect_scalar_var;
944 type = vectype ? vectype : TREE_TYPE (scalar_dest);
945
946 gcc_assert (TREE_CODE (scalar_dest) == SSA_NAME);
947
948 new_name = get_name (scalar_dest);
949 if (!new_name)
950 new_name = "var_";
951 vec_dest = vect_get_new_vect_var (type, kind, new_name);
952 add_referenced_var (vec_dest);
953
954 return vec_dest;
955 }
956
957
958 /* Function vect_init_vector.
959
960 Insert a new stmt (INIT_STMT) that initializes a new vector variable with
961 the vector elements of VECTOR_VAR. Return the DEF of INIT_STMT. It will be
962 used in the vectorization of STMT. */
963
964 static tree
965 vect_init_vector (tree stmt, tree vector_var, tree vector_type)
966 {
967 stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt);
968 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
969 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
970 tree new_var;
971 tree init_stmt;
972 tree vec_oprnd;
973 edge pe;
974 tree new_temp;
975 basic_block new_bb;
976
977 new_var = vect_get_new_vect_var (vector_type, vect_simple_var, "cst_");
978 add_referenced_var (new_var);
979
980 init_stmt = build_gimple_modify_stmt (new_var, vector_var);
981 new_temp = make_ssa_name (new_var, init_stmt);
982 GIMPLE_STMT_OPERAND (init_stmt, 0) = new_temp;
983
984 pe = loop_preheader_edge (loop);
985 new_bb = bsi_insert_on_edge_immediate (pe, init_stmt);
986 gcc_assert (!new_bb);
987
988 if (vect_print_dump_info (REPORT_DETAILS))
989 {
990 fprintf (vect_dump, "created new init_stmt: ");
991 print_generic_expr (vect_dump, init_stmt, TDF_SLIM);
992 }
993
994 vec_oprnd = GIMPLE_STMT_OPERAND (init_stmt, 0);
995 return vec_oprnd;
996 }
997
998
999 /* Function get_initial_def_for_induction
1000
1001 Input:
1002 IV_PHI - the initial value of the induction variable
1003
1004 Output:
1005 Return a vector variable, initialized with the first VF values of
1006 the induction variable. E.g., for an iv with IV_PHI='X' and
1007 evolution S, for a vector of 4 units, we want to return:
1008 [X, X + S, X + 2*S, X + 3*S]. */
1009
1010 static tree
1011 get_initial_def_for_induction (tree iv_phi)
1012 {
1013 stmt_vec_info stmt_vinfo = vinfo_for_stmt (iv_phi);
1014 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
1015 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
1016 tree scalar_type = TREE_TYPE (PHI_RESULT_TREE (iv_phi));
1017 tree vectype = get_vectype_for_scalar_type (scalar_type);
1018 int nunits = TYPE_VECTOR_SUBPARTS (vectype);
1019 edge pe = loop_preheader_edge (loop);
1020 basic_block new_bb;
1021 block_stmt_iterator bsi;
1022 tree vec, vec_init, vec_step, t;
1023 tree access_fn;
1024 tree new_var;
1025 tree new_name;
1026 tree init_stmt;
1027 tree induction_phi, induc_def, new_stmt, vec_def, vec_dest;
1028 tree init_expr, step_expr;
1029 int vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
1030 int i;
1031 bool ok;
1032 int ncopies = vf / nunits;
1033 tree expr;
1034 stmt_vec_info phi_info = vinfo_for_stmt (iv_phi);
1035 tree stmts;
1036 tree stmt = NULL_TREE;
1037 block_stmt_iterator si;
1038 basic_block bb = bb_for_stmt (iv_phi);
1039
1040 gcc_assert (phi_info);
1041 gcc_assert (ncopies >= 1);
1042
1043 /* Find the first insertion point in the BB. */
1044 si = bsi_after_labels (bb);
1045 stmt = bsi_stmt (si);
1046
1047 access_fn = analyze_scalar_evolution (loop, PHI_RESULT (iv_phi));
1048 gcc_assert (access_fn);
1049 ok = vect_is_simple_iv_evolution (loop->num, access_fn,
1050 &init_expr, &step_expr);
1051 gcc_assert (ok);
1052
1053 /* Create the vector that holds the initial_value of the induction. */
1054 new_var = vect_get_new_vect_var (scalar_type, vect_scalar_var, "var_");
1055 add_referenced_var (new_var);
1056
1057 new_name = force_gimple_operand (init_expr, &stmts, false, new_var);
1058 if (stmts)
1059 {
1060 new_bb = bsi_insert_on_edge_immediate (pe, stmts);
1061 gcc_assert (!new_bb);
1062 }
1063
1064 t = NULL_TREE;
1065 t = tree_cons (NULL_TREE, new_name, t);
1066 for (i = 1; i < nunits; i++)
1067 {
1068 tree tmp;
1069
1070 /* Create: new_name = new_name + step_expr */
1071 tmp = fold_build2 (PLUS_EXPR, scalar_type, new_name, step_expr);
1072 init_stmt = build_gimple_modify_stmt (new_var, tmp);
1073 new_name = make_ssa_name (new_var, init_stmt);
1074 GIMPLE_STMT_OPERAND (init_stmt, 0) = new_name;
1075
1076 new_bb = bsi_insert_on_edge_immediate (pe, init_stmt);
1077 gcc_assert (!new_bb);
1078
1079 if (vect_print_dump_info (REPORT_DETAILS))
1080 {
1081 fprintf (vect_dump, "created new init_stmt: ");
1082 print_generic_expr (vect_dump, init_stmt, TDF_SLIM);
1083 }
1084 t = tree_cons (NULL_TREE, new_name, t);
1085 }
1086 vec = build_constructor_from_list (vectype, nreverse (t));
1087 vec_init = vect_init_vector (stmt, vec, vectype);
1088
1089
1090 /* Create the vector that holds the step of the induction. */
1091 expr = build_int_cst (scalar_type, vf);
1092 new_name = fold_build2 (MULT_EXPR, scalar_type, expr, step_expr);
1093 t = NULL_TREE;
1094 for (i = 0; i < nunits; i++)
1095 t = tree_cons (NULL_TREE, unshare_expr (new_name), t);
1096 vec = build_constructor_from_list (vectype, t);
1097 vec_step = vect_init_vector (stmt, vec, vectype);
1098
1099
1100 /* Create the following def-use cycle:
1101 loop prolog:
1102 vec_init = [X, X+S, X+2*S, X+3*S]
1103 vec_step = [VF*S, VF*S, VF*S, VF*S]
1104 loop:
1105 vec_iv = PHI <vec_init, vec_loop>
1106 ...
1107 STMT
1108 ...
1109 vec_loop = vec_iv + vec_step; */
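/* Editorial worked example (not in the original source): with X = 0,
   S = 3 and VF = nunits = 4, vec_init = [0, 3, 6, 9] and
   vec_step = [12, 12, 12, 12], so successive iterations of the cycle
   produce [12, 15, 18, 21], [24, 27, 30, 33], and so on.  */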
1110
1111 /* Create the induction-phi that defines the induction-operand. */
1112 vec_dest = vect_get_new_vect_var (vectype, vect_simple_var, "vec_iv_");
1113 add_referenced_var (vec_dest);
1114 induction_phi = create_phi_node (vec_dest, loop->header);
1115 set_stmt_info (get_stmt_ann (induction_phi),
1116 new_stmt_vec_info (induction_phi, loop_vinfo));
1117 induc_def = PHI_RESULT (induction_phi);
1118
1119 /* Create the iv update inside the loop */
1120 new_stmt = build_gimple_modify_stmt (NULL_TREE,
1121 build2 (PLUS_EXPR, vectype,
1122 induc_def, vec_step));
1123 vec_def = make_ssa_name (vec_dest, new_stmt);
1124 GIMPLE_STMT_OPERAND (new_stmt, 0) = vec_def;
1125 bsi = bsi_for_stmt (stmt);
1126 vect_finish_stmt_generation (stmt, new_stmt, &bsi);
1127
1128 /* Set the arguments of the phi node: */
1129 add_phi_arg (induction_phi, vec_init, loop_preheader_edge (loop));
1130 add_phi_arg (induction_phi, vec_def, loop_latch_edge (loop));
1131
1132
1133 /* In case the vectorization factor (VF) is bigger than the number
1134 of elements that we can fit in a vectype (nunits), we have to generate
1135 more than one vector stmt - i.e., we need to "unroll" the
1136 vector stmt by a factor VF/nunits. For more details see documentation
1137 in vectorizable_operation. */
1138
1139 if (ncopies > 1)
1140 {
1141 stmt_vec_info prev_stmt_vinfo;
1142
1143 /* Create the vector that holds the step of the induction. */
1144 expr = build_int_cst (scalar_type, nunits);
1145 new_name = fold_build2 (MULT_EXPR, scalar_type, expr, step_expr);
1146 t = NULL_TREE;
1147 for (i = 0; i < nunits; i++)
1148 t = tree_cons (NULL_TREE, unshare_expr (new_name), t);
1149 vec = build_constructor_from_list (vectype, t);
1150 vec_step = vect_init_vector (stmt, vec, vectype);
1151
1152 vec_def = induc_def;
1153 prev_stmt_vinfo = vinfo_for_stmt (induction_phi);
1154 for (i = 1; i < ncopies; i++)
1155 {
1156 tree tmp;
1157
1158 /* vec_i = vec_prev + vec_{step*nunits} */
1159 tmp = build2 (PLUS_EXPR, vectype, vec_def, vec_step);
1160 new_stmt = build_gimple_modify_stmt (NULL_TREE, tmp);
1161 vec_def = make_ssa_name (vec_dest, new_stmt);
1162 GIMPLE_STMT_OPERAND (new_stmt, 0) = vec_def;
1163 bsi = bsi_for_stmt (stmt);
1164 vect_finish_stmt_generation (stmt, new_stmt, &bsi);
1165
1166 STMT_VINFO_RELATED_STMT (prev_stmt_vinfo) = new_stmt;
1167 prev_stmt_vinfo = vinfo_for_stmt (new_stmt);
1168 }
1169 }
1170
1171 if (vect_print_dump_info (REPORT_DETAILS))
1172 {
1173 fprintf (vect_dump, "transform induction: created def-use cycle:");
1174 print_generic_expr (vect_dump, induction_phi, TDF_SLIM);
1175 fprintf (vect_dump, "\n");
1176 print_generic_expr (vect_dump, SSA_NAME_DEF_STMT (vec_def), TDF_SLIM);
1177 }
1178
1179 STMT_VINFO_VEC_STMT (phi_info) = induction_phi;
1180 return induc_def;
1181 }
1182
1183
1184 /* Function vect_get_vec_def_for_operand.
1185
1186 OP is an operand in STMT. This function returns a (vector) def that will be
1187 used in the vectorized stmt for STMT.
1188
1189 In the case that OP is an SSA_NAME which is defined in the loop, then
1190 STMT_VINFO_VEC_STMT of the defining stmt holds the relevant def.
1191
1192 In case OP is an invariant or constant, a new stmt that creates a vector def
1193 needs to be introduced. */
1194
1195 static tree
1196 vect_get_vec_def_for_operand (tree op, tree stmt, tree *scalar_def)
1197 {
1198 tree vec_oprnd;
1199 tree vec_stmt;
1200 tree def_stmt;
1201 stmt_vec_info def_stmt_info = NULL;
1202 stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt);
1203 tree vectype = STMT_VINFO_VECTYPE (stmt_vinfo);
1204 int nunits = TYPE_VECTOR_SUBPARTS (vectype);
1205 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
1206 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
1207 tree vec_inv;
1208 tree vec_cst;
1209 tree t = NULL_TREE;
1210 tree def;
1211 int i;
1212 enum vect_def_type dt;
1213 bool is_simple_use;
1214 tree vector_type;
1215
1216 if (vect_print_dump_info (REPORT_DETAILS))
1217 {
1218 fprintf (vect_dump, "vect_get_vec_def_for_operand: ");
1219 print_generic_expr (vect_dump, op, TDF_SLIM);
1220 }
1221
1222 is_simple_use = vect_is_simple_use (op, loop_vinfo, &def_stmt, &def, &dt);
1223 gcc_assert (is_simple_use);
1224 if (vect_print_dump_info (REPORT_DETAILS))
1225 {
1226 if (def)
1227 {
1228 fprintf (vect_dump, "def = ");
1229 print_generic_expr (vect_dump, def, TDF_SLIM);
1230 }
1231 if (def_stmt)
1232 {
1233 fprintf (vect_dump, " def_stmt = ");
1234 print_generic_expr (vect_dump, def_stmt, TDF_SLIM);
1235 }
1236 }
1237
1238 switch (dt)
1239 {
1240 /* Case 1: operand is a constant. */
1241 case vect_constant_def:
1242 {
1243 if (scalar_def)
1244 *scalar_def = op;
1245
1246 /* Create 'vect_cst_ = {cst,cst,...,cst}' */
1247 if (vect_print_dump_info (REPORT_DETAILS))
1248 fprintf (vect_dump, "Create vector_cst. nunits = %d", nunits);
1249
1250 for (i = nunits - 1; i >= 0; --i)
1251 {
1252 t = tree_cons (NULL_TREE, op, t);
1253 }
1254 vector_type = get_vectype_for_scalar_type (TREE_TYPE (op));
1255 vec_cst = build_vector (vector_type, t);
1256
1257 return vect_init_vector (stmt, vec_cst, vector_type);
1258 }
1259
1260 /* Case 2: operand is defined outside the loop - loop invariant. */
1261 case vect_invariant_def:
1262 {
1263 if (scalar_def)
1264 *scalar_def = def;
1265
1266 /* Create 'vec_inv = {inv,inv,..,inv}' */
1267 if (vect_print_dump_info (REPORT_DETAILS))
1268 fprintf (vect_dump, "Create vector_inv.");
1269
1270 for (i = nunits - 1; i >= 0; --i)
1271 {
1272 t = tree_cons (NULL_TREE, def, t);
1273 }
1274
1275 /* FIXME: use build_constructor directly. */
1276 vector_type = get_vectype_for_scalar_type (TREE_TYPE (def));
1277 vec_inv = build_constructor_from_list (vector_type, t);
1278 return vect_init_vector (stmt, vec_inv, vector_type);
1279 }
1280
1281 /* Case 3: operand is defined inside the loop. */
1282 case vect_loop_def:
1283 {
1284 if (scalar_def)
1285 *scalar_def = def_stmt;
1286
1287 /* Get the def from the vectorized stmt. */
1288 def_stmt_info = vinfo_for_stmt (def_stmt);
1289 vec_stmt = STMT_VINFO_VEC_STMT (def_stmt_info);
1290 gcc_assert (vec_stmt);
1291 vec_oprnd = GIMPLE_STMT_OPERAND (vec_stmt, 0);
1292 return vec_oprnd;
1293 }
1294
1295 /* Case 4: operand is defined by a loop header phi - reduction */
1296 case vect_reduction_def:
1297 {
1298 gcc_assert (TREE_CODE (def_stmt) == PHI_NODE);
1299
1300 /* Get the def before the loop */
1301 op = PHI_ARG_DEF_FROM_EDGE (def_stmt, loop_preheader_edge (loop));
1302 return get_initial_def_for_reduction (stmt, op, scalar_def);
1303 }
1304
1305 /* Case 5: operand is defined by loop-header phi - induction. */
1306 case vect_induction_def:
1307 {
1308 gcc_assert (TREE_CODE (def_stmt) == PHI_NODE);
1309
1310 /* Get the def before the loop */
1311 return get_initial_def_for_induction (def_stmt);
1312 }
1313
1314 default:
1315 gcc_unreachable ();
1316 }
1317 }
1318
1319
1320 /* Function vect_get_vec_def_for_stmt_copy
1321
1322 Return a vector-def for an operand. This function is used when the
1323 vectorized stmt to be created (by the caller to this function) is a "copy"
1324 created in case the vectorized result cannot fit in one vector, and several
1325 copies of the vector-stmt are required. In this case the vector-def is
1326 retrieved from the vector stmt recorded in the STMT_VINFO_RELATED_STMT field
1327 of the stmt that defines VEC_OPRND.
1328 DT is the type of the vector def VEC_OPRND.
1329
1330 Context:
1331 In case the vectorization factor (VF) is bigger than the number
1332 of elements that can fit in a vectype (nunits), we have to generate
1333 more than one vector stmt to vectorize the scalar stmt. This situation
1334 arises when there are multiple data-types operated upon in the loop; the
1335 smallest data-type determines the VF, and as a result, when vectorizing
1336 stmts operating on wider types we need to create 'VF/nunits' "copies" of the
1337 vector stmt (each computing a vector of 'nunits' results, and together
1338 computing 'VF' results in each iteration). This function is called when
1339 vectorizing such a stmt (e.g. vectorizing S2 in the illustration below, in
1340 which VF=16 and nunits=4, so the number of copies required is 4):
1341
1342 scalar stmt: vectorized into: STMT_VINFO_RELATED_STMT
1343
1344 S1: x = load VS1.0: vx.0 = memref0 VS1.1
1345 VS1.1: vx.1 = memref1 VS1.2
1346 VS1.2: vx.2 = memref2 VS1.3
1347 VS1.3: vx.3 = memref3
1348
1349 S2: z = x + ... VSnew.0: vz0 = vx.0 + ... VSnew.1
1350 VSnew.1: vz1 = vx.1 + ... VSnew.2
1351 VSnew.2: vz2 = vx.2 + ... VSnew.3
1352 VSnew.3: vz3 = vx.3 + ...
1353
1354 The vectorization of S1 is explained in vectorizable_load.
1355 The vectorization of S2:
1356 To create the first vector-stmt out of the 4 copies - VSnew.0 -
1357 the function 'vect_get_vec_def_for_operand' is called to
1358 get the relevant vector-def for each operand of S2. For operand x it
1359 returns the vector-def 'vx.0'.
1360
1361 To create the remaining copies of the vector-stmt (VSnew.j), this
1362 function is called to get the relevant vector-def for each operand. It is
1363 obtained from the respective VS1.j stmt, which is recorded in the
1364 STMT_VINFO_RELATED_STMT field of the stmt that defines VEC_OPRND.
1365
1366 For example, to obtain the vector-def 'vx.1' in order to create the
1367 vector stmt 'VSnew.1', this function is called with VEC_OPRND='vx.0'.
1368 Given 'vx0' we obtain the stmt that defines it ('VS1.0'); from the
1369 STMT_VINFO_RELATED_STMT field of 'VS1.0' we obtain the next copy - 'VS1.1',
1370 and return its def ('vx.1').
1371 Overall, to create the above sequence this function will be called 3 times:
1372 vx.1 = vect_get_vec_def_for_stmt_copy (dt, vx.0);
1373 vx.2 = vect_get_vec_def_for_stmt_copy (dt, vx.1);
1374 vx.3 = vect_get_vec_def_for_stmt_copy (dt, vx.2); */
1375
1376 static tree
1377 vect_get_vec_def_for_stmt_copy (enum vect_def_type dt, tree vec_oprnd)
1378 {
1379 tree vec_stmt_for_operand;
1380 stmt_vec_info def_stmt_info;
1381
1382 /* Do nothing; can reuse same def. */
1383 if (dt == vect_invariant_def || dt == vect_constant_def)
1384 return vec_oprnd;
1385
1386 vec_stmt_for_operand = SSA_NAME_DEF_STMT (vec_oprnd);
1387 def_stmt_info = vinfo_for_stmt (vec_stmt_for_operand);
1388 gcc_assert (def_stmt_info);
1389 vec_stmt_for_operand = STMT_VINFO_RELATED_STMT (def_stmt_info);
1390 gcc_assert (vec_stmt_for_operand);
1391 vec_oprnd = GIMPLE_STMT_OPERAND (vec_stmt_for_operand, 0);
1392
1393 return vec_oprnd;
1394 }
1395
1396
1397 /* Function vect_finish_stmt_generation.
1398
1399 Insert a new stmt. */
1400
1401 static void
1402 vect_finish_stmt_generation (tree stmt, tree vec_stmt,
1403 block_stmt_iterator *bsi)
1404 {
1405 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1406 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
1407
1408 bsi_insert_before (bsi, vec_stmt, BSI_SAME_STMT);
1409 set_stmt_info (get_stmt_ann (vec_stmt),
1410 new_stmt_vec_info (vec_stmt, loop_vinfo));
1411
1412 if (vect_print_dump_info (REPORT_DETAILS))
1413 {
1414 fprintf (vect_dump, "add new stmt: ");
1415 print_generic_expr (vect_dump, vec_stmt, TDF_SLIM);
1416 }
1417
1418 /* Make sure bsi points to the stmt that is being vectorized. */
1419 gcc_assert (stmt == bsi_stmt (*bsi));
1420
1421 #ifdef USE_MAPPED_LOCATION
1422 SET_EXPR_LOCATION (vec_stmt, EXPR_LOCATION (stmt));
1423 #else
1424 SET_EXPR_LOCUS (vec_stmt, EXPR_LOCUS (stmt));
1425 #endif
1426 }
1427
1428
1429 /* Function get_initial_def_for_reduction
1430
1431 Input:
1432 STMT - a stmt that performs a reduction operation in the loop.
1433 INIT_VAL - the initial value of the reduction variable
1434
1435 Output:
1436 ADJUSTMENT_DEF - a tree that holds a value to be added to the final result
1437 of the reduction (used for adjusting the epilog - see below).
1438 Return a vector variable, initialized according to the operation that STMT
1439 performs. This vector will be used as the initial value of the
1440 vector of partial results.
1441
1442 Option1 (adjust in epilog): Initialize the vector as follows:
1443 add: [0,0,...,0,0]
1444 mult: [1,1,...,1,1]
1445 min/max: [init_val,init_val,..,init_val,init_val]
1446 bit and/or: [init_val,init_val,..,init_val,init_val]
1447 and when necessary (e.g. add/mult case) let the caller know
1448 that it needs to adjust the result by init_val.
1449
1450 Option2: Initialize the vector as follows:
1451 add: [0,0,...,0,init_val]
1452 mult: [1,1,...,1,init_val]
1453 min/max: [init_val,init_val,...,init_val]
1454 bit and/or: [init_val,init_val,...,init_val]
1455 and no adjustments are needed.
1456
1457 For example, for the following code:
1458
1459 s = init_val;
1460 for (i=0;i<n;i++)
1461 s = s + a[i];
1462
1463 STMT is 's = s + a[i]', and the reduction variable is 's'.
1464 For a vector of 4 units, we want to return either [0,0,0,init_val],
1465 or [0,0,0,0] and let the caller know that it needs to adjust
1466 the result at the end by 'init_val'.
1467
1468 FORNOW, we are using the 'adjust in epilog' scheme, because this way the
1469 initialization vector is simpler (same element in all entries).
1470 A cost model should help decide between these two schemes. */
1471
1472 static tree
1473 get_initial_def_for_reduction (tree stmt, tree init_val, tree *adjustment_def)
1474 {
1475 stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt);
1476 tree vectype = STMT_VINFO_VECTYPE (stmt_vinfo);
1477 int nunits = TYPE_VECTOR_SUBPARTS (vectype);
1478 enum tree_code code = TREE_CODE (GIMPLE_STMT_OPERAND (stmt, 1));
1479 tree type = TREE_TYPE (init_val);
1480 tree vecdef;
1481 tree def_for_init;
1482 tree init_def;
1483 tree t = NULL_TREE;
1484 int i;
1485 tree vector_type;
1486
1487 gcc_assert (INTEGRAL_TYPE_P (type) || SCALAR_FLOAT_TYPE_P (type));
1488 vecdef = vect_get_vec_def_for_operand (init_val, stmt, NULL);
1489
1490 switch (code)
1491 {
1492 case WIDEN_SUM_EXPR:
1493 case DOT_PROD_EXPR:
1494 case PLUS_EXPR:
1495 *adjustment_def = init_val;
1496 /* Create a vector of zeros for init_def. */
1497 if (INTEGRAL_TYPE_P (type))
1498 def_for_init = build_int_cst (type, 0);
1499 else
1500 def_for_init = build_real (type, dconst0);
1501 for (i = nunits - 1; i >= 0; --i)
1502 t = tree_cons (NULL_TREE, def_for_init, t);
1503 vector_type = get_vectype_for_scalar_type (TREE_TYPE (def_for_init));
1504 init_def = build_vector (vector_type, t);
1505 break;
1506
1507 case MIN_EXPR:
1508 case MAX_EXPR:
1509 *adjustment_def = NULL_TREE;
1510 init_def = vecdef;
1511 break;
1512
1513 default:
1514 gcc_unreachable ();
1515 }
1516
1517 return init_def;
1518 }
1519
1520
1521 /* Function vect_create_epilog_for_reduction
1522
1523 Create code at the loop-epilog to finalize the result of a reduction
1524 computation.
1525
1526 VECT_DEF is a vector of partial results.
1527 REDUC_CODE is the tree-code for the epilog reduction.
1528 STMT is the scalar reduction stmt that is being vectorized.
1529 REDUCTION_PHI is the phi-node that carries the reduction computation.
1530
1531 This function:
1532 1. Creates the reduction def-use cycle: sets the arguments for
1533 REDUCTION_PHI:
1534 The loop-entry argument is the vectorized initial-value of the reduction.
1535 The loop-latch argument is VECT_DEF - the vector of partial sums.
1536 2. "Reduces" the vector of partial results VECT_DEF into a single result,
1537 by applying the operation specified by REDUC_CODE if available, or by
1538 other means (whole-vector shifts or a scalar loop).
1539 The function also creates a new phi node at the loop exit to preserve
1540 loop-closed form, as illustrated below.
1541
1542 The flow at the entry to this function:
1543
1544 loop:
1545 vec_def = phi <null, null> # REDUCTION_PHI
1546 VECT_DEF = vector_stmt # vectorized form of STMT
1547 s_loop = scalar_stmt # (scalar) STMT
1548 loop_exit:
1549 s_out0 = phi <s_loop> # (scalar) EXIT_PHI
1550 use <s_out0>
1551 use <s_out0>
1552
1553 The above is transformed by this function into:
1554
1555 loop:
1556 vec_def = phi <vec_init, VECT_DEF> # REDUCTION_PHI
1557 VECT_DEF = vector_stmt # vectorized form of STMT
1558 s_loop = scalar_stmt # (scalar) STMT
1559 loop_exit:
1560 s_out0 = phi <s_loop> # (scalar) EXIT_PHI
1561 v_out1 = phi <VECT_DEF> # NEW_EXIT_PHI
1562 v_out2 = reduce <v_out1>
1563 s_out3 = extract_field <v_out2, 0>
1564 s_out4 = adjust_result <s_out3>
1565 use <s_out4>
1566 use <s_out4>
1567 */
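/* Editorial sketch, not part of GCC (the real function follows below):
   "scheme 2" from the comment above, modeled in plain C for a
   4-element additive reduction, with an array standing in for the
   vector register.  The function name is invented for illustration.  */
static int
reduce_by_shifts_sketch (int v[4])
{
  int half, i;

  /* Each round shifts the "vector" right by half its width and adds:
     [a,b,c,d] -> [a+c, b+d, x, x] -> [a+b+c+d, x, x, x].  */
  for (half = 2; half >= 1; half /= 2)
    for (i = 0; i < half; i++)
      v[i] += v[i + half];

  /* Step 2 of the epilog: extract the scalar result from element 0.  */
  return v[0];
}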
1568
1569 static void
1570 vect_create_epilog_for_reduction (tree vect_def, tree stmt,
1571 enum tree_code reduc_code, tree reduction_phi)
1572 {
1573 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1574 tree vectype;
1575 enum machine_mode mode;
1576 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
1577 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
1578 basic_block exit_bb;
1579 tree scalar_dest;
1580 tree scalar_type;
1581 tree new_phi;
1582 block_stmt_iterator exit_bsi;
1583 tree vec_dest;
1584 tree new_temp;
1585 tree new_name;
1586 tree epilog_stmt;
1587 tree new_scalar_dest, exit_phi;
1588 tree bitsize, bitpos, bytesize;
1589 enum tree_code code = TREE_CODE (GIMPLE_STMT_OPERAND (stmt, 1));
1590 tree scalar_initial_def;
1591 tree vec_initial_def;
1592 tree orig_name;
1593 imm_use_iterator imm_iter;
1594 use_operand_p use_p;
1595 bool extract_scalar_result;
1596 tree reduction_op;
1597 tree orig_stmt;
1598 tree use_stmt;
1599 tree operation = GIMPLE_STMT_OPERAND (stmt, 1);
1600 int op_type;
1601
1602 op_type = TREE_OPERAND_LENGTH (operation);
1603 reduction_op = TREE_OPERAND (operation, op_type-1);
1604 vectype = get_vectype_for_scalar_type (TREE_TYPE (reduction_op));
1605 mode = TYPE_MODE (vectype);
1606
1607 /*** 1. Create the reduction def-use cycle ***/
1608
1609 /* 1.1 set the loop-entry arg of the reduction-phi: */
1610 /* For the case of reduction, vect_get_vec_def_for_operand returns
1611 the scalar def before the loop, that defines the initial value
1612 of the reduction variable. */
1613 vec_initial_def = vect_get_vec_def_for_operand (reduction_op, stmt,
1614 &scalar_initial_def);
1615 add_phi_arg (reduction_phi, vec_initial_def, loop_preheader_edge (loop));
1616
1617 /* 1.2 set the loop-latch arg for the reduction-phi: */
1618 add_phi_arg (reduction_phi, vect_def, loop_latch_edge (loop));
1619
1620 if (vect_print_dump_info (REPORT_DETAILS))
1621 {
1622 fprintf (vect_dump, "transform reduction: created def-use cycle:");
1623 print_generic_expr (vect_dump, reduction_phi, TDF_SLIM);
1624 fprintf (vect_dump, "\n");
1625 print_generic_expr (vect_dump, SSA_NAME_DEF_STMT (vect_def), TDF_SLIM);
1626 }
1627
1628
1629 /*** 2. Create epilog code
1630 The reduction epilog code operates across the elements of the vector
1631 of partial results computed by the vectorized loop.
1632 The reduction epilog code consists of:
1633 step 1: compute the scalar result in a vector (v_out2)
1634 step 2: extract the scalar result (s_out3) from the vector (v_out2)
1635 step 3: adjust the scalar result (s_out3) if needed.
1636
1637 Step 1 can be accomplished using one of the following three schemes:
1638 (scheme 1) using reduc_code, if available.
1639 (scheme 2) using whole-vector shifts, if available.
1640 (scheme 3) using a scalar loop. In this case steps 1+2 above are
1641 combined.
1642
1643 The overall epilog code looks like this:
1644
1645 s_out0 = phi <s_loop> # original EXIT_PHI
1646 v_out1 = phi <VECT_DEF> # NEW_EXIT_PHI
1647 v_out2 = reduce <v_out1> # step 1
1648 s_out3 = extract_field <v_out2, 0> # step 2
1649 s_out4 = adjust_result <s_out3> # step 3
1650
1651 (step 3 is optional, and steps 1 and 2 may be combined).
1652 Lastly, the uses of s_out0 are replaced by s_out4.
1653
1654 ***/
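
   /* (The three schemes are tried in the order listed: the target's
      reduc_code when it exists, otherwise log2(nunits) whole-vector
      shift/op pairs, otherwise nunits extracts and nunits-1 scalar
      ops.)  */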
1655
1656 /* 2.1 Create new loop-exit-phi to preserve loop-closed form:
1657 v_out1 = phi <v_loop> */
1658
1659 exit_bb = single_exit (loop)->dest;
1660 new_phi = create_phi_node (SSA_NAME_VAR (vect_def), exit_bb);
1661 SET_PHI_ARG_DEF (new_phi, single_exit (loop)->dest_idx, vect_def);
1662 exit_bsi = bsi_after_labels (exit_bb);
1663
1664 /* 2.2 Get the relevant tree-code to use in the epilog for schemes 2,3
1665 (i.e. when reduc_code is not available) and in the final adjustment
1666 code (if needed). Also get the original scalar reduction variable as
1667 defined in the loop. In case STMT is a "pattern-stmt" (i.e. - it
1668 represents a reduction pattern), the tree-code and scalar-def are
1669 taken from the original stmt that the pattern-stmt (STMT) replaces.
1670 Otherwise (it is a regular reduction) - the tree-code and scalar-def
1671 are taken from STMT. */
1672
1673 orig_stmt = STMT_VINFO_RELATED_STMT (stmt_info);
1674 if (!orig_stmt)
1675 {
1676 /* Regular reduction */
1677 orig_stmt = stmt;
1678 }
1679 else
1680 {
1681 /* Reduction pattern */
1682 stmt_vec_info stmt_vinfo = vinfo_for_stmt (orig_stmt);
1683 gcc_assert (STMT_VINFO_IN_PATTERN_P (stmt_vinfo));
1684 gcc_assert (STMT_VINFO_RELATED_STMT (stmt_vinfo) == stmt);
1685 }
1686 code = TREE_CODE (GIMPLE_STMT_OPERAND (orig_stmt, 1));
1687 scalar_dest = GIMPLE_STMT_OPERAND (orig_stmt, 0);
1688 scalar_type = TREE_TYPE (scalar_dest);
1689 new_scalar_dest = vect_create_destination_var (scalar_dest, NULL);
1690 bitsize = TYPE_SIZE (scalar_type);
1691 bytesize = TYPE_SIZE_UNIT (scalar_type);
1692
1693 /* 2.3 Create the reduction code, using one of the three schemes described
1694 above. */
1695
1696 if (reduc_code < NUM_TREE_CODES)
1697 {
1698 tree tmp;
1699
1700 /*** Case 1: Create:
1701 v_out2 = reduc_expr <v_out1> */
1702
1703 if (vect_print_dump_info (REPORT_DETAILS))
1704 fprintf (vect_dump, "Reduce using direct vector reduction.");
1705
1706 vec_dest = vect_create_destination_var (scalar_dest, vectype);
1707 tmp = build1 (reduc_code, vectype, PHI_RESULT (new_phi));
1708 epilog_stmt = build_gimple_modify_stmt (vec_dest, tmp);
1709 new_temp = make_ssa_name (vec_dest, epilog_stmt);
1710 GIMPLE_STMT_OPERAND (epilog_stmt, 0) = new_temp;
1711 bsi_insert_before (&exit_bsi, epilog_stmt, BSI_SAME_STMT);
1712
1713 extract_scalar_result = true;
1714 }
1715 else
1716 {
1717 enum tree_code shift_code = 0;
1718 bool have_whole_vector_shift = true;
1719 int bit_offset;
1720 int element_bitsize = tree_low_cst (bitsize, 1);
1721 int vec_size_in_bits = tree_low_cst (TYPE_SIZE (vectype), 1);
1722 tree vec_temp;
1723
1724 if (vec_shr_optab->handlers[mode].insn_code != CODE_FOR_nothing)
1725 shift_code = VEC_RSHIFT_EXPR;
1726 else
1727 have_whole_vector_shift = false;
1728
1729 /* Regardless of whether we have a whole vector shift, if we're
1730 emulating the operation via tree-vect-generic, we don't want
1731 to use it. Only the first round of the reduction is likely
1732 to still be profitable via emulation. */
1733 /* ??? It might be better to emit a reduction tree code here, so that
1734 tree-vect-generic can expand the first round via bit tricks. */
1735 if (!VECTOR_MODE_P (mode))
1736 have_whole_vector_shift = false;
1737 else
1738 {
1739 optab optab = optab_for_tree_code (code, vectype);
1740 if (optab->handlers[mode].insn_code == CODE_FOR_nothing)
1741 have_whole_vector_shift = false;
1742 }
1743
1744 if (have_whole_vector_shift)
1745 {
1746 /*** Case 2: Create:
1747 for (offset = VS/2; offset >= element_size; offset/=2)
1748 {
1749 Create: va' = vec_shift <va, offset>
1750 Create: va = vop <va, va'>
1751 } */
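
	  /* For instance (an illustrative sketch, assuming a 128-bit
	     V4SI addition reduction), the loop below emits:
		 va' = vec_shift <va, 64>
		 va  = va + va'
		 va' = vec_shift <va, 32>
		 va  = va + va'
	     after which the element that step 2.4 extracts (element 0
	     on little-endian targets) holds the full sum.  */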
1752
1753 if (vect_print_dump_info (REPORT_DETAILS))
1754 fprintf (vect_dump, "Reduce using vector shifts");
1755
1756 vec_dest = vect_create_destination_var (scalar_dest, vectype);
1757 new_temp = PHI_RESULT (new_phi);
1758
1759 for (bit_offset = vec_size_in_bits/2;
1760 bit_offset >= element_bitsize;
1761 bit_offset /= 2)
1762 {
1763 tree bitpos = size_int (bit_offset);
1764 tree tmp = build2 (shift_code, vectype, new_temp, bitpos);
1765 epilog_stmt = build_gimple_modify_stmt (vec_dest, tmp);
1766 new_name = make_ssa_name (vec_dest, epilog_stmt);
1767 GIMPLE_STMT_OPERAND (epilog_stmt, 0) = new_name;
1768 bsi_insert_before (&exit_bsi, epilog_stmt, BSI_SAME_STMT);
1769
1770 tmp = build2 (code, vectype, new_name, new_temp);
1771 epilog_stmt = build_gimple_modify_stmt (vec_dest, tmp);
1772 new_temp = make_ssa_name (vec_dest, epilog_stmt);
1773 GIMPLE_STMT_OPERAND (epilog_stmt, 0) = new_temp;
1774 bsi_insert_before (&exit_bsi, epilog_stmt, BSI_SAME_STMT);
1775 }
1776
1777 extract_scalar_result = true;
1778 }
1779 else
1780 {
1781 tree rhs;
1782
1783 /*** Case 3: Create:
1784 s = extract_field <v_out2, 0>
1785 for (offset = element_size;
1786 offset < vector_size;
1787 offset += element_size;)
1788 {
1789 Create: s' = extract_field <v_out2, offset>
1790 Create: s = op <s, s'>
1791 } */
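
	  /* For a V4SI vector this expands to the straight-line
	     sequence (illustrative; bit sizes/positions in bits):
		 s  = BIT_FIELD_REF <v_out2, 32, 0>
		 s' = BIT_FIELD_REF <v_out2, 32, 32>;  s = op <s, s'>
		 s' = BIT_FIELD_REF <v_out2, 32, 64>;  s = op <s, s'>
		 s' = BIT_FIELD_REF <v_out2, 32, 96>;  s = op <s, s'>  */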
1792
1793 if (vect_print_dump_info (REPORT_DETAILS))
1794 fprintf (vect_dump, "Reduce using scalar code. ");
1795
1796 vec_temp = PHI_RESULT (new_phi);
1797 vec_size_in_bits = tree_low_cst (TYPE_SIZE (vectype), 1);
1798 rhs = build3 (BIT_FIELD_REF, scalar_type, vec_temp, bitsize,
1799 bitsize_zero_node);
1800 BIT_FIELD_REF_UNSIGNED (rhs) = TYPE_UNSIGNED (scalar_type);
1801 epilog_stmt = build_gimple_modify_stmt (new_scalar_dest, rhs);
1802 new_temp = make_ssa_name (new_scalar_dest, epilog_stmt);
1803 GIMPLE_STMT_OPERAND (epilog_stmt, 0) = new_temp;
1804 bsi_insert_before (&exit_bsi, epilog_stmt, BSI_SAME_STMT);
1805
1806 for (bit_offset = element_bitsize;
1807 bit_offset < vec_size_in_bits;
1808 bit_offset += element_bitsize)
1809 {
1810 tree tmp;
1811 tree bitpos = bitsize_int (bit_offset);
1812 tree rhs = build3 (BIT_FIELD_REF, scalar_type, vec_temp, bitsize,
1813 bitpos);
1814
1815 BIT_FIELD_REF_UNSIGNED (rhs) = TYPE_UNSIGNED (scalar_type);
1816 epilog_stmt = build_gimple_modify_stmt (new_scalar_dest, rhs);
1817 new_name = make_ssa_name (new_scalar_dest, epilog_stmt);
1818 GIMPLE_STMT_OPERAND (epilog_stmt, 0) = new_name;
1819 bsi_insert_before (&exit_bsi, epilog_stmt, BSI_SAME_STMT);
1820
1821 tmp = build2 (code, scalar_type, new_name, new_temp);
1822 epilog_stmt = build_gimple_modify_stmt (new_scalar_dest, tmp);
1823 new_temp = make_ssa_name (new_scalar_dest, epilog_stmt);
1824 GIMPLE_STMT_OPERAND (epilog_stmt, 0) = new_temp;
1825 bsi_insert_before (&exit_bsi, epilog_stmt, BSI_SAME_STMT);
1826 }
1827
1828 extract_scalar_result = false;
1829 }
1830 }
1831
1832 /* 2.4 Extract the final scalar result. Create:
1833 s_out3 = extract_field <v_out2, bitpos> */
1834
1835 if (extract_scalar_result)
1836 {
1837 tree rhs;
1838
1839 if (vect_print_dump_info (REPORT_DETAILS))
1840 fprintf (vect_dump, "extract scalar result");
1841
1842 if (BYTES_BIG_ENDIAN)
1843 bitpos = size_binop (MULT_EXPR,
1844 bitsize_int (TYPE_VECTOR_SUBPARTS (vectype) - 1),
1845 TYPE_SIZE (scalar_type));
1846 else
1847 bitpos = bitsize_zero_node;
1848
1849 rhs = build3 (BIT_FIELD_REF, scalar_type, new_temp, bitsize, bitpos);
1850 BIT_FIELD_REF_UNSIGNED (rhs) = TYPE_UNSIGNED (scalar_type);
1851 epilog_stmt = build_gimple_modify_stmt (new_scalar_dest, rhs);
1852 new_temp = make_ssa_name (new_scalar_dest, epilog_stmt);
1853 GIMPLE_STMT_OPERAND (epilog_stmt, 0) = new_temp;
1854 bsi_insert_before (&exit_bsi, epilog_stmt, BSI_SAME_STMT);
1855 }
1856
1857 /* 2.5 Adjust the final result by the initial value of the reduction
1858 variable. (When such adjustment is not needed,
1859 'scalar_initial_def' is NULL).
1860
1861 Create:
1862 s_out4 = scalar_expr <s_out3, scalar_initial_def> */
1863
1864 if (scalar_initial_def)
1865 {
1866 tree tmp = build2 (code, scalar_type, new_temp, scalar_initial_def);
1867 epilog_stmt = build_gimple_modify_stmt (new_scalar_dest, tmp);
1868 new_temp = make_ssa_name (new_scalar_dest, epilog_stmt);
1869 GIMPLE_STMT_OPERAND (epilog_stmt, 0) = new_temp;
1870 bsi_insert_before (&exit_bsi, epilog_stmt, BSI_SAME_STMT);
1871 }
1872
1873 /* 2.6 Replace uses of s_out0 with uses of s_out4 (s_out3 if no adjustment was needed) */
1874
1875 /* Find the loop-closed-use at the loop exit of the original scalar result.
1876 (The reduction result is expected to have two immediate uses - one at the
1877 latch block, and one at the loop exit). */
1878 exit_phi = NULL;
1879 FOR_EACH_IMM_USE_FAST (use_p, imm_iter, scalar_dest)
1880 {
1881 if (!flow_bb_inside_loop_p (loop, bb_for_stmt (USE_STMT (use_p))))
1882 {
1883 exit_phi = USE_STMT (use_p);
1884 break;
1885 }
1886 }
1887 /* We expect to have found an exit_phi because of loop-closed-ssa form. */
1888 gcc_assert (exit_phi);
1889 /* Replace the uses: */
1890 orig_name = PHI_RESULT (exit_phi);
1891 FOR_EACH_IMM_USE_STMT (use_stmt, imm_iter, orig_name)
1892 FOR_EACH_IMM_USE_ON_STMT (use_p, imm_iter)
1893 SET_USE (use_p, new_temp);
1894 }
1895
1896
1897 /* Function vectorizable_reduction.
1898
1899 Check if STMT performs a reduction operation that can be vectorized.
1900 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
1901 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
1902 Return FALSE if not a vectorizable STMT, TRUE otherwise.
1903
1904 This function also handles reduction idioms (patterns) that have been
1905 recognized in advance during vect_pattern_recog. In this case, STMT may be
1906 of this form:
1907 X = pattern_expr (arg0, arg1, ..., X)
1908 and its STMT_VINFO_RELATED_STMT points to the last stmt in the original
1909 sequence that had been detected and replaced by the pattern-stmt (STMT).
1910
1911 In some cases of reduction patterns, the type of the reduction variable X is
1912 different from the type of the other arguments of STMT.
1913 In such cases, the vectype that is used when transforming STMT into a vector
1914 stmt is different from the vectype that is used to determine the
1915 vectorization factor, because it consists of a different number of elements
1916 than the actual number of elements that are being operated upon in parallel.
1917
1918 For example, consider an accumulation of shorts into an int accumulator.
1919 On some targets it's possible to vectorize this pattern operating on 8
1920 shorts at a time (hence, the vectype for purposes of determining the
1921 vectorization factor should be V8HI); on the other hand, the vectype that
1922 is used to create the vector form is actually V4SI (the type of the result).
1923
1924 Upon entry to this function, STMT_VINFO_VECTYPE records the vectype that
1925 indicates what is the actual level of parallelism (V8HI in the example), so
1926 that the right vectorization factor would be derived. This vectype
1927 corresponds to the type of arguments to the reduction stmt, and should *NOT*
1928 be used to create the vectorized stmt. The right vectype for the vectorized
1929 stmt is obtained from the type of the result X:
1930 get_vectype_for_scalar_type (TREE_TYPE (X))
1931
1932 This means that, contrary to "regular" reductions (or "regular" stmts in
1933 general), the following equation:
1934 STMT_VINFO_VECTYPE == get_vectype_for_scalar_type (TREE_TYPE (X))
1935 does *NOT* necessarily hold for reduction patterns. */
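
/* For instance (illustrative), the V8HI/V4SI case above typically
   arises from source code such as:

     short a[N];
     int sum = 0;
     for (i = 0; i < N; i++)
       sum += a[i];

   TREE_TYPE (X) is int here, so the vectorized stmt is created with
   vectype V4SI, while the vectorization factor is derived from V8HI.  */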
1936
1937 bool
1938 vectorizable_reduction (tree stmt, block_stmt_iterator *bsi, tree *vec_stmt)
1939 {
1940 tree vec_dest;
1941 tree scalar_dest;
1942 tree op;
1943 tree loop_vec_def0 = NULL_TREE, loop_vec_def1 = NULL_TREE;
1944 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1945 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
1946 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
1947 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
1948 tree operation;
1949 enum tree_code code, orig_code, epilog_reduc_code = 0;
1950 enum machine_mode vec_mode;
1951 int op_type;
1952 optab optab, reduc_optab;
1953 tree new_temp = NULL_TREE;
1954 tree def, def_stmt;
1955 enum vect_def_type dt;
1956 tree new_phi;
1957 tree scalar_type;
1958 bool is_simple_use;
1959 tree orig_stmt;
1960 stmt_vec_info orig_stmt_info;
1961 tree expr = NULL_TREE;
1962 int i;
1963 int nunits = TYPE_VECTOR_SUBPARTS (vectype);
1964 int ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits;
1965 stmt_vec_info prev_stmt_info;
1966 tree reduc_def;
1967 tree new_stmt = NULL_TREE;
1968 int j;
1969
1970 gcc_assert (ncopies >= 1);
1971
1972 /* 1. Is vectorizable reduction? */
1973
1974 /* Not supportable if the reduction variable is used in the loop. */
1975 if (STMT_VINFO_RELEVANT_P (stmt_info))
1976 return false;
1977
1978 if (!STMT_VINFO_LIVE_P (stmt_info))
1979 return false;
1980
1981 /* Make sure it was already recognized as a reduction computation. */
1982 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_reduction_def)
1983 return false;
1984
1985 /* 2. Has this been recognized as a reduction pattern?
1986
1987 Check if STMT represents a pattern that has been recognized
1988 in earlier analysis stages. For stmts that represent a pattern,
1989 the STMT_VINFO_RELATED_STMT field records the last stmt in
1990 the original sequence that constitutes the pattern. */
1991
1992 orig_stmt = STMT_VINFO_RELATED_STMT (stmt_info);
1993 if (orig_stmt)
1994 {
1995 orig_stmt_info = vinfo_for_stmt (orig_stmt);
1996 gcc_assert (STMT_VINFO_RELATED_STMT (orig_stmt_info) == stmt);
1997 gcc_assert (STMT_VINFO_IN_PATTERN_P (orig_stmt_info));
1998 gcc_assert (!STMT_VINFO_IN_PATTERN_P (stmt_info));
1999 }
2000
2001 /* 3. Check the operands of the operation. The first operands are defined
2002 inside the loop body. The last operand is the reduction variable,
2003 which is defined by the loop-header-phi. */
2004
2005 gcc_assert (TREE_CODE (stmt) == GIMPLE_MODIFY_STMT);
2006
2007 operation = GIMPLE_STMT_OPERAND (stmt, 1);
2008 code = TREE_CODE (operation);
2009 op_type = TREE_OPERAND_LENGTH (operation);
2010 if (op_type != binary_op && op_type != ternary_op)
2011 return false;
2012 scalar_dest = GIMPLE_STMT_OPERAND (stmt, 0);
2013 scalar_type = TREE_TYPE (scalar_dest);
2014
2015 /* All uses but the last are expected to be defined in the loop.
2016 The last use is the reduction variable. */
2017 for (i = 0; i < op_type-1; i++)
2018 {
2019 op = TREE_OPERAND (operation, i);
2020 is_simple_use = vect_is_simple_use (op, loop_vinfo, &def_stmt, &def, &dt);
2021 gcc_assert (is_simple_use);
2022 if (dt != vect_loop_def
2023 && dt != vect_invariant_def
2024 && dt != vect_constant_def
2025 && dt != vect_induction_def)
2026 return false;
2027 }
2028
2029 op = TREE_OPERAND (operation, i);
2030 is_simple_use = vect_is_simple_use (op, loop_vinfo, &def_stmt, &def, &dt);
2031 gcc_assert (is_simple_use);
2032 gcc_assert (dt == vect_reduction_def);
2033 gcc_assert (TREE_CODE (def_stmt) == PHI_NODE);
2034 if (orig_stmt)
2035 gcc_assert (orig_stmt == vect_is_simple_reduction (loop, def_stmt));
2036 else
2037 gcc_assert (stmt == vect_is_simple_reduction (loop, def_stmt));
2038
2039 if (STMT_VINFO_LIVE_P (vinfo_for_stmt (def_stmt)))
2040 return false;
2041
2042 /* 4. Supportable by target? */
2043
2044 /* 4.1. check support for the operation in the loop */
2045 optab = optab_for_tree_code (code, vectype);
2046 if (!optab)
2047 {
2048 if (vect_print_dump_info (REPORT_DETAILS))
2049 fprintf (vect_dump, "no optab.");
2050 return false;
2051 }
2052 vec_mode = TYPE_MODE (vectype);
2053 if (optab->handlers[(int) vec_mode].insn_code == CODE_FOR_nothing)
2054 {
2055 if (vect_print_dump_info (REPORT_DETAILS))
2056 fprintf (vect_dump, "op not supported by target.");
2057 if (GET_MODE_SIZE (vec_mode) != UNITS_PER_WORD
2058 || LOOP_VINFO_VECT_FACTOR (loop_vinfo)
2059 < vect_min_worthwhile_factor (code))
2060 return false;
2061 if (vect_print_dump_info (REPORT_DETAILS))
2062 fprintf (vect_dump, "proceeding using word mode.");
2063 }
2064
2065 /* Worthwhile without SIMD support? */
2066 if (!VECTOR_MODE_P (TYPE_MODE (vectype))
2067 && LOOP_VINFO_VECT_FACTOR (loop_vinfo)
2068 < vect_min_worthwhile_factor (code))
2069 {
2070 if (vect_print_dump_info (REPORT_DETAILS))
2071 fprintf (vect_dump, "not worthwhile without SIMD support.");
2072 return false;
2073 }
2074
2075 /* 4.2. Check support for the epilog operation.
2076
2077 If STMT represents a reduction pattern, then the type of the
2078 reduction variable may be different than the type of the rest
2079 of the arguments. For example, consider the case of accumulation
2080 of shorts into an int accumulator; The original code:
2081 S1: int_a = (int) short_a;
2082 orig_stmt-> S2: int_acc = plus <int_a ,int_acc>;
2083
2084 was replaced with:
2085 STMT: int_acc = widen_sum <short_a, int_acc>
2086
2087 This means that:
2088 1. The tree-code that is used to create the vector operation in the
2089 epilog code (that reduces the partial results) is not the
2090 tree-code of STMT, but is rather the tree-code of the original
2091 stmt from the pattern that STMT is replacing. I.e, in the example
2092 above we want to use 'widen_sum' in the loop, but 'plus' in the
2093 epilog.
2094 2. The type (mode) we use to check available target support
2095 for the vector operation to be created in the *epilog*, is
2096 determined by the type of the reduction variable (in the example
2097 above we'd check this: plus_optab[vect_int_mode]).
2098 However the type (mode) we use to check available target support
2099 for the vector operation to be created *inside the loop*, is
2100 determined by the type of the other arguments to STMT (in the
2101 example we'd check this: widen_sum_optab[vect_short_mode]).
2102
2103 This is contrary to "regular" reductions, in which the types of all
2104 the arguments are the same as the type of the reduction variable.
2105 For "regular" reductions we can therefore use the same vector type
2106 (and also the same tree-code) when generating the epilog code and
2107 when generating the code inside the loop. */
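
  /* (Illustrative consequence: a target may support widen_sum on V8HI
     inside the loop and yet lack a direct vector reduction for the V4SI
     epilog; in that case epilog_reduc_code is set to NUM_TREE_CODES
     below, and vect_create_epilog_for_reduction falls back to
     whole-vector shifts or scalar code.)  */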
2108
2109 if (orig_stmt)
2110 {
2111 /* This is a reduction pattern: get the vectype from the type of the
2112 reduction variable, and get the tree-code from orig_stmt. */
2113 orig_code = TREE_CODE (GIMPLE_STMT_OPERAND (orig_stmt, 1));
2114 vectype = get_vectype_for_scalar_type (TREE_TYPE (def));
2115 vec_mode = TYPE_MODE (vectype);
2116 }
2117 else
2118 {
2119 /* Regular reduction: the same vectype and tree-code that are used for
2120 the vector code inside the loop can also be used for the epilog code. */
2121 orig_code = code;
2122 }
2123
2124 if (!reduction_code_for_scalar_code (orig_code, &epilog_reduc_code))
2125 return false;
2126 reduc_optab = optab_for_tree_code (epilog_reduc_code, vectype);
2127 if (!reduc_optab)
2128 {
2129 if (vect_print_dump_info (REPORT_DETAILS))
2130 fprintf (vect_dump, "no optab for reduction.");
2131 epilog_reduc_code = NUM_TREE_CODES;
2132 }
2133 if (reduc_optab && reduc_optab->handlers[(int) vec_mode].insn_code == CODE_FOR_nothing)
2134 {
2135 if (vect_print_dump_info (REPORT_DETAILS))
2136 fprintf (vect_dump, "reduc op not supported by target.");
2137 epilog_reduc_code = NUM_TREE_CODES;
2138 }
2139
2140 if (!vec_stmt) /* transformation not required. */
2141 {
2142 STMT_VINFO_TYPE (stmt_info) = reduc_vec_info_type;
2143 vect_model_reduction_cost (stmt_info, epilog_reduc_code, ncopies);
2144 return true;
2145 }
2146
2147 /** Transform. **/
2148
2149 if (vect_print_dump_info (REPORT_DETAILS))
2150 fprintf (vect_dump, "transform reduction.");
2151
2152 /* Create the destination vector */
2153 vec_dest = vect_create_destination_var (scalar_dest, vectype);
2154
2155 /* Create the reduction-phi that defines the reduction-operand. */
2156 new_phi = create_phi_node (vec_dest, loop->header);
2157
2158 /* In case the vectorization factor (VF) is bigger than the number
2159 of elements that we can fit in a vectype (nunits), we have to generate
2160 more than one vector stmt - i.e - we need to "unroll" the
2161 vector stmt by a factor VF/nunits. For more details see documentation
2162 in vectorizable_operation. */
2163
2164 prev_stmt_info = NULL;
2165 for (j = 0; j < ncopies; j++)
2166 {
2167 /* Handle uses. */
2168 if (j == 0)
2169 {
2170 op = TREE_OPERAND (operation, 0);
2171 loop_vec_def0 = vect_get_vec_def_for_operand (op, stmt, NULL);
2172 if (op_type == ternary_op)
2173 {
2174 op = TREE_OPERAND (operation, 1);
2175 loop_vec_def1 = vect_get_vec_def_for_operand (op, stmt, NULL);
2176 }
2177
2178 /* Get the vector def for the reduction variable from the phi node */
2179 reduc_def = PHI_RESULT (new_phi);
2180 }
2181 else
2182 {
2183 enum vect_def_type dt = vect_unknown_def_type; /* Dummy */
2184 loop_vec_def0 = vect_get_vec_def_for_stmt_copy (dt, loop_vec_def0);
2185 if (op_type == ternary_op)
2186 loop_vec_def1 = vect_get_vec_def_for_stmt_copy (dt, loop_vec_def1);
2187
2188 /* Get the vector def for the reduction variable from the vectorized
2189 reduction operation generated in the previous iteration (j-1) */
2190 reduc_def = GIMPLE_STMT_OPERAND (new_stmt ,0);
2191 }
2192
2193 /* Arguments are ready. Create the new vector stmt. */
2194 if (op_type == binary_op)
2195 expr = build2 (code, vectype, loop_vec_def0, reduc_def);
2196 else
2197 expr = build3 (code, vectype, loop_vec_def0, loop_vec_def1,
2198 reduc_def);
2199 new_stmt = build_gimple_modify_stmt (vec_dest, expr);
2200 new_temp = make_ssa_name (vec_dest, new_stmt);
2201 GIMPLE_STMT_OPERAND (new_stmt, 0) = new_temp;
2202 vect_finish_stmt_generation (stmt, new_stmt, bsi);
2203
2204 if (j == 0)
2205 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
2206 else
2207 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
2208 prev_stmt_info = vinfo_for_stmt (new_stmt);
2209 }
2210
2211 /* Finalize the reduction-phi (set its arguments) and create the
2212 epilog reduction code. */
2213 vect_create_epilog_for_reduction (new_temp, stmt, epilog_reduc_code, new_phi);
2214 return true;
2215 }
2216
2217 /* Check if CALL can be vectorized with argument type VECTYPE_IN and
2218 result type VECTYPE_OUT. Returns a function declaration if the target
2219 has a vectorized version of the function, or NULL_TREE otherwise. */
2220
2221 tree
2222 vectorizable_function (tree call, tree vectype_out, tree vectype_in)
2223 {
2224 tree fndecl = get_callee_fndecl (call);
2225 enum built_in_function code;
2226
2227 /* We only handle functions that do not read or clobber memory -- i.e.
2228 const or novops ones. */
2229 if (!(call_expr_flags (call) & (ECF_CONST | ECF_NOVOPS)))
2230 return NULL_TREE;
2231
2232 if (!fndecl
2233 || TREE_CODE (fndecl) != FUNCTION_DECL
2234 || !DECL_BUILT_IN (fndecl))
2235 return NULL_TREE;
2236
2237 code = DECL_FUNCTION_CODE (fndecl);
2238 return targetm.vectorize.builtin_vectorized_function (code, vectype_out,
2239 vectype_in);
2240 }
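
/* For example (illustrative): on an SSE2 target, a call to the sqrt
   builtin with VECTYPE_OUT == VECTYPE_IN == V2DF may be mapped by
   targetm.vectorize.builtin_vectorized_function to a vector sqrt
   builtin; targets without a suitable hook entry return NULL_TREE.  */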
2241
2242 /* Function vectorizable_call.
2243
2244 Check if STMT performs a function call that can be vectorized.
2245 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
2246 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
2247 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
2248
2249 bool
2250 vectorizable_call (tree stmt, block_stmt_iterator *bsi, tree *vec_stmt)
2251 {
2252 tree vec_dest;
2253 tree scalar_dest;
2254 tree operation;
2255 tree op, type;
2256 stmt_vec_info stmt_info = vinfo_for_stmt (stmt), prev_stmt_info;
2257 tree vectype_out, vectype_in;
2258 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
2259 tree fndecl, rhs, new_temp, def, def_stmt, rhs_type, lhs_type;
2260 enum vect_def_type dt[2];
2261 int ncopies, j, nargs;
2262 call_expr_arg_iterator iter;
2263
2264 if (!STMT_VINFO_RELEVANT_P (stmt_info))
2265 return false;
2266
2267 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_loop_def)
2268 return false;
2269
2270 /* FORNOW: not yet supported. */
2271 if (STMT_VINFO_LIVE_P (stmt_info))
2272 {
2273 if (vect_print_dump_info (REPORT_DETAILS))
2274 fprintf (vect_dump, "value used after loop.");
2275 return false;
2276 }
2277
2278 /* Is STMT a vectorizable call? */
2279 if (TREE_CODE (stmt) != GIMPLE_MODIFY_STMT)
2280 return false;
2281
2282 if (TREE_CODE (GIMPLE_STMT_OPERAND (stmt, 0)) != SSA_NAME)
2283 return false;
2284
2285 operation = GIMPLE_STMT_OPERAND (stmt, 1);
2286 if (TREE_CODE (operation) != CALL_EXPR)
2287 return false;
2288
2289 /* Process function arguments. */
2290 rhs_type = NULL_TREE;
2291 nargs = 0;
2292 FOR_EACH_CALL_EXPR_ARG (op, iter, operation)
2293 {
2294 ++nargs;
2295
2296 /* Bail out if the function has more than two arguments; we
2297 do not have interesting builtin functions to vectorize with
2298 more than two arguments. */
2299 if (nargs > 2)
2300 return false;
2301
2302 /* We can only handle calls with arguments of the same type. */
2303 if (rhs_type
2304 && rhs_type != TREE_TYPE (op))
2305 {
2306 if (vect_print_dump_info (REPORT_DETAILS))
2307 fprintf (vect_dump, "argument types differ.");
2308 return false;
2309 }
2310 rhs_type = TREE_TYPE (op);
2311
2312 if (!vect_is_simple_use (op, loop_vinfo, &def_stmt, &def, &dt[nargs-1]))
2313 {
2314 if (vect_print_dump_info (REPORT_DETAILS))
2315 fprintf (vect_dump, "use not simple.");
2316 return false;
2317 }
2318 }
2319
2320 /* No arguments is also not good. */
2321 if (nargs == 0)
2322 return false;
2323
2324 vectype_in = get_vectype_for_scalar_type (rhs_type);
2325
2326 lhs_type = TREE_TYPE (GIMPLE_STMT_OPERAND (stmt, 0));
2327 vectype_out = get_vectype_for_scalar_type (lhs_type);
2328
2329 /* Only handle the case of vectors with the same number of elements.
2330 FIXME: We need a way to handle for example the SSE2 cvtpd2dq
2331 instruction which converts V2DFmode to V4SImode but only
2332 using the lower half of the V4SImode result. */
2333 if (TYPE_VECTOR_SUBPARTS (vectype_in) != TYPE_VECTOR_SUBPARTS (vectype_out))
2334 return false;
2335
2336 /* For now, we only vectorize functions if a target specific builtin
2337 is available. TODO -- in some cases, it might be profitable to
2338 insert the calls for pieces of the vector, in order to be able
2339 to vectorize other operations in the loop. */
2340 fndecl = vectorizable_function (operation, vectype_out, vectype_in);
2341 if (fndecl == NULL_TREE)
2342 {
2343 if (vect_print_dump_info (REPORT_DETAILS))
2344 fprintf (vect_dump, "function is not vectorizable.");
2345
2346 return false;
2347 }
2348
2349 gcc_assert (ZERO_SSA_OPERANDS (stmt, SSA_OP_ALL_VIRTUALS));
2350
2351 ncopies = (LOOP_VINFO_VECT_FACTOR (loop_vinfo)
2352 / TYPE_VECTOR_SUBPARTS (vectype_out));
2353
2354 if (!vec_stmt) /* transformation not required. */
2355 {
2356 STMT_VINFO_TYPE (stmt_info) = call_vec_info_type;
2357 if (vect_print_dump_info (REPORT_DETAILS))
2358 fprintf (vect_dump, "=== vectorizable_call ===");
2359 vect_model_simple_cost (stmt_info, ncopies);
2360 return true;
2361 }
2362
2363 /** Transform. **/
2364
2365 if (vect_print_dump_info (REPORT_DETAILS))
2366 fprintf (vect_dump, "transform operation.");
2367
2368 gcc_assert (ncopies >= 1);
2369
2370 /* Handle def. */
2371 scalar_dest = GIMPLE_STMT_OPERAND (stmt, 0);
2372 vec_dest = vect_create_destination_var (scalar_dest, vectype_out);
2373
2374 prev_stmt_info = NULL;
2375 for (j = 0; j < ncopies; ++j)
2376 {
2377 tree new_stmt, vargs;
2378 tree vec_oprnd[2];
2379 int n;
2380
2381 /* Build argument list for the vectorized call. */
2382 /* FIXME: Rewrite this so that it doesn't construct a temporary
2383 list. */
2384 vargs = NULL_TREE;
2385 n = -1;
2386 FOR_EACH_CALL_EXPR_ARG (op, iter, operation)
2387 {
2388 ++n;
2389
2390 if (j == 0)
2391 vec_oprnd[n] = vect_get_vec_def_for_operand (op, stmt, NULL);
2392 else
2393 vec_oprnd[n] = vect_get_vec_def_for_stmt_copy (dt[n], vec_oprnd[n]);
2394
2395 vargs = tree_cons (NULL_TREE, vec_oprnd[n], vargs);
2396 }
2397 vargs = nreverse (vargs);
2398
2399 rhs = build_function_call_expr (fndecl, vargs);
2400 new_stmt = build_gimple_modify_stmt (vec_dest, rhs);
2401 new_temp = make_ssa_name (vec_dest, new_stmt);
2402 GIMPLE_STMT_OPERAND (new_stmt, 0) = new_temp;
2403
2404 vect_finish_stmt_generation (stmt, new_stmt, bsi);
2405
2406 if (j == 0)
2407 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
2408 else
2409 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
2410 prev_stmt_info = vinfo_for_stmt (new_stmt);
2411 }
2412
2413 /* The call in STMT might prevent it from being removed in dce. We however
2414 cannot remove it here, due to the way the ssa name it defines is mapped
2415 to the new definition. So just replace the rhs of the statement with
2416 something harmless. */
2417 type = TREE_TYPE (scalar_dest);
2418 GIMPLE_STMT_OPERAND (stmt, 1) = fold_convert (type, integer_zero_node);
2419 update_stmt (stmt);
2420
2421 return true;
2422 }
2423
2424
2425 /* Function vect_gen_widened_results_half
2426
2427 Create a vector stmt whose code, type, number of arguments, and result
2428 variable are CODE, VECTYPE, OP_TYPE, and VEC_DEST, and its arguments are
2429 VEC_OPRND0 and VEC_OPRND1. The new vector stmt is to be inserted at BSI.
2430 In the case that CODE is a CALL_EXPR, this means that a call to DECL
2431 needs to be created (DECL is a function-decl of a target-builtin).
2432 STMT is the original scalar stmt that we are vectorizing. */
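
/* For example (illustrative): when widening eight shorts (V8HI) to
   eight ints, each half of the result is a V4SI vector; the caller
   invokes this function twice, once with the "low" code/decl and once
   with the "high" one (e.g. VEC_UNPACK_LO_EXPR / VEC_UNPACK_HI_EXPR,
   or two target builtins).  */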
2433
2434 static tree
2435 vect_gen_widened_results_half (enum tree_code code, tree vectype, tree decl,
2436 tree vec_oprnd0, tree vec_oprnd1, int op_type,
2437 tree vec_dest, block_stmt_iterator *bsi,
2438 tree stmt)
2439 {
2440 tree expr;
2441 tree new_stmt;
2442 tree new_temp;
2443 tree sym;
2444 ssa_op_iter iter;
2445
2446 /* Generate half of the widened result: */
2447 if (code == CALL_EXPR)
2448 {
2449 /* Target specific support */
2450 if (op_type == binary_op)
2451 expr = build_call_expr (decl, 2, vec_oprnd0, vec_oprnd1);
2452 else
2453 expr = build_call_expr (decl, 1, vec_oprnd0);
2454 }
2455 else
2456 {
2457 /* Generic support */
2458 gcc_assert (op_type == TREE_CODE_LENGTH (code));
2459 if (op_type == binary_op)
2460 expr = build2 (code, vectype, vec_oprnd0, vec_oprnd1);
2461 else
2462 expr = build1 (code, vectype, vec_oprnd0);
2463 }
2464 new_stmt = build_gimple_modify_stmt (vec_dest, expr);
2465 new_temp = make_ssa_name (vec_dest, new_stmt);
2466 GIMPLE_STMT_OPERAND (new_stmt, 0) = new_temp;
2467 vect_finish_stmt_generation (stmt, new_stmt, bsi);
2468
2469 if (code == CALL_EXPR)
2470 {
2471 FOR_EACH_SSA_TREE_OPERAND (sym, new_stmt, iter, SSA_OP_ALL_VIRTUALS)
2472 {
2473 if (TREE_CODE (sym) == SSA_NAME)
2474 sym = SSA_NAME_VAR (sym);
2475 mark_sym_for_renaming (sym);
2476 }
2477 }
2478
2479 return new_stmt;
2480 }
2481
2482
2483 /* Function vectorizable_conversion.
2484
2485 Check if STMT performs a conversion operation that can be vectorized.
2486 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
2487 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
2488 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
2489
2490 bool
2491 vectorizable_conversion (tree stmt, block_stmt_iterator * bsi,
2492 tree * vec_stmt)
2493 {
2494 tree vec_dest;
2495 tree scalar_dest;
2496 tree operation;
2497 tree op0;
2498 tree vec_oprnd0 = NULL_TREE, vec_oprnd1 = NULL_TREE;
2499 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
2500 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
2501 enum tree_code code, code1 = ERROR_MARK, code2 = ERROR_MARK;
2502 tree decl1 = NULL_TREE, decl2 = NULL_TREE;
2503 tree new_temp;
2504 tree def, def_stmt;
2505 enum vect_def_type dt0;
2506 tree new_stmt;
2507 stmt_vec_info prev_stmt_info;
2508 int nunits_in;
2509 int nunits_out;
2510 tree vectype_out, vectype_in;
2511 int ncopies, j;
2512 tree expr;
2513 tree rhs_type, lhs_type;
2514 tree builtin_decl;
2515 enum { NARROW, NONE, WIDEN } modifier;
2516
2517 /* Is STMT a vectorizable conversion? */
2518
2519 if (!STMT_VINFO_RELEVANT_P (stmt_info))
2520 return false;
2521
2522 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_loop_def)
2523 return false;
2524
2525 if (STMT_VINFO_LIVE_P (stmt_info))
2526 {
2527 /* FORNOW: not yet supported. */
2528 if (vect_print_dump_info (REPORT_DETAILS))
2529 fprintf (vect_dump, "value used after loop.");
2530 return false;
2531 }
2532
2533 if (TREE_CODE (stmt) != GIMPLE_MODIFY_STMT)
2534 return false;
2535
2536 if (TREE_CODE (GIMPLE_STMT_OPERAND (stmt, 0)) != SSA_NAME)
2537 return false;
2538
2539 operation = GIMPLE_STMT_OPERAND (stmt, 1);
2540 code = TREE_CODE (operation);
2541 if (code != FIX_TRUNC_EXPR && code != FLOAT_EXPR)
2542 return false;
2543
2544 /* Check types of lhs and rhs */
2545 op0 = TREE_OPERAND (operation, 0);
2546 rhs_type = TREE_TYPE (op0);
2547 vectype_in = get_vectype_for_scalar_type (rhs_type);
2548 nunits_in = TYPE_VECTOR_SUBPARTS (vectype_in);
2549
2550 scalar_dest = GIMPLE_STMT_OPERAND (stmt, 0);
2551 lhs_type = TREE_TYPE (scalar_dest);
2552 vectype_out = get_vectype_for_scalar_type (lhs_type);
2553 nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out);
2554
2555 /* FORNOW */
2556 if (nunits_in == nunits_out / 2)
2557 modifier = NARROW;
2558 else if (nunits_out == nunits_in)
2559 modifier = NONE;
2560 else if (nunits_out == nunits_in / 2)
2561 modifier = WIDEN;
2562 else
2563 return false;
2564
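  /* E.g. (illustrative, with 128-bit vectors): FLOAT_EXPR from V4SI to
     V4SF is NONE; FLOAT_EXPR from int (V4SI) to double (V2DF) is WIDEN
     (nunits_out == nunits_in / 2); FIX_TRUNC_EXPR from double (V2DF)
     to int (V4SI) is NARROW (nunits_in == nunits_out / 2).  */
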
2565 if (modifier == NONE)
2566 gcc_assert (STMT_VINFO_VECTYPE (stmt_info) == vectype_out);
2567
2568 /* Bail out if the types are both integral or non-integral */
2569 if ((INTEGRAL_TYPE_P (rhs_type) && INTEGRAL_TYPE_P (lhs_type))
2570 || (!INTEGRAL_TYPE_P (rhs_type) && !INTEGRAL_TYPE_P (lhs_type)))
2571 return false;
2572
2573 if (modifier == NARROW)
2574 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits_out;
2575 else
2576 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits_in;
2577
2578 /* Sanity check: make sure that at least one copy of the vectorized stmt
2579 needs to be generated. */
2580 gcc_assert (ncopies >= 1);
2581
2582 /* Check the operands of the operation. */
2583 if (!vect_is_simple_use (op0, loop_vinfo, &def_stmt, &def, &dt0))
2584 {
2585 if (vect_print_dump_info (REPORT_DETAILS))
2586 fprintf (vect_dump, "use not simple.");
2587 return false;
2588 }
2589
2590 /* Supportable by target? */
2591 if ((modifier == NONE
2592 && !targetm.vectorize.builtin_conversion (code, vectype_in))
2593 || (modifier == WIDEN
2594 && !supportable_widening_operation (code, stmt, vectype_in,
2595 &decl1, &decl2,
2596 &code1, &code2))
2597 || (modifier == NARROW
2598 && !supportable_narrowing_operation (code, stmt, vectype_in,
2599 &code1)))
2600 {
2601 if (vect_print_dump_info (REPORT_DETAILS))
2602 fprintf (vect_dump, "op not supported by target.");
2603 return false;
2604 }
2605
2606 if (modifier != NONE)
2607 STMT_VINFO_VECTYPE (stmt_info) = vectype_in;
2608
2609 if (!vec_stmt) /* transformation not required. */
2610 {
2611 STMT_VINFO_TYPE (stmt_info) = type_conversion_vec_info_type;
2612 return true;
2613 }
2614
2615 /** Transform. **/
2616 if (vect_print_dump_info (REPORT_DETAILS))
2617 fprintf (vect_dump, "transform conversion.");
2618
2619 /* Handle def. */
2620 vec_dest = vect_create_destination_var (scalar_dest, vectype_out);
2621
2622 prev_stmt_info = NULL;
2623 switch (modifier)
2624 {
2625 case NONE:
2626 for (j = 0; j < ncopies; j++)
2627 {
2628 tree sym;
2629 ssa_op_iter iter;
2630
2631 if (j == 0)
2632 vec_oprnd0 = vect_get_vec_def_for_operand (op0, stmt, NULL);
2633 else
2634 vec_oprnd0 = vect_get_vec_def_for_stmt_copy (dt0, vec_oprnd0);
2635
2636 builtin_decl =
2637 targetm.vectorize.builtin_conversion (code, vectype_in);
2638 new_stmt = build_call_expr (builtin_decl, 1, vec_oprnd0);
2639
2640 /* Arguments are ready. Create the new vector stmt. */
2641 new_stmt = build_gimple_modify_stmt (vec_dest, new_stmt);
2642 new_temp = make_ssa_name (vec_dest, new_stmt);
2643 GIMPLE_STMT_OPERAND (new_stmt, 0) = new_temp;
2644 vect_finish_stmt_generation (stmt, new_stmt, bsi);
2645 FOR_EACH_SSA_TREE_OPERAND (sym, new_stmt, iter, SSA_OP_ALL_VIRTUALS)
2646 {
2647 if (TREE_CODE (sym) == SSA_NAME)
2648 sym = SSA_NAME_VAR (sym);
2649 mark_sym_for_renaming (sym);
2650 }
2651
2652 if (j == 0)
2653 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
2654 else
2655 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
2656 prev_stmt_info = vinfo_for_stmt (new_stmt);
2657 }
2658 break;
2659
2660 case WIDEN:
2661 /* In case the vectorization factor (VF) is bigger than the number
2662 of elements that we can fit in a vectype (nunits), we have to
2663 generate more than one vector stmt - i.e - we need to "unroll"
2664 the vector stmt by a factor VF/nunits. */
2665 for (j = 0; j < ncopies; j++)
2666 {
2667 if (j == 0)
2668 vec_oprnd0 = vect_get_vec_def_for_operand (op0, stmt, NULL);
2669 else
2670 vec_oprnd0 = vect_get_vec_def_for_stmt_copy (dt0, vec_oprnd0);
2671
2672 STMT_VINFO_VECTYPE (stmt_info) = vectype_in;
2673
2674 /* Generate first half of the widened result: */
2675 new_stmt
2676 = vect_gen_widened_results_half (code1, vectype_out, decl1,
2677 vec_oprnd0, vec_oprnd1,
2678 unary_op, vec_dest, bsi, stmt);
2679 if (j == 0)
2680 STMT_VINFO_VEC_STMT (stmt_info) = new_stmt;
2681 else
2682 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
2683 prev_stmt_info = vinfo_for_stmt (new_stmt);
2684
2685 /* Generate second half of the widened result: */
2686 new_stmt
2687 = vect_gen_widened_results_half (code2, vectype_out, decl2,
2688 vec_oprnd0, vec_oprnd1,
2689 unary_op, vec_dest, bsi, stmt);
2690 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
2691 prev_stmt_info = vinfo_for_stmt (new_stmt);
2692 }
2693 break;
2694
2695 case NARROW:
2696 /* In case the vectorization factor (VF) is bigger than the number
2697 of elements that we can fit in a vectype (nunits), we have to
2698 generate more than one vector stmt - i.e - we need to "unroll"
2699 the vector stmt by a factor VF/nunits. */
2700 for (j = 0; j < ncopies; j++)
2701 {
2702 /* Handle uses. */
2703 if (j == 0)
2704 {
2705 vec_oprnd0 = vect_get_vec_def_for_operand (op0, stmt, NULL);
2706 vec_oprnd1 = vect_get_vec_def_for_stmt_copy (dt0, vec_oprnd0);
2707 }
2708 else
2709 {
2710 vec_oprnd0 = vect_get_vec_def_for_stmt_copy (dt0, vec_oprnd1);
2711 vec_oprnd1 = vect_get_vec_def_for_stmt_copy (dt0, vec_oprnd0);
2712 }
2713
2714 /* Arguments are ready. Create the new vector stmt. */
2715 expr = build2 (code1, vectype_out, vec_oprnd0, vec_oprnd1);
2716 new_stmt = build_gimple_modify_stmt (vec_dest, expr);
2717 new_temp = make_ssa_name (vec_dest, new_stmt);
2718 GIMPLE_STMT_OPERAND (new_stmt, 0) = new_temp;
2719 vect_finish_stmt_generation (stmt, new_stmt, bsi);
2720
2721 if (j == 0)
2722 STMT_VINFO_VEC_STMT (stmt_info) = new_stmt;
2723 else
2724 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
2725
2726 prev_stmt_info = vinfo_for_stmt (new_stmt);
2727 }
2728
2729 *vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);
2730 }
2731 return true;
2732 }
2733
2734
2735 /* Function vectorizable_assignment.
2736
2737 Check if STMT performs an assignment (copy) that can be vectorized.
2738 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
2739 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
2740 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
2741
2742 bool
2743 vectorizable_assignment (tree stmt, block_stmt_iterator *bsi, tree *vec_stmt)
2744 {
2745 tree vec_dest;
2746 tree scalar_dest;
2747 tree op;
2748 tree vec_oprnd;
2749 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
2750 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
2751 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
2752 tree new_temp;
2753 tree def, def_stmt;
2754 enum vect_def_type dt;
2755 int nunits = TYPE_VECTOR_SUBPARTS (vectype);
2756 int ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits;
2757
2758 gcc_assert (ncopies >= 1);
2759 if (ncopies > 1)
2760 return false; /* FORNOW */
2761
2762 if (!STMT_VINFO_RELEVANT_P (stmt_info))
2763 return false;
2764
2765 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_loop_def)
2766 return false;
2767
2768 /* FORNOW: not yet supported. */
2769 if (STMT_VINFO_LIVE_P (stmt_info))
2770 {
2771 if (vect_print_dump_info (REPORT_DETAILS))
2772 fprintf (vect_dump, "value used after loop.");
2773 return false;
2774 }
2775
2776 /* Is vectorizable assignment? */
2777 if (TREE_CODE (stmt) != GIMPLE_MODIFY_STMT)
2778 return false;
2779
2780 scalar_dest = GIMPLE_STMT_OPERAND (stmt, 0);
2781 if (TREE_CODE (scalar_dest) != SSA_NAME)
2782 return false;
2783
2784 op = GIMPLE_STMT_OPERAND (stmt, 1);
2785 if (!vect_is_simple_use (op, loop_vinfo, &def_stmt, &def, &dt))
2786 {
2787 if (vect_print_dump_info (REPORT_DETAILS))
2788 fprintf (vect_dump, "use not simple.");
2789 return false;
2790 }
2791
2792 if (!vec_stmt) /* transformation not required. */
2793 {
2794 STMT_VINFO_TYPE (stmt_info) = assignment_vec_info_type;
2795 if (vect_print_dump_info (REPORT_DETAILS))
2796 fprintf (vect_dump, "=== vectorizable_assignment ===");
2797 vect_model_simple_cost (stmt_info, ncopies);
2798 return true;
2799 }
2800
2801 /** Transform. **/
2802 if (vect_print_dump_info (REPORT_DETAILS))
2803 fprintf (vect_dump, "transform assignment.");
2804
2805 /* Handle def. */
2806 vec_dest = vect_create_destination_var (scalar_dest, vectype);
2807
2808 /* Handle use. */
2809 op = GIMPLE_STMT_OPERAND (stmt, 1);
2810 vec_oprnd = vect_get_vec_def_for_operand (op, stmt, NULL);
2811
2812 /* Arguments are ready. Create the new vector stmt. */
2813 *vec_stmt = build_gimple_modify_stmt (vec_dest, vec_oprnd);
2814 new_temp = make_ssa_name (vec_dest, *vec_stmt);
2815 GIMPLE_STMT_OPERAND (*vec_stmt, 0) = new_temp;
2816 vect_finish_stmt_generation (stmt, *vec_stmt, bsi);
2817
2818 return true;
2819 }
2820
2821
2822 /* Function vect_min_worthwhile_factor.
2823
2824 For a loop where we could vectorize the operation indicated by CODE,
2825 return the minimum vectorization factor that makes it worthwhile
2826 to use generic vectors. */
2827 static int
2828 vect_min_worthwhile_factor (enum tree_code code)
2829 {
2830 switch (code)
2831 {
2832 case PLUS_EXPR:
2833 case MINUS_EXPR:
2834 case NEGATE_EXPR:
2835 return 4;
2836
2837 case BIT_AND_EXPR:
2838 case BIT_IOR_EXPR:
2839 case BIT_XOR_EXPR:
2840 case BIT_NOT_EXPR:
2841 return 2;
2842
2843 default:
2844 return INT_MAX;
2845 }
2846 }
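
/* For example: with no SIMD support, a PLUS_EXPR loop is only worth
   vectorizing with word-mode generic vectors when at least 4 elements
   are processed per vector iteration; cheap bitwise operations already
   pay off at 2; everything else (INT_MAX) is never considered
   worthwhile.  */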
2847
2848
2849 /* Function vectorizable_induction
2850
2851 Check if PHI performs an induction computation that can be vectorized.
2852 If VEC_STMT is also passed, vectorize the induction PHI: create a vectorized
2853 phi to replace it, put it in VEC_STMT, and add it to the same basic block.
2854 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
2855
2856 bool
2857 vectorizable_induction (tree phi, block_stmt_iterator *bsi ATTRIBUTE_UNUSED,
2858 tree *vec_stmt)
2859 {
2860 stmt_vec_info stmt_info = vinfo_for_stmt (phi);
2861 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
2862 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
2863 int nunits = TYPE_VECTOR_SUBPARTS (vectype);
2864 int ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits;
2865 tree vec_def;
2866
2867 gcc_assert (ncopies >= 1);
2868
2869 if (!STMT_VINFO_RELEVANT_P (stmt_info))
2870 return false;
2871
2872 gcc_assert (STMT_VINFO_DEF_TYPE (stmt_info) == vect_induction_def);
2873
2874 if (STMT_VINFO_LIVE_P (stmt_info))
2875 {
2876 /* FORNOW: not yet supported. */
2877 if (vect_print_dump_info (REPORT_DETAILS))
2878 fprintf (vect_dump, "value used after loop.");
2879 return false;
2880 }
2881
2882 if (TREE_CODE (phi) != PHI_NODE)
2883 return false;
2884
2885 if (!vec_stmt) /* transformation not required. */
2886 {
2887 STMT_VINFO_TYPE (stmt_info) = induc_vec_info_type;
2888 if (vect_print_dump_info (REPORT_DETAILS))
2889 fprintf (vect_dump, "=== vectorizable_induction ===");
2890 vect_model_induction_cost (stmt_info, ncopies);
2891 return true;
2892 }
2893
2894 /** Transform. **/
2895
2896 if (vect_print_dump_info (REPORT_DETAILS))
2897 fprintf (vect_dump, "transform induction phi.");
2898
2899 vec_def = get_initial_def_for_induction (phi);
2900 *vec_stmt = SSA_NAME_DEF_STMT (vec_def);
2901 return true;
2902 }
2903
2904
2905 /* Function vectorizable_operation.
2906
2907 Check if STMT performs a binary or unary operation that can be vectorized.
2908 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
2909 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
2910 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
2911
2912 bool
2913 vectorizable_operation (tree stmt, block_stmt_iterator *bsi, tree *vec_stmt)
2914 {
2915 tree vec_dest;
2916 tree scalar_dest;
2917 tree operation;
2918 tree op0, op1 = NULL;
2919 tree vec_oprnd0 = NULL_TREE, vec_oprnd1 = NULL_TREE;
2920 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
2921 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
2922 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
2923 enum tree_code code;
2924 enum machine_mode vec_mode;
2925 tree new_temp;
2926 int op_type;
2927 optab optab;
2928 int icode;
2929 enum machine_mode optab_op2_mode;
2930 tree def, def_stmt;
2931 enum vect_def_type dt0, dt1;
2932 tree new_stmt;
2933 stmt_vec_info prev_stmt_info;
2934 int nunits_in = TYPE_VECTOR_SUBPARTS (vectype);
2935 int nunits_out;
2936 tree vectype_out;
2937 int ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits_in;
2938 int j;
2939
2940 gcc_assert (ncopies >= 1);
2941
2942 if (!STMT_VINFO_RELEVANT_P (stmt_info))
2943 return false;
2944
2945 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_loop_def)
2946 return false;
2947
2948 /* FORNOW: not yet supported. */
2949 if (STMT_VINFO_LIVE_P (stmt_info))
2950 {
2951 if (vect_print_dump_info (REPORT_DETAILS))
2952 fprintf (vect_dump, "value used after loop.");
2953 return false;
2954 }
2955
2956 /* Is STMT a vectorizable binary/unary operation? */
2957 if (TREE_CODE (stmt) != GIMPLE_MODIFY_STMT)
2958 return false;
2959
2960 if (TREE_CODE (GIMPLE_STMT_OPERAND (stmt, 0)) != SSA_NAME)
2961 return false;
2962
2963 scalar_dest = GIMPLE_STMT_OPERAND (stmt, 0);
2964 vectype_out = get_vectype_for_scalar_type (TREE_TYPE (scalar_dest));
2965 nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out);
2966 if (nunits_out != nunits_in)
2967 return false;
2968
2969 operation = GIMPLE_STMT_OPERAND (stmt, 1);
2970 code = TREE_CODE (operation);
2971 optab = optab_for_tree_code (code, vectype);
2972
2973 /* Support only unary or binary operations. */
2974 op_type = TREE_OPERAND_LENGTH (operation);
2975 if (op_type != unary_op && op_type != binary_op)
2976 {
2977 if (vect_print_dump_info (REPORT_DETAILS))
2978 fprintf (vect_dump, "num. args = %d (not unary/binary op).", op_type);
2979 return false;
2980 }
2981
2982 op0 = TREE_OPERAND (operation, 0);
2983 if (!vect_is_simple_use (op0, loop_vinfo, &def_stmt, &def, &dt0))
2984 {
2985 if (vect_print_dump_info (REPORT_DETAILS))
2986 fprintf (vect_dump, "use not simple.");
2987 return false;
2988 }
2989
2990 if (op_type == binary_op)
2991 {
2992 op1 = TREE_OPERAND (operation, 1);
2993 if (!vect_is_simple_use (op1, loop_vinfo, &def_stmt, &def, &dt1))
2994 {
2995 if (vect_print_dump_info (REPORT_DETAILS))
2996 fprintf (vect_dump, "use not simple.");
2997 return false;
2998 }
2999 }
3000
3001 /* Supportable by target? */
3002 if (!optab)
3003 {
3004 if (vect_print_dump_info (REPORT_DETAILS))
3005 fprintf (vect_dump, "no optab.");
3006 return false;
3007 }
3008 vec_mode = TYPE_MODE (vectype);
3009 icode = (int) optab->handlers[(int) vec_mode].insn_code;
3010 if (icode == CODE_FOR_nothing)
3011 {
3012 if (vect_print_dump_info (REPORT_DETAILS))
3013 fprintf (vect_dump, "op not supported by target.");
3014 if (GET_MODE_SIZE (vec_mode) != UNITS_PER_WORD
3015 || LOOP_VINFO_VECT_FACTOR (loop_vinfo)
3016 < vect_min_worthwhile_factor (code))
3017 return false;
3018 if (vect_print_dump_info (REPORT_DETAILS))
3019 fprintf (vect_dump, "proceeding using word mode.");
3020 }
3021
3022 /* Worthwhile without SIMD support? */
3023 if (!VECTOR_MODE_P (TYPE_MODE (vectype))
3024 && LOOP_VINFO_VECT_FACTOR (loop_vinfo)
3025 < vect_min_worthwhile_factor (code))
3026 {
3027 if (vect_print_dump_info (REPORT_DETAILS))
3028 fprintf (vect_dump, "not worthwhile without SIMD support.");
3029 return false;
3030 }
3031
3032 if (code == LSHIFT_EXPR || code == RSHIFT_EXPR)
3033 {
3034 /* FORNOW: not yet supported. */
3035 if (!VECTOR_MODE_P (vec_mode))
3036 return false;
3037
3038 /* Invariant argument is needed for a vector shift
3039 by a scalar shift operand. */
3040 optab_op2_mode = insn_data[icode].operand[2].mode;
3041 if (! (VECTOR_MODE_P (optab_op2_mode)
3042 || dt1 == vect_constant_def
3043 || dt1 == vect_invariant_def))
3044 {
3045 if (vect_print_dump_info (REPORT_DETAILS))
3046 fprintf (vect_dump, "operand mode requires invariant argument.");
3047 return false;
3048 }
3049 }
3050
3051 if (!vec_stmt) /* transformation not required. */
3052 {
3053 STMT_VINFO_TYPE (stmt_info) = op_vec_info_type;
3054 if (vect_print_dump_info (REPORT_DETAILS))
3055 fprintf (vect_dump, "=== vectorizable_operation ===");
3056 vect_model_simple_cost (stmt_info, ncopies);
3057 return true;
3058 }
3059
3060 /** Transform. **/
3061
3062 if (vect_print_dump_info (REPORT_DETAILS))
3063 fprintf (vect_dump, "transform binary/unary operation.");
3064
3065 /* Handle def. */
3066 vec_dest = vect_create_destination_var (scalar_dest, vectype);
3067
3068 /* In case the vectorization factor (VF) is bigger than the number
3069 of elements that we can fit in a vectype (nunits), we have to generate
3070 more than one vector stmt - i.e - we need to "unroll" the
3071 vector stmt by a factor VF/nunits. In doing so, we record a pointer
3072 from one copy of the vector stmt to the next, in the field
3073 STMT_VINFO_RELATED_STMT. This is necessary in order to allow following
3074 stages to find the correct vector defs to be used when vectorizing
3075 stmts that use the defs of the current stmt. The example below illustrates
3076 the vectorization process when VF=16 and nunits=4 (i.e - we need to create
3077 4 vectorized stmts):
3078
3079 before vectorization:
3080 RELATED_STMT VEC_STMT
3081 S1: x = memref - -
3082 S2: z = x + 1 - -
3083
3084 step 1: vectorize stmt S1 (done in vectorizable_load. See more details
3085 there):
3086 RELATED_STMT VEC_STMT
3087 VS1_0: vx0 = memref0 VS1_1 -
3088 VS1_1: vx1 = memref1 VS1_2 -
3089 VS1_2: vx2 = memref2 VS1_3 -
3090 VS1_3: vx3 = memref3 - -
3091 S1: x = load - VS1_0
3092 S2: z = x + 1 - -
3093
3094 step2: vectorize stmt S2 (done here):
3095 To vectorize stmt S2 we first need to find the relevant vector
3096 def for the first operand 'x'. This is, as usual, obtained from
3097 the vector stmt recorded in the STMT_VINFO_VEC_STMT of the stmt
3098 that defines 'x' (S1). This way we find the stmt VS1_0, and the
3099 relevant vector def 'vx0'. Having found 'vx0' we can generate
3100 the vector stmt VS2_0, and as usual, record it in the
3101 STMT_VINFO_VEC_STMT of stmt S2.
3102 When creating the second copy (VS2_1), we obtain the relevant vector
3103 def from the vector stmt recorded in the STMT_VINFO_RELATED_STMT of
3104 stmt VS1_0. This way we find the stmt VS1_1 and the relevant
3105 vector def 'vx1'. Using 'vx1' we create stmt VS2_1 and record a
3106 pointer to it in the STMT_VINFO_RELATED_STMT of the vector stmt VS2_0.
3107 Similarly when creating stmts VS2_2 and VS2_3. This is the resulting
3108 chain of stmts and pointers:
3109 RELATED_STMT VEC_STMT
3110 VS1_0: vx0 = memref0 VS1_1 -
3111 VS1_1: vx1 = memref1 VS1_2 -
3112 VS1_2: vx2 = memref2 VS1_3 -
3113 VS1_3: vx3 = memref3 - -
3114 S1: x = load - VS1_0
3115 VS2_0: vz0 = vx0 + v1 VS2_1 -
3116 VS2_1: vz1 = vx1 + v1 VS2_2 -
3117 VS2_2: vz2 = vx2 + v1 VS2_3 -
3118 VS2_3: vz3 = vx3 + v1 - -
3119 S2: z = x + 1 - VS2_0 */
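
  /* (Illustrative source for the example above, with nunits == 4 and
     VF == 16:
	 for (i = 0; i < n; i++)
	   z[i] = x[i] + 1;
     each scalar stmt is expanded into four vector stmts.)  */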
3120
3121 prev_stmt_info = NULL;
3122 for (j = 0; j < ncopies; j++)
3123 {
3124 /* Handle uses. */
3125 if (j == 0)
3126 {
3127 vec_oprnd0 = vect_get_vec_def_for_operand (op0, stmt, NULL);
3128 if (op_type == binary_op)
3129 {
3130 if (code == LSHIFT_EXPR || code == RSHIFT_EXPR)
3131 {
3132 /* Vector shl and shr insn patterns can be defined with
3133 scalar operand 2 (shift operand). In this case, use
3134 constant or loop invariant op1 directly, without
3135 extending it to vector mode first. */
3136 optab_op2_mode = insn_data[icode].operand[2].mode;
3137 if (!VECTOR_MODE_P (optab_op2_mode))
3138 {
3139 if (vect_print_dump_info (REPORT_DETAILS))
3140 fprintf (vect_dump, "operand 1 using scalar mode.");
3141 vec_oprnd1 = op1;
3142 }
3143 }
3144 if (!vec_oprnd1)
3145 vec_oprnd1 = vect_get_vec_def_for_operand (op1, stmt, NULL);
3146 }
3147 }
3148 else
3149 {
3150 vec_oprnd0 = vect_get_vec_def_for_stmt_copy (dt0, vec_oprnd0);
3151 if (op_type == binary_op)
3152 vec_oprnd1 = vect_get_vec_def_for_stmt_copy (dt1, vec_oprnd1);
3153 }
3154
3155 /* Arguments are ready. Create the new vector stmt. */
3156
3157 if (op_type == binary_op)
3158 new_stmt = build_gimple_modify_stmt (vec_dest,
3159 build2 (code, vectype, vec_oprnd0, vec_oprnd1));
3160 else
3161 new_stmt = build_gimple_modify_stmt (vec_dest,
3162 build1 (code, vectype, vec_oprnd0));
3163 new_temp = make_ssa_name (vec_dest, new_stmt);
3164 GIMPLE_STMT_OPERAND (new_stmt, 0) = new_temp;
3165 vect_finish_stmt_generation (stmt, new_stmt, bsi);
3166
3167 if (j == 0)
3168 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
3169 else
3170 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
3171 prev_stmt_info = vinfo_for_stmt (new_stmt);
3172 }
3173
3174 return true;
3175 }
3176
3177
3178 /* Function vectorizable_type_demotion
3179
3180 Check if STMT performs a binary or unary operation that involves
3181 type demotion, and if it can be vectorized.
3182 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
3183 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
3184 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
3185
3186 bool
3187 vectorizable_type_demotion (tree stmt, block_stmt_iterator *bsi,
3188 tree *vec_stmt)
3189 {
3190 tree vec_dest;
3191 tree scalar_dest;
3192 tree operation;
3193 tree op0;
3194 tree vec_oprnd0=NULL, vec_oprnd1=NULL;
3195 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
3196 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
3197 enum tree_code code, code1 = ERROR_MARK;
3198 tree new_temp;
3199 tree def, def_stmt;
3200 enum vect_def_type dt0;
3201 tree new_stmt;
3202 stmt_vec_info prev_stmt_info;
3203 int nunits_in;
3204 int nunits_out;
3205 tree vectype_out;
3206 int ncopies;
3207 int j;
3208 tree expr;
3209 tree vectype_in;
3210
3211 if (!STMT_VINFO_RELEVANT_P (stmt_info))
3212 return false;
3213
3214 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_loop_def)
3215 return false;
3216
3217 /* FORNOW: not yet supported. */
3218 if (STMT_VINFO_LIVE_P (stmt_info))
3219 {
3220 if (vect_print_dump_info (REPORT_DETAILS))
3221 fprintf (vect_dump, "value used after loop.");
3222 return false;
3223 }
3224
3225 /* Is STMT a vectorizable type-demotion operation? */
3226 if (TREE_CODE (stmt) != GIMPLE_MODIFY_STMT)
3227 return false;
3228
3229 if (TREE_CODE (GIMPLE_STMT_OPERAND (stmt, 0)) != SSA_NAME)
3230 return false;
3231
3232 operation = GIMPLE_STMT_OPERAND (stmt, 1);
3233 code = TREE_CODE (operation);
3234 if (code != NOP_EXPR && code != CONVERT_EXPR)
3235 return false;
3236
3237 op0 = TREE_OPERAND (operation, 0);
3238 vectype_in = get_vectype_for_scalar_type (TREE_TYPE (op0));
3239 nunits_in = TYPE_VECTOR_SUBPARTS (vectype_in);
3240
3241 scalar_dest = GIMPLE_STMT_OPERAND (stmt, 0);
3242 vectype_out = get_vectype_for_scalar_type (TREE_TYPE (scalar_dest));
3243 nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out);
3244 if (nunits_in != nunits_out / 2) /* FORNOW */
3245 return false;
3246
3247 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits_out;
3248 gcc_assert (ncopies >= 1);
3249
3250 if (! ((INTEGRAL_TYPE_P (TREE_TYPE (scalar_dest))
3251 && INTEGRAL_TYPE_P (TREE_TYPE (op0)))
3252 || (SCALAR_FLOAT_TYPE_P (TREE_TYPE (scalar_dest))
3253 && SCALAR_FLOAT_TYPE_P (TREE_TYPE (op0))
3254 && (code == NOP_EXPR || code == CONVERT_EXPR))))
3255 return false;
3256
3257 /* Check the operands of the operation. */
3258 if (!vect_is_simple_use (op0, loop_vinfo, &def_stmt, &def, &dt0))
3259 {
3260 if (vect_print_dump_info (REPORT_DETAILS))
3261 fprintf (vect_dump, "use not simple.");
3262 return false;
3263 }
3264
3265 /* Supportable by target? */
3266 if (!supportable_narrowing_operation (code, stmt, vectype_in, &code1))
3267 return false;
3268
3269 STMT_VINFO_VECTYPE (stmt_info) = vectype_in;
3270
3271 if (!vec_stmt) /* transformation not required. */
3272 {
3273 STMT_VINFO_TYPE (stmt_info) = type_demotion_vec_info_type;
3274 if (vect_print_dump_info (REPORT_DETAILS))
3275 fprintf (vect_dump, "=== vectorizable_demotion ===");
3276 vect_model_simple_cost (stmt_info, ncopies);
3277 return true;
3278 }
3279
3280 /** Transform. **/
3281 if (vect_print_dump_info (REPORT_DETAILS))
3282 fprintf (vect_dump, "transform type demotion operation. ncopies = %d.",
3283 ncopies);
3284
3285 /* Handle def. */
3286 vec_dest = vect_create_destination_var (scalar_dest, vectype_out);
3287
3288 /* In case the vectorization factor (VF) is bigger than the number
3289 of elements that we can fit in a vectype (nunits), we have to generate
3290 more than one vector stmt - i.e. - we need to "unroll" the
3291 vector stmt by a factor VF/nunits. */
3292 prev_stmt_info = NULL;
3293 for (j = 0; j < ncopies; j++)
3294 {
3295 /* Handle uses. */
3296 if (j == 0)
3297 {
3298 vec_oprnd0 = vect_get_vec_def_for_operand (op0, stmt, NULL);
3299 vec_oprnd1 = vect_get_vec_def_for_stmt_copy (dt0, vec_oprnd0);
3300 }
3301 else
3302 {
3303 vec_oprnd0 = vect_get_vec_def_for_stmt_copy (dt0, vec_oprnd1);
3304 vec_oprnd1 = vect_get_vec_def_for_stmt_copy (dt0, vec_oprnd0);
3305 }
3306
3307 /* Arguments are ready. Create the new vector stmt. */
3308 expr = build2 (code1, vectype_out, vec_oprnd0, vec_oprnd1);
3309 new_stmt = build_gimple_modify_stmt (vec_dest, expr);
3310 new_temp = make_ssa_name (vec_dest, new_stmt);
3311 GIMPLE_STMT_OPERAND (new_stmt, 0) = new_temp;
3312 vect_finish_stmt_generation (stmt, new_stmt, bsi);
3313
3314 if (j == 0)
3315 STMT_VINFO_VEC_STMT (stmt_info) = new_stmt;
3316 else
3317 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
3318
3319 prev_stmt_info = vinfo_for_stmt (new_stmt);
3320 }
3321
3322 *vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);
3323 return true;
3324 }
3325
3326
3327 /* Function vectorizable_type_promotion
3328
3329 Check if STMT performs a binary or unary operation that involves
3330 type promotion, and if it can be vectorized.
3331 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
3332 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
3333 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
3334
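/* Editorial sketch (not part of the pass): a scalar loop of the shape this
   routine vectorizes, using hypothetical arrays.  A NOP_EXPR/CONVERT_EXPR
   promotion widens each element; the widening multiply below is the binary
   case (WIDEN_MULT_EXPR).  One input vector of nunits_in shorts expands
   into two output vectors of nunits_out == nunits_in / 2 ints each, which
   is why two halves of the widened result are generated per copy below.  */

static void
example_promotion (int *dst, const short *a, const short *b, int n)
{
  int i;
  for (i = 0; i < n; i++)
    dst[i] = (int) a[i] * (int) b[i];  /* widening multiply  */
}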
3335 bool
3336 vectorizable_type_promotion (tree stmt, block_stmt_iterator *bsi,
3337 tree *vec_stmt)
3338 {
3339 tree vec_dest;
3340 tree scalar_dest;
3341 tree operation;
3342 tree op0, op1 = NULL;
3343 tree vec_oprnd0 = NULL, vec_oprnd1 = NULL;
3344 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
3345 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
3346 enum tree_code code, code1 = ERROR_MARK, code2 = ERROR_MARK;
3347 tree decl1 = NULL_TREE, decl2 = NULL_TREE;
3348 int op_type;
3349 tree def, def_stmt;
3350 enum vect_def_type dt0, dt1;
3351 tree new_stmt;
3352 stmt_vec_info prev_stmt_info;
3353 int nunits_in;
3354 int nunits_out;
3355 tree vectype_out;
3356 int ncopies;
3357 int j;
3358 tree vectype_in;
3359
3360 if (!STMT_VINFO_RELEVANT_P (stmt_info))
3361 return false;
3362
3363 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_loop_def)
3364 return false;
3365
3366 /* FORNOW: not yet supported. */
3367 if (STMT_VINFO_LIVE_P (stmt_info))
3368 {
3369 if (vect_print_dump_info (REPORT_DETAILS))
3370 fprintf (vect_dump, "value used after loop.");
3371 return false;
3372 }
3373
3374 /* Is STMT a vectorizable type-promotion operation? */
3375 if (TREE_CODE (stmt) != GIMPLE_MODIFY_STMT)
3376 return false;
3377
3378 if (TREE_CODE (GIMPLE_STMT_OPERAND (stmt, 0)) != SSA_NAME)
3379 return false;
3380
3381 operation = GIMPLE_STMT_OPERAND (stmt, 1);
3382 code = TREE_CODE (operation);
3383 if (code != NOP_EXPR && code != CONVERT_EXPR
3384 && code != WIDEN_MULT_EXPR)
3385 return false;
3386
3387 op0 = TREE_OPERAND (operation, 0);
3388 vectype_in = get_vectype_for_scalar_type (TREE_TYPE (op0));
3389 nunits_in = TYPE_VECTOR_SUBPARTS (vectype_in);
3390
3391 scalar_dest = GIMPLE_STMT_OPERAND (stmt, 0);
3392 vectype_out = get_vectype_for_scalar_type (TREE_TYPE (scalar_dest));
3393 nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out);
3394 if (nunits_out != nunits_in / 2) /* FORNOW */
3395 return false;
3396
3397 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits_in;
3398 gcc_assert (ncopies >= 1);
3399
3400 if (! ((INTEGRAL_TYPE_P (TREE_TYPE (scalar_dest))
3401 && INTEGRAL_TYPE_P (TREE_TYPE (op0)))
3402 || (SCALAR_FLOAT_TYPE_P (TREE_TYPE (scalar_dest))
3403 && SCALAR_FLOAT_TYPE_P (TREE_TYPE (op0))
3404 && (code == CONVERT_EXPR || code == NOP_EXPR))))
3405 return false;
3406
3407 /* Check the operands of the operation. */
3408 if (!vect_is_simple_use (op0, loop_vinfo, &def_stmt, &def, &dt0))
3409 {
3410 if (vect_print_dump_info (REPORT_DETAILS))
3411 fprintf (vect_dump, "use not simple.");
3412 return false;
3413 }
3414
3415 op_type = TREE_CODE_LENGTH (code);
3416 if (op_type == binary_op)
3417 {
3418 op1 = TREE_OPERAND (operation, 1);
3419 if (!vect_is_simple_use (op1, loop_vinfo, &def_stmt, &def, &dt1))
3420 {
3421 if (vect_print_dump_info (REPORT_DETAILS))
3422 fprintf (vect_dump, "use not simple.");
3423 return false;
3424 }
3425 }
3426
3427 /* Supportable by target? */
3428 if (!supportable_widening_operation (code, stmt, vectype_in,
3429 &decl1, &decl2, &code1, &code2))
3430 return false;
3431
3432 STMT_VINFO_VECTYPE (stmt_info) = vectype_in;
3433
3434 if (!vec_stmt) /* transformation not required. */
3435 {
3436 STMT_VINFO_TYPE (stmt_info) = type_promotion_vec_info_type;
3437 if (vect_print_dump_info (REPORT_DETAILS))
3438 fprintf (vect_dump, "=== vectorizable_promotion ===");
3439 vect_model_simple_cost (stmt_info, 2*ncopies);
3440 return true;
3441 }
3442
3443 /** Transform. **/
3444
3445 if (vect_print_dump_info (REPORT_DETAILS))
3446 fprintf (vect_dump, "transform type promotion operation. ncopies = %d.",
3447 ncopies);
3448
3449 /* Handle def. */
3450 vec_dest = vect_create_destination_var (scalar_dest, vectype_out);
3451
3452 /* In case the vectorization factor (VF) is bigger than the number
3453 of elements that we can fit in a vectype (nunits), we have to generate
3454 more than one vector stmt - i.e. - we need to "unroll" the
3455 vector stmt by a factor VF/nunits. */
3456
3457 prev_stmt_info = NULL;
3458 for (j = 0; j < ncopies; j++)
3459 {
3460 /* Handle uses. */
3461 if (j == 0)
3462 {
3463 vec_oprnd0 = vect_get_vec_def_for_operand (op0, stmt, NULL);
3464 if (op_type == binary_op)
3465 vec_oprnd1 = vect_get_vec_def_for_operand (op1, stmt, NULL);
3466 }
3467 else
3468 {
3469 vec_oprnd0 = vect_get_vec_def_for_stmt_copy (dt0, vec_oprnd0);
3470 if (op_type == binary_op)
3471 vec_oprnd1 = vect_get_vec_def_for_stmt_copy (dt1, vec_oprnd1);
3472 }
3473
3474 /* Arguments are ready. Create the new vector stmt. We are creating
3475 two vector defs because the widened result does not fit in one vector.
3476 The vectorized stmt can be expressed as a call to a target builtin,
3477 or using a tree-code. */
3478 /* Generate first half of the widened result: */
3479 new_stmt = vect_gen_widened_results_half (code1, vectype_out, decl1,
3480 vec_oprnd0, vec_oprnd1, op_type, vec_dest, bsi, stmt);
3481 if (j == 0)
3482 STMT_VINFO_VEC_STMT (stmt_info) = new_stmt;
3483 else
3484 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
3485 prev_stmt_info = vinfo_for_stmt (new_stmt);
3486
3487 /* Generate second half of the widened result: */
3488 new_stmt = vect_gen_widened_results_half (code2, vectype_out, decl2,
3489 vec_oprnd0, vec_oprnd1, op_type, vec_dest, bsi, stmt);
3490 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
3491 prev_stmt_info = vinfo_for_stmt (new_stmt);
3492
3493 }
3494
3495 *vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);
3496 return true;
3497 }
3498
3499
3500 /* Function vect_strided_store_supported.
3501
3502 Returns TRUE if INTERLEAVE_HIGH and INTERLEAVE_LOW operations are supported,
3503 and FALSE otherwise. */
3504
3505 static bool
3506 vect_strided_store_supported (tree vectype)
3507 {
3508 optab interleave_high_optab, interleave_low_optab;
3509 int mode;
3510
3511 mode = (int) TYPE_MODE (vectype);
3512
3513 /* Check that the operation is supported. */
3514 interleave_high_optab = optab_for_tree_code (VEC_INTERLEAVE_HIGH_EXPR,
3515 vectype);
3516 interleave_low_optab = optab_for_tree_code (VEC_INTERLEAVE_LOW_EXPR,
3517 vectype);
3518 if (!interleave_high_optab || !interleave_low_optab)
3519 {
3520 if (vect_print_dump_info (REPORT_DETAILS))
3521 fprintf (vect_dump, "no optab for interleave.");
3522 return false;
3523 }
3524
3525 if (interleave_high_optab->handlers[(int) mode].insn_code
3526 == CODE_FOR_nothing
3527 || interleave_low_optab->handlers[(int) mode].insn_code
3528 == CODE_FOR_nothing)
3529 {
3530 if (vect_print_dump_info (REPORT_DETAILS))
3531 fprintf (vect_dump, "interleave op not supported by target.");
3532 return false;
3533 }
3534 return true;
3535 }
3536
3537
3538 /* Function vect_permute_store_chain.
3539
3540 Given a chain of interleaved stores in DR_CHAIN, whose LENGTH must be
3541 a power of 2, generate interleave_high/low stmts to reorder the data
3542 correctly for the stores. Return the final references for stores in
3543 RESULT_CHAIN.
3544
3545 E.g., LENGTH is 4 and the scalar type is short, i.e., VF is 8.
3546 The input is 4 vectors each containing 8 elements. We assign a number to each
3547 element; the input sequence is:
3548
3549 1st vec: 0 1 2 3 4 5 6 7
3550 2nd vec: 8 9 10 11 12 13 14 15
3551 3rd vec: 16 17 18 19 20 21 22 23
3552 4th vec: 24 25 26 27 28 29 30 31
3553
3554 The output sequence should be:
3555
3556 1st vec: 0 8 16 24 1 9 17 25
3557 2nd vec: 2 10 18 26 3 11 19 27
3558 3rd vec: 4 12 20 28 5 13 21 29
3559 4th vec: 6 14 22 30 7 15 23 31
3560
3561 i.e., we interleave the contents of the four vectors in their order.
3562
3563 We use interleave_high/low instructions to create such output. The input of
3564 each interleave_high/low operation is two vectors:
3565 1st vec 2nd vec
3566 0 1 2 3 4 5 6 7
3567 the even elements of the result vector are obtained left-to-right from the
3568 high/low elements of the first vector. The odd elements of the result are
3569 obtained left-to-right from the high/low elements of the second vector.
3570 The output of interleave_high will be: 0 4 1 5
3571 and of interleave_low: 2 6 3 7
3572
3573
3574 The permutation is done in log2 (LENGTH) stages. In each stage interleave_high
3575 and interleave_low stmts are created for each pair of vectors in DR_CHAIN,
3576 where the first argument is taken from the first half of DR_CHAIN and the
3577 second argument from its second half.
3578 In our example,
3579
3580 I1: interleave_high (1st vec, 3rd vec)
3581 I2: interleave_low (1st vec, 3rd vec)
3582 I3: interleave_high (2nd vec, 4th vec)
3583 I4: interleave_low (2nd vec, 4th vec)
3584
3585 The output for the first stage is:
3586
3587 I1: 0 16 1 17 2 18 3 19
3588 I2: 4 20 5 21 6 22 7 23
3589 I3: 8 24 9 25 10 26 11 27
3590 I4: 12 28 13 29 14 30 15 31
3591
3592 The output of the second stage, i.e. the final result is:
3593
3594 I1: 0 8 16 24 1 9 17 25
3595 I2: 2 10 18 26 3 11 19 27
3596 I3: 4 12 20 28 5 13 21 29
3597 I4: 6 14 22 30 7 15 23 31. */
3598
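/* Editorial sketch (standalone, not part of the pass): simulate the
   interleave network described above for LENGTH == 4 and 8-element
   vectors, so the stage-by-stage reordering can be checked by hand.
   The "high"/"low" selection follows the big-endian description in the
   comment; memcpy is available via system.h.  */

#define EX_LEN  4
#define EX_ELTS 8

static void
example_interleave (const int *v1, const int *v2, int *out, int high)
{
  int k, base = high ? 0 : EX_ELTS / 2;
  for (k = 0; k < EX_ELTS / 2; k++)
    {
      out[2 * k] = v1[base + k];      /* even slots from the first vector  */
      out[2 * k + 1] = v2[base + k];  /* odd slots from the second vector  */
    }
}

static void
example_permute_store_chain (int chain[EX_LEN][EX_ELTS])
{
  int stage, j;
  int tmp[EX_LEN][EX_ELTS];

  for (stage = 0; stage < 2; stage++)  /* log2 (EX_LEN) stages  */
    {
      for (j = 0; j < EX_LEN / 2; j++)
        {
          example_interleave (chain[j], chain[j + EX_LEN / 2],
                              tmp[2 * j], 1);
          example_interleave (chain[j], chain[j + EX_LEN / 2],
                              tmp[2 * j + 1], 0);
        }
      memcpy (chain, tmp, sizeof (tmp));
    }
}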
3599 static bool
3600 vect_permute_store_chain (VEC(tree,heap) *dr_chain,
3601 unsigned int length,
3602 tree stmt,
3603 block_stmt_iterator *bsi,
3604 VEC(tree,heap) **result_chain)
3605 {
3606 tree perm_dest, perm_stmt, vect1, vect2, high, low;
3607 tree vectype = STMT_VINFO_VECTYPE (vinfo_for_stmt (stmt));
3608 tree scalar_dest, tmp;
3609 int i;
3610 unsigned int j;
3611 VEC(tree,heap) *first, *second;
3612
3613 scalar_dest = GIMPLE_STMT_OPERAND (stmt, 0);
3614 first = VEC_alloc (tree, heap, length/2);
3615 second = VEC_alloc (tree, heap, length/2);
3616
3617 /* Check that the operation is supported. */
3618 if (!vect_strided_store_supported (vectype))
3619 return false;
3620
3621 *result_chain = VEC_copy (tree, heap, dr_chain);
3622
3623 for (i = 0; i < exact_log2 (length); i++)
3624 {
3625 for (j = 0; j < length/2; j++)
3626 {
3627 vect1 = VEC_index (tree, dr_chain, j);
3628 vect2 = VEC_index (tree, dr_chain, j+length/2);
3629
3630 /* Create interleaving stmt:
3631 in the case of big endian:
3632 high = interleave_high (vect1, vect2)
3633 and in the case of little endian:
3634 high = interleave_low (vect1, vect2). */
3635 perm_dest = create_tmp_var (vectype, "vect_inter_high");
3636 DECL_GIMPLE_REG_P (perm_dest) = 1;
3637 add_referenced_var (perm_dest);
3638 if (BYTES_BIG_ENDIAN)
3639 tmp = build2 (VEC_INTERLEAVE_HIGH_EXPR, vectype, vect1, vect2);
3640 else
3641 tmp = build2 (VEC_INTERLEAVE_LOW_EXPR, vectype, vect1, vect2);
3642 perm_stmt = build_gimple_modify_stmt (perm_dest, tmp);
3643 high = make_ssa_name (perm_dest, perm_stmt);
3644 GIMPLE_STMT_OPERAND (perm_stmt, 0) = high;
3645 vect_finish_stmt_generation (stmt, perm_stmt, bsi);
3646 VEC_replace (tree, *result_chain, 2*j, high);
3647
3648 /* Create interleaving stmt:
3649 in the case of big endian:
3650 low = interleave_low (vect1, vect2)
3651 and in the case of little endian:
3652 low = interleave_high (vect1, vect2). */
3653 perm_dest = create_tmp_var (vectype, "vect_inter_low");
3654 DECL_GIMPLE_REG_P (perm_dest) = 1;
3655 add_referenced_var (perm_dest);
3656 if (BYTES_BIG_ENDIAN)
3657 tmp = build2 (VEC_INTERLEAVE_LOW_EXPR, vectype, vect1, vect2);
3658 else
3659 tmp = build2 (VEC_INTERLEAVE_HIGH_EXPR, vectype, vect1, vect2);
3660 perm_stmt = build_gimple_modify_stmt (perm_dest, tmp);
3661 low = make_ssa_name (perm_dest, perm_stmt);
3662 GIMPLE_STMT_OPERAND (perm_stmt, 0) = low;
3663 vect_finish_stmt_generation (stmt, perm_stmt, bsi);
3664 VEC_replace (tree, *result_chain, 2*j+1, low);
3665 }
3666 dr_chain = VEC_copy (tree, heap, *result_chain);
3667 }
3668 return true;
3669 }
3670
3671
3672 /* Function vectorizable_store.
3673
3674 Check if STMT defines a non-scalar data-ref (array/pointer/structure) that
3675 can be vectorized.
3676 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
3677 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
3678 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
3679
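/* Editorial sketch (not part of the pass): an interleaved (strided) store
   loop of the kind handled below, with a DR group of size 2.  Both stores
   belong to one chain; the whole chain is vectorized only when the last
   store of the group is reached.  */

static void
example_strided_store (int *a, const int *x, const int *y, int n)
{
  int i;
  for (i = 0; i < n; i++)
    {
      a[2 * i] = x[i];      /* first store in the DR group  */
      a[2 * i + 1] = y[i];  /* last store: triggers the transformation  */
    }
}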
3680 bool
3681 vectorizable_store (tree stmt, block_stmt_iterator *bsi, tree *vec_stmt)
3682 {
3683 tree scalar_dest;
3684 tree data_ref;
3685 tree op;
3686 tree vec_oprnd = NULL_TREE;
3687 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
3688 struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info), *first_dr = NULL;
3689 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
3690 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
3691 enum machine_mode vec_mode;
3692 tree dummy;
3693 enum dr_alignment_support alignment_support_scheme;
3694 ssa_op_iter iter;
3695 def_operand_p def_p;
3696 tree def, def_stmt;
3697 enum vect_def_type dt;
3698 stmt_vec_info prev_stmt_info = NULL;
3699 tree dataref_ptr = NULL_TREE;
3700 int nunits = TYPE_VECTOR_SUBPARTS (vectype);
3701 int ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits;
3702 int j;
3703 tree next_stmt, first_stmt;
3704 bool strided_store = false;
3705 unsigned int group_size, i;
3706 VEC(tree,heap) *dr_chain = NULL, *oprnds = NULL, *result_chain = NULL;
3707 gcc_assert (ncopies >= 1);
3708
3709 if (!STMT_VINFO_RELEVANT_P (stmt_info))
3710 return false;
3711
3712 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_loop_def)
3713 return false;
3714
3715 if (STMT_VINFO_LIVE_P (stmt_info))
3716 {
3717 if (vect_print_dump_info (REPORT_DETAILS))
3718 fprintf (vect_dump, "value used after loop.");
3719 return false;
3720 }
3721
3722 /* Is vectorizable store? */
3723
3724 if (TREE_CODE (stmt) != GIMPLE_MODIFY_STMT)
3725 return false;
3726
3727 scalar_dest = GIMPLE_STMT_OPERAND (stmt, 0);
3728 if (TREE_CODE (scalar_dest) != ARRAY_REF
3729 && TREE_CODE (scalar_dest) != INDIRECT_REF
3730 && !DR_GROUP_FIRST_DR (stmt_info))
3731 return false;
3732
3733 op = GIMPLE_STMT_OPERAND (stmt, 1);
3734 if (!vect_is_simple_use (op, loop_vinfo, &def_stmt, &def, &dt))
3735 {
3736 if (vect_print_dump_info (REPORT_DETAILS))
3737 fprintf (vect_dump, "use not simple.");
3738 return false;
3739 }
3740
3741 vec_mode = TYPE_MODE (vectype);
3742 /* FORNOW. In some cases we can vectorize even if the data-type is not
3743 supported (e.g. - array initialization with 0). */
3744 if (mov_optab->handlers[(int)vec_mode].insn_code == CODE_FOR_nothing)
3745 return false;
3746
3747 if (!STMT_VINFO_DATA_REF (stmt_info))
3748 return false;
3749
3750 if (DR_GROUP_FIRST_DR (stmt_info))
3751 {
3752 strided_store = true;
3753 if (!vect_strided_store_supported (vectype))
3754 return false;
3755 }
3756
3757 if (!vec_stmt) /* transformation not required. */
3758 {
3759 STMT_VINFO_TYPE (stmt_info) = store_vec_info_type;
3760 vect_model_store_cost (stmt_info, ncopies);
3761 return true;
3762 }
3763
3764 /** Transform. **/
3765
3766 if (strided_store)
3767 {
3768 first_stmt = DR_GROUP_FIRST_DR (stmt_info);
3769 first_dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt));
3770 group_size = DR_GROUP_SIZE (vinfo_for_stmt (first_stmt));
3771
3772 DR_GROUP_STORE_COUNT (vinfo_for_stmt (first_stmt))++;
3773
3774 /* We vectorize all the stmts of the interleaving group when we
3775 reach the last stmt in the group. */
3776 if (DR_GROUP_STORE_COUNT (vinfo_for_stmt (first_stmt))
3777 < DR_GROUP_SIZE (vinfo_for_stmt (first_stmt)))
3778 {
3779 *vec_stmt = NULL_TREE;
3780 return true;
3781 }
3782 }
3783 else
3784 {
3785 first_stmt = stmt;
3786 first_dr = dr;
3787 group_size = 1;
3788 }
3789
3790 if (vect_print_dump_info (REPORT_DETAILS))
3791 fprintf (vect_dump, "transform store. ncopies = %d", ncopies);
3792
3793 dr_chain = VEC_alloc (tree, heap, group_size);
3794 oprnds = VEC_alloc (tree, heap, group_size);
3795
3796 alignment_support_scheme = vect_supportable_dr_alignment (first_dr);
3797 gcc_assert (alignment_support_scheme);
3798 gcc_assert (alignment_support_scheme == dr_aligned); /* FORNOW */
3799
3800 /* In case the vectorization factor (VF) is bigger than the number
3801 of elements that we can fit in a vectype (nunits), we have to generate
3802 more than one vector stmt - i.e. - we need to "unroll" the
3803 vector stmt by a factor VF/nunits. For more details see documentation in
3804 vect_get_vec_def_for_copy_stmt. */
3805
3806 /* In case of interleaving (non-unit strided access):
3807
3808 S1: &base + 2 = x2
3809 S2: &base = x0
3810 S3: &base + 1 = x1
3811 S4: &base + 3 = x3
3812
3813 We create vectorized stores starting from the base address (the access of
3814 the first stmt in the chain, S2 in the above example) when the last store
3815 stmt of the chain (S4) is reached:
3816
3817 VS1: &base = vx2
3818 VS2: &base + vec_size*1 = vx0
3819 VS3: &base + vec_size*2 = vx1
3820 VS4: &base + vec_size*3 = vx3
3821
3822 Then permutation statements are generated:
3823
3824 VS5: vx5 = VEC_INTERLEAVE_HIGH_EXPR < vx0, vx3 >
3825 VS6: vx6 = VEC_INTERLEAVE_LOW_EXPR < vx0, vx3 >
3826 ...
3827
3828 And they are put in STMT_VINFO_VEC_STMT of the corresponding scalar stmts
3829 (the order of the data-refs in the output of vect_permute_store_chain
3830 corresponds to the order of scalar stmts in the interleaving chain - see
3831 the documentation of vect_permute_store_chain()).
3832
3833 In case of both multiple types and interleaving, the above vector stores and
3834 permutation stmts are created for every copy. The result vector stmts are
3835 put in STMT_VINFO_VEC_STMT for the first copy and in the corresponding
3836 STMT_VINFO_RELATED_STMT for the next copies.
3837 */
3838
3839 prev_stmt_info = NULL;
3840 for (j = 0; j < ncopies; j++)
3841 {
3842 tree new_stmt;
3843 tree ptr_incr;
3844
3845 if (j == 0)
3846 {
3847 /* For interleaved stores we collect vectorized defs for all the
3848 stores in the group in DR_CHAIN and OPRNDS. DR_CHAIN is then used
3849 as an input to vect_permute_store_chain(), and OPRNDS as an input
3850 to vect_get_vec_def_for_stmt_copy() for the next copy.
3851 If the store is not strided, GROUP_SIZE is 1, and DR_CHAIN and
3852 OPRNDS are of size 1. */
3853 next_stmt = first_stmt;
3854 for (i = 0; i < group_size; i++)
3855 {
3856 /* Since gaps are not supported for interleaved stores, GROUP_SIZE
3857 is the exact number of stmts in the chain. Therefore, NEXT_STMT
3858 can't be NULL_TREE. In case there is no interleaving,
3859 GROUP_SIZE is 1, and only one iteration of the loop will be
3860 executed. */
3861 gcc_assert (next_stmt);
3862 op = GIMPLE_STMT_OPERAND (next_stmt, 1);
3863 vec_oprnd = vect_get_vec_def_for_operand (op, next_stmt, NULL);
3864 VEC_quick_push (tree, dr_chain, vec_oprnd);
3865 VEC_quick_push (tree, oprnds, vec_oprnd);
3866 next_stmt = DR_GROUP_NEXT_DR (vinfo_for_stmt (next_stmt));
3867 }
3868 dataref_ptr = vect_create_data_ref_ptr (first_stmt, bsi, NULL_TREE,
3869 &dummy, &ptr_incr, false,
3870 TREE_TYPE (vec_oprnd));
3871 }
3872 else
3873 {
3874 /* For interleaved stores we created vectorized defs for all the
3875 defs stored in OPRNDS in the previous iteration (previous copy).
3876 DR_CHAIN is then used as an input to vect_permute_store_chain(),
3877 and OPRNDS as an input to vect_get_vec_def_for_stmt_copy() for the
3878 next copy.
3879 If the store is not strided, GROUP_SIZE is 1, and DR_CHAIN and
3880 OPRNDS are of size 1. */
3881 for (i = 0; i < group_size; i++)
3882 {
3883 vec_oprnd = vect_get_vec_def_for_stmt_copy (dt,
3884 VEC_index (tree, oprnds, i));
3885 VEC_replace (tree, dr_chain, i, vec_oprnd);
3886 VEC_replace (tree, oprnds, i, vec_oprnd);
3887 }
3888 dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, bsi, stmt);
3889 }
3890
3891 if (strided_store)
3892 {
3893 result_chain = VEC_alloc (tree, heap, group_size);
3894 /* Permute. */
3895 if (!vect_permute_store_chain (dr_chain, group_size, stmt, bsi,
3896 &result_chain))
3897 return false;
3898 }
3899
3900 next_stmt = first_stmt;
3901 for (i = 0; i < group_size; i++)
3902 {
3903 /* For strided stores vectorized defs are interleaved in
3904 vect_permute_store_chain(). */
3905 if (strided_store)
3906 vec_oprnd = VEC_index (tree, result_chain, i);
3907
3908 data_ref = build_fold_indirect_ref (dataref_ptr);
3909 /* Arguments are ready. Create the new vector stmt. */
3910 new_stmt = build_gimple_modify_stmt (data_ref, vec_oprnd);
3911 vect_finish_stmt_generation (stmt, new_stmt, bsi);
3912
3913 /* Set the VDEFs for the vector pointer. If this virtual def
3914 has a use outside the loop and a loop peel is performed
3915 then the def may be renamed by the peel. Mark it for
3916 renaming so the later use will also be renamed. */
3917 copy_virtual_operands (new_stmt, next_stmt);
3918 if (j == 0)
3919 {
3920 /* The original store is deleted so the same SSA_NAMEs
3921 can be used. */
3922 FOR_EACH_SSA_TREE_OPERAND (def, next_stmt, iter, SSA_OP_VDEF)
3923 {
3924 SSA_NAME_DEF_STMT (def) = new_stmt;
3925 mark_sym_for_renaming (SSA_NAME_VAR (def));
3926 }
3927
3928 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
3929 }
3930 else
3931 {
3932 /* Create new names for all the definitions created by this
3933 copy of the stmt, and add replacement mappings for each new name. */
3934 FOR_EACH_SSA_DEF_OPERAND (def_p, new_stmt, iter, SSA_OP_VDEF)
3935 {
3936 create_new_def_for (DEF_FROM_PTR (def_p), new_stmt, def_p);
3937 mark_sym_for_renaming (SSA_NAME_VAR (DEF_FROM_PTR (def_p)));
3938 }
3939
3940 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
3941 }
3942
3943 prev_stmt_info = vinfo_for_stmt (new_stmt);
3944 next_stmt = DR_GROUP_NEXT_DR (vinfo_for_stmt (next_stmt));
3945 if (!next_stmt)
3946 break;
3947 /* Bump the vector pointer. */
3948 dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, bsi, stmt);
3949 }
3950 }
3951
3952 return true;
3953 }
3954
3955
3956 /* Function vect_setup_realignment
3957
3958 This function is called when vectorizing an unaligned load using
3959 the dr_unaligned_software_pipeline scheme.
3960 This function generates the following code at the loop prolog:
3961
3962 p = initial_addr;
3963 msq_init = *(floor(p)); # prolog load
3964 realignment_token = call target_builtin;
3965 loop:
3966 msq = phi (msq_init, ---)
3967
3968 The code above sets up a new (vector) pointer, pointing to the first
3969 location accessed by STMT, and a "floor-aligned" load using that pointer.
3970 It also generates code to compute the "realignment-token" (if the relevant
3971 target hook was defined), and creates a phi-node at the loop-header bb
3972 whose arguments are the result of the prolog-load (created by this
3973 function) and the result of a load that takes place in the loop (to be
3974 created by the caller to this function).
3975 The caller to this function uses the phi-result (msq) to create the
3976 realignment code inside the loop, and sets up the missing phi argument,
3977 as follows:
3978
3979 loop:
3980 msq = phi (msq_init, lsq)
3981 lsq = *(floor(p')); # load in loop
3982 result = realign_load (msq, lsq, realignment_token);
3983
3984 Input:
3985 STMT - (scalar) load stmt to be vectorized. This load accesses
3986 a memory location that may be unaligned.
3987 BSI - place where new code is to be inserted.
3988
3989 Output:
3990 REALIGNMENT_TOKEN - the result of a call to the builtin_mask_for_load
3991 target hook, if defined.
3992 Return value - the result of the loop-header phi node. */
3993
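/* Editorial sketch (standalone, not part of the pass): a scalar emulation
   of the realignment scheme for 4-element int vectors, assuming a 16-byte
   vector alignment.  floor(p) rounds P down to a vector boundary; the
   realign_load step selects 4 consecutive elements out of the two aligned
   loads, starting at the original misalignment.  In the loop itself the
   current lsq becomes the msq of the next iteration.  */

static void
example_realign_load (const int *p, int *result)
{
  const int *fp = (const int *) ((size_t) p & ~(size_t) 15);
  int shift = p - fp;  /* misalignment, in elements  */
  int msq[4], lsq[4], k;

  memcpy (msq, fp, sizeof (msq));      /* msq_init = *(floor (p))  */
  memcpy (lsq, fp + 4, sizeof (lsq));  /* lsq = next aligned vector  */
  for (k = 0; k < 4; k++)              /* realign_load (msq, lsq, token)  */
    result[k] = k + shift < 4 ? msq[k + shift] : lsq[k + shift - 4];
}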
3994 static tree
3995 vect_setup_realignment (tree stmt, block_stmt_iterator *bsi,
3996 tree *realignment_token)
3997 {
3998 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
3999 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
4000 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
4001 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
4002 edge pe = loop_preheader_edge (loop);
4003 tree scalar_dest = GIMPLE_STMT_OPERAND (stmt, 0);
4004 tree vec_dest;
4005 tree init_addr;
4006 tree inc;
4007 tree ptr;
4008 tree data_ref;
4009 tree new_stmt;
4010 basic_block new_bb;
4011 tree msq_init;
4012 tree new_temp;
4013 tree phi_stmt;
4014 tree msq;
4015
4016 /* 1. Create msq_init = *(floor(p1)) in the loop preheader */
4017 vec_dest = vect_create_destination_var (scalar_dest, vectype);
4018 ptr = vect_create_data_ref_ptr (stmt, bsi, NULL_TREE, &init_addr, &inc, true,
4019 NULL_TREE);
4020 data_ref = build1 (ALIGN_INDIRECT_REF, vectype, ptr);
4021 new_stmt = build_gimple_modify_stmt (vec_dest, data_ref);
4022 new_temp = make_ssa_name (vec_dest, new_stmt);
4023 GIMPLE_STMT_OPERAND (new_stmt, 0) = new_temp;
4024 new_bb = bsi_insert_on_edge_immediate (pe, new_stmt);
4025 gcc_assert (!new_bb);
4026 msq_init = GIMPLE_STMT_OPERAND (new_stmt, 0);
4027 copy_virtual_operands (new_stmt, stmt);
4028 update_vuses_to_preheader (new_stmt, loop);
4029
4030 /* 2. Create permutation mask, if required, in loop preheader. */
4031 if (targetm.vectorize.builtin_mask_for_load)
4032 {
4033 tree builtin_decl;
4034
4035 builtin_decl = targetm.vectorize.builtin_mask_for_load ();
4036 new_stmt = build_call_expr (builtin_decl, 1, init_addr);
4037 vec_dest = vect_create_destination_var (scalar_dest,
4038 TREE_TYPE (new_stmt));
4039 new_stmt = build_gimple_modify_stmt (vec_dest, new_stmt);
4040 new_temp = make_ssa_name (vec_dest, new_stmt);
4041 GIMPLE_STMT_OPERAND (new_stmt, 0) = new_temp;
4042 new_bb = bsi_insert_on_edge_immediate (pe, new_stmt);
4043 gcc_assert (!new_bb);
4044 *realignment_token = GIMPLE_STMT_OPERAND (new_stmt, 0);
4045
4046 /* The result of the CALL_EXPR to this builtin is determined from
4047 the value of the parameter, and no global variables are touched,
4048 which makes the builtin a "const" function. Requiring the
4049 builtin to have the "const" attribute makes it unnecessary
4050 to call mark_call_clobbered. */
4051 gcc_assert (TREE_READONLY (builtin_decl));
4052 }
4053
4054 /* 3. Create msq = phi <msq_init, lsq> in loop */
4055 vec_dest = vect_create_destination_var (scalar_dest, vectype);
4056 msq = make_ssa_name (vec_dest, NULL_TREE);
4057 phi_stmt = create_phi_node (msq, loop->header);
4058 SSA_NAME_DEF_STMT (msq) = phi_stmt;
4059 add_phi_arg (phi_stmt, msq_init, loop_preheader_edge (loop));
4060
4061 return msq;
4062 }
4063
4064
4065 /* Function vect_strided_load_supported.
4066
4067 Returns TRUE if EXTRACT_EVEN and EXTRACT_ODD operations are supported,
4068 and FALSE otherwise. */
4069
4070 static bool
4071 vect_strided_load_supported (tree vectype)
4072 {
4073 optab perm_even_optab, perm_odd_optab;
4074 int mode;
4075
4076 mode = (int) TYPE_MODE (vectype);
4077
4078 perm_even_optab = optab_for_tree_code (VEC_EXTRACT_EVEN_EXPR, vectype);
4079 if (!perm_even_optab)
4080 {
4081 if (vect_print_dump_info (REPORT_DETAILS))
4082 fprintf (vect_dump, "no optab for perm_even.");
4083 return false;
4084 }
4085
4086 if (perm_even_optab->handlers[mode].insn_code == CODE_FOR_nothing)
4087 {
4088 if (vect_print_dump_info (REPORT_DETAILS))
4089 fprintf (vect_dump, "perm_even op not supported by target.");
4090 return false;
4091 }
4092
4093 perm_odd_optab = optab_for_tree_code (VEC_EXTRACT_ODD_EXPR, vectype);
4094 if (!perm_odd_optab)
4095 {
4096 if (vect_print_dump_info (REPORT_DETAILS))
4097 fprintf (vect_dump, "no optab for perm_odd.");
4098 return false;
4099 }
4100
4101 if (perm_odd_optab->handlers[mode].insn_code == CODE_FOR_nothing)
4102 {
4103 if (vect_print_dump_info (REPORT_DETAILS))
4104 fprintf (vect_dump, "perm_odd op not supported by target.");
4105 return false;
4106 }
4107 return true;
4108 }
4109
4110
4111 /* Function vect_permute_load_chain.
4112
4113 Given a chain of interleaved loads in DR_CHAIN, whose LENGTH must be
4114 a power of 2, generate extract_even/odd stmts to reorder the input data
4115 correctly. Return the final references for loads in RESULT_CHAIN.
4116
4117 E.g., LENGTH is 4 and the scalar type is short, i.e., VF is 8.
4118 The input is 4 vectors each containing 8 elements. We assign a number to each
4119 element; the input sequence is:
4120
4121 1st vec: 0 1 2 3 4 5 6 7
4122 2nd vec: 8 9 10 11 12 13 14 15
4123 3rd vec: 16 17 18 19 20 21 22 23
4124 4th vec: 24 25 26 27 28 29 30 31
4125
4126 The output sequence should be:
4127
4128 1st vec: 0 4 8 12 16 20 24 28
4129 2nd vec: 1 5 9 13 17 21 25 29
4130 3rd vec: 2 6 10 14 18 22 26 30
4131 4th vec: 3 7 11 15 19 23 27 31
4132
4133 i.e., the first output vector should contain the first elements of each
4134 interleaving group, etc.
4135
4136 We use extract_even/odd instructions to create such output. The input of each
4137 extract_even/odd operation is two vectors
4138 1st vec 2nd vec
4139 0 1 2 3 4 5 6 7
4140
4141 and the output is the vector of extracted even/odd elements. The output of
4142 extract_even will be: 0 2 4 6
4143 and of extract_odd: 1 3 5 7
4144
4145
4146 The permutation is done in log2 (LENGTH) stages. In each stage extract_even and
4147 extract_odd stmts are created for each pair of vectors in DR_CHAIN in their
4148 order. In our example,
4149
4150 E1: extract_even (1st vec, 2nd vec)
4151 E2: extract_odd (1st vec, 2nd vec)
4152 E3: extract_even (3rd vec, 4th vec)
4153 E4: extract_odd (3rd vec, 4th vec)
4154
4155 The output for the first stage will be:
4156
4157 E1: 0 2 4 6 8 10 12 14
4158 E2: 1 3 5 7 9 11 13 15
4159 E3: 16 18 20 22 24 26 28 30
4160 E4: 17 19 21 23 25 27 29 31
4161
4162 In order to proceed and create the correct sequence for the next stage (or
4163 for the correct output, if the second stage is the last one, as in our
4164 example), we first put the output of the extract_even operation and then the
4165 output of extract_odd in RESULT_CHAIN (which is then copied to DR_CHAIN).
4166 The input for the second stage is:
4167
4168 1st vec (E1): 0 2 4 6 8 10 12 14
4169 2nd vec (E3): 16 18 20 22 24 26 28 30
4170 3rd vec (E2): 1 3 5 7 9 11 13 15
4171 4th vec (E4): 17 19 21 23 25 27 29 31
4172
4173 The output of the second stage:
4174
4175 E1: 0 4 8 12 16 20 24 28
4176 E2: 2 6 10 14 18 22 26 30
4177 E3: 1 5 9 13 17 21 25 29
4178 E4: 3 7 11 15 19 23 27 31
4179
4180 And RESULT_CHAIN after reordering:
4181
4182 1st vec (E1): 0 4 8 12 16 20 24 28
4183 2nd vec (E3): 1 5 9 13 17 21 25 29
4184 3rd vec (E2): 2 6 10 14 18 22 26 30
4185 4th vec (E4): 3 7 11 15 19 23 27 31. */
4186
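/* Editorial sketch (standalone, not part of the pass): simulate the
   extract_even/odd network described above, reusing EX_LEN and EX_ELTS
   from the store sketch earlier in this file.  The even results land in
   the first half of the chain and the odd results in the second half,
   exactly as RESULT_CHAIN is filled below.  */

static void
example_extract (const int *v1, const int *v2, int *out, int odd)
{
  int k;
  for (k = 0; k < EX_ELTS / 2; k++)
    {
      out[k] = v1[2 * k + odd];
      out[k + EX_ELTS / 2] = v2[2 * k + odd];
    }
}

static void
example_permute_load_chain (int chain[EX_LEN][EX_ELTS])
{
  int stage, j;
  int tmp[EX_LEN][EX_ELTS];

  for (stage = 0; stage < 2; stage++)  /* log2 (EX_LEN) stages  */
    {
      for (j = 0; j < EX_LEN; j += 2)
        {
          example_extract (chain[j], chain[j + 1], tmp[j / 2], 0);
          example_extract (chain[j], chain[j + 1],
                           tmp[j / 2 + EX_LEN / 2], 1);
        }
      memcpy (chain, tmp, sizeof (tmp));
    }
}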
4187 static bool
4188 vect_permute_load_chain (VEC(tree,heap) *dr_chain,
4189 unsigned int length,
4190 tree stmt,
4191 block_stmt_iterator *bsi,
4192 VEC(tree,heap) **result_chain)
4193 {
4194 tree perm_dest, perm_stmt, data_ref, first_vect, second_vect;
4195 tree vectype = STMT_VINFO_VECTYPE (vinfo_for_stmt (stmt));
4196 tree tmp;
4197 int i;
4198 unsigned int j;
4199
4200 /* Check that the operation is supported. */
4201 if (!vect_strided_load_supported (vectype))
4202 return false;
4203
4204 *result_chain = VEC_copy (tree, heap, dr_chain);
4205 for (i = 0; i < exact_log2 (length); i++)
4206 {
4207 for (j = 0; j < length; j += 2)
4208 {
4209 first_vect = VEC_index (tree, dr_chain, j);
4210 second_vect = VEC_index (tree, dr_chain, j+1);
4211
4212 /* data_ref = permute_even (first_data_ref, second_data_ref); */
4213 perm_dest = create_tmp_var (vectype, "vect_perm_even");
4214 DECL_GIMPLE_REG_P (perm_dest) = 1;
4215 add_referenced_var (perm_dest);
4216
4217 tmp = build2 (VEC_EXTRACT_EVEN_EXPR, vectype,
4218 first_vect, second_vect);
4219 perm_stmt = build_gimple_modify_stmt (perm_dest, tmp);
4220
4221 data_ref = make_ssa_name (perm_dest, perm_stmt);
4222 GIMPLE_STMT_OPERAND (perm_stmt, 0) = data_ref;
4223 vect_finish_stmt_generation (stmt, perm_stmt, bsi);
4224 mark_symbols_for_renaming (perm_stmt);
4225
4226 VEC_replace (tree, *result_chain, j/2, data_ref);
4227
4228 /* data_ref = permute_odd (first_data_ref, second_data_ref); */
4229 perm_dest = create_tmp_var (vectype, "vect_perm_odd");
4230 DECL_GIMPLE_REG_P (perm_dest) = 1;
4231 add_referenced_var (perm_dest);
4232
4233 tmp = build2 (VEC_EXTRACT_ODD_EXPR, vectype,
4234 first_vect, second_vect);
4235 perm_stmt = build_gimple_modify_stmt (perm_dest, tmp);
4236 data_ref = make_ssa_name (perm_dest, perm_stmt);
4237 GIMPLE_STMT_OPERAND (perm_stmt, 0) = data_ref;
4238 vect_finish_stmt_generation (stmt, perm_stmt, bsi);
4239 mark_symbols_for_renaming (perm_stmt);
4240
4241 VEC_replace (tree, *result_chain, j/2+length/2, data_ref);
4242 }
4243 dr_chain = VEC_copy (tree, heap, *result_chain);
4244 }
4245 return true;
4246 }
4247
4248
4249 /* Function vect_transform_strided_load.
4250
4251 Given a chain of input interleaved data-refs (in DR_CHAIN), build statements
4252 to perform their permutation and attach the resulting vectorized statements to
4253 the scalar statements.
4254 */
4255
4256 static bool
4257 vect_transform_strided_load (tree stmt, VEC(tree,heap) *dr_chain, int size,
4258 block_stmt_iterator *bsi)
4259 {
4260 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
4261 tree first_stmt = DR_GROUP_FIRST_DR (stmt_info);
4262 tree next_stmt, new_stmt;
4263 VEC(tree,heap) *result_chain = NULL;
4264 unsigned int i, gap_count;
4265 tree tmp_data_ref;
4266
4267 /* DR_CHAIN contains input data-refs that are a part of the interleaving.
4268 RESULT_CHAIN is the output of vect_permute_load_chain; it contains the
4269 permuted vectors, which are ready for vector computation. */
4270 result_chain = VEC_alloc (tree, heap, size);
4271 /* Permute. */
4272 if (!vect_permute_load_chain (dr_chain, size, stmt, bsi, &result_chain))
4273 return false;
4274
4275 /* Put a permuted data-ref in the VECTORIZED_STMT field.
4276 Since we scan the chain starting from its first node, their order
4277 corresponds to the order of data-refs in RESULT_CHAIN. */
4278 next_stmt = first_stmt;
4279 gap_count = 1;
4280 for (i = 0; VEC_iterate (tree, result_chain, i, tmp_data_ref); i++)
4281 {
4282 if (!next_stmt)
4283 break;
4284
4285 /* Skip the gaps. Loads created for the gaps will be removed by the
4286 dead code elimination pass later.
4287 DR_GROUP_GAP is the number of steps in elements from the previous
4288 access (if there is no gap DR_GROUP_GAP is 1). We skip loads that
4289 correspond to the gaps.
4290 */
4291 if (gap_count < DR_GROUP_GAP (vinfo_for_stmt (next_stmt)))
4292 {
4293 gap_count++;
4294 continue;
4295 }
4296
4297 while (next_stmt)
4298 {
4299 new_stmt = SSA_NAME_DEF_STMT (tmp_data_ref);
4300 /* We assume that if VEC_STMT is not NULL, this is a case of multiple
4301 copies, and we put the new vector statement in the first available
4302 RELATED_STMT. */
4303 if (!STMT_VINFO_VEC_STMT (vinfo_for_stmt (next_stmt)))
4304 STMT_VINFO_VEC_STMT (vinfo_for_stmt (next_stmt)) = new_stmt;
4305 else
4306 {
4307 tree prev_stmt = STMT_VINFO_VEC_STMT (vinfo_for_stmt (next_stmt));
4308 tree rel_stmt = STMT_VINFO_RELATED_STMT (
4309 vinfo_for_stmt (prev_stmt));
4310 while (rel_stmt)
4311 {
4312 prev_stmt = rel_stmt;
4313 rel_stmt = STMT_VINFO_RELATED_STMT (vinfo_for_stmt (rel_stmt));
4314 }
4315 STMT_VINFO_RELATED_STMT (vinfo_for_stmt (prev_stmt)) = new_stmt;
4316 }
4317 next_stmt = DR_GROUP_NEXT_DR (vinfo_for_stmt (next_stmt));
4318 gap_count = 1;
4319 /* If NEXT_STMT accesses the same DR as the previous statement,
4320 put the same TMP_DATA_REF as its vectorized statement; otherwise
4321 get the next data-ref from RESULT_CHAIN. */
4322 if (!next_stmt || !DR_GROUP_SAME_DR_STMT (vinfo_for_stmt (next_stmt)))
4323 break;
4324 }
4325 }
4326 return true;
4327 }
4328
4329
4330 /* vectorizable_load.
4331
4332 Check if STMT reads a non-scalar data-ref (array/pointer/structure) that
4333 can be vectorized.
4334 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
4335 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
4336 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
4337
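/* Editorial sketch (not part of the pass): an interleaved (strided) load
   loop of the kind handled below, with a DR group of size 2.  The whole
   chain is vectorized when its first member is transformed; the other
   members then find their vectorized stmt already recorded.  */

static void
example_strided_load (const int *a, int *x, int *y, int n)
{
  int i;
  for (i = 0; i < n; i++)
    {
      x[i] = a[2 * i];      /* first load in the interleaving chain  */
      y[i] = a[2 * i + 1];
    }
}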
4338 bool
4339 vectorizable_load (tree stmt, block_stmt_iterator *bsi, tree *vec_stmt)
4340 {
4341 tree scalar_dest;
4342 tree vec_dest = NULL;
4343 tree data_ref = NULL;
4344 tree op;
4345 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
4346 stmt_vec_info prev_stmt_info;
4347 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
4348 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
4349 struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info), *first_dr;
4350 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
4351 tree new_temp;
4352 int mode;
4353 tree new_stmt = NULL_TREE;
4354 tree dummy;
4355 enum dr_alignment_support alignment_support_scheme;
4356 tree dataref_ptr = NULL_TREE;
4357 tree ptr_incr;
4358 int nunits = TYPE_VECTOR_SUBPARTS (vectype);
4359 int ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits;
4360 int i, j, group_size;
4361 tree msq = NULL_TREE, lsq;
4362 tree offset = NULL_TREE;
4363 tree realignment_token = NULL_TREE;
4364 tree phi_stmt = NULL_TREE;
4365 VEC(tree,heap) *dr_chain = NULL;
4366 bool strided_load = false;
4367 tree first_stmt;
4368
4369 if (!STMT_VINFO_RELEVANT_P (stmt_info))
4370 return false;
4371
4372 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_loop_def)
4373 return false;
4374
4375 /* FORNOW: not yet supported. */
4376 if (STMT_VINFO_LIVE_P (stmt_info))
4377 {
4378 if (vect_print_dump_info (REPORT_DETAILS))
4379 fprintf (vect_dump, "value used after loop.");
4380 return false;
4381 }
4382
4383 /* Is vectorizable load? */
4384 if (TREE_CODE (stmt) != GIMPLE_MODIFY_STMT)
4385 return false;
4386
4387 scalar_dest = GIMPLE_STMT_OPERAND (stmt, 0);
4388 if (TREE_CODE (scalar_dest) != SSA_NAME)
4389 return false;
4390
4391 op = GIMPLE_STMT_OPERAND (stmt, 1);
4392 if (TREE_CODE (op) != ARRAY_REF
4393 && TREE_CODE (op) != INDIRECT_REF
4394 && !DR_GROUP_FIRST_DR (stmt_info))
4395 return false;
4396
4397 if (!STMT_VINFO_DATA_REF (stmt_info))
4398 return false;
4399
4400 mode = (int) TYPE_MODE (vectype);
4401
4402 /* FORNOW. In some cases we can vectorize even if the data-type is not
4403 supported (e.g. - data copies). */
4404 if (mov_optab->handlers[mode].insn_code == CODE_FOR_nothing)
4405 {
4406 if (vect_print_dump_info (REPORT_DETAILS))
4407 fprintf (vect_dump, "Aligned load, but unsupported type.");
4408 return false;
4409 }
4410
4411 /* Check if the load is a part of an interleaving chain. */
4412 if (DR_GROUP_FIRST_DR (stmt_info))
4413 {
4414 strided_load = true;
4415
4416 /* Check if interleaving is supported. */
4417 if (!vect_strided_load_supported (vectype))
4418 return false;
4419 }
4420
4421 if (!vec_stmt) /* transformation not required. */
4422 {
4423 STMT_VINFO_TYPE (stmt_info) = load_vec_info_type;
4424 vect_model_load_cost (stmt_info, ncopies);
4425 return true;
4426 }
4427
4428 if (vect_print_dump_info (REPORT_DETAILS))
4429 fprintf (vect_dump, "transform load.");
4430
4431 /** Transform. **/
4432
4433 if (strided_load)
4434 {
4435 first_stmt = DR_GROUP_FIRST_DR (stmt_info);
4436 /* Check if the chain of loads is already vectorized. */
4437 if (STMT_VINFO_VEC_STMT (vinfo_for_stmt (first_stmt)))
4438 {
4439 *vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);
4440 return true;
4441 }
4442 first_dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt));
4443 group_size = DR_GROUP_SIZE (vinfo_for_stmt (first_stmt));
4444 dr_chain = VEC_alloc (tree, heap, group_size);
4445 }
4446 else
4447 {
4448 first_stmt = stmt;
4449 first_dr = dr;
4450 group_size = 1;
4451 }
4452
4453 alignment_support_scheme = vect_supportable_dr_alignment (first_dr);
4454 gcc_assert (alignment_support_scheme);
4455
4456
4457 /* In case the vectorization factor (VF) is bigger than the number
4458 of elements that we can fit in a vectype (nunits), we have to generate
4459 more than one vector stmt - i.e. - we need to "unroll" the
4460 vector stmt by a factor VF/nunits. In doing so, we record a pointer
4461 from one copy of the vector stmt to the next, in the field
4462 STMT_VINFO_RELATED_STMT. This is necessary in order to allow following
4463 stages to find the correct vector defs to be used when vectorizing
4464 stmts that use the defs of the current stmt. The example below illustrates
4465 the vectorization process when VF=16 and nunits=4 (i.e. - we need to create
4466 4 vectorized stmts):
4467
4468 before vectorization:
4469 RELATED_STMT VEC_STMT
4470 S1: x = memref - -
4471 S2: z = x + 1 - -
4472
4473 step 1: vectorize stmt S1:
4474 We first create the vector stmt VS1_0, and, as usual, record a
4475 pointer to it in the STMT_VINFO_VEC_STMT of the scalar stmt S1.
4476 Next, we create the vector stmt VS1_1, and record a pointer to
4477 it in the STMT_VINFO_RELATED_STMT of the vector stmt VS1_0.
4478 Similarly, for VS1_2 and VS1_3. This is the resulting chain of
4479 stmts and pointers:
4480 RELATED_STMT VEC_STMT
4481 VS1_0: vx0 = memref0 VS1_1 -
4482 VS1_1: vx1 = memref1 VS1_2 -
4483 VS1_2: vx2 = memref2 VS1_3 -
4484 VS1_3: vx3 = memref3 - -
4485 S1: x = load - VS1_0
4486 S2: z = x + 1 - -
4487
4488 See in documentation in vect_get_vec_def_for_stmt_copy for how the
4489 information we recorded in RELATED_STMT field is used to vectorize
4490 stmt S2. */
4491
4492 /* In case of interleaving (non-unit strided access):
4493
4494 S1: x2 = &base + 2
4495 S2: x0 = &base
4496 S3: x1 = &base + 1
4497 S4: x3 = &base + 3
4498
4499 Vectorized loads are created in the order of memory accesses
4500 starting from the access of the first stmt of the chain:
4501
4502 VS1: vx0 = &base
4503 VS2: vx1 = &base + vec_size*1
4504 VS3: vx2 = &base + vec_size*2
4505 VS4: vx3 = &base + vec_size*3
4506
4507 Then permutation statements are generated:
4508
4509 VS5: vx5 = VEC_EXTRACT_EVEN_EXPR < vx0, vx1 >
4510 VS6: vx6 = VEC_EXTRACT_ODD_EXPR < vx0, vx1 >
4511 ...
4512
4513 And they are put in STMT_VINFO_VEC_STMT of the corresponding scalar stmts
4514 (the order of the data-refs in the output of vect_permute_load_chain
4515 corresponds to the order of scalar stmts in the interleaving chain - see
4516 the documentation of vect_permute_load_chain()).
4517 The generation of permutation stmts and recording them in
4518 STMT_VINFO_VEC_STMT is done in vect_transform_strided_load().
4519
4520 In case of both multiple types and interleaving, the vector loads and
4521 permutation stmts above are created for every copy. The result vector stmts
4522 are put in STMT_VINFO_VEC_STMT for the first copy and in the corresponding
4523 STMT_VINFO_RELATED_STMT for the next copies. */
4524
4525 /* If the data reference is aligned (dr_aligned) or potentially unaligned
4526 on a target that supports unaligned accesses (dr_unaligned_supported)
4527 we generate the following code:
4528 p = initial_addr;
4529 indx = 0;
4530 loop {
4531 p = p + indx * vectype_size;
4532 vec_dest = *(p);
4533 indx = indx + 1;
4534 }
4535
4536 Otherwise, the data reference is potentially unaligned on a target that
4537 does not support unaligned accesses (dr_unaligned_software_pipeline) -
4538 then generate the following code, in which the data in each iteration is
4539 obtained by two vector loads, one from the previous iteration, and one
4540 from the current iteration:
4541 p1 = initial_addr;
4542 msq_init = *(floor(p1))
4543 p2 = initial_addr + VS - 1;
4544 realignment_token = call target_builtin;
4545 indx = 0;
4546 loop {
4547 p2 = p2 + indx * vectype_size
4548 lsq = *(floor(p2))
4549 vec_dest = realign_load (msq, lsq, realignment_token)
4550 indx = indx + 1;
4551 msq = lsq;
4552 } */
4553
4554 if (alignment_support_scheme == dr_unaligned_software_pipeline)
4555 {
4556 msq = vect_setup_realignment (first_stmt, bsi, &realignment_token);
4557 phi_stmt = SSA_NAME_DEF_STMT (msq);
4558 offset = size_int (TYPE_VECTOR_SUBPARTS (vectype) - 1);
4559 }
4560
4561 prev_stmt_info = NULL;
4562 for (j = 0; j < ncopies; j++)
4563 {
4564 /* 1. Create the vector pointer update chain. */
4565 if (j == 0)
4566 dataref_ptr = vect_create_data_ref_ptr (first_stmt, bsi, offset, &dummy,
4567 &ptr_incr, false, NULL_TREE);
4568 else
4569 dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, bsi, stmt);
4570
4571 for (i = 0; i < group_size; i++)
4572 {
4573 /* 2. Create the vector-load in the loop. */
4574 switch (alignment_support_scheme)
4575 {
4576 case dr_aligned:
4577 gcc_assert (aligned_access_p (first_dr));
4578 data_ref = build_fold_indirect_ref (dataref_ptr);
4579 break;
4580 case dr_unaligned_supported:
4581 {
4582 int mis = DR_MISALIGNMENT (first_dr);
4583 tree tmis = (mis == -1 ? size_zero_node : size_int (mis));
4584
4585 gcc_assert (!aligned_access_p (first_dr));
4586 tmis = size_binop (MULT_EXPR, tmis, size_int (BITS_PER_UNIT));
4587 data_ref =
4588 build2 (MISALIGNED_INDIRECT_REF, vectype, dataref_ptr, tmis);
4589 break;
4590 }
4591 case dr_unaligned_software_pipeline:
4592 gcc_assert (!aligned_access_p (first_dr));
4593 data_ref = build1 (ALIGN_INDIRECT_REF, vectype, dataref_ptr);
4594 break;
4595 default:
4596 gcc_unreachable ();
4597 }
4598 vec_dest = vect_create_destination_var (scalar_dest, vectype);
4599 new_stmt = build_gimple_modify_stmt (vec_dest, data_ref);
4600 new_temp = make_ssa_name (vec_dest, new_stmt);
4601 GIMPLE_STMT_OPERAND (new_stmt, 0) = new_temp;
4602 vect_finish_stmt_generation (stmt, new_stmt, bsi);
4603 copy_virtual_operands (new_stmt, stmt);
4604 mark_symbols_for_renaming (new_stmt);
4605
4606 /* 3. Handle explicit realignment if necessary/supported. */
4607 if (alignment_support_scheme == dr_unaligned_software_pipeline)
4608 {
4609 /* Create in loop:
4610 <vec_dest = realign_load (msq, lsq, realignment_token)> */
4611 lsq = GIMPLE_STMT_OPERAND (new_stmt, 0);
4612 if (!realignment_token)
4613 realignment_token = dataref_ptr;
4614 vec_dest = vect_create_destination_var (scalar_dest, vectype);
4615 new_stmt =
4616 build3 (REALIGN_LOAD_EXPR, vectype, msq, lsq, realignment_token);
4617 new_stmt = build_gimple_modify_stmt (vec_dest, new_stmt);
4618 new_temp = make_ssa_name (vec_dest, new_stmt);
4619 GIMPLE_STMT_OPERAND (new_stmt, 0) = new_temp;
4620 vect_finish_stmt_generation (stmt, new_stmt, bsi);
4621 if (i == group_size - 1 && j == ncopies - 1)
4622 add_phi_arg (phi_stmt, lsq, loop_latch_edge (loop));
4623 msq = lsq;
4624 }
4625 if (strided_load)
4626 VEC_quick_push (tree, dr_chain, new_temp);
4627 if (i < group_size - 1)
4628 dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, bsi, stmt);
4629 }
4630
4631 if (strided_load)
4632 {
4633 if (!vect_transform_strided_load (stmt, dr_chain, group_size, bsi))
4634 return false;
4635 *vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);
4636 dr_chain = VEC_alloc (tree, heap, group_size);
4637 }
4638 else
4639 {
4640 if (j == 0)
4641 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
4642 else
4643 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
4644 prev_stmt_info = vinfo_for_stmt (new_stmt);
4645 }
4646 }
4647
4648 return true;
4649 }
4650
4651
4652 /* Function vectorizable_live_operation.
4653
4654 STMT computes a value that is used outside the loop. Check if
4655 it can be supported. */
4656
4657 bool
4658 vectorizable_live_operation (tree stmt,
4659 block_stmt_iterator *bsi ATTRIBUTE_UNUSED,
4660 tree *vec_stmt ATTRIBUTE_UNUSED)
4661 {
4662 tree operation;
4663 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
4664 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
4665 int i;
4666 int op_type;
4667 tree op;
4668 tree def, def_stmt;
4669 enum vect_def_type dt;
4670
4671 gcc_assert (STMT_VINFO_LIVE_P (stmt_info));
4672
4673 if (STMT_VINFO_DEF_TYPE (stmt_info) == vect_reduction_def)
4674 return false;
4675
4676 if (TREE_CODE (stmt) != GIMPLE_MODIFY_STMT)
4677 return false;
4678
4679 if (TREE_CODE (GIMPLE_STMT_OPERAND (stmt, 0)) != SSA_NAME)
4680 return false;
4681
4682 operation = GIMPLE_STMT_OPERAND (stmt, 1);
4683 op_type = TREE_OPERAND_LENGTH (operation);
4684
4685 /* FORNOW: support only if all uses are invariant. This means
4686 that the scalar operations can remain in place, unvectorized.
4687 The original last scalar value that they compute will be used. */
4688
4689 for (i = 0; i < op_type; i++)
4690 {
4691 op = TREE_OPERAND (operation, i);
4692 if (!vect_is_simple_use (op, loop_vinfo, &def_stmt, &def, &dt))
4693 {
4694 if (vect_print_dump_info (REPORT_DETAILS))
4695 fprintf (vect_dump, "use not simple.");
4696 return false;
4697 }
4698
4699 if (dt != vect_invariant_def && dt != vect_constant_def)
4700 return false;
4701 }
4702
4703 /* No transformation is required for the cases we currently support. */
4704 return true;
4705 }
4706
4707
4708 /* Function vect_is_simple_cond.
4709
4710 Input:
4711 LOOP_VINFO - information about the loop that is being vectorized.
4712 COND - Condition that is checked for simple use.
4713
4714 Returns whether a COND can be vectorized. Checks whether
4715 condition operands are supportable using vect_is_simple_use.
4716
4717 static bool
4718 vect_is_simple_cond (tree cond, loop_vec_info loop_vinfo)
4719 {
4720 tree lhs, rhs;
4721 tree def;
4722 enum vect_def_type dt;
4723
4724 if (!COMPARISON_CLASS_P (cond))
4725 return false;
4726
4727 lhs = TREE_OPERAND (cond, 0);
4728 rhs = TREE_OPERAND (cond, 1);
4729
4730 if (TREE_CODE (lhs) == SSA_NAME)
4731 {
4732 tree lhs_def_stmt = SSA_NAME_DEF_STMT (lhs);
4733 if (!vect_is_simple_use (lhs, loop_vinfo, &lhs_def_stmt, &def, &dt))
4734 return false;
4735 }
4736 else if (TREE_CODE (lhs) != INTEGER_CST && TREE_CODE (lhs) != REAL_CST)
4737 return false;
4738
4739 if (TREE_CODE (rhs) == SSA_NAME)
4740 {
4741 tree rhs_def_stmt = SSA_NAME_DEF_STMT (rhs);
4742 if (!vect_is_simple_use (rhs, loop_vinfo, &rhs_def_stmt, &def, &dt))
4743 return false;
4744 }
4745 else if (TREE_CODE (rhs) != INTEGER_CST && TREE_CODE (rhs) != REAL_CST)
4746 return false;
4747
4748 return true;
4749 }
4750
4751 /* vectorizable_condition.
4752
4753 Check if STMT is a conditional modify expression that can be vectorized.
4754 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
4755 stmt using VEC_COND_EXPR to replace it, put it in VEC_STMT, and insert it
4756 at BSI.
4757
4758 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
4759
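/* Editorial sketch (not part of the pass): a conditional loop of the shape
   handled below; the COND_EXPR on the rhs becomes a VEC_COND_EXPR over
   whole vectors.  */

static void
example_cond_loop (int *a, const int *b, const int *c, int n)
{
  int i;
  for (i = 0; i < n; i++)
    a[i] = b[i] < c[i] ? b[i] : c[i];  /* minimum via COND_EXPR  */
}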
4760 bool
4761 vectorizable_condition (tree stmt, block_stmt_iterator *bsi, tree *vec_stmt)
4762 {
4763 tree scalar_dest = NULL_TREE;
4764 tree vec_dest = NULL_TREE;
4765 tree op = NULL_TREE;
4766 tree cond_expr, then_clause, else_clause;
4767 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
4768 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
4769 tree vec_cond_lhs, vec_cond_rhs, vec_then_clause, vec_else_clause;
4770 tree vec_compare, vec_cond_expr;
4771 tree new_temp;
4772 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
4773 enum machine_mode vec_mode;
4774 tree def;
4775 enum vect_def_type dt;
4776 int nunits = TYPE_VECTOR_SUBPARTS (vectype);
4777 int ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits;
4778
4779 gcc_assert (ncopies >= 1);
4780 if (ncopies > 1)
4781 return false; /* FORNOW */
4782
4783 if (!STMT_VINFO_RELEVANT_P (stmt_info))
4784 return false;
4785
4786 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_loop_def)
4787 return false;
4788
4789 /* FORNOW: not yet supported. */
4790 if (STMT_VINFO_LIVE_P (stmt_info))
4791 {
4792 if (vect_print_dump_info (REPORT_DETAILS))
4793 fprintf (vect_dump, "value used after loop.");
4794 return false;
4795 }
4796
4797 /* Is vectorizable conditional operation? */
4798 if (TREE_CODE (stmt) != GIMPLE_MODIFY_STMT)
4799 return false;
4800
4801 op = GIMPLE_STMT_OPERAND (stmt, 1);
4802
4803 if (TREE_CODE (op) != COND_EXPR)
4804 return false;
4805
4806 cond_expr = TREE_OPERAND (op, 0);
4807 then_clause = TREE_OPERAND (op, 1);
4808 else_clause = TREE_OPERAND (op, 2);
4809
4810 if (!vect_is_simple_cond (cond_expr, loop_vinfo))
4811 return false;
4812
4813 /* We do not handle two different vector types for the condition
4814 and the values. */
4815 if (TREE_TYPE (TREE_OPERAND (cond_expr, 0)) != TREE_TYPE (vectype))
4816 return false;
4817
4818 if (TREE_CODE (then_clause) == SSA_NAME)
4819 {
4820 tree then_def_stmt = SSA_NAME_DEF_STMT (then_clause);
4821 if (!vect_is_simple_use (then_clause, loop_vinfo,
4822 &then_def_stmt, &def, &dt))
4823 return false;
4824 }
4825 else if (TREE_CODE (then_clause) != INTEGER_CST
4826 && TREE_CODE (then_clause) != REAL_CST)
4827 return false;
4828
4829 if (TREE_CODE (else_clause) == SSA_NAME)
4830 {
4831 tree else_def_stmt = SSA_NAME_DEF_STMT (else_clause);
4832 if (!vect_is_simple_use (else_clause, loop_vinfo,
4833 &else_def_stmt, &def, &dt))
4834 return false;
4835 }
4836 else if (TREE_CODE (else_clause) != INTEGER_CST
4837 && TREE_CODE (else_clause) != REAL_CST)
4838 return false;
4839
4840
4841 vec_mode = TYPE_MODE (vectype);
4842
4843 if (!vec_stmt)
4844 {
4845 STMT_VINFO_TYPE (stmt_info) = condition_vec_info_type;
4846 return expand_vec_cond_expr_p (op, vec_mode);
4847 }
4848
4849 /** Transform. **/
4850
4851 /* Handle def. */
4852 scalar_dest = GIMPLE_STMT_OPERAND (stmt, 0);
4853 vec_dest = vect_create_destination_var (scalar_dest, vectype);
4854
4855 /* Handle cond expr. */
4856 vec_cond_lhs =
4857 vect_get_vec_def_for_operand (TREE_OPERAND (cond_expr, 0), stmt, NULL);
4858 vec_cond_rhs =
4859 vect_get_vec_def_for_operand (TREE_OPERAND (cond_expr, 1), stmt, NULL);
4860 vec_then_clause = vect_get_vec_def_for_operand (then_clause, stmt, NULL);
4861 vec_else_clause = vect_get_vec_def_for_operand (else_clause, stmt, NULL);
4862
4863 /* Arguments are ready. Create the new vector stmt. */
4864 vec_compare = build2 (TREE_CODE (cond_expr), vectype,
4865 vec_cond_lhs, vec_cond_rhs);
4866 vec_cond_expr = build3 (VEC_COND_EXPR, vectype,
4867 vec_compare, vec_then_clause, vec_else_clause);
4868
4869 *vec_stmt = build_gimple_modify_stmt (vec_dest, vec_cond_expr);
4870 new_temp = make_ssa_name (vec_dest, *vec_stmt);
4871 GIMPLE_STMT_OPERAND (*vec_stmt, 0) = new_temp;
4872 vect_finish_stmt_generation (stmt, *vec_stmt, bsi);
4873
4874 return true;
4875 }
4876
4877 /* Function vect_transform_stmt.
4878
4879 Create a vectorized stmt to replace STMT, and insert it at BSI. */
4880
4881 bool
4882 vect_transform_stmt (tree stmt, block_stmt_iterator *bsi, bool *strided_store)
4883 {
4884 bool is_store = false;
4885 tree vec_stmt = NULL_TREE;
4886 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
4887 tree orig_stmt_in_pattern;
4888 bool done;
4889
4890 switch (STMT_VINFO_TYPE (stmt_info))
4891 {
4892 case type_demotion_vec_info_type:
4893 done = vectorizable_type_demotion (stmt, bsi, &vec_stmt);
4894 gcc_assert (done);
4895 break;
4896
4897 case type_promotion_vec_info_type:
4898 done = vectorizable_type_promotion (stmt, bsi, &vec_stmt);
4899 gcc_assert (done);
4900 break;
4901
4902 case type_conversion_vec_info_type:
4903 done = vectorizable_conversion (stmt, bsi, &vec_stmt);
4904 gcc_assert (done);
4905 break;
4906
4907 case induc_vec_info_type:
4908 done = vectorizable_induction (stmt, bsi, &vec_stmt);
4909 gcc_assert (done);
4910 break;
4911
4912 case op_vec_info_type:
4913 done = vectorizable_operation (stmt, bsi, &vec_stmt);
4914 gcc_assert (done);
4915 break;
4916
4917 case assignment_vec_info_type:
4918 done = vectorizable_assignment (stmt, bsi, &vec_stmt);
4919 gcc_assert (done);
4920 break;
4921
4922 case load_vec_info_type:
4923 done = vectorizable_load (stmt, bsi, &vec_stmt);
4924 gcc_assert (done);
4925 break;
4926
4927 case store_vec_info_type:
4928 done = vectorizable_store (stmt, bsi, &vec_stmt);
4929 gcc_assert (done);
4930 if (DR_GROUP_FIRST_DR (stmt_info))
4931 {
4932 /* In case of interleaving, the whole chain is vectorized when the
4933 last store in the chain is reached. Store stmts before the last
4934 one are skipped, and their vec_stmt_info shouldn't be freed
4935 meanwhile. */
4936 *strided_store = true;
4937 if (STMT_VINFO_VEC_STMT (stmt_info))
4938 is_store = true;
4939 }
4940 else
4941 is_store = true;
4942 break;
4943
4944 case condition_vec_info_type:
4945 done = vectorizable_condition (stmt, bsi, &vec_stmt);
4946 gcc_assert (done);
4947 break;
4948
4949 case call_vec_info_type:
4950 done = vectorizable_call (stmt, bsi, &vec_stmt);
4951 break;
4952
4953 case reduc_vec_info_type:
4954 done = vectorizable_reduction (stmt, bsi, &vec_stmt);
4955 gcc_assert (done);
4956 break;
4957
4958 default:
4959 if (!STMT_VINFO_LIVE_P (stmt_info))
4960 {
4961 if (vect_print_dump_info (REPORT_DETAILS))
4962 fprintf (vect_dump, "stmt not supported.");
4963 gcc_unreachable ();
4964 }
4965 }
4966
4967 if (STMT_VINFO_LIVE_P (stmt_info)
4968 && STMT_VINFO_TYPE (stmt_info) != reduc_vec_info_type)
4969 {
4970 done = vectorizable_live_operation (stmt, bsi, &vec_stmt);
4971 gcc_assert (done);
4972 }
4973
4974 if (vec_stmt)
4975 {
4976 STMT_VINFO_VEC_STMT (stmt_info) = vec_stmt;
4977 orig_stmt_in_pattern = STMT_VINFO_RELATED_STMT (stmt_info);
4978 if (orig_stmt_in_pattern)
4979 {
4980 stmt_vec_info stmt_vinfo = vinfo_for_stmt (orig_stmt_in_pattern);
4981 /* STMT was inserted by the vectorizer to replace a computation idiom.
4982 ORIG_STMT_IN_PATTERN is a stmt in the original sequence that
4983 computed this idiom. We need to record a pointer to VEC_STMT in
4984 the stmt_info of ORIG_STMT_IN_PATTERN. See more details in the
4985 documentation of vect_pattern_recog. */
4986 if (STMT_VINFO_IN_PATTERN_P (stmt_vinfo))
4987 {
4988 gcc_assert (STMT_VINFO_RELATED_STMT (stmt_vinfo) == stmt);
4989 STMT_VINFO_VEC_STMT (stmt_vinfo) = vec_stmt;
4990 }
4991 }
4992 }
4993
4994 return is_store;
4995 }
4996
4997
4998 /* This function builds, on the loop preheader, ni_name = the number
4999 of iterations the loop executes. */
5000
5001 static tree
5002 vect_build_loop_niters (loop_vec_info loop_vinfo)
5003 {
5004 tree ni_name, stmt, var;
5005 edge pe;
5006 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
5007 tree ni = unshare_expr (LOOP_VINFO_NITERS (loop_vinfo));
5008
5009 var = create_tmp_var (TREE_TYPE (ni), "niters");
5010 add_referenced_var (var);
5011 ni_name = force_gimple_operand (ni, &stmt, false, var);
5012
5013 pe = loop_preheader_edge (loop);
5014 if (stmt)
5015 {
5016 basic_block new_bb = bsi_insert_on_edge_immediate (pe, stmt);
5017 gcc_assert (!new_bb);
5018 }
5019
5020 return ni_name;
5021 }
5022
5023
5024 /* This function generates the following statements:
5025
5026 ni_name = number of iterations loop executes
5027 ratio = ni_name / vf
5028 ratio_mult_vf_name = ratio * vf
5029
5030 and places them at the loop preheader edge. */
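/* For example, with vf = 4 (so log2 (vf) = 2) and ni_name = 103, the
   code below computes
     ratio              = 103 >> 2 = 25
     ratio_mult_vf_name = 25 << 2  = 100
   leaving 103 - 100 = 3 iterations for the scalar epilog loop.  */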
5031
5032 static void
5033 vect_generate_tmps_on_preheader (loop_vec_info loop_vinfo,
5034 tree *ni_name_ptr,
5035 tree *ratio_mult_vf_name_ptr,
5036 tree *ratio_name_ptr)
5037 {
5038
5039 edge pe;
5040 basic_block new_bb;
5041 tree stmt, ni_name;
5042 tree var;
5043 tree ratio_name;
5044 tree ratio_mult_vf_name;
5045 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
5046 tree ni = LOOP_VINFO_NITERS (loop_vinfo);
5047 int vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
5048 tree log_vf;
5049
5050 pe = loop_preheader_edge (loop);
5051
5052 /* Generate a temporary variable that contains the
5053 number of iterations the loop executes. */
5054
5055 ni_name = vect_build_loop_niters (loop_vinfo);
5056 log_vf = build_int_cst (TREE_TYPE (ni), exact_log2 (vf));
5057
5058 /* Create: ratio = ni >> log2(vf) */
5059
5060 ratio_name = fold_build2 (RSHIFT_EXPR, TREE_TYPE (ni_name), ni_name, log_vf);
5061 if (!is_gimple_val (ratio_name))
5062 {
5063 var = create_tmp_var (TREE_TYPE (ni), "bnd");
5064 add_referenced_var (var);
5065
5066 ratio_name = force_gimple_operand (ratio_name, &stmt, true, var);
5067 pe = loop_preheader_edge (loop);
5068 new_bb = bsi_insert_on_edge_immediate (pe, stmt);
5069 gcc_assert (!new_bb);
5070 }
5071
5072 /* Create: ratio_mult_vf = ratio << log2 (vf). */
5073
5074 ratio_mult_vf_name = fold_build2 (LSHIFT_EXPR, TREE_TYPE (ratio_name),
5075 ratio_name, log_vf);
5076 if (!is_gimple_val (ratio_mult_vf_name))
5077 {
5078 var = create_tmp_var (TREE_TYPE (ni), "ratio_mult_vf");
5079 add_referenced_var (var);
5080
5081 ratio_mult_vf_name = force_gimple_operand (ratio_mult_vf_name, &stmt,
5082 true, var);
5083 pe = loop_preheader_edge (loop);
5084 new_bb = bsi_insert_on_edge_immediate (pe, stmt);
5085 gcc_assert (!new_bb);
5086 }
5087
5088 *ni_name_ptr = ni_name;
5089 *ratio_mult_vf_name_ptr = ratio_mult_vf_name;
5090 *ratio_name_ptr = ratio_name;
5091
5092 return;
5093 }
5094
5095
5096 /* Function update_vuses_to_preheader.
5097
5098 Input:
5099 STMT - a statement with potential VUSEs.
5100 LOOP - the loop whose preheader will contain STMT.
5101
5102 It's possible to vectorize a loop even though an SSA_NAME from a VUSE
5103 appears to be defined in a VDEF in another statement in a loop.
5104 One such case is when the VUSE is at the dereference of a __restrict__
5105 pointer in a load and the VDEF is at the dereference of a different
5106 __restrict__ pointer in a store. Vectorization may result in
5107 copy_virtual_uses being called to copy the problematic VUSE to a new
5108 statement that is being inserted in the loop preheader. This procedure
5109 is called to change the SSA_NAME in the new statement's VUSE from the
5110 SSA_NAME updated in the loop to the related SSA_NAME available on the
5111 path entering the loop.
5112
5113 When this function is called, we have the following situation:
5114
5115 # vuse <name1>
5116 S1: vload
5117 do {
5118 # name1 = phi < name0 , name2>
5119
5120 # vuse <name1>
5121 S2: vload
5122
5123 # name2 = vdef <name1>
5124 S3: vstore
5125
5126 }while...
5127
5128 Stmt S1 was created in the loop preheader block as part of misaligned-load
5129 handling. This function fixes the name of the vuse of S1 from 'name1' to
5130 'name0'. */
5131
5132 static void
5133 update_vuses_to_preheader (tree stmt, struct loop *loop)
5134 {
5135 basic_block header_bb = loop->header;
5136 edge preheader_e = loop_preheader_edge (loop);
5137 ssa_op_iter iter;
5138 use_operand_p use_p;
5139
5140 FOR_EACH_SSA_USE_OPERAND (use_p, stmt, iter, SSA_OP_VUSE)
5141 {
5142 tree ssa_name = USE_FROM_PTR (use_p);
5143 tree def_stmt = SSA_NAME_DEF_STMT (ssa_name);
5144 tree name_var = SSA_NAME_VAR (ssa_name);
5145 basic_block bb = bb_for_stmt (def_stmt);
5146
5147 /* For a use before any definitions, def_stmt is a NOP_EXPR. */
5148 if (!IS_EMPTY_STMT (def_stmt)
5149 && flow_bb_inside_loop_p (loop, bb))
5150 {
5151 /* If the block containing the statement defining the SSA_NAME
5152 is in the loop then it's necessary to find the definition
5153 outside the loop using the PHI nodes of the header. */
5154 tree phi;
5155 bool updated = false;
5156
5157 for (phi = phi_nodes (header_bb); phi; phi = PHI_CHAIN (phi))
5158 {
5159 if (SSA_NAME_VAR (PHI_RESULT (phi)) == name_var)
5160 {
5161 SET_USE (use_p, PHI_ARG_DEF (phi, preheader_e->dest_idx));
5162 updated = true;
5163 break;
5164 }
5165 }
5166 gcc_assert (updated);
5167 }
5168 }
5169 }
5170
5171
5172 /* Function vect_update_ivs_after_vectorizer.
5173
5174 "Advance" the induction variables of LOOP to the value they should take
5175 after the execution of LOOP. This is currently necessary because the
5176 vectorizer does not handle induction variables that are used after the
5177 loop. Such a situation occurs when the last iterations of LOOP are
5178 peeled, because:
5179 1. We introduced new uses after LOOP for IVs that were not originally used
5180 after LOOP: the IVs of LOOP are now used by an epilog loop.
5181 2. LOOP is going to be vectorized; this means that it will iterate N/VF
5182 times, whereas the loop IVs should be bumped N times.
5183
5184 Input:
5185 - LOOP - a loop that is going to be vectorized. The last few iterations
5186 of LOOP were peeled.
5187 - NITERS - the number of iterations that LOOP executes (before it is
5188 vectorized), i.e., the number of times the ivs should be bumped.
5189 - UPDATE_E - a successor edge of LOOP->exit that is on the (only) path
5190 coming out from LOOP on which there are uses of the LOOP ivs
5191 (this is the path from LOOP->exit to epilog_loop->preheader).
5192
5193 The new definitions of the ivs are placed in LOOP->exit.
5194 The phi args associated with the edge UPDATE_E in the bb
5195 UPDATE_E->dest are updated accordingly.
5196
5197 Assumption 1: Like the rest of the vectorizer, this function assumes
5198 a single loop exit that has a single predecessor.
5199
5200 Assumption 2: The phi nodes in the LOOP header and in update_bb are
5201 organized in the same order.
5202
5203 Assumption 3: The access function of the ivs is simple enough (see
5204 vect_can_advance_ivs_p). This assumption will be relaxed in the future.
5205
5206 Assumption 4: Exactly one of the successors of LOOP exit-bb is on a path
5207 coming out of LOOP on which the ivs of LOOP are used (this is the path
5208 that leads to the epilog loop; other paths skip the epilog loop). This
5209 path starts with the edge UPDATE_E, and its destination (denoted update_bb)
5210 needs to have its phis updated.
5211 */
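/* For illustration, an IV with access function {init, +, step} is
   advanced by the code below to
     ni = init + NITERS * step
   (for pointer IVs, ni = init p+ (sizetype) (NITERS * step)),
   and the resulting name becomes the phi argument on UPDATE_E in
   update_bb.  */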
5212
5213 static void
5214 vect_update_ivs_after_vectorizer (loop_vec_info loop_vinfo, tree niters,
5215 edge update_e)
5216 {
5217 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
5218 basic_block exit_bb = single_exit (loop)->dest;
5219 tree phi, phi1;
5220 basic_block update_bb = update_e->dest;
5221
5222 /* gcc_assert (vect_can_advance_ivs_p (loop_vinfo)); */
5223
5224 /* Make sure there exists a single-predecessor exit bb: */
5225 gcc_assert (single_pred_p (exit_bb));
5226
5227 for (phi = phi_nodes (loop->header), phi1 = phi_nodes (update_bb);
5228 phi && phi1;
5229 phi = PHI_CHAIN (phi), phi1 = PHI_CHAIN (phi1))
5230 {
5231 tree access_fn = NULL;
5232 tree evolution_part;
5233 tree init_expr;
5234 tree step_expr;
5235 tree var, stmt, ni, ni_name;
5236 block_stmt_iterator last_bsi;
5237
5238 if (vect_print_dump_info (REPORT_DETAILS))
5239 {
5240 fprintf (vect_dump, "vect_update_ivs_after_vectorizer: phi: ");
5241 print_generic_expr (vect_dump, phi, TDF_SLIM);
5242 }
5243
5244 /* Skip virtual phis. */
5245 if (!is_gimple_reg (SSA_NAME_VAR (PHI_RESULT (phi))))
5246 {
5247 if (vect_print_dump_info (REPORT_DETAILS))
5248 fprintf (vect_dump, "virtual phi. skip.");
5249 continue;
5250 }
5251
5252 /* Skip reduction phis. */
5253 if (STMT_VINFO_DEF_TYPE (vinfo_for_stmt (phi)) == vect_reduction_def)
5254 {
5255 if (vect_print_dump_info (REPORT_DETAILS))
5256 fprintf (vect_dump, "reduc phi. skip.");
5257 continue;
5258 }
5259
5260 access_fn = analyze_scalar_evolution (loop, PHI_RESULT (phi));
5261 gcc_assert (access_fn);
5262 evolution_part =
5263 unshare_expr (evolution_part_in_loop_num (access_fn, loop->num));
5264 gcc_assert (evolution_part != NULL_TREE);
5265
5266 /* FORNOW: We do not support IVs whose evolution function is a polynomial
5267 of degree >= 2 or exponential. */
5268 gcc_assert (!tree_is_chrec (evolution_part));
5269
5270 step_expr = evolution_part;
5271 init_expr = unshare_expr (initial_condition_in_loop_num (access_fn,
5272 loop->num));
5273
5274 if (POINTER_TYPE_P (TREE_TYPE (init_expr)))
5275 ni = fold_build2 (POINTER_PLUS_EXPR, TREE_TYPE (init_expr),
5276 init_expr,
5277 fold_convert (sizetype,
5278 fold_build2 (MULT_EXPR, TREE_TYPE (niters),
5279 niters, step_expr)));
5280 else
5281 ni = fold_build2 (PLUS_EXPR, TREE_TYPE (init_expr),
5282 fold_build2 (MULT_EXPR, TREE_TYPE (init_expr),
5283 fold_convert (TREE_TYPE (init_expr),
5284 niters),
5285 step_expr),
5286 init_expr);
5287
5288
5289
5290 var = create_tmp_var (TREE_TYPE (init_expr), "tmp");
5291 add_referenced_var (var);
5292
5293 ni_name = force_gimple_operand (ni, &stmt, false, var);
5294
5295 /* Insert stmt into exit_bb. */
5296 last_bsi = bsi_last (exit_bb);
5297 if (stmt)
5298 bsi_insert_before (&last_bsi, stmt, BSI_SAME_STMT);
5299
5300 /* Fix phi expressions in the successor bb. */
5301 SET_PHI_ARG_DEF (phi1, update_e->dest_idx, ni_name);
5302 }
5303 }
5304
5305
5306 /* Function vect_do_peeling_for_loop_bound
5307
5308 Peel the last iterations of the loop represented by LOOP_VINFO.
5309 The peeled iterations form a new epilog loop. Given that the loop now
5310 iterates NITERS times, the new epilog loop iterates
5311 NITERS % VECTORIZATION_FACTOR times.
5312
5313 The original loop will later be made to iterate
5314 NITERS / VECTORIZATION_FACTOR times (this value is placed into RATIO). */
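/* For example, with NITERS = 38 and VECTORIZATION_FACTOR = 8, the
   vectorized loop iterates RATIO = 38 / 8 = 4 times and the epilog
   loop executes the remaining 38 % 8 = 6 iterations.  */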
5315
5316 static void
5317 vect_do_peeling_for_loop_bound (loop_vec_info loop_vinfo, tree *ratio)
5318 {
5319 tree ni_name, ratio_mult_vf_name;
5320 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
5321 struct loop *new_loop;
5322 edge update_e;
5323 basic_block preheader;
5324 int loop_num;
5325 unsigned int th;
5326 int min_scalar_loop_bound;
5327 int min_profitable_iters;
5328
5329 if (vect_print_dump_info (REPORT_DETAILS))
5330 fprintf (vect_dump, "=== vect_do_peeling_for_loop_bound ===");
5331
5332 initialize_original_copy_tables ();
5333
5334 /* Generate the following variables on the preheader of the original loop:
5335
5336 ni_name = number of iterations the original loop executes
5337 ratio = ni_name / vf
5338 ratio_mult_vf_name = ratio * vf */
5339 vect_generate_tmps_on_preheader (loop_vinfo, &ni_name,
5340 &ratio_mult_vf_name, ratio);
5341
5342 loop_num = loop->num;
5343
5344 /* Analyze cost to set the threshold for the vectorized loop. */
5345 min_profitable_iters = vect_estimate_min_profitable_iters (loop_vinfo);
5346
5347 min_scalar_loop_bound = (PARAM_VALUE (PARAM_MIN_VECT_LOOP_BOUND))
5348 * LOOP_VINFO_VECT_FACTOR (loop_vinfo);
5349
5350 /* Use the cost model only if it is more conservative than the user-specified
5351 threshold. */
5352
5353 th = (unsigned) min_scalar_loop_bound;
5354 if (min_profitable_iters
5355 && (!min_scalar_loop_bound
5356 || min_profitable_iters > min_scalar_loop_bound))
5357 th = (unsigned) min_profitable_iters;
5358
5359 if (vect_print_dump_info (REPORT_DETAILS))
5360 fprintf (vect_dump, "vectorization may not be profitable.");
5361
5362 new_loop = slpeel_tree_peel_loop_to_edge (loop, single_exit (loop),
5363 ratio_mult_vf_name, ni_name, false,
5364 th);
5365 gcc_assert (new_loop);
5366 gcc_assert (loop_num == loop->num);
5367 #ifdef ENABLE_CHECKING
5368 slpeel_verify_cfg_after_peeling (loop, new_loop);
5369 #endif
5370
5371 /* A guard that controls whether the new_loop is to be executed or skipped
5372 is placed in LOOP->exit. LOOP->exit therefore has two successors - one
5373 is the preheader of NEW_LOOP, where the IVs from LOOP are used. The other
5374 is a bb after NEW_LOOP, where these IVs are not used. Find the edge that
5375 is on the path where the LOOP IVs are used and need to be updated. */
5376
5377 preheader = loop_preheader_edge (new_loop)->src;
5378 if (EDGE_PRED (preheader, 0)->src == single_exit (loop)->dest)
5379 update_e = EDGE_PRED (preheader, 0);
5380 else
5381 update_e = EDGE_PRED (preheader, 1);
5382
5383 /* Update IVs of original loop as if they were advanced
5384 by ratio_mult_vf_name steps. */
5385 vect_update_ivs_after_vectorizer (loop_vinfo, ratio_mult_vf_name, update_e);
5386
5387 /* After peeling we have to reset scalar evolution analyzer. */
5388 scev_reset ();
5389
5390 free_original_copy_tables ();
5391 }
5392
5393
5394 /* Function vect_gen_niters_for_prolog_loop
5395
5396 Set the number of iterations for the loop represented by LOOP_VINFO
5397 to the minimum between LOOP_NITERS (the original iteration count of the loop)
5398 and the misalignment of DR - the data reference recorded in
5399 LOOP_VINFO_UNALIGNED_DR (LOOP_VINFO). As a result, after the execution of
5400 this loop, the data reference DR will refer to an aligned location.
5401
5402 The following computation is generated:
5403
5404 If the misalignment of DR is known at compile time:
5405 addr_mis = int mis = DR_MISALIGNMENT (dr);
5406 Else, compute address misalignment in bytes:
5407 addr_mis = addr & (vectype_size - 1)
5408
5409 prolog_niters = min ( LOOP_NITERS , (VF - addr_mis/elem_size)&(VF-1) )
5410
5411 (elem_size = element type size; an element is the scalar element
5412 whose type is the inner type of the vectype)
5413
5414 For interleaving,
5415
5416 prolog_niters = min ( LOOP_NITERS ,
5417 (VF/group_size - addr_mis/elem_size)&(VF/group_size-1) )
5418 where group_size is the size of the interleaved group.
5419
5420 The above formulas assume that VF == number of elements in the vector. This
5421 may not hold when there are multiple-types in the loop.
5422 In this case, for some data-references in the loop the VF does not represent
5423 the number of elements that fit in the vector. Therefore, instead of VF we
5424 use TYPE_VECTOR_SUBPARTS. */
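/* For example, assuming a V4SF-like vectype (nelements = 4,
   elem_size = 4, vector size 16 bytes, group_size = 1) and a known
   byte misalignment of 8:
     elem_misalign = 8 / 4 = 2
     prolog_niters = (4 - 2) & (4 - 1) = 2
   i.e. two scalar iterations are peeled so that DR becomes
   16-byte aligned.  */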
5425
5426 static tree
5427 vect_gen_niters_for_prolog_loop (loop_vec_info loop_vinfo, tree loop_niters)
5428 {
5429 struct data_reference *dr = LOOP_VINFO_UNALIGNED_DR (loop_vinfo);
5430 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
5431 tree var, stmt;
5432 tree iters, iters_name;
5433 edge pe;
5434 basic_block new_bb;
5435 tree dr_stmt = DR_STMT (dr);
5436 stmt_vec_info stmt_info = vinfo_for_stmt (dr_stmt);
5437 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
5438 int vectype_align = TYPE_ALIGN (vectype) / BITS_PER_UNIT;
5439 tree niters_type = TREE_TYPE (loop_niters);
5440 int group_size = 1;
5441 int element_size = GET_MODE_SIZE (TYPE_MODE (TREE_TYPE (DR_REF (dr))));
5442 int nelements = TYPE_VECTOR_SUBPARTS (vectype);
5443
5444 if (DR_GROUP_FIRST_DR (stmt_info))
5445 {
5446 /* For interleaved accesses, the element size must be multiplied by
5447 the size of the interleaved group. */
5448 group_size = DR_GROUP_SIZE (vinfo_for_stmt (
5449 DR_GROUP_FIRST_DR (stmt_info)));
5450 element_size *= group_size;
5451 }
5452
5453 pe = loop_preheader_edge (loop);
5454
5455 if (LOOP_PEELING_FOR_ALIGNMENT (loop_vinfo) > 0)
5456 {
5457 int byte_misalign = LOOP_PEELING_FOR_ALIGNMENT (loop_vinfo);
5458 int elem_misalign = byte_misalign / element_size;
5459
5460 if (vect_print_dump_info (REPORT_DETAILS))
5461 fprintf (vect_dump, "known misalignment = %d.", byte_misalign);
5462 iters = build_int_cst (niters_type,
5463 (nelements - elem_misalign)&(nelements/group_size-1));
5464 }
5465 else
5466 {
5467 tree new_stmts = NULL_TREE;
5468 tree start_addr =
5469 vect_create_addr_base_for_vector_ref (dr_stmt, &new_stmts, NULL_TREE);
5470 tree ptr_type = TREE_TYPE (start_addr);
5471 tree size = TYPE_SIZE (ptr_type);
5472 tree type = lang_hooks.types.type_for_size (tree_low_cst (size, 1), 1);
5473 tree vectype_size_minus_1 = build_int_cst (type, vectype_align - 1);
5474 tree elem_size_log =
5475 build_int_cst (type, exact_log2 (vectype_align/nelements));
5476 tree nelements_minus_1 = build_int_cst (type, nelements - 1);
5477 tree nelements_tree = build_int_cst (type, nelements);
5478 tree byte_misalign;
5479 tree elem_misalign;
5480
5481 new_bb = bsi_insert_on_edge_immediate (pe, new_stmts);
5482 gcc_assert (!new_bb);
5483
5484 /* Create: byte_misalign = addr & (vectype_size - 1) */
5485 byte_misalign =
5486 fold_build2 (BIT_AND_EXPR, type, fold_convert (type, start_addr), vectype_size_minus_1);
5487
5488 /* Create: elem_misalign = byte_misalign / element_size */
5489 elem_misalign =
5490 fold_build2 (RSHIFT_EXPR, type, byte_misalign, elem_size_log);
5491
5492 /* Create: (niters_type) (nelements - elem_misalign)&(nelements - 1) */
5493 iters = fold_build2 (MINUS_EXPR, type, nelements_tree, elem_misalign);
5494 iters = fold_build2 (BIT_AND_EXPR, type, iters, nelements_minus_1);
5495 iters = fold_convert (niters_type, iters);
5496 }
5497
5498 /* Create: prolog_loop_niters = min (iters, loop_niters) */
5499 /* If the loop bound is known at compile time we already verified that it is
5500 greater than vf; since the prolog iteration count ('iters') is at most vf, there's
5501 no need to generate the MIN_EXPR in this case. */
5502 if (TREE_CODE (loop_niters) != INTEGER_CST)
5503 iters = fold_build2 (MIN_EXPR, niters_type, iters, loop_niters);
5504
5505 if (vect_print_dump_info (REPORT_DETAILS))
5506 {
5507 fprintf (vect_dump, "niters for prolog loop: ");
5508 print_generic_expr (vect_dump, iters, TDF_SLIM);
5509 }
5510
5511 var = create_tmp_var (niters_type, "prolog_loop_niters");
5512 add_referenced_var (var);
5513 iters_name = force_gimple_operand (iters, &stmt, false, var);
5514
5515 /* Insert stmt on loop preheader edge. */
5516 if (stmt)
5517 {
5518 basic_block new_bb = bsi_insert_on_edge_immediate (pe, stmt);
5519 gcc_assert (!new_bb);
5520 }
5521
5522 return iters_name;
5523 }
5524
5525
5526 /* Function vect_update_init_of_dr
5527
5528 NITERS iterations were peeled from LOOP. DR represents a data reference
5529 in LOOP. This function updates the information recorded in DR to
5530 account for the fact that the first NITERS iterations had already been
5531 executed. Specifically, it updates the OFFSET field of DR. */
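/* For example, if DR advances by DR_STEP = 4 (bytes) per iteration
   and NITERS = 3 iterations were peeled, the code below adds
   3 * 4 = 12 to DR_OFFSET, so the data reference describes the
   first access performed by the remaining loop.  */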
5532
5533 static void
5534 vect_update_init_of_dr (struct data_reference *dr, tree niters)
5535 {
5536 tree offset = DR_OFFSET (dr);
5537
5538 niters = fold_build2 (MULT_EXPR, TREE_TYPE (niters), niters, DR_STEP (dr));
5539 offset = fold_build2 (PLUS_EXPR, TREE_TYPE (offset), offset, niters);
5540 DR_OFFSET (dr) = offset;
5541 }
5542
5543
5544 /* Function vect_update_inits_of_drs
5545
5546 NITERS iterations were peeled from the loop represented by LOOP_VINFO.
5547 This function updates the information recorded for the data references in
5548 the loop to account for the fact that the first NITERS iterations had
5549 already been executed. Specifically, it updates the initial_condition of
5550 the access_function of all the data_references in the loop. */
5551
5552 static void
5553 vect_update_inits_of_drs (loop_vec_info loop_vinfo, tree niters)
5554 {
5555 unsigned int i;
5556 VEC (data_reference_p, heap) *datarefs = LOOP_VINFO_DATAREFS (loop_vinfo);
5557 struct data_reference *dr;
5558
5559 if (vect_print_dump_info (REPORT_DETAILS))
5560 fprintf (vect_dump, "=== vect_update_inits_of_dr ===");
5561
5562 for (i = 0; VEC_iterate (data_reference_p, datarefs, i, dr); i++)
5563 vect_update_init_of_dr (dr, niters);
5564 }
5565
5566
5567 /* Function vect_do_peeling_for_alignment
5568
5569 Peel the first 'niters' iterations of the loop represented by LOOP_VINFO.
5570 'niters' is set to the misalignment of one of the data references in the
5571 loop, thereby forcing it to refer to an aligned location at the beginning
5572 of the execution of this loop. The data reference for which we are
5573 peeling is recorded in LOOP_VINFO_UNALIGNED_DR. */
5574
5575 static void
5576 vect_do_peeling_for_alignment (loop_vec_info loop_vinfo)
5577 {
5578 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
5579 tree niters_of_prolog_loop, ni_name;
5580 tree n_iters;
5581 struct loop *new_loop;
5582
5583 if (vect_print_dump_info (REPORT_DETAILS))
5584 fprintf (vect_dump, "=== vect_do_peeling_for_alignment ===");
5585
5586 initialize_original_copy_tables ();
5587
5588 ni_name = vect_build_loop_niters (loop_vinfo);
5589 niters_of_prolog_loop = vect_gen_niters_for_prolog_loop (loop_vinfo, ni_name);
5590
5591 /* Peel the prolog loop and iterate it niters_of_prolog_loop times. */
5592 new_loop =
5593 slpeel_tree_peel_loop_to_edge (loop, loop_preheader_edge (loop),
5594 niters_of_prolog_loop, ni_name, true, 0);
5595 gcc_assert (new_loop);
5596 #ifdef ENABLE_CHECKING
5597 slpeel_verify_cfg_after_peeling (new_loop, loop);
5598 #endif
5599
5600 /* Update number of times loop executes. */
5601 n_iters = LOOP_VINFO_NITERS (loop_vinfo);
5602 LOOP_VINFO_NITERS (loop_vinfo) = fold_build2 (MINUS_EXPR,
5603 TREE_TYPE (n_iters), n_iters, niters_of_prolog_loop);
5604
5605 /* Update the init conditions of the access functions of all data refs. */
5606 vect_update_inits_of_drs (loop_vinfo, niters_of_prolog_loop);
5607
5608 /* After peeling we have to reset scalar evolution analyzer. */
5609 scev_reset ();
5610
5611 free_original_copy_tables ();
5612 }
5613
5614
5615 /* Function vect_create_cond_for_align_checks.
5616
5617 Create a conditional expression that represents the alignment checks for
5618 all of the data references (array element references) whose alignment must be
5619 checked at runtime.
5620
5621 Input:
5622 LOOP_VINFO - two fields of the loop information are used.
5623 LOOP_VINFO_PTR_MASK is the mask used to check the alignment.
5624 LOOP_VINFO_MAY_MISALIGN_STMTS contains the refs to be checked.
5625
5626 Output:
5627 COND_EXPR_STMT_LIST - statements needed to construct the conditional
5628 expression.
5629 The returned value is the conditional expression to be used in the if
5630 statement that controls which version of the loop gets executed at runtime.
5631
5632 The algorithm makes two assumptions:
5633 1) The number of bytes "n" in a vector is a power of 2.
5634 2) An address "a" is aligned if a%n is zero, and this
5635 test can be done as a&(n-1) == 0. For example, for 16
5636 byte vectors the test is a&0xf == 0. */
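/* For example, for two data refs and 16-byte vectors (mask = 0xf),
   the statements built below amount to:
     addr2int0 = (int) addr_of_first_vector_of_dr_1;
     addr2int1 = (int) addr_of_first_vector_of_dr_2;
     orptrs1 = addr2int0 | addr2int1;
     andmask = orptrs1 & 0xf;
   and the returned condition is (andmask == 0).  */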
5637
5638 static tree
5639 vect_create_cond_for_align_checks (loop_vec_info loop_vinfo,
5640 tree *cond_expr_stmt_list)
5641 {
5642 VEC(tree,heap) *may_misalign_stmts
5643 = LOOP_VINFO_MAY_MISALIGN_STMTS (loop_vinfo);
5644 tree ref_stmt, tmp;
5645 int mask = LOOP_VINFO_PTR_MASK (loop_vinfo);
5646 tree mask_cst;
5647 unsigned int i;
5648 tree psize;
5649 tree int_ptrsize_type;
5650 char tmp_name[20];
5651 tree or_tmp_name = NULL_TREE;
5652 tree and_tmp, and_tmp_name, and_stmt;
5653 tree ptrsize_zero;
5654
5655 /* Check that mask is one less than a power of 2, i.e., mask is
5656 all zeros followed by all ones. */
5657 gcc_assert ((mask != 0) && ((mask & (mask+1)) == 0));
5658
5659 /* CHECKME: what is the best integer or unsigned type to use to hold a
5660 cast from a pointer value? */
5661 psize = TYPE_SIZE (ptr_type_node);
5662 int_ptrsize_type
5663 = lang_hooks.types.type_for_size (tree_low_cst (psize, 1), 0);
5664
5665 /* Create expression (mask & (dr_1 | ... | dr_n)), where dr_i is the address
5666 of the first vector of the i'th data reference. */
5667
5668 for (i = 0; VEC_iterate (tree, may_misalign_stmts, i, ref_stmt); i++)
5669 {
5670 tree new_stmt_list = NULL_TREE;
5671 tree addr_base;
5672 tree addr_tmp, addr_tmp_name, addr_stmt;
5673 tree or_tmp, new_or_tmp_name, or_stmt;
5674
5675 /* create: addr_tmp = (int)(address_of_first_vector) */
5676 addr_base = vect_create_addr_base_for_vector_ref (ref_stmt,
5677 &new_stmt_list,
5678 NULL_TREE);
5679
5680 if (new_stmt_list != NULL_TREE)
5681 append_to_statement_list_force (new_stmt_list, cond_expr_stmt_list);
5682
5683 sprintf (tmp_name, "%s%d", "addr2int", i);
5684 addr_tmp = create_tmp_var (int_ptrsize_type, tmp_name);
5685 add_referenced_var (addr_tmp);
5686 addr_tmp_name = make_ssa_name (addr_tmp, NULL_TREE);
5687 addr_stmt = fold_convert (int_ptrsize_type, addr_base);
5688 addr_stmt = build_gimple_modify_stmt (addr_tmp_name, addr_stmt);
5689 SSA_NAME_DEF_STMT (addr_tmp_name) = addr_stmt;
5690 append_to_statement_list_force (addr_stmt, cond_expr_stmt_list);
5691
5692 /* The addresses are ORed together. */
5693
5694 if (or_tmp_name != NULL_TREE)
5695 {
5696 /* create: or_tmp = or_tmp | addr_tmp */
5697 sprintf (tmp_name, "%s%d", "orptrs", i);
5698 or_tmp = create_tmp_var (int_ptrsize_type, tmp_name);
5699 add_referenced_var (or_tmp);
5700 new_or_tmp_name = make_ssa_name (or_tmp, NULL_TREE);
5701 tmp = build2 (BIT_IOR_EXPR, int_ptrsize_type,
5702 or_tmp_name, addr_tmp_name);
5703 or_stmt = build_gimple_modify_stmt (new_or_tmp_name, tmp);
5704 SSA_NAME_DEF_STMT (new_or_tmp_name) = or_stmt;
5705 append_to_statement_list_force (or_stmt, cond_expr_stmt_list);
5706 or_tmp_name = new_or_tmp_name;
5707 }
5708 else
5709 or_tmp_name = addr_tmp_name;
5710
5711 } /* end for i */
5712
5713 mask_cst = build_int_cst (int_ptrsize_type, mask);
5714
5715 /* create: and_tmp = or_tmp & mask */
5716 and_tmp = create_tmp_var (int_ptrsize_type, "andmask" );
5717 add_referenced_var (and_tmp);
5718 and_tmp_name = make_ssa_name (and_tmp, NULL_TREE);
5719
5720 tmp = build2 (BIT_AND_EXPR, int_ptrsize_type, or_tmp_name, mask_cst);
5721 and_stmt = build_gimple_modify_stmt (and_tmp_name, tmp);
5722 SSA_NAME_DEF_STMT (and_tmp_name) = and_stmt;
5723 append_to_statement_list_force (and_stmt, cond_expr_stmt_list);
5724
5725 /* Make and_tmp the left operand of the conditional test against zero.
5726 If and_tmp has a nonzero bit then some address is unaligned. */
5727 ptrsize_zero = build_int_cst (int_ptrsize_type, 0);
5728 return build2 (EQ_EXPR, boolean_type_node,
5729 and_tmp_name, ptrsize_zero);
5730 }
5731
5732
5733 /* Function vect_transform_loop.
5734
5735 The analysis phase has determined that the loop is vectorizable.
5736 Vectorize the loop: create vectorized stmts to replace the scalar
5737 stmts in the loop, and update the loop exit condition. */
5738
5739 void
5740 vect_transform_loop (loop_vec_info loop_vinfo)
5741 {
5742 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
5743 basic_block *bbs = LOOP_VINFO_BBS (loop_vinfo);
5744 int nbbs = loop->num_nodes;
5745 block_stmt_iterator si, next_si;
5746 int i;
5747 tree ratio = NULL;
5748 int vectorization_factor = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
5749 bool strided_store;
5750
5751 if (vect_print_dump_info (REPORT_DETAILS))
5752 fprintf (vect_dump, "=== vect_transform_loop ===");
5753
5754 /* If the loop has data references that may or may not be aligned then
5755 two versions of the loop need to be generated, one which is vectorized
5756 and one which isn't. A test is then generated to control which of the
5757 loops is executed. The test checks for the alignment of all of the
5758 data references that may or may not be aligned. */
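/* Schematically, after versioning the code has the form
     if (alignment_check)    <-- built by vect_create_cond_for_align_checks
       <loop to be vectorized>
     else
       <scalar copy of the loop>  */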
5759
5760 if (VEC_length (tree, LOOP_VINFO_MAY_MISALIGN_STMTS (loop_vinfo)))
5761 {
5762 struct loop *nloop;
5763 tree cond_expr;
5764 tree cond_expr_stmt_list = NULL_TREE;
5765 basic_block condition_bb;
5766 block_stmt_iterator cond_exp_bsi;
5767 basic_block merge_bb;
5768 basic_block new_exit_bb;
5769 edge new_exit_e, e;
5770 tree orig_phi, new_phi, arg;
5771 unsigned prob = 4 * REG_BR_PROB_BASE / 5;
5772
5773 cond_expr = vect_create_cond_for_align_checks (loop_vinfo,
5774 &cond_expr_stmt_list);
5775 initialize_original_copy_tables ();
5776 nloop = loop_version (loop, cond_expr, &condition_bb,
5777 prob, prob, REG_BR_PROB_BASE - prob, true);
5778 free_original_copy_tables();
5779
5780 /** Loop versioning violates an assumption we try to maintain during
5781 vectorization - that the loop exit block has a single predecessor.
5782 After versioning, the exit block of both loop versions is the same
5783 basic block (i.e. it has two predecessors). To simplify the
5784 following transformations in the vectorizer, we fix this situation
5785 here by adding a new (empty) block on the exit-edge of the loop,
5786 with the proper loop-exit phis to maintain loop-closed-form. **/
5787
5788 merge_bb = single_exit (loop)->dest;
5789 gcc_assert (EDGE_COUNT (merge_bb->preds) == 2);
5790 new_exit_bb = split_edge (single_exit (loop));
5791 new_exit_e = single_exit (loop);
5792 e = EDGE_SUCC (new_exit_bb, 0);
5793
5794 for (orig_phi = phi_nodes (merge_bb); orig_phi;
5795 orig_phi = PHI_CHAIN (orig_phi))
5796 {
5797 new_phi = create_phi_node (SSA_NAME_VAR (PHI_RESULT (orig_phi)),
5798 new_exit_bb);
5799 arg = PHI_ARG_DEF_FROM_EDGE (orig_phi, e);
5800 add_phi_arg (new_phi, arg, new_exit_e);
5801 SET_PHI_ARG_DEF (orig_phi, e->dest_idx, PHI_RESULT (new_phi));
5802 }
5803
5804 /** end loop-exit-fixes after versioning **/
5805
5806 update_ssa (TODO_update_ssa);
5807 cond_exp_bsi = bsi_last (condition_bb);
5808 bsi_insert_before (&cond_exp_bsi, cond_expr_stmt_list, BSI_SAME_STMT);
5809 }
5810
5811 /* CHECKME: we wouldn't need this if we called update_ssa once
5812 for all loops. */
5813 bitmap_zero (vect_memsyms_to_rename);
5814
5815 /* Peel the loop if there are data refs with unknown alignment.
5816 Only one data ref with unknown alignment is supported. */
5817
5818 if (LOOP_PEELING_FOR_ALIGNMENT (loop_vinfo))
5819 vect_do_peeling_for_alignment (loop_vinfo);
5820
5821 /* If the loop has a symbolic number of iterations 'n' (i.e. it's not a
5822 compile time constant), or it is a constant that is not divisible by the
5823 vectorization factor, then an epilog loop needs to be created.
5824 We therefore duplicate the loop: the original loop will be vectorized,
5825 and will compute the first (n/VF) iterations. The second copy of the loop
5826 will remain scalar and will compute the remaining (n%VF) iterations.
5827 (VF is the vectorization factor). */
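/* For instance, with n = 17 and VF = 4 the vectorized loop computes
   iterations 1..16 and the scalar epilog loop computes iteration 17;
   with n = 16 no epilog is needed and ratio is simply 16 / 4 = 4.  */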
5828
5829 if (!LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo)
5830 || (LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo)
5831 && LOOP_VINFO_INT_NITERS (loop_vinfo) % vectorization_factor != 0))
5832 vect_do_peeling_for_loop_bound (loop_vinfo, &ratio);
5833 else
5834 ratio = build_int_cst (TREE_TYPE (LOOP_VINFO_NITERS (loop_vinfo)),
5835 LOOP_VINFO_INT_NITERS (loop_vinfo) / vectorization_factor);
5836
5837 /* 1) Make sure the loop header has exactly two entries
5838 2) Make sure we have a preheader basic block. */
5839
5840 gcc_assert (EDGE_COUNT (loop->header->preds) == 2);
5841
5842 split_edge (loop_preheader_edge (loop));
5843
5844 /* FORNOW: the vectorizer supports only loops whose body consists
5845 of one basic block (header + empty latch). When the vectorizer
5846 supports more involved loop forms, the order in which the BBs are
5847 traversed will need to be reconsidered. */
5848
5849 for (i = 0; i < nbbs; i++)
5850 {
5851 basic_block bb = bbs[i];
5852 stmt_vec_info stmt_info;
5853 tree phi;
5854
5855 for (phi = phi_nodes (bb); phi; phi = PHI_CHAIN (phi))
5856 {
5857 if (vect_print_dump_info (REPORT_DETAILS))
5858 {
5859 fprintf (vect_dump, "------>vectorizing phi: ");
5860 print_generic_expr (vect_dump, phi, TDF_SLIM);
5861 }
5862 stmt_info = vinfo_for_stmt (phi);
5863 if (!stmt_info)
5864 continue;
5865 if (!STMT_VINFO_RELEVANT_P (stmt_info)
5866 && !STMT_VINFO_LIVE_P (stmt_info))
5867 continue;
5868
5869 if ((TYPE_VECTOR_SUBPARTS (STMT_VINFO_VECTYPE (stmt_info))
5870 != (unsigned HOST_WIDE_INT) vectorization_factor)
5871 && vect_print_dump_info (REPORT_DETAILS))
5872 fprintf (vect_dump, "multiple-types.");
5873
5874 if (STMT_VINFO_DEF_TYPE (stmt_info) == vect_induction_def)
5875 {
5876 if (vect_print_dump_info (REPORT_DETAILS))
5877 fprintf (vect_dump, "transform phi.");
5878 vect_transform_stmt (phi, NULL, NULL);
5879 }
5880 }
5881
5882 for (si = bsi_start (bb); !bsi_end_p (si);)
5883 {
5884 tree stmt = bsi_stmt (si);
5885 bool is_store;
5886
5887 if (vect_print_dump_info (REPORT_DETAILS))
5888 {
5889 fprintf (vect_dump, "------>vectorizing statement: ");
5890 print_generic_expr (vect_dump, stmt, TDF_SLIM);
5891 }
5892 stmt_info = vinfo_for_stmt (stmt);
5893 gcc_assert (stmt_info);
5894 if (!STMT_VINFO_RELEVANT_P (stmt_info)
5895 && !STMT_VINFO_LIVE_P (stmt_info))
5896 {
5897 bsi_next (&si);
5898 continue;
5899 }
5900
5901 gcc_assert (STMT_VINFO_VECTYPE (stmt_info));
5902 if ((TYPE_VECTOR_SUBPARTS (STMT_VINFO_VECTYPE (stmt_info))
5903 != (unsigned HOST_WIDE_INT) vectorization_factor)
5904 && vect_print_dump_info (REPORT_DETAILS))
5905 fprintf (vect_dump, "multiple-types.");
5906
5907 /* -------- vectorize statement ------------ */
5908 if (vect_print_dump_info (REPORT_DETAILS))
5909 fprintf (vect_dump, "transform statement.");
5910
5911 strided_store = false;
5912 is_store = vect_transform_stmt (stmt, &si, &strided_store);
5913 if (is_store)
5914 {
5915 stmt_ann_t ann;
5916 if (DR_GROUP_FIRST_DR (stmt_info))
5917 {
5918 /* Interleaving. If IS_STORE is TRUE, the vectorization of the
5919 interleaving chain was completed - free all the stores in
5920 the chain. */
5921 tree next = DR_GROUP_FIRST_DR (stmt_info);
5922 tree tmp;
5923 stmt_vec_info next_stmt_info;
5924
5925 while (next)
5926 {
5927 next_si = bsi_for_stmt (next);
5928 next_stmt_info = vinfo_for_stmt (next);
5929 /* Free the attached stmt_vec_info and remove the stmt. */
5930 ann = stmt_ann (next);
5931 tmp = DR_GROUP_NEXT_DR (next_stmt_info);
5932 free (next_stmt_info);
5933 set_stmt_info (ann, NULL);
5934 bsi_remove (&next_si, true);
5935 next = tmp;
5936 }
5937 bsi_remove (&si, true);
5938 continue;
5939 }
5940 else
5941 {
5942 /* Free the attached stmt_vec_info and remove the stmt. */
5943 ann = stmt_ann (stmt);
5944 free (stmt_info);
5945 set_stmt_info (ann, NULL);
5946 bsi_remove (&si, true);
5947 continue;
5948 }
5949 }
5950 bsi_next (&si);
5951 } /* stmts in BB */
5952 } /* BBs in loop */
5953
5954 slpeel_make_loop_iterate_ntimes (loop, ratio);
5955
5956 mark_set_for_renaming (vect_memsyms_to_rename);
5957
5958 /* The memory tags and pointers in vectorized statements need to
5959 have their SSA forms updated. FIXME, why can't this be delayed
5960 until all the loops have been transformed? */
5961 update_ssa (TODO_update_ssa);
5962
5963 if (vect_print_dump_info (REPORT_VECTORIZED_LOOPS))
5964 fprintf (vect_dump, "LOOP VECTORIZED.");
5965 }