/* Analysis Utilities for Loop Vectorization.
   Copyright (C) 2003, 2004, 2005, 2006, 2007, 2008 Free Software
   Foundation, Inc.
   Contributed by Dorit Naishlos <dorit@il.ibm.com>

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free
Software Foundation; either version 3, or (at your option) any later
version.

GCC is distributed in the hope that it will be useful, but WITHOUT ANY
WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3.  If not see
<http://www.gnu.org/licenses/>.  */

#include "config.h"
#include "system.h"
#include "coretypes.h"
#include "tm.h"
#include "ggc.h"
#include "tree.h"
#include "target.h"
#include "basic-block.h"
#include "diagnostic.h"
#include "tree-flow.h"
#include "tree-dump.h"
#include "timevar.h"
#include "cfgloop.h"
#include "expr.h"
#include "optabs.h"
#include "params.h"
#include "tree-chrec.h"
#include "tree-data-ref.h"
#include "tree-scalar-evolution.h"
#include "tree-vectorizer.h"
#include "toplev.h"
#include "recog.h"

static bool vect_can_advance_ivs_p (loop_vec_info);

/* Function vect_determine_vectorization_factor

   Determine the vectorization factor (VF).  VF is the number of data
   elements that are operated upon in parallel in a single iteration of the
   vectorized loop.  For example, when vectorizing a loop that operates on
   4-byte elements, on a target with vector size (VS) of 16 bytes, the VF is
   set to 4, since 4 elements can fit in a single vector register.

   We currently support vectorization of loops in which all types operated
   upon are of the same size.  Therefore this function currently sets VF
   according to the size of the types operated upon, and fails if there are
   multiple sizes in the loop.

   VF is also the factor by which the loop iterations are strip-mined, e.g.:
   original loop:
        for (i=0; i<N; i++){
          a[i] = b[i] + c[i];
        }

   vectorized loop:
        for (i=0; i<N; i+=VF){
          a[i:VF] = b[i:VF] + c[i:VF];
        }
*/
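
/* A worked example of the arithmetic above (numbers are illustrative
   assumptions, not tied to any particular target): with a 16-byte vector
   unit, 4-byte elements give nunits = 16/4 = 4, so VF = 4; 2-byte elements
   give nunits = 8, so VF = 8.  A loop mixing both element sizes is not
   handled by this scheme, per the restriction described above.  */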

static bool
vect_determine_vectorization_factor (loop_vec_info loop_vinfo)
{
  struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
  basic_block *bbs = LOOP_VINFO_BBS (loop_vinfo);
  int nbbs = loop->num_nodes;
  gimple_stmt_iterator si;
  unsigned int vectorization_factor = 0;
  tree scalar_type;
  gimple phi;
  tree vectype;
  unsigned int nunits;
  stmt_vec_info stmt_info;
  int i;

  if (vect_print_dump_info (REPORT_DETAILS))
    fprintf (vect_dump, "=== vect_determine_vectorization_factor ===");

  for (i = 0; i < nbbs; i++)
    {
      basic_block bb = bbs[i];

      for (si = gsi_start_phis (bb); !gsi_end_p (si); gsi_next (&si))
        {
          phi = gsi_stmt (si);
          stmt_info = vinfo_for_stmt (phi);
          if (vect_print_dump_info (REPORT_DETAILS))
            {
              fprintf (vect_dump, "==> examining phi: ");
              print_gimple_stmt (vect_dump, phi, 0, TDF_SLIM);
            }

          gcc_assert (stmt_info);

          if (STMT_VINFO_RELEVANT_P (stmt_info))
            {
              gcc_assert (!STMT_VINFO_VECTYPE (stmt_info));
              scalar_type = TREE_TYPE (PHI_RESULT (phi));

              if (vect_print_dump_info (REPORT_DETAILS))
                {
                  fprintf (vect_dump, "get vectype for scalar type: ");
                  print_generic_expr (vect_dump, scalar_type, TDF_SLIM);
                }

              vectype = get_vectype_for_scalar_type (scalar_type);
              if (!vectype)
                {
                  if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
                    {
                      fprintf (vect_dump,
                               "not vectorized: unsupported data-type ");
                      print_generic_expr (vect_dump, scalar_type, TDF_SLIM);
                    }
                  return false;
                }
              STMT_VINFO_VECTYPE (stmt_info) = vectype;

              if (vect_print_dump_info (REPORT_DETAILS))
                {
                  fprintf (vect_dump, "vectype: ");
                  print_generic_expr (vect_dump, vectype, TDF_SLIM);
                }

              nunits = TYPE_VECTOR_SUBPARTS (vectype);
              if (vect_print_dump_info (REPORT_DETAILS))
                fprintf (vect_dump, "nunits = %d", nunits);

              if (!vectorization_factor
                  || (nunits > vectorization_factor))
                vectorization_factor = nunits;
            }
        }

      for (si = gsi_start_bb (bb); !gsi_end_p (si); gsi_next (&si))
        {
          gimple stmt = gsi_stmt (si);
          stmt_info = vinfo_for_stmt (stmt);

          if (vect_print_dump_info (REPORT_DETAILS))
            {
              fprintf (vect_dump, "==> examining statement: ");
              print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
            }

          gcc_assert (stmt_info);

          /* Skip stmts which do not need to be vectorized.  */
          if (!STMT_VINFO_RELEVANT_P (stmt_info)
              && !STMT_VINFO_LIVE_P (stmt_info))
            {
              if (vect_print_dump_info (REPORT_DETAILS))
                fprintf (vect_dump, "skip.");
              continue;
            }

          if (gimple_get_lhs (stmt) == NULL_TREE)
            {
              if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
                {
                  fprintf (vect_dump, "not vectorized: irregular stmt.");
                  print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
                }
              return false;
            }

          if (VECTOR_MODE_P (TYPE_MODE (gimple_expr_type (stmt))))
            {
              if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
                {
                  fprintf (vect_dump, "not vectorized: vector stmt in loop:");
                  print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
                }
              return false;
            }

          if (STMT_VINFO_VECTYPE (stmt_info))
            {
              /* The only case when a vectype has already been set is for
                 stmts that contain a dataref, or for "pattern-stmts" (stmts
                 generated by the vectorizer to represent/replace a certain
                 idiom).  */
              gcc_assert (STMT_VINFO_DATA_REF (stmt_info)
                          || is_pattern_stmt_p (stmt_info));
              vectype = STMT_VINFO_VECTYPE (stmt_info);
            }
          else
            {
              gcc_assert (! STMT_VINFO_DATA_REF (stmt_info)
                          && !is_pattern_stmt_p (stmt_info));

              /* We generally set the vectype according to the type of the
                 result (lhs).
                 For stmts whose result-type is different from the type of
                 the arguments (e.g. demotion, promotion), vectype will be
                 reset appropriately (later).  Note that we have to visit the
                 smallest datatype in this function, because that determines
                 the VF.  If the smallest datatype in the loop is present
                 only as the rhs of a promotion operation - we'd miss it here.
                 Such a case, where a variable of this datatype does not
                 appear in the lhs anywhere in the loop, can only occur if
                 it's an invariant: e.g.: 'int_x = (int) short_inv', which
                 we'd expect to have been optimized away by invariant motion.
                 However, we cannot rely on invariant motion to always take
                 invariants out of the loop, and so in the case of promotion
                 we also have to check the rhs.  */
              scalar_type = gimple_expr_type (stmt);

              if (is_gimple_assign (stmt)
                  && (gimple_assign_cast_p (stmt)
                      || gimple_assign_rhs_code (stmt) == WIDEN_MULT_EXPR
                      || gimple_assign_rhs_code (stmt) == FLOAT_EXPR))
                {
                  tree rhs_type = TREE_TYPE (gimple_assign_rhs1 (stmt));
                  if (TREE_INT_CST_LOW (TYPE_SIZE_UNIT (rhs_type))
                      < TREE_INT_CST_LOW (TYPE_SIZE_UNIT (scalar_type)))
                    scalar_type = rhs_type;
                }

              if (vect_print_dump_info (REPORT_DETAILS))
                {
                  fprintf (vect_dump, "get vectype for scalar type: ");
                  print_generic_expr (vect_dump, scalar_type, TDF_SLIM);
                }

              vectype = get_vectype_for_scalar_type (scalar_type);
              if (!vectype)
                {
                  if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
                    {
                      fprintf (vect_dump,
                               "not vectorized: unsupported data-type ");
                      print_generic_expr (vect_dump, scalar_type, TDF_SLIM);
                    }
                  return false;
                }
              STMT_VINFO_VECTYPE (stmt_info) = vectype;
            }

          if (vect_print_dump_info (REPORT_DETAILS))
            {
              fprintf (vect_dump, "vectype: ");
              print_generic_expr (vect_dump, vectype, TDF_SLIM);
            }

          nunits = TYPE_VECTOR_SUBPARTS (vectype);
          if (vect_print_dump_info (REPORT_DETAILS))
            fprintf (vect_dump, "nunits = %d", nunits);

          if (!vectorization_factor
              || (nunits > vectorization_factor))
            vectorization_factor = nunits;
        }
    }

  /* TODO: Analyze cost.  Decide if worth while to vectorize.  */
  if (vect_print_dump_info (REPORT_DETAILS))
    fprintf (vect_dump, "vectorization factor = %d", vectorization_factor);
  if (vectorization_factor <= 1)
    {
      if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
        fprintf (vect_dump, "not vectorized: unsupported data-type");
      return false;
    }
  LOOP_VINFO_VECT_FACTOR (loop_vinfo) = vectorization_factor;

  return true;
}


/* SLP costs are calculated according to SLP instance unrolling factor (i.e.,
   the number of created vector stmts depends on the unrolling factor).
   However, the actual number of vector stmts for every SLP node depends on
   VF which is set later in vect_analyze_operations ().  Hence, SLP costs
   should be updated.  In this function we assume that the inside costs
   calculated in vect_model_xxx_cost are linear in ncopies.  */

static void
vect_update_slp_costs_according_to_vf (loop_vec_info loop_vinfo)
{
  unsigned int i, vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
  VEC (slp_instance, heap) *slp_instances = LOOP_VINFO_SLP_INSTANCES (loop_vinfo);
  slp_instance instance;

  if (vect_print_dump_info (REPORT_SLP))
    fprintf (vect_dump, "=== vect_update_slp_costs_according_to_vf ===");

  for (i = 0; VEC_iterate (slp_instance, slp_instances, i, instance); i++)
    /* We assume that costs are linear in ncopies.  */
    SLP_INSTANCE_INSIDE_OF_LOOP_COST (instance) *= vf
      / SLP_INSTANCE_UNROLLING_FACTOR (instance);
}
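
/* Worked example of the scaling above (illustrative numbers): an SLP
   instance costed with SLP_INSTANCE_UNROLLING_FACTOR = 2 in a loop whose
   VF is later set to 8 has its inside-of-loop cost multiplied by
   8 / 2 = 4, the number of copies created per SLP stmt under the
   linear-in-ncopies assumption.  */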


/* Function vect_analyze_operations.

   Scan the loop stmts and make sure they are all vectorizable.  */

static bool
vect_analyze_operations (loop_vec_info loop_vinfo)
{
  struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
  basic_block *bbs = LOOP_VINFO_BBS (loop_vinfo);
  int nbbs = loop->num_nodes;
  gimple_stmt_iterator si;
  unsigned int vectorization_factor = 0;
  int i;
  bool ok;
  gimple phi;
  stmt_vec_info stmt_info;
  bool need_to_vectorize = false;
  int min_profitable_iters;
  int min_scalar_loop_bound;
  unsigned int th;
  bool only_slp_in_loop = true;

  if (vect_print_dump_info (REPORT_DETAILS))
    fprintf (vect_dump, "=== vect_analyze_operations ===");

  gcc_assert (LOOP_VINFO_VECT_FACTOR (loop_vinfo));
  vectorization_factor = LOOP_VINFO_VECT_FACTOR (loop_vinfo);

  for (i = 0; i < nbbs; i++)
    {
      basic_block bb = bbs[i];

      for (si = gsi_start_phis (bb); !gsi_end_p (si); gsi_next (&si))
        {
          phi = gsi_stmt (si);
          ok = true;

          stmt_info = vinfo_for_stmt (phi);
          if (vect_print_dump_info (REPORT_DETAILS))
            {
              fprintf (vect_dump, "examining phi: ");
              print_gimple_stmt (vect_dump, phi, 0, TDF_SLIM);
            }

          if (! is_loop_header_bb_p (bb))
            {
              /* An inner-loop loop-closed exit phi in outer-loop
                 vectorization (i.e., a phi in the tail of the outer-loop).
                 FORNOW: we currently don't support the case that these phis
                 are not used in the outer-loop, because this case requires
                 us to actually do something here.  */
              if (!STMT_VINFO_RELEVANT_P (stmt_info)
                  || STMT_VINFO_LIVE_P (stmt_info))
                {
                  if (vect_print_dump_info (REPORT_DETAILS))
                    fprintf (vect_dump,
                             "Unsupported loop-closed phi in outer-loop.");
                  return false;
                }
              continue;
            }

          gcc_assert (stmt_info);

          if (STMT_VINFO_LIVE_P (stmt_info))
            {
              /* FORNOW: not yet supported.  */
              if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
                fprintf (vect_dump, "not vectorized: value used after loop.");
              return false;
            }

          if (STMT_VINFO_RELEVANT (stmt_info) == vect_used_in_loop
              && STMT_VINFO_DEF_TYPE (stmt_info) != vect_induction_def)
            {
              /* A scalar-dependence cycle that we don't support.  */
              if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
                fprintf (vect_dump, "not vectorized: scalar dependence cycle.");
              return false;
            }

          if (STMT_VINFO_RELEVANT_P (stmt_info))
            {
              need_to_vectorize = true;
              if (STMT_VINFO_DEF_TYPE (stmt_info) == vect_induction_def)
                ok = vectorizable_induction (phi, NULL, NULL);
            }

          if (!ok)
            {
              if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
                {
                  fprintf (vect_dump,
                           "not vectorized: relevant phi not supported: ");
                  print_gimple_stmt (vect_dump, phi, 0, TDF_SLIM);
                }
              return false;
            }
        }

      for (si = gsi_start_bb (bb); !gsi_end_p (si); gsi_next (&si))
        {
          gimple stmt = gsi_stmt (si);
          stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
          enum vect_def_type relevance = STMT_VINFO_RELEVANT (stmt_info);

          if (vect_print_dump_info (REPORT_DETAILS))
            {
              fprintf (vect_dump, "==> examining statement: ");
              print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
            }

          gcc_assert (stmt_info);

          /* Skip stmts which do not need to be vectorized.
             This is expected to include:
             - the COND_EXPR which is the loop exit condition
             - any LABEL_EXPRs in the loop
             - computations that are used only for array indexing or loop
               control.  */

          if (!STMT_VINFO_RELEVANT_P (stmt_info)
              && !STMT_VINFO_LIVE_P (stmt_info))
            {
              if (vect_print_dump_info (REPORT_DETAILS))
                fprintf (vect_dump, "irrelevant.");
              continue;
            }

          switch (STMT_VINFO_DEF_TYPE (stmt_info))
            {
            case vect_loop_def:
              break;

            case vect_reduction_def:
              gcc_assert (relevance == vect_used_in_outer
                          || relevance == vect_used_in_outer_by_reduction
                          || relevance == vect_unused_in_loop);
              break;

            case vect_induction_def:
            case vect_constant_def:
            case vect_invariant_def:
            case vect_unknown_def_type:
            default:
              gcc_unreachable ();
            }

          if (STMT_VINFO_RELEVANT_P (stmt_info))
            {
              gcc_assert (!VECTOR_MODE_P (TYPE_MODE (gimple_expr_type (stmt))));
              gcc_assert (STMT_VINFO_VECTYPE (stmt_info));
              need_to_vectorize = true;
            }

          ok = true;
          if (STMT_VINFO_RELEVANT_P (stmt_info)
              || STMT_VINFO_DEF_TYPE (stmt_info) == vect_reduction_def)
            ok = (vectorizable_type_promotion (stmt, NULL, NULL)
                  || vectorizable_type_demotion (stmt, NULL, NULL)
                  || vectorizable_conversion (stmt, NULL, NULL, NULL)
                  || vectorizable_operation (stmt, NULL, NULL, NULL)
                  || vectorizable_assignment (stmt, NULL, NULL, NULL)
                  || vectorizable_load (stmt, NULL, NULL, NULL)
                  || vectorizable_call (stmt, NULL, NULL)
                  || vectorizable_store (stmt, NULL, NULL, NULL)
                  || vectorizable_condition (stmt, NULL, NULL)
                  || vectorizable_reduction (stmt, NULL, NULL));

          if (!ok)
            {
              if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
                {
                  fprintf (vect_dump, "not vectorized: relevant stmt not ");
                  fprintf (vect_dump, "supported: ");
                  print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
                }
              return false;
            }

          /* Stmts that are (also) "live" (i.e., used outside the loop)
             need extra handling, except for vectorizable reductions.  */
          if (STMT_VINFO_LIVE_P (stmt_info)
              && STMT_VINFO_TYPE (stmt_info) != reduc_vec_info_type)
            ok = vectorizable_live_operation (stmt, NULL, NULL);

          if (!ok)
            {
              if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
                {
                  fprintf (vect_dump, "not vectorized: live stmt not ");
                  fprintf (vect_dump, "supported: ");
                  print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
                }
              return false;
            }

          if (!PURE_SLP_STMT (stmt_info))
            {
              /* STMT needs loop-based vectorization.  */
              only_slp_in_loop = false;

              /* Groups of strided accesses whose size is not a power of 2
                 cannot yet be vectorized using loop-based vectorization.
                 Therefore, if this stmt feeds non-SLP-able stmts (i.e., this
                 stmt has to be both SLPed and loop-based vectorized), the
                 loop cannot be vectorized.  */
              if (STMT_VINFO_STRIDED_ACCESS (stmt_info)
                  && exact_log2 (DR_GROUP_SIZE (vinfo_for_stmt (
                                   DR_GROUP_FIRST_DR (stmt_info)))) == -1)
                {
                  if (vect_print_dump_info (REPORT_DETAILS))
                    {
                      fprintf (vect_dump, "not vectorized: the size of group "
                               "of strided accesses is not a power of 2");
                      print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
                    }
                  return false;
                }
            }
        } /* stmts in bb */
    } /* bbs */

  /* All operations in the loop are either irrelevant (deal with loop
     control, or dead), or only used outside the loop and can be moved
     out of the loop (e.g. invariants, inductions).  The loop can be
     optimized away by scalar optimizations.  We're better off not
     touching this loop.  */
  if (!need_to_vectorize)
    {
      if (vect_print_dump_info (REPORT_DETAILS))
        fprintf (vect_dump,
                 "All the computation can be taken out of the loop.");
      if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
        fprintf (vect_dump,
                 "not vectorized: redundant loop. no profit to vectorize.");
      return false;
    }

  /* If all the stmts in the loop can be SLPed, we perform only SLP, and the
     vectorization factor of the loop is the unrolling factor required by
     the SLP instances.  If that unrolling factor is 1, we say that we
     perform pure SLP on the loop; cross-iteration parallelism is not
     exploited.  */
  if (only_slp_in_loop)
    vectorization_factor = LOOP_VINFO_SLP_UNROLLING_FACTOR (loop_vinfo);
  else
    vectorization_factor = least_common_multiple (vectorization_factor,
                                LOOP_VINFO_SLP_UNROLLING_FACTOR (loop_vinfo));

  LOOP_VINFO_VECT_FACTOR (loop_vinfo) = vectorization_factor;

  if (LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo)
      && vect_print_dump_info (REPORT_DETAILS))
    fprintf (vect_dump,
             "vectorization_factor = %d, niters = " HOST_WIDE_INT_PRINT_DEC,
             vectorization_factor, LOOP_VINFO_INT_NITERS (loop_vinfo));

  if (LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo)
      && (LOOP_VINFO_INT_NITERS (loop_vinfo) < vectorization_factor))
    {
      if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
        fprintf (vect_dump, "not vectorized: iteration count too small.");
      if (vect_print_dump_info (REPORT_DETAILS))
        fprintf (vect_dump, "not vectorized: iteration count smaller than "
                 "vectorization factor.");
      return false;
    }

  /* Analyze cost.  Decide if worth while to vectorize.  */

  /* Once VF is set, SLP costs should be updated since the number of created
     vector stmts depends on VF.  */
  vect_update_slp_costs_according_to_vf (loop_vinfo);

  min_profitable_iters = vect_estimate_min_profitable_iters (loop_vinfo);
  LOOP_VINFO_COST_MODEL_MIN_ITERS (loop_vinfo) = min_profitable_iters;

  if (min_profitable_iters < 0)
    {
      if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
        fprintf (vect_dump, "not vectorized: vectorization not profitable.");
      if (vect_print_dump_info (REPORT_DETAILS))
        fprintf (vect_dump, "not vectorized: vector version will never be "
                 "profitable.");
      return false;
    }

  min_scalar_loop_bound = ((PARAM_VALUE (PARAM_MIN_VECT_LOOP_BOUND)
                            * vectorization_factor) - 1);

  /* Use the cost model only if it is more conservative than the
     user-specified threshold.  */

  th = (unsigned) min_scalar_loop_bound;
  if (min_profitable_iters
      && (!min_scalar_loop_bound
          || min_profitable_iters > min_scalar_loop_bound))
    th = (unsigned) min_profitable_iters;

  if (LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo)
      && LOOP_VINFO_INT_NITERS (loop_vinfo) <= th)
    {
      if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
        fprintf (vect_dump, "not vectorized: vectorization not "
                 "profitable.");
      if (vect_print_dump_info (REPORT_DETAILS))
        fprintf (vect_dump, "not vectorized: iteration count smaller than "
                 "user specified loop bound parameter or minimum "
                 "profitable iterations (whichever is more conservative).");
      return false;
    }

  if (!LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo)
      || LOOP_VINFO_INT_NITERS (loop_vinfo) % vectorization_factor != 0
      || LOOP_PEELING_FOR_ALIGNMENT (loop_vinfo))
    {
      if (vect_print_dump_info (REPORT_DETAILS))
        fprintf (vect_dump, "epilog loop required.");
      if (!vect_can_advance_ivs_p (loop_vinfo))
        {
          if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
            fprintf (vect_dump,
                     "not vectorized: can't create epilog loop 1.");
          return false;
        }
      if (!slpeel_can_duplicate_loop_p (loop, single_exit (loop)))
        {
          if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
            fprintf (vect_dump,
                     "not vectorized: can't create epilog loop 2.");
          return false;
        }
    }

  return true;
}
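
/* Illustration of the profitability threshold above (all numbers are
   assumptions): with PARAM_MIN_VECT_LOOP_BOUND = 2 and VF = 4,
   min_scalar_loop_bound = 2 * 4 - 1 = 7; if the cost model returns
   min_profitable_iters = 10, the more conservative th = 10 wins, so a
   loop known to iterate at most 10 times is rejected.  */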


/* Function exist_non_indexing_operands_for_use_p

   USE is one of the uses attached to STMT.  Check if USE is
   used in STMT for anything other than indexing an array.  */

static bool
exist_non_indexing_operands_for_use_p (tree use, gimple stmt)
{
  tree operand;
  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);

  /* USE corresponds to some operand in STMT.  If there is no data
     reference in STMT, then any operand that corresponds to USE
     is not indexing an array.  */
  if (!STMT_VINFO_DATA_REF (stmt_info))
    return true;

  /* STMT has a data_ref.  FORNOW this means that it is of one of
     the following forms:
     -1- ARRAY_REF = var
     -2- var = ARRAY_REF
     (This should have been verified in analyze_data_refs).

     'var' in the second case corresponds to a def, not a use,
     so USE cannot correspond to any operands that are not used
     for array indexing.

     Therefore, all we need to check is if STMT falls into the
     first case, and whether var corresponds to USE.  */

  if (TREE_CODE (gimple_assign_lhs (stmt)) == SSA_NAME)
    return false;

  if (!gimple_assign_copy_p (stmt))
    return false;
  operand = gimple_assign_rhs1 (stmt);

  if (TREE_CODE (operand) != SSA_NAME)
    return false;

  if (operand == use)
    return true;

  return false;
}
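
/* For example, given the stmt

     a[i] = x;

   the use 'i' occurs only inside the ARRAY_REF on the lhs, so this
   predicate returns false for 'i' (it is used for indexing only), while
   it returns true for the copied rhs 'x'.  */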


/* Function vect_analyze_scalar_cycles_1.

   Examine the cross iteration def-use cycles of scalar variables
   in LOOP.  LOOP_VINFO represents the loop that is now being
   considered for vectorization (can be LOOP, or an outer-loop
   enclosing LOOP).  */

static void
vect_analyze_scalar_cycles_1 (loop_vec_info loop_vinfo, struct loop *loop)
{
  basic_block bb = loop->header;
  tree dumy;
  VEC(gimple,heap) *worklist = VEC_alloc (gimple, heap, 64);
  gimple_stmt_iterator gsi;

  if (vect_print_dump_info (REPORT_DETAILS))
    fprintf (vect_dump, "=== vect_analyze_scalar_cycles ===");

  /* First - identify all inductions.  */
  for (gsi = gsi_start_phis (bb); !gsi_end_p (gsi); gsi_next (&gsi))
    {
      gimple phi = gsi_stmt (gsi);
      tree access_fn = NULL;
      tree def = PHI_RESULT (phi);
      stmt_vec_info stmt_vinfo = vinfo_for_stmt (phi);

      if (vect_print_dump_info (REPORT_DETAILS))
        {
          fprintf (vect_dump, "Analyze phi: ");
          print_gimple_stmt (vect_dump, phi, 0, TDF_SLIM);
        }

      /* Skip virtual phis.  The data dependences that are associated with
         virtual defs/uses (i.e., memory accesses) are analyzed elsewhere.  */
      if (!is_gimple_reg (SSA_NAME_VAR (def)))
        continue;

      STMT_VINFO_DEF_TYPE (stmt_vinfo) = vect_unknown_def_type;

      /* Analyze the evolution function.  */
      access_fn = analyze_scalar_evolution (loop, def);
      if (access_fn && vect_print_dump_info (REPORT_DETAILS))
        {
          fprintf (vect_dump, "Access function of PHI: ");
          print_generic_expr (vect_dump, access_fn, TDF_SLIM);
        }

      if (!access_fn
          || !vect_is_simple_iv_evolution (loop->num, access_fn, &dumy, &dumy))
        {
          VEC_safe_push (gimple, heap, worklist, phi);
          continue;
        }

      if (vect_print_dump_info (REPORT_DETAILS))
        fprintf (vect_dump, "Detected induction.");
      STMT_VINFO_DEF_TYPE (stmt_vinfo) = vect_induction_def;
    }


  /* Second - identify all reductions.  */
  while (VEC_length (gimple, worklist) > 0)
    {
      gimple phi = VEC_pop (gimple, worklist);
      tree def = PHI_RESULT (phi);
      stmt_vec_info stmt_vinfo = vinfo_for_stmt (phi);
      gimple reduc_stmt;

      if (vect_print_dump_info (REPORT_DETAILS))
        {
          fprintf (vect_dump, "Analyze phi: ");
          print_gimple_stmt (vect_dump, phi, 0, TDF_SLIM);
        }

      gcc_assert (is_gimple_reg (SSA_NAME_VAR (def)));
      gcc_assert (STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_unknown_def_type);

      reduc_stmt = vect_is_simple_reduction (loop_vinfo, phi);
      if (reduc_stmt)
        {
          if (vect_print_dump_info (REPORT_DETAILS))
            fprintf (vect_dump, "Detected reduction.");
          STMT_VINFO_DEF_TYPE (stmt_vinfo) = vect_reduction_def;
          STMT_VINFO_DEF_TYPE (vinfo_for_stmt (reduc_stmt)) =
                                                        vect_reduction_def;
        }
      else
        if (vect_print_dump_info (REPORT_DETAILS))
          fprintf (vect_dump, "Unknown def-use cycle pattern.");
    }

  VEC_free (gimple, heap, worklist);
  return;
}


/* Function vect_analyze_scalar_cycles.

   Examine the cross iteration def-use cycles of scalar variables, by
   analyzing the loop-header PHIs of scalar variables; classify each
   cycle as one of the following: invariant, induction, reduction, unknown.
   We do that for the loop represented by LOOP_VINFO, and also for its
   inner-loop, if it exists.
   Examples for scalar cycles:

   Example1: reduction:

              loop1:
              for (i=0; i<N; i++)
                 sum += a[i];

   Example2: induction:

              loop2:
              for (i=0; i<N; i++)
                 a[i] = i;  */

static void
vect_analyze_scalar_cycles (loop_vec_info loop_vinfo)
{
  struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);

  vect_analyze_scalar_cycles_1 (loop_vinfo, loop);

  /* When vectorizing an outer-loop, the inner-loop is executed sequentially.
     Reductions in such an inner-loop therefore have different properties
     than the reductions in the nest that gets vectorized:
     1. When vectorized, they are executed in the same order as in the
        original scalar loop, so we can't change the order of computation
        when vectorizing them.
     2. FIXME: Inner-loop reductions can be used in the inner-loop, so the
        current checks are too strict.  */

  if (loop->inner)
    vect_analyze_scalar_cycles_1 (loop_vinfo, loop->inner);
}


/* Function vect_insert_into_interleaving_chain.

   Insert DRA into the interleaving chain of DRB according to DRA's INIT.  */

static void
vect_insert_into_interleaving_chain (struct data_reference *dra,
                                     struct data_reference *drb)
{
  gimple prev, next;
  tree next_init;
  stmt_vec_info stmtinfo_a = vinfo_for_stmt (DR_STMT (dra));
  stmt_vec_info stmtinfo_b = vinfo_for_stmt (DR_STMT (drb));

  prev = DR_GROUP_FIRST_DR (stmtinfo_b);
  next = DR_GROUP_NEXT_DR (vinfo_for_stmt (prev));
  while (next)
    {
      next_init = DR_INIT (STMT_VINFO_DATA_REF (vinfo_for_stmt (next)));
      if (tree_int_cst_compare (next_init, DR_INIT (dra)) > 0)
        {
          /* Insert here.  */
          DR_GROUP_NEXT_DR (vinfo_for_stmt (prev)) = DR_STMT (dra);
          DR_GROUP_NEXT_DR (stmtinfo_a) = next;
          return;
        }
      prev = next;
      next = DR_GROUP_NEXT_DR (vinfo_for_stmt (prev));
    }

  /* We got to the end of the list.  Insert here.  */
  DR_GROUP_NEXT_DR (vinfo_for_stmt (prev)) = DR_STMT (dra);
  DR_GROUP_NEXT_DR (stmtinfo_a) = NULL;
}
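
/* E.g. (illustrative DR_INITs): inserting a DRA with init 4 into a DRB
   chain whose members have inits {0, 8, 12} scans past 0, stops at 8,
   and produces the chain 0 -> 4 -> 8 -> 12.  */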


/* Function vect_update_interleaving_chain.

   For two data-refs DRA and DRB that are a part of a chain of interleaved
   data accesses, update the interleaving chain.  DRB's INIT is smaller
   than DRA's.

   There are four possible cases:
   1. New stmts - neither DRA nor DRB is a part of any chain:
      FIRST_DR = DRB
      NEXT_DR (DRB) = DRA
   2. DRB is a part of a chain and DRA is not:
      no need to update FIRST_DR
      no need to insert DRB
      insert DRA according to init
   3. DRA is a part of a chain and DRB is not:
      if (init of FIRST_DR > init of DRB)
          FIRST_DR = DRB
          NEXT(FIRST_DR) = previous FIRST_DR
      else
          insert DRB according to its init
   4. both DRA and DRB are in some interleaving chains:
      choose the chain with the smallest init of FIRST_DR
      insert the nodes of the second chain into the first one.  */

static void
vect_update_interleaving_chain (struct data_reference *drb,
                                struct data_reference *dra)
{
  stmt_vec_info stmtinfo_a = vinfo_for_stmt (DR_STMT (dra));
  stmt_vec_info stmtinfo_b = vinfo_for_stmt (DR_STMT (drb));
  tree next_init, init_dra_chain, init_drb_chain;
  gimple first_a, first_b;
  tree node_init;
  gimple node, prev, next, first_stmt;

  /* 1. New stmts - neither DRA nor DRB is a part of any chain.  */
  if (!DR_GROUP_FIRST_DR (stmtinfo_a) && !DR_GROUP_FIRST_DR (stmtinfo_b))
    {
      DR_GROUP_FIRST_DR (stmtinfo_a) = DR_STMT (drb);
      DR_GROUP_FIRST_DR (stmtinfo_b) = DR_STMT (drb);
      DR_GROUP_NEXT_DR (stmtinfo_b) = DR_STMT (dra);
      return;
    }

  /* 2. DRB is a part of a chain and DRA is not.  */
  if (!DR_GROUP_FIRST_DR (stmtinfo_a) && DR_GROUP_FIRST_DR (stmtinfo_b))
    {
      DR_GROUP_FIRST_DR (stmtinfo_a) = DR_GROUP_FIRST_DR (stmtinfo_b);
      /* Insert DRA into the chain of DRB.  */
      vect_insert_into_interleaving_chain (dra, drb);
      return;
    }

  /* 3. DRA is a part of a chain and DRB is not.  */
  if (DR_GROUP_FIRST_DR (stmtinfo_a) && !DR_GROUP_FIRST_DR (stmtinfo_b))
    {
      gimple old_first_stmt = DR_GROUP_FIRST_DR (stmtinfo_a);
      tree init_old = DR_INIT (STMT_VINFO_DATA_REF (vinfo_for_stmt (
                                                           old_first_stmt)));
      gimple tmp;

      if (tree_int_cst_compare (init_old, DR_INIT (drb)) > 0)
        {
          /* DRB's init is smaller than the init of the stmt previously
             marked as the first stmt of the interleaving chain of DRA.
             Therefore, we update FIRST_STMT and put DRB in the head of the
             list.  */
          DR_GROUP_FIRST_DR (stmtinfo_b) = DR_STMT (drb);
          DR_GROUP_NEXT_DR (stmtinfo_b) = old_first_stmt;

          /* Update all the stmts in the list to point to the new
             FIRST_STMT.  */
          tmp = old_first_stmt;
          while (tmp)
            {
              DR_GROUP_FIRST_DR (vinfo_for_stmt (tmp)) = DR_STMT (drb);
              tmp = DR_GROUP_NEXT_DR (vinfo_for_stmt (tmp));
            }
        }
      else
        {
          /* Insert DRB in the list of DRA.  */
          vect_insert_into_interleaving_chain (drb, dra);
          DR_GROUP_FIRST_DR (stmtinfo_b) = DR_GROUP_FIRST_DR (stmtinfo_a);
        }
      return;
    }

  /* 4. Both DRA and DRB are in some interleaving chains.  */
  first_a = DR_GROUP_FIRST_DR (stmtinfo_a);
  first_b = DR_GROUP_FIRST_DR (stmtinfo_b);
  if (first_a == first_b)
    return;
  init_dra_chain = DR_INIT (STMT_VINFO_DATA_REF (vinfo_for_stmt (first_a)));
  init_drb_chain = DR_INIT (STMT_VINFO_DATA_REF (vinfo_for_stmt (first_b)));

  if (tree_int_cst_compare (init_dra_chain, init_drb_chain) > 0)
    {
      /* Insert the nodes of DRA chain into the DRB chain.
         After inserting a node, continue from this node of the DRB chain
         (don't start from the beginning).  */
      node = DR_GROUP_FIRST_DR (stmtinfo_a);
      prev = DR_GROUP_FIRST_DR (stmtinfo_b);
      first_stmt = first_b;
    }
  else
    {
      /* Insert the nodes of DRB chain into the DRA chain.
         After inserting a node, continue from this node of the DRA chain
         (don't start from the beginning).  */
      node = DR_GROUP_FIRST_DR (stmtinfo_b);
      prev = DR_GROUP_FIRST_DR (stmtinfo_a);
      first_stmt = first_a;
    }

  while (node)
    {
      node_init = DR_INIT (STMT_VINFO_DATA_REF (vinfo_for_stmt (node)));
      next = DR_GROUP_NEXT_DR (vinfo_for_stmt (prev));
      while (next)
        {
          next_init = DR_INIT (STMT_VINFO_DATA_REF (vinfo_for_stmt (next)));
          if (tree_int_cst_compare (next_init, node_init) > 0)
            {
              /* Insert here.  */
              DR_GROUP_NEXT_DR (vinfo_for_stmt (prev)) = node;
              DR_GROUP_NEXT_DR (vinfo_for_stmt (node)) = next;
              prev = node;
              break;
            }
          prev = next;
          next = DR_GROUP_NEXT_DR (vinfo_for_stmt (prev));
        }
      if (!next)
        {
          /* We got to the end of the list.  Insert here.  */
          DR_GROUP_NEXT_DR (vinfo_for_stmt (prev)) = node;
          DR_GROUP_NEXT_DR (vinfo_for_stmt (node)) = NULL;
          prev = node;
        }
      DR_GROUP_FIRST_DR (vinfo_for_stmt (node)) = first_stmt;
      node = DR_GROUP_NEXT_DR (vinfo_for_stmt (node));
    }
}
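
/* E.g., for case 4 above (illustrative inits): merging chain A = {4, 12}
   into chain B = {0, 8, 16} yields the single chain {0, 4, 8, 12, 16};
   B's head becomes FIRST_DR of every node.  */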


/* Function vect_equal_offsets.

   Check if OFFSET1 and OFFSET2 are identical expressions.  */

static bool
vect_equal_offsets (tree offset1, tree offset2)
{
  bool res0, res1;

  STRIP_NOPS (offset1);
  STRIP_NOPS (offset2);

  if (offset1 == offset2)
    return true;

  if (TREE_CODE (offset1) != TREE_CODE (offset2)
      || !BINARY_CLASS_P (offset1)
      || !BINARY_CLASS_P (offset2))
    return false;

  res0 = vect_equal_offsets (TREE_OPERAND (offset1, 0),
                             TREE_OPERAND (offset2, 0));
  res1 = vect_equal_offsets (TREE_OPERAND (offset1, 1),
                             TREE_OPERAND (offset2, 1));

  return (res0 && res1);
}


/* Function vect_check_interleaving.

   Check if DRA and DRB are a part of interleaving.  In case they are,
   insert DRA and DRB in an interleaving chain.  */

static void
vect_check_interleaving (struct data_reference *dra,
                         struct data_reference *drb)
{
  HOST_WIDE_INT type_size_a, type_size_b, diff_mod_size, step, init_a, init_b;

  /* Check that the data-refs have the same first location (except init)
     and that they are both either stores or loads (not one load and one
     store).  */
  if ((DR_BASE_ADDRESS (dra) != DR_BASE_ADDRESS (drb)
       && (TREE_CODE (DR_BASE_ADDRESS (dra)) != ADDR_EXPR
           || TREE_CODE (DR_BASE_ADDRESS (drb)) != ADDR_EXPR
           || TREE_OPERAND (DR_BASE_ADDRESS (dra), 0)
              != TREE_OPERAND (DR_BASE_ADDRESS (drb), 0)))
      || !vect_equal_offsets (DR_OFFSET (dra), DR_OFFSET (drb))
      || !tree_int_cst_compare (DR_INIT (dra), DR_INIT (drb))
      || DR_IS_READ (dra) != DR_IS_READ (drb))
    return;

  /* Check:
     1. data-refs are of the same type
     2. their steps are equal
     3. the step is greater than the difference between data-refs' inits.  */
  type_size_a = TREE_INT_CST_LOW (TYPE_SIZE_UNIT (TREE_TYPE (DR_REF (dra))));
  type_size_b = TREE_INT_CST_LOW (TYPE_SIZE_UNIT (TREE_TYPE (DR_REF (drb))));

  if (type_size_a != type_size_b
      || tree_int_cst_compare (DR_STEP (dra), DR_STEP (drb))
      || !types_compatible_p (TREE_TYPE (DR_REF (dra)),
                              TREE_TYPE (DR_REF (drb))))
    return;

  init_a = TREE_INT_CST_LOW (DR_INIT (dra));
  init_b = TREE_INT_CST_LOW (DR_INIT (drb));
  step = TREE_INT_CST_LOW (DR_STEP (dra));

  if (init_a > init_b)
    {
      /* If init_a == init_b + the size of the type * k, we have an
         interleaving, and DRB is accessed before DRA.  */
      diff_mod_size = (init_a - init_b) % type_size_a;

      if ((init_a - init_b) > step)
        return;

      if (diff_mod_size == 0)
        {
          vect_update_interleaving_chain (drb, dra);
          if (vect_print_dump_info (REPORT_DR_DETAILS))
            {
              fprintf (vect_dump, "Detected interleaving ");
              print_generic_expr (vect_dump, DR_REF (dra), TDF_SLIM);
              fprintf (vect_dump, " and ");
              print_generic_expr (vect_dump, DR_REF (drb), TDF_SLIM);
            }
          return;
        }
    }
  else
    {
      /* If init_b == init_a + the size of the type * k, we have an
         interleaving, and DRA is accessed before DRB.  */
      diff_mod_size = (init_b - init_a) % type_size_a;

      if ((init_b - init_a) > step)
        return;

      if (diff_mod_size == 0)
        {
          vect_update_interleaving_chain (dra, drb);
          if (vect_print_dump_info (REPORT_DR_DETAILS))
            {
              fprintf (vect_dump, "Detected interleaving ");
              print_generic_expr (vect_dump, DR_REF (dra), TDF_SLIM);
              fprintf (vect_dump, " and ");
              print_generic_expr (vect_dump, DR_REF (drb), TDF_SLIM);
            }
          return;
        }
    }
}
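
/* Worked example (values assumed for illustration): for two 4-byte
   accesses with DR_INIT (dra) = 8, DR_INIT (drb) = 0 and a common step
   of 16, init_a - init_b = 8 <= step and 8 % 4 == 0, so DRA and DRB are
   recorded as an interleaving in which DRB is accessed first.  */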

/* Check if the data references pointed to by DR_I and DR_J are the same
   or belong to the same interleaving group.  Return FALSE if drs are
   different, otherwise return TRUE.  */

static bool
vect_same_range_drs (data_reference_p dr_i, data_reference_p dr_j)
{
  gimple stmt_i = DR_STMT (dr_i);
  gimple stmt_j = DR_STMT (dr_j);

  if (operand_equal_p (DR_REF (dr_i), DR_REF (dr_j), 0)
      || (DR_GROUP_FIRST_DR (vinfo_for_stmt (stmt_i))
          && DR_GROUP_FIRST_DR (vinfo_for_stmt (stmt_j))
          && (DR_GROUP_FIRST_DR (vinfo_for_stmt (stmt_i))
              == DR_GROUP_FIRST_DR (vinfo_for_stmt (stmt_j)))))
    return true;
  else
    return false;
}

/* If address ranges represented by DDR_I and DDR_J are equal,
   return TRUE, otherwise return FALSE.  */

static bool
vect_vfa_range_equal (ddr_p ddr_i, ddr_p ddr_j)
{
  if ((vect_same_range_drs (DDR_A (ddr_i), DDR_A (ddr_j))
       && vect_same_range_drs (DDR_B (ddr_i), DDR_B (ddr_j)))
      || (vect_same_range_drs (DDR_A (ddr_i), DDR_B (ddr_j))
          && vect_same_range_drs (DDR_B (ddr_i), DDR_A (ddr_j))))
    return true;
  else
    return false;
}

/* Insert DDR into LOOP_VINFO list of ddrs that may alias and need to be
   tested at run-time.  Return TRUE if DDR was successfully inserted.
   Return FALSE if versioning is not supported.  */

static bool
vect_mark_for_runtime_alias_test (ddr_p ddr, loop_vec_info loop_vinfo)
{
  struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);

  if ((unsigned) PARAM_VALUE (PARAM_VECT_MAX_VERSION_FOR_ALIAS_CHECKS) == 0)
    return false;

  if (vect_print_dump_info (REPORT_DR_DETAILS))
    {
      fprintf (vect_dump, "mark for run-time aliasing test between ");
      print_generic_expr (vect_dump, DR_REF (DDR_A (ddr)), TDF_SLIM);
      fprintf (vect_dump, " and ");
      print_generic_expr (vect_dump, DR_REF (DDR_B (ddr)), TDF_SLIM);
    }

  if (optimize_size)
    {
      if (vect_print_dump_info (REPORT_DR_DETAILS))
        fprintf (vect_dump,
                 "versioning not supported when optimizing for size.");
      return false;
    }

  /* FORNOW: We don't support versioning with outer-loop vectorization.  */
  if (loop->inner)
    {
      if (vect_print_dump_info (REPORT_DR_DETAILS))
        fprintf (vect_dump, "versioning not yet supported for outer-loops.");
      return false;
    }

  VEC_safe_push (ddr_p, heap, LOOP_VINFO_MAY_ALIAS_DDRS (loop_vinfo), ddr);
  return true;
}
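
/* Conceptually, each DDR recorded here is later guarded by a run-time
   disambiguation of the two address ranges, along the lines of the
   following sketch (not the exact generated code):

     if (addr_a + size_a <= addr_b || addr_b + size_b <= addr_a)
       ... vectorized loop ...
     else
       ... scalar loop ...
*/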

/* Function vect_analyze_data_ref_dependence.

   Return TRUE if there (might) exist a dependence between a memory-reference
   DRA and a memory-reference DRB.  If versioning for alias can instead check
   the dependence at run-time, return FALSE.  */

static bool
vect_analyze_data_ref_dependence (struct data_dependence_relation *ddr,
                                  loop_vec_info loop_vinfo)
{
  unsigned int i;
  struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
  int vectorization_factor = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
  struct data_reference *dra = DDR_A (ddr);
  struct data_reference *drb = DDR_B (ddr);
  stmt_vec_info stmtinfo_a = vinfo_for_stmt (DR_STMT (dra));
  stmt_vec_info stmtinfo_b = vinfo_for_stmt (DR_STMT (drb));
  int dra_size = GET_MODE_SIZE (TYPE_MODE (TREE_TYPE (DR_REF (dra))));
  int drb_size = GET_MODE_SIZE (TYPE_MODE (TREE_TYPE (DR_REF (drb))));
  lambda_vector dist_v;
  unsigned int loop_depth;

  if (DDR_ARE_DEPENDENT (ddr) == chrec_known)
    {
      /* Independent data accesses.  */
      vect_check_interleaving (dra, drb);
      return false;
    }

  if ((DR_IS_READ (dra) && DR_IS_READ (drb)) || dra == drb)
    return false;

  if (DDR_ARE_DEPENDENT (ddr) == chrec_dont_know)
    {
      if (vect_print_dump_info (REPORT_DR_DETAILS))
        {
          fprintf (vect_dump,
                   "versioning for alias required: can't determine dependence "
                   "between ");
          print_generic_expr (vect_dump, DR_REF (dra), TDF_SLIM);
          fprintf (vect_dump, " and ");
          print_generic_expr (vect_dump, DR_REF (drb), TDF_SLIM);
        }
      /* Add to list of ddrs that need to be tested at run-time.  */
      return !vect_mark_for_runtime_alias_test (ddr, loop_vinfo);
    }

  if (DDR_NUM_DIST_VECTS (ddr) == 0)
    {
      if (vect_print_dump_info (REPORT_DR_DETAILS))
        {
          fprintf (vect_dump,
                   "versioning for alias required: bad dist vector for ");
          print_generic_expr (vect_dump, DR_REF (dra), TDF_SLIM);
          fprintf (vect_dump, " and ");
          print_generic_expr (vect_dump, DR_REF (drb), TDF_SLIM);
        }
      /* Add to list of ddrs that need to be tested at run-time.  */
      return !vect_mark_for_runtime_alias_test (ddr, loop_vinfo);
    }

  loop_depth = index_in_loop_nest (loop->num, DDR_LOOP_NEST (ddr));
  for (i = 0; VEC_iterate (lambda_vector, DDR_DIST_VECTS (ddr), i, dist_v); i++)
    {
      int dist = dist_v[loop_depth];

      if (vect_print_dump_info (REPORT_DR_DETAILS))
        fprintf (vect_dump, "dependence distance = %d.", dist);

      /* Same loop iteration.  */
      if (dist % vectorization_factor == 0 && dra_size == drb_size)
        {
          /* Two references with distance zero have the same alignment.  */
          VEC_safe_push (dr_p, heap, STMT_VINFO_SAME_ALIGN_REFS (stmtinfo_a),
                         drb);
          VEC_safe_push (dr_p, heap, STMT_VINFO_SAME_ALIGN_REFS (stmtinfo_b),
                         dra);
          if (vect_print_dump_info (REPORT_ALIGNMENT))
            fprintf (vect_dump, "accesses have the same alignment.");
          if (vect_print_dump_info (REPORT_DR_DETAILS))
            {
              fprintf (vect_dump,
                       "dependence distance modulo vf == 0 between ");
              print_generic_expr (vect_dump, DR_REF (dra), TDF_SLIM);
              fprintf (vect_dump, " and ");
              print_generic_expr (vect_dump, DR_REF (drb), TDF_SLIM);
            }

          /* For interleaving, mark that there is a read-write dependency if
             necessary.  We have already checked that one of the data-refs is
             a store.  */
          if (DR_IS_READ (dra))
            DR_GROUP_READ_WRITE_DEPENDENCE (stmtinfo_a) = true;
          else
            {
              if (DR_IS_READ (drb))
                DR_GROUP_READ_WRITE_DEPENDENCE (stmtinfo_b) = true;
            }

          continue;
        }

      if (abs (dist) >= vectorization_factor
          || (dist > 0 && DDR_REVERSED_P (ddr)))
        {
          /* Dependence distance does not create dependence, as far as
             vectorization is concerned, in this case.  If DDR_REVERSED_P the
             order of the data-refs in DDR was reversed (to make distance
             vector positive), and the actual distance is negative.  */
          if (vect_print_dump_info (REPORT_DR_DETAILS))
            fprintf (vect_dump, "dependence distance >= VF or negative.");
          continue;
        }

      if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
        {
          fprintf (vect_dump,
                   "not vectorized, possible dependence "
                   "between data-refs ");
          print_generic_expr (vect_dump, DR_REF (dra), TDF_SLIM);
          fprintf (vect_dump, " and ");
          print_generic_expr (vect_dump, DR_REF (drb), TDF_SLIM);
        }

      return true;
    }

  return false;
}
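
/* For example, with VF = 4 (an assumed value): a distance of 4 falls in
   the "same loop iteration" case (dist % VF == 0) and only constrains
   alignment; a distance of 5 satisfies abs (dist) >= VF and is harmless;
   a distance of 3 would make consecutive vector iterations overlap, so
   the loop is rejected.  */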

/* Function vect_analyze_data_ref_dependences.

   Examine all the data references in the loop, and make sure there do not
   exist any data dependences between them.  */

static bool
vect_analyze_data_ref_dependences (loop_vec_info loop_vinfo)
{
  unsigned int i;
  VEC (ddr_p, heap) *ddrs = LOOP_VINFO_DDRS (loop_vinfo);
  struct data_dependence_relation *ddr;

  if (vect_print_dump_info (REPORT_DETAILS))
    fprintf (vect_dump, "=== vect_analyze_dependences ===");

  for (i = 0; VEC_iterate (ddr_p, ddrs, i, ddr); i++)
    if (vect_analyze_data_ref_dependence (ddr, loop_vinfo))
      return false;

  return true;
}


/* Function vect_compute_data_ref_alignment

   Compute the misalignment of the data reference DR.

   Output:
   1. If during the misalignment computation it is found that the data
      reference cannot be vectorized then false is returned.
   2. DR_MISALIGNMENT (DR) is defined.

   FOR NOW: No analysis is actually performed.  Misalignment is calculated
   only for trivial cases.  TODO.  */

static bool
vect_compute_data_ref_alignment (struct data_reference *dr)
{
  gimple stmt = DR_STMT (dr);
  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
  loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
  struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
  tree ref = DR_REF (dr);
  tree vectype;
  tree base, base_addr;
  bool base_aligned;
  tree misalign;
  tree aligned_to, alignment;

  if (vect_print_dump_info (REPORT_DETAILS))
    fprintf (vect_dump, "vect_compute_data_ref_alignment:");

  /* Initialize misalignment to unknown.  */
  SET_DR_MISALIGNMENT (dr, -1);

  misalign = DR_INIT (dr);
  aligned_to = DR_ALIGNED_TO (dr);
  base_addr = DR_BASE_ADDRESS (dr);
  vectype = STMT_VINFO_VECTYPE (stmt_info);

  /* In case the dataref is in an inner-loop of the loop that is being
     vectorized (LOOP), we use the base and misalignment information
     relative to the outer-loop (LOOP).  This is ok only if the misalignment
     stays the same throughout the execution of the inner-loop, which is why
     we have to check that the stride of the dataref in the inner-loop is
     evenly divisible by the vector size.  */
  if (nested_in_vect_loop_p (loop, stmt))
    {
      tree step = DR_STEP (dr);
      HOST_WIDE_INT dr_step = TREE_INT_CST_LOW (step);

      if (dr_step % GET_MODE_SIZE (TYPE_MODE (vectype)) == 0)
        {
          if (vect_print_dump_info (REPORT_ALIGNMENT))
            fprintf (vect_dump, "inner step divides the vector-size.");
          misalign = STMT_VINFO_DR_INIT (stmt_info);
          aligned_to = STMT_VINFO_DR_ALIGNED_TO (stmt_info);
          base_addr = STMT_VINFO_DR_BASE_ADDRESS (stmt_info);
        }
      else
        {
          if (vect_print_dump_info (REPORT_ALIGNMENT))
            fprintf (vect_dump, "inner step doesn't divide the vector-size.");
          misalign = NULL_TREE;
        }
    }

  base = build_fold_indirect_ref (base_addr);
  alignment = ssize_int (TYPE_ALIGN (vectype) / BITS_PER_UNIT);

  if ((aligned_to && tree_int_cst_compare (aligned_to, alignment) < 0)
      || !misalign)
    {
      if (vect_print_dump_info (REPORT_ALIGNMENT))
        {
          fprintf (vect_dump, "Unknown alignment for access: ");
          print_generic_expr (vect_dump, base, TDF_SLIM);
        }
      return true;
    }

  if ((DECL_P (base)
       && tree_int_cst_compare (ssize_int (DECL_ALIGN_UNIT (base)),
                                alignment) >= 0)
      || (TREE_CODE (base_addr) == SSA_NAME
          && tree_int_cst_compare (ssize_int (TYPE_ALIGN_UNIT (TREE_TYPE (
                                                 TREE_TYPE (base_addr)))),
                                   alignment) >= 0))
    base_aligned = true;
  else
    base_aligned = false;

  if (!base_aligned)
    {
      /* Do not change the alignment of global variables if
         flag_section_anchors is enabled.  */
      if (!vect_can_force_dr_alignment_p (base, TYPE_ALIGN (vectype))
          || (TREE_STATIC (base) && flag_section_anchors))
        {
          if (vect_print_dump_info (REPORT_DETAILS))
            {
              fprintf (vect_dump, "can't force alignment of ref: ");
              print_generic_expr (vect_dump, ref, TDF_SLIM);
            }
          return true;
        }

      /* Force the alignment of the decl.
         NOTE: This is the only change to the code we make during
         the analysis phase, before deciding to vectorize the loop.  */
      if (vect_print_dump_info (REPORT_DETAILS))
        fprintf (vect_dump, "force alignment");
      DECL_ALIGN (base) = TYPE_ALIGN (vectype);
      DECL_USER_ALIGN (base) = 1;
    }

  /* At this point we assume that the base is aligned.  */
  gcc_assert (base_aligned
              || (TREE_CODE (base) == VAR_DECL
                  && DECL_ALIGN (base) >= TYPE_ALIGN (vectype)));

  /* Modulo alignment.  */
  misalign = size_binop (TRUNC_MOD_EXPR, misalign, alignment);

  if (!host_integerp (misalign, 1))
    {
      /* Negative or overflowed misalignment value.  */
      if (vect_print_dump_info (REPORT_DETAILS))
        fprintf (vect_dump, "unexpected misalign value");
      return false;
    }

  SET_DR_MISALIGNMENT (dr, TREE_INT_CST_LOW (misalign));

  if (vect_print_dump_info (REPORT_DETAILS))
    {
      fprintf (vect_dump, "misalign = %d bytes of ref ", DR_MISALIGNMENT (dr));
      print_generic_expr (vect_dump, ref, TDF_SLIM);
    }

  return true;
}


/* Function vect_compute_data_refs_alignment

   Compute the misalignment of data references in the loop.
   Return FALSE if a data reference is found that cannot be vectorized.  */

static bool
vect_compute_data_refs_alignment (loop_vec_info loop_vinfo)
{
  VEC (data_reference_p, heap) *datarefs = LOOP_VINFO_DATAREFS (loop_vinfo);
  struct data_reference *dr;
  unsigned int i;

  for (i = 0; VEC_iterate (data_reference_p, datarefs, i, dr); i++)
    if (!vect_compute_data_ref_alignment (dr))
      return false;

  return true;
}


/* Function vect_update_misalignment_for_peel

   DR - the data reference whose misalignment is to be adjusted.
   DR_PEEL - the data reference whose misalignment is being made
             zero in the vector loop by the peel.
   NPEEL - the number of iterations in the peel loop if the misalignment
           of DR_PEEL is known at compile time.  */

static void
vect_update_misalignment_for_peel (struct data_reference *dr,
                                   struct data_reference *dr_peel, int npeel)
{
  unsigned int i;
  VEC(dr_p,heap) *same_align_drs;
  struct data_reference *current_dr;
  int dr_size = GET_MODE_SIZE (TYPE_MODE (TREE_TYPE (DR_REF (dr))));
  int dr_peel_size = GET_MODE_SIZE (TYPE_MODE (TREE_TYPE (DR_REF (dr_peel))));
  stmt_vec_info stmt_info = vinfo_for_stmt (DR_STMT (dr));
  stmt_vec_info peel_stmt_info = vinfo_for_stmt (DR_STMT (dr_peel));

  /* For interleaved data accesses the step in the loop must be multiplied by
     the size of the interleaving group.  */
  if (STMT_VINFO_STRIDED_ACCESS (stmt_info))
    dr_size *= DR_GROUP_SIZE (vinfo_for_stmt (DR_GROUP_FIRST_DR (stmt_info)));
  if (STMT_VINFO_STRIDED_ACCESS (peel_stmt_info))
    dr_peel_size *= DR_GROUP_SIZE (peel_stmt_info);

  /* It can be assumed that the data refs with the same alignment as dr_peel
     are aligned in the vector loop.  */
  same_align_drs
    = STMT_VINFO_SAME_ALIGN_REFS (vinfo_for_stmt (DR_STMT (dr_peel)));
  for (i = 0; VEC_iterate (dr_p, same_align_drs, i, current_dr); i++)
    {
      if (current_dr != dr)
        continue;
      gcc_assert (DR_MISALIGNMENT (dr) / dr_size ==
                  DR_MISALIGNMENT (dr_peel) / dr_peel_size);
      SET_DR_MISALIGNMENT (dr, 0);
      return;
    }

  if (known_alignment_for_access_p (dr)
      && known_alignment_for_access_p (dr_peel))
    {
      int misal = DR_MISALIGNMENT (dr);
      tree vectype = STMT_VINFO_VECTYPE (stmt_info);
      misal += npeel * dr_size;
      misal %= GET_MODE_SIZE (TYPE_MODE (vectype));
      SET_DR_MISALIGNMENT (dr, misal);
      return;
    }

  if (vect_print_dump_info (REPORT_DETAILS))
    fprintf (vect_dump, "Setting misalignment to -1.");
  SET_DR_MISALIGNMENT (dr, -1);
}
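
/* Worked example (illustrative numbers): for 4-byte elements and a
   16-byte vectype, a DR with misalignment 8 becomes, after peeling
   npeel = 2 iterations, (8 + 2 * 4) % 16 = 0, i.e. aligned in the
   vector loop.  */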


/* Function vect_verify_datarefs_alignment

   Return TRUE if all data references in the loop can be
   handled with respect to alignment.  */

static bool
vect_verify_datarefs_alignment (loop_vec_info loop_vinfo)
{
  VEC (data_reference_p, heap) *datarefs = LOOP_VINFO_DATAREFS (loop_vinfo);
  struct data_reference *dr;
  enum dr_alignment_support supportable_dr_alignment;
  unsigned int i;

  for (i = 0; VEC_iterate (data_reference_p, datarefs, i, dr); i++)
    {
      gimple stmt = DR_STMT (dr);
      stmt_vec_info stmt_info = vinfo_for_stmt (stmt);

      /* For interleaving, only the alignment of the first access matters.  */
      if (STMT_VINFO_STRIDED_ACCESS (stmt_info)
          && DR_GROUP_FIRST_DR (stmt_info) != stmt)
        continue;

      supportable_dr_alignment = vect_supportable_dr_alignment (dr);
      if (!supportable_dr_alignment)
        {
          if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
            {
              if (DR_IS_READ (dr))
                fprintf (vect_dump,
                         "not vectorized: unsupported unaligned load.");
              else
                fprintf (vect_dump,
                         "not vectorized: unsupported unaligned store.");
            }
          return false;
        }
      if (supportable_dr_alignment != dr_aligned
          && vect_print_dump_info (REPORT_ALIGNMENT))
        fprintf (vect_dump, "Vectorizing an unaligned access.");
    }
  return true;
}


/* Function vector_alignment_reachable_p

   Return true if vector alignment for DR is reachable by peeling
   a few loop iterations.  Return false otherwise.  */

static bool
vector_alignment_reachable_p (struct data_reference *dr)
{
  gimple stmt = DR_STMT (dr);
  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
  tree vectype = STMT_VINFO_VECTYPE (stmt_info);

  if (STMT_VINFO_STRIDED_ACCESS (stmt_info))
    {
      /* For interleaved accesses we peel only if the number of iterations
         in the prolog loop (VF - misalignment) is a multiple of the number
         of interleaved accesses.  */
      int elem_size, mis_in_elements;
      int nelements = TYPE_VECTOR_SUBPARTS (vectype);

      /* FORNOW: handle only known alignment.  */
      if (!known_alignment_for_access_p (dr))
        return false;

      elem_size = GET_MODE_SIZE (TYPE_MODE (vectype)) / nelements;
      mis_in_elements = DR_MISALIGNMENT (dr) / elem_size;

      if ((nelements - mis_in_elements) % DR_GROUP_SIZE (stmt_info))
        return false;
    }

  /* If misalignment is known at the compile time then allow peeling
     only if natural alignment is reachable through peeling.  */
  if (known_alignment_for_access_p (dr) && !aligned_access_p (dr))
    {
      HOST_WIDE_INT elmsize =
                int_cst_value (TYPE_SIZE_UNIT (TREE_TYPE (vectype)));
      if (vect_print_dump_info (REPORT_DETAILS))
        {
          fprintf (vect_dump, "data size =" HOST_WIDE_INT_PRINT_DEC, elmsize);
          fprintf (vect_dump, ". misalignment = %d. ", DR_MISALIGNMENT (dr));
        }
      if (DR_MISALIGNMENT (dr) % elmsize)
        {
          if (vect_print_dump_info (REPORT_DETAILS))
            fprintf (vect_dump,
                     "data size does not divide the misalignment.\n");
          return false;
        }
    }

  if (!known_alignment_for_access_p (dr))
    {
      tree type = (TREE_TYPE (DR_REF (dr)));
      tree ba = DR_BASE_OBJECT (dr);
      bool is_packed = false;

      if (ba)
        is_packed = contains_packed_reference (ba);

      if (vect_print_dump_info (REPORT_DETAILS))
        fprintf (vect_dump, "Unknown misalignment, is_packed = %d", is_packed);
      if (targetm.vectorize.vector_alignment_reachable (type, is_packed))
        return true;
      else
        return false;
    }

  return true;
}
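
/* E.g. (assumed values): with 16-byte vectors of 4-byte elements
   (nelements = 4) and misalignment 8, mis_in_elements = 2, so the prolog
   peels nelements - mis_in_elements = 2 iterations; that is a multiple
   of an interleaving group of size 2, but not of size 4, in which case
   alignment is not reachable.  */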

/* Function vect_enhance_data_refs_alignment

   This pass will use loop versioning and loop peeling in order to enhance
   the alignment of data references in the loop.

   FOR NOW: we assume that whatever versioning/peeling takes place, only the
   original loop is to be vectorized; any other loops that are created by
   the transformations performed in this pass - are not supposed to be
   vectorized.  This restriction will be relaxed.

   This pass will require a cost model to guide it whether to apply peeling
   or versioning or a combination of the two.  For example, the scheme that
   Intel uses when given a loop with several memory accesses, is as follows:
   choose one memory access ('p') whose alignment you want to force by doing
   peeling.  Then, either (1) generate a loop in which 'p' is aligned and all
   other accesses are not necessarily aligned, or (2) use loop versioning to
   generate one loop in which all accesses are aligned, and another loop in
   which only 'p' is necessarily aligned.

   ("Automatic Intra-Register Vectorization for the Intel Architecture",
   Aart J.C. Bik, Milind Girkar, Paul M. Grey and Xinmin Tian, International
   Journal of Parallel Programming, Vol. 30, No. 2, April 2002.)

   Devising a cost model is the most critical aspect of this work.  It will
   guide us on which access to peel for, whether to use loop versioning, how
   many versions to create, etc.  The cost model will probably consist of
   generic considerations as well as target specific considerations (on
   powerpc for example, misaligned stores are more painful than misaligned
   loads).

   Here are the general steps involved in alignment enhancements:

     -- original loop, before alignment analysis:
        for (i=0; i<N; i++){
          x = q[i];                     # DR_MISALIGNMENT(q) = unknown
          p[i] = y;                     # DR_MISALIGNMENT(p) = unknown
        }

     -- After vect_compute_data_refs_alignment:
        for (i=0; i<N; i++){
          x = q[i];                     # DR_MISALIGNMENT(q) = 3
          p[i] = y;                     # DR_MISALIGNMENT(p) = unknown
        }

     -- Possibility 1: we do loop versioning:
     if (p is aligned) {
        for (i=0; i<N; i++){            # loop 1A
          x = q[i];                     # DR_MISALIGNMENT(q) = 3
          p[i] = y;                     # DR_MISALIGNMENT(p) = 0
        }
     }
     else {
        for (i=0; i<N; i++){            # loop 1B
          x = q[i];                     # DR_MISALIGNMENT(q) = 3
          p[i] = y;                     # DR_MISALIGNMENT(p) = unaligned
        }
     }

     -- Possibility 2: we do loop peeling:
     for (i = 0; i < 3; i++){           # (scalar loop, not to be vectorized).
        x = q[i];
        p[i] = y;
     }
     for (i = 3; i < N; i++){           # loop 2A
        x = q[i];                       # DR_MISALIGNMENT(q) = 0
        p[i] = y;                       # DR_MISALIGNMENT(p) = unknown
     }

     -- Possibility 3: combination of loop peeling and versioning:
     for (i = 0; i < 3; i++){           # (scalar loop, not to be vectorized).
        x = q[i];
        p[i] = y;
     }
     if (p is aligned) {
        for (i = 3; i<N; i++){          # loop 3A
          x = q[i];                     # DR_MISALIGNMENT(q) = 0
          p[i] = y;                     # DR_MISALIGNMENT(p) = 0
        }
     }
     else {
        for (i = 3; i<N; i++){          # loop 3B
          x = q[i];                     # DR_MISALIGNMENT(q) = 0
          p[i] = y;                     # DR_MISALIGNMENT(p) = unaligned
        }
     }

   These loops are later passed to loop_transform to be vectorized.  The
   vectorizer will use the alignment information to guide the transformation
   (whether to generate regular loads/stores, or with special handling for
   misalignment).  */

static bool
vect_enhance_data_refs_alignment (loop_vec_info loop_vinfo)
{
  VEC (data_reference_p, heap) *datarefs = LOOP_VINFO_DATAREFS (loop_vinfo);
  struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
  enum dr_alignment_support supportable_dr_alignment;
  struct data_reference *dr0 = NULL;
  struct data_reference *dr;
  unsigned int i;
  bool do_peeling = false;
  bool do_versioning = false;
  bool stat;
  gimple stmt;
  stmt_vec_info stmt_info;
  int vect_versioning_for_alias_required;

  if (vect_print_dump_info (REPORT_DETAILS))
    fprintf (vect_dump, "=== vect_enhance_data_refs_alignment ===");

  /* While cost model enhancements are expected in the future, the high level
     view of the code at this time is as follows:

     A) If there is a misaligned write then see if peeling to align this
        write can make all data references satisfy
        vect_supportable_dr_alignment.  If so, update data structures as
        needed and return true.  Note that at this time
        vect_supportable_dr_alignment is known to return false for a
        misaligned write.

     B) If peeling wasn't possible and there is a data reference with an
        unknown misalignment that does not satisfy vect_supportable_dr_alignment
1799 then see if loop versioning checks can be used to make all data
1800 references satisfy vect_supportable_dr_alignment. If so, update
1801 data structures as needed and return true.
1802
1803 C) If neither peeling nor versioning were successful then return false if
1804 any data reference does not satisfy vect_supportable_dr_alignment.
1805
1806 D) Return true (all data references satisfy vect_supportable_dr_alignment).
1807
1808 Note that Possibility 3 above (which is peeling and versioning together) is not
1809 being done at this time. */
1810
1811 /* (1) Peeling to force alignment. */
1812
1813 /* (1.1) Decide whether to perform peeling, and how many iterations to peel:
1814 Considerations:
1815 + How many accesses will become aligned due to the peeling
1816 - How many accesses will become unaligned due to the peeling,
1817 and the cost of misaligned accesses.
1818 - The cost of peeling (the extra runtime checks, the increase
1819 in code size).
1820
1821 The scheme we use FORNOW: peel to force the alignment of the first
1822 misaligned store in the loop.
1823 Rationale: misaligned stores are not yet supported.
1824
1825 TODO: Use a cost model. */
1826
1827 for (i = 0; VEC_iterate (data_reference_p, datarefs, i, dr); i++)
1828 {
1829 stmt = DR_STMT (dr);
1830 stmt_info = vinfo_for_stmt (stmt);
1831
1832 /* For interleaving, only the alignment of the first access
1833 matters. */
1834 if (STMT_VINFO_STRIDED_ACCESS (stmt_info)
1835 && DR_GROUP_FIRST_DR (stmt_info) != stmt)
1836 continue;
1837
1838 if (!DR_IS_READ (dr) && !aligned_access_p (dr))
1839 {
1840 do_peeling = vector_alignment_reachable_p (dr);
1841 if (do_peeling)
1842 dr0 = dr;
1843 if (!do_peeling && vect_print_dump_info (REPORT_DETAILS))
1844 fprintf (vect_dump, "vector alignment may not be reachable");
1845 break;
1846 }
1847 }
1848
1849 vect_versioning_for_alias_required =
1850 (VEC_length (ddr_p, LOOP_VINFO_MAY_ALIAS_DDRS (loop_vinfo)) > 0);
1851
1852 /* Temporarily, if versioning for alias is required, we disable peeling
1853 until we support peeling and versioning. Often peeling for alignment
1854 will require peeling for loop-bound, which in turn requires that we
1855 know how to adjust the loop ivs after the loop. */
1856 if (vect_versioning_for_alias_required
1857 || !vect_can_advance_ivs_p (loop_vinfo)
1858 || !slpeel_can_duplicate_loop_p (loop, single_exit (loop)))
1859 do_peeling = false;
1860
1861 if (do_peeling)
1862 {
1863 int mis;
1864 int npeel = 0;
1865 gimple stmt = DR_STMT (dr0);
1866 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1867 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
1868 int nelements = TYPE_VECTOR_SUBPARTS (vectype);
1869
1870 if (known_alignment_for_access_p (dr0))
1871 {
1872 /* Since it's known at compile time, compute the number of iterations
1873 in the peeled loop (the peeling factor) for use in updating
1874 DR_MISALIGNMENT values. The peeling factor is the vectorization
1875 factor minus the misalignment as an element count. */
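/* Worked example (a sketch, not from the original source): for V4SI
   with 4-byte elements and DR_MISALIGNMENT (dr0) == 8 bytes, MIS is
   8 / 4 = 2 elements and NPEEL is 4 - 2 = 2 prolog iterations. */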
1876 mis = DR_MISALIGNMENT (dr0);
1877 mis /= GET_MODE_SIZE (TYPE_MODE (TREE_TYPE (DR_REF (dr0))));
1878 npeel = nelements - mis;
1879
1880 /* For interleaved data access every iteration accesses all the
1881 members of the group; therefore we divide the number of iterations
1882 by the group size. */
1883 stmt_info = vinfo_for_stmt (DR_STMT (dr0));
1884 if (STMT_VINFO_STRIDED_ACCESS (stmt_info))
1885 npeel /= DR_GROUP_SIZE (stmt_info);
1886
1887 if (vect_print_dump_info (REPORT_DETAILS))
1888 fprintf (vect_dump, "Try peeling by %d", npeel);
1889 }
1890
1891 /* Ensure that all data refs can be vectorized after the peel. */
1892 for (i = 0; VEC_iterate (data_reference_p, datarefs, i, dr); i++)
1893 {
1894 int save_misalignment;
1895
1896 if (dr == dr0)
1897 continue;
1898
1899 stmt = DR_STMT (dr);
1900 stmt_info = vinfo_for_stmt (stmt);
1901 /* For interleaving, only the alignment of the first access
1902 matters. */
1903 if (STMT_VINFO_STRIDED_ACCESS (stmt_info)
1904 && DR_GROUP_FIRST_DR (stmt_info) != stmt)
1905 continue;
1906
1907 save_misalignment = DR_MISALIGNMENT (dr);
1908 vect_update_misalignment_for_peel (dr, dr0, npeel);
1909 supportable_dr_alignment = vect_supportable_dr_alignment (dr);
1910 SET_DR_MISALIGNMENT (dr, save_misalignment);
1911
1912 if (!supportable_dr_alignment)
1913 {
1914 do_peeling = false;
1915 break;
1916 }
1917 }
1918
1919 if (do_peeling)
1920 {
1921 /* (1.2) Update the DR_MISALIGNMENT of each data reference DR_i.
1922 If the misalignment of DR_i is identical to that of dr0 then set
1923 DR_MISALIGNMENT (DR_i) to zero. If the misalignment of DR_i and
1924 dr0 are known at compile time then increment DR_MISALIGNMENT (DR_i)
1925 by the peeling factor times the element size of DR_i (MOD the
1926 vectorization factor times the size). Otherwise, the
1927 misalignment of DR_i must be set to unknown. */
1928 for (i = 0; VEC_iterate (data_reference_p, datarefs, i, dr); i++)
1929 if (dr != dr0)
1930 vect_update_misalignment_for_peel (dr, dr0, npeel);
1931
1932 LOOP_VINFO_UNALIGNED_DR (loop_vinfo) = dr0;
1933 LOOP_PEELING_FOR_ALIGNMENT (loop_vinfo) = DR_MISALIGNMENT (dr0);
1934 SET_DR_MISALIGNMENT (dr0, 0);
1935 if (vect_print_dump_info (REPORT_ALIGNMENT))
1936 fprintf (vect_dump, "Alignment of access forced using peeling.");
1937
1938 if (vect_print_dump_info (REPORT_DETAILS))
1939 fprintf (vect_dump, "Peeling for alignment will be applied.");
1940
1941 stat = vect_verify_datarefs_alignment (loop_vinfo);
1942 gcc_assert (stat);
1943 return stat;
1944 }
1945 }
1946
1947
1948 /* (2) Versioning to force alignment. */
1949
1950 /* Try versioning if:
1951 1) flag_tree_vect_loop_version is TRUE
1952 2) optimize_size is FALSE
1953 3) there is at least one unsupported misaligned data ref with an unknown
1954 misalignment, and
1955 4) all misaligned data refs with a known misalignment are supported, and
1956 5) the number of runtime alignment checks is within reason. */
1957
1958 do_versioning =
1959 flag_tree_vect_loop_version
1960 && (!optimize_size)
1961 && (!loop->inner); /* FORNOW */
1962
1963 if (do_versioning)
1964 {
1965 for (i = 0; VEC_iterate (data_reference_p, datarefs, i, dr); i++)
1966 {
1967 stmt = DR_STMT (dr);
1968 stmt_info = vinfo_for_stmt (stmt);
1969
1970 /* For interleaving, only the alignment of the first access
1971 matters. */
1972 if (aligned_access_p (dr)
1973 || (STMT_VINFO_STRIDED_ACCESS (stmt_info)
1974 && DR_GROUP_FIRST_DR (stmt_info) != stmt))
1975 continue;
1976
1977 supportable_dr_alignment = vect_supportable_dr_alignment (dr);
1978
1979 if (!supportable_dr_alignment)
1980 {
1981 gimple stmt;
1982 int mask;
1983 tree vectype;
1984
1985 if (known_alignment_for_access_p (dr)
1986 || VEC_length (gimple,
1987 LOOP_VINFO_MAY_MISALIGN_STMTS (loop_vinfo))
1988 >= (unsigned) PARAM_VALUE (PARAM_VECT_MAX_VERSION_FOR_ALIGNMENT_CHECKS))
1989 {
1990 do_versioning = false;
1991 break;
1992 }
1993
1994 stmt = DR_STMT (dr);
1995 vectype = STMT_VINFO_VECTYPE (vinfo_for_stmt (stmt));
1996 gcc_assert (vectype);
1997
1998 /* The rightmost bits of an aligned address must be zeros.
1999 Construct the mask needed for this test. For example,
2000 GET_MODE_SIZE for the vector mode V4SI is 16 bytes so the
2001 mask must be 15 = 0xf. */
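/* Illustration (a sketch, not from the original source): for V4SI the
   versioned loop is conceptually guarded by a test of the form
   ((uintptr_t) p & 15) == 0 for each pointer recorded here; the actual
   guard is emitted later from LOOP_VINFO_PTR_MASK and the stmts
   collected in LOOP_VINFO_MAY_MISALIGN_STMTS. */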
2002 mask = GET_MODE_SIZE (TYPE_MODE (vectype)) - 1;
2003
2004 /* FORNOW: use the same mask to test all potentially unaligned
2005 references in the loop. The vectorizer currently supports
2006 a single vector size, see the reference to
2007 GET_MODE_NUNITS (TYPE_MODE (vectype)) where the
2008 vectorization factor is computed. */
2009 gcc_assert (!LOOP_VINFO_PTR_MASK (loop_vinfo)
2010 || LOOP_VINFO_PTR_MASK (loop_vinfo) == mask);
2011 LOOP_VINFO_PTR_MASK (loop_vinfo) = mask;
2012 VEC_safe_push (gimple, heap,
2013 LOOP_VINFO_MAY_MISALIGN_STMTS (loop_vinfo),
2014 DR_STMT (dr));
2015 }
2016 }
2017
2018 /* Versioning requires at least one misaligned data reference. */
2019 if (VEC_length (gimple, LOOP_VINFO_MAY_MISALIGN_STMTS (loop_vinfo)) == 0)
2020 do_versioning = false;
2021 else if (!do_versioning)
2022 VEC_truncate (gimple, LOOP_VINFO_MAY_MISALIGN_STMTS (loop_vinfo), 0);
2023 }
2024
2025 if (do_versioning)
2026 {
2027 VEC(gimple,heap) *may_misalign_stmts
2028 = LOOP_VINFO_MAY_MISALIGN_STMTS (loop_vinfo);
2029 gimple stmt;
2030
2031 /* It can now be assumed that the data references in the statements
2032 in LOOP_VINFO_MAY_MISALIGN_STMTS will be aligned in the version
2033 of the loop being vectorized. */
2034 for (i = 0; VEC_iterate (gimple, may_misalign_stmts, i, stmt); i++)
2035 {
2036 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
2037 dr = STMT_VINFO_DATA_REF (stmt_info);
2038 SET_DR_MISALIGNMENT (dr, 0);
2039 if (vect_print_dump_info (REPORT_ALIGNMENT))
2040 fprintf (vect_dump, "Alignment of access forced using versioning.");
2041 }
2042
2043 if (vect_print_dump_info (REPORT_DETAILS))
2044 fprintf (vect_dump, "Versioning for alignment will be applied.");
2045
2046 /* Peeling and versioning can't be done together at this time. */
2047 gcc_assert (! (do_peeling && do_versioning));
2048
2049 stat = vect_verify_datarefs_alignment (loop_vinfo);
2050 gcc_assert (stat);
2051 return stat;
2052 }
2053
2054 /* This point is reached if neither peeling nor versioning is being done. */
2055 gcc_assert (! (do_peeling || do_versioning));
2056
2057 stat = vect_verify_datarefs_alignment (loop_vinfo);
2058 return stat;
2059 }
2060
2061
2062 /* Function vect_analyze_data_refs_alignment
2063
2064 Analyze the alignment of the data-references in the loop.
2065 Return FALSE if a data reference is found that cannot be vectorized. */
2066
2067 static bool
2068 vect_analyze_data_refs_alignment (loop_vec_info loop_vinfo)
2069 {
2070 if (vect_print_dump_info (REPORT_DETAILS))
2071 fprintf (vect_dump, "=== vect_analyze_data_refs_alignment ===");
2072
2073 if (!vect_compute_data_refs_alignment (loop_vinfo))
2074 {
2075 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
2076 fprintf (vect_dump,
2077 "not vectorized: can't calculate alignment for data ref.");
2078 return false;
2079 }
2080
2081 return true;
2082 }
2083
2084
2085 /* Analyze groups of strided accesses: check that DR belongs to a group of
2086 strided accesses of legal size, step, etc. Detect gaps, single element
2087 interleaving, and other special cases. Set strided access info.
2088 Collect groups of strided stores for further use in SLP analysis. */
2089
2090 static bool
2091 vect_analyze_group_access (struct data_reference *dr)
2092 {
2093 tree step = DR_STEP (dr);
2094 tree scalar_type = TREE_TYPE (DR_REF (dr));
2095 HOST_WIDE_INT type_size = TREE_INT_CST_LOW (TYPE_SIZE_UNIT (scalar_type));
2096 gimple stmt = DR_STMT (dr);
2097 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
2098 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
2099 HOST_WIDE_INT dr_step = TREE_INT_CST_LOW (step);
2100 HOST_WIDE_INT stride;
2101 bool slp_impossible = false;
2102
2103 /* For interleaving, STRIDE is STEP counted in elements, i.e., the size of the
2104 interleaving group (including gaps). */
2105 stride = dr_step / type_size;
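/* Illustration (not from the original source): 4-byte ints accessed
   with DR_STEP 16 give STRIDE 4, i.e. the group spans 4 elements
   (gaps included) per scalar iteration. */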
2106
2107 /* A non-consecutive access is possible only if it is part of an interleaving group. */
2108 if (!DR_GROUP_FIRST_DR (vinfo_for_stmt (stmt)))
2109 {
2110 /* Check if this DR is part of an interleaving group of which it is
2111 the only element accessed in the loop. */
2112
2113 /* Gaps are supported only for loads. STEP must be a multiple of the type
2114 size. The size of the group must be a power of 2. */
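/* Illustration (not from the original source): the load x = a[2*i]
   touches every other element; its STEP is twice the type size and
   STRIDE is 2, a power of two, so it is treated as a single element
   interleaving group of size 2. */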
2115 if (DR_IS_READ (dr)
2116 && (dr_step % type_size) == 0
2117 && stride > 0
2118 && exact_log2 (stride) != -1)
2119 {
2120 DR_GROUP_FIRST_DR (vinfo_for_stmt (stmt)) = stmt;
2121 DR_GROUP_SIZE (vinfo_for_stmt (stmt)) = stride;
2122 if (vect_print_dump_info (REPORT_DR_DETAILS))
2123 {
2124 fprintf (vect_dump, "Detected single element interleaving %d ",
2125 DR_GROUP_SIZE (vinfo_for_stmt (stmt)));
2126 print_generic_expr (vect_dump, DR_REF (dr), TDF_SLIM);
2127 fprintf (vect_dump, " step ");
2128 print_generic_expr (vect_dump, step, TDF_SLIM);
2129 }
2130 return true;
2131 }
2132 if (vect_print_dump_info (REPORT_DETAILS))
2133 fprintf (vect_dump, "not consecutive access");
2134 return false;
2135 }
2136
2137 if (DR_GROUP_FIRST_DR (vinfo_for_stmt (stmt)) == stmt)
2138 {
2139 /* First stmt in the interleaving chain. Check the chain. */
2140 gimple next = DR_GROUP_NEXT_DR (vinfo_for_stmt (stmt));
2141 struct data_reference *data_ref = dr;
2142 unsigned int count = 1;
2143 tree next_step;
2144 tree prev_init = DR_INIT (data_ref);
2145 gimple prev = stmt;
2146 HOST_WIDE_INT diff, count_in_bytes;
2147
2148 while (next)
2149 {
2150 /* Skip same data-refs. If two or more stmts share a data-ref
2151 (supported only for loads), we vectorize only the first stmt, and
2152 the rest get their vectorized loads from the first one. */
2153 if (!tree_int_cst_compare (DR_INIT (data_ref),
2154 DR_INIT (STMT_VINFO_DATA_REF (
2155 vinfo_for_stmt (next)))))
2156 {
2157 if (!DR_IS_READ (data_ref))
2158 {
2159 if (vect_print_dump_info (REPORT_DETAILS))
2160 fprintf (vect_dump, "Two store stmts share the same dr.");
2161 return false;
2162 }
2163
2164 /* Check that there are no load-store dependencies for these loads,
2165 to prevent a load-store-load sequence to the same location. */
2166 if (DR_GROUP_READ_WRITE_DEPENDENCE (vinfo_for_stmt (next))
2167 || DR_GROUP_READ_WRITE_DEPENDENCE (vinfo_for_stmt (prev)))
2168 {
2169 if (vect_print_dump_info (REPORT_DETAILS))
2170 fprintf (vect_dump,
2171 "READ_WRITE dependence in interleaving.");
2172 return false;
2173 }
2174
2175 /* For loads, use the same data-ref load. */
2176 DR_GROUP_SAME_DR_STMT (vinfo_for_stmt (next)) = prev;
2177
2178 prev = next;
2179 next = DR_GROUP_NEXT_DR (vinfo_for_stmt (next));
2180 continue;
2181 }
2182 prev = next;
2183
2184 /* Check that all the accesses have the same STEP. */
2185 next_step = DR_STEP (STMT_VINFO_DATA_REF (vinfo_for_stmt (next)));
2186 if (tree_int_cst_compare (step, next_step))
2187 {
2188 if (vect_print_dump_info (REPORT_DETAILS))
2189 fprintf (vect_dump, "not consecutive access in interleaving");
2190 return false;
2191 }
2192
2193 data_ref = STMT_VINFO_DATA_REF (vinfo_for_stmt (next));
2194 /* Check that the distance between two accesses is equal to the type
2195 size. Otherwise, we have gaps. */
2196 diff = (TREE_INT_CST_LOW (DR_INIT (data_ref))
2197 - TREE_INT_CST_LOW (prev_init)) / type_size;
2198 if (diff != 1)
2199 {
2200 /* FORNOW: SLP of accesses with gaps is not supported. */
2201 slp_impossible = true;
2202 if (!DR_IS_READ (data_ref))
2203 {
2204 if (vect_print_dump_info (REPORT_DETAILS))
2205 fprintf (vect_dump, "interleaved store with gaps");
2206 return false;
2207 }
2208 }
2209
2210 /* Store the gap from the previous member of the group. If there is no
2211 gap in the access, DR_GROUP_GAP is always 1. */
2212 DR_GROUP_GAP (vinfo_for_stmt (next)) = diff;
2213
2214 prev_init = DR_INIT (data_ref);
2215 next = DR_GROUP_NEXT_DR (vinfo_for_stmt (next));
2216 /* Count the number of data-refs in the chain. */
2217 count++;
2218 }
2219
2220 /* COUNT is the number of accesses found; we multiply it by the size of
2221 the type to get COUNT_IN_BYTES. */
2222 count_in_bytes = type_size * count;
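/* Illustration (not from the original source): two 4-byte stores
   b[2*i] and b[2*i+1] with DR_STEP 8 give COUNT_IN_BYTES == 8 ==
   DR_STEP, so there are no gaps; loads a[3*i] and a[3*i+1] with
   DR_STEP 12 leave a gap of one element, which the checks below
   allow for loads only. */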
2223
2224 /* Check that the size of the interleaving is not greater than STEP. */
2225 if (dr_step < count_in_bytes)
2226 {
2227 if (vect_print_dump_info (REPORT_DETAILS))
2228 {
2229 fprintf (vect_dump, "interleaving size is greater than step for ");
2230 print_generic_expr (vect_dump, DR_REF (dr), TDF_SLIM);
2231 }
2232 return false;
2233 }
2234
2235 /* Check that the size of the interleaving is equal to STEP for stores,
2236 i.e., that there are no gaps. */
2237 if (dr_step != count_in_bytes)
2238 {
2239 if (DR_IS_READ (dr))
2240 {
2241 slp_impossible = true;
2242 /* There is a gap after the last load in the group. This gap is the
2243 difference between the stride and the number of elements. When
2244 there is no gap, this difference should be 0. */
2245 DR_GROUP_GAP (vinfo_for_stmt (stmt)) = stride - count;
2246 }
2247 else
2248 {
2249 if (vect_print_dump_info (REPORT_DETAILS))
2250 fprintf (vect_dump, "interleaved store with gaps");
2251 return false;
2252 }
2253 }
2254
2255 /* Check that STEP is a multiple of type size. */
2256 if ((dr_step % type_size) != 0)
2257 {
2258 if (vect_print_dump_info (REPORT_DETAILS))
2259 {
2260 fprintf (vect_dump, "step is not a multiple of type size: step ");
2261 print_generic_expr (vect_dump, step, TDF_SLIM);
2262 fprintf (vect_dump, " size ");
2263 print_generic_expr (vect_dump, TYPE_SIZE_UNIT (scalar_type),
2264 TDF_SLIM);
2265 }
2266 return false;
2267 }
2268
2269 /* FORNOW: we handle only interleaving that is a power of 2.
2270 We don't fail here if it may still be possible to vectorize the
2271 group using SLP. If not, the size of the group will be checked in
2272 vect_analyze_operations, and the vectorization will fail. */
2273 if (exact_log2 (stride) == -1)
2274 {
2275 if (vect_print_dump_info (REPORT_DETAILS))
2276 fprintf (vect_dump, "interleaving is not a power of 2");
2277
2278 if (slp_impossible)
2279 return false;
2280 }
2281 DR_GROUP_SIZE (vinfo_for_stmt (stmt)) = stride;
2282 if (vect_print_dump_info (REPORT_DETAILS))
2283 fprintf (vect_dump, "Detected interleaving of size %d", (int)stride);
2284
2285 /* SLP: create an SLP data structure for every interleaving group of
2286 stores for further analysis in vect_analyze_slp. */
2287 if (!DR_IS_READ (dr) && !slp_impossible)
2288 VEC_safe_push (gimple, heap, LOOP_VINFO_STRIDED_STORES (loop_vinfo), stmt);
2289 }
2290
2291 return true;
2292 }
2293
2294
2295 /* Analyze the access pattern of the data-reference DR.
2296 In case of non-consecutive accesses call vect_analyze_group_access() to
2297 analyze groups of strided accesses. */
2298
2299 static bool
2300 vect_analyze_data_ref_access (struct data_reference *dr)
2301 {
2302 tree step = DR_STEP (dr);
2303 tree scalar_type = TREE_TYPE (DR_REF (dr));
2304 gimple stmt = DR_STMT (dr);
2305 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
2306 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
2307 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
2308 HOST_WIDE_INT dr_step = TREE_INT_CST_LOW (step);
2309
2310 if (!step)
2311 {
2312 if (vect_print_dump_info (REPORT_DETAILS))
2313 fprintf (vect_dump, "bad data-ref access");
2314 return false;
2315 }
2316
2317 /* Don't allow invariant accesses. */
2318 if (dr_step == 0)
2319 return false;
2320
2321 if (nested_in_vect_loop_p (loop, stmt))
2322 {
2323 /* Interleaved accesses are not yet supported within outer-loop
2324 vectorization for references in the inner-loop. */
2325 DR_GROUP_FIRST_DR (vinfo_for_stmt (stmt)) = NULL;
2326
2327 /* For the rest of the analysis we use the outer-loop step. */
2328 step = STMT_VINFO_DR_STEP (stmt_info);
2329 dr_step = TREE_INT_CST_LOW (step);
2330
2331 if (dr_step == 0)
2332 {
2333 if (vect_print_dump_info (REPORT_ALIGNMENT))
2334 fprintf (vect_dump, "zero step in outer loop.");
2335 if (DR_IS_READ (dr))
2336 return true;
2337 else
2338 return false;
2339 }
2340 }
2341
2342 /* Consecutive? */
2343 if (!tree_int_cst_compare (step, TYPE_SIZE_UNIT (scalar_type)))
2344 {
2345 /* Mark that it is not interleaving. */
2346 DR_GROUP_FIRST_DR (vinfo_for_stmt (stmt)) = NULL;
2347 return true;
2348 }
2349
2350 if (nested_in_vect_loop_p (loop, stmt))
2351 {
2352 if (vect_print_dump_info (REPORT_ALIGNMENT))
2353 fprintf (vect_dump, "strided access in outer loop.");
2354 return false;
2355 }
2356
2357 /* Not a consecutive access - check whether it is part of an interleaving group. */
2358 return vect_analyze_group_access (dr);
2359 }
2360
2361
2362 /* Function vect_analyze_data_ref_accesses.
2363
2364 Analyze the access pattern of all the data references in the loop.
2365
2366 FORNOW: the only access pattern that is considered vectorizable is a
2367 simple step 1 (consecutive) access.
2368
2369 FORNOW: handle only arrays and pointer accesses. */
2370
2371 static bool
2372 vect_analyze_data_ref_accesses (loop_vec_info loop_vinfo)
2373 {
2374 unsigned int i;
2375 VEC (data_reference_p, heap) *datarefs = LOOP_VINFO_DATAREFS (loop_vinfo);
2376 struct data_reference *dr;
2377
2378 if (vect_print_dump_info (REPORT_DETAILS))
2379 fprintf (vect_dump, "=== vect_analyze_data_ref_accesses ===");
2380
2381 for (i = 0; VEC_iterate (data_reference_p, datarefs, i, dr); i++)
2382 if (!vect_analyze_data_ref_access (dr))
2383 {
2384 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
2385 fprintf (vect_dump, "not vectorized: complicated access pattern.");
2386 return false;
2387 }
2388
2389 return true;
2390 }
2391
2392 /* Function vect_prune_runtime_alias_test_list.
2393
2394 Prune a list of ddrs to be tested at run-time by versioning for alias.
2395 Return FALSE if the resulting list of ddrs is longer than allowed by
2396 PARAM_VECT_MAX_VERSION_FOR_ALIAS_CHECKS; otherwise return TRUE. */
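/* Illustration (a sketch, not from the original source): if two ddrs
   expand to the same pair of address ranges - say both would test
   p[0:N] against q[0:N] at run time - vect_vfa_range_equal detects
   the duplicate and only one runtime alias check is kept. */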
2397
2398 static bool
2399 vect_prune_runtime_alias_test_list (loop_vec_info loop_vinfo)
2400 {
2401 VEC (ddr_p, heap) *ddrs = LOOP_VINFO_MAY_ALIAS_DDRS (loop_vinfo);
2403 unsigned i, j;
2404
2405 if (vect_print_dump_info (REPORT_DETAILS))
2406 fprintf (vect_dump, "=== vect_prune_runtime_alias_test_list ===");
2407
2408 for (i = 0; i < VEC_length (ddr_p, ddrs); )
2409 {
2410 bool found;
2411 ddr_p ddr_i;
2412
2413 ddr_i = VEC_index (ddr_p, ddrs, i);
2414 found = false;
2415
2416 for (j = 0; j < i; j++)
2417 {
2418 ddr_p ddr_j = VEC_index (ddr_p, ddrs, j);
2419
2420 if (vect_vfa_range_equal (ddr_i, ddr_j))
2421 {
2422 if (vect_print_dump_info (REPORT_DR_DETAILS))
2423 {
2424 fprintf (vect_dump, "found equal ranges ");
2425 print_generic_expr (vect_dump, DR_REF (DDR_A (ddr_i)), TDF_SLIM);
2426 fprintf (vect_dump, ", ");
2427 print_generic_expr (vect_dump, DR_REF (DDR_B (ddr_i)), TDF_SLIM);
2428 fprintf (vect_dump, " and ");
2429 print_generic_expr (vect_dump, DR_REF (DDR_A (ddr_j)), TDF_SLIM);
2430 fprintf (vect_dump, ", ");
2431 print_generic_expr (vect_dump, DR_REF (DDR_B (ddr_j)), TDF_SLIM);
2432 }
2433 found = true;
2434 break;
2435 }
2436 }
2437
2438 if (found)
2439 {
2440 VEC_ordered_remove (ddr_p, ddrs, i);
2441 continue;
2442 }
2443 i++;
2444 }
2445
2446 if (VEC_length (ddr_p, ddrs) >
2447 (unsigned) PARAM_VALUE (PARAM_VECT_MAX_VERSION_FOR_ALIAS_CHECKS))
2448 {
2449 if (vect_print_dump_info (REPORT_DR_DETAILS))
2450 {
2451 fprintf (vect_dump,
2452 "disable versioning for alias - max number of generated "
2453 "checks exceeded.");
2454 }
2455
2456 VEC_truncate (ddr_p, LOOP_VINFO_MAY_ALIAS_DDRS (loop_vinfo), 0);
2457
2458 return false;
2459 }
2460
2461 return true;
2462 }
2463
2464 /* Recursively free the memory allocated for the SLP tree rooted at NODE. */
2465
2466 void
2467 vect_free_slp_tree (slp_tree node)
2468 {
2469 if (!node)
2470 return;
2471
2472 if (SLP_TREE_LEFT (node))
2473 vect_free_slp_tree (SLP_TREE_LEFT (node));
2474
2475 if (SLP_TREE_RIGHT (node))
2476 vect_free_slp_tree (SLP_TREE_RIGHT (node));
2477
2478 VEC_free (gimple, heap, SLP_TREE_SCALAR_STMTS (node));
2479
2480 if (SLP_TREE_VEC_STMTS (node))
2481 VEC_free (gimple, heap, SLP_TREE_VEC_STMTS (node));
2482
2483 free (node);
2484 }
2485
2486
2487 /* Get the defs for the rhs of STMT (collect them in DEF_STMTS0/1), and check
2488 that they are of a legal type and that they match the defs of the first
2489 stmt of the SLP group (stored in FIRST_STMT_...). */
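/* Illustration (a sketch, not from the original source): for the group
   { a[i] = b[i] + x; a[i+1] = b[i+1] + j; } the second operands must
   have matching defs; if 'x' is loop-invariant while 'j' is defined
   inside the loop, the def types differ and the SLP build fails. */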
2490
2491 static bool
2492 vect_get_and_check_slp_defs (loop_vec_info loop_vinfo, slp_tree slp_node,
2493 gimple stmt, VEC (gimple, heap) **def_stmts0,
2494 VEC (gimple, heap) **def_stmts1,
2495 enum vect_def_type *first_stmt_dt0,
2496 enum vect_def_type *first_stmt_dt1,
2497 tree *first_stmt_def0_type,
2498 tree *first_stmt_def1_type,
2499 tree *first_stmt_const_oprnd,
2500 int ncopies_for_cost)
2501 {
2502 tree oprnd;
2503 unsigned int i, number_of_oprnds;
2504 tree def;
2505 gimple def_stmt;
2506 enum vect_def_type dt[2] = {vect_unknown_def_type, vect_unknown_def_type};
2507 stmt_vec_info stmt_info =
2508 vinfo_for_stmt (VEC_index (gimple, SLP_TREE_SCALAR_STMTS (slp_node), 0));
2509 enum gimple_rhs_class rhs_class;
2510
2511 rhs_class = get_gimple_rhs_class (gimple_assign_rhs_code (stmt));
2512 number_of_oprnds = gimple_num_ops (stmt) - 1; /* RHS only */
2513
2514 for (i = 0; i < number_of_oprnds; i++)
2515 {
2516 oprnd = gimple_op (stmt, i + 1);
2517
2518 if (!vect_is_simple_use (oprnd, loop_vinfo, &def_stmt, &def, &dt[i])
2519 || (!def_stmt && dt[i] != vect_constant_def))
2520 {
2521 if (vect_print_dump_info (REPORT_SLP))
2522 {
2523 fprintf (vect_dump, "Build SLP failed: can't find def for ");
2524 print_generic_expr (vect_dump, oprnd, TDF_SLIM);
2525 }
2526
2527 return false;
2528 }
2529
2530 if (!*first_stmt_dt0)
2531 {
2532 /* op0 of the first stmt of the group - store its info. */
2533 *first_stmt_dt0 = dt[i];
2534 if (def)
2535 *first_stmt_def0_type = TREE_TYPE (def);
2536 else
2537 *first_stmt_const_oprnd = oprnd;
2538
2539 /* Analyze costs (for the first stmt of the group only). */
2540 if (rhs_class != GIMPLE_SINGLE_RHS)
2541 /* Not a memory operation (we don't call this function for loads). */
2542 vect_model_simple_cost (stmt_info, ncopies_for_cost, dt, slp_node);
2543 else
2544 /* Store. */
2545 vect_model_store_cost (stmt_info, ncopies_for_cost, dt[0], slp_node);
2546 }
2547
2548 else
2549 {
2550 if (!*first_stmt_dt1 && i == 1)
2551 {
2552 /* op1 of the first stmt of the group - store its info. */
2553 *first_stmt_dt1 = dt[i];
2554 if (def)
2555 *first_stmt_def1_type = TREE_TYPE (def);
2556 else
2557 {
2558 /* We assume that the stmt contains only one constant
2559 operand. We fail otherwise, to be on the safe side. */
2560 if (*first_stmt_const_oprnd)
2561 {
2562 if (vect_print_dump_info (REPORT_SLP))
2563 fprintf (vect_dump, "Build SLP failed: two constant "
2564 "oprnds in stmt");
2565 return false;
2566 }
2567 *first_stmt_const_oprnd = oprnd;
2568 }
2569 }
2570 else
2571 {
2572 /* Not first stmt of the group, check that the def-stmt/s match
2573 the def-stmt/s of the first stmt. */
2574 if ((i == 0
2575 && (*first_stmt_dt0 != dt[i]
2576 || (*first_stmt_def0_type && def
2577 && *first_stmt_def0_type != TREE_TYPE (def))))
2578 || (i == 1
2579 && (*first_stmt_dt1 != dt[i]
2580 || (*first_stmt_def1_type && def
2581 && *first_stmt_def1_type != TREE_TYPE (def))))
2582 || (!def
2583 && TREE_TYPE (*first_stmt_const_oprnd)
2584 != TREE_TYPE (oprnd)))
2585 {
2586 if (vect_print_dump_info (REPORT_SLP))
2587 fprintf (vect_dump, "Build SLP failed: different types ");
2588
2589 return false;
2590 }
2591 }
2592 }
2593
2594 /* Check the types of the definitions. */
2595 switch (dt[i])
2596 {
2597 case vect_constant_def:
2598 case vect_invariant_def:
2599 break;
2600
2601 case vect_loop_def:
2602 if (i == 0)
2603 VEC_safe_push (gimple, heap, *def_stmts0, def_stmt);
2604 else
2605 VEC_safe_push (gimple, heap, *def_stmts1, def_stmt);
2606 break;
2607
2608 default:
2609 /* FORNOW: Not supported. */
2610 if (vect_print_dump_info (REPORT_SLP))
2611 {
2612 fprintf (vect_dump, "Build SLP failed: illegal type of def ");
2613 print_generic_expr (vect_dump, def, TDF_SLIM);
2614 }
2615
2616 return false;
2617 }
2618 }
2619
2620 return true;
2621 }
2622
2623
2624 /* Recursively build an SLP tree starting from NODE.
2625 Fail (and return FALSE) if the def-stmts are not isomorphic, require data
2626 permutation, or use unsupported operations. Otherwise, return
2627 TRUE.
2628 SLP_IMPOSSIBLE is set to TRUE if it is impossible to SLP any stmt in the
2629 loop, for example (FORNOW) in the case of multiple types. */
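/* Illustration (a sketch, not from the original source): the strided
   store group { a[2*i] = b[2*i] + c[2*i]; a[2*i+1] = b[2*i+1] + c[2*i+1]; }
   is isomorphic - both stmts use PLUS_EXPR - so the recursion builds a
   left child for the loads of 'b' and a right child for the loads of
   'c', and stops at those load groups. */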
2630
2631 static bool
2632 vect_build_slp_tree (loop_vec_info loop_vinfo, slp_tree *node,
2633 unsigned int group_size, bool *slp_impossible,
2634 int *inside_cost, int *outside_cost,
2635 int ncopies_for_cost)
2636 {
2637 VEC (gimple, heap) *def_stmts0 = VEC_alloc (gimple, heap, group_size);
2638 VEC (gimple, heap) *def_stmts1 = VEC_alloc (gimple, heap, group_size);
2639 unsigned int i;
2640 VEC (gimple, heap) *stmts = SLP_TREE_SCALAR_STMTS (*node);
2641 gimple stmt = VEC_index (gimple, stmts, 0);
2642 enum vect_def_type first_stmt_dt0 = 0, first_stmt_dt1 = 0;
2643 enum tree_code first_stmt_code = 0, rhs_code;
2644 tree first_stmt_def1_type = NULL_TREE, first_stmt_def0_type = NULL_TREE;
2645 tree lhs;
2646 gimple prev_stmt = NULL;
2647 bool stop_recursion = false, need_same_oprnds = false;
2648 tree vectype, scalar_type, first_op1 = NULL_TREE;
2649 unsigned int vectorization_factor = 0, ncopies;
2650 optab optab;
2651 int icode;
2652 enum machine_mode optab_op2_mode;
2653 enum machine_mode vec_mode;
2654 tree first_stmt_const_oprnd = NULL_TREE;
2655 struct data_reference *first_dr;
2656
2657 /* For every stmt in NODE find its def stmt/s. */
2658 for (i = 0; VEC_iterate (gimple, stmts, i, stmt); i++)
2659 {
2660 if (vect_print_dump_info (REPORT_SLP))
2661 {
2662 fprintf (vect_dump, "Build SLP for ");
2663 print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
2664 }
2665
2666 lhs = gimple_get_lhs (stmt);
2667 if (lhs == NULL_TREE)
2668 {
2669 if (vect_print_dump_info (REPORT_SLP))
2670 {
2671 fprintf (vect_dump,
2672 "Build SLP failed: not GIMPLE_ASSIGN nor GIMPLE_CALL");
2673 print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
2674 }
2675
2676 return false;
2677 }
2678
2679 scalar_type = TREE_TYPE (lhs);
2680 vectype = get_vectype_for_scalar_type (scalar_type);
2681 if (!vectype)
2682 {
2683 if (vect_print_dump_info (REPORT_SLP))
2684 {
2685 fprintf (vect_dump, "Build SLP failed: unsupported data-type ");
2686 print_generic_expr (vect_dump, scalar_type, TDF_SLIM);
2687 }
2688 return false;
2689 }
2690
2691 gcc_assert (LOOP_VINFO_VECT_FACTOR (loop_vinfo));
2692 vectorization_factor = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
2693 ncopies = vectorization_factor / TYPE_VECTOR_SUBPARTS (vectype);
2694 if (ncopies > 1)
2695 {
2696 /* FORNOW. */
2697 if (vect_print_dump_info (REPORT_SLP))
2698 fprintf (vect_dump, "SLP failed - multiple types ");
2699
2700 *slp_impossible = true;
2701 return false;
2702 }
2703
2704 if (is_gimple_call (stmt))
2705 rhs_code = CALL_EXPR;
2706 else
2707 rhs_code = gimple_assign_rhs_code (stmt);
2708
2709 /* Check the operation. */
2710 if (i == 0)
2711 {
2712 first_stmt_code = rhs_code;
2713
2714 /* Shift arguments should be equal in all the packed stmts for a
2715 vector shift with a scalar shift operand. */
2716 if (rhs_code == LSHIFT_EXPR || rhs_code == RSHIFT_EXPR
2717 || rhs_code == LROTATE_EXPR
2718 || rhs_code == RROTATE_EXPR)
2719 {
2720 vec_mode = TYPE_MODE (vectype);
2721
2722 /* First see if we have a vector/vector shift. */
2723 optab = optab_for_tree_code (rhs_code, vectype,
2724 optab_vector);
2725
2726 if (!optab
2727 || (optab->handlers[(int) vec_mode].insn_code
2728 == CODE_FOR_nothing))
2729 {
2730 /* No vector/vector shift, try for a vector/scalar shift. */
2731 optab = optab_for_tree_code (rhs_code, vectype,
2732 optab_scalar);
2733
2734 if (!optab)
2735 {
2736 if (vect_print_dump_info (REPORT_SLP))
2737 fprintf (vect_dump, "Build SLP failed: no optab.");
2738 return false;
2739 }
2740 icode = (int) optab->handlers[(int) vec_mode].insn_code;
2741 if (icode == CODE_FOR_nothing)
2742 {
2743 if (vect_print_dump_info (REPORT_SLP))
2744 fprintf (vect_dump,
2745 "Build SLP failed: op not supported by target.");
2746 return false;
2747 }
2748 optab_op2_mode = insn_data[icode].operand[2].mode;
2749 if (!VECTOR_MODE_P (optab_op2_mode))
2750 {
2751 need_same_oprnds = true;
2752 first_op1 = gimple_assign_rhs2 (stmt);
2753 }
2754 }
2755 }
2756 }
2757 else
2758 {
2759 if (first_stmt_code != rhs_code
2760 && (first_stmt_code != IMAGPART_EXPR
2761 || rhs_code != REALPART_EXPR)
2762 && (first_stmt_code != REALPART_EXPR
2763 || rhs_code != IMAGPART_EXPR))
2764 {
2765 if (vect_print_dump_info (REPORT_SLP))
2766 {
2767 fprintf (vect_dump,
2768 "Build SLP failed: different operation in stmt ");
2769 print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
2770 }
2771
2772 return false;
2773 }
2774
2775 if (need_same_oprnds
2776 && !operand_equal_p (first_op1, gimple_assign_rhs2 (stmt), 0))
2777 {
2778 if (vect_print_dump_info (REPORT_SLP))
2779 {
2780 fprintf (vect_dump,
2781 "Build SLP failed: different shift arguments in ");
2782 print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
2783 }
2784
2785 return false;
2786 }
2787 }
2788
2789 /* Strided store or load. */
2790 if (STMT_VINFO_STRIDED_ACCESS (vinfo_for_stmt (stmt)))
2791 {
2792 if (REFERENCE_CLASS_P (lhs))
2793 {
2794 /* Store. */
2795 if (!vect_get_and_check_slp_defs (loop_vinfo, *node, stmt,
2796 &def_stmts0, &def_stmts1,
2797 &first_stmt_dt0,
2798 &first_stmt_dt1,
2799 &first_stmt_def0_type,
2800 &first_stmt_def1_type,
2801 &first_stmt_const_oprnd,
2802 ncopies_for_cost))
2803 return false;
2804 }
2805 else
2806 {
2807 /* Load. */
2808 if (i == 0)
2809 {
2810 /* The first stmt of the SLP group should be the first load of
2811 the interleaving chain if data permutation is not allowed.
2812 Check that there is no gap between the loads. */
2813 if (DR_GROUP_FIRST_DR (vinfo_for_stmt (stmt)) != stmt
2814 || DR_GROUP_GAP (vinfo_for_stmt (stmt)) != 0)
2815 {
2816 /* FORNOW: data permutations and gaps in loads are not
2817 supported. */
2818 if (vect_print_dump_info (REPORT_SLP))
2819 {
2820 fprintf (vect_dump, "Build SLP failed: strided "
2821 "loads need permutation or have gaps ");
2822 print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
2823 }
2824
2825 return false;
2826 }
2827
2828 first_dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (stmt));
2829 if (vect_supportable_dr_alignment (first_dr)
2830 == dr_unaligned_unsupported)
2831 {
2832 if (vect_print_dump_info (REPORT_SLP))
2833 {
2834 fprintf (vect_dump, "Build SLP failed: unsupported "
2835 "unaligned load ");
2836 print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
2837 }
2838
2839 return false;
2840 }
2841
2842 /* Analyze costs (for the first stmt in the group). */
2843 vect_model_load_cost (vinfo_for_stmt (stmt),
2844 ncopies_for_cost, *node);
2845 }
2846 else
2847 {
2848 /* Check that we have consecutive loads from the interleaving
2849 chain and that there is no gap between the loads. */
2850 if (DR_GROUP_NEXT_DR (vinfo_for_stmt (prev_stmt)) != stmt
2851 || DR_GROUP_GAP (vinfo_for_stmt (stmt)) != 1)
2852 {
2853 /* FORNOW: data permutations and gaps in loads are not
2854 supported. */
2855 if (vect_print_dump_info (REPORT_SLP))
2856 {
2857 fprintf (vect_dump, "Build SLP failed: strided "
2858 "loads need permutation or have gaps ");
2859 print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
2860 }
2861 return false;
2862 }
2863 }
2864
2865 prev_stmt = stmt;
2866
2867 /* We stop building the tree when we reach a group of loads. */
2868 stop_recursion = true;
2869 continue;
2870 }
2871 } /* Strided access. */
2872 else
2873 {
2874 if (TREE_CODE_CLASS (rhs_code) == tcc_reference)
2875 {
2876 /* Not a strided load. */
2877 if (vect_print_dump_info (REPORT_SLP))
2878 {
2879 fprintf (vect_dump, "Build SLP failed: not a strided load ");
2880 print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
2881 }
2882
2883 /* FORNOW: non-strided loads are not supported. */
2884 return false;
2885 }
2886
2887 /* Not a memory operation. */
2888 if (TREE_CODE_CLASS (rhs_code) != tcc_binary
2889 && TREE_CODE_CLASS (rhs_code) != tcc_unary)
2890 {
2891 if (vect_print_dump_info (REPORT_SLP))
2892 {
2893 fprintf (vect_dump, "Build SLP failed: operation");
2894 fprintf (vect_dump, " unsupported ");
2895 print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
2896 }
2897
2898 return false;
2899 }
2900
2901 /* Find the def-stmts. */
2902 if (!vect_get_and_check_slp_defs (loop_vinfo, *node, stmt,
2903 &def_stmts0, &def_stmts1,
2904 &first_stmt_dt0, &first_stmt_dt1,
2905 &first_stmt_def0_type,
2906 &first_stmt_def1_type,
2907 &first_stmt_const_oprnd,
2908 ncopies_for_cost))
2909 return false;
2910 }
2911 }
2912
2913 /* Add the costs of the node to the overall instance costs. */
2914 *inside_cost += SLP_TREE_INSIDE_OF_LOOP_COST (*node);
2915 *outside_cost += SLP_TREE_OUTSIDE_OF_LOOP_COST (*node);
2916
2917 /* Strided loads were reached - stop the recursion. */
2918 if (stop_recursion)
2919 return true;
2920
2921 /* Create SLP_TREE nodes for the definition node/s. */
2922 if (first_stmt_dt0 == vect_loop_def)
2923 {
2924 slp_tree left_node = XNEW (struct _slp_tree);
2925 SLP_TREE_SCALAR_STMTS (left_node) = def_stmts0;
2926 SLP_TREE_VEC_STMTS (left_node) = NULL;
2927 SLP_TREE_LEFT (left_node) = NULL;
2928 SLP_TREE_RIGHT (left_node) = NULL;
2929 SLP_TREE_OUTSIDE_OF_LOOP_COST (left_node) = 0;
2930 SLP_TREE_INSIDE_OF_LOOP_COST (left_node) = 0;
2931 if (!vect_build_slp_tree (loop_vinfo, &left_node, group_size,
2932 slp_impossible, inside_cost, outside_cost,
2933 ncopies_for_cost))
2934 return false;
2935
2936 SLP_TREE_LEFT (*node) = left_node;
2937 }
2938
2939 if (first_stmt_dt1 == vect_loop_def)
2940 {
2941 slp_tree right_node = XNEW (struct _slp_tree);
2942 SLP_TREE_SCALAR_STMTS (right_node) = def_stmts1;
2943 SLP_TREE_VEC_STMTS (right_node) = NULL;
2944 SLP_TREE_LEFT (right_node) = NULL;
2945 SLP_TREE_RIGHT (right_node) = NULL;
2946 SLP_TREE_OUTSIDE_OF_LOOP_COST (right_node) = 0;
2947 SLP_TREE_INSIDE_OF_LOOP_COST (right_node) = 0;
2948 if (!vect_build_slp_tree (loop_vinfo, &right_node, group_size,
2949 slp_impossible, inside_cost, outside_cost,
2950 ncopies_for_cost))
2951 return false;
2952
2953 SLP_TREE_RIGHT (*node) = right_node;
2954 }
2955
2956 return true;
2957 }
2958
2959
2960 static void
2961 vect_print_slp_tree (slp_tree node)
2962 {
2963 int i;
2964 gimple stmt;
2965
2966 if (!node)
2967 return;
2968
2969 fprintf (vect_dump, "node ");
2970 for (i = 0; VEC_iterate (gimple, SLP_TREE_SCALAR_STMTS (node), i, stmt); i++)
2971 {
2972 fprintf (vect_dump, "\n\tstmt %d ", i);
2973 print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
2974 }
2975 fprintf (vect_dump, "\n");
2976
2977 vect_print_slp_tree (SLP_TREE_LEFT (node));
2978 vect_print_slp_tree (SLP_TREE_RIGHT (node));
2979 }
2980
2981
2982 /* Mark the tree rooted at NODE with MARK (PURE_SLP or HYBRID).
2983 If MARK is HYBRID, it refers to a specific stmt in NODE (the stmt at index
2984 J). Otherwise, MARK is PURE_SLP and J is -1, which indicates that all the
2985 stmts in NODE are to be marked. */
2986
2987 static void
2988 vect_mark_slp_stmts (slp_tree node, enum slp_vect_type mark, int j)
2989 {
2990 int i;
2991 gimple stmt;
2992
2993 if (!node)
2994 return;
2995
2996 for (i = 0; VEC_iterate (gimple, SLP_TREE_SCALAR_STMTS (node), i, stmt); i++)
2997 if (j < 0 || i == j)
2998 STMT_SLP_TYPE (vinfo_for_stmt (stmt)) = mark;
2999
3000 vect_mark_slp_stmts (SLP_TREE_LEFT (node), mark, j);
3001 vect_mark_slp_stmts (SLP_TREE_RIGHT (node), mark, j);
3002 }
3003
3004
3005 /* Analyze an SLP instance starting from a group of strided stores. Call
3006 vect_build_slp_tree to build a tree of packed stmts if possible.
3007 Return FALSE if it's impossible to SLP any stmt in the loop. */
3008
3009 static bool
3010 vect_analyze_slp_instance (loop_vec_info loop_vinfo, gimple stmt)
3011 {
3012 slp_instance new_instance;
3013 slp_tree node = XNEW (struct _slp_tree);
3014 unsigned int group_size = DR_GROUP_SIZE (vinfo_for_stmt (stmt));
3015 unsigned int unrolling_factor = 1, nunits;
3016 tree vectype, scalar_type;
3017 gimple next;
3018 unsigned int vectorization_factor = 0, ncopies;
3019 bool slp_impossible = false;
3020 int inside_cost = 0, outside_cost = 0, ncopies_for_cost;
3021
3022 /* FORNOW: multiple types are not supported. */
3023 scalar_type = TREE_TYPE (DR_REF (STMT_VINFO_DATA_REF (vinfo_for_stmt (stmt))));
3024 vectype = get_vectype_for_scalar_type (scalar_type);
3025 if (!vectype)
3026 {
3027 if (vect_print_dump_info (REPORT_SLP))
3028 {
3029 fprintf (vect_dump, "Build SLP failed: unsupported data-type ");
3030 print_generic_expr (vect_dump, scalar_type, TDF_SLIM);
3031 }
3032 return false;
3033 }
3034
3035 nunits = TYPE_VECTOR_SUBPARTS (vectype);
3036 vectorization_factor = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
3037 ncopies = vectorization_factor / nunits;
3038 if (ncopies > 1)
3039 {
3040 if (vect_print_dump_info (REPORT_SLP))
3041 fprintf (vect_dump, "SLP failed - multiple types ");
3042
3043 return false;
3044 }
3045
3046 /* Create a node (a root of the SLP tree) for the packed strided stores. */
3047 SLP_TREE_SCALAR_STMTS (node) = VEC_alloc (gimple, heap, group_size);
3048 next = stmt;
3049 /* Collect the stores and store them in SLP_TREE_SCALAR_STMTS. */
3050 while (next)
3051 {
3052 VEC_safe_push (gimple, heap, SLP_TREE_SCALAR_STMTS (node), next);
3053 next = DR_GROUP_NEXT_DR (vinfo_for_stmt (next));
3054 }
3055
3056 SLP_TREE_VEC_STMTS (node) = NULL;
3057 SLP_TREE_NUMBER_OF_VEC_STMTS (node) = 0;
3058 SLP_TREE_LEFT (node) = NULL;
3059 SLP_TREE_RIGHT (node) = NULL;
3060 SLP_TREE_OUTSIDE_OF_LOOP_COST (node) = 0;
3061 SLP_TREE_INSIDE_OF_LOOP_COST (node) = 0;
3062
3063 /* Calculate the unrolling factor. */
3064 unrolling_factor = least_common_multiple (nunits, group_size) / group_size;
3065
3066 /* Calculate the number of vector stmts to create based on the unrolling
3067 factor (the number of vectors is 1 if NUNITS >= GROUP_SIZE, and is
3068 GROUP_SIZE / NUNITS otherwise). */
3069 ncopies_for_cost = unrolling_factor * group_size / nunits;
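/* Worked example (not from the original source): with NUNITS == 4 and
   GROUP_SIZE == 6 the unrolling factor is lcm (4, 6) / 6 == 2, and
   NCOPIES_FOR_COST is 2 * 6 / 4 == 3 vector stmts. */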
3070
3071 /* Build the tree for the SLP instance. */
3072 if (vect_build_slp_tree (loop_vinfo, &node, group_size, &slp_impossible,
3073 &inside_cost, &outside_cost, ncopies_for_cost))
3074 {
3075 /* Create a new SLP instance. */
3076 new_instance = XNEW (struct _slp_instance);
3077 SLP_INSTANCE_TREE (new_instance) = node;
3078 SLP_INSTANCE_GROUP_SIZE (new_instance) = group_size;
3079 SLP_INSTANCE_UNROLLING_FACTOR (new_instance) = unrolling_factor;
3080 SLP_INSTANCE_OUTSIDE_OF_LOOP_COST (new_instance) = outside_cost;
3081 SLP_INSTANCE_INSIDE_OF_LOOP_COST (new_instance) = inside_cost;
3082 VEC_safe_push (slp_instance, heap, LOOP_VINFO_SLP_INSTANCES (loop_vinfo),
3083 new_instance);
3084 if (vect_print_dump_info (REPORT_SLP))
3085 vect_print_slp_tree (node);
3086
3087 return true;
3088 }
3089
3090 /* Failed to SLP. */
3091 /* Free the allocated memory. */
3092 vect_free_slp_tree (node);
3093
3094 if (slp_impossible)
3095 return false;
3096
3097 /* SLP failed for this instance, but it is still possible to SLP other stmts
3098 in the loop. */
3099 return true;
3100 }
3101
3102
3103 /* Check if there are stmts in the loop that can be vectorized using SLP.
3104 Build SLP trees of packed scalar stmts if SLP is possible. */
3105
3106 static bool
3107 vect_analyze_slp (loop_vec_info loop_vinfo)
3108 {
3109 unsigned int i;
3110 VEC (gimple, heap) *strided_stores = LOOP_VINFO_STRIDED_STORES (loop_vinfo);
3111 gimple store;
3112
3113 if (vect_print_dump_info (REPORT_SLP))
3114 fprintf (vect_dump, "=== vect_analyze_slp ===");
3115
3116 for (i = 0; VEC_iterate (gimple, strided_stores, i, store); i++)
3117 if (!vect_analyze_slp_instance (loop_vinfo, store))
3118 {
3119 /* SLP failed. No instance can be SLPed in the loop. */
3120 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
3121 fprintf (vect_dump, "SLP failed.");
3122
3123 return false;
3124 }
3125
3126 return true;
3127 }
3128
3129
3130 /* For each possible SLP instance decide whether to SLP it and calculate the
3131 overall unrolling factor needed to SLP the loop. */
3132
3133 static void
3134 vect_make_slp_decision (loop_vec_info loop_vinfo)
3135 {
3136 unsigned int i, unrolling_factor = 1;
3137 VEC (slp_instance, heap) *slp_instances = LOOP_VINFO_SLP_INSTANCES (loop_vinfo);
3138 slp_instance instance;
3139 int decided_to_slp = 0;
3140
3141 if (vect_print_dump_info (REPORT_SLP))
3142 fprintf (vect_dump, "=== vect_make_slp_decision ===");
3143
3144 for (i = 0; VEC_iterate (slp_instance, slp_instances, i, instance); i++)
3145 {
3146 /* FORNOW: SLP if you can. */
3147 if (unrolling_factor < SLP_INSTANCE_UNROLLING_FACTOR (instance))
3148 unrolling_factor = SLP_INSTANCE_UNROLLING_FACTOR (instance);
3149
3150 /* Mark all the stmts that belong to INSTANCE as PURE_SLP stmts. Later we
3151 call vect_detect_hybrid_slp () to find stmts that need hybrid SLP and
3152 loop-based vectorization. Such stmts will be marked as HYBRID. */
3153 vect_mark_slp_stmts (SLP_INSTANCE_TREE (instance), pure_slp, -1);
3154 decided_to_slp++;
3155 }
3156
3157 LOOP_VINFO_SLP_UNROLLING_FACTOR (loop_vinfo) = unrolling_factor;
3158
3159 if (decided_to_slp && vect_print_dump_info (REPORT_SLP))
3160 fprintf (vect_dump, "Decided to SLP %d instances. Unrolling factor %d",
3161 decided_to_slp, unrolling_factor);
3162 }
3163
3164
3165 /* Find stmts that must be both vectorized and SLPed (since they feed stmts that
3166 can't be SLPed) in the tree rooted at NODE. Mark such stmts as HYBRID. */
3167
3168 static void
3169 vect_detect_hybrid_slp_stmts (slp_tree node)
3170 {
3171 int i;
3172 gimple stmt;
3173 imm_use_iterator imm_iter;
3174 gimple use_stmt;
3175
3176 if (!node)
3177 return;
3178
3179 for (i = 0; VEC_iterate (gimple, SLP_TREE_SCALAR_STMTS (node), i, stmt); i++)
3180 if (PURE_SLP_STMT (vinfo_for_stmt (stmt))
3181 && TREE_CODE (gimple_op (stmt, 0)) == SSA_NAME)
3182 FOR_EACH_IMM_USE_STMT (use_stmt, imm_iter, gimple_op (stmt, 0))
3183 if (vinfo_for_stmt (use_stmt)
3184 && !STMT_SLP_TYPE (vinfo_for_stmt (use_stmt)))
3185 vect_mark_slp_stmts (node, hybrid, i);
3186
3187 vect_detect_hybrid_slp_stmts (SLP_TREE_LEFT (node));
3188 vect_detect_hybrid_slp_stmts (SLP_TREE_RIGHT (node));
3189 }
3190
3191
3192 /* Find stmts that must be both vectorized and SLPed. */
3193
3194 static void
3195 vect_detect_hybrid_slp (loop_vec_info loop_vinfo)
3196 {
3197 unsigned int i;
3198 VEC (slp_instance, heap) *slp_instances = LOOP_VINFO_SLP_INSTANCES (loop_vinfo);
3199 slp_instance instance;
3200
3201 if (vect_print_dump_info (REPORT_SLP))
3202 fprintf (vect_dump, "=== vect_detect_hybrid_slp ===");
3203
3204 for (i = 0; VEC_iterate (slp_instance, slp_instances, i, instance); i++)
3205 vect_detect_hybrid_slp_stmts (SLP_INSTANCE_TREE (instance));
3206 }
3207
3208
3209 /* Function vect_analyze_data_refs.
3210
3211 Find all the data references in the loop.
3212
3213 The general structure of the analysis of data refs in the vectorizer is as
3214 follows:
3215 1- vect_analyze_data_refs(loop): call compute_data_dependences_for_loop to
3216 find and analyze all data-refs in the loop and their dependences.
3217 2- vect_analyze_dependences(): apply dependence testing using ddrs.
3218 3- vect_analyze_drs_alignment(): check that ref_stmt.alignment is ok.
3219 4- vect_analyze_drs_access(): check that ref_stmt.step is ok.
3220
3221 */
3222
3223 static bool
3224 vect_analyze_data_refs (loop_vec_info loop_vinfo)
3225 {
3226 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
3227 unsigned int i;
3228 VEC (data_reference_p, heap) *datarefs;
3229 struct data_reference *dr;
3230 tree scalar_type;
3231
3232 if (vect_print_dump_info (REPORT_DETAILS))
3233 fprintf (vect_dump, "=== vect_analyze_data_refs ===\n");
3234
3235 compute_data_dependences_for_loop (loop, true,
3236 &LOOP_VINFO_DATAREFS (loop_vinfo),
3237 &LOOP_VINFO_DDRS (loop_vinfo));
3238
3239 /* Go through the data-refs and check that the analysis succeeded. Update
3240 the pointer from the stmt_vec_info struct to DR and vectype. */
3241 datarefs = LOOP_VINFO_DATAREFS (loop_vinfo);
3242
3243 for (i = 0; VEC_iterate (data_reference_p, datarefs, i, dr); i++)
3244 {
3245 gimple stmt;
3246 stmt_vec_info stmt_info;
3247 basic_block bb;
3248 tree base, offset, init;
3249
3250 if (!dr || !DR_REF (dr))
3251 {
3252 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
3253 fprintf (vect_dump, "not vectorized: unhandled data-ref ");
3254 return false;
3255 }
3256
3257 stmt = DR_STMT (dr);
3258 stmt_info = vinfo_for_stmt (stmt);
3259
3260 /* Check that analysis of the data-ref succeeded. */
3261 if (!DR_BASE_ADDRESS (dr) || !DR_OFFSET (dr) || !DR_INIT (dr)
3262 || !DR_STEP (dr))
3263 {
3264 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
3265 {
3266 fprintf (vect_dump, "not vectorized: data ref analysis failed ");
3267 print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
3268 }
3269 return false;
3270 }
3271
3272 if (TREE_CODE (DR_BASE_ADDRESS (dr)) == INTEGER_CST)
3273 {
3274 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
3275 fprintf (vect_dump, "not vectorized: base addr of dr is a "
3276 "constant");
3277 return false;
3278 }
3279
3280 if (!DR_SYMBOL_TAG (dr))
3281 {
3282 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
3283 {
3284 fprintf (vect_dump, "not vectorized: no memory tag for ");
3285 print_generic_expr (vect_dump, DR_REF (dr), TDF_SLIM);
3286 }
3287 return false;
3288 }
3289
3290 base = unshare_expr (DR_BASE_ADDRESS (dr));
3291 offset = unshare_expr (DR_OFFSET (dr));
3292 init = unshare_expr (DR_INIT (dr));
3293
3294 /* Update DR field in stmt_vec_info struct. */
3295 bb = gimple_bb (stmt);
3296
3297 /* If the dataref is in an inner-loop of the loop that is considered
3298 for vectorization, we also want to analyze the access relative to
3299 the outer-loop (DR contains information only relative to the
3300 inner-most enclosing loop). We do that by building a reference to the
3301 first location accessed by the inner-loop, and analyze it relative to
3302 the outer-loop. */
3303 if (nested_in_vect_loop_p (loop, stmt))
3304 {
3305 tree outer_step, outer_base, outer_init;
3306 HOST_WIDE_INT pbitsize, pbitpos;
3307 tree poffset;
3308 enum machine_mode pmode;
3309 int punsignedp, pvolatilep;
3310 affine_iv base_iv, offset_iv;
3311 tree dinit;
3312
3313 /* Build a reference to the first location accessed by the
3314 inner-loop: *(BASE+INIT). (The first location is actually
3315 BASE+INIT+OFFSET, but we add OFFSET separately later). */
3316 tree inner_base = build_fold_indirect_ref
3317 (fold_build2 (POINTER_PLUS_EXPR,
3318 TREE_TYPE (base), base,
3319 fold_convert (sizetype, init)));
3320
3321 if (vect_print_dump_info (REPORT_DETAILS))
3322 {
3323 fprintf (dump_file, "analyze in outer-loop: ");
3324 print_generic_expr (dump_file, inner_base, TDF_SLIM);
3325 }
3326
3327 outer_base = get_inner_reference (inner_base, &pbitsize, &pbitpos,
3328 &poffset, &pmode, &punsignedp, &pvolatilep, false);
3329 gcc_assert (outer_base != NULL_TREE);
3330
3331 if (pbitpos % BITS_PER_UNIT != 0)
3332 {
3333 if (vect_print_dump_info (REPORT_DETAILS))
3334 fprintf (dump_file, "failed: bit offset alignment.\n");
3335 return false;
3336 }
3337
3338 outer_base = build_fold_addr_expr (outer_base);
3339 if (!simple_iv (loop, stmt, outer_base, &base_iv, false))
3340 {
3341 if (vect_print_dump_info (REPORT_DETAILS))
3342 fprintf (dump_file, "failed: evolution of base is not affine.\n");
3343 return false;
3344 }
3345
3346 if (offset)
3347 {
3348 if (poffset)
3349 poffset = fold_build2 (PLUS_EXPR, TREE_TYPE (offset), offset, poffset);
3350 else
3351 poffset = offset;
3352 }
3353
3354 if (!poffset)
3355 {
3356 offset_iv.base = ssize_int (0);
3357 offset_iv.step = ssize_int (0);
3358 }
3359 else if (!simple_iv (loop, stmt, poffset, &offset_iv, false))
3360 {
3361 if (vect_print_dump_info (REPORT_DETAILS))
3362 fprintf (dump_file, "evolution of offset is not affine.\n");
3363 return false;
3364 }
3365
3366 outer_init = ssize_int (pbitpos / BITS_PER_UNIT);
3367 split_constant_offset (base_iv.base, &base_iv.base, &dinit);
3368 outer_init = size_binop (PLUS_EXPR, outer_init, dinit);
3369 split_constant_offset (offset_iv.base, &offset_iv.base, &dinit);
3370 outer_init = size_binop (PLUS_EXPR, outer_init, dinit);
3371
3372 outer_step = size_binop (PLUS_EXPR,
3373 fold_convert (ssizetype, base_iv.step),
3374 fold_convert (ssizetype, offset_iv.step));
3375
3376 STMT_VINFO_DR_STEP (stmt_info) = outer_step;
3377 /* FIXME: Use canonicalize_base_object_address (base_iv.base); */
3378 STMT_VINFO_DR_BASE_ADDRESS (stmt_info) = base_iv.base;
3379 STMT_VINFO_DR_INIT (stmt_info) = outer_init;
3380 STMT_VINFO_DR_OFFSET (stmt_info) =
3381 fold_convert (ssizetype, offset_iv.base);
3382 STMT_VINFO_DR_ALIGNED_TO (stmt_info) =
3383 size_int (highest_pow2_factor (offset_iv.base));
3384
3385 if (dump_file && (dump_flags & TDF_DETAILS))
3386 {
3387 fprintf (dump_file, "\touter base_address: ");
3388 print_generic_expr (dump_file, STMT_VINFO_DR_BASE_ADDRESS (stmt_info), TDF_SLIM);
3389 fprintf (dump_file, "\n\touter offset from base address: ");
3390 print_generic_expr (dump_file, STMT_VINFO_DR_OFFSET (stmt_info), TDF_SLIM);
3391 fprintf (dump_file, "\n\touter constant offset from base address: ");
3392 print_generic_expr (dump_file, STMT_VINFO_DR_INIT (stmt_info), TDF_SLIM);
3393 fprintf (dump_file, "\n\touter step: ");
3394 print_generic_expr (dump_file, STMT_VINFO_DR_STEP (stmt_info), TDF_SLIM);
3395 fprintf (dump_file, "\n\touter aligned to: ");
3396 print_generic_expr (dump_file, STMT_VINFO_DR_ALIGNED_TO (stmt_info), TDF_SLIM);
3397 }
3398 }
3399
3400 if (STMT_VINFO_DATA_REF (stmt_info))
3401 {
3402 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
3403 {
3404 fprintf (vect_dump,
3405 "not vectorized: more than one data ref in stmt: ");
3406 print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
3407 }
3408 return false;
3409 }
3410 STMT_VINFO_DATA_REF (stmt_info) = dr;
3411
3412 /* Set vectype for STMT. */
3413 scalar_type = TREE_TYPE (DR_REF (dr));
3414 STMT_VINFO_VECTYPE (stmt_info) =
3415 get_vectype_for_scalar_type (scalar_type);
3416 if (!STMT_VINFO_VECTYPE (stmt_info))
3417 {
3418 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
3419 {
3420 fprintf (vect_dump,
3421 "not vectorized: no vectype for stmt: ");
3422 print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
3423 fprintf (vect_dump, " scalar_type: ");
3424 print_generic_expr (vect_dump, scalar_type, TDF_DETAILS);
3425 }
3426 return false;
3427 }
3428 }
3429
3430 return true;
3431 }
3432
3433
3434 /* Utility functions used by vect_mark_stmts_to_be_vectorized. */
3435
3436 /* Function vect_mark_relevant.
3437
3438 Mark STMT as "relevant for vectorization" and add it to WORKLIST. */
3439
3440 static void
3441 vect_mark_relevant (VEC(gimple,heap) **worklist, gimple stmt,
3442 enum vect_relevant relevant, bool live_p)
3443 {
3444 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
3445 enum vect_relevant save_relevant = STMT_VINFO_RELEVANT (stmt_info);
3446 bool save_live_p = STMT_VINFO_LIVE_P (stmt_info);
3447
3448 if (vect_print_dump_info (REPORT_DETAILS))
3449 fprintf (vect_dump, "mark relevant %d, live %d.", relevant, live_p);
3450
3451 if (STMT_VINFO_IN_PATTERN_P (stmt_info))
3452 {
3453 gimple pattern_stmt;
3454
3455 /* This is the last stmt in a sequence that was detected as a
3456 pattern that can potentially be vectorized. Don't mark the stmt
3457 as relevant/live because it's not going to be vectorized.
3458 Instead mark the pattern-stmt that replaces it. */
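/* For example (an illustrative sketch), if a dot-product pattern was
   detected, the stmt seen here would be the final multiply-add of the
   original sequence, and STMT_VINFO_RELATED_STMT would point to a
   pattern stmt whose rhs is a DOT_PROD_EXPR; it is that pattern stmt
   that gets marked below.  */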
3459
3460 pattern_stmt = STMT_VINFO_RELATED_STMT (stmt_info);
3461
3462 if (vect_print_dump_info (REPORT_DETAILS))
3463 fprintf (vect_dump, "last stmt in pattern. don't mark relevant/live.");
3464 stmt_info = vinfo_for_stmt (pattern_stmt);
3465 gcc_assert (STMT_VINFO_RELATED_STMT (stmt_info) == stmt);
3466 save_relevant = STMT_VINFO_RELEVANT (stmt_info);
3467 save_live_p = STMT_VINFO_LIVE_P (stmt_info);
3468 stmt = pattern_stmt;
3469 }
3470
3471 STMT_VINFO_LIVE_P (stmt_info) |= live_p;
3472 if (relevant > STMT_VINFO_RELEVANT (stmt_info))
3473 STMT_VINFO_RELEVANT (stmt_info) = relevant;
3474
3475 if (STMT_VINFO_RELEVANT (stmt_info) == save_relevant
3476 && STMT_VINFO_LIVE_P (stmt_info) == save_live_p)
3477 {
3478 if (vect_print_dump_info (REPORT_DETAILS))
3479 fprintf (vect_dump, "already marked relevant/live.");
3480 return;
3481 }
3482
3483 VEC_safe_push (gimple, heap, *worklist, stmt);
3484 }
3485
3486
3487 /* Function vect_stmt_relevant_p.
3488
3489 Return true if STMT, in the loop represented by LOOP_VINFO, is
3490 "relevant for vectorization".
3491
3492 A stmt is considered "relevant for vectorization" if:
3493 - it has uses outside the loop.
3494 - it has vdefs (it alters memory).
3495 - it is a control stmt in the loop (other than the exit condition).
3496
3497 CHECKME: what other side effects would the vectorizer allow? */
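/* For example (an illustrative sketch, names made up):

   for (i = 0; i < N; i++)
     {
       a[i] = b[i] + c[i];   <-- relevant: alters memory (has a vdef)
       s = b[i];             <-- live: s is used after the loop
     }
   ... = s;                  <-- use reached through the loop-exit phi
*/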
3498
3499 static bool
3500 vect_stmt_relevant_p (gimple stmt, loop_vec_info loop_vinfo,
3501 enum vect_relevant *relevant, bool *live_p)
3502 {
3503 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
3504 ssa_op_iter op_iter;
3505 imm_use_iterator imm_iter;
3506 use_operand_p use_p;
3507 def_operand_p def_p;
3508
3509 *relevant = vect_unused_in_loop;
3510 *live_p = false;
3511
3512 /* cond stmt other than loop exit cond. */
3513 if (is_ctrl_stmt (stmt)
3514 && STMT_VINFO_TYPE (vinfo_for_stmt (stmt)) != loop_exit_ctrl_vec_info_type)
3515 *relevant = vect_used_in_loop;
3516
3517 /* changing memory. */
3518 if (gimple_code (stmt) != GIMPLE_PHI)
3519 if (!ZERO_SSA_OPERANDS (stmt, SSA_OP_VIRTUAL_DEFS))
3520 {
3521 if (vect_print_dump_info (REPORT_DETAILS))
3522 fprintf (vect_dump, "vec_stmt_relevant_p: stmt has vdefs.");
3523 *relevant = vect_used_in_loop;
3524 }
3525
3526 /* uses outside the loop. */
3527 FOR_EACH_PHI_OR_STMT_DEF (def_p, stmt, op_iter, SSA_OP_DEF)
3528 {
3529 FOR_EACH_IMM_USE_FAST (use_p, imm_iter, DEF_FROM_PTR (def_p))
3530 {
3531 basic_block bb = gimple_bb (USE_STMT (use_p));
3532 if (!flow_bb_inside_loop_p (loop, bb))
3533 {
3534 if (vect_print_dump_info (REPORT_DETAILS))
3535 fprintf (vect_dump, "vec_stmt_relevant_p: used out of loop.");
3536
3537 /* We expect all such uses to be in the loop exit phis
3538 (because of loop-closed SSA form).  */
3539 gcc_assert (gimple_code (USE_STMT (use_p)) == GIMPLE_PHI);
3540 gcc_assert (bb == single_exit (loop)->dest);
3541
3542 *live_p = true;
3543 }
3544 }
3545 }
3546
3547 return (*live_p || *relevant);
3548 }
3549
3550
3551 /* Function process_use.
3553
3554 Inputs:
3555 - a USE in STMT in a loop represented by LOOP_VINFO
3556 - LIVE_P, RELEVANT - enum values to be set in the STMT_VINFO of the stmt
3557 that defined USE. This is done by calling mark_relevant and passing it
3558 the WORKLIST (to add DEF_STMT to the WORKLIST in case it is relevant).
3559
3560 Outputs:
3561 Generally, LIVE_P and RELEVANT are used to define the liveness and
3562 relevance info of the DEF_STMT of this USE:
3563 STMT_VINFO_LIVE_P (DEF_STMT_info) <-- live_p
3564 STMT_VINFO_RELEVANT (DEF_STMT_info) <-- relevant
3565 Exceptions:
3566 - case 1: If USE is used only for address computations (e.g. array indexing),
3567 which does not need to be directly vectorized, then the liveness/relevance
3568 of the respective DEF_STMT is left unchanged.
3569 - case 2: If STMT is a reduction phi and DEF_STMT is a reduction stmt, we
3570 skip DEF_STMT because it has already been processed.
3571 - case 3: If DEF_STMT and STMT are in different nests, then "relevant" will
3572 be modified accordingly.
3573
3574 Return true if everything is as expected. Return false otherwise. */
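/* For example (illustration only), in "x = a[i]" the use of i serves
   only to compute the address of a[i], so by case 1 above the def-stmt
   of i is left unmarked here; a use of x in a later stmt, by contrast,
   does propagate relevance to this load.  */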
3575
3576 static bool
3577 process_use (gimple stmt, tree use, loop_vec_info loop_vinfo, bool live_p,
3578 enum vect_relevant relevant, VEC(gimple,heap) **worklist)
3579 {
3580 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
3581 stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt);
3582 stmt_vec_info dstmt_vinfo;
3583 basic_block bb, def_bb;
3584 tree def;
3585 gimple def_stmt;
3586 enum vect_def_type dt;
3587
3588 /* case 1: we are only interested in uses that need to be vectorized. Uses
3589 that are used for address computation are not considered relevant. */
3590 if (!exist_non_indexing_operands_for_use_p (use, stmt))
3591 return true;
3592
3593 if (!vect_is_simple_use (use, loop_vinfo, &def_stmt, &def, &dt))
3594 {
3595 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
3596 fprintf (vect_dump, "not vectorized: unsupported use in stmt.");
3597 return false;
3598 }
3599
3600 if (!def_stmt || gimple_nop_p (def_stmt))
3601 return true;
3602
3603 def_bb = gimple_bb (def_stmt);
3604 if (!flow_bb_inside_loop_p (loop, def_bb))
3605 {
3606 if (vect_print_dump_info (REPORT_DETAILS))
3607 fprintf (vect_dump, "def_stmt is out of loop.");
3608 return true;
3609 }
3610
3611 /* case 2: A reduction phi (STMT) defined by a reduction stmt (DEF_STMT).
3612 DEF_STMT must have already been processed, because this should be the
3613 only way that STMT, which is a reduction-phi, was put in the worklist,
3614 as there should be no other uses for DEF_STMT in the loop. So we just
3615 check that everything is as expected, and we are done. */
3616 dstmt_vinfo = vinfo_for_stmt (def_stmt);
3617 bb = gimple_bb (stmt);
3618 if (gimple_code (stmt) == GIMPLE_PHI
3619 && STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_reduction_def
3620 && gimple_code (def_stmt) != GIMPLE_PHI
3621 && STMT_VINFO_DEF_TYPE (dstmt_vinfo) == vect_reduction_def
3622 && bb->loop_father == def_bb->loop_father)
3623 {
3624 if (vect_print_dump_info (REPORT_DETAILS))
3625 fprintf (vect_dump, "reduc-stmt defining reduc-phi in the same nest.");
3626 if (STMT_VINFO_IN_PATTERN_P (dstmt_vinfo))
3627 dstmt_vinfo = vinfo_for_stmt (STMT_VINFO_RELATED_STMT (dstmt_vinfo));
3628 gcc_assert (STMT_VINFO_RELEVANT (dstmt_vinfo) < vect_used_by_reduction);
3629 gcc_assert (STMT_VINFO_LIVE_P (dstmt_vinfo)
3630 || STMT_VINFO_RELEVANT (dstmt_vinfo) > vect_unused_in_loop);
3631 return true;
3632 }
3633
3634 /* case 3a: outer-loop stmt defining an inner-loop stmt:
3635 outer-loop-header-bb:
3636 d = def_stmt
3637 inner-loop:
3638 stmt # use (d)
3639 outer-loop-tail-bb:
3640 ... */
3641 if (flow_loop_nested_p (def_bb->loop_father, bb->loop_father))
3642 {
3643 if (vect_print_dump_info (REPORT_DETAILS))
3644 fprintf (vect_dump, "outer-loop def-stmt defining inner-loop stmt.");
3645 switch (relevant)
3646 {
3647 case vect_unused_in_loop:
3648 relevant = (STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_reduction_def) ?
3649 vect_used_by_reduction : vect_unused_in_loop;
3650 break;
3651 case vect_used_in_outer_by_reduction:
3652 relevant = vect_used_by_reduction;
3653 break;
3654 case vect_used_in_outer:
3655 relevant = vect_used_in_loop;
3656 break;
3657 case vect_used_by_reduction:
3658 case vect_used_in_loop:
3659 break;
3660
3661 default:
3662 gcc_unreachable ();
3663 }
3664 }
3665
3666 /* case 3b: inner-loop stmt defining an outer-loop stmt:
3667 outer-loop-header-bb:
3668 ...
3669 inner-loop:
3670 d = def_stmt
3671 outer-loop-tail-bb:
3672 stmt # use (d) */
3673 else if (flow_loop_nested_p (bb->loop_father, def_bb->loop_father))
3674 {
3675 if (vect_print_dump_info (REPORT_DETAILS))
3676 fprintf (vect_dump, "inner-loop def-stmt defining outer-loop stmt.");
3677 switch (relevant)
3678 {
3679 case vect_unused_in_loop:
3680 relevant = (STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_reduction_def) ?
3681 vect_used_in_outer_by_reduction : vect_unused_in_loop;
3682 break;
3683
3684 case vect_used_in_outer_by_reduction:
3685 case vect_used_in_outer:
3686 break;
3687
3688 case vect_used_by_reduction:
3689 relevant = vect_used_in_outer_by_reduction;
3690 break;
3691
3692 case vect_used_in_loop:
3693 relevant = vect_used_in_outer;
3694 break;
3695
3696 default:
3697 gcc_unreachable ();
3698 }
3699 }
3700
3701 vect_mark_relevant (worklist, def_stmt, relevant, live_p);
3702 return true;
3703 }
3704
3705
3706 /* Function vect_mark_stmts_to_be_vectorized.
3707
3708 Not all stmts in the loop need to be vectorized. For example:
3709
3710 for i...
3711 for j...
3712 1. T0 = i + j
3713 2. T1 = a[T0]
3714
3715 3. j = j + 1
3716
3717 Stmts 1 and 3 do not need to be vectorized, because loop control and
3718 addressing of vectorized data-refs are handled differently.
3719
3720 This pass detects such stmts. */
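/* The detection is a backward data-flow propagation: a worklist is
   seeded with the stmts that are relevant or live by themselves
   (stores, control stmts, stmts whose results are used after the
   loop), and then each popped stmt propagates its relevance to the
   def-stmts of its uses via process_use.  In the example above, the
   use of T0 in stmt 2 is an address computation, so popping stmt 2
   does not mark stmt 1.  */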
3721
3722 static bool
3723 vect_mark_stmts_to_be_vectorized (loop_vec_info loop_vinfo)
3724 {
3725 VEC(gimple,heap) *worklist;
3726 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
3727 basic_block *bbs = LOOP_VINFO_BBS (loop_vinfo);
3728 unsigned int nbbs = loop->num_nodes;
3729 gimple_stmt_iterator si;
3730 gimple stmt;
3731 unsigned int i;
3732 stmt_vec_info stmt_vinfo;
3733 basic_block bb;
3734 gimple phi;
3735 bool live_p;
3736 enum vect_relevant relevant;
3737
3738 if (vect_print_dump_info (REPORT_DETAILS))
3739 fprintf (vect_dump, "=== vect_mark_stmts_to_be_vectorized ===");
3740
3741 worklist = VEC_alloc (gimple, heap, 64);
3742
3743 /* 1. Init worklist. */
3744 for (i = 0; i < nbbs; i++)
3745 {
3746 bb = bbs[i];
3747 for (si = gsi_start_phis (bb); !gsi_end_p (si); gsi_next (&si))
3748 {
3749 phi = gsi_stmt (si);
3750 if (vect_print_dump_info (REPORT_DETAILS))
3751 {
3752 fprintf (vect_dump, "init: phi relevant? ");
3753 print_gimple_stmt (vect_dump, phi, 0, TDF_SLIM);
3754 }
3755
3756 if (vect_stmt_relevant_p (phi, loop_vinfo, &relevant, &live_p))
3757 vect_mark_relevant (&worklist, phi, relevant, live_p);
3758 }
3759 for (si = gsi_start_bb (bb); !gsi_end_p (si); gsi_next (&si))
3760 {
3761 stmt = gsi_stmt (si);
3762 if (vect_print_dump_info (REPORT_DETAILS))
3763 {
3764 fprintf (vect_dump, "init: stmt relevant? ");
3765 print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
3766 }
3767
3768 if (vect_stmt_relevant_p (stmt, loop_vinfo, &relevant, &live_p))
3769 vect_mark_relevant (&worklist, stmt, relevant, live_p);
3770 }
3771 }
3772
3773 /* 2. Process worklist.  */
3774 while (VEC_length (gimple, worklist) > 0)
3775 {
3776 use_operand_p use_p;
3777 ssa_op_iter iter;
3778
3779 stmt = VEC_pop (gimple, worklist);
3780 if (vect_print_dump_info (REPORT_DETAILS))
3781 {
3782 fprintf (vect_dump, "worklist: examine stmt: ");
3783 print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
3784 }
3785
3786 /* Examine the USEs of STMT. For each USE, mark the stmt that defines it
3787 (DEF_STMT) as relevant/irrelevant and live/dead according to the
3788 liveness and relevance properties of STMT. */
3789 stmt_vinfo = vinfo_for_stmt (stmt);
3790 relevant = STMT_VINFO_RELEVANT (stmt_vinfo);
3791 live_p = STMT_VINFO_LIVE_P (stmt_vinfo);
3792
3793 /* Generally, the liveness and relevance properties of STMT are
3794 propagated as is to the DEF_STMTs of its USEs:
3795 live_p <-- STMT_VINFO_LIVE_P (STMT_VINFO)
3796 relevant <-- STMT_VINFO_RELEVANT (STMT_VINFO)
3797
3798 One exception is when STMT has been identified as defining a reduction
3799 variable; in this case we set the liveness/relevance as follows:
3800 live_p = false
3801 relevant = vect_used_by_reduction
3802 This is because we distinguish between two kinds of relevant stmts -
3803 those that are used by a reduction computation, and those that are
3804 (also) used by a regular computation. This allows us later on to
3805 identify stmts that are used solely by a reduction, and therefore the
3806 order of the results that they produce does not have to be kept.
3807
3808 Reduction phis are expected to be used by a reduction stmt, or by
3809 a stmt in an outer loop; other reduction stmts are expected to be
3810 in the loop, and possibly used by a stmt in an outer loop.
3811 Here are the expected values of "relevant" for reduction phis/stmts:
3812
3813 relevance:                              phi    stmt
3814 vect_unused_in_loop                            ok
3815 vect_used_in_outer_by_reduction         ok     ok
3816 vect_used_in_outer                      ok     ok
3817 vect_used_by_reduction                  ok
3818 vect_used_in_loop                                     */
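/* For example (an illustrative sketch), given the reduction

   sum_1 = PHI <sum_init, sum_2>
   t = a[i];
   sum_2 = sum_1 + t;

   the phi and the add form the reduction cycle, and the load of a[i]
   feeds only the reduction, so it is marked vect_used_by_reduction and
   the order of the lanes it produces need not be preserved.  */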
3819
3820 if (STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_reduction_def)
3821 {
3822 enum vect_relevant tmp_relevant = relevant;
3823 switch (tmp_relevant)
3824 {
3825 case vect_unused_in_loop:
3826 gcc_assert (gimple_code (stmt) != GIMPLE_PHI);
3827 relevant = vect_used_by_reduction;
3828 break;
3829
3830 case vect_used_in_outer_by_reduction:
3831 case vect_used_in_outer:
3832 gcc_assert (!is_gimple_assign (stmt)
3833 || (gimple_assign_rhs_code (stmt) != WIDEN_SUM_EXPR
&& gimple_assign_rhs_code (stmt) != DOT_PROD_EXPR));
3834 break;
3835
3836 case vect_used_by_reduction:
3837 if (gimple_code (stmt) == GIMPLE_PHI)
3838 break;
3839 /* fall through */
3840 case vect_used_in_loop:
3841 default:
3842 if (vect_print_dump_info (REPORT_DETAILS))
3843 fprintf (vect_dump, "unsupported use of reduction.");
3844 VEC_free (gimple, heap, worklist);
3845 return false;
3846 }
3847 live_p = false;
3848 }
3849
3850 FOR_EACH_PHI_OR_STMT_USE (use_p, stmt, iter, SSA_OP_USE)
3851 {
3852 tree op = USE_FROM_PTR (use_p);
3853 if (!process_use (stmt, op, loop_vinfo, live_p, relevant, &worklist))
3854 {
3855 VEC_free (gimple, heap, worklist);
3856 return false;
3857 }
3858 }
3859 } /* while worklist */
3860
3861 VEC_free (gimple, heap, worklist);
3862 return true;
3863 }
3864
3865
3866 /* Function vect_can_advance_ivs_p
3867
3868 In case the number of iterations of LOOP is unknown at compile
3869 time, an epilog loop will be generated, and the loop induction variables
3870 (IVs) will be "advanced" to the value they are supposed to take just before
3871 the epilog loop. Here we check that the access function of the loop IVs
3872 and the expression that represents the loop bound are simple enough.
3873 These restrictions will be relaxed in the future. */
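/* For example (a sketch; the actual expression is constructed by the
   peeling code), an IV with access function {init, +, step}_loop is
   advanced to init + step * ratio, where ratio is the number of
   iterations executed by the vectorized loop, so that the epilog loop
   starts from the correct value.  */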
3874
3875 static bool
3876 vect_can_advance_ivs_p (loop_vec_info loop_vinfo)
3877 {
3878 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
3879 basic_block bb = loop->header;
3880 gimple phi;
3881 gimple_stmt_iterator gsi;
3882
3883 /* Analyze phi functions of the loop header. */
3884
3885 if (vect_print_dump_info (REPORT_DETAILS))
3886 fprintf (vect_dump, "vect_can_advance_ivs_p:");
3887
3888 for (gsi = gsi_start_phis (bb); !gsi_end_p (gsi); gsi_next (&gsi))
3889 {
3890 tree access_fn = NULL;
3891 tree evolution_part;
3892
3893 phi = gsi_stmt (gsi);
3894 if (vect_print_dump_info (REPORT_DETAILS))
3895 {
3896 fprintf (vect_dump, "Analyze phi: ");
3897 print_gimple_stmt (vect_dump, phi, 0, TDF_SLIM);
3898 }
3899
3900 /* Skip virtual phis. The data dependences that are associated with
3901 virtual defs/uses (i.e., memory accesses) are analyzed elsewhere. */
3902
3903 if (!is_gimple_reg (SSA_NAME_VAR (PHI_RESULT (phi))))
3904 {
3905 if (vect_print_dump_info (REPORT_DETAILS))
3906 fprintf (vect_dump, "virtual phi. skip.");
3907 continue;
3908 }
3909
3910 /* Skip reduction phis. */
3911
3912 if (STMT_VINFO_DEF_TYPE (vinfo_for_stmt (phi)) == vect_reduction_def)
3913 {
3914 if (vect_print_dump_info (REPORT_DETAILS))
3915 fprintf (vect_dump, "reduc phi. skip.");
3916 continue;
3917 }
3918
3919 /* Analyze the evolution function. */
3920
3921 access_fn = instantiate_parameters
3922 (loop, analyze_scalar_evolution (loop, PHI_RESULT (phi)));
3923
3924 if (!access_fn)
3925 {
3926 if (vect_print_dump_info (REPORT_DETAILS))
3927 fprintf (vect_dump, "No access function.");
3928 return false;
3929 }
3930
3931 if (vect_print_dump_info (REPORT_DETAILS))
3932 {
3933 fprintf (vect_dump, "Access function of PHI: ");
3934 print_generic_expr (vect_dump, access_fn, TDF_SLIM);
3935 }
3936
3937 evolution_part = evolution_part_in_loop_num (access_fn, loop->num);
3938
3939 if (evolution_part == NULL_TREE)
3940 {
3941 if (vect_print_dump_info (REPORT_DETAILS))
3942 fprintf (vect_dump, "No evolution.");
3943 return false;
3944 }
3945
3946 /* FORNOW: We do not transform initial conditions of IVs
3947 whose evolution functions are polynomials of degree >= 2, i.e. whose step is itself a chrec. */
3948
3949 if (tree_is_chrec (evolution_part))
3950 return false;
3951 }
3952
3953 return true;
3954 }
3955
3956
3957 /* Function vect_get_loop_niters.
3958
3959 Determine the number of iterations the loop executes.
3960 If an expression that represents the number of iterations
3961 can be constructed, place it in NUMBER_OF_ITERATIONS.
3962 Return the loop exit condition. */
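/* E.g. for a loop in the expected do-while form

   i = 0;
   do { ...; i++; } while (i < N);

   the exit condition executes once per iteration, so the expression
   placed in NUMBER_OF_ITERATIONS evaluates to N (illustration,
   assuming N >= 1).  */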
3963
3964 static gimple
3965 vect_get_loop_niters (struct loop *loop, tree *number_of_iterations)
3966 {
3967 tree niters;
3968
3969 if (vect_print_dump_info (REPORT_DETAILS))
3970 fprintf (vect_dump, "=== get_loop_niters ===");
3971
3972 niters = number_of_exit_cond_executions (loop);
3973
3974 if (niters != NULL_TREE
3975 && niters != chrec_dont_know)
3976 {
3977 *number_of_iterations = niters;
3978
3979 if (vect_print_dump_info (REPORT_DETAILS))
3980 {
3981 fprintf (vect_dump, "==> get_loop_niters:");
3982 print_generic_expr (vect_dump, *number_of_iterations, TDF_SLIM);
3983 }
3984 }
3985
3986 return get_loop_exit_condition (loop);
3987 }
3988
3989
3990 /* Function vect_analyze_loop_1.
3991
3992 Apply a set of analyses on LOOP, and create a loop_vec_info struct
3993 for it. The different analyses will record information in the
3994 loop_vec_info struct. This is a subset of the analyses applied in
3995 vect_analyze_loop, to be applied on an inner-loop nested in the loop
3996 that is now considered for (outer-loop) vectorization. */
3997
3998 static loop_vec_info
3999 vect_analyze_loop_1 (struct loop *loop)
4000 {
4001 loop_vec_info loop_vinfo;
4002
4003 if (vect_print_dump_info (REPORT_DETAILS))
4004 fprintf (vect_dump, "===== analyze_loop_nest_1 =====");
4005
4006 /* Check the CFG characteristics of the loop (nesting, entry/exit, etc.).  */
4007
4008 loop_vinfo = vect_analyze_loop_form (loop);
4009 if (!loop_vinfo)
4010 {
4011 if (vect_print_dump_info (REPORT_DETAILS))
4012 fprintf (vect_dump, "bad inner-loop form.");
4013 return NULL;
4014 }
4015
4016 return loop_vinfo;
4017 }
4018
4019
4020 /* Function vect_analyze_loop_form.
4021
4022 Verify that certain CFG restrictions hold, including:
4023 - the loop has a pre-header
4024 - the loop has a single entry and exit
4025 - the loop exit condition is simple enough, and the number of iterations
4026 can be analyzed (a countable loop). */
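/* For example (illustration only), a countable loop already in
   do-while form, such as

   i = 0;
   do
     {
       a[i] = b[i];
       i++;
     }
   while (i < N);

   satisfies these restrictions: single entry/exit, empty latch, and an
   analyzable iteration count.  */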
4027
4028 loop_vec_info
4029 vect_analyze_loop_form (struct loop *loop)
4030 {
4031 loop_vec_info loop_vinfo;
4032 gimple loop_cond;
4033 tree number_of_iterations = NULL;
4034 loop_vec_info inner_loop_vinfo = NULL;
4035
4036 if (vect_print_dump_info (REPORT_DETAILS))
4037 fprintf (vect_dump, "=== vect_analyze_loop_form ===");
4038
4039 /* Different restrictions apply when we are considering an inner-most loop,
4040 vs. an outer (nested) loop.
4041 (FORNOW; some of these restrictions may be relaxed in the future.)  */
4042
4043 if (!loop->inner)
4044 {
4045 /* Inner-most loop. We currently require that the number of BBs is
4046 exactly 2 (the header and latch). Vectorizable inner-most loops
4047 look like this:
4048
4049 (pre-header)
4050 |
4051 header <--------+
4052 | | |
4053 | +--> latch --+
4054 |
4055 (exit-bb) */
4056
4057 if (loop->num_nodes != 2)
4058 {
4059 if (vect_print_dump_info (REPORT_BAD_FORM_LOOPS))
4060 fprintf (vect_dump, "not vectorized: too many BBs in loop.");
4061 return NULL;
4062 }
4063
4064 if (empty_block_p (loop->header))
4065 {
4066 if (vect_print_dump_info (REPORT_BAD_FORM_LOOPS))
4067 fprintf (vect_dump, "not vectorized: empty loop.");
4068 return NULL;
4069 }
4070 }
4071 else
4072 {
4073 struct loop *innerloop = loop->inner;
4074 edge backedge, entryedge;
4075
4076 /* Nested loop. We currently require that the loop is doubly-nested,
4077 contains a single inner loop, and the number of BBs is exactly 5.
4078 Vectorizable outer-loops look like this:
4079
4080 (pre-header)
4081 |
4082 header <---+
4083 | |
4084 inner-loop |
4085 | |
4086 tail ------+
4087 |
4088 (exit-bb)
4089
4090 The inner-loop has the properties expected of inner-most loops
4091 as described above. */
4092
4093 if ((loop->inner)->inner || (loop->inner)->next)
4094 {
4095 if (vect_print_dump_info (REPORT_BAD_FORM_LOOPS))
4096 fprintf (vect_dump, "not vectorized: multiple nested loops.");
4097 return NULL;
4098 }
4099
4100 /* Analyze the inner-loop. */
4101 inner_loop_vinfo = vect_analyze_loop_1 (loop->inner);
4102 if (!inner_loop_vinfo)
4103 {
4104 if (vect_print_dump_info (REPORT_BAD_FORM_LOOPS))
4105 fprintf (vect_dump, "not vectorized: Bad inner loop.");
4106 return NULL;
4107 }
4108
4109 if (!expr_invariant_in_loop_p (loop,
4110 LOOP_VINFO_NITERS (inner_loop_vinfo)))
4111 {
4112 if (vect_print_dump_info (REPORT_BAD_FORM_LOOPS))
4113 fprintf (vect_dump,
4114 "not vectorized: inner-loop count not invariant.");
4115 destroy_loop_vec_info (inner_loop_vinfo, true);
4116 return NULL;
4117 }
4118
4119 if (loop->num_nodes != 5)
4120 {
4121 if (vect_print_dump_info (REPORT_BAD_FORM_LOOPS))
4122 fprintf (vect_dump, "not vectorized: too many BBs in loop.");
4123 destroy_loop_vec_info (inner_loop_vinfo, true);
4124 return NULL;
4125 }
4126
4127 gcc_assert (EDGE_COUNT (innerloop->header->preds) == 2);
4128 backedge = EDGE_PRED (innerloop->header, 1);
4129 entryedge = EDGE_PRED (innerloop->header, 0);
4130 if (EDGE_PRED (innerloop->header, 0)->src == innerloop->latch)
4131 {
4132 backedge = EDGE_PRED (innerloop->header, 0);
4133 entryedge = EDGE_PRED (innerloop->header, 1);
4134 }
4135
4136 if (entryedge->src != loop->header
4137 || !single_exit (innerloop)
4138 || single_exit (innerloop)->dest != EDGE_PRED (loop->latch, 0)->src)
4139 {
4140 if (vect_print_dump_info (REPORT_BAD_FORM_LOOPS))
4141 fprintf (vect_dump, "not vectorized: unsupported outerloop form.");
4142 destroy_loop_vec_info (inner_loop_vinfo, true);
4143 return NULL;
4144 }
4145
4146 if (vect_print_dump_info (REPORT_DETAILS))
4147 fprintf (vect_dump, "Considering outer-loop vectorization.");
4148 }
4149
4150 if (!single_exit (loop)
4151 || EDGE_COUNT (loop->header->preds) != 2)
4152 {
4153 if (vect_print_dump_info (REPORT_BAD_FORM_LOOPS))
4154 {
4155 if (!single_exit (loop))
4156 fprintf (vect_dump, "not vectorized: multiple exits.");
4157 else if (EDGE_COUNT (loop->header->preds) != 2)
4158 fprintf (vect_dump, "not vectorized: too many incoming edges.");
4159 }
4160 if (inner_loop_vinfo)
4161 destroy_loop_vec_info (inner_loop_vinfo, true);
4162 return NULL;
4163 }
4164
4165 /* We assume that the loop exit condition is at the end of the loop, i.e.,
4166 that the loop is represented as a do-while (with a proper if-guard
4167 before the loop if needed), where the loop header contains all the
4168 executable statements, and the latch is empty. */
4169 if (!empty_block_p (loop->latch)
4170 || phi_nodes (loop->latch))
4171 {
4172 if (vect_print_dump_info (REPORT_BAD_FORM_LOOPS))
4173 fprintf (vect_dump, "not vectorized: unexpected loop form.");
4174 if (inner_loop_vinfo)
4175 destroy_loop_vec_info (inner_loop_vinfo, true);
4176 return NULL;
4177 }
4178
4179 /* Make sure there exists a single-predecessor exit bb: */
4180 if (!single_pred_p (single_exit (loop)->dest))
4181 {
4182 edge e = single_exit (loop);
4183 if (!(e->flags & EDGE_ABNORMAL))
4184 {
4185 split_loop_exit_edge (e);
4186 if (vect_print_dump_info (REPORT_DETAILS))
4187 fprintf (vect_dump, "split exit edge.");
4188 }
4189 else
4190 {
4191 if (vect_print_dump_info (REPORT_BAD_FORM_LOOPS))
4192 fprintf (vect_dump, "not vectorized: abnormal loop exit edge.");
4193 if (inner_loop_vinfo)
4194 destroy_loop_vec_info (inner_loop_vinfo, true);
4195 return NULL;
4196 }
4197 }
4198
4199 loop_cond = vect_get_loop_niters (loop, &number_of_iterations);
4200 if (!loop_cond)
4201 {
4202 if (vect_print_dump_info (REPORT_BAD_FORM_LOOPS))
4203 fprintf (vect_dump, "not vectorized: complicated exit condition.");
4204 if (inner_loop_vinfo)
4205 destroy_loop_vec_info (inner_loop_vinfo, true);
4206 return NULL;
4207 }
4208
4209 if (!number_of_iterations)
4210 {
4211 if (vect_print_dump_info (REPORT_BAD_FORM_LOOPS))
4212 fprintf (vect_dump,
4213 "not vectorized: number of iterations cannot be computed.");
4214 if (inner_loop_vinfo)
4215 destroy_loop_vec_info (inner_loop_vinfo, true);
4216 return NULL;
4217 }
4218
4219 if (chrec_contains_undetermined (number_of_iterations))
4220 {
4221 if (vect_print_dump_info (REPORT_BAD_FORM_LOOPS))
4222 fprintf (vect_dump, "Infinite number of iterations.");
4223 if (inner_loop_vinfo)
4224 destroy_loop_vec_info (inner_loop_vinfo, true);
4225 return NULL;
4226 }
4227
4228 if (!NITERS_KNOWN_P (number_of_iterations))
4229 {
4230 if (vect_print_dump_info (REPORT_DETAILS))
4231 {
4232 fprintf (vect_dump, "Symbolic number of iterations is ");
4233 print_generic_expr (vect_dump, number_of_iterations, TDF_DETAILS);
4234 }
4235 }
4236 else if (TREE_INT_CST_LOW (number_of_iterations) == 0)
4237 {
4238 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
4239 fprintf (vect_dump, "not vectorized: number of iterations = 0.");
4240 if (inner_loop_vinfo)
4241 destroy_loop_vec_info (inner_loop_vinfo, false);
4242 return NULL;
4243 }
4244
4245 loop_vinfo = new_loop_vec_info (loop);
4246 LOOP_VINFO_NITERS (loop_vinfo) = number_of_iterations;
4247 LOOP_VINFO_NITERS_UNCHANGED (loop_vinfo) = number_of_iterations;
4248
4249 STMT_VINFO_TYPE (vinfo_for_stmt (loop_cond)) = loop_exit_ctrl_vec_info_type;
4250
4251 /* CHECKME: May want to keep it around in the future.  */
4252 if (inner_loop_vinfo)
4253 destroy_loop_vec_info (inner_loop_vinfo, false);
4254
4255 gcc_assert (!loop->aux);
4256 loop->aux = loop_vinfo;
4257 return loop_vinfo;
4258 }
4259
4260
4261 /* Function vect_analyze_loop.
4262
4263 Apply a set of analyses on LOOP, and create a loop_vec_info struct
4264 for it. The different analyses will record information in the
4265 loop_vec_info struct. */
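/* The analyses below run in order: loop form, data-ref detection,
   scalar-cycle classification, pattern recognition, relevance marking,
   alignment analysis, vectorization-factor computation, dependence
   analysis, access-pattern analysis, runtime-alias pruning, SLP
   detection, alignment enhancement, and the per-operation
   vectorizability check.  */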
4266 loop_vec_info
4267 vect_analyze_loop (struct loop *loop)
4268 {
4269 bool ok;
4270 loop_vec_info loop_vinfo;
4271
4272 if (vect_print_dump_info (REPORT_DETAILS))
4273 fprintf (vect_dump, "===== analyze_loop_nest =====");
4274
4275 if (loop_outer (loop)
4276 && loop_vec_info_for_loop (loop_outer (loop))
4277 && LOOP_VINFO_VECTORIZABLE_P (loop_vec_info_for_loop (loop_outer (loop))))
4278 {
4279 if (vect_print_dump_info (REPORT_DETAILS))
4280 fprintf (vect_dump, "outer-loop already vectorized.");
4281 return NULL;
4282 }
4283
4284 /* Check the CFG characteristics of the loop (nesting, entry/exit, etc.).  */
4285
4286 loop_vinfo = vect_analyze_loop_form (loop);
4287 if (!loop_vinfo)
4288 {
4289 if (vect_print_dump_info (REPORT_DETAILS))
4290 fprintf (vect_dump, "bad loop form.");
4291 return NULL;
4292 }
4293
4294 /* Find all data references in the loop (which correspond to vdefs/vuses)
4295 and analyze their evolution in the loop.
4296
4297 FORNOW: Handle only simple array references whose
4298 alignment can be forced, and aligned pointer-references. */
4299
4300 ok = vect_analyze_data_refs (loop_vinfo);
4301 if (!ok)
4302 {
4303 if (vect_print_dump_info (REPORT_DETAILS))
4304 fprintf (vect_dump, "bad data references.");
4305 destroy_loop_vec_info (loop_vinfo, true);
4306 return NULL;
4307 }
4308
4309 /* Classify all cross-iteration scalar data-flow cycles.
4310 Cross-iteration cycles caused by virtual phis are analyzed separately. */
4311
4312 vect_analyze_scalar_cycles (loop_vinfo);
4313
4314 vect_pattern_recog (loop_vinfo);
4315
4316 /* Data-flow analysis to detect stmts that do not need to be vectorized. */
4317
4318 ok = vect_mark_stmts_to_be_vectorized (loop_vinfo);
4319 if (!ok)
4320 {
4321 if (vect_print_dump_info (REPORT_DETAILS))
4322 fprintf (vect_dump, "unexpected pattern.");
4323 destroy_loop_vec_info (loop_vinfo, true);
4324 return NULL;
4325 }
4326
4327 /* Analyze the alignment of the data-refs in the loop.
4328 Fail if a data reference is found that cannot be vectorized. */
4329
4330 ok = vect_analyze_data_refs_alignment (loop_vinfo);
4331 if (!ok)
4332 {
4333 if (vect_print_dump_info (REPORT_DETAILS))
4334 fprintf (vect_dump, "bad data alignment.");
4335 destroy_loop_vec_info (loop_vinfo, true);
4336 return NULL;
4337 }
4338
4339 ok = vect_determine_vectorization_factor (loop_vinfo);
4340 if (!ok)
4341 {
4342 if (vect_print_dump_info (REPORT_DETAILS))
4343 fprintf (vect_dump, "can't determine vectorization factor.");
4344 destroy_loop_vec_info (loop_vinfo, true);
4345 return NULL;
4346 }
4347
4348 /* Analyze data dependences between the data-refs in the loop.
4349 FORNOW: fail at the first data dependence that we encounter. */
4350
4351 ok = vect_analyze_data_ref_dependences (loop_vinfo);
4352 if (!ok)
4353 {
4354 if (vect_print_dump_info (REPORT_DETAILS))
4355 fprintf (vect_dump, "bad data dependence.");
4356 destroy_loop_vec_info (loop_vinfo, true);
4357 return NULL;
4358 }
4359
4360 /* Analyze the access patterns of the data-refs in the loop (consecutive,
4361 complex, etc.). FORNOW: Only handle consecutive access pattern. */
4362
4363 ok = vect_analyze_data_ref_accesses (loop_vinfo);
4364 if (!ok)
4365 {
4366 if (vect_print_dump_info (REPORT_DETAILS))
4367 fprintf (vect_dump, "bad data access.");
4368 destroy_loop_vec_info (loop_vinfo, true);
4369 return NULL;
4370 }
4371
4372 /* Prune the list of ddrs to be tested at run-time by versioning for alias.
4373 It is important to call pruning after vect_analyze_data_ref_accesses,
4374 since we use grouping information gathered by interleaving analysis. */
4375 ok = vect_prune_runtime_alias_test_list (loop_vinfo);
4376 if (!ok)
4377 {
4378 if (vect_print_dump_info (REPORT_DETAILS))
4379 fprintf (vect_dump, "too long list of versioning for alias "
4380 "run-time tests.");
4381 destroy_loop_vec_info (loop_vinfo, true);
4382 return NULL;
4383 }
4384
4385 /* Check the SLP opportunities in the loop, analyze and build SLP trees. */
4386 ok = vect_analyze_slp (loop_vinfo);
4387 if (ok)
4388 {
4389 /* Decide which possible SLP instances to SLP. */
4390 vect_make_slp_decision (loop_vinfo);
4391
4392 /* Find stmts that need to be both vectorized and SLPed. */
4393 vect_detect_hybrid_slp (loop_vinfo);
4394 }
4395
4396 /* This pass will decide on using loop versioning and/or loop peeling in
4397 order to enhance the alignment of data references in the loop. */
4398
4399 ok = vect_enhance_data_refs_alignment (loop_vinfo);
4400 if (!ok)
4401 {
4402 if (vect_print_dump_info (REPORT_DETAILS))
4403 fprintf (vect_dump, "bad data alignment.");
4404 destroy_loop_vec_info (loop_vinfo, true);
4405 return NULL;
4406 }
4407
4408 /* Scan all the operations in the loop and make sure they are
4409 vectorizable. */
4410
4411 ok = vect_analyze_operations (loop_vinfo);
4412 if (!ok)
4413 {
4414 if (vect_print_dump_info (REPORT_DETAILS))
4415 fprintf (vect_dump, "bad operation or unsupported loop bound.");
4416 destroy_loop_vec_info (loop_vinfo, true);
4417 return NULL;
4418 }
4419
4420 LOOP_VINFO_VECTORIZABLE_P (loop_vinfo) = 1;
4421
4422 return loop_vinfo;
4423 }