1 /* Analysis Utilities for Loop Vectorization.
2 Copyright (C) 2003, 2004, 2005, 2006, 2007, 2008 Free Software
4 Contributed by Dorit Naishlos <dorit@il.ibm.com>
6 This file is part of GCC.
8 GCC is free software; you can redistribute it and/or modify it under
9 the terms of the GNU General Public License as published by the Free
10 Software Foundation; either version 3, or (at your option) any later
13 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
14 WARRANTY; without even the implied warranty of MERCHANTABILITY or
15 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
18 You should have received a copy of the GNU General Public License
19 along with GCC; see the file COPYING3. If not see
20 <http://www.gnu.org/licenses/>. */
24 #include "coretypes.h"
29 #include "basic-block.h"
30 #include "diagnostic.h"
31 #include "tree-flow.h"
32 #include "tree-dump.h"
38 #include "tree-chrec.h"
39 #include "tree-data-ref.h"
40 #include "tree-scalar-evolution.h"
41 #include "tree-vectorizer.h"
45 static bool vect_can_advance_ivs_p (loop_vec_info
);
47 /* Function vect_determine_vectorization_factor
49 Determine the vectorization factor (VF). VF is the number of data elements
50 that are operated upon in parallel in a single iteration of the vectorized
51 loop. For example, when vectorizing a loop that operates on 4byte elements,
52 on a target with vector size (VS) 16byte, the VF is set to 4, since 4
53 elements can fit in a single vector register.
55 We currently support vectorization of loops in which all types operated upon
56 are of the same size. Therefore this function currently sets VF according to
57 the size of the types operated upon, and fails if there are multiple sizes
60 VF is also the factor by which the loop iterations are strip-mined, e.g.:
67 for (i=0; i<N; i+=VF){
68 a[i:VF] = b[i:VF] + c[i:VF];
73 vect_determine_vectorization_factor (loop_vec_info loop_vinfo
)
75 struct loop
*loop
= LOOP_VINFO_LOOP (loop_vinfo
);
76 basic_block
*bbs
= LOOP_VINFO_BBS (loop_vinfo
);
77 int nbbs
= loop
->num_nodes
;
78 gimple_stmt_iterator si
;
79 unsigned int vectorization_factor
= 0;
84 stmt_vec_info stmt_info
;
87 if (vect_print_dump_info (REPORT_DETAILS
))
88 fprintf (vect_dump
, "=== vect_determine_vectorization_factor ===");
90 for (i
= 0; i
< nbbs
; i
++)
92 basic_block bb
= bbs
[i
];
94 for (si
= gsi_start_phis (bb
); !gsi_end_p (si
); gsi_next (&si
))
97 stmt_info
= vinfo_for_stmt (phi
);
98 if (vect_print_dump_info (REPORT_DETAILS
))
100 fprintf (vect_dump
, "==> examining phi: ");
101 print_gimple_stmt (vect_dump
, phi
, 0, TDF_SLIM
);
104 gcc_assert (stmt_info
);
106 if (STMT_VINFO_RELEVANT_P (stmt_info
))
108 gcc_assert (!STMT_VINFO_VECTYPE (stmt_info
));
109 scalar_type
= TREE_TYPE (PHI_RESULT (phi
));
111 if (vect_print_dump_info (REPORT_DETAILS
))
113 fprintf (vect_dump
, "get vectype for scalar type: ");
114 print_generic_expr (vect_dump
, scalar_type
, TDF_SLIM
);
117 vectype
= get_vectype_for_scalar_type (scalar_type
);
120 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS
))
123 "not vectorized: unsupported data-type ");
124 print_generic_expr (vect_dump
, scalar_type
, TDF_SLIM
);
128 STMT_VINFO_VECTYPE (stmt_info
) = vectype
;
130 if (vect_print_dump_info (REPORT_DETAILS
))
132 fprintf (vect_dump
, "vectype: ");
133 print_generic_expr (vect_dump
, vectype
, TDF_SLIM
);
136 nunits
= TYPE_VECTOR_SUBPARTS (vectype
);
137 if (vect_print_dump_info (REPORT_DETAILS
))
138 fprintf (vect_dump
, "nunits = %d", nunits
);
140 if (!vectorization_factor
141 || (nunits
> vectorization_factor
))
142 vectorization_factor
= nunits
;
146 for (si
= gsi_start_bb (bb
); !gsi_end_p (si
); gsi_next (&si
))
148 gimple stmt
= gsi_stmt (si
);
149 stmt_info
= vinfo_for_stmt (stmt
);
151 if (vect_print_dump_info (REPORT_DETAILS
))
153 fprintf (vect_dump
, "==> examining statement: ");
154 print_gimple_stmt (vect_dump
, stmt
, 0, TDF_SLIM
);
157 gcc_assert (stmt_info
);
159 /* skip stmts which do not need to be vectorized. */
160 if (!STMT_VINFO_RELEVANT_P (stmt_info
)
161 && !STMT_VINFO_LIVE_P (stmt_info
))
163 if (vect_print_dump_info (REPORT_DETAILS
))
164 fprintf (vect_dump
, "skip.");
168 if (gimple_get_lhs (stmt
) == NULL_TREE
)
170 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS
))
172 fprintf (vect_dump
, "not vectorized: irregular stmt.");
173 print_gimple_stmt (vect_dump
, stmt
, 0, TDF_SLIM
);
178 if (VECTOR_MODE_P (TYPE_MODE (gimple_expr_type (stmt
))))
180 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS
))
182 fprintf (vect_dump
, "not vectorized: vector stmt in loop:");
183 print_gimple_stmt (vect_dump
, stmt
, 0, TDF_SLIM
);
188 if (STMT_VINFO_VECTYPE (stmt_info
))
190 /* The only case when a vectype had been already set is for stmts
191 that contain a dataref, or for "pattern-stmts" (stmts generated
192 by the vectorizer to represent/replace a certain idiom). */
193 gcc_assert (STMT_VINFO_DATA_REF (stmt_info
)
194 || is_pattern_stmt_p (stmt_info
));
195 vectype
= STMT_VINFO_VECTYPE (stmt_info
);
200 gcc_assert (! STMT_VINFO_DATA_REF (stmt_info
)
201 && !is_pattern_stmt_p (stmt_info
));
203 /* We generally set the vectype according to the type of the
205 For stmts whose result-type is different than the type of the
206 arguments (e.g. demotion, promotion), vectype will be reset
207 appropriately (later). Note that we have to visit the smallest
208 datatype in this function, because that determines the VF.
209 If the smallest datatype in the loop is present only as the
210 rhs of a promotion operation - we'd miss it here.
211 Such a case, where a variable of this datatype does not appear
212 in the lhs anywhere in the loop, can only occur if it's an
213 invariant: e.g.: 'int_x = (int) short_inv', which we'd expect
214 to have been optimized away by invariant motion. However, we
215 cannot rely on invariant motion to always take invariants out
216 of the loop, and so in the case of promotion we also have to
218 scalar_type
= gimple_expr_type (stmt
);
220 if (is_gimple_assign (stmt
)
221 && (gimple_assign_cast_p (stmt
)
222 || gimple_assign_rhs_code (stmt
) == WIDEN_MULT_EXPR
223 || gimple_assign_rhs_code (stmt
) == FLOAT_EXPR
))
225 tree rhs_type
= TREE_TYPE (gimple_assign_rhs1 (stmt
));
226 if (TREE_INT_CST_LOW (TYPE_SIZE_UNIT (rhs_type
))
227 < TREE_INT_CST_LOW (TYPE_SIZE_UNIT (scalar_type
)))
228 scalar_type
= rhs_type
;
231 if (vect_print_dump_info (REPORT_DETAILS
))
233 fprintf (vect_dump
, "get vectype for scalar type: ");
234 print_generic_expr (vect_dump
, scalar_type
, TDF_SLIM
);
237 vectype
= get_vectype_for_scalar_type (scalar_type
);
240 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS
))
243 "not vectorized: unsupported data-type ");
244 print_generic_expr (vect_dump
, scalar_type
, TDF_SLIM
);
248 STMT_VINFO_VECTYPE (stmt_info
) = vectype
;
251 if (vect_print_dump_info (REPORT_DETAILS
))
253 fprintf (vect_dump
, "vectype: ");
254 print_generic_expr (vect_dump
, vectype
, TDF_SLIM
);
257 nunits
= TYPE_VECTOR_SUBPARTS (vectype
);
258 if (vect_print_dump_info (REPORT_DETAILS
))
259 fprintf (vect_dump
, "nunits = %d", nunits
);
261 if (!vectorization_factor
262 || (nunits
> vectorization_factor
))
263 vectorization_factor
= nunits
;
268 /* TODO: Analyze cost. Decide if worth while to vectorize. */
269 if (vect_print_dump_info (REPORT_DETAILS
))
270 fprintf (vect_dump
, "vectorization factor = %d", vectorization_factor
);
271 if (vectorization_factor
<= 1)
273 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS
))
274 fprintf (vect_dump
, "not vectorized: unsupported data-type");
277 LOOP_VINFO_VECT_FACTOR (loop_vinfo
) = vectorization_factor
;
283 /* SLP costs are calculated according to SLP instance unrolling factor (i.e.,
284 the number of created vector stmts depends on the unrolling factor). However,
285 the actual number of vector stmts for every SLP node depends on VF which is
286 set later in vect_analyze_operations(). Hence, SLP costs should be updated.
287 In this function we assume that the inside costs calculated in
288 vect_model_xxx_cost are linear in ncopies. */
291 vect_update_slp_costs_according_to_vf (loop_vec_info loop_vinfo
)
293 unsigned int i
, vf
= LOOP_VINFO_VECT_FACTOR (loop_vinfo
);
294 VEC (slp_instance
, heap
) *slp_instances
= LOOP_VINFO_SLP_INSTANCES (loop_vinfo
);
295 slp_instance instance
;
297 if (vect_print_dump_info (REPORT_SLP
))
298 fprintf (vect_dump
, "=== vect_update_slp_costs_according_to_vf ===");
300 for (i
= 0; VEC_iterate (slp_instance
, slp_instances
, i
, instance
); i
++)
301 /* We assume that costs are linear in ncopies. */
302 SLP_INSTANCE_INSIDE_OF_LOOP_COST (instance
) *= vf
303 / SLP_INSTANCE_UNROLLING_FACTOR (instance
);
307 /* Function vect_analyze_operations.
309 Scan the loop stmts and make sure they are all vectorizable. */
312 vect_analyze_operations (loop_vec_info loop_vinfo
)
314 struct loop
*loop
= LOOP_VINFO_LOOP (loop_vinfo
);
315 basic_block
*bbs
= LOOP_VINFO_BBS (loop_vinfo
);
316 int nbbs
= loop
->num_nodes
;
317 gimple_stmt_iterator si
;
318 unsigned int vectorization_factor
= 0;
322 stmt_vec_info stmt_info
;
323 bool need_to_vectorize
= false;
324 int min_profitable_iters
;
325 int min_scalar_loop_bound
;
327 bool only_slp_in_loop
= true;
329 if (vect_print_dump_info (REPORT_DETAILS
))
330 fprintf (vect_dump
, "=== vect_analyze_operations ===");
332 gcc_assert (LOOP_VINFO_VECT_FACTOR (loop_vinfo
));
333 vectorization_factor
= LOOP_VINFO_VECT_FACTOR (loop_vinfo
);
335 for (i
= 0; i
< nbbs
; i
++)
337 basic_block bb
= bbs
[i
];
339 for (si
= gsi_start_phis (bb
); !gsi_end_p (si
); gsi_next (&si
))
344 stmt_info
= vinfo_for_stmt (phi
);
345 if (vect_print_dump_info (REPORT_DETAILS
))
347 fprintf (vect_dump
, "examining phi: ");
348 print_gimple_stmt (vect_dump
, phi
, 0, TDF_SLIM
);
351 if (! is_loop_header_bb_p (bb
))
353 /* inner-loop loop-closed exit phi in outer-loop vectorization
354 (i.e. a phi in the tail of the outer-loop).
355 FORNOW: we currently don't support the case that these phis
356 are not used in the outerloop, cause this case requires
357 to actually do something here. */
358 if (!STMT_VINFO_RELEVANT_P (stmt_info
)
359 || STMT_VINFO_LIVE_P (stmt_info
))
361 if (vect_print_dump_info (REPORT_DETAILS
))
363 "Unsupported loop-closed phi in outer-loop.");
369 gcc_assert (stmt_info
);
371 if (STMT_VINFO_LIVE_P (stmt_info
))
373 /* FORNOW: not yet supported. */
374 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS
))
375 fprintf (vect_dump
, "not vectorized: value used after loop.");
379 if (STMT_VINFO_RELEVANT (stmt_info
) == vect_used_in_loop
380 && STMT_VINFO_DEF_TYPE (stmt_info
) != vect_induction_def
)
382 /* A scalar-dependence cycle that we don't support. */
383 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS
))
384 fprintf (vect_dump
, "not vectorized: scalar dependence cycle.");
388 if (STMT_VINFO_RELEVANT_P (stmt_info
))
390 need_to_vectorize
= true;
391 if (STMT_VINFO_DEF_TYPE (stmt_info
) == vect_induction_def
)
392 ok
= vectorizable_induction (phi
, NULL
, NULL
);
397 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS
))
400 "not vectorized: relevant phi not supported: ");
401 print_gimple_stmt (vect_dump
, phi
, 0, TDF_SLIM
);
407 for (si
= gsi_start_bb (bb
); !gsi_end_p (si
); gsi_next (&si
))
409 gimple stmt
= gsi_stmt (si
);
410 stmt_vec_info stmt_info
= vinfo_for_stmt (stmt
);
411 enum vect_def_type relevance
= STMT_VINFO_RELEVANT (stmt_info
);
413 if (vect_print_dump_info (REPORT_DETAILS
))
415 fprintf (vect_dump
, "==> examining statement: ");
416 print_gimple_stmt (vect_dump
, stmt
, 0, TDF_SLIM
);
419 gcc_assert (stmt_info
);
421 /* skip stmts which do not need to be vectorized.
422 this is expected to include:
423 - the COND_EXPR which is the loop exit condition
424 - any LABEL_EXPRs in the loop
425 - computations that are used only for array indexing or loop
428 if (!STMT_VINFO_RELEVANT_P (stmt_info
)
429 && !STMT_VINFO_LIVE_P (stmt_info
))
431 if (vect_print_dump_info (REPORT_DETAILS
))
432 fprintf (vect_dump
, "irrelevant.");
436 switch (STMT_VINFO_DEF_TYPE (stmt_info
))
441 case vect_reduction_def
:
442 gcc_assert (relevance
== vect_used_in_outer
443 || relevance
== vect_used_in_outer_by_reduction
444 || relevance
== vect_unused_in_loop
);
447 case vect_induction_def
:
448 case vect_constant_def
:
449 case vect_invariant_def
:
450 case vect_unknown_def_type
:
455 if (STMT_VINFO_RELEVANT_P (stmt_info
))
457 gcc_assert (!VECTOR_MODE_P (TYPE_MODE (gimple_expr_type (stmt
))));
458 gcc_assert (STMT_VINFO_VECTYPE (stmt_info
));
459 need_to_vectorize
= true;
463 if (STMT_VINFO_RELEVANT_P (stmt_info
)
464 || STMT_VINFO_DEF_TYPE (stmt_info
) == vect_reduction_def
)
465 ok
= (vectorizable_type_promotion (stmt
, NULL
, NULL
)
466 || vectorizable_type_demotion (stmt
, NULL
, NULL
)
467 || vectorizable_conversion (stmt
, NULL
, NULL
, NULL
)
468 || vectorizable_operation (stmt
, NULL
, NULL
, NULL
)
469 || vectorizable_assignment (stmt
, NULL
, NULL
, NULL
)
470 || vectorizable_load (stmt
, NULL
, NULL
, NULL
)
471 || vectorizable_call (stmt
, NULL
, NULL
)
472 || vectorizable_store (stmt
, NULL
, NULL
, NULL
)
473 || vectorizable_condition (stmt
, NULL
, NULL
)
474 || vectorizable_reduction (stmt
, NULL
, NULL
));
478 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS
))
480 fprintf (vect_dump
, "not vectorized: relevant stmt not ");
481 fprintf (vect_dump
, "supported: ");
482 print_gimple_stmt (vect_dump
, stmt
, 0, TDF_SLIM
);
487 /* Stmts that are (also) "live" (i.e. - that are used out of the loop)
488 need extra handling, except for vectorizable reductions. */
489 if (STMT_VINFO_LIVE_P (stmt_info
)
490 && STMT_VINFO_TYPE (stmt_info
) != reduc_vec_info_type
)
491 ok
= vectorizable_live_operation (stmt
, NULL
, NULL
);
495 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS
))
497 fprintf (vect_dump
, "not vectorized: live stmt not ");
498 fprintf (vect_dump
, "supported: ");
499 print_gimple_stmt (vect_dump
, stmt
, 0, TDF_SLIM
);
504 if (!PURE_SLP_STMT (stmt_info
))
506 /* STMT needs loop-based vectorization. */
507 only_slp_in_loop
= false;
509 /* Groups of strided accesses whose size is not a power of 2 are
510 not vectorizable yet using loop-vectorization. Therefore, if
511 this stmt feeds non-SLP-able stmts (i.e., this stmt has to be
512 both SLPed and loop-based vectorized), the loop cannot be
514 if (STMT_VINFO_STRIDED_ACCESS (stmt_info
)
515 && exact_log2 (DR_GROUP_SIZE (vinfo_for_stmt (
516 DR_GROUP_FIRST_DR (stmt_info
)))) == -1)
518 if (vect_print_dump_info (REPORT_DETAILS
))
520 fprintf (vect_dump
, "not vectorized: the size of group "
521 "of strided accesses is not a power of 2");
522 print_gimple_stmt (vect_dump
, stmt
, 0, TDF_SLIM
);
530 /* All operations in the loop are either irrelevant (deal with loop
531 control, or dead), or only used outside the loop and can be moved
532 out of the loop (e.g. invariants, inductions). The loop can be
533 optimized away by scalar optimizations. We're better off not
534 touching this loop. */
535 if (!need_to_vectorize
)
537 if (vect_print_dump_info (REPORT_DETAILS
))
539 "All the computation can be taken out of the loop.");
540 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS
))
542 "not vectorized: redundant loop. no profit to vectorize.");
546 /* If all the stmts in the loop can be SLPed, we perform only SLP, and
547 vectorization factor of the loop is the unrolling factor required by the
548 SLP instances. If that unrolling factor is 1, we say, that we perform
549 pure SLP on loop - cross iteration parallelism is not exploited. */
550 if (only_slp_in_loop
)
551 vectorization_factor
= LOOP_VINFO_SLP_UNROLLING_FACTOR (loop_vinfo
);
553 vectorization_factor
= least_common_multiple (vectorization_factor
,
554 LOOP_VINFO_SLP_UNROLLING_FACTOR (loop_vinfo
));
556 LOOP_VINFO_VECT_FACTOR (loop_vinfo
) = vectorization_factor
;
558 if (LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo
)
559 && vect_print_dump_info (REPORT_DETAILS
))
561 "vectorization_factor = %d, niters = " HOST_WIDE_INT_PRINT_DEC
,
562 vectorization_factor
, LOOP_VINFO_INT_NITERS (loop_vinfo
));
564 if (LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo
)
565 && (LOOP_VINFO_INT_NITERS (loop_vinfo
) < vectorization_factor
))
567 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS
))
568 fprintf (vect_dump
, "not vectorized: iteration count too small.");
569 if (vect_print_dump_info (REPORT_DETAILS
))
570 fprintf (vect_dump
,"not vectorized: iteration count smaller than "
571 "vectorization factor.");
575 /* Analyze cost. Decide if worth while to vectorize. */
577 /* Once VF is set, SLP costs should be updated since the number of created
578 vector stmts depends on VF. */
579 vect_update_slp_costs_according_to_vf (loop_vinfo
);
581 min_profitable_iters
= vect_estimate_min_profitable_iters (loop_vinfo
);
582 LOOP_VINFO_COST_MODEL_MIN_ITERS (loop_vinfo
) = min_profitable_iters
;
584 if (min_profitable_iters
< 0)
586 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS
))
587 fprintf (vect_dump
, "not vectorized: vectorization not profitable.");
588 if (vect_print_dump_info (REPORT_DETAILS
))
589 fprintf (vect_dump
, "not vectorized: vector version will never be "
594 min_scalar_loop_bound
= ((PARAM_VALUE (PARAM_MIN_VECT_LOOP_BOUND
)
595 * vectorization_factor
) - 1);
597 /* Use the cost model only if it is more conservative than user specified
600 th
= (unsigned) min_scalar_loop_bound
;
601 if (min_profitable_iters
602 && (!min_scalar_loop_bound
603 || min_profitable_iters
> min_scalar_loop_bound
))
604 th
= (unsigned) min_profitable_iters
;
606 if (LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo
)
607 && LOOP_VINFO_INT_NITERS (loop_vinfo
) <= th
)
609 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS
))
610 fprintf (vect_dump
, "not vectorized: vectorization not "
612 if (vect_print_dump_info (REPORT_DETAILS
))
613 fprintf (vect_dump
, "not vectorized: iteration count smaller than "
614 "user specified loop bound parameter or minimum "
615 "profitable iterations (whichever is more conservative).");
619 if (!LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo
)
620 || LOOP_VINFO_INT_NITERS (loop_vinfo
) % vectorization_factor
!= 0
621 || LOOP_PEELING_FOR_ALIGNMENT (loop_vinfo
))
623 if (vect_print_dump_info (REPORT_DETAILS
))
624 fprintf (vect_dump
, "epilog loop required.");
625 if (!vect_can_advance_ivs_p (loop_vinfo
))
627 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS
))
629 "not vectorized: can't create epilog loop 1.");
632 if (!slpeel_can_duplicate_loop_p (loop
, single_exit (loop
)))
634 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS
))
636 "not vectorized: can't create epilog loop 2.");
645 /* Function exist_non_indexing_operands_for_use_p
647 USE is one of the uses attached to STMT. Check if USE is
648 used in STMT for anything other than indexing an array. */
651 exist_non_indexing_operands_for_use_p (tree use
, gimple stmt
)
654 stmt_vec_info stmt_info
= vinfo_for_stmt (stmt
);
656 /* USE corresponds to some operand in STMT. If there is no data
657 reference in STMT, then any operand that corresponds to USE
658 is not indexing an array. */
659 if (!STMT_VINFO_DATA_REF (stmt_info
))
662 /* STMT has a data_ref. FORNOW this means that its of one of
666 (This should have been verified in analyze_data_refs).
668 'var' in the second case corresponds to a def, not a use,
669 so USE cannot correspond to any operands that are not used
672 Therefore, all we need to check is if STMT falls into the
673 first case, and whether var corresponds to USE. */
675 if (TREE_CODE (gimple_assign_lhs (stmt
)) == SSA_NAME
)
678 if (!gimple_assign_copy_p (stmt
))
680 operand
= gimple_assign_rhs1 (stmt
);
682 if (TREE_CODE (operand
) != SSA_NAME
)
692 /* Function vect_analyze_scalar_cycles_1.
694 Examine the cross iteration def-use cycles of scalar variables
695 in LOOP. LOOP_VINFO represents the loop that is now being
696 considered for vectorization (can be LOOP, or an outer-loop
700 vect_analyze_scalar_cycles_1 (loop_vec_info loop_vinfo
, struct loop
*loop
)
702 basic_block bb
= loop
->header
;
704 VEC(gimple
,heap
) *worklist
= VEC_alloc (gimple
, heap
, 64);
705 gimple_stmt_iterator gsi
;
707 if (vect_print_dump_info (REPORT_DETAILS
))
708 fprintf (vect_dump
, "=== vect_analyze_scalar_cycles ===");
710 /* First - identify all inductions. */
711 for (gsi
= gsi_start_phis (bb
); !gsi_end_p (gsi
); gsi_next (&gsi
))
713 gimple phi
= gsi_stmt (gsi
);
714 tree access_fn
= NULL
;
715 tree def
= PHI_RESULT (phi
);
716 stmt_vec_info stmt_vinfo
= vinfo_for_stmt (phi
);
718 if (vect_print_dump_info (REPORT_DETAILS
))
720 fprintf (vect_dump
, "Analyze phi: ");
721 print_gimple_stmt (vect_dump
, phi
, 0, TDF_SLIM
);
724 /* Skip virtual phi's. The data dependences that are associated with
725 virtual defs/uses (i.e., memory accesses) are analyzed elsewhere. */
726 if (!is_gimple_reg (SSA_NAME_VAR (def
)))
729 STMT_VINFO_DEF_TYPE (stmt_vinfo
) = vect_unknown_def_type
;
731 /* Analyze the evolution function. */
732 access_fn
= analyze_scalar_evolution (loop
, def
);
733 if (access_fn
&& vect_print_dump_info (REPORT_DETAILS
))
735 fprintf (vect_dump
, "Access function of PHI: ");
736 print_generic_expr (vect_dump
, access_fn
, TDF_SLIM
);
740 || !vect_is_simple_iv_evolution (loop
->num
, access_fn
, &dumy
, &dumy
))
742 VEC_safe_push (gimple
, heap
, worklist
, phi
);
746 if (vect_print_dump_info (REPORT_DETAILS
))
747 fprintf (vect_dump
, "Detected induction.");
748 STMT_VINFO_DEF_TYPE (stmt_vinfo
) = vect_induction_def
;
752 /* Second - identify all reductions. */
753 while (VEC_length (gimple
, worklist
) > 0)
755 gimple phi
= VEC_pop (gimple
, worklist
);
756 tree def
= PHI_RESULT (phi
);
757 stmt_vec_info stmt_vinfo
= vinfo_for_stmt (phi
);
760 if (vect_print_dump_info (REPORT_DETAILS
))
762 fprintf (vect_dump
, "Analyze phi: ");
763 print_gimple_stmt (vect_dump
, phi
, 0, TDF_SLIM
);
766 gcc_assert (is_gimple_reg (SSA_NAME_VAR (def
)));
767 gcc_assert (STMT_VINFO_DEF_TYPE (stmt_vinfo
) == vect_unknown_def_type
);
769 reduc_stmt
= vect_is_simple_reduction (loop_vinfo
, phi
);
772 if (vect_print_dump_info (REPORT_DETAILS
))
773 fprintf (vect_dump
, "Detected reduction.");
774 STMT_VINFO_DEF_TYPE (stmt_vinfo
) = vect_reduction_def
;
775 STMT_VINFO_DEF_TYPE (vinfo_for_stmt (reduc_stmt
)) =
779 if (vect_print_dump_info (REPORT_DETAILS
))
780 fprintf (vect_dump
, "Unknown def-use cycle pattern.");
783 VEC_free (gimple
, heap
, worklist
);
788 /* Function vect_analyze_scalar_cycles.
790 Examine the cross iteration def-use cycles of scalar variables, by
791 analyzing the loop-header PHIs of scalar variables; Classify each
792 cycle as one of the following: invariant, induction, reduction, unknown.
793 We do that for the loop represented by LOOP_VINFO, and also to its
794 inner-loop, if exists.
795 Examples for scalar cycles:
810 vect_analyze_scalar_cycles (loop_vec_info loop_vinfo
)
812 struct loop
*loop
= LOOP_VINFO_LOOP (loop_vinfo
);
814 vect_analyze_scalar_cycles_1 (loop_vinfo
, loop
);
816 /* When vectorizing an outer-loop, the inner-loop is executed sequentially.
817 Reductions in such inner-loop therefore have different properties than
818 the reductions in the nest that gets vectorized:
819 1. When vectorized, they are executed in the same order as in the original
820 scalar loop, so we can't change the order of computation when
822 2. FIXME: Inner-loop reductions can be used in the inner-loop, so the
823 current checks are too strict. */
826 vect_analyze_scalar_cycles_1 (loop_vinfo
, loop
->inner
);
830 /* Function vect_insert_into_interleaving_chain.
832 Insert DRA into the interleaving chain of DRB according to DRA's INIT. */
835 vect_insert_into_interleaving_chain (struct data_reference
*dra
,
836 struct data_reference
*drb
)
840 stmt_vec_info stmtinfo_a
= vinfo_for_stmt (DR_STMT (dra
));
841 stmt_vec_info stmtinfo_b
= vinfo_for_stmt (DR_STMT (drb
));
843 prev
= DR_GROUP_FIRST_DR (stmtinfo_b
);
844 next
= DR_GROUP_NEXT_DR (vinfo_for_stmt (prev
));
847 next_init
= DR_INIT (STMT_VINFO_DATA_REF (vinfo_for_stmt (next
)));
848 if (tree_int_cst_compare (next_init
, DR_INIT (dra
)) > 0)
851 DR_GROUP_NEXT_DR (vinfo_for_stmt (prev
)) = DR_STMT (dra
);
852 DR_GROUP_NEXT_DR (stmtinfo_a
) = next
;
856 next
= DR_GROUP_NEXT_DR (vinfo_for_stmt (prev
));
859 /* We got to the end of the list. Insert here. */
860 DR_GROUP_NEXT_DR (vinfo_for_stmt (prev
)) = DR_STMT (dra
);
861 DR_GROUP_NEXT_DR (stmtinfo_a
) = NULL
;
865 /* Function vect_update_interleaving_chain.
867 For two data-refs DRA and DRB that are a part of a chain interleaved data
868 accesses, update the interleaving chain. DRB's INIT is smaller than DRA's.
870 There are four possible cases:
871 1. New stmts - both DRA and DRB are not a part of any chain:
874 2. DRB is a part of a chain and DRA is not:
875 no need to update FIRST_DR
876 no need to insert DRB
877 insert DRA according to init
878 3. DRA is a part of a chain and DRB is not:
879 if (init of FIRST_DR > init of DRB)
881 NEXT(FIRST_DR) = previous FIRST_DR
883 insert DRB according to its init
884 4. both DRA and DRB are in some interleaving chains:
885 choose the chain with the smallest init of FIRST_DR
886 insert the nodes of the second chain into the first one. */
889 vect_update_interleaving_chain (struct data_reference
*drb
,
890 struct data_reference
*dra
)
892 stmt_vec_info stmtinfo_a
= vinfo_for_stmt (DR_STMT (dra
));
893 stmt_vec_info stmtinfo_b
= vinfo_for_stmt (DR_STMT (drb
));
894 tree next_init
, init_dra_chain
, init_drb_chain
;
895 gimple first_a
, first_b
;
897 gimple node
, prev
, next
, first_stmt
;
899 /* 1. New stmts - both DRA and DRB are not a part of any chain. */
900 if (!DR_GROUP_FIRST_DR (stmtinfo_a
) && !DR_GROUP_FIRST_DR (stmtinfo_b
))
902 DR_GROUP_FIRST_DR (stmtinfo_a
) = DR_STMT (drb
);
903 DR_GROUP_FIRST_DR (stmtinfo_b
) = DR_STMT (drb
);
904 DR_GROUP_NEXT_DR (stmtinfo_b
) = DR_STMT (dra
);
908 /* 2. DRB is a part of a chain and DRA is not. */
909 if (!DR_GROUP_FIRST_DR (stmtinfo_a
) && DR_GROUP_FIRST_DR (stmtinfo_b
))
911 DR_GROUP_FIRST_DR (stmtinfo_a
) = DR_GROUP_FIRST_DR (stmtinfo_b
);
912 /* Insert DRA into the chain of DRB. */
913 vect_insert_into_interleaving_chain (dra
, drb
);
917 /* 3. DRA is a part of a chain and DRB is not. */
918 if (DR_GROUP_FIRST_DR (stmtinfo_a
) && !DR_GROUP_FIRST_DR (stmtinfo_b
))
920 gimple old_first_stmt
= DR_GROUP_FIRST_DR (stmtinfo_a
);
921 tree init_old
= DR_INIT (STMT_VINFO_DATA_REF (vinfo_for_stmt (
925 if (tree_int_cst_compare (init_old
, DR_INIT (drb
)) > 0)
927 /* DRB's init is smaller than the init of the stmt previously marked
928 as the first stmt of the interleaving chain of DRA. Therefore, we
929 update FIRST_STMT and put DRB in the head of the list. */
930 DR_GROUP_FIRST_DR (stmtinfo_b
) = DR_STMT (drb
);
931 DR_GROUP_NEXT_DR (stmtinfo_b
) = old_first_stmt
;
933 /* Update all the stmts in the list to point to the new FIRST_STMT. */
934 tmp
= old_first_stmt
;
937 DR_GROUP_FIRST_DR (vinfo_for_stmt (tmp
)) = DR_STMT (drb
);
938 tmp
= DR_GROUP_NEXT_DR (vinfo_for_stmt (tmp
));
943 /* Insert DRB in the list of DRA. */
944 vect_insert_into_interleaving_chain (drb
, dra
);
945 DR_GROUP_FIRST_DR (stmtinfo_b
) = DR_GROUP_FIRST_DR (stmtinfo_a
);
950 /* 4. both DRA and DRB are in some interleaving chains. */
951 first_a
= DR_GROUP_FIRST_DR (stmtinfo_a
);
952 first_b
= DR_GROUP_FIRST_DR (stmtinfo_b
);
953 if (first_a
== first_b
)
955 init_dra_chain
= DR_INIT (STMT_VINFO_DATA_REF (vinfo_for_stmt (first_a
)));
956 init_drb_chain
= DR_INIT (STMT_VINFO_DATA_REF (vinfo_for_stmt (first_b
)));
958 if (tree_int_cst_compare (init_dra_chain
, init_drb_chain
) > 0)
960 /* Insert the nodes of DRA chain into the DRB chain.
961 After inserting a node, continue from this node of the DRB chain (don't
962 start from the beginning. */
963 node
= DR_GROUP_FIRST_DR (stmtinfo_a
);
964 prev
= DR_GROUP_FIRST_DR (stmtinfo_b
);
965 first_stmt
= first_b
;
969 /* Insert the nodes of DRB chain into the DRA chain.
970 After inserting a node, continue from this node of the DRA chain (don't
971 start from the beginning. */
972 node
= DR_GROUP_FIRST_DR (stmtinfo_b
);
973 prev
= DR_GROUP_FIRST_DR (stmtinfo_a
);
974 first_stmt
= first_a
;
979 node_init
= DR_INIT (STMT_VINFO_DATA_REF (vinfo_for_stmt (node
)));
980 next
= DR_GROUP_NEXT_DR (vinfo_for_stmt (prev
));
983 next_init
= DR_INIT (STMT_VINFO_DATA_REF (vinfo_for_stmt (next
)));
984 if (tree_int_cst_compare (next_init
, node_init
) > 0)
987 DR_GROUP_NEXT_DR (vinfo_for_stmt (prev
)) = node
;
988 DR_GROUP_NEXT_DR (vinfo_for_stmt (node
)) = next
;
993 next
= DR_GROUP_NEXT_DR (vinfo_for_stmt (prev
));
997 /* We got to the end of the list. Insert here. */
998 DR_GROUP_NEXT_DR (vinfo_for_stmt (prev
)) = node
;
999 DR_GROUP_NEXT_DR (vinfo_for_stmt (node
)) = NULL
;
1002 DR_GROUP_FIRST_DR (vinfo_for_stmt (node
)) = first_stmt
;
1003 node
= DR_GROUP_NEXT_DR (vinfo_for_stmt (node
));
1008 /* Function vect_equal_offsets.
1010 Check if OFFSET1 and OFFSET2 are identical expressions. */
1013 vect_equal_offsets (tree offset1
, tree offset2
)
1017 STRIP_NOPS (offset1
);
1018 STRIP_NOPS (offset2
);
1020 if (offset1
== offset2
)
1023 if (TREE_CODE (offset1
) != TREE_CODE (offset2
)
1024 || !BINARY_CLASS_P (offset1
)
1025 || !BINARY_CLASS_P (offset2
))
1028 res0
= vect_equal_offsets (TREE_OPERAND (offset1
, 0),
1029 TREE_OPERAND (offset2
, 0));
1030 res1
= vect_equal_offsets (TREE_OPERAND (offset1
, 1),
1031 TREE_OPERAND (offset2
, 1));
1033 return (res0
&& res1
);
1037 /* Function vect_check_interleaving.
1039 Check if DRA and DRB are a part of interleaving. In case they are, insert
1040 DRA and DRB in an interleaving chain. */
1043 vect_check_interleaving (struct data_reference
*dra
,
1044 struct data_reference
*drb
)
1046 HOST_WIDE_INT type_size_a
, type_size_b
, diff_mod_size
, step
, init_a
, init_b
;
1048 /* Check that the data-refs have same first location (except init) and they
1049 are both either store or load (not load and store). */
1050 if ((DR_BASE_ADDRESS (dra
) != DR_BASE_ADDRESS (drb
)
1051 && (TREE_CODE (DR_BASE_ADDRESS (dra
)) != ADDR_EXPR
1052 || TREE_CODE (DR_BASE_ADDRESS (drb
)) != ADDR_EXPR
1053 || TREE_OPERAND (DR_BASE_ADDRESS (dra
), 0)
1054 != TREE_OPERAND (DR_BASE_ADDRESS (drb
),0)))
1055 || !vect_equal_offsets (DR_OFFSET (dra
), DR_OFFSET (drb
))
1056 || !tree_int_cst_compare (DR_INIT (dra
), DR_INIT (drb
))
1057 || DR_IS_READ (dra
) != DR_IS_READ (drb
))
1061 1. data-refs are of the same type
1062 2. their steps are equal
1063 3. the step is greater than the difference between data-refs' inits */
1064 type_size_a
= TREE_INT_CST_LOW (TYPE_SIZE_UNIT (TREE_TYPE (DR_REF (dra
))));
1065 type_size_b
= TREE_INT_CST_LOW (TYPE_SIZE_UNIT (TREE_TYPE (DR_REF (drb
))));
1067 if (type_size_a
!= type_size_b
1068 || tree_int_cst_compare (DR_STEP (dra
), DR_STEP (drb
))
1069 || !types_compatible_p (TREE_TYPE (DR_REF (dra
)),
1070 TREE_TYPE (DR_REF (drb
))))
1073 init_a
= TREE_INT_CST_LOW (DR_INIT (dra
));
1074 init_b
= TREE_INT_CST_LOW (DR_INIT (drb
));
1075 step
= TREE_INT_CST_LOW (DR_STEP (dra
));
1077 if (init_a
> init_b
)
1079 /* If init_a == init_b + the size of the type * k, we have an interleaving,
1080 and DRB is accessed before DRA. */
1081 diff_mod_size
= (init_a
- init_b
) % type_size_a
;
1083 if ((init_a
- init_b
) > step
)
1086 if (diff_mod_size
== 0)
1088 vect_update_interleaving_chain (drb
, dra
);
1089 if (vect_print_dump_info (REPORT_DR_DETAILS
))
1091 fprintf (vect_dump
, "Detected interleaving ");
1092 print_generic_expr (vect_dump
, DR_REF (dra
), TDF_SLIM
);
1093 fprintf (vect_dump
, " and ");
1094 print_generic_expr (vect_dump
, DR_REF (drb
), TDF_SLIM
);
1101 /* If init_b == init_a + the size of the type * k, we have an
1102 interleaving, and DRA is accessed before DRB. */
1103 diff_mod_size
= (init_b
- init_a
) % type_size_a
;
1105 if ((init_b
- init_a
) > step
)
1108 if (diff_mod_size
== 0)
1110 vect_update_interleaving_chain (dra
, drb
);
1111 if (vect_print_dump_info (REPORT_DR_DETAILS
))
1113 fprintf (vect_dump
, "Detected interleaving ");
1114 print_generic_expr (vect_dump
, DR_REF (dra
), TDF_SLIM
);
1115 fprintf (vect_dump
, " and ");
1116 print_generic_expr (vect_dump
, DR_REF (drb
), TDF_SLIM
);
1123 /* Check if data references pointed by DR_I and DR_J are same or
1124 belong to same interleaving group. Return FALSE if drs are
1125 different, otherwise return TRUE. */
1128 vect_same_range_drs (data_reference_p dr_i
, data_reference_p dr_j
)
1130 gimple stmt_i
= DR_STMT (dr_i
);
1131 gimple stmt_j
= DR_STMT (dr_j
);
1133 if (operand_equal_p (DR_REF (dr_i
), DR_REF (dr_j
), 0)
1134 || (DR_GROUP_FIRST_DR (vinfo_for_stmt (stmt_i
))
1135 && DR_GROUP_FIRST_DR (vinfo_for_stmt (stmt_j
))
1136 && (DR_GROUP_FIRST_DR (vinfo_for_stmt (stmt_i
))
1137 == DR_GROUP_FIRST_DR (vinfo_for_stmt (stmt_j
)))))
1143 /* If address ranges represented by DDR_I and DDR_J are equal,
1144 return TRUE, otherwise return FALSE. */
1147 vect_vfa_range_equal (ddr_p ddr_i
, ddr_p ddr_j
)
1149 if ((vect_same_range_drs (DDR_A (ddr_i
), DDR_A (ddr_j
))
1150 && vect_same_range_drs (DDR_B (ddr_i
), DDR_B (ddr_j
)))
1151 || (vect_same_range_drs (DDR_A (ddr_i
), DDR_B (ddr_j
))
1152 && vect_same_range_drs (DDR_B (ddr_i
), DDR_A (ddr_j
))))
1158 /* Insert DDR into LOOP_VINFO list of ddrs that may alias and need to be
1159 tested at run-time. Return TRUE if DDR was successfully inserted.
1160 Return false if versioning is not supported. */
1163 vect_mark_for_runtime_alias_test (ddr_p ddr
, loop_vec_info loop_vinfo
)
1165 struct loop
*loop
= LOOP_VINFO_LOOP (loop_vinfo
);
1167 if ((unsigned) PARAM_VALUE (PARAM_VECT_MAX_VERSION_FOR_ALIAS_CHECKS
) == 0)
1170 if (vect_print_dump_info (REPORT_DR_DETAILS
))
1172 fprintf (vect_dump
, "mark for run-time aliasing test between ");
1173 print_generic_expr (vect_dump
, DR_REF (DDR_A (ddr
)), TDF_SLIM
);
1174 fprintf (vect_dump
, " and ");
1175 print_generic_expr (vect_dump
, DR_REF (DDR_B (ddr
)), TDF_SLIM
);
1180 if (vect_print_dump_info (REPORT_DR_DETAILS
))
1181 fprintf (vect_dump
, "versioning not supported when optimizing for size.");
1185 /* FORNOW: We don't support versioning with outer-loop vectorization. */
1188 if (vect_print_dump_info (REPORT_DR_DETAILS
))
1189 fprintf (vect_dump
, "versioning not yet supported for outer-loops.");
1193 VEC_safe_push (ddr_p
, heap
, LOOP_VINFO_MAY_ALIAS_DDRS (loop_vinfo
), ddr
);
1197 /* Function vect_analyze_data_ref_dependence.
1199 Return TRUE if there (might) exist a dependence between a memory-reference
1200 DRA and a memory-reference DRB. When versioning for alias may check a
1201 dependence at run-time, return FALSE. */
1204 vect_analyze_data_ref_dependence (struct data_dependence_relation
*ddr
,
1205 loop_vec_info loop_vinfo
)
1208 struct loop
*loop
= LOOP_VINFO_LOOP (loop_vinfo
);
1209 int vectorization_factor
= LOOP_VINFO_VECT_FACTOR (loop_vinfo
);
1210 struct data_reference
*dra
= DDR_A (ddr
);
1211 struct data_reference
*drb
= DDR_B (ddr
);
1212 stmt_vec_info stmtinfo_a
= vinfo_for_stmt (DR_STMT (dra
));
1213 stmt_vec_info stmtinfo_b
= vinfo_for_stmt (DR_STMT (drb
));
1214 int dra_size
= GET_MODE_SIZE (TYPE_MODE (TREE_TYPE (DR_REF (dra
))));
1215 int drb_size
= GET_MODE_SIZE (TYPE_MODE (TREE_TYPE (DR_REF (drb
))));
1216 lambda_vector dist_v
;
1217 unsigned int loop_depth
;
1219 if (DDR_ARE_DEPENDENT (ddr
) == chrec_known
)
1221 /* Independent data accesses. */
1222 vect_check_interleaving (dra
, drb
);
1226 if ((DR_IS_READ (dra
) && DR_IS_READ (drb
)) || dra
== drb
)
1229 if (DDR_ARE_DEPENDENT (ddr
) == chrec_dont_know
)
1231 if (vect_print_dump_info (REPORT_DR_DETAILS
))
1234 "versioning for alias required: can't determine dependence between ");
1235 print_generic_expr (vect_dump
, DR_REF (dra
), TDF_SLIM
);
1236 fprintf (vect_dump
, " and ");
1237 print_generic_expr (vect_dump
, DR_REF (drb
), TDF_SLIM
);
1239 /* Add to list of ddrs that need to be tested at run-time. */
1240 return !vect_mark_for_runtime_alias_test (ddr
, loop_vinfo
);
1243 if (DDR_NUM_DIST_VECTS (ddr
) == 0)
1245 if (vect_print_dump_info (REPORT_DR_DETAILS
))
1247 fprintf (vect_dump
, "versioning for alias required: bad dist vector for ");
1248 print_generic_expr (vect_dump
, DR_REF (dra
), TDF_SLIM
);
1249 fprintf (vect_dump
, " and ");
1250 print_generic_expr (vect_dump
, DR_REF (drb
), TDF_SLIM
);
1252 /* Add to list of ddrs that need to be tested at run-time. */
1253 return !vect_mark_for_runtime_alias_test (ddr
, loop_vinfo
);
1256 loop_depth
= index_in_loop_nest (loop
->num
, DDR_LOOP_NEST (ddr
));
1257 for (i
= 0; VEC_iterate (lambda_vector
, DDR_DIST_VECTS (ddr
), i
, dist_v
); i
++)
1259 int dist
= dist_v
[loop_depth
];
1261 if (vect_print_dump_info (REPORT_DR_DETAILS
))
1262 fprintf (vect_dump
, "dependence distance = %d.", dist
);
1264 /* Same loop iteration. */
1265 if (dist
% vectorization_factor
== 0 && dra_size
== drb_size
)
1267 /* Two references with distance zero have the same alignment. */
1268 VEC_safe_push (dr_p
, heap
, STMT_VINFO_SAME_ALIGN_REFS (stmtinfo_a
), drb
);
1269 VEC_safe_push (dr_p
, heap
, STMT_VINFO_SAME_ALIGN_REFS (stmtinfo_b
), dra
);
1270 if (vect_print_dump_info (REPORT_ALIGNMENT
))
1271 fprintf (vect_dump
, "accesses have the same alignment.");
1272 if (vect_print_dump_info (REPORT_DR_DETAILS
))
1274 fprintf (vect_dump
, "dependence distance modulo vf == 0 between ");
1275 print_generic_expr (vect_dump
, DR_REF (dra
), TDF_SLIM
);
1276 fprintf (vect_dump
, " and ");
1277 print_generic_expr (vect_dump
, DR_REF (drb
), TDF_SLIM
);
1280 /* For interleaving, mark that there is a read-write dependency if
1281 necessary. We check before that one of the data-refs is store. */
1282 if (DR_IS_READ (dra
))
1283 DR_GROUP_READ_WRITE_DEPENDENCE (stmtinfo_a
) = true;
1286 if (DR_IS_READ (drb
))
1287 DR_GROUP_READ_WRITE_DEPENDENCE (stmtinfo_b
) = true;
1293 if (abs (dist
) >= vectorization_factor
1294 || (dist
> 0 && DDR_REVERSED_P (ddr
)))
1296 /* Dependence distance does not create dependence, as far as
1297 vectorization is concerned, in this case. If DDR_REVERSED_P the
1298 order of the data-refs in DDR was reversed (to make distance
1299 vector positive), and the actual distance is negative. */
1300 if (vect_print_dump_info (REPORT_DR_DETAILS
))
1301 fprintf (vect_dump
, "dependence distance >= VF or negative.");
1305 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS
))
1308 "not vectorized, possible dependence "
1309 "between data-refs ");
1310 print_generic_expr (vect_dump
, DR_REF (dra
), TDF_SLIM
);
1311 fprintf (vect_dump
, " and ");
1312 print_generic_expr (vect_dump
, DR_REF (drb
), TDF_SLIM
);
1321 /* Function vect_analyze_data_ref_dependences.
1323 Examine all the data references in the loop, and make sure there do not
1324 exist any data dependences between them. */
1327 vect_analyze_data_ref_dependences (loop_vec_info loop_vinfo
)
1330 VEC (ddr_p
, heap
) * ddrs
= LOOP_VINFO_DDRS (loop_vinfo
);
1331 struct data_dependence_relation
*ddr
;
1333 if (vect_print_dump_info (REPORT_DETAILS
))
1334 fprintf (vect_dump
, "=== vect_analyze_dependences ===");
1336 for (i
= 0; VEC_iterate (ddr_p
, ddrs
, i
, ddr
); i
++)
1337 if (vect_analyze_data_ref_dependence (ddr
, loop_vinfo
))
1344 /* Function vect_compute_data_ref_alignment
1346 Compute the misalignment of the data reference DR.
1349 1. If during the misalignment computation it is found that the data reference
1350 cannot be vectorized then false is returned.
1351 2. DR_MISALIGNMENT (DR) is defined.
1353 FOR NOW: No analysis is actually performed. Misalignment is calculated
1354 only for trivial cases. TODO. */
1357 vect_compute_data_ref_alignment (struct data_reference
*dr
)
1359 gimple stmt
= DR_STMT (dr
);
1360 stmt_vec_info stmt_info
= vinfo_for_stmt (stmt
);
1361 loop_vec_info loop_vinfo
= STMT_VINFO_LOOP_VINFO (stmt_info
);
1362 struct loop
*loop
= LOOP_VINFO_LOOP (loop_vinfo
);
1363 tree ref
= DR_REF (dr
);
1365 tree base
, base_addr
;
1368 tree aligned_to
, alignment
;
1370 if (vect_print_dump_info (REPORT_DETAILS
))
1371 fprintf (vect_dump
, "vect_compute_data_ref_alignment:");
1373 /* Initialize misalignment to unknown. */
1374 SET_DR_MISALIGNMENT (dr
, -1);
1376 misalign
= DR_INIT (dr
);
1377 aligned_to
= DR_ALIGNED_TO (dr
);
1378 base_addr
= DR_BASE_ADDRESS (dr
);
1379 vectype
= STMT_VINFO_VECTYPE (stmt_info
);
1381 /* In case the dataref is in an inner-loop of the loop that is being
1382 vectorized (LOOP), we use the base and misalignment information
1383 relative to the outer-loop (LOOP). This is ok only if the misalignment
1384 stays the same throughout the execution of the inner-loop, which is why
1385 we have to check that the stride of the dataref in the inner-loop evenly
1386 divides by the vector size. */
1387 if (nested_in_vect_loop_p (loop
, stmt
))
1389 tree step
= DR_STEP (dr
);
1390 HOST_WIDE_INT dr_step
= TREE_INT_CST_LOW (step
);
1392 if (dr_step
% GET_MODE_SIZE (TYPE_MODE (vectype
)) == 0)
1394 if (vect_print_dump_info (REPORT_ALIGNMENT
))
1395 fprintf (vect_dump
, "inner step divides the vector-size.");
1396 misalign
= STMT_VINFO_DR_INIT (stmt_info
);
1397 aligned_to
= STMT_VINFO_DR_ALIGNED_TO (stmt_info
);
1398 base_addr
= STMT_VINFO_DR_BASE_ADDRESS (stmt_info
);
1402 if (vect_print_dump_info (REPORT_ALIGNMENT
))
1403 fprintf (vect_dump
, "inner step doesn't divide the vector-size.");
1404 misalign
= NULL_TREE
;
1408 base
= build_fold_indirect_ref (base_addr
);
1409 alignment
= ssize_int (TYPE_ALIGN (vectype
)/BITS_PER_UNIT
);
1411 if ((aligned_to
&& tree_int_cst_compare (aligned_to
, alignment
) < 0)
1414 if (vect_print_dump_info (REPORT_ALIGNMENT
))
1416 fprintf (vect_dump
, "Unknown alignment for access: ");
1417 print_generic_expr (vect_dump
, base
, TDF_SLIM
);
1423 && tree_int_cst_compare (ssize_int (DECL_ALIGN_UNIT (base
)),
1425 || (TREE_CODE (base_addr
) == SSA_NAME
1426 && tree_int_cst_compare (ssize_int (TYPE_ALIGN_UNIT (TREE_TYPE (
1427 TREE_TYPE (base_addr
)))),
1429 base_aligned
= true;
1431 base_aligned
= false;
1435 /* Do not change the alignment of global variables if
1436 flag_section_anchors is enabled. */
1437 if (!vect_can_force_dr_alignment_p (base
, TYPE_ALIGN (vectype
))
1438 || (TREE_STATIC (base
) && flag_section_anchors
))
1440 if (vect_print_dump_info (REPORT_DETAILS
))
1442 fprintf (vect_dump
, "can't force alignment of ref: ");
1443 print_generic_expr (vect_dump
, ref
, TDF_SLIM
);
1448 /* Force the alignment of the decl.
1449 NOTE: This is the only change to the code we make during
1450 the analysis phase, before deciding to vectorize the loop. */
1451 if (vect_print_dump_info (REPORT_DETAILS
))
1452 fprintf (vect_dump
, "force alignment");
1453 DECL_ALIGN (base
) = TYPE_ALIGN (vectype
);
1454 DECL_USER_ALIGN (base
) = 1;
1457 /* At this point we assume that the base is aligned. */
1458 gcc_assert (base_aligned
1459 || (TREE_CODE (base
) == VAR_DECL
1460 && DECL_ALIGN (base
) >= TYPE_ALIGN (vectype
)));
1462 /* Modulo alignment. */
1463 misalign
= size_binop (TRUNC_MOD_EXPR
, misalign
, alignment
);
1465 if (!host_integerp (misalign
, 1))
1467 /* Negative or overflowed misalignment value. */
1468 if (vect_print_dump_info (REPORT_DETAILS
))
1469 fprintf (vect_dump
, "unexpected misalign value");
1473 SET_DR_MISALIGNMENT (dr
, TREE_INT_CST_LOW (misalign
));
1475 if (vect_print_dump_info (REPORT_DETAILS
))
1477 fprintf (vect_dump
, "misalign = %d bytes of ref ", DR_MISALIGNMENT (dr
));
1478 print_generic_expr (vect_dump
, ref
, TDF_SLIM
);
1485 /* Function vect_compute_data_refs_alignment
1487 Compute the misalignment of data references in the loop.
1488 Return FALSE if a data reference is found that cannot be vectorized. */
1491 vect_compute_data_refs_alignment (loop_vec_info loop_vinfo
)
1493 VEC (data_reference_p
, heap
) *datarefs
= LOOP_VINFO_DATAREFS (loop_vinfo
);
1494 struct data_reference
*dr
;
1497 for (i
= 0; VEC_iterate (data_reference_p
, datarefs
, i
, dr
); i
++)
1498 if (!vect_compute_data_ref_alignment (dr
))
1505 /* Function vect_update_misalignment_for_peel
1507 DR - the data reference whose misalignment is to be adjusted.
1508 DR_PEEL - the data reference whose misalignment is being made
1509 zero in the vector loop by the peel.
1510 NPEEL - the number of iterations in the peel loop if the misalignment
1511 of DR_PEEL is known at compile time. */
1514 vect_update_misalignment_for_peel (struct data_reference
*dr
,
1515 struct data_reference
*dr_peel
, int npeel
)
1518 VEC(dr_p
,heap
) *same_align_drs
;
1519 struct data_reference
*current_dr
;
1520 int dr_size
= GET_MODE_SIZE (TYPE_MODE (TREE_TYPE (DR_REF (dr
))));
1521 int dr_peel_size
= GET_MODE_SIZE (TYPE_MODE (TREE_TYPE (DR_REF (dr_peel
))));
1522 stmt_vec_info stmt_info
= vinfo_for_stmt (DR_STMT (dr
));
1523 stmt_vec_info peel_stmt_info
= vinfo_for_stmt (DR_STMT (dr_peel
));
1525 /* For interleaved data accesses the step in the loop must be multiplied by
1526 the size of the interleaving group. */
1527 if (STMT_VINFO_STRIDED_ACCESS (stmt_info
))
1528 dr_size
*= DR_GROUP_SIZE (vinfo_for_stmt (DR_GROUP_FIRST_DR (stmt_info
)));
1529 if (STMT_VINFO_STRIDED_ACCESS (peel_stmt_info
))
1530 dr_peel_size
*= DR_GROUP_SIZE (peel_stmt_info
);
1532 /* It can be assumed that the data refs with the same alignment as dr_peel
1533 are aligned in the vector loop. */
1535 = STMT_VINFO_SAME_ALIGN_REFS (vinfo_for_stmt (DR_STMT (dr_peel
)));
1536 for (i
= 0; VEC_iterate (dr_p
, same_align_drs
, i
, current_dr
); i
++)
1538 if (current_dr
!= dr
)
1540 gcc_assert (DR_MISALIGNMENT (dr
) / dr_size
==
1541 DR_MISALIGNMENT (dr_peel
) / dr_peel_size
);
1542 SET_DR_MISALIGNMENT (dr
, 0);
1546 if (known_alignment_for_access_p (dr
)
1547 && known_alignment_for_access_p (dr_peel
))
1549 int misal
= DR_MISALIGNMENT (dr
);
1550 tree vectype
= STMT_VINFO_VECTYPE (stmt_info
);
1551 misal
+= npeel
* dr_size
;
1552 misal
%= GET_MODE_SIZE (TYPE_MODE (vectype
));
1553 SET_DR_MISALIGNMENT (dr
, misal
);
1557 if (vect_print_dump_info (REPORT_DETAILS
))
1558 fprintf (vect_dump
, "Setting misalignment to -1.");
1559 SET_DR_MISALIGNMENT (dr
, -1);
1563 /* Function vect_verify_datarefs_alignment
1565 Return TRUE if all data references in the loop can be
1566 handled with respect to alignment. */
1569 vect_verify_datarefs_alignment (loop_vec_info loop_vinfo
)
1571 VEC (data_reference_p
, heap
) *datarefs
= LOOP_VINFO_DATAREFS (loop_vinfo
);
1572 struct data_reference
*dr
;
1573 enum dr_alignment_support supportable_dr_alignment
;
1576 for (i
= 0; VEC_iterate (data_reference_p
, datarefs
, i
, dr
); i
++)
1578 gimple stmt
= DR_STMT (dr
);
1579 stmt_vec_info stmt_info
= vinfo_for_stmt (stmt
);
1581 /* For interleaving, only the alignment of the first access matters. */
1582 if (STMT_VINFO_STRIDED_ACCESS (stmt_info
)
1583 && DR_GROUP_FIRST_DR (stmt_info
) != stmt
)
1586 supportable_dr_alignment
= vect_supportable_dr_alignment (dr
);
1587 if (!supportable_dr_alignment
)
1589 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS
))
1591 if (DR_IS_READ (dr
))
1593 "not vectorized: unsupported unaligned load.");
1596 "not vectorized: unsupported unaligned store.");
1600 if (supportable_dr_alignment
!= dr_aligned
1601 && vect_print_dump_info (REPORT_ALIGNMENT
))
1602 fprintf (vect_dump
, "Vectorizing an unaligned access.");
1608 /* Function vector_alignment_reachable_p
1610 Return true if vector alignment for DR is reachable by peeling
1611 a few loop iterations. Return false otherwise. */
1614 vector_alignment_reachable_p (struct data_reference
*dr
)
1616 gimple stmt
= DR_STMT (dr
);
1617 stmt_vec_info stmt_info
= vinfo_for_stmt (stmt
);
1618 tree vectype
= STMT_VINFO_VECTYPE (stmt_info
);
1620 if (STMT_VINFO_STRIDED_ACCESS (stmt_info
))
1622 /* For interleaved access we peel only if number of iterations in
1623 the prolog loop ({VF - misalignment}), is a multiple of the
1624 number of the interleaved accesses. */
1625 int elem_size
, mis_in_elements
;
1626 int nelements
= TYPE_VECTOR_SUBPARTS (vectype
);
1628 /* FORNOW: handle only known alignment. */
1629 if (!known_alignment_for_access_p (dr
))
1632 elem_size
= GET_MODE_SIZE (TYPE_MODE (vectype
)) / nelements
;
1633 mis_in_elements
= DR_MISALIGNMENT (dr
) / elem_size
;
1635 if ((nelements
- mis_in_elements
) % DR_GROUP_SIZE (stmt_info
))
1639 /* If misalignment is known at the compile time then allow peeling
1640 only if natural alignment is reachable through peeling. */
1641 if (known_alignment_for_access_p (dr
) && !aligned_access_p (dr
))
1643 HOST_WIDE_INT elmsize
=
1644 int_cst_value (TYPE_SIZE_UNIT (TREE_TYPE (vectype
)));
1645 if (vect_print_dump_info (REPORT_DETAILS
))
1647 fprintf (vect_dump
, "data size =" HOST_WIDE_INT_PRINT_DEC
, elmsize
);
1648 fprintf (vect_dump
, ". misalignment = %d. ", DR_MISALIGNMENT (dr
));
1650 if (DR_MISALIGNMENT (dr
) % elmsize
)
1652 if (vect_print_dump_info (REPORT_DETAILS
))
1653 fprintf (vect_dump
, "data size does not divide the misalignment.\n");
1658 if (!known_alignment_for_access_p (dr
))
1660 tree type
= (TREE_TYPE (DR_REF (dr
)));
1661 tree ba
= DR_BASE_OBJECT (dr
);
1662 bool is_packed
= false;
1665 is_packed
= contains_packed_reference (ba
);
1667 if (vect_print_dump_info (REPORT_DETAILS
))
1668 fprintf (vect_dump
, "Unknown misalignment, is_packed = %d",is_packed
);
1669 if (targetm
.vectorize
.vector_alignment_reachable (type
, is_packed
))
1678 /* Function vect_enhance_data_refs_alignment
1680 This pass will use loop versioning and loop peeling in order to enhance
1681 the alignment of data references in the loop.
1683 FOR NOW: we assume that whatever versioning/peeling takes place, only the
1684 original loop is to be vectorized; Any other loops that are created by
1685 the transformations performed in this pass - are not supposed to be
1686 vectorized. This restriction will be relaxed.
1688 This pass will require a cost model to guide it whether to apply peeling
1689 or versioning or a combination of the two. For example, the scheme that
1690 intel uses when given a loop with several memory accesses, is as follows:
1691 choose one memory access ('p') which alignment you want to force by doing
1692 peeling. Then, either (1) generate a loop in which 'p' is aligned and all
1693 other accesses are not necessarily aligned, or (2) use loop versioning to
1694 generate one loop in which all accesses are aligned, and another loop in
1695 which only 'p' is necessarily aligned.
1697 ("Automatic Intra-Register Vectorization for the Intel Architecture",
1698 Aart J.C. Bik, Milind Girkar, Paul M. Grey and Ximmin Tian, International
1699 Journal of Parallel Programming, Vol. 30, No. 2, April 2002.)
1701 Devising a cost model is the most critical aspect of this work. It will
1702 guide us on which access to peel for, whether to use loop versioning, how
1703 many versions to create, etc. The cost model will probably consist of
1704 generic considerations as well as target specific considerations (on
1705 powerpc for example, misaligned stores are more painful than misaligned
1708 Here are the general steps involved in alignment enhancements:
1710 -- original loop, before alignment analysis:
1711 for (i=0; i<N; i++){
1712 x = q[i]; # DR_MISALIGNMENT(q) = unknown
1713 p[i] = y; # DR_MISALIGNMENT(p) = unknown
1716 -- After vect_compute_data_refs_alignment:
1717 for (i=0; i<N; i++){
1718 x = q[i]; # DR_MISALIGNMENT(q) = 3
1719 p[i] = y; # DR_MISALIGNMENT(p) = unknown
1722 -- Possibility 1: we do loop versioning:
1724 for (i=0; i<N; i++){ # loop 1A
1725 x = q[i]; # DR_MISALIGNMENT(q) = 3
1726 p[i] = y; # DR_MISALIGNMENT(p) = 0
1730 for (i=0; i<N; i++){ # loop 1B
1731 x = q[i]; # DR_MISALIGNMENT(q) = 3
1732 p[i] = y; # DR_MISALIGNMENT(p) = unaligned
1736 -- Possibility 2: we do loop peeling:
1737 for (i = 0; i < 3; i++){ # (scalar loop, not to be vectorized).
1741 for (i = 3; i < N; i++){ # loop 2A
1742 x = q[i]; # DR_MISALIGNMENT(q) = 0
1743 p[i] = y; # DR_MISALIGNMENT(p) = unknown
1746 -- Possibility 3: combination of loop peeling and versioning:
1747 for (i = 0; i < 3; i++){ # (scalar loop, not to be vectorized).
1752 for (i = 3; i<N; i++){ # loop 3A
1753 x = q[i]; # DR_MISALIGNMENT(q) = 0
1754 p[i] = y; # DR_MISALIGNMENT(p) = 0
1758 for (i = 3; i<N; i++){ # loop 3B
1759 x = q[i]; # DR_MISALIGNMENT(q) = 0
1760 p[i] = y; # DR_MISALIGNMENT(p) = unaligned
1764 These loops are later passed to loop_transform to be vectorized. The
1765 vectorizer will use the alignment information to guide the transformation
1766 (whether to generate regular loads/stores, or with special handling for
1770 vect_enhance_data_refs_alignment (loop_vec_info loop_vinfo
)
1772 VEC (data_reference_p
, heap
) *datarefs
= LOOP_VINFO_DATAREFS (loop_vinfo
);
1773 struct loop
*loop
= LOOP_VINFO_LOOP (loop_vinfo
);
1774 enum dr_alignment_support supportable_dr_alignment
;
1775 struct data_reference
*dr0
= NULL
;
1776 struct data_reference
*dr
;
1778 bool do_peeling
= false;
1779 bool do_versioning
= false;
1782 stmt_vec_info stmt_info
;
1783 int vect_versioning_for_alias_required
;
1785 if (vect_print_dump_info (REPORT_DETAILS
))
1786 fprintf (vect_dump
, "=== vect_enhance_data_refs_alignment ===");
1788 /* While cost model enhancements are expected in the future, the high level
1789 view of the code at this time is as follows:
1791 A) If there is a misaligned write then see if peeling to align this write
1792 can make all data references satisfy vect_supportable_dr_alignment.
1793 If so, update data structures as needed and return true. Note that
1794 at this time vect_supportable_dr_alignment is known to return false
1795 for a misaligned write.
1797 B) If peeling wasn't possible and there is a data reference with an
1798 unknown misalignment that does not satisfy vect_supportable_dr_alignment
1799 then see if loop versioning checks can be used to make all data
1800 references satisfy vect_supportable_dr_alignment. If so, update
1801 data structures as needed and return true.
1803 C) If neither peeling nor versioning were successful then return false if
1804 any data reference does not satisfy vect_supportable_dr_alignment.
1806 D) Return true (all data references satisfy vect_supportable_dr_alignment).
1808 Note, Possibility 3 above (which is peeling and versioning together) is not
1809 being done at this time. */
1811 /* (1) Peeling to force alignment. */
1813 /* (1.1) Decide whether to perform peeling, and how many iterations to peel:
1815 + How many accesses will become aligned due to the peeling
1816 - How many accesses will become unaligned due to the peeling,
1817 and the cost of misaligned accesses.
1818 - The cost of peeling (the extra runtime checks, the increase
1821 The scheme we use FORNOW: peel to force the alignment of the first
1822 misaligned store in the loop.
1823 Rationale: misaligned stores are not yet supported.
1825 TODO: Use a cost model. */
1827 for (i
= 0; VEC_iterate (data_reference_p
, datarefs
, i
, dr
); i
++)
1829 stmt
= DR_STMT (dr
);
1830 stmt_info
= vinfo_for_stmt (stmt
);
1832 /* For interleaving, only the alignment of the first access
1834 if (STMT_VINFO_STRIDED_ACCESS (stmt_info
)
1835 && DR_GROUP_FIRST_DR (stmt_info
) != stmt
)
1838 if (!DR_IS_READ (dr
) && !aligned_access_p (dr
))
1840 do_peeling
= vector_alignment_reachable_p (dr
);
1843 if (!do_peeling
&& vect_print_dump_info (REPORT_DETAILS
))
1844 fprintf (vect_dump
, "vector alignment may not be reachable");
1849 vect_versioning_for_alias_required
=
1850 (VEC_length (ddr_p
, LOOP_VINFO_MAY_ALIAS_DDRS (loop_vinfo
)) > 0);
1852 /* Temporarily, if versioning for alias is required, we disable peeling
1853 until we support peeling and versioning. Often peeling for alignment
1854 will require peeling for loop-bound, which in turn requires that we
1855 know how to adjust the loop ivs after the loop. */
1856 if (vect_versioning_for_alias_required
1857 || !vect_can_advance_ivs_p (loop_vinfo
)
1858 || !slpeel_can_duplicate_loop_p (loop
, single_exit (loop
)))
1865 gimple stmt
= DR_STMT (dr0
);
1866 stmt_vec_info stmt_info
= vinfo_for_stmt (stmt
);
1867 tree vectype
= STMT_VINFO_VECTYPE (stmt_info
);
1868 int nelements
= TYPE_VECTOR_SUBPARTS (vectype
);
1870 if (known_alignment_for_access_p (dr0
))
1872 /* Since it's known at compile time, compute the number of iterations
1873 in the peeled loop (the peeling factor) for use in updating
1874 DR_MISALIGNMENT values. The peeling factor is the vectorization
1875 factor minus the misalignment as an element count. */
1876 mis
= DR_MISALIGNMENT (dr0
);
1877 mis
/= GET_MODE_SIZE (TYPE_MODE (TREE_TYPE (DR_REF (dr0
))));
1878 npeel
= nelements
- mis
;
1880 /* For interleaved data access every iteration accesses all the
1881 members of the group, therefore we divide the number of iterations
1882 by the group size. */
1883 stmt_info
= vinfo_for_stmt (DR_STMT (dr0
));
1884 if (STMT_VINFO_STRIDED_ACCESS (stmt_info
))
1885 npeel
/= DR_GROUP_SIZE (stmt_info
);
1887 if (vect_print_dump_info (REPORT_DETAILS
))
1888 fprintf (vect_dump
, "Try peeling by %d", npeel
);
1891 /* Ensure that all data refs can be vectorized after the peel. */
1892 for (i
= 0; VEC_iterate (data_reference_p
, datarefs
, i
, dr
); i
++)
1894 int save_misalignment
;
1899 stmt
= DR_STMT (dr
);
1900 stmt_info
= vinfo_for_stmt (stmt
);
1901 /* For interleaving, only the alignment of the first access
1903 if (STMT_VINFO_STRIDED_ACCESS (stmt_info
)
1904 && DR_GROUP_FIRST_DR (stmt_info
) != stmt
)
1907 save_misalignment
= DR_MISALIGNMENT (dr
);
1908 vect_update_misalignment_for_peel (dr
, dr0
, npeel
);
1909 supportable_dr_alignment
= vect_supportable_dr_alignment (dr
);
1910 SET_DR_MISALIGNMENT (dr
, save_misalignment
);
1912 if (!supportable_dr_alignment
)
1921 /* (1.2) Update the DR_MISALIGNMENT of each data reference DR_i.
1922 If the misalignment of DR_i is identical to that of dr0 then set
1923 DR_MISALIGNMENT (DR_i) to zero. If the misalignment of DR_i and
1924 dr0 are known at compile time then increment DR_MISALIGNMENT (DR_i)
1925 by the peeling factor times the element size of DR_i (MOD the
1926 vectorization factor times the size). Otherwise, the
1927 misalignment of DR_i must be set to unknown. */
1928 for (i
= 0; VEC_iterate (data_reference_p
, datarefs
, i
, dr
); i
++)
1930 vect_update_misalignment_for_peel (dr
, dr0
, npeel
);
1932 LOOP_VINFO_UNALIGNED_DR (loop_vinfo
) = dr0
;
1933 LOOP_PEELING_FOR_ALIGNMENT (loop_vinfo
) = DR_MISALIGNMENT (dr0
);
1934 SET_DR_MISALIGNMENT (dr0
, 0);
1935 if (vect_print_dump_info (REPORT_ALIGNMENT
))
1936 fprintf (vect_dump
, "Alignment of access forced using peeling.");
1938 if (vect_print_dump_info (REPORT_DETAILS
))
1939 fprintf (vect_dump
, "Peeling for alignment will be applied.");
1941 stat
= vect_verify_datarefs_alignment (loop_vinfo
);
1948 /* (2) Versioning to force alignment. */
1950 /* Try versioning if:
1951 1) flag_tree_vect_loop_version is TRUE
1952 2) optimize_size is FALSE
1953 3) there is at least one unsupported misaligned data ref with an unknown
1955 4) all misaligned data refs with a known misalignment are supported, and
1956 5) the number of runtime alignment checks is within reason. */
1959 flag_tree_vect_loop_version
1961 && (!loop
->inner
); /* FORNOW */
1965 for (i
= 0; VEC_iterate (data_reference_p
, datarefs
, i
, dr
); i
++)
1967 stmt
= DR_STMT (dr
);
1968 stmt_info
= vinfo_for_stmt (stmt
);
1970 /* For interleaving, only the alignment of the first access
1972 if (aligned_access_p (dr
)
1973 || (STMT_VINFO_STRIDED_ACCESS (stmt_info
)
1974 && DR_GROUP_FIRST_DR (stmt_info
) != stmt
))
1977 supportable_dr_alignment
= vect_supportable_dr_alignment (dr
);
1979 if (!supportable_dr_alignment
)
1985 if (known_alignment_for_access_p (dr
)
1986 || VEC_length (gimple
,
1987 LOOP_VINFO_MAY_MISALIGN_STMTS (loop_vinfo
))
1988 >= (unsigned) PARAM_VALUE (PARAM_VECT_MAX_VERSION_FOR_ALIGNMENT_CHECKS
))
1990 do_versioning
= false;
1994 stmt
= DR_STMT (dr
);
1995 vectype
= STMT_VINFO_VECTYPE (vinfo_for_stmt (stmt
));
1996 gcc_assert (vectype
);
1998 /* The rightmost bits of an aligned address must be zeros.
1999 Construct the mask needed for this test. For example,
2000 GET_MODE_SIZE for the vector mode V4SI is 16 bytes so the
2001 mask must be 15 = 0xf. */
2002 mask
= GET_MODE_SIZE (TYPE_MODE (vectype
)) - 1;
2004 /* FORNOW: use the same mask to test all potentially unaligned
2005 references in the loop. The vectorizer currently supports
2006 a single vector size, see the reference to
2007 GET_MODE_NUNITS (TYPE_MODE (vectype)) where the
2008 vectorization factor is computed. */
2009 gcc_assert (!LOOP_VINFO_PTR_MASK (loop_vinfo
)
2010 || LOOP_VINFO_PTR_MASK (loop_vinfo
) == mask
);
2011 LOOP_VINFO_PTR_MASK (loop_vinfo
) = mask
;
2012 VEC_safe_push (gimple
, heap
,
2013 LOOP_VINFO_MAY_MISALIGN_STMTS (loop_vinfo
),
2018 /* Versioning requires at least one misaligned data reference. */
2019 if (VEC_length (gimple
, LOOP_VINFO_MAY_MISALIGN_STMTS (loop_vinfo
)) == 0)
2020 do_versioning
= false;
2021 else if (!do_versioning
)
2022 VEC_truncate (gimple
, LOOP_VINFO_MAY_MISALIGN_STMTS (loop_vinfo
), 0);
2027 VEC(gimple
,heap
) *may_misalign_stmts
2028 = LOOP_VINFO_MAY_MISALIGN_STMTS (loop_vinfo
);
2031 /* It can now be assumed that the data references in the statements
2032 in LOOP_VINFO_MAY_MISALIGN_STMTS will be aligned in the version
2033 of the loop being vectorized. */
2034 for (i
= 0; VEC_iterate (gimple
, may_misalign_stmts
, i
, stmt
); i
++)
2036 stmt_vec_info stmt_info
= vinfo_for_stmt (stmt
);
2037 dr
= STMT_VINFO_DATA_REF (stmt_info
);
2038 SET_DR_MISALIGNMENT (dr
, 0);
2039 if (vect_print_dump_info (REPORT_ALIGNMENT
))
2040 fprintf (vect_dump
, "Alignment of access forced using versioning.");
2043 if (vect_print_dump_info (REPORT_DETAILS
))
2044 fprintf (vect_dump
, "Versioning for alignment will be applied.");
2046 /* Peeling and versioning can't be done together at this time. */
2047 gcc_assert (! (do_peeling
&& do_versioning
));
2049 stat
= vect_verify_datarefs_alignment (loop_vinfo
);
2054 /* This point is reached if neither peeling nor versioning is being done. */
2055 gcc_assert (! (do_peeling
|| do_versioning
));
2057 stat
= vect_verify_datarefs_alignment (loop_vinfo
);
2062 /* Function vect_analyze_data_refs_alignment
2064 Analyze the alignment of the data-references in the loop.
2065 Return FALSE if a data reference is found that cannot be vectorized. */
2068 vect_analyze_data_refs_alignment (loop_vec_info loop_vinfo
)
2070 if (vect_print_dump_info (REPORT_DETAILS
))
2071 fprintf (vect_dump
, "=== vect_analyze_data_refs_alignment ===");
2073 if (!vect_compute_data_refs_alignment (loop_vinfo
))
2075 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS
))
2077 "not vectorized: can't calculate alignment for data ref.");
2085 /* Analyze groups of strided accesses: check that DR belongs to a group of
2086 strided accesses of legal size, step, etc. Detect gaps, single element
2087 interleaving, and other special cases. Set strided access info.
2088 Collect groups of strided stores for further use in SLP analysis. */
2091 vect_analyze_group_access (struct data_reference
*dr
)
2093 tree step
= DR_STEP (dr
);
2094 tree scalar_type
= TREE_TYPE (DR_REF (dr
));
2095 HOST_WIDE_INT type_size
= TREE_INT_CST_LOW (TYPE_SIZE_UNIT (scalar_type
));
2096 gimple stmt
= DR_STMT (dr
);
2097 stmt_vec_info stmt_info
= vinfo_for_stmt (stmt
);
2098 loop_vec_info loop_vinfo
= STMT_VINFO_LOOP_VINFO (stmt_info
);
2099 HOST_WIDE_INT dr_step
= TREE_INT_CST_LOW (step
);
2100 HOST_WIDE_INT stride
;
2101 bool slp_impossible
= false;
2103 /* For interleaving, STRIDE is STEP counted in elements, i.e., the size of the
2104 interleaving group (including gaps). */
2105 stride
= dr_step
/ type_size
;
2107 /* Not consecutive access is possible only if it is a part of interleaving. */
2108 if (!DR_GROUP_FIRST_DR (vinfo_for_stmt (stmt
)))
2110 /* Check if it this DR is a part of interleaving, and is a single
2111 element of the group that is accessed in the loop. */
2113 /* Gaps are supported only for loads. STEP must be a multiple of the type
2114 size. The size of the group must be a power of 2. */
2116 && (dr_step
% type_size
) == 0
2118 && exact_log2 (stride
) != -1)
2120 DR_GROUP_FIRST_DR (vinfo_for_stmt (stmt
)) = stmt
;
2121 DR_GROUP_SIZE (vinfo_for_stmt (stmt
)) = stride
;
2122 if (vect_print_dump_info (REPORT_DR_DETAILS
))
2124 fprintf (vect_dump
, "Detected single element interleaving %d ",
2125 DR_GROUP_SIZE (vinfo_for_stmt (stmt
)));
2126 print_generic_expr (vect_dump
, DR_REF (dr
), TDF_SLIM
);
2127 fprintf (vect_dump
, " step ");
2128 print_generic_expr (vect_dump
, step
, TDF_SLIM
);
2132 if (vect_print_dump_info (REPORT_DETAILS
))
2133 fprintf (vect_dump
, "not consecutive access");
2137 if (DR_GROUP_FIRST_DR (vinfo_for_stmt (stmt
)) == stmt
)
2139 /* First stmt in the interleaving chain. Check the chain. */
2140 gimple next
= DR_GROUP_NEXT_DR (vinfo_for_stmt (stmt
));
2141 struct data_reference
*data_ref
= dr
;
2142 unsigned int count
= 1;
2144 tree prev_init
= DR_INIT (data_ref
);
2146 HOST_WIDE_INT diff
, count_in_bytes
;
2150 /* Skip same data-refs. In case that two or more stmts share data-ref
2151 (supported only for loads), we vectorize only the first stmt, and
2152 the rest get their vectorized loads from the first one. */
2153 if (!tree_int_cst_compare (DR_INIT (data_ref
),
2154 DR_INIT (STMT_VINFO_DATA_REF (
2155 vinfo_for_stmt (next
)))))
2157 if (!DR_IS_READ (data_ref
))
2159 if (vect_print_dump_info (REPORT_DETAILS
))
2160 fprintf (vect_dump
, "Two store stmts share the same dr.");
2164 /* Check that there is no load-store dependencies for this loads
2165 to prevent a case of load-store-load to the same location. */
2166 if (DR_GROUP_READ_WRITE_DEPENDENCE (vinfo_for_stmt (next
))
2167 || DR_GROUP_READ_WRITE_DEPENDENCE (vinfo_for_stmt (prev
)))
2169 if (vect_print_dump_info (REPORT_DETAILS
))
2171 "READ_WRITE dependence in interleaving.");
2175 /* For load use the same data-ref load. */
2176 DR_GROUP_SAME_DR_STMT (vinfo_for_stmt (next
)) = prev
;
2179 next
= DR_GROUP_NEXT_DR (vinfo_for_stmt (next
));
2184 /* Check that all the accesses have the same STEP. */
2185 next_step
= DR_STEP (STMT_VINFO_DATA_REF (vinfo_for_stmt (next
)));
2186 if (tree_int_cst_compare (step
, next_step
))
2188 if (vect_print_dump_info (REPORT_DETAILS
))
2189 fprintf (vect_dump
, "not consecutive access in interleaving");
2193 data_ref
= STMT_VINFO_DATA_REF (vinfo_for_stmt (next
));
2194 /* Check that the distance between two accesses is equal to the type
2195 size. Otherwise, we have gaps. */
2196 diff
= (TREE_INT_CST_LOW (DR_INIT (data_ref
))
2197 - TREE_INT_CST_LOW (prev_init
)) / type_size
;
2200 /* FORNOW: SLP of accesses with gaps is not supported. */
2201 slp_impossible
= true;
2202 if (!DR_IS_READ (data_ref
))
2204 if (vect_print_dump_info (REPORT_DETAILS
))
2205 fprintf (vect_dump
, "interleaved store with gaps");
2210 /* Store the gap from the previous member of the group. If there is no
2211 gap in the access, DR_GROUP_GAP is always 1. */
2212 DR_GROUP_GAP (vinfo_for_stmt (next
)) = diff
;
2214 prev_init
= DR_INIT (data_ref
);
2215 next
= DR_GROUP_NEXT_DR (vinfo_for_stmt (next
));
2216 /* Count the number of data-refs in the chain. */
2220 /* COUNT is the number of accesses found, we multiply it by the size of
2221 the type to get COUNT_IN_BYTES. */
2222 count_in_bytes
= type_size
* count
;
2224 /* Check that the size of the interleaving is not greater than STEP. */
2225 if (dr_step
< count_in_bytes
)
2227 if (vect_print_dump_info (REPORT_DETAILS
))
2229 fprintf (vect_dump
, "interleaving size is greater than step for ");
2230 print_generic_expr (vect_dump
, DR_REF (dr
), TDF_SLIM
);
2235 /* Check that the size of the interleaving is equal to STEP for stores,
2236 i.e., that there are no gaps. */
2237 if (dr_step
!= count_in_bytes
)
2239 if (DR_IS_READ (dr
))
2241 slp_impossible
= true;
2242 /* There is a gap after the last load in the group. This gap is a
2243 difference between the stride and the number of elements. When
2244 there is no gap, this difference should be 0. */
2245 DR_GROUP_GAP (vinfo_for_stmt (stmt
)) = stride
- count
;
2249 if (vect_print_dump_info (REPORT_DETAILS
))
2250 fprintf (vect_dump
, "interleaved store with gaps");
2255 /* Check that STEP is a multiple of type size. */
2256 if ((dr_step
% type_size
) != 0)
2258 if (vect_print_dump_info (REPORT_DETAILS
))
2260 fprintf (vect_dump
, "step is not a multiple of type size: step ");
2261 print_generic_expr (vect_dump
, step
, TDF_SLIM
);
2262 fprintf (vect_dump
, " size ");
2263 print_generic_expr (vect_dump
, TYPE_SIZE_UNIT (scalar_type
),
2269 /* FORNOW: we handle only interleaving that is a power of 2.
2270 We don't fail here if it may be still possible to vectorize the
2271 group using SLP. If not, the size of the group will be checked in
2272 vect_analyze_operations, and the vectorization will fail. */
2273 if (exact_log2 (stride
) == -1)
2275 if (vect_print_dump_info (REPORT_DETAILS
))
2276 fprintf (vect_dump
, "interleaving is not a power of 2");
2281 DR_GROUP_SIZE (vinfo_for_stmt (stmt
)) = stride
;
2282 if (vect_print_dump_info (REPORT_DETAILS
))
2283 fprintf (vect_dump
, "Detected interleaving of size %d", (int)stride
);
2285 /* SLP: create an SLP data structure for every interleaving group of
2286 stores for further analysis in vect_analyse_slp. */
2287 if (!DR_IS_READ (dr
) && !slp_impossible
)
2288 VEC_safe_push (gimple
, heap
, LOOP_VINFO_STRIDED_STORES (loop_vinfo
), stmt
);
2295 /* Analyze the access pattern of the data-reference DR.
2296 In case of non-consecutive accesses call vect_analyze_group_access() to
2297 analyze groups of strided accesses. */
2300 vect_analyze_data_ref_access (struct data_reference
*dr
)
2302 tree step
= DR_STEP (dr
);
2303 tree scalar_type
= TREE_TYPE (DR_REF (dr
));
2304 gimple stmt
= DR_STMT (dr
);
2305 stmt_vec_info stmt_info
= vinfo_for_stmt (stmt
);
2306 loop_vec_info loop_vinfo
= STMT_VINFO_LOOP_VINFO (stmt_info
);
2307 struct loop
*loop
= LOOP_VINFO_LOOP (loop_vinfo
);
2308 HOST_WIDE_INT dr_step
= TREE_INT_CST_LOW (step
);
2312 if (vect_print_dump_info (REPORT_DETAILS
))
2313 fprintf (vect_dump
, "bad data-ref access");
2317 /* Don't allow invariant accesses. */
2321 if (nested_in_vect_loop_p (loop
, stmt
))
2323 /* Interleaved accesses are not yet supported within outer-loop
2324 vectorization for references in the inner-loop. */
2325 DR_GROUP_FIRST_DR (vinfo_for_stmt (stmt
)) = NULL
;
2327 /* For the rest of the analysis we use the outer-loop step. */
2328 step
= STMT_VINFO_DR_STEP (stmt_info
);
2329 dr_step
= TREE_INT_CST_LOW (step
);
2333 if (vect_print_dump_info (REPORT_ALIGNMENT
))
2334 fprintf (vect_dump
, "zero step in outer loop.");
2335 if (DR_IS_READ (dr
))
2343 if (!tree_int_cst_compare (step
, TYPE_SIZE_UNIT (scalar_type
)))
2345 /* Mark that it is not interleaving. */
2346 DR_GROUP_FIRST_DR (vinfo_for_stmt (stmt
)) = NULL
;
2350 if (nested_in_vect_loop_p (loop
, stmt
))
2352 if (vect_print_dump_info (REPORT_ALIGNMENT
))
2353 fprintf (vect_dump
, "strided access in outer loop.");
2357 /* Not consecutive access - check if it's a part of interleaving group. */
2358 return vect_analyze_group_access (dr
);
2362 /* Function vect_analyze_data_ref_accesses.
2364 Analyze the access pattern of all the data references in the loop.
2366 FORNOW: the only access pattern that is considered vectorizable is a
2367 simple step 1 (consecutive) access.
2369 FORNOW: handle only arrays and pointer accesses. */
2372 vect_analyze_data_ref_accesses (loop_vec_info loop_vinfo
)
2375 VEC (data_reference_p
, heap
) *datarefs
= LOOP_VINFO_DATAREFS (loop_vinfo
);
2376 struct data_reference
*dr
;
2378 if (vect_print_dump_info (REPORT_DETAILS
))
2379 fprintf (vect_dump
, "=== vect_analyze_data_ref_accesses ===");
2381 for (i
= 0; VEC_iterate (data_reference_p
, datarefs
, i
, dr
); i
++)
2382 if (!vect_analyze_data_ref_access (dr
))
2384 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS
))
2385 fprintf (vect_dump
, "not vectorized: complicated access pattern.");
2392 /* Function vect_prune_runtime_alias_test_list.
2394 Prune a list of ddrs to be tested at run-time by versioning for alias.
2395 Return FALSE if resulting list of ddrs is longer then allowed by
2396 PARAM_VECT_MAX_VERSION_FOR_ALIAS_CHECKS, otherwise return TRUE. */
2399 vect_prune_runtime_alias_test_list (loop_vec_info loop_vinfo
)
2401 VEC (ddr_p
, heap
) * ddrs
=
2402 LOOP_VINFO_MAY_ALIAS_DDRS (loop_vinfo
);
2405 if (vect_print_dump_info (REPORT_DETAILS
))
2406 fprintf (vect_dump
, "=== vect_prune_runtime_alias_test_list ===");
2408 for (i
= 0; i
< VEC_length (ddr_p
, ddrs
); )
2413 ddr_i
= VEC_index (ddr_p
, ddrs
, i
);
2416 for (j
= 0; j
< i
; j
++)
2418 ddr_p ddr_j
= VEC_index (ddr_p
, ddrs
, j
);
2420 if (vect_vfa_range_equal (ddr_i
, ddr_j
))
2422 if (vect_print_dump_info (REPORT_DR_DETAILS
))
2424 fprintf (vect_dump
, "found equal ranges ");
2425 print_generic_expr (vect_dump
, DR_REF (DDR_A (ddr_i
)), TDF_SLIM
);
2426 fprintf (vect_dump
, ", ");
2427 print_generic_expr (vect_dump
, DR_REF (DDR_B (ddr_i
)), TDF_SLIM
);
2428 fprintf (vect_dump
, " and ");
2429 print_generic_expr (vect_dump
, DR_REF (DDR_A (ddr_j
)), TDF_SLIM
);
2430 fprintf (vect_dump
, ", ");
2431 print_generic_expr (vect_dump
, DR_REF (DDR_B (ddr_j
)), TDF_SLIM
);
2440 VEC_ordered_remove (ddr_p
, ddrs
, i
);
2446 if (VEC_length (ddr_p
, ddrs
) >
2447 (unsigned) PARAM_VALUE (PARAM_VECT_MAX_VERSION_FOR_ALIAS_CHECKS
))
2449 if (vect_print_dump_info (REPORT_DR_DETAILS
))
2452 "disable versioning for alias - max number of generated "
2453 "checks exceeded.");
2456 VEC_truncate (ddr_p
, LOOP_VINFO_MAY_ALIAS_DDRS (loop_vinfo
), 0);
2464 /* Recursively free the memory allocated for the SLP tree rooted at NODE. */
2467 vect_free_slp_tree (slp_tree node
)
2472 if (SLP_TREE_LEFT (node
))
2473 vect_free_slp_tree (SLP_TREE_LEFT (node
));
2475 if (SLP_TREE_RIGHT (node
))
2476 vect_free_slp_tree (SLP_TREE_RIGHT (node
));
2478 VEC_free (gimple
, heap
, SLP_TREE_SCALAR_STMTS (node
));
2480 if (SLP_TREE_VEC_STMTS (node
))
2481 VEC_free (gimple
, heap
, SLP_TREE_VEC_STMTS (node
));
2487 /* Get the defs for the rhs of STMT (collect them in DEF_STMTS0/1), check that
2488 they are of a legal type and that they match the defs of the first stmt of
2489 the SLP group (stored in FIRST_STMT_...). */
2492 vect_get_and_check_slp_defs (loop_vec_info loop_vinfo
, slp_tree slp_node
,
2493 gimple stmt
, VEC (gimple
, heap
) **def_stmts0
,
2494 VEC (gimple
, heap
) **def_stmts1
,
2495 enum vect_def_type
*first_stmt_dt0
,
2496 enum vect_def_type
*first_stmt_dt1
,
2497 tree
*first_stmt_def0_type
,
2498 tree
*first_stmt_def1_type
,
2499 tree
*first_stmt_const_oprnd
,
2500 int ncopies_for_cost
)
2503 unsigned int i
, number_of_oprnds
;
2506 enum vect_def_type dt
[2] = {vect_unknown_def_type
, vect_unknown_def_type
};
2507 stmt_vec_info stmt_info
=
2508 vinfo_for_stmt (VEC_index (gimple
, SLP_TREE_SCALAR_STMTS (slp_node
), 0));
2509 enum gimple_rhs_class rhs_class
;
2511 rhs_class
= get_gimple_rhs_class (gimple_assign_rhs_code (stmt
));
2512 number_of_oprnds
= gimple_num_ops (stmt
) - 1; /* RHS only */
2514 for (i
= 0; i
< number_of_oprnds
; i
++)
2516 oprnd
= gimple_op (stmt
, i
+ 1);
2518 if (!vect_is_simple_use (oprnd
, loop_vinfo
, &def_stmt
, &def
, &dt
[i
])
2519 || (!def_stmt
&& dt
[i
] != vect_constant_def
))
2521 if (vect_print_dump_info (REPORT_SLP
))
2523 fprintf (vect_dump
, "Build SLP failed: can't find def for ");
2524 print_generic_expr (vect_dump
, oprnd
, TDF_SLIM
);
2530 if (!*first_stmt_dt0
)
2532 /* op0 of the first stmt of the group - store its info. */
2533 *first_stmt_dt0
= dt
[i
];
2535 *first_stmt_def0_type
= TREE_TYPE (def
);
2537 *first_stmt_const_oprnd
= oprnd
;
2539 /* Analyze costs (for the first stmt of the group only). */
2540 if (rhs_class
!= GIMPLE_SINGLE_RHS
)
2541 /* Not memory operation (we don't call this functions for loads). */
2542 vect_model_simple_cost (stmt_info
, ncopies_for_cost
, dt
, slp_node
);
2545 vect_model_store_cost (stmt_info
, ncopies_for_cost
, dt
[0], slp_node
);
2550 if (!*first_stmt_dt1
&& i
== 1)
2552 /* op1 of the first stmt of the group - store its info. */
2553 *first_stmt_dt1
= dt
[i
];
2555 *first_stmt_def1_type
= TREE_TYPE (def
);
2558 /* We assume that the stmt contains only one constant
2559 operand. We fail otherwise, to be on the safe side. */
2560 if (*first_stmt_const_oprnd
)
2562 if (vect_print_dump_info (REPORT_SLP
))
2563 fprintf (vect_dump
, "Build SLP failed: two constant "
2567 *first_stmt_const_oprnd
= oprnd
;
2572 /* Not first stmt of the group, check that the def-stmt/s match
2573 the def-stmt/s of the first stmt. */
2575 && (*first_stmt_dt0
!= dt
[i
]
2576 || (*first_stmt_def0_type
&& def
2577 && *first_stmt_def0_type
!= TREE_TYPE (def
))))
2579 && (*first_stmt_dt1
!= dt
[i
]
2580 || (*first_stmt_def1_type
&& def
2581 && *first_stmt_def1_type
!= TREE_TYPE (def
))))
2583 && TREE_TYPE (*first_stmt_const_oprnd
)
2584 != TREE_TYPE (oprnd
)))
2586 if (vect_print_dump_info (REPORT_SLP
))
2587 fprintf (vect_dump
, "Build SLP failed: different types ");
2594 /* Check the types of the definitions. */
2597 case vect_constant_def
:
2598 case vect_invariant_def
:
2603 VEC_safe_push (gimple
, heap
, *def_stmts0
, def_stmt
);
2605 VEC_safe_push (gimple
, heap
, *def_stmts1
, def_stmt
);
2609 /* FORNOW: Not supported. */
2610 if (vect_print_dump_info (REPORT_SLP
))
2612 fprintf (vect_dump
, "Build SLP failed: illegal type of def ");
2613 print_generic_expr (vect_dump
, def
, TDF_SLIM
);
2624 /* Recursively build an SLP tree starting from NODE.
2625 Fail (and return FALSE) if def-stmts are not isomorphic, require data
2626 permutation or are of unsupported types of operation. Otherwise, return
2628 SLP_IMPOSSIBLE is TRUE if it is impossible to SLP in the loop, for example
2629 in the case of multiple types for now. */
2632 vect_build_slp_tree (loop_vec_info loop_vinfo
, slp_tree
*node
,
2633 unsigned int group_size
, bool *slp_impossible
,
2634 int *inside_cost
, int *outside_cost
,
2635 int ncopies_for_cost
)
2637 VEC (gimple
, heap
) *def_stmts0
= VEC_alloc (gimple
, heap
, group_size
);
2638 VEC (gimple
, heap
) *def_stmts1
= VEC_alloc (gimple
, heap
, group_size
);
2640 VEC (gimple
, heap
) *stmts
= SLP_TREE_SCALAR_STMTS (*node
);
2641 gimple stmt
= VEC_index (gimple
, stmts
, 0);
2642 enum vect_def_type first_stmt_dt0
= 0, first_stmt_dt1
= 0;
2643 enum tree_code first_stmt_code
= 0, rhs_code
;
2644 tree first_stmt_def1_type
= NULL_TREE
, first_stmt_def0_type
= NULL_TREE
;
2646 gimple prev_stmt
= NULL
;
2647 bool stop_recursion
= false, need_same_oprnds
= false;
2648 tree vectype
, scalar_type
, first_op1
= NULL_TREE
;
2649 unsigned int vectorization_factor
= 0, ncopies
;
2652 enum machine_mode optab_op2_mode
;
2653 enum machine_mode vec_mode
;
2654 tree first_stmt_const_oprnd
= NULL_TREE
;
2655 struct data_reference
*first_dr
;
2657 /* For every stmt in NODE find its def stmt/s. */
2658 for (i
= 0; VEC_iterate (gimple
, stmts
, i
, stmt
); i
++)
2660 if (vect_print_dump_info (REPORT_SLP
))
2662 fprintf (vect_dump
, "Build SLP for ");
2663 print_gimple_stmt (vect_dump
, stmt
, 0, TDF_SLIM
);
2666 lhs
= gimple_get_lhs (stmt
);
2667 if (lhs
== NULL_TREE
)
2669 if (vect_print_dump_info (REPORT_SLP
))
2672 "Build SLP failed: not GIMPLE_ASSIGN nor GIMPLE_CALL");
2673 print_gimple_stmt (vect_dump
, stmt
, 0, TDF_SLIM
);
2679 scalar_type
= TREE_TYPE (lhs
);
2680 vectype
= get_vectype_for_scalar_type (scalar_type
);
2683 if (vect_print_dump_info (REPORT_SLP
))
2685 fprintf (vect_dump
, "Build SLP failed: unsupported data-type ");
2686 print_generic_expr (vect_dump
, scalar_type
, TDF_SLIM
);
2691 gcc_assert (LOOP_VINFO_VECT_FACTOR (loop_vinfo
));
2692 vectorization_factor
= LOOP_VINFO_VECT_FACTOR (loop_vinfo
);
2693 ncopies
= vectorization_factor
/ TYPE_VECTOR_SUBPARTS (vectype
);
2697 if (vect_print_dump_info (REPORT_SLP
))
2698 fprintf (vect_dump
, "SLP failed - multiple types ");
2700 *slp_impossible
= true;
2704 if (is_gimple_call (stmt
))
2705 rhs_code
= CALL_EXPR
;
2707 rhs_code
= gimple_assign_rhs_code (stmt
);
2709 /* Check the operation. */
2712 first_stmt_code
= rhs_code
;
2714 /* Shift arguments should be equal in all the packed stmts for a
2715 vector shift with scalar shift operand. */
2716 if (rhs_code
== LSHIFT_EXPR
|| rhs_code
== RSHIFT_EXPR
2717 || rhs_code
== LROTATE_EXPR
2718 || rhs_code
== RROTATE_EXPR
)
2720 vec_mode
= TYPE_MODE (vectype
);
2722 /* First see if we have a vector/vector shift. */
2723 optab
= optab_for_tree_code (rhs_code
, vectype
,
2727 || (optab
->handlers
[(int) vec_mode
].insn_code
2728 == CODE_FOR_nothing
))
2730 /* No vector/vector shift, try for a vector/scalar shift. */
2731 optab
= optab_for_tree_code (rhs_code
, vectype
,
2736 if (vect_print_dump_info (REPORT_SLP
))
2737 fprintf (vect_dump
, "Build SLP failed: no optab.");
2740 icode
= (int) optab
->handlers
[(int) vec_mode
].insn_code
;
2741 if (icode
== CODE_FOR_nothing
)
2743 if (vect_print_dump_info (REPORT_SLP
))
2745 "Build SLP failed: op not supported by target.");
2748 optab_op2_mode
= insn_data
[icode
].operand
[2].mode
;
2749 if (!VECTOR_MODE_P (optab_op2_mode
))
2751 need_same_oprnds
= true;
2752 first_op1
= gimple_assign_rhs2 (stmt
);
2759 if (first_stmt_code
!= rhs_code
2760 && (first_stmt_code
!= IMAGPART_EXPR
2761 || rhs_code
!= REALPART_EXPR
)
2762 && (first_stmt_code
!= REALPART_EXPR
2763 || rhs_code
!= IMAGPART_EXPR
))
2765 if (vect_print_dump_info (REPORT_SLP
))
2768 "Build SLP failed: different operation in stmt ");
2769 print_gimple_stmt (vect_dump
, stmt
, 0, TDF_SLIM
);
2775 if (need_same_oprnds
2776 && !operand_equal_p (first_op1
, gimple_assign_rhs2 (stmt
), 0))
2778 if (vect_print_dump_info (REPORT_SLP
))
2781 "Build SLP failed: different shift arguments in ");
2782 print_gimple_stmt (vect_dump
, stmt
, 0, TDF_SLIM
);
2789 /* Strided store or load. */
2790 if (STMT_VINFO_STRIDED_ACCESS (vinfo_for_stmt (stmt
)))
2792 if (REFERENCE_CLASS_P (lhs
))
2795 if (!vect_get_and_check_slp_defs (loop_vinfo
, *node
, stmt
,
2796 &def_stmts0
, &def_stmts1
,
2799 &first_stmt_def0_type
,
2800 &first_stmt_def1_type
,
2801 &first_stmt_const_oprnd
,
2810 /* First stmt of the SLP group should be the first load of
2811 the interleaving loop if data permutation is not allowed.
2812 Check that there is no gap between the loads. */
2813 if (DR_GROUP_FIRST_DR (vinfo_for_stmt (stmt
)) != stmt
2814 || DR_GROUP_GAP (vinfo_for_stmt (stmt
)) != 0)
2816 /* FORNOW: data permutations and gaps in loads are not
2818 if (vect_print_dump_info (REPORT_SLP
))
2820 fprintf (vect_dump
, "Build SLP failed: strided "
2821 " loads need permutation or have gaps ");
2822 print_gimple_stmt (vect_dump
, stmt
, 0, TDF_SLIM
);
2828 first_dr
= STMT_VINFO_DATA_REF (vinfo_for_stmt (stmt
));
2829 if (vect_supportable_dr_alignment (first_dr
)
2830 == dr_unaligned_unsupported
)
2832 if (vect_print_dump_info (REPORT_SLP
))
2834 fprintf (vect_dump
, "Build SLP failed: unsupported "
2835 " unaligned load ");
2836 print_gimple_stmt (vect_dump
, stmt
, 0, TDF_SLIM
);
2842 /* Analyze costs (for the first stmt in the group). */
2843 vect_model_load_cost (vinfo_for_stmt (stmt
),
2844 ncopies_for_cost
, *node
);
2848 /* Check that we have consecutive loads from interleaving
2849 chain and that there is no gap between the loads. */
2850 if (DR_GROUP_NEXT_DR (vinfo_for_stmt (prev_stmt
)) != stmt
2851 || DR_GROUP_GAP (vinfo_for_stmt (stmt
)) != 1)
2853 /* FORNOW: data permutations and gaps in loads are not
2855 if (vect_print_dump_info (REPORT_SLP
))
2857 fprintf (vect_dump
, "Build SLP failed: strided "
2858 " loads need permutation or have gaps ");
2859 print_gimple_stmt (vect_dump
, stmt
, 0, TDF_SLIM
);
2867 /* We stop the tree when we reach a group of loads. */
2868 stop_recursion
= true;
2871 } /* Strided access. */
2874 if (TREE_CODE_CLASS (rhs_code
) == tcc_reference
)
2876 /* Not strided load. */
2877 if (vect_print_dump_info (REPORT_SLP
))
2879 fprintf (vect_dump
, "Build SLP failed: not strided load ");
2880 print_gimple_stmt (vect_dump
, stmt
, 0, TDF_SLIM
);
2883 /* FORNOW: Not strided loads are not supported. */
2887 /* Not memory operation. */
2888 if (TREE_CODE_CLASS (rhs_code
) != tcc_binary
2889 && TREE_CODE_CLASS (rhs_code
) != tcc_unary
)
2891 if (vect_print_dump_info (REPORT_SLP
))
2893 fprintf (vect_dump
, "Build SLP failed: operation");
2894 fprintf (vect_dump
, " unsupported ");
2895 print_gimple_stmt (vect_dump
, stmt
, 0, TDF_SLIM
);
2901 /* Find the def-stmts. */
2902 if (!vect_get_and_check_slp_defs (loop_vinfo
, *node
, stmt
,
2903 &def_stmts0
, &def_stmts1
,
2904 &first_stmt_dt0
, &first_stmt_dt1
,
2905 &first_stmt_def0_type
,
2906 &first_stmt_def1_type
,
2907 &first_stmt_const_oprnd
,
2913 /* Add the costs of the node to the overall instance costs. */
2914 *inside_cost
+= SLP_TREE_INSIDE_OF_LOOP_COST (*node
);
2915 *outside_cost
+= SLP_TREE_OUTSIDE_OF_LOOP_COST (*node
);
2917 /* Strided loads were reached - stop the recursion. */
2921 /* Create SLP_TREE nodes for the definition node/s. */
2922 if (first_stmt_dt0
== vect_loop_def
)
2924 slp_tree left_node
= XNEW (struct _slp_tree
);
2925 SLP_TREE_SCALAR_STMTS (left_node
) = def_stmts0
;
2926 SLP_TREE_VEC_STMTS (left_node
) = NULL
;
2927 SLP_TREE_LEFT (left_node
) = NULL
;
2928 SLP_TREE_RIGHT (left_node
) = NULL
;
2929 SLP_TREE_OUTSIDE_OF_LOOP_COST (left_node
) = 0;
2930 SLP_TREE_INSIDE_OF_LOOP_COST (left_node
) = 0;
2931 if (!vect_build_slp_tree (loop_vinfo
, &left_node
, group_size
,
2932 slp_impossible
, inside_cost
, outside_cost
,
2936 SLP_TREE_LEFT (*node
) = left_node
;
2939 if (first_stmt_dt1
== vect_loop_def
)
2941 slp_tree right_node
= XNEW (struct _slp_tree
);
2942 SLP_TREE_SCALAR_STMTS (right_node
) = def_stmts1
;
2943 SLP_TREE_VEC_STMTS (right_node
) = NULL
;
2944 SLP_TREE_LEFT (right_node
) = NULL
;
2945 SLP_TREE_RIGHT (right_node
) = NULL
;
2946 SLP_TREE_OUTSIDE_OF_LOOP_COST (right_node
) = 0;
2947 SLP_TREE_INSIDE_OF_LOOP_COST (right_node
) = 0;
2948 if (!vect_build_slp_tree (loop_vinfo
, &right_node
, group_size
,
2949 slp_impossible
, inside_cost
, outside_cost
,
2953 SLP_TREE_RIGHT (*node
) = right_node
;
2961 vect_print_slp_tree (slp_tree node
)
2969 fprintf (vect_dump
, "node ");
2970 for (i
= 0; VEC_iterate (gimple
, SLP_TREE_SCALAR_STMTS (node
), i
, stmt
); i
++)
2972 fprintf (vect_dump
, "\n\tstmt %d ", i
);
2973 print_gimple_stmt (vect_dump
, stmt
, 0, TDF_SLIM
);
2975 fprintf (vect_dump
, "\n");
2977 vect_print_slp_tree (SLP_TREE_LEFT (node
));
2978 vect_print_slp_tree (SLP_TREE_RIGHT (node
));
2982 /* Mark the tree rooted at NODE with MARK (PURE_SLP or HYBRID).
2983 If MARK is HYBRID, it refers to a specific stmt in NODE (the stmt at index
2984 J). Otherwise, MARK is PURE_SLP and J is -1, which indicates that all the
2985 stmts in NODE are to be marked. */
2988 vect_mark_slp_stmts (slp_tree node
, enum slp_vect_type mark
, int j
)
2996 for (i
= 0; VEC_iterate (gimple
, SLP_TREE_SCALAR_STMTS (node
), i
, stmt
); i
++)
2997 if (j
< 0 || i
== j
)
2998 STMT_SLP_TYPE (vinfo_for_stmt (stmt
)) = mark
;
3000 vect_mark_slp_stmts (SLP_TREE_LEFT (node
), mark
, j
);
3001 vect_mark_slp_stmts (SLP_TREE_RIGHT (node
), mark
, j
);
3005 /* Analyze an SLP instance starting from a group of strided stores. Call
3006 vect_build_slp_tree to build a tree of packed stmts if possible.
3007 Return FALSE if it's impossible to SLP any stmt in the loop. */
3010 vect_analyze_slp_instance (loop_vec_info loop_vinfo
, gimple stmt
)
3012 slp_instance new_instance
;
3013 slp_tree node
= XNEW (struct _slp_tree
);
3014 unsigned int group_size
= DR_GROUP_SIZE (vinfo_for_stmt (stmt
));
3015 unsigned int unrolling_factor
= 1, nunits
;
3016 tree vectype
, scalar_type
;
3018 unsigned int vectorization_factor
= 0, ncopies
;
3019 bool slp_impossible
= false;
3020 int inside_cost
= 0, outside_cost
= 0, ncopies_for_cost
;
3022 /* FORNOW: multiple types are not supported. */
3023 scalar_type
= TREE_TYPE (DR_REF (STMT_VINFO_DATA_REF (vinfo_for_stmt (stmt
))));
3024 vectype
= get_vectype_for_scalar_type (scalar_type
);
3027 if (vect_print_dump_info (REPORT_SLP
))
3029 fprintf (vect_dump
, "Build SLP failed: unsupported data-type ");
3030 print_generic_expr (vect_dump
, scalar_type
, TDF_SLIM
);
3035 nunits
= TYPE_VECTOR_SUBPARTS (vectype
);
3036 vectorization_factor
= LOOP_VINFO_VECT_FACTOR (loop_vinfo
);
3037 ncopies
= vectorization_factor
/ nunits
;
3040 if (vect_print_dump_info (REPORT_SLP
))
3041 fprintf (vect_dump
, "SLP failed - multiple types ");
3046 /* Create a node (a root of the SLP tree) for the packed strided stores. */
3047 SLP_TREE_SCALAR_STMTS (node
) = VEC_alloc (gimple
, heap
, group_size
);
3049 /* Collect the stores and store them in SLP_TREE_SCALAR_STMTS. */
3052 VEC_safe_push (gimple
, heap
, SLP_TREE_SCALAR_STMTS (node
), next
);
3053 next
= DR_GROUP_NEXT_DR (vinfo_for_stmt (next
));
3056 SLP_TREE_VEC_STMTS (node
) = NULL
;
3057 SLP_TREE_NUMBER_OF_VEC_STMTS (node
) = 0;
3058 SLP_TREE_LEFT (node
) = NULL
;
3059 SLP_TREE_RIGHT (node
) = NULL
;
3060 SLP_TREE_OUTSIDE_OF_LOOP_COST (node
) = 0;
3061 SLP_TREE_INSIDE_OF_LOOP_COST (node
) = 0;
3063 /* Calculate the unrolling factor. */
3064 unrolling_factor
= least_common_multiple (nunits
, group_size
) / group_size
;
3066 /* Calculate the number of vector stmts to create based on the unrolling
3067 factor (number of vectors is 1 if NUNITS >= GROUP_SIZE, and is
3068 GROUP_SIZE / NUNITS otherwise. */
3069 ncopies_for_cost
= unrolling_factor
* group_size
/ nunits
;
3071 /* Build the tree for the SLP instance. */
3072 if (vect_build_slp_tree (loop_vinfo
, &node
, group_size
, &slp_impossible
,
3073 &inside_cost
, &outside_cost
, ncopies_for_cost
))
3075 /* Create a new SLP instance. */
3076 new_instance
= XNEW (struct _slp_instance
);
3077 SLP_INSTANCE_TREE (new_instance
) = node
;
3078 SLP_INSTANCE_GROUP_SIZE (new_instance
) = group_size
;
3079 SLP_INSTANCE_UNROLLING_FACTOR (new_instance
) = unrolling_factor
;
3080 SLP_INSTANCE_OUTSIDE_OF_LOOP_COST (new_instance
) = outside_cost
;
3081 SLP_INSTANCE_INSIDE_OF_LOOP_COST (new_instance
) = inside_cost
;
3082 VEC_safe_push (slp_instance
, heap
, LOOP_VINFO_SLP_INSTANCES (loop_vinfo
),
3084 if (vect_print_dump_info (REPORT_SLP
))
3085 vect_print_slp_tree (node
);
3090 /* Failed to SLP. */
3091 /* Free the allocated memory. */
3092 vect_free_slp_tree (node
);
3097 /* SLP failed for this instance, but it is still possible to SLP other stmts
3103 /* Check if there are stmts in the loop can be vectorized using SLP. Build SLP
3104 trees of packed scalar stmts if SLP is possible. */
3107 vect_analyze_slp (loop_vec_info loop_vinfo
)
3110 VEC (gimple
, heap
) *strided_stores
= LOOP_VINFO_STRIDED_STORES (loop_vinfo
);
3113 if (vect_print_dump_info (REPORT_SLP
))
3114 fprintf (vect_dump
, "=== vect_analyze_slp ===");
3116 for (i
= 0; VEC_iterate (gimple
, strided_stores
, i
, store
); i
++)
3117 if (!vect_analyze_slp_instance (loop_vinfo
, store
))
3119 /* SLP failed. No instance can be SLPed in the loop. */
3120 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS
))
3121 fprintf (vect_dump
, "SLP failed.");
3130 /* For each possible SLP instance decide whether to SLP it and calculate overall
3131 unrolling factor needed to SLP the loop. */
3134 vect_make_slp_decision (loop_vec_info loop_vinfo
)
3136 unsigned int i
, unrolling_factor
= 1;
3137 VEC (slp_instance
, heap
) *slp_instances
= LOOP_VINFO_SLP_INSTANCES (loop_vinfo
);
3138 slp_instance instance
;
3139 int decided_to_slp
= 0;
3141 if (vect_print_dump_info (REPORT_SLP
))
3142 fprintf (vect_dump
, "=== vect_make_slp_decision ===");
3144 for (i
= 0; VEC_iterate (slp_instance
, slp_instances
, i
, instance
); i
++)
3146 /* FORNOW: SLP if you can. */
3147 if (unrolling_factor
< SLP_INSTANCE_UNROLLING_FACTOR (instance
))
3148 unrolling_factor
= SLP_INSTANCE_UNROLLING_FACTOR (instance
);
3150 /* Mark all the stmts that belong to INSTANCE as PURE_SLP stmts. Later we
3151 call vect_detect_hybrid_slp () to find stmts that need hybrid SLP and
3152 loop-based vectorization. Such stmts will be marked as HYBRID. */
3153 vect_mark_slp_stmts (SLP_INSTANCE_TREE (instance
), pure_slp
, -1);
3157 LOOP_VINFO_SLP_UNROLLING_FACTOR (loop_vinfo
) = unrolling_factor
;
3159 if (decided_to_slp
&& vect_print_dump_info (REPORT_SLP
))
3160 fprintf (vect_dump
, "Decided to SLP %d instances. Unrolling factor %d",
3161 decided_to_slp
, unrolling_factor
);
3165 /* Find stmts that must be both vectorized and SLPed (since they feed stmts that
3166 can't be SLPed) in the tree rooted at NODE. Mark such stmts as HYBRID. */
3169 vect_detect_hybrid_slp_stmts (slp_tree node
)
3173 imm_use_iterator imm_iter
;
3179 for (i
= 0; VEC_iterate (gimple
, SLP_TREE_SCALAR_STMTS (node
), i
, stmt
); i
++)
3180 if (PURE_SLP_STMT (vinfo_for_stmt (stmt
))
3181 && TREE_CODE (gimple_op (stmt
, 0)) == SSA_NAME
)
3182 FOR_EACH_IMM_USE_STMT (use_stmt
, imm_iter
, gimple_op (stmt
, 0))
3183 if (vinfo_for_stmt (use_stmt
)
3184 && !STMT_SLP_TYPE (vinfo_for_stmt (use_stmt
)))
3185 vect_mark_slp_stmts (node
, hybrid
, i
);
3187 vect_detect_hybrid_slp_stmts (SLP_TREE_LEFT (node
));
3188 vect_detect_hybrid_slp_stmts (SLP_TREE_RIGHT (node
));
3192 /* Find stmts that must be both vectorized and SLPed. */
3195 vect_detect_hybrid_slp (loop_vec_info loop_vinfo
)
3198 VEC (slp_instance
, heap
) *slp_instances
= LOOP_VINFO_SLP_INSTANCES (loop_vinfo
);
3199 slp_instance instance
;
3201 if (vect_print_dump_info (REPORT_SLP
))
3202 fprintf (vect_dump
, "=== vect_detect_hybrid_slp ===");
3204 for (i
= 0; VEC_iterate (slp_instance
, slp_instances
, i
, instance
); i
++)
3205 vect_detect_hybrid_slp_stmts (SLP_INSTANCE_TREE (instance
));
3209 /* Function vect_analyze_data_refs.
3211 Find all the data references in the loop.
3213 The general structure of the analysis of data refs in the vectorizer is as
3215 1- vect_analyze_data_refs(loop): call compute_data_dependences_for_loop to
3216 find and analyze all data-refs in the loop and their dependences.
3217 2- vect_analyze_dependences(): apply dependence testing using ddrs.
3218 3- vect_analyze_drs_alignment(): check that ref_stmt.alignment is ok.
3219 4- vect_analyze_drs_access(): check that ref_stmt.step is ok.
3224 vect_analyze_data_refs (loop_vec_info loop_vinfo
)
3226 struct loop
*loop
= LOOP_VINFO_LOOP (loop_vinfo
);
3228 VEC (data_reference_p
, heap
) *datarefs
;
3229 struct data_reference
*dr
;
3232 if (vect_print_dump_info (REPORT_DETAILS
))
3233 fprintf (vect_dump
, "=== vect_analyze_data_refs ===\n");
3235 compute_data_dependences_for_loop (loop
, true,
3236 &LOOP_VINFO_DATAREFS (loop_vinfo
),
3237 &LOOP_VINFO_DDRS (loop_vinfo
));
3239 /* Go through the data-refs, check that the analysis succeeded. Update pointer
3240 from stmt_vec_info struct to DR and vectype. */
3241 datarefs
= LOOP_VINFO_DATAREFS (loop_vinfo
);
3243 for (i
= 0; VEC_iterate (data_reference_p
, datarefs
, i
, dr
); i
++)
3246 stmt_vec_info stmt_info
;
3248 tree base
, offset
, init
;
3250 if (!dr
|| !DR_REF (dr
))
3252 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS
))
3253 fprintf (vect_dump
, "not vectorized: unhandled data-ref ");
3257 stmt
= DR_STMT (dr
);
3258 stmt_info
= vinfo_for_stmt (stmt
);
3260 /* Check that analysis of the data-ref succeeded. */
3261 if (!DR_BASE_ADDRESS (dr
) || !DR_OFFSET (dr
) || !DR_INIT (dr
)
3264 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS
))
3266 fprintf (vect_dump
, "not vectorized: data ref analysis failed ");
3267 print_gimple_stmt (vect_dump
, stmt
, 0, TDF_SLIM
);
3272 if (TREE_CODE (DR_BASE_ADDRESS (dr
)) == INTEGER_CST
)
3274 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS
))
3275 fprintf (vect_dump
, "not vectorized: base addr of dr is a "
3280 if (!DR_SYMBOL_TAG (dr
))
3282 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS
))
3284 fprintf (vect_dump
, "not vectorized: no memory tag for ");
3285 print_generic_expr (vect_dump
, DR_REF (dr
), TDF_SLIM
);
3290 base
= unshare_expr (DR_BASE_ADDRESS (dr
));
3291 offset
= unshare_expr (DR_OFFSET (dr
));
3292 init
= unshare_expr (DR_INIT (dr
));
3294 /* Update DR field in stmt_vec_info struct. */
3295 bb
= gimple_bb (stmt
);
3297 /* If the dataref is in an inner-loop of the loop that is considered for
3298 for vectorization, we also want to analyze the access relative to
3299 the outer-loop (DR contains information only relative to the
3300 inner-most enclosing loop). We do that by building a reference to the
3301 first location accessed by the inner-loop, and analyze it relative to
3303 if (nested_in_vect_loop_p (loop
, stmt
))
3305 tree outer_step
, outer_base
, outer_init
;
3306 HOST_WIDE_INT pbitsize
, pbitpos
;
3308 enum machine_mode pmode
;
3309 int punsignedp
, pvolatilep
;
3310 affine_iv base_iv
, offset_iv
;
3313 /* Build a reference to the first location accessed by the
3314 inner-loop: *(BASE+INIT). (The first location is actually
3315 BASE+INIT+OFFSET, but we add OFFSET separately later). */
3316 tree inner_base
= build_fold_indirect_ref
3317 (fold_build2 (POINTER_PLUS_EXPR
,
3318 TREE_TYPE (base
), base
,
3319 fold_convert (sizetype
, init
)));
3321 if (vect_print_dump_info (REPORT_DETAILS
))
3323 fprintf (dump_file
, "analyze in outer-loop: ");
3324 print_generic_expr (dump_file
, inner_base
, TDF_SLIM
);
3327 outer_base
= get_inner_reference (inner_base
, &pbitsize
, &pbitpos
,
3328 &poffset
, &pmode
, &punsignedp
, &pvolatilep
, false);
3329 gcc_assert (outer_base
!= NULL_TREE
);
3331 if (pbitpos
% BITS_PER_UNIT
!= 0)
3333 if (vect_print_dump_info (REPORT_DETAILS
))
3334 fprintf (dump_file
, "failed: bit offset alignment.\n");
3338 outer_base
= build_fold_addr_expr (outer_base
);
3339 if (!simple_iv (loop
, stmt
, outer_base
, &base_iv
, false))
3341 if (vect_print_dump_info (REPORT_DETAILS
))
3342 fprintf (dump_file
, "failed: evolution of base is not affine.\n");
3349 poffset
= fold_build2 (PLUS_EXPR
, TREE_TYPE (offset
), offset
, poffset
);
3356 offset_iv
.base
= ssize_int (0);
3357 offset_iv
.step
= ssize_int (0);
3359 else if (!simple_iv (loop
, stmt
, poffset
, &offset_iv
, false))
3361 if (vect_print_dump_info (REPORT_DETAILS
))
3362 fprintf (dump_file
, "evolution of offset is not affine.\n");
3366 outer_init
= ssize_int (pbitpos
/ BITS_PER_UNIT
);
3367 split_constant_offset (base_iv
.base
, &base_iv
.base
, &dinit
);
3368 outer_init
= size_binop (PLUS_EXPR
, outer_init
, dinit
);
3369 split_constant_offset (offset_iv
.base
, &offset_iv
.base
, &dinit
);
3370 outer_init
= size_binop (PLUS_EXPR
, outer_init
, dinit
);
3372 outer_step
= size_binop (PLUS_EXPR
,
3373 fold_convert (ssizetype
, base_iv
.step
),
3374 fold_convert (ssizetype
, offset_iv
.step
));
3376 STMT_VINFO_DR_STEP (stmt_info
) = outer_step
;
3377 /* FIXME: Use canonicalize_base_object_address (base_iv.base); */
3378 STMT_VINFO_DR_BASE_ADDRESS (stmt_info
) = base_iv
.base
;
3379 STMT_VINFO_DR_INIT (stmt_info
) = outer_init
;
3380 STMT_VINFO_DR_OFFSET (stmt_info
) =
3381 fold_convert (ssizetype
, offset_iv
.base
);
3382 STMT_VINFO_DR_ALIGNED_TO (stmt_info
) =
3383 size_int (highest_pow2_factor (offset_iv
.base
));
3385 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
3387 fprintf (dump_file
, "\touter base_address: ");
3388 print_generic_expr (dump_file
, STMT_VINFO_DR_BASE_ADDRESS (stmt_info
), TDF_SLIM
);
3389 fprintf (dump_file
, "\n\touter offset from base address: ");
3390 print_generic_expr (dump_file
, STMT_VINFO_DR_OFFSET (stmt_info
), TDF_SLIM
);
3391 fprintf (dump_file
, "\n\touter constant offset from base address: ");
3392 print_generic_expr (dump_file
, STMT_VINFO_DR_INIT (stmt_info
), TDF_SLIM
);
3393 fprintf (dump_file
, "\n\touter step: ");
3394 print_generic_expr (dump_file
, STMT_VINFO_DR_STEP (stmt_info
), TDF_SLIM
);
3395 fprintf (dump_file
, "\n\touter aligned to: ");
3396 print_generic_expr (dump_file
, STMT_VINFO_DR_ALIGNED_TO (stmt_info
), TDF_SLIM
);
3400 if (STMT_VINFO_DATA_REF (stmt_info
))
3402 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS
))
3405 "not vectorized: more than one data ref in stmt: ");
3406 print_gimple_stmt (vect_dump
, stmt
, 0, TDF_SLIM
);
3410 STMT_VINFO_DATA_REF (stmt_info
) = dr
;
3412 /* Set vectype for STMT. */
3413 scalar_type
= TREE_TYPE (DR_REF (dr
));
3414 STMT_VINFO_VECTYPE (stmt_info
) =
3415 get_vectype_for_scalar_type (scalar_type
);
3416 if (!STMT_VINFO_VECTYPE (stmt_info
))
3418 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS
))
3421 "not vectorized: no vectype for stmt: ");
3422 print_gimple_stmt (vect_dump
, stmt
, 0, TDF_SLIM
);
3423 fprintf (vect_dump
, " scalar_type: ");
3424 print_generic_expr (vect_dump
, scalar_type
, TDF_DETAILS
);
3434 /* Utility functions used by vect_mark_stmts_to_be_vectorized. */
3436 /* Function vect_mark_relevant.
3438 Mark STMT as "relevant for vectorization" and add it to WORKLIST. */
3441 vect_mark_relevant (VEC(gimple
,heap
) **worklist
, gimple stmt
,
3442 enum vect_relevant relevant
, bool live_p
)
3444 stmt_vec_info stmt_info
= vinfo_for_stmt (stmt
);
3445 enum vect_relevant save_relevant
= STMT_VINFO_RELEVANT (stmt_info
);
3446 bool save_live_p
= STMT_VINFO_LIVE_P (stmt_info
);
3448 if (vect_print_dump_info (REPORT_DETAILS
))
3449 fprintf (vect_dump
, "mark relevant %d, live %d.", relevant
, live_p
);
3451 if (STMT_VINFO_IN_PATTERN_P (stmt_info
))
3453 gimple pattern_stmt
;
3455 /* This is the last stmt in a sequence that was detected as a
3456 pattern that can potentially be vectorized. Don't mark the stmt
3457 as relevant/live because it's not going to be vectorized.
3458 Instead mark the pattern-stmt that replaces it. */
3460 pattern_stmt
= STMT_VINFO_RELATED_STMT (stmt_info
);
3462 if (vect_print_dump_info (REPORT_DETAILS
))
3463 fprintf (vect_dump
, "last stmt in pattern. don't mark relevant/live.");
3464 stmt_info
= vinfo_for_stmt (pattern_stmt
);
3465 gcc_assert (STMT_VINFO_RELATED_STMT (stmt_info
) == stmt
);
3466 save_relevant
= STMT_VINFO_RELEVANT (stmt_info
);
3467 save_live_p
= STMT_VINFO_LIVE_P (stmt_info
);
3468 stmt
= pattern_stmt
;
3471 STMT_VINFO_LIVE_P (stmt_info
) |= live_p
;
3472 if (relevant
> STMT_VINFO_RELEVANT (stmt_info
))
3473 STMT_VINFO_RELEVANT (stmt_info
) = relevant
;
3475 if (STMT_VINFO_RELEVANT (stmt_info
) == save_relevant
3476 && STMT_VINFO_LIVE_P (stmt_info
) == save_live_p
)
3478 if (vect_print_dump_info (REPORT_DETAILS
))
3479 fprintf (vect_dump
, "already marked relevant/live.");
3483 VEC_safe_push (gimple
, heap
, *worklist
, stmt
);
3487 /* Function vect_stmt_relevant_p.
3489 Return true if STMT in loop that is represented by LOOP_VINFO is
3490 "relevant for vectorization".
3492 A stmt is considered "relevant for vectorization" if:
3493 - it has uses outside the loop.
3494 - it has vdefs (it alters memory).
3495 - control stmts in the loop (except for the exit condition).
3497 CHECKME: what other side effects would the vectorizer allow? */
/* Decide whether STMT is "relevant for vectorization" in the loop described
   by LOOP_VINFO.  Sets *RELEVANT (control stmts other than the exit cond,
   and stmts with virtual defs, are vect_used_in_loop) and checks for defs
   used outside the loop.  Returns nonzero when *LIVE_P or *RELEVANT is set.
   NOTE(review): mangled extraction -- gaps in the embedded line numbers
   (3508-3511, 3541-3546) indicate dropped lines (*live_p initialization and
   the "*live_p = true" assignment are not visible); comments only added.  */
3500 vect_stmt_relevant_p (gimple stmt
, loop_vec_info loop_vinfo
,
3501 enum vect_relevant
*relevant
, bool *live_p
)
3503 struct loop
*loop
= LOOP_VINFO_LOOP (loop_vinfo
);
3504 ssa_op_iter op_iter
;
3505 imm_use_iterator imm_iter
;
3506 use_operand_p use_p
;
3507 def_operand_p def_p
;
3509 *relevant
= vect_unused_in_loop
;
3512 /* cond stmt other than loop exit cond. */
3513 if (is_ctrl_stmt (stmt
)
3514 && STMT_VINFO_TYPE (vinfo_for_stmt (stmt
)) != loop_exit_ctrl_vec_info_type
)
3515 *relevant
= vect_used_in_loop
;
3517 /* changing memory. */
3518 if (gimple_code (stmt
) != GIMPLE_PHI
)
3519 if (!ZERO_SSA_OPERANDS (stmt
, SSA_OP_VIRTUAL_DEFS
))
3521 if (vect_print_dump_info (REPORT_DETAILS
))
3522 fprintf (vect_dump
, "vec_stmt_relevant_p: stmt has vdefs.");
3523 *relevant
= vect_used_in_loop
;
3526 /* uses outside the loop. */
3527 FOR_EACH_PHI_OR_STMT_DEF (def_p
, stmt
, op_iter
, SSA_OP_DEF
)
3529 FOR_EACH_IMM_USE_FAST (use_p
, imm_iter
, DEF_FROM_PTR (def_p
))
3531 basic_block bb
= gimple_bb (USE_STMT (use_p
));
3532 if (!flow_bb_inside_loop_p (loop
, bb
))
3534 if (vect_print_dump_info (REPORT_DETAILS
))
3535 fprintf (vect_dump
, "vec_stmt_relevant_p: used out of loop.");
3537 /* We expect all such uses to be in the loop exit phis
3538 (because of loop closed form) */
3539 gcc_assert (gimple_code (USE_STMT (use_p
)) == GIMPLE_PHI
);
3540 gcc_assert (bb
== single_exit (loop
)->dest
);
/* NOTE(review): the statement setting *live_p for out-of-loop uses was in
   the dropped lines (original 3541-3546) -- TODO confirm.  */
3547 return (*live_p
|| *relevant
);
3552 Function process_use.
3555 - a USE in STMT in a loop represented by LOOP_VINFO
3556 - LIVE_P, RELEVANT - enum values to be set in the STMT_VINFO of the stmt
3557 that defined USE. This is done by calling mark_relevant and passing it
3558 the WORKLIST (to add DEF_STMT to the WORKLIST in case it is relevant).
3561 Generally, LIVE_P and RELEVANT are used to define the liveness and
3562 relevance info of the DEF_STMT of this USE:
3563 STMT_VINFO_LIVE_P (DEF_STMT_info) <-- live_p
3564 STMT_VINFO_RELEVANT (DEF_STMT_info) <-- relevant
3566 - case 1: If USE is used only for address computations (e.g. array indexing),
3567 which does not need to be directly vectorized, then the liveness/relevance
3568 of the respective DEF_STMT is left unchanged.
3569 - case 2: If STMT is a reduction phi and DEF_STMT is a reduction stmt, we
3570 skip DEF_STMT cause it had already been processed.
3571 - case 3: If DEF_STMT and STMT are in different nests, then "relevant" will
3572 be modified accordingly.
3574 Return true if everything is as expected. Return false otherwise. */
/* Propagate liveness/relevance from STMT to the definition statement of
   USE.  Case 1: uses only serving address computation are ignored.
   Case 2: a reduction phi defined by a reduction stmt in the same nest was
   already processed -- just sanity-check.  Cases 3a/3b: when DEF_STMT and
   STMT sit in different loops of the nest, "relevant" is translated across
   the nest boundary before vect_mark_relevant is called.  Returns a bool
   (false on unsupported use).
   NOTE(review): mangled extraction -- braces, `return` statements, `break`s
   and the switch headers around the `case` labels are among the dropped
   lines (gaps at 3591-3592, 3645-3646, 3659-3665, 3694-3700); comments
   only added here.  */
3577 process_use (gimple stmt
, tree use
, loop_vec_info loop_vinfo
, bool live_p
,
3578 enum vect_relevant relevant
, VEC(gimple
,heap
) **worklist
)
3580 struct loop
*loop
= LOOP_VINFO_LOOP (loop_vinfo
);
3581 stmt_vec_info stmt_vinfo
= vinfo_for_stmt (stmt
);
3582 stmt_vec_info dstmt_vinfo
;
3583 basic_block bb
, def_bb
;
3586 enum vect_def_type dt
;
3588 /* case 1: we are only interested in uses that need to be vectorized. Uses
3589 that are used for address computation are not considered relevant. */
3590 if (!exist_non_indexing_operands_for_use_p (use
, stmt
))
3593 if (!vect_is_simple_use (use
, loop_vinfo
, &def_stmt
, &def
, &dt
))
3595 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS
))
3596 fprintf (vect_dump
, "not vectorized: unsupported use in stmt.");
/* Defs with no defining stmt (e.g. function arguments) need no marking.  */
3600 if (!def_stmt
|| gimple_nop_p (def_stmt
))
3603 def_bb
= gimple_bb (def_stmt
);
3604 if (!flow_bb_inside_loop_p (loop
, def_bb
))
3606 if (vect_print_dump_info (REPORT_DETAILS
))
3607 fprintf (vect_dump
, "def_stmt is out of loop.");
3611 /* case 2: A reduction phi (STMT) defined by a reduction stmt (DEF_STMT).
3612 DEF_STMT must have already been processed, because this should be the
3613 only way that STMT, which is a reduction-phi, was put in the worklist,
3614 as there should be no other uses for DEF_STMT in the loop. So we just
3615 check that everything is as expected, and we are done. */
3616 dstmt_vinfo
= vinfo_for_stmt (def_stmt
);
3617 bb
= gimple_bb (stmt
);
3618 if (gimple_code (stmt
) == GIMPLE_PHI
3619 && STMT_VINFO_DEF_TYPE (stmt_vinfo
) == vect_reduction_def
3620 && gimple_code (def_stmt
) != GIMPLE_PHI
3621 && STMT_VINFO_DEF_TYPE (dstmt_vinfo
) == vect_reduction_def
3622 && bb
->loop_father
== def_bb
->loop_father
)
3624 if (vect_print_dump_info (REPORT_DETAILS
))
3625 fprintf (vect_dump
, "reduc-stmt defining reduc-phi in the same nest.");
3626 if (STMT_VINFO_IN_PATTERN_P (dstmt_vinfo
))
3627 dstmt_vinfo
= vinfo_for_stmt (STMT_VINFO_RELATED_STMT (dstmt_vinfo
));
3628 gcc_assert (STMT_VINFO_RELEVANT (dstmt_vinfo
) < vect_used_by_reduction
);
3629 gcc_assert (STMT_VINFO_LIVE_P (dstmt_vinfo
)
3630 || STMT_VINFO_RELEVANT (dstmt_vinfo
) > vect_unused_in_loop
);
3634 /* case 3a: outer-loop stmt defining an inner-loop stmt:
3635 outer-loop-header-bb:
/* NOTE(review): the switch header for the following case labels was in the
   dropped lines; the cases translate RELEVANT from outer-loop terms into
   inner-loop terms.  */
3641 if (flow_loop_nested_p (def_bb
->loop_father
, bb
->loop_father
))
3643 if (vect_print_dump_info (REPORT_DETAILS
))
3644 fprintf (vect_dump
, "outer-loop def-stmt defining inner-loop stmt.");
3647 case vect_unused_in_loop
:
3648 relevant
= (STMT_VINFO_DEF_TYPE (stmt_vinfo
) == vect_reduction_def
) ?
3649 vect_used_by_reduction
: vect_unused_in_loop
;
3651 case vect_used_in_outer_by_reduction
:
3652 relevant
= vect_used_by_reduction
;
3654 case vect_used_in_outer
:
3655 relevant
= vect_used_in_loop
;
3657 case vect_used_by_reduction
:
3658 case vect_used_in_loop
:
3666 /* case 3b: inner-loop stmt defining an outer-loop stmt:
3667 outer-loop-header-bb:
/* NOTE(review): as above, the switch header is missing; these cases
   translate RELEVANT from inner-loop terms into outer-loop terms.  */
3673 else if (flow_loop_nested_p (bb
->loop_father
, def_bb
->loop_father
))
3675 if (vect_print_dump_info (REPORT_DETAILS
))
3676 fprintf (vect_dump
, "inner-loop def-stmt defining outer-loop stmt.");
3679 case vect_unused_in_loop
:
3680 relevant
= (STMT_VINFO_DEF_TYPE (stmt_vinfo
) == vect_reduction_def
) ?
3681 vect_used_in_outer_by_reduction
: vect_unused_in_loop
;
3684 case vect_used_in_outer_by_reduction
:
3685 case vect_used_in_outer
:
3688 case vect_used_by_reduction
:
3689 relevant
= vect_used_in_outer_by_reduction
;
3692 case vect_used_in_loop
:
3693 relevant
= vect_used_in_outer
;
/* Finally record the (possibly translated) marking on DEF_STMT.  */
3701 vect_mark_relevant (worklist
, def_stmt
, relevant
, live_p
);
3706 /* Function vect_mark_stmts_to_be_vectorized.
3708 Not all stmts in the loop need to be vectorized. For example:
3717 Stmt 1 and 3 do not need to be vectorized, because loop control and
3718 addressing of vectorized data-refs are handled differently.
3720 This pass detects such stmts. */
/* Worklist-driven data-flow pass: seed the worklist with every phi/stmt in
   the loop that vect_stmt_relevant_p deems relevant or live, then pop stmts
   and propagate relevance/liveness to the defs of their uses via
   process_use.  Reduction stmts get special "relevant" adjustment first.
   Returns a bool (false when an unsupported use of a reduction, or a failed
   process_use, is found).
   NOTE(review): mangled extraction -- loop-body braces, `break`s, `return`
   statements and a few declarations (e.g. `i`, `bb`, `phi`, `stmt`,
   `live_p`, `iter`) fall in the dropped line ranges (3730-3737, 3845-3849,
   3856-3860); comments only added below.  */
3723 vect_mark_stmts_to_be_vectorized (loop_vec_info loop_vinfo
)
3725 VEC(gimple
,heap
) *worklist
;
3726 struct loop
*loop
= LOOP_VINFO_LOOP (loop_vinfo
);
3727 basic_block
*bbs
= LOOP_VINFO_BBS (loop_vinfo
);
3728 unsigned int nbbs
= loop
->num_nodes
;
3729 gimple_stmt_iterator si
;
3732 stmt_vec_info stmt_vinfo
;
3736 enum vect_relevant relevant
;
3738 if (vect_print_dump_info (REPORT_DETAILS
))
3739 fprintf (vect_dump
, "=== vect_mark_stmts_to_be_vectorized ===");
3741 worklist
= VEC_alloc (gimple
, heap
, 64);
3743 /* 1. Init worklist. */
3744 for (i
= 0; i
< nbbs
; i
++)
3747 for (si
= gsi_start_phis (bb
); !gsi_end_p (si
); gsi_next (&si
))
3749 phi
= gsi_stmt (si
);
3750 if (vect_print_dump_info (REPORT_DETAILS
))
3752 fprintf (vect_dump
, "init: phi relevant? ");
3753 print_gimple_stmt (vect_dump
, phi
, 0, TDF_SLIM
);
3756 if (vect_stmt_relevant_p (phi
, loop_vinfo
, &relevant
, &live_p
))
3757 vect_mark_relevant (&worklist
, phi
, relevant
, live_p
);
3759 for (si
= gsi_start_bb (bb
); !gsi_end_p (si
); gsi_next (&si
))
3761 stmt
= gsi_stmt (si
);
3762 if (vect_print_dump_info (REPORT_DETAILS
))
3764 fprintf (vect_dump
, "init: stmt relevant? ");
3765 print_gimple_stmt (vect_dump
, stmt
, 0, TDF_SLIM
);
3768 if (vect_stmt_relevant_p (stmt
, loop_vinfo
, &relevant
, &live_p
))
3769 vect_mark_relevant (&worklist
, stmt
, relevant
, live_p
);
3773 /* 2. Process_worklist */
3774 while (VEC_length (gimple
, worklist
) > 0)
3776 use_operand_p use_p
;
3779 stmt
= VEC_pop (gimple
, worklist
);
3780 if (vect_print_dump_info (REPORT_DETAILS
))
3782 fprintf (vect_dump
, "worklist: examine stmt: ");
3783 print_gimple_stmt (vect_dump
, stmt
, 0, TDF_SLIM
);
3786 /* Examine the USEs of STMT. For each USE, mark the stmt that defines it
3787 (DEF_STMT) as relevant/irrelevant and live/dead according to the
3788 liveness and relevance properties of STMT. */
3789 stmt_vinfo
= vinfo_for_stmt (stmt
);
3790 relevant
= STMT_VINFO_RELEVANT (stmt_vinfo
);
3791 live_p
= STMT_VINFO_LIVE_P (stmt_vinfo
);
3793 /* Generally, the liveness and relevance properties of STMT are
3794 propagated as is to the DEF_STMTs of its USEs:
3795 live_p <-- STMT_VINFO_LIVE_P (STMT_VINFO)
3796 relevant <-- STMT_VINFO_RELEVANT (STMT_VINFO)
3798 One exception is when STMT has been identified as defining a reduction
3799 variable; in this case we set the liveness/relevance as follows:
3801 relevant = vect_used_by_reduction
3802 This is because we distinguish between two kinds of relevant stmts -
3803 those that are used by a reduction computation, and those that are
3804 (also) used by a regular computation. This allows us later on to
3805 identify stmts that are used solely by a reduction, and therefore the
3806 order of the results that they produce does not have to be kept.
3808 Reduction phis are expected to be used by a reduction stmt, or by
3809 in an outer loop; Other reduction stmts are expected to be
3810 in the loop, and possibly used by a stmt in an outer loop.
3811 Here are the expected values of "relevant" for reduction phis/stmts:
3814 vect_unused_in_loop ok
3815 vect_used_in_outer_by_reduction ok ok
3816 vect_used_in_outer ok ok
3817 vect_used_by_reduction ok
3818 vect_used_in_loop */
3820 if (STMT_VINFO_DEF_TYPE (stmt_vinfo
) == vect_reduction_def
)
3822 enum vect_relevant tmp_relevant
= relevant
;
3823 switch (tmp_relevant
)
3825 case vect_unused_in_loop
:
3826 gcc_assert (gimple_code (stmt
) != GIMPLE_PHI
);
3827 relevant
= vect_used_by_reduction
;
3830 case vect_used_in_outer_by_reduction
:
3831 case vect_used_in_outer
:
3832 gcc_assert (gimple_code (stmt
) != WIDEN_SUM_EXPR
3833 && gimple_code (stmt
) != DOT_PROD_EXPR
);
3836 case vect_used_by_reduction
:
3837 if (gimple_code (stmt
) == GIMPLE_PHI
)
/* Fallthrough-looking layout here is an artifact of dropped `break`
   lines; do not infer control flow from adjacency.  */
3840 case vect_used_in_loop
:
3842 if (vect_print_dump_info (REPORT_DETAILS
))
3843 fprintf (vect_dump
, "unsupported use of reduction.");
3844 VEC_free (gimple
, heap
, worklist
);
/* Propagate STMT's properties to the def of each of its uses.  */
3850 FOR_EACH_PHI_OR_STMT_USE (use_p
, stmt
, iter
, SSA_OP_USE
)
3852 tree op
= USE_FROM_PTR (use_p
);
3853 if (!process_use (stmt
, op
, loop_vinfo
, live_p
, relevant
, &worklist
))
3855 VEC_free (gimple
, heap
, worklist
);
3859 } /* while worklist */
3861 VEC_free (gimple
, heap
, worklist
);
3866 /* Function vect_can_advance_ivs_p
3868 In case the number of iterations that LOOP iterates is unknown at compile
3869 time, an epilog loop will be generated, and the loop induction variables
3870 (IVs) will be "advanced" to the value they are supposed to take just before
3871 the epilog loop. Here we check that the access function of the loop IVs
3872 and the expression that represents the loop bound are simple enough.
3873 These restrictions will be relaxed in the future. */
/* Check that every IV phi in the loop header is simple enough for the
   epilog-loop "IV advancing" transformation: virtual and reduction phis are
   skipped; for the rest, the scalar-evolution access function must exist,
   have an evolution part in this loop, and that evolution part must not
   itself be a chrec (degree >= 2 polynomials are rejected).
   NOTE(review): mangled extraction -- the tail of the function (original
   lines 3950-3956, including the final return) was dropped, and the `phi`
   declaration and the `continue`/`return false` statements fall in other
   gaps; comments only added below.  */
3876 vect_can_advance_ivs_p (loop_vec_info loop_vinfo
)
3878 struct loop
*loop
= LOOP_VINFO_LOOP (loop_vinfo
);
3879 basic_block bb
= loop
->header
;
3881 gimple_stmt_iterator gsi
;
3883 /* Analyze phi functions of the loop header. */
3885 if (vect_print_dump_info (REPORT_DETAILS
))
3886 fprintf (vect_dump
, "vect_can_advance_ivs_p:");
3888 for (gsi
= gsi_start_phis (bb
); !gsi_end_p (gsi
); gsi_next (&gsi
))
3890 tree access_fn
= NULL
;
3891 tree evolution_part
;
3893 phi
= gsi_stmt (gsi
);
3894 if (vect_print_dump_info (REPORT_DETAILS
))
3896 fprintf (vect_dump
, "Analyze phi: ");
3897 print_gimple_stmt (vect_dump
, phi
, 0, TDF_SLIM
);
3900 /* Skip virtual phi's. The data dependences that are associated with
3901 virtual defs/uses (i.e., memory accesses) are analyzed elsewhere. */
3903 if (!is_gimple_reg (SSA_NAME_VAR (PHI_RESULT (phi
))))
3905 if (vect_print_dump_info (REPORT_DETAILS
))
3906 fprintf (vect_dump
, "virtual phi. skip.");
3910 /* Skip reduction phis. */
3912 if (STMT_VINFO_DEF_TYPE (vinfo_for_stmt (phi
)) == vect_reduction_def
)
3914 if (vect_print_dump_info (REPORT_DETAILS
))
3915 fprintf (vect_dump
, "reduc phi. skip.");
3919 /* Analyze the evolution function. */
3921 access_fn
= instantiate_parameters
3922 (loop
, analyze_scalar_evolution (loop
, PHI_RESULT (phi
)));
/* NOTE(review): the `if (!access_fn)` guard around this dump is in a
   dropped line range (3923-3925) -- TODO confirm.  */
3926 if (vect_print_dump_info (REPORT_DETAILS
))
3927 fprintf (vect_dump
, "No Access function.");
3931 if (vect_print_dump_info (REPORT_DETAILS
))
3933 fprintf (vect_dump
, "Access function of PHI: ");
3934 print_generic_expr (vect_dump
, access_fn
, TDF_SLIM
);
3937 evolution_part
= evolution_part_in_loop_num (access_fn
, loop
->num
);
3939 if (evolution_part
== NULL_TREE
)
3941 if (vect_print_dump_info (REPORT_DETAILS
))
3942 fprintf (vect_dump
, "No evolution.");
3946 /* FORNOW: We do not transform initial conditions of IVs
3947 which evolution functions are a polynomial of degree >= 2. */
3949 if (tree_is_chrec (evolution_part
3957 /* Function vect_get_loop_niters.
3959 Determine how many iterations the loop is executed.
3960 If an expression that represents the number of iterations
3961 can be constructed, place it in NUMBER_OF_ITERATIONS.
3962 Return the loop exit condition. */
/* Compute LOOP's iteration count via number_of_exit_cond_executions; when
   the result is known (non-NULL and not chrec_dont_know) store it in
   *NUMBER_OF_ITERATIONS.  Returns the loop exit condition from
   get_loop_exit_condition.
   NOTE(review): mangled extraction -- the `niters` declaration and some
   braces fall in dropped line ranges; comments only added below.  */
3965 vect_get_loop_niters (struct loop
*loop
, tree
*number_of_iterations
)
3969 if (vect_print_dump_info (REPORT_DETAILS
))
3970 fprintf (vect_dump
, "=== get_loop_niters ===");
3972 niters
= number_of_exit_cond_executions (loop
);
/* Only publish the count when the scalar-evolution analysis succeeded.  */
3974 if (niters
!= NULL_TREE
3975 && niters
!= chrec_dont_know
)
3977 *number_of_iterations
= niters
;
3979 if (vect_print_dump_info (REPORT_DETAILS
))
3981 fprintf (vect_dump
, "==> get_loop_niters:" );
3982 print_generic_expr (vect_dump
, *number_of_iterations
, TDF_SLIM
);
3986 return get_loop_exit_condition (loop
);
3990 /* Function vect_analyze_loop_1.
3992 Apply a set of analyses on LOOP, and create a loop_vec_info struct
3993 for it. The different analyses will record information in the
3994 loop_vec_info struct. This is a subset of the analyses applied in
3995 vect_analyze_loop, to be applied on an inner-loop nested in the loop
3996 that is now considered for (outer-loop) vectorization. */
/* Subset of vect_analyze_loop applied to an inner loop nested in a loop
   considered for outer-loop vectorization: currently only the CFG/form
   check (vect_analyze_loop_form).  Returns the resulting loop_vec_info.
   NOTE(review): mangled extraction -- the `if (!loop_vinfo)` guard and the
   trailing `return loop_vinfo;` fall in dropped line ranges (4009-4010,
   4013-4016); comments only added below.  */
3998 static loop_vec_info
3999 vect_analyze_loop_1 (struct loop
*loop
)
4001 loop_vec_info loop_vinfo
;
4003 if (vect_print_dump_info (REPORT_DETAILS
))
4004 fprintf (vect_dump
, "===== analyze_loop_nest_1 =====");
4006 /* Check the CFG characteristics of the loop (nesting, entry/exit, etc. */
4008 loop_vinfo
= vect_analyze_loop_form (loop
);
4011 if (vect_print_dump_info (REPORT_DETAILS
))
4012 fprintf (vect_dump
, "bad inner-loop form.");
4020 /* Function vect_analyze_loop_form.
4022 Verify that certain CFG restrictions hold, including:
4023 - the loop has a pre-header
4024 - the loop has a single entry and exit
4025 - the loop exit condition is simple enough, and the number of iterations
4026 can be analyzed (a countable loop). */
/* Verify the CFG restrictions for vectorization and build a loop_vec_info:
   inner-most loops must have exactly 2 BBs with a non-empty header; nested
   loops must be doubly-nested with 5 BBs, a vectorizable inner loop whose
   iteration count is invariant in the outer loop, and a supported
   entry/exit edge shape.  In all cases the loop needs a single exit (with a
   single-predecessor exit BB -- split if necessary), an empty latch, and a
   computable, non-zero iteration count.  On success records NITERS and the
   exit condition's stmt type in a fresh loop_vec_info stored in loop->aux.
   NOTE(review): mangled extraction -- many `return NULL;` statements,
   braces, the `loop_cond` declaration and several `if` headers fall in the
   dropped line ranges (e.g. 4061-4063, 4199-4201); comments only added.  */
4029 vect_analyze_loop_form (struct loop
*loop
)
4031 loop_vec_info loop_vinfo
;
4033 tree number_of_iterations
= NULL
;
4034 loop_vec_info inner_loop_vinfo
= NULL
;
4036 if (vect_print_dump_info (REPORT_DETAILS
))
4037 fprintf (vect_dump
, "=== vect_analyze_loop_form ===");
4039 /* Different restrictions apply when we are considering an inner-most loop,
4040 vs. an outer (nested) loop.
4041 (FORNOW. May want to relax some of these restrictions in the future). */
4045 /* Inner-most loop. We currently require that the number of BBs is
4046 exactly 2 (the header and latch). Vectorizable inner-most loops
4057 if (loop
->num_nodes
!= 2)
4059 if (vect_print_dump_info (REPORT_BAD_FORM_LOOPS
))
4060 fprintf (vect_dump
, "not vectorized: too many BBs in loop.");
4064 if (empty_block_p (loop
->header
))
4066 if (vect_print_dump_info (REPORT_BAD_FORM_LOOPS
))
4067 fprintf (vect_dump
, "not vectorized: empty loop.");
/* Outer (nested) loop case starts here; its `else` header is among the
   dropped lines.  */
4073 struct loop
*innerloop
= loop
->inner
;
4074 edge backedge
, entryedge
;
4076 /* Nested loop. We currently require that the loop is doubly-nested,
4077 contains a single inner loop, and the number of BBs is exactly 5.
4078 Vectorizable outer-loops look like this:
4090 The inner-loop has the properties expected of inner-most loops
4091 as described above. */
4093 if ((loop
->inner
)->inner
|| (loop
->inner
)->next
)
4095 if (vect_print_dump_info (REPORT_BAD_FORM_LOOPS
))
4096 fprintf (vect_dump
, "not vectorized: multiple nested loops.");
4100 /* Analyze the inner-loop. */
4101 inner_loop_vinfo
= vect_analyze_loop_1 (loop
->inner
);
4102 if (!inner_loop_vinfo
)
4104 if (vect_print_dump_info (REPORT_BAD_FORM_LOOPS
))
4105 fprintf (vect_dump
, "not vectorized: Bad inner loop.");
/* The inner loop's trip count must not vary across outer iterations.  */
4109 if (!expr_invariant_in_loop_p (loop
,
4110 LOOP_VINFO_NITERS (inner_loop_vinfo
)))
4112 if (vect_print_dump_info (REPORT_BAD_FORM_LOOPS
))
4114 "not vectorized: inner-loop count not invariant.");
4115 destroy_loop_vec_info (inner_loop_vinfo
, true);
4119 if (loop
->num_nodes
!= 5)
4121 if (vect_print_dump_info (REPORT_BAD_FORM_LOOPS
))
4122 fprintf (vect_dump
, "not vectorized: too many BBs in loop.");
4123 destroy_loop_vec_info (inner_loop_vinfo
, true);
/* Identify the inner loop's entry edge vs. its backedge.  */
4127 gcc_assert (EDGE_COUNT (innerloop
->header
->preds
) == 2);
4128 backedge
= EDGE_PRED (innerloop
->header
, 1);
4129 entryedge
= EDGE_PRED (innerloop
->header
, 0);
4130 if (EDGE_PRED (innerloop
->header
, 0)->src
== innerloop
->latch
)
4132 backedge
= EDGE_PRED (innerloop
->header
, 0);
4133 entryedge
= EDGE_PRED (innerloop
->header
, 1);
4136 if (entryedge
->src
!= loop
->header
4137 || !single_exit (innerloop
)
4138 || single_exit (innerloop
)->dest
!= EDGE_PRED (loop
->latch
, 0)->src
)
4140 if (vect_print_dump_info (REPORT_BAD_FORM_LOOPS
))
4141 fprintf (vect_dump
, "not vectorized: unsupported outerloop form.");
4142 destroy_loop_vec_info (inner_loop_vinfo
, true);
4146 if (vect_print_dump_info (REPORT_DETAILS
))
4147 fprintf (vect_dump
, "Considering outer-loop vectorization.");
/* Checks below apply to both the inner-most and the nested case.  */
4150 if (!single_exit (loop
)
4151 || EDGE_COUNT (loop
->header
->preds
) != 2)
4153 if (vect_print_dump_info (REPORT_BAD_FORM_LOOPS
))
4155 if (!single_exit (loop
))
4156 fprintf (vect_dump
, "not vectorized: multiple exits.");
4157 else if (EDGE_COUNT (loop
->header
->preds
) != 2)
4158 fprintf (vect_dump
, "not vectorized: too many incoming edges.");
4160 if (inner_loop_vinfo
)
4161 destroy_loop_vec_info (inner_loop_vinfo
, true);
4165 /* We assume that the loop exit condition is at the end of the loop. i.e,
4166 that the loop is represented as a do-while (with a proper if-guard
4167 before the loop if needed), where the loop header contains all the
4168 executable statements, and the latch is empty. */
4169 if (!empty_block_p (loop
->latch
)
4170 || phi_nodes (loop
->latch
))
4172 if (vect_print_dump_info (REPORT_BAD_FORM_LOOPS
))
4173 fprintf (vect_dump
, "not vectorized: unexpected loop form.");
4174 if (inner_loop_vinfo
)
4175 destroy_loop_vec_info (inner_loop_vinfo
, true);
4179 /* Make sure there exists a single-predecessor exit bb: */
4180 if (!single_pred_p (single_exit (loop
)->dest
))
4182 edge e
= single_exit (loop
);
4183 if (!(e
->flags
& EDGE_ABNORMAL
))
4185 split_loop_exit_edge (e
);
4186 if (vect_print_dump_info (REPORT_DETAILS
))
4187 fprintf (vect_dump
, "split exit edge.");
/* Abnormal exit edges cannot be split -- reject the loop.  */
4191 if (vect_print_dump_info (REPORT_BAD_FORM_LOOPS
))
4192 fprintf (vect_dump
, "not vectorized: abnormal loop exit edge.");
4193 if (inner_loop_vinfo
)
4194 destroy_loop_vec_info (inner_loop_vinfo
, true);
4199 loop_cond
= vect_get_loop_niters (loop
, &number_of_iterations
);
/* NOTE(review): the `if (!loop_cond)` guard for this diagnostic appears to
   be among the dropped lines (4200-4201) -- TODO confirm.  */
4202 if (vect_print_dump_info (REPORT_BAD_FORM_LOOPS
))
4203 fprintf (vect_dump
, "not vectorized: complicated exit condition.");
4204 if (inner_loop_vinfo
)
4205 destroy_loop_vec_info (inner_loop_vinfo
, true);
4209 if (!number_of_iterations
)
4211 if (vect_print_dump_info (REPORT_BAD_FORM_LOOPS
))
4213 "not vectorized: number of iterations cannot be computed.");
4214 if (inner_loop_vinfo
)
4215 destroy_loop_vec_info (inner_loop_vinfo
, true);
4219 if (chrec_contains_undetermined (number_of_iterations
))
4221 if (vect_print_dump_info (REPORT_BAD_FORM_LOOPS
))
4222 fprintf (vect_dump
, "Infinite number of iterations.");
4223 if (inner_loop_vinfo
)
4224 destroy_loop_vec_info (inner_loop_vinfo
, true);
4228 if (!NITERS_KNOWN_P (number_of_iterations
))
4230 if (vect_print_dump_info (REPORT_DETAILS
))
4232 fprintf (vect_dump
, "Symbolic number of iterations is ");
4233 print_generic_expr (vect_dump
, number_of_iterations
, TDF_DETAILS
);
4236 else if (TREE_INT_CST_LOW (number_of_iterations
) == 0)
4238 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS
))
4239 fprintf (vect_dump
, "not vectorized: number of iterations = 0.");
4240 if (inner_loop_vinfo
)
4241 destroy_loop_vec_info (inner_loop_vinfo
, false);
/* All form checks passed: build and populate the loop_vec_info.  */
4245 loop_vinfo
= new_loop_vec_info (loop
);
4246 LOOP_VINFO_NITERS (loop_vinfo
) = number_of_iterations
;
4247 LOOP_VINFO_NITERS_UNCHANGED (loop_vinfo
) = number_of_iterations
;
4249 STMT_VINFO_TYPE (vinfo_for_stmt (loop_cond
)) = loop_exit_ctrl_vec_info_type
;
4251 /* CHECKME: May want to keep it around it in the future. */
4252 if (inner_loop_vinfo
)
4253 destroy_loop_vec_info (inner_loop_vinfo
, false);
4255 gcc_assert (!loop
->aux
);
4256 loop
->aux
= loop_vinfo
;
4261 /* Function vect_analyze_loop.
4263 Apply a set of analyses on LOOP, and create a loop_vec_info struct
4264 for it. The different analyses will record information in the
4265 loop_vec_info struct. */
4267 vect_analyze_loop (struct loop
*loop
)
4270 loop_vec_info loop_vinfo
;
4272 if (vect_print_dump_info (REPORT_DETAILS
))
4273 fprintf (vect_dump
, "===== analyze_loop_nest =====");
4275 if (loop_outer (loop
)
4276 && loop_vec_info_for_loop (loop_outer (loop
))
4277 && LOOP_VINFO_VECTORIZABLE_P (loop_vec_info_for_loop (loop_outer (loop
))))
4279 if (vect_print_dump_info (REPORT_DETAILS
))
4280 fprintf (vect_dump
, "outer-loop already vectorized.");
4284 /* Check the CFG characteristics of the loop (nesting, entry/exit, etc. */
4286 loop_vinfo
= vect_analyze_loop_form (loop
);
4289 if (vect_print_dump_info (REPORT_DETAILS
))
4290 fprintf (vect_dump
, "bad loop form.");
4294 /* Find all data references in the loop (which correspond to vdefs/vuses)
4295 and analyze their evolution in the loop.
4297 FORNOW: Handle only simple, array references, which
4298 alignment can be forced, and aligned pointer-references. */
4300 ok
= vect_analyze_data_refs (loop_vinfo
);
4303 if (vect_print_dump_info (REPORT_DETAILS
))
4304 fprintf (vect_dump
, "bad data references.");
4305 destroy_loop_vec_info (loop_vinfo
, true);
4309 /* Classify all cross-iteration scalar data-flow cycles.
4310 Cross-iteration cycles caused by virtual phis are analyzed separately. */
4312 vect_analyze_scalar_cycles (loop_vinfo
);
4314 vect_pattern_recog (loop_vinfo
);
4316 /* Data-flow analysis to detect stmts that do not need to be vectorized. */
4318 ok
= vect_mark_stmts_to_be_vectorized (loop_vinfo
);
4321 if (vect_print_dump_info (REPORT_DETAILS
))
4322 fprintf (vect_dump
, "unexpected pattern.");
4323 destroy_loop_vec_info (loop_vinfo
, true);
4327 /* Analyze the alignment of the data-refs in the loop.
4328 Fail if a data reference is found that cannot be vectorized. */
4330 ok
= vect_analyze_data_refs_alignment (loop_vinfo
);
4333 if (vect_print_dump_info (REPORT_DETAILS
))
4334 fprintf (vect_dump
, "bad data alignment.");
4335 destroy_loop_vec_info (loop_vinfo
, true);
4339 ok
= vect_determine_vectorization_factor (loop_vinfo
);
4342 if (vect_print_dump_info (REPORT_DETAILS
))
4343 fprintf (vect_dump
, "can't determine vectorization factor.");
4344 destroy_loop_vec_info (loop_vinfo
, true);
4348 /* Analyze data dependences between the data-refs in the loop.
4349 FORNOW: fail at the first data dependence that we encounter. */
4351 ok
= vect_analyze_data_ref_dependences (loop_vinfo
);
4354 if (vect_print_dump_info (REPORT_DETAILS
))
4355 fprintf (vect_dump
, "bad data dependence.");
4356 destroy_loop_vec_info (loop_vinfo
, true);
4360 /* Analyze the access patterns of the data-refs in the loop (consecutive,
4361 complex, etc.). FORNOW: Only handle consecutive access pattern. */
4363 ok
= vect_analyze_data_ref_accesses (loop_vinfo
);
4366 if (vect_print_dump_info (REPORT_DETAILS
))
4367 fprintf (vect_dump
, "bad data access.");
4368 destroy_loop_vec_info (loop_vinfo
, true);
4372 /* Prune the list of ddrs to be tested at run-time by versioning for alias.
4373 It is important to call pruning after vect_analyze_data_ref_accesses,
4374 since we use grouping information gathered by interleaving analysis. */
4375 ok
= vect_prune_runtime_alias_test_list (loop_vinfo
);
4378 if (vect_print_dump_info (REPORT_DETAILS
))
4379 fprintf (vect_dump
, "too long list of versioning for alias "
4381 destroy_loop_vec_info (loop_vinfo
, true);
4385 /* Check the SLP opportunities in the loop, analyze and build SLP trees. */
4386 ok
= vect_analyze_slp (loop_vinfo
);
4389 /* Decide which possible SLP instances to SLP. */
4390 vect_make_slp_decision (loop_vinfo
);
4392 /* Find stmts that need to be both vectorized and SLPed. */
4393 vect_detect_hybrid_slp (loop_vinfo
);
4396 /* This pass will decide on using loop versioning and/or loop peeling in
4397 order to enhance the alignment of data references in the loop. */
4399 ok
= vect_enhance_data_refs_alignment (loop_vinfo
);
4402 if (vect_print_dump_info (REPORT_DETAILS
))
4403 fprintf (vect_dump
, "bad data alignment.");
4404 destroy_loop_vec_info (loop_vinfo
, true);
4408 /* Scan all the operations in the loop and make sure they are
4411 ok
= vect_analyze_operations (loop_vinfo
);
4414 if (vect_print_dump_info (REPORT_DETAILS
))
4415 fprintf (vect_dump
, "bad operation or unsupported loop bound.");
4416 destroy_loop_vec_info (loop_vinfo
, true);
4420 LOOP_VINFO_VECTORIZABLE_P (loop_vinfo
) = 1;