/* Analysis Utilities for Loop Vectorization.
   Copyright (C) 2003,2004,2005 Free Software Foundation, Inc.
   Contributed by Dorit Naishlos <dorit@il.ibm.com>

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free
Software Foundation; either version 2, or (at your option) any later
version.

GCC is distributed in the hope that it will be useful, but WITHOUT ANY
WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING.  If not, write to the Free
Software Foundation, 59 Temple Place - Suite 330, Boston, MA
02111-1307, USA.  */
22#include "config.h"
23#include "system.h"
24#include "coretypes.h"
25#include "tm.h"
26#include "errors.h"
27#include "ggc.h"
28#include "tree.h"
29#include "basic-block.h"
30#include "diagnostic.h"
31#include "tree-flow.h"
32#include "tree-dump.h"
33#include "timevar.h"
34#include "cfgloop.h"
35#include "expr.h"
36#include "optabs.h"
37#include "tree-chrec.h"
38#include "tree-data-ref.h"
39#include "tree-scalar-evolution.h"
40#include "tree-vectorizer.h"
41
42/* Main analysis functions. */
43static loop_vec_info vect_analyze_loop_form (struct loop *);
44static bool vect_analyze_data_refs (loop_vec_info);
45static bool vect_mark_stmts_to_be_vectorized (loop_vec_info);
46static bool vect_analyze_scalar_cycles (loop_vec_info);
47static bool vect_analyze_data_ref_accesses (loop_vec_info);
48static bool vect_analyze_data_ref_dependences (loop_vec_info);
49static bool vect_analyze_data_refs_alignment (loop_vec_info);
50static bool vect_compute_data_refs_alignment (loop_vec_info);
51static void vect_enhance_data_refs_alignment (loop_vec_info);
52static bool vect_analyze_operations (loop_vec_info);
5f55a1ba 53static bool vect_determine_vectorization_factor (loop_vec_info);
f7064d11
DN
54
55/* Utility functions for the analyses. */
56static bool exist_non_indexing_operands_for_use_p (tree, tree);
51d00891 57static void vect_mark_relevant (VEC(tree,heap) **, tree);
f7064d11
DN
58static bool vect_stmt_relevant_p (tree, loop_vec_info);
59static tree vect_get_loop_niters (struct loop *, tree *);
60static bool vect_analyze_data_ref_dependence
61 (struct data_reference *, struct data_reference *, loop_vec_info);
62static bool vect_compute_data_ref_alignment (struct data_reference *);
63static bool vect_analyze_data_ref_access (struct data_reference *);
64static struct data_reference * vect_analyze_pointer_ref_access
65 (tree, tree, bool, tree, tree *, tree *);
66static bool vect_can_advance_ivs_p (loop_vec_info);
67static tree vect_get_ptr_offset (tree, tree, tree *);
68static bool vect_analyze_offset_expr (tree, struct loop *, tree, tree *,
69 tree *, tree *);
70static bool vect_base_addr_differ_p (struct data_reference *,
71 struct data_reference *drb, bool *);
72static tree vect_object_analysis (tree, tree, bool, tree,
73 struct data_reference **, tree *, tree *,
8bb46326
DN
74 tree *, bool *, tree *, struct ptr_info_def **,
75 subvar_t *);
f7064d11
DN
76static tree vect_address_analysis (tree, tree, bool, tree,
77 struct data_reference *, tree *, tree *,
78 tree *, bool *);
f7064d11
DN
79
80
81/* Function vect_get_ptr_offset
82
83 Compute the OFFSET modulo vector-type alignment of pointer REF in bits. */
84
85static tree
86vect_get_ptr_offset (tree ref ATTRIBUTE_UNUSED,
87 tree vectype ATTRIBUTE_UNUSED,
88 tree *offset ATTRIBUTE_UNUSED)
89{
90 /* TODO: Use alignment information. */
91 return NULL_TREE;
92}
93
94
95/* Function vect_analyze_offset_expr
96
97 Given an offset expression EXPR received from get_inner_reference, analyze
98 it and create an expression for INITIAL_OFFSET by substituting the variables
99 of EXPR with initial_condition of the corresponding access_fn in the loop.
100 E.g.,
101 for i
102 for (j = 3; j < N; j++)
103 a[j].b[i][j] = 0;
104
105 For a[j].b[i][j], EXPR will be 'i * C_i + j * C_j + C'. 'i' cannot be
106 substituted, since its access_fn in the inner loop is i. 'j' will be
107 substituted with 3. An INITIAL_OFFSET will be 'i * C_i + C`', where
108 C` = 3 * C_j + C.
109
110 Compute MISALIGN (the misalignment of the data reference initial access from
111 its base) if possible. Misalignment can be calculated only if all the
112 variables can be substituted with constants, or if a variable is multiplied
113 by a multiple of VECTYPE_ALIGNMENT. In the above example, since 'i' cannot
114 be substituted, MISALIGN will be NULL_TREE in case that C_i is not a multiple
115 of VECTYPE_ALIGNMENT, and C` otherwise. (We perform MISALIGN modulo
116 VECTYPE_ALIGNMENT computation in the caller of this function).
117
118 STEP is an evolution of the data reference in this loop in bytes.
119 In the above example, STEP is C_j.
120
121 Return FALSE, if the analysis fails, e.g., there is no access_fn for a
122 variable. In this case, all the outputs (INITIAL_OFFSET, MISALIGN and STEP)
123 are NULL_TREEs. Otherwise, return TRUE.
124
125*/
126
127static bool
128vect_analyze_offset_expr (tree expr,
129 struct loop *loop,
130 tree vectype_alignment,
131 tree *initial_offset,
132 tree *misalign,
133 tree *step)
134{
135 tree oprnd0;
136 tree oprnd1;
137 tree left_offset = ssize_int (0);
138 tree right_offset = ssize_int (0);
139 tree left_misalign = ssize_int (0);
140 tree right_misalign = ssize_int (0);
141 tree left_step = ssize_int (0);
142 tree right_step = ssize_int (0);
143 enum tree_code code;
144 tree init, evolution;
145
146 *step = NULL_TREE;
147 *misalign = NULL_TREE;
148 *initial_offset = NULL_TREE;
149
150 /* Strip conversions that don't narrow the mode. */
151 expr = vect_strip_conversion (expr);
152 if (!expr)
153 return false;
154
155 /* Stop conditions:
156 1. Constant. */
157 if (TREE_CODE (expr) == INTEGER_CST)
158 {
159 *initial_offset = fold_convert (ssizetype, expr);
160 *misalign = fold_convert (ssizetype, expr);
161 *step = ssize_int (0);
162 return true;
163 }
164
165 /* 2. Variable. Try to substitute with initial_condition of the corresponding
166 access_fn in the current loop. */
167 if (SSA_VAR_P (expr))
168 {
169 tree access_fn = analyze_scalar_evolution (loop, expr);
170
171 if (access_fn == chrec_dont_know)
172 /* No access_fn. */
173 return false;
174
175 init = initial_condition_in_loop_num (access_fn, loop->num);
176 if (init == expr && !expr_invariant_in_loop_p (loop, init))
177 /* Not enough information: may be not loop invariant.
178 E.g., for a[b[i]], we get a[D], where D=b[i]. EXPR is D, its
179 initial_condition is D, but it depends on i - loop's induction
180 variable. */
181 return false;
182
183 evolution = evolution_part_in_loop_num (access_fn, loop->num);
184 if (evolution && TREE_CODE (evolution) != INTEGER_CST)
185 /* Evolution is not constant. */
186 return false;
187
188 if (TREE_CODE (init) == INTEGER_CST)
189 *misalign = fold_convert (ssizetype, init);
190 else
191 /* Not constant, misalignment cannot be calculated. */
192 *misalign = NULL_TREE;
193
194 *initial_offset = fold_convert (ssizetype, init);
195
196 *step = evolution ? fold_convert (ssizetype, evolution) : ssize_int (0);
197 return true;
198 }
199
200 /* Recursive computation. */
201 if (!BINARY_CLASS_P (expr))
202 {
203 /* We expect to get binary expressions (PLUS/MINUS and MULT). */
204 if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
205 {
206 fprintf (vect_dump, "Not binary expression ");
207 print_generic_expr (vect_dump, expr, TDF_SLIM);
208 }
209 return false;
210 }
211 oprnd0 = TREE_OPERAND (expr, 0);
212 oprnd1 = TREE_OPERAND (expr, 1);
213
214 if (!vect_analyze_offset_expr (oprnd0, loop, vectype_alignment, &left_offset,
215 &left_misalign, &left_step)
216 || !vect_analyze_offset_expr (oprnd1, loop, vectype_alignment,
217 &right_offset, &right_misalign, &right_step))
218 return false;
219
220 /* The type of the operation: plus, minus or mult. */
221 code = TREE_CODE (expr);
222 switch (code)
223 {
224 case MULT_EXPR:
225 if (TREE_CODE (right_offset) != INTEGER_CST)
226 /* RIGHT_OFFSET can be not constant. For example, for arrays of variable
227 sized types.
228 FORNOW: We don't support such cases. */
229 return false;
230
231 /* Strip conversions that don't narrow the mode. */
232 left_offset = vect_strip_conversion (left_offset);
233 if (!left_offset)
234 return false;
235 /* Misalignment computation. */
236 if (SSA_VAR_P (left_offset))
237 {
238 /* If the left side contains variables that can't be substituted with
239 constants, we check if the right side is a multiple of ALIGNMENT.
240 */
241 if (integer_zerop (size_binop (TRUNC_MOD_EXPR, right_offset,
242 fold_convert (ssizetype, vectype_alignment))))
243 *misalign = ssize_int (0);
244 else
245 /* If the remainder is not zero or the right side isn't constant,
246 we can't compute misalignment. */
247 *misalign = NULL_TREE;
248 }
249 else
250 {
251 /* The left operand was successfully substituted with constant. */
252 if (left_misalign)
253 /* In case of EXPR '(i * C1 + j) * C2', LEFT_MISALIGN is
254 NULL_TREE. */
255 *misalign = size_binop (code, left_misalign, right_misalign);
256 else
257 *misalign = NULL_TREE;
258 }
259
260 /* Step calculation. */
261 /* Multiply the step by the right operand. */
262 *step = size_binop (MULT_EXPR, left_step, right_offset);
263 break;
264
265 case PLUS_EXPR:
266 case MINUS_EXPR:
267 /* Combine the recursive calculations for step and misalignment. */
268 *step = size_binop (code, left_step, right_step);
269
270 if (left_misalign && right_misalign)
271 *misalign = size_binop (code, left_misalign, right_misalign);
272 else
273 *misalign = NULL_TREE;
274
275 break;
276
277 default:
278 gcc_unreachable ();
279 }
280
281 /* Compute offset. */
282 *initial_offset = fold_convert (ssizetype,
283 fold (build2 (code, TREE_TYPE (left_offset),
284 left_offset,
285 right_offset)));
286 return true;
287}
288
289
5f55a1ba
DN
290/* Function vect_determine_vectorization_factor
291
292 Determine the vectorization factor (VF). VF is the number of data elements
293 that are operated upon in parallel in a single iteration of the vectorized
294 loop. For example, when vectorizing a loop that operates on 4byte elements,
295 on a target with vector size (VS) 16byte, the VF is set to 4, since 4
296 elements can fit in a single vector register.
297
298 We currently support vectorization of loops in which all types operated upon
299 are of the same size. Therefore this function currently sets VF according to
300 the size of the types operated upon, and fails if there are multiple sizes
301 in the loop.
302
303 VF is also the factor by which the loop iterations are strip-mined, e.g.:
304 original loop:
305 for (i=0; i<N; i++){
306 a[i] = b[i] + c[i];
307 }
308
309 vectorized loop:
310 for (i=0; i<N; i+=VF){
311 a[i:VF] = b[i:VF] + c[i:VF];
312 }
313*/
314
315static bool
316vect_determine_vectorization_factor (loop_vec_info loop_vinfo)
317{
318 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
319 basic_block *bbs = LOOP_VINFO_BBS (loop_vinfo);
320 int nbbs = loop->num_nodes;
321 block_stmt_iterator si;
322 unsigned int vectorization_factor = 0;
323 int i;
324 tree scalar_type;
325
326 if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
327 fprintf (vect_dump, "=== vect_determine_vectorization_factor ===");
328
329 for (i = 0; i < nbbs; i++)
330 {
331 basic_block bb = bbs[i];
332
333 for (si = bsi_start (bb); !bsi_end_p (si); bsi_next (&si))
334 {
335 tree stmt = bsi_stmt (si);
336 unsigned int nunits;
337 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
338 tree vectype;
339
340 if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
341 {
342 fprintf (vect_dump, "==> examining statement: ");
343 print_generic_expr (vect_dump, stmt, TDF_SLIM);
344 }
345
346 gcc_assert (stmt_info);
347 /* skip stmts which do not need to be vectorized. */
348 if (!STMT_VINFO_RELEVANT_P (stmt_info))
349 continue;
350
351 if (VECTOR_MODE_P (TYPE_MODE (TREE_TYPE (stmt))))
352 {
353 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS,
354 LOOP_LOC (loop_vinfo)))
355 {
356 fprintf (vect_dump, "not vectorized: vector stmt in loop:");
357 print_generic_expr (vect_dump, stmt, TDF_SLIM);
358 }
359 return false;
360 }
361
362 if (STMT_VINFO_DATA_REF (stmt_info))
363 scalar_type = TREE_TYPE (DR_REF (STMT_VINFO_DATA_REF (stmt_info)));
364 else if (TREE_CODE (stmt) == MODIFY_EXPR)
365 scalar_type = TREE_TYPE (TREE_OPERAND (stmt, 0));
366 else
367 scalar_type = TREE_TYPE (stmt);
368
369 if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
370 {
371 fprintf (vect_dump, "get vectype for scalar type: ");
372 print_generic_expr (vect_dump, scalar_type, TDF_SLIM);
373 }
374
375 vectype = get_vectype_for_scalar_type (scalar_type);
376 if (!vectype)
377 {
378 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS,
379 LOOP_LOC (loop_vinfo)))
380 {
381 fprintf (vect_dump, "not vectorized: unsupported data-type ");
382 print_generic_expr (vect_dump, scalar_type, TDF_SLIM);
383 }
384 return false;
385 }
386 if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
387 {
388 fprintf (vect_dump, "vectype: ");
389 print_generic_expr (vect_dump, vectype, TDF_SLIM);
390 }
391 STMT_VINFO_VECTYPE (stmt_info) = vectype;
392
57d1677d 393 nunits = TYPE_VECTOR_SUBPARTS (vectype);
5f55a1ba
DN
394 if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
395 fprintf (vect_dump, "nunits = %d", nunits);
396
397 if (vectorization_factor)
398 {
399 /* FORNOW: don't allow mixed units.
400 This restriction will be relaxed in the future. */
401 if (nunits != vectorization_factor)
402 {
403 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS,
404 LOOP_LOC (loop_vinfo)))
405 fprintf (vect_dump, "not vectorized: mixed data-types");
406 return false;
407 }
408 }
409 else
410 vectorization_factor = nunits;
411
412#ifdef ENABLE_CHECKING
413 gcc_assert (GET_MODE_SIZE (TYPE_MODE (scalar_type))
414 * vectorization_factor == UNITS_PER_SIMD_WORD);
415#endif
416 }
417 }
418
419 /* TODO: Analyze cost. Decide if worth while to vectorize. */
420
421 if (vectorization_factor <= 1)
422 {
423 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS,
424 LOOP_LOC (loop_vinfo)))
425 fprintf (vect_dump, "not vectorized: unsupported data-type");
426 return false;
427 }
428 LOOP_VINFO_VECT_FACTOR (loop_vinfo) = vectorization_factor;
429
430 return true;
431}
432
433
f7064d11
DN
434/* Function vect_analyze_operations.
435
436 Scan the loop stmts and make sure they are all vectorizable. */
437
438static bool
439vect_analyze_operations (loop_vec_info loop_vinfo)
440{
441 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
442 basic_block *bbs = LOOP_VINFO_BBS (loop_vinfo);
443 int nbbs = loop->num_nodes;
444 block_stmt_iterator si;
445 unsigned int vectorization_factor = 0;
446 int i;
447 bool ok;
f7064d11
DN
448
449 if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
450 fprintf (vect_dump, "=== vect_analyze_operations ===");
451
5f55a1ba
DN
452 gcc_assert (LOOP_VINFO_VECT_FACTOR (loop_vinfo));
453 vectorization_factor = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
454
f7064d11
DN
455 for (i = 0; i < nbbs; i++)
456 {
457 basic_block bb = bbs[i];
458
459 for (si = bsi_start (bb); !bsi_end_p (si); bsi_next (&si))
460 {
461 tree stmt = bsi_stmt (si);
f7064d11 462 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
f7064d11
DN
463
464 if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
465 {
466 fprintf (vect_dump, "==> examining statement: ");
467 print_generic_expr (vect_dump, stmt, TDF_SLIM);
468 }
469
470 gcc_assert (stmt_info);
471
472 /* skip stmts which do not need to be vectorized.
473 this is expected to include:
474 - the COND_EXPR which is the loop exit condition
475 - any LABEL_EXPRs in the loop
476 - computations that are used only for array indexing or loop
477 control */
478
479 if (!STMT_VINFO_RELEVANT_P (stmt_info))
480 {
481 if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
482 fprintf (vect_dump, "irrelevant.");
483 continue;
484 }
485
5f55a1ba
DN
486#ifdef ENABLE_CHECKING
487 if (STMT_VINFO_RELEVANT_P (stmt_info))
488 {
489 gcc_assert (!VECTOR_MODE_P (TYPE_MODE (TREE_TYPE (stmt))));
490 gcc_assert (STMT_VINFO_VECTYPE (stmt_info));
491 }
492#endif
f7064d11
DN
493
494 ok = (vectorizable_operation (stmt, NULL, NULL)
495 || vectorizable_assignment (stmt, NULL, NULL)
496 || vectorizable_load (stmt, NULL, NULL)
b52485c6
DP
497 || vectorizable_store (stmt, NULL, NULL)
498 || vectorizable_condition (stmt, NULL, NULL));
f7064d11
DN
499
500 if (!ok)
501 {
502 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS,
503 LOOP_LOC (loop_vinfo)))
504 {
505 fprintf (vect_dump, "not vectorized: stmt not supported: ");
506 print_generic_expr (vect_dump, stmt, TDF_SLIM);
507 }
508 return false;
509 }
f7064d11
DN
510 }
511 }
512
513 /* TODO: Analyze cost. Decide if worth while to vectorize. */
514
f7064d11
DN
515 if (LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo)
516 && vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
517 fprintf (vect_dump,
518 "vectorization_factor = %d, niters = " HOST_WIDE_INT_PRINT_DEC,
519 vectorization_factor, LOOP_VINFO_INT_NITERS (loop_vinfo));
520
521 if (LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo)
522 && LOOP_VINFO_INT_NITERS (loop_vinfo) < vectorization_factor)
523 {
524 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS,
525 LOOP_LOC (loop_vinfo)))
526 fprintf (vect_dump, "not vectorized: iteration count too small.");
527 return false;
528 }
529
530 if (!LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo)
531 || LOOP_VINFO_INT_NITERS (loop_vinfo) % vectorization_factor != 0)
532 {
533 if (vect_print_dump_info (REPORT_DETAILS, LOOP_LOC (loop_vinfo)))
534 fprintf (vect_dump, "epilog loop required.");
535 if (!vect_can_advance_ivs_p (loop_vinfo))
536 {
537 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS,
538 LOOP_LOC (loop_vinfo)))
539 fprintf (vect_dump,
540 "not vectorized: can't create epilog loop 1.");
541 return false;
542 }
70388d94 543 if (!slpeel_can_duplicate_loop_p (loop, loop->single_exit))
f7064d11
DN
544 {
545 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS,
546 LOOP_LOC (loop_vinfo)))
547 fprintf (vect_dump,
548 "not vectorized: can't create epilog loop 2.");
549 return false;
550 }
551 }
552
553 return true;
554}
555
556
557/* Function exist_non_indexing_operands_for_use_p
558
559 USE is one of the uses attached to STMT. Check if USE is
560 used in STMT for anything other than indexing an array. */
561
562static bool
563exist_non_indexing_operands_for_use_p (tree use, tree stmt)
564{
565 tree operand;
566 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
567
568 /* USE corresponds to some operand in STMT. If there is no data
569 reference in STMT, then any operand that corresponds to USE
570 is not indexing an array. */
571 if (!STMT_VINFO_DATA_REF (stmt_info))
572 return true;
573
574 /* STMT has a data_ref. FORNOW this means that its of one of
575 the following forms:
576 -1- ARRAY_REF = var
577 -2- var = ARRAY_REF
578 (This should have been verified in analyze_data_refs).
579
580 'var' in the second case corresponds to a def, not a use,
581 so USE cannot correspond to any operands that are not used
582 for array indexing.
583
584 Therefore, all we need to check is if STMT falls into the
585 first case, and whether var corresponds to USE. */
586
587 if (TREE_CODE (TREE_OPERAND (stmt, 0)) == SSA_NAME)
588 return false;
589
590 operand = TREE_OPERAND (stmt, 1);
591
592 if (TREE_CODE (operand) != SSA_NAME)
593 return false;
594
595 if (operand == use)
596 return true;
597
598 return false;
599}
600
601
602/* Function vect_analyze_scalar_cycles.
603
604 Examine the cross iteration def-use cycles of scalar variables, by
605 analyzing the loop (scalar) PHIs; verify that the cross iteration def-use
606 cycles that they represent do not impede vectorization.
607
608 FORNOW: Reduction as in the following loop, is not supported yet:
609 loop1:
610 for (i=0; i<N; i++)
611 sum += a[i];
612 The cross-iteration cycle corresponding to variable 'sum' will be
613 considered too complicated and will impede vectorization.
614
615 FORNOW: Induction as in the following loop, is not supported yet:
616 loop2:
617 for (i=0; i<N; i++)
618 a[i] = i;
619
620 However, the following loop *is* vectorizable:
621 loop3:
622 for (i=0; i<N; i++)
623 a[i] = b[i];
624
625 In both loops there exists a def-use cycle for the variable i:
626 loop: i_2 = PHI (i_0, i_1)
627 a[i_2] = ...;
628 i_1 = i_2 + 1;
629 GOTO loop;
630
631 The evolution of the above cycle is considered simple enough,
632 however, we also check that the cycle does not need to be
633 vectorized, i.e - we check that the variable that this cycle
634 defines is only used for array indexing or in stmts that do not
635 need to be vectorized. This is not the case in loop2, but it
636 *is* the case in loop3. */
637
638static bool
639vect_analyze_scalar_cycles (loop_vec_info loop_vinfo)
640{
641 tree phi;
642 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
643 basic_block bb = loop->header;
644 tree dummy;
645
646 if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
647 fprintf (vect_dump, "=== vect_analyze_scalar_cycles ===");
648
649 for (phi = phi_nodes (bb); phi; phi = PHI_CHAIN (phi))
650 {
651 tree access_fn = NULL;
652
653 if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
654 {
655 fprintf (vect_dump, "Analyze phi: ");
656 print_generic_expr (vect_dump, phi, TDF_SLIM);
657 }
658
659 /* Skip virtual phi's. The data dependences that are associated with
660 virtual defs/uses (i.e., memory accesses) are analyzed elsewhere. */
661
662 if (!is_gimple_reg (SSA_NAME_VAR (PHI_RESULT (phi))))
663 {
664 if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
665 fprintf (vect_dump, "virtual phi. skip.");
666 continue;
667 }
668
669 /* Analyze the evolution function. */
670
671 /* FORNOW: The only scalar cross-iteration cycles that we allow are
672 those of loop induction variables; This property is verified here.
673
674 Furthermore, if that induction variable is used in an operation
675 that needs to be vectorized (i.e, is not solely used to index
676 arrays and check the exit condition) - we do not support its
677 vectorization yet. This property is verified in vect_is_simple_use,
678 during vect_analyze_operations. */
679
680 access_fn = /* instantiate_parameters
681 (loop,*/
682 analyze_scalar_evolution (loop, PHI_RESULT (phi));
683
684 if (!access_fn)
685 {
686 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS,
687 LOOP_LOC (loop_vinfo)))
688 fprintf (vect_dump, "not vectorized: unsupported scalar cycle.");
689 return false;
690 }
691
692 if (vect_print_dump_info (REPORT_DETAILS,
693 LOOP_LOC (loop_vinfo)))
694 {
695 fprintf (vect_dump, "Access function of PHI: ");
696 print_generic_expr (vect_dump, access_fn, TDF_SLIM);
697 }
698
699 if (!vect_is_simple_iv_evolution (loop->num, access_fn, &dummy, &dummy))
700 {
701 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS,
702 LOOP_LOC (loop_vinfo)))
703 fprintf (vect_dump, "not vectorized: unsupported scalar cycle.");
704 return false;
705 }
706 }
707
708 return true;
709}
710
711
712/* Function vect_base_addr_differ_p.
713
714 This is the simplest data dependence test: determines whether the
715 data references A and B access the same array/region. Returns
716 false when the property is not computable at compile time.
717 Otherwise return true, and DIFFER_P will record the result. This
718 utility will not be necessary when alias_sets_conflict_p will be
719 less conservative. */
720
721static bool
722vect_base_addr_differ_p (struct data_reference *dra,
723 struct data_reference *drb,
724 bool *differ_p)
725{
726 tree stmt_a = DR_STMT (dra);
727 stmt_vec_info stmt_info_a = vinfo_for_stmt (stmt_a);
728 tree stmt_b = DR_STMT (drb);
729 stmt_vec_info stmt_info_b = vinfo_for_stmt (stmt_b);
730 tree addr_a = STMT_VINFO_VECT_DR_BASE_ADDRESS (stmt_info_a);
731 tree addr_b = STMT_VINFO_VECT_DR_BASE_ADDRESS (stmt_info_b);
732 tree type_a = TREE_TYPE (addr_a);
733 tree type_b = TREE_TYPE (addr_b);
734 HOST_WIDE_INT alias_set_a, alias_set_b;
735
736 gcc_assert (POINTER_TYPE_P (type_a) && POINTER_TYPE_P (type_b));
737
738 /* Both references are ADDR_EXPR, i.e., we have the objects. */
739 if (TREE_CODE (addr_a) == ADDR_EXPR && TREE_CODE (addr_b) == ADDR_EXPR)
740 return array_base_name_differ_p (dra, drb, differ_p);
741
742 alias_set_a = (TREE_CODE (addr_a) == ADDR_EXPR) ?
743 get_alias_set (TREE_OPERAND (addr_a, 0)) : get_alias_set (addr_a);
744 alias_set_b = (TREE_CODE (addr_b) == ADDR_EXPR) ?
745 get_alias_set (TREE_OPERAND (addr_b, 0)) : get_alias_set (addr_b);
746
747 if (!alias_sets_conflict_p (alias_set_a, alias_set_b))
748 {
749 *differ_p = true;
750 return true;
751 }
752
753 /* An instruction writing through a restricted pointer is "independent" of any
754 instruction reading or writing through a different pointer, in the same
755 block/scope. */
756 else if ((TYPE_RESTRICT (type_a) && !DR_IS_READ (dra))
757 || (TYPE_RESTRICT (type_b) && !DR_IS_READ (drb)))
758 {
759 *differ_p = true;
760 return true;
761 }
762 return false;
763}
764
765
766/* Function vect_analyze_data_ref_dependence.
767
768 Return TRUE if there (might) exist a dependence between a memory-reference
769 DRA and a memory-reference DRB. */
770
771static bool
772vect_analyze_data_ref_dependence (struct data_reference *dra,
773 struct data_reference *drb,
774 loop_vec_info loop_vinfo)
775{
776 bool differ_p;
777 struct data_dependence_relation *ddr;
b52485c6
DP
778 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
779 int vectorization_factor = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
780 int dist = 0;
781 unsigned int loop_depth = 0;
782 struct loop *loop_nest = loop;
783
f7064d11
DN
784
785 if (!vect_base_addr_differ_p (dra, drb, &differ_p))
786 {
787 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS,
788 LOOP_LOC (loop_vinfo)))
789 {
790 fprintf (vect_dump,
791 "not vectorized: can't determine dependence between: ");
792 print_generic_expr (vect_dump, DR_REF (dra), TDF_SLIM);
793 fprintf (vect_dump, " and ");
794 print_generic_expr (vect_dump, DR_REF (drb), TDF_SLIM);
795 }
796 return true;
797 }
798
799 if (differ_p)
800 return false;
801
802 ddr = initialize_data_dependence_relation (dra, drb);
803 compute_affine_dependence (ddr);
804
805 if (DDR_ARE_DEPENDENT (ddr) == chrec_known)
806 return false;
b52485c6
DP
807
808 if (DDR_ARE_DEPENDENT (ddr) == chrec_dont_know)
809 {
810 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS,
811 LOOP_LOC (loop_vinfo)))
812 {
813 fprintf (vect_dump,
814 "not vectorized: can't determine dependence between ");
815 print_generic_expr (vect_dump, DR_REF (dra), TDF_SLIM);
816 fprintf (vect_dump, " and ");
817 print_generic_expr (vect_dump, DR_REF (drb), TDF_SLIM);
818 }
819 return true;
820 }
821
822 /* Find loop depth. */
823 while (loop_nest)
824 {
825 if (loop_nest->outer && loop_nest->outer->outer)
826 {
827 loop_nest = loop_nest->outer;
828 loop_depth++;
829 }
830 else
831 break;
832 }
833
834 /* Compute distance vector. */
835 compute_subscript_distance (ddr);
836 build_classic_dist_vector (ddr, vect_loops_num, loop_nest->depth);
837
838 if (!DDR_DIST_VECT (ddr))
839 {
840 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS,
841 LOOP_LOC (loop_vinfo)))
842 {
843 fprintf (vect_dump, "not vectorized: bad dist vector for ");
844 print_generic_expr (vect_dump, DR_REF (dra), TDF_SLIM);
845 fprintf (vect_dump, " and ");
846 print_generic_expr (vect_dump, DR_REF (drb), TDF_SLIM);
847 }
848 return true;
849 }
850
851 dist = DDR_DIST_VECT (ddr)[loop_depth];
852
853 /* Same loop iteration. */
854 if (dist == 0)
855 {
856 if (vect_print_dump_info (REPORT_DETAILS, LOOP_LOC (loop_vinfo)))
857 fprintf (vect_dump, "dependence distance 0.");
858 return false;
859 }
860
861 if (dist >= vectorization_factor)
862 /* Dependence distance does not create dependence, as far as vectorization
863 is concerned, in this case. */
864 return false;
865
f7064d11
DN
866 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS,
867 LOOP_LOC (loop_vinfo)))
868 {
869 fprintf (vect_dump,
870 "not vectorized: possible dependence between data-refs ");
871 print_generic_expr (vect_dump, DR_REF (dra), TDF_SLIM);
872 fprintf (vect_dump, " and ");
873 print_generic_expr (vect_dump, DR_REF (drb), TDF_SLIM);
874 }
875
876 return true;
877}
878
879
880/* Function vect_analyze_data_ref_dependences.
881
882 Examine all the data references in the loop, and make sure there do not
b52485c6 883 exist any data dependences between them. */
f7064d11
DN
884
885static bool
886vect_analyze_data_ref_dependences (loop_vec_info loop_vinfo)
887{
888 unsigned int i, j;
889 varray_type loop_write_refs = LOOP_VINFO_DATAREF_WRITES (loop_vinfo);
890 varray_type loop_read_refs = LOOP_VINFO_DATAREF_READS (loop_vinfo);
891
892 /* Examine store-store (output) dependences. */
893
894 if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
895 fprintf (vect_dump, "=== vect_analyze_dependences ===");
896
897 if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
898 fprintf (vect_dump, "compare all store-store pairs.");
899
900 for (i = 0; i < VARRAY_ACTIVE_SIZE (loop_write_refs); i++)
901 {
902 for (j = i + 1; j < VARRAY_ACTIVE_SIZE (loop_write_refs); j++)
903 {
904 struct data_reference *dra =
905 VARRAY_GENERIC_PTR (loop_write_refs, i);
906 struct data_reference *drb =
907 VARRAY_GENERIC_PTR (loop_write_refs, j);
908 if (vect_analyze_data_ref_dependence (dra, drb, loop_vinfo))
909 return false;
910 }
911 }
912
913 /* Examine load-store (true/anti) dependences. */
914
915 if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
916 fprintf (vect_dump, "compare all load-store pairs.");
917
918 for (i = 0; i < VARRAY_ACTIVE_SIZE (loop_read_refs); i++)
919 {
920 for (j = 0; j < VARRAY_ACTIVE_SIZE (loop_write_refs); j++)
921 {
922 struct data_reference *dra = VARRAY_GENERIC_PTR (loop_read_refs, i);
923 struct data_reference *drb =
924 VARRAY_GENERIC_PTR (loop_write_refs, j);
925 if (vect_analyze_data_ref_dependence (dra, drb, loop_vinfo))
926 return false;
927 }
928 }
929
930 return true;
931}
932
933
934/* Function vect_compute_data_ref_alignment
935
936 Compute the misalignment of the data reference DR.
937
938 Output:
939 1. If during the misalignment computation it is found that the data reference
940 cannot be vectorized then false is returned.
941 2. DR_MISALIGNMENT (DR) is defined.
942
943 FOR NOW: No analysis is actually performed. Misalignment is calculated
944 only for trivial cases. TODO. */
945
946static bool
947vect_compute_data_ref_alignment (struct data_reference *dr)
948{
949 tree stmt = DR_STMT (dr);
950 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
951 tree ref = DR_REF (dr);
952 tree vectype;
953 tree base, alignment;
954 bool base_aligned_p;
955 tree misalign;
956
957 if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
958 fprintf (vect_dump, "vect_compute_data_ref_alignment:");
959
960 /* Initialize misalignment to unknown. */
961 DR_MISALIGNMENT (dr) = -1;
962
963 misalign = STMT_VINFO_VECT_MISALIGNMENT (stmt_info);
964 base_aligned_p = STMT_VINFO_VECT_BASE_ALIGNED_P (stmt_info);
965 base = build_fold_indirect_ref (STMT_VINFO_VECT_DR_BASE_ADDRESS (stmt_info));
966 vectype = STMT_VINFO_VECTYPE (stmt_info);
967
968 if (!misalign)
969 {
970 if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
971 {
972 fprintf (vect_dump, "Unknown alignment for access: ");
973 print_generic_expr (vect_dump, base, TDF_SLIM);
974 }
975 return true;
976 }
977
978 if (!base_aligned_p)
979 {
980 if (!vect_can_force_dr_alignment_p (base, TYPE_ALIGN (vectype)))
981 {
982 if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
983 {
984 fprintf (vect_dump, "can't force alignment of ref: ");
985 print_generic_expr (vect_dump, ref, TDF_SLIM);
986 }
987 return true;
988 }
989
990 /* Force the alignment of the decl.
991 NOTE: This is the only change to the code we make during
992 the analysis phase, before deciding to vectorize the loop. */
993 if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
994 fprintf (vect_dump, "force alignment");
995 DECL_ALIGN (base) = TYPE_ALIGN (vectype);
996 DECL_USER_ALIGN (base) = 1;
997 }
998
999 /* At this point we assume that the base is aligned. */
1000 gcc_assert (base_aligned_p
1001 || (TREE_CODE (base) == VAR_DECL
1002 && DECL_ALIGN (base) >= TYPE_ALIGN (vectype)));
1003
1004 /* Alignment required, in bytes: */
1005 alignment = ssize_int (TYPE_ALIGN (vectype)/BITS_PER_UNIT);
1006
1007 /* Modulo alignment. */
1008 misalign = size_binop (TRUNC_MOD_EXPR, misalign, alignment);
1009 if (tree_int_cst_sgn (misalign) < 0)
1010 {
1011 /* Negative misalignment value. */
1012 if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
1013 fprintf (vect_dump, "unexpected misalign value");
1014 return false;
1015 }
1016
1017 DR_MISALIGNMENT (dr) = tree_low_cst (misalign, 1);
1018
1019 if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
1020 fprintf (vect_dump, "misalign = %d bytes", DR_MISALIGNMENT (dr));
1021
1022 return true;
1023}
1024
1025
1026/* Function vect_compute_data_refs_alignment
1027
1028 Compute the misalignment of data references in the loop.
1029 This pass may take place at function granularity instead of at loop
1030 granularity.
1031
1032 FOR NOW: No analysis is actually performed. Misalignment is calculated
1033 only for trivial cases. TODO. */
1034
1035static bool
1036vect_compute_data_refs_alignment (loop_vec_info loop_vinfo)
1037{
1038 varray_type loop_write_datarefs = LOOP_VINFO_DATAREF_WRITES (loop_vinfo);
1039 varray_type loop_read_datarefs = LOOP_VINFO_DATAREF_READS (loop_vinfo);
1040 unsigned int i;
1041
1042 for (i = 0; i < VARRAY_ACTIVE_SIZE (loop_write_datarefs); i++)
1043 {
1044 struct data_reference *dr = VARRAY_GENERIC_PTR (loop_write_datarefs, i);
1045 if (!vect_compute_data_ref_alignment (dr))
1046 return false;
1047 }
1048
1049 for (i = 0; i < VARRAY_ACTIVE_SIZE (loop_read_datarefs); i++)
1050 {
1051 struct data_reference *dr = VARRAY_GENERIC_PTR (loop_read_datarefs, i);
1052 if (!vect_compute_data_ref_alignment (dr))
1053 return false;
1054 }
1055
1056 return true;
1057}
1058
1059
1060/* Function vect_enhance_data_refs_alignment
1061
1062 This pass will use loop versioning and loop peeling in order to enhance
1063 the alignment of data references in the loop.
1064
1065 FOR NOW: we assume that whatever versioning/peeling takes place, only the
1066 original loop is to be vectorized; Any other loops that are created by
1067 the transformations performed in this pass - are not supposed to be
1068 vectorized. This restriction will be relaxed. */
1069
static void
vect_enhance_data_refs_alignment (loop_vec_info loop_vinfo)
{
  varray_type loop_read_datarefs = LOOP_VINFO_DATAREF_READS (loop_vinfo);
  varray_type loop_write_datarefs = LOOP_VINFO_DATAREF_WRITES (loop_vinfo);
  varray_type datarefs;
  struct data_reference *dr0 = NULL;
  unsigned int i, j;

  /*
     This pass will require a cost model to guide it whether to apply peeling
     or versioning or a combination of the two. For example, the scheme that
     intel uses when given a loop with several memory accesses, is as follows:
     choose one memory access ('p') which alignment you want to force by doing
     peeling. Then, either (1) generate a loop in which 'p' is aligned and all
     other accesses are not necessarily aligned, or (2) use loop versioning to
     generate one loop in which all accesses are aligned, and another loop in
     which only 'p' is necessarily aligned.

     ("Automatic Intra-Register Vectorization for the Intel Architecture",
      Aart J.C. Bik, Milind Girkar, Paul M. Grey and Ximmin Tian, International
      Journal of Parallel Programming, Vol. 30, No. 2, April 2002.)

     Devising a cost model is the most critical aspect of this work. It will
     guide us on which access to peel for, whether to use loop versioning, how
     many versions to create, etc. The cost model will probably consist of
     generic considerations as well as target specific considerations (on
     powerpc for example, misaligned stores are more painful than misaligned
     loads).

     Here is the general steps involved in alignment enhancements:

     -- original loop, before alignment analysis:
	for (i=0; i<N; i++){
	  x = q[i];			# DR_MISALIGNMENT(q) = unknown
	  p[i] = y;			# DR_MISALIGNMENT(p) = unknown
	}

     -- After vect_compute_data_refs_alignment:
	for (i=0; i<N; i++){
	  x = q[i];			# DR_MISALIGNMENT(q) = 3
	  p[i] = y;			# DR_MISALIGNMENT(p) = unknown
	}

     -- Possibility 1: we do loop versioning:
     if (p is aligned) {
	for (i=0; i<N; i++){	# loop 1A
	  x = q[i];			# DR_MISALIGNMENT(q) = 3
	  p[i] = y;			# DR_MISALIGNMENT(p) = 0
	}
     }
     else {
	for (i=0; i<N; i++){	# loop 1B
	  x = q[i];			# DR_MISALIGNMENT(q) = 3
	  p[i] = y;			# DR_MISALIGNMENT(p) = unaligned
	}
     }

     -- Possibility 2: we do loop peeling:
     for (i = 0; i < 3; i++){	# (scalar loop, not to be vectorized).
	x = q[i];
	p[i] = y;
     }
     for (i = 3; i < N; i++){	# loop 2A
	x = q[i];			# DR_MISALIGNMENT(q) = 0
	p[i] = y;			# DR_MISALIGNMENT(p) = unknown
     }

     -- Possibility 3: combination of loop peeling and versioning:
     for (i = 0; i < 3; i++){	# (scalar loop, not to be vectorized).
	x = q[i];
	p[i] = y;
     }
     if (p is aligned) {
	for (i = 3; i<N; i++){	# loop 3A
	  x = q[i];			# DR_MISALIGNMENT(q) = 0
	  p[i] = y;			# DR_MISALIGNMENT(p) = 0
	}
     }
     else {
	for (i = 3; i<N; i++){	# loop 3B
	  x = q[i];			# DR_MISALIGNMENT(q) = 0
	  p[i] = y;			# DR_MISALIGNMENT(p) = unaligned
	}
     }

     These loops are later passed to loop_transform to be vectorized. The
     vectorizer will use the alignment information to guide the transformation
     (whether to generate regular loads/stores, or with special handling for
     misalignment).
   */

  /* (1) Peeling to force alignment.  */

  /* (1.1) Decide whether to perform peeling, and how many iterations to peel:
     Considerations:
     + How many accesses will become aligned due to the peeling
     - How many accesses will become unaligned due to the peeling,
       and the cost of misaligned accesses.
     - The cost of peeling (the extra runtime checks, the increase
       in code size).

     The scheme we use FORNOW: peel to force the alignment of the first
     misaligned store in the loop.
     Rationale: misaligned stores are not yet supported.

     TODO: Use a better cost model.  */

  /* Pick the first write whose access is not known to be aligned; DR0 is
     the access the peel loop will align.  */
  for (i = 0; i < VARRAY_ACTIVE_SIZE (loop_write_datarefs); i++)
    {
      dr0 = VARRAY_GENERIC_PTR (loop_write_datarefs, i);
      if (!aligned_access_p (dr0))
	{
	  LOOP_VINFO_UNALIGNED_DR (loop_vinfo) = dr0;
	  /* NOTE(review): if DR0's misalignment is unknown this stores the
	     "unknown" sentinel (-1), which is still non-zero and therefore
	     enables the update step below with npeel == 0.  */
	  LOOP_PEELING_FOR_ALIGNMENT (loop_vinfo) = DR_MISALIGNMENT (dr0);
	  break;
	}
    }

  /* (1.2) Update the alignment info according to the peeling factor.
	   If the misalignment of the DR we peel for is M, then the
	   peeling factor is VF - M, and the misalignment of each access DR_i
	   in the loop is DR_MISALIGNMENT (DR_i) + VF - M.
	   If the misalignment of the DR we peel for is unknown, then the
	   misalignment of each access DR_i in the loop is also unknown.

	   TODO: - consider accesses that are known to have the same
		   alignment, even if that alignment is unknown.  */

  if (LOOP_PEELING_FOR_ALIGNMENT (loop_vinfo))
    {
      int mis;
      int npeel = 0;

      if (known_alignment_for_access_p (dr0))
	{
	  /* Since it's known at compile time, compute the number of iterations
	     in the peeled loop (the peeling factor) for use in updating
	     DR_MISALIGNMENT values.  The peeling factor is the vectorization
	     factor minus the misalignment as an element count.  */
	  mis = DR_MISALIGNMENT (dr0);
	  mis /= GET_MODE_SIZE (TYPE_MODE (TREE_TYPE (DR_REF (dr0))));
	  npeel = LOOP_VINFO_VECT_FACTOR (loop_vinfo) - mis;
	}

      /* Update every other data-ref: first all writes, then all reads.  */
      datarefs = loop_write_datarefs;
      for (j = 0; j < 2; j++)
	{
	  for (i = 0; i < VARRAY_ACTIVE_SIZE (datarefs); i++)
	    {
	      struct data_reference *dr = VARRAY_GENERIC_PTR (datarefs, i);

	      if (dr == dr0)
		continue;
	      /* Same (possibly unknown) misalignment as DR0: the peel that
		 aligns DR0 aligns DR as well.  */
	      if (known_alignment_for_access_p (dr)
		  && DR_MISALIGNMENT (dr) == DR_MISALIGNMENT (dr0))
		DR_MISALIGNMENT (dr) = 0;
	      else if (known_alignment_for_access_p (dr)
		       && known_alignment_for_access_p (dr0))
		{
		  /* Both misalignments known: shift DR's misalignment by the
		     NPEEL skipped elements, modulo the vector size.  */
		  int drsize = GET_MODE_SIZE (TYPE_MODE (TREE_TYPE (DR_REF (dr))));

		  DR_MISALIGNMENT (dr) += npeel * drsize;
		  DR_MISALIGNMENT (dr) %= UNITS_PER_SIMD_WORD;
		}
	      else
		/* Either misalignment unknown: DR's becomes unknown too.  */
		DR_MISALIGNMENT (dr) = -1;
	    }
	  datarefs = loop_read_datarefs;
	}

      /* The peeled loop leaves DR0 itself aligned by construction.  */
      DR_MISALIGNMENT (dr0) = 0;
    }
}
1244
1245
1246/* Function vect_analyze_data_refs_alignment
1247
1248 Analyze the alignment of the data-references in the loop.
 1249   FOR NOW: Until support for misaligned accesses is in place, only if all
1250 accesses are aligned can the loop be vectorized. This restriction will be
1251 relaxed. */
1252
1253static bool
1254vect_analyze_data_refs_alignment (loop_vec_info loop_vinfo)
1255{
1256 varray_type loop_read_datarefs = LOOP_VINFO_DATAREF_READS (loop_vinfo);
1257 varray_type loop_write_datarefs = LOOP_VINFO_DATAREF_WRITES (loop_vinfo);
1258 enum dr_alignment_support supportable_dr_alignment;
1259 unsigned int i;
1260
1261 if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
1262 fprintf (vect_dump, "=== vect_analyze_data_refs_alignment ===");
1263
1264
1265 /* This pass may take place at function granularity instead of at loop
1266 granularity. */
1267
1268 if (!vect_compute_data_refs_alignment (loop_vinfo))
1269 {
1270 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS,
1271 LOOP_LOC (loop_vinfo)))
1272 fprintf (vect_dump,
1273 "not vectorized: can't calculate alignment for data ref.");
1274 return false;
1275 }
1276
1277
1278 /* This pass will decide on using loop versioning and/or loop peeling in
1279 order to enhance the alignment of data references in the loop. */
1280
1281 vect_enhance_data_refs_alignment (loop_vinfo);
1282
1283
1284 /* Finally, check that all the data references in the loop can be
1285 handled with respect to their alignment. */
1286
1287 for (i = 0; i < VARRAY_ACTIVE_SIZE (loop_read_datarefs); i++)
1288 {
1289 struct data_reference *dr = VARRAY_GENERIC_PTR (loop_read_datarefs, i);
1290 supportable_dr_alignment = vect_supportable_dr_alignment (dr);
1291 if (!supportable_dr_alignment)
1292 {
1293 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS,
1294 LOOP_LOC (loop_vinfo)))
1295 fprintf (vect_dump, "not vectorized: unsupported unaligned load.");
1296 return false;
1297 }
1298 if (supportable_dr_alignment != dr_aligned
1299 && (vect_print_dump_info (REPORT_ALIGNMENT, LOOP_LOC (loop_vinfo))))
1300 fprintf (vect_dump, "Vectorizing an unaligned access.");
1301 }
1302 for (i = 0; i < VARRAY_ACTIVE_SIZE (loop_write_datarefs); i++)
1303 {
1304 struct data_reference *dr = VARRAY_GENERIC_PTR (loop_write_datarefs, i);
1305 supportable_dr_alignment = vect_supportable_dr_alignment (dr);
1306 if (!supportable_dr_alignment)
1307 {
1308 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS,
1309 LOOP_LOC (loop_vinfo)))
1310 fprintf (vect_dump, "not vectorized: unsupported unaligned store.");
1311 return false;
1312 }
1313 if (supportable_dr_alignment != dr_aligned
1314 && (vect_print_dump_info (REPORT_ALIGNMENT, LOOP_LOC (loop_vinfo))))
1315 fprintf (vect_dump, "Vectorizing an unaligned access.");
1316 }
0e6b0daf
DN
1317 if (LOOP_VINFO_UNALIGNED_DR (loop_vinfo)
1318 && vect_print_dump_info (REPORT_ALIGNMENT, LOOP_LOC (loop_vinfo)))
1319 fprintf (vect_dump, "Alignment of access forced using peeling.");
f7064d11
DN
1320
1321 return true;
1322}
1323
1324
1325/* Function vect_analyze_data_ref_access.
1326
1327 Analyze the access pattern of the data-reference DR. For now, a data access
 1328   has to be consecutive to be considered vectorizable.  */
1329
1330static bool
1331vect_analyze_data_ref_access (struct data_reference *dr)
1332{
1333 tree stmt = DR_STMT (dr);
1334 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1335 tree step = STMT_VINFO_VECT_STEP (stmt_info);
1336 tree scalar_type = TREE_TYPE (DR_REF (dr));
1337
1338 if (!step || tree_int_cst_compare (step, TYPE_SIZE_UNIT (scalar_type)))
1339 {
1340 if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
1341 fprintf (vect_dump, "not consecutive access");
1342 return false;
1343 }
1344 return true;
1345}
1346
1347
1348/* Function vect_analyze_data_ref_accesses.
1349
1350 Analyze the access pattern of all the data references in the loop.
1351
1352 FORNOW: the only access pattern that is considered vectorizable is a
1353 simple step 1 (consecutive) access.
1354
1355 FORNOW: handle only arrays and pointer accesses. */
1356
1357static bool
1358vect_analyze_data_ref_accesses (loop_vec_info loop_vinfo)
1359{
1360 unsigned int i;
1361 varray_type loop_write_datarefs = LOOP_VINFO_DATAREF_WRITES (loop_vinfo);
1362 varray_type loop_read_datarefs = LOOP_VINFO_DATAREF_READS (loop_vinfo);
1363
1364 if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
1365 fprintf (vect_dump, "=== vect_analyze_data_ref_accesses ===");
1366
1367 for (i = 0; i < VARRAY_ACTIVE_SIZE (loop_write_datarefs); i++)
1368 {
1369 struct data_reference *dr = VARRAY_GENERIC_PTR (loop_write_datarefs, i);
1370 bool ok = vect_analyze_data_ref_access (dr);
1371 if (!ok)
1372 {
1373 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS,
1374 LOOP_LOC (loop_vinfo)))
1375 fprintf (vect_dump, "not vectorized: complicated access pattern.");
1376 return false;
1377 }
1378 }
1379
1380 for (i = 0; i < VARRAY_ACTIVE_SIZE (loop_read_datarefs); i++)
1381 {
1382 struct data_reference *dr = VARRAY_GENERIC_PTR (loop_read_datarefs, i);
1383 bool ok = vect_analyze_data_ref_access (dr);
1384 if (!ok)
1385 {
1386 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS,
1387 LOOP_LOC (loop_vinfo)))
1388 fprintf (vect_dump, "not vectorized: complicated access pattern.");
1389 return false;
1390 }
1391 }
1392
1393 return true;
1394}
1395
1396
1397/* Function vect_analyze_pointer_ref_access.
1398
1399 Input:
1400 STMT - a stmt that contains a data-ref.
1401 MEMREF - a data-ref in STMT, which is an INDIRECT_REF.
1402 ACCESS_FN - the access function of MEMREF.
1403
1404 Output:
1405 If the data-ref access is vectorizable, return a data_reference structure
1406 that represents it (DR). Otherwise - return NULL.
1407 STEP - the stride of MEMREF in the loop.
1408 INIT - the initial condition of MEMREF in the loop.
1409*/
1410
static struct data_reference *
vect_analyze_pointer_ref_access (tree memref, tree stmt, bool is_read,
				 tree access_fn, tree *ptr_init, tree *ptr_step)
{
  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
  loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
  struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
  tree step, init;
  tree reftype, innertype;
  tree indx_access_fn;
  int loopnum = loop->num;
  struct data_reference *dr;

  /* The access function must be a simple IV evolution: split it into its
     initial condition (INIT) and stride (STEP).  */
  if (!vect_is_simple_iv_evolution (loopnum, access_fn, &init, &step))
    {
      if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS,
				LOOP_LOC (loop_vinfo)))
	fprintf (vect_dump, "not vectorized: pointer access is not simple.");
      return NULL;
    }

  STRIP_NOPS (init);

  /* The initial address must not vary inside the loop.  */
  if (!expr_invariant_in_loop_p (loop, init))
    {
      if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS,
				LOOP_LOC (loop_vinfo)))
	fprintf (vect_dump,
		 "not vectorized: initial condition is not loop invariant.");
      return NULL;
    }

  /* Only compile-time-constant strides are handled.  */
  if (TREE_CODE (step) != INTEGER_CST)
    {
      if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS,
				LOOP_LOC (loop_vinfo)))
	fprintf (vect_dump,
		 "not vectorized: non constant step for pointer access.");
      return NULL;
    }

  /* MEMREF is an INDIRECT_REF; its operand must have pointer type.  */
  reftype = TREE_TYPE (TREE_OPERAND (memref, 0));
  if (!POINTER_TYPE_P (reftype))
    {
      if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS,
				LOOP_LOC (loop_vinfo)))
	fprintf (vect_dump, "not vectorized: unexpected pointer access form.");
      return NULL;
    }

  /* The initial condition must also be a pointer.  */
  if (!POINTER_TYPE_P (TREE_TYPE (init)))
    {
      if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS,
				LOOP_LOC (loop_vinfo)))
	fprintf (vect_dump, "not vectorized: unexpected pointer access form.");
      return NULL;
    }

  *ptr_step = fold_convert (ssizetype, step);
  innertype = TREE_TYPE (reftype);
  /* The pointed-to type must be complete so its size is known.  */
  if (!COMPLETE_TYPE_P (innertype))
    {
      if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS,
				LOOP_LOC (loop_vinfo)))
	fprintf (vect_dump, "not vectorized: pointer to incomplete type.");
      return NULL;
    }

  /* Check that STEP is a multiple of type size.  */
  if (!integer_zerop (size_binop (TRUNC_MOD_EXPR, *ptr_step,
			fold_convert (ssizetype, TYPE_SIZE_UNIT (innertype)))))
    {
      if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS,
				LOOP_LOC (loop_vinfo)))
	fprintf (vect_dump, "not vectorized: non consecutive access.");
      return NULL;
    }

  /* Model the pointer access as an array indexed by {0, +, 1}_loop so the
     data-dependence machinery can treat it like an array reference.  */
  indx_access_fn =
    build_polynomial_chrec (loopnum, integer_zero_node, integer_one_node);
  if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
    {
      fprintf (vect_dump, "Access function of ptr indx: ");
      print_generic_expr (vect_dump, indx_access_fn, TDF_SLIM);
    }
  dr = init_data_ref (stmt, memref, NULL_TREE, indx_access_fn, is_read);
  *ptr_init = init;
  return dr;
}
1500
1501
f7064d11
DN
1502/* Function vect_address_analysis
1503
1504 Return the BASE of the address expression EXPR.
1505 Also compute the INITIAL_OFFSET from BASE, MISALIGN and STEP.
1506
1507 Input:
1508 EXPR - the address expression that is being analyzed
1509 STMT - the statement that contains EXPR or its original memory reference
1510 IS_READ - TRUE if STMT reads from EXPR, FALSE if writes to EXPR
1511 VECTYPE - the type that defines the alignment (i.e, we compute
1512 alignment relative to TYPE_ALIGN(VECTYPE))
1513 DR - data_reference struct for the original memory reference
1514
1515 Output:
1516 BASE (returned value) - the base of the data reference EXPR.
1517 INITIAL_OFFSET - initial offset of EXPR from BASE (an expression)
1518 MISALIGN - offset of EXPR from BASE in bytes (a constant) or NULL_TREE if the
1519 computation is impossible
1520 STEP - evolution of EXPR in the loop
1521 BASE_ALIGNED - indicates if BASE is aligned
1522
1523 If something unexpected is encountered (an unsupported form of data-ref),
1524 then NULL_TREE is returned.
1525 */
1526
static tree
vect_address_analysis (tree expr, tree stmt, bool is_read, tree vectype,
		       struct data_reference *dr, tree *offset, tree *misalign,
		       tree *step, bool *base_aligned)
{
  tree oprnd0, oprnd1, base_address, offset_expr, base_addr0, base_addr1;
  tree address_offset = ssize_int (0), address_misalign = ssize_int (0);
  tree dummy;
  struct ptr_info_def *dummy1;
  subvar_t dummy2;

  switch (TREE_CODE (expr))
    {
    case PLUS_EXPR:
    case MINUS_EXPR:
      /* EXPR is of form {base +/- offset} (or {offset +/- base}).  */
      oprnd0 = TREE_OPERAND (expr, 0);
      oprnd1 = TREE_OPERAND (expr, 1);

      STRIP_NOPS (oprnd0);
      STRIP_NOPS (oprnd1);

      /* Recursively try to find the base of the address contained in EXPR.
	 For offset, the returned base will be NULL.  */
      base_addr0 = vect_address_analysis (oprnd0, stmt, is_read, vectype, dr,
					  &address_offset, &address_misalign,
					  step, base_aligned);

      base_addr1 = vect_address_analysis (oprnd1, stmt, is_read, vectype, dr,
					  &address_offset, &address_misalign,
					  step, base_aligned);

      /* We support cases where only one of the operands contains an
	 address.  */
      if ((base_addr0 && base_addr1) || (!base_addr0 && !base_addr1))
	return NULL_TREE;

      /* To revert STRIP_NOPS.  */
      oprnd0 = TREE_OPERAND (expr, 0);
      oprnd1 = TREE_OPERAND (expr, 1);

      /* Whichever operand did NOT yield a base is the offset part.  */
      offset_expr = base_addr0 ?
	fold_convert (ssizetype, oprnd1) : fold_convert (ssizetype, oprnd0);

      /* EXPR is of form {base +/- offset} (or {offset +/- base}). If offset is
	 a number, we can add it to the misalignment value calculated for base,
	 otherwise, misalignment is NULL.  */
      if (TREE_CODE (offset_expr) == INTEGER_CST && address_misalign)
	*misalign = size_binop (TREE_CODE (expr), address_misalign,
				offset_expr);
      else
	*misalign = NULL_TREE;

      /* Combine offset (from EXPR {base + offset}) with the offset calculated
	 for base.  */
      *offset = size_binop (TREE_CODE (expr), address_offset, offset_expr);
      return base_addr0 ? base_addr0 : base_addr1;

    case ADDR_EXPR:
      /* &object: delegate to the object analysis; the memtag/ptr_info/subvar
	 outputs are not needed here, hence the dummies.  */
      base_address = vect_object_analysis (TREE_OPERAND (expr, 0), stmt,
					   is_read, vectype, &dr, offset,
					   misalign, step, base_aligned,
					   &dummy, &dummy1, &dummy2);
      return base_address;

    case SSA_NAME:
      /* A bare SSA name is only an address if it has pointer type.  */
      if (!POINTER_TYPE_P (TREE_TYPE (expr)))
	return NULL_TREE;

      if (TYPE_ALIGN (TREE_TYPE (TREE_TYPE (expr))) < TYPE_ALIGN (vectype))
	{
	  /* The pointed-to type is less aligned than the vector type; try
	     to derive the offset from a known aligned object.  */
	  if (vect_get_ptr_offset (expr, vectype, misalign))
	    *base_aligned = true;
	  else
	    *base_aligned = false;
	}
      else
	{
	  *base_aligned = true;
	  *misalign = ssize_int (0);
	}
      /* The SSA name itself is the base: no offset, loop-invariant.  */
      *offset = ssize_int (0);
      *step = ssize_int (0);
      return expr;

    default:
      return NULL_TREE;
    }
}
1616
1617
1618/* Function vect_object_analysis
1619
1620 Return the BASE of the data reference MEMREF.
1621 Also compute the INITIAL_OFFSET from BASE, MISALIGN and STEP.
1622 E.g., for EXPR a.b[i] + 4B, BASE is a, and OFFSET is the overall offset
1623 'a.b[i] + 4B' from a (can be an expression), MISALIGN is an OFFSET
1624 instantiated with initial_conditions of access_functions of variables,
1625 modulo alignment, and STEP is the evolution of the DR_REF in this loop.
1626
1627 Function get_inner_reference is used for the above in case of ARRAY_REF and
1628 COMPONENT_REF.
1629
1630 The structure of the function is as follows:
1631 Part 1:
1632 Case 1. For handled_component_p refs
1633 1.1 call get_inner_reference
1634 1.1.1 analyze offset expr received from get_inner_reference
1635 1.2. build data-reference structure for MEMREF
1636 (fall through with BASE)
1637 Case 2. For declarations
1638 2.1 check alignment
1639 2.2 update DR_BASE_NAME if necessary for alias
1640 Case 3. For INDIRECT_REFs
1641 3.1 get the access function
1642 3.2 analyze evolution of MEMREF
1643 3.3 set data-reference structure for MEMREF
1644 3.4 call vect_address_analysis to analyze INIT of the access function
1645
1646 Part 2:
1647 Combine the results of object and address analysis to calculate
1648 INITIAL_OFFSET, STEP and misalignment info.
1649
1650 Input:
1651 MEMREF - the memory reference that is being analyzed
1652 STMT - the statement that contains MEMREF
1653 IS_READ - TRUE if STMT reads from MEMREF, FALSE if writes to MEMREF
1654 VECTYPE - the type that defines the alignment (i.e, we compute
1655 alignment relative to TYPE_ALIGN(VECTYPE))
1656
1657 Output:
1658 BASE_ADDRESS (returned value) - the base address of the data reference MEMREF
1659 E.g, if MEMREF is a.b[k].c[i][j] the returned
1660 base is &a.
1661 DR - data_reference struct for MEMREF
1662 INITIAL_OFFSET - initial offset of MEMREF from BASE (an expression)
1663 MISALIGN - offset of MEMREF from BASE in bytes (a constant) or NULL_TREE if
1664 the computation is impossible
1665 STEP - evolution of the DR_REF in the loop
1666 BASE_ALIGNED - indicates if BASE is aligned
2427fa80 1667 MEMTAG - memory tag for aliasing purposes
8bb46326 1668 PTR_INFO - NULL or points-to aliasing info from a pointer SSA_NAME
c75ab022 1669 SUBVAR - Sub-variables of the variable
f7064d11
DN
1670
1671 If something unexpected is encountered (an unsupported form of data-ref),
1672 then NULL_TREE is returned. */
1673
static tree
vect_object_analysis (tree memref, tree stmt, bool is_read,
		      tree vectype, struct data_reference **dr,
		      tree *offset, tree *misalign, tree *step,
		      bool *base_aligned, tree *memtag,
		      struct ptr_info_def **ptr_info, subvar_t *subvars)
{
  tree base = NULL_TREE, base_address = NULL_TREE;
  tree object_offset = ssize_int (0), object_misalign = ssize_int (0);
  tree object_step = ssize_int (0), address_step = ssize_int (0);
  bool object_base_aligned = true, address_base_aligned = true;
  tree address_offset = ssize_int (0), address_misalign = ssize_int (0);
  HOST_WIDE_INT pbitsize, pbitpos;
  tree poffset, bit_pos_in_bytes;
  enum machine_mode pmode;
  int punsignedp, pvolatilep;
  tree ptr_step = ssize_int (0), ptr_init = NULL_TREE;
  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
  loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
  struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
  struct data_reference *ptr_dr = NULL;
  tree access_fn, evolution_part, address_to_analyze;

  /* NOTE: *DR is in-out: a non-NULL *DR on entry means the data-reference
     struct was already built by an outer analysis.  */
  *ptr_info = NULL;

  /* Part 1: */
  /* Case 1. handled_component_p refs.  */
  if (handled_component_p (memref))
    {
      /* 1.1 call get_inner_reference.  */
      /* Find the base and the offset from it.  */
      base = get_inner_reference (memref, &pbitsize, &pbitpos, &poffset,
				  &pmode, &punsignedp, &pvolatilep, false);
      if (!base)
	return NULL_TREE;

      /* 1.1.1 analyze offset expr received from get_inner_reference.  */
      if (poffset
	  && !vect_analyze_offset_expr (poffset, loop, TYPE_SIZE_UNIT (vectype),
				&object_offset, &object_misalign, &object_step))
	{
	  if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
	    {
	      fprintf (vect_dump, "failed to compute offset or step for ");
	      print_generic_expr (vect_dump, memref, TDF_SLIM);
	    }
	  return NULL_TREE;
	}

      /* Add bit position to OFFSET and MISALIGN.  */

      bit_pos_in_bytes = ssize_int (pbitpos/BITS_PER_UNIT);
      /* Check that there is no remainder in bits.  */
      if (pbitpos%BITS_PER_UNIT)
	{
	  if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
	    fprintf (vect_dump, "bit offset alignment.");
	  return NULL_TREE;
	}
      object_offset = size_binop (PLUS_EXPR, bit_pos_in_bytes, object_offset);
      if (object_misalign)
	object_misalign = size_binop (PLUS_EXPR, object_misalign,
				      bit_pos_in_bytes);

      /* Create data-reference for MEMREF. TODO: handle COMPONENT_REFs.  */
      if (!(*dr))
	{
	  if (TREE_CODE (memref) == ARRAY_REF)
	    *dr = analyze_array (stmt, memref, is_read);
	  else
	    /* FORNOW.  */
	    return NULL_TREE;
	}
      memref = base; /* To continue analysis of BASE.  */
      /* fall through  */
    }

  /* Part 1: Case 2. Declarations.  */
  if (DECL_P (memref))
    {
      /* We expect to get a decl only if we already have a DR.  */
      if (!(*dr))
	{
	  if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
	    {
	      fprintf (vect_dump, "unhandled decl ");
	      print_generic_expr (vect_dump, memref, TDF_SLIM);
	    }
	  return NULL_TREE;
	}

      /* 2.1 check the alignment.  */
      if (DECL_ALIGN (memref) >= TYPE_ALIGN (vectype))
	object_base_aligned = true;
      else
	object_base_aligned = false;

      /* 2.2 update DR_BASE_NAME if necessary.  */
      if (!DR_BASE_NAME ((*dr)))
	/* For alias analysis.  In case the analysis of INDIRECT_REF brought
	   us to object.  */
	DR_BASE_NAME ((*dr)) = memref;

      if (SSA_VAR_P (memref) && var_can_have_subvars (memref))
	*subvars = get_subvars_for_var (memref);
      base_address = build_fold_addr_expr (memref);
      /* The decl itself serves as the memory tag for aliasing.  */
      *memtag = memref;
    }

  /* Part 1: Case 3. INDIRECT_REFs.  */
  else if (TREE_CODE (memref) == INDIRECT_REF)
    {
      tree ptr_ref = TREE_OPERAND (memref, 0);
      if (TREE_CODE (ptr_ref) == SSA_NAME)
	*ptr_info = SSA_NAME_PTR_INFO (ptr_ref);

      /* 3.1 get the access function.  */
      access_fn = analyze_scalar_evolution (loop, ptr_ref);
      if (!access_fn)
	{
	  if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS,
				    LOOP_LOC (loop_vinfo)))
	    fprintf (vect_dump, "not vectorized: complicated pointer access.");
	  return NULL_TREE;
	}
      if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
	{
	  fprintf (vect_dump, "Access function of ptr: ");
	  print_generic_expr (vect_dump, access_fn, TDF_SLIM);
	}

      /* 3.2 analyze evolution of MEMREF.  */
      evolution_part = evolution_part_in_loop_num (access_fn, loop->num);
      if (evolution_part)
	{
	  /* The pointer advances in the loop: analyze it as a strided
	     access and then analyze its initial address.  */
	  ptr_dr = vect_analyze_pointer_ref_access (memref, stmt, is_read,
					access_fn, &ptr_init, &ptr_step);
	  if (!(ptr_dr))
	    return NULL_TREE;

	  object_step = size_binop (PLUS_EXPR, object_step, ptr_step);
	  address_to_analyze = ptr_init;
	}
      else
	{
	  if (!(*dr))
	    {
	      if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS,
					LOOP_LOC (loop_vinfo)))
		fprintf (vect_dump, "not vectorized: ptr is loop invariant.");
	      return NULL_TREE;
	    }
	  /* Since there exists DR for MEMREF, we are analyzing the init of
	     the access function, which not necessary has evolution in the
	     loop.  */
	  address_to_analyze = initial_condition_in_loop_num (access_fn,
							      loop->num);
	}

      /* 3.3 set data-reference structure for MEMREF.  */
      *dr = (*dr) ? *dr : ptr_dr;

      /* 3.4 call vect_address_analysis to analyze INIT of the access
	 function.  */
      base_address = vect_address_analysis (address_to_analyze, stmt, is_read,
			       vectype, *dr, &address_offset, &address_misalign,
			       &address_step, &address_base_aligned);
      if (!base_address)
	return NULL_TREE;

      /* Derive the memory tag from the form of the base address.  */
      switch (TREE_CODE (base_address))
	{
	case SSA_NAME:
	  *memtag = get_var_ann (SSA_NAME_VAR (base_address))->type_mem_tag;
	  /* Fall back to the original pointer's tag if the base has none.  */
	  if (!(*memtag) && TREE_CODE (TREE_OPERAND (memref, 0)) == SSA_NAME)
	    *memtag = get_var_ann (
		      SSA_NAME_VAR (TREE_OPERAND (memref, 0)))->type_mem_tag;
	  break;
	case ADDR_EXPR:
	  *memtag = TREE_OPERAND (base_address, 0);
	  break;
	default:
	  if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS,
				    LOOP_LOC (loop_vinfo)))
	    {
	      fprintf (vect_dump, "not vectorized: no memtag ref: ");
	      print_generic_expr (vect_dump, memref, TDF_SLIM);
	    }
	  return NULL_TREE;
	}
    }

  if (!base_address)
    /* MEMREF cannot be analyzed.  */
    return NULL_TREE;

  if (SSA_VAR_P (*memtag) && var_can_have_subvars (*memtag))
    *subvars = get_subvars_for_var (*memtag);

  /* Part 2: Combine the results of object and address analysis to calculate
     INITIAL_OFFSET, STEP and misalignment info.  */
  *offset = size_binop (PLUS_EXPR, object_offset, address_offset);
  if (object_misalign && address_misalign)
    *misalign = size_binop (PLUS_EXPR, object_misalign, address_misalign);
  else
    /* Either part unknown makes the whole misalignment unknown.  */
    *misalign = NULL_TREE;
  *step = size_binop (PLUS_EXPR, object_step, address_step);
  *base_aligned = object_base_aligned && address_base_aligned;

  if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
    {
      fprintf (vect_dump, "Results of object analysis for: ");
      print_generic_expr (vect_dump, memref, TDF_SLIM);
      fprintf (vect_dump, "\n\tbase_address: ");
      print_generic_expr (vect_dump, base_address, TDF_SLIM);
      fprintf (vect_dump, "\n\toffset: ");
      print_generic_expr (vect_dump, *offset, TDF_SLIM);
      fprintf (vect_dump, "\n\tstep: ");
      print_generic_expr (vect_dump, *step, TDF_SLIM);
      fprintf (vect_dump, "\n\tbase aligned %d\n\tmisalign: ", *base_aligned);
      print_generic_expr (vect_dump, *misalign, TDF_SLIM);
    }
  return base_address;
}
1898
1899
1900/* Function vect_analyze_data_refs.
1901
1902 Find all the data references in the loop.
1903
1904 The general structure of the analysis of data refs in the vectorizer is as
1905 follows:
1906 1- vect_analyze_data_refs(loop):
1907 Find and analyze all data-refs in the loop:
1908 foreach ref
1909 base_address = vect_object_analysis(ref)
f7064d11
DN
1910 1.1- vect_object_analysis(ref):
1911 Analyze ref, and build a DR (data_referece struct) for it;
1912 compute base, initial_offset, step and alignment.
1913 Call get_inner_reference for refs handled in this function.
1914 Call vect_addr_analysis(addr) to analyze pointer type expressions.
2427fa80 1915 Set ref_stmt.base, ref_stmt.initial_offset, ref_stmt.alignment,
8bb46326 1916 ref_stmt.memtag, ref_stmt.ptr_info and ref_stmt.step accordingly.
f7064d11
DN
1917 2- vect_analyze_dependences(): apply dependence testing using ref_stmt.DR
1918 3- vect_analyze_drs_alignment(): check that ref_stmt.alignment is ok.
1919 4- vect_analyze_drs_access(): check that ref_stmt.step is ok.
1920
1921 FORNOW: Handle aligned INDIRECT_REFs and ARRAY_REFs
1922 which base is really an array (not a pointer) and which alignment
1923 can be forced. This restriction will be relaxed. */
1924
1925static bool
1926vect_analyze_data_refs (loop_vec_info loop_vinfo)
1927{
1928 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
1929 basic_block *bbs = LOOP_VINFO_BBS (loop_vinfo);
1930 int nbbs = loop->num_nodes;
1931 block_stmt_iterator si;
1932 int j;
1933 struct data_reference *dr;
1934
1935 if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
1936 fprintf (vect_dump, "=== vect_analyze_data_refs ===");
1937
1938 for (j = 0; j < nbbs; j++)
1939 {
1940 basic_block bb = bbs[j];
1941 for (si = bsi_start (bb); !bsi_end_p (si); bsi_next (&si))
1942 {
1943 bool is_read = false;
1944 tree stmt = bsi_stmt (si);
1945 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1946 v_may_def_optype v_may_defs = STMT_V_MAY_DEF_OPS (stmt);
1947 v_must_def_optype v_must_defs = STMT_V_MUST_DEF_OPS (stmt);
1948 vuse_optype vuses = STMT_VUSE_OPS (stmt);
1949 varray_type *datarefs = NULL;
1950 int nvuses, nv_may_defs, nv_must_defs;
1951 tree memref = NULL;
1952 tree scalar_type, vectype;
1953 tree base, offset, misalign, step, tag;
8bb46326 1954 struct ptr_info_def *ptr_info;
f7064d11 1955 bool base_aligned;
e8b19a77 1956 subvar_t subvars = NULL;
f7064d11
DN
1957
1958 /* Assumption: there exists a data-ref in stmt, if and only if
1959 it has vuses/vdefs. */
1960
1961 if (!vuses && !v_may_defs && !v_must_defs)
1962 continue;
1963
1964 nvuses = NUM_VUSES (vuses);
1965 nv_may_defs = NUM_V_MAY_DEFS (v_may_defs);
1966 nv_must_defs = NUM_V_MUST_DEFS (v_must_defs);
1967
1968 if (nvuses && (nv_may_defs || nv_must_defs))
1969 {
1970 if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
1971 {
1972 fprintf (vect_dump, "unexpected vdefs and vuses in stmt: ");
1973 print_generic_expr (vect_dump, stmt, TDF_SLIM);
1974 }
1975 return false;
1976 }
1977
1978 if (TREE_CODE (stmt) != MODIFY_EXPR)
1979 {
1980 if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
1981 {
1982 fprintf (vect_dump, "unexpected vops in stmt: ");
1983 print_generic_expr (vect_dump, stmt, TDF_SLIM);
1984 }
1985 return false;
1986 }
1987
1988 if (vuses)
1989 {
1990 memref = TREE_OPERAND (stmt, 1);
1991 datarefs = &(LOOP_VINFO_DATAREF_READS (loop_vinfo));
1992 is_read = true;
1993 }
1994 else /* vdefs */
1995 {
1996 memref = TREE_OPERAND (stmt, 0);
1997 datarefs = &(LOOP_VINFO_DATAREF_WRITES (loop_vinfo));
1998 is_read = false;
1999 }
2000
2001 scalar_type = TREE_TYPE (memref);
2002 vectype = get_vectype_for_scalar_type (scalar_type);
2003 if (!vectype)
2004 {
2005 if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
2006 {
2007 fprintf (vect_dump, "no vectype for stmt: ");
2008 print_generic_expr (vect_dump, stmt, TDF_SLIM);
2009 fprintf (vect_dump, " scalar_type: ");
2010 print_generic_expr (vect_dump, scalar_type, TDF_DETAILS);
2011 }
2012 /* It is not possible to vectorize this data reference. */
2013 return false;
2014 }
2015 /* Analyze MEMREF. If it is of a supported form, build data_reference
2016 struct for it (DR). */
2017 dr = NULL;
2018 base = vect_object_analysis (memref, stmt, is_read, vectype, &dr,
2019 &offset, &misalign, &step,
8bb46326
DN
2020 &base_aligned, &tag, &ptr_info,
2021 &subvars);
f7064d11
DN
2022 if (!base)
2023 {
2024 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS,
2025 LOOP_LOC (loop_vinfo)))
2026 {
2027 fprintf (vect_dump, "not vectorized: unhandled data ref: ");
2028 print_generic_expr (vect_dump, stmt, TDF_SLIM);
2029 }
2030 return false;
2031 }
f7064d11
DN
2032 STMT_VINFO_VECT_DR_BASE_ADDRESS (stmt_info) = base;
2033 STMT_VINFO_VECT_INIT_OFFSET (stmt_info) = offset;
2034 STMT_VINFO_VECT_STEP (stmt_info) = step;
2035 STMT_VINFO_VECT_MISALIGNMENT (stmt_info) = misalign;
2036 STMT_VINFO_VECT_BASE_ALIGNED_P (stmt_info) = base_aligned;
2037 STMT_VINFO_MEMTAG (stmt_info) = tag;
8bb46326 2038 STMT_VINFO_PTR_INFO (stmt_info) = ptr_info;
c75ab022 2039 STMT_VINFO_SUBVARS (stmt_info) = subvars;
f7064d11
DN
2040 STMT_VINFO_VECTYPE (stmt_info) = vectype;
2041 VARRAY_PUSH_GENERIC_PTR (*datarefs, dr);
2042 STMT_VINFO_DATA_REF (stmt_info) = dr;
2043 }
2044 }
2045
2046 return true;
2047}
2048
2049
2050/* Utility functions used by vect_mark_stmts_to_be_vectorized. */
2051
2052/* Function vect_mark_relevant.
2053
2054 Mark STMT as "relevant for vectorization" and add it to WORKLIST. */
2055
2056static void
51d00891 2057vect_mark_relevant (VEC(tree,heap) **worklist, tree stmt)
f7064d11
DN
2058{
2059 stmt_vec_info stmt_info;
2060
2061 if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
2062 fprintf (vect_dump, "mark relevant.");
2063
2064 if (TREE_CODE (stmt) == PHI_NODE)
2065 {
51d00891 2066 VEC_safe_push (tree, heap, *worklist, stmt);
f7064d11
DN
2067 return;
2068 }
2069
2070 stmt_info = vinfo_for_stmt (stmt);
2071
2072 if (!stmt_info)
2073 {
2074 if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
2075 {
2076 fprintf (vect_dump, "mark relevant: no stmt info!!.");
2077 print_generic_expr (vect_dump, stmt, TDF_SLIM);
2078 }
2079 return;
2080 }
2081
2082 if (STMT_VINFO_RELEVANT_P (stmt_info))
2083 {
2084 if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
2085 fprintf (vect_dump, "already marked relevant.");
2086 return;
2087 }
2088
2089 STMT_VINFO_RELEVANT_P (stmt_info) = 1;
51d00891 2090 VEC_safe_push (tree, heap, *worklist, stmt);
f7064d11
DN
2091}
2092
2093
2094/* Function vect_stmt_relevant_p.
2095
2096 Return true if STMT in loop that is represented by LOOP_VINFO is
2097 "relevant for vectorization".
2098
2099 A stmt is considered "relevant for vectorization" if:
2100 - it has uses outside the loop.
2101 - it has vdefs (it alters memory).
2102 - control stmts in the loop (except for the exit condition).
2103
2104 CHECKME: what other side effects would the vectorizer allow? */
2105
2106static bool
2107vect_stmt_relevant_p (tree stmt, loop_vec_info loop_vinfo)
2108{
2109 v_may_def_optype v_may_defs;
2110 v_must_def_optype v_must_defs;
2111 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
f430bae8
AM
2112 ssa_op_iter op_iter;
2113 imm_use_iterator imm_iter;
2114 use_operand_p use_p;
2115 tree var;
f7064d11
DN
2116
2117 /* cond stmt other than loop exit cond. */
2118 if (is_ctrl_stmt (stmt) && (stmt != LOOP_VINFO_EXIT_COND (loop_vinfo)))
2119 return true;
2120
2121 /* changing memory. */
b0f81966 2122 if (TREE_CODE (stmt) == PHI_NODE)
f7064d11 2123 {
b0f81966
AM
2124 if (!is_gimple_reg (PHI_RESULT (stmt)))
2125 return false;
2126 FOR_EACH_IMM_USE_FAST (use_p, imm_iter, PHI_RESULT (stmt))
f7064d11 2127 {
b0f81966
AM
2128 basic_block bb = bb_for_stmt (USE_STMT (use_p));
2129 if (!flow_bb_inside_loop_p (loop, bb))
2130 {
2131 if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
2132 fprintf (vect_dump, "vec_stmt_relevant_p: used out of loop.");
2133 return true;
2134 }
f7064d11 2135 }
b0f81966
AM
2136 return false;
2137 }
2138
2139 v_may_defs = STMT_V_MAY_DEF_OPS (stmt);
2140 v_must_defs = STMT_V_MUST_DEF_OPS (stmt);
2141 if (v_may_defs || v_must_defs)
2142 {
2143 if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
2144 fprintf (vect_dump, "vec_stmt_relevant_p: stmt has vdefs.");
2145 return true;
f7064d11
DN
2146 }
2147
2148 /* uses outside the loop. */
f430bae8 2149 FOR_EACH_SSA_TREE_OPERAND (var, stmt, op_iter, SSA_OP_DEF)
f7064d11 2150 {
f430bae8 2151 FOR_EACH_IMM_USE_FAST (use_p, imm_iter, var)
f7064d11 2152 {
f430bae8
AM
2153 basic_block bb = bb_for_stmt (USE_STMT (use_p));
2154 if (!flow_bb_inside_loop_p (loop, bb))
2155 {
2156 if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
2157 fprintf (vect_dump, "vec_stmt_relevant_p: used out of loop.");
2158 return true;
2159 }
f7064d11
DN
2160 }
2161 }
2162
2163 return false;
2164}
2165
2166
2167/* Function vect_mark_stmts_to_be_vectorized.
2168
2169 Not all stmts in the loop need to be vectorized. For example:
2170
2171 for i...
2172 for j...
2173 1. T0 = i + j
2174 2. T1 = a[T0]
2175
2176 3. j = j + 1
2177
2178 Stmt 1 and 3 do not need to be vectorized, because loop control and
2179 addressing of vectorized data-refs are handled differently.
2180
2181 This pass detects such stmts. */
2182
2183static bool
2184vect_mark_stmts_to_be_vectorized (loop_vec_info loop_vinfo)
2185{
51d00891 2186 VEC(tree,heap) *worklist;
f7064d11
DN
2187 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
2188 basic_block *bbs = LOOP_VINFO_BBS (loop_vinfo);
2189 unsigned int nbbs = loop->num_nodes;
2190 block_stmt_iterator si;
2191 tree stmt;
2192 stmt_ann_t ann;
2193 unsigned int i;
2194 int j;
2195 use_optype use_ops;
2196 stmt_vec_info stmt_info;
2197 basic_block bb;
2198 tree phi;
2199
2200 if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
2201 fprintf (vect_dump, "=== vect_mark_stmts_to_be_vectorized ===");
2202
2203 bb = loop->header;
2204 for (phi = phi_nodes (bb); phi; phi = PHI_CHAIN (phi))
2205 {
2206 if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
2207 {
2208 fprintf (vect_dump, "init: phi relevant? ");
2209 print_generic_expr (vect_dump, phi, TDF_SLIM);
2210 }
2211
2212 if (vect_stmt_relevant_p (phi, loop_vinfo))
2213 {
2214 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS,
2215 LOOP_LOC (loop_vinfo)))
2216 fprintf (vect_dump, "unsupported reduction/induction.");
2217 return false;
2218 }
2219 }
2220
51d00891 2221 worklist = VEC_alloc (tree, heap, 64);
f7064d11
DN
2222
2223 /* 1. Init worklist. */
2224
2225 for (i = 0; i < nbbs; i++)
2226 {
2227 bb = bbs[i];
2228 for (si = bsi_start (bb); !bsi_end_p (si); bsi_next (&si))
2229 {
2230 stmt = bsi_stmt (si);
2231
2232 if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
2233 {
2234 fprintf (vect_dump, "init: stmt relevant? ");
2235 print_generic_expr (vect_dump, stmt, TDF_SLIM);
2236 }
2237
2238 stmt_info = vinfo_for_stmt (stmt);
2239 STMT_VINFO_RELEVANT_P (stmt_info) = 0;
2240
2241 if (vect_stmt_relevant_p (stmt, loop_vinfo))
2242 vect_mark_relevant (&worklist, stmt);
2243 }
2244 }
2245
2246
2247 /* 2. Process_worklist */
2248
51d00891 2249 while (VEC_length (tree, worklist) > 0)
f7064d11 2250 {
51d00891 2251 stmt = VEC_pop (tree, worklist);
f7064d11
DN
2252
2253 if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
2254 {
2255 fprintf (vect_dump, "worklist: examine stmt: ");
2256 print_generic_expr (vect_dump, stmt, TDF_SLIM);
2257 }
2258
2259 /* Examine the USES in this statement. Mark all the statements which
2260 feed this statement's uses as "relevant", unless the USE is used as
2261 an array index. */
2262
2263 if (TREE_CODE (stmt) == PHI_NODE)
2264 {
2265 /* follow the def-use chain inside the loop. */
2266 for (j = 0; j < PHI_NUM_ARGS (stmt); j++)
2267 {
2268 tree arg = PHI_ARG_DEF (stmt, j);
2269 tree def_stmt = NULL_TREE;
2270 basic_block bb;
2271 if (!vect_is_simple_use (arg, loop_vinfo, &def_stmt))
2272 {
2273 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS,
2274 LOOP_LOC (loop_vinfo)))
2275 fprintf (vect_dump, "not vectorized: unsupported use in stmt.");
51d00891 2276 VEC_free (tree, heap, worklist);
f7064d11
DN
2277 return false;
2278 }
2279 if (!def_stmt)
2280 continue;
2281
2282 if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
2283 {
2284 fprintf (vect_dump, "worklist: def_stmt: ");
2285 print_generic_expr (vect_dump, def_stmt, TDF_SLIM);
2286 }
2287
2288 bb = bb_for_stmt (def_stmt);
2289 if (flow_bb_inside_loop_p (loop, bb))
2290 vect_mark_relevant (&worklist, def_stmt);
2291 }
2292 }
2293
2294 ann = stmt_ann (stmt);
2295 use_ops = USE_OPS (ann);
2296
2297 for (i = 0; i < NUM_USES (use_ops); i++)
2298 {
2299 tree use = USE_OP (use_ops, i);
2300
2301 /* We are only interested in uses that need to be vectorized. Uses
2302 that are used for address computation are not considered relevant.
2303 */
2304 if (exist_non_indexing_operands_for_use_p (use, stmt))
2305 {
2306 tree def_stmt = NULL_TREE;
2307 basic_block bb;
2308 if (!vect_is_simple_use (use, loop_vinfo, &def_stmt))
2309 {
2310 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS,
2311 LOOP_LOC (loop_vinfo)))
2312 fprintf (vect_dump, "not vectorized: unsupported use in stmt.");
51d00891 2313 VEC_free (tree, heap, worklist);
f7064d11
DN
2314 return false;
2315 }
2316
2317 if (!def_stmt)
2318 continue;
2319
2320 if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
2321 {
2322 fprintf (vect_dump, "worklist: examine use %d: ", i);
2323 print_generic_expr (vect_dump, use, TDF_SLIM);
2324 }
2325
2326 bb = bb_for_stmt (def_stmt);
2327 if (flow_bb_inside_loop_p (loop, bb))
2328 vect_mark_relevant (&worklist, def_stmt);
2329 }
2330 }
2331 } /* while worklist */
2332
51d00891 2333 VEC_free (tree, heap, worklist);
f7064d11
DN
2334 return true;
2335}
2336
2337
2338/* Function vect_can_advance_ivs_p
2339
2340 In case the number of iterations that LOOP iterates in unknown at compile
2341 time, an epilog loop will be generated, and the loop induction variables
2342 (IVs) will be "advanced" to the value they are supposed to take just before
2343 the epilog loop. Here we check that the access function of the loop IVs
2344 and the expression that represents the loop bound are simple enough.
2345 These restrictions will be relaxed in the future. */
2346
2347static bool
2348vect_can_advance_ivs_p (loop_vec_info loop_vinfo)
2349{
2350 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
2351 basic_block bb = loop->header;
2352 tree phi;
2353
2354 /* Analyze phi functions of the loop header. */
2355
2356 for (phi = phi_nodes (bb); phi; phi = PHI_CHAIN (phi))
2357 {
2358 tree access_fn = NULL;
2359 tree evolution_part;
2360
2361 if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
2362 {
2363 fprintf (vect_dump, "Analyze phi: ");
2364 print_generic_expr (vect_dump, phi, TDF_SLIM);
2365 }
2366
2367 /* Skip virtual phi's. The data dependences that are associated with
2368 virtual defs/uses (i.e., memory accesses) are analyzed elsewhere. */
2369
2370 if (!is_gimple_reg (SSA_NAME_VAR (PHI_RESULT (phi))))
2371 {
2372 if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
2373 fprintf (vect_dump, "virtual phi. skip.");
2374 continue;
2375 }
2376
2377 /* Analyze the evolution function. */
2378
2379 access_fn = instantiate_parameters
2380 (loop, analyze_scalar_evolution (loop, PHI_RESULT (phi)));
2381
2382 if (!access_fn)
2383 {
2384 if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
2385 fprintf (vect_dump, "No Access function.");
2386 return false;
2387 }
2388
2389 if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
2390 {
2391 fprintf (vect_dump, "Access function of PHI: ");
2392 print_generic_expr (vect_dump, access_fn, TDF_SLIM);
2393 }
2394
2395 evolution_part = evolution_part_in_loop_num (access_fn, loop->num);
2396
2397 if (evolution_part == NULL_TREE)
2398 return false;
2399
2400 /* FORNOW: We do not transform initial conditions of IVs
2401 which evolution functions are a polynomial of degree >= 2. */
2402
2403 if (tree_is_chrec (evolution_part))
2404 return false;
2405 }
2406
2407 return true;
2408}
2409
2410
2411/* Function vect_get_loop_niters.
2412
2413 Determine how many iterations the loop is executed.
2414 If an expression that represents the number of iterations
2415 can be constructed, place it in NUMBER_OF_ITERATIONS.
2416 Return the loop exit condition. */
2417
2418static tree
2419vect_get_loop_niters (struct loop *loop, tree *number_of_iterations)
2420{
2421 tree niters;
2422
2423 if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
2424 fprintf (vect_dump, "=== get_loop_niters ===");
2425
2426 niters = number_of_iterations_in_loop (loop);
2427
2428 if (niters != NULL_TREE
2429 && niters != chrec_dont_know)
2430 {
2431 *number_of_iterations = niters;
2432
2433 if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
2434 {
2435 fprintf (vect_dump, "==> get_loop_niters:" );
2436 print_generic_expr (vect_dump, *number_of_iterations, TDF_SLIM);
2437 }
2438 }
2439
2440 return get_loop_exit_condition (loop);
2441}
2442
2443
2444/* Function vect_analyze_loop_form.
2445
2446 Verify the following restrictions (some may be relaxed in the future):
2447 - it's an inner-most loop
2448 - number of BBs = 2 (which are the loop header and the latch)
2449 - the loop has a pre-header
2450 - the loop has a single entry and exit
2451 - the loop exit condition is simple enough, and the number of iterations
2452 can be analyzed (a countable loop). */
2453
2454static loop_vec_info
2455vect_analyze_loop_form (struct loop *loop)
2456{
2457 loop_vec_info loop_vinfo;
2458 tree loop_cond;
2459 tree number_of_iterations = NULL;
f7064d11
DN
2460 LOC loop_loc;
2461
2462 loop_loc = find_loop_location (loop);
2463
2464 if (vect_print_dump_info (REPORT_DETAILS, loop_loc))
2465 fprintf (vect_dump, "=== vect_analyze_loop_form ===");
2466
2467 if (loop->inner)
2468 {
2469 if (vect_print_dump_info (REPORT_OUTER_LOOPS, loop_loc))
2470 fprintf (vect_dump, "not vectorized: nested loop.");
2471 return NULL;
2472 }
2473
2474 if (!loop->single_exit
2475 || loop->num_nodes != 2
70388d94 2476 || EDGE_COUNT (loop->header->preds) != 2)
f7064d11
DN
2477 {
2478 if (vect_print_dump_info (REPORT_BAD_FORM_LOOPS, loop_loc))
2479 {
2480 if (!loop->single_exit)
2481 fprintf (vect_dump, "not vectorized: multiple exits.");
2482 else if (loop->num_nodes != 2)
2483 fprintf (vect_dump, "not vectorized: too many BBs in loop.");
2484 else if (EDGE_COUNT (loop->header->preds) != 2)
2485 fprintf (vect_dump, "not vectorized: too many incoming edges.");
f7064d11
DN
2486 }
2487
2488 return NULL;
2489 }
2490
2491 /* We assume that the loop exit condition is at the end of the loop. i.e,
2492 that the loop is represented as a do-while (with a proper if-guard
2493 before the loop if needed), where the loop header contains all the
2494 executable statements, and the latch is empty. */
2495 if (!empty_block_p (loop->latch))
2496 {
2497 if (vect_print_dump_info (REPORT_BAD_FORM_LOOPS, loop_loc))
2498 fprintf (vect_dump, "not vectorized: unexpectd loop form.");
2499 return NULL;
2500 }
2501
f7064d11 2502 /* Make sure there exists a single-predecessor exit bb: */
c5cbcccf 2503 if (!single_pred_p (loop->single_exit->dest))
f7064d11 2504 {
ac59a959
DN
2505 edge e = loop->single_exit;
2506 if (!(e->flags & EDGE_ABNORMAL))
2507 {
d401de95 2508 split_loop_exit_edge (e);
ac59a959
DN
2509 if (vect_print_dump_info (REPORT_DETAILS, loop_loc))
2510 fprintf (vect_dump, "split exit edge.");
ac59a959
DN
2511 }
2512 else
2513 {
2514 if (vect_print_dump_info (REPORT_BAD_FORM_LOOPS, loop_loc))
2515 fprintf (vect_dump, "not vectorized: abnormal loop exit edge.");
2516 return NULL;
2517 }
f7064d11 2518 }
f7064d11
DN
2519
2520 if (empty_block_p (loop->header))
2521 {
2522 if (vect_print_dump_info (REPORT_BAD_FORM_LOOPS, loop_loc))
2523 fprintf (vect_dump, "not vectorized: empty loop.");
2524 return NULL;
2525 }
2526
2527 loop_cond = vect_get_loop_niters (loop, &number_of_iterations);
2528 if (!loop_cond)
2529 {
2530 if (vect_print_dump_info (REPORT_BAD_FORM_LOOPS, loop_loc))
2531 fprintf (vect_dump, "not vectorized: complicated exit condition.");
2532 return NULL;
2533 }
2534
2535 if (!number_of_iterations)
2536 {
2537 if (vect_print_dump_info (REPORT_BAD_FORM_LOOPS, loop_loc))
2538 fprintf (vect_dump,
2539 "not vectorized: number of iterations cannot be computed.");
2540 return NULL;
2541 }
2542
2543 if (chrec_contains_undetermined (number_of_iterations))
2544 {
2545 if (vect_print_dump_info (REPORT_BAD_FORM_LOOPS, loop_loc))
2546 fprintf (vect_dump, "Infinite number of iterations.");
2547 return false;
2548 }
2549
2550 loop_vinfo = new_loop_vec_info (loop);
2551 LOOP_VINFO_NITERS (loop_vinfo) = number_of_iterations;
2552
2553 if (!LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo))
2554 {
2555 if (vect_print_dump_info (REPORT_DETAILS, loop_loc))
2556 {
2557 fprintf (vect_dump, "Symbolic number of iterations is ");
2558 print_generic_expr (vect_dump, number_of_iterations, TDF_DETAILS);
2559 }
2560 }
2561 else
2562 if (LOOP_VINFO_INT_NITERS (loop_vinfo) == 0)
2563 {
2564 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS, loop_loc))
2565 fprintf (vect_dump, "not vectorized: number of iterations = 0.");
2566 return NULL;
2567 }
2568
2569 LOOP_VINFO_EXIT_COND (loop_vinfo) = loop_cond;
2570 LOOP_VINFO_LOC (loop_vinfo) = loop_loc;
2571
2572 return loop_vinfo;
2573}
2574
2575
2576/* Function vect_analyze_loop.
2577
2578 Apply a set of analyses on LOOP, and create a loop_vec_info struct
2579 for it. The different analyses will record information in the
2580 loop_vec_info struct. */
2581loop_vec_info
2582vect_analyze_loop (struct loop *loop)
2583{
2584 bool ok;
2585 loop_vec_info loop_vinfo;
2586
2587 if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
2588 fprintf (vect_dump, "===== analyze_loop_nest =====");
2589
2590 /* Check the CFG characteristics of the loop (nesting, entry/exit, etc. */
2591
2592 loop_vinfo = vect_analyze_loop_form (loop);
2593 if (!loop_vinfo)
2594 {
2595 if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
2596 fprintf (vect_dump, "bad loop form.");
2597 return NULL;
2598 }
2599
2600 /* Find all data references in the loop (which correspond to vdefs/vuses)
2601 and analyze their evolution in the loop.
2602
2603 FORNOW: Handle only simple, array references, which
2604 alignment can be forced, and aligned pointer-references. */
2605
2606 ok = vect_analyze_data_refs (loop_vinfo);
2607 if (!ok)
2608 {
2609 if (vect_print_dump_info (REPORT_DETAILS, LOOP_LOC (loop_vinfo)))
2610 fprintf (vect_dump, "bad data references.");
2611 destroy_loop_vec_info (loop_vinfo);
2612 return NULL;
2613 }
2614
2615 /* Data-flow analysis to detect stmts that do not need to be vectorized. */
2616
2617 ok = vect_mark_stmts_to_be_vectorized (loop_vinfo);
2618 if (!ok)
2619 {
2620 if (vect_print_dump_info (REPORT_DETAILS, LOOP_LOC (loop_vinfo)))
2621 fprintf (vect_dump, "unexpected pattern.");
2622 destroy_loop_vec_info (loop_vinfo);
2623 return NULL;
2624 }
2625
2626 /* Check that all cross-iteration scalar data-flow cycles are OK.
2627 Cross-iteration cycles caused by virtual phis are analyzed separately. */
2628
2629 ok = vect_analyze_scalar_cycles (loop_vinfo);
2630 if (!ok)
2631 {
2632 if (vect_print_dump_info (REPORT_DETAILS, LOOP_LOC (loop_vinfo)))
2633 fprintf (vect_dump, "bad scalar cycle.");
2634 destroy_loop_vec_info (loop_vinfo);
2635 return NULL;
2636 }
2637
b52485c6
DP
2638 ok = vect_determine_vectorization_factor (loop_vinfo);
2639 if (!ok)
2640 {
2641 if (vect_print_dump_info (REPORT_DETAILS, LOOP_LOC (loop_vinfo)))
2642 fprintf (vect_dump, "can't determine vectorization factor.");
2643 destroy_loop_vec_info (loop_vinfo);
2644 return NULL;
2645 }
2646
f7064d11
DN
2647 /* Analyze data dependences between the data-refs in the loop.
2648 FORNOW: fail at the first data dependence that we encounter. */
2649
2650 ok = vect_analyze_data_ref_dependences (loop_vinfo);
2651 if (!ok)
2652 {
2653 if (vect_print_dump_info (REPORT_DETAILS, LOOP_LOC (loop_vinfo)))
2654 fprintf (vect_dump, "bad data dependence.");
2655 destroy_loop_vec_info (loop_vinfo);
2656 return NULL;
2657 }
2658
2659 /* Analyze the access patterns of the data-refs in the loop (consecutive,
2660 complex, etc.). FORNOW: Only handle consecutive access pattern. */
2661
2662 ok = vect_analyze_data_ref_accesses (loop_vinfo);
2663 if (!ok)
2664 {
2665 if (vect_print_dump_info (REPORT_DETAILS, LOOP_LOC (loop_vinfo)))
2666 fprintf (vect_dump, "bad data access.");
2667 destroy_loop_vec_info (loop_vinfo);
2668 return NULL;
2669 }
2670
2671 /* Analyze the alignment of the data-refs in the loop.
2672 FORNOW: Only aligned accesses are handled. */
2673
2674 ok = vect_analyze_data_refs_alignment (loop_vinfo);
2675 if (!ok)
2676 {
2677 if (vect_print_dump_info (REPORT_DETAILS, LOOP_LOC (loop_vinfo)))
2678 fprintf (vect_dump, "bad data alignment.");
2679 destroy_loop_vec_info (loop_vinfo);
2680 return NULL;
2681 }
2682
2683 /* Scan all the operations in the loop and make sure they are
2684 vectorizable. */
2685
2686 ok = vect_analyze_operations (loop_vinfo);
2687 if (!ok)
2688 {
2689 if (vect_print_dump_info (REPORT_DETAILS, LOOP_LOC (loop_vinfo)))
2690 fprintf (vect_dump, "bad operation or unsupported loop bound.");
2691 destroy_loop_vec_info (loop_vinfo);
2692 return NULL;
2693 }
2694
2695 LOOP_VINFO_VECTORIZABLE_P (loop_vinfo) = 1;
2696
2697 return loop_vinfo;
2698}