]> git.ipfire.org Git - thirdparty/gcc.git/blame - gcc/tree-vect-analyze.c
Fix failure with pragma once where buffer is NULL and buffer_valid is true.
[thirdparty/gcc.git] / gcc / tree-vect-analyze.c
CommitLineData
2e681715 1/* Analysis Utilities for Loop Vectorization.
f7064d11
DN
2 Copyright (C) 2003,2004,2005 Free Software Foundation, Inc.
3 Contributed by Dorit Naishlos <dorit@il.ibm.com>
4
5This file is part of GCC.
6
7GCC is free software; you can redistribute it and/or modify it under
8the terms of the GNU General Public License as published by the Free
9Software Foundation; either version 2, or (at your option) any later
10version.
11
12GCC is distributed in the hope that it will be useful, but WITHOUT ANY
13WARRANTY; without even the implied warranty of MERCHANTABILITY or
14FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
15for more details.
16
17You should have received a copy of the GNU General Public License
18along with GCC; see the file COPYING. If not, write to the Free
366ccddb
KC
19Software Foundation, 51 Franklin Street, Fifth Floor, Boston, MA
2002110-1301, USA. */
f7064d11
DN
21
22#include "config.h"
23#include "system.h"
24#include "coretypes.h"
25#include "tm.h"
f7064d11
DN
26#include "ggc.h"
27#include "tree.h"
28#include "basic-block.h"
29#include "diagnostic.h"
30#include "tree-flow.h"
31#include "tree-dump.h"
32#include "timevar.h"
33#include "cfgloop.h"
34#include "expr.h"
35#include "optabs.h"
c12cc930 36#include "params.h"
f7064d11
DN
37#include "tree-chrec.h"
38#include "tree-data-ref.h"
39#include "tree-scalar-evolution.h"
40#include "tree-vectorizer.h"
41
42/* Main analysis functions. */
43static loop_vec_info vect_analyze_loop_form (struct loop *);
44static bool vect_analyze_data_refs (loop_vec_info);
45static bool vect_mark_stmts_to_be_vectorized (loop_vec_info);
88088c03 46static void vect_analyze_scalar_cycles (loop_vec_info);
f7064d11
DN
47static bool vect_analyze_data_ref_accesses (loop_vec_info);
48static bool vect_analyze_data_ref_dependences (loop_vec_info);
49static bool vect_analyze_data_refs_alignment (loop_vec_info);
50static bool vect_compute_data_refs_alignment (loop_vec_info);
c12cc930 51static bool vect_enhance_data_refs_alignment (loop_vec_info);
f7064d11 52static bool vect_analyze_operations (loop_vec_info);
5f55a1ba 53static bool vect_determine_vectorization_factor (loop_vec_info);
f7064d11
DN
54
55/* Utility functions for the analyses. */
56static bool exist_non_indexing_operands_for_use_p (tree, tree);
88088c03
DN
57static void vect_mark_relevant (VEC(tree,heap) **, tree, bool, bool);
58static bool vect_stmt_relevant_p (tree, loop_vec_info, bool *, bool *);
f7064d11
DN
59static tree vect_get_loop_niters (struct loop *, tree *);
60static bool vect_analyze_data_ref_dependence
86a07404
IR
61 (struct data_dependence_relation *, loop_vec_info);
62static bool vect_compute_data_ref_alignment (struct data_reference *);
f7064d11 63static bool vect_analyze_data_ref_access (struct data_reference *);
f7064d11 64static bool vect_can_advance_ivs_p (loop_vec_info);
c12cc930
KB
65static void vect_update_misalignment_for_peel
66 (struct data_reference *, struct data_reference *, int npeel);
67
f7064d11 68
5f55a1ba
DN
69/* Function vect_determine_vectorization_factor
70
71 Determine the vectorization factor (VF). VF is the number of data elements
72 that are operated upon in parallel in a single iteration of the vectorized
73 loop. For example, when vectorizing a loop that operates on 4byte elements,
74 on a target with vector size (VS) 16byte, the VF is set to 4, since 4
75 elements can fit in a single vector register.
76
77 We currently support vectorization of loops in which all types operated upon
78 are of the same size. Therefore this function currently sets VF according to
79 the size of the types operated upon, and fails if there are multiple sizes
80 in the loop.
81
82 VF is also the factor by which the loop iterations are strip-mined, e.g.:
83 original loop:
84 for (i=0; i<N; i++){
85 a[i] = b[i] + c[i];
86 }
87
88 vectorized loop:
89 for (i=0; i<N; i+=VF){
90 a[i:VF] = b[i:VF] + c[i:VF];
91 }
92*/
93
94static bool
95vect_determine_vectorization_factor (loop_vec_info loop_vinfo)
96{
97 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
98 basic_block *bbs = LOOP_VINFO_BBS (loop_vinfo);
99 int nbbs = loop->num_nodes;
100 block_stmt_iterator si;
101 unsigned int vectorization_factor = 0;
102 int i;
103 tree scalar_type;
104
00518cb1 105 if (vect_print_dump_info (REPORT_DETAILS))
5f55a1ba
DN
106 fprintf (vect_dump, "=== vect_determine_vectorization_factor ===");
107
108 for (i = 0; i < nbbs; i++)
109 {
110 basic_block bb = bbs[i];
111
112 for (si = bsi_start (bb); !bsi_end_p (si); bsi_next (&si))
113 {
114 tree stmt = bsi_stmt (si);
115 unsigned int nunits;
116 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
117 tree vectype;
118
00518cb1 119 if (vect_print_dump_info (REPORT_DETAILS))
5f55a1ba
DN
120 {
121 fprintf (vect_dump, "==> examining statement: ");
122 print_generic_expr (vect_dump, stmt, TDF_SLIM);
123 }
124
125 gcc_assert (stmt_info);
126 /* skip stmts which do not need to be vectorized. */
88088c03
DN
127 if (!STMT_VINFO_RELEVANT_P (stmt_info)
128 && !STMT_VINFO_LIVE_P (stmt_info))
129 {
00518cb1 130 if (vect_print_dump_info (REPORT_DETAILS))
88088c03
DN
131 fprintf (vect_dump, "skip.");
132 continue;
133 }
5f55a1ba
DN
134
135 if (VECTOR_MODE_P (TYPE_MODE (TREE_TYPE (stmt))))
136 {
00518cb1 137 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
5f55a1ba
DN
138 {
139 fprintf (vect_dump, "not vectorized: vector stmt in loop:");
140 print_generic_expr (vect_dump, stmt, TDF_SLIM);
141 }
142 return false;
143 }
144
145 if (STMT_VINFO_DATA_REF (stmt_info))
146 scalar_type = TREE_TYPE (DR_REF (STMT_VINFO_DATA_REF (stmt_info)));
147 else if (TREE_CODE (stmt) == MODIFY_EXPR)
148 scalar_type = TREE_TYPE (TREE_OPERAND (stmt, 0));
149 else
150 scalar_type = TREE_TYPE (stmt);
151
00518cb1 152 if (vect_print_dump_info (REPORT_DETAILS))
5f55a1ba
DN
153 {
154 fprintf (vect_dump, "get vectype for scalar type: ");
155 print_generic_expr (vect_dump, scalar_type, TDF_SLIM);
156 }
157
158 vectype = get_vectype_for_scalar_type (scalar_type);
159 if (!vectype)
160 {
00518cb1 161 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
5f55a1ba
DN
162 {
163 fprintf (vect_dump, "not vectorized: unsupported data-type ");
164 print_generic_expr (vect_dump, scalar_type, TDF_SLIM);
165 }
166 return false;
167 }
00518cb1 168 if (vect_print_dump_info (REPORT_DETAILS))
5f55a1ba
DN
169 {
170 fprintf (vect_dump, "vectype: ");
171 print_generic_expr (vect_dump, vectype, TDF_SLIM);
172 }
173 STMT_VINFO_VECTYPE (stmt_info) = vectype;
174
57d1677d 175 nunits = TYPE_VECTOR_SUBPARTS (vectype);
00518cb1 176 if (vect_print_dump_info (REPORT_DETAILS))
5f55a1ba
DN
177 fprintf (vect_dump, "nunits = %d", nunits);
178
179 if (vectorization_factor)
180 {
181 /* FORNOW: don't allow mixed units.
182 This restriction will be relaxed in the future. */
183 if (nunits != vectorization_factor)
184 {
00518cb1 185 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
5f55a1ba
DN
186 fprintf (vect_dump, "not vectorized: mixed data-types");
187 return false;
188 }
189 }
190 else
191 vectorization_factor = nunits;
192
5f55a1ba
DN
193 gcc_assert (GET_MODE_SIZE (TYPE_MODE (scalar_type))
194 * vectorization_factor == UNITS_PER_SIMD_WORD);
5f55a1ba
DN
195 }
196 }
197
198 /* TODO: Analyze cost. Decide if worth while to vectorize. */
199
200 if (vectorization_factor <= 1)
201 {
00518cb1 202 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
5f55a1ba
DN
203 fprintf (vect_dump, "not vectorized: unsupported data-type");
204 return false;
205 }
206 LOOP_VINFO_VECT_FACTOR (loop_vinfo) = vectorization_factor;
207
208 return true;
209}
210
211
f7064d11
DN
212/* Function vect_analyze_operations.
213
214 Scan the loop stmts and make sure they are all vectorizable. */
215
216static bool
217vect_analyze_operations (loop_vec_info loop_vinfo)
218{
219 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
220 basic_block *bbs = LOOP_VINFO_BBS (loop_vinfo);
221 int nbbs = loop->num_nodes;
222 block_stmt_iterator si;
223 unsigned int vectorization_factor = 0;
224 int i;
225 bool ok;
88088c03
DN
226 tree phi;
227 stmt_vec_info stmt_info;
228 bool need_to_vectorize = false;
f7064d11 229
00518cb1 230 if (vect_print_dump_info (REPORT_DETAILS))
f7064d11
DN
231 fprintf (vect_dump, "=== vect_analyze_operations ===");
232
5f55a1ba
DN
233 gcc_assert (LOOP_VINFO_VECT_FACTOR (loop_vinfo));
234 vectorization_factor = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
235
f7064d11
DN
236 for (i = 0; i < nbbs; i++)
237 {
238 basic_block bb = bbs[i];
239
88088c03
DN
240 for (phi = phi_nodes (bb); phi; phi = PHI_CHAIN (phi))
241 {
242 stmt_info = vinfo_for_stmt (phi);
00518cb1 243 if (vect_print_dump_info (REPORT_DETAILS))
88088c03
DN
244 {
245 fprintf (vect_dump, "examining phi: ");
246 print_generic_expr (vect_dump, phi, TDF_SLIM);
247 }
248
249 gcc_assert (stmt_info);
250
251 if (STMT_VINFO_LIVE_P (stmt_info))
252 {
253 /* FORNOW: not yet supported. */
00518cb1 254 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
88088c03
DN
255 fprintf (vect_dump, "not vectorized: value used after loop.");
256 return false;
257 }
258
61d3cdbb
DN
259 if (STMT_VINFO_RELEVANT_P (stmt_info))
260 {
261 /* Most likely a reduction-like computation that is used
262 in the loop. */
00518cb1 263 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
61d3cdbb
DN
264 fprintf (vect_dump, "not vectorized: unsupported pattern.");
265 return false;
266 }
267 }
88088c03 268
f7064d11
DN
269 for (si = bsi_start (bb); !bsi_end_p (si); bsi_next (&si))
270 {
271 tree stmt = bsi_stmt (si);
f7064d11 272 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
f7064d11 273
00518cb1 274 if (vect_print_dump_info (REPORT_DETAILS))
f7064d11
DN
275 {
276 fprintf (vect_dump, "==> examining statement: ");
277 print_generic_expr (vect_dump, stmt, TDF_SLIM);
278 }
279
280 gcc_assert (stmt_info);
281
282 /* skip stmts which do not need to be vectorized.
283 this is expected to include:
284 - the COND_EXPR which is the loop exit condition
285 - any LABEL_EXPRs in the loop
286 - computations that are used only for array indexing or loop
287 control */
288
88088c03
DN
289 if (!STMT_VINFO_RELEVANT_P (stmt_info)
290 && !STMT_VINFO_LIVE_P (stmt_info))
f7064d11 291 {
00518cb1 292 if (vect_print_dump_info (REPORT_DETAILS))
f7064d11
DN
293 fprintf (vect_dump, "irrelevant.");
294 continue;
295 }
296
5f55a1ba
DN
297 if (STMT_VINFO_RELEVANT_P (stmt_info))
298 {
299 gcc_assert (!VECTOR_MODE_P (TYPE_MODE (TREE_TYPE (stmt))));
300 gcc_assert (STMT_VINFO_VECTYPE (stmt_info));
f7064d11 301
88088c03
DN
302 ok = (vectorizable_operation (stmt, NULL, NULL)
303 || vectorizable_assignment (stmt, NULL, NULL)
304 || vectorizable_load (stmt, NULL, NULL)
305 || vectorizable_store (stmt, NULL, NULL)
306 || vectorizable_condition (stmt, NULL, NULL));
307
308 if (!ok)
309 {
00518cb1 310 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
88088c03
DN
311 {
312 fprintf (vect_dump,
313 "not vectorized: relevant stmt not supported: ");
314 print_generic_expr (vect_dump, stmt, TDF_SLIM);
315 }
316 return false;
317 }
318 need_to_vectorize = true;
319 }
f7064d11 320
88088c03 321 if (STMT_VINFO_LIVE_P (stmt_info))
f7064d11 322 {
61d3cdbb
DN
323 ok = vectorizable_reduction (stmt, NULL, NULL);
324
325 if (ok)
326 need_to_vectorize = true;
327 else
328 ok = vectorizable_live_operation (stmt, NULL, NULL);
88088c03
DN
329
330 if (!ok)
f7064d11 331 {
00518cb1 332 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
88088c03
DN
333 {
334 fprintf (vect_dump,
335 "not vectorized: live stmt not supported: ");
336 print_generic_expr (vect_dump, stmt, TDF_SLIM);
337 }
338 return false;
f7064d11 339 }
f7064d11 340 }
88088c03
DN
341 } /* stmts in bb */
342 } /* bbs */
f7064d11
DN
343
344 /* TODO: Analyze cost. Decide if worth while to vectorize. */
345
88088c03
DN
346 /* All operations in the loop are either irrelevant (deal with loop
347 control, or dead), or only used outside the loop and can be moved
348 out of the loop (e.g. invariants, inductions). The loop can be
349 optimized away by scalar optimizations. We're better off not
350 touching this loop. */
351 if (!need_to_vectorize)
352 {
00518cb1 353 if (vect_print_dump_info (REPORT_DETAILS))
88088c03
DN
354 fprintf (vect_dump,
355 "All the computation can be taken out of the loop.");
00518cb1 356 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
88088c03
DN
357 fprintf (vect_dump,
358 "not vectorized: redundant loop. no profit to vectorize.");
359 return false;
360 }
361
f7064d11 362 if (LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo)
00518cb1 363 && vect_print_dump_info (REPORT_DETAILS))
f7064d11
DN
364 fprintf (vect_dump,
365 "vectorization_factor = %d, niters = " HOST_WIDE_INT_PRINT_DEC,
366 vectorization_factor, LOOP_VINFO_INT_NITERS (loop_vinfo));
367
368 if (LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo)
369 && LOOP_VINFO_INT_NITERS (loop_vinfo) < vectorization_factor)
370 {
00518cb1 371 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
f7064d11
DN
372 fprintf (vect_dump, "not vectorized: iteration count too small.");
373 return false;
374 }
375
376 if (!LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo)
c12cc930
KB
377 || LOOP_VINFO_INT_NITERS (loop_vinfo) % vectorization_factor != 0
378 || LOOP_PEELING_FOR_ALIGNMENT (loop_vinfo))
f7064d11 379 {
00518cb1 380 if (vect_print_dump_info (REPORT_DETAILS))
f7064d11
DN
381 fprintf (vect_dump, "epilog loop required.");
382 if (!vect_can_advance_ivs_p (loop_vinfo))
383 {
00518cb1 384 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
f7064d11
DN
385 fprintf (vect_dump,
386 "not vectorized: can't create epilog loop 1.");
387 return false;
388 }
70388d94 389 if (!slpeel_can_duplicate_loop_p (loop, loop->single_exit))
f7064d11 390 {
00518cb1 391 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
f7064d11
DN
392 fprintf (vect_dump,
393 "not vectorized: can't create epilog loop 2.");
394 return false;
395 }
396 }
397
398 return true;
399}
400
401
402/* Function exist_non_indexing_operands_for_use_p
403
404 USE is one of the uses attached to STMT. Check if USE is
405 used in STMT for anything other than indexing an array. */
406
407static bool
408exist_non_indexing_operands_for_use_p (tree use, tree stmt)
409{
410 tree operand;
411 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
412
413 /* USE corresponds to some operand in STMT. If there is no data
414 reference in STMT, then any operand that corresponds to USE
415 is not indexing an array. */
416 if (!STMT_VINFO_DATA_REF (stmt_info))
417 return true;
418
419 /* STMT has a data_ref. FORNOW this means that its of one of
420 the following forms:
421 -1- ARRAY_REF = var
422 -2- var = ARRAY_REF
423 (This should have been verified in analyze_data_refs).
424
425 'var' in the second case corresponds to a def, not a use,
426 so USE cannot correspond to any operands that are not used
427 for array indexing.
428
429 Therefore, all we need to check is if STMT falls into the
430 first case, and whether var corresponds to USE. */
431
432 if (TREE_CODE (TREE_OPERAND (stmt, 0)) == SSA_NAME)
433 return false;
434
435 operand = TREE_OPERAND (stmt, 1);
436
437 if (TREE_CODE (operand) != SSA_NAME)
438 return false;
439
440 if (operand == use)
441 return true;
442
443 return false;
444}
445
446
447/* Function vect_analyze_scalar_cycles.
448
449 Examine the cross iteration def-use cycles of scalar variables, by
88088c03
DN
450 analyzing the loop (scalar) PHIs; Classify each cycle as one of the
451 following: invariant, induction, reduction, unknown.
452
453 Some forms of scalar cycles are not yet supported.
454
455 Example1: reduction: (unsupported yet)
f7064d11 456
f7064d11
DN
457 loop1:
458 for (i=0; i<N; i++)
459 sum += a[i];
f7064d11 460
88088c03
DN
461 Example2: induction: (unsupported yet)
462
f7064d11
DN
463 loop2:
464 for (i=0; i<N; i++)
465 a[i] = i;
466
88088c03
DN
467 Note: the following loop *is* vectorizable:
468
f7064d11
DN
469 loop3:
470 for (i=0; i<N; i++)
471 a[i] = b[i];
472
88088c03
DN
473 even though it has a def-use cycle caused by the induction variable i:
474
f7064d11
DN
475 loop: i_2 = PHI (i_0, i_1)
476 a[i_2] = ...;
477 i_1 = i_2 + 1;
478 GOTO loop;
479
88088c03
DN
480 because the def-use cycle in loop3 is considered "not relevant" - i.e.,
481 it does not need to be vectorized because it is only used for array
482 indexing (see 'mark_stmts_to_be_vectorized'). The def-use cycle in
483 loop2 on the other hand is relevant (it is being written to memory).
484*/
f7064d11 485
88088c03 486static void
f7064d11
DN
487vect_analyze_scalar_cycles (loop_vec_info loop_vinfo)
488{
489 tree phi;
490 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
491 basic_block bb = loop->header;
492 tree dummy;
493
00518cb1 494 if (vect_print_dump_info (REPORT_DETAILS))
f7064d11
DN
495 fprintf (vect_dump, "=== vect_analyze_scalar_cycles ===");
496
497 for (phi = phi_nodes (bb); phi; phi = PHI_CHAIN (phi))
498 {
499 tree access_fn = NULL;
88088c03
DN
500 tree def = PHI_RESULT (phi);
501 stmt_vec_info stmt_vinfo = vinfo_for_stmt (phi);
502 tree reduc_stmt;
f7064d11 503
00518cb1 504 if (vect_print_dump_info (REPORT_DETAILS))
f7064d11
DN
505 {
506 fprintf (vect_dump, "Analyze phi: ");
507 print_generic_expr (vect_dump, phi, TDF_SLIM);
508 }
509
510 /* Skip virtual phi's. The data dependences that are associated with
511 virtual defs/uses (i.e., memory accesses) are analyzed elsewhere. */
512
88088c03 513 if (!is_gimple_reg (SSA_NAME_VAR (def)))
f7064d11 514 {
00518cb1 515 if (vect_print_dump_info (REPORT_DETAILS))
f7064d11
DN
516 fprintf (vect_dump, "virtual phi. skip.");
517 continue;
518 }
519
88088c03 520 STMT_VINFO_DEF_TYPE (stmt_vinfo) = vect_unknown_def_type;
f7064d11 521
88088c03 522 /* Analyze the evolution function. */
f7064d11 523
88088c03 524 access_fn = analyze_scalar_evolution (loop, def);
f7064d11
DN
525
526 if (!access_fn)
88088c03 527 continue;
f7064d11 528
00518cb1 529 if (vect_print_dump_info (REPORT_DETAILS))
f7064d11
DN
530 {
531 fprintf (vect_dump, "Access function of PHI: ");
532 print_generic_expr (vect_dump, access_fn, TDF_SLIM);
533 }
534
88088c03 535 if (vect_is_simple_iv_evolution (loop->num, access_fn, &dummy, &dummy))
f7064d11 536 {
00518cb1 537 if (vect_print_dump_info (REPORT_DETAILS))
88088c03
DN
538 fprintf (vect_dump, "Detected induction.");
539 STMT_VINFO_DEF_TYPE (stmt_vinfo) = vect_induction_def;
540 continue;
f7064d11 541 }
88088c03
DN
542
543 /* TODO: handle invariant phis */
544
545 reduc_stmt = vect_is_simple_reduction (loop, phi);
546 if (reduc_stmt)
547 {
00518cb1 548 if (vect_print_dump_info (REPORT_DETAILS))
88088c03
DN
549 fprintf (vect_dump, "Detected reduction.");
550 STMT_VINFO_DEF_TYPE (stmt_vinfo) = vect_reduction_def;
551 STMT_VINFO_DEF_TYPE (vinfo_for_stmt (reduc_stmt)) =
552 vect_reduction_def;
553 }
554 else
00518cb1 555 if (vect_print_dump_info (REPORT_DETAILS))
88088c03
DN
556 fprintf (vect_dump, "Unknown def-use cycle pattern.");
557
f7064d11
DN
558 }
559
88088c03 560 return;
f7064d11
DN
561}
562
563
f7064d11
DN
564/* Function vect_analyze_data_ref_dependence.
565
566 Return TRUE if there (might) exist a dependence between a memory-reference
567 DRA and a memory-reference DRB. */
86a07404 568
f7064d11 569static bool
86a07404
IR
570vect_analyze_data_ref_dependence (struct data_dependence_relation *ddr,
571 loop_vec_info loop_vinfo)
f7064d11 572{
b52485c6
DP
573 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
574 int vectorization_factor = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
575 int dist = 0;
576 unsigned int loop_depth = 0;
86a07404
IR
577 struct loop *loop_nest = loop;
578 struct data_reference *dra = DDR_A (ddr);
579 struct data_reference *drb = DDR_B (ddr);
580 stmt_vec_info stmtinfo_a = vinfo_for_stmt (DR_STMT (dra));
bb748329 581 stmt_vec_info stmtinfo_b = vinfo_for_stmt (DR_STMT (drb));
86a07404
IR
582
583 if (DDR_ARE_DEPENDENT (ddr) == chrec_known)
584 return false;
f7064d11 585
86a07404 586 if (DDR_ARE_DEPENDENT (ddr) == chrec_dont_know)
f7064d11 587 {
00518cb1 588 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
f7064d11
DN
589 {
590 fprintf (vect_dump,
86a07404 591 "not vectorized: can't determine dependence between ");
f7064d11
DN
592 print_generic_expr (vect_dump, DR_REF (dra), TDF_SLIM);
593 fprintf (vect_dump, " and ");
594 print_generic_expr (vect_dump, DR_REF (drb), TDF_SLIM);
595 }
596 return true;
597 }
598
86a07404 599 if (!DDR_DIST_VECT (ddr))
b52485c6 600 {
00518cb1 601 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
b52485c6 602 {
86a07404 603 fprintf (vect_dump, "not vectorized: bad dist vector for ");
b52485c6
DP
604 print_generic_expr (vect_dump, DR_REF (dra), TDF_SLIM);
605 fprintf (vect_dump, " and ");
606 print_generic_expr (vect_dump, DR_REF (drb), TDF_SLIM);
607 }
608 return true;
86a07404 609 }
b52485c6
DP
610
611 /* Find loop depth. */
86a07404 612 while (loop_nest && loop_nest->outer && loop_nest->outer->outer)
b52485c6 613 {
86a07404
IR
614 loop_nest = loop_nest->outer;
615 loop_depth++;
b52485c6 616 }
86a07404 617
b52485c6 618 dist = DDR_DIST_VECT (ddr)[loop_depth];
86a07404
IR
619 if (vect_print_dump_info (REPORT_DR_DETAILS))
620 fprintf (vect_dump, "dependence distance = %d.",dist);
b52485c6
DP
621
622 /* Same loop iteration. */
bb748329 623 if (dist % vectorization_factor == 0)
b52485c6 624 {
bb748329
DN
625 /* Two references with distance zero have the same alignment. */
626 VEC_safe_push (dr_p, heap, STMT_VINFO_SAME_ALIGN_REFS (stmtinfo_a), drb);
627 VEC_safe_push (dr_p, heap, STMT_VINFO_SAME_ALIGN_REFS (stmtinfo_b), dra);
00518cb1 628 if (vect_print_dump_info (REPORT_ALIGNMENT))
86a07404
IR
629 fprintf (vect_dump, "accesses have the same alignment.");
630 if (vect_print_dump_info (REPORT_DR_DETAILS))
631 {
632 fprintf (vect_dump, "dependence distance modulo vf == 0 between ");
633 print_generic_expr (vect_dump, DR_REF (dra), TDF_SLIM);
634 fprintf (vect_dump, " and ");
635 print_generic_expr (vect_dump, DR_REF (drb), TDF_SLIM);
636 }
b52485c6 637 return false;
86a07404 638 }
b52485c6 639
86a07404
IR
640 if (abs (dist) >= vectorization_factor)
641 {
642 /* Dependence distance does not create dependence, as far as vectorization
643 is concerned, in this case. */
644 if (vect_print_dump_info (REPORT_DR_DETAILS))
645 fprintf (vect_dump, "dependence distance >= VF.");
646 return false;
647 }
648
00518cb1 649 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
f7064d11
DN
650 {
651 fprintf (vect_dump,
86a07404 652 "not vectorized: possible dependence between data-refs ");
f7064d11
DN
653 print_generic_expr (vect_dump, DR_REF (dra), TDF_SLIM);
654 fprintf (vect_dump, " and ");
655 print_generic_expr (vect_dump, DR_REF (drb), TDF_SLIM);
656 }
86a07404 657
f7064d11
DN
658 return true;
659}
660
661
662/* Function vect_analyze_data_ref_dependences.
86a07404 663
f7064d11 664 Examine all the data references in the loop, and make sure there do not
b52485c6 665 exist any data dependences between them. */
86a07404 666
f7064d11
DN
667static bool
668vect_analyze_data_ref_dependences (loop_vec_info loop_vinfo)
669{
86a07404
IR
670 unsigned int i;
671 varray_type ddrs = LOOP_VINFO_DDRS (loop_vinfo);
f7064d11 672
86a07404 673 if (vect_print_dump_info (REPORT_DETAILS))
f7064d11 674 fprintf (vect_dump, "=== vect_analyze_dependences ===");
86a07404
IR
675
676 for (i = 0; i < VARRAY_ACTIVE_SIZE (ddrs); i++)
f7064d11 677 {
86a07404
IR
678 struct data_dependence_relation *ddr = VARRAY_GENERIC_PTR (ddrs, i);
679
680 if (vect_analyze_data_ref_dependence (ddr, loop_vinfo))
681 return false;
f7064d11
DN
682 }
683
684 return true;
685}
686
687
688/* Function vect_compute_data_ref_alignment
689
690 Compute the misalignment of the data reference DR.
691
692 Output:
693 1. If during the misalignment computation it is found that the data reference
694 cannot be vectorized then false is returned.
695 2. DR_MISALIGNMENT (DR) is defined.
696
697 FOR NOW: No analysis is actually performed. Misalignment is calculated
698 only for trivial cases. TODO. */
699
700static bool
701vect_compute_data_ref_alignment (struct data_reference *dr)
702{
703 tree stmt = DR_STMT (dr);
704 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
705 tree ref = DR_REF (dr);
706 tree vectype;
86a07404
IR
707 tree base, base_addr;
708 bool base_aligned;
f7064d11 709 tree misalign;
86a07404 710 tree aligned_to, alignment;
f7064d11 711
00518cb1 712 if (vect_print_dump_info (REPORT_DETAILS))
f7064d11
DN
713 fprintf (vect_dump, "vect_compute_data_ref_alignment:");
714
715 /* Initialize misalignment to unknown. */
716 DR_MISALIGNMENT (dr) = -1;
717
86a07404
IR
718 misalign = DR_OFFSET_MISALIGNMENT (dr);
719 aligned_to = DR_ALIGNED_TO (dr);
720 base_addr = DR_BASE_ADDRESS (dr);
721 base = build_fold_indirect_ref (base_addr);
f7064d11 722 vectype = STMT_VINFO_VECTYPE (stmt_info);
86a07404 723 alignment = ssize_int (TYPE_ALIGN (vectype)/BITS_PER_UNIT);
f7064d11 724
86a07404
IR
725 if ((aligned_to && tree_int_cst_compare (aligned_to, alignment) < 0)
726 || !misalign)
f7064d11 727 {
00518cb1 728 if (vect_print_dump_info (REPORT_DETAILS))
f7064d11
DN
729 {
730 fprintf (vect_dump, "Unknown alignment for access: ");
731 print_generic_expr (vect_dump, base, TDF_SLIM);
732 }
733 return true;
734 }
735
86a07404
IR
736 if ((DECL_P (base)
737 && tree_int_cst_compare (ssize_int (DECL_ALIGN_UNIT (base)),
738 alignment) >= 0)
739 || (TREE_CODE (base_addr) == SSA_NAME
740 && tree_int_cst_compare (ssize_int (TYPE_ALIGN_UNIT (TREE_TYPE (
741 TREE_TYPE (base_addr)))),
742 alignment) >= 0))
743 base_aligned = true;
744 else
745 base_aligned = false;
746
747 if (!base_aligned)
f7064d11
DN
748 {
749 if (!vect_can_force_dr_alignment_p (base, TYPE_ALIGN (vectype)))
750 {
00518cb1 751 if (vect_print_dump_info (REPORT_DETAILS))
f7064d11
DN
752 {
753 fprintf (vect_dump, "can't force alignment of ref: ");
754 print_generic_expr (vect_dump, ref, TDF_SLIM);
755 }
756 return true;
757 }
758
759 /* Force the alignment of the decl.
760 NOTE: This is the only change to the code we make during
761 the analysis phase, before deciding to vectorize the loop. */
00518cb1 762 if (vect_print_dump_info (REPORT_DETAILS))
f7064d11
DN
763 fprintf (vect_dump, "force alignment");
764 DECL_ALIGN (base) = TYPE_ALIGN (vectype);
765 DECL_USER_ALIGN (base) = 1;
766 }
767
768 /* At this point we assume that the base is aligned. */
86a07404 769 gcc_assert (base_aligned
f7064d11
DN
770 || (TREE_CODE (base) == VAR_DECL
771 && DECL_ALIGN (base) >= TYPE_ALIGN (vectype)));
772
f7064d11
DN
773 /* Modulo alignment. */
774 misalign = size_binop (TRUNC_MOD_EXPR, misalign, alignment);
86a07404 775
9b14a362 776 if (!host_integerp (misalign, 1))
f7064d11 777 {
9b14a362 778 /* Negative or overflowed misalignment value. */
00518cb1 779 if (vect_print_dump_info (REPORT_DETAILS))
f7064d11
DN
780 fprintf (vect_dump, "unexpected misalign value");
781 return false;
782 }
783
9b14a362 784 DR_MISALIGNMENT (dr) = TREE_INT_CST_LOW (misalign);
f7064d11 785
00518cb1 786 if (vect_print_dump_info (REPORT_DETAILS))
86a07404
IR
787 {
788 fprintf (vect_dump, "misalign = %d bytes of ref ", DR_MISALIGNMENT (dr));
789 print_generic_expr (vect_dump, ref, TDF_SLIM);
790 }
f7064d11
DN
791
792 return true;
793}
794
795
796/* Function vect_compute_data_refs_alignment
797
798 Compute the misalignment of data references in the loop.
c12cc930 799 Return FALSE if a data reference is found that cannot be vectorized. */
f7064d11
DN
800
801static bool
802vect_compute_data_refs_alignment (loop_vec_info loop_vinfo)
803{
86a07404 804 varray_type datarefs = LOOP_VINFO_DATAREFS (loop_vinfo);
f7064d11
DN
805 unsigned int i;
806
86a07404 807 for (i = 0; i < VARRAY_ACTIVE_SIZE (datarefs); i++)
f7064d11 808 {
86a07404 809 struct data_reference *dr = VARRAY_GENERIC_PTR (datarefs, i);
f7064d11
DN
810 if (!vect_compute_data_ref_alignment (dr))
811 return false;
812 }
813
814 return true;
815}
816
817
c12cc930
KB
818/* Function vect_update_misalignment_for_peel
819
820 DR - the data reference whose misalignment is to be adjusted.
821 DR_PEEL - the data reference whose misalignment is being made
822 zero in the vector loop by the peel.
823 NPEEL - the number of iterations in the peel loop if the misalignment
824 of DR_PEEL is known at compile time. */
825
826static void
827vect_update_misalignment_for_peel (struct data_reference *dr,
828 struct data_reference *dr_peel, int npeel)
829{
830 unsigned int i;
831 int drsize;
832 VEC(dr_p,heap) *same_align_drs;
833 struct data_reference *current_dr;
834
835 if (known_alignment_for_access_p (dr)
836 && DR_MISALIGNMENT (dr) == DR_MISALIGNMENT (dr_peel))
837 {
838 DR_MISALIGNMENT (dr) = 0;
839 return;
840 }
841
842 /* It can be assumed that the data refs with the same alignment as dr_peel
843 are aligned in the vector loop. */
844 same_align_drs
845 = STMT_VINFO_SAME_ALIGN_REFS (vinfo_for_stmt (DR_STMT (dr_peel)));
846 for (i = 0; VEC_iterate (dr_p, same_align_drs, i, current_dr); i++)
847 {
848 if (current_dr != dr)
849 continue;
850 gcc_assert (DR_MISALIGNMENT (dr) == DR_MISALIGNMENT (dr_peel));
851 DR_MISALIGNMENT (dr) = 0;
852 return;
853 }
854
855 if (known_alignment_for_access_p (dr)
856 && known_alignment_for_access_p (dr_peel))
857 {
858 drsize = GET_MODE_SIZE (TYPE_MODE (TREE_TYPE (DR_REF (dr))));
859 DR_MISALIGNMENT (dr) += npeel * drsize;
860 DR_MISALIGNMENT (dr) %= UNITS_PER_SIMD_WORD;
861 return;
862 }
863
864 DR_MISALIGNMENT (dr) = -1;
865}
866
867
868/* Function vect_verify_datarefs_alignment
869
870 Return TRUE if all data references in the loop can be
871 handled with respect to alignment. */
872
873static bool
874vect_verify_datarefs_alignment (loop_vec_info loop_vinfo)
875{
876 varray_type datarefs = LOOP_VINFO_DATAREFS (loop_vinfo);
877 enum dr_alignment_support supportable_dr_alignment;
878 unsigned int i;
879
880 for (i = 0; i < VARRAY_ACTIVE_SIZE (datarefs); i++)
881 {
882 struct data_reference *dr = VARRAY_GENERIC_PTR (datarefs, i);
883 supportable_dr_alignment = vect_supportable_dr_alignment (dr);
884 if (!supportable_dr_alignment)
885 {
886 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
887 {
888 if (DR_IS_READ (dr))
889 fprintf (vect_dump,
890 "not vectorized: unsupported unaligned load.");
891 else
892 fprintf (vect_dump,
893 "not vectorized: unsupported unaligned store.");
894 }
895 return false;
896 }
897 if (supportable_dr_alignment != dr_aligned
898 && vect_print_dump_info (REPORT_ALIGNMENT))
899 fprintf (vect_dump, "Vectorizing an unaligned access.");
900 }
901 return true;
902}
903
904
f7064d11
DN
905/* Function vect_enhance_data_refs_alignment
906
907 This pass will use loop versioning and loop peeling in order to enhance
908 the alignment of data references in the loop.
909
910 FOR NOW: we assume that whatever versioning/peeling takes place, only the
911 original loop is to be vectorized; Any other loops that are created by
912 the transformations performed in this pass - are not supposed to be
c12cc930
KB
913 vectorized. This restriction will be relaxed.
914
915 This pass will require a cost model to guide it whether to apply peeling
916 or versioning or a combination of the two. For example, the scheme that
917 intel uses when given a loop with several memory accesses, is as follows:
918 choose one memory access ('p') which alignment you want to force by doing
919 peeling. Then, either (1) generate a loop in which 'p' is aligned and all
920 other accesses are not necessarily aligned, or (2) use loop versioning to
921 generate one loop in which all accesses are aligned, and another loop in
922 which only 'p' is necessarily aligned.
923
924 ("Automatic Intra-Register Vectorization for the Intel Architecture",
925 Aart J.C. Bik, Milind Girkar, Paul M. Grey and Ximmin Tian, International
926 Journal of Parallel Programming, Vol. 30, No. 2, April 2002.)
927
928 Devising a cost model is the most critical aspect of this work. It will
929 guide us on which access to peel for, whether to use loop versioning, how
930 many versions to create, etc. The cost model will probably consist of
931 generic considerations as well as target specific considerations (on
932 powerpc for example, misaligned stores are more painful than misaligned
933 loads).
934
935 Here are the general steps involved in alignment enhancements:
f7064d11 936
f7064d11
DN
937 -- original loop, before alignment analysis:
938 for (i=0; i<N; i++){
939 x = q[i]; # DR_MISALIGNMENT(q) = unknown
940 p[i] = y; # DR_MISALIGNMENT(p) = unknown
941 }
942
943 -- After vect_compute_data_refs_alignment:
944 for (i=0; i<N; i++){
945 x = q[i]; # DR_MISALIGNMENT(q) = 3
946 p[i] = y; # DR_MISALIGNMENT(p) = unknown
947 }
948
949 -- Possibility 1: we do loop versioning:
950 if (p is aligned) {
951 for (i=0; i<N; i++){ # loop 1A
952 x = q[i]; # DR_MISALIGNMENT(q) = 3
953 p[i] = y; # DR_MISALIGNMENT(p) = 0
954 }
c12cc930 955 }
f7064d11
DN
956 else {
957 for (i=0; i<N; i++){ # loop 1B
958 x = q[i]; # DR_MISALIGNMENT(q) = 3
959 p[i] = y; # DR_MISALIGNMENT(p) = unaligned
960 }
961 }
c12cc930 962
f7064d11
DN
963 -- Possibility 2: we do loop peeling:
964 for (i = 0; i < 3; i++){ # (scalar loop, not to be vectorized).
965 x = q[i];
966 p[i] = y;
967 }
968 for (i = 3; i < N; i++){ # loop 2A
969 x = q[i]; # DR_MISALIGNMENT(q) = 0
970 p[i] = y; # DR_MISALIGNMENT(p) = unknown
971 }
972
973 -- Possibility 3: combination of loop peeling and versioning:
974 for (i = 0; i < 3; i++){ # (scalar loop, not to be vectorized).
975 x = q[i];
976 p[i] = y;
977 }
978 if (p is aligned) {
c12cc930 979 for (i = 3; i<N; i++){ # loop 3A
f7064d11
DN
980 x = q[i]; # DR_MISALIGNMENT(q) = 0
981 p[i] = y; # DR_MISALIGNMENT(p) = 0
982 }
c12cc930 983 }
f7064d11
DN
984 else {
985 for (i = 3; i<N; i++){ # loop 3B
986 x = q[i]; # DR_MISALIGNMENT(q) = 0
987 p[i] = y; # DR_MISALIGNMENT(p) = unaligned
988 }
989 }
990
c12cc930
KB
991 These loops are later passed to loop_transform to be vectorized. The
992 vectorizer will use the alignment information to guide the transformation
993 (whether to generate regular loads/stores, or with special handling for
994 misalignment). */
995
996static bool
997vect_enhance_data_refs_alignment (loop_vec_info loop_vinfo)
998{
999 varray_type datarefs = LOOP_VINFO_DATAREFS (loop_vinfo);
1000 enum dr_alignment_support supportable_dr_alignment;
1001 struct data_reference *dr0 = NULL;
1002 struct data_reference *dr;
1003 unsigned int i;
1004 bool do_peeling = false;
1005 bool do_versioning = false;
1006 bool stat;
1007
1008 /* While cost model enhancements are expected in the future, the high level
1009 view of the code at this time is as follows:
1010
1011 A) If there is a misaligned write then see if peeling to align this write
1012 can make all data references satisfy vect_supportable_dr_alignment.
1013 If so, update data structures as needed and return true. Note that
1014 at this time vect_supportable_dr_alignment is known to return false
1015 for a a misaligned write.
1016
1017 B) If peeling wasn't possible and there is a data reference with an
1018 unknown misalignment that does not satisfy vect_supportable_dr_alignment
1019 then see if loop versioning checks can be used to make all data
1020 references satisfy vect_supportable_dr_alignment. If so, update
1021 data structures as needed and return true.
1022
1023 C) If neither peeling nor versioning were successful then return false if
1024 any data reference does not satisfy vect_supportable_dr_alignment.
1025
1026 D) Return true (all data references satisfy vect_supportable_dr_alignment).
1027
1028 Note, Possibility 3 above (which is peeling and versioning together) is not
1029 being done at this time. */
f7064d11
DN
1030
1031 /* (1) Peeling to force alignment. */
1032
1033 /* (1.1) Decide whether to perform peeling, and how many iterations to peel:
1034 Considerations:
1035 + How many accesses will become aligned due to the peeling
1036 - How many accesses will become unaligned due to the peeling,
1037 and the cost of misaligned accesses.
1038 - The cost of peeling (the extra runtime checks, the increase
1039 in code size).
1040
1041 The scheme we use FORNOW: peel to force the alignment of the first
1042 misaligned store in the loop.
1043 Rationale: misaligned stores are not yet supported.
1044
c12cc930 1045 TODO: Use a cost model. */
f7064d11 1046
c12cc930 1047 for (i = 0; i < VARRAY_ACTIVE_SIZE (datarefs); i++)
f7064d11 1048 {
c12cc930
KB
1049 dr = VARRAY_GENERIC_PTR (datarefs, i);
1050 if (!DR_IS_READ (dr) && !aligned_access_p (dr))
1051 {
1052 dr0 = dr;
1053 do_peeling = true;
1054 break;
1055 }
f7064d11
DN
1056 }
1057
c12cc930
KB
1058 /* Often peeling for alignment will require peeling for loop-bound, which in
1059 turn requires that we know how to adjust the loop ivs after the loop. */
1060 if (!vect_can_advance_ivs_p (loop_vinfo))
1061 do_peeling = false;
f7064d11 1062
c12cc930 1063 if (do_peeling)
f7064d11 1064 {
5f55a1ba
DN
1065 int mis;
1066 int npeel = 0;
1067
1068 if (known_alignment_for_access_p (dr0))
c12cc930
KB
1069 {
1070 /* Since it's known at compile time, compute the number of iterations
1071 in the peeled loop (the peeling factor) for use in updating
1072 DR_MISALIGNMENT values. The peeling factor is the vectorization
1073 factor minus the misalignment as an element count. */
1074 mis = DR_MISALIGNMENT (dr0);
1075 mis /= GET_MODE_SIZE (TYPE_MODE (TREE_TYPE (DR_REF (dr0))));
1076 npeel = LOOP_VINFO_VECT_FACTOR (loop_vinfo) - mis;
1077 }
1078
1079 /* Ensure that all data refs can be vectorized after the peel. */
1080 for (i = 0; i < VARRAY_ACTIVE_SIZE (datarefs); i++)
1081 {
1082 int save_misalignment;
1083
1084 dr = VARRAY_GENERIC_PTR (datarefs, i);
1085 if (dr == dr0)
1086 continue;
1087 save_misalignment = DR_MISALIGNMENT (dr);
1088 vect_update_misalignment_for_peel (dr, dr0, npeel);
1089 supportable_dr_alignment = vect_supportable_dr_alignment (dr);
1090 DR_MISALIGNMENT (dr) = save_misalignment;
1091
1092 if (!supportable_dr_alignment)
1093 {
1094 do_peeling = false;
1095 break;
1096 }
f7064d11 1097 }
5f55a1ba 1098
c12cc930
KB
1099 if (do_peeling)
1100 {
1101 /* (1.2) Update the DR_MISALIGNMENT of each data reference DR_i.
1102 If the misalignment of DR_i is identical to that of dr0 then set
1103 DR_MISALIGNMENT (DR_i) to zero. If the misalignment of DR_i and
1104 dr0 are known at compile time then increment DR_MISALIGNMENT (DR_i)
1105 by the peeling factor times the element size of DR_i (MOD the
1106 vectorization factor times the size). Otherwise, the
1107 misalignment of DR_i must be set to unknown. */
5f55a1ba
DN
1108 for (i = 0; i < VARRAY_ACTIVE_SIZE (datarefs); i++)
1109 {
c12cc930
KB
1110 dr = VARRAY_GENERIC_PTR (datarefs, i);
1111 if (dr == dr0)
1112 continue;
1113 vect_update_misalignment_for_peel (dr, dr0, npeel);
5f55a1ba 1114 }
5f55a1ba 1115
c12cc930
KB
1116 LOOP_VINFO_UNALIGNED_DR (loop_vinfo) = dr0;
1117 LOOP_PEELING_FOR_ALIGNMENT (loop_vinfo) = DR_MISALIGNMENT (dr0);
1118 DR_MISALIGNMENT (dr0) = 0;
1119 if (vect_print_dump_info (REPORT_ALIGNMENT))
1120 fprintf (vect_dump, "Alignment of access forced using peeling.");
1121
1122 if (vect_print_dump_info (REPORT_DETAILS))
1123 fprintf (vect_dump, "Peeling for alignment will be applied.");
1124
1125 stat = vect_verify_datarefs_alignment (loop_vinfo);
1126 gcc_assert (stat);
1127 return stat;
1128 }
1129 }
1130
1131
1132 /* (2) Versioning to force alignment. */
1133
1134 /* Try versioning if:
1135 1) flag_tree_vect_loop_version is TRUE
1136 2) optimize_size is FALSE
1137 3) there is at least one unsupported misaligned data ref with an unknown
1138 misalignment, and
1139 4) all misaligned data refs with a known misalignment are supported, and
1140 5) the number of runtime alignment checks is within reason. */
1141
1142 do_versioning = flag_tree_vect_loop_version && (!optimize_size);
1143
1144 if (do_versioning)
1145 {
1146 for (i = 0; i < VARRAY_ACTIVE_SIZE (datarefs); i++)
bb748329 1147 {
c12cc930
KB
1148 dr = VARRAY_GENERIC_PTR (datarefs, i);
1149
1150 if (aligned_access_p (dr))
1151 continue;
1152
1153 supportable_dr_alignment = vect_supportable_dr_alignment (dr);
1154
1155 if (!supportable_dr_alignment)
1156 {
1157 tree stmt;
1158 int mask;
1159 tree vectype;
1160
1161 if (known_alignment_for_access_p (dr)
1162 || VEC_length (tree,
1163 LOOP_VINFO_MAY_MISALIGN_STMTS (loop_vinfo))
1164 >= (unsigned) PARAM_VALUE (PARAM_VECT_MAX_VERSION_CHECKS))
1165 {
1166 do_versioning = false;
1167 break;
1168 }
1169
1170 stmt = DR_STMT (dr);
1171 vectype = STMT_VINFO_VECTYPE (vinfo_for_stmt (stmt));
1172 gcc_assert (vectype);
1173
1174 /* The rightmost bits of an aligned address must be zeros.
1175 Construct the mask needed for this test. For example,
1176 GET_MODE_SIZE for the vector mode V4SI is 16 bytes so the
1177 mask must be 15 = 0xf. */
1178 mask = GET_MODE_SIZE (TYPE_MODE (vectype)) - 1;
1179
1180 /* FORNOW: use the same mask to test all potentially unaligned
1181 references in the loop. The vectorizer currently supports
1182 a single vector size, see the reference to
1183 GET_MODE_NUNITS (TYPE_MODE (vectype)) where the
1184 vectorization factor is computed. */
1185 gcc_assert (!LOOP_VINFO_PTR_MASK (loop_vinfo)
1186 || LOOP_VINFO_PTR_MASK (loop_vinfo) == mask);
1187 LOOP_VINFO_PTR_MASK (loop_vinfo) = mask;
1188 VEC_safe_push (tree, heap,
1189 LOOP_VINFO_MAY_MISALIGN_STMTS (loop_vinfo),
1190 DR_STMT (dr));
1191 }
1192 }
1193
1194 /* Versioning requires at least one misaligned data reference. */
1195 if (VEC_length (tree, LOOP_VINFO_MAY_MISALIGN_STMTS (loop_vinfo)) == 0)
1196 do_versioning = false;
1197 else if (!do_versioning)
1198 VEC_truncate (tree, LOOP_VINFO_MAY_MISALIGN_STMTS (loop_vinfo), 0);
1199 }
1200
1201 if (do_versioning)
1202 {
1203 VEC(tree,heap) *may_misalign_stmts
1204 = LOOP_VINFO_MAY_MISALIGN_STMTS (loop_vinfo);
1205 tree stmt;
1206
1207 /* It can now be assumed that the data references in the statements
1208 in LOOP_VINFO_MAY_MISALIGN_STMTS will be aligned in the version
1209 of the loop being vectorized. */
1210 for (i = 0; VEC_iterate (tree, may_misalign_stmts, i, stmt); i++)
1211 {
1212 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1213 dr = STMT_VINFO_DATA_REF (stmt_info);
bb748329 1214 DR_MISALIGNMENT (dr) = 0;
c12cc930
KB
1215 if (vect_print_dump_info (REPORT_ALIGNMENT))
1216 fprintf (vect_dump, "Alignment of access forced using versioning.");
bb748329
DN
1217 }
1218
c12cc930
KB
1219 if (vect_print_dump_info (REPORT_DETAILS))
1220 fprintf (vect_dump, "Versioning for alignment will be applied.");
1221
1222 /* Peeling and versioning can't be done together at this time. */
1223 gcc_assert (! (do_peeling && do_versioning));
1224
1225 stat = vect_verify_datarefs_alignment (loop_vinfo);
1226 gcc_assert (stat);
1227 return stat;
f7064d11 1228 }
c12cc930
KB
1229
1230 /* This point is reached if neither peeling nor versioning is being done. */
1231 gcc_assert (! (do_peeling || do_versioning));
1232
1233 stat = vect_verify_datarefs_alignment (loop_vinfo);
1234 return stat;
f7064d11
DN
1235}
1236
1237
1238/* Function vect_analyze_data_refs_alignment
1239
1240 Analyze the alignment of the data-references in the loop.
c12cc930 1241 Return FALSE if a data reference is found that cannot be vectorized. */
f7064d11
DN
1242
1243static bool
1244vect_analyze_data_refs_alignment (loop_vec_info loop_vinfo)
1245{
00518cb1 1246 if (vect_print_dump_info (REPORT_DETAILS))
f7064d11
DN
1247 fprintf (vect_dump, "=== vect_analyze_data_refs_alignment ===");
1248
f7064d11
DN
1249 if (!vect_compute_data_refs_alignment (loop_vinfo))
1250 {
00518cb1 1251 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
f7064d11
DN
1252 fprintf (vect_dump,
1253 "not vectorized: can't calculate alignment for data ref.");
1254 return false;
1255 }
1256
f7064d11
DN
1257 return true;
1258}
1259
1260
1261/* Function vect_analyze_data_ref_access.
1262
1263 Analyze the access pattern of the data-reference DR. For now, a data access
c12cc930 1264 has to be consecutive to be considered vectorizable. */
f7064d11
DN
1265
1266static bool
1267vect_analyze_data_ref_access (struct data_reference *dr)
1268{
86a07404 1269 tree step = DR_STEP (dr);
f7064d11
DN
1270 tree scalar_type = TREE_TYPE (DR_REF (dr));
1271
1272 if (!step || tree_int_cst_compare (step, TYPE_SIZE_UNIT (scalar_type)))
1273 {
00518cb1 1274 if (vect_print_dump_info (REPORT_DETAILS))
f7064d11
DN
1275 fprintf (vect_dump, "not consecutive access");
1276 return false;
1277 }
1278 return true;
1279}
1280
1281
1282/* Function vect_analyze_data_ref_accesses.
1283
1284 Analyze the access pattern of all the data references in the loop.
1285
1286 FORNOW: the only access pattern that is considered vectorizable is a
1287 simple step 1 (consecutive) access.
1288
1289 FORNOW: handle only arrays and pointer accesses. */
1290
1291static bool
1292vect_analyze_data_ref_accesses (loop_vec_info loop_vinfo)
1293{
1294 unsigned int i;
86a07404 1295 varray_type datarefs = LOOP_VINFO_DATAREFS (loop_vinfo);
f7064d11 1296
00518cb1 1297 if (vect_print_dump_info (REPORT_DETAILS))
f7064d11
DN
1298 fprintf (vect_dump, "=== vect_analyze_data_ref_accesses ===");
1299
86a07404 1300 for (i = 0; i < VARRAY_ACTIVE_SIZE (datarefs); i++)
f7064d11 1301 {
86a07404
IR
1302 struct data_reference *dr = VARRAY_GENERIC_PTR (datarefs, i);
1303 if (!vect_analyze_data_ref_access (dr))
f7064d11 1304 {
00518cb1 1305 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
f7064d11
DN
1306 fprintf (vect_dump, "not vectorized: complicated access pattern.");
1307 return false;
1308 }
1309 }
1310
1311 return true;
1312}
1313
1314
f7064d11
DN
1315/* Function vect_analyze_data_refs.
1316
86a07404 1317 Find all the data references in the loop.
f7064d11 1318
86a07404 1319 The general structure of the analysis of data refs in the vectorizer is as
f7064d11 1320 follows:
86a07404
IR
1321 1- vect_analyze_data_refs(loop): call compute_data_dependences_for_loop to
1322 find and analyze all data-refs in the loop and their dependences.
1323 2- vect_analyze_dependences(): apply dependence testing using ddrs.
f7064d11
DN
1324 3- vect_analyze_drs_alignment(): check that ref_stmt.alignment is ok.
1325 4- vect_analyze_drs_access(): check that ref_stmt.step is ok.
1326
86a07404 1327*/
f7064d11
DN
1328
1329static bool
86a07404 1330vect_analyze_data_refs (loop_vec_info loop_vinfo)
f7064d11
DN
1331{
1332 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
86a07404
IR
1333 unsigned int i;
1334 varray_type datarefs;
1335 tree scalar_type;
f7064d11 1336
00518cb1 1337 if (vect_print_dump_info (REPORT_DETAILS))
f7064d11
DN
1338 fprintf (vect_dump, "=== vect_analyze_data_refs ===");
1339
86a07404
IR
1340 compute_data_dependences_for_loop (loop, false,
1341 &(LOOP_VINFO_DATAREFS (loop_vinfo)),
1342 &(LOOP_VINFO_DDRS (loop_vinfo)));
f7064d11 1343
86a07404
IR
1344 /* Go through the data-refs, check that the analysis succeeded. Update pointer
1345 from stmt_vec_info struct to DR and vectype. */
1346 datarefs = LOOP_VINFO_DATAREFS (loop_vinfo);
1347 for (i = 0; i < VARRAY_ACTIVE_SIZE (datarefs); i++)
1348 {
1349 struct data_reference *dr = VARRAY_GENERIC_PTR (datarefs, i);
1350 tree stmt;
1351 stmt_vec_info stmt_info;
1352
1353 if (!dr || !DR_REF (dr))
1354 {
1355 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
d7770457 1356 fprintf (vect_dump, "not vectorized: unhandled data-ref ");
86a07404
IR
1357 return false;
1358 }
1359
1360 /* Update DR field in stmt_vec_info struct. */
1361 stmt = DR_STMT (dr);
1362 stmt_info = vinfo_for_stmt (stmt);
1363
1364 if (STMT_VINFO_DATA_REF (stmt_info))
1365 {
1366 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
1367 {
1368 fprintf (vect_dump,
1369 "not vectorized: more than one data ref in stmt: ");
1370 print_generic_expr (vect_dump, stmt, TDF_SLIM);
1371 }
1372 return false;
1373 }
1374 STMT_VINFO_DATA_REF (stmt_info) = dr;
1375
1376 /* Check that analysis of the data-ref succeeded. */
1377 if (!DR_BASE_ADDRESS (dr) || !DR_OFFSET (dr) || !DR_INIT (dr)
1378 || !DR_STEP (dr))
1379 {
1380 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
1381 {
1382 fprintf (vect_dump, "not vectorized: data ref analysis failed ");
1383 print_generic_expr (vect_dump, stmt, TDF_SLIM);
1384 }
1385 return false;
1386 }
1387 if (!DR_MEMTAG (dr))
1388 {
1389 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
1390 {
1391 fprintf (vect_dump, "not vectorized: no memory tag for ");
1392 print_generic_expr (vect_dump, DR_REF (dr), TDF_SLIM);
1393 }
1394 return false;
1395 }
1396
1397 /* Set vectype for STMT. */
1398 scalar_type = TREE_TYPE (DR_REF (dr));
1399 STMT_VINFO_VECTYPE (stmt_info) =
1400 get_vectype_for_scalar_type (scalar_type);
1401 if (!STMT_VINFO_VECTYPE (stmt_info))
1402 {
1403 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
1404 {
1405 fprintf (vect_dump,
1406 "not vectorized: no vectype for stmt: ");
1407 print_generic_expr (vect_dump, stmt, TDF_SLIM);
1408 fprintf (vect_dump, " scalar_type: ");
1409 print_generic_expr (vect_dump, scalar_type, TDF_DETAILS);
1410 }
1411 return false;
1412 }
f7064d11 1413 }
86a07404 1414
f7064d11
DN
1415 return true;
1416}
1417
1418
1419/* Utility functions used by vect_mark_stmts_to_be_vectorized. */
1420
1421/* Function vect_mark_relevant.
1422
1423 Mark STMT as "relevant for vectorization" and add it to WORKLIST. */
1424
1425static void
88088c03
DN
1426vect_mark_relevant (VEC(tree,heap) **worklist, tree stmt,
1427 bool relevant_p, bool live_p)
f7064d11 1428{
88088c03
DN
1429 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1430 bool save_relevant_p = STMT_VINFO_RELEVANT_P (stmt_info);
1431 bool save_live_p = STMT_VINFO_LIVE_P (stmt_info);
f7064d11 1432
00518cb1 1433 if (vect_print_dump_info (REPORT_DETAILS))
88088c03 1434 fprintf (vect_dump, "mark relevant %d, live %d.",relevant_p, live_p);
f7064d11 1435
88088c03 1436 STMT_VINFO_LIVE_P (stmt_info) |= live_p;
61d3cdbb 1437 STMT_VINFO_RELEVANT_P (stmt_info) |= relevant_p;
f7064d11 1438
88088c03 1439 if (TREE_CODE (stmt) == PHI_NODE)
61d3cdbb
DN
1440 /* Don't put phi-nodes in the worklist. Phis that are marked relevant
1441 or live will fail vectorization later on. */
88088c03 1442 return;
f7064d11 1443
88088c03
DN
1444 if (STMT_VINFO_RELEVANT_P (stmt_info) == save_relevant_p
1445 && STMT_VINFO_LIVE_P (stmt_info) == save_live_p)
f7064d11 1446 {
00518cb1 1447 if (vect_print_dump_info (REPORT_DETAILS))
88088c03 1448 fprintf (vect_dump, "already marked relevant/live.");
f7064d11
DN
1449 return;
1450 }
1451
51d00891 1452 VEC_safe_push (tree, heap, *worklist, stmt);
f7064d11
DN
1453}
1454
1455
1456/* Function vect_stmt_relevant_p.
1457
1458 Return true if STMT in loop that is represented by LOOP_VINFO is
1459 "relevant for vectorization".
1460
1461 A stmt is considered "relevant for vectorization" if:
1462 - it has uses outside the loop.
1463 - it has vdefs (it alters memory).
1464 - control stmts in the loop (except for the exit condition).
1465
1466 CHECKME: what other side effects would the vectorizer allow? */
1467
1468static bool
88088c03
DN
1469vect_stmt_relevant_p (tree stmt, loop_vec_info loop_vinfo,
1470 bool *relevant_p, bool *live_p)
f7064d11 1471{
f7064d11 1472 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
f430bae8
AM
1473 ssa_op_iter op_iter;
1474 imm_use_iterator imm_iter;
1475 use_operand_p use_p;
f47c96aa 1476 def_operand_p def_p;
f7064d11 1477
88088c03
DN
1478 *relevant_p = false;
1479 *live_p = false;
1480
f7064d11
DN
1481 /* cond stmt other than loop exit cond. */
1482 if (is_ctrl_stmt (stmt) && (stmt != LOOP_VINFO_EXIT_COND (loop_vinfo)))
88088c03 1483 *relevant_p = true;
f7064d11
DN
1484
1485 /* changing memory. */
f47c96aa
AM
1486 if (TREE_CODE (stmt) != PHI_NODE)
1487 if (!ZERO_SSA_OPERANDS (stmt, SSA_OP_VIRTUAL_DEFS))
1488 {
00518cb1 1489 if (vect_print_dump_info (REPORT_DETAILS))
f47c96aa 1490 fprintf (vect_dump, "vec_stmt_relevant_p: stmt has vdefs.");
88088c03 1491 *relevant_p = true;
f47c96aa 1492 }
f7064d11
DN
1493
1494 /* uses outside the loop. */
f47c96aa 1495 FOR_EACH_PHI_OR_STMT_DEF (def_p, stmt, op_iter, SSA_OP_DEF)
f7064d11 1496 {
f47c96aa 1497 FOR_EACH_IMM_USE_FAST (use_p, imm_iter, DEF_FROM_PTR (def_p))
f7064d11 1498 {
f430bae8
AM
1499 basic_block bb = bb_for_stmt (USE_STMT (use_p));
1500 if (!flow_bb_inside_loop_p (loop, bb))
1501 {
00518cb1 1502 if (vect_print_dump_info (REPORT_DETAILS))
f430bae8 1503 fprintf (vect_dump, "vec_stmt_relevant_p: used out of loop.");
88088c03
DN
1504
1505 /* We expect all such uses to be in the loop exit phis
1506 (because of loop closed form) */
1507 gcc_assert (TREE_CODE (USE_STMT (use_p)) == PHI_NODE);
1508 gcc_assert (bb == loop->single_exit->dest);
1509
1510 *live_p = true;
f430bae8 1511 }
f7064d11
DN
1512 }
1513 }
1514
88088c03 1515 return (*live_p || *relevant_p);
f7064d11
DN
1516}
1517
1518
1519/* Function vect_mark_stmts_to_be_vectorized.
1520
1521 Not all stmts in the loop need to be vectorized. For example:
1522
1523 for i...
1524 for j...
1525 1. T0 = i + j
1526 2. T1 = a[T0]
1527
1528 3. j = j + 1
1529
1530 Stmt 1 and 3 do not need to be vectorized, because loop control and
1531 addressing of vectorized data-refs are handled differently.
1532
1533 This pass detects such stmts. */
1534
1535static bool
1536vect_mark_stmts_to_be_vectorized (loop_vec_info loop_vinfo)
1537{
51d00891 1538 VEC(tree,heap) *worklist;
f7064d11
DN
1539 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
1540 basic_block *bbs = LOOP_VINFO_BBS (loop_vinfo);
1541 unsigned int nbbs = loop->num_nodes;
1542 block_stmt_iterator si;
f47c96aa 1543 tree stmt, use;
88088c03 1544 stmt_ann_t ann;
f47c96aa 1545 ssa_op_iter iter;
f7064d11 1546 unsigned int i;
88088c03 1547 stmt_vec_info stmt_vinfo;
f7064d11
DN
1548 basic_block bb;
1549 tree phi;
88088c03
DN
1550 bool relevant_p, live_p;
1551 tree def, def_stmt;
1552 enum vect_def_type dt;
f7064d11 1553
00518cb1 1554 if (vect_print_dump_info (REPORT_DETAILS))
f7064d11
DN
1555 fprintf (vect_dump, "=== vect_mark_stmts_to_be_vectorized ===");
1556
88088c03
DN
1557 worklist = VEC_alloc (tree, heap, 64);
1558
1559 /* 1. Init worklist. */
1560
f7064d11
DN
1561 bb = loop->header;
1562 for (phi = phi_nodes (bb); phi; phi = PHI_CHAIN (phi))
1563 {
00518cb1 1564 if (vect_print_dump_info (REPORT_DETAILS))
f7064d11
DN
1565 {
1566 fprintf (vect_dump, "init: phi relevant? ");
1567 print_generic_expr (vect_dump, phi, TDF_SLIM);
1568 }
1569
88088c03
DN
1570 if (vect_stmt_relevant_p (phi, loop_vinfo, &relevant_p, &live_p))
1571 vect_mark_relevant (&worklist, phi, relevant_p, live_p);
f7064d11
DN
1572 }
1573
f7064d11
DN
1574 for (i = 0; i < nbbs; i++)
1575 {
1576 bb = bbs[i];
1577 for (si = bsi_start (bb); !bsi_end_p (si); bsi_next (&si))
1578 {
1579 stmt = bsi_stmt (si);
1580
00518cb1 1581 if (vect_print_dump_info (REPORT_DETAILS))
f7064d11
DN
1582 {
1583 fprintf (vect_dump, "init: stmt relevant? ");
1584 print_generic_expr (vect_dump, stmt, TDF_SLIM);
1585 }
1586
88088c03
DN
1587 if (vect_stmt_relevant_p (stmt, loop_vinfo, &relevant_p, &live_p))
1588 vect_mark_relevant (&worklist, stmt, relevant_p, live_p);
f7064d11
DN
1589 }
1590 }
1591
1592
1593 /* 2. Process_worklist */
1594
51d00891 1595 while (VEC_length (tree, worklist) > 0)
f7064d11 1596 {
51d00891 1597 stmt = VEC_pop (tree, worklist);
f7064d11 1598
00518cb1 1599 if (vect_print_dump_info (REPORT_DETAILS))
f7064d11
DN
1600 {
1601 fprintf (vect_dump, "worklist: examine stmt: ");
1602 print_generic_expr (vect_dump, stmt, TDF_SLIM);
1603 }
1604
88088c03
DN
1605 /* Examine the USEs of STMT. For each ssa-name USE thta is defined
1606 in the loop, mark the stmt that defines it (DEF_STMT) as
1607 relevant/irrelevant and live/dead according to the liveness and
1608 relevance properties of STMT.
1609 */
f7064d11 1610
88088c03 1611 gcc_assert (TREE_CODE (stmt) != PHI_NODE);
f7064d11 1612
88088c03
DN
1613 ann = stmt_ann (stmt);
1614 stmt_vinfo = vinfo_for_stmt (stmt);
f7064d11 1615
88088c03
DN
1616 relevant_p = STMT_VINFO_RELEVANT_P (stmt_vinfo);
1617 live_p = STMT_VINFO_LIVE_P (stmt_vinfo);
1618
1619 /* Generally, the liveness and relevance properties of STMT are
1620 propagated to the DEF_STMTs of its USEs:
1621 STMT_VINFO_LIVE_P (DEF_STMT_info) <-- live_p
1622 STMT_VINFO_RELEVANT_P (DEF_STMT_info) <-- relevant_p
1623
1624 Exceptions:
1625
61d3cdbb
DN
1626 (case 1)
1627 If USE is used only for address computations (e.g. array indexing),
88088c03
DN
1628 which does not need to be directly vectorized, then the
1629 liveness/relevance of the respective DEF_STMT is left unchanged.
1630
61d3cdbb
DN
1631 (case 2)
1632 If STMT has been identified as defining a reduction variable, then
1633 we have two cases:
1634 (case 2.1)
1635 The last use of STMT is the reduction-variable, which is defined
1636 by a loop-header-phi. We don't want to mark the phi as live or
1637 relevant (because it does not need to be vectorized, it is handled
1638 as part of the vectorization of the reduction), so in this case we
1639 skip the call to vect_mark_relevant.
1640 (case 2.2)
1641 The rest of the uses of STMT are defined in the loop body. For
1642 the def_stmt of these uses we want to set liveness/relevance
1643 as follows:
1644 STMT_VINFO_LIVE_P (DEF_STMT_info) <-- false
1645 STMT_VINFO_RELEVANT_P (DEF_STMT_info) <-- true
1646 because even though STMT is classified as live (since it defines a
1647 value that is used across loop iterations) and irrelevant (since it
1648 is not used inside the loop), it will be vectorized, and therefore
1649 the corresponding DEF_STMTs need to marked as relevant.
88088c03
DN
1650 */
1651
61d3cdbb 1652 /* case 2.2: */
88088c03
DN
1653 if (STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_reduction_def)
1654 {
1655 gcc_assert (!relevant_p && live_p);
1656 relevant_p = true;
1657 live_p = false;
1658 }
f7064d11 1659
f47c96aa 1660 FOR_EACH_SSA_TREE_OPERAND (use, stmt, iter, SSA_OP_USE)
f7064d11 1661 {
61d3cdbb
DN
1662 /* case 1: we are only interested in uses that need to be vectorized.
1663 Uses that are used for address computation are not considered
1664 relevant.
f7064d11 1665 */
61d3cdbb
DN
1666 if (!exist_non_indexing_operands_for_use_p (use, stmt))
1667 continue;
f7064d11 1668
61d3cdbb
DN
1669 if (!vect_is_simple_use (use, loop_vinfo, &def_stmt, &def, &dt))
1670 {
00518cb1 1671 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
61d3cdbb
DN
1672 fprintf (vect_dump, "not vectorized: unsupported use in stmt.");
1673 VEC_free (tree, heap, worklist);
1674 return false;
1675 }
f7064d11 1676
61d3cdbb
DN
1677 if (!def_stmt || IS_EMPTY_STMT (def_stmt))
1678 continue;
f7064d11 1679
00518cb1 1680 if (vect_print_dump_info (REPORT_DETAILS))
61d3cdbb
DN
1681 {
1682 fprintf (vect_dump, "worklist: examine use %d: ", i);
1683 print_generic_expr (vect_dump, use, TDF_SLIM);
1684 }
88088c03 1685
61d3cdbb
DN
1686 bb = bb_for_stmt (def_stmt);
1687 if (!flow_bb_inside_loop_p (loop, bb))
1688 continue;
88088c03 1689
61d3cdbb
DN
1690 /* case 2.1: the reduction-use does not mark the defining-phi
1691 as relevant. */
1692 if (STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_reduction_def
1693 && TREE_CODE (def_stmt) == PHI_NODE)
1694 continue;
1695
1696 vect_mark_relevant (&worklist, def_stmt, relevant_p, live_p);
f7064d11
DN
1697 }
1698 } /* while worklist */
1699
51d00891 1700 VEC_free (tree, heap, worklist);
f7064d11
DN
1701 return true;
1702}
1703
1704
1705/* Function vect_can_advance_ivs_p
1706
c12cc930 1707 In case the number of iterations that LOOP iterates is unknown at compile
f7064d11
DN
1708 time, an epilog loop will be generated, and the loop induction variables
1709 (IVs) will be "advanced" to the value they are supposed to take just before
1710 the epilog loop. Here we check that the access function of the loop IVs
1711 and the expression that represents the loop bound are simple enough.
1712 These restrictions will be relaxed in the future. */
1713
1714static bool
1715vect_can_advance_ivs_p (loop_vec_info loop_vinfo)
1716{
1717 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
1718 basic_block bb = loop->header;
1719 tree phi;
1720
1721 /* Analyze phi functions of the loop header. */
1722
00518cb1 1723 if (vect_print_dump_info (REPORT_DETAILS))
88088c03
DN
1724 fprintf (vect_dump, "=== vect_can_advance_ivs_p ===");
1725
f7064d11
DN
1726 for (phi = phi_nodes (bb); phi; phi = PHI_CHAIN (phi))
1727 {
1728 tree access_fn = NULL;
1729 tree evolution_part;
1730
00518cb1 1731 if (vect_print_dump_info (REPORT_DETAILS))
f7064d11
DN
1732 {
1733 fprintf (vect_dump, "Analyze phi: ");
1734 print_generic_expr (vect_dump, phi, TDF_SLIM);
1735 }
1736
1737 /* Skip virtual phi's. The data dependences that are associated with
1738 virtual defs/uses (i.e., memory accesses) are analyzed elsewhere. */
1739
1740 if (!is_gimple_reg (SSA_NAME_VAR (PHI_RESULT (phi))))
1741 {
00518cb1 1742 if (vect_print_dump_info (REPORT_DETAILS))
f7064d11
DN
1743 fprintf (vect_dump, "virtual phi. skip.");
1744 continue;
1745 }
1746
61d3cdbb
DN
1747 /* Skip reduction phis. */
1748
1749 if (STMT_VINFO_DEF_TYPE (vinfo_for_stmt (phi)) == vect_reduction_def)
1750 {
00518cb1 1751 if (vect_print_dump_info (REPORT_DETAILS))
61d3cdbb
DN
1752 fprintf (vect_dump, "reduc phi. skip.");
1753 continue;
1754 }
1755
f7064d11
DN
1756 /* Analyze the evolution function. */
1757
1758 access_fn = instantiate_parameters
1759 (loop, analyze_scalar_evolution (loop, PHI_RESULT (phi)));
1760
1761 if (!access_fn)
1762 {
00518cb1 1763 if (vect_print_dump_info (REPORT_DETAILS))
f7064d11
DN
1764 fprintf (vect_dump, "No Access function.");
1765 return false;
1766 }
1767
00518cb1 1768 if (vect_print_dump_info (REPORT_DETAILS))
f7064d11
DN
1769 {
1770 fprintf (vect_dump, "Access function of PHI: ");
1771 print_generic_expr (vect_dump, access_fn, TDF_SLIM);
1772 }
1773
1774 evolution_part = evolution_part_in_loop_num (access_fn, loop->num);
1775
1776 if (evolution_part == NULL_TREE)
88088c03 1777 {
00518cb1 1778 if (vect_print_dump_info (REPORT_DETAILS))
88088c03
DN
1779 fprintf (vect_dump, "No evolution.");
1780 return false;
1781 }
f7064d11
DN
1782
1783 /* FORNOW: We do not transform initial conditions of IVs
1784 which evolution functions are a polynomial of degree >= 2. */
1785
1786 if (tree_is_chrec (evolution_part))
1787 return false;
1788 }
1789
1790 return true;
1791}
1792
1793
1794/* Function vect_get_loop_niters.
1795
1796 Determine how many iterations the loop is executed.
1797 If an expression that represents the number of iterations
1798 can be constructed, place it in NUMBER_OF_ITERATIONS.
1799 Return the loop exit condition. */
1800
1801static tree
1802vect_get_loop_niters (struct loop *loop, tree *number_of_iterations)
1803{
1804 tree niters;
1805
00518cb1 1806 if (vect_print_dump_info (REPORT_DETAILS))
f7064d11
DN
1807 fprintf (vect_dump, "=== get_loop_niters ===");
1808
1809 niters = number_of_iterations_in_loop (loop);
1810
1811 if (niters != NULL_TREE
1812 && niters != chrec_dont_know)
1813 {
1814 *number_of_iterations = niters;
1815
00518cb1 1816 if (vect_print_dump_info (REPORT_DETAILS))
f7064d11
DN
1817 {
1818 fprintf (vect_dump, "==> get_loop_niters:" );
1819 print_generic_expr (vect_dump, *number_of_iterations, TDF_SLIM);
1820 }
1821 }
1822
1823 return get_loop_exit_condition (loop);
1824}
1825
1826
1827/* Function vect_analyze_loop_form.
1828
1829 Verify the following restrictions (some may be relaxed in the future):
1830 - it's an inner-most loop
1831 - number of BBs = 2 (which are the loop header and the latch)
1832 - the loop has a pre-header
1833 - the loop has a single entry and exit
1834 - the loop exit condition is simple enough, and the number of iterations
1835 can be analyzed (a countable loop). */
1836
1837static loop_vec_info
1838vect_analyze_loop_form (struct loop *loop)
1839{
1840 loop_vec_info loop_vinfo;
1841 tree loop_cond;
1842 tree number_of_iterations = NULL;
f7064d11 1843
00518cb1 1844 if (vect_print_dump_info (REPORT_DETAILS))
f7064d11
DN
1845 fprintf (vect_dump, "=== vect_analyze_loop_form ===");
1846
1847 if (loop->inner)
1848 {
00518cb1 1849 if (vect_print_dump_info (REPORT_OUTER_LOOPS))
f7064d11
DN
1850 fprintf (vect_dump, "not vectorized: nested loop.");
1851 return NULL;
1852 }
1853
1854 if (!loop->single_exit
1855 || loop->num_nodes != 2
70388d94 1856 || EDGE_COUNT (loop->header->preds) != 2)
f7064d11 1857 {
00518cb1 1858 if (vect_print_dump_info (REPORT_BAD_FORM_LOOPS))
f7064d11
DN
1859 {
1860 if (!loop->single_exit)
1861 fprintf (vect_dump, "not vectorized: multiple exits.");
1862 else if (loop->num_nodes != 2)
1863 fprintf (vect_dump, "not vectorized: too many BBs in loop.");
1864 else if (EDGE_COUNT (loop->header->preds) != 2)
1865 fprintf (vect_dump, "not vectorized: too many incoming edges.");
f7064d11
DN
1866 }
1867
1868 return NULL;
1869 }
1870
1871 /* We assume that the loop exit condition is at the end of the loop. i.e,
1872 that the loop is represented as a do-while (with a proper if-guard
1873 before the loop if needed), where the loop header contains all the
1874 executable statements, and the latch is empty. */
1875 if (!empty_block_p (loop->latch))
1876 {
00518cb1 1877 if (vect_print_dump_info (REPORT_BAD_FORM_LOOPS))
cc9795d4 1878 fprintf (vect_dump, "not vectorized: unexpected loop form.");
f7064d11
DN
1879 return NULL;
1880 }
1881
f7064d11 1882 /* Make sure there exists a single-predecessor exit bb: */
c5cbcccf 1883 if (!single_pred_p (loop->single_exit->dest))
f7064d11 1884 {
ac59a959
DN
1885 edge e = loop->single_exit;
1886 if (!(e->flags & EDGE_ABNORMAL))
1887 {
d401de95 1888 split_loop_exit_edge (e);
00518cb1 1889 if (vect_print_dump_info (REPORT_DETAILS))
ac59a959 1890 fprintf (vect_dump, "split exit edge.");
ac59a959
DN
1891 }
1892 else
1893 {
00518cb1 1894 if (vect_print_dump_info (REPORT_BAD_FORM_LOOPS))
ac59a959
DN
1895 fprintf (vect_dump, "not vectorized: abnormal loop exit edge.");
1896 return NULL;
1897 }
f7064d11 1898 }
f7064d11
DN
1899
1900 if (empty_block_p (loop->header))
1901 {
00518cb1 1902 if (vect_print_dump_info (REPORT_BAD_FORM_LOOPS))
f7064d11
DN
1903 fprintf (vect_dump, "not vectorized: empty loop.");
1904 return NULL;
1905 }
1906
1907 loop_cond = vect_get_loop_niters (loop, &number_of_iterations);
1908 if (!loop_cond)
1909 {
00518cb1 1910 if (vect_print_dump_info (REPORT_BAD_FORM_LOOPS))
f7064d11
DN
1911 fprintf (vect_dump, "not vectorized: complicated exit condition.");
1912 return NULL;
1913 }
1914
1915 if (!number_of_iterations)
1916 {
00518cb1 1917 if (vect_print_dump_info (REPORT_BAD_FORM_LOOPS))
f7064d11
DN
1918 fprintf (vect_dump,
1919 "not vectorized: number of iterations cannot be computed.");
1920 return NULL;
1921 }
1922
1923 if (chrec_contains_undetermined (number_of_iterations))
1924 {
00518cb1 1925 if (vect_print_dump_info (REPORT_BAD_FORM_LOOPS))
f7064d11
DN
1926 fprintf (vect_dump, "Infinite number of iterations.");
1927 return false;
1928 }
1929
1930 loop_vinfo = new_loop_vec_info (loop);
1931 LOOP_VINFO_NITERS (loop_vinfo) = number_of_iterations;
1932
1933 if (!LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo))
1934 {
00518cb1 1935 if (vect_print_dump_info (REPORT_DETAILS))
f7064d11
DN
1936 {
1937 fprintf (vect_dump, "Symbolic number of iterations is ");
1938 print_generic_expr (vect_dump, number_of_iterations, TDF_DETAILS);
1939 }
1940 }
1941 else
1942 if (LOOP_VINFO_INT_NITERS (loop_vinfo) == 0)
1943 {
00518cb1 1944 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
f7064d11
DN
1945 fprintf (vect_dump, "not vectorized: number of iterations = 0.");
1946 return NULL;
1947 }
1948
1949 LOOP_VINFO_EXIT_COND (loop_vinfo) = loop_cond;
f7064d11
DN
1950
1951 return loop_vinfo;
1952}
1953
1954
1955/* Function vect_analyze_loop.
1956
1957 Apply a set of analyses on LOOP, and create a loop_vec_info struct
1958 for it. The different analyses will record information in the
1959 loop_vec_info struct. */
1960loop_vec_info
1961vect_analyze_loop (struct loop *loop)
1962{
1963 bool ok;
1964 loop_vec_info loop_vinfo;
1965
00518cb1 1966 if (vect_print_dump_info (REPORT_DETAILS))
f7064d11
DN
1967 fprintf (vect_dump, "===== analyze_loop_nest =====");
1968
1969 /* Check the CFG characteristics of the loop (nesting, entry/exit, etc. */
1970
1971 loop_vinfo = vect_analyze_loop_form (loop);
1972 if (!loop_vinfo)
1973 {
00518cb1 1974 if (vect_print_dump_info (REPORT_DETAILS))
f7064d11
DN
1975 fprintf (vect_dump, "bad loop form.");
1976 return NULL;
1977 }
1978
1979 /* Find all data references in the loop (which correspond to vdefs/vuses)
1980 and analyze their evolution in the loop.
1981
1982 FORNOW: Handle only simple, array references, which
1983 alignment can be forced, and aligned pointer-references. */
1984
1985 ok = vect_analyze_data_refs (loop_vinfo);
1986 if (!ok)
1987 {
00518cb1 1988 if (vect_print_dump_info (REPORT_DETAILS))
f7064d11
DN
1989 fprintf (vect_dump, "bad data references.");
1990 destroy_loop_vec_info (loop_vinfo);
1991 return NULL;
1992 }
1993
88088c03
DN
1994 /* Classify all cross-iteration scalar data-flow cycles.
1995 Cross-iteration cycles caused by virtual phis are analyzed separately. */
1996
1997 vect_analyze_scalar_cycles (loop_vinfo);
1998
f7064d11
DN
1999 /* Data-flow analysis to detect stmts that do not need to be vectorized. */
2000
2001 ok = vect_mark_stmts_to_be_vectorized (loop_vinfo);
2002 if (!ok)
2003 {
00518cb1 2004 if (vect_print_dump_info (REPORT_DETAILS))
f7064d11
DN
2005 fprintf (vect_dump, "unexpected pattern.");
2006 destroy_loop_vec_info (loop_vinfo);
2007 return NULL;
2008 }
2009
c12cc930
KB
2010 /* Analyze the alignment of the data-refs in the loop.
2011 Fail if a data reference is found that cannot be vectorized. */
2012
2013 ok = vect_analyze_data_refs_alignment (loop_vinfo);
2014 if (!ok)
2015 {
2016 if (vect_print_dump_info (REPORT_DETAILS))
2017 fprintf (vect_dump, "bad data alignment.");
2018 destroy_loop_vec_info (loop_vinfo);
2019 return NULL;
2020 }
2021
b52485c6
DP
2022 ok = vect_determine_vectorization_factor (loop_vinfo);
2023 if (!ok)
2024 {
00518cb1 2025 if (vect_print_dump_info (REPORT_DETAILS))
b52485c6
DP
2026 fprintf (vect_dump, "can't determine vectorization factor.");
2027 destroy_loop_vec_info (loop_vinfo);
2028 return NULL;
2029 }
2030
f7064d11
DN
2031 /* Analyze data dependences between the data-refs in the loop.
2032 FORNOW: fail at the first data dependence that we encounter. */
2033
2034 ok = vect_analyze_data_ref_dependences (loop_vinfo);
2035 if (!ok)
2036 {
00518cb1 2037 if (vect_print_dump_info (REPORT_DETAILS))
f7064d11
DN
2038 fprintf (vect_dump, "bad data dependence.");
2039 destroy_loop_vec_info (loop_vinfo);
2040 return NULL;
2041 }
2042
2043 /* Analyze the access patterns of the data-refs in the loop (consecutive,
2044 complex, etc.). FORNOW: Only handle consecutive access pattern. */
2045
2046 ok = vect_analyze_data_ref_accesses (loop_vinfo);
2047 if (!ok)
2048 {
00518cb1 2049 if (vect_print_dump_info (REPORT_DETAILS))
f7064d11
DN
2050 fprintf (vect_dump, "bad data access.");
2051 destroy_loop_vec_info (loop_vinfo);
2052 return NULL;
2053 }
2054
c12cc930
KB
2055 /* This pass will decide on using loop versioning and/or loop peeling in
2056 order to enhance the alignment of data references in the loop. */
f7064d11 2057
c12cc930 2058 ok = vect_enhance_data_refs_alignment (loop_vinfo);
f7064d11
DN
2059 if (!ok)
2060 {
00518cb1 2061 if (vect_print_dump_info (REPORT_DETAILS))
f7064d11
DN
2062 fprintf (vect_dump, "bad data alignment.");
2063 destroy_loop_vec_info (loop_vinfo);
2064 return NULL;
2065 }
2066
2067 /* Scan all the operations in the loop and make sure they are
2068 vectorizable. */
2069
2070 ok = vect_analyze_operations (loop_vinfo);
2071 if (!ok)
2072 {
00518cb1 2073 if (vect_print_dump_info (REPORT_DETAILS))
f7064d11
DN
2074 fprintf (vect_dump, "bad operation or unsupported loop bound.");
2075 destroy_loop_vec_info (loop_vinfo);
2076 return NULL;
2077 }
2078
2079 LOOP_VINFO_VECTORIZABLE_P (loop_vinfo) = 1;
2080
2081 return loop_vinfo;
2082}