]>
Commit | Line | Data |
---|---|---|
2e681715 | 1 | /* Analysis Utilities for Loop Vectorization. |
f7064d11 DN |
2 | Copyright (C) 2003,2004,2005 Free Software Foundation, Inc. |
3 | Contributed by Dorit Naishlos <dorit@il.ibm.com> | |
4 | ||
5 | This file is part of GCC. | |
6 | ||
7 | GCC is free software; you can redistribute it and/or modify it under | |
8 | the terms of the GNU General Public License as published by the Free | |
9 | Software Foundation; either version 2, or (at your option) any later | |
10 | version. | |
11 | ||
12 | GCC is distributed in the hope that it will be useful, but WITHOUT ANY | |
13 | WARRANTY; without even the implied warranty of MERCHANTABILITY or | |
14 | FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License | |
15 | for more details. | |
16 | ||
17 | You should have received a copy of the GNU General Public License | |
18 | along with GCC; see the file COPYING. If not, write to the Free | |
19 | Software Foundation, 59 Temple Place - Suite 330, Boston, MA | |
20 | 02111-1307, USA. */ | |
21 | ||
22 | #include "config.h" | |
23 | #include "system.h" | |
24 | #include "coretypes.h" | |
25 | #include "tm.h" | |
26 | #include "errors.h" | |
27 | #include "ggc.h" | |
28 | #include "tree.h" | |
29 | #include "basic-block.h" | |
30 | #include "diagnostic.h" | |
31 | #include "tree-flow.h" | |
32 | #include "tree-dump.h" | |
33 | #include "timevar.h" | |
34 | #include "cfgloop.h" | |
35 | #include "expr.h" | |
36 | #include "optabs.h" | |
37 | #include "tree-chrec.h" | |
38 | #include "tree-data-ref.h" | |
39 | #include "tree-scalar-evolution.h" | |
40 | #include "tree-vectorizer.h" | |
41 | ||
42 | /* Main analysis functions. */ | |
43 | static loop_vec_info vect_analyze_loop_form (struct loop *); | |
44 | static bool vect_analyze_data_refs (loop_vec_info); | |
45 | static bool vect_mark_stmts_to_be_vectorized (loop_vec_info); | |
46 | static bool vect_analyze_scalar_cycles (loop_vec_info); | |
47 | static bool vect_analyze_data_ref_accesses (loop_vec_info); | |
48 | static bool vect_analyze_data_ref_dependences (loop_vec_info); | |
49 | static bool vect_analyze_data_refs_alignment (loop_vec_info); | |
50 | static bool vect_compute_data_refs_alignment (loop_vec_info); | |
51 | static void vect_enhance_data_refs_alignment (loop_vec_info); | |
52 | static bool vect_analyze_operations (loop_vec_info); | |
5f55a1ba | 53 | static bool vect_determine_vectorization_factor (loop_vec_info); |
f7064d11 DN |
54 | |
55 | /* Utility functions for the analyses. */ | |
56 | static bool exist_non_indexing_operands_for_use_p (tree, tree); | |
51d00891 | 57 | static void vect_mark_relevant (VEC(tree,heap) **, tree); |
f7064d11 DN |
58 | static bool vect_stmt_relevant_p (tree, loop_vec_info); |
59 | static tree vect_get_loop_niters (struct loop *, tree *); | |
60 | static bool vect_analyze_data_ref_dependence | |
61 | (struct data_reference *, struct data_reference *, loop_vec_info); | |
62 | static bool vect_compute_data_ref_alignment (struct data_reference *); | |
63 | static bool vect_analyze_data_ref_access (struct data_reference *); | |
64 | static struct data_reference * vect_analyze_pointer_ref_access | |
65 | (tree, tree, bool, tree, tree *, tree *); | |
66 | static bool vect_can_advance_ivs_p (loop_vec_info); | |
67 | static tree vect_get_ptr_offset (tree, tree, tree *); | |
68 | static bool vect_analyze_offset_expr (tree, struct loop *, tree, tree *, | |
69 | tree *, tree *); | |
70 | static bool vect_base_addr_differ_p (struct data_reference *, | |
71 | struct data_reference *drb, bool *); | |
72 | static tree vect_object_analysis (tree, tree, bool, tree, | |
73 | struct data_reference **, tree *, tree *, | |
8bb46326 DN |
74 | tree *, bool *, tree *, struct ptr_info_def **, |
75 | subvar_t *); | |
f7064d11 DN |
76 | static tree vect_address_analysis (tree, tree, bool, tree, |
77 | struct data_reference *, tree *, tree *, | |
78 | tree *, bool *); | |
f7064d11 DN |
79 | |
80 | ||
81 | /* Function vect_get_ptr_offset | |
82 | ||
83 | Compute the OFFSET modulo vector-type alignment of pointer REF in bits. */ | |
84 | ||
85 | static tree | |
86 | vect_get_ptr_offset (tree ref ATTRIBUTE_UNUSED, | |
87 | tree vectype ATTRIBUTE_UNUSED, | |
88 | tree *offset ATTRIBUTE_UNUSED) | |
89 | { | |
90 | /* TODO: Use alignment information. */ | |
91 | return NULL_TREE; | |
92 | } | |
93 | ||
94 | ||
95 | /* Function vect_analyze_offset_expr | |
96 | ||
97 | Given an offset expression EXPR received from get_inner_reference, analyze | |
98 | it and create an expression for INITIAL_OFFSET by substituting the variables | |
99 | of EXPR with initial_condition of the corresponding access_fn in the loop. | |
100 | E.g., | |
101 | for i | |
102 | for (j = 3; j < N; j++) | |
103 | a[j].b[i][j] = 0; | |
104 | ||
105 | For a[j].b[i][j], EXPR will be 'i * C_i + j * C_j + C'. 'i' cannot be | |
106 | substituted, since its access_fn in the inner loop is i. 'j' will be | |
107 | substituted with 3. An INITIAL_OFFSET will be 'i * C_i + C`', where | |
108 | C` = 3 * C_j + C. | |
109 | ||
110 | Compute MISALIGN (the misalignment of the data reference initial access from | |
111 | its base) if possible. Misalignment can be calculated only if all the | |
112 | variables can be substituted with constants, or if a variable is multiplied | |
113 | by a multiple of VECTYPE_ALIGNMENT. In the above example, since 'i' cannot | |
114 | be substituted, MISALIGN will be NULL_TREE in case that C_i is not a multiple | |
115 | of VECTYPE_ALIGNMENT, and C` otherwise. (We perform MISALIGN modulo | |
116 | VECTYPE_ALIGNMENT computation in the caller of this function). | |
117 | ||
118 | STEP is an evolution of the data reference in this loop in bytes. | |
119 | In the above example, STEP is C_j. | |
120 | ||
121 | Return FALSE, if the analysis fails, e.g., there is no access_fn for a | |
122 | variable. In this case, all the outputs (INITIAL_OFFSET, MISALIGN and STEP) | |
123 | are NULL_TREEs. Otherwise, return TRUE. | |
124 | ||
125 | */ | |
126 | ||
127 | static bool | |
128 | vect_analyze_offset_expr (tree expr, | |
129 | struct loop *loop, | |
130 | tree vectype_alignment, | |
131 | tree *initial_offset, | |
132 | tree *misalign, | |
133 | tree *step) | |
134 | { | |
135 | tree oprnd0; | |
136 | tree oprnd1; | |
137 | tree left_offset = ssize_int (0); | |
138 | tree right_offset = ssize_int (0); | |
139 | tree left_misalign = ssize_int (0); | |
140 | tree right_misalign = ssize_int (0); | |
141 | tree left_step = ssize_int (0); | |
142 | tree right_step = ssize_int (0); | |
143 | enum tree_code code; | |
144 | tree init, evolution; | |
145 | ||
146 | *step = NULL_TREE; | |
147 | *misalign = NULL_TREE; | |
148 | *initial_offset = NULL_TREE; | |
149 | ||
150 | /* Strip conversions that don't narrow the mode. */ | |
151 | expr = vect_strip_conversion (expr); | |
152 | if (!expr) | |
153 | return false; | |
154 | ||
155 | /* Stop conditions: | |
156 | 1. Constant. */ | |
157 | if (TREE_CODE (expr) == INTEGER_CST) | |
158 | { | |
159 | *initial_offset = fold_convert (ssizetype, expr); | |
160 | *misalign = fold_convert (ssizetype, expr); | |
161 | *step = ssize_int (0); | |
162 | return true; | |
163 | } | |
164 | ||
165 | /* 2. Variable. Try to substitute with initial_condition of the corresponding | |
166 | access_fn in the current loop. */ | |
167 | if (SSA_VAR_P (expr)) | |
168 | { | |
169 | tree access_fn = analyze_scalar_evolution (loop, expr); | |
170 | ||
171 | if (access_fn == chrec_dont_know) | |
172 | /* No access_fn. */ | |
173 | return false; | |
174 | ||
175 | init = initial_condition_in_loop_num (access_fn, loop->num); | |
176 | if (init == expr && !expr_invariant_in_loop_p (loop, init)) | |
177 | /* Not enough information: may be not loop invariant. | |
178 | E.g., for a[b[i]], we get a[D], where D=b[i]. EXPR is D, its | |
179 | initial_condition is D, but it depends on i - loop's induction | |
180 | variable. */ | |
181 | return false; | |
182 | ||
183 | evolution = evolution_part_in_loop_num (access_fn, loop->num); | |
184 | if (evolution && TREE_CODE (evolution) != INTEGER_CST) | |
185 | /* Evolution is not constant. */ | |
186 | return false; | |
187 | ||
188 | if (TREE_CODE (init) == INTEGER_CST) | |
189 | *misalign = fold_convert (ssizetype, init); | |
190 | else | |
191 | /* Not constant, misalignment cannot be calculated. */ | |
192 | *misalign = NULL_TREE; | |
193 | ||
194 | *initial_offset = fold_convert (ssizetype, init); | |
195 | ||
196 | *step = evolution ? fold_convert (ssizetype, evolution) : ssize_int (0); | |
197 | return true; | |
198 | } | |
199 | ||
200 | /* Recursive computation. */ | |
201 | if (!BINARY_CLASS_P (expr)) | |
202 | { | |
203 | /* We expect to get binary expressions (PLUS/MINUS and MULT). */ | |
204 | if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC)) | |
205 | { | |
206 | fprintf (vect_dump, "Not binary expression "); | |
207 | print_generic_expr (vect_dump, expr, TDF_SLIM); | |
208 | } | |
209 | return false; | |
210 | } | |
211 | oprnd0 = TREE_OPERAND (expr, 0); | |
212 | oprnd1 = TREE_OPERAND (expr, 1); | |
213 | ||
214 | if (!vect_analyze_offset_expr (oprnd0, loop, vectype_alignment, &left_offset, | |
215 | &left_misalign, &left_step) | |
216 | || !vect_analyze_offset_expr (oprnd1, loop, vectype_alignment, | |
217 | &right_offset, &right_misalign, &right_step)) | |
218 | return false; | |
219 | ||
220 | /* The type of the operation: plus, minus or mult. */ | |
221 | code = TREE_CODE (expr); | |
222 | switch (code) | |
223 | { | |
224 | case MULT_EXPR: | |
225 | if (TREE_CODE (right_offset) != INTEGER_CST) | |
226 | /* RIGHT_OFFSET can be not constant. For example, for arrays of variable | |
227 | sized types. | |
228 | FORNOW: We don't support such cases. */ | |
229 | return false; | |
230 | ||
231 | /* Strip conversions that don't narrow the mode. */ | |
232 | left_offset = vect_strip_conversion (left_offset); | |
233 | if (!left_offset) | |
234 | return false; | |
235 | /* Misalignment computation. */ | |
236 | if (SSA_VAR_P (left_offset)) | |
237 | { | |
238 | /* If the left side contains variables that can't be substituted with | |
239 | constants, we check if the right side is a multiple of ALIGNMENT. | |
240 | */ | |
241 | if (integer_zerop (size_binop (TRUNC_MOD_EXPR, right_offset, | |
242 | fold_convert (ssizetype, vectype_alignment)))) | |
243 | *misalign = ssize_int (0); | |
244 | else | |
245 | /* If the remainder is not zero or the right side isn't constant, | |
246 | we can't compute misalignment. */ | |
247 | *misalign = NULL_TREE; | |
248 | } | |
249 | else | |
250 | { | |
251 | /* The left operand was successfully substituted with constant. */ | |
252 | if (left_misalign) | |
253 | /* In case of EXPR '(i * C1 + j) * C2', LEFT_MISALIGN is | |
254 | NULL_TREE. */ | |
255 | *misalign = size_binop (code, left_misalign, right_misalign); | |
256 | else | |
257 | *misalign = NULL_TREE; | |
258 | } | |
259 | ||
260 | /* Step calculation. */ | |
261 | /* Multiply the step by the right operand. */ | |
262 | *step = size_binop (MULT_EXPR, left_step, right_offset); | |
263 | break; | |
264 | ||
265 | case PLUS_EXPR: | |
266 | case MINUS_EXPR: | |
267 | /* Combine the recursive calculations for step and misalignment. */ | |
268 | *step = size_binop (code, left_step, right_step); | |
269 | ||
270 | if (left_misalign && right_misalign) | |
271 | *misalign = size_binop (code, left_misalign, right_misalign); | |
272 | else | |
273 | *misalign = NULL_TREE; | |
274 | ||
275 | break; | |
276 | ||
277 | default: | |
278 | gcc_unreachable (); | |
279 | } | |
280 | ||
281 | /* Compute offset. */ | |
282 | *initial_offset = fold_convert (ssizetype, | |
283 | fold (build2 (code, TREE_TYPE (left_offset), | |
284 | left_offset, | |
285 | right_offset))); | |
286 | return true; | |
287 | } | |
288 | ||
289 | ||
5f55a1ba DN |
290 | /* Function vect_determine_vectorization_factor |
291 | ||
292 | Determine the vectorization factor (VF). VF is the number of data elements | |
293 | that are operated upon in parallel in a single iteration of the vectorized | |
294 | loop. For example, when vectorizing a loop that operates on 4byte elements, | |
295 | on a target with vector size (VS) 16byte, the VF is set to 4, since 4 | |
296 | elements can fit in a single vector register. | |
297 | ||
298 | We currently support vectorization of loops in which all types operated upon | |
299 | are of the same size. Therefore this function currently sets VF according to | |
300 | the size of the types operated upon, and fails if there are multiple sizes | |
301 | in the loop. | |
302 | ||
303 | VF is also the factor by which the loop iterations are strip-mined, e.g.: | |
304 | original loop: | |
305 | for (i=0; i<N; i++){ | |
306 | a[i] = b[i] + c[i]; | |
307 | } | |
308 | ||
309 | vectorized loop: | |
310 | for (i=0; i<N; i+=VF){ | |
311 | a[i:VF] = b[i:VF] + c[i:VF]; | |
312 | } | |
313 | */ | |
314 | ||
315 | static bool | |
316 | vect_determine_vectorization_factor (loop_vec_info loop_vinfo) | |
317 | { | |
318 | struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo); | |
319 | basic_block *bbs = LOOP_VINFO_BBS (loop_vinfo); | |
320 | int nbbs = loop->num_nodes; | |
321 | block_stmt_iterator si; | |
322 | unsigned int vectorization_factor = 0; | |
323 | int i; | |
324 | tree scalar_type; | |
325 | ||
326 | if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC)) | |
327 | fprintf (vect_dump, "=== vect_determine_vectorization_factor ==="); | |
328 | ||
329 | for (i = 0; i < nbbs; i++) | |
330 | { | |
331 | basic_block bb = bbs[i]; | |
332 | ||
333 | for (si = bsi_start (bb); !bsi_end_p (si); bsi_next (&si)) | |
334 | { | |
335 | tree stmt = bsi_stmt (si); | |
336 | unsigned int nunits; | |
337 | stmt_vec_info stmt_info = vinfo_for_stmt (stmt); | |
338 | tree vectype; | |
339 | ||
340 | if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC)) | |
341 | { | |
342 | fprintf (vect_dump, "==> examining statement: "); | |
343 | print_generic_expr (vect_dump, stmt, TDF_SLIM); | |
344 | } | |
345 | ||
346 | gcc_assert (stmt_info); | |
347 | /* skip stmts which do not need to be vectorized. */ | |
348 | if (!STMT_VINFO_RELEVANT_P (stmt_info)) | |
349 | continue; | |
350 | ||
351 | if (VECTOR_MODE_P (TYPE_MODE (TREE_TYPE (stmt)))) | |
352 | { | |
353 | if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS, | |
354 | LOOP_LOC (loop_vinfo))) | |
355 | { | |
356 | fprintf (vect_dump, "not vectorized: vector stmt in loop:"); | |
357 | print_generic_expr (vect_dump, stmt, TDF_SLIM); | |
358 | } | |
359 | return false; | |
360 | } | |
361 | ||
362 | if (STMT_VINFO_DATA_REF (stmt_info)) | |
363 | scalar_type = TREE_TYPE (DR_REF (STMT_VINFO_DATA_REF (stmt_info))); | |
364 | else if (TREE_CODE (stmt) == MODIFY_EXPR) | |
365 | scalar_type = TREE_TYPE (TREE_OPERAND (stmt, 0)); | |
366 | else | |
367 | scalar_type = TREE_TYPE (stmt); | |
368 | ||
369 | if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC)) | |
370 | { | |
371 | fprintf (vect_dump, "get vectype for scalar type: "); | |
372 | print_generic_expr (vect_dump, scalar_type, TDF_SLIM); | |
373 | } | |
374 | ||
375 | vectype = get_vectype_for_scalar_type (scalar_type); | |
376 | if (!vectype) | |
377 | { | |
378 | if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS, | |
379 | LOOP_LOC (loop_vinfo))) | |
380 | { | |
381 | fprintf (vect_dump, "not vectorized: unsupported data-type "); | |
382 | print_generic_expr (vect_dump, scalar_type, TDF_SLIM); | |
383 | } | |
384 | return false; | |
385 | } | |
386 | if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC)) | |
387 | { | |
388 | fprintf (vect_dump, "vectype: "); | |
389 | print_generic_expr (vect_dump, vectype, TDF_SLIM); | |
390 | } | |
391 | STMT_VINFO_VECTYPE (stmt_info) = vectype; | |
392 | ||
57d1677d | 393 | nunits = TYPE_VECTOR_SUBPARTS (vectype); |
5f55a1ba DN |
394 | if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC)) |
395 | fprintf (vect_dump, "nunits = %d", nunits); | |
396 | ||
397 | if (vectorization_factor) | |
398 | { | |
399 | /* FORNOW: don't allow mixed units. | |
400 | This restriction will be relaxed in the future. */ | |
401 | if (nunits != vectorization_factor) | |
402 | { | |
403 | if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS, | |
404 | LOOP_LOC (loop_vinfo))) | |
405 | fprintf (vect_dump, "not vectorized: mixed data-types"); | |
406 | return false; | |
407 | } | |
408 | } | |
409 | else | |
410 | vectorization_factor = nunits; | |
411 | ||
412 | #ifdef ENABLE_CHECKING | |
413 | gcc_assert (GET_MODE_SIZE (TYPE_MODE (scalar_type)) | |
414 | * vectorization_factor == UNITS_PER_SIMD_WORD); | |
415 | #endif | |
416 | } | |
417 | } | |
418 | ||
419 | /* TODO: Analyze cost. Decide if worth while to vectorize. */ | |
420 | ||
421 | if (vectorization_factor <= 1) | |
422 | { | |
423 | if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS, | |
424 | LOOP_LOC (loop_vinfo))) | |
425 | fprintf (vect_dump, "not vectorized: unsupported data-type"); | |
426 | return false; | |
427 | } | |
428 | LOOP_VINFO_VECT_FACTOR (loop_vinfo) = vectorization_factor; | |
429 | ||
430 | return true; | |
431 | } | |
432 | ||
433 | ||
f7064d11 DN |
434 | /* Function vect_analyze_operations. |
435 | ||
436 | Scan the loop stmts and make sure they are all vectorizable. */ | |
437 | ||
438 | static bool | |
439 | vect_analyze_operations (loop_vec_info loop_vinfo) | |
440 | { | |
441 | struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo); | |
442 | basic_block *bbs = LOOP_VINFO_BBS (loop_vinfo); | |
443 | int nbbs = loop->num_nodes; | |
444 | block_stmt_iterator si; | |
445 | unsigned int vectorization_factor = 0; | |
446 | int i; | |
447 | bool ok; | |
f7064d11 DN |
448 | |
449 | if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC)) | |
450 | fprintf (vect_dump, "=== vect_analyze_operations ==="); | |
451 | ||
5f55a1ba DN |
452 | gcc_assert (LOOP_VINFO_VECT_FACTOR (loop_vinfo)); |
453 | vectorization_factor = LOOP_VINFO_VECT_FACTOR (loop_vinfo); | |
454 | ||
f7064d11 DN |
455 | for (i = 0; i < nbbs; i++) |
456 | { | |
457 | basic_block bb = bbs[i]; | |
458 | ||
459 | for (si = bsi_start (bb); !bsi_end_p (si); bsi_next (&si)) | |
460 | { | |
461 | tree stmt = bsi_stmt (si); | |
f7064d11 | 462 | stmt_vec_info stmt_info = vinfo_for_stmt (stmt); |
f7064d11 DN |
463 | |
464 | if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC)) | |
465 | { | |
466 | fprintf (vect_dump, "==> examining statement: "); | |
467 | print_generic_expr (vect_dump, stmt, TDF_SLIM); | |
468 | } | |
469 | ||
470 | gcc_assert (stmt_info); | |
471 | ||
472 | /* skip stmts which do not need to be vectorized. | |
473 | this is expected to include: | |
474 | - the COND_EXPR which is the loop exit condition | |
475 | - any LABEL_EXPRs in the loop | |
476 | - computations that are used only for array indexing or loop | |
477 | control */ | |
478 | ||
479 | if (!STMT_VINFO_RELEVANT_P (stmt_info)) | |
480 | { | |
481 | if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC)) | |
482 | fprintf (vect_dump, "irrelevant."); | |
483 | continue; | |
484 | } | |
485 | ||
5f55a1ba DN |
486 | #ifdef ENABLE_CHECKING |
487 | if (STMT_VINFO_RELEVANT_P (stmt_info)) | |
488 | { | |
489 | gcc_assert (!VECTOR_MODE_P (TYPE_MODE (TREE_TYPE (stmt)))); | |
490 | gcc_assert (STMT_VINFO_VECTYPE (stmt_info)); | |
491 | } | |
492 | #endif | |
f7064d11 DN |
493 | |
494 | ok = (vectorizable_operation (stmt, NULL, NULL) | |
495 | || vectorizable_assignment (stmt, NULL, NULL) | |
496 | || vectorizable_load (stmt, NULL, NULL) | |
b52485c6 DP |
497 | || vectorizable_store (stmt, NULL, NULL) |
498 | || vectorizable_condition (stmt, NULL, NULL)); | |
f7064d11 DN |
499 | |
500 | if (!ok) | |
501 | { | |
502 | if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS, | |
503 | LOOP_LOC (loop_vinfo))) | |
504 | { | |
505 | fprintf (vect_dump, "not vectorized: stmt not supported: "); | |
506 | print_generic_expr (vect_dump, stmt, TDF_SLIM); | |
507 | } | |
508 | return false; | |
509 | } | |
f7064d11 DN |
510 | } |
511 | } | |
512 | ||
513 | /* TODO: Analyze cost. Decide if worth while to vectorize. */ | |
514 | ||
f7064d11 DN |
515 | if (LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo) |
516 | && vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC)) | |
517 | fprintf (vect_dump, | |
518 | "vectorization_factor = %d, niters = " HOST_WIDE_INT_PRINT_DEC, | |
519 | vectorization_factor, LOOP_VINFO_INT_NITERS (loop_vinfo)); | |
520 | ||
521 | if (LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo) | |
522 | && LOOP_VINFO_INT_NITERS (loop_vinfo) < vectorization_factor) | |
523 | { | |
524 | if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS, | |
525 | LOOP_LOC (loop_vinfo))) | |
526 | fprintf (vect_dump, "not vectorized: iteration count too small."); | |
527 | return false; | |
528 | } | |
529 | ||
530 | if (!LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo) | |
531 | || LOOP_VINFO_INT_NITERS (loop_vinfo) % vectorization_factor != 0) | |
532 | { | |
533 | if (vect_print_dump_info (REPORT_DETAILS, LOOP_LOC (loop_vinfo))) | |
534 | fprintf (vect_dump, "epilog loop required."); | |
535 | if (!vect_can_advance_ivs_p (loop_vinfo)) | |
536 | { | |
537 | if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS, | |
538 | LOOP_LOC (loop_vinfo))) | |
539 | fprintf (vect_dump, | |
540 | "not vectorized: can't create epilog loop 1."); | |
541 | return false; | |
542 | } | |
70388d94 | 543 | if (!slpeel_can_duplicate_loop_p (loop, loop->single_exit)) |
f7064d11 DN |
544 | { |
545 | if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS, | |
546 | LOOP_LOC (loop_vinfo))) | |
547 | fprintf (vect_dump, | |
548 | "not vectorized: can't create epilog loop 2."); | |
549 | return false; | |
550 | } | |
551 | } | |
552 | ||
553 | return true; | |
554 | } | |
555 | ||
556 | ||
557 | /* Function exist_non_indexing_operands_for_use_p | |
558 | ||
559 | USE is one of the uses attached to STMT. Check if USE is | |
560 | used in STMT for anything other than indexing an array. */ | |
561 | ||
562 | static bool | |
563 | exist_non_indexing_operands_for_use_p (tree use, tree stmt) | |
564 | { | |
565 | tree operand; | |
566 | stmt_vec_info stmt_info = vinfo_for_stmt (stmt); | |
567 | ||
568 | /* USE corresponds to some operand in STMT. If there is no data | |
569 | reference in STMT, then any operand that corresponds to USE | |
570 | is not indexing an array. */ | |
571 | if (!STMT_VINFO_DATA_REF (stmt_info)) | |
572 | return true; | |
573 | ||
574 | /* STMT has a data_ref. FORNOW this means that its of one of | |
575 | the following forms: | |
576 | -1- ARRAY_REF = var | |
577 | -2- var = ARRAY_REF | |
578 | (This should have been verified in analyze_data_refs). | |
579 | ||
580 | 'var' in the second case corresponds to a def, not a use, | |
581 | so USE cannot correspond to any operands that are not used | |
582 | for array indexing. | |
583 | ||
584 | Therefore, all we need to check is if STMT falls into the | |
585 | first case, and whether var corresponds to USE. */ | |
586 | ||
587 | if (TREE_CODE (TREE_OPERAND (stmt, 0)) == SSA_NAME) | |
588 | return false; | |
589 | ||
590 | operand = TREE_OPERAND (stmt, 1); | |
591 | ||
592 | if (TREE_CODE (operand) != SSA_NAME) | |
593 | return false; | |
594 | ||
595 | if (operand == use) | |
596 | return true; | |
597 | ||
598 | return false; | |
599 | } | |
600 | ||
601 | ||
602 | /* Function vect_analyze_scalar_cycles. | |
603 | ||
604 | Examine the cross iteration def-use cycles of scalar variables, by | |
605 | analyzing the loop (scalar) PHIs; verify that the cross iteration def-use | |
606 | cycles that they represent do not impede vectorization. | |
607 | ||
608 | FORNOW: Reduction as in the following loop, is not supported yet: | |
609 | loop1: | |
610 | for (i=0; i<N; i++) | |
611 | sum += a[i]; | |
612 | The cross-iteration cycle corresponding to variable 'sum' will be | |
613 | considered too complicated and will impede vectorization. | |
614 | ||
615 | FORNOW: Induction as in the following loop, is not supported yet: | |
616 | loop2: | |
617 | for (i=0; i<N; i++) | |
618 | a[i] = i; | |
619 | ||
620 | However, the following loop *is* vectorizable: | |
621 | loop3: | |
622 | for (i=0; i<N; i++) | |
623 | a[i] = b[i]; | |
624 | ||
625 | In both loops there exists a def-use cycle for the variable i: | |
626 | loop: i_2 = PHI (i_0, i_1) | |
627 | a[i_2] = ...; | |
628 | i_1 = i_2 + 1; | |
629 | GOTO loop; | |
630 | ||
631 | The evolution of the above cycle is considered simple enough, | |
632 | however, we also check that the cycle does not need to be | |
633 | vectorized, i.e - we check that the variable that this cycle | |
634 | defines is only used for array indexing or in stmts that do not | |
635 | need to be vectorized. This is not the case in loop2, but it | |
636 | *is* the case in loop3. */ | |
637 | ||
638 | static bool | |
639 | vect_analyze_scalar_cycles (loop_vec_info loop_vinfo) | |
640 | { | |
641 | tree phi; | |
642 | struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo); | |
643 | basic_block bb = loop->header; | |
644 | tree dummy; | |
645 | ||
646 | if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC)) | |
647 | fprintf (vect_dump, "=== vect_analyze_scalar_cycles ==="); | |
648 | ||
649 | for (phi = phi_nodes (bb); phi; phi = PHI_CHAIN (phi)) | |
650 | { | |
651 | tree access_fn = NULL; | |
652 | ||
653 | if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC)) | |
654 | { | |
655 | fprintf (vect_dump, "Analyze phi: "); | |
656 | print_generic_expr (vect_dump, phi, TDF_SLIM); | |
657 | } | |
658 | ||
659 | /* Skip virtual phi's. The data dependences that are associated with | |
660 | virtual defs/uses (i.e., memory accesses) are analyzed elsewhere. */ | |
661 | ||
662 | if (!is_gimple_reg (SSA_NAME_VAR (PHI_RESULT (phi)))) | |
663 | { | |
664 | if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC)) | |
665 | fprintf (vect_dump, "virtual phi. skip."); | |
666 | continue; | |
667 | } | |
668 | ||
669 | /* Analyze the evolution function. */ | |
670 | ||
671 | /* FORNOW: The only scalar cross-iteration cycles that we allow are | |
672 | those of loop induction variables; This property is verified here. | |
673 | ||
674 | Furthermore, if that induction variable is used in an operation | |
675 | that needs to be vectorized (i.e, is not solely used to index | |
676 | arrays and check the exit condition) - we do not support its | |
677 | vectorization yet. This property is verified in vect_is_simple_use, | |
678 | during vect_analyze_operations. */ | |
679 | ||
680 | access_fn = /* instantiate_parameters | |
681 | (loop,*/ | |
682 | analyze_scalar_evolution (loop, PHI_RESULT (phi)); | |
683 | ||
684 | if (!access_fn) | |
685 | { | |
686 | if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS, | |
687 | LOOP_LOC (loop_vinfo))) | |
688 | fprintf (vect_dump, "not vectorized: unsupported scalar cycle."); | |
689 | return false; | |
690 | } | |
691 | ||
692 | if (vect_print_dump_info (REPORT_DETAILS, | |
693 | LOOP_LOC (loop_vinfo))) | |
694 | { | |
695 | fprintf (vect_dump, "Access function of PHI: "); | |
696 | print_generic_expr (vect_dump, access_fn, TDF_SLIM); | |
697 | } | |
698 | ||
699 | if (!vect_is_simple_iv_evolution (loop->num, access_fn, &dummy, &dummy)) | |
700 | { | |
701 | if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS, | |
702 | LOOP_LOC (loop_vinfo))) | |
703 | fprintf (vect_dump, "not vectorized: unsupported scalar cycle."); | |
704 | return false; | |
705 | } | |
706 | } | |
707 | ||
708 | return true; | |
709 | } | |
710 | ||
711 | ||
712 | /* Function vect_base_addr_differ_p. | |
713 | ||
714 | This is the simplest data dependence test: determines whether the | |
715 | data references A and B access the same array/region. Returns | |
716 | false when the property is not computable at compile time. | |
717 | Otherwise return true, and DIFFER_P will record the result. This | |
718 | utility will not be necessary when alias_sets_conflict_p will be | |
719 | less conservative. */ | |
720 | ||
721 | static bool | |
722 | vect_base_addr_differ_p (struct data_reference *dra, | |
723 | struct data_reference *drb, | |
724 | bool *differ_p) | |
725 | { | |
726 | tree stmt_a = DR_STMT (dra); | |
727 | stmt_vec_info stmt_info_a = vinfo_for_stmt (stmt_a); | |
728 | tree stmt_b = DR_STMT (drb); | |
729 | stmt_vec_info stmt_info_b = vinfo_for_stmt (stmt_b); | |
730 | tree addr_a = STMT_VINFO_VECT_DR_BASE_ADDRESS (stmt_info_a); | |
731 | tree addr_b = STMT_VINFO_VECT_DR_BASE_ADDRESS (stmt_info_b); | |
732 | tree type_a = TREE_TYPE (addr_a); | |
733 | tree type_b = TREE_TYPE (addr_b); | |
734 | HOST_WIDE_INT alias_set_a, alias_set_b; | |
735 | ||
736 | gcc_assert (POINTER_TYPE_P (type_a) && POINTER_TYPE_P (type_b)); | |
737 | ||
738 | /* Both references are ADDR_EXPR, i.e., we have the objects. */ | |
739 | if (TREE_CODE (addr_a) == ADDR_EXPR && TREE_CODE (addr_b) == ADDR_EXPR) | |
740 | return array_base_name_differ_p (dra, drb, differ_p); | |
741 | ||
742 | alias_set_a = (TREE_CODE (addr_a) == ADDR_EXPR) ? | |
743 | get_alias_set (TREE_OPERAND (addr_a, 0)) : get_alias_set (addr_a); | |
744 | alias_set_b = (TREE_CODE (addr_b) == ADDR_EXPR) ? | |
745 | get_alias_set (TREE_OPERAND (addr_b, 0)) : get_alias_set (addr_b); | |
746 | ||
747 | if (!alias_sets_conflict_p (alias_set_a, alias_set_b)) | |
748 | { | |
749 | *differ_p = true; | |
750 | return true; | |
751 | } | |
752 | ||
753 | /* An instruction writing through a restricted pointer is "independent" of any | |
754 | instruction reading or writing through a different pointer, in the same | |
755 | block/scope. */ | |
756 | else if ((TYPE_RESTRICT (type_a) && !DR_IS_READ (dra)) | |
757 | || (TYPE_RESTRICT (type_b) && !DR_IS_READ (drb))) | |
758 | { | |
759 | *differ_p = true; | |
760 | return true; | |
761 | } | |
762 | return false; | |
763 | } | |
764 | ||
765 | ||
766 | /* Function vect_analyze_data_ref_dependence. | |
767 | ||
768 | Return TRUE if there (might) exist a dependence between a memory-reference | |
769 | DRA and a memory-reference DRB. */ | |
770 | ||
771 | static bool | |
772 | vect_analyze_data_ref_dependence (struct data_reference *dra, | |
773 | struct data_reference *drb, | |
774 | loop_vec_info loop_vinfo) | |
775 | { | |
776 | bool differ_p; | |
777 | struct data_dependence_relation *ddr; | |
b52485c6 DP |
778 | struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo); |
779 | int vectorization_factor = LOOP_VINFO_VECT_FACTOR (loop_vinfo); | |
780 | int dist = 0; | |
781 | unsigned int loop_depth = 0; | |
782 | struct loop *loop_nest = loop; | |
783 | ||
f7064d11 DN |
784 | |
785 | if (!vect_base_addr_differ_p (dra, drb, &differ_p)) | |
786 | { | |
787 | if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS, | |
788 | LOOP_LOC (loop_vinfo))) | |
789 | { | |
790 | fprintf (vect_dump, | |
791 | "not vectorized: can't determine dependence between: "); | |
792 | print_generic_expr (vect_dump, DR_REF (dra), TDF_SLIM); | |
793 | fprintf (vect_dump, " and "); | |
794 | print_generic_expr (vect_dump, DR_REF (drb), TDF_SLIM); | |
795 | } | |
796 | return true; | |
797 | } | |
798 | ||
799 | if (differ_p) | |
800 | return false; | |
801 | ||
802 | ddr = initialize_data_dependence_relation (dra, drb); | |
803 | compute_affine_dependence (ddr); | |
804 | ||
805 | if (DDR_ARE_DEPENDENT (ddr) == chrec_known) | |
806 | return false; | |
b52485c6 DP |
807 | |
808 | if (DDR_ARE_DEPENDENT (ddr) == chrec_dont_know) | |
809 | { | |
810 | if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS, | |
811 | LOOP_LOC (loop_vinfo))) | |
812 | { | |
813 | fprintf (vect_dump, | |
814 | "not vectorized: can't determine dependence between "); | |
815 | print_generic_expr (vect_dump, DR_REF (dra), TDF_SLIM); | |
816 | fprintf (vect_dump, " and "); | |
817 | print_generic_expr (vect_dump, DR_REF (drb), TDF_SLIM); | |
818 | } | |
819 | return true; | |
820 | } | |
821 | ||
822 | /* Find loop depth. */ | |
823 | while (loop_nest) | |
824 | { | |
825 | if (loop_nest->outer && loop_nest->outer->outer) | |
826 | { | |
827 | loop_nest = loop_nest->outer; | |
828 | loop_depth++; | |
829 | } | |
830 | else | |
831 | break; | |
832 | } | |
833 | ||
834 | /* Compute distance vector. */ | |
835 | compute_subscript_distance (ddr); | |
836 | build_classic_dist_vector (ddr, vect_loops_num, loop_nest->depth); | |
837 | ||
838 | if (!DDR_DIST_VECT (ddr)) | |
839 | { | |
840 | if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS, | |
841 | LOOP_LOC (loop_vinfo))) | |
842 | { | |
843 | fprintf (vect_dump, "not vectorized: bad dist vector for "); | |
844 | print_generic_expr (vect_dump, DR_REF (dra), TDF_SLIM); | |
845 | fprintf (vect_dump, " and "); | |
846 | print_generic_expr (vect_dump, DR_REF (drb), TDF_SLIM); | |
847 | } | |
848 | return true; | |
849 | } | |
850 | ||
851 | dist = DDR_DIST_VECT (ddr)[loop_depth]; | |
852 | ||
853 | /* Same loop iteration. */ | |
854 | if (dist == 0) | |
855 | { | |
856 | if (vect_print_dump_info (REPORT_DETAILS, LOOP_LOC (loop_vinfo))) | |
857 | fprintf (vect_dump, "dependence distance 0."); | |
858 | return false; | |
859 | } | |
860 | ||
861 | if (dist >= vectorization_factor) | |
862 | /* Dependence distance does not create dependence, as far as vectorization | |
863 | is concerned, in this case. */ | |
864 | return false; | |
865 | ||
f7064d11 DN |
866 | if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS, |
867 | LOOP_LOC (loop_vinfo))) | |
868 | { | |
869 | fprintf (vect_dump, | |
870 | "not vectorized: possible dependence between data-refs "); | |
871 | print_generic_expr (vect_dump, DR_REF (dra), TDF_SLIM); | |
872 | fprintf (vect_dump, " and "); | |
873 | print_generic_expr (vect_dump, DR_REF (drb), TDF_SLIM); | |
874 | } | |
875 | ||
876 | return true; | |
877 | } | |
878 | ||
879 | ||
880 | /* Function vect_analyze_data_ref_dependences. | |
881 | ||
882 | Examine all the data references in the loop, and make sure there do not | |
b52485c6 | 883 | exist any data dependences between them. */ |
f7064d11 DN |
884 | |
885 | static bool | |
886 | vect_analyze_data_ref_dependences (loop_vec_info loop_vinfo) | |
887 | { | |
888 | unsigned int i, j; | |
889 | varray_type loop_write_refs = LOOP_VINFO_DATAREF_WRITES (loop_vinfo); | |
890 | varray_type loop_read_refs = LOOP_VINFO_DATAREF_READS (loop_vinfo); | |
891 | ||
892 | /* Examine store-store (output) dependences. */ | |
893 | ||
894 | if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC)) | |
895 | fprintf (vect_dump, "=== vect_analyze_dependences ==="); | |
896 | ||
897 | if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC)) | |
898 | fprintf (vect_dump, "compare all store-store pairs."); | |
899 | ||
900 | for (i = 0; i < VARRAY_ACTIVE_SIZE (loop_write_refs); i++) | |
901 | { | |
902 | for (j = i + 1; j < VARRAY_ACTIVE_SIZE (loop_write_refs); j++) | |
903 | { | |
904 | struct data_reference *dra = | |
905 | VARRAY_GENERIC_PTR (loop_write_refs, i); | |
906 | struct data_reference *drb = | |
907 | VARRAY_GENERIC_PTR (loop_write_refs, j); | |
908 | if (vect_analyze_data_ref_dependence (dra, drb, loop_vinfo)) | |
909 | return false; | |
910 | } | |
911 | } | |
912 | ||
913 | /* Examine load-store (true/anti) dependences. */ | |
914 | ||
915 | if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC)) | |
916 | fprintf (vect_dump, "compare all load-store pairs."); | |
917 | ||
918 | for (i = 0; i < VARRAY_ACTIVE_SIZE (loop_read_refs); i++) | |
919 | { | |
920 | for (j = 0; j < VARRAY_ACTIVE_SIZE (loop_write_refs); j++) | |
921 | { | |
922 | struct data_reference *dra = VARRAY_GENERIC_PTR (loop_read_refs, i); | |
923 | struct data_reference *drb = | |
924 | VARRAY_GENERIC_PTR (loop_write_refs, j); | |
925 | if (vect_analyze_data_ref_dependence (dra, drb, loop_vinfo)) | |
926 | return false; | |
927 | } | |
928 | } | |
929 | ||
930 | return true; | |
931 | } | |
932 | ||
933 | ||
934 | /* Function vect_compute_data_ref_alignment | |
935 | ||
936 | Compute the misalignment of the data reference DR. | |
937 | ||
938 | Output: | |
939 | 1. If during the misalignment computation it is found that the data reference | |
940 | cannot be vectorized then false is returned. | |
941 | 2. DR_MISALIGNMENT (DR) is defined. | |
942 | ||
943 | FOR NOW: No analysis is actually performed. Misalignment is calculated | |
944 | only for trivial cases. TODO. */ | |
945 | ||
946 | static bool | |
947 | vect_compute_data_ref_alignment (struct data_reference *dr) | |
948 | { | |
949 | tree stmt = DR_STMT (dr); | |
950 | stmt_vec_info stmt_info = vinfo_for_stmt (stmt); | |
951 | tree ref = DR_REF (dr); | |
952 | tree vectype; | |
953 | tree base, alignment; | |
954 | bool base_aligned_p; | |
955 | tree misalign; | |
956 | ||
957 | if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC)) | |
958 | fprintf (vect_dump, "vect_compute_data_ref_alignment:"); | |
959 | ||
960 | /* Initialize misalignment to unknown. */ | |
961 | DR_MISALIGNMENT (dr) = -1; | |
962 | ||
963 | misalign = STMT_VINFO_VECT_MISALIGNMENT (stmt_info); | |
964 | base_aligned_p = STMT_VINFO_VECT_BASE_ALIGNED_P (stmt_info); | |
965 | base = build_fold_indirect_ref (STMT_VINFO_VECT_DR_BASE_ADDRESS (stmt_info)); | |
966 | vectype = STMT_VINFO_VECTYPE (stmt_info); | |
967 | ||
968 | if (!misalign) | |
969 | { | |
970 | if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC)) | |
971 | { | |
972 | fprintf (vect_dump, "Unknown alignment for access: "); | |
973 | print_generic_expr (vect_dump, base, TDF_SLIM); | |
974 | } | |
975 | return true; | |
976 | } | |
977 | ||
978 | if (!base_aligned_p) | |
979 | { | |
980 | if (!vect_can_force_dr_alignment_p (base, TYPE_ALIGN (vectype))) | |
981 | { | |
982 | if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC)) | |
983 | { | |
984 | fprintf (vect_dump, "can't force alignment of ref: "); | |
985 | print_generic_expr (vect_dump, ref, TDF_SLIM); | |
986 | } | |
987 | return true; | |
988 | } | |
989 | ||
990 | /* Force the alignment of the decl. | |
991 | NOTE: This is the only change to the code we make during | |
992 | the analysis phase, before deciding to vectorize the loop. */ | |
993 | if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC)) | |
994 | fprintf (vect_dump, "force alignment"); | |
995 | DECL_ALIGN (base) = TYPE_ALIGN (vectype); | |
996 | DECL_USER_ALIGN (base) = 1; | |
997 | } | |
998 | ||
999 | /* At this point we assume that the base is aligned. */ | |
1000 | gcc_assert (base_aligned_p | |
1001 | || (TREE_CODE (base) == VAR_DECL | |
1002 | && DECL_ALIGN (base) >= TYPE_ALIGN (vectype))); | |
1003 | ||
1004 | /* Alignment required, in bytes: */ | |
1005 | alignment = ssize_int (TYPE_ALIGN (vectype)/BITS_PER_UNIT); | |
1006 | ||
1007 | /* Modulo alignment. */ | |
1008 | misalign = size_binop (TRUNC_MOD_EXPR, misalign, alignment); | |
1009 | if (tree_int_cst_sgn (misalign) < 0) | |
1010 | { | |
1011 | /* Negative misalignment value. */ | |
1012 | if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC)) | |
1013 | fprintf (vect_dump, "unexpected misalign value"); | |
1014 | return false; | |
1015 | } | |
1016 | ||
1017 | DR_MISALIGNMENT (dr) = tree_low_cst (misalign, 1); | |
1018 | ||
1019 | if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC)) | |
1020 | fprintf (vect_dump, "misalign = %d bytes", DR_MISALIGNMENT (dr)); | |
1021 | ||
1022 | return true; | |
1023 | } | |
1024 | ||
1025 | ||
1026 | /* Function vect_compute_data_refs_alignment | |
1027 | ||
1028 | Compute the misalignment of data references in the loop. | |
1029 | This pass may take place at function granularity instead of at loop | |
1030 | granularity. | |
1031 | ||
1032 | FOR NOW: No analysis is actually performed. Misalignment is calculated | |
1033 | only for trivial cases. TODO. */ | |
1034 | ||
1035 | static bool | |
1036 | vect_compute_data_refs_alignment (loop_vec_info loop_vinfo) | |
1037 | { | |
1038 | varray_type loop_write_datarefs = LOOP_VINFO_DATAREF_WRITES (loop_vinfo); | |
1039 | varray_type loop_read_datarefs = LOOP_VINFO_DATAREF_READS (loop_vinfo); | |
1040 | unsigned int i; | |
1041 | ||
1042 | for (i = 0; i < VARRAY_ACTIVE_SIZE (loop_write_datarefs); i++) | |
1043 | { | |
1044 | struct data_reference *dr = VARRAY_GENERIC_PTR (loop_write_datarefs, i); | |
1045 | if (!vect_compute_data_ref_alignment (dr)) | |
1046 | return false; | |
1047 | } | |
1048 | ||
1049 | for (i = 0; i < VARRAY_ACTIVE_SIZE (loop_read_datarefs); i++) | |
1050 | { | |
1051 | struct data_reference *dr = VARRAY_GENERIC_PTR (loop_read_datarefs, i); | |
1052 | if (!vect_compute_data_ref_alignment (dr)) | |
1053 | return false; | |
1054 | } | |
1055 | ||
1056 | return true; | |
1057 | } | |
1058 | ||
1059 | ||
1060 | /* Function vect_enhance_data_refs_alignment | |
1061 | ||
1062 | This pass will use loop versioning and loop peeling in order to enhance | |
1063 | the alignment of data references in the loop. | |
1064 | ||
1065 | FOR NOW: we assume that whatever versioning/peeling takes place, only the | |
1066 | original loop is to be vectorized; Any other loops that are created by | |
1067 | the transformations performed in this pass - are not supposed to be | |
1068 | vectorized. This restriction will be relaxed. */ | |
1069 | ||
1070 | static void | |
1071 | vect_enhance_data_refs_alignment (loop_vec_info loop_vinfo) | |
1072 | { | |
1073 | varray_type loop_read_datarefs = LOOP_VINFO_DATAREF_READS (loop_vinfo); | |
1074 | varray_type loop_write_datarefs = LOOP_VINFO_DATAREF_WRITES (loop_vinfo); | |
5f55a1ba DN |
1075 | varray_type datarefs; |
1076 | struct data_reference *dr0 = NULL; | |
1077 | unsigned int i, j; | |
f7064d11 DN |
1078 | |
1079 | /* | |
1080 | This pass will require a cost model to guide it whether to apply peeling | |
1081 | or versioning or a combination of the two. For example, the scheme that | |
1082 | intel uses when given a loop with several memory accesses, is as follows: | |
1083 | choose one memory access ('p') which alignment you want to force by doing | |
1084 | peeling. Then, either (1) generate a loop in which 'p' is aligned and all | |
1085 | other accesses are not necessarily aligned, or (2) use loop versioning to | |
1086 | generate one loop in which all accesses are aligned, and another loop in | |
1087 | which only 'p' is necessarily aligned. | |
1088 | ||
1089 | ("Automatic Intra-Register Vectorization for the Intel Architecture", | |
1090 | Aart J.C. Bik, Milind Girkar, Paul M. Grey and Ximmin Tian, International | |
1091 | Journal of Parallel Programming, Vol. 30, No. 2, April 2002.) | |
1092 | ||
1093 | Devising a cost model is the most critical aspect of this work. It will | |
1094 | guide us on which access to peel for, whether to use loop versioning, how | |
1095 | many versions to create, etc. The cost model will probably consist of | |
1096 | generic considerations as well as target specific considerations (on | |
1097 | powerpc for example, misaligned stores are more painful than misaligned | |
1098 | loads). | |
1099 | ||
1100 | Here is the general steps involved in alignment enhancements: | |
1101 | ||
1102 | -- original loop, before alignment analysis: | |
1103 | for (i=0; i<N; i++){ | |
1104 | x = q[i]; # DR_MISALIGNMENT(q) = unknown | |
1105 | p[i] = y; # DR_MISALIGNMENT(p) = unknown | |
1106 | } | |
1107 | ||
1108 | -- After vect_compute_data_refs_alignment: | |
1109 | for (i=0; i<N; i++){ | |
1110 | x = q[i]; # DR_MISALIGNMENT(q) = 3 | |
1111 | p[i] = y; # DR_MISALIGNMENT(p) = unknown | |
1112 | } | |
1113 | ||
1114 | -- Possibility 1: we do loop versioning: | |
1115 | if (p is aligned) { | |
1116 | for (i=0; i<N; i++){ # loop 1A | |
1117 | x = q[i]; # DR_MISALIGNMENT(q) = 3 | |
1118 | p[i] = y; # DR_MISALIGNMENT(p) = 0 | |
1119 | } | |
1120 | } | |
1121 | else { | |
1122 | for (i=0; i<N; i++){ # loop 1B | |
1123 | x = q[i]; # DR_MISALIGNMENT(q) = 3 | |
1124 | p[i] = y; # DR_MISALIGNMENT(p) = unaligned | |
1125 | } | |
1126 | } | |
1127 | ||
1128 | -- Possibility 2: we do loop peeling: | |
1129 | for (i = 0; i < 3; i++){ # (scalar loop, not to be vectorized). | |
1130 | x = q[i]; | |
1131 | p[i] = y; | |
1132 | } | |
1133 | for (i = 3; i < N; i++){ # loop 2A | |
1134 | x = q[i]; # DR_MISALIGNMENT(q) = 0 | |
1135 | p[i] = y; # DR_MISALIGNMENT(p) = unknown | |
1136 | } | |
1137 | ||
1138 | -- Possibility 3: combination of loop peeling and versioning: | |
1139 | for (i = 0; i < 3; i++){ # (scalar loop, not to be vectorized). | |
1140 | x = q[i]; | |
1141 | p[i] = y; | |
1142 | } | |
1143 | if (p is aligned) { | |
1144 | for (i = 3; i<N; i++){ # loop 3A | |
1145 | x = q[i]; # DR_MISALIGNMENT(q) = 0 | |
1146 | p[i] = y; # DR_MISALIGNMENT(p) = 0 | |
1147 | } | |
1148 | } | |
1149 | else { | |
1150 | for (i = 3; i<N; i++){ # loop 3B | |
1151 | x = q[i]; # DR_MISALIGNMENT(q) = 0 | |
1152 | p[i] = y; # DR_MISALIGNMENT(p) = unaligned | |
1153 | } | |
1154 | } | |
1155 | ||
1156 | These loops are later passed to loop_transform to be vectorized. The | |
1157 | vectorizer will use the alignment information to guide the transformation | |
1158 | (whether to generate regular loads/stores, or with special handling for | |
1159 | misalignment). | |
1160 | */ | |
1161 | ||
1162 | /* (1) Peeling to force alignment. */ | |
1163 | ||
1164 | /* (1.1) Decide whether to perform peeling, and how many iterations to peel: | |
1165 | Considerations: | |
1166 | + How many accesses will become aligned due to the peeling | |
1167 | - How many accesses will become unaligned due to the peeling, | |
1168 | and the cost of misaligned accesses. | |
1169 | - The cost of peeling (the extra runtime checks, the increase | |
1170 | in code size). | |
1171 | ||
1172 | The scheme we use FORNOW: peel to force the alignment of the first | |
1173 | misaligned store in the loop. | |
1174 | Rationale: misaligned stores are not yet supported. | |
1175 | ||
1176 | TODO: Use a better cost model. */ | |
1177 | ||
1178 | for (i = 0; i < VARRAY_ACTIVE_SIZE (loop_write_datarefs); i++) | |
1179 | { | |
5f55a1ba DN |
1180 | dr0 = VARRAY_GENERIC_PTR (loop_write_datarefs, i); |
1181 | if (!aligned_access_p (dr0)) | |
1182 | { | |
1183 | LOOP_VINFO_UNALIGNED_DR (loop_vinfo) = dr0; | |
1184 | LOOP_PEELING_FOR_ALIGNMENT (loop_vinfo) = DR_MISALIGNMENT (dr0); | |
f7064d11 | 1185 | break; |
5f55a1ba | 1186 | } |
f7064d11 DN |
1187 | } |
1188 | ||
f7064d11 DN |
1189 | /* (1.2) Update the alignment info according to the peeling factor. |
1190 | If the misalignment of the DR we peel for is M, then the | |
1191 | peeling factor is VF - M, and the misalignment of each access DR_i | |
1192 | in the loop is DR_MISALIGNMENT (DR_i) + VF - M. | |
1193 | If the misalignment of the DR we peel for is unknown, then the | |
1194 | misalignment of each access DR_i in the loop is also unknown. | |
1195 | ||
5f55a1ba DN |
1196 | TODO: - consider accesses that are known to have the same |
1197 | alignment, even if that alignment is unknown. */ | |
f7064d11 | 1198 | |
5f55a1ba | 1199 | if (LOOP_PEELING_FOR_ALIGNMENT (loop_vinfo)) |
f7064d11 | 1200 | { |
5f55a1ba DN |
1201 | int mis; |
1202 | int npeel = 0; | |
1203 | ||
1204 | if (known_alignment_for_access_p (dr0)) | |
f7064d11 | 1205 | { |
5f55a1ba DN |
1206 | /* Since it's known at compile time, compute the number of iterations |
1207 | in the peeled loop (the peeling factor) for use in updating | |
1208 | DR_MISALIGNMENT values. The peeling factor is the vectorization | |
1209 | factor minus the misalignment as an element count. */ | |
1210 | mis = DR_MISALIGNMENT (dr0); | |
1211 | mis /= GET_MODE_SIZE (TYPE_MODE (TREE_TYPE (DR_REF (dr0)))); | |
1212 | npeel = LOOP_VINFO_VECT_FACTOR (loop_vinfo) - mis; | |
f7064d11 | 1213 | } |
5f55a1ba DN |
1214 | |
1215 | datarefs = loop_write_datarefs; | |
1216 | for (j = 0; j < 2; j++) | |
f7064d11 | 1217 | { |
5f55a1ba DN |
1218 | for (i = 0; i < VARRAY_ACTIVE_SIZE (datarefs); i++) |
1219 | { | |
1220 | struct data_reference *dr = VARRAY_GENERIC_PTR (datarefs, i); | |
1221 | ||
1222 | if (dr == dr0) | |
1223 | continue; | |
1224 | if (known_alignment_for_access_p (dr) | |
1225 | && DR_MISALIGNMENT (dr) == DR_MISALIGNMENT (dr0)) | |
1226 | DR_MISALIGNMENT (dr) = 0; | |
1227 | else if (known_alignment_for_access_p (dr) | |
1228 | && known_alignment_for_access_p (dr0)) | |
1229 | { | |
1230 | int drsize = GET_MODE_SIZE (TYPE_MODE (TREE_TYPE (DR_REF (dr)))); | |
1231 | ||
1232 | DR_MISALIGNMENT (dr) += npeel * drsize; | |
c4336539 | 1233 | DR_MISALIGNMENT (dr) %= UNITS_PER_SIMD_WORD; |
5f55a1ba DN |
1234 | } |
1235 | else | |
1236 | DR_MISALIGNMENT (dr) = -1; | |
1237 | } | |
1238 | datarefs = loop_read_datarefs; | |
f7064d11 | 1239 | } |
5f55a1ba DN |
1240 | |
1241 | DR_MISALIGNMENT (dr0) = 0; | |
f7064d11 DN |
1242 | } |
1243 | } | |
1244 | ||
1245 | ||
1246 | /* Function vect_analyze_data_refs_alignment | |
1247 | ||
1248 | Analyze the alignment of the data-references in the loop. | |
1249 | FOR NOW: Until support for misliagned accesses is in place, only if all | |
1250 | accesses are aligned can the loop be vectorized. This restriction will be | |
1251 | relaxed. */ | |
1252 | ||
1253 | static bool | |
1254 | vect_analyze_data_refs_alignment (loop_vec_info loop_vinfo) | |
1255 | { | |
1256 | varray_type loop_read_datarefs = LOOP_VINFO_DATAREF_READS (loop_vinfo); | |
1257 | varray_type loop_write_datarefs = LOOP_VINFO_DATAREF_WRITES (loop_vinfo); | |
1258 | enum dr_alignment_support supportable_dr_alignment; | |
1259 | unsigned int i; | |
1260 | ||
1261 | if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC)) | |
1262 | fprintf (vect_dump, "=== vect_analyze_data_refs_alignment ==="); | |
1263 | ||
1264 | ||
1265 | /* This pass may take place at function granularity instead of at loop | |
1266 | granularity. */ | |
1267 | ||
1268 | if (!vect_compute_data_refs_alignment (loop_vinfo)) | |
1269 | { | |
1270 | if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS, | |
1271 | LOOP_LOC (loop_vinfo))) | |
1272 | fprintf (vect_dump, | |
1273 | "not vectorized: can't calculate alignment for data ref."); | |
1274 | return false; | |
1275 | } | |
1276 | ||
1277 | ||
1278 | /* This pass will decide on using loop versioning and/or loop peeling in | |
1279 | order to enhance the alignment of data references in the loop. */ | |
1280 | ||
1281 | vect_enhance_data_refs_alignment (loop_vinfo); | |
1282 | ||
1283 | ||
1284 | /* Finally, check that all the data references in the loop can be | |
1285 | handled with respect to their alignment. */ | |
1286 | ||
1287 | for (i = 0; i < VARRAY_ACTIVE_SIZE (loop_read_datarefs); i++) | |
1288 | { | |
1289 | struct data_reference *dr = VARRAY_GENERIC_PTR (loop_read_datarefs, i); | |
1290 | supportable_dr_alignment = vect_supportable_dr_alignment (dr); | |
1291 | if (!supportable_dr_alignment) | |
1292 | { | |
1293 | if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS, | |
1294 | LOOP_LOC (loop_vinfo))) | |
1295 | fprintf (vect_dump, "not vectorized: unsupported unaligned load."); | |
1296 | return false; | |
1297 | } | |
1298 | if (supportable_dr_alignment != dr_aligned | |
1299 | && (vect_print_dump_info (REPORT_ALIGNMENT, LOOP_LOC (loop_vinfo)))) | |
1300 | fprintf (vect_dump, "Vectorizing an unaligned access."); | |
1301 | } | |
1302 | for (i = 0; i < VARRAY_ACTIVE_SIZE (loop_write_datarefs); i++) | |
1303 | { | |
1304 | struct data_reference *dr = VARRAY_GENERIC_PTR (loop_write_datarefs, i); | |
1305 | supportable_dr_alignment = vect_supportable_dr_alignment (dr); | |
1306 | if (!supportable_dr_alignment) | |
1307 | { | |
1308 | if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS, | |
1309 | LOOP_LOC (loop_vinfo))) | |
1310 | fprintf (vect_dump, "not vectorized: unsupported unaligned store."); | |
1311 | return false; | |
1312 | } | |
1313 | if (supportable_dr_alignment != dr_aligned | |
1314 | && (vect_print_dump_info (REPORT_ALIGNMENT, LOOP_LOC (loop_vinfo)))) | |
1315 | fprintf (vect_dump, "Vectorizing an unaligned access."); | |
1316 | } | |
0e6b0daf DN |
1317 | if (LOOP_VINFO_UNALIGNED_DR (loop_vinfo) |
1318 | && vect_print_dump_info (REPORT_ALIGNMENT, LOOP_LOC (loop_vinfo))) | |
1319 | fprintf (vect_dump, "Alignment of access forced using peeling."); | |
f7064d11 DN |
1320 | |
1321 | return true; | |
1322 | } | |
1323 | ||
1324 | ||
1325 | /* Function vect_analyze_data_ref_access. | |
1326 | ||
1327 | Analyze the access pattern of the data-reference DR. For now, a data access | |
1328 | has to consecutive to be considered vectorizable. */ | |
1329 | ||
1330 | static bool | |
1331 | vect_analyze_data_ref_access (struct data_reference *dr) | |
1332 | { | |
1333 | tree stmt = DR_STMT (dr); | |
1334 | stmt_vec_info stmt_info = vinfo_for_stmt (stmt); | |
1335 | tree step = STMT_VINFO_VECT_STEP (stmt_info); | |
1336 | tree scalar_type = TREE_TYPE (DR_REF (dr)); | |
1337 | ||
1338 | if (!step || tree_int_cst_compare (step, TYPE_SIZE_UNIT (scalar_type))) | |
1339 | { | |
1340 | if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC)) | |
1341 | fprintf (vect_dump, "not consecutive access"); | |
1342 | return false; | |
1343 | } | |
1344 | return true; | |
1345 | } | |
1346 | ||
1347 | ||
1348 | /* Function vect_analyze_data_ref_accesses. | |
1349 | ||
1350 | Analyze the access pattern of all the data references in the loop. | |
1351 | ||
1352 | FORNOW: the only access pattern that is considered vectorizable is a | |
1353 | simple step 1 (consecutive) access. | |
1354 | ||
1355 | FORNOW: handle only arrays and pointer accesses. */ | |
1356 | ||
1357 | static bool | |
1358 | vect_analyze_data_ref_accesses (loop_vec_info loop_vinfo) | |
1359 | { | |
1360 | unsigned int i; | |
1361 | varray_type loop_write_datarefs = LOOP_VINFO_DATAREF_WRITES (loop_vinfo); | |
1362 | varray_type loop_read_datarefs = LOOP_VINFO_DATAREF_READS (loop_vinfo); | |
1363 | ||
1364 | if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC)) | |
1365 | fprintf (vect_dump, "=== vect_analyze_data_ref_accesses ==="); | |
1366 | ||
1367 | for (i = 0; i < VARRAY_ACTIVE_SIZE (loop_write_datarefs); i++) | |
1368 | { | |
1369 | struct data_reference *dr = VARRAY_GENERIC_PTR (loop_write_datarefs, i); | |
1370 | bool ok = vect_analyze_data_ref_access (dr); | |
1371 | if (!ok) | |
1372 | { | |
1373 | if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS, | |
1374 | LOOP_LOC (loop_vinfo))) | |
1375 | fprintf (vect_dump, "not vectorized: complicated access pattern."); | |
1376 | return false; | |
1377 | } | |
1378 | } | |
1379 | ||
1380 | for (i = 0; i < VARRAY_ACTIVE_SIZE (loop_read_datarefs); i++) | |
1381 | { | |
1382 | struct data_reference *dr = VARRAY_GENERIC_PTR (loop_read_datarefs, i); | |
1383 | bool ok = vect_analyze_data_ref_access (dr); | |
1384 | if (!ok) | |
1385 | { | |
1386 | if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS, | |
1387 | LOOP_LOC (loop_vinfo))) | |
1388 | fprintf (vect_dump, "not vectorized: complicated access pattern."); | |
1389 | return false; | |
1390 | } | |
1391 | } | |
1392 | ||
1393 | return true; | |
1394 | } | |
1395 | ||
1396 | ||
1397 | /* Function vect_analyze_pointer_ref_access. | |
1398 | ||
1399 | Input: | |
1400 | STMT - a stmt that contains a data-ref. | |
1401 | MEMREF - a data-ref in STMT, which is an INDIRECT_REF. | |
1402 | ACCESS_FN - the access function of MEMREF. | |
1403 | ||
1404 | Output: | |
1405 | If the data-ref access is vectorizable, return a data_reference structure | |
1406 | that represents it (DR). Otherwise - return NULL. | |
1407 | STEP - the stride of MEMREF in the loop. | |
1408 | INIT - the initial condition of MEMREF in the loop. | |
1409 | */ | |
1410 | ||
1411 | static struct data_reference * | |
1412 | vect_analyze_pointer_ref_access (tree memref, tree stmt, bool is_read, | |
1413 | tree access_fn, tree *ptr_init, tree *ptr_step) | |
1414 | { | |
1415 | stmt_vec_info stmt_info = vinfo_for_stmt (stmt); | |
1416 | loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info); | |
1417 | struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo); | |
1418 | tree step, init; | |
1419 | tree reftype, innertype; | |
1420 | tree indx_access_fn; | |
1421 | int loopnum = loop->num; | |
1422 | struct data_reference *dr; | |
1423 | ||
1424 | if (!vect_is_simple_iv_evolution (loopnum, access_fn, &init, &step)) | |
1425 | { | |
1426 | if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS, | |
1427 | LOOP_LOC (loop_vinfo))) | |
1428 | fprintf (vect_dump, "not vectorized: pointer access is not simple."); | |
1429 | return NULL; | |
1430 | } | |
1431 | ||
1432 | STRIP_NOPS (init); | |
1433 | ||
1434 | if (!expr_invariant_in_loop_p (loop, init)) | |
1435 | { | |
1436 | if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS, | |
1437 | LOOP_LOC (loop_vinfo))) | |
1438 | fprintf (vect_dump, | |
1439 | "not vectorized: initial condition is not loop invariant."); | |
1440 | return NULL; | |
1441 | } | |
1442 | ||
1443 | if (TREE_CODE (step) != INTEGER_CST) | |
1444 | { | |
1445 | if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS, | |
1446 | LOOP_LOC (loop_vinfo))) | |
1447 | fprintf (vect_dump, | |
1448 | "not vectorized: non constant step for pointer access."); | |
1449 | return NULL; | |
1450 | } | |
1451 | ||
1452 | reftype = TREE_TYPE (TREE_OPERAND (memref, 0)); | |
2427fa80 | 1453 | if (!POINTER_TYPE_P (reftype)) |
f7064d11 DN |
1454 | { |
1455 | if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS, | |
1456 | LOOP_LOC (loop_vinfo))) | |
1457 | fprintf (vect_dump, "not vectorized: unexpected pointer access form."); | |
1458 | return NULL; | |
1459 | } | |
1460 | ||
d6efd7d6 | 1461 | if (!POINTER_TYPE_P (TREE_TYPE (init))) |
f7064d11 DN |
1462 | { |
1463 | if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS, | |
1464 | LOOP_LOC (loop_vinfo))) | |
1465 | fprintf (vect_dump, "not vectorized: unexpected pointer access form."); | |
1466 | return NULL; | |
1467 | } | |
1468 | ||
1469 | *ptr_step = fold_convert (ssizetype, step); | |
1470 | innertype = TREE_TYPE (reftype); | |
d6efd7d6 DN |
1471 | if (!COMPLETE_TYPE_P (innertype)) |
1472 | { | |
1473 | if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS, | |
1474 | LOOP_LOC (loop_vinfo))) | |
1475 | fprintf (vect_dump, "not vectorized: pointer to incomplete type."); | |
1476 | return NULL; | |
1477 | } | |
1478 | ||
f7064d11 DN |
1479 | /* Check that STEP is a multiple of type size. */ |
1480 | if (!integer_zerop (size_binop (TRUNC_MOD_EXPR, *ptr_step, | |
1481 | fold_convert (ssizetype, TYPE_SIZE_UNIT (innertype))))) | |
1482 | { | |
1483 | if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS, | |
1484 | LOOP_LOC (loop_vinfo))) | |
1485 | fprintf (vect_dump, "not vectorized: non consecutive access."); | |
1486 | return NULL; | |
1487 | } | |
1488 | ||
1489 | indx_access_fn = | |
1490 | build_polynomial_chrec (loopnum, integer_zero_node, integer_one_node); | |
1491 | if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC)) | |
1492 | { | |
1493 | fprintf (vect_dump, "Access function of ptr indx: "); | |
1494 | print_generic_expr (vect_dump, indx_access_fn, TDF_SLIM); | |
1495 | } | |
1496 | dr = init_data_ref (stmt, memref, NULL_TREE, indx_access_fn, is_read); | |
1497 | *ptr_init = init; | |
1498 | return dr; | |
1499 | } | |
1500 | ||
1501 | ||
f7064d11 DN |
1502 | /* Function vect_address_analysis |
1503 | ||
1504 | Return the BASE of the address expression EXPR. | |
1505 | Also compute the INITIAL_OFFSET from BASE, MISALIGN and STEP. | |
1506 | ||
1507 | Input: | |
1508 | EXPR - the address expression that is being analyzed | |
1509 | STMT - the statement that contains EXPR or its original memory reference | |
1510 | IS_READ - TRUE if STMT reads from EXPR, FALSE if writes to EXPR | |
1511 | VECTYPE - the type that defines the alignment (i.e, we compute | |
1512 | alignment relative to TYPE_ALIGN(VECTYPE)) | |
1513 | DR - data_reference struct for the original memory reference | |
1514 | ||
1515 | Output: | |
1516 | BASE (returned value) - the base of the data reference EXPR. | |
1517 | INITIAL_OFFSET - initial offset of EXPR from BASE (an expression) | |
1518 | MISALIGN - offset of EXPR from BASE in bytes (a constant) or NULL_TREE if the | |
1519 | computation is impossible | |
1520 | STEP - evolution of EXPR in the loop | |
1521 | BASE_ALIGNED - indicates if BASE is aligned | |
1522 | ||
1523 | If something unexpected is encountered (an unsupported form of data-ref), | |
1524 | then NULL_TREE is returned. | |
1525 | */ | |
1526 | ||
1527 | static tree | |
1528 | vect_address_analysis (tree expr, tree stmt, bool is_read, tree vectype, | |
1529 | struct data_reference *dr, tree *offset, tree *misalign, | |
1530 | tree *step, bool *base_aligned) | |
1531 | { | |
1532 | tree oprnd0, oprnd1, base_address, offset_expr, base_addr0, base_addr1; | |
1533 | tree address_offset = ssize_int (0), address_misalign = ssize_int (0); | |
2427fa80 | 1534 | tree dummy; |
8bb46326 | 1535 | struct ptr_info_def *dummy1; |
c75ab022 | 1536 | subvar_t dummy2; |
f7064d11 DN |
1537 | |
1538 | switch (TREE_CODE (expr)) | |
1539 | { | |
1540 | case PLUS_EXPR: | |
1541 | case MINUS_EXPR: | |
1542 | /* EXPR is of form {base +/- offset} (or {offset +/- base}). */ | |
1543 | oprnd0 = TREE_OPERAND (expr, 0); | |
1544 | oprnd1 = TREE_OPERAND (expr, 1); | |
1545 | ||
1546 | STRIP_NOPS (oprnd0); | |
1547 | STRIP_NOPS (oprnd1); | |
1548 | ||
1549 | /* Recursively try to find the base of the address contained in EXPR. | |
1550 | For offset, the returned base will be NULL. */ | |
1551 | base_addr0 = vect_address_analysis (oprnd0, stmt, is_read, vectype, dr, | |
1552 | &address_offset, &address_misalign, step, | |
1553 | base_aligned); | |
1554 | ||
1555 | base_addr1 = vect_address_analysis (oprnd1, stmt, is_read, vectype, dr, | |
1556 | &address_offset, &address_misalign, step, | |
1557 | base_aligned); | |
1558 | ||
1559 | /* We support cases where only one of the operands contains an | |
1560 | address. */ | |
1561 | if ((base_addr0 && base_addr1) || (!base_addr0 && !base_addr1)) | |
1562 | return NULL_TREE; | |
1563 | ||
1564 | /* To revert STRIP_NOPS. */ | |
1565 | oprnd0 = TREE_OPERAND (expr, 0); | |
1566 | oprnd1 = TREE_OPERAND (expr, 1); | |
1567 | ||
1568 | offset_expr = base_addr0 ? | |
1569 | fold_convert (ssizetype, oprnd1) : fold_convert (ssizetype, oprnd0); | |
1570 | ||
1571 | /* EXPR is of form {base +/- offset} (or {offset +/- base}). If offset is | |
1572 | a number, we can add it to the misalignment value calculated for base, | |
1573 | otherwise, misalignment is NULL. */ | |
1574 | if (TREE_CODE (offset_expr) == INTEGER_CST && address_misalign) | |
1575 | *misalign = size_binop (TREE_CODE (expr), address_misalign, | |
1576 | offset_expr); | |
1577 | else | |
1578 | *misalign = NULL_TREE; | |
1579 | ||
1580 | /* Combine offset (from EXPR {base + offset}) with the offset calculated | |
1581 | for base. */ | |
1582 | *offset = size_binop (TREE_CODE (expr), address_offset, offset_expr); | |
1583 | return base_addr0 ? base_addr0 : base_addr1; | |
1584 | ||
1585 | case ADDR_EXPR: | |
c75ab022 DB |
1586 | base_address = vect_object_analysis (TREE_OPERAND (expr, 0), stmt, |
1587 | is_read, vectype, &dr, offset, | |
1588 | misalign, step, base_aligned, | |
8bb46326 | 1589 | &dummy, &dummy1, &dummy2); |
f7064d11 DN |
1590 | return base_address; |
1591 | ||
1592 | case SSA_NAME: | |
2427fa80 | 1593 | if (!POINTER_TYPE_P (TREE_TYPE (expr))) |
f7064d11 | 1594 | return NULL_TREE; |
2427fa80 | 1595 | |
f7064d11 DN |
1596 | if (TYPE_ALIGN (TREE_TYPE (TREE_TYPE (expr))) < TYPE_ALIGN (vectype)) |
1597 | { | |
1598 | if (vect_get_ptr_offset (expr, vectype, misalign)) | |
1599 | *base_aligned = true; | |
1600 | else | |
1601 | *base_aligned = false; | |
1602 | } | |
1603 | else | |
1604 | { | |
1605 | *base_aligned = true; | |
1606 | *misalign = ssize_int (0); | |
1607 | } | |
1608 | *offset = ssize_int (0); | |
1609 | *step = ssize_int (0); | |
1610 | return expr; | |
1611 | ||
1612 | default: | |
1613 | return NULL_TREE; | |
1614 | } | |
1615 | } | |
1616 | ||
1617 | ||
1618 | /* Function vect_object_analysis | |
1619 | ||
1620 | Return the BASE of the data reference MEMREF. | |
1621 | Also compute the INITIAL_OFFSET from BASE, MISALIGN and STEP. | |
1622 | E.g., for EXPR a.b[i] + 4B, BASE is a, and OFFSET is the overall offset | |
1623 | 'a.b[i] + 4B' from a (can be an expression), MISALIGN is an OFFSET | |
1624 | instantiated with initial_conditions of access_functions of variables, | |
1625 | modulo alignment, and STEP is the evolution of the DR_REF in this loop. | |
1626 | ||
1627 | Function get_inner_reference is used for the above in case of ARRAY_REF and | |
1628 | COMPONENT_REF. | |
1629 | ||
1630 | The structure of the function is as follows: | |
1631 | Part 1: | |
1632 | Case 1. For handled_component_p refs | |
1633 | 1.1 call get_inner_reference | |
1634 | 1.1.1 analyze offset expr received from get_inner_reference | |
1635 | 1.2. build data-reference structure for MEMREF | |
1636 | (fall through with BASE) | |
1637 | Case 2. For declarations | |
1638 | 2.1 check alignment | |
1639 | 2.2 update DR_BASE_NAME if necessary for alias | |
1640 | Case 3. For INDIRECT_REFs | |
1641 | 3.1 get the access function | |
1642 | 3.2 analyze evolution of MEMREF | |
1643 | 3.3 set data-reference structure for MEMREF | |
1644 | 3.4 call vect_address_analysis to analyze INIT of the access function | |
1645 | ||
1646 | Part 2: | |
1647 | Combine the results of object and address analysis to calculate | |
1648 | INITIAL_OFFSET, STEP and misalignment info. | |
1649 | ||
1650 | Input: | |
1651 | MEMREF - the memory reference that is being analyzed | |
1652 | STMT - the statement that contains MEMREF | |
1653 | IS_READ - TRUE if STMT reads from MEMREF, FALSE if writes to MEMREF | |
1654 | VECTYPE - the type that defines the alignment (i.e, we compute | |
1655 | alignment relative to TYPE_ALIGN(VECTYPE)) | |
1656 | ||
1657 | Output: | |
1658 | BASE_ADDRESS (returned value) - the base address of the data reference MEMREF | |
1659 | E.g, if MEMREF is a.b[k].c[i][j] the returned | |
1660 | base is &a. | |
1661 | DR - data_reference struct for MEMREF | |
1662 | INITIAL_OFFSET - initial offset of MEMREF from BASE (an expression) | |
1663 | MISALIGN - offset of MEMREF from BASE in bytes (a constant) or NULL_TREE if | |
1664 | the computation is impossible | |
1665 | STEP - evolution of the DR_REF in the loop | |
1666 | BASE_ALIGNED - indicates if BASE is aligned | |
2427fa80 | 1667 | MEMTAG - memory tag for aliasing purposes |
8bb46326 | 1668 | PTR_INFO - NULL or points-to aliasing info from a pointer SSA_NAME |
c75ab022 | 1669 | SUBVAR - Sub-variables of the variable |
f7064d11 DN |
1670 | |
1671 | If something unexpected is encountered (an unsupported form of data-ref), | |
1672 | then NULL_TREE is returned. */ | |
1673 | ||
1674 | static tree | |
1675 | vect_object_analysis (tree memref, tree stmt, bool is_read, | |
1676 | tree vectype, struct data_reference **dr, | |
1677 | tree *offset, tree *misalign, tree *step, | |
c75ab022 | 1678 | bool *base_aligned, tree *memtag, |
8bb46326 | 1679 | struct ptr_info_def **ptr_info, subvar_t *subvars) |
f7064d11 DN |
1680 | { |
1681 | tree base = NULL_TREE, base_address = NULL_TREE; | |
1682 | tree object_offset = ssize_int (0), object_misalign = ssize_int (0); | |
1683 | tree object_step = ssize_int (0), address_step = ssize_int (0); | |
1684 | bool object_base_aligned = true, address_base_aligned = true; | |
1685 | tree address_offset = ssize_int (0), address_misalign = ssize_int (0); | |
1686 | HOST_WIDE_INT pbitsize, pbitpos; | |
1687 | tree poffset, bit_pos_in_bytes; | |
1688 | enum machine_mode pmode; | |
1689 | int punsignedp, pvolatilep; | |
1690 | tree ptr_step = ssize_int (0), ptr_init = NULL_TREE; | |
1691 | stmt_vec_info stmt_info = vinfo_for_stmt (stmt); | |
1692 | loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info); | |
1693 | struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo); | |
1694 | struct data_reference *ptr_dr = NULL; | |
1695 | tree access_fn, evolution_part, address_to_analyze; | |
8bb46326 DN |
1696 | |
1697 | *ptr_info = NULL; | |
f7064d11 DN |
1698 | |
1699 | /* Part 1: */ | |
1700 | /* Case 1. handled_component_p refs. */ | |
1701 | if (handled_component_p (memref)) | |
1702 | { | |
1703 | /* 1.1 call get_inner_reference. */ | |
1704 | /* Find the base and the offset from it. */ | |
1705 | base = get_inner_reference (memref, &pbitsize, &pbitpos, &poffset, | |
1706 | &pmode, &punsignedp, &pvolatilep, false); | |
1707 | if (!base) | |
1708 | return NULL_TREE; | |
1709 | ||
1710 | /* 1.1.1 analyze offset expr received from get_inner_reference. */ | |
1711 | if (poffset | |
1712 | && !vect_analyze_offset_expr (poffset, loop, TYPE_SIZE_UNIT (vectype), | |
1713 | &object_offset, &object_misalign, &object_step)) | |
1714 | { | |
1715 | if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC)) | |
1716 | { | |
1717 | fprintf (vect_dump, "failed to compute offset or step for "); | |
1718 | print_generic_expr (vect_dump, memref, TDF_SLIM); | |
1719 | } | |
1720 | return NULL_TREE; | |
1721 | } | |
1722 | ||
1723 | /* Add bit position to OFFSET and MISALIGN. */ | |
1724 | ||
1725 | bit_pos_in_bytes = ssize_int (pbitpos/BITS_PER_UNIT); | |
1726 | /* Check that there is no remainder in bits. */ | |
1727 | if (pbitpos%BITS_PER_UNIT) | |
1728 | { | |
1729 | if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC)) | |
1730 | fprintf (vect_dump, "bit offset alignment."); | |
1731 | return NULL_TREE; | |
1732 | } | |
1733 | object_offset = size_binop (PLUS_EXPR, bit_pos_in_bytes, object_offset); | |
1734 | if (object_misalign) | |
1735 | object_misalign = size_binop (PLUS_EXPR, object_misalign, | |
1736 | bit_pos_in_bytes); | |
1737 | ||
1738 | /* Create data-reference for MEMREF. TODO: handle COMPONENT_REFs. */ | |
1739 | if (!(*dr)) | |
1740 | { | |
1741 | if (TREE_CODE (memref) == ARRAY_REF) | |
1742 | *dr = analyze_array (stmt, memref, is_read); | |
1743 | else | |
1744 | /* FORNOW. */ | |
1745 | return NULL_TREE; | |
1746 | } | |
1747 | memref = base; /* To continue analysis of BASE. */ | |
1748 | /* fall through */ | |
1749 | } | |
1750 | ||
1751 | /* Part 1: Case 2. Declarations. */ | |
1752 | if (DECL_P (memref)) | |
1753 | { | |
1754 | /* We expect to get a decl only if we already have a DR. */ | |
1755 | if (!(*dr)) | |
1756 | { | |
1757 | if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC)) | |
1758 | { | |
1759 | fprintf (vect_dump, "unhandled decl "); | |
1760 | print_generic_expr (vect_dump, memref, TDF_SLIM); | |
1761 | } | |
1762 | return NULL_TREE; | |
1763 | } | |
1764 | ||
1765 | /* 2.1 check the alignment. */ | |
1766 | if (DECL_ALIGN (memref) >= TYPE_ALIGN (vectype)) | |
1767 | object_base_aligned = true; | |
1768 | else | |
1769 | object_base_aligned = false; | |
1770 | ||
1771 | /* 2.2 update DR_BASE_NAME if necessary. */ | |
1772 | if (!DR_BASE_NAME ((*dr))) | |
1773 | /* For alias analysis. In case the analysis of INDIRECT_REF brought | |
1774 | us to object. */ | |
1775 | DR_BASE_NAME ((*dr)) = memref; | |
1776 | ||
c75ab022 DB |
1777 | if (SSA_VAR_P (memref) && var_can_have_subvars (memref)) |
1778 | *subvars = get_subvars_for_var (memref); | |
f7064d11 | 1779 | base_address = build_fold_addr_expr (memref); |
2427fa80 | 1780 | *memtag = memref; |
f7064d11 DN |
1781 | } |
1782 | ||
1783 | /* Part 1: Case 3. INDIRECT_REFs. */ | |
1784 | else if (TREE_CODE (memref) == INDIRECT_REF) | |
8bb46326 DN |
1785 | { |
1786 | tree ptr_ref = TREE_OPERAND (memref, 0); | |
1787 | if (TREE_CODE (ptr_ref) == SSA_NAME) | |
1788 | *ptr_info = SSA_NAME_PTR_INFO (ptr_ref); | |
1789 | ||
f7064d11 | 1790 | /* 3.1 get the access function. */ |
8bb46326 | 1791 | access_fn = analyze_scalar_evolution (loop, ptr_ref); |
f7064d11 DN |
1792 | if (!access_fn) |
1793 | { | |
1794 | if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS, | |
1795 | LOOP_LOC (loop_vinfo))) | |
1796 | fprintf (vect_dump, "not vectorized: complicated pointer access."); | |
1797 | return NULL_TREE; | |
1798 | } | |
1799 | if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC)) | |
1800 | { | |
1801 | fprintf (vect_dump, "Access function of ptr: "); | |
1802 | print_generic_expr (vect_dump, access_fn, TDF_SLIM); | |
1803 | } | |
1804 | ||
1805 | /* 3.2 analyze evolution of MEMREF. */ | |
1806 | evolution_part = evolution_part_in_loop_num (access_fn, loop->num); | |
1807 | if (evolution_part) | |
1808 | { | |
1809 | ptr_dr = vect_analyze_pointer_ref_access (memref, stmt, is_read, | |
1810 | access_fn, &ptr_init, &ptr_step); | |
1811 | if (!(ptr_dr)) | |
1812 | return NULL_TREE; | |
1813 | ||
1814 | object_step = size_binop (PLUS_EXPR, object_step, ptr_step); | |
1815 | address_to_analyze = ptr_init; | |
1816 | } | |
1817 | else | |
1818 | { | |
1819 | if (!(*dr)) | |
1820 | { | |
1821 | if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS, | |
1822 | LOOP_LOC (loop_vinfo))) | |
1823 | fprintf (vect_dump, "not vectorized: ptr is loop invariant."); | |
1824 | return NULL_TREE; | |
1825 | } | |
6cd3dd5b IR |
1826 | /* Since there exists DR for MEMREF, we are analyzing the init of |
1827 | the access function, which not necessary has evolution in the | |
f7064d11 | 1828 | loop. */ |
6cd3dd5b IR |
1829 | address_to_analyze = initial_condition_in_loop_num (access_fn, |
1830 | loop->num); | |
f7064d11 DN |
1831 | } |
1832 | ||
1833 | /* 3.3 set data-reference structure for MEMREF. */ | |
1834 | *dr = (*dr) ? *dr : ptr_dr; | |
1835 | ||
1836 | /* 3.4 call vect_address_analysis to analyze INIT of the access | |
1837 | function. */ | |
1838 | base_address = vect_address_analysis (address_to_analyze, stmt, is_read, | |
1839 | vectype, *dr, &address_offset, &address_misalign, | |
1840 | &address_step, &address_base_aligned); | |
2427fa80 IR |
1841 | if (!base_address) |
1842 | return NULL_TREE; | |
1843 | ||
1844 | switch (TREE_CODE (base_address)) | |
1845 | { | |
1846 | case SSA_NAME: | |
1847 | *memtag = get_var_ann (SSA_NAME_VAR (base_address))->type_mem_tag; | |
1848 | if (!(*memtag) && TREE_CODE (TREE_OPERAND (memref, 0)) == SSA_NAME) | |
1849 | *memtag = get_var_ann ( | |
1850 | SSA_NAME_VAR (TREE_OPERAND (memref, 0)))->type_mem_tag; | |
1851 | break; | |
1852 | case ADDR_EXPR: | |
1853 | *memtag = TREE_OPERAND (base_address, 0); | |
1854 | break; | |
1855 | default: | |
1856 | if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS, | |
1857 | LOOP_LOC (loop_vinfo))) | |
1858 | { | |
1859 | fprintf (vect_dump, "not vectorized: no memtag ref: "); | |
1860 | print_generic_expr (vect_dump, memref, TDF_SLIM); | |
1861 | } | |
1862 | return NULL_TREE; | |
1863 | } | |
f7064d11 DN |
1864 | } |
1865 | ||
1866 | if (!base_address) | |
1867 | /* MEMREF cannot be analyzed. */ | |
1868 | return NULL_TREE; | |
1869 | ||
c75ab022 DB |
1870 | if (SSA_VAR_P (*memtag) && var_can_have_subvars (*memtag)) |
1871 | *subvars = get_subvars_for_var (*memtag); | |
1872 | ||
f7064d11 | 1873 | /* Part 2: Combine the results of object and address analysis to calculate |
6c6cfbfd | 1874 | INITIAL_OFFSET, STEP and misalignment info. */ |
f7064d11 DN |
1875 | *offset = size_binop (PLUS_EXPR, object_offset, address_offset); |
1876 | if (object_misalign && address_misalign) | |
1877 | *misalign = size_binop (PLUS_EXPR, object_misalign, address_misalign); | |
1878 | else | |
1879 | *misalign = NULL_TREE; | |
1880 | *step = size_binop (PLUS_EXPR, object_step, address_step); | |
1881 | *base_aligned = object_base_aligned && address_base_aligned; | |
1882 | ||
1883 | if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC)) | |
1884 | { | |
1885 | fprintf (vect_dump, "Results of object analysis for: "); | |
1886 | print_generic_expr (vect_dump, memref, TDF_SLIM); | |
2427fa80 IR |
1887 | fprintf (vect_dump, "\n\tbase_address: "); |
1888 | print_generic_expr (vect_dump, base_address, TDF_SLIM); | |
f7064d11 DN |
1889 | fprintf (vect_dump, "\n\toffset: "); |
1890 | print_generic_expr (vect_dump, *offset, TDF_SLIM); | |
1891 | fprintf (vect_dump, "\n\tstep: "); | |
1892 | print_generic_expr (vect_dump, *step, TDF_SLIM); | |
1893 | fprintf (vect_dump, "\n\tbase aligned %d\n\tmisalign: ", *base_aligned); | |
1894 | print_generic_expr (vect_dump, *misalign, TDF_SLIM); | |
1895 | } | |
1896 | return base_address; | |
1897 | } | |
1898 | ||
1899 | ||
1900 | /* Function vect_analyze_data_refs. | |
1901 | ||
1902 | Find all the data references in the loop. | |
1903 | ||
1904 | The general structure of the analysis of data refs in the vectorizer is as | |
1905 | follows: | |
1906 | 1- vect_analyze_data_refs(loop): | |
1907 | Find and analyze all data-refs in the loop: | |
1908 | foreach ref | |
1909 | base_address = vect_object_analysis(ref) | |
f7064d11 DN |
1910 | 1.1- vect_object_analysis(ref): |
1911 | Analyze ref, and build a DR (data_referece struct) for it; | |
1912 | compute base, initial_offset, step and alignment. | |
1913 | Call get_inner_reference for refs handled in this function. | |
1914 | Call vect_addr_analysis(addr) to analyze pointer type expressions. | |
2427fa80 | 1915 | Set ref_stmt.base, ref_stmt.initial_offset, ref_stmt.alignment, |
8bb46326 | 1916 | ref_stmt.memtag, ref_stmt.ptr_info and ref_stmt.step accordingly. |
f7064d11 DN |
1917 | 2- vect_analyze_dependences(): apply dependence testing using ref_stmt.DR |
1918 | 3- vect_analyze_drs_alignment(): check that ref_stmt.alignment is ok. | |
1919 | 4- vect_analyze_drs_access(): check that ref_stmt.step is ok. | |
1920 | ||
1921 | FORNOW: Handle aligned INDIRECT_REFs and ARRAY_REFs | |
1922 | which base is really an array (not a pointer) and which alignment | |
1923 | can be forced. This restriction will be relaxed. */ | |
1924 | ||
1925 | static bool | |
1926 | vect_analyze_data_refs (loop_vec_info loop_vinfo) | |
1927 | { | |
1928 | struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo); | |
1929 | basic_block *bbs = LOOP_VINFO_BBS (loop_vinfo); | |
1930 | int nbbs = loop->num_nodes; | |
1931 | block_stmt_iterator si; | |
1932 | int j; | |
1933 | struct data_reference *dr; | |
1934 | ||
1935 | if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC)) | |
1936 | fprintf (vect_dump, "=== vect_analyze_data_refs ==="); | |
1937 | ||
1938 | for (j = 0; j < nbbs; j++) | |
1939 | { | |
1940 | basic_block bb = bbs[j]; | |
1941 | for (si = bsi_start (bb); !bsi_end_p (si); bsi_next (&si)) | |
1942 | { | |
1943 | bool is_read = false; | |
1944 | tree stmt = bsi_stmt (si); | |
1945 | stmt_vec_info stmt_info = vinfo_for_stmt (stmt); | |
1946 | v_may_def_optype v_may_defs = STMT_V_MAY_DEF_OPS (stmt); | |
1947 | v_must_def_optype v_must_defs = STMT_V_MUST_DEF_OPS (stmt); | |
1948 | vuse_optype vuses = STMT_VUSE_OPS (stmt); | |
1949 | varray_type *datarefs = NULL; | |
1950 | int nvuses, nv_may_defs, nv_must_defs; | |
1951 | tree memref = NULL; | |
1952 | tree scalar_type, vectype; | |
1953 | tree base, offset, misalign, step, tag; | |
8bb46326 | 1954 | struct ptr_info_def *ptr_info; |
f7064d11 | 1955 | bool base_aligned; |
e8b19a77 | 1956 | subvar_t subvars = NULL; |
f7064d11 DN |
1957 | |
1958 | /* Assumption: there exists a data-ref in stmt, if and only if | |
1959 | it has vuses/vdefs. */ | |
1960 | ||
1961 | if (!vuses && !v_may_defs && !v_must_defs) | |
1962 | continue; | |
1963 | ||
1964 | nvuses = NUM_VUSES (vuses); | |
1965 | nv_may_defs = NUM_V_MAY_DEFS (v_may_defs); | |
1966 | nv_must_defs = NUM_V_MUST_DEFS (v_must_defs); | |
1967 | ||
1968 | if (nvuses && (nv_may_defs || nv_must_defs)) | |
1969 | { | |
1970 | if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC)) | |
1971 | { | |
1972 | fprintf (vect_dump, "unexpected vdefs and vuses in stmt: "); | |
1973 | print_generic_expr (vect_dump, stmt, TDF_SLIM); | |
1974 | } | |
1975 | return false; | |
1976 | } | |
1977 | ||
1978 | if (TREE_CODE (stmt) != MODIFY_EXPR) | |
1979 | { | |
1980 | if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC)) | |
1981 | { | |
1982 | fprintf (vect_dump, "unexpected vops in stmt: "); | |
1983 | print_generic_expr (vect_dump, stmt, TDF_SLIM); | |
1984 | } | |
1985 | return false; | |
1986 | } | |
1987 | ||
1988 | if (vuses) | |
1989 | { | |
1990 | memref = TREE_OPERAND (stmt, 1); | |
1991 | datarefs = &(LOOP_VINFO_DATAREF_READS (loop_vinfo)); | |
1992 | is_read = true; | |
1993 | } | |
1994 | else /* vdefs */ | |
1995 | { | |
1996 | memref = TREE_OPERAND (stmt, 0); | |
1997 | datarefs = &(LOOP_VINFO_DATAREF_WRITES (loop_vinfo)); | |
1998 | is_read = false; | |
1999 | } | |
2000 | ||
2001 | scalar_type = TREE_TYPE (memref); | |
2002 | vectype = get_vectype_for_scalar_type (scalar_type); | |
2003 | if (!vectype) | |
2004 | { | |
2005 | if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC)) | |
2006 | { | |
2007 | fprintf (vect_dump, "no vectype for stmt: "); | |
2008 | print_generic_expr (vect_dump, stmt, TDF_SLIM); | |
2009 | fprintf (vect_dump, " scalar_type: "); | |
2010 | print_generic_expr (vect_dump, scalar_type, TDF_DETAILS); | |
2011 | } | |
2012 | /* It is not possible to vectorize this data reference. */ | |
2013 | return false; | |
2014 | } | |
2015 | /* Analyze MEMREF. If it is of a supported form, build data_reference | |
2016 | struct for it (DR). */ | |
2017 | dr = NULL; | |
2018 | base = vect_object_analysis (memref, stmt, is_read, vectype, &dr, | |
2019 | &offset, &misalign, &step, | |
8bb46326 DN |
2020 | &base_aligned, &tag, &ptr_info, |
2021 | &subvars); | |
f7064d11 DN |
2022 | if (!base) |
2023 | { | |
2024 | if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS, | |
2025 | LOOP_LOC (loop_vinfo))) | |
2026 | { | |
2027 | fprintf (vect_dump, "not vectorized: unhandled data ref: "); | |
2028 | print_generic_expr (vect_dump, stmt, TDF_SLIM); | |
2029 | } | |
2030 | return false; | |
2031 | } | |
f7064d11 DN |
2032 | STMT_VINFO_VECT_DR_BASE_ADDRESS (stmt_info) = base; |
2033 | STMT_VINFO_VECT_INIT_OFFSET (stmt_info) = offset; | |
2034 | STMT_VINFO_VECT_STEP (stmt_info) = step; | |
2035 | STMT_VINFO_VECT_MISALIGNMENT (stmt_info) = misalign; | |
2036 | STMT_VINFO_VECT_BASE_ALIGNED_P (stmt_info) = base_aligned; | |
2037 | STMT_VINFO_MEMTAG (stmt_info) = tag; | |
8bb46326 | 2038 | STMT_VINFO_PTR_INFO (stmt_info) = ptr_info; |
c75ab022 | 2039 | STMT_VINFO_SUBVARS (stmt_info) = subvars; |
f7064d11 DN |
2040 | STMT_VINFO_VECTYPE (stmt_info) = vectype; |
2041 | VARRAY_PUSH_GENERIC_PTR (*datarefs, dr); | |
2042 | STMT_VINFO_DATA_REF (stmt_info) = dr; | |
2043 | } | |
2044 | } | |
2045 | ||
2046 | return true; | |
2047 | } | |
2048 | ||
2049 | ||
2050 | /* Utility functions used by vect_mark_stmts_to_be_vectorized. */ | |
2051 | ||
2052 | /* Function vect_mark_relevant. | |
2053 | ||
2054 | Mark STMT as "relevant for vectorization" and add it to WORKLIST. */ | |
2055 | ||
2056 | static void | |
51d00891 | 2057 | vect_mark_relevant (VEC(tree,heap) **worklist, tree stmt) |
f7064d11 DN |
2058 | { |
2059 | stmt_vec_info stmt_info; | |
2060 | ||
2061 | if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC)) | |
2062 | fprintf (vect_dump, "mark relevant."); | |
2063 | ||
2064 | if (TREE_CODE (stmt) == PHI_NODE) | |
2065 | { | |
51d00891 | 2066 | VEC_safe_push (tree, heap, *worklist, stmt); |
f7064d11 DN |
2067 | return; |
2068 | } | |
2069 | ||
2070 | stmt_info = vinfo_for_stmt (stmt); | |
2071 | ||
2072 | if (!stmt_info) | |
2073 | { | |
2074 | if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC)) | |
2075 | { | |
2076 | fprintf (vect_dump, "mark relevant: no stmt info!!."); | |
2077 | print_generic_expr (vect_dump, stmt, TDF_SLIM); | |
2078 | } | |
2079 | return; | |
2080 | } | |
2081 | ||
2082 | if (STMT_VINFO_RELEVANT_P (stmt_info)) | |
2083 | { | |
2084 | if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC)) | |
2085 | fprintf (vect_dump, "already marked relevant."); | |
2086 | return; | |
2087 | } | |
2088 | ||
2089 | STMT_VINFO_RELEVANT_P (stmt_info) = 1; | |
51d00891 | 2090 | VEC_safe_push (tree, heap, *worklist, stmt); |
f7064d11 DN |
2091 | } |
2092 | ||
2093 | ||
2094 | /* Function vect_stmt_relevant_p. | |
2095 | ||
2096 | Return true if STMT in loop that is represented by LOOP_VINFO is | |
2097 | "relevant for vectorization". | |
2098 | ||
2099 | A stmt is considered "relevant for vectorization" if: | |
2100 | - it has uses outside the loop. | |
2101 | - it has vdefs (it alters memory). | |
2102 | - control stmts in the loop (except for the exit condition). | |
2103 | ||
2104 | CHECKME: what other side effects would the vectorizer allow? */ | |
2105 | ||
2106 | static bool | |
2107 | vect_stmt_relevant_p (tree stmt, loop_vec_info loop_vinfo) | |
2108 | { | |
2109 | v_may_def_optype v_may_defs; | |
2110 | v_must_def_optype v_must_defs; | |
2111 | struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo); | |
f430bae8 AM |
2112 | ssa_op_iter op_iter; |
2113 | imm_use_iterator imm_iter; | |
2114 | use_operand_p use_p; | |
2115 | tree var; | |
f7064d11 DN |
2116 | |
2117 | /* cond stmt other than loop exit cond. */ | |
2118 | if (is_ctrl_stmt (stmt) && (stmt != LOOP_VINFO_EXIT_COND (loop_vinfo))) | |
2119 | return true; | |
2120 | ||
2121 | /* changing memory. */ | |
b0f81966 | 2122 | if (TREE_CODE (stmt) == PHI_NODE) |
f7064d11 | 2123 | { |
b0f81966 AM |
2124 | if (!is_gimple_reg (PHI_RESULT (stmt))) |
2125 | return false; | |
2126 | FOR_EACH_IMM_USE_FAST (use_p, imm_iter, PHI_RESULT (stmt)) | |
f7064d11 | 2127 | { |
b0f81966 AM |
2128 | basic_block bb = bb_for_stmt (USE_STMT (use_p)); |
2129 | if (!flow_bb_inside_loop_p (loop, bb)) | |
2130 | { | |
2131 | if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC)) | |
2132 | fprintf (vect_dump, "vec_stmt_relevant_p: used out of loop."); | |
2133 | return true; | |
2134 | } | |
f7064d11 | 2135 | } |
b0f81966 AM |
2136 | return false; |
2137 | } | |
2138 | ||
2139 | v_may_defs = STMT_V_MAY_DEF_OPS (stmt); | |
2140 | v_must_defs = STMT_V_MUST_DEF_OPS (stmt); | |
2141 | if (v_may_defs || v_must_defs) | |
2142 | { | |
2143 | if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC)) | |
2144 | fprintf (vect_dump, "vec_stmt_relevant_p: stmt has vdefs."); | |
2145 | return true; | |
f7064d11 DN |
2146 | } |
2147 | ||
2148 | /* uses outside the loop. */ | |
f430bae8 | 2149 | FOR_EACH_SSA_TREE_OPERAND (var, stmt, op_iter, SSA_OP_DEF) |
f7064d11 | 2150 | { |
f430bae8 | 2151 | FOR_EACH_IMM_USE_FAST (use_p, imm_iter, var) |
f7064d11 | 2152 | { |
f430bae8 AM |
2153 | basic_block bb = bb_for_stmt (USE_STMT (use_p)); |
2154 | if (!flow_bb_inside_loop_p (loop, bb)) | |
2155 | { | |
2156 | if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC)) | |
2157 | fprintf (vect_dump, "vec_stmt_relevant_p: used out of loop."); | |
2158 | return true; | |
2159 | } | |
f7064d11 DN |
2160 | } |
2161 | } | |
2162 | ||
2163 | return false; | |
2164 | } | |
2165 | ||
2166 | ||
2167 | /* Function vect_mark_stmts_to_be_vectorized. | |
2168 | ||
2169 | Not all stmts in the loop need to be vectorized. For example: | |
2170 | ||
2171 | for i... | |
2172 | for j... | |
2173 | 1. T0 = i + j | |
2174 | 2. T1 = a[T0] | |
2175 | ||
2176 | 3. j = j + 1 | |
2177 | ||
2178 | Stmt 1 and 3 do not need to be vectorized, because loop control and | |
2179 | addressing of vectorized data-refs are handled differently. | |
2180 | ||
2181 | This pass detects such stmts. */ | |
2182 | ||
2183 | static bool | |
2184 | vect_mark_stmts_to_be_vectorized (loop_vec_info loop_vinfo) | |
2185 | { | |
51d00891 | 2186 | VEC(tree,heap) *worklist; |
f7064d11 DN |
2187 | struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo); |
2188 | basic_block *bbs = LOOP_VINFO_BBS (loop_vinfo); | |
2189 | unsigned int nbbs = loop->num_nodes; | |
2190 | block_stmt_iterator si; | |
2191 | tree stmt; | |
2192 | stmt_ann_t ann; | |
2193 | unsigned int i; | |
2194 | int j; | |
2195 | use_optype use_ops; | |
2196 | stmt_vec_info stmt_info; | |
2197 | basic_block bb; | |
2198 | tree phi; | |
2199 | ||
2200 | if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC)) | |
2201 | fprintf (vect_dump, "=== vect_mark_stmts_to_be_vectorized ==="); | |
2202 | ||
2203 | bb = loop->header; | |
2204 | for (phi = phi_nodes (bb); phi; phi = PHI_CHAIN (phi)) | |
2205 | { | |
2206 | if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC)) | |
2207 | { | |
2208 | fprintf (vect_dump, "init: phi relevant? "); | |
2209 | print_generic_expr (vect_dump, phi, TDF_SLIM); | |
2210 | } | |
2211 | ||
2212 | if (vect_stmt_relevant_p (phi, loop_vinfo)) | |
2213 | { | |
2214 | if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS, | |
2215 | LOOP_LOC (loop_vinfo))) | |
2216 | fprintf (vect_dump, "unsupported reduction/induction."); | |
2217 | return false; | |
2218 | } | |
2219 | } | |
2220 | ||
51d00891 | 2221 | worklist = VEC_alloc (tree, heap, 64); |
f7064d11 DN |
2222 | |
2223 | /* 1. Init worklist. */ | |
2224 | ||
2225 | for (i = 0; i < nbbs; i++) | |
2226 | { | |
2227 | bb = bbs[i]; | |
2228 | for (si = bsi_start (bb); !bsi_end_p (si); bsi_next (&si)) | |
2229 | { | |
2230 | stmt = bsi_stmt (si); | |
2231 | ||
2232 | if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC)) | |
2233 | { | |
2234 | fprintf (vect_dump, "init: stmt relevant? "); | |
2235 | print_generic_expr (vect_dump, stmt, TDF_SLIM); | |
2236 | } | |
2237 | ||
2238 | stmt_info = vinfo_for_stmt (stmt); | |
2239 | STMT_VINFO_RELEVANT_P (stmt_info) = 0; | |
2240 | ||
2241 | if (vect_stmt_relevant_p (stmt, loop_vinfo)) | |
2242 | vect_mark_relevant (&worklist, stmt); | |
2243 | } | |
2244 | } | |
2245 | ||
2246 | ||
2247 | /* 2. Process_worklist */ | |
2248 | ||
51d00891 | 2249 | while (VEC_length (tree, worklist) > 0) |
f7064d11 | 2250 | { |
51d00891 | 2251 | stmt = VEC_pop (tree, worklist); |
f7064d11 DN |
2252 | |
2253 | if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC)) | |
2254 | { | |
2255 | fprintf (vect_dump, "worklist: examine stmt: "); | |
2256 | print_generic_expr (vect_dump, stmt, TDF_SLIM); | |
2257 | } | |
2258 | ||
2259 | /* Examine the USES in this statement. Mark all the statements which | |
2260 | feed this statement's uses as "relevant", unless the USE is used as | |
2261 | an array index. */ | |
2262 | ||
2263 | if (TREE_CODE (stmt) == PHI_NODE) | |
2264 | { | |
2265 | /* follow the def-use chain inside the loop. */ | |
2266 | for (j = 0; j < PHI_NUM_ARGS (stmt); j++) | |
2267 | { | |
2268 | tree arg = PHI_ARG_DEF (stmt, j); | |
2269 | tree def_stmt = NULL_TREE; | |
2270 | basic_block bb; | |
2271 | if (!vect_is_simple_use (arg, loop_vinfo, &def_stmt)) | |
2272 | { | |
2273 | if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS, | |
2274 | LOOP_LOC (loop_vinfo))) | |
2275 | fprintf (vect_dump, "not vectorized: unsupported use in stmt."); | |
51d00891 | 2276 | VEC_free (tree, heap, worklist); |
f7064d11 DN |
2277 | return false; |
2278 | } | |
2279 | if (!def_stmt) | |
2280 | continue; | |
2281 | ||
2282 | if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC)) | |
2283 | { | |
2284 | fprintf (vect_dump, "worklist: def_stmt: "); | |
2285 | print_generic_expr (vect_dump, def_stmt, TDF_SLIM); | |
2286 | } | |
2287 | ||
2288 | bb = bb_for_stmt (def_stmt); | |
2289 | if (flow_bb_inside_loop_p (loop, bb)) | |
2290 | vect_mark_relevant (&worklist, def_stmt); | |
2291 | } | |
2292 | } | |
2293 | ||
2294 | ann = stmt_ann (stmt); | |
2295 | use_ops = USE_OPS (ann); | |
2296 | ||
2297 | for (i = 0; i < NUM_USES (use_ops); i++) | |
2298 | { | |
2299 | tree use = USE_OP (use_ops, i); | |
2300 | ||
2301 | /* We are only interested in uses that need to be vectorized. Uses | |
2302 | that are used for address computation are not considered relevant. | |
2303 | */ | |
2304 | if (exist_non_indexing_operands_for_use_p (use, stmt)) | |
2305 | { | |
2306 | tree def_stmt = NULL_TREE; | |
2307 | basic_block bb; | |
2308 | if (!vect_is_simple_use (use, loop_vinfo, &def_stmt)) | |
2309 | { | |
2310 | if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS, | |
2311 | LOOP_LOC (loop_vinfo))) | |
2312 | fprintf (vect_dump, "not vectorized: unsupported use in stmt."); | |
51d00891 | 2313 | VEC_free (tree, heap, worklist); |
f7064d11 DN |
2314 | return false; |
2315 | } | |
2316 | ||
2317 | if (!def_stmt) | |
2318 | continue; | |
2319 | ||
2320 | if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC)) | |
2321 | { | |
2322 | fprintf (vect_dump, "worklist: examine use %d: ", i); | |
2323 | print_generic_expr (vect_dump, use, TDF_SLIM); | |
2324 | } | |
2325 | ||
2326 | bb = bb_for_stmt (def_stmt); | |
2327 | if (flow_bb_inside_loop_p (loop, bb)) | |
2328 | vect_mark_relevant (&worklist, def_stmt); | |
2329 | } | |
2330 | } | |
2331 | } /* while worklist */ | |
2332 | ||
51d00891 | 2333 | VEC_free (tree, heap, worklist); |
f7064d11 DN |
2334 | return true; |
2335 | } | |
2336 | ||
2337 | ||
2338 | /* Function vect_can_advance_ivs_p | |
2339 | ||
2340 | In case the number of iterations that LOOP iterates in unknown at compile | |
2341 | time, an epilog loop will be generated, and the loop induction variables | |
2342 | (IVs) will be "advanced" to the value they are supposed to take just before | |
2343 | the epilog loop. Here we check that the access function of the loop IVs | |
2344 | and the expression that represents the loop bound are simple enough. | |
2345 | These restrictions will be relaxed in the future. */ | |
2346 | ||
2347 | static bool | |
2348 | vect_can_advance_ivs_p (loop_vec_info loop_vinfo) | |
2349 | { | |
2350 | struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo); | |
2351 | basic_block bb = loop->header; | |
2352 | tree phi; | |
2353 | ||
2354 | /* Analyze phi functions of the loop header. */ | |
2355 | ||
2356 | for (phi = phi_nodes (bb); phi; phi = PHI_CHAIN (phi)) | |
2357 | { | |
2358 | tree access_fn = NULL; | |
2359 | tree evolution_part; | |
2360 | ||
2361 | if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC)) | |
2362 | { | |
2363 | fprintf (vect_dump, "Analyze phi: "); | |
2364 | print_generic_expr (vect_dump, phi, TDF_SLIM); | |
2365 | } | |
2366 | ||
2367 | /* Skip virtual phi's. The data dependences that are associated with | |
2368 | virtual defs/uses (i.e., memory accesses) are analyzed elsewhere. */ | |
2369 | ||
2370 | if (!is_gimple_reg (SSA_NAME_VAR (PHI_RESULT (phi)))) | |
2371 | { | |
2372 | if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC)) | |
2373 | fprintf (vect_dump, "virtual phi. skip."); | |
2374 | continue; | |
2375 | } | |
2376 | ||
2377 | /* Analyze the evolution function. */ | |
2378 | ||
2379 | access_fn = instantiate_parameters | |
2380 | (loop, analyze_scalar_evolution (loop, PHI_RESULT (phi))); | |
2381 | ||
2382 | if (!access_fn) | |
2383 | { | |
2384 | if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC)) | |
2385 | fprintf (vect_dump, "No Access function."); | |
2386 | return false; | |
2387 | } | |
2388 | ||
2389 | if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC)) | |
2390 | { | |
2391 | fprintf (vect_dump, "Access function of PHI: "); | |
2392 | print_generic_expr (vect_dump, access_fn, TDF_SLIM); | |
2393 | } | |
2394 | ||
2395 | evolution_part = evolution_part_in_loop_num (access_fn, loop->num); | |
2396 | ||
2397 | if (evolution_part == NULL_TREE) | |
2398 | return false; | |
2399 | ||
2400 | /* FORNOW: We do not transform initial conditions of IVs | |
2401 | which evolution functions are a polynomial of degree >= 2. */ | |
2402 | ||
2403 | if (tree_is_chrec (evolution_part)) | |
2404 | return false; | |
2405 | } | |
2406 | ||
2407 | return true; | |
2408 | } | |
2409 | ||
2410 | ||
2411 | /* Function vect_get_loop_niters. | |
2412 | ||
2413 | Determine how many iterations the loop is executed. | |
2414 | If an expression that represents the number of iterations | |
2415 | can be constructed, place it in NUMBER_OF_ITERATIONS. | |
2416 | Return the loop exit condition. */ | |
2417 | ||
2418 | static tree | |
2419 | vect_get_loop_niters (struct loop *loop, tree *number_of_iterations) | |
2420 | { | |
2421 | tree niters; | |
2422 | ||
2423 | if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC)) | |
2424 | fprintf (vect_dump, "=== get_loop_niters ==="); | |
2425 | ||
2426 | niters = number_of_iterations_in_loop (loop); | |
2427 | ||
2428 | if (niters != NULL_TREE | |
2429 | && niters != chrec_dont_know) | |
2430 | { | |
2431 | *number_of_iterations = niters; | |
2432 | ||
2433 | if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC)) | |
2434 | { | |
2435 | fprintf (vect_dump, "==> get_loop_niters:" ); | |
2436 | print_generic_expr (vect_dump, *number_of_iterations, TDF_SLIM); | |
2437 | } | |
2438 | } | |
2439 | ||
2440 | return get_loop_exit_condition (loop); | |
2441 | } | |
2442 | ||
2443 | ||
2444 | /* Function vect_analyze_loop_form. | |
2445 | ||
2446 | Verify the following restrictions (some may be relaxed in the future): | |
2447 | - it's an inner-most loop | |
2448 | - number of BBs = 2 (which are the loop header and the latch) | |
2449 | - the loop has a pre-header | |
2450 | - the loop has a single entry and exit | |
2451 | - the loop exit condition is simple enough, and the number of iterations | |
2452 | can be analyzed (a countable loop). */ | |
2453 | ||
2454 | static loop_vec_info | |
2455 | vect_analyze_loop_form (struct loop *loop) | |
2456 | { | |
2457 | loop_vec_info loop_vinfo; | |
2458 | tree loop_cond; | |
2459 | tree number_of_iterations = NULL; | |
f7064d11 DN |
2460 | LOC loop_loc; |
2461 | ||
2462 | loop_loc = find_loop_location (loop); | |
2463 | ||
2464 | if (vect_print_dump_info (REPORT_DETAILS, loop_loc)) | |
2465 | fprintf (vect_dump, "=== vect_analyze_loop_form ==="); | |
2466 | ||
2467 | if (loop->inner) | |
2468 | { | |
2469 | if (vect_print_dump_info (REPORT_OUTER_LOOPS, loop_loc)) | |
2470 | fprintf (vect_dump, "not vectorized: nested loop."); | |
2471 | return NULL; | |
2472 | } | |
2473 | ||
2474 | if (!loop->single_exit | |
2475 | || loop->num_nodes != 2 | |
70388d94 | 2476 | || EDGE_COUNT (loop->header->preds) != 2) |
f7064d11 DN |
2477 | { |
2478 | if (vect_print_dump_info (REPORT_BAD_FORM_LOOPS, loop_loc)) | |
2479 | { | |
2480 | if (!loop->single_exit) | |
2481 | fprintf (vect_dump, "not vectorized: multiple exits."); | |
2482 | else if (loop->num_nodes != 2) | |
2483 | fprintf (vect_dump, "not vectorized: too many BBs in loop."); | |
2484 | else if (EDGE_COUNT (loop->header->preds) != 2) | |
2485 | fprintf (vect_dump, "not vectorized: too many incoming edges."); | |
f7064d11 DN |
2486 | } |
2487 | ||
2488 | return NULL; | |
2489 | } | |
2490 | ||
2491 | /* We assume that the loop exit condition is at the end of the loop. i.e, | |
2492 | that the loop is represented as a do-while (with a proper if-guard | |
2493 | before the loop if needed), where the loop header contains all the | |
2494 | executable statements, and the latch is empty. */ | |
2495 | if (!empty_block_p (loop->latch)) | |
2496 | { | |
2497 | if (vect_print_dump_info (REPORT_BAD_FORM_LOOPS, loop_loc)) | |
2498 | fprintf (vect_dump, "not vectorized: unexpectd loop form."); | |
2499 | return NULL; | |
2500 | } | |
2501 | ||
f7064d11 | 2502 | /* Make sure there exists a single-predecessor exit bb: */ |
c5cbcccf | 2503 | if (!single_pred_p (loop->single_exit->dest)) |
f7064d11 | 2504 | { |
ac59a959 DN |
2505 | edge e = loop->single_exit; |
2506 | if (!(e->flags & EDGE_ABNORMAL)) | |
2507 | { | |
d401de95 | 2508 | split_loop_exit_edge (e); |
ac59a959 DN |
2509 | if (vect_print_dump_info (REPORT_DETAILS, loop_loc)) |
2510 | fprintf (vect_dump, "split exit edge."); | |
ac59a959 DN |
2511 | } |
2512 | else | |
2513 | { | |
2514 | if (vect_print_dump_info (REPORT_BAD_FORM_LOOPS, loop_loc)) | |
2515 | fprintf (vect_dump, "not vectorized: abnormal loop exit edge."); | |
2516 | return NULL; | |
2517 | } | |
f7064d11 | 2518 | } |
f7064d11 DN |
2519 | |
2520 | if (empty_block_p (loop->header)) | |
2521 | { | |
2522 | if (vect_print_dump_info (REPORT_BAD_FORM_LOOPS, loop_loc)) | |
2523 | fprintf (vect_dump, "not vectorized: empty loop."); | |
2524 | return NULL; | |
2525 | } | |
2526 | ||
2527 | loop_cond = vect_get_loop_niters (loop, &number_of_iterations); | |
2528 | if (!loop_cond) | |
2529 | { | |
2530 | if (vect_print_dump_info (REPORT_BAD_FORM_LOOPS, loop_loc)) | |
2531 | fprintf (vect_dump, "not vectorized: complicated exit condition."); | |
2532 | return NULL; | |
2533 | } | |
2534 | ||
2535 | if (!number_of_iterations) | |
2536 | { | |
2537 | if (vect_print_dump_info (REPORT_BAD_FORM_LOOPS, loop_loc)) | |
2538 | fprintf (vect_dump, | |
2539 | "not vectorized: number of iterations cannot be computed."); | |
2540 | return NULL; | |
2541 | } | |
2542 | ||
2543 | if (chrec_contains_undetermined (number_of_iterations)) | |
2544 | { | |
2545 | if (vect_print_dump_info (REPORT_BAD_FORM_LOOPS, loop_loc)) | |
2546 | fprintf (vect_dump, "Infinite number of iterations."); | |
2547 | return false; | |
2548 | } | |
2549 | ||
2550 | loop_vinfo = new_loop_vec_info (loop); | |
2551 | LOOP_VINFO_NITERS (loop_vinfo) = number_of_iterations; | |
2552 | ||
2553 | if (!LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo)) | |
2554 | { | |
2555 | if (vect_print_dump_info (REPORT_DETAILS, loop_loc)) | |
2556 | { | |
2557 | fprintf (vect_dump, "Symbolic number of iterations is "); | |
2558 | print_generic_expr (vect_dump, number_of_iterations, TDF_DETAILS); | |
2559 | } | |
2560 | } | |
2561 | else | |
2562 | if (LOOP_VINFO_INT_NITERS (loop_vinfo) == 0) | |
2563 | { | |
2564 | if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS, loop_loc)) | |
2565 | fprintf (vect_dump, "not vectorized: number of iterations = 0."); | |
2566 | return NULL; | |
2567 | } | |
2568 | ||
2569 | LOOP_VINFO_EXIT_COND (loop_vinfo) = loop_cond; | |
2570 | LOOP_VINFO_LOC (loop_vinfo) = loop_loc; | |
2571 | ||
2572 | return loop_vinfo; | |
2573 | } | |
2574 | ||
2575 | ||
2576 | /* Function vect_analyze_loop. | |
2577 | ||
2578 | Apply a set of analyses on LOOP, and create a loop_vec_info struct | |
2579 | for it. The different analyses will record information in the | |
2580 | loop_vec_info struct. */ | |
2581 | loop_vec_info | |
2582 | vect_analyze_loop (struct loop *loop) | |
2583 | { | |
2584 | bool ok; | |
2585 | loop_vec_info loop_vinfo; | |
2586 | ||
2587 | if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC)) | |
2588 | fprintf (vect_dump, "===== analyze_loop_nest ====="); | |
2589 | ||
2590 | /* Check the CFG characteristics of the loop (nesting, entry/exit, etc. */ | |
2591 | ||
2592 | loop_vinfo = vect_analyze_loop_form (loop); | |
2593 | if (!loop_vinfo) | |
2594 | { | |
2595 | if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC)) | |
2596 | fprintf (vect_dump, "bad loop form."); | |
2597 | return NULL; | |
2598 | } | |
2599 | ||
2600 | /* Find all data references in the loop (which correspond to vdefs/vuses) | |
2601 | and analyze their evolution in the loop. | |
2602 | ||
2603 | FORNOW: Handle only simple, array references, which | |
2604 | alignment can be forced, and aligned pointer-references. */ | |
2605 | ||
2606 | ok = vect_analyze_data_refs (loop_vinfo); | |
2607 | if (!ok) | |
2608 | { | |
2609 | if (vect_print_dump_info (REPORT_DETAILS, LOOP_LOC (loop_vinfo))) | |
2610 | fprintf (vect_dump, "bad data references."); | |
2611 | destroy_loop_vec_info (loop_vinfo); | |
2612 | return NULL; | |
2613 | } | |
2614 | ||
2615 | /* Data-flow analysis to detect stmts that do not need to be vectorized. */ | |
2616 | ||
2617 | ok = vect_mark_stmts_to_be_vectorized (loop_vinfo); | |
2618 | if (!ok) | |
2619 | { | |
2620 | if (vect_print_dump_info (REPORT_DETAILS, LOOP_LOC (loop_vinfo))) | |
2621 | fprintf (vect_dump, "unexpected pattern."); | |
2622 | destroy_loop_vec_info (loop_vinfo); | |
2623 | return NULL; | |
2624 | } | |
2625 | ||
2626 | /* Check that all cross-iteration scalar data-flow cycles are OK. | |
2627 | Cross-iteration cycles caused by virtual phis are analyzed separately. */ | |
2628 | ||
2629 | ok = vect_analyze_scalar_cycles (loop_vinfo); | |
2630 | if (!ok) | |
2631 | { | |
2632 | if (vect_print_dump_info (REPORT_DETAILS, LOOP_LOC (loop_vinfo))) | |
2633 | fprintf (vect_dump, "bad scalar cycle."); | |
2634 | destroy_loop_vec_info (loop_vinfo); | |
2635 | return NULL; | |
2636 | } | |
2637 | ||
b52485c6 DP |
2638 | ok = vect_determine_vectorization_factor (loop_vinfo); |
2639 | if (!ok) | |
2640 | { | |
2641 | if (vect_print_dump_info (REPORT_DETAILS, LOOP_LOC (loop_vinfo))) | |
2642 | fprintf (vect_dump, "can't determine vectorization factor."); | |
2643 | destroy_loop_vec_info (loop_vinfo); | |
2644 | return NULL; | |
2645 | } | |
2646 | ||
f7064d11 DN |
2647 | /* Analyze data dependences between the data-refs in the loop. |
2648 | FORNOW: fail at the first data dependence that we encounter. */ | |
2649 | ||
2650 | ok = vect_analyze_data_ref_dependences (loop_vinfo); | |
2651 | if (!ok) | |
2652 | { | |
2653 | if (vect_print_dump_info (REPORT_DETAILS, LOOP_LOC (loop_vinfo))) | |
2654 | fprintf (vect_dump, "bad data dependence."); | |
2655 | destroy_loop_vec_info (loop_vinfo); | |
2656 | return NULL; | |
2657 | } | |
2658 | ||
2659 | /* Analyze the access patterns of the data-refs in the loop (consecutive, | |
2660 | complex, etc.). FORNOW: Only handle consecutive access pattern. */ | |
2661 | ||
2662 | ok = vect_analyze_data_ref_accesses (loop_vinfo); | |
2663 | if (!ok) | |
2664 | { | |
2665 | if (vect_print_dump_info (REPORT_DETAILS, LOOP_LOC (loop_vinfo))) | |
2666 | fprintf (vect_dump, "bad data access."); | |
2667 | destroy_loop_vec_info (loop_vinfo); | |
2668 | return NULL; | |
2669 | } | |
2670 | ||
2671 | /* Analyze the alignment of the data-refs in the loop. | |
2672 | FORNOW: Only aligned accesses are handled. */ | |
2673 | ||
2674 | ok = vect_analyze_data_refs_alignment (loop_vinfo); | |
2675 | if (!ok) | |
2676 | { | |
2677 | if (vect_print_dump_info (REPORT_DETAILS, LOOP_LOC (loop_vinfo))) | |
2678 | fprintf (vect_dump, "bad data alignment."); | |
2679 | destroy_loop_vec_info (loop_vinfo); | |
2680 | return NULL; | |
2681 | } | |
2682 | ||
2683 | /* Scan all the operations in the loop and make sure they are | |
2684 | vectorizable. */ | |
2685 | ||
2686 | ok = vect_analyze_operations (loop_vinfo); | |
2687 | if (!ok) | |
2688 | { | |
2689 | if (vect_print_dump_info (REPORT_DETAILS, LOOP_LOC (loop_vinfo))) | |
2690 | fprintf (vect_dump, "bad operation or unsupported loop bound."); | |
2691 | destroy_loop_vec_info (loop_vinfo); | |
2692 | return NULL; | |
2693 | } | |
2694 | ||
2695 | LOOP_VINFO_VECTORIZABLE_P (loop_vinfo) = 1; | |
2696 | ||
2697 | return loop_vinfo; | |
2698 | } |