]>
Commit | Line | Data |
---|---|---|
79fe1b3b DN |
1 | /* Loop Vectorization |
2 | Copyright (C) 2003, 2004 Free Software Foundation, Inc. | |
3 | Contributed by Dorit Naishlos <dorit@il.ibm.com> | |
4 | ||
5 | This file is part of GCC. | |
6 | ||
7 | GCC is free software; you can redistribute it and/or modify it under | |
8 | the terms of the GNU General Public License as published by the Free | |
9 | Software Foundation; either version 2, or (at your option) any later | |
10 | version. | |
11 | ||
12 | GCC is distributed in the hope that it will be useful, but WITHOUT ANY | |
13 | WARRANTY; without even the implied warranty of MERCHANTABILITY or | |
14 | FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License | |
15 | for more details. | |
16 | ||
17 | You should have received a copy of the GNU General Public License | |
18 | along with GCC; see the file COPYING. If not, write to the Free | |
19 | Software Foundation, 59 Temple Place - Suite 330, Boston, MA | |
20 | 02111-1307, USA. */ | |
21 | ||
22 | /* Loop Vectorization Pass. | |
23 | ||
24 | This pass tries to vectorize loops. This first implementation focuses on | |
25 | simple inner-most loops, with no conditional control flow, and a set of | |
26 | simple operations which vector form can be expressed using existing | |
27 | tree codes (PLUS, MULT etc). | |
28 | ||
29 | For example, the vectorizer transforms the following simple loop: | |
30 | ||
31 | short a[N]; short b[N]; short c[N]; int i; | |
32 | ||
33 | for (i=0; i<N; i++){ | |
34 | a[i] = b[i] + c[i]; | |
35 | } | |
36 | ||
37 | as if it was manually vectorized by rewriting the source code into: | |
38 | ||
39 | typedef int __attribute__((mode(V8HI))) v8hi; | |
40 | short a[N]; short b[N]; short c[N]; int i; | |
41 | v8hi *pa = (v8hi*)a, *pb = (v8hi*)b, *pc = (v8hi*)c; | |
42 | v8hi va, vb, vc; | |
43 | ||
44 | for (i=0; i<N/8; i++){ | |
45 | vb = pb[i]; | |
46 | vc = pc[i]; | |
47 | va = vb + vc; | |
48 | pa[i] = va; | |
49 | } | |
50 | ||
51 | The main entry to this pass is vectorize_loops(), in which | |
52 | the vectorizer applies a set of analyses on a given set of loops, | |
53 | followed by the actual vectorization transformation for the loops that | |
54 | had successfully passed the analysis phase. | |
55 | ||
56 | Throughout this pass we make a distinction between two types of | |
57 | data: scalars (which are represented by SSA_NAMES), and memory references | |
58 | ("data-refs"). These two types of data require different handling both | |
59 | during analysis and transformation. The types of data-refs that the | |
6775f1f3 IR |
60 | vectorizer currently supports are ARRAY_REFS which base is an array DECL |
61 | (not a pointer), and INDIRECT_REFS through pointers; both array and pointer | |
62 | accesses are required to have a simple (consecutive) access pattern. | |
79fe1b3b DN |
63 | |
64 | Analysis phase: | |
65 | =============== | |
66 | The driver for the analysis phase is vect_analyze_loop_nest(). | |
67 | It applies a set of analyses, some of which rely on the scalar evolution | |
68 | analyzer (scev) developed by Sebastian Pop. | |
69 | ||
70 | During the analysis phase the vectorizer records some information | |
71 | per stmt in a "stmt_vec_info" struct which is attached to each stmt in the | |
72 | loop, as well as general information about the loop as a whole, which is | |
73 | recorded in a "loop_vec_info" struct attached to each loop. | |
74 | ||
75 | Transformation phase: | |
76 | ===================== | |
77 | The loop transformation phase scans all the stmts in the loop, and | |
78 | creates a vector stmt (or a sequence of stmts) for each scalar stmt S in | |
79 | the loop that needs to be vectorized. It insert the vector code sequence | |
80 | just before the scalar stmt S, and records a pointer to the vector code | |
81 | in STMT_VINFO_VEC_STMT (stmt_info) (stmt_info is the stmt_vec_info struct | |
82 | attached to S). This pointer will be used for the vectorization of following | |
83 | stmts which use the def of stmt S. Stmt S is removed if it writes to memory; | |
84 | otherwise, we rely on dead code elimination for removing it. | |
85 | ||
86 | For example, say stmt S1 was vectorized into stmt VS1: | |
87 | ||
88 | VS1: vb = px[i]; | |
89 | S1: b = x[i]; STMT_VINFO_VEC_STMT (stmt_info (S1)) = VS1 | |
90 | S2: a = b; | |
91 | ||
92 | To vectorize stmt S2, the vectorizer first finds the stmt that defines | |
93 | the operand 'b' (S1), and gets the relevant vector def 'vb' from the | |
94 | vector stmt VS1 pointed by STMT_VINFO_VEC_STMT (stmt_info (S1)). The | |
95 | resulting sequence would be: | |
96 | ||
97 | VS1: vb = px[i]; | |
98 | S1: b = x[i]; STMT_VINFO_VEC_STMT (stmt_info (S1)) = VS1 | |
99 | VS2: va = vb; | |
100 | S2: a = b; STMT_VINFO_VEC_STMT (stmt_info (S2)) = VS2 | |
101 | ||
102 | Operands that are not SSA_NAMEs, are data-refs that appear in | |
103 | load/store operations (like 'x[i]' in S1), and are handled differently. | |
104 | ||
105 | Target modeling: | |
106 | ================= | |
107 | Currently the only target specific information that is used is the | |
108 | size of the vector (in bytes) - "UNITS_PER_SIMD_WORD". Targets that can | |
109 | support different sizes of vectors, for now will need to specify one value | |
110 | for "UNITS_PER_SIMD_WORD". More flexibility will be added in the future. | |
111 | ||
112 | Since we only vectorize operations which vector form can be | |
113 | expressed using existing tree codes, to verify that an operation is | |
114 | supported, the vectorizer checks the relevant optab at the relevant | |
115 | machine_mode (e.g, add_optab->handlers[(int) V8HImode].insn_code). If | |
116 | the value found is CODE_FOR_nothing, then there's no target support, and | |
117 | we can't vectorize the stmt. | |
118 | ||
119 | For additional information on this project see: | |
120 | http://gcc.gnu.org/projects/tree-ssa/vectorization.html | |
121 | */ | |
122 | ||
123 | #include "config.h" | |
124 | #include "system.h" | |
125 | #include "coretypes.h" | |
126 | #include "tm.h" | |
127 | #include "errors.h" | |
128 | #include "ggc.h" | |
129 | #include "tree.h" | |
130 | #include "target.h" | |
131 | ||
132 | #include "rtl.h" | |
133 | #include "basic-block.h" | |
134 | #include "diagnostic.h" | |
135 | #include "tree-flow.h" | |
136 | #include "tree-dump.h" | |
137 | #include "timevar.h" | |
138 | #include "cfgloop.h" | |
139 | #include "cfglayout.h" | |
140 | #include "expr.h" | |
141 | #include "optabs.h" | |
142 | #include "tree-chrec.h" | |
143 | #include "tree-data-ref.h" | |
144 | #include "tree-scalar-evolution.h" | |
145 | #include "tree-vectorizer.h" | |
146 | #include "tree-pass.h" | |
147 | ||
148 | /* Main analysis functions. */ | |
149 | static loop_vec_info vect_analyze_loop (struct loop *); | |
150 | static loop_vec_info vect_analyze_loop_form (struct loop *); | |
151 | static bool vect_analyze_data_refs (loop_vec_info); | |
152 | static bool vect_mark_stmts_to_be_vectorized (loop_vec_info); | |
153 | static bool vect_analyze_scalar_cycles (loop_vec_info); | |
154 | static bool vect_analyze_data_ref_accesses (loop_vec_info); | |
155 | static bool vect_analyze_data_refs_alignment (loop_vec_info); | |
156 | static void vect_compute_data_refs_alignment (loop_vec_info); | |
157 | static bool vect_analyze_operations (loop_vec_info); | |
158 | ||
159 | /* Main code transformation functions. */ | |
160 | static void vect_transform_loop (loop_vec_info, struct loops *); | |
161 | static void vect_transform_loop_bound (loop_vec_info); | |
162 | static bool vect_transform_stmt (tree, block_stmt_iterator *); | |
163 | static bool vectorizable_load (tree, block_stmt_iterator *, tree *); | |
164 | static bool vectorizable_store (tree, block_stmt_iterator *, tree *); | |
165 | static bool vectorizable_operation (tree, block_stmt_iterator *, tree *); | |
166 | static bool vectorizable_assignment (tree, block_stmt_iterator *, tree *); | |
167 | static void vect_align_data_ref (tree); | |
168 | static void vect_enhance_data_refs_alignment (loop_vec_info); | |
169 | ||
170 | /* Utility functions for the analyses. */ | |
171 | static bool vect_is_simple_use (tree , struct loop *, tree *); | |
172 | static bool exist_non_indexing_operands_for_use_p (tree, tree); | |
173 | static bool vect_is_simple_iv_evolution (unsigned, tree, tree *, tree *, bool); | |
174 | static void vect_mark_relevant (varray_type, tree); | |
175 | static bool vect_stmt_relevant_p (tree, loop_vec_info); | |
176 | static tree vect_get_loop_niters (struct loop *, HOST_WIDE_INT *); | |
6775f1f3 | 177 | static bool vect_compute_data_ref_alignment |
79fe1b3b DN |
178 | (struct data_reference *, loop_vec_info); |
179 | static bool vect_analyze_data_ref_access (struct data_reference *); | |
180 | static bool vect_get_first_index (tree, tree *); | |
181 | static bool vect_can_force_dr_alignment_p (tree, unsigned int); | |
7ccf35ed DN |
182 | static struct data_reference * vect_analyze_pointer_ref_access |
183 | (tree, tree, bool); | |
6775f1f3 IR |
184 | static tree vect_get_base_and_bit_offset |
185 | (struct data_reference *, tree, tree, loop_vec_info, tree *, bool*); | |
186 | static struct data_reference * vect_analyze_pointer_ref_access | |
187 | (tree, tree, bool); | |
188 | static tree vect_compute_array_base_alignment (tree, tree, tree *, tree *); | |
189 | static tree vect_compute_array_ref_alignment | |
190 | (struct data_reference *, loop_vec_info, tree, tree *); | |
191 | static tree vect_get_ptr_offset (tree, tree, tree *); | |
192 | static tree vect_get_symbl_and_dr | |
193 | (tree, tree, bool, loop_vec_info, struct data_reference **); | |
79fe1b3b DN |
194 | |
195 | /* Utility functions for the code transformation. */ | |
196 | static tree vect_create_destination_var (tree, tree); | |
7ccf35ed DN |
197 | static tree vect_create_data_ref_ptr |
198 | (tree, block_stmt_iterator *, tree, tree *, bool); | |
199 | static tree vect_create_index_for_vector_ref | |
200 | (struct loop *, block_stmt_iterator *); | |
201 | static tree vect_create_addr_base_for_vector_ref (tree, tree *, tree); | |
79fe1b3b DN |
202 | static tree get_vectype_for_scalar_type (tree); |
203 | static tree vect_get_new_vect_var (tree, enum vect_var_kind, const char *); | |
204 | static tree vect_get_vec_def_for_operand (tree, tree); | |
205 | static tree vect_init_vector (tree, tree); | |
206 | static void vect_finish_stmt_generation | |
207 | (tree stmt, tree vec_stmt, block_stmt_iterator *bsi); | |
208 | ||
209 | /* Utilities for creation and deletion of vec_info structs. */ | |
210 | loop_vec_info new_loop_vec_info (struct loop *loop); | |
211 | void destroy_loop_vec_info (loop_vec_info); | |
212 | stmt_vec_info new_stmt_vec_info (tree stmt, struct loop *loop); | |
213 | ||
214 | static bool vect_debug_stats (struct loop *loop); | |
215 | static bool vect_debug_details (struct loop *loop); | |
216 | ||
217 | ||
218 | /* Function new_stmt_vec_info. | |
219 | ||
220 | Create and initialize a new stmt_vec_info struct for STMT. */ | |
221 | ||
222 | stmt_vec_info | |
223 | new_stmt_vec_info (tree stmt, struct loop *loop) | |
224 | { | |
225 | stmt_vec_info res; | |
226 | res = (stmt_vec_info) xcalloc (1, sizeof (struct _stmt_vec_info)); | |
227 | ||
228 | STMT_VINFO_TYPE (res) = undef_vec_info_type; | |
229 | STMT_VINFO_STMT (res) = stmt; | |
230 | STMT_VINFO_LOOP (res) = loop; | |
231 | STMT_VINFO_RELEVANT_P (res) = 0; | |
232 | STMT_VINFO_VECTYPE (res) = NULL; | |
233 | STMT_VINFO_VEC_STMT (res) = NULL; | |
234 | STMT_VINFO_DATA_REF (res) = NULL; | |
235 | STMT_VINFO_MEMTAG (res) = NULL; | |
6775f1f3 | 236 | STMT_VINFO_VECT_DR_BASE (res) = NULL; |
79fe1b3b DN |
237 | |
238 | return res; | |
239 | } | |
240 | ||
241 | ||
/* Function new_loop_vec_info.

   Create and initialize a new loop_vec_info struct for LOOP, as well as
   stmt_vec_info structs for all the stmts in LOOP.  */

loop_vec_info
new_loop_vec_info (struct loop *loop)
{
  loop_vec_info res;
  basic_block *bbs;
  block_stmt_iterator si;
  unsigned int i;

  res = (loop_vec_info) xcalloc (1, sizeof (struct _loop_vec_info));

  bbs = get_loop_body (loop);

  /* Create stmt_info for all stmts in the loop.  */
  for (i = 0; i < loop->num_nodes; i++)
    {
      basic_block bb = bbs[i];
      for (si = bsi_start (bb); !bsi_end_p (si); bsi_next (&si))
        {
          tree stmt = bsi_stmt (si);
          stmt_ann_t ann;

          /* Refresh the operand cache before hanging the vectorizer's
             per-stmt info off the stmt's annotation.  */
          get_stmt_operands (stmt);
          ann = stmt_ann (stmt);
          set_stmt_info (ann, new_stmt_vec_info (stmt, loop));
        }
    }

  /* Loop-wide fields start at "unknown" defaults (NITERS = -1,
     VECT_FACTOR = 0); the analysis phase fills in real values.  */
  LOOP_VINFO_LOOP (res) = loop;
  LOOP_VINFO_BBS (res) = bbs;
  LOOP_VINFO_EXIT_COND (res) = NULL;
  LOOP_VINFO_NITERS (res) = -1;
  LOOP_VINFO_VECTORIZABLE_P (res) = 0;
  LOOP_VINFO_VECT_FACTOR (res) = 0;
  VARRAY_GENERIC_PTR_INIT (LOOP_VINFO_DATAREF_WRITES (res), 20,
			   "loop_write_datarefs");
  VARRAY_GENERIC_PTR_INIT (LOOP_VINFO_DATAREF_READS (res), 20,
			   "loop_read_datarefs");
  return res;
}
286 | ||
287 | ||
288 | /* Function destroy_loop_vec_info. | |
289 | ||
290 | Free LOOP_VINFO struct, as well as all the stmt_vec_info structs of all the | |
291 | stmts in the loop. */ | |
292 | ||
293 | void | |
294 | destroy_loop_vec_info (loop_vec_info loop_vinfo) | |
295 | { | |
296 | struct loop *loop; | |
297 | basic_block *bbs; | |
298 | int nbbs; | |
299 | block_stmt_iterator si; | |
300 | int j; | |
301 | ||
302 | if (!loop_vinfo) | |
303 | return; | |
304 | ||
305 | loop = LOOP_VINFO_LOOP (loop_vinfo); | |
306 | ||
307 | bbs = LOOP_VINFO_BBS (loop_vinfo); | |
308 | nbbs = loop->num_nodes; | |
309 | ||
310 | for (j = 0; j < nbbs; j++) | |
311 | { | |
312 | basic_block bb = bbs[j]; | |
313 | for (si = bsi_start (bb); !bsi_end_p (si); bsi_next (&si)) | |
314 | { | |
315 | tree stmt = bsi_stmt (si); | |
316 | stmt_ann_t ann = stmt_ann (stmt); | |
317 | stmt_vec_info stmt_info = vinfo_for_stmt (stmt); | |
318 | free (stmt_info); | |
319 | set_stmt_info (ann, NULL); | |
320 | } | |
321 | } | |
322 | ||
323 | free (LOOP_VINFO_BBS (loop_vinfo)); | |
324 | varray_clear (LOOP_VINFO_DATAREF_WRITES (loop_vinfo)); | |
325 | varray_clear (LOOP_VINFO_DATAREF_READS (loop_vinfo)); | |
326 | ||
327 | free (loop_vinfo); | |
328 | } | |
329 | ||
330 | ||
331 | /* Function debug_loop_stats. | |
332 | ||
333 | For vectorization statistics dumps. */ | |
334 | ||
335 | static bool | |
336 | vect_debug_stats (struct loop *loop) | |
337 | { | |
338 | basic_block bb; | |
339 | block_stmt_iterator si; | |
340 | tree node = NULL_TREE; | |
341 | ||
342 | if (!dump_file || !(dump_flags & TDF_STATS)) | |
343 | return false; | |
344 | ||
345 | if (!loop) | |
346 | { | |
347 | fprintf (dump_file, "\n"); | |
348 | return true; | |
349 | } | |
350 | ||
351 | if (!loop->header) | |
352 | return false; | |
353 | ||
354 | bb = loop->header; | |
355 | ||
356 | for (si = bsi_start (bb); !bsi_end_p (si); bsi_next (&si)) | |
357 | { | |
358 | node = bsi_stmt (si); | |
359 | if (node && EXPR_P (node) && EXPR_LOCUS (node)) | |
360 | break; | |
361 | } | |
362 | ||
363 | if (node && EXPR_P (node) && EXPR_LOCUS (node) | |
364 | && EXPR_FILENAME (node) && EXPR_LINENO (node)) | |
365 | { | |
366 | fprintf (dump_file, "\nloop at %s:%d: ", | |
367 | EXPR_FILENAME (node), EXPR_LINENO (node)); | |
368 | return true; | |
369 | } | |
370 | ||
371 | return false; | |
372 | } | |
373 | ||
374 | ||
375 | /* Function debug_loop_details. | |
376 | ||
377 | For vectorization debug dumps. */ | |
378 | ||
379 | static bool | |
380 | vect_debug_details (struct loop *loop) | |
381 | { | |
382 | basic_block bb; | |
383 | block_stmt_iterator si; | |
384 | tree node = NULL_TREE; | |
385 | ||
386 | if (!dump_file || !(dump_flags & TDF_DETAILS)) | |
387 | return false; | |
388 | ||
389 | if (!loop) | |
390 | { | |
391 | fprintf (dump_file, "\n"); | |
392 | return true; | |
393 | } | |
394 | ||
395 | if (!loop->header) | |
396 | return false; | |
397 | ||
398 | bb = loop->header; | |
399 | ||
400 | for (si = bsi_start (bb); !bsi_end_p (si); bsi_next (&si)) | |
401 | { | |
402 | node = bsi_stmt (si); | |
403 | if (node && EXPR_P (node) && EXPR_LOCUS (node)) | |
404 | break; | |
405 | } | |
406 | ||
407 | if (node && EXPR_P (node) && EXPR_LOCUS (node) | |
408 | && EXPR_FILENAME (node) && EXPR_LINENO (node)) | |
409 | { | |
410 | fprintf (dump_file, "\nloop at %s:%d: ", | |
411 | EXPR_FILENAME (node), EXPR_LINENO (node)); | |
412 | return true; | |
413 | } | |
414 | ||
415 | return false; | |
416 | } | |
417 | ||
6775f1f3 IR |
418 | |
/* Function vect_get_ptr_offset

   Compute the OFFSET modulo vector-type alignment of pointer REF in bits.

   FORNOW: this is an unimplemented stub.  It always returns NULL_TREE,
   which callers (vect_get_base_and_bit_offset) treat as "offset could
   not be determined".  */

static tree
vect_get_ptr_offset (tree ref ATTRIBUTE_UNUSED,
		     tree vectype ATTRIBUTE_UNUSED,
		     tree *offset ATTRIBUTE_UNUSED)
{
  /* TODO: Use alignment information.  */
  return NULL_TREE;
}
431 | ||
432 | ||
/* Function vect_get_base_and_bit_offset

   Return the BASE of the data reference EXPR.
   If VECTYPE is given, also compute the OFFSET from BASE in bits.
   E.g., for EXPR a.b[i] + 4B, BASE is a, and OFFSET is the overall offset in
   bits of 'a.b[i] + 4B' from a.

   Input:
   EXPR - the memory reference that is being analyzed
   DR - the data_reference struct of the _original_ memory reference
        (Note: DR_REF (DR) is not necessarily EXPR)
   VECTYPE - the type that defines the alignment (i.e, we compute
             alignment relative to TYPE_ALIGN(VECTYPE))

   Output:
   BASE (returned value) - the base of the data reference EXPR.
                           E.g, if EXPR is a.b[k].c[i][j] the returned
                           base is a.
   OFFSET - offset of EXPR from BASE in bits
   BASE_ALIGNED_P - indicates if BASE is aligned

   If something unexpected is encountered (an unsupported form of data-ref),
   or if VECTYPE is given but OFFSET cannot be determined:
   then NULL_TREE is returned.  */

static tree
vect_get_base_and_bit_offset (struct data_reference *dr,
			      tree expr,
			      tree vectype,
			      loop_vec_info loop_vinfo,
			      tree *offset,
			      bool *base_aligned_p)
{
  /* Offset contributed by the current level of EXPR alone; the
     recursion on the inner reference accumulates the rest.  */
  tree this_offset = size_zero_node;
  tree base = NULL_TREE;
  tree next_ref;
  tree oprnd0, oprnd1;
  struct data_reference *array_dr;
  enum tree_code code = TREE_CODE (expr);

  *base_aligned_p = false;

  switch (code)
    {
    /* These cases end the recursion:  */
    case VAR_DECL:
      *offset = size_zero_node;
      if (vectype && DECL_ALIGN (expr) >= TYPE_ALIGN (vectype))
	*base_aligned_p = true;
      return expr;

    case SSA_NAME:
      /* With no VECTYPE we only need the base, so any SSA_NAME will do.  */
      if (!vectype)
	return expr;

      if (TREE_CODE (TREE_TYPE (expr)) != POINTER_TYPE)
	return NULL_TREE;

      if (TYPE_ALIGN (TREE_TYPE (TREE_TYPE (expr))) < TYPE_ALIGN (vectype))
	{
	  /* Pointee type is under-aligned; fall back to explicit pointer
	     alignment info (currently an unimplemented stub, so this
	     path yields NULL_TREE).  */
	  base = vect_get_ptr_offset (expr, vectype, offset);
	  if (base)
	    *base_aligned_p = true;
	}
      else
	{
	  *base_aligned_p = true;
	  *offset = size_zero_node;
	  base = expr;
	}
      return base;

    case INTEGER_CST:
      /* A constant addend measured in bytes; convert to bits.  */
      *offset = int_const_binop (MULT_EXPR, expr,
				 build_int_cst (NULL_TREE, BITS_PER_UNIT), 1);
      return expr;

    /* These cases continue the recursion:  */
    case COMPONENT_REF:
      oprnd0 = TREE_OPERAND (expr, 0);
      oprnd1 = TREE_OPERAND (expr, 1);

      /* Field offset in bits; must be a compile-time constant when an
         offset is requested.  */
      this_offset = bit_position (oprnd1);
      if (vectype && !host_integerp (this_offset, 1))
	return NULL_TREE;
      next_ref = oprnd0;
      break;

    case ADDR_EXPR:
      oprnd0 = TREE_OPERAND (expr, 0);
      next_ref = oprnd0;
      break;

    case INDIRECT_REF:
      oprnd0 = TREE_OPERAND (expr, 0);
      next_ref = oprnd0;
      break;

    case ARRAY_REF:
      if (DR_REF (dr) != expr)
	/* Build array data_reference struct if the existing DR_REF
	   doesn't match EXPR. This happens, for example, when the
	   EXPR is *T and T is initialized to &arr[indx]. The DR struct
	   contains information on the access of T, not of arr. In order
	   to continue the analysis, we create a new DR struct that
	   describes the access of arr.
	*/
	array_dr = analyze_array (DR_STMT (dr), expr, DR_IS_READ (dr));
      else
	array_dr = dr;

      next_ref = vect_compute_array_ref_alignment (array_dr, loop_vinfo,
						   vectype, &this_offset);
      if (!next_ref)
	return NULL_TREE;

      /* If the array's element pointer is at least vector-aligned we can
         stop here instead of recursing further.  */
      if (vectype &&
	  TYPE_ALIGN (TREE_TYPE (TREE_TYPE (next_ref))) >= TYPE_ALIGN (vectype))
	{
	  *offset = this_offset;
	  *base_aligned_p = true;
	  return next_ref;
	}
      break;

    case PLUS_EXPR:
    case MINUS_EXPR:
      /* In case we have a PLUS_EXPR of the form
	 (oprnd0 + oprnd1), we assume that only oprnd0 determines the base.
	 This is verified in vect_get_symbl_and_dr.  */
      oprnd0 = TREE_OPERAND (expr, 0);
      oprnd1 = TREE_OPERAND (expr, 1);

      base = vect_get_base_and_bit_offset
	(dr, oprnd1, vectype, loop_vinfo, &this_offset, base_aligned_p);
      if (vectype && !base)
	return NULL_TREE;

      next_ref = oprnd0;
      break;

    default:
      return NULL_TREE;
    }

  /* Recurse on the inner reference; OFFSET receives its contribution.  */
  base = vect_get_base_and_bit_offset (dr, next_ref, vectype,
				       loop_vinfo, offset, base_aligned_p);

  if (vectype && base)
    {
      /* Fold this level's offset into the total and bail out on
         non-constant or overflowed results.  */
      *offset = int_const_binop (PLUS_EXPR, *offset, this_offset, 1);
      if (!host_integerp (*offset, 1) || TREE_OVERFLOW (*offset))
	return NULL_TREE;

      if (vect_debug_details (NULL))
	{
	  print_generic_expr (dump_file, expr, TDF_SLIM);
	  fprintf (dump_file, " --> total offset for ref: ");
	  print_generic_expr (dump_file, *offset, TDF_SLIM);
	}
    }
  return base;
}
596 | ||
597 | ||
6775f1f3 | 598 | |
79fe1b3b DN |
599 | /* Function vect_force_dr_alignment_p. |
600 | ||
601 | Returns whether the alignment of a DECL can be forced to be aligned | |
602 | on ALIGNMENT bit boundary. */ | |
603 | ||
604 | static bool | |
605 | vect_can_force_dr_alignment_p (tree decl, unsigned int alignment) | |
606 | { | |
607 | if (TREE_CODE (decl) != VAR_DECL) | |
608 | return false; | |
609 | ||
610 | if (DECL_EXTERNAL (decl)) | |
611 | return false; | |
612 | ||
613 | if (TREE_STATIC (decl)) | |
614 | return (alignment <= MAX_OFILE_ALIGNMENT); | |
615 | else | |
7a8554ce DN |
616 | /* This is not 100% correct. The absolute correct stack alignment |
617 | is STACK_BOUNDARY. We're supposed to hope, but not assume, that | |
618 | PREFERRED_STACK_BOUNDARY is honored by all translation units. | |
619 | However, until someone implements forced stack alignment, SSE | |
620 | isn't really usable without this. */ | |
621 | return (alignment <= PREFERRED_STACK_BOUNDARY); | |
79fe1b3b DN |
622 | } |
623 | ||
624 | ||
625 | /* Function vect_get_new_vect_var. | |
626 | ||
627 | Returns a name for a new variable. The current naming scheme appends the | |
628 | prefix "vect_" or "vect_p" (depending on the value of VAR_KIND) to | |
629 | the name of vectorizer generated variables, and appends that to NAME if | |
630 | provided. */ | |
631 | ||
632 | static tree | |
633 | vect_get_new_vect_var (tree type, enum vect_var_kind var_kind, const char *name) | |
634 | { | |
635 | const char *prefix; | |
636 | int prefix_len; | |
637 | tree new_vect_var; | |
638 | ||
639 | if (var_kind == vect_simple_var) | |
640 | prefix = "vect_"; | |
641 | else | |
642 | prefix = "vect_p"; | |
643 | ||
644 | prefix_len = strlen (prefix); | |
645 | ||
646 | if (name) | |
647 | new_vect_var = create_tmp_var (type, concat (prefix, name, NULL)); | |
648 | else | |
649 | new_vect_var = create_tmp_var (type, prefix); | |
650 | ||
651 | return new_vect_var; | |
652 | } | |
653 | ||
654 | ||
6775f1f3 | 655 | /* Function vect_create_index_for_vector_ref. |
79fe1b3b DN |
656 | |
657 | Create (and return) an index variable, along with it's update chain in the | |
658 | loop. This variable will be used to access a memory location in a vector | |
659 | operation. | |
660 | ||
661 | Input: | |
6775f1f3 | 662 | LOOP: The loop being vectorized. |
79fe1b3b DN |
663 | BSI: The block_stmt_iterator where STMT is. Any new stmts created by this |
664 | function can be added here, or in the loop pre-header. | |
665 | ||
6775f1f3 IR |
666 | Output: |
667 | Return an index that will be used to index a vector array. It is expected | |
668 | that a pointer to the first vector will be used as the base address for the | |
669 | indexed reference. | |
670 | ||
671 | FORNOW: we are not trying to be efficient, just creating a new index each | |
672 | time from scratch. At this time all vector references could use the same | |
673 | index. | |
674 | ||
675 | TODO: create only one index to be used by all vector references. Record | |
676 | the index in the LOOP_VINFO the first time this procedure is called and | |
677 | return it on subsequent calls. The increment of this index must be placed | |
678 | just before the conditional expression that ends the single block loop. */ | |
79fe1b3b DN |
679 | |
680 | static tree | |
6775f1f3 | 681 | vect_create_index_for_vector_ref (struct loop *loop, block_stmt_iterator *bsi) |
79fe1b3b | 682 | { |
79fe1b3b | 683 | tree init, step; |
79fe1b3b | 684 | tree indx_before_incr, indx_after_incr; |
79fe1b3b | 685 | |
6775f1f3 IR |
686 | /* It is assumed that the base pointer used for vectorized access contains |
687 | the address of the first vector. Therefore the index used for vectorized | |
688 | access must be initialized to zero and incremented by 1. */ | |
79fe1b3b | 689 | |
6775f1f3 IR |
690 | init = integer_zero_node; |
691 | step = integer_one_node; | |
692 | ||
693 | /* Assuming that bsi_insert is used with BSI_NEW_STMT */ | |
694 | create_iv (init, step, NULL_TREE, loop, bsi, false, | |
695 | &indx_before_incr, &indx_after_incr); | |
79fe1b3b | 696 | |
6775f1f3 IR |
697 | return indx_before_incr; |
698 | } | |
79fe1b3b | 699 | |
79fe1b3b | 700 | |
/* Function vect_create_addr_base_for_vector_ref.

   Create an expression that computes the address of the first memory location
   that will be accessed for a data reference.

   Input:
   STMT: The statement containing the data reference.
   NEW_STMT_LIST: Must be initialized to NULL_TREE or a statement list.
   OFFSET: Optional. If supplied, it is be added to the initial address.

   Output:
   1. Return an SSA_NAME whose value is the address of the memory location of the
      first vector of the data reference.
   2. If new_stmt_list is not NULL_TREE after return then the caller must insert
      these statement(s) which define the returned SSA_NAME.

   FORNOW: We are only handling array accesses with step 1.  */

static tree
vect_create_addr_base_for_vector_ref (tree stmt,
				      tree *new_stmt_list,
				      tree offset)
{
  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
  struct loop *loop = STMT_VINFO_LOOP (stmt_info);
  struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
  tree data_ref_base = unshare_expr (STMT_VINFO_VECT_DR_BASE (stmt_info));
  tree base_name = unshare_expr (DR_BASE_NAME (dr));
  tree ref = DR_REF (dr);
  tree data_ref_base_type = TREE_TYPE (data_ref_base);
  tree scalar_type = TREE_TYPE (ref);
  tree scalar_ptr_type = build_pointer_type (scalar_type);
  tree access_fn;
  tree init_val, step, init_oval;
  bool ok;
  bool is_ptr_ref, is_array_ref, is_addr_expr;
  tree array_base;
  tree vec_stmt;
  tree new_temp;
  tree array_ref;
  tree addr_base, addr_expr;
  tree dest, new_stmt;

  /* Only the access function of the last index is relevant (i_n in
     a[i_1][i_2]...[i_n]), the others correspond to loop invariants.  */
  access_fn = DR_ACCESS_FN (dr, 0);
  ok = vect_is_simple_iv_evolution (loop->num, access_fn, &init_oval, &step, true);
  if (!ok)
    /* Non-affine evolution: fall back to a zero initial index.  */
    init_oval = integer_zero_node;

  /* Classify the base expression; exactly one of these must hold.  */
  is_ptr_ref = TREE_CODE (data_ref_base_type) == POINTER_TYPE
	       && TREE_CODE (data_ref_base) == SSA_NAME;
  is_array_ref = TREE_CODE (data_ref_base_type) == ARRAY_TYPE
		 && (TREE_CODE (data_ref_base) == VAR_DECL
		     || TREE_CODE (data_ref_base) == COMPONENT_REF
		     || TREE_CODE (data_ref_base) == ARRAY_REF);
  is_addr_expr = TREE_CODE (data_ref_base) == ADDR_EXPR
		 || TREE_CODE (data_ref_base) == PLUS_EXPR
		 || TREE_CODE (data_ref_base) == MINUS_EXPR;
  gcc_assert (is_ptr_ref || is_array_ref || is_addr_expr);

  /** Create: &(base[init_val])

      if data_ref_base is an ARRAY_TYPE:
	 base = data_ref_base

      if data_ref_base is the SSA_NAME of a POINTER_TYPE:
	 base = *((scalar_array *) data_ref_base)
   **/

  if (is_array_ref)
    array_base = data_ref_base;
  else /* is_ptr_ref or is_addr_expr */
    {
      /* array_ptr = (scalar_array_ptr_type *) data_ref_base;  */
      tree scalar_array_type = build_array_type (scalar_type, 0);
      tree scalar_array_ptr_type = build_pointer_type (scalar_array_type);
      tree array_ptr = create_tmp_var (scalar_array_ptr_type, "array_ptr");
      add_referenced_tmp_var (array_ptr);

      /* Gimplify the base address; the defining stmts go on NEW_STMT_LIST
         for the caller to insert.  */
      dest = create_tmp_var (TREE_TYPE (data_ref_base), "dataref");
      add_referenced_tmp_var (dest);
      data_ref_base =
	force_gimple_operand (data_ref_base, &new_stmt, false, dest);
      append_to_statement_list_force (new_stmt, new_stmt_list);

      /* Cast the pointer to an array-of-scalars pointer type.  */
      vec_stmt = fold_convert (scalar_array_ptr_type, data_ref_base);
      vec_stmt = build2 (MODIFY_EXPR, void_type_node, array_ptr, vec_stmt);
      new_temp = make_ssa_name (array_ptr, vec_stmt);
      TREE_OPERAND (vec_stmt, 0) = new_temp;
      append_to_statement_list_force (vec_stmt, new_stmt_list);

      /* (*array_ptr)  */
      array_base = build_fold_indirect_ref (new_temp);
    }

  /* Gimplify the initial index value.  */
  dest = create_tmp_var (TREE_TYPE (init_oval), "newinit");
  add_referenced_tmp_var (dest);
  init_val = force_gimple_operand (init_oval, &new_stmt, false, dest);
  append_to_statement_list_force (new_stmt, new_stmt_list);

  if (offset)
    {
      /* init_val += offset, as a fresh SSA name.  */
      tree tmp = create_tmp_var (TREE_TYPE (init_val), "offset");
      add_referenced_tmp_var (tmp);
      vec_stmt = build2 (PLUS_EXPR, TREE_TYPE (init_val), init_val, offset);
      vec_stmt = build2 (MODIFY_EXPR, TREE_TYPE (init_val), tmp, vec_stmt);
      init_val = make_ssa_name (tmp, vec_stmt);
      TREE_OPERAND (vec_stmt, 0) = init_val;
      append_to_statement_list_force (vec_stmt, new_stmt_list);
    }

  /* Build &base[init_val].  */
  array_ref = build4 (ARRAY_REF, scalar_type, array_base, init_val,
		      NULL_TREE, NULL_TREE);
  addr_base = build_fold_addr_expr (array_ref);

  /* addr_expr = addr_base  */
  addr_expr = vect_get_new_vect_var (scalar_ptr_type, vect_pointer_var,
				     get_name (base_name));
  add_referenced_tmp_var (addr_expr);
  vec_stmt = build2 (MODIFY_EXPR, void_type_node, addr_expr, addr_base);
  new_temp = make_ssa_name (addr_expr, vec_stmt);
  TREE_OPERAND (vec_stmt, 0) = new_temp;
  append_to_statement_list_force (vec_stmt, new_stmt_list);

  return new_temp;
}
828 | ||
829 | ||
830 | /* Function get_vectype_for_scalar_type. | |
831 | ||
832 | Returns the vector type corresponding to SCALAR_TYPE as supported | |
833 | by the target. */ | |
834 | ||
835 | static tree | |
836 | get_vectype_for_scalar_type (tree scalar_type) | |
837 | { | |
838 | enum machine_mode inner_mode = TYPE_MODE (scalar_type); | |
839 | int nbytes = GET_MODE_SIZE (inner_mode); | |
840 | int nunits; | |
6775f1f3 | 841 | tree vectype; |
79fe1b3b DN |
842 | |
843 | if (nbytes == 0) | |
844 | return NULL_TREE; | |
845 | ||
846 | /* FORNOW: Only a single vector size per target (UNITS_PER_SIMD_WORD) | |
847 | is expected. */ | |
848 | nunits = UNITS_PER_SIMD_WORD / nbytes; | |
849 | ||
6775f1f3 IR |
850 | vectype = build_vector_type (scalar_type, nunits); |
851 | if (TYPE_MODE (vectype) == BLKmode) | |
852 | return NULL_TREE; | |
853 | return vectype; | |
79fe1b3b DN |
854 | } |
855 | ||
856 | ||
857 | /* Function vect_align_data_ref. | |
858 | ||
859 | Handle mislignment of a memory accesses. | |
860 | ||
861 | FORNOW: Can't handle misaligned accesses. | |
862 | Make sure that the dataref is aligned. */ | |
863 | ||
864 | static void | |
865 | vect_align_data_ref (tree stmt) | |
866 | { | |
867 | stmt_vec_info stmt_info = vinfo_for_stmt (stmt); | |
868 | struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info); | |
869 | ||
870 | /* FORNOW: can't handle misaligned accesses; | |
871 | all accesses expected to be aligned. */ | |
1e128c5f | 872 | gcc_assert (aligned_access_p (dr)); |
79fe1b3b DN |
873 | } |
874 | ||
875 | ||
/* Function vect_create_data_ref_ptr.

   Create a memory reference expression for vector access, to be used in a
   vector load/store stmt. The reference is based on a new pointer to vector
   type (vp).

   Input:
   1. STMT: a stmt that references memory. Expected to be of the form
        MODIFY_EXPR <name, data-ref> or MODIFY_EXPR <data-ref, name>.
   2. BSI: block_stmt_iterator where new stmts can be added.
   3. OFFSET (optional): an offset to be added to the initial address accessed
        by the data-ref in STMT.
   4. ONLY_INIT: indicate if vp is to be updated in the loop, or remain
        pointing to the initial address.

   Output:
   1. Declare a new ptr to vector_type, and have it point to the base of the
      data reference (initial address accessed by the data reference).
      For example, for vector of type V8HI, the following code is generated:

      v8hi *vp;
      vp = (v8hi *)initial_address;

      if OFFSET is not supplied:
         initial_address = &a[init];
      if OFFSET is supplied:
         initial_address = &a[init + OFFSET];

      Return the initial_address in INITIAL_ADDRESS.

   2. Create a data-reference in the loop based on the new vector pointer vp,
      and using a new index variable 'idx' as follows:

      vp' = vp + update

      where if ONLY_INIT is true:
         update = zero
      and otherwise
         update = idx + vector_type_size

      Return the pointer vp'.


   FORNOW: handle only aligned and consecutive accesses.  */

static tree
vect_create_data_ref_ptr (tree stmt, block_stmt_iterator *bsi, tree offset,
                          tree *initial_address, bool only_init)
{
  tree base_name;
  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
  struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
  struct loop *loop = STMT_VINFO_LOOP (stmt_info);
  tree vectype = STMT_VINFO_VECTYPE (stmt_info);
  tree vect_ptr_type;
  tree vect_ptr;
  tree tag;
  v_may_def_optype v_may_defs = STMT_V_MAY_DEF_OPS (stmt);
  v_must_def_optype v_must_defs = STMT_V_MUST_DEF_OPS (stmt);
  vuse_optype vuses = STMT_VUSE_OPS (stmt);
  int nvuses, nv_may_defs, nv_must_defs;
  int i;
  tree new_temp;
  tree vec_stmt;
  tree new_stmt_list = NULL_TREE;
  tree idx;
  edge pe = loop_preheader_edge (loop);
  basic_block new_bb;
  tree vect_ptr_init;
  tree vectype_size;
  tree ptr_update;
  tree data_ref_ptr;

  /* Unshare the base so the stmts built below do not share sub-trees
     with the original data reference.  */
  base_name = unshare_expr (DR_BASE_NAME (dr));
  if (vect_debug_details (NULL))
    {
      tree data_ref_base = base_name;
      fprintf (dump_file, "create array_ref of type: ");
      print_generic_expr (dump_file, vectype, TDF_SLIM);
      if (TREE_CODE (data_ref_base) == VAR_DECL)
        fprintf (dump_file, "vectorizing a one dimensional array ref: ");
      else if (TREE_CODE (data_ref_base) == ARRAY_REF)
        fprintf (dump_file, "vectorizing a multidimensional array ref: ");
      else if (TREE_CODE (data_ref_base) == COMPONENT_REF)
        fprintf (dump_file, "vectorizing a record based array ref: ");
      else if (TREE_CODE (data_ref_base) == SSA_NAME)
        fprintf (dump_file, "vectorizing a pointer ref: ");
      print_generic_expr (dump_file, base_name, TDF_SLIM);
    }

  /** (1) Create the new vector-pointer variable:  **/

  vect_ptr_type = build_pointer_type (vectype);
  vect_ptr = vect_get_new_vect_var (vect_ptr_type, vect_pointer_var,
                                    get_name (base_name));
  add_referenced_tmp_var (vect_ptr);


  /** (2) Handle aliasing information of the new vector-pointer:  **/

  /* The new pointer aliases whatever the original data-ref aliased,
     so it inherits the stmt's type memory tag.  */
  tag = STMT_VINFO_MEMTAG (stmt_info);
  gcc_assert (tag);
  get_var_ann (vect_ptr)->type_mem_tag = tag;

  /* Mark for renaming all aliased variables
     (i.e, the may-aliases of the type-mem-tag).  */
  nvuses = NUM_VUSES (vuses);
  nv_may_defs = NUM_V_MAY_DEFS (v_may_defs);
  nv_must_defs = NUM_V_MUST_DEFS (v_must_defs);
  for (i = 0; i < nvuses; i++)
    {
      tree use = VUSE_OP (vuses, i);
      if (TREE_CODE (use) == SSA_NAME)
        bitmap_set_bit (vars_to_rename, var_ann (SSA_NAME_VAR (use))->uid);
    }
  for (i = 0; i < nv_may_defs; i++)
    {
      tree def = V_MAY_DEF_RESULT (v_may_defs, i);
      if (TREE_CODE (def) == SSA_NAME)
        bitmap_set_bit (vars_to_rename, var_ann (SSA_NAME_VAR (def))->uid);
    }
  for (i = 0; i < nv_must_defs; i++)
    {
      tree def = V_MUST_DEF_OP (v_must_defs, i);
      if (TREE_CODE (def) == SSA_NAME)
        bitmap_set_bit (vars_to_rename, var_ann (SSA_NAME_VAR (def))->uid);
    }


  /** (3) Calculate the initial address of the vector-pointer, and set
          the vector-pointer to point to it before the loop:  **/

  /* Create &(base[init_val+offset]) in the loop preheader.  */
  new_temp = vect_create_addr_base_for_vector_ref (stmt, &new_stmt_list,
                                                   offset);
  /* NOTE(review): PE was already initialized at its declaration; this
     re-assignment is redundant but harmless.  */
  pe = loop_preheader_edge (loop);
  /* bsi_insert_on_edge_immediate returns a new bb only if it had to split
     the edge; the preheader edge is expected to be splittable in place.  */
  new_bb = bsi_insert_on_edge_immediate (pe, new_stmt_list);
  gcc_assert (!new_bb);
  *initial_address = new_temp;

  /* Create: p = (vectype *) initial_base  */
  vec_stmt = fold_convert (vect_ptr_type, new_temp);
  vec_stmt = build2 (MODIFY_EXPR, void_type_node, vect_ptr, vec_stmt);
  new_temp = make_ssa_name (vect_ptr, vec_stmt);
  TREE_OPERAND (vec_stmt, 0) = new_temp;
  new_bb = bsi_insert_on_edge_immediate (pe, vec_stmt);
  gcc_assert (!new_bb);
  vect_ptr_init = TREE_OPERAND (vec_stmt, 0);


  /** (4) Handle the updating of the vector-pointer inside the loop: **/

  if (only_init) /* No update in loop is required.  */
    return vect_ptr_init;

  idx = vect_create_index_for_vector_ref (loop, bsi);

  /* Create: update = idx * vectype_size  */
  ptr_update = create_tmp_var (integer_type_node, "update");
  add_referenced_tmp_var (ptr_update);
  vectype_size = build_int_cst (integer_type_node,
                                GET_MODE_SIZE (TYPE_MODE (vectype)));
  vec_stmt = build2 (MULT_EXPR, integer_type_node, idx, vectype_size);
  vec_stmt = build2 (MODIFY_EXPR, void_type_node, ptr_update, vec_stmt);
  new_temp = make_ssa_name (ptr_update, vec_stmt);
  TREE_OPERAND (vec_stmt, 0) = new_temp;
  bsi_insert_before (bsi, vec_stmt, BSI_SAME_STMT);

  /* Create: data_ref_ptr = vect_ptr_init + update  */
  vec_stmt = build2 (PLUS_EXPR, vect_ptr_type, vect_ptr_init, new_temp);
  vec_stmt = build2 (MODIFY_EXPR, void_type_node, vect_ptr, vec_stmt);
  new_temp = make_ssa_name (vect_ptr, vec_stmt);
  TREE_OPERAND (vec_stmt, 0) = new_temp;
  bsi_insert_before (bsi, vec_stmt, BSI_SAME_STMT);
  data_ref_ptr = TREE_OPERAND (vec_stmt, 0);

  return data_ref_ptr;
}
1054 | ||
1055 | ||
1056 | /* Function vect_create_destination_var. | |
1057 | ||
1058 | Create a new temporary of type VECTYPE. */ | |
1059 | ||
1060 | static tree | |
1061 | vect_create_destination_var (tree scalar_dest, tree vectype) | |
1062 | { | |
1063 | tree vec_dest; | |
1064 | const char *new_name; | |
1065 | ||
1e128c5f | 1066 | gcc_assert (TREE_CODE (scalar_dest) == SSA_NAME); |
79fe1b3b DN |
1067 | |
1068 | new_name = get_name (scalar_dest); | |
1069 | if (!new_name) | |
1070 | new_name = "var_"; | |
1071 | vec_dest = vect_get_new_vect_var (vectype, vect_simple_var, new_name); | |
1072 | add_referenced_tmp_var (vec_dest); | |
1073 | ||
1074 | return vec_dest; | |
1075 | } | |
1076 | ||
1077 | ||
1078 | /* Function vect_init_vector. | |
1079 | ||
1080 | Insert a new stmt (INIT_STMT) that initializes a new vector variable with | |
1081 | the vector elements of VECTOR_VAR. Return the DEF of INIT_STMT. It will be | |
1082 | used in the vectorization of STMT. */ | |
1083 | ||
1084 | static tree | |
1085 | vect_init_vector (tree stmt, tree vector_var) | |
1086 | { | |
1087 | stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt); | |
1088 | struct loop *loop = STMT_VINFO_LOOP (stmt_vinfo); | |
1089 | tree new_var; | |
1090 | tree init_stmt; | |
1091 | tree vectype = STMT_VINFO_VECTYPE (stmt_vinfo); | |
1092 | tree vec_oprnd; | |
1093 | edge pe; | |
1094 | tree new_temp; | |
6775f1f3 | 1095 | basic_block new_bb; |
79fe1b3b DN |
1096 | |
1097 | new_var = vect_get_new_vect_var (vectype, vect_simple_var, "cst_"); | |
1098 | add_referenced_tmp_var (new_var); | |
1099 | ||
1100 | init_stmt = build2 (MODIFY_EXPR, vectype, new_var, vector_var); | |
1101 | new_temp = make_ssa_name (new_var, init_stmt); | |
1102 | TREE_OPERAND (init_stmt, 0) = new_temp; | |
1103 | ||
1104 | pe = loop_preheader_edge (loop); | |
6775f1f3 IR |
1105 | new_bb = bsi_insert_on_edge_immediate (pe, init_stmt); |
1106 | gcc_assert (!new_bb); | |
79fe1b3b DN |
1107 | |
1108 | if (vect_debug_details (NULL)) | |
1109 | { | |
1110 | fprintf (dump_file, "created new init_stmt: "); | |
1111 | print_generic_expr (dump_file, init_stmt, TDF_SLIM); | |
1112 | } | |
1113 | ||
1114 | vec_oprnd = TREE_OPERAND (init_stmt, 0); | |
1115 | return vec_oprnd; | |
1116 | } | |
1117 | ||
1118 | ||
1119 | /* Function vect_get_vec_def_for_operand. | |
1120 | ||
1121 | OP is an operand in STMT. This function returns a (vector) def that will be | |
1122 | used in the vectorized stmt for STMT. | |
1123 | ||
1124 | In the case that OP is an SSA_NAME which is defined in the loop, then | |
1125 | STMT_VINFO_VEC_STMT of the defining stmt holds the relevant def. | |
1126 | ||
1127 | In case OP is an invariant or constant, a new stmt that creates a vector def | |
1128 | needs to be introduced. */ | |
1129 | ||
1130 | static tree | |
1131 | vect_get_vec_def_for_operand (tree op, tree stmt) | |
1132 | { | |
1133 | tree vec_oprnd; | |
1134 | tree vec_stmt; | |
1135 | tree def_stmt; | |
1136 | stmt_vec_info def_stmt_info = NULL; | |
1137 | stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt); | |
1138 | tree vectype = STMT_VINFO_VECTYPE (stmt_vinfo); | |
1139 | int nunits = GET_MODE_NUNITS (TYPE_MODE (vectype)); | |
1140 | struct loop *loop = STMT_VINFO_LOOP (stmt_vinfo); | |
1141 | basic_block bb; | |
1142 | tree vec_inv; | |
1143 | tree t = NULL_TREE; | |
1144 | tree def; | |
1145 | int i; | |
1146 | ||
1147 | if (vect_debug_details (NULL)) | |
1148 | { | |
1149 | fprintf (dump_file, "vect_get_vec_def_for_operand: "); | |
1150 | print_generic_expr (dump_file, op, TDF_SLIM); | |
1151 | } | |
1152 | ||
1153 | /** ===> Case 1: operand is a constant. **/ | |
1154 | ||
1155 | if (TREE_CODE (op) == INTEGER_CST || TREE_CODE (op) == REAL_CST) | |
1156 | { | |
1157 | /* Create 'vect_cst_ = {cst,cst,...,cst}' */ | |
1158 | ||
1159 | tree vec_cst; | |
1160 | stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt); | |
1161 | tree vectype = STMT_VINFO_VECTYPE (stmt_vinfo); | |
1162 | int nunits = GET_MODE_NUNITS (TYPE_MODE (vectype)); | |
1163 | tree t = NULL_TREE; | |
1164 | int i; | |
1165 | ||
1166 | /* Build a tree with vector elements. */ | |
1167 | if (vect_debug_details (NULL)) | |
1168 | fprintf (dump_file, "Create vector_cst. nunits = %d", nunits); | |
1169 | ||
1170 | for (i = nunits - 1; i >= 0; --i) | |
1171 | { | |
1172 | t = tree_cons (NULL_TREE, op, t); | |
1173 | } | |
1174 | vec_cst = build_vector (vectype, t); | |
1175 | return vect_init_vector (stmt, vec_cst); | |
1176 | } | |
1177 | ||
1e128c5f | 1178 | gcc_assert (TREE_CODE (op) == SSA_NAME); |
79fe1b3b DN |
1179 | |
1180 | /** ===> Case 2: operand is an SSA_NAME - find the stmt that defines it. **/ | |
1181 | ||
1182 | def_stmt = SSA_NAME_DEF_STMT (op); | |
1183 | def_stmt_info = vinfo_for_stmt (def_stmt); | |
1184 | ||
1185 | if (vect_debug_details (NULL)) | |
1186 | { | |
1187 | fprintf (dump_file, "vect_get_vec_def_for_operand: def_stmt: "); | |
1188 | print_generic_expr (dump_file, def_stmt, TDF_SLIM); | |
1189 | } | |
1190 | ||
1191 | ||
1192 | /** ==> Case 2.1: operand is defined inside the loop. **/ | |
1193 | ||
1194 | if (def_stmt_info) | |
1195 | { | |
1196 | /* Get the def from the vectorized stmt. */ | |
1197 | ||
1198 | vec_stmt = STMT_VINFO_VEC_STMT (def_stmt_info); | |
1e128c5f | 1199 | gcc_assert (vec_stmt); |
79fe1b3b DN |
1200 | vec_oprnd = TREE_OPERAND (vec_stmt, 0); |
1201 | return vec_oprnd; | |
1202 | } | |
1203 | ||
1204 | ||
1205 | /** ==> Case 2.2: operand is defined by the loop-header phi-node - | |
1206 | it is a reduction/induction. **/ | |
1207 | ||
1208 | bb = bb_for_stmt (def_stmt); | |
1209 | if (TREE_CODE (def_stmt) == PHI_NODE && flow_bb_inside_loop_p (loop, bb)) | |
1210 | { | |
1211 | if (vect_debug_details (NULL)) | |
1212 | fprintf (dump_file, "reduction/induction - unsupported."); | |
1e128c5f | 1213 | internal_error ("no support for reduction/induction"); /* FORNOW */ |
79fe1b3b DN |
1214 | } |
1215 | ||
1216 | ||
1217 | /** ==> Case 2.3: operand is defined outside the loop - | |
1218 | it is a loop invariant. */ | |
1219 | ||
1220 | switch (TREE_CODE (def_stmt)) | |
1221 | { | |
1222 | case PHI_NODE: | |
1223 | def = PHI_RESULT (def_stmt); | |
1224 | break; | |
1225 | case MODIFY_EXPR: | |
1226 | def = TREE_OPERAND (def_stmt, 0); | |
1227 | break; | |
1228 | case NOP_EXPR: | |
1229 | def = TREE_OPERAND (def_stmt, 0); | |
1e128c5f | 1230 | gcc_assert (IS_EMPTY_STMT (def_stmt)); |
79fe1b3b DN |
1231 | def = op; |
1232 | break; | |
1233 | default: | |
1234 | if (vect_debug_details (NULL)) | |
1235 | { | |
1236 | fprintf (dump_file, "unsupported defining stmt: "); | |
1237 | print_generic_expr (dump_file, def_stmt, TDF_SLIM); | |
1238 | } | |
1e128c5f | 1239 | internal_error ("unsupported defining stmt"); |
79fe1b3b DN |
1240 | } |
1241 | ||
1242 | /* Build a tree with vector elements. Create 'vec_inv = {inv,inv,..,inv}' */ | |
1243 | ||
1244 | if (vect_debug_details (NULL)) | |
1245 | fprintf (dump_file, "Create vector_inv."); | |
1246 | ||
1247 | for (i = nunits - 1; i >= 0; --i) | |
1248 | { | |
1249 | t = tree_cons (NULL_TREE, def, t); | |
1250 | } | |
1251 | ||
1252 | vec_inv = build_constructor (vectype, t); | |
1253 | return vect_init_vector (stmt, vec_inv); | |
1254 | } | |
1255 | ||
1256 | ||
1257 | /* Function vect_finish_stmt_generation. | |
1258 | ||
1259 | Insert a new stmt. */ | |
1260 | ||
1261 | static void | |
1262 | vect_finish_stmt_generation (tree stmt, tree vec_stmt, block_stmt_iterator *bsi) | |
1263 | { | |
1264 | bsi_insert_before (bsi, vec_stmt, BSI_SAME_STMT); | |
1265 | ||
1266 | if (vect_debug_details (NULL)) | |
1267 | { | |
1268 | fprintf (dump_file, "add new stmt: "); | |
1269 | print_generic_expr (dump_file, vec_stmt, TDF_SLIM); | |
1270 | } | |
1271 | ||
1272 | /* Make sure bsi points to the stmt that is being vectorized. */ | |
1273 | ||
7ccf35ed DN |
1274 | /* Assumption: any stmts created for the vectorization of stmt S were |
1275 | inserted before S. BSI is expected to point to S or some new stmt before S. */ | |
79fe1b3b DN |
1276 | |
1277 | while (stmt != bsi_stmt (*bsi) && !bsi_end_p (*bsi)) | |
1278 | bsi_next (bsi); | |
1e128c5f | 1279 | gcc_assert (stmt == bsi_stmt (*bsi)); |
79fe1b3b DN |
1280 | } |
1281 | ||
1282 | ||
1283 | /* Function vectorizable_assignment. | |
1284 | ||
1285 | Check if STMT performs an assignment (copy) that can be vectorized. | |
1286 | If VEC_STMT is also passed, vectorize the STMT: create a vectorized | |
1287 | stmt to replace it, put it in VEC_STMT, and insert it at BSI. | |
1288 | Return FALSE if not a vectorizable STMT, TRUE otherwise. */ | |
1289 | ||
1290 | static bool | |
1291 | vectorizable_assignment (tree stmt, block_stmt_iterator *bsi, tree *vec_stmt) | |
1292 | { | |
1293 | tree vec_dest; | |
1294 | tree scalar_dest; | |
1295 | tree op; | |
1296 | tree vec_oprnd; | |
1297 | stmt_vec_info stmt_info = vinfo_for_stmt (stmt); | |
1298 | tree vectype = STMT_VINFO_VECTYPE (stmt_info); | |
1299 | struct loop *loop = STMT_VINFO_LOOP (stmt_info); | |
1300 | tree new_temp; | |
1301 | ||
1302 | /* Is vectorizable assignment? */ | |
1303 | ||
1304 | if (TREE_CODE (stmt) != MODIFY_EXPR) | |
1305 | return false; | |
1306 | ||
1307 | scalar_dest = TREE_OPERAND (stmt, 0); | |
1308 | if (TREE_CODE (scalar_dest) != SSA_NAME) | |
1309 | return false; | |
1310 | ||
1311 | op = TREE_OPERAND (stmt, 1); | |
1312 | if (!vect_is_simple_use (op, loop, NULL)) | |
1313 | { | |
1314 | if (vect_debug_details (NULL)) | |
1315 | fprintf (dump_file, "use not simple."); | |
1316 | return false; | |
1317 | } | |
1318 | ||
1319 | if (!vec_stmt) /* transformation not required. */ | |
1320 | { | |
1321 | STMT_VINFO_TYPE (stmt_info) = assignment_vec_info_type; | |
1322 | return true; | |
1323 | } | |
1324 | ||
1325 | /** Trasform. **/ | |
1326 | if (vect_debug_details (NULL)) | |
1327 | fprintf (dump_file, "transform assignment."); | |
1328 | ||
1329 | /* Handle def. */ | |
1330 | vec_dest = vect_create_destination_var (scalar_dest, vectype); | |
1331 | ||
1332 | /* Handle use. */ | |
1333 | op = TREE_OPERAND (stmt, 1); | |
1334 | vec_oprnd = vect_get_vec_def_for_operand (op, stmt); | |
1335 | ||
1336 | /* Arguments are ready. create the new vector stmt. */ | |
1337 | *vec_stmt = build2 (MODIFY_EXPR, vectype, vec_dest, vec_oprnd); | |
1338 | new_temp = make_ssa_name (vec_dest, *vec_stmt); | |
1339 | TREE_OPERAND (*vec_stmt, 0) = new_temp; | |
1340 | vect_finish_stmt_generation (stmt, *vec_stmt, bsi); | |
1341 | ||
1342 | return true; | |
1343 | } | |
1344 | ||
1345 | ||
1346 | /* Function vectorizable_operation. | |
1347 | ||
1348 | Check if STMT performs a binary or unary operation that can be vectorized. | |
1349 | If VEC_STMT is also passed, vectorize the STMT: create a vectorized | |
1350 | stmt to replace it, put it in VEC_STMT, and insert it at BSI. | |
1351 | Return FALSE if not a vectorizable STMT, TRUE otherwise. */ | |
1352 | ||
1353 | static bool | |
1354 | vectorizable_operation (tree stmt, block_stmt_iterator *bsi, tree *vec_stmt) | |
1355 | { | |
1356 | tree vec_dest; | |
1357 | tree scalar_dest; | |
1358 | tree operation; | |
1359 | tree op0, op1 = NULL; | |
1360 | tree vec_oprnd0, vec_oprnd1=NULL; | |
1361 | stmt_vec_info stmt_info = vinfo_for_stmt (stmt); | |
1362 | tree vectype = STMT_VINFO_VECTYPE (stmt_info); | |
1363 | struct loop *loop = STMT_VINFO_LOOP (stmt_info); | |
1364 | int i; | |
1365 | enum tree_code code; | |
1366 | enum machine_mode vec_mode; | |
1367 | tree new_temp; | |
1368 | int op_type; | |
1369 | tree op; | |
1370 | optab optab; | |
1371 | ||
1372 | /* Is STMT a vectorizable binary/unary operation? */ | |
1373 | if (TREE_CODE (stmt) != MODIFY_EXPR) | |
1374 | return false; | |
1375 | ||
1376 | if (TREE_CODE (TREE_OPERAND (stmt, 0)) != SSA_NAME) | |
1377 | return false; | |
1378 | ||
1379 | operation = TREE_OPERAND (stmt, 1); | |
1380 | code = TREE_CODE (operation); | |
1381 | optab = optab_for_tree_code (code, vectype); | |
1382 | ||
1383 | /* Support only unary or binary operations. */ | |
1384 | op_type = TREE_CODE_LENGTH (code); | |
1385 | if (op_type != unary_op && op_type != binary_op) | |
1386 | { | |
1387 | if (vect_debug_details (NULL)) | |
1388 | fprintf (dump_file, "num. args = %d (not unary/binary op).", op_type); | |
1389 | return false; | |
1390 | } | |
1391 | ||
1392 | for (i = 0; i < op_type; i++) | |
1393 | { | |
1394 | op = TREE_OPERAND (operation, i); | |
1395 | if (!vect_is_simple_use (op, loop, NULL)) | |
1396 | { | |
1397 | if (vect_debug_details (NULL)) | |
1398 | fprintf (dump_file, "use not simple."); | |
1399 | return false; | |
1400 | } | |
1401 | } | |
1402 | ||
1403 | /* Supportable by target? */ | |
1404 | if (!optab) | |
1405 | { | |
1406 | if (vect_debug_details (NULL)) | |
1407 | fprintf (dump_file, "no optab."); | |
1408 | return false; | |
1409 | } | |
1410 | vec_mode = TYPE_MODE (vectype); | |
1411 | if (optab->handlers[(int) vec_mode].insn_code == CODE_FOR_nothing) | |
1412 | { | |
1413 | if (vect_debug_details (NULL)) | |
1414 | fprintf (dump_file, "op not supported by target."); | |
1415 | return false; | |
1416 | } | |
1417 | ||
1418 | if (!vec_stmt) /* transformation not required. */ | |
1419 | { | |
1420 | STMT_VINFO_TYPE (stmt_info) = op_vec_info_type; | |
1421 | return true; | |
1422 | } | |
1423 | ||
1424 | /** Trasform. **/ | |
1425 | ||
1426 | if (vect_debug_details (NULL)) | |
1427 | fprintf (dump_file, "transform binary/unary operation."); | |
1428 | ||
1429 | /* Handle def. */ | |
1430 | scalar_dest = TREE_OPERAND (stmt, 0); | |
1431 | vec_dest = vect_create_destination_var (scalar_dest, vectype); | |
1432 | ||
1433 | /* Handle uses. */ | |
1434 | op0 = TREE_OPERAND (operation, 0); | |
1435 | vec_oprnd0 = vect_get_vec_def_for_operand (op0, stmt); | |
1436 | ||
1437 | if (op_type == binary_op) | |
1438 | { | |
1439 | op1 = TREE_OPERAND (operation, 1); | |
1440 | vec_oprnd1 = vect_get_vec_def_for_operand (op1, stmt); | |
1441 | } | |
1442 | ||
1443 | /* Arguments are ready. create the new vector stmt. */ | |
1444 | ||
1445 | if (op_type == binary_op) | |
1446 | *vec_stmt = build2 (MODIFY_EXPR, vectype, vec_dest, | |
1447 | build2 (code, vectype, vec_oprnd0, vec_oprnd1)); | |
1448 | else | |
1449 | *vec_stmt = build2 (MODIFY_EXPR, vectype, vec_dest, | |
1450 | build1 (code, vectype, vec_oprnd0)); | |
1451 | new_temp = make_ssa_name (vec_dest, *vec_stmt); | |
1452 | TREE_OPERAND (*vec_stmt, 0) = new_temp; | |
1453 | vect_finish_stmt_generation (stmt, *vec_stmt, bsi); | |
1454 | ||
1455 | return true; | |
1456 | } | |
1457 | ||
1458 | ||
1459 | /* Function vectorizable_store. | |
1460 | ||
1461 | Check if STMT defines a non scalar data-ref (array/pointer/structure) that | |
1462 | can be vectorized. | |
1463 | If VEC_STMT is also passed, vectorize the STMT: create a vectorized | |
1464 | stmt to replace it, put it in VEC_STMT, and insert it at BSI. | |
1465 | Return FALSE if not a vectorizable STMT, TRUE otherwise. */ | |
1466 | ||
1467 | static bool | |
1468 | vectorizable_store (tree stmt, block_stmt_iterator *bsi, tree *vec_stmt) | |
1469 | { | |
1470 | tree scalar_dest; | |
1471 | tree data_ref; | |
1472 | tree op; | |
1473 | tree vec_oprnd1; | |
1474 | stmt_vec_info stmt_info = vinfo_for_stmt (stmt); | |
1475 | tree vectype = STMT_VINFO_VECTYPE (stmt_info); | |
1476 | struct loop *loop = STMT_VINFO_LOOP (stmt_info); | |
1477 | enum machine_mode vec_mode; | |
7ccf35ed | 1478 | tree dummy; |
79fe1b3b DN |
1479 | |
1480 | /* Is vectorizable store? */ | |
1481 | ||
1482 | if (TREE_CODE (stmt) != MODIFY_EXPR) | |
1483 | return false; | |
1484 | ||
1485 | scalar_dest = TREE_OPERAND (stmt, 0); | |
1486 | if (TREE_CODE (scalar_dest) != ARRAY_REF | |
1487 | && TREE_CODE (scalar_dest) != INDIRECT_REF) | |
1488 | return false; | |
1489 | ||
1490 | op = TREE_OPERAND (stmt, 1); | |
1491 | if (!vect_is_simple_use (op, loop, NULL)) | |
1492 | { | |
1493 | if (vect_debug_details (NULL)) | |
1494 | fprintf (dump_file, "use not simple."); | |
1495 | return false; | |
1496 | } | |
1497 | ||
1498 | vec_mode = TYPE_MODE (vectype); | |
1499 | /* FORNOW. In some cases can vectorize even if data-type not supported | |
1500 | (e.g. - array initialization with 0). */ | |
1501 | if (mov_optab->handlers[(int)vec_mode].insn_code == CODE_FOR_nothing) | |
1502 | return false; | |
1503 | ||
1504 | if (!STMT_VINFO_DATA_REF (stmt_info)) | |
1505 | return false; | |
1506 | ||
7ccf35ed DN |
1507 | if (!aligned_access_p (STMT_VINFO_DATA_REF (stmt_info))) |
1508 | return false; | |
1509 | ||
79fe1b3b DN |
1510 | if (!vec_stmt) /* transformation not required. */ |
1511 | { | |
1512 | STMT_VINFO_TYPE (stmt_info) = store_vec_info_type; | |
1513 | return true; | |
1514 | } | |
1515 | ||
1516 | /** Trasform. **/ | |
1517 | ||
1518 | if (vect_debug_details (NULL)) | |
1519 | fprintf (dump_file, "transform store"); | |
1520 | ||
1521 | /* Handle use - get the vectorized def from the defining stmt. */ | |
1522 | vec_oprnd1 = vect_get_vec_def_for_operand (op, stmt); | |
1523 | ||
1524 | /* Handle def. */ | |
7ccf35ed DN |
1525 | /* FORNOW: make sure the data reference is aligned. */ |
1526 | vect_align_data_ref (stmt); | |
1527 | data_ref = vect_create_data_ref_ptr (stmt, bsi, NULL_TREE, &dummy, false); | |
1528 | data_ref = build_fold_indirect_ref (data_ref); | |
79fe1b3b DN |
1529 | |
1530 | /* Arguments are ready. create the new vector stmt. */ | |
1531 | *vec_stmt = build2 (MODIFY_EXPR, vectype, data_ref, vec_oprnd1); | |
1532 | vect_finish_stmt_generation (stmt, *vec_stmt, bsi); | |
1533 | ||
1534 | return true; | |
1535 | } | |
1536 | ||
1537 | ||
1538 | /* vectorizable_load. | |
1539 | ||
1540 | Check if STMT reads a non scalar data-ref (array/pointer/structure) that | |
1541 | can be vectorized. | |
1542 | If VEC_STMT is also passed, vectorize the STMT: create a vectorized | |
1543 | stmt to replace it, put it in VEC_STMT, and insert it at BSI. | |
1544 | Return FALSE if not a vectorizable STMT, TRUE otherwise. */ | |
1545 | ||
1546 | static bool | |
1547 | vectorizable_load (tree stmt, block_stmt_iterator *bsi, tree *vec_stmt) | |
1548 | { | |
1549 | tree scalar_dest; | |
1550 | tree vec_dest = NULL; | |
1551 | tree data_ref = NULL; | |
1552 | tree op; | |
1553 | stmt_vec_info stmt_info = vinfo_for_stmt (stmt); | |
7ccf35ed | 1554 | struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info); |
79fe1b3b DN |
1555 | tree vectype = STMT_VINFO_VECTYPE (stmt_info); |
1556 | tree new_temp; | |
7ccf35ed DN |
1557 | int mode; |
1558 | tree init_addr; | |
1559 | tree new_stmt; | |
1560 | tree dummy; | |
1561 | basic_block new_bb; | |
1562 | struct loop *loop = STMT_VINFO_LOOP (stmt_info); | |
1563 | edge pe = loop_preheader_edge (loop); | |
1564 | bool software_pipeline_loads_p = false; | |
79fe1b3b DN |
1565 | |
1566 | /* Is vectorizable load? */ | |
1567 | ||
1568 | if (TREE_CODE (stmt) != MODIFY_EXPR) | |
1569 | return false; | |
1570 | ||
1571 | scalar_dest = TREE_OPERAND (stmt, 0); | |
1572 | if (TREE_CODE (scalar_dest) != SSA_NAME) | |
1573 | return false; | |
1574 | ||
1575 | op = TREE_OPERAND (stmt, 1); | |
1576 | if (TREE_CODE (op) != ARRAY_REF && TREE_CODE (op) != INDIRECT_REF) | |
1577 | return false; | |
1578 | ||
1579 | if (!STMT_VINFO_DATA_REF (stmt_info)) | |
1580 | return false; | |
1581 | ||
7ccf35ed DN |
1582 | mode = (int) TYPE_MODE (vectype); |
1583 | ||
79fe1b3b | 1584 | /* FORNOW. In some cases can vectorize even if data-type not supported |
7ccf35ed DN |
1585 | (e.g. - data copies). */ |
1586 | if (mov_optab->handlers[mode].insn_code == CODE_FOR_nothing) | |
1587 | { | |
1588 | if (vect_debug_details (loop)) | |
1589 | fprintf (dump_file, "Aligned load, but unsupported type."); | |
1590 | return false; | |
1591 | } | |
1592 | ||
1593 | if (!aligned_access_p (dr)) | |
1594 | { | |
1595 | if (vec_realign_load_optab->handlers[mode].insn_code != CODE_FOR_nothing | |
1596 | && (!targetm.vectorize.builtin_mask_for_load | |
1597 | || targetm.vectorize.builtin_mask_for_load ())) | |
1598 | software_pipeline_loads_p = true; | |
1599 | else if (!targetm.vectorize.misaligned_mem_ok (mode)) | |
1600 | { | |
1601 | /* Possibly unaligned access, and can't sofware pipeline the loads */ | |
1602 | if (vect_debug_details (loop)) | |
1603 | fprintf (dump_file, "Arbitrary load not supported."); | |
1604 | return false; | |
1605 | } | |
1606 | } | |
79fe1b3b DN |
1607 | |
1608 | if (!vec_stmt) /* transformation not required. */ | |
1609 | { | |
1610 | STMT_VINFO_TYPE (stmt_info) = load_vec_info_type; | |
1611 | return true; | |
1612 | } | |
1613 | ||
1614 | /** Trasform. **/ | |
1615 | ||
1616 | if (vect_debug_details (NULL)) | |
1617 | fprintf (dump_file, "transform load."); | |
1618 | ||
7ccf35ed DN |
1619 | if (!software_pipeline_loads_p) |
1620 | { | |
1621 | /* Create: | |
1622 | p = initial_addr; | |
1623 | indx = 0; | |
1624 | loop { | |
1625 | vec_dest = *(p); | |
1626 | indx = indx + 1; | |
1627 | } | |
1628 | */ | |
1629 | ||
1630 | vec_dest = vect_create_destination_var (scalar_dest, vectype); | |
1631 | data_ref = vect_create_data_ref_ptr (stmt, bsi, NULL_TREE, &dummy, false); | |
1632 | if (aligned_access_p (dr)) | |
1633 | data_ref = build_fold_indirect_ref (data_ref); | |
1634 | else | |
1635 | { | |
1636 | int mis = DR_MISALIGNMENT (dr); | |
1637 | tree tmis = (mis == -1 ? | |
1638 | integer_zero_node : | |
1639 | build_int_cst (integer_type_node, mis)); | |
1640 | tmis = int_const_binop (MULT_EXPR, tmis, | |
1641 | build_int_cst (integer_type_node, BITS_PER_UNIT), 1); | |
1642 | data_ref = build2 (MISALIGNED_INDIRECT_REF, vectype, data_ref, tmis); | |
1643 | } | |
1644 | new_stmt = build2 (MODIFY_EXPR, vectype, vec_dest, data_ref); | |
1645 | new_temp = make_ssa_name (vec_dest, new_stmt); | |
1646 | TREE_OPERAND (new_stmt, 0) = new_temp; | |
1647 | vect_finish_stmt_generation (stmt, new_stmt, bsi); | |
1648 | } | |
1649 | else /* software-pipeline the loads */ | |
1650 | { | |
1651 | /* Create: | |
1652 | p1 = initial_addr; | |
1653 | msq_init = *(floor(p1)) | |
1654 | p2 = initial_addr + VS - 1; | |
1655 | magic = have_builtin ? builtin_result : initial_address; | |
1656 | indx = 0; | |
1657 | loop { | |
1658 | p2' = p2 + indx * vectype_size | |
1659 | lsq = *(floor(p2')) | |
1660 | vec_dest = realign_load (msq, lsq, magic) | |
1661 | indx = indx + 1; | |
1662 | msq = lsq; | |
1663 | } | |
1664 | */ | |
1665 | ||
1666 | tree offset; | |
1667 | tree magic; | |
1668 | tree phi_stmt; | |
1669 | tree msq_init; | |
1670 | tree msq, lsq; | |
1671 | tree dataref_ptr; | |
1672 | tree params; | |
1673 | ||
1674 | /* <1> Create msq_init = *(floor(p1)) in the loop preheader */ | |
1675 | vec_dest = vect_create_destination_var (scalar_dest, vectype); | |
1676 | data_ref = vect_create_data_ref_ptr (stmt, bsi, NULL_TREE, | |
1677 | &init_addr, true); | |
1678 | data_ref = build1 (ALIGN_INDIRECT_REF, vectype, data_ref); | |
1679 | new_stmt = build2 (MODIFY_EXPR, vectype, vec_dest, data_ref); | |
1680 | new_temp = make_ssa_name (vec_dest, new_stmt); | |
1681 | TREE_OPERAND (new_stmt, 0) = new_temp; | |
1682 | new_bb = bsi_insert_on_edge_immediate (pe, new_stmt); | |
1683 | gcc_assert (!new_bb); | |
1684 | msq_init = TREE_OPERAND (new_stmt, 0); | |
1685 | ||
1686 | ||
1687 | /* <2> Create lsq = *(floor(p2')) in the loop */ | |
1688 | offset = build_int_cst (integer_type_node, | |
1689 | GET_MODE_NUNITS (TYPE_MODE (vectype))); | |
1690 | offset = int_const_binop (MINUS_EXPR, offset, integer_one_node, 1); | |
1691 | vec_dest = vect_create_destination_var (scalar_dest, vectype); | |
1692 | dataref_ptr = vect_create_data_ref_ptr (stmt, bsi, offset, &dummy, false); | |
1693 | data_ref = build1 (ALIGN_INDIRECT_REF, vectype, dataref_ptr); | |
1694 | new_stmt = build2 (MODIFY_EXPR, vectype, vec_dest, data_ref); | |
1695 | new_temp = make_ssa_name (vec_dest, new_stmt); | |
1696 | TREE_OPERAND (new_stmt, 0) = new_temp; | |
1697 | vect_finish_stmt_generation (stmt, new_stmt, bsi); | |
1698 | lsq = TREE_OPERAND (new_stmt, 0); | |
1699 | ||
1700 | ||
1701 | /* <3> */ | |
1702 | if (targetm.vectorize.builtin_mask_for_load) | |
1703 | { | |
1704 | /* Create permutation mask, if required, in loop preheader. */ | |
1705 | tree builtin_decl; | |
1706 | params = build_tree_list (NULL_TREE, init_addr); | |
1707 | vec_dest = vect_create_destination_var (scalar_dest, vectype); | |
1708 | builtin_decl = targetm.vectorize.builtin_mask_for_load (); | |
1709 | new_stmt = build_function_call_expr (builtin_decl, params); | |
1710 | new_stmt = build2 (MODIFY_EXPR, vectype, vec_dest, new_stmt); | |
1711 | new_temp = make_ssa_name (vec_dest, new_stmt); | |
1712 | TREE_OPERAND (new_stmt, 0) = new_temp; | |
1713 | new_bb = bsi_insert_on_edge_immediate (pe, new_stmt); | |
1714 | gcc_assert (!new_bb); | |
1715 | magic = TREE_OPERAND (new_stmt, 0); | |
1716 | } | |
1717 | else | |
1718 | { | |
1719 | /* Use current address instead of init_addr for reduced reg pressure. */ | |
1720 | magic = dataref_ptr; | |
1721 | } | |
79fe1b3b | 1722 | |
79fe1b3b | 1723 | |
7ccf35ed DN |
1724 | /* <4> Create msq = phi <msq_init, lsq> in loop */ |
1725 | vec_dest = vect_create_destination_var (scalar_dest, vectype); | |
1726 | msq = make_ssa_name (vec_dest, NULL_TREE); | |
1727 | phi_stmt = create_phi_node (msq, loop->header); /* CHECKME */ | |
1728 | SSA_NAME_DEF_STMT (msq) = phi_stmt; | |
1729 | add_phi_arg (&phi_stmt, msq_init, loop_preheader_edge (loop)); | |
1730 | add_phi_arg (&phi_stmt, lsq, loop_latch_edge (loop)); | |
1731 | ||
79fe1b3b | 1732 | |
7ccf35ed DN |
1733 | /* <5> Create <vec_dest = realign_load (msq, lsq, magic)> in loop */ |
1734 | vec_dest = vect_create_destination_var (scalar_dest, vectype); | |
1735 | new_stmt = build3 (REALIGN_LOAD_EXPR, vectype, msq, lsq, magic); | |
1736 | new_stmt = build2 (MODIFY_EXPR, vectype, vec_dest, new_stmt); | |
1737 | new_temp = make_ssa_name (vec_dest, new_stmt); | |
1738 | TREE_OPERAND (new_stmt, 0) = new_temp; | |
1739 | vect_finish_stmt_generation (stmt, new_stmt, bsi); | |
1740 | } | |
1741 | ||
1742 | *vec_stmt = new_stmt; | |
79fe1b3b DN |
1743 | return true; |
1744 | } | |
1745 | ||
1746 | ||
1747 | /* Function vect_transform_stmt. | |
1748 | ||
1749 | Create a vectorized stmt to replace STMT, and insert it at BSI. */ | |
1750 | ||
1751 | static bool | |
1752 | vect_transform_stmt (tree stmt, block_stmt_iterator *bsi) | |
1753 | { | |
1754 | bool is_store = false; | |
1755 | tree vec_stmt = NULL_TREE; | |
1756 | stmt_vec_info stmt_info = vinfo_for_stmt (stmt); | |
1e128c5f | 1757 | bool done; |
79fe1b3b DN |
1758 | |
1759 | switch (STMT_VINFO_TYPE (stmt_info)) | |
1760 | { | |
1761 | case op_vec_info_type: | |
1e128c5f GB |
1762 | done = vectorizable_operation (stmt, bsi, &vec_stmt); |
1763 | gcc_assert (done); | |
79fe1b3b DN |
1764 | break; |
1765 | ||
1766 | case assignment_vec_info_type: | |
1e128c5f GB |
1767 | done = vectorizable_assignment (stmt, bsi, &vec_stmt); |
1768 | gcc_assert (done); | |
79fe1b3b DN |
1769 | break; |
1770 | ||
1771 | case load_vec_info_type: | |
1e128c5f GB |
1772 | done = vectorizable_load (stmt, bsi, &vec_stmt); |
1773 | gcc_assert (done); | |
79fe1b3b DN |
1774 | break; |
1775 | ||
1776 | case store_vec_info_type: | |
1e128c5f GB |
1777 | done = vectorizable_store (stmt, bsi, &vec_stmt); |
1778 | gcc_assert (done); | |
79fe1b3b DN |
1779 | is_store = true; |
1780 | break; | |
1781 | default: | |
1782 | if (vect_debug_details (NULL)) | |
1783 | fprintf (dump_file, "stmt not supported."); | |
1e128c5f | 1784 | gcc_unreachable (); |
79fe1b3b DN |
1785 | } |
1786 | ||
1787 | STMT_VINFO_VEC_STMT (stmt_info) = vec_stmt; | |
1788 | ||
1789 | return is_store; | |
1790 | } | |
1791 | ||
1792 | ||
/* Function vect_transform_loop_bound.

   Create a new exit condition for the loop: replace the original scalar
   exit test with a test of a fresh counter IV against niters/vf.
   Assumes (and asserts) that the number of iterations is known at compile
   time and divides evenly by the vectorization factor.  */

static void
vect_transform_loop_bound (loop_vec_info loop_vinfo)
{
  struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
  edge exit_edge = loop->single_exit;
  /* Iterator positioned on the last stmt of the exiting block, which is
     expected to be the loop exit COND_EXPR (asserted below).  */
  block_stmt_iterator loop_exit_bsi = bsi_last (exit_edge->src);
  tree indx_before_incr, indx_after_incr;
  tree orig_cond_expr;
  HOST_WIDE_INT old_N = 0;
  int vf;
  tree cond_stmt;
  tree new_loop_bound;
  tree cond;
  tree lb_type;

  gcc_assert (LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo));
  old_N = LOOP_VINFO_NITERS (loop_vinfo);
  vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);

  /* FORNOW:
     assuming number-of-iterations divides by the vectorization factor.  */
  gcc_assert (!(old_N % vf));

  orig_cond_expr = LOOP_VINFO_EXIT_COND (loop_vinfo);
  gcc_assert (orig_cond_expr);
  gcc_assert (orig_cond_expr == bsi_stmt (loop_exit_bsi));

  /* Build a new counter IV: starts at 0, steps by 1, incremented right
     before the exit test.  */
  create_iv (integer_zero_node, integer_one_node, NULL_TREE, loop,
	     &loop_exit_bsi, false, &indx_before_incr, &indx_after_incr);

  /* bsi_insert is using BSI_NEW_STMT. We need to bump it back
     to point to the exit condition.  */
  bsi_next (&loop_exit_bsi);
  gcc_assert (bsi_stmt (loop_exit_bsi) == orig_cond_expr);

  /* new loop exit test: compare the incremented IV against niters/vf,
     using the type of the original bound for the new constant.  */
  lb_type = TREE_TYPE (TREE_OPERAND (TREE_OPERAND (orig_cond_expr, 0), 1));
  new_loop_bound = build_int_cst (lb_type, old_N/vf);

  /* Preserve the branch sense of the original exit condition.  */
  if (exit_edge->flags & EDGE_TRUE_VALUE) /* 'then' edge exits the loop.  */
    cond = build2 (GE_EXPR, boolean_type_node, indx_after_incr, new_loop_bound);
  else /* 'then' edge loops back.  */
    cond = build2 (LT_EXPR, boolean_type_node, indx_after_incr, new_loop_bound);

  /* Reuse the original COND_EXPR's then/else arms (the goto targets).  */
  cond_stmt = build3 (COND_EXPR, TREE_TYPE (orig_cond_expr), cond,
	TREE_OPERAND (orig_cond_expr, 1), TREE_OPERAND (orig_cond_expr, 2));

  bsi_insert_before (&loop_exit_bsi, cond_stmt, BSI_SAME_STMT);

  /* remove old loop exit test:  */
  bsi_remove (&loop_exit_bsi);

  if (vect_debug_details (NULL))
    print_generic_expr (dump_file, cond_stmt, TDF_SLIM);
}
1852 | ||
1853 | ||
/* Function vect_transform_loop.

   The analysis phase has determined that the loop is vectorizable.
   Vectorize the loop - created vectorized stmts to replace the scalar
   stmts in the loop, and update the loop exit condition.  */

static void
vect_transform_loop (loop_vec_info loop_vinfo,
		     struct loops *loops ATTRIBUTE_UNUSED)
{
  struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
  basic_block *bbs = LOOP_VINFO_BBS (loop_vinfo);
  int nbbs = loop->num_nodes;
  block_stmt_iterator si;
  int i;
#ifdef ENABLE_CHECKING
  int vectorization_factor = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
#endif

  if (vect_debug_details (NULL))
    fprintf (dump_file, "\n<<vec_transform_loop>>\n");

  /* 1) Make sure the loop header has exactly two entries
     2) Make sure we have a preheader basic block.  */

  gcc_assert (loop->header->pred->pred_next);
  gcc_assert (!loop->header->pred->pred_next->pred_next);

  /* Force a dedicated preheader block on the preheader edge.  */
  loop_split_edge_with (loop_preheader_edge (loop), NULL);


  /* FORNOW: the vectorizer supports only loops which body consist
     of one basic block (header + empty latch). When the vectorizer will
     support more involved loop forms, the order by which the BBs are
     traversed need to be reconsidered.  */

  for (i = 0; i < nbbs; i++)
    {
      basic_block bb = bbs[i];

      /* NOTE: no bsi_next in the for-header; the iterator is advanced
	 explicitly below, because vectorizing a store removes the scalar
	 stmt (which already advances the iterator).  */
      for (si = bsi_start (bb); !bsi_end_p (si);)
	{
	  tree stmt = bsi_stmt (si);
	  stmt_vec_info stmt_info;
	  bool is_store;
#ifdef ENABLE_CHECKING
	  tree vectype;
#endif

	  if (vect_debug_details (NULL))
	    {
	      fprintf (dump_file, "------>vectorizing statement: ");
	      print_generic_expr (dump_file, stmt, TDF_SLIM);
	    }
	  stmt_info = vinfo_for_stmt (stmt);
	  gcc_assert (stmt_info);
	  /* Irrelevant stmts (e.g. index computations) are left as-is.  */
	  if (!STMT_VINFO_RELEVANT_P (stmt_info))
	    {
	      bsi_next (&si);
	      continue;
	    }
#ifdef ENABLE_CHECKING
	  /* FORNOW: Verify that all stmts operate on the same number of
	             units and no inner unrolling is necessary.  */
	  vectype = STMT_VINFO_VECTYPE (stmt_info);
	  gcc_assert (GET_MODE_NUNITS (TYPE_MODE (vectype))
		      == vectorization_factor);
#endif
	  /* -------- vectorize statement ------------ */
	  if (vect_debug_details (NULL))
	    fprintf (dump_file, "transform statement.");

	  is_store = vect_transform_stmt (stmt, &si);
	  if (is_store)
	    {
	      /* free the attached stmt_vec_info and remove the stmt.
		 bsi_remove leaves SI pointing at the next stmt, so do not
		 advance the iterator again.  */
	      stmt_ann_t ann = stmt_ann (stmt);
	      free (stmt_info);
	      set_stmt_info (ann, NULL);
	      bsi_remove (&si);
	      continue;
	    }

	  bsi_next (&si);
	}		        /* stmts in BB */
    }				/* BBs in loop */

  vect_transform_loop_bound (loop_vinfo);

  if (vect_debug_details (loop))
    fprintf (dump_file,"Success! loop vectorized.");
  if (vect_debug_stats (loop))
    fprintf (dump_file, "LOOP VECTORIZED.");
}
1948 | ||
1949 | ||
1950 | /* Function vect_is_simple_use. | |
1951 | ||
1952 | Input: | |
1953 | LOOP - the loop that is being vectorized. | |
1954 | OPERAND - operand of a stmt in LOOP. | |
1955 | DEF - the defining stmt in case OPERAND is an SSA_NAME. | |
1956 | ||
1957 | Returns whether a stmt with OPERAND can be vectorized. | |
1958 | Supportable operands are constants, loop invariants, and operands that are | |
6cb38cd4 | 1959 | defined by the current iteration of the loop. Unsupportable operands are |
79fe1b3b DN |
1960 | those that are defined by a previous iteration of the loop (as is the case |
1961 | in reduction/induction computations). */ | |
1962 | ||
1963 | static bool | |
1964 | vect_is_simple_use (tree operand, struct loop *loop, tree *def) | |
1965 | { | |
1966 | tree def_stmt; | |
1967 | basic_block bb; | |
1968 | ||
1969 | if (def) | |
1970 | *def = NULL_TREE; | |
1971 | ||
1972 | if (TREE_CODE (operand) == INTEGER_CST || TREE_CODE (operand) == REAL_CST) | |
1973 | return true; | |
1974 | ||
1975 | if (TREE_CODE (operand) != SSA_NAME) | |
1976 | return false; | |
1977 | ||
1978 | def_stmt = SSA_NAME_DEF_STMT (operand); | |
1979 | if (def_stmt == NULL_TREE ) | |
1980 | { | |
1981 | if (vect_debug_details (NULL)) | |
1982 | fprintf (dump_file, "no def_stmt."); | |
1983 | return false; | |
1984 | } | |
1985 | ||
1986 | /* empty stmt is expected only in case of a function argument. | |
1987 | (Otherwise - we expect a phi_node or a modify_expr). */ | |
1988 | if (IS_EMPTY_STMT (def_stmt)) | |
1989 | { | |
1990 | tree arg = TREE_OPERAND (def_stmt, 0); | |
1991 | if (TREE_CODE (arg) == INTEGER_CST || TREE_CODE (arg) == REAL_CST) | |
1992 | return true; | |
1993 | if (vect_debug_details (NULL)) | |
1994 | { | |
1995 | fprintf (dump_file, "Unexpected empty stmt: "); | |
1996 | print_generic_expr (dump_file, def_stmt, TDF_SLIM); | |
1997 | } | |
1998 | return false; | |
1999 | } | |
2000 | ||
2001 | /* phi_node inside the loop indicates an induction/reduction pattern. | |
2002 | This is not supported yet. */ | |
2003 | bb = bb_for_stmt (def_stmt); | |
2004 | if (TREE_CODE (def_stmt) == PHI_NODE && flow_bb_inside_loop_p (loop, bb)) | |
2005 | { | |
2006 | if (vect_debug_details (NULL)) | |
2007 | fprintf (dump_file, "reduction/induction - unsupported."); | |
2008 | return false; /* FORNOW: not supported yet. */ | |
2009 | } | |
2010 | ||
2011 | /* Expecting a modify_expr or a phi_node. */ | |
2012 | if (TREE_CODE (def_stmt) == MODIFY_EXPR | |
2013 | || TREE_CODE (def_stmt) == PHI_NODE) | |
2014 | { | |
2015 | if (def) | |
2016 | *def = def_stmt; | |
2017 | return true; | |
2018 | } | |
2019 | ||
2020 | return false; | |
2021 | } | |
2022 | ||
2023 | ||
2024 | /* Function vect_analyze_operations. | |
2025 | ||
2026 | Scan the loop stmts and make sure they are all vectorizable. */ | |
2027 | ||
2028 | static bool | |
2029 | vect_analyze_operations (loop_vec_info loop_vinfo) | |
2030 | { | |
2031 | struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo); | |
2032 | basic_block *bbs = LOOP_VINFO_BBS (loop_vinfo); | |
2033 | int nbbs = loop->num_nodes; | |
2034 | block_stmt_iterator si; | |
2035 | int vectorization_factor = 0; | |
2036 | int i; | |
2037 | bool ok; | |
2038 | tree scalar_type; | |
2039 | ||
2040 | if (vect_debug_details (NULL)) | |
2041 | fprintf (dump_file, "\n<<vect_analyze_operations>>\n"); | |
2042 | ||
2043 | for (i = 0; i < nbbs; i++) | |
2044 | { | |
2045 | basic_block bb = bbs[i]; | |
2046 | ||
2047 | for (si = bsi_start (bb); !bsi_end_p (si); bsi_next (&si)) | |
2048 | { | |
2049 | tree stmt = bsi_stmt (si); | |
2050 | int nunits; | |
2051 | stmt_vec_info stmt_info = vinfo_for_stmt (stmt); | |
2052 | tree vectype; | |
2053 | ||
2054 | if (vect_debug_details (NULL)) | |
2055 | { | |
2056 | fprintf (dump_file, "==> examining statement: "); | |
2057 | print_generic_expr (dump_file, stmt, TDF_SLIM); | |
2058 | } | |
1e128c5f GB |
2059 | |
2060 | gcc_assert (stmt_info); | |
2061 | ||
79fe1b3b DN |
2062 | /* skip stmts which do not need to be vectorized. |
2063 | this is expected to include: | |
2064 | - the COND_EXPR which is the loop exit condition | |
2065 | - any LABEL_EXPRs in the loop | |
2066 | - computations that are used only for array indexing or loop | |
2067 | control */ | |
2068 | ||
2069 | if (!STMT_VINFO_RELEVANT_P (stmt_info)) | |
2070 | { | |
2071 | if (vect_debug_details (NULL)) | |
2072 | fprintf (dump_file, "irrelevant."); | |
2073 | continue; | |
2074 | } | |
2075 | ||
2076 | if (VECTOR_MODE_P (TYPE_MODE (TREE_TYPE (stmt)))) | |
2077 | { | |
2078 | if (vect_debug_stats (loop) || vect_debug_details (loop)) | |
2079 | { | |
2080 | fprintf (dump_file, "not vectorized: vector stmt in loop:"); | |
2081 | print_generic_expr (dump_file, stmt, TDF_SLIM); | |
2082 | } | |
2083 | return false; | |
2084 | } | |
2085 | ||
2086 | if (STMT_VINFO_DATA_REF (stmt_info)) | |
2087 | scalar_type = TREE_TYPE (DR_REF (STMT_VINFO_DATA_REF (stmt_info))); | |
2088 | else if (TREE_CODE (stmt) == MODIFY_EXPR) | |
2089 | scalar_type = TREE_TYPE (TREE_OPERAND (stmt, 0)); | |
2090 | else | |
2091 | scalar_type = TREE_TYPE (stmt); | |
2092 | ||
2093 | if (vect_debug_details (NULL)) | |
2094 | { | |
2095 | fprintf (dump_file, "get vectype for scalar type: "); | |
2096 | print_generic_expr (dump_file, scalar_type, TDF_SLIM); | |
2097 | } | |
2098 | ||
2099 | vectype = get_vectype_for_scalar_type (scalar_type); | |
2100 | if (!vectype) | |
2101 | { | |
2102 | if (vect_debug_stats (loop) || vect_debug_details (loop)) | |
2103 | { | |
2104 | fprintf (dump_file, "not vectorized: unsupported data-type "); | |
2105 | print_generic_expr (dump_file, scalar_type, TDF_SLIM); | |
2106 | } | |
2107 | return false; | |
2108 | } | |
2109 | ||
2110 | if (vect_debug_details (NULL)) | |
2111 | { | |
2112 | fprintf (dump_file, "vectype: "); | |
2113 | print_generic_expr (dump_file, vectype, TDF_SLIM); | |
2114 | } | |
2115 | STMT_VINFO_VECTYPE (stmt_info) = vectype; | |
2116 | ||
2117 | ok = (vectorizable_operation (stmt, NULL, NULL) | |
2118 | || vectorizable_assignment (stmt, NULL, NULL) | |
2119 | || vectorizable_load (stmt, NULL, NULL) | |
2120 | || vectorizable_store (stmt, NULL, NULL)); | |
2121 | ||
2122 | if (!ok) | |
2123 | { | |
2124 | if (vect_debug_stats (loop) || vect_debug_details (loop)) | |
2125 | { | |
2126 | fprintf (dump_file, "not vectorized: stmt not supported: "); | |
2127 | print_generic_expr (dump_file, stmt, TDF_SLIM); | |
2128 | } | |
2129 | return false; | |
2130 | } | |
2131 | ||
2132 | nunits = GET_MODE_NUNITS (TYPE_MODE (vectype)); | |
2133 | if (vect_debug_details (NULL)) | |
2134 | fprintf (dump_file, "nunits = %d", nunits); | |
2135 | ||
2136 | if (vectorization_factor) | |
2137 | { | |
2138 | /* FORNOW: don't allow mixed units. | |
2139 | This restriction will be relaxed in the future. */ | |
2140 | if (nunits != vectorization_factor) | |
2141 | { | |
2142 | if (vect_debug_stats (loop) || vect_debug_details (loop)) | |
2143 | fprintf (dump_file, "not vectorized: mixed data-types"); | |
2144 | return false; | |
2145 | } | |
2146 | } | |
2147 | else | |
2148 | vectorization_factor = nunits; | |
2149 | } | |
2150 | } | |
2151 | ||
2152 | /* TODO: Analyze cost. Decide if worth while to vectorize. */ | |
2153 | if (!vectorization_factor) | |
2154 | { | |
2155 | if (vect_debug_stats (loop) || vect_debug_details (loop)) | |
2156 | fprintf (dump_file, "not vectorized: unsupported data-type"); | |
2157 | return false; | |
2158 | } | |
2159 | LOOP_VINFO_VECT_FACTOR (loop_vinfo) = vectorization_factor; | |
2160 | ||
2161 | /* FORNOW: handle only cases where the loop bound divides by the | |
2162 | vectorization factor. */ | |
2163 | ||
2164 | if (vect_debug_details (NULL)) | |
2165 | fprintf (dump_file, | |
2166 | "vectorization_factor = %d, niters = " HOST_WIDE_INT_PRINT_DEC, | |
2167 | vectorization_factor, LOOP_VINFO_NITERS (loop_vinfo)); | |
2168 | ||
2169 | if (!LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo)) | |
2170 | { | |
2171 | if (vect_debug_stats (loop) || vect_debug_details (loop)) | |
2172 | fprintf (dump_file, "not vectorized: Unknown loop bound."); | |
2173 | return false; | |
2174 | } | |
2175 | ||
2176 | if (LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo) | |
2177 | && LOOP_VINFO_NITERS (loop_vinfo) % vectorization_factor != 0) | |
2178 | { | |
2179 | if (vect_debug_stats (loop) || vect_debug_details (loop)) | |
2180 | fprintf (dump_file, "not vectorized: loop bound doesn't divided by %d.", | |
2181 | vectorization_factor); | |
2182 | return false; | |
2183 | } | |
2184 | ||
2185 | return true; | |
2186 | } | |
2187 | ||
2188 | ||
2189 | /* Function exist_non_indexing_operands_for_use_p | |
2190 | ||
2191 | USE is one of the uses attached to STMT. Check if USE is | |
2192 | used in STMT for anything other than indexing an array. */ | |
2193 | ||
2194 | static bool | |
2195 | exist_non_indexing_operands_for_use_p (tree use, tree stmt) | |
2196 | { | |
2197 | tree operand; | |
2198 | stmt_vec_info stmt_info = vinfo_for_stmt (stmt); | |
2199 | ||
2200 | /* USE corresponds to some operand in STMT. If there is no data | |
2201 | reference in STMT, then any operand that corresponds to USE | |
2202 | is not indexing an array. */ | |
2203 | if (!STMT_VINFO_DATA_REF (stmt_info)) | |
2204 | return true; | |
2205 | ||
2206 | /* STMT has a data_ref. FORNOW this means that its of one of | |
2207 | the following forms: | |
2208 | -1- ARRAY_REF = var | |
2209 | -2- var = ARRAY_REF | |
2210 | (This should have been verified in analyze_data_refs). | |
2211 | ||
2212 | 'var' in the second case corresponds to a def, not a use, | |
2213 | so USE cannot correspond to any operands that are not used | |
2214 | for array indexing. | |
2215 | ||
2216 | Therefore, all we need to check is if STMT falls into the | |
2217 | first case, and whether var corresponds to USE. */ | |
2218 | ||
2219 | if (TREE_CODE (TREE_OPERAND (stmt, 0)) == SSA_NAME) | |
2220 | return false; | |
2221 | ||
2222 | operand = TREE_OPERAND (stmt, 1); | |
2223 | ||
2224 | if (TREE_CODE (operand) != SSA_NAME) | |
2225 | return false; | |
2226 | ||
2227 | if (operand == use) | |
2228 | return true; | |
2229 | ||
2230 | return false; | |
2231 | } | |
2232 | ||
2233 | ||
2234 | /* Function vect_is_simple_iv_evolution. | |
2235 | ||
2236 | FORNOW: A simple evolution of an induction variables in the loop is | |
2237 | considered a polynomial evolution with constant step. */ | |
2238 | ||
2239 | static bool | |
2240 | vect_is_simple_iv_evolution (unsigned loop_nb, tree access_fn, tree * init, | |
2241 | tree * step, bool strict) | |
2242 | { | |
2243 | tree init_expr; | |
2244 | tree step_expr; | |
2245 | ||
2246 | tree evolution_part = evolution_part_in_loop_num (access_fn, loop_nb); | |
2247 | ||
2248 | /* When there is no evolution in this loop, the evolution function | |
2249 | is not "simple". */ | |
2250 | if (evolution_part == NULL_TREE) | |
2251 | return false; | |
2252 | ||
2253 | /* When the evolution is a polynomial of degree >= 2 | |
2254 | the evolution function is not "simple". */ | |
2255 | if (tree_is_chrec (evolution_part)) | |
2256 | return false; | |
2257 | ||
2258 | step_expr = evolution_part; | |
2259 | init_expr = initial_condition (access_fn); | |
2260 | ||
2261 | if (vect_debug_details (NULL)) | |
2262 | { | |
2263 | fprintf (dump_file, "step: "); | |
2264 | print_generic_expr (dump_file, step_expr, TDF_SLIM); | |
2265 | fprintf (dump_file, ", init: "); | |
2266 | print_generic_expr (dump_file, init_expr, TDF_SLIM); | |
2267 | } | |
2268 | ||
2269 | *init = init_expr; | |
2270 | *step = step_expr; | |
2271 | ||
2272 | if (TREE_CODE (step_expr) != INTEGER_CST) | |
2273 | { | |
2274 | if (vect_debug_details (NULL)) | |
2275 | fprintf (dump_file, "step unknown."); | |
2276 | return false; | |
2277 | } | |
2278 | ||
2279 | if (strict) | |
2280 | if (!integer_onep (step_expr)) | |
2281 | { | |
2282 | if (vect_debug_details (NULL)) | |
2283 | print_generic_expr (dump_file, step_expr, TDF_SLIM); | |
2284 | return false; | |
2285 | } | |
2286 | ||
2287 | return true; | |
2288 | } | |
2289 | ||
2290 | ||
/* Function vect_analyze_scalar_cycles.

   Examine the cross iteration def-use cycles of scalar variables, by
   analyzing the loop (scalar) PHIs; verify that the cross iteration def-use
   cycles that they represent do not impede vectorization.

   FORNOW: Reduction as in the following loop, is not supported yet:
              loop1:
              for (i=0; i<N; i++)
                 sum += a[i];
	   The cross-iteration cycle corresponding to variable 'sum' will be
	   considered too complicated and will impede vectorization.

   FORNOW: Induction as in the following loop, is not supported yet:
              loop2:
              for (i=0; i<N; i++)
                 a[i] = i;

	   However, the following loop *is* vectorizable:
              loop3:
              for (i=0; i<N; i++)
                 a[i] = b[i];

	   In both loops there exists a def-use cycle for the variable i:
              loop: i_2 = PHI (i_0, i_1)
                    a[i_2] = ...;
                    i_1 = i_2 + 1;
                    GOTO loop;

	   The evolution of the above cycle is considered simple enough,
	   however, we also check that the cycle does not need to be
	   vectorized, i.e - we check that the variable that this cycle
	   defines is only used for array indexing or in stmts that do not
	   need to be vectorized. This is not the case in loop2, but it
	   *is* the case in loop3.  */

static bool
vect_analyze_scalar_cycles (loop_vec_info loop_vinfo)
{
  tree phi;
  struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
  basic_block bb = loop->header;
  tree dummy;

  if (vect_debug_details (NULL))
    fprintf (dump_file, "\n<<vect_analyze_scalar_cycles>>\n");

  /* Every cross-iteration scalar cycle materializes as a PHI in the
     loop header; walk them all.  */
  for (phi = phi_nodes (bb); phi; phi = TREE_CHAIN (phi))
    {
      tree access_fn = NULL;

      if (vect_debug_details (NULL))
	{
          fprintf (dump_file, "Analyze phi: ");
          print_generic_expr (dump_file, phi, TDF_SLIM);
	}

      /* Skip virtual phi's. The data dependences that are associated with
         virtual defs/uses (i.e., memory accesses) are analyzed elsewhere.  */

      if (!is_gimple_reg (SSA_NAME_VAR (PHI_RESULT (phi))))
	{
	  if (vect_debug_details (NULL))
	    fprintf (dump_file, "virtual phi. skip.");
	  continue;
	}

      /* Analyze the evolution function.  */

      /* FORNOW: The only scalar cross-iteration cycles that we allow are
         those of loop induction variables; This property is verified here.

         Furthermore, if that induction variable is used in an operation
         that needs to be vectorized (i.e, is not solely used to index
         arrays and check the exit condition) - we do not support its
         vectorization yet. This property is verified in vect_is_simple_use,
         during vect_analyze_operations.  */

      /* NOTE(review): instantiate_parameters call deliberately left
	 commented out here - presumably pending scev work; confirm before
	 re-enabling.  */
      access_fn = /* instantiate_parameters
		     (loop,*/
	analyze_scalar_evolution (loop, PHI_RESULT (phi));

      if (!access_fn)
	{
	  if (vect_debug_stats (loop) || vect_debug_details (loop))
	    fprintf (dump_file, "not vectorized: unsupported scalar cycle.");
	  return false;
	}

      if (vect_debug_details (NULL))
        {
           fprintf (dump_file, "Access function of PHI: ");
           print_generic_expr (dump_file, access_fn, TDF_SLIM);
        }

      if (!vect_is_simple_iv_evolution (loop->num, access_fn, &dummy,
					&dummy, false))
	{
	  if (vect_debug_stats (loop) || vect_debug_details (loop))
	    fprintf (dump_file, "not vectorized: unsupported scalar cycle.");
	  return false;
	}
    }

  return true;
}
2397 | ||
2398 | ||
2399 | /* Function vect_analyze_data_ref_dependence. | |
2400 | ||
2401 | Return TRUE if there (might) exist a dependence between a memory-reference | |
2402 | DRA and a memory-reference DRB. */ | |
2403 | ||
2404 | static bool | |
2405 | vect_analyze_data_ref_dependence (struct data_reference *dra, | |
2406 | struct data_reference *drb, | |
2407 | struct loop *loop) | |
2408 | { | |
6775f1f3 | 2409 | bool differ_p; |
79fe1b3b | 2410 | struct data_dependence_relation *ddr; |
6775f1f3 | 2411 | |
79fe1b3b DN |
2412 | if (!array_base_name_differ_p (dra, drb, &differ_p)) |
2413 | { | |
6775f1f3 | 2414 | if (vect_debug_stats (loop) || vect_debug_details (loop)) |
79fe1b3b | 2415 | { |
6775f1f3 IR |
2416 | fprintf (dump_file, |
2417 | "not vectorized: can't determine dependence between: "); | |
79fe1b3b DN |
2418 | print_generic_expr (dump_file, DR_REF (dra), TDF_SLIM); |
2419 | fprintf (dump_file, " and "); | |
2420 | print_generic_expr (dump_file, DR_REF (drb), TDF_SLIM); | |
2421 | } | |
2422 | return true; | |
2423 | } | |
2424 | ||
2425 | if (differ_p) | |
2426 | return false; | |
2427 | ||
2428 | ddr = initialize_data_dependence_relation (dra, drb); | |
2429 | compute_affine_dependence (ddr); | |
2430 | ||
2431 | if (DDR_ARE_DEPENDENT (ddr) == chrec_known) | |
2432 | return false; | |
2433 | ||
2434 | if (vect_debug_stats (loop) || vect_debug_details (loop)) | |
2435 | { | |
2436 | fprintf (dump_file, | |
2437 | "not vectorized: possible dependence between data-refs "); | |
2438 | print_generic_expr (dump_file, DR_REF (dra), TDF_SLIM); | |
2439 | fprintf (dump_file, " and "); | |
2440 | print_generic_expr (dump_file, DR_REF (drb), TDF_SLIM); | |
2441 | } | |
2442 | ||
2443 | return true; | |
2444 | } | |
2445 | ||
2446 | ||
2447 | /* Function vect_analyze_data_ref_dependences. | |
2448 | ||
2449 | Examine all the data references in the loop, and make sure there do not | |
2450 | exist any data dependences between them. | |
2451 | ||
2452 | TODO: dependences which distance is greater than the vectorization factor | |
2453 | can be ignored. */ | |
2454 | ||
2455 | static bool | |
2456 | vect_analyze_data_ref_dependences (loop_vec_info loop_vinfo) | |
2457 | { | |
2458 | unsigned int i, j; | |
2459 | varray_type loop_write_refs = LOOP_VINFO_DATAREF_WRITES (loop_vinfo); | |
2460 | varray_type loop_read_refs = LOOP_VINFO_DATAREF_READS (loop_vinfo); | |
2461 | struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo); | |
2462 | ||
2463 | /* Examine store-store (output) dependences. */ | |
2464 | ||
2465 | if (vect_debug_details (NULL)) | |
2466 | fprintf (dump_file, "\n<<vect_analyze_dependences>>\n"); | |
2467 | ||
2468 | if (vect_debug_details (NULL)) | |
2469 | fprintf (dump_file, "compare all store-store pairs."); | |
2470 | ||
2471 | for (i = 0; i < VARRAY_ACTIVE_SIZE (loop_write_refs); i++) | |
2472 | { | |
2473 | for (j = i + 1; j < VARRAY_ACTIVE_SIZE (loop_write_refs); j++) | |
2474 | { | |
2475 | struct data_reference *dra = | |
2476 | VARRAY_GENERIC_PTR (loop_write_refs, i); | |
2477 | struct data_reference *drb = | |
2478 | VARRAY_GENERIC_PTR (loop_write_refs, j); | |
2479 | if (vect_analyze_data_ref_dependence (dra, drb, loop)) | |
2480 | return false; | |
2481 | } | |
2482 | } | |
2483 | ||
2484 | /* Examine load-store (true/anti) dependences. */ | |
2485 | ||
2486 | if (vect_debug_details (NULL)) | |
2487 | fprintf (dump_file, "compare all load-store pairs."); | |
2488 | ||
2489 | for (i = 0; i < VARRAY_ACTIVE_SIZE (loop_read_refs); i++) | |
2490 | { | |
2491 | for (j = 0; j < VARRAY_ACTIVE_SIZE (loop_write_refs); j++) | |
2492 | { | |
2493 | struct data_reference *dra = VARRAY_GENERIC_PTR (loop_read_refs, i); | |
2494 | struct data_reference *drb = | |
2495 | VARRAY_GENERIC_PTR (loop_write_refs, j); | |
2496 | if (vect_analyze_data_ref_dependence (dra, drb, loop)) | |
2497 | return false; | |
2498 | } | |
2499 | } | |
2500 | ||
2501 | return true; | |
2502 | } | |
2503 | ||
2504 | ||
2505 | /* Function vect_get_first_index. | |
2506 | ||
2507 | REF is a data reference. | |
2508 | If it is an ARRAY_REF: if its lower bound is simple enough, | |
2509 | put it in ARRAY_FIRST_INDEX and return TRUE; otherwise - return FALSE. | |
2510 | If it is not an ARRAY_REF: REF has no "first index"; | |
2511 | ARRAY_FIRST_INDEX in zero, and the function returns TRUE. */ | |
2512 | ||
2513 | static bool | |
2514 | vect_get_first_index (tree ref, tree *array_first_index) | |
2515 | { | |
2516 | tree array_start; | |
2517 | ||
2518 | if (TREE_CODE (ref) != ARRAY_REF) | |
2519 | *array_first_index = size_zero_node; | |
2520 | else | |
2521 | { | |
2522 | array_start = array_ref_low_bound (ref); | |
2523 | if (!host_integerp (array_start,0)) | |
2524 | { | |
2525 | if (vect_debug_details (NULL)) | |
2526 | { | |
2527 | fprintf (dump_file, "array min val not simple integer cst."); | |
2528 | print_generic_expr (dump_file, array_start, TDF_DETAILS); | |
2529 | } | |
2530 | return false; | |
2531 | } | |
2532 | *array_first_index = array_start; | |
2533 | } | |
2534 | ||
2535 | return true; | |
2536 | } | |
2537 | ||
2538 | ||
6775f1f3 IR |
2539 | /* Function vect_compute_array_base_alignment. |
2540 | A utility function of vect_compute_array_ref_alignment. | |
2541 | ||
2542 | Compute the misalignment of ARRAY in bits. | |
2543 | ||
2544 | Input: | |
2545 | ARRAY - an array_ref (possibly multidimensional) of type ARRAY_TYPE. | |
d4a9b3a3 | 2546 | VECTYPE - we are interested in the misalignment modulo the size of vectype. |
6775f1f3 IR |
2547 | if NULL: don't compute misalignment, just return the base of ARRAY. |
2548 | PREV_DIMENSIONS - initialized to one. | |
2549 | MISALIGNMENT - the computed misalignment in bits. | |
2550 | ||
2551 | Output: | |
2552 | If VECTYPE is not NULL: | |
2553 | Return NULL_TREE if the misalignment cannot be computed. Otherwise, return | |
2554 | the base of the array, and put the computed misalignment in MISALIGNMENT. | |
2555 | If VECTYPE is NULL: | |
2556 | Return the base of the array. | |
2557 | ||
2558 | For a[idx_N]...[idx_2][idx_1][idx_0], the address of | |
2559 | a[idx_N]...[idx_2][idx_1] is | |
2560 | {&a + idx_1 * dim_0 + idx_2 * dim_0 * dim_1 + ... | |
2561 | ... + idx_N * dim_0 * ... * dim_N-1}. | |
2562 | (The misalignment of &a is not checked here). | |
2563 | Note, that every term contains dim_0, therefore, if dim_0 is a | |
2564 | multiple of NUNITS, the whole sum is a multiple of NUNITS. | |
2565 | Otherwise, if idx_1 is constant, and dim_1 is a multiple of | |
2566 | NUINTS, we can say that the misalignment of the sum is equal to | |
2567 | the misalignment of {idx_1 * dim_0}. If idx_1 is not constant, | |
2568 | we can't determine this array misalignment, and we return | |
2569 | false. | |
2570 | We proceed recursively in this manner, accumulating total misalignment | |
2571 | and the multiplication of previous dimensions for correct misalignment | |
2572 | calculation. */ | |
2573 | ||
static tree
vect_compute_array_base_alignment (tree array,
                                   tree vectype,
                                   tree *prev_dimensions,
                                   tree *misalignment)
{
  tree index;
  tree domain;
  tree dimension_size;
  tree mis;
  tree bits_per_vectype;
  tree bits_per_vectype_unit;

  /* The 'stop condition' of the recursion: once ARRAY is no longer an
     ARRAY_REF we have peeled off all the dimensions and reached the base.  */
  if (TREE_CODE (array) != ARRAY_REF)
    return array;

  if (!vectype)
    /* Just get the base decl.  No alignment bookkeeping is requested, so
       recurse with NULL accumulators.  */
    return vect_compute_array_base_alignment
      (TREE_OPERAND (array, 0), NULL, NULL, NULL);

  /* Give up if the accumulated values are not simple unsigned host
     integers (can't do the constant arithmetic below reliably).  */
  if (!host_integerp (*misalignment, 1) || TREE_OVERFLOW (*misalignment) ||
      !host_integerp (*prev_dimensions, 1) || TREE_OVERFLOW (*prev_dimensions))
    return NULL_TREE;

  /* Number of elements in this dimension: max - min + 1.  */
  domain = TYPE_DOMAIN (TREE_TYPE (array));
  dimension_size =
    int_const_binop (PLUS_EXPR,
                     int_const_binop (MINUS_EXPR, TYPE_MAX_VALUE (domain),
                                      TYPE_MIN_VALUE (domain), 1),
                     size_one_node, 1);

  /* Check if the dimension size is a multiple of NUNITS, the remaining sum
     is a multiple of NUNITS:

     dimension_size % GET_MODE_NUNITS (TYPE_MODE (vectype)) == 0 ?
   */
  mis = int_const_binop (TRUNC_MOD_EXPR, dimension_size,
         build_int_cst (NULL_TREE, GET_MODE_NUNITS (TYPE_MODE (vectype))), 1);
  if (integer_zerop (mis))
    /* This array is aligned.  Continue just in order to get the base decl;
       no further misalignment accumulation is needed.  */
    return vect_compute_array_base_alignment
      (TREE_OPERAND (array, 0), NULL, NULL, NULL);

  index = TREE_OPERAND (array, 1);
  if (!host_integerp (index, 1))
    /* The current index is not constant - the misalignment cannot be
       determined.  */
    return NULL_TREE;

  /* Normalize the index to be zero-based within this dimension.  */
  index = int_const_binop (MINUS_EXPR, index, TYPE_MIN_VALUE (domain), 0);

  bits_per_vectype = fold_convert (unsigned_type_node,
    build_int_cst (NULL_TREE, BITS_PER_UNIT *
                   GET_MODE_SIZE (TYPE_MODE (vectype))));
  bits_per_vectype_unit = fold_convert (unsigned_type_node,
    build_int_cst (NULL_TREE, BITS_PER_UNIT *
                   GET_MODE_SIZE (TYPE_MODE (TREE_TYPE (vectype)))));

  /* Add {idx_i * dim_i-1 * ... * dim_0 } to the misalignment computed
     earlier:

     *misalignment =
       (*misalignment + index_val * dimension_size * *prev_dimensions)
        % vectype_nunits;
   */

  mis = int_const_binop (MULT_EXPR, index, dimension_size, 1);
  mis = int_const_binop (MULT_EXPR, mis, *prev_dimensions, 1);
  mis = int_const_binop (MULT_EXPR, mis, bits_per_vectype_unit, 1);
  mis = int_const_binop (PLUS_EXPR, *misalignment, mis, 1);
  *misalignment = int_const_binop (TRUNC_MOD_EXPR, mis, bits_per_vectype, 1);

  /* Accumulate the product of the dimension sizes seen so far, for the
     next (outer) dimension's contribution.  */
  *prev_dimensions = int_const_binop (MULT_EXPR,
                                      *prev_dimensions, dimension_size, 1);

  /* Recurse into the next outer dimension.  */
  return vect_compute_array_base_alignment (TREE_OPERAND (array, 0), vectype,
                                            prev_dimensions,
                                            misalignment);
}
2655 | ||
2656 | ||
79fe1b3b DN |
2657 | /* Function vect_compute_data_ref_alignment |
2658 | ||
2659 | Compute the misalignment of the data reference DR. | |
2660 | ||
6775f1f3 IR |
2661 | Output: |
2662 | 1. If during the misalignment computation it is found that the data reference | |
2663 | cannot be vectorized then false is returned. | |
2664 | 2. DR_MISALIGNMENT (DR) is defined. | |
2665 | ||
79fe1b3b DN |
2666 | FOR NOW: No analysis is actually performed. Misalignment is calculated |
2667 | only for trivial cases. TODO. */ | |
2668 | ||
static bool
vect_compute_data_ref_alignment (struct data_reference *dr,
                                 loop_vec_info loop_vinfo)
{
  tree stmt = DR_STMT (dr);
  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
  tree ref = DR_REF (dr);
  tree vectype;
  tree scalar_type;
  tree offset = size_zero_node;
  tree base, bit_offset, alignment;
  tree unit_bits = fold_convert (unsigned_type_node,
                                 build_int_cst (NULL_TREE, BITS_PER_UNIT));
  tree dr_base;
  bool base_aligned_p;

  if (vect_debug_details (NULL))
    fprintf (dump_file, "vect_compute_data_ref_alignment:");

  /* Initialize misalignment to unknown (-1); it stays unknown on the
     "return true" bail-out paths below.  */
  DR_MISALIGNMENT (dr) = -1;

  scalar_type = TREE_TYPE (ref);
  vectype = get_vectype_for_scalar_type (scalar_type);
  if (!vectype)
    {
      if (vect_debug_details (NULL))
        {
          fprintf (dump_file, "no vectype for stmt: ");
          print_generic_expr (dump_file, stmt, TDF_SLIM);
          fprintf (dump_file, " scalar_type: ");
          print_generic_expr (dump_file, scalar_type, TDF_DETAILS);
        }
      /* It is not possible to vectorize this data reference.  */
      return false;
    }
  /* FORNOW: only array and pointer (INDIRECT_REF) accesses are handled.  */
  gcc_assert (TREE_CODE (ref) == ARRAY_REF || TREE_CODE (ref) == INDIRECT_REF);

  /* For an array ref start from the ref itself; for a pointer access use
     the base recorded during data-ref analysis.  */
  if (TREE_CODE (ref) == ARRAY_REF)
    dr_base = ref;
  else
    dr_base = STMT_VINFO_VECT_DR_BASE (stmt_info);

  base = vect_get_base_and_bit_offset (dr, dr_base, vectype,
                                   loop_vinfo, &bit_offset, &base_aligned_p);
  if (!base)
    {
      if (vect_debug_details (NULL))
        {
          fprintf (dump_file, "Unknown alignment for access: ");
          print_generic_expr (dump_file,
                              STMT_VINFO_VECT_DR_BASE (stmt_info), TDF_SLIM);
        }
      /* Alignment unknown, but the data-ref itself may still be
         vectorizable - leave DR_MISALIGNMENT at -1 and succeed.  */
      return true;
    }

  if (!base_aligned_p)
    {
      if (!vect_can_force_dr_alignment_p (base, TYPE_ALIGN (vectype)))
        {
          if (vect_debug_details (NULL))
            {
              fprintf (dump_file, "can't force alignment of ref: ");
              print_generic_expr (dump_file, ref, TDF_SLIM);
            }
          /* Misalignment stays unknown.  */
          return true;
        }

      /* Force the alignment of the decl.
         NOTE: This is the only change to the code we make during
         the analysis phase, before deciding to vectorize the loop.  */
      if (vect_debug_details (NULL))
        fprintf (dump_file, "force alignment");
      DECL_ALIGN (base) = TYPE_ALIGN (vectype);
      DECL_USER_ALIGN (base) = TYPE_ALIGN (vectype);
    }

  /* At this point we assume that the base is aligned, and the offset from it
     (including index, if relevant) has been computed and is in BIT_OFFSET.  */
  gcc_assert (base_aligned_p
              || (TREE_CODE (base) == VAR_DECL
                  && DECL_ALIGN (base) >= TYPE_ALIGN (vectype)));

  /* Convert into bytes.  */
  offset = int_const_binop (TRUNC_DIV_EXPR, bit_offset, unit_bits, 1);
  /* Check that there is no remainder in bits - a sub-byte offset cannot be
     expressed as a byte misalignment.  */
  bit_offset = int_const_binop (TRUNC_MOD_EXPR, bit_offset, unit_bits, 1);
  if (!integer_zerop (bit_offset))
    {
      if (vect_debug_details (NULL))
        {
          fprintf (dump_file, "bit offset alignment: ");
          print_generic_expr (dump_file, bit_offset, TDF_SLIM);
        }
      return false;
    }

  /* Alignment required, in bytes: */
  alignment = fold_convert (unsigned_type_node,
            build_int_cst (NULL_TREE, TYPE_ALIGN (vectype)/BITS_PER_UNIT));

  /* Modulo alignment: the byte offset within a vector-aligned block.  */
  offset = int_const_binop (TRUNC_MOD_EXPR, offset, alignment, 0);
  if (!host_integerp (offset, 1) || TREE_OVERFLOW (offset))
    {
      if (vect_debug_details (NULL))
        fprintf (dump_file, "unexpected misalign value");
      return false;
    }

  DR_MISALIGNMENT (dr) = tree_low_cst (offset, 1);

  if (vect_debug_details (NULL))
    fprintf (dump_file, "misalign = %d", DR_MISALIGNMENT (dr));

  return true;
}
2786 | ||
2787 | ||
2788 | /* Function vect_compute_array_ref_alignment | |
2789 | ||
2790 | Compute the alignment of an array-ref. | |
2791 | The alignment we compute here is relative to | |
2792 | TYPE_ALIGN(VECTYPE) boundary. | |
2793 | ||
2794 | Output: | |
2795 | OFFSET - the alignment in bits | |
2796 | Return value - the base of the array-ref. E.g, | |
2797 | if the array-ref is a.b[k].c[i][j] the returned | |
2798 | base is a.b[k].c | |
2799 | */ | |
2800 | ||
static tree
vect_compute_array_ref_alignment (struct data_reference *dr,
                                  loop_vec_info loop_vinfo,
                                  tree vectype,
                                  tree *offset)
{
  tree array_first_index = size_zero_node;
  tree init;
  tree ref = DR_REF (dr);
  tree scalar_type = TREE_TYPE (ref);
  tree oprnd0 = TREE_OPERAND (ref, 0);
  tree dims = size_one_node;
  tree misalign = size_zero_node;
  tree next_ref, this_offset = size_zero_node;
  tree nunits;
  tree nbits;

  if (TREE_CODE (TREE_TYPE (ref)) == ARRAY_TYPE)
    /* The reference is an array without its last index.  */
    next_ref = vect_compute_array_base_alignment (ref, vectype, &dims, &misalign);
  else
    /* Peel off the last index and analyze the remaining dimensions.  */
    next_ref =
      vect_compute_array_base_alignment (oprnd0, vectype, &dims, &misalign);
  if (!vectype)
    /* Alignment is not requested.  Just return the base.  */
    return next_ref;

  /* Compute alignment.  Bail out if the base-alignment walk failed or the
     accumulated misalignment is not a simple unsigned host integer.  */
  if (!host_integerp (misalign, 1) || TREE_OVERFLOW (misalign) || !next_ref)
    return NULL_TREE;
  this_offset = misalign;

  /* Check the first index accessed (the array's lower bound).  */
  if (!vect_get_first_index (ref, &array_first_index))
    {
      if (vect_debug_details (NULL))
        fprintf (dump_file, "no first_index for array.");
      return NULL_TREE;
    }

  /* Check the index of the array_ref: its initial value in this loop.  */
  init = initial_condition_in_loop_num (DR_ACCESS_FN (dr, 0),
                                        LOOP_VINFO_LOOP (loop_vinfo)->num);

  /* FORNOW: In order to simplify the handling of alignment, we make sure
     that the first location at which the array is accessed ('init') is on an
     'NUNITS' boundary, since we are assuming here that 'array base' is aligned.
     This is too conservative, since we require that
     both {'array_base' is a multiple of NUNITS} && {'init' is a multiple of
     NUNITS}, instead of just {('array_base' + 'init') is a multiple of NUNITS}.
     This should be relaxed in the future.  */

  if (!init || !host_integerp (init, 0))
    {
      if (vect_debug_details (NULL))
        fprintf (dump_file, "non constant init. ");
      return NULL_TREE;
    }

  /* bytes per scalar element: */
  nunits = fold_convert (unsigned_type_node,
        build_int_cst (NULL_TREE, GET_MODE_SIZE (TYPE_MODE (scalar_type))));
  nbits = int_const_binop (MULT_EXPR, nunits,
                           build_int_cst (NULL_TREE, BITS_PER_UNIT), 1);

  /* misalign = offset + (init-array_first_index)*nunits*bits_in_byte */
  misalign = int_const_binop (MINUS_EXPR, init, array_first_index, 0);
  misalign = int_const_binop (MULT_EXPR, misalign, nbits, 0);
  misalign = int_const_binop (PLUS_EXPR, misalign, this_offset, 0);

  /* TODO: allow negative misalign values.  */
  if (!host_integerp (misalign, 1) || TREE_OVERFLOW (misalign))
    {
      if (vect_debug_details (NULL))
        fprintf (dump_file, "unexpected misalign value");
      return NULL_TREE;
    }
  /* Report the misalignment (in bits) and return the base of the ref.  */
  *offset = misalign;
  return next_ref;
}
2881 | ||
2882 | ||
2883 | /* Function vect_compute_data_refs_alignment | |
2884 | ||
2885 | Compute the misalignment of data references in the loop. | |
2886 | This pass may take place at function granularity instead of at loop | |
2887 | granularity. | |
2888 | ||
2889 | FOR NOW: No analysis is actually performed. Misalignment is calculated | |
2890 | only for trivial cases. TODO. */ | |
2891 | ||
2892 | static void | |
2893 | vect_compute_data_refs_alignment (loop_vec_info loop_vinfo) | |
2894 | { | |
2895 | varray_type loop_write_datarefs = LOOP_VINFO_DATAREF_WRITES (loop_vinfo); | |
2896 | varray_type loop_read_datarefs = LOOP_VINFO_DATAREF_READS (loop_vinfo); | |
2897 | unsigned int i; | |
6775f1f3 | 2898 | |
79fe1b3b DN |
2899 | for (i = 0; i < VARRAY_ACTIVE_SIZE (loop_write_datarefs); i++) |
2900 | { | |
2901 | struct data_reference *dr = VARRAY_GENERIC_PTR (loop_write_datarefs, i); | |
2902 | vect_compute_data_ref_alignment (dr, loop_vinfo); | |
2903 | } | |
2904 | ||
2905 | for (i = 0; i < VARRAY_ACTIVE_SIZE (loop_read_datarefs); i++) | |
2906 | { | |
2907 | struct data_reference *dr = VARRAY_GENERIC_PTR (loop_read_datarefs, i); | |
2908 | vect_compute_data_ref_alignment (dr, loop_vinfo); | |
2909 | } | |
2910 | } | |
2911 | ||
2912 | ||
2913 | /* Function vect_enhance_data_refs_alignment | |
2914 | ||
2915 | This pass will use loop versioning and loop peeling in order to enhance | |
2916 | the alignment of data references in the loop. | |
2917 | ||
2918 | FOR NOW: we assume that whatever versioning/peeling takes place, only the | |
2919 | original loop is to be vectorized; Any other loops that are created by | |
2920 | the transformations performed in this pass - are not supposed to be | |
2921 | vectorized. This restriction will be relaxed. | |
2922 | ||
2923 | FOR NOW: No transformation is actually performed. TODO. */ | |
2924 | ||
static void
vect_enhance_data_refs_alignment (loop_vec_info loop_info ATTRIBUTE_UNUSED)
{
  /* FORNOW: this function is a placeholder - it performs no transformation
     yet; the comment below documents the intended design.  */
  /*
   This pass will require a cost model to guide it whether to apply peeling
   or versioning or a combination of the two.  For example, the scheme that
   intel uses when given a loop with several memory accesses, is as follows:
   choose one memory access ('p') which alignment you want to force by doing
   peeling.  Then, either (1) generate a loop in which 'p' is aligned and all
   other accesses are not necessarily aligned, or (2) use loop versioning to
   generate one loop in which all accesses are aligned, and another loop in
   which only 'p' is necessarily aligned.

   ("Automatic Intra-Register Vectorization for the Intel Architecture",
    Aart J.C. Bik, Milind Girkar, Paul M. Grey and Ximmin Tian, International
    Journal of Parallel Programming, Vol. 30, No. 2, April 2002.)

   Devising a cost model is the most critical aspect of this work.  It will
   guide us on which access to peel for, whether to use loop versioning, how
   many versions to create, etc.  The cost model will probably consist of
   generic considerations as well as target specific considerations (on
   powerpc for example, misaligned stores are more painful than misaligned
   loads).

   Here is the general steps involved in alignment enhancements:

     -- original loop, before alignment analysis:
	for (i=0; i<N; i++){
	  x = q[i];			# DR_MISALIGNMENT(q) = unknown
	  p[i] = y;			# DR_MISALIGNMENT(p) = unknown
	}

     -- After vect_compute_data_refs_alignment:
	for (i=0; i<N; i++){
	  x = q[i];			# DR_MISALIGNMENT(q) = 3
	  p[i] = y;			# DR_MISALIGNMENT(p) = unknown
	}

     -- Possibility 1: we do loop versioning:
     if (p is aligned) {
	for (i=0; i<N; i++){	# loop 1A
	  x = q[i];			# DR_MISALIGNMENT(q) = 3
	  p[i] = y;			# DR_MISALIGNMENT(p) = 0
	}
     }
     else {
	for (i=0; i<N; i++){	# loop 1B
	  x = q[i];			# DR_MISALIGNMENT(q) = 3
	  p[i] = y;			# DR_MISALIGNMENT(p) = unaligned
	}
     }

     -- Possibility 2: we do loop peeling:
     for (i = 0; i < 3; i++){	# (scalar loop, not to be vectorized).
	x = q[i];
	p[i] = y;
     }
     for (i = 3; i < N; i++){	# loop 2A
	x = q[i];			# DR_MISALIGNMENT(q) = 0
	p[i] = y;			# DR_MISALIGNMENT(p) = unknown
     }

     -- Possibility 3: combination of loop peeling and versioning:
     for (i = 0; i < 3; i++){	# (scalar loop, not to be vectorized).
	x = q[i];
	p[i] = y;
     }
     if (p is aligned) {
	for (i = 3; i<N; i++){	# loop 3A
	  x = q[i];			# DR_MISALIGNMENT(q) = 0
	  p[i] = y;			# DR_MISALIGNMENT(p) = 0
	}
     }
     else {
	for (i = 3; i<N; i++){	# loop 3B
	  x = q[i];			# DR_MISALIGNMENT(q) = 0
	  p[i] = y;			# DR_MISALIGNMENT(p) = unaligned
	}
     }

     These loops are later passed to loop_transform to be vectorized.  The
     vectorizer will use the alignment information to guide the transformation
     (whether to generate regular loads/stores, or with special handling for
     misalignment).
   */
}
3011 | ||
3012 | ||
3013 | /* Function vect_analyze_data_refs_alignment | |
3014 | ||
3015 | Analyze the alignment of the data-references in the loop. | |
3016 | FOR NOW: Until support for misliagned accesses is in place, only if all | |
3017 | accesses are aligned can the loop be vectorized. This restriction will be | |
3018 | relaxed. */ | |
3019 | ||
3020 | static bool | |
3021 | vect_analyze_data_refs_alignment (loop_vec_info loop_vinfo) | |
3022 | { | |
3023 | varray_type loop_write_datarefs = LOOP_VINFO_DATAREF_WRITES (loop_vinfo); | |
7ccf35ed DN |
3024 | /*varray_type loop_read_datarefs = LOOP_VINFO_DATAREF_READS (loop_vinfo);*/ |
3025 | ||
79fe1b3b DN |
3026 | unsigned int i; |
3027 | ||
3028 | if (vect_debug_details (NULL)) | |
3029 | fprintf (dump_file, "\n<<vect_analyze_data_refs_alignment>>\n"); | |
3030 | ||
3031 | ||
3032 | /* This pass may take place at function granularity instead of at loop | |
3033 | granularity. */ | |
3034 | ||
3035 | vect_compute_data_refs_alignment (loop_vinfo); | |
3036 | ||
3037 | ||
3038 | /* This pass will use loop versioning and loop peeling in order to enhance | |
3039 | the alignment of data references in the loop. | |
3040 | FOR NOW: we assume that whatever versioning/peeling took place, the | |
3041 | original loop is to be vectorized. Any other loops that were created by | |
3042 | the transformations performed in this pass - are not supposed to be | |
3043 | vectorized. This restriction will be relaxed. */ | |
3044 | ||
3045 | vect_enhance_data_refs_alignment (loop_vinfo); | |
3046 | ||
3047 | ||
3048 | /* Finally, check that loop can be vectorized. | |
3049 | FOR NOW: Until support for misaligned accesses is in place, only if all | |
3050 | accesses are aligned can the loop be vectorized. This restriction will be | |
3051 | relaxed. */ | |
3052 | ||
3053 | for (i = 0; i < VARRAY_ACTIVE_SIZE (loop_write_datarefs); i++) | |
3054 | { | |
3055 | struct data_reference *dr = VARRAY_GENERIC_PTR (loop_write_datarefs, i); | |
3056 | if (!aligned_access_p (dr)) | |
3057 | { | |
3058 | if (vect_debug_stats (LOOP_VINFO_LOOP (loop_vinfo)) | |
3059 | || vect_debug_details (LOOP_VINFO_LOOP (loop_vinfo))) | |
3060 | fprintf (dump_file, "not vectorized: unaligned store."); | |
3061 | return false; | |
3062 | } | |
3063 | } | |
3064 | ||
7ccf35ed DN |
3065 | /* The vectorizer now supports misaligned loads, so we don't fail anymore |
3066 | in the presence of a misaligned read dataref. For some targets however | |
3067 | it may be preferable not to vectorize in such a case as misaligned | |
3068 | accesses are very costly. This should be considered in the future. */ | |
3069 | /* | |
79fe1b3b DN |
3070 | for (i = 0; i < VARRAY_ACTIVE_SIZE (loop_read_datarefs); i++) |
3071 | { | |
3072 | struct data_reference *dr = VARRAY_GENERIC_PTR (loop_read_datarefs, i); | |
3073 | if (!aligned_access_p (dr)) | |
3074 | { | |
3075 | if (vect_debug_stats (LOOP_VINFO_LOOP (loop_vinfo)) | |
3076 | || vect_debug_details (LOOP_VINFO_LOOP (loop_vinfo))) | |
3077 | fprintf (dump_file, "not vectorized: unaligned load."); | |
3078 | return false; | |
3079 | } | |
3080 | } | |
7ccf35ed | 3081 | */ |
79fe1b3b DN |
3082 | |
3083 | return true; | |
3084 | } | |
3085 | ||
3086 | ||
3087 | /* Function vect_analyze_data_ref_access. | |
3088 | ||
3089 | Analyze the access pattern of the data-reference DR. For now, a data access | |
3090 | has to consecutive and aligned to be considered vectorizable. */ | |
3091 | ||
3092 | static bool | |
3093 | vect_analyze_data_ref_access (struct data_reference *dr) | |
3094 | { | |
3095 | varray_type access_fns = DR_ACCESS_FNS (dr); | |
3096 | tree access_fn; | |
3097 | tree init, step; | |
6775f1f3 | 3098 | unsigned int dimensions, i; |
79fe1b3b | 3099 | |
6775f1f3 IR |
3100 | /* Check that in case of multidimensional array ref A[i1][i2]..[iN], |
3101 | i1, i2, ..., iN-1 are loop invariant (to make sure that the memory | |
3102 | access is contiguous). */ | |
3103 | dimensions = VARRAY_ACTIVE_SIZE (access_fns); | |
3104 | ||
3105 | for (i = 1; i < dimensions; i++) /* Not including the last dimension. */ | |
79fe1b3b | 3106 | { |
6775f1f3 | 3107 | access_fn = DR_ACCESS_FN (dr, i); |
79fe1b3b | 3108 | |
6775f1f3 IR |
3109 | if (evolution_part_in_loop_num (access_fn, |
3110 | loop_containing_stmt (DR_STMT (dr))->num)) | |
3111 | { | |
3112 | /* Evolution part is not NULL in this loop (it is neither constant nor | |
3113 | invariant). */ | |
3114 | if (vect_debug_details (NULL)) | |
3115 | { | |
3116 | fprintf (dump_file, | |
3117 | "not vectorized: complicated multidimensional array access."); | |
3118 | print_generic_expr (dump_file, access_fn, TDF_SLIM); | |
3119 | } | |
3120 | return false; | |
3121 | } | |
3122 | } | |
3123 | ||
3124 | access_fn = DR_ACCESS_FN (dr, 0); /* The last dimension access function. */ | |
3125 | if (!evolution_function_is_constant_p (access_fn) | |
3126 | && !vect_is_simple_iv_evolution (loop_containing_stmt (DR_STMT (dr))->num, | |
3127 | access_fn, &init, &step, true)) | |
79fe1b3b DN |
3128 | { |
3129 | if (vect_debug_details (NULL)) | |
3130 | { | |
6775f1f3 | 3131 | fprintf (dump_file, "not vectorized: too complicated access function."); |
79fe1b3b DN |
3132 | print_generic_expr (dump_file, access_fn, TDF_SLIM); |
3133 | } | |
3134 | return false; | |
3135 | } | |
6775f1f3 | 3136 | |
79fe1b3b DN |
3137 | return true; |
3138 | } | |
3139 | ||
3140 | ||
3141 | /* Function vect_analyze_data_ref_accesses. | |
3142 | ||
3143 | Analyze the access pattern of all the data references in the loop. | |
3144 | ||
3145 | FORNOW: the only access pattern that is considered vectorizable is a | |
3146 | simple step 1 (consecutive) access. | |
3147 | ||
6775f1f3 | 3148 | FORNOW: handle only arrays and pointer accesses. */ |
79fe1b3b DN |
3149 | |
3150 | static bool | |
3151 | vect_analyze_data_ref_accesses (loop_vec_info loop_vinfo) | |
3152 | { | |
3153 | unsigned int i; | |
3154 | varray_type loop_write_datarefs = LOOP_VINFO_DATAREF_WRITES (loop_vinfo); | |
3155 | varray_type loop_read_datarefs = LOOP_VINFO_DATAREF_READS (loop_vinfo); | |
3156 | ||
3157 | if (vect_debug_details (NULL)) | |
3158 | fprintf (dump_file, "\n<<vect_analyze_data_ref_accesses>>\n"); | |
3159 | ||
3160 | for (i = 0; i < VARRAY_ACTIVE_SIZE (loop_write_datarefs); i++) | |
3161 | { | |
3162 | struct data_reference *dr = VARRAY_GENERIC_PTR (loop_write_datarefs, i); | |
3163 | bool ok = vect_analyze_data_ref_access (dr); | |
3164 | if (!ok) | |
3165 | { | |
3166 | if (vect_debug_stats (LOOP_VINFO_LOOP (loop_vinfo)) | |
3167 | || vect_debug_details (LOOP_VINFO_LOOP (loop_vinfo))) | |
3168 | fprintf (dump_file, "not vectorized: complicated access pattern."); | |
3169 | return false; | |
3170 | } | |
3171 | } | |
3172 | ||
3173 | for (i = 0; i < VARRAY_ACTIVE_SIZE (loop_read_datarefs); i++) | |
3174 | { | |
3175 | struct data_reference *dr = VARRAY_GENERIC_PTR (loop_read_datarefs, i); | |
3176 | bool ok = vect_analyze_data_ref_access (dr); | |
3177 | if (!ok) | |
3178 | { | |
3179 | if (vect_debug_stats (LOOP_VINFO_LOOP (loop_vinfo)) | |
3180 | || vect_debug_details (LOOP_VINFO_LOOP (loop_vinfo))) | |
3181 | fprintf (dump_file, "not vectorized: complicated access pattern."); | |
3182 | return false; | |
3183 | } | |
3184 | } | |
3185 | ||
3186 | return true; | |
3187 | } | |
3188 | ||
3189 | ||
3190 | /* Function vect_analyze_pointer_ref_access. | |
3191 | ||
3192 | Input: | |
3193 | STMT - a stmt that contains a data-ref | |
3194 | MEMREF - a data-ref in STMT, which is an INDIRECT_REF. | |
3195 | ||
3196 | If the data-ref access is vectorizable, return a data_reference structure | |
3197 | that represents it (DR). Otherwise - return NULL. */ | |
3198 | ||
static struct data_reference *
vect_analyze_pointer_ref_access (tree memref, tree stmt, bool is_read)
{
  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
  struct loop *loop = STMT_VINFO_LOOP (stmt_info);
  /* Evolution of the pointer itself across loop iterations.  */
  tree access_fn = analyze_scalar_evolution (loop, TREE_OPERAND (memref, 0));
  tree init, step;
  int step_val;
  tree reftype, innertype;
  enum machine_mode innermode;
  tree indx_access_fn;
  int loopnum = loop->num;
  struct data_reference *dr;

  if (!access_fn)
    {
      if (vect_debug_stats (loop) || vect_debug_details (loop))
        fprintf (dump_file, "not vectorized: complicated pointer access.");
      return NULL;
    }

  if (vect_debug_details (NULL))
    {
      fprintf (dump_file, "Access function of ptr: ");
      print_generic_expr (dump_file, access_fn, TDF_SLIM);
    }

  /* The pointer must evolve as a simple induction variable: init + i*step.  */
  if (!vect_is_simple_iv_evolution (loopnum, access_fn, &init, &step, false))
    {
      if (vect_debug_stats (loop) || vect_debug_details (loop))
        fprintf (dump_file, "not vectorized: pointer access is not simple.");
      return NULL;
    }

  /* Strip conversions so INIT exposes the underlying pointer value.  */
  STRIP_NOPS (init);

  if (!host_integerp (step,0))
    {
      if (vect_debug_stats (loop) || vect_debug_details (loop))
        fprintf (dump_file,
                "not vectorized: non constant step for pointer access.");
      return NULL;
    }

  step_val = TREE_INT_CST_LOW (step);

  /* Both the accessed expression and the initial value must have pointer
     type; anything else is an unexpected access form.  */
  reftype = TREE_TYPE (TREE_OPERAND (memref, 0));
  if (TREE_CODE (reftype) != POINTER_TYPE)
    {
      if (vect_debug_stats (loop) || vect_debug_details (loop))
        fprintf (dump_file, "not vectorized: unexpected pointer access form.");
      return NULL;
    }

  reftype = TREE_TYPE (init);
  if (TREE_CODE (reftype) != POINTER_TYPE)
    {
      if (vect_debug_stats (loop) || vect_debug_details (loop))
        fprintf (dump_file, "not vectorized: unexpected pointer access form.");
      return NULL;
    }

  /* The step must equal the size of the pointed-to type, i.e. the pointer
     advances by exactly one element per iteration.  */
  innertype = TREE_TYPE (reftype);
  innermode = TYPE_MODE (innertype);
  if (GET_MODE_SIZE (innermode) != step_val)
    {
      /* FORNOW: support only consecutive access */
      if (vect_debug_stats (loop) || vect_debug_details (loop))
        fprintf (dump_file, "not vectorized: non consecutive access.");
      return NULL;
    }

  /* Model the access as a unit-stride index starting at zero: {0, +, 1}.  */
  indx_access_fn =
        build_polynomial_chrec (loopnum, integer_zero_node, integer_one_node);
  if (vect_debug_details (NULL))
    {
      fprintf (dump_file, "Access function of ptr indx: ");
      print_generic_expr (dump_file, indx_access_fn, TDF_SLIM);
    }
  dr = init_data_ref (stmt, memref, init, indx_access_fn, is_read);
  return dr;
}
3281 | ||
3282 | ||
6775f1f3 IR |
/* Function vect_get_symbl_and_dr.

   The function returns SYMBL - the relevant variable for
   memory tag (for aliasing purposes).
   Also data reference structure DR is created.

   Input:
   MEMREF - data reference in STMT
   IS_READ - TRUE if STMT reads from MEMREF, FALSE if writes to MEMREF

   Output:
   DR - data_reference struct for MEMREF
   return value - the relevant variable for memory tag (for aliasing purposes),
   or NULL_TREE if MEMREF is of an unsupported form.  Note that on a
   NULL_TREE return *DR may already have been set (e.g. in the ARRAY_REF
   case the data-ref is built before the symbol lookup can still fail).  */

static tree
vect_get_symbl_and_dr (tree memref, tree stmt, bool is_read,
		       loop_vec_info loop_vinfo, struct data_reference **dr)
{
  tree symbl, oprnd0, oprnd1;
  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
  tree offset;
  tree array_base, base;
  struct data_reference *new_dr;
  bool base_aligned_p;

  *dr = NULL;
  switch (TREE_CODE (memref))
    {
    case INDIRECT_REF:
      /* Pointer dereference: delegate the evolution analysis.  */
      new_dr = vect_analyze_pointer_ref_access (memref, stmt, is_read);
      if (! new_dr)
	return NULL_TREE;
      *dr = new_dr;
      symbl = DR_BASE_NAME (new_dr);
      STMT_VINFO_VECT_DR_BASE (stmt_info) = symbl;

      switch (TREE_CODE (symbl))
	{
	case PLUS_EXPR:
	case MINUS_EXPR:
	  oprnd0 = TREE_OPERAND (symbl, 0);
	  oprnd1 = TREE_OPERAND (symbl, 1);

	  STRIP_NOPS(oprnd1);
	  /* Only {address_base + offset} expressions are supported,
	     where address_base can be POINTER_TYPE or ARRAY_TYPE and
	     offset can be anything but POINTER_TYPE or ARRAY_TYPE.
	     TODO: swap operands if {offset + address_base}.  */
	  if ((TREE_CODE (TREE_TYPE (oprnd1)) == POINTER_TYPE
	       && TREE_CODE (oprnd1) != INTEGER_CST)
	      || TREE_CODE (TREE_TYPE (oprnd1)) == ARRAY_TYPE)
	    return NULL_TREE;

	  /* Recurse on the address part of the sum.  NOTE(review): the
	     recursive call may itself yield NULL_TREE; that value is then
	     returned through the fall-through below, which the caller
	     treats as failure.  */
	  if (TREE_CODE (TREE_TYPE (oprnd0)) == POINTER_TYPE)
	    symbl = oprnd0;
	  else
	    symbl = vect_get_symbl_and_dr (oprnd0, stmt, is_read,
					   loop_vinfo, &new_dr);

	  /* Intentional fall through into the cases below, which simply
	     keep SYMBL as computed above.  */
	case SSA_NAME:
	case ADDR_EXPR:
	  /* symbl remains unchanged.  */
	  break;

	default:
	  if (vect_debug_details (NULL))
	    {
	      fprintf (dump_file, "unhandled data ref: ");
	      print_generic_expr (dump_file, memref, TDF_SLIM);
	      fprintf (dump_file, " (symbl ");
	      print_generic_expr (dump_file, symbl, TDF_SLIM);
	      fprintf (dump_file, ") in stmt ");
	      print_generic_expr (dump_file, stmt, TDF_SLIM);
	    }
	  return NULL_TREE;
	}
      break;

    case ARRAY_REF:
      offset = size_zero_node;

      /* Store the array base in the stmt info.
	 For one dimensional array ref a[i], the base is a,
	 for multidimensional a[i1][i2]..[iN], the base is
	 a[i1][i2]..[iN-1].  */
      array_base = TREE_OPERAND (memref, 0);
      STMT_VINFO_VECT_DR_BASE (stmt_info) = array_base;

      new_dr = analyze_array (stmt, memref, is_read);
      *dr = new_dr;

      /* Find the relevant symbol for aliasing purposes.  */
      base = DR_BASE_NAME (new_dr);
      switch (TREE_CODE (base))
	{
	case VAR_DECL:
	  symbl = base;
	  break;

	case INDIRECT_REF:
	  /* The pointer being dereferenced is the alias symbol.  */
	  symbl = TREE_OPERAND (base, 0);
	  break;

	case COMPONENT_REF:
	  /* Could have recorded more accurate information -
	     i.e, the actual FIELD_DECL that is being referenced -
	     but later passes expect VAR_DECL as the nmt.  */
	  symbl = vect_get_base_and_bit_offset (new_dr, base, NULL_TREE,
						loop_vinfo, &offset, &base_aligned_p);
	  if (symbl)
	    break;
	  /* fall through */
	default:
	  if (vect_debug_details (NULL))
	    {
	      fprintf (dump_file, "unhandled struct/class field access ");
	      print_generic_expr (dump_file, stmt, TDF_SLIM);
	    }
	  return NULL_TREE;
	}
      break;

    default:
      /* Neither INDIRECT_REF nor ARRAY_REF: unsupported data-ref form.  */
      if (vect_debug_details (NULL))
	{
	  fprintf (dump_file, "unhandled data ref: ");
	  print_generic_expr (dump_file, memref, TDF_SLIM);
	  fprintf (dump_file, " in stmt ");
	  print_generic_expr (dump_file, stmt, TDF_SLIM);
	}
      return NULL_TREE;
    }
  return symbl;
}
3419 | ||
3420 | ||
79fe1b3b DN |
/* Function vect_analyze_data_refs.

   Find all the data references in the loop.

   Walks every stmt in every block of the loop; a stmt is assumed to
   contain a data-ref if and only if it has virtual operands
   (vuses/vdefs).  For each such stmt the data-ref is analyzed, pushed
   onto the loop's read or write dataref varray, and a memory tag for
   aliasing purposes is recorded in the stmt info.  Returns false as
   soon as any stmt is found whose data-ref cannot be handled.

   FORNOW: Handle aligned INDIRECT_REFs and ARRAY_REFs
	   which base is really an array (not a pointer) and which alignment
	   can be forced. This restriction will be relaxed.  */

static bool
vect_analyze_data_refs (loop_vec_info loop_vinfo)
{
  struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
  basic_block *bbs = LOOP_VINFO_BBS (loop_vinfo);
  int nbbs = loop->num_nodes;
  block_stmt_iterator si;
  int j;
  struct data_reference *dr;
  tree tag;
  tree address_base;

  if (vect_debug_details (NULL))
    fprintf (dump_file, "\n<<vect_analyze_data_refs>>\n");

  for (j = 0; j < nbbs; j++)
    {
      basic_block bb = bbs[j];
      for (si = bsi_start (bb); !bsi_end_p (si); bsi_next (&si))
	{
	  bool is_read = false;
	  tree stmt = bsi_stmt (si);
	  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
	  v_may_def_optype v_may_defs = STMT_V_MAY_DEF_OPS (stmt);
	  v_must_def_optype v_must_defs = STMT_V_MUST_DEF_OPS (stmt);
	  vuse_optype vuses = STMT_VUSE_OPS (stmt);
	  varray_type *datarefs = NULL;
	  int nvuses, nv_may_defs, nv_must_defs;
	  tree memref = NULL;
	  tree symbl;

	  /* Assumption: there exists a data-ref in stmt, if and only if
	     it has vuses/vdefs.  */

	  if (!vuses && !v_may_defs && !v_must_defs)
	    continue;

	  nvuses = NUM_VUSES (vuses);
	  nv_may_defs = NUM_V_MAY_DEFS (v_may_defs);
	  nv_must_defs = NUM_V_MUST_DEFS (v_must_defs);

	  /* A stmt that both reads and writes memory is not handled:
	     the code below assumes exactly one data-ref per stmt.  */
	  if (nvuses && (nv_may_defs || nv_must_defs))
	    {
	      if (vect_debug_details (NULL))
		{
		  fprintf (dump_file, "unexpected vdefs and vuses in stmt: ");
		  print_generic_expr (dump_file, stmt, TDF_SLIM);
		}
	      return false;
	    }

	  /* Only assignments can carry a supported data-ref.  */
	  if (TREE_CODE (stmt) != MODIFY_EXPR)
	    {
	      if (vect_debug_details (NULL))
		{
		  fprintf (dump_file, "unexpected vops in stmt: ");
		  print_generic_expr (dump_file, stmt, TDF_SLIM);
		}
	      return false;
	    }

	  /* The data-ref is on the RHS for a load, LHS for a store.  */
	  if (vuses)
	    {
	      memref = TREE_OPERAND (stmt, 1);
	      datarefs = &(LOOP_VINFO_DATAREF_READS (loop_vinfo));
	      is_read = true;
	    }
	  else /* vdefs */
	    {
	      memref = TREE_OPERAND (stmt, 0);
	      datarefs = &(LOOP_VINFO_DATAREF_WRITES (loop_vinfo));
	      is_read = false;
	    }

	  /* Analyze MEMREF. If it is of a supported form, build data_reference
	     struct for it (DR) and find the relevant symbol for aliasing
	     purposes.  */
	  symbl = vect_get_symbl_and_dr (memref, stmt, is_read, loop_vinfo, &dr);
	  if (!symbl)
	    {
	      if (vect_debug_stats (loop) || vect_debug_details (loop))
		{
		  fprintf (dump_file, "not vectorized: unhandled data ref: ");
		  print_generic_expr (dump_file, stmt, TDF_SLIM);
		}
	      return false;
	    }

	  /* Find and record the memtag assigned to this data-ref.  */
	  switch (TREE_CODE (symbl))
	    {
	    case VAR_DECL:
	      /* A plain variable is its own memory tag.  */
	      STMT_VINFO_MEMTAG (stmt_info) = symbl;
	      break;

	    case SSA_NAME:
	      /* Look up the type memory tag of the underlying variable;
		 fall back to the tag of the dereferenced pointer.  */
	      symbl = SSA_NAME_VAR (symbl);
	      tag = get_var_ann (symbl)->type_mem_tag;
	      if (!tag)
		{
		  tree ptr = TREE_OPERAND (memref, 0);
		  if (TREE_CODE (ptr) == SSA_NAME)
		    tag = get_var_ann (SSA_NAME_VAR (ptr))->type_mem_tag;
		}
	      if (!tag)
		{
		  if (vect_debug_stats (loop) || vect_debug_details (loop))
		    fprintf (dump_file, "not vectorized: no memtag for ref.");
		  return false;
		}
	      STMT_VINFO_MEMTAG (stmt_info) = tag;
	      break;

	    case ADDR_EXPR:
	      address_base = TREE_OPERAND (symbl, 0);

	      switch (TREE_CODE (address_base))
		{
		case ARRAY_REF:
		  /* Re-analyze as an array access; note that DR is
		     replaced by the newly built data-ref here.  */
		  dr = analyze_array (stmt, TREE_OPERAND (symbl, 0), DR_IS_READ(dr));
		  STMT_VINFO_MEMTAG (stmt_info) = DR_BASE_NAME (dr);
		  break;

		case VAR_DECL:
		  STMT_VINFO_MEMTAG (stmt_info) = address_base;
		  break;

		default:
		  if (vect_debug_stats (loop) || vect_debug_details (loop))
		    {
		      fprintf (dump_file, "not vectorized: unhandled address expression: ");
		      print_generic_expr (dump_file, stmt, TDF_SLIM);
		    }
		  return false;
		}
	      break;

	    default:
	      if (vect_debug_stats (loop) || vect_debug_details (loop))
		{
		  fprintf (dump_file, "not vectorized: unsupported data-ref: ");
		  print_generic_expr (dump_file, memref, TDF_SLIM);
		}
	      return false;
	    }

	  /* Record the data-ref in the loop info and in the stmt info.  */
	  VARRAY_PUSH_GENERIC_PTR (*datarefs, dr);
	  STMT_VINFO_DATA_REF (stmt_info) = dr;
	}
    }

  return true;
}
3582 | ||
3583 | ||
8c27b7d4 | 3584 | /* Utility functions used by vect_mark_stmts_to_be_vectorized. */ |
79fe1b3b DN |
3585 | |
3586 | /* Function vect_mark_relevant. | |
3587 | ||
3588 | Mark STMT as "relevant for vectorization" and add it to WORKLIST. */ | |
3589 | ||
3590 | static void | |
3591 | vect_mark_relevant (varray_type worklist, tree stmt) | |
3592 | { | |
3593 | stmt_vec_info stmt_info; | |
3594 | ||
3595 | if (vect_debug_details (NULL)) | |
3596 | fprintf (dump_file, "mark relevant."); | |
3597 | ||
3598 | if (TREE_CODE (stmt) == PHI_NODE) | |
3599 | { | |
3600 | VARRAY_PUSH_TREE (worklist, stmt); | |
3601 | return; | |
3602 | } | |
3603 | ||
3604 | stmt_info = vinfo_for_stmt (stmt); | |
3605 | ||
3606 | if (!stmt_info) | |
3607 | { | |
3608 | if (vect_debug_details (NULL)) | |
3609 | { | |
3610 | fprintf (dump_file, "mark relevant: no stmt info!!."); | |
3611 | print_generic_expr (dump_file, stmt, TDF_SLIM); | |
3612 | } | |
3613 | return; | |
3614 | } | |
3615 | ||
3616 | if (STMT_VINFO_RELEVANT_P (stmt_info)) | |
3617 | { | |
3618 | if (vect_debug_details (NULL)) | |
3619 | fprintf (dump_file, "already marked relevant."); | |
3620 | return; | |
3621 | } | |
3622 | ||
3623 | STMT_VINFO_RELEVANT_P (stmt_info) = 1; | |
3624 | VARRAY_PUSH_TREE (worklist, stmt); | |
3625 | } | |
3626 | ||
3627 | ||
3628 | /* Function vect_stmt_relevant_p. | |
3629 | ||
3630 | Return true if STMT in loop that is represented by LOOP_VINFO is | |
3631 | "relevant for vectorization". | |
3632 | ||
3633 | A stmt is considered "relevant for vectorization" if: | |
3634 | - it has uses outside the loop. | |
3635 | - it has vdefs (it alters memory). | |
3636 | - control stmts in the loop (except for the exit condition). | |
3637 | ||
3638 | CHECKME: what other side effects would the vectorizer allow? */ | |
3639 | ||
3640 | static bool | |
3641 | vect_stmt_relevant_p (tree stmt, loop_vec_info loop_vinfo) | |
3642 | { | |
3643 | v_may_def_optype v_may_defs; | |
3644 | v_must_def_optype v_must_defs; | |
3645 | struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo); | |
3646 | int i; | |
3647 | dataflow_t df; | |
3648 | int num_uses; | |
3649 | ||
3650 | /* cond stmt other than loop exit cond. */ | |
3651 | if (is_ctrl_stmt (stmt) && (stmt != LOOP_VINFO_EXIT_COND (loop_vinfo))) | |
3652 | return true; | |
3653 | ||
3654 | /* changing memory. */ | |
3655 | v_may_defs = STMT_V_MAY_DEF_OPS (stmt); | |
3656 | v_must_defs = STMT_V_MUST_DEF_OPS (stmt); | |
3657 | if (v_may_defs || v_must_defs) | |
3658 | { | |
3659 | if (vect_debug_details (NULL)) | |
3660 | fprintf (dump_file, "vec_stmt_relevant_p: stmt has vdefs."); | |
3661 | return true; | |
3662 | } | |
3663 | ||
3664 | /* uses outside the loop. */ | |
3665 | df = get_immediate_uses (stmt); | |
3666 | num_uses = num_immediate_uses (df); | |
3667 | for (i = 0; i < num_uses; i++) | |
3668 | { | |
3669 | tree use = immediate_use (df, i); | |
3670 | basic_block bb = bb_for_stmt (use); | |
3671 | if (!flow_bb_inside_loop_p (loop, bb)) | |
3672 | { | |
3673 | if (vect_debug_details (NULL)) | |
3674 | fprintf (dump_file, "vec_stmt_relevant_p: used out of loop."); | |
3675 | return true; | |
3676 | } | |
3677 | } | |
3678 | ||
3679 | return false; | |
3680 | } | |
3681 | ||
3682 | ||
/* Function vect_mark_stmts_to_be_vectorized.

   Not all stmts in the loop need to be vectorized. For example:

     for i...
       for j...
   1.    T0 = i + j
   2.	 T1 = a[T0]

   3.    j = j + 1

   Stmt 1 and 3 do not need to be vectorized, because loop control and
   addressing of vectorized data-refs are handled differently.

   This pass detects such stmts.

   Algorithm: seed a worklist with the directly relevant stmts (see
   vect_stmt_relevant_p), then iterate to a fixed point, marking as
   relevant every in-loop defining stmt of each use of a worklist
   stmt - except uses that only serve address computation.  Returns
   false if an unsupported use is encountered.  */

static bool
vect_mark_stmts_to_be_vectorized (loop_vec_info loop_vinfo)
{
  varray_type worklist;
  struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
  basic_block *bbs = LOOP_VINFO_BBS (loop_vinfo);
  unsigned int nbbs = loop->num_nodes;
  block_stmt_iterator si;
  tree stmt;
  stmt_ann_t ann;
  unsigned int i;
  int j;
  use_optype use_ops;
  stmt_vec_info stmt_info;

  if (vect_debug_details (NULL))
    fprintf (dump_file, "\n<<vect_mark_stmts_to_be_vectorized>>\n");

  VARRAY_TREE_INIT (worklist, 64, "work list");

  /* 1. Init worklist.  */

  for (i = 0; i < nbbs; i++)
    {
      basic_block bb = bbs[i];
      for (si = bsi_start (bb); !bsi_end_p (si); bsi_next (&si))
	{
	  stmt = bsi_stmt (si);

	  if (vect_debug_details (NULL))
	    {
	      fprintf (dump_file, "init: stmt relevant? ");
	      print_generic_expr (dump_file, stmt, TDF_SLIM);
	    }

	  /* Reset the flag so the fixed-point loop below starts clean.  */
	  stmt_info = vinfo_for_stmt (stmt);
	  STMT_VINFO_RELEVANT_P (stmt_info) = 0;

	  if (vect_stmt_relevant_p (stmt, loop_vinfo))
	    vect_mark_relevant (worklist, stmt);
	}
    }


  /* 2. Process_worklist */

  while (VARRAY_ACTIVE_SIZE (worklist) > 0)
    {
      stmt = VARRAY_TOP_TREE (worklist);
      VARRAY_POP (worklist);

      if (vect_debug_details (NULL))
	{
	  fprintf (dump_file, "worklist: examine stmt: ");
	  print_generic_expr (dump_file, stmt, TDF_SLIM);
	}

      /* Examine the USES in this statement. Mark all the statements which
	 feed this statement's uses as "relevant", unless the USE is used as
	 an array index.  */

      if (TREE_CODE (stmt) == PHI_NODE)
	{
	  /* follow the def-use chain inside the loop.  */
	  for (j = 0; j < PHI_NUM_ARGS (stmt); j++)
	    {
	      tree arg = PHI_ARG_DEF (stmt, j);
	      tree def_stmt = NULL_TREE;
	      basic_block bb;
	      if (!vect_is_simple_use (arg, loop, &def_stmt))
		{
		  if (vect_debug_details (NULL))
		    fprintf (dump_file, "worklist: unsupported use.");
		  varray_clear (worklist);
		  return false;
		}
	      /* No defining stmt (e.g. a constant or invariant arg).  */
	      if (!def_stmt)
		continue;

	      if (vect_debug_details (NULL))
		{
		  fprintf (dump_file, "worklist: def_stmt: ");
		  print_generic_expr (dump_file, def_stmt, TDF_SLIM);
		}

	      /* Only defs inside the loop feed the vectorized code.  */
	      bb = bb_for_stmt (def_stmt);
	      if (flow_bb_inside_loop_p (loop, bb))
		vect_mark_relevant (worklist, def_stmt);
	    }
	}

      ann = stmt_ann (stmt);
      use_ops = USE_OPS (ann);

      for (i = 0; i < NUM_USES (use_ops); i++)
	{
	  tree use = USE_OP (use_ops, i);

	  /* We are only interested in uses that need to be vectorized. Uses
	     that are used for address computation are not considered relevant.
	   */
	  if (exist_non_indexing_operands_for_use_p (use, stmt))
	    {
	      tree def_stmt = NULL_TREE;
	      basic_block bb;
	      if (!vect_is_simple_use (use, loop, &def_stmt))
		{
		  if (vect_debug_details (NULL))
		    fprintf (dump_file, "worklist: unsupported use.");
		  varray_clear (worklist);
		  return false;
		}

	      if (!def_stmt)
		continue;

	      if (vect_debug_details (NULL))
		{
		  fprintf (dump_file, "worklist: examine use %d: ", i);
		  print_generic_expr (dump_file, use, TDF_SLIM);
		}

	      bb = bb_for_stmt (def_stmt);
	      if (flow_bb_inside_loop_p (loop, bb))
		vect_mark_relevant (worklist, def_stmt);
	    }
	}
    }				/* while worklist */

  varray_clear (worklist);
  return true;
}
3831 | ||
3832 | ||
3833 | /* Function vect_get_loop_niters. | |
3834 | ||
3835 | Determine how many iterations the loop is executed. */ | |
3836 | ||
3837 | static tree | |
3838 | vect_get_loop_niters (struct loop *loop, HOST_WIDE_INT *number_of_iterations) | |
3839 | { | |
3840 | tree niters; | |
3841 | ||
3842 | if (vect_debug_details (NULL)) | |
3843 | fprintf (dump_file, "\n<<get_loop_niters>>\n"); | |
3844 | ||
3845 | niters = number_of_iterations_in_loop (loop); | |
3846 | ||
3847 | if (niters != NULL_TREE | |
3848 | && niters != chrec_dont_know | |
3849 | && host_integerp (niters,0)) | |
3850 | { | |
3851 | *number_of_iterations = TREE_INT_CST_LOW (niters); | |
3852 | ||
3853 | if (vect_debug_details (NULL)) | |
3854 | fprintf (dump_file, "==> get_loop_niters:" HOST_WIDE_INT_PRINT_DEC, | |
3855 | *number_of_iterations); | |
3856 | } | |
3857 | ||
3858 | return get_loop_exit_condition (loop); | |
3859 | } | |
3860 | ||
3861 | ||
3862 | /* Function vect_analyze_loop_form. | |
3863 | ||
3864 | Verify the following restrictions (some may be relaxed in the future): | |
3865 | - it's an inner-most loop | |
3866 | - number of BBs = 2 (which are the loop header and the latch) | |
3867 | - the loop has a pre-header | |
3868 | - the loop has a single entry and exit | |
3869 | - the loop exit condition is simple enough, and the number of iterations | |
3870 | can be analyzed (a countable loop). */ | |
3871 | ||
3872 | static loop_vec_info | |
3873 | vect_analyze_loop_form (struct loop *loop) | |
3874 | { | |
3875 | loop_vec_info loop_vinfo; | |
3876 | tree loop_cond; | |
3877 | HOST_WIDE_INT number_of_iterations = -1; | |
3878 | ||
3879 | if (vect_debug_details (loop)) | |
3880 | fprintf (dump_file, "\n<<vect_analyze_loop_form>>\n"); | |
3881 | ||
82b85a85 ZD |
3882 | if (loop->inner |
3883 | || !loop->single_exit | |
3884 | || loop->num_nodes != 2) | |
79fe1b3b DN |
3885 | { |
3886 | if (vect_debug_stats (loop) || vect_debug_details (loop)) | |
3887 | { | |
3888 | fprintf (dump_file, "not vectorized: bad loop form. "); | |
82b85a85 | 3889 | if (loop->inner) |
79fe1b3b | 3890 | fprintf (dump_file, "nested loop."); |
82b85a85 ZD |
3891 | else if (!loop->single_exit) |
3892 | fprintf (dump_file, "multiple exits."); | |
3893 | else if (loop->num_nodes != 2) | |
79fe1b3b | 3894 | fprintf (dump_file, "too many BBs in loop."); |
79fe1b3b DN |
3895 | } |
3896 | ||
3897 | return NULL; | |
3898 | } | |
3899 | ||
3900 | /* We assume that the loop exit condition is at the end of the loop. i.e, | |
3901 | that the loop is represented as a do-while (with a proper if-guard | |
3902 | before the loop if needed), where the loop header contains all the | |
3903 | executable statements, and the latch is empty. */ | |
3904 | if (!empty_block_p (loop->latch)) | |
3905 | { | |
3906 | if (vect_debug_stats (loop) || vect_debug_details (loop)) | |
3907 | fprintf (dump_file, "not vectorized: unexpectd loop form."); | |
3908 | return NULL; | |
3909 | } | |
3910 | ||
3911 | if (empty_block_p (loop->header)) | |
3912 | { | |
3913 | if (vect_debug_stats (loop) || vect_debug_details (loop)) | |
3914 | fprintf (dump_file, "not vectorized: empty loop."); | |
3915 | return NULL; | |
3916 | } | |
3917 | ||
3918 | loop_cond = vect_get_loop_niters (loop, &number_of_iterations); | |
3919 | if (!loop_cond) | |
3920 | { | |
3921 | if (vect_debug_stats (loop) || vect_debug_details (loop)) | |
3922 | fprintf (dump_file, "not vectorized: complicated exit condition."); | |
3923 | return NULL; | |
3924 | } | |
3925 | ||
3926 | if (number_of_iterations < 0) | |
3927 | { | |
3928 | if (vect_debug_stats (loop) || vect_debug_details (loop)) | |
3929 | fprintf (dump_file, "not vectorized: unknown loop bound."); | |
3930 | return NULL; | |
3931 | } | |
3932 | ||
3933 | if (number_of_iterations == 0) /* CHECKME: can this happen? */ | |
3934 | { | |
3935 | if (vect_debug_stats (loop) || vect_debug_details (loop)) | |
3936 | fprintf (dump_file, "not vectorized: number of iterations = 0."); | |
3937 | return NULL; | |
3938 | } | |
3939 | ||
3940 | loop_vinfo = new_loop_vec_info (loop); | |
3941 | LOOP_VINFO_EXIT_COND (loop_vinfo) = loop_cond; | |
3942 | LOOP_VINFO_NITERS (loop_vinfo) = number_of_iterations; | |
3943 | ||
3944 | return loop_vinfo; | |
3945 | } | |
3946 | ||
3947 | ||
3948 | /* Function vect_analyze_loop. | |
3949 | ||
3950 | Apply a set of analyses on LOOP, and create a loop_vec_info struct | |
3951 | for it. The different analyses will record information in the | |
3952 | loop_vec_info struct. */ | |
3953 | ||
3954 | static loop_vec_info | |
3955 | vect_analyze_loop (struct loop *loop) | |
3956 | { | |
3957 | bool ok; | |
3958 | loop_vec_info loop_vinfo; | |
3959 | ||
3960 | if (vect_debug_details (NULL)) | |
3961 | fprintf (dump_file, "\n<<<<<<< analyze_loop_nest >>>>>>>\n"); | |
3962 | ||
3963 | /* Check the CFG characteristics of the loop (nesting, entry/exit, etc. */ | |
3964 | ||
3965 | loop_vinfo = vect_analyze_loop_form (loop); | |
3966 | if (!loop_vinfo) | |
3967 | { | |
3968 | if (vect_debug_details (loop)) | |
3969 | fprintf (dump_file, "bad loop form."); | |
3970 | return NULL; | |
3971 | } | |
3972 | ||
3973 | /* Find all data references in the loop (which correspond to vdefs/vuses) | |
3974 | and analyze their evolution in the loop. | |
3975 | ||
6775f1f3 | 3976 | FORNOW: Handle only simple, array references, which |
79fe1b3b DN |
3977 | alignment can be forced, and aligned pointer-references. */ |
3978 | ||
3979 | ok = vect_analyze_data_refs (loop_vinfo); | |
3980 | if (!ok) | |
3981 | { | |
3982 | if (vect_debug_details (loop)) | |
3983 | fprintf (dump_file, "bad data references."); | |
3984 | destroy_loop_vec_info (loop_vinfo); | |
3985 | return NULL; | |
3986 | } | |
3987 | ||
79fe1b3b DN |
3988 | /* Data-flow analysis to detect stmts that do not need to be vectorized. */ |
3989 | ||
3990 | ok = vect_mark_stmts_to_be_vectorized (loop_vinfo); | |
3991 | if (!ok) | |
3992 | { | |
3993 | if (vect_debug_details (loop)) | |
3994 | fprintf (dump_file, "unexpected pattern."); | |
3995 | if (vect_debug_details (loop)) | |
3996 | fprintf (dump_file, "not vectorized: unexpected pattern."); | |
3997 | destroy_loop_vec_info (loop_vinfo); | |
3998 | return NULL; | |
3999 | } | |
4000 | ||
79fe1b3b DN |
4001 | /* Check that all cross-iteration scalar data-flow cycles are OK. |
4002 | Cross-iteration cycles caused by virtual phis are analyzed separately. */ | |
4003 | ||
4004 | ok = vect_analyze_scalar_cycles (loop_vinfo); | |
4005 | if (!ok) | |
4006 | { | |
4007 | if (vect_debug_details (loop)) | |
4008 | fprintf (dump_file, "bad scalar cycle."); | |
4009 | destroy_loop_vec_info (loop_vinfo); | |
4010 | return NULL; | |
4011 | } | |
4012 | ||
79fe1b3b DN |
4013 | /* Analyze data dependences between the data-refs in the loop. |
4014 | FORNOW: fail at the first data dependence that we encounter. */ | |
4015 | ||
4016 | ok = vect_analyze_data_ref_dependences (loop_vinfo); | |
4017 | if (!ok) | |
4018 | { | |
4019 | if (vect_debug_details (loop)) | |
4020 | fprintf (dump_file, "bad data dependence."); | |
4021 | destroy_loop_vec_info (loop_vinfo); | |
4022 | return NULL; | |
4023 | } | |
4024 | ||
79fe1b3b DN |
4025 | /* Analyze the access patterns of the data-refs in the loop (consecutive, |
4026 | complex, etc.). FORNOW: Only handle consecutive access pattern. */ | |
4027 | ||
4028 | ok = vect_analyze_data_ref_accesses (loop_vinfo); | |
4029 | if (!ok) | |
4030 | { | |
4031 | if (vect_debug_details (loop)) | |
4032 | fprintf (dump_file, "bad data access."); | |
4033 | destroy_loop_vec_info (loop_vinfo); | |
4034 | return NULL; | |
4035 | } | |
4036 | ||
79fe1b3b DN |
4037 | /* Analyze the alignment of the data-refs in the loop. |
4038 | FORNOW: Only aligned accesses are handled. */ | |
4039 | ||
4040 | ok = vect_analyze_data_refs_alignment (loop_vinfo); | |
4041 | if (!ok) | |
4042 | { | |
4043 | if (vect_debug_details (loop)) | |
4044 | fprintf (dump_file, "bad data alignment."); | |
4045 | destroy_loop_vec_info (loop_vinfo); | |
4046 | return NULL; | |
4047 | } | |
4048 | ||
79fe1b3b DN |
4049 | /* Scan all the operations in the loop and make sure they are |
4050 | vectorizable. */ | |
4051 | ||
4052 | ok = vect_analyze_operations (loop_vinfo); | |
4053 | if (!ok) | |
4054 | { | |
4055 | if (vect_debug_details (loop)) | |
4056 | fprintf (dump_file, "bad operation or unsupported loop bound."); | |
4057 | destroy_loop_vec_info (loop_vinfo); | |
4058 | return NULL; | |
4059 | } | |
4060 | ||
4061 | LOOP_VINFO_VECTORIZABLE_P (loop_vinfo) = 1; | |
4062 | ||
4063 | return loop_vinfo; | |
4064 | } | |
4065 | ||
4066 | ||
/* Function need_imm_uses_for.

   Return whether we ought to include information for 'var'
   when calculating immediate uses. For this pass we only want use
   information for non-virtual variables.  Used as the filter callback
   for compute_immediate_uses in vectorize_loops.  */

static bool
need_imm_uses_for (tree var)
{
  /* True exactly for real (non-virtual) GIMPLE registers.  */
  return is_gimple_reg (var);
}
4078 | ||
4079 | ||
/* Function vectorize_loops.

   Entry Point to loop vectorization phase.

   For every initial loop in LOOPS: analyze it, and if vectorizable,
   transform it.  Afterwards free the per-loop analysis data and rebuild
   SSA form for any variables the transformation scheduled for renaming.  */

void
vectorize_loops (struct loops *loops)
{
  unsigned int i, loops_num;
  unsigned int num_vectorized_loops = 0;

  /* Does the target support SIMD?  */
  /* FORNOW: until more sophisticated machine modelling is in place.  */
  if (!UNITS_PER_SIMD_WORD)
    {
      if (vect_debug_details (NULL))
	fprintf (dump_file, "vectorizer: target vector size is not defined.");
      return;
    }

  /* Immediate-use info (for non-virtual variables only, see
     need_imm_uses_for) is required by the relevance analysis.  */
  compute_immediate_uses (TDFA_USE_OPS, need_imm_uses_for);

  /*  ----------- Analyze loops. -----------  */

  /* If some loop was duplicated, it gets bigger number
     than all previously defined loops. This fact allows us to run
     only over initial loops skipping newly generated ones.  */
  loops_num = loops->num;
  for (i = 1; i < loops_num; i++)
    {
      loop_vec_info loop_vinfo;
      struct loop *loop = loops->parray[i];

      if (!loop)
	continue;

      /* The analysis result is stashed in loop->aux so the cleanup loop
	 below can find and destroy it.  */
      loop_vinfo = vect_analyze_loop (loop);
      loop->aux = loop_vinfo;

      if (!loop_vinfo || !LOOP_VINFO_VECTORIZABLE_P (loop_vinfo))
	continue;

      vect_transform_loop (loop_vinfo, loops);
      num_vectorized_loops++;
    }

  if (vect_debug_stats (NULL) || vect_debug_details (NULL))
    fprintf (dump_file, "\nvectorized %u loops in function.\n",
	     num_vectorized_loops);

  /*  ----------- Finalize. -----------  */

  free_df ();
  for (i = 1; i < loops_num; i++)
    {
      struct loop *loop = loops->parray[i];
      loop_vec_info loop_vinfo;

      if (!loop)
	continue;
      loop_vinfo = loop->aux;
      destroy_loop_vec_info (loop_vinfo);
      loop->aux = NULL;
    }

  rewrite_into_ssa (false);
  if (bitmap_first_set_bit (vars_to_rename) >= 0)
    {
      /* The rewrite of ssa names may cause violation of loop closed ssa
	 form invariants.  TODO -- avoid these rewrites completely.
	 Information in virtual phi nodes is sufficient for it.  */
      rewrite_into_loop_closed_ssa ();
    }
  bitmap_clear (vars_to_rename);
}