]> git.ipfire.org Git - thirdparty/gcc.git/blob - gcc/tree-vectorizer.c
backport: basic-block.h: Include vec.h, errors.h.
[thirdparty/gcc.git] / gcc / tree-vectorizer.c
1 /* Loop Vectorization
2 Copyright (C) 2003, 2004 Free Software Foundation, Inc.
3 Contributed by Dorit Naishlos <dorit@il.ibm.com>
4
5 This file is part of GCC.
6
7 GCC is free software; you can redistribute it and/or modify it under
8 the terms of the GNU General Public License as published by the Free
9 Software Foundation; either version 2, or (at your option) any later
10 version.
11
12 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
13 WARRANTY; without even the implied warranty of MERCHANTABILITY or
14 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
15 for more details.
16
17 You should have received a copy of the GNU General Public License
18 along with GCC; see the file COPYING. If not, write to the Free
19 Software Foundation, 59 Temple Place - Suite 330, Boston, MA
20 02111-1307, USA. */
21
22 /* Loop Vectorization Pass.
23
24 This pass tries to vectorize loops. This first implementation focuses on
25 simple inner-most loops, with no conditional control flow, and a set of
26        simple operations whose vector form can be expressed using existing
27 tree codes (PLUS, MULT etc).
28
29 For example, the vectorizer transforms the following simple loop:
30
31 short a[N]; short b[N]; short c[N]; int i;
32
33 for (i=0; i<N; i++){
34 a[i] = b[i] + c[i];
35 }
36
37 as if it was manually vectorized by rewriting the source code into:
38
39 typedef int __attribute__((mode(V8HI))) v8hi;
40 short a[N]; short b[N]; short c[N]; int i;
41 v8hi *pa = (v8hi*)a, *pb = (v8hi*)b, *pc = (v8hi*)c;
42 v8hi va, vb, vc;
43
44 for (i=0; i<N/8; i++){
45 vb = pb[i];
46 vc = pc[i];
47 va = vb + vc;
48 pa[i] = va;
49 }
50
51 The main entry to this pass is vectorize_loops(), in which
52 the vectorizer applies a set of analyses on a given set of loops,
53 followed by the actual vectorization transformation for the loops that
54 had successfully passed the analysis phase.
55
56 Throughout this pass we make a distinction between two types of
57 data: scalars (which are represented by SSA_NAMES), and memory references
58 ("data-refs"). These two types of data require different handling both
59 during analysis and transformation. The types of data-refs that the
60        vectorizer currently supports are ARRAY_REFS whose base is an array DECL
61 (not a pointer), and INDIRECT_REFS through pointers; both array and pointer
62 accesses are required to have a simple (consecutive) access pattern.
63
64 Analysis phase:
65 ===============
66 The driver for the analysis phase is vect_analyze_loop_nest().
67 It applies a set of analyses, some of which rely on the scalar evolution
68 analyzer (scev) developed by Sebastian Pop.
69
70 During the analysis phase the vectorizer records some information
71 per stmt in a "stmt_vec_info" struct which is attached to each stmt in the
72 loop, as well as general information about the loop as a whole, which is
73 recorded in a "loop_vec_info" struct attached to each loop.
74
75 Transformation phase:
76 =====================
77 The loop transformation phase scans all the stmts in the loop, and
78 creates a vector stmt (or a sequence of stmts) for each scalar stmt S in
79        the loop that needs to be vectorized. It inserts the vector code sequence
80 just before the scalar stmt S, and records a pointer to the vector code
81 in STMT_VINFO_VEC_STMT (stmt_info) (stmt_info is the stmt_vec_info struct
82 attached to S). This pointer will be used for the vectorization of following
83 stmts which use the def of stmt S. Stmt S is removed if it writes to memory;
84 otherwise, we rely on dead code elimination for removing it.
85
86 For example, say stmt S1 was vectorized into stmt VS1:
87
88 VS1: vb = px[i];
89 S1: b = x[i]; STMT_VINFO_VEC_STMT (stmt_info (S1)) = VS1
90 S2: a = b;
91
92 To vectorize stmt S2, the vectorizer first finds the stmt that defines
93 the operand 'b' (S1), and gets the relevant vector def 'vb' from the
94 vector stmt VS1 pointed by STMT_VINFO_VEC_STMT (stmt_info (S1)). The
95 resulting sequence would be:
96
97 VS1: vb = px[i];
98 S1: b = x[i]; STMT_VINFO_VEC_STMT (stmt_info (S1)) = VS1
99 VS2: va = vb;
100 S2: a = b; STMT_VINFO_VEC_STMT (stmt_info (S2)) = VS2
101
102 Operands that are not SSA_NAMEs, are data-refs that appear in
103 load/store operations (like 'x[i]' in S1), and are handled differently.
104
105 Target modeling:
106 =================
107 Currently the only target specific information that is used is the
108 size of the vector (in bytes) - "UNITS_PER_SIMD_WORD". Targets that can
109 support different sizes of vectors, for now will need to specify one value
110 for "UNITS_PER_SIMD_WORD". More flexibility will be added in the future.
111
112 Since we only vectorize operations which vector form can be
113 expressed using existing tree codes, to verify that an operation is
114 supported, the vectorizer checks the relevant optab at the relevant
115 machine_mode (e.g, add_optab->handlers[(int) V8HImode].insn_code). If
116 the value found is CODE_FOR_nothing, then there's no target support, and
117 we can't vectorize the stmt.
118
119 For additional information on this project see:
120 http://gcc.gnu.org/projects/tree-ssa/vectorization.html
121 */
122
123 #include "config.h"
124 #include "system.h"
125 #include "coretypes.h"
126 #include "tm.h"
127 #include "errors.h"
128 #include "ggc.h"
129 #include "tree.h"
130 #include "target.h"
131
132 #include "rtl.h"
133 #include "basic-block.h"
134 #include "diagnostic.h"
135 #include "tree-flow.h"
136 #include "tree-dump.h"
137 #include "timevar.h"
138 #include "cfgloop.h"
139 #include "cfglayout.h"
140 #include "expr.h"
141 #include "optabs.h"
142 #include "tree-chrec.h"
143 #include "tree-data-ref.h"
144 #include "tree-scalar-evolution.h"
145 #include "tree-vectorizer.h"
146 #include "tree-pass.h"
147
148 /* Main analysis functions. */
149 static loop_vec_info vect_analyze_loop (struct loop *);
150 static loop_vec_info vect_analyze_loop_form (struct loop *);
151 static bool vect_analyze_data_refs (loop_vec_info);
152 static bool vect_mark_stmts_to_be_vectorized (loop_vec_info);
153 static bool vect_analyze_scalar_cycles (loop_vec_info);
154 static bool vect_analyze_data_ref_accesses (loop_vec_info);
155 static bool vect_analyze_data_refs_alignment (loop_vec_info);
156 static void vect_compute_data_refs_alignment (loop_vec_info);
157 static bool vect_analyze_operations (loop_vec_info);
158
159 /* Main code transformation functions. */
160 static void vect_transform_loop (loop_vec_info, struct loops *);
161 static void vect_transform_loop_bound (loop_vec_info);
162 static bool vect_transform_stmt (tree, block_stmt_iterator *);
163 static bool vectorizable_load (tree, block_stmt_iterator *, tree *);
164 static bool vectorizable_store (tree, block_stmt_iterator *, tree *);
165 static bool vectorizable_operation (tree, block_stmt_iterator *, tree *);
166 static bool vectorizable_assignment (tree, block_stmt_iterator *, tree *);
167 static void vect_align_data_ref (tree);
168 static void vect_enhance_data_refs_alignment (loop_vec_info);
169
170 /* Utility functions for the analyses. */
171 static bool vect_is_simple_use (tree , struct loop *, tree *);
172 static bool exist_non_indexing_operands_for_use_p (tree, tree);
173 static bool vect_is_simple_iv_evolution (unsigned, tree, tree *, tree *, bool);
174 static void vect_mark_relevant (varray_type, tree);
175 static bool vect_stmt_relevant_p (tree, loop_vec_info);
176 static tree vect_get_loop_niters (struct loop *, HOST_WIDE_INT *);
177 static bool vect_compute_data_ref_alignment
178 (struct data_reference *, loop_vec_info);
179 static bool vect_analyze_data_ref_access (struct data_reference *);
180 static bool vect_get_first_index (tree, tree *);
181 static bool vect_can_force_dr_alignment_p (tree, unsigned int);
182 static struct data_reference * vect_analyze_pointer_ref_access
183 (tree, tree, bool);
184 static tree vect_get_base_and_bit_offset
185 (struct data_reference *, tree, tree, loop_vec_info, tree *, bool*);
186 static struct data_reference * vect_analyze_pointer_ref_access
187 (tree, tree, bool);
188 static tree vect_compute_array_base_alignment (tree, tree, tree *, tree *);
189 static tree vect_compute_array_ref_alignment
190 (struct data_reference *, loop_vec_info, tree, tree *);
191 static tree vect_get_ptr_offset (tree, tree, tree *);
192 static tree vect_get_symbl_and_dr
193 (tree, tree, bool, loop_vec_info, struct data_reference **);
194
195 /* Utility functions for the code transformation. */
196 static tree vect_create_destination_var (tree, tree);
197 static tree vect_create_data_ref_ptr
198 (tree, block_stmt_iterator *, tree, tree *, bool);
199 static tree vect_create_index_for_vector_ref
200 (struct loop *, block_stmt_iterator *);
201 static tree vect_create_addr_base_for_vector_ref (tree, tree *, tree);
202 static tree get_vectype_for_scalar_type (tree);
203 static tree vect_get_new_vect_var (tree, enum vect_var_kind, const char *);
204 static tree vect_get_vec_def_for_operand (tree, tree);
205 static tree vect_init_vector (tree, tree);
206 static void vect_finish_stmt_generation
207 (tree stmt, tree vec_stmt, block_stmt_iterator *bsi);
208
209 /* Utilities for creation and deletion of vec_info structs. */
210 loop_vec_info new_loop_vec_info (struct loop *loop);
211 void destroy_loop_vec_info (loop_vec_info);
212 stmt_vec_info new_stmt_vec_info (tree stmt, struct loop *loop);
213
214 static bool vect_debug_stats (struct loop *loop);
215 static bool vect_debug_details (struct loop *loop);
216
217
218 /* Function new_stmt_vec_info.
219
220 Create and initialize a new stmt_vec_info struct for STMT. */
221
222 stmt_vec_info
223 new_stmt_vec_info (tree stmt, struct loop *loop)
224 {
225 stmt_vec_info res;
226 res = (stmt_vec_info) xcalloc (1, sizeof (struct _stmt_vec_info));
227
228 STMT_VINFO_TYPE (res) = undef_vec_info_type;
229 STMT_VINFO_STMT (res) = stmt;
230 STMT_VINFO_LOOP (res) = loop;
231 STMT_VINFO_RELEVANT_P (res) = 0;
232 STMT_VINFO_VECTYPE (res) = NULL;
233 STMT_VINFO_VEC_STMT (res) = NULL;
234 STMT_VINFO_DATA_REF (res) = NULL;
235 STMT_VINFO_MEMTAG (res) = NULL;
236 STMT_VINFO_VECT_DR_BASE (res) = NULL;
237
238 return res;
239 }
240
241
/* Function new_loop_vec_info.

   Create and initialize a new loop_vec_info struct for LOOP, as well as
   stmt_vec_info structs for all the stmts in LOOP.  */

loop_vec_info
new_loop_vec_info (struct loop *loop)
{
  loop_vec_info res;
  basic_block *bbs;
  block_stmt_iterator si;
  unsigned int i;

  res = (loop_vec_info) xcalloc (1, sizeof (struct _loop_vec_info));

  bbs = get_loop_body (loop);

  /* Create stmt_info for all stmts in the loop.  */
  for (i = 0; i < loop->num_nodes; i++)
    {
      basic_block bb = bbs[i];
      for (si = bsi_start (bb); !bsi_end_p (si); bsi_next (&si))
        {
          tree stmt = bsi_stmt (si);
          stmt_ann_t ann;

          /* Refresh the stmt's operand caches before attaching
             vectorization info to its annotation.  */
          get_stmt_operands (stmt);
          ann = stmt_ann (stmt);
          set_stmt_info (ann, new_stmt_vec_info (stmt, loop));
        }
    }

  /* Initialize the loop-wide fields; the analysis phase fills in the
     actual values (exit condition, niters, vectorization factor).  */
  LOOP_VINFO_LOOP (res) = loop;
  LOOP_VINFO_BBS (res) = bbs;
  LOOP_VINFO_EXIT_COND (res) = NULL;
  LOOP_VINFO_NITERS (res) = -1;
  LOOP_VINFO_VECTORIZABLE_P (res) = 0;
  LOOP_VINFO_VECT_FACTOR (res) = 0;
  VARRAY_GENERIC_PTR_INIT (LOOP_VINFO_DATAREF_WRITES (res), 20,
			   "loop_write_datarefs");
  VARRAY_GENERIC_PTR_INIT (LOOP_VINFO_DATAREF_READS (res), 20,
			   "loop_read_datarefs");
  return res;
}
286
287
288 /* Function destroy_loop_vec_info.
289
290 Free LOOP_VINFO struct, as well as all the stmt_vec_info structs of all the
291 stmts in the loop. */
292
293 void
294 destroy_loop_vec_info (loop_vec_info loop_vinfo)
295 {
296 struct loop *loop;
297 basic_block *bbs;
298 int nbbs;
299 block_stmt_iterator si;
300 int j;
301
302 if (!loop_vinfo)
303 return;
304
305 loop = LOOP_VINFO_LOOP (loop_vinfo);
306
307 bbs = LOOP_VINFO_BBS (loop_vinfo);
308 nbbs = loop->num_nodes;
309
310 for (j = 0; j < nbbs; j++)
311 {
312 basic_block bb = bbs[j];
313 for (si = bsi_start (bb); !bsi_end_p (si); bsi_next (&si))
314 {
315 tree stmt = bsi_stmt (si);
316 stmt_ann_t ann = stmt_ann (stmt);
317 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
318 free (stmt_info);
319 set_stmt_info (ann, NULL);
320 }
321 }
322
323 free (LOOP_VINFO_BBS (loop_vinfo));
324 varray_clear (LOOP_VINFO_DATAREF_WRITES (loop_vinfo));
325 varray_clear (LOOP_VINFO_DATAREF_READS (loop_vinfo));
326
327 free (loop_vinfo);
328 }
329
330
331 /* Function debug_loop_stats.
332
333 For vectorization statistics dumps. */
334
335 static bool
336 vect_debug_stats (struct loop *loop)
337 {
338 basic_block bb;
339 block_stmt_iterator si;
340 tree node = NULL_TREE;
341
342 if (!dump_file || !(dump_flags & TDF_STATS))
343 return false;
344
345 if (!loop)
346 {
347 fprintf (dump_file, "\n");
348 return true;
349 }
350
351 if (!loop->header)
352 return false;
353
354 bb = loop->header;
355
356 for (si = bsi_start (bb); !bsi_end_p (si); bsi_next (&si))
357 {
358 node = bsi_stmt (si);
359 if (node && EXPR_P (node) && EXPR_LOCUS (node))
360 break;
361 }
362
363 if (node && EXPR_P (node) && EXPR_LOCUS (node)
364 && EXPR_FILENAME (node) && EXPR_LINENO (node))
365 {
366 fprintf (dump_file, "\nloop at %s:%d: ",
367 EXPR_FILENAME (node), EXPR_LINENO (node));
368 return true;
369 }
370
371 return false;
372 }
373
374
/* Function vect_debug_details.

   For vectorization debug dumps.  Same logic as vect_debug_stats, but
   gated on TDF_DETAILS instead of TDF_STATS: returns true (and prints a
   "loop at file:line" header) when detailed dumping is enabled and a
   source location can be found in LOOP's header block.  */

static bool
vect_debug_details (struct loop *loop)
{
  basic_block bb;
  block_stmt_iterator si;
  tree node = NULL_TREE;

  if (!dump_file || !(dump_flags & TDF_DETAILS))
    return false;

  /* A NULL loop just starts a fresh dump line.  */
  if (!loop)
    {
      fprintf (dump_file, "\n");
      return true;
    }

  if (!loop->header)
    return false;

  bb = loop->header;

  /* Find the first stmt in the header that carries location info.  */
  for (si = bsi_start (bb); !bsi_end_p (si); bsi_next (&si))
    {
      node = bsi_stmt (si);
      if (node && EXPR_P (node) && EXPR_LOCUS (node))
        break;
    }

  if (node && EXPR_P (node) && EXPR_LOCUS (node)
      && EXPR_FILENAME (node) && EXPR_LINENO (node))
    {
      fprintf (dump_file, "\nloop at %s:%d: ",
               EXPR_FILENAME (node), EXPR_LINENO (node));
      return true;
    }

  return false;
}
417
418
/* Function vect_get_ptr_offset

   Compute the OFFSET modulo vector-type alignment of pointer REF in bits.

   FORNOW: this is a stub — it always returns NULL_TREE (meaning "offset
   unknown"); callers must treat that as "alignment cannot be determined".  */

static tree
vect_get_ptr_offset (tree ref ATTRIBUTE_UNUSED,
		     tree vectype ATTRIBUTE_UNUSED,
		     tree *offset ATTRIBUTE_UNUSED)
{
  /* TODO: Use alignment information.  */
  return NULL_TREE;
}
431
432
/* Function vect_get_base_and_bit_offset

   Return the BASE of the data reference EXPR.
   If VECTYPE is given, also compute the OFFSET from BASE in bits.
   E.g., for EXPR a.b[i] + 4B, BASE is a, and OFFSET is the overall offset in
   bits of 'a.b[i] + 4B' from a.

   Input:
   EXPR - the memory reference that is being analyzed
   DR - the data_reference struct of the _original_ memory reference
        (Note: DR_REF (DR) is not necessarily EXPR)
   VECTYPE - the type that defines the alignment (i.e, we compute
             alignment relative to TYPE_ALIGN(VECTYPE))

   Output:
   BASE (returned value) - the base of the data reference EXPR.
                           E.g, if EXPR is a.b[k].c[i][j] the returned
			   base is a.
   OFFSET - offset of EXPR from BASE in bits
   BASE_ALIGNED_P - indicates if BASE is aligned

   If something unexpected is encountered (an unsupported form of data-ref),
   or if VECTYPE is given but OFFSET cannot be determined:
   then NULL_TREE is returned.  */

static tree
vect_get_base_and_bit_offset (struct data_reference *dr,
			      tree expr,
			      tree vectype,
			      loop_vec_info loop_vinfo,
			      tree *offset,
			      bool *base_aligned_p)
{
  tree this_offset = size_zero_node;
  tree base = NULL_TREE;
  tree next_ref;
  tree oprnd0, oprnd1;
  struct data_reference *array_dr;
  enum tree_code code = TREE_CODE (expr);

  *base_aligned_p = false;

  switch (code)
    {
    /* These cases end the recursion:  */
    case VAR_DECL:
      /* A declaration is its own base; it is aligned if its DECL_ALIGN
	 meets the vector type's alignment requirement.  */
      *offset = size_zero_node;
      if (vectype && DECL_ALIGN (expr) >= TYPE_ALIGN (vectype))
	*base_aligned_p = true;
      return expr;

    case SSA_NAME:
      if (!vectype)
	return expr;

      if (TREE_CODE (TREE_TYPE (expr)) != POINTER_TYPE)
	return NULL_TREE;

      if (TYPE_ALIGN (TREE_TYPE (TREE_TYPE (expr))) < TYPE_ALIGN (vectype))
	{
	  /* The pointed-to type is less aligned than the vector type;
	     try to derive the offset from pointer alignment info.
	     NOTE: vect_get_ptr_offset currently always fails.  */
	  base = vect_get_ptr_offset (expr, vectype, offset);
	  if (base)
	    *base_aligned_p = true;
	}
      else
	{
	  *base_aligned_p = true;
	  *offset = size_zero_node;
	  base = expr;
	}
      return base;

    case INTEGER_CST:
      /* A constant byte offset; convert to bits.  */
      *offset = int_const_binop (MULT_EXPR, expr,
				 build_int_cst (NULL_TREE, BITS_PER_UNIT), 1);
      return expr;

    /* These cases continue the recursion:  */
    case COMPONENT_REF:
      oprnd0 = TREE_OPERAND (expr, 0);
      oprnd1 = TREE_OPERAND (expr, 1);

      /* Field offset in bits; must be a compile-time constant when an
	 overall OFFSET is requested.  */
      this_offset = bit_position (oprnd1);
      if (vectype && !host_integerp (this_offset, 1))
	return NULL_TREE;
      next_ref = oprnd0;
      break;

    case ADDR_EXPR:
      oprnd0 = TREE_OPERAND (expr, 0);
      next_ref = oprnd0;
      break;

    case INDIRECT_REF:
      oprnd0 = TREE_OPERAND (expr, 0);
      next_ref = oprnd0;
      break;

    case ARRAY_REF:
      if (DR_REF (dr) != expr)
	/* Build array data_reference struct if the existing DR_REF
	   doesn't match EXPR. This happens, for example, when the
	   EXPR is *T and T is initialized to &arr[indx]. The DR struct
	   contains information on the access of T, not of arr. In order
	   to continue the analysis, we create a new DR struct that
	   describes the access of arr.
	*/
	array_dr = analyze_array (DR_STMT (dr), expr, DR_IS_READ (dr));
      else
	array_dr = dr;

      next_ref = vect_compute_array_ref_alignment (array_dr, loop_vinfo,
						   vectype, &this_offset);
      if (!next_ref)
	return NULL_TREE;

      if (vectype &&
	  TYPE_ALIGN (TREE_TYPE (TREE_TYPE (next_ref))) >= TYPE_ALIGN (vectype))
	{
	  /* The array's element pointer is sufficiently aligned; stop
	     the recursion here.  */
	  *offset = this_offset;
	  *base_aligned_p = true;
	  return next_ref;
	}
      break;

    case PLUS_EXPR:
    case MINUS_EXPR:
      /* In case we have a PLUS_EXPR of the form
	 (oprnd0 + oprnd1), we assume that only oprnd0 determines the base.
	 This is verified in vect_get_symbl_and_dr.
	 NOTE(review): for MINUS_EXPR the oprnd1 contribution is later
	 added (not subtracted) into *offset by the common code below —
	 presumably acceptable for the FORNOW-supported forms; confirm.  */
      oprnd0 = TREE_OPERAND (expr, 0);
      oprnd1 = TREE_OPERAND (expr, 1);

      base = vect_get_base_and_bit_offset
	(dr, oprnd1, vectype, loop_vinfo, &this_offset, base_aligned_p);
      if (vectype && !base)
	return NULL_TREE;

      next_ref = oprnd0;
      break;

    default:
      return NULL_TREE;
    }

  /* Recurse into the remaining sub-expression; it determines BASE and
     the accumulated OFFSET so far.  */
  base = vect_get_base_and_bit_offset (dr, next_ref, vectype,
				       loop_vinfo, offset, base_aligned_p);

  if (vectype && base)
    {
      /* Fold this level's contribution into the running bit offset.  */
      *offset = int_const_binop (PLUS_EXPR, *offset, this_offset, 1);
      if (!host_integerp (*offset, 1) || TREE_OVERFLOW (*offset))
	return NULL_TREE;

      if (vect_debug_details (NULL))
	{
	  print_generic_expr (dump_file, expr, TDF_SLIM);
	  fprintf (dump_file, " --> total offset for ref: ");
	  print_generic_expr (dump_file, *offset, TDF_SLIM);
	}
    }
  return base;
}
596
597
598
599 /* Function vect_force_dr_alignment_p.
600
601 Returns whether the alignment of a DECL can be forced to be aligned
602 on ALIGNMENT bit boundary. */
603
604 static bool
605 vect_can_force_dr_alignment_p (tree decl, unsigned int alignment)
606 {
607 if (TREE_CODE (decl) != VAR_DECL)
608 return false;
609
610 if (DECL_EXTERNAL (decl))
611 return false;
612
613 if (TREE_STATIC (decl))
614 return (alignment <= MAX_OFILE_ALIGNMENT);
615 else
616 /* This is not 100% correct. The absolute correct stack alignment
617 is STACK_BOUNDARY. We're supposed to hope, but not assume, that
618 PREFERRED_STACK_BOUNDARY is honored by all translation units.
619 However, until someone implements forced stack alignment, SSE
620 isn't really usable without this. */
621 return (alignment <= PREFERRED_STACK_BOUNDARY);
622 }
623
624
625 /* Function vect_get_new_vect_var.
626
627 Returns a name for a new variable. The current naming scheme appends the
628 prefix "vect_" or "vect_p" (depending on the value of VAR_KIND) to
629 the name of vectorizer generated variables, and appends that to NAME if
630 provided. */
631
632 static tree
633 vect_get_new_vect_var (tree type, enum vect_var_kind var_kind, const char *name)
634 {
635 const char *prefix;
636 int prefix_len;
637 tree new_vect_var;
638
639 if (var_kind == vect_simple_var)
640 prefix = "vect_";
641 else
642 prefix = "vect_p";
643
644 prefix_len = strlen (prefix);
645
646 if (name)
647 new_vect_var = create_tmp_var (type, concat (prefix, name, NULL));
648 else
649 new_vect_var = create_tmp_var (type, prefix);
650
651 return new_vect_var;
652 }
653
654
655 /* Function vect_create_index_for_vector_ref.
656
657 Create (and return) an index variable, along with it's update chain in the
658 loop. This variable will be used to access a memory location in a vector
659 operation.
660
661 Input:
662 LOOP: The loop being vectorized.
663 BSI: The block_stmt_iterator where STMT is. Any new stmts created by this
664 function can be added here, or in the loop pre-header.
665
666 Output:
667 Return an index that will be used to index a vector array. It is expected
668 that a pointer to the first vector will be used as the base address for the
669 indexed reference.
670
671 FORNOW: we are not trying to be efficient, just creating a new index each
672 time from scratch. At this time all vector references could use the same
673 index.
674
675 TODO: create only one index to be used by all vector references. Record
676 the index in the LOOP_VINFO the first time this procedure is called and
677 return it on subsequent calls. The increment of this index must be placed
678 just before the conditional expression that ends the single block loop. */
679
680 static tree
681 vect_create_index_for_vector_ref (struct loop *loop, block_stmt_iterator *bsi)
682 {
683 tree init, step;
684 tree indx_before_incr, indx_after_incr;
685
686 /* It is assumed that the base pointer used for vectorized access contains
687 the address of the first vector. Therefore the index used for vectorized
688 access must be initialized to zero and incremented by 1. */
689
690 init = integer_zero_node;
691 step = integer_one_node;
692
693 /* Assuming that bsi_insert is used with BSI_NEW_STMT */
694 create_iv (init, step, NULL_TREE, loop, bsi, false,
695 &indx_before_incr, &indx_after_incr);
696
697 return indx_before_incr;
698 }
699
700
/* Function vect_create_addr_base_for_vector_ref.

   Create an expression that computes the address of the first memory location
   that will be accessed for a data reference.

   Input:
   STMT: The statement containing the data reference.
   NEW_STMT_LIST: Must be initialized to NULL_TREE or a statement list.
   OFFSET: Optional. If supplied, it is be added to the initial address.

   Output:
   1. Return an SSA_NAME whose value is the address of the memory location of the
      first vector of the data reference.
   2. If new_stmt_list is not NULL_TREE after return then the caller must insert
      these statement(s) which define the returned SSA_NAME.

   FORNOW: We are only handling array accesses with step 1.  */

static tree
vect_create_addr_base_for_vector_ref (tree stmt,
                                      tree *new_stmt_list,
				      tree offset)
{
  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
  struct loop *loop = STMT_VINFO_LOOP (stmt_info);
  struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
  tree data_ref_base = unshare_expr (STMT_VINFO_VECT_DR_BASE (stmt_info));
  tree base_name = unshare_expr (DR_BASE_NAME (dr));
  tree ref = DR_REF (dr);
  tree data_ref_base_type = TREE_TYPE (data_ref_base);
  tree scalar_type = TREE_TYPE (ref);
  tree scalar_ptr_type = build_pointer_type (scalar_type);
  tree access_fn;
  tree init_val, step, init_oval;
  bool ok;
  bool is_ptr_ref, is_array_ref, is_addr_expr;
  tree array_base;
  tree vec_stmt;
  tree new_temp;
  tree array_ref;
  tree addr_base, addr_expr;
  tree dest, new_stmt;

  /* Only the access function of the last index is relevant (i_n in
     a[i_1][i_2]...[i_n]), the others correspond to loop invariants.  */
  access_fn = DR_ACCESS_FN (dr, 0);
  ok = vect_is_simple_iv_evolution (loop->num, access_fn, &init_oval, &step, true);
  if (!ok)
    /* No simple {init, +, step} evolution found: fall back to a zero
       initial index.  */
    init_oval = integer_zero_node;

  /* Classify the form of the data-ref base; exactly one of these three
     is expected to hold (checked by the assert below).  */
  is_ptr_ref = TREE_CODE (data_ref_base_type) == POINTER_TYPE
	       && TREE_CODE (data_ref_base) == SSA_NAME;
  is_array_ref = TREE_CODE (data_ref_base_type) == ARRAY_TYPE
		 && (TREE_CODE (data_ref_base) == VAR_DECL
		     || TREE_CODE (data_ref_base) == COMPONENT_REF
		     || TREE_CODE (data_ref_base) == ARRAY_REF);
  is_addr_expr = TREE_CODE (data_ref_base) == ADDR_EXPR
		 || TREE_CODE (data_ref_base) == PLUS_EXPR
		 || TREE_CODE (data_ref_base) == MINUS_EXPR;
  gcc_assert (is_ptr_ref || is_array_ref || is_addr_expr);

  /** Create: &(base[init_val])

      if data_ref_base is an ARRAY_TYPE:
         base = data_ref_base

      if data_ref_base is the SSA_NAME of a POINTER_TYPE:
         base = *((scalar_array *) data_ref_base)
   **/

  if (is_array_ref)
    array_base = data_ref_base;
  else /* is_ptr_ref or is_addr_expr */
    {
      /* array_ptr = (scalar_array_ptr_type *) data_ref_base; */
      tree scalar_array_type = build_array_type (scalar_type, 0);
      tree scalar_array_ptr_type = build_pointer_type (scalar_array_type);
      tree array_ptr = create_tmp_var (scalar_array_ptr_type, "array_ptr");
      add_referenced_tmp_var (array_ptr);

      /* Gimplify the base expression into DEST, collecting the
	 defining stmts on NEW_STMT_LIST.  */
      dest = create_tmp_var (TREE_TYPE (data_ref_base), "dataref");
      add_referenced_tmp_var (dest);
      data_ref_base =
	force_gimple_operand (data_ref_base, &new_stmt, false, dest);
      append_to_statement_list_force (new_stmt, new_stmt_list);

      /* Cast the base pointer to a pointer-to-array of the scalar type
	 so that an ARRAY_REF can be built on it below.  */
      vec_stmt = fold_convert (scalar_array_ptr_type, data_ref_base);
      vec_stmt = build2 (MODIFY_EXPR, void_type_node, array_ptr, vec_stmt);
      new_temp = make_ssa_name (array_ptr, vec_stmt);
      TREE_OPERAND (vec_stmt, 0) = new_temp;
      append_to_statement_list_force (vec_stmt, new_stmt_list);

      /* (*array_ptr) */
      array_base = build_fold_indirect_ref (new_temp);
    }

  /* Gimplify the initial index value.  */
  dest = create_tmp_var (TREE_TYPE (init_oval), "newinit");
  add_referenced_tmp_var (dest);
  init_val = force_gimple_operand (init_oval, &new_stmt, false, dest);
  append_to_statement_list_force (new_stmt, new_stmt_list);

  if (offset)
    {
      /* init_val = init_val + OFFSET.  */
      tree tmp = create_tmp_var (TREE_TYPE (init_val), "offset");
      add_referenced_tmp_var (tmp);
      vec_stmt = build2 (PLUS_EXPR, TREE_TYPE (init_val), init_val, offset);
      vec_stmt = build2 (MODIFY_EXPR, TREE_TYPE (init_val), tmp, vec_stmt);
      init_val = make_ssa_name (tmp, vec_stmt);
      TREE_OPERAND (vec_stmt, 0) = init_val;
      append_to_statement_list_force (vec_stmt, new_stmt_list);
    }

  /* Build &base[init_val].  */
  array_ref = build4 (ARRAY_REF, scalar_type, array_base, init_val,
		      NULL_TREE, NULL_TREE);
  addr_base = build_fold_addr_expr (array_ref);

  /* addr_expr = addr_base */
  addr_expr = vect_get_new_vect_var (scalar_ptr_type, vect_pointer_var,
                                     get_name (base_name));
  add_referenced_tmp_var (addr_expr);
  vec_stmt = build2 (MODIFY_EXPR, void_type_node, addr_expr, addr_base);
  new_temp = make_ssa_name (addr_expr, vec_stmt);
  TREE_OPERAND (vec_stmt, 0) = new_temp;
  append_to_statement_list_force (vec_stmt, new_stmt_list);

  return new_temp;
}
828
829
830 /* Function get_vectype_for_scalar_type.
831
832 Returns the vector type corresponding to SCALAR_TYPE as supported
833 by the target. */
834
835 static tree
836 get_vectype_for_scalar_type (tree scalar_type)
837 {
838 enum machine_mode inner_mode = TYPE_MODE (scalar_type);
839 int nbytes = GET_MODE_SIZE (inner_mode);
840 int nunits;
841 tree vectype;
842
843 if (nbytes == 0)
844 return NULL_TREE;
845
846 /* FORNOW: Only a single vector size per target (UNITS_PER_SIMD_WORD)
847 is expected. */
848 nunits = UNITS_PER_SIMD_WORD / nbytes;
849
850 vectype = build_vector_type (scalar_type, nunits);
851 if (TYPE_MODE (vectype) == BLKmode)
852 return NULL_TREE;
853 return vectype;
854 }
855
856
/* Function vect_align_data_ref.

   Handle misalignment of a memory access.

   FORNOW: Can't handle misaligned accesses.
   Make sure that the dataref is aligned.  */

static void
vect_align_data_ref (tree stmt)
{
  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
  struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);

  /* FORNOW: can't handle misaligned accesses;
     all accesses expected to be aligned.  */
  gcc_assert (aligned_access_p (dr));
}
874
875
876 /* Function vect_create_data_ref_ptr.
877
878 Create a memory reference expression for vector access, to be used in a
879 vector load/store stmt. The reference is based on a new pointer to vector
880 type (vp).
881
882 Input:
883 1. STMT: a stmt that references memory. Expected to be of the form
884 MODIFY_EXPR <name, data-ref> or MODIFY_EXPR <data-ref, name>.
885 2. BSI: block_stmt_iterator where new stmts can be added.
886 3. OFFSET (optional): an offset to be added to the initial address accessed
887 by the data-ref in STMT.
888 4. ONLY_INIT: indicate if vp is to be updated in the loop, or remain
889 pointing to the initial address.
890
891 Output:
892 1. Declare a new ptr to vector_type, and have it point to the base of the
893 data reference (initial addressed accessed by the data reference).
894 For example, for vector of type V8HI, the following code is generated:
895
896 v8hi *vp;
897 vp = (v8hi *)initial_address;
898
899 if OFFSET is not supplied:
900 initial_address = &a[init];
901 if OFFSET is supplied:
902 initial_address = &a[init + OFFSET];
903
904 Return the initial_address in INITIAL_ADDRESS.
905
906 2. Create a data-reference in the loop based on the new vector pointer vp,
907 and using a new index variable 'idx' as follows:
908
909 vp' = vp + update
910
911 where if ONLY_INIT is true:
912 update = zero
913 and otherwise
914 update = idx + vector_type_size
915
916 Return the pointer vp'.
917
918
919 FORNOW: handle only aligned and consecutive accesses. */
920
static tree
vect_create_data_ref_ptr (tree stmt, block_stmt_iterator *bsi, tree offset,
			  tree *initial_address, bool only_init)
{
  tree base_name;
  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
  struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
  struct loop *loop = STMT_VINFO_LOOP (stmt_info);
  tree vectype = STMT_VINFO_VECTYPE (stmt_info);
  tree vect_ptr_type;
  tree vect_ptr;
  tree tag;
  /* Virtual operands of STMT; the base variables of these SSA names are
     queued for renaming in step (2) below.  */
  v_may_def_optype v_may_defs = STMT_V_MAY_DEF_OPS (stmt);
  v_must_def_optype v_must_defs = STMT_V_MUST_DEF_OPS (stmt);
  vuse_optype vuses = STMT_VUSE_OPS (stmt);
  int nvuses, nv_may_defs, nv_must_defs;
  int i;
  tree new_temp;
  tree vec_stmt;
  tree new_stmt_list = NULL_TREE;
  tree idx;
  edge pe = loop_preheader_edge (loop);
  basic_block new_bb;
  tree vect_ptr_init;
  tree vectype_size;
  tree ptr_update;
  tree data_ref_ptr;

  base_name = unshare_expr (DR_BASE_NAME (dr));
  if (vect_debug_details (NULL))
    {
      /* The TREE_CODE of the base distinguishes the kind of access;
	 this classification is for dump purposes only.  */
      tree data_ref_base = base_name;
      fprintf (dump_file, "create array_ref of type: ");
      print_generic_expr (dump_file, vectype, TDF_SLIM);
      if (TREE_CODE (data_ref_base) == VAR_DECL)
	fprintf (dump_file, "vectorizing a one dimensional array ref: ");
      else if (TREE_CODE (data_ref_base) == ARRAY_REF)
	fprintf (dump_file, "vectorizing a multidimensional array ref: ");
      else if (TREE_CODE (data_ref_base) == COMPONENT_REF)
	fprintf (dump_file, "vectorizing a record based array ref: ");
      else if (TREE_CODE (data_ref_base) == SSA_NAME)
	fprintf (dump_file, "vectorizing a pointer ref: ");
      print_generic_expr (dump_file, base_name, TDF_SLIM);
    }

  /** (1) Create the new vector-pointer variable: **/

  vect_ptr_type = build_pointer_type (vectype);
  vect_ptr = vect_get_new_vect_var (vect_ptr_type, vect_pointer_var,
				    get_name (base_name));
  add_referenced_tmp_var (vect_ptr);


  /** (2) Handle aliasing information of the new vector-pointer: **/

  /* The new pointer aliases whatever the original data reference aliased:
     give it the same type memory tag.  */
  tag = STMT_VINFO_MEMTAG (stmt_info);
  gcc_assert (tag);
  get_var_ann (vect_ptr)->type_mem_tag = tag;

  /* Mark for renaming all aliased variables
     (i.e, the may-aliases of the type-mem-tag).  */
  nvuses = NUM_VUSES (vuses);
  nv_may_defs = NUM_V_MAY_DEFS (v_may_defs);
  nv_must_defs = NUM_V_MUST_DEFS (v_must_defs);
  for (i = 0; i < nvuses; i++)
    {
      tree use = VUSE_OP (vuses, i);
      if (TREE_CODE (use) == SSA_NAME)
	bitmap_set_bit (vars_to_rename, var_ann (SSA_NAME_VAR (use))->uid);
    }
  for (i = 0; i < nv_may_defs; i++)
    {
      tree def = V_MAY_DEF_RESULT (v_may_defs, i);
      if (TREE_CODE (def) == SSA_NAME)
	bitmap_set_bit (vars_to_rename, var_ann (SSA_NAME_VAR (def))->uid);
    }
  for (i = 0; i < nv_must_defs; i++)
    {
      tree def = V_MUST_DEF_OP (v_must_defs, i);
      if (TREE_CODE (def) == SSA_NAME)
	bitmap_set_bit (vars_to_rename, var_ann (SSA_NAME_VAR (def))->uid);
    }


  /** (3) Calculate the initial address of the vector-pointer, and set
	  the vector-pointer to point to it before the loop: **/

  /* Create: (&(base[init_val+offset]) in the loop preheader.  */
  new_temp = vect_create_addr_base_for_vector_ref (stmt, &new_stmt_list,
						   offset);
  pe = loop_preheader_edge (loop);
  /* Inserting on the preheader edge must not require splitting it
     (bsi_insert_on_edge_immediate returns a new bb only on a split).  */
  new_bb = bsi_insert_on_edge_immediate (pe, new_stmt_list);
  gcc_assert (!new_bb);
  *initial_address = new_temp;

  /* Create: p = (vectype *) initial_base  */
  vec_stmt = fold_convert (vect_ptr_type, new_temp);
  vec_stmt = build2 (MODIFY_EXPR, void_type_node, vect_ptr, vec_stmt);
  new_temp = make_ssa_name (vect_ptr, vec_stmt);
  TREE_OPERAND (vec_stmt, 0) = new_temp;
  new_bb = bsi_insert_on_edge_immediate (pe, vec_stmt);
  gcc_assert (!new_bb);
  vect_ptr_init = TREE_OPERAND (vec_stmt, 0);


  /** (4) Handle the updating of the vector-pointer inside the loop: **/

  if (only_init) /* No update in loop is required.  */
    return vect_ptr_init;

  idx = vect_create_index_for_vector_ref (loop, bsi);

  /* Create: update = idx * vectype_size  */
  ptr_update = create_tmp_var (integer_type_node, "update");
  add_referenced_tmp_var (ptr_update);
  vectype_size = build_int_cst (integer_type_node,
				GET_MODE_SIZE (TYPE_MODE (vectype)));
  vec_stmt = build2 (MULT_EXPR, integer_type_node, idx, vectype_size);
  vec_stmt = build2 (MODIFY_EXPR, void_type_node, ptr_update, vec_stmt);
  new_temp = make_ssa_name (ptr_update, vec_stmt);
  TREE_OPERAND (vec_stmt, 0) = new_temp;
  /* Inserted inside the loop, before STMT.  */
  bsi_insert_before (bsi, vec_stmt, BSI_SAME_STMT);

  /* Create: data_ref_ptr = vect_ptr_init + update  */
  vec_stmt = build2 (PLUS_EXPR, vect_ptr_type, vect_ptr_init, new_temp);
  vec_stmt = build2 (MODIFY_EXPR, void_type_node, vect_ptr, vec_stmt);
  new_temp = make_ssa_name (vect_ptr, vec_stmt);
  TREE_OPERAND (vec_stmt, 0) = new_temp;
  bsi_insert_before (bsi, vec_stmt, BSI_SAME_STMT);
  data_ref_ptr = TREE_OPERAND (vec_stmt, 0);

  return data_ref_ptr;
}
1054
1055
1056 /* Function vect_create_destination_var.
1057
1058 Create a new temporary of type VECTYPE. */
1059
1060 static tree
1061 vect_create_destination_var (tree scalar_dest, tree vectype)
1062 {
1063 tree vec_dest;
1064 const char *new_name;
1065
1066 gcc_assert (TREE_CODE (scalar_dest) == SSA_NAME);
1067
1068 new_name = get_name (scalar_dest);
1069 if (!new_name)
1070 new_name = "var_";
1071 vec_dest = vect_get_new_vect_var (vectype, vect_simple_var, new_name);
1072 add_referenced_tmp_var (vec_dest);
1073
1074 return vec_dest;
1075 }
1076
1077
1078 /* Function vect_init_vector.
1079
1080 Insert a new stmt (INIT_STMT) that initializes a new vector variable with
1081 the vector elements of VECTOR_VAR. Return the DEF of INIT_STMT. It will be
1082 used in the vectorization of STMT. */
1083
1084 static tree
1085 vect_init_vector (tree stmt, tree vector_var)
1086 {
1087 stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt);
1088 struct loop *loop = STMT_VINFO_LOOP (stmt_vinfo);
1089 tree new_var;
1090 tree init_stmt;
1091 tree vectype = STMT_VINFO_VECTYPE (stmt_vinfo);
1092 tree vec_oprnd;
1093 edge pe;
1094 tree new_temp;
1095 basic_block new_bb;
1096
1097 new_var = vect_get_new_vect_var (vectype, vect_simple_var, "cst_");
1098 add_referenced_tmp_var (new_var);
1099
1100 init_stmt = build2 (MODIFY_EXPR, vectype, new_var, vector_var);
1101 new_temp = make_ssa_name (new_var, init_stmt);
1102 TREE_OPERAND (init_stmt, 0) = new_temp;
1103
1104 pe = loop_preheader_edge (loop);
1105 new_bb = bsi_insert_on_edge_immediate (pe, init_stmt);
1106 gcc_assert (!new_bb);
1107
1108 if (vect_debug_details (NULL))
1109 {
1110 fprintf (dump_file, "created new init_stmt: ");
1111 print_generic_expr (dump_file, init_stmt, TDF_SLIM);
1112 }
1113
1114 vec_oprnd = TREE_OPERAND (init_stmt, 0);
1115 return vec_oprnd;
1116 }
1117
1118
1119 /* Function vect_get_vec_def_for_operand.
1120
1121 OP is an operand in STMT. This function returns a (vector) def that will be
1122 used in the vectorized stmt for STMT.
1123
1124 In the case that OP is an SSA_NAME which is defined in the loop, then
1125 STMT_VINFO_VEC_STMT of the defining stmt holds the relevant def.
1126
1127 In case OP is an invariant or constant, a new stmt that creates a vector def
1128 needs to be introduced. */
1129
1130 static tree
1131 vect_get_vec_def_for_operand (tree op, tree stmt)
1132 {
1133 tree vec_oprnd;
1134 tree vec_stmt;
1135 tree def_stmt;
1136 stmt_vec_info def_stmt_info = NULL;
1137 stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt);
1138 tree vectype = STMT_VINFO_VECTYPE (stmt_vinfo);
1139 int nunits = GET_MODE_NUNITS (TYPE_MODE (vectype));
1140 struct loop *loop = STMT_VINFO_LOOP (stmt_vinfo);
1141 basic_block bb;
1142 tree vec_inv;
1143 tree t = NULL_TREE;
1144 tree def;
1145 int i;
1146
1147 if (vect_debug_details (NULL))
1148 {
1149 fprintf (dump_file, "vect_get_vec_def_for_operand: ");
1150 print_generic_expr (dump_file, op, TDF_SLIM);
1151 }
1152
1153 /** ===> Case 1: operand is a constant. **/
1154
1155 if (TREE_CODE (op) == INTEGER_CST || TREE_CODE (op) == REAL_CST)
1156 {
1157 /* Create 'vect_cst_ = {cst,cst,...,cst}' */
1158
1159 tree vec_cst;
1160 stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt);
1161 tree vectype = STMT_VINFO_VECTYPE (stmt_vinfo);
1162 int nunits = GET_MODE_NUNITS (TYPE_MODE (vectype));
1163 tree t = NULL_TREE;
1164 int i;
1165
1166 /* Build a tree with vector elements. */
1167 if (vect_debug_details (NULL))
1168 fprintf (dump_file, "Create vector_cst. nunits = %d", nunits);
1169
1170 for (i = nunits - 1; i >= 0; --i)
1171 {
1172 t = tree_cons (NULL_TREE, op, t);
1173 }
1174 vec_cst = build_vector (vectype, t);
1175 return vect_init_vector (stmt, vec_cst);
1176 }
1177
1178 gcc_assert (TREE_CODE (op) == SSA_NAME);
1179
1180 /** ===> Case 2: operand is an SSA_NAME - find the stmt that defines it. **/
1181
1182 def_stmt = SSA_NAME_DEF_STMT (op);
1183 def_stmt_info = vinfo_for_stmt (def_stmt);
1184
1185 if (vect_debug_details (NULL))
1186 {
1187 fprintf (dump_file, "vect_get_vec_def_for_operand: def_stmt: ");
1188 print_generic_expr (dump_file, def_stmt, TDF_SLIM);
1189 }
1190
1191
1192 /** ==> Case 2.1: operand is defined inside the loop. **/
1193
1194 if (def_stmt_info)
1195 {
1196 /* Get the def from the vectorized stmt. */
1197
1198 vec_stmt = STMT_VINFO_VEC_STMT (def_stmt_info);
1199 gcc_assert (vec_stmt);
1200 vec_oprnd = TREE_OPERAND (vec_stmt, 0);
1201 return vec_oprnd;
1202 }
1203
1204
1205 /** ==> Case 2.2: operand is defined by the loop-header phi-node -
1206 it is a reduction/induction. **/
1207
1208 bb = bb_for_stmt (def_stmt);
1209 if (TREE_CODE (def_stmt) == PHI_NODE && flow_bb_inside_loop_p (loop, bb))
1210 {
1211 if (vect_debug_details (NULL))
1212 fprintf (dump_file, "reduction/induction - unsupported.");
1213 internal_error ("no support for reduction/induction"); /* FORNOW */
1214 }
1215
1216
1217 /** ==> Case 2.3: operand is defined outside the loop -
1218 it is a loop invariant. */
1219
1220 switch (TREE_CODE (def_stmt))
1221 {
1222 case PHI_NODE:
1223 def = PHI_RESULT (def_stmt);
1224 break;
1225 case MODIFY_EXPR:
1226 def = TREE_OPERAND (def_stmt, 0);
1227 break;
1228 case NOP_EXPR:
1229 def = TREE_OPERAND (def_stmt, 0);
1230 gcc_assert (IS_EMPTY_STMT (def_stmt));
1231 def = op;
1232 break;
1233 default:
1234 if (vect_debug_details (NULL))
1235 {
1236 fprintf (dump_file, "unsupported defining stmt: ");
1237 print_generic_expr (dump_file, def_stmt, TDF_SLIM);
1238 }
1239 internal_error ("unsupported defining stmt");
1240 }
1241
1242 /* Build a tree with vector elements. Create 'vec_inv = {inv,inv,..,inv}' */
1243
1244 if (vect_debug_details (NULL))
1245 fprintf (dump_file, "Create vector_inv.");
1246
1247 for (i = nunits - 1; i >= 0; --i)
1248 {
1249 t = tree_cons (NULL_TREE, def, t);
1250 }
1251
1252 vec_inv = build_constructor (vectype, t);
1253 return vect_init_vector (stmt, vec_inv);
1254 }
1255
1256
1257 /* Function vect_finish_stmt_generation.
1258
1259 Insert a new stmt. */
1260
1261 static void
1262 vect_finish_stmt_generation (tree stmt, tree vec_stmt, block_stmt_iterator *bsi)
1263 {
1264 bsi_insert_before (bsi, vec_stmt, BSI_SAME_STMT);
1265
1266 if (vect_debug_details (NULL))
1267 {
1268 fprintf (dump_file, "add new stmt: ");
1269 print_generic_expr (dump_file, vec_stmt, TDF_SLIM);
1270 }
1271
1272 /* Make sure bsi points to the stmt that is being vectorized. */
1273
1274 /* Assumption: any stmts created for the vectorization of stmt S were
1275 inserted before S. BSI is expected to point to S or some new stmt before S. */
1276
1277 while (stmt != bsi_stmt (*bsi) && !bsi_end_p (*bsi))
1278 bsi_next (bsi);
1279 gcc_assert (stmt == bsi_stmt (*bsi));
1280 }
1281
1282
1283 /* Function vectorizable_assignment.
1284
1285 Check if STMT performs an assignment (copy) that can be vectorized.
1286 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
1287 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
1288 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
1289
1290 static bool
1291 vectorizable_assignment (tree stmt, block_stmt_iterator *bsi, tree *vec_stmt)
1292 {
1293 tree vec_dest;
1294 tree scalar_dest;
1295 tree op;
1296 tree vec_oprnd;
1297 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1298 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
1299 struct loop *loop = STMT_VINFO_LOOP (stmt_info);
1300 tree new_temp;
1301
1302 /* Is vectorizable assignment? */
1303
1304 if (TREE_CODE (stmt) != MODIFY_EXPR)
1305 return false;
1306
1307 scalar_dest = TREE_OPERAND (stmt, 0);
1308 if (TREE_CODE (scalar_dest) != SSA_NAME)
1309 return false;
1310
1311 op = TREE_OPERAND (stmt, 1);
1312 if (!vect_is_simple_use (op, loop, NULL))
1313 {
1314 if (vect_debug_details (NULL))
1315 fprintf (dump_file, "use not simple.");
1316 return false;
1317 }
1318
1319 if (!vec_stmt) /* transformation not required. */
1320 {
1321 STMT_VINFO_TYPE (stmt_info) = assignment_vec_info_type;
1322 return true;
1323 }
1324
1325 /** Trasform. **/
1326 if (vect_debug_details (NULL))
1327 fprintf (dump_file, "transform assignment.");
1328
1329 /* Handle def. */
1330 vec_dest = vect_create_destination_var (scalar_dest, vectype);
1331
1332 /* Handle use. */
1333 op = TREE_OPERAND (stmt, 1);
1334 vec_oprnd = vect_get_vec_def_for_operand (op, stmt);
1335
1336 /* Arguments are ready. create the new vector stmt. */
1337 *vec_stmt = build2 (MODIFY_EXPR, vectype, vec_dest, vec_oprnd);
1338 new_temp = make_ssa_name (vec_dest, *vec_stmt);
1339 TREE_OPERAND (*vec_stmt, 0) = new_temp;
1340 vect_finish_stmt_generation (stmt, *vec_stmt, bsi);
1341
1342 return true;
1343 }
1344
1345
1346 /* Function vectorizable_operation.
1347
1348 Check if STMT performs a binary or unary operation that can be vectorized.
1349 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
1350 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
1351 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
1352
1353 static bool
1354 vectorizable_operation (tree stmt, block_stmt_iterator *bsi, tree *vec_stmt)
1355 {
1356 tree vec_dest;
1357 tree scalar_dest;
1358 tree operation;
1359 tree op0, op1 = NULL;
1360 tree vec_oprnd0, vec_oprnd1=NULL;
1361 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1362 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
1363 struct loop *loop = STMT_VINFO_LOOP (stmt_info);
1364 int i;
1365 enum tree_code code;
1366 enum machine_mode vec_mode;
1367 tree new_temp;
1368 int op_type;
1369 tree op;
1370 optab optab;
1371
1372 /* Is STMT a vectorizable binary/unary operation? */
1373 if (TREE_CODE (stmt) != MODIFY_EXPR)
1374 return false;
1375
1376 if (TREE_CODE (TREE_OPERAND (stmt, 0)) != SSA_NAME)
1377 return false;
1378
1379 operation = TREE_OPERAND (stmt, 1);
1380 code = TREE_CODE (operation);
1381 optab = optab_for_tree_code (code, vectype);
1382
1383 /* Support only unary or binary operations. */
1384 op_type = TREE_CODE_LENGTH (code);
1385 if (op_type != unary_op && op_type != binary_op)
1386 {
1387 if (vect_debug_details (NULL))
1388 fprintf (dump_file, "num. args = %d (not unary/binary op).", op_type);
1389 return false;
1390 }
1391
1392 for (i = 0; i < op_type; i++)
1393 {
1394 op = TREE_OPERAND (operation, i);
1395 if (!vect_is_simple_use (op, loop, NULL))
1396 {
1397 if (vect_debug_details (NULL))
1398 fprintf (dump_file, "use not simple.");
1399 return false;
1400 }
1401 }
1402
1403 /* Supportable by target? */
1404 if (!optab)
1405 {
1406 if (vect_debug_details (NULL))
1407 fprintf (dump_file, "no optab.");
1408 return false;
1409 }
1410 vec_mode = TYPE_MODE (vectype);
1411 if (optab->handlers[(int) vec_mode].insn_code == CODE_FOR_nothing)
1412 {
1413 if (vect_debug_details (NULL))
1414 fprintf (dump_file, "op not supported by target.");
1415 return false;
1416 }
1417
1418 if (!vec_stmt) /* transformation not required. */
1419 {
1420 STMT_VINFO_TYPE (stmt_info) = op_vec_info_type;
1421 return true;
1422 }
1423
1424 /** Trasform. **/
1425
1426 if (vect_debug_details (NULL))
1427 fprintf (dump_file, "transform binary/unary operation.");
1428
1429 /* Handle def. */
1430 scalar_dest = TREE_OPERAND (stmt, 0);
1431 vec_dest = vect_create_destination_var (scalar_dest, vectype);
1432
1433 /* Handle uses. */
1434 op0 = TREE_OPERAND (operation, 0);
1435 vec_oprnd0 = vect_get_vec_def_for_operand (op0, stmt);
1436
1437 if (op_type == binary_op)
1438 {
1439 op1 = TREE_OPERAND (operation, 1);
1440 vec_oprnd1 = vect_get_vec_def_for_operand (op1, stmt);
1441 }
1442
1443 /* Arguments are ready. create the new vector stmt. */
1444
1445 if (op_type == binary_op)
1446 *vec_stmt = build2 (MODIFY_EXPR, vectype, vec_dest,
1447 build2 (code, vectype, vec_oprnd0, vec_oprnd1));
1448 else
1449 *vec_stmt = build2 (MODIFY_EXPR, vectype, vec_dest,
1450 build1 (code, vectype, vec_oprnd0));
1451 new_temp = make_ssa_name (vec_dest, *vec_stmt);
1452 TREE_OPERAND (*vec_stmt, 0) = new_temp;
1453 vect_finish_stmt_generation (stmt, *vec_stmt, bsi);
1454
1455 return true;
1456 }
1457
1458
/* Function vectorizable_store.

   Check if STMT defines a non scalar data-ref (array/pointer/structure) that
   can be vectorized.
   If VEC_STMT is also passed, vectorize the STMT: create a vectorized
   stmt to replace it, put it in VEC_STMT, and insert it at BSI.
   Return FALSE if not a vectorizable STMT, TRUE otherwise.  */

static bool
vectorizable_store (tree stmt, block_stmt_iterator *bsi, tree *vec_stmt)
{
  tree scalar_dest;
  tree data_ref;
  tree op;
  tree vec_oprnd1;
  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
  tree vectype = STMT_VINFO_VECTYPE (stmt_info);
  struct loop *loop = STMT_VINFO_LOOP (stmt_info);
  enum machine_mode vec_mode;
  tree dummy;

  /* Is vectorizable store?  The lhs must be a memory reference and the
     rhs a "simple use" (constant, invariant, or in-loop def).  */

  if (TREE_CODE (stmt) != MODIFY_EXPR)
    return false;

  scalar_dest = TREE_OPERAND (stmt, 0);
  if (TREE_CODE (scalar_dest) != ARRAY_REF
      && TREE_CODE (scalar_dest) != INDIRECT_REF)
    return false;

  op = TREE_OPERAND (stmt, 1);
  if (!vect_is_simple_use (op, loop, NULL))
    {
      if (vect_debug_details (NULL))
	fprintf (dump_file, "use not simple.");
      return false;
    }

  vec_mode = TYPE_MODE (vectype);
  /* FORNOW. In some cases can vectorize even if data-type not supported
     (e.g. - array initialization with 0).  */
  if (mov_optab->handlers[(int)vec_mode].insn_code == CODE_FOR_nothing)
    return false;

  if (!STMT_VINFO_DATA_REF (stmt_info))
    return false;

  /* FORNOW: only aligned stores are handled (see also the comment at the
     top of vect_create_data_ref_ptr).  */
  if (!aligned_access_p (STMT_VINFO_DATA_REF (stmt_info)))
    return false;

  if (!vec_stmt) /* transformation not required.  */
    {
      STMT_VINFO_TYPE (stmt_info) = store_vec_info_type;
      return true;
    }

  /** Transform.  **/

  if (vect_debug_details (NULL))
    fprintf (dump_file, "transform store");

  /* Handle use - get the vectorized def from the defining stmt.  */
  vec_oprnd1 = vect_get_vec_def_for_operand (op, stmt);

  /* Handle def.  */
  /* FORNOW: make sure the data reference is aligned.  */
  vect_align_data_ref (stmt);
  data_ref = vect_create_data_ref_ptr (stmt, bsi, NULL_TREE, &dummy, false);
  data_ref = build_fold_indirect_ref (data_ref);

  /* Arguments are ready: create the new vector stmt '*data_ref = vec'.  */
  *vec_stmt = build2 (MODIFY_EXPR, vectype, data_ref, vec_oprnd1);
  vect_finish_stmt_generation (stmt, *vec_stmt, bsi);

  return true;
}
1536
1537
/* vectorizable_load.

   Check if STMT reads a non scalar data-ref (array/pointer/structure) that
   can be vectorized.
   If VEC_STMT is also passed, vectorize the STMT: create a vectorized
   stmt to replace it, put it in VEC_STMT, and insert it at BSI.
   Return FALSE if not a vectorizable STMT, TRUE otherwise.  */

static bool
vectorizable_load (tree stmt, block_stmt_iterator *bsi, tree *vec_stmt)
{
  tree scalar_dest;
  tree vec_dest = NULL;
  tree data_ref = NULL;
  tree op;
  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
  struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
  tree vectype = STMT_VINFO_VECTYPE (stmt_info);
  tree new_temp;
  int mode;
  tree init_addr;
  tree new_stmt;
  tree dummy;
  basic_block new_bb;
  struct loop *loop = STMT_VINFO_LOOP (stmt_info);
  edge pe = loop_preheader_edge (loop);
  /* Set when the misalignment is handled by software-pipelining two
     aligned loads through a realign_load (see the second branch below).  */
  bool software_pipeline_loads_p = false;

  /* Is vectorizable load?  The lhs must be an SSA_NAME and the rhs a
     memory reference with an attached data_reference.  */

  if (TREE_CODE (stmt) != MODIFY_EXPR)
    return false;

  scalar_dest = TREE_OPERAND (stmt, 0);
  if (TREE_CODE (scalar_dest) != SSA_NAME)
    return false;

  op = TREE_OPERAND (stmt, 1);
  if (TREE_CODE (op) != ARRAY_REF && TREE_CODE (op) != INDIRECT_REF)
    return false;

  if (!STMT_VINFO_DATA_REF (stmt_info))
    return false;

  mode = (int) TYPE_MODE (vectype);

  /* FORNOW. In some cases can vectorize even if data-type not supported
    (e.g. - data copies).  */
  if (mov_optab->handlers[mode].insn_code == CODE_FOR_nothing)
    {
      if (vect_debug_details (loop))
	fprintf (dump_file, "Aligned load, but unsupported type.");
      return false;
    }

  if (!aligned_access_p (dr))
    {
      /* Misaligned access: either software-pipeline two aligned loads via
	 the target's realign_load pattern, or rely on the target handling
	 misaligned memory accesses directly.  Otherwise give up.  */
      if (vec_realign_load_optab->handlers[mode].insn_code != CODE_FOR_nothing
	  && (!targetm.vectorize.builtin_mask_for_load
	      || targetm.vectorize.builtin_mask_for_load ()))
	software_pipeline_loads_p = true;
      else if (!targetm.vectorize.misaligned_mem_ok (mode))
	{
	  /* Possibly unaligned access, and can't software pipeline the loads  */
	  if (vect_debug_details (loop))
	    fprintf (dump_file, "Arbitrary load not supported.");
	  return false;
	}
    }

  if (!vec_stmt) /* transformation not required.  */
    {
      STMT_VINFO_TYPE (stmt_info) = load_vec_info_type;
      return true;
    }

  /** Transform.  **/

  if (vect_debug_details (NULL))
    fprintf (dump_file, "transform load.");

  if (!software_pipeline_loads_p)
    {
      /* Create:
	 p = initial_addr;
	 indx = 0;
	 loop {
	   vec_dest = *(p);
	   indx = indx + 1;
	 }
      */

      vec_dest = vect_create_destination_var (scalar_dest, vectype);
      data_ref = vect_create_data_ref_ptr (stmt, bsi, NULL_TREE, &dummy, false);
      if (aligned_access_p (dr))
	data_ref = build_fold_indirect_ref (data_ref);
      else
	{
	  /* Known (or unknown, mis == -1) misalignment, but the target
	     tolerates it: emit a MISALIGNED_INDIRECT_REF annotated with
	     the misalignment in bits.  */
	  int mis = DR_MISALIGNMENT (dr);
	  tree tmis = (mis == -1 ?
		       integer_zero_node :
		       build_int_cst (integer_type_node, mis));
	  tmis = int_const_binop (MULT_EXPR, tmis,
			build_int_cst (integer_type_node, BITS_PER_UNIT), 1);
	  data_ref = build2 (MISALIGNED_INDIRECT_REF, vectype, data_ref, tmis);
	}
      new_stmt = build2 (MODIFY_EXPR, vectype, vec_dest, data_ref);
      new_temp = make_ssa_name (vec_dest, new_stmt);
      TREE_OPERAND (new_stmt, 0) = new_temp;
      vect_finish_stmt_generation (stmt, new_stmt, bsi);
    }
  else /* software-pipeline the loads  */
    {
      /* Create:
	 p1 = initial_addr;
	 msq_init = *(floor(p1))
	 p2 = initial_addr + VS - 1;
	 magic = have_builtin ? builtin_result : initial_address;
	 indx = 0;
	 loop {
	   p2' = p2 + indx * vectype_size
	   lsq = *(floor(p2'))
	   vec_dest = realign_load (msq, lsq, magic)
	   indx = indx + 1;
	   msq = lsq;
	 }
      */

      tree offset;
      tree magic;
      tree phi_stmt;
      tree msq_init;
      tree msq, lsq;
      tree dataref_ptr;
      tree params;

      /* <1> Create msq_init = *(floor(p1)) in the loop preheader  */
      vec_dest = vect_create_destination_var (scalar_dest, vectype);
      data_ref = vect_create_data_ref_ptr (stmt, bsi, NULL_TREE,
					   &init_addr, true);
      data_ref = build1 (ALIGN_INDIRECT_REF, vectype, data_ref);
      new_stmt = build2 (MODIFY_EXPR, vectype, vec_dest, data_ref);
      new_temp = make_ssa_name (vec_dest, new_stmt);
      TREE_OPERAND (new_stmt, 0) = new_temp;
      /* The preheader edge must not need splitting.  */
      new_bb = bsi_insert_on_edge_immediate (pe, new_stmt);
      gcc_assert (!new_bb);
      msq_init = TREE_OPERAND (new_stmt, 0);


      /* <2> Create lsq = *(floor(p2')) in the loop.  The pointer is offset
	     by VS - 1 elements so that floor(p2') is the next aligned
	     vector after floor(p1).  */
      offset = build_int_cst (integer_type_node,
			      GET_MODE_NUNITS (TYPE_MODE (vectype)));
      offset = int_const_binop (MINUS_EXPR, offset, integer_one_node, 1);
      vec_dest = vect_create_destination_var (scalar_dest, vectype);
      dataref_ptr = vect_create_data_ref_ptr (stmt, bsi, offset, &dummy, false);
      data_ref = build1 (ALIGN_INDIRECT_REF, vectype, dataref_ptr);
      new_stmt = build2 (MODIFY_EXPR, vectype, vec_dest, data_ref);
      new_temp = make_ssa_name (vec_dest, new_stmt);
      TREE_OPERAND (new_stmt, 0) = new_temp;
      vect_finish_stmt_generation (stmt, new_stmt, bsi);
      lsq = TREE_OPERAND (new_stmt, 0);


      /* <3> Compute the "magic" third operand of the realign_load.  */
      if (targetm.vectorize.builtin_mask_for_load)
	{
	  /* Create permutation mask, if required, in loop preheader.  */
	  tree builtin_decl;
	  params = build_tree_list (NULL_TREE, init_addr);
	  vec_dest = vect_create_destination_var (scalar_dest, vectype);
	  builtin_decl = targetm.vectorize.builtin_mask_for_load ();
	  new_stmt = build_function_call_expr (builtin_decl, params);
	  new_stmt = build2 (MODIFY_EXPR, vectype, vec_dest, new_stmt);
	  new_temp = make_ssa_name (vec_dest, new_stmt);
	  TREE_OPERAND (new_stmt, 0) = new_temp;
	  new_bb = bsi_insert_on_edge_immediate (pe, new_stmt);
	  gcc_assert (!new_bb);
	  magic = TREE_OPERAND (new_stmt, 0);
	}
      else
	{
	  /* Use current address instead of init_addr for reduced reg pressure.  */
	  magic = dataref_ptr;
	}


      /* <4> Create msq = phi <msq_init, lsq> in loop: the high-part
	     vector of one iteration is the low-part of the next.  */
      vec_dest = vect_create_destination_var (scalar_dest, vectype);
      msq = make_ssa_name (vec_dest, NULL_TREE);
      phi_stmt = create_phi_node (msq, loop->header); /* CHECKME */
      SSA_NAME_DEF_STMT (msq) = phi_stmt;
      add_phi_arg (&phi_stmt, msq_init, loop_preheader_edge (loop));
      add_phi_arg (&phi_stmt, lsq, loop_latch_edge (loop));


      /* <5> Create <vec_dest = realign_load (msq, lsq, magic)> in loop  */
      vec_dest = vect_create_destination_var (scalar_dest, vectype);
      new_stmt = build3 (REALIGN_LOAD_EXPR, vectype, msq, lsq, magic);
      new_stmt = build2 (MODIFY_EXPR, vectype, vec_dest, new_stmt);
      new_temp = make_ssa_name (vec_dest, new_stmt);
      TREE_OPERAND (new_stmt, 0) = new_temp;
      vect_finish_stmt_generation (stmt, new_stmt, bsi);
    }

  *vec_stmt = new_stmt;
  return true;
}
1745
1746
1747 /* Function vect_transform_stmt.
1748
1749 Create a vectorized stmt to replace STMT, and insert it at BSI. */
1750
1751 static bool
1752 vect_transform_stmt (tree stmt, block_stmt_iterator *bsi)
1753 {
1754 bool is_store = false;
1755 tree vec_stmt = NULL_TREE;
1756 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1757 bool done;
1758
1759 switch (STMT_VINFO_TYPE (stmt_info))
1760 {
1761 case op_vec_info_type:
1762 done = vectorizable_operation (stmt, bsi, &vec_stmt);
1763 gcc_assert (done);
1764 break;
1765
1766 case assignment_vec_info_type:
1767 done = vectorizable_assignment (stmt, bsi, &vec_stmt);
1768 gcc_assert (done);
1769 break;
1770
1771 case load_vec_info_type:
1772 done = vectorizable_load (stmt, bsi, &vec_stmt);
1773 gcc_assert (done);
1774 break;
1775
1776 case store_vec_info_type:
1777 done = vectorizable_store (stmt, bsi, &vec_stmt);
1778 gcc_assert (done);
1779 is_store = true;
1780 break;
1781 default:
1782 if (vect_debug_details (NULL))
1783 fprintf (dump_file, "stmt not supported.");
1784 gcc_unreachable ();
1785 }
1786
1787 STMT_VINFO_VEC_STMT (stmt_info) = vec_stmt;
1788
1789 return is_store;
1790 }
1791
1792
/* Function vect_transform_loop_bound.

   Create a new exit condition for the loop: a fresh counter runs from 0 and
   the loop now exits after old_N/vf iterations (vf = vectorization factor).  */

static void
vect_transform_loop_bound (loop_vec_info loop_vinfo)
{
  struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
  edge exit_edge = loop->single_exit;
  block_stmt_iterator loop_exit_bsi = bsi_last (exit_edge->src);
  tree indx_before_incr, indx_after_incr;
  tree orig_cond_expr;
  HOST_WIDE_INT old_N = 0;
  int vf;
  tree cond_stmt;
  tree new_loop_bound;
  tree cond;
  tree lb_type;

  gcc_assert (LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo));
  old_N = LOOP_VINFO_NITERS (loop_vinfo);
  vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);

  /* FORNOW:
     assuming number-of-iterations divides by the vectorization factor.  */
  gcc_assert (!(old_N % vf));

  /* The exit condition recorded at analysis time must still be the last
     stmt in the exit block.  */
  orig_cond_expr = LOOP_VINFO_EXIT_COND (loop_vinfo);
  gcc_assert (orig_cond_expr);
  gcc_assert (orig_cond_expr == bsi_stmt (loop_exit_bsi));

  /* Create an induction variable counting 0, 1, 2, ...  */
  create_iv (integer_zero_node, integer_one_node, NULL_TREE, loop,
	     &loop_exit_bsi, false, &indx_before_incr, &indx_after_incr);

  /* bsi_insert is using BSI_NEW_STMT. We need to bump it back
     to point to the exit condition.  */
  bsi_next (&loop_exit_bsi);
  gcc_assert (bsi_stmt (loop_exit_bsi) == orig_cond_expr);

  /* new loop exit test:  */
  lb_type = TREE_TYPE (TREE_OPERAND (TREE_OPERAND (orig_cond_expr, 0), 1));
  new_loop_bound = build_int_cst (lb_type, old_N/vf);

  if (exit_edge->flags & EDGE_TRUE_VALUE) /* 'then' edge exits the loop.  */
    cond = build2 (GE_EXPR, boolean_type_node, indx_after_incr, new_loop_bound);
  else /* 'then' edge loops back.  */
    cond = build2 (LT_EXPR, boolean_type_node, indx_after_incr, new_loop_bound);

  /* Keep the original COND_EXPR's then/else arms; only the condition
     changes.  */
  cond_stmt = build3 (COND_EXPR, TREE_TYPE (orig_cond_expr), cond,
	TREE_OPERAND (orig_cond_expr, 1), TREE_OPERAND (orig_cond_expr, 2));

  bsi_insert_before (&loop_exit_bsi, cond_stmt, BSI_SAME_STMT);

  /* remove old loop exit test:  */
  bsi_remove (&loop_exit_bsi);

  if (vect_debug_details (NULL))
    print_generic_expr (dump_file, cond_stmt, TDF_SLIM);
}
1852
1853
/* Function vect_transform_loop.

   The analysis phase has determined that the loop is vectorizable.
   Vectorize the loop - created vectorized stmts to replace the scalar
   stmts in the loop, and update the loop exit condition.  */

static void
vect_transform_loop (loop_vec_info loop_vinfo,
		     struct loops *loops ATTRIBUTE_UNUSED)
{
  struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
  basic_block *bbs = LOOP_VINFO_BBS (loop_vinfo);
  int nbbs = loop->num_nodes;
  block_stmt_iterator si;
  int i;
#ifdef ENABLE_CHECKING
  int vectorization_factor = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
#endif

  if (vect_debug_details (NULL))
    fprintf (dump_file, "\n<<vec_transform_loop>>\n");

  /* 1) Make sure the loop header has exactly two entries
     2) Make sure we have a preheader basic block.  */

  gcc_assert (EDGE_COUNT (loop->header->preds) == 2);

  loop_split_edge_with (loop_preheader_edge (loop), NULL);


  /* FORNOW: the vectorizer supports only loops which body consist
     of one basic block (header + empty latch). When the vectorizer will
     support more involved loop forms, the order by which the BBs are
     traversed need to be reconsidered.  */

  for (i = 0; i < nbbs; i++)
    {
      basic_block bb = bbs[i];

      /* Note: SI is advanced inside the loop body, because removing a
	 vectorized store already moves the iterator forward.  */
      for (si = bsi_start (bb); !bsi_end_p (si);)
	{
	  tree stmt = bsi_stmt (si);
	  stmt_vec_info stmt_info;
	  bool is_store;
#ifdef ENABLE_CHECKING
	  tree vectype;
#endif

	  if (vect_debug_details (NULL))
	    {
	      fprintf (dump_file, "------>vectorizing statement: ");
	      print_generic_expr (dump_file, stmt, TDF_SLIM);
	    }
	  stmt_info = vinfo_for_stmt (stmt);
	  gcc_assert (stmt_info);
	  /* Skip stmts the analysis phase marked as not relevant.  */
	  if (!STMT_VINFO_RELEVANT_P (stmt_info))
	    {
	      bsi_next (&si);
	      continue;
	    }
#ifdef ENABLE_CHECKING
	  /* FORNOW: Verify that all stmts operate on the same number of
	             units and no inner unrolling is necessary.  */
	  vectype = STMT_VINFO_VECTYPE (stmt_info);
	  gcc_assert (GET_MODE_NUNITS (TYPE_MODE (vectype))
		      == vectorization_factor);
#endif
	  /* -------- vectorize statement ------------ */
	  if (vect_debug_details (NULL))
	    fprintf (dump_file, "transform statement.");

	  is_store = vect_transform_stmt (stmt, &si);
	  if (is_store)
	    {
	      /* The scalar store is now dead: free the attached
		 stmt_vec_info and remove the stmt.  bsi_remove already
		 advances SI, so do not bump it again.  */
	      stmt_ann_t ann = stmt_ann (stmt);
	      free (stmt_info);
	      set_stmt_info (ann, NULL);
	      bsi_remove (&si);
	      continue;
	    }

	  bsi_next (&si);
	}			/* stmts in BB */
    }				/* BBs in loop */

  vect_transform_loop_bound (loop_vinfo);

  if (vect_debug_details (loop))
    fprintf (dump_file,"Success! loop vectorized.");
  if (vect_debug_stats (loop))
    fprintf (dump_file, "LOOP VECTORIZED.");
}
1947
1948
1949 /* Function vect_is_simple_use.
1950
1951 Input:
1952 LOOP - the loop that is being vectorized.
1953 OPERAND - operand of a stmt in LOOP.
1954 DEF - the defining stmt in case OPERAND is an SSA_NAME.
1955
1956 Returns whether a stmt with OPERAND can be vectorized.
1957 Supportable operands are constants, loop invariants, and operands that are
1958 defined by the current iteration of the loop. Unsupportable operands are
1959 those that are defined by a previous iteration of the loop (as is the case
1960 in reduction/induction computations). */
1961
1962 static bool
1963 vect_is_simple_use (tree operand, struct loop *loop, tree *def)
1964 {
1965 tree def_stmt;
1966 basic_block bb;
1967
1968 if (def)
1969 *def = NULL_TREE;
1970
1971 if (TREE_CODE (operand) == INTEGER_CST || TREE_CODE (operand) == REAL_CST)
1972 return true;
1973
1974 if (TREE_CODE (operand) != SSA_NAME)
1975 return false;
1976
1977 def_stmt = SSA_NAME_DEF_STMT (operand);
1978 if (def_stmt == NULL_TREE )
1979 {
1980 if (vect_debug_details (NULL))
1981 fprintf (dump_file, "no def_stmt.");
1982 return false;
1983 }
1984
1985 /* empty stmt is expected only in case of a function argument.
1986 (Otherwise - we expect a phi_node or a modify_expr). */
1987 if (IS_EMPTY_STMT (def_stmt))
1988 {
1989 tree arg = TREE_OPERAND (def_stmt, 0);
1990 if (TREE_CODE (arg) == INTEGER_CST || TREE_CODE (arg) == REAL_CST)
1991 return true;
1992 if (vect_debug_details (NULL))
1993 {
1994 fprintf (dump_file, "Unexpected empty stmt: ");
1995 print_generic_expr (dump_file, def_stmt, TDF_SLIM);
1996 }
1997 return false;
1998 }
1999
2000 /* phi_node inside the loop indicates an induction/reduction pattern.
2001 This is not supported yet. */
2002 bb = bb_for_stmt (def_stmt);
2003 if (TREE_CODE (def_stmt) == PHI_NODE && flow_bb_inside_loop_p (loop, bb))
2004 {
2005 if (vect_debug_details (NULL))
2006 fprintf (dump_file, "reduction/induction - unsupported.");
2007 return false; /* FORNOW: not supported yet. */
2008 }
2009
2010 /* Expecting a modify_expr or a phi_node. */
2011 if (TREE_CODE (def_stmt) == MODIFY_EXPR
2012 || TREE_CODE (def_stmt) == PHI_NODE)
2013 {
2014 if (def)
2015 *def = def_stmt;
2016 return true;
2017 }
2018
2019 return false;
2020 }
2021
2022
2023 /* Function vect_analyze_operations.
2024
2025 Scan the loop stmts and make sure they are all vectorizable. */
2026
2027 static bool
2028 vect_analyze_operations (loop_vec_info loop_vinfo)
2029 {
2030 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
2031 basic_block *bbs = LOOP_VINFO_BBS (loop_vinfo);
2032 int nbbs = loop->num_nodes;
2033 block_stmt_iterator si;
2034 int vectorization_factor = 0;
2035 int i;
2036 bool ok;
2037 tree scalar_type;
2038
2039 if (vect_debug_details (NULL))
2040 fprintf (dump_file, "\n<<vect_analyze_operations>>\n");
2041
2042 for (i = 0; i < nbbs; i++)
2043 {
2044 basic_block bb = bbs[i];
2045
2046 for (si = bsi_start (bb); !bsi_end_p (si); bsi_next (&si))
2047 {
2048 tree stmt = bsi_stmt (si);
2049 int nunits;
2050 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
2051 tree vectype;
2052
2053 if (vect_debug_details (NULL))
2054 {
2055 fprintf (dump_file, "==> examining statement: ");
2056 print_generic_expr (dump_file, stmt, TDF_SLIM);
2057 }
2058
2059 gcc_assert (stmt_info);
2060
2061 /* skip stmts which do not need to be vectorized.
2062 this is expected to include:
2063 - the COND_EXPR which is the loop exit condition
2064 - any LABEL_EXPRs in the loop
2065 - computations that are used only for array indexing or loop
2066 control */
2067
2068 if (!STMT_VINFO_RELEVANT_P (stmt_info))
2069 {
2070 if (vect_debug_details (NULL))
2071 fprintf (dump_file, "irrelevant.");
2072 continue;
2073 }
2074
2075 if (VECTOR_MODE_P (TYPE_MODE (TREE_TYPE (stmt))))
2076 {
2077 if (vect_debug_stats (loop) || vect_debug_details (loop))
2078 {
2079 fprintf (dump_file, "not vectorized: vector stmt in loop:");
2080 print_generic_expr (dump_file, stmt, TDF_SLIM);
2081 }
2082 return false;
2083 }
2084
2085 if (STMT_VINFO_DATA_REF (stmt_info))
2086 scalar_type = TREE_TYPE (DR_REF (STMT_VINFO_DATA_REF (stmt_info)));
2087 else if (TREE_CODE (stmt) == MODIFY_EXPR)
2088 scalar_type = TREE_TYPE (TREE_OPERAND (stmt, 0));
2089 else
2090 scalar_type = TREE_TYPE (stmt);
2091
2092 if (vect_debug_details (NULL))
2093 {
2094 fprintf (dump_file, "get vectype for scalar type: ");
2095 print_generic_expr (dump_file, scalar_type, TDF_SLIM);
2096 }
2097
2098 vectype = get_vectype_for_scalar_type (scalar_type);
2099 if (!vectype)
2100 {
2101 if (vect_debug_stats (loop) || vect_debug_details (loop))
2102 {
2103 fprintf (dump_file, "not vectorized: unsupported data-type ");
2104 print_generic_expr (dump_file, scalar_type, TDF_SLIM);
2105 }
2106 return false;
2107 }
2108
2109 if (vect_debug_details (NULL))
2110 {
2111 fprintf (dump_file, "vectype: ");
2112 print_generic_expr (dump_file, vectype, TDF_SLIM);
2113 }
2114 STMT_VINFO_VECTYPE (stmt_info) = vectype;
2115
2116 ok = (vectorizable_operation (stmt, NULL, NULL)
2117 || vectorizable_assignment (stmt, NULL, NULL)
2118 || vectorizable_load (stmt, NULL, NULL)
2119 || vectorizable_store (stmt, NULL, NULL));
2120
2121 if (!ok)
2122 {
2123 if (vect_debug_stats (loop) || vect_debug_details (loop))
2124 {
2125 fprintf (dump_file, "not vectorized: stmt not supported: ");
2126 print_generic_expr (dump_file, stmt, TDF_SLIM);
2127 }
2128 return false;
2129 }
2130
2131 nunits = GET_MODE_NUNITS (TYPE_MODE (vectype));
2132 if (vect_debug_details (NULL))
2133 fprintf (dump_file, "nunits = %d", nunits);
2134
2135 if (vectorization_factor)
2136 {
2137 /* FORNOW: don't allow mixed units.
2138 This restriction will be relaxed in the future. */
2139 if (nunits != vectorization_factor)
2140 {
2141 if (vect_debug_stats (loop) || vect_debug_details (loop))
2142 fprintf (dump_file, "not vectorized: mixed data-types");
2143 return false;
2144 }
2145 }
2146 else
2147 vectorization_factor = nunits;
2148 }
2149 }
2150
2151 /* TODO: Analyze cost. Decide if worth while to vectorize. */
2152 if (!vectorization_factor)
2153 {
2154 if (vect_debug_stats (loop) || vect_debug_details (loop))
2155 fprintf (dump_file, "not vectorized: unsupported data-type");
2156 return false;
2157 }
2158 LOOP_VINFO_VECT_FACTOR (loop_vinfo) = vectorization_factor;
2159
2160 /* FORNOW: handle only cases where the loop bound divides by the
2161 vectorization factor. */
2162
2163 if (vect_debug_details (NULL))
2164 fprintf (dump_file,
2165 "vectorization_factor = %d, niters = " HOST_WIDE_INT_PRINT_DEC,
2166 vectorization_factor, LOOP_VINFO_NITERS (loop_vinfo));
2167
2168 if (!LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo))
2169 {
2170 if (vect_debug_stats (loop) || vect_debug_details (loop))
2171 fprintf (dump_file, "not vectorized: Unknown loop bound.");
2172 return false;
2173 }
2174
2175 if (LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo)
2176 && LOOP_VINFO_NITERS (loop_vinfo) % vectorization_factor != 0)
2177 {
2178 if (vect_debug_stats (loop) || vect_debug_details (loop))
2179 fprintf (dump_file, "not vectorized: loop bound doesn't divided by %d.",
2180 vectorization_factor);
2181 return false;
2182 }
2183
2184 return true;
2185 }
2186
2187
2188 /* Function exist_non_indexing_operands_for_use_p
2189
2190 USE is one of the uses attached to STMT. Check if USE is
2191 used in STMT for anything other than indexing an array. */
2192
2193 static bool
2194 exist_non_indexing_operands_for_use_p (tree use, tree stmt)
2195 {
2196 tree operand;
2197 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
2198
2199 /* USE corresponds to some operand in STMT. If there is no data
2200 reference in STMT, then any operand that corresponds to USE
2201 is not indexing an array. */
2202 if (!STMT_VINFO_DATA_REF (stmt_info))
2203 return true;
2204
2205 /* STMT has a data_ref. FORNOW this means that its of one of
2206 the following forms:
2207 -1- ARRAY_REF = var
2208 -2- var = ARRAY_REF
2209 (This should have been verified in analyze_data_refs).
2210
2211 'var' in the second case corresponds to a def, not a use,
2212 so USE cannot correspond to any operands that are not used
2213 for array indexing.
2214
2215 Therefore, all we need to check is if STMT falls into the
2216 first case, and whether var corresponds to USE. */
2217
2218 if (TREE_CODE (TREE_OPERAND (stmt, 0)) == SSA_NAME)
2219 return false;
2220
2221 operand = TREE_OPERAND (stmt, 1);
2222
2223 if (TREE_CODE (operand) != SSA_NAME)
2224 return false;
2225
2226 if (operand == use)
2227 return true;
2228
2229 return false;
2230 }
2231
2232
2233 /* Function vect_is_simple_iv_evolution.
2234
2235 FORNOW: A simple evolution of an induction variables in the loop is
2236 considered a polynomial evolution with constant step. */
2237
2238 static bool
2239 vect_is_simple_iv_evolution (unsigned loop_nb, tree access_fn, tree * init,
2240 tree * step, bool strict)
2241 {
2242 tree init_expr;
2243 tree step_expr;
2244
2245 tree evolution_part = evolution_part_in_loop_num (access_fn, loop_nb);
2246
2247 /* When there is no evolution in this loop, the evolution function
2248 is not "simple". */
2249 if (evolution_part == NULL_TREE)
2250 return false;
2251
2252 /* When the evolution is a polynomial of degree >= 2
2253 the evolution function is not "simple". */
2254 if (tree_is_chrec (evolution_part))
2255 return false;
2256
2257 step_expr = evolution_part;
2258 init_expr = initial_condition (access_fn);
2259
2260 if (vect_debug_details (NULL))
2261 {
2262 fprintf (dump_file, "step: ");
2263 print_generic_expr (dump_file, step_expr, TDF_SLIM);
2264 fprintf (dump_file, ", init: ");
2265 print_generic_expr (dump_file, init_expr, TDF_SLIM);
2266 }
2267
2268 *init = init_expr;
2269 *step = step_expr;
2270
2271 if (TREE_CODE (step_expr) != INTEGER_CST)
2272 {
2273 if (vect_debug_details (NULL))
2274 fprintf (dump_file, "step unknown.");
2275 return false;
2276 }
2277
2278 if (strict)
2279 if (!integer_onep (step_expr))
2280 {
2281 if (vect_debug_details (NULL))
2282 print_generic_expr (dump_file, step_expr, TDF_SLIM);
2283 return false;
2284 }
2285
2286 return true;
2287 }
2288
2289
/* Function vect_analyze_scalar_cycles.

   Examine the cross iteration def-use cycles of scalar variables, by
   analyzing the loop (scalar) PHIs; verify that the cross iteration def-use
   cycles that they represent do not impede vectorization.

   FORNOW: Reduction as in the following loop, is not supported yet:
              loop1:
              for (i=0; i<N; i++)
                 sum += a[i];
           The cross-iteration cycle corresponding to variable 'sum' will be
           considered too complicated and will impede vectorization.

   FORNOW: Induction as in the following loop, is not supported yet:
              loop2:
              for (i=0; i<N; i++)
                 a[i] = i;

           However, the following loop *is* vectorizable:
              loop3:
              for (i=0; i<N; i++)
                 a[i] = b[i];

           In both loops there exists a def-use cycle for the variable i:
              loop: i_2 = PHI (i_0, i_1)
                    a[i_2] = ...;
                    i_1 = i_2 + 1;
                    GOTO loop;

           The evolution of the above cycle is considered simple enough,
           however, we also check that the cycle does not need to be
           vectorized, i.e - we check that the variable that this cycle
           defines is only used for array indexing or in stmts that do not
           need to be vectorized. This is not the case in loop2, but it
           *is* the case in loop3.

   Returns false (rejecting the loop) as soon as one PHI's evolution
   cannot be analyzed or is not a simple IV evolution.  */

static bool
vect_analyze_scalar_cycles (loop_vec_info loop_vinfo)
{
  tree phi;
  struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
  basic_block bb = loop->header;
  /* Receives init/step from vect_is_simple_iv_evolution; values unused.  */
  tree dummy;

  if (vect_debug_details (NULL))
    fprintf (dump_file, "\n<<vect_analyze_scalar_cycles>>\n");

  /* Walk the PHI nodes of the loop header (chained via TREE_CHAIN in
     this tree representation).  */
  for (phi = phi_nodes (bb); phi; phi = TREE_CHAIN (phi))
    {
      tree access_fn = NULL;

      if (vect_debug_details (NULL))
        {
          fprintf (dump_file, "Analyze phi: ");
          print_generic_expr (dump_file, phi, TDF_SLIM);
        }

      /* Skip virtual phi's. The data dependences that are associated with
         virtual defs/uses (i.e., memory accesses) are analyzed elsewhere.  */

      if (!is_gimple_reg (SSA_NAME_VAR (PHI_RESULT (phi))))
        {
          if (vect_debug_details (NULL))
            fprintf (dump_file, "virtual phi. skip.");
          continue;
        }

      /* Analyze the evolution function.  */

      /* FORNOW: The only scalar cross-iteration cycles that we allow are
         those of loop induction variables; This property is verified here.

         Furthermore, if that induction variable is used in an operation
         that needs to be vectorized (i.e, is not solely used to index
         arrays and check the exit condition) - we do not support its
         vectorization yet. This property is verified in vect_is_simple_use,
         during vect_analyze_operations.  */

      /* NOTE(review): the instantiate_parameters wrapper is deliberately
         left disabled (commented out) here.  */
      access_fn = /* instantiate_parameters
                     (loop,*/
        analyze_scalar_evolution (loop, PHI_RESULT (phi));

      if (!access_fn)
        {
          if (vect_debug_stats (loop) || vect_debug_details (loop))
            fprintf (dump_file, "not vectorized: unsupported scalar cycle.");
          return false;
        }

      if (vect_debug_details (NULL))
        {
          fprintf (dump_file, "Access function of PHI: ");
          print_generic_expr (dump_file, access_fn, TDF_SLIM);
        }

      /* Non-strict check: any constant step is acceptable here.  */
      if (!vect_is_simple_iv_evolution (loop->num, access_fn, &dummy,
                                        &dummy, false))
        {
          if (vect_debug_stats (loop) || vect_debug_details (loop))
            fprintf (dump_file, "not vectorized: unsupported scalar cycle.");
          return false;
        }
    }

  return true;
}
2396
2397
2398 /* Function vect_analyze_data_ref_dependence.
2399
2400 Return TRUE if there (might) exist a dependence between a memory-reference
2401 DRA and a memory-reference DRB. */
2402
2403 static bool
2404 vect_analyze_data_ref_dependence (struct data_reference *dra,
2405 struct data_reference *drb,
2406 struct loop *loop)
2407 {
2408 bool differ_p;
2409 struct data_dependence_relation *ddr;
2410
2411 if (!array_base_name_differ_p (dra, drb, &differ_p))
2412 {
2413 if (vect_debug_stats (loop) || vect_debug_details (loop))
2414 {
2415 fprintf (dump_file,
2416 "not vectorized: can't determine dependence between: ");
2417 print_generic_expr (dump_file, DR_REF (dra), TDF_SLIM);
2418 fprintf (dump_file, " and ");
2419 print_generic_expr (dump_file, DR_REF (drb), TDF_SLIM);
2420 }
2421 return true;
2422 }
2423
2424 if (differ_p)
2425 return false;
2426
2427 ddr = initialize_data_dependence_relation (dra, drb);
2428 compute_affine_dependence (ddr);
2429
2430 if (DDR_ARE_DEPENDENT (ddr) == chrec_known)
2431 return false;
2432
2433 if (vect_debug_stats (loop) || vect_debug_details (loop))
2434 {
2435 fprintf (dump_file,
2436 "not vectorized: possible dependence between data-refs ");
2437 print_generic_expr (dump_file, DR_REF (dra), TDF_SLIM);
2438 fprintf (dump_file, " and ");
2439 print_generic_expr (dump_file, DR_REF (drb), TDF_SLIM);
2440 }
2441
2442 return true;
2443 }
2444
2445
2446 /* Function vect_analyze_data_ref_dependences.
2447
2448 Examine all the data references in the loop, and make sure there do not
2449 exist any data dependences between them.
2450
2451 TODO: dependences which distance is greater than the vectorization factor
2452 can be ignored. */
2453
2454 static bool
2455 vect_analyze_data_ref_dependences (loop_vec_info loop_vinfo)
2456 {
2457 unsigned int i, j;
2458 varray_type loop_write_refs = LOOP_VINFO_DATAREF_WRITES (loop_vinfo);
2459 varray_type loop_read_refs = LOOP_VINFO_DATAREF_READS (loop_vinfo);
2460 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
2461
2462 /* Examine store-store (output) dependences. */
2463
2464 if (vect_debug_details (NULL))
2465 fprintf (dump_file, "\n<<vect_analyze_dependences>>\n");
2466
2467 if (vect_debug_details (NULL))
2468 fprintf (dump_file, "compare all store-store pairs.");
2469
2470 for (i = 0; i < VARRAY_ACTIVE_SIZE (loop_write_refs); i++)
2471 {
2472 for (j = i + 1; j < VARRAY_ACTIVE_SIZE (loop_write_refs); j++)
2473 {
2474 struct data_reference *dra =
2475 VARRAY_GENERIC_PTR (loop_write_refs, i);
2476 struct data_reference *drb =
2477 VARRAY_GENERIC_PTR (loop_write_refs, j);
2478 if (vect_analyze_data_ref_dependence (dra, drb, loop))
2479 return false;
2480 }
2481 }
2482
2483 /* Examine load-store (true/anti) dependences. */
2484
2485 if (vect_debug_details (NULL))
2486 fprintf (dump_file, "compare all load-store pairs.");
2487
2488 for (i = 0; i < VARRAY_ACTIVE_SIZE (loop_read_refs); i++)
2489 {
2490 for (j = 0; j < VARRAY_ACTIVE_SIZE (loop_write_refs); j++)
2491 {
2492 struct data_reference *dra = VARRAY_GENERIC_PTR (loop_read_refs, i);
2493 struct data_reference *drb =
2494 VARRAY_GENERIC_PTR (loop_write_refs, j);
2495 if (vect_analyze_data_ref_dependence (dra, drb, loop))
2496 return false;
2497 }
2498 }
2499
2500 return true;
2501 }
2502
2503
2504 /* Function vect_get_first_index.
2505
2506 REF is a data reference.
2507 If it is an ARRAY_REF: if its lower bound is simple enough,
2508 put it in ARRAY_FIRST_INDEX and return TRUE; otherwise - return FALSE.
2509 If it is not an ARRAY_REF: REF has no "first index";
2510 ARRAY_FIRST_INDEX in zero, and the function returns TRUE. */
2511
2512 static bool
2513 vect_get_first_index (tree ref, tree *array_first_index)
2514 {
2515 tree array_start;
2516
2517 if (TREE_CODE (ref) != ARRAY_REF)
2518 *array_first_index = size_zero_node;
2519 else
2520 {
2521 array_start = array_ref_low_bound (ref);
2522 if (!host_integerp (array_start,0))
2523 {
2524 if (vect_debug_details (NULL))
2525 {
2526 fprintf (dump_file, "array min val not simple integer cst.");
2527 print_generic_expr (dump_file, array_start, TDF_DETAILS);
2528 }
2529 return false;
2530 }
2531 *array_first_index = array_start;
2532 }
2533
2534 return true;
2535 }
2536
2537
/* Function vect_compute_array_base_alignment.
   A utility function of vect_compute_array_ref_alignment.

   Compute the misalignment of ARRAY in bits.

   Input:
   ARRAY - an array_ref (possibly multidimensional) of type ARRAY_TYPE.
   VECTYPE - we are interested in the misalignment modulo the size of vectype.
             if NULL: don't compute misalignment, just return the base of ARRAY.
   PREV_DIMENSIONS - initialized to one.
   MISALIGNMENT - the computed misalignment in bits.

   Output:
   If VECTYPE is not NULL:
     Return NULL_TREE if the misalignment cannot be computed. Otherwise, return
     the base of the array, and put the computed misalignment in MISALIGNMENT.
   If VECTYPE is NULL:
     Return the base of the array.

   For a[idx_N]...[idx_2][idx_1][idx_0], the address of
   a[idx_N]...[idx_2][idx_1] is
   {&a + idx_1 * dim_0 + idx_2 * dim_0 * dim_1 + ...
    ... + idx_N * dim_0 * ... * dim_N-1}.
   (The misalignment of &a is not checked here).
   Note, that every term contains dim_0, therefore, if dim_0 is a
   multiple of NUNITS, the whole sum is a multiple of NUNITS.
   Otherwise, if idx_1 is constant, and dim_1 is a multiple of
   NUINTS, we can say that the misalignment of the sum is equal to
   the misalignment of {idx_1 * dim_0}.  If idx_1 is not constant,
   we can't determine this array misalignment, and we return
   false.
   We proceed recursively in this manner, accumulating total misalignment
   and the multiplication of previous dimensions for correct misalignment
   calculation.  */

static tree
vect_compute_array_base_alignment (tree array,
                                   tree vectype,
                                   tree *prev_dimensions,
                                   tree *misalignment)
{
  tree index;
  tree domain;
  tree dimension_size;
  tree mis;
  tree bits_per_vectype;
  tree bits_per_vectype_unit;

  /* The 'stop condition' of the recursion.  */
  if (TREE_CODE (array) != ARRAY_REF)
    return array;

  if (!vectype)
    /* Just get the base decl.  */
    return vect_compute_array_base_alignment
      (TREE_OPERAND (array, 0), NULL, NULL, NULL);

  /* Bail out when either accumulator is not a sane host integer; the
     int_const_binop arithmetic below requires both to be.  */
  if (!host_integerp (*misalignment, 1) || TREE_OVERFLOW (*misalignment) ||
      !host_integerp (*prev_dimensions, 1) || TREE_OVERFLOW (*prev_dimensions))
    return NULL_TREE;

  domain = TYPE_DOMAIN (TREE_TYPE (array));
  /* dimension_size = (max - min) + 1, i.e. the element count of this
     dimension.  */
  dimension_size =
    int_const_binop (PLUS_EXPR,
                     int_const_binop (MINUS_EXPR, TYPE_MAX_VALUE (domain),
                                      TYPE_MIN_VALUE (domain), 1),
                     size_one_node, 1);

  /* Check if the dimension size is a multiple of NUNITS, the remaining sum
     is a multiple of NUNITS:

     dimension_size % GET_MODE_NUNITS (TYPE_MODE (vectype)) == 0 ?
   */
  mis = int_const_binop (TRUNC_MOD_EXPR, dimension_size,
         build_int_cst (NULL_TREE, GET_MODE_NUNITS (TYPE_MODE (vectype))), 1);
  if (integer_zerop (mis))
    /* This array is aligned. Continue just in order to get the base decl.
       (Note: VECTYPE becomes NULL so no further misalignment is added.)  */
    return vect_compute_array_base_alignment
      (TREE_OPERAND (array, 0), NULL, NULL, NULL);

  index = TREE_OPERAND (array, 1);
  if (!host_integerp (index, 1))
    /* The current index is not constant.  */
    return NULL_TREE;

  /* Normalize the index to be zero-based within this dimension.  */
  index = int_const_binop (MINUS_EXPR, index, TYPE_MIN_VALUE (domain), 0);

  bits_per_vectype = fold_convert (unsigned_type_node,
    build_int_cst (NULL_TREE, BITS_PER_UNIT *
                   GET_MODE_SIZE (TYPE_MODE (vectype))));
  bits_per_vectype_unit = fold_convert (unsigned_type_node,
    build_int_cst (NULL_TREE, BITS_PER_UNIT *
                   GET_MODE_SIZE (TYPE_MODE (TREE_TYPE (vectype)))));

  /* Add {idx_i * dim_i-1 * ... * dim_0 } to the misalignment computed
     earlier:

     *misalignment =
       (*misalignment + index_val * dimension_size * *prev_dimensions)
                                                          % vectype_nunits;
   */

  mis = int_const_binop (MULT_EXPR, index, dimension_size, 1);
  mis = int_const_binop (MULT_EXPR, mis, *prev_dimensions, 1);
  mis = int_const_binop (MULT_EXPR, mis, bits_per_vectype_unit, 1);
  mis = int_const_binop (PLUS_EXPR, *misalignment, mis, 1);
  *misalignment = int_const_binop (TRUNC_MOD_EXPR, mis, bits_per_vectype, 1);


  /* Accumulate the dimension product for the next (outer) recursion
     level.  */
  *prev_dimensions = int_const_binop (MULT_EXPR,
                                      *prev_dimensions, dimension_size, 1);

  return vect_compute_array_base_alignment (TREE_OPERAND (array, 0), vectype,
                                            prev_dimensions,
                                            misalignment);
}
2654
2655
/* Function vect_compute_data_ref_alignment

   Compute the misalignment of the data reference DR.

   Output:
   1. If during the misalignment computation it is found that the data reference
      cannot be vectorized then false is returned.
   2. DR_MISALIGNMENT (DR) is defined.

   Note the asymmetric contract: an *unknown* alignment returns true with
   DR_MISALIGNMENT left at -1; false is reserved for cases that preclude
   vectorization altogether.

   FOR NOW: No analysis is actually performed. Misalignment is calculated
   only for trivial cases. TODO.  */

static bool
vect_compute_data_ref_alignment (struct data_reference *dr,
                                 loop_vec_info loop_vinfo)
{
  tree stmt = DR_STMT (dr);
  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
  tree ref = DR_REF (dr);
  tree vectype;
  tree scalar_type;
  tree offset = size_zero_node;
  tree base, bit_offset, alignment;
  tree unit_bits = fold_convert (unsigned_type_node,
                                 build_int_cst (NULL_TREE, BITS_PER_UNIT));
  tree dr_base;
  bool base_aligned_p;

  if (vect_debug_details (NULL))
    fprintf (dump_file, "vect_compute_data_ref_alignment:");

  /* Initialize misalignment to unknown.  */
  DR_MISALIGNMENT (dr) = -1;

  scalar_type = TREE_TYPE (ref);
  vectype = get_vectype_for_scalar_type (scalar_type);
  if (!vectype)
    {
      if (vect_debug_details (NULL))
        {
          fprintf (dump_file, "no vectype for stmt: ");
          print_generic_expr (dump_file, stmt, TDF_SLIM);
          fprintf (dump_file, " scalar_type: ");
          print_generic_expr (dump_file, scalar_type, TDF_DETAILS);
        }
      /* It is not possible to vectorize this data reference.  */
      return false;
    }
  /* FORNOW: only ARRAY_REF and INDIRECT_REF data references are expected.  */
  gcc_assert (TREE_CODE (ref) == ARRAY_REF || TREE_CODE (ref) == INDIRECT_REF);

  if (TREE_CODE (ref) == ARRAY_REF)
    dr_base = ref;
  else
    dr_base = STMT_VINFO_VECT_DR_BASE (stmt_info);

  base = vect_get_base_and_bit_offset (dr, dr_base, vectype,
                          loop_vinfo, &bit_offset, &base_aligned_p);
  if (!base)
    {
      /* Alignment could not be determined; leave DR_MISALIGNMENT at -1
         (unknown) but do not reject the data reference.  */
      if (vect_debug_details (NULL))
        {
          fprintf (dump_file, "Unknown alignment for access: ");
          print_generic_expr (dump_file,
                              STMT_VINFO_VECT_DR_BASE (stmt_info), TDF_SLIM);
        }
      return true;
    }

  if (!base_aligned_p)
    {
      if (!vect_can_force_dr_alignment_p (base, TYPE_ALIGN (vectype)))
        {
          /* Cannot force alignment either; misalignment stays unknown.  */
          if (vect_debug_details (NULL))
            {
              fprintf (dump_file, "can't force alignment of ref: ");
              print_generic_expr (dump_file, ref, TDF_SLIM);
            }
          return true;
        }

      /* Force the alignment of the decl.
         NOTE: This is the only change to the code we make during
         the analysis phase, before deciding to vectorize the loop.  */
      if (vect_debug_details (NULL))
        fprintf (dump_file, "force alignment");
      DECL_ALIGN (base) = TYPE_ALIGN (vectype);
      DECL_USER_ALIGN (base) = TYPE_ALIGN (vectype);
    }

  /* At this point we assume that the base is aligned, and the offset from it
     (including index, if relevant) has been computed and is in BIT_OFFSET.  */
  gcc_assert (base_aligned_p
              || (TREE_CODE (base) == VAR_DECL
                  && DECL_ALIGN (base) >= TYPE_ALIGN (vectype)));

  /* Convert into bytes.  */
  offset = int_const_binop (TRUNC_DIV_EXPR, bit_offset, unit_bits, 1);
  /* Check that there is no remainder in bits.  */
  bit_offset = int_const_binop (TRUNC_MOD_EXPR, bit_offset, unit_bits, 1);
  if (!integer_zerop (bit_offset))
    {
      /* A sub-byte offset cannot be handled.  */
      if (vect_debug_details (NULL))
        {
          fprintf (dump_file, "bit offset alignment: ");
          print_generic_expr (dump_file, bit_offset, TDF_SLIM);
        }
      return false;
    }

  /* Alignment required, in bytes: */
  alignment = fold_convert (unsigned_type_node,
              build_int_cst (NULL_TREE, TYPE_ALIGN (vectype)/BITS_PER_UNIT));

  /* Modulo alignment.  */
  offset = int_const_binop (TRUNC_MOD_EXPR, offset, alignment, 0);
  if (!host_integerp (offset, 1) || TREE_OVERFLOW (offset))
    {
      if (vect_debug_details (NULL))
        fprintf (dump_file, "unexpected misalign value");
      return false;
    }

  DR_MISALIGNMENT (dr) = tree_low_cst (offset, 1);

  if (vect_debug_details (NULL))
    fprintf (dump_file, "misalign = %d", DR_MISALIGNMENT (dr));

  return true;
}
2785
2786
/* Function vect_compute_array_ref_alignment

   Compute the alignment of an array-ref.
   The alignment we compute here is relative to
   TYPE_ALIGN(VECTYPE) boundary.

   Output:
   OFFSET - the alignment in bits
   Return value - the base of the array-ref. E.g,
                  if the array-ref is a.b[k].c[i][j] the returned
                  base is a.b[k].c
   Returns NULL_TREE when the alignment cannot be computed.  */

static tree
vect_compute_array_ref_alignment (struct data_reference *dr,
                                  loop_vec_info loop_vinfo,
                                  tree vectype,
                                  tree *offset)
{
  tree array_first_index = size_zero_node;
  tree init;
  tree ref = DR_REF (dr);
  tree scalar_type = TREE_TYPE (ref);
  tree oprnd0 = TREE_OPERAND (ref, 0);
  /* Accumulators passed by reference into the recursive base-alignment
     computation; dims starts at one, misalign at zero.  */
  tree dims = size_one_node;
  tree misalign = size_zero_node;
  tree next_ref, this_offset = size_zero_node;
  tree nunits;
  tree nbits;

  if (TREE_CODE (TREE_TYPE (ref)) == ARRAY_TYPE)
    /* The reference is an array without its last index.  */
    next_ref = vect_compute_array_base_alignment (ref, vectype, &dims, &misalign);
  else
    next_ref =
      vect_compute_array_base_alignment (oprnd0, vectype, &dims, &misalign);
  if (!vectype)
    /* Alignment is not requested. Just return the base.  */
    return next_ref;

  /* Compute alignment.  */
  if (!host_integerp (misalign, 1) || TREE_OVERFLOW (misalign) || !next_ref)
    return NULL_TREE;
  this_offset = misalign;

  /* Check the first index accessed.  */
  if (!vect_get_first_index (ref, &array_first_index))
    {
      if (vect_debug_details (NULL))
        fprintf (dump_file, "no first_index for array.");
      return NULL_TREE;
    }

  /* Check the index of the array_ref.  */
  init = initial_condition_in_loop_num (DR_ACCESS_FN (dr, 0),
                                        LOOP_VINFO_LOOP (loop_vinfo)->num);

  /* FORNOW: In order to simplify the handling of alignment, we make sure
     that the first location at which the array is accessed ('init') is on an
     'NUNITS' boundary, since we are assuming here that 'array base' is aligned.
     This is too conservative, since we require that
     both {'array_base' is a multiple of NUNITS} && {'init' is a multiple of
     NUNITS}, instead of just {('array_base' + 'init') is a multiple of NUNITS}.
     This should be relaxed in the future.  */

  if (!init || !host_integerp (init, 0))
    {
      if (vect_debug_details (NULL))
        fprintf (dump_file, "non constant init. ");
      return NULL_TREE;
    }

  /* bytes per scalar element: */
  nunits = fold_convert (unsigned_type_node,
        build_int_cst (NULL_TREE, GET_MODE_SIZE (TYPE_MODE (scalar_type))));
  nbits = int_const_binop (MULT_EXPR, nunits,
                           build_int_cst (NULL_TREE, BITS_PER_UNIT), 1);

  /* misalign = offset + (init-array_first_index)*nunits*bits_in_byte */
  misalign = int_const_binop (MINUS_EXPR, init, array_first_index, 0);
  misalign = int_const_binop (MULT_EXPR, misalign, nbits, 0);
  misalign = int_const_binop (PLUS_EXPR, misalign, this_offset, 0);

  /* TODO: allow negative misalign values.  */
  if (!host_integerp (misalign, 1) || TREE_OVERFLOW (misalign))
    {
      if (vect_debug_details (NULL))
        fprintf (dump_file, "unexpected misalign value");
      return NULL_TREE;
    }
  *offset = misalign;
  return next_ref;
}
2880
2881
2882 /* Function vect_compute_data_refs_alignment
2883
2884 Compute the misalignment of data references in the loop.
2885 This pass may take place at function granularity instead of at loop
2886 granularity.
2887
2888 FOR NOW: No analysis is actually performed. Misalignment is calculated
2889 only for trivial cases. TODO. */
2890
2891 static void
2892 vect_compute_data_refs_alignment (loop_vec_info loop_vinfo)
2893 {
2894 varray_type loop_write_datarefs = LOOP_VINFO_DATAREF_WRITES (loop_vinfo);
2895 varray_type loop_read_datarefs = LOOP_VINFO_DATAREF_READS (loop_vinfo);
2896 unsigned int i;
2897
2898 for (i = 0; i < VARRAY_ACTIVE_SIZE (loop_write_datarefs); i++)
2899 {
2900 struct data_reference *dr = VARRAY_GENERIC_PTR (loop_write_datarefs, i);
2901 vect_compute_data_ref_alignment (dr, loop_vinfo);
2902 }
2903
2904 for (i = 0; i < VARRAY_ACTIVE_SIZE (loop_read_datarefs); i++)
2905 {
2906 struct data_reference *dr = VARRAY_GENERIC_PTR (loop_read_datarefs, i);
2907 vect_compute_data_ref_alignment (dr, loop_vinfo);
2908 }
2909 }
2910
2911
/* Function vect_enhance_data_refs_alignment

   This pass will use loop versioning and loop peeling in order to enhance
   the alignment of data references in the loop.

   FOR NOW: we assume that whatever versioning/peeling takes place, only the
   original loop is to be vectorized; Any other loops that are created by
   the transformations performed in this pass - are not supposed to be
   vectorized. This restriction will be relaxed.

   FOR NOW: No transformation is actually performed. TODO.
   (The body below is intentionally empty; the block comment records the
   intended design.)  */

static void
vect_enhance_data_refs_alignment (loop_vec_info loop_info ATTRIBUTE_UNUSED)
{
  /*
     This pass will require a cost model to guide it whether to apply peeling
     or versioning or a combination of the two. For example, the scheme that
     intel uses when given a loop with several memory accesses, is as follows:
     choose one memory access ('p') which alignment you want to force by doing
     peeling. Then, either (1) generate a loop in which 'p' is aligned and all
     other accesses are not necessarily aligned, or (2) use loop versioning to
     generate one loop in which all accesses are aligned, and another loop in
     which only 'p' is necessarily aligned.

     ("Automatic Intra-Register Vectorization for the Intel Architecture",
      Aart J.C. Bik, Milind Girkar, Paul M. Grey and Ximmin Tian, International
      Journal of Parallel Programming, Vol. 30, No. 2, April 2002.)

     Devising a cost model is the most critical aspect of this work. It will
     guide us on which access to peel for, whether to use loop versioning, how
     many versions to create, etc. The cost model will probably consist of
     generic considerations as well as target specific considerations (on
     powerpc for example, misaligned stores are more painful than misaligned
     loads).

     Here is the general steps involved in alignment enhancements:

     -- original loop, before alignment analysis:
        for (i=0; i<N; i++){
          x = q[i];                     # DR_MISALIGNMENT(q) = unknown
          p[i] = y;                     # DR_MISALIGNMENT(p) = unknown
        }

     -- After vect_compute_data_refs_alignment:
        for (i=0; i<N; i++){
          x = q[i];                     # DR_MISALIGNMENT(q) = 3
          p[i] = y;                     # DR_MISALIGNMENT(p) = unknown
        }

     -- Possibility 1: we do loop versioning:
     if (p is aligned) {
        for (i=0; i<N; i++){    # loop 1A
          x = q[i];                     # DR_MISALIGNMENT(q) = 3
          p[i] = y;                     # DR_MISALIGNMENT(p) = 0
        }
     }
     else {
        for (i=0; i<N; i++){    # loop 1B
          x = q[i];                     # DR_MISALIGNMENT(q) = 3
          p[i] = y;                     # DR_MISALIGNMENT(p) = unaligned
        }
     }

     -- Possibility 2: we do loop peeling:
     for (i = 0; i < 3; i++){   # (scalar loop, not to be vectorized).
        x = q[i];
        p[i] = y;
     }
     for (i = 3; i < N; i++){   # loop 2A
        x = q[i];                       # DR_MISALIGNMENT(q) = 0
        p[i] = y;                       # DR_MISALIGNMENT(p) = unknown
     }

     -- Possibility 3: combination of loop peeling and versioning:
     for (i = 0; i < 3; i++){   # (scalar loop, not to be vectorized).
        x = q[i];
        p[i] = y;
     }
     if (p is aligned) {
        for (i = 3; i<N; i++){  # loop 3A
          x = q[i];                     # DR_MISALIGNMENT(q) = 0
          p[i] = y;                     # DR_MISALIGNMENT(p) = 0
        }
     }
     else {
        for (i = 3; i<N; i++){  # loop 3B
          x = q[i];                     # DR_MISALIGNMENT(q) = 0
          p[i] = y;                     # DR_MISALIGNMENT(p) = unaligned
        }
     }

     These loops are later passed to loop_transform to be vectorized. The
     vectorizer will use the alignment information to guide the transformation
     (whether to generate regular loads/stores, or with special handling for
     misalignment).
   */
}
3010
3011
3012 /* Function vect_analyze_data_refs_alignment
3013
3014 Analyze the alignment of the data-references in the loop.
3015 FOR NOW: Until support for misliagned accesses is in place, only if all
3016 accesses are aligned can the loop be vectorized. This restriction will be
3017 relaxed. */
3018
3019 static bool
3020 vect_analyze_data_refs_alignment (loop_vec_info loop_vinfo)
3021 {
3022 varray_type loop_write_datarefs = LOOP_VINFO_DATAREF_WRITES (loop_vinfo);
3023 /*varray_type loop_read_datarefs = LOOP_VINFO_DATAREF_READS (loop_vinfo);*/
3024
3025 unsigned int i;
3026
3027 if (vect_debug_details (NULL))
3028 fprintf (dump_file, "\n<<vect_analyze_data_refs_alignment>>\n");
3029
3030
3031 /* This pass may take place at function granularity instead of at loop
3032 granularity. */
3033
3034 vect_compute_data_refs_alignment (loop_vinfo);
3035
3036
3037 /* This pass will use loop versioning and loop peeling in order to enhance
3038 the alignment of data references in the loop.
3039 FOR NOW: we assume that whatever versioning/peeling took place, the
3040 original loop is to be vectorized. Any other loops that were created by
3041 the transformations performed in this pass - are not supposed to be
3042 vectorized. This restriction will be relaxed. */
3043
3044 vect_enhance_data_refs_alignment (loop_vinfo);
3045
3046
3047 /* Finally, check that loop can be vectorized.
3048 FOR NOW: Until support for misaligned accesses is in place, only if all
3049 accesses are aligned can the loop be vectorized. This restriction will be
3050 relaxed. */
3051
3052 for (i = 0; i < VARRAY_ACTIVE_SIZE (loop_write_datarefs); i++)
3053 {
3054 struct data_reference *dr = VARRAY_GENERIC_PTR (loop_write_datarefs, i);
3055 if (!aligned_access_p (dr))
3056 {
3057 if (vect_debug_stats (LOOP_VINFO_LOOP (loop_vinfo))
3058 || vect_debug_details (LOOP_VINFO_LOOP (loop_vinfo)))
3059 fprintf (dump_file, "not vectorized: unaligned store.");
3060 return false;
3061 }
3062 }
3063
3064 /* The vectorizer now supports misaligned loads, so we don't fail anymore
3065 in the presence of a misaligned read dataref. For some targets however
3066 it may be preferable not to vectorize in such a case as misaligned
3067 accesses are very costly. This should be considered in the future. */
3068 /*
3069 for (i = 0; i < VARRAY_ACTIVE_SIZE (loop_read_datarefs); i++)
3070 {
3071 struct data_reference *dr = VARRAY_GENERIC_PTR (loop_read_datarefs, i);
3072 if (!aligned_access_p (dr))
3073 {
3074 if (vect_debug_stats (LOOP_VINFO_LOOP (loop_vinfo))
3075 || vect_debug_details (LOOP_VINFO_LOOP (loop_vinfo)))
3076 fprintf (dump_file, "not vectorized: unaligned load.");
3077 return false;
3078 }
3079 }
3080 */
3081
3082 return true;
3083 }
3084
3085
3086 /* Function vect_analyze_data_ref_access.
3087
3088 Analyze the access pattern of the data-reference DR. For now, a data access
3089 has to consecutive and aligned to be considered vectorizable. */
3090
3091 static bool
3092 vect_analyze_data_ref_access (struct data_reference *dr)
3093 {
3094 varray_type access_fns = DR_ACCESS_FNS (dr);
3095 tree access_fn;
3096 tree init, step;
3097 unsigned int dimensions, i;
3098
3099 /* Check that in case of multidimensional array ref A[i1][i2]..[iN],
3100 i1, i2, ..., iN-1 are loop invariant (to make sure that the memory
3101 access is contiguous). */
3102 dimensions = VARRAY_ACTIVE_SIZE (access_fns);
3103
3104 for (i = 1; i < dimensions; i++) /* Not including the last dimension. */
3105 {
3106 access_fn = DR_ACCESS_FN (dr, i);
3107
3108 if (evolution_part_in_loop_num (access_fn,
3109 loop_containing_stmt (DR_STMT (dr))->num))
3110 {
3111 /* Evolution part is not NULL in this loop (it is neither constant nor
3112 invariant). */
3113 if (vect_debug_details (NULL))
3114 {
3115 fprintf (dump_file,
3116 "not vectorized: complicated multidimensional array access.");
3117 print_generic_expr (dump_file, access_fn, TDF_SLIM);
3118 }
3119 return false;
3120 }
3121 }
3122
3123 access_fn = DR_ACCESS_FN (dr, 0); /* The last dimension access function. */
3124 if (!evolution_function_is_constant_p (access_fn)
3125 && !vect_is_simple_iv_evolution (loop_containing_stmt (DR_STMT (dr))->num,
3126 access_fn, &init, &step, true))
3127 {
3128 if (vect_debug_details (NULL))
3129 {
3130 fprintf (dump_file, "not vectorized: too complicated access function.");
3131 print_generic_expr (dump_file, access_fn, TDF_SLIM);
3132 }
3133 return false;
3134 }
3135
3136 return true;
3137 }
3138
3139
3140 /* Function vect_analyze_data_ref_accesses.
3141
3142 Analyze the access pattern of all the data references in the loop.
3143
3144 FORNOW: the only access pattern that is considered vectorizable is a
3145 simple step 1 (consecutive) access.
3146
3147 FORNOW: handle only arrays and pointer accesses. */
3148
3149 static bool
3150 vect_analyze_data_ref_accesses (loop_vec_info loop_vinfo)
3151 {
3152 unsigned int i;
3153 varray_type loop_write_datarefs = LOOP_VINFO_DATAREF_WRITES (loop_vinfo);
3154 varray_type loop_read_datarefs = LOOP_VINFO_DATAREF_READS (loop_vinfo);
3155
3156 if (vect_debug_details (NULL))
3157 fprintf (dump_file, "\n<<vect_analyze_data_ref_accesses>>\n");
3158
3159 for (i = 0; i < VARRAY_ACTIVE_SIZE (loop_write_datarefs); i++)
3160 {
3161 struct data_reference *dr = VARRAY_GENERIC_PTR (loop_write_datarefs, i);
3162 bool ok = vect_analyze_data_ref_access (dr);
3163 if (!ok)
3164 {
3165 if (vect_debug_stats (LOOP_VINFO_LOOP (loop_vinfo))
3166 || vect_debug_details (LOOP_VINFO_LOOP (loop_vinfo)))
3167 fprintf (dump_file, "not vectorized: complicated access pattern.");
3168 return false;
3169 }
3170 }
3171
3172 for (i = 0; i < VARRAY_ACTIVE_SIZE (loop_read_datarefs); i++)
3173 {
3174 struct data_reference *dr = VARRAY_GENERIC_PTR (loop_read_datarefs, i);
3175 bool ok = vect_analyze_data_ref_access (dr);
3176 if (!ok)
3177 {
3178 if (vect_debug_stats (LOOP_VINFO_LOOP (loop_vinfo))
3179 || vect_debug_details (LOOP_VINFO_LOOP (loop_vinfo)))
3180 fprintf (dump_file, "not vectorized: complicated access pattern.");
3181 return false;
3182 }
3183 }
3184
3185 return true;
3186 }
3187
3188
3189 /* Function vect_analyze_pointer_ref_access.
3190
3191 Input:
3192 STMT - a stmt that contains a data-ref
3193 MEMREF - a data-ref in STMT, which is an INDIRECT_REF.
3194
3195 If the data-ref access is vectorizable, return a data_reference structure
3196 that represents it (DR). Otherwise - return NULL. */
3197
3198 static struct data_reference *
3199 vect_analyze_pointer_ref_access (tree memref, tree stmt, bool is_read)
3200 {
3201 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
3202 struct loop *loop = STMT_VINFO_LOOP (stmt_info);
3203 tree access_fn = analyze_scalar_evolution (loop, TREE_OPERAND (memref, 0));
3204 tree init, step;
3205 int step_val;
3206 tree reftype, innertype;
3207 enum machine_mode innermode;
3208 tree indx_access_fn;
3209 int loopnum = loop->num;
3210 struct data_reference *dr;
3211
3212 if (!access_fn)
3213 {
3214 if (vect_debug_stats (loop) || vect_debug_details (loop))
3215 fprintf (dump_file, "not vectorized: complicated pointer access.");
3216 return NULL;
3217 }
3218
3219 if (vect_debug_details (NULL))
3220 {
3221 fprintf (dump_file, "Access function of ptr: ");
3222 print_generic_expr (dump_file, access_fn, TDF_SLIM);
3223 }
3224
3225 if (!vect_is_simple_iv_evolution (loopnum, access_fn, &init, &step, false))
3226 {
3227 if (vect_debug_stats (loop) || vect_debug_details (loop))
3228 fprintf (dump_file, "not vectorized: pointer access is not simple.");
3229 return NULL;
3230 }
3231
3232 STRIP_NOPS (init);
3233
3234 if (!host_integerp (step,0))
3235 {
3236 if (vect_debug_stats (loop) || vect_debug_details (loop))
3237 fprintf (dump_file,
3238 "not vectorized: non constant step for pointer access.");
3239 return NULL;
3240 }
3241
3242 step_val = TREE_INT_CST_LOW (step);
3243
3244 reftype = TREE_TYPE (TREE_OPERAND (memref, 0));
3245 if (TREE_CODE (reftype) != POINTER_TYPE)
3246 {
3247 if (vect_debug_stats (loop) || vect_debug_details (loop))
3248 fprintf (dump_file, "not vectorized: unexpected pointer access form.");
3249 return NULL;
3250 }
3251
3252 reftype = TREE_TYPE (init);
3253 if (TREE_CODE (reftype) != POINTER_TYPE)
3254 {
3255 if (vect_debug_stats (loop) || vect_debug_details (loop))
3256 fprintf (dump_file, "not vectorized: unexpected pointer access form.");
3257 return NULL;
3258 }
3259
3260 innertype = TREE_TYPE (reftype);
3261 innermode = TYPE_MODE (innertype);
3262 if (GET_MODE_SIZE (innermode) != step_val)
3263 {
3264 /* FORNOW: support only consecutive access */
3265 if (vect_debug_stats (loop) || vect_debug_details (loop))
3266 fprintf (dump_file, "not vectorized: non consecutive access.");
3267 return NULL;
3268 }
3269
3270 indx_access_fn =
3271 build_polynomial_chrec (loopnum, integer_zero_node, integer_one_node);
3272 if (vect_debug_details (NULL))
3273 {
3274 fprintf (dump_file, "Access function of ptr indx: ");
3275 print_generic_expr (dump_file, indx_access_fn, TDF_SLIM);
3276 }
3277 dr = init_data_ref (stmt, memref, init, indx_access_fn, is_read);
3278 return dr;
3279 }
3280
3281
3282 /* Function vect_get_symbl_and_dr.
3283
3284 The function returns SYMBL - the relevant variable for
3285 memory tag (for aliasing purposes).
3286 Also data reference structure DR is created.
3287
3288 Input:
3289 MEMREF - data reference in STMT
3290 IS_READ - TRUE if STMT reads from MEMREF, FALSE if writes to MEMREF
3291
3292 Output:
3293 DR - data_reference struct for MEMREF
3294 return value - the relevant variable for memory tag (for aliasing purposes).
3295
3296 */
3297
3298 static tree
3299 vect_get_symbl_and_dr (tree memref, tree stmt, bool is_read,
3300 loop_vec_info loop_vinfo, struct data_reference **dr)
3301 {
3302 tree symbl, oprnd0, oprnd1;
3303 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
3304 tree offset;
3305 tree array_base, base;
3306 struct data_reference *new_dr;
3307 bool base_aligned_p;
3308
3309 *dr = NULL;
3310 switch (TREE_CODE (memref))
3311 {
3312 case INDIRECT_REF:
3313 new_dr = vect_analyze_pointer_ref_access (memref, stmt, is_read);
3314 if (! new_dr)
3315 return NULL_TREE;
3316 *dr = new_dr;
3317 symbl = DR_BASE_NAME (new_dr);
3318 STMT_VINFO_VECT_DR_BASE (stmt_info) = symbl;
3319
3320 switch (TREE_CODE (symbl))
3321 {
3322 case PLUS_EXPR:
3323 case MINUS_EXPR:
3324 oprnd0 = TREE_OPERAND (symbl, 0);
3325 oprnd1 = TREE_OPERAND (symbl, 1);
3326
3327 STRIP_NOPS(oprnd1);
3328 /* Only {address_base + offset} expressions are supported,
3329 where address_base can be POINTER_TYPE or ARRAY_TYPE and
3330 offset can be anything but POINTER_TYPE or ARRAY_TYPE.
3331 TODO: swap operands if {offset + address_base}. */
3332 if ((TREE_CODE (TREE_TYPE (oprnd1)) == POINTER_TYPE
3333 && TREE_CODE (oprnd1) != INTEGER_CST)
3334 || TREE_CODE (TREE_TYPE (oprnd1)) == ARRAY_TYPE)
3335 return NULL_TREE;
3336
3337 if (TREE_CODE (TREE_TYPE (oprnd0)) == POINTER_TYPE)
3338 symbl = oprnd0;
3339 else
3340 symbl = vect_get_symbl_and_dr (oprnd0, stmt, is_read,
3341 loop_vinfo, &new_dr);
3342
3343 case SSA_NAME:
3344 case ADDR_EXPR:
3345 /* symbl remains unchanged. */
3346 break;
3347
3348 default:
3349 if (vect_debug_details (NULL))
3350 {
3351 fprintf (dump_file, "unhandled data ref: ");
3352 print_generic_expr (dump_file, memref, TDF_SLIM);
3353 fprintf (dump_file, " (symbl ");
3354 print_generic_expr (dump_file, symbl, TDF_SLIM);
3355 fprintf (dump_file, ") in stmt ");
3356 print_generic_expr (dump_file, stmt, TDF_SLIM);
3357 }
3358 return NULL_TREE;
3359 }
3360 break;
3361
3362 case ARRAY_REF:
3363 offset = size_zero_node;
3364
3365 /* Store the array base in the stmt info.
3366 For one dimensional array ref a[i], the base is a,
3367 for multidimensional a[i1][i2]..[iN], the base is
3368 a[i1][i2]..[iN-1]. */
3369 array_base = TREE_OPERAND (memref, 0);
3370 STMT_VINFO_VECT_DR_BASE (stmt_info) = array_base;
3371
3372 new_dr = analyze_array (stmt, memref, is_read);
3373 *dr = new_dr;
3374
3375 /* Find the relevant symbol for aliasing purposes. */
3376 base = DR_BASE_NAME (new_dr);
3377 switch (TREE_CODE (base))
3378 {
3379 case VAR_DECL:
3380 symbl = base;
3381 break;
3382
3383 case INDIRECT_REF:
3384 symbl = TREE_OPERAND (base, 0);
3385 break;
3386
3387 case COMPONENT_REF:
3388 /* Could have recorded more accurate information -
3389 i.e, the actual FIELD_DECL that is being referenced -
3390 but later passes expect VAR_DECL as the nmt. */
3391 symbl = vect_get_base_and_bit_offset (new_dr, base, NULL_TREE,
3392 loop_vinfo, &offset, &base_aligned_p);
3393 if (symbl)
3394 break;
3395 /* fall through */
3396 default:
3397 if (vect_debug_details (NULL))
3398 {
3399 fprintf (dump_file, "unhandled struct/class field access ");
3400 print_generic_expr (dump_file, stmt, TDF_SLIM);
3401 }
3402 return NULL_TREE;
3403 }
3404 break;
3405
3406 default:
3407 if (vect_debug_details (NULL))
3408 {
3409 fprintf (dump_file, "unhandled data ref: ");
3410 print_generic_expr (dump_file, memref, TDF_SLIM);
3411 fprintf (dump_file, " in stmt ");
3412 print_generic_expr (dump_file, stmt, TDF_SLIM);
3413 }
3414 return NULL_TREE;
3415 }
3416 return symbl;
3417 }
3418
3419
3420 /* Function vect_analyze_data_refs.
3421
3422 Find all the data references in the loop.
3423
3424 FORNOW: Handle aligned INDIRECT_REFs and ARRAY_REFs
3425 which base is really an array (not a pointer) and which alignment
3426 can be forced. This restriction will be relaxed. */
3427
3428 static bool
3429 vect_analyze_data_refs (loop_vec_info loop_vinfo)
3430 {
3431 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
3432 basic_block *bbs = LOOP_VINFO_BBS (loop_vinfo);
3433 int nbbs = loop->num_nodes;
3434 block_stmt_iterator si;
3435 int j;
3436 struct data_reference *dr;
3437 tree tag;
3438 tree address_base;
3439
3440 if (vect_debug_details (NULL))
3441 fprintf (dump_file, "\n<<vect_analyze_data_refs>>\n");
3442
3443 for (j = 0; j < nbbs; j++)
3444 {
3445 basic_block bb = bbs[j];
3446 for (si = bsi_start (bb); !bsi_end_p (si); bsi_next (&si))
3447 {
3448 bool is_read = false;
3449 tree stmt = bsi_stmt (si);
3450 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
3451 v_may_def_optype v_may_defs = STMT_V_MAY_DEF_OPS (stmt);
3452 v_must_def_optype v_must_defs = STMT_V_MUST_DEF_OPS (stmt);
3453 vuse_optype vuses = STMT_VUSE_OPS (stmt);
3454 varray_type *datarefs = NULL;
3455 int nvuses, nv_may_defs, nv_must_defs;
3456 tree memref = NULL;
3457 tree symbl;
3458
3459 /* Assumption: there exists a data-ref in stmt, if and only if
3460 it has vuses/vdefs. */
3461
3462 if (!vuses && !v_may_defs && !v_must_defs)
3463 continue;
3464
3465 nvuses = NUM_VUSES (vuses);
3466 nv_may_defs = NUM_V_MAY_DEFS (v_may_defs);
3467 nv_must_defs = NUM_V_MUST_DEFS (v_must_defs);
3468
3469 if (nvuses && (nv_may_defs || nv_must_defs))
3470 {
3471 if (vect_debug_details (NULL))
3472 {
3473 fprintf (dump_file, "unexpected vdefs and vuses in stmt: ");
3474 print_generic_expr (dump_file, stmt, TDF_SLIM);
3475 }
3476 return false;
3477 }
3478
3479 if (TREE_CODE (stmt) != MODIFY_EXPR)
3480 {
3481 if (vect_debug_details (NULL))
3482 {
3483 fprintf (dump_file, "unexpected vops in stmt: ");
3484 print_generic_expr (dump_file, stmt, TDF_SLIM);
3485 }
3486 return false;
3487 }
3488
3489 if (vuses)
3490 {
3491 memref = TREE_OPERAND (stmt, 1);
3492 datarefs = &(LOOP_VINFO_DATAREF_READS (loop_vinfo));
3493 is_read = true;
3494 }
3495 else /* vdefs */
3496 {
3497 memref = TREE_OPERAND (stmt, 0);
3498 datarefs = &(LOOP_VINFO_DATAREF_WRITES (loop_vinfo));
3499 is_read = false;
3500 }
3501
3502 /* Analyze MEMREF. If it is of a supported form, build data_reference
3503 struct for it (DR) and find the relevant symbol for aliasing
3504 purposes. */
3505 symbl = vect_get_symbl_and_dr (memref, stmt, is_read, loop_vinfo, &dr);
3506 if (!symbl)
3507 {
3508 if (vect_debug_stats (loop) || vect_debug_details (loop))
3509 {
3510 fprintf (dump_file, "not vectorized: unhandled data ref: ");
3511 print_generic_expr (dump_file, stmt, TDF_SLIM);
3512 }
3513 return false;
3514 }
3515
3516 /* Find and record the memtag assigned to this data-ref. */
3517 switch (TREE_CODE (symbl))
3518 {
3519 case VAR_DECL:
3520 STMT_VINFO_MEMTAG (stmt_info) = symbl;
3521 break;
3522
3523 case SSA_NAME:
3524 symbl = SSA_NAME_VAR (symbl);
3525 tag = get_var_ann (symbl)->type_mem_tag;
3526 if (!tag)
3527 {
3528 tree ptr = TREE_OPERAND (memref, 0);
3529 if (TREE_CODE (ptr) == SSA_NAME)
3530 tag = get_var_ann (SSA_NAME_VAR (ptr))->type_mem_tag;
3531 }
3532 if (!tag)
3533 {
3534 if (vect_debug_stats (loop) || vect_debug_details (loop))
3535 fprintf (dump_file, "not vectorized: no memtag for ref.");
3536 return false;
3537 }
3538 STMT_VINFO_MEMTAG (stmt_info) = tag;
3539 break;
3540
3541 case ADDR_EXPR:
3542 address_base = TREE_OPERAND (symbl, 0);
3543
3544 switch (TREE_CODE (address_base))
3545 {
3546 case ARRAY_REF:
3547 dr = analyze_array (stmt, TREE_OPERAND (symbl, 0), DR_IS_READ(dr));
3548 STMT_VINFO_MEMTAG (stmt_info) = DR_BASE_NAME (dr);
3549 break;
3550
3551 case VAR_DECL:
3552 STMT_VINFO_MEMTAG (stmt_info) = address_base;
3553 break;
3554
3555 default:
3556 if (vect_debug_stats (loop) || vect_debug_details (loop))
3557 {
3558 fprintf (dump_file, "not vectorized: unhandled address expression: ");
3559 print_generic_expr (dump_file, stmt, TDF_SLIM);
3560 }
3561 return false;
3562 }
3563 break;
3564
3565 default:
3566 if (vect_debug_stats (loop) || vect_debug_details (loop))
3567 {
3568 fprintf (dump_file, "not vectorized: unsupported data-ref: ");
3569 print_generic_expr (dump_file, memref, TDF_SLIM);
3570 }
3571 return false;
3572 }
3573
3574 VARRAY_PUSH_GENERIC_PTR (*datarefs, dr);
3575 STMT_VINFO_DATA_REF (stmt_info) = dr;
3576 }
3577 }
3578
3579 return true;
3580 }
3581
3582
3583 /* Utility functions used by vect_mark_stmts_to_be_vectorized. */
3584
3585 /* Function vect_mark_relevant.
3586
3587 Mark STMT as "relevant for vectorization" and add it to WORKLIST. */
3588
3589 static void
3590 vect_mark_relevant (varray_type worklist, tree stmt)
3591 {
3592 stmt_vec_info stmt_info;
3593
3594 if (vect_debug_details (NULL))
3595 fprintf (dump_file, "mark relevant.");
3596
3597 if (TREE_CODE (stmt) == PHI_NODE)
3598 {
3599 VARRAY_PUSH_TREE (worklist, stmt);
3600 return;
3601 }
3602
3603 stmt_info = vinfo_for_stmt (stmt);
3604
3605 if (!stmt_info)
3606 {
3607 if (vect_debug_details (NULL))
3608 {
3609 fprintf (dump_file, "mark relevant: no stmt info!!.");
3610 print_generic_expr (dump_file, stmt, TDF_SLIM);
3611 }
3612 return;
3613 }
3614
3615 if (STMT_VINFO_RELEVANT_P (stmt_info))
3616 {
3617 if (vect_debug_details (NULL))
3618 fprintf (dump_file, "already marked relevant.");
3619 return;
3620 }
3621
3622 STMT_VINFO_RELEVANT_P (stmt_info) = 1;
3623 VARRAY_PUSH_TREE (worklist, stmt);
3624 }
3625
3626
3627 /* Function vect_stmt_relevant_p.
3628
3629 Return true if STMT in loop that is represented by LOOP_VINFO is
3630 "relevant for vectorization".
3631
3632 A stmt is considered "relevant for vectorization" if:
3633 - it has uses outside the loop.
3634 - it has vdefs (it alters memory).
3635 - control stmts in the loop (except for the exit condition).
3636
3637 CHECKME: what other side effects would the vectorizer allow? */
3638
3639 static bool
3640 vect_stmt_relevant_p (tree stmt, loop_vec_info loop_vinfo)
3641 {
3642 v_may_def_optype v_may_defs;
3643 v_must_def_optype v_must_defs;
3644 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
3645 int i;
3646 dataflow_t df;
3647 int num_uses;
3648
3649 /* cond stmt other than loop exit cond. */
3650 if (is_ctrl_stmt (stmt) && (stmt != LOOP_VINFO_EXIT_COND (loop_vinfo)))
3651 return true;
3652
3653 /* changing memory. */
3654 v_may_defs = STMT_V_MAY_DEF_OPS (stmt);
3655 v_must_defs = STMT_V_MUST_DEF_OPS (stmt);
3656 if (v_may_defs || v_must_defs)
3657 {
3658 if (vect_debug_details (NULL))
3659 fprintf (dump_file, "vec_stmt_relevant_p: stmt has vdefs.");
3660 return true;
3661 }
3662
3663 /* uses outside the loop. */
3664 df = get_immediate_uses (stmt);
3665 num_uses = num_immediate_uses (df);
3666 for (i = 0; i < num_uses; i++)
3667 {
3668 tree use = immediate_use (df, i);
3669 basic_block bb = bb_for_stmt (use);
3670 if (!flow_bb_inside_loop_p (loop, bb))
3671 {
3672 if (vect_debug_details (NULL))
3673 fprintf (dump_file, "vec_stmt_relevant_p: used out of loop.");
3674 return true;
3675 }
3676 }
3677
3678 return false;
3679 }
3680
3681
3682 /* Function vect_mark_stmts_to_be_vectorized.
3683
3684 Not all stmts in the loop need to be vectorized. For example:
3685
3686 for i...
3687 for j...
3688 1. T0 = i + j
3689 2. T1 = a[T0]
3690
3691 3. j = j + 1
3692
3693 Stmt 1 and 3 do not need to be vectorized, because loop control and
3694 addressing of vectorized data-refs are handled differently.
3695
3696 This pass detects such stmts. */
3697
3698 static bool
3699 vect_mark_stmts_to_be_vectorized (loop_vec_info loop_vinfo)
3700 {
3701 varray_type worklist;
3702 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
3703 basic_block *bbs = LOOP_VINFO_BBS (loop_vinfo);
3704 unsigned int nbbs = loop->num_nodes;
3705 block_stmt_iterator si;
3706 tree stmt;
3707 stmt_ann_t ann;
3708 unsigned int i;
3709 int j;
3710 use_optype use_ops;
3711 stmt_vec_info stmt_info;
3712
3713 if (vect_debug_details (NULL))
3714 fprintf (dump_file, "\n<<vect_mark_stmts_to_be_vectorized>>\n");
3715
3716 VARRAY_TREE_INIT (worklist, 64, "work list");
3717
3718 /* 1. Init worklist. */
3719
3720 for (i = 0; i < nbbs; i++)
3721 {
3722 basic_block bb = bbs[i];
3723 for (si = bsi_start (bb); !bsi_end_p (si); bsi_next (&si))
3724 {
3725 stmt = bsi_stmt (si);
3726
3727 if (vect_debug_details (NULL))
3728 {
3729 fprintf (dump_file, "init: stmt relevant? ");
3730 print_generic_expr (dump_file, stmt, TDF_SLIM);
3731 }
3732
3733 stmt_info = vinfo_for_stmt (stmt);
3734 STMT_VINFO_RELEVANT_P (stmt_info) = 0;
3735
3736 if (vect_stmt_relevant_p (stmt, loop_vinfo))
3737 vect_mark_relevant (worklist, stmt);
3738 }
3739 }
3740
3741
3742 /* 2. Process_worklist */
3743
3744 while (VARRAY_ACTIVE_SIZE (worklist) > 0)
3745 {
3746 stmt = VARRAY_TOP_TREE (worklist);
3747 VARRAY_POP (worklist);
3748
3749 if (vect_debug_details (NULL))
3750 {
3751 fprintf (dump_file, "worklist: examine stmt: ");
3752 print_generic_expr (dump_file, stmt, TDF_SLIM);
3753 }
3754
3755 /* Examine the USES in this statement. Mark all the statements which
3756 feed this statement's uses as "relevant", unless the USE is used as
3757 an array index. */
3758
3759 if (TREE_CODE (stmt) == PHI_NODE)
3760 {
3761 /* follow the def-use chain inside the loop. */
3762 for (j = 0; j < PHI_NUM_ARGS (stmt); j++)
3763 {
3764 tree arg = PHI_ARG_DEF (stmt, j);
3765 tree def_stmt = NULL_TREE;
3766 basic_block bb;
3767 if (!vect_is_simple_use (arg, loop, &def_stmt))
3768 {
3769 if (vect_debug_details (NULL))
3770 fprintf (dump_file, "worklist: unsupported use.");
3771 varray_clear (worklist);
3772 return false;
3773 }
3774 if (!def_stmt)
3775 continue;
3776
3777 if (vect_debug_details (NULL))
3778 {
3779 fprintf (dump_file, "worklist: def_stmt: ");
3780 print_generic_expr (dump_file, def_stmt, TDF_SLIM);
3781 }
3782
3783 bb = bb_for_stmt (def_stmt);
3784 if (flow_bb_inside_loop_p (loop, bb))
3785 vect_mark_relevant (worklist, def_stmt);
3786 }
3787 }
3788
3789 ann = stmt_ann (stmt);
3790 use_ops = USE_OPS (ann);
3791
3792 for (i = 0; i < NUM_USES (use_ops); i++)
3793 {
3794 tree use = USE_OP (use_ops, i);
3795
3796 /* We are only interested in uses that need to be vectorized. Uses
3797 that are used for address computation are not considered relevant.
3798 */
3799 if (exist_non_indexing_operands_for_use_p (use, stmt))
3800 {
3801 tree def_stmt = NULL_TREE;
3802 basic_block bb;
3803 if (!vect_is_simple_use (use, loop, &def_stmt))
3804 {
3805 if (vect_debug_details (NULL))
3806 fprintf (dump_file, "worklist: unsupported use.");
3807 varray_clear (worklist);
3808 return false;
3809 }
3810
3811 if (!def_stmt)
3812 continue;
3813
3814 if (vect_debug_details (NULL))
3815 {
3816 fprintf (dump_file, "worklist: examine use %d: ", i);
3817 print_generic_expr (dump_file, use, TDF_SLIM);
3818 }
3819
3820 bb = bb_for_stmt (def_stmt);
3821 if (flow_bb_inside_loop_p (loop, bb))
3822 vect_mark_relevant (worklist, def_stmt);
3823 }
3824 }
3825 } /* while worklist */
3826
3827 varray_clear (worklist);
3828 return true;
3829 }
3830
3831
3832 /* Function vect_get_loop_niters.
3833
3834 Determine how many iterations the loop is executed. */
3835
3836 static tree
3837 vect_get_loop_niters (struct loop *loop, HOST_WIDE_INT *number_of_iterations)
3838 {
3839 tree niters;
3840
3841 if (vect_debug_details (NULL))
3842 fprintf (dump_file, "\n<<get_loop_niters>>\n");
3843
3844 niters = number_of_iterations_in_loop (loop);
3845
3846 if (niters != NULL_TREE
3847 && niters != chrec_dont_know
3848 && host_integerp (niters,0))
3849 {
3850 *number_of_iterations = TREE_INT_CST_LOW (niters);
3851
3852 if (vect_debug_details (NULL))
3853 fprintf (dump_file, "==> get_loop_niters:" HOST_WIDE_INT_PRINT_DEC,
3854 *number_of_iterations);
3855 }
3856
3857 return get_loop_exit_condition (loop);
3858 }
3859
3860
3861 /* Function vect_analyze_loop_form.
3862
3863 Verify the following restrictions (some may be relaxed in the future):
3864 - it's an inner-most loop
3865 - number of BBs = 2 (which are the loop header and the latch)
3866 - the loop has a pre-header
3867 - the loop has a single entry and exit
3868 - the loop exit condition is simple enough, and the number of iterations
3869 can be analyzed (a countable loop). */
3870
3871 static loop_vec_info
3872 vect_analyze_loop_form (struct loop *loop)
3873 {
3874 loop_vec_info loop_vinfo;
3875 tree loop_cond;
3876 HOST_WIDE_INT number_of_iterations = -1;
3877
3878 if (vect_debug_details (loop))
3879 fprintf (dump_file, "\n<<vect_analyze_loop_form>>\n");
3880
3881 if (loop->inner
3882 || !loop->single_exit
3883 || loop->num_nodes != 2)
3884 {
3885 if (vect_debug_stats (loop) || vect_debug_details (loop))
3886 {
3887 fprintf (dump_file, "not vectorized: bad loop form. ");
3888 if (loop->inner)
3889 fprintf (dump_file, "nested loop.");
3890 else if (!loop->single_exit)
3891 fprintf (dump_file, "multiple exits.");
3892 else if (loop->num_nodes != 2)
3893 fprintf (dump_file, "too many BBs in loop.");
3894 }
3895
3896 return NULL;
3897 }
3898
3899 /* We assume that the loop exit condition is at the end of the loop. i.e,
3900 that the loop is represented as a do-while (with a proper if-guard
3901 before the loop if needed), where the loop header contains all the
3902 executable statements, and the latch is empty. */
3903 if (!empty_block_p (loop->latch))
3904 {
3905 if (vect_debug_stats (loop) || vect_debug_details (loop))
3906 fprintf (dump_file, "not vectorized: unexpectd loop form.");
3907 return NULL;
3908 }
3909
3910 if (empty_block_p (loop->header))
3911 {
3912 if (vect_debug_stats (loop) || vect_debug_details (loop))
3913 fprintf (dump_file, "not vectorized: empty loop.");
3914 return NULL;
3915 }
3916
3917 loop_cond = vect_get_loop_niters (loop, &number_of_iterations);
3918 if (!loop_cond)
3919 {
3920 if (vect_debug_stats (loop) || vect_debug_details (loop))
3921 fprintf (dump_file, "not vectorized: complicated exit condition.");
3922 return NULL;
3923 }
3924
3925 if (number_of_iterations < 0)
3926 {
3927 if (vect_debug_stats (loop) || vect_debug_details (loop))
3928 fprintf (dump_file, "not vectorized: unknown loop bound.");
3929 return NULL;
3930 }
3931
3932 if (number_of_iterations == 0) /* CHECKME: can this happen? */
3933 {
3934 if (vect_debug_stats (loop) || vect_debug_details (loop))
3935 fprintf (dump_file, "not vectorized: number of iterations = 0.");
3936 return NULL;
3937 }
3938
3939 loop_vinfo = new_loop_vec_info (loop);
3940 LOOP_VINFO_EXIT_COND (loop_vinfo) = loop_cond;
3941 LOOP_VINFO_NITERS (loop_vinfo) = number_of_iterations;
3942
3943 return loop_vinfo;
3944 }
3945
3946
3947 /* Function vect_analyze_loop.
3948
3949 Apply a set of analyses on LOOP, and create a loop_vec_info struct
3950 for it. The different analyses will record information in the
3951 loop_vec_info struct. */
3952
3953 static loop_vec_info
3954 vect_analyze_loop (struct loop *loop)
3955 {
3956 bool ok;
3957 loop_vec_info loop_vinfo;
3958
3959 if (vect_debug_details (NULL))
3960 fprintf (dump_file, "\n<<<<<<< analyze_loop_nest >>>>>>>\n");
3961
3962 /* Check the CFG characteristics of the loop (nesting, entry/exit, etc. */
3963
3964 loop_vinfo = vect_analyze_loop_form (loop);
3965 if (!loop_vinfo)
3966 {
3967 if (vect_debug_details (loop))
3968 fprintf (dump_file, "bad loop form.");
3969 return NULL;
3970 }
3971
3972 /* Find all data references in the loop (which correspond to vdefs/vuses)
3973 and analyze their evolution in the loop.
3974
3975 FORNOW: Handle only simple, array references, which
3976 alignment can be forced, and aligned pointer-references. */
3977
3978 ok = vect_analyze_data_refs (loop_vinfo);
3979 if (!ok)
3980 {
3981 if (vect_debug_details (loop))
3982 fprintf (dump_file, "bad data references.");
3983 destroy_loop_vec_info (loop_vinfo);
3984 return NULL;
3985 }
3986
3987 /* Data-flow analysis to detect stmts that do not need to be vectorized. */
3988
3989 ok = vect_mark_stmts_to_be_vectorized (loop_vinfo);
3990 if (!ok)
3991 {
3992 if (vect_debug_details (loop))
3993 fprintf (dump_file, "unexpected pattern.");
3994 if (vect_debug_details (loop))
3995 fprintf (dump_file, "not vectorized: unexpected pattern.");
3996 destroy_loop_vec_info (loop_vinfo);
3997 return NULL;
3998 }
3999
4000 /* Check that all cross-iteration scalar data-flow cycles are OK.
4001 Cross-iteration cycles caused by virtual phis are analyzed separately. */
4002
4003 ok = vect_analyze_scalar_cycles (loop_vinfo);
4004 if (!ok)
4005 {
4006 if (vect_debug_details (loop))
4007 fprintf (dump_file, "bad scalar cycle.");
4008 destroy_loop_vec_info (loop_vinfo);
4009 return NULL;
4010 }
4011
4012 /* Analyze data dependences between the data-refs in the loop.
4013 FORNOW: fail at the first data dependence that we encounter. */
4014
4015 ok = vect_analyze_data_ref_dependences (loop_vinfo);
4016 if (!ok)
4017 {
4018 if (vect_debug_details (loop))
4019 fprintf (dump_file, "bad data dependence.");
4020 destroy_loop_vec_info (loop_vinfo);
4021 return NULL;
4022 }
4023
4024 /* Analyze the access patterns of the data-refs in the loop (consecutive,
4025 complex, etc.). FORNOW: Only handle consecutive access pattern. */
4026
4027 ok = vect_analyze_data_ref_accesses (loop_vinfo);
4028 if (!ok)
4029 {
4030 if (vect_debug_details (loop))
4031 fprintf (dump_file, "bad data access.");
4032 destroy_loop_vec_info (loop_vinfo);
4033 return NULL;
4034 }
4035
4036 /* Analyze the alignment of the data-refs in the loop.
4037 FORNOW: Only aligned accesses are handled. */
4038
4039 ok = vect_analyze_data_refs_alignment (loop_vinfo);
4040 if (!ok)
4041 {
4042 if (vect_debug_details (loop))
4043 fprintf (dump_file, "bad data alignment.");
4044 destroy_loop_vec_info (loop_vinfo);
4045 return NULL;
4046 }
4047
4048 /* Scan all the operations in the loop and make sure they are
4049 vectorizable. */
4050
4051 ok = vect_analyze_operations (loop_vinfo);
4052 if (!ok)
4053 {
4054 if (vect_debug_details (loop))
4055 fprintf (dump_file, "bad operation or unsupported loop bound.");
4056 destroy_loop_vec_info (loop_vinfo);
4057 return NULL;
4058 }
4059
4060 LOOP_VINFO_VECTORIZABLE_P (loop_vinfo) = 1;
4061
4062 return loop_vinfo;
4063 }
4064
4065
4066 /* Function need_imm_uses_for.
4067
4068 Return whether we ought to include information for 'var'
4069 when calculating immediate uses. For this pass we only want use
4070 information for non-virtual variables. */
4071
4072 static bool
4073 need_imm_uses_for (tree var)
4074 {
4075 return is_gimple_reg (var);
4076 }
4077
4078
/* Function vectorize_loops.

   Entry Point to loop vectorization phase.  Walks all natural loops in
   LOOPS, analyzes each one for vectorizability, transforms those that
   qualify, then releases the per-loop analysis data and restores SSA
   form invariants disturbed by the transformation.  */

void
vectorize_loops (struct loops *loops)
{
  unsigned int i, loops_num;
  unsigned int num_vectorized_loops = 0;

  /* Does the target support SIMD? */
  /* FORNOW: until more sophisticated machine modelling is in place.  */
  if (!UNITS_PER_SIMD_WORD)
    {
      if (vect_debug_details (NULL))
        fprintf (dump_file, "vectorizer: target vector size is not defined.");
      return;
    }

  /* Build immediate-use information for real (non-virtual) operands only;
     the analysis phases below rely on it.  */
  compute_immediate_uses (TDFA_USE_OPS, need_imm_uses_for);

  /* ----------- Analyze loops. ----------- */

  /* If some loop was duplicated, it gets bigger number
     than all previously defined loops.  This fact allows us to run
     only over initial loops skipping newly generated ones.
     NOTE: loops_num is snapshotted before any transformation, so loop
     copies created by vect_transform_loop are never revisited.  */
  loops_num = loops->num;
  for (i = 1; i < loops_num; i++)
    {
      loop_vec_info loop_vinfo;
      struct loop *loop = loops->parray[i];

      /* Slots in the loop array may be empty (removed loops).  */
      if (!loop)
        continue;

      /* Run the full analysis pipeline; the result (possibly NULL) is
         stashed in loop->aux so the cleanup loop below can free it.  */
      loop_vinfo = vect_analyze_loop (loop);
      loop->aux = loop_vinfo;

      if (!loop_vinfo || !LOOP_VINFO_VECTORIZABLE_P (loop_vinfo))
        continue;

      vect_transform_loop (loop_vinfo, loops);
      num_vectorized_loops++;
    }

  if (vect_debug_stats (NULL) || vect_debug_details (NULL))
    fprintf (dump_file, "\nvectorized %u loops in function.\n",
             num_vectorized_loops);

  /* ----------- Finalize. ----------- */

  /* Release dataflow info before the per-loop structures that may
     reference it.  */
  free_df ();
  for (i = 1; i < loops_num; i++)
    {
      struct loop *loop = loops->parray[i];
      loop_vec_info loop_vinfo;

      if (!loop)
        continue;
      /* loop->aux was set (possibly to NULL) by the analysis loop above;
         destroy_loop_vec_info is presumably a no-op on NULL — it is called
         unconditionally here.  TODO(review): confirm NULL-safety.  */
      loop_vinfo = loop->aux;
      destroy_loop_vec_info (loop_vinfo);
      loop->aux = NULL;
    }

  /* Update SSA form for any variables the transformation scheduled for
     renaming.  */
  rewrite_into_ssa (false);
  if (bitmap_first_set_bit (vars_to_rename) >= 0)
    {
      /* The rewrite of ssa names may cause violation of loop closed ssa
         form invariants.  TODO -- avoid these rewrites completely.
         Information in virtual phi nodes is sufficient for it.  */
      rewrite_into_loop_closed_ssa ();
    }
  bitmap_clear (vars_to_rename);
}