2 Copyright (C) 2003, 2004 Free Software Foundation, Inc.
3 Contributed by Dorit Naishlos <dorit@il.ibm.com>
5 This file is part of GCC.
7 GCC is free software; you can redistribute it and/or modify it under
8 the terms of the GNU General Public License as published by the Free
9 Software Foundation; either version 2, or (at your option) any later version.
12 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
13 WARRANTY; without even the implied warranty of MERCHANTABILITY or
14 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
17 You should have received a copy of the GNU General Public License
18 along with GCC; see the file COPYING. If not, write to the Free
19 Software Foundation, 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
22 /* Loop Vectorization Pass.
24 This pass tries to vectorize loops. This first implementation focuses on
25 simple inner-most loops, with no conditional control flow, and a set of
26 simple operations which vector form can be expressed using existing
27 tree codes (PLUS, MULT etc).
29 For example, the vectorizer transforms the following simple loop:
31 short a[N]; short b[N]; short c[N]; int i;
37 as if it was manually vectorized by rewriting the source code into:
39 typedef int __attribute__((mode(V8HI))) v8hi;
40 short a[N]; short b[N]; short c[N]; int i;
41 v8hi *pa = (v8hi*)a, *pb = (v8hi*)b, *pc = (v8hi*)c;
44 for (i=0; i<N/8; i++){
51 The main entry to this pass is vectorize_loops(), in which
52 the vectorizer applies a set of analyses on a given set of loops,
53 followed by the actual vectorization transformation for the loops that
54 had successfully passed the analysis phase.
56 Throughout this pass we make a distinction between two types of
57 data: scalars (which are represented by SSA_NAMES), and memory references
58 ("data-refs"). These two types of data require different handling both
59 during analysis and transformation. The types of data-refs that the
60 vectorizer currently supports are ARRAY_REFS which base is an array DECL
61 (not a pointer), and INDIRECT_REFS through pointers; both array and pointer
62 accesses are required to have a simple (consecutive) access pattern.
66 The driver for the analysis phase is vect_analyze_loop_nest().
67 It applies a set of analyses, some of which rely on the scalar evolution
68 analyzer (scev) developed by Sebastian Pop.
70 During the analysis phase the vectorizer records some information
71 per stmt in a "stmt_vec_info" struct which is attached to each stmt in the
72 loop, as well as general information about the loop as a whole, which is
73 recorded in a "loop_vec_info" struct attached to each loop.
77 The loop transformation phase scans all the stmts in the loop, and
78 creates a vector stmt (or a sequence of stmts) for each scalar stmt S in
79 the loop that needs to be vectorized. It inserts the vector code sequence
80 just before the scalar stmt S, and records a pointer to the vector code
81 in STMT_VINFO_VEC_STMT (stmt_info) (stmt_info is the stmt_vec_info struct
82 attached to S). This pointer will be used for the vectorization of following
83 stmts which use the def of stmt S. Stmt S is removed if it writes to memory;
84 otherwise, we rely on dead code elimination for removing it.
86 For example, say stmt S1 was vectorized into stmt VS1:
89 S1: b = x[i]; STMT_VINFO_VEC_STMT (stmt_info (S1)) = VS1
92 To vectorize stmt S2, the vectorizer first finds the stmt that defines
93 the operand 'b' (S1), and gets the relevant vector def 'vb' from the
94 vector stmt VS1 pointed by STMT_VINFO_VEC_STMT (stmt_info (S1)). The
95 resulting sequence would be:
98 S1: b = x[i]; STMT_VINFO_VEC_STMT (stmt_info (S1)) = VS1
100 S2: a = b; STMT_VINFO_VEC_STMT (stmt_info (S2)) = VS2
102 Operands that are not SSA_NAMEs, are data-refs that appear in
103 load/store operations (like 'x[i]' in S1), and are handled differently.
107 Currently the only target specific information that is used is the
108 size of the vector (in bytes) - "UNITS_PER_SIMD_WORD". Targets that can
109 support different sizes of vectors, for now will need to specify one value
110 for "UNITS_PER_SIMD_WORD". More flexibility will be added in the future.
112 Since we only vectorize operations which vector form can be
113 expressed using existing tree codes, to verify that an operation is
114 supported, the vectorizer checks the relevant optab at the relevant
115 machine_mode (e.g, add_optab->handlers[(int) V8HImode].insn_code). If
116 the value found is CODE_FOR_nothing, then there's no target support, and
117 we can't vectorize the stmt.
119 For additional information on this project see:
120 http://gcc.gnu.org/projects/tree-ssa/vectorization.html
125 #include "coretypes.h"
133 #include "basic-block.h"
134 #include "diagnostic.h"
135 #include "tree-flow.h"
136 #include "tree-dump.h"
139 #include "cfglayout.h"
142 #include "tree-chrec.h"
143 #include "tree-data-ref.h"
144 #include "tree-scalar-evolution.h"
145 #include "tree-vectorizer.h"
146 #include "tree-pass.h"
148 /* Main analysis functions. */
149 static loop_vec_info
vect_analyze_loop (struct loop
*);
150 static loop_vec_info
vect_analyze_loop_form (struct loop
*);
151 static bool vect_analyze_data_refs (loop_vec_info
);
152 static bool vect_mark_stmts_to_be_vectorized (loop_vec_info
);
153 static bool vect_analyze_scalar_cycles (loop_vec_info
);
154 static bool vect_analyze_data_ref_accesses (loop_vec_info
);
155 static bool vect_analyze_data_refs_alignment (loop_vec_info
);
156 static void vect_compute_data_refs_alignment (loop_vec_info
);
157 static bool vect_analyze_operations (loop_vec_info
);
159 /* Main code transformation functions. */
160 static void vect_transform_loop (loop_vec_info
, struct loops
*);
161 static void vect_transform_loop_bound (loop_vec_info
);
162 static bool vect_transform_stmt (tree
, block_stmt_iterator
*);
163 static bool vectorizable_load (tree
, block_stmt_iterator
*, tree
*);
164 static bool vectorizable_store (tree
, block_stmt_iterator
*, tree
*);
165 static bool vectorizable_operation (tree
, block_stmt_iterator
*, tree
*);
166 static bool vectorizable_assignment (tree
, block_stmt_iterator
*, tree
*);
167 static void vect_align_data_ref (tree
);
168 static void vect_enhance_data_refs_alignment (loop_vec_info
);
170 /* Utility functions for the analyses. */
171 static bool vect_is_simple_use (tree
, struct loop
*, tree
*);
172 static bool exist_non_indexing_operands_for_use_p (tree
, tree
);
173 static bool vect_is_simple_iv_evolution (unsigned, tree
, tree
*, tree
*, bool);
174 static void vect_mark_relevant (varray_type
, tree
);
175 static bool vect_stmt_relevant_p (tree
, loop_vec_info
);
176 static tree
vect_get_loop_niters (struct loop
*, HOST_WIDE_INT
*);
177 static bool vect_compute_data_ref_alignment
178 (struct data_reference
*, loop_vec_info
);
179 static bool vect_analyze_data_ref_access (struct data_reference
*);
180 static bool vect_get_first_index (tree
, tree
*);
181 static bool vect_can_force_dr_alignment_p (tree
, unsigned int);
182 static struct data_reference
* vect_analyze_pointer_ref_access
184 static tree vect_get_base_and_bit_offset
185 (struct data_reference
*, tree
, tree
, loop_vec_info
, tree
*, bool*);
186 static struct data_reference
* vect_analyze_pointer_ref_access
188 static tree
vect_compute_array_base_alignment (tree
, tree
, tree
*, tree
*);
189 static tree vect_compute_array_ref_alignment
190 (struct data_reference
*, loop_vec_info
, tree
, tree
*);
191 static tree
vect_get_ptr_offset (tree
, tree
, tree
*);
192 static tree vect_get_symbl_and_dr
193 (tree
, tree
, bool, loop_vec_info
, struct data_reference
**);
195 /* Utility functions for the code transformation. */
196 static tree
vect_create_destination_var (tree
, tree
);
197 static tree vect_create_data_ref_ptr
198 (tree
, block_stmt_iterator
*, tree
, tree
*, bool);
199 static tree vect_create_index_for_vector_ref
200 (struct loop
*, block_stmt_iterator
*);
201 static tree
vect_create_addr_base_for_vector_ref (tree
, tree
*, tree
);
202 static tree
get_vectype_for_scalar_type (tree
);
203 static tree
vect_get_new_vect_var (tree
, enum vect_var_kind
, const char *);
204 static tree
vect_get_vec_def_for_operand (tree
, tree
);
205 static tree
vect_init_vector (tree
, tree
);
206 static void vect_finish_stmt_generation
207 (tree stmt
, tree vec_stmt
, block_stmt_iterator
*bsi
);
209 /* Utilities for creation and deletion of vec_info structs. */
210 loop_vec_info
new_loop_vec_info (struct loop
*loop
);
211 void destroy_loop_vec_info (loop_vec_info
);
212 stmt_vec_info
new_stmt_vec_info (tree stmt
, struct loop
*loop
);
214 static bool vect_debug_stats (struct loop
*loop
);
215 static bool vect_debug_details (struct loop
*loop
);
218 /* Function new_stmt_vec_info.
220 Create and initialize a new stmt_vec_info struct for STMT. */
223 new_stmt_vec_info (tree stmt
, struct loop
*loop
)
226 res
= (stmt_vec_info
) xcalloc (1, sizeof (struct _stmt_vec_info
));
228 STMT_VINFO_TYPE (res
) = undef_vec_info_type
;
229 STMT_VINFO_STMT (res
) = stmt
;
230 STMT_VINFO_LOOP (res
) = loop
;
231 STMT_VINFO_RELEVANT_P (res
) = 0;
232 STMT_VINFO_VECTYPE (res
) = NULL
;
233 STMT_VINFO_VEC_STMT (res
) = NULL
;
234 STMT_VINFO_DATA_REF (res
) = NULL
;
235 STMT_VINFO_MEMTAG (res
) = NULL
;
236 STMT_VINFO_VECT_DR_BASE (res
) = NULL
;
242 /* Function new_loop_vec_info.
244 Create and initialize a new loop_vec_info struct for LOOP, as well as
245 stmt_vec_info structs for all the stmts in LOOP. */
248 new_loop_vec_info (struct loop
*loop
)
252 block_stmt_iterator si
;
255 res
= (loop_vec_info
) xcalloc (1, sizeof (struct _loop_vec_info
));
257 bbs
= get_loop_body (loop
);
259 /* Create stmt_info for all stmts in the loop. */
260 for (i
= 0; i
< loop
->num_nodes
; i
++)
262 basic_block bb
= bbs
[i
];
263 for (si
= bsi_start (bb
); !bsi_end_p (si
); bsi_next (&si
))
265 tree stmt
= bsi_stmt (si
);
268 get_stmt_operands (stmt
);
269 ann
= stmt_ann (stmt
);
270 set_stmt_info (ann
, new_stmt_vec_info (stmt
, loop
));
274 LOOP_VINFO_LOOP (res
) = loop
;
275 LOOP_VINFO_BBS (res
) = bbs
;
276 LOOP_VINFO_EXIT_COND (res
) = NULL
;
277 LOOP_VINFO_NITERS (res
) = -1;
278 LOOP_VINFO_VECTORIZABLE_P (res
) = 0;
279 LOOP_VINFO_VECT_FACTOR (res
) = 0;
280 VARRAY_GENERIC_PTR_INIT (LOOP_VINFO_DATAREF_WRITES (res
), 20,
281 "loop_write_datarefs");
282 VARRAY_GENERIC_PTR_INIT (LOOP_VINFO_DATAREF_READS (res
), 20,
283 "loop_read_datarefs");
288 /* Function destroy_loop_vec_info.
290 Free LOOP_VINFO struct, as well as all the stmt_vec_info structs of all the
291 stmts in the loop. */
294 destroy_loop_vec_info (loop_vec_info loop_vinfo
)
299 block_stmt_iterator si
;
305 loop
= LOOP_VINFO_LOOP (loop_vinfo
);
307 bbs
= LOOP_VINFO_BBS (loop_vinfo
);
308 nbbs
= loop
->num_nodes
;
310 for (j
= 0; j
< nbbs
; j
++)
312 basic_block bb
= bbs
[j
];
313 for (si
= bsi_start (bb
); !bsi_end_p (si
); bsi_next (&si
))
315 tree stmt
= bsi_stmt (si
);
316 stmt_ann_t ann
= stmt_ann (stmt
);
317 stmt_vec_info stmt_info
= vinfo_for_stmt (stmt
);
319 set_stmt_info (ann
, NULL
);
323 free (LOOP_VINFO_BBS (loop_vinfo
));
324 varray_clear (LOOP_VINFO_DATAREF_WRITES (loop_vinfo
));
325 varray_clear (LOOP_VINFO_DATAREF_READS (loop_vinfo
));
331 /* Function vect_debug_stats.
333 For vectorization statistics dumps. */
336 vect_debug_stats (struct loop
*loop
)
339 block_stmt_iterator si
;
340 tree node
= NULL_TREE
;
342 if (!dump_file
|| !(dump_flags
& TDF_STATS
))
347 fprintf (dump_file
, "\n");
356 for (si
= bsi_start (bb
); !bsi_end_p (si
); bsi_next (&si
))
358 node
= bsi_stmt (si
);
359 if (node
&& EXPR_P (node
) && EXPR_LOCUS (node
))
363 if (node
&& EXPR_P (node
) && EXPR_LOCUS (node
)
364 && EXPR_FILENAME (node
) && EXPR_LINENO (node
))
366 fprintf (dump_file
, "\nloop at %s:%d: ",
367 EXPR_FILENAME (node
), EXPR_LINENO (node
));
375 /* Function vect_debug_details.
377 For vectorization debug dumps. */
380 vect_debug_details (struct loop
*loop
)
383 block_stmt_iterator si
;
384 tree node
= NULL_TREE
;
386 if (!dump_file
|| !(dump_flags
& TDF_DETAILS
))
391 fprintf (dump_file
, "\n");
400 for (si
= bsi_start (bb
); !bsi_end_p (si
); bsi_next (&si
))
402 node
= bsi_stmt (si
);
403 if (node
&& EXPR_P (node
) && EXPR_LOCUS (node
))
407 if (node
&& EXPR_P (node
) && EXPR_LOCUS (node
)
408 && EXPR_FILENAME (node
) && EXPR_LINENO (node
))
410 fprintf (dump_file
, "\nloop at %s:%d: ",
411 EXPR_FILENAME (node
), EXPR_LINENO (node
));
419 /* Function vect_get_ptr_offset
421 Compute the OFFSET modulo vector-type alignment of pointer REF in bits. */
424 vect_get_ptr_offset (tree ref ATTRIBUTE_UNUSED
,
425 tree vectype ATTRIBUTE_UNUSED
,
426 tree
*offset ATTRIBUTE_UNUSED
)
428 /* TODO: Use alignment information. */
433 /* Function vect_get_base_and_bit_offset
435 Return the BASE of the data reference EXPR.
436 If VECTYPE is given, also compute the OFFSET from BASE in bits.
437 E.g., for EXPR a.b[i] + 4B, BASE is a, and OFFSET is the overall offset in
438 bits of 'a.b[i] + 4B' from a.
441 EXPR - the memory reference that is being analyzed
442 DR - the data_reference struct of the _original_ memory reference
443 (Note: DR_REF (DR) is not necessarily EXPR)
444 VECTYPE - the type that defines the alignment (i.e, we compute
445 alignment relative to TYPE_ALIGN(VECTYPE))
448 BASE (returned value) - the base of the data reference EXPR.
449 E.g, if EXPR is a.b[k].c[i][j] the returned
451 OFFSET - offset of EXPR from BASE in bits
452 BASE_ALIGNED_P - indicates if BASE is aligned
454 If something unexpected is encountered (an unsupported form of data-ref),
455 or if VECTYPE is given but OFFSET cannot be determined:
456 then NULL_TREE is returned. */
459 vect_get_base_and_bit_offset (struct data_reference
*dr
,
462 loop_vec_info loop_vinfo
,
464 bool *base_aligned_p
)
466 tree this_offset
= size_zero_node
;
467 tree base
= NULL_TREE
;
470 struct data_reference
*array_dr
;
471 enum tree_code code
= TREE_CODE (expr
);
473 *base_aligned_p
= false;
477 /* These cases end the recursion: */
479 *offset
= size_zero_node
;
480 if (vectype
&& DECL_ALIGN (expr
) >= TYPE_ALIGN (vectype
))
481 *base_aligned_p
= true;
488 if (TREE_CODE (TREE_TYPE (expr
)) != POINTER_TYPE
)
491 if (TYPE_ALIGN (TREE_TYPE (TREE_TYPE (expr
))) < TYPE_ALIGN (vectype
))
493 base
= vect_get_ptr_offset (expr
, vectype
, offset
);
495 *base_aligned_p
= true;
499 *base_aligned_p
= true;
500 *offset
= size_zero_node
;
506 *offset
= int_const_binop (MULT_EXPR
, expr
,
507 build_int_cst (NULL_TREE
, BITS_PER_UNIT
), 1);
510 /* These cases continue the recursion: */
512 oprnd0
= TREE_OPERAND (expr
, 0);
513 oprnd1
= TREE_OPERAND (expr
, 1);
515 this_offset
= bit_position (oprnd1
);
516 if (vectype
&& !host_integerp (this_offset
, 1))
522 oprnd0
= TREE_OPERAND (expr
, 0);
527 oprnd0
= TREE_OPERAND (expr
, 0);
532 if (DR_REF (dr
) != expr
)
533 /* Build array data_reference struct if the existing DR_REF
534 doesn't match EXPR. This happens, for example, when the
535 EXPR is *T and T is initialized to &arr[indx]. The DR struct
536 contains information on the access of T, not of arr. In order
537 to continue the analysis, we create a new DR struct that
538 describes the access of arr.
540 array_dr
= analyze_array (DR_STMT (dr
), expr
, DR_IS_READ (dr
));
544 next_ref
= vect_compute_array_ref_alignment (array_dr
, loop_vinfo
,
545 vectype
, &this_offset
);
550 TYPE_ALIGN (TREE_TYPE (TREE_TYPE (next_ref
))) >= TYPE_ALIGN (vectype
))
552 *offset
= this_offset
;
553 *base_aligned_p
= true;
560 /* In case we have a PLUS_EXPR of the form
561 (oprnd0 + oprnd1), we assume that only oprnd0 determines the base.
562 This is verified in vect_get_symbl_and_dr. */
563 oprnd0
= TREE_OPERAND (expr
, 0);
564 oprnd1
= TREE_OPERAND (expr
, 1);
566 base
= vect_get_base_and_bit_offset
567 (dr
, oprnd1
, vectype
, loop_vinfo
, &this_offset
, base_aligned_p
);
568 if (vectype
&& !base
)
578 base
= vect_get_base_and_bit_offset (dr
, next_ref
, vectype
,
579 loop_vinfo
, offset
, base_aligned_p
);
583 *offset
= int_const_binop (PLUS_EXPR
, *offset
, this_offset
, 1);
584 if (!host_integerp (*offset
, 1) || TREE_OVERFLOW (*offset
))
587 if (vect_debug_details (NULL
))
589 print_generic_expr (dump_file
, expr
, TDF_SLIM
);
590 fprintf (dump_file
, " --> total offset for ref: ");
591 print_generic_expr (dump_file
, *offset
, TDF_SLIM
);
599 /* Function vect_can_force_dr_alignment_p.
601 Returns whether the alignment of a DECL can be forced to be aligned
602 on ALIGNMENT bit boundary. */
605 vect_can_force_dr_alignment_p (tree decl
, unsigned int alignment
)
607 if (TREE_CODE (decl
) != VAR_DECL
)
610 if (DECL_EXTERNAL (decl
))
613 if (TREE_STATIC (decl
))
614 return (alignment
<= MAX_OFILE_ALIGNMENT
);
616 /* This is not 100% correct. The absolute correct stack alignment
617 is STACK_BOUNDARY. We're supposed to hope, but not assume, that
618 PREFERRED_STACK_BOUNDARY is honored by all translation units.
619 However, until someone implements forced stack alignment, SSE
620 isn't really usable without this. */
621 return (alignment
<= PREFERRED_STACK_BOUNDARY
);
625 /* Function vect_get_new_vect_var.
627 Returns a name for a new variable. The current naming scheme appends the
628 prefix "vect_" or "vect_p" (depending on the value of VAR_KIND) to
629 the name of vectorizer generated variables, and appends that to NAME if
633 vect_get_new_vect_var (tree type
, enum vect_var_kind var_kind
, const char *name
)
639 if (var_kind
== vect_simple_var
)
644 prefix_len
= strlen (prefix
);
647 new_vect_var
= create_tmp_var (type
, concat (prefix
, name
, NULL
));
649 new_vect_var
= create_tmp_var (type
, prefix
);
655 /* Function vect_create_index_for_vector_ref.
657 Create (and return) an index variable, along with its update chain in the
658 loop. This variable will be used to access a memory location in a vector
662 LOOP: The loop being vectorized.
663 BSI: The block_stmt_iterator where STMT is. Any new stmts created by this
664 function can be added here, or in the loop pre-header.
667 Return an index that will be used to index a vector array. It is expected
668 that a pointer to the first vector will be used as the base address for the
671 FORNOW: we are not trying to be efficient, just creating a new index each
672 time from scratch. At this time all vector references could use the same
675 TODO: create only one index to be used by all vector references. Record
676 the index in the LOOP_VINFO the first time this procedure is called and
677 return it on subsequent calls. The increment of this index must be placed
678 just before the conditional expression that ends the single block loop. */
681 vect_create_index_for_vector_ref (struct loop
*loop
, block_stmt_iterator
*bsi
)
684 tree indx_before_incr
, indx_after_incr
;
686 /* It is assumed that the base pointer used for vectorized access contains
687 the address of the first vector. Therefore the index used for vectorized
688 access must be initialized to zero and incremented by 1. */
690 init
= integer_zero_node
;
691 step
= integer_one_node
;
693 /* Assuming that bsi_insert is used with BSI_NEW_STMT */
694 create_iv (init
, step
, NULL_TREE
, loop
, bsi
, false,
695 &indx_before_incr
, &indx_after_incr
);
697 return indx_before_incr
;
701 /* Function vect_create_addr_base_for_vector_ref.
703 Create an expression that computes the address of the first memory location
704 that will be accessed for a data reference.
707 STMT: The statement containing the data reference.
708 NEW_STMT_LIST: Must be initialized to NULL_TREE or a statement list.
709 OFFSET: Optional. If supplied, it is added to the initial address.
712 1. Return an SSA_NAME whose value is the address of the memory location of the
713 first vector of the data reference.
714 2. If new_stmt_list is not NULL_TREE after return then the caller must insert
715 these statement(s) which define the returned SSA_NAME.
717 FORNOW: We are only handling array accesses with step 1. */
720 vect_create_addr_base_for_vector_ref (tree stmt
,
724 stmt_vec_info stmt_info
= vinfo_for_stmt (stmt
);
725 struct loop
*loop
= STMT_VINFO_LOOP (stmt_info
);
726 struct data_reference
*dr
= STMT_VINFO_DATA_REF (stmt_info
);
727 tree data_ref_base
= unshare_expr (STMT_VINFO_VECT_DR_BASE (stmt_info
));
728 tree base_name
= unshare_expr (DR_BASE_NAME (dr
));
729 tree ref
= DR_REF (dr
);
730 tree data_ref_base_type
= TREE_TYPE (data_ref_base
);
731 tree scalar_type
= TREE_TYPE (ref
);
732 tree scalar_ptr_type
= build_pointer_type (scalar_type
);
734 tree init_val
, step
, init_oval
;
736 bool is_ptr_ref
, is_array_ref
, is_addr_expr
;
741 tree addr_base
, addr_expr
;
744 /* Only the access function of the last index is relevant (i_n in
745 a[i_1][i_2]...[i_n]), the others correspond to loop invariants. */
746 access_fn
= DR_ACCESS_FN (dr
, 0);
747 ok
= vect_is_simple_iv_evolution (loop
->num
, access_fn
, &init_oval
, &step
, true);
749 init_oval
= integer_zero_node
;
751 is_ptr_ref
= TREE_CODE (data_ref_base_type
) == POINTER_TYPE
752 && TREE_CODE (data_ref_base
) == SSA_NAME
;
753 is_array_ref
= TREE_CODE (data_ref_base_type
) == ARRAY_TYPE
754 && (TREE_CODE (data_ref_base
) == VAR_DECL
755 || TREE_CODE (data_ref_base
) == COMPONENT_REF
756 || TREE_CODE (data_ref_base
) == ARRAY_REF
);
757 is_addr_expr
= TREE_CODE (data_ref_base
) == ADDR_EXPR
758 || TREE_CODE (data_ref_base
) == PLUS_EXPR
759 || TREE_CODE (data_ref_base
) == MINUS_EXPR
;
760 gcc_assert (is_ptr_ref
|| is_array_ref
|| is_addr_expr
);
762 /** Create: &(base[init_val])
764 if data_ref_base is an ARRAY_TYPE:
767 if data_ref_base is the SSA_NAME of a POINTER_TYPE:
768 base = *((scalar_array *) data_ref_base)
772 array_base
= data_ref_base
;
773 else /* is_ptr_ref or is_addr_expr */
775 /* array_ptr = (scalar_array_ptr_type *) data_ref_base; */
776 tree scalar_array_type
= build_array_type (scalar_type
, 0);
777 tree scalar_array_ptr_type
= build_pointer_type (scalar_array_type
);
778 tree array_ptr
= create_tmp_var (scalar_array_ptr_type
, "array_ptr");
779 add_referenced_tmp_var (array_ptr
);
781 dest
= create_tmp_var (TREE_TYPE (data_ref_base
), "dataref");
782 add_referenced_tmp_var (dest
);
784 force_gimple_operand (data_ref_base
, &new_stmt
, false, dest
);
785 append_to_statement_list_force (new_stmt
, new_stmt_list
);
787 vec_stmt
= fold_convert (scalar_array_ptr_type
, data_ref_base
);
788 vec_stmt
= build2 (MODIFY_EXPR
, void_type_node
, array_ptr
, vec_stmt
);
789 new_temp
= make_ssa_name (array_ptr
, vec_stmt
);
790 TREE_OPERAND (vec_stmt
, 0) = new_temp
;
791 append_to_statement_list_force (vec_stmt
, new_stmt_list
);
794 array_base
= build_fold_indirect_ref (new_temp
);
797 dest
= create_tmp_var (TREE_TYPE (init_oval
), "newinit");
798 add_referenced_tmp_var (dest
);
799 init_val
= force_gimple_operand (init_oval
, &new_stmt
, false, dest
);
800 append_to_statement_list_force (new_stmt
, new_stmt_list
);
804 tree tmp
= create_tmp_var (TREE_TYPE (init_val
), "offset");
805 add_referenced_tmp_var (tmp
);
806 vec_stmt
= build2 (PLUS_EXPR
, TREE_TYPE (init_val
), init_val
, offset
);
807 vec_stmt
= build2 (MODIFY_EXPR
, TREE_TYPE (init_val
), tmp
, vec_stmt
);
808 init_val
= make_ssa_name (tmp
, vec_stmt
);
809 TREE_OPERAND (vec_stmt
, 0) = init_val
;
810 append_to_statement_list_force (vec_stmt
, new_stmt_list
);
813 array_ref
= build4 (ARRAY_REF
, scalar_type
, array_base
, init_val
,
814 NULL_TREE
, NULL_TREE
);
815 addr_base
= build_fold_addr_expr (array_ref
);
817 /* addr_expr = addr_base */
818 addr_expr
= vect_get_new_vect_var (scalar_ptr_type
, vect_pointer_var
,
819 get_name (base_name
));
820 add_referenced_tmp_var (addr_expr
);
821 vec_stmt
= build2 (MODIFY_EXPR
, void_type_node
, addr_expr
, addr_base
);
822 new_temp
= make_ssa_name (addr_expr
, vec_stmt
);
823 TREE_OPERAND (vec_stmt
, 0) = new_temp
;
824 append_to_statement_list_force (vec_stmt
, new_stmt_list
);
830 /* Function get_vectype_for_scalar_type.
832 Returns the vector type corresponding to SCALAR_TYPE as supported
836 get_vectype_for_scalar_type (tree scalar_type
)
838 enum machine_mode inner_mode
= TYPE_MODE (scalar_type
);
839 int nbytes
= GET_MODE_SIZE (inner_mode
);
846 /* FORNOW: Only a single vector size per target (UNITS_PER_SIMD_WORD)
848 nunits
= UNITS_PER_SIMD_WORD
/ nbytes
;
850 vectype
= build_vector_type (scalar_type
, nunits
);
851 if (TYPE_MODE (vectype
) == BLKmode
)
857 /* Function vect_align_data_ref.
859 Handle misalignment of memory accesses.
861 FORNOW: Can't handle misaligned accesses.
862 Make sure that the dataref is aligned. */
865 vect_align_data_ref (tree stmt
)
867 stmt_vec_info stmt_info
= vinfo_for_stmt (stmt
);
868 struct data_reference
*dr
= STMT_VINFO_DATA_REF (stmt_info
);
870 /* FORNOW: can't handle misaligned accesses;
871 all accesses expected to be aligned. */
872 gcc_assert (aligned_access_p (dr
));
876 /* Function vect_create_data_ref_ptr.
878 Create a memory reference expression for vector access, to be used in a
879 vector load/store stmt. The reference is based on a new pointer to vector
883 1. STMT: a stmt that references memory. Expected to be of the form
884 MODIFY_EXPR <name, data-ref> or MODIFY_EXPR <data-ref, name>.
885 2. BSI: block_stmt_iterator where new stmts can be added.
886 3. OFFSET (optional): an offset to be added to the initial address accessed
887 by the data-ref in STMT.
888 4. ONLY_INIT: indicate if vp is to be updated in the loop, or remain
889 pointing to the initial address.
892 1. Declare a new ptr to vector_type, and have it point to the base of the
893 data reference (initial address accessed by the data reference).
894 For example, for vector of type V8HI, the following code is generated:
897 vp = (v8hi *)initial_address;
899 if OFFSET is not supplied:
900 initial_address = &a[init];
901 if OFFSET is supplied:
902 initial_address = &a[init + OFFSET];
904 Return the initial_address in INITIAL_ADDRESS.
906 2. Create a data-reference in the loop based on the new vector pointer vp,
907 and using a new index variable 'idx' as follows:
911 where if ONLY_INIT is true:
914 update = idx + vector_type_size
916 Return the pointer vp'.
919 FORNOW: handle only aligned and consecutive accesses. */
922 vect_create_data_ref_ptr (tree stmt
, block_stmt_iterator
*bsi
, tree offset
,
923 tree
*initial_address
, bool only_init
)
926 stmt_vec_info stmt_info
= vinfo_for_stmt (stmt
);
927 struct data_reference
*dr
= STMT_VINFO_DATA_REF (stmt_info
);
928 struct loop
*loop
= STMT_VINFO_LOOP (stmt_info
);
929 tree vectype
= STMT_VINFO_VECTYPE (stmt_info
);
933 v_may_def_optype v_may_defs
= STMT_V_MAY_DEF_OPS (stmt
);
934 v_must_def_optype v_must_defs
= STMT_V_MUST_DEF_OPS (stmt
);
935 vuse_optype vuses
= STMT_VUSE_OPS (stmt
);
936 int nvuses
, nv_may_defs
, nv_must_defs
;
940 tree new_stmt_list
= NULL_TREE
;
942 edge pe
= loop_preheader_edge (loop
);
949 base_name
= unshare_expr (DR_BASE_NAME (dr
));
950 if (vect_debug_details (NULL
))
952 tree data_ref_base
= base_name
;
953 fprintf (dump_file
, "create array_ref of type: ");
954 print_generic_expr (dump_file
, vectype
, TDF_SLIM
);
955 if (TREE_CODE (data_ref_base
) == VAR_DECL
)
956 fprintf (dump_file
, "vectorizing a one dimensional array ref: ");
957 else if (TREE_CODE (data_ref_base
) == ARRAY_REF
)
958 fprintf (dump_file
, "vectorizing a multidimensional array ref: ");
959 else if (TREE_CODE (data_ref_base
) == COMPONENT_REF
)
960 fprintf (dump_file
, "vectorizing a record based array ref: ");
961 else if (TREE_CODE (data_ref_base
) == SSA_NAME
)
962 fprintf (dump_file
, "vectorizing a pointer ref: ");
963 print_generic_expr (dump_file
, base_name
, TDF_SLIM
);
966 /** (1) Create the new vector-pointer variable: **/
968 vect_ptr_type
= build_pointer_type (vectype
);
969 vect_ptr
= vect_get_new_vect_var (vect_ptr_type
, vect_pointer_var
,
970 get_name (base_name
));
971 add_referenced_tmp_var (vect_ptr
);
974 /** (2) Handle aliasing information of the new vector-pointer: **/
976 tag
= STMT_VINFO_MEMTAG (stmt_info
);
978 get_var_ann (vect_ptr
)->type_mem_tag
= tag
;
980 /* Mark for renaming all aliased variables
981 (i.e, the may-aliases of the type-mem-tag). */
982 nvuses
= NUM_VUSES (vuses
);
983 nv_may_defs
= NUM_V_MAY_DEFS (v_may_defs
);
984 nv_must_defs
= NUM_V_MUST_DEFS (v_must_defs
);
985 for (i
= 0; i
< nvuses
; i
++)
987 tree use
= VUSE_OP (vuses
, i
);
988 if (TREE_CODE (use
) == SSA_NAME
)
989 bitmap_set_bit (vars_to_rename
, var_ann (SSA_NAME_VAR (use
))->uid
);
991 for (i
= 0; i
< nv_may_defs
; i
++)
993 tree def
= V_MAY_DEF_RESULT (v_may_defs
, i
);
994 if (TREE_CODE (def
) == SSA_NAME
)
995 bitmap_set_bit (vars_to_rename
, var_ann (SSA_NAME_VAR (def
))->uid
);
997 for (i
= 0; i
< nv_must_defs
; i
++)
999 tree def
= V_MUST_DEF_OP (v_must_defs
, i
);
1000 if (TREE_CODE (def
) == SSA_NAME
)
1001 bitmap_set_bit (vars_to_rename
, var_ann (SSA_NAME_VAR (def
))->uid
);
1005 /** (3) Calculate the initial address the vector-pointer, and set
1006 the vector-pointer to point to it before the loop: **/
1008 /* Create: (&(base[init_val+offset]) in the loop preheader. */
1009 new_temp
= vect_create_addr_base_for_vector_ref (stmt
, &new_stmt_list
,
1011 pe
= loop_preheader_edge (loop
);
1012 new_bb
= bsi_insert_on_edge_immediate (pe
, new_stmt_list
);
1013 gcc_assert (!new_bb
);
1014 *initial_address
= new_temp
;
1016 /* Create: p = (vectype *) initial_base */
1017 vec_stmt
= fold_convert (vect_ptr_type
, new_temp
);
1018 vec_stmt
= build2 (MODIFY_EXPR
, void_type_node
, vect_ptr
, vec_stmt
);
1019 new_temp
= make_ssa_name (vect_ptr
, vec_stmt
);
1020 TREE_OPERAND (vec_stmt
, 0) = new_temp
;
1021 new_bb
= bsi_insert_on_edge_immediate (pe
, vec_stmt
);
1022 gcc_assert (!new_bb
);
1023 vect_ptr_init
= TREE_OPERAND (vec_stmt
, 0);
1026 /** (4) Handle the updating of the vector-pointer inside the loop: **/
1028 if (only_init
) /* No update in loop is required. */
1029 return vect_ptr_init
;
1031 idx
= vect_create_index_for_vector_ref (loop
, bsi
);
1033 /* Create: update = idx * vectype_size */
1034 ptr_update
= create_tmp_var (integer_type_node
, "update");
1035 add_referenced_tmp_var (ptr_update
);
1036 vectype_size
= build_int_cst (integer_type_node
,
1037 GET_MODE_SIZE (TYPE_MODE (vectype
)));
1038 vec_stmt
= build2 (MULT_EXPR
, integer_type_node
, idx
, vectype_size
);
1039 vec_stmt
= build2 (MODIFY_EXPR
, void_type_node
, ptr_update
, vec_stmt
);
1040 new_temp
= make_ssa_name (ptr_update
, vec_stmt
);
1041 TREE_OPERAND (vec_stmt
, 0) = new_temp
;
1042 bsi_insert_before (bsi
, vec_stmt
, BSI_SAME_STMT
);
1044 /* Create: data_ref_ptr = vect_ptr_init + update */
1045 vec_stmt
= build2 (PLUS_EXPR
, vect_ptr_type
, vect_ptr_init
, new_temp
);
1046 vec_stmt
= build2 (MODIFY_EXPR
, void_type_node
, vect_ptr
, vec_stmt
);
1047 new_temp
= make_ssa_name (vect_ptr
, vec_stmt
);
1048 TREE_OPERAND (vec_stmt
, 0) = new_temp
;
1049 bsi_insert_before (bsi
, vec_stmt
, BSI_SAME_STMT
);
1050 data_ref_ptr
= TREE_OPERAND (vec_stmt
, 0);
1052 return data_ref_ptr
;
1056 /* Function vect_create_destination_var.
1058 Create a new temporary of type VECTYPE. */
1061 vect_create_destination_var (tree scalar_dest
, tree vectype
)
1064 const char *new_name
;
1066 gcc_assert (TREE_CODE (scalar_dest
) == SSA_NAME
);
1068 new_name
= get_name (scalar_dest
);
1071 vec_dest
= vect_get_new_vect_var (vectype
, vect_simple_var
, new_name
);
1072 add_referenced_tmp_var (vec_dest
);
1078 /* Function vect_init_vector.
1080 Insert a new stmt (INIT_STMT) that initializes a new vector variable with
1081 the vector elements of VECTOR_VAR. Return the DEF of INIT_STMT. It will be
1082 used in the vectorization of STMT. */
1085 vect_init_vector (tree stmt
, tree vector_var
)
1087 stmt_vec_info stmt_vinfo
= vinfo_for_stmt (stmt
);
1088 struct loop
*loop
= STMT_VINFO_LOOP (stmt_vinfo
);
1091 tree vectype
= STMT_VINFO_VECTYPE (stmt_vinfo
);
1097 new_var
= vect_get_new_vect_var (vectype
, vect_simple_var
, "cst_");
1098 add_referenced_tmp_var (new_var
);
1100 init_stmt
= build2 (MODIFY_EXPR
, vectype
, new_var
, vector_var
);
1101 new_temp
= make_ssa_name (new_var
, init_stmt
);
1102 TREE_OPERAND (init_stmt
, 0) = new_temp
;
1104 pe
= loop_preheader_edge (loop
);
1105 new_bb
= bsi_insert_on_edge_immediate (pe
, init_stmt
);
1106 gcc_assert (!new_bb
);
1108 if (vect_debug_details (NULL
))
1110 fprintf (dump_file
, "created new init_stmt: ");
1111 print_generic_expr (dump_file
, init_stmt
, TDF_SLIM
);
1114 vec_oprnd
= TREE_OPERAND (init_stmt
, 0);
1119 /* Function vect_get_vec_def_for_operand.
1121 OP is an operand in STMT. This function returns a (vector) def that will be
1122 used in the vectorized stmt for STMT.
1124 In the case that OP is an SSA_NAME which is defined in the loop, then
1125 STMT_VINFO_VEC_STMT of the defining stmt holds the relevant def.
1127 In case OP is an invariant or constant, a new stmt that creates a vector def
1128 needs to be introduced. */
1131 vect_get_vec_def_for_operand (tree op
, tree stmt
)
1136 stmt_vec_info def_stmt_info
= NULL
;
1137 stmt_vec_info stmt_vinfo
= vinfo_for_stmt (stmt
);
1138 tree vectype
= STMT_VINFO_VECTYPE (stmt_vinfo
);
1139 int nunits
= GET_MODE_NUNITS (TYPE_MODE (vectype
));
1140 struct loop
*loop
= STMT_VINFO_LOOP (stmt_vinfo
);
1147 if (vect_debug_details (NULL
))
1149 fprintf (dump_file
, "vect_get_vec_def_for_operand: ");
1150 print_generic_expr (dump_file
, op
, TDF_SLIM
);
1153 /** ===> Case 1: operand is a constant. **/
1155 if (TREE_CODE (op
) == INTEGER_CST
|| TREE_CODE (op
) == REAL_CST
)
1157 /* Create 'vect_cst_ = {cst,cst,...,cst}' */
1160 stmt_vec_info stmt_vinfo
= vinfo_for_stmt (stmt
);
1161 tree vectype
= STMT_VINFO_VECTYPE (stmt_vinfo
);
1162 int nunits
= GET_MODE_NUNITS (TYPE_MODE (vectype
));
1166 /* Build a tree with vector elements. */
1167 if (vect_debug_details (NULL
))
1168 fprintf (dump_file
, "Create vector_cst. nunits = %d", nunits
);
1170 for (i
= nunits
- 1; i
>= 0; --i
)
1172 t
= tree_cons (NULL_TREE
, op
, t
);
1174 vec_cst
= build_vector (vectype
, t
);
1175 return vect_init_vector (stmt
, vec_cst
);
1178 gcc_assert (TREE_CODE (op
) == SSA_NAME
);
1180 /** ===> Case 2: operand is an SSA_NAME - find the stmt that defines it. **/
1182 def_stmt
= SSA_NAME_DEF_STMT (op
);
1183 def_stmt_info
= vinfo_for_stmt (def_stmt
);
1185 if (vect_debug_details (NULL
))
1187 fprintf (dump_file
, "vect_get_vec_def_for_operand: def_stmt: ");
1188 print_generic_expr (dump_file
, def_stmt
, TDF_SLIM
);
1192 /** ==> Case 2.1: operand is defined inside the loop. **/
1196 /* Get the def from the vectorized stmt. */
1198 vec_stmt
= STMT_VINFO_VEC_STMT (def_stmt_info
);
1199 gcc_assert (vec_stmt
);
1200 vec_oprnd
= TREE_OPERAND (vec_stmt
, 0);
1205 /** ==> Case 2.2: operand is defined by the loop-header phi-node -
1206 it is a reduction/induction. **/
1208 bb
= bb_for_stmt (def_stmt
);
1209 if (TREE_CODE (def_stmt
) == PHI_NODE
&& flow_bb_inside_loop_p (loop
, bb
))
1211 if (vect_debug_details (NULL
))
1212 fprintf (dump_file
, "reduction/induction - unsupported.");
1213 internal_error ("no support for reduction/induction"); /* FORNOW */
1217 /** ==> Case 2.3: operand is defined outside the loop -
1218 it is a loop invariant. */
1220 switch (TREE_CODE (def_stmt
))
1223 def
= PHI_RESULT (def_stmt
);
1226 def
= TREE_OPERAND (def_stmt
, 0);
1229 def
= TREE_OPERAND (def_stmt
, 0);
1230 gcc_assert (IS_EMPTY_STMT (def_stmt
));
1234 if (vect_debug_details (NULL
))
1236 fprintf (dump_file
, "unsupported defining stmt: ");
1237 print_generic_expr (dump_file
, def_stmt
, TDF_SLIM
);
1239 internal_error ("unsupported defining stmt");
1242 /* Build a tree with vector elements. Create 'vec_inv = {inv,inv,..,inv}' */
1244 if (vect_debug_details (NULL
))
1245 fprintf (dump_file
, "Create vector_inv.");
1247 for (i
= nunits
- 1; i
>= 0; --i
)
1249 t
= tree_cons (NULL_TREE
, def
, t
);
1252 vec_inv
= build_constructor (vectype
, t
);
1253 return vect_init_vector (stmt
, vec_inv
);
1257 /* Function vect_finish_stmt_generation.
1259 Insert a new stmt. */
1262 vect_finish_stmt_generation (tree stmt
, tree vec_stmt
, block_stmt_iterator
*bsi
)
1264 bsi_insert_before (bsi
, vec_stmt
, BSI_SAME_STMT
);
1266 if (vect_debug_details (NULL
))
1268 fprintf (dump_file
, "add new stmt: ");
1269 print_generic_expr (dump_file
, vec_stmt
, TDF_SLIM
);
1272 /* Make sure bsi points to the stmt that is being vectorized. */
1274 /* Assumption: any stmts created for the vectorization of stmt S were
1275 inserted before S. BSI is expected to point to S or some new stmt before S. */
1277 while (stmt
!= bsi_stmt (*bsi
) && !bsi_end_p (*bsi
))
1279 gcc_assert (stmt
== bsi_stmt (*bsi
));
1283 /* Function vectorizable_assignment.
1285 Check if STMT performs an assignment (copy) that can be vectorized.
1286 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
1287 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
1288 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
1291 vectorizable_assignment (tree stmt
, block_stmt_iterator
*bsi
, tree
*vec_stmt
)
1297 stmt_vec_info stmt_info
= vinfo_for_stmt (stmt
);
1298 tree vectype
= STMT_VINFO_VECTYPE (stmt_info
);
1299 struct loop
*loop
= STMT_VINFO_LOOP (stmt_info
);
1302 /* Is vectorizable assignment? */
1304 if (TREE_CODE (stmt
) != MODIFY_EXPR
)
1307 scalar_dest
= TREE_OPERAND (stmt
, 0);
1308 if (TREE_CODE (scalar_dest
) != SSA_NAME
)
1311 op
= TREE_OPERAND (stmt
, 1);
1312 if (!vect_is_simple_use (op
, loop
, NULL
))
1314 if (vect_debug_details (NULL
))
1315 fprintf (dump_file
, "use not simple.");
1319 if (!vec_stmt
) /* transformation not required. */
1321 STMT_VINFO_TYPE (stmt_info
) = assignment_vec_info_type
;
1326 if (vect_debug_details (NULL
))
1327 fprintf (dump_file
, "transform assignment.");
1330 vec_dest
= vect_create_destination_var (scalar_dest
, vectype
);
1333 op
= TREE_OPERAND (stmt
, 1);
1334 vec_oprnd
= vect_get_vec_def_for_operand (op
, stmt
);
1336 /* Arguments are ready. create the new vector stmt. */
1337 *vec_stmt
= build2 (MODIFY_EXPR
, vectype
, vec_dest
, vec_oprnd
);
1338 new_temp
= make_ssa_name (vec_dest
, *vec_stmt
);
1339 TREE_OPERAND (*vec_stmt
, 0) = new_temp
;
1340 vect_finish_stmt_generation (stmt
, *vec_stmt
, bsi
);
1346 /* Function vectorizable_operation.
1348 Check if STMT performs a binary or unary operation that can be vectorized.
1349 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
1350 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
1351 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
1354 vectorizable_operation (tree stmt
, block_stmt_iterator
*bsi
, tree
*vec_stmt
)
1359 tree op0
, op1
= NULL
;
1360 tree vec_oprnd0
, vec_oprnd1
=NULL
;
1361 stmt_vec_info stmt_info
= vinfo_for_stmt (stmt
);
1362 tree vectype
= STMT_VINFO_VECTYPE (stmt_info
);
1363 struct loop
*loop
= STMT_VINFO_LOOP (stmt_info
);
1365 enum tree_code code
;
1366 enum machine_mode vec_mode
;
1372 /* Is STMT a vectorizable binary/unary operation? */
1373 if (TREE_CODE (stmt
) != MODIFY_EXPR
)
1376 if (TREE_CODE (TREE_OPERAND (stmt
, 0)) != SSA_NAME
)
1379 operation
= TREE_OPERAND (stmt
, 1);
1380 code
= TREE_CODE (operation
);
1381 optab
= optab_for_tree_code (code
, vectype
);
1383 /* Support only unary or binary operations. */
1384 op_type
= TREE_CODE_LENGTH (code
);
1385 if (op_type
!= unary_op
&& op_type
!= binary_op
)
1387 if (vect_debug_details (NULL
))
1388 fprintf (dump_file
, "num. args = %d (not unary/binary op).", op_type
);
1392 for (i
= 0; i
< op_type
; i
++)
1394 op
= TREE_OPERAND (operation
, i
);
1395 if (!vect_is_simple_use (op
, loop
, NULL
))
1397 if (vect_debug_details (NULL
))
1398 fprintf (dump_file
, "use not simple.");
1403 /* Supportable by target? */
1406 if (vect_debug_details (NULL
))
1407 fprintf (dump_file
, "no optab.");
1410 vec_mode
= TYPE_MODE (vectype
);
1411 if (optab
->handlers
[(int) vec_mode
].insn_code
== CODE_FOR_nothing
)
1413 if (vect_debug_details (NULL
))
1414 fprintf (dump_file
, "op not supported by target.");
1418 if (!vec_stmt
) /* transformation not required. */
1420 STMT_VINFO_TYPE (stmt_info
) = op_vec_info_type
;
1426 if (vect_debug_details (NULL
))
1427 fprintf (dump_file
, "transform binary/unary operation.");
1430 scalar_dest
= TREE_OPERAND (stmt
, 0);
1431 vec_dest
= vect_create_destination_var (scalar_dest
, vectype
);
1434 op0
= TREE_OPERAND (operation
, 0);
1435 vec_oprnd0
= vect_get_vec_def_for_operand (op0
, stmt
);
1437 if (op_type
== binary_op
)
1439 op1
= TREE_OPERAND (operation
, 1);
1440 vec_oprnd1
= vect_get_vec_def_for_operand (op1
, stmt
);
1443 /* Arguments are ready. create the new vector stmt. */
1445 if (op_type
== binary_op
)
1446 *vec_stmt
= build2 (MODIFY_EXPR
, vectype
, vec_dest
,
1447 build2 (code
, vectype
, vec_oprnd0
, vec_oprnd1
));
1449 *vec_stmt
= build2 (MODIFY_EXPR
, vectype
, vec_dest
,
1450 build1 (code
, vectype
, vec_oprnd0
));
1451 new_temp
= make_ssa_name (vec_dest
, *vec_stmt
);
1452 TREE_OPERAND (*vec_stmt
, 0) = new_temp
;
1453 vect_finish_stmt_generation (stmt
, *vec_stmt
, bsi
);
1459 /* Function vectorizable_store.
1461 Check if STMT defines a non scalar data-ref (array/pointer/structure) that
1463 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
1464 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
1465 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
1468 vectorizable_store (tree stmt
, block_stmt_iterator
*bsi
, tree
*vec_stmt
)
1474 stmt_vec_info stmt_info
= vinfo_for_stmt (stmt
);
1475 tree vectype
= STMT_VINFO_VECTYPE (stmt_info
);
1476 struct loop
*loop
= STMT_VINFO_LOOP (stmt_info
);
1477 enum machine_mode vec_mode
;
1480 /* Is vectorizable store? */
1482 if (TREE_CODE (stmt
) != MODIFY_EXPR
)
1485 scalar_dest
= TREE_OPERAND (stmt
, 0);
1486 if (TREE_CODE (scalar_dest
) != ARRAY_REF
1487 && TREE_CODE (scalar_dest
) != INDIRECT_REF
)
1490 op
= TREE_OPERAND (stmt
, 1);
1491 if (!vect_is_simple_use (op
, loop
, NULL
))
1493 if (vect_debug_details (NULL
))
1494 fprintf (dump_file
, "use not simple.");
1498 vec_mode
= TYPE_MODE (vectype
);
1499 /* FORNOW. In some cases can vectorize even if data-type not supported
1500 (e.g. - array initialization with 0). */
1501 if (mov_optab
->handlers
[(int)vec_mode
].insn_code
== CODE_FOR_nothing
)
1504 if (!STMT_VINFO_DATA_REF (stmt_info
))
1507 if (!aligned_access_p (STMT_VINFO_DATA_REF (stmt_info
)))
1510 if (!vec_stmt
) /* transformation not required. */
1512 STMT_VINFO_TYPE (stmt_info
) = store_vec_info_type
;
1518 if (vect_debug_details (NULL
))
1519 fprintf (dump_file
, "transform store");
1521 /* Handle use - get the vectorized def from the defining stmt. */
1522 vec_oprnd1
= vect_get_vec_def_for_operand (op
, stmt
);
1525 /* FORNOW: make sure the data reference is aligned. */
1526 vect_align_data_ref (stmt
);
1527 data_ref
= vect_create_data_ref_ptr (stmt
, bsi
, NULL_TREE
, &dummy
, false);
1528 data_ref
= build_fold_indirect_ref (data_ref
);
1530 /* Arguments are ready. create the new vector stmt. */
1531 *vec_stmt
= build2 (MODIFY_EXPR
, vectype
, data_ref
, vec_oprnd1
);
1532 vect_finish_stmt_generation (stmt
, *vec_stmt
, bsi
);
1538 /* vectorizable_load.
1540 Check if STMT reads a non scalar data-ref (array/pointer/structure) that
1542 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
1543 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
1544 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
1547 vectorizable_load (tree stmt
, block_stmt_iterator
*bsi
, tree
*vec_stmt
)
1550 tree vec_dest
= NULL
;
1551 tree data_ref
= NULL
;
1553 stmt_vec_info stmt_info
= vinfo_for_stmt (stmt
);
1554 struct data_reference
*dr
= STMT_VINFO_DATA_REF (stmt_info
);
1555 tree vectype
= STMT_VINFO_VECTYPE (stmt_info
);
1562 struct loop
*loop
= STMT_VINFO_LOOP (stmt_info
);
1563 edge pe
= loop_preheader_edge (loop
);
1564 bool software_pipeline_loads_p
= false;
1566 /* Is vectorizable load? */
1568 if (TREE_CODE (stmt
) != MODIFY_EXPR
)
1571 scalar_dest
= TREE_OPERAND (stmt
, 0);
1572 if (TREE_CODE (scalar_dest
) != SSA_NAME
)
1575 op
= TREE_OPERAND (stmt
, 1);
1576 if (TREE_CODE (op
) != ARRAY_REF
&& TREE_CODE (op
) != INDIRECT_REF
)
1579 if (!STMT_VINFO_DATA_REF (stmt_info
))
1582 mode
= (int) TYPE_MODE (vectype
);
1584 /* FORNOW. In some cases can vectorize even if data-type not supported
1585 (e.g. - data copies). */
1586 if (mov_optab
->handlers
[mode
].insn_code
== CODE_FOR_nothing
)
1588 if (vect_debug_details (loop
))
1589 fprintf (dump_file
, "Aligned load, but unsupported type.");
1593 if (!aligned_access_p (dr
))
1595 if (vec_realign_load_optab
->handlers
[mode
].insn_code
!= CODE_FOR_nothing
1596 && (!targetm
.vectorize
.builtin_mask_for_load
1597 || targetm
.vectorize
.builtin_mask_for_load ()))
1598 software_pipeline_loads_p
= true;
1599 else if (!targetm
.vectorize
.misaligned_mem_ok (mode
))
1601 /* Possibly unaligned access, and can't software pipeline the loads */
1602 if (vect_debug_details (loop
))
1603 fprintf (dump_file
, "Arbitrary load not supported.");
1608 if (!vec_stmt
) /* transformation not required. */
1610 STMT_VINFO_TYPE (stmt_info
) = load_vec_info_type
;
1616 if (vect_debug_details (NULL
))
1617 fprintf (dump_file
, "transform load.");
1619 if (!software_pipeline_loads_p
)
1630 vec_dest
= vect_create_destination_var (scalar_dest
, vectype
);
1631 data_ref
= vect_create_data_ref_ptr (stmt
, bsi
, NULL_TREE
, &dummy
, false);
1632 if (aligned_access_p (dr
))
1633 data_ref
= build_fold_indirect_ref (data_ref
);
1636 int mis
= DR_MISALIGNMENT (dr
);
1637 tree tmis
= (mis
== -1 ?
1639 build_int_cst (integer_type_node
, mis
));
1640 tmis
= int_const_binop (MULT_EXPR
, tmis
,
1641 build_int_cst (integer_type_node
, BITS_PER_UNIT
), 1);
1642 data_ref
= build2 (MISALIGNED_INDIRECT_REF
, vectype
, data_ref
, tmis
);
1644 new_stmt
= build2 (MODIFY_EXPR
, vectype
, vec_dest
, data_ref
);
1645 new_temp
= make_ssa_name (vec_dest
, new_stmt
);
1646 TREE_OPERAND (new_stmt
, 0) = new_temp
;
1647 vect_finish_stmt_generation (stmt
, new_stmt
, bsi
);
1649 else /* software-pipeline the loads */
1653 msq_init = *(floor(p1))
1654 p2 = initial_addr + VS - 1;
1655 magic = have_builtin ? builtin_result : initial_address;
1658 p2' = p2 + indx * vectype_size
1660 vec_dest = realign_load (msq, lsq, magic)
1674 /* <1> Create msq_init = *(floor(p1)) in the loop preheader */
1675 vec_dest
= vect_create_destination_var (scalar_dest
, vectype
);
1676 data_ref
= vect_create_data_ref_ptr (stmt
, bsi
, NULL_TREE
,
1678 data_ref
= build1 (ALIGN_INDIRECT_REF
, vectype
, data_ref
);
1679 new_stmt
= build2 (MODIFY_EXPR
, vectype
, vec_dest
, data_ref
);
1680 new_temp
= make_ssa_name (vec_dest
, new_stmt
);
1681 TREE_OPERAND (new_stmt
, 0) = new_temp
;
1682 new_bb
= bsi_insert_on_edge_immediate (pe
, new_stmt
);
1683 gcc_assert (!new_bb
);
1684 msq_init
= TREE_OPERAND (new_stmt
, 0);
1687 /* <2> Create lsq = *(floor(p2')) in the loop */
1688 offset
= build_int_cst (integer_type_node
,
1689 GET_MODE_NUNITS (TYPE_MODE (vectype
)));
1690 offset
= int_const_binop (MINUS_EXPR
, offset
, integer_one_node
, 1);
1691 vec_dest
= vect_create_destination_var (scalar_dest
, vectype
);
1692 dataref_ptr
= vect_create_data_ref_ptr (stmt
, bsi
, offset
, &dummy
, false);
1693 data_ref
= build1 (ALIGN_INDIRECT_REF
, vectype
, dataref_ptr
);
1694 new_stmt
= build2 (MODIFY_EXPR
, vectype
, vec_dest
, data_ref
);
1695 new_temp
= make_ssa_name (vec_dest
, new_stmt
);
1696 TREE_OPERAND (new_stmt
, 0) = new_temp
;
1697 vect_finish_stmt_generation (stmt
, new_stmt
, bsi
);
1698 lsq
= TREE_OPERAND (new_stmt
, 0);
1702 if (targetm
.vectorize
.builtin_mask_for_load
)
1704 /* Create permutation mask, if required, in loop preheader. */
1706 params
= build_tree_list (NULL_TREE
, init_addr
);
1707 vec_dest
= vect_create_destination_var (scalar_dest
, vectype
);
1708 builtin_decl
= targetm
.vectorize
.builtin_mask_for_load ();
1709 new_stmt
= build_function_call_expr (builtin_decl
, params
);
1710 new_stmt
= build2 (MODIFY_EXPR
, vectype
, vec_dest
, new_stmt
);
1711 new_temp
= make_ssa_name (vec_dest
, new_stmt
);
1712 TREE_OPERAND (new_stmt
, 0) = new_temp
;
1713 new_bb
= bsi_insert_on_edge_immediate (pe
, new_stmt
);
1714 gcc_assert (!new_bb
);
1715 magic
= TREE_OPERAND (new_stmt
, 0);
1719 /* Use current address instead of init_addr for reduced reg pressure. */
1720 magic
= dataref_ptr
;
1724 /* <4> Create msq = phi <msq_init, lsq> in loop */
1725 vec_dest
= vect_create_destination_var (scalar_dest
, vectype
);
1726 msq
= make_ssa_name (vec_dest
, NULL_TREE
);
1727 phi_stmt
= create_phi_node (msq
, loop
->header
); /* CHECKME */
1728 SSA_NAME_DEF_STMT (msq
) = phi_stmt
;
1729 add_phi_arg (&phi_stmt
, msq_init
, loop_preheader_edge (loop
));
1730 add_phi_arg (&phi_stmt
, lsq
, loop_latch_edge (loop
));
1733 /* <5> Create <vec_dest = realign_load (msq, lsq, magic)> in loop */
1734 vec_dest
= vect_create_destination_var (scalar_dest
, vectype
);
1735 new_stmt
= build3 (REALIGN_LOAD_EXPR
, vectype
, msq
, lsq
, magic
);
1736 new_stmt
= build2 (MODIFY_EXPR
, vectype
, vec_dest
, new_stmt
);
1737 new_temp
= make_ssa_name (vec_dest
, new_stmt
);
1738 TREE_OPERAND (new_stmt
, 0) = new_temp
;
1739 vect_finish_stmt_generation (stmt
, new_stmt
, bsi
);
1742 *vec_stmt
= new_stmt
;
1747 /* Function vect_transform_stmt.
1749 Create a vectorized stmt to replace STMT, and insert it at BSI. */
1752 vect_transform_stmt (tree stmt
, block_stmt_iterator
*bsi
)
1754 bool is_store
= false;
1755 tree vec_stmt
= NULL_TREE
;
1756 stmt_vec_info stmt_info
= vinfo_for_stmt (stmt
);
1759 switch (STMT_VINFO_TYPE (stmt_info
))
1761 case op_vec_info_type
:
1762 done
= vectorizable_operation (stmt
, bsi
, &vec_stmt
);
1766 case assignment_vec_info_type
:
1767 done
= vectorizable_assignment (stmt
, bsi
, &vec_stmt
);
1771 case load_vec_info_type
:
1772 done
= vectorizable_load (stmt
, bsi
, &vec_stmt
);
1776 case store_vec_info_type
:
1777 done
= vectorizable_store (stmt
, bsi
, &vec_stmt
);
1782 if (vect_debug_details (NULL
))
1783 fprintf (dump_file
, "stmt not supported.");
1787 STMT_VINFO_VEC_STMT (stmt_info
) = vec_stmt
;
1793 /* Function vect_transform_loop_bound.
1795 Create a new exit condition for the loop. */
1798 vect_transform_loop_bound (loop_vec_info loop_vinfo
)
1800 struct loop
*loop
= LOOP_VINFO_LOOP (loop_vinfo
);
1801 edge exit_edge
= loop
->single_exit
;
1802 block_stmt_iterator loop_exit_bsi
= bsi_last (exit_edge
->src
);
1803 tree indx_before_incr
, indx_after_incr
;
1804 tree orig_cond_expr
;
1805 HOST_WIDE_INT old_N
= 0;
1808 tree new_loop_bound
;
1812 gcc_assert (LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo
));
1813 old_N
= LOOP_VINFO_NITERS (loop_vinfo
);
1814 vf
= LOOP_VINFO_VECT_FACTOR (loop_vinfo
);
1817 assuming number-of-iterations divides by the vectorization factor. */
1818 gcc_assert (!(old_N
% vf
));
1820 orig_cond_expr
= LOOP_VINFO_EXIT_COND (loop_vinfo
);
1821 gcc_assert (orig_cond_expr
);
1822 gcc_assert (orig_cond_expr
== bsi_stmt (loop_exit_bsi
));
1824 create_iv (integer_zero_node
, integer_one_node
, NULL_TREE
, loop
,
1825 &loop_exit_bsi
, false, &indx_before_incr
, &indx_after_incr
);
1827 /* bsi_insert is using BSI_NEW_STMT. We need to bump it back
1828 to point to the exit condition. */
1829 bsi_next (&loop_exit_bsi
);
1830 gcc_assert (bsi_stmt (loop_exit_bsi
) == orig_cond_expr
);
1832 /* new loop exit test: */
1833 lb_type
= TREE_TYPE (TREE_OPERAND (TREE_OPERAND (orig_cond_expr
, 0), 1));
1834 new_loop_bound
= build_int_cst (lb_type
, old_N
/vf
);
1836 if (exit_edge
->flags
& EDGE_TRUE_VALUE
) /* 'then' edge exits the loop. */
1837 cond
= build2 (GE_EXPR
, boolean_type_node
, indx_after_incr
, new_loop_bound
);
1838 else /* 'then' edge loops back. */
1839 cond
= build2 (LT_EXPR
, boolean_type_node
, indx_after_incr
, new_loop_bound
);
1841 cond_stmt
= build3 (COND_EXPR
, TREE_TYPE (orig_cond_expr
), cond
,
1842 TREE_OPERAND (orig_cond_expr
, 1), TREE_OPERAND (orig_cond_expr
, 2));
1844 bsi_insert_before (&loop_exit_bsi
, cond_stmt
, BSI_SAME_STMT
);
1846 /* remove old loop exit test: */
1847 bsi_remove (&loop_exit_bsi
);
1849 if (vect_debug_details (NULL
))
1850 print_generic_expr (dump_file
, cond_stmt
, TDF_SLIM
);
1854 /* Function vect_transform_loop.
1856 The analysis phase has determined that the loop is vectorizable.
1857 Vectorize the loop - created vectorized stmts to replace the scalar
1858 stmts in the loop, and update the loop exit condition. */
1861 vect_transform_loop (loop_vec_info loop_vinfo
,
1862 struct loops
*loops ATTRIBUTE_UNUSED
)
1864 struct loop
*loop
= LOOP_VINFO_LOOP (loop_vinfo
);
1865 basic_block
*bbs
= LOOP_VINFO_BBS (loop_vinfo
);
1866 int nbbs
= loop
->num_nodes
;
1867 block_stmt_iterator si
;
1869 #ifdef ENABLE_CHECKING
1870 int vectorization_factor
= LOOP_VINFO_VECT_FACTOR (loop_vinfo
);
1873 if (vect_debug_details (NULL
))
1874 fprintf (dump_file
, "\n<<vec_transform_loop>>\n");
1876 /* 1) Make sure the loop header has exactly two entries
1877 2) Make sure we have a preheader basic block. */
1879 gcc_assert (EDGE_COUNT (loop
->header
->preds
) == 2);
1881 loop_split_edge_with (loop_preheader_edge (loop
), NULL
);
1884 /* FORNOW: the vectorizer supports only loops which body consist
1885 of one basic block (header + empty latch). When the vectorizer will
1886 support more involved loop forms, the order by which the BBs are
1887 traversed need to be reconsidered. */
1889 for (i
= 0; i
< nbbs
; i
++)
1891 basic_block bb
= bbs
[i
];
1893 for (si
= bsi_start (bb
); !bsi_end_p (si
);)
1895 tree stmt
= bsi_stmt (si
);
1896 stmt_vec_info stmt_info
;
1898 #ifdef ENABLE_CHECKING
1902 if (vect_debug_details (NULL
))
1904 fprintf (dump_file
, "------>vectorizing statement: ");
1905 print_generic_expr (dump_file
, stmt
, TDF_SLIM
);
1907 stmt_info
= vinfo_for_stmt (stmt
);
1908 gcc_assert (stmt_info
);
1909 if (!STMT_VINFO_RELEVANT_P (stmt_info
))
1914 #ifdef ENABLE_CHECKING
1915 /* FORNOW: Verify that all stmts operate on the same number of
1916 units and no inner unrolling is necessary. */
1917 vectype
= STMT_VINFO_VECTYPE (stmt_info
);
1918 gcc_assert (GET_MODE_NUNITS (TYPE_MODE (vectype
))
1919 == vectorization_factor
);
1921 /* -------- vectorize statement ------------ */
1922 if (vect_debug_details (NULL
))
1923 fprintf (dump_file
, "transform statement.");
1925 is_store
= vect_transform_stmt (stmt
, &si
);
1928 /* free the attached stmt_vec_info and remove the stmt. */
1929 stmt_ann_t ann
= stmt_ann (stmt
);
1931 set_stmt_info (ann
, NULL
);
1940 vect_transform_loop_bound (loop_vinfo
);
1942 if (vect_debug_details (loop
))
1943 fprintf (dump_file
,"Success! loop vectorized.");
1944 if (vect_debug_stats (loop
))
1945 fprintf (dump_file
, "LOOP VECTORIZED.");
1949 /* Function vect_is_simple_use.
1952 LOOP - the loop that is being vectorized.
1953 OPERAND - operand of a stmt in LOOP.
1954 DEF - the defining stmt in case OPERAND is an SSA_NAME.
1956 Returns whether a stmt with OPERAND can be vectorized.
1957 Supportable operands are constants, loop invariants, and operands that are
1958 defined by the current iteration of the loop. Unsupportable operands are
1959 those that are defined by a previous iteration of the loop (as is the case
1960 in reduction/induction computations). */
1963 vect_is_simple_use (tree operand
, struct loop
*loop
, tree
*def
)
1971 if (TREE_CODE (operand
) == INTEGER_CST
|| TREE_CODE (operand
) == REAL_CST
)
1974 if (TREE_CODE (operand
) != SSA_NAME
)
1977 def_stmt
= SSA_NAME_DEF_STMT (operand
);
1978 if (def_stmt
== NULL_TREE
)
1980 if (vect_debug_details (NULL
))
1981 fprintf (dump_file
, "no def_stmt.");
1985 /* empty stmt is expected only in case of a function argument.
1986 (Otherwise - we expect a phi_node or a modify_expr). */
1987 if (IS_EMPTY_STMT (def_stmt
))
1989 tree arg
= TREE_OPERAND (def_stmt
, 0);
1990 if (TREE_CODE (arg
) == INTEGER_CST
|| TREE_CODE (arg
) == REAL_CST
)
1992 if (vect_debug_details (NULL
))
1994 fprintf (dump_file
, "Unexpected empty stmt: ");
1995 print_generic_expr (dump_file
, def_stmt
, TDF_SLIM
);
2000 /* phi_node inside the loop indicates an induction/reduction pattern.
2001 This is not supported yet. */
2002 bb
= bb_for_stmt (def_stmt
);
2003 if (TREE_CODE (def_stmt
) == PHI_NODE
&& flow_bb_inside_loop_p (loop
, bb
))
2005 if (vect_debug_details (NULL
))
2006 fprintf (dump_file
, "reduction/induction - unsupported.");
2007 return false; /* FORNOW: not supported yet. */
2010 /* Expecting a modify_expr or a phi_node. */
2011 if (TREE_CODE (def_stmt
) == MODIFY_EXPR
2012 || TREE_CODE (def_stmt
) == PHI_NODE
)
2023 /* Function vect_analyze_operations.
2025 Scan the loop stmts and make sure they are all vectorizable. */
2028 vect_analyze_operations (loop_vec_info loop_vinfo
)
2030 struct loop
*loop
= LOOP_VINFO_LOOP (loop_vinfo
);
2031 basic_block
*bbs
= LOOP_VINFO_BBS (loop_vinfo
);
2032 int nbbs
= loop
->num_nodes
;
2033 block_stmt_iterator si
;
2034 int vectorization_factor
= 0;
2039 if (vect_debug_details (NULL
))
2040 fprintf (dump_file
, "\n<<vect_analyze_operations>>\n");
2042 for (i
= 0; i
< nbbs
; i
++)
2044 basic_block bb
= bbs
[i
];
2046 for (si
= bsi_start (bb
); !bsi_end_p (si
); bsi_next (&si
))
2048 tree stmt
= bsi_stmt (si
);
2050 stmt_vec_info stmt_info
= vinfo_for_stmt (stmt
);
2053 if (vect_debug_details (NULL
))
2055 fprintf (dump_file
, "==> examining statement: ");
2056 print_generic_expr (dump_file
, stmt
, TDF_SLIM
);
2059 gcc_assert (stmt_info
);
2061 /* skip stmts which do not need to be vectorized.
2062 this is expected to include:
2063 - the COND_EXPR which is the loop exit condition
2064 - any LABEL_EXPRs in the loop
2065 - computations that are used only for array indexing or loop
2068 if (!STMT_VINFO_RELEVANT_P (stmt_info
))
2070 if (vect_debug_details (NULL
))
2071 fprintf (dump_file
, "irrelevant.");
2075 if (VECTOR_MODE_P (TYPE_MODE (TREE_TYPE (stmt
))))
2077 if (vect_debug_stats (loop
) || vect_debug_details (loop
))
2079 fprintf (dump_file
, "not vectorized: vector stmt in loop:");
2080 print_generic_expr (dump_file
, stmt
, TDF_SLIM
);
2085 if (STMT_VINFO_DATA_REF (stmt_info
))
2086 scalar_type
= TREE_TYPE (DR_REF (STMT_VINFO_DATA_REF (stmt_info
)));
2087 else if (TREE_CODE (stmt
) == MODIFY_EXPR
)
2088 scalar_type
= TREE_TYPE (TREE_OPERAND (stmt
, 0));
2090 scalar_type
= TREE_TYPE (stmt
);
2092 if (vect_debug_details (NULL
))
2094 fprintf (dump_file
, "get vectype for scalar type: ");
2095 print_generic_expr (dump_file
, scalar_type
, TDF_SLIM
);
2098 vectype
= get_vectype_for_scalar_type (scalar_type
);
2101 if (vect_debug_stats (loop
) || vect_debug_details (loop
))
2103 fprintf (dump_file
, "not vectorized: unsupported data-type ");
2104 print_generic_expr (dump_file
, scalar_type
, TDF_SLIM
);
2109 if (vect_debug_details (NULL
))
2111 fprintf (dump_file
, "vectype: ");
2112 print_generic_expr (dump_file
, vectype
, TDF_SLIM
);
2114 STMT_VINFO_VECTYPE (stmt_info
) = vectype
;
2116 ok
= (vectorizable_operation (stmt
, NULL
, NULL
)
2117 || vectorizable_assignment (stmt
, NULL
, NULL
)
2118 || vectorizable_load (stmt
, NULL
, NULL
)
2119 || vectorizable_store (stmt
, NULL
, NULL
));
2123 if (vect_debug_stats (loop
) || vect_debug_details (loop
))
2125 fprintf (dump_file
, "not vectorized: stmt not supported: ");
2126 print_generic_expr (dump_file
, stmt
, TDF_SLIM
);
2131 nunits
= GET_MODE_NUNITS (TYPE_MODE (vectype
));
2132 if (vect_debug_details (NULL
))
2133 fprintf (dump_file
, "nunits = %d", nunits
);
2135 if (vectorization_factor
)
2137 /* FORNOW: don't allow mixed units.
2138 This restriction will be relaxed in the future. */
2139 if (nunits
!= vectorization_factor
)
2141 if (vect_debug_stats (loop
) || vect_debug_details (loop
))
2142 fprintf (dump_file
, "not vectorized: mixed data-types");
2147 vectorization_factor
= nunits
;
2151 /* TODO: Analyze cost. Decide if worth while to vectorize. */
2152 if (!vectorization_factor
)
2154 if (vect_debug_stats (loop
) || vect_debug_details (loop
))
2155 fprintf (dump_file
, "not vectorized: unsupported data-type");
2158 LOOP_VINFO_VECT_FACTOR (loop_vinfo
) = vectorization_factor
;
2160 /* FORNOW: handle only cases where the loop bound divides by the
2161 vectorization factor. */
2163 if (vect_debug_details (NULL
))
2165 "vectorization_factor = %d, niters = " HOST_WIDE_INT_PRINT_DEC
,
2166 vectorization_factor
, LOOP_VINFO_NITERS (loop_vinfo
));
2168 if (!LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo
))
2170 if (vect_debug_stats (loop
) || vect_debug_details (loop
))
2171 fprintf (dump_file
, "not vectorized: Unknown loop bound.");
2175 if (LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo
)
2176 && LOOP_VINFO_NITERS (loop_vinfo
) % vectorization_factor
!= 0)
2178 if (vect_debug_stats (loop
) || vect_debug_details (loop
))
2179 fprintf (dump_file
, "not vectorized: loop bound doesn't divided by %d.",
2180 vectorization_factor
);
2188 /* Function exist_non_indexing_operands_for_use_p
2190 USE is one of the uses attached to STMT. Check if USE is
2191 used in STMT for anything other than indexing an array. */
2194 exist_non_indexing_operands_for_use_p (tree use
, tree stmt
)
2197 stmt_vec_info stmt_info
= vinfo_for_stmt (stmt
);
2199 /* USE corresponds to some operand in STMT. If there is no data
2200 reference in STMT, then any operand that corresponds to USE
2201 is not indexing an array. */
2202 if (!STMT_VINFO_DATA_REF (stmt_info
))
2205 /* STMT has a data_ref. FORNOW this means that its of one of
2206 the following forms:
2209 (This should have been verified in analyze_data_refs).
2211 'var' in the second case corresponds to a def, not a use,
2212 so USE cannot correspond to any operands that are not used
2215 Therefore, all we need to check is if STMT falls into the
2216 first case, and whether var corresponds to USE. */
2218 if (TREE_CODE (TREE_OPERAND (stmt
, 0)) == SSA_NAME
)
2221 operand
= TREE_OPERAND (stmt
, 1);
2223 if (TREE_CODE (operand
) != SSA_NAME
)
2233 /* Function vect_is_simple_iv_evolution.
2235 FORNOW: A simple evolution of an induction variables in the loop is
2236 considered a polynomial evolution with constant step. */
2239 vect_is_simple_iv_evolution (unsigned loop_nb
, tree access_fn
, tree
* init
,
2240 tree
* step
, bool strict
)
2245 tree evolution_part
= evolution_part_in_loop_num (access_fn
, loop_nb
);
2247 /* When there is no evolution in this loop, the evolution function
2249 if (evolution_part
== NULL_TREE
)
2252 /* When the evolution is a polynomial of degree >= 2
2253 the evolution function is not "simple". */
2254 if (tree_is_chrec (evolution_part
))
2257 step_expr
= evolution_part
;
2258 init_expr
= initial_condition (access_fn
);
2260 if (vect_debug_details (NULL
))
2262 fprintf (dump_file
, "step: ");
2263 print_generic_expr (dump_file
, step_expr
, TDF_SLIM
);
2264 fprintf (dump_file
, ", init: ");
2265 print_generic_expr (dump_file
, init_expr
, TDF_SLIM
);
2271 if (TREE_CODE (step_expr
) != INTEGER_CST
)
2273 if (vect_debug_details (NULL
))
2274 fprintf (dump_file
, "step unknown.");
2279 if (!integer_onep (step_expr
))
2281 if (vect_debug_details (NULL
))
2282 print_generic_expr (dump_file
, step_expr
, TDF_SLIM
);
2290 /* Function vect_analyze_scalar_cycles.
2292 Examine the cross iteration def-use cycles of scalar variables, by
2293 analyzing the loop (scalar) PHIs; verify that the cross iteration def-use
2294 cycles that they represent do not impede vectorization.
2296 FORNOW: Reduction as in the following loop, is not supported yet:
2300 The cross-iteration cycle corresponding to variable 'sum' will be
2301 considered too complicated and will impede vectorization.
2303 FORNOW: Induction as in the following loop, is not supported yet:
2308 However, the following loop *is* vectorizable:
2313 In both loops there exists a def-use cycle for the variable i:
2314 loop: i_2 = PHI (i_0, i_1)
2319 The evolution of the above cycle is considered simple enough,
2320 however, we also check that the cycle does not need to be
2321 vectorized, i.e - we check that the variable that this cycle
2322 defines is only used for array indexing or in stmts that do not
2323 need to be vectorized. This is not the case in loop2, but it
2324 *is* the case in loop3. */
2327 vect_analyze_scalar_cycles (loop_vec_info loop_vinfo
)
2330 struct loop
*loop
= LOOP_VINFO_LOOP (loop_vinfo
);
2331 basic_block bb
= loop
->header
;
2334 if (vect_debug_details (NULL
))
2335 fprintf (dump_file
, "\n<<vect_analyze_scalar_cycles>>\n");
2337 for (phi
= phi_nodes (bb
); phi
; phi
= TREE_CHAIN (phi
))
2339 tree access_fn
= NULL
;
2341 if (vect_debug_details (NULL
))
2343 fprintf (dump_file
, "Analyze phi: ");
2344 print_generic_expr (dump_file
, phi
, TDF_SLIM
);
2347 /* Skip virtual phi's. The data dependences that are associated with
2348 virtual defs/uses (i.e., memory accesses) are analyzed elsewhere. */
2350 if (!is_gimple_reg (SSA_NAME_VAR (PHI_RESULT (phi
))))
2352 if (vect_debug_details (NULL
))
2353 fprintf (dump_file
, "virtual phi. skip.");
2357 /* Analyze the evolution function. */
2359 /* FORNOW: The only scalar cross-iteration cycles that we allow are
2360 those of loop induction variables; This property is verified here.
2362 Furthermore, if that induction variable is used in an operation
2363 that needs to be vectorized (i.e, is not solely used to index
2364 arrays and check the exit condition) - we do not support its
2365 vectorization yet. This property is verified in vect_is_simple_use,
2366 during vect_analyze_operations. */
2368 access_fn
= /* instantiate_parameters
2370 analyze_scalar_evolution (loop
, PHI_RESULT (phi
));
2374 if (vect_debug_stats (loop
) || vect_debug_details (loop
))
2375 fprintf (dump_file
, "not vectorized: unsupported scalar cycle.");
2379 if (vect_debug_details (NULL
))
2381 fprintf (dump_file
, "Access function of PHI: ");
2382 print_generic_expr (dump_file
, access_fn
, TDF_SLIM
);
2385 if (!vect_is_simple_iv_evolution (loop
->num
, access_fn
, &dummy
,
2388 if (vect_debug_stats (loop
) || vect_debug_details (loop
))
2389 fprintf (dump_file
, "not vectorized: unsupported scalar cycle.");
2398 /* Function vect_analyze_data_ref_dependence.
2400 Return TRUE if there (might) exist a dependence between a memory-reference
2401 DRA and a memory-reference DRB. */
2404 vect_analyze_data_ref_dependence (struct data_reference
*dra
,
2405 struct data_reference
*drb
,
2409 struct data_dependence_relation
*ddr
;
2411 if (!array_base_name_differ_p (dra
, drb
, &differ_p
))
2413 if (vect_debug_stats (loop
) || vect_debug_details (loop
))
2416 "not vectorized: can't determine dependence between: ");
2417 print_generic_expr (dump_file
, DR_REF (dra
), TDF_SLIM
);
2418 fprintf (dump_file
, " and ");
2419 print_generic_expr (dump_file
, DR_REF (drb
), TDF_SLIM
);
2427 ddr
= initialize_data_dependence_relation (dra
, drb
);
2428 compute_affine_dependence (ddr
);
2430 if (DDR_ARE_DEPENDENT (ddr
) == chrec_known
)
2433 if (vect_debug_stats (loop
) || vect_debug_details (loop
))
2436 "not vectorized: possible dependence between data-refs ");
2437 print_generic_expr (dump_file
, DR_REF (dra
), TDF_SLIM
);
2438 fprintf (dump_file
, " and ");
2439 print_generic_expr (dump_file
, DR_REF (drb
), TDF_SLIM
);
2446 /* Function vect_analyze_data_ref_dependences.
2448 Examine all the data references in the loop, and make sure there do not
2449 exist any data dependences between them.
2451 TODO: dependences which distance is greater than the vectorization factor
2455 vect_analyze_data_ref_dependences (loop_vec_info loop_vinfo
)
2458 varray_type loop_write_refs
= LOOP_VINFO_DATAREF_WRITES (loop_vinfo
);
2459 varray_type loop_read_refs
= LOOP_VINFO_DATAREF_READS (loop_vinfo
);
2460 struct loop
*loop
= LOOP_VINFO_LOOP (loop_vinfo
);
2462 /* Examine store-store (output) dependences. */
2464 if (vect_debug_details (NULL
))
2465 fprintf (dump_file
, "\n<<vect_analyze_dependences>>\n");
2467 if (vect_debug_details (NULL
))
2468 fprintf (dump_file
, "compare all store-store pairs.");
2470 for (i
= 0; i
< VARRAY_ACTIVE_SIZE (loop_write_refs
); i
++)
2472 for (j
= i
+ 1; j
< VARRAY_ACTIVE_SIZE (loop_write_refs
); j
++)
2474 struct data_reference
*dra
=
2475 VARRAY_GENERIC_PTR (loop_write_refs
, i
);
2476 struct data_reference
*drb
=
2477 VARRAY_GENERIC_PTR (loop_write_refs
, j
);
2478 if (vect_analyze_data_ref_dependence (dra
, drb
, loop
))
2483 /* Examine load-store (true/anti) dependences. */
2485 if (vect_debug_details (NULL
))
2486 fprintf (dump_file
, "compare all load-store pairs.");
2488 for (i
= 0; i
< VARRAY_ACTIVE_SIZE (loop_read_refs
); i
++)
2490 for (j
= 0; j
< VARRAY_ACTIVE_SIZE (loop_write_refs
); j
++)
2492 struct data_reference
*dra
= VARRAY_GENERIC_PTR (loop_read_refs
, i
);
2493 struct data_reference
*drb
=
2494 VARRAY_GENERIC_PTR (loop_write_refs
, j
);
2495 if (vect_analyze_data_ref_dependence (dra
, drb
, loop
))
2504 /* Function vect_get_first_index.
2506 REF is a data reference.
2507 If it is an ARRAY_REF: if its lower bound is simple enough,
2508 put it in ARRAY_FIRST_INDEX and return TRUE; otherwise - return FALSE.
2509 If it is not an ARRAY_REF: REF has no "first index";
2510 ARRAY_FIRST_INDEX in zero, and the function returns TRUE. */
2513 vect_get_first_index (tree ref
, tree
*array_first_index
)
2517 if (TREE_CODE (ref
) != ARRAY_REF
)
2518 *array_first_index
= size_zero_node
;
2521 array_start
= array_ref_low_bound (ref
);
2522 if (!host_integerp (array_start
,0))
2524 if (vect_debug_details (NULL
))
2526 fprintf (dump_file
, "array min val not simple integer cst.");
2527 print_generic_expr (dump_file
, array_start
, TDF_DETAILS
);
2531 *array_first_index
= array_start
;
2538 /* Function vect_compute_array_base_alignment.
2539 A utility function of vect_compute_array_ref_alignment.
2541 Compute the misalignment of ARRAY in bits.
2544 ARRAY - an array_ref (possibly multidimensional) of type ARRAY_TYPE.
2545 VECTYPE - we are interested in the misalignment modulo the size of vectype.
2546 if NULL: don't compute misalignment, just return the base of ARRAY.
2547 PREV_DIMENSIONS - initialized to one.
2548 MISALIGNMENT - the computed misalignment in bits.
2551 If VECTYPE is not NULL:
2552 Return NULL_TREE if the misalignment cannot be computed. Otherwise, return
2553 the base of the array, and put the computed misalignment in MISALIGNMENT.
2555 Return the base of the array.
2557 For a[idx_N]...[idx_2][idx_1][idx_0], the address of
2558 a[idx_N]...[idx_2][idx_1] is
2559 {&a + idx_1 * dim_0 + idx_2 * dim_0 * dim_1 + ...
2560 ... + idx_N * dim_0 * ... * dim_N-1}.
2561 (The misalignment of &a is not checked here).
2562 Note, that every term contains dim_0, therefore, if dim_0 is a
2563 multiple of NUNITS, the whole sum is a multiple of NUNITS.
2564 Otherwise, if idx_1 is constant, and dim_1 is a multiple of
2565 NUINTS, we can say that the misalignment of the sum is equal to
2566 the misalignment of {idx_1 * dim_0}. If idx_1 is not constant,
2567 we can't determine this array misalignment, and we return
2569 We proceed recursively in this manner, accumulating total misalignment
2570 and the multiplication of previous dimensions for correct misalignment
2574 vect_compute_array_base_alignment (tree array
,
2576 tree
*prev_dimensions
,
2581 tree dimension_size
;
2583 tree bits_per_vectype
;
2584 tree bits_per_vectype_unit
;
2586 /* The 'stop condition' of the recursion. */
2587 if (TREE_CODE (array
) != ARRAY_REF
)
2591 /* Just get the base decl. */
2592 return vect_compute_array_base_alignment
2593 (TREE_OPERAND (array
, 0), NULL
, NULL
, NULL
);
2595 if (!host_integerp (*misalignment
, 1) || TREE_OVERFLOW (*misalignment
) ||
2596 !host_integerp (*prev_dimensions
, 1) || TREE_OVERFLOW (*prev_dimensions
))
2599 domain
= TYPE_DOMAIN (TREE_TYPE (array
));
2601 int_const_binop (PLUS_EXPR
,
2602 int_const_binop (MINUS_EXPR
, TYPE_MAX_VALUE (domain
),
2603 TYPE_MIN_VALUE (domain
), 1),
2606 /* Check if the dimension size is a multiple of NUNITS, the remaining sum
2607 is a multiple of NUNITS:
2609 dimension_size % GET_MODE_NUNITS (TYPE_MODE (vectype)) == 0 ?
2611 mis
= int_const_binop (TRUNC_MOD_EXPR
, dimension_size
,
2612 build_int_cst (NULL_TREE
, GET_MODE_NUNITS (TYPE_MODE (vectype
))), 1);
2613 if (integer_zerop (mis
))
2614 /* This array is aligned. Continue just in order to get the base decl. */
2615 return vect_compute_array_base_alignment
2616 (TREE_OPERAND (array
, 0), NULL
, NULL
, NULL
);
2618 index
= TREE_OPERAND (array
, 1);
2619 if (!host_integerp (index
, 1))
2620 /* The current index is not constant. */
2623 index
= int_const_binop (MINUS_EXPR
, index
, TYPE_MIN_VALUE (domain
), 0);
2625 bits_per_vectype
= fold_convert (unsigned_type_node
,
2626 build_int_cst (NULL_TREE
, BITS_PER_UNIT
*
2627 GET_MODE_SIZE (TYPE_MODE (vectype
))));
2628 bits_per_vectype_unit
= fold_convert (unsigned_type_node
,
2629 build_int_cst (NULL_TREE
, BITS_PER_UNIT
*
2630 GET_MODE_SIZE (TYPE_MODE (TREE_TYPE (vectype
)))));
2632 /* Add {idx_i * dim_i-1 * ... * dim_0 } to the misalignment computed
2636 (*misalignment + index_val * dimension_size * *prev_dimensions)
2640 mis
= int_const_binop (MULT_EXPR
, index
, dimension_size
, 1);
2641 mis
= int_const_binop (MULT_EXPR
, mis
, *prev_dimensions
, 1);
2642 mis
= int_const_binop (MULT_EXPR
, mis
, bits_per_vectype_unit
, 1);
2643 mis
= int_const_binop (PLUS_EXPR
, *misalignment
, mis
, 1);
2644 *misalignment
= int_const_binop (TRUNC_MOD_EXPR
, mis
, bits_per_vectype
, 1);
2647 *prev_dimensions
= int_const_binop (MULT_EXPR
,
2648 *prev_dimensions
, dimension_size
, 1);
2650 return vect_compute_array_base_alignment (TREE_OPERAND (array
, 0), vectype
,
2656 /* Function vect_compute_data_ref_alignment
2658 Compute the misalignment of the data reference DR.
2661 1. If during the misalignment computation it is found that the data reference
2662 cannot be vectorized then false is returned.
2663 2. DR_MISALIGNMENT (DR) is defined.
2665 FOR NOW: No analysis is actually performed. Misalignment is calculated
2666 only for trivial cases. TODO. */
2669 vect_compute_data_ref_alignment (struct data_reference
*dr
,
2670 loop_vec_info loop_vinfo
)
2672 tree stmt
= DR_STMT (dr
);
2673 stmt_vec_info stmt_info
= vinfo_for_stmt (stmt
);
2674 tree ref
= DR_REF (dr
);
2677 tree offset
= size_zero_node
;
2678 tree base
, bit_offset
, alignment
;
2679 tree unit_bits
= fold_convert (unsigned_type_node
,
2680 build_int_cst (NULL_TREE
, BITS_PER_UNIT
));
2682 bool base_aligned_p
;
2684 if (vect_debug_details (NULL
))
2685 fprintf (dump_file
, "vect_compute_data_ref_alignment:");
2687 /* Initialize misalignment to unknown. */
2688 DR_MISALIGNMENT (dr
) = -1;
2690 scalar_type
= TREE_TYPE (ref
);
2691 vectype
= get_vectype_for_scalar_type (scalar_type
);
2694 if (vect_debug_details (NULL
))
2696 fprintf (dump_file
, "no vectype for stmt: ");
2697 print_generic_expr (dump_file
, stmt
, TDF_SLIM
);
2698 fprintf (dump_file
, " scalar_type: ");
2699 print_generic_expr (dump_file
, scalar_type
, TDF_DETAILS
);
2701 /* It is not possible to vectorize this data reference. */
2704 gcc_assert (TREE_CODE (ref
) == ARRAY_REF
|| TREE_CODE (ref
) == INDIRECT_REF
);
2706 if (TREE_CODE (ref
) == ARRAY_REF
)
2709 dr_base
= STMT_VINFO_VECT_DR_BASE (stmt_info
);
2711 base
= vect_get_base_and_bit_offset (dr
, dr_base
, vectype
,
2712 loop_vinfo
, &bit_offset
, &base_aligned_p
);
2715 if (vect_debug_details (NULL
))
2717 fprintf (dump_file
, "Unknown alignment for access: ");
2718 print_generic_expr (dump_file
,
2719 STMT_VINFO_VECT_DR_BASE (stmt_info
), TDF_SLIM
);
2724 if (!base_aligned_p
)
2726 if (!vect_can_force_dr_alignment_p (base
, TYPE_ALIGN (vectype
)))
2728 if (vect_debug_details (NULL
))
2730 fprintf (dump_file
, "can't force alignment of ref: ");
2731 print_generic_expr (dump_file
, ref
, TDF_SLIM
);
2736 /* Force the alignment of the decl.
2737 NOTE: This is the only change to the code we make during
2738 the analysis phase, before deciding to vectorize the loop. */
2739 if (vect_debug_details (NULL
))
2740 fprintf (dump_file
, "force alignment");
2741 DECL_ALIGN (base
) = TYPE_ALIGN (vectype
);
2742 DECL_USER_ALIGN (base
) = TYPE_ALIGN (vectype
);
2745 /* At this point we assume that the base is aligned, and the offset from it
2746 (including index, if relevant) has been computed and is in BIT_OFFSET. */
2747 gcc_assert (base_aligned_p
2748 || (TREE_CODE (base
) == VAR_DECL
2749 && DECL_ALIGN (base
) >= TYPE_ALIGN (vectype
)));
2751 /* Convert into bytes. */
2752 offset
= int_const_binop (TRUNC_DIV_EXPR
, bit_offset
, unit_bits
, 1);
2753 /* Check that there is no remainder in bits. */
2754 bit_offset
= int_const_binop (TRUNC_MOD_EXPR
, bit_offset
, unit_bits
, 1);
2755 if (!integer_zerop (bit_offset
))
2757 if (vect_debug_details (NULL
))
2759 fprintf (dump_file
, "bit offset alignment: ");
2760 print_generic_expr (dump_file
, bit_offset
, TDF_SLIM
);
2765 /* Alignment required, in bytes: */
2766 alignment
= fold_convert (unsigned_type_node
,
2767 build_int_cst (NULL_TREE
, TYPE_ALIGN (vectype
)/BITS_PER_UNIT
));
2769 /* Modulo alignment. */
2770 offset
= int_const_binop (TRUNC_MOD_EXPR
, offset
, alignment
, 0);
2771 if (!host_integerp (offset
, 1) || TREE_OVERFLOW (offset
))
2773 if (vect_debug_details (NULL
))
2774 fprintf (dump_file
, "unexpected misalign value");
2778 DR_MISALIGNMENT (dr
) = tree_low_cst (offset
, 1);
2780 if (vect_debug_details (NULL
))
2781 fprintf (dump_file
, "misalign = %d", DR_MISALIGNMENT (dr
));
2787 /* Function vect_compute_array_ref_alignment
2789 Compute the alignment of an array-ref.
2790 The alignment we compute here is relative to
2791 TYPE_ALIGN(VECTYPE) boundary.
2794 OFFSET - the alignment in bits
2795 Return value - the base of the array-ref. E.g,
2796 if the array-ref is a.b[k].c[i][j] the returned
2801 vect_compute_array_ref_alignment (struct data_reference
*dr
,
2802 loop_vec_info loop_vinfo
,
2806 tree array_first_index
= size_zero_node
;
2808 tree ref
= DR_REF (dr
);
2809 tree scalar_type
= TREE_TYPE (ref
);
2810 tree oprnd0
= TREE_OPERAND (ref
, 0);
2811 tree dims
= size_one_node
;
2812 tree misalign
= size_zero_node
;
2813 tree next_ref
, this_offset
= size_zero_node
;
2817 if (TREE_CODE (TREE_TYPE (ref
)) == ARRAY_TYPE
)
2818 /* The reference is an array without its last index. */
2819 next_ref
= vect_compute_array_base_alignment (ref
, vectype
, &dims
, &misalign
);
2822 vect_compute_array_base_alignment (oprnd0
, vectype
, &dims
, &misalign
);
2824 /* Alignment is not requested. Just return the base. */
2827 /* Compute alignment. */
2828 if (!host_integerp (misalign
, 1) || TREE_OVERFLOW (misalign
) || !next_ref
)
2830 this_offset
= misalign
;
2832 /* Check the first index accessed. */
2833 if (!vect_get_first_index (ref
, &array_first_index
))
2835 if (vect_debug_details (NULL
))
2836 fprintf (dump_file
, "no first_index for array.");
2840 /* Check the index of the array_ref. */
2841 init
= initial_condition_in_loop_num (DR_ACCESS_FN (dr
, 0),
2842 LOOP_VINFO_LOOP (loop_vinfo
)->num
);
2844 /* FORNOW: In order to simplify the handling of alignment, we make sure
2845 that the first location at which the array is accessed ('init') is on an
2846 'NUNITS' boundary, since we are assuming here that 'array base' is aligned.
2847 This is too conservative, since we require that
2848 both {'array_base' is a multiple of NUNITS} && {'init' is a multiple of
2849 NUNITS}, instead of just {('array_base' + 'init') is a multiple of NUNITS}.
2850 This should be relaxed in the future. */
2852 if (!init
|| !host_integerp (init
, 0))
2854 if (vect_debug_details (NULL
))
2855 fprintf (dump_file
, "non constant init. ");
2859 /* bytes per scalar element: */
2860 nunits
= fold_convert (unsigned_type_node
,
2861 build_int_cst (NULL_TREE
, GET_MODE_SIZE (TYPE_MODE (scalar_type
))));
2862 nbits
= int_const_binop (MULT_EXPR
, nunits
,
2863 build_int_cst (NULL_TREE
, BITS_PER_UNIT
), 1);
2865 /* misalign = offset + (init-array_first_index)*nunits*bits_in_byte */
2866 misalign
= int_const_binop (MINUS_EXPR
, init
, array_first_index
, 0);
2867 misalign
= int_const_binop (MULT_EXPR
, misalign
, nbits
, 0);
2868 misalign
= int_const_binop (PLUS_EXPR
, misalign
, this_offset
, 0);
2870 /* TODO: allow negative misalign values. */
2871 if (!host_integerp (misalign
, 1) || TREE_OVERFLOW (misalign
))
2873 if (vect_debug_details (NULL
))
2874 fprintf (dump_file
, "unexpected misalign value");
2882 /* Function vect_compute_data_refs_alignment
2884 Compute the misalignment of data references in the loop.
2885 This pass may take place at function granularity instead of at loop
2888 FOR NOW: No analysis is actually performed. Misalignment is calculated
2889 only for trivial cases. TODO. */
2892 vect_compute_data_refs_alignment (loop_vec_info loop_vinfo
)
2894 varray_type loop_write_datarefs
= LOOP_VINFO_DATAREF_WRITES (loop_vinfo
);
2895 varray_type loop_read_datarefs
= LOOP_VINFO_DATAREF_READS (loop_vinfo
);
2898 for (i
= 0; i
< VARRAY_ACTIVE_SIZE (loop_write_datarefs
); i
++)
2900 struct data_reference
*dr
= VARRAY_GENERIC_PTR (loop_write_datarefs
, i
);
2901 vect_compute_data_ref_alignment (dr
, loop_vinfo
);
2904 for (i
= 0; i
< VARRAY_ACTIVE_SIZE (loop_read_datarefs
); i
++)
2906 struct data_reference
*dr
= VARRAY_GENERIC_PTR (loop_read_datarefs
, i
);
2907 vect_compute_data_ref_alignment (dr
, loop_vinfo
);
2912 /* Function vect_enhance_data_refs_alignment
2914 This pass will use loop versioning and loop peeling in order to enhance
2915 the alignment of data references in the loop.
2917 FOR NOW: we assume that whatever versioning/peeling takes place, only the
2918 original loop is to be vectorized; Any other loops that are created by
2919 the transformations performed in this pass - are not supposed to be
2920 vectorized. This restriction will be relaxed.
2922 FOR NOW: No transformation is actually performed. TODO. */
2925 vect_enhance_data_refs_alignment (loop_vec_info loop_info ATTRIBUTE_UNUSED
)
2928 This pass will require a cost model to guide it whether to apply peeling
2929 or versioning or a combination of the two. For example, the scheme that
2930 intel uses when given a loop with several memory accesses, is as follows:
2931 choose one memory access ('p') which alignment you want to force by doing
2932 peeling. Then, either (1) generate a loop in which 'p' is aligned and all
2933 other accesses are not necessarily aligned, or (2) use loop versioning to
2934 generate one loop in which all accesses are aligned, and another loop in
2935 which only 'p' is necessarily aligned.
2937 ("Automatic Intra-Register Vectorization for the Intel Architecture",
2938 Aart J.C. Bik, Milind Girkar, Paul M. Grey and Ximmin Tian, International
2939 Journal of Parallel Programming, Vol. 30, No. 2, April 2002.)
2941 Devising a cost model is the most critical aspect of this work. It will
2942 guide us on which access to peel for, whether to use loop versioning, how
2943 many versions to create, etc. The cost model will probably consist of
2944 generic considerations as well as target specific considerations (on
2945 powerpc for example, misaligned stores are more painful than misaligned
2948 Here is the general steps involved in alignment enhancements:
2950 -- original loop, before alignment analysis:
2951 for (i=0; i<N; i++){
2952 x = q[i]; # DR_MISALIGNMENT(q) = unknown
2953 p[i] = y; # DR_MISALIGNMENT(p) = unknown
2956 -- After vect_compute_data_refs_alignment:
2957 for (i=0; i<N; i++){
2958 x = q[i]; # DR_MISALIGNMENT(q) = 3
2959 p[i] = y; # DR_MISALIGNMENT(p) = unknown
2962 -- Possibility 1: we do loop versioning:
2964 for (i=0; i<N; i++){ # loop 1A
2965 x = q[i]; # DR_MISALIGNMENT(q) = 3
2966 p[i] = y; # DR_MISALIGNMENT(p) = 0
2970 for (i=0; i<N; i++){ # loop 1B
2971 x = q[i]; # DR_MISALIGNMENT(q) = 3
2972 p[i] = y; # DR_MISALIGNMENT(p) = unaligned
2976 -- Possibility 2: we do loop peeling:
2977 for (i = 0; i < 3; i++){ # (scalar loop, not to be vectorized).
2981 for (i = 3; i < N; i++){ # loop 2A
2982 x = q[i]; # DR_MISALIGNMENT(q) = 0
2983 p[i] = y; # DR_MISALIGNMENT(p) = unknown
2986 -- Possibility 3: combination of loop peeling and versioning:
2987 for (i = 0; i < 3; i++){ # (scalar loop, not to be vectorized).
2992 for (i = 3; i<N; i++){ # loop 3A
2993 x = q[i]; # DR_MISALIGNMENT(q) = 0
2994 p[i] = y; # DR_MISALIGNMENT(p) = 0
2998 for (i = 3; i<N; i++){ # loop 3B
2999 x = q[i]; # DR_MISALIGNMENT(q) = 0
3000 p[i] = y; # DR_MISALIGNMENT(p) = unaligned
3004 These loops are later passed to loop_transform to be vectorized. The
3005 vectorizer will use the alignment information to guide the transformation
3006 (whether to generate regular loads/stores, or with special handling for
3012 /* Function vect_analyze_data_refs_alignment
3014 Analyze the alignment of the data-references in the loop.
3015 FOR NOW: Until support for misliagned accesses is in place, only if all
3016 accesses are aligned can the loop be vectorized. This restriction will be
3020 vect_analyze_data_refs_alignment (loop_vec_info loop_vinfo
)
3022 varray_type loop_write_datarefs
= LOOP_VINFO_DATAREF_WRITES (loop_vinfo
);
3023 /*varray_type loop_read_datarefs = LOOP_VINFO_DATAREF_READS (loop_vinfo);*/
3027 if (vect_debug_details (NULL
))
3028 fprintf (dump_file
, "\n<<vect_analyze_data_refs_alignment>>\n");
3031 /* This pass may take place at function granularity instead of at loop
3034 vect_compute_data_refs_alignment (loop_vinfo
);
3037 /* This pass will use loop versioning and loop peeling in order to enhance
3038 the alignment of data references in the loop.
3039 FOR NOW: we assume that whatever versioning/peeling took place, the
3040 original loop is to be vectorized. Any other loops that were created by
3041 the transformations performed in this pass - are not supposed to be
3042 vectorized. This restriction will be relaxed. */
3044 vect_enhance_data_refs_alignment (loop_vinfo
);
3047 /* Finally, check that loop can be vectorized.
3048 FOR NOW: Until support for misaligned accesses is in place, only if all
3049 accesses are aligned can the loop be vectorized. This restriction will be
3052 for (i
= 0; i
< VARRAY_ACTIVE_SIZE (loop_write_datarefs
); i
++)
3054 struct data_reference
*dr
= VARRAY_GENERIC_PTR (loop_write_datarefs
, i
);
3055 if (!aligned_access_p (dr
))
3057 if (vect_debug_stats (LOOP_VINFO_LOOP (loop_vinfo
))
3058 || vect_debug_details (LOOP_VINFO_LOOP (loop_vinfo
)))
3059 fprintf (dump_file
, "not vectorized: unaligned store.");
3064 /* The vectorizer now supports misaligned loads, so we don't fail anymore
3065 in the presence of a misaligned read dataref. For some targets however
3066 it may be preferable not to vectorize in such a case as misaligned
3067 accesses are very costly. This should be considered in the future. */
3069 for (i = 0; i < VARRAY_ACTIVE_SIZE (loop_read_datarefs); i++)
3071 struct data_reference *dr = VARRAY_GENERIC_PTR (loop_read_datarefs, i);
3072 if (!aligned_access_p (dr))
3074 if (vect_debug_stats (LOOP_VINFO_LOOP (loop_vinfo))
3075 || vect_debug_details (LOOP_VINFO_LOOP (loop_vinfo)))
3076 fprintf (dump_file, "not vectorized: unaligned load.");
3086 /* Function vect_analyze_data_ref_access.
3088 Analyze the access pattern of the data-reference DR. For now, a data access
3089 has to consecutive and aligned to be considered vectorizable. */
3092 vect_analyze_data_ref_access (struct data_reference
*dr
)
3094 varray_type access_fns
= DR_ACCESS_FNS (dr
);
3097 unsigned int dimensions
, i
;
3099 /* Check that in case of multidimensional array ref A[i1][i2]..[iN],
3100 i1, i2, ..., iN-1 are loop invariant (to make sure that the memory
3101 access is contiguous). */
3102 dimensions
= VARRAY_ACTIVE_SIZE (access_fns
);
3104 for (i
= 1; i
< dimensions
; i
++) /* Not including the last dimension. */
3106 access_fn
= DR_ACCESS_FN (dr
, i
);
3108 if (evolution_part_in_loop_num (access_fn
,
3109 loop_containing_stmt (DR_STMT (dr
))->num
))
3111 /* Evolution part is not NULL in this loop (it is neither constant nor
3113 if (vect_debug_details (NULL
))
3116 "not vectorized: complicated multidimensional array access.");
3117 print_generic_expr (dump_file
, access_fn
, TDF_SLIM
);
3123 access_fn
= DR_ACCESS_FN (dr
, 0); /* The last dimension access function. */
3124 if (!evolution_function_is_constant_p (access_fn
)
3125 && !vect_is_simple_iv_evolution (loop_containing_stmt (DR_STMT (dr
))->num
,
3126 access_fn
, &init
, &step
, true))
3128 if (vect_debug_details (NULL
))
3130 fprintf (dump_file
, "not vectorized: too complicated access function.");
3131 print_generic_expr (dump_file
, access_fn
, TDF_SLIM
);
3140 /* Function vect_analyze_data_ref_accesses.
3142 Analyze the access pattern of all the data references in the loop.
3144 FORNOW: the only access pattern that is considered vectorizable is a
3145 simple step 1 (consecutive) access.
3147 FORNOW: handle only arrays and pointer accesses. */
3150 vect_analyze_data_ref_accesses (loop_vec_info loop_vinfo
)
3153 varray_type loop_write_datarefs
= LOOP_VINFO_DATAREF_WRITES (loop_vinfo
);
3154 varray_type loop_read_datarefs
= LOOP_VINFO_DATAREF_READS (loop_vinfo
);
3156 if (vect_debug_details (NULL
))
3157 fprintf (dump_file
, "\n<<vect_analyze_data_ref_accesses>>\n");
3159 for (i
= 0; i
< VARRAY_ACTIVE_SIZE (loop_write_datarefs
); i
++)
3161 struct data_reference
*dr
= VARRAY_GENERIC_PTR (loop_write_datarefs
, i
);
3162 bool ok
= vect_analyze_data_ref_access (dr
);
3165 if (vect_debug_stats (LOOP_VINFO_LOOP (loop_vinfo
))
3166 || vect_debug_details (LOOP_VINFO_LOOP (loop_vinfo
)))
3167 fprintf (dump_file
, "not vectorized: complicated access pattern.");
3172 for (i
= 0; i
< VARRAY_ACTIVE_SIZE (loop_read_datarefs
); i
++)
3174 struct data_reference
*dr
= VARRAY_GENERIC_PTR (loop_read_datarefs
, i
);
3175 bool ok
= vect_analyze_data_ref_access (dr
);
3178 if (vect_debug_stats (LOOP_VINFO_LOOP (loop_vinfo
))
3179 || vect_debug_details (LOOP_VINFO_LOOP (loop_vinfo
)))
3180 fprintf (dump_file
, "not vectorized: complicated access pattern.");
3189 /* Function vect_analyze_pointer_ref_access.
3192 STMT - a stmt that contains a data-ref
3193 MEMREF - a data-ref in STMT, which is an INDIRECT_REF.
3195 If the data-ref access is vectorizable, return a data_reference structure
3196 that represents it (DR). Otherwise - return NULL. */
3198 static struct data_reference
*
3199 vect_analyze_pointer_ref_access (tree memref
, tree stmt
, bool is_read
)
3201 stmt_vec_info stmt_info
= vinfo_for_stmt (stmt
);
3202 struct loop
*loop
= STMT_VINFO_LOOP (stmt_info
);
3203 tree access_fn
= analyze_scalar_evolution (loop
, TREE_OPERAND (memref
, 0));
3206 tree reftype
, innertype
;
3207 enum machine_mode innermode
;
3208 tree indx_access_fn
;
3209 int loopnum
= loop
->num
;
3210 struct data_reference
*dr
;
3214 if (vect_debug_stats (loop
) || vect_debug_details (loop
))
3215 fprintf (dump_file
, "not vectorized: complicated pointer access.");
3219 if (vect_debug_details (NULL
))
3221 fprintf (dump_file
, "Access function of ptr: ");
3222 print_generic_expr (dump_file
, access_fn
, TDF_SLIM
);
3225 if (!vect_is_simple_iv_evolution (loopnum
, access_fn
, &init
, &step
, false))
3227 if (vect_debug_stats (loop
) || vect_debug_details (loop
))
3228 fprintf (dump_file
, "not vectorized: pointer access is not simple.");
3234 if (!host_integerp (step
,0))
3236 if (vect_debug_stats (loop
) || vect_debug_details (loop
))
3238 "not vectorized: non constant step for pointer access.");
3242 step_val
= TREE_INT_CST_LOW (step
);
3244 reftype
= TREE_TYPE (TREE_OPERAND (memref
, 0));
3245 if (TREE_CODE (reftype
) != POINTER_TYPE
)
3247 if (vect_debug_stats (loop
) || vect_debug_details (loop
))
3248 fprintf (dump_file
, "not vectorized: unexpected pointer access form.");
3252 reftype
= TREE_TYPE (init
);
3253 if (TREE_CODE (reftype
) != POINTER_TYPE
)
3255 if (vect_debug_stats (loop
) || vect_debug_details (loop
))
3256 fprintf (dump_file
, "not vectorized: unexpected pointer access form.");
3260 innertype
= TREE_TYPE (reftype
);
3261 innermode
= TYPE_MODE (innertype
);
3262 if (GET_MODE_SIZE (innermode
) != step_val
)
3264 /* FORNOW: support only consecutive access */
3265 if (vect_debug_stats (loop
) || vect_debug_details (loop
))
3266 fprintf (dump_file
, "not vectorized: non consecutive access.");
3271 build_polynomial_chrec (loopnum
, integer_zero_node
, integer_one_node
);
3272 if (vect_debug_details (NULL
))
3274 fprintf (dump_file
, "Access function of ptr indx: ");
3275 print_generic_expr (dump_file
, indx_access_fn
, TDF_SLIM
);
3277 dr
= init_data_ref (stmt
, memref
, init
, indx_access_fn
, is_read
);
3282 /* Function vect_get_symbl_and_dr.
3284 The function returns SYMBL - the relevant variable for
3285 memory tag (for aliasing purposes).
3286 Also data reference structure DR is created.
3289 MEMREF - data reference in STMT
3290 IS_READ - TRUE if STMT reads from MEMREF, FALSE if writes to MEMREF
3293 DR - data_reference struct for MEMREF
3294 return value - the relevant variable for memory tag (for aliasing purposes).
3299 vect_get_symbl_and_dr (tree memref
, tree stmt
, bool is_read
,
3300 loop_vec_info loop_vinfo
, struct data_reference
**dr
)
3302 tree symbl
, oprnd0
, oprnd1
;
3303 stmt_vec_info stmt_info
= vinfo_for_stmt (stmt
);
3305 tree array_base
, base
;
3306 struct data_reference
*new_dr
;
3307 bool base_aligned_p
;
3310 switch (TREE_CODE (memref
))
3313 new_dr
= vect_analyze_pointer_ref_access (memref
, stmt
, is_read
);
3317 symbl
= DR_BASE_NAME (new_dr
);
3318 STMT_VINFO_VECT_DR_BASE (stmt_info
) = symbl
;
3320 switch (TREE_CODE (symbl
))
3324 oprnd0
= TREE_OPERAND (symbl
, 0);
3325 oprnd1
= TREE_OPERAND (symbl
, 1);
3328 /* Only {address_base + offset} expressions are supported,
3329 where address_base can be POINTER_TYPE or ARRAY_TYPE and
3330 offset can be anything but POINTER_TYPE or ARRAY_TYPE.
3331 TODO: swap operands if {offset + address_base}. */
3332 if ((TREE_CODE (TREE_TYPE (oprnd1
)) == POINTER_TYPE
3333 && TREE_CODE (oprnd1
) != INTEGER_CST
)
3334 || TREE_CODE (TREE_TYPE (oprnd1
)) == ARRAY_TYPE
)
3337 if (TREE_CODE (TREE_TYPE (oprnd0
)) == POINTER_TYPE
)
3340 symbl
= vect_get_symbl_and_dr (oprnd0
, stmt
, is_read
,
3341 loop_vinfo
, &new_dr
);
3345 /* symbl remains unchanged. */
3349 if (vect_debug_details (NULL
))
3351 fprintf (dump_file
, "unhandled data ref: ");
3352 print_generic_expr (dump_file
, memref
, TDF_SLIM
);
3353 fprintf (dump_file
, " (symbl ");
3354 print_generic_expr (dump_file
, symbl
, TDF_SLIM
);
3355 fprintf (dump_file
, ") in stmt ");
3356 print_generic_expr (dump_file
, stmt
, TDF_SLIM
);
3363 offset
= size_zero_node
;
3365 /* Store the array base in the stmt info.
3366 For one dimensional array ref a[i], the base is a,
3367 for multidimensional a[i1][i2]..[iN], the base is
3368 a[i1][i2]..[iN-1]. */
3369 array_base
= TREE_OPERAND (memref
, 0);
3370 STMT_VINFO_VECT_DR_BASE (stmt_info
) = array_base
;
3372 new_dr
= analyze_array (stmt
, memref
, is_read
);
3375 /* Find the relevant symbol for aliasing purposes. */
3376 base
= DR_BASE_NAME (new_dr
);
3377 switch (TREE_CODE (base
))
3384 symbl
= TREE_OPERAND (base
, 0);
3388 /* Could have recorded more accurate information -
3389 i.e, the actual FIELD_DECL that is being referenced -
3390 but later passes expect VAR_DECL as the nmt. */
3391 symbl
= vect_get_base_and_bit_offset (new_dr
, base
, NULL_TREE
,
3392 loop_vinfo
, &offset
, &base_aligned_p
);
3397 if (vect_debug_details (NULL
))
3399 fprintf (dump_file
, "unhandled struct/class field access ");
3400 print_generic_expr (dump_file
, stmt
, TDF_SLIM
);
3407 if (vect_debug_details (NULL
))
3409 fprintf (dump_file
, "unhandled data ref: ");
3410 print_generic_expr (dump_file
, memref
, TDF_SLIM
);
3411 fprintf (dump_file
, " in stmt ");
3412 print_generic_expr (dump_file
, stmt
, TDF_SLIM
);
3420 /* Function vect_analyze_data_refs.
3422 Find all the data references in the loop.
3424 FORNOW: Handle aligned INDIRECT_REFs and ARRAY_REFs
3425 which base is really an array (not a pointer) and which alignment
3426 can be forced. This restriction will be relaxed. */
/* NOTE(review): this extraction lost many original source lines (opening
   braces, 'continue'/'return false' statements, switch case labels, and
   several local declarations such as memref/symbl/tag).  All remaining
   code tokens are kept byte-identical; comments only are added.
   TODO: restore the dropped lines from the original tree-vectorizer.c.  */
3429 vect_analyze_data_refs (loop_vec_info loop_vinfo
)
3431 struct loop
*loop
= LOOP_VINFO_LOOP (loop_vinfo
);
3432 basic_block
*bbs
= LOOP_VINFO_BBS (loop_vinfo
);
3433 int nbbs
= loop
->num_nodes
;
3434 block_stmt_iterator si
;
3436 struct data_reference
*dr
;
3440 if (vect_debug_details (NULL
))
3441 fprintf (dump_file
, "\n<<vect_analyze_data_refs>>\n");
/* Walk every stmt in every basic block of the loop, looking for
   memory references.  */
3443 for (j
= 0; j
< nbbs
; j
++)
3445 basic_block bb
= bbs
[j
];
3446 for (si
= bsi_start (bb
); !bsi_end_p (si
); bsi_next (&si
))
3448 bool is_read
= false;
3449 tree stmt
= bsi_stmt (si
);
3450 stmt_vec_info stmt_info
= vinfo_for_stmt (stmt
);
3451 v_may_def_optype v_may_defs
= STMT_V_MAY_DEF_OPS (stmt
);
3452 v_must_def_optype v_must_defs
= STMT_V_MUST_DEF_OPS (stmt
);
3453 vuse_optype vuses
= STMT_VUSE_OPS (stmt
);
3454 varray_type
*datarefs
= NULL
;
3455 int nvuses
, nv_may_defs
, nv_must_defs
;
3459 /* Assumption: there exists a data-ref in stmt, if and only if
3460 it has vuses/vdefs. */
/* No virtual operands: no memory access in this stmt — skip it
   (the 'continue' was lost in extraction).  */
3462 if (!vuses
&& !v_may_defs
&& !v_must_defs
)
/* A stmt is expected to be either a load (vuses only) or a store
   (vdefs only); both at once is unsupported.  */
3465 nvuses
= NUM_VUSES (vuses
);
3466 nv_may_defs
= NUM_V_MAY_DEFS (v_may_defs
);
3467 nv_must_defs
= NUM_V_MUST_DEFS (v_must_defs
);
3469 if (nvuses
&& (nv_may_defs
|| nv_must_defs
))
3471 if (vect_debug_details (NULL
))
3473 fprintf (dump_file
, "unexpected vdefs and vuses in stmt: ");
3474 print_generic_expr (dump_file
, stmt
, TDF_SLIM
);
/* Only plain assignments are expected to carry virtual operands.  */
3479 if (TREE_CODE (stmt
) != MODIFY_EXPR
)
3481 if (vect_debug_details (NULL
))
3483 fprintf (dump_file
, "unexpected vops in stmt: ");
3484 print_generic_expr (dump_file
, stmt
, TDF_SLIM
);
/* Load: the data-ref is the RHS (operand 1); record it in the
   per-loop READS varray.  (The guarding 'if (vuses)' branch and the
   'is_read = true' assignment were presumably on the dropped lines
   3486-3490 — TODO confirm.)  */
3491 memref
= TREE_OPERAND (stmt
, 1);
3492 datarefs
= &(LOOP_VINFO_DATAREF_READS (loop_vinfo
));
/* Store: the data-ref is the LHS (operand 0); record it in the
   per-loop WRITES varray.  */
3497 memref
= TREE_OPERAND (stmt
, 0);
3498 datarefs
= &(LOOP_VINFO_DATAREF_WRITES (loop_vinfo
));
3502 /* Analyze MEMREF. If it is of a supported form, build data_reference
3503 struct for it (DR) and find the relevant symbol for aliasing
   purposes.  (comment closer restored by review)  */
3505 symbl
= vect_get_symbl_and_dr (memref
, stmt
, is_read
, loop_vinfo
, &dr
);
/* presumably guarded by 'if (!symbl)' (dropped line) — bail out when
   the reference is of an unsupported form.  */
3508 if (vect_debug_stats (loop
) || vect_debug_details (loop
))
3510 fprintf (dump_file
, "not vectorized: unhandled data ref: ");
3511 print_generic_expr (dump_file
, stmt
, TDF_SLIM
);
3516 /* Find and record the memtag assigned to this data-ref. */
/* NOTE(review): the case labels of this switch (VAR_DECL, SSA_NAME,
   ADDR_EXPR, default — per the branch bodies below) were lost in
   extraction.  */
3517 switch (TREE_CODE (symbl
))
/* Plain decl: the symbol itself is the memtag.  */
3520 STMT_VINFO_MEMTAG (stmt_info
) = symbl
;
/* SSA_NAME: use the type memory tag of the underlying variable.  */
3524 symbl
= SSA_NAME_VAR (symbl
);
3525 tag
= get_var_ann (symbl
)->type_mem_tag
;
/* Fallback: derive the tag from the pointer operand of MEMREF.  */
3528 tree ptr
= TREE_OPERAND (memref
, 0);
3529 if (TREE_CODE (ptr
) == SSA_NAME
)
3530 tag
= get_var_ann (SSA_NAME_VAR (ptr
))->type_mem_tag
;
3534 if (vect_debug_stats (loop
) || vect_debug_details (loop
))
3535 fprintf (dump_file
, "not vectorized: no memtag for ref.");
3538 STMT_VINFO_MEMTAG (stmt_info
) = tag
;
/* ADDR_EXPR case: look at what the address is taken of.  */
3542 address_base
= TREE_OPERAND (symbl
, 0);
/* NOTE(review): case labels dropped here too (ARRAY_REF / VAR_DECL
   per the bodies below — TODO confirm).  */
3544 switch (TREE_CODE (address_base
))
/* &array[i]: analyze the array access and tag with its base.  */
3547 dr
= analyze_array (stmt
, TREE_OPERAND (symbl
, 0), DR_IS_READ(dr
));
3548 STMT_VINFO_MEMTAG (stmt_info
) = DR_BASE_NAME (dr
);
/* &var: the variable itself is the tag.  */
3552 STMT_VINFO_MEMTAG (stmt_info
) = address_base
;
3556 if (vect_debug_stats (loop
) || vect_debug_details (loop
))
3558 fprintf (dump_file
, "not vectorized: unhandled address expression: ");
3559 print_generic_expr (dump_file
, stmt
, TDF_SLIM
);
/* default: unsupported symbol kind — reject the loop.  */
3566 if (vect_debug_stats (loop
) || vect_debug_details (loop
))
3568 fprintf (dump_file
, "not vectorized: unsupported data-ref: ");
3569 print_generic_expr (dump_file
, memref
, TDF_SLIM
);
/* Success: remember DR both in the loop-wide varray and per-stmt.  */
3574 VARRAY_PUSH_GENERIC_PTR (*datarefs
, dr
);
3575 STMT_VINFO_DATA_REF (stmt_info
) = dr
;
3583 /* Utility functions used by vect_mark_stmts_to_be_vectorized. */
3585 /* Function vect_mark_relevant.
3587 Mark STMT as "relevant for vectorization" and add it to WORKLIST. */
/* NOTE(review): extraction dropped several lines here (opening braces,
   early 'return's after the PHI push and the already-relevant check, and
   an 'if (!stmt_info)' guard before the no-stmt-info report).  Code
   tokens are byte-identical.  */
3590 vect_mark_relevant (varray_type worklist
, tree stmt
)
3592 stmt_vec_info stmt_info
;
3594 if (vect_debug_details (NULL
))
3595 fprintf (dump_file
, "mark relevant.");
/* PHI nodes carry no stmt_vec_info; queue them directly.  */
3597 if (TREE_CODE (stmt
) == PHI_NODE
)
3599 VARRAY_PUSH_TREE (worklist
, stmt
);
3603 stmt_info
= vinfo_for_stmt (stmt
);
/* Debug-report a stmt that unexpectedly has no vectorizer info
   (guarding condition lost in extraction — TODO confirm).  */
3607 if (vect_debug_details (NULL
))
3609 fprintf (dump_file
, "mark relevant: no stmt info!!.");
3610 print_generic_expr (dump_file
, stmt
, TDF_SLIM
);
/* Already queued once — nothing more to do.  */
3615 if (STMT_VINFO_RELEVANT_P (stmt_info
))
3617 if (vect_debug_details (NULL
))
3618 fprintf (dump_file
, "already marked relevant.");
/* First encounter: flag the stmt and put it on the worklist.  */
3622 STMT_VINFO_RELEVANT_P (stmt_info
) = 1;
3623 VARRAY_PUSH_TREE (worklist
, stmt
);
3627 /* Function vect_stmt_relevant_p.
3629 Return true if STMT in loop that is represented by LOOP_VINFO is
3630 "relevant for vectorization".
3632 A stmt is considered "relevant for vectorization" if:
3633 - it has uses outside the loop.
3634 - it has vdefs (it alters memory).
3635 - control stmts in the loop (except for the exit condition).
3637 CHECKME: what other side effects would the vectorizer allow? */
/* NOTE(review): the 'return true'/'return false' statements and the
   declarations of df/num_uses/i were lost in extraction; remaining
   tokens are byte-identical.  */
3640 vect_stmt_relevant_p (tree stmt
, loop_vec_info loop_vinfo
)
3642 v_may_def_optype v_may_defs
;
3643 v_must_def_optype v_must_defs
;
3644 struct loop
*loop
= LOOP_VINFO_LOOP (loop_vinfo
);
3649 /* cond stmt other than loop exit cond. */
3650 if (is_ctrl_stmt (stmt
) && (stmt
!= LOOP_VINFO_EXIT_COND (loop_vinfo
)))
3653 /* changing memory. */
3654 v_may_defs
= STMT_V_MAY_DEF_OPS (stmt
);
3655 v_must_defs
= STMT_V_MUST_DEF_OPS (stmt
);
3656 if (v_may_defs
|| v_must_defs
)
3658 if (vect_debug_details (NULL
))
3659 fprintf (dump_file
, "vec_stmt_relevant_p: stmt has vdefs.");
3663 /* uses outside the loop. */
/* A value computed inside the loop but consumed after it must be
   preserved, so the computing stmt is relevant.  */
3664 df
= get_immediate_uses (stmt
);
3665 num_uses
= num_immediate_uses (df
);
3666 for (i
= 0; i
< num_uses
; i
++)
3668 tree use
= immediate_use (df
, i
);
3669 basic_block bb
= bb_for_stmt (use
);
3670 if (!flow_bb_inside_loop_p (loop
, bb
))
3672 if (vect_debug_details (NULL
))
3673 fprintf (dump_file
, "vec_stmt_relevant_p: used out of loop.");
3682 /* Function vect_mark_stmts_to_be_vectorized.
3684 Not all stmts in the loop need to be vectorized. For example:
3693 Stmt 1 and 3 do not need to be vectorized, because loop control and
3694 addressing of vectorized data-refs are handled differently.
3696 This pass detects such stmts. */
/* NOTE(review): extraction dropped lines here (braces, the return type
   line, 'return false' after unsupported uses, the example code in the
   header comment, and decls of i/j/stmt/bb/ann/use_ops).  Code tokens
   are byte-identical.  Worklist algorithm: seed with directly-relevant
   stmts, then transitively mark everything that feeds them.  */
3699 vect_mark_stmts_to_be_vectorized (loop_vec_info loop_vinfo
)
3701 varray_type worklist
;
3702 struct loop
*loop
= LOOP_VINFO_LOOP (loop_vinfo
);
3703 basic_block
*bbs
= LOOP_VINFO_BBS (loop_vinfo
);
3704 unsigned int nbbs
= loop
->num_nodes
;
3705 block_stmt_iterator si
;
3711 stmt_vec_info stmt_info
;
3713 if (vect_debug_details (NULL
))
3714 fprintf (dump_file
, "\n<<vect_mark_stmts_to_be_vectorized>>\n");
3716 VARRAY_TREE_INIT (worklist
, 64, "work list");
3718 /* 1. Init worklist. */
/* Seed: clear every stmt's relevant flag, then queue the stmts that are
   relevant on their own (vdefs, uses outside the loop, control).  */
3720 for (i
= 0; i
< nbbs
; i
++)
3722 basic_block bb
= bbs
[i
];
3723 for (si
= bsi_start (bb
); !bsi_end_p (si
); bsi_next (&si
))
3725 stmt
= bsi_stmt (si
);
3727 if (vect_debug_details (NULL
))
3729 fprintf (dump_file
, "init: stmt relevant? ");
3730 print_generic_expr (dump_file
, stmt
, TDF_SLIM
);
3733 stmt_info
= vinfo_for_stmt (stmt
);
3734 STMT_VINFO_RELEVANT_P (stmt_info
) = 0;
3736 if (vect_stmt_relevant_p (stmt
, loop_vinfo
))
3737 vect_mark_relevant (worklist
, stmt
);
3742 /* 2. Process_worklist */
3744 while (VARRAY_ACTIVE_SIZE (worklist
) > 0)
3746 stmt
= VARRAY_TOP_TREE (worklist
);
3747 VARRAY_POP (worklist
);
3749 if (vect_debug_details (NULL
))
3751 fprintf (dump_file
, "worklist: examine stmt: ");
3752 print_generic_expr (dump_file
, stmt
, TDF_SLIM
);
3755 /* Examine the USES in this statement. Mark all the statements which
3756 feed this statement's uses as "relevant", unless the USE is used as
   an array index / address computation.  (comment closer restored by
   review; original wording on the dropped lines — TODO confirm)  */
3759 if (TREE_CODE (stmt
) == PHI_NODE
)
3761 /* follow the def-use chain inside the loop. */
3762 for (j
= 0; j
< PHI_NUM_ARGS (stmt
); j
++)
3764 tree arg
= PHI_ARG_DEF (stmt
, j
);
3765 tree def_stmt
= NULL_TREE
;
/* An unsupported use aborts the whole analysis (the 'return false'
   was lost in extraction).  */
3767 if (!vect_is_simple_use (arg
, loop
, &def_stmt
))
3769 if (vect_debug_details (NULL
))
3770 fprintf (dump_file
, "worklist: unsupported use.");
3771 varray_clear (worklist
);
3777 if (vect_debug_details (NULL
))
3779 fprintf (dump_file
, "worklist: def_stmt: ");
3780 print_generic_expr (dump_file
, def_stmt
, TDF_SLIM
);
/* Only defs produced inside the loop need vectorizing.  */
3783 bb
= bb_for_stmt (def_stmt
);
3784 if (flow_bb_inside_loop_p (loop
, bb
))
3785 vect_mark_relevant (worklist
, def_stmt
);
/* Non-PHI stmt: walk its scalar USE operands.  */
3789 ann
= stmt_ann (stmt
);
3790 use_ops
= USE_OPS (ann
);
3792 for (i
= 0; i
< NUM_USES (use_ops
); i
++)
3794 tree use
= USE_OP (use_ops
, i
);
3796 /* We are only interested in uses that need to be vectorized. Uses
3797 that are used for address computation are not considered relevant.
   (comment closer restored by review)  */
3799 if (exist_non_indexing_operands_for_use_p (use
, stmt
))
3801 tree def_stmt
= NULL_TREE
;
3803 if (!vect_is_simple_use (use
, loop
, &def_stmt
))
3805 if (vect_debug_details (NULL
))
3806 fprintf (dump_file
, "worklist: unsupported use.");
3807 varray_clear (worklist
);
3814 if (vect_debug_details (NULL
))
3816 fprintf (dump_file
, "worklist: examine use %d: ", i
);
3817 print_generic_expr (dump_file
, use
, TDF_SLIM
);
3820 bb
= bb_for_stmt (def_stmt
);
3821 if (flow_bb_inside_loop_p (loop
, bb
))
3822 vect_mark_relevant (worklist
, def_stmt
);
3825 } /* while worklist */
3827 varray_clear (worklist
);
3832 /* Function vect_get_loop_niters.
3834 Determine how many iterations the loop is executed. */
/* NOTE(review): the return-type line ('static tree' — TODO confirm), the
   'tree niters' declaration and braces were lost in extraction.
   On success *NUMBER_OF_ITERATIONS receives the constant trip count;
   the function returns the loop's exit condition stmt.  */
3837 vect_get_loop_niters (struct loop
*loop
, HOST_WIDE_INT
*number_of_iterations
)
3841 if (vect_debug_details (NULL
))
3842 fprintf (dump_file
, "\n<<get_loop_niters>>\n");
3844 niters
= number_of_iterations_in_loop (loop
);
/* Record the trip count only when the scalar-evolution analysis
   produced a known compile-time constant that fits a HOST_WIDE_INT.  */
3846 if (niters
!= NULL_TREE
3847 && niters
!= chrec_dont_know
3848 && host_integerp (niters
,0))
3850 *number_of_iterations
= TREE_INT_CST_LOW (niters
);
3852 if (vect_debug_details (NULL
))
3853 fprintf (dump_file
, "==> get_loop_niters:" HOST_WIDE_INT_PRINT_DEC
,
3854 *number_of_iterations
);
3857 return get_loop_exit_condition (loop
);
3861 /* Function vect_analyze_loop_form.
3863 Verify the following restrictions (some may be relaxed in the future):
3864 - it's an inner-most loop
3865 - number of BBs = 2 (which are the loop header and the latch)
3866 - the loop has a pre-header
3867 - the loop has a single entry and exit
3868 - the loop exit condition is simple enough, and the number of iterations
3869 can be analyzed (a countable loop). */
/* NOTE(review): extraction dropped lines here (braces, the head of the
   bad-form condition — presumably 'if (loop->inner' — the 'return NULL'
   statements on every failure path, the 'tree loop_cond' decl, and the
   final 'return loop_vinfo').  Code tokens are byte-identical.  */
3871 static loop_vec_info
3872 vect_analyze_loop_form (struct loop
*loop
)
3874 loop_vec_info loop_vinfo
;
3876 HOST_WIDE_INT number_of_iterations
= -1;
3878 if (vect_debug_details (loop
))
3879 fprintf (dump_file
, "\n<<vect_analyze_loop_form>>\n");
/* Reject nested loops, multiple-exit loops, and loops with more than
   the expected header+latch pair of blocks.  */
3882 || !loop
->single_exit
3883 || loop
->num_nodes
!= 2)
3885 if (vect_debug_stats (loop
) || vect_debug_details (loop
))
3887 fprintf (dump_file
, "not vectorized: bad loop form. ");
3889 fprintf (dump_file
, "nested loop.");
3890 else if (!loop
->single_exit
)
3891 fprintf (dump_file
, "multiple exits.");
3892 else if (loop
->num_nodes
!= 2)
3893 fprintf (dump_file
, "too many BBs in loop.");
3899 /* We assume that the loop exit condition is at the end of the loop. i.e,
3900 that the loop is represented as a do-while (with a proper if-guard
3901 before the loop if needed), where the loop header contains all the
3902 executable statements, and the latch is empty. */
3903 if (!empty_block_p (loop
->latch
))
3905 if (vect_debug_stats (loop
) || vect_debug_details (loop
))
/* NOTE(review): "unexpectd" is a typo in the runtime dump string;
   left untouched here since changing it alters program output.  */
3906 fprintf (dump_file
, "not vectorized: unexpectd loop form.");
3910 if (empty_block_p (loop
->header
))
3912 if (vect_debug_stats (loop
) || vect_debug_details (loop
))
3913 fprintf (dump_file
, "not vectorized: empty loop.");
/* Try to analyze the trip count; failure paths below reject the loop
   (their 'return NULL's were lost in extraction).  */
3917 loop_cond
= vect_get_loop_niters (loop
, &number_of_iterations
);
3920 if (vect_debug_stats (loop
) || vect_debug_details (loop
))
3921 fprintf (dump_file
, "not vectorized: complicated exit condition.");
3925 if (number_of_iterations
< 0)
3927 if (vect_debug_stats (loop
) || vect_debug_details (loop
))
3928 fprintf (dump_file
, "not vectorized: unknown loop bound.");
3932 if (number_of_iterations
== 0) /* CHECKME: can this happen? */
3934 if (vect_debug_stats (loop
) || vect_debug_details (loop
))
3935 fprintf (dump_file
, "not vectorized: number of iterations = 0.");
/* All checks passed: build the loop_vec_info and record what we found.  */
3939 loop_vinfo
= new_loop_vec_info (loop
);
3940 LOOP_VINFO_EXIT_COND (loop_vinfo
) = loop_cond
;
3941 LOOP_VINFO_NITERS (loop_vinfo
) = number_of_iterations
;
3947 /* Function vect_analyze_loop.
3949 Apply a set of analyses on LOOP, and create a loop_vec_info struct
3950 for it. The different analyses will record information in the
3951 loop_vec_info struct. */
/* NOTE(review): extraction dropped the 'bool ok' decl, the
   'if (!loop_vinfo)' / 'if (!ok)' guards, every 'return NULL', and the
   final 'return loop_vinfo'.  Code tokens are byte-identical.  The
   function is a pipeline of analyses; each failure destroys the
   loop_vec_info and gives up on the loop.  */
3953 static loop_vec_info
3954 vect_analyze_loop (struct loop
*loop
)
3957 loop_vec_info loop_vinfo
;
3959 if (vect_debug_details (NULL
))
3960 fprintf (dump_file
, "\n<<<<<<< analyze_loop_nest >>>>>>>\n");
3962 /* Check the CFG characteristics of the loop (nesting, entry/exit, etc.).  */
3964 loop_vinfo
= vect_analyze_loop_form (loop
);
3967 if (vect_debug_details (loop
))
3968 fprintf (dump_file
, "bad loop form.");
3972 /* Find all data references in the loop (which correspond to vdefs/vuses)
3973 and analyze their evolution in the loop.
3975 FORNOW: Handle only simple, array references, which
3976 alignment can be forced, and aligned pointer-references. */
3978 ok
= vect_analyze_data_refs (loop_vinfo
);
3981 if (vect_debug_details (loop
))
3982 fprintf (dump_file
, "bad data references.");
3983 destroy_loop_vec_info (loop_vinfo
);
3987 /* Data-flow analysis to detect stmts that do not need to be vectorized. */
3989 ok
= vect_mark_stmts_to_be_vectorized (loop_vinfo
);
/* NOTE(review): two identically-guarded debug prints in a row; the
   messages overlap — consider keeping only the second.  */
3992 if (vect_debug_details (loop
))
3993 fprintf (dump_file
, "unexpected pattern.");
3994 if (vect_debug_details (loop
))
3995 fprintf (dump_file
, "not vectorized: unexpected pattern.");
3996 destroy_loop_vec_info (loop_vinfo
);
4000 /* Check that all cross-iteration scalar data-flow cycles are OK.
4001 Cross-iteration cycles caused by virtual phis are analyzed separately. */
4003 ok
= vect_analyze_scalar_cycles (loop_vinfo
);
4006 if (vect_debug_details (loop
))
4007 fprintf (dump_file
, "bad scalar cycle.");
4008 destroy_loop_vec_info (loop_vinfo
);
4012 /* Analyze data dependences between the data-refs in the loop.
4013 FORNOW: fail at the first data dependence that we encounter. */
4015 ok
= vect_analyze_data_ref_dependences (loop_vinfo
);
4018 if (vect_debug_details (loop
))
4019 fprintf (dump_file
, "bad data dependence.");
4020 destroy_loop_vec_info (loop_vinfo
);
4024 /* Analyze the access patterns of the data-refs in the loop (consecutive,
4025 complex, etc.). FORNOW: Only handle consecutive access pattern. */
4027 ok
= vect_analyze_data_ref_accesses (loop_vinfo
);
4030 if (vect_debug_details (loop
))
4031 fprintf (dump_file
, "bad data access.");
4032 destroy_loop_vec_info (loop_vinfo
);
4036 /* Analyze the alignment of the data-refs in the loop.
4037 FORNOW: Only aligned accesses are handled. */
4039 ok
= vect_analyze_data_refs_alignment (loop_vinfo
);
4042 if (vect_debug_details (loop
))
4043 fprintf (dump_file
, "bad data alignment.");
4044 destroy_loop_vec_info (loop_vinfo
);
4048 /* Scan all the operations in the loop and make sure they are
   vectorizable.  (comment closer restored by review)  */
4051 ok
= vect_analyze_operations (loop_vinfo
);
4054 if (vect_debug_details (loop
))
4055 fprintf (dump_file
, "bad operation or unsupported loop bound.");
4056 destroy_loop_vec_info (loop_vinfo
);
/* Every analysis passed: mark the loop as vectorizable.  */
4060 LOOP_VINFO_VECTORIZABLE_P (loop_vinfo
) = 1;
4066 /* Function need_imm_uses_for.
4068 Return whether we ought to include information for 'var'
4069 when calculating immediate uses. For this pass we only want use
4070 information for non-virtual variables. */
4073 need_imm_uses_for (tree var
)
4075 return is_gimple_reg (var
);
4079 /* Function vectorize_loops.
4081 Entry Point to loop vectorization phase. */
4084 vectorize_loops (struct loops
*loops
)
4086 unsigned int i
, loops_num
;
4087 unsigned int num_vectorized_loops
= 0;
4089 /* Does the target support SIMD? */
4090 /* FORNOW: until more sophisticated machine modelling is in place. */
4091 if (!UNITS_PER_SIMD_WORD
)
4093 if (vect_debug_details (NULL
))
4094 fprintf (dump_file
, "vectorizer: target vector size is not defined.");
4098 compute_immediate_uses (TDFA_USE_OPS
, need_imm_uses_for
);
4100 /* ----------- Analyze loops. ----------- */
4102 /* If some loop was duplicated, it gets bigger number
4103 than all previously defined loops. This fact allows us to run
4104 only over initial loops skipping newly generated ones. */
4105 loops_num
= loops
->num
;
4106 for (i
= 1; i
< loops_num
; i
++)
4108 loop_vec_info loop_vinfo
;
4109 struct loop
*loop
= loops
->parray
[i
];
4114 loop_vinfo
= vect_analyze_loop (loop
);
4115 loop
->aux
= loop_vinfo
;
4117 if (!loop_vinfo
|| !LOOP_VINFO_VECTORIZABLE_P (loop_vinfo
))
4120 vect_transform_loop (loop_vinfo
, loops
);
4121 num_vectorized_loops
++;
4124 if (vect_debug_stats (NULL
) || vect_debug_details (NULL
))
4125 fprintf (dump_file
, "\nvectorized %u loops in function.\n",
4126 num_vectorized_loops
);
4128 /* ----------- Finalize. ----------- */
4131 for (i
= 1; i
< loops_num
; i
++)
4133 struct loop
*loop
= loops
->parray
[i
];
4134 loop_vec_info loop_vinfo
;
4138 loop_vinfo
= loop
->aux
;
4139 destroy_loop_vec_info (loop_vinfo
);
4143 rewrite_into_ssa (false);
4144 if (bitmap_first_set_bit (vars_to_rename
) >= 0)
4146 /* The rewrite of ssa names may cause violation of loop closed ssa
4147 form invariants. TODO -- avoid these rewrites completely.
4148 Information in virtual phi nodes is sufficient for it. */
4149 rewrite_into_loop_closed_ssa ();
4151 bitmap_clear (vars_to_rename
);