]>
Commit | Line | Data |
---|---|---|
ebfd146a | 1 | /* Vectorizer |
d1e082c2 | 2 | Copyright (C) 2003-2013 Free Software Foundation, Inc. |
b8698a0f | 3 | Contributed by Dorit Naishlos <dorit@il.ibm.com> |
79fe1b3b DN |
4 | |
5 | This file is part of GCC. | |
6 | ||
7 | GCC is free software; you can redistribute it and/or modify it under | |
8 | the terms of the GNU General Public License as published by the Free | |
9dcd6f09 | 9 | Software Foundation; either version 3, or (at your option) any later |
79fe1b3b DN |
10 | version. |
11 | ||
12 | GCC is distributed in the hope that it will be useful, but WITHOUT ANY | |
13 | WARRANTY; without even the implied warranty of MERCHANTABILITY or | |
14 | FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License | |
15 | for more details. | |
16 | ||
17 | You should have received a copy of the GNU General Public License | |
9dcd6f09 NC |
18 | along with GCC; see the file COPYING3. If not see |
19 | <http://www.gnu.org/licenses/>. */ | |
79fe1b3b | 20 | |
ebfd146a | 21 | /* Loop and basic block vectorizer. |
7ccf35ed | 22 | |
b8698a0f L |
23 | This file contains drivers for the three vectorizers: |
24 | (1) loop vectorizer (inter-iteration parallelism), | |
ebfd146a IR |
25 | (2) loop-aware SLP (intra-iteration parallelism) (invoked by the loop |
26 | vectorizer) | |
27 | (3) BB vectorizer (out-of-loops), aka SLP | |
b8698a0f | 28 | |
ebfd146a | 29 | The rest of the vectorizer's code is organized as follows: |
b8698a0f L |
30 | - tree-vect-loop.c - loop specific parts such as reductions, etc. These are |
31 | used by drivers (1) and (2). | |
32 | - tree-vect-loop-manip.c - vectorizer's loop control-flow utilities, used by | |
33 | drivers (1) and (2). | |
34 | - tree-vect-slp.c - BB vectorization specific analysis and transformation, | |
ebfd146a IR |
35 | used by drivers (2) and (3). |
36 | - tree-vect-stmts.c - statements analysis and transformation (used by all). | |
b8698a0f | 37 | - tree-vect-data-refs.c - vectorizer specific data-refs analysis and |
ebfd146a IR |
38 | manipulations (used by all). |
39 | - tree-vect-patterns.c - vectorizable code patterns detector (used by all) | |
40 | ||
41 | Here's a poor attempt at illustrating that: | |
42 | ||
43 | tree-vectorizer.c: | |
44 | loop_vect() loop_aware_slp() slp_vect() | |
45 | | / \ / | |
46 | | / \ / | |
47 | tree-vect-loop.c tree-vect-slp.c | |
48 | | \ \ / / | | |
49 | | \ \/ / | | |
50 | | \ /\ / | | |
51 | | \ / \ / | | |
52 | tree-vect-stmts.c tree-vect-data-refs.c | |
53 | \ / | |
54 | tree-vect-patterns.c | |
55 | */ | |
89d67cca | 56 | |
ebfd146a IR |
57 | #include "config.h" |
58 | #include "system.h" | |
59 | #include "coretypes.h" | |
78c60e3d | 60 | #include "dumpfile.h" |
ebfd146a IR |
61 | #include "tm.h" |
62 | #include "ggc.h" | |
63 | #include "tree.h" | |
cf835838 | 64 | #include "tree-pretty-print.h" |
ebfd146a | 65 | #include "tree-flow.h" |
ebfd146a | 66 | #include "cfgloop.h" |
ebfd146a IR |
67 | #include "tree-vectorizer.h" |
68 | #include "tree-pass.h" | |
89d67cca | 69 | |
a70d6342 | 70 | /* Loop or bb location currently being processed. The drivers in this file set it before analyzing each loop or basic block and hand it to dump_printf_loc so dump messages are tagged with it. */
8644a673 | 71 | LOC vect_location;
ad2dd72a | 72 | |
ebfd146a | 73 | /* Vector mapping GIMPLE stmt to stmt_vec_info; elements are held as untyped vec_void_p pointers. NOTE(review): presumably set up by init_stmt_vec_info_vec () and released by free_stmt_vec_info_vec (), which the drivers below call around their work -- confirm where those are defined. */
9771b263 | 74 | vec<vec_void_p> stmt_vec_info_vec;
89d67cca | 75 | |
ebfd146a | 76 | \f |
79fe1b3b | 77 | /* Function vectorize_loops. |
b8698a0f | 78 | |
8644a673 | 79 | Entry point to loop vectorization phase. */ |
79fe1b3b | 80 | |
4d2280f6 | 81 | unsigned |
d73be268 | 82 | vectorize_loops (void) |
79fe1b3b | 83 | { |
b52485c6 | 84 | unsigned int i; |
79fe1b3b | 85 | unsigned int num_vectorized_loops = 0; |
42fd6772 ZD |
86 | unsigned int vect_loops_num; |
87 | loop_iterator li; | |
88 | struct loop *loop; | |
79fe1b3b | 89 | |
0fc822d0 | 90 | vect_loops_num = number_of_loops (cfun); |
f9be04cd RG |
91 | |
92 | /* Bail out if there are no loops. */ | |
93 | if (vect_loops_num <= 1) | |
94 | return 0; | |
95 | ||
726a989a RB |
96 | init_stmt_vec_info_vec (); |
97 | ||
79fe1b3b DN |
98 | /* ----------- Analyze loops. ----------- */ |
99 | ||
b8698a0f | 100 | /* If some loop was duplicated, it gets bigger number |
ff802fa1 | 101 | than all previously defined loops. This fact allows us to run |
79fe1b3b | 102 | only over initial loops skipping newly generated ones. */ |
677cc14d | 103 | FOR_EACH_LOOP (li, loop, 0) |
8bcf15f6 JH |
104 | if (optimize_loop_nest_for_speed_p (loop)) |
105 | { | |
106 | loop_vec_info loop_vinfo; | |
8644a673 | 107 | vect_location = find_loop_location (loop); |
84df911b | 108 | if (LOCATION_LOCUS (vect_location) != UNKNOWN_LOC |
73fbfcad | 109 | && dump_enabled_p ()) |
78c60e3d SS |
110 | dump_printf (MSG_ALL, "\nAnalyzing loop at %s:%d\n", |
111 | LOC_FILE (vect_location), LOC_LINE (vect_location)); | |
7cd3603b | 112 | |
8bcf15f6 JH |
113 | loop_vinfo = vect_analyze_loop (loop); |
114 | loop->aux = loop_vinfo; | |
79fe1b3b | 115 | |
8bcf15f6 JH |
116 | if (!loop_vinfo || !LOOP_VINFO_VECTORIZABLE_P (loop_vinfo)) |
117 | continue; | |
79fe1b3b | 118 | |
84df911b | 119 | if (LOCATION_LOCUS (vect_location) != UNKNOWN_LOC |
73fbfcad | 120 | && dump_enabled_p ()) |
78c60e3d SS |
121 | dump_printf (MSG_ALL, "\n\nVectorizing loop at %s:%d\n", |
122 | LOC_FILE (vect_location), LOC_LINE (vect_location)); | |
8bcf15f6 JH |
123 | vect_transform_loop (loop_vinfo); |
124 | num_vectorized_loops++; | |
125 | } | |
8644a673 IR |
126 | |
127 | vect_location = UNKNOWN_LOC; | |
79fe1b3b | 128 | |
01902653 | 129 | statistics_counter_event (cfun, "Vectorized loops", num_vectorized_loops); |
73fbfcad SS |
130 | if (dump_enabled_p () |
131 | || (num_vectorized_loops > 0 && dump_enabled_p ())) | |
78c60e3d SS |
132 | dump_printf_loc (MSG_ALL, vect_location, |
133 | "vectorized %u loops in function.\n", | |
134 | num_vectorized_loops); | |
79fe1b3b DN |
135 | |
136 | /* ----------- Finalize. ----------- */ | |
137 | ||
b52485c6 | 138 | for (i = 1; i < vect_loops_num; i++) |
79fe1b3b | 139 | { |
6775f1f3 IR |
140 | loop_vec_info loop_vinfo; |
141 | ||
0fc822d0 | 142 | loop = get_loop (cfun, i); |
79fe1b3b | 143 | if (!loop) |
6775f1f3 | 144 | continue; |
3d9a9f94 | 145 | loop_vinfo = (loop_vec_info) loop->aux; |
d29de1bf | 146 | destroy_loop_vec_info (loop_vinfo, true); |
79fe1b3b DN |
147 | loop->aux = NULL; |
148 | } | |
4d2280f6 | 149 | |
726a989a RB |
150 | free_stmt_vec_info_vec (); |
151 | ||
789c34e3 RB |
152 | if (num_vectorized_loops > 0) |
153 | { | |
154 | /* If we vectorized any loop only virtual SSA form needs to be updated. | |
155 | ??? Also while we try hard to update loop-closed SSA form we fail | |
156 | to properly do this in some corner-cases (see PR56286). */ | |
157 | rewrite_into_loop_closed_ssa (NULL, TODO_update_ssa_only_virtuals); | |
158 | return TODO_cleanup_cfg; | |
159 | } | |
160 | ||
161 | return 0; | |
79fe1b3b | 162 | } |
b8698a0f | 163 | |
f4b3ca72 | 164 | |
a70d6342 IR |
165 | /* Entry point to basic block SLP phase. */ |
166 | ||
167 | static unsigned int | |
168 | execute_vect_slp (void) | |
169 | { | |
170 | basic_block bb; | |
171 | ||
a70d6342 IR |
172 | init_stmt_vec_info_vec (); |
173 | ||
174 | FOR_EACH_BB (bb) | |
175 | { | |
176 | vect_location = find_bb_location (bb); | |
177 | ||
178 | if (vect_slp_analyze_bb (bb)) | |
179 | { | |
180 | vect_slp_transform_bb (bb); | |
73fbfcad | 181 | if (dump_enabled_p ()) |
78c60e3d SS |
182 | dump_printf_loc (MSG_OPTIMIZED_LOCATIONS, vect_location, |
183 | "basic block vectorized using SLP\n"); | |
a70d6342 IR |
184 | } |
185 | } | |
186 | ||
187 | free_stmt_vec_info_vec (); | |
188 | return 0; | |
189 | } | |
190 | ||
191 | static bool | |
192 | gate_vect_slp (void) | |
193 | { | |
b8698a0f | 194 | /* Apply SLP either if the vectorizer is on and the user didn't specify |
a70d6342 | 195 | whether to run SLP or not, or if the SLP flag was set by the user. */ |
b8698a0f | 196 | return ((flag_tree_vectorize != 0 && flag_tree_slp_vectorize != 0) |
a70d6342 IR |
197 | || flag_tree_slp_vectorize == 1); |
198 | } | |
199 | ||
200 | struct gimple_opt_pass pass_slp_vectorize = | |
201 | { | |
202 | { | |
203 | GIMPLE_PASS, | |
204 | "slp", /* name */ | |
2b4e6bf1 SS |
205 | OPTGROUP_LOOP |
206 | | OPTGROUP_VEC, /* optinfo_flags */ | |
a70d6342 IR |
207 | gate_vect_slp, /* gate */ |
208 | execute_vect_slp, /* execute */ | |
209 | NULL, /* sub */ | |
210 | NULL, /* next */ | |
211 | 0, /* static_pass_number */ | |
212 | TV_TREE_SLP_VECTORIZATION, /* tv_id */ | |
213 | PROP_ssa | PROP_cfg, /* properties_required */ | |
214 | 0, /* properties_provided */ | |
215 | 0, /* properties_destroyed */ | |
216 | 0, /* todo_flags_start */ | |
bb313b93 | 217 | TODO_verify_ssa |
b8698a0f | 218 | | TODO_update_ssa |
a70d6342 IR |
219 | | TODO_verify_stmts /* todo_flags_finish */ |
220 | } | |
221 | }; | |
222 | ||
223 | ||
f4b3ca72 JH |
224 | /* Increase alignment of global arrays to improve vectorization potential. |
225 | TODO: | |
226 | - Consider also structs that have an array field. | |
227 | - Use ipa analysis to prune arrays that can't be vectorized? | |
228 | This should involve global alignment analysis and in the future also | |
229 | array padding. */ | |
230 | ||
231 | static unsigned int | |
232 | increase_alignment (void) | |
233 | { | |
234 | struct varpool_node *vnode; | |
235 | ||
a3d7af04 SS |
236 | vect_location = UNKNOWN_LOC; |
237 | ||
f4b3ca72 | 238 | /* Increase the alignment of all global arrays for vectorization. */ |
65c70e6b | 239 | FOR_EACH_DEFINED_VARIABLE (vnode) |
f4b3ca72 | 240 | { |
960bfb69 | 241 | tree vectype, decl = vnode->symbol.decl; |
cba146eb | 242 | tree t; |
f4b3ca72 JH |
243 | unsigned int alignment; |
244 | ||
cba146eb PB |
245 | t = TREE_TYPE(decl); |
246 | if (TREE_CODE (t) != ARRAY_TYPE) | |
ebfd146a | 247 | continue; |
cba146eb | 248 | vectype = get_vectype_for_scalar_type (strip_array_types (t)); |
f4b3ca72 | 249 | if (!vectype) |
ebfd146a | 250 | continue; |
f4b3ca72 JH |
251 | alignment = TYPE_ALIGN (vectype); |
252 | if (DECL_ALIGN (decl) >= alignment) | |
ebfd146a | 253 | continue; |
f4b3ca72 JH |
254 | |
255 | if (vect_can_force_dr_alignment_p (decl, alignment)) | |
ebfd146a IR |
256 | { |
257 | DECL_ALIGN (decl) = TYPE_ALIGN (vectype); | |
258 | DECL_USER_ALIGN (decl) = 1; | |
78c60e3d SS |
259 | dump_printf (MSG_NOTE, "Increasing alignment of decl: "); |
260 | dump_generic_expr (MSG_NOTE, TDF_SLIM, decl); | |
261 | dump_printf (MSG_NOTE, "\n"); | |
ebfd146a | 262 | } |
f4b3ca72 JH |
263 | } |
264 | return 0; | |
265 | } | |
266 | ||
ebfd146a | 267 | |
fe1c7546 | 268 | static bool |
f4b3ca72 JH |
269 | gate_increase_alignment (void) |
270 | { | |
271 | return flag_section_anchors && flag_tree_vectorize; | |
272 | } | |
273 | ||
ebfd146a IR |
274 | |
275 | struct simple_ipa_opt_pass pass_ipa_increase_alignment = | |
f4b3ca72 | 276 | { |
8ddbbcae JH |
277 | { |
278 | SIMPLE_IPA_PASS, | |
ebfd146a | 279 | "increase_alignment", /* name */ |
2b4e6bf1 SS |
280 | OPTGROUP_LOOP |
281 | | OPTGROUP_VEC, /* optinfo_flags */ | |
ebfd146a IR |
282 | gate_increase_alignment, /* gate */ |
283 | increase_alignment, /* execute */ | |
284 | NULL, /* sub */ | |
285 | NULL, /* next */ | |
286 | 0, /* static_pass_number */ | |
a222c01a | 287 | TV_IPA_OPT, /* tv_id */ |
ebfd146a IR |
288 | 0, /* properties_required */ |
289 | 0, /* properties_provided */ | |
290 | 0, /* properties_destroyed */ | |
291 | 0, /* todo_flags_start */ | |
292 | 0 /* todo_flags_finish */ | |
8ddbbcae | 293 | } |
f4b3ca72 | 294 | }; |