]>
Commit | Line | Data |
---|---|---|
fb85abff | 1 | /* Vectorizer |
ce084dfc | 2 | Copyright (C) 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010 |
3 | Free Software Foundation, Inc. | |
48e1416a | 4 | Contributed by Dorit Naishlos <dorit@il.ibm.com> |
c91e8223 | 5 | |
6 | This file is part of GCC. | |
7 | ||
8 | GCC is free software; you can redistribute it and/or modify it under | |
9 | the terms of the GNU General Public License as published by the Free | |
8c4c00c1 | 10 | Software Foundation; either version 3, or (at your option) any later |
c91e8223 | 11 | version. |
12 | ||
13 | GCC is distributed in the hope that it will be useful, but WITHOUT ANY | |
14 | WARRANTY; without even the implied warranty of MERCHANTABILITY or | |
15 | FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License | |
16 | for more details. | |
17 | ||
18 | You should have received a copy of the GNU General Public License | |
8c4c00c1 | 19 | along with GCC; see the file COPYING3. If not see |
20 | <http://www.gnu.org/licenses/>. */ | |
c91e8223 | 21 | |
/* Loop and basic block vectorizer.

  This file contains drivers for the three vectorizers:
  (1) loop vectorizer (inter-iteration parallelism),
  (2) loop-aware SLP (intra-iteration parallelism) (invoked by the loop
      vectorizer)
  (3) BB vectorizer (out-of-loops), aka SLP

  The rest of the vectorizer's code is organized as follows:
  - tree-vect-loop.c - loop specific parts such as reductions, etc.  These are
    used by drivers (1) and (2).
  - tree-vect-loop-manip.c - vectorizer's loop control-flow utilities, used by
    drivers (1) and (2).
  - tree-vect-slp.c - BB vectorization specific analysis and transformation,
    used by drivers (2) and (3).
  - tree-vect-stmts.c - statements analysis and transformation (used by all).
  - tree-vect-data-refs.c - vectorizer specific data-refs analysis and
    manipulations (used by all).
  - tree-vect-patterns.c - vectorizable code patterns detector (used by all).

  Here's a poor attempt at illustrating that:

     tree-vectorizer.c:
     loop_vect()  loop_aware_slp()  slp_vect()
          |        /           \          /
          |       /             \        /
          tree-vect-loop.c  tree-vect-slp.c
                | \      \  /      /   |
                |  \      \/      /    |
                |   \     /\     /     |
                |    \   /  \   /      |
         tree-vect-stmts.c  tree-vect-data-refs.c
                       \      /
                    tree-vect-patterns.c
*/
c6c91d61 | 57 | |
fb85abff | 58 | #include "config.h" |
59 | #include "system.h" | |
60 | #include "coretypes.h" | |
7bd765d4 | 61 | #include "dumpfile.h" |
fb85abff | 62 | #include "tm.h" |
63 | #include "ggc.h" | |
64 | #include "tree.h" | |
ce084dfc | 65 | #include "tree-pretty-print.h" |
fb85abff | 66 | #include "tree-flow.h" |
fb85abff | 67 | #include "cfgloop.h" |
fb85abff | 68 | #include "tree-vectorizer.h" |
69 | #include "tree-pass.h" | |
c6c91d61 | 70 | |
37545e54 | 71 | /* Loop or bb location. */ |
f083cd24 | 72 | LOC vect_location; |
52394a67 | 73 | |
fb85abff | 74 | /* Vector mapping GIMPLE stmt to stmt_vec_info. */ |
f1f41a6c | 75 | vec<vec_void_p> stmt_vec_info_vec; |
c6c91d61 | 76 | |
fb85abff | 77 | \f |
c91e8223 | 78 | /* Function vectorize_loops. |
48e1416a | 79 | |
f083cd24 | 80 | Entry point to loop vectorization phase. */ |
c91e8223 | 81 | |
d4ec02d0 | 82 | unsigned |
7194de72 | 83 | vectorize_loops (void) |
c91e8223 | 84 | { |
e9705e7f | 85 | unsigned int i; |
c91e8223 | 86 | unsigned int num_vectorized_loops = 0; |
17519ba0 | 87 | unsigned int vect_loops_num; |
88 | loop_iterator li; | |
89 | struct loop *loop; | |
c91e8223 | 90 | |
d74ee105 | 91 | vect_loops_num = number_of_loops (); |
92 | ||
93 | /* Bail out if there are no loops. */ | |
94 | if (vect_loops_num <= 1) | |
95 | return 0; | |
96 | ||
75a70cf9 | 97 | init_stmt_vec_info_vec (); |
98 | ||
c91e8223 | 99 | /* ----------- Analyze loops. ----------- */ |
100 | ||
48e1416a | 101 | /* If some loop was duplicated, it gets bigger number |
282bf14c | 102 | than all previously defined loops. This fact allows us to run |
c91e8223 | 103 | only over initial loops skipping newly generated ones. */ |
3bbbcdff | 104 | FOR_EACH_LOOP (li, loop, 0) |
7baffbd3 | 105 | if (optimize_loop_nest_for_speed_p (loop)) |
106 | { | |
107 | loop_vec_info loop_vinfo; | |
f083cd24 | 108 | vect_location = find_loop_location (loop); |
8bdd40f5 | 109 | if (LOCATION_LOCUS (vect_location) != UNKNOWN_LOC |
6d8fb6cf | 110 | && dump_enabled_p ()) |
7bd765d4 | 111 | dump_printf (MSG_ALL, "\nAnalyzing loop at %s:%d\n", |
112 | LOC_FILE (vect_location), LOC_LINE (vect_location)); | |
87a658a6 | 113 | |
7baffbd3 | 114 | loop_vinfo = vect_analyze_loop (loop); |
115 | loop->aux = loop_vinfo; | |
c91e8223 | 116 | |
7baffbd3 | 117 | if (!loop_vinfo || !LOOP_VINFO_VECTORIZABLE_P (loop_vinfo)) |
118 | continue; | |
c91e8223 | 119 | |
8bdd40f5 | 120 | if (LOCATION_LOCUS (vect_location) != UNKNOWN_LOC |
6d8fb6cf | 121 | && dump_enabled_p ()) |
7bd765d4 | 122 | dump_printf (MSG_ALL, "\n\nVectorizing loop at %s:%d\n", |
123 | LOC_FILE (vect_location), LOC_LINE (vect_location)); | |
7baffbd3 | 124 | vect_transform_loop (loop_vinfo); |
125 | num_vectorized_loops++; | |
126 | } | |
f083cd24 | 127 | |
128 | vect_location = UNKNOWN_LOC; | |
c91e8223 | 129 | |
581f8050 | 130 | statistics_counter_event (cfun, "Vectorized loops", num_vectorized_loops); |
6d8fb6cf | 131 | if (dump_enabled_p () |
132 | || (num_vectorized_loops > 0 && dump_enabled_p ())) | |
7bd765d4 | 133 | dump_printf_loc (MSG_ALL, vect_location, |
134 | "vectorized %u loops in function.\n", | |
135 | num_vectorized_loops); | |
c91e8223 | 136 | |
137 | /* ----------- Finalize. ----------- */ | |
138 | ||
e9705e7f | 139 | for (i = 1; i < vect_loops_num; i++) |
c91e8223 | 140 | { |
9ce81338 | 141 | loop_vec_info loop_vinfo; |
142 | ||
17519ba0 | 143 | loop = get_loop (i); |
c91e8223 | 144 | if (!loop) |
9ce81338 | 145 | continue; |
45ba1503 | 146 | loop_vinfo = (loop_vec_info) loop->aux; |
221e9a92 | 147 | destroy_loop_vec_info (loop_vinfo, true); |
c91e8223 | 148 | loop->aux = NULL; |
149 | } | |
d4ec02d0 | 150 | |
75a70cf9 | 151 | free_stmt_vec_info_vec (); |
152 | ||
d4ec02d0 | 153 | return num_vectorized_loops > 0 ? TODO_cleanup_cfg : 0; |
c91e8223 | 154 | } |
48e1416a | 155 | |
f37a5008 | 156 | |
37545e54 | 157 | /* Entry point to basic block SLP phase. */ |
158 | ||
159 | static unsigned int | |
160 | execute_vect_slp (void) | |
161 | { | |
162 | basic_block bb; | |
163 | ||
37545e54 | 164 | init_stmt_vec_info_vec (); |
165 | ||
166 | FOR_EACH_BB (bb) | |
167 | { | |
168 | vect_location = find_bb_location (bb); | |
169 | ||
170 | if (vect_slp_analyze_bb (bb)) | |
171 | { | |
172 | vect_slp_transform_bb (bb); | |
6d8fb6cf | 173 | if (dump_enabled_p ()) |
7bd765d4 | 174 | dump_printf_loc (MSG_OPTIMIZED_LOCATIONS, vect_location, |
175 | "basic block vectorized using SLP\n"); | |
37545e54 | 176 | } |
177 | } | |
178 | ||
179 | free_stmt_vec_info_vec (); | |
180 | return 0; | |
181 | } | |
182 | ||
183 | static bool | |
184 | gate_vect_slp (void) | |
185 | { | |
48e1416a | 186 | /* Apply SLP either if the vectorizer is on and the user didn't specify |
37545e54 | 187 | whether to run SLP or not, or if the SLP flag was set by the user. */ |
48e1416a | 188 | return ((flag_tree_vectorize != 0 && flag_tree_slp_vectorize != 0) |
37545e54 | 189 | || flag_tree_slp_vectorize == 1); |
190 | } | |
191 | ||
192 | struct gimple_opt_pass pass_slp_vectorize = | |
193 | { | |
194 | { | |
195 | GIMPLE_PASS, | |
196 | "slp", /* name */ | |
c7875731 | 197 | OPTGROUP_LOOP |
198 | | OPTGROUP_VEC, /* optinfo_flags */ | |
37545e54 | 199 | gate_vect_slp, /* gate */ |
200 | execute_vect_slp, /* execute */ | |
201 | NULL, /* sub */ | |
202 | NULL, /* next */ | |
203 | 0, /* static_pass_number */ | |
204 | TV_TREE_SLP_VECTORIZATION, /* tv_id */ | |
205 | PROP_ssa | PROP_cfg, /* properties_required */ | |
206 | 0, /* properties_provided */ | |
207 | 0, /* properties_destroyed */ | |
208 | 0, /* todo_flags_start */ | |
209 | TODO_ggc_collect | |
210 | | TODO_verify_ssa | |
48e1416a | 211 | | TODO_update_ssa |
37545e54 | 212 | | TODO_verify_stmts /* todo_flags_finish */ |
213 | } | |
214 | }; | |
215 | ||
216 | ||
f37a5008 | 217 | /* Increase alignment of global arrays to improve vectorization potential. |
218 | TODO: | |
219 | - Consider also structs that have an array field. | |
220 | - Use ipa analysis to prune arrays that can't be vectorized? | |
221 | This should involve global alignment analysis and in the future also | |
222 | array padding. */ | |
223 | ||
224 | static unsigned int | |
225 | increase_alignment (void) | |
226 | { | |
227 | struct varpool_node *vnode; | |
228 | ||
229 | /* Increase the alignment of all global arrays for vectorization. */ | |
7c455d87 | 230 | FOR_EACH_DEFINED_VARIABLE (vnode) |
f37a5008 | 231 | { |
7d0d0ce1 | 232 | tree vectype, decl = vnode->symbol.decl; |
41fba625 | 233 | tree t; |
f37a5008 | 234 | unsigned int alignment; |
235 | ||
41fba625 | 236 | t = TREE_TYPE(decl); |
237 | if (TREE_CODE (t) != ARRAY_TYPE) | |
fb85abff | 238 | continue; |
41fba625 | 239 | vectype = get_vectype_for_scalar_type (strip_array_types (t)); |
f37a5008 | 240 | if (!vectype) |
fb85abff | 241 | continue; |
f37a5008 | 242 | alignment = TYPE_ALIGN (vectype); |
243 | if (DECL_ALIGN (decl) >= alignment) | |
fb85abff | 244 | continue; |
f37a5008 | 245 | |
246 | if (vect_can_force_dr_alignment_p (decl, alignment)) | |
fb85abff | 247 | { |
248 | DECL_ALIGN (decl) = TYPE_ALIGN (vectype); | |
249 | DECL_USER_ALIGN (decl) = 1; | |
7bd765d4 | 250 | dump_printf (MSG_NOTE, "Increasing alignment of decl: "); |
251 | dump_generic_expr (MSG_NOTE, TDF_SLIM, decl); | |
252 | dump_printf (MSG_NOTE, "\n"); | |
fb85abff | 253 | } |
f37a5008 | 254 | } |
255 | return 0; | |
256 | } | |
257 | ||
fb85abff | 258 | |
45e20252 | 259 | static bool |
f37a5008 | 260 | gate_increase_alignment (void) |
261 | { | |
262 | return flag_section_anchors && flag_tree_vectorize; | |
263 | } | |
264 | ||
fb85abff | 265 | |
266 | struct simple_ipa_opt_pass pass_ipa_increase_alignment = | |
f37a5008 | 267 | { |
20099e35 | 268 | { |
269 | SIMPLE_IPA_PASS, | |
fb85abff | 270 | "increase_alignment", /* name */ |
c7875731 | 271 | OPTGROUP_LOOP |
272 | | OPTGROUP_VEC, /* optinfo_flags */ | |
fb85abff | 273 | gate_increase_alignment, /* gate */ |
274 | increase_alignment, /* execute */ | |
275 | NULL, /* sub */ | |
276 | NULL, /* next */ | |
277 | 0, /* static_pass_number */ | |
4b366dd3 | 278 | TV_IPA_OPT, /* tv_id */ |
fb85abff | 279 | 0, /* properties_required */ |
280 | 0, /* properties_provided */ | |
281 | 0, /* properties_destroyed */ | |
282 | 0, /* todo_flags_start */ | |
283 | 0 /* todo_flags_finish */ | |
20099e35 | 284 | } |
f37a5008 | 285 | }; |