]>
Commit | Line | Data |
---|---|---|
fb85abff | 1 | /* Vectorizer |
d353bf18 | 2 | Copyright (C) 2003-2015 Free Software Foundation, Inc. |
48e1416a | 3 | Contributed by Dorit Naishlos <dorit@il.ibm.com> |
c91e8223 | 4 | |
5 | This file is part of GCC. | |
6 | ||
7 | GCC is free software; you can redistribute it and/or modify it under | |
8 | the terms of the GNU General Public License as published by the Free | |
8c4c00c1 | 9 | Software Foundation; either version 3, or (at your option) any later |
c91e8223 | 10 | version. |
11 | ||
12 | GCC is distributed in the hope that it will be useful, but WITHOUT ANY | |
13 | WARRANTY; without even the implied warranty of MERCHANTABILITY or | |
14 | FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License | |
15 | for more details. | |
16 | ||
17 | You should have received a copy of the GNU General Public License | |
8c4c00c1 | 18 | along with GCC; see the file COPYING3. If not see |
19 | <http://www.gnu.org/licenses/>. */ | |
c91e8223 | 20 | |
fb85abff | 21 | /* Loop and basic block vectorizer. |
b056d812 | 22 | |
48e1416a | 23 | This file contains drivers for the three vectorizers: |
24 | (1) loop vectorizer (inter-iteration parallelism), | |
fb85abff | 25 | (2) loop-aware SLP (intra-iteration parallelism) (invoked by the loop |
26 | vectorizer) | |
27 | (3) BB vectorizer (out-of-loops), aka SLP | |
48e1416a | 28 | |
fb85abff | 29 | The rest of the vectorizer's code is organized as follows: |
48e1416a | 30 | - tree-vect-loop.c - loop specific parts such as reductions, etc. These are |
31 | used by drivers (1) and (2). | |
32 | - tree-vect-loop-manip.c - vectorizer's loop control-flow utilities, used by | |
33 | drivers (1) and (2). | |
34 | - tree-vect-slp.c - BB vectorization specific analysis and transformation, | |
fb85abff | 35 | used by drivers (2) and (3). |
36 | - tree-vect-stmts.c - statements analysis and transformation (used by all). | |
48e1416a | 37 | - tree-vect-data-refs.c - vectorizer specific data-refs analysis and |
fb85abff | 38 | manipulations (used by all). |
39 | - tree-vect-patterns.c - vectorizable code patterns detector (used by all) | |
40 | ||
41 | Here's a poor attempt at illustrating that: | |
42 | ||
43 | tree-vectorizer.c: | |
44 | loop_vect() loop_aware_slp() slp_vect() | |
45 | | / \ / | |
46 | | / \ / | |
47 | tree-vect-loop.c tree-vect-slp.c | |
48 | | \ \ / / | | |
49 | | \ \/ / | | |
50 | | \ /\ / | | |
51 | | \ / \ / | | |
52 | tree-vect-stmts.c tree-vect-data-refs.c | |
53 | \ / | |
54 | tree-vect-patterns.c | |
55 | */ | |
c6c91d61 | 56 | |
fb85abff | 57 | #include "config.h" |
58 | #include "system.h" | |
59 | #include "coretypes.h" | |
7bd765d4 | 60 | #include "dumpfile.h" |
fb85abff | 61 | #include "tm.h" |
b20a8bb4 | 62 | #include "hash-set.h" |
63 | #include "machmode.h" | |
64 | #include "vec.h" | |
65 | #include "double-int.h" | |
66 | #include "input.h" | |
67 | #include "alias.h" | |
68 | #include "symtab.h" | |
69 | #include "wide-int.h" | |
70 | #include "inchash.h" | |
fb85abff | 71 | #include "tree.h" |
b20a8bb4 | 72 | #include "fold-const.h" |
9ed99284 | 73 | #include "stor-layout.h" |
ce084dfc | 74 | #include "tree-pretty-print.h" |
94ea8568 | 75 | #include "predict.h" |
94ea8568 | 76 | #include "hard-reg-set.h" |
77 | #include "input.h" | |
78 | #include "function.h" | |
79 | #include "dominance.h" | |
80 | #include "cfg.h" | |
bc61cadb | 81 | #include "basic-block.h" |
82 | #include "tree-ssa-alias.h" | |
83 | #include "internal-fn.h" | |
84 | #include "gimple-expr.h" | |
85 | #include "is-a.h" | |
073c1fd5 | 86 | #include "gimple.h" |
dcf1a1ec | 87 | #include "gimple-iterator.h" |
88 | #include "gimple-walk.h" | |
073c1fd5 | 89 | #include "gimple-ssa.h" |
1140c305 | 90 | #include "hash-map.h" |
91 | #include "plugin-api.h" | |
92 | #include "ipa-ref.h" | |
073c1fd5 | 93 | #include "cgraph.h" |
94 | #include "tree-phinodes.h" | |
95 | #include "ssa-iterators.h" | |
05d9c18a | 96 | #include "tree-ssa-loop-manip.h" |
c71d3c24 | 97 | #include "tree-cfg.h" |
fb85abff | 98 | #include "cfgloop.h" |
fb85abff | 99 | #include "tree-vectorizer.h" |
100 | #include "tree-pass.h" | |
3d483a94 | 101 | #include "tree-ssa-propagate.h" |
23e1875f | 102 | #include "dbgcnt.h" |
c71d3c24 | 103 | #include "gimple-fold.h" |
ef3f2b6f | 104 | #include "tree-scalar-evolution.h" |
105 | ||
c6c91d61 | 106 | |
37545e54 | 107 | /* Loop or bb location. */ |
36f39b2e | 108 | source_location vect_location; |
52394a67 | 109 | |
fb85abff | 110 | /* Vector mapping GIMPLE stmt to stmt_vec_info. */ |
f1f41a6c | 111 | vec<vec_void_p> stmt_vec_info_vec; |
3d483a94 | 112 | \f |
113 | /* For mapping simduid to vectorization factor. */ | |
114 | ||
115 | struct simduid_to_vf : typed_free_remove<simduid_to_vf> | |
116 | { | |
117 | unsigned int simduid; | |
118 | int vf; | |
119 | ||
120 | /* hash_table support. */ | |
9969c043 | 121 | typedef simduid_to_vf *value_type; |
122 | typedef simduid_to_vf *compare_type; | |
123 | static inline hashval_t hash (const simduid_to_vf *); | |
124 | static inline int equal (const simduid_to_vf *, const simduid_to_vf *); | |
3d483a94 | 125 | }; |
126 | ||
127 | inline hashval_t | |
9969c043 | 128 | simduid_to_vf::hash (const simduid_to_vf *p) |
3d483a94 | 129 | { |
130 | return p->simduid; | |
131 | } | |
132 | ||
133 | inline int | |
9969c043 | 134 | simduid_to_vf::equal (const simduid_to_vf *p1, const simduid_to_vf *p2) |
3d483a94 | 135 | { |
136 | return p1->simduid == p2->simduid; | |
137 | } | |
138 | ||
139 | /* This hash maps the OMP simd array to the corresponding simduid used | |
140 | to index into it. Like thus, | |
141 | ||
142 | _7 = GOMP_SIMD_LANE (simduid.0) | |
143 | ... | |
144 | ... | |
145 | D.1737[_7] = stuff; | |
146 | ||
147 | ||
bc7bff74 | 148 | This hash maps from the OMP simd array (D.1737[]) to DECL_UID of |
149 | simduid.0. */ | |
3d483a94 | 150 | |
151 | struct simd_array_to_simduid : typed_free_remove<simd_array_to_simduid> | |
152 | { | |
153 | tree decl; | |
154 | unsigned int simduid; | |
155 | ||
156 | /* hash_table support. */ | |
9969c043 | 157 | typedef simd_array_to_simduid *value_type; |
158 | typedef simd_array_to_simduid *compare_type; | |
159 | static inline hashval_t hash (const simd_array_to_simduid *); | |
160 | static inline int equal (const simd_array_to_simduid *, | |
161 | const simd_array_to_simduid *); | |
3d483a94 | 162 | }; |
163 | ||
164 | inline hashval_t | |
9969c043 | 165 | simd_array_to_simduid::hash (const simd_array_to_simduid *p) |
3d483a94 | 166 | { |
167 | return DECL_UID (p->decl); | |
168 | } | |
169 | ||
170 | inline int | |
9969c043 | 171 | simd_array_to_simduid::equal (const simd_array_to_simduid *p1, |
172 | const simd_array_to_simduid *p2) | |
3d483a94 | 173 | { |
174 | return p1->decl == p2->decl; | |
175 | } | |
176 | ||
177 | /* Fold IFN_GOMP_SIMD_LANE, IFN_GOMP_SIMD_VF and IFN_GOMP_SIMD_LAST_LANE | |
178 | into their corresponding constants. */ | |
179 | ||
180 | static void | |
c1f445d2 | 181 | adjust_simduid_builtins (hash_table<simduid_to_vf> **htab) |
3d483a94 | 182 | { |
183 | basic_block bb; | |
184 | ||
fc00614f | 185 | FOR_EACH_BB_FN (bb, cfun) |
3d483a94 | 186 | { |
187 | gimple_stmt_iterator i; | |
188 | ||
189 | for (i = gsi_start_bb (bb); !gsi_end_p (i); gsi_next (&i)) | |
190 | { | |
191 | unsigned int vf = 1; | |
192 | enum internal_fn ifn; | |
193 | gimple stmt = gsi_stmt (i); | |
194 | tree t; | |
195 | if (!is_gimple_call (stmt) | |
196 | || !gimple_call_internal_p (stmt)) | |
197 | continue; | |
198 | ifn = gimple_call_internal_fn (stmt); | |
199 | switch (ifn) | |
200 | { | |
201 | case IFN_GOMP_SIMD_LANE: | |
202 | case IFN_GOMP_SIMD_VF: | |
203 | case IFN_GOMP_SIMD_LAST_LANE: | |
204 | break; | |
205 | default: | |
206 | continue; | |
207 | } | |
208 | tree arg = gimple_call_arg (stmt, 0); | |
209 | gcc_assert (arg != NULL_TREE); | |
210 | gcc_assert (TREE_CODE (arg) == SSA_NAME); | |
211 | simduid_to_vf *p = NULL, data; | |
212 | data.simduid = DECL_UID (SSA_NAME_VAR (arg)); | |
c1f445d2 | 213 | if (*htab) |
214 | p = (*htab)->find (&data); | |
3d483a94 | 215 | if (p) |
216 | vf = p->vf; | |
217 | switch (ifn) | |
218 | { | |
219 | case IFN_GOMP_SIMD_VF: | |
220 | t = build_int_cst (unsigned_type_node, vf); | |
221 | break; | |
222 | case IFN_GOMP_SIMD_LANE: | |
223 | t = build_int_cst (unsigned_type_node, 0); | |
224 | break; | |
225 | case IFN_GOMP_SIMD_LAST_LANE: | |
226 | t = gimple_call_arg (stmt, 1); | |
227 | break; | |
228 | default: | |
229 | gcc_unreachable (); | |
230 | } | |
231 | update_call_from_tree (&i, t); | |
232 | } | |
233 | } | |
234 | } | |
c6c91d61 | 235 | |
3d483a94 | 236 | /* Helper structure for note_simd_array_uses. */ |
237 | ||
238 | struct note_simd_array_uses_struct | |
239 | { | |
c1f445d2 | 240 | hash_table<simd_array_to_simduid> **htab; |
3d483a94 | 241 | unsigned int simduid; |
242 | }; | |
243 | ||
244 | /* Callback for note_simd_array_uses, called through walk_gimple_op. */ | |
245 | ||
246 | static tree | |
247 | note_simd_array_uses_cb (tree *tp, int *walk_subtrees, void *data) | |
248 | { | |
249 | struct walk_stmt_info *wi = (struct walk_stmt_info *) data; | |
250 | struct note_simd_array_uses_struct *ns | |
251 | = (struct note_simd_array_uses_struct *) wi->info; | |
252 | ||
253 | if (TYPE_P (*tp)) | |
254 | *walk_subtrees = 0; | |
255 | else if (VAR_P (*tp) | |
256 | && lookup_attribute ("omp simd array", DECL_ATTRIBUTES (*tp)) | |
257 | && DECL_CONTEXT (*tp) == current_function_decl) | |
258 | { | |
259 | simd_array_to_simduid data; | |
c1f445d2 | 260 | if (!*ns->htab) |
261 | *ns->htab = new hash_table<simd_array_to_simduid> (15); | |
3d483a94 | 262 | data.decl = *tp; |
263 | data.simduid = ns->simduid; | |
c1f445d2 | 264 | simd_array_to_simduid **slot = (*ns->htab)->find_slot (&data, INSERT); |
3d483a94 | 265 | if (*slot == NULL) |
266 | { | |
267 | simd_array_to_simduid *p = XNEW (simd_array_to_simduid); | |
268 | *p = data; | |
269 | *slot = p; | |
270 | } | |
271 | else if ((*slot)->simduid != ns->simduid) | |
272 | (*slot)->simduid = -1U; | |
273 | *walk_subtrees = 0; | |
274 | } | |
275 | return NULL_TREE; | |
276 | } | |
277 | ||
278 | /* Find "omp simd array" temporaries and map them to corresponding | |
279 | simduid. */ | |
280 | ||
281 | static void | |
c1f445d2 | 282 | note_simd_array_uses (hash_table<simd_array_to_simduid> **htab) |
3d483a94 | 283 | { |
284 | basic_block bb; | |
285 | gimple_stmt_iterator gsi; | |
286 | struct walk_stmt_info wi; | |
287 | struct note_simd_array_uses_struct ns; | |
288 | ||
289 | memset (&wi, 0, sizeof (wi)); | |
290 | wi.info = &ns; | |
291 | ns.htab = htab; | |
292 | ||
fc00614f | 293 | FOR_EACH_BB_FN (bb, cfun) |
3d483a94 | 294 | for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi)) |
295 | { | |
296 | gimple stmt = gsi_stmt (gsi); | |
297 | if (!is_gimple_call (stmt) || !gimple_call_internal_p (stmt)) | |
298 | continue; | |
299 | switch (gimple_call_internal_fn (stmt)) | |
300 | { | |
301 | case IFN_GOMP_SIMD_LANE: | |
302 | case IFN_GOMP_SIMD_VF: | |
303 | case IFN_GOMP_SIMD_LAST_LANE: | |
304 | break; | |
305 | default: | |
306 | continue; | |
307 | } | |
308 | tree lhs = gimple_call_lhs (stmt); | |
309 | if (lhs == NULL_TREE) | |
310 | continue; | |
311 | imm_use_iterator use_iter; | |
312 | gimple use_stmt; | |
313 | ns.simduid = DECL_UID (SSA_NAME_VAR (gimple_call_arg (stmt, 0))); | |
314 | FOR_EACH_IMM_USE_STMT (use_stmt, use_iter, lhs) | |
315 | if (!is_gimple_debug (use_stmt)) | |
316 | walk_gimple_op (use_stmt, note_simd_array_uses_cb, &wi); | |
317 | } | |
318 | } | |
fb85abff | 319 | \f |
23e1875f | 320 | /* A helper function to free data refs. */ |
321 | ||
322 | void | |
323 | vect_destroy_datarefs (loop_vec_info loop_vinfo, bb_vec_info bb_vinfo) | |
324 | { | |
325 | vec<data_reference_p> datarefs; | |
326 | struct data_reference *dr; | |
327 | unsigned int i; | |
328 | ||
329 | if (loop_vinfo) | |
330 | datarefs = LOOP_VINFO_DATAREFS (loop_vinfo); | |
331 | else | |
332 | datarefs = BB_VINFO_DATAREFS (bb_vinfo); | |
333 | ||
334 | FOR_EACH_VEC_ELT (datarefs, i, dr) | |
335 | if (dr->aux) | |
336 | { | |
337 | free (dr->aux); | |
338 | dr->aux = NULL; | |
339 | } | |
340 | ||
341 | free_data_refs (datarefs); | |
342 | } | |
343 | ||
344 | ||
c71d3c24 | 345 | /* If LOOP has been versioned during ifcvt, return the internal call |
346 | guarding it. */ | |
347 | ||
348 | static gimple | |
349 | vect_loop_vectorized_call (struct loop *loop) | |
350 | { | |
351 | basic_block bb = loop_preheader_edge (loop)->src; | |
352 | gimple g; | |
353 | do | |
354 | { | |
355 | g = last_stmt (bb); | |
356 | if (g) | |
357 | break; | |
358 | if (!single_pred_p (bb)) | |
359 | break; | |
360 | bb = single_pred (bb); | |
361 | } | |
362 | while (1); | |
363 | if (g && gimple_code (g) == GIMPLE_COND) | |
364 | { | |
365 | gimple_stmt_iterator gsi = gsi_for_stmt (g); | |
366 | gsi_prev (&gsi); | |
367 | if (!gsi_end_p (gsi)) | |
368 | { | |
369 | g = gsi_stmt (gsi); | |
370 | if (is_gimple_call (g) | |
371 | && gimple_call_internal_p (g) | |
372 | && gimple_call_internal_fn (g) == IFN_LOOP_VECTORIZED | |
373 | && (tree_to_shwi (gimple_call_arg (g, 0)) == loop->num | |
374 | || tree_to_shwi (gimple_call_arg (g, 1)) == loop->num)) | |
375 | return g; | |
376 | } | |
377 | } | |
378 | return NULL; | |
379 | } | |
380 | ||
381 | /* Fold LOOP_VECTORIZED internal call G to VALUE and | |
382 | update any immediate uses of it's LHS. */ | |
383 | ||
384 | static void | |
385 | fold_loop_vectorized_call (gimple g, tree value) | |
386 | { | |
387 | tree lhs = gimple_call_lhs (g); | |
388 | use_operand_p use_p; | |
389 | imm_use_iterator iter; | |
390 | gimple use_stmt; | |
391 | gimple_stmt_iterator gsi = gsi_for_stmt (g); | |
392 | ||
393 | update_call_from_tree (&gsi, value); | |
394 | FOR_EACH_IMM_USE_STMT (use_stmt, iter, lhs) | |
cf4f5d5e | 395 | { |
396 | FOR_EACH_IMM_USE_ON_STMT (use_p, iter) | |
397 | SET_USE (use_p, value); | |
398 | update_stmt (use_stmt); | |
399 | } | |
c71d3c24 | 400 | } |
401 | ||
c91e8223 | 402 | /* Function vectorize_loops. |
48e1416a | 403 | |
f083cd24 | 404 | Entry point to loop vectorization phase. */ |
c91e8223 | 405 | |
d4ec02d0 | 406 | unsigned |
7194de72 | 407 | vectorize_loops (void) |
c91e8223 | 408 | { |
e9705e7f | 409 | unsigned int i; |
c91e8223 | 410 | unsigned int num_vectorized_loops = 0; |
17519ba0 | 411 | unsigned int vect_loops_num; |
17519ba0 | 412 | struct loop *loop; |
c1f445d2 | 413 | hash_table<simduid_to_vf> *simduid_to_vf_htab = NULL; |
414 | hash_table<simd_array_to_simduid> *simd_array_to_simduid_htab = NULL; | |
c71d3c24 | 415 | bool any_ifcvt_loops = false; |
416 | unsigned ret = 0; | |
c91e8223 | 417 | |
41f75a99 | 418 | vect_loops_num = number_of_loops (cfun); |
d74ee105 | 419 | |
420 | /* Bail out if there are no loops. */ | |
421 | if (vect_loops_num <= 1) | |
3d483a94 | 422 | { |
423 | if (cfun->has_simduid_loops) | |
c1f445d2 | 424 | adjust_simduid_builtins (&simduid_to_vf_htab); |
3d483a94 | 425 | return 0; |
426 | } | |
427 | ||
428 | if (cfun->has_simduid_loops) | |
429 | note_simd_array_uses (&simd_array_to_simduid_htab); | |
d74ee105 | 430 | |
75a70cf9 | 431 | init_stmt_vec_info_vec (); |
432 | ||
c91e8223 | 433 | /* ----------- Analyze loops. ----------- */ |
434 | ||
48e1416a | 435 | /* If some loop was duplicated, it gets bigger number |
282bf14c | 436 | than all previously defined loops. This fact allows us to run |
c91e8223 | 437 | only over initial loops skipping newly generated ones. */ |
f21d4d00 | 438 | FOR_EACH_LOOP (loop, 0) |
c71d3c24 | 439 | if (loop->dont_vectorize) |
440 | any_ifcvt_loops = true; | |
441 | else if ((flag_tree_loop_vectorize | |
442 | && optimize_loop_nest_for_speed_p (loop)) | |
4c73695b | 443 | || loop->force_vectorize) |
7baffbd3 | 444 | { |
445 | loop_vec_info loop_vinfo; | |
f083cd24 | 446 | vect_location = find_loop_location (loop); |
36f39b2e | 447 | if (LOCATION_LOCUS (vect_location) != UNKNOWN_LOCATION |
6d8fb6cf | 448 | && dump_enabled_p ()) |
b055bc88 | 449 | dump_printf (MSG_NOTE, "\nAnalyzing loop at %s:%d\n", |
36f39b2e | 450 | LOCATION_FILE (vect_location), |
451 | LOCATION_LINE (vect_location)); | |
87a658a6 | 452 | |
7baffbd3 | 453 | loop_vinfo = vect_analyze_loop (loop); |
454 | loop->aux = loop_vinfo; | |
c91e8223 | 455 | |
7baffbd3 | 456 | if (!loop_vinfo || !LOOP_VINFO_VECTORIZABLE_P (loop_vinfo)) |
457 | continue; | |
c91e8223 | 458 | |
23e1875f | 459 | if (!dbg_cnt (vect_loop)) |
460 | break; | |
461 | ||
c71d3c24 | 462 | gimple loop_vectorized_call = vect_loop_vectorized_call (loop); |
463 | if (loop_vectorized_call) | |
464 | { | |
465 | tree arg = gimple_call_arg (loop_vectorized_call, 1); | |
466 | basic_block *bbs; | |
467 | unsigned int i; | |
468 | struct loop *scalar_loop = get_loop (cfun, tree_to_shwi (arg)); | |
469 | ||
470 | LOOP_VINFO_SCALAR_LOOP (loop_vinfo) = scalar_loop; | |
471 | gcc_checking_assert (vect_loop_vectorized_call | |
472 | (LOOP_VINFO_SCALAR_LOOP (loop_vinfo)) | |
473 | == loop_vectorized_call); | |
474 | bbs = get_loop_body (scalar_loop); | |
475 | for (i = 0; i < scalar_loop->num_nodes; i++) | |
476 | { | |
477 | basic_block bb = bbs[i]; | |
478 | gimple_stmt_iterator gsi; | |
479 | for (gsi = gsi_start_phis (bb); !gsi_end_p (gsi); | |
480 | gsi_next (&gsi)) | |
481 | { | |
482 | gimple phi = gsi_stmt (gsi); | |
483 | gimple_set_uid (phi, 0); | |
484 | } | |
485 | for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); | |
486 | gsi_next (&gsi)) | |
487 | { | |
488 | gimple stmt = gsi_stmt (gsi); | |
489 | gimple_set_uid (stmt, 0); | |
490 | } | |
491 | } | |
492 | free (bbs); | |
493 | } | |
494 | ||
36f39b2e | 495 | if (LOCATION_LOCUS (vect_location) != UNKNOWN_LOCATION |
6d8fb6cf | 496 | && dump_enabled_p ()) |
a21425b5 | 497 | dump_printf_loc (MSG_OPTIMIZED_LOCATIONS, vect_location, |
6ee2edad | 498 | "loop vectorized\n"); |
7baffbd3 | 499 | vect_transform_loop (loop_vinfo); |
500 | num_vectorized_loops++; | |
3d483a94 | 501 | /* Now that the loop has been vectorized, allow it to be unrolled |
502 | etc. */ | |
4c73695b | 503 | loop->force_vectorize = false; |
3d483a94 | 504 | |
505 | if (loop->simduid) | |
506 | { | |
507 | simduid_to_vf *simduid_to_vf_data = XNEW (simduid_to_vf); | |
c1f445d2 | 508 | if (!simduid_to_vf_htab) |
509 | simduid_to_vf_htab = new hash_table<simduid_to_vf> (15); | |
3d483a94 | 510 | simduid_to_vf_data->simduid = DECL_UID (loop->simduid); |
511 | simduid_to_vf_data->vf = loop_vinfo->vectorization_factor; | |
c1f445d2 | 512 | *simduid_to_vf_htab->find_slot (simduid_to_vf_data, INSERT) |
3d483a94 | 513 | = simduid_to_vf_data; |
514 | } | |
c71d3c24 | 515 | |
516 | if (loop_vectorized_call) | |
517 | { | |
518 | fold_loop_vectorized_call (loop_vectorized_call, boolean_true_node); | |
519 | ret |= TODO_cleanup_cfg; | |
520 | } | |
7baffbd3 | 521 | } |
f083cd24 | 522 | |
36f39b2e | 523 | vect_location = UNKNOWN_LOCATION; |
c91e8223 | 524 | |
581f8050 | 525 | statistics_counter_event (cfun, "Vectorized loops", num_vectorized_loops); |
6d8fb6cf | 526 | if (dump_enabled_p () |
527 | || (num_vectorized_loops > 0 && dump_enabled_p ())) | |
b055bc88 | 528 | dump_printf_loc (MSG_NOTE, vect_location, |
7bd765d4 | 529 | "vectorized %u loops in function.\n", |
530 | num_vectorized_loops); | |
c91e8223 | 531 | |
532 | /* ----------- Finalize. ----------- */ | |
533 | ||
c71d3c24 | 534 | if (any_ifcvt_loops) |
535 | for (i = 1; i < vect_loops_num; i++) | |
536 | { | |
537 | loop = get_loop (cfun, i); | |
538 | if (loop && loop->dont_vectorize) | |
539 | { | |
540 | gimple g = vect_loop_vectorized_call (loop); | |
541 | if (g) | |
542 | { | |
543 | fold_loop_vectorized_call (g, boolean_false_node); | |
544 | ret |= TODO_cleanup_cfg; | |
545 | } | |
546 | } | |
547 | } | |
548 | ||
e9705e7f | 549 | for (i = 1; i < vect_loops_num; i++) |
c91e8223 | 550 | { |
9ce81338 | 551 | loop_vec_info loop_vinfo; |
552 | ||
41f75a99 | 553 | loop = get_loop (cfun, i); |
c91e8223 | 554 | if (!loop) |
9ce81338 | 555 | continue; |
45ba1503 | 556 | loop_vinfo = (loop_vec_info) loop->aux; |
221e9a92 | 557 | destroy_loop_vec_info (loop_vinfo, true); |
c91e8223 | 558 | loop->aux = NULL; |
559 | } | |
d4ec02d0 | 560 | |
75a70cf9 | 561 | free_stmt_vec_info_vec (); |
562 | ||
3d483a94 | 563 | /* Fold IFN_GOMP_SIMD_{VF,LANE,LAST_LANE} builtins. */ |
564 | if (cfun->has_simduid_loops) | |
c1f445d2 | 565 | adjust_simduid_builtins (&simduid_to_vf_htab); |
3d483a94 | 566 | |
567 | /* Shrink any "omp array simd" temporary arrays to the | |
568 | actual vectorization factors. */ | |
c1f445d2 | 569 | if (simd_array_to_simduid_htab) |
3d483a94 | 570 | { |
c1f445d2 | 571 | for (hash_table<simd_array_to_simduid>::iterator iter |
572 | = simd_array_to_simduid_htab->begin (); | |
573 | iter != simd_array_to_simduid_htab->end (); ++iter) | |
2933f7af | 574 | if ((*iter)->simduid != -1U) |
3d483a94 | 575 | { |
2933f7af | 576 | tree decl = (*iter)->decl; |
3d483a94 | 577 | int vf = 1; |
c1f445d2 | 578 | if (simduid_to_vf_htab) |
3d483a94 | 579 | { |
580 | simduid_to_vf *p = NULL, data; | |
2933f7af | 581 | data.simduid = (*iter)->simduid; |
c1f445d2 | 582 | p = simduid_to_vf_htab->find (&data); |
3d483a94 | 583 | if (p) |
584 | vf = p->vf; | |
585 | } | |
586 | tree atype | |
587 | = build_array_type_nelts (TREE_TYPE (TREE_TYPE (decl)), vf); | |
588 | TREE_TYPE (decl) = atype; | |
589 | relayout_decl (decl); | |
590 | } | |
591 | ||
c1f445d2 | 592 | delete simd_array_to_simduid_htab; |
3d483a94 | 593 | } |
c1f445d2 | 594 | delete simduid_to_vf_htab; |
595 | simduid_to_vf_htab = NULL; | |
3d483a94 | 596 | |
f55f91f5 | 597 | if (num_vectorized_loops > 0) |
598 | { | |
599 | /* If we vectorized any loop only virtual SSA form needs to be updated. | |
600 | ??? Also while we try hard to update loop-closed SSA form we fail | |
601 | to properly do this in some corner-cases (see PR56286). */ | |
602 | rewrite_into_loop_closed_ssa (NULL, TODO_update_ssa_only_virtuals); | |
603 | return TODO_cleanup_cfg; | |
604 | } | |
605 | ||
c71d3c24 | 606 | return ret; |
c91e8223 | 607 | } |
48e1416a | 608 | |
f37a5008 | 609 | |
37545e54 | 610 | /* Entry point to basic block SLP phase. */ |
611 | ||
cbe8bda8 | 612 | namespace { |
613 | ||
614 | const pass_data pass_data_slp_vectorize = | |
37545e54 | 615 | { |
cbe8bda8 | 616 | GIMPLE_PASS, /* type */ |
617 | "slp", /* name */ | |
618 | OPTGROUP_LOOP | OPTGROUP_VEC, /* optinfo_flags */ | |
cbe8bda8 | 619 | TV_TREE_SLP_VECTORIZATION, /* tv_id */ |
620 | ( PROP_ssa | PROP_cfg ), /* properties_required */ | |
621 | 0, /* properties_provided */ | |
622 | 0, /* properties_destroyed */ | |
623 | 0, /* todo_flags_start */ | |
8b88439e | 624 | TODO_update_ssa, /* todo_flags_finish */ |
37545e54 | 625 | }; |
626 | ||
cbe8bda8 | 627 | class pass_slp_vectorize : public gimple_opt_pass |
628 | { | |
629 | public: | |
9af5ce0c | 630 | pass_slp_vectorize (gcc::context *ctxt) |
631 | : gimple_opt_pass (pass_data_slp_vectorize, ctxt) | |
cbe8bda8 | 632 | {} |
633 | ||
634 | /* opt_pass methods: */ | |
ef3f2b6f | 635 | opt_pass * clone () { return new pass_slp_vectorize (m_ctxt); } |
31315c24 | 636 | virtual bool gate (function *) { return flag_tree_slp_vectorize != 0; } |
65b0537f | 637 | virtual unsigned int execute (function *); |
cbe8bda8 | 638 | |
639 | }; // class pass_slp_vectorize | |
640 | ||
65b0537f | 641 | unsigned int |
642 | pass_slp_vectorize::execute (function *fun) | |
643 | { | |
644 | basic_block bb; | |
645 | ||
ef3f2b6f | 646 | bool in_loop_pipeline = scev_initialized_p (); |
647 | if (!in_loop_pipeline) | |
648 | { | |
649 | loop_optimizer_init (LOOPS_NORMAL); | |
650 | scev_initialize (); | |
651 | } | |
652 | ||
65b0537f | 653 | init_stmt_vec_info_vec (); |
654 | ||
655 | FOR_EACH_BB_FN (bb, fun) | |
656 | { | |
657 | vect_location = find_bb_location (bb); | |
658 | ||
659 | if (vect_slp_analyze_bb (bb)) | |
660 | { | |
661 | if (!dbg_cnt (vect_slp)) | |
662 | break; | |
663 | ||
664 | vect_slp_transform_bb (bb); | |
665 | if (dump_enabled_p ()) | |
666 | dump_printf_loc (MSG_OPTIMIZED_LOCATIONS, vect_location, | |
667 | "basic block vectorized\n"); | |
668 | } | |
669 | } | |
670 | ||
671 | free_stmt_vec_info_vec (); | |
ef3f2b6f | 672 | |
673 | if (!in_loop_pipeline) | |
674 | { | |
675 | scev_finalize (); | |
676 | loop_optimizer_finalize (); | |
677 | } | |
678 | ||
65b0537f | 679 | return 0; |
680 | } | |
681 | ||
cbe8bda8 | 682 | } // anon namespace |
683 | ||
684 | gimple_opt_pass * | |
685 | make_pass_slp_vectorize (gcc::context *ctxt) | |
686 | { | |
687 | return new pass_slp_vectorize (ctxt); | |
688 | } | |
689 | ||
37545e54 | 690 | |
f37a5008 | 691 | /* Increase alignment of global arrays to improve vectorization potential. |
692 | TODO: | |
693 | - Consider also structs that have an array field. | |
694 | - Use ipa analysis to prune arrays that can't be vectorized? | |
695 | This should involve global alignment analysis and in the future also | |
696 | array padding. */ | |
697 | ||
698 | static unsigned int | |
699 | increase_alignment (void) | |
700 | { | |
098f44bc | 701 | varpool_node *vnode; |
f37a5008 | 702 | |
36f39b2e | 703 | vect_location = UNKNOWN_LOCATION; |
c4c46233 | 704 | |
f37a5008 | 705 | /* Increase the alignment of all global arrays for vectorization. */ |
7c455d87 | 706 | FOR_EACH_DEFINED_VARIABLE (vnode) |
f37a5008 | 707 | { |
02774f2d | 708 | tree vectype, decl = vnode->decl; |
41fba625 | 709 | tree t; |
f37a5008 | 710 | unsigned int alignment; |
711 | ||
9af5ce0c | 712 | t = TREE_TYPE (decl); |
41fba625 | 713 | if (TREE_CODE (t) != ARRAY_TYPE) |
fb85abff | 714 | continue; |
41fba625 | 715 | vectype = get_vectype_for_scalar_type (strip_array_types (t)); |
f37a5008 | 716 | if (!vectype) |
fb85abff | 717 | continue; |
f37a5008 | 718 | alignment = TYPE_ALIGN (vectype); |
719 | if (DECL_ALIGN (decl) >= alignment) | |
fb85abff | 720 | continue; |
f37a5008 | 721 | |
722 | if (vect_can_force_dr_alignment_p (decl, alignment)) | |
fb85abff | 723 | { |
331d5983 | 724 | vnode->increase_alignment (TYPE_ALIGN (vectype)); |
7bd765d4 | 725 | dump_printf (MSG_NOTE, "Increasing alignment of decl: "); |
726 | dump_generic_expr (MSG_NOTE, TDF_SLIM, decl); | |
727 | dump_printf (MSG_NOTE, "\n"); | |
fb85abff | 728 | } |
f37a5008 | 729 | } |
730 | return 0; | |
731 | } | |
732 | ||
fb85abff | 733 | |
cbe8bda8 | 734 | namespace { |
735 | ||
736 | const pass_data pass_data_ipa_increase_alignment = | |
f37a5008 | 737 | { |
cbe8bda8 | 738 | SIMPLE_IPA_PASS, /* type */ |
739 | "increase_alignment", /* name */ | |
740 | OPTGROUP_LOOP | OPTGROUP_VEC, /* optinfo_flags */ | |
cbe8bda8 | 741 | TV_IPA_OPT, /* tv_id */ |
742 | 0, /* properties_required */ | |
743 | 0, /* properties_provided */ | |
744 | 0, /* properties_destroyed */ | |
745 | 0, /* todo_flags_start */ | |
746 | 0, /* todo_flags_finish */ | |
f37a5008 | 747 | }; |
cbe8bda8 | 748 | |
749 | class pass_ipa_increase_alignment : public simple_ipa_opt_pass | |
750 | { | |
751 | public: | |
9af5ce0c | 752 | pass_ipa_increase_alignment (gcc::context *ctxt) |
753 | : simple_ipa_opt_pass (pass_data_ipa_increase_alignment, ctxt) | |
cbe8bda8 | 754 | {} |
755 | ||
756 | /* opt_pass methods: */ | |
31315c24 | 757 | virtual bool gate (function *) |
758 | { | |
759 | return flag_section_anchors && flag_tree_loop_vectorize; | |
760 | } | |
761 | ||
65b0537f | 762 | virtual unsigned int execute (function *) { return increase_alignment (); } |
cbe8bda8 | 763 | |
764 | }; // class pass_ipa_increase_alignment | |
765 | ||
766 | } // anon namespace | |
767 | ||
768 | simple_ipa_opt_pass * | |
769 | make_pass_ipa_increase_alignment (gcc::context *ctxt) | |
770 | { | |
771 | return new pass_ipa_increase_alignment (ctxt); | |
772 | } |