]>
Commit | Line | Data |
---|---|---|
fb85abff | 1 | /* Vectorizer |
fbd26352 | 2 | Copyright (C) 2003-2019 Free Software Foundation, Inc. |
48e1416a | 3 | Contributed by Dorit Naishlos <dorit@il.ibm.com> |
c91e8223 | 4 | |
5 | This file is part of GCC. | |
6 | ||
7 | GCC is free software; you can redistribute it and/or modify it under | |
8 | the terms of the GNU General Public License as published by the Free | |
8c4c00c1 | 9 | Software Foundation; either version 3, or (at your option) any later |
c91e8223 | 10 | version. |
11 | ||
12 | GCC is distributed in the hope that it will be useful, but WITHOUT ANY | |
13 | WARRANTY; without even the implied warranty of MERCHANTABILITY or | |
14 | FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License | |
15 | for more details. | |
16 | ||
17 | You should have received a copy of the GNU General Public License | |
8c4c00c1 | 18 | along with GCC; see the file COPYING3. If not see |
19 | <http://www.gnu.org/licenses/>. */ | |
c91e8223 | 20 | |
fb85abff | 21 | /* Loop and basic block vectorizer. |
b056d812 | 22 | |
48e1416a | 23 | This file contains drivers for the three vectorizers: |
24 | (1) loop vectorizer (inter-iteration parallelism), | |
fb85abff | 25 | (2) loop-aware SLP (intra-iteration parallelism) (invoked by the loop |
26 | vectorizer) | |
27 | (3) BB vectorizer (out-of-loops), aka SLP | |
48e1416a | 28 | |
fb85abff | 29 | The rest of the vectorizer's code is organized as follows: |
48e1416a | 30 | - tree-vect-loop.c - loop specific parts such as reductions, etc. These are |
31 | used by drivers (1) and (2). | |
32 | - tree-vect-loop-manip.c - vectorizer's loop control-flow utilities, used by | |
33 | drivers (1) and (2). | |
34 | - tree-vect-slp.c - BB vectorization specific analysis and transformation, | |
fb85abff | 35 | used by drivers (2) and (3). |
36 | - tree-vect-stmts.c - statements analysis and transformation (used by all). | |
48e1416a | 37 | - tree-vect-data-refs.c - vectorizer specific data-refs analysis and |
fb85abff | 38 | manipulations (used by all). |
39 | - tree-vect-patterns.c - vectorizable code patterns detector (used by all) | |
40 | ||
41 | Here's a poor attempt at illustrating that: | |
42 | ||
43 | tree-vectorizer.c: | |
44 | loop_vect() loop_aware_slp() slp_vect() | |
45 | | / \ / | |
46 | | / \ / | |
47 | tree-vect-loop.c tree-vect-slp.c | |
48 | | \ \ / / | | |
49 | | \ \/ / | | |
50 | | \ /\ / | | |
51 | | \ / \ / | | |
52 | tree-vect-stmts.c tree-vect-data-refs.c | |
53 | \ / | |
54 | tree-vect-patterns.c | |
55 | */ | |
c6c91d61 | 56 | |
fb85abff | 57 | #include "config.h" |
58 | #include "system.h" | |
59 | #include "coretypes.h" | |
9ef16211 | 60 | #include "backend.h" |
fb85abff | 61 | #include "tree.h" |
9ef16211 | 62 | #include "gimple.h" |
7c29e30e | 63 | #include "predict.h" |
64 | #include "tree-pass.h" | |
9ef16211 | 65 | #include "ssa.h" |
7c29e30e | 66 | #include "cgraph.h" |
b20a8bb4 | 67 | #include "fold-const.h" |
9ed99284 | 68 | #include "stor-layout.h" |
dcf1a1ec | 69 | #include "gimple-iterator.h" |
70 | #include "gimple-walk.h" | |
05d9c18a | 71 | #include "tree-ssa-loop-manip.h" |
d5e80d93 | 72 | #include "tree-ssa-loop-niter.h" |
c71d3c24 | 73 | #include "tree-cfg.h" |
fb85abff | 74 | #include "cfgloop.h" |
fb85abff | 75 | #include "tree-vectorizer.h" |
3d483a94 | 76 | #include "tree-ssa-propagate.h" |
23e1875f | 77 | #include "dbgcnt.h" |
ef3f2b6f | 78 | #include "tree-scalar-evolution.h" |
30a86690 | 79 | #include "stringpool.h" |
80 | #include "attribs.h" | |
c863e35b | 81 | #include "gimple-pretty-print.h" |
ed9370cc | 82 | #include "opt-problem.h" |
9f445421 | 83 | #include "internal-fn.h" |
ef3f2b6f | 84 | |
c6c91d61 | 85 | |
c309657f | 86 | /* Loop or bb location, with hotness information. */ |
87 | dump_user_location_t vect_location; | |
52394a67 | 88 | |
72ea15e5 | 89 | /* auto_purge_vect_location's dtor: reset the vect_location |
90 | global, to avoid stale location_t values that could reference | |
91 | GC-ed blocks. */ | |
92 | ||
93 | auto_purge_vect_location::~auto_purge_vect_location () | |
94 | { | |
95 | vect_location = dump_user_location_t (); | |
96 | } | |
97 | ||
c863e35b | 98 | /* Dump a cost entry according to args to F. */ |
99 | ||
100 | void | |
101 | dump_stmt_cost (FILE *f, void *data, int count, enum vect_cost_for_stmt kind, | |
524665d0 | 102 | stmt_vec_info stmt_info, int misalign, unsigned cost, |
c863e35b | 103 | enum vect_cost_model_location where) |
104 | { | |
105 | fprintf (f, "%p ", data); | |
106 | if (stmt_info) | |
107 | { | |
108 | print_gimple_expr (f, STMT_VINFO_STMT (stmt_info), 0, TDF_SLIM); | |
109 | fprintf (f, " "); | |
110 | } | |
111 | else | |
112 | fprintf (f, "<unknown> "); | |
113 | fprintf (f, "%d times ", count); | |
114 | const char *ks = "unknown"; | |
115 | switch (kind) | |
116 | { | |
117 | case scalar_stmt: | |
118 | ks = "scalar_stmt"; | |
119 | break; | |
120 | case scalar_load: | |
121 | ks = "scalar_load"; | |
122 | break; | |
123 | case scalar_store: | |
124 | ks = "scalar_store"; | |
125 | break; | |
126 | case vector_stmt: | |
127 | ks = "vector_stmt"; | |
128 | break; | |
129 | case vector_load: | |
130 | ks = "vector_load"; | |
131 | break; | |
132 | case vector_gather_load: | |
133 | ks = "vector_gather_load"; | |
134 | break; | |
135 | case unaligned_load: | |
136 | ks = "unaligned_load"; | |
137 | break; | |
138 | case unaligned_store: | |
139 | ks = "unaligned_store"; | |
140 | break; | |
141 | case vector_store: | |
4f6aea41 | 142 | ks = "vector_store"; |
c863e35b | 143 | break; |
144 | case vector_scatter_store: | |
4f6aea41 | 145 | ks = "vector_scatter_store"; |
c863e35b | 146 | break; |
147 | case vec_to_scalar: | |
4f6aea41 | 148 | ks = "vec_to_scalar"; |
c863e35b | 149 | break; |
150 | case scalar_to_vec: | |
4f6aea41 | 151 | ks = "scalar_to_vec"; |
c863e35b | 152 | break; |
153 | case cond_branch_not_taken: | |
4f6aea41 | 154 | ks = "cond_branch_not_taken"; |
c863e35b | 155 | break; |
156 | case cond_branch_taken: | |
4f6aea41 | 157 | ks = "cond_branch_taken"; |
c863e35b | 158 | break; |
159 | case vec_perm: | |
4f6aea41 | 160 | ks = "vec_perm"; |
c863e35b | 161 | break; |
162 | case vec_promote_demote: | |
4f6aea41 | 163 | ks = "vec_promote_demote"; |
c863e35b | 164 | break; |
165 | case vec_construct: | |
4f6aea41 | 166 | ks = "vec_construct"; |
c863e35b | 167 | break; |
168 | } | |
169 | fprintf (f, "%s ", ks); | |
170 | if (kind == unaligned_load || kind == unaligned_store) | |
171 | fprintf (f, "(misalign %d) ", misalign); | |
524665d0 | 172 | fprintf (f, "costs %u ", cost); |
c863e35b | 173 | const char *ws = "unknown"; |
174 | switch (where) | |
175 | { | |
176 | case vect_prologue: | |
177 | ws = "prologue"; | |
178 | break; | |
179 | case vect_body: | |
180 | ws = "body"; | |
181 | break; | |
182 | case vect_epilogue: | |
183 | ws = "epilogue"; | |
184 | break; | |
185 | } | |
186 | fprintf (f, "in %s\n", ws); | |
187 | } | |
3d483a94 | 188 | \f |
189 | /* For mapping simduid to vectorization factor. */ | |
190 | ||
298e7f9a | 191 | struct simduid_to_vf : free_ptr_hash<simduid_to_vf> |
3d483a94 | 192 | { |
193 | unsigned int simduid; | |
d75596cd | 194 | poly_uint64 vf; |
3d483a94 | 195 | |
196 | /* hash_table support. */ | |
9969c043 | 197 | static inline hashval_t hash (const simduid_to_vf *); |
198 | static inline int equal (const simduid_to_vf *, const simduid_to_vf *); | |
3d483a94 | 199 | }; |
200 | ||
201 | inline hashval_t | |
9969c043 | 202 | simduid_to_vf::hash (const simduid_to_vf *p) |
3d483a94 | 203 | { |
204 | return p->simduid; | |
205 | } | |
206 | ||
207 | inline int | |
9969c043 | 208 | simduid_to_vf::equal (const simduid_to_vf *p1, const simduid_to_vf *p2) |
3d483a94 | 209 | { |
210 | return p1->simduid == p2->simduid; | |
211 | } | |
212 | ||
213 | /* This hash maps the OMP simd array to the corresponding simduid used | |
214 | to index into it. Like thus, | |
215 | ||
216 | _7 = GOMP_SIMD_LANE (simduid.0) | |
217 | ... | |
218 | ... | |
219 | D.1737[_7] = stuff; | |
220 | ||
221 | ||
bc7bff74 | 222 | This hash maps from the OMP simd array (D.1737[]) to DECL_UID of |
223 | simduid.0. */ | |
3d483a94 | 224 | |
298e7f9a | 225 | struct simd_array_to_simduid : free_ptr_hash<simd_array_to_simduid> |
3d483a94 | 226 | { |
227 | tree decl; | |
228 | unsigned int simduid; | |
229 | ||
230 | /* hash_table support. */ | |
9969c043 | 231 | static inline hashval_t hash (const simd_array_to_simduid *); |
232 | static inline int equal (const simd_array_to_simduid *, | |
233 | const simd_array_to_simduid *); | |
3d483a94 | 234 | }; |
235 | ||
236 | inline hashval_t | |
9969c043 | 237 | simd_array_to_simduid::hash (const simd_array_to_simduid *p) |
3d483a94 | 238 | { |
239 | return DECL_UID (p->decl); | |
240 | } | |
241 | ||
242 | inline int | |
9969c043 | 243 | simd_array_to_simduid::equal (const simd_array_to_simduid *p1, |
244 | const simd_array_to_simduid *p2) | |
3d483a94 | 245 | { |
246 | return p1->decl == p2->decl; | |
247 | } | |
248 | ||
43895be5 | 249 | /* Fold IFN_GOMP_SIMD_LANE, IFN_GOMP_SIMD_VF, IFN_GOMP_SIMD_LAST_LANE, |
250 | into their corresponding constants and remove | |
251 | IFN_GOMP_SIMD_ORDERED_{START,END}. */ | |
3d483a94 | 252 | |
253 | static void | |
9918db44 | 254 | adjust_simduid_builtins (hash_table<simduid_to_vf> *htab) |
3d483a94 | 255 | { |
256 | basic_block bb; | |
257 | ||
fc00614f | 258 | FOR_EACH_BB_FN (bb, cfun) |
3d483a94 | 259 | { |
260 | gimple_stmt_iterator i; | |
261 | ||
43895be5 | 262 | for (i = gsi_start_bb (bb); !gsi_end_p (i); ) |
3d483a94 | 263 | { |
d75596cd | 264 | poly_uint64 vf = 1; |
3d483a94 | 265 | enum internal_fn ifn; |
42acab1c | 266 | gimple *stmt = gsi_stmt (i); |
3d483a94 | 267 | tree t; |
268 | if (!is_gimple_call (stmt) | |
269 | || !gimple_call_internal_p (stmt)) | |
43895be5 | 270 | { |
271 | gsi_next (&i); | |
272 | continue; | |
273 | } | |
3d483a94 | 274 | ifn = gimple_call_internal_fn (stmt); |
275 | switch (ifn) | |
276 | { | |
277 | case IFN_GOMP_SIMD_LANE: | |
278 | case IFN_GOMP_SIMD_VF: | |
279 | case IFN_GOMP_SIMD_LAST_LANE: | |
280 | break; | |
43895be5 | 281 | case IFN_GOMP_SIMD_ORDERED_START: |
282 | case IFN_GOMP_SIMD_ORDERED_END: | |
a9833286 | 283 | if (integer_onep (gimple_call_arg (stmt, 0))) |
284 | { | |
285 | enum built_in_function bcode | |
286 | = (ifn == IFN_GOMP_SIMD_ORDERED_START | |
287 | ? BUILT_IN_GOMP_ORDERED_START | |
288 | : BUILT_IN_GOMP_ORDERED_END); | |
289 | gimple *g | |
290 | = gimple_build_call (builtin_decl_explicit (bcode), 0); | |
291 | tree vdef = gimple_vdef (stmt); | |
292 | gimple_set_vdef (g, vdef); | |
293 | SSA_NAME_DEF_STMT (vdef) = g; | |
294 | gimple_set_vuse (g, gimple_vuse (stmt)); | |
295 | gsi_replace (&i, g, true); | |
296 | continue; | |
297 | } | |
43895be5 | 298 | gsi_remove (&i, true); |
299 | unlink_stmt_vdef (stmt); | |
300 | continue; | |
3d483a94 | 301 | default: |
43895be5 | 302 | gsi_next (&i); |
3d483a94 | 303 | continue; |
304 | } | |
305 | tree arg = gimple_call_arg (stmt, 0); | |
306 | gcc_assert (arg != NULL_TREE); | |
307 | gcc_assert (TREE_CODE (arg) == SSA_NAME); | |
308 | simduid_to_vf *p = NULL, data; | |
309 | data.simduid = DECL_UID (SSA_NAME_VAR (arg)); | |
ce9b77d9 | 310 | /* Need to nullify loop safelen field since it's value is not |
311 | valid after transformation. */ | |
312 | if (bb->loop_father && bb->loop_father->safelen > 0) | |
313 | bb->loop_father->safelen = 0; | |
9918db44 | 314 | if (htab) |
315 | { | |
316 | p = htab->find (&data); | |
317 | if (p) | |
318 | vf = p->vf; | |
319 | } | |
3d483a94 | 320 | switch (ifn) |
321 | { | |
322 | case IFN_GOMP_SIMD_VF: | |
323 | t = build_int_cst (unsigned_type_node, vf); | |
324 | break; | |
325 | case IFN_GOMP_SIMD_LANE: | |
326 | t = build_int_cst (unsigned_type_node, 0); | |
327 | break; | |
328 | case IFN_GOMP_SIMD_LAST_LANE: | |
329 | t = gimple_call_arg (stmt, 1); | |
330 | break; | |
331 | default: | |
332 | gcc_unreachable (); | |
333 | } | |
dc185174 | 334 | tree lhs = gimple_call_lhs (stmt); |
335 | if (lhs) | |
336 | replace_uses_by (lhs, t); | |
337 | release_defs (stmt); | |
338 | gsi_remove (&i, true); | |
3d483a94 | 339 | } |
340 | } | |
341 | } | |
c6c91d61 | 342 | |
3d483a94 | 343 | /* Helper structure for note_simd_array_uses. */ |
344 | ||
345 | struct note_simd_array_uses_struct | |
346 | { | |
c1f445d2 | 347 | hash_table<simd_array_to_simduid> **htab; |
3d483a94 | 348 | unsigned int simduid; |
349 | }; | |
350 | ||
351 | /* Callback for note_simd_array_uses, called through walk_gimple_op. */ | |
352 | ||
353 | static tree | |
354 | note_simd_array_uses_cb (tree *tp, int *walk_subtrees, void *data) | |
355 | { | |
356 | struct walk_stmt_info *wi = (struct walk_stmt_info *) data; | |
357 | struct note_simd_array_uses_struct *ns | |
358 | = (struct note_simd_array_uses_struct *) wi->info; | |
359 | ||
360 | if (TYPE_P (*tp)) | |
361 | *walk_subtrees = 0; | |
362 | else if (VAR_P (*tp) | |
363 | && lookup_attribute ("omp simd array", DECL_ATTRIBUTES (*tp)) | |
364 | && DECL_CONTEXT (*tp) == current_function_decl) | |
365 | { | |
366 | simd_array_to_simduid data; | |
c1f445d2 | 367 | if (!*ns->htab) |
368 | *ns->htab = new hash_table<simd_array_to_simduid> (15); | |
3d483a94 | 369 | data.decl = *tp; |
370 | data.simduid = ns->simduid; | |
c1f445d2 | 371 | simd_array_to_simduid **slot = (*ns->htab)->find_slot (&data, INSERT); |
3d483a94 | 372 | if (*slot == NULL) |
373 | { | |
374 | simd_array_to_simduid *p = XNEW (simd_array_to_simduid); | |
375 | *p = data; | |
376 | *slot = p; | |
377 | } | |
378 | else if ((*slot)->simduid != ns->simduid) | |
379 | (*slot)->simduid = -1U; | |
380 | *walk_subtrees = 0; | |
381 | } | |
382 | return NULL_TREE; | |
383 | } | |
384 | ||
385 | /* Find "omp simd array" temporaries and map them to corresponding | |
386 | simduid. */ | |
387 | ||
388 | static void | |
c1f445d2 | 389 | note_simd_array_uses (hash_table<simd_array_to_simduid> **htab) |
3d483a94 | 390 | { |
391 | basic_block bb; | |
392 | gimple_stmt_iterator gsi; | |
393 | struct walk_stmt_info wi; | |
394 | struct note_simd_array_uses_struct ns; | |
395 | ||
396 | memset (&wi, 0, sizeof (wi)); | |
397 | wi.info = &ns; | |
398 | ns.htab = htab; | |
399 | ||
fc00614f | 400 | FOR_EACH_BB_FN (bb, cfun) |
3d483a94 | 401 | for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi)) |
402 | { | |
42acab1c | 403 | gimple *stmt = gsi_stmt (gsi); |
3d483a94 | 404 | if (!is_gimple_call (stmt) || !gimple_call_internal_p (stmt)) |
405 | continue; | |
406 | switch (gimple_call_internal_fn (stmt)) | |
407 | { | |
408 | case IFN_GOMP_SIMD_LANE: | |
409 | case IFN_GOMP_SIMD_VF: | |
410 | case IFN_GOMP_SIMD_LAST_LANE: | |
411 | break; | |
412 | default: | |
413 | continue; | |
414 | } | |
415 | tree lhs = gimple_call_lhs (stmt); | |
416 | if (lhs == NULL_TREE) | |
417 | continue; | |
418 | imm_use_iterator use_iter; | |
42acab1c | 419 | gimple *use_stmt; |
3d483a94 | 420 | ns.simduid = DECL_UID (SSA_NAME_VAR (gimple_call_arg (stmt, 0))); |
421 | FOR_EACH_IMM_USE_STMT (use_stmt, use_iter, lhs) | |
422 | if (!is_gimple_debug (use_stmt)) | |
423 | walk_gimple_op (use_stmt, note_simd_array_uses_cb, &wi); | |
424 | } | |
425 | } | |
9918db44 | 426 | |
427 | /* Shrink arrays with "omp simd array" attribute to the corresponding | |
428 | vectorization factor. */ | |
429 | ||
430 | static void | |
431 | shrink_simd_arrays | |
432 | (hash_table<simd_array_to_simduid> *simd_array_to_simduid_htab, | |
433 | hash_table<simduid_to_vf> *simduid_to_vf_htab) | |
434 | { | |
435 | for (hash_table<simd_array_to_simduid>::iterator iter | |
436 | = simd_array_to_simduid_htab->begin (); | |
437 | iter != simd_array_to_simduid_htab->end (); ++iter) | |
438 | if ((*iter)->simduid != -1U) | |
439 | { | |
440 | tree decl = (*iter)->decl; | |
d75596cd | 441 | poly_uint64 vf = 1; |
9918db44 | 442 | if (simduid_to_vf_htab) |
443 | { | |
444 | simduid_to_vf *p = NULL, data; | |
445 | data.simduid = (*iter)->simduid; | |
446 | p = simduid_to_vf_htab->find (&data); | |
447 | if (p) | |
448 | vf = p->vf; | |
449 | } | |
450 | tree atype | |
451 | = build_array_type_nelts (TREE_TYPE (TREE_TYPE (decl)), vf); | |
452 | TREE_TYPE (decl) = atype; | |
453 | relayout_decl (decl); | |
454 | } | |
455 | ||
456 | delete simd_array_to_simduid_htab; | |
457 | } | |
fb85abff | 458 | \f |
e15e8a2a | 459 | /* Initialize the vec_info with kind KIND_IN and target cost data |
460 | TARGET_COST_DATA_IN. */ | |
461 | ||
a99aba41 | 462 | vec_info::vec_info (vec_info::vec_kind kind_in, void *target_cost_data_in, |
463 | vec_info_shared *shared_) | |
e15e8a2a | 464 | : kind (kind_in), |
a99aba41 | 465 | shared (shared_), |
e15e8a2a | 466 | target_cost_data (target_cost_data_in) |
467 | { | |
d8ef42d0 | 468 | stmt_vec_infos.create (50); |
e15e8a2a | 469 | } |
23e1875f | 470 | |
e15e8a2a | 471 | vec_info::~vec_info () |
23e1875f | 472 | { |
e15e8a2a | 473 | slp_instance instance; |
23e1875f | 474 | unsigned int i; |
475 | ||
e15e8a2a | 476 | FOR_EACH_VEC_ELT (slp_instances, i, instance) |
2068679d | 477 | vect_free_slp_instance (instance, true); |
e15e8a2a | 478 | |
e15e8a2a | 479 | destroy_cost_data (target_cost_data); |
c626a338 | 480 | free_stmt_vec_infos (); |
23e1875f | 481 | } |
482 | ||
a99aba41 | 483 | vec_info_shared::vec_info_shared () |
484 | : datarefs (vNULL), | |
485 | datarefs_copy (vNULL), | |
486 | ddrs (vNULL) | |
487 | { | |
488 | } | |
489 | ||
490 | vec_info_shared::~vec_info_shared () | |
491 | { | |
492 | free_data_refs (datarefs); | |
493 | free_dependence_relations (ddrs); | |
494 | datarefs_copy.release (); | |
495 | } | |
496 | ||
497 | void | |
498 | vec_info_shared::save_datarefs () | |
499 | { | |
500 | if (!flag_checking) | |
501 | return; | |
502 | datarefs_copy.reserve_exact (datarefs.length ()); | |
503 | for (unsigned i = 0; i < datarefs.length (); ++i) | |
504 | datarefs_copy.quick_push (*datarefs[i]); | |
505 | } | |
506 | ||
507 | void | |
508 | vec_info_shared::check_datarefs () | |
509 | { | |
510 | if (!flag_checking) | |
511 | return; | |
512 | gcc_assert (datarefs.length () == datarefs_copy.length ()); | |
513 | for (unsigned i = 0; i < datarefs.length (); ++i) | |
514 | if (memcmp (&datarefs_copy[i], datarefs[i], sizeof (data_reference)) != 0) | |
515 | gcc_unreachable (); | |
516 | } | |
517 | ||
04b2391d | 518 | /* Record that STMT belongs to the vectorizable region. Create and return |
519 | an associated stmt_vec_info. */ | |
520 | ||
521 | stmt_vec_info | |
522 | vec_info::add_stmt (gimple *stmt) | |
523 | { | |
c626a338 | 524 | stmt_vec_info res = new_stmt_vec_info (stmt); |
04b2391d | 525 | set_vinfo_for_stmt (stmt, res); |
526 | return res; | |
527 | } | |
528 | ||
03c0d666 | 529 | /* If STMT has an associated stmt_vec_info, return that vec_info, otherwise |
530 | return null. It is safe to call this function on any statement, even if | |
531 | it might not be part of the vectorizable region. */ | |
532 | ||
533 | stmt_vec_info | |
534 | vec_info::lookup_stmt (gimple *stmt) | |
535 | { | |
536 | unsigned int uid = gimple_uid (stmt); | |
537 | if (uid > 0 && uid - 1 < stmt_vec_infos.length ()) | |
538 | { | |
539 | stmt_vec_info res = stmt_vec_infos[uid - 1]; | |
540 | if (res && res->stmt == stmt) | |
541 | return res; | |
542 | } | |
543 | return NULL; | |
544 | } | |
545 | ||
9cfd4e76 | 546 | /* If NAME is an SSA_NAME and its definition has an associated stmt_vec_info, |
547 | return that stmt_vec_info, otherwise return null. It is safe to call | |
548 | this on arbitrary operands. */ | |
549 | ||
550 | stmt_vec_info | |
551 | vec_info::lookup_def (tree name) | |
552 | { | |
553 | if (TREE_CODE (name) == SSA_NAME | |
554 | && !SSA_NAME_IS_DEFAULT_DEF (name)) | |
555 | return lookup_stmt (SSA_NAME_DEF_STMT (name)); | |
556 | return NULL; | |
557 | } | |
558 | ||
aaac0b10 | 559 | /* See whether there is a single non-debug statement that uses LHS and |
560 | whether that statement has an associated stmt_vec_info. Return the | |
561 | stmt_vec_info if so, otherwise return null. */ | |
562 | ||
563 | stmt_vec_info | |
564 | vec_info::lookup_single_use (tree lhs) | |
565 | { | |
566 | use_operand_p dummy; | |
567 | gimple *use_stmt; | |
568 | if (single_imm_use (lhs, &dummy, &use_stmt)) | |
569 | return lookup_stmt (use_stmt); | |
570 | return NULL; | |
571 | } | |
572 | ||
db72d3bf | 573 | /* Return vectorization information about DR. */ |
574 | ||
575 | dr_vec_info * | |
576 | vec_info::lookup_dr (data_reference *dr) | |
577 | { | |
578 | stmt_vec_info stmt_info = lookup_stmt (DR_STMT (dr)); | |
579 | /* DR_STMT should never refer to a stmt in a pattern replacement. */ | |
580 | gcc_checking_assert (!is_pattern_stmt_p (stmt_info)); | |
581 | return STMT_VINFO_DR_INFO (stmt_info->dr_aux.stmt); | |
582 | } | |
583 | ||
5f02ee72 | 584 | /* Record that NEW_STMT_INFO now implements the same data reference |
585 | as OLD_STMT_INFO. */ | |
586 | ||
587 | void | |
588 | vec_info::move_dr (stmt_vec_info new_stmt_info, stmt_vec_info old_stmt_info) | |
589 | { | |
590 | gcc_assert (!is_pattern_stmt_p (old_stmt_info)); | |
591 | STMT_VINFO_DR_INFO (old_stmt_info)->stmt = new_stmt_info; | |
592 | new_stmt_info->dr_aux = old_stmt_info->dr_aux; | |
593 | STMT_VINFO_DR_WRT_VEC_LOOP (new_stmt_info) | |
594 | = STMT_VINFO_DR_WRT_VEC_LOOP (old_stmt_info); | |
595 | STMT_VINFO_GATHER_SCATTER_P (new_stmt_info) | |
596 | = STMT_VINFO_GATHER_SCATTER_P (old_stmt_info); | |
597 | } | |
598 | ||
f525c1af | 599 | /* Permanently remove the statement described by STMT_INFO from the |
600 | function. */ | |
601 | ||
602 | void | |
603 | vec_info::remove_stmt (stmt_vec_info stmt_info) | |
604 | { | |
605 | gcc_assert (!stmt_info->pattern_stmt_p); | |
c652091a | 606 | set_vinfo_for_stmt (stmt_info->stmt, NULL); |
f525c1af | 607 | gimple_stmt_iterator si = gsi_for_stmt (stmt_info->stmt); |
608 | unlink_stmt_vdef (stmt_info->stmt); | |
609 | gsi_remove (&si, true); | |
610 | release_defs (stmt_info->stmt); | |
611 | free_stmt_vec_info (stmt_info); | |
612 | } | |
613 | ||
a5071338 | 614 | /* Replace the statement at GSI by NEW_STMT, both the vectorization |
615 | information and the function itself. STMT_INFO describes the statement | |
616 | at GSI. */ | |
617 | ||
618 | void | |
619 | vec_info::replace_stmt (gimple_stmt_iterator *gsi, stmt_vec_info stmt_info, | |
620 | gimple *new_stmt) | |
621 | { | |
622 | gimple *old_stmt = stmt_info->stmt; | |
623 | gcc_assert (!stmt_info->pattern_stmt_p && old_stmt == gsi_stmt (*gsi)); | |
624 | set_vinfo_for_stmt (old_stmt, NULL); | |
625 | set_vinfo_for_stmt (new_stmt, stmt_info); | |
626 | stmt_info->stmt = new_stmt; | |
627 | gsi_replace (gsi, new_stmt, true); | |
628 | } | |
629 | ||
c626a338 | 630 | /* Create and initialize a new stmt_vec_info struct for STMT. */ |
631 | ||
632 | stmt_vec_info | |
633 | vec_info::new_stmt_vec_info (gimple *stmt) | |
634 | { | |
635 | stmt_vec_info res = XCNEW (struct _stmt_vec_info); | |
636 | res->vinfo = this; | |
637 | res->stmt = stmt; | |
638 | ||
639 | STMT_VINFO_TYPE (res) = undef_vec_info_type; | |
640 | STMT_VINFO_RELEVANT (res) = vect_unused_in_scope; | |
641 | STMT_VINFO_VECTORIZABLE (res) = true; | |
642 | STMT_VINFO_VEC_REDUCTION_TYPE (res) = TREE_CODE_REDUCTION; | |
643 | STMT_VINFO_VEC_CONST_COND_REDUC_CODE (res) = ERROR_MARK; | |
f92474f8 | 644 | STMT_VINFO_SLP_VECT_ONLY (res) = false; |
c626a338 | 645 | |
646 | if (gimple_code (stmt) == GIMPLE_PHI | |
647 | && is_loop_header_bb_p (gimple_bb (stmt))) | |
648 | STMT_VINFO_DEF_TYPE (res) = vect_unknown_def_type; | |
649 | else | |
650 | STMT_VINFO_DEF_TYPE (res) = vect_internal_def; | |
651 | ||
652 | STMT_VINFO_SAME_ALIGN_REFS (res).create (0); | |
653 | STMT_SLP_TYPE (res) = loop_vect; | |
654 | ||
655 | /* This is really "uninitialized" until vect_compute_data_ref_alignment. */ | |
656 | res->dr_aux.misalignment = DR_MISALIGNMENT_UNINITIALIZED; | |
657 | ||
658 | return res; | |
659 | } | |
660 | ||
661 | /* Associate STMT with INFO. */ | |
662 | ||
663 | void | |
664 | vec_info::set_vinfo_for_stmt (gimple *stmt, stmt_vec_info info) | |
665 | { | |
666 | unsigned int uid = gimple_uid (stmt); | |
667 | if (uid == 0) | |
668 | { | |
669 | gcc_checking_assert (info); | |
670 | uid = stmt_vec_infos.length () + 1; | |
671 | gimple_set_uid (stmt, uid); | |
672 | stmt_vec_infos.safe_push (info); | |
673 | } | |
674 | else | |
675 | { | |
a477acc5 | 676 | gcc_checking_assert (info == NULL); |
c626a338 | 677 | stmt_vec_infos[uid - 1] = info; |
678 | } | |
679 | } | |
680 | ||
681 | /* Free the contents of stmt_vec_infos. */ | |
682 | ||
683 | void | |
684 | vec_info::free_stmt_vec_infos (void) | |
685 | { | |
686 | unsigned int i; | |
687 | stmt_vec_info info; | |
688 | FOR_EACH_VEC_ELT (stmt_vec_infos, i, info) | |
a477acc5 | 689 | if (info != NULL) |
c626a338 | 690 | free_stmt_vec_info (info); |
691 | stmt_vec_infos.release (); | |
692 | } | |
693 | ||
694 | /* Free STMT_INFO. */ | |
695 | ||
696 | void | |
697 | vec_info::free_stmt_vec_info (stmt_vec_info stmt_info) | |
698 | { | |
699 | if (stmt_info->pattern_stmt_p) | |
700 | { | |
701 | gimple_set_bb (stmt_info->stmt, NULL); | |
702 | tree lhs = gimple_get_lhs (stmt_info->stmt); | |
703 | if (lhs && TREE_CODE (lhs) == SSA_NAME) | |
704 | release_ssa_name (lhs); | |
705 | } | |
706 | ||
707 | STMT_VINFO_SAME_ALIGN_REFS (stmt_info).release (); | |
708 | STMT_VINFO_SIMD_CLONE_INFO (stmt_info).release (); | |
709 | free (stmt_info); | |
710 | } | |
711 | ||
d5e80d93 | 712 | /* A helper function to free scev and LOOP niter information, as well as |
713 | clear loop constraint LOOP_C_FINITE. */ | |
714 | ||
715 | void | |
716 | vect_free_loop_info_assumptions (struct loop *loop) | |
717 | { | |
718 | scev_reset_htab (); | |
719 | /* We need to explicitly reset upper bound information since they are | |
46480a95 | 720 | used even after free_numbers_of_iterations_estimates. */ |
d5e80d93 | 721 | loop->any_upper_bound = false; |
722 | loop->any_likely_upper_bound = false; | |
46480a95 | 723 | free_numbers_of_iterations_estimates (loop); |
d5e80d93 | 724 | loop_constraint_clear (loop, LOOP_C_FINITE); |
725 | } | |
23e1875f | 726 | |
c71d3c24 | 727 | /* If LOOP has been versioned during ifcvt, return the internal call |
728 | guarding it. */ | |
729 | ||
42acab1c | 730 | static gimple * |
c71d3c24 | 731 | vect_loop_vectorized_call (struct loop *loop) |
732 | { | |
733 | basic_block bb = loop_preheader_edge (loop)->src; | |
42acab1c | 734 | gimple *g; |
c71d3c24 | 735 | do |
736 | { | |
737 | g = last_stmt (bb); | |
738 | if (g) | |
739 | break; | |
740 | if (!single_pred_p (bb)) | |
741 | break; | |
742 | bb = single_pred (bb); | |
743 | } | |
744 | while (1); | |
745 | if (g && gimple_code (g) == GIMPLE_COND) | |
746 | { | |
747 | gimple_stmt_iterator gsi = gsi_for_stmt (g); | |
748 | gsi_prev (&gsi); | |
749 | if (!gsi_end_p (gsi)) | |
750 | { | |
751 | g = gsi_stmt (gsi); | |
7408cd7d | 752 | if (gimple_call_internal_p (g, IFN_LOOP_VECTORIZED) |
c71d3c24 | 753 | && (tree_to_shwi (gimple_call_arg (g, 0)) == loop->num |
754 | || tree_to_shwi (gimple_call_arg (g, 1)) == loop->num)) | |
755 | return g; | |
756 | } | |
757 | } | |
758 | return NULL; | |
759 | } | |
760 | ||
d391dfdc | 761 | /* If LOOP has been versioned during loop distribution, return the gurading |
762 | internal call. */ | |
763 | ||
764 | static gimple * | |
765 | vect_loop_dist_alias_call (struct loop *loop) | |
766 | { | |
767 | basic_block bb; | |
768 | basic_block entry; | |
769 | struct loop *outer, *orig; | |
770 | gimple_stmt_iterator gsi; | |
771 | gimple *g; | |
772 | ||
773 | if (loop->orig_loop_num == 0) | |
774 | return NULL; | |
775 | ||
776 | orig = get_loop (cfun, loop->orig_loop_num); | |
777 | if (orig == NULL) | |
778 | { | |
779 | /* The original loop is somehow destroyed. Clear the information. */ | |
780 | loop->orig_loop_num = 0; | |
781 | return NULL; | |
782 | } | |
783 | ||
784 | if (loop != orig) | |
785 | bb = nearest_common_dominator (CDI_DOMINATORS, loop->header, orig->header); | |
786 | else | |
787 | bb = loop_preheader_edge (loop)->src; | |
788 | ||
789 | outer = bb->loop_father; | |
790 | entry = ENTRY_BLOCK_PTR_FOR_FN (cfun); | |
791 | ||
792 | /* Look upward in dominance tree. */ | |
793 | for (; bb != entry && flow_bb_inside_loop_p (outer, bb); | |
794 | bb = get_immediate_dominator (CDI_DOMINATORS, bb)) | |
795 | { | |
796 | g = last_stmt (bb); | |
797 | if (g == NULL || gimple_code (g) != GIMPLE_COND) | |
798 | continue; | |
799 | ||
800 | gsi = gsi_for_stmt (g); | |
801 | gsi_prev (&gsi); | |
802 | if (gsi_end_p (gsi)) | |
803 | continue; | |
804 | ||
805 | g = gsi_stmt (gsi); | |
806 | /* The guarding internal function call must have the same distribution | |
807 | alias id. */ | |
808 | if (gimple_call_internal_p (g, IFN_LOOP_DIST_ALIAS) | |
809 | && (tree_to_shwi (gimple_call_arg (g, 0)) == loop->orig_loop_num)) | |
810 | return g; | |
811 | } | |
812 | return NULL; | |
813 | } | |
814 | ||
e0e5f8dc | 815 | /* Set the uids of all the statements in basic blocks inside loop |
816 | represented by LOOP_VINFO. LOOP_VECTORIZED_CALL is the internal | |
817 | call guarding the loop which has been if converted. */ | |
818 | static void | |
42acab1c | 819 | set_uid_loop_bbs (loop_vec_info loop_vinfo, gimple *loop_vectorized_call) |
e0e5f8dc | 820 | { |
821 | tree arg = gimple_call_arg (loop_vectorized_call, 1); | |
822 | basic_block *bbs; | |
823 | unsigned int i; | |
824 | struct loop *scalar_loop = get_loop (cfun, tree_to_shwi (arg)); | |
825 | ||
826 | LOOP_VINFO_SCALAR_LOOP (loop_vinfo) = scalar_loop; | |
ccd0a9f9 | 827 | gcc_checking_assert (vect_loop_vectorized_call (scalar_loop) |
e0e5f8dc | 828 | == loop_vectorized_call); |
ccd0a9f9 | 829 | /* If we are going to vectorize outer loop, prevent vectorization |
830 | of the inner loop in the scalar loop - either the scalar loop is | |
831 | thrown away, so it is a wasted work, or is used only for | |
832 | a few iterations. */ | |
833 | if (scalar_loop->inner) | |
834 | { | |
835 | gimple *g = vect_loop_vectorized_call (scalar_loop->inner); | |
836 | if (g) | |
837 | { | |
838 | arg = gimple_call_arg (g, 0); | |
839 | get_loop (cfun, tree_to_shwi (arg))->dont_vectorize = true; | |
d391dfdc | 840 | fold_loop_internal_call (g, boolean_false_node); |
ccd0a9f9 | 841 | } |
842 | } | |
e0e5f8dc | 843 | bbs = get_loop_body (scalar_loop); |
844 | for (i = 0; i < scalar_loop->num_nodes; i++) | |
845 | { | |
846 | basic_block bb = bbs[i]; | |
847 | gimple_stmt_iterator gsi; | |
848 | for (gsi = gsi_start_phis (bb); !gsi_end_p (gsi); gsi_next (&gsi)) | |
849 | { | |
42acab1c | 850 | gimple *phi = gsi_stmt (gsi); |
e0e5f8dc | 851 | gimple_set_uid (phi, 0); |
852 | } | |
853 | for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi)) | |
854 | { | |
42acab1c | 855 | gimple *stmt = gsi_stmt (gsi); |
e0e5f8dc | 856 | gimple_set_uid (stmt, 0); |
857 | } | |
858 | } | |
859 | free (bbs); | |
860 | } | |
c71d3c24 | 861 | |
8c25bf3b | 862 | /* Try to vectorize LOOP. */ |
863 | ||
864 | static unsigned | |
865 | try_vectorize_loop_1 (hash_table<simduid_to_vf> *&simduid_to_vf_htab, | |
866 | unsigned *num_vectorized_loops, | |
867 | loop_p loop, loop_vec_info orig_loop_vinfo, | |
868 | gimple *loop_vectorized_call, | |
869 | gimple *loop_dist_alias_call) | |
870 | { | |
871 | unsigned ret = 0; | |
a99aba41 | 872 | vec_info_shared shared; |
72ea15e5 | 873 | auto_purge_vect_location sentinel; |
8c25bf3b | 874 | vect_location = find_loop_location (loop); |
c309657f | 875 | if (LOCATION_LOCUS (vect_location.get_location_t ()) != UNKNOWN_LOCATION |
8c25bf3b | 876 | && dump_enabled_p ()) |
ed9370cc | 877 | dump_printf (MSG_NOTE | MSG_PRIORITY_INTERNALS, |
878 | "\nAnalyzing loop at %s:%d\n", | |
c309657f | 879 | LOCATION_FILE (vect_location.get_location_t ()), |
880 | LOCATION_LINE (vect_location.get_location_t ())); | |
8c25bf3b | 881 | |
ed9370cc | 882 | /* Try to analyze the loop, retaining an opt_problem if dump_enabled_p. */ |
883 | opt_loop_vec_info loop_vinfo | |
884 | = vect_analyze_loop (loop, orig_loop_vinfo, &shared); | |
8c25bf3b | 885 | loop->aux = loop_vinfo; |
886 | ||
ed9370cc | 887 | if (!loop_vinfo) |
888 | if (dump_enabled_p ()) | |
889 | if (opt_problem *problem = loop_vinfo.get_problem ()) | |
890 | { | |
891 | dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, | |
892 | "couldn't vectorize loop\n"); | |
893 | problem->emit_and_clear (); | |
894 | } | |
895 | ||
8c25bf3b | 896 | if (!loop_vinfo || !LOOP_VINFO_VECTORIZABLE_P (loop_vinfo)) |
897 | { | |
898 | /* Free existing information if loop is analyzed with some | |
899 | assumptions. */ | |
900 | if (loop_constraint_set_p (loop, LOOP_C_FINITE)) | |
901 | vect_free_loop_info_assumptions (loop); | |
902 | ||
903 | /* If we applied if-conversion then try to vectorize the | |
904 | BB of innermost loops. | |
905 | ??? Ideally BB vectorization would learn to vectorize | |
906 | control flow by applying if-conversion on-the-fly, the | |
907 | following retains the if-converted loop body even when | |
908 | only non-if-converted parts took part in BB vectorization. */ | |
909 | if (flag_tree_slp_vectorize != 0 | |
910 | && loop_vectorized_call | |
911 | && ! loop->inner) | |
912 | { | |
913 | basic_block bb = loop->header; | |
9f445421 | 914 | bool require_loop_vectorize = false; |
8c25bf3b | 915 | for (gimple_stmt_iterator gsi = gsi_start_bb (bb); |
916 | !gsi_end_p (gsi); gsi_next (&gsi)) | |
917 | { | |
918 | gimple *stmt = gsi_stmt (gsi); | |
9f445421 | 919 | gcall *call = dyn_cast <gcall *> (stmt); |
920 | if (call && gimple_call_internal_p (call)) | |
8c25bf3b | 921 | { |
9f445421 | 922 | internal_fn ifn = gimple_call_internal_fn (call); |
923 | if (ifn == IFN_MASK_LOAD || ifn == IFN_MASK_STORE | |
924 | /* Don't keep the if-converted parts when the ifn with | |
925 | specifc type is not supported by the backend. */ | |
926 | || (direct_internal_fn_p (ifn) | |
927 | && !direct_internal_fn_supported_p | |
928 | (call, OPTIMIZE_FOR_SPEED))) | |
929 | { | |
930 | require_loop_vectorize = true; | |
931 | break; | |
932 | } | |
8c25bf3b | 933 | } |
934 | gimple_set_uid (stmt, -1); | |
935 | gimple_set_visited (stmt, false); | |
936 | } | |
9f445421 | 937 | if (!require_loop_vectorize && vect_slp_bb (bb)) |
8c25bf3b | 938 | { |
91f42adc | 939 | if (dump_enabled_p ()) |
940 | dump_printf_loc (MSG_NOTE, vect_location, | |
941 | "basic block vectorized\n"); | |
8c25bf3b | 942 | fold_loop_internal_call (loop_vectorized_call, |
943 | boolean_true_node); | |
944 | loop_vectorized_call = NULL; | |
945 | ret |= TODO_cleanup_cfg; | |
946 | } | |
947 | } | |
948 | /* If outer loop vectorization fails for LOOP_VECTORIZED guarded | |
949 | loop, don't vectorize its inner loop; we'll attempt to | |
950 | vectorize LOOP_VECTORIZED guarded inner loop of the scalar | |
951 | loop version. */ | |
952 | if (loop_vectorized_call && loop->inner) | |
953 | loop->inner->dont_vectorize = true; | |
954 | return ret; | |
955 | } | |
956 | ||
957 | if (!dbg_cnt (vect_loop)) | |
958 | { | |
959 | /* Free existing information if loop is analyzed with some | |
960 | assumptions. */ | |
961 | if (loop_constraint_set_p (loop, LOOP_C_FINITE)) | |
962 | vect_free_loop_info_assumptions (loop); | |
963 | return ret; | |
964 | } | |
965 | ||
966 | if (loop_vectorized_call) | |
967 | set_uid_loop_bbs (loop_vinfo, loop_vectorized_call); | |
968 | ||
969 | unsigned HOST_WIDE_INT bytes; | |
91f42adc | 970 | if (dump_enabled_p ()) |
971 | { | |
972 | if (current_vector_size.is_constant (&bytes)) | |
973 | dump_printf_loc (MSG_OPTIMIZED_LOCATIONS, vect_location, | |
974 | "loop vectorized using %wu byte vectors\n", bytes); | |
975 | else | |
976 | dump_printf_loc (MSG_OPTIMIZED_LOCATIONS, vect_location, | |
977 | "loop vectorized using variable length vectors\n"); | |
978 | } | |
8c25bf3b | 979 | |
980 | loop_p new_loop = vect_transform_loop (loop_vinfo); | |
981 | (*num_vectorized_loops)++; | |
982 | /* Now that the loop has been vectorized, allow it to be unrolled | |
983 | etc. */ | |
984 | loop->force_vectorize = false; | |
985 | ||
986 | if (loop->simduid) | |
987 | { | |
988 | simduid_to_vf *simduid_to_vf_data = XNEW (simduid_to_vf); | |
989 | if (!simduid_to_vf_htab) | |
990 | simduid_to_vf_htab = new hash_table<simduid_to_vf> (15); | |
991 | simduid_to_vf_data->simduid = DECL_UID (loop->simduid); | |
992 | simduid_to_vf_data->vf = loop_vinfo->vectorization_factor; | |
993 | *simduid_to_vf_htab->find_slot (simduid_to_vf_data, INSERT) | |
994 | = simduid_to_vf_data; | |
995 | } | |
996 | ||
997 | if (loop_vectorized_call) | |
998 | { | |
999 | fold_loop_internal_call (loop_vectorized_call, boolean_true_node); | |
1000 | loop_vectorized_call = NULL; | |
1001 | ret |= TODO_cleanup_cfg; | |
1002 | } | |
1003 | if (loop_dist_alias_call) | |
1004 | { | |
1005 | tree value = gimple_call_arg (loop_dist_alias_call, 1); | |
1006 | fold_loop_internal_call (loop_dist_alias_call, value); | |
1007 | loop_dist_alias_call = NULL; | |
1008 | ret |= TODO_cleanup_cfg; | |
1009 | } | |
1010 | ||
1011 | /* Epilogue of vectorized loop must be vectorized too. */ | |
1012 | if (new_loop) | |
1013 | ret |= try_vectorize_loop_1 (simduid_to_vf_htab, num_vectorized_loops, | |
1014 | new_loop, loop_vinfo, NULL, NULL); | |
1015 | ||
1016 | return ret; | |
1017 | } | |
1018 | ||
1019 | /* Try to vectorize LOOP. */ | |
1020 | ||
1021 | static unsigned | |
1022 | try_vectorize_loop (hash_table<simduid_to_vf> *&simduid_to_vf_htab, | |
1023 | unsigned *num_vectorized_loops, loop_p loop) | |
1024 | { | |
1025 | if (!((flag_tree_loop_vectorize | |
1026 | && optimize_loop_nest_for_speed_p (loop)) | |
1027 | || loop->force_vectorize)) | |
1028 | return 0; | |
1029 | ||
1030 | return try_vectorize_loop_1 (simduid_to_vf_htab, num_vectorized_loops, | |
1031 | loop, NULL, | |
1032 | vect_loop_vectorized_call (loop), | |
1033 | vect_loop_dist_alias_call (loop)); | |
1034 | } | |
1035 | ||
1036 | ||
c91e8223 | 1037 | /* Function vectorize_loops. |
48e1416a | 1038 | |
f083cd24 | 1039 | Entry point to loop vectorization phase. */ |
c91e8223 | 1040 | |
d4ec02d0 | 1041 | unsigned |
7194de72 | 1042 | vectorize_loops (void) |
c91e8223 | 1043 | { |
e9705e7f | 1044 | unsigned int i; |
c91e8223 | 1045 | unsigned int num_vectorized_loops = 0; |
17519ba0 | 1046 | unsigned int vect_loops_num; |
17519ba0 | 1047 | struct loop *loop; |
c1f445d2 | 1048 | hash_table<simduid_to_vf> *simduid_to_vf_htab = NULL; |
1049 | hash_table<simd_array_to_simduid> *simd_array_to_simduid_htab = NULL; | |
c71d3c24 | 1050 | bool any_ifcvt_loops = false; |
1051 | unsigned ret = 0; | |
c91e8223 | 1052 | |
41f75a99 | 1053 | vect_loops_num = number_of_loops (cfun); |
d74ee105 | 1054 | |
1055 | /* Bail out if there are no loops. */ | |
1056 | if (vect_loops_num <= 1) | |
9918db44 | 1057 | return 0; |
3d483a94 | 1058 | |
1059 | if (cfun->has_simduid_loops) | |
1060 | note_simd_array_uses (&simd_array_to_simduid_htab); | |
d74ee105 | 1061 | |
c91e8223 | 1062 | /* ----------- Analyze loops. ----------- */ |
1063 | ||
48e1416a | 1064 | /* If some loop was duplicated, it gets bigger number |
282bf14c | 1065 | than all previously defined loops. This fact allows us to run |
c91e8223 | 1066 | only over initial loops skipping newly generated ones. */ |
f21d4d00 | 1067 | FOR_EACH_LOOP (loop, 0) |
c71d3c24 | 1068 | if (loop->dont_vectorize) |
7baffbd3 | 1069 | { |
ccd0a9f9 | 1070 | any_ifcvt_loops = true; |
1071 | /* If-conversion sometimes versions both the outer loop | |
1072 | (for the case when outer loop vectorization might be | |
1073 | desirable) as well as the inner loop in the scalar version | |
1074 | of the loop. So we have: | |
1075 | if (LOOP_VECTORIZED (1, 3)) | |
1076 | { | |
1077 | loop1 | |
1078 | loop2 | |
1079 | } | |
1080 | else | |
1081 | loop3 (copy of loop1) | |
1082 | if (LOOP_VECTORIZED (4, 5)) | |
1083 | loop4 (copy of loop2) | |
1084 | else | |
1085 | loop5 (copy of loop4) | |
1086 | If FOR_EACH_LOOP gives us loop3 first (which has | |
1087 | dont_vectorize set), make sure to process loop1 before loop4; | |
1088 | so that we can prevent vectorization of loop4 if loop1 | |
1089 | is successfully vectorized. */ | |
1090 | if (loop->inner) | |
1091 | { | |
1092 | gimple *loop_vectorized_call | |
1093 | = vect_loop_vectorized_call (loop); | |
1094 | if (loop_vectorized_call | |
1095 | && vect_loop_vectorized_call (loop->inner)) | |
1096 | { | |
1097 | tree arg = gimple_call_arg (loop_vectorized_call, 0); | |
1098 | struct loop *vector_loop | |
1099 | = get_loop (cfun, tree_to_shwi (arg)); | |
1100 | if (vector_loop && vector_loop != loop) | |
1101 | { | |
ccd0a9f9 | 1102 | /* Make sure we don't vectorize it twice. */ |
8c25bf3b | 1103 | vector_loop->dont_vectorize = true; |
1104 | ret |= try_vectorize_loop (simduid_to_vf_htab, | |
1105 | &num_vectorized_loops, | |
1106 | vector_loop); | |
ccd0a9f9 | 1107 | } |
1108 | } | |
1109 | } | |
1110 | } | |
1111 | else | |
8c25bf3b | 1112 | ret |= try_vectorize_loop (simduid_to_vf_htab, &num_vectorized_loops, |
1113 | loop); | |
f083cd24 | 1114 | |
c309657f | 1115 | vect_location = dump_user_location_t (); |
c91e8223 | 1116 | |
581f8050 | 1117 | statistics_counter_event (cfun, "Vectorized loops", num_vectorized_loops); |
6d8fb6cf | 1118 | if (dump_enabled_p () |
1119 | || (num_vectorized_loops > 0 && dump_enabled_p ())) | |
b055bc88 | 1120 | dump_printf_loc (MSG_NOTE, vect_location, |
7bd765d4 | 1121 | "vectorized %u loops in function.\n", |
1122 | num_vectorized_loops); | |
c91e8223 | 1123 | |
1124 | /* ----------- Finalize. ----------- */ | |
1125 | ||
c71d3c24 | 1126 | if (any_ifcvt_loops) |
ed572d0a | 1127 | for (i = 1; i < number_of_loops (cfun); i++) |
c71d3c24 | 1128 | { |
1129 | loop = get_loop (cfun, i); | |
1130 | if (loop && loop->dont_vectorize) | |
1131 | { | |
42acab1c | 1132 | gimple *g = vect_loop_vectorized_call (loop); |
c71d3c24 | 1133 | if (g) |
1134 | { | |
d391dfdc | 1135 | fold_loop_internal_call (g, boolean_false_node); |
1136 | ret |= TODO_cleanup_cfg; | |
1137 | g = NULL; | |
1138 | } | |
1139 | else | |
1140 | g = vect_loop_dist_alias_call (loop); | |
1141 | ||
1142 | if (g) | |
1143 | { | |
1144 | fold_loop_internal_call (g, boolean_false_node); | |
c71d3c24 | 1145 | ret |= TODO_cleanup_cfg; |
1146 | } | |
1147 | } | |
1148 | } | |
1149 | ||
8c25bf3b | 1150 | for (i = 1; i < number_of_loops (cfun); i++) |
c91e8223 | 1151 | { |
9ce81338 | 1152 | loop_vec_info loop_vinfo; |
cfd9ca84 | 1153 | bool has_mask_store; |
9ce81338 | 1154 | |
41f75a99 | 1155 | loop = get_loop (cfun, i); |
8c25bf3b | 1156 | if (!loop || !loop->aux) |
9ce81338 | 1157 | continue; |
45ba1503 | 1158 | loop_vinfo = (loop_vec_info) loop->aux; |
8c25bf3b | 1159 | has_mask_store = LOOP_VINFO_HAS_MASK_STORE (loop_vinfo); |
e15e8a2a | 1160 | delete loop_vinfo; |
e75a6670 | 1161 | if (has_mask_store |
1162 | && targetm.vectorize.empty_mask_is_expensive (IFN_MASK_STORE)) | |
cfd9ca84 | 1163 | optimize_mask_stores (loop); |
c91e8223 | 1164 | loop->aux = NULL; |
1165 | } | |
d4ec02d0 | 1166 | |
43895be5 | 1167 | /* Fold IFN_GOMP_SIMD_{VF,LANE,LAST_LANE,ORDERED_{START,END}} builtins. */ |
3d483a94 | 1168 | if (cfun->has_simduid_loops) |
9918db44 | 1169 | adjust_simduid_builtins (simduid_to_vf_htab); |
3d483a94 | 1170 | |
1171 | /* Shrink any "omp array simd" temporary arrays to the | |
1172 | actual vectorization factors. */ | |
c1f445d2 | 1173 | if (simd_array_to_simduid_htab) |
9918db44 | 1174 | shrink_simd_arrays (simd_array_to_simduid_htab, simduid_to_vf_htab); |
1175 | delete simduid_to_vf_htab; | |
1176 | cfun->has_simduid_loops = false; | |
3d483a94 | 1177 | |
f55f91f5 | 1178 | if (num_vectorized_loops > 0) |
1179 | { | |
1180 | /* If we vectorized any loop only virtual SSA form needs to be updated. | |
1181 | ??? Also while we try hard to update loop-closed SSA form we fail | |
1182 | to properly do this in some corner-cases (see PR56286). */ | |
1183 | rewrite_into_loop_closed_ssa (NULL, TODO_update_ssa_only_virtuals); | |
1184 | return TODO_cleanup_cfg; | |
1185 | } | |
1186 | ||
c71d3c24 | 1187 | return ret; |
c91e8223 | 1188 | } |
48e1416a | 1189 | |
f37a5008 | 1190 | |
9918db44 | 1191 | /* Entry point to the simduid cleanup pass. */ |
1192 | ||
1193 | namespace { | |
1194 | ||
1195 | const pass_data pass_data_simduid_cleanup = | |
1196 | { | |
1197 | GIMPLE_PASS, /* type */ | |
1198 | "simduid", /* name */ | |
1199 | OPTGROUP_NONE, /* optinfo_flags */ | |
1200 | TV_NONE, /* tv_id */ | |
1201 | ( PROP_ssa | PROP_cfg ), /* properties_required */ | |
1202 | 0, /* properties_provided */ | |
1203 | 0, /* properties_destroyed */ | |
1204 | 0, /* todo_flags_start */ | |
1205 | 0, /* todo_flags_finish */ | |
1206 | }; | |
1207 | ||
1208 | class pass_simduid_cleanup : public gimple_opt_pass | |
1209 | { | |
1210 | public: | |
1211 | pass_simduid_cleanup (gcc::context *ctxt) | |
1212 | : gimple_opt_pass (pass_data_simduid_cleanup, ctxt) | |
1213 | {} | |
1214 | ||
1215 | /* opt_pass methods: */ | |
1216 | opt_pass * clone () { return new pass_simduid_cleanup (m_ctxt); } | |
1217 | virtual bool gate (function *fun) { return fun->has_simduid_loops; } | |
1218 | virtual unsigned int execute (function *); | |
1219 | ||
1220 | }; // class pass_simduid_cleanup | |
1221 | ||
1222 | unsigned int | |
1223 | pass_simduid_cleanup::execute (function *fun) | |
1224 | { | |
1225 | hash_table<simd_array_to_simduid> *simd_array_to_simduid_htab = NULL; | |
1226 | ||
1227 | note_simd_array_uses (&simd_array_to_simduid_htab); | |
1228 | ||
43895be5 | 1229 | /* Fold IFN_GOMP_SIMD_{VF,LANE,LAST_LANE,ORDERED_{START,END}} builtins. */ |
9918db44 | 1230 | adjust_simduid_builtins (NULL); |
1231 | ||
1232 | /* Shrink any "omp array simd" temporary arrays to the | |
1233 | actual vectorization factors. */ | |
1234 | if (simd_array_to_simduid_htab) | |
1235 | shrink_simd_arrays (simd_array_to_simduid_htab, NULL); | |
1236 | fun->has_simduid_loops = false; | |
1237 | return 0; | |
1238 | } | |
1239 | ||
1240 | } // anon namespace | |
1241 | ||
1242 | gimple_opt_pass * | |
1243 | make_pass_simduid_cleanup (gcc::context *ctxt) | |
1244 | { | |
1245 | return new pass_simduid_cleanup (ctxt); | |
1246 | } | |
1247 | ||
1248 | ||
37545e54 | 1249 | /* Entry point to basic block SLP phase. */ |
1250 | ||
cbe8bda8 | 1251 | namespace { |
1252 | ||
1253 | const pass_data pass_data_slp_vectorize = | |
37545e54 | 1254 | { |
cbe8bda8 | 1255 | GIMPLE_PASS, /* type */ |
1256 | "slp", /* name */ | |
1257 | OPTGROUP_LOOP | OPTGROUP_VEC, /* optinfo_flags */ | |
cbe8bda8 | 1258 | TV_TREE_SLP_VECTORIZATION, /* tv_id */ |
1259 | ( PROP_ssa | PROP_cfg ), /* properties_required */ | |
1260 | 0, /* properties_provided */ | |
1261 | 0, /* properties_destroyed */ | |
1262 | 0, /* todo_flags_start */ | |
8b88439e | 1263 | TODO_update_ssa, /* todo_flags_finish */ |
37545e54 | 1264 | }; |
1265 | ||
cbe8bda8 | 1266 | class pass_slp_vectorize : public gimple_opt_pass |
1267 | { | |
1268 | public: | |
9af5ce0c | 1269 | pass_slp_vectorize (gcc::context *ctxt) |
1270 | : gimple_opt_pass (pass_data_slp_vectorize, ctxt) | |
cbe8bda8 | 1271 | {} |
1272 | ||
1273 | /* opt_pass methods: */ | |
ef3f2b6f | 1274 | opt_pass * clone () { return new pass_slp_vectorize (m_ctxt); } |
31315c24 | 1275 | virtual bool gate (function *) { return flag_tree_slp_vectorize != 0; } |
65b0537f | 1276 | virtual unsigned int execute (function *); |
cbe8bda8 | 1277 | |
1278 | }; // class pass_slp_vectorize | |
1279 | ||
65b0537f | 1280 | unsigned int |
1281 | pass_slp_vectorize::execute (function *fun) | |
1282 | { | |
72ea15e5 | 1283 | auto_purge_vect_location sentinel; |
65b0537f | 1284 | basic_block bb; |
1285 | ||
ef3f2b6f | 1286 | bool in_loop_pipeline = scev_initialized_p (); |
1287 | if (!in_loop_pipeline) | |
1288 | { | |
1289 | loop_optimizer_init (LOOPS_NORMAL); | |
1290 | scev_initialize (); | |
1291 | } | |
1292 | ||
c256513d | 1293 | /* Mark all stmts as not belonging to the current region and unvisited. */ |
4c7587f5 | 1294 | FOR_EACH_BB_FN (bb, fun) |
1295 | { | |
1296 | for (gimple_stmt_iterator gsi = gsi_start_bb (bb); !gsi_end_p (gsi); | |
1297 | gsi_next (&gsi)) | |
c256513d | 1298 | { |
1299 | gimple *stmt = gsi_stmt (gsi); | |
1300 | gimple_set_uid (stmt, -1); | |
1301 | gimple_set_visited (stmt, false); | |
1302 | } | |
4c7587f5 | 1303 | } |
1304 | ||
65b0537f | 1305 | FOR_EACH_BB_FN (bb, fun) |
1306 | { | |
0a08c1bc | 1307 | if (vect_slp_bb (bb)) |
91f42adc | 1308 | if (dump_enabled_p ()) |
1309 | dump_printf_loc (MSG_NOTE, vect_location, "basic block vectorized\n"); | |
65b0537f | 1310 | } |
1311 | ||
ef3f2b6f | 1312 | if (!in_loop_pipeline) |
1313 | { | |
1314 | scev_finalize (); | |
1315 | loop_optimizer_finalize (); | |
1316 | } | |
1317 | ||
65b0537f | 1318 | return 0; |
1319 | } | |
1320 | ||
cbe8bda8 | 1321 | } // anon namespace |
1322 | ||
1323 | gimple_opt_pass * | |
1324 | make_pass_slp_vectorize (gcc::context *ctxt) | |
1325 | { | |
1326 | return new pass_slp_vectorize (ctxt); | |
1327 | } | |
1328 | ||
37545e54 | 1329 | |
f37a5008 | 1330 | /* Increase alignment of global arrays to improve vectorization potential. |
1331 | TODO: | |
1332 | - Consider also structs that have an array field. | |
1333 | - Use ipa analysis to prune arrays that can't be vectorized? | |
1334 | This should involve global alignment analysis and in the future also | |
1335 | array padding. */ | |
1336 | ||
5da368e3 | 1337 | static unsigned get_vec_alignment_for_type (tree); |
1338 | static hash_map<tree, unsigned> *type_align_map; | |
1339 | ||
1340 | /* Return alignment of array's vector type corresponding to scalar type. | |
1341 | 0 if no vector type exists. */ | |
1342 | static unsigned | |
1343 | get_vec_alignment_for_array_type (tree type) | |
1344 | { | |
1345 | gcc_assert (TREE_CODE (type) == ARRAY_TYPE); | |
69e56cc0 | 1346 | poly_uint64 array_size, vector_size; |
5da368e3 | 1347 | |
1348 | tree vectype = get_vectype_for_scalar_type (strip_array_types (type)); | |
1349 | if (!vectype | |
69e56cc0 | 1350 | || !poly_int_tree_p (TYPE_SIZE (type), &array_size) |
1351 | || !poly_int_tree_p (TYPE_SIZE (vectype), &vector_size) | |
1352 | || maybe_lt (array_size, vector_size)) | |
5da368e3 | 1353 | return 0; |
1354 | ||
1355 | return TYPE_ALIGN (vectype); | |
1356 | } | |
1357 | ||
1358 | /* Return alignment of field having maximum alignment of vector type | |
1359 | corresponding to it's scalar type. For now, we only consider fields whose | |
1360 | offset is a multiple of it's vector alignment. | |
1361 | 0 if no suitable field is found. */ | |
1362 | static unsigned | |
1363 | get_vec_alignment_for_record_type (tree type) | |
1364 | { | |
1365 | gcc_assert (TREE_CODE (type) == RECORD_TYPE); | |
1366 | ||
1367 | unsigned max_align = 0, alignment; | |
1368 | HOST_WIDE_INT offset; | |
1369 | tree offset_tree; | |
1370 | ||
1371 | if (TYPE_PACKED (type)) | |
1372 | return 0; | |
1373 | ||
1374 | unsigned *slot = type_align_map->get (type); | |
1375 | if (slot) | |
1376 | return *slot; | |
1377 | ||
1378 | for (tree field = first_field (type); | |
1379 | field != NULL_TREE; | |
1380 | field = DECL_CHAIN (field)) | |
1381 | { | |
1382 | /* Skip if not FIELD_DECL or if alignment is set by user. */ | |
1383 | if (TREE_CODE (field) != FIELD_DECL | |
1384 | || DECL_USER_ALIGN (field) | |
1385 | || DECL_ARTIFICIAL (field)) | |
1386 | continue; | |
1387 | ||
1388 | /* We don't need to process the type further if offset is variable, | |
1389 | since the offsets of remaining members will also be variable. */ | |
1390 | if (TREE_CODE (DECL_FIELD_OFFSET (field)) != INTEGER_CST | |
1391 | || TREE_CODE (DECL_FIELD_BIT_OFFSET (field)) != INTEGER_CST) | |
1392 | break; | |
1393 | ||
1394 | /* Similarly stop processing the type if offset_tree | |
1395 | does not fit in unsigned HOST_WIDE_INT. */ | |
1396 | offset_tree = bit_position (field); | |
1397 | if (!tree_fits_uhwi_p (offset_tree)) | |
1398 | break; | |
1399 | ||
1400 | offset = tree_to_uhwi (offset_tree); | |
1401 | alignment = get_vec_alignment_for_type (TREE_TYPE (field)); | |
1402 | ||
1403 | /* Get maximum alignment of vectorized field/array among those members | |
1404 | whose offset is multiple of the vector alignment. */ | |
1405 | if (alignment | |
1406 | && (offset % alignment == 0) | |
1407 | && (alignment > max_align)) | |
1408 | max_align = alignment; | |
1409 | } | |
1410 | ||
1411 | type_align_map->put (type, max_align); | |
1412 | return max_align; | |
1413 | } | |
1414 | ||
1415 | /* Return alignment of vector type corresponding to decl's scalar type | |
1416 | or 0 if it doesn't exist or the vector alignment is lesser than | |
1417 | decl's alignment. */ | |
1418 | static unsigned | |
1419 | get_vec_alignment_for_type (tree type) | |
1420 | { | |
1421 | if (type == NULL_TREE) | |
1422 | return 0; | |
1423 | ||
1424 | gcc_assert (TYPE_P (type)); | |
1425 | ||
1426 | static unsigned alignment = 0; | |
1427 | switch (TREE_CODE (type)) | |
1428 | { | |
1429 | case ARRAY_TYPE: | |
1430 | alignment = get_vec_alignment_for_array_type (type); | |
1431 | break; | |
1432 | case RECORD_TYPE: | |
1433 | alignment = get_vec_alignment_for_record_type (type); | |
1434 | break; | |
1435 | default: | |
1436 | alignment = 0; | |
1437 | break; | |
1438 | } | |
1439 | ||
1440 | return (alignment > TYPE_ALIGN (type)) ? alignment : 0; | |
1441 | } | |
1442 | ||
1443 | /* Entry point to increase_alignment pass. */ | |
f37a5008 | 1444 | static unsigned int |
1445 | increase_alignment (void) | |
1446 | { | |
098f44bc | 1447 | varpool_node *vnode; |
f37a5008 | 1448 | |
c309657f | 1449 | vect_location = dump_user_location_t (); |
5da368e3 | 1450 | type_align_map = new hash_map<tree, unsigned>; |
c4c46233 | 1451 | |
f37a5008 | 1452 | /* Increase the alignment of all global arrays for vectorization. */ |
7c455d87 | 1453 | FOR_EACH_DEFINED_VARIABLE (vnode) |
f37a5008 | 1454 | { |
5da368e3 | 1455 | tree decl = vnode->decl; |
f37a5008 | 1456 | unsigned int alignment; |
1457 | ||
5da368e3 | 1458 | if ((decl_in_symtab_p (decl) |
1459 | && !symtab_node::get (decl)->can_increase_alignment_p ()) | |
1460 | || DECL_USER_ALIGN (decl) || DECL_ARTIFICIAL (decl)) | |
1461 | continue; | |
1462 | ||
1463 | alignment = get_vec_alignment_for_type (TREE_TYPE (decl)); | |
1464 | if (alignment && vect_can_force_dr_alignment_p (decl, alignment)) | |
fb85abff | 1465 | { |
5da368e3 | 1466 | vnode->increase_alignment (alignment); |
91f42adc | 1467 | if (dump_enabled_p ()) |
1468 | dump_printf (MSG_NOTE, "Increasing alignment of decl: %T\n", decl); | |
fb85abff | 1469 | } |
f37a5008 | 1470 | } |
5da368e3 | 1471 | |
1472 | delete type_align_map; | |
f37a5008 | 1473 | return 0; |
1474 | } | |
1475 | ||
fb85abff | 1476 | |
cbe8bda8 | 1477 | namespace { |
1478 | ||
1479 | const pass_data pass_data_ipa_increase_alignment = | |
f37a5008 | 1480 | { |
cbe8bda8 | 1481 | SIMPLE_IPA_PASS, /* type */ |
1482 | "increase_alignment", /* name */ | |
1483 | OPTGROUP_LOOP | OPTGROUP_VEC, /* optinfo_flags */ | |
cbe8bda8 | 1484 | TV_IPA_OPT, /* tv_id */ |
1485 | 0, /* properties_required */ | |
1486 | 0, /* properties_provided */ | |
1487 | 0, /* properties_destroyed */ | |
1488 | 0, /* todo_flags_start */ | |
1489 | 0, /* todo_flags_finish */ | |
f37a5008 | 1490 | }; |
cbe8bda8 | 1491 | |
1492 | class pass_ipa_increase_alignment : public simple_ipa_opt_pass | |
1493 | { | |
1494 | public: | |
9af5ce0c | 1495 | pass_ipa_increase_alignment (gcc::context *ctxt) |
1496 | : simple_ipa_opt_pass (pass_data_ipa_increase_alignment, ctxt) | |
cbe8bda8 | 1497 | {} |
1498 | ||
1499 | /* opt_pass methods: */ | |
31315c24 | 1500 | virtual bool gate (function *) |
1501 | { | |
1502 | return flag_section_anchors && flag_tree_loop_vectorize; | |
1503 | } | |
1504 | ||
65b0537f | 1505 | virtual unsigned int execute (function *) { return increase_alignment (); } |
cbe8bda8 | 1506 | |
1507 | }; // class pass_ipa_increase_alignment | |
1508 | ||
1509 | } // anon namespace | |
1510 | ||
1511 | simple_ipa_opt_pass * | |
1512 | make_pass_ipa_increase_alignment (gcc::context *ctxt) | |
1513 | { | |
1514 | return new pass_ipa_increase_alignment (ctxt); | |
1515 | } |