]>
Commit | Line | Data |
---|---|---|
fb85abff | 1 | /* Vectorizer |
fbd26352 | 2 | Copyright (C) 2003-2019 Free Software Foundation, Inc. |
c91e8223 | 3 | Contributed by Dorit Naishlos <dorit@il.ibm.com> |
4 | ||
5 | This file is part of GCC. | |
6 | ||
7 | GCC is free software; you can redistribute it and/or modify it under | |
8 | the terms of the GNU General Public License as published by the Free | |
8c4c00c1 | 9 | Software Foundation; either version 3, or (at your option) any later |
c91e8223 | 10 | version. |
11 | ||
12 | GCC is distributed in the hope that it will be useful, but WITHOUT ANY | |
13 | WARRANTY; without even the implied warranty of MERCHANTABILITY or | |
14 | FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License | |
15 | for more details. | |
16 | ||
17 | You should have received a copy of the GNU General Public License | |
8c4c00c1 | 18 | along with GCC; see the file COPYING3. If not see |
19 | <http://www.gnu.org/licenses/>. */ | |
c91e8223 | 20 | |
21 | #ifndef GCC_TREE_VECTORIZER_H | |
22 | #define GCC_TREE_VECTORIZER_H | |
23 | ||
2e966e2a | 24 | typedef class _stmt_vec_info *stmt_vec_info; |
1cb23a78 | 25 | |
fb85abff | 26 | #include "tree-data-ref.h" |
4f372c2c | 27 | #include "tree-hash-traits.h" |
f4ac3f3e | 28 | #include "target.h" |
fb85abff | 29 | |
c91e8223 | 30 | /* Used for naming of new temporaries. */ |
31 | enum vect_var_kind { | |
32 | vect_simple_var, | |
ea8f3370 | 33 | vect_pointer_var, |
dab48979 | 34 | vect_scalar_var, |
35 | vect_mask_var | |
c91e8223 | 36 | }; |
37 | ||
4a61a337 | 38 | /* Defines type of operation. */ |
c91e8223 | 39 | enum operation_type { |
40 | unary_op = 1, | |
4a61a337 | 41 | binary_op, |
42 | ternary_op | |
c91e8223 | 43 | }; |
44 | ||
1a9b4618 | 45 | /* Define type of available alignment support. */ |
46 | enum dr_alignment_support { | |
47 | dr_unaligned_unsupported, | |
48 | dr_unaligned_supported, | |
b0eb8c66 | 49 | dr_explicit_realign, |
50 | dr_explicit_realign_optimized, | |
1a9b4618 | 51 | dr_aligned |
52 | }; | |
53 | ||
ce10738f | 54 | /* Define type of def-use cross-iteration cycle. */ |
e12906b9 | 55 | enum vect_def_type { |
bc620c5c | 56 | vect_uninitialized_def = 0, |
f083cd24 | 57 | vect_constant_def = 1, |
58 | vect_external_def, | |
59 | vect_internal_def, | |
e12906b9 | 60 | vect_induction_def, |
61 | vect_reduction_def, | |
7aa0d350 | 62 | vect_double_reduction_def, |
ade2ac53 | 63 | vect_nested_cycle, |
e12906b9 | 64 | vect_unknown_def_type |
65 | }; | |
66 | ||
d09d8733 | 67 | /* Define type of reduction. */ |
68 | enum vect_reduction_type { | |
69 | TREE_CODE_REDUCTION, | |
b4552064 | 70 | COND_REDUCTION, |
56fb8e9d | 71 | INTEGER_INDUC_COND_REDUCTION, |
3bf95150 | 72 | CONST_COND_REDUCTION, |
73 | ||
74 | /* Retain a scalar phi and use a FOLD_EXTRACT_LAST within the loop | |
75 | to implement: | |
76 | ||
77 | for (int i = 0; i < VF; ++i) | |
78 | res = cond[i] ? val[i] : res; */ | |
d77809a4 | 79 | EXTRACT_LAST_REDUCTION, |
80 | ||
81 | /* Use a folding reduction within the loop to implement: | |
82 | ||
83 | for (int i = 0; i < VF; ++i) | |
84 | res = res OP val[i]; | |
85 | ||
86 | (with no reassociation). */ |
87 | FOLD_LEFT_REDUCTION | |
d09d8733 | 88 | }; |
89 | ||
07be02da | 90 | #define VECTORIZABLE_CYCLE_DEF(D) (((D) == vect_reduction_def) \ |
91 | || ((D) == vect_double_reduction_def) \ | |
92 | || ((D) == vect_nested_cycle)) | |
93 | ||
4db2b577 | 94 | /* Structure to encapsulate information about a group of like |
95 | instructions to be presented to the target cost model. */ | |
6dc50383 | 96 | struct stmt_info_for_cost { |
4db2b577 | 97 | int count; |
98 | enum vect_cost_for_stmt kind; | |
c863e35b | 99 | enum vect_cost_model_location where; |
1aeaa139 | 100 | stmt_vec_info stmt_info; |
4db2b577 | 101 | int misalign; |
6dc50383 | 102 | }; |
4db2b577 | 103 | |
f1f41a6c | 104 | typedef vec<stmt_info_for_cost> stmt_vector_for_cost; |
4db2b577 | 105 | |
4f372c2c | 106 | /* Maps base addresses to an innermost_loop_behavior that gives the maximum |
107 | known alignment for that base. */ | |
108 | typedef hash_map<tree_operand_hash, | |
109 | innermost_loop_behavior *> vec_base_alignments; | |
110 | ||
c6895939 | 111 | /************************************************************************ |
112 | SLP | |
113 | ************************************************************************/ | |
40bcc7c2 | 114 | typedef struct _slp_tree *slp_tree; |
c6895939 | 115 | |
b0f64919 | 116 | /* A computation tree of an SLP instance. Each node corresponds to a group of |
c6895939 | 117 | stmts to be packed in a SIMD stmt. */ |
40bcc7c2 | 118 | struct _slp_tree { |
b0f64919 | 119 | /* Nodes that contain def-stmts of this node's statement operands. */ |
40bcc7c2 | 120 | vec<slp_tree> children; |
c6895939 | 121 | /* A group of scalar stmts to be vectorized together. */ |
06bb64b8 | 122 | vec<stmt_vec_info> stmts; |
678e3d6e | 123 | /* Load permutation relative to the stores, NULL if there is no |
124 | permutation. */ | |
125 | vec<unsigned> load_permutation; | |
c6895939 | 126 | /* Vectorized stmt/s. */ |
dc1fb456 | 127 | vec<stmt_vec_info> vec_stmts; |
48e1416a | 128 | /* Number of vector stmts that are created to replace the group of scalar |
129 | stmts. It is calculated during the transformation phase as the number of | |
130 | scalar elements in one scalar iteration (GROUP_SIZE) multiplied by VF | |
c6895939 | 131 | divided by vector size. */ |
132 | unsigned int vec_stmts_size; | |
f1c1105c | 133 | /* Reference count in the SLP graph. */ |
134 | unsigned int refcnt; | |
66e30248 | 135 | /* Whether the scalar computations use two different operators. */ |
136 | bool two_operators; | |
6d37c111 | 137 | /* The DEF type of this node. */ |
138 | enum vect_def_type def_type; | |
40bcc7c2 | 139 | }; |
c6895939 | 140 | |
141 | ||
142 | /* SLP instance is a sequence of stmts in a loop that can be packed into | |
143 | SIMD stmts. */ | |
251317e4 | 144 | typedef class _slp_instance { |
145 | public: | |
c6895939 | 146 | /* The root of SLP tree. */ |
147 | slp_tree root; | |
148 | ||
149 | /* Size of groups of scalar stmts that will be replaced by SIMD stmt/s. */ | |
150 | unsigned int group_size; | |
151 | ||
152 | /* The unrolling factor required to vectorize this SLP instance. */ |
d75596cd | 153 | poly_uint64 unrolling_factor; |
c6895939 | 154 | |
a0515226 | 155 | /* The group of nodes that contain loads of this SLP instance. */ |
f1f41a6c | 156 | vec<slp_tree> loads; |
6154acba | 157 | |
158 | /* The SLP node containing the reduction PHIs. */ | |
159 | slp_tree reduc_phis; | |
c6895939 | 160 | } *slp_instance; |
161 | ||
c6895939 | 162 | |
163 | /* Access Functions. */ | |
164 | #define SLP_INSTANCE_TREE(S) (S)->root | |
165 | #define SLP_INSTANCE_GROUP_SIZE(S) (S)->group_size | |
166 | #define SLP_INSTANCE_UNROLLING_FACTOR(S) (S)->unrolling_factor | |
a0515226 | 167 | #define SLP_INSTANCE_LOADS(S) (S)->loads |
c6895939 | 168 | |
b0f64919 | 169 | #define SLP_TREE_CHILDREN(S) (S)->children |
c6895939 | 170 | #define SLP_TREE_SCALAR_STMTS(S) (S)->stmts |
171 | #define SLP_TREE_VEC_STMTS(S) (S)->vec_stmts | |
172 | #define SLP_TREE_NUMBER_OF_VEC_STMTS(S) (S)->vec_stmts_size | |
678e3d6e | 173 | #define SLP_TREE_LOAD_PERMUTATION(S) (S)->load_permutation |
66e30248 | 174 | #define SLP_TREE_TWO_OPERATORS(S) (S)->two_operators |
6d37c111 | 175 | #define SLP_TREE_DEF_TYPE(S) (S)->def_type |
b0f64919 | 176 | |
b0f64919 | 177 | |
0822b158 | 178 | |
f68a7726 | 179 | /* Describes two objects whose addresses must be unequal for the vectorized |
180 | loop to be valid. */ | |
181 | typedef std::pair<tree, tree> vec_object_pair; | |
182 | ||
e85b4a5e | 183 | /* Records that vectorization is only possible if abs (EXPR) >= MIN_VALUE. |
184 | UNSIGNED_P is true if we can assume that abs (EXPR) == EXPR. */ | |
251317e4 | 185 | class vec_lower_bound { |
186 | public: | |
e85b4a5e | 187 | vec_lower_bound () {} |
188 | vec_lower_bound (tree e, bool u, poly_uint64 m) | |
189 | : expr (e), unsigned_p (u), min_value (m) {} | |
190 | ||
191 | tree expr; | |
192 | bool unsigned_p; | |
193 | poly_uint64 min_value; | |
194 | }; | |
195 | ||
a99aba41 | 196 | /* Vectorizer state shared between different analyses like vector sizes |
197 | of the same CFG region. */ | |
251317e4 | 198 | class vec_info_shared { |
199 | public: | |
a99aba41 | 200 | vec_info_shared(); |
201 | ~vec_info_shared(); | |
202 | ||
203 | void save_datarefs(); | |
204 | void check_datarefs(); | |
205 | ||
206 | /* All data references. Freed by free_data_refs, so not an auto_vec. */ | |
207 | vec<data_reference_p> datarefs; | |
208 | vec<data_reference> datarefs_copy; | |
209 | ||
210 | /* The loop nest in which the data dependences are computed. */ | |
211 | auto_vec<loop_p> loop_nest; | |
212 | ||
213 | /* All data dependences. Freed by free_dependence_relations, so not | |
214 | an auto_vec. */ | |
215 | vec<ddr_p> ddrs; | |
216 | }; | |
217 | ||
e2c5c678 | 218 | /* Vectorizer state common between loop and basic-block vectorization. */ |
251317e4 | 219 | class vec_info { |
220 | public: | |
e15e8a2a | 221 | enum vec_kind { bb, loop }; |
222 | ||
a99aba41 | 223 | vec_info (vec_kind, void *, vec_info_shared *); |
e15e8a2a | 224 | ~vec_info (); |
225 | ||
04b2391d | 226 | stmt_vec_info add_stmt (gimple *); |
03c0d666 | 227 | stmt_vec_info lookup_stmt (gimple *); |
9cfd4e76 | 228 | stmt_vec_info lookup_def (tree); |
aaac0b10 | 229 | stmt_vec_info lookup_single_use (tree); |
2e966e2a | 230 | class dr_vec_info *lookup_dr (data_reference *); |
5f02ee72 | 231 | void move_dr (stmt_vec_info, stmt_vec_info); |
f525c1af | 232 | void remove_stmt (stmt_vec_info); |
a5071338 | 233 | void replace_stmt (gimple_stmt_iterator *, stmt_vec_info, gimple *); |
04b2391d | 234 | |
e15e8a2a | 235 | /* The type of vectorization. */ |
236 | vec_kind kind; | |
e2c5c678 | 237 | |
a99aba41 | 238 | /* Shared vectorizer state. */ |
239 | vec_info_shared *shared; | |
240 | ||
d8ef42d0 | 241 | /* The mapping of GIMPLE UID to stmt_vec_info. */ |
1cb23a78 | 242 | vec<stmt_vec_info> stmt_vec_infos; |
d8ef42d0 | 243 | |
e2c5c678 | 244 | /* All SLP instances. */ |
e15e8a2a | 245 | auto_vec<slp_instance> slp_instances; |
e2c5c678 | 246 | |
4f372c2c | 247 | /* Maps base addresses to an innermost_loop_behavior that gives the maximum |
248 | known alignment for that base. */ | |
249 | vec_base_alignments base_alignments; | |
250 | ||
e2c5c678 | 251 | /* All interleaving chains of stores, represented by the first |
252 | stmt in the chain. */ | |
14dca1d8 | 253 | auto_vec<stmt_vec_info> grouped_stores; |
e2c5c678 | 254 | |
255 | /* Cost data used by the target cost model. */ | |
256 | void *target_cost_data; | |
c626a338 | 257 | |
258 | private: | |
259 | stmt_vec_info new_stmt_vec_info (gimple *stmt); | |
260 | void set_vinfo_for_stmt (gimple *, stmt_vec_info); | |
261 | void free_stmt_vec_infos (); | |
262 | void free_stmt_vec_info (stmt_vec_info); | |
e2c5c678 | 263 | }; |
264 | ||
2e966e2a | 265 | class _loop_vec_info; |
266 | class _bb_vec_info; | |
e2c5c678 | 267 | |
268 | template<> | |
269 | template<> | |
270 | inline bool | |
271 | is_a_helper <_loop_vec_info *>::test (vec_info *i) | |
272 | { | |
273 | return i->kind == vec_info::loop; | |
274 | } | |
275 | ||
276 | template<> | |
277 | template<> | |
278 | inline bool | |
279 | is_a_helper <_bb_vec_info *>::test (vec_info *i) | |
280 | { | |
281 | return i->kind == vec_info::bb; | |
282 | } | |
283 | ||
3e871d4d | 284 | |
60b29a7e | 285 | /* In general, we can divide the vector statements in a vectorized loop |
286 | into related groups ("rgroups") and say that for each rgroup there is | |
287 | some nS such that the rgroup operates on nS values from one scalar | |
288 | iteration followed by nS values from the next. That is, if VF is the | |
289 | vectorization factor of the loop, the rgroup operates on a sequence: | |
290 | ||
291 | (1,1) (1,2) ... (1,nS) (2,1) ... (2,nS) ... (VF,1) ... (VF,nS) | |
292 | ||
293 | where (i,j) represents a scalar value with index j in a scalar | |
294 | iteration with index i. | |
295 | ||
296 | [ We use the term "rgroup" to emphasise that this grouping isn't | |
297 | necessarily the same as the grouping of statements used elsewhere. | |
298 | For example, if we implement a group of scalar loads using gather | |
299 | loads, we'll use a separate gather load for each scalar load, and | |
300 | thus each gather load will belong to its own rgroup. ] | |
301 | ||
302 | In general this sequence will occupy nV vectors concatenated | |
303 | together. If these vectors have nL lanes each, the total number | |
304 | of scalar values N is given by: | |
305 | ||
306 | N = nS * VF = nV * nL | |
307 | ||
308 | None of nS, VF, nV and nL are required to be a power of 2. nS and nV | |
309 | are compile-time constants but VF and nL can be variable (if the target | |
310 | supports variable-length vectors). | |
311 | ||
312 | In classical vectorization, each iteration of the vector loop would | |
313 | handle exactly VF iterations of the original scalar loop. However, | |
314 | in a fully-masked loop, a particular iteration of the vector loop | |
315 | might handle fewer than VF iterations of the scalar loop. The vector | |
316 | lanes that correspond to iterations of the scalar loop are said to be | |
317 | "active" and the other lanes are said to be "inactive". | |
318 | ||
319 | In a fully-masked loop, many rgroups need to be masked to ensure that | |
320 | they have no effect for the inactive lanes. Each such rgroup needs a | |
321 | sequence of booleans in the same order as above, but with each (i,j) | |
322 | replaced by a boolean that indicates whether iteration i is active. | |
323 | This sequence occupies nV vector masks that again have nL lanes each. | |
324 | Thus the mask sequence as a whole consists of VF independent booleans | |
325 | that are each repeated nS times. | |
326 | ||
327 | We make the simplifying assumption that if a sequence of nV masks is | |
328 | suitable for one (nS,nL) pair, we can reuse it for (nS/2,nL/2) by | |
329 | VIEW_CONVERTing it. This holds for all current targets that support | |
330 | fully-masked loops. For example, suppose the scalar loop is: | |
331 | ||
332 | float *f; | |
333 | double *d; | |
334 | for (int i = 0; i < n; ++i) | |
335 | { | |
336 | f[i * 2 + 0] += 1.0f; | |
337 | f[i * 2 + 1] += 2.0f; | |
338 | d[i] += 3.0; | |
339 | } | |
340 | ||
341 | and suppose that vectors have 256 bits. The vectorized f accesses | |
342 | will belong to one rgroup and the vectorized d access to another: | |
343 | ||
344 | f rgroup: nS = 2, nV = 1, nL = 8 | |
345 | d rgroup: nS = 1, nV = 1, nL = 4 | |
346 | VF = 4 | |
347 | ||
348 | [ In this simple example the rgroups do correspond to the normal | |
349 | SLP grouping scheme. ] | |
350 | ||
351 | If only the first three lanes are active, the masks we need are: | |
352 | ||
353 | f rgroup: 1 1 | 1 1 | 1 1 | 0 0 | |
354 | d rgroup: 1 | 1 | 1 | 0 | |
355 | ||
356 | Here we can use a mask calculated for f's rgroup for d's, but not | |
357 | vice versa. | |
358 | ||
359 | Thus for each value of nV, it is enough to provide nV masks, with the | |
360 | mask being calculated based on the highest nL (or, equivalently, based | |
361 | on the highest nS) required by any rgroup with that nV. We therefore | |
362 | represent the entire collection of masks as a two-level table, with the | |
363 | first level being indexed by nV - 1 (since nV == 0 doesn't exist) and | |
364 | the second being indexed by the mask index 0 <= i < nV. */ | |
365 | ||
366 | /* The masks needed by rgroups with nV vectors, according to the | |
367 | description above. */ | |
368 | struct rgroup_masks { | |
369 | /* The largest nS for all rgroups that use these masks. */ | |
370 | unsigned int max_nscalars_per_iter; | |
371 | ||
372 | /* The type of mask to use, based on the highest nS recorded above. */ | |
373 | tree mask_type; | |
374 | ||
375 | /* A vector of nV masks, in iteration order. */ | |
376 | vec<tree> masks; | |
377 | }; | |
378 | ||
379 | typedef auto_vec<rgroup_masks> vec_loop_masks; | |
380 | ||
4e58562d | 381 | /*-----------------------------------------------------------------*/ |
382 | /* Info on vectorized loops. */ | |
383 | /*-----------------------------------------------------------------*/ | |
251317e4 | 384 | typedef class _loop_vec_info : public vec_info { |
385 | public: | |
2e966e2a | 386 | _loop_vec_info (class loop *, vec_info_shared *); |
e15e8a2a | 387 | ~_loop_vec_info (); |
4e58562d | 388 | |
389 | /* The loop to which this info struct refers. */ |
2e966e2a | 390 | class loop *loop; |
4e58562d | 391 | |
392 | /* The loop basic blocks. */ | |
393 | basic_block *bbs; | |
394 | ||
796f6cba | 395 | /* Number of latch executions. */ |
396 | tree num_itersm1; | |
4e58562d | 397 | /* Number of iterations. */ |
398 | tree num_iters; | |
796f6cba | 399 | /* Number of iterations of the original loop. */ |
be53c6d4 | 400 | tree num_iters_unchanged; |
d5e80d93 | 401 | /* Condition under which this loop is analyzed and versioned. */ |
402 | tree num_iters_assumptions; | |
4e58562d | 403 | |
f92474f8 | 404 | /* Threshold of number of iterations below which vectorization will not be |
004a94a5 | 405 | performed. It is calculated from MIN_PROFITABLE_ITERS and |
406 | PARAM_MIN_VECT_LOOP_BOUND. */ | |
407 | unsigned int th; | |
408 | ||
7456a7ea | 409 | /* When applying loop versioning, the vector form should only be used |
410 | if the number of scalar iterations is >= this value, on top of all | |
411 | the other requirements. Ignored when loop versioning is not being | |
412 | used. */ | |
413 | poly_uint64 versioning_threshold; | |
414 | ||
4e58562d | 415 | /* Unrolling factor */ |
d75596cd | 416 | poly_uint64 vectorization_factor; |
4e58562d | 417 | |
4a85c0b1 | 418 | /* Maximum runtime vectorization factor, or MAX_VECTORIZATION_FACTOR |
419 | if there is no particular limit. */ | |
420 | unsigned HOST_WIDE_INT max_vectorization_factor; | |
421 | ||
60b29a7e | 422 | /* The masks that a fully-masked loop should use to avoid operating |
423 | on inactive scalars. */ | |
424 | vec_loop_masks masks; | |
425 | ||
6753a4bf | 426 | /* If we are using a loop mask to align memory addresses, this variable |
427 | contains the number of vector elements that we should skip in the | |
428 | first iteration of the vector loop (i.e. the number of leading | |
429 | elements that should be false in the first mask). */ | |
430 | tree mask_skip_niters; | |
431 | ||
60b29a7e | 432 | /* Type of the variables to use in the WHILE_ULT call for fully-masked |
433 | loops. */ | |
434 | tree mask_compare_type; | |
435 | ||
1d86b8dc | 436 | /* For #pragma omp simd if (x) loops the x expression. If constant 0, |
437 | the loop should not be vectorized, if constant non-zero, simd_if_cond | |
438 | shouldn't be set and loop vectorized normally, if SSA_NAME, the loop | |
439 | should be versioned on that condition, using scalar loop if the condition | |
440 | is false and vectorized loop otherwise. */ | |
441 | tree simd_if_cond; | |
442 | ||
ef871d99 | 443 | /* Type of the IV to use in the WHILE_ULT call for fully-masked |
444 | loops. */ | |
445 | tree iv_type; | |
446 | ||
4e58562d | 447 | /* Unknown DRs according to which loop was peeled. */ |
2e966e2a | 448 | class dr_vec_info *unaligned_dr; |
4e58562d | 449 | |
39b8f742 | 450 | /* peeling_for_alignment indicates whether peeling for alignment will take |
451 | place, and what the peeling factor should be: | |
452 | peeling_for_alignment = X means: | |
453 | If X=0: Peeling for alignment will not be applied. | |
454 | If X>0: Peel first X iterations. | |
455 | If X=-1: Generate a runtime test to calculate the number of iterations | |
456 | to be peeled, using the dataref recorded in the field | |
457 | unaligned_dr. */ | |
458 | int peeling_for_alignment; | |
4e58562d | 459 | |
25e3c2e8 | 460 | /* The mask used to check the alignment of pointers or arrays. */ |
461 | int ptr_mask; | |
462 | ||
45b13dc3 | 463 | /* Data Dependence Relations defining address ranges that are candidates |
464 | for a run-time aliasing check. */ | |
e15e8a2a | 465 | auto_vec<ddr_p> may_alias_ddrs; |
45b13dc3 | 466 | |
8a7b0f48 | 467 | /* Data Dependence Relations defining address ranges together with segment |
468 | lengths from which the run-time aliasing check is built. */ | |
e15e8a2a | 469 | auto_vec<dr_with_seg_len_pair_t> comp_alias_ddrs; |
8a7b0f48 | 470 | |
f68a7726 | 471 | /* Check that the addresses of each pair of objects is unequal. */ |
e15e8a2a | 472 | auto_vec<vec_object_pair> check_unequal_addrs; |
f68a7726 | 473 | |
e85b4a5e | 474 | /* List of values that are required to be nonzero. This is used to check |
475 | whether things like "x[i * n] += 1;" are safe and eventually gets added | |
476 | to the checks for lower bounds below. */ | |
477 | auto_vec<tree> check_nonzero; | |
478 | ||
479 | /* List of values that need to be checked for a minimum value. */ | |
480 | auto_vec<vec_lower_bound> lower_bounds; | |
481 | ||
25e3c2e8 | 482 | /* Statements in the loop that have data references that are candidates for a |
483 | runtime (loop versioning) misalignment check. */ | |
ab98e625 | 484 | auto_vec<stmt_vec_info> may_misalign_stmts; |
25e3c2e8 | 485 | |
eefa05c8 | 486 | /* Reduction cycles detected in the loop. Used in loop-aware SLP. */ |
f4649a92 | 487 | auto_vec<stmt_vec_info> reductions; |
0822b158 | 488 | |
39a5d6b1 | 489 | /* All reduction chains in the loop, represented by the first |
490 | stmt in the chain. */ | |
14dca1d8 | 491 | auto_vec<stmt_vec_info> reduction_chains; |
39a5d6b1 | 492 | |
2a9a3444 | 493 | /* Cost vector for a single scalar iteration. */ |
e15e8a2a | 494 | auto_vec<stmt_info_for_cost> scalar_cost_vec; |
2a9a3444 | 495 | |
f404501a | 496 | /* Map of IV base/step expressions to inserted name in the preheader. */ |
497 | hash_map<tree_operand_hash, tree> *ivexpr_map; | |
498 | ||
da008d72 | 499 | /* Map of OpenMP "omp simd array" scan variables to corresponding |
500 | rhs of the store of the initializer. */ | |
501 | hash_map<tree, tree> *scan_map; | |
502 | ||
487798e2 | 503 | /* The unrolling factor needed to SLP the loop. In case pure SLP is |
504 | applied to the loop, i.e., no unrolling is needed, this is 1. */ | |
d75596cd | 505 | poly_uint64 slp_unrolling_factor; |
487798e2 | 506 | |
2a9a3444 | 507 | /* Cost of a single scalar iteration. */ |
508 | int single_scalar_iteration_cost; | |
509 | ||
487798e2 | 510 | /* Is the loop vectorizable? */ |
511 | bool vectorizable; | |
512 | ||
60b29a7e | 513 | /* Records whether we still have the option of using a fully-masked loop. */ |
514 | bool can_fully_mask_p; | |
515 | ||
516 | /* True if have decided to use a fully-masked loop. */ | |
517 | bool fully_masked_p; | |
518 | ||
ee612634 | 519 | /* When we have grouped data accesses with gaps, we may introduce invalid |
a4ee7fac | 520 | memory accesses. We peel the last iteration of the loop to prevent |
521 | this. */ | |
522 | bool peeling_for_gaps; | |
523 | ||
36f39b2e | 524 | /* When the number of iterations is not a multiple of the vector size |
525 | we need to peel off iterations at the end to form an epilogue loop. */ | |
526 | bool peeling_for_niter; | |
527 | ||
ba69439f | 528 | /* Reductions are canonicalized so that the last operand is the reduction |
529 | operand. If this places a constant into RHS1, this decanonicalizes | |
530 | GIMPLE for other phases, so we must track when this has occurred and | |
531 | fix it up. */ | |
532 | bool operands_swapped; | |
533 | ||
c7a8722c | 534 | /* True if there are no loop carried data dependencies in the loop. |
535 | If loop->safelen <= 1, then this is always true, either the loop | |
536 | didn't have any loop carried data dependencies, or the loop is being | |
537 | vectorized guarded with some runtime alias checks, or couldn't | |
538 | be vectorized at all, but then this field shouldn't be used. | |
539 | For loop->safelen >= 2, the user has asserted that there are no | |
540 | backward dependencies, but there still could be loop carried forward | |
541 | dependencies in such loops. This flag will be false if normal | |
542 | vectorizer data dependency analysis would fail or require versioning | |
543 | for alias, but because of loop->safelen >= 2 it has been vectorized | |
544 | even without versioning for alias. E.g. in: | |
545 | #pragma omp simd | |
546 | for (int i = 0; i < m; i++) | |
547 | a[i] = a[i + k] * c; | |
548 | (or #pragma simd or #pragma ivdep) we can vectorize this and it will | |
549 | DTRT even for k > 0 && k < m, but without safelen we would not | |
550 | vectorize this, so this field would be false. */ | |
551 | bool no_data_dependencies; | |
552 | ||
487798e2 | 553 | /* Mark loops having masked stores. */ |
554 | bool has_mask_store; | |
555 | ||
e3b3a12f | 556 | /* Queued scaling factor for the scalar loop. */ |
557 | profile_probability scalar_loop_scaling; | |
558 | ||
c71d3c24 | 559 | /* If if-conversion versioned this loop before conversion, this is the |
560 | loop version without if-conversion. */ | |
2e966e2a | 561 | class loop *scalar_loop; |
c71d3c24 | 562 | |
5b631e09 | 563 | /* For loops being epilogues of already vectorized loops |
564 | this points to the original vectorized loop. Otherwise NULL. */ | |
565 | _loop_vec_info *orig_loop_info; | |
566 | ||
4e58562d | 567 | } *loop_vec_info; |
568 | ||
25e3c2e8 | 569 | /* Access Functions. */ |
10095225 | 570 | #define LOOP_VINFO_LOOP(L) (L)->loop |
571 | #define LOOP_VINFO_BBS(L) (L)->bbs | |
796f6cba | 572 | #define LOOP_VINFO_NITERSM1(L) (L)->num_itersm1 |
10095225 | 573 | #define LOOP_VINFO_NITERS(L) (L)->num_iters |
796f6cba | 574 | /* Since LOOP_VINFO_NITERS and LOOP_VINFO_NITERSM1 can change after |
575 | prologue peeling retain total unchanged scalar loop iterations for | |
576 | cost model. */ | |
10095225 | 577 | #define LOOP_VINFO_NITERS_UNCHANGED(L) (L)->num_iters_unchanged |
d5e80d93 | 578 | #define LOOP_VINFO_NITERS_ASSUMPTIONS(L) (L)->num_iters_assumptions |
004a94a5 | 579 | #define LOOP_VINFO_COST_MODEL_THRESHOLD(L) (L)->th |
7456a7ea | 580 | #define LOOP_VINFO_VERSIONING_THRESHOLD(L) (L)->versioning_threshold |
10095225 | 581 | #define LOOP_VINFO_VECTORIZABLE_P(L) (L)->vectorizable |
60b29a7e | 582 | #define LOOP_VINFO_CAN_FULLY_MASK_P(L) (L)->can_fully_mask_p |
583 | #define LOOP_VINFO_FULLY_MASKED_P(L) (L)->fully_masked_p | |
10095225 | 584 | #define LOOP_VINFO_VECT_FACTOR(L) (L)->vectorization_factor |
4a85c0b1 | 585 | #define LOOP_VINFO_MAX_VECT_FACTOR(L) (L)->max_vectorization_factor |
60b29a7e | 586 | #define LOOP_VINFO_MASKS(L) (L)->masks |
6753a4bf | 587 | #define LOOP_VINFO_MASK_SKIP_NITERS(L) (L)->mask_skip_niters |
60b29a7e | 588 | #define LOOP_VINFO_MASK_COMPARE_TYPE(L) (L)->mask_compare_type |
ef871d99 | 589 | #define LOOP_VINFO_MASK_IV_TYPE(L) (L)->iv_type |
10095225 | 590 | #define LOOP_VINFO_PTR_MASK(L) (L)->ptr_mask |
a99aba41 | 591 | #define LOOP_VINFO_LOOP_NEST(L) (L)->shared->loop_nest |
592 | #define LOOP_VINFO_DATAREFS(L) (L)->shared->datarefs | |
593 | #define LOOP_VINFO_DDRS(L) (L)->shared->ddrs | |
10095225 | 594 | #define LOOP_VINFO_INT_NITERS(L) (TREE_INT_CST_LOW ((L)->num_iters)) |
313a5120 | 595 | #define LOOP_VINFO_PEELING_FOR_ALIGNMENT(L) (L)->peeling_for_alignment |
10095225 | 596 | #define LOOP_VINFO_UNALIGNED_DR(L) (L)->unaligned_dr |
597 | #define LOOP_VINFO_MAY_MISALIGN_STMTS(L) (L)->may_misalign_stmts | |
10095225 | 598 | #define LOOP_VINFO_MAY_ALIAS_DDRS(L) (L)->may_alias_ddrs |
8a7b0f48 | 599 | #define LOOP_VINFO_COMP_ALIAS_DDRS(L) (L)->comp_alias_ddrs |
f68a7726 | 600 | #define LOOP_VINFO_CHECK_UNEQUAL_ADDRS(L) (L)->check_unequal_addrs |
e85b4a5e | 601 | #define LOOP_VINFO_CHECK_NONZERO(L) (L)->check_nonzero |
602 | #define LOOP_VINFO_LOWER_BOUNDS(L) (L)->lower_bounds | |
ee612634 | 603 | #define LOOP_VINFO_GROUPED_STORES(L) (L)->grouped_stores |
10095225 | 604 | #define LOOP_VINFO_SLP_INSTANCES(L) (L)->slp_instances |
c6895939 | 605 | #define LOOP_VINFO_SLP_UNROLLING_FACTOR(L) (L)->slp_unrolling_factor |
eefa05c8 | 606 | #define LOOP_VINFO_REDUCTIONS(L) (L)->reductions |
39a5d6b1 | 607 | #define LOOP_VINFO_REDUCTION_CHAINS(L) (L)->reduction_chains |
4db2b577 | 608 | #define LOOP_VINFO_TARGET_COST_DATA(L) (L)->target_cost_data |
a4ee7fac | 609 | #define LOOP_VINFO_PEELING_FOR_GAPS(L) (L)->peeling_for_gaps |
ba69439f | 610 | #define LOOP_VINFO_OPERANDS_SWAPPED(L) (L)->operands_swapped |
313a5120 | 611 | #define LOOP_VINFO_PEELING_FOR_NITER(L) (L)->peeling_for_niter |
c7a8722c | 612 | #define LOOP_VINFO_NO_DATA_DEPENDENCIES(L) (L)->no_data_dependencies |
c71d3c24 | 613 | #define LOOP_VINFO_SCALAR_LOOP(L) (L)->scalar_loop |
e3b3a12f | 614 | #define LOOP_VINFO_SCALAR_LOOP_SCALING(L) (L)->scalar_loop_scaling |
cfd9ca84 | 615 | #define LOOP_VINFO_HAS_MASK_STORE(L) (L)->has_mask_store |
2a9a3444 | 616 | #define LOOP_VINFO_SCALAR_ITERATION_COST(L) (L)->scalar_cost_vec |
617 | #define LOOP_VINFO_SINGLE_SCALAR_ITERATION_COST(L) (L)->single_scalar_iteration_cost | |
5b631e09 | 618 | #define LOOP_VINFO_ORIG_LOOP_INFO(L) (L)->orig_loop_info |
1d86b8dc | 619 | #define LOOP_VINFO_SIMD_IF_COND(L) (L)->simd_if_cond |
4e58562d | 620 | |
d5e80d93 | 621 | #define LOOP_REQUIRES_VERSIONING_FOR_ALIGNMENT(L) \ |
72ffab3c | 622 | ((L)->may_misalign_stmts.length () > 0) |
d5e80d93 | 623 | #define LOOP_REQUIRES_VERSIONING_FOR_ALIAS(L) \ |
f68a7726 | 624 | ((L)->comp_alias_ddrs.length () > 0 \ |
e85b4a5e | 625 | || (L)->check_unequal_addrs.length () > 0 \ |
626 | || (L)->lower_bounds.length () > 0) | |
d5e80d93 | 627 | #define LOOP_REQUIRES_VERSIONING_FOR_NITERS(L) \ |
628 | (LOOP_VINFO_NITERS_ASSUMPTIONS (L)) | |
1d86b8dc | 629 | #define LOOP_REQUIRES_VERSIONING_FOR_SIMD_IF_COND(L) \ |
630 | (LOOP_VINFO_SIMD_IF_COND (L)) | |
d5e80d93 | 631 | #define LOOP_REQUIRES_VERSIONING(L) \ |
632 | (LOOP_REQUIRES_VERSIONING_FOR_ALIGNMENT (L) \ | |
633 | || LOOP_REQUIRES_VERSIONING_FOR_ALIAS (L) \ | |
1d86b8dc | 634 | || LOOP_REQUIRES_VERSIONING_FOR_NITERS (L) \ |
635 | || LOOP_REQUIRES_VERSIONING_FOR_SIMD_IF_COND (L)) | |
33bbe730 | 636 | |
10095225 | 637 | #define LOOP_VINFO_NITERS_KNOWN_P(L) \ |
313a5120 | 638 | (tree_fits_shwi_p ((L)->num_iters) && tree_to_shwi ((L)->num_iters) > 0) |
4e58562d | 639 | |
5b631e09 | 640 | #define LOOP_VINFO_EPILOGUE_P(L) \ |
641 | (LOOP_VINFO_ORIG_LOOP_INFO (L) != NULL) | |
642 | ||
4a85c0b1 | 643 | #define LOOP_VINFO_ORIG_MAX_VECT_FACTOR(L) \ |
644 | (LOOP_VINFO_MAX_VECT_FACTOR (LOOP_VINFO_ORIG_LOOP_INFO (L))) | |
5b631e09 | 645 | |
ed9370cc | 646 | /* Wrapper for loop_vec_info, for tracking success/failure, where a non-NULL |
647 | value signifies success, and a NULL value signifies failure, supporting | |
648 | propagating an opt_problem * describing the failure back up the call | |
649 | stack. */ | |
650 | typedef opt_pointer_wrapper <loop_vec_info> opt_loop_vec_info; | |
651 | ||
221e9a92 | 652 | static inline loop_vec_info |
2e966e2a | 653 | loop_vec_info_for_loop (class loop *loop) |
221e9a92 | 654 | { |
655 | return (loop_vec_info) loop->aux; | |
656 | } | |
657 | ||
251317e4 | 658 | typedef class _bb_vec_info : public vec_info |
e2c5c678 | 659 | { |
251317e4 | 660 | public: |
a99aba41 | 661 | _bb_vec_info (gimple_stmt_iterator, gimple_stmt_iterator, vec_info_shared *); |
e15e8a2a | 662 | ~_bb_vec_info (); |
663 | ||
37545e54 | 664 | basic_block bb; |
4c7587f5 | 665 | gimple_stmt_iterator region_begin; |
666 | gimple_stmt_iterator region_end; | |
37545e54 | 667 | } *bb_vec_info; |
668 | ||
4db2b577 | 669 | #define BB_VINFO_BB(B) (B)->bb |
670 | #define BB_VINFO_GROUPED_STORES(B) (B)->grouped_stores | |
671 | #define BB_VINFO_SLP_INSTANCES(B) (B)->slp_instances | |
a99aba41 | 672 | #define BB_VINFO_DATAREFS(B) (B)->shared->datarefs |
673 | #define BB_VINFO_DDRS(B) (B)->shared->ddrs | |
4db2b577 | 674 | #define BB_VINFO_TARGET_COST_DATA(B) (B)->target_cost_data |
37545e54 | 675 | |
676 | static inline bb_vec_info | |
677 | vec_info_for_bb (basic_block bb) | |
678 | { | |
679 | return (bb_vec_info) bb->aux; | |
680 | } | |
681 | ||
c91e8223 | 682 | /*-----------------------------------------------------------------*/ |
683 | /* Info on vectorized defs. */ | |
684 | /*-----------------------------------------------------------------*/ | |
/* The kind of operation a statement performs, from the vectorizer's
   point of view.  */
enum stmt_vec_info_type {
  /* Not yet classified.  */
  undef_vec_info_type = 0,
  /* A load.  */
  load_vec_info_type,
  /* A store.  */
  store_vec_info_type,
  /* A shift operation.  */
  shift_vec_info_type,
  /* A generic arithmetic or logical operation.  */
  op_vec_info_type,
  /* A call to a function.  */
  call_vec_info_type,
  /* A call vectorized through a SIMD clone.  */
  call_simd_clone_vec_info_type,
  /* A plain assignment (copy).  */
  assignment_vec_info_type,
  /* A conditional expression.  */
  condition_vec_info_type,
  /* A comparison.  */
  comparison_vec_info_type,
  /* A reduction operation.  */
  reduc_vec_info_type,
  /* An induction.  */
  induc_vec_info_type,
  /* A conversion to a wider type.  */
  type_promotion_vec_info_type,
  /* A conversion to a narrower type.  */
  type_demotion_vec_info_type,
  /* Another kind of type conversion.  */
  type_conversion_vec_info_type,
  /* A statement controlling a loop exit.  */
  loop_exit_ctrl_vec_info_type
};
703 | ||
/* Indicates whether/how a variable is used in the scope of loop/basic
   block.  */
enum vect_relevant {
  /* The def is not used at all inside the scope being vectorized.  */
  vect_unused_in_scope = 0,

  /* The def is only used outside the loop.  */
  vect_used_only_live,
  /* The def is in the inner loop, and the use is in the outer loop, and the
     use is a reduction stmt.  */
  vect_used_in_outer_by_reduction,
  /* The def is in the inner loop, and the use is in the outer loop (and is
     not part of reduction).  */
  vect_used_in_outer,

  /* defs that feed computations that end up (only) in a reduction. These
     defs may be used by non-reduction stmts, but eventually, any
     computations/values that are affected by these defs are used to compute
     a reduction (i.e. don't get stored to memory, for example). We use this
     to identify computations that we can change the order in which they are
     computed.  */
  vect_used_by_reduction,

  /* The def is used inside the scope being vectorized.  */
  vect_used_in_scope
};
728 | ||
c6895939 | 729 | /* The type of vectorization that can be applied to the stmt: regular loop-based |
730 | vectorization; pure SLP - the stmt is a part of SLP instances and does not | |
731 | have uses outside SLP instances; or hybrid SLP and loop-based - the stmt is | |
732 | a part of SLP instance and also must be loop-based vectorized, since it has | |
48e1416a | 733 | uses outside SLP sequences. |
734 | ||
735 | In the loop context the meanings of pure and hybrid SLP are slightly | |
736 | different. By saying that pure SLP is applied to the loop, we mean that we | |
737 | exploit only intra-iteration parallelism in the loop; i.e., the loop can be | |
738 | vectorized without doing any conceptual unrolling, cause we don't pack | |
739 | together stmts from different iterations, only within a single iteration. | |
740 | Loop hybrid SLP means that we exploit both intra-iteration and | |
c6895939 | 741 | inter-iteration parallelism (e.g., number of elements in the vector is 4 |
48e1416a | 742 | and the slp-group-size is 2, in which case we don't have enough parallelism |
743 | within an iteration, so we obtain the rest of the parallelism from subsequent | |
c6895939 | 744 | iterations by unrolling the loop by 2). */ |
enum slp_vect_type {
  /* Not part of any SLP instance; loop-based vectorization only.  */
  loop_vect = 0,
  /* Part of SLP instances only, with no uses outside them.  */
  pure_slp,
  /* Part of an SLP instance but also has uses outside SLP sequences,
     so must additionally be loop-based vectorized.  */
  hybrid
};
750 | ||
/* Says whether a statement is a load, a store of a vectorized statement
   result, or a store of an invariant value.  */
enum vec_load_store_type {
  /* A load.  */
  VLS_LOAD,
  /* A store of a vectorized statement result.  */
  VLS_STORE,
  /* A store of an invariant value.  */
  VLS_STORE_INVARIANT
};
758 | ||
/* Describes how we're going to vectorize an individual load or store,
   or a group of loads or stores.  Recorded per statement via
   STMT_VINFO_MEMORY_ACCESS_TYPE.  */
enum vect_memory_access_type {
  /* An access to an invariant address.  This is used only for loads.  */
  VMAT_INVARIANT,

  /* A simple contiguous access.  */
  VMAT_CONTIGUOUS,

  /* A contiguous access that goes down in memory rather than up,
     with no additional permutation.  This is used only for stores
     of invariants.  */
  VMAT_CONTIGUOUS_DOWN,

  /* A simple contiguous access in which the elements need to be permuted
     after loading or before storing.  Only used for loop vectorization;
     SLP uses separate permutes.  */
  VMAT_CONTIGUOUS_PERMUTE,

  /* A simple contiguous access in which the elements need to be reversed
     after loading or before storing.  */
  VMAT_CONTIGUOUS_REVERSE,

  /* An access that uses IFN_LOAD_LANES or IFN_STORE_LANES.  */
  VMAT_LOAD_STORE_LANES,

  /* An access in which each scalar element is loaded or stored
     individually.  */
  VMAT_ELEMENTWISE,

  /* A hybrid of VMAT_CONTIGUOUS and VMAT_ELEMENTWISE, used for grouped
     SLP accesses.  Each unrolled iteration uses a contiguous load
     or store for the whole group, but the groups from separate iterations
     are combined in the same way as for VMAT_ELEMENTWISE.  */
  VMAT_STRIDED_SLP,

  /* The access uses gather loads or scatter stores.  */
  VMAT_GATHER_SCATTER
};
c6895939 | 798 | |
/* Vectorizer information attached to a single data reference.  */
class dr_vec_info {
public:
  /* The data reference itself.  */
  data_reference *dr;
  /* The statement that contains the data reference.  */
  stmt_vec_info stmt;
  /* The misalignment in bytes of the reference, or -1 if not known
     (see DR_MISALIGNMENT_UNKNOWN / DR_MISALIGNMENT_UNINITIALIZED).  */
  int misalignment;
  /* The byte alignment that we'd ideally like the reference to have,
     and the value that misalignment is measured against.  */
  poly_uint64 target_alignment;
  /* If true the alignment of base_decl needs to be increased.  */
  bool base_misaligned;
  /* The base decl whose alignment is to be raised when BASE_MISALIGNED.  */
  tree base_decl;
};
814 | ||
f1168a33 | 815 | typedef struct data_reference *dr_p; |
f1168a33 | 816 | |
/* Vectorizer bookkeeping attached to each statement (including pattern
   statements) under consideration for vectorization.  */
class _stmt_vec_info {
public:

  enum stmt_vec_info_type type;

  /* Indicates whether this stmt is part of a computation whose result is
     used outside the loop.  */
  bool live;

  /* Stmt is part of some pattern (computation idiom).  */
  bool in_pattern_p;

  /* True if the statement was created during pattern recognition as
     part of the replacement for RELATED_STMT.  This implies that the
     statement isn't part of any basic block, although for convenience
     its gimple_bb is the same as for RELATED_STMT.  */
  bool pattern_stmt_p;

  /* Is this statement vectorizable or should it be skipped in (partial)
     vectorization.  */
  bool vectorizable;

  /* The stmt to which this info struct refers to.  */
  gimple *stmt;

  /* The vec_info with respect to which STMT is vectorized.  */
  vec_info *vinfo;

  /* The vector type to be used for the LHS of this statement.  */
  tree vectype;

  /* The vectorized version of the stmt.  */
  stmt_vec_info vectorized_stmt;


  /* The following is relevant only for stmts that contain a non-scalar
     data-ref (array/pointer/struct access). A GIMPLE stmt is expected to have
     at most one such data-ref.  */

  dr_vec_info dr_aux;

  /* Information about the data-ref relative to this loop
     nest (the loop that is being considered for vectorization).  */
  innermost_loop_behavior dr_wrt_vec_loop;

  /* For loop PHI nodes, the base and evolution part of it.  This makes sure
     this information is still available in vect_update_ivs_after_vectorizer
     where we may not be able to re-analyze the PHI nodes evolution as
     peeling for the prologue loop can make it unanalyzable.  The evolution
     part is still correct after peeling, but the base may have changed from
     the version here.  */
  tree loop_phi_evolution_base_unchanged;
  tree loop_phi_evolution_part;

  /* Used for various bookkeeping purposes, generally holding a pointer to
     some other stmt S that is in some way "related" to this stmt.
     Current use of this field is:
        If this stmt is part of a pattern (i.e. the field 'in_pattern_p' is
        true): S is the "pattern stmt" that represents (and replaces) the
        sequence of stmts that constitutes the pattern.  Similarly, the
        related_stmt of the "pattern stmt" points back to this stmt (which is
        the last stmt in the original sequence of stmts that constitutes the
        pattern).  */
  stmt_vec_info related_stmt;

  /* Used to keep a sequence of def stmts of a pattern stmt if such exists.
     The sequence is attached to the original statement rather than the
     pattern statement.  */
  gimple_seq pattern_def_seq;

  /* List of datarefs that are known to have the same alignment as the dataref
     of this stmt.  */
  vec<dr_p> same_align_refs;

  /* Selected SIMD clone's function info.  First vector element
     is SIMD clone's function decl, followed by a pair of trees (base + step)
     for linear arguments (pair of NULLs for other arguments).  */
  vec<tree> simd_clone_info;

  /* Classify the def of this stmt.  */
  enum vect_def_type def_type;

  /* Whether the stmt is SLPed, loop-based vectorized, or both.  */
  enum slp_vect_type slp_type;

  /* Interleaving and reduction chains info.  */
  /* First element in the group.  */
  stmt_vec_info first_element;
  /* Pointer to the next element in the group.  */
  stmt_vec_info next_element;
  /* The size of the group.  */
  unsigned int size;
  /* For stores, number of stores from this group seen. We vectorize the last
     one.  */
  unsigned int store_count;
  /* For loads only, the gap from the previous load. For consecutive loads, GAP
     is 1.  */
  unsigned int gap;

  /* The minimum negative dependence distance this stmt participates in
     or zero if none.  */
  unsigned int min_neg_dist;

  /* Not all stmts in the loop need to be vectorized. e.g, the increment
     of the loop induction variable and computation of array indexes. relevant
     indicates whether the stmt needs to be vectorized.  */
  enum vect_relevant relevant;

  /* For loads if this is a gather, for stores if this is a scatter.  */
  bool gather_scatter_p;

  /* True if this is an access with loop-invariant stride.  */
  bool strided_p;

  /* For both loads and stores.  */
  unsigned simd_lane_access_p : 3;

  /* Classifies how the load or store is going to be implemented
     for loop vectorization.  */
  vect_memory_access_type memory_access_type;

  /* For reduction loops, this is the type of reduction.  */
  enum vect_reduction_type v_reduc_type;

  /* For CONST_COND_REDUCTION, record the reduc code.  */
  enum tree_code const_cond_reduc_code;

  /* On a reduction PHI the reduction type as detected by
     vect_force_simple_reduction.  */
  enum vect_reduction_type reduc_type;

  /* On a reduction PHI the def returned by vect_force_simple_reduction.
     On the def returned by vect_force_simple_reduction the
     corresponding PHI.  */
  stmt_vec_info reduc_def;

  /* The number of scalar stmt references from active SLP instances.  */
  unsigned int num_slp_uses;

  /* If nonzero, the lhs of the statement could be truncated to this
     many bits without affecting any users of the result.  */
  unsigned int min_output_precision;

  /* If nonzero, all non-boolean input operands have the same precision,
     and they could each be truncated to this many bits without changing
     the result.  */
  unsigned int min_input_precision;

  /* If OPERATION_BITS is nonzero, the statement could be performed on
     an integer with the sign and number of bits given by OPERATION_SIGN
     and OPERATION_BITS without changing the result.  */
  unsigned int operation_precision;
  signop operation_sign;

  /* True if this is only suitable for SLP vectorization.  */
  bool slp_vect_only_p;
};
c91e8223 | 974 | |
/* Information about a gather/scatter call.  Each vector element accesses
   the address BASE + OFFSET[i] * SCALE.  */
struct gather_scatter_info {
  /* The internal function to use for the gather/scatter operation,
     or IFN_LAST if a built-in function should be used instead.  */
  internal_fn ifn;

  /* The FUNCTION_DECL for the built-in gather/scatter function,
     or null if an internal function should be used instead.  */
  tree decl;

  /* The loop-invariant base value.  */
  tree base;

  /* The original scalar offset, which is a non-loop-invariant SSA_NAME.  */
  tree offset;

  /* Each offset element should be multiplied by this amount before
     being added to the base.  */
  int scale;

  /* The definition type for the vectorized offset.  */
  enum vect_def_type offset_dt;

  /* The type of the vectorized offset.  */
  tree offset_vectype;

  /* The type of the scalar elements after loading or before storing.  */
  tree element_type;

  /* The type of the scalar elements being loaded or stored.  */
  tree memory_type;
};
1007 | ||
c91e8223 | 1008 | /* Access Functions. */ |
6b8dbb53 | 1009 | #define STMT_VINFO_TYPE(S) (S)->type |
1010 | #define STMT_VINFO_STMT(S) (S)->stmt | |
e2c5c678 | 1011 | inline loop_vec_info |
1012 | STMT_VINFO_LOOP_VINFO (stmt_vec_info stmt_vinfo) | |
1013 | { | |
1014 | if (loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (stmt_vinfo->vinfo)) | |
1015 | return loop_vinfo; | |
1016 | return NULL; | |
1017 | } | |
1018 | inline bb_vec_info | |
1019 | STMT_VINFO_BB_VINFO (stmt_vec_info stmt_vinfo) | |
1020 | { | |
1021 | if (bb_vec_info bb_vinfo = dyn_cast <bb_vec_info> (stmt_vinfo->vinfo)) | |
1022 | return bb_vinfo; | |
1023 | return NULL; | |
1024 | } | |
6b8dbb53 | 1025 | #define STMT_VINFO_RELEVANT(S) (S)->relevant |
1026 | #define STMT_VINFO_LIVE_P(S) (S)->live | |
1027 | #define STMT_VINFO_VECTYPE(S) (S)->vectype | |
1028 | #define STMT_VINFO_VEC_STMT(S) (S)->vectorized_stmt | |
6ea6a380 | 1029 | #define STMT_VINFO_VECTORIZABLE(S) (S)->vectorizable |
5f02ee72 | 1030 | #define STMT_VINFO_DATA_REF(S) ((S)->dr_aux.dr + 0) |
0bd6d857 | 1031 | #define STMT_VINFO_GATHER_SCATTER_P(S) (S)->gather_scatter_p |
e1c75243 | 1032 | #define STMT_VINFO_STRIDED_P(S) (S)->strided_p |
85b53a1f | 1033 | #define STMT_VINFO_MEMORY_ACCESS_TYPE(S) (S)->memory_access_type |
3d483a94 | 1034 | #define STMT_VINFO_SIMD_LANE_ACCESS_P(S) (S)->simd_lane_access_p |
d09d8733 | 1035 | #define STMT_VINFO_VEC_REDUCTION_TYPE(S) (S)->v_reduc_type |
834a2c29 | 1036 | #define STMT_VINFO_VEC_CONST_COND_REDUC_CODE(S) (S)->const_cond_reduc_code |
b0eb8c66 | 1037 | |
9e879814 | 1038 | #define STMT_VINFO_DR_WRT_VEC_LOOP(S) (S)->dr_wrt_vec_loop |
1039 | #define STMT_VINFO_DR_BASE_ADDRESS(S) (S)->dr_wrt_vec_loop.base_address | |
1040 | #define STMT_VINFO_DR_INIT(S) (S)->dr_wrt_vec_loop.init | |
1041 | #define STMT_VINFO_DR_OFFSET(S) (S)->dr_wrt_vec_loop.offset | |
1042 | #define STMT_VINFO_DR_STEP(S) (S)->dr_wrt_vec_loop.step | |
a5456a6d | 1043 | #define STMT_VINFO_DR_BASE_ALIGNMENT(S) (S)->dr_wrt_vec_loop.base_alignment |
1044 | #define STMT_VINFO_DR_BASE_MISALIGNMENT(S) \ | |
1045 | (S)->dr_wrt_vec_loop.base_misalignment | |
a7e05ef2 | 1046 | #define STMT_VINFO_DR_OFFSET_ALIGNMENT(S) \ |
1047 | (S)->dr_wrt_vec_loop.offset_alignment | |
668dd7dc | 1048 | #define STMT_VINFO_DR_STEP_ALIGNMENT(S) \ |
1049 | (S)->dr_wrt_vec_loop.step_alignment | |
b0eb8c66 | 1050 | |
5f02ee72 | 1051 | #define STMT_VINFO_DR_INFO(S) \ |
1052 | (gcc_checking_assert ((S)->dr_aux.stmt == (S)), &(S)->dr_aux) | |
1053 | ||
6b8dbb53 | 1054 | #define STMT_VINFO_IN_PATTERN_P(S) (S)->in_pattern_p |
1055 | #define STMT_VINFO_RELATED_STMT(S) (S)->related_stmt | |
18937389 | 1056 | #define STMT_VINFO_PATTERN_DEF_SEQ(S) (S)->pattern_def_seq |
6b8dbb53 | 1057 | #define STMT_VINFO_SAME_ALIGN_REFS(S) (S)->same_align_refs |
295327ab | 1058 | #define STMT_VINFO_SIMD_CLONE_INFO(S) (S)->simd_clone_info |
6b8dbb53 | 1059 | #define STMT_VINFO_DEF_TYPE(S) (S)->def_type |
5f02ee72 | 1060 | #define STMT_VINFO_GROUPED_ACCESS(S) \ |
1061 | ((S)->dr_aux.dr && DR_GROUP_FIRST_ELEMENT(S)) | |
559260b3 | 1062 | #define STMT_VINFO_LOOP_PHI_EVOLUTION_BASE_UNCHANGED(S) (S)->loop_phi_evolution_base_unchanged |
86faead7 | 1063 | #define STMT_VINFO_LOOP_PHI_EVOLUTION_PART(S) (S)->loop_phi_evolution_part |
a8cf7702 | 1064 | #define STMT_VINFO_MIN_NEG_DIST(S) (S)->min_neg_dist |
0d85be19 | 1065 | #define STMT_VINFO_NUM_SLP_USES(S) (S)->num_slp_uses |
119a8852 | 1066 | #define STMT_VINFO_REDUC_TYPE(S) (S)->reduc_type |
1067 | #define STMT_VINFO_REDUC_DEF(S) (S)->reduc_def | |
f92474f8 | 1068 | #define STMT_VINFO_SLP_VECT_ONLY(S) (S)->slp_vect_only_p |
21009880 | 1069 | |
5f02ee72 | 1070 | #define DR_GROUP_FIRST_ELEMENT(S) \ |
1071 | (gcc_checking_assert ((S)->dr_aux.dr), (S)->first_element) | |
1072 | #define DR_GROUP_NEXT_ELEMENT(S) \ | |
1073 | (gcc_checking_assert ((S)->dr_aux.dr), (S)->next_element) | |
1074 | #define DR_GROUP_SIZE(S) \ | |
1075 | (gcc_checking_assert ((S)->dr_aux.dr), (S)->size) | |
1076 | #define DR_GROUP_STORE_COUNT(S) \ | |
1077 | (gcc_checking_assert ((S)->dr_aux.dr), (S)->store_count) | |
1078 | #define DR_GROUP_GAP(S) \ | |
1079 | (gcc_checking_assert ((S)->dr_aux.dr), (S)->gap) | |
5f02ee72 | 1080 | |
1081 | #define REDUC_GROUP_FIRST_ELEMENT(S) \ | |
1082 | (gcc_checking_assert (!(S)->dr_aux.dr), (S)->first_element) | |
1083 | #define REDUC_GROUP_NEXT_ELEMENT(S) \ | |
1084 | (gcc_checking_assert (!(S)->dr_aux.dr), (S)->next_element) | |
1085 | #define REDUC_GROUP_SIZE(S) \ | |
1086 | (gcc_checking_assert (!(S)->dr_aux.dr), (S)->size) | |
c91e8223 | 1087 | |
f083cd24 | 1088 | #define STMT_VINFO_RELEVANT_P(S) ((S)->relevant != vect_unused_in_scope) |
867c03eb | 1089 | |
c6895939 | 1090 | #define HYBRID_SLP_STMT(S) ((S)->slp_type == hybrid) |
1091 | #define PURE_SLP_STMT(S) ((S)->slp_type == pure_slp) | |
1092 | #define STMT_SLP_TYPE(S) (S)->slp_type | |
1093 | ||
0822b158 | 1094 | #define VECT_MAX_COST 1000 |
1095 | ||
862bb3cd | 1096 | /* The maximum number of intermediate steps required in multi-step type |
1097 | conversion. */ | |
1098 | #define MAX_INTERM_CVT_STEPS 3 | |
1099 | ||
d75596cd | 1100 | #define MAX_VECTORIZATION_FACTOR INT_MAX |
91a74fc6 | 1101 | |
69fcaae3 | 1102 | /* Nonzero if TYPE represents a (scalar) boolean type or type |
1103 | in the middle-end compatible with it (unsigned precision 1 integral | |
1104 | types). Used to determine which types should be vectorized as | |
1105 | VECTOR_BOOLEAN_TYPE_P. */ | |
1106 | ||
1107 | #define VECT_SCALAR_BOOLEAN_TYPE_P(TYPE) \ | |
1108 | (TREE_CODE (TYPE) == BOOLEAN_TYPE \ | |
1109 | || ((TREE_CODE (TYPE) == INTEGER_TYPE \ | |
1110 | || TREE_CODE (TYPE) == ENUMERAL_TYPE) \ | |
1111 | && TYPE_PRECISION (TYPE) == 1 \ | |
1112 | && TYPE_UNSIGNED (TYPE))) | |
1113 | ||
ecc42a77 | 1114 | static inline bool |
2e966e2a | 1115 | nested_in_vect_loop_p (class loop *loop, stmt_vec_info stmt_info) |
ecc42a77 | 1116 | { |
1117 | return (loop->inner | |
1118 | && (loop->inner == (gimple_bb (stmt_info->stmt))->loop_father)); | |
1119 | } | |
1120 | ||
/* Return TRUE if a statement represented by STMT_INFO is a part of a
   pattern, i.e. it was synthesized by pattern recognition to replace
   an original statement (see the pattern_stmt_p field).  */

static inline bool
is_pattern_stmt_p (stmt_vec_info stmt_info)
{
  return stmt_info->pattern_stmt_p;
}
1129 | ||
4a59791f | 1130 | /* If STMT_INFO is a pattern statement, return the statement that it |
1131 | replaces, otherwise return STMT_INFO itself. */ | |
1132 | ||
1133 | inline stmt_vec_info | |
1134 | vect_orig_stmt (stmt_vec_info stmt_info) | |
1135 | { | |
1136 | if (is_pattern_stmt_p (stmt_info)) | |
1137 | return STMT_VINFO_RELATED_STMT (stmt_info); | |
1138 | return stmt_info; | |
1139 | } | |
1140 | ||
eeab9fc5 | 1141 | /* Return the later statement between STMT1_INFO and STMT2_INFO. */ |
1142 | ||
1143 | static inline stmt_vec_info | |
1144 | get_later_stmt (stmt_vec_info stmt1_info, stmt_vec_info stmt2_info) | |
1145 | { | |
1146 | if (gimple_uid (vect_orig_stmt (stmt1_info)->stmt) | |
1147 | > gimple_uid (vect_orig_stmt (stmt2_info)->stmt)) | |
1148 | return stmt1_info; | |
1149 | else | |
1150 | return stmt2_info; | |
1151 | } | |
1152 | ||
0b7ea3a9 | 1153 | /* If STMT_INFO has been replaced by a pattern statement, return the |
1154 | replacement statement, otherwise return STMT_INFO itself. */ | |
1155 | ||
1156 | inline stmt_vec_info | |
1157 | vect_stmt_to_vectorize (stmt_vec_info stmt_info) | |
1158 | { | |
1159 | if (STMT_VINFO_IN_PATTERN_P (stmt_info)) | |
1160 | return STMT_VINFO_RELATED_STMT (stmt_info); | |
1161 | return stmt_info; | |
1162 | } | |
1163 | ||
/* Return true if BB is a loop header.  */

static inline bool
is_loop_header_bb_p (basic_block bb)
{
  if (bb == (bb->loop_father)->header)
    return true;
  /* A non-header block here is expected to have exactly one
     predecessor (checked only in checking builds).  */
  gcc_checking_assert (EDGE_COUNT (bb->preds) == 1);
  return false;
}
1174 | ||
/* Return pow2 (X), i.e. 2 raised to the (non-negative) power X.  */

static inline int
vect_pow2 (int x)
{
  int result = 1;
  while (x-- > 0)
    result *= 2;
  return result;
}
84a15e8f | 1187 | |
/* Alias targetm.vectorize.builtin_vectorization_cost: return the
   target's cost for a statement of kind TYPE_OF_COST operating on
   vector type VECTYPE (which may be NULL) with misalignment MISALIGN.  */

static inline int
builtin_vectorization_cost (enum vect_cost_for_stmt type_of_cost,
			    tree vectype, int misalign)
{
  return targetm.vectorize.builtin_vectorization_cost (type_of_cost,
						       vectype, misalign);
}
1197 | ||
f4ac3f3e | 1198 | /* Get cost by calling cost target builtin. */ |
1199 | ||
1200 | static inline | |
1201 | int vect_get_stmt_cost (enum vect_cost_for_stmt type_of_cost) | |
1202 | { | |
f97dec81 | 1203 | return builtin_vectorization_cost (type_of_cost, NULL, 0); |
f4ac3f3e | 1204 | } |
1205 | ||
/* Alias targetm.vectorize.init_cost: create and return the target's
   opaque cost-accumulation data for vectorizing LOOP_INFO.  */

static inline void *
init_cost (class loop *loop_info)
{
  return targetm.vectorize.init_cost (loop_info);
}
1213 | ||
c863e35b | 1214 | extern void dump_stmt_cost (FILE *, void *, int, enum vect_cost_for_stmt, |
524665d0 | 1215 | stmt_vec_info, int, unsigned, |
1216 | enum vect_cost_model_location); | |
c863e35b | 1217 | |
/* Alias targetm.vectorize.add_stmt_cost: record COUNT occurrences of a
   statement cost of kind KIND (for STMT_INFO, with misalignment
   MISALIGN) at location WHERE into the target cost data DATA, and
   return the cost the target assigned.  */

static inline unsigned
add_stmt_cost (void *data, int count, enum vect_cost_for_stmt kind,
	       stmt_vec_info stmt_info, int misalign,
	       enum vect_cost_model_location where)
{
  unsigned cost = targetm.vectorize.add_stmt_cost (data, count, kind,
						   stmt_info, misalign, where);
  /* Emit one line per recorded cost when detailed dumping is on.  */
  if (dump_file && (dump_flags & TDF_DETAILS))
    dump_stmt_cost (dump_file, data, count, kind, stmt_info, misalign,
		    cost, where);
  return cost;
}
1232 | ||
/* Alias targetm.vectorize.finish_cost: finalize the target cost data
   DATA and return the accumulated prologue, body and epilogue costs
   through *PROLOGUE_COST, *BODY_COST and *EPILOGUE_COST.  */

static inline void
finish_cost (void *data, unsigned *prologue_cost,
	     unsigned *body_cost, unsigned *epilogue_cost)
{
  targetm.vectorize.finish_cost (data, prologue_cost, body_cost, epilogue_cost);
}
1241 | ||
/* Alias targetm.vectorize.destroy_cost_data: release the target cost
   data DATA obtained from init_cost.  */

static inline void
destroy_cost_data (void *data)
{
  targetm.vectorize.destroy_cost_data (data);
}
1249 | ||
/* Record every cost entry in COST_VEC into the target cost data DATA
   via add_stmt_cost.  */

inline void
add_stmt_costs (void *data, stmt_vector_for_cost *cost_vec)
{
  stmt_info_for_cost *cost;
  unsigned i;
  FOR_EACH_VEC_ELT (*cost_vec, i, cost)
    add_stmt_cost (data, cost->count, cost->kind, cost->stmt_info,
		   cost->misalign, cost->where);
}
1259 | ||
c91e8223 | 1260 | /*-----------------------------------------------------------------*/ |
1261 | /* Info on data references alignment. */ | |
1262 | /*-----------------------------------------------------------------*/ | |
a99aba41 | 1263 | #define DR_MISALIGNMENT_UNKNOWN (-1) |
1264 | #define DR_MISALIGNMENT_UNINITIALIZED (-2) | |
1265 | ||
/* Record VAL as the misalignment (in bytes, or one of the special
   DR_MISALIGNMENT_* values) of DR_INFO.  */

inline void
set_dr_misalignment (dr_vec_info *dr_info, int val)
{
  dr_info->misalignment = val;
}
1271 | ||
1272 | inline int | |
abc9513d | 1273 | dr_misalignment (dr_vec_info *dr_info) |
23e1875f | 1274 | { |
abc9513d | 1275 | int misalign = dr_info->misalignment; |
a99aba41 | 1276 | gcc_assert (misalign != DR_MISALIGNMENT_UNINITIALIZED); |
1277 | return misalign; | |
23e1875f | 1278 | } |
c91e8223 | 1279 | |
39b8f742 | 1280 | /* Reflects actual alignment of first access in the vectorized loop, |
1281 | taking into account peeling/versioning if applied. */ | |
23e1875f | 1282 | #define DR_MISALIGNMENT(DR) dr_misalignment (DR) |
1283 | #define SET_DR_MISALIGNMENT(DR, VAL) set_dr_misalignment (DR, VAL) | |
c91e8223 | 1284 | |
aec313e5 | 1285 | /* Only defined once DR_MISALIGNMENT is defined. */ |
abc9513d | 1286 | #define DR_TARGET_ALIGNMENT(DR) ((DR)->target_alignment) |
aec313e5 | 1287 | |
/* Return true if data access DR_INFO is aligned to its target alignment
   (which may be less than a full vector), i.e. its recorded
   misalignment is zero.  */

static inline bool
aligned_access_p (dr_vec_info *dr_info)
{
  return (DR_MISALIGNMENT (dr_info) == 0);
}
1296 | ||
/* Return TRUE if the alignment of the data access is known, i.e. its
   misalignment is not DR_MISALIGNMENT_UNKNOWN, and FALSE otherwise.  */

static inline bool
known_alignment_for_access_p (dr_vec_info *dr_info)
{
  return (DR_MISALIGNMENT (dr_info) != DR_MISALIGNMENT_UNKNOWN);
}
1305 | ||
/* Return the minimum alignment in bytes that the vectorized version
   of DR_INFO is guaranteed to have.  */

static inline unsigned int
vect_known_alignment_in_bytes (dr_vec_info *dr_info)
{
  /* With unknown misalignment only the scalar type's natural alignment
     is guaranteed.  */
  if (DR_MISALIGNMENT (dr_info) == DR_MISALIGNMENT_UNKNOWN)
    return TYPE_ALIGN_UNIT (TREE_TYPE (DR_REF (dr_info->dr)));
  if (DR_MISALIGNMENT (dr_info) == 0)
    return known_alignment (DR_TARGET_ALIGNMENT (dr_info));
  /* Otherwise the lowest set bit of the misalignment bounds the
     alignment; x & -x isolates that bit.  */
  return DR_MISALIGNMENT (dr_info) & -DR_MISALIGNMENT (dr_info);
}
1318 | ||
abc9513d | 1319 | /* Return the behavior of DR_INFO with respect to the vectorization context |
9e879814 | 1320 | (which for outer loop vectorization might not be the behavior recorded |
abc9513d | 1321 | in DR_INFO itself). */ |
9e879814 | 1322 | |
1323 | static inline innermost_loop_behavior * | |
abc9513d | 1324 | vect_dr_behavior (dr_vec_info *dr_info) |
9e879814 | 1325 | { |
abc9513d | 1326 | stmt_vec_info stmt_info = dr_info->stmt; |
9e879814 | 1327 | loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info); |
1328 | if (loop_vinfo == NULL | |
a73182ff | 1329 | || !nested_in_vect_loop_p (LOOP_VINFO_LOOP (loop_vinfo), stmt_info)) |
abc9513d | 1330 | return &DR_INNERMOST (dr_info->dr); |
9e879814 | 1331 | else |
1332 | return &STMT_VINFO_DR_WRT_VEC_LOOP (stmt_info); | |
1333 | } | |
1dbf9bd1 | 1334 | |
/* Return true if the vect cost model is unlimited.  */
static inline bool
unlimited_cost_model (loop_p loop)
{
  /* Loops marked force_vectorize honour -fsimd-cost-model when it
     differs from the default.  */
  if (loop != NULL && loop->force_vectorize
      && flag_simd_cost_model != VECT_COST_MODEL_DEFAULT)
    return flag_simd_cost_model == VECT_COST_MODEL_UNLIMITED;
  return (flag_vect_cost_model == VECT_COST_MODEL_UNLIMITED);
}
1344 | ||
/* Return true if the loop described by LOOP_VINFO is fully-masked and
   if the first iteration should use a partial mask in order to achieve
   alignment.  */

static inline bool
vect_use_loop_mask_for_alignment_p (loop_vec_info loop_vinfo)
{
  return (LOOP_VINFO_FULLY_MASKED_P (loop_vinfo)
	  && LOOP_VINFO_PEELING_FOR_ALIGNMENT (loop_vinfo));
}
1355 | ||
/* Return the number of vectors of type VECTYPE that are needed to get
   NUNITS elements.  NUNITS should be based on the vectorization factor,
   so it is always a known multiple of the number of elements in VECTYPE.  */

static inline unsigned int
vect_get_num_vectors (poly_uint64 nunits, tree vectype)
{
  /* The caller guarantees the division is exact.  */
  return exact_div (nunits, TYPE_VECTOR_SUBPARTS (vectype)).to_constant ();
}
1365 | ||
/* Return the number of copies needed for loop vectorization when
   a statement operates on vectors of type VECTYPE.  This is the
   vectorization factor divided by the number of elements in
   VECTYPE and is always known at compile time.  */

static inline unsigned int
vect_get_num_copies (loop_vec_info loop_vinfo, tree vectype)
{
  return vect_get_num_vectors (LOOP_VINFO_VECT_FACTOR (loop_vinfo), vectype);
}
1376 | ||
1377 | /* Update maximum unit count *MAX_NUNITS so that it accounts for | |
1378 | the number of units in vector type VECTYPE. *MAX_NUNITS can be 1 | |
1379 | if we haven't yet recorded any vector types. */ | |
1380 | ||
1381 | static inline void | |
1382 | vect_update_max_nunits (poly_uint64 *max_nunits, tree vectype) | |
1383 | { | |
1384 | /* All unit counts have the form current_vector_size * X for some | |
1385 | rational X, so two unit sizes must have a common multiple. | |
1386 | Everything is a multiple of the initial value of 1. */ | |
1387 | poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype); | |
1388 | *max_nunits = force_common_multiple (*max_nunits, nunits); | |
1389 | } | |
1390 | ||
1391 | /* Return the vectorization factor that should be used for costing | |
1392 | purposes while vectorizing the loop described by LOOP_VINFO. | |
1393 | Pick a reasonable estimate if the vectorization factor isn't | |
1394 | known at compile time. */ | |
1395 | ||
1396 | static inline unsigned int | |
1397 | vect_vf_for_cost (loop_vec_info loop_vinfo) | |
1398 | { | |
1399 | return estimated_poly_value (LOOP_VINFO_VECT_FACTOR (loop_vinfo)); | |
4eb17cb6 | 1400 | } |
1401 | ||
09de8b78 | 1402 | /* Estimate the number of elements in VEC_TYPE for costing purposes. |
1403 | Pick a reasonable estimate if the exact number isn't known at | |
1404 | compile time. */ | |
1405 | ||
1406 | static inline unsigned int | |
1407 | vect_nunits_for_cost (tree vec_type) | |
1408 | { | |
1409 | return estimated_poly_value (TYPE_VECTOR_SUBPARTS (vec_type)); | |
1410 | } | |
1411 | ||
60b29a7e | 1412 | /* Return the maximum possible vectorization factor for LOOP_VINFO. */ |
1413 | ||
1414 | static inline unsigned HOST_WIDE_INT | |
1415 | vect_max_vf (loop_vec_info loop_vinfo) | |
1416 | { | |
1417 | unsigned HOST_WIDE_INT vf; | |
1418 | if (LOOP_VINFO_VECT_FACTOR (loop_vinfo).is_constant (&vf)) | |
1419 | return vf; | |
1420 | return MAX_VECTORIZATION_FACTOR; | |
1421 | } | |
1422 | ||
abc9513d | 1423 | /* Return the size of the value accessed by unvectorized data reference |
1424 | DR_INFO. This is only valid once STMT_VINFO_VECTYPE has been calculated | |
1425 | for the associated gimple statement, since that guarantees that DR_INFO | |
1426 | accesses either a scalar or a scalar equivalent. ("Scalar equivalent" | |
1427 | here includes things like V1SI, which can be vectorized in the same way | |
33482edf | 1428 | as a plain SI.) */ |
1429 | ||
1430 | inline unsigned int | |
abc9513d | 1431 | vect_get_scalar_dr_size (dr_vec_info *dr_info) |
33482edf | 1432 | { |
abc9513d | 1433 | return tree_to_uhwi (TYPE_SIZE_UNIT (TREE_TYPE (DR_REF (dr_info->dr)))); |
33482edf | 1434 | } |
1435 | ||
/* Source location + hotness information for vectorizer dump messages.
   Defined in tree-vectorizer.c; updated as the vectorizer walks loops
   and statements.  */
extern dump_user_location_t vect_location;

/* A macro for calling:
     dump_begin_scope (MSG, vect_location);
   via an RAII object, thus printing "=== MSG ===\n" to the dumpfile etc,
   and then calling
     dump_end_scope ();
   once the object goes out of scope, thus capturing the nesting of
   the scopes.

   These scopes affect dump messages within them: dump messages at the
   top level implicitly default to MSG_PRIORITY_USER_FACING, whereas those
   in a nested scope implicitly default to MSG_PRIORITY_INTERNALS.  */

#define DUMP_VECT_SCOPE(MSG) \
  AUTO_DUMP_SCOPE (MSG, vect_location)
88f6eb8f | 1453 | |
/* A sentinel class for ensuring that the "vect_location" global gets
   reset at the end of a scope.

   The "vect_location" global is used during dumping and contains a
   location_t, which could contain references to a tree block via the
   ad-hoc data.  This data is used for tracking inlining information,
   but it's not a GC root; it's simply assumed that such locations never
   get accessed if the blocks are optimized away.

   Hence we need to ensure that such locations are purged at the end
   of any operations using them (e.g. via this class).  */

class auto_purge_vect_location
{
 public:
  /* Destructor resets vect_location; body lives in tree-vectorizer.c.  */
  ~auto_purge_vect_location ();
};
1471 | ||
c91e8223 | 1472 | /*-----------------------------------------------------------------*/ |
1473 | /* Function prototypes. */ | |
1474 | /*-----------------------------------------------------------------*/ | |
1475 | ||
48e1416a | 1476 | /* Simple loop peeling and versioning utilities for vectorizer's purposes - |
fb85abff | 1477 | in tree-vect-loop-manip.c. */ |
2e966e2a | 1478 | extern void vect_set_loop_condition (class loop *, loop_vec_info, |
60b29a7e | 1479 | tree, tree, tree, bool); |
2e966e2a | 1480 | extern bool slpeel_can_duplicate_loop_p (const class loop *, const_edge); |
1481 | class loop *slpeel_tree_duplicate_loop_to_edge_cfg (class loop *, | |
1482 | class loop *, edge); | |
1483 | class loop *vect_loop_versioning (loop_vec_info, unsigned int, bool, | |
44245620 | 1484 | poly_uint64); |
2e966e2a | 1485 | extern class loop *vect_do_peeling (loop_vec_info, tree, tree, |
cde959e7 | 1486 | tree *, tree *, tree *, int, bool, bool); |
6753a4bf | 1487 | extern void vect_prepare_for_masked_peels (loop_vec_info); |
2e966e2a | 1488 | extern dump_user_location_t find_loop_location (class loop *); |
fb85abff | 1489 | extern bool vect_can_advance_ivs_p (loop_vec_info); |
c91e8223 | 1490 | |
fb85abff | 1491 | /* In tree-vect-stmts.c. */ |
3106770a | 1492 | extern poly_uint64 current_vector_size; |
f2983e95 | 1493 | extern tree get_vectype_for_scalar_type (tree); |
41b4a935 | 1494 | extern tree get_vectype_for_scalar_type_and_size (tree, poly_uint64); |
dab48979 | 1495 | extern tree get_mask_type_for_scalar_type (tree); |
b334cbba | 1496 | extern tree get_same_sized_vectype (tree, tree); |
60b29a7e | 1497 | extern bool vect_get_loop_mask_type (loop_vec_info); |
bf8b3614 | 1498 | extern bool vect_is_simple_use (tree, vec_info *, enum vect_def_type *, |
bfa5bad6 | 1499 | stmt_vec_info * = NULL, gimple ** = NULL); |
bf8b3614 | 1500 | extern bool vect_is_simple_use (tree, vec_info *, enum vect_def_type *, |
bfa5bad6 | 1501 | tree *, stmt_vec_info * = NULL, |
1502 | gimple ** = NULL); | |
ecc42a77 | 1503 | extern bool supportable_widening_operation (enum tree_code, stmt_vec_info, |
1504 | tree, tree, enum tree_code *, | |
42acab1c | 1505 | enum tree_code *, int *, |
1506 | vec<tree> *); | |
b334cbba | 1507 | extern bool supportable_narrowing_operation (enum tree_code, tree, tree, |
1508 | enum tree_code *, | |
f1f41a6c | 1509 | int *, vec<tree> *); |
4db2b577 | 1510 | extern unsigned record_stmt_cost (stmt_vector_for_cost *, int, |
f97dec81 | 1511 | enum vect_cost_for_stmt, stmt_vec_info, |
1512 | int, enum vect_cost_model_location); | |
ecc42a77 | 1513 | extern stmt_vec_info vect_finish_replace_stmt (stmt_vec_info, gimple *); |
1514 | extern stmt_vec_info vect_finish_stmt_generation (stmt_vec_info, gimple *, | |
585ed623 | 1515 | gimple_stmt_iterator *); |
2403338f | 1516 | extern opt_result vect_mark_stmts_to_be_vectorized (loop_vec_info, bool *); |
ecc42a77 | 1517 | extern tree vect_get_store_rhs (stmt_vec_info); |
1518 | extern tree vect_get_vec_def_for_operand_1 (stmt_vec_info, enum vect_def_type); | |
1519 | extern tree vect_get_vec_def_for_operand (tree, stmt_vec_info, tree = NULL); | |
1520 | extern void vect_get_vec_defs (tree, tree, stmt_vec_info, vec<tree> *, | |
44b24fa0 | 1521 | vec<tree> *, slp_tree); |
c0dd122a | 1522 | extern void vect_get_vec_defs_for_stmt_copy (vec_info *, |
44b24fa0 | 1523 | vec<tree> *, vec<tree> *); |
ecc42a77 | 1524 | extern tree vect_init_vector (stmt_vec_info, tree, tree, |
fb85abff | 1525 | gimple_stmt_iterator *); |
c0dd122a | 1526 | extern tree vect_get_vec_def_for_stmt_copy (vec_info *, tree); |
ecc42a77 | 1527 | extern bool vect_transform_stmt (stmt_vec_info, gimple_stmt_iterator *, |
9632f098 | 1528 | slp_tree, slp_instance); |
ecc42a77 | 1529 | extern void vect_remove_stores (stmt_vec_info); |
ed9370cc | 1530 | extern opt_result vect_analyze_stmt (stmt_vec_info, bool *, slp_tree, |
1531 | slp_instance, stmt_vector_for_cost *); | |
ecc42a77 | 1532 | extern bool vectorizable_condition (stmt_vec_info, gimple_stmt_iterator *, |
98acf890 | 1533 | stmt_vec_info *, bool, slp_tree, |
c863e35b | 1534 | stmt_vector_for_cost *); |
2fbb03c0 | 1535 | extern bool vectorizable_shift (stmt_vec_info, gimple_stmt_iterator *, |
1536 | stmt_vec_info *, slp_tree, | |
1537 | stmt_vector_for_cost *); | |
1ce0a2db | 1538 | extern void vect_get_load_cost (stmt_vec_info, int, bool, |
4db2b577 | 1539 | unsigned int *, unsigned int *, |
f97dec81 | 1540 | stmt_vector_for_cost *, |
1541 | stmt_vector_for_cost *, bool); | |
1ce0a2db | 1542 | extern void vect_get_store_cost (stmt_vec_info, int, |
4db2b577 | 1543 | unsigned int *, stmt_vector_for_cost *); |
45eea33f | 1544 | extern bool vect_supportable_shift (enum tree_code, tree); |
25eb7c31 | 1545 | extern tree vect_gen_perm_mask_any (tree, const vec_perm_indices &); |
1546 | extern tree vect_gen_perm_mask_checked (tree, const vec_perm_indices &); | |
2e966e2a | 1547 | extern void optimize_mask_stores (class loop*); |
60b29a7e | 1548 | extern gcall *vect_gen_while (tree, tree, tree); |
6753a4bf | 1549 | extern tree vect_gen_while_not (gimple_seq *, tree, tree, tree); |
ed9370cc | 1550 | extern opt_result vect_get_vector_types_for_stmt (stmt_vec_info, tree *, |
1551 | tree *); | |
1552 | extern opt_tree vect_get_mask_type_for_stmt (stmt_vec_info); | |
48e1416a | 1553 | |
fb85abff | 1554 | /* In tree-vect-data-refs.c. */ |
e092c20e | 1555 | extern bool vect_can_force_dr_alignment_p (const_tree, poly_uint64); |
fb85abff | 1556 | extern enum dr_alignment_support vect_supportable_dr_alignment |
abc9513d | 1557 | (dr_vec_info *, bool); |
ecc42a77 | 1558 | extern tree vect_get_smallest_scalar_type (stmt_vec_info, HOST_WIDE_INT *, |
fb85abff | 1559 | HOST_WIDE_INT *); |
ed9370cc | 1560 | extern opt_result vect_analyze_data_ref_dependences (loop_vec_info, unsigned int *); |
c256513d | 1561 | extern bool vect_slp_analyze_instance_dependence (slp_instance); |
ed9370cc | 1562 | extern opt_result vect_enhance_data_refs_alignment (loop_vec_info); |
1563 | extern opt_result vect_analyze_data_refs_alignment (loop_vec_info); | |
1564 | extern opt_result vect_verify_datarefs_alignment (loop_vec_info); | |
2f6fec15 | 1565 | extern bool vect_slp_analyze_and_verify_instance_alignment (slp_instance); |
ed9370cc | 1566 | extern opt_result vect_analyze_data_ref_accesses (vec_info *); |
1567 | extern opt_result vect_prune_runtime_alias_test_list (loop_vec_info); | |
1d2c127d | 1568 | extern bool vect_gather_scatter_fn_p (bool, bool, tree, tree, unsigned int, |
1569 | signop, int, internal_fn *, tree *); | |
ecc42a77 | 1570 | extern bool vect_check_gather_scatter (stmt_vec_info, loop_vec_info, |
cf60da07 | 1571 | gather_scatter_info *); |
ed9370cc | 1572 | extern opt_result vect_find_stmt_data_reference (loop_p, gimple *, |
1573 | vec<data_reference_p> *); | |
2403338f | 1574 | extern opt_result vect_analyze_data_refs (vec_info *, poly_uint64 *, bool *); |
4f372c2c | 1575 | extern void vect_record_base_alignments (vec_info *); |
2e966e2a | 1576 | extern tree vect_create_data_ref_ptr (stmt_vec_info, tree, class loop *, tree, |
bd5ba09f | 1577 | tree *, gimple_stmt_iterator *, |
3c8b7bc7 | 1578 | gimple **, bool, |
1f9a3b5c | 1579 | tree = NULL_TREE, tree = NULL_TREE); |
ecc42a77 | 1580 | extern tree bump_vector_ptr (tree, gimple *, gimple_stmt_iterator *, |
1581 | stmt_vec_info, tree); | |
1c4c7e32 | 1582 | extern void vect_copy_ref_info (tree, tree); |
fb85abff | 1583 | extern tree vect_create_destination_var (tree, tree); |
ee612634 | 1584 | extern bool vect_grouped_store_supported (tree, unsigned HOST_WIDE_INT); |
2dd8e84c | 1585 | extern bool vect_store_lanes_supported (tree, unsigned HOST_WIDE_INT, bool); |
bc691ae4 | 1586 | extern bool vect_grouped_load_supported (tree, bool, unsigned HOST_WIDE_INT); |
2dd8e84c | 1587 | extern bool vect_load_lanes_supported (tree, unsigned HOST_WIDE_INT, bool); |
ecc42a77 | 1588 | extern void vect_permute_store_chain (vec<tree> ,unsigned int, stmt_vec_info, |
f1f41a6c | 1589 | gimple_stmt_iterator *, vec<tree> *); |
ecc42a77 | 1590 | extern tree vect_setup_realignment (stmt_vec_info, gimple_stmt_iterator *, |
1591 | tree *, enum dr_alignment_support, tree, | |
2e966e2a | 1592 | class loop **); |
ecc42a77 | 1593 | extern void vect_transform_grouped_load (stmt_vec_info, vec<tree> , int, |
fb85abff | 1594 | gimple_stmt_iterator *); |
ecc42a77 | 1595 | extern void vect_record_grouped_load_vectors (stmt_vec_info, vec<tree>); |
fb85abff | 1596 | extern tree vect_get_new_vect_var (tree, enum vect_var_kind, const char *); |
23ffec42 | 1597 | extern tree vect_get_new_ssa_name (tree, enum vect_var_kind, |
1598 | const char * = NULL); | |
ecc42a77 | 1599 | extern tree vect_create_addr_base_for_vector_ref (stmt_vec_info, gimple_seq *, |
9e879814 | 1600 | tree, tree = NULL_TREE); |
fb85abff | 1601 | |
1602 | /* In tree-vect-loop.c. */ | |
1603 | /* FORNOW: Used in tree-parloops.c. */ | |
f4649a92 | 1604 | extern stmt_vec_info vect_force_simple_reduction (loop_vec_info, stmt_vec_info, |
1605 | bool *, bool); | |
ef871d99 | 1606 | extern widest_int vect_iv_limit_for_full_masking (loop_vec_info loop_vinfo); |
5051abaf | 1607 | /* Used in gimple-loop-interchange.c. */ |
c309657f | 1608 | extern bool check_reduction_path (dump_user_location_t, loop_p, gphi *, tree, |
5051abaf | 1609 | enum tree_code); |
fb85abff | 1610 | /* Drive for loop analysis stage. */ |
2e966e2a | 1611 | extern opt_loop_vec_info vect_analyze_loop (class loop *, |
ed9370cc | 1612 | loop_vec_info, |
1613 | vec_info_shared *); | |
3a815241 | 1614 | extern tree vect_build_loop_niters (loop_vec_info, bool * = NULL); |
cde959e7 | 1615 | extern void vect_gen_vector_loop_niters (loop_vec_info, tree, tree *, |
1616 | tree *, bool); | |
60b29a7e | 1617 | extern tree vect_halve_mask_nunits (tree); |
1618 | extern tree vect_double_mask_nunits (tree); | |
1619 | extern void vect_record_loop_mask (loop_vec_info, vec_loop_masks *, | |
1620 | unsigned int, tree); | |
1621 | extern tree vect_get_loop_mask (gimple_stmt_iterator *, vec_loop_masks *, | |
1622 | unsigned int, tree, unsigned int); | |
1623 | ||
fb85abff | 1624 | /* Drive for loop transformation stage. */ |
2e966e2a | 1625 | extern class loop *vect_transform_loop (loop_vec_info); |
1626 | extern opt_loop_vec_info vect_analyze_loop_form (class loop *, | |
ed9370cc | 1627 | vec_info_shared *); |
ecc42a77 | 1628 | extern bool vectorizable_live_operation (stmt_vec_info, gimple_stmt_iterator *, |
435515db | 1629 | slp_tree, int, stmt_vec_info *, |
c863e35b | 1630 | stmt_vector_for_cost *); |
ecc42a77 | 1631 | extern bool vectorizable_reduction (stmt_vec_info, gimple_stmt_iterator *, |
435515db | 1632 | stmt_vec_info *, slp_tree, slp_instance, |
c863e35b | 1633 | stmt_vector_for_cost *); |
ecc42a77 | 1634 | extern bool vectorizable_induction (stmt_vec_info, gimple_stmt_iterator *, |
435515db | 1635 | stmt_vec_info *, slp_tree, |
c863e35b | 1636 | stmt_vector_for_cost *); |
ecc42a77 | 1637 | extern tree get_initial_def_for_reduction (stmt_vec_info, tree, tree *); |
fec8b6d0 | 1638 | extern bool vect_worthwhile_without_simd_p (vec_info *, tree_code); |
7a66d0cf | 1639 | extern int vect_get_known_peeling_cost (loop_vec_info, int, int *, |
1640 | stmt_vector_for_cost *, | |
f97dec81 | 1641 | stmt_vector_for_cost *, |
1642 | stmt_vector_for_cost *); | |
f404501a | 1643 | extern tree cse_and_gimplify_to_preheader (loop_vec_info, tree); |
4a61a337 | 1644 | |
fb85abff | 1645 | /* In tree-vect-slp.c. */ |
2068679d | 1646 | extern void vect_free_slp_instance (slp_instance, bool); |
678e3d6e | 1647 | extern bool vect_transform_slp_perm_load (slp_tree, vec<tree> , |
d75596cd | 1648 | gimple_stmt_iterator *, poly_uint64, |
1649 | slp_instance, bool, unsigned *); | |
1c57101b | 1650 | extern bool vect_slp_analyze_operations (vec_info *); |
02e9bec2 | 1651 | extern void vect_schedule_slp (vec_info *); |
ed9370cc | 1652 | extern opt_result vect_analyze_slp (vec_info *, unsigned); |
bc937a44 | 1653 | extern bool vect_make_slp_decision (loop_vec_info); |
fb85abff | 1654 | extern void vect_detect_hybrid_slp (loop_vec_info); |
4f0d4cce | 1655 | extern void vect_get_slp_defs (vec<tree> , slp_tree, vec<vec<tree> > *); |
0a08c1bc | 1656 | extern bool vect_slp_bb (basic_block); |
3d9c962c | 1657 | extern stmt_vec_info vect_find_last_scalar_stmt_in_slp (slp_tree); |
ecc42a77 | 1658 | extern bool is_simple_and_all_uses_invariant (stmt_vec_info, loop_vec_info); |
633af029 | 1659 | extern bool can_duplicate_and_interleave_p (unsigned int, machine_mode, |
1660 | unsigned int * = NULL, | |
1661 | tree * = NULL, tree * = NULL); | |
1662 | extern void duplicate_and_interleave (gimple_seq *, tree, vec<tree>, | |
1663 | unsigned int, vec<tree> &); | |
ecc42a77 | 1664 | extern int vect_get_place_in_interleaving_chain (stmt_vec_info, stmt_vec_info); |
fb85abff | 1665 | |
1666 | /* In tree-vect-patterns.c. */ | |
4a61a337 | 1667 | /* Pattern recognition functions. |
1668 | Additional pattern recognition functions can (and will) be added | |
1669 | in the future. */ | |
e2c5c678 | 1670 | void vect_pattern_recog (vec_info *); |
4a61a337 | 1671 | |
10230637 | 1672 | /* In tree-vectorizer.c. */ |
1673 | unsigned vectorize_loops (void); | |
2e966e2a | 1674 | void vect_free_loop_info_assumptions (class loop *); |
1675 | gimple *vect_loop_vectorized_call (class loop *, gcond **cond = NULL); | |
0decb676 | 1676 | |
c91e8223 | 1677 | |
1678 | #endif /* GCC_TREE_VECTORIZER_H */ |