]>
Commit | Line | Data |
---|---|---|
07c03fb0 | 1 | /* If-conversion for vectorizer. |
8e8f6434 | 2 | Copyright (C) 2004-2018 Free Software Foundation, Inc. |
07c03fb0 | 3 | Contributed by Devang Patel <dpatel@apple.com> |
4 | ||
5 | This file is part of GCC. | |
6 | ||
7 | GCC is free software; you can redistribute it and/or modify it under | |
8 | the terms of the GNU General Public License as published by the Free | |
8c4c00c1 | 9 | Software Foundation; either version 3, or (at your option) any later |
07c03fb0 | 10 | version. |
11 | ||
12 | GCC is distributed in the hope that it will be useful, but WITHOUT ANY | |
13 | WARRANTY; without even the implied warranty of MERCHANTABILITY or | |
14 | FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License | |
15 | for more details. | |
16 | ||
17 | You should have received a copy of the GNU General Public License | |
8c4c00c1 | 18 | along with GCC; see the file COPYING3. If not see |
19 | <http://www.gnu.org/licenses/>. */ | |
07c03fb0 | 20 | |
b01e3c9b | 21 | /* This pass implements a tree level if-conversion of loops. Its |
22 | initial goal is to help the vectorizer to vectorize loops with | |
23 | conditions. | |
07c03fb0 | 24 | |
25 | A short description of if-conversion: | |
26 | ||
9b482bc6 | 27 | o Decide if a loop is if-convertible or not. |
07c03fb0 | 28 | o Walk all loop basic blocks in breadth first order (BFS order). |
29 | o Remove conditional statements (at the end of basic block) | |
35a40aad | 30 | and propagate condition into destination basic blocks' |
07c03fb0 | 31 | predicate list. |
32 | o Replace modify expression with conditional modify expression | |
33 | using current basic block's condition. | |
34 | o Merge all basic blocks | |
35 | o Replace phi nodes with conditional modify expr | |
36 | o Merge all basic blocks into header | |
37 | ||
38 | Sample transformation: | |
39 | ||
40 | INPUT | |
41 | ----- | |
42 | ||
43 | # i_23 = PHI <0(0), i_18(10)>; | |
44 | <L0>:; | |
45 | j_15 = A[i_23]; | |
46 | if (j_15 > 41) goto <L1>; else goto <L17>; | |
47 | ||
48 | <L17>:; | |
49 | goto <bb 3> (<L3>); | |
50 | ||
51 | <L1>:; | |
52 | ||
53 | # iftmp.2_4 = PHI <0(8), 42(2)>; | |
54 | <L3>:; | |
55 | A[i_23] = iftmp.2_4; | |
56 | i_18 = i_23 + 1; | |
57 | if (i_18 <= 15) goto <L19>; else goto <L18>; | |
58 | ||
59 | <L19>:; | |
60 | goto <bb 1> (<L0>); | |
61 | ||
62 | <L18>:; | |
63 | ||
64 | OUTPUT | |
65 | ------ | |
66 | ||
67 | # i_23 = PHI <0(0), i_18(10)>; | |
68 | <L0>:; | |
69 | j_15 = A[i_23]; | |
35a40aad | 70 | |
07c03fb0 | 71 | <L3>:; |
72 | iftmp.2_4 = j_15 > 41 ? 42 : 0; | |
73 | A[i_23] = iftmp.2_4; | |
74 | i_18 = i_23 + 1; | |
75 | if (i_18 <= 15) goto <L19>; else goto <L18>; | |
35a40aad | 76 | |
07c03fb0 | 77 | <L19>:; |
78 | goto <bb 1> (<L0>); | |
79 | ||
80 | <L18>:; | |
81 | */ | |
82 | ||
83 | #include "config.h" | |
84 | #include "system.h" | |
85 | #include "coretypes.h" | |
9ef16211 | 86 | #include "backend.h" |
7c29e30e | 87 | #include "rtl.h" |
07c03fb0 | 88 | #include "tree.h" |
9ef16211 | 89 | #include "gimple.h" |
7c29e30e | 90 | #include "cfghooks.h" |
91 | #include "tree-pass.h" | |
9ef16211 | 92 | #include "ssa.h" |
7c29e30e | 93 | #include "expmed.h" |
94 | #include "optabs-query.h" | |
7c29e30e | 95 | #include "gimple-pretty-print.h" |
9ef16211 | 96 | #include "alias.h" |
b20a8bb4 | 97 | #include "fold-const.h" |
9ed99284 | 98 | #include "stor-layout.h" |
bc61cadb | 99 | #include "gimple-fold.h" |
a8783bee | 100 | #include "gimplify.h" |
dcf1a1ec | 101 | #include "gimple-iterator.h" |
e795d6e1 | 102 | #include "gimplify-me.h" |
073c1fd5 | 103 | #include "tree-cfg.h" |
073c1fd5 | 104 | #include "tree-into-ssa.h" |
69ee5dbb | 105 | #include "tree-ssa.h" |
07c03fb0 | 106 | #include "cfgloop.h" |
07c03fb0 | 107 | #include "tree-data-ref.h" |
108 | #include "tree-scalar-evolution.h" | |
189d0706 | 109 | #include "tree-ssa-loop.h" |
110 | #include "tree-ssa-loop-niter.h" | |
c71d3c24 | 111 | #include "tree-ssa-loop-ivopts.h" |
112 | #include "tree-ssa-address.h" | |
4bd0f929 | 113 | #include "dbgcnt.h" |
f4ff098a | 114 | #include "tree-hash-traits.h" |
ee0e4f73 | 115 | #include "varasm.h" |
d73c1238 | 116 | #include "builtins.h" |
d2276060 | 117 | #include "params.h" |
2e063ded | 118 | #include "cfganal.h" |
03821886 | 119 | #include "internal-fn.h" |
120 | #include "fold-const.h" | |
67763c7f | 121 | #include "tree-ssa-sccvn.h" |
9ab8df54 | 122 | |
123 | /* Only handle PHIs with no more arguments unless we are asked to by | |
124 | simd pragma. */ | |
125 | #define MAX_PHI_ARG_NUM \ | |
126 | ((unsigned) PARAM_VALUE (PARAM_MAX_TREE_IF_CONVERSION_PHI_ARGS)) | |
127 | ||
03821886 | 128 | /* True if we've converted a statement that was only executed when some |
129 | condition C was true, and if for correctness we need to predicate the | |
130 | statement to ensure that it is a no-op when C is false. See | |
131 | predicate_statements for the kinds of predication we support. */ | |
132 | static bool need_to_predicate; | |
07c03fb0 | 133 | |
9ab8df54 | 134 | /* Indicate if there are any complicated PHIs that need to be handled in |
135 | if-conversion. Complicated PHI has more than two arguments and can't | |
136 | be degenerated to two arguments PHI. See more information in comment | |
137 | before phi_convertible_by_degenerating_args. */ | |
138 | static bool any_complicated_phi; | |
139 | ||
bd6f374c | 140 | /* Hash for struct innermost_loop_behavior. It depends on the user to |
141 | free the memory. */ | |
142 | ||
143 | struct innermost_loop_behavior_hash : nofree_ptr_hash <innermost_loop_behavior> | |
144 | { | |
145 | static inline hashval_t hash (const value_type &); | |
146 | static inline bool equal (const value_type &, | |
147 | const compare_type &); | |
148 | }; | |
149 | ||
150 | inline hashval_t | |
151 | innermost_loop_behavior_hash::hash (const value_type &e) | |
152 | { | |
153 | hashval_t hash; | |
154 | ||
155 | hash = iterative_hash_expr (e->base_address, 0); | |
156 | hash = iterative_hash_expr (e->offset, hash); | |
157 | hash = iterative_hash_expr (e->init, hash); | |
158 | return iterative_hash_expr (e->step, hash); | |
159 | } | |
160 | ||
161 | inline bool | |
162 | innermost_loop_behavior_hash::equal (const value_type &e1, | |
163 | const compare_type &e2) | |
164 | { | |
165 | if ((e1->base_address && !e2->base_address) | |
166 | || (!e1->base_address && e2->base_address) | |
167 | || (!e1->offset && e2->offset) | |
168 | || (e1->offset && !e2->offset) | |
169 | || (!e1->init && e2->init) | |
170 | || (e1->init && !e2->init) | |
171 | || (!e1->step && e2->step) | |
172 | || (e1->step && !e2->step)) | |
173 | return false; | |
174 | ||
175 | if (e1->base_address && e2->base_address | |
176 | && !operand_equal_p (e1->base_address, e2->base_address, 0)) | |
177 | return false; | |
178 | if (e1->offset && e2->offset | |
179 | && !operand_equal_p (e1->offset, e2->offset, 0)) | |
180 | return false; | |
181 | if (e1->init && e2->init | |
182 | && !operand_equal_p (e1->init, e2->init, 0)) | |
183 | return false; | |
184 | if (e1->step && e2->step | |
185 | && !operand_equal_p (e1->step, e2->step, 0)) | |
186 | return false; | |
187 | ||
188 | return true; | |
189 | } | |
190 | ||
07c03fb0 | 191 | /* List of basic blocks in if-conversion-suitable order. */ |
192 | static basic_block *ifc_bbs; | |
193 | ||
bd6f374c | 194 | /* Hash table to store <DR's innermost loop behavior, DR> pairs. */ |
195 | static hash_map<innermost_loop_behavior_hash, | |
196 | data_reference_p> *innermost_DR_map; | |
ee0e4f73 | 197 | |
bd6f374c | 198 | /* Hash table to store <base reference, DR> pairs. */ |
ee0e4f73 | 199 | static hash_map<tree_operand_hash, data_reference_p> *baseref_DR_map; |
200 | ||
03821886 | 201 | /* List of redundant SSA names: the first should be replaced by the second. */ |
202 | static vec< std::pair<tree, tree> > redundant_ssa_names; | |
203 | ||
fa683b76 | 204 | /* Structure used to predicate basic blocks. This is attached to the |
205 | ->aux field of the BBs in the loop to be if-converted. */ | |
04009ada | 206 | struct bb_predicate { |
fa683b76 | 207 | |
208 | /* The condition under which this basic block is executed. */ | |
209 | tree predicate; | |
210 | ||
211 | /* PREDICATE is gimplified, and the sequence of statements is | |
212 | recorded here, in order to avoid the duplication of computations | |
213 | that occur in previous conditions. See PR44483. */ | |
214 | gimple_seq predicate_gimplified_stmts; | |
04009ada | 215 | }; |
fa683b76 | 216 | |
217 | /* Returns true when the basic block BB has a predicate. */ | |
218 | ||
219 | static inline bool | |
220 | bb_has_predicate (basic_block bb) | |
221 | { | |
222 | return bb->aux != NULL; | |
223 | } | |
224 | ||
225 | /* Returns the gimplified predicate for basic block BB. */ | |
226 | ||
227 | static inline tree | |
228 | bb_predicate (basic_block bb) | |
229 | { | |
04009ada | 230 | return ((struct bb_predicate *) bb->aux)->predicate; |
fa683b76 | 231 | } |
232 | ||
233 | /* Sets the gimplified predicate COND for basic block BB. */ | |
234 | ||
235 | static inline void | |
236 | set_bb_predicate (basic_block bb, tree cond) | |
237 | { | |
56db02b2 | 238 | gcc_assert ((TREE_CODE (cond) == TRUTH_NOT_EXPR |
239 | && is_gimple_condexpr (TREE_OPERAND (cond, 0))) | |
240 | || is_gimple_condexpr (cond)); | |
04009ada | 241 | ((struct bb_predicate *) bb->aux)->predicate = cond; |
fa683b76 | 242 | } |
243 | ||
244 | /* Returns the sequence of statements of the gimplification of the | |
245 | predicate for basic block BB. */ | |
246 | ||
247 | static inline gimple_seq | |
248 | bb_predicate_gimplified_stmts (basic_block bb) | |
249 | { | |
04009ada | 250 | return ((struct bb_predicate *) bb->aux)->predicate_gimplified_stmts; |
fa683b76 | 251 | } |
252 | ||
253 | /* Sets the sequence of statements STMTS of the gimplification of the | |
254 | predicate for basic block BB. */ | |
255 | ||
256 | static inline void | |
257 | set_bb_predicate_gimplified_stmts (basic_block bb, gimple_seq stmts) | |
258 | { | |
04009ada | 259 | ((struct bb_predicate *) bb->aux)->predicate_gimplified_stmts = stmts; |
fa683b76 | 260 | } |
261 | ||
262 | /* Adds the sequence of statements STMTS to the sequence of statements | |
263 | of the predicate for basic block BB. */ | |
264 | ||
265 | static inline void | |
266 | add_bb_predicate_gimplified_stmts (basic_block bb, gimple_seq stmts) | |
267 | { | |
5cc7d4f7 | 268 | /* We might have updated some stmts in STMTS via force_gimple_operand |
269 | calling fold_stmt and that producing multiple stmts. Delink immediate | |
270 | uses so update_ssa after loop versioning doesn't get confused for | |
271 | the not yet inserted predicates. | |
272 | ??? This should go away once we reliably avoid updating stmts | |
273 | not in any BB. */ | |
274 | for (gimple_stmt_iterator gsi = gsi_start (stmts); | |
275 | !gsi_end_p (gsi); gsi_next (&gsi)) | |
276 | { | |
277 | gimple *stmt = gsi_stmt (gsi); | |
278 | delink_stmt_imm_use (stmt); | |
279 | gimple_set_modified (stmt, true); | |
280 | } | |
c3deca25 | 281 | gimple_seq_add_seq_without_update |
04009ada | 282 | (&(((struct bb_predicate *) bb->aux)->predicate_gimplified_stmts), stmts); |
fa683b76 | 283 | } |
284 | ||
285 | /* Initializes to TRUE the predicate of basic block BB. */ | |
286 | ||
287 | static inline void | |
288 | init_bb_predicate (basic_block bb) | |
289 | { | |
04009ada | 290 | bb->aux = XNEW (struct bb_predicate); |
fa683b76 | 291 | set_bb_predicate_gimplified_stmts (bb, NULL); |
48c9b1fe | 292 | set_bb_predicate (bb, boolean_true_node); |
fa683b76 | 293 | } |
294 | ||
d2bbbc2d | 295 | /* Release the SSA_NAMEs associated with the predicate of basic block BB. */ |
fa683b76 | 296 | |
297 | static inline void | |
c71d3c24 | 298 | release_bb_predicate (basic_block bb) |
fa683b76 | 299 | { |
c71d3c24 | 300 | gimple_seq stmts = bb_predicate_gimplified_stmts (bb); |
fa683b76 | 301 | if (stmts) |
302 | { | |
d2bbbc2d | 303 | /* Ensure that these stmts haven't yet been added to a bb. */ |
c3deca25 | 304 | if (flag_checking) |
305 | for (gimple_stmt_iterator i = gsi_start (stmts); | |
306 | !gsi_end_p (i); gsi_next (&i)) | |
d2bbbc2d | 307 | gcc_assert (! gimple_bb (gsi_stmt (i))); |
fa683b76 | 308 | |
d2bbbc2d | 309 | /* Discard them. */ |
310 | gimple_seq_discard (stmts); | |
c71d3c24 | 311 | set_bb_predicate_gimplified_stmts (bb, NULL); |
fa683b76 | 312 | } |
c71d3c24 | 313 | } |
314 | ||
315 | /* Free the predicate of basic block BB. */ | |
fa683b76 | 316 | |
c71d3c24 | 317 | static inline void |
318 | free_bb_predicate (basic_block bb) | |
319 | { | |
320 | if (!bb_has_predicate (bb)) | |
321 | return; | |
322 | ||
323 | release_bb_predicate (bb); | |
fa683b76 | 324 | free (bb->aux); |
325 | bb->aux = NULL; | |
326 | } | |
327 | ||
c71d3c24 | 328 | /* Reinitialize predicate of BB with the true predicate. */ |
48c9b1fe | 329 | |
330 | static inline void | |
331 | reset_bb_predicate (basic_block bb) | |
332 | { | |
c71d3c24 | 333 | if (!bb_has_predicate (bb)) |
334 | init_bb_predicate (bb); | |
335 | else | |
336 | { | |
337 | release_bb_predicate (bb); | |
338 | set_bb_predicate (bb, boolean_true_node); | |
339 | } | |
48c9b1fe | 340 | } |
341 | ||
3b91ccd9 | 342 | /* Returns a new SSA_NAME of type TYPE that is assigned the value of |
343 | the expression EXPR. Inserts the statement created for this | |
344 | computation before GSI and leaves the iterator GSI at the same | |
345 | statement. */ | |
07c03fb0 | 346 | |
3b91ccd9 | 347 | static tree |
348 | ifc_temp_var (tree type, tree expr, gimple_stmt_iterator *gsi) | |
07c03fb0 | 349 | { |
03d37e4e | 350 | tree new_name = make_temp_ssa_name (type, NULL, "_ifc_"); |
42acab1c | 351 | gimple *stmt = gimple_build_assign (new_name, expr); |
ebd01afe | 352 | gimple_set_vuse (stmt, gimple_vuse (gsi_stmt (*gsi))); |
3b91ccd9 | 353 | gsi_insert_before (gsi, stmt, GSI_SAME_STMT); |
03d37e4e | 354 | return new_name; |
1055d96a | 355 | } |
356 | ||
97efb92e | 357 | /* Return true when COND is a false predicate. */ |
358 | ||
359 | static inline bool | |
360 | is_false_predicate (tree cond) | |
361 | { | |
a8876617 | 362 | return (cond != NULL_TREE |
363 | && (cond == boolean_false_node | |
364 | || integer_zerop (cond))); | |
97efb92e | 365 | } |
366 | ||
16d6ea49 | 367 | /* Return true when COND is a true predicate. */ |
368 | ||
369 | static inline bool | |
370 | is_true_predicate (tree cond) | |
371 | { | |
372 | return (cond == NULL_TREE | |
373 | || cond == boolean_true_node | |
374 | || integer_onep (cond)); | |
375 | } | |
376 | ||
377 | /* Returns true when BB has a predicate that is not trivial: true or | |
378 | NULL_TREE. */ | |
379 | ||
380 | static inline bool | |
381 | is_predicated (basic_block bb) | |
382 | { | |
fa683b76 | 383 | return !is_true_predicate (bb_predicate (bb)); |
16d6ea49 | 384 | } |
385 | ||
74a43fe9 | 386 | /* Parses the predicate COND and returns its comparison code and |
387 | operands OP0 and OP1. */ | |
388 | ||
389 | static enum tree_code | |
390 | parse_predicate (tree cond, tree *op0, tree *op1) | |
391 | { | |
42acab1c | 392 | gimple *s; |
74a43fe9 | 393 | |
394 | if (TREE_CODE (cond) == SSA_NAME | |
395 | && is_gimple_assign (s = SSA_NAME_DEF_STMT (cond))) | |
396 | { | |
397 | if (TREE_CODE_CLASS (gimple_assign_rhs_code (s)) == tcc_comparison) | |
398 | { | |
399 | *op0 = gimple_assign_rhs1 (s); | |
400 | *op1 = gimple_assign_rhs2 (s); | |
401 | return gimple_assign_rhs_code (s); | |
402 | } | |
403 | ||
404 | else if (gimple_assign_rhs_code (s) == TRUTH_NOT_EXPR) | |
405 | { | |
406 | tree op = gimple_assign_rhs1 (s); | |
407 | tree type = TREE_TYPE (op); | |
408 | enum tree_code code = parse_predicate (op, op0, op1); | |
409 | ||
410 | return code == ERROR_MARK ? ERROR_MARK | |
93633022 | 411 | : invert_tree_comparison (code, HONOR_NANS (type)); |
74a43fe9 | 412 | } |
413 | ||
414 | return ERROR_MARK; | |
415 | } | |
416 | ||
21c8a0ab | 417 | if (COMPARISON_CLASS_P (cond)) |
74a43fe9 | 418 | { |
419 | *op0 = TREE_OPERAND (cond, 0); | |
420 | *op1 = TREE_OPERAND (cond, 1); | |
421 | return TREE_CODE (cond); | |
422 | } | |
423 | ||
424 | return ERROR_MARK; | |
425 | } | |
426 | ||
4be7307c | 427 | /* Returns the fold of predicate C1 OR C2 at location LOC. */ |
428 | ||
429 | static tree | |
430 | fold_or_predicates (location_t loc, tree c1, tree c2) | |
431 | { | |
432 | tree op1a, op1b, op2a, op2b; | |
433 | enum tree_code code1 = parse_predicate (c1, &op1a, &op1b); | |
434 | enum tree_code code2 = parse_predicate (c2, &op2a, &op2b); | |
435 | ||
436 | if (code1 != ERROR_MARK && code2 != ERROR_MARK) | |
437 | { | |
438 | tree t = maybe_fold_or_comparisons (code1, op1a, op1b, | |
439 | code2, op2a, op2b); | |
440 | if (t) | |
441 | return t; | |
442 | } | |
443 | ||
444 | return fold_build2_loc (loc, TRUTH_OR_EXPR, boolean_type_node, c1, c2); | |
445 | } | |
446 | ||
ed0124a2 | 447 | /* Returns either a COND_EXPR or the folded expression if the folded |
448 | expression is a MIN_EXPR, a MAX_EXPR, an ABS_EXPR, | |
449 | a constant or a SSA_NAME. */ | |
450 | ||
451 | static tree | |
452 | fold_build_cond_expr (tree type, tree cond, tree rhs, tree lhs) | |
453 | { | |
454 | tree rhs1, lhs1, cond_expr; | |
040b4eee | 455 | |
456 | /* If COND is comparison r != 0 and r has boolean type, convert COND | |
457 | to SSA_NAME to accept by vect bool pattern. */ | |
458 | if (TREE_CODE (cond) == NE_EXPR) | |
459 | { | |
460 | tree op0 = TREE_OPERAND (cond, 0); | |
461 | tree op1 = TREE_OPERAND (cond, 1); | |
462 | if (TREE_CODE (op0) == SSA_NAME | |
463 | && TREE_CODE (TREE_TYPE (op0)) == BOOLEAN_TYPE | |
464 | && (integer_zerop (op1))) | |
465 | cond = op0; | |
466 | } | |
00a2230f | 467 | cond_expr = fold_ternary (COND_EXPR, type, cond, rhs, lhs); |
ed0124a2 | 468 | |
469 | if (cond_expr == NULL_TREE) | |
470 | return build3 (COND_EXPR, type, cond, rhs, lhs); | |
471 | ||
472 | STRIP_USELESS_TYPE_CONVERSION (cond_expr); | |
473 | ||
00a2230f | 474 | if (is_gimple_val (cond_expr)) |
ed0124a2 | 475 | return cond_expr; |
476 | ||
477 | if (TREE_CODE (cond_expr) == ABS_EXPR) | |
478 | { | |
479 | rhs1 = TREE_OPERAND (cond_expr, 1); | |
480 | STRIP_USELESS_TYPE_CONVERSION (rhs1); | |
00a2230f | 481 | if (is_gimple_val (rhs1)) |
ed0124a2 | 482 | return build1 (ABS_EXPR, type, rhs1); |
483 | } | |
484 | ||
485 | if (TREE_CODE (cond_expr) == MIN_EXPR | |
486 | || TREE_CODE (cond_expr) == MAX_EXPR) | |
487 | { | |
488 | lhs1 = TREE_OPERAND (cond_expr, 0); | |
489 | STRIP_USELESS_TYPE_CONVERSION (lhs1); | |
490 | rhs1 = TREE_OPERAND (cond_expr, 1); | |
491 | STRIP_USELESS_TYPE_CONVERSION (rhs1); | |
00a2230f | 492 | if (is_gimple_val (rhs1) && is_gimple_val (lhs1)) |
ed0124a2 | 493 | return build2 (TREE_CODE (cond_expr), type, lhs1, rhs1); |
494 | } | |
495 | return build3 (COND_EXPR, type, cond, rhs, lhs); | |
496 | } | |
497 | ||
c71d3c24 | 498 | /* Add condition NC to the predicate list of basic block BB. LOOP is |
ee080c0c | 499 | the loop to be if-converted. Use predicate of cd-equivalent block |
500 | for join bb if it exists: we call basic blocks bb1 and bb2 | |
501 | cd-equivalent if they are executed under the same condition. */ | |
1055d96a | 502 | |
16d6ea49 | 503 | static inline void |
c71d3c24 | 504 | add_to_predicate_list (struct loop *loop, basic_block bb, tree nc) |
1055d96a | 505 | { |
56db02b2 | 506 | tree bc, *tp; |
ee080c0c | 507 | basic_block dom_bb; |
74a43fe9 | 508 | |
509 | if (is_true_predicate (nc)) | |
510 | return; | |
511 | ||
ee080c0c | 512 | /* If dominance tells us this basic block is always executed, |
513 | don't record any predicates for it. */ | |
514 | if (dominated_by_p (CDI_DOMINATORS, loop->latch, bb)) | |
515 | return; | |
c71d3c24 | 516 | |
ee080c0c | 517 | dom_bb = get_immediate_dominator (CDI_DOMINATORS, bb); |
518 | /* We use notion of cd equivalence to get simpler predicate for | |
519 | join block, e.g. if join block has 2 predecessors with predicates | |
520 | p1 & p2 and p1 & !p2, we'd like to get p1 for it instead of | |
521 | p1 & p2 | p1 & !p2. */ | |
522 | if (dom_bb != loop->header | |
523 | && get_immediate_dominator (CDI_POST_DOMINATORS, dom_bb) == bb) | |
524 | { | |
525 | gcc_assert (flow_bb_inside_loop_p (loop, dom_bb)); | |
526 | bc = bb_predicate (dom_bb); | |
3c7578f3 | 527 | if (!is_true_predicate (bc)) |
528 | set_bb_predicate (bb, bc); | |
529 | else | |
530 | gcc_assert (is_true_predicate (bb_predicate (bb))); | |
ee080c0c | 531 | if (dump_file && (dump_flags & TDF_DETAILS)) |
532 | fprintf (dump_file, "Use predicate of bb#%d for bb#%d\n", | |
533 | dom_bb->index, bb->index); | |
534 | return; | |
c71d3c24 | 535 | } |
ee080c0c | 536 | |
537 | if (!is_predicated (bb)) | |
538 | bc = nc; | |
74a43fe9 | 539 | else |
540 | { | |
74a43fe9 | 541 | bc = bb_predicate (bb); |
4be7307c | 542 | bc = fold_or_predicates (EXPR_LOCATION (bc), nc, bc); |
56db02b2 | 543 | if (is_true_predicate (bc)) |
544 | { | |
545 | reset_bb_predicate (bb); | |
546 | return; | |
547 | } | |
74a43fe9 | 548 | } |
549 | ||
56db02b2 | 550 | /* Allow a TRUTH_NOT_EXPR around the main predicate. */ |
551 | if (TREE_CODE (bc) == TRUTH_NOT_EXPR) | |
552 | tp = &TREE_OPERAND (bc, 0); | |
553 | else | |
554 | tp = &bc; | |
555 | if (!is_gimple_condexpr (*tp)) | |
74a43fe9 | 556 | { |
557 | gimple_seq stmts; | |
56db02b2 | 558 | *tp = force_gimple_operand_1 (*tp, &stmts, is_gimple_condexpr, NULL_TREE); |
74a43fe9 | 559 | add_bb_predicate_gimplified_stmts (bb, stmts); |
560 | } | |
56db02b2 | 561 | set_bb_predicate (bb, bc); |
1055d96a | 562 | } |
563 | ||
60e0f7c4 | 564 | /* Add the condition COND to the previous condition PREV_COND, and add |
565 | this to the predicate list of the destination of edge E. LOOP is | |
566 | the loop to be if-converted. */ | |
1055d96a | 567 | |
16d6ea49 | 568 | static void |
1055d96a | 569 | add_to_dst_predicate_list (struct loop *loop, edge e, |
60e0f7c4 | 570 | tree prev_cond, tree cond) |
1055d96a | 571 | { |
1055d96a | 572 | if (!flow_bb_inside_loop_p (loop, e->dest)) |
16d6ea49 | 573 | return; |
1055d96a | 574 | |
16d6ea49 | 575 | if (!is_true_predicate (prev_cond)) |
576 | cond = fold_build2 (TRUTH_AND_EXPR, boolean_type_node, | |
577 | prev_cond, cond); | |
07c03fb0 | 578 | |
040b4eee | 579 | if (!dominated_by_p (CDI_DOMINATORS, loop->latch, e->dest)) |
580 | add_to_predicate_list (loop, e->dest, cond); | |
1055d96a | 581 | } |
58bc8309 | 582 | |
b01e3c9b | 583 | /* Return true if one of the successor edges of BB exits LOOP. */ |
07c03fb0 | 584 | |
1055d96a | 585 | static bool |
586 | bb_with_exit_edge_p (struct loop *loop, basic_block bb) | |
587 | { | |
588 | edge e; | |
589 | edge_iterator ei; | |
07c03fb0 | 590 | |
1055d96a | 591 | FOR_EACH_EDGE (e, ei, bb->succs) |
592 | if (loop_exit_edge_p (loop, e)) | |
b01e3c9b | 593 | return true; |
07c03fb0 | 594 | |
b01e3c9b | 595 | return false; |
1055d96a | 596 | } |
63a1777b | 597 | |
4bd8a059 | 598 | /* Given PHI which has more than two arguments, this function checks if |
599 | it's if-convertible by degenerating its arguments. Specifically, if | |
600 | below two conditions are satisfied: | |
601 | ||
602 | 1) Number of PHI arguments with different values equals to 2 and one | |
603 | argument has the only occurrence. | |
604 | 2) The edge corresponding to the unique argument isn't critical edge. | |
605 | ||
606 | Such PHI can be handled as PHIs have only two arguments. For example, | |
607 | below PHI: | |
608 | ||
609 | res = PHI <A_1(e1), A_1(e2), A_2(e3)>; | |
610 | ||
611 | can be transformed into: | |
612 | ||
613 | res = (predicate of e3) ? A_2 : A_1; | |
614 | ||
615 | Return TRUE if it is the case, FALSE otherwise. */ | |
616 | ||
617 | static bool | |
618 | phi_convertible_by_degenerating_args (gphi *phi) | |
619 | { | |
620 | edge e; | |
621 | tree arg, t1 = NULL, t2 = NULL; | |
622 | unsigned int i, i1 = 0, i2 = 0, n1 = 0, n2 = 0; | |
623 | unsigned int num_args = gimple_phi_num_args (phi); | |
624 | ||
625 | gcc_assert (num_args > 2); | |
626 | ||
627 | for (i = 0; i < num_args; i++) | |
628 | { | |
629 | arg = gimple_phi_arg_def (phi, i); | |
630 | if (t1 == NULL || operand_equal_p (t1, arg, 0)) | |
631 | { | |
632 | n1++; | |
633 | i1 = i; | |
634 | t1 = arg; | |
635 | } | |
636 | else if (t2 == NULL || operand_equal_p (t2, arg, 0)) | |
637 | { | |
638 | n2++; | |
639 | i2 = i; | |
640 | t2 = arg; | |
641 | } | |
642 | else | |
643 | return false; | |
644 | } | |
645 | ||
646 | if (n1 != 1 && n2 != 1) | |
647 | return false; | |
648 | ||
649 | /* Check if the edge corresponding to the unique arg is critical. */ | |
650 | e = gimple_phi_arg_edge (phi, (n1 == 1) ? i1 : i2); | |
651 | if (EDGE_COUNT (e->src->succs) > 1) | |
652 | return false; | |
653 | ||
654 | return true; | |
655 | } | |
656 | ||
b01e3c9b | 657 | /* Return true when PHI is if-convertible. PHI is part of loop LOOP |
9ab8df54 | 658 | and it belongs to basic block BB. Note at this point, it is sure |
659 | that PHI is if-convertible. This function updates global variable | |
660 | ANY_COMPLICATED_PHI if PHI is complicated. */ | |
07c03fb0 | 661 | |
662 | static bool | |
e6ee4c61 | 663 | if_convertible_phi_p (struct loop *loop, basic_block bb, gphi *phi) |
07c03fb0 | 664 | { |
665 | if (dump_file && (dump_flags & TDF_DETAILS)) | |
666 | { | |
667 | fprintf (dump_file, "-------------------------\n"); | |
75a70cf9 | 668 | print_gimple_stmt (dump_file, phi, 0, TDF_SLIM); |
07c03fb0 | 669 | } |
35a40aad | 670 | |
9ab8df54 | 671 | if (bb != loop->header |
672 | && gimple_phi_num_args (phi) > 2 | |
673 | && !phi_convertible_by_degenerating_args (phi)) | |
674 | any_complicated_phi = true; | |
35a40aad | 675 | |
07c03fb0 | 676 | return true; |
677 | } | |
678 | ||
9cabbd00 | 679 | /* Records the status of a data reference. This struct is attached to |
680 | each DR->aux field. */ | |
681 | ||
682 | struct ifc_dr { | |
0308f68b | 683 | bool rw_unconditionally; |
684 | bool w_unconditionally; | |
685 | bool written_at_least_once; | |
9cabbd00 | 686 | |
0308f68b | 687 | tree rw_predicate; |
688 | tree w_predicate; | |
689 | tree base_w_predicate; | |
9cabbd00 | 690 | }; |
691 | ||
692 | #define IFC_DR(DR) ((struct ifc_dr *) (DR)->aux) | |
0308f68b | 693 | #define DR_BASE_W_UNCONDITIONALLY(DR) (IFC_DR (DR)->written_at_least_once) |
9cabbd00 | 694 | #define DR_RW_UNCONDITIONALLY(DR) (IFC_DR (DR)->rw_unconditionally) |
0308f68b | 695 | #define DR_W_UNCONDITIONALLY(DR) (IFC_DR (DR)->w_unconditionally) |
9cabbd00 | 696 | |
ee0e4f73 | 697 | /* Iterates over DR's and stores refs, DR and base refs, DR pairs in |
698 | HASH tables. While storing them in HASH table, it checks if the | |
699 | reference is unconditionally read or written and stores that as a flag | |
700 | information. For base reference it checks if it is written atlest once | |
701 | unconditionally and stores it as flag information along with DR. | |
702 | In other words for every data reference A in STMT there exist other | |
703 | accesses to a data reference with the same base with predicates that | |
704 | add up (OR-up) to the true predicate: this ensures that the data | |
705 | reference A is touched (read or written) on every iteration of the | |
706 | if-converted loop. */ | |
707 | static void | |
708 | hash_memrefs_baserefs_and_store_DRs_read_written_info (data_reference_p a) | |
e1cc68bd | 709 | { |
e1cc68bd | 710 | |
ee0e4f73 | 711 | data_reference_p *master_dr, *base_master_dr; |
ee0e4f73 | 712 | tree base_ref = DR_BASE_OBJECT (a); |
bd6f374c | 713 | innermost_loop_behavior *innermost = &DR_INNERMOST (a); |
ee0e4f73 | 714 | tree ca = bb_predicate (gimple_bb (DR_STMT (a))); |
715 | bool exist1, exist2; | |
e1cc68bd | 716 | |
bd6f374c | 717 | master_dr = &innermost_DR_map->get_or_insert (innermost, &exist1); |
ee0e4f73 | 718 | if (!exist1) |
0308f68b | 719 | *master_dr = a; |
720 | ||
721 | if (DR_IS_WRITE (a)) | |
ee0e4f73 | 722 | { |
0308f68b | 723 | IFC_DR (*master_dr)->w_predicate |
724 | = fold_or_predicates (UNKNOWN_LOCATION, ca, | |
725 | IFC_DR (*master_dr)->w_predicate); | |
726 | if (is_true_predicate (IFC_DR (*master_dr)->w_predicate)) | |
727 | DR_W_UNCONDITIONALLY (*master_dr) = true; | |
ee0e4f73 | 728 | } |
0308f68b | 729 | IFC_DR (*master_dr)->rw_predicate |
730 | = fold_or_predicates (UNKNOWN_LOCATION, ca, | |
731 | IFC_DR (*master_dr)->rw_predicate); | |
732 | if (is_true_predicate (IFC_DR (*master_dr)->rw_predicate)) | |
733 | DR_RW_UNCONDITIONALLY (*master_dr) = true; | |
e1cc68bd | 734 | |
9d1206d5 | 735 | if (DR_IS_WRITE (a)) |
ee0e4f73 | 736 | { |
9d1206d5 | 737 | base_master_dr = &baseref_DR_map->get_or_insert (base_ref, &exist2); |
9d1206d5 | 738 | if (!exist2) |
0308f68b | 739 | *base_master_dr = a; |
740 | IFC_DR (*base_master_dr)->base_w_predicate | |
741 | = fold_or_predicates (UNKNOWN_LOCATION, ca, | |
742 | IFC_DR (*base_master_dr)->base_w_predicate); | |
743 | if (is_true_predicate (IFC_DR (*base_master_dr)->base_w_predicate)) | |
744 | DR_BASE_W_UNCONDITIONALLY (*base_master_dr) = true; | |
9d1206d5 | 745 | } |
e1cc68bd | 746 | } |
747 | ||
189d0706 | 748 | /* Return TRUE if can prove the index IDX of an array reference REF is |
749 | within array bound. Return false otherwise. */ | |
750 | ||
751 | static bool | |
752 | idx_within_array_bound (tree ref, tree *idx, void *dta) | |
753 | { | |
30b5769f | 754 | wi::overflow_type overflow; |
189d0706 | 755 | widest_int niter, valid_niter, delta, wi_step; |
756 | tree ev, init, step; | |
757 | tree low, high; | |
758 | struct loop *loop = (struct loop*) dta; | |
759 | ||
760 | /* Only support within-bound access for array references. */ | |
761 | if (TREE_CODE (ref) != ARRAY_REF) | |
762 | return false; | |
763 | ||
764 | /* For arrays at the end of the structure, we are not guaranteed that they | |
765 | do not really extend over their declared size. However, for arrays of | |
766 | size greater than one, this is unlikely to be intended. */ | |
767 | if (array_at_struct_end_p (ref)) | |
768 | return false; | |
769 | ||
770 | ev = analyze_scalar_evolution (loop, *idx); | |
771 | ev = instantiate_parameters (loop, ev); | |
772 | init = initial_condition (ev); | |
773 | step = evolution_part_in_loop_num (ev, loop->num); | |
774 | ||
775 | if (!init || TREE_CODE (init) != INTEGER_CST | |
776 | || (step && TREE_CODE (step) != INTEGER_CST)) | |
777 | return false; | |
778 | ||
779 | low = array_ref_low_bound (ref); | |
780 | high = array_ref_up_bound (ref); | |
781 | ||
782 | /* The case of nonconstant bounds could be handled, but it would be | |
783 | complicated. */ | |
784 | if (TREE_CODE (low) != INTEGER_CST | |
785 | || !high || TREE_CODE (high) != INTEGER_CST) | |
786 | return false; | |
787 | ||
788 | /* Check if the intial idx is within bound. */ | |
789 | if (wi::to_widest (init) < wi::to_widest (low) | |
790 | || wi::to_widest (init) > wi::to_widest (high)) | |
791 | return false; | |
792 | ||
793 | /* The idx is always within bound. */ | |
794 | if (!step || integer_zerop (step)) | |
795 | return true; | |
796 | ||
797 | if (!max_loop_iterations (loop, &niter)) | |
798 | return false; | |
799 | ||
800 | if (wi::to_widest (step) < 0) | |
801 | { | |
802 | delta = wi::to_widest (init) - wi::to_widest (low); | |
803 | wi_step = -wi::to_widest (step); | |
804 | } | |
805 | else | |
806 | { | |
807 | delta = wi::to_widest (high) - wi::to_widest (init); | |
808 | wi_step = wi::to_widest (step); | |
809 | } | |
810 | ||
811 | valid_niter = wi::div_floor (delta, wi_step, SIGNED, &overflow); | |
812 | /* The iteration space of idx is within array bound. */ | |
813 | if (!overflow && niter <= valid_niter) | |
814 | return true; | |
815 | ||
816 | return false; | |
817 | } | |
818 | ||
819 | /* Return TRUE if ref is a within bound array reference. */ | |
820 | ||
821 | static bool | |
822 | ref_within_array_bound (gimple *stmt, tree ref) | |
823 | { | |
824 | struct loop *loop = loop_containing_stmt (stmt); | |
825 | ||
826 | gcc_assert (loop != NULL); | |
827 | return for_each_index (&ref, idx_within_array_bound, loop); | |
828 | } | |
829 | ||
830 | ||
831 | /* Given a memory reference expression T, return TRUE if base object | |
832 | it refers to is writable. The base object of a memory reference | |
833 | is the main object being referenced, which is returned by function | |
834 | get_base_address. */ | |
835 | ||
836 | static bool | |
837 | base_object_writable (tree ref) | |
838 | { | |
839 | tree base_tree = get_base_address (ref); | |
840 | ||
841 | return (base_tree | |
842 | && DECL_P (base_tree) | |
843 | && decl_binds_to_current_def_p (base_tree) | |
844 | && !TREE_READONLY (base_tree)); | |
845 | } | |
846 | ||
e1cc68bd | 847 | /* Return true when the memory references of STMT won't trap in the |
848 | if-converted code. There are two things that we have to check for: | |
849 | ||
850 | - writes to memory occur to writable memory: if-conversion of | |
851 | memory writes transforms the conditional memory writes into | |
852 | unconditional writes, i.e. "if (cond) A[i] = foo" is transformed | |
853 | into "A[i] = cond ? foo : A[i]", and as the write to memory may not | |
854 | be executed at all in the original code, it may be a readonly | |
855 | memory. To check that A is not const-qualified, we check that | |
856 | there exists at least an unconditional write to A in the current | |
857 | function. | |
858 | ||
859 | - reads or writes to memory are valid memory accesses for every | |
860 | iteration. To check that the memory accesses are correctly formed | |
861 | and that we are allowed to read and write in these locations, we | |
862 | check that the memory accesses to be if-converted occur at every | |
ee0e4f73 | 863 | iteration unconditionally. |
864 | ||
865 | Returns true for the memory reference in STMT, same memory reference | |
866 | is read or written unconditionally atleast once and the base memory | |
867 | reference is written unconditionally once. This is to check reference | |
868 | will not write fault. Also retuns true if the memory reference is | |
869 | unconditionally read once then we are conditionally writing to memory | |
870 | which is defined as read and write and is bound to the definition | |
871 | we are seeing. */ | |
e1cc68bd | 872 | static bool |
ee0e4f73 | 873 | ifcvt_memrefs_wont_trap (gimple *stmt, vec<data_reference_p> drs) |
e1cc68bd | 874 | { |
dbc55555 | 875 | /* If DR didn't see a reference here we can't use it to tell |
876 | whether the ref traps or not. */ | |
877 | if (gimple_uid (stmt) == 0) | |
878 | return false; | |
879 | ||
ee0e4f73 | 880 | data_reference_p *master_dr, *base_master_dr; |
881 | data_reference_p a = drs[gimple_uid (stmt) - 1]; | |
882 | ||
ee0e4f73 | 883 | tree base = DR_BASE_OBJECT (a); |
bd6f374c | 884 | innermost_loop_behavior *innermost = &DR_INNERMOST (a); |
ee0e4f73 | 885 | |
886 | gcc_assert (DR_STMT (a) == stmt); | |
bd6f374c | 887 | gcc_assert (DR_BASE_ADDRESS (a) || DR_OFFSET (a) |
888 | || DR_INIT (a) || DR_STEP (a)); | |
ee0e4f73 | 889 | |
bd6f374c | 890 | master_dr = innermost_DR_map->get (innermost); |
891 | gcc_assert (master_dr != NULL); | |
ee0e4f73 | 892 | |
ee0e4f73 | 893 | base_master_dr = baseref_DR_map->get (base); |
894 | ||
0308f68b | 895 | /* If a is unconditionally written to it doesn't trap. */ |
896 | if (DR_W_UNCONDITIONALLY (*master_dr)) | |
897 | return true; | |
898 | ||
189d0706 | 899 | /* If a is unconditionally accessed then ... |
900 | ||
901 | Even a is conditional access, we can treat it as an unconditional | |
902 | one if it's an array reference and all its index are within array | |
903 | bound. */ | |
904 | if (DR_RW_UNCONDITIONALLY (*master_dr) | |
905 | || ref_within_array_bound (stmt, DR_REF (a))) | |
ee0e4f73 | 906 | { |
0308f68b | 907 | /* an unconditional read won't trap. */ |
908 | if (DR_IS_READ (a)) | |
909 | return true; | |
910 | ||
911 | /* an unconditionaly write won't trap if the base is written | |
912 | to unconditionally. */ | |
9d1206d5 | 913 | if (base_master_dr |
0308f68b | 914 | && DR_BASE_W_UNCONDITIONALLY (*base_master_dr)) |
d2276060 | 915 | return PARAM_VALUE (PARAM_ALLOW_STORE_DATA_RACES); |
189d0706 | 916 | /* or the base is known to be not readonly. */ |
917 | else if (base_object_writable (DR_REF (a))) | |
918 | return PARAM_VALUE (PARAM_ALLOW_STORE_DATA_RACES); | |
ee0e4f73 | 919 | } |
189d0706 | 920 | |
ee0e4f73 | 921 | return false; |
e1cc68bd | 922 | } |
923 | ||
c71d3c24 | 924 | /* Return true if STMT could be converted into a masked load or store |
925 | (conditional load or store based on a mask computed from bb predicate). */ | |
926 | ||
927 | static bool | |
42acab1c | 928 | ifcvt_can_use_mask_load_store (gimple *stmt) |
c71d3c24 | 929 | { |
c71d3c24 | 930 | /* Check whether this is a load or store. */ |
03821886 | 931 | tree lhs = gimple_assign_lhs (stmt); |
932 | bool is_load; | |
933 | tree ref; | |
c71d3c24 | 934 | if (gimple_store_p (stmt)) |
935 | { | |
936 | if (!is_gimple_val (gimple_assign_rhs1 (stmt))) | |
937 | return false; | |
938 | is_load = false; | |
939 | ref = lhs; | |
940 | } | |
941 | else if (gimple_assign_load_p (stmt)) | |
942 | { | |
943 | is_load = true; | |
944 | ref = gimple_assign_rhs1 (stmt); | |
945 | } | |
946 | else | |
947 | return false; | |
948 | ||
949 | if (may_be_nonaddressable_p (ref)) | |
950 | return false; | |
951 | ||
952 | /* Mask should be integer mode of the same size as the load/store | |
953 | mode. */ | |
03821886 | 954 | machine_mode mode = TYPE_MODE (TREE_TYPE (lhs)); |
2cf1bb25 | 955 | if (!int_mode_for_mode (mode).exists () || VECTOR_MODE_P (mode)) |
c71d3c24 | 956 | return false; |
957 | ||
f636f094 | 958 | if (can_vec_mask_load_store_p (mode, VOIDmode, is_load)) |
c71d3c24 | 959 | return true; |
960 | ||
961 | return false; | |
962 | } | |
963 | ||
03821886 | 964 | /* Return true if STMT could be converted from an operation that is |
965 | unconditional to one that is conditional on a bb predicate mask. */ | |
966 | ||
967 | static bool | |
968 | ifcvt_can_predicate (gimple *stmt) | |
969 | { | |
970 | basic_block bb = gimple_bb (stmt); | |
971 | ||
972 | if (!(flag_tree_loop_vectorize || bb->loop_father->force_vectorize) | |
973 | || bb->loop_father->dont_vectorize | |
974 | || gimple_has_volatile_ops (stmt)) | |
975 | return false; | |
976 | ||
977 | if (gimple_assign_single_p (stmt)) | |
978 | return ifcvt_can_use_mask_load_store (stmt); | |
979 | ||
980 | tree_code code = gimple_assign_rhs_code (stmt); | |
981 | tree lhs_type = TREE_TYPE (gimple_assign_lhs (stmt)); | |
982 | tree rhs_type = TREE_TYPE (gimple_assign_rhs1 (stmt)); | |
983 | if (!types_compatible_p (lhs_type, rhs_type)) | |
984 | return false; | |
985 | internal_fn cond_fn = get_conditional_internal_fn (code); | |
986 | return (cond_fn != IFN_LAST | |
987 | && vectorized_internal_fn_supported_p (cond_fn, lhs_type)); | |
988 | } | |
989 | ||
b01e3c9b | 990 | /* Return true when STMT is if-convertible. |
991 | ||
75a70cf9 | 992 | GIMPLE_ASSIGN statement is not if-convertible if, |
a1660ced | 993 | - it is not movable, |
994 | - it could trap, | |
3b91ccd9 | 995 | - LHS is not var decl. */ |
07c03fb0 | 996 | |
997 | static bool | |
42acab1c | 998 | if_convertible_gimple_assign_stmt_p (gimple *stmt, |
db5c1c9a | 999 | vec<data_reference_p> refs) |
07c03fb0 | 1000 | { |
b01e3c9b | 1001 | tree lhs = gimple_assign_lhs (stmt); |
73772494 | 1002 | |
07c03fb0 | 1003 | if (dump_file && (dump_flags & TDF_DETAILS)) |
1004 | { | |
1005 | fprintf (dump_file, "-------------------------\n"); | |
75a70cf9 | 1006 | print_gimple_stmt (dump_file, stmt, 0, TDF_SLIM); |
07c03fb0 | 1007 | } |
35a40aad | 1008 | |
3b91ccd9 | 1009 | if (!is_gimple_reg_type (TREE_TYPE (lhs))) |
1010 | return false; | |
1011 | ||
73772494 | 1012 | /* Some of these constrains might be too conservative. */ |
75a70cf9 | 1013 | if (stmt_ends_bb_p (stmt) |
1014 | || gimple_has_volatile_ops (stmt) | |
73772494 | 1015 | || (TREE_CODE (lhs) == SSA_NAME |
1016 | && SSA_NAME_OCCURS_IN_ABNORMAL_PHI (lhs)) | |
75a70cf9 | 1017 | || gimple_has_side_effects (stmt)) |
07c03fb0 | 1018 | { |
1019 | if (dump_file && (dump_flags & TDF_DETAILS)) | |
73772494 | 1020 | fprintf (dump_file, "stmt not suitable for ifcvt\n"); |
07c03fb0 | 1021 | return false; |
1022 | } | |
1023 | ||
c71d3c24 | 1024 | /* tree-into-ssa.c uses GF_PLF_1, so avoid it, because |
1025 | in between if_convertible_loop_p and combine_blocks | |
1026 | we can perform loop versioning. */ | |
1027 | gimple_set_plf (stmt, GF_PLF_2, false); | |
1028 | ||
0308f68b | 1029 | if ((! gimple_vuse (stmt) |
1030 | || gimple_could_trap_p_1 (stmt, false, false) | |
1031 | || ! ifcvt_memrefs_wont_trap (stmt, refs)) | |
1032 | && gimple_could_trap_p (stmt)) | |
07c03fb0 | 1033 | { |
03821886 | 1034 | if (ifcvt_can_predicate (stmt)) |
c71d3c24 | 1035 | { |
1036 | gimple_set_plf (stmt, GF_PLF_2, true); | |
03821886 | 1037 | need_to_predicate = true; |
c71d3c24 | 1038 | return true; |
1039 | } | |
07c03fb0 | 1040 | if (dump_file && (dump_flags & TDF_DETAILS)) |
1041 | fprintf (dump_file, "tree could trap...\n"); | |
1042 | return false; | |
1043 | } | |
1044 | ||
f67e390a | 1045 | /* When if-converting stores force versioning, likewise if we |
1046 | ended up generating store data races. */ | |
1047 | if (gimple_vdef (stmt)) | |
03821886 | 1048 | need_to_predicate = true; |
07c03fb0 | 1049 | |
07c03fb0 | 1050 | return true; |
1051 | } | |
1052 | ||
b01e3c9b | 1053 | /* Return true when STMT is if-convertible. |
1054 | ||
1055 | A statement is if-convertible if: | |
c2c4377d | 1056 | - it is an if-convertible GIMPLE_ASSIGN, |
040b4eee | 1057 | - it is a GIMPLE_LABEL or a GIMPLE_COND, |
1058 | - it is builtins call. */ | |
07c03fb0 | 1059 | |
1060 | static bool | |
db5c1c9a | 1061 | if_convertible_stmt_p (gimple *stmt, vec<data_reference_p> refs) |
07c03fb0 | 1062 | { |
75a70cf9 | 1063 | switch (gimple_code (stmt)) |
07c03fb0 | 1064 | { |
75a70cf9 | 1065 | case GIMPLE_LABEL: |
9845d120 | 1066 | case GIMPLE_DEBUG: |
b01e3c9b | 1067 | case GIMPLE_COND: |
1068 | return true; | |
35a40aad | 1069 | |
9845d120 | 1070 | case GIMPLE_ASSIGN: |
db5c1c9a | 1071 | return if_convertible_gimple_assign_stmt_p (stmt, refs); |
35a40aad | 1072 | |
2cac353d | 1073 | case GIMPLE_CALL: |
1074 | { | |
1075 | tree fndecl = gimple_call_fndecl (stmt); | |
1076 | if (fndecl) | |
1077 | { | |
1078 | int flags = gimple_call_flags (stmt); | |
1079 | if ((flags & ECF_CONST) | |
1080 | && !(flags & ECF_LOOPING_CONST_OR_PURE) | |
1081 | /* We can only vectorize some builtins at the moment, | |
1082 | so restrict if-conversion to those. */ | |
a0e9bfbb | 1083 | && fndecl_built_in_p (fndecl)) |
2cac353d | 1084 | return true; |
1085 | } | |
1086 | return false; | |
1087 | } | |
1088 | ||
07c03fb0 | 1089 | default: |
1090 | /* Don't know what to do with 'em so don't do anything. */ | |
1091 | if (dump_file && (dump_flags & TDF_DETAILS)) | |
1092 | { | |
1093 | fprintf (dump_file, "don't know what to do\n"); | |
75a70cf9 | 1094 | print_gimple_stmt (dump_file, stmt, 0, TDF_SLIM); |
07c03fb0 | 1095 | } |
1096 | return false; | |
07c03fb0 | 1097 | } |
1098 | ||
1099 | return true; | |
1100 | } | |
1101 | ||
040b4eee | 1102 | /* Assumes that BB has more than 1 predecessors. |
1103 | Returns false if at least one successor is not on critical edge | |
1104 | and true otherwise. */ | |
1105 | ||
1106 | static inline bool | |
1107 | all_preds_critical_p (basic_block bb) | |
1108 | { | |
1109 | edge e; | |
1110 | edge_iterator ei; | |
1111 | ||
1112 | FOR_EACH_EDGE (e, ei, bb->preds) | |
1113 | if (EDGE_COUNT (e->src->succs) == 1) | |
1114 | return false; | |
1115 | return true; | |
1116 | } | |
1117 | ||
b01e3c9b | 1118 | /* Return true when BB is if-convertible. This routine does not check |
1119 | basic block's statements and phis. | |
1120 | ||
1121 | A basic block is not if-convertible if: | |
1122 | - it is non-empty and it is after the exit block (in BFS order), | |
1123 | - it is after the exit block but before the latch, | |
1124 | - its edges are not normal. | |
1125 | ||
1126 | EXIT_BB is the basic block containing the exit of the LOOP. BB is | |
1127 | inside LOOP. */ | |
07c03fb0 | 1128 | |
35a40aad | 1129 | static bool |
ea3f88a7 | 1130 | if_convertible_bb_p (struct loop *loop, basic_block bb, basic_block exit_bb) |
07c03fb0 | 1131 | { |
1132 | edge e; | |
cd665a06 | 1133 | edge_iterator ei; |
07c03fb0 | 1134 | |
1135 | if (dump_file && (dump_flags & TDF_DETAILS)) | |
1136 | fprintf (dump_file, "----------[%d]-------------\n", bb->index); | |
35a40aad | 1137 | |
040b4eee | 1138 | if (EDGE_COUNT (bb->succs) > 2) |
1139 | return false; | |
1140 | ||
ea3f88a7 | 1141 | if (exit_bb) |
07c03fb0 | 1142 | { |
1143 | if (bb != loop->latch) | |
1144 | { | |
1145 | if (dump_file && (dump_flags & TDF_DETAILS)) | |
1146 | fprintf (dump_file, "basic block after exit bb but before latch\n"); | |
1147 | return false; | |
1148 | } | |
1149 | else if (!empty_block_p (bb)) | |
1150 | { | |
1055d96a | 1151 | if (dump_file && (dump_flags & TDF_DETAILS)) |
1152 | fprintf (dump_file, "non empty basic block after exit bb\n"); | |
1153 | return false; | |
1154 | } | |
1155 | else if (bb == loop->latch | |
1156 | && bb != exit_bb | |
1157 | && !dominated_by_p (CDI_DOMINATORS, bb, exit_bb)) | |
1158 | { | |
1159 | if (dump_file && (dump_flags & TDF_DETAILS)) | |
1160 | fprintf (dump_file, "latch is not dominated by exit_block\n"); | |
1161 | return false; | |
1162 | } | |
1163 | } | |
1164 | ||
1165 | /* Be less adventurous and handle only normal edges. */ | |
1166 | FOR_EACH_EDGE (e, ei, bb->succs) | |
5147ec07 | 1167 | if (e->flags & (EDGE_EH | EDGE_ABNORMAL | EDGE_IRREDUCIBLE_LOOP)) |
1055d96a | 1168 | { |
1169 | if (dump_file && (dump_flags & TDF_DETAILS)) | |
b01e3c9b | 1170 | fprintf (dump_file, "Difficult to handle edges\n"); |
1055d96a | 1171 | return false; |
1172 | } | |
1173 | ||
1174 | return true; | |
1175 | } | |
1176 | ||
b01e3c9b | 1177 | /* Return true when all predecessor blocks of BB are visited. The |
1178 | VISITED bitmap keeps track of the visited blocks. */ | |
1055d96a | 1179 | |
1180 | static bool | |
1181 | pred_blocks_visited_p (basic_block bb, bitmap *visited) | |
1182 | { | |
1183 | edge e; | |
1184 | edge_iterator ei; | |
1185 | FOR_EACH_EDGE (e, ei, bb->preds) | |
1186 | if (!bitmap_bit_p (*visited, e->src->index)) | |
1187 | return false; | |
1188 | ||
1189 | return true; | |
1190 | } | |
1191 | ||
1192 | /* Get body of a LOOP in suitable order for if-conversion. It is | |
1193 | caller's responsibility to deallocate basic block list. | |
1194 | If-conversion suitable order is, breadth first sort (BFS) order | |
1195 | with an additional constraint: select a block only if all its | |
1196 | predecessors are already selected. */ | |
1197 | ||
1198 | static basic_block * | |
1199 | get_loop_body_in_if_conv_order (const struct loop *loop) | |
1200 | { | |
1201 | basic_block *blocks, *blocks_in_bfs_order; | |
1202 | basic_block bb; | |
1203 | bitmap visited; | |
1204 | unsigned int index = 0; | |
1205 | unsigned int visited_count = 0; | |
1206 | ||
1207 | gcc_assert (loop->num_nodes); | |
34154e27 | 1208 | gcc_assert (loop->latch != EXIT_BLOCK_PTR_FOR_FN (cfun)); |
1055d96a | 1209 | |
1210 | blocks = XCNEWVEC (basic_block, loop->num_nodes); | |
1211 | visited = BITMAP_ALLOC (NULL); | |
1212 | ||
1213 | blocks_in_bfs_order = get_loop_body_in_bfs_order (loop); | |
1214 | ||
1215 | index = 0; | |
1216 | while (index < loop->num_nodes) | |
1217 | { | |
1218 | bb = blocks_in_bfs_order [index]; | |
1219 | ||
1220 | if (bb->flags & BB_IRREDUCIBLE_LOOP) | |
1221 | { | |
1222 | free (blocks_in_bfs_order); | |
1223 | BITMAP_FREE (visited); | |
1224 | free (blocks); | |
1225 | return NULL; | |
1226 | } | |
1227 | ||
1228 | if (!bitmap_bit_p (visited, bb->index)) | |
1229 | { | |
1230 | if (pred_blocks_visited_p (bb, &visited) | |
1231 | || bb == loop->header) | |
1232 | { | |
1233 | /* This block is now visited. */ | |
1234 | bitmap_set_bit (visited, bb->index); | |
1235 | blocks[visited_count++] = bb; | |
1236 | } | |
07c03fb0 | 1237 | } |
35a40aad | 1238 | |
1055d96a | 1239 | index++; |
07c03fb0 | 1240 | |
1055d96a | 1241 | if (index == loop->num_nodes |
1242 | && visited_count != loop->num_nodes) | |
1243 | /* Not done yet. */ | |
1244 | index = 0; | |
1245 | } | |
1246 | free (blocks_in_bfs_order); | |
1247 | BITMAP_FREE (visited); | |
1248 | return blocks; | |
07c03fb0 | 1249 | } |
1250 | ||
60e0f7c4 | 1251 | /* Returns true when the analysis of the predicates for all the basic |
1252 | blocks in LOOP succeeded. | |
1253 | ||
fa683b76 | 1254 | predicate_bbs first allocates the predicates of the basic blocks. |
c814c39c | 1255 | These fields are then initialized with the tree expressions |
1256 | representing the predicates under which a basic block is executed | |
1257 | in the LOOP. As the loop->header is executed at each iteration, it | |
1258 | has the "true" predicate. Other statements executed under a | |
1259 | condition are predicated with that condition, for example | |
60e0f7c4 | 1260 | |
1261 | | if (x) | |
1262 | | S1; | |
1263 | | else | |
1264 | | S2; | |
1265 | ||
5f7a9884 | 1266 | S1 will be predicated with "x", and |
1267 | S2 will be predicated with "!x". */ | |
60e0f7c4 | 1268 | |
c71d3c24 | 1269 | static void |
60e0f7c4 | 1270 | predicate_bbs (loop_p loop) |
1271 | { | |
1272 | unsigned int i; | |
1273 | ||
1274 | for (i = 0; i < loop->num_nodes; i++) | |
fa683b76 | 1275 | init_bb_predicate (ifc_bbs[i]); |
60e0f7c4 | 1276 | |
1277 | for (i = 0; i < loop->num_nodes; i++) | |
1278 | { | |
fa683b76 | 1279 | basic_block bb = ifc_bbs[i]; |
1280 | tree cond; | |
42acab1c | 1281 | gimple *stmt; |
60e0f7c4 | 1282 | |
040b4eee | 1283 | /* The loop latch and loop exit block are always executed and |
1284 | have no extra conditions to be processed: skip them. */ | |
1285 | if (bb == loop->latch | |
1286 | || bb_with_exit_edge_p (loop, bb)) | |
fa683b76 | 1287 | { |
040b4eee | 1288 | reset_bb_predicate (bb); |
fa683b76 | 1289 | continue; |
1290 | } | |
1291 | ||
1292 | cond = bb_predicate (bb); | |
c71d3c24 | 1293 | stmt = last_stmt (bb); |
1294 | if (stmt && gimple_code (stmt) == GIMPLE_COND) | |
60e0f7c4 | 1295 | { |
c71d3c24 | 1296 | tree c2; |
1297 | edge true_edge, false_edge; | |
1298 | location_t loc = gimple_location (stmt); | |
040b4eee | 1299 | tree c = build2_loc (loc, gimple_cond_code (stmt), |
c71d3c24 | 1300 | boolean_type_node, |
1301 | gimple_cond_lhs (stmt), | |
1302 | gimple_cond_rhs (stmt)); | |
1303 | ||
1304 | /* Add new condition into destination's predicate list. */ | |
1305 | extract_true_false_edges_from_block (gimple_bb (stmt), | |
1306 | &true_edge, &false_edge); | |
1307 | ||
1308 | /* If C is true, then TRUE_EDGE is taken. */ | |
1309 | add_to_dst_predicate_list (loop, true_edge, unshare_expr (cond), | |
1310 | unshare_expr (c)); | |
1311 | ||
1312 | /* If C is false, then FALSE_EDGE is taken. */ | |
1313 | c2 = build1_loc (loc, TRUTH_NOT_EXPR, boolean_type_node, | |
1314 | unshare_expr (c)); | |
1315 | add_to_dst_predicate_list (loop, false_edge, | |
1316 | unshare_expr (cond), c2); | |
1317 | ||
1318 | cond = NULL_TREE; | |
60e0f7c4 | 1319 | } |
1320 | ||
1321 | /* If current bb has only one successor, then consider it as an | |
1322 | unconditional goto. */ | |
1323 | if (single_succ_p (bb)) | |
1324 | { | |
1325 | basic_block bb_n = single_succ (bb); | |
1326 | ||
1327 | /* The successor bb inherits the predicate of its | |
1328 | predecessor. If there is no predicate in the predecessor | |
1329 | bb, then consider the successor bb as always executed. */ | |
1330 | if (cond == NULL_TREE) | |
1331 | cond = boolean_true_node; | |
1332 | ||
c71d3c24 | 1333 | add_to_predicate_list (loop, bb_n, cond); |
60e0f7c4 | 1334 | } |
1335 | } | |
1336 | ||
1337 | /* The loop header is always executed. */ | |
48c9b1fe | 1338 | reset_bb_predicate (loop->header); |
fa683b76 | 1339 | gcc_assert (bb_predicate_gimplified_stmts (loop->header) == NULL |
1340 | && bb_predicate_gimplified_stmts (loop->latch) == NULL); | |
60e0f7c4 | 1341 | } |
1342 | ||
84769861 | 1343 | /* Build region by adding loop pre-header and post-header blocks. */ |
1344 | ||
1345 | static vec<basic_block> | |
1346 | build_region (struct loop *loop) | |
1347 | { | |
1348 | vec<basic_block> region = vNULL; | |
1349 | basic_block exit_bb = NULL; | |
1350 | ||
1351 | gcc_assert (ifc_bbs); | |
1352 | /* The first element is loop pre-header. */ | |
1353 | region.safe_push (loop_preheader_edge (loop)->src); | |
1354 | ||
1355 | for (unsigned int i = 0; i < loop->num_nodes; i++) | |
1356 | { | |
1357 | basic_block bb = ifc_bbs[i]; | |
1358 | region.safe_push (bb); | |
1359 | /* Find loop postheader. */ | |
1360 | edge e; | |
1361 | edge_iterator ei; | |
1362 | FOR_EACH_EDGE (e, ei, bb->succs) | |
1363 | if (loop_exit_edge_p (loop, e)) | |
1364 | { | |
1365 | exit_bb = e->dest; | |
1366 | break; | |
1367 | } | |
1368 | } | |
1369 | /* The last element is loop post-header. */ | |
1370 | gcc_assert (exit_bb); | |
1371 | region.safe_push (exit_bb); | |
1372 | return region; | |
1373 | } | |
1374 | ||
e1cc68bd | 1375 | /* Return true when LOOP is if-convertible. This is a helper function |
1376 | for if_convertible_loop_p. REFS and DDRS are initialized and freed | |
1377 | in if_convertible_loop_p. */ | |
07c03fb0 | 1378 | |
1379 | static bool | |
db5c1c9a | 1380 | if_convertible_loop_p_1 (struct loop *loop, vec<data_reference_p> *refs) |
07c03fb0 | 1381 | { |
07c03fb0 | 1382 | unsigned int i; |
ea3f88a7 | 1383 | basic_block exit_bb = NULL; |
84769861 | 1384 | vec<basic_block> region; |
07c03fb0 | 1385 | |
0d4f3f8c | 1386 | if (find_data_references_in_loop (loop, refs) == chrec_dont_know) |
e1cc68bd | 1387 | return false; |
fa6e55f9 | 1388 | |
07c03fb0 | 1389 | calculate_dominance_info (CDI_DOMINATORS); |
07c03fb0 | 1390 | |
1391 | /* Allow statements that can be handled during if-conversion. */ | |
1392 | ifc_bbs = get_loop_body_in_if_conv_order (loop); | |
1393 | if (!ifc_bbs) | |
1394 | { | |
1395 | if (dump_file && (dump_flags & TDF_DETAILS)) | |
d11c70fa | 1396 | fprintf (dump_file, "Irreducible loop\n"); |
07c03fb0 | 1397 | return false; |
1398 | } | |
35a40aad | 1399 | |
07c03fb0 | 1400 | for (i = 0; i < loop->num_nodes; i++) |
1401 | { | |
60e0f7c4 | 1402 | basic_block bb = ifc_bbs[i]; |
07c03fb0 | 1403 | |
ea3f88a7 | 1404 | if (!if_convertible_bb_p (loop, bb, exit_bb)) |
07c03fb0 | 1405 | return false; |
1406 | ||
60e0f7c4 | 1407 | if (bb_with_exit_edge_p (loop, bb)) |
1408 | exit_bb = bb; | |
1409 | } | |
1410 | ||
c71d3c24 | 1411 | for (i = 0; i < loop->num_nodes; i++) |
1412 | { | |
1413 | basic_block bb = ifc_bbs[i]; | |
1414 | gimple_stmt_iterator gsi; | |
1415 | ||
1416 | for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi)) | |
1417 | switch (gimple_code (gsi_stmt (gsi))) | |
1418 | { | |
1419 | case GIMPLE_LABEL: | |
1420 | case GIMPLE_ASSIGN: | |
1421 | case GIMPLE_CALL: | |
1422 | case GIMPLE_DEBUG: | |
1423 | case GIMPLE_COND: | |
ee0e4f73 | 1424 | gimple_set_uid (gsi_stmt (gsi), 0); |
c71d3c24 | 1425 | break; |
1426 | default: | |
1427 | return false; | |
1428 | } | |
1429 | } | |
60e0f7c4 | 1430 | |
9cda83ad | 1431 | data_reference_p dr; |
9cabbd00 | 1432 | |
bd6f374c | 1433 | innermost_DR_map |
1434 | = new hash_map<innermost_loop_behavior_hash, data_reference_p>; | |
ee0e4f73 | 1435 | baseref_DR_map = new hash_map<tree_operand_hash, data_reference_p>; |
1436 | ||
84769861 | 1437 | /* Compute post-dominator tree locally. */ |
1438 | region = build_region (loop); | |
1439 | calculate_dominance_info_for_region (CDI_POST_DOMINATORS, region); | |
1440 | ||
ee0e4f73 | 1441 | predicate_bbs (loop); |
1442 | ||
84769861 | 1443 | /* Free post-dominator tree since it is not used after predication. */ |
1444 | free_dominance_info_for_region (cfun, CDI_POST_DOMINATORS, region); | |
1445 | region.release (); | |
1446 | ||
9cda83ad | 1447 | for (i = 0; refs->iterate (i, &dr); i++) |
1448 | { | |
bd6f374c | 1449 | tree ref = DR_REF (dr); |
1450 | ||
9cda83ad | 1451 | dr->aux = XNEW (struct ifc_dr); |
0308f68b | 1452 | DR_BASE_W_UNCONDITIONALLY (dr) = false; |
1453 | DR_RW_UNCONDITIONALLY (dr) = false; | |
1454 | DR_W_UNCONDITIONALLY (dr) = false; | |
1455 | IFC_DR (dr)->rw_predicate = boolean_false_node; | |
1456 | IFC_DR (dr)->w_predicate = boolean_false_node; | |
1457 | IFC_DR (dr)->base_w_predicate = boolean_false_node; | |
ee0e4f73 | 1458 | if (gimple_uid (DR_STMT (dr)) == 0) |
1459 | gimple_set_uid (DR_STMT (dr), i + 1); | |
bd6f374c | 1460 | |
1461 | /* If DR doesn't have innermost loop behavior or it's a compound | |
1462 | memory reference, we synthesize its innermost loop behavior | |
1463 | for hashing. */ | |
1464 | if (TREE_CODE (ref) == COMPONENT_REF | |
1465 | || TREE_CODE (ref) == IMAGPART_EXPR | |
1466 | || TREE_CODE (ref) == REALPART_EXPR | |
1467 | || !(DR_BASE_ADDRESS (dr) || DR_OFFSET (dr) | |
1468 | || DR_INIT (dr) || DR_STEP (dr))) | |
1469 | { | |
1470 | while (TREE_CODE (ref) == COMPONENT_REF | |
1471 | || TREE_CODE (ref) == IMAGPART_EXPR | |
1472 | || TREE_CODE (ref) == REALPART_EXPR) | |
1473 | ref = TREE_OPERAND (ref, 0); | |
1474 | ||
a7e05ef2 | 1475 | memset (&DR_INNERMOST (dr), 0, sizeof (DR_INNERMOST (dr))); |
1476 | DR_BASE_ADDRESS (dr) = ref; | |
bd6f374c | 1477 | } |
ee0e4f73 | 1478 | hash_memrefs_baserefs_and_store_DRs_read_written_info (dr); |
9cabbd00 | 1479 | } |
1480 | ||
60e0f7c4 | 1481 | for (i = 0; i < loop->num_nodes; i++) |
1482 | { | |
1483 | basic_block bb = ifc_bbs[i]; | |
1484 | gimple_stmt_iterator itr; | |
1485 | ||
e1cc68bd | 1486 | /* Check the if-convertibility of statements in predicated BBs. */ |
c71d3c24 | 1487 | if (!dominated_by_p (CDI_DOMINATORS, loop->latch, bb)) |
e1cc68bd | 1488 | for (itr = gsi_start_bb (bb); !gsi_end_p (itr); gsi_next (&itr)) |
db5c1c9a | 1489 | if (!if_convertible_stmt_p (gsi_stmt (itr), *refs)) |
e1cc68bd | 1490 | return false; |
07c03fb0 | 1491 | } |
1492 | ||
c71d3c24 | 1493 | /* Checking PHIs needs to be done after stmts, as the fact whether there |
1494 | are any masked loads or stores affects the tests. */ | |
1495 | for (i = 0; i < loop->num_nodes; i++) | |
1496 | { | |
1497 | basic_block bb = ifc_bbs[i]; | |
1a91d914 | 1498 | gphi_iterator itr; |
c71d3c24 | 1499 | |
1500 | for (itr = gsi_start_phis (bb); !gsi_end_p (itr); gsi_next (&itr)) | |
e6ee4c61 | 1501 | if (!if_convertible_phi_p (loop, bb, itr.phi ())) |
c71d3c24 | 1502 | return false; |
1503 | } | |
1504 | ||
07c03fb0 | 1505 | if (dump_file) |
d11c70fa | 1506 | fprintf (dump_file, "Applying if-conversion\n"); |
07c03fb0 | 1507 | |
07c03fb0 | 1508 | return true; |
1509 | } | |
1510 | ||
e1cc68bd | 1511 | /* Return true when LOOP is if-convertible. |
1512 | LOOP is if-convertible if: | |
1513 | - it is innermost, | |
1514 | - it has two or more basic blocks, | |
1515 | - it has only one exit, | |
1516 | - loop header is not the exit edge, | |
1517 | - if its basic blocks and phi nodes are if convertible. */ | |
1518 | ||
1519 | static bool | |
db5c1c9a | 1520 | if_convertible_loop_p (struct loop *loop) |
e1cc68bd | 1521 | { |
1522 | edge e; | |
1523 | edge_iterator ei; | |
1524 | bool res = false; | |
f1f41a6c | 1525 | vec<data_reference_p> refs; |
e1cc68bd | 1526 | |
1527 | /* Handle only innermost loop. */ | |
1528 | if (!loop || loop->inner) | |
1529 | { | |
1530 | if (dump_file && (dump_flags & TDF_DETAILS)) | |
1531 | fprintf (dump_file, "not innermost loop\n"); | |
1532 | return false; | |
1533 | } | |
1534 | ||
1535 | /* If only one block, no need for if-conversion. */ | |
1536 | if (loop->num_nodes <= 2) | |
1537 | { | |
1538 | if (dump_file && (dump_flags & TDF_DETAILS)) | |
1539 | fprintf (dump_file, "less than 2 basic blocks\n"); | |
1540 | return false; | |
1541 | } | |
1542 | ||
1543 | /* More than one loop exit is too much to handle. */ | |
1544 | if (!single_exit (loop)) | |
1545 | { | |
1546 | if (dump_file && (dump_flags & TDF_DETAILS)) | |
1547 | fprintf (dump_file, "multiple exits\n"); | |
1548 | return false; | |
1549 | } | |
1550 | ||
1551 | /* If one of the loop header's edge is an exit edge then do not | |
1552 | apply if-conversion. */ | |
1553 | FOR_EACH_EDGE (e, ei, loop->header->succs) | |
1554 | if (loop_exit_edge_p (loop, e)) | |
1555 | return false; | |
1556 | ||
f1f41a6c | 1557 | refs.create (5); |
db5c1c9a | 1558 | res = if_convertible_loop_p_1 (loop, &refs); |
e1cc68bd | 1559 | |
9cda83ad | 1560 | data_reference_p dr; |
1561 | unsigned int i; | |
1562 | for (i = 0; refs.iterate (i, &dr); i++) | |
1563 | free (dr->aux); | |
9cabbd00 | 1564 | |
e1cc68bd | 1565 | free_data_refs (refs); |
ee0e4f73 | 1566 | |
bd6f374c | 1567 | delete innermost_DR_map; |
1568 | innermost_DR_map = NULL; | |
ee0e4f73 | 1569 | |
1570 | delete baseref_DR_map; | |
1571 | baseref_DR_map = NULL; | |
1572 | ||
e1cc68bd | 1573 | return res; |
1574 | } | |
1575 | ||
0b6de244 | 1576 | /* Returns true if def-stmt for phi argument ARG is simple increment/decrement |
1577 | which is in predicated basic block. | |
1578 | In fact, the following PHI pattern is searching: | |
1579 | loop-header: | |
1580 | reduc_1 = PHI <..., reduc_2> | |
1581 | ... | |
1582 | if (...) | |
1583 | reduc_3 = ... | |
1584 | reduc_2 = PHI <reduc_1, reduc_3> | |
1585 | ||
040b4eee | 1586 | ARG_0 and ARG_1 are correspondent PHI arguments. |
1587 | REDUC, OP0 and OP1 contain reduction stmt and its operands. | |
1588 | EXTENDED is true if PHI has > 2 arguments. */ | |
0b6de244 | 1589 | |
1590 | static bool | |
42acab1c | 1591 | is_cond_scalar_reduction (gimple *phi, gimple **reduc, tree arg_0, tree arg_1, |
040b4eee | 1592 | tree *op0, tree *op1, bool extended) |
0b6de244 | 1593 | { |
1594 | tree lhs, r_op1, r_op2; | |
42acab1c | 1595 | gimple *stmt; |
1596 | gimple *header_phi = NULL; | |
0b6de244 | 1597 | enum tree_code reduction_op; |
91c4c1db | 1598 | basic_block bb = gimple_bb (phi); |
1599 | struct loop *loop = bb->loop_father; | |
0b6de244 | 1600 | edge latch_e = loop_latch_edge (loop); |
f8212648 | 1601 | imm_use_iterator imm_iter; |
1602 | use_operand_p use_p; | |
040b4eee | 1603 | edge e; |
1604 | edge_iterator ei; | |
1605 | bool result = false; | |
0b6de244 | 1606 | if (TREE_CODE (arg_0) != SSA_NAME || TREE_CODE (arg_1) != SSA_NAME) |
1607 | return false; | |
1608 | ||
040b4eee | 1609 | if (!extended && gimple_code (SSA_NAME_DEF_STMT (arg_0)) == GIMPLE_PHI) |
0b6de244 | 1610 | { |
1611 | lhs = arg_1; | |
1612 | header_phi = SSA_NAME_DEF_STMT (arg_0); | |
1613 | stmt = SSA_NAME_DEF_STMT (arg_1); | |
1614 | } | |
1615 | else if (gimple_code (SSA_NAME_DEF_STMT (arg_1)) == GIMPLE_PHI) | |
1616 | { | |
1617 | lhs = arg_0; | |
1618 | header_phi = SSA_NAME_DEF_STMT (arg_1); | |
1619 | stmt = SSA_NAME_DEF_STMT (arg_0); | |
1620 | } | |
1621 | else | |
1622 | return false; | |
1623 | if (gimple_bb (header_phi) != loop->header) | |
1624 | return false; | |
1625 | ||
1626 | if (PHI_ARG_DEF_FROM_EDGE (header_phi, latch_e) != PHI_RESULT (phi)) | |
1627 | return false; | |
1628 | ||
1629 | if (gimple_code (stmt) != GIMPLE_ASSIGN | |
1630 | || gimple_has_volatile_ops (stmt)) | |
1631 | return false; | |
1632 | ||
3c7bf6da | 1633 | if (!flow_bb_inside_loop_p (loop, gimple_bb (stmt))) |
1634 | return false; | |
1635 | ||
0b6de244 | 1636 | if (!is_predicated (gimple_bb (stmt))) |
1637 | return false; | |
1638 | ||
91c4c1db | 1639 | /* Check that stmt-block is predecessor of phi-block. */ |
040b4eee | 1640 | FOR_EACH_EDGE (e, ei, gimple_bb (stmt)->succs) |
1641 | if (e->dest == bb) | |
1642 | { | |
1643 | result = true; | |
1644 | break; | |
1645 | } | |
1646 | if (!result) | |
91c4c1db | 1647 | return false; |
1648 | ||
0b6de244 | 1649 | if (!has_single_use (lhs)) |
1650 | return false; | |
1651 | ||
1652 | reduction_op = gimple_assign_rhs_code (stmt); | |
1653 | if (reduction_op != PLUS_EXPR && reduction_op != MINUS_EXPR) | |
1654 | return false; | |
1655 | r_op1 = gimple_assign_rhs1 (stmt); | |
1656 | r_op2 = gimple_assign_rhs2 (stmt); | |
1657 | ||
1658 | /* Make R_OP1 to hold reduction variable. */ | |
1659 | if (r_op2 == PHI_RESULT (header_phi) | |
1660 | && reduction_op == PLUS_EXPR) | |
a4f59596 | 1661 | std::swap (r_op1, r_op2); |
0b6de244 | 1662 | else if (r_op1 != PHI_RESULT (header_phi)) |
1663 | return false; | |
1664 | ||
f8212648 | 1665 | /* Check that R_OP1 is used in reduction stmt or in PHI only. */ |
1666 | FOR_EACH_IMM_USE_FAST (use_p, imm_iter, r_op1) | |
1667 | { | |
42acab1c | 1668 | gimple *use_stmt = USE_STMT (use_p); |
f8212648 | 1669 | if (is_gimple_debug (use_stmt)) |
1670 | continue; | |
1671 | if (use_stmt == stmt) | |
1672 | continue; | |
1673 | if (gimple_code (use_stmt) != GIMPLE_PHI) | |
1674 | return false; | |
1675 | } | |
1676 | ||
0b6de244 | 1677 | *op0 = r_op1; *op1 = r_op2; |
1678 | *reduc = stmt; | |
1679 | return true; | |
1680 | } | |
1681 | ||
1682 | /* Converts conditional scalar reduction into unconditional form, e.g. | |
1683 | bb_4 | |
1684 | if (_5 != 0) goto bb_5 else goto bb_6 | |
1685 | end_bb_4 | |
1686 | bb_5 | |
1687 | res_6 = res_13 + 1; | |
1688 | end_bb_5 | |
1689 | bb_6 | |
1690 | # res_2 = PHI <res_13(4), res_6(5)> | |
1691 | end_bb_6 | |
1692 | ||
1693 | will be converted into sequence | |
1694 | _ifc__1 = _5 != 0 ? 1 : 0; | |
1695 | res_2 = res_13 + _ifc__1; | |
1696 | Argument SWAP tells that arguments of conditional expression should be | |
1697 | swapped. | |
1698 | Returns rhs of resulting PHI assignment. */ | |
1699 | ||
1700 | static tree | |
42acab1c | 1701 | convert_scalar_cond_reduction (gimple *reduc, gimple_stmt_iterator *gsi, |
0b6de244 | 1702 | tree cond, tree op0, tree op1, bool swap) |
1703 | { | |
1704 | gimple_stmt_iterator stmt_it; | |
42acab1c | 1705 | gimple *new_assign; |
0b6de244 | 1706 | tree rhs; |
1707 | tree rhs1 = gimple_assign_rhs1 (reduc); | |
1708 | tree tmp = make_temp_ssa_name (TREE_TYPE (rhs1), NULL, "_ifc_"); | |
1709 | tree c; | |
1710 | tree zero = build_zero_cst (TREE_TYPE (rhs1)); | |
1711 | ||
1712 | if (dump_file && (dump_flags & TDF_DETAILS)) | |
1713 | { | |
1714 | fprintf (dump_file, "Found cond scalar reduction.\n"); | |
1715 | print_gimple_stmt (dump_file, reduc, 0, TDF_SLIM); | |
1716 | } | |
1717 | ||
1718 | /* Build cond expression using COND and constant operand | |
1719 | of reduction rhs. */ | |
1720 | c = fold_build_cond_expr (TREE_TYPE (rhs1), | |
1721 | unshare_expr (cond), | |
1722 | swap ? zero : op1, | |
1723 | swap ? op1 : zero); | |
1724 | ||
1725 | /* Create assignment stmt and insert it at GSI. */ | |
1726 | new_assign = gimple_build_assign (tmp, c); | |
1727 | gsi_insert_before (gsi, new_assign, GSI_SAME_STMT); | |
1728 | /* Build rhs for unconditional increment/decrement. */ | |
1729 | rhs = fold_build2 (gimple_assign_rhs_code (reduc), | |
1730 | TREE_TYPE (rhs1), op0, tmp); | |
1731 | ||
1732 | /* Delete original reduction stmt. */ | |
1733 | stmt_it = gsi_for_stmt (reduc); | |
1734 | gsi_remove (&stmt_it, true); | |
1735 | release_defs (reduc); | |
1736 | return rhs; | |
1737 | } | |
1738 | ||
f4ff098a | 1739 | /* Produce condition for all occurrences of ARG in PHI node. */ |
040b4eee | 1740 | |
1741 | static tree | |
1742 | gen_phi_arg_condition (gphi *phi, vec<int> *occur, | |
1743 | gimple_stmt_iterator *gsi) | |
1744 | { | |
1745 | int len; | |
1746 | int i; | |
1747 | tree cond = NULL_TREE; | |
1748 | tree c; | |
1749 | edge e; | |
1750 | ||
1751 | len = occur->length (); | |
1752 | gcc_assert (len > 0); | |
1753 | for (i = 0; i < len; i++) | |
1754 | { | |
1755 | e = gimple_phi_arg_edge (phi, (*occur)[i]); | |
1756 | c = bb_predicate (e->src); | |
1757 | if (is_true_predicate (c)) | |
ecaa5fd4 | 1758 | { |
1759 | cond = c; | |
1760 | break; | |
1761 | } | |
040b4eee | 1762 | c = force_gimple_operand_gsi_1 (gsi, unshare_expr (c), |
1763 | is_gimple_condexpr, NULL_TREE, | |
1764 | true, GSI_SAME_STMT); | |
1765 | if (cond != NULL_TREE) | |
1766 | { | |
1767 | /* Must build OR expression. */ | |
1768 | cond = fold_or_predicates (EXPR_LOCATION (c), c, cond); | |
1769 | cond = force_gimple_operand_gsi_1 (gsi, unshare_expr (cond), | |
1770 | is_gimple_condexpr, NULL_TREE, | |
1771 | true, GSI_SAME_STMT); | |
1772 | } | |
1773 | else | |
1774 | cond = c; | |
1775 | } | |
1776 | gcc_assert (cond != NULL_TREE); | |
1777 | return cond; | |
1778 | } | |
1779 | ||
83c0fb43 | 1780 | /* Local valueization callback that follows all-use SSA edges. */ |
1781 | ||
1782 | static tree | |
1783 | ifcvt_follow_ssa_use_edges (tree val) | |
1784 | { | |
1785 | return val; | |
1786 | } | |
1787 | ||
3b91ccd9 | 1788 | /* Replace a scalar PHI node with a COND_EXPR using COND as condition. |
040b4eee | 1789 | This routine can handle PHI nodes with more than two arguments. |
07c03fb0 | 1790 | |
07c03fb0 | 1791 | For example, |
10b55fcb | 1792 | S1: A = PHI <x1(1), x2(5)> |
07c03fb0 | 1793 | is converted into, |
1794 | S2: A = cond ? x1 : x2; | |
b01e3c9b | 1795 | |
1796 | The generated code is inserted at GSI that points to the top of | |
040b4eee | 1797 | basic block's statement list. |
1798 | If PHI node has more than two arguments a chain of conditional | |
1799 | expression is produced. */ | |
1800 | ||
07c03fb0 | 1801 | |
1802 | static void | |
040b4eee | 1803 | predicate_scalar_phi (gphi *phi, gimple_stmt_iterator *gsi) |
07c03fb0 | 1804 | { |
42acab1c | 1805 | gimple *new_stmt = NULL, *reduc; |
040b4eee | 1806 | tree rhs, res, arg0, arg1, op0, op1, scev; |
1807 | tree cond; | |
1808 | unsigned int index0; | |
1809 | unsigned int max, args_len; | |
1810 | edge e; | |
07c03fb0 | 1811 | basic_block bb; |
040b4eee | 1812 | unsigned int i; |
48e1416a | 1813 | |
3b91ccd9 | 1814 | res = gimple_phi_result (phi); |
7c782c9b | 1815 | if (virtual_operand_p (res)) |
3b91ccd9 | 1816 | return; |
1817 | ||
040b4eee | 1818 | if ((rhs = degenerate_phi_result (phi)) |
119cb9ea | 1819 | || ((scev = analyze_scalar_evolution (gimple_bb (phi)->loop_father, |
1820 | res)) | |
1821 | && !chrec_contains_undetermined (scev) | |
1822 | && scev != res | |
040b4eee | 1823 | && (rhs = gimple_phi_arg_def (phi, 0)))) |
07c03fb0 | 1824 | { |
040b4eee | 1825 | if (dump_file && (dump_flags & TDF_DETAILS)) |
1826 | { | |
1827 | fprintf (dump_file, "Degenerate phi!\n"); | |
1828 | print_gimple_stmt (dump_file, phi, 0, TDF_SLIM); | |
1829 | } | |
1830 | new_stmt = gimple_build_assign (res, rhs); | |
1831 | gsi_insert_before (gsi, new_stmt, GSI_SAME_STMT); | |
1832 | update_stmt (new_stmt); | |
1833 | return; | |
1834 | } | |
0b6de244 | 1835 | |
040b4eee | 1836 | bb = gimple_bb (phi); |
1837 | if (EDGE_COUNT (bb->preds) == 2) | |
1838 | { | |
1839 | /* Predicate ordinary PHI node with 2 arguments. */ | |
1840 | edge first_edge, second_edge; | |
1841 | basic_block true_bb; | |
1842 | first_edge = EDGE_PRED (bb, 0); | |
1843 | second_edge = EDGE_PRED (bb, 1); | |
1844 | cond = bb_predicate (first_edge->src); | |
1845 | if (TREE_CODE (cond) == TRUTH_NOT_EXPR) | |
a4f59596 | 1846 | std::swap (first_edge, second_edge); |
040b4eee | 1847 | if (EDGE_COUNT (first_edge->src->succs) > 1) |
1848 | { | |
1849 | cond = bb_predicate (second_edge->src); | |
1850 | if (TREE_CODE (cond) == TRUTH_NOT_EXPR) | |
1851 | cond = TREE_OPERAND (cond, 0); | |
1852 | else | |
1853 | first_edge = second_edge; | |
1854 | } | |
1855 | else | |
1856 | cond = bb_predicate (first_edge->src); | |
1857 | /* Gimplify the condition to a valid cond-expr conditonal operand. */ | |
1858 | cond = force_gimple_operand_gsi_1 (gsi, unshare_expr (cond), | |
1859 | is_gimple_condexpr, NULL_TREE, | |
1860 | true, GSI_SAME_STMT); | |
1861 | true_bb = first_edge->src; | |
9fc1b637 | 1862 | if (EDGE_PRED (bb, 1)->src == true_bb) |
1863 | { | |
040b4eee | 1864 | arg0 = gimple_phi_arg_def (phi, 1); |
1865 | arg1 = gimple_phi_arg_def (phi, 0); | |
9fc1b637 | 1866 | } |
1867 | else | |
1868 | { | |
040b4eee | 1869 | arg0 = gimple_phi_arg_def (phi, 0); |
1870 | arg1 = gimple_phi_arg_def (phi, 1); | |
9fc1b637 | 1871 | } |
040b4eee | 1872 | if (is_cond_scalar_reduction (phi, &reduc, arg0, arg1, |
1873 | &op0, &op1, false)) | |
0b6de244 | 1874 | /* Convert reduction stmt into vectorizable form. */ |
1875 | rhs = convert_scalar_cond_reduction (reduc, gsi, cond, op0, op1, | |
1876 | true_bb != gimple_bb (reduc)); | |
1877 | else | |
1878 | /* Build new RHS using selected condition and arguments. */ | |
1879 | rhs = fold_build_cond_expr (TREE_TYPE (res), unshare_expr (cond), | |
040b4eee | 1880 | arg0, arg1); |
1881 | new_stmt = gimple_build_assign (res, rhs); | |
1882 | gsi_insert_before (gsi, new_stmt, GSI_SAME_STMT); | |
83c0fb43 | 1883 | gimple_stmt_iterator new_gsi = gsi_for_stmt (new_stmt); |
483d5f69 | 1884 | if (fold_stmt (&new_gsi, ifcvt_follow_ssa_use_edges)) |
1885 | { | |
1886 | new_stmt = gsi_stmt (new_gsi); | |
1887 | update_stmt (new_stmt); | |
1888 | } | |
040b4eee | 1889 | |
1890 | if (dump_file && (dump_flags & TDF_DETAILS)) | |
1891 | { | |
1892 | fprintf (dump_file, "new phi replacement stmt\n"); | |
1893 | print_gimple_stmt (dump_file, new_stmt, 0, TDF_SLIM); | |
1894 | } | |
1895 | return; | |
1896 | } | |
1897 | ||
1898 | /* Create hashmap for PHI node which contain vector of argument indexes | |
1899 | having the same value. */ | |
1900 | bool swap = false; | |
ee34b0e4 | 1901 | hash_map<tree_operand_hash, auto_vec<int> > phi_arg_map; |
040b4eee | 1902 | unsigned int num_args = gimple_phi_num_args (phi); |
1903 | int max_ind = -1; | |
1904 | /* Vector of different PHI argument values. */ | |
1905 | auto_vec<tree> args (num_args); | |
1906 | ||
1907 | /* Compute phi_arg_map. */ | |
1908 | for (i = 0; i < num_args; i++) | |
1909 | { | |
1910 | tree arg; | |
1911 | ||
1912 | arg = gimple_phi_arg_def (phi, i); | |
1913 | if (!phi_arg_map.get (arg)) | |
1914 | args.quick_push (arg); | |
1915 | phi_arg_map.get_or_insert (arg).safe_push (i); | |
1916 | } | |
1917 | ||
1918 | /* Determine element with max number of occurrences. */ | |
1919 | max_ind = -1; | |
1920 | max = 1; | |
1921 | args_len = args.length (); | |
1922 | for (i = 0; i < args_len; i++) | |
1923 | { | |
1924 | unsigned int len; | |
1925 | if ((len = phi_arg_map.get (args[i])->length ()) > max) | |
1926 | { | |
1927 | max_ind = (int) i; | |
1928 | max = len; | |
1929 | } | |
1930 | } | |
1931 | ||
1932 | /* Put element with max number of occurences to the end of ARGS. */ | |
1933 | if (max_ind != -1 && max_ind +1 != (int) args_len) | |
a4f59596 | 1934 | std::swap (args[args_len - 1], args[max_ind]); |
07c03fb0 | 1935 | |
040b4eee | 1936 | /* Handle one special case when number of arguments with different values |
1937 | is equal 2 and one argument has the only occurrence. Such PHI can be | |
1938 | handled as if would have only 2 arguments. */ | |
1939 | if (args_len == 2 && phi_arg_map.get (args[0])->length () == 1) | |
1940 | { | |
1941 | vec<int> *indexes; | |
1942 | indexes = phi_arg_map.get (args[0]); | |
1943 | index0 = (*indexes)[0]; | |
1944 | arg0 = args[0]; | |
1945 | arg1 = args[1]; | |
1946 | e = gimple_phi_arg_edge (phi, index0); | |
1947 | cond = bb_predicate (e->src); | |
1948 | if (TREE_CODE (cond) == TRUTH_NOT_EXPR) | |
1949 | { | |
1950 | swap = true; | |
1951 | cond = TREE_OPERAND (cond, 0); | |
1952 | } | |
1953 | /* Gimplify the condition to a valid cond-expr conditonal operand. */ | |
1954 | cond = force_gimple_operand_gsi_1 (gsi, unshare_expr (cond), | |
1955 | is_gimple_condexpr, NULL_TREE, | |
1956 | true, GSI_SAME_STMT); | |
1957 | if (!(is_cond_scalar_reduction (phi, &reduc, arg0 , arg1, | |
1958 | &op0, &op1, true))) | |
1959 | rhs = fold_build_cond_expr (TREE_TYPE (res), unshare_expr (cond), | |
1960 | swap? arg1 : arg0, | |
1961 | swap? arg0 : arg1); | |
1962 | else | |
1963 | /* Convert reduction stmt into vectorizable form. */ | |
1964 | rhs = convert_scalar_cond_reduction (reduc, gsi, cond, op0, op1, | |
1965 | swap); | |
1966 | new_stmt = gimple_build_assign (res, rhs); | |
1967 | gsi_insert_before (gsi, new_stmt, GSI_SAME_STMT); | |
1968 | update_stmt (new_stmt); | |
1969 | } | |
1970 | else | |
1971 | { | |
1972 | /* Common case. */ | |
1973 | vec<int> *indexes; | |
1974 | tree type = TREE_TYPE (gimple_phi_result (phi)); | |
1975 | tree lhs; | |
1976 | arg1 = args[1]; | |
1977 | for (i = 0; i < args_len; i++) | |
1978 | { | |
1979 | arg0 = args[i]; | |
1980 | indexes = phi_arg_map.get (args[i]); | |
1981 | if (i != args_len - 1) | |
1982 | lhs = make_temp_ssa_name (type, NULL, "_ifc_"); | |
1983 | else | |
1984 | lhs = res; | |
1985 | cond = gen_phi_arg_condition (phi, indexes, gsi); | |
1986 | rhs = fold_build_cond_expr (type, unshare_expr (cond), | |
1987 | arg0, arg1); | |
1988 | new_stmt = gimple_build_assign (lhs, rhs); | |
1989 | gsi_insert_before (gsi, new_stmt, GSI_SAME_STMT); | |
1990 | update_stmt (new_stmt); | |
1991 | arg1 = lhs; | |
1992 | } | |
1993 | } | |
07c03fb0 | 1994 | |
1995 | if (dump_file && (dump_flags & TDF_DETAILS)) | |
1996 | { | |
040b4eee | 1997 | fprintf (dump_file, "new extended phi replacement stmt\n"); |
75a70cf9 | 1998 | print_gimple_stmt (dump_file, new_stmt, 0, TDF_SLIM); |
07c03fb0 | 1999 | } |
2000 | } | |
2001 | ||
3b91ccd9 | 2002 | /* Replaces in LOOP all the scalar phi nodes other than those in the |
fa683b76 | 2003 | LOOP->header block with conditional modify expressions. */ |
07c03fb0 | 2004 | |
2005 | static void | |
3b91ccd9 | 2006 | predicate_all_scalar_phis (struct loop *loop) |
07c03fb0 | 2007 | { |
2008 | basic_block bb; | |
2009 | unsigned int orig_loop_num_nodes = loop->num_nodes; | |
2010 | unsigned int i; | |
2011 | ||
07c03fb0 | 2012 | for (i = 1; i < orig_loop_num_nodes; i++) |
2013 | { | |
1a91d914 | 2014 | gphi *phi; |
1a91d914 | 2015 | gimple_stmt_iterator gsi; |
2016 | gphi_iterator phi_gsi; | |
07c03fb0 | 2017 | bb = ifc_bbs[i]; |
35a40aad | 2018 | |
c077abad | 2019 | if (bb == loop->header) |
07c03fb0 | 2020 | continue; |
2021 | ||
75a70cf9 | 2022 | phi_gsi = gsi_start_phis (bb); |
fa683b76 | 2023 | if (gsi_end_p (phi_gsi)) |
2024 | continue; | |
07c03fb0 | 2025 | |
e6ee4c61 | 2026 | gsi = gsi_after_labels (bb); |
2027 | while (!gsi_end_p (phi_gsi)) | |
07c03fb0 | 2028 | { |
e6ee4c61 | 2029 | phi = phi_gsi.phi (); |
ebd01afe | 2030 | if (virtual_operand_p (gimple_phi_result (phi))) |
2031 | gsi_next (&phi_gsi); | |
2032 | else | |
2033 | { | |
2034 | predicate_scalar_phi (phi, &gsi); | |
2035 | remove_phi_node (&phi_gsi, false); | |
2036 | } | |
07c03fb0 | 2037 | } |
07c03fb0 | 2038 | } |
07c03fb0 | 2039 | } |
2040 | ||
fa683b76 | 2041 | /* Insert in each basic block of LOOP the statements produced by the |
2042 | gimplification of the predicates. */ | |
2043 | ||
2044 | static void | |
db5c1c9a | 2045 | insert_gimplified_predicates (loop_p loop) |
fa683b76 | 2046 | { |
2047 | unsigned int i; | |
2048 | ||
2049 | for (i = 0; i < loop->num_nodes; i++) | |
2050 | { | |
2051 | basic_block bb = ifc_bbs[i]; | |
3b91ccd9 | 2052 | gimple_seq stmts; |
040b4eee | 2053 | if (!is_predicated (bb)) |
2054 | gcc_assert (bb_predicate_gimplified_stmts (bb) == NULL); | |
a560ff2b | 2055 | if (!is_predicated (bb)) |
2056 | { | |
2057 | /* Do not insert statements for a basic block that is not | |
2058 | predicated. Also make sure that the predicate of the | |
2059 | basic block is set to true. */ | |
2060 | reset_bb_predicate (bb); | |
2061 | continue; | |
2062 | } | |
2063 | ||
3b91ccd9 | 2064 | stmts = bb_predicate_gimplified_stmts (bb); |
fa683b76 | 2065 | if (stmts) |
2066 | { | |
03821886 | 2067 | if (need_to_predicate) |
3b91ccd9 | 2068 | { |
2069 | /* Insert the predicate of the BB just after the label, | |
2070 | as the if-conversion of memory writes will use this | |
2071 | predicate. */ | |
2072 | gimple_stmt_iterator gsi = gsi_after_labels (bb); | |
2073 | gsi_insert_seq_before (&gsi, stmts, GSI_SAME_STMT); | |
2074 | } | |
fa683b76 | 2075 | else |
3b91ccd9 | 2076 | { |
2077 | /* Insert the predicate of the BB at the end of the BB | |
2078 | as this would reduce the register pressure: the only | |
2079 | use of this predicate will be in successor BBs. */ | |
2080 | gimple_stmt_iterator gsi = gsi_last_bb (bb); | |
2081 | ||
2082 | if (gsi_end_p (gsi) | |
2083 | || stmt_ends_bb_p (gsi_stmt (gsi))) | |
2084 | gsi_insert_seq_before (&gsi, stmts, GSI_SAME_STMT); | |
2085 | else | |
2086 | gsi_insert_seq_after (&gsi, stmts, GSI_SAME_STMT); | |
2087 | } | |
fa683b76 | 2088 | |
2089 | /* Once the sequence is code generated, set it to NULL. */ | |
2090 | set_bb_predicate_gimplified_stmts (bb, NULL); | |
2091 | } | |
2092 | } | |
2093 | } | |
2094 | ||
03821886 | 2095 | /* Helper function for predicate_statements. Returns index of existent |
d3815dd1 | 2096 | mask if it was created for given SIZE and -1 otherwise. */ |
2097 | ||
2098 | static int | |
2099 | mask_exists (int size, vec<int> vec) | |
2100 | { | |
2101 | unsigned int ix; | |
2102 | int v; | |
2103 | FOR_EACH_VEC_ELT (vec, ix, v) | |
2104 | if (v == size) | |
2105 | return (int) ix; | |
2106 | return -1; | |
2107 | } | |
2108 | ||
03821886 | 2109 | /* Helper function for predicate_statements. STMT is a memory read or |
2110 | write and it needs to be predicated by MASK. Return a statement | |
2111 | that does so. */ | |
2112 | ||
2113 | static gimple * | |
2114 | predicate_load_or_store (gimple_stmt_iterator *gsi, gassign *stmt, tree mask) | |
2115 | { | |
2116 | gcall *new_stmt; | |
2117 | ||
2118 | tree lhs = gimple_assign_lhs (stmt); | |
2119 | tree rhs = gimple_assign_rhs1 (stmt); | |
2120 | tree ref = TREE_CODE (lhs) == SSA_NAME ? rhs : lhs; | |
2121 | mark_addressable (ref); | |
2122 | tree addr = force_gimple_operand_gsi (gsi, build_fold_addr_expr (ref), | |
2123 | true, NULL_TREE, true, GSI_SAME_STMT); | |
2124 | tree ptr = build_int_cst (reference_alias_ptr_type (ref), | |
2125 | get_object_alignment (ref)); | |
2126 | /* Copy points-to info if possible. */ | |
2127 | if (TREE_CODE (addr) == SSA_NAME && !SSA_NAME_PTR_INFO (addr)) | |
2128 | copy_ref_info (build2 (MEM_REF, TREE_TYPE (ref), addr, ptr), | |
2129 | ref); | |
2130 | if (TREE_CODE (lhs) == SSA_NAME) | |
2131 | { | |
2132 | new_stmt | |
2133 | = gimple_build_call_internal (IFN_MASK_LOAD, 3, addr, | |
2134 | ptr, mask); | |
2135 | gimple_call_set_lhs (new_stmt, lhs); | |
2136 | gimple_set_vuse (new_stmt, gimple_vuse (stmt)); | |
2137 | } | |
2138 | else | |
2139 | { | |
2140 | new_stmt | |
2141 | = gimple_build_call_internal (IFN_MASK_STORE, 4, addr, ptr, | |
2142 | mask, rhs); | |
2143 | gimple_set_vuse (new_stmt, gimple_vuse (stmt)); | |
2144 | gimple_set_vdef (new_stmt, gimple_vdef (stmt)); | |
2145 | SSA_NAME_DEF_STMT (gimple_vdef (new_stmt)) = new_stmt; | |
2146 | } | |
2147 | gimple_call_set_nothrow (new_stmt, true); | |
2148 | return new_stmt; | |
2149 | } | |
2150 | ||
2151 | /* STMT uses OP_LHS. Check whether it is equivalent to: | |
2152 | ||
2153 | ... = OP_MASK ? OP_LHS : X; | |
2154 | ||
2155 | Return X if so, otherwise return null. OP_MASK is an SSA_NAME that is | |
2156 | known to have value OP_COND. */ | |
2157 | ||
2158 | static tree | |
2159 | check_redundant_cond_expr (gimple *stmt, tree op_mask, tree op_cond, | |
2160 | tree op_lhs) | |
2161 | { | |
2162 | gassign *assign = dyn_cast <gassign *> (stmt); | |
2163 | if (!assign || gimple_assign_rhs_code (assign) != COND_EXPR) | |
2164 | return NULL_TREE; | |
2165 | ||
2166 | tree use_cond = gimple_assign_rhs1 (assign); | |
2167 | tree if_true = gimple_assign_rhs2 (assign); | |
2168 | tree if_false = gimple_assign_rhs3 (assign); | |
2169 | ||
2170 | if ((use_cond == op_mask || operand_equal_p (use_cond, op_cond, 0)) | |
2171 | && if_true == op_lhs) | |
2172 | return if_false; | |
2173 | ||
2174 | if (inverse_conditions_p (use_cond, op_cond) && if_false == op_lhs) | |
2175 | return if_true; | |
2176 | ||
2177 | return NULL_TREE; | |
2178 | } | |
2179 | ||
2180 | /* Return true if VALUE is available for use at STMT. SSA_NAMES is | |
2181 | the set of SSA names defined earlier in STMT's block. */ | |
2182 | ||
2183 | static bool | |
2184 | value_available_p (gimple *stmt, hash_set<tree_ssa_name_hash> *ssa_names, | |
2185 | tree value) | |
2186 | { | |
2187 | if (is_gimple_min_invariant (value)) | |
2188 | return true; | |
2189 | ||
2190 | if (TREE_CODE (value) == SSA_NAME) | |
2191 | { | |
2192 | if (SSA_NAME_IS_DEFAULT_DEF (value)) | |
2193 | return true; | |
2194 | ||
2195 | basic_block def_bb = gimple_bb (SSA_NAME_DEF_STMT (value)); | |
2196 | basic_block use_bb = gimple_bb (stmt); | |
2197 | return (def_bb == use_bb | |
2198 | ? ssa_names->contains (value) | |
2199 | : dominated_by_p (CDI_DOMINATORS, use_bb, def_bb)); | |
2200 | } | |
2201 | ||
2202 | return false; | |
2203 | } | |
2204 | ||
2205 | /* Helper function for predicate_statements. STMT is a potentially-trapping | |
2206 | arithmetic operation that needs to be predicated by MASK, an SSA_NAME that | |
2207 | has value COND. Return a statement that does so. SSA_NAMES is the set of | |
2208 | SSA names defined earlier in STMT's block. */ | |
2209 | ||
2210 | static gimple * | |
2211 | predicate_rhs_code (gassign *stmt, tree mask, tree cond, | |
2212 | hash_set<tree_ssa_name_hash> *ssa_names) | |
2213 | { | |
2214 | tree lhs = gimple_assign_lhs (stmt); | |
2215 | tree_code code = gimple_assign_rhs_code (stmt); | |
2216 | unsigned int nops = gimple_num_ops (stmt); | |
2217 | internal_fn cond_fn = get_conditional_internal_fn (code); | |
2218 | ||
2219 | /* Construct the arguments to the conditional internal function. */ | |
2220 | auto_vec<tree, 8> args; | |
2221 | args.safe_grow (nops + 1); | |
2222 | args[0] = mask; | |
2223 | for (unsigned int i = 1; i < nops; ++i) | |
2224 | args[i] = gimple_op (stmt, i); | |
2225 | args[nops] = NULL_TREE; | |
2226 | ||
2227 | /* Look for uses of the result to see whether they are COND_EXPRs that can | |
2228 | be folded into the conditional call. */ | |
2229 | imm_use_iterator imm_iter; | |
2230 | gimple *use_stmt; | |
2231 | FOR_EACH_IMM_USE_STMT (use_stmt, imm_iter, lhs) | |
2232 | { | |
2233 | tree new_else = check_redundant_cond_expr (use_stmt, mask, cond, lhs); | |
2234 | if (new_else && value_available_p (stmt, ssa_names, new_else)) | |
2235 | { | |
2236 | if (!args[nops]) | |
2237 | args[nops] = new_else; | |
2238 | if (operand_equal_p (new_else, args[nops], 0)) | |
2239 | { | |
2240 | /* We have: | |
2241 | ||
2242 | LHS = IFN_COND (MASK, ..., ELSE); | |
2243 | X = MASK ? LHS : ELSE; | |
2244 | ||
2245 | which makes X equivalent to LHS. */ | |
2246 | tree use_lhs = gimple_assign_lhs (use_stmt); | |
2247 | redundant_ssa_names.safe_push (std::make_pair (use_lhs, lhs)); | |
2248 | } | |
2249 | } | |
2250 | } | |
2251 | if (!args[nops]) | |
2252 | args[nops] = targetm.preferred_else_value (cond_fn, TREE_TYPE (lhs), | |
2253 | nops - 1, &args[1]); | |
2254 | ||
2255 | /* Create and insert the call. */ | |
2256 | gcall *new_stmt = gimple_build_call_internal_vec (cond_fn, args); | |
2257 | gimple_call_set_lhs (new_stmt, lhs); | |
2258 | gimple_call_set_nothrow (new_stmt, true); | |
2259 | ||
2260 | return new_stmt; | |
2261 | } | |
2262 | ||
3b91ccd9 | 2263 | /* Predicate each write to memory in LOOP. |
2264 | ||
2265 | This function transforms control flow constructs containing memory | |
2266 | writes of the form: | |
2267 | ||
2268 | | for (i = 0; i < N; i++) | |
2269 | | if (cond) | |
2270 | | A[i] = expr; | |
2271 | ||
2272 | into the following form that does not contain control flow: | |
2273 | ||
2274 | | for (i = 0; i < N; i++) | |
2275 | | A[i] = cond ? expr : A[i]; | |
2276 | ||
2277 | The original CFG looks like this: | |
2278 | ||
2279 | | bb_0 | |
2280 | | i = 0 | |
2281 | | end_bb_0 | |
2282 | | | |
2283 | | bb_1 | |
2284 | | if (i < N) goto bb_5 else goto bb_2 | |
2285 | | end_bb_1 | |
2286 | | | |
2287 | | bb_2 | |
2288 | | cond = some_computation; | |
2289 | | if (cond) goto bb_3 else goto bb_4 | |
2290 | | end_bb_2 | |
2291 | | | |
2292 | | bb_3 | |
2293 | | A[i] = expr; | |
2294 | | goto bb_4 | |
2295 | | end_bb_3 | |
2296 | | | |
2297 | | bb_4 | |
2298 | | goto bb_1 | |
2299 | | end_bb_4 | |
2300 | ||
2301 | insert_gimplified_predicates inserts the computation of the COND | |
2302 | expression at the beginning of the destination basic block: | |
2303 | ||
2304 | | bb_0 | |
2305 | | i = 0 | |
2306 | | end_bb_0 | |
2307 | | | |
2308 | | bb_1 | |
2309 | | if (i < N) goto bb_5 else goto bb_2 | |
2310 | | end_bb_1 | |
2311 | | | |
2312 | | bb_2 | |
2313 | | cond = some_computation; | |
2314 | | if (cond) goto bb_3 else goto bb_4 | |
2315 | | end_bb_2 | |
2316 | | | |
2317 | | bb_3 | |
2318 | | cond = some_computation; | |
2319 | | A[i] = expr; | |
2320 | | goto bb_4 | |
2321 | | end_bb_3 | |
2322 | | | |
2323 | | bb_4 | |
2324 | | goto bb_1 | |
2325 | | end_bb_4 | |
2326 | ||
03821886 | 2327 | predicate_statements is then predicating the memory write as follows: |
3b91ccd9 | 2328 | |
2329 | | bb_0 | |
2330 | | i = 0 | |
2331 | | end_bb_0 | |
2332 | | | |
2333 | | bb_1 | |
2334 | | if (i < N) goto bb_5 else goto bb_2 | |
2335 | | end_bb_1 | |
2336 | | | |
2337 | | bb_2 | |
2338 | | if (cond) goto bb_3 else goto bb_4 | |
2339 | | end_bb_2 | |
2340 | | | |
2341 | | bb_3 | |
2342 | | cond = some_computation; | |
2343 | | A[i] = cond ? expr : A[i]; | |
2344 | | goto bb_4 | |
2345 | | end_bb_3 | |
2346 | | | |
2347 | | bb_4 | |
2348 | | goto bb_1 | |
2349 | | end_bb_4 | |
2350 | ||
2351 | and finally combine_blocks removes the basic block boundaries making | |
2352 | the loop vectorizable: | |
2353 | ||
2354 | | bb_0 | |
2355 | | i = 0 | |
2356 | | if (i < N) goto bb_5 else goto bb_1 | |
2357 | | end_bb_0 | |
2358 | | | |
2359 | | bb_1 | |
2360 | | cond = some_computation; | |
2361 | | A[i] = cond ? expr : A[i]; | |
2362 | | if (i < N) goto bb_5 else goto bb_4 | |
2363 | | end_bb_1 | |
2364 | | | |
2365 | | bb_4 | |
2366 | | goto bb_1 | |
2367 | | end_bb_4 | |
2368 | */ | |
2369 | ||
2370 | static void | |
03821886 | 2371 | predicate_statements (loop_p loop) |
3b91ccd9 | 2372 | { |
2373 | unsigned int i, orig_loop_num_nodes = loop->num_nodes; | |
d3815dd1 | 2374 | auto_vec<int, 1> vect_sizes; |
2375 | auto_vec<tree, 1> vect_masks; | |
03821886 | 2376 | hash_set<tree_ssa_name_hash> ssa_names; |
3b91ccd9 | 2377 | |
2378 | for (i = 1; i < orig_loop_num_nodes; i++) | |
2379 | { | |
2380 | gimple_stmt_iterator gsi; | |
2381 | basic_block bb = ifc_bbs[i]; | |
2382 | tree cond = bb_predicate (bb); | |
2df61941 | 2383 | bool swap; |
d3815dd1 | 2384 | int index; |
3b91ccd9 | 2385 | |
fc1c9df7 | 2386 | if (is_true_predicate (cond)) |
3b91ccd9 | 2387 | continue; |
2388 | ||
2df61941 | 2389 | swap = false; |
2390 | if (TREE_CODE (cond) == TRUTH_NOT_EXPR) | |
2391 | { | |
2392 | swap = true; | |
2393 | cond = TREE_OPERAND (cond, 0); | |
2394 | } | |
2395 | ||
d3815dd1 | 2396 | vect_sizes.truncate (0); |
2397 | vect_masks.truncate (0); | |
2398 | ||
fc1c9df7 | 2399 | for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi);) |
2400 | { | |
03821886 | 2401 | gassign *stmt = dyn_cast <gassign *> (gsi_stmt (gsi)); |
2402 | if (!stmt) | |
fc1c9df7 | 2403 | ; |
6784dab5 | 2404 | else if (is_false_predicate (cond) |
2405 | && gimple_vdef (stmt)) | |
fc1c9df7 | 2406 | { |
2407 | unlink_stmt_vdef (stmt); | |
2408 | gsi_remove (&gsi, true); | |
2409 | release_defs (stmt); | |
2410 | continue; | |
2411 | } | |
2412 | else if (gimple_plf (stmt, GF_PLF_2)) | |
2413 | { | |
2414 | tree lhs = gimple_assign_lhs (stmt); | |
03821886 | 2415 | tree mask; |
2416 | gimple *new_stmt; | |
fc1c9df7 | 2417 | gimple_seq stmts = NULL; |
eafbcd13 | 2418 | machine_mode mode = TYPE_MODE (TREE_TYPE (lhs)); |
2419 | /* We checked before setting GF_PLF_2 that an equivalent | |
2420 | integer mode exists. */ | |
2421 | int bitsize = GET_MODE_BITSIZE (mode).to_constant (); | |
fc1c9df7 | 2422 | if (!vect_sizes.is_empty () |
2423 | && (index = mask_exists (bitsize, vect_sizes)) != -1) | |
2424 | /* Use created mask. */ | |
2425 | mask = vect_masks[index]; | |
2426 | else | |
2427 | { | |
2428 | if (COMPARISON_CLASS_P (cond)) | |
2429 | mask = gimple_build (&stmts, TREE_CODE (cond), | |
2430 | boolean_type_node, | |
2431 | TREE_OPERAND (cond, 0), | |
2432 | TREE_OPERAND (cond, 1)); | |
2433 | else | |
9b79c8e1 | 2434 | mask = cond; |
fc1c9df7 | 2435 | |
2436 | if (swap) | |
2437 | { | |
2438 | tree true_val | |
2439 | = constant_boolean_node (true, TREE_TYPE (mask)); | |
2440 | mask = gimple_build (&stmts, BIT_XOR_EXPR, | |
2441 | TREE_TYPE (mask), mask, true_val); | |
2442 | } | |
2443 | gsi_insert_seq_before (&gsi, stmts, GSI_SAME_STMT); | |
2444 | ||
fc1c9df7 | 2445 | /* Save mask and its size for further use. */ |
2446 | vect_sizes.safe_push (bitsize); | |
2447 | vect_masks.safe_push (mask); | |
2448 | } | |
03821886 | 2449 | if (gimple_assign_single_p (stmt)) |
2450 | new_stmt = predicate_load_or_store (&gsi, stmt, mask); | |
fc1c9df7 | 2451 | else |
03821886 | 2452 | new_stmt = predicate_rhs_code (stmt, mask, cond, &ssa_names); |
ebd01afe | 2453 | |
fc1c9df7 | 2454 | gsi_replace (&gsi, new_stmt, true); |
2455 | } | |
2456 | else if (gimple_vdef (stmt)) | |
2457 | { | |
2458 | tree lhs = gimple_assign_lhs (stmt); | |
2459 | tree rhs = gimple_assign_rhs1 (stmt); | |
2460 | tree type = TREE_TYPE (lhs); | |
2461 | ||
2462 | lhs = ifc_temp_var (type, unshare_expr (lhs), &gsi); | |
2463 | rhs = ifc_temp_var (type, unshare_expr (rhs), &gsi); | |
2464 | if (swap) | |
2465 | std::swap (lhs, rhs); | |
2466 | cond = force_gimple_operand_gsi_1 (&gsi, unshare_expr (cond), | |
2467 | is_gimple_condexpr, NULL_TREE, | |
2468 | true, GSI_SAME_STMT); | |
2469 | rhs = fold_build_cond_expr (type, unshare_expr (cond), rhs, lhs); | |
2470 | gimple_assign_set_rhs1 (stmt, ifc_temp_var (type, rhs, &gsi)); | |
2471 | update_stmt (stmt); | |
2472 | } | |
03821886 | 2473 | tree lhs = gimple_get_lhs (gsi_stmt (gsi)); |
2474 | if (lhs && TREE_CODE (lhs) == SSA_NAME) | |
2475 | ssa_names.add (lhs); | |
fc1c9df7 | 2476 | gsi_next (&gsi); |
2477 | } | |
03821886 | 2478 | ssa_names.empty (); |
3b91ccd9 | 2479 | } |
2480 | } | |
2481 | ||
01c40c0b | 2482 | /* Remove all GIMPLE_CONDs and GIMPLE_LABELs of all the basic blocks |
08129148 | 2483 | other than the exit and latch of the LOOP. Also resets the |
2484 | GIMPLE_DEBUG information. */ | |
01c40c0b | 2485 | |
2486 | static void | |
2487 | remove_conditions_and_labels (loop_p loop) | |
2488 | { | |
2489 | gimple_stmt_iterator gsi; | |
2490 | unsigned int i; | |
2491 | ||
2492 | for (i = 0; i < loop->num_nodes; i++) | |
2493 | { | |
fa683b76 | 2494 | basic_block bb = ifc_bbs[i]; |
01c40c0b | 2495 | |
2496 | if (bb_with_exit_edge_p (loop, bb) | |
2497 | || bb == loop->latch) | |
2498 | continue; | |
2499 | ||
2500 | for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); ) | |
08129148 | 2501 | switch (gimple_code (gsi_stmt (gsi))) |
2502 | { | |
2503 | case GIMPLE_COND: | |
2504 | case GIMPLE_LABEL: | |
2505 | gsi_remove (&gsi, true); | |
2506 | break; | |
2507 | ||
2508 | case GIMPLE_DEBUG: | |
2509 | /* ??? Should there be conditional GIMPLE_DEBUG_BINDs? */ | |
2510 | if (gimple_debug_bind_p (gsi_stmt (gsi))) | |
2511 | { | |
2512 | gimple_debug_bind_reset_value (gsi_stmt (gsi)); | |
2513 | update_stmt (gsi_stmt (gsi)); | |
2514 | } | |
2515 | gsi_next (&gsi); | |
2516 | break; | |
2517 | ||
2518 | default: | |
2519 | gsi_next (&gsi); | |
2520 | } | |
01c40c0b | 2521 | } |
2522 | } | |
2523 | ||
9cfdcdb1 | 2524 | /* Combine all the basic blocks from LOOP into one or two super basic |
2525 | blocks. Replace PHI nodes with conditional modify expressions. */ | |
07c03fb0 | 2526 | |
2527 | static void | |
db5c1c9a | 2528 | combine_blocks (struct loop *loop) |
07c03fb0 | 2529 | { |
2530 | basic_block bb, exit_bb, merge_target_bb; | |
2531 | unsigned int orig_loop_num_nodes = loop->num_nodes; | |
2532 | unsigned int i; | |
71cfcaa2 | 2533 | edge e; |
2534 | edge_iterator ei; | |
fa717aca | 2535 | |
01c40c0b | 2536 | remove_conditions_and_labels (loop); |
5cc7d4f7 | 2537 | insert_gimplified_predicates (loop); |
3b91ccd9 | 2538 | predicate_all_scalar_phis (loop); |
2539 | ||
03821886 | 2540 | if (need_to_predicate) |
2541 | predicate_statements (loop); | |
07c03fb0 | 2542 | |
b01e3c9b | 2543 | /* Merge basic blocks: first remove all the edges in the loop, |
2544 | except for those from the exit block. */ | |
07c03fb0 | 2545 | exit_bb = NULL; |
eda71dfb | 2546 | bool *predicated = XNEWVEC (bool, orig_loop_num_nodes); |
71cfcaa2 | 2547 | for (i = 0; i < orig_loop_num_nodes; i++) |
2548 | { | |
2549 | bb = ifc_bbs[i]; | |
eda71dfb | 2550 | predicated[i] = !is_true_predicate (bb_predicate (bb)); |
39760a87 | 2551 | free_bb_predicate (bb); |
71cfcaa2 | 2552 | if (bb_with_exit_edge_p (loop, bb)) |
2553 | { | |
53a87a4b | 2554 | gcc_assert (exit_bb == NULL); |
71cfcaa2 | 2555 | exit_bb = bb; |
71cfcaa2 | 2556 | } |
2557 | } | |
2558 | gcc_assert (exit_bb != loop->latch); | |
07c03fb0 | 2559 | |
07c03fb0 | 2560 | for (i = 1; i < orig_loop_num_nodes; i++) |
2561 | { | |
07c03fb0 | 2562 | bb = ifc_bbs[i]; |
2563 | ||
71cfcaa2 | 2564 | for (ei = ei_start (bb->preds); (e = ei_safe_edge (ei));) |
07c03fb0 | 2565 | { |
71cfcaa2 | 2566 | if (e->src == exit_bb) |
2567 | ei_next (&ei); | |
2568 | else | |
2569 | remove_edge (e); | |
2570 | } | |
2571 | } | |
07c03fb0 | 2572 | |
71cfcaa2 | 2573 | if (exit_bb != NULL) |
2574 | { | |
2575 | if (exit_bb != loop->header) | |
2576 | { | |
b01e3c9b | 2577 | /* Connect this node to loop header. */ |
7d956d51 | 2578 | make_single_succ_edge (loop->header, exit_bb, EDGE_FALLTHRU); |
71cfcaa2 | 2579 | set_immediate_dominator (CDI_DOMINATORS, exit_bb, loop->header); |
07c03fb0 | 2580 | } |
2581 | ||
71cfcaa2 | 2582 | /* Redirect non-exit edges to loop->latch. */ |
2583 | FOR_EACH_EDGE (e, ei, exit_bb->succs) | |
2584 | { | |
2585 | if (!loop_exit_edge_p (loop, e)) | |
2586 | redirect_edge_and_branch (e, loop->latch); | |
2587 | } | |
2588 | set_immediate_dominator (CDI_DOMINATORS, loop->latch, exit_bb); | |
2589 | } | |
2590 | else | |
2591 | { | |
b01e3c9b | 2592 | /* If the loop does not have an exit, reconnect header and latch. */ |
71cfcaa2 | 2593 | make_edge (loop->header, loop->latch, EDGE_FALLTHRU); |
2594 | set_immediate_dominator (CDI_DOMINATORS, loop->latch, loop->header); | |
2595 | } | |
c077abad | 2596 | |
71cfcaa2 | 2597 | merge_target_bb = loop->header; |
ebd01afe | 2598 | |
2599 | /* Get at the virtual def valid for uses starting at the first block | |
2600 | we merge into the header. Without a virtual PHI the loop has the | |
2601 | same virtual use on all stmts. */ | |
2602 | gphi *vphi = get_virtual_phi (loop->header); | |
2603 | tree last_vdef = NULL_TREE; | |
2604 | if (vphi) | |
2605 | { | |
2606 | last_vdef = gimple_phi_result (vphi); | |
2607 | for (gimple_stmt_iterator gsi = gsi_start_bb (loop->header); | |
2608 | ! gsi_end_p (gsi); gsi_next (&gsi)) | |
2609 | if (gimple_vdef (gsi_stmt (gsi))) | |
2610 | last_vdef = gimple_vdef (gsi_stmt (gsi)); | |
2611 | } | |
71cfcaa2 | 2612 | for (i = 1; i < orig_loop_num_nodes; i++) |
2613 | { | |
75a70cf9 | 2614 | gimple_stmt_iterator gsi; |
2615 | gimple_stmt_iterator last; | |
46d1d560 | 2616 | |
71cfcaa2 | 2617 | bb = ifc_bbs[i]; |
65b9482a | 2618 | |
71cfcaa2 | 2619 | if (bb == exit_bb || bb == loop->latch) |
2620 | continue; | |
65b9482a | 2621 | |
ebd01afe | 2622 | /* We release virtual PHIs late because we have to propagate them |
2623 | out using the current VUSE. The def might be the one used | |
2624 | after the loop. */ | |
2625 | vphi = get_virtual_phi (bb); | |
2626 | if (vphi) | |
2627 | { | |
2628 | imm_use_iterator iter; | |
2629 | use_operand_p use_p; | |
2630 | gimple *use_stmt; | |
2631 | FOR_EACH_IMM_USE_STMT (use_stmt, iter, gimple_phi_result (vphi)) | |
2632 | { | |
2633 | FOR_EACH_IMM_USE_ON_STMT (use_p, iter) | |
2634 | SET_USE (use_p, last_vdef); | |
2635 | } | |
2636 | gsi = gsi_for_stmt (vphi); | |
2637 | remove_phi_node (&gsi, true); | |
2638 | } | |
2639 | ||
eda71dfb | 2640 | /* Make stmts member of loop->header and clear range info from all stmts |
2641 | in BB which is now no longer executed conditional on a predicate we | |
2642 | could have derived it from. */ | |
01c40c0b | 2643 | for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi)) |
eda71dfb | 2644 | { |
42acab1c | 2645 | gimple *stmt = gsi_stmt (gsi); |
eda71dfb | 2646 | gimple_set_bb (stmt, merge_target_bb); |
ebd01afe | 2647 | /* Update virtual operands. */ |
2648 | if (last_vdef) | |
2649 | { | |
2650 | use_operand_p use_p = ssa_vuse_operand (stmt); | |
2651 | if (use_p | |
2652 | && USE_FROM_PTR (use_p) != last_vdef) | |
2653 | SET_USE (use_p, last_vdef); | |
2654 | if (gimple_vdef (stmt)) | |
2655 | last_vdef = gimple_vdef (stmt); | |
2656 | } | |
eda71dfb | 2657 | if (predicated[i]) |
2658 | { | |
2659 | ssa_op_iter i; | |
2660 | tree op; | |
2661 | FOR_EACH_SSA_TREE_OPERAND (op, stmt, i, SSA_OP_DEF) | |
2662 | reset_flow_sensitive_info (op); | |
2663 | } | |
2664 | } | |
07c03fb0 | 2665 | |
2666 | /* Update stmt list. */ | |
75a70cf9 | 2667 | last = gsi_last_bb (merge_target_bb); |
ebd01afe | 2668 | gsi_insert_seq_after_without_update (&last, bb_seq (bb), GSI_NEW_STMT); |
75a70cf9 | 2669 | set_bb_seq (bb, NULL); |
07c03fb0 | 2670 | |
88e6f696 | 2671 | delete_basic_block (bb); |
07c03fb0 | 2672 | } |
e9437a8f | 2673 | |
b01e3c9b | 2674 | /* If possible, merge loop header to the block with the exit edge. |
2675 | This reduces the number of basic blocks to two, to please the | |
b519e9db | 2676 | vectorizer that handles only loops with two nodes. */ |
c077abad | 2677 | if (exit_bb |
ebd01afe | 2678 | && exit_bb != loop->header) |
2679 | { | |
2680 | /* We release virtual PHIs late because we have to propagate them | |
2681 | out using the current VUSE. The def might be the one used | |
2682 | after the loop. */ | |
2683 | vphi = get_virtual_phi (exit_bb); | |
2684 | if (vphi) | |
2685 | { | |
2686 | imm_use_iterator iter; | |
2687 | use_operand_p use_p; | |
2688 | gimple *use_stmt; | |
2689 | FOR_EACH_IMM_USE_STMT (use_stmt, iter, gimple_phi_result (vphi)) | |
2690 | { | |
2691 | FOR_EACH_IMM_USE_ON_STMT (use_p, iter) | |
2692 | SET_USE (use_p, last_vdef); | |
2693 | } | |
2694 | gimple_stmt_iterator gsi = gsi_for_stmt (vphi); | |
2695 | remove_phi_node (&gsi, true); | |
2696 | } | |
2697 | ||
2698 | if (can_merge_blocks_p (loop->header, exit_bb)) | |
2699 | merge_blocks (loop->header, exit_bb); | |
2700 | } | |
39760a87 | 2701 | |
2702 | free (ifc_bbs); | |
2703 | ifc_bbs = NULL; | |
eda71dfb | 2704 | free (predicated); |
07c03fb0 | 2705 | } |
2706 | ||
207b8288 | 2707 | /* Version LOOP before if-converting it; the original loop |
2708 | will be if-converted, the new copy of the loop will not, | |
c71d3c24 | 2709 | and the LOOP_VECTORIZED internal call will be guarding which |
2710 | loop to execute. The vectorizer pass will fold this | |
b6863ffa | 2711 | internal call into either true or false. |
2712 | ||
2713 | Note that this function intentionally invalidates profile. Both edges | |
2714 | out of LOOP_VECTORIZED must have 100% probability so the profile remains | |
2715 | consistent after the condition is folded in the vectorizer. */ | |
07c03fb0 | 2716 | |
ccd0a9f9 | 2717 | static struct loop * |
c71d3c24 | 2718 | version_loop_for_if_conversion (struct loop *loop) |
2719 | { | |
2720 | basic_block cond_bb; | |
f9e245b2 | 2721 | tree cond = make_ssa_name (boolean_type_node); |
c71d3c24 | 2722 | struct loop *new_loop; |
42acab1c | 2723 | gimple *g; |
c71d3c24 | 2724 | gimple_stmt_iterator gsi; |
9ee85230 | 2725 | unsigned int save_length; |
c71d3c24 | 2726 | |
2727 | g = gimple_build_call_internal (IFN_LOOP_VECTORIZED, 2, | |
2728 | build_int_cst (integer_type_node, loop->num), | |
2729 | integer_zero_node); | |
2730 | gimple_call_set_lhs (g, cond); | |
2731 | ||
c3deca25 | 2732 | /* Save BB->aux around loop_version as that uses the same field. */ |
9ee85230 | 2733 | save_length = loop->inner ? loop->inner->num_nodes : loop->num_nodes; |
2734 | void **saved_preds = XALLOCAVEC (void *, save_length); | |
2735 | for (unsigned i = 0; i < save_length; i++) | |
c3deca25 | 2736 | saved_preds[i] = ifc_bbs[i]->aux; |
2737 | ||
c71d3c24 | 2738 | initialize_original_copy_tables (); |
b6863ffa | 2739 | /* At this point we invalidate porfile confistency until IFN_LOOP_VECTORIZED |
2740 | is re-merged in the vectorizer. */ | |
c71d3c24 | 2741 | new_loop = loop_version (loop, cond, &cond_bb, |
720cfc43 | 2742 | profile_probability::always (), |
2743 | profile_probability::always (), | |
ca69b069 | 2744 | profile_probability::always (), |
2745 | profile_probability::always (), true); | |
c71d3c24 | 2746 | free_original_copy_tables (); |
c3deca25 | 2747 | |
9ee85230 | 2748 | for (unsigned i = 0; i < save_length; i++) |
c3deca25 | 2749 | ifc_bbs[i]->aux = saved_preds[i]; |
2750 | ||
c71d3c24 | 2751 | if (new_loop == NULL) |
ccd0a9f9 | 2752 | return NULL; |
c3deca25 | 2753 | |
c71d3c24 | 2754 | new_loop->dont_vectorize = true; |
4c73695b | 2755 | new_loop->force_vectorize = false; |
c71d3c24 | 2756 | gsi = gsi_last_bb (cond_bb); |
2757 | gimple_call_set_arg (g, 1, build_int_cst (integer_type_node, new_loop->num)); | |
2758 | gsi_insert_before (&gsi, g, GSI_SAME_STMT); | |
5cc7d4f7 | 2759 | update_ssa (TODO_update_ssa); |
ccd0a9f9 | 2760 | return new_loop; |
c71d3c24 | 2761 | } |
2762 | ||
9ee85230 | 2763 | /* Return true when LOOP satisfies the follow conditions that will |
2764 | allow it to be recognized by the vectorizer for outer-loop | |
2765 | vectorization: | |
2766 | - The loop is not the root node of the loop tree. | |
2767 | - The loop has exactly one inner loop. | |
2768 | - The loop has a single exit. | |
2769 | - The loop header has a single successor, which is the inner | |
2770 | loop header. | |
da269671 | 2771 | - Each of the inner and outer loop latches have a single |
2772 | predecessor. | |
9ee85230 | 2773 | - The loop exit block has a single predecessor, which is the |
2774 | inner loop's exit block. */ | |
2775 | ||
2776 | static bool | |
2777 | versionable_outer_loop_p (struct loop *loop) | |
2778 | { | |
2779 | if (!loop_outer (loop) | |
ccd0a9f9 | 2780 | || loop->dont_vectorize |
9ee85230 | 2781 | || !loop->inner |
2782 | || loop->inner->next | |
2783 | || !single_exit (loop) | |
2784 | || !single_succ_p (loop->header) | |
da269671 | 2785 | || single_succ (loop->header) != loop->inner->header |
2786 | || !single_pred_p (loop->latch) | |
2787 | || !single_pred_p (loop->inner->latch)) | |
9ee85230 | 2788 | return false; |
ccd0a9f9 | 2789 | |
9ee85230 | 2790 | basic_block outer_exit = single_pred (loop->latch); |
2791 | basic_block inner_exit = single_pred (loop->inner->latch); | |
2792 | ||
2793 | if (!single_pred_p (outer_exit) || single_pred (outer_exit) != inner_exit) | |
2794 | return false; | |
2795 | ||
2796 | if (dump_file) | |
2797 | fprintf (dump_file, "Found vectorizable outer loop for versioning\n"); | |
2798 | ||
2799 | return true; | |
2800 | } | |
2801 | ||
9ab8df54 | 2802 | /* Performs splitting of critical edges. Skip splitting and return false |
2803 | if LOOP will not be converted because: | |
2804 | ||
2805 | - LOOP is not well formed. | |
2806 | - LOOP has PHI with more than MAX_PHI_ARG_NUM arguments. | |
2807 | ||
2808 | Last restriction is valid only if AGGRESSIVE_IF_CONV is false. */ | |
040b4eee | 2809 | |
2810 | static bool | |
9ab8df54 | 2811 | ifcvt_split_critical_edges (struct loop *loop, bool aggressive_if_conv) |
040b4eee | 2812 | { |
2813 | basic_block *body; | |
2814 | basic_block bb; | |
2815 | unsigned int num = loop->num_nodes; | |
2816 | unsigned int i; | |
42acab1c | 2817 | gimple *stmt; |
040b4eee | 2818 | edge e; |
2819 | edge_iterator ei; | |
cf416779 | 2820 | auto_vec<edge> critical_edges; |
040b4eee | 2821 | |
9ab8df54 | 2822 | /* Loop is not well formed. */ |
2823 | if (num <= 2 || loop->inner || !single_exit (loop)) | |
040b4eee | 2824 | return false; |
2825 | ||
2826 | body = get_loop_body (loop); | |
2827 | for (i = 0; i < num; i++) | |
2828 | { | |
2829 | bb = body[i]; | |
9ab8df54 | 2830 | if (!aggressive_if_conv |
2831 | && phi_nodes (bb) | |
2832 | && EDGE_COUNT (bb->preds) > MAX_PHI_ARG_NUM) | |
2833 | { | |
2834 | if (dump_file && (dump_flags & TDF_DETAILS)) | |
2835 | fprintf (dump_file, | |
2836 | "BB %d has complicated PHI with more than %u args.\n", | |
2837 | bb->index, MAX_PHI_ARG_NUM); | |
2838 | ||
2839 | free (body); | |
9ab8df54 | 2840 | return false; |
2841 | } | |
2842 | if (bb == loop->latch || bb_with_exit_edge_p (loop, bb)) | |
040b4eee | 2843 | continue; |
9ab8df54 | 2844 | |
040b4eee | 2845 | stmt = last_stmt (bb); |
2846 | /* Skip basic blocks not ending with conditional branch. */ | |
9ab8df54 | 2847 | if (!stmt || gimple_code (stmt) != GIMPLE_COND) |
040b4eee | 2848 | continue; |
9ab8df54 | 2849 | |
040b4eee | 2850 | FOR_EACH_EDGE (e, ei, bb->succs) |
2851 | if (EDGE_CRITICAL_P (e) && e->dest->loop_father == loop) | |
9ab8df54 | 2852 | critical_edges.safe_push (e); |
040b4eee | 2853 | } |
2854 | free (body); | |
9ab8df54 | 2855 | |
2856 | while (critical_edges.length () > 0) | |
2857 | { | |
2858 | e = critical_edges.pop (); | |
2859 | /* Don't split if bb can be predicated along non-critical edge. */ | |
2860 | if (EDGE_COUNT (e->dest->preds) > 2 || all_preds_critical_p (e->dest)) | |
2861 | split_edge (e); | |
2862 | } | |
2863 | ||
040b4eee | 2864 | return true; |
2865 | } | |
2866 | ||
040b4eee | 2867 | /* Delete redundant statements produced by predication which prevents |
2868 | loop vectorization. */ | |
2869 | ||
2870 | static void | |
2871 | ifcvt_local_dce (basic_block bb) | |
2872 | { | |
42acab1c | 2873 | gimple *stmt; |
2874 | gimple *stmt1; | |
2875 | gimple *phi; | |
040b4eee | 2876 | gimple_stmt_iterator gsi; |
6e0cf981 | 2877 | auto_vec<gimple *> worklist; |
040b4eee | 2878 | enum gimple_code code; |
2879 | use_operand_p use_p; | |
2880 | imm_use_iterator imm_iter; | |
03821886 | 2881 | std::pair <tree, tree> *name_pair; |
2882 | unsigned int i; | |
2883 | ||
2884 | FOR_EACH_VEC_ELT (redundant_ssa_names, i, name_pair) | |
2885 | replace_uses_by (name_pair->first, name_pair->second); | |
2886 | redundant_ssa_names.release (); | |
040b4eee | 2887 | |
2888 | worklist.create (64); | |
2889 | /* Consider all phi as live statements. */ | |
2890 | for (gsi = gsi_start_phis (bb); !gsi_end_p (gsi); gsi_next (&gsi)) | |
2891 | { | |
2892 | phi = gsi_stmt (gsi); | |
2893 | gimple_set_plf (phi, GF_PLF_2, true); | |
2894 | worklist.safe_push (phi); | |
2895 | } | |
207b8288 | 2896 | /* Consider load/store statements, CALL and COND as live. */ |
040b4eee | 2897 | for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi)) |
2898 | { | |
2899 | stmt = gsi_stmt (gsi); | |
2900 | if (gimple_store_p (stmt) | |
2901 | || gimple_assign_load_p (stmt) | |
2902 | || is_gimple_debug (stmt)) | |
2903 | { | |
2904 | gimple_set_plf (stmt, GF_PLF_2, true); | |
2905 | worklist.safe_push (stmt); | |
2906 | continue; | |
2907 | } | |
2908 | code = gimple_code (stmt); | |
2909 | if (code == GIMPLE_COND || code == GIMPLE_CALL) | |
2910 | { | |
2911 | gimple_set_plf (stmt, GF_PLF_2, true); | |
2912 | worklist.safe_push (stmt); | |
2913 | continue; | |
2914 | } | |
2915 | gimple_set_plf (stmt, GF_PLF_2, false); | |
2916 | ||
2917 | if (code == GIMPLE_ASSIGN) | |
2918 | { | |
2919 | tree lhs = gimple_assign_lhs (stmt); | |
2920 | FOR_EACH_IMM_USE_FAST (use_p, imm_iter, lhs) | |
2921 | { | |
2922 | stmt1 = USE_STMT (use_p); | |
2923 | if (gimple_bb (stmt1) != bb) | |
2924 | { | |
2925 | gimple_set_plf (stmt, GF_PLF_2, true); | |
2926 | worklist.safe_push (stmt); | |
2927 | break; | |
2928 | } | |
2929 | } | |
2930 | } | |
2931 | } | |
2932 | /* Propagate liveness through arguments of live stmt. */ | |
2933 | while (worklist.length () > 0) | |
2934 | { | |
2935 | ssa_op_iter iter; | |
2936 | use_operand_p use_p; | |
2937 | tree use; | |
2938 | ||
2939 | stmt = worklist.pop (); | |
2940 | FOR_EACH_PHI_OR_STMT_USE (use_p, stmt, iter, SSA_OP_USE) | |
2941 | { | |
2942 | use = USE_FROM_PTR (use_p); | |
2943 | if (TREE_CODE (use) != SSA_NAME) | |
2944 | continue; | |
2945 | stmt1 = SSA_NAME_DEF_STMT (use); | |
2946 | if (gimple_bb (stmt1) != bb | |
2947 | || gimple_plf (stmt1, GF_PLF_2)) | |
2948 | continue; | |
2949 | gimple_set_plf (stmt1, GF_PLF_2, true); | |
2950 | worklist.safe_push (stmt1); | |
2951 | } | |
2952 | } | |
2953 | /* Delete dead statements. */ | |
2954 | gsi = gsi_start_bb (bb); | |
2955 | while (!gsi_end_p (gsi)) | |
2956 | { | |
2957 | stmt = gsi_stmt (gsi); | |
2958 | if (gimple_plf (stmt, GF_PLF_2)) | |
2959 | { | |
2960 | gsi_next (&gsi); | |
2961 | continue; | |
2962 | } | |
2963 | if (dump_file && (dump_flags & TDF_DETAILS)) | |
2964 | { | |
2965 | fprintf (dump_file, "Delete dead stmt in bb#%d\n", bb->index); | |
2966 | print_gimple_stmt (dump_file, stmt, 0, TDF_SLIM); | |
2967 | } | |
2968 | gsi_remove (&gsi, true); | |
2969 | release_defs (stmt); | |
2970 | } | |
2971 | } | |
2972 | ||
c71d3c24 | 2973 | /* If-convert LOOP when it is legal. For the moment this pass has no |
2974 | profitability analysis. Returns non-zero todo flags when something | |
2975 | changed. */ | |
2976 | ||
5b631e09 | 2977 | unsigned int |
5ead86d3 | 2978 | tree_if_conversion (struct loop *loop) |
07c03fb0 | 2979 | { |
c71d3c24 | 2980 | unsigned int todo = 0; |
9ab8df54 | 2981 | bool aggressive_if_conv; |
ccd0a9f9 | 2982 | struct loop *rloop; |
67763c7f | 2983 | bitmap exit_bbs; |
9ab8df54 | 2984 | |
ccd0a9f9 | 2985 | again: |
2986 | rloop = NULL; | |
1055d96a | 2987 | ifc_bbs = NULL; |
03821886 | 2988 | need_to_predicate = false; |
9ab8df54 | 2989 | any_complicated_phi = false; |
07c03fb0 | 2990 | |
9ab8df54 | 2991 | /* Apply more aggressive if-conversion when loop or its outer loop were |
2992 | marked with simd pragma. When that's the case, we try to if-convert | |
2993 | loop containing PHIs with more than MAX_PHI_ARG_NUM arguments. */ | |
040b4eee | 2994 | aggressive_if_conv = loop->force_vectorize; |
040b4eee | 2995 | if (!aggressive_if_conv) |
2996 | { | |
2997 | struct loop *outer_loop = loop_outer (loop); | |
2998 | if (outer_loop && outer_loop->force_vectorize) | |
2999 | aggressive_if_conv = true; | |
3000 | } | |
3001 | ||
9ab8df54 | 3002 | if (!ifcvt_split_critical_edges (loop, aggressive_if_conv)) |
3003 | goto cleanup; | |
040b4eee | 3004 | |
db5c1c9a | 3005 | if (!if_convertible_loop_p (loop) |
4bd0f929 | 3006 | || !dbg_cnt (if_conversion_tree)) |
60e0f7c4 | 3007 | goto cleanup; |
a1660ced | 3008 | |
03821886 | 3009 | if ((need_to_predicate || any_complicated_phi) |
4c73695b | 3010 | && ((!flag_tree_loop_vectorize && !loop->force_vectorize) |
c71d3c24 | 3011 | || loop->dont_vectorize)) |
3012 | goto cleanup; | |
3013 | ||
b0c413f2 | 3014 | /* Since we have no cost model, always version loops unless the user |
1e04d935 | 3015 | specified -ftree-loop-if-convert or unless versioning is required. |
3016 | Either version this loop, or if the pattern is right for outer-loop | |
3017 | vectorization, version the outer loop. In the latter case we will | |
3018 | still if-convert the original inner loop. */ | |
03821886 | 3019 | if (need_to_predicate |
1e04d935 | 3020 | || any_complicated_phi |
3021 | || flag_tree_loop_if_convert != 1) | |
3022 | { | |
3023 | struct loop *vloop | |
3024 | = (versionable_outer_loop_p (loop_outer (loop)) | |
3025 | ? loop_outer (loop) : loop); | |
ccd0a9f9 | 3026 | struct loop *nloop = version_loop_for_if_conversion (vloop); |
3027 | if (nloop == NULL) | |
1e04d935 | 3028 | goto cleanup; |
ccd0a9f9 | 3029 | if (vloop != loop) |
3030 | { | |
3031 | /* If versionable_outer_loop_p decided to version the | |
3032 | outer loop, version also the inner loop of the non-vectorized | |
3033 | loop copy. So we transform: | |
3034 | loop1 | |
3035 | loop2 | |
3036 | into: | |
3037 | if (LOOP_VECTORIZED (1, 3)) | |
3038 | { | |
3039 | loop1 | |
3040 | loop2 | |
3041 | } | |
3042 | else | |
3043 | loop3 (copy of loop1) | |
3044 | if (LOOP_VECTORIZED (4, 5)) | |
3045 | loop4 (copy of loop2) | |
3046 | else | |
3047 | loop5 (copy of loop4) */ | |
3048 | gcc_assert (nloop->inner && nloop->inner->next == NULL); | |
3049 | rloop = nloop->inner; | |
3050 | } | |
1e04d935 | 3051 | } |
c71d3c24 | 3052 | |
60e0f7c4 | 3053 | /* Now all statements are if-convertible. Combine all the basic |
3054 | blocks into one huge basic block doing the if-conversion | |
3055 | on-the-fly. */ | |
db5c1c9a | 3056 | combine_blocks (loop); |
3b91ccd9 | 3057 | |
6172a9fd | 3058 | /* Delete dead predicate computations. */ |
9ab8df54 | 3059 | ifcvt_local_dce (loop->header); |
040b4eee | 3060 | |
67763c7f | 3061 | /* Perform local CSE, this esp. helps the vectorizer analysis if loads |
3062 | and stores are involved. | |
3063 | ??? We'll still keep dead stores though. */ | |
3064 | exit_bbs = BITMAP_ALLOC (NULL); | |
3065 | bitmap_set_bit (exit_bbs, single_exit (loop)->dest->index); | |
3066 | todo |= do_rpo_vn (cfun, loop_preheader_edge (loop), exit_bbs); | |
3067 | BITMAP_FREE (exit_bbs); | |
3068 | ||
c71d3c24 | 3069 | todo |= TODO_cleanup_cfg; |
07c03fb0 | 3070 | |
60e0f7c4 | 3071 | cleanup: |
60e0f7c4 | 3072 | if (ifc_bbs) |
3073 | { | |
fa683b76 | 3074 | unsigned int i; |
3075 | ||
3076 | for (i = 0; i < loop->num_nodes; i++) | |
3077 | free_bb_predicate (ifc_bbs[i]); | |
3078 | ||
60e0f7c4 | 3079 | free (ifc_bbs); |
3080 | ifc_bbs = NULL; | |
3081 | } | |
ccd0a9f9 | 3082 | if (rloop != NULL) |
3083 | { | |
3084 | loop = rloop; | |
3085 | goto again; | |
3086 | } | |
b519e9db | 3087 | |
c71d3c24 | 3088 | return todo; |
07c03fb0 | 3089 | } |
3090 | ||
3091 | /* Tree if-conversion pass management. */ | |
3092 | ||
7620bc82 | 3093 | namespace { |
3094 | ||
3095 | const pass_data pass_data_if_conversion = | |
07c03fb0 | 3096 | { |
cbe8bda8 | 3097 | GIMPLE_PASS, /* type */ |
3098 | "ifcvt", /* name */ | |
3099 | OPTGROUP_NONE, /* optinfo_flags */ | |
ecec21ec | 3100 | TV_TREE_LOOP_IFCVT, /* tv_id */ |
cbe8bda8 | 3101 | ( PROP_cfg | PROP_ssa ), /* properties_required */ |
3102 | 0, /* properties_provided */ | |
3103 | 0, /* properties_destroyed */ | |
3104 | 0, /* todo_flags_start */ | |
8b88439e | 3105 | 0, /* todo_flags_finish */ |
07c03fb0 | 3106 | }; |
cbe8bda8 | 3107 | |
7620bc82 | 3108 | class pass_if_conversion : public gimple_opt_pass |
cbe8bda8 | 3109 | { |
3110 | public: | |
9af5ce0c | 3111 | pass_if_conversion (gcc::context *ctxt) |
3112 | : gimple_opt_pass (pass_data_if_conversion, ctxt) | |
cbe8bda8 | 3113 | {} |
3114 | ||
3115 | /* opt_pass methods: */ | |
31315c24 | 3116 | virtual bool gate (function *); |
65b0537f | 3117 | virtual unsigned int execute (function *); |
cbe8bda8 | 3118 | |
3119 | }; // class pass_if_conversion | |
3120 | ||
31315c24 | 3121 | bool |
3122 | pass_if_conversion::gate (function *fun) | |
3123 | { | |
3124 | return (((flag_tree_loop_vectorize || fun->has_force_vectorize_loops) | |
3125 | && flag_tree_loop_if_convert != 0) | |
b6f4c9b5 | 3126 | || flag_tree_loop_if_convert == 1); |
31315c24 | 3127 | } |
3128 | ||
65b0537f | 3129 | unsigned int |
3130 | pass_if_conversion::execute (function *fun) | |
3131 | { | |
3132 | struct loop *loop; | |
3133 | unsigned todo = 0; | |
3134 | ||
3135 | if (number_of_loops (fun) <= 1) | |
3136 | return 0; | |
3137 | ||
3138 | FOR_EACH_LOOP (loop, 0) | |
3139 | if (flag_tree_loop_if_convert == 1 | |
65b0537f | 3140 | || ((flag_tree_loop_vectorize || loop->force_vectorize) |
3141 | && !loop->dont_vectorize)) | |
3142 | todo |= tree_if_conversion (loop); | |
3143 | ||
e8e52514 | 3144 | if (todo) |
3145 | { | |
3146 | free_numbers_of_iterations_estimates (fun); | |
3147 | scev_reset (); | |
3148 | } | |
3149 | ||
382ecba7 | 3150 | if (flag_checking) |
3151 | { | |
3152 | basic_block bb; | |
3153 | FOR_EACH_BB_FN (bb, fun) | |
3154 | gcc_assert (!bb->aux); | |
3155 | } | |
65b0537f | 3156 | |
3157 | return todo; | |
3158 | } | |
3159 | ||
7620bc82 | 3160 | } // anon namespace |
3161 | ||
cbe8bda8 | 3162 | gimple_opt_pass * |
3163 | make_pass_if_conversion (gcc::context *ctxt) | |
3164 | { | |
3165 | return new pass_if_conversion (ctxt); | |
3166 | } |