]>
Commit | Line | Data |
---|---|---|
07c03fb0 | 1 | /* If-conversion for vectorizer. |
fbd26352 | 2 | Copyright (C) 2004-2019 Free Software Foundation, Inc. |
07c03fb0 | 3 | Contributed by Devang Patel <dpatel@apple.com> |
4 | ||
5 | This file is part of GCC. | |
6 | ||
7 | GCC is free software; you can redistribute it and/or modify it under | |
8 | the terms of the GNU General Public License as published by the Free | |
8c4c00c1 | 9 | Software Foundation; either version 3, or (at your option) any later |
07c03fb0 | 10 | version. |
11 | ||
12 | GCC is distributed in the hope that it will be useful, but WITHOUT ANY | |
13 | WARRANTY; without even the implied warranty of MERCHANTABILITY or | |
14 | FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License | |
15 | for more details. | |
16 | ||
17 | You should have received a copy of the GNU General Public License | |
8c4c00c1 | 18 | along with GCC; see the file COPYING3. If not see |
19 | <http://www.gnu.org/licenses/>. */ | |
07c03fb0 | 20 | |
b01e3c9b | 21 | /* This pass implements a tree level if-conversion of loops. Its |
22 | initial goal is to help the vectorizer to vectorize loops with | |
23 | conditions. | |
07c03fb0 | 24 | |
25 | A short description of if-conversion: | |
26 | ||
9b482bc6 | 27 | o Decide if a loop is if-convertible or not. |
07c03fb0 | 28 | o Walk all loop basic blocks in breadth first order (BFS order). |
29 | o Remove conditional statements (at the end of basic block) | |
35a40aad | 30 | and propagate condition into destination basic blocks' |
07c03fb0 | 31 | predicate list. |
32 | o Replace modify expression with conditional modify expression | |
33 | using current basic block's condition. | |
34 | o Merge all basic blocks | |
35 | o Replace phi nodes with conditional modify expr | |
36 | o Merge all basic blocks into header | |
37 | ||
38 | Sample transformation: | |
39 | ||
40 | INPUT | |
41 | ----- | |
42 | ||
43 | # i_23 = PHI <0(0), i_18(10)>; | |
44 | <L0>:; | |
45 | j_15 = A[i_23]; | |
46 | if (j_15 > 41) goto <L1>; else goto <L17>; | |
47 | ||
48 | <L17>:; | |
49 | goto <bb 3> (<L3>); | |
50 | ||
51 | <L1>:; | |
52 | ||
53 | # iftmp.2_4 = PHI <0(8), 42(2)>; | |
54 | <L3>:; | |
55 | A[i_23] = iftmp.2_4; | |
56 | i_18 = i_23 + 1; | |
57 | if (i_18 <= 15) goto <L19>; else goto <L18>; | |
58 | ||
59 | <L19>:; | |
60 | goto <bb 1> (<L0>); | |
61 | ||
62 | <L18>:; | |
63 | ||
64 | OUTPUT | |
65 | ------ | |
66 | ||
67 | # i_23 = PHI <0(0), i_18(10)>; | |
68 | <L0>:; | |
69 | j_15 = A[i_23]; | |
35a40aad | 70 | |
07c03fb0 | 71 | <L3>:; |
72 | iftmp.2_4 = j_15 > 41 ? 42 : 0; | |
73 | A[i_23] = iftmp.2_4; | |
74 | i_18 = i_23 + 1; | |
75 | if (i_18 <= 15) goto <L19>; else goto <L18>; | |
35a40aad | 76 | |
07c03fb0 | 77 | <L19>:; |
78 | goto <bb 1> (<L0>); | |
79 | ||
80 | <L18>:; | |
81 | */ | |
82 | ||
83 | #include "config.h" | |
84 | #include "system.h" | |
85 | #include "coretypes.h" | |
9ef16211 | 86 | #include "backend.h" |
7c29e30e | 87 | #include "rtl.h" |
07c03fb0 | 88 | #include "tree.h" |
9ef16211 | 89 | #include "gimple.h" |
7c29e30e | 90 | #include "cfghooks.h" |
91 | #include "tree-pass.h" | |
9ef16211 | 92 | #include "ssa.h" |
7c29e30e | 93 | #include "expmed.h" |
94 | #include "optabs-query.h" | |
7c29e30e | 95 | #include "gimple-pretty-print.h" |
9ef16211 | 96 | #include "alias.h" |
b20a8bb4 | 97 | #include "fold-const.h" |
9ed99284 | 98 | #include "stor-layout.h" |
bc61cadb | 99 | #include "gimple-fold.h" |
a8783bee | 100 | #include "gimplify.h" |
dcf1a1ec | 101 | #include "gimple-iterator.h" |
e795d6e1 | 102 | #include "gimplify-me.h" |
073c1fd5 | 103 | #include "tree-cfg.h" |
073c1fd5 | 104 | #include "tree-into-ssa.h" |
69ee5dbb | 105 | #include "tree-ssa.h" |
07c03fb0 | 106 | #include "cfgloop.h" |
07c03fb0 | 107 | #include "tree-data-ref.h" |
108 | #include "tree-scalar-evolution.h" | |
189d0706 | 109 | #include "tree-ssa-loop.h" |
110 | #include "tree-ssa-loop-niter.h" | |
c71d3c24 | 111 | #include "tree-ssa-loop-ivopts.h" |
112 | #include "tree-ssa-address.h" | |
4bd0f929 | 113 | #include "dbgcnt.h" |
f4ff098a | 114 | #include "tree-hash-traits.h" |
ee0e4f73 | 115 | #include "varasm.h" |
d73c1238 | 116 | #include "builtins.h" |
d2276060 | 117 | #include "params.h" |
2e063ded | 118 | #include "cfganal.h" |
03821886 | 119 | #include "internal-fn.h" |
120 | #include "fold-const.h" | |
67763c7f | 121 | #include "tree-ssa-sccvn.h" |
40d78a35 | 122 | #include "tree-cfgcleanup.h" |
9ab8df54 | 123 | |
124 | /* Only handle PHIs with no more arguments unless we are asked to by | |
125 | simd pragma. */ | |
126 | #define MAX_PHI_ARG_NUM \ | |
127 | ((unsigned) PARAM_VALUE (PARAM_MAX_TREE_IF_CONVERSION_PHI_ARGS)) | |
128 | ||
03821886 | 129 | /* True if we've converted a statement that was only executed when some |
130 | condition C was true, and if for correctness we need to predicate the | |
131 | statement to ensure that it is a no-op when C is false. See | |
132 | predicate_statements for the kinds of predication we support. */ | |
133 | static bool need_to_predicate; | |
07c03fb0 | 134 | |
9ab8df54 | 135 | /* Indicate if there are any complicated PHIs that need to be handled in |
136 | if-conversion. Complicated PHI has more than two arguments and can't | |
137 | be degenerated to two arguments PHI. See more information in comment | |
138 | before phi_convertible_by_degenerating_args. */ | |
139 | static bool any_complicated_phi; | |
140 | ||
bd6f374c | 141 | /* Hash for struct innermost_loop_behavior. It depends on the user to |
142 | free the memory. */ | |
143 | ||
144 | struct innermost_loop_behavior_hash : nofree_ptr_hash <innermost_loop_behavior> | |
145 | { | |
146 | static inline hashval_t hash (const value_type &); | |
147 | static inline bool equal (const value_type &, | |
148 | const compare_type &); | |
149 | }; | |
150 | ||
151 | inline hashval_t | |
152 | innermost_loop_behavior_hash::hash (const value_type &e) | |
153 | { | |
154 | hashval_t hash; | |
155 | ||
156 | hash = iterative_hash_expr (e->base_address, 0); | |
157 | hash = iterative_hash_expr (e->offset, hash); | |
158 | hash = iterative_hash_expr (e->init, hash); | |
159 | return iterative_hash_expr (e->step, hash); | |
160 | } | |
161 | ||
162 | inline bool | |
163 | innermost_loop_behavior_hash::equal (const value_type &e1, | |
164 | const compare_type &e2) | |
165 | { | |
166 | if ((e1->base_address && !e2->base_address) | |
167 | || (!e1->base_address && e2->base_address) | |
168 | || (!e1->offset && e2->offset) | |
169 | || (e1->offset && !e2->offset) | |
170 | || (!e1->init && e2->init) | |
171 | || (e1->init && !e2->init) | |
172 | || (!e1->step && e2->step) | |
173 | || (e1->step && !e2->step)) | |
174 | return false; | |
175 | ||
176 | if (e1->base_address && e2->base_address | |
177 | && !operand_equal_p (e1->base_address, e2->base_address, 0)) | |
178 | return false; | |
179 | if (e1->offset && e2->offset | |
180 | && !operand_equal_p (e1->offset, e2->offset, 0)) | |
181 | return false; | |
182 | if (e1->init && e2->init | |
183 | && !operand_equal_p (e1->init, e2->init, 0)) | |
184 | return false; | |
185 | if (e1->step && e2->step | |
186 | && !operand_equal_p (e1->step, e2->step, 0)) | |
187 | return false; | |
188 | ||
189 | return true; | |
190 | } | |
191 | ||
07c03fb0 | 192 | /* List of basic blocks in if-conversion-suitable order. */ |
193 | static basic_block *ifc_bbs; | |
194 | ||
bd6f374c | 195 | /* Hash table to store <DR's innermost loop behavior, DR> pairs. */ |
196 | static hash_map<innermost_loop_behavior_hash, | |
197 | data_reference_p> *innermost_DR_map; | |
ee0e4f73 | 198 | |
bd6f374c | 199 | /* Hash table to store <base reference, DR> pairs. */ |
ee0e4f73 | 200 | static hash_map<tree_operand_hash, data_reference_p> *baseref_DR_map; |
201 | ||
03821886 | 202 | /* List of redundant SSA names: the first should be replaced by the second. */ |
203 | static vec< std::pair<tree, tree> > redundant_ssa_names; | |
204 | ||
fa683b76 | 205 | /* Structure used to predicate basic blocks. This is attached to the |
206 | ->aux field of the BBs in the loop to be if-converted. */ | |
04009ada | 207 | struct bb_predicate { |
fa683b76 | 208 | |
209 | /* The condition under which this basic block is executed. */ | |
210 | tree predicate; | |
211 | ||
212 | /* PREDICATE is gimplified, and the sequence of statements is | |
213 | recorded here, in order to avoid the duplication of computations | |
214 | that occur in previous conditions. See PR44483. */ | |
215 | gimple_seq predicate_gimplified_stmts; | |
04009ada | 216 | }; |
fa683b76 | 217 | |
218 | /* Returns true when the basic block BB has a predicate. */ | |
219 | ||
220 | static inline bool | |
221 | bb_has_predicate (basic_block bb) | |
222 | { | |
223 | return bb->aux != NULL; | |
224 | } | |
225 | ||
226 | /* Returns the gimplified predicate for basic block BB. */ | |
227 | ||
228 | static inline tree | |
229 | bb_predicate (basic_block bb) | |
230 | { | |
04009ada | 231 | return ((struct bb_predicate *) bb->aux)->predicate; |
fa683b76 | 232 | } |
233 | ||
234 | /* Sets the gimplified predicate COND for basic block BB. */ | |
235 | ||
236 | static inline void | |
237 | set_bb_predicate (basic_block bb, tree cond) | |
238 | { | |
56db02b2 | 239 | gcc_assert ((TREE_CODE (cond) == TRUTH_NOT_EXPR |
240 | && is_gimple_condexpr (TREE_OPERAND (cond, 0))) | |
241 | || is_gimple_condexpr (cond)); | |
04009ada | 242 | ((struct bb_predicate *) bb->aux)->predicate = cond; |
fa683b76 | 243 | } |
244 | ||
245 | /* Returns the sequence of statements of the gimplification of the | |
246 | predicate for basic block BB. */ | |
247 | ||
248 | static inline gimple_seq | |
249 | bb_predicate_gimplified_stmts (basic_block bb) | |
250 | { | |
04009ada | 251 | return ((struct bb_predicate *) bb->aux)->predicate_gimplified_stmts; |
fa683b76 | 252 | } |
253 | ||
254 | /* Sets the sequence of statements STMTS of the gimplification of the | |
255 | predicate for basic block BB. */ | |
256 | ||
257 | static inline void | |
258 | set_bb_predicate_gimplified_stmts (basic_block bb, gimple_seq stmts) | |
259 | { | |
04009ada | 260 | ((struct bb_predicate *) bb->aux)->predicate_gimplified_stmts = stmts; |
fa683b76 | 261 | } |
262 | ||
263 | /* Adds the sequence of statements STMTS to the sequence of statements | |
264 | of the predicate for basic block BB. */ | |
265 | ||
266 | static inline void | |
267 | add_bb_predicate_gimplified_stmts (basic_block bb, gimple_seq stmts) | |
268 | { | |
5cc7d4f7 | 269 | /* We might have updated some stmts in STMTS via force_gimple_operand |
270 | calling fold_stmt and that producing multiple stmts. Delink immediate | |
271 | uses so update_ssa after loop versioning doesn't get confused for | |
272 | the not yet inserted predicates. | |
273 | ??? This should go away once we reliably avoid updating stmts | |
274 | not in any BB. */ | |
275 | for (gimple_stmt_iterator gsi = gsi_start (stmts); | |
276 | !gsi_end_p (gsi); gsi_next (&gsi)) | |
277 | { | |
278 | gimple *stmt = gsi_stmt (gsi); | |
279 | delink_stmt_imm_use (stmt); | |
280 | gimple_set_modified (stmt, true); | |
281 | } | |
c3deca25 | 282 | gimple_seq_add_seq_without_update |
04009ada | 283 | (&(((struct bb_predicate *) bb->aux)->predicate_gimplified_stmts), stmts); |
fa683b76 | 284 | } |
285 | ||
286 | /* Initializes to TRUE the predicate of basic block BB. */ | |
287 | ||
288 | static inline void | |
289 | init_bb_predicate (basic_block bb) | |
290 | { | |
04009ada | 291 | bb->aux = XNEW (struct bb_predicate); |
fa683b76 | 292 | set_bb_predicate_gimplified_stmts (bb, NULL); |
48c9b1fe | 293 | set_bb_predicate (bb, boolean_true_node); |
fa683b76 | 294 | } |
295 | ||
d2bbbc2d | 296 | /* Release the SSA_NAMEs associated with the predicate of basic block BB. */ |
fa683b76 | 297 | |
298 | static inline void | |
c71d3c24 | 299 | release_bb_predicate (basic_block bb) |
fa683b76 | 300 | { |
c71d3c24 | 301 | gimple_seq stmts = bb_predicate_gimplified_stmts (bb); |
fa683b76 | 302 | if (stmts) |
303 | { | |
d2bbbc2d | 304 | /* Ensure that these stmts haven't yet been added to a bb. */ |
c3deca25 | 305 | if (flag_checking) |
306 | for (gimple_stmt_iterator i = gsi_start (stmts); | |
307 | !gsi_end_p (i); gsi_next (&i)) | |
d2bbbc2d | 308 | gcc_assert (! gimple_bb (gsi_stmt (i))); |
fa683b76 | 309 | |
d2bbbc2d | 310 | /* Discard them. */ |
311 | gimple_seq_discard (stmts); | |
c71d3c24 | 312 | set_bb_predicate_gimplified_stmts (bb, NULL); |
fa683b76 | 313 | } |
c71d3c24 | 314 | } |
315 | ||
316 | /* Free the predicate of basic block BB. */ | |
fa683b76 | 317 | |
c71d3c24 | 318 | static inline void |
319 | free_bb_predicate (basic_block bb) | |
320 | { | |
321 | if (!bb_has_predicate (bb)) | |
322 | return; | |
323 | ||
324 | release_bb_predicate (bb); | |
fa683b76 | 325 | free (bb->aux); |
326 | bb->aux = NULL; | |
327 | } | |
328 | ||
c71d3c24 | 329 | /* Reinitialize predicate of BB with the true predicate. */ |
48c9b1fe | 330 | |
331 | static inline void | |
332 | reset_bb_predicate (basic_block bb) | |
333 | { | |
c71d3c24 | 334 | if (!bb_has_predicate (bb)) |
335 | init_bb_predicate (bb); | |
336 | else | |
337 | { | |
338 | release_bb_predicate (bb); | |
339 | set_bb_predicate (bb, boolean_true_node); | |
340 | } | |
48c9b1fe | 341 | } |
342 | ||
3b91ccd9 | 343 | /* Returns a new SSA_NAME of type TYPE that is assigned the value of |
344 | the expression EXPR. Inserts the statement created for this | |
345 | computation before GSI and leaves the iterator GSI at the same | |
346 | statement. */ | |
07c03fb0 | 347 | |
3b91ccd9 | 348 | static tree |
349 | ifc_temp_var (tree type, tree expr, gimple_stmt_iterator *gsi) | |
07c03fb0 | 350 | { |
03d37e4e | 351 | tree new_name = make_temp_ssa_name (type, NULL, "_ifc_"); |
42acab1c | 352 | gimple *stmt = gimple_build_assign (new_name, expr); |
ebd01afe | 353 | gimple_set_vuse (stmt, gimple_vuse (gsi_stmt (*gsi))); |
3b91ccd9 | 354 | gsi_insert_before (gsi, stmt, GSI_SAME_STMT); |
03d37e4e | 355 | return new_name; |
1055d96a | 356 | } |
357 | ||
97efb92e | 358 | /* Return true when COND is a false predicate. */ |
359 | ||
360 | static inline bool | |
361 | is_false_predicate (tree cond) | |
362 | { | |
a8876617 | 363 | return (cond != NULL_TREE |
364 | && (cond == boolean_false_node | |
365 | || integer_zerop (cond))); | |
97efb92e | 366 | } |
367 | ||
16d6ea49 | 368 | /* Return true when COND is a true predicate. */ |
369 | ||
370 | static inline bool | |
371 | is_true_predicate (tree cond) | |
372 | { | |
373 | return (cond == NULL_TREE | |
374 | || cond == boolean_true_node | |
375 | || integer_onep (cond)); | |
376 | } | |
377 | ||
378 | /* Returns true when BB has a predicate that is not trivial: true or | |
379 | NULL_TREE. */ | |
380 | ||
381 | static inline bool | |
382 | is_predicated (basic_block bb) | |
383 | { | |
fa683b76 | 384 | return !is_true_predicate (bb_predicate (bb)); |
16d6ea49 | 385 | } |
386 | ||
74a43fe9 | 387 | /* Parses the predicate COND and returns its comparison code and |
388 | operands OP0 and OP1. */ | |
389 | ||
390 | static enum tree_code | |
391 | parse_predicate (tree cond, tree *op0, tree *op1) | |
392 | { | |
42acab1c | 393 | gimple *s; |
74a43fe9 | 394 | |
395 | if (TREE_CODE (cond) == SSA_NAME | |
396 | && is_gimple_assign (s = SSA_NAME_DEF_STMT (cond))) | |
397 | { | |
398 | if (TREE_CODE_CLASS (gimple_assign_rhs_code (s)) == tcc_comparison) | |
399 | { | |
400 | *op0 = gimple_assign_rhs1 (s); | |
401 | *op1 = gimple_assign_rhs2 (s); | |
402 | return gimple_assign_rhs_code (s); | |
403 | } | |
404 | ||
405 | else if (gimple_assign_rhs_code (s) == TRUTH_NOT_EXPR) | |
406 | { | |
407 | tree op = gimple_assign_rhs1 (s); | |
408 | tree type = TREE_TYPE (op); | |
409 | enum tree_code code = parse_predicate (op, op0, op1); | |
410 | ||
411 | return code == ERROR_MARK ? ERROR_MARK | |
93633022 | 412 | : invert_tree_comparison (code, HONOR_NANS (type)); |
74a43fe9 | 413 | } |
414 | ||
415 | return ERROR_MARK; | |
416 | } | |
417 | ||
21c8a0ab | 418 | if (COMPARISON_CLASS_P (cond)) |
74a43fe9 | 419 | { |
420 | *op0 = TREE_OPERAND (cond, 0); | |
421 | *op1 = TREE_OPERAND (cond, 1); | |
422 | return TREE_CODE (cond); | |
423 | } | |
424 | ||
425 | return ERROR_MARK; | |
426 | } | |
427 | ||
4be7307c | 428 | /* Returns the fold of predicate C1 OR C2 at location LOC. */ |
429 | ||
430 | static tree | |
431 | fold_or_predicates (location_t loc, tree c1, tree c2) | |
432 | { | |
433 | tree op1a, op1b, op2a, op2b; | |
434 | enum tree_code code1 = parse_predicate (c1, &op1a, &op1b); | |
435 | enum tree_code code2 = parse_predicate (c2, &op2a, &op2b); | |
436 | ||
437 | if (code1 != ERROR_MARK && code2 != ERROR_MARK) | |
438 | { | |
439 | tree t = maybe_fold_or_comparisons (code1, op1a, op1b, | |
440 | code2, op2a, op2b); | |
441 | if (t) | |
442 | return t; | |
443 | } | |
444 | ||
445 | return fold_build2_loc (loc, TRUTH_OR_EXPR, boolean_type_node, c1, c2); | |
446 | } | |
447 | ||
ed0124a2 | 448 | /* Returns either a COND_EXPR or the folded expression if the folded |
449 | expression is a MIN_EXPR, a MAX_EXPR, an ABS_EXPR, | |
450 | a constant or a SSA_NAME. */ | |
451 | ||
452 | static tree | |
453 | fold_build_cond_expr (tree type, tree cond, tree rhs, tree lhs) | |
454 | { | |
455 | tree rhs1, lhs1, cond_expr; | |
040b4eee | 456 | |
457 | /* If COND is comparison r != 0 and r has boolean type, convert COND | |
458 | to SSA_NAME to accept by vect bool pattern. */ | |
459 | if (TREE_CODE (cond) == NE_EXPR) | |
460 | { | |
461 | tree op0 = TREE_OPERAND (cond, 0); | |
462 | tree op1 = TREE_OPERAND (cond, 1); | |
463 | if (TREE_CODE (op0) == SSA_NAME | |
464 | && TREE_CODE (TREE_TYPE (op0)) == BOOLEAN_TYPE | |
465 | && (integer_zerop (op1))) | |
466 | cond = op0; | |
467 | } | |
00a2230f | 468 | cond_expr = fold_ternary (COND_EXPR, type, cond, rhs, lhs); |
ed0124a2 | 469 | |
470 | if (cond_expr == NULL_TREE) | |
471 | return build3 (COND_EXPR, type, cond, rhs, lhs); | |
472 | ||
473 | STRIP_USELESS_TYPE_CONVERSION (cond_expr); | |
474 | ||
00a2230f | 475 | if (is_gimple_val (cond_expr)) |
ed0124a2 | 476 | return cond_expr; |
477 | ||
478 | if (TREE_CODE (cond_expr) == ABS_EXPR) | |
479 | { | |
480 | rhs1 = TREE_OPERAND (cond_expr, 1); | |
481 | STRIP_USELESS_TYPE_CONVERSION (rhs1); | |
00a2230f | 482 | if (is_gimple_val (rhs1)) |
ed0124a2 | 483 | return build1 (ABS_EXPR, type, rhs1); |
484 | } | |
485 | ||
486 | if (TREE_CODE (cond_expr) == MIN_EXPR | |
487 | || TREE_CODE (cond_expr) == MAX_EXPR) | |
488 | { | |
489 | lhs1 = TREE_OPERAND (cond_expr, 0); | |
490 | STRIP_USELESS_TYPE_CONVERSION (lhs1); | |
491 | rhs1 = TREE_OPERAND (cond_expr, 1); | |
492 | STRIP_USELESS_TYPE_CONVERSION (rhs1); | |
00a2230f | 493 | if (is_gimple_val (rhs1) && is_gimple_val (lhs1)) |
ed0124a2 | 494 | return build2 (TREE_CODE (cond_expr), type, lhs1, rhs1); |
495 | } | |
496 | return build3 (COND_EXPR, type, cond, rhs, lhs); | |
497 | } | |
498 | ||
c71d3c24 | 499 | /* Add condition NC to the predicate list of basic block BB. LOOP is |
ee080c0c | 500 | the loop to be if-converted. Use predicate of cd-equivalent block |
501 | for join bb if it exists: we call basic blocks bb1 and bb2 | |
502 | cd-equivalent if they are executed under the same condition. */ | |
1055d96a | 503 | |
16d6ea49 | 504 | static inline void |
2e966e2a | 505 | add_to_predicate_list (class loop *loop, basic_block bb, tree nc) |
1055d96a | 506 | { |
56db02b2 | 507 | tree bc, *tp; |
ee080c0c | 508 | basic_block dom_bb; |
74a43fe9 | 509 | |
510 | if (is_true_predicate (nc)) | |
511 | return; | |
512 | ||
ee080c0c | 513 | /* If dominance tells us this basic block is always executed, |
514 | don't record any predicates for it. */ | |
515 | if (dominated_by_p (CDI_DOMINATORS, loop->latch, bb)) | |
516 | return; | |
c71d3c24 | 517 | |
ee080c0c | 518 | dom_bb = get_immediate_dominator (CDI_DOMINATORS, bb); |
519 | /* We use notion of cd equivalence to get simpler predicate for | |
520 | join block, e.g. if join block has 2 predecessors with predicates | |
521 | p1 & p2 and p1 & !p2, we'd like to get p1 for it instead of | |
522 | p1 & p2 | p1 & !p2. */ | |
523 | if (dom_bb != loop->header | |
524 | && get_immediate_dominator (CDI_POST_DOMINATORS, dom_bb) == bb) | |
525 | { | |
526 | gcc_assert (flow_bb_inside_loop_p (loop, dom_bb)); | |
527 | bc = bb_predicate (dom_bb); | |
3c7578f3 | 528 | if (!is_true_predicate (bc)) |
529 | set_bb_predicate (bb, bc); | |
530 | else | |
531 | gcc_assert (is_true_predicate (bb_predicate (bb))); | |
ee080c0c | 532 | if (dump_file && (dump_flags & TDF_DETAILS)) |
533 | fprintf (dump_file, "Use predicate of bb#%d for bb#%d\n", | |
534 | dom_bb->index, bb->index); | |
535 | return; | |
c71d3c24 | 536 | } |
ee080c0c | 537 | |
538 | if (!is_predicated (bb)) | |
539 | bc = nc; | |
74a43fe9 | 540 | else |
541 | { | |
74a43fe9 | 542 | bc = bb_predicate (bb); |
4be7307c | 543 | bc = fold_or_predicates (EXPR_LOCATION (bc), nc, bc); |
56db02b2 | 544 | if (is_true_predicate (bc)) |
545 | { | |
546 | reset_bb_predicate (bb); | |
547 | return; | |
548 | } | |
74a43fe9 | 549 | } |
550 | ||
56db02b2 | 551 | /* Allow a TRUTH_NOT_EXPR around the main predicate. */ |
552 | if (TREE_CODE (bc) == TRUTH_NOT_EXPR) | |
553 | tp = &TREE_OPERAND (bc, 0); | |
554 | else | |
555 | tp = &bc; | |
556 | if (!is_gimple_condexpr (*tp)) | |
74a43fe9 | 557 | { |
558 | gimple_seq stmts; | |
56db02b2 | 559 | *tp = force_gimple_operand_1 (*tp, &stmts, is_gimple_condexpr, NULL_TREE); |
74a43fe9 | 560 | add_bb_predicate_gimplified_stmts (bb, stmts); |
561 | } | |
56db02b2 | 562 | set_bb_predicate (bb, bc); |
1055d96a | 563 | } |
564 | ||
60e0f7c4 | 565 | /* Add the condition COND to the previous condition PREV_COND, and add |
566 | this to the predicate list of the destination of edge E. LOOP is | |
567 | the loop to be if-converted. */ | |
1055d96a | 568 | |
16d6ea49 | 569 | static void |
2e966e2a | 570 | add_to_dst_predicate_list (class loop *loop, edge e, |
60e0f7c4 | 571 | tree prev_cond, tree cond) |
1055d96a | 572 | { |
1055d96a | 573 | if (!flow_bb_inside_loop_p (loop, e->dest)) |
16d6ea49 | 574 | return; |
1055d96a | 575 | |
16d6ea49 | 576 | if (!is_true_predicate (prev_cond)) |
577 | cond = fold_build2 (TRUTH_AND_EXPR, boolean_type_node, | |
578 | prev_cond, cond); | |
07c03fb0 | 579 | |
040b4eee | 580 | if (!dominated_by_p (CDI_DOMINATORS, loop->latch, e->dest)) |
581 | add_to_predicate_list (loop, e->dest, cond); | |
1055d96a | 582 | } |
58bc8309 | 583 | |
b01e3c9b | 584 | /* Return true if one of the successor edges of BB exits LOOP. */ |
07c03fb0 | 585 | |
1055d96a | 586 | static bool |
2e966e2a | 587 | bb_with_exit_edge_p (class loop *loop, basic_block bb) |
1055d96a | 588 | { |
589 | edge e; | |
590 | edge_iterator ei; | |
07c03fb0 | 591 | |
1055d96a | 592 | FOR_EACH_EDGE (e, ei, bb->succs) |
593 | if (loop_exit_edge_p (loop, e)) | |
b01e3c9b | 594 | return true; |
07c03fb0 | 595 | |
b01e3c9b | 596 | return false; |
1055d96a | 597 | } |
63a1777b | 598 | |
4bd8a059 | 599 | /* Given PHI which has more than two arguments, this function checks if |
600 | it's if-convertible by degenerating its arguments. Specifically, if | |
601 | below two conditions are satisfied: | |
602 | ||
603 | 1) Number of PHI arguments with different values equals to 2 and one | |
604 | argument has the only occurrence. | |
605 | 2) The edge corresponding to the unique argument isn't critical edge. | |
606 | ||
607 | Such PHI can be handled as PHIs have only two arguments. For example, | |
608 | below PHI: | |
609 | ||
610 | res = PHI <A_1(e1), A_1(e2), A_2(e3)>; | |
611 | ||
612 | can be transformed into: | |
613 | ||
614 | res = (predicate of e3) ? A_2 : A_1; | |
615 | ||
616 | Return TRUE if it is the case, FALSE otherwise. */ | |
617 | ||
618 | static bool | |
619 | phi_convertible_by_degenerating_args (gphi *phi) | |
620 | { | |
621 | edge e; | |
622 | tree arg, t1 = NULL, t2 = NULL; | |
623 | unsigned int i, i1 = 0, i2 = 0, n1 = 0, n2 = 0; | |
624 | unsigned int num_args = gimple_phi_num_args (phi); | |
625 | ||
626 | gcc_assert (num_args > 2); | |
627 | ||
628 | for (i = 0; i < num_args; i++) | |
629 | { | |
630 | arg = gimple_phi_arg_def (phi, i); | |
631 | if (t1 == NULL || operand_equal_p (t1, arg, 0)) | |
632 | { | |
633 | n1++; | |
634 | i1 = i; | |
635 | t1 = arg; | |
636 | } | |
637 | else if (t2 == NULL || operand_equal_p (t2, arg, 0)) | |
638 | { | |
639 | n2++; | |
640 | i2 = i; | |
641 | t2 = arg; | |
642 | } | |
643 | else | |
644 | return false; | |
645 | } | |
646 | ||
647 | if (n1 != 1 && n2 != 1) | |
648 | return false; | |
649 | ||
650 | /* Check if the edge corresponding to the unique arg is critical. */ | |
651 | e = gimple_phi_arg_edge (phi, (n1 == 1) ? i1 : i2); | |
652 | if (EDGE_COUNT (e->src->succs) > 1) | |
653 | return false; | |
654 | ||
655 | return true; | |
656 | } | |
657 | ||
b01e3c9b | 658 | /* Return true when PHI is if-convertible. PHI is part of loop LOOP |
9ab8df54 | 659 | and it belongs to basic block BB. Note at this point, it is sure |
660 | that PHI is if-convertible. This function updates global variable | |
661 | ANY_COMPLICATED_PHI if PHI is complicated. */ | |
07c03fb0 | 662 | |
663 | static bool | |
2e966e2a | 664 | if_convertible_phi_p (class loop *loop, basic_block bb, gphi *phi) |
07c03fb0 | 665 | { |
666 | if (dump_file && (dump_flags & TDF_DETAILS)) | |
667 | { | |
668 | fprintf (dump_file, "-------------------------\n"); | |
75a70cf9 | 669 | print_gimple_stmt (dump_file, phi, 0, TDF_SLIM); |
07c03fb0 | 670 | } |
35a40aad | 671 | |
9ab8df54 | 672 | if (bb != loop->header |
673 | && gimple_phi_num_args (phi) > 2 | |
674 | && !phi_convertible_by_degenerating_args (phi)) | |
675 | any_complicated_phi = true; | |
35a40aad | 676 | |
07c03fb0 | 677 | return true; |
678 | } | |
679 | ||
9cabbd00 | 680 | /* Records the status of a data reference. This struct is attached to |
681 | each DR->aux field. */ | |
682 | ||
683 | struct ifc_dr { | |
0308f68b | 684 | bool rw_unconditionally; |
685 | bool w_unconditionally; | |
686 | bool written_at_least_once; | |
9cabbd00 | 687 | |
0308f68b | 688 | tree rw_predicate; |
689 | tree w_predicate; | |
690 | tree base_w_predicate; | |
9cabbd00 | 691 | }; |
692 | ||
693 | #define IFC_DR(DR) ((struct ifc_dr *) (DR)->aux) | |
0308f68b | 694 | #define DR_BASE_W_UNCONDITIONALLY(DR) (IFC_DR (DR)->written_at_least_once) |
9cabbd00 | 695 | #define DR_RW_UNCONDITIONALLY(DR) (IFC_DR (DR)->rw_unconditionally) |
0308f68b | 696 | #define DR_W_UNCONDITIONALLY(DR) (IFC_DR (DR)->w_unconditionally) |
9cabbd00 | 697 | |
ee0e4f73 | 698 | /* Iterates over DR's and stores refs, DR and base refs, DR pairs in |
699 | HASH tables. While storing them in HASH table, it checks if the | |
700 | reference is unconditionally read or written and stores that as a flag | |
701 | information. For base reference it checks if it is written atlest once | |
702 | unconditionally and stores it as flag information along with DR. | |
703 | In other words for every data reference A in STMT there exist other | |
704 | accesses to a data reference with the same base with predicates that | |
705 | add up (OR-up) to the true predicate: this ensures that the data | |
706 | reference A is touched (read or written) on every iteration of the | |
707 | if-converted loop. */ | |
708 | static void | |
709 | hash_memrefs_baserefs_and_store_DRs_read_written_info (data_reference_p a) | |
e1cc68bd | 710 | { |
e1cc68bd | 711 | |
ee0e4f73 | 712 | data_reference_p *master_dr, *base_master_dr; |
ee0e4f73 | 713 | tree base_ref = DR_BASE_OBJECT (a); |
bd6f374c | 714 | innermost_loop_behavior *innermost = &DR_INNERMOST (a); |
ee0e4f73 | 715 | tree ca = bb_predicate (gimple_bb (DR_STMT (a))); |
716 | bool exist1, exist2; | |
e1cc68bd | 717 | |
bd6f374c | 718 | master_dr = &innermost_DR_map->get_or_insert (innermost, &exist1); |
ee0e4f73 | 719 | if (!exist1) |
0308f68b | 720 | *master_dr = a; |
721 | ||
722 | if (DR_IS_WRITE (a)) | |
ee0e4f73 | 723 | { |
0308f68b | 724 | IFC_DR (*master_dr)->w_predicate |
725 | = fold_or_predicates (UNKNOWN_LOCATION, ca, | |
726 | IFC_DR (*master_dr)->w_predicate); | |
727 | if (is_true_predicate (IFC_DR (*master_dr)->w_predicate)) | |
728 | DR_W_UNCONDITIONALLY (*master_dr) = true; | |
ee0e4f73 | 729 | } |
0308f68b | 730 | IFC_DR (*master_dr)->rw_predicate |
731 | = fold_or_predicates (UNKNOWN_LOCATION, ca, | |
732 | IFC_DR (*master_dr)->rw_predicate); | |
733 | if (is_true_predicate (IFC_DR (*master_dr)->rw_predicate)) | |
734 | DR_RW_UNCONDITIONALLY (*master_dr) = true; | |
e1cc68bd | 735 | |
9d1206d5 | 736 | if (DR_IS_WRITE (a)) |
ee0e4f73 | 737 | { |
9d1206d5 | 738 | base_master_dr = &baseref_DR_map->get_or_insert (base_ref, &exist2); |
9d1206d5 | 739 | if (!exist2) |
0308f68b | 740 | *base_master_dr = a; |
741 | IFC_DR (*base_master_dr)->base_w_predicate | |
742 | = fold_or_predicates (UNKNOWN_LOCATION, ca, | |
743 | IFC_DR (*base_master_dr)->base_w_predicate); | |
744 | if (is_true_predicate (IFC_DR (*base_master_dr)->base_w_predicate)) | |
745 | DR_BASE_W_UNCONDITIONALLY (*base_master_dr) = true; | |
9d1206d5 | 746 | } |
e1cc68bd | 747 | } |
748 | ||
189d0706 | 749 | /* Return TRUE if can prove the index IDX of an array reference REF is |
750 | within array bound. Return false otherwise. */ | |
751 | ||
752 | static bool | |
753 | idx_within_array_bound (tree ref, tree *idx, void *dta) | |
754 | { | |
30b5769f | 755 | wi::overflow_type overflow; |
189d0706 | 756 | widest_int niter, valid_niter, delta, wi_step; |
757 | tree ev, init, step; | |
758 | tree low, high; | |
2e966e2a | 759 | class loop *loop = (class loop*) dta; |
189d0706 | 760 | |
761 | /* Only support within-bound access for array references. */ | |
762 | if (TREE_CODE (ref) != ARRAY_REF) | |
763 | return false; | |
764 | ||
765 | /* For arrays at the end of the structure, we are not guaranteed that they | |
766 | do not really extend over their declared size. However, for arrays of | |
767 | size greater than one, this is unlikely to be intended. */ | |
768 | if (array_at_struct_end_p (ref)) | |
769 | return false; | |
770 | ||
771 | ev = analyze_scalar_evolution (loop, *idx); | |
772 | ev = instantiate_parameters (loop, ev); | |
773 | init = initial_condition (ev); | |
774 | step = evolution_part_in_loop_num (ev, loop->num); | |
775 | ||
776 | if (!init || TREE_CODE (init) != INTEGER_CST | |
777 | || (step && TREE_CODE (step) != INTEGER_CST)) | |
778 | return false; | |
779 | ||
780 | low = array_ref_low_bound (ref); | |
781 | high = array_ref_up_bound (ref); | |
782 | ||
783 | /* The case of nonconstant bounds could be handled, but it would be | |
784 | complicated. */ | |
785 | if (TREE_CODE (low) != INTEGER_CST | |
786 | || !high || TREE_CODE (high) != INTEGER_CST) | |
787 | return false; | |
788 | ||
789 | /* Check if the intial idx is within bound. */ | |
790 | if (wi::to_widest (init) < wi::to_widest (low) | |
791 | || wi::to_widest (init) > wi::to_widest (high)) | |
792 | return false; | |
793 | ||
794 | /* The idx is always within bound. */ | |
795 | if (!step || integer_zerop (step)) | |
796 | return true; | |
797 | ||
798 | if (!max_loop_iterations (loop, &niter)) | |
799 | return false; | |
800 | ||
801 | if (wi::to_widest (step) < 0) | |
802 | { | |
803 | delta = wi::to_widest (init) - wi::to_widest (low); | |
804 | wi_step = -wi::to_widest (step); | |
805 | } | |
806 | else | |
807 | { | |
808 | delta = wi::to_widest (high) - wi::to_widest (init); | |
809 | wi_step = wi::to_widest (step); | |
810 | } | |
811 | ||
812 | valid_niter = wi::div_floor (delta, wi_step, SIGNED, &overflow); | |
813 | /* The iteration space of idx is within array bound. */ | |
814 | if (!overflow && niter <= valid_niter) | |
815 | return true; | |
816 | ||
817 | return false; | |
818 | } | |
819 | ||
820 | /* Return TRUE if ref is a within bound array reference. */ | |
821 | ||
822 | static bool | |
823 | ref_within_array_bound (gimple *stmt, tree ref) | |
824 | { | |
2e966e2a | 825 | class loop *loop = loop_containing_stmt (stmt); |
189d0706 | 826 | |
827 | gcc_assert (loop != NULL); | |
828 | return for_each_index (&ref, idx_within_array_bound, loop); | |
829 | } | |
830 | ||
831 | ||
832 | /* Given a memory reference expression T, return TRUE if base object | |
833 | it refers to is writable. The base object of a memory reference | |
834 | is the main object being referenced, which is returned by function | |
835 | get_base_address. */ | |
836 | ||
837 | static bool | |
838 | base_object_writable (tree ref) | |
839 | { | |
840 | tree base_tree = get_base_address (ref); | |
841 | ||
842 | return (base_tree | |
843 | && DECL_P (base_tree) | |
844 | && decl_binds_to_current_def_p (base_tree) | |
845 | && !TREE_READONLY (base_tree)); | |
846 | } | |
847 | ||
e1cc68bd | 848 | /* Return true when the memory references of STMT won't trap in the |
849 | if-converted code. There are two things that we have to check for: | |
850 | ||
851 | - writes to memory occur to writable memory: if-conversion of | |
852 | memory writes transforms the conditional memory writes into | |
853 | unconditional writes, i.e. "if (cond) A[i] = foo" is transformed | |
854 | into "A[i] = cond ? foo : A[i]", and as the write to memory may not | |
855 | be executed at all in the original code, it may be a readonly | |
856 | memory. To check that A is not const-qualified, we check that | |
857 | there exists at least an unconditional write to A in the current | |
858 | function. | |
859 | ||
860 | - reads or writes to memory are valid memory accesses for every | |
861 | iteration. To check that the memory accesses are correctly formed | |
862 | and that we are allowed to read and write in these locations, we | |
863 | check that the memory accesses to be if-converted occur at every | |
ee0e4f73 | 864 | iteration unconditionally. |
865 | ||
866 | Returns true for the memory reference in STMT, same memory reference | |
867 | is read or written unconditionally atleast once and the base memory | |
868 | reference is written unconditionally once. This is to check reference | |
869 | will not write fault. Also retuns true if the memory reference is | |
870 | unconditionally read once then we are conditionally writing to memory | |
871 | which is defined as read and write and is bound to the definition | |
872 | we are seeing. */ | |
e1cc68bd | 873 | static bool |
ee0e4f73 | 874 | ifcvt_memrefs_wont_trap (gimple *stmt, vec<data_reference_p> drs) |
e1cc68bd | 875 | { |
dbc55555 | 876 | /* If DR didn't see a reference here we can't use it to tell |
877 | whether the ref traps or not. */ | |
878 | if (gimple_uid (stmt) == 0) | |
879 | return false; | |
880 | ||
ee0e4f73 | 881 | data_reference_p *master_dr, *base_master_dr; |
882 | data_reference_p a = drs[gimple_uid (stmt) - 1]; | |
883 | ||
ee0e4f73 | 884 | tree base = DR_BASE_OBJECT (a); |
bd6f374c | 885 | innermost_loop_behavior *innermost = &DR_INNERMOST (a); |
ee0e4f73 | 886 | |
887 | gcc_assert (DR_STMT (a) == stmt); | |
bd6f374c | 888 | gcc_assert (DR_BASE_ADDRESS (a) || DR_OFFSET (a) |
889 | || DR_INIT (a) || DR_STEP (a)); | |
ee0e4f73 | 890 | |
bd6f374c | 891 | master_dr = innermost_DR_map->get (innermost); |
892 | gcc_assert (master_dr != NULL); | |
ee0e4f73 | 893 | |
ee0e4f73 | 894 | base_master_dr = baseref_DR_map->get (base); |
895 | ||
0308f68b | 896 | /* If a is unconditionally written to it doesn't trap. */ |
897 | if (DR_W_UNCONDITIONALLY (*master_dr)) | |
898 | return true; | |
899 | ||
189d0706 | 900 | /* If a is unconditionally accessed then ... |
901 | ||
902 | Even a is conditional access, we can treat it as an unconditional | |
903 | one if it's an array reference and all its index are within array | |
904 | bound. */ | |
905 | if (DR_RW_UNCONDITIONALLY (*master_dr) | |
906 | || ref_within_array_bound (stmt, DR_REF (a))) | |
ee0e4f73 | 907 | { |
0308f68b | 908 | /* an unconditional read won't trap. */ |
909 | if (DR_IS_READ (a)) | |
910 | return true; | |
911 | ||
912 | /* an unconditionaly write won't trap if the base is written | |
913 | to unconditionally. */ | |
9d1206d5 | 914 | if (base_master_dr |
0308f68b | 915 | && DR_BASE_W_UNCONDITIONALLY (*base_master_dr)) |
d2276060 | 916 | return PARAM_VALUE (PARAM_ALLOW_STORE_DATA_RACES); |
189d0706 | 917 | /* or the base is known to be not readonly. */ |
918 | else if (base_object_writable (DR_REF (a))) | |
919 | return PARAM_VALUE (PARAM_ALLOW_STORE_DATA_RACES); | |
ee0e4f73 | 920 | } |
189d0706 | 921 | |
ee0e4f73 | 922 | return false; |
e1cc68bd | 923 | } |
924 | ||
c71d3c24 | 925 | /* Return true if STMT could be converted into a masked load or store |
926 | (conditional load or store based on a mask computed from bb predicate). */ | |
927 | ||
928 | static bool | |
42acab1c | 929 | ifcvt_can_use_mask_load_store (gimple *stmt) |
c71d3c24 | 930 | { |
c71d3c24 | 931 | /* Check whether this is a load or store. */ |
03821886 | 932 | tree lhs = gimple_assign_lhs (stmt); |
933 | bool is_load; | |
934 | tree ref; | |
c71d3c24 | 935 | if (gimple_store_p (stmt)) |
936 | { | |
937 | if (!is_gimple_val (gimple_assign_rhs1 (stmt))) | |
938 | return false; | |
939 | is_load = false; | |
940 | ref = lhs; | |
941 | } | |
942 | else if (gimple_assign_load_p (stmt)) | |
943 | { | |
944 | is_load = true; | |
945 | ref = gimple_assign_rhs1 (stmt); | |
946 | } | |
947 | else | |
948 | return false; | |
949 | ||
950 | if (may_be_nonaddressable_p (ref)) | |
951 | return false; | |
952 | ||
953 | /* Mask should be integer mode of the same size as the load/store | |
954 | mode. */ | |
03821886 | 955 | machine_mode mode = TYPE_MODE (TREE_TYPE (lhs)); |
2cf1bb25 | 956 | if (!int_mode_for_mode (mode).exists () || VECTOR_MODE_P (mode)) |
c71d3c24 | 957 | return false; |
958 | ||
f636f094 | 959 | if (can_vec_mask_load_store_p (mode, VOIDmode, is_load)) |
c71d3c24 | 960 | return true; |
961 | ||
962 | return false; | |
963 | } | |
964 | ||
03821886 | 965 | /* Return true if STMT could be converted from an operation that is |
966 | unconditional to one that is conditional on a bb predicate mask. */ | |
967 | ||
968 | static bool | |
969 | ifcvt_can_predicate (gimple *stmt) | |
970 | { | |
971 | basic_block bb = gimple_bb (stmt); | |
972 | ||
973 | if (!(flag_tree_loop_vectorize || bb->loop_father->force_vectorize) | |
974 | || bb->loop_father->dont_vectorize | |
975 | || gimple_has_volatile_ops (stmt)) | |
976 | return false; | |
977 | ||
978 | if (gimple_assign_single_p (stmt)) | |
979 | return ifcvt_can_use_mask_load_store (stmt); | |
980 | ||
981 | tree_code code = gimple_assign_rhs_code (stmt); | |
982 | tree lhs_type = TREE_TYPE (gimple_assign_lhs (stmt)); | |
983 | tree rhs_type = TREE_TYPE (gimple_assign_rhs1 (stmt)); | |
984 | if (!types_compatible_p (lhs_type, rhs_type)) | |
985 | return false; | |
986 | internal_fn cond_fn = get_conditional_internal_fn (code); | |
987 | return (cond_fn != IFN_LAST | |
988 | && vectorized_internal_fn_supported_p (cond_fn, lhs_type)); | |
989 | } | |
990 | ||
b01e3c9b | 991 | /* Return true when STMT is if-convertible. |
992 | ||
75a70cf9 | 993 | GIMPLE_ASSIGN statement is not if-convertible if, |
a1660ced | 994 | - it is not movable, |
995 | - it could trap, | |
3b91ccd9 | 996 | - LHS is not var decl. */ |
07c03fb0 | 997 | |
998 | static bool | |
42acab1c | 999 | if_convertible_gimple_assign_stmt_p (gimple *stmt, |
db5c1c9a | 1000 | vec<data_reference_p> refs) |
07c03fb0 | 1001 | { |
b01e3c9b | 1002 | tree lhs = gimple_assign_lhs (stmt); |
73772494 | 1003 | |
07c03fb0 | 1004 | if (dump_file && (dump_flags & TDF_DETAILS)) |
1005 | { | |
1006 | fprintf (dump_file, "-------------------------\n"); | |
75a70cf9 | 1007 | print_gimple_stmt (dump_file, stmt, 0, TDF_SLIM); |
07c03fb0 | 1008 | } |
35a40aad | 1009 | |
3b91ccd9 | 1010 | if (!is_gimple_reg_type (TREE_TYPE (lhs))) |
1011 | return false; | |
1012 | ||
73772494 | 1013 | /* Some of these constrains might be too conservative. */ |
75a70cf9 | 1014 | if (stmt_ends_bb_p (stmt) |
1015 | || gimple_has_volatile_ops (stmt) | |
73772494 | 1016 | || (TREE_CODE (lhs) == SSA_NAME |
1017 | && SSA_NAME_OCCURS_IN_ABNORMAL_PHI (lhs)) | |
75a70cf9 | 1018 | || gimple_has_side_effects (stmt)) |
07c03fb0 | 1019 | { |
1020 | if (dump_file && (dump_flags & TDF_DETAILS)) | |
73772494 | 1021 | fprintf (dump_file, "stmt not suitable for ifcvt\n"); |
07c03fb0 | 1022 | return false; |
1023 | } | |
1024 | ||
c71d3c24 | 1025 | /* tree-into-ssa.c uses GF_PLF_1, so avoid it, because |
1026 | in between if_convertible_loop_p and combine_blocks | |
1027 | we can perform loop versioning. */ | |
1028 | gimple_set_plf (stmt, GF_PLF_2, false); | |
1029 | ||
0308f68b | 1030 | if ((! gimple_vuse (stmt) |
1031 | || gimple_could_trap_p_1 (stmt, false, false) | |
1032 | || ! ifcvt_memrefs_wont_trap (stmt, refs)) | |
1033 | && gimple_could_trap_p (stmt)) | |
07c03fb0 | 1034 | { |
03821886 | 1035 | if (ifcvt_can_predicate (stmt)) |
c71d3c24 | 1036 | { |
1037 | gimple_set_plf (stmt, GF_PLF_2, true); | |
03821886 | 1038 | need_to_predicate = true; |
c71d3c24 | 1039 | return true; |
1040 | } | |
07c03fb0 | 1041 | if (dump_file && (dump_flags & TDF_DETAILS)) |
1042 | fprintf (dump_file, "tree could trap...\n"); | |
1043 | return false; | |
1044 | } | |
1045 | ||
f67e390a | 1046 | /* When if-converting stores force versioning, likewise if we |
1047 | ended up generating store data races. */ | |
1048 | if (gimple_vdef (stmt)) | |
03821886 | 1049 | need_to_predicate = true; |
07c03fb0 | 1050 | |
07c03fb0 | 1051 | return true; |
1052 | } | |
1053 | ||
b01e3c9b | 1054 | /* Return true when STMT is if-convertible. |
1055 | ||
1056 | A statement is if-convertible if: | |
c2c4377d | 1057 | - it is an if-convertible GIMPLE_ASSIGN, |
040b4eee | 1058 | - it is a GIMPLE_LABEL or a GIMPLE_COND, |
1059 | - it is builtins call. */ | |
07c03fb0 | 1060 | |
1061 | static bool | |
db5c1c9a | 1062 | if_convertible_stmt_p (gimple *stmt, vec<data_reference_p> refs) |
07c03fb0 | 1063 | { |
75a70cf9 | 1064 | switch (gimple_code (stmt)) |
07c03fb0 | 1065 | { |
75a70cf9 | 1066 | case GIMPLE_LABEL: |
9845d120 | 1067 | case GIMPLE_DEBUG: |
b01e3c9b | 1068 | case GIMPLE_COND: |
1069 | return true; | |
35a40aad | 1070 | |
9845d120 | 1071 | case GIMPLE_ASSIGN: |
db5c1c9a | 1072 | return if_convertible_gimple_assign_stmt_p (stmt, refs); |
35a40aad | 1073 | |
2cac353d | 1074 | case GIMPLE_CALL: |
1075 | { | |
1076 | tree fndecl = gimple_call_fndecl (stmt); | |
1077 | if (fndecl) | |
1078 | { | |
1079 | int flags = gimple_call_flags (stmt); | |
1080 | if ((flags & ECF_CONST) | |
1081 | && !(flags & ECF_LOOPING_CONST_OR_PURE) | |
1082 | /* We can only vectorize some builtins at the moment, | |
1083 | so restrict if-conversion to those. */ | |
a0e9bfbb | 1084 | && fndecl_built_in_p (fndecl)) |
2cac353d | 1085 | return true; |
1086 | } | |
1087 | return false; | |
1088 | } | |
1089 | ||
07c03fb0 | 1090 | default: |
1091 | /* Don't know what to do with 'em so don't do anything. */ | |
1092 | if (dump_file && (dump_flags & TDF_DETAILS)) | |
1093 | { | |
1094 | fprintf (dump_file, "don't know what to do\n"); | |
75a70cf9 | 1095 | print_gimple_stmt (dump_file, stmt, 0, TDF_SLIM); |
07c03fb0 | 1096 | } |
1097 | return false; | |
07c03fb0 | 1098 | } |
1099 | ||
1100 | return true; | |
1101 | } | |
1102 | ||
040b4eee | 1103 | /* Assumes that BB has more than 1 predecessors. |
1104 | Returns false if at least one successor is not on critical edge | |
1105 | and true otherwise. */ | |
1106 | ||
1107 | static inline bool | |
1108 | all_preds_critical_p (basic_block bb) | |
1109 | { | |
1110 | edge e; | |
1111 | edge_iterator ei; | |
1112 | ||
1113 | FOR_EACH_EDGE (e, ei, bb->preds) | |
1114 | if (EDGE_COUNT (e->src->succs) == 1) | |
1115 | return false; | |
1116 | return true; | |
1117 | } | |
1118 | ||
b01e3c9b | 1119 | /* Return true when BB is if-convertible. This routine does not check |
1120 | basic block's statements and phis. | |
1121 | ||
1122 | A basic block is not if-convertible if: | |
1123 | - it is non-empty and it is after the exit block (in BFS order), | |
1124 | - it is after the exit block but before the latch, | |
1125 | - its edges are not normal. | |
1126 | ||
1127 | EXIT_BB is the basic block containing the exit of the LOOP. BB is | |
1128 | inside LOOP. */ | |
07c03fb0 | 1129 | |
35a40aad | 1130 | static bool |
2e966e2a | 1131 | if_convertible_bb_p (class loop *loop, basic_block bb, basic_block exit_bb) |
07c03fb0 | 1132 | { |
1133 | edge e; | |
cd665a06 | 1134 | edge_iterator ei; |
07c03fb0 | 1135 | |
1136 | if (dump_file && (dump_flags & TDF_DETAILS)) | |
1137 | fprintf (dump_file, "----------[%d]-------------\n", bb->index); | |
35a40aad | 1138 | |
040b4eee | 1139 | if (EDGE_COUNT (bb->succs) > 2) |
1140 | return false; | |
1141 | ||
ea3f88a7 | 1142 | if (exit_bb) |
07c03fb0 | 1143 | { |
1144 | if (bb != loop->latch) | |
1145 | { | |
1146 | if (dump_file && (dump_flags & TDF_DETAILS)) | |
1147 | fprintf (dump_file, "basic block after exit bb but before latch\n"); | |
1148 | return false; | |
1149 | } | |
1150 | else if (!empty_block_p (bb)) | |
1151 | { | |
1055d96a | 1152 | if (dump_file && (dump_flags & TDF_DETAILS)) |
1153 | fprintf (dump_file, "non empty basic block after exit bb\n"); | |
1154 | return false; | |
1155 | } | |
1156 | else if (bb == loop->latch | |
1157 | && bb != exit_bb | |
1158 | && !dominated_by_p (CDI_DOMINATORS, bb, exit_bb)) | |
1159 | { | |
1160 | if (dump_file && (dump_flags & TDF_DETAILS)) | |
1161 | fprintf (dump_file, "latch is not dominated by exit_block\n"); | |
1162 | return false; | |
1163 | } | |
1164 | } | |
1165 | ||
1166 | /* Be less adventurous and handle only normal edges. */ | |
1167 | FOR_EACH_EDGE (e, ei, bb->succs) | |
5147ec07 | 1168 | if (e->flags & (EDGE_EH | EDGE_ABNORMAL | EDGE_IRREDUCIBLE_LOOP)) |
1055d96a | 1169 | { |
1170 | if (dump_file && (dump_flags & TDF_DETAILS)) | |
b01e3c9b | 1171 | fprintf (dump_file, "Difficult to handle edges\n"); |
1055d96a | 1172 | return false; |
1173 | } | |
1174 | ||
1175 | return true; | |
1176 | } | |
1177 | ||
b01e3c9b | 1178 | /* Return true when all predecessor blocks of BB are visited. The |
1179 | VISITED bitmap keeps track of the visited blocks. */ | |
1055d96a | 1180 | |
1181 | static bool | |
1182 | pred_blocks_visited_p (basic_block bb, bitmap *visited) | |
1183 | { | |
1184 | edge e; | |
1185 | edge_iterator ei; | |
1186 | FOR_EACH_EDGE (e, ei, bb->preds) | |
1187 | if (!bitmap_bit_p (*visited, e->src->index)) | |
1188 | return false; | |
1189 | ||
1190 | return true; | |
1191 | } | |
1192 | ||
1193 | /* Get body of a LOOP in suitable order for if-conversion. It is | |
1194 | caller's responsibility to deallocate basic block list. | |
1195 | If-conversion suitable order is, breadth first sort (BFS) order | |
1196 | with an additional constraint: select a block only if all its | |
1197 | predecessors are already selected. */ | |
1198 | ||
1199 | static basic_block * | |
2e966e2a | 1200 | get_loop_body_in_if_conv_order (const class loop *loop) |
1055d96a | 1201 | { |
1202 | basic_block *blocks, *blocks_in_bfs_order; | |
1203 | basic_block bb; | |
1204 | bitmap visited; | |
1205 | unsigned int index = 0; | |
1206 | unsigned int visited_count = 0; | |
1207 | ||
1208 | gcc_assert (loop->num_nodes); | |
34154e27 | 1209 | gcc_assert (loop->latch != EXIT_BLOCK_PTR_FOR_FN (cfun)); |
1055d96a | 1210 | |
1211 | blocks = XCNEWVEC (basic_block, loop->num_nodes); | |
1212 | visited = BITMAP_ALLOC (NULL); | |
1213 | ||
1214 | blocks_in_bfs_order = get_loop_body_in_bfs_order (loop); | |
1215 | ||
1216 | index = 0; | |
1217 | while (index < loop->num_nodes) | |
1218 | { | |
1219 | bb = blocks_in_bfs_order [index]; | |
1220 | ||
1221 | if (bb->flags & BB_IRREDUCIBLE_LOOP) | |
1222 | { | |
1223 | free (blocks_in_bfs_order); | |
1224 | BITMAP_FREE (visited); | |
1225 | free (blocks); | |
1226 | return NULL; | |
1227 | } | |
1228 | ||
1229 | if (!bitmap_bit_p (visited, bb->index)) | |
1230 | { | |
1231 | if (pred_blocks_visited_p (bb, &visited) | |
1232 | || bb == loop->header) | |
1233 | { | |
1234 | /* This block is now visited. */ | |
1235 | bitmap_set_bit (visited, bb->index); | |
1236 | blocks[visited_count++] = bb; | |
1237 | } | |
07c03fb0 | 1238 | } |
35a40aad | 1239 | |
1055d96a | 1240 | index++; |
07c03fb0 | 1241 | |
1055d96a | 1242 | if (index == loop->num_nodes |
1243 | && visited_count != loop->num_nodes) | |
1244 | /* Not done yet. */ | |
1245 | index = 0; | |
1246 | } | |
1247 | free (blocks_in_bfs_order); | |
1248 | BITMAP_FREE (visited); | |
1249 | return blocks; | |
07c03fb0 | 1250 | } |
1251 | ||
60e0f7c4 | 1252 | /* Returns true when the analysis of the predicates for all the basic |
1253 | blocks in LOOP succeeded. | |
1254 | ||
fa683b76 | 1255 | predicate_bbs first allocates the predicates of the basic blocks. |
c814c39c | 1256 | These fields are then initialized with the tree expressions |
1257 | representing the predicates under which a basic block is executed | |
1258 | in the LOOP. As the loop->header is executed at each iteration, it | |
1259 | has the "true" predicate. Other statements executed under a | |
1260 | condition are predicated with that condition, for example | |
60e0f7c4 | 1261 | |
1262 | | if (x) | |
1263 | | S1; | |
1264 | | else | |
1265 | | S2; | |
1266 | ||
5f7a9884 | 1267 | S1 will be predicated with "x", and |
1268 | S2 will be predicated with "!x". */ | |
60e0f7c4 | 1269 | |
c71d3c24 | 1270 | static void |
60e0f7c4 | 1271 | predicate_bbs (loop_p loop) |
1272 | { | |
1273 | unsigned int i; | |
1274 | ||
1275 | for (i = 0; i < loop->num_nodes; i++) | |
fa683b76 | 1276 | init_bb_predicate (ifc_bbs[i]); |
60e0f7c4 | 1277 | |
1278 | for (i = 0; i < loop->num_nodes; i++) | |
1279 | { | |
fa683b76 | 1280 | basic_block bb = ifc_bbs[i]; |
1281 | tree cond; | |
42acab1c | 1282 | gimple *stmt; |
60e0f7c4 | 1283 | |
040b4eee | 1284 | /* The loop latch and loop exit block are always executed and |
1285 | have no extra conditions to be processed: skip them. */ | |
1286 | if (bb == loop->latch | |
1287 | || bb_with_exit_edge_p (loop, bb)) | |
fa683b76 | 1288 | { |
040b4eee | 1289 | reset_bb_predicate (bb); |
fa683b76 | 1290 | continue; |
1291 | } | |
1292 | ||
1293 | cond = bb_predicate (bb); | |
c71d3c24 | 1294 | stmt = last_stmt (bb); |
1295 | if (stmt && gimple_code (stmt) == GIMPLE_COND) | |
60e0f7c4 | 1296 | { |
c71d3c24 | 1297 | tree c2; |
1298 | edge true_edge, false_edge; | |
1299 | location_t loc = gimple_location (stmt); | |
040b4eee | 1300 | tree c = build2_loc (loc, gimple_cond_code (stmt), |
c71d3c24 | 1301 | boolean_type_node, |
1302 | gimple_cond_lhs (stmt), | |
1303 | gimple_cond_rhs (stmt)); | |
1304 | ||
1305 | /* Add new condition into destination's predicate list. */ | |
1306 | extract_true_false_edges_from_block (gimple_bb (stmt), | |
1307 | &true_edge, &false_edge); | |
1308 | ||
1309 | /* If C is true, then TRUE_EDGE is taken. */ | |
1310 | add_to_dst_predicate_list (loop, true_edge, unshare_expr (cond), | |
1311 | unshare_expr (c)); | |
1312 | ||
1313 | /* If C is false, then FALSE_EDGE is taken. */ | |
1314 | c2 = build1_loc (loc, TRUTH_NOT_EXPR, boolean_type_node, | |
1315 | unshare_expr (c)); | |
1316 | add_to_dst_predicate_list (loop, false_edge, | |
1317 | unshare_expr (cond), c2); | |
1318 | ||
1319 | cond = NULL_TREE; | |
60e0f7c4 | 1320 | } |
1321 | ||
1322 | /* If current bb has only one successor, then consider it as an | |
1323 | unconditional goto. */ | |
1324 | if (single_succ_p (bb)) | |
1325 | { | |
1326 | basic_block bb_n = single_succ (bb); | |
1327 | ||
1328 | /* The successor bb inherits the predicate of its | |
1329 | predecessor. If there is no predicate in the predecessor | |
1330 | bb, then consider the successor bb as always executed. */ | |
1331 | if (cond == NULL_TREE) | |
1332 | cond = boolean_true_node; | |
1333 | ||
c71d3c24 | 1334 | add_to_predicate_list (loop, bb_n, cond); |
60e0f7c4 | 1335 | } |
1336 | } | |
1337 | ||
1338 | /* The loop header is always executed. */ | |
48c9b1fe | 1339 | reset_bb_predicate (loop->header); |
fa683b76 | 1340 | gcc_assert (bb_predicate_gimplified_stmts (loop->header) == NULL |
1341 | && bb_predicate_gimplified_stmts (loop->latch) == NULL); | |
60e0f7c4 | 1342 | } |
1343 | ||
84769861 | 1344 | /* Build region by adding loop pre-header and post-header blocks. */ |
1345 | ||
1346 | static vec<basic_block> | |
2e966e2a | 1347 | build_region (class loop *loop) |
84769861 | 1348 | { |
1349 | vec<basic_block> region = vNULL; | |
1350 | basic_block exit_bb = NULL; | |
1351 | ||
1352 | gcc_assert (ifc_bbs); | |
1353 | /* The first element is loop pre-header. */ | |
1354 | region.safe_push (loop_preheader_edge (loop)->src); | |
1355 | ||
1356 | for (unsigned int i = 0; i < loop->num_nodes; i++) | |
1357 | { | |
1358 | basic_block bb = ifc_bbs[i]; | |
1359 | region.safe_push (bb); | |
1360 | /* Find loop postheader. */ | |
1361 | edge e; | |
1362 | edge_iterator ei; | |
1363 | FOR_EACH_EDGE (e, ei, bb->succs) | |
1364 | if (loop_exit_edge_p (loop, e)) | |
1365 | { | |
1366 | exit_bb = e->dest; | |
1367 | break; | |
1368 | } | |
1369 | } | |
1370 | /* The last element is loop post-header. */ | |
1371 | gcc_assert (exit_bb); | |
1372 | region.safe_push (exit_bb); | |
1373 | return region; | |
1374 | } | |
1375 | ||
e1cc68bd | 1376 | /* Return true when LOOP is if-convertible. This is a helper function |
1377 | for if_convertible_loop_p. REFS and DDRS are initialized and freed | |
1378 | in if_convertible_loop_p. */ | |
07c03fb0 | 1379 | |
1380 | static bool | |
2e966e2a | 1381 | if_convertible_loop_p_1 (class loop *loop, vec<data_reference_p> *refs) |
07c03fb0 | 1382 | { |
07c03fb0 | 1383 | unsigned int i; |
ea3f88a7 | 1384 | basic_block exit_bb = NULL; |
84769861 | 1385 | vec<basic_block> region; |
07c03fb0 | 1386 | |
0d4f3f8c | 1387 | if (find_data_references_in_loop (loop, refs) == chrec_dont_know) |
e1cc68bd | 1388 | return false; |
fa6e55f9 | 1389 | |
07c03fb0 | 1390 | calculate_dominance_info (CDI_DOMINATORS); |
07c03fb0 | 1391 | |
1392 | /* Allow statements that can be handled during if-conversion. */ | |
1393 | ifc_bbs = get_loop_body_in_if_conv_order (loop); | |
1394 | if (!ifc_bbs) | |
1395 | { | |
1396 | if (dump_file && (dump_flags & TDF_DETAILS)) | |
d11c70fa | 1397 | fprintf (dump_file, "Irreducible loop\n"); |
07c03fb0 | 1398 | return false; |
1399 | } | |
35a40aad | 1400 | |
07c03fb0 | 1401 | for (i = 0; i < loop->num_nodes; i++) |
1402 | { | |
60e0f7c4 | 1403 | basic_block bb = ifc_bbs[i]; |
07c03fb0 | 1404 | |
ea3f88a7 | 1405 | if (!if_convertible_bb_p (loop, bb, exit_bb)) |
07c03fb0 | 1406 | return false; |
1407 | ||
60e0f7c4 | 1408 | if (bb_with_exit_edge_p (loop, bb)) |
1409 | exit_bb = bb; | |
1410 | } | |
1411 | ||
c71d3c24 | 1412 | for (i = 0; i < loop->num_nodes; i++) |
1413 | { | |
1414 | basic_block bb = ifc_bbs[i]; | |
1415 | gimple_stmt_iterator gsi; | |
1416 | ||
1417 | for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi)) | |
1418 | switch (gimple_code (gsi_stmt (gsi))) | |
1419 | { | |
1420 | case GIMPLE_LABEL: | |
1421 | case GIMPLE_ASSIGN: | |
1422 | case GIMPLE_CALL: | |
1423 | case GIMPLE_DEBUG: | |
1424 | case GIMPLE_COND: | |
ee0e4f73 | 1425 | gimple_set_uid (gsi_stmt (gsi), 0); |
c71d3c24 | 1426 | break; |
1427 | default: | |
1428 | return false; | |
1429 | } | |
1430 | } | |
60e0f7c4 | 1431 | |
9cda83ad | 1432 | data_reference_p dr; |
9cabbd00 | 1433 | |
bd6f374c | 1434 | innermost_DR_map |
1435 | = new hash_map<innermost_loop_behavior_hash, data_reference_p>; | |
ee0e4f73 | 1436 | baseref_DR_map = new hash_map<tree_operand_hash, data_reference_p>; |
1437 | ||
84769861 | 1438 | /* Compute post-dominator tree locally. */ |
1439 | region = build_region (loop); | |
1440 | calculate_dominance_info_for_region (CDI_POST_DOMINATORS, region); | |
1441 | ||
ee0e4f73 | 1442 | predicate_bbs (loop); |
1443 | ||
84769861 | 1444 | /* Free post-dominator tree since it is not used after predication. */ |
1445 | free_dominance_info_for_region (cfun, CDI_POST_DOMINATORS, region); | |
1446 | region.release (); | |
1447 | ||
9cda83ad | 1448 | for (i = 0; refs->iterate (i, &dr); i++) |
1449 | { | |
bd6f374c | 1450 | tree ref = DR_REF (dr); |
1451 | ||
9cda83ad | 1452 | dr->aux = XNEW (struct ifc_dr); |
0308f68b | 1453 | DR_BASE_W_UNCONDITIONALLY (dr) = false; |
1454 | DR_RW_UNCONDITIONALLY (dr) = false; | |
1455 | DR_W_UNCONDITIONALLY (dr) = false; | |
1456 | IFC_DR (dr)->rw_predicate = boolean_false_node; | |
1457 | IFC_DR (dr)->w_predicate = boolean_false_node; | |
1458 | IFC_DR (dr)->base_w_predicate = boolean_false_node; | |
ee0e4f73 | 1459 | if (gimple_uid (DR_STMT (dr)) == 0) |
1460 | gimple_set_uid (DR_STMT (dr), i + 1); | |
bd6f374c | 1461 | |
1462 | /* If DR doesn't have innermost loop behavior or it's a compound | |
1463 | memory reference, we synthesize its innermost loop behavior | |
1464 | for hashing. */ | |
1465 | if (TREE_CODE (ref) == COMPONENT_REF | |
1466 | || TREE_CODE (ref) == IMAGPART_EXPR | |
1467 | || TREE_CODE (ref) == REALPART_EXPR | |
1468 | || !(DR_BASE_ADDRESS (dr) || DR_OFFSET (dr) | |
1469 | || DR_INIT (dr) || DR_STEP (dr))) | |
1470 | { | |
1471 | while (TREE_CODE (ref) == COMPONENT_REF | |
1472 | || TREE_CODE (ref) == IMAGPART_EXPR | |
1473 | || TREE_CODE (ref) == REALPART_EXPR) | |
1474 | ref = TREE_OPERAND (ref, 0); | |
1475 | ||
a7e05ef2 | 1476 | memset (&DR_INNERMOST (dr), 0, sizeof (DR_INNERMOST (dr))); |
1477 | DR_BASE_ADDRESS (dr) = ref; | |
bd6f374c | 1478 | } |
ee0e4f73 | 1479 | hash_memrefs_baserefs_and_store_DRs_read_written_info (dr); |
9cabbd00 | 1480 | } |
1481 | ||
60e0f7c4 | 1482 | for (i = 0; i < loop->num_nodes; i++) |
1483 | { | |
1484 | basic_block bb = ifc_bbs[i]; | |
1485 | gimple_stmt_iterator itr; | |
1486 | ||
e1cc68bd | 1487 | /* Check the if-convertibility of statements in predicated BBs. */ |
c71d3c24 | 1488 | if (!dominated_by_p (CDI_DOMINATORS, loop->latch, bb)) |
e1cc68bd | 1489 | for (itr = gsi_start_bb (bb); !gsi_end_p (itr); gsi_next (&itr)) |
db5c1c9a | 1490 | if (!if_convertible_stmt_p (gsi_stmt (itr), *refs)) |
e1cc68bd | 1491 | return false; |
07c03fb0 | 1492 | } |
1493 | ||
c71d3c24 | 1494 | /* Checking PHIs needs to be done after stmts, as the fact whether there |
1495 | are any masked loads or stores affects the tests. */ | |
1496 | for (i = 0; i < loop->num_nodes; i++) | |
1497 | { | |
1498 | basic_block bb = ifc_bbs[i]; | |
1a91d914 | 1499 | gphi_iterator itr; |
c71d3c24 | 1500 | |
1501 | for (itr = gsi_start_phis (bb); !gsi_end_p (itr); gsi_next (&itr)) | |
e6ee4c61 | 1502 | if (!if_convertible_phi_p (loop, bb, itr.phi ())) |
c71d3c24 | 1503 | return false; |
1504 | } | |
1505 | ||
07c03fb0 | 1506 | if (dump_file) |
d11c70fa | 1507 | fprintf (dump_file, "Applying if-conversion\n"); |
07c03fb0 | 1508 | |
07c03fb0 | 1509 | return true; |
1510 | } | |
1511 | ||
e1cc68bd | 1512 | /* Return true when LOOP is if-convertible. |
1513 | LOOP is if-convertible if: | |
1514 | - it is innermost, | |
1515 | - it has two or more basic blocks, | |
1516 | - it has only one exit, | |
1517 | - loop header is not the exit edge, | |
1518 | - if its basic blocks and phi nodes are if convertible. */ | |
1519 | ||
1520 | static bool | |
2e966e2a | 1521 | if_convertible_loop_p (class loop *loop) |
e1cc68bd | 1522 | { |
1523 | edge e; | |
1524 | edge_iterator ei; | |
1525 | bool res = false; | |
f1f41a6c | 1526 | vec<data_reference_p> refs; |
e1cc68bd | 1527 | |
1528 | /* Handle only innermost loop. */ | |
1529 | if (!loop || loop->inner) | |
1530 | { | |
1531 | if (dump_file && (dump_flags & TDF_DETAILS)) | |
1532 | fprintf (dump_file, "not innermost loop\n"); | |
1533 | return false; | |
1534 | } | |
1535 | ||
1536 | /* If only one block, no need for if-conversion. */ | |
1537 | if (loop->num_nodes <= 2) | |
1538 | { | |
1539 | if (dump_file && (dump_flags & TDF_DETAILS)) | |
1540 | fprintf (dump_file, "less than 2 basic blocks\n"); | |
1541 | return false; | |
1542 | } | |
1543 | ||
1544 | /* More than one loop exit is too much to handle. */ | |
1545 | if (!single_exit (loop)) | |
1546 | { | |
1547 | if (dump_file && (dump_flags & TDF_DETAILS)) | |
1548 | fprintf (dump_file, "multiple exits\n"); | |
1549 | return false; | |
1550 | } | |
1551 | ||
1552 | /* If one of the loop header's edge is an exit edge then do not | |
1553 | apply if-conversion. */ | |
1554 | FOR_EACH_EDGE (e, ei, loop->header->succs) | |
1555 | if (loop_exit_edge_p (loop, e)) | |
1556 | return false; | |
1557 | ||
f1f41a6c | 1558 | refs.create (5); |
db5c1c9a | 1559 | res = if_convertible_loop_p_1 (loop, &refs); |
e1cc68bd | 1560 | |
9cda83ad | 1561 | data_reference_p dr; |
1562 | unsigned int i; | |
1563 | for (i = 0; refs.iterate (i, &dr); i++) | |
1564 | free (dr->aux); | |
9cabbd00 | 1565 | |
e1cc68bd | 1566 | free_data_refs (refs); |
ee0e4f73 | 1567 | |
bd6f374c | 1568 | delete innermost_DR_map; |
1569 | innermost_DR_map = NULL; | |
ee0e4f73 | 1570 | |
1571 | delete baseref_DR_map; | |
1572 | baseref_DR_map = NULL; | |
1573 | ||
e1cc68bd | 1574 | return res; |
1575 | } | |
1576 | ||
0b6de244 | 1577 | /* Returns true if def-stmt for phi argument ARG is simple increment/decrement |
1578 | which is in predicated basic block. | |
1579 | In fact, the following PHI pattern is searching: | |
1580 | loop-header: | |
1581 | reduc_1 = PHI <..., reduc_2> | |
1582 | ... | |
1583 | if (...) | |
1584 | reduc_3 = ... | |
1585 | reduc_2 = PHI <reduc_1, reduc_3> | |
1586 | ||
040b4eee | 1587 | ARG_0 and ARG_1 are correspondent PHI arguments. |
1588 | REDUC, OP0 and OP1 contain reduction stmt and its operands. | |
1589 | EXTENDED is true if PHI has > 2 arguments. */ | |
0b6de244 | 1590 | |
1591 | static bool | |
42acab1c | 1592 | is_cond_scalar_reduction (gimple *phi, gimple **reduc, tree arg_0, tree arg_1, |
040b4eee | 1593 | tree *op0, tree *op1, bool extended) |
0b6de244 | 1594 | { |
1595 | tree lhs, r_op1, r_op2; | |
42acab1c | 1596 | gimple *stmt; |
1597 | gimple *header_phi = NULL; | |
0b6de244 | 1598 | enum tree_code reduction_op; |
91c4c1db | 1599 | basic_block bb = gimple_bb (phi); |
2e966e2a | 1600 | class loop *loop = bb->loop_father; |
0b6de244 | 1601 | edge latch_e = loop_latch_edge (loop); |
f8212648 | 1602 | imm_use_iterator imm_iter; |
1603 | use_operand_p use_p; | |
040b4eee | 1604 | edge e; |
1605 | edge_iterator ei; | |
1606 | bool result = false; | |
0b6de244 | 1607 | if (TREE_CODE (arg_0) != SSA_NAME || TREE_CODE (arg_1) != SSA_NAME) |
1608 | return false; | |
1609 | ||
040b4eee | 1610 | if (!extended && gimple_code (SSA_NAME_DEF_STMT (arg_0)) == GIMPLE_PHI) |
0b6de244 | 1611 | { |
1612 | lhs = arg_1; | |
1613 | header_phi = SSA_NAME_DEF_STMT (arg_0); | |
1614 | stmt = SSA_NAME_DEF_STMT (arg_1); | |
1615 | } | |
1616 | else if (gimple_code (SSA_NAME_DEF_STMT (arg_1)) == GIMPLE_PHI) | |
1617 | { | |
1618 | lhs = arg_0; | |
1619 | header_phi = SSA_NAME_DEF_STMT (arg_1); | |
1620 | stmt = SSA_NAME_DEF_STMT (arg_0); | |
1621 | } | |
1622 | else | |
1623 | return false; | |
1624 | if (gimple_bb (header_phi) != loop->header) | |
1625 | return false; | |
1626 | ||
1627 | if (PHI_ARG_DEF_FROM_EDGE (header_phi, latch_e) != PHI_RESULT (phi)) | |
1628 | return false; | |
1629 | ||
1630 | if (gimple_code (stmt) != GIMPLE_ASSIGN | |
1631 | || gimple_has_volatile_ops (stmt)) | |
1632 | return false; | |
1633 | ||
3c7bf6da | 1634 | if (!flow_bb_inside_loop_p (loop, gimple_bb (stmt))) |
1635 | return false; | |
1636 | ||
0b6de244 | 1637 | if (!is_predicated (gimple_bb (stmt))) |
1638 | return false; | |
1639 | ||
91c4c1db | 1640 | /* Check that stmt-block is predecessor of phi-block. */ |
040b4eee | 1641 | FOR_EACH_EDGE (e, ei, gimple_bb (stmt)->succs) |
1642 | if (e->dest == bb) | |
1643 | { | |
1644 | result = true; | |
1645 | break; | |
1646 | } | |
1647 | if (!result) | |
91c4c1db | 1648 | return false; |
1649 | ||
0b6de244 | 1650 | if (!has_single_use (lhs)) |
1651 | return false; | |
1652 | ||
1653 | reduction_op = gimple_assign_rhs_code (stmt); | |
1654 | if (reduction_op != PLUS_EXPR && reduction_op != MINUS_EXPR) | |
1655 | return false; | |
1656 | r_op1 = gimple_assign_rhs1 (stmt); | |
1657 | r_op2 = gimple_assign_rhs2 (stmt); | |
1658 | ||
1659 | /* Make R_OP1 to hold reduction variable. */ | |
1660 | if (r_op2 == PHI_RESULT (header_phi) | |
1661 | && reduction_op == PLUS_EXPR) | |
a4f59596 | 1662 | std::swap (r_op1, r_op2); |
0b6de244 | 1663 | else if (r_op1 != PHI_RESULT (header_phi)) |
1664 | return false; | |
1665 | ||
f8212648 | 1666 | /* Check that R_OP1 is used in reduction stmt or in PHI only. */ |
1667 | FOR_EACH_IMM_USE_FAST (use_p, imm_iter, r_op1) | |
1668 | { | |
42acab1c | 1669 | gimple *use_stmt = USE_STMT (use_p); |
f8212648 | 1670 | if (is_gimple_debug (use_stmt)) |
1671 | continue; | |
1672 | if (use_stmt == stmt) | |
1673 | continue; | |
1674 | if (gimple_code (use_stmt) != GIMPLE_PHI) | |
1675 | return false; | |
1676 | } | |
1677 | ||
0b6de244 | 1678 | *op0 = r_op1; *op1 = r_op2; |
1679 | *reduc = stmt; | |
1680 | return true; | |
1681 | } | |
1682 | ||
1683 | /* Converts conditional scalar reduction into unconditional form, e.g. | |
1684 | bb_4 | |
1685 | if (_5 != 0) goto bb_5 else goto bb_6 | |
1686 | end_bb_4 | |
1687 | bb_5 | |
1688 | res_6 = res_13 + 1; | |
1689 | end_bb_5 | |
1690 | bb_6 | |
1691 | # res_2 = PHI <res_13(4), res_6(5)> | |
1692 | end_bb_6 | |
1693 | ||
1694 | will be converted into sequence | |
1695 | _ifc__1 = _5 != 0 ? 1 : 0; | |
1696 | res_2 = res_13 + _ifc__1; | |
1697 | Argument SWAP tells that arguments of conditional expression should be | |
1698 | swapped. | |
1699 | Returns rhs of resulting PHI assignment. */ | |
1700 | ||
1701 | static tree | |
42acab1c | 1702 | convert_scalar_cond_reduction (gimple *reduc, gimple_stmt_iterator *gsi, |
0b6de244 | 1703 | tree cond, tree op0, tree op1, bool swap) |
1704 | { | |
1705 | gimple_stmt_iterator stmt_it; | |
42acab1c | 1706 | gimple *new_assign; |
0b6de244 | 1707 | tree rhs; |
1708 | tree rhs1 = gimple_assign_rhs1 (reduc); | |
1709 | tree tmp = make_temp_ssa_name (TREE_TYPE (rhs1), NULL, "_ifc_"); | |
1710 | tree c; | |
1711 | tree zero = build_zero_cst (TREE_TYPE (rhs1)); | |
1712 | ||
1713 | if (dump_file && (dump_flags & TDF_DETAILS)) | |
1714 | { | |
1715 | fprintf (dump_file, "Found cond scalar reduction.\n"); | |
1716 | print_gimple_stmt (dump_file, reduc, 0, TDF_SLIM); | |
1717 | } | |
1718 | ||
1719 | /* Build cond expression using COND and constant operand | |
1720 | of reduction rhs. */ | |
1721 | c = fold_build_cond_expr (TREE_TYPE (rhs1), | |
1722 | unshare_expr (cond), | |
1723 | swap ? zero : op1, | |
1724 | swap ? op1 : zero); | |
1725 | ||
1726 | /* Create assignment stmt and insert it at GSI. */ | |
1727 | new_assign = gimple_build_assign (tmp, c); | |
1728 | gsi_insert_before (gsi, new_assign, GSI_SAME_STMT); | |
1729 | /* Build rhs for unconditional increment/decrement. */ | |
1730 | rhs = fold_build2 (gimple_assign_rhs_code (reduc), | |
1731 | TREE_TYPE (rhs1), op0, tmp); | |
1732 | ||
1733 | /* Delete original reduction stmt. */ | |
1734 | stmt_it = gsi_for_stmt (reduc); | |
1735 | gsi_remove (&stmt_it, true); | |
1736 | release_defs (reduc); | |
1737 | return rhs; | |
1738 | } | |
1739 | ||
f4ff098a | 1740 | /* Produce condition for all occurrences of ARG in PHI node. */ |
040b4eee | 1741 | |
1742 | static tree | |
1743 | gen_phi_arg_condition (gphi *phi, vec<int> *occur, | |
1744 | gimple_stmt_iterator *gsi) | |
1745 | { | |
1746 | int len; | |
1747 | int i; | |
1748 | tree cond = NULL_TREE; | |
1749 | tree c; | |
1750 | edge e; | |
1751 | ||
1752 | len = occur->length (); | |
1753 | gcc_assert (len > 0); | |
1754 | for (i = 0; i < len; i++) | |
1755 | { | |
1756 | e = gimple_phi_arg_edge (phi, (*occur)[i]); | |
1757 | c = bb_predicate (e->src); | |
1758 | if (is_true_predicate (c)) | |
ecaa5fd4 | 1759 | { |
1760 | cond = c; | |
1761 | break; | |
1762 | } | |
040b4eee | 1763 | c = force_gimple_operand_gsi_1 (gsi, unshare_expr (c), |
1764 | is_gimple_condexpr, NULL_TREE, | |
1765 | true, GSI_SAME_STMT); | |
1766 | if (cond != NULL_TREE) | |
1767 | { | |
1768 | /* Must build OR expression. */ | |
1769 | cond = fold_or_predicates (EXPR_LOCATION (c), c, cond); | |
1770 | cond = force_gimple_operand_gsi_1 (gsi, unshare_expr (cond), | |
1771 | is_gimple_condexpr, NULL_TREE, | |
1772 | true, GSI_SAME_STMT); | |
1773 | } | |
1774 | else | |
1775 | cond = c; | |
1776 | } | |
1777 | gcc_assert (cond != NULL_TREE); | |
1778 | return cond; | |
1779 | } | |
1780 | ||
83c0fb43 | 1781 | /* Local valueization callback that follows all-use SSA edges. */ |
1782 | ||
1783 | static tree | |
1784 | ifcvt_follow_ssa_use_edges (tree val) | |
1785 | { | |
1786 | return val; | |
1787 | } | |
1788 | ||
3b91ccd9 | 1789 | /* Replace a scalar PHI node with a COND_EXPR using COND as condition. |
040b4eee | 1790 | This routine can handle PHI nodes with more than two arguments. |
07c03fb0 | 1791 | |
07c03fb0 | 1792 | For example, |
10b55fcb | 1793 | S1: A = PHI <x1(1), x2(5)> |
07c03fb0 | 1794 | is converted into, |
1795 | S2: A = cond ? x1 : x2; | |
b01e3c9b | 1796 | |
1797 | The generated code is inserted at GSI that points to the top of | |
040b4eee | 1798 | basic block's statement list. |
1799 | If PHI node has more than two arguments a chain of conditional | |
1800 | expression is produced. */ | |
1801 | ||
07c03fb0 | 1802 | |
1803 | static void | |
040b4eee | 1804 | predicate_scalar_phi (gphi *phi, gimple_stmt_iterator *gsi) |
07c03fb0 | 1805 | { |
42acab1c | 1806 | gimple *new_stmt = NULL, *reduc; |
040b4eee | 1807 | tree rhs, res, arg0, arg1, op0, op1, scev; |
1808 | tree cond; | |
1809 | unsigned int index0; | |
1810 | unsigned int max, args_len; | |
1811 | edge e; | |
07c03fb0 | 1812 | basic_block bb; |
040b4eee | 1813 | unsigned int i; |
48e1416a | 1814 | |
3b91ccd9 | 1815 | res = gimple_phi_result (phi); |
7c782c9b | 1816 | if (virtual_operand_p (res)) |
3b91ccd9 | 1817 | return; |
1818 | ||
040b4eee | 1819 | if ((rhs = degenerate_phi_result (phi)) |
119cb9ea | 1820 | || ((scev = analyze_scalar_evolution (gimple_bb (phi)->loop_father, |
1821 | res)) | |
1822 | && !chrec_contains_undetermined (scev) | |
1823 | && scev != res | |
040b4eee | 1824 | && (rhs = gimple_phi_arg_def (phi, 0)))) |
07c03fb0 | 1825 | { |
040b4eee | 1826 | if (dump_file && (dump_flags & TDF_DETAILS)) |
1827 | { | |
1828 | fprintf (dump_file, "Degenerate phi!\n"); | |
1829 | print_gimple_stmt (dump_file, phi, 0, TDF_SLIM); | |
1830 | } | |
1831 | new_stmt = gimple_build_assign (res, rhs); | |
1832 | gsi_insert_before (gsi, new_stmt, GSI_SAME_STMT); | |
1833 | update_stmt (new_stmt); | |
1834 | return; | |
1835 | } | |
0b6de244 | 1836 | |
040b4eee | 1837 | bb = gimple_bb (phi); |
1838 | if (EDGE_COUNT (bb->preds) == 2) | |
1839 | { | |
1840 | /* Predicate ordinary PHI node with 2 arguments. */ | |
1841 | edge first_edge, second_edge; | |
1842 | basic_block true_bb; | |
1843 | first_edge = EDGE_PRED (bb, 0); | |
1844 | second_edge = EDGE_PRED (bb, 1); | |
1845 | cond = bb_predicate (first_edge->src); | |
1846 | if (TREE_CODE (cond) == TRUTH_NOT_EXPR) | |
a4f59596 | 1847 | std::swap (first_edge, second_edge); |
040b4eee | 1848 | if (EDGE_COUNT (first_edge->src->succs) > 1) |
1849 | { | |
1850 | cond = bb_predicate (second_edge->src); | |
1851 | if (TREE_CODE (cond) == TRUTH_NOT_EXPR) | |
1852 | cond = TREE_OPERAND (cond, 0); | |
1853 | else | |
1854 | first_edge = second_edge; | |
1855 | } | |
1856 | else | |
1857 | cond = bb_predicate (first_edge->src); | |
1858 | /* Gimplify the condition to a valid cond-expr conditonal operand. */ | |
1859 | cond = force_gimple_operand_gsi_1 (gsi, unshare_expr (cond), | |
1860 | is_gimple_condexpr, NULL_TREE, | |
1861 | true, GSI_SAME_STMT); | |
1862 | true_bb = first_edge->src; | |
9fc1b637 | 1863 | if (EDGE_PRED (bb, 1)->src == true_bb) |
1864 | { | |
040b4eee | 1865 | arg0 = gimple_phi_arg_def (phi, 1); |
1866 | arg1 = gimple_phi_arg_def (phi, 0); | |
9fc1b637 | 1867 | } |
1868 | else | |
1869 | { | |
040b4eee | 1870 | arg0 = gimple_phi_arg_def (phi, 0); |
1871 | arg1 = gimple_phi_arg_def (phi, 1); | |
9fc1b637 | 1872 | } |
040b4eee | 1873 | if (is_cond_scalar_reduction (phi, &reduc, arg0, arg1, |
1874 | &op0, &op1, false)) | |
0b6de244 | 1875 | /* Convert reduction stmt into vectorizable form. */ |
1876 | rhs = convert_scalar_cond_reduction (reduc, gsi, cond, op0, op1, | |
1877 | true_bb != gimple_bb (reduc)); | |
1878 | else | |
1879 | /* Build new RHS using selected condition and arguments. */ | |
1880 | rhs = fold_build_cond_expr (TREE_TYPE (res), unshare_expr (cond), | |
040b4eee | 1881 | arg0, arg1); |
1882 | new_stmt = gimple_build_assign (res, rhs); | |
1883 | gsi_insert_before (gsi, new_stmt, GSI_SAME_STMT); | |
83c0fb43 | 1884 | gimple_stmt_iterator new_gsi = gsi_for_stmt (new_stmt); |
483d5f69 | 1885 | if (fold_stmt (&new_gsi, ifcvt_follow_ssa_use_edges)) |
1886 | { | |
1887 | new_stmt = gsi_stmt (new_gsi); | |
1888 | update_stmt (new_stmt); | |
1889 | } | |
040b4eee | 1890 | |
1891 | if (dump_file && (dump_flags & TDF_DETAILS)) | |
1892 | { | |
1893 | fprintf (dump_file, "new phi replacement stmt\n"); | |
1894 | print_gimple_stmt (dump_file, new_stmt, 0, TDF_SLIM); | |
1895 | } | |
1896 | return; | |
1897 | } | |
1898 | ||
1899 | /* Create hashmap for PHI node which contain vector of argument indexes | |
1900 | having the same value. */ | |
1901 | bool swap = false; | |
ee34b0e4 | 1902 | hash_map<tree_operand_hash, auto_vec<int> > phi_arg_map; |
040b4eee | 1903 | unsigned int num_args = gimple_phi_num_args (phi); |
1904 | int max_ind = -1; | |
1905 | /* Vector of different PHI argument values. */ | |
1906 | auto_vec<tree> args (num_args); | |
1907 | ||
1908 | /* Compute phi_arg_map. */ | |
1909 | for (i = 0; i < num_args; i++) | |
1910 | { | |
1911 | tree arg; | |
1912 | ||
1913 | arg = gimple_phi_arg_def (phi, i); | |
1914 | if (!phi_arg_map.get (arg)) | |
1915 | args.quick_push (arg); | |
1916 | phi_arg_map.get_or_insert (arg).safe_push (i); | |
1917 | } | |
1918 | ||
1919 | /* Determine element with max number of occurrences. */ | |
1920 | max_ind = -1; | |
1921 | max = 1; | |
1922 | args_len = args.length (); | |
1923 | for (i = 0; i < args_len; i++) | |
1924 | { | |
1925 | unsigned int len; | |
1926 | if ((len = phi_arg_map.get (args[i])->length ()) > max) | |
1927 | { | |
1928 | max_ind = (int) i; | |
1929 | max = len; | |
1930 | } | |
1931 | } | |
1932 | ||
1933 | /* Put element with max number of occurences to the end of ARGS. */ | |
1934 | if (max_ind != -1 && max_ind +1 != (int) args_len) | |
a4f59596 | 1935 | std::swap (args[args_len - 1], args[max_ind]); |
07c03fb0 | 1936 | |
040b4eee | 1937 | /* Handle one special case when number of arguments with different values |
1938 | is equal 2 and one argument has the only occurrence. Such PHI can be | |
1939 | handled as if would have only 2 arguments. */ | |
1940 | if (args_len == 2 && phi_arg_map.get (args[0])->length () == 1) | |
1941 | { | |
1942 | vec<int> *indexes; | |
1943 | indexes = phi_arg_map.get (args[0]); | |
1944 | index0 = (*indexes)[0]; | |
1945 | arg0 = args[0]; | |
1946 | arg1 = args[1]; | |
1947 | e = gimple_phi_arg_edge (phi, index0); | |
1948 | cond = bb_predicate (e->src); | |
1949 | if (TREE_CODE (cond) == TRUTH_NOT_EXPR) | |
1950 | { | |
1951 | swap = true; | |
1952 | cond = TREE_OPERAND (cond, 0); | |
1953 | } | |
1954 | /* Gimplify the condition to a valid cond-expr conditonal operand. */ | |
1955 | cond = force_gimple_operand_gsi_1 (gsi, unshare_expr (cond), | |
1956 | is_gimple_condexpr, NULL_TREE, | |
1957 | true, GSI_SAME_STMT); | |
1958 | if (!(is_cond_scalar_reduction (phi, &reduc, arg0 , arg1, | |
1959 | &op0, &op1, true))) | |
1960 | rhs = fold_build_cond_expr (TREE_TYPE (res), unshare_expr (cond), | |
1961 | swap? arg1 : arg0, | |
1962 | swap? arg0 : arg1); | |
1963 | else | |
1964 | /* Convert reduction stmt into vectorizable form. */ | |
1965 | rhs = convert_scalar_cond_reduction (reduc, gsi, cond, op0, op1, | |
1966 | swap); | |
1967 | new_stmt = gimple_build_assign (res, rhs); | |
1968 | gsi_insert_before (gsi, new_stmt, GSI_SAME_STMT); | |
1969 | update_stmt (new_stmt); | |
1970 | } | |
1971 | else | |
1972 | { | |
1973 | /* Common case. */ | |
1974 | vec<int> *indexes; | |
1975 | tree type = TREE_TYPE (gimple_phi_result (phi)); | |
1976 | tree lhs; | |
1977 | arg1 = args[1]; | |
1978 | for (i = 0; i < args_len; i++) | |
1979 | { | |
1980 | arg0 = args[i]; | |
1981 | indexes = phi_arg_map.get (args[i]); | |
1982 | if (i != args_len - 1) | |
1983 | lhs = make_temp_ssa_name (type, NULL, "_ifc_"); | |
1984 | else | |
1985 | lhs = res; | |
1986 | cond = gen_phi_arg_condition (phi, indexes, gsi); | |
1987 | rhs = fold_build_cond_expr (type, unshare_expr (cond), | |
1988 | arg0, arg1); | |
1989 | new_stmt = gimple_build_assign (lhs, rhs); | |
1990 | gsi_insert_before (gsi, new_stmt, GSI_SAME_STMT); | |
1991 | update_stmt (new_stmt); | |
1992 | arg1 = lhs; | |
1993 | } | |
1994 | } | |
07c03fb0 | 1995 | |
1996 | if (dump_file && (dump_flags & TDF_DETAILS)) | |
1997 | { | |
040b4eee | 1998 | fprintf (dump_file, "new extended phi replacement stmt\n"); |
75a70cf9 | 1999 | print_gimple_stmt (dump_file, new_stmt, 0, TDF_SLIM); |
07c03fb0 | 2000 | } |
2001 | } | |
2002 | ||
3b91ccd9 | 2003 | /* Replaces in LOOP all the scalar phi nodes other than those in the |
fa683b76 | 2004 | LOOP->header block with conditional modify expressions. */ |
07c03fb0 | 2005 | |
2006 | static void | |
2e966e2a | 2007 | predicate_all_scalar_phis (class loop *loop) |
07c03fb0 | 2008 | { |
2009 | basic_block bb; | |
2010 | unsigned int orig_loop_num_nodes = loop->num_nodes; | |
2011 | unsigned int i; | |
2012 | ||
07c03fb0 | 2013 | for (i = 1; i < orig_loop_num_nodes; i++) |
2014 | { | |
1a91d914 | 2015 | gphi *phi; |
1a91d914 | 2016 | gimple_stmt_iterator gsi; |
2017 | gphi_iterator phi_gsi; | |
07c03fb0 | 2018 | bb = ifc_bbs[i]; |
35a40aad | 2019 | |
c077abad | 2020 | if (bb == loop->header) |
07c03fb0 | 2021 | continue; |
2022 | ||
75a70cf9 | 2023 | phi_gsi = gsi_start_phis (bb); |
fa683b76 | 2024 | if (gsi_end_p (phi_gsi)) |
2025 | continue; | |
07c03fb0 | 2026 | |
e6ee4c61 | 2027 | gsi = gsi_after_labels (bb); |
2028 | while (!gsi_end_p (phi_gsi)) | |
07c03fb0 | 2029 | { |
e6ee4c61 | 2030 | phi = phi_gsi.phi (); |
ebd01afe | 2031 | if (virtual_operand_p (gimple_phi_result (phi))) |
2032 | gsi_next (&phi_gsi); | |
2033 | else | |
2034 | { | |
2035 | predicate_scalar_phi (phi, &gsi); | |
2036 | remove_phi_node (&phi_gsi, false); | |
2037 | } | |
07c03fb0 | 2038 | } |
07c03fb0 | 2039 | } |
07c03fb0 | 2040 | } |
2041 | ||
fa683b76 | 2042 | /* Insert in each basic block of LOOP the statements produced by the |
2043 | gimplification of the predicates. */ | |
2044 | ||
2045 | static void | |
db5c1c9a | 2046 | insert_gimplified_predicates (loop_p loop) |
fa683b76 | 2047 | { |
2048 | unsigned int i; | |
2049 | ||
2050 | for (i = 0; i < loop->num_nodes; i++) | |
2051 | { | |
2052 | basic_block bb = ifc_bbs[i]; | |
3b91ccd9 | 2053 | gimple_seq stmts; |
040b4eee | 2054 | if (!is_predicated (bb)) |
2055 | gcc_assert (bb_predicate_gimplified_stmts (bb) == NULL); | |
a560ff2b | 2056 | if (!is_predicated (bb)) |
2057 | { | |
2058 | /* Do not insert statements for a basic block that is not | |
2059 | predicated. Also make sure that the predicate of the | |
2060 | basic block is set to true. */ | |
2061 | reset_bb_predicate (bb); | |
2062 | continue; | |
2063 | } | |
2064 | ||
3b91ccd9 | 2065 | stmts = bb_predicate_gimplified_stmts (bb); |
fa683b76 | 2066 | if (stmts) |
2067 | { | |
03821886 | 2068 | if (need_to_predicate) |
3b91ccd9 | 2069 | { |
2070 | /* Insert the predicate of the BB just after the label, | |
2071 | as the if-conversion of memory writes will use this | |
2072 | predicate. */ | |
2073 | gimple_stmt_iterator gsi = gsi_after_labels (bb); | |
2074 | gsi_insert_seq_before (&gsi, stmts, GSI_SAME_STMT); | |
2075 | } | |
fa683b76 | 2076 | else |
3b91ccd9 | 2077 | { |
2078 | /* Insert the predicate of the BB at the end of the BB | |
2079 | as this would reduce the register pressure: the only | |
2080 | use of this predicate will be in successor BBs. */ | |
2081 | gimple_stmt_iterator gsi = gsi_last_bb (bb); | |
2082 | ||
2083 | if (gsi_end_p (gsi) | |
2084 | || stmt_ends_bb_p (gsi_stmt (gsi))) | |
2085 | gsi_insert_seq_before (&gsi, stmts, GSI_SAME_STMT); | |
2086 | else | |
2087 | gsi_insert_seq_after (&gsi, stmts, GSI_SAME_STMT); | |
2088 | } | |
fa683b76 | 2089 | |
2090 | /* Once the sequence is code generated, set it to NULL. */ | |
2091 | set_bb_predicate_gimplified_stmts (bb, NULL); | |
2092 | } | |
2093 | } | |
2094 | } | |
2095 | ||
03821886 | 2096 | /* Helper function for predicate_statements. Returns index of existent |
d3815dd1 | 2097 | mask if it was created for given SIZE and -1 otherwise. */ |
2098 | ||
2099 | static int | |
2100 | mask_exists (int size, vec<int> vec) | |
2101 | { | |
2102 | unsigned int ix; | |
2103 | int v; | |
2104 | FOR_EACH_VEC_ELT (vec, ix, v) | |
2105 | if (v == size) | |
2106 | return (int) ix; | |
2107 | return -1; | |
2108 | } | |
2109 | ||
03821886 | 2110 | /* Helper function for predicate_statements. STMT is a memory read or |
2111 | write and it needs to be predicated by MASK. Return a statement | |
2112 | that does so. */ | |
2113 | ||
2114 | static gimple * | |
2115 | predicate_load_or_store (gimple_stmt_iterator *gsi, gassign *stmt, tree mask) | |
2116 | { | |
2117 | gcall *new_stmt; | |
2118 | ||
2119 | tree lhs = gimple_assign_lhs (stmt); | |
2120 | tree rhs = gimple_assign_rhs1 (stmt); | |
2121 | tree ref = TREE_CODE (lhs) == SSA_NAME ? rhs : lhs; | |
2122 | mark_addressable (ref); | |
2123 | tree addr = force_gimple_operand_gsi (gsi, build_fold_addr_expr (ref), | |
2124 | true, NULL_TREE, true, GSI_SAME_STMT); | |
2125 | tree ptr = build_int_cst (reference_alias_ptr_type (ref), | |
2126 | get_object_alignment (ref)); | |
2127 | /* Copy points-to info if possible. */ | |
2128 | if (TREE_CODE (addr) == SSA_NAME && !SSA_NAME_PTR_INFO (addr)) | |
2129 | copy_ref_info (build2 (MEM_REF, TREE_TYPE (ref), addr, ptr), | |
2130 | ref); | |
2131 | if (TREE_CODE (lhs) == SSA_NAME) | |
2132 | { | |
2133 | new_stmt | |
2134 | = gimple_build_call_internal (IFN_MASK_LOAD, 3, addr, | |
2135 | ptr, mask); | |
2136 | gimple_call_set_lhs (new_stmt, lhs); | |
2137 | gimple_set_vuse (new_stmt, gimple_vuse (stmt)); | |
2138 | } | |
2139 | else | |
2140 | { | |
2141 | new_stmt | |
2142 | = gimple_build_call_internal (IFN_MASK_STORE, 4, addr, ptr, | |
2143 | mask, rhs); | |
1263a9e1 | 2144 | gimple_move_vops (new_stmt, stmt); |
03821886 | 2145 | } |
2146 | gimple_call_set_nothrow (new_stmt, true); | |
2147 | return new_stmt; | |
2148 | } | |
2149 | ||
2150 | /* STMT uses OP_LHS. Check whether it is equivalent to: | |
2151 | ||
2152 | ... = OP_MASK ? OP_LHS : X; | |
2153 | ||
2154 | Return X if so, otherwise return null. OP_MASK is an SSA_NAME that is | |
2155 | known to have value OP_COND. */ | |
2156 | ||
2157 | static tree | |
2158 | check_redundant_cond_expr (gimple *stmt, tree op_mask, tree op_cond, | |
2159 | tree op_lhs) | |
2160 | { | |
2161 | gassign *assign = dyn_cast <gassign *> (stmt); | |
2162 | if (!assign || gimple_assign_rhs_code (assign) != COND_EXPR) | |
2163 | return NULL_TREE; | |
2164 | ||
2165 | tree use_cond = gimple_assign_rhs1 (assign); | |
2166 | tree if_true = gimple_assign_rhs2 (assign); | |
2167 | tree if_false = gimple_assign_rhs3 (assign); | |
2168 | ||
2169 | if ((use_cond == op_mask || operand_equal_p (use_cond, op_cond, 0)) | |
2170 | && if_true == op_lhs) | |
2171 | return if_false; | |
2172 | ||
2173 | if (inverse_conditions_p (use_cond, op_cond) && if_false == op_lhs) | |
2174 | return if_true; | |
2175 | ||
2176 | return NULL_TREE; | |
2177 | } | |
2178 | ||
2179 | /* Return true if VALUE is available for use at STMT. SSA_NAMES is | |
2180 | the set of SSA names defined earlier in STMT's block. */ | |
2181 | ||
2182 | static bool | |
2183 | value_available_p (gimple *stmt, hash_set<tree_ssa_name_hash> *ssa_names, | |
2184 | tree value) | |
2185 | { | |
2186 | if (is_gimple_min_invariant (value)) | |
2187 | return true; | |
2188 | ||
2189 | if (TREE_CODE (value) == SSA_NAME) | |
2190 | { | |
2191 | if (SSA_NAME_IS_DEFAULT_DEF (value)) | |
2192 | return true; | |
2193 | ||
2194 | basic_block def_bb = gimple_bb (SSA_NAME_DEF_STMT (value)); | |
2195 | basic_block use_bb = gimple_bb (stmt); | |
2196 | return (def_bb == use_bb | |
2197 | ? ssa_names->contains (value) | |
2198 | : dominated_by_p (CDI_DOMINATORS, use_bb, def_bb)); | |
2199 | } | |
2200 | ||
2201 | return false; | |
2202 | } | |
2203 | ||
2204 | /* Helper function for predicate_statements. STMT is a potentially-trapping | |
2205 | arithmetic operation that needs to be predicated by MASK, an SSA_NAME that | |
2206 | has value COND. Return a statement that does so. SSA_NAMES is the set of | |
2207 | SSA names defined earlier in STMT's block. */ | |
2208 | ||
2209 | static gimple * | |
2210 | predicate_rhs_code (gassign *stmt, tree mask, tree cond, | |
2211 | hash_set<tree_ssa_name_hash> *ssa_names) | |
2212 | { | |
2213 | tree lhs = gimple_assign_lhs (stmt); | |
2214 | tree_code code = gimple_assign_rhs_code (stmt); | |
2215 | unsigned int nops = gimple_num_ops (stmt); | |
2216 | internal_fn cond_fn = get_conditional_internal_fn (code); | |
2217 | ||
2218 | /* Construct the arguments to the conditional internal function. */ | |
2219 | auto_vec<tree, 8> args; | |
2220 | args.safe_grow (nops + 1); | |
2221 | args[0] = mask; | |
2222 | for (unsigned int i = 1; i < nops; ++i) | |
2223 | args[i] = gimple_op (stmt, i); | |
2224 | args[nops] = NULL_TREE; | |
2225 | ||
2226 | /* Look for uses of the result to see whether they are COND_EXPRs that can | |
2227 | be folded into the conditional call. */ | |
2228 | imm_use_iterator imm_iter; | |
2229 | gimple *use_stmt; | |
2230 | FOR_EACH_IMM_USE_STMT (use_stmt, imm_iter, lhs) | |
2231 | { | |
2232 | tree new_else = check_redundant_cond_expr (use_stmt, mask, cond, lhs); | |
2233 | if (new_else && value_available_p (stmt, ssa_names, new_else)) | |
2234 | { | |
2235 | if (!args[nops]) | |
2236 | args[nops] = new_else; | |
2237 | if (operand_equal_p (new_else, args[nops], 0)) | |
2238 | { | |
2239 | /* We have: | |
2240 | ||
2241 | LHS = IFN_COND (MASK, ..., ELSE); | |
2242 | X = MASK ? LHS : ELSE; | |
2243 | ||
2244 | which makes X equivalent to LHS. */ | |
2245 | tree use_lhs = gimple_assign_lhs (use_stmt); | |
2246 | redundant_ssa_names.safe_push (std::make_pair (use_lhs, lhs)); | |
2247 | } | |
2248 | } | |
2249 | } | |
2250 | if (!args[nops]) | |
2251 | args[nops] = targetm.preferred_else_value (cond_fn, TREE_TYPE (lhs), | |
2252 | nops - 1, &args[1]); | |
2253 | ||
2254 | /* Create and insert the call. */ | |
2255 | gcall *new_stmt = gimple_build_call_internal_vec (cond_fn, args); | |
2256 | gimple_call_set_lhs (new_stmt, lhs); | |
2257 | gimple_call_set_nothrow (new_stmt, true); | |
2258 | ||
2259 | return new_stmt; | |
2260 | } | |
2261 | ||
3b91ccd9 | 2262 | /* Predicate each write to memory in LOOP. |
2263 | ||
2264 | This function transforms control flow constructs containing memory | |
2265 | writes of the form: | |
2266 | ||
2267 | | for (i = 0; i < N; i++) | |
2268 | | if (cond) | |
2269 | | A[i] = expr; | |
2270 | ||
2271 | into the following form that does not contain control flow: | |
2272 | ||
2273 | | for (i = 0; i < N; i++) | |
2274 | | A[i] = cond ? expr : A[i]; | |
2275 | ||
2276 | The original CFG looks like this: | |
2277 | ||
2278 | | bb_0 | |
2279 | | i = 0 | |
2280 | | end_bb_0 | |
2281 | | | |
2282 | | bb_1 | |
2283 | | if (i < N) goto bb_5 else goto bb_2 | |
2284 | | end_bb_1 | |
2285 | | | |
2286 | | bb_2 | |
2287 | | cond = some_computation; | |
2288 | | if (cond) goto bb_3 else goto bb_4 | |
2289 | | end_bb_2 | |
2290 | | | |
2291 | | bb_3 | |
2292 | | A[i] = expr; | |
2293 | | goto bb_4 | |
2294 | | end_bb_3 | |
2295 | | | |
2296 | | bb_4 | |
2297 | | goto bb_1 | |
2298 | | end_bb_4 | |
2299 | ||
2300 | insert_gimplified_predicates inserts the computation of the COND | |
2301 | expression at the beginning of the destination basic block: | |
2302 | ||
2303 | | bb_0 | |
2304 | | i = 0 | |
2305 | | end_bb_0 | |
2306 | | | |
2307 | | bb_1 | |
2308 | | if (i < N) goto bb_5 else goto bb_2 | |
2309 | | end_bb_1 | |
2310 | | | |
2311 | | bb_2 | |
2312 | | cond = some_computation; | |
2313 | | if (cond) goto bb_3 else goto bb_4 | |
2314 | | end_bb_2 | |
2315 | | | |
2316 | | bb_3 | |
2317 | | cond = some_computation; | |
2318 | | A[i] = expr; | |
2319 | | goto bb_4 | |
2320 | | end_bb_3 | |
2321 | | | |
2322 | | bb_4 | |
2323 | | goto bb_1 | |
2324 | | end_bb_4 | |
2325 | ||
03821886 | 2326 | predicate_statements is then predicating the memory write as follows: |
3b91ccd9 | 2327 | |
2328 | | bb_0 | |
2329 | | i = 0 | |
2330 | | end_bb_0 | |
2331 | | | |
2332 | | bb_1 | |
2333 | | if (i < N) goto bb_5 else goto bb_2 | |
2334 | | end_bb_1 | |
2335 | | | |
2336 | | bb_2 | |
2337 | | if (cond) goto bb_3 else goto bb_4 | |
2338 | | end_bb_2 | |
2339 | | | |
2340 | | bb_3 | |
2341 | | cond = some_computation; | |
2342 | | A[i] = cond ? expr : A[i]; | |
2343 | | goto bb_4 | |
2344 | | end_bb_3 | |
2345 | | | |
2346 | | bb_4 | |
2347 | | goto bb_1 | |
2348 | | end_bb_4 | |
2349 | ||
2350 | and finally combine_blocks removes the basic block boundaries making | |
2351 | the loop vectorizable: | |
2352 | ||
2353 | | bb_0 | |
2354 | | i = 0 | |
2355 | | if (i < N) goto bb_5 else goto bb_1 | |
2356 | | end_bb_0 | |
2357 | | | |
2358 | | bb_1 | |
2359 | | cond = some_computation; | |
2360 | | A[i] = cond ? expr : A[i]; | |
2361 | | if (i < N) goto bb_5 else goto bb_4 | |
2362 | | end_bb_1 | |
2363 | | | |
2364 | | bb_4 | |
2365 | | goto bb_1 | |
2366 | | end_bb_4 | |
2367 | */ | |
2368 | ||
2369 | static void | |
03821886 | 2370 | predicate_statements (loop_p loop) |
3b91ccd9 | 2371 | { |
2372 | unsigned int i, orig_loop_num_nodes = loop->num_nodes; | |
d3815dd1 | 2373 | auto_vec<int, 1> vect_sizes; |
2374 | auto_vec<tree, 1> vect_masks; | |
03821886 | 2375 | hash_set<tree_ssa_name_hash> ssa_names; |
3b91ccd9 | 2376 | |
2377 | for (i = 1; i < orig_loop_num_nodes; i++) | |
2378 | { | |
2379 | gimple_stmt_iterator gsi; | |
2380 | basic_block bb = ifc_bbs[i]; | |
2381 | tree cond = bb_predicate (bb); | |
2df61941 | 2382 | bool swap; |
d3815dd1 | 2383 | int index; |
3b91ccd9 | 2384 | |
fc1c9df7 | 2385 | if (is_true_predicate (cond)) |
3b91ccd9 | 2386 | continue; |
2387 | ||
2df61941 | 2388 | swap = false; |
2389 | if (TREE_CODE (cond) == TRUTH_NOT_EXPR) | |
2390 | { | |
2391 | swap = true; | |
2392 | cond = TREE_OPERAND (cond, 0); | |
2393 | } | |
2394 | ||
d3815dd1 | 2395 | vect_sizes.truncate (0); |
2396 | vect_masks.truncate (0); | |
2397 | ||
fc1c9df7 | 2398 | for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi);) |
2399 | { | |
03821886 | 2400 | gassign *stmt = dyn_cast <gassign *> (gsi_stmt (gsi)); |
2401 | if (!stmt) | |
fc1c9df7 | 2402 | ; |
6784dab5 | 2403 | else if (is_false_predicate (cond) |
2404 | && gimple_vdef (stmt)) | |
fc1c9df7 | 2405 | { |
2406 | unlink_stmt_vdef (stmt); | |
2407 | gsi_remove (&gsi, true); | |
2408 | release_defs (stmt); | |
2409 | continue; | |
2410 | } | |
2411 | else if (gimple_plf (stmt, GF_PLF_2)) | |
2412 | { | |
2413 | tree lhs = gimple_assign_lhs (stmt); | |
03821886 | 2414 | tree mask; |
2415 | gimple *new_stmt; | |
fc1c9df7 | 2416 | gimple_seq stmts = NULL; |
eafbcd13 | 2417 | machine_mode mode = TYPE_MODE (TREE_TYPE (lhs)); |
2418 | /* We checked before setting GF_PLF_2 that an equivalent | |
2419 | integer mode exists. */ | |
2420 | int bitsize = GET_MODE_BITSIZE (mode).to_constant (); | |
fc1c9df7 | 2421 | if (!vect_sizes.is_empty () |
2422 | && (index = mask_exists (bitsize, vect_sizes)) != -1) | |
2423 | /* Use created mask. */ | |
2424 | mask = vect_masks[index]; | |
2425 | else | |
2426 | { | |
2427 | if (COMPARISON_CLASS_P (cond)) | |
2428 | mask = gimple_build (&stmts, TREE_CODE (cond), | |
2429 | boolean_type_node, | |
2430 | TREE_OPERAND (cond, 0), | |
2431 | TREE_OPERAND (cond, 1)); | |
2432 | else | |
9b79c8e1 | 2433 | mask = cond; |
fc1c9df7 | 2434 | |
2435 | if (swap) | |
2436 | { | |
2437 | tree true_val | |
2438 | = constant_boolean_node (true, TREE_TYPE (mask)); | |
2439 | mask = gimple_build (&stmts, BIT_XOR_EXPR, | |
2440 | TREE_TYPE (mask), mask, true_val); | |
2441 | } | |
2442 | gsi_insert_seq_before (&gsi, stmts, GSI_SAME_STMT); | |
2443 | ||
fc1c9df7 | 2444 | /* Save mask and its size for further use. */ |
2445 | vect_sizes.safe_push (bitsize); | |
2446 | vect_masks.safe_push (mask); | |
2447 | } | |
03821886 | 2448 | if (gimple_assign_single_p (stmt)) |
2449 | new_stmt = predicate_load_or_store (&gsi, stmt, mask); | |
fc1c9df7 | 2450 | else |
03821886 | 2451 | new_stmt = predicate_rhs_code (stmt, mask, cond, &ssa_names); |
ebd01afe | 2452 | |
fc1c9df7 | 2453 | gsi_replace (&gsi, new_stmt, true); |
2454 | } | |
2455 | else if (gimple_vdef (stmt)) | |
2456 | { | |
2457 | tree lhs = gimple_assign_lhs (stmt); | |
2458 | tree rhs = gimple_assign_rhs1 (stmt); | |
2459 | tree type = TREE_TYPE (lhs); | |
2460 | ||
2461 | lhs = ifc_temp_var (type, unshare_expr (lhs), &gsi); | |
2462 | rhs = ifc_temp_var (type, unshare_expr (rhs), &gsi); | |
2463 | if (swap) | |
2464 | std::swap (lhs, rhs); | |
2465 | cond = force_gimple_operand_gsi_1 (&gsi, unshare_expr (cond), | |
2466 | is_gimple_condexpr, NULL_TREE, | |
2467 | true, GSI_SAME_STMT); | |
2468 | rhs = fold_build_cond_expr (type, unshare_expr (cond), rhs, lhs); | |
2469 | gimple_assign_set_rhs1 (stmt, ifc_temp_var (type, rhs, &gsi)); | |
2470 | update_stmt (stmt); | |
2471 | } | |
03821886 | 2472 | tree lhs = gimple_get_lhs (gsi_stmt (gsi)); |
2473 | if (lhs && TREE_CODE (lhs) == SSA_NAME) | |
2474 | ssa_names.add (lhs); | |
fc1c9df7 | 2475 | gsi_next (&gsi); |
2476 | } | |
03821886 | 2477 | ssa_names.empty (); |
3b91ccd9 | 2478 | } |
2479 | } | |
2480 | ||
01c40c0b | 2481 | /* Remove all GIMPLE_CONDs and GIMPLE_LABELs of all the basic blocks |
08129148 | 2482 | other than the exit and latch of the LOOP. Also resets the |
2483 | GIMPLE_DEBUG information. */ | |
01c40c0b | 2484 | |
2485 | static void | |
2486 | remove_conditions_and_labels (loop_p loop) | |
2487 | { | |
2488 | gimple_stmt_iterator gsi; | |
2489 | unsigned int i; | |
2490 | ||
2491 | for (i = 0; i < loop->num_nodes; i++) | |
2492 | { | |
fa683b76 | 2493 | basic_block bb = ifc_bbs[i]; |
01c40c0b | 2494 | |
2495 | if (bb_with_exit_edge_p (loop, bb) | |
2496 | || bb == loop->latch) | |
2497 | continue; | |
2498 | ||
2499 | for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); ) | |
08129148 | 2500 | switch (gimple_code (gsi_stmt (gsi))) |
2501 | { | |
2502 | case GIMPLE_COND: | |
2503 | case GIMPLE_LABEL: | |
2504 | gsi_remove (&gsi, true); | |
2505 | break; | |
2506 | ||
2507 | case GIMPLE_DEBUG: | |
2508 | /* ??? Should there be conditional GIMPLE_DEBUG_BINDs? */ | |
2509 | if (gimple_debug_bind_p (gsi_stmt (gsi))) | |
2510 | { | |
2511 | gimple_debug_bind_reset_value (gsi_stmt (gsi)); | |
2512 | update_stmt (gsi_stmt (gsi)); | |
2513 | } | |
2514 | gsi_next (&gsi); | |
2515 | break; | |
2516 | ||
2517 | default: | |
2518 | gsi_next (&gsi); | |
2519 | } | |
01c40c0b | 2520 | } |
2521 | } | |
2522 | ||
9cfdcdb1 | 2523 | /* Combine all the basic blocks from LOOP into one or two super basic |
2524 | blocks. Replace PHI nodes with conditional modify expressions. */ | |
07c03fb0 | 2525 | |
2526 | static void | |
2e966e2a | 2527 | combine_blocks (class loop *loop) |
07c03fb0 | 2528 | { |
2529 | basic_block bb, exit_bb, merge_target_bb; | |
2530 | unsigned int orig_loop_num_nodes = loop->num_nodes; | |
2531 | unsigned int i; | |
71cfcaa2 | 2532 | edge e; |
2533 | edge_iterator ei; | |
fa717aca | 2534 | |
01c40c0b | 2535 | remove_conditions_and_labels (loop); |
5cc7d4f7 | 2536 | insert_gimplified_predicates (loop); |
3b91ccd9 | 2537 | predicate_all_scalar_phis (loop); |
2538 | ||
03821886 | 2539 | if (need_to_predicate) |
2540 | predicate_statements (loop); | |
07c03fb0 | 2541 | |
b01e3c9b | 2542 | /* Merge basic blocks: first remove all the edges in the loop, |
2543 | except for those from the exit block. */ | |
07c03fb0 | 2544 | exit_bb = NULL; |
eda71dfb | 2545 | bool *predicated = XNEWVEC (bool, orig_loop_num_nodes); |
71cfcaa2 | 2546 | for (i = 0; i < orig_loop_num_nodes; i++) |
2547 | { | |
2548 | bb = ifc_bbs[i]; | |
eda71dfb | 2549 | predicated[i] = !is_true_predicate (bb_predicate (bb)); |
39760a87 | 2550 | free_bb_predicate (bb); |
71cfcaa2 | 2551 | if (bb_with_exit_edge_p (loop, bb)) |
2552 | { | |
53a87a4b | 2553 | gcc_assert (exit_bb == NULL); |
71cfcaa2 | 2554 | exit_bb = bb; |
71cfcaa2 | 2555 | } |
2556 | } | |
2557 | gcc_assert (exit_bb != loop->latch); | |
07c03fb0 | 2558 | |
07c03fb0 | 2559 | for (i = 1; i < orig_loop_num_nodes; i++) |
2560 | { | |
07c03fb0 | 2561 | bb = ifc_bbs[i]; |
2562 | ||
71cfcaa2 | 2563 | for (ei = ei_start (bb->preds); (e = ei_safe_edge (ei));) |
07c03fb0 | 2564 | { |
71cfcaa2 | 2565 | if (e->src == exit_bb) |
2566 | ei_next (&ei); | |
2567 | else | |
2568 | remove_edge (e); | |
2569 | } | |
2570 | } | |
07c03fb0 | 2571 | |
71cfcaa2 | 2572 | if (exit_bb != NULL) |
2573 | { | |
2574 | if (exit_bb != loop->header) | |
2575 | { | |
b01e3c9b | 2576 | /* Connect this node to loop header. */ |
7d956d51 | 2577 | make_single_succ_edge (loop->header, exit_bb, EDGE_FALLTHRU); |
71cfcaa2 | 2578 | set_immediate_dominator (CDI_DOMINATORS, exit_bb, loop->header); |
07c03fb0 | 2579 | } |
2580 | ||
71cfcaa2 | 2581 | /* Redirect non-exit edges to loop->latch. */ |
2582 | FOR_EACH_EDGE (e, ei, exit_bb->succs) | |
2583 | { | |
2584 | if (!loop_exit_edge_p (loop, e)) | |
2585 | redirect_edge_and_branch (e, loop->latch); | |
2586 | } | |
2587 | set_immediate_dominator (CDI_DOMINATORS, loop->latch, exit_bb); | |
2588 | } | |
2589 | else | |
2590 | { | |
b01e3c9b | 2591 | /* If the loop does not have an exit, reconnect header and latch. */ |
71cfcaa2 | 2592 | make_edge (loop->header, loop->latch, EDGE_FALLTHRU); |
2593 | set_immediate_dominator (CDI_DOMINATORS, loop->latch, loop->header); | |
2594 | } | |
c077abad | 2595 | |
71cfcaa2 | 2596 | merge_target_bb = loop->header; |
ebd01afe | 2597 | |
2598 | /* Get at the virtual def valid for uses starting at the first block | |
2599 | we merge into the header. Without a virtual PHI the loop has the | |
2600 | same virtual use on all stmts. */ | |
2601 | gphi *vphi = get_virtual_phi (loop->header); | |
2602 | tree last_vdef = NULL_TREE; | |
2603 | if (vphi) | |
2604 | { | |
2605 | last_vdef = gimple_phi_result (vphi); | |
2606 | for (gimple_stmt_iterator gsi = gsi_start_bb (loop->header); | |
2607 | ! gsi_end_p (gsi); gsi_next (&gsi)) | |
2608 | if (gimple_vdef (gsi_stmt (gsi))) | |
2609 | last_vdef = gimple_vdef (gsi_stmt (gsi)); | |
2610 | } | |
71cfcaa2 | 2611 | for (i = 1; i < orig_loop_num_nodes; i++) |
2612 | { | |
75a70cf9 | 2613 | gimple_stmt_iterator gsi; |
2614 | gimple_stmt_iterator last; | |
46d1d560 | 2615 | |
71cfcaa2 | 2616 | bb = ifc_bbs[i]; |
65b9482a | 2617 | |
71cfcaa2 | 2618 | if (bb == exit_bb || bb == loop->latch) |
2619 | continue; | |
65b9482a | 2620 | |
ebd01afe | 2621 | /* We release virtual PHIs late because we have to propagate them |
2622 | out using the current VUSE. The def might be the one used | |
2623 | after the loop. */ | |
2624 | vphi = get_virtual_phi (bb); | |
2625 | if (vphi) | |
2626 | { | |
2627 | imm_use_iterator iter; | |
2628 | use_operand_p use_p; | |
2629 | gimple *use_stmt; | |
2630 | FOR_EACH_IMM_USE_STMT (use_stmt, iter, gimple_phi_result (vphi)) | |
2631 | { | |
2632 | FOR_EACH_IMM_USE_ON_STMT (use_p, iter) | |
2633 | SET_USE (use_p, last_vdef); | |
2634 | } | |
5ead71c8 | 2635 | if (SSA_NAME_OCCURS_IN_ABNORMAL_PHI (gimple_phi_result (vphi))) |
2636 | SSA_NAME_OCCURS_IN_ABNORMAL_PHI (last_vdef) = 1; | |
ebd01afe | 2637 | gsi = gsi_for_stmt (vphi); |
2638 | remove_phi_node (&gsi, true); | |
2639 | } | |
2640 | ||
eda71dfb | 2641 | /* Make stmts member of loop->header and clear range info from all stmts |
2642 | in BB which is now no longer executed conditional on a predicate we | |
2643 | could have derived it from. */ | |
01c40c0b | 2644 | for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi)) |
eda71dfb | 2645 | { |
42acab1c | 2646 | gimple *stmt = gsi_stmt (gsi); |
eda71dfb | 2647 | gimple_set_bb (stmt, merge_target_bb); |
ebd01afe | 2648 | /* Update virtual operands. */ |
2649 | if (last_vdef) | |
2650 | { | |
2651 | use_operand_p use_p = ssa_vuse_operand (stmt); | |
2652 | if (use_p | |
2653 | && USE_FROM_PTR (use_p) != last_vdef) | |
2654 | SET_USE (use_p, last_vdef); | |
2655 | if (gimple_vdef (stmt)) | |
2656 | last_vdef = gimple_vdef (stmt); | |
2657 | } | |
eda71dfb | 2658 | if (predicated[i]) |
2659 | { | |
2660 | ssa_op_iter i; | |
2661 | tree op; | |
2662 | FOR_EACH_SSA_TREE_OPERAND (op, stmt, i, SSA_OP_DEF) | |
2663 | reset_flow_sensitive_info (op); | |
2664 | } | |
2665 | } | |
07c03fb0 | 2666 | |
2667 | /* Update stmt list. */ | |
75a70cf9 | 2668 | last = gsi_last_bb (merge_target_bb); |
ebd01afe | 2669 | gsi_insert_seq_after_without_update (&last, bb_seq (bb), GSI_NEW_STMT); |
75a70cf9 | 2670 | set_bb_seq (bb, NULL); |
07c03fb0 | 2671 | |
88e6f696 | 2672 | delete_basic_block (bb); |
07c03fb0 | 2673 | } |
e9437a8f | 2674 | |
b01e3c9b | 2675 | /* If possible, merge loop header to the block with the exit edge. |
2676 | This reduces the number of basic blocks to two, to please the | |
b519e9db | 2677 | vectorizer that handles only loops with two nodes. */ |
c077abad | 2678 | if (exit_bb |
ebd01afe | 2679 | && exit_bb != loop->header) |
2680 | { | |
2681 | /* We release virtual PHIs late because we have to propagate them | |
2682 | out using the current VUSE. The def might be the one used | |
2683 | after the loop. */ | |
2684 | vphi = get_virtual_phi (exit_bb); | |
2685 | if (vphi) | |
2686 | { | |
2687 | imm_use_iterator iter; | |
2688 | use_operand_p use_p; | |
2689 | gimple *use_stmt; | |
2690 | FOR_EACH_IMM_USE_STMT (use_stmt, iter, gimple_phi_result (vphi)) | |
2691 | { | |
2692 | FOR_EACH_IMM_USE_ON_STMT (use_p, iter) | |
2693 | SET_USE (use_p, last_vdef); | |
2694 | } | |
5ead71c8 | 2695 | if (SSA_NAME_OCCURS_IN_ABNORMAL_PHI (gimple_phi_result (vphi))) |
2696 | SSA_NAME_OCCURS_IN_ABNORMAL_PHI (last_vdef) = 1; | |
ebd01afe | 2697 | gimple_stmt_iterator gsi = gsi_for_stmt (vphi); |
2698 | remove_phi_node (&gsi, true); | |
2699 | } | |
2700 | ||
2701 | if (can_merge_blocks_p (loop->header, exit_bb)) | |
2702 | merge_blocks (loop->header, exit_bb); | |
2703 | } | |
39760a87 | 2704 | |
2705 | free (ifc_bbs); | |
2706 | ifc_bbs = NULL; | |
eda71dfb | 2707 | free (predicated); |
07c03fb0 | 2708 | } |
2709 | ||
207b8288 | 2710 | /* Version LOOP before if-converting it; the original loop |
2711 | will be if-converted, the new copy of the loop will not, | |
c71d3c24 | 2712 | and the LOOP_VECTORIZED internal call will be guarding which |
2713 | loop to execute. The vectorizer pass will fold this | |
b6863ffa | 2714 | internal call into either true or false. |
2715 | ||
2716 | Note that this function intentionally invalidates profile. Both edges | |
2717 | out of LOOP_VECTORIZED must have 100% probability so the profile remains | |
2718 | consistent after the condition is folded in the vectorizer. */ | |
07c03fb0 | 2719 | |
2e966e2a | 2720 | static class loop * |
2721 | version_loop_for_if_conversion (class loop *loop, vec<gimple *> *preds) | |
c71d3c24 | 2722 | { |
2723 | basic_block cond_bb; | |
f9e245b2 | 2724 | tree cond = make_ssa_name (boolean_type_node); |
2e966e2a | 2725 | class loop *new_loop; |
42acab1c | 2726 | gimple *g; |
c71d3c24 | 2727 | gimple_stmt_iterator gsi; |
9ee85230 | 2728 | unsigned int save_length; |
c71d3c24 | 2729 | |
2730 | g = gimple_build_call_internal (IFN_LOOP_VECTORIZED, 2, | |
2731 | build_int_cst (integer_type_node, loop->num), | |
2732 | integer_zero_node); | |
2733 | gimple_call_set_lhs (g, cond); | |
2734 | ||
c3deca25 | 2735 | /* Save BB->aux around loop_version as that uses the same field. */ |
9ee85230 | 2736 | save_length = loop->inner ? loop->inner->num_nodes : loop->num_nodes; |
2737 | void **saved_preds = XALLOCAVEC (void *, save_length); | |
2738 | for (unsigned i = 0; i < save_length; i++) | |
c3deca25 | 2739 | saved_preds[i] = ifc_bbs[i]->aux; |
2740 | ||
c71d3c24 | 2741 | initialize_original_copy_tables (); |
b6863ffa | 2742 | /* At this point we invalidate porfile confistency until IFN_LOOP_VECTORIZED |
2743 | is re-merged in the vectorizer. */ | |
c71d3c24 | 2744 | new_loop = loop_version (loop, cond, &cond_bb, |
720cfc43 | 2745 | profile_probability::always (), |
2746 | profile_probability::always (), | |
ca69b069 | 2747 | profile_probability::always (), |
2748 | profile_probability::always (), true); | |
c71d3c24 | 2749 | free_original_copy_tables (); |
c3deca25 | 2750 | |
9ee85230 | 2751 | for (unsigned i = 0; i < save_length; i++) |
c3deca25 | 2752 | ifc_bbs[i]->aux = saved_preds[i]; |
2753 | ||
c71d3c24 | 2754 | if (new_loop == NULL) |
ccd0a9f9 | 2755 | return NULL; |
c3deca25 | 2756 | |
c71d3c24 | 2757 | new_loop->dont_vectorize = true; |
4c73695b | 2758 | new_loop->force_vectorize = false; |
c71d3c24 | 2759 | gsi = gsi_last_bb (cond_bb); |
2760 | gimple_call_set_arg (g, 1, build_int_cst (integer_type_node, new_loop->num)); | |
dd31e240 | 2761 | if (preds) |
2762 | preds->safe_push (g); | |
c71d3c24 | 2763 | gsi_insert_before (&gsi, g, GSI_SAME_STMT); |
5cc7d4f7 | 2764 | update_ssa (TODO_update_ssa); |
ccd0a9f9 | 2765 | return new_loop; |
c71d3c24 | 2766 | } |
2767 | ||
9ee85230 | 2768 | /* Return true when LOOP satisfies the follow conditions that will |
2769 | allow it to be recognized by the vectorizer for outer-loop | |
2770 | vectorization: | |
2771 | - The loop is not the root node of the loop tree. | |
2772 | - The loop has exactly one inner loop. | |
2773 | - The loop has a single exit. | |
2774 | - The loop header has a single successor, which is the inner | |
2775 | loop header. | |
da269671 | 2776 | - Each of the inner and outer loop latches have a single |
2777 | predecessor. | |
9ee85230 | 2778 | - The loop exit block has a single predecessor, which is the |
2779 | inner loop's exit block. */ | |
2780 | ||
2781 | static bool | |
2e966e2a | 2782 | versionable_outer_loop_p (class loop *loop) |
9ee85230 | 2783 | { |
2784 | if (!loop_outer (loop) | |
ccd0a9f9 | 2785 | || loop->dont_vectorize |
9ee85230 | 2786 | || !loop->inner |
2787 | || loop->inner->next | |
2788 | || !single_exit (loop) | |
2789 | || !single_succ_p (loop->header) | |
da269671 | 2790 | || single_succ (loop->header) != loop->inner->header |
2791 | || !single_pred_p (loop->latch) | |
2792 | || !single_pred_p (loop->inner->latch)) | |
9ee85230 | 2793 | return false; |
ccd0a9f9 | 2794 | |
9ee85230 | 2795 | basic_block outer_exit = single_pred (loop->latch); |
2796 | basic_block inner_exit = single_pred (loop->inner->latch); | |
2797 | ||
2798 | if (!single_pred_p (outer_exit) || single_pred (outer_exit) != inner_exit) | |
2799 | return false; | |
2800 | ||
2801 | if (dump_file) | |
2802 | fprintf (dump_file, "Found vectorizable outer loop for versioning\n"); | |
2803 | ||
2804 | return true; | |
2805 | } | |
2806 | ||
9ab8df54 | 2807 | /* Performs splitting of critical edges. Skip splitting and return false |
2808 | if LOOP will not be converted because: | |
2809 | ||
2810 | - LOOP is not well formed. | |
2811 | - LOOP has PHI with more than MAX_PHI_ARG_NUM arguments. | |
2812 | ||
2813 | Last restriction is valid only if AGGRESSIVE_IF_CONV is false. */ | |
040b4eee | 2814 | |
2815 | static bool | |
2e966e2a | 2816 | ifcvt_split_critical_edges (class loop *loop, bool aggressive_if_conv) |
040b4eee | 2817 | { |
2818 | basic_block *body; | |
2819 | basic_block bb; | |
2820 | unsigned int num = loop->num_nodes; | |
2821 | unsigned int i; | |
42acab1c | 2822 | gimple *stmt; |
040b4eee | 2823 | edge e; |
2824 | edge_iterator ei; | |
cf416779 | 2825 | auto_vec<edge> critical_edges; |
040b4eee | 2826 | |
9ab8df54 | 2827 | /* Loop is not well formed. */ |
2828 | if (num <= 2 || loop->inner || !single_exit (loop)) | |
040b4eee | 2829 | return false; |
2830 | ||
2831 | body = get_loop_body (loop); | |
2832 | for (i = 0; i < num; i++) | |
2833 | { | |
2834 | bb = body[i]; | |
9ab8df54 | 2835 | if (!aggressive_if_conv |
2836 | && phi_nodes (bb) | |
2837 | && EDGE_COUNT (bb->preds) > MAX_PHI_ARG_NUM) | |
2838 | { | |
2839 | if (dump_file && (dump_flags & TDF_DETAILS)) | |
2840 | fprintf (dump_file, | |
2841 | "BB %d has complicated PHI with more than %u args.\n", | |
2842 | bb->index, MAX_PHI_ARG_NUM); | |
2843 | ||
2844 | free (body); | |
9ab8df54 | 2845 | return false; |
2846 | } | |
2847 | if (bb == loop->latch || bb_with_exit_edge_p (loop, bb)) | |
040b4eee | 2848 | continue; |
9ab8df54 | 2849 | |
040b4eee | 2850 | stmt = last_stmt (bb); |
2851 | /* Skip basic blocks not ending with conditional branch. */ | |
9ab8df54 | 2852 | if (!stmt || gimple_code (stmt) != GIMPLE_COND) |
040b4eee | 2853 | continue; |
9ab8df54 | 2854 | |
040b4eee | 2855 | FOR_EACH_EDGE (e, ei, bb->succs) |
2856 | if (EDGE_CRITICAL_P (e) && e->dest->loop_father == loop) | |
9ab8df54 | 2857 | critical_edges.safe_push (e); |
040b4eee | 2858 | } |
2859 | free (body); | |
9ab8df54 | 2860 | |
2861 | while (critical_edges.length () > 0) | |
2862 | { | |
2863 | e = critical_edges.pop (); | |
2864 | /* Don't split if bb can be predicated along non-critical edge. */ | |
2865 | if (EDGE_COUNT (e->dest->preds) > 2 || all_preds_critical_p (e->dest)) | |
2866 | split_edge (e); | |
2867 | } | |
2868 | ||
040b4eee | 2869 | return true; |
2870 | } | |
2871 | ||
040b4eee | 2872 | /* Delete redundant statements produced by predication which prevents |
2873 | loop vectorization. */ | |
2874 | ||
2875 | static void | |
2876 | ifcvt_local_dce (basic_block bb) | |
2877 | { | |
42acab1c | 2878 | gimple *stmt; |
2879 | gimple *stmt1; | |
2880 | gimple *phi; | |
040b4eee | 2881 | gimple_stmt_iterator gsi; |
6e0cf981 | 2882 | auto_vec<gimple *> worklist; |
040b4eee | 2883 | enum gimple_code code; |
2884 | use_operand_p use_p; | |
2885 | imm_use_iterator imm_iter; | |
03821886 | 2886 | std::pair <tree, tree> *name_pair; |
2887 | unsigned int i; | |
2888 | ||
2889 | FOR_EACH_VEC_ELT (redundant_ssa_names, i, name_pair) | |
2890 | replace_uses_by (name_pair->first, name_pair->second); | |
2891 | redundant_ssa_names.release (); | |
040b4eee | 2892 | |
2893 | worklist.create (64); | |
2894 | /* Consider all phi as live statements. */ | |
2895 | for (gsi = gsi_start_phis (bb); !gsi_end_p (gsi); gsi_next (&gsi)) | |
2896 | { | |
2897 | phi = gsi_stmt (gsi); | |
2898 | gimple_set_plf (phi, GF_PLF_2, true); | |
2899 | worklist.safe_push (phi); | |
2900 | } | |
207b8288 | 2901 | /* Consider load/store statements, CALL and COND as live. */ |
040b4eee | 2902 | for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi)) |
2903 | { | |
2904 | stmt = gsi_stmt (gsi); | |
2905 | if (gimple_store_p (stmt) | |
2906 | || gimple_assign_load_p (stmt) | |
2907 | || is_gimple_debug (stmt)) | |
2908 | { | |
2909 | gimple_set_plf (stmt, GF_PLF_2, true); | |
2910 | worklist.safe_push (stmt); | |
2911 | continue; | |
2912 | } | |
2913 | code = gimple_code (stmt); | |
2914 | if (code == GIMPLE_COND || code == GIMPLE_CALL) | |
2915 | { | |
2916 | gimple_set_plf (stmt, GF_PLF_2, true); | |
2917 | worklist.safe_push (stmt); | |
2918 | continue; | |
2919 | } | |
2920 | gimple_set_plf (stmt, GF_PLF_2, false); | |
2921 | ||
2922 | if (code == GIMPLE_ASSIGN) | |
2923 | { | |
2924 | tree lhs = gimple_assign_lhs (stmt); | |
2925 | FOR_EACH_IMM_USE_FAST (use_p, imm_iter, lhs) | |
2926 | { | |
2927 | stmt1 = USE_STMT (use_p); | |
2928 | if (gimple_bb (stmt1) != bb) | |
2929 | { | |
2930 | gimple_set_plf (stmt, GF_PLF_2, true); | |
2931 | worklist.safe_push (stmt); | |
2932 | break; | |
2933 | } | |
2934 | } | |
2935 | } | |
2936 | } | |
2937 | /* Propagate liveness through arguments of live stmt. */ | |
2938 | while (worklist.length () > 0) | |
2939 | { | |
2940 | ssa_op_iter iter; | |
2941 | use_operand_p use_p; | |
2942 | tree use; | |
2943 | ||
2944 | stmt = worklist.pop (); | |
2945 | FOR_EACH_PHI_OR_STMT_USE (use_p, stmt, iter, SSA_OP_USE) | |
2946 | { | |
2947 | use = USE_FROM_PTR (use_p); | |
2948 | if (TREE_CODE (use) != SSA_NAME) | |
2949 | continue; | |
2950 | stmt1 = SSA_NAME_DEF_STMT (use); | |
2951 | if (gimple_bb (stmt1) != bb | |
2952 | || gimple_plf (stmt1, GF_PLF_2)) | |
2953 | continue; | |
2954 | gimple_set_plf (stmt1, GF_PLF_2, true); | |
2955 | worklist.safe_push (stmt1); | |
2956 | } | |
2957 | } | |
2958 | /* Delete dead statements. */ | |
2959 | gsi = gsi_start_bb (bb); | |
2960 | while (!gsi_end_p (gsi)) | |
2961 | { | |
2962 | stmt = gsi_stmt (gsi); | |
2963 | if (gimple_plf (stmt, GF_PLF_2)) | |
2964 | { | |
2965 | gsi_next (&gsi); | |
2966 | continue; | |
2967 | } | |
2968 | if (dump_file && (dump_flags & TDF_DETAILS)) | |
2969 | { | |
2970 | fprintf (dump_file, "Delete dead stmt in bb#%d\n", bb->index); | |
2971 | print_gimple_stmt (dump_file, stmt, 0, TDF_SLIM); | |
2972 | } | |
2973 | gsi_remove (&gsi, true); | |
2974 | release_defs (stmt); | |
2975 | } | |
2976 | } | |
2977 | ||
c71d3c24 | 2978 | /* If-convert LOOP when it is legal. For the moment this pass has no |
2979 | profitability analysis. Returns non-zero todo flags when something | |
2980 | changed. */ | |
2981 | ||
5b631e09 | 2982 | unsigned int |
2e966e2a | 2983 | tree_if_conversion (class loop *loop, vec<gimple *> *preds) |
07c03fb0 | 2984 | { |
c71d3c24 | 2985 | unsigned int todo = 0; |
9ab8df54 | 2986 | bool aggressive_if_conv; |
2e966e2a | 2987 | class loop *rloop; |
67763c7f | 2988 | bitmap exit_bbs; |
9ab8df54 | 2989 | |
ccd0a9f9 | 2990 | again: |
2991 | rloop = NULL; | |
1055d96a | 2992 | ifc_bbs = NULL; |
03821886 | 2993 | need_to_predicate = false; |
9ab8df54 | 2994 | any_complicated_phi = false; |
07c03fb0 | 2995 | |
9ab8df54 | 2996 | /* Apply more aggressive if-conversion when loop or its outer loop were |
2997 | marked with simd pragma. When that's the case, we try to if-convert | |
2998 | loop containing PHIs with more than MAX_PHI_ARG_NUM arguments. */ | |
040b4eee | 2999 | aggressive_if_conv = loop->force_vectorize; |
040b4eee | 3000 | if (!aggressive_if_conv) |
3001 | { | |
2e966e2a | 3002 | class loop *outer_loop = loop_outer (loop); |
040b4eee | 3003 | if (outer_loop && outer_loop->force_vectorize) |
3004 | aggressive_if_conv = true; | |
3005 | } | |
3006 | ||
9ab8df54 | 3007 | if (!ifcvt_split_critical_edges (loop, aggressive_if_conv)) |
3008 | goto cleanup; | |
040b4eee | 3009 | |
db5c1c9a | 3010 | if (!if_convertible_loop_p (loop) |
4bd0f929 | 3011 | || !dbg_cnt (if_conversion_tree)) |
60e0f7c4 | 3012 | goto cleanup; |
a1660ced | 3013 | |
03821886 | 3014 | if ((need_to_predicate || any_complicated_phi) |
4c73695b | 3015 | && ((!flag_tree_loop_vectorize && !loop->force_vectorize) |
c71d3c24 | 3016 | || loop->dont_vectorize)) |
3017 | goto cleanup; | |
3018 | ||
b0c413f2 | 3019 | /* Since we have no cost model, always version loops unless the user |
1e04d935 | 3020 | specified -ftree-loop-if-convert or unless versioning is required. |
3021 | Either version this loop, or if the pattern is right for outer-loop | |
3022 | vectorization, version the outer loop. In the latter case we will | |
3023 | still if-convert the original inner loop. */ | |
03821886 | 3024 | if (need_to_predicate |
1e04d935 | 3025 | || any_complicated_phi |
3026 | || flag_tree_loop_if_convert != 1) | |
3027 | { | |
2e966e2a | 3028 | class loop *vloop |
1e04d935 | 3029 | = (versionable_outer_loop_p (loop_outer (loop)) |
3030 | ? loop_outer (loop) : loop); | |
2e966e2a | 3031 | class loop *nloop = version_loop_for_if_conversion (vloop, preds); |
ccd0a9f9 | 3032 | if (nloop == NULL) |
1e04d935 | 3033 | goto cleanup; |
ccd0a9f9 | 3034 | if (vloop != loop) |
3035 | { | |
3036 | /* If versionable_outer_loop_p decided to version the | |
3037 | outer loop, version also the inner loop of the non-vectorized | |
3038 | loop copy. So we transform: | |
3039 | loop1 | |
3040 | loop2 | |
3041 | into: | |
3042 | if (LOOP_VECTORIZED (1, 3)) | |
3043 | { | |
3044 | loop1 | |
3045 | loop2 | |
3046 | } | |
3047 | else | |
3048 | loop3 (copy of loop1) | |
3049 | if (LOOP_VECTORIZED (4, 5)) | |
3050 | loop4 (copy of loop2) | |
3051 | else | |
3052 | loop5 (copy of loop4) */ | |
3053 | gcc_assert (nloop->inner && nloop->inner->next == NULL); | |
3054 | rloop = nloop->inner; | |
3055 | } | |
1e04d935 | 3056 | } |
c71d3c24 | 3057 | |
60e0f7c4 | 3058 | /* Now all statements are if-convertible. Combine all the basic |
3059 | blocks into one huge basic block doing the if-conversion | |
3060 | on-the-fly. */ | |
db5c1c9a | 3061 | combine_blocks (loop); |
3b91ccd9 | 3062 | |
6172a9fd | 3063 | /* Delete dead predicate computations. */ |
9ab8df54 | 3064 | ifcvt_local_dce (loop->header); |
040b4eee | 3065 | |
67763c7f | 3066 | /* Perform local CSE, this esp. helps the vectorizer analysis if loads |
6d26c698 | 3067 | and stores are involved. CSE only the loop body, not the entry |
3068 | PHIs, those are to be kept in sync with the non-if-converted copy. | |
67763c7f | 3069 | ??? We'll still keep dead stores though. */ |
3070 | exit_bbs = BITMAP_ALLOC (NULL); | |
3071 | bitmap_set_bit (exit_bbs, single_exit (loop)->dest->index); | |
6d26c698 | 3072 | bitmap_set_bit (exit_bbs, loop->latch->index); |
67763c7f | 3073 | todo |= do_rpo_vn (cfun, loop_preheader_edge (loop), exit_bbs); |
3074 | BITMAP_FREE (exit_bbs); | |
3075 | ||
c71d3c24 | 3076 | todo |= TODO_cleanup_cfg; |
07c03fb0 | 3077 | |
60e0f7c4 | 3078 | cleanup: |
60e0f7c4 | 3079 | if (ifc_bbs) |
3080 | { | |
fa683b76 | 3081 | unsigned int i; |
3082 | ||
3083 | for (i = 0; i < loop->num_nodes; i++) | |
3084 | free_bb_predicate (ifc_bbs[i]); | |
3085 | ||
60e0f7c4 | 3086 | free (ifc_bbs); |
3087 | ifc_bbs = NULL; | |
3088 | } | |
ccd0a9f9 | 3089 | if (rloop != NULL) |
3090 | { | |
3091 | loop = rloop; | |
3092 | goto again; | |
3093 | } | |
b519e9db | 3094 | |
c71d3c24 | 3095 | return todo; |
07c03fb0 | 3096 | } |
3097 | ||
3098 | /* Tree if-conversion pass management. */ | |
3099 | ||
7620bc82 | 3100 | namespace { |
3101 | ||
3102 | const pass_data pass_data_if_conversion = | |
07c03fb0 | 3103 | { |
cbe8bda8 | 3104 | GIMPLE_PASS, /* type */ |
3105 | "ifcvt", /* name */ | |
3106 | OPTGROUP_NONE, /* optinfo_flags */ | |
ecec21ec | 3107 | TV_TREE_LOOP_IFCVT, /* tv_id */ |
cbe8bda8 | 3108 | ( PROP_cfg | PROP_ssa ), /* properties_required */ |
3109 | 0, /* properties_provided */ | |
3110 | 0, /* properties_destroyed */ | |
3111 | 0, /* todo_flags_start */ | |
8b88439e | 3112 | 0, /* todo_flags_finish */ |
07c03fb0 | 3113 | }; |
cbe8bda8 | 3114 | |
7620bc82 | 3115 | class pass_if_conversion : public gimple_opt_pass |
cbe8bda8 | 3116 | { |
3117 | public: | |
9af5ce0c | 3118 | pass_if_conversion (gcc::context *ctxt) |
3119 | : gimple_opt_pass (pass_data_if_conversion, ctxt) | |
cbe8bda8 | 3120 | {} |
3121 | ||
3122 | /* opt_pass methods: */ | |
31315c24 | 3123 | virtual bool gate (function *); |
65b0537f | 3124 | virtual unsigned int execute (function *); |
cbe8bda8 | 3125 | |
3126 | }; // class pass_if_conversion | |
3127 | ||
31315c24 | 3128 | bool |
3129 | pass_if_conversion::gate (function *fun) | |
3130 | { | |
3131 | return (((flag_tree_loop_vectorize || fun->has_force_vectorize_loops) | |
3132 | && flag_tree_loop_if_convert != 0) | |
b6f4c9b5 | 3133 | || flag_tree_loop_if_convert == 1); |
31315c24 | 3134 | } |
3135 | ||
65b0537f | 3136 | unsigned int |
3137 | pass_if_conversion::execute (function *fun) | |
3138 | { | |
2e966e2a | 3139 | class loop *loop; |
65b0537f | 3140 | unsigned todo = 0; |
3141 | ||
3142 | if (number_of_loops (fun) <= 1) | |
3143 | return 0; | |
3144 | ||
40d78a35 | 3145 | auto_vec<gimple *> preds; |
65b0537f | 3146 | FOR_EACH_LOOP (loop, 0) |
3147 | if (flag_tree_loop_if_convert == 1 | |
65b0537f | 3148 | || ((flag_tree_loop_vectorize || loop->force_vectorize) |
3149 | && !loop->dont_vectorize)) | |
40d78a35 | 3150 | todo |= tree_if_conversion (loop, &preds); |
65b0537f | 3151 | |
e8e52514 | 3152 | if (todo) |
3153 | { | |
3154 | free_numbers_of_iterations_estimates (fun); | |
3155 | scev_reset (); | |
3156 | } | |
3157 | ||
382ecba7 | 3158 | if (flag_checking) |
3159 | { | |
3160 | basic_block bb; | |
3161 | FOR_EACH_BB_FN (bb, fun) | |
3162 | gcc_assert (!bb->aux); | |
3163 | } | |
65b0537f | 3164 | |
40d78a35 | 3165 | /* Perform IL update now, it might elide some loops. */ |
3166 | if (todo & TODO_cleanup_cfg) | |
3167 | { | |
3168 | cleanup_tree_cfg (); | |
3169 | if (need_ssa_update_p (fun)) | |
3170 | todo |= TODO_update_ssa; | |
3171 | } | |
3172 | if (todo & TODO_update_ssa_any) | |
3173 | update_ssa (todo & TODO_update_ssa_any); | |
3174 | ||
3175 | /* If if-conversion elided the loop fall back to the original one. */ | |
3176 | for (unsigned i = 0; i < preds.length (); ++i) | |
3177 | { | |
3178 | gimple *g = preds[i]; | |
c06d4366 | 3179 | if (!gimple_bb (g)) |
3180 | continue; | |
40d78a35 | 3181 | unsigned ifcvt_loop = tree_to_uhwi (gimple_call_arg (g, 0)); |
3182 | if (!get_loop (fun, ifcvt_loop)) | |
3183 | { | |
3184 | if (dump_file) | |
3185 | fprintf (dump_file, "If-converted loop vanished\n"); | |
3186 | fold_loop_internal_call (g, boolean_false_node); | |
3187 | } | |
3188 | } | |
3189 | ||
3190 | return 0; | |
65b0537f | 3191 | } |
3192 | ||
7620bc82 | 3193 | } // anon namespace |
3194 | ||
cbe8bda8 | 3195 | gimple_opt_pass * |
3196 | make_pass_if_conversion (gcc::context *ctxt) | |
3197 | { | |
3198 | return new pass_if_conversion (ctxt); | |
3199 | } |