]>
Commit | Line | Data |
---|---|---|
06598532 | 1 | /* High-level loop manipulation functions. |
8e8f6434 | 2 | Copyright (C) 2004-2018 Free Software Foundation, Inc. |
48e1416a | 3 | |
06598532 | 4 | This file is part of GCC. |
48e1416a | 5 | |
06598532 | 6 | GCC is free software; you can redistribute it and/or modify it |
7 | under the terms of the GNU General Public License as published by the | |
8c4c00c1 | 8 | Free Software Foundation; either version 3, or (at your option) any |
06598532 | 9 | later version. |
48e1416a | 10 | |
06598532 | 11 | GCC is distributed in the hope that it will be useful, but WITHOUT |
12 | ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or | |
13 | FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License | |
14 | for more details. | |
48e1416a | 15 | |
06598532 | 16 | You should have received a copy of the GNU General Public License |
8c4c00c1 | 17 | along with GCC; see the file COPYING3. If not see |
18 | <http://www.gnu.org/licenses/>. */ | |
06598532 | 19 | |
20 | #include "config.h" | |
21 | #include "system.h" | |
22 | #include "coretypes.h" | |
9ef16211 | 23 | #include "backend.h" |
06598532 | 24 | #include "tree.h" |
9ef16211 | 25 | #include "gimple.h" |
7c29e30e | 26 | #include "cfghooks.h" |
27 | #include "tree-pass.h" /* ??? for TODO_update_ssa but this isn't a pass. */ | |
9ef16211 | 28 | #include "ssa.h" |
7c29e30e | 29 | #include "gimple-pretty-print.h" |
b20a8bb4 | 30 | #include "fold-const.h" |
94ea8568 | 31 | #include "cfganal.h" |
a8783bee | 32 | #include "gimplify.h" |
dcf1a1ec | 33 | #include "gimple-iterator.h" |
e795d6e1 | 34 | #include "gimplify-me.h" |
073c1fd5 | 35 | #include "tree-cfg.h" |
05d9c18a | 36 | #include "tree-ssa-loop-ivopts.h" |
37 | #include "tree-ssa-loop-manip.h" | |
38 | #include "tree-ssa-loop-niter.h" | |
073c1fd5 | 39 | #include "tree-ssa-loop.h" |
40 | #include "tree-into-ssa.h" | |
69ee5dbb | 41 | #include "tree-ssa.h" |
06598532 | 42 | #include "cfgloop.h" |
06598532 | 43 | #include "tree-scalar-evolution.h" |
b30560de | 44 | #include "params.h" |
bc8bb825 | 45 | #include "tree-inline.h" |
06598532 | 46 | |
ed7e2206 | 47 | /* All bitmaps for rewriting into loop-closed SSA go on this obstack, |
48 | so that we can free them all at once. */ | |
49 | static bitmap_obstack loop_renamer_obstack; | |
50 | ||
bb445479 | 51 | /* Creates an induction variable with value BASE + STEP * iteration in LOOP. |
52 | It is expected that neither BASE nor STEP are shared with other expressions | |
53 | (unless the sharing rules allow this). Use VAR as a base var_decl for it | |
54 | (if NULL, a new temporary will be created). The increment will occur at | |
48e1416a | 55 | INCR_POS (after it if AFTER is true, before it otherwise). INCR_POS and |
f98505bb | 56 | AFTER can be computed using standard_iv_increment_position. The ssa versions |
bb445479 | 57 | of the variable before and after increment will be stored in VAR_BEFORE and |
58 | VAR_AFTER (unless they are NULL). */ | |
59 | ||
60 | void | |
61 | create_iv (tree base, tree step, tree var, struct loop *loop, | |
75a70cf9 | 62 | gimple_stmt_iterator *incr_pos, bool after, |
bb445479 | 63 | tree *var_before, tree *var_after) |
64 | { | |
1a91d914 | 65 | gassign *stmt; |
66 | gphi *phi; | |
75a70cf9 | 67 | tree initial, step1; |
68 | gimple_seq stmts; | |
bb445479 | 69 | tree vb, va; |
70 | enum tree_code incr_op = PLUS_EXPR; | |
651874e1 | 71 | edge pe = loop_preheader_edge (loop); |
bb445479 | 72 | |
03d37e4e | 73 | if (var != NULL_TREE) |
74 | { | |
f9e245b2 | 75 | vb = make_ssa_name (var); |
76 | va = make_ssa_name (var); | |
03d37e4e | 77 | } |
78 | else | |
79 | { | |
80 | vb = make_temp_ssa_name (TREE_TYPE (base), NULL, "ivtmp"); | |
81 | va = make_temp_ssa_name (TREE_TYPE (base), NULL, "ivtmp"); | |
82 | } | |
bb445479 | 83 | if (var_before) |
84 | *var_before = vb; | |
bb445479 | 85 | if (var_after) |
86 | *var_after = va; | |
87 | ||
88 | /* For easier readability of the created code, produce MINUS_EXPRs | |
89 | when suitable. */ | |
90 | if (TREE_CODE (step) == INTEGER_CST) | |
91 | { | |
92 | if (TYPE_UNSIGNED (TREE_TYPE (step))) | |
93 | { | |
49d00087 | 94 | step1 = fold_build1 (NEGATE_EXPR, TREE_TYPE (step), step); |
bb445479 | 95 | if (tree_int_cst_lt (step1, step)) |
96 | { | |
97 | incr_op = MINUS_EXPR; | |
98 | step = step1; | |
99 | } | |
100 | } | |
101 | else | |
102 | { | |
add6ee5e | 103 | bool ovf; |
104 | ||
105 | if (!tree_expr_nonnegative_warnv_p (step, &ovf) | |
bb445479 | 106 | && may_negate_without_overflow_p (step)) |
107 | { | |
108 | incr_op = MINUS_EXPR; | |
49d00087 | 109 | step = fold_build1 (NEGATE_EXPR, TREE_TYPE (step), step); |
bb445479 | 110 | } |
111 | } | |
112 | } | |
0de36bdb | 113 | if (POINTER_TYPE_P (TREE_TYPE (base))) |
114 | { | |
86f2ad37 | 115 | if (TREE_CODE (base) == ADDR_EXPR) |
116 | mark_addressable (TREE_OPERAND (base, 0)); | |
a0553bff | 117 | step = convert_to_ptrofftype (step); |
0de36bdb | 118 | if (incr_op == MINUS_EXPR) |
a0553bff | 119 | step = fold_build1 (NEGATE_EXPR, TREE_TYPE (step), step); |
0de36bdb | 120 | incr_op = POINTER_PLUS_EXPR; |
121 | } | |
651874e1 | 122 | /* Gimplify the step if necessary. We put the computations in front of the |
123 | loop (i.e. the step should be loop invariant). */ | |
06240723 | 124 | step = force_gimple_operand (step, &stmts, true, NULL_TREE); |
651874e1 | 125 | if (stmts) |
75a70cf9 | 126 | gsi_insert_seq_on_edge_immediate (pe, stmts); |
651874e1 | 127 | |
e9cf809e | 128 | stmt = gimple_build_assign (va, incr_op, vb, step); |
bb445479 | 129 | if (after) |
75a70cf9 | 130 | gsi_insert_after (incr_pos, stmt, GSI_NEW_STMT); |
bb445479 | 131 | else |
75a70cf9 | 132 | gsi_insert_before (incr_pos, stmt, GSI_NEW_STMT); |
bb445479 | 133 | |
dec41e98 | 134 | initial = force_gimple_operand (base, &stmts, true, var); |
135 | if (stmts) | |
75a70cf9 | 136 | gsi_insert_seq_on_edge_immediate (pe, stmts); |
bb445479 | 137 | |
1a91d914 | 138 | phi = create_phi_node (vb, loop->header); |
139 | add_phi_arg (phi, initial, loop_preheader_edge (loop), UNKNOWN_LOCATION); | |
140 | add_phi_arg (phi, va, loop_latch_edge (loop), UNKNOWN_LOCATION); | |
bb445479 | 141 | } |
142 | ||
4fb07d00 | 143 | /* Return the innermost superloop LOOP of USE_LOOP that is a superloop of |
d09bc815 | 144 | both DEF_LOOP and USE_LOOP. */ |
145 | ||
146 | static inline struct loop * | |
147 | find_sibling_superloop (struct loop *use_loop, struct loop *def_loop) | |
148 | { | |
149 | unsigned ud = loop_depth (use_loop); | |
150 | unsigned dd = loop_depth (def_loop); | |
151 | gcc_assert (ud > 0 && dd > 0); | |
152 | if (ud > dd) | |
153 | use_loop = superloop_at_depth (use_loop, dd); | |
154 | if (ud < dd) | |
155 | def_loop = superloop_at_depth (def_loop, ud); | |
156 | while (loop_outer (use_loop) != loop_outer (def_loop)) | |
157 | { | |
158 | use_loop = loop_outer (use_loop); | |
159 | def_loop = loop_outer (def_loop); | |
160 | gcc_assert (use_loop && def_loop); | |
161 | } | |
162 | return use_loop; | |
163 | } | |
164 | ||
165 | /* DEF_BB is a basic block containing a DEF that needs rewriting into | |
166 | loop-closed SSA form. USE_BLOCKS is the set of basic blocks containing | |
167 | uses of DEF that "escape" from the loop containing DEF_BB (i.e. blocks in | |
168 | USE_BLOCKS are dominated by DEF_BB but not in the loop father of DEF_B). | |
169 | ALL_EXITS[I] is the set of all basic blocks that exit loop I. | |
170 | ||
171 | Compute the subset of LOOP_EXITS that exit the loop containing DEF_BB | |
172 | or one of its loop fathers, in which DEF is live. This set is returned | |
173 | in the bitmap LIVE_EXITS. | |
174 | ||
175 | Instead of computing the complete livein set of the def, we use the loop | |
176 | nesting tree as a form of poor man's structure analysis. This greatly | |
177 | speeds up the analysis, which is important because this function may be | |
178 | called on all SSA names that need rewriting, one at a time. */ | |
06598532 | 179 | |
180 | static void | |
d09bc815 | 181 | compute_live_loop_exits (bitmap live_exits, bitmap use_blocks, |
182 | bitmap *loop_exits, basic_block def_bb) | |
06598532 | 183 | { |
d09bc815 | 184 | unsigned i; |
185 | bitmap_iterator bi; | |
d09bc815 | 186 | struct loop *def_loop = def_bb->loop_father; |
187 | unsigned def_loop_depth = loop_depth (def_loop); | |
188 | bitmap def_loop_exits; | |
189 | ||
190 | /* Normally the work list size is bounded by the number of basic | |
191 | blocks in the largest loop. We don't know this number, but we | |
192 | can be fairly sure that it will be relatively small. */ | |
c2078b80 | 193 | auto_vec<basic_block> worklist (MAX (8, n_basic_blocks_for_fn (cfun) / 128)); |
d09bc815 | 194 | |
195 | EXECUTE_IF_SET_IN_BITMAP (use_blocks, 0, i, bi) | |
196 | { | |
f5a6b05f | 197 | basic_block use_bb = BASIC_BLOCK_FOR_FN (cfun, i); |
d09bc815 | 198 | struct loop *use_loop = use_bb->loop_father; |
199 | gcc_checking_assert (def_loop != use_loop | |
200 | && ! flow_loop_nested_p (def_loop, use_loop)); | |
201 | if (! flow_loop_nested_p (use_loop, def_loop)) | |
202 | use_bb = find_sibling_superloop (use_loop, def_loop)->header; | |
203 | if (bitmap_set_bit (live_exits, use_bb->index)) | |
f1f41a6c | 204 | worklist.safe_push (use_bb); |
d09bc815 | 205 | } |
206 | ||
207 | /* Iterate until the worklist is empty. */ | |
f1f41a6c | 208 | while (! worklist.is_empty ()) |
d09bc815 | 209 | { |
210 | edge e; | |
211 | edge_iterator ei; | |
212 | ||
213 | /* Pull a block off the worklist. */ | |
f1f41a6c | 214 | basic_block bb = worklist.pop (); |
d09bc815 | 215 | |
216 | /* Make sure we have at least enough room in the work list | |
217 | for all predecessors of this block. */ | |
f1f41a6c | 218 | worklist.reserve (EDGE_COUNT (bb->preds)); |
d09bc815 | 219 | |
220 | /* For each predecessor block. */ | |
221 | FOR_EACH_EDGE (e, ei, bb->preds) | |
222 | { | |
223 | basic_block pred = e->src; | |
224 | struct loop *pred_loop = pred->loop_father; | |
225 | unsigned pred_loop_depth = loop_depth (pred_loop); | |
226 | bool pred_visited; | |
227 | ||
228 | /* We should have met DEF_BB along the way. */ | |
34154e27 | 229 | gcc_assert (pred != ENTRY_BLOCK_PTR_FOR_FN (cfun)); |
d09bc815 | 230 | |
231 | if (pred_loop_depth >= def_loop_depth) | |
232 | { | |
233 | if (pred_loop_depth > def_loop_depth) | |
234 | pred_loop = superloop_at_depth (pred_loop, def_loop_depth); | |
235 | /* If we've reached DEF_LOOP, our train ends here. */ | |
236 | if (pred_loop == def_loop) | |
237 | continue; | |
238 | } | |
239 | else if (! flow_loop_nested_p (pred_loop, def_loop)) | |
240 | pred = find_sibling_superloop (pred_loop, def_loop)->header; | |
241 | ||
242 | /* Add PRED to the LIVEIN set. PRED_VISITED is true if | |
243 | we had already added PRED to LIVEIN before. */ | |
244 | pred_visited = !bitmap_set_bit (live_exits, pred->index); | |
245 | ||
246 | /* If we have visited PRED before, don't add it to the worklist. | |
247 | If BB dominates PRED, then we're probably looking at a loop. | |
248 | We're only interested in looking up in the dominance tree | |
249 | because DEF_BB dominates all the uses. */ | |
250 | if (pred_visited || dominated_by_p (CDI_DOMINATORS, pred, bb)) | |
251 | continue; | |
252 | ||
f1f41a6c | 253 | worklist.quick_push (pred); |
d09bc815 | 254 | } |
255 | } | |
d09bc815 | 256 | |
257 | def_loop_exits = BITMAP_ALLOC (&loop_renamer_obstack); | |
258 | for (struct loop *loop = def_loop; | |
259 | loop != current_loops->tree_root; | |
260 | loop = loop_outer (loop)) | |
261 | bitmap_ior_into (def_loop_exits, loop_exits[loop->num]); | |
262 | bitmap_and_into (live_exits, def_loop_exits); | |
263 | BITMAP_FREE (def_loop_exits); | |
264 | } | |
265 | ||
266 | /* Add a loop-closing PHI for VAR in basic block EXIT. */ | |
267 | ||
268 | static void | |
269 | add_exit_phi (basic_block exit, tree var) | |
270 | { | |
1a91d914 | 271 | gphi *phi; |
06598532 | 272 | edge e; |
cd665a06 | 273 | edge_iterator ei; |
06598532 | 274 | |
d09bc815 | 275 | /* Check that at least one of the edges entering the EXIT block exits |
276 | the loop, or a superloop of that loop, that VAR is defined in. */ | |
382ecba7 | 277 | if (flag_checking) |
06598532 | 278 | { |
382ecba7 | 279 | gimple *def_stmt = SSA_NAME_DEF_STMT (var); |
280 | basic_block def_bb = gimple_bb (def_stmt); | |
281 | FOR_EACH_EDGE (e, ei, exit->preds) | |
282 | { | |
283 | struct loop *aloop = find_common_loop (def_bb->loop_father, | |
284 | e->src->loop_father); | |
285 | if (!flow_bb_inside_loop_p (aloop, e->dest)) | |
286 | break; | |
287 | } | |
288 | gcc_assert (e); | |
06598532 | 289 | } |
290 | ||
9c06f260 | 291 | phi = create_phi_node (NULL_TREE, exit); |
d09bc815 | 292 | create_new_def_for (var, phi, gimple_phi_result_ptr (phi)); |
cd665a06 | 293 | FOR_EACH_EDGE (e, ei, exit->preds) |
d09bc815 | 294 | add_phi_arg (phi, var, e, UNKNOWN_LOCATION); |
295 | ||
296 | if (dump_file && (dump_flags & TDF_DETAILS)) | |
297 | { | |
298 | fprintf (dump_file, ";; Created LCSSA PHI: "); | |
299 | print_gimple_stmt (dump_file, phi, 0, dump_flags); | |
300 | } | |
06598532 | 301 | } |
302 | ||
303 | /* Add exit phis for VAR that is used in LIVEIN. | |
d09bc815 | 304 | Exits of the loops are stored in LOOP_EXITS. */ |
06598532 | 305 | |
306 | static void | |
d09bc815 | 307 | add_exit_phis_var (tree var, bitmap use_blocks, bitmap *loop_exits) |
06598532 | 308 | { |
4f917ffe | 309 | unsigned index; |
0cc4271a | 310 | bitmap_iterator bi; |
d09bc815 | 311 | basic_block def_bb = gimple_bb (SSA_NAME_DEF_STMT (var)); |
312 | bitmap live_exits = BITMAP_ALLOC (&loop_renamer_obstack); | |
06598532 | 313 | |
cf230d5d | 314 | gcc_checking_assert (! bitmap_bit_p (use_blocks, def_bb->index)); |
06598532 | 315 | |
d09bc815 | 316 | compute_live_loop_exits (live_exits, use_blocks, loop_exits, def_bb); |
06598532 | 317 | |
d09bc815 | 318 | EXECUTE_IF_SET_IN_BITMAP (live_exits, 0, index, bi) |
0cc4271a | 319 | { |
f5a6b05f | 320 | add_exit_phi (BASIC_BLOCK_FOR_FN (cfun, index), var); |
0cc4271a | 321 | } |
ed7e2206 | 322 | |
d09bc815 | 323 | BITMAP_FREE (live_exits); |
06598532 | 324 | } |
325 | ||
326 | /* Add exit phis for the names marked in NAMES_TO_RENAME. | |
327 | Exits of the loops are stored in EXITS. Sets of blocks where the ssa | |
328 | names are used are stored in USE_BLOCKS. */ | |
329 | ||
330 | static void | |
d09bc815 | 331 | add_exit_phis (bitmap names_to_rename, bitmap *use_blocks, bitmap *loop_exits) |
06598532 | 332 | { |
333 | unsigned i; | |
0cc4271a | 334 | bitmap_iterator bi; |
06598532 | 335 | |
0cc4271a | 336 | EXECUTE_IF_SET_IN_BITMAP (names_to_rename, 0, i, bi) |
06598532 | 337 | { |
338 | add_exit_phis_var (ssa_name (i), use_blocks[i], loop_exits); | |
0cc4271a | 339 | } |
06598532 | 340 | } |
341 | ||
d09bc815 | 342 | /* Fill the array of bitmaps LOOP_EXITS with all loop exit edge targets. */ |
06598532 | 343 | |
d09bc815 | 344 | static void |
345 | get_loops_exits (bitmap *loop_exits) | |
06598532 | 346 | { |
d09bc815 | 347 | struct loop *loop; |
348 | unsigned j; | |
06598532 | 349 | edge e; |
350 | ||
f21d4d00 | 351 | FOR_EACH_LOOP (loop, 0) |
06598532 | 352 | { |
f1f41a6c | 353 | vec<edge> exit_edges = get_loop_exit_edges (loop); |
d09bc815 | 354 | loop_exits[loop->num] = BITMAP_ALLOC (&loop_renamer_obstack); |
f1f41a6c | 355 | FOR_EACH_VEC_ELT (exit_edges, j, e) |
d09bc815 | 356 | bitmap_set_bit (loop_exits[loop->num], e->dest->index); |
f1f41a6c | 357 | exit_edges.release (); |
06598532 | 358 | } |
06598532 | 359 | } |
360 | ||
361 | /* For USE in BB, if it is used outside of the loop it is defined in, | |
362 | mark it for rewrite. Record basic block BB where it is used | |
bfd098e9 | 363 | to USE_BLOCKS. Record the ssa name index to NEED_PHIS bitmap. |
364 | Note that for USEs in phis, BB should be the src of the edge corresponding to | |
365 | the use, rather than the bb containing the phi. */ | |
06598532 | 366 | |
367 | static void | |
095dcfa3 | 368 | find_uses_to_rename_use (basic_block bb, tree use, bitmap *use_blocks, |
369 | bitmap need_phis) | |
06598532 | 370 | { |
371 | unsigned ver; | |
372 | basic_block def_bb; | |
373 | struct loop *def_loop; | |
374 | ||
375 | if (TREE_CODE (use) != SSA_NAME) | |
376 | return; | |
377 | ||
378 | ver = SSA_NAME_VERSION (use); | |
75a70cf9 | 379 | def_bb = gimple_bb (SSA_NAME_DEF_STMT (use)); |
06598532 | 380 | if (!def_bb) |
381 | return; | |
382 | def_loop = def_bb->loop_father; | |
383 | ||
d88fd237 | 384 | /* If the definition is not inside a loop, it is not interesting. */ |
9e3536f4 | 385 | if (!loop_outer (def_loop)) |
06598532 | 386 | return; |
387 | ||
d88fd237 | 388 | /* If the use is not outside of the loop it is defined in, it is not |
389 | interesting. */ | |
390 | if (flow_bb_inside_loop_p (def_loop, bb)) | |
391 | return; | |
392 | ||
ed7e2206 | 393 | /* If we're seeing VER for the first time, we still have to allocate |
394 | a bitmap for its uses. */ | |
395 | if (bitmap_set_bit (need_phis, ver)) | |
396 | use_blocks[ver] = BITMAP_ALLOC (&loop_renamer_obstack); | |
06598532 | 397 | bitmap_set_bit (use_blocks[ver], bb->index); |
06598532 | 398 | } |
399 | ||
303352c3 | 400 | /* For uses matching USE_FLAGS in STMT, mark names that are used outside of the |
401 | loop they are defined to rewrite. Record the set of blocks in which the ssa | |
402 | names are used to USE_BLOCKS, and the ssa names themselves to NEED_PHIS. */ | |
06598532 | 403 | |
404 | static void | |
42acab1c | 405 | find_uses_to_rename_stmt (gimple *stmt, bitmap *use_blocks, bitmap need_phis, |
303352c3 | 406 | int use_flags) |
06598532 | 407 | { |
43daa21e | 408 | ssa_op_iter iter; |
409 | tree var; | |
75a70cf9 | 410 | basic_block bb = gimple_bb (stmt); |
06598532 | 411 | |
9845d120 | 412 | if (is_gimple_debug (stmt)) |
413 | return; | |
414 | ||
303352c3 | 415 | /* FOR_EACH_SSA_TREE_OPERAND iterator does not allows SSA_OP_VIRTUAL_USES |
416 | only. */ | |
417 | if (use_flags == SSA_OP_VIRTUAL_USES) | |
418 | { | |
419 | tree vuse = gimple_vuse (stmt); | |
420 | if (vuse != NULL_TREE) | |
421 | find_uses_to_rename_use (bb, gimple_vuse (stmt), use_blocks, need_phis); | |
422 | } | |
423 | else | |
424 | FOR_EACH_SSA_TREE_OPERAND (var, stmt, iter, use_flags) | |
425 | find_uses_to_rename_use (bb, var, use_blocks, need_phis); | |
06598532 | 426 | } |
427 | ||
303352c3 | 428 | /* Marks names matching USE_FLAGS that are used in BB and outside of the loop |
429 | they are defined in for rewrite. Records the set of blocks in which the ssa | |
430 | names are used to USE_BLOCKS. Record the SSA names that will | |
431 | need exit PHIs in NEED_PHIS. */ | |
06598532 | 432 | |
433 | static void | |
303352c3 | 434 | find_uses_to_rename_bb (basic_block bb, bitmap *use_blocks, bitmap need_phis, |
435 | int use_flags) | |
06598532 | 436 | { |
053fdd99 | 437 | edge e; |
438 | edge_iterator ei; | |
303352c3 | 439 | bool do_virtuals = (use_flags & SSA_OP_VIRTUAL_USES) != 0; |
440 | bool do_nonvirtuals = (use_flags & SSA_OP_USE) != 0; | |
06598532 | 441 | |
053fdd99 | 442 | FOR_EACH_EDGE (e, ei, bb->succs) |
1a91d914 | 443 | for (gphi_iterator bsi = gsi_start_phis (e->dest); !gsi_end_p (bsi); |
444 | gsi_next (&bsi)) | |
d09bc815 | 445 | { |
1a91d914 | 446 | gphi *phi = bsi.phi (); |
303352c3 | 447 | bool virtual_p = virtual_operand_p (gimple_phi_result (phi)); |
448 | if ((virtual_p && do_virtuals) | |
449 | || (!virtual_p && do_nonvirtuals)) | |
5789f1f8 | 450 | find_uses_to_rename_use (bb, PHI_ARG_DEF_FROM_EDGE (phi, e), |
451 | use_blocks, need_phis); | |
d09bc815 | 452 | } |
48e1416a | 453 | |
1a91d914 | 454 | for (gimple_stmt_iterator bsi = gsi_start_bb (bb); !gsi_end_p (bsi); |
455 | gsi_next (&bsi)) | |
303352c3 | 456 | find_uses_to_rename_stmt (gsi_stmt (bsi), use_blocks, need_phis, |
457 | use_flags); | |
053fdd99 | 458 | } |
48e1416a | 459 | |
303352c3 | 460 | /* Marks names matching USE_FLAGS that are used outside of the loop they are |
461 | defined in for rewrite. Records the set of blocks in which the ssa names are | |
462 | used to USE_BLOCKS. Record the SSA names that will need exit PHIs in | |
463 | NEED_PHIS. If CHANGED_BBS is not NULL, scan only blocks in this set. */ | |
053fdd99 | 464 | |
465 | static void | |
303352c3 | 466 | find_uses_to_rename (bitmap changed_bbs, bitmap *use_blocks, bitmap need_phis, |
467 | int use_flags) | |
053fdd99 | 468 | { |
469 | basic_block bb; | |
470 | unsigned index; | |
471 | bitmap_iterator bi; | |
06598532 | 472 | |
f55f91f5 | 473 | if (changed_bbs) |
474 | EXECUTE_IF_SET_IN_BITMAP (changed_bbs, 0, index, bi) | |
59ae3d1b | 475 | { |
476 | bb = BASIC_BLOCK_FOR_FN (cfun, index); | |
477 | if (bb) | |
478 | find_uses_to_rename_bb (bb, use_blocks, need_phis, use_flags); | |
479 | } | |
053fdd99 | 480 | else |
fc00614f | 481 | FOR_EACH_BB_FN (bb, cfun) |
303352c3 | 482 | find_uses_to_rename_bb (bb, use_blocks, need_phis, use_flags); |
483 | } | |
484 | ||
485 | /* Mark uses of DEF that are used outside of the loop they are defined in for | |
486 | rewrite. Record the set of blocks in which the ssa names are used to | |
487 | USE_BLOCKS. Record the SSA names that will need exit PHIs in NEED_PHIS. */ | |
488 | ||
489 | static void | |
490 | find_uses_to_rename_def (tree def, bitmap *use_blocks, bitmap need_phis) | |
491 | { | |
42acab1c | 492 | gimple *use_stmt; |
303352c3 | 493 | imm_use_iterator imm_iter; |
494 | ||
495 | FOR_EACH_IMM_USE_STMT (use_stmt, imm_iter, def) | |
496 | { | |
8ab5d9f0 | 497 | if (is_gimple_debug (use_stmt)) |
498 | continue; | |
499 | ||
303352c3 | 500 | basic_block use_bb = gimple_bb (use_stmt); |
501 | ||
502 | use_operand_p use_p; | |
503 | FOR_EACH_IMM_USE_ON_STMT (use_p, imm_iter) | |
504 | { | |
505 | if (gimple_code (use_stmt) == GIMPLE_PHI) | |
506 | { | |
507 | edge e = gimple_phi_arg_edge (as_a <gphi *> (use_stmt), | |
508 | PHI_ARG_INDEX_FROM_USE (use_p)); | |
509 | use_bb = e->src; | |
510 | } | |
511 | find_uses_to_rename_use (use_bb, USE_FROM_PTR (use_p), use_blocks, | |
512 | need_phis); | |
513 | } | |
514 | } | |
515 | } | |
516 | ||
517 | /* Marks names matching USE_FLAGS that are defined in LOOP and used outside of | |
518 | it for rewrite. Records the set of blocks in which the ssa names are used to | |
519 | USE_BLOCKS. Record the SSA names that will need exit PHIs in NEED_PHIS. */ | |
520 | ||
521 | static void | |
522 | find_uses_to_rename_in_loop (struct loop *loop, bitmap *use_blocks, | |
523 | bitmap need_phis, int use_flags) | |
524 | { | |
525 | bool do_virtuals = (use_flags & SSA_OP_VIRTUAL_USES) != 0; | |
526 | bool do_nonvirtuals = (use_flags & SSA_OP_USE) != 0; | |
527 | int def_flags = ((do_virtuals ? SSA_OP_VIRTUAL_DEFS : 0) | |
528 | | (do_nonvirtuals ? SSA_OP_DEF : 0)); | |
529 | ||
530 | ||
531 | basic_block *bbs = get_loop_body (loop); | |
532 | ||
533 | for (unsigned int i = 0; i < loop->num_nodes; i++) | |
534 | { | |
535 | basic_block bb = bbs[i]; | |
536 | ||
537 | for (gphi_iterator bsi = gsi_start_phis (bb); !gsi_end_p (bsi); | |
538 | gsi_next (&bsi)) | |
539 | { | |
540 | gphi *phi = bsi.phi (); | |
541 | tree res = gimple_phi_result (phi); | |
542 | bool virtual_p = virtual_operand_p (res); | |
543 | if ((virtual_p && do_virtuals) | |
544 | || (!virtual_p && do_nonvirtuals)) | |
545 | find_uses_to_rename_def (res, use_blocks, need_phis); | |
546 | } | |
547 | ||
548 | for (gimple_stmt_iterator bsi = gsi_start_bb (bb); !gsi_end_p (bsi); | |
549 | gsi_next (&bsi)) | |
550 | { | |
42acab1c | 551 | gimple *stmt = gsi_stmt (bsi); |
303352c3 | 552 | /* FOR_EACH_SSA_TREE_OPERAND iterator does not allows |
553 | SSA_OP_VIRTUAL_DEFS only. */ | |
554 | if (def_flags == SSA_OP_VIRTUAL_DEFS) | |
555 | { | |
556 | tree vdef = gimple_vdef (stmt); | |
557 | if (vdef != NULL) | |
558 | find_uses_to_rename_def (vdef, use_blocks, need_phis); | |
559 | } | |
560 | else | |
561 | { | |
562 | tree var; | |
563 | ssa_op_iter iter; | |
564 | FOR_EACH_SSA_TREE_OPERAND (var, stmt, iter, def_flags) | |
565 | find_uses_to_rename_def (var, use_blocks, need_phis); | |
566 | } | |
567 | } | |
568 | } | |
569 | ||
570 | XDELETEVEC (bbs); | |
06598532 | 571 | } |
572 | ||
573 | /* Rewrites the program into a loop closed ssa form -- i.e. inserts extra | |
574 | phi nodes to ensure that no variable is used outside the loop it is | |
575 | defined in. | |
576 | ||
577 | This strengthening of the basic ssa form has several advantages: | |
578 | ||
579 | 1) Updating it during unrolling/peeling/versioning is trivial, since | |
580 | we do not need to care about the uses outside of the loop. | |
cf230d5d | 581 | The same applies to virtual operands which are also rewritten into |
582 | loop closed SSA form. Note that virtual operands are always live | |
583 | until function exit. | |
06598532 | 584 | 2) The behavior of all uses of an induction variable is the same. |
585 | Without this, you need to distinguish the case when the variable | |
586 | is used outside of the loop it is defined in, for example | |
587 | ||
588 | for (i = 0; i < 100; i++) | |
589 | { | |
590 | for (j = 0; j < 100; j++) | |
591 | { | |
592 | k = i + j; | |
593 | use1 (k); | |
594 | } | |
595 | use2 (k); | |
596 | } | |
597 | ||
598 | Looking from the outer loop with the normal SSA form, the first use of k | |
599 | is not well-behaved, while the second one is an induction variable with | |
053fdd99 | 600 | base 99 and step 1. |
48e1416a | 601 | |
303352c3 | 602 | If LOOP is non-null, only rewrite uses that have defs in LOOP. Otherwise, |
603 | if CHANGED_BBS is not NULL, we look for uses outside loops only in the | |
604 | basic blocks in this set. | |
605 | ||
606 | USE_FLAGS allows us to specify whether we want virtual, non-virtual or | |
607 | both variables rewritten. | |
095dcfa3 | 608 | |
609 | UPDATE_FLAG is used in the call to update_ssa. See | |
610 | TODO_update_ssa* for documentation. */ | |
06598532 | 611 | |
612 | void | |
303352c3 | 613 | rewrite_into_loop_closed_ssa_1 (bitmap changed_bbs, unsigned update_flag, |
614 | int use_flags, struct loop *loop) | |
06598532 | 615 | { |
06598532 | 616 | bitmap *use_blocks; |
d8a0d6b8 | 617 | bitmap names_to_rename; |
618 | ||
f24ec26f | 619 | loops_state_set (LOOP_CLOSED_SSA); |
41f75a99 | 620 | if (number_of_loops (cfun) <= 1) |
d8a0d6b8 | 621 | return; |
622 | ||
d09bc815 | 623 | /* If the pass has caused the SSA form to be out-of-date, update it |
624 | now. */ | |
29134d13 | 625 | if (update_flag != 0) |
303352c3 | 626 | update_ssa (update_flag); |
29134d13 | 627 | else if (flag_checking) |
628 | verify_ssa (true, true); | |
d09bc815 | 629 | |
ed7e2206 | 630 | bitmap_obstack_initialize (&loop_renamer_obstack); |
631 | ||
ed7e2206 | 632 | names_to_rename = BITMAP_ALLOC (&loop_renamer_obstack); |
06598532 | 633 | |
ed7e2206 | 634 | /* Uses of names to rename. We don't have to initialize this array, |
635 | because we know that we will only have entries for the SSA names | |
636 | in NAMES_TO_RENAME. */ | |
91442a6f | 637 | use_blocks = XNEWVEC (bitmap, num_ssa_names); |
06598532 | 638 | |
303352c3 | 639 | if (loop != NULL) |
640 | { | |
641 | gcc_assert (changed_bbs == NULL); | |
642 | find_uses_to_rename_in_loop (loop, use_blocks, names_to_rename, | |
643 | use_flags); | |
644 | } | |
645 | else | |
646 | { | |
647 | gcc_assert (loop == NULL); | |
648 | find_uses_to_rename (changed_bbs, use_blocks, names_to_rename, use_flags); | |
649 | } | |
053fdd99 | 650 | |
c8498e04 | 651 | if (!bitmap_empty_p (names_to_rename)) |
652 | { | |
653 | /* An array of bitmaps where LOOP_EXITS[I] is the set of basic blocks | |
654 | that are the destination of an edge exiting loop number I. */ | |
41f75a99 | 655 | bitmap *loop_exits = XNEWVEC (bitmap, number_of_loops (cfun)); |
c8498e04 | 656 | get_loops_exits (loop_exits); |
657 | ||
658 | /* Add the PHI nodes on exits of the loops for the names we need to | |
659 | rewrite. */ | |
660 | add_exit_phis (names_to_rename, use_blocks, loop_exits); | |
661 | ||
662 | free (loop_exits); | |
663 | ||
664 | /* Fix up all the names found to be used outside their original | |
665 | loops. */ | |
666 | update_ssa (TODO_update_ssa); | |
667 | } | |
06598532 | 668 | |
ed7e2206 | 669 | bitmap_obstack_release (&loop_renamer_obstack); |
06598532 | 670 | free (use_blocks); |
06598532 | 671 | } |
672 | ||
303352c3 | 673 | /* Rewrites the non-virtual defs and uses into a loop closed ssa form. If |
674 | CHANGED_BBS is not NULL, we look for uses outside loops only in the basic | |
675 | blocks in this set. UPDATE_FLAG is used in the call to update_ssa. See | |
676 | TODO_update_ssa* for documentation. */ | |
4d7c6f77 | 677 | |
303352c3 | 678 | void |
679 | rewrite_into_loop_closed_ssa (bitmap changed_bbs, unsigned update_flag) | |
4d7c6f77 | 680 | { |
303352c3 | 681 | rewrite_into_loop_closed_ssa_1 (changed_bbs, update_flag, SSA_OP_USE, NULL); |
4d7c6f77 | 682 | } |
683 | ||
303352c3 | 684 | /* Rewrites virtual defs and uses with def in LOOP into loop closed ssa |
685 | form. */ | |
4d7c6f77 | 686 | |
687 | void | |
688 | rewrite_virtuals_into_loop_closed_ssa (struct loop *loop) | |
689 | { | |
303352c3 | 690 | rewrite_into_loop_closed_ssa_1 (NULL, 0, SSA_OP_VIRTUAL_USES, loop); |
4d7c6f77 | 691 | } |
692 | ||
232624e0 | 693 | /* Check invariants of the loop closed ssa form for the def in DEF_BB. */ |
06598532 | 694 | |
695 | static void | |
232624e0 | 696 | check_loop_closed_ssa_def (basic_block def_bb, tree def) |
06598532 | 697 | { |
232624e0 | 698 | use_operand_p use_p; |
699 | imm_use_iterator iterator; | |
700 | FOR_EACH_IMM_USE_FAST (use_p, iterator, def) | |
701 | { | |
702 | if (is_gimple_debug (USE_STMT (use_p))) | |
703 | continue; | |
48e1416a | 704 | |
232624e0 | 705 | basic_block use_bb = gimple_bb (USE_STMT (use_p)); |
706 | if (is_a <gphi *> (USE_STMT (use_p))) | |
707 | use_bb = EDGE_PRED (use_bb, PHI_ARG_INDEX_FROM_USE (use_p))->src; | |
06598532 | 708 | |
232624e0 | 709 | gcc_assert (flow_bb_inside_loop_p (def_bb->loop_father, use_bb)); |
710 | } | |
06598532 | 711 | } |
712 | ||
232624e0 | 713 | /* Checks invariants of loop closed ssa form in BB. */ |
06598532 | 714 | |
715 | static void | |
232624e0 | 716 | check_loop_closed_ssa_bb (basic_block bb) |
06598532 | 717 | { |
232624e0 | 718 | for (gphi_iterator bsi = gsi_start_phis (bb); !gsi_end_p (bsi); |
719 | gsi_next (&bsi)) | |
720 | { | |
721 | gphi *phi = bsi.phi (); | |
06598532 | 722 | |
232624e0 | 723 | if (!virtual_operand_p (PHI_RESULT (phi))) |
724 | check_loop_closed_ssa_def (bb, PHI_RESULT (phi)); | |
725 | } | |
9845d120 | 726 | |
232624e0 | 727 | for (gimple_stmt_iterator bsi = gsi_start_nondebug_bb (bb); !gsi_end_p (bsi); |
728 | gsi_next_nondebug (&bsi)) | |
729 | { | |
730 | ssa_op_iter iter; | |
731 | tree var; | |
732 | gimple *stmt = gsi_stmt (bsi); | |
733 | ||
734 | FOR_EACH_SSA_TREE_OPERAND (var, stmt, iter, SSA_OP_DEF) | |
735 | check_loop_closed_ssa_def (bb, var); | |
736 | } | |
06598532 | 737 | } |
738 | ||
ca77c6ec | 739 | /* Checks that invariants of the loop closed ssa form are preserved. |
232624e0 | 740 | Call verify_ssa when VERIFY_SSA_P is true. Note all loops are checked |
741 | if LOOP is NULL, otherwise, only LOOP is checked. */ | |
06598532 | 742 | |
4b987fac | 743 | DEBUG_FUNCTION void |
232624e0 | 744 | verify_loop_closed_ssa (bool verify_ssa_p, struct loop *loop) |
06598532 | 745 | { |
41f75a99 | 746 | if (number_of_loops (cfun) <= 1) |
095dcfa3 | 747 | return; |
748 | ||
ca77c6ec | 749 | if (verify_ssa_p) |
71b65939 | 750 | verify_ssa (false, true); |
06598532 | 751 | |
4b366dd3 | 752 | timevar_push (TV_VERIFY_LOOP_CLOSED); |
753 | ||
232624e0 | 754 | if (loop == NULL) |
06598532 | 755 | { |
232624e0 | 756 | basic_block bb; |
06598532 | 757 | |
232624e0 | 758 | FOR_EACH_BB_FN (bb, cfun) |
759 | if (bb->loop_father && bb->loop_father->num > 0) | |
760 | check_loop_closed_ssa_bb (bb); | |
761 | } | |
762 | else | |
763 | { | |
764 | basic_block *bbs = get_loop_body (loop); | |
765 | ||
766 | for (unsigned i = 0; i < loop->num_nodes; ++i) | |
767 | check_loop_closed_ssa_bb (bbs[i]); | |
768 | ||
769 | free (bbs); | |
06598532 | 770 | } |
4b366dd3 | 771 | |
772 | timevar_pop (TV_VERIFY_LOOP_CLOSED); | |
06598532 | 773 | } |
dec41e98 | 774 | |
775 | /* Split loop exit edge EXIT. The things are a bit complicated by a need to | |
6bae816f | 776 | preserve the loop closed ssa form. If COPY_CONSTANTS_P is true then |
777 | forwarder PHIs are also created for constant arguments. | |
778 | The newly created block is returned. */ | |
dec41e98 | 779 | |
28c92cbb | 780 | basic_block |
6bae816f | 781 | split_loop_exit_edge (edge exit, bool copy_constants_p) |
dec41e98 | 782 | { |
783 | basic_block dest = exit->dest; | |
88e6f696 | 784 | basic_block bb = split_edge (exit); |
1a91d914 | 785 | gphi *phi, *new_phi; |
75a70cf9 | 786 | tree new_name, name; |
dec41e98 | 787 | use_operand_p op_p; |
1a91d914 | 788 | gphi_iterator psi; |
be1e7283 | 789 | location_t locus; |
dec41e98 | 790 | |
75a70cf9 | 791 | for (psi = gsi_start_phis (dest); !gsi_end_p (psi); gsi_next (&psi)) |
dec41e98 | 792 | { |
1a91d914 | 793 | phi = psi.phi (); |
ea091dfd | 794 | op_p = PHI_ARG_DEF_PTR_FROM_EDGE (phi, single_succ_edge (bb)); |
efbcb6de | 795 | locus = gimple_phi_arg_location_from_edge (phi, single_succ_edge (bb)); |
dec41e98 | 796 | |
b2ca91ff | 797 | name = USE_FROM_PTR (op_p); |
798 | ||
4fb5e5ca | 799 | /* If the argument of the PHI node is a constant, we do not need |
b2ca91ff | 800 | to keep it inside loop. */ |
6bae816f | 801 | if (TREE_CODE (name) != SSA_NAME |
802 | && !copy_constants_p) | |
b2ca91ff | 803 | continue; |
804 | ||
805 | /* Otherwise create an auxiliary phi node that will copy the value | |
4fb5e5ca | 806 | of the SSA name out of the loop. */ |
6bae816f | 807 | new_name = duplicate_ssa_name (PHI_RESULT (phi), NULL); |
dec41e98 | 808 | new_phi = create_phi_node (new_name, bb); |
60d535d2 | 809 | add_phi_arg (new_phi, name, exit, locus); |
dec41e98 | 810 | SET_USE (op_p, new_name); |
811 | } | |
28c92cbb | 812 | |
813 | return bb; | |
dec41e98 | 814 | } |
815 | ||
dec41e98 | 816 | /* Returns the basic block in that statements should be emitted for induction |
817 | variables incremented at the end of the LOOP. */ | |
818 | ||
819 | basic_block | |
820 | ip_end_pos (struct loop *loop) | |
821 | { | |
822 | return loop->latch; | |
823 | } | |
824 | ||
825 | /* Returns the basic block in that statements should be emitted for induction | |
826 | variables incremented just before exit condition of a LOOP. */ | |
827 | ||
828 | basic_block | |
829 | ip_normal_pos (struct loop *loop) | |
830 | { | |
42acab1c | 831 | gimple *last; |
dec41e98 | 832 | basic_block bb; |
833 | edge exit; | |
834 | ||
ea091dfd | 835 | if (!single_pred_p (loop->latch)) |
dec41e98 | 836 | return NULL; |
837 | ||
ea091dfd | 838 | bb = single_pred (loop->latch); |
dec41e98 | 839 | last = last_stmt (bb); |
d92f9312 | 840 | if (!last |
75a70cf9 | 841 | || gimple_code (last) != GIMPLE_COND) |
dec41e98 | 842 | return NULL; |
843 | ||
cd665a06 | 844 | exit = EDGE_SUCC (bb, 0); |
dec41e98 | 845 | if (exit->dest == loop->latch) |
cd665a06 | 846 | exit = EDGE_SUCC (bb, 1); |
dec41e98 | 847 | |
848 | if (flow_bb_inside_loop_p (loop, exit->dest)) | |
849 | return NULL; | |
850 | ||
851 | return bb; | |
852 | } | |
853 | ||
854 | /* Stores the standard position for induction variable increment in LOOP | |
855 | (just before the exit condition if it is available and latch block is empty, | |
856 | end of the latch block otherwise) to BSI. INSERT_AFTER is set to true if | |
857 | the increment should be inserted after *BSI. */ | |
858 | ||
859 | void | |
75a70cf9 | 860 | standard_iv_increment_position (struct loop *loop, gimple_stmt_iterator *bsi, |
dec41e98 | 861 | bool *insert_after) |
862 | { | |
863 | basic_block bb = ip_normal_pos (loop), latch = ip_end_pos (loop); | |
42acab1c | 864 | gimple *last = last_stmt (latch); |
dec41e98 | 865 | |
866 | if (!bb | |
75a70cf9 | 867 | || (last && gimple_code (last) != GIMPLE_LABEL)) |
dec41e98 | 868 | { |
75a70cf9 | 869 | *bsi = gsi_last_bb (latch); |
dec41e98 | 870 | *insert_after = true; |
871 | } | |
872 | else | |
873 | { | |
75a70cf9 | 874 | *bsi = gsi_last_bb (bb); |
dec41e98 | 875 | *insert_after = false; |
876 | } | |
877 | } | |
e12d0591 | 878 | |
879 | /* Copies phi node arguments for duplicated blocks. The index of the first | |
880 | duplicated block is FIRST_NEW_BLOCK. */ | |
881 | ||
882 | static void | |
883 | copy_phi_node_args (unsigned first_new_block) | |
884 | { | |
885 | unsigned i; | |
886 | ||
fe672ac0 | 887 | for (i = first_new_block; i < (unsigned) last_basic_block_for_fn (cfun); i++) |
f5a6b05f | 888 | BASIC_BLOCK_FOR_FN (cfun, i)->flags |= BB_DUPLICATED; |
e12d0591 | 889 | |
fe672ac0 | 890 | for (i = first_new_block; i < (unsigned) last_basic_block_for_fn (cfun); i++) |
f5a6b05f | 891 | add_phi_args_after_copy_bb (BASIC_BLOCK_FOR_FN (cfun, i)); |
e12d0591 | 892 | |
fe672ac0 | 893 | for (i = first_new_block; i < (unsigned) last_basic_block_for_fn (cfun); i++) |
f5a6b05f | 894 | BASIC_BLOCK_FOR_FN (cfun, i)->flags &= ~BB_DUPLICATED; |
e12d0591 | 895 | } |
896 | ||
e12d0591 | 897 | |
095dcfa3 | 898 | /* The same as cfgloopmanip.c:duplicate_loop_to_header_edge, but also |
899 | updates the PHI nodes at start of the copied region. In order to | |
900 | achieve this, only loops whose exits all lead to the same location | |
901 | are handled. | |
e12d0591 | 902 | |
095dcfa3 | 903 | Notice that we do not completely update the SSA web after |
904 | duplication. The caller is responsible for calling update_ssa | |
905 | after the loop has been duplicated. */ | |
e12d0591 | 906 | |
907 | bool | |
75a70cf9 | 908 | gimple_duplicate_loop_to_header_edge (struct loop *loop, edge e, |
e12d0591 | 909 | unsigned int ndupl, sbitmap wont_exit, |
f1f41a6c | 910 | edge orig, vec<edge> *to_remove, |
f3c40e6d | 911 | int flags) |
e12d0591 | 912 | { |
913 | unsigned first_new_block; | |
e12d0591 | 914 | |
f24ec26f | 915 | if (!loops_state_satisfies_p (LOOPS_HAVE_SIMPLE_LATCHES)) |
e12d0591 | 916 | return false; |
f24ec26f | 917 | if (!loops_state_satisfies_p (LOOPS_HAVE_PREHEADERS)) |
e12d0591 | 918 | return false; |
919 | ||
fe672ac0 | 920 | first_new_block = last_basic_block_for_fn (cfun); |
7194de72 | 921 | if (!duplicate_loop_to_header_edge (loop, e, ndupl, wont_exit, |
f3c40e6d | 922 | orig, to_remove, flags)) |
e12d0591 | 923 | return false; |
924 | ||
925 | /* Readd the removed phi args for e. */ | |
44a46103 | 926 | flush_pending_stmts (e); |
e12d0591 | 927 | |
928 | /* Copy the phi node arguments. */ | |
929 | copy_phi_node_args (first_new_block); | |
930 | ||
e12d0591 | 931 | scev_reset (); |
e12d0591 | 932 | |
933 | return true; | |
934 | } | |
b30560de | 935 | |
b30560de | 936 | /* Returns true if we can unroll LOOP FACTOR times. Number |
937 | of iterations of the loop is returned in NITER. */ | |
938 | ||
939 | bool | |
940 | can_unroll_loop_p (struct loop *loop, unsigned factor, | |
941 | struct tree_niter_desc *niter) | |
942 | { | |
943 | edge exit; | |
944 | ||
945 | /* Check whether unrolling is possible. We only want to unroll loops | |
946 | for that we are able to determine number of iterations. We also | |
947 | want to split the extra iterations of the loop from its end, | |
948 | therefore we require that the loop has precisely one | |
949 | exit. */ | |
950 | ||
951 | exit = single_dom_exit (loop); | |
952 | if (!exit) | |
953 | return false; | |
954 | ||
955 | if (!number_of_iterations_exit (loop, exit, niter, false) | |
07392428 | 956 | || niter->cmp == ERROR_MARK |
957 | /* Scalar evolutions analysis might have copy propagated | |
958 | the abnormal ssa names into these expressions, hence | |
fa7637bd | 959 | emitting the computations based on them during loop |
07392428 | 960 | unrolling might create overlapping life ranges for |
961 | them, and failures in out-of-ssa. */ | |
962 | || contains_abnormal_ssa_name_p (niter->may_be_zero) | |
963 | || contains_abnormal_ssa_name_p (niter->control.base) | |
964 | || contains_abnormal_ssa_name_p (niter->control.step) | |
965 | || contains_abnormal_ssa_name_p (niter->bound)) | |
b30560de | 966 | return false; |
967 | ||
968 | /* And of course, we must be able to duplicate the loop. */ | |
969 | if (!can_duplicate_loop_p (loop)) | |
970 | return false; | |
971 | ||
972 | /* The final loop should be small enough. */ | |
bc8bb825 | 973 | if (tree_num_loop_insns (loop, &eni_size_weights) * factor |
b30560de | 974 | > (unsigned) PARAM_VALUE (PARAM_MAX_UNROLLED_INSNS)) |
975 | return false; | |
976 | ||
977 | return true; | |
978 | } | |
979 | ||
980 | /* Determines the conditions that control execution of LOOP unrolled FACTOR | |
981 | times. DESC is number of iterations of LOOP. ENTER_COND is set to | |
982 | condition that must be true if the main loop can be entered. | |
983 | EXIT_BASE, EXIT_STEP, EXIT_CMP and EXIT_BOUND are set to values describing | |
984 | how the exit from the unrolled loop should be controlled. */ | |
985 | ||
986 | static void | |
987 | determine_exit_conditions (struct loop *loop, struct tree_niter_desc *desc, | |
988 | unsigned factor, tree *enter_cond, | |
989 | tree *exit_base, tree *exit_step, | |
990 | enum tree_code *exit_cmp, tree *exit_bound) | |
991 | { | |
75a70cf9 | 992 | gimple_seq stmts; |
b30560de | 993 | tree base = desc->control.base; |
994 | tree step = desc->control.step; | |
995 | tree bound = desc->bound; | |
e88bb328 | 996 | tree type = TREE_TYPE (step); |
b30560de | 997 | tree bigstep, delta; |
998 | tree min = lower_bound_in_type (type, type); | |
999 | tree max = upper_bound_in_type (type, type); | |
1000 | enum tree_code cmp = desc->cmp; | |
1001 | tree cond = boolean_true_node, assum; | |
1002 | ||
a0553bff | 1003 | /* For pointers, do the arithmetics in the type of step. */ |
e88bb328 | 1004 | base = fold_convert (type, base); |
1005 | bound = fold_convert (type, bound); | |
1006 | ||
b30560de | 1007 | *enter_cond = boolean_false_node; |
1008 | *exit_base = NULL_TREE; | |
1009 | *exit_step = NULL_TREE; | |
1010 | *exit_cmp = ERROR_MARK; | |
1011 | *exit_bound = NULL_TREE; | |
1012 | gcc_assert (cmp != ERROR_MARK); | |
1013 | ||
1014 | /* We only need to be correct when we answer question | |
1015 | "Do at least FACTOR more iterations remain?" in the unrolled loop. | |
1016 | Thus, transforming BASE + STEP * i <> BOUND to | |
1017 | BASE + STEP * i < BOUND is ok. */ | |
1018 | if (cmp == NE_EXPR) | |
1019 | { | |
1020 | if (tree_int_cst_sign_bit (step)) | |
1021 | cmp = GT_EXPR; | |
1022 | else | |
1023 | cmp = LT_EXPR; | |
1024 | } | |
1025 | else if (cmp == LT_EXPR) | |
1026 | { | |
1027 | gcc_assert (!tree_int_cst_sign_bit (step)); | |
1028 | } | |
1029 | else if (cmp == GT_EXPR) | |
1030 | { | |
1031 | gcc_assert (tree_int_cst_sign_bit (step)); | |
1032 | } | |
1033 | else | |
1034 | gcc_unreachable (); | |
1035 | ||
1036 | /* The main body of the loop may be entered iff: | |
1037 | ||
1038 | 1) desc->may_be_zero is false. | |
1039 | 2) it is possible to check that there are at least FACTOR iterations | |
1040 | of the loop, i.e., BOUND - step * FACTOR does not overflow. | |
1041 | 3) # of iterations is at least FACTOR */ | |
1042 | ||
cd743a11 | 1043 | if (!integer_zerop (desc->may_be_zero)) |
b30560de | 1044 | cond = fold_build2 (TRUTH_AND_EXPR, boolean_type_node, |
1045 | invert_truthvalue (desc->may_be_zero), | |
1046 | cond); | |
1047 | ||
1048 | bigstep = fold_build2 (MULT_EXPR, type, step, | |
1049 | build_int_cst_type (type, factor)); | |
1050 | delta = fold_build2 (MINUS_EXPR, type, bigstep, step); | |
1051 | if (cmp == LT_EXPR) | |
1052 | assum = fold_build2 (GE_EXPR, boolean_type_node, | |
1053 | bound, | |
1054 | fold_build2 (PLUS_EXPR, type, min, delta)); | |
1055 | else | |
1056 | assum = fold_build2 (LE_EXPR, boolean_type_node, | |
1057 | bound, | |
1058 | fold_build2 (PLUS_EXPR, type, max, delta)); | |
1059 | cond = fold_build2 (TRUTH_AND_EXPR, boolean_type_node, assum, cond); | |
1060 | ||
1061 | bound = fold_build2 (MINUS_EXPR, type, bound, delta); | |
1062 | assum = fold_build2 (cmp, boolean_type_node, base, bound); | |
1063 | cond = fold_build2 (TRUTH_AND_EXPR, boolean_type_node, assum, cond); | |
1064 | ||
1065 | cond = force_gimple_operand (unshare_expr (cond), &stmts, false, NULL_TREE); | |
1066 | if (stmts) | |
75a70cf9 | 1067 | gsi_insert_seq_on_edge_immediate (loop_preheader_edge (loop), stmts); |
b30560de | 1068 | /* cond now may be a gimple comparison, which would be OK, but also any |
1069 | other gimple rhs (say a && b). In this case we need to force it to | |
1070 | operand. */ | |
1071 | if (!is_gimple_condexpr (cond)) | |
1072 | { | |
1073 | cond = force_gimple_operand (cond, &stmts, true, NULL_TREE); | |
1074 | if (stmts) | |
75a70cf9 | 1075 | gsi_insert_seq_on_edge_immediate (loop_preheader_edge (loop), stmts); |
b30560de | 1076 | } |
1077 | *enter_cond = cond; | |
1078 | ||
1079 | base = force_gimple_operand (unshare_expr (base), &stmts, true, NULL_TREE); | |
1080 | if (stmts) | |
75a70cf9 | 1081 | gsi_insert_seq_on_edge_immediate (loop_preheader_edge (loop), stmts); |
b30560de | 1082 | bound = force_gimple_operand (unshare_expr (bound), &stmts, true, NULL_TREE); |
1083 | if (stmts) | |
75a70cf9 | 1084 | gsi_insert_seq_on_edge_immediate (loop_preheader_edge (loop), stmts); |
b30560de | 1085 | |
1086 | *exit_base = base; | |
1087 | *exit_step = bigstep; | |
1088 | *exit_cmp = cmp; | |
1089 | *exit_bound = bound; | |
1090 | } | |
1091 | ||
611d2ac1 | 1092 | /* Scales the frequencies of all basic blocks in LOOP that are strictly |
1093 | dominated by BB by NUM/DEN. */ | |
1094 | ||
1095 | static void | |
1096 | scale_dominated_blocks_in_loop (struct loop *loop, basic_block bb, | |
0f5aacd1 | 1097 | profile_count num, profile_count den) |
611d2ac1 | 1098 | { |
1099 | basic_block son; | |
1100 | ||
0f5aacd1 | 1101 | if (!den.nonzero_p () && !(num == profile_count::zero ())) |
611d2ac1 | 1102 | return; |
1103 | ||
1104 | for (son = first_dom_son (CDI_DOMINATORS, bb); | |
1105 | son; | |
1106 | son = next_dom_son (CDI_DOMINATORS, son)) | |
1107 | { | |
1108 | if (!flow_bb_inside_loop_p (loop, son)) | |
1109 | continue; | |
0f5aacd1 | 1110 | scale_bbs_frequencies_profile_count (&son, 1, num, den); |
611d2ac1 | 1111 | scale_dominated_blocks_in_loop (loop, son, num, den); |
1112 | } | |
1113 | } | |
1114 | ||
12420a15 | 1115 | /* Return estimated niter for LOOP after unrolling by FACTOR times. */ |
1116 | ||
1117 | gcov_type | |
1118 | niter_for_unrolled_loop (struct loop *loop, unsigned factor) | |
1119 | { | |
1120 | gcc_assert (factor != 0); | |
1121 | bool profile_p = false; | |
1122 | gcov_type est_niter = expected_loop_iterations_unbounded (loop, &profile_p); | |
10de1374 | 1123 | /* Note that this is really CEIL (est_niter + 1, factor) - 1, where the |
1124 | "+ 1" converts latch iterations to loop iterations and the "- 1" | |
1125 | converts back. */ | |
12420a15 | 1126 | gcov_type new_est_niter = est_niter / factor; |
1127 | ||
205ce1aa | 1128 | if (est_niter == -1) |
1129 | return -1; | |
1130 | ||
12420a15 | 1131 | /* Without profile feedback, loops for which we do not know a better estimate |
1132 | are assumed to roll 10 times. When we unroll such loop, it appears to | |
1133 | roll too little, and it may even seem to be cold. To avoid this, we | |
1134 | ensure that the created loop appears to roll at least 5 times (but at | |
1135 | most as many times as before unrolling). Don't do adjustment if profile | |
1136 | feedback is present. */ | |
1137 | if (new_est_niter < 5 && !profile_p) | |
1138 | { | |
1139 | if (est_niter < 5) | |
1140 | new_est_niter = est_niter; | |
1141 | else | |
1142 | new_est_niter = 5; | |
1143 | } | |
1144 | ||
10de1374 | 1145 | if (loop->any_upper_bound) |
1146 | { | |
1147 | /* As above, this is really CEIL (upper_bound + 1, factor) - 1. */ | |
1148 | widest_int bound = wi::udiv_floor (loop->nb_iterations_upper_bound, | |
1149 | factor); | |
1150 | if (wi::ltu_p (bound, new_est_niter)) | |
1151 | new_est_niter = bound.to_uhwi (); | |
1152 | } | |
1153 | ||
12420a15 | 1154 | return new_est_niter; |
1155 | } | |
1156 | ||
7194de72 | 1157 | /* Unroll LOOP FACTOR times. DESC describes number of iterations of LOOP. |
1158 | EXIT is the exit of the loop to that DESC corresponds. | |
1159 | ||
b30560de | 1160 | If N is number of iterations of the loop and MAY_BE_ZERO is the condition |
1161 | under that loop exits in the first iteration even if N != 0, | |
48e1416a | 1162 | |
b30560de | 1163 | while (1) |
1164 | { | |
1165 | x = phi (init, next); | |
1166 | ||
1167 | pre; | |
1168 | if (st) | |
1169 | break; | |
1170 | post; | |
1171 | } | |
1172 | ||
1173 | becomes (with possibly the exit conditions formulated a bit differently, | |
1174 | avoiding the need to create a new iv): | |
48e1416a | 1175 | |
b30560de | 1176 | if (MAY_BE_ZERO || N < FACTOR) |
1177 | goto rest; | |
1178 | ||
1179 | do | |
1180 | { | |
1181 | x = phi (init, next); | |
1182 | ||
1183 | pre; | |
1184 | post; | |
1185 | pre; | |
1186 | post; | |
1187 | ... | |
1188 | pre; | |
1189 | post; | |
1190 | N -= FACTOR; | |
48e1416a | 1191 | |
b30560de | 1192 | } while (N >= FACTOR); |
1193 | ||
1194 | rest: | |
1195 | init' = phi (init, x); | |
1196 | ||
1197 | while (1) | |
1198 | { | |
1199 | x = phi (init', next); | |
1200 | ||
1201 | pre; | |
1202 | if (st) | |
1203 | break; | |
1204 | post; | |
f6ae6f2a | 1205 | } |
48e1416a | 1206 | |
f6ae6f2a | 1207 | Before the loop is unrolled, TRANSFORM is called for it (only for the |
1208 | unrolled loop, but not for its versioned copy). DATA is passed to | |
1209 | TRANSFORM. */ | |
b30560de | 1210 | |
7cef6c97 | 1211 | /* Probability in % that the unrolled loop is entered. Just a guess. */ |
1212 | #define PROB_UNROLLED_LOOP_ENTERED 90 | |
1213 | ||
b30560de | 1214 | void |
f6ae6f2a | 1215 | tree_transform_and_unroll_loop (struct loop *loop, unsigned factor, |
1216 | edge exit, struct tree_niter_desc *desc, | |
1217 | transform_callback transform, | |
1218 | void *data) | |
b30560de | 1219 | { |
1a91d914 | 1220 | gcond *exit_if; |
75a70cf9 | 1221 | tree ctr_before, ctr_after; |
b30560de | 1222 | tree enter_main_cond, exit_base, exit_step, exit_bound; |
1223 | enum tree_code exit_cmp; | |
1a91d914 | 1224 | gphi *phi_old_loop, *phi_new_loop, *phi_rest; |
1225 | gphi_iterator psi_old_loop, psi_new_loop; | |
ec11736b | 1226 | tree init, next, new_init; |
b30560de | 1227 | struct loop *new_loop; |
1228 | basic_block rest, exit_bb; | |
1229 | edge old_entry, new_entry, old_latch, precond_edge, new_exit; | |
611d2ac1 | 1230 | edge new_nonexit, e; |
75a70cf9 | 1231 | gimple_stmt_iterator bsi; |
b30560de | 1232 | use_operand_p op; |
1233 | bool ok; | |
7ec47501 | 1234 | unsigned i; |
1235 | profile_probability prob, prob_entry, scale_unrolled; | |
db9cef39 | 1236 | profile_count freq_e, freq_h; |
12420a15 | 1237 | gcov_type new_est_niter = niter_for_unrolled_loop (loop, factor); |
89c8b802 | 1238 | unsigned irr = loop_preheader_edge (loop)->flags & EDGE_IRREDUCIBLE_LOOP; |
c2078b80 | 1239 | auto_vec<edge> to_remove; |
b30560de | 1240 | |
b30560de | 1241 | determine_exit_conditions (loop, desc, factor, |
1242 | &enter_main_cond, &exit_base, &exit_step, | |
1243 | &exit_cmp, &exit_bound); | |
1244 | ||
7cef6c97 | 1245 | /* Let us assume that the unrolled loop is quite likely to be entered. */ |
1246 | if (integer_nonzerop (enter_main_cond)) | |
7ec47501 | 1247 | prob_entry = profile_probability::always (); |
7cef6c97 | 1248 | else |
7ec47501 | 1249 | prob_entry = profile_probability::guessed_always () |
1250 | .apply_scale (PROB_UNROLLED_LOOP_ENTERED, 100); | |
7cef6c97 | 1251 | |
1252 | /* The values for scales should keep profile consistent, and somewhat close | |
1253 | to correct. | |
1254 | ||
1255 | TODO: The current value of SCALE_REST makes it appear that the loop that | |
1256 | is created by splitting the remaining iterations of the unrolled loop is | |
1257 | executed the same number of times as the original loop, and with the same | |
1258 | frequencies, which is obviously wrong. This does not appear to cause | |
1259 | problems, so we do not bother with fixing it for now. To make the profile | |
1260 | correct, we would need to change the probability of the exit edge of the | |
1261 | loop, and recompute the distribution of frequencies in its body because | |
1262 | of this change (scale the frequencies of blocks before and after the exit | |
1263 | by appropriate factors). */ | |
1264 | scale_unrolled = prob_entry; | |
7cef6c97 | 1265 | |
7ec47501 | 1266 | new_loop = loop_version (loop, enter_main_cond, NULL, prob_entry, |
1267 | prob_entry.invert (), scale_unrolled, | |
ca69b069 | 1268 | profile_probability::guessed_always (), |
1269 | true); | |
b30560de | 1270 | gcc_assert (new_loop != NULL); |
1271 | update_ssa (TODO_update_ssa); | |
1272 | ||
611d2ac1 | 1273 | /* Prepare the cfg and update the phi nodes. Move the loop exit to the |
1274 | loop latch (and make its condition dummy, for the moment). */ | |
b30560de | 1275 | rest = loop_preheader_edge (new_loop)->src; |
1276 | precond_edge = single_pred_edge (rest); | |
88e6f696 | 1277 | split_edge (loop_latch_edge (loop)); |
b30560de | 1278 | exit_bb = single_pred (loop->latch); |
1279 | ||
611d2ac1 | 1280 | /* Since the exit edge will be removed, the frequency of all the blocks |
1281 | in the loop that are dominated by it must be scaled by | |
1282 | 1 / (1 - exit->probability). */ | |
720cfc43 | 1283 | if (exit->probability.initialized_p ()) |
1284 | scale_dominated_blocks_in_loop (loop, exit->src, | |
7ec47501 | 1285 | /* We are scaling up here so probability |
1286 | does not fit. */ | |
0f5aacd1 | 1287 | loop->header->count, |
1288 | loop->header->count | |
1289 | - loop->header->count.apply_probability | |
1290 | (exit->probability)); | |
611d2ac1 | 1291 | |
75a70cf9 | 1292 | bsi = gsi_last_bb (exit_bb); |
1293 | exit_if = gimple_build_cond (EQ_EXPR, integer_zero_node, | |
1294 | integer_zero_node, | |
1295 | NULL_TREE, NULL_TREE); | |
63f88450 | 1296 | |
75a70cf9 | 1297 | gsi_insert_after (&bsi, exit_if, GSI_NEW_STMT); |
89c8b802 | 1298 | new_exit = make_edge (exit_bb, rest, EDGE_FALSE_VALUE | irr); |
dce58e66 | 1299 | rescan_loop_exit (new_exit, true, false); |
611d2ac1 | 1300 | |
1301 | /* Set the probability of new exit to the same of the old one. Fix | |
1302 | the frequency of the latch block, by scaling it back by | |
1303 | 1 - exit->probability. */ | |
611d2ac1 | 1304 | new_exit->probability = exit->probability; |
b30560de | 1305 | new_nonexit = single_pred_edge (loop->latch); |
720cfc43 | 1306 | new_nonexit->probability = exit->probability.invert (); |
b30560de | 1307 | new_nonexit->flags = EDGE_TRUE_VALUE; |
720cfc43 | 1308 | if (new_nonexit->probability.initialized_p ()) |
7ec47501 | 1309 | scale_bbs_frequencies (&loop->latch, 1, new_nonexit->probability); |
b30560de | 1310 | |
1311 | old_entry = loop_preheader_edge (loop); | |
1312 | new_entry = loop_preheader_edge (new_loop); | |
1313 | old_latch = loop_latch_edge (loop); | |
75a70cf9 | 1314 | for (psi_old_loop = gsi_start_phis (loop->header), |
1315 | psi_new_loop = gsi_start_phis (new_loop->header); | |
1316 | !gsi_end_p (psi_old_loop); | |
1317 | gsi_next (&psi_old_loop), gsi_next (&psi_new_loop)) | |
b30560de | 1318 | { |
1a91d914 | 1319 | phi_old_loop = psi_old_loop.phi (); |
1320 | phi_new_loop = psi_new_loop.phi (); | |
75a70cf9 | 1321 | |
b30560de | 1322 | init = PHI_ARG_DEF_FROM_EDGE (phi_old_loop, old_entry); |
1323 | op = PHI_ARG_DEF_PTR_FROM_EDGE (phi_new_loop, new_entry); | |
1324 | gcc_assert (operand_equal_for_phi_arg_p (init, USE_FROM_PTR (op))); | |
1325 | next = PHI_ARG_DEF_FROM_EDGE (phi_old_loop, old_latch); | |
1326 | ||
1327 | /* Prefer using original variable as a base for the new ssa name. | |
1328 | This is necessary for virtual ops, and useful in order to avoid | |
1329 | losing debug info for real ops. */ | |
b34a77f7 | 1330 | if (TREE_CODE (next) == SSA_NAME |
1331 | && useless_type_conversion_p (TREE_TYPE (next), | |
1332 | TREE_TYPE (init))) | |
f9e245b2 | 1333 | new_init = copy_ssa_name (next); |
b34a77f7 | 1334 | else if (TREE_CODE (init) == SSA_NAME |
1335 | && useless_type_conversion_p (TREE_TYPE (init), | |
1336 | TREE_TYPE (next))) | |
f9e245b2 | 1337 | new_init = copy_ssa_name (init); |
b34a77f7 | 1338 | else if (useless_type_conversion_p (TREE_TYPE (next), TREE_TYPE (init))) |
ec11736b | 1339 | new_init = make_temp_ssa_name (TREE_TYPE (next), NULL, "unrinittmp"); |
b30560de | 1340 | else |
ec11736b | 1341 | new_init = make_temp_ssa_name (TREE_TYPE (init), NULL, "unrinittmp"); |
b30560de | 1342 | |
b30560de | 1343 | phi_rest = create_phi_node (new_init, rest); |
b30560de | 1344 | |
60d535d2 | 1345 | add_phi_arg (phi_rest, init, precond_edge, UNKNOWN_LOCATION); |
1346 | add_phi_arg (phi_rest, next, new_exit, UNKNOWN_LOCATION); | |
b30560de | 1347 | SET_USE (op, new_init); |
1348 | } | |
1349 | ||
611d2ac1 | 1350 | remove_path (exit); |
1351 | ||
f6ae6f2a | 1352 | /* Transform the loop. */ |
1353 | if (transform) | |
1354 | (*transform) (loop, data); | |
1355 | ||
611d2ac1 | 1356 | /* Unroll the loop and remove the exits in all iterations except for the |
1357 | last one. */ | |
3c6549f8 | 1358 | auto_sbitmap wont_exit (factor); |
53c5d9d4 | 1359 | bitmap_ones (wont_exit); |
08b7917c | 1360 | bitmap_clear_bit (wont_exit, factor - 1); |
611d2ac1 | 1361 | |
75a70cf9 | 1362 | ok = gimple_duplicate_loop_to_header_edge |
f6ae6f2a | 1363 | (loop, loop_latch_edge (loop), factor - 1, |
611d2ac1 | 1364 | wont_exit, new_exit, &to_remove, DLTHE_FLAG_UPDATE_FREQ); |
f6ae6f2a | 1365 | gcc_assert (ok); |
611d2ac1 | 1366 | |
f1f41a6c | 1367 | FOR_EACH_VEC_ELT (to_remove, i, e) |
611d2ac1 | 1368 | { |
1369 | ok = remove_path (e); | |
1370 | gcc_assert (ok); | |
1371 | } | |
f6ae6f2a | 1372 | update_ssa (TODO_update_ssa); |
1373 | ||
1374 | /* Ensure that the frequencies in the loop match the new estimated | |
1375 | number of iterations, and change the probability of the new | |
1376 | exit edge. */ | |
12420a15 | 1377 | |
1378 | freq_h = loop->header->count; | |
ea5d3981 | 1379 | freq_e = (loop_preheader_edge (loop))->count (); |
205ce1aa | 1380 | if (freq_h.nonzero_p ()) |
12420a15 | 1381 | { |
12420a15 | 1382 | /* Avoid dropping loop body profile counter to 0 because of zero count |
1383 | in loop's preheader. */ | |
ae277201 | 1384 | if (freq_h.nonzero_p () && !(freq_e == profile_count::zero ())) |
1385 | freq_e = freq_e.force_nonzero (); | |
ca69b069 | 1386 | scale_loop_frequencies (loop, freq_e.probability_in (freq_h)); |
12420a15 | 1387 | } |
f6ae6f2a | 1388 | |
1389 | exit_bb = single_pred (loop->latch); | |
1390 | new_exit = find_edge (exit_bb, rest); | |
720cfc43 | 1391 | new_exit->probability = profile_probability::always () |
1392 | .apply_scale (1, new_est_niter + 1); | |
f6ae6f2a | 1393 | |
ea5d3981 | 1394 | rest->count += new_exit->count (); |
f6ae6f2a | 1395 | |
1396 | new_nonexit = single_pred_edge (loop->latch); | |
7ec47501 | 1397 | prob = new_nonexit->probability; |
720cfc43 | 1398 | new_nonexit->probability = new_exit->probability.invert (); |
7ec47501 | 1399 | prob = new_nonexit->probability / prob; |
1400 | if (prob.initialized_p ()) | |
1401 | scale_bbs_frequencies (&loop->latch, 1, prob); | |
f6ae6f2a | 1402 | |
b30560de | 1403 | /* Finally create the new counter for number of iterations and add the new |
1404 | exit instruction. */ | |
320e3d05 | 1405 | bsi = gsi_last_nondebug_bb (exit_bb); |
1a91d914 | 1406 | exit_if = as_a <gcond *> (gsi_stmt (bsi)); |
b30560de | 1407 | create_iv (exit_base, exit_step, NULL_TREE, loop, |
f6ae6f2a | 1408 | &bsi, false, &ctr_before, &ctr_after); |
75a70cf9 | 1409 | gimple_cond_set_code (exit_if, exit_cmp); |
1410 | gimple_cond_set_lhs (exit_if, ctr_after); | |
1411 | gimple_cond_set_rhs (exit_if, exit_bound); | |
f6ae6f2a | 1412 | update_stmt (exit_if); |
b30560de | 1413 | |
382ecba7 | 1414 | checking_verify_flow_info (); |
1415 | checking_verify_loop_structure (); | |
232624e0 | 1416 | checking_verify_loop_closed_ssa (true, loop); |
1417 | checking_verify_loop_closed_ssa (true, new_loop); | |
b30560de | 1418 | } |
f6ae6f2a | 1419 | |
1420 | /* Wrapper over tree_transform_and_unroll_loop for case we do not | |
1421 | want to transform the loop before unrolling. The meaning | |
1422 | of the arguments is the same as for tree_transform_and_unroll_loop. */ | |
1423 | ||
1424 | void | |
1425 | tree_unroll_loop (struct loop *loop, unsigned factor, | |
1426 | edge exit, struct tree_niter_desc *desc) | |
1427 | { | |
1428 | tree_transform_and_unroll_loop (loop, factor, exit, desc, | |
1429 | NULL, NULL); | |
1430 | } | |
5fa90eea | 1431 | |
1432 | /* Rewrite the phi node at position PSI in function of the main | |
1433 | induction variable MAIN_IV and insert the generated code at GSI. */ | |
1434 | ||
1435 | static void | |
1436 | rewrite_phi_with_iv (loop_p loop, | |
1a91d914 | 1437 | gphi_iterator *psi, |
5fa90eea | 1438 | gimple_stmt_iterator *gsi, |
1439 | tree main_iv) | |
1440 | { | |
1441 | affine_iv iv; | |
1a91d914 | 1442 | gassign *stmt; |
1443 | gphi *phi = psi->phi (); | |
5fa90eea | 1444 | tree atype, mtype, val, res = PHI_RESULT (phi); |
1445 | ||
7c782c9b | 1446 | if (virtual_operand_p (res) || res == main_iv) |
5fa90eea | 1447 | { |
1448 | gsi_next (psi); | |
1449 | return; | |
1450 | } | |
1451 | ||
1452 | if (!simple_iv (loop, loop, res, &iv, true)) | |
1453 | { | |
1454 | gsi_next (psi); | |
1455 | return; | |
1456 | } | |
1457 | ||
1458 | remove_phi_node (psi, false); | |
1459 | ||
1460 | atype = TREE_TYPE (res); | |
1461 | mtype = POINTER_TYPE_P (atype) ? sizetype : atype; | |
1462 | val = fold_build2 (MULT_EXPR, mtype, unshare_expr (iv.step), | |
1463 | fold_convert (mtype, main_iv)); | |
1464 | val = fold_build2 (POINTER_TYPE_P (atype) | |
1465 | ? POINTER_PLUS_EXPR : PLUS_EXPR, | |
1466 | atype, unshare_expr (iv.base), val); | |
1467 | val = force_gimple_operand_gsi (gsi, val, false, NULL_TREE, true, | |
1468 | GSI_SAME_STMT); | |
1469 | stmt = gimple_build_assign (res, val); | |
1470 | gsi_insert_before (gsi, stmt, GSI_SAME_STMT); | |
5fa90eea | 1471 | } |
1472 | ||
1473 | /* Rewrite all the phi nodes of LOOP in function of the main induction | |
1474 | variable MAIN_IV. */ | |
1475 | ||
1476 | static void | |
1477 | rewrite_all_phi_nodes_with_iv (loop_p loop, tree main_iv) | |
1478 | { | |
1479 | unsigned i; | |
1480 | basic_block *bbs = get_loop_body_in_dom_order (loop); | |
1a91d914 | 1481 | gphi_iterator psi; |
5fa90eea | 1482 | |
1483 | for (i = 0; i < loop->num_nodes; i++) | |
1484 | { | |
1485 | basic_block bb = bbs[i]; | |
1486 | gimple_stmt_iterator gsi = gsi_after_labels (bb); | |
1487 | ||
1488 | if (bb->loop_father != loop) | |
1489 | continue; | |
1490 | ||
1491 | for (psi = gsi_start_phis (bb); !gsi_end_p (psi); ) | |
1492 | rewrite_phi_with_iv (loop, &psi, &gsi, main_iv); | |
1493 | } | |
1494 | ||
1495 | free (bbs); | |
1496 | } | |
1497 | ||
5b11c9b9 | 1498 | /* Bases all the induction variables in LOOP on a single induction variable |
1499 | (with base 0 and step 1), whose final value is compared with *NIT. When the | |
1500 | IV type precision has to be larger than *NIT type precision, *NIT is | |
1501 | converted to the larger type, the conversion code is inserted before the | |
1502 | loop, and *NIT is updated to the new definition. When BUMP_IN_LATCH is true, | |
1503 | the induction variable is incremented in the loop latch, otherwise it is | |
1504 | incremented in the loop header. Return the induction variable that was | |
1505 | created. */ | |
5fa90eea | 1506 | |
1507 | tree | |
0207206d | 1508 | canonicalize_loop_ivs (struct loop *loop, tree *nit, bool bump_in_latch) |
5fa90eea | 1509 | { |
1510 | unsigned precision = TYPE_PRECISION (TREE_TYPE (*nit)); | |
1511 | unsigned original_precision = precision; | |
1512 | tree type, var_before; | |
1a91d914 | 1513 | gimple_stmt_iterator gsi; |
1514 | gphi_iterator psi; | |
1515 | gcond *stmt; | |
5fa90eea | 1516 | edge exit = single_dom_exit (loop); |
1517 | gimple_seq stmts; | |
38a66497 | 1518 | bool unsigned_p = false; |
5fa90eea | 1519 | |
1520 | for (psi = gsi_start_phis (loop->header); | |
1521 | !gsi_end_p (psi); gsi_next (&psi)) | |
1522 | { | |
1a91d914 | 1523 | gphi *phi = psi.phi (); |
5fa90eea | 1524 | tree res = PHI_RESULT (phi); |
38a66497 | 1525 | bool uns; |
5fa90eea | 1526 | |
38a66497 | 1527 | type = TREE_TYPE (res); |
7c782c9b | 1528 | if (virtual_operand_p (res) |
38a66497 | 1529 | || (!INTEGRAL_TYPE_P (type) |
1530 | && !POINTER_TYPE_P (type)) | |
1531 | || TYPE_PRECISION (type) < precision) | |
1532 | continue; | |
1533 | ||
1534 | uns = POINTER_TYPE_P (type) | TYPE_UNSIGNED (type); | |
1535 | ||
1536 | if (TYPE_PRECISION (type) > precision) | |
1537 | unsigned_p = uns; | |
1538 | else | |
1539 | unsigned_p |= uns; | |
1540 | ||
1541 | precision = TYPE_PRECISION (type); | |
5fa90eea | 1542 | } |
1543 | ||
1a5d4b27 | 1544 | scalar_int_mode mode = smallest_int_mode_for_size (precision); |
38a66497 | 1545 | precision = GET_MODE_PRECISION (mode); |
1546 | type = build_nonstandard_integer_type (precision, unsigned_p); | |
5fa90eea | 1547 | |
02677482 | 1548 | if (original_precision != precision |
1549 | || TYPE_UNSIGNED (TREE_TYPE (*nit)) != unsigned_p) | |
5fa90eea | 1550 | { |
1551 | *nit = fold_convert (type, *nit); | |
1552 | *nit = force_gimple_operand (*nit, &stmts, true, NULL_TREE); | |
1553 | if (stmts) | |
1554 | gsi_insert_seq_on_edge_immediate (loop_preheader_edge (loop), stmts); | |
1555 | } | |
1556 | ||
149c96e8 | 1557 | if (bump_in_latch) |
1558 | gsi = gsi_last_bb (loop->latch); | |
1559 | else | |
1560 | gsi = gsi_last_nondebug_bb (loop->header); | |
5fa90eea | 1561 | create_iv (build_int_cst_type (type, 0), build_int_cst (type, 1), NULL_TREE, |
0207206d | 1562 | loop, &gsi, bump_in_latch, &var_before, NULL); |
5fa90eea | 1563 | |
1564 | rewrite_all_phi_nodes_with_iv (loop, var_before); | |
1565 | ||
1a91d914 | 1566 | stmt = as_a <gcond *> (last_stmt (exit->src)); |
5fa90eea | 1567 | /* Make the loop exit if the control condition is not satisfied. */ |
1568 | if (exit->flags & EDGE_TRUE_VALUE) | |
1569 | { | |
1570 | edge te, fe; | |
1571 | ||
1572 | extract_true_false_edges_from_block (exit->src, &te, &fe); | |
1573 | te->flags = EDGE_FALSE_VALUE; | |
1574 | fe->flags = EDGE_TRUE_VALUE; | |
1575 | } | |
1576 | gimple_cond_set_code (stmt, LT_EXPR); | |
1577 | gimple_cond_set_lhs (stmt, var_before); | |
1578 | gimple_cond_set_rhs (stmt, *nit); | |
1579 | update_stmt (stmt); | |
1580 | ||
1581 | return var_before; | |
1582 | } |