]> git.ipfire.org Git - thirdparty/gcc.git/blame - gcc/tree-ssa-loop-manip.c
2015-06-04 Andrew MacLeod <amacleod@redhat.com>
[thirdparty/gcc.git] / gcc / tree-ssa-loop-manip.c
CommitLineData
06598532 1/* High-level loop manipulation functions.
d353bf18 2 Copyright (C) 2004-2015 Free Software Foundation, Inc.
48e1416a 3
06598532 4This file is part of GCC.
48e1416a 5
06598532 6GCC is free software; you can redistribute it and/or modify it
7under the terms of the GNU General Public License as published by the
8c4c00c1 8Free Software Foundation; either version 3, or (at your option) any
06598532 9later version.
48e1416a 10
06598532 11GCC is distributed in the hope that it will be useful, but WITHOUT
12ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
13FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
14for more details.
48e1416a 15
06598532 16You should have received a copy of the GNU General Public License
8c4c00c1 17along with GCC; see the file COPYING3. If not see
18<http://www.gnu.org/licenses/>. */
06598532 19
20#include "config.h"
21#include "system.h"
22#include "coretypes.h"
23#include "tm.h"
b20a8bb4 24#include "hash-set.h"
b20a8bb4 25#include "vec.h"
b20a8bb4 26#include "input.h"
27#include "alias.h"
28#include "symtab.h"
b20a8bb4 29#include "inchash.h"
06598532 30#include "tree.h"
b20a8bb4 31#include "fold-const.h"
06598532 32#include "tm_p.h"
94ea8568 33#include "predict.h"
94ea8568 34#include "hard-reg-set.h"
35#include "input.h"
36#include "function.h"
37#include "dominance.h"
38#include "cfg.h"
39#include "cfganal.h"
06598532 40#include "basic-block.h"
bc61cadb 41#include "tree-ssa-alias.h"
42#include "internal-fn.h"
43#include "gimple-expr.h"
44#include "is-a.h"
e795d6e1 45#include "gimple.h"
a8783bee 46#include "gimplify.h"
dcf1a1ec 47#include "gimple-iterator.h"
e795d6e1 48#include "gimplify-me.h"
073c1fd5 49#include "gimple-ssa.h"
50#include "tree-cfg.h"
51#include "tree-phinodes.h"
52#include "ssa-iterators.h"
9ed99284 53#include "stringpool.h"
073c1fd5 54#include "tree-ssanames.h"
05d9c18a 55#include "tree-ssa-loop-ivopts.h"
56#include "tree-ssa-loop-manip.h"
57#include "tree-ssa-loop-niter.h"
073c1fd5 58#include "tree-ssa-loop.h"
59#include "tree-into-ssa.h"
69ee5dbb 60#include "tree-ssa.h"
b9ed1410 61#include "dumpfile.h"
d09bc815 62#include "gimple-pretty-print.h"
06598532 63#include "cfgloop.h"
b9ed1410 64#include "tree-pass.h" /* ??? for TODO_update_ssa but this isn't a pass. */
06598532 65#include "tree-scalar-evolution.h"
b30560de 66#include "params.h"
bc8bb825 67#include "tree-inline.h"
5fa90eea 68#include "langhooks.h"
06598532 69
ed7e2206 70/* All bitmaps for rewriting into loop-closed SSA go on this obstack,
71 so that we can free them all at once. */
72static bitmap_obstack loop_renamer_obstack;
73
/* Creates an induction variable with value BASE + STEP * iteration in LOOP.
   It is expected that neither BASE nor STEP are shared with other expressions
   (unless the sharing rules allow this).  Use VAR as a base var_decl for it
   (if NULL, a new temporary will be created).  The increment will occur at
   INCR_POS (after it if AFTER is true, before it otherwise).  INCR_POS and
   AFTER can be computed using standard_iv_increment_position.  The ssa versions
   of the variable before and after increment will be stored in VAR_BEFORE and
   VAR_AFTER (unless they are NULL).  */

void
create_iv (tree base, tree step, tree var, struct loop *loop,
	   gimple_stmt_iterator *incr_pos, bool after,
	   tree *var_before, tree *var_after)
{
  gassign *stmt;
  gphi *phi;
  tree initial, step1;
  gimple_seq stmts;
  tree vb, va;
  enum tree_code incr_op = PLUS_EXPR;
  edge pe = loop_preheader_edge (loop);

  /* VB is the value of the IV on loop entry / after each increment (the PHI
     result); VA is the value after the increment statement.  */
  if (var != NULL_TREE)
    {
      vb = make_ssa_name (var);
      va = make_ssa_name (var);
    }
  else
    {
      vb = make_temp_ssa_name (TREE_TYPE (base), NULL, "ivtmp");
      va = make_temp_ssa_name (TREE_TYPE (base), NULL, "ivtmp");
    }
  if (var_before)
    *var_before = vb;
  if (var_after)
    *var_after = va;

  /* For easier readability of the created code, produce MINUS_EXPRs
     when suitable.  */
  if (TREE_CODE (step) == INTEGER_CST)
    {
      if (TYPE_UNSIGNED (TREE_TYPE (step)))
	{
	  step1 = fold_build1 (NEGATE_EXPR, TREE_TYPE (step), step);
	  /* Prefer the numerically smaller constant; unsigned negation
	     always wraps, so this is safe.  */
	  if (tree_int_cst_lt (step1, step))
	    {
	      incr_op = MINUS_EXPR;
	      step = step1;
	    }
	}
      else
	{
	  bool ovf;

	  /* For signed steps, only negate when we can prove the negation
	     does not overflow (e.g. step != TYPE_MIN).  */
	  if (!tree_expr_nonnegative_warnv_p (step, &ovf)
	      && may_negate_without_overflow_p (step))
	    {
	      incr_op = MINUS_EXPR;
	      step = fold_build1 (NEGATE_EXPR, TREE_TYPE (step), step);
	    }
	}
    }
  if (POINTER_TYPE_P (TREE_TYPE (base)))
    {
      if (TREE_CODE (base) == ADDR_EXPR)
	mark_addressable (TREE_OPERAND (base, 0));
      /* Pointer increments must use POINTER_PLUS_EXPR with a ptrofftype
	 offset; fold any MINUS chosen above back into a negated step.  */
      step = convert_to_ptrofftype (step);
      if (incr_op == MINUS_EXPR)
	step = fold_build1 (NEGATE_EXPR, TREE_TYPE (step), step);
      incr_op = POINTER_PLUS_EXPR;
    }
  /* Gimplify the step if necessary.  We put the computations in front of the
     loop (i.e. the step should be loop invariant).  */
  step = force_gimple_operand (step, &stmts, true, NULL_TREE);
  if (stmts)
    gsi_insert_seq_on_edge_immediate (pe, stmts);

  stmt = gimple_build_assign (va, incr_op, vb, step);
  if (after)
    gsi_insert_after (incr_pos, stmt, GSI_NEW_STMT);
  else
    gsi_insert_before (incr_pos, stmt, GSI_NEW_STMT);

  /* Gimplify the initial value on the preheader edge as well.  */
  initial = force_gimple_operand (base, &stmts, true, var);
  if (stmts)
    gsi_insert_seq_on_edge_immediate (pe, stmts);

  /* Tie everything together with a PHI in the loop header:
     VB = PHI <initial (preheader), VA (latch)>.  */
  phi = create_phi_node (vb, loop->header);
  add_phi_arg (phi, initial, loop_preheader_edge (loop), UNKNOWN_LOCATION);
  add_phi_arg (phi, va, loop_latch_edge (loop), UNKNOWN_LOCATION);
}
165
/* Return the innermost superloop of USE_LOOP that is also a superloop of
   DEF_LOOP (i.e. the deepest loop containing both, as seen from USE_LOOP's
   side).  Both loops must be real loops, not the root of the loop tree.  */

static inline struct loop *
find_sibling_superloop (struct loop *use_loop, struct loop *def_loop)
{
  unsigned use_depth = loop_depth (use_loop);
  unsigned def_depth = loop_depth (def_loop);
  gcc_assert (use_depth > 0 && def_depth > 0);

  /* First bring both loops to the same depth.  */
  if (use_depth > def_depth)
    use_loop = superloop_at_depth (use_loop, def_depth);
  else if (use_depth < def_depth)
    def_loop = superloop_at_depth (def_loop, use_depth);

  /* Then climb the loop tree in lockstep until the two share a parent.  */
  while (loop_outer (use_loop) != loop_outer (def_loop))
    {
      use_loop = loop_outer (use_loop);
      def_loop = loop_outer (def_loop);
      gcc_assert (use_loop && def_loop);
    }
  return use_loop;
}
187
/* DEF_BB is a basic block containing a DEF that needs rewriting into
   loop-closed SSA form.  USE_BLOCKS is the set of basic blocks containing
   uses of DEF that "escape" from the loop containing DEF_BB (i.e. blocks in
   USE_BLOCKS are dominated by DEF_BB but not in the loop father of DEF_BB).
   ALL_EXITS[I] is the set of all basic blocks that exit loop I.

   Compute the subset of LOOP_EXITS that exit the loop containing DEF_BB
   or one of its loop fathers, in which DEF is live.  This set is returned
   in the bitmap LIVE_EXITS.

   Instead of computing the complete livein set of the def, we use the loop
   nesting tree as a form of poor man's structure analysis.  This greatly
   speeds up the analysis, which is important because this function may be
   called on all SSA names that need rewriting, one at a time.  */

static void
compute_live_loop_exits (bitmap live_exits, bitmap use_blocks,
			 bitmap *loop_exits, basic_block def_bb)
{
  unsigned i;
  bitmap_iterator bi;
  struct loop *def_loop = def_bb->loop_father;
  unsigned def_loop_depth = loop_depth (def_loop);
  bitmap def_loop_exits;

  /* Normally the work list size is bounded by the number of basic
     blocks in the largest loop.  We don't know this number, but we
     can be fairly sure that it will be relatively small.  */
  auto_vec<basic_block> worklist (MAX (8, n_basic_blocks_for_fn (cfun) / 128));

  /* Seed the worklist with the use blocks, collapsing uses in loops that
     are siblings of DEF_LOOP (rather than nested inside it) onto the
     header of their common sibling superloop.  */
  EXECUTE_IF_SET_IN_BITMAP (use_blocks, 0, i, bi)
    {
      basic_block use_bb = BASIC_BLOCK_FOR_FN (cfun, i);
      struct loop *use_loop = use_bb->loop_father;
      gcc_checking_assert (def_loop != use_loop
			   && ! flow_loop_nested_p (def_loop, use_loop));
      if (! flow_loop_nested_p (use_loop, def_loop))
	use_bb = find_sibling_superloop (use_loop, def_loop)->header;
      /* bitmap_set_bit returns true only on first insertion, so each
	 block enters the worklist at most once.  */
      if (bitmap_set_bit (live_exits, use_bb->index))
	worklist.safe_push (use_bb);
    }

  /* Iterate until the worklist is empty.  */
  while (! worklist.is_empty ())
    {
      edge e;
      edge_iterator ei;

      /* Pull a block off the worklist.  */
      basic_block bb = worklist.pop ();

      /* Make sure we have at least enough room in the work list
	 for all predecessors of this block.  */
      worklist.reserve (EDGE_COUNT (bb->preds));

      /* For each predecessor block.  */
      FOR_EACH_EDGE (e, ei, bb->preds)
	{
	  basic_block pred = e->src;
	  struct loop *pred_loop = pred->loop_father;
	  unsigned pred_loop_depth = loop_depth (pred_loop);
	  bool pred_visited;

	  /* We should have met DEF_BB along the way.  */
	  gcc_assert (pred != ENTRY_BLOCK_PTR_FOR_FN (cfun));

	  if (pred_loop_depth >= def_loop_depth)
	    {
	      if (pred_loop_depth > def_loop_depth)
		pred_loop = superloop_at_depth (pred_loop, def_loop_depth);
	      /* If we've reached DEF_LOOP, our train ends here.  */
	      if (pred_loop == def_loop)
		continue;
	    }
	  else if (! flow_loop_nested_p (pred_loop, def_loop))
	    pred = find_sibling_superloop (pred_loop, def_loop)->header;

	  /* Add PRED to the LIVEIN set.  PRED_VISITED is true if
	     we had already added PRED to LIVEIN before.  */
	  pred_visited = !bitmap_set_bit (live_exits, pred->index);

	  /* If we have visited PRED before, don't add it to the worklist.
	     If BB dominates PRED, then we're probably looking at a loop.
	     We're only interested in looking up in the dominance tree
	     because DEF_BB dominates all the uses.  */
	  if (pred_visited || dominated_by_p (CDI_DOMINATORS, pred, bb))
	    continue;

	  worklist.quick_push (pred);
	}
    }

  /* Intersect the liveness set with the union of the exit sets of
     DEF_LOOP and all its superloops: only blocks that are both live
     and actual loop exits remain.  */
  def_loop_exits = BITMAP_ALLOC (&loop_renamer_obstack);
  for (struct loop *loop = def_loop;
       loop != current_loops->tree_root;
       loop = loop_outer (loop))
    bitmap_ior_into (def_loop_exits, loop_exits[loop->num]);
  bitmap_and_into (live_exits, def_loop_exits);
  BITMAP_FREE (def_loop_exits);
}
288
/* Add a loop-closing PHI for VAR in basic block EXIT.  The PHI gets VAR as
   the argument on every incoming edge and a fresh SSA name (registered with
   the SSA updater via create_new_def_for) as its result.  */

static void
add_exit_phi (basic_block exit, tree var)
{
  gphi *phi;
  edge e;
  edge_iterator ei;

#ifdef ENABLE_CHECKING
  /* Check that at least one of the edges entering the EXIT block exits
     the loop, or a superloop of that loop, that VAR is defined in.  */
  gimple def_stmt = SSA_NAME_DEF_STMT (var);
  basic_block def_bb = gimple_bb (def_stmt);
  FOR_EACH_EDGE (e, ei, exit->preds)
    {
      struct loop *aloop = find_common_loop (def_bb->loop_father,
					     e->src->loop_father);
      if (!flow_bb_inside_loop_p (aloop, e->dest))
	break;
    }

  gcc_checking_assert (e);
#endif

  /* Create the PHI with a NULL result first; create_new_def_for makes a
     new SSA version of VAR and marks it for renaming by update_ssa.  */
  phi = create_phi_node (NULL_TREE, exit);
  create_new_def_for (var, phi, gimple_phi_result_ptr (phi));
  FOR_EACH_EDGE (e, ei, exit->preds)
    add_phi_arg (phi, var, e, UNKNOWN_LOCATION);

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      fprintf (dump_file, ";; Created LCSSA PHI: ");
      print_gimple_stmt (dump_file, phi, 0, dump_flags);
    }
}
325
/* Add exit phis for VAR that is used in the blocks recorded in USE_BLOCKS.
   Exits of the loops are stored in LOOP_EXITS (indexed by loop number).
   A loop-closing PHI is created in every exit block where VAR is live.  */

static void
add_exit_phis_var (tree var, bitmap use_blocks, bitmap *loop_exits)
{
  unsigned index;
  bitmap_iterator bi;
  basic_block def_bb = gimple_bb (SSA_NAME_DEF_STMT (var));
  bitmap live_exits = BITMAP_ALLOC (&loop_renamer_obstack);

  /* A use in the defining block would not be an escaping use.  */
  gcc_checking_assert (! bitmap_bit_p (use_blocks, def_bb->index));

  compute_live_loop_exits (live_exits, use_blocks, loop_exits, def_bb);

  EXECUTE_IF_SET_IN_BITMAP (live_exits, 0, index, bi)
    {
      add_exit_phi (BASIC_BLOCK_FOR_FN (cfun, index), var);
    }

  BITMAP_FREE (live_exits);
}
348
349/* Add exit phis for the names marked in NAMES_TO_RENAME.
350 Exits of the loops are stored in EXITS. Sets of blocks where the ssa
351 names are used are stored in USE_BLOCKS. */
352
353static void
d09bc815 354add_exit_phis (bitmap names_to_rename, bitmap *use_blocks, bitmap *loop_exits)
06598532 355{
356 unsigned i;
0cc4271a 357 bitmap_iterator bi;
06598532 358
0cc4271a 359 EXECUTE_IF_SET_IN_BITMAP (names_to_rename, 0, i, bi)
06598532 360 {
361 add_exit_phis_var (ssa_name (i), use_blocks[i], loop_exits);
0cc4271a 362 }
06598532 363}
364
d09bc815 365/* Fill the array of bitmaps LOOP_EXITS with all loop exit edge targets. */
06598532 366
d09bc815 367static void
368get_loops_exits (bitmap *loop_exits)
06598532 369{
d09bc815 370 struct loop *loop;
371 unsigned j;
06598532 372 edge e;
373
f21d4d00 374 FOR_EACH_LOOP (loop, 0)
06598532 375 {
f1f41a6c 376 vec<edge> exit_edges = get_loop_exit_edges (loop);
d09bc815 377 loop_exits[loop->num] = BITMAP_ALLOC (&loop_renamer_obstack);
f1f41a6c 378 FOR_EACH_VEC_ELT (exit_edges, j, e)
d09bc815 379 bitmap_set_bit (loop_exits[loop->num], e->dest->index);
f1f41a6c 380 exit_edges.release ();
06598532 381 }
06598532 382}
383
/* For USE in BB, if it is used outside of the loop it is defined in,
   mark it for rewrite.  Record basic block BB where it is used
   to USE_BLOCKS.  Record the ssa name index to NEED_PHIS bitmap.  */

static void
find_uses_to_rename_use (basic_block bb, tree use, bitmap *use_blocks,
			 bitmap need_phis)
{
  unsigned ver;
  basic_block def_bb;
  struct loop *def_loop;

  /* Only SSA names are of interest; constants etc. need no rewriting.  */
  if (TREE_CODE (use) != SSA_NAME)
    return;

  ver = SSA_NAME_VERSION (use);
  def_bb = gimple_bb (SSA_NAME_DEF_STMT (use));
  /* Names without a defining block (e.g. default definitions) are
     defined "before" all loops.  */
  if (!def_bb)
    return;
  def_loop = def_bb->loop_father;

  /* If the definition is not inside a loop, it is not interesting.  */
  if (!loop_outer (def_loop))
    return;

  /* If the use is not outside of the loop it is defined in, it is not
     interesting.  */
  if (flow_bb_inside_loop_p (def_loop, bb))
    return;

  /* If we're seeing VER for the first time, we still have to allocate
     a bitmap for its uses.  */
  if (bitmap_set_bit (need_phis, ver))
    use_blocks[ver] = BITMAP_ALLOC (&loop_renamer_obstack);
  bitmap_set_bit (use_blocks[ver], bb->index);
}
420
421/* For uses in STMT, mark names that are used outside of the loop they are
422 defined to rewrite. Record the set of blocks in that the ssa
095dcfa3 423 names are defined to USE_BLOCKS and the ssa names themselves to
424 NEED_PHIS. */
06598532 425
426static void
75a70cf9 427find_uses_to_rename_stmt (gimple stmt, bitmap *use_blocks, bitmap need_phis)
06598532 428{
43daa21e 429 ssa_op_iter iter;
430 tree var;
75a70cf9 431 basic_block bb = gimple_bb (stmt);
06598532 432
9845d120 433 if (is_gimple_debug (stmt))
434 return;
435
5789f1f8 436 FOR_EACH_SSA_TREE_OPERAND (var, stmt, iter, SSA_OP_USE)
095dcfa3 437 find_uses_to_rename_use (bb, var, use_blocks, need_phis);
06598532 438}
439
053fdd99 440/* Marks names that are used in BB and outside of the loop they are
441 defined in for rewrite. Records the set of blocks in that the ssa
095dcfa3 442 names are defined to USE_BLOCKS. Record the SSA names that will
443 need exit PHIs in NEED_PHIS. */
06598532 444
445static void
095dcfa3 446find_uses_to_rename_bb (basic_block bb, bitmap *use_blocks, bitmap need_phis)
06598532 447{
053fdd99 448 edge e;
449 edge_iterator ei;
06598532 450
053fdd99 451 FOR_EACH_EDGE (e, ei, bb->succs)
1a91d914 452 for (gphi_iterator bsi = gsi_start_phis (e->dest); !gsi_end_p (bsi);
453 gsi_next (&bsi))
d09bc815 454 {
1a91d914 455 gphi *phi = bsi.phi ();
5789f1f8 456 if (! virtual_operand_p (gimple_phi_result (phi)))
457 find_uses_to_rename_use (bb, PHI_ARG_DEF_FROM_EDGE (phi, e),
458 use_blocks, need_phis);
d09bc815 459 }
48e1416a 460
1a91d914 461 for (gimple_stmt_iterator bsi = gsi_start_bb (bb); !gsi_end_p (bsi);
462 gsi_next (&bsi))
75a70cf9 463 find_uses_to_rename_stmt (gsi_stmt (bsi), use_blocks, need_phis);
053fdd99 464}
48e1416a 465
053fdd99 466/* Marks names that are used outside of the loop they are defined in
467 for rewrite. Records the set of blocks in that the ssa
468 names are defined to USE_BLOCKS. If CHANGED_BBS is not NULL,
469 scan only blocks in this set. */
470
471static void
095dcfa3 472find_uses_to_rename (bitmap changed_bbs, bitmap *use_blocks, bitmap need_phis)
053fdd99 473{
474 basic_block bb;
475 unsigned index;
476 bitmap_iterator bi;
06598532 477
f55f91f5 478 if (changed_bbs)
479 EXECUTE_IF_SET_IN_BITMAP (changed_bbs, 0, index, bi)
f5a6b05f 480 find_uses_to_rename_bb (BASIC_BLOCK_FOR_FN (cfun, index), use_blocks, need_phis);
053fdd99 481 else
fc00614f 482 FOR_EACH_BB_FN (bb, cfun)
f55f91f5 483 find_uses_to_rename_bb (bb, use_blocks, need_phis);
06598532 484}
485
/* Rewrites the program into a loop closed ssa form -- i.e. inserts extra
   phi nodes to ensure that no variable is used outside the loop it is
   defined in.

   This strengthening of the basic ssa form has several advantages:

   1) Updating it during unrolling/peeling/versioning is trivial, since
      we do not need to care about the uses outside of the loop.
      The same applies to virtual operands which are also rewritten into
      loop closed SSA form.  Note that virtual operands are always live
      until function exit.
   2) The behavior of all uses of an induction variable is the same.
      Without this, you need to distinguish the case when the variable
      is used outside of the loop it is defined in, for example

      for (i = 0; i < 100; i++)
	{
	  for (j = 0; j < 100; j++)
	    {
	      k = i + j;
	      use1 (k);
	    }
	  use2 (k);
	}

      Looking from the outer loop with the normal SSA form, the first use of k
      is not well-behaved, while the second one is an induction variable with
      base 99 and step 1.

      If CHANGED_BBS is not NULL, we look for uses outside loops only in
      the basic blocks in this set.

      UPDATE_FLAG is used in the call to update_ssa.  See
      TODO_update_ssa* for documentation.  */

void
rewrite_into_loop_closed_ssa (bitmap changed_bbs, unsigned update_flag)
{
  bitmap *use_blocks;
  bitmap names_to_rename;

  loops_state_set (LOOP_CLOSED_SSA);
  /* With at most the function body as a single "loop 0" there is
     nothing to close.  */
  if (number_of_loops (cfun) <= 1)
    return;

  /* If the pass has caused the SSA form to be out-of-date, update it
     now.  */
  update_ssa (update_flag);

  /* All bitmaps below are allocated on this obstack and released in
     bulk at the end.  */
  bitmap_obstack_initialize (&loop_renamer_obstack);

  names_to_rename = BITMAP_ALLOC (&loop_renamer_obstack);

  /* Uses of names to rename.  We don't have to initialize this array,
     because we know that we will only have entries for the SSA names
     in NAMES_TO_RENAME.  */
  use_blocks = XNEWVEC (bitmap, num_ssa_names);

  /* Find the uses outside loops.  */
  find_uses_to_rename (changed_bbs, use_blocks, names_to_rename);

  if (!bitmap_empty_p (names_to_rename))
    {
      /* An array of bitmaps where LOOP_EXITS[I] is the set of basic blocks
	 that are the destination of an edge exiting loop number I.  */
      bitmap *loop_exits = XNEWVEC (bitmap, number_of_loops (cfun));
      get_loops_exits (loop_exits);

      /* Add the PHI nodes on exits of the loops for the names we need to
	 rewrite.  */
      add_exit_phis (names_to_rename, use_blocks, loop_exits);

      free (loop_exits);

      /* Fix up all the names found to be used outside their original
	 loops.  */
      update_ssa (TODO_update_ssa);
    }

  bitmap_obstack_release (&loop_renamer_obstack);
  free (use_blocks);
}
568
569/* Check invariants of the loop closed ssa form for the USE in BB. */
570
571static void
572check_loop_closed_ssa_use (basic_block bb, tree use)
573{
75a70cf9 574 gimple def;
06598532 575 basic_block def_bb;
48e1416a 576
7c782c9b 577 if (TREE_CODE (use) != SSA_NAME || virtual_operand_p (use))
06598532 578 return;
579
580 def = SSA_NAME_DEF_STMT (use);
75a70cf9 581 def_bb = gimple_bb (def);
8c0963c4 582 gcc_assert (!def_bb
583 || flow_bb_inside_loop_p (def_bb->loop_father, bb));
06598532 584}
585
586/* Checks invariants of loop closed ssa form in statement STMT in BB. */
587
588static void
75a70cf9 589check_loop_closed_ssa_stmt (basic_block bb, gimple stmt)
06598532 590{
43daa21e 591 ssa_op_iter iter;
592 tree var;
06598532 593
9845d120 594 if (is_gimple_debug (stmt))
595 return;
596
ed7e2206 597 FOR_EACH_SSA_TREE_OPERAND (var, stmt, iter, SSA_OP_USE)
43daa21e 598 check_loop_closed_ssa_use (bb, var);
06598532 599}
600
/* Checks that invariants of the loop closed ssa form are preserved.
   Call verify_ssa when VERIFY_SSA_P is true.  Aborts (via gcc_assert in
   the helpers) on any violation.  */

DEBUG_FUNCTION void
verify_loop_closed_ssa (bool verify_ssa_p)
{
  basic_block bb;
  edge e;
  edge_iterator ei;

  /* Nothing to verify when there are no real loops.  */
  if (number_of_loops (cfun) <= 1)
    return;

  if (verify_ssa_p)
    verify_ssa (false, true);

  timevar_push (TV_VERIFY_LOOP_CLOSED);

  FOR_EACH_BB_FN (bb, cfun)
    {
      /* PHI arguments are uses on the incoming edge, i.e. in the
	 predecessor block.  */
      for (gphi_iterator bsi = gsi_start_phis (bb); !gsi_end_p (bsi);
	   gsi_next (&bsi))
	{
	  gphi *phi = bsi.phi ();
	  FOR_EACH_EDGE (e, ei, bb->preds)
	    check_loop_closed_ssa_use (e->src,
				       PHI_ARG_DEF_FROM_EDGE (phi, e));
	}

      for (gimple_stmt_iterator bsi = gsi_start_bb (bb); !gsi_end_p (bsi);
	   gsi_next (&bsi))
	check_loop_closed_ssa_stmt (bb, gsi_stmt (bsi));
    }

  timevar_pop (TV_VERIFY_LOOP_CLOSED);
}
dec41e98 637
/* Split loop exit edge EXIT.  The things are a bit complicated by a need to
   preserve the loop closed ssa form.  The newly created block is returned.  */

basic_block
split_loop_exit_edge (edge exit)
{
  basic_block dest = exit->dest;
  basic_block bb = split_edge (exit);
  gphi *phi, *new_phi;
  tree new_name, name;
  use_operand_p op_p;
  gphi_iterator psi;
  source_location locus;

  /* For each PHI in the old destination, the argument that used to come
     in over EXIT now comes in over the single edge BB->DEST.  Reroute
     SSA-name arguments through a fresh PHI in BB so that LC-SSA holds
     at the new exit block.  */
  for (psi = gsi_start_phis (dest); !gsi_end_p (psi); gsi_next (&psi))
    {
      phi = psi.phi ();
      op_p = PHI_ARG_DEF_PTR_FROM_EDGE (phi, single_succ_edge (bb));
      locus = gimple_phi_arg_location_from_edge (phi, single_succ_edge (bb));

      name = USE_FROM_PTR (op_p);

      /* If the argument of the PHI node is a constant, we do not need
	 to keep it inside loop.  */
      if (TREE_CODE (name) != SSA_NAME)
	continue;

      /* Otherwise create an auxiliary phi node that will copy the value
	 of the SSA name out of the loop.  */
      new_name = duplicate_ssa_name (name, NULL);
      new_phi = create_phi_node (new_name, bb);
      add_phi_arg (new_phi, name, exit, locus);
      SET_USE (op_p, new_name);
    }

  return bb;
}
675
dec41e98 676/* Returns the basic block in that statements should be emitted for induction
677 variables incremented at the end of the LOOP. */
678
679basic_block
680ip_end_pos (struct loop *loop)
681{
682 return loop->latch;
683}
684
685/* Returns the basic block in that statements should be emitted for induction
686 variables incremented just before exit condition of a LOOP. */
687
688basic_block
689ip_normal_pos (struct loop *loop)
690{
75a70cf9 691 gimple last;
dec41e98 692 basic_block bb;
693 edge exit;
694
ea091dfd 695 if (!single_pred_p (loop->latch))
dec41e98 696 return NULL;
697
ea091dfd 698 bb = single_pred (loop->latch);
dec41e98 699 last = last_stmt (bb);
d92f9312 700 if (!last
75a70cf9 701 || gimple_code (last) != GIMPLE_COND)
dec41e98 702 return NULL;
703
cd665a06 704 exit = EDGE_SUCC (bb, 0);
dec41e98 705 if (exit->dest == loop->latch)
cd665a06 706 exit = EDGE_SUCC (bb, 1);
dec41e98 707
708 if (flow_bb_inside_loop_p (loop, exit->dest))
709 return NULL;
710
711 return bb;
712}
713
714/* Stores the standard position for induction variable increment in LOOP
715 (just before the exit condition if it is available and latch block is empty,
716 end of the latch block otherwise) to BSI. INSERT_AFTER is set to true if
717 the increment should be inserted after *BSI. */
718
719void
75a70cf9 720standard_iv_increment_position (struct loop *loop, gimple_stmt_iterator *bsi,
dec41e98 721 bool *insert_after)
722{
723 basic_block bb = ip_normal_pos (loop), latch = ip_end_pos (loop);
75a70cf9 724 gimple last = last_stmt (latch);
dec41e98 725
726 if (!bb
75a70cf9 727 || (last && gimple_code (last) != GIMPLE_LABEL))
dec41e98 728 {
75a70cf9 729 *bsi = gsi_last_bb (latch);
dec41e98 730 *insert_after = true;
731 }
732 else
733 {
75a70cf9 734 *bsi = gsi_last_bb (bb);
dec41e98 735 *insert_after = false;
736 }
737}
e12d0591 738
739/* Copies phi node arguments for duplicated blocks. The index of the first
740 duplicated block is FIRST_NEW_BLOCK. */
741
742static void
743copy_phi_node_args (unsigned first_new_block)
744{
745 unsigned i;
746
fe672ac0 747 for (i = first_new_block; i < (unsigned) last_basic_block_for_fn (cfun); i++)
f5a6b05f 748 BASIC_BLOCK_FOR_FN (cfun, i)->flags |= BB_DUPLICATED;
e12d0591 749
fe672ac0 750 for (i = first_new_block; i < (unsigned) last_basic_block_for_fn (cfun); i++)
f5a6b05f 751 add_phi_args_after_copy_bb (BASIC_BLOCK_FOR_FN (cfun, i));
e12d0591 752
fe672ac0 753 for (i = first_new_block; i < (unsigned) last_basic_block_for_fn (cfun); i++)
f5a6b05f 754 BASIC_BLOCK_FOR_FN (cfun, i)->flags &= ~BB_DUPLICATED;
e12d0591 755}
756
e12d0591 757
/* The same as cfgloopmanip.c:duplicate_loop_to_header_edge, but also
   updates the PHI nodes at start of the copied region.  In order to
   achieve this, only loops whose exits all lead to the same location
   are handled.

   Notice that we do not completely update the SSA web after
   duplication.  The caller is responsible for calling update_ssa
   after the loop has been duplicated.

   Returns false if duplication is not possible (missing simple latches
   or preheaders, or the CFG-level duplication fails), true on success.  */

bool
gimple_duplicate_loop_to_header_edge (struct loop *loop, edge e,
				      unsigned int ndupl, sbitmap wont_exit,
				      edge orig, vec<edge> *to_remove,
				      int flags)
{
  unsigned first_new_block;

  /* The PHI-argument copying below relies on the canonical loop shape.  */
  if (!loops_state_satisfies_p (LOOPS_HAVE_SIMPLE_LATCHES))
    return false;
  if (!loops_state_satisfies_p (LOOPS_HAVE_PREHEADERS))
    return false;

  /* Remember where the copies will start so we can find them later.  */
  first_new_block = last_basic_block_for_fn (cfun);
  if (!duplicate_loop_to_header_edge (loop, e, ndupl, wont_exit,
				      orig, to_remove, flags))
    return false;

  /* Re-add the removed phi args for e.  */
  flush_pending_stmts (e);

  /* Copy the phi node arguments.  */
  copy_phi_node_args (first_new_block);

  /* Scalar evolution data is stale after the CFG change.  */
  scev_reset ();

  return true;
}
b30560de 795
/* Returns true if we can unroll LOOP FACTOR times.  Number
   of iterations of the loop is returned in NITER.  */

bool
can_unroll_loop_p (struct loop *loop, unsigned factor,
		   struct tree_niter_desc *niter)
{
  edge exit;

  /* Check whether unrolling is possible.  We only want to unroll loops
     for that we are able to determine number of iterations.  We also
     want to split the extra iterations of the loop from its end,
     therefore we require that the loop has precisely one
     exit.  */

  exit = single_dom_exit (loop);
  if (!exit)
    return false;

  if (!number_of_iterations_exit (loop, exit, niter, false)
      || niter->cmp == ERROR_MARK
      /* Scalar evolutions analysis might have copy propagated
	 the abnormal ssa names into these expressions, hence
	 emitting the computations based on them during loop
	 unrolling might create overlapping life ranges for
	 them, and failures in out-of-ssa.  */
      || contains_abnormal_ssa_name_p (niter->may_be_zero)
      || contains_abnormal_ssa_name_p (niter->control.base)
      || contains_abnormal_ssa_name_p (niter->control.step)
      || contains_abnormal_ssa_name_p (niter->bound))
    return false;

  /* And of course, we must be able to duplicate the loop.  */
  if (!can_duplicate_loop_p (loop))
    return false;

  /* The final loop should be small enough: the unrolled size estimate is
     the per-iteration size times FACTOR.  */
  if (tree_num_loop_insns (loop, &eni_size_weights) * factor
      > (unsigned) PARAM_VALUE (PARAM_MAX_UNROLLED_INSNS))
    return false;

  return true;
}
839
/* Determines the conditions that control execution of LOOP unrolled FACTOR
   times.  DESC is number of iterations of LOOP.  ENTER_COND is set to
   condition that must be true if the main loop can be entered.
   EXIT_BASE, EXIT_STEP, EXIT_CMP and EXIT_BOUND are set to values describing
   how the exit from the unrolled loop should be controlled.  */

static void
determine_exit_conditions (struct loop *loop, struct tree_niter_desc *desc,
			   unsigned factor, tree *enter_cond,
			   tree *exit_base, tree *exit_step,
			   enum tree_code *exit_cmp, tree *exit_bound)
{
  gimple_seq stmts;
  tree base = desc->control.base;
  tree step = desc->control.step;
  tree bound = desc->bound;
  tree type = TREE_TYPE (step);
  tree bigstep, delta;
  /* Extreme values of TYPE, used for the overflow checks below.  */
  tree min = lower_bound_in_type (type, type);
  tree max = upper_bound_in_type (type, type);
  enum tree_code cmp = desc->cmp;
  tree cond = boolean_true_node, assum;

  /* For pointers, do the arithmetic in the type of step.  */
  base = fold_convert (type, base);
  bound = fold_convert (type, bound);

  /* Initialize the outputs to "loop never entered" defaults, so that the
     early gcc_assert failure below leaves them in a defined state.  */
  *enter_cond = boolean_false_node;
  *exit_base = NULL_TREE;
  *exit_step = NULL_TREE;
  *exit_cmp = ERROR_MARK;
  *exit_bound = NULL_TREE;
  gcc_assert (cmp != ERROR_MARK);

  /* We only need to be correct when we answer question
     "Do at least FACTOR more iterations remain?" in the unrolled loop.
     Thus, transforming BASE + STEP * i <> BOUND to
     BASE + STEP * i < BOUND is ok.  */
  if (cmp == NE_EXPR)
    {
      /* The direction of the inequality follows the sign of STEP.  */
      if (tree_int_cst_sign_bit (step))
	cmp = GT_EXPR;
      else
	cmp = LT_EXPR;
    }
  else if (cmp == LT_EXPR)
    {
      gcc_assert (!tree_int_cst_sign_bit (step));
    }
  else if (cmp == GT_EXPR)
    {
      gcc_assert (tree_int_cst_sign_bit (step));
    }
  else
    gcc_unreachable ();

  /* The main body of the loop may be entered iff:

     1) desc->may_be_zero is false.
     2) it is possible to check that there are at least FACTOR iterations
	of the loop, i.e., BOUND - step * FACTOR does not overflow.
     3) # of iterations is at least FACTOR  */

  if (!integer_zerop (desc->may_be_zero))
    cond = fold_build2 (TRUTH_AND_EXPR, boolean_type_node,
			invert_truthvalue (desc->may_be_zero),
			cond);

  /* BIGSTEP is the increment over one unrolled-body iteration; DELTA is
     the distance covered by the FACTOR - 1 extra copies.  */
  bigstep = fold_build2 (MULT_EXPR, type, step,
			 build_int_cst_type (type, factor));
  delta = fold_build2 (MINUS_EXPR, type, bigstep, step);
  /* Condition 2: BOUND -+ DELTA must not overflow TYPE.  */
  if (cmp == LT_EXPR)
    assum = fold_build2 (GE_EXPR, boolean_type_node,
			 bound,
			 fold_build2 (PLUS_EXPR, type, min, delta));
  else
    assum = fold_build2 (LE_EXPR, boolean_type_node,
			 bound,
			 fold_build2 (PLUS_EXPR, type, max, delta));
  cond = fold_build2 (TRUTH_AND_EXPR, boolean_type_node, assum, cond);

  /* Condition 3: compare BASE against the bound shifted back by DELTA.  */
  bound = fold_build2 (MINUS_EXPR, type, bound, delta);
  assum = fold_build2 (cmp, boolean_type_node, base, bound);
  cond = fold_build2 (TRUTH_AND_EXPR, boolean_type_node, assum, cond);

  cond = force_gimple_operand (unshare_expr (cond), &stmts, false, NULL_TREE);
  if (stmts)
    gsi_insert_seq_on_edge_immediate (loop_preheader_edge (loop), stmts);
  /* cond now may be a gimple comparison, which would be OK, but also any
     other gimple rhs (say a && b).  In this case we need to force it to
     operand.  */
  if (!is_gimple_condexpr (cond))
    {
      cond = force_gimple_operand (cond, &stmts, true, NULL_TREE);
      if (stmts)
	gsi_insert_seq_on_edge_immediate (loop_preheader_edge (loop), stmts);
    }
  *enter_cond = cond;

  /* Gimplify the exit-control values on the preheader edge as well.  */
  base = force_gimple_operand (unshare_expr (base), &stmts, true, NULL_TREE);
  if (stmts)
    gsi_insert_seq_on_edge_immediate (loop_preheader_edge (loop), stmts);
  bound = force_gimple_operand (unshare_expr (bound), &stmts, true, NULL_TREE);
  if (stmts)
    gsi_insert_seq_on_edge_immediate (loop_preheader_edge (loop), stmts);

  *exit_base = base;
  *exit_step = bigstep;
  *exit_cmp = cmp;
  *exit_bound = bound;
}
951
611d2ac1 952/* Scales the frequencies of all basic blocks in LOOP that are strictly
953 dominated by BB by NUM/DEN. */
954
955static void
956scale_dominated_blocks_in_loop (struct loop *loop, basic_block bb,
957 int num, int den)
958{
959 basic_block son;
960
961 if (den == 0)
962 return;
963
964 for (son = first_dom_son (CDI_DOMINATORS, bb);
965 son;
966 son = next_dom_son (CDI_DOMINATORS, son))
967 {
968 if (!flow_bb_inside_loop_p (loop, son))
969 continue;
970 scale_bbs_frequencies_int (&son, 1, num, den);
971 scale_dominated_blocks_in_loop (loop, son, num, den);
972 }
973}
974
/* Unroll LOOP FACTOR times.  DESC describes number of iterations of LOOP.
   EXIT is the exit of the loop to which DESC corresponds.

   If N is number of iterations of the loop and MAY_BE_ZERO is the condition
   under that loop exits in the first iteration even if N != 0,

   while (1)
     {
       x = phi (init, next);

       pre;
       if (st)
         break;
       post;
     }

   becomes (with possibly the exit conditions formulated a bit differently,
   avoiding the need to create a new iv):

   if (MAY_BE_ZERO || N < FACTOR)
     goto rest;

   do
     {
       x = phi (init, next);

       pre;
       post;
       pre;
       post;
       ...
       pre;
       post;
       N -= FACTOR;

     } while (N >= FACTOR);

   rest:
     init' = phi (init, x);

   while (1)
     {
       x = phi (init', next);

       pre;
       if (st)
         break;
       post;
     }

   Before the loop is unrolled, TRANSFORM is called for it (only for the
   unrolled loop, but not for its versioned copy).  DATA is passed to
   TRANSFORM.  */

/* Probability in % that the unrolled loop is entered.  Just a guess.  */
#define PROB_UNROLLED_LOOP_ENTERED 90

void
tree_transform_and_unroll_loop (struct loop *loop, unsigned factor,
				edge exit, struct tree_niter_desc *desc,
				transform_callback transform,
				void *data)
{
  gcond *exit_if;
  tree ctr_before, ctr_after;
  tree enter_main_cond, exit_base, exit_step, exit_bound;
  enum tree_code exit_cmp;
  gphi *phi_old_loop, *phi_new_loop, *phi_rest;
  gphi_iterator psi_old_loop, psi_new_loop;
  tree init, next, new_init;
  struct loop *new_loop;
  basic_block rest, exit_bb;
  edge old_entry, new_entry, old_latch, precond_edge, new_exit;
  edge new_nonexit, e;
  gimple_stmt_iterator bsi;
  use_operand_p op;
  bool ok;
  unsigned est_niter, prob_entry, scale_unrolled, scale_rest, freq_e, freq_h;
  unsigned new_est_niter, i, prob;
  unsigned irr = loop_preheader_edge (loop)->flags & EDGE_IRREDUCIBLE_LOOP;
  sbitmap wont_exit;
  auto_vec<edge> to_remove;

  est_niter = expected_loop_iterations (loop);
  determine_exit_conditions (loop, desc, factor,
			     &enter_main_cond, &exit_base, &exit_step,
			     &exit_cmp, &exit_bound);

  /* Let us assume that the unrolled loop is quite likely to be entered.  */
  if (integer_nonzerop (enter_main_cond))
    prob_entry = REG_BR_PROB_BASE;
  else
    prob_entry = PROB_UNROLLED_LOOP_ENTERED * REG_BR_PROB_BASE / 100;

  /* The values for scales should keep profile consistent, and somewhat close
     to correct.

     TODO: The current value of SCALE_REST makes it appear that the loop that
     is created by splitting the remaining iterations of the unrolled loop is
     executed the same number of times as the original loop, and with the same
     frequencies, which is obviously wrong.  This does not appear to cause
     problems, so we do not bother with fixing it for now.  To make the profile
     correct, we would need to change the probability of the exit edge of the
     loop, and recompute the distribution of frequencies in its body because
     of this change (scale the frequencies of blocks before and after the exit
     by appropriate factors).  */
  scale_unrolled = prob_entry;
  scale_rest = REG_BR_PROB_BASE;

  /* Version the loop: the copy (NEW_LOOP) handles the remaining iterations
     when ENTER_MAIN_COND is false.  */
  new_loop = loop_version (loop, enter_main_cond, NULL,
			   prob_entry, scale_unrolled, scale_rest, true);
  gcc_assert (new_loop != NULL);
  update_ssa (TODO_update_ssa);

  /* Determine the probability of the exit edge of the unrolled loop.  */
  new_est_niter = est_niter / factor;

  /* Without profile feedback, loops for that we do not know a better estimate
     are assumed to roll 10 times.  When we unroll such loop, it appears to
     roll too little, and it may even seem to be cold.  To avoid this, we
     ensure that the created loop appears to roll at least 5 times (but at
     most as many times as before unrolling).  */
  if (new_est_niter < 5)
    {
      if (est_niter < 5)
	new_est_niter = est_niter;
      else
	new_est_niter = 5;
    }

  /* Prepare the cfg and update the phi nodes.  Move the loop exit to the
     loop latch (and make its condition dummy, for the moment).  */
  rest = loop_preheader_edge (new_loop)->src;
  precond_edge = single_pred_edge (rest);
  split_edge (loop_latch_edge (loop));
  exit_bb = single_pred (loop->latch);

  /* Since the exit edge will be removed, the frequency of all the blocks
     in the loop that are dominated by it must be scaled by
     1 / (1 - exit->probability).  */
  scale_dominated_blocks_in_loop (loop, exit->src,
				  REG_BR_PROB_BASE,
				  REG_BR_PROB_BASE - exit->probability);

  /* Insert a placeholder "if (0 == 0)" condition; it is rewritten into the
     real exit test (on the new counter) at the end of this function.  */
  bsi = gsi_last_bb (exit_bb);
  exit_if = gimple_build_cond (EQ_EXPR, integer_zero_node,
			       integer_zero_node,
			       NULL_TREE, NULL_TREE);

  gsi_insert_after (&bsi, exit_if, GSI_NEW_STMT);
  new_exit = make_edge (exit_bb, rest, EDGE_FALSE_VALUE | irr);
  rescan_loop_exit (new_exit, true, false);

  /* Set the probability of new exit to the same of the old one.  Fix
     the frequency of the latch block, by scaling it back by
     1 - exit->probability.  */
  new_exit->count = exit->count;
  new_exit->probability = exit->probability;
  new_nonexit = single_pred_edge (loop->latch);
  new_nonexit->probability = REG_BR_PROB_BASE - exit->probability;
  new_nonexit->flags = EDGE_TRUE_VALUE;
  new_nonexit->count -= exit->count;
  if (new_nonexit->count < 0)
    new_nonexit->count = 0;
  scale_bbs_frequencies_int (&loop->latch, 1, new_nonexit->probability,
			     REG_BR_PROB_BASE);

  /* Walk the phi nodes of the old and new loop headers in lockstep
     (loop_version created them pairwise), wiring up the phis in REST that
     merge the value from the unrolled loop with the initial value.  */
  old_entry = loop_preheader_edge (loop);
  new_entry = loop_preheader_edge (new_loop);
  old_latch = loop_latch_edge (loop);
  for (psi_old_loop = gsi_start_phis (loop->header),
       psi_new_loop = gsi_start_phis (new_loop->header);
       !gsi_end_p (psi_old_loop);
       gsi_next (&psi_old_loop), gsi_next (&psi_new_loop))
    {
      phi_old_loop = psi_old_loop.phi ();
      phi_new_loop = psi_new_loop.phi ();

      init = PHI_ARG_DEF_FROM_EDGE (phi_old_loop, old_entry);
      op = PHI_ARG_DEF_PTR_FROM_EDGE (phi_new_loop, new_entry);
      gcc_assert (operand_equal_for_phi_arg_p (init, USE_FROM_PTR (op)));
      next = PHI_ARG_DEF_FROM_EDGE (phi_old_loop, old_latch);

      /* Prefer using original variable as a base for the new ssa name.
	 This is necessary for virtual ops, and useful in order to avoid
	 losing debug info for real ops.  */
      if (TREE_CODE (next) == SSA_NAME
	  && useless_type_conversion_p (TREE_TYPE (next),
					TREE_TYPE (init)))
	new_init = copy_ssa_name (next);
      else if (TREE_CODE (init) == SSA_NAME
	       && useless_type_conversion_p (TREE_TYPE (init),
					     TREE_TYPE (next)))
	new_init = copy_ssa_name (init);
      else if (useless_type_conversion_p (TREE_TYPE (next), TREE_TYPE (init)))
	new_init = make_temp_ssa_name (TREE_TYPE (next), NULL, "unrinittmp");
      else
	new_init = make_temp_ssa_name (TREE_TYPE (init), NULL, "unrinittmp");

      phi_rest = create_phi_node (new_init, rest);

      add_phi_arg (phi_rest, init, precond_edge, UNKNOWN_LOCATION);
      add_phi_arg (phi_rest, next, new_exit, UNKNOWN_LOCATION);
      SET_USE (op, new_init);
    }

  remove_path (exit);

  /* Transform the loop.  */
  if (transform)
    (*transform) (loop, data);

  /* Unroll the loop and remove the exits in all iterations except for the
     last one.  */
  wont_exit = sbitmap_alloc (factor);
  bitmap_ones (wont_exit);
  bitmap_clear_bit (wont_exit, factor - 1);

  ok = gimple_duplicate_loop_to_header_edge
	  (loop, loop_latch_edge (loop), factor - 1,
	   wont_exit, new_exit, &to_remove, DLTHE_FLAG_UPDATE_FREQ);
  free (wont_exit);
  gcc_assert (ok);

  FOR_EACH_VEC_ELT (to_remove, i, e)
    {
      ok = remove_path (e);
      gcc_assert (ok);
    }
  update_ssa (TODO_update_ssa);

  /* Ensure that the frequencies in the loop match the new estimated
     number of iterations, and change the probability of the new
     exit edge.  */
  freq_h = loop->header->frequency;
  freq_e = EDGE_FREQUENCY (loop_preheader_edge (loop));
  if (freq_h != 0)
    scale_loop_frequencies (loop, freq_e * (new_est_niter + 1), freq_h);

  /* EXIT_BB may have been re-created by the unrolling; look it up again.  */
  exit_bb = single_pred (loop->latch);
  new_exit = find_edge (exit_bb, rest);
  new_exit->count = loop_preheader_edge (loop)->count;
  new_exit->probability = REG_BR_PROB_BASE / (new_est_niter + 1);

  rest->count += new_exit->count;
  rest->frequency += EDGE_FREQUENCY (new_exit);

  new_nonexit = single_pred_edge (loop->latch);
  prob = new_nonexit->probability;
  new_nonexit->probability = REG_BR_PROB_BASE - new_exit->probability;
  new_nonexit->count = exit_bb->count - new_exit->count;
  if (new_nonexit->count < 0)
    new_nonexit->count = 0;
  if (prob > 0)
    scale_bbs_frequencies_int (&loop->latch, 1, new_nonexit->probability,
			       prob);

  /* Finally create the new counter for number of iterations and add the new
     exit instruction.  */
  bsi = gsi_last_nondebug_bb (exit_bb);
  exit_if = as_a <gcond *> (gsi_stmt (bsi));
  create_iv (exit_base, exit_step, NULL_TREE, loop,
	     &bsi, false, &ctr_before, &ctr_after);
  /* Rewrite the dummy condition inserted above into the real exit test.  */
  gimple_cond_set_code (exit_if, exit_cmp);
  gimple_cond_set_lhs (exit_if, ctr_after);
  gimple_cond_set_rhs (exit_if, exit_bound);
  update_stmt (exit_if);

#ifdef ENABLE_CHECKING
  verify_flow_info ();
  verify_loop_structure ();
  verify_loop_closed_ssa (true);
#endif
}
f6ae6f2a 1249
1250/* Wrapper over tree_transform_and_unroll_loop for case we do not
1251 want to transform the loop before unrolling. The meaning
1252 of the arguments is the same as for tree_transform_and_unroll_loop. */
1253
1254void
1255tree_unroll_loop (struct loop *loop, unsigned factor,
1256 edge exit, struct tree_niter_desc *desc)
1257{
1258 tree_transform_and_unroll_loop (loop, factor, exit, desc,
1259 NULL, NULL);
1260}
5fa90eea 1261
1262/* Rewrite the phi node at position PSI in function of the main
1263 induction variable MAIN_IV and insert the generated code at GSI. */
1264
1265static void
1266rewrite_phi_with_iv (loop_p loop,
1a91d914 1267 gphi_iterator *psi,
5fa90eea 1268 gimple_stmt_iterator *gsi,
1269 tree main_iv)
1270{
1271 affine_iv iv;
1a91d914 1272 gassign *stmt;
1273 gphi *phi = psi->phi ();
5fa90eea 1274 tree atype, mtype, val, res = PHI_RESULT (phi);
1275
7c782c9b 1276 if (virtual_operand_p (res) || res == main_iv)
5fa90eea 1277 {
1278 gsi_next (psi);
1279 return;
1280 }
1281
1282 if (!simple_iv (loop, loop, res, &iv, true))
1283 {
1284 gsi_next (psi);
1285 return;
1286 }
1287
1288 remove_phi_node (psi, false);
1289
1290 atype = TREE_TYPE (res);
1291 mtype = POINTER_TYPE_P (atype) ? sizetype : atype;
1292 val = fold_build2 (MULT_EXPR, mtype, unshare_expr (iv.step),
1293 fold_convert (mtype, main_iv));
1294 val = fold_build2 (POINTER_TYPE_P (atype)
1295 ? POINTER_PLUS_EXPR : PLUS_EXPR,
1296 atype, unshare_expr (iv.base), val);
1297 val = force_gimple_operand_gsi (gsi, val, false, NULL_TREE, true,
1298 GSI_SAME_STMT);
1299 stmt = gimple_build_assign (res, val);
1300 gsi_insert_before (gsi, stmt, GSI_SAME_STMT);
5fa90eea 1301}
1302
1303/* Rewrite all the phi nodes of LOOP in function of the main induction
1304 variable MAIN_IV. */
1305
1306static void
1307rewrite_all_phi_nodes_with_iv (loop_p loop, tree main_iv)
1308{
1309 unsigned i;
1310 basic_block *bbs = get_loop_body_in_dom_order (loop);
1a91d914 1311 gphi_iterator psi;
5fa90eea 1312
1313 for (i = 0; i < loop->num_nodes; i++)
1314 {
1315 basic_block bb = bbs[i];
1316 gimple_stmt_iterator gsi = gsi_after_labels (bb);
1317
1318 if (bb->loop_father != loop)
1319 continue;
1320
1321 for (psi = gsi_start_phis (bb); !gsi_end_p (psi); )
1322 rewrite_phi_with_iv (loop, &psi, &gsi, main_iv);
1323 }
1324
1325 free (bbs);
1326}
1327
/* Bases all the induction variables in LOOP on a single induction
   variable (unsigned with base 0 and step 1), whose final value is
   compared with *NIT.  When the IV type precision has to be larger
   than *NIT type precision, *NIT is converted to the larger type, the
   conversion code is inserted before the loop, and *NIT is updated to
   the new definition.  When BUMP_IN_LATCH is true, the induction
   variable is incremented in the loop latch, otherwise it is
   incremented in the loop header.  Return the induction variable that
   was created.  */

tree
canonicalize_loop_ivs (struct loop *loop, tree *nit, bool bump_in_latch)
{
  unsigned precision = TYPE_PRECISION (TREE_TYPE (*nit));
  unsigned original_precision = precision;
  tree type, var_before;
  gimple_stmt_iterator gsi;
  gphi_iterator psi;
  gcond *stmt;
  edge exit = single_dom_exit (loop);
  gimple_seq stmts;
  machine_mode mode;
  bool unsigned_p = false;

  /* Scan the header phis to find the widest precision (and matching
     signedness) among the integral/pointer IVs, so the canonical IV can
     represent all of them.  */
  for (psi = gsi_start_phis (loop->header);
       !gsi_end_p (psi); gsi_next (&psi))
    {
      gphi *phi = psi.phi ();
      tree res = PHI_RESULT (phi);
      bool uns;

      type = TREE_TYPE (res);
      if (virtual_operand_p (res)
	  || (!INTEGRAL_TYPE_P (type)
	      && !POINTER_TYPE_P (type))
	  || TYPE_PRECISION (type) < precision)
	continue;

      uns = POINTER_TYPE_P (type) | TYPE_UNSIGNED (type);

      /* A strictly wider type overrides the accumulated signedness; at
	 equal precision, unsignedness wins.  */
      if (TYPE_PRECISION (type) > precision)
	unsigned_p = uns;
      else
	unsigned_p |= uns;

      precision = TYPE_PRECISION (type);
    }

  /* Round the precision up to that of the smallest integer mode that
     can hold it, and build the canonical IV type.  */
  mode = smallest_mode_for_size (precision, MODE_INT);
  precision = GET_MODE_PRECISION (mode);
  type = build_nonstandard_integer_type (precision, unsigned_p);

  if (original_precision != precision)
    {
      /* Widen *NIT to the IV type; the conversion code goes on the
	 preheader edge, before the loop.  */
      *nit = fold_convert (type, *nit);
      *nit = force_gimple_operand (*nit, &stmts, true, NULL_TREE);
      if (stmts)
	gsi_insert_seq_on_edge_immediate (loop_preheader_edge (loop), stmts);
    }

  if (bump_in_latch)
    gsi = gsi_last_bb (loop->latch);
  else
    gsi = gsi_last_nondebug_bb (loop->header);
  /* Create the canonical IV: starts at 0, steps by 1.  */
  create_iv (build_int_cst_type (type, 0), build_int_cst (type, 1), NULL_TREE,
	     loop, &gsi, bump_in_latch, &var_before, NULL);

  rewrite_all_phi_nodes_with_iv (loop, var_before);

  stmt = as_a <gcond *> (last_stmt (exit->src));
  /* Make the loop exit if the control condition is not satisfied.  */
  if (exit->flags & EDGE_TRUE_VALUE)
    {
      edge te, fe;

      extract_true_false_edges_from_block (exit->src, &te, &fe);
      te->flags = EDGE_FALSE_VALUE;
      fe->flags = EDGE_TRUE_VALUE;
    }
  /* The loop now runs while VAR_BEFORE < *NIT.  */
  gimple_cond_set_code (stmt, LT_EXPR);
  gimple_cond_set_lhs (stmt, var_before);
  gimple_cond_set_rhs (stmt, *nit);
  update_stmt (stmt);

  return var_before;
}