/* Induction variable canonicalization and loop peeling.
   Copyright (C) 2004-2016 Free Software Foundation, Inc.

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify it
under the terms of the GNU General Public License as published by the
Free Software Foundation; either version 3, or (at your option) any
later version.

GCC is distributed in the hope that it will be useful, but WITHOUT
ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3.  If not see
<http://www.gnu.org/licenses/>.  */

/* This pass detects the loops that iterate a constant number of times,
   adds a canonical induction variable (step -1, tested against 0)
   and replaces the exit test.  This enables the less powerful rtl
   level analysis to use this information.

   This might spoil the code in some cases (by increasing register pressure).
   Note that in the case the new variable is not needed, ivopts will get rid
   of it, so it might only be a problem when there are no other linear
   induction variables.  In that case the created optimization possibilities
   are likely to pay off.

   We also perform
   - complete unrolling (or peeling) when the loop rolls few enough times
   - simple peeling (i.e. copying a few initial iterations before the loop)
     when the estimated number of iterations is known (typically from
     profile info).  */
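
/* As an illustrative sketch (hand-written, not actual output of the pass),
   a loop whose iteration count is known to be 100, such as

     for (i = 0; i < n; i++)
       a[i] = 0;

   gets a canonical counter that starts at the iteration count, steps
   by -1 and is tested against 0:

     ivtmp = 100;
     do
       {
         a[i] = 0;
         i++;
         ivtmp--;
       }
     while (ivtmp != 0);

   This is the shape the rtl-level unrolling and doloop code expects.  */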

#include "config.h"
#include "system.h"
#include "coretypes.h"
#include "backend.h"
#include "tree.h"
#include "gimple.h"
#include "cfghooks.h"
#include "tree-pass.h"
#include "ssa.h"
#include "cgraph.h"
#include "gimple-pretty-print.h"
#include "fold-const.h"
#include "profile.h"
#include "gimple-fold.h"
#include "tree-eh.h"
#include "gimple-iterator.h"
#include "tree-cfg.h"
#include "tree-ssa-loop-manip.h"
#include "tree-ssa-loop-niter.h"
#include "tree-ssa-loop.h"
#include "tree-into-ssa.h"
#include "cfgloop.h"
#include "tree-chrec.h"
#include "tree-scalar-evolution.h"
#include "params.h"
#include "tree-inline.h"
#include "tree-cfgcleanup.h"
#include "builtins.h"

/* Specifies types of loops that may be unrolled.  */

enum unroll_level
{
  UL_SINGLE_ITER,	/* Only loops that exit immediately in the first
			   iteration.  */
  UL_NO_GROWTH,		/* Only loops whose unrolling will not cause increase
			   of code size.  */
  UL_ALL		/* All suitable loops.  */
};

/* Adds a canonical induction variable to LOOP iterating NITER times.  EXIT
   is the exit edge whose condition is replaced.  */

static void
create_canonical_iv (struct loop *loop, edge exit, tree niter)
{
  edge in;
  tree type, var;
  gcond *cond;
  gimple_stmt_iterator incr_at;
  enum tree_code cmp;

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      fprintf (dump_file, "Added canonical iv to loop %d, ", loop->num);
      print_generic_expr (dump_file, niter, TDF_SLIM);
      fprintf (dump_file, " iterations.\n");
    }

  cond = as_a <gcond *> (last_stmt (exit->src));
  in = EDGE_SUCC (exit->src, 0);
  if (in == exit)
    in = EDGE_SUCC (exit->src, 1);

  /* Note that we do not need to worry about overflows, since
     type of niter is always unsigned and all comparisons are
     just for equality/nonequality -- i.e. everything works
     with modulo arithmetic.  */

  type = TREE_TYPE (niter);
  niter = fold_build2 (PLUS_EXPR, type,
		       niter,
		       build_int_cst (type, 1));
  incr_at = gsi_last_bb (in->src);
  create_iv (niter,
	     build_int_cst (type, -1),
	     NULL_TREE, loop,
	     &incr_at, false, NULL, &var);

  cmp = (exit->flags & EDGE_TRUE_VALUE) ? EQ_EXPR : NE_EXPR;
  gimple_cond_set_code (cond, cmp);
  gimple_cond_set_lhs (cond, var);
  gimple_cond_set_rhs (cond, build_int_cst (type, 0));
  update_stmt (cond);
}
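
/* A hypothetical GIMPLE-level view of the rewritten exit test (SSA names
   invented for illustration): for NITER == 99 the condition

     if (i_5 < n_8) goto <continue>; else goto <exit>;

   becomes a test of the fresh counter created above, counting down from
   NITER + 1:

     ivtmp_2 = ivtmp_1 - 1;
     if (ivtmp_2 == 0) goto <exit>; else goto <continue>;  */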

/* Describe size of loop as detected by tree_estimate_loop_size.  */
struct loop_size
{
  /* Number of instructions in the loop.  */
  int overall;

  /* Number of instructions that will be likely optimized out in
     peeled iterations of loop (i.e. computation based on induction
     variable where induction variable starts at known constant.)  */
  int eliminated_by_peeling;

  /* Same statistics for last iteration of loop: it is smaller because
     instructions after exit are not executed.  */
  int last_iteration;
  int last_iteration_eliminated_by_peeling;

  /* If some IV computation will become constant.  */
  bool constant_iv;

  /* Number of call stmts that are not a builtin and are pure or const
     present on the hot path.  */
  int num_pure_calls_on_hot_path;
  /* Number of call stmts that are not a builtin and are not pure nor const
     present on the hot path.  */
  int num_non_pure_calls_on_hot_path;
  /* Number of statements other than calls in the loop.  */
  int non_call_stmts_on_hot_path;
  /* Number of branches seen on the hot path.  */
  int num_branches_on_hot_path;
};

/* Return true if OP in STMT will be constant after peeling LOOP.  */

static bool
constant_after_peeling (tree op, gimple *stmt, struct loop *loop)
{
  affine_iv iv;

  if (is_gimple_min_invariant (op))
    return true;

  /* We can still fold accesses to constant arrays when index is known.  */
  if (TREE_CODE (op) != SSA_NAME)
    {
      tree base = op;

      /* First take a quick look whether we see a constant array inside.  */
      while (handled_component_p (base))
	base = TREE_OPERAND (base, 0);
      if ((DECL_P (base)
	   && ctor_for_folding (base) != error_mark_node)
	  || CONSTANT_CLASS_P (base))
	{
	  /* If so, see if we understand all the indices.  */
	  base = op;
	  while (handled_component_p (base))
	    {
	      if (TREE_CODE (base) == ARRAY_REF
		  && !constant_after_peeling (TREE_OPERAND (base, 1), stmt, loop))
		return false;
	      base = TREE_OPERAND (base, 0);
	    }
	  return true;
	}
      return false;
    }

  /* Induction variables with invariant base and step are constant.  */
  if (!simple_iv (loop, loop_containing_stmt (stmt), op, &iv, false))
    return false;
  if (!is_gimple_min_invariant (iv.base))
    return false;
  if (!is_gimple_min_invariant (iv.step))
    return false;
  return true;
}

/* Computes an estimated number of insns in LOOP.
   EXIT (if non-NULL) is an exit edge that will be eliminated in all but last
   iteration of the loop.
   EDGE_TO_CANCEL (if non-NULL) is a non-exit edge eliminated in the last
   iteration of the loop.
   Return results in SIZE, estimating the benefits of complete unrolling
   exiting by EXIT.  Stop estimating once UPPER_BOUND is met and return
   true in that case.  */

static bool
tree_estimate_loop_size (struct loop *loop, edge exit, edge edge_to_cancel,
			 struct loop_size *size, int upper_bound)
{
  basic_block *body = get_loop_body (loop);
  gimple_stmt_iterator gsi;
  unsigned int i;
  bool after_exit;
  vec<basic_block> path = get_loop_hot_path (loop);

  size->overall = 0;
  size->eliminated_by_peeling = 0;
  size->last_iteration = 0;
  size->last_iteration_eliminated_by_peeling = 0;
  size->num_pure_calls_on_hot_path = 0;
  size->num_non_pure_calls_on_hot_path = 0;
  size->non_call_stmts_on_hot_path = 0;
  size->num_branches_on_hot_path = 0;
  size->constant_iv = 0;

  if (dump_file && (dump_flags & TDF_DETAILS))
    fprintf (dump_file, "Estimating sizes for loop %i\n", loop->num);
  for (i = 0; i < loop->num_nodes; i++)
    {
      if (edge_to_cancel && body[i] != edge_to_cancel->src
	  && dominated_by_p (CDI_DOMINATORS, body[i], edge_to_cancel->src))
	after_exit = true;
      else
	after_exit = false;
      if (dump_file && (dump_flags & TDF_DETAILS))
	fprintf (dump_file, " BB: %i, after_exit: %i\n", body[i]->index,
		 after_exit);

      for (gsi = gsi_start_bb (body[i]); !gsi_end_p (gsi); gsi_next (&gsi))
	{
	  gimple *stmt = gsi_stmt (gsi);
	  int num = estimate_num_insns (stmt, &eni_size_weights);
	  bool likely_eliminated = false;
	  bool likely_eliminated_last = false;
	  bool likely_eliminated_peeled = false;

	  if (dump_file && (dump_flags & TDF_DETAILS))
	    {
	      fprintf (dump_file, "  size: %3i ", num);
	      print_gimple_stmt (dump_file, gsi_stmt (gsi), 0, 0);
	    }

	  /* Look for reasons why we might optimize this stmt away.  */

	  if (gimple_has_side_effects (stmt))
	    ;
	  /* Exit conditional.  */
	  else if (exit && body[i] == exit->src
		   && stmt == last_stmt (exit->src))
	    {
	      if (dump_file && (dump_flags & TDF_DETAILS))
		fprintf (dump_file, "   Exit condition will be eliminated "
			 "in peeled copies.\n");
	      likely_eliminated_peeled = true;
	    }
	  else if (edge_to_cancel && body[i] == edge_to_cancel->src
		   && stmt == last_stmt (edge_to_cancel->src))
	    {
	      if (dump_file && (dump_flags & TDF_DETAILS))
		fprintf (dump_file, "   Exit condition will be eliminated "
			 "in last copy.\n");
	      likely_eliminated_last = true;
	    }
	  /* Sets of IV variables  */
	  else if (gimple_code (stmt) == GIMPLE_ASSIGN
		   && constant_after_peeling (gimple_assign_lhs (stmt), stmt, loop))
	    {
	      if (dump_file && (dump_flags & TDF_DETAILS))
		fprintf (dump_file, "   Induction variable computation will"
			 " be folded away.\n");
	      likely_eliminated = true;
	    }
	  /* Assignments of IV variables.  */
	  else if (gimple_code (stmt) == GIMPLE_ASSIGN
		   && TREE_CODE (gimple_assign_lhs (stmt)) == SSA_NAME
		   && constant_after_peeling (gimple_assign_rhs1 (stmt), stmt,
					      loop)
		   && (gimple_assign_rhs_class (stmt) != GIMPLE_BINARY_RHS
		       || constant_after_peeling (gimple_assign_rhs2 (stmt),
						  stmt, loop)))
	    {
	      size->constant_iv = true;
	      if (dump_file && (dump_flags & TDF_DETAILS))
		fprintf (dump_file,
			 "   Constant expression will be folded away.\n");
	      likely_eliminated = true;
	    }
	  /* Conditionals.  */
	  else if ((gimple_code (stmt) == GIMPLE_COND
		    && constant_after_peeling (gimple_cond_lhs (stmt), stmt,
					       loop)
		    && constant_after_peeling (gimple_cond_rhs (stmt), stmt,
					       loop)
		    /* We don't simplify all constant compares so make sure
		       they are not both constant already.  See PR70288.  */
		    && (! is_gimple_min_invariant (gimple_cond_lhs (stmt))
			|| ! is_gimple_min_invariant (gimple_cond_rhs (stmt))))
		   || (gimple_code (stmt) == GIMPLE_SWITCH
		       && constant_after_peeling (gimple_switch_index (
						    as_a <gswitch *> (stmt)),
						  stmt, loop)
		       && ! is_gimple_min_invariant
			      (gimple_switch_index (as_a <gswitch *> (stmt)))))
	    {
	      if (dump_file && (dump_flags & TDF_DETAILS))
		fprintf (dump_file, "   Constant conditional.\n");
	      likely_eliminated = true;
	    }

	  size->overall += num;
	  if (likely_eliminated || likely_eliminated_peeled)
	    size->eliminated_by_peeling += num;
	  if (!after_exit)
	    {
	      size->last_iteration += num;
	      if (likely_eliminated || likely_eliminated_last)
		size->last_iteration_eliminated_by_peeling += num;
	    }
	  if ((size->overall * 3 / 2 - size->eliminated_by_peeling
	       - size->last_iteration_eliminated_by_peeling) > upper_bound)
	    {
	      free (body);
	      path.release ();
	      return true;
	    }
	}
    }
  while (path.length ())
    {
      basic_block bb = path.pop ();
      for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
	{
	  gimple *stmt = gsi_stmt (gsi);
	  if (gimple_code (stmt) == GIMPLE_CALL
	      && !gimple_inexpensive_call_p (as_a <gcall *> (stmt)))
	    {
	      int flags = gimple_call_flags (stmt);
	      if (flags & (ECF_PURE | ECF_CONST))
		size->num_pure_calls_on_hot_path++;
	      else
		size->num_non_pure_calls_on_hot_path++;
	      size->num_branches_on_hot_path++;
	    }
	  /* Count inexpensive calls as non-calls, because they will likely
	     expand inline.  */
	  else if (gimple_code (stmt) != GIMPLE_DEBUG)
	    size->non_call_stmts_on_hot_path++;
	  if (((gimple_code (stmt) == GIMPLE_COND
		&& (!constant_after_peeling (gimple_cond_lhs (stmt), stmt, loop)
		    || !constant_after_peeling (gimple_cond_rhs (stmt), stmt,
						loop)))
	       || (gimple_code (stmt) == GIMPLE_SWITCH
		   && !constant_after_peeling (gimple_switch_index (
						 as_a <gswitch *> (stmt)),
					       stmt, loop)))
	      && (!exit || bb != exit->src))
	    size->num_branches_on_hot_path++;
	}
    }
  path.release ();
  if (dump_file && (dump_flags & TDF_DETAILS))
    fprintf (dump_file, "size: %i-%i, last_iteration: %i-%i\n", size->overall,
	     size->eliminated_by_peeling, size->last_iteration,
	     size->last_iteration_eliminated_by_peeling);

  free (body);
  return false;
}

/* Estimate number of insns of completely unrolled loop.
   It is (NUNROLL + 1) * size of loop body with taking into account
   the fact that in last copy everything after exit conditional
   is dead and that some instructions will be eliminated after
   peeling.

   Loop body is likely going to simplify further; this is difficult
   to guess, so we just decrease the result by 1/3.  */

static unsigned HOST_WIDE_INT
estimated_unrolled_size (struct loop_size *size,
			 unsigned HOST_WIDE_INT nunroll)
{
  HOST_WIDE_INT unr_insns = ((nunroll)
			     * (HOST_WIDE_INT) (size->overall
						- size->eliminated_by_peeling));
  if (!nunroll)
    unr_insns = 0;
  unr_insns += size->last_iteration - size->last_iteration_eliminated_by_peeling;

  unr_insns = unr_insns * 2 / 3;
  if (unr_insns <= 0)
    unr_insns = 1;

  return unr_insns;
}
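
/* A worked example with made-up numbers: a body of 40 insns of which 10
   are IV computations expected to fold away (eliminated_by_peeling == 10),
   with a last iteration of 30 insns of which 10 fold away, unrolled 4
   times gives 4 * (40 - 10) + (30 - 10) = 140 insns, scaled by 2/3 to a
   final estimate of 93.  */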

/* Loop LOOP is known to not loop.  See if there is an edge in the loop
   body that can be removed to make the loop always exit without making
   any code that is potentially executed during the last iteration dead.

   After complete unrolling we still may get rid of the conditional
   on the exit in the last copy even if we have no idea what it does.
   This is quite a common case for loops of the form

   int a[5];
   for (i=0;i<b;i++)
     a[i]=0;

   Here we prove the loop to iterate 5 times but we do not know
   it from the induction variable.

   For now we handle only the simple case where there is an exit condition
   just before the latch block and the latch block contains no statements
   with side effects that may otherwise terminate the execution of the loop
   (such as by EH or by terminating the program or longjmp).

   In the general case we may want to cancel the paths leading to statements
   loop-niter identified as having undefined effect in the last iteration.
   The other cases are hopefully rare and will be cleaned up later.  */

static edge
loop_edge_to_cancel (struct loop *loop)
{
  vec<edge> exits;
  unsigned i;
  edge edge_to_cancel;
  gimple_stmt_iterator gsi;

  /* We want the loop latch to have only one predecessor.  */
  if (EDGE_COUNT (loop->latch->preds) > 1)
    return NULL;

  exits = get_loop_exit_edges (loop);

  FOR_EACH_VEC_ELT (exits, i, edge_to_cancel)
    {
      /* Find the other edge than the loop exit
	 leaving the conditional.  */
      if (EDGE_COUNT (edge_to_cancel->src->succs) != 2)
	continue;
      if (EDGE_SUCC (edge_to_cancel->src, 0) == edge_to_cancel)
	edge_to_cancel = EDGE_SUCC (edge_to_cancel->src, 1);
      else
	edge_to_cancel = EDGE_SUCC (edge_to_cancel->src, 0);

      /* We can only handle conditionals.  */
      if (!(edge_to_cancel->flags & (EDGE_TRUE_VALUE | EDGE_FALSE_VALUE)))
	continue;

      /* We should never have conditionals in the loop latch.  */
      gcc_assert (edge_to_cancel->dest != loop->header);

      /* Check that it leads to loop latch.  */
      if (edge_to_cancel->dest != loop->latch)
	continue;

      exits.release ();

      /* Verify that the code in loop latch does nothing that may end program
	 execution without really reaching the exit.  This may include
	 non-pure/const function calls, EH statements, volatile ASMs etc.  */
      for (gsi = gsi_start_bb (loop->latch); !gsi_end_p (gsi); gsi_next (&gsi))
	if (gimple_has_side_effects (gsi_stmt (gsi)))
	  return NULL;
      return edge_to_cancel;
    }
  exits.release ();
  return NULL;
}

/* Remove all tests for exits that are known to be taken after LOOP was
   peeled NPEELED times.  Put gcc_unreachable before every statement
   known to not be executed.  */

static bool
remove_exits_and_undefined_stmts (struct loop *loop, unsigned int npeeled)
{
  struct nb_iter_bound *elt;
  bool changed = false;

  for (elt = loop->bounds; elt; elt = elt->next)
    {
      /* If statement is known to be undefined after peeling, turn it
	 into unreachable (or trap when debugging experience is supposed
	 to be good).  */
      if (!elt->is_exit
	  && wi::ltu_p (elt->bound, npeeled))
	{
	  gimple_stmt_iterator gsi = gsi_for_stmt (elt->stmt);
	  gcall *stmt = gimple_build_call
	      (builtin_decl_implicit (BUILT_IN_UNREACHABLE), 0);
	  gimple_set_location (stmt, gimple_location (elt->stmt));
	  gsi_insert_before (&gsi, stmt, GSI_NEW_STMT);
	  split_block (gimple_bb (stmt), stmt);
	  changed = true;
	  if (dump_file && (dump_flags & TDF_DETAILS))
	    {
	      fprintf (dump_file, "Forced statement unreachable: ");
	      print_gimple_stmt (dump_file, elt->stmt, 0, 0);
	    }
	}
      /* If we know the exit will be taken after peeling, update.  */
      else if (elt->is_exit
	       && wi::leu_p (elt->bound, npeeled))
	{
	  basic_block bb = gimple_bb (elt->stmt);
	  edge exit_edge = EDGE_SUCC (bb, 0);

	  if (dump_file && (dump_flags & TDF_DETAILS))
	    {
	      fprintf (dump_file, "Forced exit to be taken: ");
	      print_gimple_stmt (dump_file, elt->stmt, 0, 0);
	    }
	  if (!loop_exit_edge_p (loop, exit_edge))
	    exit_edge = EDGE_SUCC (bb, 1);
	  gcc_checking_assert (loop_exit_edge_p (loop, exit_edge));
	  gcond *cond_stmt = as_a <gcond *> (elt->stmt);
	  if (exit_edge->flags & EDGE_TRUE_VALUE)
	    gimple_cond_make_true (cond_stmt);
	  else
	    gimple_cond_make_false (cond_stmt);
	  update_stmt (cond_stmt);
	  changed = true;
	}
    }
  return changed;
}
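
/* An illustrative sketch (hand-written) of the effect: if LOOP was peeled
   3 times and loop-niter recorded that some exit is always taken within
   the first 2 iterations, the copy of that exit test remaining in the
   loop body is folded to a constant so the exit edge is always taken,
   while a statement recorded to execute at most twice gets a call to
   __builtin_unreachable () placed before it, telling later passes that
   this path is dead.  */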

/* Remove all exits that are known to be never taken because of the loop bound
   discovered.  */

static bool
remove_redundant_iv_tests (struct loop *loop)
{
  struct nb_iter_bound *elt;
  bool changed = false;

  if (!loop->any_upper_bound)
    return false;
  for (elt = loop->bounds; elt; elt = elt->next)
    {
      /* Exit is pointless if it won't be taken before loop reaches
	 upper bound.  */
      if (elt->is_exit && loop->any_upper_bound
	  && wi::ltu_p (loop->nb_iterations_upper_bound, elt->bound))
	{
	  basic_block bb = gimple_bb (elt->stmt);
	  edge exit_edge = EDGE_SUCC (bb, 0);
	  struct tree_niter_desc niter;

	  if (!loop_exit_edge_p (loop, exit_edge))
	    exit_edge = EDGE_SUCC (bb, 1);

	  /* Only when we know the actual number of iterations, not
	     just a bound, can we remove the exit.  */
	  if (!number_of_iterations_exit (loop, exit_edge,
					  &niter, false, false)
	      || !integer_onep (niter.assumptions)
	      || !integer_zerop (niter.may_be_zero)
	      || !niter.niter
	      || TREE_CODE (niter.niter) != INTEGER_CST
	      || !wi::ltu_p (loop->nb_iterations_upper_bound,
			     wi::to_widest (niter.niter)))
	    continue;

	  if (dump_file && (dump_flags & TDF_DETAILS))
	    {
	      fprintf (dump_file, "Removed pointless exit: ");
	      print_gimple_stmt (dump_file, elt->stmt, 0, 0);
	    }
	  gcond *cond_stmt = as_a <gcond *> (elt->stmt);
	  if (exit_edge->flags & EDGE_TRUE_VALUE)
	    gimple_cond_make_false (cond_stmt);
	  else
	    gimple_cond_make_true (cond_stmt);
	  update_stmt (cond_stmt);
	  changed = true;
	}
    }
  return changed;
}

/* Stores loops that will be unlooped and edges that will be removed
   after we process the whole loop tree.  */
static vec<loop_p> loops_to_unloop;
static vec<int> loops_to_unloop_nunroll;
static vec<edge> edges_to_remove;
/* Stores loops that have been peeled.  */
static bitmap peeled_loops;

/* Cancel all fully unrolled loops by putting __builtin_unreachable
   on the latch edge.
   We do it after all unrolling since unlooping moves basic blocks
   across loop boundaries trashing loop closed SSA form as well
   as SCEV info needed to be intact during unrolling.

   IRRED_INVALIDATED is used to bookkeep if information about
   irreducible regions may become invalid as a result
   of the transformation.
   LOOP_CLOSED_SSA_INVALIDATED is used to bookkeep the case
   when we need to go into loop closed SSA form.  */

static void
unloop_loops (bitmap loop_closed_ssa_invalidated,
	      bool *irred_invalidated)
{
  while (loops_to_unloop.length ())
    {
      struct loop *loop = loops_to_unloop.pop ();
      int n_unroll = loops_to_unloop_nunroll.pop ();
      basic_block latch = loop->latch;
      edge latch_edge = loop_latch_edge (loop);
      int flags = latch_edge->flags;
      location_t locus = latch_edge->goto_locus;
      gcall *stmt;
      gimple_stmt_iterator gsi;

      remove_exits_and_undefined_stmts (loop, n_unroll);

      /* Unloop destroys the latch edge.  */
      unloop (loop, irred_invalidated, loop_closed_ssa_invalidated);

      /* Create new basic block for the latch edge destination and wire
	 it in.  */
      stmt = gimple_build_call (builtin_decl_implicit (BUILT_IN_UNREACHABLE), 0);
      latch_edge = make_edge (latch, create_basic_block (NULL, NULL, latch), flags);
      latch_edge->probability = 0;
      latch_edge->count = 0;
      latch_edge->flags |= flags;
      latch_edge->goto_locus = locus;

      latch_edge->dest->loop_father = current_loops->tree_root;
      latch_edge->dest->count = 0;
      latch_edge->dest->frequency = 0;
      set_immediate_dominator (CDI_DOMINATORS, latch_edge->dest, latch_edge->src);

      gsi = gsi_start_bb (latch_edge->dest);
      gsi_insert_after (&gsi, stmt, GSI_NEW_STMT);
    }
  loops_to_unloop.release ();
  loops_to_unloop_nunroll.release ();

  /* Remove edges in peeled copies.  */
  unsigned i;
  edge e;
  FOR_EACH_VEC_ELT (edges_to_remove, i, e)
    {
      bool ok = remove_path (e);
      gcc_assert (ok);
    }
  edges_to_remove.release ();
}

/* Tries to unroll LOOP completely, i.e. NITER times.
   UL determines which loops we are allowed to unroll.
   EXIT is the exit of the loop that should be eliminated.
   MAXITER specifies a bound on the number of iterations, -1 if it is
   not known or too large for HOST_WIDE_INT.  The location
   LOCUS corresponding to the loop is used when emitting
   a summary of the unroll to the dump file.  */

static bool
try_unroll_loop_completely (struct loop *loop,
			    edge exit, tree niter,
			    enum unroll_level ul,
			    HOST_WIDE_INT maxiter,
			    location_t locus)
{
  unsigned HOST_WIDE_INT n_unroll = 0, ninsns, unr_insns;
  struct loop_size size;
  bool n_unroll_found = false;
  edge edge_to_cancel = NULL;
  int report_flags = MSG_OPTIMIZED_LOCATIONS | TDF_RTL | TDF_DETAILS;

  /* See if we proved the number of iterations to be a low constant.

     EXIT is an edge that will be removed in all but the last iteration of
     the loop.

     EDGE_TO_CANCEL is an edge that will be removed from the last iteration
     of the unrolled sequence and is expected to make the final loop not
     rolling.

     If the number of executions of the loop is determined by a standard
     induction variable test, then EXIT and EDGE_TO_CANCEL are the two
     edges leaving from the iv test.  */
  if (tree_fits_uhwi_p (niter))
    {
      n_unroll = tree_to_uhwi (niter);
      n_unroll_found = true;
      edge_to_cancel = EDGE_SUCC (exit->src, 0);
      if (edge_to_cancel == exit)
	edge_to_cancel = EDGE_SUCC (exit->src, 1);
    }
  /* We do not know the number of iterations and thus we cannot eliminate
     the EXIT edge.  */
  else
    exit = NULL;

  /* See if we can improve our estimate by using recorded loop bounds.  */
  if (maxiter >= 0
      && (!n_unroll_found || (unsigned HOST_WIDE_INT)maxiter < n_unroll))
    {
      n_unroll = maxiter;
      n_unroll_found = true;
      /* Loop terminates before the IV variable test, so we cannot
	 remove it in the last iteration.  */
      edge_to_cancel = NULL;
    }

  if (!n_unroll_found)
    return false;

  if (n_unroll > (unsigned) PARAM_VALUE (PARAM_MAX_COMPLETELY_PEEL_TIMES))
    {
      if (dump_file && (dump_flags & TDF_DETAILS))
	fprintf (dump_file, "Not unrolling loop %d "
		 "(--param max-completely-peel-times limit reached).\n",
		 loop->num);
      return false;
    }

  if (!edge_to_cancel)
    edge_to_cancel = loop_edge_to_cancel (loop);

  if (n_unroll)
    {
      sbitmap wont_exit;
      bool large;
      if (ul == UL_SINGLE_ITER)
	return false;

      /* EXIT can be removed only if we are sure it passes the first
	 N_UNROLL iterations.  */
      bool remove_exit = (exit && niter
			  && TREE_CODE (niter) == INTEGER_CST
			  && wi::leu_p (n_unroll, wi::to_widest (niter)));

      large = tree_estimate_loop_size
		 (loop, remove_exit ? exit : NULL, edge_to_cancel, &size,
		  PARAM_VALUE (PARAM_MAX_COMPLETELY_PEELED_INSNS));
      ninsns = size.overall;
      if (large)
	{
	  if (dump_file && (dump_flags & TDF_DETAILS))
	    fprintf (dump_file, "Not unrolling loop %d: it is too large.\n",
		     loop->num);
	  return false;
	}

      unr_insns = estimated_unrolled_size (&size, n_unroll);
      if (dump_file && (dump_flags & TDF_DETAILS))
	{
	  fprintf (dump_file, "  Loop size: %d\n", (int) ninsns);
	  fprintf (dump_file, "  Estimated size after unrolling: %d\n",
		   (int) unr_insns);
	}

      /* If the code is going to shrink, we don't need to be extra cautious
	 about guessing whether the unrolling is going to be profitable.  */
      if (unr_insns
	  /* If there is an IV variable that will become constant, we save
	     one instruction in the loop prologue we do not account
	     otherwise.  */
	  <= ninsns + (size.constant_iv != false))
	;
      /* We unroll only inner loops, because we do not consider it profitable
	 otherwise.  We can still cancel the loopback edge of a loop that
	 does not roll; this is always a good idea.  */
      else if (ul == UL_NO_GROWTH)
	{
	  if (dump_file && (dump_flags & TDF_DETAILS))
	    fprintf (dump_file, "Not unrolling loop %d: size would grow.\n",
		     loop->num);
	  return false;
	}
      /* Outer loops tend to be less interesting candidates for complete
	 unrolling unless we can do a lot of propagation into the inner loop
	 body.  For now we disable outer loop unrolling when the code would
	 grow.  */
      else if (loop->inner)
	{
	  if (dump_file && (dump_flags & TDF_DETAILS))
	    fprintf (dump_file, "Not unrolling loop %d: "
		     "it is not innermost and code would grow.\n",
		     loop->num);
	  return false;
	}
      /* If there is a call on a hot path through the loop, then
	 there is most probably not much to optimize.  */
      else if (size.num_non_pure_calls_on_hot_path)
	{
	  if (dump_file && (dump_flags & TDF_DETAILS))
	    fprintf (dump_file, "Not unrolling loop %d: "
		     "contains call and code would grow.\n",
		     loop->num);
	  return false;
	}
      /* If there is a pure/const call in the function, then we
	 can still optimize the unrolled loop body if it contains
	 some other interesting code than the calls and code
	 storing or accumulating the return value.  */
      else if (size.num_pure_calls_on_hot_path
	       /* One IV increment, one test, one ivtmp store and
		  one useful stmt.  That is about the minimal loop
		  doing a pure call.  */
	       && (size.non_call_stmts_on_hot_path
		   <= 3 + size.num_pure_calls_on_hot_path))
	{
	  if (dump_file && (dump_flags & TDF_DETAILS))
	    fprintf (dump_file, "Not unrolling loop %d: "
		     "contains just pure calls and code would grow.\n",
		     loop->num);
	  return false;
	}
      /* Complete unrolling is a major win when control flow is removed and
	 one big basic block is created.  If the loop contains control flow
	 the optimization may still be a win because of eliminating the loop
	 overhead but it also may blow the branch predictor tables.
	 Limit number of branches on the hot path through the peeled
	 sequence.  */
      else if (size.num_branches_on_hot_path * (int)n_unroll
	       > PARAM_VALUE (PARAM_MAX_PEEL_BRANCHES))
	{
	  if (dump_file && (dump_flags & TDF_DETAILS))
	    fprintf (dump_file, "Not unrolling loop %d: "
		     "number of branches on the hot path in the unrolled "
		     "sequence reaches the --param max-peel-branches limit.\n",
		     loop->num);
	  return false;
	}
      else if (unr_insns
	       > (unsigned) PARAM_VALUE (PARAM_MAX_COMPLETELY_PEELED_INSNS))
	{
	  if (dump_file && (dump_flags & TDF_DETAILS))
	    fprintf (dump_file, "Not unrolling loop %d: "
		     "(--param max-completely-peeled-insns limit reached).\n",
		     loop->num);
	  return false;
	}
      dump_printf_loc (report_flags, locus,
		       "loop turned into non-loop; it never loops.\n");

      initialize_original_copy_tables ();
      wont_exit = sbitmap_alloc (n_unroll + 1);
      if (exit && niter
	  && TREE_CODE (niter) == INTEGER_CST
	  && wi::leu_p (n_unroll, wi::to_widest (niter)))
	{
	  bitmap_ones (wont_exit);
	  if (wi::eq_p (wi::to_widest (niter), n_unroll)
	      || edge_to_cancel)
	    bitmap_clear_bit (wont_exit, 0);
	}
      else
	{
	  exit = NULL;
	  bitmap_clear (wont_exit);
	}

      if (!gimple_duplicate_loop_to_header_edge (loop, loop_preheader_edge (loop),
						 n_unroll, wont_exit,
						 exit, &edges_to_remove,
						 DLTHE_FLAG_UPDATE_FREQ
						 | DLTHE_FLAG_COMPLETTE_PEEL))
	{
	  free_original_copy_tables ();
	  free (wont_exit);
	  if (dump_file && (dump_flags & TDF_DETAILS))
	    fprintf (dump_file, "Failed to duplicate the loop\n");
	  return false;
	}

      free (wont_exit);
      free_original_copy_tables ();
    }

  /* Remove the conditional from the last copy of the loop.  */
  if (edge_to_cancel)
    {
      gcond *cond = as_a <gcond *> (last_stmt (edge_to_cancel->src));
      force_edge_cold (edge_to_cancel, true);
      if (edge_to_cancel->flags & EDGE_TRUE_VALUE)
	gimple_cond_make_false (cond);
      else
	gimple_cond_make_true (cond);
      update_stmt (cond);
      /* Do not remove the path: doing so may remove the outer loop and
	 confuse bookkeeping code in tree_unroll_loops_completely.  */
    }

  /* Store the loop for later unlooping and exit removal.  */
  loops_to_unloop.safe_push (loop);
  loops_to_unloop_nunroll.safe_push (n_unroll);

  if (dump_enabled_p ())
    {
      if (!n_unroll)
	dump_printf_loc (MSG_OPTIMIZED_LOCATIONS | TDF_DETAILS, locus,
			 "loop turned into non-loop; it never loops\n");
      else
	{
	  dump_printf_loc (MSG_OPTIMIZED_LOCATIONS | TDF_DETAILS, locus,
			   "loop with %d iterations completely unrolled",
			   (int) (n_unroll + 1));
	  if (profile_info)
	    dump_printf (MSG_OPTIMIZED_LOCATIONS | TDF_DETAILS,
			 " (header execution count %d)",
			 (int)loop->header->count);
	  dump_printf (MSG_OPTIMIZED_LOCATIONS | TDF_DETAILS, "\n");
	}
    }

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      if (exit)
	fprintf (dump_file, "Exit condition of peeled iterations was "
		 "eliminated.\n");
      if (edge_to_cancel)
	fprintf (dump_file, "Last iteration exit edge was proved true.\n");
      else
	fprintf (dump_file, "Latch of last iteration was marked by "
		 "__builtin_unreachable ().\n");
    }

  return true;
}

/* Return number of instructions after peeling.  */
static unsigned HOST_WIDE_INT
estimated_peeled_sequence_size (struct loop_size *size,
				unsigned HOST_WIDE_INT npeel)
{
  return MAX (npeel * (HOST_WIDE_INT) (size->overall
				       - size->eliminated_by_peeling), 1);
}
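
/* For example (numbers invented for illustration): peeling npeel == 4
   copies of a 20-insn body in which 5 insns are expected to fold away
   per copy costs about 4 * (20 - 5) = 60 insns of new code.  */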

/* If the loop is expected to iterate N times and is
   small enough, duplicate the loop body N+1 times before
   the loop itself.  This way the hot path will never
   enter the loop.
   Parameters are the same as for try_unroll_loops_completely.  */

static bool
try_peel_loop (struct loop *loop,
	       edge exit, tree niter,
	       HOST_WIDE_INT maxiter)
{
  HOST_WIDE_INT npeel;
  struct loop_size size;
  int peeled_size;
  sbitmap wont_exit;

  if (!flag_peel_loops || PARAM_VALUE (PARAM_MAX_PEEL_TIMES) <= 0
      || !peeled_loops)
    return false;

  if (bitmap_bit_p (peeled_loops, loop->num))
    {
      if (dump_file)
	fprintf (dump_file, "Not peeling: loop is already peeled\n");
      return false;
    }

  /* Peel only innermost loops.
     While the code is perfectly capable of peeling non-innermost loops,
     the heuristics would probably need some improvements.  */
  if (loop->inner)
    {
      if (dump_file)
	fprintf (dump_file, "Not peeling: outer loop\n");
      return false;
    }

  if (!optimize_loop_for_speed_p (loop))
    {
      if (dump_file)
	fprintf (dump_file, "Not peeling: cold loop\n");
      return false;
    }

  /* Check if there is an estimate on the number of iterations.  */
  npeel = estimated_loop_iterations_int (loop);
  if (npeel < 0)
    npeel = likely_max_loop_iterations_int (loop);
  if (npeel < 0)
    {
      if (dump_file)
	fprintf (dump_file, "Not peeling: number of iterations is not "
		 "estimated\n");
      return false;
    }
  if (maxiter >= 0 && maxiter <= npeel)
    {
      if (dump_file)
	fprintf (dump_file, "Not peeling: upper bound is known so can "
		 "unroll completely\n");
      return false;
    }

  /* We want to peel estimated number of iterations + 1 (so we never
     enter the loop on the quick path).  Check against PARAM_MAX_PEEL_TIMES
     and be sure to avoid overflows.  */
  if (npeel > PARAM_VALUE (PARAM_MAX_PEEL_TIMES) - 1)
    {
      if (dump_file)
	fprintf (dump_file, "Not peeling: rolls too much "
		 "(%i + 1 > --param max-peel-times)\n", (int) npeel);
      return false;
    }
  npeel++;

  /* Check peeled loops size.  */
  tree_estimate_loop_size (loop, exit, NULL, &size,
			   PARAM_VALUE (PARAM_MAX_PEELED_INSNS));
  if ((peeled_size = estimated_peeled_sequence_size (&size, (int) npeel))
      > PARAM_VALUE (PARAM_MAX_PEELED_INSNS))
    {
      if (dump_file)
	fprintf (dump_file, "Not peeling: peeled sequence size is too large "
		 "(%i insns > --param max-peel-insns)", peeled_size);
      return false;
    }

  /* Duplicate possibly eliminating the exits.  */
  initialize_original_copy_tables ();
  wont_exit = sbitmap_alloc (npeel + 1);
  if (exit && niter
      && TREE_CODE (niter) == INTEGER_CST
      && wi::leu_p (npeel, wi::to_widest (niter)))
    {
      bitmap_ones (wont_exit);
      bitmap_clear_bit (wont_exit, 0);
    }
  else
    {
      exit = NULL;
      bitmap_clear (wont_exit);
    }
  if (!gimple_duplicate_loop_to_header_edge (loop, loop_preheader_edge (loop),
					     npeel, wont_exit,
					     exit, &edges_to_remove,
					     DLTHE_FLAG_UPDATE_FREQ))
    {
      free_original_copy_tables ();
      free (wont_exit);
      return false;
    }
  free (wont_exit);
  free_original_copy_tables ();
  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      fprintf (dump_file, "Peeled loop %d, %i times.\n",
	       loop->num, (int) npeel);
    }
  if (loop->any_estimate)
    {
      if (wi::ltu_p (npeel, loop->nb_iterations_estimate))
	loop->nb_iterations_estimate -= npeel;
      else
	loop->nb_iterations_estimate = 0;
    }
  if (loop->any_upper_bound)
    {
      if (wi::ltu_p (npeel, loop->nb_iterations_upper_bound))
	loop->nb_iterations_upper_bound -= npeel;
      else
	loop->nb_iterations_upper_bound = 0;
    }
  if (loop->any_likely_upper_bound)
    {
      if (wi::ltu_p (npeel, loop->nb_iterations_likely_upper_bound))
	loop->nb_iterations_likely_upper_bound -= npeel;
      else
	{
	  loop->any_estimate = true;
	  loop->nb_iterations_estimate = 0;
	  loop->nb_iterations_likely_upper_bound = 0;
	}
    }
  gcov_type entry_count = 0;
  int entry_freq = 0;

  edge e;
  edge_iterator ei;
  FOR_EACH_EDGE (e, ei, loop->header->preds)
    if (e->src != loop->latch)
      {
	entry_count += e->src->count;
	entry_freq += e->src->frequency;
	gcc_assert (!flow_bb_inside_loop_p (loop, e->src));
      }
  int scale = 1;
  if (loop->header->count)
    scale = RDIV (entry_count * REG_BR_PROB_BASE, loop->header->count);
  else if (loop->header->frequency)
    scale = RDIV (entry_freq * REG_BR_PROB_BASE, loop->header->frequency);
  scale_loop_profile (loop, scale, 0);
  bitmap_set_bit (peeled_loops, loop->num);
  return true;
}
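
/* An illustrative source-level sketch of peeling (hand-written): with an
   iteration estimate of 2 we peel npeel == 3 copies, so

     while (c)
       body;

   becomes roughly

     if (c) { body;
       if (c) { body;
	 if (c) { body;
	   while (c)
	     body;
	 } } }

   and the remaining loop, now expected to be cold, is never entered on
   the hot path.  */
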
/* Adds a canonical induction variable to LOOP if suitable.
   CREATE_IV is true if we may create a new iv.  UL determines
   which loops we are allowed to completely unroll.  If TRY_EVAL is true,
   we try to determine the number of iterations of a loop by direct
   evaluation.  Returns true if cfg is changed.  */

static bool
canonicalize_loop_induction_variables (struct loop *loop,
				       bool create_iv, enum unroll_level ul,
				       bool try_eval)
{
  edge exit = NULL;
  tree niter;
  HOST_WIDE_INT maxiter;
  bool modified = false;
  location_t locus = UNKNOWN_LOCATION;

  niter = number_of_latch_executions (loop);
  exit = single_exit (loop);
  if (TREE_CODE (niter) == INTEGER_CST)
    locus = gimple_location (last_stmt (exit->src));
  else
    {
      /* If the loop has more than one exit, try checking all of them
	 for # of iterations determinable through scev.  */
      if (!exit)
	niter = find_loop_niter (loop, &exit);

      /* Finally if everything else fails, try brute force evaluation.  */
      if (try_eval
	  && (chrec_contains_undetermined (niter)
	      || TREE_CODE (niter) != INTEGER_CST))
	niter = find_loop_niter_by_eval (loop, &exit);

      if (exit)
	locus = gimple_location (last_stmt (exit->src));

      if (TREE_CODE (niter) != INTEGER_CST)
	exit = NULL;
    }

  /* We work exceptionally hard here to estimate the bound
     by find_loop_niter_by_eval.  Be sure to keep it for the future.  */
  if (niter && TREE_CODE (niter) == INTEGER_CST)
    {
      record_niter_bound (loop, wi::to_widest (niter),
			  exit == single_likely_exit (loop), true);
    }

  /* Force re-computation of loop bounds so we can remove redundant exits.  */
  maxiter = max_loop_iterations_int (loop);

  if (dump_file && (dump_flags & TDF_DETAILS)
      && TREE_CODE (niter) == INTEGER_CST)
    {
      fprintf (dump_file, "Loop %d iterates ", loop->num);
      print_generic_expr (dump_file, niter, TDF_SLIM);
      fprintf (dump_file, " times.\n");
    }
  if (dump_file && (dump_flags & TDF_DETAILS)
      && maxiter >= 0)
    {
      fprintf (dump_file, "Loop %d iterates at most %i times.\n", loop->num,
	       (int)maxiter);
    }
  if (dump_file && (dump_flags & TDF_DETAILS)
      && likely_max_loop_iterations_int (loop) >= 0)
    {
      fprintf (dump_file, "Loop %d likely iterates at most %i times.\n",
	       loop->num, (int)likely_max_loop_iterations_int (loop));
    }

  /* Remove exits that are known to be never taken based on loop bound.
     Needs to be called after max_loop_iterations_int above has populated
     the loop bounds.  */
  modified |= remove_redundant_iv_tests (loop);

  if (try_unroll_loop_completely (loop, exit, niter, ul, maxiter, locus))
    return true;

  if (create_iv
      && niter && !chrec_contains_undetermined (niter)
      && exit && just_once_each_iteration_p (loop, exit->src))
    create_canonical_iv (loop, exit, niter);

  if (ul == UL_ALL)
    modified |= try_peel_loop (loop, exit, niter, maxiter);

  return modified;
}

/* The main entry point of the pass.  Adds canonical induction variables
   to the suitable loops.  */

unsigned int
canonicalize_induction_variables (void)
{
  struct loop *loop;
  bool changed = false;
  bool irred_invalidated = false;
  bitmap loop_closed_ssa_invalidated = BITMAP_ALLOC (NULL);

  free_numbers_of_iterations_estimates (cfun);
  estimate_numbers_of_iterations ();

  FOR_EACH_LOOP (loop, LI_FROM_INNERMOST)
    {
      changed |= canonicalize_loop_induction_variables (loop,
							true, UL_SINGLE_ITER,
							true);
    }
  gcc_assert (!need_ssa_update_p (cfun));

  unloop_loops (loop_closed_ssa_invalidated, &irred_invalidated);
  if (irred_invalidated
      && loops_state_satisfies_p (LOOPS_HAVE_MARKED_IRREDUCIBLE_REGIONS))
    mark_irreducible_loops ();

  /* Clean up the information about numbers of iterations, since brute force
     evaluation could reveal new information.  */
  scev_reset ();

  if (!bitmap_empty_p (loop_closed_ssa_invalidated))
    {
      gcc_checking_assert (loops_state_satisfies_p (LOOP_CLOSED_SSA));
      rewrite_into_loop_closed_ssa (NULL, TODO_update_ssa);
    }
  BITMAP_FREE (loop_closed_ssa_invalidated);

  if (changed)
    return TODO_cleanup_cfg;
  return 0;
}

/* Propagate constant SSA_NAMEs defined in basic block BB.  */

static void
propagate_constants_for_unrolling (basic_block bb)
{
  /* Look for degenerate PHI nodes with constant argument.  */
  for (gphi_iterator gsi = gsi_start_phis (bb); !gsi_end_p (gsi); )
    {
      gphi *phi = gsi.phi ();
      tree result = gimple_phi_result (phi);
      tree arg = gimple_phi_arg_def (phi, 0);

      if (! SSA_NAME_OCCURS_IN_ABNORMAL_PHI (result)
	  && gimple_phi_num_args (phi) == 1
	  && TREE_CODE (arg) == INTEGER_CST)
	{
	  replace_uses_by (result, arg);
	  gsi_remove (&gsi, true);
	  release_ssa_name (result);
	}
      else
	gsi_next (&gsi);
    }

  /* Look for assignments to SSA names with constant RHS.  */
  for (gimple_stmt_iterator gsi = gsi_start_bb (bb); !gsi_end_p (gsi); )
    {
      gimple *stmt = gsi_stmt (gsi);
      tree lhs;

      if (is_gimple_assign (stmt)
	  && gimple_assign_rhs_code (stmt) == INTEGER_CST
	  && (lhs = gimple_assign_lhs (stmt), TREE_CODE (lhs) == SSA_NAME)
	  && !SSA_NAME_OCCURS_IN_ABNORMAL_PHI (lhs))
	{
	  replace_uses_by (lhs, gimple_assign_rhs1 (stmt));
	  gsi_remove (&gsi, true);
	  release_ssa_name (lhs);
	}
      else
	gsi_next (&gsi);
    }
}
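
/* A hypothetical GIMPLE fragment showing what the above cleans up: after
   unrolling, a copied body may contain a degenerate PHI such as

     i_7 = PHI <5(3)>
     _8 = i_7 * 4;

   Replacing uses of i_7 by 5 (and likewise SSA names assigned a constant
   directly) lets subsequent folding turn _8 into a constant as well,
   keeping the size estimates honest while the unroller iterates.  */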

/* Process loops from innermost to outer, stopping at the innermost
   loop we unrolled.  */

static bool
tree_unroll_loops_completely_1 (bool may_increase_size, bool unroll_outer,
				bitmap father_bbs, struct loop *loop)
{
  struct loop *loop_father;
  bool changed = false;
  struct loop *inner;
  enum unroll_level ul;

  /* Process inner loops first.  */
  for (inner = loop->inner; inner != NULL; inner = inner->next)
    changed |= tree_unroll_loops_completely_1 (may_increase_size,
					       unroll_outer, father_bbs,
					       inner);

  /* If we changed an inner loop we cannot process outer loops in this
     iteration because SSA form is not up-to-date.  Continue with
     siblings of outer loops instead.  */
  if (changed)
    return true;

  /* Don't unroll #pragma omp simd loops until the vectorizer
     attempts to vectorize those.  */
  if (loop->force_vectorize)
    return false;

  /* Try to unroll this loop.  */
  loop_father = loop_outer (loop);
  if (!loop_father)
    return false;

  if (may_increase_size && optimize_loop_nest_for_speed_p (loop)
      /* Unroll outermost loops only if asked to do so or they do
	 not cause code growth.  */
      && (unroll_outer || loop_outer (loop_father)))
    ul = UL_ALL;
  else
    ul = UL_NO_GROWTH;

  if (canonicalize_loop_induction_variables
	(loop, false, ul, !flag_tree_loop_ivcanon))
    {
      /* If we'll continue unrolling, we need to propagate constants
	 within the new basic blocks to fold away induction variable
	 computations; otherwise, the size might blow up before the
	 iteration is complete and the IR eventually cleaned up.  */
      if (loop_outer (loop_father))
	bitmap_set_bit (father_bbs, loop_father->header->index);

      return true;
    }

  return false;
}

/* Unroll LOOPS completely if they iterate just a few times.  Unless
   MAY_INCREASE_SIZE is true, perform the unrolling only if the
   size of the code does not increase.  */

unsigned int
tree_unroll_loops_completely (bool may_increase_size, bool unroll_outer)
{
  bitmap father_bbs = BITMAP_ALLOC (NULL);
  bool changed;
  int iteration = 0;
  bool irred_invalidated = false;

  do
    {
      changed = false;
      bitmap loop_closed_ssa_invalidated = NULL;

      if (loops_state_satisfies_p (LOOP_CLOSED_SSA))
	loop_closed_ssa_invalidated = BITMAP_ALLOC (NULL);

      free_numbers_of_iterations_estimates (cfun);
      estimate_numbers_of_iterations ();

      changed = tree_unroll_loops_completely_1 (may_increase_size,
						unroll_outer, father_bbs,
						current_loops->tree_root);
      if (changed)
	{
	  unsigned i;

	  unloop_loops (loop_closed_ssa_invalidated, &irred_invalidated);

	  /* We cannot use TODO_update_ssa_no_phi because VOPS gets
	     confused.  */
	  if (loop_closed_ssa_invalidated
	      && !bitmap_empty_p (loop_closed_ssa_invalidated))
	    rewrite_into_loop_closed_ssa (loop_closed_ssa_invalidated,
					  TODO_update_ssa);
	  else
	    update_ssa (TODO_update_ssa);

	  /* father_bbs is a bitmap of loop father header BB indices.
	     Translate that to what non-root loops these BBs belong to now.  */
	  bitmap_iterator bi;
	  bitmap fathers = BITMAP_ALLOC (NULL);
	  EXECUTE_IF_SET_IN_BITMAP (father_bbs, 0, i, bi)
	    {
	      basic_block unrolled_loop_bb = BASIC_BLOCK_FOR_FN (cfun, i);
	      if (! unrolled_loop_bb)
		continue;
	      if (loop_outer (unrolled_loop_bb->loop_father))
		bitmap_set_bit (fathers,
				unrolled_loop_bb->loop_father->num);
	    }
	  bitmap_clear (father_bbs);
	  /* Propagate the constants within the new basic blocks.  */
	  EXECUTE_IF_SET_IN_BITMAP (fathers, 0, i, bi)
	    {
	      loop_p father = get_loop (cfun, i);
	      basic_block *body = get_loop_body_in_dom_order (father);
	      for (unsigned j = 0; j < father->num_nodes; j++)
		propagate_constants_for_unrolling (body[j]);
	      free (body);
	    }
	  BITMAP_FREE (fathers);

	  /* This will take care of removing completely unrolled loops
	     from the loop structures so we can continue unrolling now
	     innermost loops.  */
	  if (cleanup_tree_cfg ())
	    update_ssa (TODO_update_ssa_only_virtuals);

	  /* Clean up the information about numbers of iterations, since
	     complete unrolling might have invalidated it.  */
	  scev_reset ();
	  if (flag_checking && loops_state_satisfies_p (LOOP_CLOSED_SSA))
	    verify_loop_closed_ssa (true);
	}
      if (loop_closed_ssa_invalidated)
	BITMAP_FREE (loop_closed_ssa_invalidated);
    }
  while (changed
	 && ++iteration <= PARAM_VALUE (PARAM_MAX_UNROLL_ITERATIONS));

  BITMAP_FREE (father_bbs);

  if (irred_invalidated
      && loops_state_satisfies_p (LOOPS_HAVE_MARKED_IRREDUCIBLE_REGIONS))
    mark_irreducible_loops ();

  return 0;
}

/* Canonical induction variable creation pass.  */

namespace {

const pass_data pass_data_iv_canon =
{
  GIMPLE_PASS, /* type */
  "ivcanon", /* name */
  OPTGROUP_LOOP, /* optinfo_flags */
  TV_TREE_LOOP_IVCANON, /* tv_id */
  ( PROP_cfg | PROP_ssa ), /* properties_required */
  0, /* properties_provided */
  0, /* properties_destroyed */
  0, /* todo_flags_start */
  0, /* todo_flags_finish */
};

class pass_iv_canon : public gimple_opt_pass
{
public:
  pass_iv_canon (gcc::context *ctxt)
    : gimple_opt_pass (pass_data_iv_canon, ctxt)
  {}

  /* opt_pass methods: */
  virtual bool gate (function *) { return flag_tree_loop_ivcanon != 0; }
  virtual unsigned int execute (function *fun);

}; // class pass_iv_canon

unsigned int
pass_iv_canon::execute (function *fun)
{
  if (number_of_loops (fun) <= 1)
    return 0;

  return canonicalize_induction_variables ();
}

} // anon namespace

gimple_opt_pass *
make_pass_iv_canon (gcc::context *ctxt)
{
  return new pass_iv_canon (ctxt);
}

/* Complete unrolling of loops.  */

namespace {

const pass_data pass_data_complete_unroll =
{
  GIMPLE_PASS, /* type */
  "cunroll", /* name */
  OPTGROUP_LOOP, /* optinfo_flags */
  TV_COMPLETE_UNROLL, /* tv_id */
  ( PROP_cfg | PROP_ssa ), /* properties_required */
  0, /* properties_provided */
  0, /* properties_destroyed */
  0, /* todo_flags_start */
  0, /* todo_flags_finish */
};

class pass_complete_unroll : public gimple_opt_pass
{
public:
  pass_complete_unroll (gcc::context *ctxt)
    : gimple_opt_pass (pass_data_complete_unroll, ctxt)
  {}

  /* opt_pass methods: */
  virtual unsigned int execute (function *);

}; // class pass_complete_unroll

unsigned int
pass_complete_unroll::execute (function *fun)
{
  if (number_of_loops (fun) <= 1)
    return 0;

  /* If we ever decide to run loop peeling more than once, we will need to
     track loops already peeled in loop structures themselves to avoid
     re-peeling the same loop multiple times.  */
  if (flag_peel_loops)
    peeled_loops = BITMAP_ALLOC (NULL);
  int val = tree_unroll_loops_completely (flag_unroll_loops
					  || flag_peel_loops
					  || optimize >= 3, true);
  if (peeled_loops)
    {
      BITMAP_FREE (peeled_loops);
      peeled_loops = NULL;
    }
  return val;
}

} // anon namespace

gimple_opt_pass *
make_pass_complete_unroll (gcc::context *ctxt)
{
  return new pass_complete_unroll (ctxt);
}

/* Complete unrolling of inner loops.  */

namespace {

const pass_data pass_data_complete_unrolli =
{
  GIMPLE_PASS, /* type */
  "cunrolli", /* name */
  OPTGROUP_LOOP, /* optinfo_flags */
  TV_COMPLETE_UNROLL, /* tv_id */
  ( PROP_cfg | PROP_ssa ), /* properties_required */
  0, /* properties_provided */
  0, /* properties_destroyed */
  0, /* todo_flags_start */
  0, /* todo_flags_finish */
};

class pass_complete_unrolli : public gimple_opt_pass
{
public:
  pass_complete_unrolli (gcc::context *ctxt)
    : gimple_opt_pass (pass_data_complete_unrolli, ctxt)
  {}

  /* opt_pass methods: */
  virtual bool gate (function *) { return optimize >= 2; }
  virtual unsigned int execute (function *);

}; // class pass_complete_unrolli

unsigned int
pass_complete_unrolli::execute (function *fun)
{
  unsigned ret = 0;

  loop_optimizer_init (LOOPS_NORMAL
		       | LOOPS_HAVE_RECORDED_EXITS);
  if (number_of_loops (fun) > 1)
    {
      scev_initialize ();
      ret = tree_unroll_loops_completely (optimize >= 3, false);
      free_numbers_of_iterations_estimates (fun);
      scev_finalize ();
    }
  loop_optimizer_finalize ();

  return ret;
}

} // anon namespace

gimple_opt_pass *
make_pass_complete_unrolli (gcc::context *ctxt)
{
  return new pass_complete_unrolli (ctxt);
}