/* Induction variable canonicalization and loop peeling.
   Copyright (C) 2004-2014 Free Software Foundation, Inc.

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify it
under the terms of the GNU General Public License as published by the
Free Software Foundation; either version 3, or (at your option) any
later version.

GCC is distributed in the hope that it will be useful, but WITHOUT
ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3.  If not see
<http://www.gnu.org/licenses/>.  */
/* This pass detects the loops that iterate a constant number of times,
   adds a canonical induction variable (step -1, tested against 0)
   and replaces the exit test.  This enables the less powerful rtl
   level analysis to use this information.

   This might spoil the code in some cases (by increasing register pressure).
   Note that in the case the new variable is not needed, ivopts will get rid
   of it, so it might only be a problem when there are no other linear
   induction variables.  In that case the created optimization possibilities
   are likely to pay off.

   We also perform
   - complete unrolling (or peeling) when the loop is rolling few enough
     times
   - simple peeling (i.e. copying few initial iterations prior to the loop)
     when the number of iterations estimate is known (typically by the
     profile info).  */

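/* For illustration (a sketch, not part of the original comment): given

     for (i = 0; i < n; i++)
       a[i] = 0;

   with n proved constant, the pass rewrites the exit test to use a
   canonical IV counting down to zero, roughly

     ivtmp = n;
     do { a[i] = 0; i++; ivtmp--; } while (ivtmp != 0);

   and, when n is small enough, unrolls the loop completely so that no
   back edge remains.  */
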
#include "config.h"
#include "system.h"
#include "coretypes.h"
#include "tm.h"
#include "tree.h"
#include "tm_p.h"
#include "profile.h"
#include "predict.h"
#include "vec.h"
#include "hashtab.h"
#include "hash-set.h"
#include "machmode.h"
#include "hard-reg-set.h"
#include "input.h"
#include "function.h"
#include "dominance.h"
#include "cfg.h"
#include "basic-block.h"
#include "gimple-pretty-print.h"
#include "tree-ssa-alias.h"
#include "internal-fn.h"
#include "gimple-fold.h"
#include "tree-eh.h"
#include "gimple-expr.h"
#include "is-a.h"
#include "gimple.h"
#include "gimple-iterator.h"
#include "gimple-ssa.h"
#include "hash-map.h"
#include "plugin-api.h"
#include "ipa-ref.h"
#include "cgraph.h"
#include "tree-cfg.h"
#include "tree-phinodes.h"
#include "ssa-iterators.h"
#include "stringpool.h"
#include "tree-ssanames.h"
#include "tree-ssa-loop-manip.h"
#include "tree-ssa-loop-niter.h"
#include "tree-ssa-loop.h"
#include "tree-into-ssa.h"
#include "cfgloop.h"
#include "tree-pass.h"
#include "tree-chrec.h"
#include "tree-scalar-evolution.h"
#include "params.h"
#include "flags.h"
#include "tree-inline.h"
#include "target.h"
#include "tree-cfgcleanup.h"
#include "builtins.h"

/* Specifies types of loops that may be unrolled.  */

enum unroll_level
{
  UL_SINGLE_ITER,	/* Only loops that exit immediately in the first
			   iteration.  */
  UL_NO_GROWTH,		/* Only loops whose unrolling will not cause increase
			   of code size.  */
  UL_ALL		/* All suitable loops.  */
};

/* Adds a canonical induction variable to LOOP iterating NITER times.  EXIT
   is the exit edge whose condition is replaced.  */

static void
create_canonical_iv (struct loop *loop, edge exit, tree niter)
{
  edge in;
  tree type, var;
  gcond *cond;
  gimple_stmt_iterator incr_at;
  enum tree_code cmp;

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      fprintf (dump_file, "Added canonical iv to loop %d, ", loop->num);
      print_generic_expr (dump_file, niter, TDF_SLIM);
      fprintf (dump_file, " iterations.\n");
    }

  cond = as_a <gcond *> (last_stmt (exit->src));
  in = EDGE_SUCC (exit->src, 0);
  if (in == exit)
    in = EDGE_SUCC (exit->src, 1);

  /* Note that we do not need to worry about overflows, since
     type of niter is always unsigned and all comparisons are
     just for equality/nonequality -- i.e. everything works
     with modulo arithmetic.  */

  type = TREE_TYPE (niter);
  niter = fold_build2 (PLUS_EXPR, type,
		       niter,
		       build_int_cst (type, 1));
  incr_at = gsi_last_bb (in->src);
  create_iv (niter,
	     build_int_cst (type, -1),
	     NULL_TREE, loop,
	     &incr_at, false, NULL, &var);

  cmp = (exit->flags & EDGE_TRUE_VALUE) ? EQ_EXPR : NE_EXPR;
  gimple_cond_set_code (cond, cmp);
  gimple_cond_set_lhs (cond, var);
  gimple_cond_set_rhs (cond, build_int_cst (type, 0));
  update_stmt (cond);
}

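/* A sketch of the transformation (illustrative only): with NITER == n - 1,
   the new IV starts at NITER + 1 == n and steps by -1, so the exit test
   becomes roughly

     ivtmp_1 = PHI <n, ivtmp_2>
     ...
     ivtmp_2 = ivtmp_1 - 1;
     if (ivtmp_2 == 0)
       goto exit;

   where the comparison code (EQ vs. NE) depends on whether the exit edge
   is the true or the false edge of the condition.  */
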
/* Describe size of loop as detected by tree_estimate_loop_size.  */
struct loop_size
{
  /* Number of instructions in the loop.  */
  int overall;

  /* Number of instructions that will be likely optimized out in
     peeled iterations of loop (i.e. computation based on induction
     variable where induction variable starts at known constant.)  */
  int eliminated_by_peeling;

  /* Same statistics for last iteration of loop: it is smaller because
     instructions after exit are not executed.  */
  int last_iteration;
  int last_iteration_eliminated_by_peeling;

  /* If some IV computation will become constant.  */
  bool constant_iv;

  /* Number of call stmts that are not a builtin and are pure or const
     present on the hot path.  */
  int num_pure_calls_on_hot_path;
  /* Number of call stmts that are not a builtin and are not pure nor const
     present on the hot path.  */
  int num_non_pure_calls_on_hot_path;
  /* Number of statements other than calls in the loop.  */
  int non_call_stmts_on_hot_path;
  /* Number of branches seen on the hot path.  */
  int num_branches_on_hot_path;
};

/* Return true if OP in STMT will be constant after peeling LOOP.  */

static bool
constant_after_peeling (tree op, gimple stmt, struct loop *loop)
{
  affine_iv iv;

  if (is_gimple_min_invariant (op))
    return true;

  /* We can still fold accesses to constant arrays when index is known.  */
  if (TREE_CODE (op) != SSA_NAME)
    {
      tree base = op;

      /* First take a quick look to check for a constant array inside.  */
      while (handled_component_p (base))
	base = TREE_OPERAND (base, 0);
      if ((DECL_P (base)
	   && ctor_for_folding (base) != error_mark_node)
	  || CONSTANT_CLASS_P (base))
	{
	  /* If so, see if we understand all the indices.  */
	  base = op;
	  while (handled_component_p (base))
	    {
	      if (TREE_CODE (base) == ARRAY_REF
		  && !constant_after_peeling (TREE_OPERAND (base, 1), stmt, loop))
		return false;
	      base = TREE_OPERAND (base, 0);
	    }
	  return true;
	}
      return false;
    }

  /* Induction variables are constants.  */
  if (!simple_iv (loop, loop_containing_stmt (stmt), op, &iv, false))
    return false;
  if (!is_gimple_min_invariant (iv.base))
    return false;
  if (!is_gimple_min_invariant (iv.step))
    return false;
  return true;
}

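/* Example (illustrative, not from the original sources): in

     static const int tbl[4] = { 1, 2, 4, 8 };
     for (i = 0; i < 4; i++)
       sum += tbl[i];

   each peeled copy sees a constant i, so both the IV uses and the reads
   from tbl[i] satisfy constant_after_peeling and are expected to fold
   away once the copies are simplified.  */
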
/* Computes an estimated number of insns in LOOP.
   EXIT (if non-NULL) is an exit edge that will be eliminated in all but last
   iteration of the loop.
   EDGE_TO_CANCEL (if non-NULL) is a non-exit edge eliminated in the last
   iteration of loop.
   Return results in SIZE, estimating benefits for complete unrolling exiting
   by EXIT.
   Stop estimating after UPPER_BOUND is met.  Return true in this case.  */

static bool
tree_estimate_loop_size (struct loop *loop, edge exit, edge edge_to_cancel,
			 struct loop_size *size, int upper_bound)
{
  basic_block *body = get_loop_body (loop);
  gimple_stmt_iterator gsi;
  unsigned int i;
  bool after_exit;
  vec<basic_block> path = get_loop_hot_path (loop);

  size->overall = 0;
  size->eliminated_by_peeling = 0;
  size->last_iteration = 0;
  size->last_iteration_eliminated_by_peeling = 0;
  size->num_pure_calls_on_hot_path = 0;
  size->num_non_pure_calls_on_hot_path = 0;
  size->non_call_stmts_on_hot_path = 0;
  size->num_branches_on_hot_path = 0;
  size->constant_iv = 0;

  if (dump_file && (dump_flags & TDF_DETAILS))
    fprintf (dump_file, "Estimating sizes for loop %i\n", loop->num);
  for (i = 0; i < loop->num_nodes; i++)
    {
      if (edge_to_cancel && body[i] != edge_to_cancel->src
	  && dominated_by_p (CDI_DOMINATORS, body[i], edge_to_cancel->src))
	after_exit = true;
      else
	after_exit = false;
      if (dump_file && (dump_flags & TDF_DETAILS))
	fprintf (dump_file, " BB: %i, after_exit: %i\n", body[i]->index,
		 after_exit);

      for (gsi = gsi_start_bb (body[i]); !gsi_end_p (gsi); gsi_next (&gsi))
	{
	  gimple stmt = gsi_stmt (gsi);
	  int num = estimate_num_insns (stmt, &eni_size_weights);
	  bool likely_eliminated = false;
	  bool likely_eliminated_last = false;
	  bool likely_eliminated_peeled = false;

	  if (dump_file && (dump_flags & TDF_DETAILS))
	    {
	      fprintf (dump_file, "  size: %3i ", num);
	      print_gimple_stmt (dump_file, gsi_stmt (gsi), 0, 0);
	    }

	  /* Look for reasons why we might optimize this stmt away.  */

	  if (gimple_has_side_effects (stmt))
	    ;
	  /* Exit conditional.  */
	  else if (exit && body[i] == exit->src
		   && stmt == last_stmt (exit->src))
	    {
	      if (dump_file && (dump_flags & TDF_DETAILS))
		fprintf (dump_file, "   Exit condition will be eliminated "
			 "in peeled copies.\n");
	      likely_eliminated_peeled = true;
	    }
	  else if (edge_to_cancel && body[i] == edge_to_cancel->src
		   && stmt == last_stmt (edge_to_cancel->src))
	    {
	      if (dump_file && (dump_flags & TDF_DETAILS))
		fprintf (dump_file, "   Exit condition will be eliminated "
			 "in last copy.\n");
	      likely_eliminated_last = true;
	    }
	  /* Sets of IV variables.  */
	  else if (gimple_code (stmt) == GIMPLE_ASSIGN
		   && constant_after_peeling (gimple_assign_lhs (stmt), stmt, loop))
	    {
	      if (dump_file && (dump_flags & TDF_DETAILS))
		fprintf (dump_file, "   Induction variable computation will"
			 " be folded away.\n");
	      likely_eliminated = true;
	    }
	  /* Assignments of IV variables.  */
	  else if (gimple_code (stmt) == GIMPLE_ASSIGN
		   && TREE_CODE (gimple_assign_lhs (stmt)) == SSA_NAME
		   && constant_after_peeling (gimple_assign_rhs1 (stmt), stmt, loop)
		   && (gimple_assign_rhs_class (stmt) != GIMPLE_BINARY_RHS
		       || constant_after_peeling (gimple_assign_rhs2 (stmt),
						  stmt, loop)))
	    {
	      size->constant_iv = true;
	      if (dump_file && (dump_flags & TDF_DETAILS))
		fprintf (dump_file,
			 "   Constant expression will be folded away.\n");
	      likely_eliminated = true;
	    }
	  /* Conditionals.  */
	  else if ((gimple_code (stmt) == GIMPLE_COND
		    && constant_after_peeling (gimple_cond_lhs (stmt), stmt, loop)
		    && constant_after_peeling (gimple_cond_rhs (stmt), stmt, loop))
		   || (gimple_code (stmt) == GIMPLE_SWITCH
		       && constant_after_peeling (gimple_switch_index (
						    as_a <gswitch *> (stmt)),
						  stmt, loop)))
	    {
	      if (dump_file && (dump_flags & TDF_DETAILS))
		fprintf (dump_file, "   Constant conditional.\n");
	      likely_eliminated = true;
	    }

	  size->overall += num;
	  if (likely_eliminated || likely_eliminated_peeled)
	    size->eliminated_by_peeling += num;
	  if (!after_exit)
	    {
	      size->last_iteration += num;
	      if (likely_eliminated || likely_eliminated_last)
		size->last_iteration_eliminated_by_peeling += num;
	    }
	  if ((size->overall * 3 / 2 - size->eliminated_by_peeling
	       - size->last_iteration_eliminated_by_peeling) > upper_bound)
	    {
	      free (body);
	      path.release ();
	      return true;
	    }
	}
    }
  while (path.length ())
    {
      basic_block bb = path.pop ();
      for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
	{
	  gimple stmt = gsi_stmt (gsi);
	  if (gimple_code (stmt) == GIMPLE_CALL)
	    {
	      int flags = gimple_call_flags (stmt);
	      tree decl = gimple_call_fndecl (stmt);

	      if (decl && DECL_IS_BUILTIN (decl)
		  && is_inexpensive_builtin (decl))
		;
	      else if (flags & (ECF_PURE | ECF_CONST))
		size->num_pure_calls_on_hot_path++;
	      else
		size->num_non_pure_calls_on_hot_path++;
	      size->num_branches_on_hot_path++;
	    }
	  else if (gimple_code (stmt) != GIMPLE_CALL
		   && gimple_code (stmt) != GIMPLE_DEBUG)
	    size->non_call_stmts_on_hot_path++;
	  if (((gimple_code (stmt) == GIMPLE_COND
		&& (!constant_after_peeling (gimple_cond_lhs (stmt), stmt, loop)
		    || !constant_after_peeling (gimple_cond_rhs (stmt), stmt,
						loop)))
	       || (gimple_code (stmt) == GIMPLE_SWITCH
		   && !constant_after_peeling (gimple_switch_index (
						 as_a <gswitch *> (stmt)),
					       stmt, loop)))
	      && (!exit || bb != exit->src))
	    size->num_branches_on_hot_path++;
	}
    }
  path.release ();
  if (dump_file && (dump_flags & TDF_DETAILS))
    fprintf (dump_file, "size: %i-%i, last_iteration: %i-%i\n", size->overall,
	     size->eliminated_by_peeling, size->last_iteration,
	     size->last_iteration_eliminated_by_peeling);

  free (body);
  return false;
}

/* Estimate number of insns of completely unrolled loop.
   It is (NUNROLL + 1) * size of loop body, taking into account
   the fact that in last copy everything after exit conditional
   is dead and that some instructions will be eliminated after
   peeling.

   Loop body is likely going to simplify further; since this is difficult
   to guess, we just decrease the result by 1/3.  */

static unsigned HOST_WIDE_INT
estimated_unrolled_size (struct loop_size *size,
			 unsigned HOST_WIDE_INT nunroll)
{
  HOST_WIDE_INT unr_insns = ((nunroll)
			     * (HOST_WIDE_INT) (size->overall
						- size->eliminated_by_peeling));
  if (!nunroll)
    unr_insns = 0;
  unr_insns += size->last_iteration - size->last_iteration_eliminated_by_peeling;

  unr_insns = unr_insns * 2 / 3;
  if (unr_insns <= 0)
    unr_insns = 1;

  return unr_insns;
}

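/* Worked example (illustrative numbers only): with overall = 12,
   eliminated_by_peeling = 4, last_iteration = 10,
   last_iteration_eliminated_by_peeling = 6 and nunroll = 3, we get
   3 * (12 - 4) + (10 - 6) = 28 insns, scaled by 2/3 to 18 -- the
   value later compared against --param max-completely-peeled-insns.  */
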
/* Loop LOOP is known to not loop.  See if there is an edge in the loop
   body that can be removed to make the loop always exit and at
   the same time it does not make any code potentially executed
   during the last iteration dead.

   After complete unrolling we still may get rid of the conditional
   on the exit in the last copy even if we have no idea what it does.
   This is quite a common case for loops of form

     int a[5];
     for (i=0;i<b;i++)
       a[i]=0;

   Here we prove the loop to iterate 5 times but we do not know
   it from the induction variable.

   For now we handle only the simple case where there is an exit condition
   just before the latch block and the latch block contains no statements
   with side effect that may otherwise terminate the execution of loop
   (such as by EH or by terminating the program or longjmp).

   In the general case we may want to cancel the paths leading to statements
   loop-niter identified as having undefined effect in the last iteration.
   The other cases are hopefully rare and will be cleaned up later.  */

static edge
loop_edge_to_cancel (struct loop *loop)
{
  vec<edge> exits;
  unsigned i;
  edge edge_to_cancel;
  gimple_stmt_iterator gsi;

  /* We want only one predecessor of the loop.  */
  if (EDGE_COUNT (loop->latch->preds) > 1)
    return NULL;

  exits = get_loop_exit_edges (loop);

  FOR_EACH_VEC_ELT (exits, i, edge_to_cancel)
    {
      /* Find the other edge than the loop exit
	 leaving the conditional.  */
      if (EDGE_COUNT (edge_to_cancel->src->succs) != 2)
	continue;
      if (EDGE_SUCC (edge_to_cancel->src, 0) == edge_to_cancel)
	edge_to_cancel = EDGE_SUCC (edge_to_cancel->src, 1);
      else
	edge_to_cancel = EDGE_SUCC (edge_to_cancel->src, 0);

      /* We can only handle conditionals.  */
      if (!(edge_to_cancel->flags & (EDGE_TRUE_VALUE | EDGE_FALSE_VALUE)))
	continue;

      /* We should never have conditionals in the loop latch.  */
      gcc_assert (edge_to_cancel->dest != loop->header);

      /* Check that it leads to loop latch.  */
      if (edge_to_cancel->dest != loop->latch)
	continue;

      exits.release ();

      /* Verify that the code in loop latch does nothing that may end program
	 execution without really reaching the exit.  This may include
	 non-pure/const function calls, EH statements, volatile ASMs etc.  */
      for (gsi = gsi_start_bb (loop->latch); !gsi_end_p (gsi); gsi_next (&gsi))
	if (gimple_has_side_effects (gsi_stmt (gsi)))
	  return NULL;
      return edge_to_cancel;
    }
  exits.release ();
  return NULL;
}

/* Remove all tests for exits that are known to be taken after LOOP was
   peeled NPEELED times.  Put gcc_unreachable before every statement
   known to not be executed.  */

static bool
remove_exits_and_undefined_stmts (struct loop *loop, unsigned int npeeled)
{
  struct nb_iter_bound *elt;
  bool changed = false;

  for (elt = loop->bounds; elt; elt = elt->next)
    {
      /* If statement is known to be undefined after peeling, turn it
	 into unreachable (or trap when debugging experience is supposed
	 to be good).  */
      if (!elt->is_exit
	  && wi::ltu_p (elt->bound, npeeled))
	{
	  gimple_stmt_iterator gsi = gsi_for_stmt (elt->stmt);
	  gcall *stmt = gimple_build_call
	      (builtin_decl_implicit (BUILT_IN_UNREACHABLE), 0);

	  gimple_set_location (stmt, gimple_location (elt->stmt));
	  gsi_insert_before (&gsi, stmt, GSI_NEW_STMT);
	  changed = true;
	  if (dump_file && (dump_flags & TDF_DETAILS))
	    {
	      fprintf (dump_file, "Forced statement unreachable: ");
	      print_gimple_stmt (dump_file, elt->stmt, 0, 0);
	    }
	}
      /* If we know the exit will be taken after peeling, update.  */
      else if (elt->is_exit
	       && wi::leu_p (elt->bound, npeeled))
	{
	  basic_block bb = gimple_bb (elt->stmt);
	  edge exit_edge = EDGE_SUCC (bb, 0);

	  if (dump_file && (dump_flags & TDF_DETAILS))
	    {
	      fprintf (dump_file, "Forced exit to be taken: ");
	      print_gimple_stmt (dump_file, elt->stmt, 0, 0);
	    }
	  if (!loop_exit_edge_p (loop, exit_edge))
	    exit_edge = EDGE_SUCC (bb, 1);
	  gcc_checking_assert (loop_exit_edge_p (loop, exit_edge));
	  gcond *cond_stmt = as_a <gcond *> (elt->stmt);
	  if (exit_edge->flags & EDGE_TRUE_VALUE)
	    gimple_cond_make_true (cond_stmt);
	  else
	    gimple_cond_make_false (cond_stmt);
	  update_stmt (cond_stmt);
	  changed = true;
	}
    }
  return changed;
}

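/* For instance (a hypothetical scenario): if a recorded bound proves a
   non-exit statement executes fewer than NPEELED times, all its remaining
   executions in the loop proper would be undefined, so a
   __builtin_unreachable () call is inserted before it; an exit whose bound
   is at most NPEELED is instead folded so that it is always taken.  */
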
/* Remove all exits that are known to be never taken because of the loop bound
   discovered.  */

static bool
remove_redundant_iv_tests (struct loop *loop)
{
  struct nb_iter_bound *elt;
  bool changed = false;

  if (!loop->any_upper_bound)
    return false;
  for (elt = loop->bounds; elt; elt = elt->next)
    {
      /* Exit is pointless if it won't be taken before loop reaches
	 upper bound.  */
      if (elt->is_exit && loop->any_upper_bound
	  && wi::ltu_p (loop->nb_iterations_upper_bound, elt->bound))
	{
	  basic_block bb = gimple_bb (elt->stmt);
	  edge exit_edge = EDGE_SUCC (bb, 0);
	  struct tree_niter_desc niter;

	  if (!loop_exit_edge_p (loop, exit_edge))
	    exit_edge = EDGE_SUCC (bb, 1);

	  /* Only when we know the actual number of iterations, not
	     just a bound, we can remove the exit.  */
	  if (!number_of_iterations_exit (loop, exit_edge,
					  &niter, false, false)
	      || !integer_onep (niter.assumptions)
	      || !integer_zerop (niter.may_be_zero)
	      || !niter.niter
	      || TREE_CODE (niter.niter) != INTEGER_CST
	      || !wi::ltu_p (loop->nb_iterations_upper_bound,
			     wi::to_widest (niter.niter)))
	    continue;

	  if (dump_file && (dump_flags & TDF_DETAILS))
	    {
	      fprintf (dump_file, "Removed pointless exit: ");
	      print_gimple_stmt (dump_file, elt->stmt, 0, 0);
	    }
	  gcond *cond_stmt = as_a <gcond *> (elt->stmt);
	  if (exit_edge->flags & EDGE_TRUE_VALUE)
	    gimple_cond_make_false (cond_stmt);
	  else
	    gimple_cond_make_true (cond_stmt);
	  update_stmt (cond_stmt);
	  changed = true;
	}
    }
  return changed;
}

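/* Illustration (hypothetical): in

     for (i = 0; i < n; i++)
       {
	 if (i > 1000)
	   break;   // exit requiring more than 1000 iterations
	 ...
       }

   once the loop's upper bound is proved to be, say, 100 iterations, the
   "i > 1000" exit can never trigger, so its condition is folded to keep
   control inside the loop.  */
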
/* Stores loops that will be unlooped after we process whole loop tree.  */
static vec<loop_p> loops_to_unloop;
static vec<int> loops_to_unloop_nunroll;

/* Cancel all fully unrolled loops by putting __builtin_unreachable
   on the latch edge.
   We do it after all unrolling since unlooping moves basic blocks
   across loop boundaries trashing loop closed SSA form as well
   as SCEV info that needs to stay intact during unrolling.

   IRRED_INVALIDATED is used to bookkeep if information about
   irreducible regions may become invalid as a result
   of the transformation.
   LOOP_CLOSED_SSA_INVALIDATED is used to bookkeep the case
   when we need to go into loop closed SSA form.  */

static void
unloop_loops (bitmap loop_closed_ssa_invalidated,
	      bool *irred_invalidated)
{
  while (loops_to_unloop.length ())
    {
      struct loop *loop = loops_to_unloop.pop ();
      int n_unroll = loops_to_unloop_nunroll.pop ();
      basic_block latch = loop->latch;
      edge latch_edge = loop_latch_edge (loop);
      int flags = latch_edge->flags;
      location_t locus = latch_edge->goto_locus;
      gcall *stmt;
      gimple_stmt_iterator gsi;

      remove_exits_and_undefined_stmts (loop, n_unroll);

      /* Unloop destroys the latch edge.  */
      unloop (loop, irred_invalidated, loop_closed_ssa_invalidated);

      /* Create new basic block for the latch edge destination and wire
	 it in.  */
      stmt = gimple_build_call (builtin_decl_implicit (BUILT_IN_UNREACHABLE), 0);
      latch_edge = make_edge (latch, create_basic_block (NULL, NULL, latch), flags);
      latch_edge->probability = 0;
      latch_edge->count = 0;
      latch_edge->flags |= flags;
      latch_edge->goto_locus = locus;

      latch_edge->dest->loop_father = current_loops->tree_root;
      latch_edge->dest->count = 0;
      latch_edge->dest->frequency = 0;
      set_immediate_dominator (CDI_DOMINATORS, latch_edge->dest, latch_edge->src);

      gsi = gsi_start_bb (latch_edge->dest);
      gsi_insert_after (&gsi, stmt, GSI_NEW_STMT);
    }
  loops_to_unloop.release ();
  loops_to_unloop_nunroll.release ();
}

/* Tries to unroll LOOP completely, i.e. NITER times.
   UL determines which loops we are allowed to unroll.
   EXIT is the exit of the loop that should be eliminated.
   MAXITER specifies the bound on number of iterations, -1 if it is
   not known or too large for HOST_WIDE_INT.  The location
   LOCUS corresponding to the loop is used when emitting
   a summary of the unroll to the dump file.  */

static bool
try_unroll_loop_completely (struct loop *loop,
			    edge exit, tree niter,
			    enum unroll_level ul,
			    HOST_WIDE_INT maxiter,
			    location_t locus)
{
  unsigned HOST_WIDE_INT n_unroll = 0, ninsns, unr_insns;
  struct loop_size size;
  bool n_unroll_found = false;
  edge edge_to_cancel = NULL;
  int report_flags = MSG_OPTIMIZED_LOCATIONS | TDF_RTL | TDF_DETAILS;

  /* See if we proved number of iterations to be low constant.

     EXIT is an edge that will be removed in all but last iteration of
     the loop.

     EDGE_TO_CANCEL is an edge that will be removed from the last iteration
     of the unrolled sequence and is expected to make the final loop not
     rolling.

     If the number of execution of loop is determined by standard induction
     variable test, then EXIT and EDGE_TO_CANCEL are the two edges leaving
     from the iv test.  */
  if (tree_fits_uhwi_p (niter))
    {
      n_unroll = tree_to_uhwi (niter);
      n_unroll_found = true;
      edge_to_cancel = EDGE_SUCC (exit->src, 0);
      if (edge_to_cancel == exit)
	edge_to_cancel = EDGE_SUCC (exit->src, 1);
    }
  /* We do not know the number of iterations and thus we can not eliminate
     the EXIT edge.  */
  else
    exit = NULL;

  /* See if we can improve our estimate by using recorded loop bounds.  */
  if (maxiter >= 0
      && (!n_unroll_found || (unsigned HOST_WIDE_INT) maxiter < n_unroll))
    {
      n_unroll = maxiter;
      n_unroll_found = true;
      /* Loop terminates before the IV variable test, so we can not
	 remove it in the last iteration.  */
      edge_to_cancel = NULL;
    }

  if (!n_unroll_found)
    return false;

  if (n_unroll > (unsigned) PARAM_VALUE (PARAM_MAX_COMPLETELY_PEEL_TIMES))
    {
      if (dump_file && (dump_flags & TDF_DETAILS))
	fprintf (dump_file, "Not unrolling loop %d "
		 "(--param max-completely-peeled-times limit reached).\n",
		 loop->num);
      return false;
    }

  if (!edge_to_cancel)
    edge_to_cancel = loop_edge_to_cancel (loop);

  if (n_unroll)
    {
      sbitmap wont_exit;
      edge e;
      unsigned i;
      bool large;
      vec<edge> to_remove = vNULL;
      if (ul == UL_SINGLE_ITER)
	return false;

      large = tree_estimate_loop_size
		(loop, exit, edge_to_cancel, &size,
		 PARAM_VALUE (PARAM_MAX_COMPLETELY_PEELED_INSNS));
      ninsns = size.overall;
      if (large)
	{
	  if (dump_file && (dump_flags & TDF_DETAILS))
	    fprintf (dump_file, "Not unrolling loop %d: it is too large.\n",
		     loop->num);
	  return false;
	}

      unr_insns = estimated_unrolled_size (&size, n_unroll);
      if (dump_file && (dump_flags & TDF_DETAILS))
	{
	  fprintf (dump_file, "  Loop size: %d\n", (int) ninsns);
	  fprintf (dump_file, "  Estimated size after unrolling: %d\n",
		   (int) unr_insns);
	}

      /* If the code is going to shrink, we don't need to be extra cautious
	 on guessing if the unrolling is going to be profitable.  */
      if (unr_insns
	  /* If there is IV variable that will become constant, we save
	     one instruction in the loop prologue we do not account
	     otherwise.  */
	  <= ninsns + (size.constant_iv != false))
	;
      /* We unroll only inner loops, because we do not consider it profitable
	 otherwise.  We still can cancel loopback edge of not rolling loop;
	 this is always a good idea.  */
      else if (ul == UL_NO_GROWTH)
	{
	  if (dump_file && (dump_flags & TDF_DETAILS))
	    fprintf (dump_file, "Not unrolling loop %d: size would grow.\n",
		     loop->num);
	  return false;
	}
      /* Outer loops tend to be less interesting candidates for complete
	 unrolling unless we can do a lot of propagation into the inner loop
	 body.  For now we disable outer loop unrolling when the code would
	 grow.  */
      else if (loop->inner)
	{
	  if (dump_file && (dump_flags & TDF_DETAILS))
	    fprintf (dump_file, "Not unrolling loop %d: "
		     "it is not innermost and code would grow.\n",
		     loop->num);
	  return false;
	}
      /* If there is call on a hot path through the loop, then
	 there is most probably not much to optimize.  */
      else if (size.num_non_pure_calls_on_hot_path)
	{
	  if (dump_file && (dump_flags & TDF_DETAILS))
	    fprintf (dump_file, "Not unrolling loop %d: "
		     "contains call and code would grow.\n",
		     loop->num);
	  return false;
	}
      /* If there is pure/const call in the function, then we
	 can still optimize the unrolled loop body if it contains
	 some other interesting code than the calls and code
	 storing or cumulating the return value.  */
      else if (size.num_pure_calls_on_hot_path
	       /* One IV increment, one test, one ivtmp store
		  and one useful stmt.  That is about the minimal loop
		  doing a pure call.  */
	       && (size.non_call_stmts_on_hot_path
		   <= 3 + size.num_pure_calls_on_hot_path))
	{
	  if (dump_file && (dump_flags & TDF_DETAILS))
	    fprintf (dump_file, "Not unrolling loop %d: "
		     "contains just pure calls and code would grow.\n",
		     loop->num);
	  return false;
	}
      /* Complete unrolling is a major win when control flow is removed and
	 one big basic block is created.  If the loop contains control flow
	 the optimization may still be a win because of eliminating the loop
	 overhead but it also may blow the branch predictor tables.
	 Limit number of branches on the hot path through the peeled
	 sequence.  */
      else if (size.num_branches_on_hot_path * (int) n_unroll
	       > PARAM_VALUE (PARAM_MAX_PEEL_BRANCHES))
	{
	  if (dump_file && (dump_flags & TDF_DETAILS))
	    fprintf (dump_file, "Not unrolling loop %d: "
		     " number of branches on hot path in the unrolled sequence"
		     " reaches --param max-peel-branches limit.\n",
		     loop->num);
	  return false;
	}
      else if (unr_insns
	       > (unsigned) PARAM_VALUE (PARAM_MAX_COMPLETELY_PEELED_INSNS))
	{
	  if (dump_file && (dump_flags & TDF_DETAILS))
	    fprintf (dump_file, "Not unrolling loop %d: "
		     "(--param max-completely-peeled-insns limit reached).\n",
		     loop->num);
	  return false;
	}
      dump_printf_loc (report_flags, locus,
		       "loop turned into non-loop; it never loops.\n");

      initialize_original_copy_tables ();
      wont_exit = sbitmap_alloc (n_unroll + 1);
      bitmap_ones (wont_exit);
      bitmap_clear_bit (wont_exit, 0);

      if (!gimple_duplicate_loop_to_header_edge (loop, loop_preheader_edge (loop),
						 n_unroll, wont_exit,
						 exit, &to_remove,
						 DLTHE_FLAG_UPDATE_FREQ
						 | DLTHE_FLAG_COMPLETTE_PEEL))
	{
	  free_original_copy_tables ();
	  free (wont_exit);
	  if (dump_file && (dump_flags & TDF_DETAILS))
	    fprintf (dump_file, "Failed to duplicate the loop\n");
	  return false;
	}

      FOR_EACH_VEC_ELT (to_remove, i, e)
	{
	  bool ok = remove_path (e);
	  gcc_assert (ok);
	}

      to_remove.release ();
      free (wont_exit);
      free_original_copy_tables ();
    }

  /* Remove the conditional from the last copy of the loop.  */
  if (edge_to_cancel)
    {
      gcond *cond = as_a <gcond *> (last_stmt (edge_to_cancel->src));
      if (edge_to_cancel->flags & EDGE_TRUE_VALUE)
	gimple_cond_make_false (cond);
      else
	gimple_cond_make_true (cond);
      update_stmt (cond);
      /* Do not remove the path.  Doing so may remove outer loop
	 and confuse bookkeeping code in tree_unroll_loops_completely.  */
    }

  /* Store the loop for later unlooping and exit removal.  */
  loops_to_unloop.safe_push (loop);
  loops_to_unloop_nunroll.safe_push (n_unroll);

  if (dump_enabled_p ())
    {
      if (!n_unroll)
	dump_printf_loc (MSG_OPTIMIZED_LOCATIONS | TDF_DETAILS, locus,
			 "loop turned into non-loop; it never loops\n");
      else
	{
	  dump_printf_loc (MSG_OPTIMIZED_LOCATIONS | TDF_DETAILS, locus,
			   "loop with %d iterations completely unrolled",
			   (int) (n_unroll + 1));
	  if (profile_info)
	    dump_printf (MSG_OPTIMIZED_LOCATIONS | TDF_DETAILS,
			 " (header execution count %d)",
			 (int) loop->header->count);
	  dump_printf (MSG_OPTIMIZED_LOCATIONS | TDF_DETAILS, "\n");
	}
    }

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      if (exit)
	fprintf (dump_file, "Exit condition of peeled iterations was "
		 "eliminated.\n");
      if (edge_to_cancel)
	fprintf (dump_file, "Last iteration exit edge was proved true.\n");
      else
	fprintf (dump_file, "Latch of last iteration was marked by "
		 "__builtin_unreachable ().\n");
    }

  return true;
}

/* Return number of instructions after peeling.  */
static unsigned HOST_WIDE_INT
estimated_peeled_sequence_size (struct loop_size *size,
				unsigned HOST_WIDE_INT npeel)
{
  return MAX (npeel * (HOST_WIDE_INT) (size->overall
				       - size->eliminated_by_peeling), 1);
}

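/* Worked example (illustrative numbers): with overall = 10,
   eliminated_by_peeling = 6 and npeel = 4, the peeled sequence is
   estimated at MAX (4 * (10 - 6), 1) = 16 insns, which is what gets
   checked against --param max-peel-insns below.  */
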
/* If the loop is expected to iterate N times and is
   small enough, duplicate the loop body N+1 times before
   the loop itself.  This way the hot path will never
   enter the loop.
   Parameters are the same as for try_unroll_loop_completely.  */

static bool
try_peel_loop (struct loop *loop,
	       edge exit, tree niter,
	       HOST_WIDE_INT maxiter)
{
  int npeel;
  struct loop_size size;
  int peeled_size;
  sbitmap wont_exit;
  unsigned i;
  vec<edge> to_remove = vNULL;
  edge e;

  /* If the iteration bound is known and large, then we can safely eliminate
     the check in peeled copies.  */
  if (TREE_CODE (niter) != INTEGER_CST)
    exit = NULL;

  if (!flag_peel_loops || PARAM_VALUE (PARAM_MAX_PEEL_TIMES) <= 0)
    return false;

  /* Peel only innermost loops.  */
  if (loop->inner)
    {
      if (dump_file)
	fprintf (dump_file, "Not peeling: outer loop\n");
      return false;
    }

  if (!optimize_loop_for_speed_p (loop))
    {
      if (dump_file)
	fprintf (dump_file, "Not peeling: cold loop\n");
      return false;
    }

  /* Check if there is an estimate on the number of iterations.  */
  npeel = estimated_loop_iterations_int (loop);
  if (npeel < 0)
    {
      if (dump_file)
	fprintf (dump_file, "Not peeling: number of iterations is not "
		 "estimated\n");
      return false;
    }
  if (maxiter >= 0 && maxiter <= npeel)
    {
      if (dump_file)
	fprintf (dump_file, "Not peeling: upper bound is known so can "
		 "unroll completely\n");
      return false;
    }

  /* We want to peel estimated number of iterations + 1 (so we never
     enter the loop on quick path).  Check against PARAM_MAX_PEEL_TIMES
     and be sure to avoid overflows.  */
  if (npeel > PARAM_VALUE (PARAM_MAX_PEEL_TIMES) - 1)
    {
      if (dump_file)
	fprintf (dump_file, "Not peeling: rolls too much "
		 "(%i + 1 > --param max-peel-times)\n", npeel);
      return false;
    }
  npeel++;

  /* Check peeled loops size.  */
  tree_estimate_loop_size (loop, exit, NULL, &size,
			   PARAM_VALUE (PARAM_MAX_PEELED_INSNS));
  if ((peeled_size = estimated_peeled_sequence_size (&size, npeel))
      > PARAM_VALUE (PARAM_MAX_PEELED_INSNS))
    {
      if (dump_file)
	fprintf (dump_file, "Not peeling: peeled sequence size is too large "
		 "(%i insns > --param max-peel-insns)", peeled_size);
      return false;
    }

  /* Duplicate possibly eliminating the exits.  */
  initialize_original_copy_tables ();
  wont_exit = sbitmap_alloc (npeel + 1);
  bitmap_ones (wont_exit);
  bitmap_clear_bit (wont_exit, 0);
  if (!gimple_duplicate_loop_to_header_edge (loop, loop_preheader_edge (loop),
					     npeel, wont_exit,
					     exit, &to_remove,
					     DLTHE_FLAG_UPDATE_FREQ
					     | DLTHE_FLAG_COMPLETTE_PEEL))
    {
      free_original_copy_tables ();
      free (wont_exit);
      return false;
    }
  FOR_EACH_VEC_ELT (to_remove, i, e)
    {
      bool ok = remove_path (e);
      gcc_assert (ok);
    }
  free (wont_exit);
  free_original_copy_tables ();
  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      fprintf (dump_file, "Peeled loop %d, %i times.\n",
	       loop->num, npeel);
    }
  if (loop->any_upper_bound)
    loop->nb_iterations_upper_bound -= npeel;
  loop->nb_iterations_estimate = 0;
  /* Make sure to mark loop cold so we do not try to peel it more.  */
  scale_loop_profile (loop, 1, 0);
  loop->header->count = 0;
  return true;
}
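
/* Peeling sketch (illustrative): with an estimate of 2 iterations,

     loop: body; if (cond) goto loop;

   becomes roughly

     body; if (!cond) goto done;
     body; if (!cond) goto done;
     body;
     loop: body; if (cond) goto loop;   /+ now expected to be cold +/
     done:

   so the hot path falls straight through without entering the loop.  */
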
/* Adds a canonical induction variable to LOOP if suitable.
   CREATE_IV is true if we may create a new iv.  UL determines
   which loops we are allowed to completely unroll.  If TRY_EVAL is true,
   we try to determine the number of iterations of a loop by direct
   evaluation.
   Returns true if cfg is changed.  */

static bool
canonicalize_loop_induction_variables (struct loop *loop,
				       bool create_iv, enum unroll_level ul,
				       bool try_eval)
{
  edge exit = NULL;
  tree niter;
  HOST_WIDE_INT maxiter;
  bool modified = false;
  location_t locus = UNKNOWN_LOCATION;

  niter = number_of_latch_executions (loop);
  exit = single_exit (loop);
  if (TREE_CODE (niter) == INTEGER_CST)
    locus = gimple_location (last_stmt (exit->src));
  else
    {
      /* If the loop has more than one exit, try checking all of them
	 for # of iterations determinable through scev.  */
      if (!exit)
	niter = find_loop_niter (loop, &exit);

      /* Finally if everything else fails, try brute force evaluation.  */
      if (try_eval
	  && (chrec_contains_undetermined (niter)
	      || TREE_CODE (niter) != INTEGER_CST))
	niter = find_loop_niter_by_eval (loop, &exit);

      if (exit)
	locus = gimple_location (last_stmt (exit->src));

      if (TREE_CODE (niter) != INTEGER_CST)
	exit = NULL;
    }

  /* We work exceptionally hard here to estimate the bound
     by find_loop_niter_by_eval.  Be sure to keep it for the future.  */
  if (niter && TREE_CODE (niter) == INTEGER_CST)
    {
      record_niter_bound (loop, wi::to_widest (niter),
			  exit == single_likely_exit (loop), true);
    }

  /* Force re-computation of loop bounds so we can remove redundant exits.  */
  maxiter = max_loop_iterations_int (loop);

  if (dump_file && (dump_flags & TDF_DETAILS)
      && TREE_CODE (niter) == INTEGER_CST)
    {
      fprintf (dump_file, "Loop %d iterates ", loop->num);
      print_generic_expr (dump_file, niter, TDF_SLIM);
      fprintf (dump_file, " times.\n");
    }
  if (dump_file && (dump_flags & TDF_DETAILS)
      && maxiter >= 0)
    {
      fprintf (dump_file, "Loop %d iterates at most %i times.\n", loop->num,
	       (int) maxiter);
    }

  /* Remove exits that are known to be never taken based on loop bound.
     Needs to be called after computation of max_loop_iterations_int, which
     populates the loop bounds.  */
  modified |= remove_redundant_iv_tests (loop);

  if (try_unroll_loop_completely (loop, exit, niter, ul, maxiter, locus))
    return true;

  if (create_iv
      && niter && !chrec_contains_undetermined (niter)
      && exit && just_once_each_iteration_p (loop, exit->src))
    create_canonical_iv (loop, exit, niter);

  if (ul == UL_ALL)
    modified |= try_peel_loop (loop, exit, niter, maxiter);

  return modified;
}

/* The main entry point of the pass.  Adds canonical induction variables
   to the suitable loops.  */

unsigned int
canonicalize_induction_variables (void)
{
  struct loop *loop;
  bool changed = false;
  bool irred_invalidated = false;
  bitmap loop_closed_ssa_invalidated = BITMAP_ALLOC (NULL);

  free_numbers_of_iterations_estimates ();
  estimate_numbers_of_iterations ();

  FOR_EACH_LOOP (loop, LI_FROM_INNERMOST)
    {
      changed |= canonicalize_loop_induction_variables (loop,
							true, UL_SINGLE_ITER,
							true);
    }
  gcc_assert (!need_ssa_update_p (cfun));

  unloop_loops (loop_closed_ssa_invalidated, &irred_invalidated);
  if (irred_invalidated
      && loops_state_satisfies_p (LOOPS_HAVE_MARKED_IRREDUCIBLE_REGIONS))
    mark_irreducible_loops ();

  /* Clean up the information about numbers of iterations, since brute force
     evaluation could reveal new information.  */
  scev_reset ();

  if (!bitmap_empty_p (loop_closed_ssa_invalidated))
    {
      gcc_checking_assert (loops_state_satisfies_p (LOOP_CLOSED_SSA));
      rewrite_into_loop_closed_ssa (NULL, TODO_update_ssa);
    }
  BITMAP_FREE (loop_closed_ssa_invalidated);

  if (changed)
    return TODO_cleanup_cfg;
  return 0;
}

/* Propagate VAL into all uses of SSA_NAME.  */

static void
propagate_into_all_uses (tree ssa_name, tree val)
{
  imm_use_iterator iter;
  gimple use_stmt;

  FOR_EACH_IMM_USE_STMT (use_stmt, iter, ssa_name)
    {
      gimple_stmt_iterator use_stmt_gsi = gsi_for_stmt (use_stmt);
      use_operand_p use;

      FOR_EACH_IMM_USE_ON_STMT (use, iter)
	SET_USE (use, val);

      if (is_gimple_assign (use_stmt)
	  && get_gimple_rhs_class (gimple_assign_rhs_code (use_stmt))
	     == GIMPLE_SINGLE_RHS)
	{
	  tree rhs = gimple_assign_rhs1 (use_stmt);

	  if (TREE_CODE (rhs) == ADDR_EXPR)
	    recompute_tree_invariant_for_addr_expr (rhs);
	}

      fold_stmt_inplace (&use_stmt_gsi);
      update_stmt (use_stmt);
      maybe_clean_or_replace_eh_stmt (use_stmt, use_stmt);
    }
}

/* Propagate constant SSA_NAMEs defined in basic block BB.  */

static void
propagate_constants_for_unrolling (basic_block bb)
{
  /* Look for degenerate PHI nodes with constant argument.  */
  for (gphi_iterator gsi = gsi_start_phis (bb); !gsi_end_p (gsi); )
    {
      gphi *phi = gsi.phi ();
      tree result = gimple_phi_result (phi);
      tree arg = gimple_phi_arg_def (phi, 0);

      if (gimple_phi_num_args (phi) == 1 && TREE_CODE (arg) == INTEGER_CST)
	{
	  propagate_into_all_uses (result, arg);
	  gsi_remove (&gsi, true);
	  release_ssa_name (result);
	}
      else
	gsi_next (&gsi);
    }

  /* Look for assignments to SSA names with constant RHS.  */
  for (gimple_stmt_iterator gsi = gsi_start_bb (bb); !gsi_end_p (gsi); )
    {
      gimple stmt = gsi_stmt (gsi);
      tree lhs;

      if (is_gimple_assign (stmt)
	  && gimple_assign_rhs_code (stmt) == INTEGER_CST
	  && (lhs = gimple_assign_lhs (stmt), TREE_CODE (lhs) == SSA_NAME)
	  && !SSA_NAME_OCCURS_IN_ABNORMAL_PHI (lhs))
	{
	  propagate_into_all_uses (lhs, gimple_assign_rhs1 (stmt));
	  gsi_remove (&gsi, true);
	  release_ssa_name (lhs);
	}
      else
	gsi_next (&gsi);
    }
}

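/* Example (illustrative GIMPLE, not from a real dump): after unrolling, a
   block may contain

     i_5 = PHI <7>          /+ degenerate PHI with one constant argument +/
     a_6 = i_5 * 4;

   propagate_constants_for_unrolling replaces all uses of i_5 with 7 and
   removes the PHI, letting a_6 fold to 28, which keeps the intermediate
   IR small while further unrolling iterations run.  */
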
/* Process loops from innermost to outer, stopping at the innermost
   loop we unrolled.  */

static bool
tree_unroll_loops_completely_1 (bool may_increase_size, bool unroll_outer,
				vec<loop_p, va_heap>& father_stack,
				struct loop *loop)
{
  struct loop *loop_father;
  bool changed = false;
  struct loop *inner;
  enum unroll_level ul;

  /* Process inner loops first.  */
  for (inner = loop->inner; inner != NULL; inner = inner->next)
    changed |= tree_unroll_loops_completely_1 (may_increase_size,
					       unroll_outer, father_stack,
					       inner);

  /* If we changed an inner loop we cannot process outer loops in this
     iteration because SSA form is not up-to-date.  Continue with
     siblings of outer loops instead.  */
  if (changed)
    return true;

  /* Don't unroll #pragma omp simd loops until the vectorizer
     attempts to vectorize those.  */
  if (loop->force_vectorize)
    return false;

  /* Try to unroll this loop.  */
  loop_father = loop_outer (loop);
  if (!loop_father)
    return false;

  if (may_increase_size && optimize_loop_nest_for_speed_p (loop)
      /* Unroll outermost loops only if asked to do so or they do
	 not cause code growth.  */
      && (unroll_outer || loop_outer (loop_father)))
    ul = UL_ALL;
  else
    ul = UL_NO_GROWTH;

  if (canonicalize_loop_induction_variables
	(loop, false, ul, !flag_tree_loop_ivcanon))
    {
      /* If we'll continue unrolling, we need to propagate constants
	 within the new basic blocks to fold away induction variable
	 computations; otherwise, the size might blow up before the
	 iteration is complete and the IR eventually cleaned up.  */
      if (loop_outer (loop_father) && !loop_father->aux)
	{
	  father_stack.safe_push (loop_father);
	  loop_father->aux = loop_father;
	}

      return true;
    }

  return false;
}

/* Unroll LOOPS completely if they iterate just few times.  Unless
   MAY_INCREASE_SIZE is true, perform the unrolling only if the
   size of the code does not increase.  */

unsigned int
tree_unroll_loops_completely (bool may_increase_size, bool unroll_outer)
{
  auto_vec<loop_p, 16> father_stack;
  bool changed;
  int iteration = 0;
  bool irred_invalidated = false;

  do
    {
      changed = false;
      bitmap loop_closed_ssa_invalidated = NULL;

      if (loops_state_satisfies_p (LOOP_CLOSED_SSA))
	loop_closed_ssa_invalidated = BITMAP_ALLOC (NULL);

      free_numbers_of_iterations_estimates ();
      estimate_numbers_of_iterations ();

      changed = tree_unroll_loops_completely_1 (may_increase_size,
						unroll_outer, father_stack,
						current_loops->tree_root);
      if (changed)
	{
	  struct loop **iter;
	  unsigned i;

	  /* Be sure to skip unlooped loops while processing father_stack
	     array.  */
	  FOR_EACH_VEC_ELT (loops_to_unloop, i, iter)
	    (*iter)->aux = NULL;
	  FOR_EACH_VEC_ELT (father_stack, i, iter)
	    if (!(*iter)->aux)
	      *iter = NULL;
	  unloop_loops (loop_closed_ssa_invalidated, &irred_invalidated);

	  /* We can not use TODO_update_ssa_no_phi because VOPS gets
	     confused.  */
	  if (loop_closed_ssa_invalidated
	      && !bitmap_empty_p (loop_closed_ssa_invalidated))
	    rewrite_into_loop_closed_ssa (loop_closed_ssa_invalidated,
					  TODO_update_ssa);
	  else
	    update_ssa (TODO_update_ssa);

	  /* Propagate the constants within the new basic blocks.  */
	  FOR_EACH_VEC_ELT (father_stack, i, iter)
	    if (*iter)
	      {
		unsigned j;
		basic_block *body = get_loop_body_in_dom_order (*iter);
		for (j = 0; j < (*iter)->num_nodes; j++)
		  propagate_constants_for_unrolling (body[j]);
		free (body);
		(*iter)->aux = NULL;
	      }
	  father_stack.truncate (0);

	  /* This will take care of removing completely unrolled loops
	     from the loop structures so we can continue unrolling now
	     innermost loops.  */
	  if (cleanup_tree_cfg ())
	    update_ssa (TODO_update_ssa_only_virtuals);

	  /* Clean up the information about numbers of iterations, since
	     complete unrolling might have invalidated it.  */
	  scev_reset ();
#ifdef ENABLE_CHECKING
	  if (loops_state_satisfies_p (LOOP_CLOSED_SSA))
	    verify_loop_closed_ssa (true);
#endif
	}
      if (loop_closed_ssa_invalidated)
	BITMAP_FREE (loop_closed_ssa_invalidated);
    }
  while (changed
	 && ++iteration <= PARAM_VALUE (PARAM_MAX_UNROLL_ITERATIONS));

  father_stack.release ();

  if (irred_invalidated
      && loops_state_satisfies_p (LOOPS_HAVE_MARKED_IRREDUCIBLE_REGIONS))
    mark_irreducible_loops ();

  return 0;
}

/* Canonical induction variable creation pass.  */

namespace {

const pass_data pass_data_iv_canon =
{
  GIMPLE_PASS, /* type */
  "ivcanon", /* name */
  OPTGROUP_LOOP, /* optinfo_flags */
  TV_TREE_LOOP_IVCANON, /* tv_id */
  ( PROP_cfg | PROP_ssa ), /* properties_required */
  0, /* properties_provided */
  0, /* properties_destroyed */
  0, /* todo_flags_start */
  0, /* todo_flags_finish */
};

class pass_iv_canon : public gimple_opt_pass
{
public:
  pass_iv_canon (gcc::context *ctxt)
    : gimple_opt_pass (pass_data_iv_canon, ctxt)
  {}

  /* opt_pass methods: */
  virtual bool gate (function *) { return flag_tree_loop_ivcanon != 0; }
  virtual unsigned int execute (function *fun);

}; // class pass_iv_canon

unsigned int
pass_iv_canon::execute (function *fun)
{
  if (number_of_loops (fun) <= 1)
    return 0;

  return canonicalize_induction_variables ();
}

} // anon namespace

gimple_opt_pass *
make_pass_iv_canon (gcc::context *ctxt)
{
  return new pass_iv_canon (ctxt);
}

/* Complete unrolling of loops.  */

namespace {

const pass_data pass_data_complete_unroll =
{
  GIMPLE_PASS, /* type */
  "cunroll", /* name */
  OPTGROUP_LOOP, /* optinfo_flags */
  TV_COMPLETE_UNROLL, /* tv_id */
  ( PROP_cfg | PROP_ssa ), /* properties_required */
  0, /* properties_provided */
  0, /* properties_destroyed */
  0, /* todo_flags_start */
  0, /* todo_flags_finish */
};

class pass_complete_unroll : public gimple_opt_pass
{
public:
  pass_complete_unroll (gcc::context *ctxt)
    : gimple_opt_pass (pass_data_complete_unroll, ctxt)
  {}

  /* opt_pass methods: */
  virtual unsigned int execute (function *);

}; // class pass_complete_unroll

unsigned int
pass_complete_unroll::execute (function *fun)
{
  if (number_of_loops (fun) <= 1)
    return 0;

  return tree_unroll_loops_completely (flag_unroll_loops
				       || flag_peel_loops
				       || optimize >= 3, true);
}

} // anon namespace

gimple_opt_pass *
make_pass_complete_unroll (gcc::context *ctxt)
{
  return new pass_complete_unroll (ctxt);
}

/* Complete unrolling of inner loops.  */

namespace {

const pass_data pass_data_complete_unrolli =
{
  GIMPLE_PASS, /* type */
  "cunrolli", /* name */
  OPTGROUP_LOOP, /* optinfo_flags */
  TV_COMPLETE_UNROLL, /* tv_id */
  ( PROP_cfg | PROP_ssa ), /* properties_required */
  0, /* properties_provided */
  0, /* properties_destroyed */
  0, /* todo_flags_start */
  0, /* todo_flags_finish */
};

class pass_complete_unrolli : public gimple_opt_pass
{
public:
  pass_complete_unrolli (gcc::context *ctxt)
    : gimple_opt_pass (pass_data_complete_unrolli, ctxt)
  {}

  /* opt_pass methods: */
  virtual bool gate (function *) { return optimize >= 2; }
  virtual unsigned int execute (function *);

}; // class pass_complete_unrolli

unsigned int
pass_complete_unrolli::execute (function *fun)
{
  unsigned ret = 0;

  loop_optimizer_init (LOOPS_NORMAL
		       | LOOPS_HAVE_RECORDED_EXITS);
  if (number_of_loops (fun) > 1)
    {
      scev_initialize ();
      ret = tree_unroll_loops_completely (optimize >= 3, false);
      free_numbers_of_iterations_estimates ();
      scev_finalize ();
    }
  loop_optimizer_finalize ();

  return ret;
}

} // anon namespace

gimple_opt_pass *
make_pass_complete_unrolli (gcc::context *ctxt)
{
  return new pass_complete_unrolli (ctxt);
}