gcc/tree-ssa-loop-ivcanon.c
84eb345f 1/* Induction variable canonicalization and loop peeling.
fbd26352 2 Copyright (C) 2004-2019 Free Software Foundation, Inc.
48e1416a 3
bb445479 4This file is part of GCC.
48e1416a 5
bb445479 6GCC is free software; you can redistribute it and/or modify it
7under the terms of the GNU General Public License as published by the
8c4c00c1 8Free Software Foundation; either version 3, or (at your option) any
bb445479 9later version.
48e1416a 10
bb445479 11GCC is distributed in the hope that it will be useful, but WITHOUT
12ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
13FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
14for more details.
48e1416a 15
bb445479 16You should have received a copy of the GNU General Public License
8c4c00c1 17along with GCC; see the file COPYING3. If not see
18<http://www.gnu.org/licenses/>. */
bb445479 19
 20/* This pass detects loops that iterate a constant number of times,
48e1416a 21 adds a canonical induction variable (step -1, tested against 0)
bb445479 22 and replaces the exit test. This enables the less powerful RTL-level
 23 analysis to use this information.
 24
 25 This might spoil the code in some cases (by increasing register pressure).
 26 Note that if the new variable is not needed, ivopts will get rid of it,
 27 so this is only a problem when there are no other linear induction
 28 variables. In that case the optimization opportunities created are likely
 29 to pay off.
30
c836de3f 31 We also perform
4cf494ec 32 - complete unrolling (or peeling) when the loop rolls few enough
c836de3f 33 times
 34 - simple peeling (i.e. copying a few initial iterations before the loop)
 35 when an estimate of the number of iterations is known (typically from
 36 the profile info). */
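
/* As an illustrative sketch (not part of the pass sources), the IV
   canonicalization turns a counted loop such as

     for (i = 0; i < 100; i++)
       a[i] = 0;

   into a loop driven by a fresh IV that counts down to zero, so the
   exit test becomes a comparison against 0:

     ivtmp = 100;
     do { a[100 - ivtmp] = 0; ivtmp--; } while (ivtmp != 0);  */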
bb445479 37
38#include "config.h"
39#include "system.h"
40#include "coretypes.h"
9ef16211 41#include "backend.h"
bb445479 42#include "tree.h"
9ef16211 43#include "gimple.h"
7c29e30e 44#include "cfghooks.h"
45#include "tree-pass.h"
9ef16211 46#include "ssa.h"
7c29e30e 47#include "cgraph.h"
48#include "gimple-pretty-print.h"
b20a8bb4 49#include "fold-const.h"
886c1262 50#include "profile.h"
bc61cadb 51#include "gimple-fold.h"
52#include "tree-eh.h"
dcf1a1ec 53#include "gimple-iterator.h"
073c1fd5 54#include "tree-cfg.h"
05d9c18a 55#include "tree-ssa-loop-manip.h"
56#include "tree-ssa-loop-niter.h"
073c1fd5 57#include "tree-ssa-loop.h"
58#include "tree-into-ssa.h"
bb445479 59#include "cfgloop.h"
bb445479 60#include "tree-chrec.h"
61#include "tree-scalar-evolution.h"
62#include "params.h"
bb445479 63#include "tree-inline.h"
424a4a92 64#include "tree-cfgcleanup.h"
f7715905 65#include "builtins.h"
51e85e64 66#include "tree-ssa-sccvn.h"
bb445479 67
604f7b8a 68/* Specifies types of loops that may be unrolled. */
69
70enum unroll_level
71{
6414dd4c 72 UL_SINGLE_ITER, /* Only loops that exit immediately in the first
604f7b8a 73 iteration. */
74 UL_NO_GROWTH, /* Only loops whose unrolling will not cause increase
75 of code size. */
76 UL_ALL /* All suitable loops. */
77};
78
bb445479 79/* Adds a canonical induction variable to LOOP iterating NITER times. EXIT
5051abaf 80 is the exit edge whose condition is replaced. The SSA versions of the new
81 IV before and after increment will be stored in VAR_BEFORE and VAR_AFTER
82 if they are not NULL. */
bb445479 83
5051abaf 84void
85create_canonical_iv (struct loop *loop, edge exit, tree niter,
86 tree *var_before = NULL, tree *var_after = NULL)
bb445479 87{
88 edge in;
75a70cf9 89 tree type, var;
1a91d914 90 gcond *cond;
75a70cf9 91 gimple_stmt_iterator incr_at;
bb445479 92 enum tree_code cmp;
93
94 if (dump_file && (dump_flags & TDF_DETAILS))
95 {
96 fprintf (dump_file, "Added canonical iv to loop %d, ", loop->num);
97 print_generic_expr (dump_file, niter, TDF_SLIM);
98 fprintf (dump_file, " iterations.\n");
99 }
100
1a91d914 101 cond = as_a <gcond *> (last_stmt (exit->src));
cd665a06 102 in = EDGE_SUCC (exit->src, 0);
bb445479 103 if (in == exit)
cd665a06 104 in = EDGE_SUCC (exit->src, 1);
bb445479 105
 106 /* Note that we do not need to worry about overflows, since
 107 the type of NITER is always unsigned and all comparisons are
 108 just for equality/nonequality -- i.e. everything works
 109 in modulo arithmetic. */
110
111 type = TREE_TYPE (niter);
49d00087 112 niter = fold_build2 (PLUS_EXPR, type,
113 niter,
114 build_int_cst (type, 1));
75a70cf9 115 incr_at = gsi_last_bb (in->src);
bb445479 116 create_iv (niter,
3c6185f1 117 build_int_cst (type, -1),
bb445479 118 NULL_TREE, loop,
5051abaf 119 &incr_at, false, var_before, &var);
120 if (var_after)
121 *var_after = var;
bb445479 122
123 cmp = (exit->flags & EDGE_TRUE_VALUE) ? EQ_EXPR : NE_EXPR;
75a70cf9 124 gimple_cond_set_code (cond, cmp);
125 gimple_cond_set_lhs (cond, var);
126 gimple_cond_set_rhs (cond, build_int_cst (type, 0));
22aa74c4 127 update_stmt (cond);
bb445479 128}
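
/* For example (an illustrative sketch): for a loop known to iterate
   NITER times whose exit edge is the true edge of its conditional,
   the code above creates a new IV with initial value NITER + 1 and
   step -1, and rewrites the exit condition into

     if (ivtmp == 0) goto exit;

   (NE_EXPR rather than EQ_EXPR when the exit edge is the false
   edge).  */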
129
aa2ba534 130/* Describe size of loop as detected by tree_estimate_loop_size. */
131struct loop_size
132{
133 /* Number of instructions in the loop. */
134 int overall;
135
 136 /* Number of instructions that are likely to be optimized out in
 137 peeled iterations of the loop (i.e. computations based on an
 138 induction variable that starts at a known constant). */
139 int eliminated_by_peeling;
140
 141 /* Same statistics for the last iteration of the loop: it is smaller
 142 because instructions after the exit are not executed. */
143 int last_iteration;
144 int last_iteration_eliminated_by_peeling;
d583c979 145
146 /* If some IV computation will become constant. */
147 bool constant_iv;
148
 149 /* Number of call stmts that are not builtins and are pure or const,
 150 present on the hot path. */
 151 int num_pure_calls_on_hot_path;
 152 /* Number of call stmts that are not builtins and are neither pure nor
 153 const, present on the hot path. */
 154 int num_non_pure_calls_on_hot_path;
 155 /* Number of statements other than calls on the hot path. */
 156 int non_call_stmts_on_hot_path;
157 /* Number of branches seen on the hot path. */
158 int num_branches_on_hot_path;
aa2ba534 159};
160
161/* Return true if OP in STMT will be constant after peeling LOOP. */
162
163static bool
42acab1c 164constant_after_peeling (tree op, gimple *stmt, struct loop *loop)
aa2ba534 165{
aa2ba534 166 if (is_gimple_min_invariant (op))
167 return true;
48e1416a 168
aa2ba534 169 /* We can still fold accesses to constant arrays when the index is known. */
170 if (TREE_CODE (op) != SSA_NAME)
171 {
172 tree base = op;
173
 174 /* First take a quick look to see if there is a constant array inside. */
175 while (handled_component_p (base))
176 base = TREE_OPERAND (base, 0);
248022b2 177 if ((DECL_P (base)
df8d3e89 178 && ctor_for_folding (base) != error_mark_node)
aa2ba534 179 || CONSTANT_CLASS_P (base))
180 {
181 /* If so, see if we understand all the indices. */
182 base = op;
183 while (handled_component_p (base))
184 {
185 if (TREE_CODE (base) == ARRAY_REF
186 && !constant_after_peeling (TREE_OPERAND (base, 1), stmt, loop))
187 return false;
188 base = TREE_OPERAND (base, 0);
189 }
190 return true;
191 }
192 return false;
193 }
194
c39eea14 195 /* Induction variables are constants when defined in the loop. */
196 if (loop_containing_stmt (stmt) != loop)
aa2ba534 197 return false;
c39eea14 198 tree ev = analyze_scalar_evolution (loop, op);
199 if (chrec_contains_undetermined (ev)
200 || chrec_contains_symbols (ev))
aa2ba534 201 return false;
202 return true;
203}
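
/* Example (illustrative): given

     static const int t[4] = {1, 2, 3, 4};

   a load "t[i]" in the loop counts as constant after peeling whenever
   the index "i" is an induction variable with a scalar evolution known
   to SCEV, since every peeled copy then sees a literal index and the
   access folds through ctor_for_folding.  */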
204
d583c979 205/* Computes an estimated number of insns in LOOP.
 206 EXIT (if non-NULL) is an exit edge that will be eliminated in all but
 207 the last iteration of the loop.
 208 EDGE_TO_CANCEL (if non-NULL) is a non-exit edge eliminated in the last
 209 iteration of the loop.
84eb345f 210 Return results in SIZE, estimating the benefit of complete unrolling exiting by EXIT.
04437ab6 211 Stop estimating once UPPER_BOUND is met and return true in that case. */
aa2ba534 212
84eb345f 213static bool
f18de397 214tree_estimate_loop_size (struct loop *loop, edge exit, edge edge_to_cancel,
215 struct loop_size *size, int upper_bound)
aa2ba534 216{
217 basic_block *body = get_loop_body (loop);
218 gimple_stmt_iterator gsi;
219 unsigned int i;
220 bool after_exit;
f1f41a6c 221 vec<basic_block> path = get_loop_hot_path (loop);
aa2ba534 222
223 size->overall = 0;
224 size->eliminated_by_peeling = 0;
225 size->last_iteration = 0;
226 size->last_iteration_eliminated_by_peeling = 0;
d583c979 227 size->num_pure_calls_on_hot_path = 0;
228 size->num_non_pure_calls_on_hot_path = 0;
229 size->non_call_stmts_on_hot_path = 0;
230 size->num_branches_on_hot_path = 0;
231 size->constant_iv = 0;
aa2ba534 232
233 if (dump_file && (dump_flags & TDF_DETAILS))
234 fprintf (dump_file, "Estimating sizes for loop %i\n", loop->num);
235 for (i = 0; i < loop->num_nodes; i++)
236 {
c790d986 237 if (edge_to_cancel && body[i] != edge_to_cancel->src
238 && dominated_by_p (CDI_DOMINATORS, body[i], edge_to_cancel->src))
aa2ba534 239 after_exit = true;
240 else
241 after_exit = false;
242 if (dump_file && (dump_flags & TDF_DETAILS))
f18de397 243 fprintf (dump_file, " BB: %i, after_exit: %i\n", body[i]->index,
244 after_exit);
aa2ba534 245
246 for (gsi = gsi_start_bb (body[i]); !gsi_end_p (gsi); gsi_next (&gsi))
247 {
42acab1c 248 gimple *stmt = gsi_stmt (gsi);
aa2ba534 249 int num = estimate_num_insns (stmt, &eni_size_weights);
250 bool likely_eliminated = false;
d583c979 251 bool likely_eliminated_last = false;
252 bool likely_eliminated_peeled = false;
aa2ba534 253
254 if (dump_file && (dump_flags & TDF_DETAILS))
255 {
256 fprintf (dump_file, " size: %3i ", num);
1ffa4346 257 print_gimple_stmt (dump_file, gsi_stmt (gsi), 0);
aa2ba534 258 }
259
260 /* Look for reasons why we might optimize this stmt away. */
261
8c1879bc 262 if (!gimple_has_side_effects (stmt))
aa2ba534 263 {
8c1879bc 264 /* Exit conditional. */
265 if (exit && body[i] == exit->src
266 && stmt == last_stmt (exit->src))
267 {
268 if (dump_file && (dump_flags & TDF_DETAILS))
269 fprintf (dump_file, " Exit condition will be eliminated "
270 "in peeled copies.\n");
271 likely_eliminated_peeled = true;
272 }
273 if (edge_to_cancel && body[i] == edge_to_cancel->src
274 && stmt == last_stmt (edge_to_cancel->src))
275 {
276 if (dump_file && (dump_flags & TDF_DETAILS))
277 fprintf (dump_file, " Exit condition will be eliminated "
278 "in last copy.\n");
279 likely_eliminated_last = true;
280 }
 281 /* Sets of IV variables. */
282 if (gimple_code (stmt) == GIMPLE_ASSIGN
283 && constant_after_peeling (gimple_assign_lhs (stmt), stmt, loop))
284 {
285 if (dump_file && (dump_flags & TDF_DETAILS))
286 fprintf (dump_file, " Induction variable computation will"
287 " be folded away.\n");
288 likely_eliminated = true;
289 }
290 /* Assignments of IV variables. */
291 else if (gimple_code (stmt) == GIMPLE_ASSIGN
292 && TREE_CODE (gimple_assign_lhs (stmt)) == SSA_NAME
293 && constant_after_peeling (gimple_assign_rhs1 (stmt),
93505d22 294 stmt, loop)
8c1879bc 295 && (gimple_assign_rhs_class (stmt) != GIMPLE_BINARY_RHS
296 || constant_after_peeling (gimple_assign_rhs2 (stmt),
297 stmt, loop)))
298 {
299 size->constant_iv = true;
300 if (dump_file && (dump_flags & TDF_DETAILS))
301 fprintf (dump_file,
302 " Constant expression will be folded away.\n");
303 likely_eliminated = true;
304 }
305 /* Conditionals. */
306 else if ((gimple_code (stmt) == GIMPLE_COND
307 && constant_after_peeling (gimple_cond_lhs (stmt), stmt,
308 loop)
309 && constant_after_peeling (gimple_cond_rhs (stmt), stmt,
310 loop)
311 /* We don't simplify all constant compares so make sure
312 they are not both constant already. See PR70288. */
313 && (! is_gimple_min_invariant (gimple_cond_lhs (stmt))
314 || ! is_gimple_min_invariant
315 (gimple_cond_rhs (stmt))))
316 || (gimple_code (stmt) == GIMPLE_SWITCH
317 && constant_after_peeling (gimple_switch_index (
318 as_a <gswitch *>
319 (stmt)),
320 stmt, loop)
321 && ! is_gimple_min_invariant
322 (gimple_switch_index
323 (as_a <gswitch *> (stmt)))))
324 {
325 if (dump_file && (dump_flags & TDF_DETAILS))
326 fprintf (dump_file, " Constant conditional.\n");
327 likely_eliminated = true;
328 }
aa2ba534 329 }
330
331 size->overall += num;
d583c979 332 if (likely_eliminated || likely_eliminated_peeled)
aa2ba534 333 size->eliminated_by_peeling += num;
334 if (!after_exit)
335 {
336 size->last_iteration += num;
d583c979 337 if (likely_eliminated || likely_eliminated_last)
aa2ba534 338 size->last_iteration_eliminated_by_peeling += num;
339 }
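	  /* Stop scanning once the size estimate exceeds UPPER_BOUND;
	     the caller will give up on the loop anyway.  */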
84eb345f 340 if ((size->overall * 3 / 2 - size->eliminated_by_peeling
341 - size->last_iteration_eliminated_by_peeling) > upper_bound)
342 {
343 free (body);
04437ab6 344 path.release ();
84eb345f 345 return true;
346 }
aa2ba534 347 }
348 }
f1f41a6c 349 while (path.length ())
d583c979 350 {
f1f41a6c 351 basic_block bb = path.pop ();
d583c979 352 for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
353 {
42acab1c 354 gimple *stmt = gsi_stmt (gsi);
f18de397 355 if (gimple_code (stmt) == GIMPLE_CALL
356 && !gimple_inexpensive_call_p (as_a <gcall *> (stmt)))
d583c979 357 {
358 int flags = gimple_call_flags (stmt);
f18de397 359 if (flags & (ECF_PURE | ECF_CONST))
d583c979 360 size->num_pure_calls_on_hot_path++;
361 else
362 size->num_non_pure_calls_on_hot_path++;
363 size->num_branches_on_hot_path ++;
364 }
f18de397 365 /* Count inexpensive calls as non-calls, because they will likely
366 expand inline. */
367 else if (gimple_code (stmt) != GIMPLE_DEBUG)
d583c979 368 size->non_call_stmts_on_hot_path++;
369 if (((gimple_code (stmt) == GIMPLE_COND
370 && (!constant_after_peeling (gimple_cond_lhs (stmt), stmt, loop)
fffa8e82 371 || !constant_after_peeling (gimple_cond_rhs (stmt), stmt,
372 loop)))
d583c979 373 || (gimple_code (stmt) == GIMPLE_SWITCH
1a91d914 374 && !constant_after_peeling (gimple_switch_index (
375 as_a <gswitch *> (stmt)),
376 stmt, loop)))
d583c979 377 && (!exit || bb != exit->src))
378 size->num_branches_on_hot_path++;
379 }
380 }
f1f41a6c 381 path.release ();
aa2ba534 382 if (dump_file && (dump_flags & TDF_DETAILS))
383 fprintf (dump_file, "size: %i-%i, last_iteration: %i-%i\n", size->overall,
384 size->eliminated_by_peeling, size->last_iteration,
385 size->last_iteration_eliminated_by_peeling);
48e1416a 386
aa2ba534 387 free (body);
84eb345f 388 return false;
aa2ba534 389}
604f7b8a 390
aa2ba534 391/* Estimate the number of insns in a completely unrolled loop.
 392 It is (NUNROLL + 1) * size of the loop body, taking into account
 393 the fact that in the last copy everything after the exit conditional
 394 is dead and that some instructions will be eliminated after
 395 peeling.
604f7b8a 396
c31fb425 397 The loop body is likely to simplify further; since this is difficult
aa2ba534 398 to guess, we just decrease the result by 1/3. */
604f7b8a 399
400static unsigned HOST_WIDE_INT
aa2ba534 401estimated_unrolled_size (struct loop_size *size,
604f7b8a 402 unsigned HOST_WIDE_INT nunroll)
403{
aa2ba534 404 HOST_WIDE_INT unr_insns = ((nunroll)
405 * (HOST_WIDE_INT) (size->overall
406 - size->eliminated_by_peeling));
407 if (!nunroll)
408 unr_insns = 0;
409 unr_insns += size->last_iteration - size->last_iteration_eliminated_by_peeling;
410
411 unr_insns = unr_insns * 2 / 3;
604f7b8a 412 if (unr_insns <= 0)
413 unr_insns = 1;
604f7b8a 414
415 return unr_insns;
416}
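
/* For instance (illustrative numbers): a body of 12 insns of which 4
   are eliminated by peeling, unrolled NUNROLL = 3 times with a last
   iteration of 9 insns (3 of them eliminated), estimates to
   (3 * (12 - 4) + (9 - 3)) * 2 / 3 = 20 insns.  */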
417
c790d986 418/* Loop LOOP is known to not loop. See if there is an edge in the loop
 419 body that can be removed to make the loop always exit without
 420 making any code potentially executed during the last iteration
 421 dead.
 422
4cf494ec 423 After complete unrolling we may still get rid of the conditional
c790d986 424 on the exit in the last copy even if we have no idea what it does.
 425 This is quite a common case for loops of the form
426
427 int a[5];
428 for (i=0;i<b;i++)
429 a[i]=0;
430
 431 Here we can prove that the loop iterates at most 5 times, but we
 432 do not know this from the induction variable.
433
 434 For now we handle only the simple case where the exit condition is
 435 just before the latch block and the latch block contains no statements
 436 with side effects that may otherwise terminate the execution of the loop
 437 (such as by EH or by terminating the program or longjmp).
 438
 439 In the general case we may want to cancel the paths leading to statements
 440 that loop-niter identified as having undefined effects in the last
 441 iteration. The other cases are hopefully rare and will be cleaned up later. */
442
f86b328b 443static edge
c790d986 444loop_edge_to_cancel (struct loop *loop)
445{
f1f41a6c 446 vec<edge> exits;
c790d986 447 unsigned i;
448 edge edge_to_cancel;
449 gimple_stmt_iterator gsi;
450
 451 /* We want only one predecessor of the loop latch. */
452 if (EDGE_COUNT (loop->latch->preds) > 1)
453 return NULL;
454
455 exits = get_loop_exit_edges (loop);
456
f1f41a6c 457 FOR_EACH_VEC_ELT (exits, i, edge_to_cancel)
c790d986 458 {
 459 /* Find the edge other than the loop exit
 460 leaving the conditional. */
461 if (EDGE_COUNT (edge_to_cancel->src->succs) != 2)
462 continue;
463 if (EDGE_SUCC (edge_to_cancel->src, 0) == edge_to_cancel)
464 edge_to_cancel = EDGE_SUCC (edge_to_cancel->src, 1);
465 else
466 edge_to_cancel = EDGE_SUCC (edge_to_cancel->src, 0);
467
248022b2 468 /* We can only handle conditionals. */
469 if (!(edge_to_cancel->flags & (EDGE_TRUE_VALUE | EDGE_FALSE_VALUE)))
470 continue;
471
c790d986 472 /* We should never have conditionals in the loop latch. */
473 gcc_assert (edge_to_cancel->dest != loop->header);
474
475 /* Check that it leads to loop latch. */
476 if (edge_to_cancel->dest != loop->latch)
477 continue;
478
f1f41a6c 479 exits.release ();
c790d986 480
481 /* Verify that the code in loop latch does nothing that may end program
482 execution without really reaching the exit. This may include
483 non-pure/const function calls, EH statements, volatile ASMs etc. */
484 for (gsi = gsi_start_bb (loop->latch); !gsi_end_p (gsi); gsi_next (&gsi))
485 if (gimple_has_side_effects (gsi_stmt (gsi)))
486 return NULL;
487 return edge_to_cancel;
488 }
f1f41a6c 489 exits.release ();
c790d986 490 return NULL;
491}
492
72276d01 493/* Remove all tests for exits that are known to be taken after LOOP was
 494 peeled NPEELED times. Put __builtin_unreachable () before every
 495 statement known to not be executed. */
496
497static bool
498remove_exits_and_undefined_stmts (struct loop *loop, unsigned int npeeled)
499{
500 struct nb_iter_bound *elt;
501 bool changed = false;
502
503 for (elt = loop->bounds; elt; elt = elt->next)
504 {
 505 /* If the statement is known to be undefined after peeling, turn it
 506 into unreachable (or a trap when a good debugging experience is
 507 wanted). */
508 if (!elt->is_exit
796b6678 509 && wi::ltu_p (elt->bound, npeeled))
72276d01 510 {
511 gimple_stmt_iterator gsi = gsi_for_stmt (elt->stmt);
1a91d914 512 gcall *stmt = gimple_build_call
72276d01 513 (builtin_decl_implicit (BUILT_IN_UNREACHABLE), 0);
72276d01 514 gimple_set_location (stmt, gimple_location (elt->stmt));
515 gsi_insert_before (&gsi, stmt, GSI_NEW_STMT);
3b43af65 516 split_block (gimple_bb (stmt), stmt);
72276d01 517 changed = true;
518 if (dump_file && (dump_flags & TDF_DETAILS))
519 {
520 fprintf (dump_file, "Forced statement unreachable: ");
1ffa4346 521 print_gimple_stmt (dump_file, elt->stmt, 0);
72276d01 522 }
523 }
524 /* If we know the exit will be taken after peeling, update. */
525 else if (elt->is_exit
796b6678 526 && wi::leu_p (elt->bound, npeeled))
72276d01 527 {
528 basic_block bb = gimple_bb (elt->stmt);
529 edge exit_edge = EDGE_SUCC (bb, 0);
530
531 if (dump_file && (dump_flags & TDF_DETAILS))
532 {
533 fprintf (dump_file, "Forced exit to be taken: ");
1ffa4346 534 print_gimple_stmt (dump_file, elt->stmt, 0);
72276d01 535 }
536 if (!loop_exit_edge_p (loop, exit_edge))
537 exit_edge = EDGE_SUCC (bb, 1);
720cfc43 538 exit_edge->probability = profile_probability::always ();
72276d01 539 gcc_checking_assert (loop_exit_edge_p (loop, exit_edge));
1a91d914 540 gcond *cond_stmt = as_a <gcond *> (elt->stmt);
72276d01 541 if (exit_edge->flags & EDGE_TRUE_VALUE)
1a91d914 542 gimple_cond_make_true (cond_stmt);
72276d01 543 else
1a91d914 544 gimple_cond_make_false (cond_stmt);
545 update_stmt (cond_stmt);
72276d01 546 changed = true;
547 }
548 }
549 return changed;
550}
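
/* Example (illustrative): if LOOP was peeled NPEELED = 4 times, a
   statement whose recorded bound proves it executes in fewer than 4
   iterations is replaced by __builtin_unreachable () in the remaining
   loop, and an exit known to be taken within the first 4 iterations
   has its GIMPLE_COND folded so that the exit is always taken.  */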
551
552/* Remove all exits that are known to be never taken because of the loop bound
553 discovered. */
554
555static bool
556remove_redundant_iv_tests (struct loop *loop)
557{
558 struct nb_iter_bound *elt;
559 bool changed = false;
560
561 if (!loop->any_upper_bound)
562 return false;
563 for (elt = loop->bounds; elt; elt = elt->next)
564 {
 565 /* Exit is pointless if it won't be taken before the loop reaches
 566 its upper bound. */
567 if (elt->is_exit && loop->any_upper_bound
796b6678 568 && wi::ltu_p (loop->nb_iterations_upper_bound, elt->bound))
72276d01 569 {
570 basic_block bb = gimple_bb (elt->stmt);
571 edge exit_edge = EDGE_SUCC (bb, 0);
572 struct tree_niter_desc niter;
573
574 if (!loop_exit_edge_p (loop, exit_edge))
575 exit_edge = EDGE_SUCC (bb, 1);
576
 577 /* Only when we know the actual number of iterations, not
 578 just a bound, can we remove the exit. */
579 if (!number_of_iterations_exit (loop, exit_edge,
3a690dce 580 &niter, false, false)
581 || !integer_onep (niter.assumptions)
72276d01 582 || !integer_zerop (niter.may_be_zero)
583 || !niter.niter
584 || TREE_CODE (niter.niter) != INTEGER_CST
cd9b5516 585 || !wi::ltu_p (loop->nb_iterations_upper_bound,
586 wi::to_widest (niter.niter)))
72276d01 587 continue;
588
589 if (dump_file && (dump_flags & TDF_DETAILS))
590 {
591 fprintf (dump_file, "Removed pointless exit: ");
1ffa4346 592 print_gimple_stmt (dump_file, elt->stmt, 0);
72276d01 593 }
1a91d914 594 gcond *cond_stmt = as_a <gcond *> (elt->stmt);
72276d01 595 if (exit_edge->flags & EDGE_TRUE_VALUE)
1a91d914 596 gimple_cond_make_false (cond_stmt);
72276d01 597 else
1a91d914 598 gimple_cond_make_true (cond_stmt);
599 update_stmt (cond_stmt);
72276d01 600 changed = true;
601 }
602 }
603 return changed;
604}
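
/* Example (illustrative): if the recorded bounds prove that the loop
   runs at most 4 times, an IV exit test that could trigger only at
   iteration 100 is pointless; its GIMPLE_COND is folded so that the
   exit is never taken.  */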
605
0cfe7a23 606/* Stores loops that will be unlooped and edges that will be removed
 607 after we process the whole loop tree. */
f1f41a6c 608static vec<loop_p> loops_to_unloop;
609static vec<int> loops_to_unloop_nunroll;
0cfe7a23 610static vec<edge> edges_to_remove;
b96f8145 611/* Stores loops that have been peeled. */
612static bitmap peeled_loops;
72276d01 613
 614/* Cancel all fully unrolled loops by putting __builtin_unreachable ()
 615 on the latch edge.
 616 We do it after all unrolling since unlooping moves basic blocks
 617 across loop boundaries, trashing loop-closed SSA form as well as
 618 SCEV info that needs to remain intact during unrolling.
 619
c790d986 620 IRRED_INVALIDATED is used to bookkeep whether information about
 621 irreducible regions may become invalid as a result
9f0ac045 622 of the transformation.
 623 LOOP_CLOSED_SSA_INVALIDATED is used to bookkeep the case
 624 when we need to go back into loop-closed SSA form. */
bb445479 625
f86b328b 626static void
72276d01 627unloop_loops (bitmap loop_closed_ssa_invalidated,
628 bool *irred_invalidated)
629{
f1f41a6c 630 while (loops_to_unloop.length ())
72276d01 631 {
f1f41a6c 632 struct loop *loop = loops_to_unloop.pop ();
633 int n_unroll = loops_to_unloop_nunroll.pop ();
72276d01 634 basic_block latch = loop->latch;
635 edge latch_edge = loop_latch_edge (loop);
636 int flags = latch_edge->flags;
637 location_t locus = latch_edge->goto_locus;
1a91d914 638 gcall *stmt;
72276d01 639 gimple_stmt_iterator gsi;
640
641 remove_exits_and_undefined_stmts (loop, n_unroll);
642
643 /* Unloop destroys the latch edge. */
644 unloop (loop, irred_invalidated, loop_closed_ssa_invalidated);
645
646 /* Create new basic block for the latch edge destination and wire
647 it in. */
648 stmt = gimple_build_call (builtin_decl_implicit (BUILT_IN_UNREACHABLE), 0);
649 latch_edge = make_edge (latch, create_basic_block (NULL, NULL, latch), flags);
720cfc43 650 latch_edge->probability = profile_probability::never ();
72276d01 651 latch_edge->flags |= flags;
652 latch_edge->goto_locus = locus;
653
7465dbcd 654 add_bb_to_loop (latch_edge->dest, current_loops->tree_root);
db9cef39 655 latch_edge->dest->count = profile_count::zero ();
72276d01 656 set_immediate_dominator (CDI_DOMINATORS, latch_edge->dest, latch_edge->src);
657
658 gsi = gsi_start_bb (latch_edge->dest);
659 gsi_insert_after (&gsi, stmt, GSI_NEW_STMT);
660 }
f1f41a6c 661 loops_to_unloop.release ();
662 loops_to_unloop_nunroll.release ();
be6d8ddc 663
25565afc 664 /* Remove edges in peeled copies. Since remove_path removes dominated
 665 regions, we need to cope with paths that have already been removed. */
be6d8ddc 666 unsigned i;
667 edge e;
25565afc 668 auto_vec<int, 20> src_bbs;
669 src_bbs.reserve_exact (edges_to_remove.length ());
be6d8ddc 670 FOR_EACH_VEC_ELT (edges_to_remove, i, e)
25565afc 671 src_bbs.quick_push (e->src->index);
672 FOR_EACH_VEC_ELT (edges_to_remove, i, e)
673 if (BASIC_BLOCK_FOR_FN (cfun, src_bbs[i]))
674 {
675 bool ok = remove_path (e, irred_invalidated,
676 loop_closed_ssa_invalidated);
677 gcc_assert (ok);
678 }
be6d8ddc 679 edges_to_remove.release ();
72276d01 680}
681
682/* Tries to unroll LOOP completely, i.e. NITER times.
683 UL determines which loops we are allowed to unroll.
f55775aa 684 EXIT is the exit of the loop that should be eliminated.
72276d01 685 MAXITER specifies a bound on the number of iterations, -1 if it is
f55775aa 686 not known or too large for HOST_WIDE_INT. The location
687 LOCUS corresponding to the loop is used when emitting
688 a summary of the unroll to the dump file. */
72276d01 689
bb445479 690static bool
7194de72 691try_unroll_loop_completely (struct loop *loop,
1d183080 692 edge exit, tree niter, bool may_be_zero,
c790d986 693 enum unroll_level ul,
f55775aa 694 HOST_WIDE_INT maxiter,
c309657f 695 dump_user_location_t locus, bool allow_peel)
bb445479 696{
2a09b28c 697 unsigned HOST_WIDE_INT n_unroll = 0;
c790d986 698 bool n_unroll_found = false;
c790d986 699 edge edge_to_cancel = NULL;
bb445479 700
c790d986 701 /* See if we proved the number of iterations to be a low constant.
bb445479 702
c790d986 703 EXIT is an edge that will be removed in all but last iteration of
704 the loop.
705
 706 EDGE_TO_CANCEL is an edge that will be removed from the last iteration
 707 of the unrolled sequence and is expected to make the final loop not
 708 roll.
709
 710 If the number of executions of the loop is determined by a standard
 711 induction variable test, then EXIT and EDGE_TO_CANCEL are the two edges
 712 leaving the IV test. */
e913b5cd 713 if (tree_fits_uhwi_p (niter))
c790d986 714 {
e913b5cd 715 n_unroll = tree_to_uhwi (niter);
c790d986 716 n_unroll_found = true;
717 edge_to_cancel = EDGE_SUCC (exit->src, 0);
718 if (edge_to_cancel == exit)
719 edge_to_cancel = EDGE_SUCC (exit->src, 1);
720 }
f4d3c071 721 /* We do not know the number of iterations and thus we cannot eliminate
c790d986 722 the EXIT edge. */
723 else
52c6c602 724 exit = NULL;
c790d986 725
726 /* See if we can improve our estimate by using recorded loop bounds. */
866fc6a0 727 if ((allow_peel || maxiter == 0 || ul == UL_NO_GROWTH)
728 && maxiter >= 0
c790d986 729 && (!n_unroll_found || (unsigned HOST_WIDE_INT)maxiter < n_unroll))
730 {
731 n_unroll = maxiter;
732 n_unroll_found = true;
f4d3c071 733 /* The loop terminates before the IV test, so we cannot
c790d986 734 remove it in the last iteration. */
735 edge_to_cancel = NULL;
736 }
737
738 if (!n_unroll_found)
bb445479 739 return false;
bb445479 740
2a09b28c 741 if (!loop->unroll
742 && n_unroll > (unsigned) PARAM_VALUE (PARAM_MAX_COMPLETELY_PEEL_TIMES))
04e3ee8a 743 {
744 if (dump_file && (dump_flags & TDF_DETAILS))
745 fprintf (dump_file, "Not unrolling loop %d "
2c290da1 746 "(--param max-completely-peel-times limit reached).\n",
04e3ee8a 747 loop->num);
748 return false;
749 }
bb445479 750
c790d986 751 if (!edge_to_cancel)
752 edge_to_cancel = loop_edge_to_cancel (loop);
753
bb445479 754 if (n_unroll)
755 {
604f7b8a 756 if (ul == UL_SINGLE_ITER)
bb445479 757 return false;
758
2a09b28c 759 if (loop->unroll)
84eb345f 760 {
2a09b28c 761 /* If the unrolling factor is too large, bail out. */
762 if (n_unroll > (unsigned)loop->unroll)
763 {
764 if (dump_file && (dump_flags & TDF_DETAILS))
765 fprintf (dump_file,
766 "Not unrolling loop %d: "
767 "user didn't want it unrolled completely.\n",
768 loop->num);
769 return false;
770 }
84eb345f 771 }
2a09b28c 772 else
d88fd237 773 {
2a09b28c 774 struct loop_size size;
775 /* EXIT can be removed only if we are sure it passes first N_UNROLL
776 iterations. */
777 bool remove_exit = (exit && niter
778 && TREE_CODE (niter) == INTEGER_CST
779 && wi::leu_p (n_unroll, wi::to_widest (niter)));
780 bool large
781 = tree_estimate_loop_size
782 (loop, remove_exit ? exit : NULL, edge_to_cancel, &size,
783 PARAM_VALUE (PARAM_MAX_COMPLETELY_PEELED_INSNS));
784 if (large)
785 {
786 if (dump_file && (dump_flags & TDF_DETAILS))
787 fprintf (dump_file, "Not unrolling loop %d: it is too large.\n",
788 loop->num);
789 return false;
790 }
d88fd237 791
2a09b28c 792 unsigned HOST_WIDE_INT ninsns = size.overall;
793 unsigned HOST_WIDE_INT unr_insns
794 = estimated_unrolled_size (&size, n_unroll);
d583c979 795 if (dump_file && (dump_flags & TDF_DETAILS))
2a09b28c 796 {
797 fprintf (dump_file, " Loop size: %d\n", (int) ninsns);
798 fprintf (dump_file, " Estimated size after unrolling: %d\n",
799 (int) unr_insns);
800 }
801
 802 /* If the code is going to shrink, we don't need to be extra
 803 cautious guessing whether the unrolling is going to be
 804 profitable. */
805 if (unr_insns
 806 /* If there is an IV that will become constant, we save one
 807 instruction in the loop prologue that we do not account
 808 for otherwise. */
809 <= ninsns + (size.constant_iv != false))
810 ;
 811 /* We unroll only inner loops, because we do not consider it
 812 profitable otherwise. We can still cancel the loopback edge
 813 of a loop that does not roll; this is always a good idea. */
814 else if (ul == UL_NO_GROWTH)
815 {
816 if (dump_file && (dump_flags & TDF_DETAILS))
817 fprintf (dump_file, "Not unrolling loop %d: size would grow.\n",
818 loop->num);
819 return false;
820 }
821 /* Outer loops tend to be less interesting candidates for
822 complete unrolling unless we can do a lot of propagation
823 into the inner loop body. For now we disable outer loop
824 unrolling when the code would grow. */
825 else if (loop->inner)
826 {
827 if (dump_file && (dump_flags & TDF_DETAILS))
828 fprintf (dump_file, "Not unrolling loop %d: "
829 "it is not innermost and code would grow.\n",
830 loop->num);
831 return false;
832 }
 833 /* If there is a call on a hot path through the loop, then
834 there is most probably not much to optimize. */
835 else if (size.num_non_pure_calls_on_hot_path)
836 {
837 if (dump_file && (dump_flags & TDF_DETAILS))
838 fprintf (dump_file, "Not unrolling loop %d: "
839 "contains call and code would grow.\n",
840 loop->num);
841 return false;
842 }
 843 /* If there is a pure/const call on the hot path, then we can
 844 still optimize the unrolled loop body if it contains some
 845 other interesting code than the calls and the code storing or
 846 accumulating the return value. */
 847 else if (size.num_pure_calls_on_hot_path
 848 /* One IV increment, one test, one ivtmp store and
 849 one useful stmt. That is about the minimal loop
 850 doing a pure call. */
851 && (size.non_call_stmts_on_hot_path
852 <= 3 + size.num_pure_calls_on_hot_path))
853 {
854 if (dump_file && (dump_flags & TDF_DETAILS))
855 fprintf (dump_file, "Not unrolling loop %d: "
856 "contains just pure calls and code would grow.\n",
857 loop->num);
858 return false;
859 }
 860 /* Complete unrolling is a major win when control flow is
 861 removed and one big basic block is created. If the loop
 862 contains control flow, the optimization may still be a win
 863 because the loop overhead is eliminated, but it may also
 864 blow up the branch predictor tables. Limit the number of
 865 branches on the hot path through the peeled sequence. */
866 else if (size.num_branches_on_hot_path * (int)n_unroll
867 > PARAM_VALUE (PARAM_MAX_PEEL_BRANCHES))
868 {
869 if (dump_file && (dump_flags & TDF_DETAILS))
870 fprintf (dump_file, "Not unrolling loop %d: "
871 "number of branches on hot path in the unrolled "
872 "sequence reaches --param max-peel-branches limit.\n",
873 loop->num);
874 return false;
875 }
876 else if (unr_insns
877 > (unsigned) PARAM_VALUE (PARAM_MAX_COMPLETELY_PEELED_INSNS))
878 {
879 if (dump_file && (dump_flags & TDF_DETAILS))
880 fprintf (dump_file, "Not unrolling loop %d: "
881 "number of insns in the unrolled sequence reaches "
882 "--param max-completely-peeled-insns limit.\n",
883 loop->num);
884 return false;
885 }
604f7b8a 886 }
fb54ef7c 887
01020a5f 888 initialize_original_copy_tables ();
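      /* WONT_EXIT has one bit per copy of the loop body; a set bit
	 means that copy is known not to leave through EXIT, so the
	 duplicator may fold its exit test away.  */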
3c6549f8 889 auto_sbitmap wont_exit (n_unroll + 1);
eedd711b 890 if (exit && niter
891 && TREE_CODE (niter) == INTEGER_CST
892 && wi::leu_p (n_unroll, wi::to_widest (niter)))
893 {
894 bitmap_ones (wont_exit);
895 if (wi::eq_p (wi::to_widest (niter), n_unroll)
896 || edge_to_cancel)
897 bitmap_clear_bit (wont_exit, 0);
898 }
899 else
900 {
901 exit = NULL;
902 bitmap_clear (wont_exit);
903 }
1d183080 904 if (may_be_zero)
905 bitmap_clear_bit (wont_exit, 1);
fb54ef7c 906
75a70cf9 907 if (!gimple_duplicate_loop_to_header_edge (loop, loop_preheader_edge (loop),
908 n_unroll, wont_exit,
0cfe7a23 909 exit, &edges_to_remove,
75a70cf9 910 DLTHE_FLAG_UPDATE_FREQ
911 | DLTHE_FLAG_COMPLETTE_PEEL))
bb445479 912 {
01020a5f 913 free_original_copy_tables ();
d583c979 914 if (dump_file && (dump_flags & TDF_DETAILS))
915 fprintf (dump_file, "Failed to duplicate the loop\n");
bb445479 916 return false;
917 }
40ffaada 918
01020a5f 919 free_original_copy_tables ();
bb445479 920 }
bb445479 921
c790d986 922 /* Remove the conditional from the last copy of the loop. */
923 if (edge_to_cancel)
924 {
1a91d914 925 gcond *cond = as_a <gcond *> (last_stmt (edge_to_cancel->src));
eedd711b 926 force_edge_cold (edge_to_cancel, true);
c790d986 927 if (edge_to_cancel->flags & EDGE_TRUE_VALUE)
928 gimple_cond_make_false (cond);
929 else
930 gimple_cond_make_true (cond);
931 update_stmt (cond);
2a09b28c 932 /* Do not remove the path, as doing so may remove the outer loop and
933 confuse bookkeeping code in tree_unroll_loops_completely. */
c790d986 934 }
c790d986 935
72276d01 936 /* Store the loop for later unlooping and exit removal. */
f1f41a6c 937 loops_to_unloop.safe_push (loop);
938 loops_to_unloop_nunroll.safe_push (n_unroll);
095dcfa3 939
f55775aa 940 if (dump_enabled_p ())
c790d986 941 {
942 if (!n_unroll)
f55775aa 943 dump_printf_loc (MSG_OPTIMIZED_LOCATIONS | TDF_DETAILS, locus,
6ee2edad 944 "loop turned into non-loop; it never loops\n");
c790d986 945 else
f55775aa 946 {
947 dump_printf_loc (MSG_OPTIMIZED_LOCATIONS | TDF_DETAILS, locus,
6ee2edad 948 "loop with %d iterations completely unrolled",
2a09b28c 949 (int) n_unroll);
db9cef39 950 if (loop->header->count.initialized_p ())
f55775aa 951 dump_printf (MSG_OPTIMIZED_LOCATIONS | TDF_DETAILS,
952 " (header execution count %d)",
db9cef39 953 (int)loop->header->count.to_gcov_type ());
f55775aa 954 dump_printf (MSG_OPTIMIZED_LOCATIONS | TDF_DETAILS, "\n");
955 }
956 }
957
958 if (dump_file && (dump_flags & TDF_DETAILS))
959 {
c790d986 960 if (exit)
961 fprintf (dump_file, "Exit condition of peeled iterations was "
962 "eliminated.\n");
963 if (edge_to_cancel)
964 fprintf (dump_file, "Last iteration exit edge was proved true.\n");
965 else
966 fprintf (dump_file, "Latch of last iteration was marked by "
967 "__builtin_unreachable ().\n");
968 }
bb445479 969
970 return true;
971}
972
c836de3f 973/* Return the number of instructions after peeling. */
974static unsigned HOST_WIDE_INT
975estimated_peeled_sequence_size (struct loop_size *size,
976 unsigned HOST_WIDE_INT npeel)
977{
978 return MAX (npeel * (HOST_WIDE_INT) (size->overall
979 - size->eliminated_by_peeling), 1);
980}
981
982/* If the loop is expected to iterate N times and is
983 small enough, duplicate the loop body N+1 times before
984 the loop itself. This way the hot path will never
985 enter the loop.
986 Parameters are the same as for try_unroll_loops_completely */
987
988static bool
989try_peel_loop (struct loop *loop,
1d183080 990 edge exit, tree niter, bool may_be_zero,
c836de3f 991 HOST_WIDE_INT maxiter)
992{
39ab2939 993 HOST_WIDE_INT npeel;
c836de3f 994 struct loop_size size;
995 int peeled_size;
c836de3f 996
2a09b28c 997 if (!flag_peel_loops
998 || PARAM_VALUE (PARAM_MAX_PEEL_TIMES) <= 0
b96f8145 999 || !peeled_loops)
c836de3f 1000 return false;
1001
b96f8145 1002 if (bitmap_bit_p (peeled_loops, loop->num))
1003 {
1004 if (dump_file)
1005 fprintf (dump_file, "Not peeling: loop is already peeled\n");
1006 return false;
1007 }
1008
2a09b28c 1009 /* We don't peel loops that will be unrolled as this can duplicate a
1010 loop more times than the user requested. */
1011 if (loop->unroll)
1012 {
1013 if (dump_file)
1014 fprintf (dump_file, "Not peeling: user didn't want it peeled.\n");
1015 return false;
1016 }
1017
3ccfbed6 1018 /* Peel only innermost loops.
1019 While the code is perfectly capable of peeling non-innermost loops,
1020 the heuristics would probably need some improvements. */
c836de3f 1021 if (loop->inner)
1022 {
1023 if (dump_file)
2a09b28c 1024 fprintf (dump_file, "Not peeling: outer loop\n");
c836de3f 1025 return false;
1026 }
1027
1028 if (!optimize_loop_for_speed_p (loop))
1029 {
1030 if (dump_file)
2a09b28c 1031 fprintf (dump_file, "Not peeling: cold loop\n");
c836de3f 1032 return false;
1033 }
1034
1035 /* Check if there is an estimate on the number of iterations. */
1036 npeel = estimated_loop_iterations_int (loop);
b96f8145 1037 if (npeel < 0)
1038 npeel = likely_max_loop_iterations_int (loop);
c836de3f 1039 if (npeel < 0)
1040 {
1041 if (dump_file)
1042 fprintf (dump_file, "Not peeling: number of iterations is not "
1043 "estimated\n");
1044 return false;
1045 }
1046 if (maxiter >= 0 && maxiter <= npeel)
1047 {
1048 if (dump_file)
2a09b28c 1049 fprintf (dump_file, "Not peeling: upper bound is known so can "
4cf494ec 1050 "unroll completely\n");
c836de3f 1051 return false;
1052 }
1053
 1054 /* We want to peel the estimated number of iterations + 1 (so the
 1055 quick path never enters the loop). Check against PARAM_MAX_PEEL_TIMES
1056 and be sure to avoid overflows. */
1057 if (npeel > PARAM_VALUE (PARAM_MAX_PEEL_TIMES) - 1)
1058 {
1059 if (dump_file)
2a09b28c 1060 fprintf (dump_file, "Not peeling: rolls too much "
39ab2939 1061 "(%i + 1 > --param max-peel-times)\n", (int) npeel);
c836de3f 1062 return false;
1063 }
1064 npeel++;
1065
1066 /* Check peeled loops size. */
1067 tree_estimate_loop_size (loop, exit, NULL, &size,
1068 PARAM_VALUE (PARAM_MAX_PEELED_INSNS));
39ab2939 1069 if ((peeled_size = estimated_peeled_sequence_size (&size, (int) npeel))
c836de3f 1070 > PARAM_VALUE (PARAM_MAX_PEELED_INSNS))
1071 {
1072 if (dump_file)
2a09b28c 1073 fprintf (dump_file, "Not peeling: peeled sequence size is too large "
c836de3f 1074 "(%i insns > --param max-peel-insns)", peeled_size);
1075 return false;
1076 }
1077
1078 /* Duplicate possibly eliminating the exits. */
1079 initialize_original_copy_tables ();
3c6549f8 1080 auto_sbitmap wont_exit (npeel + 1);
3ccfbed6 1081 if (exit && niter
1082 && TREE_CODE (niter) == INTEGER_CST
1083 && wi::leu_p (npeel, wi::to_widest (niter)))
1084 {
1085 bitmap_ones (wont_exit);
b96f8145 1086 bitmap_clear_bit (wont_exit, 0);
3ccfbed6 1087 }
1088 else
1089 {
1090 exit = NULL;
1091 bitmap_clear (wont_exit);
1092 }
1d183080 1093 if (may_be_zero)
1094 bitmap_clear_bit (wont_exit, 1);
c836de3f 1095 if (!gimple_duplicate_loop_to_header_edge (loop, loop_preheader_edge (loop),
1096 npeel, wont_exit,
0cfe7a23 1097 exit, &edges_to_remove,
3ccfbed6 1098 DLTHE_FLAG_UPDATE_FREQ))
c836de3f 1099 {
1100 free_original_copy_tables ();
c836de3f 1101 return false;
1102 }
c836de3f 1103 free_original_copy_tables ();
1104 if (dump_file && (dump_flags & TDF_DETAILS))
1105 {
1106 fprintf (dump_file, "Peeled loop %d, %i times.\n",
39ab2939 1107 loop->num, (int) npeel);
c836de3f 1108 }
3ccfbed6 1109 if (loop->any_estimate)
1110 {
1111 if (wi::ltu_p (npeel, loop->nb_iterations_estimate))
1112 loop->nb_iterations_estimate -= npeel;
1113 else
1114 loop->nb_iterations_estimate = 0;
1115 }
c836de3f 1116 if (loop->any_upper_bound)
3ccfbed6 1117 {
b96f8145 1118 if (wi::ltu_p (npeel, loop->nb_iterations_upper_bound))
3ccfbed6 1119 loop->nb_iterations_upper_bound -= npeel;
1120 else
1121 loop->nb_iterations_upper_bound = 0;
1122 }
8e3ffe30 1123 if (loop->any_likely_upper_bound)
3ccfbed6 1124 {
b96f8145 1125 if (wi::ltu_p (npeel, loop->nb_iterations_likely_upper_bound))
3ccfbed6 1126 loop->nb_iterations_likely_upper_bound -= npeel;
1127 else
1128 {
1129 loop->any_estimate = true;
1130 loop->nb_iterations_estimate = 0;
1131 loop->nb_iterations_likely_upper_bound = 0;
1132 }
1133 }
db9cef39 1134 profile_count entry_count = profile_count::zero ();
3ccfbed6 1135
0cfe7a23 1136 edge e;
3ccfbed6 1137 edge_iterator ei;
1138 FOR_EACH_EDGE (e, ei, loop->header->preds)
1139 if (e->src != loop->latch)
1140 {
db9cef39 1141 if (e->src->count.initialized_p ())
913b81c4 1142 entry_count += e->src->count;
3ccfbed6 1143 gcc_assert (!flow_bb_inside_loop_p (loop, e->src));
1144 }
913b81c4 1145 profile_probability p;
205ce1aa 1146 p = entry_count.probability_in (loop->header->count);
ca69b069 1147 scale_loop_profile (loop, p, 0);
b96f8145 1148 bitmap_set_bit (peeled_loops, loop->num);
c836de3f 1149 return true;
1150}
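
/* Example (illustrative): a loop estimated to iterate twice gets
   NPEEL = 3 copies of its body placed in front of it, each still
   guarded by the original exit test, so the expected hot path never
   enters the loop itself.  */
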
7194de72 1151/* Adds a canonical induction variable to LOOP if suitable.
48e1416a 1152 CREATE_IV is true if we may create a new IV. UL determines
604f7b8a 1153 which loops we are allowed to completely unroll. If TRY_EVAL is true, we try
48e1416a 1154 to determine the number of iterations of a loop by direct evaluation.
72276d01 1155 Returns true if the CFG is changed. */
bb445479 1156
1157static bool
7194de72 1158canonicalize_loop_induction_variables (struct loop *loop,
604f7b8a 1159 bool create_iv, enum unroll_level ul,
866fc6a0 1160 bool try_eval, bool allow_peel)
bb445479 1161{
1162 edge exit = NULL;
1163 tree niter;
72276d01 1164 HOST_WIDE_INT maxiter;
1165 bool modified = false;
c309657f 1166 dump_user_location_t locus;
1d183080 1167 struct tree_niter_desc niter_desc;
1168 bool may_be_zero = false;
bb445479 1169
1d183080 1170 /* For unrolling, allow a conditionally constant or zero number of
 1171 iterations; thus perform loop-header copying on the fly. */
f55775aa 1172 exit = single_exit (loop);
1d183080 1173 niter = chrec_dont_know;
1174 if (exit && number_of_iterations_exit (loop, exit, &niter_desc, false))
1175 {
1176 niter = niter_desc.niter;
1177 may_be_zero
1178 = niter_desc.may_be_zero && !integer_zerop (niter_desc.may_be_zero);
1179 }
bb445479 1180 if (TREE_CODE (niter) == INTEGER_CST)
c309657f 1181 locus = last_stmt (exit->src);
b091dc59 1182 else
1183 {
1d183080 1184 /* For non-constant niter fold may_be_zero into niter again. */
1185 if (may_be_zero)
1186 {
1187 if (COMPARISON_CLASS_P (niter_desc.may_be_zero))
1188 niter = fold_build3 (COND_EXPR, TREE_TYPE (niter),
1189 niter_desc.may_be_zero,
1190 build_int_cst (TREE_TYPE (niter), 0), niter);
1191 else
1192 niter = chrec_dont_know;
1193 may_be_zero = false;
1194 }
1195
b091dc59 1196 /* If the loop has more than one exit, try checking all of them
1197 for # of iterations determinable through scev. */
f55775aa 1198 if (!exit)
b091dc59 1199 niter = find_loop_niter (loop, &exit);
1200
1201 /* Finally if everything else fails, try brute force evaluation. */
1202 if (try_eval
1203 && (chrec_contains_undetermined (niter)
1204 || TREE_CODE (niter) != INTEGER_CST))
1205 niter = find_loop_niter_by_eval (loop, &exit);
1206
f55775aa 1207 if (exit)
c309657f 1208 locus = last_stmt (exit->src);
f55775aa 1209
c790d986 1210 if (TREE_CODE (niter) != INTEGER_CST)
1211 exit = NULL;
b091dc59 1212 }
bb445479 1213
c790d986 1214 /* We work exceptionally hard here to estimate the bound
 1215 by find_loop_niter_by_eval. Be sure to keep it for the future. */
1216 if (niter && TREE_CODE (niter) == INTEGER_CST)
57337fec 1217 {
5de9d3ed 1218 record_niter_bound (loop, wi::to_widest (niter),
57337fec 1219 exit == single_likely_exit (loop), true);
1220 }
c790d986 1221
72276d01 1222 /* Force re-computation of loop bounds so we can remove redundant exits. */
1223 maxiter = max_loop_iterations_int (loop);
1224
c790d986 1225 if (dump_file && (dump_flags & TDF_DETAILS)
1226 && TREE_CODE (niter) == INTEGER_CST)
bb445479 1227 {
1228 fprintf (dump_file, "Loop %d iterates ", loop->num);
1229 print_generic_expr (dump_file, niter, TDF_SLIM);
1230 fprintf (dump_file, " times.\n");
1231 }
c790d986 1232 if (dump_file && (dump_flags & TDF_DETAILS)
72276d01 1233 && maxiter >= 0)
c790d986 1234 {
1235 fprintf (dump_file, "Loop %d iterates at most %i times.\n", loop->num,
72276d01 1236 (int)maxiter);
c790d986 1237 }
8e3ffe30 1238 if (dump_file && (dump_flags & TDF_DETAILS)
1239 && likely_max_loop_iterations_int (loop) >= 0)
1240 {
eedd711b 1241 fprintf (dump_file, "Loop %d likely iterates at most %i times.\n",
1242 loop->num, (int)likely_max_loop_iterations_int (loop));
8e3ffe30 1243 }
bb445479 1244
72276d01 1245 /* Remove exits that are known to be never taken based on the loop bound.
 1246 Needs to be called after max_loop_iterations_int, which
 1247 populates the loop bounds. */
1248 modified |= remove_redundant_iv_tests (loop);
1249
1d183080 1250 if (try_unroll_loop_completely (loop, exit, niter, may_be_zero, ul,
1251 maxiter, locus, allow_peel))
bb445479 1252 return true;
1253
c790d986 1254 if (create_iv
57337fec 1255 && niter && !chrec_contains_undetermined (niter)
1256 && exit && just_once_each_iteration_p (loop, exit->src))
1d183080 1257 {
1258 tree iv_niter = niter;
1259 if (may_be_zero)
1260 {
1261 if (COMPARISON_CLASS_P (niter_desc.may_be_zero))
1262 iv_niter = fold_build3 (COND_EXPR, TREE_TYPE (iv_niter),
1263 niter_desc.may_be_zero,
1264 build_int_cst (TREE_TYPE (iv_niter), 0),
1265 iv_niter);
1266 else
1267 iv_niter = NULL_TREE;
1268 }
1269 if (iv_niter)
1270 create_canonical_iv (loop, exit, iv_niter);
1271 }
bb445479 1272
c836de3f 1273 if (ul == UL_ALL)
1d183080 1274 modified |= try_peel_loop (loop, exit, niter, may_be_zero, maxiter);
c836de3f 1275
72276d01 1276 return modified;
bb445479 1277}
1278
1279/* The main entry point of the pass. Adds canonical induction variables
7194de72 1280 to suitable loops. */
bb445479 1281
4c641bf8 1282unsigned int
7194de72 1283canonicalize_induction_variables (void)
bb445479 1284{
bb445479 1285 struct loop *loop;
053fdd99 1286 bool changed = false;
c790d986 1287 bool irred_invalidated = false;
9f0ac045 1288 bitmap loop_closed_ssa_invalidated = BITMAP_ALLOC (NULL);
48e1416a 1289
46480a95 1290 estimate_numbers_of_iterations (cfun);
72276d01 1291
f21d4d00 1292 FOR_EACH_LOOP (loop, LI_FROM_INNERMOST)
bb445479 1293 {
17519ba0 1294 changed |= canonicalize_loop_induction_variables (loop,
1295 true, UL_SINGLE_ITER,
866fc6a0 1296 true, false);
bb445479 1297 }
ea1c5c31 1298 gcc_assert (!need_ssa_update_p (cfun));
bb445479 1299
72276d01 1300 unloop_loops (loop_closed_ssa_invalidated, &irred_invalidated);
c790d986 1301 if (irred_invalidated
1302 && loops_state_satisfies_p (LOOPS_HAVE_MARKED_IRREDUCIBLE_REGIONS))
1303 mark_irreducible_loops ();
1304
08162157 1305 /* Clean up the information about numbers of iterations, since brute force
1306 evaluation could reveal new information. */
866da453 1307 free_numbers_of_iterations_estimates (cfun);
08162157 1308 scev_reset ();
1309
9f0ac045 1310 if (!bitmap_empty_p (loop_closed_ssa_invalidated))
1311 {
1312 gcc_checking_assert (loops_state_satisfies_p (LOOP_CLOSED_SSA));
1313 rewrite_into_loop_closed_ssa (NULL, TODO_update_ssa);
1314 }
1315 BITMAP_FREE (loop_closed_ssa_invalidated);
1316
bb445479 1317 if (changed)
4c641bf8 1318 return TODO_cleanup_cfg;
1319 return 0;
bb445479 1320}
1321
042301ef 1322/* Process loops from innermost to outer, stopping at the innermost
1323 loop we unrolled. */
1324
1325static bool
1326tree_unroll_loops_completely_1 (bool may_increase_size, bool unroll_outer,
0cfe7a23 1327 bitmap father_bbs, struct loop *loop)
042301ef 1328{
1329 struct loop *loop_father;
1330 bool changed = false;
1331 struct loop *inner;
1332 enum unroll_level ul;
b5d7c99e 1333 unsigned num = number_of_loops (cfun);
042301ef 1334
b5d7c99e 1335 /* Process inner loops first. Don't walk loops added by the recursive
1336 calls because SSA form is not up-to-date. They can be handled in the
1337 next iteration. */
bfb7f32b 1338 bitmap child_father_bbs = NULL;
042301ef 1339 for (inner = loop->inner; inner != NULL; inner = inner->next)
b5d7c99e 1340 if ((unsigned) inner->num < num)
bfb7f32b 1341 {
1342 if (!child_father_bbs)
1343 child_father_bbs = BITMAP_ALLOC (NULL);
1344 if (tree_unroll_loops_completely_1 (may_increase_size, unroll_outer,
1345 child_father_bbs, inner))
1346 {
1347 bitmap_ior_into (father_bbs, child_father_bbs);
1348 bitmap_clear (child_father_bbs);
1349 changed = true;
1350 }
1351 }
1352 if (child_father_bbs)
1353 BITMAP_FREE (child_father_bbs);
b5d7c99e 1354
042301ef 1355 /* If we changed an inner loop we cannot process outer loops in this
1356 iteration because SSA form is not up-to-date. Continue with
1357 siblings of outer loops instead. */
1358 if (changed)
bfb7f32b 1359 {
 1360 /* If we are recorded as a father, clear all other fathers that
 1361 are necessarily covered already to avoid redundant work. */
1362 if (bitmap_bit_p (father_bbs, loop->header->index))
1363 {
1364 bitmap_clear (father_bbs);
1365 bitmap_set_bit (father_bbs, loop->header->index);
1366 }
1367 return true;
1368 }
042301ef 1369
3d483a94 1370 /* Don't unroll #pragma omp simd loops until the vectorizer
1371 attempts to vectorize those. */
4c73695b 1372 if (loop->force_vectorize)
3d483a94 1373 return false;
1374
042301ef 1375 /* Try to unroll this loop. */
1376 loop_father = loop_outer (loop);
1377 if (!loop_father)
1378 return false;
1379
2a09b28c 1380 if (loop->unroll > 1)
1381 ul = UL_ALL;
1382 else if (may_increase_size && optimize_loop_nest_for_speed_p (loop)
042301ef 1383 /* Unroll outermost loops only if asked to do so or they do
1384 not cause code growth. */
1385 && (unroll_outer || loop_outer (loop_father)))
1386 ul = UL_ALL;
1387 else
1388 ul = UL_NO_GROWTH;
1389
1390 if (canonicalize_loop_induction_variables
866fc6a0 1391 (loop, false, ul, !flag_tree_loop_ivcanon, unroll_outer))
042301ef 1392 {
1393 /* If we'll continue unrolling, we need to propagate constants
1394 within the new basic blocks to fold away induction variable
1395 computations; otherwise, the size might blow up before the
1396 iteration is complete and the IR eventually cleaned up. */
0cfe7a23 1397 if (loop_outer (loop_father))
bfb7f32b 1398 {
1399 /* Once we process our father we will have processed
1400 the fathers of our children as well, so avoid doing
 1401 redundant work and clear the fathers we've gathered so far. */
1402 bitmap_clear (father_bbs);
1403 bitmap_set_bit (father_bbs, loop_father->header->index);
1404 }
042301ef 1405
1406 return true;
1407 }
1408
1409 return false;
1410}
1411
604f7b8a 1412/* Unroll LOOPS completely if they iterate just a few times. Unless
1413 MAY_INCREASE_SIZE is true, perform the unrolling only if the
1414 size of the code does not increase. */
bb445479 1415
2a09b28c 1416static unsigned int
d88fd237 1417tree_unroll_loops_completely (bool may_increase_size, bool unroll_outer)
bb445479 1418{
0cfe7a23 1419 bitmap father_bbs = BITMAP_ALLOC (NULL);
d88fd237 1420 bool changed;
793a0ab5 1421 int iteration = 0;
9f0ac045 1422 bool irred_invalidated = false;
bb445479 1423
46480a95 1424 estimate_numbers_of_iterations (cfun);
1425
d88fd237 1426 do
bb445479 1427 {
d88fd237 1428 changed = false;
9f0ac045 1429 bitmap loop_closed_ssa_invalidated = NULL;
1430
1431 if (loops_state_satisfies_p (LOOP_CLOSED_SSA))
1432 loop_closed_ssa_invalidated = BITMAP_ALLOC (NULL);
bb445479 1433
d4f078b5 1434 free_numbers_of_iterations_estimates (cfun);
46480a95 1435 estimate_numbers_of_iterations (cfun);
72276d01 1436
042301ef 1437 changed = tree_unroll_loops_completely_1 (may_increase_size,
0cfe7a23 1438 unroll_outer, father_bbs,
042301ef 1439 current_loops->tree_root);
d88fd237 1440 if (changed)
1441 {
2ebfc881 1442 unsigned i;
1443
72276d01 1444 unloop_loops (loop_closed_ssa_invalidated, &irred_invalidated);
c790d986 1445
f4d3c071 1446 /* We cannot use TODO_update_ssa_no_phi because VOPS gets confused. */
9f0ac045 1447 if (loop_closed_ssa_invalidated
1448 && !bitmap_empty_p (loop_closed_ssa_invalidated))
1449 rewrite_into_loop_closed_ssa (loop_closed_ssa_invalidated,
1450 TODO_update_ssa);
1451 else
1452 update_ssa (TODO_update_ssa);
ea1c5c31 1453
0cfe7a23 1454 /* father_bbs is a bitmap of loop father header BB indices.
1455 Translate that to what non-root loops these BBs belong to now. */
1456 bitmap_iterator bi;
1457 bitmap fathers = BITMAP_ALLOC (NULL);
1458 EXECUTE_IF_SET_IN_BITMAP (father_bbs, 0, i, bi)
1459 {
1460 basic_block unrolled_loop_bb = BASIC_BLOCK_FOR_FN (cfun, i);
1461 if (! unrolled_loop_bb)
1462 continue;
1463 if (loop_outer (unrolled_loop_bb->loop_father))
1464 bitmap_set_bit (fathers,
1465 unrolled_loop_bb->loop_father->num);
1466 }
1467 bitmap_clear (father_bbs);
2ebfc881 1468 /* Propagate the constants within the new basic blocks. */
0cfe7a23 1469 EXECUTE_IF_SET_IN_BITMAP (fathers, 0, i, bi)
1470 {
1471 loop_p father = get_loop (cfun, i);
51e85e64 1472 bitmap exit_bbs = BITMAP_ALLOC (NULL);
1473 loop_exit *exit = father->exits->next;
1474 while (exit->e)
1475 {
1476 bitmap_set_bit (exit_bbs, exit->e->dest->index);
1477 exit = exit->next;
1478 }
1479 do_rpo_vn (cfun, loop_preheader_edge (father), exit_bbs);
0cfe7a23 1480 }
1481 BITMAP_FREE (fathers);
2ebfc881 1482
d88fd237 1483 /* This will take care of removing completely unrolled loops
 1484 from the loop structures so we can continue unrolling the
 1485 now-innermost loops. */
b2a225ba 1486 if (cleanup_tree_cfg ())
1487 update_ssa (TODO_update_ssa_only_virtuals);
d88fd237 1488
1489 /* Clean up the information about numbers of iterations, since
1490 complete unrolling might have invalidated it. */
1491 scev_reset ();
382ecba7 1492 if (flag_checking && loops_state_satisfies_p (LOOP_CLOSED_SSA))
9f0ac045 1493 verify_loop_closed_ssa (true);
d88fd237 1494 }
9f0ac045 1495 if (loop_closed_ssa_invalidated)
1496 BITMAP_FREE (loop_closed_ssa_invalidated);
d88fd237 1497 }
793a0ab5 1498 while (changed
1499 && ++iteration <= PARAM_VALUE (PARAM_MAX_UNROLL_ITERATIONS));
08162157 1500
0cfe7a23 1501 BITMAP_FREE (father_bbs);
2ebfc881 1502
9f0ac045 1503 if (irred_invalidated
1504 && loops_state_satisfies_p (LOOPS_HAVE_MARKED_IRREDUCIBLE_REGIONS))
1505 mark_irreducible_loops ();
1506
4c641bf8 1507 return 0;
bb445479 1508}
f86b328b 1509
1510/* Canonical induction variable creation pass. */
1511
f86b328b 1512namespace {
1513
1514const pass_data pass_data_iv_canon =
1515{
1516 GIMPLE_PASS, /* type */
1517 "ivcanon", /* name */
1518 OPTGROUP_LOOP, /* optinfo_flags */
f86b328b 1519 TV_TREE_LOOP_IVCANON, /* tv_id */
1520 ( PROP_cfg | PROP_ssa ), /* properties_required */
1521 0, /* properties_provided */
1522 0, /* properties_destroyed */
1523 0, /* todo_flags_start */
1524 0, /* todo_flags_finish */
1525};
1526
1527class pass_iv_canon : public gimple_opt_pass
1528{
1529public:
1530 pass_iv_canon (gcc::context *ctxt)
1531 : gimple_opt_pass (pass_data_iv_canon, ctxt)
1532 {}
1533
1534 /* opt_pass methods: */
31315c24 1535 virtual bool gate (function *) { return flag_tree_loop_ivcanon != 0; }
65b0537f 1536 virtual unsigned int execute (function *fun);
f86b328b 1537
1538}; // class pass_iv_canon
1539
65b0537f 1540unsigned int
1541pass_iv_canon::execute (function *fun)
1542{
1543 if (number_of_loops (fun) <= 1)
1544 return 0;
1545
1546 return canonicalize_induction_variables ();
1547}
1548
f86b328b 1549} // anon namespace
1550
1551gimple_opt_pass *
1552make_pass_iv_canon (gcc::context *ctxt)
1553{
1554 return new pass_iv_canon (ctxt);
1555}
1556
1557/* Complete unrolling of loops. */
1558
f86b328b 1559namespace {
1560
1561const pass_data pass_data_complete_unroll =
1562{
1563 GIMPLE_PASS, /* type */
1564 "cunroll", /* name */
1565 OPTGROUP_LOOP, /* optinfo_flags */
f86b328b 1566 TV_COMPLETE_UNROLL, /* tv_id */
1567 ( PROP_cfg | PROP_ssa ), /* properties_required */
1568 0, /* properties_provided */
1569 0, /* properties_destroyed */
1570 0, /* todo_flags_start */
1571 0, /* todo_flags_finish */
1572};
1573
1574class pass_complete_unroll : public gimple_opt_pass
1575{
1576public:
1577 pass_complete_unroll (gcc::context *ctxt)
1578 : gimple_opt_pass (pass_data_complete_unroll, ctxt)
1579 {}
1580
1581 /* opt_pass methods: */
65b0537f 1582 virtual unsigned int execute (function *);
f86b328b 1583
1584}; // class pass_complete_unroll
1585
65b0537f 1586unsigned int
1587pass_complete_unroll::execute (function *fun)
1588{
1589 if (number_of_loops (fun) <= 1)
1590 return 0;
1591
b96f8145 1592 /* If we ever decide to run loop peeling more than once, we will need to
1593 track loops already peeled in loop structures themselves to avoid
1594 re-peeling the same loop multiple times. */
1595 if (flag_peel_loops)
1596 peeled_loops = BITMAP_ALLOC (NULL);
2a09b28c 1597 unsigned int val = tree_unroll_loops_completely (flag_unroll_loops
1598 || flag_peel_loops
1599 || optimize >= 3, true);
b96f8145 1600 if (peeled_loops)
1601 {
1602 BITMAP_FREE (peeled_loops);
1603 peeled_loops = NULL;
1604 }
1605 return val;
65b0537f 1606}
1607
f86b328b 1608} // anon namespace
1609
1610gimple_opt_pass *
1611make_pass_complete_unroll (gcc::context *ctxt)
1612{
1613 return new pass_complete_unroll (ctxt);
1614}
1615
1616/* Complete unrolling of inner loops. */
1617
f86b328b 1618namespace {
1619
1620const pass_data pass_data_complete_unrolli =
1621{
1622 GIMPLE_PASS, /* type */
1623 "cunrolli", /* name */
1624 OPTGROUP_LOOP, /* optinfo_flags */
f86b328b 1625 TV_COMPLETE_UNROLL, /* tv_id */
1626 ( PROP_cfg | PROP_ssa ), /* properties_required */
1627 0, /* properties_provided */
1628 0, /* properties_destroyed */
1629 0, /* todo_flags_start */
8b88439e 1630 0, /* todo_flags_finish */
f86b328b 1631};
1632
1633class pass_complete_unrolli : public gimple_opt_pass
1634{
1635public:
1636 pass_complete_unrolli (gcc::context *ctxt)
1637 : gimple_opt_pass (pass_data_complete_unrolli, ctxt)
1638 {}
1639
1640 /* opt_pass methods: */
31315c24 1641 virtual bool gate (function *) { return optimize >= 2; }
65b0537f 1642 virtual unsigned int execute (function *);
f86b328b 1643
1644}; // class pass_complete_unrolli
1645
65b0537f 1646unsigned int
1647pass_complete_unrolli::execute (function *fun)
1648{
1649 unsigned ret = 0;
1650
2a09b28c 1651 loop_optimizer_init (LOOPS_NORMAL | LOOPS_HAVE_RECORDED_EXITS);
65b0537f 1652 if (number_of_loops (fun) > 1)
1653 {
1654 scev_initialize ();
1655 ret = tree_unroll_loops_completely (optimize >= 3, false);
65b0537f 1656 scev_finalize ();
1657 }
1658 loop_optimizer_finalize ();
1659
1660 return ret;
1661}
1662
f86b328b 1663} // anon namespace
1664
1665gimple_opt_pass *
1666make_pass_complete_unrolli (gcc::context *ctxt)
1667{
1668 return new pass_complete_unrolli (ctxt);
1669}
1670
1671