/* Induction variable canonicalization and loop peeling.
   Copyright (C) 2004-2017 Free Software Foundation, Inc.

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify it
under the terms of the GNU General Public License as published by the
Free Software Foundation; either version 3, or (at your option) any
later version.

GCC is distributed in the hope that it will be useful, but WITHOUT
ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3.  If not see
<http://www.gnu.org/licenses/>.  */
/* This pass detects the loops that iterate a constant number of times,
   adds a canonical induction variable (step -1, tested against 0)
   and replaces the exit test.  This enables the less powerful rtl
   level analysis to use this information.

   This might spoil the code in some cases (by increasing register pressure).
   Note that in the case the new variable is not needed, ivopts will get rid
   of it, so it might only be a problem when there are no other linear
   induction variables.  In that case the created optimization possibilities
   are likely to pay off.

   We also perform
     - complete unrolling (or peeling) when the loop iterates few enough
       times
     - simple peeling (i.e. copying a few initial iterations prior to the
       loop) when the number of iterations estimate is known (typically
       from the profile info).  */
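
/* As a rough illustrative sketch (not taken from any testcase; the exact
   GIMPLE depends on types and loop shape), a counted loop such as

     for (i = 0; i < n; i++)
       a[i] = 0;

   whose body is known to execute n times gets a fresh IV counting the
   remaining iterations, and the exit test becomes an equality comparison
   of that IV against zero:

     ivtmp = n;
     do
       {
	 a[i] = 0;
	 i = i + 1;
	 ivtmp = ivtmp - 1;
       }
     while (ivtmp != 0);  */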

#include "config.h"
#include "system.h"
#include "coretypes.h"
#include "backend.h"
#include "tree.h"
#include "gimple.h"
#include "cfghooks.h"
#include "tree-pass.h"
#include "ssa.h"
#include "cgraph.h"
#include "gimple-pretty-print.h"
#include "fold-const.h"
#include "profile.h"
#include "gimple-fold.h"
#include "tree-eh.h"
#include "gimple-iterator.h"
#include "tree-cfg.h"
#include "tree-ssa-loop-manip.h"
#include "tree-ssa-loop-niter.h"
#include "tree-ssa-loop.h"
#include "tree-into-ssa.h"
#include "cfgloop.h"
#include "tree-chrec.h"
#include "tree-scalar-evolution.h"
#include "params.h"
#include "tree-inline.h"
#include "tree-cfgcleanup.h"
#include "builtins.h"

/* Specifies types of loops that may be unrolled.  */

enum unroll_level
{
  UL_SINGLE_ITER,	/* Only loops that exit immediately in the first
			   iteration.  */
  UL_NO_GROWTH,		/* Only loops whose unrolling will not cause increase
			   of code size.  */
  UL_ALL		/* All suitable loops.  */
};

/* Adds a canonical induction variable to LOOP iterating NITER times.  EXIT
   is the exit edge whose condition is replaced.  */

static void
create_canonical_iv (struct loop *loop, edge exit, tree niter)
{
  edge in;
  tree type, var;
  gcond *cond;
  gimple_stmt_iterator incr_at;
  enum tree_code cmp;

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      fprintf (dump_file, "Added canonical iv to loop %d, ", loop->num);
      print_generic_expr (dump_file, niter, TDF_SLIM);
      fprintf (dump_file, " iterations.\n");
    }

  cond = as_a <gcond *> (last_stmt (exit->src));
  in = EDGE_SUCC (exit->src, 0);
  if (in == exit)
    in = EDGE_SUCC (exit->src, 1);

  /* Note that we do not need to worry about overflows, since
     the type of niter is always unsigned and all comparisons are
     just for equality/nonequality -- i.e. everything works
     with modulo arithmetic.  */

  type = TREE_TYPE (niter);
  niter = fold_build2 (PLUS_EXPR, type,
		       niter,
		       build_int_cst (type, 1));
  incr_at = gsi_last_bb (in->src);
  create_iv (niter,
	     build_int_cst (type, -1),
	     NULL_TREE, loop,
	     &incr_at, false, NULL, &var);

  cmp = (exit->flags & EDGE_TRUE_VALUE) ? EQ_EXPR : NE_EXPR;
  gimple_cond_set_code (cond, cmp);
  gimple_cond_set_lhs (cond, var);
  gimple_cond_set_rhs (cond, build_int_cst (type, 0));
  update_stmt (cond);
}

/* Describe size of loop as detected by tree_estimate_loop_size.  */
struct loop_size
{
  /* Number of instructions in the loop.  */
  int overall;

  /* Number of instructions that will be likely optimized out in
     peeled iterations of loop (i.e. computation based on induction
     variable where induction variable starts at known constant.)  */
  int eliminated_by_peeling;

  /* Same statistics for last iteration of loop: it is smaller because
     instructions after exit are not executed.  */
  int last_iteration;
  int last_iteration_eliminated_by_peeling;

  /* If some IV computation will become constant.  */
  bool constant_iv;

  /* Number of call stmts that are not a builtin and are pure or const
     present on the hot path.  */
  int num_pure_calls_on_hot_path;
  /* Number of call stmts that are not a builtin and are not pure nor const
     present on the hot path.  */
  int num_non_pure_calls_on_hot_path;
  /* Number of statements other than calls in the loop.  */
  int non_call_stmts_on_hot_path;
  /* Number of branches seen on the hot path.  */
  int num_branches_on_hot_path;
};

/* Return true if OP in STMT will be constant after peeling LOOP.  */

static bool
constant_after_peeling (tree op, gimple *stmt, struct loop *loop)
{
  if (is_gimple_min_invariant (op))
    return true;

  /* We can still fold accesses to constant arrays when index is known.  */
  if (TREE_CODE (op) != SSA_NAME)
    {
      tree base = op;

      /* First take a quick look whether there is a constant array inside.  */
      while (handled_component_p (base))
	base = TREE_OPERAND (base, 0);
      if ((DECL_P (base)
	   && ctor_for_folding (base) != error_mark_node)
	  || CONSTANT_CLASS_P (base))
	{
	  /* If so, see if we understand all the indices.  */
	  base = op;
	  while (handled_component_p (base))
	    {
	      if (TREE_CODE (base) == ARRAY_REF
		  && !constant_after_peeling (TREE_OPERAND (base, 1), stmt, loop))
		return false;
	      base = TREE_OPERAND (base, 0);
	    }
	  return true;
	}
      return false;
    }

  /* Induction variables are constants when defined in loop.  */
  if (loop_containing_stmt (stmt) != loop)
    return false;
  tree ev = analyze_scalar_evolution (loop, op);
  if (chrec_contains_undetermined (ev)
      || chrec_contains_symbols (ev))
    return false;
  return true;
}

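/* An illustrative (hypothetical) case for the above: with

     static const int t[3] = { 10, 20, 30 };
     for (i = 0; i < 3; i++)
       sum += t[i];

   the load t[i] is not invariant, but once the loop is peeled each copy
   sees a constant index, so every access folds to 10, 20 or 30.  */
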
/* Computes an estimated number of insns in LOOP.
   EXIT (if non-NULL) is an exit edge that will be eliminated in all but last
   iteration of the loop.
   EDGE_TO_CANCEL (if non-NULL) is a non-exit edge eliminated in the last
   iteration of the loop.
   Return results in SIZE, estimating the benefit of complete unrolling
   exiting by EXIT.
   Stop estimating after UPPER_BOUND is met.  Return true in this case.  */

static bool
tree_estimate_loop_size (struct loop *loop, edge exit, edge edge_to_cancel,
			 struct loop_size *size, int upper_bound)
{
  basic_block *body = get_loop_body (loop);
  gimple_stmt_iterator gsi;
  unsigned int i;
  bool after_exit;
  vec<basic_block> path = get_loop_hot_path (loop);

  size->overall = 0;
  size->eliminated_by_peeling = 0;
  size->last_iteration = 0;
  size->last_iteration_eliminated_by_peeling = 0;
  size->num_pure_calls_on_hot_path = 0;
  size->num_non_pure_calls_on_hot_path = 0;
  size->non_call_stmts_on_hot_path = 0;
  size->num_branches_on_hot_path = 0;
  size->constant_iv = 0;

  if (dump_file && (dump_flags & TDF_DETAILS))
    fprintf (dump_file, "Estimating sizes for loop %i\n", loop->num);
  for (i = 0; i < loop->num_nodes; i++)
    {
      if (edge_to_cancel && body[i] != edge_to_cancel->src
	  && dominated_by_p (CDI_DOMINATORS, body[i], edge_to_cancel->src))
	after_exit = true;
      else
	after_exit = false;
      if (dump_file && (dump_flags & TDF_DETAILS))
	fprintf (dump_file, " BB: %i, after_exit: %i\n", body[i]->index,
		 after_exit);

      for (gsi = gsi_start_bb (body[i]); !gsi_end_p (gsi); gsi_next (&gsi))
	{
	  gimple *stmt = gsi_stmt (gsi);
	  int num = estimate_num_insns (stmt, &eni_size_weights);
	  bool likely_eliminated = false;
	  bool likely_eliminated_last = false;
	  bool likely_eliminated_peeled = false;

	  if (dump_file && (dump_flags & TDF_DETAILS))
	    {
	      fprintf (dump_file, "  size: %3i ", num);
	      print_gimple_stmt (dump_file, gsi_stmt (gsi), 0);
	    }

	  /* Look for reasons why we might optimize this stmt away.  */

	  if (!gimple_has_side_effects (stmt))
	    {
	      /* Exit conditional.  */
	      if (exit && body[i] == exit->src
		  && stmt == last_stmt (exit->src))
		{
		  if (dump_file && (dump_flags & TDF_DETAILS))
		    fprintf (dump_file, "   Exit condition will be eliminated "
			     "in peeled copies.\n");
		  likely_eliminated_peeled = true;
		}
	      if (edge_to_cancel && body[i] == edge_to_cancel->src
		  && stmt == last_stmt (edge_to_cancel->src))
		{
		  if (dump_file && (dump_flags & TDF_DETAILS))
		    fprintf (dump_file, "   Exit condition will be eliminated "
			     "in last copy.\n");
		  likely_eliminated_last = true;
		}
	      /* Sets of IV variables.  */
	      if (gimple_code (stmt) == GIMPLE_ASSIGN
		  && constant_after_peeling (gimple_assign_lhs (stmt), stmt, loop))
		{
		  if (dump_file && (dump_flags & TDF_DETAILS))
		    fprintf (dump_file, "   Induction variable computation will"
			     " be folded away.\n");
		  likely_eliminated = true;
		}
	      /* Assignments of IV variables.  */
	      else if (gimple_code (stmt) == GIMPLE_ASSIGN
		       && TREE_CODE (gimple_assign_lhs (stmt)) == SSA_NAME
		       && constant_after_peeling (gimple_assign_rhs1 (stmt),
						  stmt, loop)
		       && (gimple_assign_rhs_class (stmt) != GIMPLE_BINARY_RHS
			   || constant_after_peeling (gimple_assign_rhs2 (stmt),
						      stmt, loop)))
		{
		  size->constant_iv = true;
		  if (dump_file && (dump_flags & TDF_DETAILS))
		    fprintf (dump_file,
			     "   Constant expression will be folded away.\n");
		  likely_eliminated = true;
		}
	      /* Conditionals.  */
	      else if ((gimple_code (stmt) == GIMPLE_COND
			&& constant_after_peeling (gimple_cond_lhs (stmt), stmt,
						   loop)
			&& constant_after_peeling (gimple_cond_rhs (stmt), stmt,
						   loop)
			/* We don't simplify all constant compares so make sure
			   they are not both constant already.  See PR70288.  */
			&& (! is_gimple_min_invariant (gimple_cond_lhs (stmt))
			    || ! is_gimple_min_invariant
				   (gimple_cond_rhs (stmt))))
		       || (gimple_code (stmt) == GIMPLE_SWITCH
			   && constant_after_peeling (gimple_switch_index (
							as_a <gswitch *>
							  (stmt)),
						      stmt, loop)
			   && ! is_gimple_min_invariant
				  (gimple_switch_index
				     (as_a <gswitch *> (stmt)))))
		{
		  if (dump_file && (dump_flags & TDF_DETAILS))
		    fprintf (dump_file, "   Constant conditional.\n");
		  likely_eliminated = true;
		}
	    }

	  size->overall += num;
	  if (likely_eliminated || likely_eliminated_peeled)
	    size->eliminated_by_peeling += num;
	  if (!after_exit)
	    {
	      size->last_iteration += num;
	      if (likely_eliminated || likely_eliminated_last)
		size->last_iteration_eliminated_by_peeling += num;
	    }
	  if ((size->overall * 3 / 2 - size->eliminated_by_peeling
	       - size->last_iteration_eliminated_by_peeling) > upper_bound)
	    {
	      free (body);
	      path.release ();
	      return true;
	    }
	}
    }
  while (path.length ())
    {
      basic_block bb = path.pop ();
      for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
	{
	  gimple *stmt = gsi_stmt (gsi);
	  if (gimple_code (stmt) == GIMPLE_CALL
	      && !gimple_inexpensive_call_p (as_a <gcall *> (stmt)))
	    {
	      int flags = gimple_call_flags (stmt);
	      if (flags & (ECF_PURE | ECF_CONST))
		size->num_pure_calls_on_hot_path++;
	      else
		size->num_non_pure_calls_on_hot_path++;
	      size->num_branches_on_hot_path ++;
	    }
	  /* Count inexpensive calls as non-calls, because they will likely
	     expand inline.  */
	  else if (gimple_code (stmt) != GIMPLE_DEBUG)
	    size->non_call_stmts_on_hot_path++;
	  if (((gimple_code (stmt) == GIMPLE_COND
		&& (!constant_after_peeling (gimple_cond_lhs (stmt), stmt, loop)
		    || !constant_after_peeling (gimple_cond_rhs (stmt), stmt,
						loop)))
	       || (gimple_code (stmt) == GIMPLE_SWITCH
		   && !constant_after_peeling (gimple_switch_index (
						 as_a <gswitch *> (stmt)),
					       stmt, loop)))
	      && (!exit || bb != exit->src))
	    size->num_branches_on_hot_path++;
	}
    }
  path.release ();
  if (dump_file && (dump_flags & TDF_DETAILS))
    fprintf (dump_file, "size: %i-%i, last_iteration: %i-%i\n", size->overall,
	     size->eliminated_by_peeling, size->last_iteration,
	     size->last_iteration_eliminated_by_peeling);

  free (body);
  return false;
}

/* Estimate number of insns of completely unrolled loop.
   It is (NUNROLL + 1) * size of loop body, taking into account
   the fact that in the last copy everything after the exit conditional
   is dead and that some instructions will be eliminated after
   peeling.

   The loop body is likely going to simplify further; this is difficult
   to guess, so we just decrease the result by 1/3.  */

static unsigned HOST_WIDE_INT
estimated_unrolled_size (struct loop_size *size,
			 unsigned HOST_WIDE_INT nunroll)
{
  HOST_WIDE_INT unr_insns = ((nunroll)
			     * (HOST_WIDE_INT) (size->overall
						- size->eliminated_by_peeling));
  if (!nunroll)
    unr_insns = 0;
  unr_insns += size->last_iteration - size->last_iteration_eliminated_by_peeling;

  unr_insns = unr_insns * 2 / 3;
  if (unr_insns <= 0)
    unr_insns = 1;

  return unr_insns;
}

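/* A worked example with made-up numbers: for a body of 10 insns of which
   4 fold away in peeled copies (overall = 10, eliminated_by_peeling = 4),
   a last iteration of 8 insns of which 4 fold away, and nunroll = 5, the
   estimate is 5 * (10 - 4) + (8 - 4) = 34 insns, scaled by 2/3 to 22.  */
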
/* Loop LOOP is known to not loop.  See if there is an edge in the loop
   body that can be removed to make the loop always exit while at
   the same time not making any code potentially executed
   during the last iteration dead.

   After complete unrolling we still may get rid of the conditional
   on the exit in the last copy even if we have no idea what it does.
   This is quite a common case for loops of the form

     int a[5];
     for (i=0;i<b;i++)
       a[i]=0;

   Here we prove the loop to iterate 5 times but we do not know
   it from the induction variable.

   For now we handle only the simple case where there is an exit condition
   just before the latch block and the latch block contains no statements
   with side effects that may otherwise terminate the execution of loop
   (such as by EH or by terminating the program or longjmp).

   In the general case we may want to cancel the paths leading to statements
   loop-niter identified as having undefined effect in the last iteration.
   The other cases are hopefully rare and will be cleaned up later.  */

static edge
loop_edge_to_cancel (struct loop *loop)
{
  vec<edge> exits;
  unsigned i;
  edge edge_to_cancel;
  gimple_stmt_iterator gsi;

  /* We want only one predecessor of the loop latch.  */
  if (EDGE_COUNT (loop->latch->preds) > 1)
    return NULL;

  exits = get_loop_exit_edges (loop);

  FOR_EACH_VEC_ELT (exits, i, edge_to_cancel)
    {
      /* Find the other edge than the loop exit
	 leaving the conditional.  */
      if (EDGE_COUNT (edge_to_cancel->src->succs) != 2)
	continue;
      if (EDGE_SUCC (edge_to_cancel->src, 0) == edge_to_cancel)
	edge_to_cancel = EDGE_SUCC (edge_to_cancel->src, 1);
      else
	edge_to_cancel = EDGE_SUCC (edge_to_cancel->src, 0);

      /* We can only handle conditionals.  */
      if (!(edge_to_cancel->flags & (EDGE_TRUE_VALUE | EDGE_FALSE_VALUE)))
	continue;

      /* We should never have conditionals in the loop latch.  */
      gcc_assert (edge_to_cancel->dest != loop->header);

      /* Check that it leads to loop latch.  */
      if (edge_to_cancel->dest != loop->latch)
	continue;

      exits.release ();

      /* Verify that the code in loop latch does nothing that may end program
	 execution without really reaching the exit.  This may include
	 non-pure/const function calls, EH statements, volatile ASMs etc.  */
      for (gsi = gsi_start_bb (loop->latch); !gsi_end_p (gsi); gsi_next (&gsi))
	if (gimple_has_side_effects (gsi_stmt (gsi)))
	  return NULL;
      return edge_to_cancel;
    }
  exits.release ();
  return NULL;
}

/* Remove all tests for exits that are known to be taken after LOOP was
   peeled NPEELED times.  Put gcc_unreachable before every statement
   known to not be executed.  */

static bool
remove_exits_and_undefined_stmts (struct loop *loop, unsigned int npeeled)
{
  struct nb_iter_bound *elt;
  bool changed = false;

  for (elt = loop->bounds; elt; elt = elt->next)
    {
      /* If statement is known to be undefined after peeling, turn it
	 into unreachable (or trap when debugging experience is supposed
	 to be good).  */
      if (!elt->is_exit
	  && wi::ltu_p (elt->bound, npeeled))
	{
	  gimple_stmt_iterator gsi = gsi_for_stmt (elt->stmt);
	  gcall *stmt = gimple_build_call
	      (builtin_decl_implicit (BUILT_IN_UNREACHABLE), 0);
	  gimple_set_location (stmt, gimple_location (elt->stmt));
	  gsi_insert_before (&gsi, stmt, GSI_NEW_STMT);
	  split_block (gimple_bb (stmt), stmt);
	  changed = true;
	  if (dump_file && (dump_flags & TDF_DETAILS))
	    {
	      fprintf (dump_file, "Forced statement unreachable: ");
	      print_gimple_stmt (dump_file, elt->stmt, 0);
	    }
	}
      /* If we know the exit will be taken after peeling, update.  */
      else if (elt->is_exit
	       && wi::leu_p (elt->bound, npeeled))
	{
	  basic_block bb = gimple_bb (elt->stmt);
	  edge exit_edge = EDGE_SUCC (bb, 0);

	  if (dump_file && (dump_flags & TDF_DETAILS))
	    {
	      fprintf (dump_file, "Forced exit to be taken: ");
	      print_gimple_stmt (dump_file, elt->stmt, 0);
	    }
	  if (!loop_exit_edge_p (loop, exit_edge))
	    exit_edge = EDGE_SUCC (bb, 1);
	  exit_edge->probability = REG_BR_PROB_BASE;
	  exit_edge->count = exit_edge->src->count;
	  gcc_checking_assert (loop_exit_edge_p (loop, exit_edge));
	  gcond *cond_stmt = as_a <gcond *> (elt->stmt);
	  if (exit_edge->flags & EDGE_TRUE_VALUE)
	    gimple_cond_make_true (cond_stmt);
	  else
	    gimple_cond_make_false (cond_stmt);
	  update_stmt (cond_stmt);
	  changed = true;
	}
    }
  return changed;
}

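/* A sketch of the effect (hypothetical bounds): if the loop was peeled
   3 times and a recorded bound says some non-exit statement executes at
   most twice, the surviving loop body can never reach it, so a call to
   __builtin_unreachable () is placed before it; an exit recorded to be
   taken within the first 3 iterations is folded to be taken always.  */
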
/* Remove all exits that are known to be never taken because of the loop bound
   discovered.  */

static bool
remove_redundant_iv_tests (struct loop *loop)
{
  struct nb_iter_bound *elt;
  bool changed = false;

  if (!loop->any_upper_bound)
    return false;
  for (elt = loop->bounds; elt; elt = elt->next)
    {
      /* Exit is pointless if it won't be taken before loop reaches
	 upper bound.  */
      if (elt->is_exit && loop->any_upper_bound
	  && wi::ltu_p (loop->nb_iterations_upper_bound, elt->bound))
	{
	  basic_block bb = gimple_bb (elt->stmt);
	  edge exit_edge = EDGE_SUCC (bb, 0);
	  struct tree_niter_desc niter;

	  if (!loop_exit_edge_p (loop, exit_edge))
	    exit_edge = EDGE_SUCC (bb, 1);

	  /* Only when we know the actual number of iterations, not
	     just a bound, we can remove the exit.  */
	  if (!number_of_iterations_exit (loop, exit_edge,
					  &niter, false, false)
	      || !integer_onep (niter.assumptions)
	      || !integer_zerop (niter.may_be_zero)
	      || !niter.niter
	      || TREE_CODE (niter.niter) != INTEGER_CST
	      || !wi::ltu_p (loop->nb_iterations_upper_bound,
			     wi::to_widest (niter.niter)))
	    continue;

	  if (dump_file && (dump_flags & TDF_DETAILS))
	    {
	      fprintf (dump_file, "Removed pointless exit: ");
	      print_gimple_stmt (dump_file, elt->stmt, 0);
	    }
	  gcond *cond_stmt = as_a <gcond *> (elt->stmt);
	  if (exit_edge->flags & EDGE_TRUE_VALUE)
	    gimple_cond_make_false (cond_stmt);
	  else
	    gimple_cond_make_true (cond_stmt);
	  update_stmt (cond_stmt);
	  changed = true;
	}
    }
  return changed;
}

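/* For instance (made-up numbers): if the loop is proved to iterate at most
   10 times, an exit whose IV test can first be taken at iteration 100 never
   triggers; provided the exact iteration count of that exit is computable,
   its condition is folded so the exit is never taken.  */
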
/* Stores loops that will be unlooped and edges that will be removed
   after we process whole loop tree.  */
static vec<loop_p> loops_to_unloop;
static vec<int> loops_to_unloop_nunroll;
static vec<edge> edges_to_remove;
/* Stores loops that have been peeled.  */
static bitmap peeled_loops;

/* Cancel all fully unrolled loops by putting __builtin_unreachable
   on the latch edge.
   We do it after all unrolling since unlooping moves basic blocks
   across loop boundaries trashing loop closed SSA form as well
   as SCEV info needed to be intact during unrolling.

   IRRED_INVALIDATED is used to bookkeep if information about
   irreducible regions may become invalid as a result
   of the transformation.
   LOOP_CLOSED_SSA_INVALIDATED is used to bookkeep the case
   when we need to go into loop closed SSA form.  */

static void
unloop_loops (bitmap loop_closed_ssa_invalidated,
	      bool *irred_invalidated)
{
  while (loops_to_unloop.length ())
    {
      struct loop *loop = loops_to_unloop.pop ();
      int n_unroll = loops_to_unloop_nunroll.pop ();
      basic_block latch = loop->latch;
      edge latch_edge = loop_latch_edge (loop);
      int flags = latch_edge->flags;
      location_t locus = latch_edge->goto_locus;
      gcall *stmt;
      gimple_stmt_iterator gsi;

      remove_exits_and_undefined_stmts (loop, n_unroll);

      /* Unloop destroys the latch edge.  */
      unloop (loop, irred_invalidated, loop_closed_ssa_invalidated);

      /* Create new basic block for the latch edge destination and wire
	 it in.  */
      stmt = gimple_build_call (builtin_decl_implicit (BUILT_IN_UNREACHABLE), 0);
      latch_edge = make_edge (latch, create_basic_block (NULL, NULL, latch), flags);
      latch_edge->probability = 0;
      latch_edge->count = profile_count::zero ();
      latch_edge->flags |= flags;
      latch_edge->goto_locus = locus;

      add_bb_to_loop (latch_edge->dest, current_loops->tree_root);
      latch_edge->dest->count = profile_count::zero ();
      latch_edge->dest->frequency = 0;
      set_immediate_dominator (CDI_DOMINATORS, latch_edge->dest, latch_edge->src);

      gsi = gsi_start_bb (latch_edge->dest);
      gsi_insert_after (&gsi, stmt, GSI_NEW_STMT);
    }
  loops_to_unloop.release ();
  loops_to_unloop_nunroll.release ();

  /* Remove edges in peeled copies.  */
  unsigned i;
  edge e;
  FOR_EACH_VEC_ELT (edges_to_remove, i, e)
    {
      bool ok = remove_path (e, irred_invalidated, loop_closed_ssa_invalidated);
      gcc_assert (ok);
    }
  edges_to_remove.release ();
}

/* Tries to unroll LOOP completely, i.e. NITER times.
   UL determines which loops we are allowed to unroll.
   EXIT is the exit of the loop that should be eliminated.
   MAXITER specifies the bound on number of iterations, -1 if it is
   not known or too large for HOST_WIDE_INT.  The location
   LOCUS corresponding to the loop is used when emitting
   a summary of the unroll to the dump file.  */

static bool
try_unroll_loop_completely (struct loop *loop,
			    edge exit, tree niter,
			    enum unroll_level ul,
			    HOST_WIDE_INT maxiter,
			    location_t locus)
{
  unsigned HOST_WIDE_INT n_unroll = 0, ninsns, unr_insns;
  struct loop_size size;
  bool n_unroll_found = false;
  edge edge_to_cancel = NULL;
  dump_flags_t report_flags = MSG_OPTIMIZED_LOCATIONS | TDF_DETAILS;

  /* See if we proved number of iterations to be low constant.

     EXIT is an edge that will be removed in all but last iteration of
     the loop.

     EDGE_TO_CANCEL is an edge that will be removed from the last iteration
     of the unrolled sequence and is expected to make the final loop not
     rolling.

     If the number of executions of the loop is determined by a standard
     induction variable test, then EXIT and EDGE_TO_CANCEL are the two edges
     leaving from the iv test.  */
  if (tree_fits_uhwi_p (niter))
    {
      n_unroll = tree_to_uhwi (niter);
      n_unroll_found = true;
      edge_to_cancel = EDGE_SUCC (exit->src, 0);
      if (edge_to_cancel == exit)
	edge_to_cancel = EDGE_SUCC (exit->src, 1);
    }
  /* We do not know the number of iterations and thus we can not eliminate
     the EXIT edge.  */
  else
    exit = NULL;

  /* See if we can improve our estimate by using recorded loop bounds.  */
  if (maxiter >= 0
      && (!n_unroll_found || (unsigned HOST_WIDE_INT)maxiter < n_unroll))
    {
      n_unroll = maxiter;
      n_unroll_found = true;
      /* Loop terminates before the IV variable test, so we can not
	 remove it in the last iteration.  */
      edge_to_cancel = NULL;
    }

  if (!n_unroll_found)
    return false;

  if (n_unroll > (unsigned) PARAM_VALUE (PARAM_MAX_COMPLETELY_PEEL_TIMES))
    {
      if (dump_file && (dump_flags & TDF_DETAILS))
	fprintf (dump_file, "Not unrolling loop %d "
		 "(--param max-completely-peel-times limit reached).\n",
		 loop->num);
      return false;
    }

  if (!edge_to_cancel)
    edge_to_cancel = loop_edge_to_cancel (loop);

  if (n_unroll)
    {
      bool large;
      if (ul == UL_SINGLE_ITER)
	return false;

      /* EXIT can be removed only if we are sure it passes first N_UNROLL
	 iterations.  */
      bool remove_exit = (exit && niter
			  && TREE_CODE (niter) == INTEGER_CST
			  && wi::leu_p (n_unroll, wi::to_widest (niter)));

      large = tree_estimate_loop_size
		(loop, remove_exit ? exit : NULL, edge_to_cancel, &size,
		 PARAM_VALUE (PARAM_MAX_COMPLETELY_PEELED_INSNS));
      ninsns = size.overall;
      if (large)
	{
	  if (dump_file && (dump_flags & TDF_DETAILS))
	    fprintf (dump_file, "Not unrolling loop %d: it is too large.\n",
		     loop->num);
	  return false;
	}

      unr_insns = estimated_unrolled_size (&size, n_unroll);
      if (dump_file && (dump_flags & TDF_DETAILS))
	{
	  fprintf (dump_file, "  Loop size: %d\n", (int) ninsns);
	  fprintf (dump_file, "  Estimated size after unrolling: %d\n",
		   (int) unr_insns);
	}

      /* If the code is going to shrink, we don't need to be extra cautious
	 on guessing if the unrolling is going to be profitable.  */
      if (unr_insns
	  /* If there is IV variable that will become constant, we save
	     one instruction in the loop prologue we do not account
	     otherwise.  */
	  <= ninsns + (size.constant_iv != false))
	;
      /* We unroll only inner loops, because we do not consider it profitable
	 otherwise.  We still can cancel loopback edge of not rolling loop;
	 this is always a good idea.  */
      else if (ul == UL_NO_GROWTH)
	{
	  if (dump_file && (dump_flags & TDF_DETAILS))
	    fprintf (dump_file, "Not unrolling loop %d: size would grow.\n",
		     loop->num);
	  return false;
	}
      /* Outer loops tend to be less interesting candidates for complete
	 unrolling unless we can do a lot of propagation into the inner loop
	 body.  For now we disable outer loop unrolling when the code would
	 grow.  */
      else if (loop->inner)
	{
	  if (dump_file && (dump_flags & TDF_DETAILS))
	    fprintf (dump_file, "Not unrolling loop %d: "
		     "it is not innermost and code would grow.\n",
		     loop->num);
	  return false;
	}
      /* If there is call on a hot path through the loop, then
	 there is most probably not much to optimize.  */
      else if (size.num_non_pure_calls_on_hot_path)
	{
	  if (dump_file && (dump_flags & TDF_DETAILS))
	    fprintf (dump_file, "Not unrolling loop %d: "
		     "contains call and code would grow.\n",
		     loop->num);
	  return false;
	}
      /* If there is pure/const call in the function, then we
	 can still optimize the unrolled loop body if it contains
	 some other interesting code than the calls and code
	 storing or cumulating the return value.  */
      else if (size.num_pure_calls_on_hot_path
	       /* One IV increment, one test, one ivtmp store
		  and one useful stmt.  That is about minimal loop
		  doing pure call.  */
	       && (size.non_call_stmts_on_hot_path
		   <= 3 + size.num_pure_calls_on_hot_path))
	{
	  if (dump_file && (dump_flags & TDF_DETAILS))
	    fprintf (dump_file, "Not unrolling loop %d: "
		     "contains just pure calls and code would grow.\n",
		     loop->num);
	  return false;
	}
      /* Complete unrolling is a major win when control flow is removed and
	 one big basic block is created.  If the loop contains control flow
	 the optimization may still be a win because of eliminating the loop
	 overhead but it also may blow the branch predictor tables.
	 Limit number of branches on the hot path through the peeled
	 sequence.  */
      else if (size.num_branches_on_hot_path * (int)n_unroll
	       > PARAM_VALUE (PARAM_MAX_PEEL_BRANCHES))
	{
	  if (dump_file && (dump_flags & TDF_DETAILS))
	    fprintf (dump_file, "Not unrolling loop %d: "
		     "number of branches on hot path in the unrolled sequence"
		     " reaches --param max-peel-branches limit.\n",
		     loop->num);
	  return false;
	}
      else if (unr_insns
	       > (unsigned) PARAM_VALUE (PARAM_MAX_COMPLETELY_PEELED_INSNS))
	{
	  if (dump_file && (dump_flags & TDF_DETAILS))
	    fprintf (dump_file, "Not unrolling loop %d: "
		     "(--param max-completely-peeled-insns limit reached).\n",
		     loop->num);
	  return false;
	}
      if (!n_unroll)
	dump_printf_loc (report_flags, locus,
			 "loop turned into non-loop; it never loops.\n");

      initialize_original_copy_tables ();
      auto_sbitmap wont_exit (n_unroll + 1);
      if (exit && niter
	  && TREE_CODE (niter) == INTEGER_CST
	  && wi::leu_p (n_unroll, wi::to_widest (niter)))
	{
	  bitmap_ones (wont_exit);
	  if (wi::eq_p (wi::to_widest (niter), n_unroll)
	      || edge_to_cancel)
	    bitmap_clear_bit (wont_exit, 0);
	}
      else
	{
	  exit = NULL;
	  bitmap_clear (wont_exit);
	}

      if (!gimple_duplicate_loop_to_header_edge (loop, loop_preheader_edge (loop),
						 n_unroll, wont_exit,
						 exit, &edges_to_remove,
						 DLTHE_FLAG_UPDATE_FREQ
						 | DLTHE_FLAG_COMPLETTE_PEEL))
	{
	  free_original_copy_tables ();
	  if (dump_file && (dump_flags & TDF_DETAILS))
	    fprintf (dump_file, "Failed to duplicate the loop\n");
	  return false;
	}

      free_original_copy_tables ();
    }

  /* Remove the conditional from the last copy of the loop.  */
  if (edge_to_cancel)
    {
      gcond *cond = as_a <gcond *> (last_stmt (edge_to_cancel->src));
      force_edge_cold (edge_to_cancel, true);
      if (edge_to_cancel->flags & EDGE_TRUE_VALUE)
	gimple_cond_make_false (cond);
      else
	gimple_cond_make_true (cond);
      update_stmt (cond);
      /* Do not remove the path.  Doing so may remove outer loop
	 and confuse bookkeeping code in tree_unroll_loops_completely.  */
    }

  /* Store the loop for later unlooping and exit removal.  */
  loops_to_unloop.safe_push (loop);
  loops_to_unloop_nunroll.safe_push (n_unroll);

  if (dump_enabled_p ())
    {
      if (!n_unroll)
	dump_printf_loc (MSG_OPTIMIZED_LOCATIONS | TDF_DETAILS, locus,
			 "loop turned into non-loop; it never loops\n");
      else
	{
	  dump_printf_loc (MSG_OPTIMIZED_LOCATIONS | TDF_DETAILS, locus,
			   "loop with %d iterations completely unrolled",
			   (int) (n_unroll + 1));
	  if (loop->header->count.initialized_p ())
	    dump_printf (MSG_OPTIMIZED_LOCATIONS | TDF_DETAILS,
			 " (header execution count %d)",
			 (int)loop->header->count.to_gcov_type ());
	  dump_printf (MSG_OPTIMIZED_LOCATIONS | TDF_DETAILS, "\n");
	}
    }

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      if (exit)
	fprintf (dump_file, "Exit condition of peeled iterations was "
		 "eliminated.\n");
      if (edge_to_cancel)
	fprintf (dump_file, "Last iteration exit edge was proved true.\n");
      else
	fprintf (dump_file, "Latch of last iteration was marked by "
		 "__builtin_unreachable ().\n");
    }

  return true;
}

/* Return number of instructions after peeling.  */
static unsigned HOST_WIDE_INT
estimated_peeled_sequence_size (struct loop_size *size,
				unsigned HOST_WIDE_INT npeel)
{
  return MAX (npeel * (HOST_WIDE_INT) (size->overall
				       - size->eliminated_by_peeling), 1);
}

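/* For example (made-up numbers): peeling 4 copies of a 12-insn body where
   5 insns fold away in each peeled copy yields roughly
   4 * (12 - 5) = 28 insns, to be compared against --param max-peel-insns.  */
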
/* If the loop is expected to iterate N times and is
   small enough, duplicate the loop body N+1 times before
   the loop itself.  This way the hot path will never
   enter the loop.
   Parameters are the same as for try_unroll_loop_completely.  */

static bool
try_peel_loop (struct loop *loop,
	       edge exit, tree niter,
	       HOST_WIDE_INT maxiter)
{
  HOST_WIDE_INT npeel;
  struct loop_size size;
  int peeled_size;

  if (!flag_peel_loops || PARAM_VALUE (PARAM_MAX_PEEL_TIMES) <= 0
      || !peeled_loops)
    return false;

  if (bitmap_bit_p (peeled_loops, loop->num))
    {
      if (dump_file)
	fprintf (dump_file, "Not peeling: loop is already peeled\n");
      return false;
    }

  /* Peel only innermost loops.
     While the code is perfectly capable of peeling non-innermost loops,
     the heuristics would probably need some improvements.  */
  if (loop->inner)
    {
      if (dump_file)
	fprintf (dump_file, "Not peeling: outer loop\n");
      return false;
    }

  if (!optimize_loop_for_speed_p (loop))
    {
      if (dump_file)
	fprintf (dump_file, "Not peeling: cold loop\n");
      return false;
    }

  /* Check if there is an estimate on the number of iterations.  */
  npeel = estimated_loop_iterations_int (loop);
  if (npeel < 0)
    npeel = likely_max_loop_iterations_int (loop);
  if (npeel < 0)
    {
      if (dump_file)
	fprintf (dump_file, "Not peeling: number of iterations is not "
		 "estimated\n");
      return false;
    }
  if (maxiter >= 0 && maxiter <= npeel)
    {
      if (dump_file)
	fprintf (dump_file, "Not peeling: upper bound is known so can "
		 "unroll completely\n");
      return false;
    }

  /* We want to peel estimated number of iterations + 1 (so we never
     enter the loop on quick path).  Check against PARAM_MAX_PEEL_TIMES
     and be sure to avoid overflows.  */
  if (npeel > PARAM_VALUE (PARAM_MAX_PEEL_TIMES) - 1)
    {
      if (dump_file)
	fprintf (dump_file, "Not peeling: rolls too much "
		 "(%i + 1 > --param max-peel-times)\n", (int) npeel);
      return false;
    }
  npeel++;

  /* Check peeled loops size.  */
  tree_estimate_loop_size (loop, exit, NULL, &size,
			   PARAM_VALUE (PARAM_MAX_PEELED_INSNS));
  if ((peeled_size = estimated_peeled_sequence_size (&size, (int) npeel))
      > PARAM_VALUE (PARAM_MAX_PEELED_INSNS))
    {
      if (dump_file)
	fprintf (dump_file, "Not peeling: peeled sequence size is too large "
		 "(%i insns > --param max-peel-insns)", peeled_size);
      return false;
    }

  /* Duplicate possibly eliminating the exits.  */
  initialize_original_copy_tables ();
  auto_sbitmap wont_exit (npeel + 1);
  if (exit && niter
      && TREE_CODE (niter) == INTEGER_CST
      && wi::leu_p (npeel, wi::to_widest (niter)))
    {
      bitmap_ones (wont_exit);
      bitmap_clear_bit (wont_exit, 0);
    }
  else
    {
      exit = NULL;
      bitmap_clear (wont_exit);
    }
  if (!gimple_duplicate_loop_to_header_edge (loop, loop_preheader_edge (loop),
					     npeel, wont_exit,
					     exit, &edges_to_remove,
					     DLTHE_FLAG_UPDATE_FREQ))
    {
      free_original_copy_tables ();
      return false;
    }
  free_original_copy_tables ();
  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      fprintf (dump_file, "Peeled loop %d, %i times.\n",
	       loop->num, (int) npeel);
    }
  if (loop->any_estimate)
    {
      if (wi::ltu_p (npeel, loop->nb_iterations_estimate))
	loop->nb_iterations_estimate -= npeel;
      else
	loop->nb_iterations_estimate = 0;
    }
  if (loop->any_upper_bound)
    {
      if (wi::ltu_p (npeel, loop->nb_iterations_upper_bound))
	loop->nb_iterations_upper_bound -= npeel;
      else
	loop->nb_iterations_upper_bound = 0;
    }
  if (loop->any_likely_upper_bound)
    {
      if (wi::ltu_p (npeel, loop->nb_iterations_likely_upper_bound))
	loop->nb_iterations_likely_upper_bound -= npeel;
      else
	{
	  loop->any_estimate = true;
	  loop->nb_iterations_estimate = 0;
	  loop->nb_iterations_likely_upper_bound = 0;
	}
    }
  profile_count entry_count = profile_count::zero ();
  int entry_freq = 0;

  edge e;
  edge_iterator ei;
  FOR_EACH_EDGE (e, ei, loop->header->preds)
    if (e->src != loop->latch)
      {
	if (e->src->count.initialized_p ())
	  entry_count = entry_count + e->src->count;
	entry_freq += e->src->frequency;
	gcc_assert (!flow_bb_inside_loop_p (loop, e->src));
      }
  int scale = 1;
  if (loop->header->count > 0)
    scale = entry_count.probability_in (loop->header->count);
  else if (loop->header->frequency)
    scale = RDIV (entry_freq * REG_BR_PROB_BASE, loop->header->frequency);
  scale_loop_profile (loop, scale, 0);
  bitmap_set_bit (peeled_loops, loop->num);
  return true;
}

/* Adds a canonical induction variable to LOOP if suitable.
   CREATE_IV is true if we may create a new iv.  UL determines
   which loops we are allowed to completely unroll.  If TRY_EVAL is true, we
   try to determine the number of iterations of a loop by direct evaluation.
   Returns true if cfg is changed.  */

static bool
canonicalize_loop_induction_variables (struct loop *loop,
				       bool create_iv, enum unroll_level ul,
				       bool try_eval)
{
  edge exit = NULL;
  tree niter;
  HOST_WIDE_INT maxiter;
  bool modified = false;
  location_t locus = UNKNOWN_LOCATION;

  niter = number_of_latch_executions (loop);
  exit = single_exit (loop);
  if (TREE_CODE (niter) == INTEGER_CST)
    locus = gimple_location (last_stmt (exit->src));
  else
    {
      /* If the loop has more than one exit, try checking all of them
	 for # of iterations determinable through scev.  */
      if (!exit)
	niter = find_loop_niter (loop, &exit);

      /* Finally if everything else fails, try brute force evaluation.  */
      if (try_eval
	  && (chrec_contains_undetermined (niter)
	      || TREE_CODE (niter) != INTEGER_CST))
	niter = find_loop_niter_by_eval (loop, &exit);

      if (exit)
	locus = gimple_location (last_stmt (exit->src));

      if (TREE_CODE (niter) != INTEGER_CST)
	exit = NULL;
    }

  /* We work exceptionally hard here to estimate the bound
     by find_loop_niter_by_eval.  Be sure to keep it for future use.  */
  if (niter && TREE_CODE (niter) == INTEGER_CST)
    {
      record_niter_bound (loop, wi::to_widest (niter),
			  exit == single_likely_exit (loop), true);
    }

  /* Force re-computation of loop bounds so we can remove redundant exits.  */
  maxiter = max_loop_iterations_int (loop);

  if (dump_file && (dump_flags & TDF_DETAILS)
      && TREE_CODE (niter) == INTEGER_CST)
    {
      fprintf (dump_file, "Loop %d iterates ", loop->num);
      print_generic_expr (dump_file, niter, TDF_SLIM);
      fprintf (dump_file, " times.\n");
    }
  if (dump_file && (dump_flags & TDF_DETAILS)
      && maxiter >= 0)
    {
      fprintf (dump_file, "Loop %d iterates at most %i times.\n", loop->num,
	       (int)maxiter);
    }
  if (dump_file && (dump_flags & TDF_DETAILS)
      && likely_max_loop_iterations_int (loop) >= 0)
    {
      fprintf (dump_file, "Loop %d likely iterates at most %i times.\n",
	       loop->num, (int)likely_max_loop_iterations_int (loop));
    }

  /* Remove exits that are known to be never taken based on loop bound.
     Needs to be called after compilation of max_loop_iterations_int that
     populates the loop bounds.  */
  modified |= remove_redundant_iv_tests (loop);

  if (try_unroll_loop_completely (loop, exit, niter, ul, maxiter, locus))
    return true;

  if (create_iv
      && niter && !chrec_contains_undetermined (niter)
      && exit && just_once_each_iteration_p (loop, exit->src))
    create_canonical_iv (loop, exit, niter);

  if (ul == UL_ALL)
    modified |= try_peel_loop (loop, exit, niter, maxiter);

  return modified;
}

/* The main entry point of the pass.  Adds canonical induction variables
   to the suitable loops.  */

unsigned int
canonicalize_induction_variables (void)
{
  struct loop *loop;
  bool changed = false;
  bool irred_invalidated = false;
  bitmap loop_closed_ssa_invalidated = BITMAP_ALLOC (NULL);

  estimate_numbers_of_iterations (cfun);

  FOR_EACH_LOOP (loop, LI_FROM_INNERMOST)
    {
      changed |= canonicalize_loop_induction_variables (loop,
							true, UL_SINGLE_ITER,
							true);
    }
  gcc_assert (!need_ssa_update_p (cfun));

  unloop_loops (loop_closed_ssa_invalidated, &irred_invalidated);
  if (irred_invalidated
      && loops_state_satisfies_p (LOOPS_HAVE_MARKED_IRREDUCIBLE_REGIONS))
    mark_irreducible_loops ();

  /* Clean up the information about numbers of iterations, since brute force
     evaluation could reveal new information.  */
  free_numbers_of_iterations_estimates (cfun);
  scev_reset ();

  if (!bitmap_empty_p (loop_closed_ssa_invalidated))
    {
      gcc_checking_assert (loops_state_satisfies_p (LOOP_CLOSED_SSA));
      rewrite_into_loop_closed_ssa (NULL, TODO_update_ssa);
    }
  BITMAP_FREE (loop_closed_ssa_invalidated);

  if (changed)
    return TODO_cleanup_cfg;
  return 0;
}

/* Propagate constant SSA_NAMEs defined in basic block BB.  */

static void
propagate_constants_for_unrolling (basic_block bb)
{
  /* Look for degenerate PHI nodes with constant argument.  */
  for (gphi_iterator gsi = gsi_start_phis (bb); !gsi_end_p (gsi); )
    {
      gphi *phi = gsi.phi ();
      tree result = gimple_phi_result (phi);
      tree arg = gimple_phi_arg_def (phi, 0);

      if (! SSA_NAME_OCCURS_IN_ABNORMAL_PHI (result)
	  && gimple_phi_num_args (phi) == 1
	  && CONSTANT_CLASS_P (arg))
	{
	  replace_uses_by (result, arg);
	  gsi_remove (&gsi, true);
	  release_ssa_name (result);
	}
      else
	gsi_next (&gsi);
    }

  /* Look for assignments to SSA names with constant RHS.  */
  for (gimple_stmt_iterator gsi = gsi_start_bb (bb); !gsi_end_p (gsi); )
    {
      gimple *stmt = gsi_stmt (gsi);
      tree lhs;

      if (is_gimple_assign (stmt)
	  && TREE_CODE_CLASS (gimple_assign_rhs_code (stmt)) == tcc_constant
	  && (lhs = gimple_assign_lhs (stmt), TREE_CODE (lhs) == SSA_NAME)
	  && !SSA_NAME_OCCURS_IN_ABNORMAL_PHI (lhs))
	{
	  replace_uses_by (lhs, gimple_assign_rhs1 (stmt));
	  gsi_remove (&gsi, true);
	  release_ssa_name (lhs);
	}
      else
	gsi_next (&gsi);
    }
}

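/* A small sketch of what this catches (hypothetical GIMPLE): after
   unrolling, a copied block may contain

     i_5 = PHI <7(3)>
     x_6 = i_5 + 1;

   The single-argument PHI is degenerate, so uses of i_5 are replaced by
   the constant 7 and the PHI removed; later folding then simplifies the
   dependent arithmetic, keeping the body size in check between
   unrolling iterations.  */
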
/* Process loops from innermost to outer, stopping at the innermost
   loop we unrolled.  */

static bool
tree_unroll_loops_completely_1 (bool may_increase_size, bool unroll_outer,
				bitmap father_bbs, struct loop *loop)
{
  struct loop *loop_father;
  bool changed = false;
  struct loop *inner;
  enum unroll_level ul;

  /* Process inner loops first.  */
  for (inner = loop->inner; inner != NULL; inner = inner->next)
    changed |= tree_unroll_loops_completely_1 (may_increase_size,
					       unroll_outer, father_bbs,
					       inner);

  /* If we changed an inner loop we cannot process outer loops in this
     iteration because SSA form is not up-to-date.  Continue with
     siblings of outer loops instead.  */
  if (changed)
    return true;

  /* Don't unroll #pragma omp simd loops until the vectorizer
     attempts to vectorize those.  */
  if (loop->force_vectorize)
    return false;

  /* Try to unroll this loop.  */
  loop_father = loop_outer (loop);
  if (!loop_father)
    return false;

  if (may_increase_size && optimize_loop_nest_for_speed_p (loop)
      /* Unroll outermost loops only if asked to do so or they do
	 not cause code growth.  */
      && (unroll_outer || loop_outer (loop_father)))
    ul = UL_ALL;
  else
    ul = UL_NO_GROWTH;

  if (canonicalize_loop_induction_variables
	(loop, false, ul, !flag_tree_loop_ivcanon))
    {
      /* If we'll continue unrolling, we need to propagate constants
	 within the new basic blocks to fold away induction variable
	 computations; otherwise, the size might blow up before the
	 iteration is complete and the IR eventually cleaned up.  */
      if (loop_outer (loop_father))
	bitmap_set_bit (father_bbs, loop_father->header->index);

      return true;
    }

  return false;
}

/* Unroll LOOPS completely if they iterate just a few times.  Unless
   MAY_INCREASE_SIZE is true, perform the unrolling only if the
   size of the code does not increase.  */

unsigned int
tree_unroll_loops_completely (bool may_increase_size, bool unroll_outer)
{
  bitmap father_bbs = BITMAP_ALLOC (NULL);
  bool changed;
  int iteration = 0;
  bool irred_invalidated = false;

  estimate_numbers_of_iterations (cfun);

  do
    {
      changed = false;
      bitmap loop_closed_ssa_invalidated = NULL;

      if (loops_state_satisfies_p (LOOP_CLOSED_SSA))
	loop_closed_ssa_invalidated = BITMAP_ALLOC (NULL);

      free_numbers_of_iterations_estimates (cfun);
      estimate_numbers_of_iterations (cfun);

      changed = tree_unroll_loops_completely_1 (may_increase_size,
						unroll_outer, father_bbs,
						current_loops->tree_root);
      if (changed)
	{
	  unsigned i;

	  unloop_loops (loop_closed_ssa_invalidated, &irred_invalidated);

	  /* We cannot use TODO_update_ssa_no_phi because VOPS gets confused.  */
	  if (loop_closed_ssa_invalidated
	      && !bitmap_empty_p (loop_closed_ssa_invalidated))
	    rewrite_into_loop_closed_ssa (loop_closed_ssa_invalidated,
					  TODO_update_ssa);
	  else
	    update_ssa (TODO_update_ssa);

	  /* father_bbs is a bitmap of loop father header BB indices.
	     Translate that to what non-root loops these BBs belong to now.  */
	  bitmap_iterator bi;
	  bitmap fathers = BITMAP_ALLOC (NULL);
	  EXECUTE_IF_SET_IN_BITMAP (father_bbs, 0, i, bi)
	    {
	      basic_block unrolled_loop_bb = BASIC_BLOCK_FOR_FN (cfun, i);
	      if (! unrolled_loop_bb)
		continue;
	      if (loop_outer (unrolled_loop_bb->loop_father))
		bitmap_set_bit (fathers,
				unrolled_loop_bb->loop_father->num);
	    }
	  bitmap_clear (father_bbs);
	  /* Propagate the constants within the new basic blocks.  */
	  EXECUTE_IF_SET_IN_BITMAP (fathers, 0, i, bi)
	    {
	      loop_p father = get_loop (cfun, i);
	      basic_block *body = get_loop_body_in_dom_order (father);
	      for (unsigned j = 0; j < father->num_nodes; j++)
		propagate_constants_for_unrolling (body[j]);
	      free (body);
	    }
	  BITMAP_FREE (fathers);

	  /* This will take care of removing completely unrolled loops
	     from the loop structures so we can continue unrolling now
	     innermost loops.  */
	  if (cleanup_tree_cfg ())
	    update_ssa (TODO_update_ssa_only_virtuals);

	  /* Clean up the information about numbers of iterations, since
	     complete unrolling might have invalidated it.  */
	  scev_reset ();
	  if (flag_checking && loops_state_satisfies_p (LOOP_CLOSED_SSA))
	    verify_loop_closed_ssa (true);
	}
      if (loop_closed_ssa_invalidated)
	BITMAP_FREE (loop_closed_ssa_invalidated);
    }
  while (changed
	 && ++iteration <= PARAM_VALUE (PARAM_MAX_UNROLL_ITERATIONS));

  BITMAP_FREE (father_bbs);

  if (irred_invalidated
      && loops_state_satisfies_p (LOOPS_HAVE_MARKED_IRREDUCIBLE_REGIONS))
    mark_irreducible_loops ();

  return 0;
}

/* Canonical induction variable creation pass.  */

namespace {

const pass_data pass_data_iv_canon =
{
  GIMPLE_PASS, /* type */
  "ivcanon", /* name */
  OPTGROUP_LOOP, /* optinfo_flags */
  TV_TREE_LOOP_IVCANON, /* tv_id */
  ( PROP_cfg | PROP_ssa ), /* properties_required */
  0, /* properties_provided */
  0, /* properties_destroyed */
  0, /* todo_flags_start */
  0, /* todo_flags_finish */
};

class pass_iv_canon : public gimple_opt_pass
{
public:
  pass_iv_canon (gcc::context *ctxt)
    : gimple_opt_pass (pass_data_iv_canon, ctxt)
  {}

  /* opt_pass methods: */
  virtual bool gate (function *) { return flag_tree_loop_ivcanon != 0; }
  virtual unsigned int execute (function *fun);

}; // class pass_iv_canon

unsigned int
pass_iv_canon::execute (function *fun)
{
  if (number_of_loops (fun) <= 1)
    return 0;

  return canonicalize_induction_variables ();
}

} // anon namespace

gimple_opt_pass *
make_pass_iv_canon (gcc::context *ctxt)
{
  return new pass_iv_canon (ctxt);
}

/* Complete unrolling of loops.  */

namespace {

const pass_data pass_data_complete_unroll =
{
  GIMPLE_PASS, /* type */
  "cunroll", /* name */
  OPTGROUP_LOOP, /* optinfo_flags */
  TV_COMPLETE_UNROLL, /* tv_id */
  ( PROP_cfg | PROP_ssa ), /* properties_required */
  0, /* properties_provided */
  0, /* properties_destroyed */
  0, /* todo_flags_start */
  0, /* todo_flags_finish */
};

class pass_complete_unroll : public gimple_opt_pass
{
public:
  pass_complete_unroll (gcc::context *ctxt)
    : gimple_opt_pass (pass_data_complete_unroll, ctxt)
  {}

  /* opt_pass methods: */
  virtual unsigned int execute (function *);

}; // class pass_complete_unroll

unsigned int
pass_complete_unroll::execute (function *fun)
{
  if (number_of_loops (fun) <= 1)
    return 0;

  /* If we ever decide to run loop peeling more than once, we will need to
     track loops already peeled in loop structures themselves to avoid
     re-peeling the same loop multiple times.  */
  if (flag_peel_loops)
    peeled_loops = BITMAP_ALLOC (NULL);
  int val = tree_unroll_loops_completely (flag_unroll_loops
					  || flag_peel_loops
					  || optimize >= 3, true);
  if (peeled_loops)
    {
      BITMAP_FREE (peeled_loops);
      peeled_loops = NULL;
    }
  return val;
}

} // anon namespace

gimple_opt_pass *
make_pass_complete_unroll (gcc::context *ctxt)
{
  return new pass_complete_unroll (ctxt);
}

/* Complete unrolling of inner loops.  */

namespace {

const pass_data pass_data_complete_unrolli =
{
  GIMPLE_PASS, /* type */
  "cunrolli", /* name */
  OPTGROUP_LOOP, /* optinfo_flags */
  TV_COMPLETE_UNROLL, /* tv_id */
  ( PROP_cfg | PROP_ssa ), /* properties_required */
  0, /* properties_provided */
  0, /* properties_destroyed */
  0, /* todo_flags_start */
  0, /* todo_flags_finish */
};

class pass_complete_unrolli : public gimple_opt_pass
{
public:
  pass_complete_unrolli (gcc::context *ctxt)
    : gimple_opt_pass (pass_data_complete_unrolli, ctxt)
  {}

  /* opt_pass methods: */
  virtual bool gate (function *) { return optimize >= 2; }
  virtual unsigned int execute (function *);

}; // class pass_complete_unrolli

unsigned int
pass_complete_unrolli::execute (function *fun)
{
  unsigned ret = 0;

  loop_optimizer_init (LOOPS_NORMAL
		       | LOOPS_HAVE_RECORDED_EXITS);
  if (number_of_loops (fun) > 1)
    {
      scev_initialize ();
      ret = tree_unroll_loops_completely (optimize >= 3, false);
      scev_finalize ();
    }
  loop_optimizer_finalize ();

  return ret;
}

} // anon namespace

gimple_opt_pass *
make_pass_complete_unrolli (gcc::context *ctxt)
{
  return new pass_complete_unrolli (ctxt);
}