]> git.ipfire.org Git - thirdparty/gcc.git/blob - gcc/tree-ssa-loop-ivcanon.c
2015-06-04 Andrew MacLeod <amacleod@redhat.com>
[thirdparty/gcc.git] / gcc / tree-ssa-loop-ivcanon.c
1 /* Induction variable canonicalization and loop peeling.
2 Copyright (C) 2004-2015 Free Software Foundation, Inc.
3
4 This file is part of GCC.
5
6 GCC is free software; you can redistribute it and/or modify it
7 under the terms of the GNU General Public License as published by the
8 Free Software Foundation; either version 3, or (at your option) any
9 later version.
10
11 GCC is distributed in the hope that it will be useful, but WITHOUT
12 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
13 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
14 for more details.
15
16 You should have received a copy of the GNU General Public License
17 along with GCC; see the file COPYING3. If not see
18 <http://www.gnu.org/licenses/>. */
19
20 /* This pass detects the loops that iterate a constant number of times,
21 adds a canonical induction variable (step -1, tested against 0)
22 and replaces the exit test. This enables the less powerful rtl
23 level analysis to use this information.
24
25 This might spoil the code in some cases (by increasing register pressure).
26 Note that in the case the new variable is not needed, ivopts will get rid
27 of it, so it might only be a problem when there are no other linear induction
28 variables. In that case the created optimization possibilities are likely
29 to pay up.
30
31 We also perform
32 - complete unrolling (or peeling) when the loop rolls few enough
33 times
34 - simple peeling (i.e. copying a few initial iterations in front of
35 the loop) when an estimate of the number of iterations is known
36 (typically from profile info). */
37
38 #include "config.h"
39 #include "system.h"
40 #include "coretypes.h"
41 #include "tm.h"
42 #include "hash-set.h"
43 #include "vec.h"
44 #include "input.h"
45 #include "alias.h"
46 #include "symtab.h"
47 #include "inchash.h"
48 #include "tree.h"
49 #include "fold-const.h"
50 #include "tm_p.h"
51 #include "profile.h"
52 #include "predict.h"
53 #include "hard-reg-set.h"
54 #include "input.h"
55 #include "function.h"
56 #include "dominance.h"
57 #include "cfg.h"
58 #include "basic-block.h"
59 #include "gimple-pretty-print.h"
60 #include "tree-ssa-alias.h"
61 #include "internal-fn.h"
62 #include "gimple-fold.h"
63 #include "tree-eh.h"
64 #include "gimple-expr.h"
65 #include "is-a.h"
66 #include "gimple.h"
67 #include "gimple-iterator.h"
68 #include "gimple-ssa.h"
69 #include "hash-map.h"
70 #include "plugin-api.h"
71 #include "ipa-ref.h"
72 #include "cgraph.h"
73 #include "tree-cfg.h"
74 #include "tree-phinodes.h"
75 #include "ssa-iterators.h"
76 #include "stringpool.h"
77 #include "tree-ssanames.h"
78 #include "tree-ssa-loop-manip.h"
79 #include "tree-ssa-loop-niter.h"
80 #include "tree-ssa-loop.h"
81 #include "tree-into-ssa.h"
82 #include "cfgloop.h"
83 #include "tree-pass.h"
84 #include "tree-chrec.h"
85 #include "tree-scalar-evolution.h"
86 #include "params.h"
87 #include "flags.h"
88 #include "tree-inline.h"
89 #include "target.h"
90 #include "tree-cfgcleanup.h"
91 #include "builtins.h"
92
/* Classes of loops that complete unrolling is allowed to handle.  */

enum unroll_level
{
  /* Only loops that exit immediately in the first iteration.  */
  UL_SINGLE_ITER,

  /* Only loops whose unrolling will not cause increase of code size.  */
  UL_NO_GROWTH,

  /* All suitable loops.  */
  UL_ALL
};
103
104 /* Adds a canonical induction variable to LOOP iterating NITER times. EXIT
105 is the exit edge whose condition is replaced. */
106
107 static void
108 create_canonical_iv (struct loop *loop, edge exit, tree niter)
109 {
110 edge in;
111 tree type, var;
112 gcond *cond;
113 gimple_stmt_iterator incr_at;
114 enum tree_code cmp;
115
116 if (dump_file && (dump_flags & TDF_DETAILS))
117 {
118 fprintf (dump_file, "Added canonical iv to loop %d, ", loop->num);
119 print_generic_expr (dump_file, niter, TDF_SLIM);
120 fprintf (dump_file, " iterations.\n");
121 }
122
123 cond = as_a <gcond *> (last_stmt (exit->src));
124 in = EDGE_SUCC (exit->src, 0);
125 if (in == exit)
126 in = EDGE_SUCC (exit->src, 1);
127
128 /* Note that we do not need to worry about overflows, since
129 type of niter is always unsigned and all comparisons are
130 just for equality/nonequality -- i.e. everything works
131 with a modulo arithmetics. */
132
133 type = TREE_TYPE (niter);
134 niter = fold_build2 (PLUS_EXPR, type,
135 niter,
136 build_int_cst (type, 1));
137 incr_at = gsi_last_bb (in->src);
138 create_iv (niter,
139 build_int_cst (type, -1),
140 NULL_TREE, loop,
141 &incr_at, false, NULL, &var);
142
143 cmp = (exit->flags & EDGE_TRUE_VALUE) ? EQ_EXPR : NE_EXPR;
144 gimple_cond_set_code (cond, cmp);
145 gimple_cond_set_lhs (cond, var);
146 gimple_cond_set_rhs (cond, build_int_cst (type, 0));
147 update_stmt (cond);
148 }
149
/* Size summary of a loop as filled in by tree_estimate_loop_size.  */
struct loop_size
{
  /* Total number of instructions in the loop.  */
  int overall;

  /* Instructions expected to be optimized out in peeled iterations,
     e.g. computations based on an induction variable whose start value
     is a known constant.  */
  int eliminated_by_peeling;

  /* The same two figures for the last iteration, which is smaller
     because instructions after the exit are not executed.  */
  int last_iteration;
  int last_iteration_eliminated_by_peeling;

  /* True if some IV computation will become constant.  */
  bool constant_iv;

  /* Hot-path count of calls that are not builtins and are pure or
     const.  */
  int num_pure_calls_on_hot_path;

  /* Hot-path count of calls that are not builtins and are neither pure
     nor const.  */
  int num_non_pure_calls_on_hot_path;

  /* Number of statements other than calls in the loop.  */
  int non_call_stmts_on_hot_path;

  /* Number of branches seen on the hot path.  */
  int num_branches_on_hot_path;
};
180
181 /* Return true if OP in STMT will be constant after peeling LOOP. */
182
183 static bool
184 constant_after_peeling (tree op, gimple stmt, struct loop *loop)
185 {
186 affine_iv iv;
187
188 if (is_gimple_min_invariant (op))
189 return true;
190
191 /* We can still fold accesses to constant arrays when index is known. */
192 if (TREE_CODE (op) != SSA_NAME)
193 {
194 tree base = op;
195
196 /* First make fast look if we see constant array inside. */
197 while (handled_component_p (base))
198 base = TREE_OPERAND (base, 0);
199 if ((DECL_P (base)
200 && ctor_for_folding (base) != error_mark_node)
201 || CONSTANT_CLASS_P (base))
202 {
203 /* If so, see if we understand all the indices. */
204 base = op;
205 while (handled_component_p (base))
206 {
207 if (TREE_CODE (base) == ARRAY_REF
208 && !constant_after_peeling (TREE_OPERAND (base, 1), stmt, loop))
209 return false;
210 base = TREE_OPERAND (base, 0);
211 }
212 return true;
213 }
214 return false;
215 }
216
217 /* Induction variables are constants. */
218 if (!simple_iv (loop, loop_containing_stmt (stmt), op, &iv, false))
219 return false;
220 if (!is_gimple_min_invariant (iv.base))
221 return false;
222 if (!is_gimple_min_invariant (iv.step))
223 return false;
224 return true;
225 }
226
227 /* Computes an estimated number of insns in LOOP.
228 EXIT (if non-NULL) is an exite edge that will be eliminated in all but last
229 iteration of the loop.
230 EDGE_TO_CANCEL (if non-NULL) is an non-exit edge eliminated in the last iteration
231 of loop.
232 Return results in SIZE, estimate benefits for complete unrolling exiting by EXIT.
233 Stop estimating after UPPER_BOUND is met. Return true in this case. */
234
235 static bool
236 tree_estimate_loop_size (struct loop *loop, edge exit, edge edge_to_cancel, struct loop_size *size,
237 int upper_bound)
238 {
239 basic_block *body = get_loop_body (loop);
240 gimple_stmt_iterator gsi;
241 unsigned int i;
242 bool after_exit;
243 vec<basic_block> path = get_loop_hot_path (loop);
244
245 size->overall = 0;
246 size->eliminated_by_peeling = 0;
247 size->last_iteration = 0;
248 size->last_iteration_eliminated_by_peeling = 0;
249 size->num_pure_calls_on_hot_path = 0;
250 size->num_non_pure_calls_on_hot_path = 0;
251 size->non_call_stmts_on_hot_path = 0;
252 size->num_branches_on_hot_path = 0;
253 size->constant_iv = 0;
254
255 if (dump_file && (dump_flags & TDF_DETAILS))
256 fprintf (dump_file, "Estimating sizes for loop %i\n", loop->num);
257 for (i = 0; i < loop->num_nodes; i++)
258 {
259 if (edge_to_cancel && body[i] != edge_to_cancel->src
260 && dominated_by_p (CDI_DOMINATORS, body[i], edge_to_cancel->src))
261 after_exit = true;
262 else
263 after_exit = false;
264 if (dump_file && (dump_flags & TDF_DETAILS))
265 fprintf (dump_file, " BB: %i, after_exit: %i\n", body[i]->index, after_exit);
266
267 for (gsi = gsi_start_bb (body[i]); !gsi_end_p (gsi); gsi_next (&gsi))
268 {
269 gimple stmt = gsi_stmt (gsi);
270 int num = estimate_num_insns (stmt, &eni_size_weights);
271 bool likely_eliminated = false;
272 bool likely_eliminated_last = false;
273 bool likely_eliminated_peeled = false;
274
275 if (dump_file && (dump_flags & TDF_DETAILS))
276 {
277 fprintf (dump_file, " size: %3i ", num);
278 print_gimple_stmt (dump_file, gsi_stmt (gsi), 0, 0);
279 }
280
281 /* Look for reasons why we might optimize this stmt away. */
282
283 if (gimple_has_side_effects (stmt))
284 ;
285 /* Exit conditional. */
286 else if (exit && body[i] == exit->src
287 && stmt == last_stmt (exit->src))
288 {
289 if (dump_file && (dump_flags & TDF_DETAILS))
290 fprintf (dump_file, " Exit condition will be eliminated "
291 "in peeled copies.\n");
292 likely_eliminated_peeled = true;
293 }
294 else if (edge_to_cancel && body[i] == edge_to_cancel->src
295 && stmt == last_stmt (edge_to_cancel->src))
296 {
297 if (dump_file && (dump_flags & TDF_DETAILS))
298 fprintf (dump_file, " Exit condition will be eliminated "
299 "in last copy.\n");
300 likely_eliminated_last = true;
301 }
302 /* Sets of IV variables */
303 else if (gimple_code (stmt) == GIMPLE_ASSIGN
304 && constant_after_peeling (gimple_assign_lhs (stmt), stmt, loop))
305 {
306 if (dump_file && (dump_flags & TDF_DETAILS))
307 fprintf (dump_file, " Induction variable computation will"
308 " be folded away.\n");
309 likely_eliminated = true;
310 }
311 /* Assignments of IV variables. */
312 else if (gimple_code (stmt) == GIMPLE_ASSIGN
313 && TREE_CODE (gimple_assign_lhs (stmt)) == SSA_NAME
314 && constant_after_peeling (gimple_assign_rhs1 (stmt), stmt, loop)
315 && (gimple_assign_rhs_class (stmt) != GIMPLE_BINARY_RHS
316 || constant_after_peeling (gimple_assign_rhs2 (stmt),
317 stmt, loop)))
318 {
319 size->constant_iv = true;
320 if (dump_file && (dump_flags & TDF_DETAILS))
321 fprintf (dump_file, " Constant expression will be folded away.\n");
322 likely_eliminated = true;
323 }
324 /* Conditionals. */
325 else if ((gimple_code (stmt) == GIMPLE_COND
326 && constant_after_peeling (gimple_cond_lhs (stmt), stmt, loop)
327 && constant_after_peeling (gimple_cond_rhs (stmt), stmt, loop))
328 || (gimple_code (stmt) == GIMPLE_SWITCH
329 && constant_after_peeling (gimple_switch_index (
330 as_a <gswitch *> (stmt)),
331 stmt, loop)))
332 {
333 if (dump_file && (dump_flags & TDF_DETAILS))
334 fprintf (dump_file, " Constant conditional.\n");
335 likely_eliminated = true;
336 }
337
338 size->overall += num;
339 if (likely_eliminated || likely_eliminated_peeled)
340 size->eliminated_by_peeling += num;
341 if (!after_exit)
342 {
343 size->last_iteration += num;
344 if (likely_eliminated || likely_eliminated_last)
345 size->last_iteration_eliminated_by_peeling += num;
346 }
347 if ((size->overall * 3 / 2 - size->eliminated_by_peeling
348 - size->last_iteration_eliminated_by_peeling) > upper_bound)
349 {
350 free (body);
351 path.release ();
352 return true;
353 }
354 }
355 }
356 while (path.length ())
357 {
358 basic_block bb = path.pop ();
359 for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
360 {
361 gimple stmt = gsi_stmt (gsi);
362 if (gimple_code (stmt) == GIMPLE_CALL)
363 {
364 int flags = gimple_call_flags (stmt);
365 tree decl = gimple_call_fndecl (stmt);
366
367 if (decl && DECL_IS_BUILTIN (decl)
368 && is_inexpensive_builtin (decl))
369 ;
370 else if (flags & (ECF_PURE | ECF_CONST))
371 size->num_pure_calls_on_hot_path++;
372 else
373 size->num_non_pure_calls_on_hot_path++;
374 size->num_branches_on_hot_path ++;
375 }
376 else if (gimple_code (stmt) != GIMPLE_CALL
377 && gimple_code (stmt) != GIMPLE_DEBUG)
378 size->non_call_stmts_on_hot_path++;
379 if (((gimple_code (stmt) == GIMPLE_COND
380 && (!constant_after_peeling (gimple_cond_lhs (stmt), stmt, loop)
381 || constant_after_peeling (gimple_cond_rhs (stmt), stmt, loop)))
382 || (gimple_code (stmt) == GIMPLE_SWITCH
383 && !constant_after_peeling (gimple_switch_index (
384 as_a <gswitch *> (stmt)),
385 stmt, loop)))
386 && (!exit || bb != exit->src))
387 size->num_branches_on_hot_path++;
388 }
389 }
390 path.release ();
391 if (dump_file && (dump_flags & TDF_DETAILS))
392 fprintf (dump_file, "size: %i-%i, last_iteration: %i-%i\n", size->overall,
393 size->eliminated_by_peeling, size->last_iteration,
394 size->last_iteration_eliminated_by_peeling);
395
396 free (body);
397 return false;
398 }
399
400 /* Estimate number of insns of completely unrolled loop.
401 It is (NUNROLL + 1) * size of loop body with taking into account
402 the fact that in last copy everything after exit conditional
403 is dead and that some instructions will be eliminated after
404 peeling.
405
406 Loop body is likely going to simplify further, this is difficult
407 to guess, we just decrease the result by 1/3. */
408
409 static unsigned HOST_WIDE_INT
410 estimated_unrolled_size (struct loop_size *size,
411 unsigned HOST_WIDE_INT nunroll)
412 {
413 HOST_WIDE_INT unr_insns = ((nunroll)
414 * (HOST_WIDE_INT) (size->overall
415 - size->eliminated_by_peeling));
416 if (!nunroll)
417 unr_insns = 0;
418 unr_insns += size->last_iteration - size->last_iteration_eliminated_by_peeling;
419
420 unr_insns = unr_insns * 2 / 3;
421 if (unr_insns <= 0)
422 unr_insns = 1;
423
424 return unr_insns;
425 }
426
427 /* Loop LOOP is known to not loop. See if there is an edge in the loop
428 body that can be remove to make the loop to always exit and at
429 the same time it does not make any code potentially executed
430 during the last iteration dead.
431
432 After complete unrolling we still may get rid of the conditional
433 on the exit in the last copy even if we have no idea what it does.
434 This is quite common case for loops of form
435
436 int a[5];
437 for (i=0;i<b;i++)
438 a[i]=0;
439
440 Here we prove the loop to iterate 5 times but we do not know
441 it from induction variable.
442
443 For now we handle only simple case where there is exit condition
444 just before the latch block and the latch block contains no statements
445 with side effect that may otherwise terminate the execution of loop
446 (such as by EH or by terminating the program or longjmp).
447
448 In the general case we may want to cancel the paths leading to statements
449 loop-niter identified as having undefined effect in the last iteration.
450 The other cases are hopefully rare and will be cleaned up later. */
451
452 static edge
453 loop_edge_to_cancel (struct loop *loop)
454 {
455 vec<edge> exits;
456 unsigned i;
457 edge edge_to_cancel;
458 gimple_stmt_iterator gsi;
459
460 /* We want only one predecestor of the loop. */
461 if (EDGE_COUNT (loop->latch->preds) > 1)
462 return NULL;
463
464 exits = get_loop_exit_edges (loop);
465
466 FOR_EACH_VEC_ELT (exits, i, edge_to_cancel)
467 {
468 /* Find the other edge than the loop exit
469 leaving the conditoinal. */
470 if (EDGE_COUNT (edge_to_cancel->src->succs) != 2)
471 continue;
472 if (EDGE_SUCC (edge_to_cancel->src, 0) == edge_to_cancel)
473 edge_to_cancel = EDGE_SUCC (edge_to_cancel->src, 1);
474 else
475 edge_to_cancel = EDGE_SUCC (edge_to_cancel->src, 0);
476
477 /* We only can handle conditionals. */
478 if (!(edge_to_cancel->flags & (EDGE_TRUE_VALUE | EDGE_FALSE_VALUE)))
479 continue;
480
481 /* We should never have conditionals in the loop latch. */
482 gcc_assert (edge_to_cancel->dest != loop->header);
483
484 /* Check that it leads to loop latch. */
485 if (edge_to_cancel->dest != loop->latch)
486 continue;
487
488 exits.release ();
489
490 /* Verify that the code in loop latch does nothing that may end program
491 execution without really reaching the exit. This may include
492 non-pure/const function calls, EH statements, volatile ASMs etc. */
493 for (gsi = gsi_start_bb (loop->latch); !gsi_end_p (gsi); gsi_next (&gsi))
494 if (gimple_has_side_effects (gsi_stmt (gsi)))
495 return NULL;
496 return edge_to_cancel;
497 }
498 exits.release ();
499 return NULL;
500 }
501
502 /* Remove all tests for exits that are known to be taken after LOOP was
503 peeled NPEELED times. Put gcc_unreachable before every statement
504 known to not be executed. */
505
506 static bool
507 remove_exits_and_undefined_stmts (struct loop *loop, unsigned int npeeled)
508 {
509 struct nb_iter_bound *elt;
510 bool changed = false;
511
512 for (elt = loop->bounds; elt; elt = elt->next)
513 {
514 /* If statement is known to be undefined after peeling, turn it
515 into unreachable (or trap when debugging experience is supposed
516 to be good). */
517 if (!elt->is_exit
518 && wi::ltu_p (elt->bound, npeeled))
519 {
520 gimple_stmt_iterator gsi = gsi_for_stmt (elt->stmt);
521 gcall *stmt = gimple_build_call
522 (builtin_decl_implicit (BUILT_IN_UNREACHABLE), 0);
523
524 gimple_set_location (stmt, gimple_location (elt->stmt));
525 gsi_insert_before (&gsi, stmt, GSI_NEW_STMT);
526 changed = true;
527 if (dump_file && (dump_flags & TDF_DETAILS))
528 {
529 fprintf (dump_file, "Forced statement unreachable: ");
530 print_gimple_stmt (dump_file, elt->stmt, 0, 0);
531 }
532 }
533 /* If we know the exit will be taken after peeling, update. */
534 else if (elt->is_exit
535 && wi::leu_p (elt->bound, npeeled))
536 {
537 basic_block bb = gimple_bb (elt->stmt);
538 edge exit_edge = EDGE_SUCC (bb, 0);
539
540 if (dump_file && (dump_flags & TDF_DETAILS))
541 {
542 fprintf (dump_file, "Forced exit to be taken: ");
543 print_gimple_stmt (dump_file, elt->stmt, 0, 0);
544 }
545 if (!loop_exit_edge_p (loop, exit_edge))
546 exit_edge = EDGE_SUCC (bb, 1);
547 gcc_checking_assert (loop_exit_edge_p (loop, exit_edge));
548 gcond *cond_stmt = as_a <gcond *> (elt->stmt);
549 if (exit_edge->flags & EDGE_TRUE_VALUE)
550 gimple_cond_make_true (cond_stmt);
551 else
552 gimple_cond_make_false (cond_stmt);
553 update_stmt (cond_stmt);
554 changed = true;
555 }
556 }
557 return changed;
558 }
559
560 /* Remove all exits that are known to be never taken because of the loop bound
561 discovered. */
562
563 static bool
564 remove_redundant_iv_tests (struct loop *loop)
565 {
566 struct nb_iter_bound *elt;
567 bool changed = false;
568
569 if (!loop->any_upper_bound)
570 return false;
571 for (elt = loop->bounds; elt; elt = elt->next)
572 {
573 /* Exit is pointless if it won't be taken before loop reaches
574 upper bound. */
575 if (elt->is_exit && loop->any_upper_bound
576 && wi::ltu_p (loop->nb_iterations_upper_bound, elt->bound))
577 {
578 basic_block bb = gimple_bb (elt->stmt);
579 edge exit_edge = EDGE_SUCC (bb, 0);
580 struct tree_niter_desc niter;
581
582 if (!loop_exit_edge_p (loop, exit_edge))
583 exit_edge = EDGE_SUCC (bb, 1);
584
585 /* Only when we know the actual number of iterations, not
586 just a bound, we can remove the exit. */
587 if (!number_of_iterations_exit (loop, exit_edge,
588 &niter, false, false)
589 || !integer_onep (niter.assumptions)
590 || !integer_zerop (niter.may_be_zero)
591 || !niter.niter
592 || TREE_CODE (niter.niter) != INTEGER_CST
593 || !wi::ltu_p (loop->nb_iterations_upper_bound,
594 wi::to_widest (niter.niter)))
595 continue;
596
597 if (dump_file && (dump_flags & TDF_DETAILS))
598 {
599 fprintf (dump_file, "Removed pointless exit: ");
600 print_gimple_stmt (dump_file, elt->stmt, 0, 0);
601 }
602 gcond *cond_stmt = as_a <gcond *> (elt->stmt);
603 if (exit_edge->flags & EDGE_TRUE_VALUE)
604 gimple_cond_make_false (cond_stmt);
605 else
606 gimple_cond_make_true (cond_stmt);
607 update_stmt (cond_stmt);
608 changed = true;
609 }
610 }
611 return changed;
612 }
613
614 /* Stores loops that will be unlooped after we process whole loop tree. */
615 static vec<loop_p> loops_to_unloop;
616 static vec<int> loops_to_unloop_nunroll;
617
618 /* Cancel all fully unrolled loops by putting __builtin_unreachable
619 on the latch edge.
620 We do it after all unrolling since unlooping moves basic blocks
621 across loop boundaries trashing loop closed SSA form as well
622 as SCEV info needed to be intact during unrolling.
623
624 IRRED_INVALIDATED is used to bookkeep if information about
625 irreducible regions may become invalid as a result
626 of the transformation.
627 LOOP_CLOSED_SSA_INVALIDATED is used to bookkepp the case
628 when we need to go into loop closed SSA form. */
629
630 static void
631 unloop_loops (bitmap loop_closed_ssa_invalidated,
632 bool *irred_invalidated)
633 {
634 while (loops_to_unloop.length ())
635 {
636 struct loop *loop = loops_to_unloop.pop ();
637 int n_unroll = loops_to_unloop_nunroll.pop ();
638 basic_block latch = loop->latch;
639 edge latch_edge = loop_latch_edge (loop);
640 int flags = latch_edge->flags;
641 location_t locus = latch_edge->goto_locus;
642 gcall *stmt;
643 gimple_stmt_iterator gsi;
644
645 remove_exits_and_undefined_stmts (loop, n_unroll);
646
647 /* Unloop destroys the latch edge. */
648 unloop (loop, irred_invalidated, loop_closed_ssa_invalidated);
649
650 /* Create new basic block for the latch edge destination and wire
651 it in. */
652 stmt = gimple_build_call (builtin_decl_implicit (BUILT_IN_UNREACHABLE), 0);
653 latch_edge = make_edge (latch, create_basic_block (NULL, NULL, latch), flags);
654 latch_edge->probability = 0;
655 latch_edge->count = 0;
656 latch_edge->flags |= flags;
657 latch_edge->goto_locus = locus;
658
659 latch_edge->dest->loop_father = current_loops->tree_root;
660 latch_edge->dest->count = 0;
661 latch_edge->dest->frequency = 0;
662 set_immediate_dominator (CDI_DOMINATORS, latch_edge->dest, latch_edge->src);
663
664 gsi = gsi_start_bb (latch_edge->dest);
665 gsi_insert_after (&gsi, stmt, GSI_NEW_STMT);
666 }
667 loops_to_unloop.release ();
668 loops_to_unloop_nunroll.release ();
669 }
670
671 /* Tries to unroll LOOP completely, i.e. NITER times.
672 UL determines which loops we are allowed to unroll.
673 EXIT is the exit of the loop that should be eliminated.
674 MAXITER specfy bound on number of iterations, -1 if it is
675 not known or too large for HOST_WIDE_INT. The location
676 LOCUS corresponding to the loop is used when emitting
677 a summary of the unroll to the dump file. */
678
679 static bool
680 try_unroll_loop_completely (struct loop *loop,
681 edge exit, tree niter,
682 enum unroll_level ul,
683 HOST_WIDE_INT maxiter,
684 location_t locus)
685 {
686 unsigned HOST_WIDE_INT n_unroll = 0, ninsns, unr_insns;
687 struct loop_size size;
688 bool n_unroll_found = false;
689 edge edge_to_cancel = NULL;
690 int report_flags = MSG_OPTIMIZED_LOCATIONS | TDF_RTL | TDF_DETAILS;
691
692 /* See if we proved number of iterations to be low constant.
693
694 EXIT is an edge that will be removed in all but last iteration of
695 the loop.
696
697 EDGE_TO_CACNEL is an edge that will be removed from the last iteration
698 of the unrolled sequence and is expected to make the final loop not
699 rolling.
700
701 If the number of execution of loop is determined by standard induction
702 variable test, then EXIT and EDGE_TO_CANCEL are the two edges leaving
703 from the iv test. */
704 if (tree_fits_uhwi_p (niter))
705 {
706 n_unroll = tree_to_uhwi (niter);
707 n_unroll_found = true;
708 edge_to_cancel = EDGE_SUCC (exit->src, 0);
709 if (edge_to_cancel == exit)
710 edge_to_cancel = EDGE_SUCC (exit->src, 1);
711 }
712 /* We do not know the number of iterations and thus we can not eliminate
713 the EXIT edge. */
714 else
715 exit = NULL;
716
717 /* See if we can improve our estimate by using recorded loop bounds. */
718 if (maxiter >= 0
719 && (!n_unroll_found || (unsigned HOST_WIDE_INT)maxiter < n_unroll))
720 {
721 n_unroll = maxiter;
722 n_unroll_found = true;
723 /* Loop terminates before the IV variable test, so we can not
724 remove it in the last iteration. */
725 edge_to_cancel = NULL;
726 }
727
728 if (!n_unroll_found)
729 return false;
730
731 if (n_unroll > (unsigned) PARAM_VALUE (PARAM_MAX_COMPLETELY_PEEL_TIMES))
732 {
733 if (dump_file && (dump_flags & TDF_DETAILS))
734 fprintf (dump_file, "Not unrolling loop %d "
735 "(--param max-completely-peeled-times limit reached).\n",
736 loop->num);
737 return false;
738 }
739
740 if (!edge_to_cancel)
741 edge_to_cancel = loop_edge_to_cancel (loop);
742
743 if (n_unroll)
744 {
745 sbitmap wont_exit;
746 edge e;
747 unsigned i;
748 bool large;
749 vec<edge> to_remove = vNULL;
750 if (ul == UL_SINGLE_ITER)
751 return false;
752
753 large = tree_estimate_loop_size
754 (loop, exit, edge_to_cancel, &size,
755 PARAM_VALUE (PARAM_MAX_COMPLETELY_PEELED_INSNS));
756 ninsns = size.overall;
757 if (large)
758 {
759 if (dump_file && (dump_flags & TDF_DETAILS))
760 fprintf (dump_file, "Not unrolling loop %d: it is too large.\n",
761 loop->num);
762 return false;
763 }
764
765 unr_insns = estimated_unrolled_size (&size, n_unroll);
766 if (dump_file && (dump_flags & TDF_DETAILS))
767 {
768 fprintf (dump_file, " Loop size: %d\n", (int) ninsns);
769 fprintf (dump_file, " Estimated size after unrolling: %d\n",
770 (int) unr_insns);
771 }
772
773 /* If the code is going to shrink, we don't need to be extra cautious
774 on guessing if the unrolling is going to be profitable. */
775 if (unr_insns
776 /* If there is IV variable that will become constant, we save
777 one instruction in the loop prologue we do not account
778 otherwise. */
779 <= ninsns + (size.constant_iv != false))
780 ;
781 /* We unroll only inner loops, because we do not consider it profitable
782 otheriwse. We still can cancel loopback edge of not rolling loop;
783 this is always a good idea. */
784 else if (ul == UL_NO_GROWTH)
785 {
786 if (dump_file && (dump_flags & TDF_DETAILS))
787 fprintf (dump_file, "Not unrolling loop %d: size would grow.\n",
788 loop->num);
789 return false;
790 }
791 /* Outer loops tend to be less interesting candidates for complete
792 unrolling unless we can do a lot of propagation into the inner loop
793 body. For now we disable outer loop unrolling when the code would
794 grow. */
795 else if (loop->inner)
796 {
797 if (dump_file && (dump_flags & TDF_DETAILS))
798 fprintf (dump_file, "Not unrolling loop %d: "
799 "it is not innermost and code would grow.\n",
800 loop->num);
801 return false;
802 }
803 /* If there is call on a hot path through the loop, then
804 there is most probably not much to optimize. */
805 else if (size.num_non_pure_calls_on_hot_path)
806 {
807 if (dump_file && (dump_flags & TDF_DETAILS))
808 fprintf (dump_file, "Not unrolling loop %d: "
809 "contains call and code would grow.\n",
810 loop->num);
811 return false;
812 }
813 /* If there is pure/const call in the function, then we
814 can still optimize the unrolled loop body if it contains
815 some other interesting code than the calls and code
816 storing or cumulating the return value. */
817 else if (size.num_pure_calls_on_hot_path
818 /* One IV increment, one test, one ivtmp store
819 and one useful stmt. That is about minimal loop
820 doing pure call. */
821 && (size.non_call_stmts_on_hot_path
822 <= 3 + size.num_pure_calls_on_hot_path))
823 {
824 if (dump_file && (dump_flags & TDF_DETAILS))
825 fprintf (dump_file, "Not unrolling loop %d: "
826 "contains just pure calls and code would grow.\n",
827 loop->num);
828 return false;
829 }
830 /* Complette unrolling is major win when control flow is removed and
831 one big basic block is created. If the loop contains control flow
832 the optimization may still be a win because of eliminating the loop
833 overhead but it also may blow the branch predictor tables.
834 Limit number of branches on the hot path through the peeled
835 sequence. */
836 else if (size.num_branches_on_hot_path * (int)n_unroll
837 > PARAM_VALUE (PARAM_MAX_PEEL_BRANCHES))
838 {
839 if (dump_file && (dump_flags & TDF_DETAILS))
840 fprintf (dump_file, "Not unrolling loop %d: "
841 " number of branches on hot path in the unrolled sequence"
842 " reach --param max-peel-branches limit.\n",
843 loop->num);
844 return false;
845 }
846 else if (unr_insns
847 > (unsigned) PARAM_VALUE (PARAM_MAX_COMPLETELY_PEELED_INSNS))
848 {
849 if (dump_file && (dump_flags & TDF_DETAILS))
850 fprintf (dump_file, "Not unrolling loop %d: "
851 "(--param max-completely-peeled-insns limit reached).\n",
852 loop->num);
853 return false;
854 }
855 dump_printf_loc (report_flags, locus,
856 "loop turned into non-loop; it never loops.\n");
857
858 initialize_original_copy_tables ();
859 wont_exit = sbitmap_alloc (n_unroll + 1);
860 bitmap_ones (wont_exit);
861 bitmap_clear_bit (wont_exit, 0);
862
863 if (!gimple_duplicate_loop_to_header_edge (loop, loop_preheader_edge (loop),
864 n_unroll, wont_exit,
865 exit, &to_remove,
866 DLTHE_FLAG_UPDATE_FREQ
867 | DLTHE_FLAG_COMPLETTE_PEEL))
868 {
869 free_original_copy_tables ();
870 free (wont_exit);
871 if (dump_file && (dump_flags & TDF_DETAILS))
872 fprintf (dump_file, "Failed to duplicate the loop\n");
873 return false;
874 }
875
876 FOR_EACH_VEC_ELT (to_remove, i, e)
877 {
878 bool ok = remove_path (e);
879 gcc_assert (ok);
880 }
881
882 to_remove.release ();
883 free (wont_exit);
884 free_original_copy_tables ();
885 }
886
887
888 /* Remove the conditional from the last copy of the loop. */
889 if (edge_to_cancel)
890 {
891 gcond *cond = as_a <gcond *> (last_stmt (edge_to_cancel->src));
892 if (edge_to_cancel->flags & EDGE_TRUE_VALUE)
893 gimple_cond_make_false (cond);
894 else
895 gimple_cond_make_true (cond);
896 update_stmt (cond);
897 /* Do not remove the path. Doing so may remove outer loop
898 and confuse bookkeeping code in tree_unroll_loops_completelly. */
899 }
900
901 /* Store the loop for later unlooping and exit removal. */
902 loops_to_unloop.safe_push (loop);
903 loops_to_unloop_nunroll.safe_push (n_unroll);
904
905 if (dump_enabled_p ())
906 {
907 if (!n_unroll)
908 dump_printf_loc (MSG_OPTIMIZED_LOCATIONS | TDF_DETAILS, locus,
909 "loop turned into non-loop; it never loops\n");
910 else
911 {
912 dump_printf_loc (MSG_OPTIMIZED_LOCATIONS | TDF_DETAILS, locus,
913 "loop with %d iterations completely unrolled",
914 (int) (n_unroll + 1));
915 if (profile_info)
916 dump_printf (MSG_OPTIMIZED_LOCATIONS | TDF_DETAILS,
917 " (header execution count %d)",
918 (int)loop->header->count);
919 dump_printf (MSG_OPTIMIZED_LOCATIONS | TDF_DETAILS, "\n");
920 }
921 }
922
923 if (dump_file && (dump_flags & TDF_DETAILS))
924 {
925 if (exit)
926 fprintf (dump_file, "Exit condition of peeled iterations was "
927 "eliminated.\n");
928 if (edge_to_cancel)
929 fprintf (dump_file, "Last iteration exit edge was proved true.\n");
930 else
931 fprintf (dump_file, "Latch of last iteration was marked by "
932 "__builtin_unreachable ().\n");
933 }
934
935 return true;
936 }
937
938 /* Return number of instructions after peeling. */
939 static unsigned HOST_WIDE_INT
940 estimated_peeled_sequence_size (struct loop_size *size,
941 unsigned HOST_WIDE_INT npeel)
942 {
943 return MAX (npeel * (HOST_WIDE_INT) (size->overall
944 - size->eliminated_by_peeling), 1);
945 }
946
947 /* If the loop is expected to iterate N times and is
948 small enough, duplicate the loop body N+1 times before
949 the loop itself. This way the hot path will never
950 enter the loop.
951 Parameters are the same as for try_unroll_loops_completely */
952
953 static bool
954 try_peel_loop (struct loop *loop,
955 edge exit, tree niter,
956 HOST_WIDE_INT maxiter)
957 {
958 int npeel;
959 struct loop_size size;
960 int peeled_size;
961 sbitmap wont_exit;
962 unsigned i;
963 vec<edge> to_remove = vNULL;
964 edge e;
965
966 /* If the iteration bound is known and large, then we can safely eliminate
967 the check in peeled copies. */
968 if (TREE_CODE (niter) != INTEGER_CST)
969 exit = NULL;
970
971 if (!flag_peel_loops || PARAM_VALUE (PARAM_MAX_PEEL_TIMES) <= 0)
972 return false;
973
974 /* Peel only innermost loops. */
975 if (loop->inner)
976 {
977 if (dump_file)
978 fprintf (dump_file, "Not peeling: outer loop\n");
979 return false;
980 }
981
982 if (!optimize_loop_for_speed_p (loop))
983 {
984 if (dump_file)
985 fprintf (dump_file, "Not peeling: cold loop\n");
986 return false;
987 }
988
989 /* Check if there is an estimate on the number of iterations. */
990 npeel = estimated_loop_iterations_int (loop);
991 if (npeel < 0)
992 {
993 if (dump_file)
994 fprintf (dump_file, "Not peeling: number of iterations is not "
995 "estimated\n");
996 return false;
997 }
998 if (maxiter >= 0 && maxiter <= npeel)
999 {
1000 if (dump_file)
1001 fprintf (dump_file, "Not peeling: upper bound is known so can "
1002 "unroll completely\n");
1003 return false;
1004 }
1005
1006 /* We want to peel estimated number of iterations + 1 (so we never
1007 enter the loop on quick path). Check against PARAM_MAX_PEEL_TIMES
1008 and be sure to avoid overflows. */
1009 if (npeel > PARAM_VALUE (PARAM_MAX_PEEL_TIMES) - 1)
1010 {
1011 if (dump_file)
1012 fprintf (dump_file, "Not peeling: rolls too much "
1013 "(%i + 1 > --param max-peel-times)\n", npeel);
1014 return false;
1015 }
1016 npeel++;
1017
1018 /* Check peeled loops size. */
1019 tree_estimate_loop_size (loop, exit, NULL, &size,
1020 PARAM_VALUE (PARAM_MAX_PEELED_INSNS));
1021 if ((peeled_size = estimated_peeled_sequence_size (&size, npeel))
1022 > PARAM_VALUE (PARAM_MAX_PEELED_INSNS))
1023 {
1024 if (dump_file)
1025 fprintf (dump_file, "Not peeling: peeled sequence size is too large "
1026 "(%i insns > --param max-peel-insns)", peeled_size);
1027 return false;
1028 }
1029
1030 /* Duplicate possibly eliminating the exits. */
1031 initialize_original_copy_tables ();
1032 wont_exit = sbitmap_alloc (npeel + 1);
1033 bitmap_ones (wont_exit);
1034 bitmap_clear_bit (wont_exit, 0);
1035 if (!gimple_duplicate_loop_to_header_edge (loop, loop_preheader_edge (loop),
1036 npeel, wont_exit,
1037 exit, &to_remove,
1038 DLTHE_FLAG_UPDATE_FREQ
1039 | DLTHE_FLAG_COMPLETTE_PEEL))
1040 {
1041 free_original_copy_tables ();
1042 free (wont_exit);
1043 return false;
1044 }
1045 FOR_EACH_VEC_ELT (to_remove, i, e)
1046 {
1047 bool ok = remove_path (e);
1048 gcc_assert (ok);
1049 }
1050 free (wont_exit);
1051 free_original_copy_tables ();
1052 if (dump_file && (dump_flags & TDF_DETAILS))
1053 {
1054 fprintf (dump_file, "Peeled loop %d, %i times.\n",
1055 loop->num, npeel);
1056 }
1057 if (loop->any_upper_bound)
1058 loop->nb_iterations_upper_bound -= npeel;
1059 loop->nb_iterations_estimate = 0;
1060 /* Make sure to mark loop cold so we do not try to peel it more. */
1061 scale_loop_profile (loop, 1, 0);
1062 loop->header->count = 0;
1063 return true;
1064 }
1065 /* Adds a canonical induction variable to LOOP if suitable.
1066 CREATE_IV is true if we may create a new iv. UL determines
1067 which loops we are allowed to completely unroll. If TRY_EVAL is true, we try
1068 to determine the number of iterations of a loop by direct evaluation.
1069 Returns true if cfg is changed. */
1070
1071 static bool
1072 canonicalize_loop_induction_variables (struct loop *loop,
1073 bool create_iv, enum unroll_level ul,
1074 bool try_eval)
1075 {
1076 edge exit = NULL;
1077 tree niter;
1078 HOST_WIDE_INT maxiter;
1079 bool modified = false;
1080 location_t locus = UNKNOWN_LOCATION;
1081
1082 niter = number_of_latch_executions (loop);
1083 exit = single_exit (loop);
1084 if (TREE_CODE (niter) == INTEGER_CST)
1085 locus = gimple_location (last_stmt (exit->src));
1086 else
1087 {
1088 /* If the loop has more than one exit, try checking all of them
1089 for # of iterations determinable through scev. */
1090 if (!exit)
1091 niter = find_loop_niter (loop, &exit);
1092
1093 /* Finally if everything else fails, try brute force evaluation. */
1094 if (try_eval
1095 && (chrec_contains_undetermined (niter)
1096 || TREE_CODE (niter) != INTEGER_CST))
1097 niter = find_loop_niter_by_eval (loop, &exit);
1098
1099 if (exit)
1100 locus = gimple_location (last_stmt (exit->src));
1101
1102 if (TREE_CODE (niter) != INTEGER_CST)
1103 exit = NULL;
1104 }
1105
1106 /* We work exceptionally hard here to estimate the bound
1107 by find_loop_niter_by_eval. Be sure to keep it for future. */
1108 if (niter && TREE_CODE (niter) == INTEGER_CST)
1109 {
1110 record_niter_bound (loop, wi::to_widest (niter),
1111 exit == single_likely_exit (loop), true);
1112 }
1113
1114 /* Force re-computation of loop bounds so we can remove redundant exits. */
1115 maxiter = max_loop_iterations_int (loop);
1116
1117 if (dump_file && (dump_flags & TDF_DETAILS)
1118 && TREE_CODE (niter) == INTEGER_CST)
1119 {
1120 fprintf (dump_file, "Loop %d iterates ", loop->num);
1121 print_generic_expr (dump_file, niter, TDF_SLIM);
1122 fprintf (dump_file, " times.\n");
1123 }
1124 if (dump_file && (dump_flags & TDF_DETAILS)
1125 && maxiter >= 0)
1126 {
1127 fprintf (dump_file, "Loop %d iterates at most %i times.\n", loop->num,
1128 (int)maxiter);
1129 }
1130
1131 /* Remove exits that are known to be never taken based on loop bound.
1132 Needs to be called after compilation of max_loop_iterations_int that
1133 populates the loop bounds. */
1134 modified |= remove_redundant_iv_tests (loop);
1135
1136 if (try_unroll_loop_completely (loop, exit, niter, ul, maxiter, locus))
1137 return true;
1138
1139 if (create_iv
1140 && niter && !chrec_contains_undetermined (niter)
1141 && exit && just_once_each_iteration_p (loop, exit->src))
1142 create_canonical_iv (loop, exit, niter);
1143
1144 if (ul == UL_ALL)
1145 modified |= try_peel_loop (loop, exit, niter, maxiter);
1146
1147 return modified;
1148 }
1149
1150 /* The main entry point of the pass. Adds canonical induction variables
1151 to the suitable loops. */
1152
1153 unsigned int
1154 canonicalize_induction_variables (void)
1155 {
1156 struct loop *loop;
1157 bool changed = false;
1158 bool irred_invalidated = false;
1159 bitmap loop_closed_ssa_invalidated = BITMAP_ALLOC (NULL);
1160
1161 free_numbers_of_iterations_estimates ();
1162 estimate_numbers_of_iterations ();
1163
1164 FOR_EACH_LOOP (loop, LI_FROM_INNERMOST)
1165 {
1166 changed |= canonicalize_loop_induction_variables (loop,
1167 true, UL_SINGLE_ITER,
1168 true);
1169 }
1170 gcc_assert (!need_ssa_update_p (cfun));
1171
1172 unloop_loops (loop_closed_ssa_invalidated, &irred_invalidated);
1173 if (irred_invalidated
1174 && loops_state_satisfies_p (LOOPS_HAVE_MARKED_IRREDUCIBLE_REGIONS))
1175 mark_irreducible_loops ();
1176
1177 /* Clean up the information about numbers of iterations, since brute force
1178 evaluation could reveal new information. */
1179 scev_reset ();
1180
1181 if (!bitmap_empty_p (loop_closed_ssa_invalidated))
1182 {
1183 gcc_checking_assert (loops_state_satisfies_p (LOOP_CLOSED_SSA));
1184 rewrite_into_loop_closed_ssa (NULL, TODO_update_ssa);
1185 }
1186 BITMAP_FREE (loop_closed_ssa_invalidated);
1187
1188 if (changed)
1189 return TODO_cleanup_cfg;
1190 return 0;
1191 }
1192
1193 /* Propagate VAL into all uses of SSA_NAME. */
1194
1195 static void
1196 propagate_into_all_uses (tree ssa_name, tree val)
1197 {
1198 imm_use_iterator iter;
1199 gimple use_stmt;
1200
1201 FOR_EACH_IMM_USE_STMT (use_stmt, iter, ssa_name)
1202 {
1203 gimple_stmt_iterator use_stmt_gsi = gsi_for_stmt (use_stmt);
1204 use_operand_p use;
1205
1206 FOR_EACH_IMM_USE_ON_STMT (use, iter)
1207 SET_USE (use, val);
1208
1209 if (is_gimple_assign (use_stmt)
1210 && get_gimple_rhs_class (gimple_assign_rhs_code (use_stmt))
1211 == GIMPLE_SINGLE_RHS)
1212 {
1213 tree rhs = gimple_assign_rhs1 (use_stmt);
1214
1215 if (TREE_CODE (rhs) == ADDR_EXPR)
1216 recompute_tree_invariant_for_addr_expr (rhs);
1217 }
1218
1219 fold_stmt_inplace (&use_stmt_gsi);
1220 update_stmt (use_stmt);
1221 maybe_clean_or_replace_eh_stmt (use_stmt, use_stmt);
1222 }
1223 }
1224
1225 /* Propagate constant SSA_NAMEs defined in basic block BB. */
1226
1227 static void
1228 propagate_constants_for_unrolling (basic_block bb)
1229 {
1230 /* Look for degenerate PHI nodes with constant argument. */
1231 for (gphi_iterator gsi = gsi_start_phis (bb); !gsi_end_p (gsi); )
1232 {
1233 gphi *phi = gsi.phi ();
1234 tree result = gimple_phi_result (phi);
1235 tree arg = gimple_phi_arg_def (phi, 0);
1236
1237 if (gimple_phi_num_args (phi) == 1 && TREE_CODE (arg) == INTEGER_CST)
1238 {
1239 propagate_into_all_uses (result, arg);
1240 gsi_remove (&gsi, true);
1241 release_ssa_name (result);
1242 }
1243 else
1244 gsi_next (&gsi);
1245 }
1246
1247 /* Look for assignments to SSA names with constant RHS. */
1248 for (gimple_stmt_iterator gsi = gsi_start_bb (bb); !gsi_end_p (gsi); )
1249 {
1250 gimple stmt = gsi_stmt (gsi);
1251 tree lhs;
1252
1253 if (is_gimple_assign (stmt)
1254 && gimple_assign_rhs_code (stmt) == INTEGER_CST
1255 && (lhs = gimple_assign_lhs (stmt), TREE_CODE (lhs) == SSA_NAME)
1256 && !SSA_NAME_OCCURS_IN_ABNORMAL_PHI (lhs))
1257 {
1258 propagate_into_all_uses (lhs, gimple_assign_rhs1 (stmt));
1259 gsi_remove (&gsi, true);
1260 release_ssa_name (lhs);
1261 }
1262 else
1263 gsi_next (&gsi);
1264 }
1265 }
1266
1267 /* Process loops from innermost to outer, stopping at the innermost
1268 loop we unrolled. */
1269
1270 static bool
1271 tree_unroll_loops_completely_1 (bool may_increase_size, bool unroll_outer,
1272 vec<loop_p, va_heap>& father_stack,
1273 struct loop *loop)
1274 {
1275 struct loop *loop_father;
1276 bool changed = false;
1277 struct loop *inner;
1278 enum unroll_level ul;
1279
1280 /* Process inner loops first. */
1281 for (inner = loop->inner; inner != NULL; inner = inner->next)
1282 changed |= tree_unroll_loops_completely_1 (may_increase_size,
1283 unroll_outer, father_stack,
1284 inner);
1285
1286 /* If we changed an inner loop we cannot process outer loops in this
1287 iteration because SSA form is not up-to-date. Continue with
1288 siblings of outer loops instead. */
1289 if (changed)
1290 return true;
1291
1292 /* Don't unroll #pragma omp simd loops until the vectorizer
1293 attempts to vectorize those. */
1294 if (loop->force_vectorize)
1295 return false;
1296
1297 /* Try to unroll this loop. */
1298 loop_father = loop_outer (loop);
1299 if (!loop_father)
1300 return false;
1301
1302 if (may_increase_size && optimize_loop_nest_for_speed_p (loop)
1303 /* Unroll outermost loops only if asked to do so or they do
1304 not cause code growth. */
1305 && (unroll_outer || loop_outer (loop_father)))
1306 ul = UL_ALL;
1307 else
1308 ul = UL_NO_GROWTH;
1309
1310 if (canonicalize_loop_induction_variables
1311 (loop, false, ul, !flag_tree_loop_ivcanon))
1312 {
1313 /* If we'll continue unrolling, we need to propagate constants
1314 within the new basic blocks to fold away induction variable
1315 computations; otherwise, the size might blow up before the
1316 iteration is complete and the IR eventually cleaned up. */
1317 if (loop_outer (loop_father) && !loop_father->aux)
1318 {
1319 father_stack.safe_push (loop_father);
1320 loop_father->aux = loop_father;
1321 }
1322
1323 return true;
1324 }
1325
1326 return false;
1327 }
1328
1329 /* Unroll LOOPS completely if they iterate just few times. Unless
1330 MAY_INCREASE_SIZE is true, perform the unrolling only if the
1331 size of the code does not increase. */
1332
1333 unsigned int
1334 tree_unroll_loops_completely (bool may_increase_size, bool unroll_outer)
1335 {
1336 auto_vec<loop_p, 16> father_stack;
1337 bool changed;
1338 int iteration = 0;
1339 bool irred_invalidated = false;
1340
1341 do
1342 {
1343 changed = false;
1344 bitmap loop_closed_ssa_invalidated = NULL;
1345
1346 if (loops_state_satisfies_p (LOOP_CLOSED_SSA))
1347 loop_closed_ssa_invalidated = BITMAP_ALLOC (NULL);
1348
1349 free_numbers_of_iterations_estimates ();
1350 estimate_numbers_of_iterations ();
1351
1352 changed = tree_unroll_loops_completely_1 (may_increase_size,
1353 unroll_outer, father_stack,
1354 current_loops->tree_root);
1355 if (changed)
1356 {
1357 struct loop **iter;
1358 unsigned i;
1359
1360 /* Be sure to skip unlooped loops while procesing father_stack
1361 array. */
1362 FOR_EACH_VEC_ELT (loops_to_unloop, i, iter)
1363 (*iter)->aux = NULL;
1364 FOR_EACH_VEC_ELT (father_stack, i, iter)
1365 if (!(*iter)->aux)
1366 *iter = NULL;
1367 unloop_loops (loop_closed_ssa_invalidated, &irred_invalidated);
1368
1369 /* We can not use TODO_update_ssa_no_phi because VOPS gets confused. */
1370 if (loop_closed_ssa_invalidated
1371 && !bitmap_empty_p (loop_closed_ssa_invalidated))
1372 rewrite_into_loop_closed_ssa (loop_closed_ssa_invalidated,
1373 TODO_update_ssa);
1374 else
1375 update_ssa (TODO_update_ssa);
1376
1377 /* Propagate the constants within the new basic blocks. */
1378 FOR_EACH_VEC_ELT (father_stack, i, iter)
1379 if (*iter)
1380 {
1381 unsigned j;
1382 basic_block *body = get_loop_body_in_dom_order (*iter);
1383 for (j = 0; j < (*iter)->num_nodes; j++)
1384 propagate_constants_for_unrolling (body[j]);
1385 free (body);
1386 (*iter)->aux = NULL;
1387 }
1388 father_stack.truncate (0);
1389
1390 /* This will take care of removing completely unrolled loops
1391 from the loop structures so we can continue unrolling now
1392 innermost loops. */
1393 if (cleanup_tree_cfg ())
1394 update_ssa (TODO_update_ssa_only_virtuals);
1395
1396 /* Clean up the information about numbers of iterations, since
1397 complete unrolling might have invalidated it. */
1398 scev_reset ();
1399 #ifdef ENABLE_CHECKING
1400 if (loops_state_satisfies_p (LOOP_CLOSED_SSA))
1401 verify_loop_closed_ssa (true);
1402 #endif
1403 }
1404 if (loop_closed_ssa_invalidated)
1405 BITMAP_FREE (loop_closed_ssa_invalidated);
1406 }
1407 while (changed
1408 && ++iteration <= PARAM_VALUE (PARAM_MAX_UNROLL_ITERATIONS));
1409
1410 father_stack.release ();
1411
1412 if (irred_invalidated
1413 && loops_state_satisfies_p (LOOPS_HAVE_MARKED_IRREDUCIBLE_REGIONS))
1414 mark_irreducible_loops ();
1415
1416 return 0;
1417 }
1418
1419 /* Canonical induction variable creation pass. */
1420
1421 namespace {
1422
1423 const pass_data pass_data_iv_canon =
1424 {
1425 GIMPLE_PASS, /* type */
1426 "ivcanon", /* name */
1427 OPTGROUP_LOOP, /* optinfo_flags */
1428 TV_TREE_LOOP_IVCANON, /* tv_id */
1429 ( PROP_cfg | PROP_ssa ), /* properties_required */
1430 0, /* properties_provided */
1431 0, /* properties_destroyed */
1432 0, /* todo_flags_start */
1433 0, /* todo_flags_finish */
1434 };
1435
1436 class pass_iv_canon : public gimple_opt_pass
1437 {
1438 public:
1439 pass_iv_canon (gcc::context *ctxt)
1440 : gimple_opt_pass (pass_data_iv_canon, ctxt)
1441 {}
1442
1443 /* opt_pass methods: */
1444 virtual bool gate (function *) { return flag_tree_loop_ivcanon != 0; }
1445 virtual unsigned int execute (function *fun);
1446
1447 }; // class pass_iv_canon
1448
1449 unsigned int
1450 pass_iv_canon::execute (function *fun)
1451 {
1452 if (number_of_loops (fun) <= 1)
1453 return 0;
1454
1455 return canonicalize_induction_variables ();
1456 }
1457
1458 } // anon namespace
1459
1460 gimple_opt_pass *
1461 make_pass_iv_canon (gcc::context *ctxt)
1462 {
1463 return new pass_iv_canon (ctxt);
1464 }
1465
1466 /* Complete unrolling of loops. */
1467
1468 namespace {
1469
1470 const pass_data pass_data_complete_unroll =
1471 {
1472 GIMPLE_PASS, /* type */
1473 "cunroll", /* name */
1474 OPTGROUP_LOOP, /* optinfo_flags */
1475 TV_COMPLETE_UNROLL, /* tv_id */
1476 ( PROP_cfg | PROP_ssa ), /* properties_required */
1477 0, /* properties_provided */
1478 0, /* properties_destroyed */
1479 0, /* todo_flags_start */
1480 0, /* todo_flags_finish */
1481 };
1482
1483 class pass_complete_unroll : public gimple_opt_pass
1484 {
1485 public:
1486 pass_complete_unroll (gcc::context *ctxt)
1487 : gimple_opt_pass (pass_data_complete_unroll, ctxt)
1488 {}
1489
1490 /* opt_pass methods: */
1491 virtual unsigned int execute (function *);
1492
1493 }; // class pass_complete_unroll
1494
1495 unsigned int
1496 pass_complete_unroll::execute (function *fun)
1497 {
1498 if (number_of_loops (fun) <= 1)
1499 return 0;
1500
1501 return tree_unroll_loops_completely (flag_unroll_loops
1502 || flag_peel_loops
1503 || optimize >= 3, true);
1504 }
1505
1506 } // anon namespace
1507
1508 gimple_opt_pass *
1509 make_pass_complete_unroll (gcc::context *ctxt)
1510 {
1511 return new pass_complete_unroll (ctxt);
1512 }
1513
1514 /* Complete unrolling of inner loops. */
1515
1516 namespace {
1517
1518 const pass_data pass_data_complete_unrolli =
1519 {
1520 GIMPLE_PASS, /* type */
1521 "cunrolli", /* name */
1522 OPTGROUP_LOOP, /* optinfo_flags */
1523 TV_COMPLETE_UNROLL, /* tv_id */
1524 ( PROP_cfg | PROP_ssa ), /* properties_required */
1525 0, /* properties_provided */
1526 0, /* properties_destroyed */
1527 0, /* todo_flags_start */
1528 0, /* todo_flags_finish */
1529 };
1530
1531 class pass_complete_unrolli : public gimple_opt_pass
1532 {
1533 public:
1534 pass_complete_unrolli (gcc::context *ctxt)
1535 : gimple_opt_pass (pass_data_complete_unrolli, ctxt)
1536 {}
1537
1538 /* opt_pass methods: */
1539 virtual bool gate (function *) { return optimize >= 2; }
1540 virtual unsigned int execute (function *);
1541
1542 }; // class pass_complete_unrolli
1543
1544 unsigned int
1545 pass_complete_unrolli::execute (function *fun)
1546 {
1547 unsigned ret = 0;
1548
1549 loop_optimizer_init (LOOPS_NORMAL
1550 | LOOPS_HAVE_RECORDED_EXITS);
1551 if (number_of_loops (fun) > 1)
1552 {
1553 scev_initialize ();
1554 ret = tree_unroll_loops_completely (optimize >= 3, false);
1555 free_numbers_of_iterations_estimates ();
1556 scev_finalize ();
1557 }
1558 loop_optimizer_finalize ();
1559
1560 return ret;
1561 }
1562
1563 } // anon namespace
1564
1565 gimple_opt_pass *
1566 make_pass_complete_unrolli (gcc::context *ctxt)
1567 {
1568 return new pass_complete_unrolli (ctxt);
1569 }
1570
1571