/* Induction variable canonicalization and loop peeling.
   Copyright (C) 2004-2013 Free Software Foundation, Inc.

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify it
under the terms of the GNU General Public License as published by the
Free Software Foundation; either version 3, or (at your option) any
later version.

GCC is distributed in the hope that it will be useful, but WITHOUT
ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3.  If not see
<http://www.gnu.org/licenses/>.  */

/* This pass detects the loops that iterate a constant number of times,
   adds a canonical induction variable (step -1, tested against 0)
   and replaces the exit test.  This enables the less powerful rtl
   level analysis to use this information.

   This might spoil the code in some cases (by increasing register
   pressure).  Note that if the new variable is not needed, ivopts will
   get rid of it, so it might only be a problem when there are no other
   linear induction variables.  In that case the created optimization
   possibilities are likely to pay off.

   Additionally, if we detect that it is beneficial to unroll the
   loop completely, we do it right here to expose the optimization
   possibilities to the following passes.  */
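/* For illustration, a schematic example (variable names are invented):
   assuming the pass proves that the latch of

     for (i = 0; i != n; i++)
       body;

   executes exactly 99 times, the loop is rewritten roughly as

     ivtmp = 100;
     do
       {
         body;
         ivtmp--;
       }
     while (ivtmp != 0);

   i.e. a fresh counter with step -1 is tested against 0, a form the rtl
   level analysis can understand directly.  */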
34 | ||
35 | #include "config.h" | |
36 | #include "system.h" | |
37 | #include "coretypes.h" | |
38 | #include "tm.h" | |
39 | #include "tree.h" | |
bb445479 | 40 | #include "tm_p.h" |
bb445479 | 41 | #include "basic-block.h" |
ce084dfc | 42 | #include "gimple-pretty-print.h" |
073c1fd5 | 43 | #include "gimple.h" |
dcf1a1ec | 44 | #include "gimple-iterator.h" |
073c1fd5 | 45 | #include "gimple-ssa.h" |
46 | #include "cgraph.h" | |
47 | #include "tree-cfg.h" | |
48 | #include "tree-phinodes.h" | |
49 | #include "ssa-iterators.h" | |
9ed99284 | 50 | #include "stringpool.h" |
073c1fd5 | 51 | #include "tree-ssanames.h" |
05d9c18a | 52 | #include "tree-ssa-loop-manip.h" |
53 | #include "tree-ssa-loop-niter.h" | |
073c1fd5 | 54 | #include "tree-ssa-loop.h" |
55 | #include "tree-into-ssa.h" | |
bb445479 | 56 | #include "cfgloop.h" |
57 | #include "tree-pass.h" | |
bb445479 | 58 | #include "tree-chrec.h" |
59 | #include "tree-scalar-evolution.h" | |
60 | #include "params.h" | |
61 | #include "flags.h" | |
62 | #include "tree-inline.h" | |
aa2ba534 | 63 | #include "target.h" |
424a4a92 | 64 | #include "tree-cfgcleanup.h" |
bb445479 | 65 | |
604f7b8a | 66 | /* Specifies types of loops that may be unrolled. */ |
67 | ||
68 | enum unroll_level | |
69 | { | |
6414dd4c | 70 | UL_SINGLE_ITER, /* Only loops that exit immediately in the first |
604f7b8a | 71 | iteration. */ |
72 | UL_NO_GROWTH, /* Only loops whose unrolling will not cause increase | |
73 | of code size. */ | |
74 | UL_ALL /* All suitable loops. */ | |
75 | }; | |
76 | ||
bb445479 | 77 | /* Adds a canonical induction variable to LOOP iterating NITER times. EXIT |
78 | is the exit edge whose condition is replaced. */ | |
79 | ||
80 | static void | |
81 | create_canonical_iv (struct loop *loop, edge exit, tree niter) | |
82 | { | |
83 | edge in; | |
75a70cf9 | 84 | tree type, var; |
85 | gimple cond; | |
86 | gimple_stmt_iterator incr_at; | |
bb445479 | 87 | enum tree_code cmp; |
88 | ||
89 | if (dump_file && (dump_flags & TDF_DETAILS)) | |
90 | { | |
91 | fprintf (dump_file, "Added canonical iv to loop %d, ", loop->num); | |
92 | print_generic_expr (dump_file, niter, TDF_SLIM); | |
93 | fprintf (dump_file, " iterations.\n"); | |
94 | } | |
95 | ||
96 | cond = last_stmt (exit->src); | |
cd665a06 | 97 | in = EDGE_SUCC (exit->src, 0); |
bb445479 | 98 | if (in == exit) |
cd665a06 | 99 | in = EDGE_SUCC (exit->src, 1); |
bb445479 | 100 | |
101 | /* Note that we do not need to worry about overflows, since | |
102 | type of niter is always unsigned and all comparisons are | |
103 | just for equality/nonequality -- i.e. everything works | |
104 | with a modulo arithmetics. */ | |
105 | ||
106 | type = TREE_TYPE (niter); | |
49d00087 | 107 | niter = fold_build2 (PLUS_EXPR, type, |
108 | niter, | |
109 | build_int_cst (type, 1)); | |
75a70cf9 | 110 | incr_at = gsi_last_bb (in->src); |
bb445479 | 111 | create_iv (niter, |
3c6185f1 | 112 | build_int_cst (type, -1), |
bb445479 | 113 | NULL_TREE, loop, |
114 | &incr_at, false, NULL, &var); | |
115 | ||
116 | cmp = (exit->flags & EDGE_TRUE_VALUE) ? EQ_EXPR : NE_EXPR; | |
75a70cf9 | 117 | gimple_cond_set_code (cond, cmp); |
118 | gimple_cond_set_lhs (cond, var); | |
119 | gimple_cond_set_rhs (cond, build_int_cst (type, 0)); | |
22aa74c4 | 120 | update_stmt (cond); |
bb445479 | 121 | } |
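/* For illustration (schematic GIMPLE, invented SSA names): given NITER
   n_2, create_canonical_iv materializes a counter starting at n_2 + 1
   with step -1 and rewrites an exit condition such as

     if (i_7 >= n_2)   <exit taken on true edge>

   into

     if (ivtmp_8 == 0)

   EQ_EXPR is used when the exit edge is the true edge, NE_EXPR when the
   exit is taken on the false edge.  */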
/* Describe size of loop as detected by tree_estimate_loop_size.  */
struct loop_size
{
  /* Number of instructions in the loop.  */
  int overall;

  /* Number of instructions that will be likely optimized out in
     peeled iterations of loop (i.e. computations based on induction
     variables whose initial value is a known constant).  */
  int eliminated_by_peeling;

  /* Same statistics for last iteration of loop: it is smaller because
     instructions after exit are not executed.  */
  int last_iteration;
  int last_iteration_eliminated_by_peeling;

  /* If some IV computation will become constant.  */
  bool constant_iv;

  /* Number of call stmts that are not a builtin and are pure or const
     present on the hot path.  */
  int num_pure_calls_on_hot_path;
  /* Number of call stmts that are not a builtin and are not pure nor const
     present on the hot path.  */
  int num_non_pure_calls_on_hot_path;
  /* Number of statements other than calls on the hot path.  */
  int non_call_stmts_on_hot_path;
  /* Number of branches seen on the hot path.  */
  int num_branches_on_hot_path;
};

/* Return true if OP in STMT will be constant after peeling LOOP.  */

static bool
constant_after_peeling (tree op, gimple stmt, struct loop *loop)
{
  affine_iv iv;

  if (is_gimple_min_invariant (op))
    return true;

  /* We can still fold accesses to constant arrays when the index is
     known.  */
  if (TREE_CODE (op) != SSA_NAME)
    {
      tree base = op;

      /* First take a quick look to see whether there is a constant
	 array inside.  */
      while (handled_component_p (base))
	base = TREE_OPERAND (base, 0);
      if ((DECL_P (base)
	   && ctor_for_folding (base) != error_mark_node)
	  || CONSTANT_CLASS_P (base))
	{
	  /* If so, see if we understand all the indices.  */
	  base = op;
	  while (handled_component_p (base))
	    {
	      if (TREE_CODE (base) == ARRAY_REF
		  && !constant_after_peeling (TREE_OPERAND (base, 1), stmt,
					      loop))
		return false;
	      base = TREE_OPERAND (base, 0);
	    }
	  return true;
	}
      return false;
    }

  /* Induction variables are constants in the peeled copies.  */
  if (!simple_iv (loop, loop_containing_stmt (stmt), op, &iv, false))
    return false;
  if (!is_gimple_min_invariant (iv.base))
    return false;
  if (!is_gimple_min_invariant (iv.step))
    return false;
  return true;
}
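/* As a hypothetical example, with

     static const int tab[4] = { 2, 3, 5, 7 };

   and a use tab[i] where i is a simple IV with constant base and step,
   both the index and the array access satisfy constant_after_peeling,
   so the loads are expected to fold to constants in the peeled copies.  */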
/* Computes an estimated number of insns in LOOP.
   EXIT (if non-NULL) is an exit edge that will be eliminated in all but
   the last iteration of the loop.
   EDGE_TO_CANCEL (if non-NULL) is a non-exit edge eliminated in the last
   iteration of the loop.
   Return results in SIZE, estimating the benefits of complete unrolling
   exiting by EXIT.  Stop estimating after UPPER_BOUND is met.  Return
   true in this case.  */

static bool
tree_estimate_loop_size (struct loop *loop, edge exit, edge edge_to_cancel,
			 struct loop_size *size, int upper_bound)
{
  basic_block *body = get_loop_body (loop);
  gimple_stmt_iterator gsi;
  unsigned int i;
  bool after_exit;
  vec<basic_block> path = get_loop_hot_path (loop);

  size->overall = 0;
  size->eliminated_by_peeling = 0;
  size->last_iteration = 0;
  size->last_iteration_eliminated_by_peeling = 0;
  size->num_pure_calls_on_hot_path = 0;
  size->num_non_pure_calls_on_hot_path = 0;
  size->non_call_stmts_on_hot_path = 0;
  size->num_branches_on_hot_path = 0;
  size->constant_iv = 0;

  if (dump_file && (dump_flags & TDF_DETAILS))
    fprintf (dump_file, "Estimating sizes for loop %i\n", loop->num);
  for (i = 0; i < loop->num_nodes; i++)
    {
      if (edge_to_cancel && body[i] != edge_to_cancel->src
	  && dominated_by_p (CDI_DOMINATORS, body[i], edge_to_cancel->src))
	after_exit = true;
      else
	after_exit = false;
      if (dump_file && (dump_flags & TDF_DETAILS))
	fprintf (dump_file, " BB: %i, after_exit: %i\n", body[i]->index,
		 after_exit);

      for (gsi = gsi_start_bb (body[i]); !gsi_end_p (gsi); gsi_next (&gsi))
	{
	  gimple stmt = gsi_stmt (gsi);
	  int num = estimate_num_insns (stmt, &eni_size_weights);
	  bool likely_eliminated = false;
	  bool likely_eliminated_last = false;
	  bool likely_eliminated_peeled = false;

	  if (dump_file && (dump_flags & TDF_DETAILS))
	    {
	      fprintf (dump_file, "  size: %3i ", num);
	      print_gimple_stmt (dump_file, gsi_stmt (gsi), 0, 0);
	    }

	  /* Look for reasons why we might optimize this stmt away.  */

	  if (gimple_has_side_effects (stmt))
	    ;
	  /* Exit conditional.  */
	  else if (exit && body[i] == exit->src
		   && stmt == last_stmt (exit->src))
	    {
	      if (dump_file && (dump_flags & TDF_DETAILS))
		fprintf (dump_file, "   Exit condition will be eliminated "
			 "in peeled copies.\n");
	      likely_eliminated_peeled = true;
	    }
	  else if (edge_to_cancel && body[i] == edge_to_cancel->src
		   && stmt == last_stmt (edge_to_cancel->src))
	    {
	      if (dump_file && (dump_flags & TDF_DETAILS))
		fprintf (dump_file, "   Exit condition will be eliminated "
			 "in last copy.\n");
	      likely_eliminated_last = true;
	    }
	  /* Sets of IV variables.  */
	  else if (gimple_code (stmt) == GIMPLE_ASSIGN
		   && constant_after_peeling (gimple_assign_lhs (stmt), stmt,
					      loop))
	    {
	      if (dump_file && (dump_flags & TDF_DETAILS))
		fprintf (dump_file, "   Induction variable computation will"
			 " be folded away.\n");
	      likely_eliminated = true;
	    }
	  /* Assignments of IV variables.  */
	  else if (gimple_code (stmt) == GIMPLE_ASSIGN
		   && TREE_CODE (gimple_assign_lhs (stmt)) == SSA_NAME
		   && constant_after_peeling (gimple_assign_rhs1 (stmt), stmt,
					      loop)
		   && (gimple_assign_rhs_class (stmt) != GIMPLE_BINARY_RHS
		       || constant_after_peeling (gimple_assign_rhs2 (stmt),
						  stmt, loop)))
	    {
	      size->constant_iv = true;
	      if (dump_file && (dump_flags & TDF_DETAILS))
		fprintf (dump_file,
			 "   Constant expression will be folded away.\n");
	      likely_eliminated = true;
	    }
	  /* Conditionals.  */
	  else if ((gimple_code (stmt) == GIMPLE_COND
		    && constant_after_peeling (gimple_cond_lhs (stmt), stmt,
					       loop)
		    && constant_after_peeling (gimple_cond_rhs (stmt), stmt,
					       loop))
		   || (gimple_code (stmt) == GIMPLE_SWITCH
		       && constant_after_peeling (gimple_switch_index (stmt),
						  stmt, loop)))
	    {
	      if (dump_file && (dump_flags & TDF_DETAILS))
		fprintf (dump_file, "   Constant conditional.\n");
	      likely_eliminated = true;
	    }

	  size->overall += num;
	  if (likely_eliminated || likely_eliminated_peeled)
	    size->eliminated_by_peeling += num;
	  if (!after_exit)
	    {
	      size->last_iteration += num;
	      if (likely_eliminated || likely_eliminated_last)
		size->last_iteration_eliminated_by_peeling += num;
	    }
	  if ((size->overall * 3 / 2 - size->eliminated_by_peeling
	       - size->last_iteration_eliminated_by_peeling) > upper_bound)
	    {
	      free (body);
	      path.release ();
	      return true;
	    }
	}
    }
  while (path.length ())
    {
      basic_block bb = path.pop ();
      for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
	{
	  gimple stmt = gsi_stmt (gsi);
	  if (gimple_code (stmt) == GIMPLE_CALL)
	    {
	      int flags = gimple_call_flags (stmt);
	      tree decl = gimple_call_fndecl (stmt);

	      if (decl && DECL_IS_BUILTIN (decl)
		  && is_inexpensive_builtin (decl))
		;
	      else if (flags & (ECF_PURE | ECF_CONST))
		size->num_pure_calls_on_hot_path++;
	      else
		size->num_non_pure_calls_on_hot_path++;
	      /* Calls also count as branches on the hot path.  */
	      size->num_branches_on_hot_path++;
	    }
	  else if (gimple_code (stmt) != GIMPLE_DEBUG)
	    size->non_call_stmts_on_hot_path++;
	  /* Count a conditional as a branch unless all of its operands
	     will fold to constants after peeling.  */
	  if (((gimple_code (stmt) == GIMPLE_COND
		&& (!constant_after_peeling (gimple_cond_lhs (stmt), stmt,
					     loop)
		    || !constant_after_peeling (gimple_cond_rhs (stmt), stmt,
						loop)))
	       || (gimple_code (stmt) == GIMPLE_SWITCH
		   && !constant_after_peeling (gimple_switch_index (stmt),
					       stmt, loop)))
	      && (!exit || bb != exit->src))
	    size->num_branches_on_hot_path++;
	}
    }
  path.release ();
  if (dump_file && (dump_flags & TDF_DETAILS))
    fprintf (dump_file, "size: %i-%i, last_iteration: %i-%i\n", size->overall,
	     size->eliminated_by_peeling, size->last_iteration,
	     size->last_iteration_eliminated_by_peeling);

  free (body);
  return false;
}
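/* As a rough illustration (hypothetical loop): for

     for (i = 0; i < 4; i++)
       a[i] = i;

   the IV increment and the exit compare are computations on an IV with a
   constant base, so they count towards eliminated_by_peeling, and the
   estimated size of the fully peeled sequence is dominated by the
   stores.  */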
/* Estimate number of insns of completely unrolled loop.
   It is (NUNROLL + 1) * size of loop body, taking into account
   the fact that in the last copy everything after the exit conditional
   is dead and that some instructions will be eliminated after
   peeling.

   The loop body is likely going to simplify further; this is difficult
   to guess, so we just decrease the result by 1/3.  */

static unsigned HOST_WIDE_INT
estimated_unrolled_size (struct loop_size *size,
			 unsigned HOST_WIDE_INT nunroll)
{
  HOST_WIDE_INT unr_insns = ((nunroll)
			     * (HOST_WIDE_INT) (size->overall
						- size->eliminated_by_peeling));
  if (!nunroll)
    unr_insns = 0;
  unr_insns += size->last_iteration
	       - size->last_iteration_eliminated_by_peeling;

  unr_insns = unr_insns * 2 / 3;
  if (unr_insns <= 0)
    unr_insns = 1;

  return unr_insns;
}
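/* A worked example with invented numbers: for overall = 10,
   eliminated_by_peeling = 4, last_iteration = 8,
   last_iteration_eliminated_by_peeling = 4 and nunroll = 3, this gives
   3 * (10 - 4) + (8 - 4) = 22 insns, scaled by 2/3 to an estimate
   of 14.  */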
/* Loop LOOP is known not to loop.  See if there is an edge in the loop
   body that can be removed to make the loop always exit while at the
   same time not making any code potentially executed during the last
   iteration dead.

   After complete unrolling we still may get rid of the conditional
   on the exit in the last copy even if we have no idea what it does.
   This is quite a common case for loops of the form

     int a[5];
     for (i=0;i<b;i++)
       a[i]=0;

   Here we prove the loop to iterate 5 times but we do not know
   it from the induction variable.

   For now we handle only the simple case where there is an exit condition
   just before the latch block and the latch block contains no statements
   with side effects that may otherwise terminate the execution of the loop
   (such as by EH or by terminating the program or longjmp).

   In the general case we may want to cancel the paths leading to statements
   that loop-niter identified as having undefined effect in the last
   iteration.  The other cases are hopefully rare and will be cleaned up
   later.  */

static edge
loop_edge_to_cancel (struct loop *loop)
{
  vec<edge> exits;
  unsigned i;
  edge edge_to_cancel;
  gimple_stmt_iterator gsi;

  /* We want only one predecessor of the loop.  */
  if (EDGE_COUNT (loop->latch->preds) > 1)
    return NULL;

  exits = get_loop_exit_edges (loop);

  FOR_EACH_VEC_ELT (exits, i, edge_to_cancel)
    {
      /* Find the other edge than the loop exit
	 leaving the conditional.  */
      if (EDGE_COUNT (edge_to_cancel->src->succs) != 2)
	continue;
      if (EDGE_SUCC (edge_to_cancel->src, 0) == edge_to_cancel)
	edge_to_cancel = EDGE_SUCC (edge_to_cancel->src, 1);
      else
	edge_to_cancel = EDGE_SUCC (edge_to_cancel->src, 0);

      /* We can only handle conditionals.  */
      if (!(edge_to_cancel->flags & (EDGE_TRUE_VALUE | EDGE_FALSE_VALUE)))
	continue;

      /* We should never have conditionals in the loop latch.  */
      gcc_assert (edge_to_cancel->dest != loop->header);

      /* Check that it leads to loop latch.  */
      if (edge_to_cancel->dest != loop->latch)
	continue;

      exits.release ();

      /* Verify that the code in loop latch does nothing that may end program
	 execution without really reaching the exit.  This may include
	 non-pure/const function calls, EH statements, volatile ASMs etc.  */
      for (gsi = gsi_start_bb (loop->latch); !gsi_end_p (gsi); gsi_next (&gsi))
	if (gimple_has_side_effects (gsi_stmt (gsi)))
	  return NULL;
      return edge_to_cancel;
    }
  exits.release ();
  return NULL;
}

/* Remove all tests for exits that are known to be taken after LOOP was
   peeled NPEELED times.  Put gcc_unreachable before every statement
   known not to be executed.  */

static bool
remove_exits_and_undefined_stmts (struct loop *loop, unsigned int npeeled)
{
  struct nb_iter_bound *elt;
  bool changed = false;

  for (elt = loop->bounds; elt; elt = elt->next)
    {
      /* If statement is known to be undefined after peeling, turn it
	 into unreachable (or trap when debugging experience is supposed
	 to be good).  */
      if (!elt->is_exit
	  && elt->bound.ult (double_int::from_uhwi (npeeled)))
	{
	  gimple_stmt_iterator gsi = gsi_for_stmt (elt->stmt);
	  gimple stmt = gimple_build_call
	      (builtin_decl_implicit (BUILT_IN_UNREACHABLE), 0);

	  gimple_set_location (stmt, gimple_location (elt->stmt));
	  gsi_insert_before (&gsi, stmt, GSI_NEW_STMT);
	  changed = true;
	  if (dump_file && (dump_flags & TDF_DETAILS))
	    {
	      fprintf (dump_file, "Forced statement unreachable: ");
	      print_gimple_stmt (dump_file, elt->stmt, 0, 0);
	    }
	}
      /* If we know the exit will be taken after peeling, update.  */
      else if (elt->is_exit
	       && elt->bound.ule (double_int::from_uhwi (npeeled)))
	{
	  basic_block bb = gimple_bb (elt->stmt);
	  edge exit_edge = EDGE_SUCC (bb, 0);

	  if (dump_file && (dump_flags & TDF_DETAILS))
	    {
	      fprintf (dump_file, "Forced exit to be taken: ");
	      print_gimple_stmt (dump_file, elt->stmt, 0, 0);
	    }
	  if (!loop_exit_edge_p (loop, exit_edge))
	    exit_edge = EDGE_SUCC (bb, 1);
	  gcc_checking_assert (loop_exit_edge_p (loop, exit_edge));
	  if (exit_edge->flags & EDGE_TRUE_VALUE)
	    gimple_cond_make_true (elt->stmt);
	  else
	    gimple_cond_make_false (elt->stmt);
	  update_stmt (elt->stmt);
	  changed = true;
	}
    }
  return changed;
}
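/* For example (hypothetical): after peeling 4 copies, a statement whose
   recorded bound shows it cannot be reached in the later copies without
   undefined behavior gets a __builtin_unreachable () call in front of it,
   while an exit test whose bound shows it must have fired by then is
   folded to a constant true/false condition.  */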
/* Remove all exits that are known to be never taken because of the loop
   bound discovered.  */

static bool
remove_redundant_iv_tests (struct loop *loop)
{
  struct nb_iter_bound *elt;
  bool changed = false;

  if (!loop->any_upper_bound)
    return false;
  for (elt = loop->bounds; elt; elt = elt->next)
    {
      /* Exit is pointless if it won't be taken before the loop reaches
	 its upper bound.  */
      if (elt->is_exit && loop->any_upper_bound
	  && loop->nb_iterations_upper_bound.ult (elt->bound))
	{
	  basic_block bb = gimple_bb (elt->stmt);
	  edge exit_edge = EDGE_SUCC (bb, 0);
	  struct tree_niter_desc niter;

	  if (!loop_exit_edge_p (loop, exit_edge))
	    exit_edge = EDGE_SUCC (bb, 1);

	  /* Only when we know the actual number of iterations, not
	     just a bound, can we remove the exit.  */
	  if (!number_of_iterations_exit (loop, exit_edge,
					  &niter, false, false)
	      || !integer_onep (niter.assumptions)
	      || !integer_zerop (niter.may_be_zero)
	      || !niter.niter
	      || TREE_CODE (niter.niter) != INTEGER_CST
	      || !loop->nb_iterations_upper_bound.ult
		   (tree_to_double_int (niter.niter)))
	    continue;

	  if (dump_file && (dump_flags & TDF_DETAILS))
	    {
	      fprintf (dump_file, "Removed pointless exit: ");
	      print_gimple_stmt (dump_file, elt->stmt, 0, 0);
	    }
	  if (exit_edge->flags & EDGE_TRUE_VALUE)
	    gimple_cond_make_false (elt->stmt);
	  else
	    gimple_cond_make_true (elt->stmt);
	  update_stmt (elt->stmt);
	  changed = true;
	}
    }
  return changed;
}
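/* For instance (hypothetical), in

     for (i = 0; i < n && i < 100; i++)
       body;

   if the recorded upper bound derived from the i < n exit is, say, 10,
   the i < 100 test can never be taken, so its condition is folded above
   so that the exit edge is never followed.  */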
/* Stores loops that will be unlooped after we process the whole loop
   tree.  */
static vec<loop_p> loops_to_unloop;
static vec<int> loops_to_unloop_nunroll;

/* Cancel all fully unrolled loops by putting __builtin_unreachable
   on the latch edge.
   We do it after all unrolling since unlooping moves basic blocks
   across loop boundaries, trashing loop closed SSA form as well
   as SCEV info that needs to be intact during unrolling.

   IRRED_INVALIDATED is used to bookkeep if information about
   irreducible regions may become invalid as a result
   of the transformation.
   LOOP_CLOSED_SSA_INVALIDATED is used to bookkeep the case
   when we need to go into loop closed SSA form.  */

static void
unloop_loops (bitmap loop_closed_ssa_invalidated,
	      bool *irred_invalidated)
{
  while (loops_to_unloop.length ())
    {
      struct loop *loop = loops_to_unloop.pop ();
      int n_unroll = loops_to_unloop_nunroll.pop ();
      basic_block latch = loop->latch;
      edge latch_edge = loop_latch_edge (loop);
      int flags = latch_edge->flags;
      location_t locus = latch_edge->goto_locus;
      gimple stmt;
      gimple_stmt_iterator gsi;

      remove_exits_and_undefined_stmts (loop, n_unroll);

      /* Unloop destroys the latch edge.  */
      unloop (loop, irred_invalidated, loop_closed_ssa_invalidated);

      /* Create new basic block for the latch edge destination and wire
	 it in.  */
      stmt = gimple_build_call (builtin_decl_implicit (BUILT_IN_UNREACHABLE),
				0);
      latch_edge = make_edge (latch, create_basic_block (NULL, NULL, latch),
			      flags);
      latch_edge->probability = 0;
      latch_edge->count = 0;
      latch_edge->flags |= flags;
      latch_edge->goto_locus = locus;

      latch_edge->dest->loop_father = current_loops->tree_root;
      latch_edge->dest->count = 0;
      latch_edge->dest->frequency = 0;
      set_immediate_dominator (CDI_DOMINATORS, latch_edge->dest,
			       latch_edge->src);

      gsi = gsi_start_bb (latch_edge->dest);
      gsi_insert_after (&gsi, stmt, GSI_NEW_STMT);
    }
  loops_to_unloop.release ();
  loops_to_unloop_nunroll.release ();
}
/* Tries to unroll LOOP completely, i.e. NITER times.
   UL determines which loops we are allowed to unroll.
   EXIT is the exit of the loop that should be eliminated.
   MAXITER specifies the bound on the number of iterations, -1 if it is
   not known or too large for HOST_WIDE_INT.  The location
   LOCUS corresponding to the loop is used when emitting
   a summary of the unroll to the dump file.  */

static bool
try_unroll_loop_completely (struct loop *loop,
			    edge exit, tree niter,
			    enum unroll_level ul,
			    HOST_WIDE_INT maxiter,
			    location_t locus)
{
  unsigned HOST_WIDE_INT n_unroll, ninsns, max_unroll, unr_insns;
  gimple cond;
  struct loop_size size;
  bool n_unroll_found = false;
  edge edge_to_cancel = NULL;

  /* See if we proved the number of iterations to be a low constant.

     EXIT is an edge that will be removed in all but the last iteration of
     the loop.

     EDGE_TO_CANCEL is an edge that will be removed from the last iteration
     of the unrolled sequence and is expected to make the final loop stop
     rolling.

     If the number of executions of the loop is determined by a standard
     induction variable test, then EXIT and EDGE_TO_CANCEL are the two
     edges leaving from the iv test.  */
  if (tree_fits_uhwi_p (niter))
    {
      n_unroll = tree_to_uhwi (niter);
      n_unroll_found = true;
      edge_to_cancel = EDGE_SUCC (exit->src, 0);
      if (edge_to_cancel == exit)
	edge_to_cancel = EDGE_SUCC (exit->src, 1);
    }
  /* We do not know the number of iterations and thus we cannot eliminate
     the EXIT edge.  */
  else
    exit = NULL;

  /* See if we can improve our estimate by using recorded loop bounds.  */
  if (maxiter >= 0
      && (!n_unroll_found || (unsigned HOST_WIDE_INT) maxiter < n_unroll))
    {
      n_unroll = maxiter;
      n_unroll_found = true;
      /* Loop terminates before the IV variable test, so we cannot
	 remove it in the last iteration.  */
      edge_to_cancel = NULL;
    }

  if (!n_unroll_found)
    return false;

  max_unroll = PARAM_VALUE (PARAM_MAX_COMPLETELY_PEEL_TIMES);
  if (n_unroll > max_unroll)
    return false;

  if (!edge_to_cancel)
    edge_to_cancel = loop_edge_to_cancel (loop);

  if (n_unroll)
    {
      sbitmap wont_exit;
      edge e;
      unsigned i;
      bool large;
      vec<edge> to_remove = vNULL;
      if (ul == UL_SINGLE_ITER)
	return false;

      large = tree_estimate_loop_size
		 (loop, exit, edge_to_cancel, &size,
		  PARAM_VALUE (PARAM_MAX_COMPLETELY_PEELED_INSNS));
      ninsns = size.overall;
      if (large)
	{
	  if (dump_file && (dump_flags & TDF_DETAILS))
	    fprintf (dump_file, "Not unrolling loop %d: it is too large.\n",
		     loop->num);
	  return false;
	}

      unr_insns = estimated_unrolled_size (&size, n_unroll);
      if (dump_file && (dump_flags & TDF_DETAILS))
	{
	  fprintf (dump_file, "  Loop size: %d\n", (int) ninsns);
	  fprintf (dump_file, "  Estimated size after unrolling: %d\n",
		   (int) unr_insns);
	}

      /* If the code is going to shrink, we don't need to be extra cautious
	 on guessing if the unrolling is going to be profitable.  */
      if (unr_insns
	  /* If there is an IV variable that will become constant, we save
	     one instruction in the loop prologue we do not account
	     otherwise.  */
	  <= ninsns + (size.constant_iv != false))
	;
      /* We unroll only inner loops, because we do not consider it
	 profitable otherwise.  We can still cancel the loopback edge of a
	 loop that does not roll; this is always a good idea.  */
      else if (ul == UL_NO_GROWTH)
	{
	  if (dump_file && (dump_flags & TDF_DETAILS))
	    fprintf (dump_file, "Not unrolling loop %d: size would grow.\n",
		     loop->num);
	  return false;
	}
      /* Outer loops tend to be less interesting candidates for complete
	 unrolling unless we can do a lot of propagation into the inner
	 loop body.  For now we disable outer loop unrolling when the code
	 would grow.  */
      else if (loop->inner)
	{
	  if (dump_file && (dump_flags & TDF_DETAILS))
	    fprintf (dump_file, "Not unrolling loop %d: "
		     "it is not innermost and code would grow.\n",
		     loop->num);
	  return false;
	}
      /* If there is a call on a hot path through the loop, then
	 there is most probably not much to optimize.  */
      else if (size.num_non_pure_calls_on_hot_path)
	{
	  if (dump_file && (dump_flags & TDF_DETAILS))
	    fprintf (dump_file, "Not unrolling loop %d: "
		     "contains call and code would grow.\n",
		     loop->num);
	  return false;
	}
      /* If there is a pure/const call in the function, then we
	 can still optimize the unrolled loop body if it contains
	 some other interesting code than the calls and code
	 storing or cumulating the return value.  */
      else if (size.num_pure_calls_on_hot_path
	       /* One IV increment, one test, one ivtmp store
		  and one useful stmt.  That is about the minimal loop
		  doing a pure call.  */
	       && (size.non_call_stmts_on_hot_path
		   <= 3 + size.num_pure_calls_on_hot_path))
	{
	  if (dump_file && (dump_flags & TDF_DETAILS))
	    fprintf (dump_file, "Not unrolling loop %d: "
		     "contains just pure calls and code would grow.\n",
		     loop->num);
	  return false;
	}
      /* Complete unrolling is a major win when control flow is removed and
	 one big basic block is created.  If the loop contains control flow
	 the optimization may still be a win because of eliminating the loop
	 overhead but it also may blow the branch predictor tables.
	 Limit number of branches on the hot path through the peeled
	 sequence.  */
      else if (size.num_branches_on_hot_path * (int) n_unroll
	       > PARAM_VALUE (PARAM_MAX_PEEL_BRANCHES))
	{
	  if (dump_file && (dump_flags & TDF_DETAILS))
	    fprintf (dump_file, "Not unrolling loop %d: "
		     "number of branches on the hot path in the unrolled"
		     " sequence reaches the --param max-peel-branches"
		     " limit.\n",
		     loop->num);
	  return false;
	}
      else if (unr_insns
	       > (unsigned) PARAM_VALUE (PARAM_MAX_COMPLETELY_PEELED_INSNS))
	{
	  if (dump_file && (dump_flags & TDF_DETAILS))
	    fprintf (dump_file, "Not unrolling loop %d: "
		     "(--param max-completely-peeled-insns limit reached).\n",
		     loop->num);
	  return false;
	}

      initialize_original_copy_tables ();
      wont_exit = sbitmap_alloc (n_unroll + 1);
      bitmap_ones (wont_exit);
      bitmap_clear_bit (wont_exit, 0);

      if (!gimple_duplicate_loop_to_header_edge (loop,
						 loop_preheader_edge (loop),
						 n_unroll, wont_exit,
						 exit, &to_remove,
						 DLTHE_FLAG_UPDATE_FREQ
						 | DLTHE_FLAG_COMPLETTE_PEEL))
	{
	  free_original_copy_tables ();
	  free (wont_exit);
	  if (dump_file && (dump_flags & TDF_DETAILS))
	    fprintf (dump_file, "Failed to duplicate the loop\n");
	  return false;
	}

      FOR_EACH_VEC_ELT (to_remove, i, e)
	{
	  bool ok = remove_path (e);
	  gcc_assert (ok);
	}

      to_remove.release ();
      free (wont_exit);
      free_original_copy_tables ();
    }

  /* Remove the conditional from the last copy of the loop.  */
  if (edge_to_cancel)
    {
      cond = last_stmt (edge_to_cancel->src);
      if (edge_to_cancel->flags & EDGE_TRUE_VALUE)
	gimple_cond_make_false (cond);
      else
	gimple_cond_make_true (cond);
      update_stmt (cond);
      /* Do not remove the path.  Doing so may remove the outer loop
	 and confuse bookkeeping code in tree_unroll_loops_completely.  */
    }

  /* Store the loop for later unlooping and exit removal.  */
  loops_to_unloop.safe_push (loop);
  loops_to_unloop_nunroll.safe_push (n_unroll);

  if (dump_enabled_p ())
    {
      if (!n_unroll)
	dump_printf_loc (MSG_OPTIMIZED_LOCATIONS | TDF_DETAILS, locus,
			 "loop turned into non-loop; it never loops\n");
      else
	{
	  dump_printf_loc (MSG_OPTIMIZED_LOCATIONS | TDF_DETAILS, locus,
			   "loop with %d iterations completely unrolled",
			   (int) (n_unroll + 1));
	  if (profile_info)
	    dump_printf (MSG_OPTIMIZED_LOCATIONS | TDF_DETAILS,
			 " (header execution count %d)",
			 (int) loop->header->count);
	  dump_printf (MSG_OPTIMIZED_LOCATIONS | TDF_DETAILS, "\n");
	}
    }

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      if (exit)
	fprintf (dump_file, "Exit condition of peeled iterations was "
		 "eliminated.\n");
      if (edge_to_cancel)
	fprintf (dump_file, "Last iteration exit edge was proved true.\n");
      else
	fprintf (dump_file, "Latch of last iteration was marked by "
		 "__builtin_unreachable ().\n");
    }

  return true;
}
/* Adds a canonical induction variable to LOOP if suitable.
   CREATE_IV is true if we may create a new iv.  UL determines
   which loops we are allowed to completely unroll.  If TRY_EVAL is true,
   we try to determine the number of iterations of a loop by direct
   evaluation.  Returns true if cfg is changed.  */

static bool
canonicalize_loop_induction_variables (struct loop *loop,
				       bool create_iv, enum unroll_level ul,
				       bool try_eval)
{
  edge exit = NULL;
  tree niter;
  HOST_WIDE_INT maxiter;
  bool modified = false;
  location_t locus = UNKNOWN_LOCATION;

  niter = number_of_latch_executions (loop);
  exit = single_exit (loop);
  if (TREE_CODE (niter) == INTEGER_CST)
    locus = gimple_location (last_stmt (exit->src));
  else
    {
      /* If the loop has more than one exit, try checking all of them
	 for # of iterations determinable through scev.  */
      if (!exit)
	niter = find_loop_niter (loop, &exit);

      /* Finally if everything else fails, try brute force evaluation.  */
      if (try_eval
	  && (chrec_contains_undetermined (niter)
	      || TREE_CODE (niter) != INTEGER_CST))
	niter = find_loop_niter_by_eval (loop, &exit);

      if (exit)
	locus = gimple_location (last_stmt (exit->src));

      if (TREE_CODE (niter) != INTEGER_CST)
	exit = NULL;
    }

  /* We work exceptionally hard here to estimate the bound
     by find_loop_niter_by_eval.  Be sure to keep it for the future.  */
  if (niter && TREE_CODE (niter) == INTEGER_CST)
    {
      record_niter_bound (loop, tree_to_double_int (niter),
			  exit == single_likely_exit (loop), true);
    }

  /* Force re-computation of loop bounds so we can remove redundant
     exits.  */
  maxiter = max_loop_iterations_int (loop);

  if (dump_file && (dump_flags & TDF_DETAILS)
      && TREE_CODE (niter) == INTEGER_CST)
    {
      fprintf (dump_file, "Loop %d iterates ", loop->num);
      print_generic_expr (dump_file, niter, TDF_SLIM);
      fprintf (dump_file, " times.\n");
    }
  if (dump_file && (dump_flags & TDF_DETAILS)
      && maxiter >= 0)
    {
      fprintf (dump_file, "Loop %d iterates at most %i times.\n", loop->num,
	       (int) maxiter);
    }

  /* Remove exits that are known to be never taken based on loop bound.
     Needs to be called after computation of max_loop_iterations_int that
     populates the loop bounds.  */
  modified |= remove_redundant_iv_tests (loop);

  if (try_unroll_loop_completely (loop, exit, niter, ul, maxiter, locus))
    return true;

  if (create_iv
      && niter && !chrec_contains_undetermined (niter)
      && exit && just_once_each_iteration_p (loop, exit->src))
    create_canonical_iv (loop, exit, niter);

  return modified;
}
/* The main entry point of the pass.  Adds canonical induction variables
   to the suitable loops.  */

unsigned int
canonicalize_induction_variables (void)
{
  loop_iterator li;
  struct loop *loop;
  bool changed = false;
  bool irred_invalidated = false;
  bitmap loop_closed_ssa_invalidated = BITMAP_ALLOC (NULL);

  free_numbers_of_iterations_estimates ();
  estimate_numbers_of_iterations ();

  FOR_EACH_LOOP (li, loop, LI_FROM_INNERMOST)
    {
      changed |= canonicalize_loop_induction_variables (loop,
							true, UL_SINGLE_ITER,
							true);
    }
  gcc_assert (!need_ssa_update_p (cfun));

  unloop_loops (loop_closed_ssa_invalidated, &irred_invalidated);
  if (irred_invalidated
      && loops_state_satisfies_p (LOOPS_HAVE_MARKED_IRREDUCIBLE_REGIONS))
    mark_irreducible_loops ();

  /* Clean up the information about numbers of iterations, since brute force
     evaluation could reveal new information.  */
  scev_reset ();

  if (!bitmap_empty_p (loop_closed_ssa_invalidated))
    {
      gcc_checking_assert (loops_state_satisfies_p (LOOP_CLOSED_SSA));
      rewrite_into_loop_closed_ssa (NULL, TODO_update_ssa);
    }
  BITMAP_FREE (loop_closed_ssa_invalidated);

  if (changed)
    return TODO_cleanup_cfg;
  return 0;
}

/* Propagate VAL into all uses of SSA_NAME.  */

static void
propagate_into_all_uses (tree ssa_name, tree val)
{
  imm_use_iterator iter;
  gimple use_stmt;

  FOR_EACH_IMM_USE_STMT (use_stmt, iter, ssa_name)
    {
      gimple_stmt_iterator use_stmt_gsi = gsi_for_stmt (use_stmt);
      use_operand_p use;

      FOR_EACH_IMM_USE_ON_STMT (use, iter)
	SET_USE (use, val);

      if (is_gimple_assign (use_stmt)
	  && get_gimple_rhs_class (gimple_assign_rhs_code (use_stmt))
	     == GIMPLE_SINGLE_RHS)
	{
	  tree rhs = gimple_assign_rhs1 (use_stmt);

	  if (TREE_CODE (rhs) == ADDR_EXPR)
	    recompute_tree_invariant_for_addr_expr (rhs);
	}

      fold_stmt_inplace (&use_stmt_gsi);
      update_stmt (use_stmt);
      maybe_clean_or_replace_eh_stmt (use_stmt, use_stmt);
    }
}

/* Propagate constant SSA_NAMEs defined in basic block BB.  */

static void
propagate_constants_for_unrolling (basic_block bb)
{
  gimple_stmt_iterator gsi;

  /* Look for degenerate PHI nodes with constant argument.  */
  for (gsi = gsi_start_phis (bb); !gsi_end_p (gsi); )
    {
      gimple phi = gsi_stmt (gsi);
      tree result = gimple_phi_result (phi);
      tree arg = gimple_phi_arg_def (phi, 0);

      if (gimple_phi_num_args (phi) == 1 && TREE_CODE (arg) == INTEGER_CST)
	{
	  propagate_into_all_uses (result, arg);
	  gsi_remove (&gsi, true);
	  release_ssa_name (result);
	}
      else
	gsi_next (&gsi);
    }

  /* Look for assignments to SSA names with constant RHS.  */
  for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); )
    {
      gimple stmt = gsi_stmt (gsi);
      tree lhs;

      if (is_gimple_assign (stmt)
	  && gimple_assign_rhs_code (stmt) == INTEGER_CST
	  && (lhs = gimple_assign_lhs (stmt), TREE_CODE (lhs) == SSA_NAME)
	  && !SSA_NAME_OCCURS_IN_ABNORMAL_PHI (lhs))
	{
	  propagate_into_all_uses (lhs, gimple_assign_rhs1 (stmt));
	  gsi_remove (&gsi, true);
	  release_ssa_name (lhs);
	}
      else
	gsi_next (&gsi);
    }
}
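/* For example (schematic GIMPLE, invented SSA names): a peeled copy can
   leave a degenerate PHI such as

     i_5 = PHI <10(4)>

   The code above replaces all uses of i_5 by 10, folds and updates the
   using statements, and removes the PHI, so the next unrolling iteration
   sees the simplified bodies.  */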
/* Process loops from innermost to outer, stopping at the innermost
   loop we unrolled.  */

static bool
tree_unroll_loops_completely_1 (bool may_increase_size, bool unroll_outer,
				vec<loop_p, va_heap>& father_stack,
				struct loop *loop)
{
  struct loop *loop_father;
  bool changed = false;
  struct loop *inner;
  enum unroll_level ul;

  /* Process inner loops first.  */
  for (inner = loop->inner; inner != NULL; inner = inner->next)
    changed |= tree_unroll_loops_completely_1 (may_increase_size,
					       unroll_outer, father_stack,
					       inner);

  /* If we changed an inner loop we cannot process outer loops in this
     iteration because SSA form is not up-to-date.  Continue with
     siblings of outer loops instead.  */
  if (changed)
    return true;

  /* Don't unroll #pragma omp simd loops until the vectorizer
     attempts to vectorize those.  */
  if (loop->force_vect)
    return false;

  /* Try to unroll this loop.  */
  loop_father = loop_outer (loop);
  if (!loop_father)
    return false;

  if (may_increase_size && optimize_loop_nest_for_speed_p (loop)
      /* Unroll outermost loops only if asked to do so or they do
	 not cause code growth.  */
      && (unroll_outer || loop_outer (loop_father)))
    ul = UL_ALL;
  else
    ul = UL_NO_GROWTH;

  if (canonicalize_loop_induction_variables
	(loop, false, ul, !flag_tree_loop_ivcanon))
    {
      /* If we'll continue unrolling, we need to propagate constants
	 within the new basic blocks to fold away induction variable
	 computations; otherwise, the size might blow up before the
	 iteration is complete and the IR eventually cleaned up.  */
      if (loop_outer (loop_father) && !loop_father->aux)
	{
	  father_stack.safe_push (loop_father);
	  loop_father->aux = loop_father;
	}

      return true;
    }

  return false;
}

/* Unroll LOOPS completely if they iterate just a few times.  Unless
   MAY_INCREASE_SIZE is true, perform the unrolling only if the
   size of the code does not increase.  */

unsigned int
tree_unroll_loops_completely (bool may_increase_size, bool unroll_outer)
{
  stack_vec<loop_p, 16> father_stack;
  bool changed;
  int iteration = 0;
  bool irred_invalidated = false;

  do
    {
      changed = false;
      bitmap loop_closed_ssa_invalidated = NULL;

      if (loops_state_satisfies_p (LOOP_CLOSED_SSA))
	loop_closed_ssa_invalidated = BITMAP_ALLOC (NULL);

      free_numbers_of_iterations_estimates ();
      estimate_numbers_of_iterations ();

      changed = tree_unroll_loops_completely_1 (may_increase_size,
						unroll_outer, father_stack,
						current_loops->tree_root);
      if (changed)
	{
	  struct loop **iter;
	  unsigned i;

	  /* Be sure to skip unlooped loops while processing the
	     father_stack array.  */
	  FOR_EACH_VEC_ELT (loops_to_unloop, i, iter)
	    (*iter)->aux = NULL;
	  FOR_EACH_VEC_ELT (father_stack, i, iter)
	    if (!(*iter)->aux)
	      *iter = NULL;
	  unloop_loops (loop_closed_ssa_invalidated, &irred_invalidated);

	  /* We cannot use TODO_update_ssa_no_phi because VOPS gets
	     confused.  */
	  if (loop_closed_ssa_invalidated
	      && !bitmap_empty_p (loop_closed_ssa_invalidated))
	    rewrite_into_loop_closed_ssa (loop_closed_ssa_invalidated,
					  TODO_update_ssa);
	  else
	    update_ssa (TODO_update_ssa);

	  /* Propagate the constants within the new basic blocks.  */
	  FOR_EACH_VEC_ELT (father_stack, i, iter)
	    if (*iter)
	      {
		unsigned j;
		basic_block *body = get_loop_body_in_dom_order (*iter);
		for (j = 0; j < (*iter)->num_nodes; j++)
		  propagate_constants_for_unrolling (body[j]);
		free (body);
		(*iter)->aux = NULL;
	      }
	  father_stack.truncate (0);

	  /* This will take care of removing completely unrolled loops
	     from the loop structures so we can continue unrolling now
	     innermost loops.  */
	  if (cleanup_tree_cfg ())
	    update_ssa (TODO_update_ssa_only_virtuals);

	  /* Clean up the information about numbers of iterations, since
	     complete unrolling might have invalidated it.  */
	  scev_reset ();
#ifdef ENABLE_CHECKING
	  if (loops_state_satisfies_p (LOOP_CLOSED_SSA))
	    verify_loop_closed_ssa (true);
#endif
	}
      if (loop_closed_ssa_invalidated)
	BITMAP_FREE (loop_closed_ssa_invalidated);
    }
  while (changed
	 && ++iteration <= PARAM_VALUE (PARAM_MAX_UNROLL_ITERATIONS));

  father_stack.release ();

  if (irred_invalidated
      && loops_state_satisfies_p (LOOPS_HAVE_MARKED_IRREDUCIBLE_REGIONS))
    mark_irreducible_loops ();

  return 0;
}
/* Canonical induction variable creation pass.  */

static unsigned int
tree_ssa_loop_ivcanon (void)
{
  if (number_of_loops (cfun) <= 1)
    return 0;

  return canonicalize_induction_variables ();
}

static bool
gate_tree_ssa_loop_ivcanon (void)
{
  return flag_tree_loop_ivcanon != 0;
}

namespace {

const pass_data pass_data_iv_canon =
{
  GIMPLE_PASS, /* type */
  "ivcanon", /* name */
  OPTGROUP_LOOP, /* optinfo_flags */
  true, /* has_gate */
  true, /* has_execute */
  TV_TREE_LOOP_IVCANON, /* tv_id */
  ( PROP_cfg | PROP_ssa ), /* properties_required */
  0, /* properties_provided */
  0, /* properties_destroyed */
  0, /* todo_flags_start */
  0, /* todo_flags_finish */
};

class pass_iv_canon : public gimple_opt_pass
{
public:
  pass_iv_canon (gcc::context *ctxt)
    : gimple_opt_pass (pass_data_iv_canon, ctxt)
  {}

  /* opt_pass methods: */
  bool gate () { return gate_tree_ssa_loop_ivcanon (); }
  unsigned int execute () { return tree_ssa_loop_ivcanon (); }

}; // class pass_iv_canon

} // anon namespace

gimple_opt_pass *
make_pass_iv_canon (gcc::context *ctxt)
{
  return new pass_iv_canon (ctxt);
}

/* Complete unrolling of loops.  */

static unsigned int
tree_complete_unroll (void)
{
  if (number_of_loops (cfun) <= 1)
    return 0;

  return tree_unroll_loops_completely (flag_unroll_loops
				       || flag_peel_loops
				       || optimize >= 3, true);
}

static bool
gate_tree_complete_unroll (void)
{
  return true;
}

namespace {

const pass_data pass_data_complete_unroll =
{
  GIMPLE_PASS, /* type */
  "cunroll", /* name */
  OPTGROUP_LOOP, /* optinfo_flags */
  true, /* has_gate */
  true, /* has_execute */
  TV_COMPLETE_UNROLL, /* tv_id */
  ( PROP_cfg | PROP_ssa ), /* properties_required */
  0, /* properties_provided */
  0, /* properties_destroyed */
  0, /* todo_flags_start */
  0, /* todo_flags_finish */
};

class pass_complete_unroll : public gimple_opt_pass
{
public:
  pass_complete_unroll (gcc::context *ctxt)
    : gimple_opt_pass (pass_data_complete_unroll, ctxt)
  {}

  /* opt_pass methods: */
  bool gate () { return gate_tree_complete_unroll (); }
  unsigned int execute () { return tree_complete_unroll (); }

}; // class pass_complete_unroll

} // anon namespace

gimple_opt_pass *
make_pass_complete_unroll (gcc::context *ctxt)
{
  return new pass_complete_unroll (ctxt);
}

/* Complete unrolling of inner loops.  */

static unsigned int
tree_complete_unroll_inner (void)
{
  unsigned ret = 0;

  loop_optimizer_init (LOOPS_NORMAL
		       | LOOPS_HAVE_RECORDED_EXITS);
  if (number_of_loops (cfun) > 1)
    {
      scev_initialize ();
      ret = tree_unroll_loops_completely (optimize >= 3, false);
      free_numbers_of_iterations_estimates ();
      scev_finalize ();
    }
  loop_optimizer_finalize ();

  return ret;
}

static bool
gate_tree_complete_unroll_inner (void)
{
  return optimize >= 2;
}

namespace {

const pass_data pass_data_complete_unrolli =
{
  GIMPLE_PASS, /* type */
  "cunrolli", /* name */
  OPTGROUP_LOOP, /* optinfo_flags */
  true, /* has_gate */
  true, /* has_execute */
  TV_COMPLETE_UNROLL, /* tv_id */
  ( PROP_cfg | PROP_ssa ), /* properties_required */
  0, /* properties_provided */
  0, /* properties_destroyed */
  0, /* todo_flags_start */
  TODO_verify_flow, /* todo_flags_finish */
};

class pass_complete_unrolli : public gimple_opt_pass
{
public:
  pass_complete_unrolli (gcc::context *ctxt)
    : gimple_opt_pass (pass_data_complete_unrolli, ctxt)
  {}

  /* opt_pass methods: */
  bool gate () { return gate_tree_complete_unroll_inner (); }
  unsigned int execute () { return tree_complete_unroll_inner (); }

}; // class pass_complete_unrolli

} // anon namespace

gimple_opt_pass *
make_pass_complete_unrolli (gcc::context *ctxt)
{
  return new pass_complete_unrolli (ctxt);
}