]>
Commit | Line | Data |
---|---|---|
84eb345f | 1 | /* Induction variable canonicalization and loop peeling. |
fbd26352 | 2 | Copyright (C) 2004-2019 Free Software Foundation, Inc. |
48e1416a | 3 | |
bb445479 | 4 | This file is part of GCC. |
48e1416a | 5 | |
bb445479 | 6 | GCC is free software; you can redistribute it and/or modify it |
7 | under the terms of the GNU General Public License as published by the | |
8c4c00c1 | 8 | Free Software Foundation; either version 3, or (at your option) any |
bb445479 | 9 | later version. |
48e1416a | 10 | |
bb445479 | 11 | GCC is distributed in the hope that it will be useful, but WITHOUT |
12 | ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or | |
13 | FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License | |
14 | for more details. | |
48e1416a | 15 | |
bb445479 | 16 | You should have received a copy of the GNU General Public License |
8c4c00c1 | 17 | along with GCC; see the file COPYING3. If not see |
18 | <http://www.gnu.org/licenses/>. */ | |
bb445479 | 19 | |
20 | /* This pass detects the loops that iterate a constant number of times, | |
48e1416a | 21 | adds a canonical induction variable (step -1, tested against 0) |
bb445479 | 22 | and replaces the exit test. This enables the less powerful rtl |
23 | level analysis to use this information. | |
24 | ||
25 | This might spoil the code in some cases (by increasing register pressure). | |
26 | Note that in the case the new variable is not needed, ivopts will get rid | |
27 | of it, so it might only be a problem when there are no other linear induction | |
28 | variables. In that case the created optimization possibilities are likely | |
29 | to pay up. | |
30 | ||
c836de3f | 31 | We also perform |
4cf494ec | 32 | - complete unrolling (or peeling) when the loops is rolling few enough |
c836de3f | 33 | times |
34 | - simple peeling (i.e. copying few initial iterations prior the loop) | |
35 | when number of iteration estimate is known (typically by the profile | |
36 | info). */ | |
bb445479 | 37 | |
38 | #include "config.h" | |
39 | #include "system.h" | |
40 | #include "coretypes.h" | |
9ef16211 | 41 | #include "backend.h" |
bb445479 | 42 | #include "tree.h" |
9ef16211 | 43 | #include "gimple.h" |
7c29e30e | 44 | #include "cfghooks.h" |
45 | #include "tree-pass.h" | |
9ef16211 | 46 | #include "ssa.h" |
7c29e30e | 47 | #include "cgraph.h" |
48 | #include "gimple-pretty-print.h" | |
b20a8bb4 | 49 | #include "fold-const.h" |
886c1262 | 50 | #include "profile.h" |
bc61cadb | 51 | #include "gimple-fold.h" |
52 | #include "tree-eh.h" | |
dcf1a1ec | 53 | #include "gimple-iterator.h" |
073c1fd5 | 54 | #include "tree-cfg.h" |
05d9c18a | 55 | #include "tree-ssa-loop-manip.h" |
56 | #include "tree-ssa-loop-niter.h" | |
073c1fd5 | 57 | #include "tree-ssa-loop.h" |
58 | #include "tree-into-ssa.h" | |
bb445479 | 59 | #include "cfgloop.h" |
bb445479 | 60 | #include "tree-chrec.h" |
61 | #include "tree-scalar-evolution.h" | |
62 | #include "params.h" | |
bb445479 | 63 | #include "tree-inline.h" |
424a4a92 | 64 | #include "tree-cfgcleanup.h" |
f7715905 | 65 | #include "builtins.h" |
51e85e64 | 66 | #include "tree-ssa-sccvn.h" |
bb445479 | 67 | |
604f7b8a | 68 | /* Specifies types of loops that may be unrolled. */ |
69 | ||
70 | enum unroll_level | |
71 | { | |
6414dd4c | 72 | UL_SINGLE_ITER, /* Only loops that exit immediately in the first |
604f7b8a | 73 | iteration. */ |
74 | UL_NO_GROWTH, /* Only loops whose unrolling will not cause increase | |
75 | of code size. */ | |
76 | UL_ALL /* All suitable loops. */ | |
77 | }; | |
78 | ||
bb445479 | 79 | /* Adds a canonical induction variable to LOOP iterating NITER times. EXIT |
5051abaf | 80 | is the exit edge whose condition is replaced. The ssa versions of the new |
81 | IV before and after increment will be stored in VAR_BEFORE and VAR_AFTER | |
82 | if they are not NULL. */ | |
bb445479 | 83 | |
5051abaf | 84 | void |
85 | create_canonical_iv (struct loop *loop, edge exit, tree niter, | |
86 | tree *var_before = NULL, tree *var_after = NULL) | |
bb445479 | 87 | { |
88 | edge in; | |
75a70cf9 | 89 | tree type, var; |
1a91d914 | 90 | gcond *cond; |
75a70cf9 | 91 | gimple_stmt_iterator incr_at; |
bb445479 | 92 | enum tree_code cmp; |
93 | ||
94 | if (dump_file && (dump_flags & TDF_DETAILS)) | |
95 | { | |
96 | fprintf (dump_file, "Added canonical iv to loop %d, ", loop->num); | |
97 | print_generic_expr (dump_file, niter, TDF_SLIM); | |
98 | fprintf (dump_file, " iterations.\n"); | |
99 | } | |
100 | ||
1a91d914 | 101 | cond = as_a <gcond *> (last_stmt (exit->src)); |
cd665a06 | 102 | in = EDGE_SUCC (exit->src, 0); |
bb445479 | 103 | if (in == exit) |
cd665a06 | 104 | in = EDGE_SUCC (exit->src, 1); |
bb445479 | 105 | |
106 | /* Note that we do not need to worry about overflows, since | |
107 | type of niter is always unsigned and all comparisons are | |
108 | just for equality/nonequality -- i.e. everything works | |
109 | with a modulo arithmetics. */ | |
110 | ||
111 | type = TREE_TYPE (niter); | |
49d00087 | 112 | niter = fold_build2 (PLUS_EXPR, type, |
113 | niter, | |
114 | build_int_cst (type, 1)); | |
75a70cf9 | 115 | incr_at = gsi_last_bb (in->src); |
bb445479 | 116 | create_iv (niter, |
3c6185f1 | 117 | build_int_cst (type, -1), |
bb445479 | 118 | NULL_TREE, loop, |
5051abaf | 119 | &incr_at, false, var_before, &var); |
120 | if (var_after) | |
121 | *var_after = var; | |
bb445479 | 122 | |
123 | cmp = (exit->flags & EDGE_TRUE_VALUE) ? EQ_EXPR : NE_EXPR; | |
75a70cf9 | 124 | gimple_cond_set_code (cond, cmp); |
125 | gimple_cond_set_lhs (cond, var); | |
126 | gimple_cond_set_rhs (cond, build_int_cst (type, 0)); | |
22aa74c4 | 127 | update_stmt (cond); |
bb445479 | 128 | } |
129 | ||
aa2ba534 | 130 | /* Describe size of loop as detected by tree_estimate_loop_size. */ |
131 | struct loop_size | |
132 | { | |
133 | /* Number of instructions in the loop. */ | |
134 | int overall; | |
135 | ||
136 | /* Number of instructions that will be likely optimized out in | |
137 | peeled iterations of loop (i.e. computation based on induction | |
138 | variable where induction variable starts at known constant.) */ | |
139 | int eliminated_by_peeling; | |
140 | ||
141 | /* Same statistics for last iteration of loop: it is smaller because | |
142 | instructions after exit are not executed. */ | |
143 | int last_iteration; | |
144 | int last_iteration_eliminated_by_peeling; | |
d583c979 | 145 | |
146 | /* If some IV computation will become constant. */ | |
147 | bool constant_iv; | |
148 | ||
149 | /* Number of call stmts that are not a builtin and are pure or const | |
150 | present on the hot path. */ | |
151 | int num_pure_calls_on_hot_path; | |
152 | /* Number of call stmts that are not a builtin and are not pure nor const | |
153 | present on the hot path. */ | |
154 | int num_non_pure_calls_on_hot_path; | |
155 | /* Number of statements other than calls in the loop. */ | |
156 | int non_call_stmts_on_hot_path; | |
157 | /* Number of branches seen on the hot path. */ | |
158 | int num_branches_on_hot_path; | |
aa2ba534 | 159 | }; |
160 | ||
161 | /* Return true if OP in STMT will be constant after peeling LOOP. */ | |
162 | ||
163 | static bool | |
42acab1c | 164 | constant_after_peeling (tree op, gimple *stmt, struct loop *loop) |
aa2ba534 | 165 | { |
aa2ba534 | 166 | if (is_gimple_min_invariant (op)) |
167 | return true; | |
48e1416a | 168 | |
aa2ba534 | 169 | /* We can still fold accesses to constant arrays when index is known. */ |
170 | if (TREE_CODE (op) != SSA_NAME) | |
171 | { | |
172 | tree base = op; | |
173 | ||
174 | /* First make fast look if we see constant array inside. */ | |
175 | while (handled_component_p (base)) | |
176 | base = TREE_OPERAND (base, 0); | |
248022b2 | 177 | if ((DECL_P (base) |
df8d3e89 | 178 | && ctor_for_folding (base) != error_mark_node) |
aa2ba534 | 179 | || CONSTANT_CLASS_P (base)) |
180 | { | |
181 | /* If so, see if we understand all the indices. */ | |
182 | base = op; | |
183 | while (handled_component_p (base)) | |
184 | { | |
185 | if (TREE_CODE (base) == ARRAY_REF | |
186 | && !constant_after_peeling (TREE_OPERAND (base, 1), stmt, loop)) | |
187 | return false; | |
188 | base = TREE_OPERAND (base, 0); | |
189 | } | |
190 | return true; | |
191 | } | |
192 | return false; | |
193 | } | |
194 | ||
c39eea14 | 195 | /* Induction variables are constants when defined in loop. */ |
196 | if (loop_containing_stmt (stmt) != loop) | |
aa2ba534 | 197 | return false; |
c39eea14 | 198 | tree ev = analyze_scalar_evolution (loop, op); |
199 | if (chrec_contains_undetermined (ev) | |
200 | || chrec_contains_symbols (ev)) | |
aa2ba534 | 201 | return false; |
202 | return true; | |
203 | } | |
204 | ||
d583c979 | 205 | /* Computes an estimated number of insns in LOOP. |
206 | EXIT (if non-NULL) is an exite edge that will be eliminated in all but last | |
207 | iteration of the loop. | |
208 | EDGE_TO_CANCEL (if non-NULL) is an non-exit edge eliminated in the last iteration | |
209 | of loop. | |
84eb345f | 210 | Return results in SIZE, estimate benefits for complete unrolling exiting by EXIT. |
04437ab6 | 211 | Stop estimating after UPPER_BOUND is met. Return true in this case. */ |
aa2ba534 | 212 | |
84eb345f | 213 | static bool |
f18de397 | 214 | tree_estimate_loop_size (struct loop *loop, edge exit, edge edge_to_cancel, |
215 | struct loop_size *size, int upper_bound) | |
aa2ba534 | 216 | { |
217 | basic_block *body = get_loop_body (loop); | |
218 | gimple_stmt_iterator gsi; | |
219 | unsigned int i; | |
220 | bool after_exit; | |
f1f41a6c | 221 | vec<basic_block> path = get_loop_hot_path (loop); |
aa2ba534 | 222 | |
223 | size->overall = 0; | |
224 | size->eliminated_by_peeling = 0; | |
225 | size->last_iteration = 0; | |
226 | size->last_iteration_eliminated_by_peeling = 0; | |
d583c979 | 227 | size->num_pure_calls_on_hot_path = 0; |
228 | size->num_non_pure_calls_on_hot_path = 0; | |
229 | size->non_call_stmts_on_hot_path = 0; | |
230 | size->num_branches_on_hot_path = 0; | |
231 | size->constant_iv = 0; | |
aa2ba534 | 232 | |
233 | if (dump_file && (dump_flags & TDF_DETAILS)) | |
234 | fprintf (dump_file, "Estimating sizes for loop %i\n", loop->num); | |
235 | for (i = 0; i < loop->num_nodes; i++) | |
236 | { | |
c790d986 | 237 | if (edge_to_cancel && body[i] != edge_to_cancel->src |
238 | && dominated_by_p (CDI_DOMINATORS, body[i], edge_to_cancel->src)) | |
aa2ba534 | 239 | after_exit = true; |
240 | else | |
241 | after_exit = false; | |
242 | if (dump_file && (dump_flags & TDF_DETAILS)) | |
f18de397 | 243 | fprintf (dump_file, " BB: %i, after_exit: %i\n", body[i]->index, |
244 | after_exit); | |
aa2ba534 | 245 | |
246 | for (gsi = gsi_start_bb (body[i]); !gsi_end_p (gsi); gsi_next (&gsi)) | |
247 | { | |
42acab1c | 248 | gimple *stmt = gsi_stmt (gsi); |
aa2ba534 | 249 | int num = estimate_num_insns (stmt, &eni_size_weights); |
250 | bool likely_eliminated = false; | |
d583c979 | 251 | bool likely_eliminated_last = false; |
252 | bool likely_eliminated_peeled = false; | |
aa2ba534 | 253 | |
254 | if (dump_file && (dump_flags & TDF_DETAILS)) | |
255 | { | |
256 | fprintf (dump_file, " size: %3i ", num); | |
1ffa4346 | 257 | print_gimple_stmt (dump_file, gsi_stmt (gsi), 0); |
aa2ba534 | 258 | } |
259 | ||
260 | /* Look for reasons why we might optimize this stmt away. */ | |
261 | ||
8c1879bc | 262 | if (!gimple_has_side_effects (stmt)) |
aa2ba534 | 263 | { |
8c1879bc | 264 | /* Exit conditional. */ |
265 | if (exit && body[i] == exit->src | |
266 | && stmt == last_stmt (exit->src)) | |
267 | { | |
268 | if (dump_file && (dump_flags & TDF_DETAILS)) | |
269 | fprintf (dump_file, " Exit condition will be eliminated " | |
270 | "in peeled copies.\n"); | |
271 | likely_eliminated_peeled = true; | |
272 | } | |
273 | if (edge_to_cancel && body[i] == edge_to_cancel->src | |
274 | && stmt == last_stmt (edge_to_cancel->src)) | |
275 | { | |
276 | if (dump_file && (dump_flags & TDF_DETAILS)) | |
277 | fprintf (dump_file, " Exit condition will be eliminated " | |
278 | "in last copy.\n"); | |
279 | likely_eliminated_last = true; | |
280 | } | |
281 | /* Sets of IV variables */ | |
282 | if (gimple_code (stmt) == GIMPLE_ASSIGN | |
283 | && constant_after_peeling (gimple_assign_lhs (stmt), stmt, loop)) | |
284 | { | |
285 | if (dump_file && (dump_flags & TDF_DETAILS)) | |
286 | fprintf (dump_file, " Induction variable computation will" | |
287 | " be folded away.\n"); | |
288 | likely_eliminated = true; | |
289 | } | |
290 | /* Assignments of IV variables. */ | |
291 | else if (gimple_code (stmt) == GIMPLE_ASSIGN | |
292 | && TREE_CODE (gimple_assign_lhs (stmt)) == SSA_NAME | |
293 | && constant_after_peeling (gimple_assign_rhs1 (stmt), | |
93505d22 | 294 | stmt, loop) |
8c1879bc | 295 | && (gimple_assign_rhs_class (stmt) != GIMPLE_BINARY_RHS |
296 | || constant_after_peeling (gimple_assign_rhs2 (stmt), | |
297 | stmt, loop))) | |
298 | { | |
299 | size->constant_iv = true; | |
300 | if (dump_file && (dump_flags & TDF_DETAILS)) | |
301 | fprintf (dump_file, | |
302 | " Constant expression will be folded away.\n"); | |
303 | likely_eliminated = true; | |
304 | } | |
305 | /* Conditionals. */ | |
306 | else if ((gimple_code (stmt) == GIMPLE_COND | |
307 | && constant_after_peeling (gimple_cond_lhs (stmt), stmt, | |
308 | loop) | |
309 | && constant_after_peeling (gimple_cond_rhs (stmt), stmt, | |
310 | loop) | |
311 | /* We don't simplify all constant compares so make sure | |
312 | they are not both constant already. See PR70288. */ | |
313 | && (! is_gimple_min_invariant (gimple_cond_lhs (stmt)) | |
314 | || ! is_gimple_min_invariant | |
315 | (gimple_cond_rhs (stmt)))) | |
316 | || (gimple_code (stmt) == GIMPLE_SWITCH | |
317 | && constant_after_peeling (gimple_switch_index ( | |
318 | as_a <gswitch *> | |
319 | (stmt)), | |
320 | stmt, loop) | |
321 | && ! is_gimple_min_invariant | |
322 | (gimple_switch_index | |
323 | (as_a <gswitch *> (stmt))))) | |
324 | { | |
325 | if (dump_file && (dump_flags & TDF_DETAILS)) | |
326 | fprintf (dump_file, " Constant conditional.\n"); | |
327 | likely_eliminated = true; | |
328 | } | |
aa2ba534 | 329 | } |
330 | ||
331 | size->overall += num; | |
d583c979 | 332 | if (likely_eliminated || likely_eliminated_peeled) |
aa2ba534 | 333 | size->eliminated_by_peeling += num; |
334 | if (!after_exit) | |
335 | { | |
336 | size->last_iteration += num; | |
d583c979 | 337 | if (likely_eliminated || likely_eliminated_last) |
aa2ba534 | 338 | size->last_iteration_eliminated_by_peeling += num; |
339 | } | |
84eb345f | 340 | if ((size->overall * 3 / 2 - size->eliminated_by_peeling |
341 | - size->last_iteration_eliminated_by_peeling) > upper_bound) | |
342 | { | |
343 | free (body); | |
04437ab6 | 344 | path.release (); |
84eb345f | 345 | return true; |
346 | } | |
aa2ba534 | 347 | } |
348 | } | |
f1f41a6c | 349 | while (path.length ()) |
d583c979 | 350 | { |
f1f41a6c | 351 | basic_block bb = path.pop (); |
d583c979 | 352 | for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi)) |
353 | { | |
42acab1c | 354 | gimple *stmt = gsi_stmt (gsi); |
f18de397 | 355 | if (gimple_code (stmt) == GIMPLE_CALL |
356 | && !gimple_inexpensive_call_p (as_a <gcall *> (stmt))) | |
d583c979 | 357 | { |
358 | int flags = gimple_call_flags (stmt); | |
f18de397 | 359 | if (flags & (ECF_PURE | ECF_CONST)) |
d583c979 | 360 | size->num_pure_calls_on_hot_path++; |
361 | else | |
362 | size->num_non_pure_calls_on_hot_path++; | |
363 | size->num_branches_on_hot_path ++; | |
364 | } | |
f18de397 | 365 | /* Count inexpensive calls as non-calls, because they will likely |
366 | expand inline. */ | |
367 | else if (gimple_code (stmt) != GIMPLE_DEBUG) | |
d583c979 | 368 | size->non_call_stmts_on_hot_path++; |
369 | if (((gimple_code (stmt) == GIMPLE_COND | |
370 | && (!constant_after_peeling (gimple_cond_lhs (stmt), stmt, loop) | |
fffa8e82 | 371 | || !constant_after_peeling (gimple_cond_rhs (stmt), stmt, |
372 | loop))) | |
d583c979 | 373 | || (gimple_code (stmt) == GIMPLE_SWITCH |
1a91d914 | 374 | && !constant_after_peeling (gimple_switch_index ( |
375 | as_a <gswitch *> (stmt)), | |
376 | stmt, loop))) | |
d583c979 | 377 | && (!exit || bb != exit->src)) |
378 | size->num_branches_on_hot_path++; | |
379 | } | |
380 | } | |
f1f41a6c | 381 | path.release (); |
aa2ba534 | 382 | if (dump_file && (dump_flags & TDF_DETAILS)) |
383 | fprintf (dump_file, "size: %i-%i, last_iteration: %i-%i\n", size->overall, | |
384 | size->eliminated_by_peeling, size->last_iteration, | |
385 | size->last_iteration_eliminated_by_peeling); | |
48e1416a | 386 | |
aa2ba534 | 387 | free (body); |
84eb345f | 388 | return false; |
aa2ba534 | 389 | } |
604f7b8a | 390 | |
aa2ba534 | 391 | /* Estimate number of insns of completely unrolled loop. |
392 | It is (NUNROLL + 1) * size of loop body with taking into account | |
393 | the fact that in last copy everything after exit conditional | |
394 | is dead and that some instructions will be eliminated after | |
395 | peeling. | |
604f7b8a | 396 | |
c31fb425 | 397 | Loop body is likely going to simplify further, this is difficult |
aa2ba534 | 398 | to guess, we just decrease the result by 1/3. */ |
604f7b8a | 399 | |
400 | static unsigned HOST_WIDE_INT | |
aa2ba534 | 401 | estimated_unrolled_size (struct loop_size *size, |
604f7b8a | 402 | unsigned HOST_WIDE_INT nunroll) |
403 | { | |
aa2ba534 | 404 | HOST_WIDE_INT unr_insns = ((nunroll) |
405 | * (HOST_WIDE_INT) (size->overall | |
406 | - size->eliminated_by_peeling)); | |
407 | if (!nunroll) | |
408 | unr_insns = 0; | |
409 | unr_insns += size->last_iteration - size->last_iteration_eliminated_by_peeling; | |
410 | ||
411 | unr_insns = unr_insns * 2 / 3; | |
604f7b8a | 412 | if (unr_insns <= 0) |
413 | unr_insns = 1; | |
604f7b8a | 414 | |
415 | return unr_insns; | |
416 | } | |
417 | ||
c790d986 | 418 | /* Loop LOOP is known to not loop. See if there is an edge in the loop |
419 | body that can be remove to make the loop to always exit and at | |
420 | the same time it does not make any code potentially executed | |
421 | during the last iteration dead. | |
422 | ||
4cf494ec | 423 | After complete unrolling we still may get rid of the conditional |
c790d986 | 424 | on the exit in the last copy even if we have no idea what it does. |
425 | This is quite common case for loops of form | |
426 | ||
427 | int a[5]; | |
428 | for (i=0;i<b;i++) | |
429 | a[i]=0; | |
430 | ||
431 | Here we prove the loop to iterate 5 times but we do not know | |
432 | it from induction variable. | |
433 | ||
434 | For now we handle only simple case where there is exit condition | |
435 | just before the latch block and the latch block contains no statements | |
436 | with side effect that may otherwise terminate the execution of loop | |
437 | (such as by EH or by terminating the program or longjmp). | |
438 | ||
439 | In the general case we may want to cancel the paths leading to statements | |
440 | loop-niter identified as having undefined effect in the last iteration. | |
441 | The other cases are hopefully rare and will be cleaned up later. */ | |
442 | ||
f86b328b | 443 | static edge |
c790d986 | 444 | loop_edge_to_cancel (struct loop *loop) |
445 | { | |
f1f41a6c | 446 | vec<edge> exits; |
c790d986 | 447 | unsigned i; |
448 | edge edge_to_cancel; | |
449 | gimple_stmt_iterator gsi; | |
450 | ||
451 | /* We want only one predecestor of the loop. */ | |
452 | if (EDGE_COUNT (loop->latch->preds) > 1) | |
453 | return NULL; | |
454 | ||
455 | exits = get_loop_exit_edges (loop); | |
456 | ||
f1f41a6c | 457 | FOR_EACH_VEC_ELT (exits, i, edge_to_cancel) |
c790d986 | 458 | { |
459 | /* Find the other edge than the loop exit | |
460 | leaving the conditoinal. */ | |
461 | if (EDGE_COUNT (edge_to_cancel->src->succs) != 2) | |
462 | continue; | |
463 | if (EDGE_SUCC (edge_to_cancel->src, 0) == edge_to_cancel) | |
464 | edge_to_cancel = EDGE_SUCC (edge_to_cancel->src, 1); | |
465 | else | |
466 | edge_to_cancel = EDGE_SUCC (edge_to_cancel->src, 0); | |
467 | ||
248022b2 | 468 | /* We only can handle conditionals. */ |
469 | if (!(edge_to_cancel->flags & (EDGE_TRUE_VALUE | EDGE_FALSE_VALUE))) | |
470 | continue; | |
471 | ||
c790d986 | 472 | /* We should never have conditionals in the loop latch. */ |
473 | gcc_assert (edge_to_cancel->dest != loop->header); | |
474 | ||
475 | /* Check that it leads to loop latch. */ | |
476 | if (edge_to_cancel->dest != loop->latch) | |
477 | continue; | |
478 | ||
f1f41a6c | 479 | exits.release (); |
c790d986 | 480 | |
481 | /* Verify that the code in loop latch does nothing that may end program | |
482 | execution without really reaching the exit. This may include | |
483 | non-pure/const function calls, EH statements, volatile ASMs etc. */ | |
484 | for (gsi = gsi_start_bb (loop->latch); !gsi_end_p (gsi); gsi_next (&gsi)) | |
485 | if (gimple_has_side_effects (gsi_stmt (gsi))) | |
486 | return NULL; | |
487 | return edge_to_cancel; | |
488 | } | |
f1f41a6c | 489 | exits.release (); |
c790d986 | 490 | return NULL; |
491 | } | |
492 | ||
72276d01 | 493 | /* Remove all tests for exits that are known to be taken after LOOP was |
494 | peeled NPEELED times. Put gcc_unreachable before every statement | |
495 | known to not be executed. */ | |
496 | ||
497 | static bool | |
498 | remove_exits_and_undefined_stmts (struct loop *loop, unsigned int npeeled) | |
499 | { | |
500 | struct nb_iter_bound *elt; | |
501 | bool changed = false; | |
502 | ||
503 | for (elt = loop->bounds; elt; elt = elt->next) | |
504 | { | |
505 | /* If statement is known to be undefined after peeling, turn it | |
506 | into unreachable (or trap when debugging experience is supposed | |
507 | to be good). */ | |
508 | if (!elt->is_exit | |
796b6678 | 509 | && wi::ltu_p (elt->bound, npeeled)) |
72276d01 | 510 | { |
511 | gimple_stmt_iterator gsi = gsi_for_stmt (elt->stmt); | |
1a91d914 | 512 | gcall *stmt = gimple_build_call |
72276d01 | 513 | (builtin_decl_implicit (BUILT_IN_UNREACHABLE), 0); |
72276d01 | 514 | gimple_set_location (stmt, gimple_location (elt->stmt)); |
515 | gsi_insert_before (&gsi, stmt, GSI_NEW_STMT); | |
3b43af65 | 516 | split_block (gimple_bb (stmt), stmt); |
72276d01 | 517 | changed = true; |
518 | if (dump_file && (dump_flags & TDF_DETAILS)) | |
519 | { | |
520 | fprintf (dump_file, "Forced statement unreachable: "); | |
1ffa4346 | 521 | print_gimple_stmt (dump_file, elt->stmt, 0); |
72276d01 | 522 | } |
523 | } | |
524 | /* If we know the exit will be taken after peeling, update. */ | |
525 | else if (elt->is_exit | |
796b6678 | 526 | && wi::leu_p (elt->bound, npeeled)) |
72276d01 | 527 | { |
528 | basic_block bb = gimple_bb (elt->stmt); | |
529 | edge exit_edge = EDGE_SUCC (bb, 0); | |
530 | ||
531 | if (dump_file && (dump_flags & TDF_DETAILS)) | |
532 | { | |
533 | fprintf (dump_file, "Forced exit to be taken: "); | |
1ffa4346 | 534 | print_gimple_stmt (dump_file, elt->stmt, 0); |
72276d01 | 535 | } |
536 | if (!loop_exit_edge_p (loop, exit_edge)) | |
537 | exit_edge = EDGE_SUCC (bb, 1); | |
720cfc43 | 538 | exit_edge->probability = profile_probability::always (); |
72276d01 | 539 | gcc_checking_assert (loop_exit_edge_p (loop, exit_edge)); |
1a91d914 | 540 | gcond *cond_stmt = as_a <gcond *> (elt->stmt); |
72276d01 | 541 | if (exit_edge->flags & EDGE_TRUE_VALUE) |
1a91d914 | 542 | gimple_cond_make_true (cond_stmt); |
72276d01 | 543 | else |
1a91d914 | 544 | gimple_cond_make_false (cond_stmt); |
545 | update_stmt (cond_stmt); | |
72276d01 | 546 | changed = true; |
547 | } | |
548 | } | |
549 | return changed; | |
550 | } | |
551 | ||
552 | /* Remove all exits that are known to be never taken because of the loop bound | |
553 | discovered. */ | |
554 | ||
555 | static bool | |
556 | remove_redundant_iv_tests (struct loop *loop) | |
557 | { | |
558 | struct nb_iter_bound *elt; | |
559 | bool changed = false; | |
560 | ||
561 | if (!loop->any_upper_bound) | |
562 | return false; | |
563 | for (elt = loop->bounds; elt; elt = elt->next) | |
564 | { | |
565 | /* Exit is pointless if it won't be taken before loop reaches | |
566 | upper bound. */ | |
567 | if (elt->is_exit && loop->any_upper_bound | |
796b6678 | 568 | && wi::ltu_p (loop->nb_iterations_upper_bound, elt->bound)) |
72276d01 | 569 | { |
570 | basic_block bb = gimple_bb (elt->stmt); | |
571 | edge exit_edge = EDGE_SUCC (bb, 0); | |
572 | struct tree_niter_desc niter; | |
573 | ||
574 | if (!loop_exit_edge_p (loop, exit_edge)) | |
575 | exit_edge = EDGE_SUCC (bb, 1); | |
576 | ||
577 | /* Only when we know the actual number of iterations, not | |
578 | just a bound, we can remove the exit. */ | |
579 | if (!number_of_iterations_exit (loop, exit_edge, | |
3a690dce | 580 | &niter, false, false) |
581 | || !integer_onep (niter.assumptions) | |
72276d01 | 582 | || !integer_zerop (niter.may_be_zero) |
583 | || !niter.niter | |
584 | || TREE_CODE (niter.niter) != INTEGER_CST | |
cd9b5516 | 585 | || !wi::ltu_p (loop->nb_iterations_upper_bound, |
586 | wi::to_widest (niter.niter))) | |
72276d01 | 587 | continue; |
588 | ||
589 | if (dump_file && (dump_flags & TDF_DETAILS)) | |
590 | { | |
591 | fprintf (dump_file, "Removed pointless exit: "); | |
1ffa4346 | 592 | print_gimple_stmt (dump_file, elt->stmt, 0); |
72276d01 | 593 | } |
1a91d914 | 594 | gcond *cond_stmt = as_a <gcond *> (elt->stmt); |
72276d01 | 595 | if (exit_edge->flags & EDGE_TRUE_VALUE) |
1a91d914 | 596 | gimple_cond_make_false (cond_stmt); |
72276d01 | 597 | else |
1a91d914 | 598 | gimple_cond_make_true (cond_stmt); |
599 | update_stmt (cond_stmt); | |
72276d01 | 600 | changed = true; |
601 | } | |
602 | } | |
603 | return changed; | |
604 | } | |
605 | ||
0cfe7a23 | 606 | /* Stores loops that will be unlooped and edges that will be removed |
607 | after we process whole loop tree. */ | |
f1f41a6c | 608 | static vec<loop_p> loops_to_unloop; |
609 | static vec<int> loops_to_unloop_nunroll; | |
0cfe7a23 | 610 | static vec<edge> edges_to_remove; |
b96f8145 | 611 | /* Stores loops that has been peeled. */ |
612 | static bitmap peeled_loops; | |
72276d01 | 613 | |
614 | /* Cancel all fully unrolled loops by putting __builtin_unreachable | |
615 | on the latch edge. | |
616 | We do it after all unrolling since unlooping moves basic blocks | |
617 | across loop boundaries trashing loop closed SSA form as well | |
618 | as SCEV info needed to be intact during unrolling. | |
619 | ||
c790d986 | 620 | IRRED_INVALIDATED is used to bookkeep if information about |
621 | irreducible regions may become invalid as a result | |
9f0ac045 | 622 | of the transformation. |
623 | LOOP_CLOSED_SSA_INVALIDATED is used to bookkepp the case | |
624 | when we need to go into loop closed SSA form. */ | |
bb445479 | 625 | |
f86b328b | 626 | static void |
72276d01 | 627 | unloop_loops (bitmap loop_closed_ssa_invalidated, |
628 | bool *irred_invalidated) | |
629 | { | |
f1f41a6c | 630 | while (loops_to_unloop.length ()) |
72276d01 | 631 | { |
f1f41a6c | 632 | struct loop *loop = loops_to_unloop.pop (); |
633 | int n_unroll = loops_to_unloop_nunroll.pop (); | |
72276d01 | 634 | basic_block latch = loop->latch; |
635 | edge latch_edge = loop_latch_edge (loop); | |
636 | int flags = latch_edge->flags; | |
637 | location_t locus = latch_edge->goto_locus; | |
1a91d914 | 638 | gcall *stmt; |
72276d01 | 639 | gimple_stmt_iterator gsi; |
640 | ||
641 | remove_exits_and_undefined_stmts (loop, n_unroll); | |
642 | ||
643 | /* Unloop destroys the latch edge. */ | |
644 | unloop (loop, irred_invalidated, loop_closed_ssa_invalidated); | |
645 | ||
646 | /* Create new basic block for the latch edge destination and wire | |
647 | it in. */ | |
648 | stmt = gimple_build_call (builtin_decl_implicit (BUILT_IN_UNREACHABLE), 0); | |
649 | latch_edge = make_edge (latch, create_basic_block (NULL, NULL, latch), flags); | |
720cfc43 | 650 | latch_edge->probability = profile_probability::never (); |
72276d01 | 651 | latch_edge->flags |= flags; |
652 | latch_edge->goto_locus = locus; | |
653 | ||
7465dbcd | 654 | add_bb_to_loop (latch_edge->dest, current_loops->tree_root); |
db9cef39 | 655 | latch_edge->dest->count = profile_count::zero (); |
72276d01 | 656 | set_immediate_dominator (CDI_DOMINATORS, latch_edge->dest, latch_edge->src); |
657 | ||
658 | gsi = gsi_start_bb (latch_edge->dest); | |
659 | gsi_insert_after (&gsi, stmt, GSI_NEW_STMT); | |
660 | } | |
f1f41a6c | 661 | loops_to_unloop.release (); |
662 | loops_to_unloop_nunroll.release (); | |
be6d8ddc | 663 | |
25565afc | 664 | /* Remove edges in peeled copies. Given remove_path removes dominated |
665 | regions we need to cope with removal of already removed paths. */ | |
be6d8ddc | 666 | unsigned i; |
667 | edge e; | |
25565afc | 668 | auto_vec<int, 20> src_bbs; |
669 | src_bbs.reserve_exact (edges_to_remove.length ()); | |
be6d8ddc | 670 | FOR_EACH_VEC_ELT (edges_to_remove, i, e) |
25565afc | 671 | src_bbs.quick_push (e->src->index); |
672 | FOR_EACH_VEC_ELT (edges_to_remove, i, e) | |
673 | if (BASIC_BLOCK_FOR_FN (cfun, src_bbs[i])) | |
674 | { | |
675 | bool ok = remove_path (e, irred_invalidated, | |
676 | loop_closed_ssa_invalidated); | |
677 | gcc_assert (ok); | |
678 | } | |
be6d8ddc | 679 | edges_to_remove.release (); |
72276d01 | 680 | } |
681 | ||
682 | /* Tries to unroll LOOP completely, i.e. NITER times. | |
683 | UL determines which loops we are allowed to unroll. | |
f55775aa | 684 | EXIT is the exit of the loop that should be eliminated. |
72276d01 | 685 | MAXITER specfy bound on number of iterations, -1 if it is |
f55775aa | 686 | not known or too large for HOST_WIDE_INT. The location |
687 | LOCUS corresponding to the loop is used when emitting | |
688 | a summary of the unroll to the dump file. */ | |
72276d01 | 689 | |
bb445479 | 690 | static bool |
7194de72 | 691 | try_unroll_loop_completely (struct loop *loop, |
1d183080 | 692 | edge exit, tree niter, bool may_be_zero, |
c790d986 | 693 | enum unroll_level ul, |
f55775aa | 694 | HOST_WIDE_INT maxiter, |
c309657f | 695 | dump_user_location_t locus, bool allow_peel) |
bb445479 | 696 | { |
2a09b28c | 697 | unsigned HOST_WIDE_INT n_unroll = 0; |
c790d986 | 698 | bool n_unroll_found = false; |
c790d986 | 699 | edge edge_to_cancel = NULL; |
bb445479 | 700 | |
c790d986 | 701 | /* See if we proved number of iterations to be low constant. |
bb445479 | 702 | |
c790d986 | 703 | EXIT is an edge that will be removed in all but last iteration of |
704 | the loop. | |
705 | ||
706 | EDGE_TO_CACNEL is an edge that will be removed from the last iteration | |
707 | of the unrolled sequence and is expected to make the final loop not | |
708 | rolling. | |
709 | ||
710 | If the number of execution of loop is determined by standard induction | |
711 | variable test, then EXIT and EDGE_TO_CANCEL are the two edges leaving | |
712 | from the iv test. */ | |
e913b5cd | 713 | if (tree_fits_uhwi_p (niter)) |
c790d986 | 714 | { |
e913b5cd | 715 | n_unroll = tree_to_uhwi (niter); |
c790d986 | 716 | n_unroll_found = true; |
717 | edge_to_cancel = EDGE_SUCC (exit->src, 0); | |
718 | if (edge_to_cancel == exit) | |
719 | edge_to_cancel = EDGE_SUCC (exit->src, 1); | |
720 | } | |
f4d3c071 | 721 | /* We do not know the number of iterations and thus we cannot eliminate |
c790d986 | 722 | the EXIT edge. */ |
723 | else | |
52c6c602 | 724 | exit = NULL; |
c790d986 | 725 | |
726 | /* See if we can improve our estimate by using recorded loop bounds. */ | |
866fc6a0 | 727 | if ((allow_peel || maxiter == 0 || ul == UL_NO_GROWTH) |
728 | && maxiter >= 0 | |
c790d986 | 729 | && (!n_unroll_found || (unsigned HOST_WIDE_INT)maxiter < n_unroll)) |
730 | { | |
731 | n_unroll = maxiter; | |
732 | n_unroll_found = true; | |
f4d3c071 | 733 | /* Loop terminates before the IV variable test, so we cannot |
c790d986 | 734 | remove it in the last iteration. */ |
735 | edge_to_cancel = NULL; | |
736 | } | |
737 | ||
738 | if (!n_unroll_found) | |
bb445479 | 739 | return false; |
bb445479 | 740 | |
2a09b28c | 741 | if (!loop->unroll |
742 | && n_unroll > (unsigned) PARAM_VALUE (PARAM_MAX_COMPLETELY_PEEL_TIMES)) | |
04e3ee8a | 743 | { |
744 | if (dump_file && (dump_flags & TDF_DETAILS)) | |
745 | fprintf (dump_file, "Not unrolling loop %d " | |
2c290da1 | 746 | "(--param max-completely-peel-times limit reached).\n", |
04e3ee8a | 747 | loop->num); |
748 | return false; | |
749 | } | |
bb445479 | 750 | |
c790d986 | 751 | if (!edge_to_cancel) |
752 | edge_to_cancel = loop_edge_to_cancel (loop); | |
753 | ||
bb445479 | 754 | if (n_unroll) |
755 | { | |
604f7b8a | 756 | if (ul == UL_SINGLE_ITER) |
bb445479 | 757 | return false; |
758 | ||
2a09b28c | 759 | if (loop->unroll) |
84eb345f | 760 | { |
2a09b28c | 761 | /* If the unrolling factor is too large, bail out. */ |
762 | if (n_unroll > (unsigned)loop->unroll) | |
763 | { | |
764 | if (dump_file && (dump_flags & TDF_DETAILS)) | |
765 | fprintf (dump_file, | |
766 | "Not unrolling loop %d: " | |
767 | "user didn't want it unrolled completely.\n", | |
768 | loop->num); | |
769 | return false; | |
770 | } | |
84eb345f | 771 | } |
2a09b28c | 772 | else |
d88fd237 | 773 | { |
2a09b28c | 774 | struct loop_size size; |
775 | /* EXIT can be removed only if we are sure it passes first N_UNROLL | |
776 | iterations. */ | |
777 | bool remove_exit = (exit && niter | |
778 | && TREE_CODE (niter) == INTEGER_CST | |
779 | && wi::leu_p (n_unroll, wi::to_widest (niter))); | |
780 | bool large | |
781 | = tree_estimate_loop_size | |
782 | (loop, remove_exit ? exit : NULL, edge_to_cancel, &size, | |
783 | PARAM_VALUE (PARAM_MAX_COMPLETELY_PEELED_INSNS)); | |
784 | if (large) | |
785 | { | |
786 | if (dump_file && (dump_flags & TDF_DETAILS)) | |
787 | fprintf (dump_file, "Not unrolling loop %d: it is too large.\n", | |
788 | loop->num); | |
789 | return false; | |
790 | } | |
d88fd237 | 791 | |
2a09b28c | 792 | unsigned HOST_WIDE_INT ninsns = size.overall; |
793 | unsigned HOST_WIDE_INT unr_insns | |
794 | = estimated_unrolled_size (&size, n_unroll); | |
d583c979 | 795 | if (dump_file && (dump_flags & TDF_DETAILS)) |
2a09b28c | 796 | { |
797 | fprintf (dump_file, " Loop size: %d\n", (int) ninsns); | |
798 | fprintf (dump_file, " Estimated size after unrolling: %d\n", | |
799 | (int) unr_insns); | |
800 | } | |
801 | ||
802 | /* If the code is going to shrink, we don't need to be extra | |
803 | cautious on guessing if the unrolling is going to be | |
804 | profitable. */ | |
805 | if (unr_insns | |
806 | /* If there is IV variable that will become constant, we | |
807 | save one instruction in the loop prologue we do not | |
808 | account otherwise. */ | |
809 | <= ninsns + (size.constant_iv != false)) | |
810 | ; | |
811 | /* We unroll only inner loops, because we do not consider it | |
812 | profitable otherwise. We still can cancel loopback edge
813 | of not rolling loop; this is always a good idea. */ | |
814 | else if (ul == UL_NO_GROWTH) | |
815 | { | |
816 | if (dump_file && (dump_flags & TDF_DETAILS)) | |
817 | fprintf (dump_file, "Not unrolling loop %d: size would grow.\n", | |
818 | loop->num); | |
819 | return false; | |
820 | } | |
821 | /* Outer loops tend to be less interesting candidates for | |
822 | complete unrolling unless we can do a lot of propagation | |
823 | into the inner loop body. For now we disable outer loop | |
824 | unrolling when the code would grow. */ | |
825 | else if (loop->inner) | |
826 | { | |
827 | if (dump_file && (dump_flags & TDF_DETAILS)) | |
828 | fprintf (dump_file, "Not unrolling loop %d: " | |
829 | "it is not innermost and code would grow.\n", | |
830 | loop->num); | |
831 | return false; | |
832 | } | |
833 | /* If there is call on a hot path through the loop, then | |
834 | there is most probably not much to optimize. */ | |
835 | else if (size.num_non_pure_calls_on_hot_path) | |
836 | { | |
837 | if (dump_file && (dump_flags & TDF_DETAILS)) | |
838 | fprintf (dump_file, "Not unrolling loop %d: " | |
839 | "contains call and code would grow.\n", | |
840 | loop->num); | |
841 | return false; | |
842 | } | |
843 | /* If there is pure/const call in the function, then we can | |
844 | still optimize the unrolled loop body if it contains some | |
845 | other interesting code than the calls and code storing or | |
846 | cumulating the return value. */ | |
847 | else if (size.num_pure_calls_on_hot_path | |
848 | /* One IV increment, one test, one ivtmp store and | |
849 | one useful stmt. That is about minimal loop | |
850 | doing pure call. */ | |
851 | && (size.non_call_stmts_on_hot_path | |
852 | <= 3 + size.num_pure_calls_on_hot_path)) | |
853 | { | |
854 | if (dump_file && (dump_flags & TDF_DETAILS)) | |
855 | fprintf (dump_file, "Not unrolling loop %d: " | |
856 | "contains just pure calls and code would grow.\n", | |
857 | loop->num); | |
858 | return false; | |
859 | } | |
860 | /* Complete unrolling is major win when control flow is | |
861 | removed and one big basic block is created. If the loop | |
862 | contains control flow the optimization may still be a win | |
863 | because of eliminating the loop overhead but it also may | |
864 | blow the branch predictor tables. Limit number of | |
865 | branches on the hot path through the peeled sequence. */ | |
866 | else if (size.num_branches_on_hot_path * (int)n_unroll | |
867 | > PARAM_VALUE (PARAM_MAX_PEEL_BRANCHES)) | |
868 | { | |
869 | if (dump_file && (dump_flags & TDF_DETAILS)) | |
870 | fprintf (dump_file, "Not unrolling loop %d: " | |
871 | "number of branches on hot path in the unrolled " | |
872 | "sequence reaches --param max-peel-branches limit.\n", | |
873 | loop->num); | |
874 | return false; | |
875 | } | |
876 | else if (unr_insns | |
877 | > (unsigned) PARAM_VALUE (PARAM_MAX_COMPLETELY_PEELED_INSNS)) | |
878 | { | |
879 | if (dump_file && (dump_flags & TDF_DETAILS)) | |
880 | fprintf (dump_file, "Not unrolling loop %d: " | |
881 | "number of insns in the unrolled sequence reaches " | |
882 | "--param max-completely-peeled-insns limit.\n", | |
883 | loop->num); | |
884 | return false; | |
885 | } | |
604f7b8a | 886 | } |
fb54ef7c | 887 | |
01020a5f | 888 | initialize_original_copy_tables (); |
3c6549f8 | 889 | auto_sbitmap wont_exit (n_unroll + 1); |
eedd711b | 890 | if (exit && niter |
891 | && TREE_CODE (niter) == INTEGER_CST | |
892 | && wi::leu_p (n_unroll, wi::to_widest (niter))) | |
893 | { | |
894 | bitmap_ones (wont_exit); | |
895 | if (wi::eq_p (wi::to_widest (niter), n_unroll) | |
896 | || edge_to_cancel) | |
897 | bitmap_clear_bit (wont_exit, 0); | |
898 | } | |
899 | else | |
900 | { | |
901 | exit = NULL; | |
902 | bitmap_clear (wont_exit); | |
903 | } | |
1d183080 | 904 | if (may_be_zero) |
905 | bitmap_clear_bit (wont_exit, 1); | |
fb54ef7c | 906 | |
75a70cf9 | 907 | if (!gimple_duplicate_loop_to_header_edge (loop, loop_preheader_edge (loop), |
908 | n_unroll, wont_exit, | |
0cfe7a23 | 909 | exit, &edges_to_remove, |
75a70cf9 | 910 | DLTHE_FLAG_UPDATE_FREQ |
911 | | DLTHE_FLAG_COMPLETTE_PEEL)) | |
bb445479 | 912 | { |
01020a5f | 913 | free_original_copy_tables (); |
d583c979 | 914 | if (dump_file && (dump_flags & TDF_DETAILS)) |
915 | fprintf (dump_file, "Failed to duplicate the loop\n"); | |
bb445479 | 916 | return false; |
917 | } | |
40ffaada | 918 | |
01020a5f | 919 | free_original_copy_tables (); |
bb445479 | 920 | } |
bb445479 | 921 | |
c790d986 | 922 | /* Remove the conditional from the last copy of the loop. */ |
923 | if (edge_to_cancel) | |
924 | { | |
1a91d914 | 925 | gcond *cond = as_a <gcond *> (last_stmt (edge_to_cancel->src)); |
eedd711b | 926 | force_edge_cold (edge_to_cancel, true); |
c790d986 | 927 | if (edge_to_cancel->flags & EDGE_TRUE_VALUE) |
928 | gimple_cond_make_false (cond); | |
929 | else | |
930 | gimple_cond_make_true (cond); | |
931 | update_stmt (cond); | |
2a09b28c | 932 | /* Do not remove the path, as doing so may remove outer loop and |
933 | confuse bookkeeping code in tree_unroll_loops_completely. */ | |
c790d986 | 934 | } |
c790d986 | 935 | |
72276d01 | 936 | /* Store the loop for later unlooping and exit removal. */ |
f1f41a6c | 937 | loops_to_unloop.safe_push (loop); |
938 | loops_to_unloop_nunroll.safe_push (n_unroll); | |
095dcfa3 | 939 | |
f55775aa | 940 | if (dump_enabled_p ()) |
c790d986 | 941 | { |
942 | if (!n_unroll) | |
f55775aa | 943 | dump_printf_loc (MSG_OPTIMIZED_LOCATIONS | TDF_DETAILS, locus, |
6ee2edad | 944 | "loop turned into non-loop; it never loops\n"); |
c790d986 | 945 | else |
f55775aa | 946 | { |
947 | dump_printf_loc (MSG_OPTIMIZED_LOCATIONS | TDF_DETAILS, locus, | |
6ee2edad | 948 | "loop with %d iterations completely unrolled", |
2a09b28c | 949 | (int) n_unroll); |
db9cef39 | 950 | if (loop->header->count.initialized_p ()) |
f55775aa | 951 | dump_printf (MSG_OPTIMIZED_LOCATIONS | TDF_DETAILS, |
952 | " (header execution count %d)", | |
db9cef39 | 953 | (int)loop->header->count.to_gcov_type ()); |
f55775aa | 954 | dump_printf (MSG_OPTIMIZED_LOCATIONS | TDF_DETAILS, "\n"); |
955 | } | |
956 | } | |
957 | ||
958 | if (dump_file && (dump_flags & TDF_DETAILS)) | |
959 | { | |
c790d986 | 960 | if (exit) |
961 | fprintf (dump_file, "Exit condition of peeled iterations was " | |
962 | "eliminated.\n"); | |
963 | if (edge_to_cancel) | |
964 | fprintf (dump_file, "Last iteration exit edge was proved true.\n"); | |
965 | else | |
966 | fprintf (dump_file, "Latch of last iteration was marked by " | |
967 | "__builtin_unreachable ().\n"); | |
968 | } | |
bb445479 | 969 | |
970 | return true; | |
971 | } | |
972 | ||
c836de3f | 973 | /* Return number of instructions after peeling. */ |
974 | static unsigned HOST_WIDE_INT | |
975 | estimated_peeled_sequence_size (struct loop_size *size, | |
976 | unsigned HOST_WIDE_INT npeel) | |
977 | { | |
978 | return MAX (npeel * (HOST_WIDE_INT) (size->overall | |
979 | - size->eliminated_by_peeling), 1); | |
980 | } | |
981 | ||
/* If the loop is expected to iterate N times and is
   small enough, duplicate the loop body N+1 times before
   the loop itself.  This way the hot path will never
   enter the loop.
   Parameters are the same as for try_unroll_loop_completely:
   LOOP is the loop to peel, EXIT/NITER/MAY_BE_ZERO describe the counted
   exit (if any), and MAXITER is a proved upper bound on the number of
   iterations or -1 when unknown.
   Returns true if the loop was peeled.  */

static bool
try_peel_loop (struct loop *loop,
	       edge exit, tree niter, bool may_be_zero,
	       HOST_WIDE_INT maxiter)
{
  HOST_WIDE_INT npeel;
  struct loop_size size;
  int peeled_size;

  /* Bail out when peeling is disabled, the --param effectively forbids
     it, or the tracking bitmap was never allocated.  */
  if (!flag_peel_loops
      || PARAM_VALUE (PARAM_MAX_PEEL_TIMES) <= 0
      || !peeled_loops)
    return false;

  /* Never peel the same loop twice.  */
  if (bitmap_bit_p (peeled_loops, loop->num))
    {
      if (dump_file)
	fprintf (dump_file, "Not peeling: loop is already peeled\n");
      return false;
    }

  /* We don't peel loops that will be unrolled as this can duplicate a
     loop more times than the user requested.  */
  if (loop->unroll)
    {
      if (dump_file)
	fprintf (dump_file, "Not peeling: user didn't want it peeled.\n");
      return false;
    }

  /* Peel only innermost loops.
     While the code is perfectly capable of peeling non-innermost loops,
     the heuristics would probably need some improvements. */
  if (loop->inner)
    {
      if (dump_file)
	fprintf (dump_file, "Not peeling: outer loop\n");
      return false;
    }

  /* Peeling pays off only on hot paths.  */
  if (!optimize_loop_for_speed_p (loop))
    {
      if (dump_file)
	fprintf (dump_file, "Not peeling: cold loop\n");
      return false;
    }

  /* Check if there is an estimate on the number of iterations; fall back
     to the likely upper bound when no estimate is recorded.  */
  npeel = estimated_loop_iterations_int (loop);
  if (npeel < 0)
    npeel = likely_max_loop_iterations_int (loop);
  if (npeel < 0)
    {
      if (dump_file)
	fprintf (dump_file, "Not peeling: number of iterations is not "
		 "estimated\n");
      return false;
    }
  /* With a known hard upper bound, complete unrolling is the better
     transformation; leave the loop to that code path.  */
  if (maxiter >= 0 && maxiter <= npeel)
    {
      if (dump_file)
	fprintf (dump_file, "Not peeling: upper bound is known so can "
		 "unroll completely\n");
      return false;
    }

  /* We want to peel estimated number of iterations + 1 (so we never
     enter the loop on quick path).  Check against PARAM_MAX_PEEL_TIMES
     and be sure to avoid overflows.  */
  if (npeel > PARAM_VALUE (PARAM_MAX_PEEL_TIMES) - 1)
    {
      if (dump_file)
	fprintf (dump_file, "Not peeling: rolls too much "
		 "(%i + 1 > --param max-peel-times)\n", (int) npeel);
      return false;
    }
  npeel++;

  /* Check peeled loops size.  */
  tree_estimate_loop_size (loop, exit, NULL, &size,
			   PARAM_VALUE (PARAM_MAX_PEELED_INSNS));
  if ((peeled_size = estimated_peeled_sequence_size (&size, (int) npeel))
      > PARAM_VALUE (PARAM_MAX_PEELED_INSNS))
    {
      if (dump_file)
	fprintf (dump_file, "Not peeling: peeled sequence size is too large "
		 "(%i insns > --param max-peel-insns)", peeled_size);
      return false;
    }

  /* Duplicate possibly eliminating the exits.  */
  initialize_original_copy_tables ();
  auto_sbitmap wont_exit (npeel + 1);
  if (exit && niter
      && TREE_CODE (niter) == INTEGER_CST
      && wi::leu_p (npeel, wi::to_widest (niter)))
    {
      /* The first NPEEL iterations provably do not leave through EXIT;
	 only the very first copy (bit 0) may still exit.  */
      bitmap_ones (wont_exit);
      bitmap_clear_bit (wont_exit, 0);
    }
  else
    {
      /* No usable exit count: keep every exit in every copy.  */
      exit = NULL;
      bitmap_clear (wont_exit);
    }
  /* A guarded (may-be-zero) iteration count means the second copy can
     also exit immediately.  */
  if (may_be_zero)
    bitmap_clear_bit (wont_exit, 1);
  if (!gimple_duplicate_loop_to_header_edge (loop, loop_preheader_edge (loop),
					     npeel, wont_exit,
					     exit, &edges_to_remove,
					     DLTHE_FLAG_UPDATE_FREQ))
    {
      free_original_copy_tables ();
      return false;
    }
  free_original_copy_tables ();
  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      fprintf (dump_file, "Peeled loop %d, %i times.\n",
	       loop->num, (int) npeel);
    }
  /* The remaining loop runs NPEEL fewer iterations; shrink the recorded
     estimate and bounds accordingly, saturating at zero.  */
  if (loop->any_estimate)
    {
      if (wi::ltu_p (npeel, loop->nb_iterations_estimate))
	loop->nb_iterations_estimate -= npeel;
      else
	loop->nb_iterations_estimate = 0;
    }
  if (loop->any_upper_bound)
    {
      if (wi::ltu_p (npeel, loop->nb_iterations_upper_bound))
	loop->nb_iterations_upper_bound -= npeel;
      else
	loop->nb_iterations_upper_bound = 0;
    }
  if (loop->any_likely_upper_bound)
    {
      if (wi::ltu_p (npeel, loop->nb_iterations_likely_upper_bound))
	loop->nb_iterations_likely_upper_bound -= npeel;
      else
	{
	  loop->any_estimate = true;
	  loop->nb_iterations_estimate = 0;
	  loop->nb_iterations_likely_upper_bound = 0;
	}
    }
  /* Sum the profile counts of all edges entering the header from outside
     the loop: that is how often the remaining loop is still reached.  */
  profile_count entry_count = profile_count::zero ();

  edge e;
  edge_iterator ei;
  FOR_EACH_EDGE (e, ei, loop->header->preds)
    if (e->src != loop->latch)
      {
	if (e->src->count.initialized_p ())
	  entry_count += e->src->count;
	gcc_assert (!flow_bb_inside_loop_p (loop, e->src));
      }
  /* Scale the loop body profile down to the fraction of header
     executions that still enter the loop after peeling.  */
  profile_probability p;
  p = entry_count.probability_in (loop->header->count);
  scale_loop_profile (loop, p, 0);
  bitmap_set_bit (peeled_loops, loop->num);
  return true;
}
/* Adds a canonical induction variable to LOOP if suitable.
   CREATE_IV is true if we may create a new iv.  UL determines
   which loops we are allowed to completely unroll.  If TRY_EVAL is true, we try
   to determine the number of iterations of a loop by direct evaluation.
   ALLOW_PEEL is forwarded to try_unroll_loop_completely.
   Returns true if cfg is changed.   */

static bool
canonicalize_loop_induction_variables (struct loop *loop,
				       bool create_iv, enum unroll_level ul,
				       bool try_eval, bool allow_peel)
{
  edge exit = NULL;
  tree niter;
  HOST_WIDE_INT maxiter;
  bool modified = false;
  dump_user_location_t locus;
  struct tree_niter_desc niter_desc;
  bool may_be_zero = false;

  /* For unrolling allow conditional constant or zero iterations, thus
     perform loop-header copying on-the-fly.  */
  exit = single_exit (loop);
  niter = chrec_dont_know;
  if (exit && number_of_iterations_exit (loop, exit, &niter_desc, false))
    {
      niter = niter_desc.niter;
      /* Record whether NITER is only valid under an extra "not zero
	 iterations" guard condition.  */
      may_be_zero
	= niter_desc.may_be_zero && !integer_zerop (niter_desc.may_be_zero);
    }
  /* Remember the exit test location for optimization notes.  */
  if (TREE_CODE (niter) == INTEGER_CST)
    locus = last_stmt (exit->src);
  else
    {
      /* For non-constant niter fold may_be_zero into niter again.  */
      if (may_be_zero)
	{
	  if (COMPARISON_CLASS_P (niter_desc.may_be_zero))
	    niter = fold_build3 (COND_EXPR, TREE_TYPE (niter),
				 niter_desc.may_be_zero,
				 build_int_cst (TREE_TYPE (niter), 0), niter);
	  else
	    niter = chrec_dont_know;
	  may_be_zero = false;
	}

      /* If the loop has more than one exit, try checking all of them
	 for # of iterations determinable through scev.  */
      if (!exit)
	niter = find_loop_niter (loop, &exit);

      /* Finally if everything else fails, try brute force evaluation.  */
      if (try_eval
	  && (chrec_contains_undetermined (niter)
	      || TREE_CODE (niter) != INTEGER_CST))
	niter = find_loop_niter_by_eval (loop, &exit);

      if (exit)
	locus = last_stmt (exit->src);

      /* Only a constant iteration count makes EXIT usable below.  */
      if (TREE_CODE (niter) != INTEGER_CST)
	exit = NULL;
    }

  /* We work exceptionally hard here to estimate the bound
     by find_loop_niter_by_eval.  Be sure to keep it for future.  */
  if (niter && TREE_CODE (niter) == INTEGER_CST)
    {
      record_niter_bound (loop, wi::to_widest (niter),
			  exit == single_likely_exit (loop), true);
    }

  /* Force re-computation of loop bounds so we can remove redundant exits.  */
  maxiter = max_loop_iterations_int (loop);

  if (dump_file && (dump_flags & TDF_DETAILS)
      && TREE_CODE (niter) == INTEGER_CST)
    {
      fprintf (dump_file, "Loop %d iterates ", loop->num);
      print_generic_expr (dump_file, niter, TDF_SLIM);
      fprintf (dump_file, " times.\n");
    }
  if (dump_file && (dump_flags & TDF_DETAILS)
      && maxiter >= 0)
    {
      fprintf (dump_file, "Loop %d iterates at most %i times.\n", loop->num,
	       (int)maxiter);
    }
  if (dump_file && (dump_flags & TDF_DETAILS)
      && likely_max_loop_iterations_int (loop) >= 0)
    {
      fprintf (dump_file, "Loop %d likely iterates at most %i times.\n",
	       loop->num, (int)likely_max_loop_iterations_int (loop));
    }

  /* Remove exits that are known to be never taken based on loop bound.
     Needs to be called after compilation of max_loop_iterations_int that
     populates the loop bounds.  */
  modified |= remove_redundant_iv_tests (loop);

  if (try_unroll_loop_completely (loop, exit, niter, may_be_zero, ul,
				  maxiter, locus, allow_peel))
    return true;

  if (create_iv
      && niter && !chrec_contains_undetermined (niter)
      && exit && just_once_each_iteration_p (loop, exit->src))
    {
      tree iv_niter = niter;
      /* Re-apply the may-be-zero guard before basing a new IV on NITER;
	 without a comparison form of the guard we must give up.  */
      if (may_be_zero)
	{
	  if (COMPARISON_CLASS_P (niter_desc.may_be_zero))
	    iv_niter = fold_build3 (COND_EXPR, TREE_TYPE (iv_niter),
				    niter_desc.may_be_zero,
				    build_int_cst (TREE_TYPE (iv_niter), 0),
				    iv_niter);
	  else
	    iv_niter = NULL_TREE;
	}
      if (iv_niter)
	create_canonical_iv (loop, exit, iv_niter);
    }

  /* Peeling is attempted only in the most permissive unrolling mode.  */
  if (ul == UL_ALL)
    modified |= try_peel_loop (loop, exit, niter, may_be_zero, maxiter);

  return modified;
}
1278 | ||
/* The main entry point of the pass.  Adds canonical induction variables
   to the suitable loops.  Returns TODO_cleanup_cfg when any loop was
   changed, 0 otherwise.  */

unsigned int
canonicalize_induction_variables (void)
{
  struct loop *loop;
  bool changed = false;
  bool irred_invalidated = false;
  /* Collects SSA names whose loop-closed form is invalidated by
     unlooping below.  */
  bitmap loop_closed_ssa_invalidated = BITMAP_ALLOC (NULL);

  estimate_numbers_of_iterations (cfun);

  /* Visit inner loops first so their bounds are available to the
     enclosing loops.  */
  FOR_EACH_LOOP (loop, LI_FROM_INNERMOST)
    {
      changed |= canonicalize_loop_induction_variables (loop,
							true, UL_SINGLE_ITER,
							true, false);
    }
  gcc_assert (!need_ssa_update_p (cfun));

  unloop_loops (loop_closed_ssa_invalidated, &irred_invalidated);
  if (irred_invalidated
      && loops_state_satisfies_p (LOOPS_HAVE_MARKED_IRREDUCIBLE_REGIONS))
    mark_irreducible_loops ();

  /* Clean up the information about numbers of iterations, since brute force
     evaluation could reveal new information.  */
  free_numbers_of_iterations_estimates (cfun);
  scev_reset ();

  /* Restore loop-closed SSA form where unlooping broke it.  */
  if (!bitmap_empty_p (loop_closed_ssa_invalidated))
    {
      gcc_checking_assert (loops_state_satisfies_p (LOOP_CLOSED_SSA));
      rewrite_into_loop_closed_ssa (NULL, TODO_update_ssa);
    }
  BITMAP_FREE (loop_closed_ssa_invalidated);

  if (changed)
    return TODO_cleanup_cfg;
  return 0;
}
1321 | ||
/* Process loops from innermost to outer, stopping at the innermost
   loop we unrolled.
   FATHER_BBS collects header BB indices of loop fathers that need
   constant propagation after their children were unrolled.
   Returns true when LOOP or one of its sub-loops changed.  */

static bool
tree_unroll_loops_completely_1 (bool may_increase_size, bool unroll_outer,
				bitmap father_bbs, struct loop *loop)
{
  struct loop *loop_father;
  bool changed = false;
  struct loop *inner;
  enum unroll_level ul;
  /* Snapshot of the loop count: loops numbered >= NUM were created by
     recursive calls and must not be walked yet.  */
  unsigned num = number_of_loops (cfun);

  /* Process inner loops first.  Don't walk loops added by the recursive
     calls because SSA form is not up-to-date.  They can be handled in the
     next iteration.  */
  bitmap child_father_bbs = NULL;
  for (inner = loop->inner; inner != NULL; inner = inner->next)
    if ((unsigned) inner->num < num)
      {
	if (!child_father_bbs)
	  child_father_bbs = BITMAP_ALLOC (NULL);
	if (tree_unroll_loops_completely_1 (may_increase_size, unroll_outer,
					    child_father_bbs, inner))
	  {
	    bitmap_ior_into (father_bbs, child_father_bbs);
	    bitmap_clear (child_father_bbs);
	    changed = true;
	  }
      }
  if (child_father_bbs)
    BITMAP_FREE (child_father_bbs);

  /* If we changed an inner loop we cannot process outer loops in this
     iteration because SSA form is not up-to-date.  Continue with
     siblings of outer loops instead.  */
  if (changed)
    {
      /* If we are recorded as father clear all other fathers that
         are necessarily covered already to avoid redundant work.  */
      if (bitmap_bit_p (father_bbs, loop->header->index))
	{
	  bitmap_clear (father_bbs);
	  bitmap_set_bit (father_bbs, loop->header->index);
	}
      return true;
    }

  /* Don't unroll #pragma omp simd loops until the vectorizer
     attempts to vectorize those.  */
  if (loop->force_vectorize)
    return false;

  /* Try to unroll this loop.  */
  loop_father = loop_outer (loop);
  if (!loop_father)
    return false;

  /* Pick the unrolling aggressiveness: always aggressive for an explicit
     user request, otherwise only when code growth is acceptable.  */
  if (loop->unroll > 1)
    ul = UL_ALL;
  else if (may_increase_size && optimize_loop_nest_for_speed_p (loop)
      /* Unroll outermost loops only if asked to do so or they do
	 not cause code growth.  */
      && (unroll_outer || loop_outer (loop_father)))
    ul = UL_ALL;
  else
    ul = UL_NO_GROWTH;

  if (canonicalize_loop_induction_variables
        (loop, false, ul, !flag_tree_loop_ivcanon, unroll_outer))
    {
      /* If we'll continue unrolling, we need to propagate constants
	 within the new basic blocks to fold away induction variable
	 computations; otherwise, the size might blow up before the
	 iteration is complete and the IR eventually cleaned up.  */
      if (loop_outer (loop_father))
	{
	  /* Once we process our father we will have processed
	     the fathers of our children as well, so avoid doing
	     redundant work and clear fathers we've gathered so far.  */
	  bitmap_clear (father_bbs);
	  bitmap_set_bit (father_bbs, loop_father->header->index);
	}

      return true;
    }

  return false;
}
1411 | ||
604f7b8a | 1412 | /* Unroll LOOPS completely if they iterate just few times. Unless |
1413 | MAY_INCREASE_SIZE is true, perform the unrolling only if the | |
1414 | size of the code does not increase. */ | |
bb445479 | 1415 | |
2a09b28c | 1416 | static unsigned int |
d88fd237 | 1417 | tree_unroll_loops_completely (bool may_increase_size, bool unroll_outer) |
bb445479 | 1418 | { |
0cfe7a23 | 1419 | bitmap father_bbs = BITMAP_ALLOC (NULL); |
d88fd237 | 1420 | bool changed; |
793a0ab5 | 1421 | int iteration = 0; |
9f0ac045 | 1422 | bool irred_invalidated = false; |
bb445479 | 1423 | |
46480a95 | 1424 | estimate_numbers_of_iterations (cfun); |
1425 | ||
d88fd237 | 1426 | do |
bb445479 | 1427 | { |
d88fd237 | 1428 | changed = false; |
9f0ac045 | 1429 | bitmap loop_closed_ssa_invalidated = NULL; |
1430 | ||
1431 | if (loops_state_satisfies_p (LOOP_CLOSED_SSA)) | |
1432 | loop_closed_ssa_invalidated = BITMAP_ALLOC (NULL); | |
bb445479 | 1433 | |
d4f078b5 | 1434 | free_numbers_of_iterations_estimates (cfun); |
46480a95 | 1435 | estimate_numbers_of_iterations (cfun); |
72276d01 | 1436 | |
042301ef | 1437 | changed = tree_unroll_loops_completely_1 (may_increase_size, |
0cfe7a23 | 1438 | unroll_outer, father_bbs, |
042301ef | 1439 | current_loops->tree_root); |
d88fd237 | 1440 | if (changed) |
1441 | { | |
2ebfc881 | 1442 | unsigned i; |
1443 | ||
72276d01 | 1444 | unloop_loops (loop_closed_ssa_invalidated, &irred_invalidated); |
c790d986 | 1445 | |
f4d3c071 | 1446 | /* We cannot use TODO_update_ssa_no_phi because VOPS gets confused. */ |
9f0ac045 | 1447 | if (loop_closed_ssa_invalidated |
1448 | && !bitmap_empty_p (loop_closed_ssa_invalidated)) | |
1449 | rewrite_into_loop_closed_ssa (loop_closed_ssa_invalidated, | |
1450 | TODO_update_ssa); | |
1451 | else | |
1452 | update_ssa (TODO_update_ssa); | |
ea1c5c31 | 1453 | |
0cfe7a23 | 1454 | /* father_bbs is a bitmap of loop father header BB indices. |
1455 | Translate that to what non-root loops these BBs belong to now. */ | |
1456 | bitmap_iterator bi; | |
1457 | bitmap fathers = BITMAP_ALLOC (NULL); | |
1458 | EXECUTE_IF_SET_IN_BITMAP (father_bbs, 0, i, bi) | |
1459 | { | |
1460 | basic_block unrolled_loop_bb = BASIC_BLOCK_FOR_FN (cfun, i); | |
1461 | if (! unrolled_loop_bb) | |
1462 | continue; | |
1463 | if (loop_outer (unrolled_loop_bb->loop_father)) | |
1464 | bitmap_set_bit (fathers, | |
1465 | unrolled_loop_bb->loop_father->num); | |
1466 | } | |
1467 | bitmap_clear (father_bbs); | |
2ebfc881 | 1468 | /* Propagate the constants within the new basic blocks. */ |
0cfe7a23 | 1469 | EXECUTE_IF_SET_IN_BITMAP (fathers, 0, i, bi) |
1470 | { | |
1471 | loop_p father = get_loop (cfun, i); | |
51e85e64 | 1472 | bitmap exit_bbs = BITMAP_ALLOC (NULL); |
1473 | loop_exit *exit = father->exits->next; | |
1474 | while (exit->e) | |
1475 | { | |
1476 | bitmap_set_bit (exit_bbs, exit->e->dest->index); | |
1477 | exit = exit->next; | |
1478 | } | |
1479 | do_rpo_vn (cfun, loop_preheader_edge (father), exit_bbs); | |
0cfe7a23 | 1480 | } |
1481 | BITMAP_FREE (fathers); | |
2ebfc881 | 1482 | |
d88fd237 | 1483 | /* This will take care of removing completely unrolled loops |
1484 | from the loop structures so we can continue unrolling now | |
1485 | innermost loops. */ | |
b2a225ba | 1486 | if (cleanup_tree_cfg ()) |
1487 | update_ssa (TODO_update_ssa_only_virtuals); | |
d88fd237 | 1488 | |
1489 | /* Clean up the information about numbers of iterations, since | |
1490 | complete unrolling might have invalidated it. */ | |
1491 | scev_reset (); | |
382ecba7 | 1492 | if (flag_checking && loops_state_satisfies_p (LOOP_CLOSED_SSA)) |
9f0ac045 | 1493 | verify_loop_closed_ssa (true); |
d88fd237 | 1494 | } |
9f0ac045 | 1495 | if (loop_closed_ssa_invalidated) |
1496 | BITMAP_FREE (loop_closed_ssa_invalidated); | |
d88fd237 | 1497 | } |
793a0ab5 | 1498 | while (changed |
1499 | && ++iteration <= PARAM_VALUE (PARAM_MAX_UNROLL_ITERATIONS)); | |
08162157 | 1500 | |
0cfe7a23 | 1501 | BITMAP_FREE (father_bbs); |
2ebfc881 | 1502 | |
9f0ac045 | 1503 | if (irred_invalidated |
1504 | && loops_state_satisfies_p (LOOPS_HAVE_MARKED_IRREDUCIBLE_REGIONS)) | |
1505 | mark_irreducible_loops (); | |
1506 | ||
4c641bf8 | 1507 | return 0; |
bb445479 | 1508 | } |
f86b328b | 1509 | |
1510 | /* Canonical induction variable creation pass. */ | |
1511 | ||
namespace {

/* Pass metadata for the "ivcanon" (canonical IV creation) pass.  */
const pass_data pass_data_iv_canon =
{
  GIMPLE_PASS, /* type */
  "ivcanon", /* name */
  OPTGROUP_LOOP, /* optinfo_flags */
  TV_TREE_LOOP_IVCANON, /* tv_id */
  ( PROP_cfg | PROP_ssa ), /* properties_required */
  0, /* properties_provided */
  0, /* properties_destroyed */
  0, /* todo_flags_start */
  0, /* todo_flags_finish */
};

/* GIMPLE pass wrapper around canonicalize_induction_variables.  */
class pass_iv_canon : public gimple_opt_pass
{
public:
  pass_iv_canon (gcc::context *ctxt)
    : gimple_opt_pass (pass_data_iv_canon, ctxt)
  {}

  /* opt_pass methods: */
  /* Run only when -ftree-loop-ivcanon is enabled.  */
  virtual bool gate (function *) { return flag_tree_loop_ivcanon != 0; }
  virtual unsigned int execute (function *fun);

}; // class pass_iv_canon

unsigned int
pass_iv_canon::execute (function *fun)
{
  /* Nothing to do for functions without loops (loop 0 is the root).  */
  if (number_of_loops (fun) <= 1)
    return 0;

  return canonicalize_induction_variables ();
}

} // anon namespace
1550 | ||
1551 | gimple_opt_pass * | |
1552 | make_pass_iv_canon (gcc::context *ctxt) | |
1553 | { | |
1554 | return new pass_iv_canon (ctxt); | |
1555 | } | |
1556 | ||
1557 | /* Complete unrolling of loops. */ | |
1558 | ||
f86b328b | 1559 | namespace { |
1560 | ||
1561 | const pass_data pass_data_complete_unroll = | |
1562 | { | |
1563 | GIMPLE_PASS, /* type */ | |
1564 | "cunroll", /* name */ | |
1565 | OPTGROUP_LOOP, /* optinfo_flags */ | |
f86b328b | 1566 | TV_COMPLETE_UNROLL, /* tv_id */ |
1567 | ( PROP_cfg | PROP_ssa ), /* properties_required */ | |
1568 | 0, /* properties_provided */ | |
1569 | 0, /* properties_destroyed */ | |
1570 | 0, /* todo_flags_start */ | |
1571 | 0, /* todo_flags_finish */ | |
1572 | }; | |
1573 | ||
1574 | class pass_complete_unroll : public gimple_opt_pass | |
1575 | { | |
1576 | public: | |
1577 | pass_complete_unroll (gcc::context *ctxt) | |
1578 | : gimple_opt_pass (pass_data_complete_unroll, ctxt) | |
1579 | {} | |
1580 | ||
1581 | /* opt_pass methods: */ | |
65b0537f | 1582 | virtual unsigned int execute (function *); |
f86b328b | 1583 | |
1584 | }; // class pass_complete_unroll | |
1585 | ||
65b0537f | 1586 | unsigned int |
1587 | pass_complete_unroll::execute (function *fun) | |
1588 | { | |
1589 | if (number_of_loops (fun) <= 1) | |
1590 | return 0; | |
1591 | ||
b96f8145 | 1592 | /* If we ever decide to run loop peeling more than once, we will need to |
1593 | track loops already peeled in loop structures themselves to avoid | |
1594 | re-peeling the same loop multiple times. */ | |
1595 | if (flag_peel_loops) | |
1596 | peeled_loops = BITMAP_ALLOC (NULL); | |
2a09b28c | 1597 | unsigned int val = tree_unroll_loops_completely (flag_unroll_loops |
1598 | || flag_peel_loops | |
1599 | || optimize >= 3, true); | |
b96f8145 | 1600 | if (peeled_loops) |
1601 | { | |
1602 | BITMAP_FREE (peeled_loops); | |
1603 | peeled_loops = NULL; | |
1604 | } | |
1605 | return val; | |
65b0537f | 1606 | } |
1607 | ||
f86b328b | 1608 | } // anon namespace |
1609 | ||
1610 | gimple_opt_pass * | |
1611 | make_pass_complete_unroll (gcc::context *ctxt) | |
1612 | { | |
1613 | return new pass_complete_unroll (ctxt); | |
1614 | } | |
1615 | ||
1616 | /* Complete unrolling of inner loops. */ | |
1617 | ||
f86b328b | 1618 | namespace { |
1619 | ||
1620 | const pass_data pass_data_complete_unrolli = | |
1621 | { | |
1622 | GIMPLE_PASS, /* type */ | |
1623 | "cunrolli", /* name */ | |
1624 | OPTGROUP_LOOP, /* optinfo_flags */ | |
f86b328b | 1625 | TV_COMPLETE_UNROLL, /* tv_id */ |
1626 | ( PROP_cfg | PROP_ssa ), /* properties_required */ | |
1627 | 0, /* properties_provided */ | |
1628 | 0, /* properties_destroyed */ | |
1629 | 0, /* todo_flags_start */ | |
8b88439e | 1630 | 0, /* todo_flags_finish */ |
f86b328b | 1631 | }; |
1632 | ||
1633 | class pass_complete_unrolli : public gimple_opt_pass | |
1634 | { | |
1635 | public: | |
1636 | pass_complete_unrolli (gcc::context *ctxt) | |
1637 | : gimple_opt_pass (pass_data_complete_unrolli, ctxt) | |
1638 | {} | |
1639 | ||
1640 | /* opt_pass methods: */ | |
31315c24 | 1641 | virtual bool gate (function *) { return optimize >= 2; } |
65b0537f | 1642 | virtual unsigned int execute (function *); |
f86b328b | 1643 | |
1644 | }; // class pass_complete_unrolli | |
1645 | ||
65b0537f | 1646 | unsigned int |
1647 | pass_complete_unrolli::execute (function *fun) | |
1648 | { | |
1649 | unsigned ret = 0; | |
1650 | ||
2a09b28c | 1651 | loop_optimizer_init (LOOPS_NORMAL | LOOPS_HAVE_RECORDED_EXITS); |
65b0537f | 1652 | if (number_of_loops (fun) > 1) |
1653 | { | |
1654 | scev_initialize (); | |
1655 | ret = tree_unroll_loops_completely (optimize >= 3, false); | |
65b0537f | 1656 | scev_finalize (); |
1657 | } | |
1658 | loop_optimizer_finalize (); | |
1659 | ||
1660 | return ret; | |
1661 | } | |
1662 | ||
f86b328b | 1663 | } // anon namespace |
1664 | ||
1665 | gimple_opt_pass * | |
1666 | make_pass_complete_unrolli (gcc::context *ctxt) | |
1667 | { | |
1668 | return new pass_complete_unrolli (ctxt); | |
1669 | } | |
1670 | ||
1671 |