]>
Commit | Line | Data |
---|---|---|
0588ac84 | 1 | /* Induction variable canonicalization and loop peeling. |
cbe34bb5 | 2 | Copyright (C) 2004-2017 Free Software Foundation, Inc. |
b8698a0f | 3 | |
82b85a85 | 4 | This file is part of GCC. |
b8698a0f | 5 | |
82b85a85 ZD |
6 | GCC is free software; you can redistribute it and/or modify it |
7 | under the terms of the GNU General Public License as published by the | |
9dcd6f09 | 8 | Free Software Foundation; either version 3, or (at your option) any |
82b85a85 | 9 | later version. |
b8698a0f | 10 | |
82b85a85 ZD |
11 | GCC is distributed in the hope that it will be useful, but WITHOUT |
12 | ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or | |
13 | FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License | |
14 | for more details. | |
b8698a0f | 15 | |
82b85a85 | 16 | You should have received a copy of the GNU General Public License |
9dcd6f09 NC |
17 | along with GCC; see the file COPYING3. If not see |
18 | <http://www.gnu.org/licenses/>. */ | |
82b85a85 ZD |
19 | |
20 | /* This pass detects the loops that iterate a constant number of times, | |
b8698a0f | 21 | adds a canonical induction variable (step -1, tested against 0) |
82b85a85 ZD |
22 | and replaces the exit test. This enables the less powerful rtl |
23 | level analysis to use this information. | |
24 | ||
25 | This might spoil the code in some cases (by increasing register pressure). | |
26 | Note that in the case the new variable is not needed, ivopts will get rid | |
27 | of it, so it might only be a problem when there are no other linear induction | |
28 | variables. In that case the created optimization possibilities are likely | |
29 | to pay up. | |
30 | ||
f8934be7 | 31 | We also perform |
1c5fd343 | 32 | - complete unrolling (or peeling) when the loops is rolling few enough |
f8934be7 JH |
33 | times |
34 | - simple peeling (i.e. copying few initial iterations prior the loop) | |
35 | when number of iteration estimate is known (typically by the profile | |
36 | info). */ | |
82b85a85 ZD |
37 | |
38 | #include "config.h" | |
39 | #include "system.h" | |
40 | #include "coretypes.h" | |
c7131fb2 | 41 | #include "backend.h" |
82b85a85 | 42 | #include "tree.h" |
c7131fb2 | 43 | #include "gimple.h" |
957060b5 AM |
44 | #include "cfghooks.h" |
45 | #include "tree-pass.h" | |
c7131fb2 | 46 | #include "ssa.h" |
957060b5 AM |
47 | #include "cgraph.h" |
48 | #include "gimple-pretty-print.h" | |
40e23961 | 49 | #include "fold-const.h" |
59f2e9d8 | 50 | #include "profile.h" |
2fb9a547 AM |
51 | #include "gimple-fold.h" |
52 | #include "tree-eh.h" | |
5be5c238 | 53 | #include "gimple-iterator.h" |
442b4905 | 54 | #include "tree-cfg.h" |
e28030cf AM |
55 | #include "tree-ssa-loop-manip.h" |
56 | #include "tree-ssa-loop-niter.h" | |
442b4905 AM |
57 | #include "tree-ssa-loop.h" |
58 | #include "tree-into-ssa.h" | |
82b85a85 | 59 | #include "cfgloop.h" |
82b85a85 ZD |
60 | #include "tree-chrec.h" |
61 | #include "tree-scalar-evolution.h" | |
62 | #include "params.h" | |
82b85a85 | 63 | #include "tree-inline.h" |
4484a35a | 64 | #include "tree-cfgcleanup.h" |
9b2b7279 | 65 | #include "builtins.h" |
82b85a85 | 66 | |
91a01f21 ZD |
67 | /* Specifies types of loops that may be unrolled. */ |
68 | ||
69 | enum unroll_level | |
70 | { | |
bb22512c | 71 | UL_SINGLE_ITER, /* Only loops that exit immediately in the first |
91a01f21 ZD |
72 | iteration. */ |
73 | UL_NO_GROWTH, /* Only loops whose unrolling will not cause increase | |
74 | of code size. */ | |
75 | UL_ALL /* All suitable loops. */ | |
76 | }; | |
77 | ||
82b85a85 ZD |
78 | /* Adds a canonical induction variable to LOOP iterating NITER times. EXIT |
79 | is the exit edge whose condition is replaced. */ | |
80 | ||
81 | static void | |
82 | create_canonical_iv (struct loop *loop, edge exit, tree niter) | |
83 | { | |
84 | edge in; | |
726a989a | 85 | tree type, var; |
538dd0b7 | 86 | gcond *cond; |
726a989a | 87 | gimple_stmt_iterator incr_at; |
82b85a85 ZD |
88 | enum tree_code cmp; |
89 | ||
90 | if (dump_file && (dump_flags & TDF_DETAILS)) | |
91 | { | |
92 | fprintf (dump_file, "Added canonical iv to loop %d, ", loop->num); | |
93 | print_generic_expr (dump_file, niter, TDF_SLIM); | |
94 | fprintf (dump_file, " iterations.\n"); | |
95 | } | |
96 | ||
538dd0b7 | 97 | cond = as_a <gcond *> (last_stmt (exit->src)); |
628f6a4e | 98 | in = EDGE_SUCC (exit->src, 0); |
82b85a85 | 99 | if (in == exit) |
628f6a4e | 100 | in = EDGE_SUCC (exit->src, 1); |
82b85a85 ZD |
101 | |
102 | /* Note that we do not need to worry about overflows, since | |
103 | type of niter is always unsigned and all comparisons are | |
104 | just for equality/nonequality -- i.e. everything works | |
105 | with a modulo arithmetics. */ | |
106 | ||
107 | type = TREE_TYPE (niter); | |
987b67bc KH |
108 | niter = fold_build2 (PLUS_EXPR, type, |
109 | niter, | |
110 | build_int_cst (type, 1)); | |
726a989a | 111 | incr_at = gsi_last_bb (in->src); |
82b85a85 | 112 | create_iv (niter, |
57decb7e | 113 | build_int_cst (type, -1), |
82b85a85 ZD |
114 | NULL_TREE, loop, |
115 | &incr_at, false, NULL, &var); | |
116 | ||
117 | cmp = (exit->flags & EDGE_TRUE_VALUE) ? EQ_EXPR : NE_EXPR; | |
726a989a RB |
118 | gimple_cond_set_code (cond, cmp); |
119 | gimple_cond_set_lhs (cond, var); | |
120 | gimple_cond_set_rhs (cond, build_int_cst (type, 0)); | |
f430bae8 | 121 | update_stmt (cond); |
82b85a85 ZD |
122 | } |
123 | ||
08f1af2e JH |
124 | /* Describe size of loop as detected by tree_estimate_loop_size. */ |
125 | struct loop_size | |
126 | { | |
127 | /* Number of instructions in the loop. */ | |
128 | int overall; | |
129 | ||
130 | /* Number of instructions that will be likely optimized out in | |
131 | peeled iterations of loop (i.e. computation based on induction | |
132 | variable where induction variable starts at known constant.) */ | |
133 | int eliminated_by_peeling; | |
134 | ||
135 | /* Same statistics for last iteration of loop: it is smaller because | |
136 | instructions after exit are not executed. */ | |
137 | int last_iteration; | |
138 | int last_iteration_eliminated_by_peeling; | |
519cac4a JH |
139 | |
140 | /* If some IV computation will become constant. */ | |
141 | bool constant_iv; | |
142 | ||
143 | /* Number of call stmts that are not a builtin and are pure or const | |
144 | present on the hot path. */ | |
145 | int num_pure_calls_on_hot_path; | |
146 | /* Number of call stmts that are not a builtin and are not pure nor const | |
147 | present on the hot path. */ | |
148 | int num_non_pure_calls_on_hot_path; | |
149 | /* Number of statements other than calls in the loop. */ | |
150 | int non_call_stmts_on_hot_path; | |
151 | /* Number of branches seen on the hot path. */ | |
152 | int num_branches_on_hot_path; | |
08f1af2e JH |
153 | }; |
154 | ||
155 | /* Return true if OP in STMT will be constant after peeling LOOP. */ | |
156 | ||
157 | static bool | |
355fe088 | 158 | constant_after_peeling (tree op, gimple *stmt, struct loop *loop) |
08f1af2e | 159 | { |
08f1af2e JH |
160 | if (is_gimple_min_invariant (op)) |
161 | return true; | |
b8698a0f | 162 | |
08f1af2e JH |
163 | /* We can still fold accesses to constant arrays when index is known. */ |
164 | if (TREE_CODE (op) != SSA_NAME) | |
165 | { | |
166 | tree base = op; | |
167 | ||
168 | /* First make fast look if we see constant array inside. */ | |
169 | while (handled_component_p (base)) | |
170 | base = TREE_OPERAND (base, 0); | |
a093356a | 171 | if ((DECL_P (base) |
6a6dac52 | 172 | && ctor_for_folding (base) != error_mark_node) |
08f1af2e JH |
173 | || CONSTANT_CLASS_P (base)) |
174 | { | |
175 | /* If so, see if we understand all the indices. */ | |
176 | base = op; | |
177 | while (handled_component_p (base)) | |
178 | { | |
179 | if (TREE_CODE (base) == ARRAY_REF | |
180 | && !constant_after_peeling (TREE_OPERAND (base, 1), stmt, loop)) | |
181 | return false; | |
182 | base = TREE_OPERAND (base, 0); | |
183 | } | |
184 | return true; | |
185 | } | |
186 | return false; | |
187 | } | |
188 | ||
7fd1a747 RB |
189 | /* Induction variables are constants when defined in loop. */ |
190 | if (loop_containing_stmt (stmt) != loop) | |
08f1af2e | 191 | return false; |
7fd1a747 RB |
192 | tree ev = analyze_scalar_evolution (loop, op); |
193 | if (chrec_contains_undetermined (ev) | |
194 | || chrec_contains_symbols (ev)) | |
08f1af2e JH |
195 | return false; |
196 | return true; | |
197 | } | |
198 | ||
519cac4a JH |
199 | /* Computes an estimated number of insns in LOOP. |
200 | EXIT (if non-NULL) is an exite edge that will be eliminated in all but last | |
201 | iteration of the loop. | |
202 | EDGE_TO_CANCEL (if non-NULL) is an non-exit edge eliminated in the last iteration | |
203 | of loop. | |
0588ac84 | 204 | Return results in SIZE, estimate benefits for complete unrolling exiting by EXIT. |
9d676bf6 | 205 | Stop estimating after UPPER_BOUND is met. Return true in this case. */ |
08f1af2e | 206 | |
0588ac84 | 207 | static bool |
ce120587 JH |
208 | tree_estimate_loop_size (struct loop *loop, edge exit, edge edge_to_cancel, |
209 | struct loop_size *size, int upper_bound) | |
08f1af2e JH |
210 | { |
211 | basic_block *body = get_loop_body (loop); | |
212 | gimple_stmt_iterator gsi; | |
213 | unsigned int i; | |
214 | bool after_exit; | |
9771b263 | 215 | vec<basic_block> path = get_loop_hot_path (loop); |
08f1af2e JH |
216 | |
217 | size->overall = 0; | |
218 | size->eliminated_by_peeling = 0; | |
219 | size->last_iteration = 0; | |
220 | size->last_iteration_eliminated_by_peeling = 0; | |
519cac4a JH |
221 | size->num_pure_calls_on_hot_path = 0; |
222 | size->num_non_pure_calls_on_hot_path = 0; | |
223 | size->non_call_stmts_on_hot_path = 0; | |
224 | size->num_branches_on_hot_path = 0; | |
225 | size->constant_iv = 0; | |
08f1af2e JH |
226 | |
227 | if (dump_file && (dump_flags & TDF_DETAILS)) | |
228 | fprintf (dump_file, "Estimating sizes for loop %i\n", loop->num); | |
229 | for (i = 0; i < loop->num_nodes; i++) | |
230 | { | |
b7442c2f JH |
231 | if (edge_to_cancel && body[i] != edge_to_cancel->src |
232 | && dominated_by_p (CDI_DOMINATORS, body[i], edge_to_cancel->src)) | |
08f1af2e JH |
233 | after_exit = true; |
234 | else | |
235 | after_exit = false; | |
236 | if (dump_file && (dump_flags & TDF_DETAILS)) | |
ce120587 JH |
237 | fprintf (dump_file, " BB: %i, after_exit: %i\n", body[i]->index, |
238 | after_exit); | |
08f1af2e JH |
239 | |
240 | for (gsi = gsi_start_bb (body[i]); !gsi_end_p (gsi); gsi_next (&gsi)) | |
241 | { | |
355fe088 | 242 | gimple *stmt = gsi_stmt (gsi); |
08f1af2e JH |
243 | int num = estimate_num_insns (stmt, &eni_size_weights); |
244 | bool likely_eliminated = false; | |
519cac4a JH |
245 | bool likely_eliminated_last = false; |
246 | bool likely_eliminated_peeled = false; | |
08f1af2e JH |
247 | |
248 | if (dump_file && (dump_flags & TDF_DETAILS)) | |
249 | { | |
250 | fprintf (dump_file, " size: %3i ", num); | |
ef6cb4c7 | 251 | print_gimple_stmt (dump_file, gsi_stmt (gsi), 0); |
08f1af2e JH |
252 | } |
253 | ||
254 | /* Look for reasons why we might optimize this stmt away. */ | |
255 | ||
1dc8d15b | 256 | if (!gimple_has_side_effects (stmt)) |
08f1af2e | 257 | { |
1dc8d15b JH |
258 | /* Exit conditional. */ |
259 | if (exit && body[i] == exit->src | |
260 | && stmt == last_stmt (exit->src)) | |
261 | { | |
262 | if (dump_file && (dump_flags & TDF_DETAILS)) | |
263 | fprintf (dump_file, " Exit condition will be eliminated " | |
264 | "in peeled copies.\n"); | |
265 | likely_eliminated_peeled = true; | |
266 | } | |
267 | if (edge_to_cancel && body[i] == edge_to_cancel->src | |
268 | && stmt == last_stmt (edge_to_cancel->src)) | |
269 | { | |
270 | if (dump_file && (dump_flags & TDF_DETAILS)) | |
271 | fprintf (dump_file, " Exit condition will be eliminated " | |
272 | "in last copy.\n"); | |
273 | likely_eliminated_last = true; | |
274 | } | |
275 | /* Sets of IV variables */ | |
276 | if (gimple_code (stmt) == GIMPLE_ASSIGN | |
277 | && constant_after_peeling (gimple_assign_lhs (stmt), stmt, loop)) | |
278 | { | |
279 | if (dump_file && (dump_flags & TDF_DETAILS)) | |
280 | fprintf (dump_file, " Induction variable computation will" | |
281 | " be folded away.\n"); | |
282 | likely_eliminated = true; | |
283 | } | |
284 | /* Assignments of IV variables. */ | |
285 | else if (gimple_code (stmt) == GIMPLE_ASSIGN | |
286 | && TREE_CODE (gimple_assign_lhs (stmt)) == SSA_NAME | |
287 | && constant_after_peeling (gimple_assign_rhs1 (stmt), | |
56bbfaf6 | 288 | stmt, loop) |
1dc8d15b JH |
289 | && (gimple_assign_rhs_class (stmt) != GIMPLE_BINARY_RHS |
290 | || constant_after_peeling (gimple_assign_rhs2 (stmt), | |
291 | stmt, loop))) | |
292 | { | |
293 | size->constant_iv = true; | |
294 | if (dump_file && (dump_flags & TDF_DETAILS)) | |
295 | fprintf (dump_file, | |
296 | " Constant expression will be folded away.\n"); | |
297 | likely_eliminated = true; | |
298 | } | |
299 | /* Conditionals. */ | |
300 | else if ((gimple_code (stmt) == GIMPLE_COND | |
301 | && constant_after_peeling (gimple_cond_lhs (stmt), stmt, | |
302 | loop) | |
303 | && constant_after_peeling (gimple_cond_rhs (stmt), stmt, | |
304 | loop) | |
305 | /* We don't simplify all constant compares so make sure | |
306 | they are not both constant already. See PR70288. */ | |
307 | && (! is_gimple_min_invariant (gimple_cond_lhs (stmt)) | |
308 | || ! is_gimple_min_invariant | |
309 | (gimple_cond_rhs (stmt)))) | |
310 | || (gimple_code (stmt) == GIMPLE_SWITCH | |
311 | && constant_after_peeling (gimple_switch_index ( | |
312 | as_a <gswitch *> | |
313 | (stmt)), | |
314 | stmt, loop) | |
315 | && ! is_gimple_min_invariant | |
316 | (gimple_switch_index | |
317 | (as_a <gswitch *> (stmt))))) | |
318 | { | |
319 | if (dump_file && (dump_flags & TDF_DETAILS)) | |
320 | fprintf (dump_file, " Constant conditional.\n"); | |
321 | likely_eliminated = true; | |
322 | } | |
08f1af2e JH |
323 | } |
324 | ||
325 | size->overall += num; | |
519cac4a | 326 | if (likely_eliminated || likely_eliminated_peeled) |
08f1af2e JH |
327 | size->eliminated_by_peeling += num; |
328 | if (!after_exit) | |
329 | { | |
330 | size->last_iteration += num; | |
519cac4a | 331 | if (likely_eliminated || likely_eliminated_last) |
08f1af2e JH |
332 | size->last_iteration_eliminated_by_peeling += num; |
333 | } | |
0588ac84 JH |
334 | if ((size->overall * 3 / 2 - size->eliminated_by_peeling |
335 | - size->last_iteration_eliminated_by_peeling) > upper_bound) | |
336 | { | |
337 | free (body); | |
9d676bf6 | 338 | path.release (); |
0588ac84 JH |
339 | return true; |
340 | } | |
08f1af2e JH |
341 | } |
342 | } | |
9771b263 | 343 | while (path.length ()) |
519cac4a | 344 | { |
9771b263 | 345 | basic_block bb = path.pop (); |
519cac4a JH |
346 | for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi)) |
347 | { | |
355fe088 | 348 | gimple *stmt = gsi_stmt (gsi); |
ce120587 JH |
349 | if (gimple_code (stmt) == GIMPLE_CALL |
350 | && !gimple_inexpensive_call_p (as_a <gcall *> (stmt))) | |
519cac4a JH |
351 | { |
352 | int flags = gimple_call_flags (stmt); | |
ce120587 | 353 | if (flags & (ECF_PURE | ECF_CONST)) |
519cac4a JH |
354 | size->num_pure_calls_on_hot_path++; |
355 | else | |
356 | size->num_non_pure_calls_on_hot_path++; | |
357 | size->num_branches_on_hot_path ++; | |
358 | } | |
ce120587 JH |
359 | /* Count inexpensive calls as non-calls, because they will likely |
360 | expand inline. */ | |
361 | else if (gimple_code (stmt) != GIMPLE_DEBUG) | |
519cac4a JH |
362 | size->non_call_stmts_on_hot_path++; |
363 | if (((gimple_code (stmt) == GIMPLE_COND | |
364 | && (!constant_after_peeling (gimple_cond_lhs (stmt), stmt, loop) | |
ce120587 JH |
365 | || constant_after_peeling (gimple_cond_rhs (stmt), stmt, |
366 | loop))) | |
519cac4a | 367 | || (gimple_code (stmt) == GIMPLE_SWITCH |
538dd0b7 DM |
368 | && !constant_after_peeling (gimple_switch_index ( |
369 | as_a <gswitch *> (stmt)), | |
370 | stmt, loop))) | |
519cac4a JH |
371 | && (!exit || bb != exit->src)) |
372 | size->num_branches_on_hot_path++; | |
373 | } | |
374 | } | |
9771b263 | 375 | path.release (); |
08f1af2e JH |
376 | if (dump_file && (dump_flags & TDF_DETAILS)) |
377 | fprintf (dump_file, "size: %i-%i, last_iteration: %i-%i\n", size->overall, | |
378 | size->eliminated_by_peeling, size->last_iteration, | |
379 | size->last_iteration_eliminated_by_peeling); | |
b8698a0f | 380 | |
08f1af2e | 381 | free (body); |
0588ac84 | 382 | return false; |
08f1af2e | 383 | } |
91a01f21 | 384 | |
08f1af2e JH |
385 | /* Estimate number of insns of completely unrolled loop. |
386 | It is (NUNROLL + 1) * size of loop body with taking into account | |
387 | the fact that in last copy everything after exit conditional | |
388 | is dead and that some instructions will be eliminated after | |
389 | peeling. | |
91a01f21 | 390 | |
c0d18c6c | 391 | Loop body is likely going to simplify further, this is difficult |
08f1af2e | 392 | to guess, we just decrease the result by 1/3. */ |
91a01f21 ZD |
393 | |
394 | static unsigned HOST_WIDE_INT | |
08f1af2e | 395 | estimated_unrolled_size (struct loop_size *size, |
91a01f21 ZD |
396 | unsigned HOST_WIDE_INT nunroll) |
397 | { | |
08f1af2e JH |
398 | HOST_WIDE_INT unr_insns = ((nunroll) |
399 | * (HOST_WIDE_INT) (size->overall | |
400 | - size->eliminated_by_peeling)); | |
401 | if (!nunroll) | |
402 | unr_insns = 0; | |
403 | unr_insns += size->last_iteration - size->last_iteration_eliminated_by_peeling; | |
404 | ||
405 | unr_insns = unr_insns * 2 / 3; | |
91a01f21 ZD |
406 | if (unr_insns <= 0) |
407 | unr_insns = 1; | |
91a01f21 ZD |
408 | |
409 | return unr_insns; | |
410 | } | |
411 | ||
b7442c2f JH |
412 | /* Loop LOOP is known to not loop. See if there is an edge in the loop |
413 | body that can be remove to make the loop to always exit and at | |
414 | the same time it does not make any code potentially executed | |
415 | during the last iteration dead. | |
416 | ||
1c5fd343 | 417 | After complete unrolling we still may get rid of the conditional |
b7442c2f JH |
418 | on the exit in the last copy even if we have no idea what it does. |
419 | This is quite common case for loops of form | |
420 | ||
421 | int a[5]; | |
422 | for (i=0;i<b;i++) | |
423 | a[i]=0; | |
424 | ||
425 | Here we prove the loop to iterate 5 times but we do not know | |
426 | it from induction variable. | |
427 | ||
428 | For now we handle only simple case where there is exit condition | |
429 | just before the latch block and the latch block contains no statements | |
430 | with side effect that may otherwise terminate the execution of loop | |
431 | (such as by EH or by terminating the program or longjmp). | |
432 | ||
433 | In the general case we may want to cancel the paths leading to statements | |
434 | loop-niter identified as having undefined effect in the last iteration. | |
435 | The other cases are hopefully rare and will be cleaned up later. */ | |
436 | ||
71343877 | 437 | static edge |
b7442c2f JH |
438 | loop_edge_to_cancel (struct loop *loop) |
439 | { | |
9771b263 | 440 | vec<edge> exits; |
b7442c2f JH |
441 | unsigned i; |
442 | edge edge_to_cancel; | |
443 | gimple_stmt_iterator gsi; | |
444 | ||
445 | /* We want only one predecestor of the loop. */ | |
446 | if (EDGE_COUNT (loop->latch->preds) > 1) | |
447 | return NULL; | |
448 | ||
449 | exits = get_loop_exit_edges (loop); | |
450 | ||
9771b263 | 451 | FOR_EACH_VEC_ELT (exits, i, edge_to_cancel) |
b7442c2f JH |
452 | { |
453 | /* Find the other edge than the loop exit | |
454 | leaving the conditoinal. */ | |
455 | if (EDGE_COUNT (edge_to_cancel->src->succs) != 2) | |
456 | continue; | |
457 | if (EDGE_SUCC (edge_to_cancel->src, 0) == edge_to_cancel) | |
458 | edge_to_cancel = EDGE_SUCC (edge_to_cancel->src, 1); | |
459 | else | |
460 | edge_to_cancel = EDGE_SUCC (edge_to_cancel->src, 0); | |
461 | ||
a093356a JH |
462 | /* We only can handle conditionals. */ |
463 | if (!(edge_to_cancel->flags & (EDGE_TRUE_VALUE | EDGE_FALSE_VALUE))) | |
464 | continue; | |
465 | ||
b7442c2f JH |
466 | /* We should never have conditionals in the loop latch. */ |
467 | gcc_assert (edge_to_cancel->dest != loop->header); | |
468 | ||
469 | /* Check that it leads to loop latch. */ | |
470 | if (edge_to_cancel->dest != loop->latch) | |
471 | continue; | |
472 | ||
9771b263 | 473 | exits.release (); |
b7442c2f JH |
474 | |
475 | /* Verify that the code in loop latch does nothing that may end program | |
476 | execution without really reaching the exit. This may include | |
477 | non-pure/const function calls, EH statements, volatile ASMs etc. */ | |
478 | for (gsi = gsi_start_bb (loop->latch); !gsi_end_p (gsi); gsi_next (&gsi)) | |
479 | if (gimple_has_side_effects (gsi_stmt (gsi))) | |
480 | return NULL; | |
481 | return edge_to_cancel; | |
482 | } | |
9771b263 | 483 | exits.release (); |
b7442c2f JH |
484 | return NULL; |
485 | } | |
486 | ||
f2a1b469 JH |
487 | /* Remove all tests for exits that are known to be taken after LOOP was |
488 | peeled NPEELED times. Put gcc_unreachable before every statement | |
489 | known to not be executed. */ | |
490 | ||
491 | static bool | |
492 | remove_exits_and_undefined_stmts (struct loop *loop, unsigned int npeeled) | |
493 | { | |
494 | struct nb_iter_bound *elt; | |
495 | bool changed = false; | |
496 | ||
497 | for (elt = loop->bounds; elt; elt = elt->next) | |
498 | { | |
499 | /* If statement is known to be undefined after peeling, turn it | |
500 | into unreachable (or trap when debugging experience is supposed | |
501 | to be good). */ | |
502 | if (!elt->is_exit | |
807e902e | 503 | && wi::ltu_p (elt->bound, npeeled)) |
f2a1b469 JH |
504 | { |
505 | gimple_stmt_iterator gsi = gsi_for_stmt (elt->stmt); | |
538dd0b7 | 506 | gcall *stmt = gimple_build_call |
f2a1b469 | 507 | (builtin_decl_implicit (BUILT_IN_UNREACHABLE), 0); |
f2a1b469 JH |
508 | gimple_set_location (stmt, gimple_location (elt->stmt)); |
509 | gsi_insert_before (&gsi, stmt, GSI_NEW_STMT); | |
ad3e2ba0 | 510 | split_block (gimple_bb (stmt), stmt); |
f2a1b469 JH |
511 | changed = true; |
512 | if (dump_file && (dump_flags & TDF_DETAILS)) | |
513 | { | |
514 | fprintf (dump_file, "Forced statement unreachable: "); | |
ef6cb4c7 | 515 | print_gimple_stmt (dump_file, elt->stmt, 0); |
f2a1b469 JH |
516 | } |
517 | } | |
518 | /* If we know the exit will be taken after peeling, update. */ | |
519 | else if (elt->is_exit | |
807e902e | 520 | && wi::leu_p (elt->bound, npeeled)) |
f2a1b469 JH |
521 | { |
522 | basic_block bb = gimple_bb (elt->stmt); | |
523 | edge exit_edge = EDGE_SUCC (bb, 0); | |
524 | ||
525 | if (dump_file && (dump_flags & TDF_DETAILS)) | |
526 | { | |
527 | fprintf (dump_file, "Forced exit to be taken: "); | |
ef6cb4c7 | 528 | print_gimple_stmt (dump_file, elt->stmt, 0); |
f2a1b469 JH |
529 | } |
530 | if (!loop_exit_edge_p (loop, exit_edge)) | |
531 | exit_edge = EDGE_SUCC (bb, 1); | |
8fb0ae82 JH |
532 | exit_edge->probability = REG_BR_PROB_BASE; |
533 | exit_edge->count = exit_edge->src->count; | |
f2a1b469 | 534 | gcc_checking_assert (loop_exit_edge_p (loop, exit_edge)); |
538dd0b7 | 535 | gcond *cond_stmt = as_a <gcond *> (elt->stmt); |
f2a1b469 | 536 | if (exit_edge->flags & EDGE_TRUE_VALUE) |
538dd0b7 | 537 | gimple_cond_make_true (cond_stmt); |
f2a1b469 | 538 | else |
538dd0b7 DM |
539 | gimple_cond_make_false (cond_stmt); |
540 | update_stmt (cond_stmt); | |
f2a1b469 JH |
541 | changed = true; |
542 | } | |
543 | } | |
544 | return changed; | |
545 | } | |
546 | ||
547 | /* Remove all exits that are known to be never taken because of the loop bound | |
548 | discovered. */ | |
549 | ||
550 | static bool | |
551 | remove_redundant_iv_tests (struct loop *loop) | |
552 | { | |
553 | struct nb_iter_bound *elt; | |
554 | bool changed = false; | |
555 | ||
556 | if (!loop->any_upper_bound) | |
557 | return false; | |
558 | for (elt = loop->bounds; elt; elt = elt->next) | |
559 | { | |
560 | /* Exit is pointless if it won't be taken before loop reaches | |
561 | upper bound. */ | |
562 | if (elt->is_exit && loop->any_upper_bound | |
807e902e | 563 | && wi::ltu_p (loop->nb_iterations_upper_bound, elt->bound)) |
f2a1b469 JH |
564 | { |
565 | basic_block bb = gimple_bb (elt->stmt); | |
566 | edge exit_edge = EDGE_SUCC (bb, 0); | |
567 | struct tree_niter_desc niter; | |
568 | ||
569 | if (!loop_exit_edge_p (loop, exit_edge)) | |
570 | exit_edge = EDGE_SUCC (bb, 1); | |
571 | ||
572 | /* Only when we know the actual number of iterations, not | |
573 | just a bound, we can remove the exit. */ | |
574 | if (!number_of_iterations_exit (loop, exit_edge, | |
13ea799b RB |
575 | &niter, false, false) |
576 | || !integer_onep (niter.assumptions) | |
f2a1b469 JH |
577 | || !integer_zerop (niter.may_be_zero) |
578 | || !niter.niter | |
579 | || TREE_CODE (niter.niter) != INTEGER_CST | |
807e902e KZ |
580 | || !wi::ltu_p (loop->nb_iterations_upper_bound, |
581 | wi::to_widest (niter.niter))) | |
f2a1b469 JH |
582 | continue; |
583 | ||
584 | if (dump_file && (dump_flags & TDF_DETAILS)) | |
585 | { | |
586 | fprintf (dump_file, "Removed pointless exit: "); | |
ef6cb4c7 | 587 | print_gimple_stmt (dump_file, elt->stmt, 0); |
f2a1b469 | 588 | } |
538dd0b7 | 589 | gcond *cond_stmt = as_a <gcond *> (elt->stmt); |
f2a1b469 | 590 | if (exit_edge->flags & EDGE_TRUE_VALUE) |
538dd0b7 | 591 | gimple_cond_make_false (cond_stmt); |
f2a1b469 | 592 | else |
538dd0b7 DM |
593 | gimple_cond_make_true (cond_stmt); |
594 | update_stmt (cond_stmt); | |
f2a1b469 JH |
595 | changed = true; |
596 | } | |
597 | } | |
598 | return changed; | |
599 | } | |
600 | ||
b66af6ed RB |
601 | /* Stores loops that will be unlooped and edges that will be removed |
602 | after we process whole loop tree. */ | |
9771b263 DN |
603 | static vec<loop_p> loops_to_unloop; |
604 | static vec<int> loops_to_unloop_nunroll; | |
b66af6ed | 605 | static vec<edge> edges_to_remove; |
a448ff40 JH |
606 | /* Stores loops that has been peeled. */ |
607 | static bitmap peeled_loops; | |
f2a1b469 JH |
608 | |
609 | /* Cancel all fully unrolled loops by putting __builtin_unreachable | |
610 | on the latch edge. | |
611 | We do it after all unrolling since unlooping moves basic blocks | |
612 | across loop boundaries trashing loop closed SSA form as well | |
613 | as SCEV info needed to be intact during unrolling. | |
614 | ||
b7442c2f JH |
615 | IRRED_INVALIDATED is used to bookkeep if information about |
616 | irreducible regions may become invalid as a result | |
1a7de201 JH |
617 | of the transformation. |
618 | LOOP_CLOSED_SSA_INVALIDATED is used to bookkepp the case | |
619 | when we need to go into loop closed SSA form. */ | |
82b85a85 | 620 | |
71343877 | 621 | static void |
f2a1b469 JH |
622 | unloop_loops (bitmap loop_closed_ssa_invalidated, |
623 | bool *irred_invalidated) | |
624 | { | |
9771b263 | 625 | while (loops_to_unloop.length ()) |
f2a1b469 | 626 | { |
9771b263 DN |
627 | struct loop *loop = loops_to_unloop.pop (); |
628 | int n_unroll = loops_to_unloop_nunroll.pop (); | |
f2a1b469 JH |
629 | basic_block latch = loop->latch; |
630 | edge latch_edge = loop_latch_edge (loop); | |
631 | int flags = latch_edge->flags; | |
632 | location_t locus = latch_edge->goto_locus; | |
538dd0b7 | 633 | gcall *stmt; |
f2a1b469 JH |
634 | gimple_stmt_iterator gsi; |
635 | ||
636 | remove_exits_and_undefined_stmts (loop, n_unroll); | |
637 | ||
638 | /* Unloop destroys the latch edge. */ | |
639 | unloop (loop, irred_invalidated, loop_closed_ssa_invalidated); | |
640 | ||
641 | /* Create new basic block for the latch edge destination and wire | |
642 | it in. */ | |
643 | stmt = gimple_build_call (builtin_decl_implicit (BUILT_IN_UNREACHABLE), 0); | |
644 | latch_edge = make_edge (latch, create_basic_block (NULL, NULL, latch), flags); | |
645 | latch_edge->probability = 0; | |
3995f3a2 | 646 | latch_edge->count = profile_count::zero (); |
f2a1b469 JH |
647 | latch_edge->flags |= flags; |
648 | latch_edge->goto_locus = locus; | |
649 | ||
eb2afa1a | 650 | add_bb_to_loop (latch_edge->dest, current_loops->tree_root); |
3995f3a2 | 651 | latch_edge->dest->count = profile_count::zero (); |
f2a1b469 JH |
652 | latch_edge->dest->frequency = 0; |
653 | set_immediate_dominator (CDI_DOMINATORS, latch_edge->dest, latch_edge->src); | |
654 | ||
655 | gsi = gsi_start_bb (latch_edge->dest); | |
656 | gsi_insert_after (&gsi, stmt, GSI_NEW_STMT); | |
657 | } | |
9771b263 DN |
658 | loops_to_unloop.release (); |
659 | loops_to_unloop_nunroll.release (); | |
9fcebb5a RB |
660 | |
661 | /* Remove edges in peeled copies. */ | |
662 | unsigned i; | |
663 | edge e; | |
664 | FOR_EACH_VEC_ELT (edges_to_remove, i, e) | |
665 | { | |
eb2afa1a | 666 | bool ok = remove_path (e, irred_invalidated, loop_closed_ssa_invalidated); |
9fcebb5a RB |
667 | gcc_assert (ok); |
668 | } | |
669 | edges_to_remove.release (); | |
f2a1b469 JH |
670 | } |
671 | ||
672 | /* Tries to unroll LOOP completely, i.e. NITER times. | |
673 | UL determines which loops we are allowed to unroll. | |
e25a6711 | 674 | EXIT is the exit of the loop that should be eliminated. |
f2a1b469 | 675 | MAXITER specfy bound on number of iterations, -1 if it is |
e25a6711 TJ |
676 | not known or too large for HOST_WIDE_INT. The location |
677 | LOCUS corresponding to the loop is used when emitting | |
678 | a summary of the unroll to the dump file. */ | |
f2a1b469 | 679 | |
82b85a85 | 680 | static bool |
d73be268 | 681 | try_unroll_loop_completely (struct loop *loop, |
82b85a85 | 682 | edge exit, tree niter, |
b7442c2f | 683 | enum unroll_level ul, |
e25a6711 TJ |
684 | HOST_WIDE_INT maxiter, |
685 | location_t locus) | |
82b85a85 | 686 | { |
d03f2c17 | 687 | unsigned HOST_WIDE_INT n_unroll = 0, ninsns, unr_insns; |
08f1af2e | 688 | struct loop_size size; |
b7442c2f | 689 | bool n_unroll_found = false; |
b7442c2f | 690 | edge edge_to_cancel = NULL; |
8264c84d | 691 | dump_flags_t report_flags = MSG_OPTIMIZED_LOCATIONS | TDF_DETAILS; |
82b85a85 | 692 | |
b7442c2f | 693 | /* See if we proved number of iterations to be low constant. |
82b85a85 | 694 | |
b7442c2f JH |
695 | EXIT is an edge that will be removed in all but last iteration of |
696 | the loop. | |
697 | ||
698 | EDGE_TO_CACNEL is an edge that will be removed from the last iteration | |
699 | of the unrolled sequence and is expected to make the final loop not | |
700 | rolling. | |
701 | ||
702 | If the number of execution of loop is determined by standard induction | |
703 | variable test, then EXIT and EDGE_TO_CANCEL are the two edges leaving | |
704 | from the iv test. */ | |
cc269bb6 | 705 | if (tree_fits_uhwi_p (niter)) |
b7442c2f | 706 | { |
ae7e9ddd | 707 | n_unroll = tree_to_uhwi (niter); |
b7442c2f JH |
708 | n_unroll_found = true; |
709 | edge_to_cancel = EDGE_SUCC (exit->src, 0); | |
710 | if (edge_to_cancel == exit) | |
711 | edge_to_cancel = EDGE_SUCC (exit->src, 1); | |
712 | } | |
713 | /* We do not know the number of iterations and thus we can not eliminate | |
714 | the EXIT edge. */ | |
715 | else | |
716 | exit = NULL; | |
717 | ||
718 | /* See if we can improve our estimate by using recorded loop bounds. */ | |
b7442c2f JH |
719 | if (maxiter >= 0 |
720 | && (!n_unroll_found || (unsigned HOST_WIDE_INT)maxiter < n_unroll)) | |
721 | { | |
722 | n_unroll = maxiter; | |
723 | n_unroll_found = true; | |
724 | /* Loop terminates before the IV variable test, so we can not | |
725 | remove it in the last iteration. */ | |
726 | edge_to_cancel = NULL; | |
727 | } | |
728 | ||
729 | if (!n_unroll_found) | |
82b85a85 | 730 | return false; |
82b85a85 | 731 | |
d03f2c17 EB |
732 | if (n_unroll > (unsigned) PARAM_VALUE (PARAM_MAX_COMPLETELY_PEEL_TIMES)) |
733 | { | |
734 | if (dump_file && (dump_flags & TDF_DETAILS)) | |
735 | fprintf (dump_file, "Not unrolling loop %d " | |
f4ca9095 | 736 | "(--param max-completely-peel-times limit reached).\n", |
d03f2c17 EB |
737 | loop->num); |
738 | return false; | |
739 | } | |
82b85a85 | 740 | |
b7442c2f JH |
741 | if (!edge_to_cancel) |
742 | edge_to_cancel = loop_edge_to_cancel (loop); | |
743 | ||
82b85a85 ZD |
744 | if (n_unroll) |
745 | { | |
0588ac84 | 746 | bool large; |
91a01f21 | 747 | if (ul == UL_SINGLE_ITER) |
82b85a85 ZD |
748 | return false; |
749 | ||
98bdbb39 JH |
750 | /* EXIT can be removed only if we are sure it passes first N_UNROLL |
751 | iterations. */ | |
752 | bool remove_exit = (exit && niter | |
753 | && TREE_CODE (niter) == INTEGER_CST | |
754 | && wi::leu_p (n_unroll, wi::to_widest (niter))); | |
755 | ||
0588ac84 | 756 | large = tree_estimate_loop_size |
98bdbb39 | 757 | (loop, remove_exit ? exit : NULL, edge_to_cancel, &size, |
0588ac84 | 758 | PARAM_VALUE (PARAM_MAX_COMPLETELY_PEELED_INSNS)); |
08f1af2e | 759 | ninsns = size.overall; |
0588ac84 JH |
760 | if (large) |
761 | { | |
762 | if (dump_file && (dump_flags & TDF_DETAILS)) | |
763 | fprintf (dump_file, "Not unrolling loop %d: it is too large.\n", | |
764 | loop->num); | |
765 | return false; | |
766 | } | |
82b85a85 | 767 | |
08f1af2e | 768 | unr_insns = estimated_unrolled_size (&size, n_unroll); |
d6e840ee RG |
769 | if (dump_file && (dump_flags & TDF_DETAILS)) |
770 | { | |
771 | fprintf (dump_file, " Loop size: %d\n", (int) ninsns); | |
772 | fprintf (dump_file, " Estimated size after unrolling: %d\n", | |
773 | (int) unr_insns); | |
774 | } | |
775 | ||
519cac4a JH |
776 | /* If the code is going to shrink, we don't need to be extra cautious |
777 | on guessing if the unrolling is going to be profitable. */ | |
778 | if (unr_insns | |
779 | /* If there is IV variable that will become constant, we save | |
780 | one instruction in the loop prologue we do not account | |
781 | otherwise. */ | |
782 | <= ninsns + (size.constant_iv != false)) | |
783 | ; | |
b7442c2f JH |
784 | /* We unroll only inner loops, because we do not consider it profitable |
785 | otheriwse. We still can cancel loopback edge of not rolling loop; | |
786 | this is always a good idea. */ | |
519cac4a JH |
787 | else if (ul == UL_NO_GROWTH) |
788 | { | |
789 | if (dump_file && (dump_flags & TDF_DETAILS)) | |
790 | fprintf (dump_file, "Not unrolling loop %d: size would grow.\n", | |
791 | loop->num); | |
792 | return false; | |
793 | } | |
1c5fd343 | 794 | /* Outer loops tend to be less interesting candidates for complete |
519cac4a JH |
795 | unrolling unless we can do a lot of propagation into the inner loop |
796 | body. For now we disable outer loop unrolling when the code would | |
797 | grow. */ | |
798 | else if (loop->inner) | |
b7442c2f JH |
799 | { |
800 | if (dump_file && (dump_flags & TDF_DETAILS)) | |
519cac4a | 801 | fprintf (dump_file, "Not unrolling loop %d: " |
b7442c2f JH |
802 | "it is not innermost and code would grow.\n", |
803 | loop->num); | |
804 | return false; | |
805 | } | |
519cac4a JH |
806 | /* If there is call on a hot path through the loop, then |
807 | there is most probably not much to optimize. */ | |
808 | else if (size.num_non_pure_calls_on_hot_path) | |
678e7c65 RG |
809 | { |
810 | if (dump_file && (dump_flags & TDF_DETAILS)) | |
519cac4a JH |
811 | fprintf (dump_file, "Not unrolling loop %d: " |
812 | "contains call and code would grow.\n", | |
678e7c65 RG |
813 | loop->num); |
814 | return false; | |
815 | } | |
519cac4a JH |
816 | /* If there is pure/const call in the function, then we |
817 | can still optimize the unrolled loop body if it contains | |
818 | some other interesting code than the calls and code | |
819 | storing or cumulating the return value. */ | |
820 | else if (size.num_pure_calls_on_hot_path | |
821 | /* One IV increment, one test, one ivtmp store | |
c0d18c6c | 822 | and one useful stmt. That is about minimal loop |
519cac4a JH |
823 | doing pure call. */ |
824 | && (size.non_call_stmts_on_hot_path | |
825 | <= 3 + size.num_pure_calls_on_hot_path)) | |
91a01f21 | 826 | { |
91a01f21 | 827 | if (dump_file && (dump_flags & TDF_DETAILS)) |
519cac4a JH |
828 | fprintf (dump_file, "Not unrolling loop %d: " |
829 | "contains just pure calls and code would grow.\n", | |
830 | loop->num); | |
831 | return false; | |
832 | } | |
0f33a54e | 833 | /* Complete unrolling is a major win when control flow is removed and |
519cac4a JH |
834 | one big basic block is created. If the loop contains control flow |
835 | the optimization may still be a win because of eliminating the loop | |
836 | overhead but it also may blow the branch predictor tables. | |
837 | Limit number of branches on the hot path through the peeled | |
838 | sequence. */ | |
839 | else if (size.num_branches_on_hot_path * (int)n_unroll | |
840 | > PARAM_VALUE (PARAM_MAX_PEEL_BRANCHES)) | |
841 | { | |
842 | if (dump_file && (dump_flags & TDF_DETAILS)) | |
843 | fprintf (dump_file, "Not unrolling loop %d: " | |
844 | " number of branches on hot path in the unrolled sequence" | |
845 | " reach --param max-peel-branches limit.\n", | |
846 | loop->num); | |
847 | return false; | |
848 | } | |
849 | else if (unr_insns | |
850 | > (unsigned) PARAM_VALUE (PARAM_MAX_COMPLETELY_PEELED_INSNS)) | |
851 | { | |
852 | if (dump_file && (dump_flags & TDF_DETAILS)) | |
853 | fprintf (dump_file, "Not unrolling loop %d: " | |
854 | "(--param max-completely-peeled-insns limit reached).\n", | |
b7442c2f | 855 | loop->num); |
d6e840ee | 856 | return false; |
91a01f21 | 857 | } |
8fb0ae82 JH |
858 | if (!n_unroll) |
859 | dump_printf_loc (report_flags, locus, | |
860 | "loop turned into non-loop; it never loops.\n"); | |
178df94f | 861 | |
6580ee77 | 862 | initialize_original_copy_tables (); |
7ba9e72d | 863 | auto_sbitmap wont_exit (n_unroll + 1); |
98bdbb39 JH |
864 | if (exit && niter |
865 | && TREE_CODE (niter) == INTEGER_CST | |
866 | && wi::leu_p (n_unroll, wi::to_widest (niter))) | |
867 | { | |
868 | bitmap_ones (wont_exit); | |
869 | if (wi::eq_p (wi::to_widest (niter), n_unroll) | |
870 | || edge_to_cancel) | |
871 | bitmap_clear_bit (wont_exit, 0); | |
872 | } | |
873 | else | |
874 | { | |
875 | exit = NULL; | |
876 | bitmap_clear (wont_exit); | |
877 | } | |
178df94f | 878 | |
726a989a RB |
879 | if (!gimple_duplicate_loop_to_header_edge (loop, loop_preheader_edge (loop), |
880 | n_unroll, wont_exit, | |
b66af6ed | 881 | exit, &edges_to_remove, |
726a989a RB |
882 | DLTHE_FLAG_UPDATE_FREQ |
883 | | DLTHE_FLAG_COMPLETTE_PEEL)) | |
82b85a85 | 884 | { |
6580ee77 | 885 | free_original_copy_tables (); |
519cac4a JH |
886 | if (dump_file && (dump_flags & TDF_DETAILS)) |
887 | fprintf (dump_file, "Failed to duplicate the loop\n"); | |
82b85a85 ZD |
888 | return false; |
889 | } | |
6c74788e | 890 | |
6580ee77 | 891 | free_original_copy_tables (); |
82b85a85 | 892 | } |
82b85a85 | 893 | |
b7442c2f JH |
894 | /* Remove the conditional from the last copy of the loop. */ |
895 | if (edge_to_cancel) | |
896 | { | |
538dd0b7 | 897 | gcond *cond = as_a <gcond *> (last_stmt (edge_to_cancel->src)); |
98bdbb39 | 898 | force_edge_cold (edge_to_cancel, true); |
b7442c2f JH |
899 | if (edge_to_cancel->flags & EDGE_TRUE_VALUE) |
900 | gimple_cond_make_false (cond); | |
901 | else | |
902 | gimple_cond_make_true (cond); | |
903 | update_stmt (cond); | |
904 | /* Do not remove the path. Doing so may remove outer loop | |
905 | and confuse bookkeeping code in tree_unroll_loops_completelly. */ | |
906 | } | |
b7442c2f | 907 | |
f2a1b469 | 908 | /* Store the loop for later unlooping and exit removal. */ |
9771b263 DN |
909 | loops_to_unloop.safe_push (loop); |
910 | loops_to_unloop_nunroll.safe_push (n_unroll); | |
84d65814 | 911 | |
e25a6711 | 912 | if (dump_enabled_p ()) |
b7442c2f JH |
913 | { |
914 | if (!n_unroll) | |
e25a6711 | 915 | dump_printf_loc (MSG_OPTIMIZED_LOCATIONS | TDF_DETAILS, locus, |
9cc1fb4b | 916 | "loop turned into non-loop; it never loops\n"); |
b7442c2f | 917 | else |
e25a6711 TJ |
918 | { |
919 | dump_printf_loc (MSG_OPTIMIZED_LOCATIONS | TDF_DETAILS, locus, | |
9cc1fb4b XDL |
920 | "loop with %d iterations completely unrolled", |
921 | (int) (n_unroll + 1)); | |
3995f3a2 | 922 | if (loop->header->count.initialized_p ()) |
e25a6711 TJ |
923 | dump_printf (MSG_OPTIMIZED_LOCATIONS | TDF_DETAILS, |
924 | " (header execution count %d)", | |
3995f3a2 | 925 | (int)loop->header->count.to_gcov_type ()); |
e25a6711 TJ |
926 | dump_printf (MSG_OPTIMIZED_LOCATIONS | TDF_DETAILS, "\n"); |
927 | } | |
928 | } | |
929 | ||
930 | if (dump_file && (dump_flags & TDF_DETAILS)) | |
931 | { | |
b7442c2f JH |
932 | if (exit) |
933 | fprintf (dump_file, "Exit condition of peeled iterations was " | |
934 | "eliminated.\n"); | |
935 | if (edge_to_cancel) | |
936 | fprintf (dump_file, "Last iteration exit edge was proved true.\n"); | |
937 | else | |
938 | fprintf (dump_file, "Latch of last iteration was marked by " | |
939 | "__builtin_unreachable ().\n"); | |
940 | } | |
82b85a85 ZD |
941 | |
942 | return true; | |
943 | } | |
944 | ||
f8934be7 JH |
945 | /* Return number of instructions after peeling. */ |
946 | static unsigned HOST_WIDE_INT | |
947 | estimated_peeled_sequence_size (struct loop_size *size, | |
948 | unsigned HOST_WIDE_INT npeel) | |
949 | { | |
950 | return MAX (npeel * (HOST_WIDE_INT) (size->overall | |
951 | - size->eliminated_by_peeling), 1); | |
952 | } | |
953 | ||
954 | /* If the loop is expected to iterate N times and is | |
955 | small enough, duplicate the loop body N+1 times before | |
956 | the loop itself. This way the hot path will never | |
957 | enter the loop. | |
958 | Parameters are the same as for try_unroll_loops_completely */ | |
959 | ||
960 | static bool | |
961 | try_peel_loop (struct loop *loop, | |
962 | edge exit, tree niter, | |
963 | HOST_WIDE_INT maxiter) | |
964 | { | |
3c10b1e6 | 965 | HOST_WIDE_INT npeel; |
f8934be7 JH |
966 | struct loop_size size; |
967 | int peeled_size; | |
f8934be7 | 968 | |
a448ff40 JH |
969 | if (!flag_peel_loops || PARAM_VALUE (PARAM_MAX_PEEL_TIMES) <= 0 |
970 | || !peeled_loops) | |
f8934be7 JH |
971 | return false; |
972 | ||
a448ff40 JH |
973 | if (bitmap_bit_p (peeled_loops, loop->num)) |
974 | { | |
975 | if (dump_file) | |
976 | fprintf (dump_file, "Not peeling: loop is already peeled\n"); | |
977 | return false; | |
978 | } | |
979 | ||
eb897cfc JH |
980 | /* Peel only innermost loops. |
981 | While the code is perfectly capable of peeling non-innermost loops, | |
982 | the heuristics would probably need some improvements. */ | |
f8934be7 JH |
983 | if (loop->inner) |
984 | { | |
985 | if (dump_file) | |
986 | fprintf (dump_file, "Not peeling: outer loop\n"); | |
987 | return false; | |
988 | } | |
989 | ||
990 | if (!optimize_loop_for_speed_p (loop)) | |
991 | { | |
992 | if (dump_file) | |
993 | fprintf (dump_file, "Not peeling: cold loop\n"); | |
994 | return false; | |
995 | } | |
996 | ||
997 | /* Check if there is an estimate on the number of iterations. */ | |
998 | npeel = estimated_loop_iterations_int (loop); | |
a448ff40 JH |
999 | if (npeel < 0) |
1000 | npeel = likely_max_loop_iterations_int (loop); | |
f8934be7 JH |
1001 | if (npeel < 0) |
1002 | { | |
1003 | if (dump_file) | |
1004 | fprintf (dump_file, "Not peeling: number of iterations is not " | |
1005 | "estimated\n"); | |
1006 | return false; | |
1007 | } | |
1008 | if (maxiter >= 0 && maxiter <= npeel) | |
1009 | { | |
1010 | if (dump_file) | |
1011 | fprintf (dump_file, "Not peeling: upper bound is known so can " | |
1c5fd343 | 1012 | "unroll completely\n"); |
f8934be7 JH |
1013 | return false; |
1014 | } | |
1015 | ||
1016 | /* We want to peel estimated number of iterations + 1 (so we never | |
1017 | enter the loop on quick path). Check against PARAM_MAX_PEEL_TIMES | |
1018 | and be sure to avoid overflows. */ | |
1019 | if (npeel > PARAM_VALUE (PARAM_MAX_PEEL_TIMES) - 1) | |
1020 | { | |
1021 | if (dump_file) | |
1022 | fprintf (dump_file, "Not peeling: rolls too much " | |
3c10b1e6 | 1023 | "(%i + 1 > --param max-peel-times)\n", (int) npeel); |
f8934be7 JH |
1024 | return false; |
1025 | } | |
1026 | npeel++; | |
1027 | ||
1028 | /* Check peeled loops size. */ | |
1029 | tree_estimate_loop_size (loop, exit, NULL, &size, | |
1030 | PARAM_VALUE (PARAM_MAX_PEELED_INSNS)); | |
3c10b1e6 | 1031 | if ((peeled_size = estimated_peeled_sequence_size (&size, (int) npeel)) |
f8934be7 JH |
1032 | > PARAM_VALUE (PARAM_MAX_PEELED_INSNS)) |
1033 | { | |
1034 | if (dump_file) | |
1035 | fprintf (dump_file, "Not peeling: peeled sequence size is too large " | |
1036 | "(%i insns > --param max-peel-insns)", peeled_size); | |
1037 | return false; | |
1038 | } | |
1039 | ||
1040 | /* Duplicate possibly eliminating the exits. */ | |
1041 | initialize_original_copy_tables (); | |
7ba9e72d | 1042 | auto_sbitmap wont_exit (npeel + 1); |
eb897cfc JH |
1043 | if (exit && niter |
1044 | && TREE_CODE (niter) == INTEGER_CST | |
1045 | && wi::leu_p (npeel, wi::to_widest (niter))) | |
1046 | { | |
1047 | bitmap_ones (wont_exit); | |
a448ff40 | 1048 | bitmap_clear_bit (wont_exit, 0); |
eb897cfc JH |
1049 | } |
1050 | else | |
1051 | { | |
1052 | exit = NULL; | |
1053 | bitmap_clear (wont_exit); | |
1054 | } | |
f8934be7 JH |
1055 | if (!gimple_duplicate_loop_to_header_edge (loop, loop_preheader_edge (loop), |
1056 | npeel, wont_exit, | |
b66af6ed | 1057 | exit, &edges_to_remove, |
eb897cfc | 1058 | DLTHE_FLAG_UPDATE_FREQ)) |
f8934be7 JH |
1059 | { |
1060 | free_original_copy_tables (); | |
f8934be7 JH |
1061 | return false; |
1062 | } | |
f8934be7 JH |
1063 | free_original_copy_tables (); |
1064 | if (dump_file && (dump_flags & TDF_DETAILS)) | |
1065 | { | |
1066 | fprintf (dump_file, "Peeled loop %d, %i times.\n", | |
3c10b1e6 | 1067 | loop->num, (int) npeel); |
f8934be7 | 1068 | } |
eb897cfc JH |
1069 | if (loop->any_estimate) |
1070 | { | |
1071 | if (wi::ltu_p (npeel, loop->nb_iterations_estimate)) | |
1072 | loop->nb_iterations_estimate -= npeel; | |
1073 | else | |
1074 | loop->nb_iterations_estimate = 0; | |
1075 | } | |
f8934be7 | 1076 | if (loop->any_upper_bound) |
eb897cfc | 1077 | { |
a448ff40 | 1078 | if (wi::ltu_p (npeel, loop->nb_iterations_upper_bound)) |
eb897cfc JH |
1079 | loop->nb_iterations_upper_bound -= npeel; |
1080 | else | |
1081 | loop->nb_iterations_upper_bound = 0; | |
1082 | } | |
105e29c5 | 1083 | if (loop->any_likely_upper_bound) |
eb897cfc | 1084 | { |
a448ff40 | 1085 | if (wi::ltu_p (npeel, loop->nb_iterations_likely_upper_bound)) |
eb897cfc JH |
1086 | loop->nb_iterations_likely_upper_bound -= npeel; |
1087 | else | |
1088 | { | |
1089 | loop->any_estimate = true; | |
1090 | loop->nb_iterations_estimate = 0; | |
1091 | loop->nb_iterations_likely_upper_bound = 0; | |
1092 | } | |
1093 | } | |
3995f3a2 | 1094 | profile_count entry_count = profile_count::zero (); |
eb897cfc JH |
1095 | int entry_freq = 0; |
1096 | ||
b66af6ed | 1097 | edge e; |
eb897cfc JH |
1098 | edge_iterator ei; |
1099 | FOR_EACH_EDGE (e, ei, loop->header->preds) | |
1100 | if (e->src != loop->latch) | |
1101 | { | |
3995f3a2 JH |
1102 | if (e->src->count.initialized_p ()) |
1103 | entry_count = e->src->count + e->src->count; | |
eb897cfc JH |
1104 | entry_freq += e->src->frequency; |
1105 | gcc_assert (!flow_bb_inside_loop_p (loop, e->src)); | |
1106 | } | |
1107 | int scale = 1; | |
3995f3a2 JH |
1108 | if (loop->header->count > 0) |
1109 | scale = entry_count.probability_in (loop->header->count); | |
eb897cfc JH |
1110 | else if (loop->header->frequency) |
1111 | scale = RDIV (entry_freq * REG_BR_PROB_BASE, loop->header->frequency); | |
1112 | scale_loop_profile (loop, scale, 0); | |
a448ff40 | 1113 | bitmap_set_bit (peeled_loops, loop->num); |
f8934be7 JH |
1114 | return true; |
1115 | } | |
d73be268 | 1116 | /* Adds a canonical induction variable to LOOP if suitable. |
b8698a0f | 1117 | CREATE_IV is true if we may create a new iv. UL determines |
91a01f21 | 1118 | which loops we are allowed to completely unroll. If TRY_EVAL is true, we try |
b8698a0f | 1119 | to determine the number of iterations of a loop by direct evaluation. |
f2a1b469 | 1120 | Returns true if cfg is changed. */ |
82b85a85 ZD |
1121 | |
1122 | static bool | |
d73be268 | 1123 | canonicalize_loop_induction_variables (struct loop *loop, |
91a01f21 | 1124 | bool create_iv, enum unroll_level ul, |
f2a1b469 | 1125 | bool try_eval) |
82b85a85 ZD |
1126 | { |
1127 | edge exit = NULL; | |
1128 | tree niter; | |
f2a1b469 JH |
1129 | HOST_WIDE_INT maxiter; |
1130 | bool modified = false; | |
e25a6711 | 1131 | location_t locus = UNKNOWN_LOCATION; |
82b85a85 | 1132 | |
a14865db | 1133 | niter = number_of_latch_executions (loop); |
e25a6711 | 1134 | exit = single_exit (loop); |
82b85a85 | 1135 | if (TREE_CODE (niter) == INTEGER_CST) |
e25a6711 | 1136 | locus = gimple_location (last_stmt (exit->src)); |
ca4c3169 ZD |
1137 | else |
1138 | { | |
1139 | /* If the loop has more than one exit, try checking all of them | |
1140 | for # of iterations determinable through scev. */ | |
e25a6711 | 1141 | if (!exit) |
ca4c3169 ZD |
1142 | niter = find_loop_niter (loop, &exit); |
1143 | ||
1144 | /* Finally if everything else fails, try brute force evaluation. */ | |
1145 | if (try_eval | |
1146 | && (chrec_contains_undetermined (niter) | |
1147 | || TREE_CODE (niter) != INTEGER_CST)) | |
1148 | niter = find_loop_niter_by_eval (loop, &exit); | |
1149 | ||
e25a6711 TJ |
1150 | if (exit) |
1151 | locus = gimple_location (last_stmt (exit->src)); | |
1152 | ||
b7442c2f JH |
1153 | if (TREE_CODE (niter) != INTEGER_CST) |
1154 | exit = NULL; | |
ca4c3169 | 1155 | } |
82b85a85 | 1156 | |
b7442c2f JH |
1157 | /* We work exceptionally hard here to estimate the bound |
1158 | by find_loop_niter_by_eval. Be sure to keep it for future. */ | |
1159 | if (niter && TREE_CODE (niter) == INTEGER_CST) | |
0450d718 | 1160 | { |
807e902e | 1161 | record_niter_bound (loop, wi::to_widest (niter), |
0450d718 JH |
1162 | exit == single_likely_exit (loop), true); |
1163 | } | |
b7442c2f | 1164 | |
f2a1b469 JH |
1165 | /* Force re-computation of loop bounds so we can remove redundant exits. */ |
1166 | maxiter = max_loop_iterations_int (loop); | |
1167 | ||
b7442c2f JH |
1168 | if (dump_file && (dump_flags & TDF_DETAILS) |
1169 | && TREE_CODE (niter) == INTEGER_CST) | |
82b85a85 ZD |
1170 | { |
1171 | fprintf (dump_file, "Loop %d iterates ", loop->num); | |
1172 | print_generic_expr (dump_file, niter, TDF_SLIM); | |
1173 | fprintf (dump_file, " times.\n"); | |
1174 | } | |
b7442c2f | 1175 | if (dump_file && (dump_flags & TDF_DETAILS) |
f2a1b469 | 1176 | && maxiter >= 0) |
b7442c2f JH |
1177 | { |
1178 | fprintf (dump_file, "Loop %d iterates at most %i times.\n", loop->num, | |
f2a1b469 | 1179 | (int)maxiter); |
b7442c2f | 1180 | } |
105e29c5 JH |
1181 | if (dump_file && (dump_flags & TDF_DETAILS) |
1182 | && likely_max_loop_iterations_int (loop) >= 0) | |
1183 | { | |
98bdbb39 JH |
1184 | fprintf (dump_file, "Loop %d likely iterates at most %i times.\n", |
1185 | loop->num, (int)likely_max_loop_iterations_int (loop)); | |
105e29c5 | 1186 | } |
82b85a85 | 1187 | |
f2a1b469 JH |
1188 | /* Remove exits that are known to be never taken based on loop bound. |
1189 | Needs to be called after compilation of max_loop_iterations_int that | |
1190 | populates the loop bounds. */ | |
1191 | modified |= remove_redundant_iv_tests (loop); | |
1192 | ||
e25a6711 | 1193 | if (try_unroll_loop_completely (loop, exit, niter, ul, maxiter, locus)) |
82b85a85 ZD |
1194 | return true; |
1195 | ||
b7442c2f | 1196 | if (create_iv |
0450d718 JH |
1197 | && niter && !chrec_contains_undetermined (niter) |
1198 | && exit && just_once_each_iteration_p (loop, exit->src)) | |
82b85a85 ZD |
1199 | create_canonical_iv (loop, exit, niter); |
1200 | ||
f8934be7 JH |
1201 | if (ul == UL_ALL) |
1202 | modified |= try_peel_loop (loop, exit, niter, maxiter); | |
1203 | ||
f2a1b469 | 1204 | return modified; |
82b85a85 ZD |
1205 | } |
1206 | ||
1207 | /* The main entry point of the pass. Adds canonical induction variables | |
d73be268 | 1208 | to the suitable loops. */ |
82b85a85 | 1209 | |
c7f965b6 | 1210 | unsigned int |
d73be268 | 1211 | canonicalize_induction_variables (void) |
82b85a85 | 1212 | { |
82b85a85 | 1213 | struct loop *loop; |
2b271002 | 1214 | bool changed = false; |
b7442c2f | 1215 | bool irred_invalidated = false; |
1a7de201 | 1216 | bitmap loop_closed_ssa_invalidated = BITMAP_ALLOC (NULL); |
b8698a0f | 1217 | |
adb7eaa2 | 1218 | estimate_numbers_of_iterations (cfun); |
f2a1b469 | 1219 | |
f0bd40b1 | 1220 | FOR_EACH_LOOP (loop, LI_FROM_INNERMOST) |
82b85a85 | 1221 | { |
42fd6772 ZD |
1222 | changed |= canonicalize_loop_induction_variables (loop, |
1223 | true, UL_SINGLE_ITER, | |
f2a1b469 | 1224 | true); |
82b85a85 | 1225 | } |
18eb76eb | 1226 | gcc_assert (!need_ssa_update_p (cfun)); |
82b85a85 | 1227 | |
f2a1b469 | 1228 | unloop_loops (loop_closed_ssa_invalidated, &irred_invalidated); |
b7442c2f JH |
1229 | if (irred_invalidated |
1230 | && loops_state_satisfies_p (LOOPS_HAVE_MARKED_IRREDUCIBLE_REGIONS)) | |
1231 | mark_irreducible_loops (); | |
1232 | ||
47bcd07d ZD |
1233 | /* Clean up the information about numbers of iterations, since brute force |
1234 | evaluation could reveal new information. */ | |
01f1c24e | 1235 | free_numbers_of_iterations_estimates (cfun); |
47bcd07d ZD |
1236 | scev_reset (); |
1237 | ||
1a7de201 JH |
1238 | if (!bitmap_empty_p (loop_closed_ssa_invalidated)) |
1239 | { | |
1240 | gcc_checking_assert (loops_state_satisfies_p (LOOP_CLOSED_SSA)); | |
1241 | rewrite_into_loop_closed_ssa (NULL, TODO_update_ssa); | |
1242 | } | |
1243 | BITMAP_FREE (loop_closed_ssa_invalidated); | |
1244 | ||
82b85a85 | 1245 | if (changed) |
c7f965b6 AP |
1246 | return TODO_cleanup_cfg; |
1247 | return 0; | |
82b85a85 ZD |
1248 | } |
1249 | ||
fea4ea73 EB |
1250 | /* Propagate constant SSA_NAMEs defined in basic block BB. */ |
1251 | ||
1252 | static void | |
1253 | propagate_constants_for_unrolling (basic_block bb) | |
1254 | { | |
fea4ea73 | 1255 | /* Look for degenerate PHI nodes with constant argument. */ |
538dd0b7 | 1256 | for (gphi_iterator gsi = gsi_start_phis (bb); !gsi_end_p (gsi); ) |
fea4ea73 | 1257 | { |
538dd0b7 | 1258 | gphi *phi = gsi.phi (); |
fea4ea73 EB |
1259 | tree result = gimple_phi_result (phi); |
1260 | tree arg = gimple_phi_arg_def (phi, 0); | |
1261 | ||
ef318184 RB |
1262 | if (! SSA_NAME_OCCURS_IN_ABNORMAL_PHI (result) |
1263 | && gimple_phi_num_args (phi) == 1 | |
7fd1a747 | 1264 | && CONSTANT_CLASS_P (arg)) |
fea4ea73 | 1265 | { |
57548bf5 | 1266 | replace_uses_by (result, arg); |
fea4ea73 EB |
1267 | gsi_remove (&gsi, true); |
1268 | release_ssa_name (result); | |
1269 | } | |
1270 | else | |
1271 | gsi_next (&gsi); | |
1272 | } | |
1273 | ||
1274 | /* Look for assignments to SSA names with constant RHS. */ | |
538dd0b7 | 1275 | for (gimple_stmt_iterator gsi = gsi_start_bb (bb); !gsi_end_p (gsi); ) |
fea4ea73 | 1276 | { |
355fe088 | 1277 | gimple *stmt = gsi_stmt (gsi); |
fea4ea73 EB |
1278 | tree lhs; |
1279 | ||
1280 | if (is_gimple_assign (stmt) | |
7fd1a747 | 1281 | && TREE_CODE_CLASS (gimple_assign_rhs_code (stmt)) == tcc_constant |
fea4ea73 | 1282 | && (lhs = gimple_assign_lhs (stmt), TREE_CODE (lhs) == SSA_NAME) |
9ff09a22 | 1283 | && !SSA_NAME_OCCURS_IN_ABNORMAL_PHI (lhs)) |
fea4ea73 | 1284 | { |
57548bf5 | 1285 | replace_uses_by (lhs, gimple_assign_rhs1 (stmt)); |
fea4ea73 EB |
1286 | gsi_remove (&gsi, true); |
1287 | release_ssa_name (lhs); | |
1288 | } | |
1289 | else | |
1290 | gsi_next (&gsi); | |
1291 | } | |
1292 | } | |
1293 | ||
1b2253d4 RB |
1294 | /* Process loops from innermost to outer, stopping at the innermost |
1295 | loop we unrolled. */ | |
1296 | ||
1297 | static bool | |
1298 | tree_unroll_loops_completely_1 (bool may_increase_size, bool unroll_outer, | |
b66af6ed | 1299 | bitmap father_bbs, struct loop *loop) |
1b2253d4 RB |
1300 | { |
1301 | struct loop *loop_father; | |
1302 | bool changed = false; | |
1303 | struct loop *inner; | |
1304 | enum unroll_level ul; | |
1305 | ||
1306 | /* Process inner loops first. */ | |
1307 | for (inner = loop->inner; inner != NULL; inner = inner->next) | |
1308 | changed |= tree_unroll_loops_completely_1 (may_increase_size, | |
b66af6ed | 1309 | unroll_outer, father_bbs, |
1b2253d4 RB |
1310 | inner); |
1311 | ||
1312 | /* If we changed an inner loop we cannot process outer loops in this | |
1313 | iteration because SSA form is not up-to-date. Continue with | |
1314 | siblings of outer loops instead. */ | |
1315 | if (changed) | |
1316 | return true; | |
1317 | ||
74bf76ed JJ |
1318 | /* Don't unroll #pragma omp simd loops until the vectorizer |
1319 | attempts to vectorize those. */ | |
b15b5979 | 1320 | if (loop->force_vectorize) |
74bf76ed JJ |
1321 | return false; |
1322 | ||
1b2253d4 RB |
1323 | /* Try to unroll this loop. */ |
1324 | loop_father = loop_outer (loop); | |
1325 | if (!loop_father) | |
1326 | return false; | |
1327 | ||
1328 | if (may_increase_size && optimize_loop_nest_for_speed_p (loop) | |
1329 | /* Unroll outermost loops only if asked to do so or they do | |
1330 | not cause code growth. */ | |
1331 | && (unroll_outer || loop_outer (loop_father))) | |
1332 | ul = UL_ALL; | |
1333 | else | |
1334 | ul = UL_NO_GROWTH; | |
1335 | ||
1336 | if (canonicalize_loop_induction_variables | |
1337 | (loop, false, ul, !flag_tree_loop_ivcanon)) | |
1338 | { | |
1339 | /* If we'll continue unrolling, we need to propagate constants | |
1340 | within the new basic blocks to fold away induction variable | |
1341 | computations; otherwise, the size might blow up before the | |
1342 | iteration is complete and the IR eventually cleaned up. */ | |
b66af6ed RB |
1343 | if (loop_outer (loop_father)) |
1344 | bitmap_set_bit (father_bbs, loop_father->header->index); | |
1b2253d4 RB |
1345 | |
1346 | return true; | |
1347 | } | |
1348 | ||
1349 | return false; | |
1350 | } | |
1351 | ||
91a01f21 ZD |
1352 | /* Unroll LOOPS completely if they iterate just few times. Unless |
1353 | MAY_INCREASE_SIZE is true, perform the unrolling only if the | |
1354 | size of the code does not increase. */ | |
82b85a85 | 1355 | |
c7f965b6 | 1356 | unsigned int |
d6e840ee | 1357 | tree_unroll_loops_completely (bool may_increase_size, bool unroll_outer) |
82b85a85 | 1358 | { |
b66af6ed | 1359 | bitmap father_bbs = BITMAP_ALLOC (NULL); |
d6e840ee | 1360 | bool changed; |
30bc1dca | 1361 | int iteration = 0; |
1a7de201 | 1362 | bool irred_invalidated = false; |
82b85a85 | 1363 | |
adb7eaa2 RB |
1364 | estimate_numbers_of_iterations (cfun); |
1365 | ||
d6e840ee | 1366 | do |
82b85a85 | 1367 | { |
d6e840ee | 1368 | changed = false; |
1a7de201 JH |
1369 | bitmap loop_closed_ssa_invalidated = NULL; |
1370 | ||
1371 | if (loops_state_satisfies_p (LOOP_CLOSED_SSA)) | |
1372 | loop_closed_ssa_invalidated = BITMAP_ALLOC (NULL); | |
82b85a85 | 1373 | |
61183076 | 1374 | free_numbers_of_iterations_estimates (cfun); |
adb7eaa2 | 1375 | estimate_numbers_of_iterations (cfun); |
f2a1b469 | 1376 | |
1b2253d4 | 1377 | changed = tree_unroll_loops_completely_1 (may_increase_size, |
b66af6ed | 1378 | unroll_outer, father_bbs, |
1b2253d4 | 1379 | current_loops->tree_root); |
d6e840ee RG |
1380 | if (changed) |
1381 | { | |
fea4ea73 EB |
1382 | unsigned i; |
1383 | ||
f2a1b469 | 1384 | unloop_loops (loop_closed_ssa_invalidated, &irred_invalidated); |
b7442c2f | 1385 | |
f2a1b469 | 1386 | /* We can not use TODO_update_ssa_no_phi because VOPS gets confused. */ |
1a7de201 JH |
1387 | if (loop_closed_ssa_invalidated |
1388 | && !bitmap_empty_p (loop_closed_ssa_invalidated)) | |
1389 | rewrite_into_loop_closed_ssa (loop_closed_ssa_invalidated, | |
1390 | TODO_update_ssa); | |
1391 | else | |
1392 | update_ssa (TODO_update_ssa); | |
18eb76eb | 1393 | |
b66af6ed RB |
1394 | /* father_bbs is a bitmap of loop father header BB indices. |
1395 | Translate that to what non-root loops these BBs belong to now. */ | |
1396 | bitmap_iterator bi; | |
1397 | bitmap fathers = BITMAP_ALLOC (NULL); | |
1398 | EXECUTE_IF_SET_IN_BITMAP (father_bbs, 0, i, bi) | |
1399 | { | |
1400 | basic_block unrolled_loop_bb = BASIC_BLOCK_FOR_FN (cfun, i); | |
1401 | if (! unrolled_loop_bb) | |
1402 | continue; | |
1403 | if (loop_outer (unrolled_loop_bb->loop_father)) | |
1404 | bitmap_set_bit (fathers, | |
1405 | unrolled_loop_bb->loop_father->num); | |
1406 | } | |
1407 | bitmap_clear (father_bbs); | |
fea4ea73 | 1408 | /* Propagate the constants within the new basic blocks. */ |
b66af6ed RB |
1409 | EXECUTE_IF_SET_IN_BITMAP (fathers, 0, i, bi) |
1410 | { | |
1411 | loop_p father = get_loop (cfun, i); | |
1412 | basic_block *body = get_loop_body_in_dom_order (father); | |
1413 | for (unsigned j = 0; j < father->num_nodes; j++) | |
1414 | propagate_constants_for_unrolling (body[j]); | |
1415 | free (body); | |
1416 | } | |
1417 | BITMAP_FREE (fathers); | |
fea4ea73 | 1418 | |
d6e840ee RG |
1419 | /* This will take care of removing completely unrolled loops |
1420 | from the loop structures so we can continue unrolling now | |
1421 | innermost loops. */ | |
ace4eb90 RG |
1422 | if (cleanup_tree_cfg ()) |
1423 | update_ssa (TODO_update_ssa_only_virtuals); | |
d6e840ee RG |
1424 | |
1425 | /* Clean up the information about numbers of iterations, since | |
1426 | complete unrolling might have invalidated it. */ | |
1427 | scev_reset (); | |
b2b29377 | 1428 | if (flag_checking && loops_state_satisfies_p (LOOP_CLOSED_SSA)) |
1a7de201 | 1429 | verify_loop_closed_ssa (true); |
d6e840ee | 1430 | } |
1a7de201 JH |
1431 | if (loop_closed_ssa_invalidated) |
1432 | BITMAP_FREE (loop_closed_ssa_invalidated); | |
d6e840ee | 1433 | } |
30bc1dca RG |
1434 | while (changed |
1435 | && ++iteration <= PARAM_VALUE (PARAM_MAX_UNROLL_ITERATIONS)); | |
47bcd07d | 1436 | |
b66af6ed | 1437 | BITMAP_FREE (father_bbs); |
fea4ea73 | 1438 | |
1a7de201 JH |
1439 | if (irred_invalidated |
1440 | && loops_state_satisfies_p (LOOPS_HAVE_MARKED_IRREDUCIBLE_REGIONS)) | |
1441 | mark_irreducible_loops (); | |
1442 | ||
c7f965b6 | 1443 | return 0; |
82b85a85 | 1444 | } |
71343877 AM |
1445 | |
1446 | /* Canonical induction variable creation pass. */ | |
1447 | ||
71343877 AM |
1448 | namespace { |
1449 | ||
1450 | const pass_data pass_data_iv_canon = | |
1451 | { | |
1452 | GIMPLE_PASS, /* type */ | |
1453 | "ivcanon", /* name */ | |
1454 | OPTGROUP_LOOP, /* optinfo_flags */ | |
71343877 AM |
1455 | TV_TREE_LOOP_IVCANON, /* tv_id */ |
1456 | ( PROP_cfg | PROP_ssa ), /* properties_required */ | |
1457 | 0, /* properties_provided */ | |
1458 | 0, /* properties_destroyed */ | |
1459 | 0, /* todo_flags_start */ | |
1460 | 0, /* todo_flags_finish */ | |
1461 | }; | |
1462 | ||
1463 | class pass_iv_canon : public gimple_opt_pass | |
1464 | { | |
1465 | public: | |
1466 | pass_iv_canon (gcc::context *ctxt) | |
1467 | : gimple_opt_pass (pass_data_iv_canon, ctxt) | |
1468 | {} | |
1469 | ||
1470 | /* opt_pass methods: */ | |
1a3d085c | 1471 | virtual bool gate (function *) { return flag_tree_loop_ivcanon != 0; } |
be55bfe6 | 1472 | virtual unsigned int execute (function *fun); |
71343877 AM |
1473 | |
1474 | }; // class pass_iv_canon | |
1475 | ||
be55bfe6 TS |
1476 | unsigned int |
1477 | pass_iv_canon::execute (function *fun) | |
1478 | { | |
1479 | if (number_of_loops (fun) <= 1) | |
1480 | return 0; | |
1481 | ||
1482 | return canonicalize_induction_variables (); | |
1483 | } | |
1484 | ||
71343877 AM |
1485 | } // anon namespace |
1486 | ||
1487 | gimple_opt_pass * | |
1488 | make_pass_iv_canon (gcc::context *ctxt) | |
1489 | { | |
1490 | return new pass_iv_canon (ctxt); | |
1491 | } | |
1492 | ||
1493 | /* Complete unrolling of loops. */ | |
1494 | ||
71343877 AM |
1495 | namespace { |
1496 | ||
1497 | const pass_data pass_data_complete_unroll = | |
1498 | { | |
1499 | GIMPLE_PASS, /* type */ | |
1500 | "cunroll", /* name */ | |
1501 | OPTGROUP_LOOP, /* optinfo_flags */ | |
71343877 AM |
1502 | TV_COMPLETE_UNROLL, /* tv_id */ |
1503 | ( PROP_cfg | PROP_ssa ), /* properties_required */ | |
1504 | 0, /* properties_provided */ | |
1505 | 0, /* properties_destroyed */ | |
1506 | 0, /* todo_flags_start */ | |
1507 | 0, /* todo_flags_finish */ | |
1508 | }; | |
1509 | ||
1510 | class pass_complete_unroll : public gimple_opt_pass | |
1511 | { | |
1512 | public: | |
1513 | pass_complete_unroll (gcc::context *ctxt) | |
1514 | : gimple_opt_pass (pass_data_complete_unroll, ctxt) | |
1515 | {} | |
1516 | ||
1517 | /* opt_pass methods: */ | |
be55bfe6 | 1518 | virtual unsigned int execute (function *); |
71343877 AM |
1519 | |
1520 | }; // class pass_complete_unroll | |
1521 | ||
be55bfe6 TS |
1522 | unsigned int |
1523 | pass_complete_unroll::execute (function *fun) | |
1524 | { | |
1525 | if (number_of_loops (fun) <= 1) | |
1526 | return 0; | |
1527 | ||
a448ff40 JH |
1528 | /* If we ever decide to run loop peeling more than once, we will need to |
1529 | track loops already peeled in loop structures themselves to avoid | |
1530 | re-peeling the same loop multiple times. */ | |
1531 | if (flag_peel_loops) | |
1532 | peeled_loops = BITMAP_ALLOC (NULL); | |
1533 | int val = tree_unroll_loops_completely (flag_unroll_loops | |
1534 | || flag_peel_loops | |
1535 | || optimize >= 3, true); | |
1536 | if (peeled_loops) | |
1537 | { | |
1538 | BITMAP_FREE (peeled_loops); | |
1539 | peeled_loops = NULL; | |
1540 | } | |
1541 | return val; | |
be55bfe6 TS |
1542 | } |
1543 | ||
71343877 AM |
1544 | } // anon namespace |
1545 | ||
1546 | gimple_opt_pass * | |
1547 | make_pass_complete_unroll (gcc::context *ctxt) | |
1548 | { | |
1549 | return new pass_complete_unroll (ctxt); | |
1550 | } | |
1551 | ||
1552 | /* Complete unrolling of inner loops. */ | |
1553 | ||
71343877 AM |
1554 | namespace { |
1555 | ||
1556 | const pass_data pass_data_complete_unrolli = | |
1557 | { | |
1558 | GIMPLE_PASS, /* type */ | |
1559 | "cunrolli", /* name */ | |
1560 | OPTGROUP_LOOP, /* optinfo_flags */ | |
71343877 AM |
1561 | TV_COMPLETE_UNROLL, /* tv_id */ |
1562 | ( PROP_cfg | PROP_ssa ), /* properties_required */ | |
1563 | 0, /* properties_provided */ | |
1564 | 0, /* properties_destroyed */ | |
1565 | 0, /* todo_flags_start */ | |
3bea341f | 1566 | 0, /* todo_flags_finish */ |
71343877 AM |
1567 | }; |
1568 | ||
1569 | class pass_complete_unrolli : public gimple_opt_pass | |
1570 | { | |
1571 | public: | |
1572 | pass_complete_unrolli (gcc::context *ctxt) | |
1573 | : gimple_opt_pass (pass_data_complete_unrolli, ctxt) | |
1574 | {} | |
1575 | ||
1576 | /* opt_pass methods: */ | |
1a3d085c | 1577 | virtual bool gate (function *) { return optimize >= 2; } |
be55bfe6 | 1578 | virtual unsigned int execute (function *); |
71343877 AM |
1579 | |
1580 | }; // class pass_complete_unrolli | |
1581 | ||
be55bfe6 TS |
1582 | unsigned int |
1583 | pass_complete_unrolli::execute (function *fun) | |
1584 | { | |
1585 | unsigned ret = 0; | |
1586 | ||
1587 | loop_optimizer_init (LOOPS_NORMAL | |
1588 | | LOOPS_HAVE_RECORDED_EXITS); | |
1589 | if (number_of_loops (fun) > 1) | |
1590 | { | |
1591 | scev_initialize (); | |
1592 | ret = tree_unroll_loops_completely (optimize >= 3, false); | |
be55bfe6 TS |
1593 | scev_finalize (); |
1594 | } | |
1595 | loop_optimizer_finalize (); | |
1596 | ||
1597 | return ret; | |
1598 | } | |
1599 | ||
71343877 AM |
1600 | } // anon namespace |
1601 | ||
1602 | gimple_opt_pass * | |
1603 | make_pass_complete_unrolli (gcc::context *ctxt) | |
1604 | { | |
1605 | return new pass_complete_unrolli (ctxt); | |
1606 | } | |
1607 | ||
1608 |