]>
Commit | Line | Data |
---|---|---|
862be747 | 1 | /* Natural loop analysis code for GNU compiler. |
711789cc | 2 | Copyright (C) 2002-2013 Free Software Foundation, Inc. |
862be747 | 3 | |
4 | This file is part of GCC. | |
5 | ||
6 | GCC is free software; you can redistribute it and/or modify it under | |
7 | the terms of the GNU General Public License as published by the Free | |
8c4c00c1 | 8 | Software Foundation; either version 3, or (at your option) any later |
862be747 | 9 | version. |
10 | ||
11 | GCC is distributed in the hope that it will be useful, but WITHOUT ANY | |
12 | WARRANTY; without even the implied warranty of MERCHANTABILITY or | |
13 | FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License | |
14 | for more details. | |
15 | ||
16 | You should have received a copy of the GNU General Public License | |
8c4c00c1 | 17 | along with GCC; see the file COPYING3. If not see |
18 | <http://www.gnu.org/licenses/>. */ | |
862be747 | 19 | |
20 | #include "config.h" | |
21 | #include "system.h" | |
22 | #include "coretypes.h" | |
23 | #include "tm.h" | |
24 | #include "rtl.h" | |
25 | #include "hard-reg-set.h" | |
42fe97ed | 26 | #include "obstack.h" |
862be747 | 27 | #include "basic-block.h" |
28 | #include "cfgloop.h" | |
29 | #include "expr.h" | |
3f9439d7 | 30 | #include "graphds.h" |
47dd2e78 | 31 | #include "params.h" |
862be747 | 32 | |
/* Target-dependent cost/register-availability parameters used by the loop
   optimizers (see struct target_cfgloop).  DEFAULT_TARGET_CFGLOOP is the
   statically allocated instance; with SWITCHABLE_TARGET, THIS_TARGET_CFGLOOP
   points at the data for the currently selected target.  */
struct target_cfgloop default_target_cfgloop;
#if SWITCHABLE_TARGET
struct target_cfgloop *this_target_cfgloop = &default_target_cfgloop;
#endif

862be747 | 38 | /* Checks whether BB is executed exactly once in each LOOP iteration. */ |
9c1ccc0f | 39 | |
862be747 | 40 | bool |
7ecb5bb2 | 41 | just_once_each_iteration_p (const struct loop *loop, const_basic_block bb) |
862be747 | 42 | { |
43 | /* It must be executed at least once each iteration. */ | |
0051c76a | 44 | if (!dominated_by_p (CDI_DOMINATORS, loop->latch, bb)) |
862be747 | 45 | return false; |
46 | ||
47 | /* And just once. */ | |
48 | if (bb->loop_father != loop) | |
49 | return false; | |
50 | ||
51 | /* But this was not enough. We might have some irreducible loop here. */ | |
52 | if (bb->flags & BB_IRREDUCIBLE_LOOP) | |
53 | return false; | |
54 | ||
55 | return true; | |
56 | } | |
57 | ||
/* Marks blocks and edges that are part of non-recognized loops; i.e. we
   throw away all latch edges and mark blocks inside any remaining cycle.
   Everything is a bit complicated due to fact we do not want to do this
   for parts of cycles that only "pass" through some loop -- i.e. for
   each cycle, we want to mark blocks that belong directly to innermost
   loop containing the whole cycle.

   Works on the auxiliary graph built below whose vertices represent both
   basic blocks and loops; returns true iff some irreducible region was
   found.  */

/* Vertex numbering in the auxiliary graph: each loop gets a vertex past
   the basic-block index space; each block is shifted by one.  */
#define LOOP_REPR(LOOP) ((LOOP)->num + last_basic_block_for_fn (cfun))
#define BB_REPR(BB) ((BB)->index + 1)

bool
mark_irreducible_loops (void)
{
  basic_block act;
  struct graph_edge *ge;
  edge e;
  edge_iterator ei;
  int src, dest;
  unsigned depth;
  struct graph *g;
  int num = number_of_loops (cfun);
  struct loop *cloop;
  bool irred_loop_found = false;
  int i;

  gcc_assert (current_loops != NULL);

  /* Reset the flags.  */
  FOR_BB_BETWEEN (act, ENTRY_BLOCK_PTR_FOR_FN (cfun),
		  EXIT_BLOCK_PTR_FOR_FN (cfun), next_bb)
    {
      act->flags &= ~BB_IRREDUCIBLE_LOOP;
      FOR_EACH_EDGE (e, ei, act->succs)
	e->flags &= ~EDGE_IRREDUCIBLE_LOOP;
    }

  /* Create the edge lists.  One vertex per block plus one per loop.  */
  g = new_graph (last_basic_block_for_fn (cfun) + num);

  FOR_BB_BETWEEN (act, ENTRY_BLOCK_PTR_FOR_FN (cfun),
		  EXIT_BLOCK_PTR_FOR_FN (cfun), next_bb)
    FOR_EACH_EDGE (e, ei, act->succs)
      {
	/* Ignore edges to exit.  */
	if (e->dest == EXIT_BLOCK_PTR_FOR_FN (cfun))
	  continue;

	src = BB_REPR (act);
	dest = BB_REPR (e->dest);

	/* Ignore latch edges.  */
	if (e->dest->loop_father->header == e->dest
	    && e->dest->loop_father->latch == act)
	  continue;

	/* Edges inside a single loop should be left where they are.  Edges
	   to subloop headers should lead to representative of the subloop,
	   but from the same place.

	   Edges exiting loops should lead from representative
	   of the son of nearest common ancestor of the loops in that
	   act lays.  */

	if (e->dest->loop_father->header == e->dest)
	  dest = LOOP_REPR (e->dest->loop_father);

	if (!flow_bb_inside_loop_p (act->loop_father, e->dest))
	  {
	    /* Redirect the source to the representative of the subloop of
	       the common ancestor that contains ACT.  */
	    depth = 1 + loop_depth (find_common_loop (act->loop_father,
						      e->dest->loop_father));
	    if (depth == loop_depth (act->loop_father))
	      cloop = act->loop_father;
	    else
	      cloop = (*act->loop_father->superloops)[depth];

	    src = LOOP_REPR (cloop);
	  }

	/* Remember the CFG edge so it can be flagged when its graph edge
	   turns out to lie inside an SCC.  */
	add_edge (g, src, dest)->data = e;
      }

  /* Find the strongly connected components.  */
  graphds_scc (g, NULL);

  /* Mark the irreducible loops.  */
  for (i = 0; i < g->n_vertices; i++)
    for (ge = g->vertices[i].succ; ge; ge = ge->succ_next)
      {
	edge real = (edge) ge->data;
	/* edge E in graph G is irreducible if it connects two vertices in the
	   same scc.  */

	/* All edges should lead from a component with higher number to the
	   one with lower one.  */
	gcc_assert (g->vertices[ge->src].component >= g->vertices[ge->dest].component);

	if (g->vertices[ge->src].component != g->vertices[ge->dest].component)
	  continue;

	real->flags |= EDGE_IRREDUCIBLE_LOOP;
	irred_loop_found = true;
	if (flow_bb_inside_loop_p (real->src->loop_father, real->dest))
	  real->src->flags |= BB_IRREDUCIBLE_LOOP;
      }

  free_graph (g);

  loops_state_set (LOOPS_HAVE_MARKED_IRREDUCIBLE_REGIONS);
  return irred_loop_found;
}

170 | ||
171 | /* Counts number of insns inside LOOP. */ | |
172 | int | |
7ecb5bb2 | 173 | num_loop_insns (const struct loop *loop) |
862be747 | 174 | { |
175 | basic_block *bbs, bb; | |
176 | unsigned i, ninsns = 0; | |
177 | rtx insn; | |
178 | ||
179 | bbs = get_loop_body (loop); | |
180 | for (i = 0; i < loop->num_nodes; i++) | |
181 | { | |
182 | bb = bbs[i]; | |
9845d120 | 183 | FOR_BB_INSNS (bb, insn) |
184 | if (NONDEBUG_INSN_P (insn)) | |
c87a3eff | 185 | ninsns++; |
862be747 | 186 | } |
47c3d424 | 187 | free (bbs); |
188 | ||
189 | if (!ninsns) | |
190 | ninsns = 1; /* To avoid division by zero. */ | |
4c9e08a4 | 191 | |
862be747 | 192 | return ninsns; |
193 | } | |
194 | ||
195 | /* Counts number of insns executed on average per iteration LOOP. */ | |
196 | int | |
7ecb5bb2 | 197 | average_num_loop_insns (const struct loop *loop) |
862be747 | 198 | { |
199 | basic_block *bbs, bb; | |
200 | unsigned i, binsns, ninsns, ratio; | |
201 | rtx insn; | |
202 | ||
203 | ninsns = 0; | |
204 | bbs = get_loop_body (loop); | |
205 | for (i = 0; i < loop->num_nodes; i++) | |
206 | { | |
207 | bb = bbs[i]; | |
208 | ||
9845d120 | 209 | binsns = 0; |
210 | FOR_BB_INSNS (bb, insn) | |
211 | if (NONDEBUG_INSN_P (insn)) | |
c87a3eff | 212 | binsns++; |
862be747 | 213 | |
214 | ratio = loop->header->frequency == 0 | |
215 | ? BB_FREQ_MAX | |
216 | : (bb->frequency * BB_FREQ_MAX) / loop->header->frequency; | |
217 | ninsns += binsns * ratio; | |
218 | } | |
47c3d424 | 219 | free (bbs); |
4c9e08a4 | 220 | |
862be747 | 221 | ninsns /= BB_FREQ_MAX; |
222 | if (!ninsns) | |
223 | ninsns = 1; /* To avoid division by zero. */ | |
224 | ||
225 | return ninsns; | |
226 | } | |
227 | ||
d97e22fb | 228 | /* Returns expected number of iterations of LOOP, according to |
229 | measured or guessed profile. No bounding is done on the | |
230 | value. */ | |
231 | ||
232 | gcov_type | |
233 | expected_loop_iterations_unbounded (const struct loop *loop) | |
862be747 | 234 | { |
235 | edge e; | |
cd665a06 | 236 | edge_iterator ei; |
862be747 | 237 | |
7a22afab | 238 | if (loop->latch->count || loop->header->count) |
862be747 | 239 | { |
240 | gcov_type count_in, count_latch, expected; | |
241 | ||
242 | count_in = 0; | |
243 | count_latch = 0; | |
244 | ||
cd665a06 | 245 | FOR_EACH_EDGE (e, ei, loop->header->preds) |
862be747 | 246 | if (e->src == loop->latch) |
247 | count_latch = e->count; | |
248 | else | |
249 | count_in += e->count; | |
250 | ||
251 | if (count_in == 0) | |
a0c938f0 | 252 | expected = count_latch * 2; |
d04f7eb9 | 253 | else |
a0c938f0 | 254 | expected = (count_latch + count_in - 1) / count_in; |
862be747 | 255 | |
d97e22fb | 256 | return expected; |
862be747 | 257 | } |
258 | else | |
259 | { | |
260 | int freq_in, freq_latch; | |
261 | ||
262 | freq_in = 0; | |
263 | freq_latch = 0; | |
264 | ||
cd665a06 | 265 | FOR_EACH_EDGE (e, ei, loop->header->preds) |
862be747 | 266 | if (e->src == loop->latch) |
267 | freq_latch = EDGE_FREQUENCY (e); | |
268 | else | |
269 | freq_in += EDGE_FREQUENCY (e); | |
270 | ||
271 | if (freq_in == 0) | |
d04f7eb9 | 272 | return freq_latch * 2; |
862be747 | 273 | |
274 | return (freq_latch + freq_in - 1) / freq_in; | |
275 | } | |
276 | } | |
2d49f824 | 277 | |
d97e22fb | 278 | /* Returns expected number of LOOP iterations. The returned value is bounded |
279 | by REG_BR_PROB_BASE. */ | |
280 | ||
281 | unsigned | |
282 | expected_loop_iterations (const struct loop *loop) | |
283 | { | |
284 | gcov_type expected = expected_loop_iterations_unbounded (loop); | |
285 | return (expected > REG_BR_PROB_BASE ? REG_BR_PROB_BASE : expected); | |
286 | } | |
287 | ||
2d49f824 | 288 | /* Returns the maximum level of nesting of subloops of LOOP. */ |
289 | ||
290 | unsigned | |
291 | get_loop_level (const struct loop *loop) | |
292 | { | |
293 | const struct loop *ploop; | |
294 | unsigned mx = 0, l; | |
295 | ||
296 | for (ploop = loop->inner; ploop; ploop = ploop->next) | |
297 | { | |
298 | l = get_loop_level (ploop); | |
299 | if (l >= mx) | |
300 | mx = l + 1; | |
301 | } | |
302 | return mx; | |
303 | } | |
3a0ecac2 | 304 | |
305 | /* Returns estimate on cost of computing SEQ. */ | |
306 | ||
307 | static unsigned | |
f529eb25 | 308 | seq_cost (const_rtx seq, bool speed) |
3a0ecac2 | 309 | { |
310 | unsigned cost = 0; | |
311 | rtx set; | |
312 | ||
313 | for (; seq; seq = NEXT_INSN (seq)) | |
314 | { | |
315 | set = single_set (seq); | |
316 | if (set) | |
b72d459f | 317 | cost += set_rtx_cost (set, speed); |
3a0ecac2 | 318 | else |
319 | cost++; | |
320 | } | |
321 | ||
322 | return cost; | |
323 | } | |
324 | ||
/* Initialize the constants for computing set costs
   (target_avail_regs, target_clobbered_regs, target_res_regs,
   target_reg_cost[] and target_spill_cost[]), for both the
   size-optimized (0) and speed-optimized (1) variants.  */

void
init_set_costs (void)
{
  int speed;
  rtx seq;
  /* Scratch "registers" just past the hard register file, used only to
     emit throwaway move sequences whose cost we measure.  */
  rtx reg1 = gen_raw_REG (SImode, FIRST_PSEUDO_REGISTER);
  rtx reg2 = gen_raw_REG (SImode, FIRST_PSEUDO_REGISTER + 1);
  rtx addr = gen_raw_REG (Pmode, FIRST_PSEUDO_REGISTER + 2);
  rtx mem = validize_mem (gen_rtx_MEM (SImode, addr));
  unsigned i;

  /* Count general registers usable for allocation, and among those the
     ones clobbered across calls.  */
  target_avail_regs = 0;
  target_clobbered_regs = 0;
  for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
    if (TEST_HARD_REG_BIT (reg_class_contents[GENERAL_REGS], i)
	&& !fixed_regs[i])
      {
	target_avail_regs++;
	if (call_used_regs[i])
	  target_clobbered_regs++;
      }

  /* Number of registers assumed reserved for temporaries -- a fixed
     heuristic value.  */
  target_res_regs = 3;

  for (speed = 0; speed < 2; speed++)
     {
      /* Select speed (1) or size (0) costing for the seq_cost calls
	 below; restored by default_rtl_profile () at the end.  */
      crtl->maybe_hot_insn_p = speed;
      /* Set up the costs for using extra registers:

	 1) If not many free registers remain, we should prefer having an
	    additional move to decreasing the number of available registers.
	    (TARGET_REG_COST).
	 2) If no registers are available, we need to spill, which may require
	    storing the old value to memory and loading it back
	    (TARGET_SPILL_COST).  */

      start_sequence ();
      emit_move_insn (reg1, reg2);
      seq = get_insns ();
      end_sequence ();
      target_reg_cost [speed] = seq_cost (seq, speed);

      start_sequence ();
      emit_move_insn (mem, reg1);
      emit_move_insn (reg2, mem);
      seq = get_insns ();
      end_sequence ();
      target_spill_cost [speed] = seq_cost (seq, speed);
    }
  default_rtl_profile ();
}

378 | ||
25153338 | 379 | /* Estimates cost of increased register pressure caused by making N_NEW new |
380 | registers live around the loop. N_OLD is the number of registers live | |
a6b74a67 | 381 | around the loop. If CALL_P is true, also take into account that |
382 | call-used registers may be clobbered in the loop body, reducing the | |
383 | number of available registers before we spill. */ | |
3a0ecac2 | 384 | |
385 | unsigned | |
a6b74a67 | 386 | estimate_reg_pressure_cost (unsigned n_new, unsigned n_old, bool speed, |
387 | bool call_p) | |
3a0ecac2 | 388 | { |
47dd2e78 | 389 | unsigned cost; |
25153338 | 390 | unsigned regs_needed = n_new + n_old; |
a6b74a67 | 391 | unsigned available_regs = target_avail_regs; |
392 | ||
393 | /* If there is a call in the loop body, the call-clobbered registers | |
394 | are not available for loop invariants. */ | |
395 | if (call_p) | |
396 | available_regs = available_regs - target_clobbered_regs; | |
3a0ecac2 | 397 | |
25153338 | 398 | /* If we have enough registers, we should use them and not restrict |
399 | the transformations unnecessarily. */ | |
a6b74a67 | 400 | if (regs_needed + target_res_regs <= available_regs) |
25153338 | 401 | return 0; |
402 | ||
a6b74a67 | 403 | if (regs_needed <= available_regs) |
47dd2e78 | 404 | /* If we are close to running out of registers, try to preserve |
405 | them. */ | |
f529eb25 | 406 | cost = target_reg_cost [speed] * n_new; |
47dd2e78 | 407 | else |
408 | /* If we run out of registers, it is very expensive to add another | |
409 | one. */ | |
f529eb25 | 410 | cost = target_spill_cost [speed] * n_new; |
47dd2e78 | 411 | |
cf709bf6 | 412 | if (optimize && (flag_ira_region == IRA_REGION_ALL |
413 | || flag_ira_region == IRA_REGION_MIXED) | |
41f75a99 | 414 | && number_of_loops (cfun) <= (unsigned) IRA_MAX_LOOPS_NUM) |
47dd2e78 | 415 | /* IRA regional allocation deals with high register pressure |
416 | better. So decrease the cost (to do more accurate the cost | |
417 | calculation for IRA, we need to know how many registers lives | |
418 | through the loop transparently). */ | |
419 | cost /= 2; | |
420 | ||
421 | return cost; | |
3a0ecac2 | 422 | } |
423 | ||
7194de72 | 424 | /* Sets EDGE_LOOP_EXIT flag for all loop exits. */ |
ffc6b5d5 | 425 | |
426 | void | |
7194de72 | 427 | mark_loop_exit_edges (void) |
ffc6b5d5 | 428 | { |
429 | basic_block bb; | |
430 | edge e; | |
a0c938f0 | 431 | |
41f75a99 | 432 | if (number_of_loops (cfun) <= 1) |
ffc6b5d5 | 433 | return; |
434 | ||
fc00614f | 435 | FOR_EACH_BB_FN (bb, cfun) |
ffc6b5d5 | 436 | { |
437 | edge_iterator ei; | |
438 | ||
ffc6b5d5 | 439 | FOR_EACH_EDGE (e, ei, bb->succs) |
440 | { | |
9e3536f4 | 441 | if (loop_outer (bb->loop_father) |
c088dce6 | 442 | && loop_exit_edge_p (bb->loop_father, e)) |
ffc6b5d5 | 443 | e->flags |= EDGE_LOOP_EXIT; |
444 | else | |
445 | e->flags &= ~EDGE_LOOP_EXIT; | |
446 | } | |
447 | } | |
448 | } | |
449 | ||
3681186e | 450 | /* Return exit edge if loop has only one exit that is likely |
451 | to be executed on runtime (i.e. it is not EH or leading | |
452 | to noreturn call. */ | |
453 | ||
454 | edge | |
455 | single_likely_exit (struct loop *loop) | |
456 | { | |
457 | edge found = single_exit (loop); | |
f1f41a6c | 458 | vec<edge> exits; |
3681186e | 459 | unsigned i; |
460 | edge ex; | |
461 | ||
462 | if (found) | |
463 | return found; | |
464 | exits = get_loop_exit_edges (loop); | |
f1f41a6c | 465 | FOR_EACH_VEC_ELT (exits, i, ex) |
3681186e | 466 | { |
467 | if (ex->flags & (EDGE_EH | EDGE_ABNORMAL_CALL)) | |
468 | continue; | |
469 | /* The constant of 5 is set in a way so noreturn calls are | |
470 | ruled out by this test. The static branch prediction algorithm | |
471 | will not assign such a low probability to conditionals for usual | |
472 | reasons. */ | |
f26d8580 | 473 | if (profile_status_for_fn (cfun) != PROFILE_ABSENT |
3681186e | 474 | && ex->probability < 5 && !ex->count) |
475 | continue; | |
476 | if (!found) | |
477 | found = ex; | |
478 | else | |
479 | { | |
f1f41a6c | 480 | exits.release (); |
3681186e | 481 | return NULL; |
482 | } | |
483 | } | |
f1f41a6c | 484 | exits.release (); |
3681186e | 485 | return found; |
486 | } | |
d583c979 | 487 | |
488 | ||
489 | /* Gets basic blocks of a LOOP. Header is the 0-th block, rest is in dfs | |
490 | order against direction of edges from latch. Specially, if | |
491 | header != latch, latch is the 1-st block. */ | |
492 | ||
f1f41a6c | 493 | vec<basic_block> |
d583c979 | 494 | get_loop_hot_path (const struct loop *loop) |
495 | { | |
496 | basic_block bb = loop->header; | |
1e094109 | 497 | vec<basic_block> path = vNULL; |
d583c979 | 498 | bitmap visited = BITMAP_ALLOC (NULL); |
499 | ||
500 | while (true) | |
501 | { | |
502 | edge_iterator ei; | |
503 | edge e; | |
504 | edge best = NULL; | |
505 | ||
f1f41a6c | 506 | path.safe_push (bb); |
d583c979 | 507 | bitmap_set_bit (visited, bb->index); |
508 | FOR_EACH_EDGE (e, ei, bb->succs) | |
509 | if ((!best || e->probability > best->probability) | |
510 | && !loop_exit_edge_p (loop, e) | |
511 | && !bitmap_bit_p (visited, e->dest->index)) | |
512 | best = e; | |
513 | if (!best || best->dest == loop->header) | |
514 | break; | |
515 | bb = best->dest; | |
516 | } | |
517 | BITMAP_FREE (visited); | |
518 | return path; | |
519 | } |