]>
Commit | Line | Data |
---|---|---|
3d436d2a | 1 | /* Natural loop analysis code for GNU compiler. |
5624e564 | 2 | Copyright (C) 2002-2015 Free Software Foundation, Inc. |
3d436d2a ZD |
3 | |
4 | This file is part of GCC. | |
5 | ||
6 | GCC is free software; you can redistribute it and/or modify it under | |
7 | the terms of the GNU General Public License as published by the Free | |
9dcd6f09 | 8 | Software Foundation; either version 3, or (at your option) any later |
3d436d2a ZD |
9 | version. |
10 | ||
11 | GCC is distributed in the hope that it will be useful, but WITHOUT ANY | |
12 | WARRANTY; without even the implied warranty of MERCHANTABILITY or | |
13 | FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License | |
14 | for more details. | |
15 | ||
16 | You should have received a copy of the GNU General Public License | |
9dcd6f09 NC |
17 | along with GCC; see the file COPYING3. If not see |
18 | <http://www.gnu.org/licenses/>. */ | |
3d436d2a ZD |
19 | |
20 | #include "config.h" | |
21 | #include "system.h" | |
22 | #include "coretypes.h" | |
c7131fb2 | 23 | #include "backend.h" |
9fdcd34e | 24 | #include "predict.h" |
3d436d2a | 25 | #include "rtl.h" |
7932a3db | 26 | #include "obstack.h" |
3d436d2a | 27 | #include "cfgloop.h" |
c7131fb2 | 28 | #include "tree.h" |
36566b39 | 29 | #include "flags.h" |
36566b39 | 30 | #include "alias.h" |
36566b39 PK |
31 | #include "insn-config.h" |
32 | #include "expmed.h" | |
33 | #include "dojump.h" | |
34 | #include "explow.h" | |
35 | #include "calls.h" | |
36 | #include "emit-rtl.h" | |
37 | #include "varasm.h" | |
38 | #include "stmt.h" | |
3d436d2a | 39 | #include "expr.h" |
66f97d31 | 40 | #include "graphds.h" |
058e97ec | 41 | #include "params.h" |
3d436d2a | 42 | |
4391924a RS |
43 | struct target_cfgloop default_target_cfgloop; |
44 | #if SWITCHABLE_TARGET | |
45 | struct target_cfgloop *this_target_cfgloop = &default_target_cfgloop; | |
46 | #endif | |
47 | ||
3d436d2a | 48 | /* Checks whether BB is executed exactly once in each LOOP iteration. */ |
f2dca510 | 49 | |
3d436d2a | 50 | bool |
ed7a4b4b | 51 | just_once_each_iteration_p (const struct loop *loop, const_basic_block bb) |
3d436d2a ZD |
52 | { |
53 | /* It must be executed at least once each iteration. */ | |
d47cc544 | 54 | if (!dominated_by_p (CDI_DOMINATORS, loop->latch, bb)) |
3d436d2a ZD |
55 | return false; |
56 | ||
57 | /* And just once. */ | |
58 | if (bb->loop_father != loop) | |
59 | return false; | |
60 | ||
61 | /* But this was not enough. We might have some irreducible loop here. */ | |
62 | if (bb->flags & BB_IRREDUCIBLE_LOOP) | |
63 | return false; | |
64 | ||
65 | return true; | |
66 | } | |
67 | ||
35b07080 ZD |
68 | /* Marks blocks and edges that are part of non-recognized loops; i.e. we |
69 | throw away all latch edges and mark blocks inside any remaining cycle. | |
70 | Everything is a bit complicated due to fact we do not want to do this | |
71 | for parts of cycles that only "pass" through some loop -- i.e. for | |
72 | each cycle, we want to mark blocks that belong directly to innermost | |
cfbe3efe | 73 | loop containing the whole cycle. |
c22cacf3 | 74 | |
cfbe3efe ZD |
75 | LOOPS is the loop tree. */ |
76 | ||
8b1c6fd7 | 77 | #define LOOP_REPR(LOOP) ((LOOP)->num + last_basic_block_for_fn (cfun)) |
cfbe3efe ZD |
78 | #define BB_REPR(BB) ((BB)->index + 1) |
79 | ||
2de58650 | 80 | bool |
d73be268 | 81 | mark_irreducible_loops (void) |
3d436d2a | 82 | { |
3d436d2a | 83 | basic_block act; |
2de58650 | 84 | struct graph_edge *ge; |
cfbe3efe | 85 | edge e; |
628f6a4e | 86 | edge_iterator ei; |
66f97d31 ZD |
87 | int src, dest; |
88 | unsigned depth; | |
cfbe3efe | 89 | struct graph *g; |
0fc822d0 | 90 | int num = number_of_loops (cfun); |
66f97d31 | 91 | struct loop *cloop; |
2de58650 JH |
92 | bool irred_loop_found = false; |
93 | int i; | |
3d436d2a | 94 | |
d51157de ZD |
95 | gcc_assert (current_loops != NULL); |
96 | ||
35b07080 | 97 | /* Reset the flags. */ |
fefa31b5 DM |
98 | FOR_BB_BETWEEN (act, ENTRY_BLOCK_PTR_FOR_FN (cfun), |
99 | EXIT_BLOCK_PTR_FOR_FN (cfun), next_bb) | |
35b07080 ZD |
100 | { |
101 | act->flags &= ~BB_IRREDUCIBLE_LOOP; | |
628f6a4e | 102 | FOR_EACH_EDGE (e, ei, act->succs) |
35b07080 ZD |
103 | e->flags &= ~EDGE_IRREDUCIBLE_LOOP; |
104 | } | |
105 | ||
3d436d2a | 106 | /* Create the edge lists. */ |
8b1c6fd7 | 107 | g = new_graph (last_basic_block_for_fn (cfun) + num); |
cfbe3efe | 108 | |
fefa31b5 DM |
109 | FOR_BB_BETWEEN (act, ENTRY_BLOCK_PTR_FOR_FN (cfun), |
110 | EXIT_BLOCK_PTR_FOR_FN (cfun), next_bb) | |
628f6a4e | 111 | FOR_EACH_EDGE (e, ei, act->succs) |
3d436d2a | 112 | { |
c22cacf3 | 113 | /* Ignore edges to exit. */ |
fefa31b5 | 114 | if (e->dest == EXIT_BLOCK_PTR_FOR_FN (cfun)) |
3d436d2a | 115 | continue; |
cfbe3efe | 116 | |
598ec7bd ZD |
117 | src = BB_REPR (act); |
118 | dest = BB_REPR (e->dest); | |
cfbe3efe | 119 | |
d51157de ZD |
120 | /* Ignore latch edges. */ |
121 | if (e->dest->loop_father->header == e->dest | |
122 | && e->dest->loop_father->latch == act) | |
123 | continue; | |
124 | ||
125 | /* Edges inside a single loop should be left where they are. Edges | |
126 | to subloop headers should lead to representative of the subloop, | |
127 | but from the same place. | |
cfbe3efe | 128 | |
d51157de ZD |
129 | Edges exiting loops should lead from representative |
130 | of the son of nearest common ancestor of the loops in that | |
131 | act lays. */ | |
132 | ||
133 | if (e->dest->loop_father->header == e->dest) | |
134 | dest = LOOP_REPR (e->dest->loop_father); | |
135 | ||
136 | if (!flow_bb_inside_loop_p (act->loop_father, e->dest)) | |
137 | { | |
138 | depth = 1 + loop_depth (find_common_loop (act->loop_father, | |
139 | e->dest->loop_father)); | |
140 | if (depth == loop_depth (act->loop_father)) | |
141 | cloop = act->loop_father; | |
142 | else | |
9771b263 | 143 | cloop = (*act->loop_father->superloops)[depth]; |
d51157de ZD |
144 | |
145 | src = LOOP_REPR (cloop); | |
3d436d2a | 146 | } |
cfbe3efe | 147 | |
66f97d31 | 148 | add_edge (g, src, dest)->data = e; |
3d436d2a ZD |
149 | } |
150 | ||
66f97d31 ZD |
151 | /* Find the strongly connected components. */ |
152 | graphds_scc (g, NULL); | |
3d436d2a | 153 | |
cfbe3efe | 154 | /* Mark the irreducible loops. */ |
2de58650 JH |
155 | for (i = 0; i < g->n_vertices; i++) |
156 | for (ge = g->vertices[i].succ; ge; ge = ge->succ_next) | |
157 | { | |
158 | edge real = (edge) ge->data; | |
159 | /* edge E in graph G is irreducible if it connects two vertices in the | |
160 | same scc. */ | |
161 | ||
162 | /* All edges should lead from a component with higher number to the | |
163 | one with lower one. */ | |
164 | gcc_assert (g->vertices[ge->src].component >= g->vertices[ge->dest].component); | |
165 | ||
166 | if (g->vertices[ge->src].component != g->vertices[ge->dest].component) | |
167 | continue; | |
168 | ||
169 | real->flags |= EDGE_IRREDUCIBLE_LOOP; | |
170 | irred_loop_found = true; | |
171 | if (flow_bb_inside_loop_p (real->src->loop_father, real->dest)) | |
172 | real->src->flags |= BB_IRREDUCIBLE_LOOP; | |
173 | } | |
3d436d2a | 174 | |
cfbe3efe | 175 | free_graph (g); |
3d436d2a | 176 | |
f87000d0 | 177 | loops_state_set (LOOPS_HAVE_MARKED_IRREDUCIBLE_REGIONS); |
2de58650 | 178 | return irred_loop_found; |
3d436d2a ZD |
179 | } |
180 | ||
181 | /* Counts number of insns inside LOOP. */ | |
182 | int | |
ed7a4b4b | 183 | num_loop_insns (const struct loop *loop) |
3d436d2a ZD |
184 | { |
185 | basic_block *bbs, bb; | |
186 | unsigned i, ninsns = 0; | |
1f75b71e | 187 | rtx_insn *insn; |
3d436d2a ZD |
188 | |
189 | bbs = get_loop_body (loop); | |
190 | for (i = 0; i < loop->num_nodes; i++) | |
191 | { | |
192 | bb = bbs[i]; | |
b5b8b0ac AO |
193 | FOR_BB_INSNS (bb, insn) |
194 | if (NONDEBUG_INSN_P (insn)) | |
91f4cfe3 | 195 | ninsns++; |
3d436d2a | 196 | } |
53a51cef JJ |
197 | free (bbs); |
198 | ||
199 | if (!ninsns) | |
200 | ninsns = 1; /* To avoid division by zero. */ | |
d329e058 | 201 | |
3d436d2a ZD |
202 | return ninsns; |
203 | } | |
204 | ||
205 | /* Counts number of insns executed on average per iteration LOOP. */ | |
206 | int | |
ed7a4b4b | 207 | average_num_loop_insns (const struct loop *loop) |
3d436d2a ZD |
208 | { |
209 | basic_block *bbs, bb; | |
210 | unsigned i, binsns, ninsns, ratio; | |
1f75b71e | 211 | rtx_insn *insn; |
3d436d2a ZD |
212 | |
213 | ninsns = 0; | |
214 | bbs = get_loop_body (loop); | |
215 | for (i = 0; i < loop->num_nodes; i++) | |
216 | { | |
217 | bb = bbs[i]; | |
218 | ||
b5b8b0ac AO |
219 | binsns = 0; |
220 | FOR_BB_INSNS (bb, insn) | |
221 | if (NONDEBUG_INSN_P (insn)) | |
91f4cfe3 | 222 | binsns++; |
3d436d2a ZD |
223 | |
224 | ratio = loop->header->frequency == 0 | |
225 | ? BB_FREQ_MAX | |
226 | : (bb->frequency * BB_FREQ_MAX) / loop->header->frequency; | |
227 | ninsns += binsns * ratio; | |
228 | } | |
53a51cef | 229 | free (bbs); |
d329e058 | 230 | |
3d436d2a ZD |
231 | ninsns /= BB_FREQ_MAX; |
232 | if (!ninsns) | |
233 | ninsns = 1; /* To avoid division by zero. */ | |
234 | ||
235 | return ninsns; | |
236 | } | |
237 | ||
ac84e05e ZD |
238 | /* Returns expected number of iterations of LOOP, according to |
239 | measured or guessed profile. No bounding is done on the | |
240 | value. */ | |
241 | ||
242 | gcov_type | |
243 | expected_loop_iterations_unbounded (const struct loop *loop) | |
3d436d2a ZD |
244 | { |
245 | edge e; | |
628f6a4e | 246 | edge_iterator ei; |
3d436d2a | 247 | |
997de8ed | 248 | if (loop->latch->count || loop->header->count) |
3d436d2a ZD |
249 | { |
250 | gcov_type count_in, count_latch, expected; | |
251 | ||
252 | count_in = 0; | |
253 | count_latch = 0; | |
254 | ||
628f6a4e | 255 | FOR_EACH_EDGE (e, ei, loop->header->preds) |
3d436d2a ZD |
256 | if (e->src == loop->latch) |
257 | count_latch = e->count; | |
258 | else | |
259 | count_in += e->count; | |
260 | ||
261 | if (count_in == 0) | |
c22cacf3 | 262 | expected = count_latch * 2; |
bade3a00 | 263 | else |
c22cacf3 | 264 | expected = (count_latch + count_in - 1) / count_in; |
3d436d2a | 265 | |
ac84e05e | 266 | return expected; |
3d436d2a ZD |
267 | } |
268 | else | |
269 | { | |
270 | int freq_in, freq_latch; | |
271 | ||
272 | freq_in = 0; | |
273 | freq_latch = 0; | |
274 | ||
628f6a4e | 275 | FOR_EACH_EDGE (e, ei, loop->header->preds) |
3d436d2a ZD |
276 | if (e->src == loop->latch) |
277 | freq_latch = EDGE_FREQUENCY (e); | |
278 | else | |
279 | freq_in += EDGE_FREQUENCY (e); | |
280 | ||
281 | if (freq_in == 0) | |
bade3a00 | 282 | return freq_latch * 2; |
3d436d2a ZD |
283 | |
284 | return (freq_latch + freq_in - 1) / freq_in; | |
285 | } | |
286 | } | |
689ba89d | 287 | |
ac84e05e ZD |
288 | /* Returns expected number of LOOP iterations. The returned value is bounded |
289 | by REG_BR_PROB_BASE. */ | |
290 | ||
291 | unsigned | |
292 | expected_loop_iterations (const struct loop *loop) | |
293 | { | |
294 | gcov_type expected = expected_loop_iterations_unbounded (loop); | |
295 | return (expected > REG_BR_PROB_BASE ? REG_BR_PROB_BASE : expected); | |
296 | } | |
297 | ||
689ba89d ZD |
298 | /* Returns the maximum level of nesting of subloops of LOOP. */ |
299 | ||
300 | unsigned | |
301 | get_loop_level (const struct loop *loop) | |
302 | { | |
303 | const struct loop *ploop; | |
304 | unsigned mx = 0, l; | |
305 | ||
306 | for (ploop = loop->inner; ploop; ploop = ploop->next) | |
307 | { | |
308 | l = get_loop_level (ploop); | |
309 | if (l >= mx) | |
310 | mx = l + 1; | |
311 | } | |
312 | return mx; | |
313 | } | |
5e962776 | 314 | |
5e962776 ZD |
315 | /* Initialize the constants for computing set costs. */ |
316 | ||
317 | void | |
318 | init_set_costs (void) | |
319 | { | |
f40751dd | 320 | int speed; |
1f75b71e | 321 | rtx_insn *seq; |
c3dc5e66 RS |
322 | rtx reg1 = gen_raw_REG (SImode, LAST_VIRTUAL_REGISTER + 1); |
323 | rtx reg2 = gen_raw_REG (SImode, LAST_VIRTUAL_REGISTER + 2); | |
324 | rtx addr = gen_raw_REG (Pmode, LAST_VIRTUAL_REGISTER + 3); | |
5e962776 ZD |
325 | rtx mem = validize_mem (gen_rtx_MEM (SImode, addr)); |
326 | unsigned i; | |
327 | ||
b5deb7b6 | 328 | target_avail_regs = 0; |
bec922f0 | 329 | target_clobbered_regs = 0; |
5e962776 ZD |
330 | for (i = 0; i < FIRST_PSEUDO_REGISTER; i++) |
331 | if (TEST_HARD_REG_BIT (reg_class_contents[GENERAL_REGS], i) | |
332 | && !fixed_regs[i]) | |
bec922f0 SL |
333 | { |
334 | target_avail_regs++; | |
335 | if (call_used_regs[i]) | |
336 | target_clobbered_regs++; | |
337 | } | |
5e962776 | 338 | |
8b11a64c | 339 | target_res_regs = 3; |
5e962776 | 340 | |
f40751dd JH |
341 | for (speed = 0; speed < 2; speed++) |
342 | { | |
343 | crtl->maybe_hot_insn_p = speed; | |
344 | /* Set up the costs for using extra registers: | |
345 | ||
346 | 1) If not many free registers remain, we should prefer having an | |
347 | additional move to decreasing the number of available registers. | |
348 | (TARGET_REG_COST). | |
349 | 2) If no registers are available, we need to spill, which may require | |
350 | storing the old value to memory and loading it back | |
351 | (TARGET_SPILL_COST). */ | |
352 | ||
353 | start_sequence (); | |
354 | emit_move_insn (reg1, reg2); | |
355 | seq = get_insns (); | |
356 | end_sequence (); | |
357 | target_reg_cost [speed] = seq_cost (seq, speed); | |
358 | ||
359 | start_sequence (); | |
360 | emit_move_insn (mem, reg1); | |
361 | emit_move_insn (reg2, mem); | |
362 | seq = get_insns (); | |
363 | end_sequence (); | |
364 | target_spill_cost [speed] = seq_cost (seq, speed); | |
365 | } | |
366 | default_rtl_profile (); | |
5e962776 ZD |
367 | } |
368 | ||
a154b43a ZD |
369 | /* Estimates cost of increased register pressure caused by making N_NEW new |
370 | registers live around the loop. N_OLD is the number of registers live | |
bec922f0 SL |
371 | around the loop. If CALL_P is true, also take into account that |
372 | call-used registers may be clobbered in the loop body, reducing the | |
373 | number of available registers before we spill. */ | |
5e962776 ZD |
374 | |
375 | unsigned | |
bec922f0 SL |
376 | estimate_reg_pressure_cost (unsigned n_new, unsigned n_old, bool speed, |
377 | bool call_p) | |
5e962776 | 378 | { |
058e97ec | 379 | unsigned cost; |
a154b43a | 380 | unsigned regs_needed = n_new + n_old; |
bec922f0 SL |
381 | unsigned available_regs = target_avail_regs; |
382 | ||
383 | /* If there is a call in the loop body, the call-clobbered registers | |
384 | are not available for loop invariants. */ | |
385 | if (call_p) | |
386 | available_regs = available_regs - target_clobbered_regs; | |
5e962776 | 387 | |
a154b43a ZD |
388 | /* If we have enough registers, we should use them and not restrict |
389 | the transformations unnecessarily. */ | |
bec922f0 | 390 | if (regs_needed + target_res_regs <= available_regs) |
a154b43a ZD |
391 | return 0; |
392 | ||
bec922f0 | 393 | if (regs_needed <= available_regs) |
058e97ec VM |
394 | /* If we are close to running out of registers, try to preserve |
395 | them. */ | |
f40751dd | 396 | cost = target_reg_cost [speed] * n_new; |
058e97ec VM |
397 | else |
398 | /* If we run out of registers, it is very expensive to add another | |
399 | one. */ | |
f40751dd | 400 | cost = target_spill_cost [speed] * n_new; |
058e97ec | 401 | |
2af2dbdc VM |
402 | if (optimize && (flag_ira_region == IRA_REGION_ALL |
403 | || flag_ira_region == IRA_REGION_MIXED) | |
0fc822d0 | 404 | && number_of_loops (cfun) <= (unsigned) IRA_MAX_LOOPS_NUM) |
058e97ec VM |
405 | /* IRA regional allocation deals with high register pressure |
406 | better. So decrease the cost (to do more accurate the cost | |
407 | calculation for IRA, we need to know how many registers lives | |
408 | through the loop transparently). */ | |
409 | cost /= 2; | |
410 | ||
411 | return cost; | |
5e962776 ZD |
412 | } |
413 | ||
d73be268 | 414 | /* Sets EDGE_LOOP_EXIT flag for all loop exits. */ |
70388d94 ZD |
415 | |
416 | void | |
d73be268 | 417 | mark_loop_exit_edges (void) |
70388d94 ZD |
418 | { |
419 | basic_block bb; | |
420 | edge e; | |
c22cacf3 | 421 | |
0fc822d0 | 422 | if (number_of_loops (cfun) <= 1) |
70388d94 ZD |
423 | return; |
424 | ||
11cd3bed | 425 | FOR_EACH_BB_FN (bb, cfun) |
70388d94 ZD |
426 | { |
427 | edge_iterator ei; | |
428 | ||
70388d94 ZD |
429 | FOR_EACH_EDGE (e, ei, bb->succs) |
430 | { | |
9ba025a2 | 431 | if (loop_outer (bb->loop_father) |
2ff3e325 | 432 | && loop_exit_edge_p (bb->loop_father, e)) |
70388d94 ZD |
433 | e->flags |= EDGE_LOOP_EXIT; |
434 | else | |
435 | e->flags &= ~EDGE_LOOP_EXIT; | |
436 | } | |
437 | } | |
438 | } | |
439 | ||
f9bf4777 JH |
440 | /* Return exit edge if loop has only one exit that is likely |
441 | to be executed on runtime (i.e. it is not EH or leading | |
442 | to noreturn call. */ | |
443 | ||
444 | edge | |
445 | single_likely_exit (struct loop *loop) | |
446 | { | |
447 | edge found = single_exit (loop); | |
9771b263 | 448 | vec<edge> exits; |
f9bf4777 JH |
449 | unsigned i; |
450 | edge ex; | |
451 | ||
452 | if (found) | |
453 | return found; | |
454 | exits = get_loop_exit_edges (loop); | |
9771b263 | 455 | FOR_EACH_VEC_ELT (exits, i, ex) |
f9bf4777 JH |
456 | { |
457 | if (ex->flags & (EDGE_EH | EDGE_ABNORMAL_CALL)) | |
458 | continue; | |
459 | /* The constant of 5 is set in a way so noreturn calls are | |
460 | ruled out by this test. The static branch prediction algorithm | |
461 | will not assign such a low probability to conditionals for usual | |
462 | reasons. */ | |
0a6a6ac9 | 463 | if (profile_status_for_fn (cfun) != PROFILE_ABSENT |
f9bf4777 JH |
464 | && ex->probability < 5 && !ex->count) |
465 | continue; | |
466 | if (!found) | |
467 | found = ex; | |
468 | else | |
469 | { | |
9771b263 | 470 | exits.release (); |
f9bf4777 JH |
471 | return NULL; |
472 | } | |
473 | } | |
9771b263 | 474 | exits.release (); |
f9bf4777 JH |
475 | return found; |
476 | } | |
519cac4a JH |
477 | |
478 | ||
479 | /* Gets basic blocks of a LOOP. Header is the 0-th block, rest is in dfs | |
480 | order against direction of edges from latch. Specially, if | |
481 | header != latch, latch is the 1-st block. */ | |
482 | ||
9771b263 | 483 | vec<basic_block> |
519cac4a JH |
484 | get_loop_hot_path (const struct loop *loop) |
485 | { | |
486 | basic_block bb = loop->header; | |
6e1aa848 | 487 | vec<basic_block> path = vNULL; |
519cac4a JH |
488 | bitmap visited = BITMAP_ALLOC (NULL); |
489 | ||
490 | while (true) | |
491 | { | |
492 | edge_iterator ei; | |
493 | edge e; | |
494 | edge best = NULL; | |
495 | ||
9771b263 | 496 | path.safe_push (bb); |
519cac4a JH |
497 | bitmap_set_bit (visited, bb->index); |
498 | FOR_EACH_EDGE (e, ei, bb->succs) | |
499 | if ((!best || e->probability > best->probability) | |
500 | && !loop_exit_edge_p (loop, e) | |
501 | && !bitmap_bit_p (visited, e->dest->index)) | |
502 | best = e; | |
503 | if (!best || best->dest == loop->header) | |
504 | break; | |
505 | bb = best->dest; | |
506 | } | |
507 | BITMAP_FREE (visited); | |
508 | return path; | |
509 | } |