3d436d2a | 1 | /* Natural loop analysis code for GNU compiler. |
66647d44 JJ |
2 | Copyright (C) 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009 |
3 | Free Software Foundation, Inc. | |
3d436d2a ZD |
4 | |
5 | This file is part of GCC. | |
6 | ||
7 | GCC is free software; you can redistribute it and/or modify it under | |
8 | the terms of the GNU General Public License as published by the Free | |
9dcd6f09 | 9 | Software Foundation; either version 3, or (at your option) any later |
3d436d2a ZD |
10 | version. |
11 | ||
12 | GCC is distributed in the hope that it will be useful, but WITHOUT ANY | |
13 | WARRANTY; without even the implied warranty of MERCHANTABILITY or | |
14 | FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License | |
15 | for more details. | |
16 | ||
17 | You should have received a copy of the GNU General Public License | |
9dcd6f09 NC |
18 | along with GCC; see the file COPYING3. If not see |
19 | <http://www.gnu.org/licenses/>. */ | |
3d436d2a ZD |
20 | |
21 | #include "config.h" | |
22 | #include "system.h" | |
23 | #include "coretypes.h" | |
24 | #include "tm.h" | |
25 | #include "rtl.h" | |
26 | #include "hard-reg-set.h" | |
7932a3db | 27 | #include "obstack.h" |
3d436d2a ZD |
28 | #include "basic-block.h" |
29 | #include "cfgloop.h" | |
30 | #include "expr.h" | |
31 | #include "output.h" | |
66f97d31 | 32 | #include "graphds.h" |
058e97ec | 33 | #include "params.h" |
3d436d2a | 34 | |
3d436d2a | 35 | /* Checks whether BB is executed exactly once in each LOOP iteration. */ |
f2dca510 | 36 | |
3d436d2a | 37 | bool |
ed7a4b4b | 38 | just_once_each_iteration_p (const struct loop *loop, const_basic_block bb) |
3d436d2a ZD |
39 | { |
40 | /* It must be executed at least once each iteration. */ | |
d47cc544 | 41 | if (!dominated_by_p (CDI_DOMINATORS, loop->latch, bb)) |
3d436d2a ZD |
42 | return false; |
43 | ||
44 | /* And just once. */ | |
45 | if (bb->loop_father != loop) | |
46 | return false; | |
47 | ||
48 | /* But this was not enough. We might have some irreducible loop here. */ | |
49 | if (bb->flags & BB_IRREDUCIBLE_LOOP) | |
50 | return false; | |
51 | ||
52 | return true; | |
53 | } | |
54 | ||
/* Marks blocks and edges that are part of non-recognized loops; i.e. we
   throw away all latch edges and mark blocks inside any remaining cycle.
   Everything is a bit complicated due to fact we do not want to do this
   for parts of cycles that only "pass" through some loop -- i.e. for
   each cycle, we want to mark blocks that belong directly to innermost
   loop containing the whole cycle.

   LOOPS is the loop tree.  Returns true if any irreducible region was
   found.  */

/* Vertex numbering in the auxiliary graph: basic blocks occupy vertices
   1 .. last_basic_block (BB_REPR), loops are mapped past them
   (LOOP_REPR).  */
#define LOOP_REPR(LOOP) ((LOOP)->num + last_basic_block)
#define BB_REPR(BB) ((BB)->index + 1)

bool
mark_irreducible_loops (void)
{
  basic_block act;
  struct graph_edge *ge;
  edge e;
  edge_iterator ei;
  int src, dest;
  unsigned depth;
  struct graph *g;
  int num = number_of_loops ();
  struct loop *cloop;
  bool irred_loop_found = false;
  int i;

  gcc_assert (current_loops != NULL);

  /* Reset the flags.  */
  FOR_BB_BETWEEN (act, ENTRY_BLOCK_PTR, EXIT_BLOCK_PTR, next_bb)
    {
      act->flags &= ~BB_IRREDUCIBLE_LOOP;
      FOR_EACH_EDGE (e, ei, act->succs)
	e->flags &= ~EDGE_IRREDUCIBLE_LOOP;
    }

  /* Create the edge lists.  One vertex per block plus one per loop.  */
  g = new_graph (last_basic_block + num);

  FOR_BB_BETWEEN (act, ENTRY_BLOCK_PTR, EXIT_BLOCK_PTR, next_bb)
    FOR_EACH_EDGE (e, ei, act->succs)
      {
	/* Ignore edges to exit.  */
	if (e->dest == EXIT_BLOCK_PTR)
	  continue;

	src = BB_REPR (act);
	dest = BB_REPR (e->dest);

	/* Ignore latch edges.  */
	if (e->dest->loop_father->header == e->dest
	    && e->dest->loop_father->latch == act)
	  continue;

	/* Edges inside a single loop should be left where they are.  Edges
	   to subloop headers should lead to representative of the subloop,
	   but from the same place.

	   Edges exiting loops should lead from representative
	   of the son of nearest common ancestor of the loops in that
	   act lays.  */

	if (e->dest->loop_father->header == e->dest)
	  dest = LOOP_REPR (e->dest->loop_father);

	if (!flow_bb_inside_loop_p (act->loop_father, e->dest))
	  {
	    /* Depth of the child of the common ancestor that contains
	       ACT; the edge is redirected to start from that loop's
	       representative.  */
	    depth = 1 + loop_depth (find_common_loop (act->loop_father,
						      e->dest->loop_father));
	    if (depth == loop_depth (act->loop_father))
	      cloop = act->loop_father;
	    else
	      cloop = VEC_index (loop_p, act->loop_father->superloops, depth);

	    src = LOOP_REPR (cloop);
	  }

	/* Remember the real CFG edge so it can be flagged later.  */
	add_edge (g, src, dest)->data = e;
      }

  /* Find the strongly connected components.  */
  graphds_scc (g, NULL);

  /* Mark the irreducible loops.  */
  for (i = 0; i < g->n_vertices; i++)
    for (ge = g->vertices[i].succ; ge; ge = ge->succ_next)
      {
	edge real = (edge) ge->data;
	/* edge E in graph G is irreducible if it connects two vertices in the
	   same scc.  */

	/* All edges should lead from a component with higher number to the
	   one with lower one.  */
	gcc_assert (g->vertices[ge->src].component >= g->vertices[ge->dest].component);

	if (g->vertices[ge->src].component != g->vertices[ge->dest].component)
	  continue;

	real->flags |= EDGE_IRREDUCIBLE_LOOP;
	irred_loop_found = true;
	/* Flag the source block too, but only when the edge stays within
	   one loop (otherwise the block merely "passes" the cycle).  */
	if (flow_bb_inside_loop_p (real->src->loop_father, real->dest))
	  real->src->flags |= BB_IRREDUCIBLE_LOOP;
      }

  free_graph (g);

  loops_state_set (LOOPS_HAVE_MARKED_IRREDUCIBLE_REGIONS);
  return irred_loop_found;
}
165 | ||
166 | /* Counts number of insns inside LOOP. */ | |
167 | int | |
ed7a4b4b | 168 | num_loop_insns (const struct loop *loop) |
3d436d2a ZD |
169 | { |
170 | basic_block *bbs, bb; | |
171 | unsigned i, ninsns = 0; | |
172 | rtx insn; | |
173 | ||
174 | bbs = get_loop_body (loop); | |
175 | for (i = 0; i < loop->num_nodes; i++) | |
176 | { | |
177 | bb = bbs[i]; | |
178 | ninsns++; | |
a813c111 | 179 | for (insn = BB_HEAD (bb); insn != BB_END (bb); insn = NEXT_INSN (insn)) |
91f4cfe3 ZD |
180 | if (INSN_P (insn)) |
181 | ninsns++; | |
3d436d2a ZD |
182 | } |
183 | free(bbs); | |
d329e058 | 184 | |
3d436d2a ZD |
185 | return ninsns; |
186 | } | |
187 | ||
188 | /* Counts number of insns executed on average per iteration LOOP. */ | |
189 | int | |
ed7a4b4b | 190 | average_num_loop_insns (const struct loop *loop) |
3d436d2a ZD |
191 | { |
192 | basic_block *bbs, bb; | |
193 | unsigned i, binsns, ninsns, ratio; | |
194 | rtx insn; | |
195 | ||
196 | ninsns = 0; | |
197 | bbs = get_loop_body (loop); | |
198 | for (i = 0; i < loop->num_nodes; i++) | |
199 | { | |
200 | bb = bbs[i]; | |
201 | ||
202 | binsns = 1; | |
a813c111 | 203 | for (insn = BB_HEAD (bb); insn != BB_END (bb); insn = NEXT_INSN (insn)) |
91f4cfe3 ZD |
204 | if (INSN_P (insn)) |
205 | binsns++; | |
3d436d2a ZD |
206 | |
207 | ratio = loop->header->frequency == 0 | |
208 | ? BB_FREQ_MAX | |
209 | : (bb->frequency * BB_FREQ_MAX) / loop->header->frequency; | |
210 | ninsns += binsns * ratio; | |
211 | } | |
212 | free(bbs); | |
d329e058 | 213 | |
3d436d2a ZD |
214 | ninsns /= BB_FREQ_MAX; |
215 | if (!ninsns) | |
216 | ninsns = 1; /* To avoid division by zero. */ | |
217 | ||
218 | return ninsns; | |
219 | } | |
220 | ||
ac84e05e ZD |
221 | /* Returns expected number of iterations of LOOP, according to |
222 | measured or guessed profile. No bounding is done on the | |
223 | value. */ | |
224 | ||
225 | gcov_type | |
226 | expected_loop_iterations_unbounded (const struct loop *loop) | |
3d436d2a ZD |
227 | { |
228 | edge e; | |
628f6a4e | 229 | edge_iterator ei; |
3d436d2a | 230 | |
997de8ed | 231 | if (loop->latch->count || loop->header->count) |
3d436d2a ZD |
232 | { |
233 | gcov_type count_in, count_latch, expected; | |
234 | ||
235 | count_in = 0; | |
236 | count_latch = 0; | |
237 | ||
628f6a4e | 238 | FOR_EACH_EDGE (e, ei, loop->header->preds) |
3d436d2a ZD |
239 | if (e->src == loop->latch) |
240 | count_latch = e->count; | |
241 | else | |
242 | count_in += e->count; | |
243 | ||
244 | if (count_in == 0) | |
c22cacf3 | 245 | expected = count_latch * 2; |
bade3a00 | 246 | else |
c22cacf3 | 247 | expected = (count_latch + count_in - 1) / count_in; |
3d436d2a | 248 | |
ac84e05e | 249 | return expected; |
3d436d2a ZD |
250 | } |
251 | else | |
252 | { | |
253 | int freq_in, freq_latch; | |
254 | ||
255 | freq_in = 0; | |
256 | freq_latch = 0; | |
257 | ||
628f6a4e | 258 | FOR_EACH_EDGE (e, ei, loop->header->preds) |
3d436d2a ZD |
259 | if (e->src == loop->latch) |
260 | freq_latch = EDGE_FREQUENCY (e); | |
261 | else | |
262 | freq_in += EDGE_FREQUENCY (e); | |
263 | ||
264 | if (freq_in == 0) | |
bade3a00 | 265 | return freq_latch * 2; |
3d436d2a ZD |
266 | |
267 | return (freq_latch + freq_in - 1) / freq_in; | |
268 | } | |
269 | } | |
689ba89d | 270 | |
ac84e05e ZD |
271 | /* Returns expected number of LOOP iterations. The returned value is bounded |
272 | by REG_BR_PROB_BASE. */ | |
273 | ||
274 | unsigned | |
275 | expected_loop_iterations (const struct loop *loop) | |
276 | { | |
277 | gcov_type expected = expected_loop_iterations_unbounded (loop); | |
278 | return (expected > REG_BR_PROB_BASE ? REG_BR_PROB_BASE : expected); | |
279 | } | |
280 | ||
689ba89d ZD |
281 | /* Returns the maximum level of nesting of subloops of LOOP. */ |
282 | ||
283 | unsigned | |
284 | get_loop_level (const struct loop *loop) | |
285 | { | |
286 | const struct loop *ploop; | |
287 | unsigned mx = 0, l; | |
288 | ||
289 | for (ploop = loop->inner; ploop; ploop = ploop->next) | |
290 | { | |
291 | l = get_loop_level (ploop); | |
292 | if (l >= mx) | |
293 | mx = l + 1; | |
294 | } | |
295 | return mx; | |
296 | } | |
5e962776 ZD |
297 | |
298 | /* Returns estimate on cost of computing SEQ. */ | |
299 | ||
300 | static unsigned | |
f40751dd | 301 | seq_cost (const_rtx seq, bool speed) |
5e962776 ZD |
302 | { |
303 | unsigned cost = 0; | |
304 | rtx set; | |
305 | ||
306 | for (; seq; seq = NEXT_INSN (seq)) | |
307 | { | |
308 | set = single_set (seq); | |
309 | if (set) | |
f40751dd | 310 | cost += rtx_cost (set, SET, speed); |
5e962776 ZD |
311 | else |
312 | cost++; | |
313 | } | |
314 | ||
315 | return cost; | |
316 | } | |
317 | ||
/* The properties of the target.  The [2] cost arrays are indexed by the
   optimize-for-speed flag (0 = size, 1 = speed); see init_set_costs.  */

unsigned target_avail_regs;	/* Number of available registers.  */
unsigned target_res_regs;	/* Number of registers reserved for temporary
				   expressions.  */
unsigned target_reg_cost[2];	/* The cost for register when there still
				   is some reserve, but we are approaching
				   the number of available registers.  */
unsigned target_spill_cost[2];	/* The cost for register when we need
				   to spill.  */
5e962776 ZD |
328 | |
/* Initialize the constants for computing set costs: the number of usable
   hard registers and the costs of register-to-register and spill moves,
   computed for both the size (0) and speed (1) models.  */

void
init_set_costs (void)
{
  int speed;
  rtx seq;
  /* Fake pseudo registers and a memory reference used to build the
     sample move sequences.  */
  rtx reg1 = gen_raw_REG (SImode, FIRST_PSEUDO_REGISTER);
  rtx reg2 = gen_raw_REG (SImode, FIRST_PSEUDO_REGISTER + 1);
  rtx addr = gen_raw_REG (Pmode, FIRST_PSEUDO_REGISTER + 2);
  rtx mem = validize_mem (gen_rtx_MEM (SImode, addr));
  unsigned i;

  /* Count the general registers that are not fixed.  */
  target_avail_regs = 0;
  for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
    if (TEST_HARD_REG_BIT (reg_class_contents[GENERAL_REGS], i)
	&& !fixed_regs[i])
      target_avail_regs++;

  target_res_regs = 3;

  for (speed = 0; speed < 2; speed++)
    {
      /* Temporarily force the insn-cost profile so seq_cost uses the
	 model we are measuring; restored by default_rtl_profile below.  */
      crtl->maybe_hot_insn_p = speed;
      /* Set up the costs for using extra registers:

	 1) If not many free registers remain, we should prefer having an
	    additional move to decreasing the number of available registers.
	    (TARGET_REG_COST).
	 2) If no registers are available, we need to spill, which may require
	    storing the old value to memory and loading it back
	    (TARGET_SPILL_COST).  */

      start_sequence ();
      emit_move_insn (reg1, reg2);
      seq = get_insns ();
      end_sequence ();
      target_reg_cost [speed] = seq_cost (seq, speed);

      start_sequence ();
      emit_move_insn (mem, reg1);
      emit_move_insn (reg2, mem);
      seq = get_insns ();
      end_sequence ();
      target_spill_cost [speed] = seq_cost (seq, speed);
    }
  default_rtl_profile ();
}
377 | ||
a154b43a ZD |
378 | /* Estimates cost of increased register pressure caused by making N_NEW new |
379 | registers live around the loop. N_OLD is the number of registers live | |
380 | around the loop. */ | |
5e962776 ZD |
381 | |
382 | unsigned | |
f40751dd | 383 | estimate_reg_pressure_cost (unsigned n_new, unsigned n_old, bool speed) |
5e962776 | 384 | { |
058e97ec | 385 | unsigned cost; |
a154b43a | 386 | unsigned regs_needed = n_new + n_old; |
5e962776 | 387 | |
a154b43a ZD |
388 | /* If we have enough registers, we should use them and not restrict |
389 | the transformations unnecessarily. */ | |
8b11a64c | 390 | if (regs_needed + target_res_regs <= target_avail_regs) |
a154b43a ZD |
391 | return 0; |
392 | ||
a154b43a | 393 | if (regs_needed <= target_avail_regs) |
058e97ec VM |
394 | /* If we are close to running out of registers, try to preserve |
395 | them. */ | |
f40751dd | 396 | cost = target_reg_cost [speed] * n_new; |
058e97ec VM |
397 | else |
398 | /* If we run out of registers, it is very expensive to add another | |
399 | one. */ | |
f40751dd | 400 | cost = target_spill_cost [speed] * n_new; |
058e97ec | 401 | |
2af2dbdc VM |
402 | if (optimize && (flag_ira_region == IRA_REGION_ALL |
403 | || flag_ira_region == IRA_REGION_MIXED) | |
058e97ec VM |
404 | && number_of_loops () <= (unsigned) IRA_MAX_LOOPS_NUM) |
405 | /* IRA regional allocation deals with high register pressure | |
406 | better. So decrease the cost (to do more accurate the cost | |
407 | calculation for IRA, we need to know how many registers lives | |
408 | through the loop transparently). */ | |
409 | cost /= 2; | |
410 | ||
411 | return cost; | |
5e962776 ZD |
412 | } |
413 | ||
d73be268 | 414 | /* Sets EDGE_LOOP_EXIT flag for all loop exits. */ |
70388d94 ZD |
415 | |
416 | void | |
d73be268 | 417 | mark_loop_exit_edges (void) |
70388d94 ZD |
418 | { |
419 | basic_block bb; | |
420 | edge e; | |
c22cacf3 | 421 | |
d51157de | 422 | if (number_of_loops () <= 1) |
70388d94 ZD |
423 | return; |
424 | ||
425 | FOR_EACH_BB (bb) | |
426 | { | |
427 | edge_iterator ei; | |
428 | ||
70388d94 ZD |
429 | FOR_EACH_EDGE (e, ei, bb->succs) |
430 | { | |
9ba025a2 | 431 | if (loop_outer (bb->loop_father) |
2ff3e325 | 432 | && loop_exit_edge_p (bb->loop_father, e)) |
70388d94 ZD |
433 | e->flags |= EDGE_LOOP_EXIT; |
434 | else | |
435 | e->flags &= ~EDGE_LOOP_EXIT; | |
436 | } | |
437 | } | |
438 | } | |
439 |