]>
Commit | Line | Data |
---|---|---|
3d436d2a | 1 | /* Natural loop analysis code for GNU compiler. |
aeee4812 | 2 | Copyright (C) 2002-2023 Free Software Foundation, Inc. |
3d436d2a ZD |
3 | |
4 | This file is part of GCC. | |
5 | ||
6 | GCC is free software; you can redistribute it and/or modify it under | |
7 | the terms of the GNU General Public License as published by the Free | |
9dcd6f09 | 8 | Software Foundation; either version 3, or (at your option) any later |
3d436d2a ZD |
9 | version. |
10 | ||
11 | GCC is distributed in the hope that it will be useful, but WITHOUT ANY | |
12 | WARRANTY; without even the implied warranty of MERCHANTABILITY or | |
13 | FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License | |
14 | for more details. | |
15 | ||
16 | You should have received a copy of the GNU General Public License | |
9dcd6f09 NC |
17 | along with GCC; see the file COPYING3. If not see |
18 | <http://www.gnu.org/licenses/>. */ | |
3d436d2a ZD |
19 | |
20 | #include "config.h" | |
21 | #include "system.h" | |
22 | #include "coretypes.h" | |
c7131fb2 | 23 | #include "backend.h" |
3d436d2a | 24 | #include "rtl.h" |
c7131fb2 | 25 | #include "tree.h" |
957060b5 | 26 | #include "predict.h" |
4d0cdd0c | 27 | #include "memmodel.h" |
957060b5 AM |
28 | #include "emit-rtl.h" |
29 | #include "cfgloop.h" | |
36566b39 | 30 | #include "explow.h" |
3d436d2a | 31 | #include "expr.h" |
66f97d31 | 32 | #include "graphds.h" |
692dc070 | 33 | #include "sreal.h" |
43b484fb RS |
34 | #include "regs.h" |
35 | #include "function-abi.h" | |
3d436d2a | 36 | |
4391924a RS |
37 | struct target_cfgloop default_target_cfgloop; |
38 | #if SWITCHABLE_TARGET | |
39 | struct target_cfgloop *this_target_cfgloop = &default_target_cfgloop; | |
40 | #endif | |
41 | ||
3d436d2a | 42 | /* Checks whether BB is executed exactly once in each LOOP iteration. */ |
f2dca510 | 43 | |
3d436d2a | 44 | bool |
99b1c316 | 45 | just_once_each_iteration_p (const class loop *loop, const_basic_block bb) |
3d436d2a ZD |
46 | { |
47 | /* It must be executed at least once each iteration. */ | |
d47cc544 | 48 | if (!dominated_by_p (CDI_DOMINATORS, loop->latch, bb)) |
3d436d2a ZD |
49 | return false; |
50 | ||
51 | /* And just once. */ | |
52 | if (bb->loop_father != loop) | |
53 | return false; | |
54 | ||
55 | /* But this was not enough. We might have some irreducible loop here. */ | |
56 | if (bb->flags & BB_IRREDUCIBLE_LOOP) | |
57 | return false; | |
58 | ||
59 | return true; | |
60 | } | |
61 | ||
/* Marks blocks and edges that are part of non-recognized loops; i.e. we
   throw away all latch edges and mark blocks inside any remaining cycle.
   Everything is a bit complicated due to fact we do not want to do this
   for parts of cycles that only "pass" through some loop -- i.e. for
   each cycle, we want to mark blocks that belong directly to innermost
   loop containing the whole cycle.

   LOOPS is the loop tree.  Returns true iff an irreducible region
   (and hence at least one irreducible edge) was found.  */

/* Vertex representing a loop in the auxiliary graph: loop numbers are
   offset past the basic-block indices so the two never collide.  */
#define LOOP_REPR(LOOP) ((LOOP)->num + last_basic_block_for_fn (cfun))
/* Vertex representing a basic block (shifted by 1 past the entry).  */
#define BB_REPR(BB) ((BB)->index + 1)

bool
mark_irreducible_loops (void)
{
  basic_block act;
  struct graph_edge *ge;
  edge e;
  edge_iterator ei;
  int src, dest;
  unsigned depth;
  struct graph *g;
  int num = number_of_loops (cfun);
  class loop *cloop;
  bool irred_loop_found = false;
  int i;

  gcc_assert (current_loops != NULL);

  /* Reset the flags.  */
  FOR_BB_BETWEEN (act, ENTRY_BLOCK_PTR_FOR_FN (cfun),
		  EXIT_BLOCK_PTR_FOR_FN (cfun), next_bb)
    {
      act->flags &= ~BB_IRREDUCIBLE_LOOP;
      FOR_EACH_EDGE (e, ei, act->succs)
	e->flags &= ~EDGE_IRREDUCIBLE_LOOP;
    }

  /* Create the edge lists.  One vertex per basic block plus one per
     recognized loop.  */
  g = new_graph (last_basic_block_for_fn (cfun) + num);

  FOR_BB_BETWEEN (act, ENTRY_BLOCK_PTR_FOR_FN (cfun),
		  EXIT_BLOCK_PTR_FOR_FN (cfun), next_bb)
    FOR_EACH_EDGE (e, ei, act->succs)
      {
	/* Ignore edges to exit.  */
	if (e->dest == EXIT_BLOCK_PTR_FOR_FN (cfun))
	  continue;

	src = BB_REPR (act);
	dest = BB_REPR (e->dest);

	/* Ignore latch edges.  Removing back edges of recognized loops
	   is what leaves only the irreducible cycles behind.  */
	if (e->dest->loop_father->header == e->dest
	    && dominated_by_p (CDI_DOMINATORS, act, e->dest))
	  continue;

	/* Edges inside a single loop should be left where they are.  Edges
	   to subloop headers should lead to representative of the subloop,
	   but from the same place.

	   Edges exiting loops should lead from representative
	   of the son of nearest common ancestor of the loops in that
	   act lays.  */

	if (e->dest->loop_father->header == e->dest)
	  dest = LOOP_REPR (e->dest->loop_father);

	if (!flow_bb_inside_loop_p (act->loop_father, e->dest))
	  {
	    depth = 1 + loop_depth (find_common_loop (act->loop_father,
						      e->dest->loop_father));
	    if (depth == loop_depth (act->loop_father))
	      cloop = act->loop_father;
	    else
	      cloop = (*act->loop_father->superloops)[depth];

	    src = LOOP_REPR (cloop);
	  }

	/* Remember the CFG edge so it can be flagged once SCCs are known.  */
	add_edge (g, src, dest)->data = e;
      }

  /* Find the strongly connected components.  */
  graphds_scc (g, NULL);

  /* Mark the irreducible loops.  */
  for (i = 0; i < g->n_vertices; i++)
    for (ge = g->vertices[i].succ; ge; ge = ge->succ_next)
      {
	edge real = (edge) ge->data;
	/* edge E in graph G is irreducible if it connects two vertices in the
	   same scc.  */

	/* All edges should lead from a component with higher number to the
	   one with lower one.  */
	gcc_assert (g->vertices[ge->src].component >= g->vertices[ge->dest].component);

	if (g->vertices[ge->src].component != g->vertices[ge->dest].component)
	  continue;

	real->flags |= EDGE_IRREDUCIBLE_LOOP;
	irred_loop_found = true;
	/* Mark the source block only if the edge stays within the loop the
	   source belongs to; edges leaving the loop do not make the source
	   block itself irreducible.  */
	if (flow_bb_inside_loop_p (real->src->loop_father, real->dest))
	  real->src->flags |= BB_IRREDUCIBLE_LOOP;
      }

  free_graph (g);

  loops_state_set (LOOPS_HAVE_MARKED_IRREDUCIBLE_REGIONS);
  return irred_loop_found;
}
174 | ||
175 | /* Counts number of insns inside LOOP. */ | |
176 | int | |
99b1c316 | 177 | num_loop_insns (const class loop *loop) |
3d436d2a ZD |
178 | { |
179 | basic_block *bbs, bb; | |
180 | unsigned i, ninsns = 0; | |
1f75b71e | 181 | rtx_insn *insn; |
3d436d2a ZD |
182 | |
183 | bbs = get_loop_body (loop); | |
184 | for (i = 0; i < loop->num_nodes; i++) | |
185 | { | |
186 | bb = bbs[i]; | |
b5b8b0ac AO |
187 | FOR_BB_INSNS (bb, insn) |
188 | if (NONDEBUG_INSN_P (insn)) | |
91f4cfe3 | 189 | ninsns++; |
3d436d2a | 190 | } |
53a51cef JJ |
191 | free (bbs); |
192 | ||
193 | if (!ninsns) | |
194 | ninsns = 1; /* To avoid division by zero. */ | |
d329e058 | 195 | |
3d436d2a ZD |
196 | return ninsns; |
197 | } | |
198 | ||
/* Counts number of insns executed on average per iteration LOOP.
   Each block's insn count is weighted by its execution frequency
   relative to the loop header.  The result is capped at 1000000 and
   is at least 1 (safe to use as a divisor).  */
int
average_num_loop_insns (const class loop *loop)
{
  basic_block *bbs, bb;
  unsigned i, binsns;
  sreal ninsns;
  rtx_insn *insn;

  ninsns = 0;
  bbs = get_loop_body (loop);
  for (i = 0; i < loop->num_nodes; i++)
    {
      bb = bbs[i];

      /* Count non-debug insns in this block.  */
      binsns = 0;
      FOR_BB_INSNS (bb, insn)
	if (NONDEBUG_INSN_P (insn))
	  binsns++;

      /* Weight by how often BB executes per header execution.  */
      ninsns += (sreal)binsns * bb->count.to_sreal_scale (loop->header->count);
      /* Avoid overflows.  */
      if (ninsns > 1000000)
	{
	  free (bbs);
	  return 1000000;
	}
    }
  free (bbs);

  int64_t ret = ninsns.to_int ();
  if (!ret)
    ret = 1; /* To avoid division by zero.  */

  return ret;
}
235 | ||
89619f87 JH |
/* Return true if BB profile can be used to determine the expected number of
   iterations (that is, the number of executions of latch edge(s) for each
   entry of the loop).  If this is the case initialize RET with the number
   of iterations.

   RELIABLE is set if profile indicates that the returned value should be a
   realistic estimate.  (This is the case if we read profile and have not
   messed it up yet, and not the case of guessed profiles.)

   This function uses only CFG profile.  We track more reliable info in
   loop_info structure and for loop optimization heuristics more relevant
   is get_estimated_loop_iterations API.  */

bool
expected_loop_iterations_by_profile (const class loop *loop, sreal *ret,
				     bool *reliable)
{
  profile_count header_count = loop->header->count;
  if (reliable)
    *reliable = false;

  /* TODO: For single exit loops we can use loop exit edge probability.
     It also may be reliable while loop itself was adjusted.  */
  if (!header_count.initialized_p ()
      || !header_count.nonzero_p ())
    return false;

  /* Sum of counts on edges entering the loop from outside.  */
  profile_count count_in = profile_count::zero ();
  edge e;
  edge_iterator ei;

  /* For single-latch loops avoid querying dominators.  */
  if (loop->latch)
    {
      bool found = false;
      FOR_EACH_EDGE (e, ei, loop->header->preds)
	if (e->src != loop->latch)
	  count_in += e->count ();
	else
	  found = true;
      /* If latch is not found, loop is inconsistent.  */
      gcc_checking_assert (found);
    }
  else
    /* Multiple latches: an edge from a block dominated by the header is a
       back edge, anything else enters from outside.  */
    FOR_EACH_EDGE (e, ei, loop->header->preds)
      if (!dominated_by_p (CDI_DOMINATORS, e->src, loop->header))
	count_in += e->count ();

  bool known;
  /* Number of iterations is number of executions of latch edge.  */
  *ret = (header_count - count_in).to_sreal_scale (count_in, &known);
  if (!known)
    return false;
  if (reliable)
    {
      /* Header should have at least count_in many executions.
	 Give up on clearly inconsistent profile.  */
      if (header_count < count_in && header_count.differs_from_p (count_in))
	{
	  if (dump_file && (dump_flags & TDF_DETAILS))
	    fprintf (dump_file, "Inconsistent bb profile of loop %i\n",
		     loop->num);
	  *reliable = false;
	}
      else
	*reliable = count_in.reliable_p () && header_count.reliable_p ();
    }
  return true;
}
305 | ||
/* Return true if loop CFG profile may be unrealistically flat.
   This is a common case, since average loops iterate only about 5 times.
   In the case we do not have profile feedback or do not know real number of
   iterations during profile estimation, we are likely going to predict it with
   similar low iteration count.  For static loop profiles we also artificially
   cap profile of loops with known large iteration count so they do not appear
   significantly more hot than other loops with unknown iteration counts.

   For loop optimization heuristics we ignore CFG profile and instead
   use get_estimated_loop_iterations API which returns estimate
   only when it is realistic.  For unknown counts some optimizations,
   like vectorizer or unroller make guess that iteration count will
   be large.  In this case we need to avoid scaling down the profile
   after the loop transform.  */

bool
maybe_flat_loop_profile (const class loop *loop)
{
  bool reliable;
  sreal ret;

  /* No usable CFG estimate at all: assume the profile may be flat.  */
  if (!expected_loop_iterations_by_profile (loop, &ret, &reliable))
    return true;

  /* Reliable CFG estimates ought never be flat.  Sanity check with
     nb_iterations_estimate.  If those differ, it is a bug in the profile
     updating code.  */
  if (reliable)
    {
      int64_t intret = ret.to_nearest_int ();
      /* Treat a factor-of-two mismatch in either direction as inconsistent.  */
      if (loop->any_estimate
	  && (wi::ltu_p (intret * 2, loop->nb_iterations_estimate)
	      || wi::gtu_p (intret, loop->nb_iterations_estimate * 2)))
	{
	  if (dump_file && (dump_flags & TDF_DETAILS))
	    fprintf (dump_file,
		     "Loop %i has inconsistent iterations estimates: "
		     "reliable CFG based iteration estimate is %f "
		     "while nb_iterations_estimate is %i\n",
		     loop->num,
		     ret.to_double (),
		     (int)loop->nb_iterations_estimate.to_shwi ());
	  return true;
	}
      return false;
    }

  /* Allow some margin of error and see if we are close to known bounds.
     sreal (9,-3) is 9/8.  */
  int64_t intret = (ret * sreal (9, -3)).to_nearest_int ();
  if (loop->any_upper_bound && wi::geu_p (intret, loop->nb_iterations_upper_bound))
    return false;
  if (loop->any_likely_upper_bound
      && wi::geu_p (intret, loop->nb_iterations_likely_upper_bound))
    return false;
  if (loop->any_estimate
      && wi::geu_p (intret, loop->nb_iterations_estimate))
    return false;
  return true;
}
366 | ||
/* Returns expected number of iterations of LOOP, according to
   measured or guessed profile.

   This function attempts to return a "sane" value even if profile
   information is not good enough to derive something.
   If READ_PROFILE_P is non-NULL, it is set to true when the estimate
   comes from a reliable (read) profile.  */

gcov_type
expected_loop_iterations_unbounded (const class loop *loop,
				    bool *read_profile_p)
{
  gcov_type expected = -1;

  if (read_profile_p)
    *read_profile_p = false;

  sreal sreal_expected;
  if (expected_loop_iterations_by_profile
	  (loop, &sreal_expected, read_profile_p))
    expected = sreal_expected.to_nearest_int ();
  else
    /* No usable CFG profile; fall back to the tuning parameter.  */
    expected = param_avg_loop_niter;

  /* Never exceed the recorded maximum iteration count, when known.  */
  HOST_WIDE_INT max = get_max_loop_iterations_int (loop);
  if (max != -1 && max < expected)
    return max;

  return expected;
}
689ba89d | 395 | |
ac84e05e ZD |
396 | /* Returns expected number of LOOP iterations. The returned value is bounded |
397 | by REG_BR_PROB_BASE. */ | |
398 | ||
399 | unsigned | |
99b1c316 | 400 | expected_loop_iterations (class loop *loop) |
ac84e05e ZD |
401 | { |
402 | gcov_type expected = expected_loop_iterations_unbounded (loop); | |
403 | return (expected > REG_BR_PROB_BASE ? REG_BR_PROB_BASE : expected); | |
404 | } | |
405 | ||
689ba89d ZD |
406 | /* Returns the maximum level of nesting of subloops of LOOP. */ |
407 | ||
408 | unsigned | |
99b1c316 | 409 | get_loop_level (const class loop *loop) |
689ba89d | 410 | { |
99b1c316 | 411 | const class loop *ploop; |
689ba89d ZD |
412 | unsigned mx = 0, l; |
413 | ||
414 | for (ploop = loop->inner; ploop; ploop = ploop->next) | |
415 | { | |
416 | l = get_loop_level (ploop); | |
417 | if (l >= mx) | |
418 | mx = l + 1; | |
419 | } | |
420 | return mx; | |
421 | } | |
5e962776 | 422 | |
/* Initialize the constants for computing set costs: the number of
   available/call-clobbered/reserved hard registers and the per-insn
   costs of a register move and of a spill/reload, for both size- and
   speed-optimized code.  */

void
init_set_costs (void)
{
  int speed;
  rtx_insn *seq;
  /* Scratch pseudo-like registers used only to emit cost-probe sequences.  */
  rtx reg1 = gen_raw_REG (SImode, LAST_VIRTUAL_REGISTER + 1);
  rtx reg2 = gen_raw_REG (SImode, LAST_VIRTUAL_REGISTER + 2);
  rtx addr = gen_raw_REG (Pmode, LAST_VIRTUAL_REGISTER + 3);
  rtx mem = validize_mem (gen_rtx_MEM (SImode, addr));
  unsigned i;

  target_avail_regs = 0;
  target_clobbered_regs = 0;
  for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
    if (TEST_HARD_REG_BIT (reg_class_contents[GENERAL_REGS], i)
	&& !fixed_regs[i])
      {
	target_avail_regs++;
	/* ??? This is only a rough heuristic.  It doesn't cope well
	   with alternative ABIs, but that's an optimization rather than
	   correctness issue.  */
	if (default_function_abi.clobbers_full_reg_p (i))
	  target_clobbered_regs++;
      }

  target_res_regs = 3;

  /* speed == 0 computes size costs, speed == 1 speed costs.  */
  for (speed = 0; speed < 2; speed++)
    {
      crtl->maybe_hot_insn_p = speed;
      /* Set up the costs for using extra registers:

	 1) If not many free registers remain, we should prefer having an
	    additional move to decreasing the number of available registers.
	    (TARGET_REG_COST).
	 2) If no registers are available, we need to spill, which may require
	    storing the old value to memory and loading it back
	    (TARGET_SPILL_COST).  */

      start_sequence ();
      emit_move_insn (reg1, reg2);
      seq = get_insns ();
      end_sequence ();
      target_reg_cost [speed] = seq_cost (seq, speed);

      start_sequence ();
      emit_move_insn (mem, reg1);
      emit_move_insn (reg2, mem);
      seq = get_insns ();
      end_sequence ();
      target_spill_cost [speed] = seq_cost (seq, speed);
    }
  default_rtl_profile ();
}
479 | ||
/* Estimates cost of increased register pressure caused by making N_NEW new
   registers live around the loop.  N_OLD is the number of registers live
   around the loop.  If CALL_P is true, also take into account that
   call-used registers may be clobbered in the loop body, reducing the
   number of available registers before we spill.  SPEED selects between
   the size (false) and speed (true) cost tables.  Returns 0 when enough
   registers remain free.  */

unsigned
estimate_reg_pressure_cost (unsigned n_new, unsigned n_old, bool speed,
			    bool call_p)
{
  unsigned cost;
  unsigned regs_needed = n_new + n_old;
  unsigned available_regs = target_avail_regs;

  /* If there is a call in the loop body, the call-clobbered registers
     are not available for loop invariants.  */
  if (call_p)
    available_regs = available_regs - target_clobbered_regs;

  /* If we have enough registers, we should use them and not restrict
     the transformations unnecessarily.  */
  if (regs_needed + target_res_regs <= available_regs)
    return 0;

  if (regs_needed <= available_regs)
    /* If we are close to running out of registers, try to preserve
       them.  */
    cost = target_reg_cost [speed] * n_new;
  else
    /* If we run out of registers, it is very expensive to add another
       one.  */
    cost = target_spill_cost [speed] * n_new;

  if (optimize && (flag_ira_region == IRA_REGION_ALL
		   || flag_ira_region == IRA_REGION_MIXED)
      && number_of_loops (cfun) <= (unsigned) param_ira_max_loops_num)
    /* IRA regional allocation deals with high register pressure
       better.  So decrease the cost.  (For IRA to compute the cost
       more accurately, we would need to know how many registers live
       through the loop transparently.)  */
    cost /= 2;

  return cost;
}
524 | ||
d73be268 | 525 | /* Sets EDGE_LOOP_EXIT flag for all loop exits. */ |
70388d94 ZD |
526 | |
527 | void | |
d73be268 | 528 | mark_loop_exit_edges (void) |
70388d94 ZD |
529 | { |
530 | basic_block bb; | |
531 | edge e; | |
c22cacf3 | 532 | |
0fc822d0 | 533 | if (number_of_loops (cfun) <= 1) |
70388d94 ZD |
534 | return; |
535 | ||
11cd3bed | 536 | FOR_EACH_BB_FN (bb, cfun) |
70388d94 ZD |
537 | { |
538 | edge_iterator ei; | |
539 | ||
70388d94 ZD |
540 | FOR_EACH_EDGE (e, ei, bb->succs) |
541 | { | |
9ba025a2 | 542 | if (loop_outer (bb->loop_father) |
2ff3e325 | 543 | && loop_exit_edge_p (bb->loop_father, e)) |
70388d94 ZD |
544 | e->flags |= EDGE_LOOP_EXIT; |
545 | else | |
546 | e->flags &= ~EDGE_LOOP_EXIT; | |
547 | } | |
548 | } | |
549 | } | |
550 | ||
/* Return exit edge if loop has only one exit that is likely
   to be executed on runtime (i.e. it is not EH or leading
   to a noreturn call).  Returns NULL when more than one likely
   exit exists.  EXITS is the precomputed list of exit edges.  */

edge
single_likely_exit (class loop *loop, const vec<edge> &exits)
{
  edge found = single_exit (loop);
  unsigned i;
  edge ex;

  /* Trivial case: a structurally unique exit.  */
  if (found)
    return found;
  FOR_EACH_VEC_ELT (exits, i, ex)
    {
      if (probably_never_executed_edge_p (cfun, ex)
	  /* We want to rule out paths to noreturns but not low probabilities
	     resulting from adjustments or combining.
	     FIXME: once we have better quality tracking, make this more
	     robust.  */
	  || ex->probability <= profile_probability::very_unlikely ())
	continue;
      if (!found)
	found = ex;
      else
	/* Second likely exit: no unique answer.  */
	return NULL;
    }
  return found;
}
519cac4a JH |
580 | |
581 | ||
582 | /* Gets basic blocks of a LOOP. Header is the 0-th block, rest is in dfs | |
583 | order against direction of edges from latch. Specially, if | |
584 | header != latch, latch is the 1-st block. */ | |
585 | ||
a165040e | 586 | auto_vec<basic_block> |
99b1c316 | 587 | get_loop_hot_path (const class loop *loop) |
519cac4a JH |
588 | { |
589 | basic_block bb = loop->header; | |
caeb8892 | 590 | auto_vec<basic_block> path; |
519cac4a JH |
591 | bitmap visited = BITMAP_ALLOC (NULL); |
592 | ||
593 | while (true) | |
594 | { | |
595 | edge_iterator ei; | |
596 | edge e; | |
597 | edge best = NULL; | |
598 | ||
9771b263 | 599 | path.safe_push (bb); |
519cac4a JH |
600 | bitmap_set_bit (visited, bb->index); |
601 | FOR_EACH_EDGE (e, ei, bb->succs) | |
602 | if ((!best || e->probability > best->probability) | |
603 | && !loop_exit_edge_p (loop, e) | |
604 | && !bitmap_bit_p (visited, e->dest->index)) | |
605 | best = e; | |
606 | if (!best || best->dest == loop->header) | |
607 | break; | |
608 | bb = best->dest; | |
609 | } | |
610 | BITMAP_FREE (visited); | |
611 | return path; | |
612 | } |