]>
Commit | Line | Data |
---|---|---|
f1ebdfc5 | 1 | /* Branch prediction routines for the GNU compiler. |
05e643de | 2 | Copyright (C) 2000, 2001, 2002, 2003 Free Software Foundation, Inc. |
f1ebdfc5 | 3 | |
bfdade77 | 4 | This file is part of GCC. |
f1ebdfc5 | 5 | |
bfdade77 RK |
6 | GCC is free software; you can redistribute it and/or modify it under |
7 | the terms of the GNU General Public License as published by the Free | |
8 | Software Foundation; either version 2, or (at your option) any later | |
9 | version. | |
f1ebdfc5 | 10 | |
bfdade77 RK |
11 | GCC is distributed in the hope that it will be useful, but WITHOUT ANY |
12 | WARRANTY; without even the implied warranty of MERCHANTABILITY or | |
13 | FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License | |
14 | for more details. | |
f1ebdfc5 | 15 | |
bfdade77 RK |
16 | You should have received a copy of the GNU General Public License |
17 | along with GCC; see the file COPYING. If not, write to the Free | |
18 | Software Foundation, 59 Temple Place - Suite 330, Boston, MA | |
19 | 02111-1307, USA. */ | |
f1ebdfc5 JE |
20 | |
21 | /* References: | |
22 | ||
23 | [1] "Branch Prediction for Free" | |
24 | Ball and Larus; PLDI '93. | |
25 | [2] "Static Branch Frequency and Program Profile Analysis" | |
26 | Wu and Larus; MICRO-27. | |
27 | [3] "Corpus-based Static Branch Prediction" | |
3ef42a0c | 28 | Calder, Grunwald, Lindsay, Martin, Mozer, and Zorn; PLDI '95. */ |
f1ebdfc5 JE |
29 | |
30 | ||
31 | #include "config.h" | |
32 | #include "system.h" | |
4977bab6 ZW |
33 | #include "coretypes.h" |
34 | #include "tm.h" | |
f1ebdfc5 JE |
35 | #include "tree.h" |
36 | #include "rtl.h" | |
37 | #include "tm_p.h" | |
efc9bd41 | 38 | #include "hard-reg-set.h" |
f1ebdfc5 JE |
39 | #include "basic-block.h" |
40 | #include "insn-config.h" | |
41 | #include "regs.h" | |
f1ebdfc5 JE |
42 | #include "flags.h" |
43 | #include "output.h" | |
44 | #include "function.h" | |
45 | #include "except.h" | |
46 | #include "toplev.h" | |
47 | #include "recog.h" | |
f1ebdfc5 | 48 | #include "expr.h" |
4db384c9 | 49 | #include "predict.h" |
d79f9ec9 | 50 | #include "coverage.h" |
ac5e69da | 51 | #include "sreal.h" |
194734e9 JH |
52 | #include "params.h" |
53 | #include "target.h" | |
355be0dc | 54 | #include "loop.h" |
3d436d2a | 55 | #include "cfgloop.h" |
/* Real-number constants pre-computed once in estimate_bb_frequencies:
   0, 1, 1-1/REG_BR_PROB_BASE, REG_BR_PROB_BASE,
   1/REG_BR_PROB_BASE, 0.5, BB_FREQ_MAX.  */
static sreal real_zero, real_one, real_almost_one, real_br_prob_base,
	     real_inv_br_prob_base, real_one_half, real_bb_freq_max;

/* Random guesstimation given names.  Probabilities are expressed on the
   REG_BR_PROB_BASE scale.  */
#define PROB_VERY_UNLIKELY	(REG_BR_PROB_BASE / 10 - 1)
#define PROB_EVEN		(REG_BR_PROB_BASE / 2)
#define PROB_VERY_LIKELY	(REG_BR_PROB_BASE - PROB_VERY_UNLIKELY)
#define PROB_ALWAYS		(REG_BR_PROB_BASE)

/* Forward declarations of helpers local to this file.  */
static bool predicted_by_p (basic_block, enum br_predictor);
static void combine_predictions_for_insn (rtx, basic_block);
static void dump_prediction (enum br_predictor, int, basic_block, int);
static void estimate_loops_at_level (struct loop *loop);
static void propagate_freq (struct loop *);
static void estimate_bb_frequencies (struct loops *);
static void counts_to_freqs (void);
static void process_note_predictions (basic_block, int *, dominance_info,
				      dominance_info);
static void process_note_prediction (basic_block, int *, dominance_info,
				     dominance_info, int, int);
static bool last_basic_block_p (basic_block);
static void compute_function_frequency (void);
static void choose_function_section (void);
static bool can_predict_insn_p (rtx);

/* Information we hold about each branch predictor.
   Filled using information from predict.def.  */

struct predictor_info
{
  const char *const name;	/* Name used in the debugging dumps.  */
  const int hitrate;		/* Expected hitrate used by
				   predict_insn_def call.  */
  const int flags;
};

/* Use given predictor without Dempster-Shaffer theory if it matches
   using first_match heuristics.  */
#define PRED_FLAG_FIRST_MATCH 1

/* Recompute hitrate in percent to our representation.  */

#define HITRATE(VAL) ((int) ((VAL) * REG_BR_PROB_BASE + 50) / 100)

/* Expand each entry of predict.def into an initializer for the table
   below; the table is indexed by enum br_predictor.  */
#define DEF_PREDICTOR(ENUM, NAME, HITRATE, FLAGS) {NAME, HITRATE, FLAGS},
static const struct predictor_info predictor_info[]= {
#include "predict.def"

  /* Upper bound on predictors.  */
  {NULL, 0, 0}
};
#undef DEF_PREDICTOR
194734e9 JH |
111 | |
112 | /* Return true in case BB can be CPU intensive and should be optimized | |
d55d8fc7 | 113 | for maximal performance. */ |
194734e9 JH |
114 | |
115 | bool | |
79a490a9 | 116 | maybe_hot_bb_p (basic_block bb) |
194734e9 | 117 | { |
cdb23767 | 118 | if (profile_info && flag_branch_probabilities |
194734e9 | 119 | && (bb->count |
cdb23767 | 120 | < profile_info->sum_max / PARAM_VALUE (HOT_BB_COUNT_FRACTION))) |
194734e9 JH |
121 | return false; |
122 | if (bb->frequency < BB_FREQ_MAX / PARAM_VALUE (HOT_BB_FREQUENCY_FRACTION)) | |
123 | return false; | |
124 | return true; | |
125 | } | |
126 | ||
127 | /* Return true in case BB is cold and should be optimized for size. */ | |
128 | ||
129 | bool | |
79a490a9 | 130 | probably_cold_bb_p (basic_block bb) |
194734e9 | 131 | { |
cdb23767 | 132 | if (profile_info && flag_branch_probabilities |
194734e9 | 133 | && (bb->count |
cdb23767 | 134 | < profile_info->sum_max / PARAM_VALUE (HOT_BB_COUNT_FRACTION))) |
194734e9 JH |
135 | return true; |
136 | if (bb->frequency < BB_FREQ_MAX / PARAM_VALUE (HOT_BB_FREQUENCY_FRACTION)) | |
137 | return true; | |
138 | return false; | |
139 | } | |
140 | ||
141 | /* Return true in case BB is probably never executed. */ | |
142 | bool | |
79a490a9 | 143 | probably_never_executed_bb_p (basic_block bb) |
194734e9 | 144 | { |
cdb23767 NS |
145 | if (profile_info && flag_branch_probabilities) |
146 | return ((bb->count + profile_info->runs / 2) / profile_info->runs) == 0; | |
194734e9 JH |
147 | return false; |
148 | } | |
149 | ||
969d70ca JH |
150 | /* Return true if the one of outgoing edges is already predicted by |
151 | PREDICTOR. */ | |
152 | ||
153 | static bool | |
79a490a9 | 154 | predicted_by_p (basic_block bb, enum br_predictor predictor) |
969d70ca JH |
155 | { |
156 | rtx note; | |
a813c111 | 157 | if (!INSN_P (BB_END (bb))) |
969d70ca | 158 | return false; |
a813c111 | 159 | for (note = REG_NOTES (BB_END (bb)); note; note = XEXP (note, 1)) |
969d70ca JH |
160 | if (REG_NOTE_KIND (note) == REG_BR_PRED |
161 | && INTVAL (XEXP (XEXP (note, 0), 0)) == (int)predictor) | |
162 | return true; | |
163 | return false; | |
164 | } | |
ee92cb46 | 165 | |
4db384c9 | 166 | void |
79a490a9 | 167 | predict_insn (rtx insn, enum br_predictor predictor, int probability) |
4db384c9 | 168 | { |
ee92cb46 JH |
169 | if (!any_condjump_p (insn)) |
170 | abort (); | |
d50672ef JH |
171 | if (!flag_guess_branch_prob) |
172 | return; | |
bfdade77 | 173 | |
ee92cb46 | 174 | REG_NOTES (insn) |
4db384c9 JH |
175 | = gen_rtx_EXPR_LIST (REG_BR_PRED, |
176 | gen_rtx_CONCAT (VOIDmode, | |
177 | GEN_INT ((int) predictor), | |
178 | GEN_INT ((int) probability)), | |
179 | REG_NOTES (insn)); | |
180 | } | |
181 | ||
182 | /* Predict insn by given predictor. */ | |
bfdade77 | 183 | |
4db384c9 | 184 | void |
79a490a9 AJ |
185 | predict_insn_def (rtx insn, enum br_predictor predictor, |
186 | enum prediction taken) | |
4db384c9 JH |
187 | { |
188 | int probability = predictor_info[(int) predictor].hitrate; | |
bfdade77 | 189 | |
4db384c9 JH |
190 | if (taken != TAKEN) |
191 | probability = REG_BR_PROB_BASE - probability; | |
bfdade77 | 192 | |
4db384c9 | 193 | predict_insn (insn, predictor, probability); |
ee92cb46 JH |
194 | } |
195 | ||
196 | /* Predict edge E with given probability if possible. */ | |
bfdade77 | 197 | |
4db384c9 | 198 | void |
79a490a9 | 199 | predict_edge (edge e, enum br_predictor predictor, int probability) |
ee92cb46 JH |
200 | { |
201 | rtx last_insn; | |
a813c111 | 202 | last_insn = BB_END (e->src); |
ee92cb46 JH |
203 | |
204 | /* We can store the branch prediction information only about | |
205 | conditional jumps. */ | |
206 | if (!any_condjump_p (last_insn)) | |
207 | return; | |
208 | ||
209 | /* We always store probability of branching. */ | |
210 | if (e->flags & EDGE_FALLTHRU) | |
211 | probability = REG_BR_PROB_BASE - probability; | |
212 | ||
4db384c9 JH |
213 | predict_insn (last_insn, predictor, probability); |
214 | } | |
215 | ||
2ffa9932 JH |
216 | /* Return true when we can store prediction on insn INSN. |
217 | At the moment we represent predictions only on conditional | |
218 | jumps, not at computed jump or other complicated cases. */ | |
219 | static bool | |
79a490a9 | 220 | can_predict_insn_p (rtx insn) |
2ffa9932 JH |
221 | { |
222 | return (GET_CODE (insn) == JUMP_INSN | |
223 | && any_condjump_p (insn) | |
224 | && BLOCK_FOR_INSN (insn)->succ->succ_next); | |
225 | } | |
226 | ||
4db384c9 | 227 | /* Predict edge E by given predictor if possible. */ |
bfdade77 | 228 | |
4db384c9 | 229 | void |
79a490a9 AJ |
230 | predict_edge_def (edge e, enum br_predictor predictor, |
231 | enum prediction taken) | |
4db384c9 JH |
232 | { |
233 | int probability = predictor_info[(int) predictor].hitrate; | |
234 | ||
235 | if (taken != TAKEN) | |
236 | probability = REG_BR_PROB_BASE - probability; | |
bfdade77 | 237 | |
4db384c9 JH |
238 | predict_edge (e, predictor, probability); |
239 | } | |
240 | ||
241 | /* Invert all branch predictions or probability notes in the INSN. This needs | |
242 | to be done each time we invert the condition used by the jump. */ | |
bfdade77 | 243 | |
4db384c9 | 244 | void |
79a490a9 | 245 | invert_br_probabilities (rtx insn) |
4db384c9 | 246 | { |
bfdade77 RK |
247 | rtx note; |
248 | ||
249 | for (note = REG_NOTES (insn); note; note = XEXP (note, 1)) | |
250 | if (REG_NOTE_KIND (note) == REG_BR_PROB) | |
251 | XEXP (note, 0) = GEN_INT (REG_BR_PROB_BASE - INTVAL (XEXP (note, 0))); | |
252 | else if (REG_NOTE_KIND (note) == REG_BR_PRED) | |
253 | XEXP (XEXP (note, 0), 1) | |
254 | = GEN_INT (REG_BR_PROB_BASE - INTVAL (XEXP (XEXP (note, 0), 1))); | |
4db384c9 JH |
255 | } |
256 | ||
/* Dump information about the branch prediction to the output file.
   PROBABILITY is on the REG_BR_PROB_BASE scale; USED tells whether this
   predictor's value actually contributed to the final decision.  */

static void
dump_prediction (enum br_predictor predictor, int probability,
		 basic_block bb, int used)
{
  /* Find the first non-fallthru successor; its count tells how often
     the branch was actually taken.  */
  edge e = bb->succ;

  if (!rtl_dump_file)
    return;

  while (e && (e->flags & EDGE_FALLTHRU))
    e = e->succ_next;

  fprintf (rtl_dump_file, "  %s heuristics%s: %.1f%%",
	   predictor_info[predictor].name,
	   used ? "" : " (ignored)", probability * 100.0 / REG_BR_PROB_BASE);

  /* With profile feedback, also show measured execution and hit counts
     so the guess can be compared against reality.  */
  if (bb->count)
    {
      fprintf (rtl_dump_file, "  exec ");
      fprintf (rtl_dump_file, HOST_WIDEST_INT_PRINT_DEC, bb->count);
      if (e)
	{
	  fprintf (rtl_dump_file, " hit ");
	  fprintf (rtl_dump_file, HOST_WIDEST_INT_PRINT_DEC, e->count);
	  fprintf (rtl_dump_file, " (%.1f%%)", e->count * 100.0 / bb->count);
	}
    }

  fprintf (rtl_dump_file, "\n");
}
289 | ||
/* Combine all REG_BR_PRED notes into single probability and attach REG_BR_PROB
   note if not already present.  Remove now useless REG_BR_PRED notes.  */

static void
combine_predictions_for_insn (rtx insn, basic_block bb)
{
  rtx prob_note = find_reg_note (insn, REG_BR_PROB, 0);
  rtx *pnote = &REG_NOTES (insn);
  rtx note;
  int best_probability = PROB_EVEN;
  int best_predictor = END_PREDICTORS;
  /* Start from an even 50% prior; each prediction note refines it.  */
  int combined_probability = REG_BR_PROB_BASE / 2;
  int d;
  bool first_match = false;
  bool found = false;

  if (rtl_dump_file)
    fprintf (rtl_dump_file, "Predictions for insn %i bb %i\n", INSN_UID (insn),
	     bb->index);

  /* We implement "first match" heuristics and use probability guessed
     by predictor with smallest index.  In the future we will use better
     probability combination techniques.  */
  for (note = REG_NOTES (insn); note; note = XEXP (note, 1))
    if (REG_NOTE_KIND (note) == REG_BR_PRED)
      {
	int predictor = INTVAL (XEXP (XEXP (note, 0), 0));
	int probability = INTVAL (XEXP (XEXP (note, 0), 1));

	found = true;
	/* Remember the predictor with the smallest index seen so far;
	   that is the "first match" candidate.  */
	if (best_predictor > predictor)
	  best_probability = probability, best_predictor = predictor;

	/* Dempster-Shaffer style combination: D is the normalization
	   term p*q + (1-p)*(1-q).  */
	d = (combined_probability * probability
	     + (REG_BR_PROB_BASE - combined_probability)
	     * (REG_BR_PROB_BASE - probability));

	/* Use FP math to avoid overflows of 32bit integers.  */
	if (d == 0)
	  /* If one probability is 0% and one 100%, avoid division by zero.  */
	  combined_probability = REG_BR_PROB_BASE / 2;
	else
	  combined_probability = (((double) combined_probability) * probability
				  * REG_BR_PROB_BASE / d + 0.5);
      }

  /* Decide which heuristic to use.  In case we didn't match anything,
     use no_prediction heuristic, in case we did match, use either
     first match or Dempster-Shaffer theory depending on the flags.  */

  if (predictor_info [best_predictor].flags & PRED_FLAG_FIRST_MATCH)
    first_match = true;

  if (!found)
    dump_prediction (PRED_NO_PREDICTION, combined_probability, bb, true);
  else
    {
      dump_prediction (PRED_DS_THEORY, combined_probability, bb, !first_match);
      dump_prediction (PRED_FIRST_MATCH, best_probability, bb, first_match);
    }

  if (first_match)
    combined_probability = best_probability;
  dump_prediction (PRED_COMBINED, combined_probability, bb, true);

  /* Walk the note chain, dumping and unlinking every REG_BR_PRED note.
     PNOTE always points at the link field that may need rewriting, so
     unlinking is done by overwriting *PNOTE rather than re-walking.  */
  while (*pnote)
    {
      if (REG_NOTE_KIND (*pnote) == REG_BR_PRED)
	{
	  int predictor = INTVAL (XEXP (XEXP (*pnote, 0), 0));
	  int probability = INTVAL (XEXP (XEXP (*pnote, 0), 1));

	  dump_prediction (predictor, probability, bb,
			   !first_match || best_predictor == predictor);
	  *pnote = XEXP (*pnote, 1);
	}
      else
	pnote = &XEXP (*pnote, 1);
    }

  if (!prob_note)
    {
      REG_NOTES (insn)
	= gen_rtx_EXPR_LIST (REG_BR_PROB,
			     GEN_INT (combined_probability), REG_NOTES (insn));

      /* Save the prediction into CFG in case we are seeing non-degenerated
	 conditional jump.  */
      if (bb->succ->succ_next)
	{
	  BRANCH_EDGE (bb)->probability = combined_probability;
	  FALLTHRU_EDGE (bb)->probability
	    = REG_BR_PROB_BASE - combined_probability;
	}
    }
}
386 | ||
/* Statically estimate the probability that a branch will be taken.
   ??? In the next revision there will be a number of other predictors added
   from the above references.  Further, each heuristic will be factored out
   into its own function for clarity (and to facilitate the combination of
   predictions).  */

void
estimate_probability (struct loops *loops_info)
{
  dominance_info dominators, post_dominators;
  basic_block bb;
  unsigned i;

  /* Connect infinite loops to the exit block so that post-dominance is
     defined everywhere; the fake edges are removed again below.  */
  connect_infinite_loops_to_exit ();
  dominators = calculate_dominance_info (CDI_DOMINATORS);
  post_dominators = calculate_dominance_info (CDI_POST_DOMINATORS);

  /* Try to predict out blocks in a loop that are not part of a
     natural loop.  */
  for (i = 1; i < loops_info->num; i++)
    {
      basic_block bb, *bbs;
      unsigned j;
      int exits;
      struct loop *loop = loops_info->parray[i];
      struct loop_desc desc;
      unsigned HOST_WIDE_INT niter;

      flow_loop_scan (loops_info, loop, LOOP_EXIT_EDGES);
      exits = loop->num_exits;

      /* For loops with a known constant iteration count, predict the
	 back edge taken with probability 1 - 1/niter.  */
      if (simple_loop_p (loops_info, loop, &desc)
	  && desc.const_iter)
	{
	  int prob;
	  niter = desc.niter + 1;
	  if (niter == 0)        /* We might overflow here.  */
	    niter = desc.niter;

	  prob = (REG_BR_PROB_BASE
		  - (REG_BR_PROB_BASE + niter /2) / niter);
	  /* Branch prediction algorithm gives 0 frequency for everything
	     after the end of loop for loop having 0 probability to finish.  */
	  if (prob == REG_BR_PROB_BASE)
	    prob = REG_BR_PROB_BASE - 1;
	  predict_edge (desc.in_edge, PRED_LOOP_ITERATIONS,
			prob);
	}

      bbs = get_loop_body (loop);
      for (j = 0; j < loop->num_nodes; j++)
	{
	  int header_found = 0;
	  edge e;

	  bb = bbs[j];

	  /* Bypass loop heuristics on continue statement.  These
	     statements construct loops via "non-loop" constructs
	     in the source language and are better to be handled
	     separately.  */
	  if (!can_predict_insn_p (BB_END (bb))
	      || predicted_by_p (bb, PRED_CONTINUE))
	    continue;

	  /* Loop branch heuristics - predict an edge back to a
	     loop's head as taken.  */
	  for (e = bb->succ; e; e = e->succ_next)
	    if (e->dest == loop->header
		&& e->src == loop->latch)
	      {
		header_found = 1;
		predict_edge_def (e, PRED_LOOP_BRANCH, TAKEN);
	      }

	  /* Loop exit heuristics - predict an edge exiting the loop if the
	     conditional has no loop header successors as not taken.  The
	     not-taken probability is split evenly among the loop's exits.  */
	  if (!header_found)
	    for (e = bb->succ; e; e = e->succ_next)
	      if (e->dest->index < 0
		  || !flow_bb_inside_loop_p (loop, e->dest))
		predict_edge
		  (e, PRED_LOOP_EXIT,
		   (REG_BR_PROB_BASE
		    - predictor_info [(int) PRED_LOOP_EXIT].hitrate)
		   / exits);
	}
    }

  /* Attempt to predict conditional jumps using a number of heuristics.  */
  FOR_EACH_BB (bb)
    {
      rtx last_insn = BB_END (bb);
      rtx cond, earliest;
      edge e;

      if (! can_predict_insn_p (last_insn))
	continue;

      for (e = bb->succ; e; e = e->succ_next)
	{
	  /* Predict early returns to be probable, as we've already taken
	     care for error returns and other are often used for fast paths
	     through function.  */
	  if ((e->dest == EXIT_BLOCK_PTR
	       || (e->dest->succ && !e->dest->succ->succ_next
		   && e->dest->succ->dest == EXIT_BLOCK_PTR))
	      && !predicted_by_p (bb, PRED_NULL_RETURN)
	      && !predicted_by_p (bb, PRED_CONST_RETURN)
	      && !predicted_by_p (bb, PRED_NEGATIVE_RETURN)
	      && !last_basic_block_p (e->dest))
	    predict_edge_def (e, PRED_EARLY_RETURN, TAKEN);

	  /* Look for block we are guarding (ie we dominate it,
	     but it doesn't postdominate us).  */
	  if (e->dest != EXIT_BLOCK_PTR && e->dest != bb
	      && dominated_by_p (dominators, e->dest, e->src)
	      && !dominated_by_p (post_dominators, e->src, e->dest))
	    {
	      rtx insn;

	      /* The call heuristic claims that a guarded function call
		 is improbable.  This is because such calls are often used
		 to signal exceptional situations such as printing error
		 messages.  */
	      for (insn = BB_HEAD (e->dest); insn != NEXT_INSN (BB_END (e->dest));
		   insn = NEXT_INSN (insn))
		if (GET_CODE (insn) == CALL_INSN
		    /* Constant and pure calls are hardly used to signalize
		       something exceptional.  */
		    && ! CONST_OR_PURE_CALL_P (insn))
		  {
		    predict_edge_def (e, PRED_CALL, NOT_TAKEN);
		    break;
		  }
	    }
	}

      cond = get_condition (last_insn, &earliest, false);
      if (! cond)
	continue;

      /* Try "pointer heuristic."
	 A comparison ptr == 0 is predicted as false.
	 Similarly, a comparison ptr1 == ptr2 is predicted as false.  */
      if (GET_RTX_CLASS (GET_CODE (cond)) == '<'
	  && ((REG_P (XEXP (cond, 0)) && REG_POINTER (XEXP (cond, 0)))
	      || (REG_P (XEXP (cond, 1)) && REG_POINTER (XEXP (cond, 1)))))
	{
	  if (GET_CODE (cond) == EQ)
	    predict_insn_def (last_insn, PRED_POINTER, NOT_TAKEN);
	  else if (GET_CODE (cond) == NE)
	    predict_insn_def (last_insn, PRED_POINTER, TAKEN);
	}
      else

      /* Try "opcode heuristic."
	 EQ tests are usually false and NE tests are usually true.  Also,
	 most quantities are positive, so we can make the appropriate guesses
	 about signed comparisons against zero.  */
      switch (GET_CODE (cond))
	{
	case CONST_INT:
	  /* Unconditional branch.  */
	  predict_insn_def (last_insn, PRED_UNCONDITIONAL,
			    cond == const0_rtx ? NOT_TAKEN : TAKEN);
	  break;

	case EQ:
	case UNEQ:
	  /* Floating point comparisons appears to behave in a very
	     unpredictable way because of special role of = tests in
	     FP code.  */
	  if (FLOAT_MODE_P (GET_MODE (XEXP (cond, 0))))
	    ;
	  /* Comparisons with 0 are often used for booleans and there is
	     nothing useful to predict about them.  */
	  else if (XEXP (cond, 1) == const0_rtx
		   || XEXP (cond, 0) == const0_rtx)
	    ;
	  else
	    predict_insn_def (last_insn, PRED_OPCODE_NONEQUAL, NOT_TAKEN);
	  break;

	case NE:
	case LTGT:
	  /* Floating point comparisons appears to behave in a very
	     unpredictable way because of special role of = tests in
	     FP code.  */
	  if (FLOAT_MODE_P (GET_MODE (XEXP (cond, 0))))
	    ;
	  /* Comparisons with 0 are often used for booleans and there is
	     nothing useful to predict about them.  */
	  else if (XEXP (cond, 1) == const0_rtx
		   || XEXP (cond, 0) == const0_rtx)
	    ;
	  else
	    predict_insn_def (last_insn, PRED_OPCODE_NONEQUAL, TAKEN);
	  break;

	case ORDERED:
	  predict_insn_def (last_insn, PRED_FPOPCODE, TAKEN);
	  break;

	case UNORDERED:
	  predict_insn_def (last_insn, PRED_FPOPCODE, NOT_TAKEN);
	  break;

	case LE:
	case LT:
	  /* Comparisons against 0, 1 and -1 are signedness tests; most
	     quantities are positive, so predict < as not taken.  */
	  if (XEXP (cond, 1) == const0_rtx || XEXP (cond, 1) == const1_rtx
	      || XEXP (cond, 1) == constm1_rtx)
	    predict_insn_def (last_insn, PRED_OPCODE_POSITIVE, NOT_TAKEN);
	  break;

	case GE:
	case GT:
	  if (XEXP (cond, 1) == const0_rtx || XEXP (cond, 1) == const1_rtx
	      || XEXP (cond, 1) == constm1_rtx)
	    predict_insn_def (last_insn, PRED_OPCODE_POSITIVE, TAKEN);
	  break;

	default:
	  break;
	}
    }

  /* Attach the combined probability to each conditional jump.  */
  FOR_EACH_BB (bb)
    if (GET_CODE (BB_END (bb)) == JUMP_INSN
	&& any_condjump_p (BB_END (bb))
	&& bb->succ->succ_next != NULL)
      combine_predictions_for_insn (BB_END (bb), bb);

  free_dominance_info (post_dominators);
  free_dominance_info (dominators);

  /* Remove the fake infinite-loop edges added above before computing
     frequencies.  */
  remove_fake_edges ();
  estimate_bb_frequencies (loops_info);
}
994a57cd | 627 | \f |
/* __builtin_expect dropped tokens into the insn stream describing expected
   values of registers.  Generate branch probabilities based off these
   values.  */

void
expected_value_to_br_prob (void)
{
  /* EV is the most recently seen EXPECTED_VALUE note still in effect;
     EV_REG is the register it describes.  */
  rtx insn, cond, ev = NULL_RTX, ev_reg = NULL_RTX;

  for (insn = get_insns (); insn ; insn = NEXT_INSN (insn))
    {
      /* Simple state machine: record expected values, invalidate them
	 at labels and clobbers, act on conditional jumps.  */
      switch (GET_CODE (insn))
	{
	case NOTE:
	  /* Look for expected value notes.  */
	  if (NOTE_LINE_NUMBER (insn) == NOTE_INSN_EXPECTED_VALUE)
	    {
	      ev = NOTE_EXPECTED_VALUE (insn);
	      ev_reg = XEXP (ev, 0);
	      delete_insn (insn);
	    }
	  continue;

	case CODE_LABEL:
	  /* Never propagate across labels.  */
	  ev = NULL_RTX;
	  continue;

	case JUMP_INSN:
	  /* Look for simple conditional branches.  If we haven't got an
	     expected value yet, no point going further.  */
	  if (GET_CODE (insn) != JUMP_INSN || ev == NULL_RTX
	      || ! any_condjump_p (insn))
	    continue;
	  break;

	default:
	  /* Look for insns that clobber the EV register.  */
	  if (ev && reg_set_p (ev_reg, insn))
	    ev = NULL_RTX;
	  continue;
	}

      /* Collect the branch condition, hopefully relative to EV_REG.  */
      /* ???  At present we'll miss things like
		(expected_value (eq r70 0))
		(set r71 -1)
		(set r80 (lt r70 r71))
		(set pc (if_then_else (ne r80 0) ...))
	 as canonicalize_condition will render this to us as
		(lt r70, r71)
	 Could use cselib to try and reduce this further.  */
      cond = XEXP (SET_SRC (pc_set (insn)), 0);
      cond = canonicalize_condition (insn, cond, 0, NULL, ev_reg, false);
      if (! cond || XEXP (cond, 0) != ev_reg
	  || GET_CODE (XEXP (cond, 1)) != CONST_INT)
	continue;

      /* Substitute and simplify.  Given that the expression we're
	 building involves two constants, we should wind up with either
	 true or false.  */
      cond = gen_rtx_fmt_ee (GET_CODE (cond), VOIDmode,
			     XEXP (ev, 1), XEXP (cond, 1));
      cond = simplify_rtx (cond);

      /* Turn the condition into a scaled branch probability.  */
      if (cond != const_true_rtx && cond != const0_rtx)
	abort ();
      predict_insn_def (insn, PRED_BUILTIN_EXPECT,
			cond == const_true_rtx ? TAKEN : NOT_TAKEN);
    }
}
861f9cd0 | 700 | \f |
79a490a9 AJ |
701 | /* Check whether this is the last basic block of function. Commonly |
702 | there is one extra common cleanup block. */ | |
969d70ca | 703 | static bool |
79a490a9 | 704 | last_basic_block_p (basic_block bb) |
969d70ca | 705 | { |
f6366fc7 ZD |
706 | if (bb == EXIT_BLOCK_PTR) |
707 | return false; | |
708 | ||
709 | return (bb->next_bb == EXIT_BLOCK_PTR | |
710 | || (bb->next_bb->next_bb == EXIT_BLOCK_PTR | |
969d70ca | 711 | && bb->succ && !bb->succ->succ_next |
f6366fc7 | 712 | && bb->succ->dest->next_bb == EXIT_BLOCK_PTR)); |
969d70ca JH |
713 | } |
714 | ||
/* Sets branch probabilities according to PREDiction and
   FLAGS.  HEADS[bb->index] should be index of basic block in that we
   need to alter branch predictions (i.e. the first of our dominators
   such that we do not post-dominate it) (but we fill this information
   on demand, so -1 may be there in case this was not needed yet).  */

static void
process_note_prediction (basic_block bb, int *heads,
			 dominance_info dominators,
			 dominance_info post_dominators, int pred,
			 int flags)
{
  edge e;
  int y;
  bool taken;

  taken = flags & IS_TAKEN;

  if (heads[bb->index] < 0)
    {
      /* This is first time we need this field in heads array; so
         find first dominator that we do not post-dominate (we are
         using already known members of heads array).  */
      basic_block ai = bb;
      basic_block next_ai = get_immediate_dominator (dominators, bb);
      int head;

      /* Walk up the dominator tree while BB post-dominates, linking
	 each visited node back to its child so the chain can be
	 re-walked below.  */
      while (heads[next_ai->index] < 0)
	{
	  if (!dominated_by_p (post_dominators, next_ai, bb))
	    break;
	  heads[next_ai->index] = ai->index;
	  ai = next_ai;
	  next_ai = get_immediate_dominator (dominators, next_ai);
	}
      if (!dominated_by_p (post_dominators, next_ai, bb))
	head = next_ai->index;
      else
	head = heads[next_ai->index];
      /* Path compression: rewrite every node on the chain to point
	 directly at the head just found.  */
      while (next_ai != bb)
	{
	  next_ai = ai;
	  if (heads[ai->index] == ENTRY_BLOCK)
	    ai = ENTRY_BLOCK_PTR;
	  else
	    ai = BASIC_BLOCK (heads[ai->index]);
	  heads[next_ai->index] = head;
	}
    }
  y = heads[bb->index];

  /* Now find the edge that leads to our branch and apply the prediction.  */

  if (y == last_basic_block || !can_predict_insn_p (BB_END (BASIC_BLOCK (y))))
    return;
  for (e = BASIC_BLOCK (y)->succ; e; e = e->succ_next)
    if (e->dest->index >= 0
	&& dominated_by_p (post_dominators, e->dest, bb))
      predict_edge_def (e, pred, taken);
}
775 | ||
776 | /* Gathers NOTE_INSN_PREDICTIONs in given basic block and turns them | |
777 | into branch probabilities. For description of heads array, see | |
778 | process_note_prediction. */ | |
779 | ||
780 | static void | |
79a490a9 AJ |
781 | process_note_predictions (basic_block bb, int *heads, |
782 | dominance_info dominators, | |
783 | dominance_info post_dominators) | |
969d70ca JH |
784 | { |
785 | rtx insn; | |
786 | edge e; | |
787 | ||
d55d8fc7 | 788 | /* Additionally, we check here for blocks with no successors. */ |
969d70ca JH |
789 | int contained_noreturn_call = 0; |
790 | int was_bb_head = 0; | |
791 | int noreturn_block = 1; | |
792 | ||
a813c111 SB |
793 | for (insn = BB_END (bb); insn; |
794 | was_bb_head |= (insn == BB_HEAD (bb)), insn = PREV_INSN (insn)) | |
969d70ca JH |
795 | { |
796 | if (GET_CODE (insn) != NOTE) | |
797 | { | |
798 | if (was_bb_head) | |
799 | break; | |
800 | else | |
801 | { | |
802 | /* Noreturn calls cause program to exit, therefore they are | |
803 | always predicted as not taken. */ | |
804 | if (GET_CODE (insn) == CALL_INSN | |
805 | && find_reg_note (insn, REG_NORETURN, NULL)) | |
806 | contained_noreturn_call = 1; | |
807 | continue; | |
808 | } | |
809 | } | |
810 | if (NOTE_LINE_NUMBER (insn) == NOTE_INSN_PREDICTION) | |
811 | { | |
812 | int alg = (int) NOTE_PREDICTION_ALG (insn); | |
813 | /* Process single prediction note. */ | |
814 | process_note_prediction (bb, | |
815 | heads, | |
816 | dominators, | |
817 | post_dominators, | |
818 | alg, (int) NOTE_PREDICTION_FLAGS (insn)); | |
819 | delete_insn (insn); | |
820 | } | |
821 | } | |
822 | for (e = bb->succ; e; e = e->succ_next) | |
823 | if (!(e->flags & EDGE_FAKE)) | |
824 | noreturn_block = 0; | |
825 | if (contained_noreturn_call) | |
826 | { | |
827 | /* This block ended from other reasons than because of return. | |
828 | If it is because of noreturn call, this should certainly not | |
829 | be taken. Otherwise it is probably some error recovery. */ | |
830 | process_note_prediction (bb, | |
831 | heads, | |
832 | dominators, | |
833 | post_dominators, PRED_NORETURN, NOT_TAKEN); | |
834 | } | |
835 | } | |
836 | ||
/* Gathers NOTE_INSN_PREDICTIONs of the whole function and turns them
   into branch probabilities.  */

void
note_prediction_to_br_prob (void)
{
  basic_block bb;
  dominance_info post_dominators, dominators;
  int *heads;

  /* To enable handling of noreturn blocks.  Must happen before the
     dominance computation below so the fake edges are accounted for.  */
  add_noreturn_fake_exit_edges ();
  connect_infinite_loops_to_exit ();

  post_dominators = calculate_dominance_info (CDI_POST_DOMINATORS);
  dominators = calculate_dominance_info (CDI_DOMINATORS);

  /* HEADS is indexed by basic block index; -1 marks entries not yet
     computed (filled lazily by process_note_prediction).  */
  heads = xmalloc (sizeof (int) * last_basic_block);
  memset (heads, -1, sizeof (int) * last_basic_block);
  /* last_basic_block serves as the "nothing to predict" sentinel
     (tested in process_note_prediction).  */
  heads[ENTRY_BLOCK_PTR->next_bb->index] = last_basic_block;

  /* Process all prediction notes.  */

  FOR_EACH_BB (bb)
    process_note_predictions (bb, heads, dominators, post_dominators);

  free_dominance_info (post_dominators);
  free_dominance_info (dominators);
  free (heads);

  /* Undo the fake edges added at the top.  */
  remove_fake_edges ();
}
869 | \f | |
/* This is used to carry information about basic blocks.  It is
   attached to the AUX field of the standard CFG block.  */

typedef struct block_info_def
{
  /* Estimated frequency of execution of basic_block.  */
  sreal frequency;

  /* To keep queue of basic blocks to process.  */
  basic_block next;

  /* True if block needs to be visited in propagate_freq.  */
  int tovisit:1;

  /* Number of predecessors we need to visit first.  */
  int npredecessors;
} *block_info;

/* Similar information for edges.  */
typedef struct edge_info_def
{
  /* In case edge is a loopback edge, the probability the edge will be
     reached in case the header is.  Estimated number of iterations of
     the loop can be then computed as 1 / (1 - back_edge_prob).  */
  sreal back_edge_prob;
  /* True if the edge is a loopback edge in the natural loop.  */
  int back_edge:1;
} *edge_info;

/* Accessors for the per-block / per-edge data hung off the AUX fields.  */
#define BLOCK_INFO(B)	((block_info) (B)->aux)
#define EDGE_INFO(E)	((edge_info) (E)->aux)
901 | ||
/* Helper function for estimate_bb_frequencies.
   Propagate the frequencies for LOOP.  Blocks are processed in
   topological order (ignoring DFS back edges) via a worklist threaded
   through BLOCK_INFO(...)->next; back-edge probabilities computed for
   inner loops are folded in as cyclic probabilities.  */

static void
propagate_freq (struct loop *loop)
{
  basic_block head = loop->header;
  basic_block bb;
  basic_block last;
  edge e;
  basic_block nextbb;

  /* For each basic block we need to visit count number of its predecessors
     we need to visit first.  */
  FOR_EACH_BB (bb)
    {
      if (BLOCK_INFO (bb)->tovisit)
	{
	  int count = 0;

	  for (e = bb->pred; e; e = e->pred_next)
	    if (BLOCK_INFO (e->src)->tovisit && !(e->flags & EDGE_DFS_BACK))
	      count++;
	    else if (BLOCK_INFO (e->src)->tovisit
		     && rtl_dump_file && !EDGE_INFO (e)->back_edge)
	      fprintf (rtl_dump_file,
		       "Irreducible region hit, ignoring edge to %i->%i\n",
		       e->src->index, bb->index);
	  BLOCK_INFO (bb)->npredecessors = count;
	}
    }

  /* The loop header starts with frequency 1; everything else is
     expressed relative to it.  */
  memcpy (&BLOCK_INFO (head)->frequency, &real_one, sizeof (real_one));
  last = head;
  for (bb = head; bb; bb = nextbb)
    {
      sreal cyclic_probability, frequency;

      memcpy (&cyclic_probability, &real_zero, sizeof (real_zero));
      memcpy (&frequency, &real_zero, sizeof (real_zero));

      /* Pop BB from the worklist.  */
      nextbb = BLOCK_INFO (bb)->next;
      BLOCK_INFO (bb)->next = NULL;

      /* Compute frequency of basic block.  */
      if (bb != head)
	{
#ifdef ENABLE_CHECKING
	  /* All non-back-edge predecessors must have been visited
	     already, or the topological ordering is broken.  */
	  for (e = bb->pred; e; e = e->pred_next)
	    if (BLOCK_INFO (e->src)->tovisit && !(e->flags & EDGE_DFS_BACK))
	      abort ();
#endif

	  /* Sum incoming frequency over forward edges and the total
	     cyclic probability over marked back edges.  */
	  for (e = bb->pred; e; e = e->pred_next)
	    if (EDGE_INFO (e)->back_edge)
	      {
		sreal_add (&cyclic_probability, &cyclic_probability,
			   &EDGE_INFO (e)->back_edge_prob);
	      }
	    else if (!(e->flags & EDGE_DFS_BACK))
	      {
		sreal tmp;

		/* frequency += (e->probability
				  * BLOCK_INFO (e->src)->frequency /
				  REG_BR_PROB_BASE);  */

		sreal_init (&tmp, e->probability, 0);
		sreal_mul (&tmp, &tmp, &BLOCK_INFO (e->src)->frequency);
		sreal_mul (&tmp, &tmp, &real_inv_br_prob_base);
		sreal_add (&frequency, &frequency, &tmp);
	      }

	  if (sreal_compare (&cyclic_probability, &real_zero) == 0)
	    {
	      memcpy (&BLOCK_INFO (bb)->frequency, &frequency,
		      sizeof (frequency));
	    }
	  else
	    {
	      /* Clamp the cyclic probability so the division below
		 stays finite and well-behaved.  */
	      if (sreal_compare (&cyclic_probability, &real_almost_one) > 0)
		{
		  memcpy (&cyclic_probability, &real_almost_one,
			  sizeof (real_almost_one));
		}

	      /* BLOCK_INFO (bb)->frequency = frequency
					      / (1 - cyclic_probability) */

	      sreal_sub (&cyclic_probability, &real_one, &cyclic_probability);
	      sreal_div (&BLOCK_INFO (bb)->frequency,
			 &frequency, &cyclic_probability);
	    }
	}

      BLOCK_INFO (bb)->tovisit = 0;

      /* Compute back edge frequencies.  */
      for (e = bb->succ; e; e = e->succ_next)
	if (e->dest == head)
	  {
	    sreal tmp;

	    /* EDGE_INFO (e)->back_edge_prob
		  = ((e->probability * BLOCK_INFO (bb)->frequency)
		     / REG_BR_PROB_BASE);  */

	    sreal_init (&tmp, e->probability, 0);
	    sreal_mul (&tmp, &tmp, &BLOCK_INFO (bb)->frequency);
	    sreal_mul (&EDGE_INFO (e)->back_edge_prob,
		       &tmp, &real_inv_br_prob_base);
	  }

      /* Propagate to successor blocks: once a successor's last pending
	 predecessor has been processed, append it to the worklist.  */
      for (e = bb->succ; e; e = e->succ_next)
	if (!(e->flags & EDGE_DFS_BACK)
	    && BLOCK_INFO (e->dest)->npredecessors)
	  {
	    BLOCK_INFO (e->dest)->npredecessors--;
	    if (!BLOCK_INFO (e->dest)->npredecessors)
	      {
		if (!nextbb)
		  nextbb = e->dest;
		else
		  BLOCK_INFO (last)->next = e->dest;

		last = e->dest;
	      }
	  }
    }
}
1033 | ||
57cb6d52 | 1034 | /* Estimate probabilities of loopback edges in loops at same nest level. */ |
bfdade77 | 1035 | |
861f9cd0 | 1036 | static void |
79a490a9 | 1037 | estimate_loops_at_level (struct loop *first_loop) |
861f9cd0 | 1038 | { |
2ecfd709 | 1039 | struct loop *loop; |
861f9cd0 JH |
1040 | |
1041 | for (loop = first_loop; loop; loop = loop->next) | |
1042 | { | |
861f9cd0 | 1043 | edge e; |
2ecfd709 | 1044 | basic_block *bbs; |
3d436d2a | 1045 | unsigned i; |
861f9cd0 JH |
1046 | |
1047 | estimate_loops_at_level (loop->inner); | |
79a490a9 | 1048 | |
2ecfd709 | 1049 | if (loop->latch->succ) /* Do not do this for dummy function loop. */ |
861f9cd0 | 1050 | { |
2ecfd709 ZD |
1051 | /* Find current loop back edge and mark it. */ |
1052 | e = loop_latch_edge (loop); | |
1053 | EDGE_INFO (e)->back_edge = 1; | |
1054 | } | |
1055 | ||
1056 | bbs = get_loop_body (loop); | |
1057 | for (i = 0; i < loop->num_nodes; i++) | |
1058 | BLOCK_INFO (bbs[i])->tovisit = 1; | |
1059 | free (bbs); | |
1060 | propagate_freq (loop); | |
861f9cd0 JH |
1061 | } |
1062 | } | |
1063 | ||
1064 | /* Convert counts measured by profile driven feedback to frequencies. */ | |
bfdade77 | 1065 | |
861f9cd0 | 1066 | static void |
79a490a9 | 1067 | counts_to_freqs (void) |
861f9cd0 | 1068 | { |
4977bab6 | 1069 | gcov_type count_max = 1; |
e0082a72 | 1070 | basic_block bb; |
0b17ab2f | 1071 | |
e0082a72 ZD |
1072 | FOR_EACH_BB (bb) |
1073 | count_max = MAX (bb->count, count_max); | |
861f9cd0 | 1074 | |
e0082a72 ZD |
1075 | FOR_BB_BETWEEN (bb, ENTRY_BLOCK_PTR, NULL, next_bb) |
1076 | bb->frequency = (bb->count * BB_FREQ_MAX + count_max / 2) / count_max; | |
861f9cd0 JH |
1077 | } |
1078 | ||
bfdade77 RK |
1079 | /* Return true if function is likely to be expensive, so there is no point to |
1080 | optimize performance of prologue, epilogue or do inlining at the expense | |
d55d8fc7 | 1081 | of code size growth. THRESHOLD is the limit of number of instructions |
bfdade77 RK |
1082 | function can execute at average to be still considered not expensive. */ |
1083 | ||
6ab16dd9 | 1084 | bool |
79a490a9 | 1085 | expensive_function_p (int threshold) |
6ab16dd9 JH |
1086 | { |
1087 | unsigned int sum = 0; | |
e0082a72 | 1088 | basic_block bb; |
5197bd50 | 1089 | unsigned int limit; |
6ab16dd9 JH |
1090 | |
1091 | /* We can not compute accurately for large thresholds due to scaled | |
1092 | frequencies. */ | |
1093 | if (threshold > BB_FREQ_MAX) | |
1094 | abort (); | |
1095 | ||
eaec9b3d | 1096 | /* Frequencies are out of range. This either means that function contains |
6ab16dd9 JH |
1097 | internal loop executing more than BB_FREQ_MAX times or profile feedback |
1098 | is available and function has not been executed at all. */ | |
1099 | if (ENTRY_BLOCK_PTR->frequency == 0) | |
1100 | return true; | |
6a4d6760 | 1101 | |
6ab16dd9 JH |
1102 | /* Maximally BB_FREQ_MAX^2 so overflow won't happen. */ |
1103 | limit = ENTRY_BLOCK_PTR->frequency * threshold; | |
e0082a72 | 1104 | FOR_EACH_BB (bb) |
6ab16dd9 | 1105 | { |
6ab16dd9 JH |
1106 | rtx insn; |
1107 | ||
a813c111 | 1108 | for (insn = BB_HEAD (bb); insn != NEXT_INSN (BB_END (bb)); |
6ab16dd9 | 1109 | insn = NEXT_INSN (insn)) |
bfdade77 RK |
1110 | if (active_insn_p (insn)) |
1111 | { | |
1112 | sum += bb->frequency; | |
1113 | if (sum > limit) | |
1114 | return true; | |
6ab16dd9 JH |
1115 | } |
1116 | } | |
bfdade77 | 1117 | |
6ab16dd9 JH |
1118 | return false; |
1119 | } | |
1120 | ||
/* Estimate basic blocks frequency by given branch probabilities.
   With profile feedback (flag_branch_probabilities) the measured
   counts are simply rescaled; otherwise the frequencies are derived
   from the edge probabilities by loop-aware propagation.  Finally the
   function-level hot/cold classification (and, optionally, section
   choice) is updated.  LOOPS is the natural loop tree of the
   function.  */

static void
estimate_bb_frequencies (struct loops *loops)
{
  basic_block bb;
  sreal freq_max;

  if (flag_branch_probabilities)
    counts_to_freqs ();
  else
    {
      /* The sreal constants are initialized once per compiler run.  */
      static int real_values_initialized = 0;

      if (!real_values_initialized)
	{
	  real_values_initialized = 1;
	  sreal_init (&real_zero, 0, 0);
	  sreal_init (&real_one, 1, 0);
	  sreal_init (&real_br_prob_base, REG_BR_PROB_BASE, 0);
	  sreal_init (&real_bb_freq_max, BB_FREQ_MAX, 0);
	  sreal_init (&real_one_half, 1, -1);
	  sreal_div (&real_inv_br_prob_base, &real_one, &real_br_prob_base);
	  sreal_sub (&real_almost_one, &real_one, &real_inv_br_prob_base);
	}

      mark_dfs_back_edges ();
      /* Fill in the probability values in flowgraph based on the REG_BR_PROB
	 notes.  */
      FOR_EACH_BB (bb)
	{
	  rtx last_insn = BB_END (bb);

	  if (!can_predict_insn_p (last_insn))
	    {
	      /* We can predict only conditional jumps at the moment.
		 Expect each edge to be equally probable.
		 ?? In the future we want to make abnormal edges improbable.  */
	      int nedges = 0;
	      edge e;

	      /* Count edges; stop early if any edge already carries a
		 probability (then nothing is overwritten).  */
	      for (e = bb->succ; e; e = e->succ_next)
		{
		  nedges++;
		  if (e->probability != 0)
		    break;
		}
	      if (!e)
		for (e = bb->succ; e; e = e->succ_next)
		  e->probability = (REG_BR_PROB_BASE + nedges / 2) / nedges;
	    }
	}

      /* The single edge out of the entry block is always taken.  */
      ENTRY_BLOCK_PTR->succ->probability = REG_BR_PROB_BASE;

      /* Set up block info for each basic block.  */
      alloc_aux_for_blocks (sizeof (struct block_info_def));
      alloc_aux_for_edges (sizeof (struct edge_info_def));
      FOR_BB_BETWEEN (bb, ENTRY_BLOCK_PTR, NULL, next_bb)
	{
	  edge e;

	  BLOCK_INFO (bb)->tovisit = 0;
	  /* Seed back_edge_prob with e->probability / REG_BR_PROB_BASE.  */
	  for (e = bb->succ; e; e = e->succ_next)
	    {
	      sreal_init (&EDGE_INFO (e)->back_edge_prob, e->probability, 0);
	      sreal_mul (&EDGE_INFO (e)->back_edge_prob,
			 &EDGE_INFO (e)->back_edge_prob,
			 &real_inv_br_prob_base);
	    }
	}

      /* First compute probabilities locally for each loop from innermost
	 to outermost to examine probabilities for back edges.  */
      estimate_loops_at_level (loops->tree_root);

      /* Find the maximal propagated frequency ...  */
      memcpy (&freq_max, &real_zero, sizeof (real_zero));
      FOR_EACH_BB (bb)
	if (sreal_compare (&freq_max, &BLOCK_INFO (bb)->frequency) < 0)
	  memcpy (&freq_max, &BLOCK_INFO (bb)->frequency, sizeof (freq_max));

      /* ... and rescale all blocks so that the maximum becomes
	 BB_FREQ_MAX, rounding to nearest via real_one_half.  */
      sreal_div (&freq_max, &real_bb_freq_max, &freq_max);
      FOR_BB_BETWEEN (bb, ENTRY_BLOCK_PTR, NULL, next_bb)
	{
	  sreal tmp;

	  sreal_mul (&tmp, &BLOCK_INFO (bb)->frequency, &freq_max);
	  sreal_add (&tmp, &tmp, &real_one_half);
	  bb->frequency = sreal_to_int (&tmp);
	}

      free_aux_for_blocks ();
      free_aux_for_edges ();
    }
  compute_function_frequency ();
  if (flag_reorder_functions)
    choose_function_section ();
}
861f9cd0 | 1219 | |
194734e9 JH |
1220 | /* Decide whether function is hot, cold or unlikely executed. */ |
1221 | static void | |
79a490a9 | 1222 | compute_function_frequency (void) |
194734e9 | 1223 | { |
e0082a72 ZD |
1224 | basic_block bb; |
1225 | ||
cdb23767 | 1226 | if (!profile_info || !flag_branch_probabilities) |
194734e9 JH |
1227 | return; |
1228 | cfun->function_frequency = FUNCTION_FREQUENCY_UNLIKELY_EXECUTED; | |
e0082a72 | 1229 | FOR_EACH_BB (bb) |
861f9cd0 | 1230 | { |
194734e9 JH |
1231 | if (maybe_hot_bb_p (bb)) |
1232 | { | |
1233 | cfun->function_frequency = FUNCTION_FREQUENCY_HOT; | |
1234 | return; | |
1235 | } | |
1236 | if (!probably_never_executed_bb_p (bb)) | |
1237 | cfun->function_frequency = FUNCTION_FREQUENCY_NORMAL; | |
861f9cd0 | 1238 | } |
194734e9 | 1239 | } |
861f9cd0 | 1240 | |
194734e9 JH |
1241 | /* Choose appropriate section for the function. */ |
1242 | static void | |
79a490a9 | 1243 | choose_function_section (void) |
194734e9 JH |
1244 | { |
1245 | if (DECL_SECTION_NAME (current_function_decl) | |
c07f146f JH |
1246 | || !targetm.have_named_sections |
1247 | /* Theoretically we can split the gnu.linkonce text section too, | |
79a490a9 | 1248 | but this requires more work as the frequency needs to match |
c07f146f JH |
1249 | for all generated objects so we need to merge the frequency |
1250 | of all instances. For now just never set frequency for these. */ | |
c728da61 | 1251 | || DECL_ONE_ONLY (current_function_decl)) |
194734e9 JH |
1252 | return; |
1253 | if (cfun->function_frequency == FUNCTION_FREQUENCY_HOT) | |
1254 | DECL_SECTION_NAME (current_function_decl) = | |
1255 | build_string (strlen (HOT_TEXT_SECTION_NAME), HOT_TEXT_SECTION_NAME); | |
1256 | if (cfun->function_frequency == FUNCTION_FREQUENCY_UNLIKELY_EXECUTED) | |
1257 | DECL_SECTION_NAME (current_function_decl) = | |
1258 | build_string (strlen (UNLIKELY_EXECUTED_TEXT_SECTION_NAME), | |
1259 | UNLIKELY_EXECUTED_TEXT_SECTION_NAME); | |
861f9cd0 | 1260 | } |