]> git.ipfire.org Git - thirdparty/gcc.git/blob - gcc/predict.c
Merge tree-ssa-20020619-branch into mainline.
[thirdparty/gcc.git] / gcc / predict.c
1 /* Branch prediction routines for the GNU compiler.
2 Copyright (C) 2000, 2001, 2002, 2003, 2004 Free Software Foundation, Inc.
3
4 This file is part of GCC.
5
6 GCC is free software; you can redistribute it and/or modify it under
7 the terms of the GNU General Public License as published by the Free
8 Software Foundation; either version 2, or (at your option) any later
9 version.
10
11 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
12 WARRANTY; without even the implied warranty of MERCHANTABILITY or
13 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
14 for more details.
15
16 You should have received a copy of the GNU General Public License
17 along with GCC; see the file COPYING. If not, write to the Free
18 Software Foundation, 59 Temple Place - Suite 330, Boston, MA
19 02111-1307, USA. */
20
21 /* References:
22
23 [1] "Branch Prediction for Free"
24 Ball and Larus; PLDI '93.
25 [2] "Static Branch Frequency and Program Profile Analysis"
26 Wu and Larus; MICRO-27.
27 [3] "Corpus-based Static Branch Prediction"
28 Calder, Grunwald, Lindsay, Martin, Mozer, and Zorn; PLDI '95. */
29
30
31 #include "config.h"
32 #include "system.h"
33 #include "coretypes.h"
34 #include "tm.h"
35 #include "tree.h"
36 #include "rtl.h"
37 #include "tm_p.h"
38 #include "hard-reg-set.h"
39 #include "basic-block.h"
40 #include "insn-config.h"
41 #include "regs.h"
42 #include "flags.h"
43 #include "output.h"
44 #include "function.h"
45 #include "except.h"
46 #include "toplev.h"
47 #include "recog.h"
48 #include "expr.h"
49 #include "predict.h"
50 #include "coverage.h"
51 #include "sreal.h"
52 #include "params.h"
53 #include "target.h"
54 #include "loop.h"
55 #include "cfgloop.h"
56 #include "tree-flow.h"
57 #include "ggc.h"
58 #include "tree-dump.h"
59 #include "tree-pass.h"
60 #include "timevar.h"
61
/* Real-valued (sreal) constants used by the frequency estimator: 0, 1,
   1-1/REG_BR_PROB_BASE, REG_BR_PROB_BASE, 1/REG_BR_PROB_BASE, 0.5,
   BB_FREQ_MAX.  */
static sreal real_zero, real_one, real_almost_one, real_br_prob_base,
	     real_inv_br_prob_base, real_one_half, real_bb_freq_max;

/* Random guesstimation given names.  Probabilities are expressed on the
   fixed-point scale 0..REG_BR_PROB_BASE.  */
#define PROB_VERY_UNLIKELY	(REG_BR_PROB_BASE / 10 - 1)
#define PROB_EVEN		(REG_BR_PROB_BASE / 2)
#define PROB_VERY_LIKELY	(REG_BR_PROB_BASE - PROB_VERY_UNLIKELY)
#define PROB_ALWAYS		(REG_BR_PROB_BASE)
72
/* Forward declarations of the file-local helpers defined below.  */
static void combine_predictions_for_insn (rtx, basic_block);
static void dump_prediction (FILE *, enum br_predictor, int, basic_block, int);
static void estimate_loops_at_level (struct loop *loop);
static void propagate_freq (struct loop *);
static void estimate_bb_frequencies (struct loops *);
static int counts_to_freqs (void);
static void process_note_predictions (basic_block, int *);
static void process_note_prediction (basic_block, int *, int, int);
static bool last_basic_block_p (basic_block);
static void compute_function_frequency (void);
static void choose_function_section (void);
static bool can_predict_insn_p (rtx);
85
/* Information we hold about each branch predictor.
   Filled using information from predict.def.  */

struct predictor_info
{
  const char *const name;	/* Name used in the debugging dumps.  */
  const int hitrate;		/* Expected hitrate used by
				   predict_insn_def call.  */
  const int flags;		/* Bitmask of PRED_FLAG_* below.  */
};

/* Use given predictor without Dempster-Shaffer theory if it matches
   using first_match heuristics.  */
#define PRED_FLAG_FIRST_MATCH 1

/* Recompute hitrate in percent to our representation.  */

#define HITRATE(VAL) ((int) ((VAL) * REG_BR_PROB_BASE + 50) / 100)

/* Expand predict.def into a table of predictor descriptions, indexed by
   the corresponding enum br_predictor value.  */
#define DEF_PREDICTOR(ENUM, NAME, HITRATE, FLAGS) {NAME, HITRATE, FLAGS},
static const struct predictor_info predictor_info[]= {
#include "predict.def"

  /* Upper bound on predictors.  */
  {NULL, 0, 0}
};
#undef DEF_PREDICTOR
113
114 /* Return true in case BB can be CPU intensive and should be optimized
115 for maximal performance. */
116
117 bool
118 maybe_hot_bb_p (basic_block bb)
119 {
120 if (profile_info && flag_branch_probabilities
121 && (bb->count
122 < profile_info->sum_max / PARAM_VALUE (HOT_BB_COUNT_FRACTION)))
123 return false;
124 if (bb->frequency < BB_FREQ_MAX / PARAM_VALUE (HOT_BB_FREQUENCY_FRACTION))
125 return false;
126 return true;
127 }
128
129 /* Return true in case BB is cold and should be optimized for size. */
130
131 bool
132 probably_cold_bb_p (basic_block bb)
133 {
134 if (profile_info && flag_branch_probabilities
135 && (bb->count
136 < profile_info->sum_max / PARAM_VALUE (HOT_BB_COUNT_FRACTION)))
137 return true;
138 if (bb->frequency < BB_FREQ_MAX / PARAM_VALUE (HOT_BB_FREQUENCY_FRACTION))
139 return true;
140 return false;
141 }
142
143 /* Return true in case BB is probably never executed. */
144 bool
145 probably_never_executed_bb_p (basic_block bb)
146 {
147 if (profile_info && flag_branch_probabilities)
148 return ((bb->count + profile_info->runs / 2) / profile_info->runs) == 0;
149 return false;
150 }
151
152 /* Return true if the one of outgoing edges is already predicted by
153 PREDICTOR. */
154
155 bool
156 rtl_predicted_by_p (basic_block bb, enum br_predictor predictor)
157 {
158 rtx note;
159 if (!INSN_P (BB_END (bb)))
160 return false;
161 for (note = REG_NOTES (BB_END (bb)); note; note = XEXP (note, 1))
162 if (REG_NOTE_KIND (note) == REG_BR_PRED
163 && INTVAL (XEXP (XEXP (note, 0), 0)) == (int)predictor)
164 return true;
165 return false;
166 }
167
168 /* Return true if the one of outgoing edges is already predicted by
169 PREDICTOR. */
170
171 bool
172 tree_predicted_by_p (basic_block bb, enum br_predictor predictor)
173 {
174 struct edge_prediction *i = bb_ann (bb)->predictions;
175 for (i = bb_ann (bb)->predictions; i; i = i->next)
176 if (i->predictor == predictor)
177 return true;
178 return false;
179 }
180
181 void
182 predict_insn (rtx insn, enum br_predictor predictor, int probability)
183 {
184 if (!any_condjump_p (insn))
185 abort ();
186 if (!flag_guess_branch_prob)
187 return;
188
189 REG_NOTES (insn)
190 = gen_rtx_EXPR_LIST (REG_BR_PRED,
191 gen_rtx_CONCAT (VOIDmode,
192 GEN_INT ((int) predictor),
193 GEN_INT ((int) probability)),
194 REG_NOTES (insn));
195 }
196
197 /* Predict insn by given predictor. */
198
199 void
200 predict_insn_def (rtx insn, enum br_predictor predictor,
201 enum prediction taken)
202 {
203 int probability = predictor_info[(int) predictor].hitrate;
204
205 if (taken != TAKEN)
206 probability = REG_BR_PROB_BASE - probability;
207
208 predict_insn (insn, predictor, probability);
209 }
210
211 /* Predict edge E with given probability if possible. */
212
213 void
214 rtl_predict_edge (edge e, enum br_predictor predictor, int probability)
215 {
216 rtx last_insn;
217 last_insn = BB_END (e->src);
218
219 /* We can store the branch prediction information only about
220 conditional jumps. */
221 if (!any_condjump_p (last_insn))
222 return;
223
224 /* We always store probability of branching. */
225 if (e->flags & EDGE_FALLTHRU)
226 probability = REG_BR_PROB_BASE - probability;
227
228 predict_insn (last_insn, predictor, probability);
229 }
230
231 /* Predict edge E with the given PROBABILITY. */
232 void
233 tree_predict_edge (edge e, enum br_predictor predictor, int probability)
234 {
235 struct edge_prediction *i = ggc_alloc (sizeof (struct edge_prediction));
236
237 i->next = bb_ann (e->src)->predictions;
238 bb_ann (e->src)->predictions = i;
239 i->probability = probability;
240 i->predictor = predictor;
241 i->edge = e;
242 }
243
244 /* Return true when we can store prediction on insn INSN.
245 At the moment we represent predictions only on conditional
246 jumps, not at computed jump or other complicated cases. */
247 static bool
248 can_predict_insn_p (rtx insn)
249 {
250 return (GET_CODE (insn) == JUMP_INSN
251 && any_condjump_p (insn)
252 && BLOCK_FOR_INSN (insn)->succ->succ_next);
253 }
254
255 /* Predict edge E by given predictor if possible. */
256
257 void
258 predict_edge_def (edge e, enum br_predictor predictor,
259 enum prediction taken)
260 {
261 int probability = predictor_info[(int) predictor].hitrate;
262
263 if (taken != TAKEN)
264 probability = REG_BR_PROB_BASE - probability;
265
266 predict_edge (e, predictor, probability);
267 }
268
269 /* Invert all branch predictions or probability notes in the INSN. This needs
270 to be done each time we invert the condition used by the jump. */
271
272 void
273 invert_br_probabilities (rtx insn)
274 {
275 rtx note;
276
277 for (note = REG_NOTES (insn); note; note = XEXP (note, 1))
278 if (REG_NOTE_KIND (note) == REG_BR_PROB)
279 XEXP (note, 0) = GEN_INT (REG_BR_PROB_BASE - INTVAL (XEXP (note, 0)));
280 else if (REG_NOTE_KIND (note) == REG_BR_PRED)
281 XEXP (XEXP (note, 0), 1)
282 = GEN_INT (REG_BR_PROB_BASE - INTVAL (XEXP (XEXP (note, 0), 1)));
283 }
284
285 /* Dump information about the branch prediction to the output file. */
286
287 static void
288 dump_prediction (FILE *file, enum br_predictor predictor, int probability,
289 basic_block bb, int used)
290 {
291 edge e = bb->succ;
292
293 if (!file)
294 return;
295
296 while (e && (e->flags & EDGE_FALLTHRU))
297 e = e->succ_next;
298
299 fprintf (file, " %s heuristics%s: %.1f%%",
300 predictor_info[predictor].name,
301 used ? "" : " (ignored)", probability * 100.0 / REG_BR_PROB_BASE);
302
303 if (bb->count)
304 {
305 fprintf (file, " exec ");
306 fprintf (file, HOST_WIDEST_INT_PRINT_DEC, bb->count);
307 if (e)
308 {
309 fprintf (file, " hit ");
310 fprintf (file, HOST_WIDEST_INT_PRINT_DEC, e->count);
311 fprintf (file, " (%.1f%%)", e->count * 100.0 / bb->count);
312 }
313 }
314
315 fprintf (file, "\n");
316 }
317
/* Combine all REG_BR_PRED notes into single probability and attach REG_BR_PROB
   note if not already present.  Remove now useless REG_BR_PRED notes.

   The individual predictions are combined with Dempster-Shaffer evidence
   theory, unless the best-matching predictor is marked
   PRED_FLAG_FIRST_MATCH, in which case its probability is used alone.  */

static void
combine_predictions_for_insn (rtx insn, basic_block bb)
{
  rtx prob_note = find_reg_note (insn, REG_BR_PROB, 0);
  rtx *pnote = &REG_NOTES (insn);
  rtx note;
  int best_probability = PROB_EVEN;
  int best_predictor = END_PREDICTORS;
  int combined_probability = REG_BR_PROB_BASE / 2;
  int d;
  bool first_match = false;
  bool found = false;

  if (dump_file)
    fprintf (dump_file, "Predictions for insn %i bb %i\n", INSN_UID (insn),
	     bb->index);

  /* We implement "first match" heuristics and use probability guessed
     by predictor with smallest index.  */
  for (note = REG_NOTES (insn); note; note = XEXP (note, 1))
    if (REG_NOTE_KIND (note) == REG_BR_PRED)
      {
	/* Each REG_BR_PRED note holds a (predictor, probability) pair.  */
	int predictor = INTVAL (XEXP (XEXP (note, 0), 0));
	int probability = INTVAL (XEXP (XEXP (note, 0), 1));

	found = true;
	/* Lower predictor index means higher priority for first-match.  */
	if (best_predictor > predictor)
	  best_probability = probability, best_predictor = predictor;

	/* Dempster-Shaffer combination: normalization factor D.  */
	d = (combined_probability * probability
	     + (REG_BR_PROB_BASE - combined_probability)
	     * (REG_BR_PROB_BASE - probability));

	/* Use FP math to avoid overflows of 32bit integers.  */
	if (d == 0)
	  /* If one probability is 0% and one 100%, avoid division by zero.  */
	  combined_probability = REG_BR_PROB_BASE / 2;
	else
	  combined_probability = (((double) combined_probability) * probability
				  * REG_BR_PROB_BASE / d + 0.5);
      }

  /* Decide which heuristic to use.  In case we didn't match anything,
     use no_prediction heuristic, in case we did match, use either
     first match or Dempster-Shaffer theory depending on the flags.  */

  if (predictor_info [best_predictor].flags & PRED_FLAG_FIRST_MATCH)
    first_match = true;

  if (!found)
    dump_prediction (dump_file, PRED_NO_PREDICTION,
		     combined_probability, bb, true);
  else
    {
      dump_prediction (dump_file, PRED_DS_THEORY, combined_probability,
		       bb, !first_match);
      dump_prediction (dump_file, PRED_FIRST_MATCH, best_probability,
		       bb, first_match);
    }

  if (first_match)
    combined_probability = best_probability;
  dump_prediction (dump_file, PRED_COMBINED, combined_probability, bb, true);

  /* Dump each individual prediction and splice the now-redundant
     REG_BR_PRED notes out of the insn's note list.  */
  while (*pnote)
    {
      if (REG_NOTE_KIND (*pnote) == REG_BR_PRED)
	{
	  int predictor = INTVAL (XEXP (XEXP (*pnote, 0), 0));
	  int probability = INTVAL (XEXP (XEXP (*pnote, 0), 1));

	  dump_prediction (dump_file, predictor, probability, bb,
			   !first_match || best_predictor == predictor);
	  *pnote = XEXP (*pnote, 1);
	}
      else
	pnote = &XEXP (*pnote, 1);
    }

  if (!prob_note)
    {
      REG_NOTES (insn)
	= gen_rtx_EXPR_LIST (REG_BR_PROB,
			     GEN_INT (combined_probability), REG_NOTES (insn));

      /* Save the prediction into CFG in case we are seeing non-degenerated
	 conditional jump.  */
      if (bb->succ->succ_next)
	{
	  BRANCH_EDGE (bb)->probability = combined_probability;
	  FALLTHRU_EDGE (bb)->probability
	    = REG_BR_PROB_BASE - combined_probability;
	}
    }
}
416
/* Combine predictions into single probability and store them into CFG.
   Remove now useless prediction entries.

   Tree-level counterpart of combine_predictions_for_insn: predictions
   live on the block annotation rather than in insn notes, and the result
   is stored directly into the two successor edges.  */

static void
combine_predictions_for_bb (FILE *file, basic_block bb)
{
  int best_probability = PROB_EVEN;
  int best_predictor = END_PREDICTORS;
  int combined_probability = REG_BR_PROB_BASE / 2;
  int d;
  bool first_match = false;
  bool found = false;
  struct edge_prediction *pred;
  int nedges = 0;
  edge e, first = NULL, second = NULL;

  /* Count the "real" (non-EH, non-fake) successors and remember the
     first two in source order.  */
  for (e = bb->succ; e; e = e->succ_next)
    if (!(e->flags & (EDGE_EH | EDGE_FAKE)))
      {
	nedges ++;
	if (first && !second)
	  second = e;
	if (!first)
	  first = e;
      }

  /* When there is no successor or only one choice, prediction is easy.

     We are lazy for now and predict only basic blocks with two outgoing
     edges.  It is possible to predict generic case too, but we have to
     ignore first match heuristics and do more involved combining.  Implement
     this later.  */
  if (nedges != 2)
    {
      for (e = bb->succ; e; e = e->succ_next)
	if (!(e->flags & (EDGE_EH | EDGE_FAKE)))
	  e->probability = (REG_BR_PROB_BASE + nedges / 2) / nedges;
	else
	  e->probability = 0;
      bb_ann (bb)->predictions = NULL;
      if (file)
	fprintf (file, "%i edges in bb %i predicted to even probabilities\n",
		 nedges, bb->index);
      return;
    }

  if (file)
    fprintf (file, "Predictions for bb %i\n", bb->index);

  /* We implement "first match" heuristics and use probability guessed
     by predictor with smallest index.  */
  for (pred = bb_ann (bb)->predictions; pred; pred = pred->next)
    {
      int predictor = pred->predictor;
      int probability = pred->probability;

      /* Probabilities are normalized so that they describe FIRST being
	 taken.  */
      if (pred->edge != first)
	probability = REG_BR_PROB_BASE - probability;

      found = true;
      if (best_predictor > predictor)
	best_probability = probability, best_predictor = predictor;

      /* Dempster-Shaffer combination; same scheme as
	 combine_predictions_for_insn.  */
      d = (combined_probability * probability
	   + (REG_BR_PROB_BASE - combined_probability)
	   * (REG_BR_PROB_BASE - probability));

      /* Use FP math to avoid overflows of 32bit integers.  */
      if (d == 0)
	/* If one probability is 0% and one 100%, avoid division by zero.  */
	combined_probability = REG_BR_PROB_BASE / 2;
      else
	combined_probability = (((double) combined_probability) * probability
				* REG_BR_PROB_BASE / d + 0.5);
    }

  /* Decide which heuristic to use.  In case we didn't match anything,
     use no_prediction heuristic, in case we did match, use either
     first match or Dempster-Shaffer theory depending on the flags.  */

  if (predictor_info [best_predictor].flags & PRED_FLAG_FIRST_MATCH)
    first_match = true;

  if (!found)
    dump_prediction (file, PRED_NO_PREDICTION, combined_probability, bb, true);
  else
    {
      dump_prediction (file, PRED_DS_THEORY, combined_probability, bb,
		       !first_match);
      dump_prediction (file, PRED_FIRST_MATCH, best_probability, bb,
		       first_match);
    }

  if (first_match)
    combined_probability = best_probability;
  dump_prediction (file, PRED_COMBINED, combined_probability, bb, true);

  for (pred = bb_ann (bb)->predictions; pred; pred = pred->next)
    {
      int predictor = pred->predictor;
      int probability = pred->probability;

      /* NOTE(review): this normalizes against bb->succ while the
	 combining loop above normalized against FIRST (the first
	 non-EH/fake edge).  The two differ when bb->succ is an EH or
	 fake edge — looks like a dump-only inconsistency; confirm.  */
      if (pred->edge != bb->succ)
	probability = REG_BR_PROB_BASE - probability;
      dump_prediction (file, predictor, probability, bb,
		       !first_match || best_predictor == predictor);
    }
  /* The predictions are consumed; clear the list.  */
  bb_ann (bb)->predictions = NULL;

  first->probability = combined_probability;
  second->probability = REG_BR_PROB_BASE - combined_probability;
}
529
/* Predict edge probabilities by exploiting loop structure.
   When SIMPLELOOPS is set, attempt to count number of iterations by analyzing
   RTL (only meaningful for the RTL pass; the tree pass passes false).  */
static void
predict_loops (struct loops *loops_info, bool simpleloops)
{
  unsigned i;

  /* Try to predict out blocks in a loop that are not part of a
     natural loop.  Loop 0 is the pseudo-loop holding the whole
     function, so start at 1.  */
  for (i = 1; i < loops_info->num; i++)
    {
      basic_block bb, *bbs;
      unsigned j;
      int exits;
      struct loop *loop = loops_info->parray[i];
      struct niter_desc desc;
      unsigned HOST_WIDE_INT niter;

      flow_loop_scan (loop, LOOP_EXIT_EDGES);
      exits = loop->num_exits;

      if (simpleloops)
	{
	  /* Use induction-variable analysis to find a constant
	     iteration count, and derive the exit probability from it.  */
	  iv_analysis_loop_init (loop);
	  find_simple_exit (loop, &desc);

	  if (desc.simple_p && desc.const_iter)
	    {
	      int prob;
	      niter = desc.niter + 1;
	      if (niter == 0)        /* We might overflow here.  */
		niter = desc.niter;

	      /* Exit is taken roughly once in NITER iterations.  */
	      prob = (REG_BR_PROB_BASE
		      - (REG_BR_PROB_BASE + niter /2) / niter);
	      /* Branch prediction algorithm gives 0 frequency for everything
		 after the end of loop for loop having 0 probability to finish.  */
	      if (prob == REG_BR_PROB_BASE)
		prob = REG_BR_PROB_BASE - 1;
	      predict_edge (desc.in_edge, PRED_LOOP_ITERATIONS,
			    prob);
	    }
	}

      bbs = get_loop_body (loop);

      for (j = 0; j < loop->num_nodes; j++)
	{
	  int header_found = 0;
	  edge e;

	  bb = bbs[j];

	  /* Bypass loop heuristics on continue statement.  These
	     statements construct loops via "non-loop" constructs
	     in the source language and are better to be handled
	     separately.  */
	  if ((simpleloops && !can_predict_insn_p (BB_END (bb)))
	      || predicted_by_p (bb, PRED_CONTINUE))
	    continue;

	  /* Loop branch heuristics - predict an edge back to a
	     loop's head as taken.  */
	  for (e = bb->succ; e; e = e->succ_next)
	    if (e->dest == loop->header
		&& e->src == loop->latch)
	      {
		header_found = 1;
		predict_edge_def (e, PRED_LOOP_BRANCH, TAKEN);
	      }

	  /* Loop exit heuristics - predict an edge exiting the loop if the
	     conditional has no loop header successors as not taken.  */
	  if (!header_found)
	    for (e = bb->succ; e; e = e->succ_next)
	      if (e->dest->index < 0
		  || !flow_bb_inside_loop_p (loop, e->dest))
		/* Split the exit probability evenly among the exits.  */
		predict_edge
		  (e, PRED_LOOP_EXIT,
		   (REG_BR_PROB_BASE
		    - predictor_info [(int) PRED_LOOP_EXIT].hitrate)
		   / exits);
	}

      /* Free basic blocks from get_loop_body.  */
      free (bbs);
    }
}
619
/* Statically estimate the probability that a branch will be taken and produce
   estimated profile.  When profile feedback is present never executed portions
   of function gets estimated.

   RTL-level driver: applies loop, early-return, call-guard, pointer and
   opcode heuristics, combines the resulting REG_BR_PRED notes, and
   finally derives block frequencies.  */

void
estimate_probability (struct loops *loops_info)
{
  basic_block bb;

  /* Dominance queries below require a connected CFG.  */
  connect_infinite_loops_to_exit ();
  calculate_dominance_info (CDI_DOMINATORS);
  calculate_dominance_info (CDI_POST_DOMINATORS);

  predict_loops (loops_info, true);

  iv_analysis_done ();

  /* Attempt to predict conditional jumps using a number of heuristics.  */
  FOR_EACH_BB (bb)
    {
      rtx last_insn = BB_END (bb);
      rtx cond, earliest;
      edge e;

      if (! can_predict_insn_p (last_insn))
	continue;

      for (e = bb->succ; e; e = e->succ_next)
	{
	  /* Predict early returns to be probable, as we've already taken
	     care for error returns and others are often used for fast paths
	     through the function.  */
	  if ((e->dest == EXIT_BLOCK_PTR
	       || (e->dest->succ && !e->dest->succ->succ_next
		   && e->dest->succ->dest == EXIT_BLOCK_PTR))
	      && !predicted_by_p (bb, PRED_NULL_RETURN)
	      && !predicted_by_p (bb, PRED_CONST_RETURN)
	      && !predicted_by_p (bb, PRED_NEGATIVE_RETURN)
	      && !last_basic_block_p (e->dest))
	    predict_edge_def (e, PRED_EARLY_RETURN, TAKEN);

	  /* Look for block we are guarding (ie we dominate it,
	     but it doesn't postdominate us).  */
	  if (e->dest != EXIT_BLOCK_PTR && e->dest != bb
	      && dominated_by_p (CDI_DOMINATORS, e->dest, e->src)
	      && !dominated_by_p (CDI_POST_DOMINATORS, e->src, e->dest))
	    {
	      rtx insn;

	      /* The call heuristic claims that a guarded function call
		 is improbable.  This is because such calls are often used
		 to signal exceptional situations such as printing error
		 messages.  */
	      for (insn = BB_HEAD (e->dest); insn != NEXT_INSN (BB_END (e->dest));
		   insn = NEXT_INSN (insn))
		if (GET_CODE (insn) == CALL_INSN
		    /* Constant and pure calls are hardly used to signalize
		       something exceptional.  */
		    && ! CONST_OR_PURE_CALL_P (insn))
		  {
		    predict_edge_def (e, PRED_CALL, NOT_TAKEN);
		    break;
		  }
	    }
	}

      cond = get_condition (last_insn, &earliest, false);
      if (! cond)
	continue;

      /* Try "pointer heuristic."
	 A comparison ptr == 0 is predicted as false.
	 Similarly, a comparison ptr1 == ptr2 is predicted as false.  */
      if (COMPARISON_P (cond)
	  && ((REG_P (XEXP (cond, 0)) && REG_POINTER (XEXP (cond, 0)))
	      || (REG_P (XEXP (cond, 1)) && REG_POINTER (XEXP (cond, 1)))))
	{
	  if (GET_CODE (cond) == EQ)
	    predict_insn_def (last_insn, PRED_POINTER, NOT_TAKEN);
	  else if (GET_CODE (cond) == NE)
	    predict_insn_def (last_insn, PRED_POINTER, TAKEN);
	}
      else

      /* Try "opcode heuristic."
	 EQ tests are usually false and NE tests are usually true.  Also,
	 most quantities are positive, so we can make the appropriate guesses
	 about signed comparisons against zero.  */
	switch (GET_CODE (cond))
	  {
	  case CONST_INT:
	    /* Unconditional branch.  */
	    predict_insn_def (last_insn, PRED_UNCONDITIONAL,
			      cond == const0_rtx ? NOT_TAKEN : TAKEN);
	    break;

	  case EQ:
	  case UNEQ:
	    /* Floating point comparisons appears to behave in a very
	       unpredictable way because of special role of = tests in
	       FP code.  */
	    if (FLOAT_MODE_P (GET_MODE (XEXP (cond, 0))))
	      ;
	    /* Comparisons with 0 are often used for booleans and there is
	       nothing useful to predict about them.  */
	    else if (XEXP (cond, 1) == const0_rtx
		     || XEXP (cond, 0) == const0_rtx)
	      ;
	    else
	      predict_insn_def (last_insn, PRED_OPCODE_NONEQUAL, NOT_TAKEN);
	    break;

	  case NE:
	  case LTGT:
	    /* Floating point comparisons appears to behave in a very
	       unpredictable way because of special role of = tests in
	       FP code.  */
	    if (FLOAT_MODE_P (GET_MODE (XEXP (cond, 0))))
	      ;
	    /* Comparisons with 0 are often used for booleans and there is
	       nothing useful to predict about them.  */
	    else if (XEXP (cond, 1) == const0_rtx
		     || XEXP (cond, 0) == const0_rtx)
	      ;
	    else
	      predict_insn_def (last_insn, PRED_OPCODE_NONEQUAL, TAKEN);
	    break;

	  case ORDERED:
	    predict_insn_def (last_insn, PRED_FPOPCODE, TAKEN);
	    break;

	  case UNORDERED:
	    predict_insn_def (last_insn, PRED_FPOPCODE, NOT_TAKEN);
	    break;

	  case LE:
	  case LT:
	    /* Most quantities are positive: x < 0 etc. predicted false.  */
	    if (XEXP (cond, 1) == const0_rtx || XEXP (cond, 1) == const1_rtx
		|| XEXP (cond, 1) == constm1_rtx)
	      predict_insn_def (last_insn, PRED_OPCODE_POSITIVE, NOT_TAKEN);
	    break;

	  case GE:
	  case GT:
	    /* Most quantities are positive: x >= 0 etc. predicted true.  */
	    if (XEXP (cond, 1) == const0_rtx || XEXP (cond, 1) == const1_rtx
		|| XEXP (cond, 1) == constm1_rtx)
	      predict_insn_def (last_insn, PRED_OPCODE_POSITIVE, TAKEN);
	    break;

	  default:
	    break;
	  }
    }

  /* Attach the combined probability to each conditional jump.  */
  FOR_EACH_BB (bb)
    if (GET_CODE (BB_END (bb)) == JUMP_INSN
	&& any_condjump_p (BB_END (bb))
	&& bb->succ->succ_next != NULL)
      combine_predictions_for_insn (BB_END (bb), bb);

  remove_fake_edges ();
  /* Fill in the probability values in flowgraph based on the REG_BR_PROB
     notes.  */
  FOR_EACH_BB (bb)
    {
      rtx last_insn = BB_END (bb);

      if (!can_predict_insn_p (last_insn))
	{
	  /* We can predict only conditional jumps at the moment.
	     Expect each edge to be equally probable.
	     ?? In the future we want to make abnormal edges improbable.  */
	  int nedges = 0;
	  edge e;

	  /* If any edge already has a nonzero probability, leave the
	     block alone.  */
	  for (e = bb->succ; e; e = e->succ_next)
	    {
	      nedges++;
	      if (e->probability != 0)
		break;
	    }
	  if (!e)
	    for (e = bb->succ; e; e = e->succ_next)
	      e->probability = (REG_BR_PROB_BASE + nedges / 2) / nedges;
	}
    }
  estimate_bb_frequencies (loops_info);
  free_dominance_info (CDI_POST_DOMINATORS);
}
811 \f
812
/* Predict using opcode of the last statement in basic block.
   Tree-level analogue of the RTL pointer and opcode heuristics in
   estimate_probability: examines the COND_EXPR terminating BB and
   predicts its THEN edge.  */
static void
tree_predict_by_opcode (basic_block bb)
{
  tree stmt = last_stmt (bb);
  edge then_edge;
  tree cond;
  tree op0;
  tree type;

  if (!stmt || TREE_CODE (stmt) != COND_EXPR)
    return;
  /* Find the successor taken when the condition is true.  */
  for (then_edge = bb->succ; then_edge; then_edge = then_edge->succ_next)
    if (then_edge->flags & EDGE_TRUE_VALUE)
      break;
  cond = TREE_OPERAND (stmt, 0);
  /* Only comparison conditions are handled.  */
  if (TREE_CODE_CLASS (TREE_CODE (cond)) != '<')
    return;
  op0 = TREE_OPERAND (cond, 0);
  type = TREE_TYPE (op0);
  /* Try "pointer heuristic."
     A comparison ptr == 0 is predicted as false.
     Similarly, a comparison ptr1 == ptr2 is predicted as false.  */
  if (POINTER_TYPE_P (type))
    {
      if (TREE_CODE (cond) == EQ_EXPR)
	predict_edge_def (then_edge, PRED_TREE_POINTER, NOT_TAKEN);
      else if (TREE_CODE (cond) == NE_EXPR)
	predict_edge_def (then_edge, PRED_TREE_POINTER, TAKEN);
    }
  else

  /* Try "opcode heuristic."
     EQ tests are usually false and NE tests are usually true.  Also,
     most quantities are positive, so we can make the appropriate guesses
     about signed comparisons against zero.  */
    switch (TREE_CODE (cond))
      {
      case EQ_EXPR:
      case UNEQ_EXPR:
	/* Floating point comparisons appears to behave in a very
	   unpredictable way because of special role of = tests in
	   FP code.  */
	if (FLOAT_TYPE_P (type))
	  ;
	/* Comparisons with 0 are often used for booleans and there is
	   nothing useful to predict about them.  */
	else if (integer_zerop (op0)
		 || integer_zerop (TREE_OPERAND (cond, 1)))
	  ;
	else
	  predict_edge_def (then_edge, PRED_TREE_OPCODE_NONEQUAL, NOT_TAKEN);
	break;

      case NE_EXPR:
	/* Floating point comparisons appears to behave in a very
	   unpredictable way because of special role of = tests in
	   FP code.  */
	if (FLOAT_TYPE_P (type))
	  ;
	/* Comparisons with 0 are often used for booleans and there is
	   nothing useful to predict about them.  */
	else if (integer_zerop (op0)
		 || integer_zerop (TREE_OPERAND (cond, 1)))
	  ;
	else
	  predict_edge_def (then_edge, PRED_TREE_OPCODE_NONEQUAL, TAKEN);
	break;

      case ORDERED_EXPR:
	predict_edge_def (then_edge, PRED_TREE_FPOPCODE, TAKEN);
	break;

      case UNORDERED_EXPR:
	predict_edge_def (then_edge, PRED_TREE_FPOPCODE, NOT_TAKEN);
	break;

      case LE_EXPR:
      case LT_EXPR:
	/* Most quantities are positive: x < 0, x <= 1 etc. predicted
	   false.  */
	if (integer_zerop (TREE_OPERAND (cond, 1))
	    || integer_onep (TREE_OPERAND (cond, 1))
	    || integer_all_onesp (TREE_OPERAND (cond, 1))
	    || real_zerop (TREE_OPERAND (cond, 1))
	    || real_onep (TREE_OPERAND (cond, 1))
	    || real_minus_onep (TREE_OPERAND (cond, 1)))
	  predict_edge_def (then_edge, PRED_TREE_OPCODE_POSITIVE, NOT_TAKEN);
	break;

      case GE_EXPR:
      case GT_EXPR:
	/* Most quantities are positive: x > 0, x >= -1 etc. predicted
	   true.  */
	if (integer_zerop (TREE_OPERAND (cond, 1))
	    || integer_onep (TREE_OPERAND (cond, 1))
	    || integer_all_onesp (TREE_OPERAND (cond, 1))
	    || real_zerop (TREE_OPERAND (cond, 1))
	    || real_onep (TREE_OPERAND (cond, 1))
	    || real_minus_onep (TREE_OPERAND (cond, 1)))
	  predict_edge_def (then_edge, PRED_TREE_OPCODE_POSITIVE, TAKEN);
	break;

      default:
	break;
      }
}
916
/* Predict branch probabilities and estimate profile of the tree CFG.
   Tree-level driver paralleling estimate_probability: loop heuristics,
   early-return and call-guard heuristics, opcode heuristics, then
   combination and frequency estimation.  */
static void
tree_estimate_probability (void)
{
  basic_block bb;
  struct loops loops_info;

  flow_loops_find (&loops_info, LOOP_TREE);
  if (dump_file && (dump_flags & TDF_DETAILS))
    flow_loops_dump (&loops_info, dump_file, NULL, 0);

  /* Dominance queries below require a connected CFG.  */
  connect_infinite_loops_to_exit ();
  calculate_dominance_info (CDI_DOMINATORS);
  calculate_dominance_info (CDI_POST_DOMINATORS);

  predict_loops (&loops_info, false);

  FOR_EACH_BB (bb)
    {
      edge e;

      for (e = bb->succ; e; e = e->succ_next)
	{
	  /* Predict early returns to be probable, as we've already taken
	     care for error returns and others are often used for fast paths
	     through the function.  */
	  if ((e->dest == EXIT_BLOCK_PTR
	       || (e->dest->succ && !e->dest->succ->succ_next
		   && e->dest->succ->dest == EXIT_BLOCK_PTR))
	      && !predicted_by_p (bb, PRED_NULL_RETURN)
	      && !predicted_by_p (bb, PRED_CONST_RETURN)
	      && !predicted_by_p (bb, PRED_NEGATIVE_RETURN)
	      && !last_basic_block_p (e->dest))
	    predict_edge_def (e, PRED_EARLY_RETURN, TAKEN);

	  /* Look for block we are guarding (ie we dominate it,
	     but it doesn't postdominate us).  */
	  if (e->dest != EXIT_BLOCK_PTR && e->dest != bb
	      && dominated_by_p (CDI_DOMINATORS, e->dest, e->src)
	      && !dominated_by_p (CDI_POST_DOMINATORS, e->src, e->dest))
	    {
	      block_stmt_iterator bi;

	      /* The call heuristic claims that a guarded function call
		 is improbable.  This is because such calls are often used
		 to signal exceptional situations such as printing error
		 messages.  */
	      for (bi = bsi_start (e->dest); !bsi_end_p (bi);
		   bsi_next (&bi))
		{
		  tree stmt = bsi_stmt (bi);
		  if ((TREE_CODE (stmt) == CALL_EXPR
		       || (TREE_CODE (stmt) == MODIFY_EXPR
			   && TREE_CODE (TREE_OPERAND (stmt, 1)) == CALL_EXPR))
		      /* Constant and pure calls are hardly used to signalize
			 something exceptional.  */
		      && TREE_SIDE_EFFECTS (stmt))
		    {
		      predict_edge_def (e, PRED_CALL, NOT_TAKEN);
		      break;
		    }
		}
	    }
	}
      tree_predict_by_opcode (bb);
    }
  FOR_EACH_BB (bb)
    combine_predictions_for_bb (dump_file, bb);

  estimate_bb_frequencies (&loops_info);
  free_dominance_info (CDI_POST_DOMINATORS);
  remove_fake_edges ();
  flow_loops_free (&loops_info);
  if (dump_file && (dump_flags & TDF_DETAILS))
    dump_tree_cfg (dump_file, dump_flags);
}
993 \f
/* __builtin_expect dropped tokens into the insn stream describing expected
   values of registers.  Generate branch probabilities based off these
   values.

   Scans the insn stream linearly, tracking the most recent expected-value
   note (EV, for register EV_REG) and applying it to the next conditional
   jump that tests that register.  */

void
expected_value_to_br_prob (void)
{
  rtx insn, cond, ev = NULL_RTX, ev_reg = NULL_RTX;

  for (insn = get_insns (); insn ; insn = NEXT_INSN (insn))
    {
      switch (GET_CODE (insn))
	{
	case NOTE:
	  /* Look for expected value notes.  */
	  if (NOTE_LINE_NUMBER (insn) == NOTE_INSN_EXPECTED_VALUE)
	    {
	      ev = NOTE_EXPECTED_VALUE (insn);
	      ev_reg = XEXP (ev, 0);
	      delete_insn (insn);
	    }
	  continue;

	case CODE_LABEL:
	  /* Never propagate across labels.  */
	  ev = NULL_RTX;
	  continue;

	case JUMP_INSN:
	  /* Look for simple conditional branches.  If we haven't got an
	     expected value yet, no point going further.  */
	  if (GET_CODE (insn) != JUMP_INSN || ev == NULL_RTX
	      || ! any_condjump_p (insn))
	    continue;
	  break;

	default:
	  /* Look for insns that clobber the EV register.  */
	  if (ev && reg_set_p (ev_reg, insn))
	    ev = NULL_RTX;
	  continue;
	}

      /* Collect the branch condition, hopefully relative to EV_REG.  */
      /* ??? At present we'll miss things like
		(expected_value (eq r70 0))
		(set r71 -1)
		(set r80 (lt r70 r71))
		(set pc (if_then_else (ne r80 0) ...))
	 as canonicalize_condition will render this to us as
		(lt r70, r71)
	 Could use cselib to try and reduce this further.  */
      cond = XEXP (SET_SRC (pc_set (insn)), 0);
      cond = canonicalize_condition (insn, cond, 0, NULL, ev_reg, false);
      if (! cond || XEXP (cond, 0) != ev_reg
	  || GET_CODE (XEXP (cond, 1)) != CONST_INT)
	continue;

      /* Substitute and simplify.  Given that the expression we're
	 building involves two constants, we should wind up with either
	 true or false.  */
      cond = gen_rtx_fmt_ee (GET_CODE (cond), VOIDmode,
			     XEXP (ev, 1), XEXP (cond, 1));
      cond = simplify_rtx (cond);

      /* Turn the condition into a scaled branch probability.  */
      if (cond != const_true_rtx && cond != const0_rtx)
	abort ();
      predict_insn_def (insn, PRED_BUILTIN_EXPECT,
			cond == const_true_rtx ? TAKEN : NOT_TAKEN);
    }
}
1066 \f
1067 /* Check whether this is the last basic block of function. Commonly
1068 there is one extra common cleanup block. */
1069 static bool
1070 last_basic_block_p (basic_block bb)
1071 {
1072 if (bb == EXIT_BLOCK_PTR)
1073 return false;
1074
1075 return (bb->next_bb == EXIT_BLOCK_PTR
1076 || (bb->next_bb->next_bb == EXIT_BLOCK_PTR
1077 && bb->succ && !bb->succ->succ_next
1078 && bb->succ->dest->next_bb == EXIT_BLOCK_PTR));
1079 }
1080
/* Sets branch probabilities according to PREDiction and
   FLAGS.  HEADS[bb->index] should be index of basic block in that we
   need to alter branch predictions (i.e. the first of our dominators
   such that we do not post-dominate it) (but we fill this information
   on demand, so -1 may be there in case this was not needed yet).  */

static void
process_note_prediction (basic_block bb, int *heads, int pred, int flags)
{
  edge e;
  int y;
  bool taken;

  /* IS_TAKEN is a bit inside FLAGS; everything else in FLAGS is ignored
     here.  */
  taken = flags & IS_TAKEN;

  if (heads[bb->index] < 0)
    {
      /* This is first time we need this field in heads array; so
         find first dominator that we do not post-dominate (we are
         using already known members of heads array).  */
      basic_block ai = bb;
      basic_block next_ai = get_immediate_dominator (CDI_DOMINATORS, bb);
      int head;

      /* Walk up the dominator tree while BB still post-dominates,
	 threading the visited blocks through HEADS so the second loop
	 can revisit them.  */
      while (heads[next_ai->index] < 0)
	{
	  if (!dominated_by_p (CDI_POST_DOMINATORS, next_ai, bb))
	    break;
	  heads[next_ai->index] = ai->index;
	  ai = next_ai;
	  next_ai = get_immediate_dominator (CDI_DOMINATORS, next_ai);
	}
      /* NEXT_AI is now either the first dominator we do not
	 post-dominate, or a block whose head was computed earlier.  */
      if (!dominated_by_p (CDI_POST_DOMINATORS, next_ai, bb))
	head = next_ai->index;
      else
	head = heads[next_ai->index];
      /* Second pass: walk the thread built above back down to BB and
	 record the discovered HEAD for every block on the path, so
	 later queries are answered in O(1).  */
      while (next_ai != bb)
	{
	  next_ai = ai;
	  if (heads[ai->index] == ENTRY_BLOCK)
	    ai = ENTRY_BLOCK_PTR;
	  else
	    ai = BASIC_BLOCK (heads[ai->index]);
	  heads[next_ai->index] = head;
	}
    }
  y = heads[bb->index];

  /* Now find the edge that leads to our branch and apply the prediction.  */

  /* Y == last_basic_block is the sentinel meaning "no suitable head"
     (set up by note_prediction_to_br_prob for the entry block).  */
  if (y == last_basic_block || !can_predict_insn_p (BB_END (BASIC_BLOCK (y))))
    return;
  for (e = BASIC_BLOCK (y)->succ; e; e = e->succ_next)
    if (e->dest->index >= 0
	&& dominated_by_p (CDI_POST_DOMINATORS, e->dest, bb))
      predict_edge_def (e, pred, taken);
}
1138
/* Gathers NOTE_INSN_PREDICTIONs in given basic block and turns them
   into branch probabilities.  For description of heads array, see
   process_note_prediction.  */

static void
process_note_predictions (basic_block bb, int *heads)
{
  rtx insn;
  edge e;

  /* Additionally, we check here for blocks with no successors.  */
  int contained_noreturn_call = 0;
  int was_bb_head = 0;
  int noreturn_block = 1;

  /* Scan backwards from the end of the block; WAS_BB_HEAD latches once
     the head insn has been seen so the scan stops at the first non-note
     outside the block.  */
  for (insn = BB_END (bb); insn;
       was_bb_head |= (insn == BB_HEAD (bb)), insn = PREV_INSN (insn))
    {
      if (GET_CODE (insn) != NOTE)
	{
	  if (was_bb_head)
	    break;
	  else
	    {
	      /* Noreturn calls cause program to exit, therefore they are
		 always predicted as not taken.  */
	      if (GET_CODE (insn) == CALL_INSN
		  && find_reg_note (insn, REG_NORETURN, NULL))
		contained_noreturn_call = 1;
	      continue;
	    }
	}
      if (NOTE_LINE_NUMBER (insn) == NOTE_INSN_PREDICTION)
	{
	  int alg = (int) NOTE_PREDICTION_ALG (insn);
	  /* Process single prediction note.  The note is consumed here.  */
	  process_note_prediction (bb,
				   heads,
				   alg, (int) NOTE_PREDICTION_FLAGS (insn));
	  delete_insn (insn);
	}
    }
  /* A block whose only successors are fake edges never returns.
     NOTE(review): NORETURN_BLOCK is computed but not read afterwards;
     presumably kept for future use or debugging.  */
  for (e = bb->succ; e; e = e->succ_next)
    if (!(e->flags & EDGE_FAKE))
      noreturn_block = 0;
  if (contained_noreturn_call)
    {
      /* This block ended from other reasons than because of return.
         If it is because of noreturn call, this should certainly not
         be taken.  Otherwise it is probably some error recovery.  */
      process_note_prediction (bb, heads, PRED_NORETURN, NOT_TAKEN);
    }
}
1192
1193 /* Gathers NOTE_INSN_PREDICTIONs and turns them into
1194 branch probabilities. */
1195
1196 void
1197 note_prediction_to_br_prob (void)
1198 {
1199 basic_block bb;
1200 int *heads;
1201
1202 /* To enable handling of noreturn blocks. */
1203 add_noreturn_fake_exit_edges ();
1204 connect_infinite_loops_to_exit ();
1205
1206 calculate_dominance_info (CDI_POST_DOMINATORS);
1207 calculate_dominance_info (CDI_DOMINATORS);
1208
1209 heads = xmalloc (sizeof (int) * last_basic_block);
1210 memset (heads, -1, sizeof (int) * last_basic_block);
1211 heads[ENTRY_BLOCK_PTR->next_bb->index] = last_basic_block;
1212
1213 /* Process all prediction notes. */
1214
1215 FOR_EACH_BB (bb)
1216 process_note_predictions (bb, heads);
1217
1218 free_dominance_info (CDI_POST_DOMINATORS);
1219 free_dominance_info (CDI_DOMINATORS);
1220 free (heads);
1221
1222 remove_fake_edges ();
1223 }
1224 \f
/* This is used to carry information about basic blocks.  It is
   attached to the AUX field of the standard CFG block.  */

typedef struct block_info_def
{
  /* Estimated frequency of execution of basic_block.  */
  sreal frequency;

  /* To keep queue of basic blocks to process.  Used as a singly linked
     worklist by propagate_freq.  */
  basic_block next;

  /* True if block needs to be visited in propagate_freq.  */
  unsigned int tovisit:1;

  /* Number of predecessors we need to visit first.  Counted down by
     propagate_freq; the block is queued when it reaches zero.  */
  int npredecessors;
} *block_info;
1242
/* Similar information for edges.  */
typedef struct edge_info_def
{
  /* In case edge is a loopback edge, the probability the edge will be
     reached in case header is.  Estimated number of iterations of the
     loop can be then computed as 1 / (1 - back_edge_prob).  */
  sreal back_edge_prob;
  /* True if the edge is a loopback edge in the natural loop.  */
  unsigned int back_edge:1;
} *edge_info;
1253
/* Accessors for the per-block and per-edge data stashed in the CFG AUX
   fields during estimate_bb_frequencies.  */
#define BLOCK_INFO(B)	((block_info) (B)->aux)
#define EDGE_INFO(E)	((edge_info) (E)->aux)
1256
/* Helper function for estimate_bb_frequencies.
   Propagate the frequencies for LOOP.  Blocks are processed in
   topological order over non-back edges, using a worklist threaded
   through BLOCK_INFO(..)->next; each block is visited once all of its
   relevant predecessors have final frequencies.  */

static void
propagate_freq (struct loop *loop)
{
  basic_block head = loop->header;
  basic_block bb;
  basic_block last;
  edge e;
  basic_block nextbb;

  /* For each basic block we need to visit count number of his predecessors
     we need to visit first.  */
  FOR_BB_BETWEEN (bb, ENTRY_BLOCK_PTR, NULL, next_bb)
    {
      if (BLOCK_INFO (bb)->tovisit)
	{
	  int count = 0;

	  for (e = bb->pred; e; e = e->pred_next)
	    if (BLOCK_INFO (e->src)->tovisit && !(e->flags & EDGE_DFS_BACK))
	      count++;
	    else if (BLOCK_INFO (e->src)->tovisit
		     && dump_file && !EDGE_INFO (e)->back_edge)
	      fprintf (dump_file,
		       "Irreducible region hit, ignoring edge to %i->%i\n",
		       e->src->index, bb->index);
	  BLOCK_INFO (bb)->npredecessors = count;
	}
    }

  /* The loop header is the reference point: frequency 1.  */
  memcpy (&BLOCK_INFO (head)->frequency, &real_one, sizeof (real_one));
  last = head;
  for (bb = head; bb; bb = nextbb)
    {
      sreal cyclic_probability, frequency;

      memcpy (&cyclic_probability, &real_zero, sizeof (real_zero));
      memcpy (&frequency, &real_zero, sizeof (real_zero));

      /* Pop BB from the worklist.  */
      nextbb = BLOCK_INFO (bb)->next;
      BLOCK_INFO (bb)->next = NULL;

      /* Compute frequency of basic block.  */
      if (bb != head)
	{
#ifdef ENABLE_CHECKING
	  /* All non-back-edge predecessors must already be done.  */
	  for (e = bb->pred; e; e = e->pred_next)
	    if (BLOCK_INFO (e->src)->tovisit && !(e->flags & EDGE_DFS_BACK))
	      abort ();
#endif

	  /* Sum incoming frequencies; back edges instead contribute to
	     the cyclic probability used below.  */
	  for (e = bb->pred; e; e = e->pred_next)
	    if (EDGE_INFO (e)->back_edge)
	      {
		sreal_add (&cyclic_probability, &cyclic_probability,
			   &EDGE_INFO (e)->back_edge_prob);
	      }
	    else if (!(e->flags & EDGE_DFS_BACK))
	      {
		sreal tmp;

		/*  frequency += (e->probability
				  * BLOCK_INFO (e->src)->frequency /
				  REG_BR_PROB_BASE);  */

		sreal_init (&tmp, e->probability, 0);
		sreal_mul (&tmp, &tmp, &BLOCK_INFO (e->src)->frequency);
		sreal_mul (&tmp, &tmp, &real_inv_br_prob_base);
		sreal_add (&frequency, &frequency, &tmp);
	      }

	  if (sreal_compare (&cyclic_probability, &real_zero) == 0)
	    {
	      memcpy (&BLOCK_INFO (bb)->frequency, &frequency,
		      sizeof (frequency));
	    }
	  else
	    {
	      /* Clamp so the division below cannot blow up for loops
		 whose measured cyclic probability approaches 1.  */
	      if (sreal_compare (&cyclic_probability, &real_almost_one) > 0)
		{
		  memcpy (&cyclic_probability, &real_almost_one,
			  sizeof (real_almost_one));
		}

	      /* BLOCK_INFO (bb)->frequency = frequency
					      / (1 - cyclic_probability) */

	      sreal_sub (&cyclic_probability, &real_one, &cyclic_probability);
	      sreal_div (&BLOCK_INFO (bb)->frequency,
			 &frequency, &cyclic_probability);
	    }
	}

      BLOCK_INFO (bb)->tovisit = 0;

      /* Compute back edge frequencies.  */
      for (e = bb->succ; e; e = e->succ_next)
	if (e->dest == head)
	  {
	    sreal tmp;

	    /* EDGE_INFO (e)->back_edge_prob
		  = ((e->probability * BLOCK_INFO (bb)->frequency)
		     / REG_BR_PROB_BASE); */

	    sreal_init (&tmp, e->probability, 0);
	    sreal_mul (&tmp, &tmp, &BLOCK_INFO (bb)->frequency);
	    sreal_mul (&EDGE_INFO (e)->back_edge_prob,
		       &tmp, &real_inv_br_prob_base);
	  }

      /* Propagate to successor blocks.  A successor is appended to the
	 worklist once its last pending predecessor has been handled.  */
      for (e = bb->succ; e; e = e->succ_next)
	if (!(e->flags & EDGE_DFS_BACK)
	    && BLOCK_INFO (e->dest)->npredecessors)
	  {
	    BLOCK_INFO (e->dest)->npredecessors--;
	    if (!BLOCK_INFO (e->dest)->npredecessors)
	      {
		if (!nextbb)
		  nextbb = e->dest;
		else
		  BLOCK_INFO (last)->next = e->dest;

		last = e->dest;
	      }
	  }
    }
}
1388
1389 /* Estimate probabilities of loopback edges in loops at same nest level. */
1390
1391 static void
1392 estimate_loops_at_level (struct loop *first_loop)
1393 {
1394 struct loop *loop;
1395
1396 for (loop = first_loop; loop; loop = loop->next)
1397 {
1398 edge e;
1399 basic_block *bbs;
1400 unsigned i;
1401
1402 estimate_loops_at_level (loop->inner);
1403
1404 if (loop->latch->succ) /* Do not do this for dummy function loop. */
1405 {
1406 /* Find current loop back edge and mark it. */
1407 e = loop_latch_edge (loop);
1408 EDGE_INFO (e)->back_edge = 1;
1409 }
1410
1411 bbs = get_loop_body (loop);
1412 for (i = 0; i < loop->num_nodes; i++)
1413 BLOCK_INFO (bbs[i])->tovisit = 1;
1414 free (bbs);
1415 propagate_freq (loop);
1416 }
1417 }
1418
1419 /* Convert counts measured by profile driven feedback to frequencies.
1420 Return nonzero iff there was any nonzero execution count. */
1421
1422 static int
1423 counts_to_freqs (void)
1424 {
1425 gcov_type count_max, true_count_max = 0;
1426 basic_block bb;
1427
1428 FOR_EACH_BB (bb)
1429 true_count_max = MAX (bb->count, true_count_max);
1430
1431 count_max = MAX (true_count_max, 1);
1432 FOR_BB_BETWEEN (bb, ENTRY_BLOCK_PTR, NULL, next_bb)
1433 bb->frequency = (bb->count * BB_FREQ_MAX + count_max / 2) / count_max;
1434 return true_count_max;
1435 }
1436
/* Return true if function is likely to be expensive, so there is no point to
   optimize performance of prologue, epilogue or do inlining at the expense
   of code size growth.  THRESHOLD is the limit of number of instructions
   function can execute at average to be still considered not expensive.  */

bool
expensive_function_p (int threshold)
{
  unsigned int sum = 0;
  basic_block bb;
  unsigned int limit;

  /* We can not compute accurately for large thresholds due to scaled
     frequencies.  */
  if (threshold > BB_FREQ_MAX)
    abort ();

  /* Frequencies are out of range.  This either means that function contains
     internal loop executing more than BB_FREQ_MAX times or profile feedback
     is available and function has not been executed at all.  */
  if (ENTRY_BLOCK_PTR->frequency == 0)
    return true;

  /* Maximally BB_FREQ_MAX^2 so overflow won't happen.  */
  limit = ENTRY_BLOCK_PTR->frequency * threshold;
  /* SUM accumulates frequency once per active insn; exceeding LIMIT
     means the estimated dynamic insn count per invocation is above
     THRESHOLD, so bail out early.  */
  FOR_EACH_BB (bb)
    {
      rtx insn;

      for (insn = BB_HEAD (bb); insn != NEXT_INSN (BB_END (bb));
	   insn = NEXT_INSN (insn))
	if (active_insn_p (insn))
	  {
	    sum += bb->frequency;
	    if (sum > limit)
	      return true;
	  }
    }

  return false;
}
1478
/* Estimate basic blocks frequency by given branch probabilities.
   When profile feedback supplies usable counts, those are converted
   directly; otherwise frequencies are propagated from the statically
   predicted branch probabilities, loop by loop.  */

static void
estimate_bb_frequencies (struct loops *loops)
{
  basic_block bb;
  sreal freq_max;

  if (!flag_branch_probabilities || !counts_to_freqs ())
    {
      static int real_values_initialized = 0;

      /* The sreal constants are process-wide; set them up only once.  */
      if (!real_values_initialized)
        {
	  real_values_initialized = 1;
	  sreal_init (&real_zero, 0, 0);
	  sreal_init (&real_one, 1, 0);
	  sreal_init (&real_br_prob_base, REG_BR_PROB_BASE, 0);
	  sreal_init (&real_bb_freq_max, BB_FREQ_MAX, 0);
	  sreal_init (&real_one_half, 1, -1);
	  sreal_div (&real_inv_br_prob_base, &real_one, &real_br_prob_base);
	  sreal_sub (&real_almost_one, &real_one, &real_inv_br_prob_base);
	}

      mark_dfs_back_edges ();

      /* The entry block's single successor is always taken.  */
      ENTRY_BLOCK_PTR->succ->probability = REG_BR_PROB_BASE;

      /* Set up block info for each basic block.  */
      alloc_aux_for_blocks (sizeof (struct block_info_def));
      alloc_aux_for_edges (sizeof (struct edge_info_def));
      FOR_BB_BETWEEN (bb, ENTRY_BLOCK_PTR, NULL, next_bb)
	{
	  edge e;

	  BLOCK_INFO (bb)->tovisit = 0;
	  /* Seed each edge with its scaled probability
	     (probability / REG_BR_PROB_BASE).  */
	  for (e = bb->succ; e; e = e->succ_next)
	    {
	      sreal_init (&EDGE_INFO (e)->back_edge_prob, e->probability, 0);
	      sreal_mul (&EDGE_INFO (e)->back_edge_prob,
			 &EDGE_INFO (e)->back_edge_prob,
			 &real_inv_br_prob_base);
	    }
	}

      /* First compute probabilities locally for each loop from innermost
         to outermost to examine probabilities for back edges.  */
      estimate_loops_at_level (loops->tree_root);

      /* Find the largest computed frequency...  */
      memcpy (&freq_max, &real_zero, sizeof (real_zero));
      FOR_EACH_BB (bb)
	if (sreal_compare (&freq_max, &BLOCK_INFO (bb)->frequency) < 0)
	  memcpy (&freq_max, &BLOCK_INFO (bb)->frequency, sizeof (freq_max));

      /* ... and rescale everything so the maximum maps to BB_FREQ_MAX,
	 rounding to nearest via the + 0.5 term.  */
      sreal_div (&freq_max, &real_bb_freq_max, &freq_max);
      FOR_BB_BETWEEN (bb, ENTRY_BLOCK_PTR, NULL, next_bb)
	{
	  sreal tmp;

	  sreal_mul (&tmp, &BLOCK_INFO (bb)->frequency, &freq_max);
	  sreal_add (&tmp, &tmp, &real_one_half);
	  bb->frequency = sreal_to_int (&tmp);
	}

      free_aux_for_blocks ();
      free_aux_for_edges ();
    }
  compute_function_frequency ();
  if (flag_reorder_functions)
    choose_function_section ();
}
1550
1551 /* Decide whether function is hot, cold or unlikely executed. */
1552 static void
1553 compute_function_frequency (void)
1554 {
1555 basic_block bb;
1556
1557 if (!profile_info || !flag_branch_probabilities)
1558 return;
1559 cfun->function_frequency = FUNCTION_FREQUENCY_UNLIKELY_EXECUTED;
1560 FOR_EACH_BB (bb)
1561 {
1562 if (maybe_hot_bb_p (bb))
1563 {
1564 cfun->function_frequency = FUNCTION_FREQUENCY_HOT;
1565 return;
1566 }
1567 if (!probably_never_executed_bb_p (bb))
1568 cfun->function_frequency = FUNCTION_FREQUENCY_NORMAL;
1569 }
1570 }
1571
1572 /* Choose appropriate section for the function. */
1573 static void
1574 choose_function_section (void)
1575 {
1576 if (DECL_SECTION_NAME (current_function_decl)
1577 || !targetm.have_named_sections
1578 /* Theoretically we can split the gnu.linkonce text section too,
1579 but this requires more work as the frequency needs to match
1580 for all generated objects so we need to merge the frequency
1581 of all instances. For now just never set frequency for these. */
1582 || DECL_ONE_ONLY (current_function_decl))
1583 return;
1584 if (cfun->function_frequency == FUNCTION_FREQUENCY_HOT)
1585 DECL_SECTION_NAME (current_function_decl) =
1586 build_string (strlen (HOT_TEXT_SECTION_NAME), HOT_TEXT_SECTION_NAME);
1587 if (cfun->function_frequency == FUNCTION_FREQUENCY_UNLIKELY_EXECUTED)
1588 DECL_SECTION_NAME (current_function_decl) =
1589 build_string (strlen (UNLIKELY_EXECUTED_TEXT_SECTION_NAME),
1590 UNLIKELY_EXECUTED_TEXT_SECTION_NAME);
1591 }
1592
1593
/* Pass descriptor registering tree_estimate_probability with the
   tree-SSA pass manager under the name "profile".  */
struct tree_opt_pass pass_profile =
{
  "profile",				/* name */
  NULL,					/* gate */
  tree_estimate_probability,		/* execute */
  NULL,					/* sub */
  NULL,					/* next */
  0,					/* static_pass_number */
  TV_BRANCH_PROB,			/* tv_id */
  PROP_cfg,				/* properties_required */
  0,					/* properties_provided */
  0,					/* properties_destroyed */
  0,					/* todo_flags_start */
  TODO_ggc_collect | TODO_verify_ssa	/* todo_flags_finish */
};