gcc/predict.c
1 /* Branch prediction routines for the GNU compiler.
2 Copyright (C) 2000-2017 Free Software Foundation, Inc.
3
4 This file is part of GCC.
5
6 GCC is free software; you can redistribute it and/or modify it under
7 the terms of the GNU General Public License as published by the Free
8 Software Foundation; either version 3, or (at your option) any later
9 version.
10
11 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
12 WARRANTY; without even the implied warranty of MERCHANTABILITY or
13 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
14 for more details.
15
16 You should have received a copy of the GNU General Public License
17 along with GCC; see the file COPYING3. If not see
18 <http://www.gnu.org/licenses/>. */
19
20 /* References:
21
22 [1] "Branch Prediction for Free"
23 Ball and Larus; PLDI '93.
24 [2] "Static Branch Frequency and Program Profile Analysis"
25 Wu and Larus; MICRO-27.
26 [3] "Corpus-based Static Branch Prediction"
27 Calder, Grunwald, Lindsay, Martin, Mozer, and Zorn; PLDI '95. */
28
29
30 #include "config.h"
31 #include "system.h"
32 #include "coretypes.h"
33 #include "backend.h"
34 #include "rtl.h"
35 #include "tree.h"
36 #include "gimple.h"
37 #include "cfghooks.h"
38 #include "tree-pass.h"
39 #include "ssa.h"
40 #include "memmodel.h"
41 #include "emit-rtl.h"
42 #include "cgraph.h"
43 #include "coverage.h"
44 #include "diagnostic-core.h"
45 #include "gimple-predict.h"
46 #include "fold-const.h"
47 #include "calls.h"
48 #include "cfganal.h"
49 #include "profile.h"
50 #include "sreal.h"
51 #include "params.h"
52 #include "cfgloop.h"
53 #include "gimple-iterator.h"
54 #include "tree-cfg.h"
55 #include "tree-ssa-loop-niter.h"
56 #include "tree-ssa-loop.h"
57 #include "tree-scalar-evolution.h"
58 #include "ipa-utils.h"
59 #include "gimple-pretty-print.h"
60
61 /* Enum with reasons why a predictor is ignored. */
62
63 enum predictor_reason
64 {
65 REASON_NONE,
66 REASON_IGNORED,
67 REASON_SINGLE_EDGE_DUPLICATE,
68 REASON_EDGE_PAIR_DUPLICATE
69 };
70
71 /* String messages for the aforementioned enum. */
72
73 static const char *reason_messages[] = {"", " (ignored)",
74 " (single edge duplicate)", " (edge pair duplicate)"};
75
76 /* Real constants: 1-1/REG_BR_PROB_BASE, REG_BR_PROB_BASE,
77    1/REG_BR_PROB_BASE, 0.5, BB_FREQ_MAX.  */
78 static sreal real_almost_one, real_br_prob_base,
79 real_inv_br_prob_base, real_one_half, real_bb_freq_max;
80
81 static void combine_predictions_for_insn (rtx_insn *, basic_block);
82 static void dump_prediction (FILE *, enum br_predictor, int, basic_block,
83 enum predictor_reason, edge);
84 static void predict_paths_leading_to (basic_block, enum br_predictor,
85 enum prediction,
86 struct loop *in_loop = NULL);
87 static void predict_paths_leading_to_edge (edge, enum br_predictor,
88 enum prediction,
89 struct loop *in_loop = NULL);
90 static bool can_predict_insn_p (const rtx_insn *);
91
92 /* Information we hold about each branch predictor.
93 Filled using information from predict.def. */
94
95 struct predictor_info
96 {
97 const char *const name; /* Name used in the debugging dumps. */
98 const int hitrate; /* Expected hitrate used by
99 predict_insn_def call. */
100 const int flags;
101 };
102
103 /* Use the given predictor without Dempster-Shafer theory if it matches
104    using first_match heuristics.  */
105 #define PRED_FLAG_FIRST_MATCH 1
106
107 /* Convert a hit rate given in percent to our internal representation.  */
108
109 #define HITRATE(VAL) ((int) ((VAL) * REG_BR_PROB_BASE + 50) / 100)
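/* Editorial example (not part of the original source): assuming
   REG_BR_PROB_BASE is 10000, as is usual in GCC, HITRATE (99) expands to
   ((99 * 10000 + 50) / 100) == 9900, i.e. a 99% hit rate is represented as
   9900 on the internal probability scale.  */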
110
111 #define DEF_PREDICTOR(ENUM, NAME, HITRATE, FLAGS) {NAME, HITRATE, FLAGS},
112 static const struct predictor_info predictor_info[]= {
113 #include "predict.def"
114
115 /* Upper bound on predictors. */
116 {NULL, 0, 0}
117 };
118 #undef DEF_PREDICTOR
119
120 /* Return TRUE if frequency FREQ is considered to be hot. */
121
122 static inline bool
123 maybe_hot_frequency_p (struct function *fun, int freq)
124 {
125 struct cgraph_node *node = cgraph_node::get (fun->decl);
126 if (!profile_info
127 || !opt_for_fn (fun->decl, flag_branch_probabilities))
128 {
129 if (node->frequency == NODE_FREQUENCY_UNLIKELY_EXECUTED)
130 return false;
131 if (node->frequency == NODE_FREQUENCY_HOT)
132 return true;
133 }
134 if (profile_status_for_fn (fun) == PROFILE_ABSENT)
135 return true;
136 if (node->frequency == NODE_FREQUENCY_EXECUTED_ONCE
137 && freq < (ENTRY_BLOCK_PTR_FOR_FN (fun)->frequency * 2 / 3))
138 return false;
139 if (PARAM_VALUE (HOT_BB_FREQUENCY_FRACTION) == 0)
140 return false;
141 if (freq * PARAM_VALUE (HOT_BB_FREQUENCY_FRACTION)
142 < ENTRY_BLOCK_PTR_FOR_FN (fun)->frequency)
143 return false;
144 return true;
145 }
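/* Editorial note (not part of the original source): with the default value
   of HOT_BB_FREQUENCY_FRACTION, commonly 1000, the final test above accepts
   a block as maybe-hot only when its frequency is at least roughly 1/1000 of
   the entry block frequency; anything rarer is treated as cold.  */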
146
147 static gcov_type min_count = -1;
148
149 /* Determine the threshold for hot BB counts. */
150
151 gcov_type
152 get_hot_bb_threshold ()
153 {
154 gcov_working_set_t *ws;
155 if (min_count == -1)
156 {
157 ws = find_working_set (PARAM_VALUE (HOT_BB_COUNT_WS_PERMILLE));
158 gcc_assert (ws);
159 min_count = ws->min_counter;
160 }
161 return min_count;
162 }
163
164 /* Set the threshold for hot BB counts. */
165
166 void
167 set_hot_bb_threshold (gcov_type min)
168 {
169 min_count = min;
170 }
171
172 /* Return TRUE if profile count COUNT is considered to be hot.  */
173
174 bool
175 maybe_hot_count_p (struct function *, profile_count count)
176 {
177 if (!count.initialized_p ())
178 return true;
179 /* Code executed at most once is not hot. */
180 if (count <= MAX (profile_info ? profile_info->runs : 1, 1))
181 return false;
182 return (count.to_gcov_type () >= get_hot_bb_threshold ());
183 }
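/* Editorial example (not part of the original source): if the profile says
   the program was run profile_info->runs == 100 times, any count of 100 or
   less means the code executed at most once per run on average, so it is
   rejected as not hot before the working-set threshold is even consulted.  */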
184
185 /* Return true in case BB can be CPU intensive and should be optimized
186 for maximal performance. */
187
188 bool
189 maybe_hot_bb_p (struct function *fun, const_basic_block bb)
190 {
191 gcc_checking_assert (fun);
192 if (profile_status_for_fn (fun) == PROFILE_READ)
193 return maybe_hot_count_p (fun, bb->count);
194 return maybe_hot_frequency_p (fun, bb->frequency);
195 }
196
197 /* Return true in case edge E can be CPU intensive and should be optimized
198    for maximal performance.  */
199
200 bool
201 maybe_hot_edge_p (edge e)
202 {
203 if (profile_status_for_fn (cfun) == PROFILE_READ)
204 return maybe_hot_count_p (cfun, e->count);
205 return maybe_hot_frequency_p (cfun, EDGE_FREQUENCY (e));
206 }
207
208 /* Return true if profile COUNT and FREQUENCY, or function FUN's static
209    node frequency, reflect never being executed.  */
210
211 static bool
212 probably_never_executed (struct function *fun,
213 profile_count count, int)
214 {
215 gcc_checking_assert (fun);
216 if (count.initialized_p () && profile_status_for_fn (fun) == PROFILE_READ)
217 {
218 if (count == profile_count::zero ())
219 return true;
220
221 int unlikely_count_fraction = PARAM_VALUE (UNLIKELY_BB_COUNT_FRACTION);
222 if (count.apply_scale (unlikely_count_fraction, 1) >= profile_info->runs)
223 return false;
224 return true;
225 }
226 if ((!profile_info || !(opt_for_fn (fun->decl, flag_branch_probabilities)))
227 && (cgraph_node::get (fun->decl)->frequency
228 == NODE_FREQUENCY_UNLIKELY_EXECUTED))
229 return true;
230 return false;
231 }
232
233
234 /* Return true in case BB is probably never executed. */
235
236 bool
237 probably_never_executed_bb_p (struct function *fun, const_basic_block bb)
238 {
239 return probably_never_executed (fun, bb->count, bb->frequency);
240 }
241
242
243 /* Return true in case edge E is probably never executed. */
244
245 bool
246 probably_never_executed_edge_p (struct function *fun, edge e)
247 {
248 return probably_never_executed (fun, e->count, EDGE_FREQUENCY (e));
249 }
250
251 /* Return true when current function should always be optimized for size. */
252
253 bool
254 optimize_function_for_size_p (struct function *fun)
255 {
256 if (!fun || !fun->decl)
257 return optimize_size;
258 cgraph_node *n = cgraph_node::get (fun->decl);
259 return n && n->optimize_for_size_p ();
260 }
261
262 /* Return true when current function should always be optimized for speed. */
263
264 bool
265 optimize_function_for_speed_p (struct function *fun)
266 {
267 return !optimize_function_for_size_p (fun);
268 }
269
270 /* Return the optimization type that should be used for the function FUN. */
271
272 optimization_type
273 function_optimization_type (struct function *fun)
274 {
275 return (optimize_function_for_speed_p (fun)
276 ? OPTIMIZE_FOR_SPEED
277 : OPTIMIZE_FOR_SIZE);
278 }
279
280 /* Return TRUE when BB should be optimized for size. */
281
282 bool
283 optimize_bb_for_size_p (const_basic_block bb)
284 {
285 return (optimize_function_for_size_p (cfun)
286 || (bb && !maybe_hot_bb_p (cfun, bb)));
287 }
288
289 /* Return TRUE when BB should be optimized for speed. */
290
291 bool
292 optimize_bb_for_speed_p (const_basic_block bb)
293 {
294 return !optimize_bb_for_size_p (bb);
295 }
296
297 /* Return the optimization type that should be used for block BB. */
298
299 optimization_type
300 bb_optimization_type (const_basic_block bb)
301 {
302 return (optimize_bb_for_speed_p (bb)
303 ? OPTIMIZE_FOR_SPEED
304 : OPTIMIZE_FOR_SIZE);
305 }
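/* Editorial usage sketch (not part of this file; apply_aggressive_transform
   is a hypothetical helper): passes typically use these predicates to gate
   size-increasing transforms, e.g.

     if (optimize_bb_for_speed_p (bb))
       apply_aggressive_transform (bb);

   so that cold blocks and functions compiled for size stay small.  */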
306
307 /* Return TRUE when edge E should be optimized for size.  */
308
309 bool
310 optimize_edge_for_size_p (edge e)
311 {
312 return optimize_function_for_size_p (cfun) || !maybe_hot_edge_p (e);
313 }
314
315 /* Return TRUE when edge E should be optimized for speed.  */
316
317 bool
318 optimize_edge_for_speed_p (edge e)
319 {
320 return !optimize_edge_for_size_p (e);
321 }
322
323 /* Return TRUE when the current instruction should be optimized for size.  */
324
325 bool
326 optimize_insn_for_size_p (void)
327 {
328 return optimize_function_for_size_p (cfun) || !crtl->maybe_hot_insn_p;
329 }
330
331 /* Return TRUE when the current instruction should be optimized for speed.  */
332
333 bool
334 optimize_insn_for_speed_p (void)
335 {
336 return !optimize_insn_for_size_p ();
337 }
338
339 /* Return TRUE when LOOP should be optimized for size. */
340
341 bool
342 optimize_loop_for_size_p (struct loop *loop)
343 {
344 return optimize_bb_for_size_p (loop->header);
345 }
346
347 /* Return TRUE when LOOP should be optimized for speed. */
348
349 bool
350 optimize_loop_for_speed_p (struct loop *loop)
351 {
352 return optimize_bb_for_speed_p (loop->header);
353 }
354
355 /* Return TRUE when LOOP nest should be optimized for speed. */
356
357 bool
358 optimize_loop_nest_for_speed_p (struct loop *loop)
359 {
360 struct loop *l = loop;
361 if (optimize_loop_for_speed_p (loop))
362 return true;
363 l = loop->inner;
364 while (l && l != loop)
365 {
366 if (optimize_loop_for_speed_p (l))
367 return true;
368 if (l->inner)
369 l = l->inner;
370 else if (l->next)
371 l = l->next;
372 else
373 {
374 while (l != loop && !l->next)
375 l = loop_outer (l);
376 if (l != loop)
377 l = l->next;
378 }
379 }
380 return false;
381 }
382
383 /* Return TRUE when LOOP nest should be optimized for size. */
384
385 bool
386 optimize_loop_nest_for_size_p (struct loop *loop)
387 {
388 return !optimize_loop_nest_for_speed_p (loop);
389 }
390
391 /* Return true when edge E is likely to be well predictable by branch
392 predictor. */
393
394 bool
395 predictable_edge_p (edge e)
396 {
397 if (profile_status_for_fn (cfun) == PROFILE_ABSENT)
398 return false;
399 if ((e->probability
400 <= PARAM_VALUE (PARAM_PREDICTABLE_BRANCH_OUTCOME) * REG_BR_PROB_BASE / 100)
401 || (REG_BR_PROB_BASE - e->probability
402 <= PARAM_VALUE (PARAM_PREDICTABLE_BRANCH_OUTCOME) * REG_BR_PROB_BASE / 100))
403 return true;
404 return false;
405 }
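/* Editorial note (not part of the original source): with the default value
   of PARAM_PREDICTABLE_BRANCH_OUTCOME, believed to be 2, the test above
   treats an edge as well predictable when its probability is at most 2% or
   at least 98% of REG_BR_PROB_BASE.  */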
406
407
408 /* Set RTL expansion for BB profile. */
409
410 void
411 rtl_profile_for_bb (basic_block bb)
412 {
413 crtl->maybe_hot_insn_p = maybe_hot_bb_p (cfun, bb);
414 }
415
416 /* Set RTL expansion for edge profile. */
417
418 void
419 rtl_profile_for_edge (edge e)
420 {
421 crtl->maybe_hot_insn_p = maybe_hot_edge_p (e);
422 }
423
424 /* Set RTL expansion to default mode (i.e. when profile info is not known). */
425 void
426 default_rtl_profile (void)
427 {
428 crtl->maybe_hot_insn_p = true;
429 }
430
431 /* Return true if one of the outgoing edges is already predicted by
432    PREDICTOR.  */
433
434 bool
435 rtl_predicted_by_p (const_basic_block bb, enum br_predictor predictor)
436 {
437 rtx note;
438 if (!INSN_P (BB_END (bb)))
439 return false;
440 for (note = REG_NOTES (BB_END (bb)); note; note = XEXP (note, 1))
441 if (REG_NOTE_KIND (note) == REG_BR_PRED
442 && INTVAL (XEXP (XEXP (note, 0), 0)) == (int)predictor)
443 return true;
444 return false;
445 }
446
447 /* Structure representing predictions on the tree level.  */
448
449 struct edge_prediction {
450 struct edge_prediction *ep_next;
451 edge ep_edge;
452 enum br_predictor ep_predictor;
453 int ep_probability;
454 };
455
456 /* This map contains for a basic block the list of predictions for the
457 outgoing edges. */
458
459 static hash_map<const_basic_block, edge_prediction *> *bb_predictions;
460
461 /* Return true if one of the outgoing edges is already predicted by
462    PREDICTOR.  */
463
464 bool
465 gimple_predicted_by_p (const_basic_block bb, enum br_predictor predictor)
466 {
467 struct edge_prediction *i;
468 edge_prediction **preds = bb_predictions->get (bb);
469
470 if (!preds)
471 return false;
472
473 for (i = *preds; i; i = i->ep_next)
474 if (i->ep_predictor == predictor)
475 return true;
476 return false;
477 }
478
479 /* Return true if edge E is already predicted by PREDICTOR
480    with the outcome TAKEN.  */
481
482 bool
483 edge_predicted_by_p (edge e, enum br_predictor predictor, bool taken)
484 {
485 struct edge_prediction *i;
486 basic_block bb = e->src;
487 edge_prediction **preds = bb_predictions->get (bb);
488 if (!preds)
489 return false;
490
491 int probability = predictor_info[(int) predictor].hitrate;
492
493 if (taken != TAKEN)
494 probability = REG_BR_PROB_BASE - probability;
495
496 for (i = *preds; i; i = i->ep_next)
497 if (i->ep_predictor == predictor
498 && i->ep_edge == e
499 && i->ep_probability == probability)
500 return true;
501 return false;
502 }
503
504 /* Return true when the probability of an edge is reliable.
505
506    The profile guessing code is good at predicting branch outcomes (i.e.
507    taken/not taken), which it gets right slightly over 75% of the time.
508    It is, however, notoriously poor at predicting the probability itself.
509    In general the guessed profile appears a lot flatter (with probabilities
510    closer to 50%) than reality, so it is a bad idea to use it to drive
511    optimizations such as those disabling dynamic branch prediction for well
512    predictable branches.
513
514    There are two exceptions - edges leading to noreturn calls and edges
515    predicted by the number-of-iterations heuristics are predicted well.  This
516    predicate should be able to distinguish those, but at the moment it simply
517    checks for the noreturn heuristic, which is the only one giving a
518    probability over 99% or below 1%.  In the future we might want to propagate
519    reliability information across the CFG if we find it useful in more places. */
520 static bool
521 probability_reliable_p (int prob)
522 {
523 return (profile_status_for_fn (cfun) == PROFILE_READ
524 || (profile_status_for_fn (cfun) == PROFILE_GUESSED
525 && (prob <= HITRATE (1) || prob >= HITRATE (99))));
526 }
527
528 /* Same predicate as above, working on edges. */
529 bool
530 edge_probability_reliable_p (const_edge e)
531 {
532 return probability_reliable_p (e->probability);
533 }
534
535 /* Same predicate as edge_probability_reliable_p, working on notes. */
536 bool
537 br_prob_note_reliable_p (const_rtx note)
538 {
539 gcc_assert (REG_NOTE_KIND (note) == REG_BR_PROB);
540 return probability_reliable_p (XINT (note, 0));
541 }
542
543 static void
544 predict_insn (rtx_insn *insn, enum br_predictor predictor, int probability)
545 {
546 gcc_assert (any_condjump_p (insn));
547 if (!flag_guess_branch_prob)
548 return;
549
550 add_reg_note (insn, REG_BR_PRED,
551 gen_rtx_CONCAT (VOIDmode,
552 GEN_INT ((int) predictor),
553 GEN_INT ((int) probability)));
554 }
555
556 /* Predict insn by given predictor. */
557
558 void
559 predict_insn_def (rtx_insn *insn, enum br_predictor predictor,
560 enum prediction taken)
561 {
562 int probability = predictor_info[(int) predictor].hitrate;
563
564 if (taken != TAKEN)
565 probability = REG_BR_PROB_BASE - probability;
566
567 predict_insn (insn, predictor, probability);
568 }
569
570 /* Predict edge E with given probability if possible. */
571
572 void
573 rtl_predict_edge (edge e, enum br_predictor predictor, int probability)
574 {
575 rtx_insn *last_insn;
576 last_insn = BB_END (e->src);
577
578 /* We can store the branch prediction information only about
579 conditional jumps. */
580 if (!any_condjump_p (last_insn))
581 return;
582
583 /* We always store probability of branching. */
584 if (e->flags & EDGE_FALLTHRU)
585 probability = REG_BR_PROB_BASE - probability;
586
587 predict_insn (last_insn, predictor, probability);
588 }
589
590 /* Predict edge E with the given PROBABILITY. */
591 void
592 gimple_predict_edge (edge e, enum br_predictor predictor, int probability)
593 {
594 if (e->src != ENTRY_BLOCK_PTR_FOR_FN (cfun)
595 && EDGE_COUNT (e->src->succs) > 1
596 && flag_guess_branch_prob
597 && optimize)
598 {
599 struct edge_prediction *i = XNEW (struct edge_prediction);
600 edge_prediction *&preds = bb_predictions->get_or_insert (e->src);
601
602 i->ep_next = preds;
603 preds = i;
604 i->ep_probability = probability;
605 i->ep_predictor = predictor;
606 i->ep_edge = e;
607 }
608 }
609
610 /* Filter edge predictions PREDS by a function FILTER. DATA are passed
611 to the filter function. */
612
613 void
614 filter_predictions (edge_prediction **preds,
615 bool (*filter) (edge_prediction *, void *), void *data)
616 {
617 if (!bb_predictions)
618 return;
619
620 if (preds)
621 {
622 struct edge_prediction **prediction = preds;
623 struct edge_prediction *next;
624
625 while (*prediction)
626 {
627 if ((*filter) (*prediction, data))
628 prediction = &((*prediction)->ep_next);
629 else
630 {
631 next = (*prediction)->ep_next;
632 free (*prediction);
633 *prediction = next;
634 }
635 }
636 }
637 }
638
639 /* Filter function predicate that returns true for an edge prediction P
640    if its edge is equal to DATA.  */
641
642 bool
643 equal_edge_p (edge_prediction *p, void *data)
644 {
645 return p->ep_edge == (edge)data;
646 }
647
648 /* Remove all predictions on given basic block that are attached
649 to edge E. */
650 void
651 remove_predictions_associated_with_edge (edge e)
652 {
653 if (!bb_predictions)
654 return;
655
656 edge_prediction **preds = bb_predictions->get (e->src);
657 filter_predictions (preds, equal_edge_p, e);
658 }
659
660 /* Clears the list of predictions stored for BB. */
661
662 static void
663 clear_bb_predictions (basic_block bb)
664 {
665 edge_prediction **preds = bb_predictions->get (bb);
666 struct edge_prediction *pred, *next;
667
668 if (!preds)
669 return;
670
671 for (pred = *preds; pred; pred = next)
672 {
673 next = pred->ep_next;
674 free (pred);
675 }
676 *preds = NULL;
677 }
678
679 /* Return true when we can store a prediction on insn INSN.
680    At the moment we represent predictions only on conditional
681    jumps, not on computed jumps or other complicated cases.  */
682 static bool
683 can_predict_insn_p (const rtx_insn *insn)
684 {
685 return (JUMP_P (insn)
686 && any_condjump_p (insn)
687 && EDGE_COUNT (BLOCK_FOR_INSN (insn)->succs) >= 2);
688 }
689
690 /* Predict edge E by given predictor if possible. */
691
692 void
693 predict_edge_def (edge e, enum br_predictor predictor,
694 enum prediction taken)
695 {
696 int probability = predictor_info[(int) predictor].hitrate;
697
698 if (taken != TAKEN)
699 probability = REG_BR_PROB_BASE - probability;
700
701 predict_edge (e, predictor, probability);
702 }
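/* Editorial usage sketch (not part of the original source): a heuristic that
   considers edge E unlikely to be taken typically records that as

     predict_edge_def (e, PRED_NORETURN, NOT_TAKEN);

   which stores REG_BR_PROB_BASE minus the predictor's hit rate as the
   probability of E.  */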
703
704 /* Invert all branch predictions or probability notes in the INSN. This needs
705 to be done each time we invert the condition used by the jump. */
706
707 void
708 invert_br_probabilities (rtx insn)
709 {
710 rtx note;
711
712 for (note = REG_NOTES (insn); note; note = XEXP (note, 1))
713 if (REG_NOTE_KIND (note) == REG_BR_PROB)
714 XINT (note, 0) = REG_BR_PROB_BASE - XINT (note, 0);
715 else if (REG_NOTE_KIND (note) == REG_BR_PRED)
716 XEXP (XEXP (note, 0), 1)
717 = GEN_INT (REG_BR_PROB_BASE - INTVAL (XEXP (XEXP (note, 0), 1)));
718 }
719
720 /* Dump information about the branch prediction to the output file. */
721
722 static void
723 dump_prediction (FILE *file, enum br_predictor predictor, int probability,
724 basic_block bb, enum predictor_reason reason = REASON_NONE,
725 edge ep_edge = NULL)
726 {
727 edge e = ep_edge;
728 edge_iterator ei;
729
730 if (!file)
731 return;
732
733 if (e == NULL)
734 FOR_EACH_EDGE (e, ei, bb->succs)
735 if (! (e->flags & EDGE_FALLTHRU))
736 break;
737
738 char edge_info_str[128];
739 if (ep_edge)
740 sprintf (edge_info_str, " of edge %d->%d", ep_edge->src->index,
741 ep_edge->dest->index);
742 else
743 edge_info_str[0] = '\0';
744
745 fprintf (file, " %s heuristics%s%s: %.1f%%",
746 predictor_info[predictor].name,
747 edge_info_str, reason_messages[reason],
748 probability * 100.0 / REG_BR_PROB_BASE);
749
750 if (bb->count.initialized_p ())
751 {
752 fprintf (file, " exec ");
753 bb->count.dump (file);
754 if (e)
755 {
756 fprintf (file, " hit ");
757 e->count.dump (file);
758 fprintf (file, " (%.1f%%)", e->count.to_gcov_type() * 100.0
759 / bb->count.to_gcov_type ());
760 }
761 }
762
763 fprintf (file, "\n");
764 }
765
766 /* We cannot predict the probabilities of the outgoing edges of BB.  Set them
767    evenly and hope for the best.  If UNLIKELY_EDGES is not null, the edges
768    in that set are given PROB_VERY_UNLIKELY probability and the remaining
769    edges share an even probability.  */
770
771 static void
772 set_even_probabilities (basic_block bb,
773 hash_set<edge> *unlikely_edges = NULL)
774 {
775 unsigned nedges = 0;
776 edge e = NULL;
777 edge_iterator ei;
778
779 FOR_EACH_EDGE (e, ei, bb->succs)
780 if (!(e->flags & (EDGE_EH | EDGE_FAKE)))
781 nedges ++;
782
783 /* Make the distribution even if all edges are unlikely. */
784 unsigned unlikely_count = unlikely_edges ? unlikely_edges->elements () : 0;
785 if (unlikely_count == nedges)
786 {
787 unlikely_edges = NULL;
788 unlikely_count = 0;
789 }
790
791 unsigned c = nedges - unlikely_count;
792
793 FOR_EACH_EDGE (e, ei, bb->succs)
794 if (!(e->flags & (EDGE_EH | EDGE_FAKE)))
795 {
796 if (unlikely_edges != NULL && unlikely_edges->contains (e))
797 e->probability = PROB_VERY_UNLIKELY;
798 else
799 e->probability = (REG_BR_PROB_BASE + c / 2) / c;
800 }
801 else
802 e->probability = 0;
803 }
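/* Editorial example (not part of the original source): for a block with
   three non-EH/non-fake successors of which one is in UNLIKELY_EDGES, c is
   2, so the unlikely edge gets PROB_VERY_UNLIKELY while each remaining edge
   gets (REG_BR_PROB_BASE + 1) / 2; the sum may therefore slightly exceed
   REG_BR_PROB_BASE.  */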
804
805 /* Combine all REG_BR_PRED notes into a single probability and attach a
806    REG_BR_PROB note if not already present.  Remove the now useless REG_BR_PRED notes.  */
807
808 static void
809 combine_predictions_for_insn (rtx_insn *insn, basic_block bb)
810 {
811 rtx prob_note;
812 rtx *pnote;
813 rtx note;
814 int best_probability = PROB_EVEN;
815 enum br_predictor best_predictor = END_PREDICTORS;
816 int combined_probability = REG_BR_PROB_BASE / 2;
817 int d;
818 bool first_match = false;
819 bool found = false;
820
821 if (!can_predict_insn_p (insn))
822 {
823 set_even_probabilities (bb);
824 return;
825 }
826
827 prob_note = find_reg_note (insn, REG_BR_PROB, 0);
828 pnote = &REG_NOTES (insn);
829 if (dump_file)
830 fprintf (dump_file, "Predictions for insn %i bb %i\n", INSN_UID (insn),
831 bb->index);
832
833 /* We implement the "first match" heuristic and use the probability guessed
834    by the predictor with the smallest index.  */
835 for (note = REG_NOTES (insn); note; note = XEXP (note, 1))
836 if (REG_NOTE_KIND (note) == REG_BR_PRED)
837 {
838 enum br_predictor predictor = ((enum br_predictor)
839 INTVAL (XEXP (XEXP (note, 0), 0)));
840 int probability = INTVAL (XEXP (XEXP (note, 0), 1));
841
842 found = true;
843 if (best_predictor > predictor
844 && predictor_info[predictor].flags & PRED_FLAG_FIRST_MATCH)
845 best_probability = probability, best_predictor = predictor;
846
847 d = (combined_probability * probability
848 + (REG_BR_PROB_BASE - combined_probability)
849 * (REG_BR_PROB_BASE - probability));
850
851 /* Use FP math to avoid overflows of 32bit integers. */
852 if (d == 0)
853 /* If one probability is 0% and one 100%, avoid division by zero. */
854 combined_probability = REG_BR_PROB_BASE / 2;
855 else
856 combined_probability = (((double) combined_probability) * probability
857 * REG_BR_PROB_BASE / d + 0.5);
858 }
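	/* Editorial worked example (not part of the original source): the
	   computation above is the Dempster-Shafer combination
	   p1*p2 / (p1*p2 + (1-p1)*(1-p2)).  Starting from 5000, a first 70%
	   prediction (7000) moves the running value to 7000; combining a
	   second independent 70% prediction then gives d == 58000000 and a
	   combined probability of about 8448, i.e. roughly 84.5%, assuming
	   REG_BR_PROB_BASE == 10000.  */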
859
860 /* Decide which heuristic to use.  In case we didn't match anything,
861    use the no_prediction heuristic; in case we did match, use either
862    first match or Dempster-Shafer theory depending on the flags.  */
863
864 if (best_predictor != END_PREDICTORS)
865 first_match = true;
866
867 if (!found)
868 dump_prediction (dump_file, PRED_NO_PREDICTION,
869 combined_probability, bb);
870 else
871 {
872 if (!first_match)
873 dump_prediction (dump_file, PRED_DS_THEORY, combined_probability,
874 bb, !first_match ? REASON_NONE : REASON_IGNORED);
875 else
876 dump_prediction (dump_file, PRED_FIRST_MATCH, best_probability,
877 bb, first_match ? REASON_NONE : REASON_IGNORED);
878 }
879
880 if (first_match)
881 combined_probability = best_probability;
882 dump_prediction (dump_file, PRED_COMBINED, combined_probability, bb);
883
884 while (*pnote)
885 {
886 if (REG_NOTE_KIND (*pnote) == REG_BR_PRED)
887 {
888 enum br_predictor predictor = ((enum br_predictor)
889 INTVAL (XEXP (XEXP (*pnote, 0), 0)));
890 int probability = INTVAL (XEXP (XEXP (*pnote, 0), 1));
891
892 dump_prediction (dump_file, predictor, probability, bb,
893 (!first_match || best_predictor == predictor)
894 ? REASON_NONE : REASON_IGNORED);
895 *pnote = XEXP (*pnote, 1);
896 }
897 else
898 pnote = &XEXP (*pnote, 1);
899 }
900
901 if (!prob_note)
902 {
903 add_int_reg_note (insn, REG_BR_PROB, combined_probability);
904
905 /* Save the prediction into the CFG in case we are seeing a non-degenerate
906    conditional jump.  */
907 if (!single_succ_p (bb))
908 {
909 BRANCH_EDGE (bb)->probability = combined_probability;
910 FALLTHRU_EDGE (bb)->probability
911 = REG_BR_PROB_BASE - combined_probability;
912 }
913 }
914 else if (!single_succ_p (bb))
915 {
916 int prob = XINT (prob_note, 0);
917
918 BRANCH_EDGE (bb)->probability = prob;
919 FALLTHRU_EDGE (bb)->probability = REG_BR_PROB_BASE - prob;
920 }
921 else
922 single_succ_edge (bb)->probability = REG_BR_PROB_BASE;
923 }
924
925 /* Edge prediction hash traits. */
926
927 struct predictor_hash: pointer_hash <edge_prediction>
928 {
929
930 static inline hashval_t hash (const edge_prediction *);
931 static inline bool equal (const edge_prediction *, const edge_prediction *);
932 };
933
934 /* Calculate hash value of an edge prediction P based on predictor and
935 normalized probability. */
936
937 inline hashval_t
938 predictor_hash::hash (const edge_prediction *p)
939 {
940 inchash::hash hstate;
941 hstate.add_int (p->ep_predictor);
942
943 int prob = p->ep_probability;
944 if (prob > REG_BR_PROB_BASE / 2)
945 prob = REG_BR_PROB_BASE - prob;
946
947 hstate.add_int (prob);
948
949 return hstate.end ();
950 }
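/* Editorial note (not part of the original source): because probabilities
   above REG_BR_PROB_BASE / 2 are folded to REG_BR_PROB_BASE - prob before
   hashing, predictions of e.g. 9000 and 1000 made by the same predictor hash
   identically, which lets prune_predictions_for_bb below find the
   "opposite direction" duplicates via this table.  */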
951
952 /* Return true if edge predictions P1 and P2 use the same predictor and
953    have equal (or opposite) probability.  */
954
955 inline bool
956 predictor_hash::equal (const edge_prediction *p1, const edge_prediction *p2)
957 {
958 return (p1->ep_predictor == p2->ep_predictor
959 && (p1->ep_probability == p2->ep_probability
960 || p1->ep_probability == REG_BR_PROB_BASE - p2->ep_probability));
961 }
962
963 struct predictor_hash_traits: predictor_hash,
964 typed_noop_remove <edge_prediction *> {};
965
966 /* Return true if edge prediction P is not in DATA hash set. */
967
968 static bool
969 not_removed_prediction_p (edge_prediction *p, void *data)
970 {
971 hash_set<edge_prediction *> *remove = (hash_set<edge_prediction *> *) data;
972 return !remove->contains (p);
973 }
974
975 /* Prune predictions for a basic block BB.  Currently we do the following
976    clean-up steps:
977
978    1) remove a duplicate prediction that guesses the same probability
979    (different from 1/2) for both edges
980    2) remove duplicates for a prediction that belongs with the same probability
981    to a single edge
982
983  */
984
985 static void
986 prune_predictions_for_bb (basic_block bb)
987 {
988 edge_prediction **preds = bb_predictions->get (bb);
989
990 if (preds)
991 {
992 hash_table <predictor_hash_traits> s (13);
993 hash_set <edge_prediction *> remove;
994
995 /* Step 1: identify predictors that should be removed. */
996 for (edge_prediction *pred = *preds; pred; pred = pred->ep_next)
997 {
998 edge_prediction *existing = s.find (pred);
999 if (existing)
1000 {
1001 if (pred->ep_edge == existing->ep_edge
1002 && pred->ep_probability == existing->ep_probability)
1003 {
1004 /* Remove a duplicate predictor. */
1005 dump_prediction (dump_file, pred->ep_predictor,
1006 pred->ep_probability, bb,
1007 REASON_SINGLE_EDGE_DUPLICATE, pred->ep_edge);
1008
1009 remove.add (pred);
1010 }
1011 else if (pred->ep_edge != existing->ep_edge
1012 && pred->ep_probability == existing->ep_probability
1013 && pred->ep_probability != REG_BR_PROB_BASE / 2)
1014 {
1015 /* Remove both predictors as they predict the same
1016 for both edges. */
1017 dump_prediction (dump_file, existing->ep_predictor,
1018 pred->ep_probability, bb,
1019 REASON_EDGE_PAIR_DUPLICATE,
1020 existing->ep_edge);
1021 dump_prediction (dump_file, pred->ep_predictor,
1022 pred->ep_probability, bb,
1023 REASON_EDGE_PAIR_DUPLICATE,
1024 pred->ep_edge);
1025
1026 remove.add (existing);
1027 remove.add (pred);
1028 }
1029 }
1030
1031 edge_prediction **slot2 = s.find_slot (pred, INSERT);
1032 *slot2 = pred;
1033 }
1034
1035 /* Step 2: Remove predictors. */
1036 filter_predictions (preds, not_removed_prediction_p, &remove);
1037 }
1038 }
1039
1040 /* Combine predictions into a single probability and store them into the CFG.
1041    Remove the now useless prediction entries.
1042    If DRY_RUN is set, only produce dumps and do not modify the profile.  */
1043
1044 static void
1045 combine_predictions_for_bb (basic_block bb, bool dry_run)
1046 {
1047 int best_probability = PROB_EVEN;
1048 enum br_predictor best_predictor = END_PREDICTORS;
1049 int combined_probability = REG_BR_PROB_BASE / 2;
1050 int d;
1051 bool first_match = false;
1052 bool found = false;
1053 struct edge_prediction *pred;
1054 int nedges = 0;
1055 edge e, first = NULL, second = NULL;
1056 edge_iterator ei;
1057
1058 FOR_EACH_EDGE (e, ei, bb->succs)
1059 if (!(e->flags & (EDGE_EH | EDGE_FAKE)))
1060 {
1061 nedges ++;
1062 if (first && !second)
1063 second = e;
1064 if (!first)
1065 first = e;
1066 }
1067
1068 /* When there is no successor or only one choice, prediction is easy.
1069
1070 When we have a basic block with more than 2 successors, the situation
1071 is more complicated as DS theory cannot be used literally.
1072 More precisely, let's assume we predicted edge e1 with probability p1,
1073 thus: m1({b1}) = p1. As we're going to combine more than 2 edges, we
1074 need to find probability of e.g. m1({b2}), which we don't know.
1075 The only approximation is to equally distribute 1-p1 to all edges
1076 different from b1.
1077
1078    According to the numbers we've got from the SPEC2006 benchmark, there's only
1079    one interesting reliable predictor (noreturn call), which can be
1080    handled with a somewhat simpler approach.  */
1081 if (nedges != 2)
1082 {
1083 hash_set<edge> unlikely_edges (4);
1084
1085       /* Identify all edges that have a probability close to very unlikely.
1086	  Handling exactly very unlikely probabilities is not worth doing, as
1087	  no such probability occurs in the SPEC2006 benchmark.  */
1088 edge_prediction **preds = bb_predictions->get (bb);
1089 if (preds)
1090 for (pred = *preds; pred; pred = pred->ep_next)
1091 if (pred->ep_probability <= PROB_VERY_UNLIKELY)
1092 unlikely_edges.add (pred->ep_edge);
1093
1094 if (!bb->count.initialized_p () && !dry_run)
1095 set_even_probabilities (bb, &unlikely_edges);
1096 clear_bb_predictions (bb);
1097 if (dump_file)
1098 {
1099 fprintf (dump_file, "Predictions for bb %i\n", bb->index);
1100 if (unlikely_edges.elements () == 0)
1101 fprintf (dump_file,
1102 "%i edges in bb %i predicted to even probabilities\n",
1103 nedges, bb->index);
1104 else
1105 {
1106 fprintf (dump_file,
1107 "%i edges in bb %i predicted with some unlikely edges\n",
1108 nedges, bb->index);
1109 FOR_EACH_EDGE (e, ei, bb->succs)
1110 if (!(e->flags & (EDGE_EH | EDGE_FAKE)))
1111 dump_prediction (dump_file, PRED_COMBINED, e->probability,
1112 bb, REASON_NONE, e);
1113 }
1114 }
1115 return;
1116 }
1117
1118 if (dump_file)
1119 fprintf (dump_file, "Predictions for bb %i\n", bb->index);
1120
1121 prune_predictions_for_bb (bb);
1122
1123 edge_prediction **preds = bb_predictions->get (bb);
1124
1125 if (preds)
1126 {
1127       /* We implement the "first match" heuristic and use the probability guessed
1128	  by the predictor with the smallest index.  */
1129 for (pred = *preds; pred; pred = pred->ep_next)
1130 {
1131 enum br_predictor predictor = pred->ep_predictor;
1132 int probability = pred->ep_probability;
1133
1134 if (pred->ep_edge != first)
1135 probability = REG_BR_PROB_BASE - probability;
1136
1137 found = true;
1138	  /* The first match heuristic would be wildly confused if we predicted
1139	     both directions.  */
1140 if (best_predictor > predictor
1141 && predictor_info[predictor].flags & PRED_FLAG_FIRST_MATCH)
1142 {
1143 struct edge_prediction *pred2;
1144 int prob = probability;
1145
1146 for (pred2 = (struct edge_prediction *) *preds;
1147 pred2; pred2 = pred2->ep_next)
1148 if (pred2 != pred && pred2->ep_predictor == pred->ep_predictor)
1149 {
1150 int probability2 = pred2->ep_probability;
1151
1152 if (pred2->ep_edge != first)
1153 probability2 = REG_BR_PROB_BASE - probability2;
1154
1155 if ((probability < REG_BR_PROB_BASE / 2) !=
1156 (probability2 < REG_BR_PROB_BASE / 2))
1157 break;
1158
1159		  /* If the same predictor later gave a better result, go for it!  */
1160 if ((probability >= REG_BR_PROB_BASE / 2 && (probability2 > probability))
1161 || (probability <= REG_BR_PROB_BASE / 2 && (probability2 < probability)))
1162 prob = probability2;
1163 }
1164 if (!pred2)
1165 best_probability = prob, best_predictor = predictor;
1166 }
1167
1168 d = (combined_probability * probability
1169 + (REG_BR_PROB_BASE - combined_probability)
1170 * (REG_BR_PROB_BASE - probability));
1171
1172 /* Use FP math to avoid overflows of 32bit integers. */
1173 if (d == 0)
1174 /* If one probability is 0% and one 100%, avoid division by zero. */
1175 combined_probability = REG_BR_PROB_BASE / 2;
1176 else
1177 combined_probability = (((double) combined_probability)
1178 * probability
1179 * REG_BR_PROB_BASE / d + 0.5);
1180 }
1181 }
1182
1183   /* Decide which heuristic to use.  In case we didn't match anything,
1184      use the no_prediction heuristic; in case we did match, use either
1185      first match or Dempster-Shafer theory depending on the flags.  */
1186
1187 if (best_predictor != END_PREDICTORS)
1188 first_match = true;
1189
1190 if (!found)
1191 dump_prediction (dump_file, PRED_NO_PREDICTION, combined_probability, bb);
1192 else
1193 {
1194 if (!first_match)
1195 dump_prediction (dump_file, PRED_DS_THEORY, combined_probability, bb,
1196 !first_match ? REASON_NONE : REASON_IGNORED);
1197 else
1198 dump_prediction (dump_file, PRED_FIRST_MATCH, best_probability, bb,
1199 first_match ? REASON_NONE : REASON_IGNORED);
1200 }
1201
1202 if (first_match)
1203 combined_probability = best_probability;
1204 dump_prediction (dump_file, PRED_COMBINED, combined_probability, bb);
1205
1206 if (preds)
1207 {
1208 for (pred = (struct edge_prediction *) *preds; pred; pred = pred->ep_next)
1209 {
1210 enum br_predictor predictor = pred->ep_predictor;
1211 int probability = pred->ep_probability;
1212
1213 dump_prediction (dump_file, predictor, probability, bb,
1214 (!first_match || best_predictor == predictor)
1215 ? REASON_NONE : REASON_IGNORED, pred->ep_edge);
1216 }
1217 }
1218 clear_bb_predictions (bb);
1219
1220 if (!bb->count.initialized_p () && !dry_run)
1221 {
1222 first->probability = combined_probability;
1223 second->probability = REG_BR_PROB_BASE - combined_probability;
1224 }
1225 }
1226
1227 /* Check if T1 and T2 satisfy the IV_COMPARE condition.
1228    Return the SSA_NAME if the condition is satisfied, NULL otherwise.
1229
1230 T1 and T2 should be one of the following cases:
1231 1. T1 is SSA_NAME, T2 is NULL
1232 2. T1 is SSA_NAME, T2 is INTEGER_CST between [-4, 4]
1233 3. T2 is SSA_NAME, T1 is INTEGER_CST between [-4, 4] */
1234
1235 static tree
1236 strips_small_constant (tree t1, tree t2)
1237 {
1238 tree ret = NULL;
1239 int value = 0;
1240
1241 if (!t1)
1242 return NULL;
1243 else if (TREE_CODE (t1) == SSA_NAME)
1244 ret = t1;
1245 else if (tree_fits_shwi_p (t1))
1246 value = tree_to_shwi (t1);
1247 else
1248 return NULL;
1249
1250 if (!t2)
1251 return ret;
1252 else if (tree_fits_shwi_p (t2))
1253 value = tree_to_shwi (t2);
1254 else if (TREE_CODE (t2) == SSA_NAME)
1255 {
1256 if (ret)
1257 return NULL;
1258 else
1259 ret = t2;
1260 }
1261
1262 if (value <= 4 && value >= -4)
1263 return ret;
1264 else
1265 return NULL;
1266 }
1267
1268 /* Return the SSA_NAME in T or T's operands.
1269 Return NULL if SSA_NAME cannot be found. */
1270
1271 static tree
1272 get_base_value (tree t)
1273 {
1274 if (TREE_CODE (t) == SSA_NAME)
1275 return t;
1276
1277 if (!BINARY_CLASS_P (t))
1278 return NULL;
1279
1280 switch (TREE_OPERAND_LENGTH (t))
1281 {
1282 case 1:
1283 return strips_small_constant (TREE_OPERAND (t, 0), NULL);
1284 case 2:
1285 return strips_small_constant (TREE_OPERAND (t, 0),
1286 TREE_OPERAND (t, 1));
1287 default:
1288 return NULL;
1289 }
1290 }
1291
1292 /* Check the compare STMT in LOOP. If it compares an induction
1293 variable to a loop invariant, return true, and save
1294 LOOP_INVARIANT, COMPARE_CODE and LOOP_STEP.
1295    Otherwise return false and set LOOP_INVARIANT to NULL.  */
1296
1297 static bool
1298 is_comparison_with_loop_invariant_p (gcond *stmt, struct loop *loop,
1299 tree *loop_invariant,
1300 enum tree_code *compare_code,
1301 tree *loop_step,
1302 tree *loop_iv_base)
1303 {
1304 tree op0, op1, bound, base;
1305 affine_iv iv0, iv1;
1306 enum tree_code code;
1307 tree step;
1308
1309 code = gimple_cond_code (stmt);
1310 *loop_invariant = NULL;
1311
1312 switch (code)
1313 {
1314 case GT_EXPR:
1315 case GE_EXPR:
1316 case NE_EXPR:
1317 case LT_EXPR:
1318 case LE_EXPR:
1319 case EQ_EXPR:
1320 break;
1321
1322 default:
1323 return false;
1324 }
1325
1326 op0 = gimple_cond_lhs (stmt);
1327 op1 = gimple_cond_rhs (stmt);
1328
1329 if ((TREE_CODE (op0) != SSA_NAME && TREE_CODE (op0) != INTEGER_CST)
1330 || (TREE_CODE (op1) != SSA_NAME && TREE_CODE (op1) != INTEGER_CST))
1331 return false;
1332 if (!simple_iv (loop, loop_containing_stmt (stmt), op0, &iv0, true))
1333 return false;
1334 if (!simple_iv (loop, loop_containing_stmt (stmt), op1, &iv1, true))
1335 return false;
1336 if (TREE_CODE (iv0.step) != INTEGER_CST
1337 || TREE_CODE (iv1.step) != INTEGER_CST)
1338 return false;
1339 if ((integer_zerop (iv0.step) && integer_zerop (iv1.step))
1340 || (!integer_zerop (iv0.step) && !integer_zerop (iv1.step)))
1341 return false;
1342
1343 if (integer_zerop (iv0.step))
1344 {
1345 if (code != NE_EXPR && code != EQ_EXPR)
1346 code = invert_tree_comparison (code, false);
1347 bound = iv0.base;
1348 base = iv1.base;
1349 if (tree_fits_shwi_p (iv1.step))
1350 step = iv1.step;
1351 else
1352 return false;
1353 }
1354 else
1355 {
1356 bound = iv1.base;
1357 base = iv0.base;
1358 if (tree_fits_shwi_p (iv0.step))
1359 step = iv0.step;
1360 else
1361 return false;
1362 }
1363
1364 if (TREE_CODE (bound) != INTEGER_CST)
1365 bound = get_base_value (bound);
1366 if (!bound)
1367 return false;
1368 if (TREE_CODE (base) != INTEGER_CST)
1369 base = get_base_value (base);
1370 if (!base)
1371 return false;
1372
1373 *loop_invariant = bound;
1374 *compare_code = code;
1375 *loop_step = step;
1376 *loop_iv_base = base;
1377 return true;
1378 }
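/* Editorial example (not part of the original source): for a loop condition
   such as "i_5 < n_2" where i_5 is an induction variable with constant step
   1 and n_2 is loop invariant, the function above returns true with
   *LOOP_INVARIANT set to n_2, *COMPARE_CODE to LT_EXPR, *LOOP_STEP to 1 and
   *LOOP_IV_BASE to the base value of i_5.  */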
1379
1380 /* Compare two SSA_NAMEs: returns TRUE if T1 and T2 are value coherent. */
1381
1382 static bool
1383 expr_coherent_p (tree t1, tree t2)
1384 {
1385 gimple *stmt;
1386 tree ssa_name_1 = NULL;
1387 tree ssa_name_2 = NULL;
1388
1389 gcc_assert (TREE_CODE (t1) == SSA_NAME || TREE_CODE (t1) == INTEGER_CST);
1390 gcc_assert (TREE_CODE (t2) == SSA_NAME || TREE_CODE (t2) == INTEGER_CST);
1391
1392 if (t1 == t2)
1393 return true;
1394
1395 if (TREE_CODE (t1) == INTEGER_CST && TREE_CODE (t2) == INTEGER_CST)
1396 return true;
1397 if (TREE_CODE (t1) == INTEGER_CST || TREE_CODE (t2) == INTEGER_CST)
1398 return false;
1399
1400 /* Check to see if t1 is expressed/defined with t2. */
1401 stmt = SSA_NAME_DEF_STMT (t1);
1402 gcc_assert (stmt != NULL);
1403 if (is_gimple_assign (stmt))
1404 {
1405 ssa_name_1 = SINGLE_SSA_TREE_OPERAND (stmt, SSA_OP_USE);
1406 if (ssa_name_1 && ssa_name_1 == t2)
1407 return true;
1408 }
1409
1410 /* Check to see if t2 is expressed/defined with t1. */
1411 stmt = SSA_NAME_DEF_STMT (t2);
1412 gcc_assert (stmt != NULL);
1413 if (is_gimple_assign (stmt))
1414 {
1415 ssa_name_2 = SINGLE_SSA_TREE_OPERAND (stmt, SSA_OP_USE);
1416 if (ssa_name_2 && ssa_name_2 == t1)
1417 return true;
1418 }
1419
1420 /* Compare if t1 and t2's def_stmts are identical. */
1421 if (ssa_name_2 != NULL && ssa_name_1 == ssa_name_2)
1422 return true;
1423 else
1424 return false;
1425 }
1426
1427 /* Return true if BB is predicted by one of the loop heuristics.  */
1428
1429 static bool
1430 predicted_by_loop_heuristics_p (basic_block bb)
1431 {
1432 struct edge_prediction *i;
1433 edge_prediction **preds = bb_predictions->get (bb);
1434
1435 if (!preds)
1436 return false;
1437
1438 for (i = *preds; i; i = i->ep_next)
1439 if (i->ep_predictor == PRED_LOOP_ITERATIONS_GUESSED
1440 || i->ep_predictor == PRED_LOOP_ITERATIONS_MAX
1441 || i->ep_predictor == PRED_LOOP_ITERATIONS
1442 || i->ep_predictor == PRED_LOOP_EXIT
1443 || i->ep_predictor == PRED_LOOP_EXIT_WITH_RECURSION
1444 || i->ep_predictor == PRED_LOOP_EXTRA_EXIT)
1445 return true;
1446 return false;
1447 }
1448
1449 /* Predict branch probability of BB when BB contains a branch that compares
1450 an induction variable in LOOP with LOOP_IV_BASE_VAR to LOOP_BOUND_VAR. The
1451 loop exit is compared using LOOP_BOUND_CODE, with step of LOOP_BOUND_STEP.
1452
1453 E.g.
1454 for (int i = 0; i < bound; i++) {
1455 if (i < bound - 2)
1456 computation_1();
1457 else
1458 computation_2();
1459 }
1460
1461 In this loop, we will predict the branch inside the loop to be taken. */
1462
1463 static void
1464 predict_iv_comparison (struct loop *loop, basic_block bb,
1465 tree loop_bound_var,
1466 tree loop_iv_base_var,
1467 enum tree_code loop_bound_code,
1468 int loop_bound_step)
1469 {
1470 gimple *stmt;
1471 tree compare_var, compare_base;
1472 enum tree_code compare_code;
1473 tree compare_step_var;
1474 edge then_edge;
1475 edge_iterator ei;
1476
1477 if (predicted_by_loop_heuristics_p (bb))
1478 return;
1479
1480 stmt = last_stmt (bb);
1481 if (!stmt || gimple_code (stmt) != GIMPLE_COND)
1482 return;
1483 if (!is_comparison_with_loop_invariant_p (as_a <gcond *> (stmt),
1484 loop, &compare_var,
1485 &compare_code,
1486 &compare_step_var,
1487 &compare_base))
1488 return;
1489
1490 /* Find the taken edge. */
1491 FOR_EACH_EDGE (then_edge, ei, bb->succs)
1492 if (then_edge->flags & EDGE_TRUE_VALUE)
1493 break;
1494
1495 /* When comparing an IV to a loop invariant, NE is more likely to be
1496 taken while EQ is more likely to be not-taken. */
1497 if (compare_code == NE_EXPR)
1498 {
1499 predict_edge_def (then_edge, PRED_LOOP_IV_COMPARE_GUESS, TAKEN);
1500 return;
1501 }
1502 else if (compare_code == EQ_EXPR)
1503 {
1504 predict_edge_def (then_edge, PRED_LOOP_IV_COMPARE_GUESS, NOT_TAKEN);
1505 return;
1506 }
1507
1508 if (!expr_coherent_p (loop_iv_base_var, compare_base))
1509 return;
1510
1511 /* If loop bound, base and compare bound are all constants, we can
1512 calculate the probability directly. */
1513 if (tree_fits_shwi_p (loop_bound_var)
1514 && tree_fits_shwi_p (compare_var)
1515 && tree_fits_shwi_p (compare_base))
1516 {
1517 int probability;
1518 bool overflow, overall_overflow = false;
1519 widest_int compare_count, tem;
1520
1521 /* (loop_bound - base) / compare_step */
1522 tem = wi::sub (wi::to_widest (loop_bound_var),
1523 wi::to_widest (compare_base), SIGNED, &overflow);
1524 overall_overflow |= overflow;
1525 widest_int loop_count = wi::div_trunc (tem,
1526 wi::to_widest (compare_step_var),
1527 SIGNED, &overflow);
1528 overall_overflow |= overflow;
1529
1530 if (!wi::neg_p (wi::to_widest (compare_step_var))
1531 ^ (compare_code == LT_EXPR || compare_code == LE_EXPR))
1532 {
1533 /* (loop_bound - compare_bound) / compare_step */
1534 tem = wi::sub (wi::to_widest (loop_bound_var),
1535 wi::to_widest (compare_var), SIGNED, &overflow);
1536 overall_overflow |= overflow;
1537 compare_count = wi::div_trunc (tem, wi::to_widest (compare_step_var),
1538 SIGNED, &overflow);
1539 overall_overflow |= overflow;
1540 }
1541 else
1542 {
1543 /* (compare_bound - base) / compare_step */
1544 tem = wi::sub (wi::to_widest (compare_var),
1545 wi::to_widest (compare_base), SIGNED, &overflow);
1546 overall_overflow |= overflow;
1547 compare_count = wi::div_trunc (tem, wi::to_widest (compare_step_var),
1548 SIGNED, &overflow);
1549 overall_overflow |= overflow;
1550 }
1551 if (compare_code == LE_EXPR || compare_code == GE_EXPR)
1552 ++compare_count;
1553 if (loop_bound_code == LE_EXPR || loop_bound_code == GE_EXPR)
1554 ++loop_count;
1555 if (wi::neg_p (compare_count))
1556 compare_count = 0;
1557 if (wi::neg_p (loop_count))
1558 loop_count = 0;
1559 if (loop_count == 0)
1560 probability = 0;
1561 else if (wi::cmps (compare_count, loop_count) == 1)
1562 probability = REG_BR_PROB_BASE;
1563 else
1564 {
1565 tem = compare_count * REG_BR_PROB_BASE;
1566 tem = wi::udiv_trunc (tem, loop_count);
1567 probability = tem.to_uhwi ();
1568 }
1569
1570 /* FIXME: The branch prediction seems broken. It has only 20% hitrate. */
1571 if (!overall_overflow)
1572 predict_edge (then_edge, PRED_LOOP_IV_COMPARE, probability);
1573
1574 return;
1575 }
1576
1577 if (expr_coherent_p (loop_bound_var, compare_var))
1578 {
1579 if ((loop_bound_code == LT_EXPR || loop_bound_code == LE_EXPR)
1580 && (compare_code == LT_EXPR || compare_code == LE_EXPR))
1581 predict_edge_def (then_edge, PRED_LOOP_IV_COMPARE_GUESS, TAKEN);
1582 else if ((loop_bound_code == GT_EXPR || loop_bound_code == GE_EXPR)
1583 && (compare_code == GT_EXPR || compare_code == GE_EXPR))
1584 predict_edge_def (then_edge, PRED_LOOP_IV_COMPARE_GUESS, TAKEN);
1585 else if (loop_bound_code == NE_EXPR)
1586 {
1587 /* If the loop backedge condition is "(i != bound)", we do
1588 the comparison based on the step of IV:
1589 * step < 0 : backedge condition is like (i > bound)
1590 * step > 0 : backedge condition is like (i < bound) */
1591 gcc_assert (loop_bound_step != 0);
1592 if (loop_bound_step > 0
1593 && (compare_code == LT_EXPR
1594 || compare_code == LE_EXPR))
1595 predict_edge_def (then_edge, PRED_LOOP_IV_COMPARE_GUESS, TAKEN);
1596 else if (loop_bound_step < 0
1597 && (compare_code == GT_EXPR
1598 || compare_code == GE_EXPR))
1599 predict_edge_def (then_edge, PRED_LOOP_IV_COMPARE_GUESS, TAKEN);
1600 else
1601 predict_edge_def (then_edge, PRED_LOOP_IV_COMPARE_GUESS, NOT_TAKEN);
1602 }
1603 else
1604	/* The branch is predicted not-taken if loop_bound_code is
1605	   the opposite of compare_code.  */
1606 predict_edge_def (then_edge, PRED_LOOP_IV_COMPARE_GUESS, NOT_TAKEN);
1607 }
1608 else if (expr_coherent_p (loop_iv_base_var, compare_var))
1609 {
1610 /* For cases like:
1611 for (i = s; i < h; i++)
1612 if (i > s + 2) ....
1613 The branch should be predicted taken. */
1614 if (loop_bound_step > 0
1615 && (compare_code == GT_EXPR || compare_code == GE_EXPR))
1616 predict_edge_def (then_edge, PRED_LOOP_IV_COMPARE_GUESS, TAKEN);
1617 else if (loop_bound_step < 0
1618 && (compare_code == LT_EXPR || compare_code == LE_EXPR))
1619 predict_edge_def (then_edge, PRED_LOOP_IV_COMPARE_GUESS, TAKEN);
1620 else
1621 predict_edge_def (then_edge, PRED_LOOP_IV_COMPARE_GUESS, NOT_TAKEN);
1622 }
1623 }
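/* Editorial worked example (not part of the original source): for

     for (i = 0; i < 100; i++)
       if (i < 30)
	 ...

   all bounds are constants, so loop_count == 100, compare_count == 30 and
   the inner branch is predicted taken with probability
   30 * REG_BR_PROB_BASE / 100, i.e. 3000 when REG_BR_PROB_BASE is 10000.  */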
1624
1625 /* Predict for extra loop exits that will lead to EXIT_EDGE.  The extra loop
1626    exits result from short-circuit conditions that will generate an
1627    if_tmp.  E.g.:
1628
1629 if (foo() || global > 10)
1630 break;
1631
1632 This will be translated into:
1633
1634 BB3:
1635 loop header...
1636 BB4:
1637 if foo() goto BB6 else goto BB5
1638 BB5:
1639 if global > 10 goto BB6 else goto BB7
1640 BB6:
1641 goto BB7
1642 BB7:
1643 iftmp = (PHI 0(BB5), 1(BB6))
1644 if iftmp == 1 goto BB8 else goto BB3
1645 BB8:
1646 outside of the loop...
1647
1648    The edge BB7->BB8 is a loop exit because BB8 is outside of the loop.
1649 From the dataflow, we can infer that BB4->BB6 and BB5->BB6 are also loop
1650 exits. This function takes BB7->BB8 as input, and finds out the extra loop
1651 exits to predict them using PRED_LOOP_EXTRA_EXIT. */
1652
1653 static void
1654 predict_extra_loop_exits (edge exit_edge)
1655 {
1656 unsigned i;
1657 bool check_value_one;
1658 gimple *lhs_def_stmt;
1659 gphi *phi_stmt;
1660 tree cmp_rhs, cmp_lhs;
1661 gimple *last;
1662 gcond *cmp_stmt;
1663
1664 last = last_stmt (exit_edge->src);
1665 if (!last)
1666 return;
1667 cmp_stmt = dyn_cast <gcond *> (last);
1668 if (!cmp_stmt)
1669 return;
1670
1671 cmp_rhs = gimple_cond_rhs (cmp_stmt);
1672 cmp_lhs = gimple_cond_lhs (cmp_stmt);
1673 if (!TREE_CONSTANT (cmp_rhs)
1674 || !(integer_zerop (cmp_rhs) || integer_onep (cmp_rhs)))
1675 return;
1676 if (TREE_CODE (cmp_lhs) != SSA_NAME)
1677 return;
1678
1679 /* If check_value_one is true, only the phi_args with value '1' will lead
1680 to loop exit. Otherwise, only the phi_args with value '0' will lead to
1681 loop exit. */
1682 check_value_one = (((integer_onep (cmp_rhs))
1683 ^ (gimple_cond_code (cmp_stmt) == EQ_EXPR))
1684 ^ ((exit_edge->flags & EDGE_TRUE_VALUE) != 0));
1685
1686 lhs_def_stmt = SSA_NAME_DEF_STMT (cmp_lhs);
1687 if (!lhs_def_stmt)
1688 return;
1689
1690 phi_stmt = dyn_cast <gphi *> (lhs_def_stmt);
1691 if (!phi_stmt)
1692 return;
1693
1694 for (i = 0; i < gimple_phi_num_args (phi_stmt); i++)
1695 {
1696 edge e1;
1697 edge_iterator ei;
1698 tree val = gimple_phi_arg_def (phi_stmt, i);
1699 edge e = gimple_phi_arg_edge (phi_stmt, i);
1700
1701 if (!TREE_CONSTANT (val) || !(integer_zerop (val) || integer_onep (val)))
1702 continue;
1703 if ((check_value_one ^ integer_onep (val)) == 1)
1704 continue;
1705 if (EDGE_COUNT (e->src->succs) != 1)
1706 {
1707 predict_paths_leading_to_edge (e, PRED_LOOP_EXTRA_EXIT, NOT_TAKEN);
1708 continue;
1709 }
1710
1711 FOR_EACH_EDGE (e1, ei, e->src->preds)
1712 predict_paths_leading_to_edge (e1, PRED_LOOP_EXTRA_EXIT, NOT_TAKEN);
1713 }
1714 }
1715
1716
1717 /* Predict edge probabilities by exploiting loop structure. */
1718
1719 static void
1720 predict_loops (void)
1721 {
1722 struct loop *loop;
1723 basic_block bb;
1724 hash_set <struct loop *> with_recursion(10);
1725
1726 FOR_EACH_BB_FN (bb, cfun)
1727 {
1728 gimple_stmt_iterator gsi;
1729 tree decl;
1730
1731 for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
1732 if (is_gimple_call (gsi_stmt (gsi))
1733 && (decl = gimple_call_fndecl (gsi_stmt (gsi))) != NULL
1734 && recursive_call_p (current_function_decl, decl))
1735 {
1736 loop = bb->loop_father;
1737 while (loop && !with_recursion.add (loop))
1738 loop = loop_outer (loop);
1739 }
1740 }
1741
1742 /* Try to predict out blocks in a loop that are not part of a
1743 natural loop. */
1744 FOR_EACH_LOOP (loop, LI_FROM_INNERMOST)
1745 {
1746 basic_block bb, *bbs;
1747 unsigned j, n_exits = 0;
1748 vec<edge> exits;
1749 struct tree_niter_desc niter_desc;
1750 edge ex;
1751 struct nb_iter_bound *nb_iter;
1752 enum tree_code loop_bound_code = ERROR_MARK;
1753 tree loop_bound_step = NULL;
1754 tree loop_bound_var = NULL;
1755 tree loop_iv_base = NULL;
1756 gcond *stmt = NULL;
1757 bool recursion = with_recursion.contains (loop);
1758
1759 exits = get_loop_exit_edges (loop);
1760 FOR_EACH_VEC_ELT (exits, j, ex)
1761 if (!(ex->flags & (EDGE_EH | EDGE_ABNORMAL_CALL | EDGE_FAKE)))
1762 n_exits ++;
1763 if (!n_exits)
1764 {
1765 exits.release ();
1766 continue;
1767 }
1768
1769 if (dump_file && (dump_flags & TDF_DETAILS))
1770 fprintf (dump_file, "Predicting loop %i%s with %i exits.\n",
1771 loop->num, recursion ? " (with recursion)":"", n_exits);
1772 if (dump_file && (dump_flags & TDF_DETAILS)
1773 && max_loop_iterations_int (loop) >= 0)
1774 {
1775 fprintf (dump_file,
1776 "Loop %d iterates at most %i times.\n", loop->num,
1777 (int)max_loop_iterations_int (loop));
1778 }
1779 if (dump_file && (dump_flags & TDF_DETAILS)
1780 && likely_max_loop_iterations_int (loop) >= 0)
1781 {
1782 fprintf (dump_file, "Loop %d likely iterates at most %i times.\n",
1783 loop->num, (int)likely_max_loop_iterations_int (loop));
1784 }
1785
1786 FOR_EACH_VEC_ELT (exits, j, ex)
1787 {
1788 tree niter = NULL;
1789 HOST_WIDE_INT nitercst;
1790 int max = PARAM_VALUE (PARAM_MAX_PREDICTED_ITERATIONS);
1791 int probability;
1792 enum br_predictor predictor;
1793 widest_int nit;
1794
1795 if (ex->flags & (EDGE_EH | EDGE_ABNORMAL_CALL | EDGE_FAKE))
1796 continue;
1797	  /* Loop heuristics do not expect the exit conditional to be inside
1798	     an inner loop.  We predict from the innermost to the outermost loop.  */
1799 if (predicted_by_loop_heuristics_p (ex->src))
1800 {
1801 if (dump_file && (dump_flags & TDF_DETAILS))
1802 fprintf (dump_file, "Skipping exit %i->%i because "
1803 "it is already predicted.\n",
1804 ex->src->index, ex->dest->index);
1805 continue;
1806 }
1807 predict_extra_loop_exits (ex);
1808
1809 if (number_of_iterations_exit (loop, ex, &niter_desc, false, false))
1810 niter = niter_desc.niter;
1811 if (!niter || TREE_CODE (niter_desc.niter) != INTEGER_CST)
1812 niter = loop_niter_by_eval (loop, ex);
1813 if (dump_file && (dump_flags & TDF_DETAILS)
1814 && TREE_CODE (niter) == INTEGER_CST)
1815 {
1816 fprintf (dump_file, "Exit %i->%i %d iterates ",
1817 ex->src->index, ex->dest->index,
1818 loop->num);
1819 print_generic_expr (dump_file, niter, TDF_SLIM);
1820 fprintf (dump_file, " times.\n");
1821 }
1822
1823 if (TREE_CODE (niter) == INTEGER_CST)
1824 {
1825 if (tree_fits_uhwi_p (niter)
1826 && max
1827 && compare_tree_int (niter, max - 1) == -1)
1828 nitercst = tree_to_uhwi (niter) + 1;
1829 else
1830 nitercst = max;
1831 predictor = PRED_LOOP_ITERATIONS;
1832 }
1833 /* If we have just one exit and we can derive some information about
1834 the number of iterations of the loop from the statements inside
1835 the loop, use it to predict this exit. */
1836 else if (n_exits == 1
1837 && estimated_stmt_executions (loop, &nit))
1838 {
1839 if (wi::gtu_p (nit, max))
1840 nitercst = max;
1841 else
1842 nitercst = nit.to_shwi ();
1843 predictor = PRED_LOOP_ITERATIONS_GUESSED;
1844 }
1845	  /* If we have a likely upper bound, trust it for very small iteration
1846	     counts.  Such loops would otherwise get mispredicted by the standard
1847	     LOOP_EXIT heuristics.  */
1848 else if (n_exits == 1
1849 && likely_max_stmt_executions (loop, &nit)
1850 && wi::ltu_p (nit,
1851 RDIV (REG_BR_PROB_BASE,
1852 REG_BR_PROB_BASE
1853 - predictor_info
1854 [recursion
1855 ? PRED_LOOP_EXIT_WITH_RECURSION
1856 : PRED_LOOP_EXIT].hitrate)))
1857 {
1858 nitercst = nit.to_shwi ();
1859 predictor = PRED_LOOP_ITERATIONS_MAX;
1860 }
1861 else
1862 {
1863 if (dump_file && (dump_flags & TDF_DETAILS))
1864 fprintf (dump_file, "Nothing known about exit %i->%i.\n",
1865 ex->src->index, ex->dest->index);
1866 continue;
1867 }
1868
1869 if (dump_file && (dump_flags & TDF_DETAILS))
1870 fprintf (dump_file, "Recording prediction to %i iterations by %s.\n",
1871 (int)nitercst, predictor_info[predictor].name);
1872 /* If the prediction for number of iterations is zero, do not
1873 predict the exit edges. */
1874 if (nitercst == 0)
1875 continue;
1876
1877 probability = RDIV (REG_BR_PROB_BASE, nitercst);
1878 predict_edge (ex, predictor, probability);
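	  /* Editorial example (not part of the original source): an exit
	     expected after nitercst == 10 iterations is predicted taken with
	     probability REG_BR_PROB_BASE / 10, i.e. roughly 10% per iteration
	     of the loop.  */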
1879 }
1880 exits.release ();
1881
1882 /* Find information about loop bound variables. */
1883 for (nb_iter = loop->bounds; nb_iter;
1884 nb_iter = nb_iter->next)
1885 if (nb_iter->stmt
1886 && gimple_code (nb_iter->stmt) == GIMPLE_COND)
1887 {
1888 stmt = as_a <gcond *> (nb_iter->stmt);
1889 break;
1890 }
1891 if (!stmt && last_stmt (loop->header)
1892 && gimple_code (last_stmt (loop->header)) == GIMPLE_COND)
1893 stmt = as_a <gcond *> (last_stmt (loop->header));
1894 if (stmt)
1895 is_comparison_with_loop_invariant_p (stmt, loop,
1896 &loop_bound_var,
1897 &loop_bound_code,
1898 &loop_bound_step,
1899 &loop_iv_base);
1900
1901 bbs = get_loop_body (loop);
1902
1903 for (j = 0; j < loop->num_nodes; j++)
1904 {
1905 edge e;
1906 edge_iterator ei;
1907
1908 bb = bbs[j];
1909
1910 	  /* Bypass loop heuristics on continue statements.  These
1911 	     statements construct loops via "non-loop" constructs
1912 	     in the source language and are better handled
1913 	     separately.  */
1914 if (predicted_by_p (bb, PRED_CONTINUE))
1915 {
1916 if (dump_file && (dump_flags & TDF_DETAILS))
1917 fprintf (dump_file, "BB %i predicted by continue.\n",
1918 bb->index);
1919 continue;
1920 }
1921
1922 /* If we already used more reliable loop exit predictors, do not
1923 bother with PRED_LOOP_EXIT. */
1924 if (!predicted_by_loop_heuristics_p (bb))
1925 {
1926 	     /* For a loop with many exits we do not want to predict all exits
1927 	        with a fairly large probability, because if all exits are
1928 	        considered in a row, the loop would be predicted to iterate
1929 	        almost never.  The code that divides the probability by the
1930 	        number of exits is very rough.  It should compute the number of
1931 	        exits taken in each path through the function (not the overall
1932 	        number of exits, which might be a lot higher for loops with wide
1933 	        switch statements in them) and compute the n-th root of it.
1934 
1935 	        We limit the minimal probability to 2% to prevent
1936 	        EDGE_PROBABILITY_RELIABLE from trusting the branch prediction,
1937 	        as this was causing a regression in the perl benchmark, which
1938 	        contains such a wide loop.  */
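	     /* For illustration only (the real hitrate comes from predict.def):
	        with a PRED_LOOP_EXIT hitrate of, say, 90%, REG_BR_PROB_BASE of
	        10000 and a loop with 4 exits, each exit edge gets probability
	        (10000 - 9000) / 4 = 250, i.e. 2.5%, which is above the 2% floor;
	        with ten exits the per-edge probability would drop to 1% and be
	        raised back to HITRATE (2).  */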
1939
1940 int probability = ((REG_BR_PROB_BASE
1941 - predictor_info
1942 [recursion
1943 ? PRED_LOOP_EXIT_WITH_RECURSION
1944 : PRED_LOOP_EXIT].hitrate)
1945 / n_exits);
1946 if (probability < HITRATE (2))
1947 probability = HITRATE (2);
1948 FOR_EACH_EDGE (e, ei, bb->succs)
1949 if (e->dest->index < NUM_FIXED_BLOCKS
1950 || !flow_bb_inside_loop_p (loop, e->dest))
1951 {
1952 if (dump_file && (dump_flags & TDF_DETAILS))
1953 fprintf (dump_file,
1954 "Predicting exit %i->%i with prob %i.\n",
1955 e->src->index, e->dest->index, probability);
1956 predict_edge (e,
1957 recursion ? PRED_LOOP_EXIT_WITH_RECURSION
1958 : PRED_LOOP_EXIT, probability);
1959 }
1960 }
1961 if (loop_bound_var)
1962 predict_iv_comparison (loop, bb, loop_bound_var, loop_iv_base,
1963 loop_bound_code,
1964 tree_to_shwi (loop_bound_step));
1965 }
1966
1967 /* In the following code
1968 for (loop1)
1969 if (cond)
1970 for (loop2)
1971 body;
1972 guess that cond is unlikely. */
1973 if (loop_outer (loop)->num)
1974 {
1975 basic_block bb = NULL;
1976 edge preheader_edge = loop_preheader_edge (loop);
1977
1978 if (single_pred_p (preheader_edge->src)
1979 && single_succ_p (preheader_edge->src))
1980 preheader_edge = single_pred_edge (preheader_edge->src);
1981
1982 gimple *stmt = last_stmt (preheader_edge->src);
1983       /* Pattern match the Fortran loop preheader:
1984 _16 = BUILTIN_EXPECT (_15, 1, PRED_FORTRAN_LOOP_PREHEADER);
1985 _17 = (logical(kind=4)) _16;
1986 if (_17 != 0)
1987 goto <bb 11>;
1988 else
1989 goto <bb 13>;
1990
1991 	 Loop guard branch prediction says nothing about duplicated loop
1992 	 headers produced by the Fortran frontend, and in this case we want
1993 	 to predict paths leading to this preheader.  */
1994
1995 if (stmt
1996 && gimple_code (stmt) == GIMPLE_COND
1997 && gimple_cond_code (stmt) == NE_EXPR
1998 && TREE_CODE (gimple_cond_lhs (stmt)) == SSA_NAME
1999 && integer_zerop (gimple_cond_rhs (stmt)))
2000 {
2001 gimple *call_stmt = SSA_NAME_DEF_STMT (gimple_cond_lhs (stmt));
2002 if (gimple_code (call_stmt) == GIMPLE_ASSIGN
2003 && gimple_expr_code (call_stmt) == NOP_EXPR
2004 && TREE_CODE (gimple_assign_rhs1 (call_stmt)) == SSA_NAME)
2005 call_stmt = SSA_NAME_DEF_STMT (gimple_assign_rhs1 (call_stmt));
2006 if (gimple_call_internal_p (call_stmt, IFN_BUILTIN_EXPECT)
2007 && TREE_CODE (gimple_call_arg (call_stmt, 2)) == INTEGER_CST
2008 && tree_fits_uhwi_p (gimple_call_arg (call_stmt, 2))
2009 && tree_to_uhwi (gimple_call_arg (call_stmt, 2))
2010 == PRED_FORTRAN_LOOP_PREHEADER)
2011 bb = preheader_edge->src;
2012 }
2013 if (!bb)
2014 {
2015 if (!dominated_by_p (CDI_DOMINATORS,
2016 loop_outer (loop)->latch, loop->header))
2017 predict_paths_leading_to_edge (loop_preheader_edge (loop),
2018 recursion
2019 ? PRED_LOOP_GUARD_WITH_RECURSION
2020 : PRED_LOOP_GUARD,
2021 NOT_TAKEN,
2022 loop_outer (loop));
2023 }
2024 else
2025 {
2026 if (!dominated_by_p (CDI_DOMINATORS,
2027 loop_outer (loop)->latch, bb))
2028 predict_paths_leading_to (bb,
2029 recursion
2030 ? PRED_LOOP_GUARD_WITH_RECURSION
2031 : PRED_LOOP_GUARD,
2032 NOT_TAKEN,
2033 loop_outer (loop));
2034 }
2035 }
2036
2037 /* Free basic blocks from get_loop_body. */
2038 free (bbs);
2039 }
2040 }
2041
2042 /* Attempt to predict probabilities of BB outgoing edges using local
2043 properties. */
2044 static void
2045 bb_estimate_probability_locally (basic_block bb)
2046 {
2047 rtx_insn *last_insn = BB_END (bb);
2048 rtx cond;
2049
2050 if (! can_predict_insn_p (last_insn))
2051 return;
2052 cond = get_condition (last_insn, NULL, false, false);
2053 if (! cond)
2054 return;
2055
2056 /* Try "pointer heuristic."
2057 A comparison ptr == 0 is predicted as false.
2058 Similarly, a comparison ptr1 == ptr2 is predicted as false. */
2059 if (COMPARISON_P (cond)
2060 && ((REG_P (XEXP (cond, 0)) && REG_POINTER (XEXP (cond, 0)))
2061 || (REG_P (XEXP (cond, 1)) && REG_POINTER (XEXP (cond, 1)))))
2062 {
2063 if (GET_CODE (cond) == EQ)
2064 predict_insn_def (last_insn, PRED_POINTER, NOT_TAKEN);
2065 else if (GET_CODE (cond) == NE)
2066 predict_insn_def (last_insn, PRED_POINTER, TAKEN);
2067 }
2068 else
2069
2070 /* Try "opcode heuristic."
2071 EQ tests are usually false and NE tests are usually true. Also,
2072 most quantities are positive, so we can make the appropriate guesses
2073 about signed comparisons against zero. */
2074 switch (GET_CODE (cond))
2075 {
2076 case CONST_INT:
2077 /* Unconditional branch. */
2078 predict_insn_def (last_insn, PRED_UNCONDITIONAL,
2079 cond == const0_rtx ? NOT_TAKEN : TAKEN);
2080 break;
2081
2082 case EQ:
2083 case UNEQ:
2084 	/* Floating point comparisons appear to behave in a very
2085 	   unpredictable way because of the special role of equality
2086 	   tests in FP code.  */
2087 if (FLOAT_MODE_P (GET_MODE (XEXP (cond, 0))))
2088 ;
2089 /* Comparisons with 0 are often used for booleans and there is
2090 nothing useful to predict about them. */
2091 else if (XEXP (cond, 1) == const0_rtx
2092 || XEXP (cond, 0) == const0_rtx)
2093 ;
2094 else
2095 predict_insn_def (last_insn, PRED_OPCODE_NONEQUAL, NOT_TAKEN);
2096 break;
2097
2098 case NE:
2099 case LTGT:
2100 	/* Floating point comparisons appear to behave in a very
2101 	   unpredictable way because of the special role of equality
2102 	   tests in FP code.  */
2103 if (FLOAT_MODE_P (GET_MODE (XEXP (cond, 0))))
2104 ;
2105 /* Comparisons with 0 are often used for booleans and there is
2106 nothing useful to predict about them. */
2107 else if (XEXP (cond, 1) == const0_rtx
2108 || XEXP (cond, 0) == const0_rtx)
2109 ;
2110 else
2111 predict_insn_def (last_insn, PRED_OPCODE_NONEQUAL, TAKEN);
2112 break;
2113
2114 case ORDERED:
2115 predict_insn_def (last_insn, PRED_FPOPCODE, TAKEN);
2116 break;
2117
2118 case UNORDERED:
2119 predict_insn_def (last_insn, PRED_FPOPCODE, NOT_TAKEN);
2120 break;
2121
2122 case LE:
2123 case LT:
2124 if (XEXP (cond, 1) == const0_rtx || XEXP (cond, 1) == const1_rtx
2125 || XEXP (cond, 1) == constm1_rtx)
2126 predict_insn_def (last_insn, PRED_OPCODE_POSITIVE, NOT_TAKEN);
2127 break;
2128
2129 case GE:
2130 case GT:
2131 if (XEXP (cond, 1) == const0_rtx || XEXP (cond, 1) == const1_rtx
2132 || XEXP (cond, 1) == constm1_rtx)
2133 predict_insn_def (last_insn, PRED_OPCODE_POSITIVE, TAKEN);
2134 break;
2135
2136 default:
2137 break;
2138 }
2139 }
2140
2141 /* Set edge->probability for each successor edge of BB. */
2142 void
2143 guess_outgoing_edge_probabilities (basic_block bb)
2144 {
2145 bb_estimate_probability_locally (bb);
2146 combine_predictions_for_insn (BB_END (bb), bb);
2147 }
2148 \f
2149 static tree expr_expected_value (tree, bitmap, enum br_predictor *predictor);
2150
2151 /* Helper function for expr_expected_value. */
2152
2153 static tree
2154 expr_expected_value_1 (tree type, tree op0, enum tree_code code,
2155 tree op1, bitmap visited, enum br_predictor *predictor)
2156 {
2157 gimple *def;
2158
2159 if (predictor)
2160 *predictor = PRED_UNCONDITIONAL;
2161
2162 if (get_gimple_rhs_class (code) == GIMPLE_SINGLE_RHS)
2163 {
2164 if (TREE_CONSTANT (op0))
2165 return op0;
2166
2167 if (code == IMAGPART_EXPR)
2168 {
2169 if (TREE_CODE (TREE_OPERAND (op0, 0)) == SSA_NAME)
2170 {
2171 def = SSA_NAME_DEF_STMT (TREE_OPERAND (op0, 0));
2172 if (is_gimple_call (def)
2173 && gimple_call_internal_p (def)
2174 && (gimple_call_internal_fn (def)
2175 == IFN_ATOMIC_COMPARE_EXCHANGE))
2176 {
2177 /* Assume that any given atomic operation has low contention,
2178 and thus the compare-and-swap operation succeeds. */
2179 if (predictor)
2180 *predictor = PRED_COMPARE_AND_SWAP;
2181 return build_one_cst (TREE_TYPE (op0));
2182 }
2183 }
2184 }
2185
2186 if (code != SSA_NAME)
2187 return NULL_TREE;
2188
2189 def = SSA_NAME_DEF_STMT (op0);
2190
2191 /* If we were already here, break the infinite cycle. */
2192 if (!bitmap_set_bit (visited, SSA_NAME_VERSION (op0)))
2193 return NULL;
2194
2195 if (gimple_code (def) == GIMPLE_PHI)
2196 {
2197 	  /* All the arguments of the PHI node must have the same expected
2198 	     constant value.  */
2199 int i, n = gimple_phi_num_args (def);
2200 tree val = NULL, new_val;
2201
2202 for (i = 0; i < n; i++)
2203 {
2204 tree arg = PHI_ARG_DEF (def, i);
2205 enum br_predictor predictor2;
2206
2207 	      /* If this PHI has itself as an argument, we cannot
2208 		 determine the expected value of this argument.  However,
2209 		 if we can find an expected constant value for the other
2210 		 PHI args then we can still be sure that this is
2211 		 likely a constant.  So be optimistic and just
2212 		 continue with the next argument.  */
2213 if (arg == PHI_RESULT (def))
2214 continue;
2215
2216 new_val = expr_expected_value (arg, visited, &predictor2);
2217
2218 	      /* It is difficult to combine value predictors.  Simply assume
2219 		 that the later predictor is weaker and take its prediction.  */
2220 if (predictor && *predictor < predictor2)
2221 *predictor = predictor2;
2222 if (!new_val)
2223 return NULL;
2224 if (!val)
2225 val = new_val;
2226 else if (!operand_equal_p (val, new_val, false))
2227 return NULL;
2228 }
2229 return val;
2230 }
2231 if (is_gimple_assign (def))
2232 {
2233 if (gimple_assign_lhs (def) != op0)
2234 return NULL;
2235
2236 return expr_expected_value_1 (TREE_TYPE (gimple_assign_lhs (def)),
2237 gimple_assign_rhs1 (def),
2238 gimple_assign_rhs_code (def),
2239 gimple_assign_rhs2 (def),
2240 visited, predictor);
2241 }
2242
2243 if (is_gimple_call (def))
2244 {
2245 tree decl = gimple_call_fndecl (def);
2246 if (!decl)
2247 {
2248 if (gimple_call_internal_p (def)
2249 && gimple_call_internal_fn (def) == IFN_BUILTIN_EXPECT)
2250 {
2251 gcc_assert (gimple_call_num_args (def) == 3);
2252 tree val = gimple_call_arg (def, 0);
2253 if (TREE_CONSTANT (val))
2254 return val;
2255 if (predictor)
2256 {
2257 tree val2 = gimple_call_arg (def, 2);
2258 gcc_assert (TREE_CODE (val2) == INTEGER_CST
2259 && tree_fits_uhwi_p (val2)
2260 && tree_to_uhwi (val2) < END_PREDICTORS);
2261 *predictor = (enum br_predictor) tree_to_uhwi (val2);
2262 }
2263 return gimple_call_arg (def, 1);
2264 }
2265 return NULL;
2266 }
2267 if (DECL_BUILT_IN_CLASS (decl) == BUILT_IN_NORMAL)
2268 switch (DECL_FUNCTION_CODE (decl))
2269 {
2270 case BUILT_IN_EXPECT:
2271 {
2272 tree val;
2273 if (gimple_call_num_args (def) != 2)
2274 return NULL;
2275 val = gimple_call_arg (def, 0);
2276 if (TREE_CONSTANT (val))
2277 return val;
2278 if (predictor)
2279 *predictor = PRED_BUILTIN_EXPECT;
2280 return gimple_call_arg (def, 1);
2281 }
2282
2283 case BUILT_IN_SYNC_BOOL_COMPARE_AND_SWAP_N:
2284 case BUILT_IN_SYNC_BOOL_COMPARE_AND_SWAP_1:
2285 case BUILT_IN_SYNC_BOOL_COMPARE_AND_SWAP_2:
2286 case BUILT_IN_SYNC_BOOL_COMPARE_AND_SWAP_4:
2287 case BUILT_IN_SYNC_BOOL_COMPARE_AND_SWAP_8:
2288 case BUILT_IN_SYNC_BOOL_COMPARE_AND_SWAP_16:
2289 case BUILT_IN_ATOMIC_COMPARE_EXCHANGE:
2290 case BUILT_IN_ATOMIC_COMPARE_EXCHANGE_N:
2291 case BUILT_IN_ATOMIC_COMPARE_EXCHANGE_1:
2292 case BUILT_IN_ATOMIC_COMPARE_EXCHANGE_2:
2293 case BUILT_IN_ATOMIC_COMPARE_EXCHANGE_4:
2294 case BUILT_IN_ATOMIC_COMPARE_EXCHANGE_8:
2295 case BUILT_IN_ATOMIC_COMPARE_EXCHANGE_16:
2296 /* Assume that any given atomic operation has low contention,
2297 and thus the compare-and-swap operation succeeds. */
2298 if (predictor)
2299 *predictor = PRED_COMPARE_AND_SWAP;
2300 return boolean_true_node;
2301 default:
2302 break;
2303 }
2304 }
2305
2306 return NULL;
2307 }
2308
2309 if (get_gimple_rhs_class (code) == GIMPLE_BINARY_RHS)
2310 {
2311 tree res;
2312 enum br_predictor predictor2;
2313 op0 = expr_expected_value (op0, visited, predictor);
2314 if (!op0)
2315 return NULL;
2316 op1 = expr_expected_value (op1, visited, &predictor2);
2317 if (predictor && *predictor < predictor2)
2318 *predictor = predictor2;
2319 if (!op1)
2320 return NULL;
2321 res = fold_build2 (code, type, op0, op1);
2322 if (TREE_CONSTANT (res))
2323 return res;
2324 return NULL;
2325 }
2326 if (get_gimple_rhs_class (code) == GIMPLE_UNARY_RHS)
2327 {
2328 tree res;
2329 op0 = expr_expected_value (op0, visited, predictor);
2330 if (!op0)
2331 return NULL;
2332 res = fold_build1 (code, type, op0);
2333 if (TREE_CONSTANT (res))
2334 return res;
2335 return NULL;
2336 }
2337 return NULL;
2338 }
2339
2340 /* Return the constant value that EXPR is likely to have at execution time,
2341    or NULL if that is unknown.  The function is used by the builtin_expect
2342    branch predictor, so the evidence must come from this construct and from
2343    possible additional constant folding.
2344 
2345    We may want to implement a more involved value guess (such as value range
2346    propagation based prediction), but such tricks should go into a new implementation.  */
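/* As an illustrative sketch (not code from this file), for

     long t = __builtin_expect (x > 10, 0);
     if (t)
       rarely_taken ();

   the expected value of the condition folds to the constant 0 through the
   BUILT_IN_EXPECT handling above, so the branch is predicted not taken with
   PRED_BUILTIN_EXPECT.  */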
2347
2348 static tree
2349 expr_expected_value (tree expr, bitmap visited,
2350 enum br_predictor *predictor)
2351 {
2352 enum tree_code code;
2353 tree op0, op1;
2354
2355 if (TREE_CONSTANT (expr))
2356 {
2357 if (predictor)
2358 *predictor = PRED_UNCONDITIONAL;
2359 return expr;
2360 }
2361
2362 extract_ops_from_tree (expr, &code, &op0, &op1);
2363 return expr_expected_value_1 (TREE_TYPE (expr),
2364 op0, code, op1, visited, predictor);
2365 }
2366 \f
2367 /* Predict using opcode of the last statement in basic block. */
2368 static void
2369 tree_predict_by_opcode (basic_block bb)
2370 {
2371 gimple *stmt = last_stmt (bb);
2372 edge then_edge;
2373 tree op0, op1;
2374 tree type;
2375 tree val;
2376 enum tree_code cmp;
2377 edge_iterator ei;
2378 enum br_predictor predictor;
2379
2380 if (!stmt || gimple_code (stmt) != GIMPLE_COND)
2381 return;
2382 FOR_EACH_EDGE (then_edge, ei, bb->succs)
2383 if (then_edge->flags & EDGE_TRUE_VALUE)
2384 break;
2385 op0 = gimple_cond_lhs (stmt);
2386 op1 = gimple_cond_rhs (stmt);
2387 cmp = gimple_cond_code (stmt);
2388 type = TREE_TYPE (op0);
2389 val = expr_expected_value_1 (boolean_type_node, op0, cmp, op1, auto_bitmap (),
2390 &predictor);
2391 if (val && TREE_CODE (val) == INTEGER_CST)
2392 {
2393 if (predictor == PRED_BUILTIN_EXPECT)
2394 {
2395 int percent = PARAM_VALUE (BUILTIN_EXPECT_PROBABILITY);
2396
2397 gcc_assert (percent >= 0 && percent <= 100);
2398 if (integer_zerop (val))
2399 percent = 100 - percent;
2400 predict_edge (then_edge, PRED_BUILTIN_EXPECT, HITRATE (percent));
2401 }
2402 else
2403 predict_edge_def (then_edge, predictor,
2404 integer_zerop (val) ? NOT_TAKEN : TAKEN);
2405 }
2406 /* Try "pointer heuristic."
2407 A comparison ptr == 0 is predicted as false.
2408 Similarly, a comparison ptr1 == ptr2 is predicted as false. */
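  /* Illustrative only: in "if (p == NULL) handle_error ();" the then_edge is
     predicted not taken, i.e. P is expected to be non-NULL at run time.  */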
2409 if (POINTER_TYPE_P (type))
2410 {
2411 if (cmp == EQ_EXPR)
2412 predict_edge_def (then_edge, PRED_TREE_POINTER, NOT_TAKEN);
2413 else if (cmp == NE_EXPR)
2414 predict_edge_def (then_edge, PRED_TREE_POINTER, TAKEN);
2415 }
2416 else
2417
2418 /* Try "opcode heuristic."
2419 EQ tests are usually false and NE tests are usually true. Also,
2420 most quantities are positive, so we can make the appropriate guesses
2421 about signed comparisons against zero. */
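  /* Illustrative only: "if (n == limit)" is predicted not taken, while
     "if (count > 0)" is predicted taken, because equalities rarely hold and
     most signed quantities compared against zero are positive.  */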
2422 switch (cmp)
2423 {
2424 case EQ_EXPR:
2425 case UNEQ_EXPR:
2426 	/* Floating point comparisons appear to behave in a very
2427 	   unpredictable way because of the special role of equality
2428 	   tests in FP code.  */
2429 if (FLOAT_TYPE_P (type))
2430 ;
2431 /* Comparisons with 0 are often used for booleans and there is
2432 nothing useful to predict about them. */
2433 else if (integer_zerop (op0) || integer_zerop (op1))
2434 ;
2435 else
2436 predict_edge_def (then_edge, PRED_TREE_OPCODE_NONEQUAL, NOT_TAKEN);
2437 break;
2438
2439 case NE_EXPR:
2440 case LTGT_EXPR:
2441 	/* Floating point comparisons appear to behave in a very
2442 	   unpredictable way because of the special role of equality
2443 	   tests in FP code.  */
2444 if (FLOAT_TYPE_P (type))
2445 ;
2446 /* Comparisons with 0 are often used for booleans and there is
2447 nothing useful to predict about them. */
2448 else if (integer_zerop (op0)
2449 || integer_zerop (op1))
2450 ;
2451 else
2452 predict_edge_def (then_edge, PRED_TREE_OPCODE_NONEQUAL, TAKEN);
2453 break;
2454
2455 case ORDERED_EXPR:
2456 predict_edge_def (then_edge, PRED_TREE_FPOPCODE, TAKEN);
2457 break;
2458
2459 case UNORDERED_EXPR:
2460 predict_edge_def (then_edge, PRED_TREE_FPOPCODE, NOT_TAKEN);
2461 break;
2462
2463 case LE_EXPR:
2464 case LT_EXPR:
2465 if (integer_zerop (op1)
2466 || integer_onep (op1)
2467 || integer_all_onesp (op1)
2468 || real_zerop (op1)
2469 || real_onep (op1)
2470 || real_minus_onep (op1))
2471 predict_edge_def (then_edge, PRED_TREE_OPCODE_POSITIVE, NOT_TAKEN);
2472 break;
2473
2474 case GE_EXPR:
2475 case GT_EXPR:
2476 if (integer_zerop (op1)
2477 || integer_onep (op1)
2478 || integer_all_onesp (op1)
2479 || real_zerop (op1)
2480 || real_onep (op1)
2481 || real_minus_onep (op1))
2482 predict_edge_def (then_edge, PRED_TREE_OPCODE_POSITIVE, TAKEN);
2483 break;
2484
2485 default:
2486 break;
2487 }
2488 }
2489
2490 /* Return TRUE if STMT is an exit(0)-like statement.  */
2491
2492 static bool
2493 is_exit_with_zero_arg (const gimple *stmt)
2494 {
2495 /* This is not exit, _exit or _Exit. */
2496 if (!gimple_call_builtin_p (stmt, BUILT_IN_EXIT)
2497 && !gimple_call_builtin_p (stmt, BUILT_IN__EXIT)
2498 && !gimple_call_builtin_p (stmt, BUILT_IN__EXIT2))
2499 return false;
2500
2501   /* The argument is an integer zero.  */
2502 return integer_zerop (gimple_call_arg (stmt, 0));
2503 }
2504
2505 /* Try to guess whether the return value VAL means an error code.  */
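/* For instance (illustrative only), paths ending in "return NULL;" from a
   pointer-valued function are predicted unlikely by PRED_NULL_RETURN, and
   paths ending in "return -1;" by PRED_NEGATIVE_RETURN.  Other constants such
   as "return 42;" trigger PRED_CONST_RETURN, while "return 0;" and
   "return 1;" are left alone because they commonly encode booleans.  */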
2506
2507 static enum br_predictor
2508 return_prediction (tree val, enum prediction *prediction)
2509 {
2510 /* VOID. */
2511 if (!val)
2512 return PRED_NO_PREDICTION;
2513 /* Different heuristics for pointers and scalars. */
2514 if (POINTER_TYPE_P (TREE_TYPE (val)))
2515 {
2516 /* NULL is usually not returned. */
2517 if (integer_zerop (val))
2518 {
2519 *prediction = NOT_TAKEN;
2520 return PRED_NULL_RETURN;
2521 }
2522 }
2523 else if (INTEGRAL_TYPE_P (TREE_TYPE (val)))
2524 {
2525 /* Negative return values are often used to indicate
2526 errors. */
2527 if (TREE_CODE (val) == INTEGER_CST
2528 && tree_int_cst_sgn (val) < 0)
2529 {
2530 *prediction = NOT_TAKEN;
2531 return PRED_NEGATIVE_RETURN;
2532 }
2533       /* Constant return values seem to be commonly taken.
2534 	 Zero/one often represent booleans, so exclude them from the
2535 	 heuristics.  */
2536 if (TREE_CONSTANT (val)
2537 && (!integer_zerop (val) && !integer_onep (val)))
2538 {
2539 *prediction = NOT_TAKEN;
2540 return PRED_CONST_RETURN;
2541 }
2542 }
2543 return PRED_NO_PREDICTION;
2544 }
2545
2546 /* Find the basic block with the return expression and look for a possible
2547    return value, trying to apply the return_prediction heuristics.  */
2548 static void
2549 apply_return_prediction (void)
2550 {
2551 greturn *return_stmt = NULL;
2552 tree return_val;
2553 edge e;
2554 gphi *phi;
2555 int phi_num_args, i;
2556 enum br_predictor pred;
2557 enum prediction direction;
2558 edge_iterator ei;
2559
2560 FOR_EACH_EDGE (e, ei, EXIT_BLOCK_PTR_FOR_FN (cfun)->preds)
2561 {
2562 gimple *last = last_stmt (e->src);
2563 if (last
2564 && gimple_code (last) == GIMPLE_RETURN)
2565 {
2566 return_stmt = as_a <greturn *> (last);
2567 break;
2568 }
2569 }
2570 if (!e)
2571 return;
2572 return_val = gimple_return_retval (return_stmt);
2573 if (!return_val)
2574 return;
2575 if (TREE_CODE (return_val) != SSA_NAME
2576 || !SSA_NAME_DEF_STMT (return_val)
2577 || gimple_code (SSA_NAME_DEF_STMT (return_val)) != GIMPLE_PHI)
2578 return;
2579 phi = as_a <gphi *> (SSA_NAME_DEF_STMT (return_val));
2580 phi_num_args = gimple_phi_num_args (phi);
2581 pred = return_prediction (PHI_ARG_DEF (phi, 0), &direction);
2582
2583   /* Avoid the degenerate case where all return values from the function
2584      belong to the same category (i.e. they are all positive constants),
2585      so we can hardly say anything useful about them.  */
2586 for (i = 1; i < phi_num_args; i++)
2587 if (pred != return_prediction (PHI_ARG_DEF (phi, i), &direction))
2588 break;
2589 if (i != phi_num_args)
2590 for (i = 0; i < phi_num_args; i++)
2591 {
2592 pred = return_prediction (PHI_ARG_DEF (phi, i), &direction);
2593 if (pred != PRED_NO_PREDICTION)
2594 predict_paths_leading_to_edge (gimple_phi_arg_edge (phi, i), pred,
2595 direction);
2596 }
2597 }
2598
2599 /* Look for basic blocks that contain unlikely-to-happen events
2600    (such as noreturn calls) and mark all paths leading to execution
2601    of these basic blocks as unlikely.  */
2602
2603 static void
2604 tree_bb_level_predictions (void)
2605 {
2606 basic_block bb;
2607 bool has_return_edges = false;
2608 edge e;
2609 edge_iterator ei;
2610
2611 FOR_EACH_EDGE (e, ei, EXIT_BLOCK_PTR_FOR_FN (cfun)->preds)
2612 if (!(e->flags & (EDGE_ABNORMAL | EDGE_FAKE | EDGE_EH)))
2613 {
2614 has_return_edges = true;
2615 break;
2616 }
2617
2618 apply_return_prediction ();
2619
2620 FOR_EACH_BB_FN (bb, cfun)
2621 {
2622 gimple_stmt_iterator gsi;
2623
2624 for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
2625 {
2626 gimple *stmt = gsi_stmt (gsi);
2627 tree decl;
2628
2629 if (is_gimple_call (stmt))
2630 {
2631 if (gimple_call_noreturn_p (stmt)
2632 && has_return_edges
2633 && !is_exit_with_zero_arg (stmt))
2634 predict_paths_leading_to (bb, PRED_NORETURN,
2635 NOT_TAKEN);
2636 decl = gimple_call_fndecl (stmt);
2637 if (decl
2638 && lookup_attribute ("cold",
2639 DECL_ATTRIBUTES (decl)))
2640 predict_paths_leading_to (bb, PRED_COLD_FUNCTION,
2641 NOT_TAKEN);
2642 if (decl && recursive_call_p (current_function_decl, decl))
2643 predict_paths_leading_to (bb, PRED_RECURSIVE_CALL,
2644 NOT_TAKEN);
2645 }
2646 else if (gimple_code (stmt) == GIMPLE_PREDICT)
2647 {
2648 predict_paths_leading_to (bb, gimple_predict_predictor (stmt),
2649 gimple_predict_outcome (stmt));
2650 /* Keep GIMPLE_PREDICT around so early inlining will propagate
2651 hints to callers. */
2652 }
2653 }
2654 }
2655 }
2656
2657 /* Callback for hash_map::traverse, asserts that the pointer map is
2658 empty. */
2659
2660 bool
2661 assert_is_empty (const_basic_block const &, edge_prediction *const &value,
2662 void *)
2663 {
2664 gcc_assert (!value);
2665 return false;
2666 }
2667
2668 /* Predict branch probabilities and estimate profile for basic block BB. */
2669
2670 static void
2671 tree_estimate_probability_bb (basic_block bb)
2672 {
2673 edge e;
2674 edge_iterator ei;
2675 gimple *last;
2676
2677 FOR_EACH_EDGE (e, ei, bb->succs)
2678 {
2679 /* Predict edges to user labels with attributes. */
2680 if (e->dest != EXIT_BLOCK_PTR_FOR_FN (cfun))
2681 {
2682 gimple_stmt_iterator gi;
2683 for (gi = gsi_start_bb (e->dest); !gsi_end_p (gi); gsi_next (&gi))
2684 {
2685 glabel *label_stmt = dyn_cast <glabel *> (gsi_stmt (gi));
2686 tree decl;
2687
2688 if (!label_stmt)
2689 break;
2690 decl = gimple_label_label (label_stmt);
2691 if (DECL_ARTIFICIAL (decl))
2692 continue;
2693
2694 /* Finally, we have a user-defined label. */
2695 if (lookup_attribute ("cold", DECL_ATTRIBUTES (decl)))
2696 predict_edge_def (e, PRED_COLD_LABEL, NOT_TAKEN);
2697 else if (lookup_attribute ("hot", DECL_ATTRIBUTES (decl)))
2698 predict_edge_def (e, PRED_HOT_LABEL, TAKEN);
2699 }
2700 }
2701
2702       /* Predict early returns to be probable, as we've already taken
2703 	 care of error returns and the remaining cases are often used for
2704 	 fast paths through the function.
2705
2706 Since we've already removed the return statements, we are
2707 looking for CFG like:
2708
2709 if (conditional)
2710 {
2711 ..
2712 goto return_block
2713 }
2714 some other blocks
2715 return_block:
2716 return_stmt. */
2717 if (e->dest != bb->next_bb
2718 && e->dest != EXIT_BLOCK_PTR_FOR_FN (cfun)
2719 && single_succ_p (e->dest)
2720 && single_succ_edge (e->dest)->dest == EXIT_BLOCK_PTR_FOR_FN (cfun)
2721 && (last = last_stmt (e->dest)) != NULL
2722 && gimple_code (last) == GIMPLE_RETURN)
2723 {
2724 edge e1;
2725 edge_iterator ei1;
2726
2727 if (single_succ_p (bb))
2728 {
2729 FOR_EACH_EDGE (e1, ei1, bb->preds)
2730 if (!predicted_by_p (e1->src, PRED_NULL_RETURN)
2731 && !predicted_by_p (e1->src, PRED_CONST_RETURN)
2732 && !predicted_by_p (e1->src, PRED_NEGATIVE_RETURN))
2733 predict_edge_def (e1, PRED_TREE_EARLY_RETURN, NOT_TAKEN);
2734 }
2735 else
2736 if (!predicted_by_p (e->src, PRED_NULL_RETURN)
2737 && !predicted_by_p (e->src, PRED_CONST_RETURN)
2738 && !predicted_by_p (e->src, PRED_NEGATIVE_RETURN))
2739 predict_edge_def (e, PRED_TREE_EARLY_RETURN, NOT_TAKEN);
2740 }
2741
2742       /* Look for a block we are guarding (i.e. we dominate it,
2743 	 but it doesn't postdominate us).  */
2744 if (e->dest != EXIT_BLOCK_PTR_FOR_FN (cfun) && e->dest != bb
2745 && dominated_by_p (CDI_DOMINATORS, e->dest, e->src)
2746 && !dominated_by_p (CDI_POST_DOMINATORS, e->src, e->dest))
2747 {
2748 gimple_stmt_iterator bi;
2749
2750 /* The call heuristic claims that a guarded function call
2751 is improbable. This is because such calls are often used
2752 to signal exceptional situations such as printing error
2753 messages. */
2754 for (bi = gsi_start_bb (e->dest); !gsi_end_p (bi);
2755 gsi_next (&bi))
2756 {
2757 gimple *stmt = gsi_stmt (bi);
2758 if (is_gimple_call (stmt)
2759 && !gimple_inexpensive_call_p (as_a <gcall *> (stmt))
2760 		/* Constant and pure calls are hardly used to signal
2761 		   something exceptional.  */
2762 && gimple_has_side_effects (stmt))
2763 {
2764 if (gimple_call_fndecl (stmt))
2765 predict_edge_def (e, PRED_CALL, NOT_TAKEN);
2766 else if (virtual_method_call_p (gimple_call_fn (stmt)))
2767 predict_edge_def (e, PRED_POLYMORPHIC_CALL, NOT_TAKEN);
2768 else
2769 predict_edge_def (e, PRED_INDIR_CALL, TAKEN);
2770 break;
2771 }
2772 }
2773 }
2774 }
2775 tree_predict_by_opcode (bb);
2776 }
2777
2778 /* Predict branch probabilities and estimate profile of the tree CFG.
2779 This function can be called from the loop optimizers to recompute
2780 the profile information.
2781 If DRY_RUN is set, do not modify CFG and only produce dump files. */
2782
2783 void
2784 tree_estimate_probability (bool dry_run)
2785 {
2786 basic_block bb;
2787
2788 add_noreturn_fake_exit_edges ();
2789 connect_infinite_loops_to_exit ();
2790 /* We use loop_niter_by_eval, which requires that the loops have
2791 preheaders. */
2792 create_preheaders (CP_SIMPLE_PREHEADERS);
2793 calculate_dominance_info (CDI_POST_DOMINATORS);
2794
2795 bb_predictions = new hash_map<const_basic_block, edge_prediction *>;
2796 tree_bb_level_predictions ();
2797 record_loop_exits ();
2798
2799 if (number_of_loops (cfun) > 1)
2800 predict_loops ();
2801
2802 FOR_EACH_BB_FN (bb, cfun)
2803 tree_estimate_probability_bb (bb);
2804
2805 FOR_EACH_BB_FN (bb, cfun)
2806 combine_predictions_for_bb (bb, dry_run);
2807
2808 if (flag_checking)
2809 bb_predictions->traverse<void *, assert_is_empty> (NULL);
2810
2811 delete bb_predictions;
2812 bb_predictions = NULL;
2813
2814 if (!dry_run)
2815 estimate_bb_frequencies (false);
2816 free_dominance_info (CDI_POST_DOMINATORS);
2817 remove_fake_exit_edges ();
2818 }
2819 \f
2820 /* Predict edges to successors of CUR whose sources are not postdominated by
2821    BB using predictor PRED, and recurse to all postdominators.  */
2822
2823 static void
2824 predict_paths_for_bb (basic_block cur, basic_block bb,
2825 enum br_predictor pred,
2826 enum prediction taken,
2827 bitmap visited, struct loop *in_loop = NULL)
2828 {
2829 edge e;
2830 edge_iterator ei;
2831 basic_block son;
2832
2833 /* If we exited the loop or CUR is unconditional in the loop, there is
2834 nothing to do. */
2835 if (in_loop
2836 && (!flow_bb_inside_loop_p (in_loop, cur)
2837 || dominated_by_p (CDI_DOMINATORS, in_loop->latch, cur)))
2838 return;
2839
2840 /* We are looking for all edges forming edge cut induced by
2841 set of all blocks postdominated by BB. */
2842 FOR_EACH_EDGE (e, ei, cur->preds)
2843 if (e->src->index >= NUM_FIXED_BLOCKS
2844 && !dominated_by_p (CDI_POST_DOMINATORS, e->src, bb))
2845 {
2846 edge e2;
2847 edge_iterator ei2;
2848 bool found = false;
2849
2850 /* Ignore fake edges and eh, we predict them as not taken anyway. */
2851 if (e->flags & (EDGE_EH | EDGE_FAKE))
2852 continue;
2853 gcc_assert (bb == cur || dominated_by_p (CDI_POST_DOMINATORS, cur, bb));
2854
2855 /* See if there is an edge from e->src that is not abnormal
2856 and does not lead to BB and does not exit the loop. */
2857 FOR_EACH_EDGE (e2, ei2, e->src->succs)
2858 if (e2 != e
2859 && !(e2->flags & (EDGE_EH | EDGE_FAKE))
2860 && !dominated_by_p (CDI_POST_DOMINATORS, e2->dest, bb)
2861 && (!in_loop || !loop_exit_edge_p (in_loop, e2)))
2862 {
2863 found = true;
2864 break;
2865 }
2866
2867       /* If there is a non-abnormal path leaving e->src, predict the edge
2868 	 using the predictor.  Otherwise we need to look for paths
2869 	 leading to e->src.
2870 
2871 	 The second case may lead to an infinite loop when we are predicting
2872 	 regions that are only reachable by abnormal edges.  We simply
2873 	 prevent visiting a given BB twice.  */
2874 if (found)
2875 {
2876 if (!edge_predicted_by_p (e, pred, taken))
2877 predict_edge_def (e, pred, taken);
2878 }
2879 else if (bitmap_set_bit (visited, e->src->index))
2880 predict_paths_for_bb (e->src, e->src, pred, taken, visited, in_loop);
2881 }
2882 for (son = first_dom_son (CDI_POST_DOMINATORS, cur);
2883 son;
2884 son = next_dom_son (CDI_POST_DOMINATORS, son))
2885 predict_paths_for_bb (son, bb, pred, taken, visited, in_loop);
2886 }
2887
2888 /* Set branch probabilities of all edges on paths leading to BB according
2889    to predictor PRED and prediction TAKEN.  */
2890
2891 static void
2892 predict_paths_leading_to (basic_block bb, enum br_predictor pred,
2893 enum prediction taken, struct loop *in_loop)
2894 {
2895 predict_paths_for_bb (bb, bb, pred, taken, auto_bitmap (), in_loop);
2896 }
2897
2898 /* Like predict_paths_leading_to but take edge instead of basic block. */
2899
2900 static void
2901 predict_paths_leading_to_edge (edge e, enum br_predictor pred,
2902 enum prediction taken, struct loop *in_loop)
2903 {
2904 bool has_nonloop_edge = false;
2905 edge_iterator ei;
2906 edge e2;
2907
2908 basic_block bb = e->src;
2909 FOR_EACH_EDGE (e2, ei, bb->succs)
2910 if (e2->dest != e->src && e2->dest != e->dest
2911 	&& !(e2->flags & (EDGE_EH | EDGE_FAKE))
2912 && !dominated_by_p (CDI_POST_DOMINATORS, e->src, e2->dest))
2913 {
2914 has_nonloop_edge = true;
2915 break;
2916 }
2917 if (!has_nonloop_edge)
2918 {
2919 predict_paths_for_bb (bb, bb, pred, taken, auto_bitmap (), in_loop);
2920 }
2921 else
2922 predict_edge_def (e, pred, taken);
2923 }
2924 \f
2925 /* This is used to carry information about basic blocks. It is
2926 attached to the AUX field of the standard CFG block. */
2927
2928 struct block_info
2929 {
2930 /* Estimated frequency of execution of basic_block. */
2931 sreal frequency;
2932
2933 /* To keep queue of basic blocks to process. */
2934 basic_block next;
2935
2936 /* Number of predecessors we need to visit first. */
2937 int npredecessors;
2938 };
2939
2940 /* Similar information for edges. */
2941 struct edge_prob_info
2942 {
2943   /* In case the edge is a loopback edge, the probability that the edge will
2944      be reached provided that the header is.  The estimated number of
2945      iterations of the loop can then be computed as 1 / (1 - back_edge_prob).  */
2946 sreal back_edge_prob;
2947 /* True if the edge is a loopback edge in the natural loop. */
2948 unsigned int back_edge:1;
2949 };
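/* For example (illustrative numbers only), a back_edge_prob of 0.9
   corresponds to an expected 1 / (1 - 0.9) = 10 iterations of the loop.  */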
2950
2951 #define BLOCK_INFO(B) ((block_info *) (B)->aux)
2952 #undef EDGE_INFO
2953 #define EDGE_INFO(E) ((edge_prob_info *) (E)->aux)
2954
2955 /* Helper function for estimate_bb_frequencies.
2956 Propagate the frequencies in blocks marked in
2957 TOVISIT, starting in HEAD. */
2958
2959 static void
2960 propagate_freq (basic_block head, bitmap tovisit)
2961 {
2962 basic_block bb;
2963 basic_block last;
2964 unsigned i;
2965 edge e;
2966 basic_block nextbb;
2967 bitmap_iterator bi;
2968
2969   /* For each basic block we need to visit, count the number of its
2970      predecessors that we need to visit first.  */
2971 EXECUTE_IF_SET_IN_BITMAP (tovisit, 0, i, bi)
2972 {
2973 edge_iterator ei;
2974 int count = 0;
2975
2976 bb = BASIC_BLOCK_FOR_FN (cfun, i);
2977
2978 FOR_EACH_EDGE (e, ei, bb->preds)
2979 {
2980 bool visit = bitmap_bit_p (tovisit, e->src->index);
2981
2982 if (visit && !(e->flags & EDGE_DFS_BACK))
2983 count++;
2984 else if (visit && dump_file && !EDGE_INFO (e)->back_edge)
2985 fprintf (dump_file,
2986 "Irreducible region hit, ignoring edge to %i->%i\n",
2987 e->src->index, bb->index);
2988 }
2989 BLOCK_INFO (bb)->npredecessors = count;
2990 /* When function never returns, we will never process exit block. */
2991 if (!count && bb == EXIT_BLOCK_PTR_FOR_FN (cfun))
2992 {
2993 bb->count = profile_count::zero ();
2994 bb->frequency = 0;
2995 }
2996 }
2997
2998 BLOCK_INFO (head)->frequency = 1;
2999 last = head;
3000 for (bb = head; bb; bb = nextbb)
3001 {
3002 edge_iterator ei;
3003 sreal cyclic_probability = 0;
3004 sreal frequency = 0;
3005
3006 nextbb = BLOCK_INFO (bb)->next;
3007 BLOCK_INFO (bb)->next = NULL;
3008
3009 /* Compute frequency of basic block. */
3010 if (bb != head)
3011 {
3012 if (flag_checking)
3013 FOR_EACH_EDGE (e, ei, bb->preds)
3014 gcc_assert (!bitmap_bit_p (tovisit, e->src->index)
3015 || (e->flags & EDGE_DFS_BACK));
3016
3017 FOR_EACH_EDGE (e, ei, bb->preds)
3018 if (EDGE_INFO (e)->back_edge)
3019 {
3020 cyclic_probability += EDGE_INFO (e)->back_edge_prob;
3021 }
3022 else if (!(e->flags & EDGE_DFS_BACK))
3023 {
3024 /* frequency += (e->probability
3025 * BLOCK_INFO (e->src)->frequency /
3026 REG_BR_PROB_BASE); */
3027
3028 sreal tmp = e->probability;
3029 tmp *= BLOCK_INFO (e->src)->frequency;
3030 tmp *= real_inv_br_prob_base;
3031 frequency += tmp;
3032 }
3033
3034 if (cyclic_probability == 0)
3035 {
3036 BLOCK_INFO (bb)->frequency = frequency;
3037 }
3038 else
3039 {
3040 if (cyclic_probability > real_almost_one)
3041 cyclic_probability = real_almost_one;
3042
3043 /* BLOCK_INFO (bb)->frequency = frequency
3044 / (1 - cyclic_probability) */
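	  /* E.g. (illustrative numbers only) if the back edges contribute a
	     cyclic probability of 0.9, the locally computed frequency is
	     multiplied by 1 / (1 - 0.9) = 10, reflecting an expected ten
	     executions of the block per entry into the region.  */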
3045
3046 cyclic_probability = sreal (1) - cyclic_probability;
3047 BLOCK_INFO (bb)->frequency = frequency / cyclic_probability;
3048 }
3049 }
3050
3051 bitmap_clear_bit (tovisit, bb->index);
3052
3053 e = find_edge (bb, head);
3054 if (e)
3055 {
3056 /* EDGE_INFO (e)->back_edge_prob
3057 = ((e->probability * BLOCK_INFO (bb)->frequency)
3058 / REG_BR_PROB_BASE); */
3059
3060 sreal tmp = e->probability;
3061 tmp *= BLOCK_INFO (bb)->frequency;
3062 EDGE_INFO (e)->back_edge_prob = tmp * real_inv_br_prob_base;
3063 }
3064
3065 /* Propagate to successor blocks. */
3066 FOR_EACH_EDGE (e, ei, bb->succs)
3067 if (!(e->flags & EDGE_DFS_BACK)
3068 && BLOCK_INFO (e->dest)->npredecessors)
3069 {
3070 BLOCK_INFO (e->dest)->npredecessors--;
3071 if (!BLOCK_INFO (e->dest)->npredecessors)
3072 {
3073 if (!nextbb)
3074 nextbb = e->dest;
3075 else
3076 BLOCK_INFO (last)->next = e->dest;
3077
3078 last = e->dest;
3079 }
3080 }
3081 }
3082 }
3083
3084 /* Estimate frequencies in loops at same nest level. */
3085
3086 static void
3087 estimate_loops_at_level (struct loop *first_loop)
3088 {
3089 struct loop *loop;
3090
3091 for (loop = first_loop; loop; loop = loop->next)
3092 {
3093 edge e;
3094 basic_block *bbs;
3095 unsigned i;
3096 auto_bitmap tovisit;
3097
3098 estimate_loops_at_level (loop->inner);
3099
3100 /* Find current loop back edge and mark it. */
3101 e = loop_latch_edge (loop);
3102 EDGE_INFO (e)->back_edge = 1;
3103
3104 bbs = get_loop_body (loop);
3105 for (i = 0; i < loop->num_nodes; i++)
3106 bitmap_set_bit (tovisit, bbs[i]->index);
3107 free (bbs);
3108 propagate_freq (loop->header, tovisit);
3109 }
3110 }
3111
3112 /* Propagates frequencies through structure of loops. */
3113
3114 static void
3115 estimate_loops (void)
3116 {
3117 auto_bitmap tovisit;
3118 basic_block bb;
3119
3120 /* Start by estimating the frequencies in the loops. */
3121 if (number_of_loops (cfun) > 1)
3122 estimate_loops_at_level (current_loops->tree_root->inner);
3123
3124 /* Now propagate the frequencies through all the blocks. */
3125 FOR_ALL_BB_FN (bb, cfun)
3126 {
3127 bitmap_set_bit (tovisit, bb->index);
3128 }
3129 propagate_freq (ENTRY_BLOCK_PTR_FOR_FN (cfun), tovisit);
3130 }
3131
3132 /* Drop the profile for NODE to guessed, and update its frequency based on
3133 whether it is expected to be hot given the CALL_COUNT. */
3134
3135 static void
3136 drop_profile (struct cgraph_node *node, profile_count call_count)
3137 {
3138 struct function *fn = DECL_STRUCT_FUNCTION (node->decl);
3139 /* In the case where this was called by another function with a
3140 dropped profile, call_count will be 0. Since there are no
3141 non-zero call counts to this function, we don't know for sure
3142 whether it is hot, and therefore it will be marked normal below. */
3143 bool hot = maybe_hot_count_p (NULL, call_count);
3144
3145 if (dump_file)
3146 fprintf (dump_file,
3147 "Dropping 0 profile for %s. %s based on calls.\n",
3148 node->dump_name (),
3149 hot ? "Function is hot" : "Function is normal");
3150 /* We only expect to miss profiles for functions that are reached
3151 via non-zero call edges in cases where the function may have
3152 been linked from another module or library (COMDATs and extern
3153 templates). See the comments below for handle_missing_profiles.
3154 Also, only warn in cases where the missing counts exceed the
3155 number of training runs. In certain cases with an execv followed
3156 by a no-return call the profile for the no-return call is not
3157 dumped and there can be a mismatch. */
3158 if (!DECL_COMDAT (node->decl) && !DECL_EXTERNAL (node->decl)
3159 && call_count > profile_info->runs)
3160 {
3161 if (flag_profile_correction)
3162 {
3163 if (dump_file)
3164 fprintf (dump_file,
3165 "Missing counts for called function %s\n",
3166 node->dump_name ());
3167 }
3168 else
3169 warning (0, "Missing counts for called function %s",
3170 node->dump_name ());
3171 }
3172
3173 profile_status_for_fn (fn)
3174 = (flag_guess_branch_prob ? PROFILE_GUESSED : PROFILE_ABSENT);
3175 node->frequency
3176 = hot ? NODE_FREQUENCY_HOT : NODE_FREQUENCY_NORMAL;
3177 }
3178
3179 /* In the case of COMDAT routines, multiple object files will contain the same
3180    function and the linker will select one for the binary.  In that case
3181    all the other copies from the profile-instrumented binary will be missing
3182 profile counts. Look for cases where this happened, due to non-zero
3183 call counts going to 0-count functions, and drop the profile to guessed
3184 so that we can use the estimated probabilities and avoid optimizing only
3185 for size.
3186
3187 The other case where the profile may be missing is when the routine
3188 is not going to be emitted to the object file, e.g. for "extern template"
3189 class methods. Those will be marked DECL_EXTERNAL. Emit a warning in
3190 all other cases of non-zero calls to 0-count functions. */
3191
3192 void
3193 handle_missing_profiles (void)
3194 {
3195 struct cgraph_node *node;
3196 int unlikely_count_fraction = PARAM_VALUE (UNLIKELY_BB_COUNT_FRACTION);
3197 auto_vec<struct cgraph_node *, 64> worklist;
3198
3199   /* See if a 0-count function has non-0-count callers.  In that case we
3200      lost some profile.  Drop its function profile to PROFILE_GUESSED.  */
3201 FOR_EACH_DEFINED_FUNCTION (node)
3202 {
3203 struct cgraph_edge *e;
3204 profile_count call_count = profile_count::zero ();
3205 gcov_type max_tp_first_run = 0;
3206 struct function *fn = DECL_STRUCT_FUNCTION (node->decl);
3207
3208 if (!(node->count == profile_count::zero ()))
3209 continue;
3210 for (e = node->callers; e; e = e->next_caller)
3211 {
3212 if (e->count.initialized_p () > 0)
3213 {
3214 call_count = call_count + e->count;
3215
3216 if (e->caller->tp_first_run > max_tp_first_run)
3217 max_tp_first_run = e->caller->tp_first_run;
3218 }
3219 }
3220
3221       /* If the time profile is missing, assign the maximum that comes from
3222 	 the caller functions.  */
3223 if (!node->tp_first_run && max_tp_first_run)
3224 node->tp_first_run = max_tp_first_run + 1;
3225
3226 if (call_count > 0
3227 && fn && fn->cfg
3228 && (call_count.apply_scale (unlikely_count_fraction, 1) >= profile_info->runs))
3229 {
3230 drop_profile (node, call_count);
3231 worklist.safe_push (node);
3232 }
3233 }
3234
3235 /* Propagate the profile dropping to other 0-count COMDATs that are
3236 potentially called by COMDATs we already dropped the profile on. */
3237 while (worklist.length () > 0)
3238 {
3239 struct cgraph_edge *e;
3240
3241 node = worklist.pop ();
3242       for (e = node->callees; e; e = e->next_callee)
3243 {
3244 struct cgraph_node *callee = e->callee;
3245 struct function *fn = DECL_STRUCT_FUNCTION (callee->decl);
3246
3247 if (callee->count > 0)
3248 continue;
3249 if (DECL_COMDAT (callee->decl) && fn && fn->cfg
3250 && profile_status_for_fn (fn) == PROFILE_READ)
3251 {
3252 drop_profile (node, profile_count::zero ());
3253 worklist.safe_push (callee);
3254 }
3255 }
3256 }
3257 }
3258
3259 /* Convert counts measured by profile driven feedback to frequencies.
3260 Return nonzero iff there was any nonzero execution count. */
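/* As an illustration (invented numbers, not a real profile), a basic block
   whose count equals the function's maximal count is mapped to BB_FREQ_MAX,
   and a block executed half as often gets roughly BB_FREQ_MAX / 2, following
   the RDIV scaling below.  */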
3261
3262 bool
3263 counts_to_freqs (void)
3264 {
3265 gcov_type count_max;
3266 profile_count true_count_max = profile_count::zero ();
3267 basic_block bb;
3268
3269   /* Don't overwrite the estimated frequencies when the profile for
3270      the function is missing.  We may drop this function to PROFILE_GUESSED
3271      later in drop_profile ().  */
3272 if (!ENTRY_BLOCK_PTR_FOR_FN (cfun)->count.initialized_p ()
3273 || ENTRY_BLOCK_PTR_FOR_FN (cfun)->count == profile_count::zero ())
3274 return 0;
3275
3276 FOR_BB_BETWEEN (bb, ENTRY_BLOCK_PTR_FOR_FN (cfun), NULL, next_bb)
3277 if (bb->count > true_count_max)
3278 true_count_max = bb->count;
3279
3280 count_max = MAX (true_count_max.to_gcov_type (), 1);
3281
3282 FOR_BB_BETWEEN (bb, ENTRY_BLOCK_PTR_FOR_FN (cfun), NULL, next_bb)
3283 if (bb->count.initialized_p ())
3284 bb->frequency = RDIV (bb->count.to_gcov_type () * BB_FREQ_MAX, count_max);
3285
3286 return !(true_count_max == profile_count::zero ());
3287 }
3288
3289 /* Return true if the function is likely to be expensive, so there is no
3290    point in optimizing the prologue and epilogue or doing inlining at the
3291    expense of code size growth.  THRESHOLD is the limit on the number of
3292    instructions the function can execute on average and still be considered not expensive.  */
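/* Illustrative example (numbers invented): with THRESHOLD == 100 and an entry
   block frequency of 1000, the limit below is 100000; if the sum of the
   frequencies of all active insns exceeds that, the function executes more
   than 100 instructions per invocation on average and is reported expensive.  */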
3293
3294 bool
3295 expensive_function_p (int threshold)
3296 {
3297 unsigned int sum = 0;
3298 basic_block bb;
3299 unsigned int limit;
3300
3301 /* We can not compute accurately for large thresholds due to scaled
3302 frequencies. */
3303 gcc_assert (threshold <= BB_FREQ_MAX);
3304
3305   /* Frequencies are out of range.  This either means that the function
3306      contains an internal loop executing more than BB_FREQ_MAX times, or that
3307      profile feedback is available and the function has not been executed at all.  */
3308 if (ENTRY_BLOCK_PTR_FOR_FN (cfun)->frequency == 0)
3309 return true;
3310
3311 /* Maximally BB_FREQ_MAX^2 so overflow won't happen. */
3312 limit = ENTRY_BLOCK_PTR_FOR_FN (cfun)->frequency * threshold;
3313 FOR_EACH_BB_FN (bb, cfun)
3314 {
3315 rtx_insn *insn;
3316
3317 FOR_BB_INSNS (bb, insn)
3318 if (active_insn_p (insn))
3319 {
3320 sum += bb->frequency;
3321 if (sum > limit)
3322 return true;
3323 }
3324 }
3325
3326 return false;
3327 }
3328
3329 /* Estimate and propagate basic block frequencies using the given branch
3330 probabilities. If FORCE is true, the frequencies are used to estimate
3331 the counts even when there are already non-zero profile counts. */
3332
3333 void
3334 estimate_bb_frequencies (bool force)
3335 {
3336 basic_block bb;
3337 sreal freq_max;
3338
3339 if (force || profile_status_for_fn (cfun) != PROFILE_READ || !counts_to_freqs ())
3340 {
3341 static int real_values_initialized = 0;
3342
3343 if (!real_values_initialized)
3344 {
3345 real_values_initialized = 1;
3346 real_br_prob_base = REG_BR_PROB_BASE;
3347 real_bb_freq_max = BB_FREQ_MAX;
3348 real_one_half = sreal (1, -1);
3349 real_inv_br_prob_base = sreal (1) / real_br_prob_base;
3350 real_almost_one = sreal (1) - real_inv_br_prob_base;
3351 }
3352
3353 mark_dfs_back_edges ();
3354
3355 single_succ_edge (ENTRY_BLOCK_PTR_FOR_FN (cfun))->probability =
3356 REG_BR_PROB_BASE;
3357
3358 /* Set up block info for each basic block. */
3359 alloc_aux_for_blocks (sizeof (block_info));
3360 alloc_aux_for_edges (sizeof (edge_prob_info));
3361 FOR_BB_BETWEEN (bb, ENTRY_BLOCK_PTR_FOR_FN (cfun), NULL, next_bb)
3362 {
3363 edge e;
3364 edge_iterator ei;
3365
3366 FOR_EACH_EDGE (e, ei, bb->succs)
3367 {
3368 EDGE_INFO (e)->back_edge_prob = e->probability;
3369 EDGE_INFO (e)->back_edge_prob *= real_inv_br_prob_base;
3370 }
3371 }
3372
3373 /* First compute frequencies locally for each loop from innermost
3374 to outermost to examine frequencies for back edges. */
3375 estimate_loops ();
3376
3377 freq_max = 0;
3378 FOR_EACH_BB_FN (bb, cfun)
3379 if (freq_max < BLOCK_INFO (bb)->frequency)
3380 freq_max = BLOCK_INFO (bb)->frequency;
3381
3382 freq_max = real_bb_freq_max / freq_max;
3383 FOR_BB_BETWEEN (bb, ENTRY_BLOCK_PTR_FOR_FN (cfun), NULL, next_bb)
3384 {
3385 sreal tmp = BLOCK_INFO (bb)->frequency * freq_max + real_one_half;
3386 bb->frequency = tmp.to_int ();
3387 }
3388
3389 free_aux_for_blocks ();
3390 free_aux_for_edges ();
3391 }
3392 compute_function_frequency ();
3393 }
3394
3395 /* Decide whether function is hot, cold or unlikely executed. */
3396 void
3397 compute_function_frequency (void)
3398 {
3399 basic_block bb;
3400 struct cgraph_node *node = cgraph_node::get (current_function_decl);
3401
3402 if (DECL_STATIC_CONSTRUCTOR (current_function_decl)
3403 || MAIN_NAME_P (DECL_NAME (current_function_decl)))
3404 node->only_called_at_startup = true;
3405 if (DECL_STATIC_DESTRUCTOR (current_function_decl))
3406 node->only_called_at_exit = true;
3407
3408 if (profile_status_for_fn (cfun) != PROFILE_READ)
3409 {
3410 int flags = flags_from_decl_or_type (current_function_decl);
3411 if (lookup_attribute ("cold", DECL_ATTRIBUTES (current_function_decl))
3412 != NULL)
3413 node->frequency = NODE_FREQUENCY_UNLIKELY_EXECUTED;
3414 else if (lookup_attribute ("hot", DECL_ATTRIBUTES (current_function_decl))
3415 != NULL)
3416 node->frequency = NODE_FREQUENCY_HOT;
3417 else if (flags & ECF_NORETURN)
3418 node->frequency = NODE_FREQUENCY_EXECUTED_ONCE;
3419 else if (MAIN_NAME_P (DECL_NAME (current_function_decl)))
3420 node->frequency = NODE_FREQUENCY_EXECUTED_ONCE;
3421 else if (DECL_STATIC_CONSTRUCTOR (current_function_decl)
3422 || DECL_STATIC_DESTRUCTOR (current_function_decl))
3423 node->frequency = NODE_FREQUENCY_EXECUTED_ONCE;
3424 return;
3425 }
3426
3427   /* Only the first time around try to drop the function into unlikely
3428      executed; after inlining, round-off errors may confuse us.
3429      The ipa-profile pass will drop functions only called from unlikely
3430      functions to unlikely, and that is most of what we care about.  */
3431 if (!cfun->after_inlining)
3432 node->frequency = NODE_FREQUENCY_UNLIKELY_EXECUTED;
3433 FOR_EACH_BB_FN (bb, cfun)
3434 {
3435 if (maybe_hot_bb_p (cfun, bb))
3436 {
3437 node->frequency = NODE_FREQUENCY_HOT;
3438 return;
3439 }
3440 if (!probably_never_executed_bb_p (cfun, bb))
3441 node->frequency = NODE_FREQUENCY_NORMAL;
3442 }
3443 }
3444
3445 /* Build PREDICT_EXPR. */
3446 tree
3447 build_predict_expr (enum br_predictor predictor, enum prediction taken)
3448 {
3449 tree t = build1 (PREDICT_EXPR, void_type_node,
3450 build_int_cst (integer_type_node, predictor));
3451 SET_PREDICT_EXPR_OUTCOME (t, taken);
3452 return t;
3453 }
3454
3455 const char *
3456 predictor_name (enum br_predictor predictor)
3457 {
3458 return predictor_info[predictor].name;
3459 }
3460
3461 /* Predict branch probabilities and estimate profile of the tree CFG. */
3462
3463 namespace {
3464
3465 const pass_data pass_data_profile =
3466 {
3467 GIMPLE_PASS, /* type */
3468 "profile_estimate", /* name */
3469 OPTGROUP_NONE, /* optinfo_flags */
3470 TV_BRANCH_PROB, /* tv_id */
3471 PROP_cfg, /* properties_required */
3472 0, /* properties_provided */
3473 0, /* properties_destroyed */
3474 0, /* todo_flags_start */
3475 0, /* todo_flags_finish */
3476 };
3477
3478 class pass_profile : public gimple_opt_pass
3479 {
3480 public:
3481 pass_profile (gcc::context *ctxt)
3482 : gimple_opt_pass (pass_data_profile, ctxt)
3483 {}
3484
3485 /* opt_pass methods: */
3486 virtual bool gate (function *) { return flag_guess_branch_prob; }
3487 virtual unsigned int execute (function *);
3488
3489 }; // class pass_profile
3490
3491 unsigned int
3492 pass_profile::execute (function *fun)
3493 {
3494 unsigned nb_loops;
3495
3496 if (profile_status_for_fn (cfun) == PROFILE_GUESSED)
3497 return 0;
3498
3499 loop_optimizer_init (LOOPS_NORMAL);
3500 if (dump_file && (dump_flags & TDF_DETAILS))
3501 flow_loops_dump (dump_file, NULL, 0);
3502
3503 mark_irreducible_loops ();
3504
3505 nb_loops = number_of_loops (fun);
3506 if (nb_loops > 1)
3507 scev_initialize ();
3508
3509 tree_estimate_probability (false);
3510
3511 if (nb_loops > 1)
3512 scev_finalize ();
3513
3514 loop_optimizer_finalize ();
3515 if (dump_file && (dump_flags & TDF_DETAILS))
3516 gimple_dump_cfg (dump_file, dump_flags);
3517 if (profile_status_for_fn (fun) == PROFILE_ABSENT)
3518 profile_status_for_fn (fun) = PROFILE_GUESSED;
3519 if (dump_file && (dump_flags & TDF_DETAILS))
3520 {
3521 struct loop *loop;
3522 FOR_EACH_LOOP (loop, LI_FROM_INNERMOST)
3523 if (loop->header->frequency)
3524 fprintf (dump_file, "Loop got predicted %d to iterate %i times.\n",
3525 loop->num,
3526 (int)expected_loop_iterations_unbounded (loop));
3527 }
3528 return 0;
3529 }
3530
3531 } // anon namespace
3532
3533 gimple_opt_pass *
3534 make_pass_profile (gcc::context *ctxt)
3535 {
3536 return new pass_profile (ctxt);
3537 }
3538
3539 namespace {
3540
3541 const pass_data pass_data_strip_predict_hints =
3542 {
3543 GIMPLE_PASS, /* type */
3544 "*strip_predict_hints", /* name */
3545 OPTGROUP_NONE, /* optinfo_flags */
3546 TV_BRANCH_PROB, /* tv_id */
3547 PROP_cfg, /* properties_required */
3548 0, /* properties_provided */
3549 0, /* properties_destroyed */
3550 0, /* todo_flags_start */
3551 0, /* todo_flags_finish */
3552 };
3553
3554 class pass_strip_predict_hints : public gimple_opt_pass
3555 {
3556 public:
3557 pass_strip_predict_hints (gcc::context *ctxt)
3558 : gimple_opt_pass (pass_data_strip_predict_hints, ctxt)
3559 {}
3560
3561 /* opt_pass methods: */
3562 opt_pass * clone () { return new pass_strip_predict_hints (m_ctxt); }
3563 virtual unsigned int execute (function *);
3564
3565 }; // class pass_strip_predict_hints
3566
3567 /* Get rid of all builtin_expect calls and GIMPLE_PREDICT statements
3568 we no longer need. */
3569 unsigned int
3570 pass_strip_predict_hints::execute (function *fun)
3571 {
3572 basic_block bb;
3573 gimple *ass_stmt;
3574 tree var;
3575 bool changed = false;
3576
3577 FOR_EACH_BB_FN (bb, fun)
3578 {
3579 gimple_stmt_iterator bi;
3580 for (bi = gsi_start_bb (bb); !gsi_end_p (bi);)
3581 {
3582 gimple *stmt = gsi_stmt (bi);
3583
3584 if (gimple_code (stmt) == GIMPLE_PREDICT)
3585 {
3586 gsi_remove (&bi, true);
3587 changed = true;
3588 continue;
3589 }
3590 else if (is_gimple_call (stmt))
3591 {
3592 tree fndecl = gimple_call_fndecl (stmt);
3593
3594 if ((fndecl
3595 && DECL_BUILT_IN_CLASS (fndecl) == BUILT_IN_NORMAL
3596 && DECL_FUNCTION_CODE (fndecl) == BUILT_IN_EXPECT
3597 && gimple_call_num_args (stmt) == 2)
3598 || (gimple_call_internal_p (stmt)
3599 && gimple_call_internal_fn (stmt) == IFN_BUILTIN_EXPECT))
3600 {
3601 var = gimple_call_lhs (stmt);
3602 changed = true;
3603 if (var)
3604 {
3605 ass_stmt
3606 = gimple_build_assign (var, gimple_call_arg (stmt, 0));
3607 gsi_replace (&bi, ass_stmt, true);
3608 }
3609 else
3610 {
3611 gsi_remove (&bi, true);
3612 continue;
3613 }
3614 }
3615 }
3616 gsi_next (&bi);
3617 }
3618 }
3619 return changed ? TODO_cleanup_cfg : 0;
3620 }
3621
3622 } // anon namespace
3623
3624 gimple_opt_pass *
3625 make_pass_strip_predict_hints (gcc::context *ctxt)
3626 {
3627 return new pass_strip_predict_hints (ctxt);
3628 }
3629
3630 /* Rebuild function frequencies.  Passes are in general expected to
3631    maintain the profile by hand; however, in some cases this is not possible:
3632    for example, when inlining several functions with loops, frequencies might
3633    run out of scale and thus need to be recomputed.  */
3634
3635 void
3636 rebuild_frequencies (void)
3637 {
3638 timevar_push (TV_REBUILD_FREQUENCIES);
3639
3640 /* When the max bb count in the function is small, there is a higher
3641 chance that there were truncation errors in the integer scaling
3642 of counts by inlining and other optimizations. This could lead
3643 to incorrect classification of code as being cold when it isn't.
3644 In that case, force the estimation of bb counts/frequencies from the
3645 branch probabilities, rather than computing frequencies from counts,
3646 which may also lead to frequencies incorrectly reduced to 0. There
3647 is less precision in the probabilities, so we only do this for small
3648 max counts. */
3649 profile_count count_max = profile_count::zero ();
3650 basic_block bb;
3651 FOR_BB_BETWEEN (bb, ENTRY_BLOCK_PTR_FOR_FN (cfun), NULL, next_bb)
3652 if (bb->count > count_max)
3653 count_max = bb->count;
3654
3655 if (profile_status_for_fn (cfun) == PROFILE_GUESSED
3656 || (!flag_auto_profile && profile_status_for_fn (cfun) == PROFILE_READ
3657 && count_max < REG_BR_PROB_BASE / 10))
3658 {
3659 loop_optimizer_init (0);
3660 add_noreturn_fake_exit_edges ();
3661 mark_irreducible_loops ();
3662 connect_infinite_loops_to_exit ();
3663 estimate_bb_frequencies (true);
3664 remove_fake_exit_edges ();
3665 loop_optimizer_finalize ();
3666 }
3667 else if (profile_status_for_fn (cfun) == PROFILE_READ)
3668 counts_to_freqs ();
3669 else
3670 gcc_unreachable ();
3671 timevar_pop (TV_REBUILD_FREQUENCIES);
3672 }
3673
3674 /* Perform a dry run of the branch prediction pass and report a comparison of
3675    the predicted and real profile into the dump file.  */
3676
3677 void
3678 report_predictor_hitrates (void)
3679 {
3680 unsigned nb_loops;
3681
3682 loop_optimizer_init (LOOPS_NORMAL);
3683 if (dump_file && (dump_flags & TDF_DETAILS))
3684 flow_loops_dump (dump_file, NULL, 0);
3685
3686 mark_irreducible_loops ();
3687
3688 nb_loops = number_of_loops (cfun);
3689 if (nb_loops > 1)
3690 scev_initialize ();
3691
3692 tree_estimate_probability (true);
3693
3694 if (nb_loops > 1)
3695 scev_finalize ();
3696
3697 loop_optimizer_finalize ();
3698 }
3699
3700 /* Force edge E to be cold.
3701    If IMPOSSIBLE is true, force the edge to have count and probability 0;
3702    otherwise keep a low probability to represent a possible error in the
3703    guess.  This is used e.g. in case we predict a loop to likely iterate a
3704    given number of times but we are not 100% sure.
3705 
3706    This function updates the profile locally without attempting to keep
3707    global consistency, which cannot be reached in full generality without a
3708    full profile rebuild from probabilities alone.  Doing so is not necessarily
3709    a good idea because frequencies and counts may be more realistic than
3710    probabilities.
3711 
3712    In some cases (such as the elimination of early exits during full loop
3713    unrolling) the caller can ensure that the profile will become consistent afterwards.  */
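/* Illustrative example (numbers invented): if E has probability 30% and is
   forced impossible, its probability becomes 0 and the remaining 70% of the
   outgoing probability mass is scaled by 10000 / 7000 so that the surviving
   edges again sum to REG_BR_PROB_BASE.  */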
3714
3715 void
3716 force_edge_cold (edge e, bool impossible)
3717 {
3718 profile_count count_sum = profile_count::zero ();
3719 int prob_sum = 0;
3720 edge_iterator ei;
3721 edge e2;
3722 profile_count old_count = e->count;
3723 int old_probability = e->probability;
3724 int prob_scale = REG_BR_PROB_BASE;
3725
3726   /* If the edge is already improbable or cold, just return.  */
3727 if (e->probability <= (impossible ? PROB_VERY_UNLIKELY : 0)
3728 && (!impossible || e->count == profile_count::zero ()))
3729 return;
3730 FOR_EACH_EDGE (e2, ei, e->src->succs)
3731 if (e2 != e)
3732 {
3733 if (e2->count.initialized_p ())
3734 count_sum += e2->count;
3735 prob_sum += e2->probability;
3736 }
3737
3738   /* If there are other edges out of e->src, redistribute the probability
3739      there.  */
3740 if (prob_sum)
3741 {
3742 e->probability
3743 = MIN (e->probability, impossible ? 0 : PROB_VERY_UNLIKELY);
3744 if (impossible)
3745 e->count = profile_count::zero ();
3746 if (old_probability)
3747 e->count = e->count.apply_scale (e->probability, old_probability);
3748 else
3749 e->count = e->count.apply_scale (1, REG_BR_PROB_BASE);
3750
3751 prob_scale = RDIV ((REG_BR_PROB_BASE - e->probability) * REG_BR_PROB_BASE,
3752 prob_sum);
3753 if (dump_file && (dump_flags & TDF_DETAILS))
3754 fprintf (dump_file, "Making edge %i->%i %s by redistributing "
3755 "probability to other edges.\n",
3756 e->src->index, e->dest->index,
3757 impossible ? "impossible" : "cold");
3758 profile_count count_sum2 = count_sum + old_count - e->count;
3759 FOR_EACH_EDGE (e2, ei, e->src->succs)
3760 if (e2 != e)
3761 {
3762 if (count_sum > 0)
3763 	      e2->count = e2->count.apply_scale (count_sum2, count_sum);
3764 e2->probability = RDIV (e2->probability * prob_scale,
3765 REG_BR_PROB_BASE);
3766 }
3767 }
3768 /* If all edges out of e->src are unlikely, the basic block itself
3769 is unlikely. */
3770 else
3771 {
3772 e->probability = REG_BR_PROB_BASE;
3773
3774       /* If we did not adjust anything, the source basic block has no likely
3775 	 edges leaving in the other direction.  In that case force that bb
3776 	 cold, too.  This is in general a difficult task to do, but handle the
3777 	 special case when BB has only one predecessor.  This is the common
3778 	 case when we are updating the profile after loop transforms.  */
3779 if (!prob_sum && count_sum == profile_count::zero ()
3780 && single_pred_p (e->src) && e->src->frequency > (impossible ? 0 : 1))
3781 {
3782 int old_frequency = e->src->frequency;
3783 if (dump_file && (dump_flags & TDF_DETAILS))
3784 fprintf (dump_file, "Making bb %i %s.\n", e->src->index,
3785 impossible ? "impossible" : "cold");
3786 e->src->frequency = MIN (e->src->frequency, impossible ? 0 : 1);
3787 if (impossible)
3788 e->src->count = e->count = profile_count::zero ();
3789 else
3790 e->src->count = e->count = e->count.apply_scale (e->src->frequency,
3791 old_frequency);
3792 force_edge_cold (single_pred_edge (e->src), impossible);
3793 }
3794 else if (dump_file && (dump_flags & TDF_DETAILS)
3795 && maybe_hot_bb_p (cfun, e->src))
3796 fprintf (dump_file, "Giving up on making bb %i %s.\n", e->src->index,
3797 impossible ? "impossible" : "cold");
3798 }
3799 }