/* Branch prediction routines for the GNU compiler.
   Copyright (C) 2000-2014 Free Software Foundation, Inc.

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free
Software Foundation; either version 3, or (at your option) any later
version.

GCC is distributed in the hope that it will be useful, but WITHOUT ANY
WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3.  If not see
<http://www.gnu.org/licenses/>.  */

/* References:

   [1] "Branch Prediction for Free"
       Ball and Larus; PLDI '93.
   [2] "Static Branch Frequency and Program Profile Analysis"
       Wu and Larus; MICRO-27.
   [3] "Corpus-based Static Branch Prediction"
       Calder, Grunwald, Lindsay, Martin, Mozer, and Zorn; PLDI '95.  */


#include "config.h"
#include "system.h"
#include "coretypes.h"
#include "tm.h"
#include "tree.h"
#include "calls.h"
#include "rtl.h"
#include "tm_p.h"
#include "hard-reg-set.h"
#include "basic-block.h"
#include "insn-config.h"
#include "regs.h"
#include "flags.h"
#include "function.h"
#include "except.h"
#include "diagnostic-core.h"
#include "recog.h"
#include "expr.h"
#include "predict.h"
#include "coverage.h"
#include "sreal.h"
#include "params.h"
#include "target.h"
#include "cfgloop.h"
#include "pointer-set.h"
#include "hash-map.h"
#include "tree-ssa-alias.h"
#include "internal-fn.h"
#include "gimple-expr.h"
#include "is-a.h"
#include "gimple.h"
#include "gimple-iterator.h"
#include "gimple-ssa.h"
#include "cgraph.h"
#include "tree-cfg.h"
#include "tree-phinodes.h"
#include "ssa-iterators.h"
#include "tree-ssa-loop-niter.h"
#include "tree-ssa-loop.h"
#include "tree-pass.h"
#include "tree-scalar-evolution.h"

/* real constants: 0, 1, 1-1/REG_BR_PROB_BASE, REG_BR_PROB_BASE,
   1/REG_BR_PROB_BASE, 0.5, BB_FREQ_MAX.  */
static sreal real_zero, real_one, real_almost_one, real_br_prob_base,
             real_inv_br_prob_base, real_one_half, real_bb_freq_max;

static void combine_predictions_for_insn (rtx, basic_block);
static void dump_prediction (FILE *, enum br_predictor, int, basic_block, int);
static void predict_paths_leading_to (basic_block, enum br_predictor, enum prediction);
static void predict_paths_leading_to_edge (edge, enum br_predictor, enum prediction);
static bool can_predict_insn_p (const_rtx);

/* Information we hold about each branch predictor.
   Filled using information from predict.def.  */

struct predictor_info
{
  const char *const name;	/* Name used in the debugging dumps.  */
  const int hitrate;		/* Expected hitrate used by
				   predict_insn_def call.  */
  const int flags;
};

/* Use given predictor without Dempster-Shafer theory if it matches
   using first_match heuristics.  */
#define PRED_FLAG_FIRST_MATCH 1

/* Convert a hitrate in percent to our representation.  */

#define HITRATE(VAL) ((int) ((VAL) * REG_BR_PROB_BASE + 50) / 100)
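
/* For example, with the common REG_BR_PROB_BASE value of 10000,
   HITRATE (99) evaluates to 9900 and HITRATE (50) to 5000 (worked
   values for illustration; the exact base is whatever REG_BR_PROB_BASE
   is defined to).  */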

#define DEF_PREDICTOR(ENUM, NAME, HITRATE, FLAGS) {NAME, HITRATE, FLAGS},
static const struct predictor_info predictor_info[] = {
#include "predict.def"

  /* Upper bound on predictors.  */
  {NULL, 0, 0}
};
#undef DEF_PREDICTOR

/* Return TRUE if frequency FREQ is considered to be hot.  */

static inline bool
maybe_hot_frequency_p (struct function *fun, int freq)
{
  struct cgraph_node *node = cgraph_node::get (fun->decl);
  if (!profile_info || !flag_branch_probabilities)
    {
      if (node->frequency == NODE_FREQUENCY_UNLIKELY_EXECUTED)
	return false;
      if (node->frequency == NODE_FREQUENCY_HOT)
	return true;
    }
  if (profile_status_for_fn (fun) == PROFILE_ABSENT)
    return true;
  if (node->frequency == NODE_FREQUENCY_EXECUTED_ONCE
      && freq < (ENTRY_BLOCK_PTR_FOR_FN (fun)->frequency * 2 / 3))
    return false;
  if (PARAM_VALUE (HOT_BB_FREQUENCY_FRACTION) == 0)
    return false;
  if (freq < (ENTRY_BLOCK_PTR_FOR_FN (fun)->frequency
	      / PARAM_VALUE (HOT_BB_FREQUENCY_FRACTION)))
    return false;
  return true;
}
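
/* Note: with the hot-bb-frequency-fraction parameter at its usual
   default of 1000 (an assumed value; the parameter is configurable),
   the last test above treats a block as hot only when its frequency
   reaches at least 1/1000 of the entry block's frequency.  */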

static gcov_type min_count = -1;

/* Determine the threshold for hot BB counts.  */

gcov_type
get_hot_bb_threshold ()
{
  gcov_working_set_t *ws;
  if (min_count == -1)
    {
      ws = find_working_set (PARAM_VALUE (HOT_BB_COUNT_WS_PERMILLE));
      gcc_assert (ws);
      min_count = ws->min_counter;
    }
  return min_count;
}

/* Set the threshold for hot BB counts.  */

void
set_hot_bb_threshold (gcov_type min)
{
  min_count = min;
}

/* Return TRUE if profile count COUNT is considered to be hot.  */

static inline bool
maybe_hot_count_p (struct function *fun, gcov_type count)
{
  if (fun && profile_status_for_fn (fun) != PROFILE_READ)
    return true;
  /* Code executed at most once is not hot.  */
  if (profile_info->runs >= count)
    return false;
  return (count >= get_hot_bb_threshold ());
}

/* Return true in case BB can be CPU intensive and should be optimized
   for maximal performance.  */

bool
maybe_hot_bb_p (struct function *fun, const_basic_block bb)
{
  gcc_checking_assert (fun);
  if (profile_status_for_fn (fun) == PROFILE_READ)
    return maybe_hot_count_p (fun, bb->count);
  return maybe_hot_frequency_p (fun, bb->frequency);
}

/* Return true if the call can be hot.  */

bool
cgraph_maybe_hot_edge_p (struct cgraph_edge *edge)
{
  if (profile_info && flag_branch_probabilities
      && !maybe_hot_count_p (NULL, edge->count))
    return false;
  if (edge->caller->frequency == NODE_FREQUENCY_UNLIKELY_EXECUTED
      || (edge->callee
	  && edge->callee->frequency == NODE_FREQUENCY_UNLIKELY_EXECUTED))
    return false;
  if (edge->caller->frequency > NODE_FREQUENCY_UNLIKELY_EXECUTED
      && (edge->callee
	  && edge->callee->frequency <= NODE_FREQUENCY_EXECUTED_ONCE))
    return false;
  if (optimize_size)
    return false;
  if (edge->caller->frequency == NODE_FREQUENCY_HOT)
    return true;
  if (edge->caller->frequency == NODE_FREQUENCY_EXECUTED_ONCE
      && edge->frequency < CGRAPH_FREQ_BASE * 3 / 2)
    return false;
  if (flag_guess_branch_prob)
    {
      if (PARAM_VALUE (HOT_BB_FREQUENCY_FRACTION) == 0
	  || edge->frequency <= (CGRAPH_FREQ_BASE
				 / PARAM_VALUE (HOT_BB_FREQUENCY_FRACTION)))
	return false;
    }
  return true;
}

/* Return true in case edge E can be CPU intensive and should be optimized
   for maximal performance.  */

bool
maybe_hot_edge_p (edge e)
{
  if (profile_status_for_fn (cfun) == PROFILE_READ)
    return maybe_hot_count_p (cfun, e->count);
  return maybe_hot_frequency_p (cfun, EDGE_FREQUENCY (e));
}



/* Return true if profile COUNT and FREQUENCY, or function FUN's static
   node frequency, reflects never being executed.  */

static bool
probably_never_executed (struct function *fun,
			 gcov_type count, int frequency)
{
  gcc_checking_assert (fun);
  if (profile_status_for_fn (cfun) == PROFILE_READ)
    {
      int unlikely_count_fraction = PARAM_VALUE (UNLIKELY_BB_COUNT_FRACTION);
      if (count * unlikely_count_fraction >= profile_info->runs)
	return false;
      if (!frequency)
	return true;
      if (!ENTRY_BLOCK_PTR_FOR_FN (cfun)->frequency)
	return false;
      if (ENTRY_BLOCK_PTR_FOR_FN (cfun)->count)
	{
	  gcov_type computed_count;
	  /* Check for possibility of overflow, in which case entry bb count
	     is large enough to do the division first without losing much
	     precision.  */
	  if (ENTRY_BLOCK_PTR_FOR_FN (cfun)->count < REG_BR_PROB_BASE *
	      REG_BR_PROB_BASE)
	    {
	      gcov_type scaled_count
		= frequency * ENTRY_BLOCK_PTR_FOR_FN (cfun)->count *
		  unlikely_count_fraction;
	      computed_count = RDIV (scaled_count,
				     ENTRY_BLOCK_PTR_FOR_FN (cfun)->frequency);
	    }
	  else
	    {
	      computed_count = RDIV (ENTRY_BLOCK_PTR_FOR_FN (cfun)->count,
				     ENTRY_BLOCK_PTR_FOR_FN (cfun)->frequency);
	      computed_count *= frequency * unlikely_count_fraction;
	    }
	  if (computed_count >= profile_info->runs)
	    return false;
	}
      return true;
    }
  if ((!profile_info || !flag_branch_probabilities)
      && (cgraph_node::get (fun->decl)->frequency
	  == NODE_FREQUENCY_UNLIKELY_EXECUTED))
    return true;
  return false;
}


/* Return true in case BB is probably never executed.  */

bool
probably_never_executed_bb_p (struct function *fun, const_basic_block bb)
{
  return probably_never_executed (fun, bb->count, bb->frequency);
}


/* Return true in case edge E is probably never executed.  */

bool
probably_never_executed_edge_p (struct function *fun, edge e)
{
  return probably_never_executed (fun, e->count, EDGE_FREQUENCY (e));
}

/* Return true if function should be optimized for size.  */

bool
cgraph_node::optimize_for_size_p (void)
{
  if (optimize_size)
    return true;
  if (frequency == NODE_FREQUENCY_UNLIKELY_EXECUTED)
    return true;
  else
    return false;
}

/* Return true when function FUN should always be optimized for size.  */

bool
optimize_function_for_size_p (struct function *fun)
{
  if (optimize_size)
    return true;
  if (!fun || !fun->decl)
    return false;

  cgraph_node *n = cgraph_node::get (fun->decl);
  return n && n->optimize_for_size_p ();
}

/* Return true when function FUN should always be optimized for speed.  */

bool
optimize_function_for_speed_p (struct function *fun)
{
  return !optimize_function_for_size_p (fun);
}

/* Return TRUE when BB should be optimized for size.  */

bool
optimize_bb_for_size_p (const_basic_block bb)
{
  return optimize_function_for_size_p (cfun) || !maybe_hot_bb_p (cfun, bb);
}

/* Return TRUE when BB should be optimized for speed.  */

bool
optimize_bb_for_speed_p (const_basic_block bb)
{
  return !optimize_bb_for_size_p (bb);
}

/* Return TRUE when edge E should be optimized for size.  */

bool
optimize_edge_for_size_p (edge e)
{
  return optimize_function_for_size_p (cfun) || !maybe_hot_edge_p (e);
}

/* Return TRUE when edge E should be optimized for speed.  */

bool
optimize_edge_for_speed_p (edge e)
{
  return !optimize_edge_for_size_p (e);
}

/* Return TRUE when the current instruction should be optimized for size.  */

bool
optimize_insn_for_size_p (void)
{
  return optimize_function_for_size_p (cfun) || !crtl->maybe_hot_insn_p;
}

/* Return TRUE when the current instruction should be optimized for speed.  */

bool
optimize_insn_for_speed_p (void)
{
  return !optimize_insn_for_size_p ();
}

/* Return TRUE when LOOP should be optimized for size.  */

bool
optimize_loop_for_size_p (struct loop *loop)
{
  return optimize_bb_for_size_p (loop->header);
}

/* Return TRUE when LOOP should be optimized for speed.  */

bool
optimize_loop_for_speed_p (struct loop *loop)
{
  return optimize_bb_for_speed_p (loop->header);
}

/* Return TRUE when LOOP nest should be optimized for speed.  */

bool
optimize_loop_nest_for_speed_p (struct loop *loop)
{
  struct loop *l = loop;
  if (optimize_loop_for_speed_p (loop))
    return true;
  l = loop->inner;
  while (l && l != loop)
    {
      if (optimize_loop_for_speed_p (l))
	return true;
      if (l->inner)
	l = l->inner;
      else if (l->next)
	l = l->next;
      else
	{
	  while (l != loop && !l->next)
	    l = loop_outer (l);
	  if (l != loop)
	    l = l->next;
	}
    }
  return false;
}

/* Return TRUE when LOOP nest should be optimized for size.  */

bool
optimize_loop_nest_for_size_p (struct loop *loop)
{
  return !optimize_loop_nest_for_speed_p (loop);
}

/* Return true when edge E is likely to be well predictable by the branch
   predictor.  */

bool
predictable_edge_p (edge e)
{
  if (profile_status_for_fn (cfun) == PROFILE_ABSENT)
    return false;
  if ((e->probability
       <= PARAM_VALUE (PARAM_PREDICTABLE_BRANCH_OUTCOME) * REG_BR_PROB_BASE / 100)
      || (REG_BR_PROB_BASE - e->probability
	  <= PARAM_VALUE (PARAM_PREDICTABLE_BRANCH_OUTCOME) * REG_BR_PROB_BASE / 100))
    return true;
  return false;
}
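
/* E.g. with predictable-branch-outcome at its usual default of 2 (an
   assumed value), only edges with probability at most 2% or at least
   98% are reported as predictable.  */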


/* Set RTL expansion for BB profile.  */

void
rtl_profile_for_bb (basic_block bb)
{
  crtl->maybe_hot_insn_p = maybe_hot_bb_p (cfun, bb);
}

/* Set RTL expansion for edge profile.  */

void
rtl_profile_for_edge (edge e)
{
  crtl->maybe_hot_insn_p = maybe_hot_edge_p (e);
}

/* Set RTL expansion to default mode (i.e. when profile info is not known).  */
void
default_rtl_profile (void)
{
  crtl->maybe_hot_insn_p = true;
}

/* Return true if one of the outgoing edges is already predicted by
   PREDICTOR.  */

bool
rtl_predicted_by_p (const_basic_block bb, enum br_predictor predictor)
{
  rtx note;
  if (!INSN_P (BB_END (bb)))
    return false;
  for (note = REG_NOTES (BB_END (bb)); note; note = XEXP (note, 1))
    if (REG_NOTE_KIND (note) == REG_BR_PRED
	&& INTVAL (XEXP (XEXP (note, 0), 0)) == (int)predictor)
      return true;
  return false;
}

/* Structure representing predictions at the tree level.  */

struct edge_prediction {
  struct edge_prediction *ep_next;
  edge ep_edge;
  enum br_predictor ep_predictor;
  int ep_probability;
};

/* This map contains for a basic block the list of predictions for the
   outgoing edges.  */

static hash_map<const_basic_block, edge_prediction *> *bb_predictions;

/* Return true if one of the outgoing edges is already predicted by
   PREDICTOR.  */

bool
gimple_predicted_by_p (const_basic_block bb, enum br_predictor predictor)
{
  struct edge_prediction *i;
  edge_prediction **preds = bb_predictions->get (bb);

  if (!preds)
    return false;

  for (i = *preds; i; i = i->ep_next)
    if (i->ep_predictor == predictor)
      return true;
  return false;
}

/* Return true when the probability of edge is reliable.

   The profile guessing code is good at predicting the branch outcome
   (i.e. taken/not taken), which is predicted correctly slightly over 75%
   of the time.  It is however notoriously poor at predicting the
   probability itself.  In general the guessed profile appears a lot
   flatter (with probabilities closer to 50%) than the reality, so it is
   a bad idea to use it to drive optimizations such as those disabling
   dynamic branch prediction for well predictable branches.

   There are two exceptions - edges leading to noreturn calls and edges
   predicted by the number of iterations heuristics are predicted well.
   This predicate should be able to distinguish those, but at the moment
   it simply checks for the noreturn heuristic, which is the only one
   giving probabilities over 99% or below 1%.  In the future we might want
   to propagate reliability information across the CFG if we find this
   information useful in multiple places.  */
static bool
probability_reliable_p (int prob)
{
  return (profile_status_for_fn (cfun) == PROFILE_READ
	  || (profile_status_for_fn (cfun) == PROFILE_GUESSED
	      && (prob <= HITRATE (1) || prob >= HITRATE (99))));
}

/* Same predicate as above, working on edges.  */
bool
edge_probability_reliable_p (const_edge e)
{
  return probability_reliable_p (e->probability);
}

/* Same predicate as edge_probability_reliable_p, working on notes.  */
bool
br_prob_note_reliable_p (const_rtx note)
{
  gcc_assert (REG_NOTE_KIND (note) == REG_BR_PROB);
  return probability_reliable_p (XINT (note, 0));
}

static void
predict_insn (rtx insn, enum br_predictor predictor, int probability)
{
  gcc_assert (any_condjump_p (insn));
  if (!flag_guess_branch_prob)
    return;

  add_reg_note (insn, REG_BR_PRED,
		gen_rtx_CONCAT (VOIDmode,
				GEN_INT ((int) predictor),
				GEN_INT ((int) probability)));
}

/* Predict insn by given predictor.  */

void
predict_insn_def (rtx insn, enum br_predictor predictor,
		  enum prediction taken)
{
  int probability = predictor_info[(int) predictor].hitrate;

  if (taken != TAKEN)
    probability = REG_BR_PROB_BASE - probability;

  predict_insn (insn, predictor, probability);
}

/* Predict edge E with given probability if possible.  */

void
rtl_predict_edge (edge e, enum br_predictor predictor, int probability)
{
  rtx last_insn;
  last_insn = BB_END (e->src);

  /* We can store the branch prediction information only about
     conditional jumps.  */
  if (!any_condjump_p (last_insn))
    return;

  /* We always store probability of branching.  */
  if (e->flags & EDGE_FALLTHRU)
    probability = REG_BR_PROB_BASE - probability;

  predict_insn (last_insn, predictor, probability);
}

/* Predict edge E with the given PROBABILITY.  */
void
gimple_predict_edge (edge e, enum br_predictor predictor, int probability)
{
  gcc_assert (profile_status_for_fn (cfun) != PROFILE_GUESSED);
  if ((e->src != ENTRY_BLOCK_PTR_FOR_FN (cfun)
       && EDGE_COUNT (e->src->succs) > 1)
      && flag_guess_branch_prob && optimize)
    {
      struct edge_prediction *i = XNEW (struct edge_prediction);
      edge_prediction *&preds = bb_predictions->get_or_insert (e->src);

      i->ep_next = preds;
      preds = i;
      i->ep_probability = probability;
      i->ep_predictor = predictor;
      i->ep_edge = e;
    }
}

/* Remove all predictions on given basic block that are attached
   to edge E.  */
void
remove_predictions_associated_with_edge (edge e)
{
  if (!bb_predictions)
    return;

  edge_prediction **preds = bb_predictions->get (e->src);

  if (preds)
    {
      struct edge_prediction **prediction = preds;
      struct edge_prediction *next;

      while (*prediction)
	{
	  if ((*prediction)->ep_edge == e)
	    {
	      next = (*prediction)->ep_next;
	      free (*prediction);
	      *prediction = next;
	    }
	  else
	    prediction = &((*prediction)->ep_next);
	}
    }
}

/* Clears the list of predictions stored for BB.  */

static void
clear_bb_predictions (basic_block bb)
{
  edge_prediction **preds = bb_predictions->get (bb);
  struct edge_prediction *pred, *next;

  if (!preds)
    return;

  for (pred = *preds; pred; pred = next)
    {
      next = pred->ep_next;
      free (pred);
    }
  *preds = NULL;
}

/* Return true when we can store prediction on insn INSN.
   At the moment we represent predictions only on conditional
   jumps, not on computed jumps or other complicated cases.  */
static bool
can_predict_insn_p (const_rtx insn)
{
  return (JUMP_P (insn)
	  && any_condjump_p (insn)
	  && EDGE_COUNT (BLOCK_FOR_INSN (insn)->succs) >= 2);
}

/* Predict edge E by given predictor if possible.  */

void
predict_edge_def (edge e, enum br_predictor predictor,
		  enum prediction taken)
{
  int probability = predictor_info[(int) predictor].hitrate;

  if (taken != TAKEN)
    probability = REG_BR_PROB_BASE - probability;

  predict_edge (e, predictor, probability);
}

/* Invert all branch predictions or probability notes in the INSN.  This needs
   to be done each time we invert the condition used by the jump.  */

void
invert_br_probabilities (rtx insn)
{
  rtx note;

  for (note = REG_NOTES (insn); note; note = XEXP (note, 1))
    if (REG_NOTE_KIND (note) == REG_BR_PROB)
      XINT (note, 0) = REG_BR_PROB_BASE - XINT (note, 0);
    else if (REG_NOTE_KIND (note) == REG_BR_PRED)
      XEXP (XEXP (note, 0), 1)
	= GEN_INT (REG_BR_PROB_BASE - INTVAL (XEXP (XEXP (note, 0), 1)));
}

/* Dump information about the branch prediction to the output file.  */

static void
dump_prediction (FILE *file, enum br_predictor predictor, int probability,
		 basic_block bb, int used)
{
  edge e;
  edge_iterator ei;

  if (!file)
    return;

  FOR_EACH_EDGE (e, ei, bb->succs)
    if (! (e->flags & EDGE_FALLTHRU))
      break;

  fprintf (file, " %s heuristics%s: %.1f%%",
	   predictor_info[predictor].name,
	   used ? "" : " (ignored)", probability * 100.0 / REG_BR_PROB_BASE);

  if (bb->count)
    {
      fprintf (file, " exec %"PRId64, bb->count);
      if (e)
	{
	  fprintf (file, " hit %"PRId64, e->count);
	  fprintf (file, " (%.1f%%)", e->count * 100.0 / bb->count);
	}
    }

  fprintf (file, "\n");
}

/* We cannot predict the probabilities of outgoing edges of BB.  Set them
   evenly and hope for the best.  */
static void
set_even_probabilities (basic_block bb)
{
  int nedges = 0;
  edge e;
  edge_iterator ei;

  FOR_EACH_EDGE (e, ei, bb->succs)
    if (!(e->flags & (EDGE_EH | EDGE_FAKE)))
      nedges++;
  FOR_EACH_EDGE (e, ei, bb->succs)
    if (!(e->flags & (EDGE_EH | EDGE_FAKE)))
      e->probability = (REG_BR_PROB_BASE + nedges / 2) / nedges;
    else
      e->probability = 0;
}
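
/* Quick check of the rounding above: with REG_BR_PROB_BASE == 10000 and
   three eligible successor edges, each edge gets (10000 + 1) / 3 == 3333;
   the nedges / 2 term merely rounds the division to nearest.  */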

/* Combine all REG_BR_PRED notes into a single probability and attach the
   REG_BR_PROB note if not already present.  Remove now useless REG_BR_PRED
   notes.  */

static void
combine_predictions_for_insn (rtx insn, basic_block bb)
{
  rtx prob_note;
  rtx *pnote;
  rtx note;
  int best_probability = PROB_EVEN;
  enum br_predictor best_predictor = END_PREDICTORS;
  int combined_probability = REG_BR_PROB_BASE / 2;
  int d;
  bool first_match = false;
  bool found = false;

  if (!can_predict_insn_p (insn))
    {
      set_even_probabilities (bb);
      return;
    }

  prob_note = find_reg_note (insn, REG_BR_PROB, 0);
  pnote = &REG_NOTES (insn);
  if (dump_file)
    fprintf (dump_file, "Predictions for insn %i bb %i\n", INSN_UID (insn),
	     bb->index);

  /* We implement "first match" heuristics and use the probability guessed
     by the predictor with the smallest index.  */
  for (note = REG_NOTES (insn); note; note = XEXP (note, 1))
    if (REG_NOTE_KIND (note) == REG_BR_PRED)
      {
	enum br_predictor predictor = ((enum br_predictor)
				       INTVAL (XEXP (XEXP (note, 0), 0)));
	int probability = INTVAL (XEXP (XEXP (note, 0), 1));

	found = true;
	if (best_predictor > predictor)
	  best_probability = probability, best_predictor = predictor;

	d = (combined_probability * probability
	     + (REG_BR_PROB_BASE - combined_probability)
	     * (REG_BR_PROB_BASE - probability));

	/* Use FP math to avoid overflows of 32-bit integers.  */
	if (d == 0)
	  /* If one probability is 0% and one 100%, avoid division by zero.  */
	  combined_probability = REG_BR_PROB_BASE / 2;
	else
	  combined_probability = (((double) combined_probability) * probability
				  * REG_BR_PROB_BASE / d + 0.5);
      }

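  /* Worked example of the combination above, assuming REG_BR_PROB_BASE
     == 10000: two agreeing predictions of 7000 (70%) give
     d = 7000 * 7000 + 3000 * 3000 = 58000000, and the combined
     probability becomes 7000 * 7000 * 10000 / 58000000 + 0.5 ~= 8448,
     i.e. agreement strengthens the guess to about 84.5%.  */
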
  /* Decide which heuristic to use.  In case we didn't match anything,
     use no_prediction heuristic, in case we did match, use either
     first match or Dempster-Shafer theory depending on the flags.  */

  if (predictor_info [best_predictor].flags & PRED_FLAG_FIRST_MATCH)
    first_match = true;

  if (!found)
    dump_prediction (dump_file, PRED_NO_PREDICTION,
		     combined_probability, bb, true);
  else
    {
      dump_prediction (dump_file, PRED_DS_THEORY, combined_probability,
		       bb, !first_match);
      dump_prediction (dump_file, PRED_FIRST_MATCH, best_probability,
		       bb, first_match);
    }

  if (first_match)
    combined_probability = best_probability;
  dump_prediction (dump_file, PRED_COMBINED, combined_probability, bb, true);

  while (*pnote)
    {
      if (REG_NOTE_KIND (*pnote) == REG_BR_PRED)
	{
	  enum br_predictor predictor = ((enum br_predictor)
					 INTVAL (XEXP (XEXP (*pnote, 0), 0)));
	  int probability = INTVAL (XEXP (XEXP (*pnote, 0), 1));

	  dump_prediction (dump_file, predictor, probability, bb,
			   !first_match || best_predictor == predictor);
	  *pnote = XEXP (*pnote, 1);
	}
      else
	pnote = &XEXP (*pnote, 1);
    }

  if (!prob_note)
    {
      add_int_reg_note (insn, REG_BR_PROB, combined_probability);

      /* Save the prediction into CFG in case we are seeing a non-degenerate
	 conditional jump.  */
      if (!single_succ_p (bb))
	{
	  BRANCH_EDGE (bb)->probability = combined_probability;
	  FALLTHRU_EDGE (bb)->probability
	    = REG_BR_PROB_BASE - combined_probability;
	}
    }
  else if (!single_succ_p (bb))
    {
      int prob = XINT (prob_note, 0);

      BRANCH_EDGE (bb)->probability = prob;
      FALLTHRU_EDGE (bb)->probability = REG_BR_PROB_BASE - prob;
    }
  else
    single_succ_edge (bb)->probability = REG_BR_PROB_BASE;
}

/* Combine predictions into a single probability and store them into the CFG.
   Remove now useless prediction entries.  */

static void
combine_predictions_for_bb (basic_block bb)
{
  int best_probability = PROB_EVEN;
  enum br_predictor best_predictor = END_PREDICTORS;
  int combined_probability = REG_BR_PROB_BASE / 2;
  int d;
  bool first_match = false;
  bool found = false;
  struct edge_prediction *pred;
  int nedges = 0;
  edge e, first = NULL, second = NULL;
  edge_iterator ei;

  FOR_EACH_EDGE (e, ei, bb->succs)
    if (!(e->flags & (EDGE_EH | EDGE_FAKE)))
      {
	nedges++;
	if (first && !second)
	  second = e;
	if (!first)
	  first = e;
      }

  /* When there is no successor or only one choice, prediction is easy.

     We are lazy for now and predict only basic blocks with two outgoing
     edges.  It is possible to predict the generic case too, but we have to
     ignore first match heuristics and do more involved combining.  Implement
     this later.  */
  if (nedges != 2)
    {
      if (!bb->count)
	set_even_probabilities (bb);
      clear_bb_predictions (bb);
      if (dump_file)
	fprintf (dump_file, "%i edges in bb %i predicted to even probabilities\n",
		 nedges, bb->index);
      return;
    }

  if (dump_file)
    fprintf (dump_file, "Predictions for bb %i\n", bb->index);

  edge_prediction **preds = bb_predictions->get (bb);
  if (preds)
    {
      /* We implement "first match" heuristics and use the probability
	 guessed by the predictor with the smallest index.  */
      for (pred = *preds; pred; pred = pred->ep_next)
	{
	  enum br_predictor predictor = pred->ep_predictor;
	  int probability = pred->ep_probability;

	  if (pred->ep_edge != first)
	    probability = REG_BR_PROB_BASE - probability;

	  found = true;
	  /* First match heuristics would be wildly confused if we predicted
	     both directions.  */
	  if (best_predictor > predictor)
	    {
	      struct edge_prediction *pred2;
	      int prob = probability;

	      for (pred2 = (struct edge_prediction *) *preds;
		   pred2; pred2 = pred2->ep_next)
		if (pred2 != pred && pred2->ep_predictor == pred->ep_predictor)
		  {
		    int probability2 = pred2->ep_probability;

		    if (pred2->ep_edge != first)
		      probability2 = REG_BR_PROB_BASE - probability2;

		    if ((probability < REG_BR_PROB_BASE / 2) !=
			(probability2 < REG_BR_PROB_BASE / 2))
		      break;

		    /* If the same predictor later gave a better result,
		       go for it!  */
		    if ((probability >= REG_BR_PROB_BASE / 2
			 && (probability2 > probability))
			|| (probability <= REG_BR_PROB_BASE / 2
			    && (probability2 < probability)))
		      prob = probability2;
		  }
	      if (!pred2)
		best_probability = prob, best_predictor = predictor;
	    }

	  d = (combined_probability * probability
	       + (REG_BR_PROB_BASE - combined_probability)
	       * (REG_BR_PROB_BASE - probability));

	  /* Use FP math to avoid overflows of 32-bit integers.  */
	  if (d == 0)
	    /* If one probability is 0% and one 100%, avoid division by
	       zero.  */
	    combined_probability = REG_BR_PROB_BASE / 2;
	  else
	    combined_probability = (((double) combined_probability)
				    * probability
				    * REG_BR_PROB_BASE / d + 0.5);
	}
    }

  /* Decide which heuristic to use.  In case we didn't match anything,
     use no_prediction heuristic, in case we did match, use either
     first match or Dempster-Shafer theory depending on the flags.  */

  if (predictor_info [best_predictor].flags & PRED_FLAG_FIRST_MATCH)
    first_match = true;

  if (!found)
    dump_prediction (dump_file, PRED_NO_PREDICTION, combined_probability, bb, true);
  else
    {
      dump_prediction (dump_file, PRED_DS_THEORY, combined_probability, bb,
		       !first_match);
      dump_prediction (dump_file, PRED_FIRST_MATCH, best_probability, bb,
		       first_match);
    }

  if (first_match)
    combined_probability = best_probability;
  dump_prediction (dump_file, PRED_COMBINED, combined_probability, bb, true);

  if (preds)
    {
      for (pred = (struct edge_prediction *) *preds; pred; pred = pred->ep_next)
	{
	  enum br_predictor predictor = pred->ep_predictor;
	  int probability = pred->ep_probability;

	  if (pred->ep_edge != EDGE_SUCC (bb, 0))
	    probability = REG_BR_PROB_BASE - probability;
	  dump_prediction (dump_file, predictor, probability, bb,
			   !first_match || best_predictor == predictor);
	}
    }
  clear_bb_predictions (bb);

  if (!bb->count)
    {
      first->probability = combined_probability;
      second->probability = REG_BR_PROB_BASE - combined_probability;
    }
}

/* Check if T1 and T2 satisfy the IV_COMPARE condition.
   Return the SSA_NAME if the condition is satisfied, NULL otherwise.

   T1 and T2 should be one of the following cases:
     1. T1 is SSA_NAME, T2 is NULL
     2. T1 is SSA_NAME, T2 is INTEGER_CST between [-4, 4]
     3. T2 is SSA_NAME, T1 is INTEGER_CST between [-4, 4]  */

static tree
strips_small_constant (tree t1, tree t2)
{
  tree ret = NULL;
  int value = 0;

  if (!t1)
    return NULL;
  else if (TREE_CODE (t1) == SSA_NAME)
    ret = t1;
  else if (tree_fits_shwi_p (t1))
    value = tree_to_shwi (t1);
  else
    return NULL;

  if (!t2)
    return ret;
  else if (tree_fits_shwi_p (t2))
    value = tree_to_shwi (t2);
  else if (TREE_CODE (t2) == SSA_NAME)
    {
      if (ret)
	return NULL;
      else
	ret = t2;
    }

  if (value <= 4 && value >= -4)
    return ret;
  else
    return NULL;
}

/* Return the SSA_NAME in T or T's operands.
   Return NULL if SSA_NAME cannot be found.  */

static tree
get_base_value (tree t)
{
  if (TREE_CODE (t) == SSA_NAME)
    return t;

  if (!BINARY_CLASS_P (t))
    return NULL;

  switch (TREE_OPERAND_LENGTH (t))
    {
    case 1:
      return strips_small_constant (TREE_OPERAND (t, 0), NULL);
    case 2:
      return strips_small_constant (TREE_OPERAND (t, 0),
				    TREE_OPERAND (t, 1));
    default:
      return NULL;
    }
}

/* Check the compare STMT in LOOP.  If it compares an induction
   variable to a loop invariant, return true, and save
   LOOP_INVARIANT, COMPARE_CODE and LOOP_STEP.
   Otherwise return false and set LOOP_INVARIANT to NULL.  */

static bool
is_comparison_with_loop_invariant_p (gimple stmt, struct loop *loop,
				     tree *loop_invariant,
				     enum tree_code *compare_code,
				     tree *loop_step,
				     tree *loop_iv_base)
{
  tree op0, op1, bound, base;
  affine_iv iv0, iv1;
  enum tree_code code;
  tree step;

  code = gimple_cond_code (stmt);
  *loop_invariant = NULL;

  switch (code)
    {
    case GT_EXPR:
    case GE_EXPR:
    case NE_EXPR:
    case LT_EXPR:
    case LE_EXPR:
    case EQ_EXPR:
      break;

    default:
      return false;
    }

  op0 = gimple_cond_lhs (stmt);
  op1 = gimple_cond_rhs (stmt);

  if ((TREE_CODE (op0) != SSA_NAME && TREE_CODE (op0) != INTEGER_CST)
      || (TREE_CODE (op1) != SSA_NAME && TREE_CODE (op1) != INTEGER_CST))
    return false;
  if (!simple_iv (loop, loop_containing_stmt (stmt), op0, &iv0, true))
    return false;
  if (!simple_iv (loop, loop_containing_stmt (stmt), op1, &iv1, true))
    return false;
  if (TREE_CODE (iv0.step) != INTEGER_CST
      || TREE_CODE (iv1.step) != INTEGER_CST)
    return false;
  if ((integer_zerop (iv0.step) && integer_zerop (iv1.step))
      || (!integer_zerop (iv0.step) && !integer_zerop (iv1.step)))
    return false;

  if (integer_zerop (iv0.step))
    {
      if (code != NE_EXPR && code != EQ_EXPR)
	code = invert_tree_comparison (code, false);
      bound = iv0.base;
      base = iv1.base;
      if (tree_fits_shwi_p (iv1.step))
	step = iv1.step;
      else
	return false;
    }
  else
    {
      bound = iv1.base;
      base = iv0.base;
      if (tree_fits_shwi_p (iv0.step))
	step = iv0.step;
      else
	return false;
    }

  if (TREE_CODE (bound) != INTEGER_CST)
    bound = get_base_value (bound);
  if (!bound)
    return false;
  if (TREE_CODE (base) != INTEGER_CST)
    base = get_base_value (base);
  if (!base)
    return false;

  *loop_invariant = bound;
  *compare_code = code;
  *loop_step = step;
  *loop_iv_base = base;
  return true;
}

/* Compare two SSA_NAMEs: returns TRUE if T1 and T2 are value coherent.  */

static bool
expr_coherent_p (tree t1, tree t2)
{
  gimple stmt;
  tree ssa_name_1 = NULL;
  tree ssa_name_2 = NULL;

  gcc_assert (TREE_CODE (t1) == SSA_NAME || TREE_CODE (t1) == INTEGER_CST);
  gcc_assert (TREE_CODE (t2) == SSA_NAME || TREE_CODE (t2) == INTEGER_CST);

  if (t1 == t2)
    return true;

  if (TREE_CODE (t1) == INTEGER_CST && TREE_CODE (t2) == INTEGER_CST)
    return true;
  if (TREE_CODE (t1) == INTEGER_CST || TREE_CODE (t2) == INTEGER_CST)
    return false;

  /* Check to see if t1 is expressed/defined with t2.  */
  stmt = SSA_NAME_DEF_STMT (t1);
  gcc_assert (stmt != NULL);
  if (is_gimple_assign (stmt))
    {
      ssa_name_1 = SINGLE_SSA_TREE_OPERAND (stmt, SSA_OP_USE);
      if (ssa_name_1 && ssa_name_1 == t2)
	return true;
    }

  /* Check to see if t2 is expressed/defined with t1.  */
  stmt = SSA_NAME_DEF_STMT (t2);
  gcc_assert (stmt != NULL);
  if (is_gimple_assign (stmt))
    {
      ssa_name_2 = SINGLE_SSA_TREE_OPERAND (stmt, SSA_OP_USE);
      if (ssa_name_2 && ssa_name_2 == t1)
	return true;
    }

  /* Compare if t1 and t2's def_stmts are identical.  */
  if (ssa_name_2 != NULL && ssa_name_1 == ssa_name_2)
    return true;
  else
    return false;
}

/* Predict branch probability of BB when BB contains a branch that compares
   an induction variable in LOOP with LOOP_IV_BASE_VAR to LOOP_BOUND_VAR.  The
   loop exit is compared using LOOP_BOUND_CODE, with step of LOOP_BOUND_STEP.

   E.g.
     for (int i = 0; i < bound; i++) {
       if (i < bound - 2)
	 computation_1();
       else
	 computation_2();
     }

   In this loop, we will predict the branch inside the loop to be taken.  */

static void
predict_iv_comparison (struct loop *loop, basic_block bb,
		       tree loop_bound_var,
		       tree loop_iv_base_var,
		       enum tree_code loop_bound_code,
		       int loop_bound_step)
{
  gimple stmt;
  tree compare_var, compare_base;
  enum tree_code compare_code;
  tree compare_step_var;
  edge then_edge;
  edge_iterator ei;

  if (predicted_by_p (bb, PRED_LOOP_ITERATIONS_GUESSED)
      || predicted_by_p (bb, PRED_LOOP_ITERATIONS)
      || predicted_by_p (bb, PRED_LOOP_EXIT))
    return;

  stmt = last_stmt (bb);
  if (!stmt || gimple_code (stmt) != GIMPLE_COND)
    return;
  if (!is_comparison_with_loop_invariant_p (stmt, loop, &compare_var,
					    &compare_code,
					    &compare_step_var,
					    &compare_base))
    return;

  /* Find the taken edge.  */
  FOR_EACH_EDGE (then_edge, ei, bb->succs)
    if (then_edge->flags & EDGE_TRUE_VALUE)
      break;

  /* When comparing an IV to a loop invariant, NE is more likely to be
     taken while EQ is more likely to be not-taken.  */
  if (compare_code == NE_EXPR)
    {
      predict_edge_def (then_edge, PRED_LOOP_IV_COMPARE_GUESS, TAKEN);
      return;
    }
  else if (compare_code == EQ_EXPR)
    {
      predict_edge_def (then_edge, PRED_LOOP_IV_COMPARE_GUESS, NOT_TAKEN);
      return;
    }

  if (!expr_coherent_p (loop_iv_base_var, compare_base))
    return;

  /* If loop bound, base and compare bound are all constants, we can
     calculate the probability directly.  */
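  /* For instance (a worked example, not code): in
       for (i = 0; i < 10; i++) if (i < 8) ...
     loop_count = (10 - 0) / 1 = 10 and compare_count = (8 - 0) / 1 = 8,
     so the branch is predicted taken with probability 8 / 10 = 80%.  */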
  if (tree_fits_shwi_p (loop_bound_var)
      && tree_fits_shwi_p (compare_var)
      && tree_fits_shwi_p (compare_base))
    {
      int probability;
      bool overflow, overall_overflow = false;
      widest_int compare_count, tem;

      /* (loop_bound - base) / compare_step */
      tem = wi::sub (wi::to_widest (loop_bound_var),
		     wi::to_widest (compare_base), SIGNED, &overflow);
      overall_overflow |= overflow;
      widest_int loop_count = wi::div_trunc (tem,
					     wi::to_widest (compare_step_var),
					     SIGNED, &overflow);
      overall_overflow |= overflow;

      if (!wi::neg_p (wi::to_widest (compare_step_var))
	  ^ (compare_code == LT_EXPR || compare_code == LE_EXPR))
	{
	  /* (loop_bound - compare_bound) / compare_step */
	  tem = wi::sub (wi::to_widest (loop_bound_var),
			 wi::to_widest (compare_var), SIGNED, &overflow);
	  overall_overflow |= overflow;
	  compare_count = wi::div_trunc (tem, wi::to_widest (compare_step_var),
					 SIGNED, &overflow);
	  overall_overflow |= overflow;
	}
      else
	{
	  /* (compare_bound - base) / compare_step */
	  tem = wi::sub (wi::to_widest (compare_var),
			 wi::to_widest (compare_base), SIGNED, &overflow);
	  overall_overflow |= overflow;
	  compare_count = wi::div_trunc (tem, wi::to_widest (compare_step_var),
					 SIGNED, &overflow);
	  overall_overflow |= overflow;
	}
      if (compare_code == LE_EXPR || compare_code == GE_EXPR)
	++compare_count;
      if (loop_bound_code == LE_EXPR || loop_bound_code == GE_EXPR)
	++loop_count;
      if (wi::neg_p (compare_count))
	compare_count = 0;
      if (wi::neg_p (loop_count))
	loop_count = 0;
      if (loop_count == 0)
	probability = 0;
      else if (wi::cmps (compare_count, loop_count) == 1)
	probability = REG_BR_PROB_BASE;
      else
	{
	  tem = compare_count * REG_BR_PROB_BASE;
	  tem = wi::udiv_trunc (tem, loop_count);
	  probability = tem.to_uhwi ();
	}

      if (!overall_overflow)
	predict_edge (then_edge, PRED_LOOP_IV_COMPARE, probability);

      return;
    }

  if (expr_coherent_p (loop_bound_var, compare_var))
    {
      if ((loop_bound_code == LT_EXPR || loop_bound_code == LE_EXPR)
	  && (compare_code == LT_EXPR || compare_code == LE_EXPR))
	predict_edge_def (then_edge, PRED_LOOP_IV_COMPARE_GUESS, TAKEN);
      else if ((loop_bound_code == GT_EXPR || loop_bound_code == GE_EXPR)
	       && (compare_code == GT_EXPR || compare_code == GE_EXPR))
	predict_edge_def (then_edge, PRED_LOOP_IV_COMPARE_GUESS, TAKEN);
      else if (loop_bound_code == NE_EXPR)
	{
	  /* If the loop backedge condition is "(i != bound)", we do
	     the comparison based on the step of IV:
	       * step < 0 : backedge condition is like (i > bound)
	       * step > 0 : backedge condition is like (i < bound)  */
	  gcc_assert (loop_bound_step != 0);
	  if (loop_bound_step > 0
	      && (compare_code == LT_EXPR
		  || compare_code == LE_EXPR))
	    predict_edge_def (then_edge, PRED_LOOP_IV_COMPARE_GUESS, TAKEN);
	  else if (loop_bound_step < 0
		   && (compare_code == GT_EXPR
		       || compare_code == GE_EXPR))
	    predict_edge_def (then_edge, PRED_LOOP_IV_COMPARE_GUESS, TAKEN);
	  else
	    predict_edge_def (then_edge, PRED_LOOP_IV_COMPARE_GUESS, NOT_TAKEN);
	}
      else
	/* The branch is predicted not-taken if loop_bound_code is
	   opposite with compare_code.  */
	predict_edge_def (then_edge, PRED_LOOP_IV_COMPARE_GUESS, NOT_TAKEN);
    }
  else if (expr_coherent_p (loop_iv_base_var, compare_var))
    {
      /* For cases like:
	   for (i = s; i < h; i++)
	     if (i > s + 2) ....
	 The branch should be predicted taken.  */
      if (loop_bound_step > 0
	  && (compare_code == GT_EXPR || compare_code == GE_EXPR))
	predict_edge_def (then_edge, PRED_LOOP_IV_COMPARE_GUESS, TAKEN);
      else if (loop_bound_step < 0
	       && (compare_code == LT_EXPR || compare_code == LE_EXPR))
	predict_edge_def (then_edge, PRED_LOOP_IV_COMPARE_GUESS, TAKEN);
      else
	predict_edge_def (then_edge, PRED_LOOP_IV_COMPARE_GUESS, NOT_TAKEN);
    }
}

/* Predict for extra loop exits that will lead to EXIT_EDGE.  The extra loop
   exits result from short-circuit conditions that generate an iftmp.  E.g.:

   if (foo() || global > 10)
     break;

   This will be translated into:

   BB3:
     loop header...
   BB4:
     if foo() goto BB6 else goto BB5
   BB5:
     if global > 10 goto BB6 else goto BB7
   BB6:
     goto BB7
   BB7:
     iftmp = (PHI 0(BB5), 1(BB6))
     if iftmp == 1 goto BB8 else goto BB3
   BB8:
     outside of the loop...

   The edge BB7->BB8 is a loop exit because BB8 is outside of the loop.
   From the dataflow, we can infer that BB4->BB6 and BB5->BB6 are also loop
   exits.  This function takes BB7->BB8 as input, finds the extra loop
   exits, and predicts them using PRED_LOOP_EXIT.  */

static void
predict_extra_loop_exits (edge exit_edge)
{
  unsigned i;
  bool check_value_one;
  gimple phi_stmt;
  tree cmp_rhs, cmp_lhs;
  gimple cmp_stmt = last_stmt (exit_edge->src);

  if (!cmp_stmt || gimple_code (cmp_stmt) != GIMPLE_COND)
    return;
  cmp_rhs = gimple_cond_rhs (cmp_stmt);
  cmp_lhs = gimple_cond_lhs (cmp_stmt);
  if (!TREE_CONSTANT (cmp_rhs)
      || !(integer_zerop (cmp_rhs) || integer_onep (cmp_rhs)))
    return;
  if (TREE_CODE (cmp_lhs) != SSA_NAME)
    return;

  /* If check_value_one is true, only the phi_args with value '1' will lead
     to loop exit.  Otherwise, only the phi_args with value '0' will lead to
     loop exit.  */
  check_value_one = (((integer_onep (cmp_rhs))
		      ^ (gimple_cond_code (cmp_stmt) == EQ_EXPR))
		     ^ ((exit_edge->flags & EDGE_TRUE_VALUE) != 0));

  phi_stmt = SSA_NAME_DEF_STMT (cmp_lhs);
  if (!phi_stmt || gimple_code (phi_stmt) != GIMPLE_PHI)
    return;

  for (i = 0; i < gimple_phi_num_args (phi_stmt); i++)
    {
      edge e1;
      edge_iterator ei;
      tree val = gimple_phi_arg_def (phi_stmt, i);
      edge e = gimple_phi_arg_edge (phi_stmt, i);

      if (!TREE_CONSTANT (val) || !(integer_zerop (val) || integer_onep (val)))
	continue;
      if ((check_value_one ^ integer_onep (val)) == 1)
	continue;
      if (EDGE_COUNT (e->src->succs) != 1)
	{
	  predict_paths_leading_to_edge (e, PRED_LOOP_EXIT, NOT_TAKEN);
	  continue;
	}

      FOR_EACH_EDGE (e1, ei, e->src->preds)
	predict_paths_leading_to_edge (e1, PRED_LOOP_EXIT, NOT_TAKEN);
    }
}

/* Predict edge probabilities by exploiting loop structure.  */

static void
predict_loops (void)
{
  struct loop *loop;

  /* Try to predict out blocks in a loop that are not part of a
     natural loop.  */
  FOR_EACH_LOOP (loop, 0)
    {
      basic_block bb, *bbs;
      unsigned j, n_exits;
      vec<edge> exits;
      struct tree_niter_desc niter_desc;
      edge ex;
      struct nb_iter_bound *nb_iter;
      enum tree_code loop_bound_code = ERROR_MARK;
      tree loop_bound_step = NULL;
      tree loop_bound_var = NULL;
      tree loop_iv_base = NULL;
      gimple stmt = NULL;

      exits = get_loop_exit_edges (loop);
      n_exits = exits.length ();
      if (!n_exits)
	{
	  exits.release ();
	  continue;
	}

      FOR_EACH_VEC_ELT (exits, j, ex)
	{
	  tree niter = NULL;
	  HOST_WIDE_INT nitercst;
	  int max = PARAM_VALUE (PARAM_MAX_PREDICTED_ITERATIONS);
	  int probability;
	  enum br_predictor predictor;

	  predict_extra_loop_exits (ex);

	  if (number_of_iterations_exit (loop, ex, &niter_desc, false, false))
	    niter = niter_desc.niter;
	  if (!niter || TREE_CODE (niter_desc.niter) != INTEGER_CST)
	    niter = loop_niter_by_eval (loop, ex);

	  if (TREE_CODE (niter) == INTEGER_CST)
	    {
	      if (tree_fits_uhwi_p (niter)
		  && max
		  && compare_tree_int (niter, max - 1) == -1)
		nitercst = tree_to_uhwi (niter) + 1;
	      else
		nitercst = max;
	      predictor = PRED_LOOP_ITERATIONS;
	    }
	  /* If we have just one exit and we can derive some information about
	     the number of iterations of the loop from the statements inside
	     the loop, use it to predict this exit.  */
	  else if (n_exits == 1)
	    {
	      nitercst = estimated_stmt_executions_int (loop);
	      if (nitercst < 0)
		continue;
	      if (nitercst > max)
		nitercst = max;

	      predictor = PRED_LOOP_ITERATIONS_GUESSED;
	    }
	  else
	    continue;

	  /* If the prediction for number of iterations is zero, do not
	     predict the exit edges.  */
	  if (nitercst == 0)
	    continue;

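	  /* Each iteration then exits with chance ~1/nitercst; e.g. a loop
	     expected to iterate 100 times gets (10000 + 50) / 100 == 100,
	     i.e. an exit probability of about 1%, assuming the common
	     REG_BR_PROB_BASE of 10000.  */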
	  probability = ((REG_BR_PROB_BASE + nitercst / 2) / nitercst);
	  predict_edge (ex, predictor, probability);
	}
      exits.release ();

      /* Find information about loop bound variables.  */
      for (nb_iter = loop->bounds; nb_iter;
	   nb_iter = nb_iter->next)
	if (nb_iter->stmt
	    && gimple_code (nb_iter->stmt) == GIMPLE_COND)
	  {
	    stmt = nb_iter->stmt;
	    break;
	  }
      if (!stmt && last_stmt (loop->header)
	  && gimple_code (last_stmt (loop->header)) == GIMPLE_COND)
	stmt = last_stmt (loop->header);
      if (stmt)
	is_comparison_with_loop_invariant_p (stmt, loop,
					     &loop_bound_var,
					     &loop_bound_code,
					     &loop_bound_step,
					     &loop_iv_base);

      bbs = get_loop_body (loop);

      for (j = 0; j < loop->num_nodes; j++)
	{
	  int header_found = 0;
	  edge e;
	  edge_iterator ei;

	  bb = bbs[j];

	  /* Bypass loop heuristics on continue statement.  These
	     statements construct loops via "non-loop" constructs
	     in the source language and are better handled
	     separately.  */
	  if (predicted_by_p (bb, PRED_CONTINUE))
	    continue;

	  /* Loop branch heuristics - predict an edge back to a
	     loop's head as taken.  */
	  if (bb == loop->latch)
	    {
	      e = find_edge (loop->latch, loop->header);
	      if (e)
		{
		  header_found = 1;
		  predict_edge_def (e, PRED_LOOP_BRANCH, TAKEN);
		}
	    }

	  /* Loop exit heuristics - predict an edge exiting the loop if the
	     conditional has no loop header successors as not taken.  */
	  if (!header_found
	      /* If we already used more reliable loop exit predictors, do not
		 bother with PRED_LOOP_EXIT.  */
	      && !predicted_by_p (bb, PRED_LOOP_ITERATIONS_GUESSED)
	      && !predicted_by_p (bb, PRED_LOOP_ITERATIONS))
	    {
	      /* For loops with many exits we don't want to predict all exits
		 with a pretty large probability, because if all exits are
		 considered in a row, the loop would be predicted to iterate
		 almost never.  The code to divide the probability by the
		 number of exits is very rough.  It should compute the number
		 of exits taken on each path through the function (not the
		 overall number of exits that might be a lot higher for loops
		 with wide switch statements in them) and compute the n-th
		 square root.

		 We limit the minimal probability to 2% to avoid
		 EDGE_PROBABILITY_RELIABLE from trusting the branch
		 prediction, as this was causing a regression in the perl
		 benchmark containing such a wide loop.  */

	      int probability = ((REG_BR_PROB_BASE
				  - predictor_info [(int) PRED_LOOP_EXIT].hitrate)
				 / n_exits);
	      if (probability < HITRATE (2))
		probability = HITRATE (2);
	      FOR_EACH_EDGE (e, ei, bb->succs)
		if (e->dest->index < NUM_FIXED_BLOCKS
		    || !flow_bb_inside_loop_p (loop, e->dest))
		  predict_edge (e, PRED_LOOP_EXIT, probability);
	    }
	  if (loop_bound_var)
	    predict_iv_comparison (loop, bb, loop_bound_var, loop_iv_base,
				   loop_bound_code,
				   tree_to_shwi (loop_bound_step));
	}

      /* Free basic blocks from get_loop_body.  */
      free (bbs);
    }
}

/* Attempt to predict probabilities of BB outgoing edges using local
   properties.  */
static void
bb_estimate_probability_locally (basic_block bb)
{
  rtx last_insn = BB_END (bb);
  rtx cond;

  if (! can_predict_insn_p (last_insn))
    return;
  cond = get_condition (last_insn, NULL, false, false);
  if (! cond)
    return;

  /* Try "pointer heuristic."
     A comparison ptr == 0 is predicted as false.
     Similarly, a comparison ptr1 == ptr2 is predicted as false.  */
  if (COMPARISON_P (cond)
      && ((REG_P (XEXP (cond, 0)) && REG_POINTER (XEXP (cond, 0)))
	  || (REG_P (XEXP (cond, 1)) && REG_POINTER (XEXP (cond, 1)))))
    {
      if (GET_CODE (cond) == EQ)
	predict_insn_def (last_insn, PRED_POINTER, NOT_TAKEN);
      else if (GET_CODE (cond) == NE)
	predict_insn_def (last_insn, PRED_POINTER, TAKEN);
    }
  else

  /* Try "opcode heuristic."
     EQ tests are usually false and NE tests are usually true.  Also,
     most quantities are positive, so we can make the appropriate guesses
     about signed comparisons against zero.  */
    switch (GET_CODE (cond))
      {
      case CONST_INT:
	/* Unconditional branch.  */
	predict_insn_def (last_insn, PRED_UNCONDITIONAL,
			  cond == const0_rtx ? NOT_TAKEN : TAKEN);
	break;

      case EQ:
      case UNEQ:
	/* Floating point comparisons appear to behave in a very
	   unpredictable way because of the special role of = tests in
	   FP code.  */
	if (FLOAT_MODE_P (GET_MODE (XEXP (cond, 0))))
	  ;
	/* Comparisons with 0 are often used for booleans and there is
	   nothing useful to predict about them.  */
	else if (XEXP (cond, 1) == const0_rtx
		 || XEXP (cond, 0) == const0_rtx)
	  ;
	else
	  predict_insn_def (last_insn, PRED_OPCODE_NONEQUAL, NOT_TAKEN);
	break;

      case NE:
      case LTGT:
	/* Floating point comparisons appear to behave in a very
	   unpredictable way because of the special role of = tests in
	   FP code.  */
	if (FLOAT_MODE_P (GET_MODE (XEXP (cond, 0))))
	  ;
	/* Comparisons with 0 are often used for booleans and there is
	   nothing useful to predict about them.  */
	else if (XEXP (cond, 1) == const0_rtx
		 || XEXP (cond, 0) == const0_rtx)
	  ;
	else
	  predict_insn_def (last_insn, PRED_OPCODE_NONEQUAL, TAKEN);
	break;

      case ORDERED:
	predict_insn_def (last_insn, PRED_FPOPCODE, TAKEN);
	break;

      case UNORDERED:
	predict_insn_def (last_insn, PRED_FPOPCODE, NOT_TAKEN);
	break;

      case LE:
      case LT:
	if (XEXP (cond, 1) == const0_rtx || XEXP (cond, 1) == const1_rtx
	    || XEXP (cond, 1) == constm1_rtx)
	  predict_insn_def (last_insn, PRED_OPCODE_POSITIVE, NOT_TAKEN);
	break;

      case GE:
      case GT:
	if (XEXP (cond, 1) == const0_rtx || XEXP (cond, 1) == const1_rtx
	    || XEXP (cond, 1) == constm1_rtx)
	  predict_insn_def (last_insn, PRED_OPCODE_POSITIVE, TAKEN);
	break;

      default:
	break;
      }
}

/* Set edge->probability for each successor edge of BB.  */
void
guess_outgoing_edge_probabilities (basic_block bb)
{
  bb_estimate_probability_locally (bb);
  combine_predictions_for_insn (BB_END (bb), bb);
}

static tree expr_expected_value (tree, bitmap, enum br_predictor *predictor);

/* Helper function for expr_expected_value.  */

static tree
expr_expected_value_1 (tree type, tree op0, enum tree_code code,
		       tree op1, bitmap visited, enum br_predictor *predictor)
{
  gimple def;

  if (predictor)
    *predictor = PRED_UNCONDITIONAL;

  if (get_gimple_rhs_class (code) == GIMPLE_SINGLE_RHS)
    {
      if (TREE_CONSTANT (op0))
	return op0;

      if (code != SSA_NAME)
	return NULL_TREE;

      def = SSA_NAME_DEF_STMT (op0);

      /* If we were already here, break the infinite cycle.  */
      if (!bitmap_set_bit (visited, SSA_NAME_VERSION (op0)))
	return NULL;

      if (gimple_code (def) == GIMPLE_PHI)
	{
	  /* All the arguments of the PHI node must have the same constant
	     expected value.  */
	  int i, n = gimple_phi_num_args (def);
	  tree val = NULL, new_val;

	  for (i = 0; i < n; i++)
	    {
	      tree arg = PHI_ARG_DEF (def, i);
	      enum br_predictor predictor2;

	      /* If this PHI has itself as an argument, we cannot
		 determine the expected value of this argument.  However,
		 if we can find an expected constant value for the other
		 PHI args then we can still be sure that this is
		 likely a constant.  So be optimistic and just
		 continue with the next argument.  */
	      if (arg == PHI_RESULT (def))
		continue;

	      new_val = expr_expected_value (arg, visited, &predictor2);

	      /* It is difficult to combine value predictors.  Simply assume
		 that the later predictor is weaker and take its
		 prediction.  */
	      if (predictor && *predictor < predictor2)
		*predictor = predictor2;
	      if (!new_val)
		return NULL;
	      if (!val)
		val = new_val;
	      else if (!operand_equal_p (val, new_val, false))
		return NULL;
	    }
	  return val;
	}
      if (is_gimple_assign (def))
	{
	  if (gimple_assign_lhs (def) != op0)
	    return NULL;

	  return expr_expected_value_1 (TREE_TYPE (gimple_assign_lhs (def)),
					gimple_assign_rhs1 (def),
					gimple_assign_rhs_code (def),
					gimple_assign_rhs2 (def),
					visited, predictor);
	}

      if (is_gimple_call (def))
	{
	  tree decl = gimple_call_fndecl (def);
	  if (!decl)
	    {
	      if (gimple_call_internal_p (def)
		  && gimple_call_internal_fn (def) == IFN_BUILTIN_EXPECT)
		{
		  gcc_assert (gimple_call_num_args (def) == 3);
		  tree val = gimple_call_arg (def, 0);
		  if (TREE_CONSTANT (val))
		    return val;
		  if (predictor)
		    {
		      *predictor = PRED_BUILTIN_EXPECT;
		      tree val2 = gimple_call_arg (def, 2);
		      gcc_assert (TREE_CODE (val2) == INTEGER_CST
				  && tree_fits_uhwi_p (val2)
				  && tree_to_uhwi (val2) < END_PREDICTORS);
		      *predictor = (enum br_predictor) tree_to_uhwi (val2);
		    }
		  return gimple_call_arg (def, 1);
		}
	      return NULL;
	    }
	  if (DECL_BUILT_IN_CLASS (decl) == BUILT_IN_NORMAL)
	    switch (DECL_FUNCTION_CODE (decl))
	      {
	      case BUILT_IN_EXPECT:
		{
		  tree val;
		  if (gimple_call_num_args (def) != 2)
		    return NULL;
		  val = gimple_call_arg (def, 0);
		  if (TREE_CONSTANT (val))
		    return val;
		  if (predictor)
		    *predictor = PRED_BUILTIN_EXPECT;
		  return gimple_call_arg (def, 1);
		}

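	      /* For the BUILT_IN_EXPECT case above, a sketch of the effect:
		 given `if (__builtin_expect (x, 1))' with non-constant X,
		 *PREDICTOR is set to PRED_BUILTIN_EXPECT and the constant 1
		 is returned as the value X most likely has at run time.  */
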
1889 case BUILT_IN_SYNC_BOOL_COMPARE_AND_SWAP_N:
1890 case BUILT_IN_SYNC_BOOL_COMPARE_AND_SWAP_1:
1891 case BUILT_IN_SYNC_BOOL_COMPARE_AND_SWAP_2:
1892 case BUILT_IN_SYNC_BOOL_COMPARE_AND_SWAP_4:
1893 case BUILT_IN_SYNC_BOOL_COMPARE_AND_SWAP_8:
1894 case BUILT_IN_SYNC_BOOL_COMPARE_AND_SWAP_16:
1895 case BUILT_IN_ATOMIC_COMPARE_EXCHANGE:
1896 case BUILT_IN_ATOMIC_COMPARE_EXCHANGE_N:
1897 case BUILT_IN_ATOMIC_COMPARE_EXCHANGE_1:
1898 case BUILT_IN_ATOMIC_COMPARE_EXCHANGE_2:
1899 case BUILT_IN_ATOMIC_COMPARE_EXCHANGE_4:
1900 case BUILT_IN_ATOMIC_COMPARE_EXCHANGE_8:
1901 case BUILT_IN_ATOMIC_COMPARE_EXCHANGE_16:
1902 /* Assume that any given atomic operation has low contention,
1903 and thus the compare-and-swap operation succeeds. */
1904 if (predictor)
1905 *predictor = PRED_COMPARE_AND_SWAP;
1906 return boolean_true_node;
1907 }
1908 }
1909
1910 return NULL;
1911 }
1912
1913 if (get_gimple_rhs_class (code) == GIMPLE_BINARY_RHS)
1914 {
1915 tree res;
1916 enum br_predictor predictor2;
1917 op0 = expr_expected_value (op0, visited, predictor);
1918 if (!op0)
1919 return NULL;
1920 op1 = expr_expected_value (op1, visited, &predictor2);
1921 if (predictor && *predictor < predictor2)
1922 *predictor = predictor2;
1923 if (!op1)
1924 return NULL;
1925 res = fold_build2 (code, type, op0, op1);
1926 if (TREE_CONSTANT (res))
1927 return res;
1928 return NULL;
1929 }
1930 if (get_gimple_rhs_class (code) == GIMPLE_UNARY_RHS)
1931 {
1932 tree res;
1933 op0 = expr_expected_value (op0, visited, predictor);
1934 if (!op0)
1935 return NULL;
1936 res = fold_build1 (code, type, op0);
1937 if (TREE_CONSTANT (res))
1938 return res;
1939 return NULL;
1940 }
1941 return NULL;
1942 }
1943
1944 /* Return the constant EXPR will likely have at execution time, or
1945 NULL if unknown. The function is used by the builtin_expect branch
1946 predictor, so the evidence must come from that construct and from any
1947 additional constant folding.
1948 
1949 We may want to implement a more involved value guess (such as value
1950 range propagation based prediction), but such tricks should go into a new implementation. */
1951
1952 static tree
1953 expr_expected_value (tree expr, bitmap visited,
1954 enum br_predictor *predictor)
1955 {
1956 enum tree_code code;
1957 tree op0, op1;
1958
1959 if (TREE_CONSTANT (expr))
1960 {
1961 if (predictor)
1962 *predictor = PRED_UNCONDITIONAL;
1963 return expr;
1964 }
1965
1966 extract_ops_from_tree (expr, &code, &op0, &op1);
1967 return expr_expected_value_1 (TREE_TYPE (expr),
1968 op0, code, op1, visited, predictor);
1969 }
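/* Illustrative example (a sketch, not part of the pass): for source like

     if (__builtin_expect (n > 0, 1))

   the condition is lowered to a comparison against the result of the
   __builtin_expect call, so the walk above reaches the call through the
   SSA definition chain, folds the expected value 1 into the comparison,
   and returns boolean_true_node with *predictor set to
   PRED_BUILTIN_EXPECT. */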
1970 \f
1971 /* Predict using opcode of the last statement in basic block. */
1972 static void
1973 tree_predict_by_opcode (basic_block bb)
1974 {
1975 gimple stmt = last_stmt (bb);
1976 edge then_edge;
1977 tree op0, op1;
1978 tree type;
1979 tree val;
1980 enum tree_code cmp;
1981 bitmap visited;
1982 edge_iterator ei;
1983 enum br_predictor predictor;
1984
1985 if (!stmt || gimple_code (stmt) != GIMPLE_COND)
1986 return;
1987 FOR_EACH_EDGE (then_edge, ei, bb->succs)
1988 if (then_edge->flags & EDGE_TRUE_VALUE)
1989 break;
1990 op0 = gimple_cond_lhs (stmt);
1991 op1 = gimple_cond_rhs (stmt);
1992 cmp = gimple_cond_code (stmt);
1993 type = TREE_TYPE (op0);
1994 visited = BITMAP_ALLOC (NULL);
1995 val = expr_expected_value_1 (boolean_type_node, op0, cmp, op1, visited,
1996 &predictor);
1997 BITMAP_FREE (visited);
1998 if (val && TREE_CODE (val) == INTEGER_CST)
1999 {
2000 if (predictor == PRED_BUILTIN_EXPECT)
2001 {
2002 int percent = PARAM_VALUE (BUILTIN_EXPECT_PROBABILITY);
2003
2004 gcc_assert (percent >= 0 && percent <= 100);
2005 if (integer_zerop (val))
2006 percent = 100 - percent;
2007 predict_edge (then_edge, PRED_BUILTIN_EXPECT, HITRATE (percent));
2008 }
2009 else
2010 predict_edge (then_edge, predictor,
2011 integer_zerop (val) ? NOT_TAKEN : TAKEN);
2012 }
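/* For example, with the default --param builtin-expect-probability=90,
   an expected nonzero value predicts the THEN edge with HITRATE (90),
   while an expected zero flips this to HITRATE (10). */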
2013 /* Try "pointer heuristic."
2014 A comparison ptr == 0 is predicted as false.
2015 Similarly, a comparison ptr1 == ptr2 is predicted as false. */
2016 if (POINTER_TYPE_P (type))
2017 {
2018 if (cmp == EQ_EXPR)
2019 predict_edge_def (then_edge, PRED_TREE_POINTER, NOT_TAKEN);
2020 else if (cmp == NE_EXPR)
2021 predict_edge_def (then_edge, PRED_TREE_POINTER, TAKEN);
2022 }
2023 else
2024
2025 /* Try "opcode heuristic."
2026 EQ tests are usually false and NE tests are usually true. Also,
2027 most quantities are positive, so we can make the appropriate guesses
2028 about signed comparisons against zero. */
2029 switch (cmp)
2030 {
2031 case EQ_EXPR:
2032 case UNEQ_EXPR:
2033 /* Floating point comparisons appear to behave in a very
2034 unpredictable way because of the special role of == tests
2035 in FP code. */
2036 if (FLOAT_TYPE_P (type))
2037 ;
2038 /* Comparisons with 0 are often used for booleans and there is
2039 nothing useful to predict about them. */
2040 else if (integer_zerop (op0) || integer_zerop (op1))
2041 ;
2042 else
2043 predict_edge_def (then_edge, PRED_TREE_OPCODE_NONEQUAL, NOT_TAKEN);
2044 break;
2045
2046 case NE_EXPR:
2047 case LTGT_EXPR:
2048 /* Floating point comparisons appear to behave in a very
2049 unpredictable way because of the special role of == tests
2050 in FP code. */
2051 if (FLOAT_TYPE_P (type))
2052 ;
2053 /* Comparisons with 0 are often used for booleans and there is
2054 nothing useful to predict about them. */
2055 else if (integer_zerop (op0)
2056 || integer_zerop (op1))
2057 ;
2058 else
2059 predict_edge_def (then_edge, PRED_TREE_OPCODE_NONEQUAL, TAKEN);
2060 break;
2061
2062 case ORDERED_EXPR:
2063 predict_edge_def (then_edge, PRED_TREE_FPOPCODE, TAKEN);
2064 break;
2065
2066 case UNORDERED_EXPR:
2067 predict_edge_def (then_edge, PRED_TREE_FPOPCODE, NOT_TAKEN);
2068 break;
2069
2070 case LE_EXPR:
2071 case LT_EXPR:
2072 if (integer_zerop (op1)
2073 || integer_onep (op1)
2074 || integer_all_onesp (op1)
2075 || real_zerop (op1)
2076 || real_onep (op1)
2077 || real_minus_onep (op1))
2078 predict_edge_def (then_edge, PRED_TREE_OPCODE_POSITIVE, NOT_TAKEN);
2079 break;
2080
2081 case GE_EXPR:
2082 case GT_EXPR:
2083 if (integer_zerop (op1)
2084 || integer_onep (op1)
2085 || integer_all_onesp (op1)
2086 || real_zerop (op1)
2087 || real_onep (op1)
2088 || real_minus_onep (op1))
2089 predict_edge_def (then_edge, PRED_TREE_OPCODE_POSITIVE, TAKEN);
2090 break;
2091
2092 default:
2093 break;
2094 }
2095 }
2096
2097 /* Try to guess whether the return value means an error code. */
2098
2099 static enum br_predictor
2100 return_prediction (tree val, enum prediction *prediction)
2101 {
2102 /* VOID. */
2103 if (!val)
2104 return PRED_NO_PREDICTION;
2105 /* Different heuristics for pointers and scalars. */
2106 if (POINTER_TYPE_P (TREE_TYPE (val)))
2107 {
2108 /* NULL is usually not returned. */
2109 if (integer_zerop (val))
2110 {
2111 *prediction = NOT_TAKEN;
2112 return PRED_NULL_RETURN;
2113 }
2114 }
2115 else if (INTEGRAL_TYPE_P (TREE_TYPE (val)))
2116 {
2117 /* Negative return values are often used to indicate
2118 errors. */
2119 if (TREE_CODE (val) == INTEGER_CST
2120 && tree_int_cst_sgn (val) < 0)
2121 {
2122 *prediction = NOT_TAKEN;
2123 return PRED_NEGATIVE_RETURN;
2124 }
2125 /* Constant return values seem to be commonly taken.
2126 Zero/one often represent booleans, so exclude them from the
2127 heuristics. */
2128 if (TREE_CONSTANT (val)
2129 && (!integer_zerop (val) && !integer_onep (val)))
2130 {
2131 *prediction = TAKEN;
2132 return PRED_CONST_RETURN;
2133 }
2134 }
2135 return PRED_NO_PREDICTION;
2136 }
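/* For instance, "return -1;" yields PRED_NEGATIVE_RETURN with
   *prediction set to NOT_TAKEN, while "return 42;" yields
   PRED_CONST_RETURN with *prediction set to TAKEN. */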
2137
2138 /* Find the basic block with the return expression and look for a possible
2139 return value on which to apply the return_prediction heuristics. */
2140 static void
2141 apply_return_prediction (void)
2142 {
2143 gimple return_stmt = NULL;
2144 tree return_val;
2145 edge e;
2146 gimple phi;
2147 int phi_num_args, i;
2148 enum br_predictor pred;
2149 enum prediction direction;
2150 edge_iterator ei;
2151
2152 FOR_EACH_EDGE (e, ei, EXIT_BLOCK_PTR_FOR_FN (cfun)->preds)
2153 {
2154 return_stmt = last_stmt (e->src);
2155 if (return_stmt
2156 && gimple_code (return_stmt) == GIMPLE_RETURN)
2157 break;
2158 }
2159 if (!e)
2160 return;
2161 return_val = gimple_return_retval (return_stmt);
2162 if (!return_val)
2163 return;
2164 if (TREE_CODE (return_val) != SSA_NAME
2165 || !SSA_NAME_DEF_STMT (return_val)
2166 || gimple_code (SSA_NAME_DEF_STMT (return_val)) != GIMPLE_PHI)
2167 return;
2168 phi = SSA_NAME_DEF_STMT (return_val);
2169 phi_num_args = gimple_phi_num_args (phi);
2170 pred = return_prediction (PHI_ARG_DEF (phi, 0), &direction);
2171
2172 /* Avoid the degenerate case where all return values from the function
2173 belong to the same category (i.e. they are all positive constants),
2174 so we can hardly say anything about them. */
2175 for (i = 1; i < phi_num_args; i++)
2176 if (pred != return_prediction (PHI_ARG_DEF (phi, i), &direction))
2177 break;
2178 if (i != phi_num_args)
2179 for (i = 0; i < phi_num_args; i++)
2180 {
2181 pred = return_prediction (PHI_ARG_DEF (phi, i), &direction);
2182 if (pred != PRED_NO_PREDICTION)
2183 predict_paths_leading_to_edge (gimple_phi_arg_edge (phi, i), pred,
2184 direction);
2185 }
2186 }
2187
2188 /* Look for basic blocks that contain unlikely-to-happen events
2189 (such as noreturn calls) and mark all paths leading to the
2190 execution of these basic blocks as unlikely. */
2191
2192 static void
2193 tree_bb_level_predictions (void)
2194 {
2195 basic_block bb;
2196 bool has_return_edges = false;
2197 edge e;
2198 edge_iterator ei;
2199
2200 FOR_EACH_EDGE (e, ei, EXIT_BLOCK_PTR_FOR_FN (cfun)->preds)
2201 if (!(e->flags & (EDGE_ABNORMAL | EDGE_FAKE | EDGE_EH)))
2202 {
2203 has_return_edges = true;
2204 break;
2205 }
2206
2207 apply_return_prediction ();
2208
2209 FOR_EACH_BB_FN (bb, cfun)
2210 {
2211 gimple_stmt_iterator gsi;
2212
2213 for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
2214 {
2215 gimple stmt = gsi_stmt (gsi);
2216 tree decl;
2217
2218 if (is_gimple_call (stmt))
2219 {
2220 if ((gimple_call_flags (stmt) & ECF_NORETURN)
2221 && has_return_edges)
2222 predict_paths_leading_to (bb, PRED_NORETURN,
2223 NOT_TAKEN);
2224 decl = gimple_call_fndecl (stmt);
2225 if (decl
2226 && lookup_attribute ("cold",
2227 DECL_ATTRIBUTES (decl)))
2228 predict_paths_leading_to (bb, PRED_COLD_FUNCTION,
2229 NOT_TAKEN);
2230 }
2231 else if (gimple_code (stmt) == GIMPLE_PREDICT)
2232 {
2233 predict_paths_leading_to (bb, gimple_predict_predictor (stmt),
2234 gimple_predict_outcome (stmt));
2235 /* Keep GIMPLE_PREDICT around so early inlining will propagate
2236 hints to callers. */
2237 }
2238 }
2239 }
2240 }
2241
2242 #ifdef ENABLE_CHECKING
2243
2244 /* Callback for hash_map::traverse, asserts that the hash map is
2245 empty. */
2246
2247 bool
2248 assert_is_empty (const_basic_block const &, edge_prediction *const &value,
2249 void *)
2250 {
2251 gcc_assert (!value);
2252 return false;
2253 }
2254 #endif
2255
2256 /* Predict branch probabilities and estimate profile for basic block BB. */
2257
2258 static void
2259 tree_estimate_probability_bb (basic_block bb)
2260 {
2261 edge e;
2262 edge_iterator ei;
2263 gimple last;
2264
2265 FOR_EACH_EDGE (e, ei, bb->succs)
2266 {
2267 /* Predict edges to user labels with attributes. */
2268 if (e->dest != EXIT_BLOCK_PTR_FOR_FN (cfun))
2269 {
2270 gimple_stmt_iterator gi;
2271 for (gi = gsi_start_bb (e->dest); !gsi_end_p (gi); gsi_next (&gi))
2272 {
2273 gimple stmt = gsi_stmt (gi);
2274 tree decl;
2275
2276 if (gimple_code (stmt) != GIMPLE_LABEL)
2277 break;
2278 decl = gimple_label_label (stmt);
2279 if (DECL_ARTIFICIAL (decl))
2280 continue;
2281
2282 /* Finally, we have a user-defined label. */
2283 if (lookup_attribute ("cold", DECL_ATTRIBUTES (decl)))
2284 predict_edge_def (e, PRED_COLD_LABEL, NOT_TAKEN);
2285 else if (lookup_attribute ("hot", DECL_ATTRIBUTES (decl)))
2286 predict_edge_def (e, PRED_HOT_LABEL, TAKEN);
2287 }
2288 }
2289
2290 /* Predict early returns to be probable, as we've already taken
2291 care of error returns; other cases are often used for fast
2292 paths through the function.
2293
2294 Since we've already removed the return statements, we are
2295 looking for CFG like:
2296
2297 if (conditional)
2298 {
2299 ..
2300 goto return_block
2301 }
2302 some other blocks
2303 return_block:
2304 return_stmt. */
2305 if (e->dest != bb->next_bb
2306 && e->dest != EXIT_BLOCK_PTR_FOR_FN (cfun)
2307 && single_succ_p (e->dest)
2308 && single_succ_edge (e->dest)->dest == EXIT_BLOCK_PTR_FOR_FN (cfun)
2309 && (last = last_stmt (e->dest)) != NULL
2310 && gimple_code (last) == GIMPLE_RETURN)
2311 {
2312 edge e1;
2313 edge_iterator ei1;
2314
2315 if (single_succ_p (bb))
2316 {
2317 FOR_EACH_EDGE (e1, ei1, bb->preds)
2318 if (!predicted_by_p (e1->src, PRED_NULL_RETURN)
2319 && !predicted_by_p (e1->src, PRED_CONST_RETURN)
2320 && !predicted_by_p (e1->src, PRED_NEGATIVE_RETURN))
2321 predict_edge_def (e1, PRED_TREE_EARLY_RETURN, NOT_TAKEN);
2322 }
2323 else
2324 if (!predicted_by_p (e->src, PRED_NULL_RETURN)
2325 && !predicted_by_p (e->src, PRED_CONST_RETURN)
2326 && !predicted_by_p (e->src, PRED_NEGATIVE_RETURN))
2327 predict_edge_def (e, PRED_TREE_EARLY_RETURN, NOT_TAKEN);
2328 }
2329
2330 /* Look for a block we are guarding (i.e. we dominate it,
2331 but it doesn't postdominate us). */
2332 if (e->dest != EXIT_BLOCK_PTR_FOR_FN (cfun) && e->dest != bb
2333 && dominated_by_p (CDI_DOMINATORS, e->dest, e->src)
2334 && !dominated_by_p (CDI_POST_DOMINATORS, e->src, e->dest))
2335 {
2336 gimple_stmt_iterator bi;
2337
2338 /* The call heuristic claims that a guarded function call
2339 is improbable. This is because such calls are often used
2340 to signal exceptional situations such as printing error
2341 messages. */
2342 for (bi = gsi_start_bb (e->dest); !gsi_end_p (bi);
2343 gsi_next (&bi))
2344 {
2345 gimple stmt = gsi_stmt (bi);
2346 if (is_gimple_call (stmt)
2347 /* Constant and pure calls are hardly ever used to signal
2348 something exceptional. */
2349 && gimple_has_side_effects (stmt))
2350 {
2351 predict_edge_def (e, PRED_CALL, NOT_TAKEN);
2352 break;
2353 }
2354 }
2355 }
2356 }
2357 tree_predict_by_opcode (bb);
2358 }
2359
2360 /* Predict branch probabilities and estimate profile of the tree CFG.
2361 This function can be called from the loop optimizers to recompute
2362 the profile information. */
2363
2364 void
2365 tree_estimate_probability (void)
2366 {
2367 basic_block bb;
2368
2369 add_noreturn_fake_exit_edges ();
2370 connect_infinite_loops_to_exit ();
2371 /* We use loop_niter_by_eval, which requires that the loops have
2372 preheaders. */
2373 create_preheaders (CP_SIMPLE_PREHEADERS);
2374 calculate_dominance_info (CDI_POST_DOMINATORS);
2375
2376 bb_predictions = new hash_map<const_basic_block, edge_prediction *>;
2377 tree_bb_level_predictions ();
2378 record_loop_exits ();
2379
2380 if (number_of_loops (cfun) > 1)
2381 predict_loops ();
2382
2383 FOR_EACH_BB_FN (bb, cfun)
2384 tree_estimate_probability_bb (bb);
2385
2386 FOR_EACH_BB_FN (bb, cfun)
2387 combine_predictions_for_bb (bb);
2388
2389 #ifdef ENABLE_CHECKING
2390 bb_predictions->traverse<void *, assert_is_empty> (NULL);
2391 #endif
2392 delete bb_predictions;
2393 bb_predictions = NULL;
2394
2395 estimate_bb_frequencies (false);
2396 free_dominance_info (CDI_POST_DOMINATORS);
2397 remove_fake_exit_edges ();
2398 }
2399 \f
2400 /* Predict, using PRED, the edges to successors of CUR whose sources are
2401 not postdominated by BB, and recurse to all postdominators. */
2402
2403 static void
2404 predict_paths_for_bb (basic_block cur, basic_block bb,
2405 enum br_predictor pred,
2406 enum prediction taken,
2407 bitmap visited)
2408 {
2409 edge e;
2410 edge_iterator ei;
2411 basic_block son;
2412
2413 /* We are looking for all edges forming the edge cut induced by
2414 the set of all blocks postdominated by BB. */
2415 FOR_EACH_EDGE (e, ei, cur->preds)
2416 if (e->src->index >= NUM_FIXED_BLOCKS
2417 && !dominated_by_p (CDI_POST_DOMINATORS, e->src, bb))
2418 {
2419 edge e2;
2420 edge_iterator ei2;
2421 bool found = false;
2422
2423 /* Ignore fake edges and EH edges; we predict them as not taken anyway. */
2424 if (e->flags & (EDGE_EH | EDGE_FAKE))
2425 continue;
2426 gcc_assert (bb == cur || dominated_by_p (CDI_POST_DOMINATORS, cur, bb));
2427
2428 /* See if there is an edge from e->src that is not abnormal
2429 and does not lead to BB. */
2430 FOR_EACH_EDGE (e2, ei2, e->src->succs)
2431 if (e2 != e
2432 && !(e2->flags & (EDGE_EH | EDGE_FAKE))
2433 && !dominated_by_p (CDI_POST_DOMINATORS, e2->dest, bb))
2434 {
2435 found = true;
2436 break;
2437 }
2438
2439 /* If there is a non-abnormal path leaving e->src, predict the edge
2440 using the predictor. Otherwise we need to look for paths
2441 leading to e->src.
2442 
2443 The second case may lead to an infinite loop when we are predicting
2444 regions that are only reachable by abnormal edges. We simply
2445 prevent visiting a given BB twice. */
2446 if (found)
2447 predict_edge_def (e, pred, taken);
2448 else if (bitmap_set_bit (visited, e->src->index))
2449 predict_paths_for_bb (e->src, e->src, pred, taken, visited);
2450 }
2451 for (son = first_dom_son (CDI_POST_DOMINATORS, cur);
2452 son;
2453 son = next_dom_son (CDI_POST_DOMINATORS, son))
2454 predict_paths_for_bb (son, bb, pred, taken, visited);
2455 }
2456
2457 /* Predict all paths leading to BB according to PRED and the expected
2458 outcome TAKEN. */
2459
2460 static void
2461 predict_paths_leading_to (basic_block bb, enum br_predictor pred,
2462 enum prediction taken)
2463 {
2464 bitmap visited = BITMAP_ALLOC (NULL);
2465 predict_paths_for_bb (bb, bb, pred, taken, visited);
2466 BITMAP_FREE (visited);
2467 }
2468
2469 /* Like predict_paths_leading_to but take edge instead of basic block. */
2470
2471 static void
2472 predict_paths_leading_to_edge (edge e, enum br_predictor pred,
2473 enum prediction taken)
2474 {
2475 bool has_nonloop_edge = false;
2476 edge_iterator ei;
2477 edge e2;
2478
2479 basic_block bb = e->src;
2480 FOR_EACH_EDGE (e2, ei, bb->succs)
2481 if (e2->dest != e->src && e2->dest != e->dest
2482 && !(e2->flags & (EDGE_EH | EDGE_FAKE))
2483 && !dominated_by_p (CDI_POST_DOMINATORS, e->src, e2->dest))
2484 {
2485 has_nonloop_edge = true;
2486 break;
2487 }
2488 if (!has_nonloop_edge)
2489 {
2490 bitmap visited = BITMAP_ALLOC (NULL);
2491 predict_paths_for_bb (bb, bb, pred, taken, visited);
2492 BITMAP_FREE (visited);
2493 }
2494 else
2495 predict_edge_def (e, pred, taken);
2496 }
2497 \f
2498 /* This is used to carry information about basic blocks. It is
2499 attached to the AUX field of the standard CFG block. */
2500
2501 typedef struct block_info_def
2502 {
2503 /* Estimated frequency of execution of basic_block. */
2504 sreal frequency;
2505
2506 /* To keep queue of basic blocks to process. */
2507 basic_block next;
2508
2509 /* Number of predecessors we need to visit first. */
2510 int npredecessors;
2511 } *block_info;
2512
2513 /* Similar information for edges. */
2514 typedef struct edge_info_def
2515 {
2516 /* If the edge is a loopback edge, the probability that the edge will be
2517 reached provided the header is. The estimated number of iterations of
2518 the loop can then be computed as 1 / (1 - back_edge_prob). */
2519 sreal back_edge_prob;
2520 /* True if the edge is a loopback edge in the natural loop. */
2521 unsigned int back_edge:1;
2522 } *edge_info;
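/* As an example of the formula above, a back edge with back_edge_prob
   of 0.75 implies an estimated 1 / (1 - 0.75) = 4 iterations of the
   loop. */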
2523
2524 #define BLOCK_INFO(B) ((block_info) (B)->aux)
2525 #define EDGE_INFO(E) ((edge_info) (E)->aux)
2526
2527 /* Helper function for estimate_bb_frequencies.
2528 Propagate the frequencies in blocks marked in
2529 TOVISIT, starting in HEAD. */
2530
2531 static void
2532 propagate_freq (basic_block head, bitmap tovisit)
2533 {
2534 basic_block bb;
2535 basic_block last;
2536 unsigned i;
2537 edge e;
2538 basic_block nextbb;
2539 bitmap_iterator bi;
2540
2541 /* For each basic block we need to visit, count the number of its
2542 predecessors that we need to visit first. */
2543 EXECUTE_IF_SET_IN_BITMAP (tovisit, 0, i, bi)
2544 {
2545 edge_iterator ei;
2546 int count = 0;
2547
2548 bb = BASIC_BLOCK_FOR_FN (cfun, i);
2549
2550 FOR_EACH_EDGE (e, ei, bb->preds)
2551 {
2552 bool visit = bitmap_bit_p (tovisit, e->src->index);
2553
2554 if (visit && !(e->flags & EDGE_DFS_BACK))
2555 count++;
2556 else if (visit && dump_file && !EDGE_INFO (e)->back_edge)
2557 fprintf (dump_file,
2558 "Irreducible region hit, ignoring edge to %i->%i\n",
2559 e->src->index, bb->index);
2560 }
2561 BLOCK_INFO (bb)->npredecessors = count;
2562 /* When the function never returns, we will never process the exit block. */
2563 if (!count && bb == EXIT_BLOCK_PTR_FOR_FN (cfun))
2564 bb->count = bb->frequency = 0;
2565 }
2566
2567 memcpy (&BLOCK_INFO (head)->frequency, &real_one, sizeof (real_one));
2568 last = head;
2569 for (bb = head; bb; bb = nextbb)
2570 {
2571 edge_iterator ei;
2572 sreal cyclic_probability, frequency;
2573
2574 memcpy (&cyclic_probability, &real_zero, sizeof (real_zero));
2575 memcpy (&frequency, &real_zero, sizeof (real_zero));
2576
2577 nextbb = BLOCK_INFO (bb)->next;
2578 BLOCK_INFO (bb)->next = NULL;
2579
2580 /* Compute frequency of basic block. */
2581 if (bb != head)
2582 {
2583 #ifdef ENABLE_CHECKING
2584 FOR_EACH_EDGE (e, ei, bb->preds)
2585 gcc_assert (!bitmap_bit_p (tovisit, e->src->index)
2586 || (e->flags & EDGE_DFS_BACK));
2587 #endif
2588
2589 FOR_EACH_EDGE (e, ei, bb->preds)
2590 if (EDGE_INFO (e)->back_edge)
2591 {
2592 sreal_add (&cyclic_probability, &cyclic_probability,
2593 &EDGE_INFO (e)->back_edge_prob);
2594 }
2595 else if (!(e->flags & EDGE_DFS_BACK))
2596 {
2597 sreal tmp;
2598
2599 /* frequency += (e->probability
2600 * BLOCK_INFO (e->src)->frequency /
2601 REG_BR_PROB_BASE); */
2602
2603 sreal_init (&tmp, e->probability, 0);
2604 sreal_mul (&tmp, &tmp, &BLOCK_INFO (e->src)->frequency);
2605 sreal_mul (&tmp, &tmp, &real_inv_br_prob_base);
2606 sreal_add (&frequency, &frequency, &tmp);
2607 }
2608
2609 if (sreal_compare (&cyclic_probability, &real_zero) == 0)
2610 {
2611 memcpy (&BLOCK_INFO (bb)->frequency, &frequency,
2612 sizeof (frequency));
2613 }
2614 else
2615 {
2616 if (sreal_compare (&cyclic_probability, &real_almost_one) > 0)
2617 {
2618 memcpy (&cyclic_probability, &real_almost_one,
2619 sizeof (real_almost_one));
2620 }
2621
2622 /* BLOCK_INFO (bb)->frequency = frequency
2623 / (1 - cyclic_probability) */
2624
2625 sreal_sub (&cyclic_probability, &real_one, &cyclic_probability);
2626 sreal_div (&BLOCK_INFO (bb)->frequency,
2627 &frequency, &cyclic_probability);
2628 }
2629 }
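/* Worked example of the cyclic case above: a header entered with
   frequency 1 whose back edges carry a combined cyclic_probability of
   0.9 gets frequency 1 / (1 - 0.9) = 10, i.e. the loop body is
   estimated to execute ten times per entry. */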
2630
2631 bitmap_clear_bit (tovisit, bb->index);
2632
2633 e = find_edge (bb, head);
2634 if (e)
2635 {
2636 sreal tmp;
2637
2638 /* EDGE_INFO (e)->back_edge_prob
2639 = ((e->probability * BLOCK_INFO (bb)->frequency)
2640 / REG_BR_PROB_BASE); */
2641
2642 sreal_init (&tmp, e->probability, 0);
2643 sreal_mul (&tmp, &tmp, &BLOCK_INFO (bb)->frequency);
2644 sreal_mul (&EDGE_INFO (e)->back_edge_prob,
2645 &tmp, &real_inv_br_prob_base);
2646 }
2647
2648 /* Propagate to successor blocks. */
2649 FOR_EACH_EDGE (e, ei, bb->succs)
2650 if (!(e->flags & EDGE_DFS_BACK)
2651 && BLOCK_INFO (e->dest)->npredecessors)
2652 {
2653 BLOCK_INFO (e->dest)->npredecessors--;
2654 if (!BLOCK_INFO (e->dest)->npredecessors)
2655 {
2656 if (!nextbb)
2657 nextbb = e->dest;
2658 else
2659 BLOCK_INFO (last)->next = e->dest;
2660
2661 last = e->dest;
2662 }
2663 }
2664 }
2665 }
2666
2667 /* Estimate frequencies in loops at same nest level. */
2668
2669 static void
2670 estimate_loops_at_level (struct loop *first_loop)
2671 {
2672 struct loop *loop;
2673
2674 for (loop = first_loop; loop; loop = loop->next)
2675 {
2676 edge e;
2677 basic_block *bbs;
2678 unsigned i;
2679 bitmap tovisit = BITMAP_ALLOC (NULL);
2680
2681 estimate_loops_at_level (loop->inner);
2682
2683 /* Find current loop back edge and mark it. */
2684 e = loop_latch_edge (loop);
2685 EDGE_INFO (e)->back_edge = 1;
2686
2687 bbs = get_loop_body (loop);
2688 for (i = 0; i < loop->num_nodes; i++)
2689 bitmap_set_bit (tovisit, bbs[i]->index);
2690 free (bbs);
2691 propagate_freq (loop->header, tovisit);
2692 BITMAP_FREE (tovisit);
2693 }
2694 }
2695
2696 /* Propagates frequencies through structure of loops. */
2697
2698 static void
2699 estimate_loops (void)
2700 {
2701 bitmap tovisit = BITMAP_ALLOC (NULL);
2702 basic_block bb;
2703
2704 /* Start by estimating the frequencies in the loops. */
2705 if (number_of_loops (cfun) > 1)
2706 estimate_loops_at_level (current_loops->tree_root->inner);
2707
2708 /* Now propagate the frequencies through all the blocks. */
2709 FOR_ALL_BB_FN (bb, cfun)
2710 {
2711 bitmap_set_bit (tovisit, bb->index);
2712 }
2713 propagate_freq (ENTRY_BLOCK_PTR_FOR_FN (cfun), tovisit);
2714 BITMAP_FREE (tovisit);
2715 }
2716
2717 /* Drop the profile for NODE to guessed, and update its frequency based on
2718 whether it is expected to be hot given the CALL_COUNT. */
2719
2720 static void
2721 drop_profile (struct cgraph_node *node, gcov_type call_count)
2722 {
2723 struct function *fn = DECL_STRUCT_FUNCTION (node->decl);
2724 /* In the case where this was called by another function with a
2725 dropped profile, call_count will be 0. Since there are no
2726 non-zero call counts to this function, we don't know for sure
2727 whether it is hot, and therefore it will be marked normal below. */
2728 bool hot = maybe_hot_count_p (NULL, call_count);
2729
2730 if (dump_file)
2731 fprintf (dump_file,
2732 "Dropping 0 profile for %s/%i. %s based on calls.\n",
2733 node->name (), node->order,
2734 hot ? "Function is hot" : "Function is normal");
2735 /* We only expect to miss profiles for functions that are reached
2736 via non-zero call edges in cases where the function may have
2737 been linked from another module or library (COMDATs and extern
2738 templates). See the comments below for handle_missing_profiles.
2739 Also, only warn in cases where the missing counts exceed the
2740 number of training runs. In certain cases with an execv followed
2741 by a no-return call the profile for the no-return call is not
2742 dumped and there can be a mismatch. */
2743 if (!DECL_COMDAT (node->decl) && !DECL_EXTERNAL (node->decl)
2744 && call_count > profile_info->runs)
2745 {
2746 if (flag_profile_correction)
2747 {
2748 if (dump_file)
2749 fprintf (dump_file,
2750 "Missing counts for called function %s/%i\n",
2751 node->name (), node->order);
2752 }
2753 else
2754 warning (0, "Missing counts for called function %s/%i",
2755 node->name (), node->order);
2756 }
2757
2758 profile_status_for_fn (fn)
2759 = (flag_guess_branch_prob ? PROFILE_GUESSED : PROFILE_ABSENT);
2760 node->frequency
2761 = hot ? NODE_FREQUENCY_HOT : NODE_FREQUENCY_NORMAL;
2762 }
2763
2764 /* In the case of COMDAT routines, multiple object files will contain the same
2765 function and the linker will select one for the binary. In that case
2766 all the other copies from the profile instrument binary will be missing
2767 profile counts. Look for cases where this happened, due to non-zero
2768 call counts going to 0-count functions, and drop the profile to guessed
2769 so that we can use the estimated probabilities and avoid optimizing only
2770 for size.
2771
2772 The other case where the profile may be missing is when the routine
2773 is not going to be emitted to the object file, e.g. for "extern template"
2774 class methods. Those will be marked DECL_EXTERNAL. Emit a warning in
2775 all other cases of non-zero calls to 0-count functions. */
2776
2777 void
2778 handle_missing_profiles (void)
2779 {
2780 struct cgraph_node *node;
2781 int unlikely_count_fraction = PARAM_VALUE (UNLIKELY_BB_COUNT_FRACTION);
2782 vec<struct cgraph_node *> worklist;
2783 worklist.create (64);
2784
2785 /* See if a 0-count function has non-0-count callers. In this case we
2786 lost some profile. Drop its function profile to PROFILE_GUESSED. */
2787 FOR_EACH_DEFINED_FUNCTION (node)
2788 {
2789 struct cgraph_edge *e;
2790 gcov_type call_count = 0;
2791 gcov_type max_tp_first_run = 0;
2792 struct function *fn = DECL_STRUCT_FUNCTION (node->decl);
2793
2794 if (node->count)
2795 continue;
2796 for (e = node->callers; e; e = e->next_caller)
2797 {
2798 call_count += e->count;
2799
2800 if (e->caller->tp_first_run > max_tp_first_run)
2801 max_tp_first_run = e->caller->tp_first_run;
2802 }
2803
2804 /* If the time profile is missing, assign the maximum that comes from
2805 the caller functions. */
2806 if (!node->tp_first_run && max_tp_first_run)
2807 node->tp_first_run = max_tp_first_run + 1;
2808
2809 if (call_count
2810 && fn && fn->cfg
2811 && (call_count * unlikely_count_fraction >= profile_info->runs))
2812 {
2813 drop_profile (node, call_count);
2814 worklist.safe_push (node);
2815 }
2816 }
2817
2818 /* Propagate the profile dropping to other 0-count COMDATs that are
2819 potentially called by COMDATs we already dropped the profile on. */
2820 while (worklist.length () > 0)
2821 {
2822 struct cgraph_edge *e;
2823
2824 node = worklist.pop ();
2825 for (e = node->callees; e; e = e->next_callee)
2826 {
2827 struct cgraph_node *callee = e->callee;
2828 struct function *fn = DECL_STRUCT_FUNCTION (callee->decl);
2829
2830 if (callee->count > 0)
2831 continue;
2832 if (DECL_COMDAT (callee->decl) && fn && fn->cfg
2833 && profile_status_for_fn (fn) == PROFILE_READ)
2834 {
2835 drop_profile (callee, 0);
2836 worklist.safe_push (callee);
2837 }
2838 }
2839 }
2840 worklist.release ();
2841 }
2842
2843 /* Convert counts measured by profile driven feedback to frequencies.
2844 Return nonzero iff there was any nonzero execution count. */
2845
2846 int
2847 counts_to_freqs (void)
2848 {
2849 gcov_type count_max, true_count_max = 0;
2850 basic_block bb;
2851
2852 /* Don't overwrite the estimated frequencies when the profile for
2853 the function is missing. We may drop this function to PROFILE_GUESSED
2854 later in drop_profile (). */
2855 if (!ENTRY_BLOCK_PTR_FOR_FN (cfun)->count)
2856 return 0;
2857
2858 FOR_BB_BETWEEN (bb, ENTRY_BLOCK_PTR_FOR_FN (cfun), NULL, next_bb)
2859 true_count_max = MAX (bb->count, true_count_max);
2860
2861 count_max = MAX (true_count_max, 1);
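/* Scale each count into the frequency range [0, BB_FREQ_MAX],
   rounding to nearest: with BB_FREQ_MAX of 10000, a block whose count
   is a quarter of count_max gets frequency 2500. */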
2862 FOR_BB_BETWEEN (bb, ENTRY_BLOCK_PTR_FOR_FN (cfun), NULL, next_bb)
2863 bb->frequency = (bb->count * BB_FREQ_MAX + count_max / 2) / count_max;
2864
2865 return true_count_max;
2866 }
2867
2868 /* Return true if the function is likely to be expensive, so there is no
2869 point in optimizing the prologue and epilogue or doing inlining at the
2870 expense of code size growth. THRESHOLD is the limit on the number of
2871 instructions the function can execute on average and still be considered inexpensive. */
2872
2873 bool
2874 expensive_function_p (int threshold)
2875 {
2876 unsigned int sum = 0;
2877 basic_block bb;
2878 unsigned int limit;
2879
2880 /* We cannot compute this accurately for large thresholds due to the
2881 scaled frequencies. */
2882 gcc_assert (threshold <= BB_FREQ_MAX);
2883
2884 /* Frequencies are out of range. This either means that the function
2885 contains an internal loop executing more than BB_FREQ_MAX times, or that
2886 profile feedback is available but the function has never been executed. */
2887 if (ENTRY_BLOCK_PTR_FOR_FN (cfun)->frequency == 0)
2888 return true;
2889
2890 /* At most BB_FREQ_MAX^2, so overflow won't happen. */
2891 limit = ENTRY_BLOCK_PTR_FOR_FN (cfun)->frequency * threshold;
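/* SUM below accumulates the block frequency once per active insn, so
   SUM / entry frequency approximates the average number of insns
   executed per invocation; comparing SUM against LIMIT avoids the
   division. */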
2892 FOR_EACH_BB_FN (bb, cfun)
2893 {
2894 rtx insn;
2895
2896 FOR_BB_INSNS (bb, insn)
2897 if (active_insn_p (insn))
2898 {
2899 sum += bb->frequency;
2900 if (sum > limit)
2901 return true;
2902 }
2903 }
2904
2905 return false;
2906 }
2907
2908 /* Estimate and propagate basic block frequencies using the given branch
2909 probabilities. If FORCE is true, the frequencies are used to estimate
2910 the counts even when there are already non-zero profile counts. */
2911
2912 void
2913 estimate_bb_frequencies (bool force)
2914 {
2915 basic_block bb;
2916 sreal freq_max;
2917
2918 if (force || profile_status_for_fn (cfun) != PROFILE_READ || !counts_to_freqs ())
2919 {
2920 static int real_values_initialized = 0;
2921
2922 if (!real_values_initialized)
2923 {
2924 real_values_initialized = 1;
2925 sreal_init (&real_zero, 0, 0);
2926 sreal_init (&real_one, 1, 0);
2927 sreal_init (&real_br_prob_base, REG_BR_PROB_BASE, 0);
2928 sreal_init (&real_bb_freq_max, BB_FREQ_MAX, 0);
2929 sreal_init (&real_one_half, 1, -1);
2930 sreal_div (&real_inv_br_prob_base, &real_one, &real_br_prob_base);
2931 sreal_sub (&real_almost_one, &real_one, &real_inv_br_prob_base);
2932 }
2933
2934 mark_dfs_back_edges ();
2935
2936 single_succ_edge (ENTRY_BLOCK_PTR_FOR_FN (cfun))->probability =
2937 REG_BR_PROB_BASE;
2938
2939 /* Set up block info for each basic block. */
2940 alloc_aux_for_blocks (sizeof (struct block_info_def));
2941 alloc_aux_for_edges (sizeof (struct edge_info_def));
2942 FOR_BB_BETWEEN (bb, ENTRY_BLOCK_PTR_FOR_FN (cfun), NULL, next_bb)
2943 {
2944 edge e;
2945 edge_iterator ei;
2946
2947 FOR_EACH_EDGE (e, ei, bb->succs)
2948 {
2949 sreal_init (&EDGE_INFO (e)->back_edge_prob, e->probability, 0);
2950 sreal_mul (&EDGE_INFO (e)->back_edge_prob,
2951 &EDGE_INFO (e)->back_edge_prob,
2952 &real_inv_br_prob_base);
2953 }
2954 }
2955
2956 /* First compute frequencies locally for each loop from innermost
2957 to outermost to examine frequencies for back edges. */
2958 estimate_loops ();
2959
2960 memcpy (&freq_max, &real_zero, sizeof (real_zero));
2961 FOR_EACH_BB_FN (bb, cfun)
2962 if (sreal_compare (&freq_max, &BLOCK_INFO (bb)->frequency) < 0)
2963 memcpy (&freq_max, &BLOCK_INFO (bb)->frequency, sizeof (freq_max));
2964
2965 sreal_div (&freq_max, &real_bb_freq_max, &freq_max);
2966 FOR_BB_BETWEEN (bb, ENTRY_BLOCK_PTR_FOR_FN (cfun), NULL, next_bb)
2967 {
2968 sreal tmp;
2969
2970 sreal_mul (&tmp, &BLOCK_INFO (bb)->frequency, &freq_max);
2971 sreal_add (&tmp, &tmp, &real_one_half);
2972 bb->frequency = sreal_to_int (&tmp);
2973 }
2974
2975 free_aux_for_blocks ();
2976 free_aux_for_edges ();
2977 }
2978 compute_function_frequency ();
2979 }
2980
2981 /* Decide whether function is hot, cold or unlikely executed. */
2982 void
2983 compute_function_frequency (void)
2984 {
2985 basic_block bb;
2986 struct cgraph_node *node = cgraph_node::get (current_function_decl);
2987
2988 if (DECL_STATIC_CONSTRUCTOR (current_function_decl)
2989 || MAIN_NAME_P (DECL_NAME (current_function_decl)))
2990 node->only_called_at_startup = true;
2991 if (DECL_STATIC_DESTRUCTOR (current_function_decl))
2992 node->only_called_at_exit = true;
2993
2994 if (profile_status_for_fn (cfun) != PROFILE_READ)
2995 {
2996 int flags = flags_from_decl_or_type (current_function_decl);
2997 if (lookup_attribute ("cold", DECL_ATTRIBUTES (current_function_decl))
2998 != NULL)
2999 node->frequency = NODE_FREQUENCY_UNLIKELY_EXECUTED;
3000 else if (lookup_attribute ("hot", DECL_ATTRIBUTES (current_function_decl))
3001 != NULL)
3002 node->frequency = NODE_FREQUENCY_HOT;
3003 else if (flags & ECF_NORETURN)
3004 node->frequency = NODE_FREQUENCY_EXECUTED_ONCE;
3005 else if (MAIN_NAME_P (DECL_NAME (current_function_decl)))
3006 node->frequency = NODE_FREQUENCY_EXECUTED_ONCE;
3007 else if (DECL_STATIC_CONSTRUCTOR (current_function_decl)
3008 || DECL_STATIC_DESTRUCTOR (current_function_decl))
3009 node->frequency = NODE_FREQUENCY_EXECUTED_ONCE;
3010 return;
3011 }
3012
3013 /* Only the first time through do we try to drop the function into the
3014 unlikely-executed category; after inlining, roundoff errors may confuse us.
3015 The ipa-profile pass will drop functions called only from unlikely
3016 functions to unlikely, and that is most of what we care about. */
3017 if (!cfun->after_inlining)
3018 node->frequency = NODE_FREQUENCY_UNLIKELY_EXECUTED;
3019 FOR_EACH_BB_FN (bb, cfun)
3020 {
3021 if (maybe_hot_bb_p (cfun, bb))
3022 {
3023 node->frequency = NODE_FREQUENCY_HOT;
3024 return;
3025 }
3026 if (!probably_never_executed_bb_p (cfun, bb))
3027 node->frequency = NODE_FREQUENCY_NORMAL;
3028 }
3029 }
3030
3031 /* Build PREDICT_EXPR. */
3032 tree
3033 build_predict_expr (enum br_predictor predictor, enum prediction taken)
3034 {
3035 tree t = build1 (PREDICT_EXPR, void_type_node,
3036 build_int_cst (integer_type_node, predictor));
3037 SET_PREDICT_EXPR_OUTCOME (t, taken);
3038 return t;
3039 }
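/* For example, build_predict_expr (PRED_HOT_LABEL, TAKEN) builds the
   PREDICT_EXPR that the gimplifier later lowers to a GIMPLE_PREDICT
   statement. */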
3040
3041 const char *
3042 predictor_name (enum br_predictor predictor)
3043 {
3044 return predictor_info[predictor].name;
3045 }
3046
3047 /* Predict branch probabilities and estimate profile of the tree CFG. */
3048
3049 namespace {
3050
3051 const pass_data pass_data_profile =
3052 {
3053 GIMPLE_PASS, /* type */
3054 "profile_estimate", /* name */
3055 OPTGROUP_NONE, /* optinfo_flags */
3056 TV_BRANCH_PROB, /* tv_id */
3057 PROP_cfg, /* properties_required */
3058 0, /* properties_provided */
3059 0, /* properties_destroyed */
3060 0, /* todo_flags_start */
3061 0, /* todo_flags_finish */
3062 };
3063
3064 class pass_profile : public gimple_opt_pass
3065 {
3066 public:
3067 pass_profile (gcc::context *ctxt)
3068 : gimple_opt_pass (pass_data_profile, ctxt)
3069 {}
3070
3071 /* opt_pass methods: */
3072 virtual bool gate (function *) { return flag_guess_branch_prob; }
3073 virtual unsigned int execute (function *);
3074
3075 }; // class pass_profile
3076
3077 unsigned int
3078 pass_profile::execute (function *fun)
3079 {
3080 unsigned nb_loops;
3081
3082 loop_optimizer_init (LOOPS_NORMAL);
3083 if (dump_file && (dump_flags & TDF_DETAILS))
3084 flow_loops_dump (dump_file, NULL, 0);
3085
3086 mark_irreducible_loops ();
3087
3088 nb_loops = number_of_loops (fun);
3089 if (nb_loops > 1)
3090 scev_initialize ();
3091
3092 tree_estimate_probability ();
3093
3094 if (nb_loops > 1)
3095 scev_finalize ();
3096
3097 loop_optimizer_finalize ();
3098 if (dump_file && (dump_flags & TDF_DETAILS))
3099 gimple_dump_cfg (dump_file, dump_flags);
3100 if (profile_status_for_fn (fun) == PROFILE_ABSENT)
3101 profile_status_for_fn (fun) = PROFILE_GUESSED;
3102 return 0;
3103 }
3104
3105 } // anon namespace
3106
3107 gimple_opt_pass *
3108 make_pass_profile (gcc::context *ctxt)
3109 {
3110 return new pass_profile (ctxt);
3111 }
3112
3113 namespace {
3114
3115 const pass_data pass_data_strip_predict_hints =
3116 {
3117 GIMPLE_PASS, /* type */
3118 "*strip_predict_hints", /* name */
3119 OPTGROUP_NONE, /* optinfo_flags */
3120 TV_BRANCH_PROB, /* tv_id */
3121 PROP_cfg, /* properties_required */
3122 0, /* properties_provided */
3123 0, /* properties_destroyed */
3124 0, /* todo_flags_start */
3125 0, /* todo_flags_finish */
3126 };
3127
3128 class pass_strip_predict_hints : public gimple_opt_pass
3129 {
3130 public:
3131 pass_strip_predict_hints (gcc::context *ctxt)
3132 : gimple_opt_pass (pass_data_strip_predict_hints, ctxt)
3133 {}
3134
3135 /* opt_pass methods: */
3136 opt_pass * clone () { return new pass_strip_predict_hints (m_ctxt); }
3137 virtual unsigned int execute (function *);
3138
3139 }; // class pass_strip_predict_hints
3140
3141 /* Get rid of all builtin_expect calls and GIMPLE_PREDICT statements
3142 we no longer need. */
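/* For example, "tmp = __builtin_expect (cond, 1); if (tmp) ..." is
   rewritten to "tmp = cond; if (tmp) ...". */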
3143 unsigned int
3144 pass_strip_predict_hints::execute (function *fun)
3145 {
3146 basic_block bb;
3147 gimple ass_stmt;
3148 tree var;
3149
3150 FOR_EACH_BB_FN (bb, fun)
3151 {
3152 gimple_stmt_iterator bi;
3153 for (bi = gsi_start_bb (bb); !gsi_end_p (bi);)
3154 {
3155 gimple stmt = gsi_stmt (bi);
3156
3157 if (gimple_code (stmt) == GIMPLE_PREDICT)
3158 {
3159 gsi_remove (&bi, true);
3160 continue;
3161 }
3162 else if (is_gimple_call (stmt))
3163 {
3164 tree fndecl = gimple_call_fndecl (stmt);
3165
3166 if ((fndecl
3167 && DECL_BUILT_IN_CLASS (fndecl) == BUILT_IN_NORMAL
3168 && DECL_FUNCTION_CODE (fndecl) == BUILT_IN_EXPECT
3169 && gimple_call_num_args (stmt) == 2)
3170 || (gimple_call_internal_p (stmt)
3171 && gimple_call_internal_fn (stmt) == IFN_BUILTIN_EXPECT))
3172 {
3173 var = gimple_call_lhs (stmt);
3174 if (var)
3175 {
3176 ass_stmt
3177 = gimple_build_assign (var, gimple_call_arg (stmt, 0));
3178 gsi_replace (&bi, ass_stmt, true);
3179 }
3180 else
3181 {
3182 gsi_remove (&bi, true);
3183 continue;
3184 }
3185 }
3186 }
3187 gsi_next (&bi);
3188 }
3189 }
3190 return 0;
3191 }
3192
3193 } // anon namespace
3194
3195 gimple_opt_pass *
3196 make_pass_strip_predict_hints (gcc::context *ctxt)
3197 {
3198 return new pass_strip_predict_hints (ctxt);
3199 }
3200
3201 /* Rebuild function frequencies. Passes are in general expected to
3202 maintain the profile by hand; however, in some cases this is not possible:
3203 for example, when inlining several functions with loops, frequencies might
3204 run out of scale and thus need to be recomputed. */
3205
3206 void
3207 rebuild_frequencies (void)
3208 {
3209 timevar_push (TV_REBUILD_FREQUENCIES);
3210
3211 /* When the max bb count in the function is small, there is a higher
3212 chance that there were truncation errors in the integer scaling
3213 of counts by inlining and other optimizations. This could lead
3214 to incorrect classification of code as being cold when it isn't.
3215 In that case, force the estimation of bb counts/frequencies from the
3216 branch probabilities, rather than computing frequencies from counts,
3217 which may also lead to frequencies incorrectly reduced to 0. There
3218 is less precision in the probabilities, so we only do this for small
3219 max counts. */
3220 gcov_type count_max = 0;
3221 basic_block bb;
3222 FOR_BB_BETWEEN (bb, ENTRY_BLOCK_PTR_FOR_FN (cfun), NULL, next_bb)
3223 count_max = MAX (bb->count, count_max);
3224
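/* REG_BR_PROB_BASE is 10000, so "small" here means a maximum count
   below 1000 profiled events. */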
3225 if (profile_status_for_fn (cfun) == PROFILE_GUESSED
3226 || (profile_status_for_fn (cfun) == PROFILE_READ && count_max < REG_BR_PROB_BASE/10))
3227 {
3228 loop_optimizer_init (0);
3229 add_noreturn_fake_exit_edges ();
3230 mark_irreducible_loops ();
3231 connect_infinite_loops_to_exit ();
3232 estimate_bb_frequencies (true);
3233 remove_fake_exit_edges ();
3234 loop_optimizer_finalize ();
3235 }
3236 else if (profile_status_for_fn (cfun) == PROFILE_READ)
3237 counts_to_freqs ();
3238 else
3239 gcc_unreachable ();
3240 timevar_pop (TV_REBUILD_FREQUENCIES);
3241 }