gcc/bb-reorder.c

   1 /* Basic block reordering routines for the GNU compiler.
   2    Copyright (C) 2000-2014 Free Software Foundation, Inc.
   3
   4    This file is part of GCC.
   5
   6    GCC is free software; you can redistribute it and/or modify it
   7    under the terms of the GNU General Public License as published by
   8    the Free Software Foundation; either version 3, or (at your option)
   9    any later version.
  10
  11    GCC is distributed in the hope that it will be useful, but WITHOUT
  12    ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
  13    or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public
  14    License for more details.
  15
  16    You should have received a copy of the GNU General Public License
  17    along with GCC; see the file COPYING3.  If not see
  18    <http://www.gnu.org/licenses/>.  */
  19
  20 /* This (greedy) algorithm constructs traces in several rounds.
  21    The construction starts from "seeds".  The seed for the first round
  22    is the entry point of the function.  When there is more than one seed,
  23    the one with the lowest key in the heap is selected first (see bb_to_key).
  24    Then the algorithm repeatedly adds the most probable successor to the end
  25    of a trace.  Finally it connects the traces.
  26
  27    There are two parameters: Branch Threshold and Exec Threshold.
  28    If the probability of an edge to a successor of the current basic block is
  29    lower than Branch Threshold or its frequency is lower than Exec Threshold,
  30    then the successor will be the seed in one of the next rounds.
  31    Each round has these parameters lower than the previous one.
  32    The last round has to have these parameters set to zero so that the
  33    remaining blocks are picked up.
  34
  35    The algorithm selects the most probable successor from all unvisited
  36    successors and successors that have been added to this trace.
  37    The other successors (that have not been "sent" to the next round) will be
  38    other seeds for this round and the secondary traces will start from them.
  39    If the successor has not been visited in this trace, it is added to the
  40    trace (however, there is some heuristic for simple branches).
  41    If the successor has been visited in this trace, a loop has been found.
  42    If the loop has many iterations, the loop is rotated so that the source
  43    block of the most probable edge going out of the loop is the last block
  44    of the trace.
  45    If the loop has few iterations and there is no edge from the last block of
  46    the loop going out of the loop, the loop header is duplicated.
  47
  48    When connecting traces, the algorithm first checks whether there is an edge
  49    from the last block of a trace to the first block of another trace.
  50    When there are still some unconnected traces it checks whether there exists
  51    a basic block BB such that BB is a successor of the last block of a trace
  52    and BB is a predecessor of the first block of another trace.  In this case,
  53    BB is duplicated, added at the end of the first trace and the traces are
  54    connected through it.
  55    The rest of traces are simply connected so there will be a jump to the
  56    beginning of the rest of traces.
  57
  58    The above description is for the full algorithm, which is used when the
  59    function is optimized for speed.  When the function is optimized for size,
  60    in order to reduce long jumps and connect more fallthru edges, the
  61    algorithm is modified as follows:
  62    (1) Break long traces to short ones.  A trace is broken at a block that has
  63    multiple predecessors/successors during trace discovery.  When connecting
  64    traces, only connect Trace n with Trace n + 1.  This change reduces most
  65    long jumps compared with the above algorithm.
  66    (2) Ignore the edge probability and frequency for fallthru edges.
  67    (3) Keep the original order of blocks when there is no chance to fall
  68    through.  We rely on the results of cfg_cleanup.
  69
  70    To implement the change for code size optimization, block's index is
  71    selected as the key and all traces are found in one round.
  72
  73    References:
  74
  75    "Software Trace Cache"
  76    A. Ramirez, J. Larriba-Pey, C. Navarro, J. Torrellas and M. Valero; 1999
  77    http://citeseer.nj.nec.com/15361.html
  78
  79 */
  80
  81 #include "config.h"
  82 #include "system.h"
  83 #include "coretypes.h"
  84 #include "tm.h"
  85 #include "tree.h"
  86 #include "rtl.h"
  87 #include "regs.h"
  88 #include "flags.h"
  89 #include "output.h"
  90 #include "fibheap.h"
  91 #include "target.h"
  92 #include "hashtab.h"
  93 #include "hash-set.h"
  94 #include "vec.h"
  95 #include "machmode.h"
  96 #include "hard-reg-set.h"
  97 #include "input.h"
  98 #include "function.h"
  99 #include "tm_p.h"
 100 #include "obstack.h"
 101 #include "expr.h"
 102 #include "params.h"
 103 #include "diagnostic-core.h"
 104 #include "toplev.h" /* user_defined_section_attribute */
 105 #include "tree-pass.h"
 106 #include "df.h"
 107 #include "bb-reorder.h"
 108 #include "cgraph.h"
 109 #include "except.h"
 110
 111 /* The number of rounds.  In most cases there will only be 4 rounds, but
 112    when partitioning hot and cold basic blocks into separate sections of
 113    the object file there will be an extra round.  */
 114 #define N_ROUNDS 5
 115
 116 /* Stubs in case we don't have a return insn.
 117    We have to check at run time too, not only compile time.  */
 118
 119 #ifndef HAVE_return
 120 #define HAVE_return 0
 121 #define gen_return() NULL_RTX
 122 #endif
 123
 124
 125 struct target_bb_reorder default_target_bb_reorder;
 126 #if SWITCHABLE_TARGET
 127 struct target_bb_reorder *this_target_bb_reorder = &default_target_bb_reorder;
 128 #endif
 129
 130 #define uncond_jump_length \
 131   (this_target_bb_reorder->x_uncond_jump_length)
 132
 133 /* Branch thresholds in thousandths (per mille) of the REG_BR_PROB_BASE.  */
 134 static const int branch_threshold[N_ROUNDS] = {400, 200, 100, 0, 0};
 135
 136 /* Exec thresholds in thousandths (per mille) of the frequency of bb 0.  */
 137 static const int exec_threshold[N_ROUNDS] = {500, 200, 50, 0, 0};
 138
 139 /* If edge frequency is lower than DUPLICATION_THRESHOLD per mille of entry
 140    block the edge destination is not duplicated while connecting traces.  */
 141 #define DUPLICATION_THRESHOLD 100
 142
 143 /* Structure to hold needed information for each basic block.  */
 144 typedef struct bbro_basic_block_data_def
 145 {
 146   /* Which trace is the bb start of (-1 means it is not a start of any).  */
 147   int start_of_trace;
 148
 149   /* Which trace is the bb end of (-1 means it is not an end of any).  */
 150   int end_of_trace;
 151
 152   /* Which trace is the bb in?  */
 153   int in_trace;
 154
 155   /* Which trace was this bb visited in?  */
 156   int visited;
 157
 158   /* Which heap is BB in (if any)?  */
 159   fibheap_t heap;
 160
 161   /* Which heap node is BB in (if any)?  */
 162   fibnode_t node;
 163 } bbro_basic_block_data;
 164
 165 /* The current size of the following dynamic array.  */
 166 static int array_size;
 167
 168 /* The array which holds needed information for basic blocks.  */
 169 static bbro_basic_block_data *bbd;
 170
 171 /* To avoid frequent reallocation the size of arrays is greater than needed,
 172    the number of elements is (not less than) 1.25 * size_wanted.  */
 173 #define GET_ARRAY_SIZE(X) ((((X) / 4) + 1) * 5)
 174
 175 /* Free the memory and set the pointer to NULL.  */
 176 #define FREE(P) (gcc_assert (P), free (P), P = 0)
 177
 178 /* Structure for holding information about a trace.  */
 179 struct trace
 180 {
 181   /* First and last basic block of the trace.  */
 182   basic_block first, last;
 183
 184   /* The round of the STC creation which this trace was found in.  */
 185   int round;
 186
 187   /* The length (i.e. the number of basic blocks) of the trace.  */
 188   int length;
 189 };
 190
 191 /* Maximum frequency and count of one of the entry blocks.  */
 192 static int max_entry_frequency;
 193 static gcov_type max_entry_count;
 194
 195 /* Local function prototypes.  */
 196 static void find_traces (int *, struct trace *);
 197 static basic_block rotate_loop (edge, struct trace *, int);
 198 static void mark_bb_visited (basic_block, int);
 199 static void find_traces_1_round (int, int, gcov_type, struct trace *, int *,
 200                                  int, fibheap_t *, int);
 201 static basic_block copy_bb (basic_block, edge, basic_block, int);
 202 static fibheapkey_t bb_to_key (basic_block);
 203 static bool better_edge_p (const_basic_block, const_edge, int, int, int, int,
 204                            const_edge);
 205 static bool connect_better_edge_p (const_edge, bool, int, const_edge,
 206                                    struct trace *);
 207 static void connect_traces (int, struct trace *);
 208 static bool copy_bb_p (const_basic_block, int);
 209 static bool push_to_next_round_p (const_basic_block, int, int, int, gcov_type);
 210 \f
 211 /* Return the trace number in which BB was visited.  */
 212
 213 static int
 214 bb_visited_trace (const_basic_block bb)
 215 {
 216   gcc_assert (bb->index < array_size);
 217   return bbd[bb->index].visited;
 218 }
 219
 220 /* This function marks BB that it was visited in trace number TRACE.  */
 221
 222 static void
 223 mark_bb_visited (basic_block bb, int trace)
 224 {
 225   bbd[bb->index].visited = trace;
 226   if (bbd[bb->index].heap)
 227     {
 228       fibheap_delete_node (bbd[bb->index].heap, bbd[bb->index].node);
 229       bbd[bb->index].heap = NULL;
 230       bbd[bb->index].node = NULL;
 231     }
 232 }
 233
 234 /* Check to see if bb should be pushed into the next round of trace
 235    collections or not.  Reasons for pushing the block forward are 1).
 236    If the block is cold, we are doing partitioning, and there will be
 237    another round (cold partition blocks are not supposed to be
 238    collected into traces until the very last round); or 2). There will
 239    be another round, and the basic block is not "hot enough" for the
 240    current round of trace collection.  */
 241
 242 static bool
 243 push_to_next_round_p (const_basic_block bb, int round, int number_of_rounds,
 244                       int exec_th, gcov_type count_th)
 245 {
 246   bool there_exists_another_round;
 247   bool block_not_hot_enough;
 248
 249   there_exists_another_round = round < number_of_rounds - 1;
 250
 251   block_not_hot_enough = (bb->frequency < exec_th
 252                           || bb->count < count_th
 253                           || probably_never_executed_bb_p (cfun, bb));
 254
 255   if (there_exists_another_round
 256       && block_not_hot_enough)
 257     return true;
 258   else
 259     return false;
 260 }
 261
 262 /* Find the traces for Software Trace Cache.  Chain each trace through
 263    RBI()->next.  Store the number of traces to N_TRACES and description of
 264    traces to TRACES.  */
 265
 266 static void
 267 find_traces (int *n_traces, struct trace *traces)
 268 {
 269   int i;
 270   int number_of_rounds;
 271   edge e;
 272   edge_iterator ei;
 273   fibheap_t heap;
 274
 275   /* Add one extra round of trace collection when partitioning hot/cold
 276      basic blocks into separate sections.  The last round is for all the
 277      cold blocks (and ONLY the cold blocks).  */
 278
 279   number_of_rounds = N_ROUNDS - 1;
 280
 281   /* Insert entry points of function into heap.  */
 282   heap = fibheap_new ();
 283   max_entry_frequency = 0;
 284   max_entry_count = 0;
 285   FOR_EACH_EDGE (e, ei, ENTRY_BLOCK_PTR_FOR_FN (cfun)->succs)
 286     {
 287       bbd[e->dest->index].heap = heap;
 288       bbd[e->dest->index].node = fibheap_insert (heap, bb_to_key (e->dest),
 289                                                     e->dest);
 290       if (e->dest->frequency > max_entry_frequency)
 291         max_entry_frequency = e->dest->frequency;
 292       if (e->dest->count > max_entry_count)
 293         max_entry_count = e->dest->count;
 294     }
 295
 296   /* Find the traces.  */
 297   for (i = 0; i < number_of_rounds; i++)
 298     {
 299       gcov_type count_threshold;
 300
 301       if (dump_file)
 302         fprintf (dump_file, "STC - round %d\n", i + 1);
 303
 304       if (max_entry_count < INT_MAX / 1000)
 305         count_threshold = max_entry_count * exec_threshold[i] / 1000;
 306       else
 307         count_threshold = max_entry_count / 1000 * exec_threshold[i];
 308
 309       find_traces_1_round (REG_BR_PROB_BASE * branch_threshold[i] / 1000,
 310                            max_entry_frequency * exec_threshold[i] / 1000,
 311                            count_threshold, traces, n_traces, i, &heap,
 312                            number_of_rounds);
 313     }
 314   fibheap_delete (heap);
 315
 316   if (dump_file)
 317     {
 318       for (i = 0; i < *n_traces; i++)
 319         {
 320           basic_block bb;
 321           fprintf (dump_file, "Trace %d (round %d):  ", i + 1,
 322                    traces[i].round + 1);
 323           for (bb = traces[i].first;
 324                bb != traces[i].last;
 325                bb = (basic_block) bb->aux)
 326             fprintf (dump_file, "%d [%d] ", bb->index, bb->frequency);
 327           fprintf (dump_file, "%d [%d]\n", bb->index, bb->frequency);
 328         }
 329       fflush (dump_file);
 330     }
 331 }
 332
 333 /* Rotate loop whose back edge is BACK_EDGE in the tail of trace TRACE
 334    (with sequential number TRACE_N).  */
 335
 336 static basic_block
 337 rotate_loop (edge back_edge, struct trace *trace, int trace_n)
 338 {
 339   basic_block bb;
 340
 341   /* Information about the best end (end after rotation) of the loop.  */
 342   basic_block best_bb = NULL;
 343   edge best_edge = NULL;
 344   int best_freq = -1;
 345   gcov_type best_count = -1;
 346   /* The best edge is preferred when its destination is not visited yet
 347      or is a start block of some trace.  */
 348   bool is_preferred = false;
 349
 350   /* Find the most frequent edge that goes out from current trace.  */
 351   bb = back_edge->dest;
 352   do
 353     {
 354       edge e;
 355       edge_iterator ei;
 356
 357       FOR_EACH_EDGE (e, ei, bb->succs)
 358         if (e->dest != EXIT_BLOCK_PTR_FOR_FN (cfun)
 359             && bb_visited_trace (e->dest) != trace_n
 360             && (e->flags & EDGE_CAN_FALLTHRU)
 361             && !(e->flags & EDGE_COMPLEX))
 362         {
 363           if (is_preferred)
 364             {
 365               /* The best edge is preferred.  */
 366               if (!bb_visited_trace (e->dest)
 367                   || bbd[e->dest->index].start_of_trace >= 0)
 368                 {
 369                   /* The current edge E is also preferred.  */
 370                   int freq = EDGE_FREQUENCY (e);
 371                   if (freq > best_freq || e->count > best_count)
 372                     {
 373                       best_freq = freq;
 374                       best_count = e->count;
 375                       best_edge = e;
 376                       best_bb = bb;
 377                     }
 378                 }
 379             }
 380           else
 381             {
 382               if (!bb_visited_trace (e->dest)
 383                   || bbd[e->dest->index].start_of_trace >= 0)
 384                 {
 385                   /* The current edge E is preferred.  */
 386                   is_preferred = true;
 387                   best_freq = EDGE_FREQUENCY (e);
 388                   best_count = e->count;
 389                   best_edge = e;
 390                   best_bb = bb;
 391                 }
 392               else
 393                 {
 394                   int freq = EDGE_FREQUENCY (e);
 395                   if (!best_edge || freq > best_freq || e->count > best_count)
 396                     {
 397                       best_freq = freq;
 398                       best_count = e->count;
 399                       best_edge = e;
 400                       best_bb = bb;
 401                     }
 402                 }
 403             }
 404         }
 405       bb = (basic_block) bb->aux;
 406     }
 407   while (bb != back_edge->dest);
 408
 409   if (best_bb)
 410     {
 411       /* Rotate the loop so that the BEST_EDGE goes out from the last block of
 412          the trace.  */
 413       if (back_edge->dest == trace->first)
 414         {
 415           trace->first = (basic_block) best_bb->aux;
 416         }
 417       else
 418         {
 419           basic_block prev_bb;
 420
 421           for (prev_bb = trace->first;
 422                prev_bb->aux != back_edge->dest;
 423                prev_bb = (basic_block) prev_bb->aux)
 424             ;
 425           prev_bb->aux = best_bb->aux;
 426
 427           /* Try to get rid of uncond jump to cond jump.  */
 428           if (single_succ_p (prev_bb))
 429             {
 430               basic_block header = single_succ (prev_bb);
 431
 432               /* Duplicate HEADER if it is a small block containing cond jump
 433                  in the end.  */
 434               if (any_condjump_p (BB_END (header)) && copy_bb_p (header, 0)
 435                   && !CROSSING_JUMP_P (BB_END (header)))
 436                 copy_bb (header, single_succ_edge (prev_bb), prev_bb, trace_n);
 437             }
 438         }
 439     }
 440   else
 441     {
 442       /* We have not found suitable loop tail so do no rotation.  */
 443       best_bb = back_edge->src;
 444     }
 445   best_bb->aux = NULL;
 446   return best_bb;
 447 }
 448
 449 /* One round of finding traces.  Find traces for BRANCH_TH and EXEC_TH i.e. do
 450    not include basic blocks whose probability is lower than BRANCH_TH or whose
 451    frequency is lower than EXEC_TH into traces (or whose count is lower than
 452    COUNT_TH).  Store the new traces into TRACES and modify the number of
 453    traces *N_TRACES.  Set the round (which the trace belongs to) to ROUND.
 454    The function expects starting basic blocks to be in *HEAP and will delete
 455    *HEAP and store starting points for the next round into new *HEAP.  */
 456
 457 static void
 458 find_traces_1_round (int branch_th, int exec_th, gcov_type count_th,
 459                      struct trace *traces, int *n_traces, int round,
 460                      fibheap_t *heap, int number_of_rounds)
 461 {
 462   /* Heap for discarded basic blocks which are possible starting points for
 463      the next round.  */
 464   fibheap_t new_heap = fibheap_new ();
 465   bool for_size = optimize_function_for_size_p (cfun);
 466
 467   while (!fibheap_empty (*heap))
 468     {
 469       basic_block bb;
 470       struct trace *trace;
 471       edge best_edge, e;
 472       fibheapkey_t key;
 473       edge_iterator ei;
 474
 475       bb = (basic_block) fibheap_extract_min (*heap);
 476       bbd[bb->index].heap = NULL;
 477       bbd[bb->index].node = NULL;
 478
 479       if (dump_file)
 480         fprintf (dump_file, "Getting bb %d\n", bb->index);
 481
 482       /* If the BB's frequency is too low, send BB to the next round.  When
 483          partitioning hot/cold blocks into separate sections, make sure all
 484          the cold blocks (and ONLY the cold blocks) go into the (extra) final
 485          round.  When optimizing for size, do not push to next round.  */
 486
 487       if (!for_size
 488           && push_to_next_round_p (bb, round, number_of_rounds, exec_th,
 489                                    count_th))
 490         {
 491           int key = bb_to_key (bb);
 492           bbd[bb->index].heap = new_heap;
 493           bbd[bb->index].node = fibheap_insert (new_heap, key, bb);
 494
 495           if (dump_file)
 496             fprintf (dump_file,
 497                      "  Possible start point of next round: %d (key: %d)\n",
 498                      bb->index, key);
 499           continue;
 500         }
 501
 502       trace = traces + *n_traces;
 503       trace->first = bb;
 504       trace->round = round;
 505       trace->length = 0;
 506       bbd[bb->index].in_trace = *n_traces;
 507       (*n_traces)++;
 508
 509       do
 510         {
 511           int prob, freq;
 512           bool ends_in_call;
 513
 514           /* The probability and frequency of the best edge.  */
 515           int best_prob = INT_MIN / 2;
 516           int best_freq = INT_MIN / 2;
 517
 518           best_edge = NULL;
 519           mark_bb_visited (bb, *n_traces);
 520           trace->length++;
 521
 522           if (dump_file)
 523             fprintf (dump_file, "Basic block %d was visited in trace %d\n",
 524                      bb->index, *n_traces - 1);
 525
 526           ends_in_call = block_ends_with_call_p (bb);
 527
 528           /* Select the successor that will be placed after BB.  */
 529           FOR_EACH_EDGE (e, ei, bb->succs)
 530             {
 531               gcc_assert (!(e->flags & EDGE_FAKE));
 532
 533               if (e->dest == EXIT_BLOCK_PTR_FOR_FN (cfun))
 534                 continue;
 535
 536               if (bb_visited_trace (e->dest)
 537                   && bb_visited_trace (e->dest) != *n_traces)
 538                 continue;
 539
 540               if (BB_PARTITION (e->dest) != BB_PARTITION (bb))
 541                 continue;
 542
 543               prob = e->probability;
 544               freq = e->dest->frequency;
 545
 546               /* The only sensible preference for a call instruction is the
 547                  fallthru edge.  Don't bother selecting anything else.  */
 548               if (ends_in_call)
 549                 {
 550                   if (e->flags & EDGE_CAN_FALLTHRU)
 551                     {
 552                       best_edge = e;
 553                       best_prob = prob;
 554                       best_freq = freq;
 555                     }
 556                   continue;
 557                 }
 558
 559               /* Edge that cannot be fallthru or improbable or infrequent
 560                  successor (i.e. it is unsuitable successor).  When optimizing
 561                  for size, ignore the probability and frequency.  */
 562               if (!(e->flags & EDGE_CAN_FALLTHRU) || (e->flags & EDGE_COMPLEX)
 563                   || ((prob < branch_th || EDGE_FREQUENCY (e) < exec_th
 564                       || e->count < count_th) && (!for_size)))
 565                 continue;
 566
 567               /* If partitioning hot/cold basic blocks, don't consider edges
 568                  that cross section boundaries.  */
 569
 570               if (better_edge_p (bb, e, prob, freq, best_prob, best_freq,
 571                                  best_edge))
 572                 {
 573                   best_edge = e;
 574                   best_prob = prob;
 575                   best_freq = freq;
 576                 }
 577             }
 578
 579           /* If the best destination has multiple predecessors, and can be
 580              duplicated cheaper than a jump, don't allow it to be added
 581              to a trace.  We'll duplicate it when connecting traces.  */
 582           if (best_edge && EDGE_COUNT (best_edge->dest->preds) >= 2
 583               && copy_bb_p (best_edge->dest, 0))
 584             best_edge = NULL;
 585
 586           /* If the best destination has multiple successors or predecessors,
 587              don't allow it to be added when optimizing for size.  This makes
 588              sure predecessors with smaller index are handled before the best
 589              destinarion.  It breaks long trace and reduces long jumps.
 590
 591              Take if-then-else as an example.
 592                 A
 593                / \
 594               B   C
 595                \ /
 596                 D
 597              If we do not remove the best edge B->D/C->D, the final order might
 598              be A B D ... C.  C is at the end of the program.  If D's successors
 599              and D are complicated, might need long jumps for A->C and C->D.
 600              Similar issue for order: A C D ... B.
 601
 602              After removing the best edge, the final result will be ABCD/ ACBD.
 603              It does not add jump compared with the previous order.  But it
 604              reduces the possibility of long jumps.  */
 605           if (best_edge && for_size
 606               && (EDGE_COUNT (best_edge->dest->succs) > 1
 607                  || EDGE_COUNT (best_edge->dest->preds) > 1))
 608             best_edge = NULL;
 609
 610           /* Add all non-selected successors to the heaps.  */
 611           FOR_EACH_EDGE (e, ei, bb->succs)
 612             {
 613               if (e == best_edge
 614                   || e->dest == EXIT_BLOCK_PTR_FOR_FN (cfun)
 615                   || bb_visited_trace (e->dest))
 616                 continue;
 617
 618               key = bb_to_key (e->dest);
 619
 620               if (bbd[e->dest->index].heap)
 621                 {
 622                   /* E->DEST is already in some heap.  */
 623                   if (key != bbd[e->dest->index].node->key)
 624                     {
 625                       if (dump_file)
 626                         {
 627                           fprintf (dump_file,
 628                                    "Changing key for bb %d from %ld to %ld.\n",
 629                                    e->dest->index,
 630                                    (long) bbd[e->dest->index].node->key,
 631                                    key);
 632                         }
 633                       fibheap_replace_key (bbd[e->dest->index].heap,
 634                                            bbd[e->dest->index].node, key);
 635                     }
 636                 }
 637               else
 638                 {
 639                   fibheap_t which_heap = *heap;
 640
 641                   prob = e->probability;
 642                   freq = EDGE_FREQUENCY (e);
 643
 644                   if (!(e->flags & EDGE_CAN_FALLTHRU)
 645                       || (e->flags & EDGE_COMPLEX)
 646                       || prob < branch_th || freq < exec_th
 647                       || e->count < count_th)
 648                     {
 649                       /* When partitioning hot/cold basic blocks, make sure
 650                          the cold blocks (and only the cold blocks) all get
 651                          pushed to the last round of trace collection.  When
 652                          optimizing for size, do not push to next round.  */
 653
 654                       if (!for_size && push_to_next_round_p (e->dest, round,
 655                                                              number_of_rounds,
 656                                                              exec_th, count_th))
 657                         which_heap = new_heap;
 658                     }
 659
 660                   bbd[e->dest->index].heap = which_heap;
 661                   bbd[e->dest->index].node = fibheap_insert (which_heap,
 662                                                                 key, e->dest);
 663
 664                   if (dump_file)
 665                     {
 666                       fprintf (dump_file,
 667                                "  Possible start of %s round: %d (key: %ld)\n",
 668                                (which_heap == new_heap) ? "next" : "this",
 669                                e->dest->index, (long) key);
 670                     }
 671
 672                 }
 673             }
 674
 675           if (best_edge) /* Suitable successor was found.  */
 676             {
 677               if (bb_visited_trace (best_edge->dest) == *n_traces)
 678                 {
 679                   /* We do nothing with one basic block loops.  */
 680                   if (best_edge->dest != bb)
 681                     {
 682                       if (EDGE_FREQUENCY (best_edge)
 683                           > 4 * best_edge->dest->frequency / 5)
 684                         {
 685                           /* The loop has at least 4 iterations.  If the loop
 686                              header is not the first block of the function
 687                              we can rotate the loop.  */
 688
 689                           if (best_edge->dest
 690                               != ENTRY_BLOCK_PTR_FOR_FN (cfun)->next_bb)
 691                             {
 692                               if (dump_file)
 693                                 {
 694                                   fprintf (dump_file,
 695                                            "Rotating loop %d - %d\n",
 696                                            best_edge->dest->index, bb->index);
 697                                 }
 698                               bb->aux = best_edge->dest;
 699                               bbd[best_edge->dest->index].in_trace =
 700                                                              (*n_traces) - 1;
 701                               bb = rotate_loop (best_edge, trace, *n_traces);
 702                             }
 703                         }
 704                       else
 705                         {
 706                           /* The loop has less than 4 iterations.  */
 707
 708                           if (single_succ_p (bb)
 709                               && copy_bb_p (best_edge->dest,
 710                                             optimize_edge_for_speed_p
 711                                             (best_edge)))
 712                             {
 713                               bb = copy_bb (best_edge->dest, best_edge, bb,
 714                                             *n_traces);
 715                               trace->length++;
 716                             }
 717                         }
 718                     }
 719
 720                   /* Terminate the trace.  */
 721                   break;
 722                 }
 723               else
 724                 {
 725                   /* Check for a situation
 726
 727                     A
 728                    /|
 729                   B |
 730                    \|
 731                     C
 732
 733                   where
 734                   EDGE_FREQUENCY (AB) + EDGE_FREQUENCY (BC)
 735                     >= EDGE_FREQUENCY (AC).
 736                   (i.e. 2 * B->frequency >= EDGE_FREQUENCY (AC) )
 737                   Best ordering is then A B C.
 738
 739                   When optimizing for size, A B C is always the best order.
 740
 741                   This situation is created for example by:
 742
 743                   if (A) B;
 744                   C;
 745
 746                   */
 747
 748                   FOR_EACH_EDGE (e, ei, bb->succs)
 749                     if (e != best_edge
 750                         && (e->flags & EDGE_CAN_FALLTHRU)
 751                         && !(e->flags & EDGE_COMPLEX)
 752                         && !bb_visited_trace (e->dest)
 753                         && single_pred_p (e->dest)
 754                         && !(e->flags & EDGE_CROSSING)
 755                         && single_succ_p (e->dest)
 756                         && (single_succ_edge (e->dest)->flags
 757                             & EDGE_CAN_FALLTHRU)
 758                         && !(single_succ_edge (e->dest)->flags & EDGE_COMPLEX)
 759                         && single_succ (e->dest) == best_edge->dest
 760                         && (2 * e->dest->frequency >= EDGE_FREQUENCY (best_edge)
 761                             || for_size))
 762                       {
 763                         best_edge = e;
 764                         if (dump_file)
 765                           fprintf (dump_file, "Selecting BB %d\n",
 766                                    best_edge->dest->index);
 767                         break;
 768                       }
 769
 770                   bb->aux = best_edge->dest;
 771                   bbd[best_edge->dest->index].in_trace = (*n_traces) - 1;
 772                   bb = best_edge->dest;
 773                 }
 774             }
 775         }
 776       while (best_edge);
 777       trace->last = bb;
 778       bbd[trace->first->index].start_of_trace = *n_traces - 1;
 779       bbd[trace->last->index].end_of_trace = *n_traces - 1;
 780
 781       /* The trace is terminated so we have to recount the keys in heap
 782          (some block can have a lower key because now one of its predecessors
 783          is an end of the trace).  */
 784       FOR_EACH_EDGE (e, ei, bb->succs)
 785         {
 786           if (e->dest == EXIT_BLOCK_PTR_FOR_FN (cfun)
 787               || bb_visited_trace (e->dest))
 788             continue;
 789
 790           if (bbd[e->dest->index].heap)
 791             {
 792               key = bb_to_key (e->dest);
 793               if (key != bbd[e->dest->index].node->key)
 794                 {
 795                   if (dump_file)
 796                     {
 797                       fprintf (dump_file,
 798                                "Changing key for bb %d from %ld to %ld.\n",
 799                                e->dest->index,
 800                                (long) bbd[e->dest->index].node->key, key);
 801                     }
 802                   fibheap_replace_key (bbd[e->dest->index].heap,
 803                                        bbd[e->dest->index].node,
 804                                        key);
 805                 }
 806             }
 807         }
 808     }
 809
 810   fibheap_delete (*heap);
 811
 812   /* "Return" the new heap.  */
 813   *heap = new_heap;
 814 }
 815
 816 /* Create a duplicate of the basic block OLD_BB and redirect edge E to it, add
 817    it to trace after BB, mark OLD_BB visited and update pass' data structures
 818    (TRACE is a number of trace which OLD_BB is duplicated to).  */
 819
 820 static basic_block
 821 copy_bb (basic_block old_bb, edge e, basic_block bb, int trace)
 822 {
 823   basic_block new_bb;
 824
 825   new_bb = duplicate_block (old_bb, e, bb);
 826   BB_COPY_PARTITION (new_bb, old_bb);
 827
 828   gcc_assert (e->dest == new_bb);
 829
 830   if (dump_file)
 831     fprintf (dump_file,
 832              "Duplicated bb %d (created bb %d)\n",
 833              old_bb->index, new_bb->index);
 834
 835   if (new_bb->index >= array_size
 836       || last_basic_block_for_fn (cfun) > array_size)
 837     {
 838       int i;
 839       int new_size;
 840
 841       new_size = MAX (last_basic_block_for_fn (cfun), new_bb->index + 1);
 842       new_size = GET_ARRAY_SIZE (new_size);
 843       bbd = XRESIZEVEC (bbro_basic_block_data, bbd, new_size);
 844       for (i = array_size; i < new_size; i++)
 845         {
 846           bbd[i].start_of_trace = -1;
 847           bbd[i].end_of_trace = -1;
 848           bbd[i].in_trace = -1;
 849           bbd[i].visited = 0;
 850           bbd[i].heap = NULL;
 851           bbd[i].node = NULL;
 852         }
 853       array_size = new_size;
 854
 855       if (dump_file)
 856         {
 857           fprintf (dump_file,
 858                    "Growing the dynamic array to %d elements.\n",
 859                    array_size);
 860         }
 861     }
 862
 863   gcc_assert (!bb_visited_trace (e->dest));
 864   mark_bb_visited (new_bb, trace);
 865   new_bb->aux = bb->aux;
 866   bb->aux = new_bb;
 867
 868   bbd[new_bb->index].in_trace = trace;
 869
 870   return new_bb;
 871 }
 872
 873 /* Compute and return the key (for the heap) of the basic block BB.  */
 874
 875 static fibheapkey_t
 876 bb_to_key (basic_block bb)
 877 {
 878   edge e;
 879   edge_iterator ei;
 880   int priority = 0;
 881
 882   /* Use index as key to align with its original order.  */
 883   if (optimize_function_for_size_p (cfun))
 884     return bb->index;
 885
 886   /* Do not start in probably never executed blocks.  */
 887
 888   if (BB_PARTITION (bb) == BB_COLD_PARTITION
 889       || probably_never_executed_bb_p (cfun, bb))
 890     return BB_FREQ_MAX;
 891
 892   /* Prefer blocks whose predecessor is an end of some trace
 893      or whose predecessor edge is EDGE_DFS_BACK.  */
 894   FOR_EACH_EDGE (e, ei, bb->preds)
 895     {
 896       if ((e->src != ENTRY_BLOCK_PTR_FOR_FN (cfun)
 897            && bbd[e->src->index].end_of_trace >= 0)
 898           || (e->flags & EDGE_DFS_BACK))
 899         {
 900           int edge_freq = EDGE_FREQUENCY (e);
 901
 902           if (edge_freq > priority)
 903             priority = edge_freq;
 904         }
 905     }
 906
 907   if (priority)
 908     /* The block with priority should have significantly lower key.  */
 909     return -(100 * BB_FREQ_MAX + 100 * priority + bb->frequency);
 910
 911   return -bb->frequency;
 912 }
 913
 914 /* Return true when the edge E from basic block BB is better than the temporary
 915    best edge (details are in function).  The probability of edge E is PROB. The
 916    frequency of the successor is FREQ.  The current best probability is
 917    BEST_PROB, the best frequency is BEST_FREQ.
 918    The edge is considered to be equivalent when PROB does not differ much from
 919    BEST_PROB; similarly for frequency.  */
 920
 921 static bool
 922 better_edge_p (const_basic_block bb, const_edge e, int prob, int freq,
 923                int best_prob, int best_freq, const_edge cur_best_edge)
 924 {
 925   bool is_better_edge;
 926
 927   /* The BEST_* values do not have to be best, but can be a bit smaller than
 928      maximum values.  */
 929   int diff_prob = best_prob / 10;
 930   int diff_freq = best_freq / 10;
 931
 932   /* The smaller one is better to keep the original order.  */
 933   if (optimize_function_for_size_p (cfun))
 934     return !cur_best_edge
 935            || cur_best_edge->dest->index > e->dest->index;
 936
 937   if (prob > best_prob + diff_prob)
 938     /* The edge has higher probability than the temporary best edge.  */
 939     is_better_edge = true;
 940   else if (prob < best_prob - diff_prob)
 941     /* The edge has lower probability than the temporary best edge.  */
 942     is_better_edge = false;
 943   else if (freq < best_freq - diff_freq)
 944     /* The edge and the temporary best edge  have almost equivalent
 945        probabilities.  The higher frequency of a successor now means
 946        that there is another edge going into that successor.
 947        This successor has lower frequency so it is better.  */
 948     is_better_edge = true;
 949   else if (freq > best_freq + diff_freq)
 950     /* This successor has higher frequency so it is worse.  */
 951     is_better_edge = false;
 952   else if (e->dest->prev_bb == bb)
 953     /* The edges have equivalent probabilities and the successors
 954        have equivalent frequencies.  Select the previous successor.  */
 955     is_better_edge = true;
 956   else
 957     is_better_edge = false;
 958
 959   /* If we are doing hot/cold partitioning, make sure that we always favor
 960      non-crossing edges over crossing edges.  */
 961
 962   if (!is_better_edge
 963       && flag_reorder_blocks_and_partition
 964       && cur_best_edge
 965       && (cur_best_edge->flags & EDGE_CROSSING)
 966       && !(e->flags & EDGE_CROSSING))
 967     is_better_edge = true;
 968
 969   return is_better_edge;
 970 }
 971
 972 /* Return true when the edge E is better than the temporary best edge
 973    CUR_BEST_EDGE.  If SRC_INDEX_P is true, the function compares the src bb of
 974    E and CUR_BEST_EDGE; otherwise it will compare the dest bb.
 975    BEST_LEN is the trace length of src (or dest) bb in CUR_BEST_EDGE.
 976    TRACES record the information about traces.
 977    When optimizing for size, the edge with smaller index is better.
 978    When optimizing for speed, the edge with bigger probability or longer trace
 979    is better.  */
 980
 981 static bool
 982 connect_better_edge_p (const_edge e, bool src_index_p, int best_len,
 983                        const_edge cur_best_edge, struct trace *traces)
 984 {
 985   int e_index;
 986   int b_index;
 987   bool is_better_edge;
 988
 989   if (!cur_best_edge)
 990     return true;
 991
 992   if (optimize_function_for_size_p (cfun))
 993     {
 994       e_index = src_index_p ? e->src->index : e->dest->index;
 995       b_index = src_index_p ? cur_best_edge->src->index
 996                               : cur_best_edge->dest->index;
 997       /* The smaller one is better to keep the original order.  */
 998       return b_index > e_index;
 999     }
1000
1001   if (src_index_p)
1002     {
1003       e_index = e->src->index;
1004
1005       if (e->probability > cur_best_edge->probability)
1006         /* The edge has higher probability than the temporary best edge.  */
1007         is_better_edge = true;
1008       else if (e->probability < cur_best_edge->probability)
1009         /* The edge has lower probability than the temporary best edge.  */
1010         is_better_edge = false;
1011       else if (traces[bbd[e_index].end_of_trace].length > best_len)
1012         /* The edge and the temporary best edge have equivalent probabilities.
1013            The edge with longer trace is better.  */
1014         is_better_edge = true;
1015       else
1016         is_better_edge = false;
1017     }
1018   else
1019     {
1020       e_index = e->dest->index;
1021
1022       if (e->probability > cur_best_edge->probability)
1023         /* The edge has higher probability than the temporary best edge.  */
1024         is_better_edge = true;
1025       else if (e->probability < cur_best_edge->probability)
1026         /* The edge has lower probability than the temporary best edge.  */
1027         is_better_edge = false;
1028       else if (traces[bbd[e_index].start_of_trace].length > best_len)
1029         /* The edge and the temporary best edge have equivalent probabilities.
1030            The edge with longer trace is better.  */
1031         is_better_edge = true;
1032       else
1033         is_better_edge = false;
1034     }
1035
1036   return is_better_edge;
1037 }
1038
1039 /* Connect traces in array TRACES, N_TRACES is the count of traces.  */
1040
1041 static void
1042 connect_traces (int n_traces, struct trace *traces)
1043 {
1044   int i;
1045   bool *connected;
1046   bool two_passes;
1047   int last_trace;
1048   int current_pass;
1049   int current_partition;
1050   int freq_threshold;
1051   gcov_type count_threshold;
1052   bool for_size = optimize_function_for_size_p (cfun);
1053
1054   freq_threshold = max_entry_frequency * DUPLICATION_THRESHOLD / 1000;
1055   if (max_entry_count < INT_MAX / 1000)
1056     count_threshold = max_entry_count * DUPLICATION_THRESHOLD / 1000;
1057   else
1058     count_threshold = max_entry_count / 1000 * DUPLICATION_THRESHOLD;
1059
1060   connected = XCNEWVEC (bool, n_traces);
1061   last_trace = -1;
1062   current_pass = 1;
1063   current_partition = BB_PARTITION (traces[0].first);
1064   two_passes = false;
1065
1066   if (crtl->has_bb_partition)
1067     for (i = 0; i < n_traces && !two_passes; i++)
1068       if (BB_PARTITION (traces[0].first)
1069           != BB_PARTITION (traces[i].first))
1070         two_passes = true;
1071
1072   for (i = 0; i < n_traces || (two_passes && current_pass == 1) ; i++)
1073     {
1074       int t = i;
1075       int t2;
1076       edge e, best;
1077       int best_len;
1078
1079       if (i >= n_traces)
1080         {
1081           gcc_assert (two_passes && current_pass == 1);
1082           i = 0;
1083           t = i;
1084           current_pass = 2;
1085           if (current_partition == BB_HOT_PARTITION)
1086             current_partition = BB_COLD_PARTITION;
1087           else
1088             current_partition = BB_HOT_PARTITION;
1089         }
1090
1091       if (connected[t])
1092         continue;
1093
1094       if (two_passes
1095           && BB_PARTITION (traces[t].first) != current_partition)
1096         continue;
1097
1098       connected[t] = true;
1099
1100       /* Find the predecessor traces.  */
1101       for (t2 = t; t2 > 0;)
1102         {
1103           edge_iterator ei;
1104           best = NULL;
1105           best_len = 0;
1106           FOR_EACH_EDGE (e, ei, traces[t2].first->preds)
1107             {
1108               int si = e->src->index;
1109
1110               if (e->src != ENTRY_BLOCK_PTR_FOR_FN (cfun)
1111                   && (e->flags & EDGE_CAN_FALLTHRU)
1112                   && !(e->flags & EDGE_COMPLEX)
1113                   && bbd[si].end_of_trace >= 0
1114                   && !connected[bbd[si].end_of_trace]
1115                   && (BB_PARTITION (e->src) == current_partition)
1116                   && connect_better_edge_p (e, true, best_len, best, traces))
1117                 {
1118                   best = e;
1119                   best_len = traces[bbd[si].end_of_trace].length;
1120                 }
1121             }
1122           if (best)
1123             {
1124               best->src->aux = best->dest;
1125               t2 = bbd[best->src->index].end_of_trace;
1126               connected[t2] = true;
1127
1128               if (dump_file)
1129                 {
1130                   fprintf (dump_file, "Connection: %d %d\n",
1131                            best->src->index, best->dest->index);
1132                 }
1133             }
1134           else
1135             break;
1136         }
1137
1138       if (last_trace >= 0)
1139         traces[last_trace].last->aux = traces[t2].first;
1140       last_trace = t;
1141
1142       /* Find the successor traces.  */
1143       while (1)
1144         {
1145           /* Find the continuation of the chain.  */
1146           edge_iterator ei;
1147           best = NULL;
1148           best_len = 0;
1149           FOR_EACH_EDGE (e, ei, traces[t].last->succs)
1150             {
1151               int di = e->dest->index;
1152
1153               if (e->dest != EXIT_BLOCK_PTR_FOR_FN (cfun)
1154                   && (e->flags & EDGE_CAN_FALLTHRU)
1155                   && !(e->flags & EDGE_COMPLEX)
1156                   && bbd[di].start_of_trace >= 0
1157                   && !connected[bbd[di].start_of_trace]
1158                   && (BB_PARTITION (e->dest) == current_partition)
1159                   && connect_better_edge_p (e, false, best_len, best, traces))
1160                 {
1161                   best = e;
1162                   best_len = traces[bbd[di].start_of_trace].length;
1163                 }
1164             }
1165
1166           if (for_size)
1167             {
1168               if (!best)
1169                 /* Stop finding the successor traces.  */
1170                 break;
1171
1172               /* It is OK to connect block n with block n + 1 or a block
1173                  before n.  For others, only connect to the loop header.  */
1174               if (best->dest->index > (traces[t].last->index + 1))
1175                 {
1176                   int count = EDGE_COUNT (best->dest->preds);
1177
1178                   FOR_EACH_EDGE (e, ei, best->dest->preds)
1179                     if (e->flags & EDGE_DFS_BACK)
1180                       count--;
1181
1182                   /* If dest has multiple predecessors, skip it.  We expect
1183                      that one predecessor with smaller index connects with it
1184                      later.  */
1185                   if (count != 1)
1186                     break;
1187                 }
1188
1189               /* Only connect Trace n with Trace n + 1.  It is conservative
1190                  to keep the order as close as possible to the original order.
1191                  It also helps to reduce long jumps.  */
1192               if (last_trace != bbd[best->dest->index].start_of_trace - 1)
1193                 break;
1194
1195               if (dump_file)
1196                 fprintf (dump_file, "Connection: %d %d\n",
1197                          best->src->index, best->dest->index);
1198
1199               t = bbd[best->dest->index].start_of_trace;
1200               traces[last_trace].last->aux = traces[t].first;
1201               connected[t] = true;
1202               last_trace = t;
1203             }
1204           else if (best)
1205             {
1206               if (dump_file)
1207                 {
1208                   fprintf (dump_file, "Connection: %d %d\n",
1209                            best->src->index, best->dest->index);
1210                 }
1211               t = bbd[best->dest->index].start_of_trace;
1212               traces[last_trace].last->aux = traces[t].first;
1213               connected[t] = true;
1214               last_trace = t;
1215             }
1216           else
1217             {
1218               /* Try to connect the traces by duplication of 1 block.  */
1219               edge e2;
1220               basic_block next_bb = NULL;
1221               bool try_copy = false;
1222
1223               FOR_EACH_EDGE (e, ei, traces[t].last->succs)
1224                 if (e->dest != EXIT_BLOCK_PTR_FOR_FN (cfun)
1225                     && (e->flags & EDGE_CAN_FALLTHRU)
1226                     && !(e->flags & EDGE_COMPLEX)
1227                     && (!best || e->probability > best->probability))
1228                   {
1229                     edge_iterator ei;
1230                     edge best2 = NULL;
1231                     int best2_len = 0;
1232
1233                     /* If the destination is a start of a trace which is only
1234                        one block long, then no need to search the successor
1235                        blocks of the trace.  Accept it.  */
1236                     if (bbd[e->dest->index].start_of_trace >= 0
1237                         && traces[bbd[e->dest->index].start_of_trace].length
1238                            == 1)
1239                       {
1240                         best = e;
1241                         try_copy = true;
1242                         continue;
1243                       }
1244
1245                     FOR_EACH_EDGE (e2, ei, e->dest->succs)
1246                       {
1247                         int di = e2->dest->index;
1248
1249                         if (e2->dest == EXIT_BLOCK_PTR_FOR_FN (cfun)
1250                             || ((e2->flags & EDGE_CAN_FALLTHRU)
1251                                 && !(e2->flags & EDGE_COMPLEX)
1252                                 && bbd[di].start_of_trace >= 0
1253                                 && !connected[bbd[di].start_of_trace]
1254                                 && BB_PARTITION (e2->dest) == current_partition
1255                                 && EDGE_FREQUENCY (e2) >= freq_threshold
1256                                 && e2->count >= count_threshold
1257                                 && (!best2
1258                                     || e2->probability > best2->probability
1259                                     || (e2->probability == best2->probability
1260                                         && traces[bbd[di].start_of_trace].length
1261                                            > best2_len))))
1262                           {
1263                             best = e;
1264                             best2 = e2;
1265                             if (e2->dest != EXIT_BLOCK_PTR_FOR_FN (cfun))
1266                               best2_len = traces[bbd[di].start_of_trace].length;
1267                             else
1268                               best2_len = INT_MAX;
1269                             next_bb = e2->dest;
1270                             try_copy = true;
1271                           }
1272                       }
1273                   }
1274
1275               if (crtl->has_bb_partition)
1276                 try_copy = false;
1277
1278               /* Copy tiny blocks always; copy larger blocks only when the
1279                  edge is traversed frequently enough.  */
1280               if (try_copy
1281                   && copy_bb_p (best->dest,
1282                                 optimize_edge_for_speed_p (best)
1283                                 && EDGE_FREQUENCY (best) >= freq_threshold
1284                                 && best->count >= count_threshold))
1285                 {
1286                   basic_block new_bb;
1287
1288                   if (dump_file)
1289                     {
1290                       fprintf (dump_file, "Connection: %d %d ",
1291                                traces[t].last->index, best->dest->index);
1292                       if (!next_bb)
1293                         fputc ('\n', dump_file);
1294                       else if (next_bb == EXIT_BLOCK_PTR_FOR_FN (cfun))
1295                         fprintf (dump_file, "exit\n");
1296                       else
1297                         fprintf (dump_file, "%d\n", next_bb->index);
1298                     }
1299
1300                   new_bb = copy_bb (best->dest, best, traces[t].last, t);
1301                   traces[t].last = new_bb;
1302                   if (next_bb && next_bb != EXIT_BLOCK_PTR_FOR_FN (cfun))
1303                     {
1304                       t = bbd[next_bb->index].start_of_trace;
1305                       traces[last_trace].last->aux = traces[t].first;
1306                       connected[t] = true;
1307                       last_trace = t;
1308                     }
1309                   else
1310                     break;      /* Stop finding the successor traces.  */
1311                 }
1312               else
1313                 break;  /* Stop finding the successor traces.  */
1314             }
1315         }
1316     }
1317
1318   if (dump_file)
1319     {
1320       basic_block bb;
1321
1322       fprintf (dump_file, "Final order:\n");
1323       for (bb = traces[0].first; bb; bb = (basic_block) bb->aux)
1324         fprintf (dump_file, "%d ", bb->index);
1325       fprintf (dump_file, "\n");
1326       fflush (dump_file);
1327     }
1328
1329   FREE (connected);
1330 }
1331
1332 /* Return true when BB can and should be copied. CODE_MAY_GROW is true
1333    when code size is allowed to grow by duplication.  */
1334
1335 static bool
1336 copy_bb_p (const_basic_block bb, int code_may_grow)
1337 {
1338   int size = 0;
1339   int max_size = uncond_jump_length;
1340   rtx_insn *insn;
1341
1342   if (!bb->frequency)
1343     return false;
1344   if (EDGE_COUNT (bb->preds) < 2)
1345     return false;
1346   if (!can_duplicate_block_p (bb))
1347     return false;
1348
1349   /* Avoid duplicating blocks which have many successors (PR/13430).  */
1350   if (EDGE_COUNT (bb->succs) > 8)
1351     return false;
1352
1353   if (code_may_grow && optimize_bb_for_speed_p (bb))
1354     max_size *= PARAM_VALUE (PARAM_MAX_GROW_COPY_BB_INSNS);
1355
1356   FOR_BB_INSNS (bb, insn)
1357     {
1358       if (INSN_P (insn))
1359         size += get_attr_min_length (insn);
1360     }
1361
1362   if (size <= max_size)
1363     return true;
1364
1365   if (dump_file)
1366     {
1367       fprintf (dump_file,
1368                "Block %d can't be copied because its size = %d.\n",
1369                bb->index, size);
1370     }
1371
1372   return false;
1373 }
1374
1375 /* Return the length of unconditional jump instruction.  */
1376
1377 int
1378 get_uncond_jump_length (void)
1379 {
1380   rtx_insn *label, *jump;
1381   int length;
1382
1383   label = emit_label_before (gen_label_rtx (), get_insns ());
1384   jump = emit_jump_insn (gen_jump (label));
1385
1386   length = get_attr_min_length (jump);
1387
1388   delete_insn (jump);
1389   delete_insn (label);
1390   return length;
1391 }
1392
1393 /* The landing pad OLD_LP, in block OLD_BB, has edges from both partitions.
1394    Duplicate the landing pad and split the edges so that no EH edge
1395    crosses partitions.  */
1396
1397 static void
1398 fix_up_crossing_landing_pad (eh_landing_pad old_lp, basic_block old_bb)
1399 {
1400   eh_landing_pad new_lp;
1401   basic_block new_bb, last_bb, post_bb;
1402   rtx_insn *new_label, *jump;
1403   rtx post_label;
1404   unsigned new_partition;
1405   edge_iterator ei;
1406   edge e;
1407
1408   /* Generate the new landing-pad structure.  */
1409   new_lp = gen_eh_landing_pad (old_lp->region);
1410   new_lp->post_landing_pad = old_lp->post_landing_pad;
1411   new_lp->landing_pad = gen_label_rtx ();
1412   LABEL_PRESERVE_P (new_lp->landing_pad) = 1;
1413
1414   /* Put appropriate instructions in new bb.  */
1415   new_label = emit_label (new_lp->landing_pad);
1416
1417   expand_dw2_landing_pad_for_region (old_lp->region);
1418
1419   post_bb = BLOCK_FOR_INSN (old_lp->landing_pad);
1420   post_bb = single_succ (post_bb);
1421   post_label = block_label (post_bb);
1422   jump = emit_jump_insn (gen_jump (post_label));
1423   JUMP_LABEL (jump) = post_label;
1424
1425   /* Create new basic block to be dest for lp.  */
1426   last_bb = EXIT_BLOCK_PTR_FOR_FN (cfun)->prev_bb;
1427   new_bb = create_basic_block (new_label, jump, last_bb);
1428   new_bb->aux = last_bb->aux;
1429   last_bb->aux = new_bb;
1430
1431   emit_barrier_after_bb (new_bb);
1432
1433   make_edge (new_bb, post_bb, 0);
1434
1435   /* Make sure new bb is in the other partition.  */
1436   new_partition = BB_PARTITION (old_bb);
1437   new_partition ^= BB_HOT_PARTITION | BB_COLD_PARTITION;
1438   BB_SET_PARTITION (new_bb, new_partition);
1439
1440   /* Fix up the edges.  */
1441   for (ei = ei_start (old_bb->preds); (e = ei_safe_edge (ei)) != NULL; )
1442     if (BB_PARTITION (e->src) == new_partition)
1443       {
1444         rtx_insn *insn = BB_END (e->src);
1445         rtx note = find_reg_note (insn, REG_EH_REGION, NULL_RTX);
1446
1447         gcc_assert (note != NULL);
1448         gcc_checking_assert (INTVAL (XEXP (note, 0)) == old_lp->index);
1449         XEXP (note, 0) = GEN_INT (new_lp->index);
1450
1451         /* Adjust the edge to the new destination.  */
1452         redirect_edge_succ (e, new_bb);
1453       }
1454     else
1455       ei_next (&ei);
1456 }
1457
1458
1459 /* Ensure that all hot bbs are included in a hot path through the
1460    procedure. This is done by calling this function twice, once
1461    with WALK_UP true (to look for paths from the entry to hot bbs) and
1462    once with WALK_UP false (to look for paths from hot bbs to the exit).
1463    Returns the updated value of COLD_BB_COUNT and adds newly-hot bbs
1464    to BBS_IN_HOT_PARTITION.  */
1465
1466 static unsigned int
1467 sanitize_hot_paths (bool walk_up, unsigned int cold_bb_count,
1468                     vec<basic_block> *bbs_in_hot_partition)
1469 {
1470   /* Callers check this.  */
1471   gcc_checking_assert (cold_bb_count);
1472
1473   /* Keep examining hot bbs while we still have some left to check
1474      and there are remaining cold bbs.  */
1475   vec<basic_block> hot_bbs_to_check = bbs_in_hot_partition->copy ();
1476   while (! hot_bbs_to_check.is_empty ()
1477          && cold_bb_count)
1478     {
1479       basic_block bb = hot_bbs_to_check.pop ();
1480       vec<edge, va_gc> *edges = walk_up ? bb->preds : bb->succs;
1481       edge e;
1482       edge_iterator ei;
1483       int highest_probability = 0;
1484       int highest_freq = 0;
1485       gcov_type highest_count = 0;
1486       bool found = false;
1487
1488       /* Walk the preds/succs and check if there is at least one already
1489          marked hot. Keep track of the most frequent pred/succ so that we
1490          can mark it hot if we don't find one.  */
1491       FOR_EACH_EDGE (e, ei, edges)
1492         {
1493           basic_block reach_bb = walk_up ? e->src : e->dest;
1494
1495           if (e->flags & EDGE_DFS_BACK)
1496             continue;
1497
1498           if (BB_PARTITION (reach_bb) != BB_COLD_PARTITION)
1499           {
1500             found = true;
1501             break;
1502           }
1503           /* The following loop will look for the hottest edge via
1504              the edge count, if it is non-zero, then fallback to the edge
1505              frequency and finally the edge probability.  */
1506           if (e->count > highest_count)
1507             highest_count = e->count;
1508           int edge_freq = EDGE_FREQUENCY (e);
1509           if (edge_freq > highest_freq)
1510             highest_freq = edge_freq;
1511           if (e->probability > highest_probability)
1512             highest_probability = e->probability;
1513         }
1514
1515       /* If bb is reached by (or reaches, in the case of !WALK_UP) another hot
1516          block (or unpartitioned, e.g. the entry block) then it is ok. If not,
1517          then the most frequent pred (or succ) needs to be adjusted.  In the
1518          case where multiple preds/succs have the same frequency (e.g. a
1519          50-50 branch), then both will be adjusted.  */
1520       if (found)
1521         continue;
1522
1523       FOR_EACH_EDGE (e, ei, edges)
1524         {
1525           if (e->flags & EDGE_DFS_BACK)
1526             continue;
1527           /* Select the hottest edge using the edge count, if it is non-zero,
1528              then fallback to the edge frequency and finally the edge
1529              probability.  */
1530           if (highest_count)
1531             {
1532               if (e->count < highest_count)
1533                 continue;
1534             }
1535           else if (highest_freq)
1536             {
1537               if (EDGE_FREQUENCY (e) < highest_freq)
1538                 continue;
1539             }
1540           else if (e->probability < highest_probability)
1541             continue;
1542
1543           basic_block reach_bb = walk_up ? e->src : e->dest;
1544
1545           /* We have a hot bb with an immediate dominator that is cold.
1546              The dominator needs to be re-marked hot.  */
1547           BB_SET_PARTITION (reach_bb, BB_HOT_PARTITION);
1548           cold_bb_count--;
1549
1550           /* Now we need to examine newly-hot reach_bb to see if it is also
1551              dominated by a cold bb.  */
1552           bbs_in_hot_partition->safe_push (reach_bb);
1553           hot_bbs_to_check.safe_push (reach_bb);
1554         }
1555     }
1556
1557   return cold_bb_count;
1558 }
1559
1560
1561 /* Find the basic blocks that are rarely executed and need to be moved to
1562    a separate section of the .o file (to cut down on paging and improve
1563    cache locality).  Return a vector of all edges that cross.  */
1564
1565 static vec<edge>
1566 find_rarely_executed_basic_blocks_and_crossing_edges (void)
1567 {
1568   vec<edge> crossing_edges = vNULL;
1569   basic_block bb;
1570   edge e;
1571   edge_iterator ei;
1572   unsigned int cold_bb_count = 0;
1573   vec<basic_block> bbs_in_hot_partition = vNULL;
1574
1575   /* Mark which partition (hot/cold) each basic block belongs in.  */
1576   FOR_EACH_BB_FN (bb, cfun)
1577     {
1578       bool cold_bb = false;
1579
1580       if (probably_never_executed_bb_p (cfun, bb))
1581         {
1582           /* Handle profile insanities created by upstream optimizations
1583              by also checking the incoming edge weights. If there is a non-cold
1584              incoming edge, conservatively prevent this block from being split
1585              into the cold section.  */
1586           cold_bb = true;
1587           FOR_EACH_EDGE (e, ei, bb->preds)
1588             if (!probably_never_executed_edge_p (cfun, e))
1589               {
1590                 cold_bb = false;
1591                 break;
1592               }
1593         }
1594       if (cold_bb)
1595         {
1596           BB_SET_PARTITION (bb, BB_COLD_PARTITION);
1597           cold_bb_count++;
1598         }
1599       else
1600         {
1601           BB_SET_PARTITION (bb, BB_HOT_PARTITION);
1602           bbs_in_hot_partition.safe_push (bb);
1603         }
1604     }
1605
1606   /* Ensure that hot bbs are included along a hot path from the entry to exit.
1607      Several different possibilities may include cold bbs along all paths
1608      to/from a hot bb. One is that there are edge weight insanities
1609      due to optimization phases that do not properly update basic block profile
1610      counts. The second is that the entry of the function may not be hot, because
1611      it is entered fewer times than the number of profile training runs, but there
1612      is a loop inside the function that causes blocks within the function to be
1613      above the threshold for hotness. This is fixed by walking up from hot bbs
1614      to the entry block, and then down from hot bbs to the exit, performing
1615      partitioning fixups as necessary.  */
1616   if (cold_bb_count)
1617     {
1618       mark_dfs_back_edges ();
1619       cold_bb_count = sanitize_hot_paths (true, cold_bb_count,
1620                                           &bbs_in_hot_partition);
1621       if (cold_bb_count)
1622         sanitize_hot_paths (false, cold_bb_count, &bbs_in_hot_partition);
1623     }
1624
1625   /* The format of .gcc_except_table does not allow landing pads to
1626      be in a different partition as the throw.  Fix this by either
1627      moving or duplicating the landing pads.  */
1628   if (cfun->eh->lp_array)
1629     {
1630       unsigned i;
1631       eh_landing_pad lp;
1632
1633       FOR_EACH_VEC_ELT (*cfun->eh->lp_array, i, lp)
1634         {
1635           bool all_same, all_diff;
1636
1637           if (lp == NULL
1638               || lp->landing_pad == NULL_RTX
1639               || !LABEL_P (lp->landing_pad))
1640             continue;
1641
1642           all_same = all_diff = true;
1643           bb = BLOCK_FOR_INSN (lp->landing_pad);
1644           FOR_EACH_EDGE (e, ei, bb->preds)
1645             {
1646               gcc_assert (e->flags & EDGE_EH);
1647               if (BB_PARTITION (bb) == BB_PARTITION (e->src))
1648                 all_diff = false;
1649               else
1650                 all_same = false;
1651             }
1652
1653           if (all_same)
1654             ;
1655           else if (all_diff)
1656             {
1657               int which = BB_PARTITION (bb);
1658               which ^= BB_HOT_PARTITION | BB_COLD_PARTITION;
1659               BB_SET_PARTITION (bb, which);
1660             }
1661           else
1662             fix_up_crossing_landing_pad (lp, bb);
1663         }
1664     }
1665
1666   /* Mark every edge that crosses between sections.  */
1667
1668   FOR_EACH_BB_FN (bb, cfun)
1669     FOR_EACH_EDGE (e, ei, bb->succs)
1670       {
1671         unsigned int flags = e->flags;
1672
1673         /* We should never have EDGE_CROSSING set yet.  */
1674         gcc_checking_assert ((flags & EDGE_CROSSING) == 0);
1675
1676         if (e->src != ENTRY_BLOCK_PTR_FOR_FN (cfun)
1677             && e->dest != EXIT_BLOCK_PTR_FOR_FN (cfun)
1678             && BB_PARTITION (e->src) != BB_PARTITION (e->dest))
1679           {
1680             crossing_edges.safe_push (e);
1681             flags |= EDGE_CROSSING;
1682           }
1683
1684         /* Now that we've split eh edges as appropriate, allow landing pads
1685            to be merged with the post-landing pads.  */
1686         flags &= ~EDGE_PRESERVE;
1687
1688         e->flags = flags;
1689       }
1690
1691   return crossing_edges;
1692 }
1693
1694 /* Set the flag EDGE_CAN_FALLTHRU for edges that can be fallthru.  */
1695
1696 static void
1697 set_edge_can_fallthru_flag (void)
1698 {
1699   basic_block bb;
1700
1701   FOR_EACH_BB_FN (bb, cfun)
1702     {
1703       edge e;
1704       edge_iterator ei;
1705
1706       FOR_EACH_EDGE (e, ei, bb->succs)
1707         {
1708           e->flags &= ~EDGE_CAN_FALLTHRU;
1709
1710           /* The FALLTHRU edge is also CAN_FALLTHRU edge.  */
1711           if (e->flags & EDGE_FALLTHRU)
1712             e->flags |= EDGE_CAN_FALLTHRU;
1713         }
1714
1715       /* If the BB ends with an invertible condjump all (2) edges are
1716          CAN_FALLTHRU edges.  */
1717       if (EDGE_COUNT (bb->succs) != 2)
1718         continue;
1719       if (!any_condjump_p (BB_END (bb)))
1720         continue;
1721       if (!invert_jump (BB_END (bb), JUMP_LABEL (BB_END (bb)), 0))
1722         continue;
1723       invert_jump (BB_END (bb), JUMP_LABEL (BB_END (bb)), 0);
1724       EDGE_SUCC (bb, 0)->flags |= EDGE_CAN_FALLTHRU;
1725       EDGE_SUCC (bb, 1)->flags |= EDGE_CAN_FALLTHRU;
1726     }
1727 }
1728
1729 /* If any destination of a crossing edge does not have a label, add label;
1730    Convert any easy fall-through crossing edges to unconditional jumps.  */
1731
1732 static void
1733 add_labels_and_missing_jumps (vec<edge> crossing_edges)
1734 {
1735   size_t i;
1736   edge e;
1737
1738   FOR_EACH_VEC_ELT (crossing_edges, i, e)
1739     {
1740       basic_block src = e->src;
1741       basic_block dest = e->dest;
1742       rtx label;
1743       rtx_insn *new_jump;
1744
1745       if (dest == EXIT_BLOCK_PTR_FOR_FN (cfun))
1746         continue;
1747
1748       /* Make sure dest has a label.  */
1749       label = block_label (dest);
1750
1751       /* Nothing to do for non-fallthru edges.  */
1752       if (src == ENTRY_BLOCK_PTR_FOR_FN (cfun))
1753         continue;
1754       if ((e->flags & EDGE_FALLTHRU) == 0)
1755         continue;
1756
1757       /* If the block does not end with a control flow insn, then we
1758          can trivially add a jump to the end to fixup the crossing.
1759          Otherwise the jump will have to go in a new bb, which will
1760          be handled by fix_up_fall_thru_edges function.  */
1761       if (control_flow_insn_p (BB_END (src)))
1762         continue;
1763
1764       /* Make sure there's only one successor.  */
1765       gcc_assert (single_succ_p (src));
1766
1767       new_jump = emit_jump_insn_after (gen_jump (label), BB_END (src));
1768       BB_END (src) = new_jump;
1769       JUMP_LABEL (new_jump) = label;
1770       LABEL_NUSES (label) += 1;
1771
1772       emit_barrier_after_bb (src);
1773
1774       /* Mark edge as non-fallthru.  */
1775       e->flags &= ~EDGE_FALLTHRU;
1776     }
1777 }
1778
1779 /* Find any bb's where the fall-through edge is a crossing edge (note that
1780    these bb's must also contain a conditional jump or end with a call
1781    instruction; we've already dealt with fall-through edges for blocks
1782    that didn't have a conditional jump or didn't end with call instruction
1783    in the call to add_labels_and_missing_jumps).  Convert the fall-through
1784    edge to non-crossing edge by inserting a new bb to fall-through into.
1785    The new bb will contain an unconditional jump (crossing edge) to the
1786    original fall through destination.  */
1787
1788 static void
1789 fix_up_fall_thru_edges (void)
1790 {
1791   basic_block cur_bb;
1792   basic_block new_bb;
1793   edge succ1;
1794   edge succ2;
1795   edge fall_thru;
1796   edge cond_jump = NULL;
1797   edge e;
1798   bool cond_jump_crosses;
1799   int invert_worked;
1800   rtx_insn *old_jump;
1801   rtx fall_thru_label;
1802
1803   FOR_EACH_BB_FN (cur_bb, cfun)
1804     {
1805       fall_thru = NULL;
1806       if (EDGE_COUNT (cur_bb->succs) > 0)
1807         succ1 = EDGE_SUCC (cur_bb, 0);
1808       else
1809         succ1 = NULL;
1810
1811       if (EDGE_COUNT (cur_bb->succs) > 1)
1812         succ2 = EDGE_SUCC (cur_bb, 1);
1813       else
1814         succ2 = NULL;
1815
1816       /* Find the fall-through edge.  */
1817
1818       if (succ1
1819           && (succ1->flags & EDGE_FALLTHRU))
1820         {
1821           fall_thru = succ1;
1822           cond_jump = succ2;
1823         }
1824       else if (succ2
1825                && (succ2->flags & EDGE_FALLTHRU))
1826         {
1827           fall_thru = succ2;
1828           cond_jump = succ1;
1829         }
1830       else if (succ1
1831                && (block_ends_with_call_p (cur_bb)
1832                    || can_throw_internal (BB_END (cur_bb))))
1833         {
1834           edge e;
1835           edge_iterator ei;
1836
1837           FOR_EACH_EDGE (e, ei, cur_bb->succs)
1838             if (e->flags & EDGE_FALLTHRU)
1839               {
1840                 fall_thru = e;
1841                 break;
1842               }
1843         }
1844
1845       if (fall_thru && (fall_thru->dest != EXIT_BLOCK_PTR_FOR_FN (cfun)))
1846         {
1847           /* Check to see if the fall-thru edge is a crossing edge.  */
1848
1849           if (fall_thru->flags & EDGE_CROSSING)
1850             {
1851               /* The fall_thru edge crosses; now check the cond jump edge, if
1852                  it exists.  */
1853
1854               cond_jump_crosses = true;
1855               invert_worked  = 0;
1856               old_jump = BB_END (cur_bb);
1857
1858               /* Find the jump instruction, if there is one.  */
1859
1860               if (cond_jump)
1861                 {
1862                   if (!(cond_jump->flags & EDGE_CROSSING))
1863                     cond_jump_crosses = false;
1864
1865                   /* We know the fall-thru edge crosses; if the cond
1866                      jump edge does NOT cross, and its destination is the
1867                      next block in the bb order, invert the jump
1868                      (i.e. fix it so the fall through does not cross and
1869                      the cond jump does).  */
1870
1871                   if (!cond_jump_crosses)
1872                     {
1873                       /* Find label in fall_thru block. We've already added
1874                          any missing labels, so there must be one.  */
1875
1876                       fall_thru_label = block_label (fall_thru->dest);
1877
1878                       if (old_jump && JUMP_P (old_jump) && fall_thru_label)
1879                         invert_worked = invert_jump (old_jump,
1880                                                      fall_thru_label,0);
1881                       if (invert_worked)
1882                         {
1883                           fall_thru->flags &= ~EDGE_FALLTHRU;
1884                           cond_jump->flags |= EDGE_FALLTHRU;
1885                           update_br_prob_note (cur_bb);
1886                           e = fall_thru;
1887                           fall_thru = cond_jump;
1888                           cond_jump = e;
1889                           cond_jump->flags |= EDGE_CROSSING;
1890                           fall_thru->flags &= ~EDGE_CROSSING;
1891                         }
1892                     }
1893                 }
1894
1895               if (cond_jump_crosses || !invert_worked)
1896                 {
1897                   /* This is the case where both edges out of the basic
1898                      block are crossing edges. Here we will fix up the
1899                      fall through edge. The jump edge will be taken care
1900                      of later.  The EDGE_CROSSING flag of fall_thru edge
1901                      is unset before the call to force_nonfallthru
1902                      function because if a new basic-block is created
1903                      this edge remains in the current section boundary
1904                      while the edge between new_bb and the fall_thru->dest
1905                      becomes EDGE_CROSSING.  */
1906
1907                   fall_thru->flags &= ~EDGE_CROSSING;
1908                   new_bb = force_nonfallthru (fall_thru);
1909
1910                   if (new_bb)
1911                     {
1912                       new_bb->aux = cur_bb->aux;
1913                       cur_bb->aux = new_bb;
1914
1915                       /* This is done by force_nonfallthru_and_redirect.  */
1916                       gcc_assert (BB_PARTITION (new_bb)
1917                                   == BB_PARTITION (cur_bb));
1918
1919                       single_succ_edge (new_bb)->flags |= EDGE_CROSSING;
1920                     }
1921                   else
1922                     {
1923                       /* If a new basic-block was not created; restore
1924                          the EDGE_CROSSING flag.  */
1925                       fall_thru->flags |= EDGE_CROSSING;
1926                     }
1927
1928                   /* Add barrier after new jump */
1929                   emit_barrier_after_bb (new_bb ? new_bb : cur_bb);
1930                 }
1931             }
1932         }
1933     }
1934 }
1935
1936 /* This function checks the destination block of a "crossing jump" to
1937    see if it has any crossing predecessors that begin with a code label
1938    and end with an unconditional jump.  If so, it returns that predecessor
1939    block.  (This is to avoid creating lots of new basic blocks that all
1940    contain unconditional jumps to the same destination).  */
1941
1942 static basic_block
1943 find_jump_block (basic_block jump_dest)
1944 {
1945   basic_block source_bb = NULL;
1946   edge e;
1947   rtx_insn *insn;
1948   edge_iterator ei;
1949
1950   FOR_EACH_EDGE (e, ei, jump_dest->preds)
1951     if (e->flags & EDGE_CROSSING)
1952       {
1953         basic_block src = e->src;
1954
1955         /* Check each predecessor to see if it has a label, and contains
1956            only one executable instruction, which is an unconditional jump.
1957            If so, we can use it.  */
1958
1959         if (LABEL_P (BB_HEAD (src)))
1960           for (insn = BB_HEAD (src);
1961                !INSN_P (insn) && insn != NEXT_INSN (BB_END (src));
1962                insn = NEXT_INSN (insn))
1963             {
1964               if (INSN_P (insn)
1965                   && insn == BB_END (src)
1966                   && JUMP_P (insn)
1967                   && !any_condjump_p (insn))
1968                 {
1969                   source_bb = src;
1970                   break;
1971                 }
1972             }
1973
1974         if (source_bb)
1975           break;
1976       }
1977
1978   return source_bb;
1979 }
1980
1981 /* Find all BB's with conditional jumps that are crossing edges;
1982    insert a new bb and make the conditional jump branch to the new
1983    bb instead (make the new bb same color so conditional branch won't
1984    be a 'crossing' edge).  Insert an unconditional jump from the
1985    new bb to the original destination of the conditional jump.  */
1986
1987 static void
1988 fix_crossing_conditional_branches (void)
1989 {
1990   basic_block cur_bb;
1991   basic_block new_bb;
1992   basic_block dest;
1993   edge succ1;
1994   edge succ2;
1995   edge crossing_edge;
1996   edge new_edge;
1997   rtx_insn *old_jump;
1998   rtx set_src;
1999   rtx old_label = NULL_RTX;
2000   rtx new_label;
2001
2002   FOR_EACH_BB_FN (cur_bb, cfun)
2003     {
2004       crossing_edge = NULL;
2005       if (EDGE_COUNT (cur_bb->succs) > 0)
2006         succ1 = EDGE_SUCC (cur_bb, 0);
2007       else
2008         succ1 = NULL;
2009
2010       if (EDGE_COUNT (cur_bb->succs) > 1)
2011         succ2 = EDGE_SUCC (cur_bb, 1);
2012       else
2013         succ2 = NULL;
2014
2015       /* We already took care of fall-through edges, so only one successor
2016          can be a crossing edge.  */
2017
2018       if (succ1 && (succ1->flags & EDGE_CROSSING))
2019         crossing_edge = succ1;
2020       else if (succ2 && (succ2->flags & EDGE_CROSSING))
2021         crossing_edge = succ2;
2022
2023       if (crossing_edge)
2024         {
2025           old_jump = BB_END (cur_bb);
2026
2027           /* Check to make sure the jump instruction is a
2028              conditional jump.  */
2029
2030           set_src = NULL_RTX;
2031
2032           if (any_condjump_p (old_jump))
2033             {
2034               if (GET_CODE (PATTERN (old_jump)) == SET)
2035                 set_src = SET_SRC (PATTERN (old_jump));
2036               else if (GET_CODE (PATTERN (old_jump)) == PARALLEL)
2037                 {
2038                   set_src = XVECEXP (PATTERN (old_jump), 0,0);
2039                   if (GET_CODE (set_src) == SET)
2040                     set_src = SET_SRC (set_src);
2041                   else
2042                     set_src = NULL_RTX;
2043                 }
2044             }
2045
2046           if (set_src && (GET_CODE (set_src) == IF_THEN_ELSE))
2047             {
2048               if (GET_CODE (XEXP (set_src, 1)) == PC)
2049                 old_label = XEXP (set_src, 2);
2050               else if (GET_CODE (XEXP (set_src, 2)) == PC)
2051                 old_label = XEXP (set_src, 1);
2052
2053               /* Check to see if new bb for jumping to that dest has
2054                  already been created; if so, use it; if not, create
2055                  a new one.  */
2056
2057               new_bb = find_jump_block (crossing_edge->dest);
2058
2059               if (new_bb)
2060                 new_label = block_label (new_bb);
2061               else
2062                 {
2063                   basic_block last_bb;
2064                   rtx_insn *new_jump;
2065
2066                   /* Create new basic block to be dest for
2067                      conditional jump.  */
2068
2069                   /* Put appropriate instructions in new bb.  */
2070
2071                   new_label = gen_label_rtx ();
2072                   emit_label (new_label);
2073
2074                   gcc_assert (GET_CODE (old_label) == LABEL_REF);
2075                   old_label = JUMP_LABEL (old_jump);
2076                   new_jump = emit_jump_insn (gen_jump (old_label));
2077                   JUMP_LABEL (new_jump) = old_label;
2078
2079                   last_bb = EXIT_BLOCK_PTR_FOR_FN (cfun)->prev_bb;
2080                   new_bb = create_basic_block (new_label, new_jump, last_bb);
2081                   new_bb->aux = last_bb->aux;
2082                   last_bb->aux = new_bb;
2083
2084                   emit_barrier_after_bb (new_bb);
2085
2086                   /* Make sure new bb is in same partition as source
2087                      of conditional branch.  */
2088                   BB_COPY_PARTITION (new_bb, cur_bb);
2089                 }
2090
2091               /* Make old jump branch to new bb.  */
2092
2093               redirect_jump (old_jump, new_label, 0);
2094
2095               /* Remove crossing_edge as predecessor of 'dest'.  */
2096
2097               dest = crossing_edge->dest;
2098
2099               redirect_edge_succ (crossing_edge, new_bb);
2100
2101               /* Make a new edge from new_bb to old dest; new edge
2102                  will be a successor for new_bb and a predecessor
2103                  for 'dest'.  */
2104
2105               if (EDGE_COUNT (new_bb->succs) == 0)
2106                 new_edge = make_edge (new_bb, dest, 0);
2107               else
2108                 new_edge = EDGE_SUCC (new_bb, 0);
2109
2110               crossing_edge->flags &= ~EDGE_CROSSING;
2111               new_edge->flags |= EDGE_CROSSING;
2112             }
2113         }
2114     }
2115 }
2116
2117 /* Find any unconditional branches that cross between hot and cold
2118    sections.  Convert them into indirect jumps instead.  */
2119
2120 static void
2121 fix_crossing_unconditional_branches (void)
2122 {
2123   basic_block cur_bb;
2124   rtx_insn *last_insn;
2125   rtx label;
2126   rtx label_addr;
2127   rtx_insn *indirect_jump_sequence;
2128   rtx_insn *jump_insn = NULL;
2129   rtx new_reg;
2130   rtx_insn *cur_insn;
2131   edge succ;
2132
2133   FOR_EACH_BB_FN (cur_bb, cfun)
2134     {
2135       last_insn = BB_END (cur_bb);
2136
2137       if (EDGE_COUNT (cur_bb->succs) < 1)
2138         continue;
2139
2140       succ = EDGE_SUCC (cur_bb, 0);
2141
2142       /* Check to see if bb ends in a crossing (unconditional) jump.  At
2143          this point, no crossing jumps should be conditional.  */
2144
2145       if (JUMP_P (last_insn)
2146           && (succ->flags & EDGE_CROSSING))
2147         {
2148           gcc_assert (!any_condjump_p (last_insn));
2149
2150           /* Make sure the jump is not already an indirect or table jump.  */
2151
2152           if (!computed_jump_p (last_insn)
2153               && !tablejump_p (last_insn, NULL, NULL))
2154             {
2155               /* We have found a "crossing" unconditional branch.  Now
2156                  we must convert it to an indirect jump.  First create
2157                  reference of label, as target for jump.  */
2158
2159               label = JUMP_LABEL (last_insn);
2160               label_addr = gen_rtx_LABEL_REF (Pmode, label);
2161               LABEL_NUSES (label) += 1;
2162
2163               /* Get a register to use for the indirect jump.  */
2164
2165               new_reg = gen_reg_rtx (Pmode);
2166
2167               /* Generate indirect the jump sequence.  */
2168
2169               start_sequence ();
2170               emit_move_insn (new_reg, label_addr);
2171               emit_indirect_jump (new_reg);
2172               indirect_jump_sequence = get_insns ();
2173               end_sequence ();
2174
2175               /* Make sure every instruction in the new jump sequence has
2176                  its basic block set to be cur_bb.  */
2177
2178               for (cur_insn = indirect_jump_sequence; cur_insn;
2179                    cur_insn = NEXT_INSN (cur_insn))
2180                 {
2181                   if (!BARRIER_P (cur_insn))
2182                     BLOCK_FOR_INSN (cur_insn) = cur_bb;
2183                   if (JUMP_P (cur_insn))
2184                     jump_insn = cur_insn;
2185                 }
2186
2187               /* Insert the new (indirect) jump sequence immediately before
2188                  the unconditional jump, then delete the unconditional jump.  */
2189
2190               emit_insn_before (indirect_jump_sequence, last_insn);
2191               delete_insn (last_insn);
2192
2193               JUMP_LABEL (jump_insn) = label;
2194               LABEL_NUSES (label)++;
2195
2196               /* Make BB_END for cur_bb be the jump instruction (NOT the
2197                  barrier instruction at the end of the sequence...).  */
2198
2199               BB_END (cur_bb) = jump_insn;
2200             }
2201         }
2202     }
2203 }
2204
2205 /* Update CROSSING_JUMP_P flags on all jump insns.  */
2206
2207 static void
2208 update_crossing_jump_flags (void)
2209 {
2210   basic_block bb;
2211   edge e;
2212   edge_iterator ei;
2213
2214   FOR_EACH_BB_FN (bb, cfun)
2215     FOR_EACH_EDGE (e, ei, bb->succs)
2216       if (e->flags & EDGE_CROSSING)
2217         {
2218           if (JUMP_P (BB_END (bb))
2219               /* Some flags were added during fix_up_fall_thru_edges, via
2220                  force_nonfallthru_and_redirect.  */
2221               && !CROSSING_JUMP_P (BB_END (bb)))
2222             CROSSING_JUMP_P (BB_END (bb)) = 1;
2223           break;
2224         }
2225 }
2226
2227 /* Reorder basic blocks.  The main entry point to this file.  FLAGS is
2228    the set of flags to pass to cfg_layout_initialize().  */
2229
2230 static void
2231 reorder_basic_blocks (void)
2232 {
2233   int n_traces;
2234   int i;
2235   struct trace *traces;
2236
2237   gcc_assert (current_ir_type () == IR_RTL_CFGLAYOUT);
2238
2239   if (n_basic_blocks_for_fn (cfun) <= NUM_FIXED_BLOCKS + 1)
2240     return;
2241
2242   set_edge_can_fallthru_flag ();
2243   mark_dfs_back_edges ();
2244
2245   /* We are estimating the length of uncond jump insn only once since the code
2246      for getting the insn length always returns the minimal length now.  */
2247   if (uncond_jump_length == 0)
2248     uncond_jump_length = get_uncond_jump_length ();
2249
2250   /* We need to know some information for each basic block.  */
2251   array_size = GET_ARRAY_SIZE (last_basic_block_for_fn (cfun));
2252   bbd = XNEWVEC (bbro_basic_block_data, array_size);
2253   for (i = 0; i < array_size; i++)
2254     {
2255       bbd[i].start_of_trace = -1;
2256       bbd[i].end_of_trace = -1;
2257       bbd[i].in_trace = -1;
2258       bbd[i].visited = 0;
2259       bbd[i].heap = NULL;
2260       bbd[i].node = NULL;
2261     }
2262
2263   traces = XNEWVEC (struct trace, n_basic_blocks_for_fn (cfun));
2264   n_traces = 0;
2265   find_traces (&n_traces, traces);
2266   connect_traces (n_traces, traces);
2267   FREE (traces);
2268   FREE (bbd);
2269
2270   relink_block_chain (/*stay_in_cfglayout_mode=*/true);
2271
2272   if (dump_file)
2273     {
2274       if (dump_flags & TDF_DETAILS)
2275         dump_reg_info (dump_file);
2276       dump_flow_info (dump_file, dump_flags);
2277     }
2278
2279   /* Signal that rtl_verify_flow_info_1 can now verify that there
2280      is at most one switch between hot/cold sections.  */
2281   crtl->bb_reorder_complete = true;
2282 }
2283
2284 /* Determine which partition the first basic block in the function
2285    belongs to, then find the first basic block in the current function
2286    that belongs to a different section, and insert a
2287    NOTE_INSN_SWITCH_TEXT_SECTIONS note immediately before it in the
2288    instruction stream.  When writing out the assembly code,
2289    encountering this note will make the compiler switch between the
2290    hot and cold text sections.  */
2291
2292 void
2293 insert_section_boundary_note (void)
2294 {
2295   basic_block bb;
2296   bool switched_sections = false;
2297   int current_partition = 0;
2298
2299   if (!crtl->has_bb_partition)
2300     return;
2301
2302   FOR_EACH_BB_FN (bb, cfun)
2303     {
2304       if (!current_partition)
2305         current_partition = BB_PARTITION (bb);
2306       if (BB_PARTITION (bb) != current_partition)
2307         {
2308           gcc_assert (!switched_sections);
2309           switched_sections = true;
2310           emit_note_before (NOTE_INSN_SWITCH_TEXT_SECTIONS, BB_HEAD (bb));
2311           current_partition = BB_PARTITION (bb);
2312         }
2313     }
2314 }
2315
2316 namespace {
2317
2318 const pass_data pass_data_reorder_blocks =
2319 {
2320   RTL_PASS, /* type */
2321   "bbro", /* name */
2322   OPTGROUP_NONE, /* optinfo_flags */
2323   TV_REORDER_BLOCKS, /* tv_id */
2324   0, /* properties_required */
2325   0, /* properties_provided */
2326   0, /* properties_destroyed */
2327   0, /* todo_flags_start */
2328   0, /* todo_flags_finish */
2329 };
2330
2331 class pass_reorder_blocks : public rtl_opt_pass
2332 {
2333 public:
2334   pass_reorder_blocks (gcc::context *ctxt)
2335     : rtl_opt_pass (pass_data_reorder_blocks, ctxt)
2336   {}
2337
2338   /* opt_pass methods: */
2339   virtual bool gate (function *)
2340     {
2341       if (targetm.cannot_modify_jumps_p ())
2342         return false;
2343       return (optimize > 0
2344               && (flag_reorder_blocks || flag_reorder_blocks_and_partition));
2345     }
2346
2347   virtual unsigned int execute (function *);
2348
2349 }; // class pass_reorder_blocks
2350
2351 unsigned int
2352 pass_reorder_blocks::execute (function *fun)
2353 {
2354   basic_block bb;
2355
2356   /* Last attempt to optimize CFG, as scheduling, peepholing and insn
2357      splitting possibly introduced more crossjumping opportunities.  */
2358   cfg_layout_initialize (CLEANUP_EXPENSIVE);
2359
2360   reorder_basic_blocks ();
2361   cleanup_cfg (CLEANUP_EXPENSIVE);
2362
2363   FOR_EACH_BB_FN (bb, fun)
2364     if (bb->next_bb != EXIT_BLOCK_PTR_FOR_FN (fun))
2365       bb->aux = bb->next_bb;
2366   cfg_layout_finalize ();
2367
2368   return 0;
2369 }
2370
2371 } // anon namespace
2372
2373 rtl_opt_pass *
2374 make_pass_reorder_blocks (gcc::context *ctxt)
2375 {
2376   return new pass_reorder_blocks (ctxt);
2377 }
2378
2379 /* Duplicate the blocks containing computed gotos.  This basically unfactors
2380    computed gotos that were factored early on in the compilation process to
2381    speed up edge based data flow.  We used to not unfactoring them again,
2382    which can seriously pessimize code with many computed jumps in the source
2383    code, such as interpreters.  See e.g. PR15242.  */
2384
2385 namespace {
2386
2387 const pass_data pass_data_duplicate_computed_gotos =
2388 {
2389   RTL_PASS, /* type */
2390   "compgotos", /* name */
2391   OPTGROUP_NONE, /* optinfo_flags */
2392   TV_REORDER_BLOCKS, /* tv_id */
2393   0, /* properties_required */
2394   0, /* properties_provided */
2395   0, /* properties_destroyed */
2396   0, /* todo_flags_start */
2397   0, /* todo_flags_finish */
2398 };
2399
2400 class pass_duplicate_computed_gotos : public rtl_opt_pass
2401 {
2402 public:
2403   pass_duplicate_computed_gotos (gcc::context *ctxt)
2404     : rtl_opt_pass (pass_data_duplicate_computed_gotos, ctxt)
2405   {}
2406
2407   /* opt_pass methods: */
2408   virtual bool gate (function *);
2409   virtual unsigned int execute (function *);
2410
2411 }; // class pass_duplicate_computed_gotos
2412
2413 bool
2414 pass_duplicate_computed_gotos::gate (function *fun)
2415 {
2416   if (targetm.cannot_modify_jumps_p ())
2417     return false;
2418   return (optimize > 0
2419           && flag_expensive_optimizations
2420           && ! optimize_function_for_size_p (fun));
2421 }
2422
2423 unsigned int
2424 pass_duplicate_computed_gotos::execute (function *fun)
2425 {
2426   basic_block bb, new_bb;
2427   bitmap candidates;
2428   int max_size;
2429   bool changed = false;
2430
2431   if (n_basic_blocks_for_fn (fun) <= NUM_FIXED_BLOCKS + 1)
2432     return 0;
2433
2434   clear_bb_flags ();
2435   cfg_layout_initialize (0);
2436
2437   /* We are estimating the length of uncond jump insn only once
2438      since the code for getting the insn length always returns
2439      the minimal length now.  */
2440   if (uncond_jump_length == 0)
2441     uncond_jump_length = get_uncond_jump_length ();
2442
2443   max_size
2444     = uncond_jump_length * PARAM_VALUE (PARAM_MAX_GOTO_DUPLICATION_INSNS);
2445   candidates = BITMAP_ALLOC (NULL);
2446
2447   /* Look for blocks that end in a computed jump, and see if such blocks
2448      are suitable for unfactoring.  If a block is a candidate for unfactoring,
2449      mark it in the candidates.  */
2450   FOR_EACH_BB_FN (bb, fun)
2451     {
2452       rtx_insn *insn;
2453       edge e;
2454       edge_iterator ei;
2455       int size, all_flags;
2456
2457       /* Build the reorder chain for the original order of blocks.  */
2458       if (bb->next_bb != EXIT_BLOCK_PTR_FOR_FN (fun))
2459         bb->aux = bb->next_bb;
2460
2461       /* Obviously the block has to end in a computed jump.  */
2462       if (!computed_jump_p (BB_END (bb)))
2463         continue;
2464
2465       /* Only consider blocks that can be duplicated.  */
2466       if (CROSSING_JUMP_P (BB_END (bb))
2467           || !can_duplicate_block_p (bb))
2468         continue;
2469
2470       /* Make sure that the block is small enough.  */
2471       size = 0;
2472       FOR_BB_INSNS (bb, insn)
2473         if (INSN_P (insn))
2474           {
2475             size += get_attr_min_length (insn);
2476             if (size > max_size)
2477                break;
2478           }
2479       if (size > max_size)
2480         continue;
2481
2482       /* Final check: there must not be any incoming abnormal edges.  */
2483       all_flags = 0;
2484       FOR_EACH_EDGE (e, ei, bb->preds)
2485         all_flags |= e->flags;
2486       if (all_flags & EDGE_COMPLEX)
2487         continue;
2488
2489       bitmap_set_bit (candidates, bb->index);
2490     }
2491
2492   /* Nothing to do if there is no computed jump here.  */
2493   if (bitmap_empty_p (candidates))
2494     goto done;
2495
2496   /* Duplicate computed gotos.  */
2497   FOR_EACH_BB_FN (bb, fun)
2498     {
2499       if (bb->flags & BB_VISITED)
2500         continue;
2501
2502       bb->flags |= BB_VISITED;
2503
2504       /* BB must have one outgoing edge.  That edge must not lead to
2505          the exit block or the next block.
2506          The destination must have more than one predecessor.  */
2507       if (!single_succ_p (bb)
2508           || single_succ (bb) == EXIT_BLOCK_PTR_FOR_FN (fun)
2509           || single_succ (bb) == bb->next_bb
2510           || single_pred_p (single_succ (bb)))
2511         continue;
2512
2513       /* The successor block has to be a duplication candidate.  */
2514       if (!bitmap_bit_p (candidates, single_succ (bb)->index))
2515         continue;
2516
2517       /* Don't duplicate a partition crossing edge, which requires difficult
2518          fixup.  */
2519       if (JUMP_P (BB_END (bb)) && CROSSING_JUMP_P (BB_END (bb)))
2520         continue;
2521
2522       new_bb = duplicate_block (single_succ (bb), single_succ_edge (bb), bb);
2523       new_bb->aux = bb->aux;
2524       bb->aux = new_bb;
2525       new_bb->flags |= BB_VISITED;
2526       changed = true;
2527     }
2528
2529  done:
2530   if (changed)
2531     {
2532       /* Duplicating blocks above will redirect edges and may cause hot
2533          blocks previously reached by both hot and cold blocks to become
2534          dominated only by cold blocks.  */
2535       fixup_partitions ();
2536
2537       /* Merge the duplicated blocks into predecessors, when possible.  */
2538       cfg_layout_finalize ();
2539       cleanup_cfg (0);
2540     }
2541   else
2542     cfg_layout_finalize ();
2543
2544   BITMAP_FREE (candidates);
2545   return 0;
2546 }
2547
2548 } // anon namespace
2549
2550 rtl_opt_pass *
2551 make_pass_duplicate_computed_gotos (gcc::context *ctxt)
2552 {
2553   return new pass_duplicate_computed_gotos (ctxt);
2554 }
2555
2556 /* This function is the main 'entrance' for the optimization that
2557    partitions hot and cold basic blocks into separate sections of the
2558    .o file (to improve performance and cache locality).  Ideally it
2559    would be called after all optimizations that rearrange the CFG have
2560    been called.  However part of this optimization may introduce new
2561    register usage, so it must be called before register allocation has
2562    occurred.  This means that this optimization is actually called
2563    well before the optimization that reorders basic blocks (see
2564    function above).
2565
2566    This optimization checks the feedback information to determine
2567    which basic blocks are hot/cold, updates flags on the basic blocks
2568    to indicate which section they belong in.  This information is
2569    later used for writing out sections in the .o file.  Because hot
2570    and cold sections can be arbitrarily large (within the bounds of
2571    memory), far beyond the size of a single function, it is necessary
2572    to fix up all edges that cross section boundaries, to make sure the
2573    instructions used can actually span the required distance.  The
2574    fixes are described below.
2575
2576    Fall-through edges must be changed into jumps; it is not safe or
2577    legal to fall through across a section boundary.  Whenever a
2578    fall-through edge crossing a section boundary is encountered, a new
2579    basic block is inserted (in the same section as the fall-through
2580    source), and the fall through edge is redirected to the new basic
2581    block.  The new basic block contains an unconditional jump to the
2582    original fall-through target.  (If the unconditional jump is
2583    insufficient to cross section boundaries, that is dealt with a
2584    little later, see below).
2585
2586    In order to deal with architectures that have short conditional
2587    branches (which cannot span all of memory) we take any conditional
2588    jump that attempts to cross a section boundary and add a level of
2589    indirection: it becomes a conditional jump to a new basic block, in
2590    the same section.  The new basic block contains an unconditional
2591    jump to the original target, in the other section.
2592
2593    For those architectures whose unconditional branch is also
2594    incapable of reaching all of memory, those unconditional jumps are
2595    converted into indirect jumps, through a register.
2596
2597    IMPORTANT NOTE: This optimization causes some messy interactions
2598    with the cfg cleanup optimizations; those optimizations want to
2599    merge blocks wherever possible, and to collapse indirect jump
2600    sequences (change "A jumps to B jumps to C" directly into "A jumps
2601    to C").  Those optimizations can undo the jump fixes that
2602    partitioning is required to make (see above), in order to ensure
2603    that jumps attempting to cross section boundaries are really able
2604    to cover whatever distance the jump requires (on many architectures
2605    conditional or unconditional jumps are not able to reach all of
2606    memory).  Therefore tests have to be inserted into each such
2607    optimization to make sure that it does not undo stuff necessary to
2608    cross partition boundaries.  This would be much less of a problem
2609    if we could perform this optimization later in the compilation, but
2610    unfortunately the fact that we may need to create indirect jumps
2611    (through registers) requires that this optimization be performed
2612    before register allocation.
2613
2614    Hot and cold basic blocks are partitioned and put in separate
2615    sections of the .o file, to reduce paging and improve cache
2616    performance (hopefully).  This can result in bits of code from the
2617    same function being widely separated in the .o file.  However, this
2618    separation is not reflected in the bb structure.  Therefore we must
2619    take care to ensure that: 1) there are no fall_thru edges that cross
2620    between sections; 2) for those architectures which have "short"
2621    conditional branches, all conditional branches that attempt to cross
2622    between sections are redirected through unconditional branches;
2623    and 3) for those architectures which have "short" unconditional
2624    branches, all unconditional branches that attempt to cross between
2625    sections are converted to indirect jumps.
2626
2627    The code for fixing up fall_thru edges that cross between hot and
2628    cold basic blocks does so by creating new basic blocks containing
2629    unconditional branches to the appropriate label in the "other"
2630    section.  The new basic block is then put in the same (hot or cold)
2631    section as the original conditional branch, and the fall_thru edge
2632    is modified to fall into the new basic block instead.  By adding
2633    this level of indirection we end up with only unconditional branches
2634    crossing between hot and cold sections.
2635
2636    Conditional branches are dealt with by adding a level of indirection.
2637    A new basic block is added in the same (hot/cold) section as the
2638    conditional branch, and the conditional branch is retargeted to the
2639    new basic block.  The new basic block contains an unconditional branch
2640    to the original target of the conditional branch (in the other section).
2641
2642    Unconditional branches are dealt with by converting them into
2643    indirect jumps.  */
2644
2645 namespace {
2646
2647 const pass_data pass_data_partition_blocks =
2648 {
2649   RTL_PASS, /* type */
2650   "bbpart", /* name */
2651   OPTGROUP_NONE, /* optinfo_flags */
2652   TV_REORDER_BLOCKS, /* tv_id */
2653   PROP_cfglayout, /* properties_required */
2654   0, /* properties_provided */
2655   0, /* properties_destroyed */
2656   0, /* todo_flags_start */
2657   0, /* todo_flags_finish */
2658 };
2659
2660 class pass_partition_blocks : public rtl_opt_pass
2661 {
2662 public:
2663   pass_partition_blocks (gcc::context *ctxt)
2664     : rtl_opt_pass (pass_data_partition_blocks, ctxt)
2665   {}
2666
2667   /* opt_pass methods: */
2668   virtual bool gate (function *);
2669   virtual unsigned int execute (function *);
2670
2671 }; // class pass_partition_blocks
2672
2673 bool
2674 pass_partition_blocks::gate (function *fun)
2675 {
2676   /* The optimization to partition hot/cold basic blocks into separate
2677      sections of the .o file does not work well with linkonce or with
2678      user defined section attributes.  Don't call it if either case
2679      arises.  */
2680   return (flag_reorder_blocks_and_partition
2681           && optimize
2682           /* See gate_handle_reorder_blocks.  We should not partition if
2683              we are going to omit the reordering.  */
2684           && optimize_function_for_speed_p (fun)
2685           && !DECL_COMDAT_GROUP (current_function_decl)
2686           && !user_defined_section_attribute);
2687 }
2688
2689 unsigned
2690 pass_partition_blocks::execute (function *fun)
2691 {
2692   vec<edge> crossing_edges;
2693
2694   if (n_basic_blocks_for_fn (fun) <= NUM_FIXED_BLOCKS + 1)
2695     return 0;
2696
2697   df_set_flags (DF_DEFER_INSN_RESCAN);
2698
2699   crossing_edges = find_rarely_executed_basic_blocks_and_crossing_edges ();
2700   if (!crossing_edges.exists ())
2701     return 0;
2702
2703   crtl->has_bb_partition = true;
2704
2705   /* Make sure the source of any crossing edge ends in a jump and the
2706      destination of any crossing edge has a label.  */
2707   add_labels_and_missing_jumps (crossing_edges);
2708
2709   /* Convert all crossing fall_thru edges to non-crossing fall
2710      thrus to unconditional jumps (that jump to the original fall
2711      through dest).  */
2712   fix_up_fall_thru_edges ();
2713
2714   /* If the architecture does not have conditional branches that can
2715      span all of memory, convert crossing conditional branches into
2716      crossing unconditional branches.  */
2717   if (!HAS_LONG_COND_BRANCH)
2718     fix_crossing_conditional_branches ();
2719
2720   /* If the architecture does not have unconditional branches that
2721      can span all of memory, convert crossing unconditional branches
2722      into indirect jumps.  Since adding an indirect jump also adds
2723      a new register usage, update the register usage information as
2724      well.  */
2725   if (!HAS_LONG_UNCOND_BRANCH)
2726     fix_crossing_unconditional_branches ();
2727
2728   update_crossing_jump_flags ();
2729
2730   /* Clear bb->aux fields that the above routines were using.  */
2731   clear_aux_for_blocks ();
2732
2733   crossing_edges.release ();
2734
2735   /* ??? FIXME: DF generates the bb info for a block immediately.
2736      And by immediately, I mean *during* creation of the block.
2737
2738         #0  df_bb_refs_collect
2739         #1  in df_bb_refs_record
2740         #2  in create_basic_block_structure
2741
2742      Which means that the bb_has_eh_pred test in df_bb_refs_collect
2743      will *always* fail, because no edges can have been added to the
2744      block yet.  Which of course means we don't add the right
2745      artificial refs, which means we fail df_verify (much) later.
2746
2747      Cleanest solution would seem to make DF_DEFER_INSN_RESCAN imply
2748      that we also shouldn't grab data from the new blocks those new
2749      insns are in either.  In this way one can create the block, link
2750      it up properly, and have everything Just Work later, when deferred
2751      insns are processed.
2752
2753      In the meantime, we have no other option but to throw away all
2754      of the DF data and recompute it all.  */
2755   if (fun->eh->lp_array)
2756     {
2757       df_finish_pass (true);
2758       df_scan_alloc (NULL);
2759       df_scan_blocks ();
2760       /* Not all post-landing pads use all of the EH_RETURN_DATA_REGNO
2761          data.  We blindly generated all of them when creating the new
2762          landing pad.  Delete those assignments we don't use.  */
2763       df_set_flags (DF_LR_RUN_DCE);
2764       df_analyze ();
2765     }
2766
2767   return 0;
2768 }
2769
2770 } // anon namespace
2771
2772 rtl_opt_pass *
2773 make_pass_partition_blocks (gcc::context *ctxt)
2774 {
2775   return new pass_partition_blocks (ctxt);
2776 }