gcc/bb-reorder.c

   1 /* Basic block reordering routines for the GNU compiler.
   2    Copyright (C) 2000-2013 Free Software Foundation, Inc.
   3
   4    This file is part of GCC.
   5
   6    GCC is free software; you can redistribute it and/or modify it
   7    under the terms of the GNU General Public License as published by
   8    the Free Software Foundation; either version 3, or (at your option)
   9    any later version.
  10
  11    GCC is distributed in the hope that it will be useful, but WITHOUT
  12    ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
  13    or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public
  14    License for more details.
  15
  16    You should have received a copy of the GNU General Public License
  17    along with GCC; see the file COPYING3.  If not see
  18    <http://www.gnu.org/licenses/>.  */
  19
  20 /* This (greedy) algorithm constructs traces in several rounds.
  21    The construction starts from "seeds".  The seed for the first round
  22    is the entry point of the function.  When there is more than one seed,
  23    the one with the lowest key in the heap is selected first (see bb_to_key).
  24    Then the algorithm repeatedly adds the most probable successor to the end
  25    of a trace.  Finally it connects the traces.
  26
  27    There are two parameters: Branch Threshold and Exec Threshold.
  28    If the probability of an edge to a successor of the current basic block is
  29    lower than Branch Threshold or its frequency is lower than Exec Threshold,
  30    then the successor will be the seed in one of the next rounds.
  31    Each round has these parameters lower than the previous one.
  32    The last round has to have these parameters set to zero so that the
  33    remaining blocks are picked up.
  34
  35    The algorithm selects the most probable successor from all unvisited
  36    successors and successors that have been added to this trace.
  37    The other successors (that have not been "sent" to the next round) will be
  38    other seeds for this round and the secondary traces will start from them.
  39    If the successor has not been visited in this trace, it is added to the
  40    trace (however, there is some heuristic for simple branches).
  41    If the successor has been visited in this trace, a loop has been found.
  42    If the loop has many iterations, the loop is rotated so that the source
  43    block of the most probable edge going out of the loop is the last block
  44    of the trace.
  45    If the loop has few iterations and there is no edge from the last block of
  46    the loop going out of the loop, the loop header is duplicated.
  47
  48    When connecting traces, the algorithm first checks whether there is an edge
  49    from the last block of a trace to the first block of another trace.
  50    When there are still some unconnected traces it checks whether there exists
  51    a basic block BB such that BB is a successor of the last block of a trace
  52    and BB is a predecessor of the first block of another trace.  In this case,
  53    BB is duplicated, added at the end of the first trace and the traces are
  54    connected through it.
  55    The rest of traces are simply connected so there will be a jump to the
  56    beginning of the rest of traces.
  57
  58    The above description is for the full algorithm, which is used when the
  59    function is optimized for speed.  When the function is optimized for size,
  60    in order to reduce long jumps and connect more fallthru edges, the
  61    algorithm is modified as follows:
  62    (1) Break long traces to short ones.  A trace is broken at a block that has
  63    multiple predecessors/successors during trace discovery.  When connecting
  64    traces, only connect Trace n with Trace n + 1.  This change reduces most
  65    long jumps compared with the above algorithm.
  66    (2) Ignore the edge probability and frequency for fallthru edges.
  67    (3) Keep the original order of blocks when there is no chance to fall
  68    through.  We rely on the results of cfg_cleanup.
  69
  70    To implement the change for code size optimization, block's index is
  71    selected as the key and all traces are found in one round.
  72
  73    References:
  74
  75    "Software Trace Cache"
  76    A. Ramirez, J. Larriba-Pey, C. Navarro, J. Torrellas and M. Valero; 1999
  77    http://citeseer.nj.nec.com/15361.html
  78
  79 */
  80
  81 #include "config.h"
  82 #include "system.h"
  83 #include "coretypes.h"
  84 #include "tm.h"
  85 #include "tree.h"
  86 #include "rtl.h"
  87 #include "regs.h"
  88 #include "flags.h"
  89 #include "output.h"
  90 #include "fibheap.h"
  91 #include "target.h"
  92 #include "function.h"
  93 #include "tm_p.h"
  94 #include "obstack.h"
  95 #include "expr.h"
  96 #include "params.h"
  97 #include "diagnostic-core.h"
  98 #include "toplev.h" /* user_defined_section_attribute */
  99 #include "tree-pass.h"
 100 #include "df.h"
 101 #include "bb-reorder.h"
 102 #include "except.h"
 103
 104 /* The number of rounds.  In most cases there will only be 4 rounds, but
 105    when partitioning hot and cold basic blocks into separate sections of
 106    the object file there will be an extra round.  */
 107 #define N_ROUNDS 5
 108
 109 /* Stubs in case we don't have a return insn.
 110    We have to check at run time too, not only compile time.  */
 111
 112 #ifndef HAVE_return
 113 #define HAVE_return 0
 114 #define gen_return() NULL_RTX
 115 #endif
 116
 117
 118 struct target_bb_reorder default_target_bb_reorder;
 119 #if SWITCHABLE_TARGET
 120 struct target_bb_reorder *this_target_bb_reorder = &default_target_bb_reorder;
 121 #endif
 122
 123 #define uncond_jump_length \
 124   (this_target_bb_reorder->x_uncond_jump_length)
 125
 126 /* Branch thresholds in thousandths (per mille) of the REG_BR_PROB_BASE.  */
 127 static const int branch_threshold[N_ROUNDS] = {400, 200, 100, 0, 0};
 128
 129 /* Exec thresholds in thousandths (per mille) of the frequency of bb 0.  */
 130 static const int exec_threshold[N_ROUNDS] = {500, 200, 50, 0, 0};
 131
 132 /* If edge frequency is lower than DUPLICATION_THRESHOLD per mille of entry
 133    block the edge destination is not duplicated while connecting traces.  */
 134 #define DUPLICATION_THRESHOLD 100
 135
 136 /* Structure to hold needed information for each basic block.  */
 137 typedef struct bbro_basic_block_data_def
 138 {
 139   /* Which trace is the bb start of (-1 means it is not a start of any).  */
 140   int start_of_trace;
 141
 142   /* Which trace is the bb end of (-1 means it is not an end of any).  */
 143   int end_of_trace;
 144
 145   /* Which trace is the bb in?  */
 146   int in_trace;
 147
 148   /* Which trace was this bb visited in?  */
 149   int visited;
 150
 151   /* Which heap is BB in (if any)?  */
 152   fibheap_t heap;
 153
 154   /* Which heap node is BB in (if any)?  */
 155   fibnode_t node;
 156 } bbro_basic_block_data;
 157
 158 /* The current size of the following dynamic array.  */
 159 static int array_size;
 160
 161 /* The array which holds needed information for basic blocks.  */
 162 static bbro_basic_block_data *bbd;
 163
 164 /* To avoid frequent reallocation the size of arrays is greater than needed,
 165    the number of elements is (not less than) 1.25 * size_wanted.  */
 166 #define GET_ARRAY_SIZE(X) ((((X) / 4) + 1) * 5)
 167
 168 /* Free the memory and set the pointer to NULL.  */
 169 #define FREE(P) (gcc_assert (P), free (P), P = 0)
 170
 171 /* Structure for holding information about a trace.  */
 172 struct trace
 173 {
 174   /* First and last basic block of the trace.  */
 175   basic_block first, last;
 176
 177   /* The round of the STC creation which this trace was found in.  */
 178   int round;
 179
 180   /* The length (i.e. the number of basic blocks) of the trace.  */
 181   int length;
 182 };
 183
 184 /* Maximum frequency and count of one of the entry blocks.  */
 185 static int max_entry_frequency;
 186 static gcov_type max_entry_count;
 187
 188 /* Local function prototypes.  */
 189 static void find_traces (int *, struct trace *);
 190 static basic_block rotate_loop (edge, struct trace *, int);
 191 static void mark_bb_visited (basic_block, int);
 192 static void find_traces_1_round (int, int, gcov_type, struct trace *, int *,
 193                                  int, fibheap_t *, int);
 194 static basic_block copy_bb (basic_block, edge, basic_block, int);
 195 static fibheapkey_t bb_to_key (basic_block);
 196 static bool better_edge_p (const_basic_block, const_edge, int, int, int, int,
 197                            const_edge);
 198 static bool connect_better_edge_p (const_edge, bool, int, const_edge,
 199                                    struct trace *);
 200 static void connect_traces (int, struct trace *);
 201 static bool copy_bb_p (const_basic_block, int);
 202 static bool push_to_next_round_p (const_basic_block, int, int, int, gcov_type);
 203 \f
 204 /* Return the trace number in which BB was visited.  */
 205
 206 static int
 207 bb_visited_trace (const_basic_block bb)
 208 {
 209   gcc_assert (bb->index < array_size);
 210   return bbd[bb->index].visited;
 211 }
 212
 213 /* This function marks BB that it was visited in trace number TRACE.  */
 214
 215 static void
 216 mark_bb_visited (basic_block bb, int trace)
 217 {
 218   bbd[bb->index].visited = trace;
 219   if (bbd[bb->index].heap)
 220     {
 221       fibheap_delete_node (bbd[bb->index].heap, bbd[bb->index].node);
 222       bbd[bb->index].heap = NULL;
 223       bbd[bb->index].node = NULL;
 224     }
 225 }
 226
 227 /* Check to see if bb should be pushed into the next round of trace
 228    collections or not.  Reasons for pushing the block forward are 1).
 229    If the block is cold, we are doing partitioning, and there will be
 230    another round (cold partition blocks are not supposed to be
 231    collected into traces until the very last round); or 2). There will
 232    be another round, and the basic block is not "hot enough" for the
 233    current round of trace collection.  */
 234
 235 static bool
 236 push_to_next_round_p (const_basic_block bb, int round, int number_of_rounds,
 237                       int exec_th, gcov_type count_th)
 238 {
 239   bool there_exists_another_round;
 240   bool block_not_hot_enough;
 241
 242   there_exists_another_round = round < number_of_rounds - 1;
 243
 244   block_not_hot_enough = (bb->frequency < exec_th
 245                           || bb->count < count_th
 246                           || probably_never_executed_bb_p (cfun, bb));
 247
 248   if (there_exists_another_round
 249       && block_not_hot_enough)
 250     return true;
 251   else
 252     return false;
 253 }
 254
 255 /* Find the traces for Software Trace Cache.  Chain each trace through
 256    RBI()->next.  Store the number of traces to N_TRACES and description of
 257    traces to TRACES.  */
 258
 259 static void
 260 find_traces (int *n_traces, struct trace *traces)
 261 {
 262   int i;
 263   int number_of_rounds;
 264   edge e;
 265   edge_iterator ei;
 266   fibheap_t heap;
 267
 268   /* Add one extra round of trace collection when partitioning hot/cold
 269      basic blocks into separate sections.  The last round is for all the
 270      cold blocks (and ONLY the cold blocks).  */
 271
 272   number_of_rounds = N_ROUNDS - 1;
 273
 274   /* Insert entry points of function into heap.  */
 275   heap = fibheap_new ();
 276   max_entry_frequency = 0;
 277   max_entry_count = 0;
 278   FOR_EACH_EDGE (e, ei, ENTRY_BLOCK_PTR_FOR_FN (cfun)->succs)
 279     {
 280       bbd[e->dest->index].heap = heap;
 281       bbd[e->dest->index].node = fibheap_insert (heap, bb_to_key (e->dest),
 282                                                     e->dest);
 283       if (e->dest->frequency > max_entry_frequency)
 284         max_entry_frequency = e->dest->frequency;
 285       if (e->dest->count > max_entry_count)
 286         max_entry_count = e->dest->count;
 287     }
 288
 289   /* Find the traces.  */
 290   for (i = 0; i < number_of_rounds; i++)
 291     {
 292       gcov_type count_threshold;
 293
 294       if (dump_file)
 295         fprintf (dump_file, "STC - round %d\n", i + 1);
 296
 297       if (max_entry_count < INT_MAX / 1000)
 298         count_threshold = max_entry_count * exec_threshold[i] / 1000;
 299       else
 300         count_threshold = max_entry_count / 1000 * exec_threshold[i];
 301
 302       find_traces_1_round (REG_BR_PROB_BASE * branch_threshold[i] / 1000,
 303                            max_entry_frequency * exec_threshold[i] / 1000,
 304                            count_threshold, traces, n_traces, i, &heap,
 305                            number_of_rounds);
 306     }
 307   fibheap_delete (heap);
 308
 309   if (dump_file)
 310     {
 311       for (i = 0; i < *n_traces; i++)
 312         {
 313           basic_block bb;
 314           fprintf (dump_file, "Trace %d (round %d):  ", i + 1,
 315                    traces[i].round + 1);
 316           for (bb = traces[i].first;
 317                bb != traces[i].last;
 318                bb = (basic_block) bb->aux)
 319             fprintf (dump_file, "%d [%d] ", bb->index, bb->frequency);
 320           fprintf (dump_file, "%d [%d]\n", bb->index, bb->frequency);
 321         }
 322       fflush (dump_file);
 323     }
 324 }
 325
 326 /* Rotate loop whose back edge is BACK_EDGE in the tail of trace TRACE
 327    (with sequential number TRACE_N).  */
 328
 329 static basic_block
 330 rotate_loop (edge back_edge, struct trace *trace, int trace_n)
 331 {
 332   basic_block bb;
 333
 334   /* Information about the best end (end after rotation) of the loop.  */
 335   basic_block best_bb = NULL;
 336   edge best_edge = NULL;
 337   int best_freq = -1;
 338   gcov_type best_count = -1;
 339   /* The best edge is preferred when its destination is not visited yet
 340      or is a start block of some trace.  */
 341   bool is_preferred = false;
 342
 343   /* Find the most frequent edge that goes out from current trace.  */
 344   bb = back_edge->dest;
 345   do
 346     {
 347       edge e;
 348       edge_iterator ei;
 349
 350       FOR_EACH_EDGE (e, ei, bb->succs)
 351         if (e->dest != EXIT_BLOCK_PTR_FOR_FN (cfun)
 352             && bb_visited_trace (e->dest) != trace_n
 353             && (e->flags & EDGE_CAN_FALLTHRU)
 354             && !(e->flags & EDGE_COMPLEX))
 355         {
 356           if (is_preferred)
 357             {
 358               /* The best edge is preferred.  */
 359               if (!bb_visited_trace (e->dest)
 360                   || bbd[e->dest->index].start_of_trace >= 0)
 361                 {
 362                   /* The current edge E is also preferred.  */
 363                   int freq = EDGE_FREQUENCY (e);
 364                   if (freq > best_freq || e->count > best_count)
 365                     {
 366                       best_freq = freq;
 367                       best_count = e->count;
 368                       best_edge = e;
 369                       best_bb = bb;
 370                     }
 371                 }
 372             }
 373           else
 374             {
 375               if (!bb_visited_trace (e->dest)
 376                   || bbd[e->dest->index].start_of_trace >= 0)
 377                 {
 378                   /* The current edge E is preferred.  */
 379                   is_preferred = true;
 380                   best_freq = EDGE_FREQUENCY (e);
 381                   best_count = e->count;
 382                   best_edge = e;
 383                   best_bb = bb;
 384                 }
 385               else
 386                 {
 387                   int freq = EDGE_FREQUENCY (e);
 388                   if (!best_edge || freq > best_freq || e->count > best_count)
 389                     {
 390                       best_freq = freq;
 391                       best_count = e->count;
 392                       best_edge = e;
 393                       best_bb = bb;
 394                     }
 395                 }
 396             }
 397         }
 398       bb = (basic_block) bb->aux;
 399     }
 400   while (bb != back_edge->dest);
 401
 402   if (best_bb)
 403     {
 404       /* Rotate the loop so that the BEST_EDGE goes out from the last block of
 405          the trace.  */
 406       if (back_edge->dest == trace->first)
 407         {
 408           trace->first = (basic_block) best_bb->aux;
 409         }
 410       else
 411         {
 412           basic_block prev_bb;
 413
 414           for (prev_bb = trace->first;
 415                prev_bb->aux != back_edge->dest;
 416                prev_bb = (basic_block) prev_bb->aux)
 417             ;
 418           prev_bb->aux = best_bb->aux;
 419
 420           /* Try to get rid of uncond jump to cond jump.  */
 421           if (single_succ_p (prev_bb))
 422             {
 423               basic_block header = single_succ (prev_bb);
 424
 425               /* Duplicate HEADER if it is a small block containing cond jump
 426                  in the end.  */
 427               if (any_condjump_p (BB_END (header)) && copy_bb_p (header, 0)
 428                   && !find_reg_note (BB_END (header), REG_CROSSING_JUMP,
 429                                      NULL_RTX))
 430                 copy_bb (header, single_succ_edge (prev_bb), prev_bb, trace_n);
 431             }
 432         }
 433     }
 434   else
 435     {
 436       /* We have not found suitable loop tail so do no rotation.  */
 437       best_bb = back_edge->src;
 438     }
 439   best_bb->aux = NULL;
 440   return best_bb;
 441 }
 442
 443 /* One round of finding traces.  Find traces for BRANCH_TH and EXEC_TH i.e. do
 444    not include basic blocks whose probability is lower than BRANCH_TH or whose
 445    frequency is lower than EXEC_TH into traces (or whose count is lower than
 446    COUNT_TH).  Store the new traces into TRACES and modify the number of
 447    traces *N_TRACES.  Set the round (which the trace belongs to) to ROUND.
 448    The function expects starting basic blocks to be in *HEAP and will delete
 449    *HEAP and store starting points for the next round into new *HEAP.  */
 450
 451 static void
 452 find_traces_1_round (int branch_th, int exec_th, gcov_type count_th,
 453                      struct trace *traces, int *n_traces, int round,
 454                      fibheap_t *heap, int number_of_rounds)
 455 {
 456   /* Heap for discarded basic blocks which are possible starting points for
 457      the next round.  */
 458   fibheap_t new_heap = fibheap_new ();
 459   bool for_size = optimize_function_for_size_p (cfun);
 460
 461   while (!fibheap_empty (*heap))
 462     {
 463       basic_block bb;
 464       struct trace *trace;
 465       edge best_edge, e;
 466       fibheapkey_t key;
 467       edge_iterator ei;
 468
 469       bb = (basic_block) fibheap_extract_min (*heap);
 470       bbd[bb->index].heap = NULL;
 471       bbd[bb->index].node = NULL;
 472
 473       if (dump_file)
 474         fprintf (dump_file, "Getting bb %d\n", bb->index);
 475
 476       /* If the BB's frequency is too low, send BB to the next round.  When
 477          partitioning hot/cold blocks into separate sections, make sure all
 478          the cold blocks (and ONLY the cold blocks) go into the (extra) final
 479          round.  When optimizing for size, do not push to next round.  */
 480
 481       if (!for_size
 482           && push_to_next_round_p (bb, round, number_of_rounds, exec_th,
 483                                    count_th))
 484         {
 485           int key = bb_to_key (bb);
 486           bbd[bb->index].heap = new_heap;
 487           bbd[bb->index].node = fibheap_insert (new_heap, key, bb);
 488
 489           if (dump_file)
 490             fprintf (dump_file,
 491                      "  Possible start point of next round: %d (key: %d)\n",
 492                      bb->index, key);
 493           continue;
 494         }
 495
 496       trace = traces + *n_traces;
 497       trace->first = bb;
 498       trace->round = round;
 499       trace->length = 0;
 500       bbd[bb->index].in_trace = *n_traces;
 501       (*n_traces)++;
 502
 503       do
 504         {
 505           int prob, freq;
 506           bool ends_in_call;
 507
 508           /* The probability and frequency of the best edge.  */
 509           int best_prob = INT_MIN / 2;
 510           int best_freq = INT_MIN / 2;
 511
 512           best_edge = NULL;
 513           mark_bb_visited (bb, *n_traces);
 514           trace->length++;
 515
 516           if (dump_file)
 517             fprintf (dump_file, "Basic block %d was visited in trace %d\n",
 518                      bb->index, *n_traces - 1);
 519
 520           ends_in_call = block_ends_with_call_p (bb);
 521
 522           /* Select the successor that will be placed after BB.  */
 523           FOR_EACH_EDGE (e, ei, bb->succs)
 524             {
 525               gcc_assert (!(e->flags & EDGE_FAKE));
 526
 527               if (e->dest == EXIT_BLOCK_PTR_FOR_FN (cfun))
 528                 continue;
 529
 530               if (bb_visited_trace (e->dest)
 531                   && bb_visited_trace (e->dest) != *n_traces)
 532                 continue;
 533
 534               if (BB_PARTITION (e->dest) != BB_PARTITION (bb))
 535                 continue;
 536
 537               prob = e->probability;
 538               freq = e->dest->frequency;
 539
 540               /* The only sensible preference for a call instruction is the
 541                  fallthru edge.  Don't bother selecting anything else.  */
 542               if (ends_in_call)
 543                 {
 544                   if (e->flags & EDGE_CAN_FALLTHRU)
 545                     {
 546                       best_edge = e;
 547                       best_prob = prob;
 548                       best_freq = freq;
 549                     }
 550                   continue;
 551                 }
 552
 553               /* Edge that cannot be fallthru or improbable or infrequent
 554                  successor (i.e. it is unsuitable successor).  When optimizing
 555                  for size, ignore the probability and frequency.  */
 556               if (!(e->flags & EDGE_CAN_FALLTHRU) || (e->flags & EDGE_COMPLEX)
 557                   || ((prob < branch_th || EDGE_FREQUENCY (e) < exec_th
 558                       || e->count < count_th) && (!for_size)))
 559                 continue;
 560
 561               /* If partitioning hot/cold basic blocks, don't consider edges
 562                  that cross section boundaries.  */
 563
 564               if (better_edge_p (bb, e, prob, freq, best_prob, best_freq,
 565                                  best_edge))
 566                 {
 567                   best_edge = e;
 568                   best_prob = prob;
 569                   best_freq = freq;
 570                 }
 571             }
 572
 573           /* If the best destination has multiple predecessors, and can be
 574              duplicated cheaper than a jump, don't allow it to be added
 575              to a trace.  We'll duplicate it when connecting traces.  */
 576           if (best_edge && EDGE_COUNT (best_edge->dest->preds) >= 2
 577               && copy_bb_p (best_edge->dest, 0))
 578             best_edge = NULL;
 579
 580           /* If the best destination has multiple successors or predecessors,
 581              don't allow it to be added when optimizing for size.  This makes
 582              sure predecessors with smaller index are handled before the best
 583              destinarion.  It breaks long trace and reduces long jumps.
 584
 585              Take if-then-else as an example.
 586                 A
 587                / \
 588               B   C
 589                \ /
 590                 D
 591              If we do not remove the best edge B->D/C->D, the final order might
 592              be A B D ... C.  C is at the end of the program.  If D's successors
 593              and D are complicated, might need long jumps for A->C and C->D.
 594              Similar issue for order: A C D ... B.
 595
 596              After removing the best edge, the final result will be ABCD/ ACBD.
 597              It does not add jump compared with the previous order.  But it
 598              reduces the possibility of long jumps.  */
 599           if (best_edge && for_size
 600               && (EDGE_COUNT (best_edge->dest->succs) > 1
 601                  || EDGE_COUNT (best_edge->dest->preds) > 1))
 602             best_edge = NULL;
 603
 604           /* Add all non-selected successors to the heaps.  */
 605           FOR_EACH_EDGE (e, ei, bb->succs)
 606             {
 607               if (e == best_edge
 608                   || e->dest == EXIT_BLOCK_PTR_FOR_FN (cfun)
 609                   || bb_visited_trace (e->dest))
 610                 continue;
 611
 612               key = bb_to_key (e->dest);
 613
 614               if (bbd[e->dest->index].heap)
 615                 {
 616                   /* E->DEST is already in some heap.  */
 617                   if (key != bbd[e->dest->index].node->key)
 618                     {
 619                       if (dump_file)
 620                         {
 621                           fprintf (dump_file,
 622                                    "Changing key for bb %d from %ld to %ld.\n",
 623                                    e->dest->index,
 624                                    (long) bbd[e->dest->index].node->key,
 625                                    key);
 626                         }
 627                       fibheap_replace_key (bbd[e->dest->index].heap,
 628                                            bbd[e->dest->index].node, key);
 629                     }
 630                 }
 631               else
 632                 {
 633                   fibheap_t which_heap = *heap;
 634
 635                   prob = e->probability;
 636                   freq = EDGE_FREQUENCY (e);
 637
 638                   if (!(e->flags & EDGE_CAN_FALLTHRU)
 639                       || (e->flags & EDGE_COMPLEX)
 640                       || prob < branch_th || freq < exec_th
 641                       || e->count < count_th)
 642                     {
 643                       /* When partitioning hot/cold basic blocks, make sure
 644                          the cold blocks (and only the cold blocks) all get
 645                          pushed to the last round of trace collection.  When
 646                          optimizing for size, do not push to next round.  */
 647
 648                       if (!for_size && push_to_next_round_p (e->dest, round,
 649                                                              number_of_rounds,
 650                                                              exec_th, count_th))
 651                         which_heap = new_heap;
 652                     }
 653
 654                   bbd[e->dest->index].heap = which_heap;
 655                   bbd[e->dest->index].node = fibheap_insert (which_heap,
 656                                                                 key, e->dest);
 657
 658                   if (dump_file)
 659                     {
 660                       fprintf (dump_file,
 661                                "  Possible start of %s round: %d (key: %ld)\n",
 662                                (which_heap == new_heap) ? "next" : "this",
 663                                e->dest->index, (long) key);
 664                     }
 665
 666                 }
 667             }
 668
 669           if (best_edge) /* Suitable successor was found.  */
 670             {
 671               if (bb_visited_trace (best_edge->dest) == *n_traces)
 672                 {
 673                   /* We do nothing with one basic block loops.  */
 674                   if (best_edge->dest != bb)
 675                     {
 676                       if (EDGE_FREQUENCY (best_edge)
 677                           > 4 * best_edge->dest->frequency / 5)
 678                         {
 679                           /* The loop has at least 4 iterations.  If the loop
 680                              header is not the first block of the function
 681                              we can rotate the loop.  */
 682
 683                           if (best_edge->dest
 684                               != ENTRY_BLOCK_PTR_FOR_FN (cfun)->next_bb)
 685                             {
 686                               if (dump_file)
 687                                 {
 688                                   fprintf (dump_file,
 689                                            "Rotating loop %d - %d\n",
 690                                            best_edge->dest->index, bb->index);
 691                                 }
 692                               bb->aux = best_edge->dest;
 693                               bbd[best_edge->dest->index].in_trace =
 694                                                              (*n_traces) - 1;
 695                               bb = rotate_loop (best_edge, trace, *n_traces);
 696                             }
 697                         }
 698                       else
 699                         {
 700                           /* The loop has less than 4 iterations.  */
 701
 702                           if (single_succ_p (bb)
 703                               && copy_bb_p (best_edge->dest,
 704                                             optimize_edge_for_speed_p
 705                                             (best_edge)))
 706                             {
 707                               bb = copy_bb (best_edge->dest, best_edge, bb,
 708                                             *n_traces);
 709                               trace->length++;
 710                             }
 711                         }
 712                     }
 713
 714                   /* Terminate the trace.  */
 715                   break;
 716                 }
 717               else
 718                 {
 719                   /* Check for a situation
 720
 721                     A
 722                    /|
 723                   B |
 724                    \|
 725                     C
 726
 727                   where
 728                   EDGE_FREQUENCY (AB) + EDGE_FREQUENCY (BC)
 729                     >= EDGE_FREQUENCY (AC).
 730                   (i.e. 2 * B->frequency >= EDGE_FREQUENCY (AC) )
 731                   Best ordering is then A B C.
 732
 733                   When optimizing for size, A B C is always the best order.
 734
 735                   This situation is created for example by:
 736
 737                   if (A) B;
 738                   C;
 739
 740                   */
 741
 742                   FOR_EACH_EDGE (e, ei, bb->succs)
 743                     if (e != best_edge
 744                         && (e->flags & EDGE_CAN_FALLTHRU)
 745                         && !(e->flags & EDGE_COMPLEX)
 746                         && !bb_visited_trace (e->dest)
 747                         && single_pred_p (e->dest)
 748                         && !(e->flags & EDGE_CROSSING)
 749                         && single_succ_p (e->dest)
 750                         && (single_succ_edge (e->dest)->flags
 751                             & EDGE_CAN_FALLTHRU)
 752                         && !(single_succ_edge (e->dest)->flags & EDGE_COMPLEX)
 753                         && single_succ (e->dest) == best_edge->dest
 754                         && (2 * e->dest->frequency >= EDGE_FREQUENCY (best_edge)
 755                             || for_size))
 756                       {
 757                         best_edge = e;
 758                         if (dump_file)
 759                           fprintf (dump_file, "Selecting BB %d\n",
 760                                    best_edge->dest->index);
 761                         break;
 762                       }
 763
 764                   bb->aux = best_edge->dest;
 765                   bbd[best_edge->dest->index].in_trace = (*n_traces) - 1;
 766                   bb = best_edge->dest;
 767                 }
 768             }
 769         }
 770       while (best_edge);
 771       trace->last = bb;
 772       bbd[trace->first->index].start_of_trace = *n_traces - 1;
 773       bbd[trace->last->index].end_of_trace = *n_traces - 1;
 774
 775       /* The trace is terminated so we have to recount the keys in heap
 776          (some block can have a lower key because now one of its predecessors
 777          is an end of the trace).  */
 778       FOR_EACH_EDGE (e, ei, bb->succs)
 779         {
 780           if (e->dest == EXIT_BLOCK_PTR_FOR_FN (cfun)
 781               || bb_visited_trace (e->dest))
 782             continue;
 783
 784           if (bbd[e->dest->index].heap)
 785             {
 786               key = bb_to_key (e->dest);
 787               if (key != bbd[e->dest->index].node->key)
 788                 {
 789                   if (dump_file)
 790                     {
 791                       fprintf (dump_file,
 792                                "Changing key for bb %d from %ld to %ld.\n",
 793                                e->dest->index,
 794                                (long) bbd[e->dest->index].node->key, key);
 795                     }
 796                   fibheap_replace_key (bbd[e->dest->index].heap,
 797                                        bbd[e->dest->index].node,
 798                                        key);
 799                 }
 800             }
 801         }
 802     }
 803
 804   fibheap_delete (*heap);
 805
 806   /* "Return" the new heap.  */
 807   *heap = new_heap;
 808 }
 809
 810 /* Duplicate OLD_BB, redirecting edge E to the copy and placing the copy
 811    after BB.  The copy is linked into the aux chain right behind BB, handed
 812    to mark_bb_visited for trace number TRACE and recorded as a member of that
 813    trace; the per-block array BBD is enlarged first when the copy's index (or
 814    last_basic_block) no longer fits.  Return the newly created block.  */
 815
 816 static basic_block
 817 copy_bb (basic_block old_bb, edge e, basic_block bb, int trace)
 818 {
 819   basic_block copy = duplicate_block (old_bb, e, bb);
 820
 821   BB_COPY_PARTITION (copy, old_bb);
 822   gcc_assert (e->dest == copy);
 823
 824   if (dump_file)
 825     fprintf (dump_file, "Duplicated bb %d (created bb %d)\n",
 826              old_bb->index, copy->index);
 827
 828   /* Make sure BBD has a slot for every block index now in use.  */
 829   if (array_size <= copy->index || array_size < last_basic_block)
 830     {
 831       int wanted = MAX (last_basic_block, copy->index + 1);
 832       int new_size = GET_ARRAY_SIZE (wanted);
 833
 834       bbd = XRESIZEVEC (bbro_basic_block_data, bbd, new_size);
 835
 836       /* Give the freshly added slots their "unused" values.  */
 837       for (int slot = array_size; slot < new_size; slot++)
 838         {
 839           bbro_basic_block_data *d = &bbd[slot];
 840
 841           d->start_of_trace = -1;
 842           d->end_of_trace = -1;
 843           d->in_trace = -1;
 844           d->visited = 0;
 845           d->heap = NULL;
 846           d->node = NULL;
 847         }
 848       array_size = new_size;
 849
 850       if (dump_file)
 851         fprintf (dump_file, "Growing the dynamic array to %d elements.\n",
 852                  array_size);
 853     }
 854
 855   gcc_assert (!bb_visited_trace (e->dest));
 856   mark_bb_visited (copy, trace);
 857   bbd[copy->index].in_trace = trace;
 858
 859   /* Splice the copy into the aux chain directly after BB.  */
 860   copy->aux = bb->aux;
 861   bb->aux = copy;
 862
 863   return copy;
 864 }
 865
 866 /* Return the fibheap key of BB; the block with the lowest key is picked
 867    first as a trace seed.  */
 868
 869 static fibheapkey_t
 870 bb_to_key (basic_block bb)
 871 {
 872   /* Use the index as key when optimizing for size (original order).  */
 873   if (optimize_function_for_size_p (cfun))
 874     return bb->index;
 875
 876   /* Do not start in cold or probably never executed blocks.  */
 877   bool in_cold_part = BB_PARTITION (bb) == BB_COLD_PARTITION;
 878   if (in_cold_part || probably_never_executed_bb_p (cfun, bb))
 879     return BB_FREQ_MAX;
 880
 881   /* Look for the most frequent incoming edge that either starts at the end
 882      of some trace or carries EDGE_DFS_BACK.  */
 883   int top_freq = 0;
 884   edge pred;
 885   edge_iterator it;
 886   FOR_EACH_EDGE (pred, it, bb->preds)
 887     {
 888       bool from_trace_end = (pred->src != ENTRY_BLOCK_PTR_FOR_FN (cfun)
 889                              && bbd[pred->src->index].end_of_trace >= 0);
 890
 891       if (!from_trace_end && !(pred->flags & EDGE_DFS_BACK))
 892         continue;
 893
 894       int freq = EDGE_FREQUENCY (pred);
 895       if (top_freq < freq)
 896         top_freq = freq;
 897     }
 898
 899   /* No such edge with a nonzero frequency: just negate the frequency.  */
 900   if (!top_freq)
 901     return -bb->frequency;
 902
 903   /* Otherwise the block should have a significantly lower key.  */
 904   return -(100 * BB_FREQ_MAX + 100 * top_freq + bb->frequency);
 905 }
 906
 907 /* Decide whether the edge E leaving basic block BB should replace the
 908    temporary best edge CUR_BEST_EDGE (which may be NULL).  PROB is the
 909    probability of E and FREQ the frequency of its successor; BEST_PROB and
 910    BEST_FREQ are the current best probability and frequency.
 911    Probabilities, and likewise frequencies, are considered equivalent when
 912    they differ by no more than a tenth of the respective BEST_* value.
 913    Return true when E is the better edge.  */
 914
 915 static bool
 916 better_edge_p (const_basic_block bb, const_edge e, int prob, int freq,
 917                int best_prob, int best_freq, const_edge cur_best_edge)
 918 {
 919   /* When optimizing for size the edge to the block with the smaller index
 920      wins, which keeps the original order.  */
 921   if (optimize_function_for_size_p (cfun))
 922     {
 923       /* Anything beats having no best edge yet.  */
 924       if (!cur_best_edge)
 925         return true;
 926       return e->dest->index < cur_best_edge->dest->index;
 927     }
 928
 929   /* The BEST_* values do not have to be best; allow a tenth of slack on
 930      either side of them.  */
 931   const int prob_slack = best_prob / 10;
 932   const int freq_slack = best_freq / 10;
 933   bool better;
 934
 935   if (prob > best_prob + prob_slack || prob < best_prob - prob_slack)
 936     /* The probabilities differ noticeably: E wins exactly when it is the
 937        more probable edge.  */
 938     better = prob > best_prob + prob_slack;
 939   else if (freq < best_freq - freq_slack || freq > best_freq + freq_slack)
 940     /* Almost equivalent probabilities.  A higher frequency of a successor
 941        means that there is another edge going into it, so E wins exactly
 942        when its successor has the lower frequency.  */
 943     better = freq < best_freq - freq_slack;
 944   else
 945     /* Equivalent probabilities and frequencies: E wins only when BB is the
 946        prev_bb of its destination.  */
 947     better = e->dest->prev_bb == bb;
 948
 949   /* An edge that won above needs no further consideration.  */
 950   if (better)
 951     return true;
 952
 953   /* If we are doing hot/cold partitioning, make sure that we always favor
 954      non-crossing edges over crossing edges: E still wins when it does not
 955      cross while the current best edge does.  */
 956   if (!flag_reorder_blocks_and_partition || !cur_best_edge)
 957     return false;
 958
 959   bool best_crosses = (cur_best_edge->flags & EDGE_CROSSING) != 0;
 960   bool e_crosses = (e->flags & EDGE_CROSSING) != 0;
 961
 962   return best_crosses && !e_crosses;
 963 }
 964
 965 /* Return true when the edge E is a better connection than the temporary best
 966    edge CUR_BEST_EDGE; any edge is better than a NULL CUR_BEST_EDGE.
 967    If SRC_INDEX_P is true the source blocks of E and CUR_BEST_EDGE are
 968    compared, otherwise their destination blocks.  BEST_LEN is the length of
 969    the trace of the compared block of CUR_BEST_EDGE and TRACES is the array
 970    describing all traces.
 971    When optimizing for size, the edge whose block has the smaller index is
 972    better.  When optimizing for speed, the edge with the bigger probability
 973    is better; among equally probable edges the one with the longer trace
 974    is better.  */
 975
 976 static bool
 977 connect_better_edge_p (const_edge e, bool src_index_p, int best_len,
 978                        const_edge cur_best_edge, struct trace *traces)
 979 {
 980   if (!cur_best_edge)
 981     return true;
 982
 983   /* Index of the block of E that takes part in the comparison.  */
 984   const int e_index = src_index_p ? e->src->index : e->dest->index;
 985
 986   if (optimize_function_for_size_p (cfun))
 987     {
 988       int b_index;
 989
 990       if (src_index_p)
 991         b_index = cur_best_edge->src->index;
 992       else
 993         b_index = cur_best_edge->dest->index;
 994
 995       /* The smaller one is better to keep the original order.  */
 996       return e_index < b_index;
 997     }
 998
 999   /* Different probabilities decide on their own: the edge with the higher
1000      probability is better.  */
1001   if (e->probability != cur_best_edge->probability)
1002     return e->probability > cur_best_edge->probability;
1003
1004   /* The edge and the temporary best edge have equivalent probabilities, so
1005      the lengths of the traces decide.  For a source block that is the trace
1006      ending in it, for a destination block the trace starting in it.  */
1007   int trace_no;
1008
1009   if (src_index_p)
1010     trace_no = bbd[e_index].end_of_trace;
1011   else
1012     trace_no = bbd[e_index].start_of_trace;
1013
1014   /* Only a strictly longer trace makes E better.  */
1015   return traces[trace_no].length > best_len;
1016 }
1031
1032 /* Connect the N_TRACES traces in TRACES into one chain through bb->aux.  */
1033
1034 static void
1035 connect_traces (int n_traces, struct trace *traces)
1036 {
1037   int i;
1038   bool *connected;        /* connected[T]: trace T is already placed.  */
1039   bool two_passes;        /* Some trace starts in another partition.  */
1040   int last_trace;         /* Trace at the end of the chain so far, or -1.  */
1041   int current_pass;       /* 1, or 2 while handling the other partition.  */
1042   int current_partition;  /* Partition of the traces handled in this pass.  */
1043   int freq_threshold;
1044   gcov_type count_threshold;
1045   bool for_size = optimize_function_for_size_p (cfun);
1046
1047   freq_threshold = max_entry_frequency * DUPLICATION_THRESHOLD / 1000;
1048   if (max_entry_count < INT_MAX / 1000)
1049     count_threshold = max_entry_count * DUPLICATION_THRESHOLD / 1000;
1050   else
1051     count_threshold = max_entry_count / 1000 * DUPLICATION_THRESHOLD;
1052
1053   connected = XCNEWVEC (bool, n_traces);
1054   last_trace = -1;
1055   current_pass = 1;
1056   current_partition = BB_PARTITION (traces[0].first);
1057   two_passes = false;
1058
1059   if (crtl->has_bb_partition)
1060     for (i = 0; i < n_traces && !two_passes; i++)
1061       if (BB_PARTITION (traces[0].first)
1062           != BB_PARTITION (traces[i].first))
1063         two_passes = true;
1064
1065   for (i = 0; i < n_traces || (two_passes && current_pass == 1) ; i++)
1066     {
1067       int t = i;
1068       int t2;
1069       edge e, best;
1070       int best_len;
1071
1072       if (i >= n_traces)  /* Pass 1 is finished; switch to pass 2.  */
1073         {
1074           gcc_assert (two_passes && current_pass == 1);
1075           i = 0;
1076           t = i;
1077           current_pass = 2;
1078           if (current_partition == BB_HOT_PARTITION)
1079             current_partition = BB_COLD_PARTITION;
1080           else
1081             current_partition = BB_HOT_PARTITION;
1082         }
1083
1084       if (connected[t])
1085         continue;
1086
1087       if (two_passes
1088           && BB_PARTITION (traces[t].first) != current_partition)
1089         continue;
1090
1091       connected[t] = true;
1092
1093       /* Find the predecessor traces.  */
1094       for (t2 = t; t2 > 0;)
1095         {
1096           edge_iterator ei;
1097           best = NULL;
1098           best_len = 0;
1099           FOR_EACH_EDGE (e, ei, traces[t2].first->preds)
1100             {
1101               int si = e->src->index;
1102
1103               if (e->src != ENTRY_BLOCK_PTR_FOR_FN (cfun)
1104                   && (e->flags & EDGE_CAN_FALLTHRU)
1105                   && !(e->flags & EDGE_COMPLEX)
1106                   && bbd[si].end_of_trace >= 0
1107                   && !connected[bbd[si].end_of_trace]
1108                   && (BB_PARTITION (e->src) == current_partition)
1109                   && connect_better_edge_p (e, true, best_len, best, traces))
1110                 {
1111                   best = e;
1112                   best_len = traces[bbd[si].end_of_trace].length;
1113                 }
1114             }
1115           if (best)
1116             {
1117               best->src->aux = best->dest;
1118               t2 = bbd[best->src->index].end_of_trace;
1119               connected[t2] = true;
1120
1121               if (dump_file)
1122                 {
1123                   fprintf (dump_file, "Connection: %d %d\n",
1124                            best->src->index, best->dest->index);
1125                 }
1126             }
1127           else
1128             break;
1129         }
1130
1131       if (last_trace >= 0)  /* Append the group headed by T2 to the chain.  */
1132         traces[last_trace].last->aux = traces[t2].first;
1133       last_trace = t;
1134
1135       /* Find the successor traces.  */
1136       while (1)
1137         {
1138           /* Find the continuation of the chain.  */
1139           edge_iterator ei;
1140           best = NULL;
1141           best_len = 0;
1142           FOR_EACH_EDGE (e, ei, traces[t].last->succs)
1143             {
1144               int di = e->dest->index;
1145
1146               if (e->dest != EXIT_BLOCK_PTR_FOR_FN (cfun)
1147                   && (e->flags & EDGE_CAN_FALLTHRU)
1148                   && !(e->flags & EDGE_COMPLEX)
1149                   && bbd[di].start_of_trace >= 0
1150                   && !connected[bbd[di].start_of_trace]
1151                   && (BB_PARTITION (e->dest) == current_partition)
1152                   && connect_better_edge_p (e, false, best_len, best, traces))
1153                 {
1154                   best = e;
1155                   best_len = traces[bbd[di].start_of_trace].length;
1156                 }
1157             }
1158
1159           if (for_size)
1160             {
1161               if (!best)
1162                 /* Stop finding the successor traces.  */
1163                 break;
1164
1165               /* It is OK to connect block n with block n + 1 or a block
1166                  before n.  For others, only connect to the loop header.  */
1167               if (best->dest->index > (traces[t].last->index + 1))
1168                 {
1169                   int count = EDGE_COUNT (best->dest->preds);
1170
1171                   FOR_EACH_EDGE (e, ei, best->dest->preds)
1172                     if (e->flags & EDGE_DFS_BACK)
1173                       count--;
1174
1175                   /* If dest has multiple predecessors, skip it.  We expect
1176                      that one predecessor with smaller index connects with it
1177                      later.  */
1178                   if (count != 1)
1179                     break;
1180                 }
1181
1182               /* Only connect Trace n with Trace n + 1.  It is conservative
1183                  to keep the order as close as possible to the original order.
1184                  It also helps to reduce long jumps.  */
1185               if (last_trace != bbd[best->dest->index].start_of_trace - 1)
1186                 break;
1187
1188               if (dump_file)
1189                 fprintf (dump_file, "Connection: %d %d\n",
1190                          best->src->index, best->dest->index);
1191
1192               t = bbd[best->dest->index].start_of_trace;
1193               traces[last_trace].last->aux = traces[t].first;
1194               connected[t] = true;
1195               last_trace = t;
1196             }
1197           else if (best)
1198             {
1199               if (dump_file)
1200                 {
1201                   fprintf (dump_file, "Connection: %d %d\n",
1202                            best->src->index, best->dest->index);
1203                 }
1204               t = bbd[best->dest->index].start_of_trace;
1205               traces[last_trace].last->aux = traces[t].first;
1206               connected[t] = true;
1207               last_trace = t;
1208             }
1209           else
1210             {
1211               /* Try to connect the traces by duplication of 1 block.  */
1212               edge e2;
1213               basic_block next_bb = NULL;
1214               bool try_copy = false;
1215
1216               FOR_EACH_EDGE (e, ei, traces[t].last->succs)
1217                 if (e->dest != EXIT_BLOCK_PTR_FOR_FN (cfun)
1218                     && (e->flags & EDGE_CAN_FALLTHRU)
1219                     && !(e->flags & EDGE_COMPLEX)
1220                     && (!best || e->probability > best->probability))
1221                   {
1222                     edge_iterator ei;
1223                     edge best2 = NULL;
1224                     int best2_len = 0;
1225
1226                     /* If the destination is a start of a trace which is only
1227                        one block long, then no need to search the successor
1228                        blocks of the trace.  Accept it.  */
1229                     if (bbd[e->dest->index].start_of_trace >= 0
1230                         && traces[bbd[e->dest->index].start_of_trace].length
1231                            == 1)
1232                       {
1233                         best = e;
1234                         try_copy = true;
1235                         continue;
1236                       }
1237
1238                     FOR_EACH_EDGE (e2, ei, e->dest->succs)
1239                       {
1240                         int di = e2->dest->index;
1241
1242                         if (e2->dest == EXIT_BLOCK_PTR_FOR_FN (cfun)
1243                             || ((e2->flags & EDGE_CAN_FALLTHRU)
1244                                 && !(e2->flags & EDGE_COMPLEX)
1245                                 && bbd[di].start_of_trace >= 0
1246                                 && !connected[bbd[di].start_of_trace]
1247                                 && BB_PARTITION (e2->dest) == current_partition
1248                                 && EDGE_FREQUENCY (e2) >= freq_threshold
1249                                 && e2->count >= count_threshold
1250                                 && (!best2
1251                                     || e2->probability > best2->probability
1252                                     || (e2->probability == best2->probability
1253                                         && traces[bbd[di].start_of_trace].length
1254                                            > best2_len))))
1255                           {
1256                             best = e;
1257                             best2 = e2;
1258                             if (e2->dest != EXIT_BLOCK_PTR_FOR_FN (cfun))
1259                               best2_len = traces[bbd[di].start_of_trace].length;
1260                             else
1261                               best2_len = INT_MAX;
1262                             next_bb = e2->dest;
1263                             try_copy = true;
1264                           }
1265                       }
1266                   }
1267
1268               if (crtl->has_bb_partition)  /* Never duplicate then.  */
1269                 try_copy = false;
1270
1271               /* Copy tiny blocks always; copy larger blocks only when the
1272                  edge is traversed frequently enough.  */
1273               if (try_copy
1274                   && copy_bb_p (best->dest,
1275                                 optimize_edge_for_speed_p (best)
1276                                 && EDGE_FREQUENCY (best) >= freq_threshold
1277                                 && best->count >= count_threshold))
1278                 {
1279                   basic_block new_bb;
1280
1281                   if (dump_file)
1282                     {
1283                       fprintf (dump_file, "Connection: %d %d ",
1284                                traces[t].last->index, best->dest->index);
1285                       if (!next_bb)
1286                         fputc ('\n', dump_file);
1287                       else if (next_bb == EXIT_BLOCK_PTR_FOR_FN (cfun))
1288                         fprintf (dump_file, "exit\n");
1289                       else
1290                         fprintf (dump_file, "%d\n", next_bb->index);
1291                     }
1292
1293                   new_bb = copy_bb (best->dest, best, traces[t].last, t);
1294                   traces[t].last = new_bb;
1295                   if (next_bb && next_bb != EXIT_BLOCK_PTR_FOR_FN (cfun))
1296                     {
1297                       t = bbd[next_bb->index].start_of_trace;
1298                       traces[last_trace].last->aux = traces[t].first;
1299                       connected[t] = true;
1300                       last_trace = t;
1301                     }
1302                   else
1303                     break;      /* Stop finding the successor traces.  */
1304                 }
1305               else
1306                 break;  /* Stop finding the successor traces.  */
1307             }
1308         }
1309     }
1310
1311   if (dump_file)
1312     {
1313       basic_block bb;
1314
1315       fprintf (dump_file, "Final order:\n");
1316       for (bb = traces[0].first; bb; bb = (basic_block) bb->aux)
1317         fprintf (dump_file, "%d ", bb->index);
1318       fprintf (dump_file, "\n");
1319       fflush (dump_file);
1320     }
1321
1322   FREE (connected);
1323 }
1324
1325 /* Return true when BB can and should be copied.  BB must have a nonzero
1326    frequency, at least two predecessors and at most eight successors, be
1327    accepted by can_duplicate_block_p, and the summed minimum lengths of its
1328    insns must not exceed the size limit.  The limit is uncond_jump_length,
1329    multiplied by PARAM_MAX_GROW_COPY_BB_INSNS when CODE_MAY_GROW is nonzero
1330    and BB is optimized for speed.  */
1331
1332 static bool
1333 copy_bb_p (const_basic_block bb, int code_may_grow)
1334 {
1335   if (!bb->frequency || EDGE_COUNT (bb->preds) < 2)
1336     return false;
1337   if (!can_duplicate_block_p (bb))
1338     return false;
1339
1340   /* Avoid duplicating blocks which have many successors (PR/13430).  */
1341   if (EDGE_COUNT (bb->succs) > 8)
1342     return false;
1343
1344   int limit = uncond_jump_length;
1345   if (code_may_grow && optimize_bb_for_speed_p (bb))
1346     limit *= PARAM_VALUE (PARAM_MAX_GROW_COPY_BB_INSNS);
1347
1348   /* Sum up the minimum lengths of all insns in BB satisfying INSN_P.  */
1349   int total = 0;
1350   rtx insn;
1351   FOR_BB_INSNS (bb, insn)
1352     {
1353       if (INSN_P (insn))
1354         total += get_attr_min_length (insn);
1355     }
1356
1357   if (total > limit)
1358     {
1359       if (dump_file)
1360         fprintf (dump_file,
1361                  "Block %d can't be copied because its size = %d.\n",
1362                  bb->index, total);
1363       return false;
1364     }
1365   return true;
1366 }
1367
1368 /* Return the length of unconditional jump instruction.  The length is
1369    measured with get_attr_min_length on a jump that is emitted together with
1370    its target label just for this purpose and deleted again afterwards.  */
1371
1372 int
1373 get_uncond_jump_length (void)
1374 {
1375   /* Emit a scratch label in front of the insns and a jump to it.  */
1376   rtx target = emit_label_before (gen_label_rtx (), get_insns ());
1377   rtx probe = emit_jump_insn (gen_jump (target));
1378   const int len = get_attr_min_length (probe);
1379
1380   /* Remove both again; only the measured length is wanted.  */
1381   delete_insn (probe);
1382   delete_insn (target);
1383   return len;
1384 }
1385
1386 /* The landing pad OLD_LP, in block OLD_BB, has edges from both partitions.
1387    Duplicate the landing pad into a new block placed in the partition that
1388    OLD_BB is not in, and redirect to it the incoming edges of OLD_BB whose
1389    source lies in that partition, so that no EH edge crosses partitions.  */
1390
1391 static void
1392 fix_up_crossing_landing_pad (eh_landing_pad old_lp, basic_block old_bb)
1393 {
1394   /* Generate the new landing-pad structure.  It belongs to the region of
1395      OLD_LP and shares its post landing pad, but has a label of its own.  */
1396   eh_landing_pad dup_lp = gen_eh_landing_pad (old_lp->region);
1397   dup_lp->post_landing_pad = old_lp->post_landing_pad;
1398   dup_lp->landing_pad = gen_label_rtx ();
1399   LABEL_PRESERVE_P (dup_lp->landing_pad) = 1;
1400
1401   /* Put appropriate instructions in the new bb: the label, whatever
1402      expand_dw2_landing_pad_for_region emits for the region, and a jump to
1403      the single successor of the block holding the old landing pad.  */
1404   rtx head = emit_label (dup_lp->landing_pad);
1405   expand_dw2_landing_pad_for_region (old_lp->region);
1406
1407   basic_block target_bb = single_succ (BLOCK_FOR_INSN (old_lp->landing_pad));
1408   rtx target_label = block_label (target_bb);
1409   rtx tail = emit_jump_insn (gen_jump (target_label));
1410   JUMP_LABEL (tail) = target_label;
1411
1412   /* Create the new basic block to be dest for lp, right after the block
1413      preceding the exit block, and link it behind that block's aux too.  */
1414   basic_block prev = EXIT_BLOCK_PTR_FOR_FN (cfun)->prev_bb;
1415   basic_block dup_bb = create_basic_block (head, tail, prev);
1416   dup_bb->aux = prev->aux;
1417   prev->aux = dup_bb;
1418
1419   emit_barrier_after_bb (dup_bb);
1420   make_edge (dup_bb, target_bb, 0);
1421
1422   /* Make sure new bb is in the other partition.  */
1423   unsigned other_part = BB_PARTITION (old_bb);
1424   other_part ^= BB_HOT_PARTITION | BB_COLD_PARTITION;
1425   BB_SET_PARTITION (dup_bb, other_part);
1426
1427   /* Fix up the edges.  The iterator is advanced only past edges that are
1428      kept; after a redirection it stays where it is.  */
1429   edge_iterator it = ei_start (old_bb->preds);
1430   edge pred;
1431   while ((pred = ei_safe_edge (it)) != NULL)
1432     {
1433       if (BB_PARTITION (pred->src) != other_part)
1434         {
1435           ei_next (&it);
1436           continue;
1437         }
1438
1439       /* Point the REG_EH_REGION note of the source's last insn at DUP_LP.  */
1440       rtx note = find_reg_note (BB_END (pred->src), REG_EH_REGION, NULL_RTX);
1441       gcc_assert (note != NULL);
1442       gcc_checking_assert (INTVAL (XEXP (note, 0)) == old_lp->index);
1443       XEXP (note, 0) = GEN_INT (dup_lp->index);
1444
1445       /* Adjust the edge to the new destination.  */
1446       redirect_edge_succ (pred, dup_bb);
1447     }
1448 }
1449
1450
1451 /* Ensure that all hot bbs are included in a hot path through the
1452    procedure. This is done by calling this function twice, once
1453    with WALK_UP true (to look for paths from the entry to hot bbs) and
1454    once with WALK_UP false (to look for paths from hot bbs to the exit).
1455    Returns the updated value of COLD_BB_COUNT and adds newly-hot bbs
1456    to BBS_IN_HOT_PARTITION.  */
1457
1458 static unsigned int
1459 sanitize_hot_paths (bool walk_up, unsigned int cold_bb_count,
1460                     vec<basic_block> *bbs_in_hot_partition)
1461 {
1462   /* Callers check this.  */
1463   gcc_checking_assert (cold_bb_count);
1464
1465   /* Keep examining hot bbs while we still have some left to check
1466      and there are remaining cold bbs.  The worklist is a private copy
1467      of BBS_IN_HOT_PARTITION, so it has to be released at the end.  */
1468   vec<basic_block> hot_bbs_to_check = bbs_in_hot_partition->copy ();
1469   while (! hot_bbs_to_check.is_empty () && cold_bb_count)
1470     {
1471       basic_block bb = hot_bbs_to_check.pop ();
1472       vec<edge, va_gc> *edges = walk_up ? bb->preds : bb->succs;
1473       edge e;
1474       edge_iterator ei;
1475       int highest_probability = 0;
1476       int highest_freq = 0;
1477       gcov_type highest_count = 0;
1478       bool found = false;
1479
1480       /* Walk the preds/succs and check if there is at least one already
1481          marked hot. Keep track of the most frequent pred/succ so that we
1482          can mark it hot if we don't find one.  */
1483       FOR_EACH_EDGE (e, ei, edges)
1484         {
1485           basic_block reach_bb = walk_up ? e->src : e->dest;
1486
1487           if (e->flags & EDGE_DFS_BACK)
1488             continue;
1489
1490           if (BB_PARTITION (reach_bb) != BB_COLD_PARTITION)
1491             {
1492               found = true;
1493               break;
1494             }
1495           /* Record the highest edge count, frequency and probability seen;
1496              the second loop below selects the hottest edge with them.  */
1497           if (e->count > highest_count)
1498             highest_count = e->count;
1499           int edge_freq = EDGE_FREQUENCY (e);
1500           if (edge_freq > highest_freq)
1501             highest_freq = edge_freq;
1502           if (e->probability > highest_probability)
1503             highest_probability = e->probability;
1504         }
1505
1506       /* If bb is reached by (or reaches, in the case of !WALK_UP) another hot
1507          block (or unpartitioned, e.g. the entry block) then it is ok. If not,
1508          then the most frequent pred (or succ) needs to be adjusted.  In the
1509          case where multiple preds/succs have the same frequency (e.g. a
1510          50-50 branch), then both will be adjusted.  */
1511       if (found)
1512         continue;
1513
1514       FOR_EACH_EDGE (e, ei, edges)
1515         {
1516           if (e->flags & EDGE_DFS_BACK)
1517             continue;
1518           /* Select the hottest edge using the edge count if it is non-zero,
1519              else the edge frequency, and finally the edge probability.  */
1520           if (highest_count)
1521             {
1522               if (e->count < highest_count)
1523                 continue;
1524             }
1525           else if (highest_freq)
1526             {
1527               if (EDGE_FREQUENCY (e) < highest_freq)
1528                 continue;
1529             }
1530           else if (e->probability < highest_probability)
1531             continue;
1532
1533           basic_block reach_bb = walk_up ? e->src : e->dest;
1534
1535           /* BB is hot but this hottest pred (or succ) of it is cold.
1536              Re-mark that block hot.  */
1537           BB_SET_PARTITION (reach_bb, BB_HOT_PARTITION);
1538           cold_bb_count--;
1539
1540           /* Now we need to examine newly-hot reach_bb to see if it too
1541              is only reached by (or only reaches) cold bbs.  */
1542           bbs_in_hot_partition->safe_push (reach_bb);
1543           hot_bbs_to_check.safe_push (reach_bb);
1544         }
1545     }
1546
1547   /* Free the worklist copy made above; it would be leaked otherwise.  */
1548   hot_bbs_to_check.release ();
1549   return cold_bb_count;
1550 }
1551
1552
1553 /* Find the basic blocks that are rarely executed and need to be moved to
1554    a separate section of the .o file (to cut down on paging and improve
1555    cache locality).  Return a vector of all edges that cross.  */
1556
1557 static vec<edge>
1558 find_rarely_executed_basic_blocks_and_crossing_edges (void)
1559 {
1560   vec<edge> crossing_edges = vNULL;
1561   basic_block bb;
1562   edge e;
1563   edge_iterator ei;
1564   unsigned int cold_bb_count = 0;
1565   vec<basic_block> bbs_in_hot_partition = vNULL;
1566
1567   /* Mark which partition (hot/cold) each basic block belongs in.  */
1568   FOR_EACH_BB (bb)
1569     {
1570       bool cold_bb = false;
1571
1572       if (probably_never_executed_bb_p (cfun, bb))
1573         {
1574           /* Handle profile insanities created by upstream optimizations
1575              by also checking the incoming edge weights. If there is a non-cold
1576              incoming edge, conservatively prevent this block from being split
1577              into the cold section.  */
1578           cold_bb = true;
1579           FOR_EACH_EDGE (e, ei, bb->preds)
1580             if (!probably_never_executed_edge_p (cfun, e))
1581               {
1582                 cold_bb = false;
1583                 break;
1584               }
1585         }
1586       if (cold_bb)
1587         {
1588           BB_SET_PARTITION (bb, BB_COLD_PARTITION);
1589           cold_bb_count++;
1590         }
1591       else
1592         {
1593           BB_SET_PARTITION (bb, BB_HOT_PARTITION);
1594           bbs_in_hot_partition.safe_push (bb);
1595         }
1596     }
1597
1598   /* Ensure that hot bbs are included along a hot path from the entry to exit.
1599      Several different possibilities may include cold bbs along all paths
1600      to/from a hot bb. One is that there are edge weight insanities
1601      due to optimization phases that do not properly update basic block profile
1602      counts. The second is that the entry of the function may not be hot, because
1603      it is entered fewer times than the number of profile training runs, but there
1604      is a loop inside the function that causes blocks within the function to be
1605      above the threshold for hotness. This is fixed by walking up from hot bbs
1606      to the entry block, and then down from hot bbs to the exit, performing
1607      partitioning fixups as necessary.  */
1608   if (cold_bb_count)
1609     {
1610       mark_dfs_back_edges ();
1611       cold_bb_count = sanitize_hot_paths (true, cold_bb_count,
1612                                           &bbs_in_hot_partition);
1613       if (cold_bb_count)
1614         sanitize_hot_paths (false, cold_bb_count, &bbs_in_hot_partition);
1615     }
1616
1617   /* The format of .gcc_except_table does not allow landing pads to
1618      be in a different partition as the throw.  Fix this by either
1619      moving or duplicating the landing pads.  */
1620   if (cfun->eh->lp_array)
1621     {
1622       unsigned i;
1623       eh_landing_pad lp;
1624
1625       FOR_EACH_VEC_ELT (*cfun->eh->lp_array, i, lp)
1626         {
1627           bool all_same, all_diff;
1628
1629           if (lp == NULL
1630               || lp->landing_pad == NULL_RTX
1631               || !LABEL_P (lp->landing_pad))
1632             continue;
1633
1634           all_same = all_diff = true;
1635           bb = BLOCK_FOR_INSN (lp->landing_pad);
1636           FOR_EACH_EDGE (e, ei, bb->preds)
1637             {
1638               gcc_assert (e->flags & EDGE_EH);
1639               if (BB_PARTITION (bb) == BB_PARTITION (e->src))
1640                 all_diff = false;
1641               else
1642                 all_same = false;
1643             }
1644
1645           if (all_same)
1646             ;
1647           else if (all_diff)
1648             {
1649               int which = BB_PARTITION (bb);
1650               which ^= BB_HOT_PARTITION | BB_COLD_PARTITION;
1651               BB_SET_PARTITION (bb, which);
1652             }
1653           else
1654             fix_up_crossing_landing_pad (lp, bb);
1655         }
1656     }
1657
1658   /* Mark every edge that crosses between sections.  */
1659
1660   FOR_EACH_BB (bb)
1661     FOR_EACH_EDGE (e, ei, bb->succs)
1662       {
1663         unsigned int flags = e->flags;
1664
1665         /* We should never have EDGE_CROSSING set yet.  */
1666         gcc_checking_assert ((flags & EDGE_CROSSING) == 0);
1667
1668         if (e->src != ENTRY_BLOCK_PTR_FOR_FN (cfun)
1669             && e->dest != EXIT_BLOCK_PTR_FOR_FN (cfun)
1670             && BB_PARTITION (e->src) != BB_PARTITION (e->dest))
1671           {
1672             crossing_edges.safe_push (e);
1673             flags |= EDGE_CROSSING;
1674           }
1675
1676         /* Now that we've split eh edges as appropriate, allow landing pads
1677            to be merged with the post-landing pads.  */
1678         flags &= ~EDGE_PRESERVE;
1679
1680         e->flags = flags;
1681       }
1682
1683   return crossing_edges;
1684 }
1685
1686 /* Set the flag EDGE_CAN_FALLTHRU for edges that can be fallthru.  */
1687
1688 static void
1689 set_edge_can_fallthru_flag (void)
1690 {
1691   basic_block bb;
1692
1693   FOR_EACH_BB (bb)
1694     {
1695       edge e;
1696       edge_iterator ei;
1697
1698       FOR_EACH_EDGE (e, ei, bb->succs)
1699         {
1700           e->flags &= ~EDGE_CAN_FALLTHRU;
1701
1702           /* The FALLTHRU edge is also CAN_FALLTHRU edge.  */
1703           if (e->flags & EDGE_FALLTHRU)
1704             e->flags |= EDGE_CAN_FALLTHRU;
1705         }
1706
1707       /* If the BB ends with an invertible condjump all (2) edges are
1708          CAN_FALLTHRU edges.  */
1709       if (EDGE_COUNT (bb->succs) != 2)
1710         continue;
1711       if (!any_condjump_p (BB_END (bb)))
1712         continue;
1713       if (!invert_jump (BB_END (bb), JUMP_LABEL (BB_END (bb)), 0))
1714         continue;
1715       invert_jump (BB_END (bb), JUMP_LABEL (BB_END (bb)), 0);
1716       EDGE_SUCC (bb, 0)->flags |= EDGE_CAN_FALLTHRU;
1717       EDGE_SUCC (bb, 1)->flags |= EDGE_CAN_FALLTHRU;
1718     }
1719 }
1720
1721 /* If any destination of a crossing edge does not have a label, add label;
1722    Convert any easy fall-through crossing edges to unconditional jumps.  */
1723
1724 static void
1725 add_labels_and_missing_jumps (vec<edge> crossing_edges)
1726 {
1727   size_t i;
1728   edge e;
1729
1730   FOR_EACH_VEC_ELT (crossing_edges, i, e)
1731     {
1732       basic_block src = e->src;
1733       basic_block dest = e->dest;
1734       rtx label, new_jump;
1735
1736       if (dest == EXIT_BLOCK_PTR_FOR_FN (cfun))
1737         continue;
1738
1739       /* Make sure dest has a label.  */
1740       label = block_label (dest);
1741
1742       /* Nothing to do for non-fallthru edges.  */
1743       if (src == ENTRY_BLOCK_PTR_FOR_FN (cfun))
1744         continue;
1745       if ((e->flags & EDGE_FALLTHRU) == 0)
1746         continue;
1747
1748       /* If the block does not end with a control flow insn, then we
1749          can trivially add a jump to the end to fixup the crossing.
1750          Otherwise the jump will have to go in a new bb, which will
1751          be handled by fix_up_fall_thru_edges function.  */
1752       if (control_flow_insn_p (BB_END (src)))
1753         continue;
1754
1755       /* Make sure there's only one successor.  */
1756       gcc_assert (single_succ_p (src));
1757
1758       new_jump = emit_jump_insn_after (gen_jump (label), BB_END (src));
1759       BB_END (src) = new_jump;
1760       JUMP_LABEL (new_jump) = label;
1761       LABEL_NUSES (label) += 1;
1762
1763       emit_barrier_after_bb (src);
1764
1765       /* Mark edge as non-fallthru.  */
1766       e->flags &= ~EDGE_FALLTHRU;
1767     }
1768 }
1769
1770 /* Find any bb's where the fall-through edge is a crossing edge (note that
1771    these bb's must also contain a conditional jump or end with a call
1772    instruction; we've already dealt with fall-through edges for blocks
1773    that didn't have a conditional jump or didn't end with call instruction
1774    in the call to add_labels_and_missing_jumps).  Convert the fall-through
1775    edge to non-crossing edge by inserting a new bb to fall-through into.
1776    The new bb will contain an unconditional jump (crossing edge) to the
1777    original fall through destination.  */
1778
1779 static void
1780 fix_up_fall_thru_edges (void)
1781 {
1782   basic_block cur_bb;
1783   basic_block new_bb;
1784   edge succ1;
1785   edge succ2;
1786   edge fall_thru;
1787   edge cond_jump = NULL;
1788   edge e;
1789   bool cond_jump_crosses;
1790   int invert_worked;
1791   rtx old_jump;
1792   rtx fall_thru_label;
1793
1794   FOR_EACH_BB (cur_bb)
1795     {
1796       fall_thru = NULL;
1797       if (EDGE_COUNT (cur_bb->succs) > 0)
1798         succ1 = EDGE_SUCC (cur_bb, 0);
1799       else
1800         succ1 = NULL;
1801
1802       if (EDGE_COUNT (cur_bb->succs) > 1)
1803         succ2 = EDGE_SUCC (cur_bb, 1);
1804       else
1805         succ2 = NULL;
1806
1807       /* Find the fall-through edge.  */
1808
1809       if (succ1
1810           && (succ1->flags & EDGE_FALLTHRU))
1811         {
1812           fall_thru = succ1;
1813           cond_jump = succ2;
1814         }
1815       else if (succ2
1816                && (succ2->flags & EDGE_FALLTHRU))
1817         {
1818           fall_thru = succ2;
1819           cond_jump = succ1;
1820         }
1821       else if (succ1
1822                && (block_ends_with_call_p (cur_bb)
1823                    || can_throw_internal (BB_END (cur_bb))))
1824         {
1825           edge e;
1826           edge_iterator ei;
1827
1828           /* Find EDGE_CAN_FALLTHRU edge.  */
1829           FOR_EACH_EDGE (e, ei, cur_bb->succs)
1830             if (e->flags & EDGE_CAN_FALLTHRU)
1831               {
1832                 fall_thru = e;
1833                 break;
1834               }
1835         }
1836
1837       if (fall_thru && (fall_thru->dest != EXIT_BLOCK_PTR_FOR_FN (cfun)))
1838         {
1839           /* Check to see if the fall-thru edge is a crossing edge.  */
1840
1841           if (fall_thru->flags & EDGE_CROSSING)
1842             {
1843               /* The fall_thru edge crosses; now check the cond jump edge, if
1844                  it exists.  */
1845
1846               cond_jump_crosses = true;
1847               invert_worked  = 0;
1848               old_jump = BB_END (cur_bb);
1849
1850               /* Find the jump instruction, if there is one.  */
1851
1852               if (cond_jump)
1853                 {
1854                   if (!(cond_jump->flags & EDGE_CROSSING))
1855                     cond_jump_crosses = false;
1856
1857                   /* We know the fall-thru edge crosses; if the cond
1858                      jump edge does NOT cross, and its destination is the
1859                      next block in the bb order, invert the jump
1860                      (i.e. fix it so the fall through does not cross and
1861                      the cond jump does).  */
1862
1863                   if (!cond_jump_crosses)
1864                     {
1865                       /* Find label in fall_thru block. We've already added
1866                          any missing labels, so there must be one.  */
1867
1868                       fall_thru_label = block_label (fall_thru->dest);
1869
1870                       if (old_jump && JUMP_P (old_jump) && fall_thru_label)
1871                         invert_worked = invert_jump (old_jump,
1872                                                      fall_thru_label,0);
1873                       if (invert_worked)
1874                         {
1875                           fall_thru->flags &= ~EDGE_FALLTHRU;
1876                           cond_jump->flags |= EDGE_FALLTHRU;
1877                           update_br_prob_note (cur_bb);
1878                           e = fall_thru;
1879                           fall_thru = cond_jump;
1880                           cond_jump = e;
1881                           cond_jump->flags |= EDGE_CROSSING;
1882                           fall_thru->flags &= ~EDGE_CROSSING;
1883                         }
1884                     }
1885                 }
1886
1887               if (cond_jump_crosses || !invert_worked)
1888                 {
1889                   /* This is the case where both edges out of the basic
1890                      block are crossing edges. Here we will fix up the
1891                      fall through edge. The jump edge will be taken care
1892                      of later.  The EDGE_CROSSING flag of fall_thru edge
1893                      is unset before the call to force_nonfallthru
1894                      function because if a new basic-block is created
1895                      this edge remains in the current section boundary
1896                      while the edge between new_bb and the fall_thru->dest
1897                      becomes EDGE_CROSSING.  */
1898
1899                   fall_thru->flags &= ~EDGE_CROSSING;
1900                   new_bb = force_nonfallthru (fall_thru);
1901
1902                   if (new_bb)
1903                     {
1904                       new_bb->aux = cur_bb->aux;
1905                       cur_bb->aux = new_bb;
1906
1907                       /* This is done by force_nonfallthru_and_redirect.  */
1908                       gcc_assert (BB_PARTITION (new_bb)
1909                                   == BB_PARTITION (cur_bb));
1910
1911                       single_succ_edge (new_bb)->flags |= EDGE_CROSSING;
1912                     }
1913                   else
1914                     {
1915                       /* If a new basic-block was not created; restore
1916                          the EDGE_CROSSING flag.  */
1917                       fall_thru->flags |= EDGE_CROSSING;
1918                     }
1919
1920                   /* Add barrier after new jump */
1921                   emit_barrier_after_bb (new_bb ? new_bb : cur_bb);
1922                 }
1923             }
1924         }
1925     }
1926 }
1927
1928 /* This function checks the destination block of a "crossing jump" to
1929    see if it has any crossing predecessors that begin with a code label
1930    and end with an unconditional jump.  If so, it returns that predecessor
1931    block.  (This is to avoid creating lots of new basic blocks that all
1932    contain unconditional jumps to the same destination).  */
1933
1934 static basic_block
1935 find_jump_block (basic_block jump_dest)
1936 {
1937   basic_block source_bb = NULL;
1938   edge e;
1939   rtx insn;
1940   edge_iterator ei;
1941
1942   FOR_EACH_EDGE (e, ei, jump_dest->preds)
1943     if (e->flags & EDGE_CROSSING)
1944       {
1945         basic_block src = e->src;
1946
1947         /* Check each predecessor to see if it has a label, and contains
1948            only one executable instruction, which is an unconditional jump.
1949            If so, we can use it.  */
1950
1951         if (LABEL_P (BB_HEAD (src)))
1952           for (insn = BB_HEAD (src);
1953                !INSN_P (insn) && insn != NEXT_INSN (BB_END (src));
1954                insn = NEXT_INSN (insn))
1955             {
1956               if (INSN_P (insn)
1957                   && insn == BB_END (src)
1958                   && JUMP_P (insn)
1959                   && !any_condjump_p (insn))
1960                 {
1961                   source_bb = src;
1962                   break;
1963                 }
1964             }
1965
1966         if (source_bb)
1967           break;
1968       }
1969
1970   return source_bb;
1971 }
1972
1973 /* Find all BB's with conditional jumps that are crossing edges;
1974    insert a new bb and make the conditional jump branch to the new
1975    bb instead (make the new bb same color so conditional branch won't
1976    be a 'crossing' edge).  Insert an unconditional jump from the
1977    new bb to the original destination of the conditional jump.  */
1978
1979 static void
1980 fix_crossing_conditional_branches (void)
1981 {
1982   basic_block cur_bb;
1983   basic_block new_bb;
1984   basic_block dest;
1985   edge succ1;
1986   edge succ2;
1987   edge crossing_edge;
1988   edge new_edge;
1989   rtx old_jump;
1990   rtx set_src;
1991   rtx old_label = NULL_RTX;
1992   rtx new_label;
1993
1994   FOR_EACH_BB (cur_bb)
1995     {
1996       crossing_edge = NULL;
1997       if (EDGE_COUNT (cur_bb->succs) > 0)
1998         succ1 = EDGE_SUCC (cur_bb, 0);
1999       else
2000         succ1 = NULL;
2001
2002       if (EDGE_COUNT (cur_bb->succs) > 1)
2003         succ2 = EDGE_SUCC (cur_bb, 1);
2004       else
2005         succ2 = NULL;
2006
2007       /* We already took care of fall-through edges, so only one successor
2008          can be a crossing edge.  */
2009
2010       if (succ1 && (succ1->flags & EDGE_CROSSING))
2011         crossing_edge = succ1;
2012       else if (succ2 && (succ2->flags & EDGE_CROSSING))
2013         crossing_edge = succ2;
2014
2015       if (crossing_edge)
2016         {
2017           old_jump = BB_END (cur_bb);
2018
2019           /* Check to make sure the jump instruction is a
2020              conditional jump.  */
2021
2022           set_src = NULL_RTX;
2023
2024           if (any_condjump_p (old_jump))
2025             {
2026               if (GET_CODE (PATTERN (old_jump)) == SET)
2027                 set_src = SET_SRC (PATTERN (old_jump));
2028               else if (GET_CODE (PATTERN (old_jump)) == PARALLEL)
2029                 {
2030                   set_src = XVECEXP (PATTERN (old_jump), 0,0);
2031                   if (GET_CODE (set_src) == SET)
2032                     set_src = SET_SRC (set_src);
2033                   else
2034                     set_src = NULL_RTX;
2035                 }
2036             }
2037
2038           if (set_src && (GET_CODE (set_src) == IF_THEN_ELSE))
2039             {
2040               if (GET_CODE (XEXP (set_src, 1)) == PC)
2041                 old_label = XEXP (set_src, 2);
2042               else if (GET_CODE (XEXP (set_src, 2)) == PC)
2043                 old_label = XEXP (set_src, 1);
2044
2045               /* Check to see if new bb for jumping to that dest has
2046                  already been created; if so, use it; if not, create
2047                  a new one.  */
2048
2049               new_bb = find_jump_block (crossing_edge->dest);
2050
2051               if (new_bb)
2052                 new_label = block_label (new_bb);
2053               else
2054                 {
2055                   basic_block last_bb;
2056                   rtx new_jump;
2057
2058                   /* Create new basic block to be dest for
2059                      conditional jump.  */
2060
2061                   /* Put appropriate instructions in new bb.  */
2062
2063                   new_label = gen_label_rtx ();
2064                   emit_label (new_label);
2065
2066                   gcc_assert (GET_CODE (old_label) == LABEL_REF);
2067                   old_label = JUMP_LABEL (old_jump);
2068                   new_jump = emit_jump_insn (gen_jump (old_label));
2069                   JUMP_LABEL (new_jump) = old_label;
2070
2071                   last_bb = EXIT_BLOCK_PTR_FOR_FN (cfun)->prev_bb;
2072                   new_bb = create_basic_block (new_label, new_jump, last_bb);
2073                   new_bb->aux = last_bb->aux;
2074                   last_bb->aux = new_bb;
2075
2076                   emit_barrier_after_bb (new_bb);
2077
2078                   /* Make sure new bb is in same partition as source
2079                      of conditional branch.  */
2080                   BB_COPY_PARTITION (new_bb, cur_bb);
2081                 }
2082
2083               /* Make old jump branch to new bb.  */
2084
2085               redirect_jump (old_jump, new_label, 0);
2086
2087               /* Remove crossing_edge as predecessor of 'dest'.  */
2088
2089               dest = crossing_edge->dest;
2090
2091               redirect_edge_succ (crossing_edge, new_bb);
2092
2093               /* Make a new edge from new_bb to old dest; new edge
2094                  will be a successor for new_bb and a predecessor
2095                  for 'dest'.  */
2096
2097               if (EDGE_COUNT (new_bb->succs) == 0)
2098                 new_edge = make_edge (new_bb, dest, 0);
2099               else
2100                 new_edge = EDGE_SUCC (new_bb, 0);
2101
2102               crossing_edge->flags &= ~EDGE_CROSSING;
2103               new_edge->flags |= EDGE_CROSSING;
2104             }
2105         }
2106     }
2107 }
2108
2109 /* Find any unconditional branches that cross between hot and cold
2110    sections.  Convert them into indirect jumps instead.  */
2111
2112 static void
2113 fix_crossing_unconditional_branches (void)
2114 {
2115   basic_block cur_bb;
2116   rtx last_insn;
2117   rtx label;
2118   rtx label_addr;
2119   rtx indirect_jump_sequence;
2120   rtx jump_insn = NULL_RTX;
2121   rtx new_reg;
2122   rtx cur_insn;
2123   edge succ;
2124
2125   FOR_EACH_BB (cur_bb)
2126     {
2127       last_insn = BB_END (cur_bb);
2128
2129       if (EDGE_COUNT (cur_bb->succs) < 1)
2130         continue;
2131
2132       succ = EDGE_SUCC (cur_bb, 0);
2133
2134       /* Check to see if bb ends in a crossing (unconditional) jump.  At
2135          this point, no crossing jumps should be conditional.  */
2136
2137       if (JUMP_P (last_insn)
2138           && (succ->flags & EDGE_CROSSING))
2139         {
2140           gcc_assert (!any_condjump_p (last_insn));
2141
2142           /* Make sure the jump is not already an indirect or table jump.  */
2143
2144           if (!computed_jump_p (last_insn)
2145               && !tablejump_p (last_insn, NULL, NULL))
2146             {
2147               /* We have found a "crossing" unconditional branch.  Now
2148                  we must convert it to an indirect jump.  First create
2149                  reference of label, as target for jump.  */
2150
2151               label = JUMP_LABEL (last_insn);
2152               label_addr = gen_rtx_LABEL_REF (Pmode, label);
2153               LABEL_NUSES (label) += 1;
2154
2155               /* Get a register to use for the indirect jump.  */
2156
2157               new_reg = gen_reg_rtx (Pmode);
2158
2159               /* Generate indirect the jump sequence.  */
2160
2161               start_sequence ();
2162               emit_move_insn (new_reg, label_addr);
2163               emit_indirect_jump (new_reg);
2164               indirect_jump_sequence = get_insns ();
2165               end_sequence ();
2166
2167               /* Make sure every instruction in the new jump sequence has
2168                  its basic block set to be cur_bb.  */
2169
2170               for (cur_insn = indirect_jump_sequence; cur_insn;
2171                    cur_insn = NEXT_INSN (cur_insn))
2172                 {
2173                   if (!BARRIER_P (cur_insn))
2174                     BLOCK_FOR_INSN (cur_insn) = cur_bb;
2175                   if (JUMP_P (cur_insn))
2176                     jump_insn = cur_insn;
2177                 }
2178
2179               /* Insert the new (indirect) jump sequence immediately before
2180                  the unconditional jump, then delete the unconditional jump.  */
2181
2182               emit_insn_before (indirect_jump_sequence, last_insn);
2183               delete_insn (last_insn);
2184
2185               /* Make BB_END for cur_bb be the jump instruction (NOT the
2186                  barrier instruction at the end of the sequence...).  */
2187
2188               BB_END (cur_bb) = jump_insn;
2189             }
2190         }
2191     }
2192 }
2193
2194 /* Add REG_CROSSING_JUMP note to all crossing jump insns.  */
2195
2196 static void
2197 add_reg_crossing_jump_notes (void)
2198 {
2199   basic_block bb;
2200   edge e;
2201   edge_iterator ei;
2202
2203   FOR_EACH_BB (bb)
2204     FOR_EACH_EDGE (e, ei, bb->succs)
2205       if ((e->flags & EDGE_CROSSING)
2206           && JUMP_P (BB_END (e->src))
2207           /* Some notes were added during fix_up_fall_thru_edges, via
2208              force_nonfallthru_and_redirect.  */
2209           && !find_reg_note (BB_END (e->src), REG_CROSSING_JUMP, NULL_RTX))
2210         add_reg_note (BB_END (e->src), REG_CROSSING_JUMP, NULL_RTX);
2211 }
2212
2213 /* Reorder basic blocks.  The main entry point to this file.  FLAGS is
2214    the set of flags to pass to cfg_layout_initialize().  */
2215
2216 static void
2217 reorder_basic_blocks (void)
2218 {
2219   int n_traces;
2220   int i;
2221   struct trace *traces;
2222
2223   gcc_assert (current_ir_type () == IR_RTL_CFGLAYOUT);
2224
2225   if (n_basic_blocks_for_fn (cfun) <= NUM_FIXED_BLOCKS + 1)
2226     return;
2227
2228   set_edge_can_fallthru_flag ();
2229   mark_dfs_back_edges ();
2230
2231   /* We are estimating the length of uncond jump insn only once since the code
2232      for getting the insn length always returns the minimal length now.  */
2233   if (uncond_jump_length == 0)
2234     uncond_jump_length = get_uncond_jump_length ();
2235
2236   /* We need to know some information for each basic block.  */
2237   array_size = GET_ARRAY_SIZE (last_basic_block);
2238   bbd = XNEWVEC (bbro_basic_block_data, array_size);
2239   for (i = 0; i < array_size; i++)
2240     {
2241       bbd[i].start_of_trace = -1;
2242       bbd[i].end_of_trace = -1;
2243       bbd[i].in_trace = -1;
2244       bbd[i].visited = 0;
2245       bbd[i].heap = NULL;
2246       bbd[i].node = NULL;
2247     }
2248
2249   traces = XNEWVEC (struct trace, n_basic_blocks_for_fn (cfun));
2250   n_traces = 0;
2251   find_traces (&n_traces, traces);
2252   connect_traces (n_traces, traces);
2253   FREE (traces);
2254   FREE (bbd);
2255
2256   relink_block_chain (/*stay_in_cfglayout_mode=*/true);
2257
2258   if (dump_file)
2259     {
2260       if (dump_flags & TDF_DETAILS)
2261         dump_reg_info (dump_file);
2262       dump_flow_info (dump_file, dump_flags);
2263     }
2264
2265   /* Signal that rtl_verify_flow_info_1 can now verify that there
2266      is at most one switch between hot/cold sections.  */
2267   crtl->bb_reorder_complete = true;
2268 }
2269
2270 /* Determine which partition the first basic block in the function
2271    belongs to, then find the first basic block in the current function
2272    that belongs to a different section, and insert a
2273    NOTE_INSN_SWITCH_TEXT_SECTIONS note immediately before it in the
2274    instruction stream.  When writing out the assembly code,
2275    encountering this note will make the compiler switch between the
2276    hot and cold text sections.  */
2277
2278 void
2279 insert_section_boundary_note (void)
2280 {
2281   basic_block bb;
2282   bool switched_sections = false;
2283   int current_partition = 0;
2284
2285   if (!crtl->has_bb_partition)
2286     return;
2287
2288   FOR_EACH_BB (bb)
2289     {
2290       if (!current_partition)
2291         current_partition = BB_PARTITION (bb);
2292       if (BB_PARTITION (bb) != current_partition)
2293         {
2294           gcc_assert (!switched_sections);
2295           switched_sections = true;
2296           emit_note_before (NOTE_INSN_SWITCH_TEXT_SECTIONS, BB_HEAD (bb));
2297           current_partition = BB_PARTITION (bb);
2298         }
2299     }
2300 }
2301
2302 static bool
2303 gate_handle_reorder_blocks (void)
2304 {
2305   if (targetm.cannot_modify_jumps_p ())
2306     return false;
2307   return (optimize > 0
2308           && (flag_reorder_blocks || flag_reorder_blocks_and_partition));
2309 }
2310
2311 static unsigned int
2312 rest_of_handle_reorder_blocks (void)
2313 {
2314   basic_block bb;
2315
2316   /* Last attempt to optimize CFG, as scheduling, peepholing and insn
2317      splitting possibly introduced more crossjumping opportunities.  */
2318   cfg_layout_initialize (CLEANUP_EXPENSIVE);
2319
2320   reorder_basic_blocks ();
2321   cleanup_cfg (CLEANUP_EXPENSIVE);
2322
2323   FOR_EACH_BB (bb)
2324     if (bb->next_bb != EXIT_BLOCK_PTR_FOR_FN (cfun))
2325       bb->aux = bb->next_bb;
2326   cfg_layout_finalize ();
2327
2328   return 0;
2329 }
2330
2331 namespace {
2332
2333 const pass_data pass_data_reorder_blocks =
2334 {
2335   RTL_PASS, /* type */
2336   "bbro", /* name */
2337   OPTGROUP_NONE, /* optinfo_flags */
2338   true, /* has_gate */
2339   true, /* has_execute */
2340   TV_REORDER_BLOCKS, /* tv_id */
2341   0, /* properties_required */
2342   0, /* properties_provided */
2343   0, /* properties_destroyed */
2344   0, /* todo_flags_start */
2345   TODO_verify_rtl_sharing, /* todo_flags_finish */
2346 };
2347
2348 class pass_reorder_blocks : public rtl_opt_pass
2349 {
2350 public:
2351   pass_reorder_blocks (gcc::context *ctxt)
2352     : rtl_opt_pass (pass_data_reorder_blocks, ctxt)
2353   {}
2354
2355   /* opt_pass methods: */
2356   bool gate () { return gate_handle_reorder_blocks (); }
2357   unsigned int execute () { return rest_of_handle_reorder_blocks (); }
2358
2359 }; // class pass_reorder_blocks
2360
2361 } // anon namespace
2362
2363 rtl_opt_pass *
2364 make_pass_reorder_blocks (gcc::context *ctxt)
2365 {
2366   return new pass_reorder_blocks (ctxt);
2367 }
2368
2369 /* Duplicate the blocks containing computed gotos.  This basically unfactors
2370    computed gotos that were factored early on in the compilation process to
2371    speed up edge based data flow.  We used to not unfactoring them again,
2372    which can seriously pessimize code with many computed jumps in the source
2373    code, such as interpreters.  See e.g. PR15242.  */
2374
2375 static bool
2376 gate_duplicate_computed_gotos (void)
2377 {
2378   if (targetm.cannot_modify_jumps_p ())
2379     return false;
2380   return (optimize > 0
2381           && flag_expensive_optimizations
2382           && ! optimize_function_for_size_p (cfun));
2383 }
2384
2385
2386 static unsigned int
2387 duplicate_computed_gotos (void)
2388 {
2389   basic_block bb, new_bb;
2390   bitmap candidates;
2391   int max_size;
2392
2393   if (n_basic_blocks_for_fn (cfun) <= NUM_FIXED_BLOCKS + 1)
2394     return 0;
2395
2396   clear_bb_flags ();
2397   cfg_layout_initialize (0);
2398
2399   /* We are estimating the length of uncond jump insn only once
2400      since the code for getting the insn length always returns
2401      the minimal length now.  */
2402   if (uncond_jump_length == 0)
2403     uncond_jump_length = get_uncond_jump_length ();
2404
2405   max_size
2406     = uncond_jump_length * PARAM_VALUE (PARAM_MAX_GOTO_DUPLICATION_INSNS);
2407   candidates = BITMAP_ALLOC (NULL);
2408
2409   /* Look for blocks that end in a computed jump, and see if such blocks
2410      are suitable for unfactoring.  If a block is a candidate for unfactoring,
2411      mark it in the candidates.  */
2412   FOR_EACH_BB (bb)
2413     {
2414       rtx insn;
2415       edge e;
2416       edge_iterator ei;
2417       int size, all_flags;
2418
2419       /* Build the reorder chain for the original order of blocks.  */
2420       if (bb->next_bb != EXIT_BLOCK_PTR_FOR_FN (cfun))
2421         bb->aux = bb->next_bb;
2422
2423       /* Obviously the block has to end in a computed jump.  */
2424       if (!computed_jump_p (BB_END (bb)))
2425         continue;
2426
2427       /* Only consider blocks that can be duplicated.  */
2428       if (find_reg_note (BB_END (bb), REG_CROSSING_JUMP, NULL_RTX)
2429           || !can_duplicate_block_p (bb))
2430         continue;
2431
2432       /* Make sure that the block is small enough.  */
2433       size = 0;
2434       FOR_BB_INSNS (bb, insn)
2435         if (INSN_P (insn))
2436           {
2437             size += get_attr_min_length (insn);
2438             if (size > max_size)
2439                break;
2440           }
2441       if (size > max_size)
2442         continue;
2443
2444       /* Final check: there must not be any incoming abnormal edges.  */
2445       all_flags = 0;
2446       FOR_EACH_EDGE (e, ei, bb->preds)
2447         all_flags |= e->flags;
2448       if (all_flags & EDGE_COMPLEX)
2449         continue;
2450
2451       bitmap_set_bit (candidates, bb->index);
2452     }
2453
2454   /* Nothing to do if there is no computed jump here.  */
2455   if (bitmap_empty_p (candidates))
2456     goto done;
2457
2458   /* Duplicate computed gotos.  */
2459   FOR_EACH_BB (bb)
2460     {
2461       if (bb->flags & BB_VISITED)
2462         continue;
2463
2464       bb->flags |= BB_VISITED;
2465
2466       /* BB must have one outgoing edge.  That edge must not lead to
2467          the exit block or the next block.
2468          The destination must have more than one predecessor.  */
2469       if (!single_succ_p (bb)
2470           || single_succ (bb) == EXIT_BLOCK_PTR_FOR_FN (cfun)
2471           || single_succ (bb) == bb->next_bb
2472           || single_pred_p (single_succ (bb)))
2473         continue;
2474
2475       /* The successor block has to be a duplication candidate.  */
2476       if (!bitmap_bit_p (candidates, single_succ (bb)->index))
2477         continue;
2478
2479       /* Don't duplicate a partition crossing edge, which requires difficult
2480          fixup.  */
2481       if (find_reg_note (BB_END (bb), REG_CROSSING_JUMP, NULL_RTX))
2482         continue;
2483
2484       new_bb = duplicate_block (single_succ (bb), single_succ_edge (bb), bb);
2485       new_bb->aux = bb->aux;
2486       bb->aux = new_bb;
2487       new_bb->flags |= BB_VISITED;
2488     }
2489
2490 done:
2491   cfg_layout_finalize ();
2492
2493   BITMAP_FREE (candidates);
2494   return 0;
2495 }
2496
2497 namespace {
2498
2499 const pass_data pass_data_duplicate_computed_gotos =
2500 {
2501   RTL_PASS, /* type */
2502   "compgotos", /* name */
2503   OPTGROUP_NONE, /* optinfo_flags */
2504   true, /* has_gate */
2505   true, /* has_execute */
2506   TV_REORDER_BLOCKS, /* tv_id */
2507   0, /* properties_required */
2508   0, /* properties_provided */
2509   0, /* properties_destroyed */
2510   0, /* todo_flags_start */
2511   TODO_verify_rtl_sharing, /* todo_flags_finish */
2512 };
2513
2514 class pass_duplicate_computed_gotos : public rtl_opt_pass
2515 {
2516 public:
2517   pass_duplicate_computed_gotos (gcc::context *ctxt)
2518     : rtl_opt_pass (pass_data_duplicate_computed_gotos, ctxt)
2519   {}
2520
2521   /* opt_pass methods: */
2522   bool gate () { return gate_duplicate_computed_gotos (); }
2523   unsigned int execute () { return duplicate_computed_gotos (); }
2524
2525 }; // class pass_duplicate_computed_gotos
2526
2527 } // anon namespace
2528
2529 rtl_opt_pass *
2530 make_pass_duplicate_computed_gotos (gcc::context *ctxt)
2531 {
2532   return new pass_duplicate_computed_gotos (ctxt);
2533 }
2534
2535 static bool
2536 gate_handle_partition_blocks (void)
2537 {
2538   /* The optimization to partition hot/cold basic blocks into separate
2539      sections of the .o file does not work well with linkonce or with
2540      user defined section attributes.  Don't call it if either case
2541      arises.  */
2542   return (flag_reorder_blocks_and_partition
2543           && optimize
2544           /* See gate_handle_reorder_blocks.  We should not partition if
2545              we are going to omit the reordering.  */
2546           && optimize_function_for_speed_p (cfun)
2547           && !DECL_ONE_ONLY (current_function_decl)
2548           && !user_defined_section_attribute);
2549 }
2550
2551 /* This function is the main 'entrance' for the optimization that
2552    partitions hot and cold basic blocks into separate sections of the
2553    .o file (to improve performance and cache locality).  Ideally it
2554    would be called after all optimizations that rearrange the CFG have
2555    been called.  However part of this optimization may introduce new
2556    register usage, so it must be called before register allocation has
2557    occurred.  This means that this optimization is actually called
2558    well before the optimization that reorders basic blocks (see
2559    function above).
2560
2561    This optimization checks the feedback information to determine
2562    which basic blocks are hot/cold, updates flags on the basic blocks
2563    to indicate which section they belong in.  This information is
2564    later used for writing out sections in the .o file.  Because hot
2565    and cold sections can be arbitrarily large (within the bounds of
2566    memory), far beyond the size of a single function, it is necessary
2567    to fix up all edges that cross section boundaries, to make sure the
2568    instructions used can actually span the required distance.  The
2569    fixes are described below.
2570
2571    Fall-through edges must be changed into jumps; it is not safe or
2572    legal to fall through across a section boundary.  Whenever a
2573    fall-through edge crossing a section boundary is encountered, a new
2574    basic block is inserted (in the same section as the fall-through
2575    source), and the fall through edge is redirected to the new basic
2576    block.  The new basic block contains an unconditional jump to the
2577    original fall-through target.  (If the unconditional jump is
2578    insufficient to cross section boundaries, that is dealt with a
2579    little later, see below).
2580
2581    In order to deal with architectures that have short conditional
2582    branches (which cannot span all of memory) we take any conditional
2583    jump that attempts to cross a section boundary and add a level of
2584    indirection: it becomes a conditional jump to a new basic block, in
2585    the same section.  The new basic block contains an unconditional
2586    jump to the original target, in the other section.
2587
2588    For those architectures whose unconditional branch is also
2589    incapable of reaching all of memory, those unconditional jumps are
2590    converted into indirect jumps, through a register.
2591
2592    IMPORTANT NOTE: This optimization causes some messy interactions
2593    with the cfg cleanup optimizations; those optimizations want to
2594    merge blocks wherever possible, and to collapse indirect jump
2595    sequences (change "A jumps to B jumps to C" directly into "A jumps
2596    to C").  Those optimizations can undo the jump fixes that
2597    partitioning is required to make (see above), in order to ensure
2598    that jumps attempting to cross section boundaries are really able
2599    to cover whatever distance the jump requires (on many architectures
2600    conditional or unconditional jumps are not able to reach all of
2601    memory).  Therefore tests have to be inserted into each such
2602    optimization to make sure that it does not undo stuff necessary to
2603    cross partition boundaries.  This would be much less of a problem
2604    if we could perform this optimization later in the compilation, but
2605    unfortunately the fact that we may need to create indirect jumps
2606    (through registers) requires that this optimization be performed
2607    before register allocation.
2608
2609    Hot and cold basic blocks are partitioned and put in separate
2610    sections of the .o file, to reduce paging and improve cache
2611    performance (hopefully).  This can result in bits of code from the
2612    same function being widely separated in the .o file.  However this
2613    is not obvious to the current bb structure.  Therefore we must take
2614    care to ensure that: 1) there are no fall_thru edges that cross
2615    between sections; 2) for those architectures which have "short"
2616    conditional branches, all conditional branches that attempt to
2617    cross between sections are converted to unconditional branches;
2618    and 3) for those architectures which have "short" unconditional
2619    branches, all unconditional branches that attempt to cross between
2620    sections are converted to indirect jumps.
2621
2622    The code for fixing up fall_thru edges that cross between hot and
2623    cold basic blocks does so by creating new basic blocks containing
2624    unconditional branches to the appropriate label in the "other"
2625    section.  The new basic block is then put in the same (hot or cold)
2626    section as the original conditional branch, and the fall_thru edge
2627    is modified to fall into the new basic block instead.  By adding
2628    this level of indirection we end up with only unconditional branches
2629    crossing between hot and cold sections.
2630
2631    Conditional branches are dealt with by adding a level of indirection.
2632    A new basic block is added in the same (hot/cold) section as the
2633    conditional branch, and the conditional branch is retargeted to the
2634    new basic block.  The new basic block contains an unconditional branch
2635    to the original target of the conditional branch (in the other section).
2636
2637    Unconditional branches are dealt with by converting them into
2638    indirect jumps.  */
2639
2640 static unsigned
2641 partition_hot_cold_basic_blocks (void)
2642 {
2643   vec<edge> crossing_edges;
2644
2645   if (n_basic_blocks_for_fn (cfun) <= NUM_FIXED_BLOCKS + 1)
2646     return 0;
2647
2648   df_set_flags (DF_DEFER_INSN_RESCAN);
2649
2650   crossing_edges = find_rarely_executed_basic_blocks_and_crossing_edges ();
2651   if (!crossing_edges.exists ())
2652     return 0;
2653
2654   crtl->has_bb_partition = true;
2655
2656   /* Make sure the source of any crossing edge ends in a jump and the
2657      destination of any crossing edge has a label.  */
2658   add_labels_and_missing_jumps (crossing_edges);
2659
2660   /* Convert all crossing fall_thru edges to non-crossing fall
2661      thrus to unconditional jumps (that jump to the original fall
2662      through dest).  */
2663   fix_up_fall_thru_edges ();
2664
2665   /* If the architecture does not have conditional branches that can
2666      span all of memory, convert crossing conditional branches into
2667      crossing unconditional branches.  */
2668   if (!HAS_LONG_COND_BRANCH)
2669     fix_crossing_conditional_branches ();
2670
2671   /* If the architecture does not have unconditional branches that
2672      can span all of memory, convert crossing unconditional branches
2673      into indirect jumps.  Since adding an indirect jump also adds
2674      a new register usage, update the register usage information as
2675      well.  */
2676   if (!HAS_LONG_UNCOND_BRANCH)
2677     fix_crossing_unconditional_branches ();
2678
2679   add_reg_crossing_jump_notes ();
2680
2681   /* Clear bb->aux fields that the above routines were using.  */
2682   clear_aux_for_blocks ();
2683
2684   crossing_edges.release ();
2685
2686   /* ??? FIXME: DF generates the bb info for a block immediately.
2687      And by immediately, I mean *during* creation of the block.
2688
2689         #0  df_bb_refs_collect
2690         #1  in df_bb_refs_record
2691         #2  in create_basic_block_structure
2692
2693      Which means that the bb_has_eh_pred test in df_bb_refs_collect
2694      will *always* fail, because no edges can have been added to the
2695      block yet.  Which of course means we don't add the right
2696      artificial refs, which means we fail df_verify (much) later.
2697
2698      Cleanest solution would seem to make DF_DEFER_INSN_RESCAN imply
2699      that we also shouldn't grab data from the new blocks those new
2700      insns are in either.  In this way one can create the block, link
2701      it up properly, and have everything Just Work later, when deferred
2702      insns are processed.
2703
2704      In the meantime, we have no other option but to throw away all
2705      of the DF data and recompute it all.  */
2706   if (cfun->eh->lp_array)
2707     {
2708       df_finish_pass (true);
2709       df_scan_alloc (NULL);
2710       df_scan_blocks ();
2711       /* Not all post-landing pads use all of the EH_RETURN_DATA_REGNO
2712          data.  We blindly generated all of them when creating the new
2713          landing pad.  Delete those assignments we don't use.  */
2714       df_set_flags (DF_LR_RUN_DCE);
2715       df_analyze ();
2716     }
2717
2718   return TODO_verify_flow | TODO_verify_rtl_sharing;
2719 }
2720
2721 namespace {
2722
2723 const pass_data pass_data_partition_blocks =
2724 {
2725   RTL_PASS, /* type */
2726   "bbpart", /* name */
2727   OPTGROUP_NONE, /* optinfo_flags */
2728   true, /* has_gate */
2729   true, /* has_execute */
2730   TV_REORDER_BLOCKS, /* tv_id */
2731   PROP_cfglayout, /* properties_required */
2732   0, /* properties_provided */
2733   0, /* properties_destroyed */
2734   0, /* todo_flags_start */
2735   0, /* todo_flags_finish */
2736 };
2737
2738 class pass_partition_blocks : public rtl_opt_pass
2739 {
2740 public:
2741   pass_partition_blocks (gcc::context *ctxt)
2742     : rtl_opt_pass (pass_data_partition_blocks, ctxt)
2743   {}
2744
2745   /* opt_pass methods: */
2746   bool gate () { return gate_handle_partition_blocks (); }
2747   unsigned int execute () { return partition_hot_cold_basic_blocks (); }
2748
2749 }; // class pass_partition_blocks
2750
2751 } // anon namespace
2752
2753 rtl_opt_pass *
2754 make_pass_partition_blocks (gcc::context *ctxt)
2755 {
2756   return new pass_partition_blocks (ctxt);
2757 }