/* Thread edges through blocks and update the control flow and SSA graphs.
   Copyright (C) 2004-2015 Free Software Foundation, Inc.

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 3, or (at your option)
any later version.

GCC is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
GNU General Public License for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3.  If not see
<http://www.gnu.org/licenses/>.  */
#include "config.h"
#include "system.h"
#include "coretypes.h"
#include "hash-set.h"
#include "machmode.h"
#include "vec.h"
#include "double-int.h"
#include "input.h"
#include "alias.h"
#include "symtab.h"
#include "options.h"
#include "wide-int.h"
#include "inchash.h"
#include "tree.h"
#include "fold-const.h"
#include "flags.h"
#include "predict.h"
#include "tm.h"
#include "hard-reg-set.h"
#include "function.h"
#include "dominance.h"
#include "cfg.h"
#include "cfganal.h"
#include "basic-block.h"
#include "hash-table.h"
#include "tree-ssa-alias.h"
#include "internal-fn.h"
#include "gimple-expr.h"
#include "is-a.h"
#include "gimple.h"
#include "gimple-iterator.h"
#include "gimple-ssa.h"
#include "tree-phinodes.h"
#include "tree-ssa.h"
#include "tree-ssa-threadupdate.h"
#include "ssa-iterators.h"
#include "dumpfile.h"
#include "cfgloop.h"
#include "dbgcnt.h"
#include "tree-cfg.h"
#include "tree-pass.h"

/* Given a block B, update the CFG and SSA graph to reflect redirecting
   one or more in-edges to B to instead reach the destination of an
   out-edge from B while preserving any side effects in B.

   i.e., given A->B and B->C, change A->B to be A->C yet still preserve the
   side effects of executing B.

     1. Make a copy of B (including its outgoing edges and statements).  Call
        the copy B'.  Note B' has no incoming edges or PHIs at this time.

     2. Remove the control statement at the end of B' and all outgoing edges
        except B'->C.

     3. Add a new argument to each PHI in C with the same value as the existing
        argument associated with edge B->C.  Associate the new PHI arguments
        with the edge B'->C.

     4. For each PHI in B, find or create a PHI in B' with an identical
        PHI_RESULT.  Add an argument to the PHI in B' which has the same
        value as the PHI in B associated with the edge A->B.  Associate
        the new argument in the PHI in B' with the edge A->B.

     5. Change the edge A->B to A->B'.

        5a. This automatically deletes any PHI arguments associated with the
            edge A->B in B.

        5b. This automatically associates each new argument added in step 4
            with the edge A->B'.

     6. Repeat for other incoming edges into B.

     7. Put the duplicated resources in B and all the B' blocks into SSA form.

   Note that block duplication can be minimized by first collecting the
   set of unique destination blocks that the incoming edges should
   be threaded to.

   We reduce the number of edges and statements we create by not copying all
   the outgoing edges and the control statement in step #1.  We instead create
   a template block without the outgoing edges and duplicate the template.

   Another case this code handles is threading through a "joiner" block.  In
   this case, we do not know the destination of the joiner block, but one
   of the outgoing edges from the joiner block leads to a threadable path.  This
   case largely works as outlined above, except the duplicate of the joiner
   block still contains a full set of outgoing edges and its control statement.
   We just redirect one of its outgoing edges to our jump threading path.  */
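
/* As a small worked example of the steps above (hypothetical block
   numbers): suppose block 3 ends in "if (x > 0)" with successors
   block 4 and block 5, and suppose we know the edge 2->3 always
   continues to block 4.  We create a copy 3' containing block 3's
   side effects but no control statement, give it the single outgoing
   edge 3'->4, add arguments to block 4's PHIs for the new edge 3'->4
   (copied from those on 3->4), and finally redirect 2->3 to 2->3'.
   The conditional in block 3 remains for its other incoming edges.  */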


/* Steps #5 and #6 of the above algorithm are best implemented by walking
   all the incoming edges which thread to the same destination edge at
   the same time.  That avoids lots of table lookups to get information
   for the destination edge.

   To realize that implementation we create a list of incoming edges
   which thread to the same outgoing edge.  Thus to implement steps
   #5 and #6 we traverse our hash table of outgoing edge information.
   For each entry we walk the list of incoming edges which thread to
   the current outgoing edge.  */

struct el
{
  edge e;
  struct el *next;
};

/* Main data structure recording information regarding B's duplicate
   blocks.  */

/* We need to efficiently record the unique thread destinations of this
   block and specific information associated with those destinations.  We
   may have many incoming edges threaded to the same outgoing edge.  This
   can be naturally implemented with a hash table.  */

struct redirection_data : typed_free_remove<redirection_data>
{
  /* We support wiring up two block duplicates in a jump threading path.

     One is a normal block copy where we remove the control statement
     and wire up its single remaining outgoing edge to the thread path.

     The other is a joiner block where we leave the control statement
     in place, but wire one of the outgoing edges to a thread path.

     In theory we could have multiple block duplicates in a jump
     threading path, but I haven't tried that.

     The duplicate blocks appear in this array in the same order in
     which they appear in the jump thread path.  */
  basic_block dup_blocks[2];

  /* The jump threading path.  */
  vec<jump_thread_edge *> *path;

  /* A list of incoming edges which we want to thread to the
     same path.  */
  struct el *incoming_edges;

  /* hash_table support.  */
  typedef redirection_data *value_type;
  typedef redirection_data *compare_type;
  static inline hashval_t hash (const redirection_data *);
  static inline int equal (const redirection_data *, const redirection_data *);
};

/* Dump a jump threading path, including annotations about each
   edge in the path.  */

static void
dump_jump_thread_path (FILE *dump_file, vec<jump_thread_edge *> path,
		       bool registering)
{
  fprintf (dump_file,
	   "  %s%s jump thread: (%d, %d) incoming edge; ",
	   (registering ? "Registering" : "Cancelling"),
	   (path[0]->type == EDGE_FSM_THREAD ? " FSM": ""),
	   path[0]->e->src->index, path[0]->e->dest->index);

  for (unsigned int i = 1; i < path.length (); i++)
    {
      /* We can get paths with a NULL edge when the final destination
	 of a jump thread turns out to be a constant address.  We dump
	 those paths when debugging, so we have to be prepared for that
	 possibility here.  */
      if (path[i]->e == NULL)
	continue;

      if (path[i]->type == EDGE_COPY_SRC_JOINER_BLOCK)
	fprintf (dump_file, " (%d, %d) joiner; ",
		 path[i]->e->src->index, path[i]->e->dest->index);
      if (path[i]->type == EDGE_COPY_SRC_BLOCK)
	fprintf (dump_file, " (%d, %d) normal;",
		 path[i]->e->src->index, path[i]->e->dest->index);
      if (path[i]->type == EDGE_NO_COPY_SRC_BLOCK)
	fprintf (dump_file, " (%d, %d) nocopy;",
		 path[i]->e->src->index, path[i]->e->dest->index);
      if (path[0]->type == EDGE_FSM_THREAD)
	fprintf (dump_file, " (%d, %d) ",
		 path[i]->e->src->index, path[i]->e->dest->index);
    }
  fputc ('\n', dump_file);
}

/* Simple hashing function.  For any given incoming edge E, we're going
   to be most concerned with the final destination of its jump thread
   path.  So hash on the index of the destination block of the path's
   final edge.  */

inline hashval_t
redirection_data::hash (const redirection_data *p)
{
  vec<jump_thread_edge *> *path = p->path;
  return path->last ()->e->dest->index;
}

/* Given two hash table entries, return true if they have the same
   jump threading path.  */
inline int
redirection_data::equal (const redirection_data *p1, const redirection_data *p2)
{
  vec<jump_thread_edge *> *path1 = p1->path;
  vec<jump_thread_edge *> *path2 = p2->path;

  if (path1->length () != path2->length ())
    return false;

  for (unsigned int i = 1; i < path1->length (); i++)
    {
      if ((*path1)[i]->type != (*path2)[i]->type
	  || (*path1)[i]->e != (*path2)[i]->e)
	return false;
    }

  return true;
}
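
/* Note how the two functions above interact: hash () keys only on the
   index of the path's final destination block, so distinct paths ending
   at the same block collide and are then separated by equal ().
   Conversely, equal () starts comparing at index 1, deliberately
   ignoring the incoming edge in (*path)[0]; two registered paths that
   differ only in their incoming edge are therefore treated as the same
   thread destination and share the same duplicate blocks.  */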

/* Data structure of information to pass to hash table traversal routines.  */
struct ssa_local_info_t
{
  /* The current block we are working on.  */
  basic_block bb;

  /* We only create a template block for the first duplicated block in a
     jump threading path as we may need many duplicates of that block.

     The second duplicate block in a path is specific to that path.  Creating
     and sharing a template for that block is considerably more difficult.  */
  basic_block template_block;

  /* TRUE if we thread one or more jumps, FALSE otherwise.  */
  bool jumps_threaded;

  /* Blocks duplicated for the thread.  */
  bitmap duplicate_blocks;
};

/* Passes which use the jump threading code register jump threading
   opportunities as they are discovered.  We keep the registered
   jump threading opportunities in this vector as edge pairs
   (original_edge, target_edge).  */
static vec<vec<jump_thread_edge *> *> paths;

/* When we start updating the CFG for threading, data necessary for jump
   threading is attached to the AUX field for the incoming edge.  Use these
   macros to access the underlying structure attached to the AUX field.  */
#define THREAD_PATH(E) ((vec<jump_thread_edge *> *)(E)->aux)
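
/* For example (a sketch; this pattern recurs throughout this file):

     vec<jump_thread_edge *> *path = THREAD_PATH (e);
     edge final_edge = path->last ()->e;

   where E is an incoming edge whose AUX field carries a registered
   jump threading path.  */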

/* Jump threading statistics.  */

struct thread_stats_d
{
  unsigned long num_threaded_edges;
};

struct thread_stats_d thread_stats;


/* Remove the last statement in block BB if it is a control statement.
   Also remove all outgoing edges except the edge which reaches DEST_BB.
   If DEST_BB is NULL, then remove all outgoing edges.  */

static void
remove_ctrl_stmt_and_useless_edges (basic_block bb, basic_block dest_bb)
{
  gimple_stmt_iterator gsi;
  edge e;
  edge_iterator ei;

  gsi = gsi_last_bb (bb);

  /* If the duplicate ends with a control statement, then remove it.

     Note that if we are duplicating the template block rather than the
     original basic block, then the duplicate might not have any real
     statements in it.  */
  if (!gsi_end_p (gsi)
      && gsi_stmt (gsi)
      && (gimple_code (gsi_stmt (gsi)) == GIMPLE_COND
	  || gimple_code (gsi_stmt (gsi)) == GIMPLE_GOTO
	  || gimple_code (gsi_stmt (gsi)) == GIMPLE_SWITCH))
    gsi_remove (&gsi, true);

  for (ei = ei_start (bb->succs); (e = ei_safe_edge (ei)); )
    {
      if (e->dest != dest_bb)
	remove_edge (e);
      else
	ei_next (&ei);
    }
}

/* Create a duplicate of BB.  Record the duplicate block in an array
   indexed by COUNT stored in RD.  */

static void
create_block_for_threading (basic_block bb,
			    struct redirection_data *rd,
			    unsigned int count,
			    bitmap *duplicate_blocks)
{
  edge_iterator ei;
  edge e;

  /* We can use the generic block duplication code and simply remove
     the stuff we do not need.  */
  rd->dup_blocks[count] = duplicate_block (bb, NULL, NULL);

  FOR_EACH_EDGE (e, ei, rd->dup_blocks[count]->succs)
    e->aux = NULL;

  /* Zero out the profile, since the block is unreachable for now.  */
  rd->dup_blocks[count]->frequency = 0;
  rd->dup_blocks[count]->count = 0;
  if (duplicate_blocks)
    bitmap_set_bit (*duplicate_blocks, rd->dup_blocks[count]->index);
}

/* Main data structure to hold information for duplicates of BB.  */

static hash_table<redirection_data> *redirection_data;

/* Given an outgoing edge E, look up and return its entry in our hash table.

   If INSERT is true, then we insert the entry into the hash table if
   it is not already present.  INCOMING_EDGE is added to the list of incoming
   edges associated with E in the hash table.  */

static struct redirection_data *
lookup_redirection_data (edge e, enum insert_option insert)
{
  struct redirection_data **slot;
  struct redirection_data *elt;
  vec<jump_thread_edge *> *path = THREAD_PATH (e);

  /* Build a hash table element so we can see if E is already
     in the table.  */
  elt = XNEW (struct redirection_data);
  elt->path = path;
  elt->dup_blocks[0] = NULL;
  elt->dup_blocks[1] = NULL;
  elt->incoming_edges = NULL;

  slot = redirection_data->find_slot (elt, insert);

  /* This will only happen if INSERT is false and the entry is not
     in the hash table.  */
  if (slot == NULL)
    {
      free (elt);
      return NULL;
    }

  /* This will only happen if E was not in the hash table and
     INSERT is true.  */
  if (*slot == NULL)
    {
      *slot = elt;
      elt->incoming_edges = XNEW (struct el);
      elt->incoming_edges->e = e;
      elt->incoming_edges->next = NULL;
      return elt;
    }
  /* E was in the hash table.  */
  else
    {
      /* Free ELT as we do not need it anymore; we will extract the
	 relevant entry from the hash table itself.  */
      free (elt);

      /* Get the entry stored in the hash table.  */
      elt = *slot;

      /* If insertion was requested, then we need to add INCOMING_EDGE
	 to the list of incoming edges associated with E.  */
      if (insert)
	{
	  struct el *el = XNEW (struct el);
	  el->next = elt->incoming_edges;
	  el->e = e;
	  elt->incoming_edges = el;
	}

      return elt;
    }
}
/* Similar to copy_phi_args, except that the PHI arg already exists;
   it just does not have a value associated with it.  */

static void
copy_phi_arg_into_existing_phi (edge src_e, edge tgt_e)
{
  int src_idx = src_e->dest_idx;
  int tgt_idx = tgt_e->dest_idx;

  /* Iterate over each PHI in e->dest.  */
  for (gphi_iterator gsi = gsi_start_phis (src_e->dest),
		     gsi2 = gsi_start_phis (tgt_e->dest);
       !gsi_end_p (gsi);
       gsi_next (&gsi), gsi_next (&gsi2))
    {
      gphi *src_phi = gsi.phi ();
      gphi *dest_phi = gsi2.phi ();
      tree val = gimple_phi_arg_def (src_phi, src_idx);
      source_location locus = gimple_phi_arg_location (src_phi, src_idx);

      SET_PHI_ARG_DEF (dest_phi, tgt_idx, val);
      gimple_phi_arg_set_location (dest_phi, tgt_idx, locus);
    }
}

/* Given ssa_name DEF, backtrack the jump threading PATH from node IDX
   to see if it has a constant value in a flow sensitive manner.  Set
   LOCUS to the location of the constant phi arg and return the value.
   Return DEF directly if either PATH or IDX is zero.  */

static tree
get_value_locus_in_path (tree def, vec<jump_thread_edge *> *path,
			 basic_block bb, int idx, source_location *locus)
{
  tree arg;
  gphi *def_phi;
  basic_block def_bb;

  if (path == NULL || idx == 0)
    return def;

  def_phi = dyn_cast <gphi *> (SSA_NAME_DEF_STMT (def));
  if (!def_phi)
    return def;

  def_bb = gimple_bb (def_phi);
  /* Don't propagate loop invariants into deeper loops.  */
  if (!def_bb || bb_loop_depth (def_bb) < bb_loop_depth (bb))
    return def;

  /* Backtrack the jump threading path from IDX to see if DEF has a
     constant value.  */
  for (int j = idx - 1; j >= 0; j--)
    {
      edge e = (*path)[j]->e;
      if (e->dest == def_bb)
	{
	  arg = gimple_phi_arg_def (def_phi, e->dest_idx);
	  if (is_gimple_min_invariant (arg))
	    {
	      *locus = gimple_phi_arg_location (def_phi, e->dest_idx);
	      return arg;
	    }
	  break;
	}
    }

  return def;
}
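
/* For instance (illustrative values): if DEF is x_1 defined by
   "x_1 = PHI <5(6), x_2(7)>" in block 8, and the threading path
   enters block 8 via the edge from block 6 at some index J < IDX,
   then x_1 is known to be 5 along this path, so 5 (and the matching
   argument location) is returned instead of x_1.  */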

/* For each PHI in BB, copy the argument associated with SRC_E to TGT_E.
   Try to backtrack the jump threading PATH from node IDX to see if the
   arg has a constant value; if so, copy the constant value instead of
   the argument itself.  */

static void
copy_phi_args (basic_block bb, edge src_e, edge tgt_e,
	       vec<jump_thread_edge *> *path, int idx)
{
  gphi_iterator gsi;
  int src_indx = src_e->dest_idx;

  for (gsi = gsi_start_phis (bb); !gsi_end_p (gsi); gsi_next (&gsi))
    {
      gphi *phi = gsi.phi ();
      tree def = gimple_phi_arg_def (phi, src_indx);
      source_location locus = gimple_phi_arg_location (phi, src_indx);

      if (TREE_CODE (def) == SSA_NAME
	  && !virtual_operand_p (gimple_phi_result (phi)))
	def = get_value_locus_in_path (def, path, bb, idx, &locus);

      add_phi_arg (phi, def, tgt_e, locus);
    }
}

/* We have recently made a copy of ORIG_BB, including its outgoing
   edges.  The copy is NEW_BB.  Every PHI node in every direct successor of
   ORIG_BB has a new argument associated with the edge from NEW_BB to the
   successor.  Initialize the PHI argument so that it is equal to the PHI
   argument associated with the edge from ORIG_BB to the successor.
   PATH and IDX are used to check if the new PHI argument has a constant
   value in a flow sensitive manner.  */

static void
update_destination_phis (basic_block orig_bb, basic_block new_bb,
			 vec<jump_thread_edge *> *path, int idx)
{
  edge_iterator ei;
  edge e;

  FOR_EACH_EDGE (e, ei, orig_bb->succs)
    {
      edge e2 = find_edge (new_bb, e->dest);
      copy_phi_args (e->dest, e, e2, path, idx);
    }
}
/* Given a duplicate block and its single destination (both stored
   in RD), create an edge between the duplicate and its single
   destination.

   Add an additional argument to any PHI nodes at the single
   destination.  IDX is the node in the jump threading path at
   which we start checking whether the new PHI argument has a
   constant value along the jump threading path.  */

static void
create_edge_and_update_destination_phis (struct redirection_data *rd,
					 basic_block bb, int idx)
{
  edge e = make_edge (bb, rd->path->last ()->e->dest, EDGE_FALLTHRU);

  rescan_loop_exit (e, true, false);
  e->probability = REG_BR_PROB_BASE;
  e->count = bb->count;

  /* We used to copy the thread path here.  That was added in 2007
     and dutifully updated through the representation changes in 2013.

     In 2013 we added code to thread from an interior node through
     the backedge to another interior node.  That runs after the code
     to thread through loop headers from outside the loop.

     The latter may delete edges in the CFG, including those
     which appeared in the jump threading path we copied here.  Thus
     we'd end up using a dangling pointer.

     After reviewing the 2007/2011 code, I can't see how anything
     depended on copying the AUX field and clearly copying the jump
     threading path is problematical due to embedded edge pointers.
     It has been removed.  */
  e->aux = NULL;

  /* If there are any PHI nodes at the destination of the outgoing edge
     from the duplicate block, then we will need to add a new argument
     to them.  The argument should have the same value as the argument
     associated with the outgoing edge stored in RD.  */
  copy_phi_args (e->dest, rd->path->last ()->e, e, rd->path, idx);
}

/* Look through PATH beginning at START and return TRUE if there are
   any additional blocks that need to be duplicated.  Otherwise,
   return FALSE.  */
static bool
any_remaining_duplicated_blocks (vec<jump_thread_edge *> *path,
				 unsigned int start)
{
  for (unsigned int i = start + 1; i < path->length (); i++)
    {
      if ((*path)[i]->type == EDGE_COPY_SRC_JOINER_BLOCK
	  || (*path)[i]->type == EDGE_COPY_SRC_BLOCK)
	return true;
    }
  return false;
}


/* Compute the amount of profile count/frequency coming into the jump threading
   path stored in RD that we are duplicating, returned in PATH_IN_COUNT_PTR and
   PATH_IN_FREQ_PTR, as well as the amount of counts flowing out of the
   duplicated path, returned in PATH_OUT_COUNT_PTR.  LOCAL_INFO is used to
   identify blocks duplicated for jump threading, which have duplicated
   edges that need to be ignored in the analysis.  Return true if the path
   contains a joiner, false otherwise.

   In the non-joiner case, this is straightforward - all the counts/frequency
   flowing into the jump threading path should flow through the duplicated
   block and out of the duplicated path.

   In the joiner case, it is very tricky.  Some of the counts flowing into
   the original path go off-path at the joiner.  The problem is that while
   we know how much total count goes off-path in the original control flow,
   we don't know how many of the counts corresponding to just the jump
   threading path go off-path at the joiner.

   For example, assume we have the following control flow and identified
   jump threading paths:

             A     B     C
              \    |    /
            Ea \   |Eb / Ec
                \  |  /
                 v v v
                   J  <-- Joiner
                  / \
             Eoff/   \Eon
                /     \
               v       v
             Soff     Son  <--- Normal
                       /\
                    Ed/  \ Ee
                     /    \
                    v      v
                    D      E

   Jump threading paths: A -> J -> Son -> D (path 1)
                         C -> J -> Son -> E (path 2)

   Note that the control flow could be more complicated:
   - Each jump threading path may have more than one incoming edge.  I.e. A and
     Ea could represent multiple incoming blocks/edges that are included in
     path 1.
   - There could be EDGE_NO_COPY_SRC_BLOCK edges after the joiner (either
     before or after the "normal" copy block).  These are not duplicated onto
     the jump threading path, as they are single-successor.
   - Any of the blocks along the path may have other incoming edges that
     are not part of any jump threading path, but add profile counts along
     the path.

   In the above example, after all jump threading is complete, we will
   end up with the following control flow:

           A          B            C
           |          |            |
         Ea|          |Eb          |Ec
           |          |            |
           v          v            v
          Ja          J           Jc
          / \        / \Eon'     / \
     Eona/   \   ---/---\--------   \Eonc
        /     \ /  /     \           \
       v       v v        v           v
      Sona     Soff      Son         Sonc
        \                 /\          /
         \___________    /  \    ___/
                     \  /    \  /
                      vv      vv
                      D        E

   The main issue to notice here is that when we are processing path 1
   (A->J->Son->D) we need to figure out the outgoing edge weights to
   the duplicated edges Ja->Sona and Ja->Soff, while ensuring that the
   sum of the incoming weights to D remains Ed.  The problem with simply
   assuming that Ja (and Jc when processing path 2) has the same outgoing
   probabilities to its successors as the original block J, is that after
   all paths are processed and other edges/counts removed (e.g. none
   of Ec will reach D after processing path 2), we may end up with not
   enough count flowing along the duplicated edge Sona->D.

   Therefore, in the case of a joiner, we keep track of all counts
   coming in along the current path, as well as from predecessors not
   on any jump threading path (Eb in the above example).  While we
   first assume that the duplicated Eona for Ja->Sona has the same
   probability as the original, we later compensate for other jump
   threading paths that may eliminate edges.  We do that by keeping
   track of all counts coming into the original path that are not in
   a jump thread (Eb in the above example, but as noted earlier, there
   could be other predecessors incoming to the path at various points,
   such as at Son).  Call this cumulative non-path count coming into
   the path before D Enonpath.  We then ensure that the count from
   Sona->D is at least as big as (Ed - Enonpath), but no bigger than
   the minimum weight along the jump threading path.  The probabilities
   of both the original and duplicated joiner blocks J and Ja will be
   adjusted accordingly after the updates.  */

static bool
compute_path_counts (struct redirection_data *rd,
		     ssa_local_info_t *local_info,
		     gcov_type *path_in_count_ptr,
		     gcov_type *path_out_count_ptr,
		     int *path_in_freq_ptr)
{
  edge e = rd->incoming_edges->e;
  vec<jump_thread_edge *> *path = THREAD_PATH (e);
  edge elast = path->last ()->e;
  gcov_type nonpath_count = 0;
  bool has_joiner = false;
  gcov_type path_in_count = 0;
  int path_in_freq = 0;

  /* Start by accumulating incoming edge counts to the path's first bb
     into a couple buckets:
	path_in_count: total count of incoming edges that flow into the
		       current path.
	nonpath_count: total count of incoming edges that are not
		       flowing along *any* path.  These are the counts
		       that will still flow along the original path after
		       all path duplication is done by potentially multiple
		       calls to this routine.
     (any other incoming edge counts are for a different jump threading
     path that will be handled by a later call to this routine.)
     To make this easier, start by recording all incoming edges that flow into
     the current path in a bitmap.  We could add up the path's incoming edge
     counts here, but we still need to walk all the first bb's incoming edges
     below to add up the counts of the other edges not included in this jump
     threading path.  */
  struct el *next, *el;
  bitmap in_edge_srcs = BITMAP_ALLOC (NULL);
  for (el = rd->incoming_edges; el; el = next)
    {
      next = el->next;
      bitmap_set_bit (in_edge_srcs, el->e->src->index);
    }
  edge ein;
  edge_iterator ei;
  FOR_EACH_EDGE (ein, ei, e->dest->preds)
    {
      vec<jump_thread_edge *> *ein_path = THREAD_PATH (ein);
      /* Simply check the incoming edge src against the set captured above.  */
      if (ein_path
	  && bitmap_bit_p (in_edge_srcs, (*ein_path)[0]->e->src->index))
	{
	  /* It is necessary but not sufficient that the last path edges
	     are identical.  There may be different paths that share the
	     same last path edge in the case where the last edge has a nocopy
	     source block.  */
	  gcc_assert (ein_path->last ()->e == elast);
	  path_in_count += ein->count;
	  path_in_freq += EDGE_FREQUENCY (ein);
	}
      else if (!ein_path)
	{
	  /* Keep track of the incoming edges that are not on any
	     jump-threading path.  These counts will still flow out of the
	     original path after all jump threading is complete.  */
	  nonpath_count += ein->count;
	}
    }

  /* This is needed due to insane incoming frequencies.  */
  if (path_in_freq > BB_FREQ_MAX)
    path_in_freq = BB_FREQ_MAX;

  BITMAP_FREE (in_edge_srcs);

  /* Now compute the fraction of the total count coming into the first
     path bb that is from the current threading path.  */
  gcov_type total_count = e->dest->count;
  /* Handle incoming profile insanities.  */
  if (total_count < path_in_count)
    path_in_count = total_count;
  int onpath_scale = GCOV_COMPUTE_SCALE (path_in_count, total_count);
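  /* Illustrative numbers: if path_in_count is 40 and total_count is 100,
     onpath_scale represents 40% as a fraction of REG_BR_PROB_BASE, and
     apply_probability below would scale a joiner out-edge count of 60
     down to an estimated 24 for this path.  */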

  /* Walk the entire path to do some more computation in order to estimate
     how much of the path_in_count will flow out of the duplicated threading
     path.  In the non-joiner case this is straightforward (it should be
     the same as path_in_count, although we will handle incoming profile
     insanities by setting it equal to the minimum count along the path).

     In the joiner case, we need to estimate how much of the path_in_count
     will stay on the threading path after the joiner's conditional branch.
     We don't really know for sure how much of the counts
     associated with this path go to each successor of the joiner, but we'll
     estimate based on the fraction of the total count coming into the path
     bb that was from the threading paths (computed above in onpath_scale).
     Afterwards, we will need to do some fixup to account for other threading
     paths and possible profile insanities.

     In order to estimate the joiner case's counts we also need to update
     nonpath_count with any additional counts coming into the path.  Other
     blocks along the path may have additional predecessors from outside
     the path.  */
  gcov_type path_out_count = path_in_count;
  gcov_type min_path_count = path_in_count;
  for (unsigned int i = 1; i < path->length (); i++)
    {
      edge epath = (*path)[i]->e;
      gcov_type cur_count = epath->count;
      if ((*path)[i]->type == EDGE_COPY_SRC_JOINER_BLOCK)
	{
	  has_joiner = true;
	  cur_count = apply_probability (cur_count, onpath_scale);
	}
      /* In the joiner case we need to update nonpath_count for any edges
	 coming into the path that will contribute to the count flowing
	 into the path successor.  */
      if (has_joiner && epath != elast)
	{
	  /* Look for other incoming edges after the joiner.  */
	  FOR_EACH_EDGE (ein, ei, epath->dest->preds)
	    {
	      if (ein != epath
		  /* Ignore in edges from blocks we have duplicated for a
		     threading path, which have duplicated edge counts until
		     they are redirected by an invocation of this routine.  */
		  && !bitmap_bit_p (local_info->duplicate_blocks,
				    ein->src->index))
		nonpath_count += ein->count;
	    }
	}
      if (cur_count < path_out_count)
	path_out_count = cur_count;
      if (epath->count < min_path_count)
	min_path_count = epath->count;
    }

  /* We computed path_out_count above assuming that this path targeted
     the joiner's on-path successor with the same likelihood as it
     reached the joiner.  However, other thread paths through the joiner
     may take a different path through the normal copy source block
     (i.e. they have a different elast), meaning that they do not
     contribute any counts to this path's elast.  As a result, it may
     turn out that this path must have more count flowing to the on-path
     successor of the joiner.  Essentially, all of this path's elast
     count must be contributed by this path and any nonpath counts
     (since any path through the joiner with a different elast will not
     include a copy of this elast in its duplicated path).
     So ensure that this path's path_out_count is at least the
     difference between elast->count and nonpath_count.  Otherwise the edge
     counts after threading will not be sane.  */
  if (has_joiner && path_out_count < elast->count - nonpath_count)
    {
      path_out_count = elast->count - nonpath_count;
      /* But neither can we go above the minimum count along the path
	 we are duplicating.  This can be an issue due to profile
	 insanities coming in to this pass.  */
      if (path_out_count > min_path_count)
	path_out_count = min_path_count;
    }

  *path_in_count_ptr = path_in_count;
  *path_out_count_ptr = path_out_count;
  *path_in_freq_ptr = path_in_freq;
  return has_joiner;
}


/* Update the counts and frequencies for both an original path
   edge EPATH and its duplicate EDUP.  The duplicate source block
   will get a count/frequency of PATH_IN_COUNT and PATH_IN_FREQ,
   and the duplicate edge EDUP will have a count of PATH_OUT_COUNT.  */
static void
update_profile (edge epath, edge edup, gcov_type path_in_count,
		gcov_type path_out_count, int path_in_freq)
{
  /* First update the duplicated block's count / frequency.  */
  if (edup)
    {
      basic_block dup_block = edup->src;
      gcc_assert (dup_block->count == 0);
      gcc_assert (dup_block->frequency == 0);
      dup_block->count = path_in_count;
      dup_block->frequency = path_in_freq;
    }

  /* Now update the original block's count and frequency in the
     opposite manner - remove the counts/freq that will flow
     into the duplicated block.  Handle underflow due to precision/
     rounding issues.  */
  epath->src->count -= path_in_count;
  if (epath->src->count < 0)
    epath->src->count = 0;
  epath->src->frequency -= path_in_freq;
  if (epath->src->frequency < 0)
    epath->src->frequency = 0;

  /* Next update this path edge's original and duplicated counts.  We know
     that the duplicated path will have path_out_count flowing
     out of it (in the joiner case this is the count along the duplicated path
     out of the duplicated joiner).  This count can then be removed from the
     original path edge.  */
  if (edup)
    edup->count = path_out_count;
  epath->count -= path_out_count;
  gcc_assert (epath->count >= 0);
}


/* The duplicate and original joiner blocks may end up with different
   probabilities (different from both the original and from each other).
   Recompute the probabilities here once we have updated the edge
   counts and frequencies.  */

static void
recompute_probabilities (basic_block bb)
{
  edge esucc;
  edge_iterator ei;
  FOR_EACH_EDGE (esucc, ei, bb->succs)
    {
      if (!bb->count)
	continue;

      /* Prevent overflow computation due to insane profiles.  */
      if (esucc->count < bb->count)
	esucc->probability = GCOV_COMPUTE_SCALE (esucc->count,
						 bb->count);
      else
	/* Can happen with missing/guessed probabilities, since we
	   may determine that more is flowing along a duplicated
	   path than the joiner succ probabilities allowed.
	   Counts and freqs will be insane after jump threading;
	   at least make sure the probability is sane or we will
	   get a flow verification error.
	   Not much we can do to make counts/freqs sane without
	   redoing the profile estimation.  */
	esucc->probability = REG_BR_PROB_BASE;
    }
}


/* Update the counts of the original and duplicated edges from a joiner
   that go off path, given that we have already determined that the
   duplicate joiner DUP_BB has incoming count PATH_IN_COUNT and
   outgoing count along the path PATH_OUT_COUNT.  The original (on-)path
   edge from the joiner is EPATH.  */

static void
update_joiner_offpath_counts (edge epath, basic_block dup_bb,
			      gcov_type path_in_count,
			      gcov_type path_out_count)
{
  /* Compute the count that currently flows off path from the joiner.
     In other words, the total count of the joiner's out edges other than
     epath.  Compute this by walking the successors instead of
     subtracting epath's count from the joiner bb count, since there
     are sometimes slight insanities where the total out edge count is
     larger than the bb count (possibly due to rounding/truncation
     errors).  */
  gcov_type total_orig_off_path_count = 0;
  edge enonpath;
  edge_iterator ei;
  FOR_EACH_EDGE (enonpath, ei, epath->src->succs)
    {
      if (enonpath == epath)
	continue;
      total_orig_off_path_count += enonpath->count;
    }

  /* For the path that we are duplicating, the amount that will flow
     off path from the duplicated joiner is the delta between the
     path's cumulative in count and the portion of that count we
     estimated above as flowing from the joiner along the duplicated
     path.  */
  gcov_type total_dup_off_path_count = path_in_count - path_out_count;

  /* Now do the actual updates of the off-path edges.  */
  FOR_EACH_EDGE (enonpath, ei, epath->src->succs)
    {
      /* Look for edges going off of the threading path.  */
      if (enonpath == epath)
	continue;

      /* Find the corresponding edge out of the duplicated joiner.  */
      edge enonpathdup = find_edge (dup_bb, enonpath->dest);
      gcc_assert (enonpathdup);

      /* We can't use the original probability of the joiner's out
	 edges, since the probabilities of the original branch
	 and the duplicated branches may vary after all threading is
	 complete.  But apportion the duplicated joiner's off-path
	 total edge count computed earlier (total_dup_off_path_count)
	 among the duplicated off-path edges based on their original
	 ratio to the full off-path count (total_orig_off_path_count).  */
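      /* Illustrative numbers: if the joiner has off-path edges with
	 counts 30 and 10 (total_orig_off_path_count == 40) and
	 total_dup_off_path_count is 20, the duplicated off-path edges
	 receive counts of 15 and 5 respectively.  */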
      int scale = GCOV_COMPUTE_SCALE (enonpath->count,
				      total_orig_off_path_count);
      /* Give the duplicated offpath edge a portion of the duplicated
	 total.  */
      enonpathdup->count = apply_scale (scale,
					total_dup_off_path_count);
      /* Now update the original offpath edge count, handling underflow
	 due to rounding errors.  */
      enonpath->count -= enonpathdup->count;
      if (enonpath->count < 0)
	enonpath->count = 0;
    }
}


/* Check if the paths through RD all have estimated frequencies but zero
   profile counts.  This is more accurate than checking the entry block
   for a zero profile count, since profile insanities sometimes creep in.  */

static bool
estimated_freqs_path (struct redirection_data *rd)
{
  edge e = rd->incoming_edges->e;
  vec<jump_thread_edge *> *path = THREAD_PATH (e);
  edge ein;
  edge_iterator ei;
  bool non_zero_freq = false;
  FOR_EACH_EDGE (ein, ei, e->dest->preds)
    {
      if (ein->count)
	return false;
      non_zero_freq |= ein->src->frequency != 0;
    }

  for (unsigned int i = 1; i < path->length (); i++)
    {
      edge epath = (*path)[i]->e;
      if (epath->src->count)
	return false;
      non_zero_freq |= epath->src->frequency != 0;
      edge esucc;
      FOR_EACH_EDGE (esucc, ei, epath->src->succs)
	{
	  if (esucc->count)
	    return false;
	  non_zero_freq |= esucc->src->frequency != 0;
	}
    }
  return non_zero_freq;
}


/* Invoked for routines that have guessed frequencies and no profile
   counts to record the block and edge frequencies for paths through RD
   in the profile count fields of those blocks and edges.  This is because
   ssa_fix_duplicate_block_edges incrementally updates the block and
   edge counts as edges are redirected, and it is difficult to do that
   for edge frequencies which are computed on the fly from the source
   block frequency and probability.  When a block frequency is updated
   its outgoing edge frequencies are affected and become difficult to
   adjust.  */

static void
freqs_to_counts_path (struct redirection_data *rd)
{
  edge e = rd->incoming_edges->e;
  vec<jump_thread_edge *> *path = THREAD_PATH (e);
  edge ein;
  edge_iterator ei;
  FOR_EACH_EDGE (ein, ei, e->dest->preds)
    {
      /* Scale up the frequency by REG_BR_PROB_BASE, to avoid rounding
	 errors applying the probability when the frequencies are very
	 small.  */
      ein->count = apply_probability (ein->src->frequency * REG_BR_PROB_BASE,
				      ein->probability);
    }

  for (unsigned int i = 1; i < path->length (); i++)
    {
      edge epath = (*path)[i]->e;
      edge esucc;
      /* Scale up the frequency by REG_BR_PROB_BASE, to avoid rounding
	 errors applying the edge probability when the frequencies are very
	 small.  */
      epath->src->count = epath->src->frequency * REG_BR_PROB_BASE;
      FOR_EACH_EDGE (esucc, ei, epath->src->succs)
	esucc->count = apply_probability (esucc->src->count,
					  esucc->probability);
    }
}
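
/* As a quick check of the scaling above (illustrative numbers, with
   REG_BR_PROB_BASE == 10000): a source block with frequency 100 and an
   outgoing edge probability of 2500 (25%) yields a synthesized block
   count of 100 * REG_BR_PROB_BASE == 1000000 and an edge count of
   250000, preserving the 25% ratio for the incremental updates.  */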


/* For routines that have guessed frequencies and no profile counts, where we
   used freqs_to_counts_path to record block and edge frequencies for paths
   through RD, we clear the counts after completing all updates for RD.
   The updates in ssa_fix_duplicate_block_edges are based off the count fields,
   but the block frequencies and edge probabilities were updated as well,
   so we can simply clear the count fields.  */

static void
clear_counts_path (struct redirection_data *rd)
{
  edge e = rd->incoming_edges->e;
  vec<jump_thread_edge *> *path = THREAD_PATH (e);
  edge ein, esucc;
  edge_iterator ei;
  FOR_EACH_EDGE (ein, ei, e->dest->preds)
    ein->count = 0;

  /* First clear counts along the original path.  */
  for (unsigned int i = 1; i < path->length (); i++)
    {
      edge epath = (*path)[i]->e;
      FOR_EACH_EDGE (esucc, ei, epath->src->succs)
	esucc->count = 0;
      epath->src->count = 0;
    }
  /* Also need to clear the counts along the duplicated path.  */
  for (unsigned int i = 0; i < 2; i++)
    {
      basic_block dup = rd->dup_blocks[i];
      if (!dup)
	continue;
      FOR_EACH_EDGE (esucc, ei, dup->succs)
	esucc->count = 0;
      dup->count = 0;
    }
}

/* Wire up the outgoing edges from the duplicate blocks and
   update any PHIs as needed.  Also update the profile counts
   on the original and duplicate blocks and edges.  */
void
ssa_fix_duplicate_block_edges (struct redirection_data *rd,
			       ssa_local_info_t *local_info)
{
  bool multi_incomings = (rd->incoming_edges->next != NULL);
  edge e = rd->incoming_edges->e;
  vec<jump_thread_edge *> *path = THREAD_PATH (e);
  edge elast = path->last ()->e;
  gcov_type path_in_count = 0;
  gcov_type path_out_count = 0;
  int path_in_freq = 0;

  /* This routine updates profile counts, frequencies, and probabilities
     incrementally.  Since it is difficult to do the incremental updates
     using frequencies/probabilities alone, for routines without profile
     data we first take a snapshot of the existing block and edge frequencies
     by copying them into the empty profile count fields.  These counts are
     then used to do the incremental updates, and cleared at the end of this
     routine.  If the function is marked as having a profile, we still check
     to see if the paths through RD are using estimated frequencies because
     the routine had zero profile counts.  */
  bool do_freqs_to_counts = (profile_status_for_fn (cfun) != PROFILE_READ
			     || estimated_freqs_path (rd));
  if (do_freqs_to_counts)
    freqs_to_counts_path (rd);

  /* First determine how much profile count to move from the original
     path to the duplicate path.  This is tricky in the presence of
     a joiner (see comments for compute_path_counts), where some portion
     of the path's counts will flow off-path from the joiner.  In the
     non-joiner case the path_in_count and path_out_count should be the
     same.  */
  bool has_joiner = compute_path_counts (rd, local_info,
					 &path_in_count, &path_out_count,
					 &path_in_freq);

  int cur_path_freq = path_in_freq;
  for (unsigned int count = 0, i = 1; i < path->length (); i++)
    {
      edge epath = (*path)[i]->e;

      /* If we were threading through a joiner block, then we want
	 to keep its control statement and redirect an outgoing edge.
	 Else we want to remove the control statement & edges, then create
	 a new outgoing edge.  In both cases we may need to update PHIs.  */
      if ((*path)[i]->type == EDGE_COPY_SRC_JOINER_BLOCK)
	{
	  edge victim;
	  edge e2;

	  gcc_assert (has_joiner);

	  /* This updates the PHIs at the destination of the duplicate
	     block.  Pass 0 instead of i if we are threading a path which
	     has multiple incoming edges.  */
	  update_destination_phis (local_info->bb, rd->dup_blocks[count],
				   path, multi_incomings ? 0 : i);

	  /* Find the edge from the duplicate block to the block we're
	     threading through.  That's the edge we want to redirect.  */
	  victim = find_edge (rd->dup_blocks[count], (*path)[i]->e->dest);

	  /* If there are no remaining blocks on the path to duplicate,
	     then redirect VICTIM to the final destination of the jump
	     threading path.  */
	  if (!any_remaining_duplicated_blocks (path, i))
	    {
	      e2 = redirect_edge_and_branch (victim, elast->dest);
	      /* If we redirected the edge, then we need to copy PHI arguments
		 at the target.  If the edge already existed (e2 != victim
		 case), then the PHIs in the target already have the correct
		 arguments.  */
	      if (e2 == victim)
		copy_phi_args (e2->dest, elast, e2,
			       path, multi_incomings ? 0 : i);
	    }
	  else
	    {
	      /* Redirect VICTIM to the next duplicated block in the path.  */
	      e2 = redirect_edge_and_branch (victim, rd->dup_blocks[count + 1]);

	      /* We need to update the PHIs in the next duplicated block.  We
		 want the new PHI args to have the same value as they had
		 in the source of the next duplicate block.

		 Thus, we need to know which edge we traversed into the
		 source of the duplicate.  Furthermore, we may have
		 traversed many edges to reach the source of the duplicate.

		 Walk through the path starting at element I until we
		 hit an edge marked with EDGE_COPY_SRC_BLOCK.  We want
		 the edge from the prior element.  */
	      for (unsigned int j = i + 1; j < path->length (); j++)
		{
		  if ((*path)[j]->type == EDGE_COPY_SRC_BLOCK)
		    {
		      copy_phi_arg_into_existing_phi ((*path)[j - 1]->e, e2);
		      break;
		    }
		}
	    }

	  /* Update the counts and frequency of both the original block
	     and path edge, and the duplicates.  The path duplicate's
	     incoming count and frequency are the totals for all edges
	     incoming to this jump threading path computed earlier.
	     And we know that the duplicated path will have path_out_count
	     flowing out of it (i.e. along the duplicated path out of the
	     duplicated joiner).  */
	  update_profile (epath, e2, path_in_count, path_out_count,
			  path_in_freq);

	  /* Next we need to update the counts of the original and duplicated
	     edges from the joiner that go off path.  */
	  update_joiner_offpath_counts (epath, e2->src, path_in_count,
					path_out_count);

	  /* Finally, we need to set the probabilities on the duplicated
	     edges out of the duplicated joiner (e2->src).  The probabilities
	     along the original path will all be updated below after we finish
	     processing the whole path.  */
	  recompute_probabilities (e2->src);

	  /* Record the frequency flowing to the downstream duplicated
	     path blocks.  */
	  cur_path_freq = EDGE_FREQUENCY (e2);
	}
      else if ((*path)[i]->type == EDGE_COPY_SRC_BLOCK)
	{
	  remove_ctrl_stmt_and_useless_edges (rd->dup_blocks[count], NULL);
	  create_edge_and_update_destination_phis (rd, rd->dup_blocks[count],
						   multi_incomings ? 0 : i);
	  if (count == 1)
	    single_succ_edge (rd->dup_blocks[1])->aux = NULL;

	  /* Update the counts and frequency of both the original block
	     and path edge, and the duplicates.  Since we are now after
	     any joiner that may have existed on the path, the count
	     flowing along the duplicated threaded path is path_out_count.
	     If we didn't have a joiner, then cur_path_freq was the sum
	     of the total frequencies along all incoming edges to the
	     thread path (path_in_freq).  If we had a joiner, it would have
	     been updated at the end of that handling to the edge frequency
	     along the duplicated joiner path edge.  */
	  update_profile (epath, EDGE_SUCC (rd->dup_blocks[count], 0),
			  path_out_count, path_out_count,
			  cur_path_freq);
	}
      else
	{
	  /* No copy case.  In this case we don't have an equivalent block
	     on the duplicated thread path to update, but we do need
	     to remove the portion of the counts/freqs that were moved
	     to the duplicated path from the counts/freqs flowing through
	     this block on the original path.  Since all the no-copy edges
	     are after any joiner, the removed count is the same as
	     path_out_count.

	     If we didn't have a joiner, then cur_path_freq was the sum
	     of the total frequencies along all incoming edges to the
	     thread path (path_in_freq).  If we had a joiner, it would have
	     been updated at the end of that handling to the edge frequency
	     along the duplicated joiner path edge.  */
	  update_profile (epath, NULL, path_out_count, path_out_count,
			  cur_path_freq);
	}

      /* Increment the index into the duplicated path when we processed
	 a duplicated block.  */
      if ((*path)[i]->type == EDGE_COPY_SRC_JOINER_BLOCK
	  || (*path)[i]->type == EDGE_COPY_SRC_BLOCK)
	{
	  count++;
	}
    }

  /* Now walk the original blocks and update their probabilities, since
     the counts and freqs should have been updated properly by the loop
     above.  */
  for (unsigned int i = 1; i < path->length (); i++)
    {
      edge epath = (*path)[i]->e;
      recompute_probabilities (epath->src);
    }

  /* Done with all profile and frequency updates, clear counts if they
     were copied.  */
  if (do_freqs_to_counts)
    clear_counts_path (rd);
}

/* Hash table traversal callback routine to create duplicate blocks.  */

int
ssa_create_duplicates (struct redirection_data **slot,
		       ssa_local_info_t *local_info)
{
  struct redirection_data *rd = *slot;

  /* The second duplicated block in a jump threading path is specific
     to the path.  So it gets stored in RD rather than in LOCAL_DATA.

     Each time we're called, we have to look through the path and see
     if a second block needs to be duplicated.

     Note the search starts with the third edge on the path.  The first
     edge is the incoming edge, the second edge always has its source
     duplicated.  Thus we start our search with the third edge.  */
  vec<jump_thread_edge *> *path = rd->path;
  for (unsigned int i = 2; i < path->length (); i++)
    {
      if ((*path)[i]->type == EDGE_COPY_SRC_BLOCK
	  || (*path)[i]->type == EDGE_COPY_SRC_JOINER_BLOCK)
	{
	  create_block_for_threading ((*path)[i]->e->src, rd, 1,
				      &local_info->duplicate_blocks);
	  break;
	}
    }

  /* Create a template block if we have not done so already.  Otherwise
     use the template to create a new block.  */
  if (local_info->template_block == NULL)
    {
      create_block_for_threading ((*path)[1]->e->src, rd, 0,
				  &local_info->duplicate_blocks);
      local_info->template_block = rd->dup_blocks[0];

      /* We do not create any outgoing edges for the template.  We will
	 take care of that in a later traversal.  That way we do not
	 create edges that are going to just be deleted.  */
    }
  else
    {
      create_block_for_threading (local_info->template_block, rd, 0,
				  &local_info->duplicate_blocks);

      /* Go ahead and wire up outgoing edges and update PHIs for the duplicate
	 block.  */
      ssa_fix_duplicate_block_edges (rd, local_info);
    }

  /* Keep walking the hash table.  */
  return 1;
}

/* We did not create any outgoing edges for the template block during
   block creation.  This hash table traversal callback creates the
   outgoing edge for the template block.  */

inline int
ssa_fixup_template_block (struct redirection_data **slot,
			  ssa_local_info_t *local_info)
{
  struct redirection_data *rd = *slot;

  /* If this is the template block, halt the traversal after updating
     it appropriately.

     If we were threading through a joiner block, then we want
     to keep its control statement and redirect an outgoing edge.
     Else we want to remove the control statement & edges, then create
     a new outgoing edge.  In both cases we may need to update PHIs.  */
  if (rd->dup_blocks[0] && rd->dup_blocks[0] == local_info->template_block)
    {
      ssa_fix_duplicate_block_edges (rd, local_info);
      return 0;
    }

  return 1;
}

/* Hash table traversal callback to redirect each incoming edge
   associated with this hash table element to its new destination.  */

int
ssa_redirect_edges (struct redirection_data **slot,
		    ssa_local_info_t *local_info)
{
  struct redirection_data *rd = *slot;
  struct el *next, *el;

  /* Walk over all the incoming edges associated with this hash table
     entry.  */
  for (el = rd->incoming_edges; el; el = next)
    {
      edge e = el->e;
      vec<jump_thread_edge *> *path = THREAD_PATH (e);

      /* Go ahead and free this element from the list.  Doing this now
	 avoids the need for another list walk when we destroy the hash
	 table.  */
      next = el->next;
      free (el);

      thread_stats.num_threaded_edges++;

      if (rd->dup_blocks[0])
	{
	  edge e2;

	  if (dump_file && (dump_flags & TDF_DETAILS))
	    fprintf (dump_file, "  Threaded jump %d --> %d to %d\n",
		     e->src->index, e->dest->index, rd->dup_blocks[0]->index);

	  /* If we redirect a loop latch edge cancel its loop.  */
	  if (e->src == e->src->loop_father->latch)
	    mark_loop_for_removal (e->src->loop_father);

	  /* Redirect the incoming edge (possibly to the joiner block) to the
	     appropriate duplicate block.  */
	  e2 = redirect_edge_and_branch (e, rd->dup_blocks[0]);
	  gcc_assert (e == e2);
	  flush_pending_stmts (e2);
	}

      /* Go ahead and clear E->aux.  It's not needed anymore and failure
	 to clear it will cause all kinds of unpleasant problems later.  */
      delete_jump_thread_path (path);
      e->aux = NULL;
    }

  /* Indicate that we actually threaded one or more jumps.  */
  if (rd->incoming_edges)
    local_info->jumps_threaded = true;

  return 1;
}

/* Return true if this block has no executable statements other than
   a simple ctrl flow instruction.  When the number of outgoing edges
   is one, this is equivalent to a "forwarder" block.  */

static bool
redirection_block_p (basic_block bb)
{
  gimple_stmt_iterator gsi;

  /* Advance to the first executable statement.  */
  gsi = gsi_start_bb (bb);
  while (!gsi_end_p (gsi)
	 && (gimple_code (gsi_stmt (gsi)) == GIMPLE_LABEL
	     || is_gimple_debug (gsi_stmt (gsi))
	     || gimple_nop_p (gsi_stmt (gsi))
	     || gimple_clobber_p (gsi_stmt (gsi))))
    gsi_next (&gsi);

  /* Check if this is an empty block.  */
  if (gsi_end_p (gsi))
    return true;

  /* Test that we've reached the terminating control statement.  */
  return gsi_stmt (gsi)
	 && (gimple_code (gsi_stmt (gsi)) == GIMPLE_COND
	     || gimple_code (gsi_stmt (gsi)) == GIMPLE_GOTO
	     || gimple_code (gsi_stmt (gsi)) == GIMPLE_SWITCH);
}

/* BB is a block which ends with a COND_EXPR or SWITCH_EXPR and when BB
   is reached via one or more specific incoming edges, we know which
   outgoing edge from BB will be traversed.

   We want to redirect those incoming edges to the target of the
   appropriate outgoing edge.  Doing so avoids a conditional branch
   and may expose new optimization opportunities.  Note that we have
   to update the dominator tree and SSA graph after such changes.

   The key to keeping the SSA graph update manageable is to duplicate
   the side effects occurring in BB so that those side effects still
   occur on the paths which bypass BB after redirecting edges.

   We accomplish this by creating duplicates of BB and arranging for
   the duplicates to unconditionally pass control to one specific
   successor of BB.  We then revector the incoming edges into BB to
   the appropriate duplicate of BB.

   If NOLOOP_ONLY is true, we only perform the threading as long as it
   does not affect the structure of the loops in a nontrivial way.

   If JOINERS is true, then thread through joiner blocks as well.  */

static bool
thread_block_1 (basic_block bb, bool noloop_only, bool joiners)
{
  /* E is an incoming edge into BB that we may or may not want to
     redirect to a duplicate of BB.  */
  edge e, e2;
  edge_iterator ei;
  ssa_local_info_t local_info;

  local_info.duplicate_blocks = BITMAP_ALLOC (NULL);

  /* To avoid scanning a linear array for the element we need, we instead
     use a hash table.  For normal code there should be no noticeable
     difference.  However, if we have a block with a large number of
     incoming and outgoing edges such linear searches can get expensive.  */
  redirection_data
    = new hash_table<struct redirection_data> (EDGE_COUNT (bb->succs));

  /* Record each unique threaded destination into a hash table for
     efficient lookups.  */
  FOR_EACH_EDGE (e, ei, bb->preds)
    {
      if (e->aux == NULL)
	continue;

      vec<jump_thread_edge *> *path = THREAD_PATH (e);

      if (((*path)[1]->type == EDGE_COPY_SRC_JOINER_BLOCK && !joiners)
	  || ((*path)[1]->type == EDGE_COPY_SRC_BLOCK && joiners))
	continue;

      e2 = path->last ()->e;
      if (!e2 || noloop_only)
	{
	  /* If NOLOOP_ONLY is true, we only allow threading through the
	     header of a loop to exit edges.  */

	  /* One case occurs when there was a loop header buried in a jump
	     threading path that crosses loop boundaries.  We do not try
	     to thread this elsewhere, so just cancel the jump threading
	     request by clearing the AUX field now.  */
	  if ((bb->loop_father != e2->src->loop_father
	       && !loop_exit_edge_p (e2->src->loop_father, e2))
	      || (e2->src->loop_father != e2->dest->loop_father
		  && !loop_exit_edge_p (e2->src->loop_father, e2)))
	    {
	      /* Since this case is not handled by our special code
		 to thread through a loop header, we must explicitly
		 cancel the threading request here.  */
	      delete_jump_thread_path (path);
	      e->aux = NULL;
	      continue;
	    }

	  /* Another case occurs when trying to thread through our
	     own loop header, possibly from inside the loop.  We will
	     thread these later.  */
	  unsigned int i;
	  for (i = 1; i < path->length (); i++)
	    {
	      if ((*path)[i]->e->src == bb->loop_father->header
		  && (!loop_exit_edge_p (bb->loop_father, e2)
		      || (*path)[1]->type == EDGE_COPY_SRC_JOINER_BLOCK))
		break;
	    }

	  if (i != path->length ())
	    continue;
	}

      /* Insert the outgoing edge into the hash table if it is not
	 already in the hash table.  */
      lookup_redirection_data (e, INSERT);
    }
1564
1565 /* We do not update dominance info. */
1566 free_dominance_info (CDI_DOMINATORS);
1567
1568 /* We know we only thread through the loop header to loop exits.
1569 Let the basic block duplication hook know we are not creating
1570 a multiple entry loop. */
1571 if (noloop_only
1572 && bb == bb->loop_father->header)
1573 set_loop_copy (bb->loop_father, loop_outer (bb->loop_father));
1574
1575 /* Now create duplicates of BB.
1576
1577 Note that for a block with a high outgoing degree we can waste
1578 a lot of time and memory creating and destroying useless edges.
1579
1580 So we first duplicate BB and remove the control structure at the
1581 tail of the duplicate as well as all outgoing edges from the
1582 duplicate. We then use that duplicate block as a template for
1583 the rest of the duplicates. */
1584 local_info.template_block = NULL;
1585 local_info.bb = bb;
1586 local_info.jumps_threaded = false;
1587 redirection_data->traverse <ssa_local_info_t *, ssa_create_duplicates>
1588 (&local_info);
1589
1590 /* The template does not have an outgoing edge. Create that outgoing
1591 edge and update PHI nodes at the edge's target as necessary.
1592
1593 We do this after creating all the duplicates to avoid creating
1594 unnecessary edges. */
1595 redirection_data->traverse <ssa_local_info_t *, ssa_fixup_template_block>
1596 (&local_info);
1597
1598 /* The hash table traversals above created the duplicate blocks (and the
1599 statements within the duplicate blocks). This loop creates PHI nodes for
1600 the duplicated blocks and redirects the incoming edges into BB to reach
1601 the duplicates of BB. */
1602 redirection_data->traverse <ssa_local_info_t *, ssa_redirect_edges>
1603 (&local_info);
1604
1605 /* Done with this block. Clear REDIRECTION_DATA. */
1606 delete redirection_data;
1607 redirection_data = NULL;
1608
1609 if (noloop_only
1610 && bb == bb->loop_father->header)
1611 set_loop_copy (bb->loop_father, NULL);
1612
1613 BITMAP_FREE (local_info.duplicate_blocks);
1614 local_info.duplicate_blocks = NULL;
1615
1616 /* Indicate to our caller whether or not any jumps were threaded. */
1617 return local_info.jumps_threaded;
1618 }
1619
1620 /* Wrapper for thread_block_1 so that we can first handle jump
1621 thread paths which do not involve copying joiner blocks, then
1622 handle jump thread paths which have joiner blocks.
1623
1624 By doing things this way we can be as aggressive as possible and
1625 not worry that copying a joiner block will create a jump threading
1626 opportunity. */
1627
1628 static bool
1629 thread_block (basic_block bb, bool noloop_only)
1630 {
1631 bool retval;
1632 retval = thread_block_1 (bb, noloop_only, false);
1633 retval |= thread_block_1 (bb, noloop_only, true);
1634 return retval;
1635 }
1636
1637
1638 /* Threads edge E through E->dest to the edge THREAD_TARGET (E). Returns the
1639 copy of E->dest created during threading, or E->dest if it was not necessary
1640 to copy it (E is its single predecessor). */
1641
1642 static basic_block
1643 thread_single_edge (edge e)
1644 {
1645 basic_block bb = e->dest;
1646 struct redirection_data rd;
1647 vec<jump_thread_edge *> *path = THREAD_PATH (e);
1648 edge eto = (*path)[1]->e;
1649
1650 delete_jump_thread_path (path);
1651 e->aux = NULL;
1652
1653 thread_stats.num_threaded_edges++;
1654
1655 if (single_pred_p (bb))
1656 {
1657 /* If BB has just a single predecessor, we need only remove the
1658 control statement at its end and the successor edges other than ETO. */
1659 remove_ctrl_stmt_and_useless_edges (bb, eto->dest);
1660
1661 /* And fixup the flags on the single remaining edge. */
1662 eto->flags &= ~(EDGE_TRUE_VALUE | EDGE_FALSE_VALUE | EDGE_ABNORMAL);
1663 eto->flags |= EDGE_FALLTHRU;
1664
1665 return bb;
1666 }
1667
1668 /* Otherwise, we need to create a copy. */
1669 if (e->dest == eto->src)
1670 update_bb_profile_for_threading (bb, EDGE_FREQUENCY (e), e->count, eto);
1671
1672 vec<jump_thread_edge *> *npath = new vec<jump_thread_edge *> ();
1673 jump_thread_edge *x = new jump_thread_edge (e, EDGE_START_JUMP_THREAD);
1674 npath->safe_push (x);
1675
1676 x = new jump_thread_edge (eto, EDGE_COPY_SRC_BLOCK);
1677 npath->safe_push (x);
1678 rd.path = npath;
1679
1680 create_block_for_threading (bb, &rd, 0, NULL);
1681 remove_ctrl_stmt_and_useless_edges (rd.dup_blocks[0], NULL);
1682 create_edge_and_update_destination_phis (&rd, rd.dup_blocks[0], 0);
1683
1684 if (dump_file && (dump_flags & TDF_DETAILS))
1685 fprintf (dump_file, " Threaded jump %d --> %d to %d\n",
1686 e->src->index, e->dest->index, rd.dup_blocks[0]->index);
1687
1688 rd.dup_blocks[0]->count = e->count;
1689 rd.dup_blocks[0]->frequency = EDGE_FREQUENCY (e);
1690 single_succ_edge (rd.dup_blocks[0])->count = e->count;
1691 redirect_edge_and_branch (e, rd.dup_blocks[0]);
1692 flush_pending_stmts (e);
1693
1694 delete_jump_thread_path (npath);
1695 return rd.dup_blocks[0];
1696 }
1697
1698 /* Callback for dfs_enumerate_from. Returns true if BB is different
1699 from STOP and DBDS_CE_STOP. */
1700
1701 static basic_block dbds_ce_stop;
1702 static bool
1703 dbds_continue_enumeration_p (const_basic_block bb, const void *stop)
1704 {
1705 return (bb != (const_basic_block) stop
1706 && bb != dbds_ce_stop);
1707 }
1708
1709 /* Evaluates the dominance relationship between BB and the latch of the
1710 LOOP, and returns the resulting state. */
1711
1712 enum bb_dom_status
1713 {
1714 /* BB does not dominate latch of the LOOP. */
1715 DOMST_NONDOMINATING,
1716 /* The LOOP is broken (there is no path from the header to its latch). */
1717 DOMST_LOOP_BROKEN,
1718 /* BB dominates the latch of the LOOP. */
1719 DOMST_DOMINATING
1720 };
1721
1722 static enum bb_dom_status
1723 determine_bb_domination_status (struct loop *loop, basic_block bb)
1724 {
1725 basic_block *bblocks;
1726 unsigned nblocks, i;
1727 bool bb_reachable = false;
1728 edge_iterator ei;
1729 edge e;
1730
1731 /* This function assumes BB is a successor of LOOP->header.
1732 If that is not the case, return DOMST_NONDOMINATING, which
1733 is always safe. */
1734 {
1735 bool ok = false;
1736
1737 FOR_EACH_EDGE (e, ei, bb->preds)
1738 {
1739 if (e->src == loop->header)
1740 {
1741 ok = true;
1742 break;
1743 }
1744 }
1745
1746 if (!ok)
1747 return DOMST_NONDOMINATING;
1748 }
1749
1750 if (bb == loop->latch)
1751 return DOMST_DOMINATING;
1752
1753 /* Check that BB dominates LOOP->latch, and that it is back-reachable
1754 from it. */
1755
1756 bblocks = XCNEWVEC (basic_block, loop->num_nodes);
1757 dbds_ce_stop = loop->header;
1758 nblocks = dfs_enumerate_from (loop->latch, 1, dbds_continue_enumeration_p,
1759 bblocks, loop->num_nodes, bb);
1760 for (i = 0; i < nblocks; i++)
1761 FOR_EACH_EDGE (e, ei, bblocks[i]->preds)
1762 {
1763 if (e->src == loop->header)
1764 {
1765 free (bblocks);
1766 return DOMST_NONDOMINATING;
1767 }
1768 if (e->src == bb)
1769 bb_reachable = true;
1770 }
1771
1772 free (bblocks);
1773 return (bb_reachable ? DOMST_DOMINATING : DOMST_LOOP_BROKEN);
1774 }
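
/* A hedged illustration of the three states, using a hypothetical
   loop with header H, latch L and header successor B:

     - H->B, B->L, L->H and no other edge into L: every path from the
       header back to the latch goes through B, so DOMST_DOMINATING.
     - An additional edge H->L: the latch is reachable from the header
       without passing through B, so DOMST_NONDOMINATING.
     - No path from H to L at all: the reverse DFS from L never
       reaches B or an edge from H, so DOMST_LOOP_BROKEN.  */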
1775
1776 /* Return true if BB is part of the new pre-header that is created
1777 when threading the latch to DATA. */
1778
1779 static bool
1780 def_split_header_continue_p (const_basic_block bb, const void *data)
1781 {
1782 const_basic_block new_header = (const_basic_block) data;
1783 const struct loop *l;
1784
1785 if (bb == new_header
1786 || loop_depth (bb->loop_father) < loop_depth (new_header->loop_father))
1787 return false;
1788 for (l = bb->loop_father; l; l = loop_outer (l))
1789 if (l == new_header->loop_father)
1790 return true;
1791 return false;
1792 }
1793
1794 /* Thread jumps through the header of LOOP. Returns true if cfg changes.
1795 If MAY_PEEL_LOOP_HEADERS is false, we avoid threading from entry edges
1796 to the inside of the loop. */
1797
1798 static bool
1799 thread_through_loop_header (struct loop *loop, bool may_peel_loop_headers)
1800 {
1801 basic_block header = loop->header;
1802 edge e, tgt_edge, latch = loop_latch_edge (loop);
1803 edge_iterator ei;
1804 basic_block tgt_bb, atgt_bb;
1805 enum bb_dom_status domst;
1806
1807 /* We have already threaded through headers to exits, so all the threading
1808 requests now are to the inside of the loop. We need to avoid creating
1809 irreducible regions (i.e., loops with more than one entry block), and
1810 also loops with several latch edges, or new subloops of the loop (although
1811 there are cases where it might be appropriate, it is difficult to decide,
1812 and doing it wrongly may confuse other optimizers).
1813
1814 We could handle more general cases here. However, the intention is to
1815 preserve some information about the loop, which is impossible if its
1816 structure changes significantly, in a way that is not well understood.
1817 Thus we handle only a few important special cases, in which updating
1818 the loop-carried information should also be feasible:
1819
1820 1) Propagation of the latch edge to a block that dominates the latch block
1821 of a loop. This aims to handle the following idiom:
1822
1823 first = 1;
1824 while (1)
1825 {
1826 if (first)
1827 initialize;
1828 first = 0;
1829 body;
1830 }
1831
1832 After threading the latch edge, this becomes
1833
1834 first = 1;
1835 if (first)
1836 initialize;
1837 while (1)
1838 {
1839 first = 0;
1840 body;
1841 }
1842
1843 The original header of the loop is moved out of it, and we may thread
1844 the remaining edges through it without further constraints.
1845
1846 2) All entry edges are propagated to a single basic block that dominates
1847 the latch block of the loop. This aims to handle the following idiom
1848 (normally created for "for" loops):
1849
1850 i = 0;
1851 while (1)
1852 {
1853 if (i >= 100)
1854 break;
1855 body;
1856 i++;
1857 }
1858
1859 This becomes
1860
1861 i = 0;
1862 while (1)
1863 {
1864 body;
1865 i++;
1866 if (i >= 100)
1867 break;
1868 }
1869 */
1870
1871 /* Threading through the header won't improve the code if the header has just
1872 one successor. */
1873 if (single_succ_p (header))
1874 goto fail;
1875
1876 /* If we threaded the latch using a joiner block, we cancel the
1877 threading opportunity out of an abundance of caution. However,
1878 still allow threading from outside to inside the loop. */
1879 if (latch->aux)
1880 {
1881 vec<jump_thread_edge *> *path = THREAD_PATH (latch);
1882 if ((*path)[1]->type == EDGE_COPY_SRC_JOINER_BLOCK)
1883 {
1884 delete_jump_thread_path (path);
1885 latch->aux = NULL;
1886 }
1887 }
1888
1889 if (latch->aux)
1890 {
1891 vec<jump_thread_edge *> *path = THREAD_PATH (latch);
1892 tgt_edge = (*path)[1]->e;
1893 tgt_bb = tgt_edge->dest;
1894 }
1895 else if (!may_peel_loop_headers
1896 && !redirection_block_p (loop->header))
1897 goto fail;
1898 else
1899 {
1900 tgt_bb = NULL;
1901 tgt_edge = NULL;
1902 FOR_EACH_EDGE (e, ei, header->preds)
1903 {
1904 if (!e->aux)
1905 {
1906 if (e == latch)
1907 continue;
1908
1909 /* If the latch is not threaded and there is a header
1910 edge that is not threaded, we would create a loop
1911 with multiple entries. */
1912 goto fail;
1913 }
1914
1915 vec<jump_thread_edge *> *path = THREAD_PATH (e);
1916
1917 if ((*path)[1]->type == EDGE_COPY_SRC_JOINER_BLOCK)
1918 goto fail;
1919 tgt_edge = (*path)[1]->e;
1920 atgt_bb = tgt_edge->dest;
1921 if (!tgt_bb)
1922 tgt_bb = atgt_bb;
1923 /* Two targets of threading would make us create a loop
1924 with multiple entries. */
1925 else if (tgt_bb != atgt_bb)
1926 goto fail;
1927 }
1928
1929 if (!tgt_bb)
1930 {
1931 /* There are no threading requests. */
1932 return false;
1933 }
1934
1935 /* Redirecting to an empty loop latch is useless. */
1936 if (tgt_bb == loop->latch
1937 && empty_block_p (loop->latch))
1938 goto fail;
1939 }
1940
1941 /* The target block must dominate the loop latch, otherwise we would be
1942 creating a subloop. */
1943 domst = determine_bb_domination_status (loop, tgt_bb);
1944 if (domst == DOMST_NONDOMINATING)
1945 goto fail;
1946 if (domst == DOMST_LOOP_BROKEN)
1947 {
1948 /* If the loop ceased to exist, mark it as such, and thread through its
1949 original header. */
1950 mark_loop_for_removal (loop);
1951 return thread_block (header, false);
1952 }
1953
1954 if (tgt_bb->loop_father->header == tgt_bb)
1955 {
1956 /* If the target of the threading is a header of a subloop, we need
1957 to create a preheader for it, so that the headers of the two loops
1958 do not merge. */
1959 if (EDGE_COUNT (tgt_bb->preds) > 2)
1960 {
1961 tgt_bb = create_preheader (tgt_bb->loop_father, 0);
1962 gcc_assert (tgt_bb != NULL);
1963 }
1964 else
1965 tgt_bb = split_edge (tgt_edge);
1966 }
1967
1968 if (latch->aux)
1969 {
1970 basic_block *bblocks;
1971 unsigned nblocks, i;
1972
1973 /* First handle the case where the latch edge is redirected. We are copying
1974 the loop header but not creating a multiple entry loop. Make the
1975 cfg manipulation code aware of that fact. */
1976 set_loop_copy (loop, loop);
1977 loop->latch = thread_single_edge (latch);
1978 set_loop_copy (loop, NULL);
1979 gcc_assert (single_succ (loop->latch) == tgt_bb);
1980 loop->header = tgt_bb;
1981
1982 /* Remove the new pre-header blocks from our loop. */
1983 bblocks = XCNEWVEC (basic_block, loop->num_nodes);
1984 nblocks = dfs_enumerate_from (header, 0, def_split_header_continue_p,
1985 bblocks, loop->num_nodes, tgt_bb);
1986 for (i = 0; i < nblocks; i++)
1987 if (bblocks[i]->loop_father == loop)
1988 {
1989 remove_bb_from_loops (bblocks[i]);
1990 add_bb_to_loop (bblocks[i], loop_outer (loop));
1991 }
1992 free (bblocks);
1993
1994 /* If the new header has multiple latches mark it so. */
1995 FOR_EACH_EDGE (e, ei, loop->header->preds)
1996 if (e->src->loop_father == loop
1997 && e->src != loop->latch)
1998 {
1999 loop->latch = NULL;
2000 loops_state_set (LOOPS_MAY_HAVE_MULTIPLE_LATCHES);
2001 }
2002
2003 /* Cancel remaining threading requests that would make the
2004 loop a multiple entry loop. */
2005 FOR_EACH_EDGE (e, ei, header->preds)
2006 {
2007 edge e2;
2008
2009 if (e->aux == NULL)
2010 continue;
2011
2012 vec<jump_thread_edge *> *path = THREAD_PATH (e);
2013 e2 = path->last ()->e;
2014
2015 if (e->src->loop_father != e2->dest->loop_father
2016 && e2->dest != loop->header)
2017 {
2018 delete_jump_thread_path (path);
2019 e->aux = NULL;
2020 }
2021 }
2022
2023 /* Thread the remaining edges through the former header. */
2024 thread_block (header, false);
2025 }
2026 else
2027 {
2028 basic_block new_preheader;
2029
2030 /* Now consider the case where entry edges are redirected to the new entry
2031 block. Remember one entry edge, so that we can find the new
2032 preheader (its destination after threading). */
2033 FOR_EACH_EDGE (e, ei, header->preds)
2034 {
2035 if (e->aux)
2036 break;
2037 }
2038
2039 /* The duplicate of the header is the new preheader of the loop. Ensure
2040 that it is placed correctly in the loop hierarchy. */
2041 set_loop_copy (loop, loop_outer (loop));
2042
2043 thread_block (header, false);
2044 set_loop_copy (loop, NULL);
2045 new_preheader = e->dest;
2046
2047 /* Create the new latch block. This is always necessary, as the latch
2048 must have only a single successor, but the original header had at
2049 least two successors. */
2050 loop->latch = NULL;
2051 mfb_kj_edge = single_succ_edge (new_preheader);
2052 loop->header = mfb_kj_edge->dest;
2053 latch = make_forwarder_block (tgt_bb, mfb_keep_just, NULL);
2054 loop->header = latch->dest;
2055 loop->latch = latch->src;
2056 }
2057
2058 return true;
2059
2060 fail:
2061 /* We failed to thread anything. Cancel the requests. */
2062 FOR_EACH_EDGE (e, ei, header->preds)
2063 {
2064 vec<jump_thread_edge *> *path = THREAD_PATH (e);
2065
2066 if (path)
2067 {
2068 delete_jump_thread_path (path);
2069 e->aux = NULL;
2070 }
2071 }
2072 return false;
2073 }
2074
2075 /* E1 and E2 are edges into the same basic block. Return TRUE if the
2076 PHI arguments associated with those edges are equal or there are no
2077 PHI arguments, otherwise return FALSE. */
2078
2079 static bool
2080 phi_args_equal_on_edges (edge e1, edge e2)
2081 {
2082 gphi_iterator gsi;
2083 int indx1 = e1->dest_idx;
2084 int indx2 = e2->dest_idx;
2085
2086 for (gsi = gsi_start_phis (e1->dest); !gsi_end_p (gsi); gsi_next (&gsi))
2087 {
2088 gphi *phi = gsi.phi ();
2089
2090 if (!operand_equal_p (gimple_phi_arg_def (phi, indx1),
2091 gimple_phi_arg_def (phi, indx2), 0))
2092 return false;
2093 }
2094 return true;
2095 }
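
/* A hedged example with a hypothetical PHI node: given

     x_4 = PHI <a_1 (E1), a_1 (E2), b_2 (E3)>

   this returns true for the pair (E1, E2) and false for (E1, E3);
   if the destination has no PHI nodes it trivially returns true.  */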
2096
2097 /* Walk through the registered jump threads and convert them into a
2098 form convenient for this pass.
2099
2100 Any block which has incoming edges threaded to outgoing edges
2101 will have its entry in THREADED_BLOCKS set.
2102
2103 Any threaded edge will have its new outgoing edge stored in the
2104 original edge's AUX field.
2105
2106 This form avoids the need to walk all the edges in the CFG to
2107 discover blocks which need processing and avoids unnecessary
2108 hash table lookups to map from threaded edge to new target. */
2109
2110 static void
2111 mark_threaded_blocks (bitmap threaded_blocks)
2112 {
2113 unsigned int i;
2114 bitmap_iterator bi;
2115 bitmap tmp = BITMAP_ALLOC (NULL);
2116 basic_block bb;
2117 edge e;
2118 edge_iterator ei;
2119
2120 /* It is possible to have jump threads in which one is a subpath
2121 of the other, i.e., (A, B), (B, C), (C, D) where B is a joiner
2122 block and (B, C), (C, D) where no joiner block exists.
2123
2124 When this occurs ignore the jump thread request with the joiner
2125 block. It's totally subsumed by the simpler jump thread request.
2126
2127 This results in less block copying and simpler CFGs. More importantly,
2128 when we duplicate the joiner block, B, in this case we will create
2129 a new threading opportunity that we wouldn't be able to optimize
2130 until the next jump threading iteration.
2131
2132 So first convert the jump thread requests which do not require a
2133 joiner block. */
2134 for (i = 0; i < paths.length (); i++)
2135 {
2136 vec<jump_thread_edge *> *path = paths[i];
2137
2138 if ((*path)[1]->type != EDGE_COPY_SRC_JOINER_BLOCK)
2139 {
2140 edge e = (*path)[0]->e;
2141 e->aux = (void *)path;
2142 bitmap_set_bit (tmp, e->dest->index);
2143 }
2144 }
2145
2146 /* Now iterate again, converting cases where we want to thread
2147 through a joiner block, but only if no other edge on the path
2148 already has a jump thread attached to it. We do this in two passes,
2149 to avoid situations where the order in the paths vec can hide overlapping
2150 threads (the path is recorded on the incoming edge, so we would miss
2151 cases where the second path starts at a downstream edge on the same
2152 path). First record all joiner paths, deleting any in the unexpected
2153 case where there is already a path for that incoming edge. */
2154 for (i = 0; i < paths.length (); i++)
2155 {
2156 vec<jump_thread_edge *> *path = paths[i];
2157
2158 if ((*path)[1]->type == EDGE_COPY_SRC_JOINER_BLOCK)
2159 {
2160 /* Attach the path to the starting edge if none is yet recorded. */
2161 if ((*path)[0]->e->aux == NULL)
2162 {
2163 (*path)[0]->e->aux = path;
2164 }
2165 else
2166 {
2167 paths.unordered_remove (i);
2168 if (dump_file && (dump_flags & TDF_DETAILS))
2169 dump_jump_thread_path (dump_file, *path, false);
2170 delete_jump_thread_path (path);
2171 }
2172 }
2173 }
2174 /* Second, look for paths that have any other jump thread attached to
2175 them, and either finish converting them or cancel them. */
2176 for (i = 0; i < paths.length (); i++)
2177 {
2178 vec<jump_thread_edge *> *path = paths[i];
2179 edge e = (*path)[0]->e;
2180
2181 if ((*path)[1]->type == EDGE_COPY_SRC_JOINER_BLOCK && e->aux == path)
2182 {
2183 unsigned int j;
2184 for (j = 1; j < path->length (); j++)
2185 if ((*path)[j]->e->aux != NULL)
2186 break;
2187
2188 /* If we iterated through the entire path without exiting the loop,
2189 then we are good to go, record it. */
2190 if (j == path->length ())
2191 bitmap_set_bit (tmp, e->dest->index);
2192 else
2193 {
2194 e->aux = NULL;
2195 paths.unordered_remove (i);
2196 if (dump_file && (dump_flags & TDF_DETAILS))
2197 dump_jump_thread_path (dump_file, *path, false);
2198 delete_jump_thread_path (path);
2199 }
2200 }
2201 }
2202
2203 /* If optimizing for size, only thread through a block if we don't have
2204 to duplicate it or it's an otherwise empty redirection block. */
2205 if (optimize_function_for_size_p (cfun))
2206 {
2207 EXECUTE_IF_SET_IN_BITMAP (tmp, 0, i, bi)
2208 {
2209 bb = BASIC_BLOCK_FOR_FN (cfun, i);
2210 if (EDGE_COUNT (bb->preds) > 1
2211 && !redirection_block_p (bb))
2212 {
2213 FOR_EACH_EDGE (e, ei, bb->preds)
2214 {
2215 if (e->aux)
2216 {
2217 vec<jump_thread_edge *> *path = THREAD_PATH (e);
2218 delete_jump_thread_path (path);
2219 e->aux = NULL;
2220 }
2221 }
2222 }
2223 else
2224 bitmap_set_bit (threaded_blocks, i);
2225 }
2226 }
2227 else
2228 bitmap_copy (threaded_blocks, tmp);
2229
2230 /* Look for jump threading paths which cross multiple loop headers.
2231
2232 The code to thread through loop headers will change the CFG in ways
2233 that break assumptions made by the loop optimization code.
2234
2235 We don't want to blindly cancel the requests. We can instead do better
2236 by trimming off the end of the jump thread path. */
2237 EXECUTE_IF_SET_IN_BITMAP (tmp, 0, i, bi)
2238 {
2239 basic_block bb = BASIC_BLOCK_FOR_FN (cfun, i);
2240 FOR_EACH_EDGE (e, ei, bb->preds)
2241 {
2242 if (e->aux)
2243 {
2244 vec<jump_thread_edge *> *path = THREAD_PATH (e);
2245
2246 for (unsigned int i = 0, crossed_headers = 0;
2247 i < path->length ();
2248 i++)
2249 {
2250 basic_block dest = (*path)[i]->e->dest;
2251 crossed_headers += (dest == dest->loop_father->header);
2252 if (crossed_headers > 1)
2253 {
2254 /* Trim from entry I onwards. */
2255 for (unsigned int j = i; j < path->length (); j++)
2256 delete (*path)[j];
2257 path->truncate (i);
2258
2259 /* Now that we've truncated the path, make sure
2260 what's left is still valid. We need at least
2261 two edges on the path and the last edge cannot
2262 be a joiner. This should never happen, but let's
2263 be safe. */
2264 if (path->length () < 2
2265 || (path->last ()->type
2266 == EDGE_COPY_SRC_JOINER_BLOCK))
2267 {
2268 delete_jump_thread_path (path);
2269 e->aux = NULL;
2270 }
2271 break;
2272 }
2273 }
2274 }
2275 }
2276 }
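
/* A hedged illustration of the trimming above: if the destinations of
   a path's edges are (H1, X, H2, Y), where H1 and H2 are loop headers,
   the second header is crossed at the third entry, so that entry and
   everything after it are deleted, leaving (H1, X).  The shortened
   path must then still satisfy the length and joiner checks.  */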
2277
2278 /* If we have a joiner block (J) which has two successors S1 and S2 and
2279 we are threading through S1 and the final destination of the thread
2280 is S2, then we must verify that any PHI nodes in S2 have the same
2281 PHI arguments for the edge J->S2 and J->S1->...->S2.
2282
2283 We used to detect this prior to registering the jump thread, but
2284 that prohibits propagation of edge equivalences into non-dominated
2285 PHI nodes as the equivalency test might occur before propagation.
2286
2287 This must also occur after we truncate any jump threading paths
2288 as this scenario may only show up after truncation.
2289
2290 This works for now, but will need improvement as part of the FSA
2291 optimization.
2292
2293 Note since we've moved the thread request data to the edges,
2294 we have to iterate on those rather than the threaded_edges vector. */
2295 EXECUTE_IF_SET_IN_BITMAP (tmp, 0, i, bi)
2296 {
2297 bb = BASIC_BLOCK_FOR_FN (cfun, i);
2298 FOR_EACH_EDGE (e, ei, bb->preds)
2299 {
2300 if (e->aux)
2301 {
2302 vec<jump_thread_edge *> *path = THREAD_PATH (e);
2303 bool have_joiner = ((*path)[1]->type == EDGE_COPY_SRC_JOINER_BLOCK);
2304
2305 if (have_joiner)
2306 {
2307 basic_block joiner = e->dest;
2308 edge final_edge = path->last ()->e;
2309 basic_block final_dest = final_edge->dest;
2310 edge e2 = find_edge (joiner, final_dest);
2311
2312 if (e2 && !phi_args_equal_on_edges (e2, final_edge))
2313 {
2314 delete_jump_thread_path (path);
2315 e->aux = NULL;
2316 }
2317 }
2318 }
2319 }
2320 }
2321
2322 BITMAP_FREE (tmp);
2323 }
2324
2325
2326 /* Return TRUE if BB ends with a switch statement or a computed goto.
2327 Otherwise return false. */
2328 static bool
2329 bb_ends_with_multiway_branch (basic_block bb)
2330 {
2331 gimple stmt = last_stmt (bb);
2332 if (stmt && gimple_code (stmt) == GIMPLE_SWITCH)
2333 return true;
2334 if (stmt && gimple_code (stmt) == GIMPLE_GOTO
2335 && TREE_CODE (gimple_goto_dest (stmt)) == SSA_NAME)
2336 return true;
2337 return false;
2338 }
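
/* A hedged note on the computed-goto case: source such as "goto *p;"
   is represented as a GIMPLE_GOTO whose destination operand is an
   SSA_NAME rather than a LABEL_DECL, so a block ending in one is
   treated as a multiway branch, just like a GIMPLE_SWITCH.  */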
2339
2340 /* Verify that the REGION is a valid jump thread. A jump thread is a special
2341 case of a SEME (Single Entry Multiple Exits) region in which all nodes in
2342 the REGION have exactly one incoming edge. The only exception is the first
2343 block, which may not have been connected to the rest of the cfg yet. */
2344
2345 DEBUG_FUNCTION void
2346 verify_jump_thread (basic_block *region, unsigned n_region)
2347 {
2348 for (unsigned i = 0; i < n_region; i++)
2349 gcc_assert (EDGE_COUNT (region[i]->preds) <= 1);
2350 }
2351
2352 /* Return true when BB is one of the first N items in BBS. */
2353
2354 static inline bool
2355 bb_in_bbs (basic_block bb, basic_block *bbs, int n)
2356 {
2357 for (int i = 0; i < n; i++)
2358 if (bb == bbs[i])
2359 return true;
2360
2361 return false;
2362 }
2363
2364 /* Duplicates a jump-thread path of N_REGION basic blocks.
2365 The ENTRY edge is redirected to the duplicate of the region.
2366
2367 Remove the last conditional statement in the last basic block in the REGION,
2368 and create a single fallthru edge pointing to the same destination as the
2369 EXIT edge.
2370
2371 The new basic blocks are stored to REGION_COPY in the same order as they had
2372 in REGION, provided that REGION_COPY is not NULL.
2373
2374 Returns false if it is unable to copy the region, true otherwise. */
2375
2376 static bool
2377 duplicate_thread_path (edge entry, edge exit,
2378 basic_block *region, unsigned n_region,
2379 basic_block *region_copy)
2380 {
2381 unsigned i;
2382 bool free_region_copy = false;
2383 struct loop *loop = entry->dest->loop_father;
2384 edge exit_copy;
2385 edge redirected;
2386 int total_freq = 0, entry_freq = 0;
2387 gcov_type total_count = 0, entry_count = 0;
2388
2389 if (!can_copy_bbs_p (region, n_region))
2390 return false;
2391
2392 /* Some sanity checking. Note that we do not check for all possible
2393 misuses of the functions. I.e., if you ask to copy something weird,
2394 it will work, but the state of structures probably will not be
2395 correct. */
2396 for (i = 0; i < n_region; i++)
2397 {
2398 /* We do not handle subloops, i.e. all the blocks must belong to the
2399 same loop. */
2400 if (region[i]->loop_father != loop)
2401 return false;
2402 }
2403
2404 initialize_original_copy_tables ();
2405
2406 set_loop_copy (loop, loop);
2407
2408 if (!region_copy)
2409 {
2410 region_copy = XNEWVEC (basic_block, n_region);
2411 free_region_copy = true;
2412 }
2413
2414 if (entry->dest->count)
2415 {
2416 total_count = entry->dest->count;
2417 entry_count = entry->count;
2418 /* Fix up corner cases, to avoid division by zero or creation of negative
2419 frequencies. */
2420 if (entry_count > total_count)
2421 entry_count = total_count;
2422 }
2423 else
2424 {
2425 total_freq = entry->dest->frequency;
2426 entry_freq = EDGE_FREQUENCY (entry);
2427 /* Fix up corner cases, to avoid division by zero or creation of negative
2428 frequencies. */
2429 if (total_freq == 0)
2430 total_freq = 1;
2431 else if (entry_freq > total_freq)
2432 entry_freq = total_freq;
2433 }
2434
2435 copy_bbs (region, n_region, region_copy, &exit, 1, &exit_copy, loop,
2436 split_edge_bb_loc (entry), false);
2437
2438 /* Fix up: copy_bbs redirects all edges pointing to copied blocks. The
2439 following code ensures that all the edges exiting the jump-thread path are
2440 redirected back to the original code: these edges are exceptions
2441 invalidating the property that is propagated by executing all the blocks of
2442 the jump-thread path in order. */
2443
2444 for (i = 0; i < n_region; i++)
2445 {
2446 edge e;
2447 edge_iterator ei;
2448 basic_block bb = region_copy[i];
2449
2450 if (single_succ_p (bb))
2451 {
2452 /* Make sure the successor is the next node in the path. */
2453 gcc_assert (i + 1 == n_region
2454 || region_copy[i + 1] == single_succ_edge (bb)->dest);
2455 continue;
2456 }
2457
2458 /* Special case the last block on the path: make sure that it does not
2459 jump back on the copied path. */
2460 if (i + 1 == n_region)
2461 {
2462 FOR_EACH_EDGE (e, ei, bb->succs)
2463 if (bb_in_bbs (e->dest, region_copy, n_region - 1))
2464 {
2465 basic_block orig = get_bb_original (e->dest);
2466 if (orig)
2467 redirect_edge_and_branch_force (e, orig);
2468 }
2469 continue;
2470 }
2471
2472 /* Redirect all other edges jumping to non-adjacent blocks back to the
2473 original code. */
2474 FOR_EACH_EDGE (e, ei, bb->succs)
2475 if (region_copy[i + 1] != e->dest)
2476 {
2477 basic_block orig = get_bb_original (e->dest);
2478 if (orig)
2479 redirect_edge_and_branch_force (e, orig);
2480 }
2481 }
2482
2483 if (total_count)
2484 {
2485 scale_bbs_frequencies_gcov_type (region, n_region,
2486 total_count - entry_count,
2487 total_count);
2488 scale_bbs_frequencies_gcov_type (region_copy, n_region, entry_count,
2489 total_count);
2490 }
2491 else
2492 {
2493 scale_bbs_frequencies_int (region, n_region, total_freq - entry_freq,
2494 total_freq);
2495 scale_bbs_frequencies_int (region_copy, n_region, entry_freq, total_freq);
2496 }
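
/* A hedged worked example of the scaling above, with illustrative
   numbers: if ENTRY->dest had count 100 and ENTRY itself count 30,
   the original region is scaled by (100 - 30)/100 and the copy by
   30/100, so both regions together still account for the original
   100 units of profile weight.  */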
2497
2498 #ifdef ENABLE_CHECKING
2499 verify_jump_thread (region_copy, n_region);
2500 #endif
2501
2502 /* Remove the last branch in the jump thread path. */
2503 remove_ctrl_stmt_and_useless_edges (region_copy[n_region - 1], exit->dest);
2504 edge e = make_edge (region_copy[n_region - 1], exit->dest, EDGE_FALLTHRU);
2505
2506 if (e)
2507 {
2508 rescan_loop_exit (e, true, false);
2509 e->probability = REG_BR_PROB_BASE;
2510 e->count = region_copy[n_region - 1]->count;
2511 }
2511
2512 /* Redirect the entry and add the phi node arguments. */
2513 if (entry->dest == loop->header)
2514 mark_loop_for_removal (loop);
2515 redirected = redirect_edge_and_branch (entry, get_bb_copy (entry->dest));
2516 gcc_assert (redirected != NULL);
2517 flush_pending_stmts (entry);
2518
2519 /* Add the other PHI node arguments. */
2520 add_phi_args_after_copy (region_copy, n_region, NULL);
2521
2522 if (free_region_copy)
2523 free (region_copy);
2524
2525 free_original_copy_tables ();
2526 return true;
2527 }
2528
2529 /* Return true when PATH is a valid jump-thread path. */
2530
2531 static bool
2532 valid_jump_thread_path (vec<jump_thread_edge *> *path)
2533 {
2534 unsigned len = path->length ();
2535
2536 /* Check that the path is connected. */
2537 for (unsigned int j = 0; j < len - 1; j++)
2538 if ((*path)[j]->e->dest != (*path)[j+1]->e->src)
2539 return false;
2540
2541 return true;
2542 }
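
/* A hedged example: a path recorded as (A->B, B->C, C->D) is
   connected and therefore valid.  If an earlier jump thread has
   already redirected one of these edges so that a recorded edge no
   longer starts where the previous one ends, the walk above rejects
   the now-stale path.  */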
2543
2544 /* Walk through all blocks and thread incoming edges to the appropriate
2545 outgoing edge for each edge pair recorded in THREADED_EDGES.
2546
2547 It is the caller's responsibility to fix the dominance information
2548 and rewrite duplicated SSA_NAMEs back into SSA form.
2549
2550 If MAY_PEEL_LOOP_HEADERS is false, we avoid threading edges through
2551 loop headers if it does not simplify the loop.
2552
2553 Returns true if one or more edges were threaded, false otherwise. */
2554
2555 bool
2556 thread_through_all_blocks (bool may_peel_loop_headers)
2557 {
2558 bool retval = false;
2559 unsigned int i;
2560 bitmap_iterator bi;
2561 bitmap threaded_blocks;
2562 struct loop *loop;
2563
2564 if (!paths.exists ())
2565 return false;
2566
2567 threaded_blocks = BITMAP_ALLOC (NULL);
2568 memset (&thread_stats, 0, sizeof (thread_stats));
2569
2570 /* Jump-thread all FSM threads before other jump-threads. */
2571 for (i = 0; i < paths.length ();)
2572 {
2573 vec<jump_thread_edge *> *path = paths[i];
2574 edge entry = (*path)[0]->e;
2575
2576 /* Only code-generate FSM jump-threads in this loop. */
2577 if ((*path)[0]->type != EDGE_FSM_THREAD)
2578 {
2579 i++;
2580 continue;
2581 }
2582
2583 /* Do not jump-thread twice from the same block. */
2584 if (bitmap_bit_p (threaded_blocks, entry->src->index)
2585 /* Verify that the jump thread path is still valid: a
2586 previous jump-thread may have changed the CFG, and
2587 invalidated the current path. */
2588 || !valid_jump_thread_path (path))
2589 {
2590 /* Remove invalid FSM jump-thread paths. */
2591 delete_jump_thread_path (path);
2592 paths.unordered_remove (i);
2593 continue;
2594 }
2595
2596 unsigned len = path->length ();
2597 edge exit = (*path)[len - 1]->e;
2598 basic_block *region = XNEWVEC (basic_block, len - 1);
2599
2600 for (unsigned int j = 0; j < len - 1; j++)
2601 region[j] = (*path)[j]->e->dest;
2602
2603 if (duplicate_thread_path (entry, exit, region, len - 1, NULL))
2604 {
2605 /* We do not update dominance info. */
2606 free_dominance_info (CDI_DOMINATORS);
2607 bitmap_set_bit (threaded_blocks, entry->src->index);
2608 retval = true;
2609 }
2610
2611 delete_jump_thread_path (path);
2612 paths.unordered_remove (i);
2613 }
2614
2615 /* Remove from PATHS all the jump-threads starting with an edge already
2616 jump-threaded. */
2617 for (i = 0; i < paths.length ();)
2618 {
2619 vec<jump_thread_edge *> *path = paths[i];
2620 edge entry = (*path)[0]->e;
2621
2622 /* Do not jump-thread twice from the same block. */
2623 if (bitmap_bit_p (threaded_blocks, entry->src->index))
2624 {
2625 delete_jump_thread_path (path);
2626 paths.unordered_remove (i);
2627 }
2628 else
2629 i++;
2630 }
2631
2632 bitmap_clear (threaded_blocks);
2633
2634 mark_threaded_blocks (threaded_blocks);
2635
2636 initialize_original_copy_tables ();
2637
2638 /* First perform the threading requests that do not affect
2639 loop structure. */
2640 EXECUTE_IF_SET_IN_BITMAP (threaded_blocks, 0, i, bi)
2641 {
2642 basic_block bb = BASIC_BLOCK_FOR_FN (cfun, i);
2643
2644 if (EDGE_COUNT (bb->preds) > 0)
2645 retval |= thread_block (bb, true);
2646 }
2647
2648 /* Then perform the threading through loop headers. We start with the
2649 innermost loop, so that the changes in cfg we perform won't affect
2650 further threading. */
2651 FOR_EACH_LOOP (loop, LI_FROM_INNERMOST)
2652 {
2653 if (!loop->header
2654 || !bitmap_bit_p (threaded_blocks, loop->header->index))
2655 continue;
2656
2657 retval |= thread_through_loop_header (loop, may_peel_loop_headers);
2658 }
2659
2660 /* Any jump threading paths that are still attached to edges at this
2661 point must be one of two cases.
2662
2663 First, we could have a jump threading path which went from outside
2664 a loop to inside a loop that was ignored because a prior jump thread
2665 across a backedge was realized (which indirectly causes the loop
2666 above to ignore the latter thread). We can detect these because the
2667 loop structures will be different and we do not currently try to
2668 optimize this case.
2669
2670 Second, we could be threading across a backedge to a point within the
2671 same loop. This occurs for the FSA/FSM optimization and we would
2672 like to optimize it. However, we have to be very careful as this
2673 may completely scramble the loop structures, with the result being
2674 irreducible loops causing us to throw away our loop structure.
2675
2676 As a compromise for the latter case, if the thread path ends in
2677 a block where the last statement is a multiway branch, then go
2678 ahead and thread it, else ignore it. */
2679 basic_block bb;
2680 edge e;
2681 FOR_EACH_BB_FN (bb, cfun)
2682 {
2683 /* If we do end up threading here, we can remove elements from
2684 BB->preds. Thus we cannot use the FOR_EACH_EDGE iterator. */
2685 for (edge_iterator ei = ei_start (bb->preds);
2686 (e = ei_safe_edge (ei));)
2687 if (e->aux)
2688 {
2689 vec<jump_thread_edge *> *path = THREAD_PATH (e);
2690
2691 /* Case 1, threading from outside to inside the loop
2692 after we'd already threaded through the header. */
2693 if ((*path)[0]->e->dest->loop_father
2694 != path->last ()->e->src->loop_father)
2695 {
2696 delete_jump_thread_path (path);
2697 e->aux = NULL;
2698 ei_next (&ei);
2699 }
2700 else if (bb_ends_with_multiway_branch (path->last ()->e->src))
2701 {
2702 /* The code to thread through loop headers may have
2703 split a block with jump threads attached to it.
2704
2705 We can identify this with a disjoint jump threading
2706 path. If found, just remove it. */
2707 for (unsigned int i = 0; i < path->length () - 1; i++)
2708 if ((*path)[i]->e->dest != (*path)[i + 1]->e->src)
2709 {
2710 delete_jump_thread_path (path);
2711 e->aux = NULL;
2712 ei_next (&ei);
2713 break;
2714 }
2715
2716 /* Our path is still valid, thread it. */
2717 if (e->aux)
2718 {
2719 if (thread_block ((*path)[0]->e->dest, false))
2720 e->aux = NULL;
2721 else
2722 {
2723 delete_jump_thread_path (path);
2724 e->aux = NULL;
2725 ei_next (&ei);
2726 }
2727 }
2728 }
2729 else
2730 {
2731 delete_jump_thread_path (path);
2732 e->aux = NULL;
2733 ei_next (&ei);
2734 }
2735 }
2736 else
2737 ei_next (&ei);
2738 }
2739
2740 statistics_counter_event (cfun, "Jumps threaded",
2741 thread_stats.num_threaded_edges);
2742
2743 free_original_copy_tables ();
2744
2745 BITMAP_FREE (threaded_blocks);
2746 threaded_blocks = NULL;
2747 paths.release ();
2748
2749 if (retval)
2750 loops_state_set (LOOPS_NEED_FIXUP);
2751
2752 return retval;
2753 }
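
/* A hedged usage sketch, assuming client code not shown in this file
   (E and E2 stand for edges such a pass has discovered).  A client
   queues each opportunity during its analysis walk, then commits them
   all at once:

     vec<jump_thread_edge *> *path = new vec<jump_thread_edge *> ();
     path->safe_push (new jump_thread_edge (e, EDGE_START_JUMP_THREAD));
     path->safe_push (new jump_thread_edge (e2, EDGE_COPY_SRC_BLOCK));
     register_jump_thread (path);
     ...
     bool changed = thread_through_all_blocks (false);

   Ownership of each path transfers to this file; paths are freed
   either when a request is cancelled or once the requests have been
   processed above.  */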
2754
2755 /* Delete the jump threading path PATH. We have to explicitly delete
2756 each entry in the vector, then the container. */
2757
2758 void
2759 delete_jump_thread_path (vec<jump_thread_edge *> *path)
2760 {
2761 for (unsigned int i = 0; i < path->length (); i++)
2762 delete (*path)[i];
2763 path->release ();
2764 delete path;
2765 }
2766
2767 /* Register a jump threading opportunity. We queue up all the jump
2768 threading opportunities discovered by a pass and update the CFG
2769 and SSA form all at once.
2770
2771 E is the edge we can thread, E2 is the new target edge, i.e., we
2772 are effectively recording that E->dest can be changed to E2->dest
2773 after fixing the SSA graph. */
2774
2775 void
2776 register_jump_thread (vec<jump_thread_edge *> *path)
2777 {
2778 if (!dbg_cnt (registered_jump_thread))
2779 {
2780 delete_jump_thread_path (path);
2781 return;
2782 }
2783
2784 /* First make sure there are no NULL outgoing edges on the jump threading
2785 path. That can happen for jumping to a constant address. */
2786 for (unsigned int i = 0; i < path->length (); i++)
2787 if ((*path)[i]->e == NULL)
2788 {
2789 if (dump_file && (dump_flags & TDF_DETAILS))
2790 {
2791 fprintf (dump_file,
2792 "Found NULL edge in jump threading path. Cancelling jump thread:\n");
2793 dump_jump_thread_path (dump_file, *path, false);
2794 }
2795
2796 delete_jump_thread_path (path);
2797 return;
2798 }
2799
2800 if (dump_file && (dump_flags & TDF_DETAILS))
2801 dump_jump_thread_path (dump_file, *path, true);
2802
2803 if (!paths.exists ())
2804 paths.create (5);
2805
2806 paths.safe_push (path);
2807 }