1 /* Thread edges through blocks and update the control flow and SSA graphs.
2 Copyright (C) 2004-2015 Free Software Foundation, Inc.
3
4 This file is part of GCC.
5
6 GCC is free software; you can redistribute it and/or modify
7 it under the terms of the GNU General Public License as published by
8 the Free Software Foundation; either version 3, or (at your option)
9 any later version.
10
11 GCC is distributed in the hope that it will be useful,
12 but WITHOUT ANY WARRANTY; without even the implied warranty of
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 GNU General Public License for more details.
15
16 You should have received a copy of the GNU General Public License
17 along with GCC; see the file COPYING3. If not see
18 <http://www.gnu.org/licenses/>. */
19
20 #include "config.h"
21 #include "system.h"
22 #include "coretypes.h"
23 #include "alias.h"
24 #include "backend.h"
25 #include "tree.h"
26 #include "gimple.h"
27 #include "hard-reg-set.h"
28 #include "ssa.h"
29 #include "options.h"
30 #include "fold-const.h"
31 #include "flags.h"
32 #include "cfganal.h"
33 #include "internal-fn.h"
34 #include "gimple-iterator.h"
35 #include "tree-ssa.h"
36 #include "tree-ssa-threadupdate.h"
37 #include "dumpfile.h"
38 #include "cfgloop.h"
39 #include "dbgcnt.h"
40 #include "tree-cfg.h"
41 #include "tree-pass.h"
42
43 /* Given a block B, update the CFG and SSA graph to reflect redirecting
44 one or more in-edges to B to instead reach the destination of an
45 out-edge from B while preserving any side effects in B.
46
47 i.e., given A->B and B->C, change A->B to be A->C yet still preserve the
48 side effects of executing B.
49
50 1. Make a copy of B (including its outgoing edges and statements). Call
51 the copy B'. Note B' has no incoming edges or PHIs at this time.
52
53 2. Remove the control statement at the end of B' and all outgoing edges
54 except B'->C.
55
56 3. Add a new argument to each PHI in C with the same value as the existing
57 argument associated with edge B->C. Associate the new PHI arguments
58 with the edge B'->C.
59
60 4. For each PHI in B, find or create a PHI in B' with an identical
61 PHI_RESULT. Add an argument to the PHI in B' which has the same
62 value as the PHI in B associated with the edge A->B. Associate
63 the new argument in the PHI in B' with the edge A->B.
64
65 5. Change the edge A->B to A->B'.
66
67 5a. This automatically deletes any PHI arguments associated with the
68 edge A->B in B.
69
70 5b. This automatically associates each new argument added in step 4
71 with the edge A->B'.
72
73 6. Repeat for other incoming edges into B.
74
75 7. Put the duplicated resources in B and all the B' blocks into SSA form.
76
77 Note that block duplication can be minimized by first collecting the
78 set of unique destination blocks that the incoming edges should
79 be threaded to.
80
81 We reduce the number of edges and statements we create by not copying all
82 the outgoing edges and the control statement in step #1. We instead create
83 a template block without the outgoing edges and duplicate the template.
84
85 Another case this code handles is threading through a "joiner" block. In
86 this case, we do not know the destination of the joiner block, but one
87 of the outgoing edges from the joiner block leads to a threadable path. This
88 case largely works as outlined above, except the duplicate of the joiner
89 block still contains a full set of outgoing edges and its control statement.
90 We just redirect one of its outgoing edges to our jump threading path. */
91
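/* A minimal, self-contained sketch of steps 1-6 above on a toy CFG.  It is
   kept out of the build with #if 0 and uses made-up ToyBlock/ToyEdge types
   (not GCC's basic_block/edge); the single "PHI" is reduced to a map from
   incoming edge to value, and step 4 (PHIs inside B itself) is omitted.  */
#if 0
#include <algorithm>
#include <map>
#include <string>
#include <vector>

struct ToyBlock;

struct ToyEdge
{
  ToyBlock *src;
  ToyBlock *dest;
};

struct ToyBlock
{
  std::string name;
  std::vector<ToyEdge *> preds;
  std::vector<ToyEdge *> succs;
  /* One "PHI": maps an incoming edge to the value it carries.  */
  std::map<ToyEdge *, int> phi_args;
};

static ToyEdge *
toy_make_edge (ToyBlock *src, ToyBlock *dest)
{
  ToyEdge *e = new ToyEdge;
  e->src = src;
  e->dest = dest;
  src->succs.push_back (e);
  dest->preds.push_back (e);
  return e;
}

/* Thread A->B to C: create B' (steps 1/2), give C's PHI an argument for the
   new edge B'->C equal to the one for B->C (step 3), and retarget A->B so it
   enters B' instead (step 5).  */
static void
toy_thread_edge (ToyEdge *a_to_b, ToyEdge *b_to_c)
{
  ToyBlock *b = a_to_b->dest;
  ToyBlock *c = b_to_c->dest;

  ToyBlock *b_prime = new ToyBlock;
  b_prime->name = b->name + "'";

  ToyEdge *bp_to_c = toy_make_edge (b_prime, c);
  c->phi_args[bp_to_c] = c->phi_args[b_to_c];	/* step 3 */

  /* Step 5: redirect A->B so it now enters B'.  */
  b->preds.erase (std::find (b->preds.begin (), b->preds.end (), a_to_b));
  a_to_b->dest = b_prime;
  b_prime->preds.push_back (a_to_b);
}
#endif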
92
93 /* Steps #5 and #6 of the above algorithm are best implemented by walking
94 all the incoming edges which thread to the same destination edge at
95 the same time. That avoids lots of table lookups to get information
96 for the destination edge.
97
98 To realize that implementation we create a list of incoming edges
99 which thread to the same outgoing edge. Thus to implement steps
100 #5 and #6 we traverse our hash table of outgoing edge information.
101 For each entry we walk the list of incoming edges which thread to
102 the current outgoing edge. */
103
104 struct el
105 {
106 edge e;
107 struct el *next;
108 };
109
110 /* Main data structure recording information regarding B's duplicate
111 blocks. */
112
113 /* We need to efficiently record the unique thread destinations of this
114 block and specific information associated with those destinations. We
115 may have many incoming edges threaded to the same outgoing edge. This
116 can be naturally implemented with a hash table. */
117
118 struct redirection_data : free_ptr_hash<redirection_data>
119 {
120 /* We support wiring up two block duplicates in a jump threading path.
121
122 One is a normal block copy where we remove the control statement
123 and wire up its single remaining outgoing edge to the thread path.
124
125 The other is a joiner block where we leave the control statement
126 in place, but wire one of the outgoing edges to a thread path.
127
128 In theory we could have multiple block duplicates in a jump
129 threading path, but I haven't tried that.
130
131 The duplicate blocks appear in this array in the same order in
132 which they appear in the jump thread path. */
133 basic_block dup_blocks[2];
134
135 /* The jump threading path. */
136 vec<jump_thread_edge *> *path;
137
138 /* A list of incoming edges which we want to thread to the
139 same path. */
140 struct el *incoming_edges;
141
142 /* hash_table support. */
143 static inline hashval_t hash (const redirection_data *);
144 static inline int equal (const redirection_data *, const redirection_data *);
145 };
146
147 /* Dump a jump threading path, including annotations about each
148 edge in the path. */
149
150 static void
151 dump_jump_thread_path (FILE *dump_file, vec<jump_thread_edge *> path,
152 bool registering)
153 {
154 fprintf (dump_file,
155 " %s%s jump thread: (%d, %d) incoming edge; ",
156 (registering ? "Registering" : "Cancelling"),
157 (path[0]->type == EDGE_FSM_THREAD ? " FSM": ""),
158 path[0]->e->src->index, path[0]->e->dest->index);
159
160 for (unsigned int i = 1; i < path.length (); i++)
161 {
162 /* We can get paths with a NULL edge when the final destination
163 of a jump thread turns out to be a constant address. We dump
164 those paths when debugging, so we have to be prepared for that
165 possibility here. */
166 if (path[i]->e == NULL)
167 continue;
168
169 if (path[i]->type == EDGE_COPY_SRC_JOINER_BLOCK)
170 fprintf (dump_file, " (%d, %d) joiner; ",
171 path[i]->e->src->index, path[i]->e->dest->index);
172 if (path[i]->type == EDGE_COPY_SRC_BLOCK)
173 fprintf (dump_file, " (%d, %d) normal;",
174 path[i]->e->src->index, path[i]->e->dest->index);
175 if (path[i]->type == EDGE_NO_COPY_SRC_BLOCK)
176 fprintf (dump_file, " (%d, %d) nocopy;",
177 path[i]->e->src->index, path[i]->e->dest->index);
178 if (path[0]->type == EDGE_FSM_THREAD)
179 fprintf (dump_file, " (%d, %d) ",
180 path[i]->e->src->index, path[i]->e->dest->index);
181 }
182 fputc ('\n', dump_file);
183 }
184
185 /* Simple hashing function. For any given incoming edge E, we're going
186 to be most concerned with the final destination of its jump thread
187 path. So hash on the block index of the final edge in the path. */
188
189 inline hashval_t
190 redirection_data::hash (const redirection_data *p)
191 {
192 vec<jump_thread_edge *> *path = p->path;
193 return path->last ()->e->dest->index;
194 }
195
196 /* Given two hash table entries, return true if they have the same
197 jump threading path. */
198 inline int
199 redirection_data::equal (const redirection_data *p1, const redirection_data *p2)
200 {
201 vec<jump_thread_edge *> *path1 = p1->path;
202 vec<jump_thread_edge *> *path2 = p2->path;
203
204 if (path1->length () != path2->length ())
205 return false;
206
207 for (unsigned int i = 1; i < path1->length (); i++)
208 {
209 if ((*path1)[i]->type != (*path2)[i]->type
210 || (*path1)[i]->e != (*path2)[i]->e)
211 return false;
212 }
213
214 return true;
215 }
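/* A rough standalone illustration of the hash/equal contract above, using
   the C++ standard library rather than GCC's hash_table: hash on the final
   destination's index, compare full paths (skipping element 0) for equality,
   and collect all incoming edges that share a path.  ThreadHop and the
   other names here are simplified stand-ins, not GCC types.  */
#if 0
#include <unordered_map>
#include <vector>

struct ThreadHop { int src_index; int dest_index; int type; };
typedef std::vector<ThreadHop> ToyPath;

struct ToyPathHash
{
  size_t operator() (const ToyPath &p) const
  {
    /* Mirror redirection_data::hash: the final edge's destination.  */
    return (size_t) p.back ().dest_index;
  }
};

struct ToyPathEqual
{
  bool operator() (const ToyPath &a, const ToyPath &b) const
  {
    if (a.size () != b.size ())
      return false;
    /* Mirror redirection_data::equal: compare type and edge, skipping
       element 0 (the incoming edge, which differs between entries).  */
    for (size_t i = 1; i < a.size (); i++)
      if (a[i].type != b[i].type
          || a[i].src_index != b[i].src_index
          || a[i].dest_index != b[i].dest_index)
        return false;
    return true;
  }
};

/* Map each distinct path to the list of incoming-edge source blocks that
   want to thread along it (the role played by el/incoming_edges).  */
typedef std::unordered_map<ToyPath, std::vector<int>,
                           ToyPathHash, ToyPathEqual> ToyRedirectionMap;
#endif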
216
217 /* Data structure of information to pass to hash table traversal routines. */
218 struct ssa_local_info_t
219 {
220 /* The current block we are working on. */
221 basic_block bb;
222
223 /* We only create a template block for the first duplicated block in a
224 jump threading path as we may need many duplicates of that block.
225
226 The second duplicate block in a path is specific to that path. Creating
227 and sharing a template for that block is considerably more difficult. */
228 basic_block template_block;
229
230 /* TRUE if we thread one or more jumps, FALSE otherwise. */
231 bool jumps_threaded;
232
233 /* Blocks duplicated for the thread. */
234 bitmap duplicate_blocks;
235 };
236
237 /* Passes which use the jump threading code register jump threading
238 opportunities as they are discovered. We keep the registered
239 jump threading opportunities in this vector as edge pairs
240 (original_edge, target_edge). */
241 static vec<vec<jump_thread_edge *> *> paths;
242
243 /* When we start updating the CFG for threading, data necessary for jump
244 threading is attached to the AUX field for the incoming edge. Use these
245 macros to access the underlying structure attached to the AUX field. */
246 #define THREAD_PATH(E) ((vec<jump_thread_edge *> *)(E)->aux)
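/* An informal sketch of the aux convention used by THREAD_PATH: a registered
   path is a heap-allocated vector whose address is stashed in the incoming
   edge's aux pointer and later cast back out.  ToyAuxEdge is a made-up type
   with a bare void *aux field, not GCC's edge.  */
#if 0
#include <cstddef>
#include <vector>

struct ToyAuxEdge { void *aux; };

static void
toy_attach_path (ToyAuxEdge *e, std::vector<int> *path)
{
  e->aux = path;			/* register the path */
}

static std::vector<int> *
toy_detach_path (ToyAuxEdge *e)
{
  std::vector<int> *path = static_cast<std::vector<int> *> (e->aux);
  e->aux = NULL;			/* always clear aux when done */
  return path;
}
#endif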
247
248 /* Jump threading statistics. */
249
250 struct thread_stats_d
251 {
252 unsigned long num_threaded_edges;
253 };
254
255 struct thread_stats_d thread_stats;
256
257
258 /* Remove the last statement in block BB if it is a control statement.
259 Also remove all outgoing edges except the edge which reaches DEST_BB.
260 If DEST_BB is NULL, then remove all outgoing edges. */
261
262 static void
263 remove_ctrl_stmt_and_useless_edges (basic_block bb, basic_block dest_bb)
264 {
265 gimple_stmt_iterator gsi;
266 edge e;
267 edge_iterator ei;
268
269 gsi = gsi_last_bb (bb);
270
271 /* If the duplicate ends with a control statement, then remove it.
272
273 Note that if we are duplicating the template block rather than the
274 original basic block, then the duplicate might not have any real
275 statements in it. */
276 if (!gsi_end_p (gsi)
277 && gsi_stmt (gsi)
278 && (gimple_code (gsi_stmt (gsi)) == GIMPLE_COND
279 || gimple_code (gsi_stmt (gsi)) == GIMPLE_GOTO
280 || gimple_code (gsi_stmt (gsi)) == GIMPLE_SWITCH))
281 gsi_remove (&gsi, true);
282
283 for (ei = ei_start (bb->succs); (e = ei_safe_edge (ei)); )
284 {
285 if (e->dest != dest_bb)
286 remove_edge (e);
287 else
288 ei_next (&ei);
289 }
290 }
291
292 /* Create a duplicate of BB. Record the duplicate block in an array
293 indexed by COUNT stored in RD. */
294
295 static void
296 create_block_for_threading (basic_block bb,
297 struct redirection_data *rd,
298 unsigned int count,
299 bitmap *duplicate_blocks)
300 {
301 edge_iterator ei;
302 edge e;
303
304 /* We can use the generic block duplication code and simply remove
305 the stuff we do not need. */
306 rd->dup_blocks[count] = duplicate_block (bb, NULL, NULL);
307
308 FOR_EACH_EDGE (e, ei, rd->dup_blocks[count]->succs)
309 e->aux = NULL;
310
311 /* Zero out the profile, since the block is unreachable for now. */
312 rd->dup_blocks[count]->frequency = 0;
313 rd->dup_blocks[count]->count = 0;
314 if (duplicate_blocks)
315 bitmap_set_bit (*duplicate_blocks, rd->dup_blocks[count]->index);
316 }
317
318 /* Main data structure to hold information for duplicates of BB. */
319
320 static hash_table<redirection_data> *redirection_data;
321
322 /* Given an outgoing edge E, look up and return its entry in our hash table.
323
324 If INSERT is true, then we insert the entry into the hash table if
325 it is not already present. INCOMING_EDGE is added to the list of incoming
326 edges associated with E in the hash table. */
327
328 static struct redirection_data *
329 lookup_redirection_data (edge e, enum insert_option insert)
330 {
331 struct redirection_data **slot;
332 struct redirection_data *elt;
333 vec<jump_thread_edge *> *path = THREAD_PATH (e);
334
335 /* Build a hash table element so we can see if E is already
336 in the table. */
337 elt = XNEW (struct redirection_data);
338 elt->path = path;
339 elt->dup_blocks[0] = NULL;
340 elt->dup_blocks[1] = NULL;
341 elt->incoming_edges = NULL;
342
343 slot = redirection_data->find_slot (elt, insert);
344
345 /* This will only happen if INSERT is false and the entry is not
346 in the hash table. */
347 if (slot == NULL)
348 {
349 free (elt);
350 return NULL;
351 }
352
353 /* This will only happen if E was not in the hash table and
354 INSERT is true. */
355 if (*slot == NULL)
356 {
357 *slot = elt;
358 elt->incoming_edges = XNEW (struct el);
359 elt->incoming_edges->e = e;
360 elt->incoming_edges->next = NULL;
361 return elt;
362 }
363 /* E was in the hash table. */
364 else
365 {
366 /* Free ELT as we do not need it anymore, we will extract the
367 relevant entry from the hash table itself. */
368 free (elt);
369
370 /* Get the entry stored in the hash table. */
371 elt = *slot;
372
373 /* If insertion was requested, then we need to add INCOMING_EDGE
374 to the list of incoming edges associated with E. */
375 if (insert)
376 {
377 struct el *el = XNEW (struct el);
378 el->next = elt->incoming_edges;
379 el->e = e;
380 elt->incoming_edges = el;
381 }
382
383 return elt;
384 }
385 }
386
387 /* Similar to copy_phi_args, except that the PHI arg already exists; it just
388 does not have a value associated with it. */
389
390 static void
391 copy_phi_arg_into_existing_phi (edge src_e, edge tgt_e)
392 {
393 int src_idx = src_e->dest_idx;
394 int tgt_idx = tgt_e->dest_idx;
395
396 /* Iterate over each PHI in e->dest. */
397 for (gphi_iterator gsi = gsi_start_phis (src_e->dest),
398 gsi2 = gsi_start_phis (tgt_e->dest);
399 !gsi_end_p (gsi);
400 gsi_next (&gsi), gsi_next (&gsi2))
401 {
402 gphi *src_phi = gsi.phi ();
403 gphi *dest_phi = gsi2.phi ();
404 tree val = gimple_phi_arg_def (src_phi, src_idx);
405 source_location locus = gimple_phi_arg_location (src_phi, src_idx);
406
407 SET_PHI_ARG_DEF (dest_phi, tgt_idx, val);
408 gimple_phi_arg_set_location (dest_phi, tgt_idx, locus);
409 }
410 }
411
412 /* Given ssa_name DEF, backtrack the jump threading PATH from node IDX
413 to see if it has a constant value in a flow-sensitive manner. Set
414 LOCUS to the location of the constant PHI arg and return the value.
415 Return DEF directly if either PATH is NULL or IDX is zero. */
416
417 static tree
418 get_value_locus_in_path (tree def, vec<jump_thread_edge *> *path,
419 basic_block bb, int idx, source_location *locus)
420 {
421 tree arg;
422 gphi *def_phi;
423 basic_block def_bb;
424
425 if (path == NULL || idx == 0)
426 return def;
427
428 def_phi = dyn_cast <gphi *> (SSA_NAME_DEF_STMT (def));
429 if (!def_phi)
430 return def;
431
432 def_bb = gimple_bb (def_phi);
433 /* Don't propagate loop invariants into deeper loops. */
434 if (!def_bb || bb_loop_depth (def_bb) < bb_loop_depth (bb))
435 return def;
436
437 /* Backtrack jump threading path from IDX to see if def has constant
438 value. */
439 for (int j = idx - 1; j >= 0; j--)
440 {
441 edge e = (*path)[j]->e;
442 if (e->dest == def_bb)
443 {
444 arg = gimple_phi_arg_def (def_phi, e->dest_idx);
445 if (is_gimple_min_invariant (arg))
446 {
447 *locus = gimple_phi_arg_location (def_phi, e->dest_idx);
448 return arg;
449 }
450 break;
451 }
452 }
453
454 return def;
455 }
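/* A toy version of the backtracking above: walk the path backwards from IDX,
   and if some earlier path edge enters the block defining DEF's PHI, look up
   the argument that edge selects and use it when it is a constant.  ToyPhi
   and ToyPathEdge are made-up types; a non-negative arg stands in for a
   compile-time constant, purely for illustration.  */
#if 0
#include <vector>

struct ToyPathEdge { int dest_bb; int dest_idx; };

struct ToyPhi
{
  int bb;			/* block holding the PHI */
  std::vector<int> args;	/* one arg per incoming edge index */
};

/* Return the constant arg selected by the path, or -1 if none.  */
static int
toy_value_in_path (const ToyPhi &phi, const std::vector<ToyPathEdge> &path,
                   int idx)
{
  /* Mirror get_value_locus_in_path: scan path[idx-1] .. path[0].  */
  for (int j = idx - 1; j >= 0; j--)
    if (path[j].dest_bb == phi.bb)
      {
        int arg = phi.args[path[j].dest_idx];
        return arg >= 0 ? arg : -1;	/* only constants are useful */
      }
  return -1;
}
#endif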
456
457 /* For each PHI in BB, copy the argument associated with SRC_E to TGT_E.
458 Try to backtrack the jump threading PATH from node IDX to see if the arg
459 has a constant value; if so, copy the constant value instead of the
460 argument itself. */
461
462 static void
463 copy_phi_args (basic_block bb, edge src_e, edge tgt_e,
464 vec<jump_thread_edge *> *path, int idx)
465 {
466 gphi_iterator gsi;
467 int src_indx = src_e->dest_idx;
468
469 for (gsi = gsi_start_phis (bb); !gsi_end_p (gsi); gsi_next (&gsi))
470 {
471 gphi *phi = gsi.phi ();
472 tree def = gimple_phi_arg_def (phi, src_indx);
473 source_location locus = gimple_phi_arg_location (phi, src_indx);
474
475 if (TREE_CODE (def) == SSA_NAME
476 && !virtual_operand_p (gimple_phi_result (phi)))
477 def = get_value_locus_in_path (def, path, bb, idx, &locus);
478
479 add_phi_arg (phi, def, tgt_e, locus);
480 }
481 }
482
483 /* We have recently made a copy of ORIG_BB, including its outgoing
484 edges. The copy is NEW_BB. Every PHI node in every direct successor of
485 ORIG_BB has a new argument associated with edge from NEW_BB to the
486 successor. Initialize the PHI argument so that it is equal to the PHI
487 argument associated with the edge from ORIG_BB to the successor.
488 PATH and IDX are used to check if the new PHI argument has constant
489 value in a flow sensitive manner. */
490
491 static void
492 update_destination_phis (basic_block orig_bb, basic_block new_bb,
493 vec<jump_thread_edge *> *path, int idx)
494 {
495 edge_iterator ei;
496 edge e;
497
498 FOR_EACH_EDGE (e, ei, orig_bb->succs)
499 {
500 edge e2 = find_edge (new_bb, e->dest);
501 copy_phi_args (e->dest, e, e2, path, idx);
502 }
503 }
504
505 /* Given a duplicate block and its single destination (both stored
506 in RD), create an edge between the duplicate and its single
507 destination.
508
509 Add an additional argument to any PHI nodes at the single
510 destination. IDX is the node in the jump threading path at which
511 we start checking whether the new PHI argument has a constant
512 value along the jump threading path. */
513
514 static void
515 create_edge_and_update_destination_phis (struct redirection_data *rd,
516 basic_block bb, int idx)
517 {
518 edge e = make_edge (bb, rd->path->last ()->e->dest, EDGE_FALLTHRU);
519
520 rescan_loop_exit (e, true, false);
521 e->probability = REG_BR_PROB_BASE;
522 e->count = bb->count;
523
524 /* We used to copy the thread path here. That was added in 2007
525 and dutifully updated through the representation changes in 2013.
526
527 In 2013 we added code to thread from an interior node through
528 the backedge to another interior node. That runs after the code
529 to thread through loop headers from outside the loop.
530
531 The latter may delete edges in the CFG, including those
532 which appeared in the jump threading path we copied here. Thus
533 we'd end up using a dangling pointer.
534
535 After reviewing the 2007/2011 code, I can't see how anything
536 depended on copying the AUX field and clearly copying the jump
537 threading path is problematical due to embedded edge pointers.
538 It has been removed. */
539 e->aux = NULL;
540
541 /* If there are any PHI nodes at the destination of the outgoing edge
542 from the duplicate block, then we will need to add a new argument
543 to them. The argument should have the same value as the argument
544 associated with the outgoing edge stored in RD. */
545 copy_phi_args (e->dest, rd->path->last ()->e, e, rd->path, idx);
546 }
547
548 /* Look through PATH beginning at START and return TRUE if there are
549 any additional blocks that need to be duplicated. Otherwise,
550 return FALSE. */
551 static bool
552 any_remaining_duplicated_blocks (vec<jump_thread_edge *> *path,
553 unsigned int start)
554 {
555 for (unsigned int i = start + 1; i < path->length (); i++)
556 {
557 if ((*path)[i]->type == EDGE_COPY_SRC_JOINER_BLOCK
558 || (*path)[i]->type == EDGE_COPY_SRC_BLOCK)
559 return true;
560 }
561 return false;
562 }
563
564
565 /* Compute the amount of profile count/frequency coming into the jump threading
566 path stored in RD that we are duplicating, returned in PATH_IN_COUNT_PTR and
567 PATH_IN_FREQ_PTR, as well as the amount of counts flowing out of the
568 duplicated path, returned in PATH_OUT_COUNT_PTR. LOCAL_INFO is used to
569 identify blocks duplicated for jump threading, which have duplicated
570 edges that need to be ignored in the analysis. Return true if path contains
571 a joiner, false otherwise.
572
573 In the non-joiner case, this is straightforward - all the counts/frequency
574 flowing into the jump threading path should flow through the duplicated
575 block and out of the duplicated path.
576
577 In the joiner case, it is very tricky. Some of the counts flowing into
578 the original path go offpath at the joiner. The problem is that while
579 we know how much total count goes off-path in the original control flow,
580 we don't know how many of the counts corresponding to just the jump
581 threading path go offpath at the joiner.
582
583 For example, assume we have the following control flow and identified
584 jump threading paths:
585
586 A B C
587 \ | /
588 Ea \ |Eb / Ec
589 \ | /
590 v v v
591 J <-- Joiner
592 / \
593 Eoff/ \Eon
594 / \
595 v v
596 Soff Son <--- Normal
597 /\
598 Ed/ \ Ee
599 / \
600 v v
601 D E
602
603 Jump threading paths: A -> J -> Son -> D (path 1)
604 C -> J -> Son -> E (path 2)
605
606 Note that the control flow could be more complicated:
607 - Each jump threading path may have more than one incoming edge. I.e. A and
608 Ea could represent multiple incoming blocks/edges that are included in
609 path 1.
610 - There could be EDGE_NO_COPY_SRC_BLOCK edges after the joiner (either
611 before or after the "normal" copy block). These are not duplicated onto
612 the jump threading path, as they are single-successor.
613 - Any of the blocks along the path may have other incoming edges that
614 are not part of any jump threading path, but add profile counts along
615 the path.
616
617 In the above example, after all jump threading is complete, we will
618 end up with the following control flow:
619
620 A B C
621 | | |
622 Ea| |Eb |Ec
623 | | |
624 v v v
625 Ja J Jc
626 / \ / \Eon' / \
627 Eona/ \ ---/---\-------- \Eonc
628 / \ / / \ \
629 v v v v v
630 Sona Soff Son Sonc
631 \ /\ /
632 \___________ / \ _____/
633 \ / \/
634 vv v
635 D E
636
637 The main issue to notice here is that when we are processing path 1
638 (A->J->Son->D) we need to figure out the outgoing edge weights to
639 the duplicated edges Ja->Sona and Ja->Soff, while ensuring that the
640 sum of the incoming weights to D remain Ed. The problem with simply
641 assuming that Ja (and Jc when processing path 2) has the same outgoing
642 probabilities to its successors as the original block J, is that after
643 all paths are processed and other edges/counts removed (e.g. none
644 of Ec will reach D after processing path 2), we may end up with not
645 enough count flowing along duplicated edge Sona->D.
646
647 Therefore, in the case of a joiner, we keep track of all counts
648 coming in along the current path, as well as from predecessors not
649 on any jump threading path (Eb in the above example). While we
650 first assume that the duplicated Eona for Ja->Sona has the same
651 probability as the original, we later compensate for other jump
652 threading paths that may eliminate edges. We do that by keeping track
653 of all counts coming into the original path that are not in a jump
654 thread (Eb in the above example, but as noted earlier, there could
655 be other predecessors incoming to the path at various points, such
656 as at Son). Call this cumulative non-path count coming into the path
657 before D as Enonpath. We then ensure that the count from Sona->D is at
658 least as big as (Ed - Enonpath), but no bigger than the minimum
659 weight along the jump threading path. The probabilities of both the
660 original and duplicated joiner block J and Ja will be adjusted
661 accordingly after the updates. */
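/* A small numeric sketch of the count bookkeeping described above, using the
   example CFG with made-up counts.  The two helpers approximate
   GCOV_COMPUTE_SCALE and apply_probability as plain num * base / den
   scaling; GCC's versions also round, so treat the exact numbers as
   illustrative only, and TOY_PROB_BASE as a stand-in for REG_BR_PROB_BASE.  */
#if 0
#include <stdio.h>

#define TOY_PROB_BASE 10000

static long toy_scale (long num, long den)
{ return den ? num * TOY_PROB_BASE / den : 0; }

static long toy_apply (long count, long prob)
{ return count * prob / TOY_PROB_BASE; }

int
main (void)
{
  /* Path 1 is A -> J -> Son -> D.  Suppose Ea=60, Eb=30, Ec=10, so J's
     incoming total is 100, and Eon (J->Son) carries 70.  */
  long path_in_count = 60;	/* count on Ea only */
  long nonpath_count = 30;	/* Eb: not on any thread path */
  long total_count = 100;	/* count flowing into J */

  long onpath_scale = toy_scale (path_in_count, total_count);	/* 6000 */

  /* At the joiner, assume this path keeps the same fraction of Eon.  */
  long path_out_count = toy_apply (70, onpath_scale);		/* 42 */

  /* If Ed (Son->D) is 50, everything reaching D must come from this path
     or from non-path predecessors, so enforce the lower bound.  */
  long ed = 50;
  if (path_out_count < ed - nonpath_count)
    path_out_count = ed - nonpath_count;			/* stays 42 */

  printf ("in=%ld out=%ld scale=%ld\n",
          path_in_count, path_out_count, onpath_scale);
  return 0;
}
#endif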
662
663 static bool
664 compute_path_counts (struct redirection_data *rd,
665 ssa_local_info_t *local_info,
666 gcov_type *path_in_count_ptr,
667 gcov_type *path_out_count_ptr,
668 int *path_in_freq_ptr)
669 {
670 edge e = rd->incoming_edges->e;
671 vec<jump_thread_edge *> *path = THREAD_PATH (e);
672 edge elast = path->last ()->e;
673 gcov_type nonpath_count = 0;
674 bool has_joiner = false;
675 gcov_type path_in_count = 0;
676 int path_in_freq = 0;
677
678 /* Start by accumulating incoming edge counts to the path's first bb
679 into a couple buckets:
680 path_in_count: total count of incoming edges that flow into the
681 current path.
682 nonpath_count: total count of incoming edges that are not
683 flowing along *any* path. These are the counts
684 that will still flow along the original path after
685 all path duplication is done by potentially multiple
686 calls to this routine.
687 (any other incoming edge counts are for a different jump threading
688 path that will be handled by a later call to this routine.)
689 To make this easier, start by recording all incoming edges that flow into
690 the current path in a bitmap. We could add up the path's incoming edge
691 counts here, but we still need to walk all the first bb's incoming edges
692 below to add up the counts of the other edges not included in this jump
693 threading path. */
694 struct el *next, *el;
695 bitmap in_edge_srcs = BITMAP_ALLOC (NULL);
696 for (el = rd->incoming_edges; el; el = next)
697 {
698 next = el->next;
699 bitmap_set_bit (in_edge_srcs, el->e->src->index);
700 }
701 edge ein;
702 edge_iterator ei;
703 FOR_EACH_EDGE (ein, ei, e->dest->preds)
704 {
705 vec<jump_thread_edge *> *ein_path = THREAD_PATH (ein);
706 /* Simply check the incoming edge src against the set captured above. */
707 if (ein_path
708 && bitmap_bit_p (in_edge_srcs, (*ein_path)[0]->e->src->index))
709 {
710 /* It is necessary but not sufficient that the last path edges
711 are identical. There may be different paths that share the
712 same last path edge in the case where the last edge has a nocopy
713 source block. */
714 gcc_assert (ein_path->last ()->e == elast);
715 path_in_count += ein->count;
716 path_in_freq += EDGE_FREQUENCY (ein);
717 }
718 else if (!ein_path)
719 {
720 /* Keep track of the incoming edges that are not on any jump-threading
721 path. These counts will still flow out of original path after all
722 jump threading is complete. */
723 nonpath_count += ein->count;
724 }
725 }
726
727 /* This is needed due to insane incoming frequencies. */
728 if (path_in_freq > BB_FREQ_MAX)
729 path_in_freq = BB_FREQ_MAX;
730
731 BITMAP_FREE (in_edge_srcs);
732
733 /* Now compute the fraction of the total count coming into the first
734 path bb that is from the current threading path. */
735 gcov_type total_count = e->dest->count;
736 /* Handle incoming profile insanities. */
737 if (total_count < path_in_count)
738 path_in_count = total_count;
739 int onpath_scale = GCOV_COMPUTE_SCALE (path_in_count, total_count);
740
741 /* Walk the entire path to do some more computation in order to estimate
742 how much of the path_in_count will flow out of the duplicated threading
743 path. In the non-joiner case this is straightforward (it should be
744 the same as path_in_count, although we will handle incoming profile
745 insanities by setting it equal to the minimum count along the path).
746
747 In the joiner case, we need to estimate how much of the path_in_count
748 will stay on the threading path after the joiner's conditional branch.
749 We don't really know for sure how much of the counts
750 associated with this path go to each successor of the joiner, but we'll
751 estimate based on the fraction of the total count coming into the path
752 bb that was from the threading paths (computed above in onpath_scale).
753 Afterwards, we will need to do some fixup to account for other threading
754 paths and possible profile insanities.
755
756 In order to estimate the joiner case's counts we also need to update
757 nonpath_count with any additional counts coming into the path. Other
758 blocks along the path may have additional predecessors from outside
759 the path. */
760 gcov_type path_out_count = path_in_count;
761 gcov_type min_path_count = path_in_count;
762 for (unsigned int i = 1; i < path->length (); i++)
763 {
764 edge epath = (*path)[i]->e;
765 gcov_type cur_count = epath->count;
766 if ((*path)[i]->type == EDGE_COPY_SRC_JOINER_BLOCK)
767 {
768 has_joiner = true;
769 cur_count = apply_probability (cur_count, onpath_scale);
770 }
771 /* In the joiner case we need to update nonpath_count for any edges
772 coming into the path that will contribute to the count flowing
773 into the path successor. */
774 if (has_joiner && epath != elast)
775 {
776 /* Look for other incoming edges after joiner. */
777 FOR_EACH_EDGE (ein, ei, epath->dest->preds)
778 {
779 if (ein != epath
780 /* Ignore in edges from blocks we have duplicated for a
781 threading path, which have duplicated edge counts until
782 they are redirected by an invocation of this routine. */
783 && !bitmap_bit_p (local_info->duplicate_blocks,
784 ein->src->index))
785 nonpath_count += ein->count;
786 }
787 }
788 if (cur_count < path_out_count)
789 path_out_count = cur_count;
790 if (epath->count < min_path_count)
791 min_path_count = epath->count;
792 }
793
794 /* We computed path_out_count above assuming that this path targeted
795 the joiner's on-path successor with the same likelihood as it
796 reached the joiner. However, other thread paths through the joiner
797 may take a different path through the normal copy source block
798 (i.e. they have a different elast), meaning that they do not
799 contribute any counts to this path's elast. As a result, it may
800 turn out that this path must have more count flowing to the on-path
801 successor of the joiner. Essentially, all of this path's elast
802 count must be contributed by this path and any nonpath counts
803 (since any path through the joiner with a different elast will not
804 include a copy of this elast in its duplicated path).
805 So ensure that this path's path_out_count is at least the
806 difference between elast->count and nonpath_count. Otherwise the edge
807 counts after threading will not be sane. */
808 if (has_joiner && path_out_count < elast->count - nonpath_count)
809 {
810 path_out_count = elast->count - nonpath_count;
811 /* But neither can we go above the minimum count along the path
812 we are duplicating. This can be an issue due to profile
813 insanities coming in to this pass. */
814 if (path_out_count > min_path_count)
815 path_out_count = min_path_count;
816 }
817
818 *path_in_count_ptr = path_in_count;
819 *path_out_count_ptr = path_out_count;
820 *path_in_freq_ptr = path_in_freq;
821 return has_joiner;
822 }
823
824
825 /* Update the counts and frequencies for both an original path
826 edge EPATH and its duplicate EDUP. The duplicate source block
827 will get a count/frequency of PATH_IN_COUNT and PATH_IN_FREQ,
828 and the duplicate edge EDUP will have a count of PATH_OUT_COUNT. */
829 static void
830 update_profile (edge epath, edge edup, gcov_type path_in_count,
831 gcov_type path_out_count, int path_in_freq)
832 {
833
834 /* First update the duplicated block's count / frequency. */
835 if (edup)
836 {
837 basic_block dup_block = edup->src;
838 gcc_assert (dup_block->count == 0);
839 gcc_assert (dup_block->frequency == 0);
840 dup_block->count = path_in_count;
841 dup_block->frequency = path_in_freq;
842 }
843
844 /* Now update the original block's count and frequency in the
845 opposite manner - remove the counts/freq that will flow
846 into the duplicated block. Handle underflow due to precision/
847 rounding issues. */
848 epath->src->count -= path_in_count;
849 if (epath->src->count < 0)
850 epath->src->count = 0;
851 epath->src->frequency -= path_in_freq;
852 if (epath->src->frequency < 0)
853 epath->src->frequency = 0;
854
855 /* Next update this path edge's original and duplicated counts. We know
856 that the duplicated path will have path_out_count flowing
857 out of it (in the joiner case this is the count along the duplicated path
858 out of the duplicated joiner). This count can then be removed from the
859 original path edge. */
860 if (edup)
861 edup->count = path_out_count;
862 epath->count -= path_out_count;
863 gcc_assert (epath->count >= 0);
864 }
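/* Continuing the toy numbers from the compute_path_counts sketch above
   (path_in_count = 60, path_out_count = 42): the duplicate joiner Ja
   receives the whole on-path inflow, and those amounts are subtracted from
   the originals, clamping at zero on underflow.  A simplified, standalone
   analogue of update_profile, not the real routine.  */
#if 0
static void
toy_update_profile (long *orig_bb_count, long *dup_bb_count,
                    long *orig_edge_count, long *dup_edge_count,
                    long path_in_count, long path_out_count)
{
  *dup_bb_count = path_in_count;		/* Ja: 60 */
  *orig_bb_count -= path_in_count;		/* J: 100 - 60 = 40 */
  if (*orig_bb_count < 0)
    *orig_bb_count = 0;

  *dup_edge_count = path_out_count;		/* Ja->Son copy: 42 */
  *orig_edge_count -= path_out_count;		/* Eon: 70 - 42 = 28 */
  if (*orig_edge_count < 0)
    *orig_edge_count = 0;
}
#endif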
865
866
867 /* The duplicate and original joiner blocks may end up with different
868 probabilities (different from both the original and from each other).
869 Recompute the probabilities here once we have updated the edge
870 counts and frequencies. */
871
872 static void
873 recompute_probabilities (basic_block bb)
874 {
875 edge esucc;
876 edge_iterator ei;
877 FOR_EACH_EDGE (esucc, ei, bb->succs)
878 {
879 if (!bb->count)
880 continue;
881
882 /* Prevent overflow computation due to insane profiles. */
883 if (esucc->count < bb->count)
884 esucc->probability = GCOV_COMPUTE_SCALE (esucc->count,
885 bb->count);
886 else
887 /* Can happen with missing/guessed probabilities, since we
888 may determine that more is flowing along duplicated
889 path than joiner succ probabilities allowed.
890 Counts and freqs will be insane after jump threading,
891 at least make sure probability is sane or we will
892 get a flow verification error.
893 Not much we can do to make counts/freqs sane without
894 redoing the profile estimation. */
895 esucc->probability = REG_BR_PROB_BASE;
896 }
897 }
898
899
900 /* Update the counts of the original and duplicated edges from a joiner
901 that go off path, given that we have already determined that the
902 duplicate joiner DUP_BB has incoming count PATH_IN_COUNT and
903 outgoing count along the path PATH_OUT_COUNT. The original (on-)path
904 edge from joiner is EPATH. */
905
906 static void
907 update_joiner_offpath_counts (edge epath, basic_block dup_bb,
908 gcov_type path_in_count,
909 gcov_type path_out_count)
910 {
911 /* Compute the count that currently flows off path from the joiner.
912 In other words, the total count of joiner's out edges other than
913 epath. Compute this by walking the successors instead of
914 subtracting epath's count from the joiner bb count, since there
915 are sometimes slight insanities where the total out edge count is
916 larger than the bb count (possibly due to rounding/truncation
917 errors). */
918 gcov_type total_orig_off_path_count = 0;
919 edge enonpath;
920 edge_iterator ei;
921 FOR_EACH_EDGE (enonpath, ei, epath->src->succs)
922 {
923 if (enonpath == epath)
924 continue;
925 total_orig_off_path_count += enonpath->count;
926 }
927
928 /* For the path that we are duplicating, the amount that will flow
929 off path from the duplicated joiner is the delta between the
930 path's cumulative in count and the portion of that count we
931 estimated above as flowing from the joiner along the duplicated
932 path. */
933 gcov_type total_dup_off_path_count = path_in_count - path_out_count;
934
935 /* Now do the actual updates of the off-path edges. */
936 FOR_EACH_EDGE (enonpath, ei, epath->src->succs)
937 {
938 /* Look for edges going off of the threading path. */
939 if (enonpath == epath)
940 continue;
941
942 /* Find the corresponding edge out of the duplicated joiner. */
943 edge enonpathdup = find_edge (dup_bb, enonpath->dest);
944 gcc_assert (enonpathdup);
945
946 /* We can't use the original probability of the joiner's out
947 edges, since the probabilities of the original branch
948 and the duplicated branches may vary after all threading is
949 complete. But apportion the duplicated joiner's off-path
950 total edge count computed earlier (total_dup_off_path_count)
951 among the duplicated off-path edges based on their original
952 ratio to the full off-path count (total_orig_off_path_count).
953 */
954 int scale = GCOV_COMPUTE_SCALE (enonpath->count,
955 total_orig_off_path_count);
956 /* Give the duplicated offpath edge a portion of the duplicated
957 total. */
958 enonpathdup->count = apply_scale (scale,
959 total_dup_off_path_count);
960 /* Now update the original offpath edge count, handling underflow
961 due to rounding errors. */
962 enonpath->count -= enonpathdup->count;
963 if (enonpath->count < 0)
964 enonpath->count = 0;
965 }
966 }
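/* Continuing the same toy numbers: the duplicated joiner must send
   path_in_count - path_out_count = 60 - 42 = 18 off path, and that 18 is
   split among the duplicated off-path edges in proportion to the original
   off-path counts (here a single Eoff of 30 takes it all, dropping the
   original Eoff to 12).  A simplified standalone analogue over plain
   arrays, not GCC edges.  */
#if 0
static void
toy_apportion_offpath (long *orig_off, long *dup_off, int n,
                       long path_in_count, long path_out_count)
{
  long total_orig = 0, total_dup = path_in_count - path_out_count;
  for (int i = 0; i < n; i++)
    total_orig += orig_off[i];

  for (int i = 0; i < n; i++)
    {
      /* Each duplicated off-path edge gets its original share.  */
      dup_off[i] = total_orig ? total_dup * orig_off[i] / total_orig : 0;
      orig_off[i] -= dup_off[i];
      if (orig_off[i] < 0)
        orig_off[i] = 0;
    }
}
#endif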
967
968
969 /* Check if the paths through RD all have estimated frequencies but zero
970 profile counts. This is more accurate than checking the entry block
971 for a zero profile count, since profile insanities sometimes creep in. */
972
973 static bool
974 estimated_freqs_path (struct redirection_data *rd)
975 {
976 edge e = rd->incoming_edges->e;
977 vec<jump_thread_edge *> *path = THREAD_PATH (e);
978 edge ein;
979 edge_iterator ei;
980 bool non_zero_freq = false;
981 FOR_EACH_EDGE (ein, ei, e->dest->preds)
982 {
983 if (ein->count)
984 return false;
985 non_zero_freq |= ein->src->frequency != 0;
986 }
987
988 for (unsigned int i = 1; i < path->length (); i++)
989 {
990 edge epath = (*path)[i]->e;
991 if (epath->src->count)
992 return false;
993 non_zero_freq |= epath->src->frequency != 0;
994 edge esucc;
995 FOR_EACH_EDGE (esucc, ei, epath->src->succs)
996 {
997 if (esucc->count)
998 return false;
999 non_zero_freq |= esucc->src->frequency != 0;
1000 }
1001 }
1002 return non_zero_freq;
1003 }
1004
1005
1006 /* Invoked for routines that have guessed frequencies and no profile
1007 counts to record the block and edge frequencies for paths through RD
1008 in the profile count fields of those blocks and edges. This is because
1009 ssa_fix_duplicate_block_edges incrementally updates the block and
1010 edge counts as edges are redirected, and it is difficult to do that
1011 for edge frequencies which are computed on the fly from the source
1012 block frequency and probability. When a block frequency is updated
1013 its outgoing edge frequencies are affected and become difficult to
1014 adjust. */
1015
1016 static void
1017 freqs_to_counts_path (struct redirection_data *rd)
1018 {
1019 edge e = rd->incoming_edges->e;
1020 vec<jump_thread_edge *> *path = THREAD_PATH (e);
1021 edge ein;
1022 edge_iterator ei;
1023 FOR_EACH_EDGE (ein, ei, e->dest->preds)
1024 {
1025 /* Scale up the frequency by REG_BR_PROB_BASE, to avoid rounding
1026 errors applying the probability when the frequencies are very
1027 small. */
1028 ein->count = apply_probability (ein->src->frequency * REG_BR_PROB_BASE,
1029 ein->probability);
1030 }
1031
1032 for (unsigned int i = 1; i < path->length (); i++)
1033 {
1034 edge epath = (*path)[i]->e;
1035 edge esucc;
1036 /* Scale up the frequency by REG_BR_PROB_BASE, to avoid rounding
1037 errors applying the edge probability when the frequencies are very
1038 small. */
1039 epath->src->count = epath->src->frequency * REG_BR_PROB_BASE;
1040 FOR_EACH_EDGE (esucc, ei, epath->src->succs)
1041 esucc->count = apply_probability (esucc->src->count,
1042 esucc->probability);
1043 }
1044 }
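/* A sketch of the freq -> pseudo-count conversion above: block frequencies
   are inflated by REG_BR_PROB_BASE so that applying an edge probability
   (also expressed relative to REG_BR_PROB_BASE) still yields a useful
   integer.  The numbers and TOY_PROB_BASE are made up for illustration.  */
#if 0
#define TOY_PROB_BASE 10000

static void
toy_freq_to_count (void)
{
  long src_frequency = 150;		/* estimated block frequency */
  long edge_probability = 2500;		/* 25% of TOY_PROB_BASE */

  long src_count = src_frequency * TOY_PROB_BASE;		/* 1500000 */
  long edge_count = src_count * edge_probability / TOY_PROB_BASE;
  /* edge_count == 375000, i.e. freq 150 * prob 0.25 * TOY_PROB_BASE.  */
  (void) edge_count;
}
#endif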
1045
1046
1047 /* For routines that have guessed frequencies and no profile counts, where we
1048 used freqs_to_counts_path to record block and edge frequencies for paths
1049 through RD, we clear the counts after completing all updates for RD.
1050 The updates in ssa_fix_duplicate_block_edges are based off the count fields,
1051 but the block frequencies and edge probabilities were updated as well,
1052 so we can simply clear the count fields. */
1053
1054 static void
1055 clear_counts_path (struct redirection_data *rd)
1056 {
1057 edge e = rd->incoming_edges->e;
1058 vec<jump_thread_edge *> *path = THREAD_PATH (e);
1059 edge ein, esucc;
1060 edge_iterator ei;
1061 FOR_EACH_EDGE (ein, ei, e->dest->preds)
1062 ein->count = 0;
1063
1064 /* First clear counts along original path. */
1065 for (unsigned int i = 1; i < path->length (); i++)
1066 {
1067 edge epath = (*path)[i]->e;
1068 FOR_EACH_EDGE (esucc, ei, epath->src->succs)
1069 esucc->count = 0;
1070 epath->src->count = 0;
1071 }
1072 /* Also need to clear the counts along duplicated path. */
1073 for (unsigned int i = 0; i < 2; i++)
1074 {
1075 basic_block dup = rd->dup_blocks[i];
1076 if (!dup)
1077 continue;
1078 FOR_EACH_EDGE (esucc, ei, dup->succs)
1079 esucc->count = 0;
1080 dup->count = 0;
1081 }
1082 }
1083
1084 /* Wire up the outgoing edges from the duplicate blocks and
1085 update any PHIs as needed. Also update the profile counts
1086 on the original and duplicate blocks and edges. */
1087 void
1088 ssa_fix_duplicate_block_edges (struct redirection_data *rd,
1089 ssa_local_info_t *local_info)
1090 {
1091 bool multi_incomings = (rd->incoming_edges->next != NULL);
1092 edge e = rd->incoming_edges->e;
1093 vec<jump_thread_edge *> *path = THREAD_PATH (e);
1094 edge elast = path->last ()->e;
1095 gcov_type path_in_count = 0;
1096 gcov_type path_out_count = 0;
1097 int path_in_freq = 0;
1098
1099 /* This routine updates profile counts, frequencies, and probabilities
1100 incrementally. Since it is difficult to do the incremental updates
1101 using frequencies/probabilities alone, for routines without profile
1102 data we first take a snapshot of the existing block and edge frequencies
1103 by copying them into the empty profile count fields. These counts are
1104 then used to do the incremental updates, and cleared at the end of this
1105 routine. If the function is marked as having a profile, we still check
1106 to see if the paths through RD are using estimated frequencies because
1107 the routine had zero profile counts. */
1108 bool do_freqs_to_counts = (profile_status_for_fn (cfun) != PROFILE_READ
1109 || estimated_freqs_path (rd));
1110 if (do_freqs_to_counts)
1111 freqs_to_counts_path (rd);
1112
1113 /* First determine how much profile count to move from original
1114 path to the duplicate path. This is tricky in the presence of
1115 a joiner (see comments for compute_path_counts), where some portion
1116 of the path's counts will flow off-path from the joiner. In the
1117 non-joiner case the path_in_count and path_out_count should be the
1118 same. */
1119 bool has_joiner = compute_path_counts (rd, local_info,
1120 &path_in_count, &path_out_count,
1121 &path_in_freq);
1122
1123 int cur_path_freq = path_in_freq;
1124 for (unsigned int count = 0, i = 1; i < path->length (); i++)
1125 {
1126 edge epath = (*path)[i]->e;
1127
1128 /* If we were threading through a joiner block, then we want
1129 to keep its control statement and redirect an outgoing edge.
1130 Else we want to remove the control statement & edges, then create
1131 a new outgoing edge. In both cases we may need to update PHIs. */
1132 if ((*path)[i]->type == EDGE_COPY_SRC_JOINER_BLOCK)
1133 {
1134 edge victim;
1135 edge e2;
1136
1137 gcc_assert (has_joiner);
1138
1139 /* This updates the PHIs at the destination of the duplicate
1140 block. Pass 0 instead of i if we are threading a path which
1141 has multiple incoming edges. */
1142 update_destination_phis (local_info->bb, rd->dup_blocks[count],
1143 path, multi_incomings ? 0 : i);
1144
1145 /* Find the edge from the duplicate block to the block we're
1146 threading through. That's the edge we want to redirect. */
1147 victim = find_edge (rd->dup_blocks[count], (*path)[i]->e->dest);
1148
1149 /* If there are no remaining blocks on the path to duplicate,
1150 then redirect VICTIM to the final destination of the jump
1151 threading path. */
1152 if (!any_remaining_duplicated_blocks (path, i))
1153 {
1154 e2 = redirect_edge_and_branch (victim, elast->dest);
1155 /* If we redirected the edge, then we need to copy PHI arguments
1156 at the target. If the edge already existed (e2 != victim
1157 case), then the PHIs in the target already have the correct
1158 arguments. */
1159 if (e2 == victim)
1160 copy_phi_args (e2->dest, elast, e2,
1161 path, multi_incomings ? 0 : i);
1162 }
1163 else
1164 {
1165 /* Redirect VICTIM to the next duplicated block in the path. */
1166 e2 = redirect_edge_and_branch (victim, rd->dup_blocks[count + 1]);
1167
1168 /* We need to update the PHIs in the next duplicated block. We
1169 want the new PHI args to have the same value as they had
1170 in the source of the next duplicate block.
1171
1172 Thus, we need to know which edge we traversed into the
1173 source of the duplicate. Furthermore, we may have
1174 traversed many edges to reach the source of the duplicate.
1175
1176 Walk through the path starting at element I until we
1177 hit an edge marked with EDGE_COPY_SRC_BLOCK. We want
1178 the edge from the prior element. */
1179 for (unsigned int j = i + 1; j < path->length (); j++)
1180 {
1181 if ((*path)[j]->type == EDGE_COPY_SRC_BLOCK)
1182 {
1183 copy_phi_arg_into_existing_phi ((*path)[j - 1]->e, e2);
1184 break;
1185 }
1186 }
1187 }
1188
1189 /* Update the counts and frequency of both the original block
1190 and path edge, and the duplicates. The path duplicate's
1191 incoming count and frequency are the totals for all edges
1192 incoming to this jump threading path computed earlier.
1193 And we know that the duplicated path will have path_out_count
1194 flowing out of it (i.e. along the duplicated path out of the
1195 duplicated joiner). */
1196 update_profile (epath, e2, path_in_count, path_out_count,
1197 path_in_freq);
1198
1199 /* Next we need to update the counts of the original and duplicated
1200 edges from the joiner that go off path. */
1201 update_joiner_offpath_counts (epath, e2->src, path_in_count,
1202 path_out_count);
1203
1204 /* Finally, we need to set the probabilities on the duplicated
1205 edges out of the duplicated joiner (e2->src). The probabilities
1206 along the original path will all be updated below after we finish
1207 processing the whole path. */
1208 recompute_probabilities (e2->src);
1209
1210 /* Record the frequency flowing to the downstream duplicated
1211 path blocks. */
1212 cur_path_freq = EDGE_FREQUENCY (e2);
1213 }
1214 else if ((*path)[i]->type == EDGE_COPY_SRC_BLOCK)
1215 {
1216 remove_ctrl_stmt_and_useless_edges (rd->dup_blocks[count], NULL);
1217 create_edge_and_update_destination_phis (rd, rd->dup_blocks[count],
1218 multi_incomings ? 0 : i);
1219 if (count == 1)
1220 single_succ_edge (rd->dup_blocks[1])->aux = NULL;
1221
1222 /* Update the counts and frequency of both the original block
1223 and path edge, and the duplicates. Since we are now after
1224 any joiner that may have existed on the path, the count
1225 flowing along the duplicated threaded path is path_out_count.
1226 If we didn't have a joiner, then cur_path_freq was the sum
1227 of the total frequencies along all incoming edges to the
1228 thread path (path_in_freq). If we had a joiner, it would have
1229 been updated at the end of that handling to the edge frequency
1230 along the duplicated joiner path edge. */
1231 update_profile (epath, EDGE_SUCC (rd->dup_blocks[count], 0),
1232 path_out_count, path_out_count,
1233 cur_path_freq);
1234 }
1235 else
1236 {
1237 /* No copy case. In this case we don't have an equivalent block
1238 on the duplicated thread path to update, but we do need
1239 to remove the portion of the counts/freqs that were moved
1240 to the duplicated path from the counts/freqs flowing through
1241 this block on the original path. Since all the no-copy edges
1242 are after any joiner, the removed count is the same as
1243 path_out_count.
1244
1245 If we didn't have a joiner, then cur_path_freq was the sum
1246 of the total frequencies along all incoming edges to the
1247 thread path (path_in_freq). If we had a joiner, it would have
1248 been updated at the end of that handling to the edge frequency
1249 along the duplicated joiner path edge. */
1250 update_profile (epath, NULL, path_out_count, path_out_count,
1251 cur_path_freq);
1252 }
1253
1254 /* Increment the index into the duplicated path when we processed
1255 a duplicated block. */
1256 if ((*path)[i]->type == EDGE_COPY_SRC_JOINER_BLOCK
1257 || (*path)[i]->type == EDGE_COPY_SRC_BLOCK)
1258 {
1259 count++;
1260 }
1261 }
1262
1263 /* Now walk orig blocks and update their probabilities, since the
1264 counts and freqs should be updated properly by the above loop. */
1265 for (unsigned int i = 1; i < path->length (); i++)
1266 {
1267 edge epath = (*path)[i]->e;
1268 recompute_probabilities (epath->src);
1269 }
1270
1271 /* Done with all profile and frequency updates, clear counts if they
1272 were copied. */
1273 if (do_freqs_to_counts)
1274 clear_counts_path (rd);
1275 }
1276
1277 /* Hash table traversal callback routine to create duplicate blocks. */
1278
1279 int
1280 ssa_create_duplicates (struct redirection_data **slot,
1281 ssa_local_info_t *local_info)
1282 {
1283 struct redirection_data *rd = *slot;
1284
1285 /* The second duplicated block in a jump threading path is specific
1286 to the path. So it gets stored in RD rather than in LOCAL_DATA.
1287
1288 Each time we're called, we have to look through the path and see
1289 if a second block needs to be duplicated.
1290
1291 Note the search starts with the third edge on the path. The first
1292 edge is the incoming edge, the second edge always has its source
1293 duplicated. Thus we start our search with the third edge. */
1294 vec<jump_thread_edge *> *path = rd->path;
1295 for (unsigned int i = 2; i < path->length (); i++)
1296 {
1297 if ((*path)[i]->type == EDGE_COPY_SRC_BLOCK
1298 || (*path)[i]->type == EDGE_COPY_SRC_JOINER_BLOCK)
1299 {
1300 create_block_for_threading ((*path)[i]->e->src, rd, 1,
1301 &local_info->duplicate_blocks);
1302 break;
1303 }
1304 }
1305
1306 /* Create a template block if we have not done so already. Otherwise
1307 use the template to create a new block. */
1308 if (local_info->template_block == NULL)
1309 {
1310 create_block_for_threading ((*path)[1]->e->src, rd, 0,
1311 &local_info->duplicate_blocks);
1312 local_info->template_block = rd->dup_blocks[0];
1313
1314 /* We do not create any outgoing edges for the template. We will
1315 take care of that in a later traversal. That way we do not
1316 create edges that are going to just be deleted. */
1317 }
1318 else
1319 {
1320 create_block_for_threading (local_info->template_block, rd, 0,
1321 &local_info->duplicate_blocks);
1322
1323 /* Go ahead and wire up outgoing edges and update PHIs for the duplicate
1324 block. */
1325 ssa_fix_duplicate_block_edges (rd, local_info);
1326 }
1327
1328 /* Keep walking the hash table. */
1329 return 1;
1330 }
1331
1332 /* We did not create any outgoing edges for the template block during
1333 block creation. This hash table traversal callback creates the
1334 outgoing edge for the template block. */
1335
1336 inline int
1337 ssa_fixup_template_block (struct redirection_data **slot,
1338 ssa_local_info_t *local_info)
1339 {
1340 struct redirection_data *rd = *slot;
1341
1342 /* If this is the template block, halt the traversal after updating
1343 it appropriately.
1344
1345 If we were threading through a joiner block, then we want
1346 to keep its control statement and redirect an outgoing edge.
1347 Else we want to remove the control statement & edges, then create
1348 a new outgoing edge. In both cases we may need to update PHIs. */
1349 if (rd->dup_blocks[0] && rd->dup_blocks[0] == local_info->template_block)
1350 {
1351 ssa_fix_duplicate_block_edges (rd, local_info);
1352 return 0;
1353 }
1354
1355 return 1;
1356 }
1357
1358 /* Hash table traversal callback to redirect each incoming edge
1359 associated with this hash table element to its new destination. */
1360
1361 int
1362 ssa_redirect_edges (struct redirection_data **slot,
1363 ssa_local_info_t *local_info)
1364 {
1365 struct redirection_data *rd = *slot;
1366 struct el *next, *el;
1367
1368 /* Walk over all the incoming edges associated with this
1369 hash table entry. */
1370 for (el = rd->incoming_edges; el; el = next)
1371 {
1372 edge e = el->e;
1373 vec<jump_thread_edge *> *path = THREAD_PATH (e);
1374
1375 /* Go ahead and free this element from the list. Doing this now
1376 avoids the need for another list walk when we destroy the hash
1377 table. */
1378 next = el->next;
1379 free (el);
1380
1381 thread_stats.num_threaded_edges++;
1382
1383 if (rd->dup_blocks[0])
1384 {
1385 edge e2;
1386
1387 if (dump_file && (dump_flags & TDF_DETAILS))
1388 fprintf (dump_file, " Threaded jump %d --> %d to %d\n",
1389 e->src->index, e->dest->index, rd->dup_blocks[0]->index);
1390
1391 /* If we redirect a loop latch edge cancel its loop. */
1392 if (e->src == e->src->loop_father->latch)
1393 mark_loop_for_removal (e->src->loop_father);
1394
1395 /* Redirect the incoming edge (possibly to the joiner block) to the
1396 appropriate duplicate block. */
1397 e2 = redirect_edge_and_branch (e, rd->dup_blocks[0]);
1398 gcc_assert (e == e2);
1399 flush_pending_stmts (e2);
1400 }
1401
1402 /* Go ahead and clear E->aux. It's not needed anymore and failure
1403 to clear it will cause all kinds of unpleasant problems later. */
1404 delete_jump_thread_path (path);
1405 e->aux = NULL;
1406
1407 }
1408
1409 /* Indicate that we actually threaded one or more jumps. */
1410 if (rd->incoming_edges)
1411 local_info->jumps_threaded = true;
1412
1413 return 1;
1414 }
1415
1416 /* Return true if this block has no executable statements other than
1417 a simple ctrl flow instruction. When the number of outgoing edges
1418 is one, this is equivalent to a "forwarder" block. */
1419
1420 static bool
1421 redirection_block_p (basic_block bb)
1422 {
1423 gimple_stmt_iterator gsi;
1424
1425 /* Advance to the first executable statement. */
1426 gsi = gsi_start_bb (bb);
1427 while (!gsi_end_p (gsi)
1428 && (gimple_code (gsi_stmt (gsi)) == GIMPLE_LABEL
1429 || is_gimple_debug (gsi_stmt (gsi))
1430 || gimple_nop_p (gsi_stmt (gsi))
1431 || gimple_clobber_p (gsi_stmt (gsi))))
1432 gsi_next (&gsi);
1433
1434 /* Check if this is an empty block. */
1435 if (gsi_end_p (gsi))
1436 return true;
1437
1438 /* Test that we've reached the terminating control statement. */
1439 return gsi_stmt (gsi)
1440 && (gimple_code (gsi_stmt (gsi)) == GIMPLE_COND
1441 || gimple_code (gsi_stmt (gsi)) == GIMPLE_GOTO
1442 || gimple_code (gsi_stmt (gsi)) == GIMPLE_SWITCH);
1443 }
1444
1445 /* BB is a block which ends with a COND_EXPR or SWITCH_EXPR and when BB
1446 is reached via one or more specific incoming edges, we know which
1447 outgoing edge from BB will be traversed.
1448
1449 We want to redirect those incoming edges to the target of the
1450 appropriate outgoing edge. Doing so avoids a conditional branch
1451 and may expose new optimization opportunities. Note that we have
1452 to update dominator tree and SSA graph after such changes.
1453
1454 The key to keeping the SSA graph update manageable is to duplicate
1455 the side effects occurring in BB so that those side effects still
1456 occur on the paths which bypass BB after redirecting edges.
1457
1458 We accomplish this by creating duplicates of BB and arranging for
1459 the duplicates to unconditionally pass control to one specific
1460 successor of BB. We then revector the incoming edges into BB to
1461 the appropriate duplicate of BB.
1462
1463 If NOLOOP_ONLY is true, we only perform the threading as long as it
1464 does not affect the structure of the loops in a nontrivial way.
1465
1466 If JOINERS is true, then thread through joiner blocks as well. */
1467
1468 static bool
1469 thread_block_1 (basic_block bb, bool noloop_only, bool joiners)
1470 {
1471 /* E is an incoming edge into BB that we may or may not want to
1472 redirect to a duplicate of BB. */
1473 edge e, e2;
1474 edge_iterator ei;
1475 ssa_local_info_t local_info;
1476
1477 local_info.duplicate_blocks = BITMAP_ALLOC (NULL);
1478
1479 /* To avoid scanning a linear array for the element we need we instead
1480 use a hash table. For normal code there should be no noticeable
1481 difference. However, if we have a block with a large number of
1482 incoming and outgoing edges such linear searches can get expensive. */
1483 redirection_data
1484 = new hash_table<struct redirection_data> (EDGE_COUNT (bb->succs));
1485
1486 /* Record each unique threaded destination into a hash table for
1487 efficient lookups. */
1488 FOR_EACH_EDGE (e, ei, bb->preds)
1489 {
1490 if (e->aux == NULL)
1491 continue;
1492
1493 vec<jump_thread_edge *> *path = THREAD_PATH (e);
1494
1495 if (((*path)[1]->type == EDGE_COPY_SRC_JOINER_BLOCK && !joiners)
1496 || ((*path)[1]->type == EDGE_COPY_SRC_BLOCK && joiners))
1497 continue;
1498
1499 e2 = path->last ()->e;
1500 if (!e2 || noloop_only)
1501 {
1502 /* If NOLOOP_ONLY is true, we only allow threading through the
1503 header of a loop to exit edges. */
1504
1505 /* One case occurs when there was a loop header buried in a jump
1506 threading path that crosses loop boundaries. We do not try
1507 and thread this elsewhere, so just cancel the jump threading
1508 request by clearing the AUX field now. */
1509 if ((bb->loop_father != e2->src->loop_father
1510 && !loop_exit_edge_p (e2->src->loop_father, e2))
1511 || (e2->src->loop_father != e2->dest->loop_father
1512 && !loop_exit_edge_p (e2->src->loop_father, e2)))
1513 {
1514 /* Since this case is not handled by our special code
1515 to thread through a loop header, we must explicitly
1516 cancel the threading request here. */
1517 delete_jump_thread_path (path);
1518 e->aux = NULL;
1519 continue;
1520 }
1521
1522 /* Another case occurs when trying to thread through our
1523 own loop header, possibly from inside the loop. We will
1524 thread these later. */
1525 unsigned int i;
1526 for (i = 1; i < path->length (); i++)
1527 {
1528 if ((*path)[i]->e->src == bb->loop_father->header
1529 && (!loop_exit_edge_p (bb->loop_father, e2)
1530 || (*path)[1]->type == EDGE_COPY_SRC_JOINER_BLOCK))
1531 break;
1532 }
1533
1534 if (i != path->length ())
1535 continue;
1536 }
1537
1538 /* Insert the outgoing edge into the hash table if it is not
1539 already in the hash table. */
1540 lookup_redirection_data (e, INSERT);
1541 }
1542
1543 /* We do not update dominance info. */
1544 free_dominance_info (CDI_DOMINATORS);
1545
1546 /* We know we only thread through the loop header to loop exits.
1547 Let the basic block duplication hook know we are not creating
1548 a multiple entry loop. */
1549 if (noloop_only
1550 && bb == bb->loop_father->header)
1551 set_loop_copy (bb->loop_father, loop_outer (bb->loop_father));
1552
1553 /* Now create duplicates of BB.
1554
1555 Note that for a block with a high outgoing degree we can waste
1556 a lot of time and memory creating and destroying useless edges.
1557
1558 So we first duplicate BB and remove the control structure at the
1559 tail of the duplicate as well as all outgoing edges from the
1560 duplicate. We then use that duplicate block as a template for
1561 the rest of the duplicates. */
1562 local_info.template_block = NULL;
1563 local_info.bb = bb;
1564 local_info.jumps_threaded = false;
1565 redirection_data->traverse <ssa_local_info_t *, ssa_create_duplicates>
1566 (&local_info);
1567
1568 /* The template does not have an outgoing edge. Create that outgoing
1569 edge and update PHI nodes at the edge's target as necessary.
1570
1571 We do this after creating all the duplicates to avoid creating
1572 unnecessary edges. */
1573 redirection_data->traverse <ssa_local_info_t *, ssa_fixup_template_block>
1574 (&local_info);
1575
1576 /* The hash table traversals above created the duplicate blocks (and the
1577 statements within the duplicate blocks). This loop creates PHI nodes for
1578 the duplicated blocks and redirects the incoming edges into BB to reach
1579 the duplicates of BB. */
1580 redirection_data->traverse <ssa_local_info_t *, ssa_redirect_edges>
1581 (&local_info);
1582
1583 /* Done with this block. Clear REDIRECTION_DATA. */
1584 delete redirection_data;
1585 redirection_data = NULL;
1586
1587 if (noloop_only
1588 && bb == bb->loop_father->header)
1589 set_loop_copy (bb->loop_father, NULL);
1590
1591 BITMAP_FREE (local_info.duplicate_blocks);
1592 local_info.duplicate_blocks = NULL;
1593
1594 /* Indicate to our caller whether or not any jumps were threaded. */
1595 return local_info.jumps_threaded;
1596 }
1597
1598 /* Wrapper for thread_block_1 so that we can first handle jump
1599 thread paths which do not involve copying joiner blocks, then
1600 handle jump thread paths which have joiner blocks.
1601
1602 By doing things this way we can be as aggressive as possible and
1603 not worry that copying a joiner block will create a jump threading
1604 opportunity. */
1605
1606 static bool
1607 thread_block (basic_block bb, bool noloop_only)
1608 {
1609 bool retval;
1610 retval = thread_block_1 (bb, noloop_only, false);
1611 retval |= thread_block_1 (bb, noloop_only, true);
1612 return retval;
1613 }
1614
1615
1616 /* Threads edge E through E->dest to the second edge on E's jump thread
1617 path. Returns the copy of E->dest created during threading, or E->dest if
1618 it was not necessary to copy it (E is its single predecessor). */
1619
1620 static basic_block
1621 thread_single_edge (edge e)
1622 {
1623 basic_block bb = e->dest;
1624 struct redirection_data rd;
1625 vec<jump_thread_edge *> *path = THREAD_PATH (e);
1626 edge eto = (*path)[1]->e;
1627
1628 delete_jump_thread_path (path);
1629 e->aux = NULL;
1630
1631 thread_stats.num_threaded_edges++;
1632
1633 if (single_pred_p (bb))
1634 {
1635 /* If BB has just a single predecessor, we need only remove the
1636 control statement at its end and the outgoing edges other than ETO. */
1637 remove_ctrl_stmt_and_useless_edges (bb, eto->dest);
1638
1639 /* And fixup the flags on the single remaining edge. */
1640 eto->flags &= ~(EDGE_TRUE_VALUE | EDGE_FALSE_VALUE | EDGE_ABNORMAL);
1641 eto->flags |= EDGE_FALLTHRU;
1642
1643 return bb;
1644 }
1645
1646 /* Otherwise, we need to create a copy. */
1647 if (e->dest == eto->src)
1648 update_bb_profile_for_threading (bb, EDGE_FREQUENCY (e), e->count, eto);
1649
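/* Build a minimal two-entry jump thread path (E followed by ETO) so that
the generic block copying helpers below can be reused for this
single-edge case. */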
1650 vec<jump_thread_edge *> *npath = new vec<jump_thread_edge *> ();
1651 jump_thread_edge *x = new jump_thread_edge (e, EDGE_START_JUMP_THREAD);
1652 npath->safe_push (x);
1653
1654 x = new jump_thread_edge (eto, EDGE_COPY_SRC_BLOCK);
1655 npath->safe_push (x);
1656 rd.path = npath;
1657
1658 create_block_for_threading (bb, &rd, 0, NULL);
1659 remove_ctrl_stmt_and_useless_edges (rd.dup_blocks[0], NULL);
1660 create_edge_and_update_destination_phis (&rd, rd.dup_blocks[0], 0);
1661
1662 if (dump_file && (dump_flags & TDF_DETAILS))
1663 fprintf (dump_file, " Threaded jump %d --> %d to %d\n",
1664 e->src->index, e->dest->index, rd.dup_blocks[0]->index);
1665
1666 rd.dup_blocks[0]->count = e->count;
1667 rd.dup_blocks[0]->frequency = EDGE_FREQUENCY (e);
1668 single_succ_edge (rd.dup_blocks[0])->count = e->count;
1669 redirect_edge_and_branch (e, rd.dup_blocks[0]);
1670 flush_pending_stmts (e);
1671
1672 delete_jump_thread_path (npath);
1673 return rd.dup_blocks[0];
1674 }
1675
1676 /* Callback for dfs_enumerate_from. Returns true if BB is different
1677 from STOP and DBDS_CE_STOP. */
1678
1679 static basic_block dbds_ce_stop;
1680 static bool
1681 dbds_continue_enumeration_p (const_basic_block bb, const void *stop)
1682 {
1683 return (bb != (const_basic_block) stop
1684 && bb != dbds_ce_stop);
1685 }
1686
1687 /* Evaluates the dominance relationship between BB and the latch of LOOP,
1688 and returns the result. */
1689
1690 enum bb_dom_status
1691 {
1692 /* BB does not dominate latch of the LOOP. */
1693 DOMST_NONDOMINATING,
1694 /* The LOOP is broken (there is no path from the header to its latch). */
1695 DOMST_LOOP_BROKEN,
1696 /* BB dominates the latch of the LOOP. */
1697 DOMST_DOMINATING
1698 };
1699
1700 static enum bb_dom_status
1701 determine_bb_domination_status (struct loop *loop, basic_block bb)
1702 {
1703 basic_block *bblocks;
1704 unsigned nblocks, i;
1705 bool bb_reachable = false;
1706 edge_iterator ei;
1707 edge e;
1708
1709 /* This function assumes BB is a successor of LOOP->header.
1710 If that is not the case return DOMST_NONDOMINATING which
1711 is always safe. */
1712 {
1713 bool ok = false;
1714
1715 FOR_EACH_EDGE (e, ei, bb->preds)
1716 {
1717 if (e->src == loop->header)
1718 {
1719 ok = true;
1720 break;
1721 }
1722 }
1723
1724 if (!ok)
1725 return DOMST_NONDOMINATING;
1726 }
1727
1728 if (bb == loop->latch)
1729 return DOMST_DOMINATING;
1730
1731 /* Check that BB dominates LOOP->latch, and that it is back-reachable
1732 from it. */
1733
1734 bblocks = XCNEWVEC (basic_block, loop->num_nodes);
1735 dbds_ce_stop = loop->header;
1736 nblocks = dfs_enumerate_from (loop->latch, 1, dbds_continue_enumeration_p,
1737 bblocks, loop->num_nodes, bb);
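/* BBLOCKS now holds the blocks found by walking predecessor edges from
the latch, stopping at BB and at the loop header. If the header feeds
any of these blocks directly, there is a path from the header to the
latch which bypasses BB, so BB cannot dominate the latch. */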
1738 for (i = 0; i < nblocks; i++)
1739 FOR_EACH_EDGE (e, ei, bblocks[i]->preds)
1740 {
1741 if (e->src == loop->header)
1742 {
1743 free (bblocks);
1744 return DOMST_NONDOMINATING;
1745 }
1746 if (e->src == bb)
1747 bb_reachable = true;
1748 }
1749
1750 free (bblocks);
1751 return (bb_reachable ? DOMST_DOMINATING : DOMST_LOOP_BROKEN);
1752 }
1753
1754 /* Return true if BB is part of the new pre-header that is created
1755 when threading the latch to DATA. */
1756
1757 static bool
1758 def_split_header_continue_p (const_basic_block bb, const void *data)
1759 {
1760 const_basic_block new_header = (const_basic_block) data;
1761 const struct loop *l;
1762
1763 if (bb == new_header
1764 || loop_depth (bb->loop_father) < loop_depth (new_header->loop_father))
1765 return false;
1766 for (l = bb->loop_father; l; l = loop_outer (l))
1767 if (l == new_header->loop_father)
1768 return true;
1769 return false;
1770 }
1771
1772 /* Thread jumps through the header of LOOP. Returns true if cfg changes.
1773 If MAY_PEEL_LOOP_HEADERS is false, we avoid threading from entry edges
1774 to the inside of the loop. */
1775
1776 static bool
1777 thread_through_loop_header (struct loop *loop, bool may_peel_loop_headers)
1778 {
1779 basic_block header = loop->header;
1780 edge e, tgt_edge, latch = loop_latch_edge (loop);
1781 edge_iterator ei;
1782 basic_block tgt_bb, atgt_bb;
1783 enum bb_dom_status domst;
1784
1785 /* We have already threaded through headers to exits, so all the threading
1786 requests now are to the inside of the loop. We need to avoid creating
1787 irreducible regions (i.e., loops with more than one entry block), and
1788 also loops with several latch edges, or new subloops of the loop (although
1789 there are cases where it might be appropriate, it is difficult to decide,
1790 and doing it wrongly may confuse other optimizers).
1791
1792 We could handle more general cases here. However, the intention is to
1793 preserve some information about the loop, which is impossible if its
1794 structure changes significantly, in a way that is not well understood.
1795 Thus we only handle a few important special cases, in which updating
1796 the loop-carried information should also be feasible:
1797
1798 1) Propagation of latch edge to a block that dominates the latch block
1799 of a loop. This aims to handle the following idiom:
1800
1801 first = 1;
1802 while (1)
1803 {
1804 if (first)
1805 initialize;
1806 first = 0;
1807 body;
1808 }
1809
1810 After threading the latch edge, this becomes
1811
1812 first = 1;
1813 if (first)
1814 initialize;
1815 while (1)
1816 {
1817 first = 0;
1818 body;
1819 }
1820
1821 The original header of the loop is moved out of it, and we may thread
1822 the remaining edges through it without further constraints.
1823
1824 2) All entry edges are propagated to a single basic block that dominates
1825 the latch block of the loop. This aims to handle the following idiom
1826 (normally created for "for" loops):
1827
1828 i = 0;
1829 while (1)
1830 {
1831 if (i >= 100)
1832 break;
1833 body;
1834 i++;
1835 }
1836
1837 This becomes
1838
1839 i = 0;
1840 while (1)
1841 {
1842 body;
1843 i++;
1844 if (i >= 100)
1845 break;
1846 }
1847 */
1848
1849 /* Threading through the header won't improve the code if the header has just
1850 one successor. */
1851 if (single_succ_p (header))
1852 goto fail;
1853
1854 /* If we threaded the latch using a joiner block, we cancel the
1855 threading opportunity out of an abundance of caution. However,
1856 still allow threading from outside to inside the loop. */
1857 if (latch->aux)
1858 {
1859 vec<jump_thread_edge *> *path = THREAD_PATH (latch);
1860 if ((*path)[1]->type == EDGE_COPY_SRC_JOINER_BLOCK)
1861 {
1862 delete_jump_thread_path (path);
1863 latch->aux = NULL;
1864 }
1865 }
1866
1867 if (latch->aux)
1868 {
1869 vec<jump_thread_edge *> *path = THREAD_PATH (latch);
1870 tgt_edge = (*path)[1]->e;
1871 tgt_bb = tgt_edge->dest;
1872 }
1873 else if (!may_peel_loop_headers
1874 && !redirection_block_p (loop->header))
1875 goto fail;
1876 else
1877 {
1878 tgt_bb = NULL;
1879 tgt_edge = NULL;
1880 FOR_EACH_EDGE (e, ei, header->preds)
1881 {
1882 if (!e->aux)
1883 {
1884 if (e == latch)
1885 continue;
1886
1887 /* If latch is not threaded, and there is a header
1888 edge that is not threaded, we would create a loop
1889 with multiple entries. */
1890 goto fail;
1891 }
1892
1893 vec<jump_thread_edge *> *path = THREAD_PATH (e);
1894
1895 if ((*path)[1]->type == EDGE_COPY_SRC_JOINER_BLOCK)
1896 goto fail;
1897 tgt_edge = (*path)[1]->e;
1898 atgt_bb = tgt_edge->dest;
1899 if (!tgt_bb)
1900 tgt_bb = atgt_bb;
1901 /* Two targets of threading would make us create a loop
1902 with multiple entries. */
1903 else if (tgt_bb != atgt_bb)
1904 goto fail;
1905 }
1906
1907 if (!tgt_bb)
1908 {
1909 /* There are no threading requests. */
1910 return false;
1911 }
1912
1913 /* Redirecting to an empty loop latch is useless. */
1914 if (tgt_bb == loop->latch
1915 && empty_block_p (loop->latch))
1916 goto fail;
1917 }
1918
1919 /* The target block must dominate the loop latch, otherwise we would be
1920 creating a subloop. */
1921 domst = determine_bb_domination_status (loop, tgt_bb);
1922 if (domst == DOMST_NONDOMINATING)
1923 goto fail;
1924 if (domst == DOMST_LOOP_BROKEN)
1925 {
1926 /* If the loop ceased to exist, mark it as such, and thread through its
1927 original header. */
1928 mark_loop_for_removal (loop);
1929 return thread_block (header, false);
1930 }
1931
1932 if (tgt_bb->loop_father->header == tgt_bb)
1933 {
1934 /* If the target of the threading is a header of a subloop, we need
1935 to create a preheader for it, so that the headers of the two loops
1936 do not merge. */
1937 if (EDGE_COUNT (tgt_bb->preds) > 2)
1938 {
1939 tgt_bb = create_preheader (tgt_bb->loop_father, 0);
1940 gcc_assert (tgt_bb != NULL);
1941 }
1942 else
1943 tgt_bb = split_edge (tgt_edge);
1944 }
1945
1946 if (latch->aux)
1947 {
1948 basic_block *bblocks;
1949 unsigned nblocks, i;
1950
1951 /* First handle the case where the latch edge is redirected. We are copying
1952 the loop header but not creating a multiple entry loop. Make the
1953 cfg manipulation code aware of that fact. */
1954 set_loop_copy (loop, loop);
1955 loop->latch = thread_single_edge (latch);
1956 set_loop_copy (loop, NULL);
1957 gcc_assert (single_succ (loop->latch) == tgt_bb);
1958 loop->header = tgt_bb;
1959
1960 /* Remove the new pre-header blocks from our loop. */
1961 bblocks = XCNEWVEC (basic_block, loop->num_nodes);
1962 nblocks = dfs_enumerate_from (header, 0, def_split_header_continue_p,
1963 bblocks, loop->num_nodes, tgt_bb);
1964 for (i = 0; i < nblocks; i++)
1965 if (bblocks[i]->loop_father == loop)
1966 {
1967 remove_bb_from_loops (bblocks[i]);
1968 add_bb_to_loop (bblocks[i], loop_outer (loop));
1969 }
1970 free (bblocks);
1971
1972 /* If the new header has multiple latches, mark it so. */
1973 FOR_EACH_EDGE (e, ei, loop->header->preds)
1974 if (e->src->loop_father == loop
1975 && e->src != loop->latch)
1976 {
1977 loop->latch = NULL;
1978 loops_state_set (LOOPS_MAY_HAVE_MULTIPLE_LATCHES);
1979 }
1980
1981 /* Cancel remaining threading requests that would make the
1982 loop a multiple entry loop. */
1983 FOR_EACH_EDGE (e, ei, header->preds)
1984 {
1985 edge e2;
1986
1987 if (e->aux == NULL)
1988 continue;
1989
1990 vec<jump_thread_edge *> *path = THREAD_PATH (e);
1991 e2 = path->last ()->e;
1992
1993 if (e->src->loop_father != e2->dest->loop_father
1994 && e2->dest != loop->header)
1995 {
1996 delete_jump_thread_path (path);
1997 e->aux = NULL;
1998 }
1999 }
2000
2001 /* Thread the remaining edges through the former header. */
2002 thread_block (header, false);
2003 }
2004 else
2005 {
2006 basic_block new_preheader;
2007
2008 /* Now consider the case where entry edges are redirected to the new entry
2009 block. Remember one entry edge, so that we can find the new
2010 preheader (its destination after threading). */
2011 FOR_EACH_EDGE (e, ei, header->preds)
2012 {
2013 if (e->aux)
2014 break;
2015 }
2016
2017 /* The duplicate of the header is the new preheader of the loop. Ensure
2018 that it is placed correctly in the loop hierarchy. */
2019 set_loop_copy (loop, loop_outer (loop));
2020
2021 thread_block (header, false);
2022 set_loop_copy (loop, NULL);
2023 new_preheader = e->dest;
2024
2025 /* Create the new latch block. This is always necessary, as the latch
2026 must have only a single successor, but the original header had at
2027 least two successors. */
2028 loop->latch = NULL;
2029 mfb_kj_edge = single_succ_edge (new_preheader);
2030 loop->header = mfb_kj_edge->dest;
2031 latch = make_forwarder_block (tgt_bb, mfb_keep_just, NULL);
2032 loop->header = latch->dest;
2033 loop->latch = latch->src;
2034 }
2035
2036 return true;
2037
2038 fail:
2039 /* We failed to thread anything. Cancel the requests. */
2040 FOR_EACH_EDGE (e, ei, header->preds)
2041 {
2042 vec<jump_thread_edge *> *path = THREAD_PATH (e);
2043
2044 if (path)
2045 {
2046 delete_jump_thread_path (path);
2047 e->aux = NULL;
2048 }
2049 }
2050 return false;
2051 }
2052
2053 /* E1 and E2 are edges into the same basic block. Return TRUE if the
2054 PHI arguments associated with those edges are equal or there are no
2055 PHI arguments, otherwise return FALSE. */
2056
2057 static bool
2058 phi_args_equal_on_edges (edge e1, edge e2)
2059 {
2060 gphi_iterator gsi;
2061 int indx1 = e1->dest_idx;
2062 int indx2 = e2->dest_idx;
2063
2064 for (gsi = gsi_start_phis (e1->dest); !gsi_end_p (gsi); gsi_next (&gsi))
2065 {
2066 gphi *phi = gsi.phi ();
2067
2068 if (!operand_equal_p (gimple_phi_arg_def (phi, indx1),
2069 gimple_phi_arg_def (phi, indx2), 0))
2070 return false;
2071 }
2072 return true;
2073 }
2074
2075 /* Walk through the registered jump threads and convert them into a
2076 form convenient for this pass.
2077
2078 Any block which has incoming edges threaded to outgoing edges
2079 will have its entry in THREADED_BLOCK set.
2080
2081 Any threaded edge will have its jump thread path stored in the
2082 original edge's AUX field.
2083
2084 This form avoids the need to walk all the edges in the CFG to
2085 discover blocks which need processing and avoids unnecessary
2086 hash table lookups to map from threaded edge to new target. */
2087
2088 static void
2089 mark_threaded_blocks (bitmap threaded_blocks)
2090 {
2091 unsigned int i;
2092 bitmap_iterator bi;
2093 bitmap tmp = BITMAP_ALLOC (NULL);
2094 basic_block bb;
2095 edge e;
2096 edge_iterator ei;
2097
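/* TMP collects the indices of blocks that are the destination of at
least one converted threading request; it is filtered into
THREADED_BLOCKS further below. */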
2098 /* It is possible to have jump threads in which one is a subpath
2099 of the other. For example, (A, B), (B, C), (C, D) where B is a joiner
2100 block and (B, C), (C, D) where no joiner block exists.
2101
2102 When this occurs ignore the jump thread request with the joiner
2103 block. It's totally subsumed by the simpler jump thread request.
2104
2105 This results in less block copying and simpler CFGs. More importantly,
2106 if we were to duplicate the joiner block B in this case, we would create
2107 a new threading opportunity that we would not be able to optimize
2108 until the next jump threading iteration.
2109
2110 So first convert the jump thread requests which do not require a
2111 joiner block. */
2112 for (i = 0; i < paths.length (); i++)
2113 {
2114 vec<jump_thread_edge *> *path = paths[i];
2115
2116 if ((*path)[1]->type != EDGE_COPY_SRC_JOINER_BLOCK)
2117 {
2118 edge e = (*path)[0]->e;
2119 e->aux = (void *)path;
2120 bitmap_set_bit (tmp, e->dest->index);
2121 }
2122 }
2123
2124 /* Now iterate again, converting cases where we want to thread
2125 through a joiner block, but only if no other edge on the path
2126 already has a jump thread attached to it. We do this in two passes,
2127 to avoid situations where the order in the paths vec can hide overlapping
2128 threads (the path is recorded on the incoming edge, so we would miss
2129 cases where the second path starts at a downstream edge on the same
2130 path). First record all joiner paths, deleting any in the unexpected
2131 case where there is already a path for that incoming edge. */
2132 for (i = 0; i < paths.length (); i++)
2133 {
2134 vec<jump_thread_edge *> *path = paths[i];
2135
2136 if ((*path)[1]->type == EDGE_COPY_SRC_JOINER_BLOCK)
2137 {
2138 /* Attach the path to the starting edge if none is yet recorded. */
2139 if ((*path)[0]->e->aux == NULL)
2140 {
2141 (*path)[0]->e->aux = path;
2142 }
2143 else
2144 {
2145 paths.unordered_remove (i);
2146 if (dump_file && (dump_flags & TDF_DETAILS))
2147 dump_jump_thread_path (dump_file, *path, false);
2148 delete_jump_thread_path (path);
2149 }
2150 }
2151 }
2152 /* Second, look for paths that have any other jump thread attached to
2153 them, and either finish converting them or cancel them. */
2154 for (i = 0; i < paths.length (); i++)
2155 {
2156 vec<jump_thread_edge *> *path = paths[i];
2157 edge e = (*path)[0]->e;
2158
2159 if ((*path)[1]->type == EDGE_COPY_SRC_JOINER_BLOCK && e->aux == path)
2160 {
2161 unsigned int j;
2162 for (j = 1; j < path->length (); j++)
2163 if ((*path)[j]->e->aux != NULL)
2164 break;
2165
2166 /* If we iterated through the entire path without exiting the loop,
2167 then we are good to go; record it. */
2168 if (j == path->length ())
2169 bitmap_set_bit (tmp, e->dest->index);
2170 else
2171 {
2172 e->aux = NULL;
2173 paths.unordered_remove (i);
2174 if (dump_file && (dump_flags & TDF_DETAILS))
2175 dump_jump_thread_path (dump_file, *path, false);
2176 delete_jump_thread_path (path);
2177 }
2178 }
2179 }
2180
2181 /* If optimizing for size, only thread through a block if we don't have
2182 to duplicate it or it's an otherwise empty redirection block. */
2183 if (optimize_function_for_size_p (cfun))
2184 {
2185 EXECUTE_IF_SET_IN_BITMAP (tmp, 0, i, bi)
2186 {
2187 bb = BASIC_BLOCK_FOR_FN (cfun, i);
2188 if (EDGE_COUNT (bb->preds) > 1
2189 && !redirection_block_p (bb))
2190 {
2191 FOR_EACH_EDGE (e, ei, bb->preds)
2192 {
2193 if (e->aux)
2194 {
2195 vec<jump_thread_edge *> *path = THREAD_PATH (e);
2196 delete_jump_thread_path (path);
2197 e->aux = NULL;
2198 }
2199 }
2200 }
2201 else
2202 bitmap_set_bit (threaded_blocks, i);
2203 }
2204 }
2205 else
2206 bitmap_copy (threaded_blocks, tmp);
2207
2208 /* Look for jump threading paths which cross multiple loop headers.
2209
2210 The code to thread through loop headers will change the CFG in ways
2211 that break assumptions made by the loop optimization code.
2212
2213 We don't want to blindly cancel the requests. We can instead do better
2214 by trimming off the end of the jump thread path. */
2215 EXECUTE_IF_SET_IN_BITMAP (tmp, 0, i, bi)
2216 {
2217 basic_block bb = BASIC_BLOCK_FOR_FN (cfun, i);
2218 FOR_EACH_EDGE (e, ei, bb->preds)
2219 {
2220 if (e->aux)
2221 {
2222 vec<jump_thread_edge *> *path = THREAD_PATH (e);
2223
2224 for (unsigned int i = 0, crossed_headers = 0;
2225 i < path->length ();
2226 i++)
2227 {
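/* Walk the path and count how many loop headers it enters. Once a
second header is reached, trim the rest of the path so that the
request does not cross multiple loop headers (see the comment
above). */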
2228 basic_block dest = (*path)[i]->e->dest;
2229 crossed_headers += (dest == dest->loop_father->header);
2230 if (crossed_headers > 1)
2231 {
2232 /* Trim from entry I onwards. */
2233 for (unsigned int j = i; j < path->length (); j++)
2234 delete (*path)[j];
2235 path->truncate (i);
2236
2237 /* Now that we've truncated the path, make sure
2238 what's left is still valid. We need at least
2239 two edges on the path and the last edge cannot
2240 be a joiner. This should never happen, but let's
2241 be safe. */
2242 if (path->length () < 2
2243 || (path->last ()->type
2244 == EDGE_COPY_SRC_JOINER_BLOCK))
2245 {
2246 delete_jump_thread_path (path);
2247 e->aux = NULL;
2248 }
2249 break;
2250 }
2251 }
2252 }
2253 }
2254 }
2255
2256 /* If we have a joiner block (J) which has two successors S1 and S2 and
2257 we are threading through S1 and the final destination of the thread
2258 is S2, then we must verify that any PHI nodes in S2 have the same
2259 PHI arguments for the edge J->S2 and J->S1->...->S2.
2260
2261 We used to detect this prior to registering the jump thread, but
2262 that prohibits propagation of edge equivalences into non-dominated
2263 PHI nodes as the equivalency test might occur before propagation.
2264
2265 This must also occur after we truncate any jump threading paths
2266 as this scenario may only show up after truncation.
2267
2268 This works for now, but will need improvement as part of the FSA
2269 optimization.
2270
2271 Note since we've moved the thread request data to the edges,
2272 we have to iterate on those rather than the threaded_edges vector. */
2273 EXECUTE_IF_SET_IN_BITMAP (tmp, 0, i, bi)
2274 {
2275 bb = BASIC_BLOCK_FOR_FN (cfun, i);
2276 FOR_EACH_EDGE (e, ei, bb->preds)
2277 {
2278 if (e->aux)
2279 {
2280 vec<jump_thread_edge *> *path = THREAD_PATH (e);
2281 bool have_joiner = ((*path)[1]->type == EDGE_COPY_SRC_JOINER_BLOCK);
2282
2283 if (have_joiner)
2284 {
2285 basic_block joiner = e->dest;
2286 edge final_edge = path->last ()->e;
2287 basic_block final_dest = final_edge->dest;
2288 edge e2 = find_edge (joiner, final_dest);
2289
2290 if (e2 && !phi_args_equal_on_edges (e2, final_edge))
2291 {
2292 delete_jump_thread_path (path);
2293 e->aux = NULL;
2294 }
2295 }
2296 }
2297 }
2298 }
2299
2300 BITMAP_FREE (tmp);
2301 }
2302
2303
2304 /* Return TRUE if BB ends with a switch statement or a computed goto.
2305 Otherwise return false. */
2306 static bool
2307 bb_ends_with_multiway_branch (basic_block bb)
2308 {
2309 gimple stmt = last_stmt (bb);
2310 if (stmt && gimple_code (stmt) == GIMPLE_SWITCH)
2311 return true;
2312 if (stmt && gimple_code (stmt) == GIMPLE_GOTO
2313 && TREE_CODE (gimple_goto_dest (stmt)) == SSA_NAME)
2314 return true;
2315 return false;
2316 }
2317
2318 /* Verify that the REGION is a valid jump thread. A jump thread is a special
2319 case of a SEME (Single Entry Multiple Exits) region in which all nodes in
2320 the REGION have exactly one incoming edge. The only exception is the first
2321 block, which may not have been connected to the rest of the cfg yet. */
2322
2323 DEBUG_FUNCTION void
2324 verify_jump_thread (basic_block *region, unsigned n_region)
2325 {
2326 for (unsigned i = 0; i < n_region; i++)
2327 gcc_assert (EDGE_COUNT (region[i]->preds) <= 1);
2328 }
2329
2330 /* Return true when BB is one of the first N items in BBS. */
2331
2332 static inline bool
2333 bb_in_bbs (basic_block bb, basic_block *bbs, int n)
2334 {
2335 for (int i = 0; i < n; i++)
2336 if (bb == bbs[i])
2337 return true;
2338
2339 return false;
2340 }
2341
2342 /* Duplicates a jump-thread path of N_REGION basic blocks.
2343 The ENTRY edge is redirected to the duplicate of the region.
2344
2345 Remove the last conditional statement in the last basic block in the REGION,
2346 and create a single fallthru edge pointing to the same destination as the
2347 EXIT edge.
2348
2349 The new basic blocks are stored to REGION_COPY in the same order as they had
2350 in REGION, provided that REGION_COPY is not NULL.
2351
2352 Returns false if it is unable to copy the region, true otherwise. */
2353
2354 static bool
2355 duplicate_thread_path (edge entry, edge exit,
2356 basic_block *region, unsigned n_region,
2357 basic_block *region_copy)
2358 {
2359 unsigned i;
2360 bool free_region_copy = false;
2361 struct loop *loop = entry->dest->loop_father;
2362 edge exit_copy;
2363 edge redirected;
2364 int total_freq = 0, entry_freq = 0;
2365 gcov_type total_count = 0, entry_count = 0;
2366
2367 if (!can_copy_bbs_p (region, n_region))
2368 return false;
2369
2370 /* Some sanity checking. Note that we do not check for all possible
2371 misuses of the functions. I.e., if you ask to copy something weird,
2372 it will work, but the state of structures probably will not be
2373 correct. */
2374 for (i = 0; i < n_region; i++)
2375 {
2376 /* We do not handle subloops, i.e. all the blocks must belong to the
2377 same loop. */
2378 if (region[i]->loop_father != loop)
2379 return false;
2380 }
2381
2382 initialize_original_copy_tables ();
2383
2384 set_loop_copy (loop, loop);
2385
2386 if (!region_copy)
2387 {
2388 region_copy = XNEWVEC (basic_block, n_region);
2389 free_region_copy = true;
2390 }
2391
2392 if (entry->dest->count)
2393 {
2394 total_count = entry->dest->count;
2395 entry_count = entry->count;
2396 /* Fix up corner cases, to avoid division by zero or creation of negative
2397 frequencies. */
2398 if (entry_count > total_count)
2399 entry_count = total_count;
2400 }
2401 else
2402 {
2403 total_freq = entry->dest->frequency;
2404 entry_freq = EDGE_FREQUENCY (entry);
2405 /* Fix up corner cases, to avoid division by zero or creation of negative
2406 frequencies. */
2407 if (total_freq == 0)
2408 total_freq = 1;
2409 else if (entry_freq > total_freq)
2410 entry_freq = total_freq;
2411 }
2412
2413 copy_bbs (region, n_region, region_copy, &exit, 1, &exit_copy, loop,
2414 split_edge_bb_loc (entry), false);
2415
2416 /* Fix up: copy_bbs redirects all edges pointing to copied blocks. The
2417 following code ensures that all the edges exiting the jump-thread path are
2418 redirected back to the original code: these edges are exceptions
2419 invalidating the property that is propagated by executing all the blocks of
2420 the jump-thread path in order. */
2421
2422 for (i = 0; i < n_region; i++)
2423 {
2424 edge e;
2425 edge_iterator ei;
2426 basic_block bb = region_copy[i];
2427
2428 if (single_succ_p (bb))
2429 {
2430 /* Make sure the successor is the next node in the path. */
2431 gcc_assert (i + 1 == n_region
2432 || region_copy[i + 1] == single_succ_edge (bb)->dest);
2433 continue;
2434 }
2435
2436 /* Special case the last block on the path: make sure that it does not
2437 jump back on the copied path. */
2438 if (i + 1 == n_region)
2439 {
2440 FOR_EACH_EDGE (e, ei, bb->succs)
2441 if (bb_in_bbs (e->dest, region_copy, n_region - 1))
2442 {
2443 basic_block orig = get_bb_original (e->dest);
2444 if (orig)
2445 redirect_edge_and_branch_force (e, orig);
2446 }
2447 continue;
2448 }
2449
2450 /* Redirect all other edges jumping to non-adjacent blocks back to the
2451 original code. */
2452 FOR_EACH_EDGE (e, ei, bb->succs)
2453 if (region_copy[i + 1] != e->dest)
2454 {
2455 basic_block orig = get_bb_original (e->dest);
2456 if (orig)
2457 redirect_edge_and_branch_force (e, orig);
2458 }
2459 }
2460
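/* Split the profile between the original region and its copy: the copy
receives the share of counts/frequencies that flows in through ENTRY,
and the original keeps the remainder. */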
2461 if (total_count)
2462 {
2463 scale_bbs_frequencies_gcov_type (region, n_region,
2464 total_count - entry_count,
2465 total_count);
2466 scale_bbs_frequencies_gcov_type (region_copy, n_region, entry_count,
2467 total_count);
2468 }
2469 else
2470 {
2471 scale_bbs_frequencies_int (region, n_region, total_freq - entry_freq,
2472 total_freq);
2473 scale_bbs_frequencies_int (region_copy, n_region, entry_freq, total_freq);
2474 }
2475
2476 #ifdef ENABLE_CHECKING
2477 verify_jump_thread (region_copy, n_region);
2478 #endif
2479
2480 /* Remove the last branch in the jump thread path. */
2481 remove_ctrl_stmt_and_useless_edges (region_copy[n_region - 1], exit->dest);
2482 edge e = make_edge (region_copy[n_region - 1], exit->dest, EDGE_FALLTHRU);
2483
2484 if (e)
{
2485 rescan_loop_exit (e, true, false);
2486 e->probability = REG_BR_PROB_BASE;
2487 e->count = region_copy[n_region - 1]->count;
2488 }
2489
2490 /* Redirect the entry and add the phi node arguments. */
2491 if (entry->dest == loop->header)
2492 mark_loop_for_removal (loop);
2493 redirected = redirect_edge_and_branch (entry, get_bb_copy (entry->dest));
2494 gcc_assert (redirected != NULL);
2495 flush_pending_stmts (entry);
2496
2497 /* Add the other PHI node arguments. */
2498 add_phi_args_after_copy (region_copy, n_region, NULL);
2499
2500 if (free_region_copy)
2501 free (region_copy);
2502
2503 free_original_copy_tables ();
2504 return true;
2505 }
2506
2507 /* Return true when PATH is a valid jump-thread path. */
2508
2509 static bool
2510 valid_jump_thread_path (vec<jump_thread_edge *> *path)
2511 {
2512 unsigned len = path->length ();
2513
2514 /* Check that the path is connected. */
2515 for (unsigned int j = 0; j < len - 1; j++)
2516 if ((*path)[j]->e->dest != (*path)[j+1]->e->src)
2517 return false;
2518
2519 return true;
2520 }
2521
2522 /* Walk through all blocks and thread incoming edges to the appropriate
2523 outgoing edge for each edge pair recorded in THREADED_EDGES.
2524
2525 It is the caller's responsibility to fix the dominance information
2526 and rewrite duplicated SSA_NAMEs back into SSA form.
2527
2528 If MAY_PEEL_LOOP_HEADERS is false, we avoid threading edges through
2529 loop headers if it does not simplify the loop.
2530
2531 Returns true if one or more edges were threaded, false otherwise. */
2532
2533 bool
2534 thread_through_all_blocks (bool may_peel_loop_headers)
2535 {
2536 bool retval = false;
2537 unsigned int i;
2538 bitmap_iterator bi;
2539 bitmap threaded_blocks;
2540 struct loop *loop;
2541
2542 if (!paths.exists ())
2543 return false;
2544
2545 threaded_blocks = BITMAP_ALLOC (NULL);
2546 memset (&thread_stats, 0, sizeof (thread_stats));
2547
2548 /* Jump-thread all FSM threads before other jump-threads. */
2549 for (i = 0; i < paths.length ();)
2550 {
2551 vec<jump_thread_edge *> *path = paths[i];
2552 edge entry = (*path)[0]->e;
2553
2554 /* Only code-generate FSM jump-threads in this loop. */
2555 if ((*path)[0]->type != EDGE_FSM_THREAD)
2556 {
2557 i++;
2558 continue;
2559 }
2560
2561 /* Do not jump-thread twice from the same block. */
2562 if (bitmap_bit_p (threaded_blocks, entry->src->index)
2563 /* Verify that the jump thread path is still valid: a
2564 previous jump-thread may have changed the CFG, and
2565 invalidated the current path. */
2566 || !valid_jump_thread_path (path))
2567 {
2568 /* Remove invalid FSM jump-thread paths. */
2569 delete_jump_thread_path (path);
2570 paths.unordered_remove (i);
2571 continue;
2572 }
2573
2574 unsigned len = path->length ();
2575 edge exit = (*path)[len - 1]->e;
2576 basic_block *region = XNEWVEC (basic_block, len - 1);
2577
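/* The region to duplicate consists of the destinations of the first
LEN - 1 edges on the path; the final edge EXIT leaves the region. */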
2578 for (unsigned int j = 0; j < len - 1; j++)
2579 region[j] = (*path)[j]->e->dest;
2580
2581 if (duplicate_thread_path (entry, exit, region, len - 1, NULL))
2582 {
2583 /* We do not update dominance info. */
2584 free_dominance_info (CDI_DOMINATORS);
2585 bitmap_set_bit (threaded_blocks, entry->src->index);
2586 retval = true;
2587 }
2588
2589 delete_jump_thread_path (path);
2590 paths.unordered_remove (i);
2591 }
2592
2593 /* Remove from PATHS all the jump-threads starting with an edge already
2594 jump-threaded. */
2595 for (i = 0; i < paths.length ();)
2596 {
2597 vec<jump_thread_edge *> *path = paths[i];
2598 edge entry = (*path)[0]->e;
2599
2600 /* Do not jump-thread twice from the same block. */
2601 if (bitmap_bit_p (threaded_blocks, entry->src->index))
2602 {
2603 delete_jump_thread_path (path);
2604 paths.unordered_remove (i);
2605 }
2606 else
2607 i++;
2608 }
2609
2610 bitmap_clear (threaded_blocks);
2611
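/* THREADED_BLOCKS is no longer needed for the FSM bookkeeping above;
reuse it to collect the blocks with ordinary threading requests. */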
2612 mark_threaded_blocks (threaded_blocks);
2613
2614 initialize_original_copy_tables ();
2615
2616 /* First perform the threading requests that do not affect
2617 loop structure. */
2618 EXECUTE_IF_SET_IN_BITMAP (threaded_blocks, 0, i, bi)
2619 {
2620 basic_block bb = BASIC_BLOCK_FOR_FN (cfun, i);
2621
2622 if (EDGE_COUNT (bb->preds) > 0)
2623 retval |= thread_block (bb, true);
2624 }
2625
2626 /* Then perform the threading through loop headers. We start with the
2627 innermost loop, so that the changes in cfg we perform won't affect
2628 further threading. */
2629 FOR_EACH_LOOP (loop, LI_FROM_INNERMOST)
2630 {
2631 if (!loop->header
2632 || !bitmap_bit_p (threaded_blocks, loop->header->index))
2633 continue;
2634
2635 retval |= thread_through_loop_header (loop, may_peel_loop_headers);
2636 }
2637
2638 /* Any jump threading paths that are still attached to edges at this
2639 point must fall into one of two cases.
2640
2641 First, we could have a jump threading path which went from outside
2642 a loop to inside a loop that was ignored because a prior jump thread
2643 across a backedge was realized (which indirectly causes the loop
2644 above to ignore the latter thread). We can detect these because the
2645 loop structures will be different and we do not currently try to
2646 optimize this case.
2647
2648 Second, we could be threading across a backedge to a point within the
2649 same loop. This occurs for the FSA/FSM optimization and we would
2650 like to optimize it. However, we have to be very careful as this
2651 may completely scramble the loop structures, with the result being
2652 irreducible loops causing us to throw away our loop structure.
2653
2654 As a compromise for the latter case, if the thread path ends in
2655 a block where the last statement is a multiway branch, then go
2656 ahead and thread it, else ignore it. */
2657 basic_block bb;
2658 edge e;
2659 FOR_EACH_BB_FN (bb, cfun)
2660 {
2661 /* If we do end up threading here, we can remove elements from
2662 BB->preds. Thus we cannot use the FOR_EACH_EDGE iterator. */
2663 for (edge_iterator ei = ei_start (bb->preds);
2664 (e = ei_safe_edge (ei));)
2665 if (e->aux)
2666 {
2667 vec<jump_thread_edge *> *path = THREAD_PATH (e);
2668
2669 /* Case 1, threading from outside to inside the loop
2670 after we'd already threaded through the header. */
2671 if ((*path)[0]->e->dest->loop_father
2672 != path->last ()->e->src->loop_father)
2673 {
2674 delete_jump_thread_path (path);
2675 e->aux = NULL;
2676 ei_next (&ei);
2677 }
2678 else if (bb_ends_with_multiway_branch (path->last ()->e->src))
2679 {
2680 /* The code to thread through loop headers may have
2681 split a block with jump threads attached to it.
2682
2683 We can identify this with a disjoint jump threading
2684 path. If found, just remove it. */
2685 for (unsigned int i = 0; i < path->length () - 1; i++)
2686 if ((*path)[i]->e->dest != (*path)[i + 1]->e->src)
2687 {
2688 delete_jump_thread_path (path);
2689 e->aux = NULL;
2690 ei_next (&ei);
2691 break;
2692 }
2693
2694 /* Our path is still valid, thread it. */
2695 if (e->aux)
2696 {
2697 if (thread_block ((*path)[0]->e->dest, false))
2698 e->aux = NULL;
2699 else
2700 {
2701 delete_jump_thread_path (path);
2702 e->aux = NULL;
2703 ei_next (&ei);
2704 }
2705 }
2706 }
2707 else
2708 {
2709 delete_jump_thread_path (path);
2710 e->aux = NULL;
2711 ei_next (&ei);
2712 }
2713 }
2714 else
2715 ei_next (&ei);
2716 }
2717
2718 statistics_counter_event (cfun, "Jumps threaded",
2719 thread_stats.num_threaded_edges);
2720
2721 free_original_copy_tables ();
2722
2723 BITMAP_FREE (threaded_blocks);
2724 threaded_blocks = NULL;
2725 paths.release ();
2726
2727 if (retval)
2728 loops_state_set (LOOPS_NEED_FIXUP);
2729
2730 return retval;
2731 }
2732
2733 /* Delete the jump threading path PATH. We have to explicitly delete
2734 each entry in the vector, then the container. */
2735
2736 void
2737 delete_jump_thread_path (vec<jump_thread_edge *> *path)
2738 {
2739 for (unsigned int i = 0; i < path->length (); i++)
2740 delete (*path)[i];
2741 path->release ();
2742 delete path;
2743 }
2744
2745 /* Register a jump threading opportunity. We queue up all the jump
2746 threading opportunities discovered by a pass and update the CFG
2747 and SSA form all at once.
2748
2749 PATH describes the sequence of edges to thread. Registering it records
2750 that the destination of the first edge on PATH can be changed to the
2751 destination of the last edge, once the CFG and SSA graph are updated. */
2752
2753 void
2754 register_jump_thread (vec<jump_thread_edge *> *path)
2755 {
2756 if (!dbg_cnt (registered_jump_thread))
2757 {
2758 delete_jump_thread_path (path);
2759 return;
2760 }
2761
2762 /* First make sure there are no NULL outgoing edges on the jump threading
2763 path. That can happen for jumping to a constant address. */
2764 for (unsigned int i = 0; i < path->length (); i++)
2765 if ((*path)[i]->e == NULL)
2766 {
2767 if (dump_file && (dump_flags & TDF_DETAILS))
2768 {
2769 fprintf (dump_file,
2770 "Found NULL edge in jump threading path. Cancelling jump thread:\n");
2771 dump_jump_thread_path (dump_file, *path, false);
2772 }
2773
2774 delete_jump_thread_path (path);
2775 return;
2776 }
2777
2778 if (dump_file && (dump_flags & TDF_DETAILS))
2779 dump_jump_thread_path (dump_file, *path, true);
2780
2781 if (!paths.exists ())
2782 paths.create (5);
2783
2784 paths.safe_push (path);
2785 }