1 /* Thread edges through blocks and update the control flow and SSA graphs.
2 Copyright (C) 2004-2015 Free Software Foundation, Inc.
4 This file is part of GCC.
6 GCC is free software; you can redistribute it and/or modify
7 it under the terms of the GNU General Public License as published by
8 the Free Software Foundation; either version 3, or (at your option)
11 GCC is distributed in the hope that it will be useful,
12 but WITHOUT ANY WARRANTY; without even the implied warranty of
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 GNU General Public License for more details.
16 You should have received a copy of the GNU General Public License
17 along with GCC; see the file COPYING3. If not see
18 <http://www.gnu.org/licenses/>. */
22 #include "coretypes.h"
28 #include "hard-reg-set.h"
31 #include "fold-const.h"
34 #include "internal-fn.h"
35 #include "gimple-iterator.h"
37 #include "tree-ssa-threadupdate.h"
42 #include "tree-pass.h"
44 /* Given a block B, update the CFG and SSA graph to reflect redirecting
45 one or more in-edges to B to instead reach the destination of an
46 out-edge from B while preserving any side effects in B.
48 i.e., given A->B and B->C, change A->B to be A->C yet still preserve the
49 side effects of executing B.
51 1. Make a copy of B (including its outgoing edges and statements). Call
52 the copy B'. Note B' has no incoming edges or PHIs at this time.
54 2. Remove the control statement at the end of B' and all outgoing edges
57 3. Add a new argument to each PHI in C with the same value as the existing
58 argument associated with edge B->C. Associate the new PHI arguments
61 4. For each PHI in B, find or create a PHI in B' with an identical
62 PHI_RESULT. Add an argument to the PHI in B' which has the same
63 value as the PHI in B associated with the edge A->B. Associate
64 the new argument in the PHI in B' with the edge A->B.
66 5. Change the edge A->B to A->B'.
68 5a. This automatically deletes any PHI arguments associated with the
71 5b. This automatically associates each new argument added in step 4
74 6. Repeat for other incoming edges into B.
76 7. Put the duplicated resources in B and all the B' blocks into SSA form.
78 Note that block duplication can be minimized by first collecting the
79 set of unique destination blocks that the incoming edges should
82 We reduce the number of edges and statements we create by not copying all
83 the outgoing edges and the control statement in step #1. We instead create
84 a template block without the outgoing edges and duplicate the template.
86 Another case this code handles is threading through a "joiner" block. In
87 this case, we do not know the destination of the joiner block, but one
88 of the outgoing edges from the joiner block leads to a threadable path. This
89 case largely works as outlined above, except the duplicate of the joiner
90 block still contains a full set of outgoing edges and its control statement.
91 We just redirect one of its outgoing edges to our jump threading path. */
94 /* Steps #5 and #6 of the above algorithm are best implemented by walking
95 all the incoming edges which thread to the same destination edge at
96 the same time. That avoids lots of table lookups to get information
97 for the destination edge.
99 To realize that implementation we create a list of incoming edges
100 which thread to the same outgoing edge. Thus to implement steps
101 #5 and #6 we traverse our hash table of outgoing edge information.
102 For each entry we walk the list of incoming edges which thread to
103 the current outgoing edge. */
111 /* Main data structure recording information regarding B's duplicate
114 /* We need to efficiently record the unique thread destinations of this
115 block and specific information associated with those destinations. We
116 may have many incoming edges threaded to the same outgoing edge. This
117 can be naturally implemented with a hash table. */
119 struct redirection_data
: free_ptr_hash
<redirection_data
>
121 /* We support wiring up two block duplicates in a jump threading path.
123 One is a normal block copy where we remove the control statement
124 and wire up its single remaining outgoing edge to the thread path.
126 The other is a joiner block where we leave the control statement
127 in place, but wire one of the outgoing edges to a thread path.
129 In theory we could have multiple block duplicates in a jump
130 threading path, but I haven't tried that.
132 The duplicate blocks appear in this array in the same order in
133 which they appear in the jump thread path. */
134 basic_block dup_blocks
[2];
136 /* The jump threading path. */
137 vec
<jump_thread_edge
*> *path
;
139 /* A list of incoming edges which we want to thread to the
141 struct el
*incoming_edges
;
143 /* hash_table support. */
144 static inline hashval_t
hash (const redirection_data
*);
145 static inline int equal (const redirection_data
*, const redirection_data
*);
148 /* Dump a jump threading path, including annotations about each
152 dump_jump_thread_path (FILE *dump_file
, vec
<jump_thread_edge
*> path
,
156 " %s%s jump thread: (%d, %d) incoming edge; ",
157 (registering
? "Registering" : "Cancelling"),
158 (path
[0]->type
== EDGE_FSM_THREAD
? " FSM": ""),
159 path
[0]->e
->src
->index
, path
[0]->e
->dest
->index
);
161 for (unsigned int i
= 1; i
< path
.length (); i
++)
163 /* We can get paths with a NULL edge when the final destination
164 of a jump thread turns out to be a constant address. We dump
165 those paths when debugging, so we have to be prepared for that
167 if (path
[i
]->e
== NULL
)
170 if (path
[i
]->type
== EDGE_COPY_SRC_JOINER_BLOCK
)
171 fprintf (dump_file
, " (%d, %d) joiner; ",
172 path
[i
]->e
->src
->index
, path
[i
]->e
->dest
->index
);
173 if (path
[i
]->type
== EDGE_COPY_SRC_BLOCK
)
174 fprintf (dump_file
, " (%d, %d) normal;",
175 path
[i
]->e
->src
->index
, path
[i
]->e
->dest
->index
);
176 if (path
[i
]->type
== EDGE_NO_COPY_SRC_BLOCK
)
177 fprintf (dump_file
, " (%d, %d) nocopy;",
178 path
[i
]->e
->src
->index
, path
[i
]->e
->dest
->index
);
179 if (path
[0]->type
== EDGE_FSM_THREAD
)
180 fprintf (dump_file
, " (%d, %d) ",
181 path
[i
]->e
->src
->index
, path
[i
]->e
->dest
->index
);
183 fputc ('\n', dump_file
);
186 /* Simple hashing function. For any given incoming edge E, we're going
187 to be most concerned with the final destination of its jump thread
188 path. So hash on the block index of the final edge in the path. */
191 redirection_data::hash (const redirection_data
*p
)
193 vec
<jump_thread_edge
*> *path
= p
->path
;
194 return path
->last ()->e
->dest
->index
;
197 /* Given two hash table entries, return true if they have the same
198 jump threading path. */
200 redirection_data::equal (const redirection_data
*p1
, const redirection_data
*p2
)
202 vec
<jump_thread_edge
*> *path1
= p1
->path
;
203 vec
<jump_thread_edge
*> *path2
= p2
->path
;
205 if (path1
->length () != path2
->length ())
208 for (unsigned int i
= 1; i
< path1
->length (); i
++)
210 if ((*path1
)[i
]->type
!= (*path2
)[i
]->type
211 || (*path1
)[i
]->e
!= (*path2
)[i
]->e
)
218 /* Rather than search all the edges in jump thread paths each time
219 DOM is able to simply if control statement, we build a hash table
220 with the deleted edges. We only care about the address of the edge,
222 struct removed_edges
: nofree_ptr_hash
<edge_def
>
224 static hashval_t
hash (edge e
) { return htab_hash_pointer (e
); }
225 static bool equal (edge e1
, edge e2
) { return e1
== e2
; }
228 static hash_table
<removed_edges
> *removed_edges
;
230 /* Data structure of information to pass to hash table traversal routines. */
231 struct ssa_local_info_t
233 /* The current block we are working on. */
236 /* We only create a template block for the first duplicated block in a
237 jump threading path as we may need many duplicates of that block.
239 The second duplicate block in a path is specific to that path. Creating
240 and sharing a template for that block is considerably more difficult. */
241 basic_block template_block
;
243 /* TRUE if we thread one or more jumps, FALSE otherwise. */
246 /* Blocks duplicated for the thread. */
247 bitmap duplicate_blocks
;
250 /* Passes which use the jump threading code register jump threading
251 opportunities as they are discovered. We keep the registered
252 jump threading opportunities in this vector as edge pairs
253 (original_edge, target_edge). */
254 static vec
<vec
<jump_thread_edge
*> *> paths
;
256 /* When we start updating the CFG for threading, data necessary for jump
257 threading is attached to the AUX field for the incoming edge. Use these
258 macros to access the underlying structure attached to the AUX field. */
259 #define THREAD_PATH(E) ((vec<jump_thread_edge *> *)(E)->aux)
261 /* Jump threading statistics. */
263 struct thread_stats_d
265 unsigned long num_threaded_edges
;
268 struct thread_stats_d thread_stats
;
271 /* Remove the last statement in block BB if it is a control statement
272 Also remove all outgoing edges except the edge which reaches DEST_BB.
273 If DEST_BB is NULL, then remove all outgoing edges. */
276 remove_ctrl_stmt_and_useless_edges (basic_block bb
, basic_block dest_bb
)
278 gimple_stmt_iterator gsi
;
282 gsi
= gsi_last_bb (bb
);
284 /* If the duplicate ends with a control statement, then remove it.
286 Note that if we are duplicating the template block rather than the
287 original basic block, then the duplicate might not have any real
291 && (gimple_code (gsi_stmt (gsi
)) == GIMPLE_COND
292 || gimple_code (gsi_stmt (gsi
)) == GIMPLE_GOTO
293 || gimple_code (gsi_stmt (gsi
)) == GIMPLE_SWITCH
))
294 gsi_remove (&gsi
, true);
296 for (ei
= ei_start (bb
->succs
); (e
= ei_safe_edge (ei
)); )
298 if (e
->dest
!= dest_bb
)
304 /* If the remaining edge is a loop exit, there must have
305 a removed edge that was not a loop exit.
307 In that case BB and possibly other blocks were previously
308 in the loop, but are now outside the loop. Thus, we need
309 to update the loop structures. */
310 if (single_succ_p (bb
)
311 && loop_outer (bb
->loop_father
)
312 && loop_exit_edge_p (bb
->loop_father
, single_succ_edge (bb
)))
313 loops_state_set (LOOPS_NEED_FIXUP
);
316 /* Create a duplicate of BB. Record the duplicate block in an array
317 indexed by COUNT stored in RD. */
320 create_block_for_threading (basic_block bb
,
321 struct redirection_data
*rd
,
323 bitmap
*duplicate_blocks
)
328 /* We can use the generic block duplication code and simply remove
329 the stuff we do not need. */
330 rd
->dup_blocks
[count
] = duplicate_block (bb
, NULL
, NULL
);
332 FOR_EACH_EDGE (e
, ei
, rd
->dup_blocks
[count
]->succs
)
335 /* Zero out the profile, since the block is unreachable for now. */
336 rd
->dup_blocks
[count
]->frequency
= 0;
337 rd
->dup_blocks
[count
]->count
= 0;
338 if (duplicate_blocks
)
339 bitmap_set_bit (*duplicate_blocks
, rd
->dup_blocks
[count
]->index
);
342 /* Main data structure to hold information for duplicates of BB. */
344 static hash_table
<redirection_data
> *redirection_data
;
346 /* Given an outgoing edge E lookup and return its entry in our hash table.
348 If INSERT is true, then we insert the entry into the hash table if
349 it is not already present. INCOMING_EDGE is added to the list of incoming
350 edges associated with E in the hash table. */
352 static struct redirection_data
*
353 lookup_redirection_data (edge e
, enum insert_option insert
)
355 struct redirection_data
**slot
;
356 struct redirection_data
*elt
;
357 vec
<jump_thread_edge
*> *path
= THREAD_PATH (e
);
359 /* Build a hash table element so we can see if E is already
361 elt
= XNEW (struct redirection_data
);
363 elt
->dup_blocks
[0] = NULL
;
364 elt
->dup_blocks
[1] = NULL
;
365 elt
->incoming_edges
= NULL
;
367 slot
= redirection_data
->find_slot (elt
, insert
);
369 /* This will only happen if INSERT is false and the entry is not
370 in the hash table. */
377 /* This will only happen if E was not in the hash table and
382 elt
->incoming_edges
= XNEW (struct el
);
383 elt
->incoming_edges
->e
= e
;
384 elt
->incoming_edges
->next
= NULL
;
387 /* E was in the hash table. */
390 /* Free ELT as we do not need it anymore, we will extract the
391 relevant entry from the hash table itself. */
394 /* Get the entry stored in the hash table. */
397 /* If insertion was requested, then we need to add INCOMING_EDGE
398 to the list of incoming edges associated with E. */
401 struct el
*el
= XNEW (struct el
);
402 el
->next
= elt
->incoming_edges
;
404 elt
->incoming_edges
= el
;
411 /* Similar to copy_phi_args, except that the PHI arg exists, it just
412 does not have a value associated with it. */
415 copy_phi_arg_into_existing_phi (edge src_e
, edge tgt_e
)
417 int src_idx
= src_e
->dest_idx
;
418 int tgt_idx
= tgt_e
->dest_idx
;
420 /* Iterate over each PHI in e->dest. */
421 for (gphi_iterator gsi
= gsi_start_phis (src_e
->dest
),
422 gsi2
= gsi_start_phis (tgt_e
->dest
);
424 gsi_next (&gsi
), gsi_next (&gsi2
))
426 gphi
*src_phi
= gsi
.phi ();
427 gphi
*dest_phi
= gsi2
.phi ();
428 tree val
= gimple_phi_arg_def (src_phi
, src_idx
);
429 source_location locus
= gimple_phi_arg_location (src_phi
, src_idx
);
431 SET_PHI_ARG_DEF (dest_phi
, tgt_idx
, val
);
432 gimple_phi_arg_set_location (dest_phi
, tgt_idx
, locus
);
436 /* Given ssa_name DEF, backtrack jump threading PATH from node IDX
437 to see if it has constant value in a flow sensitive manner. Set
438 LOCUS to location of the constant phi arg and return the value.
439 Return DEF directly if either PATH or idx is ZERO. */
442 get_value_locus_in_path (tree def
, vec
<jump_thread_edge
*> *path
,
443 basic_block bb
, int idx
, source_location
*locus
)
449 if (path
== NULL
|| idx
== 0)
452 def_phi
= dyn_cast
<gphi
*> (SSA_NAME_DEF_STMT (def
));
456 def_bb
= gimple_bb (def_phi
);
457 /* Don't propagate loop invariants into deeper loops. */
458 if (!def_bb
|| bb_loop_depth (def_bb
) < bb_loop_depth (bb
))
461 /* Backtrack jump threading path from IDX to see if def has constant
463 for (int j
= idx
- 1; j
>= 0; j
--)
465 edge e
= (*path
)[j
]->e
;
466 if (e
->dest
== def_bb
)
468 arg
= gimple_phi_arg_def (def_phi
, e
->dest_idx
);
469 if (is_gimple_min_invariant (arg
))
471 *locus
= gimple_phi_arg_location (def_phi
, e
->dest_idx
);
481 /* For each PHI in BB, copy the argument associated with SRC_E to TGT_E.
482 Try to backtrack jump threading PATH from node IDX to see if the arg
483 has constant value, copy constant value instead of argument itself
487 copy_phi_args (basic_block bb
, edge src_e
, edge tgt_e
,
488 vec
<jump_thread_edge
*> *path
, int idx
)
491 int src_indx
= src_e
->dest_idx
;
493 for (gsi
= gsi_start_phis (bb
); !gsi_end_p (gsi
); gsi_next (&gsi
))
495 gphi
*phi
= gsi
.phi ();
496 tree def
= gimple_phi_arg_def (phi
, src_indx
);
497 source_location locus
= gimple_phi_arg_location (phi
, src_indx
);
499 if (TREE_CODE (def
) == SSA_NAME
500 && !virtual_operand_p (gimple_phi_result (phi
)))
501 def
= get_value_locus_in_path (def
, path
, bb
, idx
, &locus
);
503 add_phi_arg (phi
, def
, tgt_e
, locus
);
507 /* We have recently made a copy of ORIG_BB, including its outgoing
508 edges. The copy is NEW_BB. Every PHI node in every direct successor of
509 ORIG_BB has a new argument associated with edge from NEW_BB to the
510 successor. Initialize the PHI argument so that it is equal to the PHI
511 argument associated with the edge from ORIG_BB to the successor.
512 PATH and IDX are used to check if the new PHI argument has constant
513 value in a flow sensitive manner. */
516 update_destination_phis (basic_block orig_bb
, basic_block new_bb
,
517 vec
<jump_thread_edge
*> *path
, int idx
)
522 FOR_EACH_EDGE (e
, ei
, orig_bb
->succs
)
524 edge e2
= find_edge (new_bb
, e
->dest
);
525 copy_phi_args (e
->dest
, e
, e2
, path
, idx
);
529 /* Given a duplicate block and its single destination (both stored
530 in RD). Create an edge between the duplicate and its single
533 Add an additional argument to any PHI nodes at the single
534 destination. IDX is the start node in jump threading path
535 we start to check to see if the new PHI argument has constant
536 value along the jump threading path. */
539 create_edge_and_update_destination_phis (struct redirection_data
*rd
,
540 basic_block bb
, int idx
)
542 edge e
= make_edge (bb
, rd
->path
->last ()->e
->dest
, EDGE_FALLTHRU
);
544 rescan_loop_exit (e
, true, false);
545 e
->probability
= REG_BR_PROB_BASE
;
546 e
->count
= bb
->count
;
548 /* We used to copy the thread path here. That was added in 2007
549 and dutifully updated through the representation changes in 2013.
551 In 2013 we added code to thread from an interior node through
552 the backedge to another interior node. That runs after the code
553 to thread through loop headers from outside the loop.
555 The latter may delete edges in the CFG, including those
556 which appeared in the jump threading path we copied here. Thus
557 we'd end up using a dangling pointer.
559 After reviewing the 2007/2011 code, I can't see how anything
560 depended on copying the AUX field and clearly copying the jump
561 threading path is problematical due to embedded edge pointers.
562 It has been removed. */
565 /* If there are any PHI nodes at the destination of the outgoing edge
566 from the duplicate block, then we will need to add a new argument
567 to them. The argument should have the same value as the argument
568 associated with the outgoing edge stored in RD. */
569 copy_phi_args (e
->dest
, rd
->path
->last ()->e
, e
, rd
->path
, idx
);
572 /* Look through PATH beginning at START and return TRUE if there are
573 any additional blocks that need to be duplicated. Otherwise,
576 any_remaining_duplicated_blocks (vec
<jump_thread_edge
*> *path
,
579 for (unsigned int i
= start
+ 1; i
< path
->length (); i
++)
581 if ((*path
)[i
]->type
== EDGE_COPY_SRC_JOINER_BLOCK
582 || (*path
)[i
]->type
== EDGE_COPY_SRC_BLOCK
)
589 /* Compute the amount of profile count/frequency coming into the jump threading
590 path stored in RD that we are duplicating, returned in PATH_IN_COUNT_PTR and
591 PATH_IN_FREQ_PTR, as well as the amount of counts flowing out of the
592 duplicated path, returned in PATH_OUT_COUNT_PTR. LOCAL_INFO is used to
593 identify blocks duplicated for jump threading, which have duplicated
594 edges that need to be ignored in the analysis. Return true if path contains
595 a joiner, false otherwise.
597 In the non-joiner case, this is straightforward - all the counts/frequency
598 flowing into the jump threading path should flow through the duplicated
599 block and out of the duplicated path.
601 In the joiner case, it is very tricky. Some of the counts flowing into
602 the original path go offpath at the joiner. The problem is that while
603 we know how much total count goes off-path in the original control flow,
604 we don't know how many of the counts corresponding to just the jump
605 threading path go offpath at the joiner.
607 For example, assume we have the following control flow and identified
608 jump threading paths:
627 Jump threading paths: A -> J -> Son -> D (path 1)
628 C -> J -> Son -> E (path 2)
630 Note that the control flow could be more complicated:
631 - Each jump threading path may have more than one incoming edge. I.e. A and
632 Ea could represent multiple incoming blocks/edges that are included in
634 - There could be EDGE_NO_COPY_SRC_BLOCK edges after the joiner (either
635 before or after the "normal" copy block). These are not duplicated onto
636 the jump threading path, as they are single-successor.
637 - Any of the blocks along the path may have other incoming edges that
638 are not part of any jump threading path, but add profile counts along
641 In the above example, after all jump threading is complete, we will
642 end up with the following control flow:
651 Eona/ \ ---/---\-------- \Eonc
656 \___________ / \ _____/
661 The main issue to notice here is that when we are processing path 1
662 (A->J->Son->D) we need to figure out the outgoing edge weights to
663 the duplicated edges Ja->Sona and Ja->Soff, while ensuring that the
664 sum of the incoming weights to D remain Ed. The problem with simply
665 assuming that Ja (and Jc when processing path 2) has the same outgoing
666 probabilities to its successors as the original block J, is that after
667 all paths are processed and other edges/counts removed (e.g. none
668 of Ec will reach D after processing path 2), we may end up with not
669 enough count flowing along duplicated edge Sona->D.
671 Therefore, in the case of a joiner, we keep track of all counts
672 coming in along the current path, as well as from predecessors not
673 on any jump threading path (Eb in the above example). While we
674 first assume that the duplicated Eona for Ja->Sona has the same
675 probability as the original, we later compensate for other jump
676 threading paths that may eliminate edges. We do that by keep track
677 of all counts coming into the original path that are not in a jump
678 thread (Eb in the above example, but as noted earlier, there could
679 be other predecessors incoming to the path at various points, such
680 as at Son). Call this cumulative non-path count coming into the path
681 before D as Enonpath. We then ensure that the count from Sona->D is as at
682 least as big as (Ed - Enonpath), but no bigger than the minimum
683 weight along the jump threading path. The probabilities of both the
684 original and duplicated joiner block J and Ja will be adjusted
685 accordingly after the updates. */
688 compute_path_counts (struct redirection_data
*rd
,
689 ssa_local_info_t
*local_info
,
690 gcov_type
*path_in_count_ptr
,
691 gcov_type
*path_out_count_ptr
,
692 int *path_in_freq_ptr
)
694 edge e
= rd
->incoming_edges
->e
;
695 vec
<jump_thread_edge
*> *path
= THREAD_PATH (e
);
696 edge elast
= path
->last ()->e
;
697 gcov_type nonpath_count
= 0;
698 bool has_joiner
= false;
699 gcov_type path_in_count
= 0;
700 int path_in_freq
= 0;
702 /* Start by accumulating incoming edge counts to the path's first bb
703 into a couple buckets:
704 path_in_count: total count of incoming edges that flow into the
706 nonpath_count: total count of incoming edges that are not
707 flowing along *any* path. These are the counts
708 that will still flow along the original path after
709 all path duplication is done by potentially multiple
710 calls to this routine.
711 (any other incoming edge counts are for a different jump threading
712 path that will be handled by a later call to this routine.)
713 To make this easier, start by recording all incoming edges that flow into
714 the current path in a bitmap. We could add up the path's incoming edge
715 counts here, but we still need to walk all the first bb's incoming edges
716 below to add up the counts of the other edges not included in this jump
718 struct el
*next
, *el
;
719 bitmap in_edge_srcs
= BITMAP_ALLOC (NULL
);
720 for (el
= rd
->incoming_edges
; el
; el
= next
)
723 bitmap_set_bit (in_edge_srcs
, el
->e
->src
->index
);
727 FOR_EACH_EDGE (ein
, ei
, e
->dest
->preds
)
729 vec
<jump_thread_edge
*> *ein_path
= THREAD_PATH (ein
);
730 /* Simply check the incoming edge src against the set captured above. */
732 && bitmap_bit_p (in_edge_srcs
, (*ein_path
)[0]->e
->src
->index
))
734 /* It is necessary but not sufficient that the last path edges
735 are identical. There may be different paths that share the
736 same last path edge in the case where the last edge has a nocopy
738 gcc_assert (ein_path
->last ()->e
== elast
);
739 path_in_count
+= ein
->count
;
740 path_in_freq
+= EDGE_FREQUENCY (ein
);
744 /* Keep track of the incoming edges that are not on any jump-threading
745 path. These counts will still flow out of original path after all
746 jump threading is complete. */
747 nonpath_count
+= ein
->count
;
751 /* This is needed due to insane incoming frequencies. */
752 if (path_in_freq
> BB_FREQ_MAX
)
753 path_in_freq
= BB_FREQ_MAX
;
755 BITMAP_FREE (in_edge_srcs
);
757 /* Now compute the fraction of the total count coming into the first
758 path bb that is from the current threading path. */
759 gcov_type total_count
= e
->dest
->count
;
760 /* Handle incoming profile insanities. */
761 if (total_count
< path_in_count
)
762 path_in_count
= total_count
;
763 int onpath_scale
= GCOV_COMPUTE_SCALE (path_in_count
, total_count
);
765 /* Walk the entire path to do some more computation in order to estimate
766 how much of the path_in_count will flow out of the duplicated threading
767 path. In the non-joiner case this is straightforward (it should be
768 the same as path_in_count, although we will handle incoming profile
769 insanities by setting it equal to the minimum count along the path).
771 In the joiner case, we need to estimate how much of the path_in_count
772 will stay on the threading path after the joiner's conditional branch.
773 We don't really know for sure how much of the counts
774 associated with this path go to each successor of the joiner, but we'll
775 estimate based on the fraction of the total count coming into the path
776 bb was from the threading paths (computed above in onpath_scale).
777 Afterwards, we will need to do some fixup to account for other threading
778 paths and possible profile insanities.
780 In order to estimate the joiner case's counts we also need to update
781 nonpath_count with any additional counts coming into the path. Other
782 blocks along the path may have additional predecessors from outside
784 gcov_type path_out_count
= path_in_count
;
785 gcov_type min_path_count
= path_in_count
;
786 for (unsigned int i
= 1; i
< path
->length (); i
++)
788 edge epath
= (*path
)[i
]->e
;
789 gcov_type cur_count
= epath
->count
;
790 if ((*path
)[i
]->type
== EDGE_COPY_SRC_JOINER_BLOCK
)
793 cur_count
= apply_probability (cur_count
, onpath_scale
);
795 /* In the joiner case we need to update nonpath_count for any edges
796 coming into the path that will contribute to the count flowing
797 into the path successor. */
798 if (has_joiner
&& epath
!= elast
)
800 /* Look for other incoming edges after joiner. */
801 FOR_EACH_EDGE (ein
, ei
, epath
->dest
->preds
)
804 /* Ignore in edges from blocks we have duplicated for a
805 threading path, which have duplicated edge counts until
806 they are redirected by an invocation of this routine. */
807 && !bitmap_bit_p (local_info
->duplicate_blocks
,
809 nonpath_count
+= ein
->count
;
812 if (cur_count
< path_out_count
)
813 path_out_count
= cur_count
;
814 if (epath
->count
< min_path_count
)
815 min_path_count
= epath
->count
;
818 /* We computed path_out_count above assuming that this path targeted
819 the joiner's on-path successor with the same likelihood as it
820 reached the joiner. However, other thread paths through the joiner
821 may take a different path through the normal copy source block
822 (i.e. they have a different elast), meaning that they do not
823 contribute any counts to this path's elast. As a result, it may
824 turn out that this path must have more count flowing to the on-path
825 successor of the joiner. Essentially, all of this path's elast
826 count must be contributed by this path and any nonpath counts
827 (since any path through the joiner with a different elast will not
828 include a copy of this elast in its duplicated path).
829 So ensure that this path's path_out_count is at least the
830 difference between elast->count and nonpath_count. Otherwise the edge
831 counts after threading will not be sane. */
832 if (has_joiner
&& path_out_count
< elast
->count
- nonpath_count
)
834 path_out_count
= elast
->count
- nonpath_count
;
835 /* But neither can we go above the minimum count along the path
836 we are duplicating. This can be an issue due to profile
837 insanities coming in to this pass. */
838 if (path_out_count
> min_path_count
)
839 path_out_count
= min_path_count
;
842 *path_in_count_ptr
= path_in_count
;
843 *path_out_count_ptr
= path_out_count
;
844 *path_in_freq_ptr
= path_in_freq
;
849 /* Update the counts and frequencies for both an original path
850 edge EPATH and its duplicate EDUP. The duplicate source block
851 will get a count/frequency of PATH_IN_COUNT and PATH_IN_FREQ,
852 and the duplicate edge EDUP will have a count of PATH_OUT_COUNT. */
854 update_profile (edge epath
, edge edup
, gcov_type path_in_count
,
855 gcov_type path_out_count
, int path_in_freq
)
858 /* First update the duplicated block's count / frequency. */
861 basic_block dup_block
= edup
->src
;
862 gcc_assert (dup_block
->count
== 0);
863 gcc_assert (dup_block
->frequency
== 0);
864 dup_block
->count
= path_in_count
;
865 dup_block
->frequency
= path_in_freq
;
868 /* Now update the original block's count and frequency in the
869 opposite manner - remove the counts/freq that will flow
870 into the duplicated block. Handle underflow due to precision/
872 epath
->src
->count
-= path_in_count
;
873 if (epath
->src
->count
< 0)
874 epath
->src
->count
= 0;
875 epath
->src
->frequency
-= path_in_freq
;
876 if (epath
->src
->frequency
< 0)
877 epath
->src
->frequency
= 0;
879 /* Next update this path edge's original and duplicated counts. We know
880 that the duplicated path will have path_out_count flowing
881 out of it (in the joiner case this is the count along the duplicated path
882 out of the duplicated joiner). This count can then be removed from the
883 original path edge. */
885 edup
->count
= path_out_count
;
886 epath
->count
-= path_out_count
;
887 gcc_assert (epath
->count
>= 0);
891 /* The duplicate and original joiner blocks may end up with different
892 probabilities (different from both the original and from each other).
893 Recompute the probabilities here once we have updated the edge
894 counts and frequencies. */
897 recompute_probabilities (basic_block bb
)
901 FOR_EACH_EDGE (esucc
, ei
, bb
->succs
)
906 /* Prevent overflow computation due to insane profiles. */
907 if (esucc
->count
< bb
->count
)
908 esucc
->probability
= GCOV_COMPUTE_SCALE (esucc
->count
,
911 /* Can happen with missing/guessed probabilities, since we
912 may determine that more is flowing along duplicated
913 path than joiner succ probabilities allowed.
914 Counts and freqs will be insane after jump threading,
915 at least make sure probability is sane or we will
916 get a flow verification error.
917 Not much we can do to make counts/freqs sane without
918 redoing the profile estimation. */
919 esucc
->probability
= REG_BR_PROB_BASE
;
924 /* Update the counts of the original and duplicated edges from a joiner
925 that go off path, given that we have already determined that the
926 duplicate joiner DUP_BB has incoming count PATH_IN_COUNT and
927 outgoing count along the path PATH_OUT_COUNT. The original (on-)path
928 edge from joiner is EPATH. */
931 update_joiner_offpath_counts (edge epath
, basic_block dup_bb
,
932 gcov_type path_in_count
,
933 gcov_type path_out_count
)
935 /* Compute the count that currently flows off path from the joiner.
936 In other words, the total count of joiner's out edges other than
937 epath. Compute this by walking the successors instead of
938 subtracting epath's count from the joiner bb count, since there
939 are sometimes slight insanities where the total out edge count is
940 larger than the bb count (possibly due to rounding/truncation
942 gcov_type total_orig_off_path_count
= 0;
945 FOR_EACH_EDGE (enonpath
, ei
, epath
->src
->succs
)
947 if (enonpath
== epath
)
949 total_orig_off_path_count
+= enonpath
->count
;
952 /* For the path that we are duplicating, the amount that will flow
953 off path from the duplicated joiner is the delta between the
954 path's cumulative in count and the portion of that count we
955 estimated above as flowing from the joiner along the duplicated
957 gcov_type total_dup_off_path_count
= path_in_count
- path_out_count
;
959 /* Now do the actual updates of the off-path edges. */
960 FOR_EACH_EDGE (enonpath
, ei
, epath
->src
->succs
)
962 /* Look for edges going off of the threading path. */
963 if (enonpath
== epath
)
966 /* Find the corresponding edge out of the duplicated joiner. */
967 edge enonpathdup
= find_edge (dup_bb
, enonpath
->dest
);
968 gcc_assert (enonpathdup
);
970 /* We can't use the original probability of the joiner's out
971 edges, since the probabilities of the original branch
972 and the duplicated branches may vary after all threading is
973 complete. But apportion the duplicated joiner's off-path
974 total edge count computed earlier (total_dup_off_path_count)
975 among the duplicated off-path edges based on their original
976 ratio to the full off-path count (total_orig_off_path_count).
978 int scale
= GCOV_COMPUTE_SCALE (enonpath
->count
,
979 total_orig_off_path_count
);
980 /* Give the duplicated offpath edge a portion of the duplicated
982 enonpathdup
->count
= apply_scale (scale
,
983 total_dup_off_path_count
);
984 /* Now update the original offpath edge count, handling underflow
985 due to rounding errors. */
986 enonpath
->count
-= enonpathdup
->count
;
987 if (enonpath
->count
< 0)
993 /* Check if the paths through RD all have estimated frequencies but zero
994 profile counts. This is more accurate than checking the entry block
995 for a zero profile count, since profile insanities sometimes creep in. */
998 estimated_freqs_path (struct redirection_data
*rd
)
1000 edge e
= rd
->incoming_edges
->e
;
1001 vec
<jump_thread_edge
*> *path
= THREAD_PATH (e
);
1004 bool non_zero_freq
= false;
1005 FOR_EACH_EDGE (ein
, ei
, e
->dest
->preds
)
1009 non_zero_freq
|= ein
->src
->frequency
!= 0;
1012 for (unsigned int i
= 1; i
< path
->length (); i
++)
1014 edge epath
= (*path
)[i
]->e
;
1015 if (epath
->src
->count
)
1017 non_zero_freq
|= epath
->src
->frequency
!= 0;
1019 FOR_EACH_EDGE (esucc
, ei
, epath
->src
->succs
)
1023 non_zero_freq
|= esucc
->src
->frequency
!= 0;
1026 return non_zero_freq
;
1030 /* Invoked for routines that have guessed frequencies and no profile
1031 counts to record the block and edge frequencies for paths through RD
1032 in the profile count fields of those blocks and edges. This is because
1033 ssa_fix_duplicate_block_edges incrementally updates the block and
1034 edge counts as edges are redirected, and it is difficult to do that
1035 for edge frequencies which are computed on the fly from the source
1036 block frequency and probability. When a block frequency is updated
1037 its outgoing edge frequencies are affected and become difficult to
1041 freqs_to_counts_path (struct redirection_data
*rd
)
1043 edge e
= rd
->incoming_edges
->e
;
1044 vec
<jump_thread_edge
*> *path
= THREAD_PATH (e
);
1047 FOR_EACH_EDGE (ein
, ei
, e
->dest
->preds
)
1049 /* Scale up the frequency by REG_BR_PROB_BASE, to avoid rounding
1050 errors applying the probability when the frequencies are very
1052 ein
->count
= apply_probability (ein
->src
->frequency
* REG_BR_PROB_BASE
,
1056 for (unsigned int i
= 1; i
< path
->length (); i
++)
1058 edge epath
= (*path
)[i
]->e
;
1060 /* Scale up the frequency by REG_BR_PROB_BASE, to avoid rounding
1061 errors applying the edge probability when the frequencies are very
1063 epath
->src
->count
= epath
->src
->frequency
* REG_BR_PROB_BASE
;
1064 FOR_EACH_EDGE (esucc
, ei
, epath
->src
->succs
)
1065 esucc
->count
= apply_probability (esucc
->src
->count
,
1066 esucc
->probability
);
1071 /* For routines that have guessed frequencies and no profile counts, where we
1072 used freqs_to_counts_path to record block and edge frequencies for paths
1073 through RD, we clear the counts after completing all updates for RD.
1074 The updates in ssa_fix_duplicate_block_edges are based off the count fields,
1075 but the block frequencies and edge probabilities were updated as well,
1076 so we can simply clear the count fields. */
1079 clear_counts_path (struct redirection_data
*rd
)
1081 edge e
= rd
->incoming_edges
->e
;
1082 vec
<jump_thread_edge
*> *path
= THREAD_PATH (e
);
1085 FOR_EACH_EDGE (ein
, ei
, e
->dest
->preds
)
1088 /* First clear counts along original path. */
1089 for (unsigned int i
= 1; i
< path
->length (); i
++)
1091 edge epath
= (*path
)[i
]->e
;
1092 FOR_EACH_EDGE (esucc
, ei
, epath
->src
->succs
)
1094 epath
->src
->count
= 0;
1096 /* Also need to clear the counts along duplicated path. */
1097 for (unsigned int i
= 0; i
< 2; i
++)
1099 basic_block dup
= rd
->dup_blocks
[i
];
1102 FOR_EACH_EDGE (esucc
, ei
, dup
->succs
)
1108 /* Wire up the outgoing edges from the duplicate blocks and
1109 update any PHIs as needed. Also update the profile counts
1110 on the original and duplicate blocks and edges. */
1112 ssa_fix_duplicate_block_edges (struct redirection_data
*rd
,
1113 ssa_local_info_t
*local_info
)
1115 bool multi_incomings
= (rd
->incoming_edges
->next
!= NULL
);
1116 edge e
= rd
->incoming_edges
->e
;
1117 vec
<jump_thread_edge
*> *path
= THREAD_PATH (e
);
1118 edge elast
= path
->last ()->e
;
1119 gcov_type path_in_count
= 0;
1120 gcov_type path_out_count
= 0;
1121 int path_in_freq
= 0;
1123 /* This routine updates profile counts, frequencies, and probabilities
1124 incrementally. Since it is difficult to do the incremental updates
1125 using frequencies/probabilities alone, for routines without profile
1126 data we first take a snapshot of the existing block and edge frequencies
1127 by copying them into the empty profile count fields. These counts are
1128 then used to do the incremental updates, and cleared at the end of this
1129 routine. If the function is marked as having a profile, we still check
1130 to see if the paths through RD are using estimated frequencies because
1131 the routine had zero profile counts. */
1132 bool do_freqs_to_counts
= (profile_status_for_fn (cfun
) != PROFILE_READ
1133 || estimated_freqs_path (rd
));
1134 if (do_freqs_to_counts
)
1135 freqs_to_counts_path (rd
);
1137 /* First determine how much profile count to move from original
1138 path to the duplicate path. This is tricky in the presence of
1139 a joiner (see comments for compute_path_counts), where some portion
1140 of the path's counts will flow off-path from the joiner. In the
1141 non-joiner case the path_in_count and path_out_count should be the
1143 bool has_joiner
= compute_path_counts (rd
, local_info
,
1144 &path_in_count
, &path_out_count
,
1147 int cur_path_freq
= path_in_freq
;
1148 for (unsigned int count
= 0, i
= 1; i
< path
->length (); i
++)
1150 edge epath
= (*path
)[i
]->e
;
1152 /* If we were threading through an joiner block, then we want
1153 to keep its control statement and redirect an outgoing edge.
1154 Else we want to remove the control statement & edges, then create
1155 a new outgoing edge. In both cases we may need to update PHIs. */
1156 if ((*path
)[i
]->type
== EDGE_COPY_SRC_JOINER_BLOCK
)
1161 gcc_assert (has_joiner
);
1163 /* This updates the PHIs at the destination of the duplicate
1164 block. Pass 0 instead of i if we are threading a path which
1165 has multiple incoming edges. */
1166 update_destination_phis (local_info
->bb
, rd
->dup_blocks
[count
],
1167 path
, multi_incomings
? 0 : i
);
1169 /* Find the edge from the duplicate block to the block we're
1170 threading through. That's the edge we want to redirect. */
1171 victim
= find_edge (rd
->dup_blocks
[count
], (*path
)[i
]->e
->dest
);
1173 /* If there are no remaining blocks on the path to duplicate,
1174 then redirect VICTIM to the final destination of the jump
1176 if (!any_remaining_duplicated_blocks (path
, i
))
1178 e2
= redirect_edge_and_branch (victim
, elast
->dest
);
1179 /* If we redirected the edge, then we need to copy PHI arguments
1180 at the target. If the edge already existed (e2 != victim
1181 case), then the PHIs in the target already have the correct
1184 copy_phi_args (e2
->dest
, elast
, e2
,
1185 path
, multi_incomings
? 0 : i
);
1189 /* Redirect VICTIM to the next duplicated block in the path. */
1190 e2
= redirect_edge_and_branch (victim
, rd
->dup_blocks
[count
+ 1]);
1192 /* We need to update the PHIs in the next duplicated block. We
1193 want the new PHI args to have the same value as they had
1194 in the source of the next duplicate block.
1196 Thus, we need to know which edge we traversed into the
1197 source of the duplicate. Furthermore, we may have
1198 traversed many edges to reach the source of the duplicate.
1200 Walk through the path starting at element I until we
1201 hit an edge marked with EDGE_COPY_SRC_BLOCK. We want
1202 the edge from the prior element. */
1203 for (unsigned int j
= i
+ 1; j
< path
->length (); j
++)
1205 if ((*path
)[j
]->type
== EDGE_COPY_SRC_BLOCK
)
1207 copy_phi_arg_into_existing_phi ((*path
)[j
- 1]->e
, e2
);
1213 /* Update the counts and frequency of both the original block
1214 and path edge, and the duplicates. The path duplicate's
1215 incoming count and frequency are the totals for all edges
1216 incoming to this jump threading path computed earlier.
1217 And we know that the duplicated path will have path_out_count
1218 flowing out of it (i.e. along the duplicated path out of the
1219 duplicated joiner). */
1220 update_profile (epath
, e2
, path_in_count
, path_out_count
,
1223 /* Next we need to update the counts of the original and duplicated
1224 edges from the joiner that go off path. */
1225 update_joiner_offpath_counts (epath
, e2
->src
, path_in_count
,
1228 /* Finally, we need to set the probabilities on the duplicated
1229 edges out of the duplicated joiner (e2->src). The probabilities
1230 along the original path will all be updated below after we finish
1231 processing the whole path. */
1232 recompute_probabilities (e2
->src
);
1234 /* Record the frequency flowing to the downstream duplicated
1236 cur_path_freq
= EDGE_FREQUENCY (e2
);
1238 else if ((*path
)[i
]->type
== EDGE_COPY_SRC_BLOCK
)
1240 remove_ctrl_stmt_and_useless_edges (rd
->dup_blocks
[count
], NULL
);
1241 create_edge_and_update_destination_phis (rd
, rd
->dup_blocks
[count
],
1242 multi_incomings
? 0 : i
);
1244 single_succ_edge (rd
->dup_blocks
[1])->aux
= NULL
;
1246 /* Update the counts and frequency of both the original block
1247 and path edge, and the duplicates. Since we are now after
1248 any joiner that may have existed on the path, the count
1249 flowing along the duplicated threaded path is path_out_count.
1250 If we didn't have a joiner, then cur_path_freq was the sum
1251 of the total frequencies along all incoming edges to the
1252 thread path (path_in_freq). If we had a joiner, it would have
1253 been updated at the end of that handling to the edge frequency
1254 along the duplicated joiner path edge. */
1255 update_profile (epath
, EDGE_SUCC (rd
->dup_blocks
[count
], 0),
1256 path_out_count
, path_out_count
,
1261 /* No copy case. In this case we don't have an equivalent block
1262 on the duplicated thread path to update, but we do need
1263 to remove the portion of the counts/freqs that were moved
1264 to the duplicated path from the counts/freqs flowing through
1265 this block on the original path. Since all the no-copy edges
1266 are after any joiner, the removed count is the same as
1269 If we didn't have a joiner, then cur_path_freq was the sum
1270 of the total frequencies along all incoming edges to the
1271 thread path (path_in_freq). If we had a joiner, it would have
1272 been updated at the end of that handling to the edge frequency
1273 along the duplicated joiner path edge. */
1274 update_profile (epath
, NULL
, path_out_count
, path_out_count
,
1278 /* Increment the index into the duplicated path when we processed
1279 a duplicated block. */
1280 if ((*path
)[i
]->type
== EDGE_COPY_SRC_JOINER_BLOCK
1281 || (*path
)[i
]->type
== EDGE_COPY_SRC_BLOCK
)
1287 /* Now walk orig blocks and update their probabilities, since the
1288 counts and freqs should be updated properly by above loop. */
1289 for (unsigned int i
= 1; i
< path
->length (); i
++)
1291 edge epath
= (*path
)[i
]->e
;
1292 recompute_probabilities (epath
->src
);
1295 /* Done with all profile and frequency updates, clear counts if they
1297 if (do_freqs_to_counts
)
1298 clear_counts_path (rd
);
1301 /* Hash table traversal callback routine to create duplicate blocks. */
1304 ssa_create_duplicates (struct redirection_data
**slot
,
1305 ssa_local_info_t
*local_info
)
1307 struct redirection_data
*rd
= *slot
;
1309 /* The second duplicated block in a jump threading path is specific
1310 to the path. So it gets stored in RD rather than in LOCAL_DATA.
1312 Each time we're called, we have to look through the path and see
1313 if a second block needs to be duplicated.
1315 Note the search starts with the third edge on the path. The first
1316 edge is the incoming edge, the second edge always has its source
1317 duplicated. Thus we start our search with the third edge. */
1318 vec
<jump_thread_edge
*> *path
= rd
->path
;
1319 for (unsigned int i
= 2; i
< path
->length (); i
++)
1321 if ((*path
)[i
]->type
== EDGE_COPY_SRC_BLOCK
1322 || (*path
)[i
]->type
== EDGE_COPY_SRC_JOINER_BLOCK
)
1324 create_block_for_threading ((*path
)[i
]->e
->src
, rd
, 1,
1325 &local_info
->duplicate_blocks
);
1330 /* Create a template block if we have not done so already. Otherwise
1331 use the template to create a new block. */
1332 if (local_info
->template_block
== NULL
)
1334 create_block_for_threading ((*path
)[1]->e
->src
, rd
, 0,
1335 &local_info
->duplicate_blocks
);
1336 local_info
->template_block
= rd
->dup_blocks
[0];
1338 /* We do not create any outgoing edges for the template. We will
1339 take care of that in a later traversal. That way we do not
1340 create edges that are going to just be deleted. */
1344 create_block_for_threading (local_info
->template_block
, rd
, 0,
1345 &local_info
->duplicate_blocks
);
1347 /* Go ahead and wire up outgoing edges and update PHIs for the duplicate
1349 ssa_fix_duplicate_block_edges (rd
, local_info
);
1352 /* Keep walking the hash table. */
1356 /* We did not create any outgoing edges for the template block during
1357 block creation. This hash table traversal callback creates the
1358 outgoing edge for the template block. */
1361 ssa_fixup_template_block (struct redirection_data
**slot
,
1362 ssa_local_info_t
*local_info
)
1364 struct redirection_data
*rd
= *slot
;
1366 /* If this is the template block halt the traversal after updating
1369 If we were threading through an joiner block, then we want
1370 to keep its control statement and redirect an outgoing edge.
1371 Else we want to remove the control statement & edges, then create
1372 a new outgoing edge. In both cases we may need to update PHIs. */
1373 if (rd
->dup_blocks
[0] && rd
->dup_blocks
[0] == local_info
->template_block
)
1375 ssa_fix_duplicate_block_edges (rd
, local_info
);
1382 /* Hash table traversal callback to redirect each incoming edge
1383 associated with this hash table element to its new destination. */
1386 ssa_redirect_edges (struct redirection_data
**slot
,
1387 ssa_local_info_t
*local_info
)
1389 struct redirection_data
*rd
= *slot
;
1390 struct el
*next
, *el
;
1392 /* Walk over all the incoming edges associated with this hash table
1394 for (el
= rd
->incoming_edges
; el
; el
= next
)
1397 vec
<jump_thread_edge
*> *path
= THREAD_PATH (e
);
1399 /* Go ahead and free this element from the list. Doing this now
1400 avoids the need for another list walk when we destroy the hash
1405 thread_stats
.num_threaded_edges
++;
1407 if (rd
->dup_blocks
[0])
1411 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
1412 fprintf (dump_file
, " Threaded jump %d --> %d to %d\n",
1413 e
->src
->index
, e
->dest
->index
, rd
->dup_blocks
[0]->index
);
1415 /* If we redirect a loop latch edge cancel its loop. */
1416 if (e
->src
== e
->src
->loop_father
->latch
)
1417 mark_loop_for_removal (e
->src
->loop_father
);
1419 /* Redirect the incoming edge (possibly to the joiner block) to the
1420 appropriate duplicate block. */
1421 e2
= redirect_edge_and_branch (e
, rd
->dup_blocks
[0]);
1422 gcc_assert (e
== e2
);
1423 flush_pending_stmts (e2
);
1426 /* Go ahead and clear E->aux. It's not needed anymore and failure
1427 to clear it will cause all kinds of unpleasant problems later. */
1428 delete_jump_thread_path (path
);
1433 /* Indicate that we actually threaded one or more jumps. */
1434 if (rd
->incoming_edges
)
1435 local_info
->jumps_threaded
= true;
1440 /* Return true if this block has no executable statements other than
1441 a simple ctrl flow instruction. When the number of outgoing edges
1442 is one, this is equivalent to a "forwarder" block. */
1445 redirection_block_p (basic_block bb
)
1447 gimple_stmt_iterator gsi
;
1449 /* Advance to the first executable statement. */
1450 gsi
= gsi_start_bb (bb
);
1451 while (!gsi_end_p (gsi
)
1452 && (gimple_code (gsi_stmt (gsi
)) == GIMPLE_LABEL
1453 || is_gimple_debug (gsi_stmt (gsi
))
1454 || gimple_nop_p (gsi_stmt (gsi
))
1455 || gimple_clobber_p (gsi_stmt (gsi
))))
1458 /* Check if this is an empty block. */
1459 if (gsi_end_p (gsi
))
1462 /* Test that we've reached the terminating control statement. */
1463 return gsi_stmt (gsi
)
1464 && (gimple_code (gsi_stmt (gsi
)) == GIMPLE_COND
1465 || gimple_code (gsi_stmt (gsi
)) == GIMPLE_GOTO
1466 || gimple_code (gsi_stmt (gsi
)) == GIMPLE_SWITCH
);
1469 /* BB is a block which ends with a COND_EXPR or SWITCH_EXPR and when BB
1470 is reached via one or more specific incoming edges, we know which
1471 outgoing edge from BB will be traversed.
1473 We want to redirect those incoming edges to the target of the
1474 appropriate outgoing edge. Doing so avoids a conditional branch
1475 and may expose new optimization opportunities. Note that we have
1476 to update dominator tree and SSA graph after such changes.
1478 The key to keeping the SSA graph update manageable is to duplicate
1479 the side effects occurring in BB so that those side effects still
1480 occur on the paths which bypass BB after redirecting edges.
1482 We accomplish this by creating duplicates of BB and arranging for
1483 the duplicates to unconditionally pass control to one specific
1484 successor of BB. We then revector the incoming edges into BB to
1485 the appropriate duplicate of BB.
1487 If NOLOOP_ONLY is true, we only perform the threading as long as it
1488 does not affect the structure of the loops in a nontrivial way.
1490 If JOINERS is true, then thread through joiner blocks as well. */
1493 thread_block_1 (basic_block bb
, bool noloop_only
, bool joiners
)
1495 /* E is an incoming edge into BB that we may or may not want to
1496 redirect to a duplicate of BB. */
1499 ssa_local_info_t local_info
;
1501 local_info
.duplicate_blocks
= BITMAP_ALLOC (NULL
);
1503 /* To avoid scanning a linear array for the element we need we instead
1504 use a hash table. For normal code there should be no noticeable
1505 difference. However, if we have a block with a large number of
1506 incoming and outgoing edges such linear searches can get expensive. */
1508 = new hash_table
<struct redirection_data
> (EDGE_COUNT (bb
->succs
));
1510 /* Record each unique threaded destination into a hash table for
1511 efficient lookups. */
1512 FOR_EACH_EDGE (e
, ei
, bb
->preds
)
1517 vec
<jump_thread_edge
*> *path
= THREAD_PATH (e
);
1519 if (((*path
)[1]->type
== EDGE_COPY_SRC_JOINER_BLOCK
&& !joiners
)
1520 || ((*path
)[1]->type
== EDGE_COPY_SRC_BLOCK
&& joiners
))
1523 e2
= path
->last ()->e
;
1524 if (!e2
|| noloop_only
)
1526 /* If NOLOOP_ONLY is true, we only allow threading through the
1527 header of a loop to exit edges. */
1529 /* One case occurs when there was loop header buried in a jump
1530 threading path that crosses loop boundaries. We do not try
1531 and thread this elsewhere, so just cancel the jump threading
1532 request by clearing the AUX field now. */
1533 if ((bb
->loop_father
!= e2
->src
->loop_father
1534 && !loop_exit_edge_p (e2
->src
->loop_father
, e2
))
1535 || (e2
->src
->loop_father
!= e2
->dest
->loop_father
1536 && !loop_exit_edge_p (e2
->src
->loop_father
, e2
)))
1538 /* Since this case is not handled by our special code
1539 to thread through a loop header, we must explicitly
1540 cancel the threading request here. */
1541 delete_jump_thread_path (path
);
1546 /* Another case occurs when trying to thread through our
1547 own loop header, possibly from inside the loop. We will
1548 thread these later. */
1550 for (i
= 1; i
< path
->length (); i
++)
1552 if ((*path
)[i
]->e
->src
== bb
->loop_father
->header
1553 && (!loop_exit_edge_p (bb
->loop_father
, e2
)
1554 || (*path
)[1]->type
== EDGE_COPY_SRC_JOINER_BLOCK
))
1558 if (i
!= path
->length ())
1562 /* Insert the outgoing edge into the hash table if it is not
1563 already in the hash table. */
1564 lookup_redirection_data (e
, INSERT
);
1567 /* We do not update dominance info. */
1568 free_dominance_info (CDI_DOMINATORS
);
1570 /* We know we only thread through the loop header to loop exits.
1571 Let the basic block duplication hook know we are not creating
1572 a multiple entry loop. */
1574 && bb
== bb
->loop_father
->header
)
1575 set_loop_copy (bb
->loop_father
, loop_outer (bb
->loop_father
));
1577 /* Now create duplicates of BB.
1579 Note that for a block with a high outgoing degree we can waste
1580 a lot of time and memory creating and destroying useless edges.
1582 So we first duplicate BB and remove the control structure at the
1583 tail of the duplicate as well as all outgoing edges from the
1584 duplicate. We then use that duplicate block as a template for
1585 the rest of the duplicates. */
1586 local_info
.template_block
= NULL
;
1588 local_info
.jumps_threaded
= false;
1589 redirection_data
->traverse
<ssa_local_info_t
*, ssa_create_duplicates
>
1592 /* The template does not have an outgoing edge. Create that outgoing
1593 edge and update PHI nodes as the edge's target as necessary.
1595 We do this after creating all the duplicates to avoid creating
1596 unnecessary edges. */
1597 redirection_data
->traverse
<ssa_local_info_t
*, ssa_fixup_template_block
>
1600 /* The hash table traversals above created the duplicate blocks (and the
1601 statements within the duplicate blocks). This loop creates PHI nodes for
1602 the duplicated blocks and redirects the incoming edges into BB to reach
1603 the duplicates of BB. */
1604 redirection_data
->traverse
<ssa_local_info_t
*, ssa_redirect_edges
>
1607 /* Done with this block. Clear REDIRECTION_DATA. */
1608 delete redirection_data
;
1609 redirection_data
= NULL
;
1612 && bb
== bb
->loop_father
->header
)
1613 set_loop_copy (bb
->loop_father
, NULL
);
1615 BITMAP_FREE (local_info
.duplicate_blocks
);
1616 local_info
.duplicate_blocks
= NULL
;
1618 /* Indicate to our caller whether or not any jumps were threaded. */
1619 return local_info
.jumps_threaded
;
1622 /* Wrapper for thread_block_1 so that we can first handle jump
1623 thread paths which do not involve copying joiner blocks, then
1624 handle jump thread paths which have joiner blocks.
1626 By doing things this way we can be as aggressive as possible and
1627 not worry that copying a joiner block will create a jump threading
1631 thread_block (basic_block bb
, bool noloop_only
)
1634 retval
= thread_block_1 (bb
, noloop_only
, false);
1635 retval
|= thread_block_1 (bb
, noloop_only
, true);
1640 /* Threads edge E through E->dest to the edge THREAD_TARGET (E). Returns the
1641 copy of E->dest created during threading, or E->dest if it was not necessary
1642 to copy it (E is its single predecessor). */
1645 thread_single_edge (edge e
)
1647 basic_block bb
= e
->dest
;
1648 struct redirection_data rd
;
1649 vec
<jump_thread_edge
*> *path
= THREAD_PATH (e
);
1650 edge eto
= (*path
)[1]->e
;
1652 delete_jump_thread_path (path
);
1655 thread_stats
.num_threaded_edges
++;
1657 if (single_pred_p (bb
))
1659 /* If BB has just a single predecessor, we should only remove the
1660 control statements at its end, and successors except for ETO. */
1661 remove_ctrl_stmt_and_useless_edges (bb
, eto
->dest
);
1663 /* And fixup the flags on the single remaining edge. */
1664 eto
->flags
&= ~(EDGE_TRUE_VALUE
| EDGE_FALSE_VALUE
| EDGE_ABNORMAL
);
1665 eto
->flags
|= EDGE_FALLTHRU
;
1670 /* Otherwise, we need to create a copy. */
1671 if (e
->dest
== eto
->src
)
1672 update_bb_profile_for_threading (bb
, EDGE_FREQUENCY (e
), e
->count
, eto
);
1674 vec
<jump_thread_edge
*> *npath
= new vec
<jump_thread_edge
*> ();
1675 jump_thread_edge
*x
= new jump_thread_edge (e
, EDGE_START_JUMP_THREAD
);
1676 npath
->safe_push (x
);
1678 x
= new jump_thread_edge (eto
, EDGE_COPY_SRC_BLOCK
);
1679 npath
->safe_push (x
);
1682 create_block_for_threading (bb
, &rd
, 0, NULL
);
1683 remove_ctrl_stmt_and_useless_edges (rd
.dup_blocks
[0], NULL
);
1684 create_edge_and_update_destination_phis (&rd
, rd
.dup_blocks
[0], 0);
1686 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
1687 fprintf (dump_file
, " Threaded jump %d --> %d to %d\n",
1688 e
->src
->index
, e
->dest
->index
, rd
.dup_blocks
[0]->index
);
1690 rd
.dup_blocks
[0]->count
= e
->count
;
1691 rd
.dup_blocks
[0]->frequency
= EDGE_FREQUENCY (e
);
1692 single_succ_edge (rd
.dup_blocks
[0])->count
= e
->count
;
1693 redirect_edge_and_branch (e
, rd
.dup_blocks
[0]);
1694 flush_pending_stmts (e
);
1696 delete_jump_thread_path (npath
);
1697 return rd
.dup_blocks
[0];
1700 /* Callback for dfs_enumerate_from. Returns true if BB is different
1701 from STOP and DBDS_CE_STOP. */
1703 static basic_block dbds_ce_stop
;
1705 dbds_continue_enumeration_p (const_basic_block bb
, const void *stop
)
1707 return (bb
!= (const_basic_block
) stop
1708 && bb
!= dbds_ce_stop
);
1711 /* Evaluates the dominance relationship of latch of the LOOP and BB, and
1712 returns the state. */
1716 /* BB does not dominate latch of the LOOP. */
1717 DOMST_NONDOMINATING
,
1718 /* The LOOP is broken (there is no path from the header to its latch. */
1720 /* BB dominates the latch of the LOOP. */
1724 static enum bb_dom_status
1725 determine_bb_domination_status (struct loop
*loop
, basic_block bb
)
1727 basic_block
*bblocks
;
1728 unsigned nblocks
, i
;
1729 bool bb_reachable
= false;
1733 /* This function assumes BB is a successor of LOOP->header.
1734 If that is not the case return DOMST_NONDOMINATING which
1739 FOR_EACH_EDGE (e
, ei
, bb
->preds
)
1741 if (e
->src
== loop
->header
)
1749 return DOMST_NONDOMINATING
;
1752 if (bb
== loop
->latch
)
1753 return DOMST_DOMINATING
;
1755 /* Check that BB dominates LOOP->latch, and that it is back-reachable
1758 bblocks
= XCNEWVEC (basic_block
, loop
->num_nodes
);
1759 dbds_ce_stop
= loop
->header
;
1760 nblocks
= dfs_enumerate_from (loop
->latch
, 1, dbds_continue_enumeration_p
,
1761 bblocks
, loop
->num_nodes
, bb
);
1762 for (i
= 0; i
< nblocks
; i
++)
1763 FOR_EACH_EDGE (e
, ei
, bblocks
[i
]->preds
)
1765 if (e
->src
== loop
->header
)
1768 return DOMST_NONDOMINATING
;
1771 bb_reachable
= true;
1775 return (bb_reachable
? DOMST_DOMINATING
: DOMST_LOOP_BROKEN
);
1778 /* Return true if BB is part of the new pre-header that is created
1779 when threading the latch to DATA. */
1782 def_split_header_continue_p (const_basic_block bb
, const void *data
)
1784 const_basic_block new_header
= (const_basic_block
) data
;
1785 const struct loop
*l
;
1787 if (bb
== new_header
1788 || loop_depth (bb
->loop_father
) < loop_depth (new_header
->loop_father
))
1790 for (l
= bb
->loop_father
; l
; l
= loop_outer (l
))
1791 if (l
== new_header
->loop_father
)
1796 /* Thread jumps through the header of LOOP. Returns true if cfg changes.
1797 If MAY_PEEL_LOOP_HEADERS is false, we avoid threading from entry edges
1798 to the inside of the loop. */
1801 thread_through_loop_header (struct loop
*loop
, bool may_peel_loop_headers
)
1803 basic_block header
= loop
->header
;
1804 edge e
, tgt_edge
, latch
= loop_latch_edge (loop
);
1806 basic_block tgt_bb
, atgt_bb
;
1807 enum bb_dom_status domst
;
1809 /* We have already threaded through headers to exits, so all the threading
1810 requests now are to the inside of the loop. We need to avoid creating
1811 irreducible regions (i.e., loops with more than one entry block), and
1812 also loop with several latch edges, or new subloops of the loop (although
1813 there are cases where it might be appropriate, it is difficult to decide,
1814 and doing it wrongly may confuse other optimizers).
1816 We could handle more general cases here. However, the intention is to
1817 preserve some information about the loop, which is impossible if its
1818 structure changes significantly, in a way that is not well understood.
1819 Thus we only handle few important special cases, in which also updating
1820 of the loop-carried information should be feasible:
1822 1) Propagation of latch edge to a block that dominates the latch block
1823 of a loop. This aims to handle the following idiom:
1834 After threading the latch edge, this becomes
1845 The original header of the loop is moved out of it, and we may thread
1846 the remaining edges through it without further constraints.
1848 2) All entry edges are propagated to a single basic block that dominates
1849 the latch block of the loop. This aims to handle the following idiom
1850 (normally created for "for" loops):
1873 /* Threading through the header won't improve the code if the header has just
1875 if (single_succ_p (header
))
1878 /* If we threaded the latch using a joiner block, we cancel the
1879 threading opportunity out of an abundance of caution. However,
1880 still allow threading from outside to inside the loop. */
1883 vec
<jump_thread_edge
*> *path
= THREAD_PATH (latch
);
1884 if ((*path
)[1]->type
== EDGE_COPY_SRC_JOINER_BLOCK
)
1886 delete_jump_thread_path (path
);
1893 vec
<jump_thread_edge
*> *path
= THREAD_PATH (latch
);
1894 tgt_edge
= (*path
)[1]->e
;
1895 tgt_bb
= tgt_edge
->dest
;
1897 else if (!may_peel_loop_headers
1898 && !redirection_block_p (loop
->header
))
1904 FOR_EACH_EDGE (e
, ei
, header
->preds
)
1911 /* If latch is not threaded, and there is a header
1912 edge that is not threaded, we would create loop
1913 with multiple entries. */
1917 vec
<jump_thread_edge
*> *path
= THREAD_PATH (e
);
1919 if ((*path
)[1]->type
== EDGE_COPY_SRC_JOINER_BLOCK
)
1921 tgt_edge
= (*path
)[1]->e
;
1922 atgt_bb
= tgt_edge
->dest
;
1925 /* Two targets of threading would make us create loop
1926 with multiple entries. */
1927 else if (tgt_bb
!= atgt_bb
)
1933 /* There are no threading requests. */
1937 /* Redirecting to empty loop latch is useless. */
1938 if (tgt_bb
== loop
->latch
1939 && empty_block_p (loop
->latch
))
1943 /* The target block must dominate the loop latch, otherwise we would be
1944 creating a subloop. */
1945 domst
= determine_bb_domination_status (loop
, tgt_bb
);
1946 if (domst
== DOMST_NONDOMINATING
)
1948 if (domst
== DOMST_LOOP_BROKEN
)
1950 /* If the loop ceased to exist, mark it as such, and thread through its
1952 mark_loop_for_removal (loop
);
1953 return thread_block (header
, false);
1956 if (tgt_bb
->loop_father
->header
== tgt_bb
)
1958 /* If the target of the threading is a header of a subloop, we need
1959 to create a preheader for it, so that the headers of the two loops
1961 if (EDGE_COUNT (tgt_bb
->preds
) > 2)
1963 tgt_bb
= create_preheader (tgt_bb
->loop_father
, 0);
1964 gcc_assert (tgt_bb
!= NULL
);
1967 tgt_bb
= split_edge (tgt_edge
);
1972 basic_block
*bblocks
;
1973 unsigned nblocks
, i
;
1975 /* First handle the case latch edge is redirected. We are copying
1976 the loop header but not creating a multiple entry loop. Make the
1977 cfg manipulation code aware of that fact. */
1978 set_loop_copy (loop
, loop
);
1979 loop
->latch
= thread_single_edge (latch
);
1980 set_loop_copy (loop
, NULL
);
1981 gcc_assert (single_succ (loop
->latch
) == tgt_bb
);
1982 loop
->header
= tgt_bb
;
1984 /* Remove the new pre-header blocks from our loop. */
1985 bblocks
= XCNEWVEC (basic_block
, loop
->num_nodes
);
1986 nblocks
= dfs_enumerate_from (header
, 0, def_split_header_continue_p
,
1987 bblocks
, loop
->num_nodes
, tgt_bb
);
1988 for (i
= 0; i
< nblocks
; i
++)
1989 if (bblocks
[i
]->loop_father
== loop
)
1991 remove_bb_from_loops (bblocks
[i
]);
1992 add_bb_to_loop (bblocks
[i
], loop_outer (loop
));
1996 /* If the new header has multiple latches mark it so. */
1997 FOR_EACH_EDGE (e
, ei
, loop
->header
->preds
)
1998 if (e
->src
->loop_father
== loop
1999 && e
->src
!= loop
->latch
)
2002 loops_state_set (LOOPS_MAY_HAVE_MULTIPLE_LATCHES
);
2005 /* Cancel remaining threading requests that would make the
2006 loop a multiple entry loop. */
2007 FOR_EACH_EDGE (e
, ei
, header
->preds
)
2014 vec
<jump_thread_edge
*> *path
= THREAD_PATH (e
);
2015 e2
= path
->last ()->e
;
2017 if (e
->src
->loop_father
!= e2
->dest
->loop_father
2018 && e2
->dest
!= loop
->header
)
2020 delete_jump_thread_path (path
);
2025 /* Thread the remaining edges through the former header. */
2026 thread_block (header
, false);
2030 basic_block new_preheader
;
2032 /* Now consider the case entry edges are redirected to the new entry
2033 block. Remember one entry edge, so that we can find the new
2034 preheader (its destination after threading). */
2035 FOR_EACH_EDGE (e
, ei
, header
->preds
)
2041 /* The duplicate of the header is the new preheader of the loop. Ensure
2042 that it is placed correctly in the loop hierarchy. */
2043 set_loop_copy (loop
, loop_outer (loop
));
2045 thread_block (header
, false);
2046 set_loop_copy (loop
, NULL
);
2047 new_preheader
= e
->dest
;
2049 /* Create the new latch block. This is always necessary, as the latch
2050 must have only a single successor, but the original header had at
2051 least two successors. */
2053 mfb_kj_edge
= single_succ_edge (new_preheader
);
2054 loop
->header
= mfb_kj_edge
->dest
;
2055 latch
= make_forwarder_block (tgt_bb
, mfb_keep_just
, NULL
);
2056 loop
->header
= latch
->dest
;
2057 loop
->latch
= latch
->src
;
2063 /* We failed to thread anything. Cancel the requests. */
2064 FOR_EACH_EDGE (e
, ei
, header
->preds
)
2066 vec
<jump_thread_edge
*> *path
= THREAD_PATH (e
);
2070 delete_jump_thread_path (path
);
2077 /* E1 and E2 are edges into the same basic block. Return TRUE if the
2078 PHI arguments associated with those edges are equal or there are no
2079 PHI arguments, otherwise return FALSE. */
2082 phi_args_equal_on_edges (edge e1
, edge e2
)
2085 int indx1
= e1
->dest_idx
;
2086 int indx2
= e2
->dest_idx
;
2088 for (gsi
= gsi_start_phis (e1
->dest
); !gsi_end_p (gsi
); gsi_next (&gsi
))
2090 gphi
*phi
= gsi
.phi ();
2092 if (!operand_equal_p (gimple_phi_arg_def (phi
, indx1
),
2093 gimple_phi_arg_def (phi
, indx2
), 0))
2099 /* Walk through the registered jump threads and convert them into a
2100 form convenient for this pass.
2102 Any block which has incoming edges threaded to outgoing edges
2103 will have its entry in THREADED_BLOCK set.
2105 Any threaded edge will have its new outgoing edge stored in the
2106 original edge's AUX field.
2108 This form avoids the need to walk all the edges in the CFG to
2109 discover blocks which need processing and avoids unnecessary
2110 hash table lookups to map from threaded edge to new target. */
2113 mark_threaded_blocks (bitmap threaded_blocks
)
2117 bitmap tmp
= BITMAP_ALLOC (NULL
);
2122 /* It is possible to have jump threads in which one is a subpath
2123 of the other. ie, (A, B), (B, C), (C, D) where B is a joiner
2124 block and (B, C), (C, D) where no joiner block exists.
2126 When this occurs ignore the jump thread request with the joiner
2127 block. It's totally subsumed by the simpler jump thread request.
2129 This results in less block copying, simpler CFGs. More importantly,
2130 when we duplicate the joiner block, B, in this case we will create
2131 a new threading opportunity that we wouldn't be able to optimize
2132 until the next jump threading iteration.
2134 So first convert the jump thread requests which do not require a
2136 for (i
= 0; i
< paths
.length (); i
++)
2138 vec
<jump_thread_edge
*> *path
= paths
[i
];
2140 if ((*path
)[1]->type
!= EDGE_COPY_SRC_JOINER_BLOCK
)
2142 edge e
= (*path
)[0]->e
;
2143 e
->aux
= (void *)path
;
2144 bitmap_set_bit (tmp
, e
->dest
->index
);
2148 /* Now iterate again, converting cases where we want to thread
2149 through a joiner block, but only if no other edge on the path
2150 already has a jump thread attached to it. We do this in two passes,
2151 to avoid situations where the order in the paths vec can hide overlapping
2152 threads (the path is recorded on the incoming edge, so we would miss
2153 cases where the second path starts at a downstream edge on the same
2154 path). First record all joiner paths, deleting any in the unexpected
2155 case where there is already a path for that incoming edge. */
2156 for (i
= 0; i
< paths
.length ();)
2158 vec
<jump_thread_edge
*> *path
= paths
[i
];
2160 if ((*path
)[1]->type
== EDGE_COPY_SRC_JOINER_BLOCK
)
2162 /* Attach the path to the starting edge if none is yet recorded. */
2163 if ((*path
)[0]->e
->aux
== NULL
)
2165 (*path
)[0]->e
->aux
= path
;
2170 paths
.unordered_remove (i
);
2171 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
2172 dump_jump_thread_path (dump_file
, *path
, false);
2173 delete_jump_thread_path (path
);
2182 /* Second, look for paths that have any other jump thread attached to
2183 them, and either finish converting them or cancel them. */
2184 for (i
= 0; i
< paths
.length ();)
2186 vec
<jump_thread_edge
*> *path
= paths
[i
];
2187 edge e
= (*path
)[0]->e
;
2189 if ((*path
)[1]->type
== EDGE_COPY_SRC_JOINER_BLOCK
&& e
->aux
== path
)
2192 for (j
= 1; j
< path
->length (); j
++)
2193 if ((*path
)[j
]->e
->aux
!= NULL
)
2196 /* If we iterated through the entire path without exiting the loop,
2197 then we are good to go, record it. */
2198 if (j
== path
->length ())
2200 bitmap_set_bit (tmp
, e
->dest
->index
);
2206 paths
.unordered_remove (i
);
2207 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
2208 dump_jump_thread_path (dump_file
, *path
, false);
2209 delete_jump_thread_path (path
);
2218 /* If optimizing for size, only thread through block if we don't have
2219 to duplicate it or it's an otherwise empty redirection block. */
2220 if (optimize_function_for_size_p (cfun
))
2222 EXECUTE_IF_SET_IN_BITMAP (tmp
, 0, i
, bi
)
2224 bb
= BASIC_BLOCK_FOR_FN (cfun
, i
);
2225 if (EDGE_COUNT (bb
->preds
) > 1
2226 && !redirection_block_p (bb
))
2228 FOR_EACH_EDGE (e
, ei
, bb
->preds
)
2232 vec
<jump_thread_edge
*> *path
= THREAD_PATH (e
);
2233 delete_jump_thread_path (path
);
2239 bitmap_set_bit (threaded_blocks
, i
);
2243 bitmap_copy (threaded_blocks
, tmp
);
2245 /* Look for jump threading paths which cross multiple loop headers.
2247 The code to thread through loop headers will change the CFG in ways
2248 that break assumptions made by the loop optimization code.
2250 We don't want to blindly cancel the requests. We can instead do better
2251 by trimming off the end of the jump thread path. */
2252 EXECUTE_IF_SET_IN_BITMAP (tmp
, 0, i
, bi
)
2254 basic_block bb
= BASIC_BLOCK_FOR_FN (cfun
, i
);
2255 FOR_EACH_EDGE (e
, ei
, bb
->preds
)
2259 vec
<jump_thread_edge
*> *path
= THREAD_PATH (e
);
2261 for (unsigned int i
= 0, crossed_headers
= 0;
2262 i
< path
->length ();
2265 basic_block dest
= (*path
)[i
]->e
->dest
;
2266 crossed_headers
+= (dest
== dest
->loop_father
->header
);
2267 if (crossed_headers
> 1)
2269 /* Trim from entry I onwards. */
2270 for (unsigned int j
= i
; j
< path
->length (); j
++)
2274 /* Now that we've truncated the path, make sure
2275 what's left is still valid. We need at least
2276 two edges on the path and the last edge can not
2277 be a joiner. This should never happen, but let's
2279 if (path
->length () < 2
2280 || (path
->last ()->type
2281 == EDGE_COPY_SRC_JOINER_BLOCK
))
2283 delete_jump_thread_path (path
);
2293 /* If we have a joiner block (J) which has two successors S1 and S2 and
2294 we are threading though S1 and the final destination of the thread
2295 is S2, then we must verify that any PHI nodes in S2 have the same
2296 PHI arguments for the edge J->S2 and J->S1->...->S2.
2298 We used to detect this prior to registering the jump thread, but
2299 that prohibits propagation of edge equivalences into non-dominated
2300 PHI nodes as the equivalency test might occur before propagation.
2302 This must also occur after we truncate any jump threading paths
2303 as this scenario may only show up after truncation.
2305 This works for now, but will need improvement as part of the FSA
2308 Note since we've moved the thread request data to the edges,
2309 we have to iterate on those rather than the threaded_edges vector. */
2310 EXECUTE_IF_SET_IN_BITMAP (tmp
, 0, i
, bi
)
2312 bb
= BASIC_BLOCK_FOR_FN (cfun
, i
);
2313 FOR_EACH_EDGE (e
, ei
, bb
->preds
)
2317 vec
<jump_thread_edge
*> *path
= THREAD_PATH (e
);
2318 bool have_joiner
= ((*path
)[1]->type
== EDGE_COPY_SRC_JOINER_BLOCK
);
2322 basic_block joiner
= e
->dest
;
2323 edge final_edge
= path
->last ()->e
;
2324 basic_block final_dest
= final_edge
->dest
;
2325 edge e2
= find_edge (joiner
, final_dest
);
2327 if (e2
&& !phi_args_equal_on_edges (e2
, final_edge
))
2329 delete_jump_thread_path (path
);
2341 /* Return TRUE if BB ends with a switch statement or a computed goto.
2342 Otherwise return false. */
2344 bb_ends_with_multiway_branch (basic_block bb ATTRIBUTE_UNUSED
)
2346 gimple
*stmt
= last_stmt (bb
);
2347 if (stmt
&& gimple_code (stmt
) == GIMPLE_SWITCH
)
2349 if (stmt
&& gimple_code (stmt
) == GIMPLE_GOTO
2350 && TREE_CODE (gimple_goto_dest (stmt
)) == SSA_NAME
)
2355 /* Verify that the REGION is a valid jump thread. A jump thread is a special
2356 case of SEME Single Entry Multiple Exits region in which all nodes in the
2357 REGION have exactly one incoming edge. The only exception is the first block
2358 that may not have been connected to the rest of the cfg yet. */
2361 verify_jump_thread (basic_block
*region
, unsigned n_region
)
2363 for (unsigned i
= 0; i
< n_region
; i
++)
2364 gcc_assert (EDGE_COUNT (region
[i
]->preds
) <= 1);
2367 /* Return true when BB is one of the first N items in BBS. */
2370 bb_in_bbs (basic_block bb
, basic_block
*bbs
, int n
)
2372 for (int i
= 0; i
< n
; i
++)
2379 /* Duplicates a jump-thread path of N_REGION basic blocks.
2380 The ENTRY edge is redirected to the duplicate of the region.
2382 Remove the last conditional statement in the last basic block in the REGION,
2383 and create a single fallthru edge pointing to the same destination as the
2386 The new basic blocks are stored to REGION_COPY in the same order as they had
2387 in REGION, provided that REGION_COPY is not NULL.
2389 Returns false if it is unable to copy the region, true otherwise. */
2392 duplicate_thread_path (edge entry
, edge exit
,
2393 basic_block
*region
, unsigned n_region
,
2394 basic_block
*region_copy
)
2397 bool free_region_copy
= false;
2398 struct loop
*loop
= entry
->dest
->loop_father
;
2401 int total_freq
= 0, entry_freq
= 0;
2402 gcov_type total_count
= 0, entry_count
= 0;
2404 if (!can_copy_bbs_p (region
, n_region
))
2407 /* Some sanity checking. Note that we do not check for all possible
2408 missuses of the functions. I.e. if you ask to copy something weird,
2409 it will work, but the state of structures probably will not be
2411 for (i
= 0; i
< n_region
; i
++)
2413 /* We do not handle subloops, i.e. all the blocks must belong to the
2415 if (region
[i
]->loop_father
!= loop
)
2419 initialize_original_copy_tables ();
2421 set_loop_copy (loop
, loop
);
2425 region_copy
= XNEWVEC (basic_block
, n_region
);
2426 free_region_copy
= true;
2429 if (entry
->dest
->count
)
2431 total_count
= entry
->dest
->count
;
2432 entry_count
= entry
->count
;
2433 /* Fix up corner cases, to avoid division by zero or creation of negative
2435 if (entry_count
> total_count
)
2436 entry_count
= total_count
;
2440 total_freq
= entry
->dest
->frequency
;
2441 entry_freq
= EDGE_FREQUENCY (entry
);
2442 /* Fix up corner cases, to avoid division by zero or creation of negative
2444 if (total_freq
== 0)
2446 else if (entry_freq
> total_freq
)
2447 entry_freq
= total_freq
;
2450 copy_bbs (region
, n_region
, region_copy
, &exit
, 1, &exit_copy
, loop
,
2451 split_edge_bb_loc (entry
), false);
2453 /* Fix up: copy_bbs redirects all edges pointing to copied blocks. The
2454 following code ensures that all the edges exiting the jump-thread path are
2455 redirected back to the original code: these edges are exceptions
2456 invalidating the property that is propagated by executing all the blocks of
2457 the jump-thread path in order. */
2459 for (i
= 0; i
< n_region
; i
++)
2463 basic_block bb
= region_copy
[i
];
2465 if (single_succ_p (bb
))
2467 /* Make sure the successor is the next node in the path. */
2468 gcc_assert (i
+ 1 == n_region
2469 || region_copy
[i
+ 1] == single_succ_edge (bb
)->dest
);
2473 /* Special case the last block on the path: make sure that it does not
2474 jump back on the copied path. */
2475 if (i
+ 1 == n_region
)
2477 FOR_EACH_EDGE (e
, ei
, bb
->succs
)
2478 if (bb_in_bbs (e
->dest
, region_copy
, n_region
- 1))
2480 basic_block orig
= get_bb_original (e
->dest
);
2482 redirect_edge_and_branch_force (e
, orig
);
2487 /* Redirect all other edges jumping to non-adjacent blocks back to the
2489 FOR_EACH_EDGE (e
, ei
, bb
->succs
)
2490 if (region_copy
[i
+ 1] != e
->dest
)
2492 basic_block orig
= get_bb_original (e
->dest
);
2494 redirect_edge_and_branch_force (e
, orig
);
2500 scale_bbs_frequencies_gcov_type (region
, n_region
,
2501 total_count
- entry_count
,
2503 scale_bbs_frequencies_gcov_type (region_copy
, n_region
, entry_count
,
2508 scale_bbs_frequencies_int (region
, n_region
, total_freq
- entry_freq
,
2510 scale_bbs_frequencies_int (region_copy
, n_region
, entry_freq
, total_freq
);
2514 verify_jump_thread (region_copy
, n_region
);
2516 /* Remove the last branch in the jump thread path. */
2517 remove_ctrl_stmt_and_useless_edges (region_copy
[n_region
- 1], exit
->dest
);
2519 /* And fixup the flags on the single remaining edge. */
2520 edge fix_e
= find_edge (region_copy
[n_region
- 1], exit
->dest
);
2521 fix_e
->flags
&= ~(EDGE_TRUE_VALUE
| EDGE_FALSE_VALUE
| EDGE_ABNORMAL
);
2522 fix_e
->flags
|= EDGE_FALLTHRU
;
2524 edge e
= make_edge (region_copy
[n_region
- 1], exit
->dest
, EDGE_FALLTHRU
);
2527 rescan_loop_exit (e
, true, false);
2528 e
->probability
= REG_BR_PROB_BASE
;
2529 e
->count
= region_copy
[n_region
- 1]->count
;
2532 /* Redirect the entry and add the phi node arguments. */
2533 if (entry
->dest
== loop
->header
)
2534 mark_loop_for_removal (loop
);
2535 redirected
= redirect_edge_and_branch (entry
, get_bb_copy (entry
->dest
));
2536 gcc_assert (redirected
!= NULL
);
2537 flush_pending_stmts (entry
);
2539 /* Add the other PHI node arguments. */
2540 add_phi_args_after_copy (region_copy
, n_region
, NULL
);
2542 if (free_region_copy
)
2545 free_original_copy_tables ();
2549 /* Return true when PATH is a valid jump-thread path. */
2552 valid_jump_thread_path (vec
<jump_thread_edge
*> *path
)
2554 unsigned len
= path
->length ();
2555 bool multiway_branch
= false;
2557 /* Check that the path is connected and see if there's a multi-way
2558 branch on the path. */
2559 for (unsigned int j
= 0; j
< len
- 1; j
++)
2561 if ((*path
)[j
]->e
->dest
!= (*path
)[j
+1]->e
->src
)
2563 gimple
*last
= last_stmt ((*path
)[j
]->e
->dest
);
2564 multiway_branch
|= (last
&& gimple_code (last
) == GIMPLE_SWITCH
);
2567 /* If we are trying to thread the loop latch to a block that does
2568 not dominate the loop latch, then that will create an irreducible
2569 loop. We avoid that unless the jump thread has a multi-way
2570 branch, in which case we have deemed it worth losing other
2571 loop optimizations later if we can eliminate the multi-way branch. */
2572 edge e
= (*path
)[0]->e
;
2573 struct loop
*loop
= e
->dest
->loop_father
;
2574 if (!multiway_branch
2576 && loop_latch_edge (loop
) == e
2577 && (determine_bb_domination_status (loop
, path
->last ()->e
->dest
)
2578 == DOMST_NONDOMINATING
))
2584 /* Remove any queued jump threads that include edge E.
2586 We don't actually remove them here, just record the edges into ax
2587 hash table. That way we can do the search once per iteration of
2588 DOM/VRP rather than for every case where DOM optimizes away a COND_EXPR. */
2591 remove_jump_threads_including (edge_def
*e
)
2593 if (!paths
.exists ())
2597 removed_edges
= new hash_table
<struct removed_edges
> (17);
2599 edge
*slot
= removed_edges
->find_slot (e
, INSERT
);
2603 /* Walk through all blocks and thread incoming edges to the appropriate
2604 outgoing edge for each edge pair recorded in THREADED_EDGES.
2606 It is the caller's responsibility to fix the dominance information
2607 and rewrite duplicated SSA_NAMEs back into SSA form.
2609 If MAY_PEEL_LOOP_HEADERS is false, we avoid threading edges through
2610 loop headers if it does not simplify the loop.
2612 Returns true if one or more edges were threaded, false otherwise. */
2615 thread_through_all_blocks (bool may_peel_loop_headers
)
2617 bool retval
= false;
2620 bitmap threaded_blocks
;
2623 if (!paths
.exists ())
2629 threaded_blocks
= BITMAP_ALLOC (NULL
);
2630 memset (&thread_stats
, 0, sizeof (thread_stats
));
2632 /* Remove any paths that referenced removed edges. */
2634 for (i
= 0; i
< paths
.length (); )
2637 vec
<jump_thread_edge
*> *path
= paths
[i
];
2639 for (j
= 0; j
< path
->length (); j
++)
2641 edge e
= (*path
)[j
]->e
;
2642 if (removed_edges
->find_slot (e
, NO_INSERT
))
2646 if (j
!= path
->length ())
2648 delete_jump_thread_path (path
);
2649 paths
.unordered_remove (i
);
2655 /* Jump-thread all FSM threads before other jump-threads. */
2656 for (i
= 0; i
< paths
.length ();)
2658 vec
<jump_thread_edge
*> *path
= paths
[i
];
2659 edge entry
= (*path
)[0]->e
;
2661 /* Only code-generate FSM jump-threads in this loop. */
2662 if ((*path
)[0]->type
!= EDGE_FSM_THREAD
)
2668 /* Do not jump-thread twice from the same block. */
2669 if (bitmap_bit_p (threaded_blocks
, entry
->src
->index
)
2670 /* Verify that the jump thread path is still valid: a
2671 previous jump-thread may have changed the CFG, and
2672 invalidated the current path or the requested jump
2673 thread might create irreducible loops which should
2674 generally be avoided. */
2675 || !valid_jump_thread_path (path
))
2677 /* Remove invalid FSM jump-thread paths. */
2678 delete_jump_thread_path (path
);
2679 paths
.unordered_remove (i
);
2683 unsigned len
= path
->length ();
2684 edge exit
= (*path
)[len
- 1]->e
;
2685 basic_block
*region
= XNEWVEC (basic_block
, len
- 1);
2687 for (unsigned int j
= 0; j
< len
- 1; j
++)
2688 region
[j
] = (*path
)[j
]->e
->dest
;
2690 if (duplicate_thread_path (entry
, exit
, region
, len
- 1, NULL
))
2692 /* We do not update dominance info. */
2693 free_dominance_info (CDI_DOMINATORS
);
2694 bitmap_set_bit (threaded_blocks
, entry
->src
->index
);
2696 thread_stats
.num_threaded_edges
++;
2699 delete_jump_thread_path (path
);
2700 paths
.unordered_remove (i
);
2703 /* Remove from PATHS all the jump-threads starting with an edge already
2705 for (i
= 0; i
< paths
.length ();)
2707 vec
<jump_thread_edge
*> *path
= paths
[i
];
2708 edge entry
= (*path
)[0]->e
;
2710 /* Do not jump-thread twice from the same block. */
2711 if (bitmap_bit_p (threaded_blocks
, entry
->src
->index
))
2713 delete_jump_thread_path (path
);
2714 paths
.unordered_remove (i
);
2720 bitmap_clear (threaded_blocks
);
2722 mark_threaded_blocks (threaded_blocks
);
2724 initialize_original_copy_tables ();
2726 /* First perform the threading requests that do not affect
2728 EXECUTE_IF_SET_IN_BITMAP (threaded_blocks
, 0, i
, bi
)
2730 basic_block bb
= BASIC_BLOCK_FOR_FN (cfun
, i
);
2732 if (EDGE_COUNT (bb
->preds
) > 0)
2733 retval
|= thread_block (bb
, true);
2736 /* Then perform the threading through loop headers. We start with the
2737 innermost loop, so that the changes in cfg we perform won't affect
2738 further threading. */
2739 FOR_EACH_LOOP (loop
, LI_FROM_INNERMOST
)
2742 || !bitmap_bit_p (threaded_blocks
, loop
->header
->index
))
2745 retval
|= thread_through_loop_header (loop
, may_peel_loop_headers
);
2748 /* Any jump threading paths that are still attached to edges at this
2749 point must be one of two cases.
2751 First, we could have a jump threading path which went from outside
2752 a loop to inside a loop that was ignored because a prior jump thread
2753 across a backedge was realized (which indirectly causes the loop
2754 above to ignore the latter thread). We can detect these because the
2755 loop structures will be different and we do not currently try to
2758 Second, we could be threading across a backedge to a point within the
2759 same loop. This occurrs for the FSA/FSM optimization and we would
2760 like to optimize it. However, we have to be very careful as this
2761 may completely scramble the loop structures, with the result being
2762 irreducible loops causing us to throw away our loop structure.
2764 As a compromise for the latter case, if the thread path ends in
2765 a block where the last statement is a multiway branch, then go
2766 ahead and thread it, else ignore it. */
2769 FOR_EACH_BB_FN (bb
, cfun
)
2771 /* If we do end up threading here, we can remove elements from
2772 BB->preds. Thus we can not use the FOR_EACH_EDGE iterator. */
2773 for (edge_iterator ei
= ei_start (bb
->preds
);
2774 (e
= ei_safe_edge (ei
));)
2777 vec
<jump_thread_edge
*> *path
= THREAD_PATH (e
);
2779 /* Case 1, threading from outside to inside the loop
2780 after we'd already threaded through the header. */
2781 if ((*path
)[0]->e
->dest
->loop_father
2782 != path
->last ()->e
->src
->loop_father
)
2784 delete_jump_thread_path (path
);
2788 else if (bb_ends_with_multiway_branch (path
->last ()->e
->src
))
2790 /* The code to thread through loop headers may have
2791 split a block with jump threads attached to it.
2793 We can identify this with a disjoint jump threading
2794 path. If found, just remove it. */
2795 for (unsigned int i
= 0; i
< path
->length () - 1; i
++)
2796 if ((*path
)[i
]->e
->dest
!= (*path
)[i
+ 1]->e
->src
)
2798 delete_jump_thread_path (path
);
2804 /* Our path is still valid, thread it. */
2807 if (thread_block ((*path
)[0]->e
->dest
, false))
2811 delete_jump_thread_path (path
);
2819 delete_jump_thread_path (path
);
2828 statistics_counter_event (cfun
, "Jumps threaded",
2829 thread_stats
.num_threaded_edges
);
2831 free_original_copy_tables ();
2833 BITMAP_FREE (threaded_blocks
);
2834 threaded_blocks
= NULL
;
2838 loops_state_set (LOOPS_NEED_FIXUP
);
2841 delete removed_edges
;
2842 removed_edges
= NULL
;
2846 /* Delete the jump threading path PATH. We have to explcitly delete
2847 each entry in the vector, then the container. */
2850 delete_jump_thread_path (vec
<jump_thread_edge
*> *path
)
2852 for (unsigned int i
= 0; i
< path
->length (); i
++)
2858 /* Register a jump threading opportunity. We queue up all the jump
2859 threading opportunities discovered by a pass and update the CFG
2860 and SSA form all at once.
2862 E is the edge we can thread, E2 is the new target edge, i.e., we
2863 are effectively recording that E->dest can be changed to E2->dest
2864 after fixing the SSA graph. */
2867 register_jump_thread (vec
<jump_thread_edge
*> *path
)
2869 if (!dbg_cnt (registered_jump_thread
))
2871 delete_jump_thread_path (path
);
2875 /* First make sure there are no NULL outgoing edges on the jump threading
2876 path. That can happen for jumping to a constant address. */
2877 for (unsigned int i
= 0; i
< path
->length (); i
++)
2878 if ((*path
)[i
]->e
== NULL
)
2880 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
2883 "Found NULL edge in jump threading path. Cancelling jump thread:\n");
2884 dump_jump_thread_path (dump_file
, *path
, false);
2887 delete_jump_thread_path (path
);
2891 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
2892 dump_jump_thread_path (dump_file
, *path
, true);
2894 if (!paths
.exists ())
2897 paths
.safe_push (path
);