gcc/tree-ssa-threadupdate.c
1 /* Thread edges through blocks and update the control flow and SSA graphs.
2 Copyright (C) 2004-2015 Free Software Foundation, Inc.
3
4 This file is part of GCC.
5
6 GCC is free software; you can redistribute it and/or modify
7 it under the terms of the GNU General Public License as published by
8 the Free Software Foundation; either version 3, or (at your option)
9 any later version.
10
11 GCC is distributed in the hope that it will be useful,
12 but WITHOUT ANY WARRANTY; without even the implied warranty of
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 GNU General Public License for more details.
15
16 You should have received a copy of the GNU General Public License
17 along with GCC; see the file COPYING3. If not see
18 <http://www.gnu.org/licenses/>. */
19
20 #include "config.h"
21 #include "system.h"
22 #include "coretypes.h"
23 #include "alias.h"
24 #include "backend.h"
25 #include "cfghooks.h"
26 #include "tree.h"
27 #include "gimple.h"
28 #include "hard-reg-set.h"
29 #include "ssa.h"
30 #include "options.h"
31 #include "fold-const.h"
32 #include "flags.h"
33 #include "cfganal.h"
34 #include "internal-fn.h"
35 #include "gimple-iterator.h"
36 #include "tree-ssa.h"
37 #include "tree-ssa-threadupdate.h"
38 #include "dumpfile.h"
39 #include "cfgloop.h"
40 #include "dbgcnt.h"
41 #include "tree-cfg.h"
42 #include "tree-pass.h"
43
44 /* Given a block B, update the CFG and SSA graph to reflect redirecting
45 one or more in-edges to B to instead reach the destination of an
46 out-edge from B while preserving any side effects in B.
47
48 i.e., given A->B and B->C, change A->B to be A->C yet still preserve the
49 side effects of executing B.
50
51 1. Make a copy of B (including its outgoing edges and statements). Call
52 the copy B'. Note B' has no incoming edges or PHIs at this time.
53
54 2. Remove the control statement at the end of B' and all outgoing edges
55 except B'->C.
56
57 3. Add a new argument to each PHI in C with the same value as the existing
58 argument associated with edge B->C. Associate the new PHI arguments
59 with the edge B'->C.
60
61 4. For each PHI in B, find or create a PHI in B' with an identical
62 PHI_RESULT. Add an argument to the PHI in B' which has the same
63 value as the PHI in B associated with the edge A->B. Associate
64 the new argument in the PHI in B' with the edge A->B.
65
66 5. Change the edge A->B to A->B'.
67
68 5a. This automatically deletes any PHI arguments associated with the
69 edge A->B in B.
70
71 5b. This automatically associates each new argument added in step 4
72 with the edge A->B'.
73
74 6. Repeat for other incoming edges into B.
75
76 7. Put the duplicated resources in B and all the B' blocks into SSA form.
77
78 Note that block duplication can be minimized by first collecting the
79 set of unique destination blocks that the incoming edges should
80 be threaded to.
81
82 We reduce the number of edges and statements we create by not copying all
83 the outgoing edges and the control statement in step #1. We instead create
84 a template block without the outgoing edges and duplicate the template.
85
86 Another case this code handles is threading through a "joiner" block. In
87 this case, we do not know the destination of the joiner block, but one
88 of the outgoing edges from the joiner block leads to a threadable path. This
89 case largely works as outlined above, except the duplicate of the joiner
90 block still contains a full set of outgoing edges and its control statement.
91 We just redirect one of its outgoing edges to our jump threading path. */
92
93
94 /* Steps #5 and #6 of the above algorithm are best implemented by walking
95 all the incoming edges which thread to the same destination edge at
96 the same time. That avoids lots of table lookups to get information
97 for the destination edge.
98
99 To realize that implementation we create a list of incoming edges
100 which thread to the same outgoing edge. Thus to implement steps
101 #5 and #6 we traverse our hash table of outgoing edge information.
102 For each entry we walk the list of incoming edges which thread to
103 the current outgoing edge. */
104
105 struct el
106 {
107 edge e;
108 struct el *next;
109 };
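/* A minimal illustrative sketch of how such a list is consumed: a
   traversal simply follows the NEXT pointers, e.g. to count how many
   incoming edges were recorded for one destination.  */

static inline unsigned int ATTRIBUTE_UNUSED
count_incoming_edges_sketch (struct el *list)
{
  unsigned int n = 0;
  for (struct el *p = list; p; p = p->next)
    n++;
  return n;
}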
110
111 /* Main data structure recording information regarding B's duplicate
112 blocks. */
113
114 /* We need to efficiently record the unique thread destinations of this
115 block and specific information associated with those destinations. We
116 may have many incoming edges threaded to the same outgoing edge. This
117 can be naturally implemented with a hash table. */
118
119 struct redirection_data : free_ptr_hash<redirection_data>
120 {
121 /* We support wiring up two block duplicates in a jump threading path.
122
123 One is a normal block copy where we remove the control statement
124 and wire up its single remaining outgoing edge to the thread path.
125
126 The other is a joiner block where we leave the control statement
127 in place, but wire one of the outgoing edges to a thread path.
128
129 In theory we could have multiple block duplicates in a jump
130 threading path, but I haven't tried that.
131
132 The duplicate blocks appear in this array in the same order in
133 which they appear in the jump thread path. */
134 basic_block dup_blocks[2];
135
136 /* The jump threading path. */
137 vec<jump_thread_edge *> *path;
138
139 /* A list of incoming edges which we want to thread to the
140 same path. */
141 struct el *incoming_edges;
142
143 /* hash_table support. */
144 static inline hashval_t hash (const redirection_data *);
145 static inline int equal (const redirection_data *, const redirection_data *);
146 };
147
148 /* Dump a jump threading path, including annotations about each
149 edge in the path. */
150
151 static void
152 dump_jump_thread_path (FILE *dump_file, vec<jump_thread_edge *> path,
153 bool registering)
154 {
155 fprintf (dump_file,
156 " %s%s jump thread: (%d, %d) incoming edge; ",
157 (registering ? "Registering" : "Cancelling"),
158 (path[0]->type == EDGE_FSM_THREAD ? " FSM": ""),
159 path[0]->e->src->index, path[0]->e->dest->index);
160
161 for (unsigned int i = 1; i < path.length (); i++)
162 {
163 /* We can get paths with a NULL edge when the final destination
164 of a jump thread turns out to be a constant address. We dump
165 those paths when debugging, so we have to be prepared for that
166 possibility here. */
167 if (path[i]->e == NULL)
168 continue;
169
170 if (path[i]->type == EDGE_COPY_SRC_JOINER_BLOCK)
171 fprintf (dump_file, " (%d, %d) joiner; ",
172 path[i]->e->src->index, path[i]->e->dest->index);
173 if (path[i]->type == EDGE_COPY_SRC_BLOCK)
174 fprintf (dump_file, " (%d, %d) normal;",
175 path[i]->e->src->index, path[i]->e->dest->index);
176 if (path[i]->type == EDGE_NO_COPY_SRC_BLOCK)
177 fprintf (dump_file, " (%d, %d) nocopy;",
178 path[i]->e->src->index, path[i]->e->dest->index);
179 if (path[0]->type == EDGE_FSM_THREAD)
180 fprintf (dump_file, " (%d, %d) ",
181 path[i]->e->src->index, path[i]->e->dest->index);
182 }
183 fputc ('\n', dump_file);
184 }
185
186 /* Simple hashing function. For any given incoming edge E, we're going
187 to be most concerned with the final destination of its jump thread
188        path.  So hash on the index of the final edge's destination block.  */
189
190 inline hashval_t
191 redirection_data::hash (const redirection_data *p)
192 {
193 vec<jump_thread_edge *> *path = p->path;
194 return path->last ()->e->dest->index;
195 }
196
197 /* Given two hash table entries, return true if they have the same
198 jump threading path. */
199 inline int
200 redirection_data::equal (const redirection_data *p1, const redirection_data *p2)
201 {
202 vec<jump_thread_edge *> *path1 = p1->path;
203 vec<jump_thread_edge *> *path2 = p2->path;
204
205 if (path1->length () != path2->length ())
206 return false;
207
208 for (unsigned int i = 1; i < path1->length (); i++)
209 {
210 if ((*path1)[i]->type != (*path2)[i]->type
211 || (*path1)[i]->e != (*path2)[i]->e)
212 return false;
213 }
214
215 return true;
216 }
217
218 /* Rather than search all the edges in jump thread paths each time
219    DOM is able to simplify a control statement, we build a hash table
220 with the deleted edges. We only care about the address of the edge,
221 not its contents. */
222 struct removed_edges : nofree_ptr_hash<edge_def>
223 {
224 static hashval_t hash (edge e) { return htab_hash_pointer (e); }
225 static bool equal (edge e1, edge e2) { return e1 == e2; }
226 };
227
228 static hash_table<removed_edges> *removed_edges;
229
230 /* Data structure of information to pass to hash table traversal routines. */
231 struct ssa_local_info_t
232 {
233 /* The current block we are working on. */
234 basic_block bb;
235
236 /* We only create a template block for the first duplicated block in a
237 jump threading path as we may need many duplicates of that block.
238
239 The second duplicate block in a path is specific to that path. Creating
240 and sharing a template for that block is considerably more difficult. */
241 basic_block template_block;
242
243 /* TRUE if we thread one or more jumps, FALSE otherwise. */
244 bool jumps_threaded;
245
246 /* Blocks duplicated for the thread. */
247 bitmap duplicate_blocks;
248 };
249
250 /* Passes which use the jump threading code register jump threading
251 opportunities as they are discovered. We keep the registered
252 jump threading opportunities in this vector as edge pairs
253 (original_edge, target_edge). */
254 static vec<vec<jump_thread_edge *> *> paths;
255
256 /* When we start updating the CFG for threading, data necessary for jump
257 threading is attached to the AUX field for the incoming edge. Use these
258 macros to access the underlying structure attached to the AUX field. */
259 #define THREAD_PATH(E) ((vec<jump_thread_edge *> *)(E)->aux)
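/* A minimal sketch of the access pattern this macro enables: an
   incoming edge E carrying a registered path can have its path edges
   visited like so (hypothetical helper, mirroring the loops used later
   in this file).  */

static void ATTRIBUTE_UNUSED
walk_thread_path_sketch (edge e)
{
  vec<jump_thread_edge *> *path = THREAD_PATH (e);
  for (unsigned int i = 0; i < path->length (); i++)
    {
      edge epath = (*path)[i]->e;
      /* Inspect epath->src and epath->dest here.  */
      (void) epath;
    }
}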
260
261 /* Jump threading statistics. */
262
263 struct thread_stats_d
264 {
265 unsigned long num_threaded_edges;
266 };
267
268 struct thread_stats_d thread_stats;
269
270
271 /* Remove the last statement in block BB if it is a control statement.
272 Also remove all outgoing edges except the edge which reaches DEST_BB.
273 If DEST_BB is NULL, then remove all outgoing edges. */
274
275 void
276 remove_ctrl_stmt_and_useless_edges (basic_block bb, basic_block dest_bb)
277 {
278 gimple_stmt_iterator gsi;
279 edge e;
280 edge_iterator ei;
281
282 gsi = gsi_last_bb (bb);
283
284 /* If the duplicate ends with a control statement, then remove it.
285
286 Note that if we are duplicating the template block rather than the
287 original basic block, then the duplicate might not have any real
288 statements in it. */
289 if (!gsi_end_p (gsi)
290 && gsi_stmt (gsi)
291 && (gimple_code (gsi_stmt (gsi)) == GIMPLE_COND
292 || gimple_code (gsi_stmt (gsi)) == GIMPLE_GOTO
293 || gimple_code (gsi_stmt (gsi)) == GIMPLE_SWITCH))
294 gsi_remove (&gsi, true);
295
296 for (ei = ei_start (bb->succs); (e = ei_safe_edge (ei)); )
297 {
298 if (e->dest != dest_bb)
299 remove_edge (e);
300 else
301 ei_next (&ei);
302 }
303
304   /* If the remaining edge is a loop exit, there must have been
305 a removed edge that was not a loop exit.
306
307 In that case BB and possibly other blocks were previously
308 in the loop, but are now outside the loop. Thus, we need
309 to update the loop structures. */
310 if (single_succ_p (bb)
311 && loop_outer (bb->loop_father)
312 && loop_exit_edge_p (bb->loop_father, single_succ_edge (bb)))
313 loops_state_set (LOOPS_NEED_FIXUP);
314 }
315
316 /* Create a duplicate of BB. Record the duplicate block in an array
317 indexed by COUNT stored in RD. */
318
319 static void
320 create_block_for_threading (basic_block bb,
321 struct redirection_data *rd,
322 unsigned int count,
323 bitmap *duplicate_blocks)
324 {
325 edge_iterator ei;
326 edge e;
327
328 /* We can use the generic block duplication code and simply remove
329 the stuff we do not need. */
330 rd->dup_blocks[count] = duplicate_block (bb, NULL, NULL);
331
332 FOR_EACH_EDGE (e, ei, rd->dup_blocks[count]->succs)
333 e->aux = NULL;
334
335 /* Zero out the profile, since the block is unreachable for now. */
336 rd->dup_blocks[count]->frequency = 0;
337 rd->dup_blocks[count]->count = 0;
338 if (duplicate_blocks)
339 bitmap_set_bit (*duplicate_blocks, rd->dup_blocks[count]->index);
340 }
341
342 /* Main data structure to hold information for duplicates of BB. */
343
344 static hash_table<redirection_data> *redirection_data;
345
346 /* Given an outgoing edge E, look up and return its entry in our hash table.
347
348 If INSERT is true, then we insert the entry into the hash table if
349 it is not already present. INCOMING_EDGE is added to the list of incoming
350 edges associated with E in the hash table. */
351
352 static struct redirection_data *
353 lookup_redirection_data (edge e, enum insert_option insert)
354 {
355 struct redirection_data **slot;
356 struct redirection_data *elt;
357 vec<jump_thread_edge *> *path = THREAD_PATH (e);
358
359 /* Build a hash table element so we can see if E is already
360 in the table. */
361 elt = XNEW (struct redirection_data);
362 elt->path = path;
363 elt->dup_blocks[0] = NULL;
364 elt->dup_blocks[1] = NULL;
365 elt->incoming_edges = NULL;
366
367 slot = redirection_data->find_slot (elt, insert);
368
369 /* This will only happen if INSERT is false and the entry is not
370 in the hash table. */
371 if (slot == NULL)
372 {
373 free (elt);
374 return NULL;
375 }
376
377 /* This will only happen if E was not in the hash table and
378 INSERT is true. */
379 if (*slot == NULL)
380 {
381 *slot = elt;
382 elt->incoming_edges = XNEW (struct el);
383 elt->incoming_edges->e = e;
384 elt->incoming_edges->next = NULL;
385 return elt;
386 }
387 /* E was in the hash table. */
388 else
389 {
390 /* Free ELT as we do not need it anymore, we will extract the
391 relevant entry from the hash table itself. */
392 free (elt);
393
394 /* Get the entry stored in the hash table. */
395 elt = *slot;
396
397 /* If insertion was requested, then we need to add INCOMING_EDGE
398 to the list of incoming edges associated with E. */
399 if (insert)
400 {
401 struct el *el = XNEW (struct el);
402 el->next = elt->incoming_edges;
403 el->e = e;
404 elt->incoming_edges = el;
405 }
406
407 return elt;
408 }
409 }
410
411 /* Similar to copy_phi_args, except that the PHI arg already exists; it
412    just does not have a value associated with it.  */
413
414 static void
415 copy_phi_arg_into_existing_phi (edge src_e, edge tgt_e)
416 {
417 int src_idx = src_e->dest_idx;
418 int tgt_idx = tgt_e->dest_idx;
419
420   /* Iterate over the PHIs in SRC_E->dest and TGT_E->dest in parallel.  */
421 for (gphi_iterator gsi = gsi_start_phis (src_e->dest),
422 gsi2 = gsi_start_phis (tgt_e->dest);
423 !gsi_end_p (gsi);
424 gsi_next (&gsi), gsi_next (&gsi2))
425 {
426 gphi *src_phi = gsi.phi ();
427 gphi *dest_phi = gsi2.phi ();
428 tree val = gimple_phi_arg_def (src_phi, src_idx);
429 source_location locus = gimple_phi_arg_location (src_phi, src_idx);
430
431 SET_PHI_ARG_DEF (dest_phi, tgt_idx, val);
432 gimple_phi_arg_set_location (dest_phi, tgt_idx, locus);
433 }
434 }
435
436 /* Given ssa_name DEF, backtrack jump threading PATH from node IDX
437    to see if it has a constant value in a flow-sensitive manner.  Set
438    LOCUS to the location of the constant phi arg and return the value.
439    Return DEF directly if either PATH or IDX is zero.  */
440
441 static tree
442 get_value_locus_in_path (tree def, vec<jump_thread_edge *> *path,
443 basic_block bb, int idx, source_location *locus)
444 {
445 tree arg;
446 gphi *def_phi;
447 basic_block def_bb;
448
449 if (path == NULL || idx == 0)
450 return def;
451
452 def_phi = dyn_cast <gphi *> (SSA_NAME_DEF_STMT (def));
453 if (!def_phi)
454 return def;
455
456 def_bb = gimple_bb (def_phi);
457 /* Don't propagate loop invariants into deeper loops. */
458 if (!def_bb || bb_loop_depth (def_bb) < bb_loop_depth (bb))
459 return def;
460
461   /* Backtrack the jump threading path from IDX to see if DEF has a constant
462 value. */
463 for (int j = idx - 1; j >= 0; j--)
464 {
465 edge e = (*path)[j]->e;
466 if (e->dest == def_bb)
467 {
468 arg = gimple_phi_arg_def (def_phi, e->dest_idx);
469 if (is_gimple_min_invariant (arg))
470 {
471 *locus = gimple_phi_arg_location (def_phi, e->dest_idx);
472 return arg;
473 }
474 break;
475 }
476 }
477
478 return def;
479 }
480
481 /* For each PHI in BB, copy the argument associated with SRC_E to TGT_E.
482 Try to backtrack jump threading PATH from node IDX to see if the arg
483    has a constant value; if so, copy the constant value instead of the
484    argument itself.  */
485
486 static void
487 copy_phi_args (basic_block bb, edge src_e, edge tgt_e,
488 vec<jump_thread_edge *> *path, int idx)
489 {
490 gphi_iterator gsi;
491 int src_indx = src_e->dest_idx;
492
493 for (gsi = gsi_start_phis (bb); !gsi_end_p (gsi); gsi_next (&gsi))
494 {
495 gphi *phi = gsi.phi ();
496 tree def = gimple_phi_arg_def (phi, src_indx);
497 source_location locus = gimple_phi_arg_location (phi, src_indx);
498
499 if (TREE_CODE (def) == SSA_NAME
500 && !virtual_operand_p (gimple_phi_result (phi)))
501 def = get_value_locus_in_path (def, path, bb, idx, &locus);
502
503 add_phi_arg (phi, def, tgt_e, locus);
504 }
505 }
506
507 /* We have recently made a copy of ORIG_BB, including its outgoing
508 edges. The copy is NEW_BB. Every PHI node in every direct successor of
509    ORIG_BB has a new argument associated with the edge from NEW_BB to the
510 successor. Initialize the PHI argument so that it is equal to the PHI
511 argument associated with the edge from ORIG_BB to the successor.
512 PATH and IDX are used to check if the new PHI argument has constant
513 value in a flow sensitive manner. */
514
515 static void
516 update_destination_phis (basic_block orig_bb, basic_block new_bb,
517 vec<jump_thread_edge *> *path, int idx)
518 {
519 edge_iterator ei;
520 edge e;
521
522 FOR_EACH_EDGE (e, ei, orig_bb->succs)
523 {
524 edge e2 = find_edge (new_bb, e->dest);
525 copy_phi_args (e->dest, e, e2, path, idx);
526 }
527 }
528
529 /* Given a duplicate block and its single destination (both stored
530    in RD), create an edge between the duplicate and its single
531 destination.
532
533 Add an additional argument to any PHI nodes at the single
534    destination.  IDX is the node in the jump threading path at which
535    we start checking whether the new PHI argument has a constant
536 value along the jump threading path. */
537
538 static void
539 create_edge_and_update_destination_phis (struct redirection_data *rd,
540 basic_block bb, int idx)
541 {
542 edge e = make_edge (bb, rd->path->last ()->e->dest, EDGE_FALLTHRU);
543
544 rescan_loop_exit (e, true, false);
545 e->probability = REG_BR_PROB_BASE;
546 e->count = bb->count;
547
548 /* We used to copy the thread path here. That was added in 2007
549 and dutifully updated through the representation changes in 2013.
550
551 In 2013 we added code to thread from an interior node through
552 the backedge to another interior node. That runs after the code
553 to thread through loop headers from outside the loop.
554
555 The latter may delete edges in the CFG, including those
556 which appeared in the jump threading path we copied here. Thus
557 we'd end up using a dangling pointer.
558
559 After reviewing the 2007/2011 code, I can't see how anything
560 depended on copying the AUX field and clearly copying the jump
561 threading path is problematical due to embedded edge pointers.
562 It has been removed. */
563 e->aux = NULL;
564
565 /* If there are any PHI nodes at the destination of the outgoing edge
566 from the duplicate block, then we will need to add a new argument
567 to them. The argument should have the same value as the argument
568 associated with the outgoing edge stored in RD. */
569 copy_phi_args (e->dest, rd->path->last ()->e, e, rd->path, idx);
570 }
571
572 /* Look through PATH beginning at START and return TRUE if there are
573 any additional blocks that need to be duplicated. Otherwise,
574 return FALSE. */
575 static bool
576 any_remaining_duplicated_blocks (vec<jump_thread_edge *> *path,
577 unsigned int start)
578 {
579 for (unsigned int i = start + 1; i < path->length (); i++)
580 {
581 if ((*path)[i]->type == EDGE_COPY_SRC_JOINER_BLOCK
582 || (*path)[i]->type == EDGE_COPY_SRC_BLOCK)
583 return true;
584 }
585 return false;
586 }
587
588
589 /* Compute the amount of profile count/frequency coming into the jump threading
590 path stored in RD that we are duplicating, returned in PATH_IN_COUNT_PTR and
591 PATH_IN_FREQ_PTR, as well as the amount of counts flowing out of the
592 duplicated path, returned in PATH_OUT_COUNT_PTR. LOCAL_INFO is used to
593 identify blocks duplicated for jump threading, which have duplicated
594 edges that need to be ignored in the analysis. Return true if path contains
595 a joiner, false otherwise.
596
597 In the non-joiner case, this is straightforward - all the counts/frequency
598 flowing into the jump threading path should flow through the duplicated
599 block and out of the duplicated path.
600
601 In the joiner case, it is very tricky. Some of the counts flowing into
602 the original path go offpath at the joiner. The problem is that while
603 we know how much total count goes off-path in the original control flow,
604 we don't know how many of the counts corresponding to just the jump
605 threading path go offpath at the joiner.
606
607 For example, assume we have the following control flow and identified
608 jump threading paths:
609
610 A B C
611 \ | /
612 Ea \ |Eb / Ec
613 \ | /
614 v v v
615 J <-- Joiner
616 / \
617 Eoff/ \Eon
618 / \
619 v v
620 Soff Son <--- Normal
621 /\
622 Ed/ \ Ee
623 / \
624 v v
625 D E
626
627 Jump threading paths: A -> J -> Son -> D (path 1)
628 C -> J -> Son -> E (path 2)
629
630 Note that the control flow could be more complicated:
631 - Each jump threading path may have more than one incoming edge. I.e. A and
632 Ea could represent multiple incoming blocks/edges that are included in
633 path 1.
634 - There could be EDGE_NO_COPY_SRC_BLOCK edges after the joiner (either
635 before or after the "normal" copy block). These are not duplicated onto
636 the jump threading path, as they are single-successor.
637 - Any of the blocks along the path may have other incoming edges that
638 are not part of any jump threading path, but add profile counts along
639 the path.
640
641    In the above example, after all jump threading is complete, we will
642 end up with the following control flow:
643
644 A B C
645 | | |
646 Ea| |Eb |Ec
647 | | |
648 v v v
649 Ja J Jc
650 / \ / \Eon' / \
651 Eona/ \ ---/---\-------- \Eonc
652 / \ / / \ \
653 v v v v v
654 Sona Soff Son Sonc
655 \ /\ /
656 \___________ / \ _____/
657 \ / \/
658 vv v
659 D E
660
661 The main issue to notice here is that when we are processing path 1
662 (A->J->Son->D) we need to figure out the outgoing edge weights to
663 the duplicated edges Ja->Sona and Ja->Soff, while ensuring that the
664 sum of the incoming weights to D remain Ed. The problem with simply
665 assuming that Ja (and Jc when processing path 2) has the same outgoing
666 probabilities to its successors as the original block J, is that after
667 all paths are processed and other edges/counts removed (e.g. none
668 of Ec will reach D after processing path 2), we may end up with not
669 enough count flowing along duplicated edge Sona->D.
670
671 Therefore, in the case of a joiner, we keep track of all counts
672 coming in along the current path, as well as from predecessors not
673 on any jump threading path (Eb in the above example). While we
674 first assume that the duplicated Eona for Ja->Sona has the same
675 probability as the original, we later compensate for other jump
676    threading paths that may eliminate edges.  We do that by keeping track
677 of all counts coming into the original path that are not in a jump
678 thread (Eb in the above example, but as noted earlier, there could
679 be other predecessors incoming to the path at various points, such
680 as at Son). Call this cumulative non-path count coming into the path
681    before D as Enonpath.  We then ensure that the count from Sona->D is at
682 least as big as (Ed - Enonpath), but no bigger than the minimum
683 weight along the jump threading path. The probabilities of both the
684 original and duplicated joiner block J and Ja will be adjusted
685 accordingly after the updates. */
686
687 static bool
688 compute_path_counts (struct redirection_data *rd,
689 ssa_local_info_t *local_info,
690 gcov_type *path_in_count_ptr,
691 gcov_type *path_out_count_ptr,
692 int *path_in_freq_ptr)
693 {
694 edge e = rd->incoming_edges->e;
695 vec<jump_thread_edge *> *path = THREAD_PATH (e);
696 edge elast = path->last ()->e;
697 gcov_type nonpath_count = 0;
698 bool has_joiner = false;
699 gcov_type path_in_count = 0;
700 int path_in_freq = 0;
701
702 /* Start by accumulating incoming edge counts to the path's first bb
703 into a couple buckets:
704 path_in_count: total count of incoming edges that flow into the
705 current path.
706 nonpath_count: total count of incoming edges that are not
707 flowing along *any* path. These are the counts
708 that will still flow along the original path after
709 all path duplication is done by potentially multiple
710 calls to this routine.
711 (any other incoming edge counts are for a different jump threading
712 path that will be handled by a later call to this routine.)
713 To make this easier, start by recording all incoming edges that flow into
714 the current path in a bitmap. We could add up the path's incoming edge
715 counts here, but we still need to walk all the first bb's incoming edges
716 below to add up the counts of the other edges not included in this jump
717 threading path. */
718 struct el *next, *el;
719 bitmap in_edge_srcs = BITMAP_ALLOC (NULL);
720 for (el = rd->incoming_edges; el; el = next)
721 {
722 next = el->next;
723 bitmap_set_bit (in_edge_srcs, el->e->src->index);
724 }
725 edge ein;
726 edge_iterator ei;
727 FOR_EACH_EDGE (ein, ei, e->dest->preds)
728 {
729 vec<jump_thread_edge *> *ein_path = THREAD_PATH (ein);
730 /* Simply check the incoming edge src against the set captured above. */
731 if (ein_path
732 && bitmap_bit_p (in_edge_srcs, (*ein_path)[0]->e->src->index))
733 {
734 /* It is necessary but not sufficient that the last path edges
735 are identical. There may be different paths that share the
736 same last path edge in the case where the last edge has a nocopy
737 source block. */
738 gcc_assert (ein_path->last ()->e == elast);
739 path_in_count += ein->count;
740 path_in_freq += EDGE_FREQUENCY (ein);
741 }
742 else if (!ein_path)
743 {
744 /* Keep track of the incoming edges that are not on any jump-threading
745              path.  These counts will still flow out of the original path after all
746 jump threading is complete. */
747 nonpath_count += ein->count;
748 }
749 }
750
751 /* This is needed due to insane incoming frequencies. */
752 if (path_in_freq > BB_FREQ_MAX)
753 path_in_freq = BB_FREQ_MAX;
754
755 BITMAP_FREE (in_edge_srcs);
756
757 /* Now compute the fraction of the total count coming into the first
758 path bb that is from the current threading path. */
759 gcov_type total_count = e->dest->count;
760 /* Handle incoming profile insanities. */
761 if (total_count < path_in_count)
762 path_in_count = total_count;
763 int onpath_scale = GCOV_COMPUTE_SCALE (path_in_count, total_count);
764
765 /* Walk the entire path to do some more computation in order to estimate
766 how much of the path_in_count will flow out of the duplicated threading
767 path. In the non-joiner case this is straightforward (it should be
768 the same as path_in_count, although we will handle incoming profile
769 insanities by setting it equal to the minimum count along the path).
770
771 In the joiner case, we need to estimate how much of the path_in_count
772 will stay on the threading path after the joiner's conditional branch.
773 We don't really know for sure how much of the counts
774 associated with this path go to each successor of the joiner, but we'll
775 estimate based on the fraction of the total count coming into the path
776      bb that was from the threading paths (computed above in onpath_scale).
777 Afterwards, we will need to do some fixup to account for other threading
778 paths and possible profile insanities.
779
780 In order to estimate the joiner case's counts we also need to update
781 nonpath_count with any additional counts coming into the path. Other
782 blocks along the path may have additional predecessors from outside
783 the path. */
784 gcov_type path_out_count = path_in_count;
785 gcov_type min_path_count = path_in_count;
786 for (unsigned int i = 1; i < path->length (); i++)
787 {
788 edge epath = (*path)[i]->e;
789 gcov_type cur_count = epath->count;
790 if ((*path)[i]->type == EDGE_COPY_SRC_JOINER_BLOCK)
791 {
792 has_joiner = true;
793 cur_count = apply_probability (cur_count, onpath_scale);
794 }
795 /* In the joiner case we need to update nonpath_count for any edges
796 coming into the path that will contribute to the count flowing
797 into the path successor. */
798 if (has_joiner && epath != elast)
799 {
800 /* Look for other incoming edges after joiner. */
801 FOR_EACH_EDGE (ein, ei, epath->dest->preds)
802 {
803 if (ein != epath
804 /* Ignore in edges from blocks we have duplicated for a
805 threading path, which have duplicated edge counts until
806 they are redirected by an invocation of this routine. */
807 && !bitmap_bit_p (local_info->duplicate_blocks,
808 ein->src->index))
809 nonpath_count += ein->count;
810 }
811 }
812 if (cur_count < path_out_count)
813 path_out_count = cur_count;
814 if (epath->count < min_path_count)
815 min_path_count = epath->count;
816 }
817
818 /* We computed path_out_count above assuming that this path targeted
819 the joiner's on-path successor with the same likelihood as it
820 reached the joiner. However, other thread paths through the joiner
821 may take a different path through the normal copy source block
822 (i.e. they have a different elast), meaning that they do not
823 contribute any counts to this path's elast. As a result, it may
824 turn out that this path must have more count flowing to the on-path
825 successor of the joiner. Essentially, all of this path's elast
826 count must be contributed by this path and any nonpath counts
827 (since any path through the joiner with a different elast will not
828 include a copy of this elast in its duplicated path).
829 So ensure that this path's path_out_count is at least the
830 difference between elast->count and nonpath_count. Otherwise the edge
831 counts after threading will not be sane. */
832 if (has_joiner && path_out_count < elast->count - nonpath_count)
833 {
834 path_out_count = elast->count - nonpath_count;
835 /* But neither can we go above the minimum count along the path
836 we are duplicating. This can be an issue due to profile
837          insanities coming into this pass.  */
838 if (path_out_count > min_path_count)
839 path_out_count = min_path_count;
840 }
841
842 *path_in_count_ptr = path_in_count;
843 *path_out_count_ptr = path_out_count;
844 *path_in_freq_ptr = path_in_freq;
845 return has_joiner;
846 }
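/* A worked example of the joiner estimate above, with hypothetical
   counts and plain integer arithmetic standing in for the scaling
   helpers.  Using the A/B/C -> J -> Son CFG from the comment before
   compute_path_counts, assume Ea = 60, Eb = 30 and Ec = 10 flow into J,
   and the on-path edge J -> Son carries 80 of J's 100.  */

static void ATTRIBUTE_UNUSED
compute_path_counts_example (void)
{
  gcov_type path_in_count = 60;   /* Ea, entering path 1.  */
  gcov_type total_count = 100;    /* Everything entering joiner J.  */
  gcov_type eon_count = 80;       /* Original count on J -> Son.  */

  /* Path 1 is assumed to keep the joiner's on-path share:
     80 * 60 / 100 == 48.  */
  gcov_type path_out_count = eon_count * path_in_count / total_count;

  /* If Son -> D (elast) carries 50 and the non-path count entering the
     path (Eb == 30) can cover at most 30 of it, then at least
     50 - 30 == 20 must come from this path.  48 already satisfies that
     lower bound; if it did not, it would be raised to the difference,
     capped at the minimum count seen along the path.  */
  gcov_type elast_count = 50, nonpath_count = 30;
  if (path_out_count < elast_count - nonpath_count)
    path_out_count = elast_count - nonpath_count;
  (void) path_out_count;
}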
847
848
849 /* Update the counts and frequencies for both an original path
850 edge EPATH and its duplicate EDUP. The duplicate source block
851 will get a count/frequency of PATH_IN_COUNT and PATH_IN_FREQ,
852 and the duplicate edge EDUP will have a count of PATH_OUT_COUNT. */
853 static void
854 update_profile (edge epath, edge edup, gcov_type path_in_count,
855 gcov_type path_out_count, int path_in_freq)
856 {
857
858 /* First update the duplicated block's count / frequency. */
859 if (edup)
860 {
861 basic_block dup_block = edup->src;
862 gcc_assert (dup_block->count == 0);
863 gcc_assert (dup_block->frequency == 0);
864 dup_block->count = path_in_count;
865 dup_block->frequency = path_in_freq;
866 }
867
868 /* Now update the original block's count and frequency in the
869 opposite manner - remove the counts/freq that will flow
870 into the duplicated block. Handle underflow due to precision/
871 rounding issues. */
872 epath->src->count -= path_in_count;
873 if (epath->src->count < 0)
874 epath->src->count = 0;
875 epath->src->frequency -= path_in_freq;
876 if (epath->src->frequency < 0)
877 epath->src->frequency = 0;
878
879 /* Next update this path edge's original and duplicated counts. We know
880 that the duplicated path will have path_out_count flowing
881 out of it (in the joiner case this is the count along the duplicated path
882 out of the duplicated joiner). This count can then be removed from the
883 original path edge. */
884 if (edup)
885 edup->count = path_out_count;
886 epath->count -= path_out_count;
887 gcc_assert (epath->count >= 0);
888 }
889
890
891 /* The duplicate and original joiner blocks may end up with different
892 probabilities (different from both the original and from each other).
893 Recompute the probabilities here once we have updated the edge
894 counts and frequencies. */
895
896 static void
897 recompute_probabilities (basic_block bb)
898 {
899 edge esucc;
900 edge_iterator ei;
901 FOR_EACH_EDGE (esucc, ei, bb->succs)
902 {
903 if (!bb->count)
904 continue;
905
906 /* Prevent overflow computation due to insane profiles. */
907 if (esucc->count < bb->count)
908 esucc->probability = GCOV_COMPUTE_SCALE (esucc->count,
909 bb->count);
910 else
911 /* Can happen with missing/guessed probabilities, since we
912 may determine that more is flowing along duplicated
913 path than joiner succ probabilities allowed.
914            Counts and freqs will be insane after jump threading;
915 at least make sure probability is sane or we will
916 get a flow verification error.
917 Not much we can do to make counts/freqs sane without
918 redoing the profile estimation. */
919 esucc->probability = REG_BR_PROB_BASE;
920 }
921 }
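/* A small numeric sketch of the recomputation above, using plain
   arithmetic in place of GCOV_COMPUTE_SCALE: a successor edge carrying
   30 of its block's 120 counts ends up with a branch probability of a
   quarter of REG_BR_PROB_BASE.  */

static int ATTRIBUTE_UNUSED
recompute_probability_example (void)
{
  gcov_type bb_count = 120, edge_count = 30;
  return (int) (edge_count * REG_BR_PROB_BASE / bb_count);
}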
922
923
924 /* Update the counts of the original and duplicated edges from a joiner
925 that go off path, given that we have already determined that the
926 duplicate joiner DUP_BB has incoming count PATH_IN_COUNT and
927 outgoing count along the path PATH_OUT_COUNT. The original (on-)path
928    edge from the joiner is EPATH.  */
929
930 static void
931 update_joiner_offpath_counts (edge epath, basic_block dup_bb,
932 gcov_type path_in_count,
933 gcov_type path_out_count)
934 {
935 /* Compute the count that currently flows off path from the joiner.
936 In other words, the total count of joiner's out edges other than
937 epath. Compute this by walking the successors instead of
938 subtracting epath's count from the joiner bb count, since there
939 are sometimes slight insanities where the total out edge count is
940 larger than the bb count (possibly due to rounding/truncation
941 errors). */
942 gcov_type total_orig_off_path_count = 0;
943 edge enonpath;
944 edge_iterator ei;
945 FOR_EACH_EDGE (enonpath, ei, epath->src->succs)
946 {
947 if (enonpath == epath)
948 continue;
949 total_orig_off_path_count += enonpath->count;
950 }
951
952 /* For the path that we are duplicating, the amount that will flow
953 off path from the duplicated joiner is the delta between the
954 path's cumulative in count and the portion of that count we
955 estimated above as flowing from the joiner along the duplicated
956 path. */
957 gcov_type total_dup_off_path_count = path_in_count - path_out_count;
958
959 /* Now do the actual updates of the off-path edges. */
960 FOR_EACH_EDGE (enonpath, ei, epath->src->succs)
961 {
962 /* Look for edges going off of the threading path. */
963 if (enonpath == epath)
964 continue;
965
966 /* Find the corresponding edge out of the duplicated joiner. */
967 edge enonpathdup = find_edge (dup_bb, enonpath->dest);
968 gcc_assert (enonpathdup);
969
970 /* We can't use the original probability of the joiner's out
971 edges, since the probabilities of the original branch
972 and the duplicated branches may vary after all threading is
973 complete. But apportion the duplicated joiner's off-path
974 total edge count computed earlier (total_dup_off_path_count)
975 among the duplicated off-path edges based on their original
976 ratio to the full off-path count (total_orig_off_path_count).
977 */
978 int scale = GCOV_COMPUTE_SCALE (enonpath->count,
979 total_orig_off_path_count);
980 /* Give the duplicated offpath edge a portion of the duplicated
981 total. */
982 enonpathdup->count = apply_scale (scale,
983 total_dup_off_path_count);
984 /* Now update the original offpath edge count, handling underflow
985 due to rounding errors. */
986 enonpath->count -= enonpathdup->count;
987 if (enonpath->count < 0)
988 enonpath->count = 0;
989 }
990 }
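/* A worked example (hypothetical counts) of the apportioning above.
   Suppose the duplicated joiner received path_in_count == 60 and sends
   path_out_count == 48 along the duplicated path, leaving 12 to go
   off-path.  If the original joiner's two off-path edges carried 15 and
   5 (total 20), the duplicated off-path edges receive 12 * 15 / 20 == 9
   and 12 * 5 / 20 == 3, and those amounts are then subtracted from the
   corresponding original edges.  */

static void ATTRIBUTE_UNUSED
offpath_apportion_example (void)
{
  gcov_type dup_off_path = 60 - 48;        /* path_in - path_out.  */
  gcov_type orig_off[2] = { 15, 5 };
  gcov_type total_orig_off = orig_off[0] + orig_off[1];
  for (int i = 0; i < 2; i++)
    {
      gcov_type dup_share = dup_off_path * orig_off[i] / total_orig_off;
      orig_off[i] -= dup_share;            /* 15 -> 6, 5 -> 2.  */
    }
}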
991
992
993 /* Check if the paths through RD all have estimated frequencies but zero
994 profile counts. This is more accurate than checking the entry block
995 for a zero profile count, since profile insanities sometimes creep in. */
996
997 static bool
998 estimated_freqs_path (struct redirection_data *rd)
999 {
1000 edge e = rd->incoming_edges->e;
1001 vec<jump_thread_edge *> *path = THREAD_PATH (e);
1002 edge ein;
1003 edge_iterator ei;
1004 bool non_zero_freq = false;
1005 FOR_EACH_EDGE (ein, ei, e->dest->preds)
1006 {
1007 if (ein->count)
1008 return false;
1009 non_zero_freq |= ein->src->frequency != 0;
1010 }
1011
1012 for (unsigned int i = 1; i < path->length (); i++)
1013 {
1014 edge epath = (*path)[i]->e;
1015 if (epath->src->count)
1016 return false;
1017 non_zero_freq |= epath->src->frequency != 0;
1018 edge esucc;
1019 FOR_EACH_EDGE (esucc, ei, epath->src->succs)
1020 {
1021 if (esucc->count)
1022 return false;
1023 non_zero_freq |= esucc->src->frequency != 0;
1024 }
1025 }
1026 return non_zero_freq;
1027 }
1028
1029
1030 /* Invoked for routines that have guessed frequencies and no profile
1031 counts to record the block and edge frequencies for paths through RD
1032 in the profile count fields of those blocks and edges. This is because
1033 ssa_fix_duplicate_block_edges incrementally updates the block and
1034 edge counts as edges are redirected, and it is difficult to do that
1035 for edge frequencies which are computed on the fly from the source
1036 block frequency and probability. When a block frequency is updated
1037 its outgoing edge frequencies are affected and become difficult to
1038 adjust. */
1039
1040 static void
1041 freqs_to_counts_path (struct redirection_data *rd)
1042 {
1043 edge e = rd->incoming_edges->e;
1044 vec<jump_thread_edge *> *path = THREAD_PATH (e);
1045 edge ein;
1046 edge_iterator ei;
1047 FOR_EACH_EDGE (ein, ei, e->dest->preds)
1048 {
1049 /* Scale up the frequency by REG_BR_PROB_BASE, to avoid rounding
1050 errors applying the probability when the frequencies are very
1051 small. */
1052 ein->count = apply_probability (ein->src->frequency * REG_BR_PROB_BASE,
1053 ein->probability);
1054 }
1055
1056 for (unsigned int i = 1; i < path->length (); i++)
1057 {
1058 edge epath = (*path)[i]->e;
1059 edge esucc;
1060 /* Scale up the frequency by REG_BR_PROB_BASE, to avoid rounding
1061 errors applying the edge probability when the frequencies are very
1062 small. */
1063 epath->src->count = epath->src->frequency * REG_BR_PROB_BASE;
1064 FOR_EACH_EDGE (esucc, ei, epath->src->succs)
1065 esucc->count = apply_probability (esucc->src->count,
1066 esucc->probability);
1067 }
1068 }
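/* A small numeric sketch of the conversion above: a source block with
   frequency 40 gets a pseudo-count of 40 * REG_BR_PROB_BASE, and a
   successor edge with probability REG_BR_PROB_BASE / 4 (25%) then
   receives a quarter of that, which is what apply_probability computes
   modulo rounding.  */

static gcov_type ATTRIBUTE_UNUSED
freqs_to_counts_example (void)
{
  int frequency = 40;
  int probability = REG_BR_PROB_BASE / 4;
  gcov_type src_count = (gcov_type) frequency * REG_BR_PROB_BASE;
  return src_count * probability / REG_BR_PROB_BASE;
}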
1069
1070
1071 /* For routines that have guessed frequencies and no profile counts, where we
1072 used freqs_to_counts_path to record block and edge frequencies for paths
1073 through RD, we clear the counts after completing all updates for RD.
1074 The updates in ssa_fix_duplicate_block_edges are based off the count fields,
1075 but the block frequencies and edge probabilities were updated as well,
1076 so we can simply clear the count fields. */
1077
1078 static void
1079 clear_counts_path (struct redirection_data *rd)
1080 {
1081 edge e = rd->incoming_edges->e;
1082 vec<jump_thread_edge *> *path = THREAD_PATH (e);
1083 edge ein, esucc;
1084 edge_iterator ei;
1085 FOR_EACH_EDGE (ein, ei, e->dest->preds)
1086 ein->count = 0;
1087
1088 /* First clear counts along original path. */
1089 for (unsigned int i = 1; i < path->length (); i++)
1090 {
1091 edge epath = (*path)[i]->e;
1092 FOR_EACH_EDGE (esucc, ei, epath->src->succs)
1093 esucc->count = 0;
1094 epath->src->count = 0;
1095 }
1096 /* Also need to clear the counts along duplicated path. */
1097 for (unsigned int i = 0; i < 2; i++)
1098 {
1099 basic_block dup = rd->dup_blocks[i];
1100 if (!dup)
1101 continue;
1102 FOR_EACH_EDGE (esucc, ei, dup->succs)
1103 esucc->count = 0;
1104 dup->count = 0;
1105 }
1106 }
1107
1108 /* Wire up the outgoing edges from the duplicate blocks and
1109 update any PHIs as needed. Also update the profile counts
1110 on the original and duplicate blocks and edges. */
1111 void
1112 ssa_fix_duplicate_block_edges (struct redirection_data *rd,
1113 ssa_local_info_t *local_info)
1114 {
1115 bool multi_incomings = (rd->incoming_edges->next != NULL);
1116 edge e = rd->incoming_edges->e;
1117 vec<jump_thread_edge *> *path = THREAD_PATH (e);
1118 edge elast = path->last ()->e;
1119 gcov_type path_in_count = 0;
1120 gcov_type path_out_count = 0;
1121 int path_in_freq = 0;
1122
1123 /* This routine updates profile counts, frequencies, and probabilities
1124 incrementally. Since it is difficult to do the incremental updates
1125 using frequencies/probabilities alone, for routines without profile
1126 data we first take a snapshot of the existing block and edge frequencies
1127 by copying them into the empty profile count fields. These counts are
1128 then used to do the incremental updates, and cleared at the end of this
1129 routine. If the function is marked as having a profile, we still check
1130 to see if the paths through RD are using estimated frequencies because
1131 the routine had zero profile counts. */
1132 bool do_freqs_to_counts = (profile_status_for_fn (cfun) != PROFILE_READ
1133 || estimated_freqs_path (rd));
1134 if (do_freqs_to_counts)
1135 freqs_to_counts_path (rd);
1136
1137 /* First determine how much profile count to move from original
1138 path to the duplicate path. This is tricky in the presence of
1139 a joiner (see comments for compute_path_counts), where some portion
1140 of the path's counts will flow off-path from the joiner. In the
1141 non-joiner case the path_in_count and path_out_count should be the
1142 same. */
1143 bool has_joiner = compute_path_counts (rd, local_info,
1144 &path_in_count, &path_out_count,
1145 &path_in_freq);
1146
1147 int cur_path_freq = path_in_freq;
1148 for (unsigned int count = 0, i = 1; i < path->length (); i++)
1149 {
1150 edge epath = (*path)[i]->e;
1151
1152       /* If we were threading through a joiner block, then we want
1153 to keep its control statement and redirect an outgoing edge.
1154 Else we want to remove the control statement & edges, then create
1155 a new outgoing edge. In both cases we may need to update PHIs. */
1156 if ((*path)[i]->type == EDGE_COPY_SRC_JOINER_BLOCK)
1157 {
1158 edge victim;
1159 edge e2;
1160
1161 gcc_assert (has_joiner);
1162
1163 /* This updates the PHIs at the destination of the duplicate
1164 block. Pass 0 instead of i if we are threading a path which
1165 has multiple incoming edges. */
1166 update_destination_phis (local_info->bb, rd->dup_blocks[count],
1167 path, multi_incomings ? 0 : i);
1168
1169 /* Find the edge from the duplicate block to the block we're
1170 threading through. That's the edge we want to redirect. */
1171 victim = find_edge (rd->dup_blocks[count], (*path)[i]->e->dest);
1172
1173 /* If there are no remaining blocks on the path to duplicate,
1174 then redirect VICTIM to the final destination of the jump
1175 threading path. */
1176 if (!any_remaining_duplicated_blocks (path, i))
1177 {
1178 e2 = redirect_edge_and_branch (victim, elast->dest);
1179 /* If we redirected the edge, then we need to copy PHI arguments
1180 at the target. If the edge already existed (e2 != victim
1181 case), then the PHIs in the target already have the correct
1182 arguments. */
1183 if (e2 == victim)
1184 copy_phi_args (e2->dest, elast, e2,
1185 path, multi_incomings ? 0 : i);
1186 }
1187 else
1188 {
1189 /* Redirect VICTIM to the next duplicated block in the path. */
1190 e2 = redirect_edge_and_branch (victim, rd->dup_blocks[count + 1]);
1191
1192 /* We need to update the PHIs in the next duplicated block. We
1193 want the new PHI args to have the same value as they had
1194 in the source of the next duplicate block.
1195
1196 Thus, we need to know which edge we traversed into the
1197 source of the duplicate. Furthermore, we may have
1198 traversed many edges to reach the source of the duplicate.
1199
1200 Walk through the path starting at element I until we
1201 hit an edge marked with EDGE_COPY_SRC_BLOCK. We want
1202 the edge from the prior element. */
1203 for (unsigned int j = i + 1; j < path->length (); j++)
1204 {
1205 if ((*path)[j]->type == EDGE_COPY_SRC_BLOCK)
1206 {
1207 copy_phi_arg_into_existing_phi ((*path)[j - 1]->e, e2);
1208 break;
1209 }
1210 }
1211 }
1212
1213 /* Update the counts and frequency of both the original block
1214 and path edge, and the duplicates. The path duplicate's
1215 incoming count and frequency are the totals for all edges
1216 incoming to this jump threading path computed earlier.
1217 And we know that the duplicated path will have path_out_count
1218 flowing out of it (i.e. along the duplicated path out of the
1219 duplicated joiner). */
1220 update_profile (epath, e2, path_in_count, path_out_count,
1221 path_in_freq);
1222
1223 /* Next we need to update the counts of the original and duplicated
1224 edges from the joiner that go off path. */
1225 update_joiner_offpath_counts (epath, e2->src, path_in_count,
1226 path_out_count);
1227
1228 /* Finally, we need to set the probabilities on the duplicated
1229 edges out of the duplicated joiner (e2->src). The probabilities
1230 along the original path will all be updated below after we finish
1231 processing the whole path. */
1232 recompute_probabilities (e2->src);
1233
1234 /* Record the frequency flowing to the downstream duplicated
1235 path blocks. */
1236 cur_path_freq = EDGE_FREQUENCY (e2);
1237 }
1238 else if ((*path)[i]->type == EDGE_COPY_SRC_BLOCK)
1239 {
1240 remove_ctrl_stmt_and_useless_edges (rd->dup_blocks[count], NULL);
1241 create_edge_and_update_destination_phis (rd, rd->dup_blocks[count],
1242 multi_incomings ? 0 : i);
1243 if (count == 1)
1244 single_succ_edge (rd->dup_blocks[1])->aux = NULL;
1245
1246 /* Update the counts and frequency of both the original block
1247 and path edge, and the duplicates. Since we are now after
1248 any joiner that may have existed on the path, the count
1249 flowing along the duplicated threaded path is path_out_count.
1250 If we didn't have a joiner, then cur_path_freq was the sum
1251 of the total frequencies along all incoming edges to the
1252 thread path (path_in_freq). If we had a joiner, it would have
1253 been updated at the end of that handling to the edge frequency
1254 along the duplicated joiner path edge. */
1255 update_profile (epath, EDGE_SUCC (rd->dup_blocks[count], 0),
1256 path_out_count, path_out_count,
1257 cur_path_freq);
1258 }
1259 else
1260 {
1261 /* No copy case. In this case we don't have an equivalent block
1262 on the duplicated thread path to update, but we do need
1263 to remove the portion of the counts/freqs that were moved
1264 to the duplicated path from the counts/freqs flowing through
1265 this block on the original path. Since all the no-copy edges
1266 are after any joiner, the removed count is the same as
1267 path_out_count.
1268
1269 If we didn't have a joiner, then cur_path_freq was the sum
1270 of the total frequencies along all incoming edges to the
1271 thread path (path_in_freq). If we had a joiner, it would have
1272 been updated at the end of that handling to the edge frequency
1273 along the duplicated joiner path edge. */
1274 update_profile (epath, NULL, path_out_count, path_out_count,
1275 cur_path_freq);
1276 }
1277
1278 /* Increment the index into the duplicated path when we processed
1279 a duplicated block. */
1280 if ((*path)[i]->type == EDGE_COPY_SRC_JOINER_BLOCK
1281 || (*path)[i]->type == EDGE_COPY_SRC_BLOCK)
1282 {
1283 count++;
1284 }
1285 }
1286
1287 /* Now walk orig blocks and update their probabilities, since the
1288      counts and freqs should be updated properly by the above loop.  */
1289 for (unsigned int i = 1; i < path->length (); i++)
1290 {
1291 edge epath = (*path)[i]->e;
1292 recompute_probabilities (epath->src);
1293 }
1294
1295 /* Done with all profile and frequency updates, clear counts if they
1296 were copied. */
1297 if (do_freqs_to_counts)
1298 clear_counts_path (rd);
1299 }
1300
1301 /* Hash table traversal callback routine to create duplicate blocks. */
1302
1303 int
1304 ssa_create_duplicates (struct redirection_data **slot,
1305 ssa_local_info_t *local_info)
1306 {
1307 struct redirection_data *rd = *slot;
1308
1309 /* The second duplicated block in a jump threading path is specific
1310 to the path. So it gets stored in RD rather than in LOCAL_DATA.
1311
1312 Each time we're called, we have to look through the path and see
1313 if a second block needs to be duplicated.
1314
1315 Note the search starts with the third edge on the path. The first
1316 edge is the incoming edge, the second edge always has its source
1317 duplicated. Thus we start our search with the third edge. */
1318 vec<jump_thread_edge *> *path = rd->path;
1319 for (unsigned int i = 2; i < path->length (); i++)
1320 {
1321 if ((*path)[i]->type == EDGE_COPY_SRC_BLOCK
1322 || (*path)[i]->type == EDGE_COPY_SRC_JOINER_BLOCK)
1323 {
1324 create_block_for_threading ((*path)[i]->e->src, rd, 1,
1325 &local_info->duplicate_blocks);
1326 break;
1327 }
1328 }
1329
1330 /* Create a template block if we have not done so already. Otherwise
1331 use the template to create a new block. */
1332 if (local_info->template_block == NULL)
1333 {
1334 create_block_for_threading ((*path)[1]->e->src, rd, 0,
1335 &local_info->duplicate_blocks);
1336 local_info->template_block = rd->dup_blocks[0];
1337
1338 /* We do not create any outgoing edges for the template. We will
1339 take care of that in a later traversal. That way we do not
1340 create edges that are going to just be deleted. */
1341 }
1342 else
1343 {
1344 create_block_for_threading (local_info->template_block, rd, 0,
1345 &local_info->duplicate_blocks);
1346
1347 /* Go ahead and wire up outgoing edges and update PHIs for the duplicate
1348 block. */
1349 ssa_fix_duplicate_block_edges (rd, local_info);
1350 }
1351
1352 /* Keep walking the hash table. */
1353 return 1;
1354 }
1355
1356 /* We did not create any outgoing edges for the template block during
1357 block creation. This hash table traversal callback creates the
1358 outgoing edge for the template block. */
1359
1360 inline int
1361 ssa_fixup_template_block (struct redirection_data **slot,
1362 ssa_local_info_t *local_info)
1363 {
1364 struct redirection_data *rd = *slot;
1365
1366   /* If this is the template block, halt the traversal after updating
1367 it appropriately.
1368
1369      If we were threading through a joiner block, then we want
1370 to keep its control statement and redirect an outgoing edge.
1371 Else we want to remove the control statement & edges, then create
1372 a new outgoing edge. In both cases we may need to update PHIs. */
1373 if (rd->dup_blocks[0] && rd->dup_blocks[0] == local_info->template_block)
1374 {
1375 ssa_fix_duplicate_block_edges (rd, local_info);
1376 return 0;
1377 }
1378
1379 return 1;
1380 }
1381
1382 /* Hash table traversal callback to redirect each incoming edge
1383 associated with this hash table element to its new destination. */
1384
1385 int
1386 ssa_redirect_edges (struct redirection_data **slot,
1387 ssa_local_info_t *local_info)
1388 {
1389 struct redirection_data *rd = *slot;
1390 struct el *next, *el;
1391
1392 /* Walk over all the incoming edges associated with this hash table
1393 entry. */
1394 for (el = rd->incoming_edges; el; el = next)
1395 {
1396 edge e = el->e;
1397 vec<jump_thread_edge *> *path = THREAD_PATH (e);
1398
1399 /* Go ahead and free this element from the list. Doing this now
1400 avoids the need for another list walk when we destroy the hash
1401 table. */
1402 next = el->next;
1403 free (el);
1404
1405 thread_stats.num_threaded_edges++;
1406
1407 if (rd->dup_blocks[0])
1408 {
1409 edge e2;
1410
1411 if (dump_file && (dump_flags & TDF_DETAILS))
1412 fprintf (dump_file, " Threaded jump %d --> %d to %d\n",
1413 e->src->index, e->dest->index, rd->dup_blocks[0]->index);
1414
1415           /* If we redirect a loop latch edge, cancel its loop.  */
1416 if (e->src == e->src->loop_father->latch)
1417 mark_loop_for_removal (e->src->loop_father);
1418
1419 /* Redirect the incoming edge (possibly to the joiner block) to the
1420 appropriate duplicate block. */
1421 e2 = redirect_edge_and_branch (e, rd->dup_blocks[0]);
1422 gcc_assert (e == e2);
1423 flush_pending_stmts (e2);
1424 }
1425
1426 /* Go ahead and clear E->aux. It's not needed anymore and failure
1427 to clear it will cause all kinds of unpleasant problems later. */
1428 delete_jump_thread_path (path);
1429 e->aux = NULL;
1430
1431 }
1432
1433 /* Indicate that we actually threaded one or more jumps. */
1434 if (rd->incoming_edges)
1435 local_info->jumps_threaded = true;
1436
1437 return 1;
1438 }
1439
1440 /* Return true if this block has no executable statements other than
1441 a simple ctrl flow instruction. When the number of outgoing edges
1442 is one, this is equivalent to a "forwarder" block. */
1443
1444 static bool
1445 redirection_block_p (basic_block bb)
1446 {
1447 gimple_stmt_iterator gsi;
1448
1449 /* Advance to the first executable statement. */
1450 gsi = gsi_start_bb (bb);
1451 while (!gsi_end_p (gsi)
1452 && (gimple_code (gsi_stmt (gsi)) == GIMPLE_LABEL
1453 || is_gimple_debug (gsi_stmt (gsi))
1454 || gimple_nop_p (gsi_stmt (gsi))
1455 || gimple_clobber_p (gsi_stmt (gsi))))
1456 gsi_next (&gsi);
1457
1458 /* Check if this is an empty block. */
1459 if (gsi_end_p (gsi))
1460 return true;
1461
1462 /* Test that we've reached the terminating control statement. */
1463 return gsi_stmt (gsi)
1464 && (gimple_code (gsi_stmt (gsi)) == GIMPLE_COND
1465 || gimple_code (gsi_stmt (gsi)) == GIMPLE_GOTO
1466 || gimple_code (gsi_stmt (gsi)) == GIMPLE_SWITCH);
1467 }
1468
1469 /* BB is a block which ends with a COND_EXPR or SWITCH_EXPR, and when BB
1470 is reached via one or more specific incoming edges, we know which
1471 outgoing edge from BB will be traversed.
1472
1473 We want to redirect those incoming edges to the target of the
1474 appropriate outgoing edge. Doing so avoids a conditional branch
1475 and may expose new optimization opportunities. Note that we have
1476 to update dominator tree and SSA graph after such changes.
1477
1478 The key to keeping the SSA graph update manageable is to duplicate
1479 the side effects occurring in BB so that those side effects still
1480 occur on the paths which bypass BB after redirecting edges.
1481
1482 We accomplish this by creating duplicates of BB and arranging for
1483 the duplicates to unconditionally pass control to one specific
1484 successor of BB. We then revector the incoming edges into BB to
1485 the appropriate duplicate of BB.
1486
1487 If NOLOOP_ONLY is true, we only perform the threading as long as it
1488 does not affect the structure of the loops in a nontrivial way.
1489
1490 If JOINERS is true, then thread through joiner blocks as well. */
1491
1492 static bool
1493 thread_block_1 (basic_block bb, bool noloop_only, bool joiners)
1494 {
1495 /* E is an incoming edge into BB that we may or may not want to
1496 redirect to a duplicate of BB. */
1497 edge e, e2;
1498 edge_iterator ei;
1499 ssa_local_info_t local_info;
1500
1501 local_info.duplicate_blocks = BITMAP_ALLOC (NULL);
1502
1503   /* To avoid scanning a linear array for the element we need, we instead
1504 use a hash table. For normal code there should be no noticeable
1505 difference. However, if we have a block with a large number of
1506 incoming and outgoing edges such linear searches can get expensive. */
1507 redirection_data
1508 = new hash_table<struct redirection_data> (EDGE_COUNT (bb->succs));
1509
1510 /* Record each unique threaded destination into a hash table for
1511 efficient lookups. */
1512 FOR_EACH_EDGE (e, ei, bb->preds)
1513 {
1514 if (e->aux == NULL)
1515 continue;
1516
1517 vec<jump_thread_edge *> *path = THREAD_PATH (e);
1518
1519 if (((*path)[1]->type == EDGE_COPY_SRC_JOINER_BLOCK && !joiners)
1520 || ((*path)[1]->type == EDGE_COPY_SRC_BLOCK && joiners))
1521 continue;
1522
1523 e2 = path->last ()->e;
1524 if (!e2 || noloop_only)
1525 {
1526 /* If NOLOOP_ONLY is true, we only allow threading through the
1527 header of a loop to exit edges. */
1528
1529 /* One case occurs when there was a loop header buried in a jump
1530 threading path that crosses loop boundaries. We do not try
1531 to thread this elsewhere, so just cancel the jump threading
1532 request by clearing the AUX field now. */
1533 if ((bb->loop_father != e2->src->loop_father
1534 && !loop_exit_edge_p (e2->src->loop_father, e2))
1535 || (e2->src->loop_father != e2->dest->loop_father
1536 && !loop_exit_edge_p (e2->src->loop_father, e2)))
1537 {
1538 /* Since this case is not handled by our special code
1539 to thread through a loop header, we must explicitly
1540 cancel the threading request here. */
1541 delete_jump_thread_path (path);
1542 e->aux = NULL;
1543 continue;
1544 }
1545
1546 /* Another case occurs when trying to thread through our
1547 own loop header, possibly from inside the loop. We will
1548 thread these later. */
1549 unsigned int i;
1550 for (i = 1; i < path->length (); i++)
1551 {
1552 if ((*path)[i]->e->src == bb->loop_father->header
1553 && (!loop_exit_edge_p (bb->loop_father, e2)
1554 || (*path)[1]->type == EDGE_COPY_SRC_JOINER_BLOCK))
1555 break;
1556 }
1557
1558 if (i != path->length ())
1559 continue;
1560 }
1561
1562 /* Insert the outgoing edge into the hash table if it is not
1563 already in the hash table. */
1564 lookup_redirection_data (e, INSERT);
1565 }
1566
1567 /* We do not update dominance info. */
1568 free_dominance_info (CDI_DOMINATORS);
1569
1570 /* We know we only thread through the loop header to loop exits.
1571 Let the basic block duplication hook know we are not creating
1572 a multiple entry loop. */
1573 if (noloop_only
1574 && bb == bb->loop_father->header)
1575 set_loop_copy (bb->loop_father, loop_outer (bb->loop_father));
1576
1577 /* Now create duplicates of BB.
1578
1579 Note that for a block with a high outgoing degree we can waste
1580 a lot of time and memory creating and destroying useless edges.
1581
1582 So we first duplicate BB and remove the control structure at the
1583 tail of the duplicate as well as all outgoing edges from the
1584 duplicate. We then use that duplicate block as a template for
1585 the rest of the duplicates. */
1586 local_info.template_block = NULL;
1587 local_info.bb = bb;
1588 local_info.jumps_threaded = false;
1589 redirection_data->traverse <ssa_local_info_t *, ssa_create_duplicates>
1590 (&local_info);
1591
1592 /* The template does not have an outgoing edge. Create that outgoing
1593 edge and update PHI nodes at the edge's target as necessary.
1594
1595 We do this after creating all the duplicates to avoid creating
1596 unnecessary edges. */
1597 redirection_data->traverse <ssa_local_info_t *, ssa_fixup_template_block>
1598 (&local_info);
1599
1600 /* The hash table traversals above created the duplicate blocks (and the
1601 statements within the duplicate blocks). This loop creates PHI nodes for
1602 the duplicated blocks and redirects the incoming edges into BB to reach
1603 the duplicates of BB. */
1604 redirection_data->traverse <ssa_local_info_t *, ssa_redirect_edges>
1605 (&local_info);
1606
1607 /* Done with this block. Clear REDIRECTION_DATA. */
1608 delete redirection_data;
1609 redirection_data = NULL;
1610
1611 if (noloop_only
1612 && bb == bb->loop_father->header)
1613 set_loop_copy (bb->loop_father, NULL);
1614
1615 BITMAP_FREE (local_info.duplicate_blocks);
1616 local_info.duplicate_blocks = NULL;
1617
1618 /* Indicate to our caller whether or not any jumps were threaded. */
1619 return local_info.jumps_threaded;
1620 }
1621
1622 /* Wrapper for thread_block_1 so that we can first handle jump
1623 thread paths which do not involve copying joiner blocks, then
1624 handle jump thread paths which have joiner blocks.
1625
1626 By doing things this way we can be as aggressive as possible and
1627 not worry that copying a joiner block will create a jump threading
1628 opportunity. */
1629
1630 static bool
1631 thread_block (basic_block bb, bool noloop_only)
1632 {
1633 bool retval;
1634 retval = thread_block_1 (bb, noloop_only, false);
1635 retval |= thread_block_1 (bb, noloop_only, true);
1636 return retval;
1637 }
1638
1639
1640 /* Threads edge E through E->dest to the next edge on E's jump thread
1641 path. Returns the copy of E->dest created during threading, or E->dest
1642 if it was not necessary to copy it (E is its single predecessor). */
1643
1644 static basic_block
1645 thread_single_edge (edge e)
1646 {
1647 basic_block bb = e->dest;
1648 struct redirection_data rd;
1649 vec<jump_thread_edge *> *path = THREAD_PATH (e);
1650 edge eto = (*path)[1]->e;
1651
1652 delete_jump_thread_path (path);
1653 e->aux = NULL;
1654
1655 thread_stats.num_threaded_edges++;
1656
1657 if (single_pred_p (bb))
1658 {
1659 /* If BB has just a single predecessor, we need only remove the
1660 control statement at its end and the successor edges other than ETO. */
1661 remove_ctrl_stmt_and_useless_edges (bb, eto->dest);
1662
1663 /* And fixup the flags on the single remaining edge. */
1664 eto->flags &= ~(EDGE_TRUE_VALUE | EDGE_FALSE_VALUE | EDGE_ABNORMAL);
1665 eto->flags |= EDGE_FALLTHRU;
1666
1667 return bb;
1668 }
1669
1670 /* Otherwise, we need to create a copy. */
1671 if (e->dest == eto->src)
1672 update_bb_profile_for_threading (bb, EDGE_FREQUENCY (e), e->count, eto);
1673
1674 vec<jump_thread_edge *> *npath = new vec<jump_thread_edge *> ();
1675 jump_thread_edge *x = new jump_thread_edge (e, EDGE_START_JUMP_THREAD);
1676 npath->safe_push (x);
1677
1678 x = new jump_thread_edge (eto, EDGE_COPY_SRC_BLOCK);
1679 npath->safe_push (x);
1680 rd.path = npath;
1681
1682 create_block_for_threading (bb, &rd, 0, NULL);
1683 remove_ctrl_stmt_and_useless_edges (rd.dup_blocks[0], NULL);
1684 create_edge_and_update_destination_phis (&rd, rd.dup_blocks[0], 0);
1685
1686 if (dump_file && (dump_flags & TDF_DETAILS))
1687 fprintf (dump_file, " Threaded jump %d --> %d to %d\n",
1688 e->src->index, e->dest->index, rd.dup_blocks[0]->index);
1689
1690 rd.dup_blocks[0]->count = e->count;
1691 rd.dup_blocks[0]->frequency = EDGE_FREQUENCY (e);
1692 single_succ_edge (rd.dup_blocks[0])->count = e->count;
1693 redirect_edge_and_branch (e, rd.dup_blocks[0]);
1694 flush_pending_stmts (e);
1695
1696 delete_jump_thread_path (npath);
1697 return rd.dup_blocks[0];
1698 }
1699
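/* A hypothetical sketch of the two cases handled above: given the recorded
   path E = A->B followed by B->C,

     - if A is B's only predecessor, B itself is rewired so that it falls
       through to C, giving A -> B -> C;

     - otherwise B is duplicated as B' and only E is redirected:

         A -> B' -> C
         D -> B  -> ...      (other predecessors keep the original B)  */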
1700 /* Callback for dfs_enumerate_from. Returns true if BB is different
1701 from STOP and DBDS_CE_STOP. */
1702
1703 static basic_block dbds_ce_stop;
1704 static bool
1705 dbds_continue_enumeration_p (const_basic_block bb, const void *stop)
1706 {
1707 return (bb != (const_basic_block) stop
1708 && bb != dbds_ce_stop);
1709 }
1710
1711 /* Evaluates the dominance relationship between BB and the latch of LOOP,
1712 and returns the state. */
1713
1714 enum bb_dom_status
1715 {
1716 /* BB does not dominate latch of the LOOP. */
1717 DOMST_NONDOMINATING,
1718 /* The LOOP is broken (there is no path from the header to its latch). */
1719 DOMST_LOOP_BROKEN,
1720 /* BB dominates the latch of the LOOP. */
1721 DOMST_DOMINATING
1722 };
1723
1724 static enum bb_dom_status
1725 determine_bb_domination_status (struct loop *loop, basic_block bb)
1726 {
1727 basic_block *bblocks;
1728 unsigned nblocks, i;
1729 bool bb_reachable = false;
1730 edge_iterator ei;
1731 edge e;
1732
1733 /* This function assumes BB is a successor of LOOP->header.
1734 If that is not the case, return DOMST_NONDOMINATING, which
1735 is always safe. */
1736 {
1737 bool ok = false;
1738
1739 FOR_EACH_EDGE (e, ei, bb->preds)
1740 {
1741 if (e->src == loop->header)
1742 {
1743 ok = true;
1744 break;
1745 }
1746 }
1747
1748 if (!ok)
1749 return DOMST_NONDOMINATING;
1750 }
1751
1752 if (bb == loop->latch)
1753 return DOMST_DOMINATING;
1754
1755 /* Check that BB dominates LOOP->latch, and that it is back-reachable
1756 from it. */
1757
1758 bblocks = XCNEWVEC (basic_block, loop->num_nodes);
1759 dbds_ce_stop = loop->header;
1760 nblocks = dfs_enumerate_from (loop->latch, 1, dbds_continue_enumeration_p,
1761 bblocks, loop->num_nodes, bb);
1762 for (i = 0; i < nblocks; i++)
1763 FOR_EACH_EDGE (e, ei, bblocks[i]->preds)
1764 {
1765 if (e->src == loop->header)
1766 {
1767 free (bblocks);
1768 return DOMST_NONDOMINATING;
1769 }
1770 if (e->src == bb)
1771 bb_reachable = true;
1772 }
1773
1774 free (bblocks);
1775 return (bb_reachable ? DOMST_DOMINATING : DOMST_LOOP_BROKEN);
1776 }
1777
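/* A hypothetical example: in the loop

     H -> B -> L -> H                    (L is the latch)

   B dominates L, so the function returns DOMST_DOMINATING. If there were
   also an edge H -> L, the latch would be reachable without passing through
   B and the result would be DOMST_NONDOMINATING. If no path from H reached
   L at all, the result would be DOMST_LOOP_BROKEN.  */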
1778 /* Return true if BB is part of the new pre-header that is created
1779 when threading the latch to DATA. */
1780
1781 static bool
1782 def_split_header_continue_p (const_basic_block bb, const void *data)
1783 {
1784 const_basic_block new_header = (const_basic_block) data;
1785 const struct loop *l;
1786
1787 if (bb == new_header
1788 || loop_depth (bb->loop_father) < loop_depth (new_header->loop_father))
1789 return false;
1790 for (l = bb->loop_father; l; l = loop_outer (l))
1791 if (l == new_header->loop_father)
1792 return true;
1793 return false;
1794 }
1795
1796 /* Thread jumps through the header of LOOP. Returns true if cfg changes.
1797 If MAY_PEEL_LOOP_HEADERS is false, we avoid threading from entry edges
1798 to the inside of the loop. */
1799
1800 static bool
1801 thread_through_loop_header (struct loop *loop, bool may_peel_loop_headers)
1802 {
1803 basic_block header = loop->header;
1804 edge e, tgt_edge, latch = loop_latch_edge (loop);
1805 edge_iterator ei;
1806 basic_block tgt_bb, atgt_bb;
1807 enum bb_dom_status domst;
1808
1809 /* We have already threaded through headers to exits, so all the threading
1810 requests now are to the inside of the loop. We need to avoid creating
1811 irreducible regions (i.e., loops with more than one entry block), and
1812 also loops with several latch edges, or new subloops of the loop (although
1813 there are cases where it might be appropriate, it is difficult to decide,
1814 and doing it wrongly may confuse other optimizers).
1815
1816 We could handle more general cases here. However, the intention is to
1817 preserve some information about the loop, which is impossible if its
1818 structure changes significantly, in a way that is not well understood.
1819 Thus we only handle a few important special cases, in which updating
1820 the loop-carried information should also be feasible:
1821
1822 1) Propagation of the latch edge to a block that dominates the latch block
1823 of the loop. This aims to handle the following idiom:
1824
1825 first = 1;
1826 while (1)
1827 {
1828 if (first)
1829 initialize;
1830 first = 0;
1831 body;
1832 }
1833
1834 After threading the latch edge, this becomes
1835
1836 first = 1;
1837 if (first)
1838 initialize;
1839 while (1)
1840 {
1841 first = 0;
1842 body;
1843 }
1844
1845 The original header of the loop is moved out of it, and we may thread
1846 the remaining edges through it without further constraints.
1847
1848 2) All entry edges are propagated to a single basic block that dominates
1849 the latch block of the loop. This aims to handle the following idiom
1850 (normally created for "for" loops):
1851
1852 i = 0;
1853 while (1)
1854 {
1855 if (i >= 100)
1856 break;
1857 body;
1858 i++;
1859 }
1860
1861 This becomes
1862
1863 i = 0;
1864 while (1)
1865 {
1866 body;
1867 i++;
1868 if (i >= 100)
1869 break;
1870 }
1871 */
1872
1873 /* Threading through the header won't improve the code if the header has just
1874 one successor. */
1875 if (single_succ_p (header))
1876 goto fail;
1877
1878 /* If we threaded the latch using a joiner block, we cancel the
1879 threading opportunity out of an abundance of caution. However,
1880 still allow threading from outside to inside the loop. */
1881 if (latch->aux)
1882 {
1883 vec<jump_thread_edge *> *path = THREAD_PATH (latch);
1884 if ((*path)[1]->type == EDGE_COPY_SRC_JOINER_BLOCK)
1885 {
1886 delete_jump_thread_path (path);
1887 latch->aux = NULL;
1888 }
1889 }
1890
1891 if (latch->aux)
1892 {
1893 vec<jump_thread_edge *> *path = THREAD_PATH (latch);
1894 tgt_edge = (*path)[1]->e;
1895 tgt_bb = tgt_edge->dest;
1896 }
1897 else if (!may_peel_loop_headers
1898 && !redirection_block_p (loop->header))
1899 goto fail;
1900 else
1901 {
1902 tgt_bb = NULL;
1903 tgt_edge = NULL;
1904 FOR_EACH_EDGE (e, ei, header->preds)
1905 {
1906 if (!e->aux)
1907 {
1908 if (e == latch)
1909 continue;
1910
1911 /* If the latch is not threaded, and there is a header
1912 edge that is not threaded, we would create a loop
1913 with multiple entries. */
1914 goto fail;
1915 }
1916
1917 vec<jump_thread_edge *> *path = THREAD_PATH (e);
1918
1919 if ((*path)[1]->type == EDGE_COPY_SRC_JOINER_BLOCK)
1920 goto fail;
1921 tgt_edge = (*path)[1]->e;
1922 atgt_bb = tgt_edge->dest;
1923 if (!tgt_bb)
1924 tgt_bb = atgt_bb;
1925 /* Two targets of threading would make us create a loop
1926 with multiple entries. */
1927 else if (tgt_bb != atgt_bb)
1928 goto fail;
1929 }
1930
1931 if (!tgt_bb)
1932 {
1933 /* There are no threading requests. */
1934 return false;
1935 }
1936
1937 /* Redirecting to an empty loop latch is useless. */
1938 if (tgt_bb == loop->latch
1939 && empty_block_p (loop->latch))
1940 goto fail;
1941 }
1942
1943 /* The target block must dominate the loop latch, otherwise we would be
1944 creating a subloop. */
1945 domst = determine_bb_domination_status (loop, tgt_bb);
1946 if (domst == DOMST_NONDOMINATING)
1947 goto fail;
1948 if (domst == DOMST_LOOP_BROKEN)
1949 {
1950 /* If the loop ceased to exist, mark it as such, and thread through its
1951 original header. */
1952 mark_loop_for_removal (loop);
1953 return thread_block (header, false);
1954 }
1955
1956 if (tgt_bb->loop_father->header == tgt_bb)
1957 {
1958 /* If the target of the threading is a header of a subloop, we need
1959 to create a preheader for it, so that the headers of the two loops
1960 do not merge. */
1961 if (EDGE_COUNT (tgt_bb->preds) > 2)
1962 {
1963 tgt_bb = create_preheader (tgt_bb->loop_father, 0);
1964 gcc_assert (tgt_bb != NULL);
1965 }
1966 else
1967 tgt_bb = split_edge (tgt_edge);
1968 }
1969
1970 if (latch->aux)
1971 {
1972 basic_block *bblocks;
1973 unsigned nblocks, i;
1974
1975 /* First handle the case where the latch edge is redirected. We are
1976 copying the loop header but not creating a multiple entry loop.
1977 Make the cfg manipulation code aware of that fact. */
1978 set_loop_copy (loop, loop);
1979 loop->latch = thread_single_edge (latch);
1980 set_loop_copy (loop, NULL);
1981 gcc_assert (single_succ (loop->latch) == tgt_bb);
1982 loop->header = tgt_bb;
1983
1984 /* Remove the new pre-header blocks from our loop. */
1985 bblocks = XCNEWVEC (basic_block, loop->num_nodes);
1986 nblocks = dfs_enumerate_from (header, 0, def_split_header_continue_p,
1987 bblocks, loop->num_nodes, tgt_bb);
1988 for (i = 0; i < nblocks; i++)
1989 if (bblocks[i]->loop_father == loop)
1990 {
1991 remove_bb_from_loops (bblocks[i]);
1992 add_bb_to_loop (bblocks[i], loop_outer (loop));
1993 }
1994 free (bblocks);
1995
1996 /* If the new header has multiple latches, mark it so. */
1997 FOR_EACH_EDGE (e, ei, loop->header->preds)
1998 if (e->src->loop_father == loop
1999 && e->src != loop->latch)
2000 {
2001 loop->latch = NULL;
2002 loops_state_set (LOOPS_MAY_HAVE_MULTIPLE_LATCHES);
2003 }
2004
2005 /* Cancel remaining threading requests that would make the
2006 loop a multiple entry loop. */
2007 FOR_EACH_EDGE (e, ei, header->preds)
2008 {
2009 edge e2;
2010
2011 if (e->aux == NULL)
2012 continue;
2013
2014 vec<jump_thread_edge *> *path = THREAD_PATH (e);
2015 e2 = path->last ()->e;
2016
2017 if (e->src->loop_father != e2->dest->loop_father
2018 && e2->dest != loop->header)
2019 {
2020 delete_jump_thread_path (path);
2021 e->aux = NULL;
2022 }
2023 }
2024
2025 /* Thread the remaining edges through the former header. */
2026 thread_block (header, false);
2027 }
2028 else
2029 {
2030 basic_block new_preheader;
2031
2032 /* Now consider the case where entry edges are redirected to the new
2033 entry block. Remember one entry edge, so that we can find the new
2034 preheader (its destination after threading). */
2035 FOR_EACH_EDGE (e, ei, header->preds)
2036 {
2037 if (e->aux)
2038 break;
2039 }
2040
2041 /* The duplicate of the header is the new preheader of the loop. Ensure
2042 that it is placed correctly in the loop hierarchy. */
2043 set_loop_copy (loop, loop_outer (loop));
2044
2045 thread_block (header, false);
2046 set_loop_copy (loop, NULL);
2047 new_preheader = e->dest;
2048
2049 /* Create the new latch block. This is always necessary, as the latch
2050 must have only a single successor, but the original header had at
2051 least two successors. */
2052 loop->latch = NULL;
2053 mfb_kj_edge = single_succ_edge (new_preheader);
2054 loop->header = mfb_kj_edge->dest;
2055 latch = make_forwarder_block (tgt_bb, mfb_keep_just, NULL);
2056 loop->header = latch->dest;
2057 loop->latch = latch->src;
2058 }
2059
2060 return true;
2061
2062 fail:
2063 /* We failed to thread anything. Cancel the requests. */
2064 FOR_EACH_EDGE (e, ei, header->preds)
2065 {
2066 vec<jump_thread_edge *> *path = THREAD_PATH (e);
2067
2068 if (path)
2069 {
2070 delete_jump_thread_path (path);
2071 e->aux = NULL;
2072 }
2073 }
2074 return false;
2075 }
2076
2077 /* E1 and E2 are edges into the same basic block. Return TRUE if the
2078 PHI arguments associated with those edges are equal or there are no
2079 PHI arguments, otherwise return FALSE. */
2080
2081 static bool
2082 phi_args_equal_on_edges (edge e1, edge e2)
2083 {
2084 gphi_iterator gsi;
2085 int indx1 = e1->dest_idx;
2086 int indx2 = e2->dest_idx;
2087
2088 for (gsi = gsi_start_phis (e1->dest); !gsi_end_p (gsi); gsi_next (&gsi))
2089 {
2090 gphi *phi = gsi.phi ();
2091
2092 if (!operand_equal_p (gimple_phi_arg_def (phi, indx1),
2093 gimple_phi_arg_def (phi, indx2), 0))
2094 return false;
2095 }
2096 return true;
2097 }
2098
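/* A hypothetical GIMPLE sketch: with edges E1 and E2 both entering a block
   containing

     # x_1 = PHI <a_2 (E1), a_2 (E2), b_3 (E3)>
     # y_4 = PHI <c_5 (E1), d_6 (E2), c_5 (E3)>

   phi_args_equal_on_edges (E1, E2) returns false because of y_4; if the
   argument of y_4 on E2 were also c_5 it would return true.  */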
2099 /* Walk through the registered jump threads and convert them into a
2100 form convenient for this pass.
2101
2102 Any block which has incoming edges threaded to outgoing edges
2103 will have its entry in THREADED_BLOCK set.
2104
2105 Any threaded edge will have its new outgoing edge stored in the
2106 original edge's AUX field.
2107
2108 This form avoids the need to walk all the edges in the CFG to
2109 discover blocks which need processing and avoids unnecessary
2110 hash table lookups to map from threaded edge to new target. */
2111
2112 static void
2113 mark_threaded_blocks (bitmap threaded_blocks)
2114 {
2115 unsigned int i;
2116 bitmap_iterator bi;
2117 bitmap tmp = BITMAP_ALLOC (NULL);
2118 basic_block bb;
2119 edge e;
2120 edge_iterator ei;
2121
2122 /* It is possible to have jump threads in which one is a subpath
2123 of the other. i.e., (A, B), (B, C), (C, D) where B is a joiner
2124 block and (B, C), (C, D) where no joiner block exists.
2125
2126 When this occurs ignore the jump thread request with the joiner
2127 block. It's totally subsumed by the simpler jump thread request.
2128
2129 This results in less block copying and simpler CFGs. More importantly,
2130 when we duplicate the joiner block, B, in this case we will create
2131 a new threading opportunity that we wouldn't be able to optimize
2132 until the next jump threading iteration.
2133
2134 So first convert the jump thread requests which do not require a
2135 joiner block. */
2136 for (i = 0; i < paths.length (); i++)
2137 {
2138 vec<jump_thread_edge *> *path = paths[i];
2139
2140 if ((*path)[1]->type != EDGE_COPY_SRC_JOINER_BLOCK)
2141 {
2142 edge e = (*path)[0]->e;
2143 e->aux = (void *)path;
2144 bitmap_set_bit (tmp, e->dest->index);
2145 }
2146 }
2147
2148 /* Now iterate again, converting cases where we want to thread
2149 through a joiner block, but only if no other edge on the path
2150 already has a jump thread attached to it. We do this in two passes,
2151 to avoid situations where the order in the paths vec can hide overlapping
2152 threads (the path is recorded on the incoming edge, so we would miss
2153 cases where the second path starts at a downstream edge on the same
2154 path). First record all joiner paths, deleting any in the unexpected
2155 case where there is already a path for that incoming edge. */
2156 for (i = 0; i < paths.length ();)
2157 {
2158 vec<jump_thread_edge *> *path = paths[i];
2159
2160 if ((*path)[1]->type == EDGE_COPY_SRC_JOINER_BLOCK)
2161 {
2162 /* Attach the path to the starting edge if none is yet recorded. */
2163 if ((*path)[0]->e->aux == NULL)
2164 {
2165 (*path)[0]->e->aux = path;
2166 i++;
2167 }
2168 else
2169 {
2170 paths.unordered_remove (i);
2171 if (dump_file && (dump_flags & TDF_DETAILS))
2172 dump_jump_thread_path (dump_file, *path, false);
2173 delete_jump_thread_path (path);
2174 }
2175 }
2176 else
2177 {
2178 i++;
2179 }
2180 }
2181
2182 /* Second, look for paths that have any other jump thread attached to
2183 them, and either finish converting them or cancel them. */
2184 for (i = 0; i < paths.length ();)
2185 {
2186 vec<jump_thread_edge *> *path = paths[i];
2187 edge e = (*path)[0]->e;
2188
2189 if ((*path)[1]->type == EDGE_COPY_SRC_JOINER_BLOCK && e->aux == path)
2190 {
2191 unsigned int j;
2192 for (j = 1; j < path->length (); j++)
2193 if ((*path)[j]->e->aux != NULL)
2194 break;
2195
2196 /* If we iterated through the entire path without exiting the loop,
2197 then we are good to go, record it. */
2198 if (j == path->length ())
2199 {
2200 bitmap_set_bit (tmp, e->dest->index);
2201 i++;
2202 }
2203 else
2204 {
2205 e->aux = NULL;
2206 paths.unordered_remove (i);
2207 if (dump_file && (dump_flags & TDF_DETAILS))
2208 dump_jump_thread_path (dump_file, *path, false);
2209 delete_jump_thread_path (path);
2210 }
2211 }
2212 else
2213 {
2214 i++;
2215 }
2216 }
2217
2218 /* If optimizing for size, only thread through a block if we don't have
2219 to duplicate it or it's an otherwise empty redirection block. */
2220 if (optimize_function_for_size_p (cfun))
2221 {
2222 EXECUTE_IF_SET_IN_BITMAP (tmp, 0, i, bi)
2223 {
2224 bb = BASIC_BLOCK_FOR_FN (cfun, i);
2225 if (EDGE_COUNT (bb->preds) > 1
2226 && !redirection_block_p (bb))
2227 {
2228 FOR_EACH_EDGE (e, ei, bb->preds)
2229 {
2230 if (e->aux)
2231 {
2232 vec<jump_thread_edge *> *path = THREAD_PATH (e);
2233 delete_jump_thread_path (path);
2234 e->aux = NULL;
2235 }
2236 }
2237 }
2238 else
2239 bitmap_set_bit (threaded_blocks, i);
2240 }
2241 }
2242 else
2243 bitmap_copy (threaded_blocks, tmp);
2244
2245 /* Look for jump threading paths which cross multiple loop headers.
2246
2247 The code to thread through loop headers will change the CFG in ways
2248 that break assumptions made by the loop optimization code.
2249
2250 We don't want to blindly cancel the requests. We can instead do better
2251 by trimming off the end of the jump thread path. */
2252 EXECUTE_IF_SET_IN_BITMAP (tmp, 0, i, bi)
2253 {
2254 basic_block bb = BASIC_BLOCK_FOR_FN (cfun, i);
2255 FOR_EACH_EDGE (e, ei, bb->preds)
2256 {
2257 if (e->aux)
2258 {
2259 vec<jump_thread_edge *> *path = THREAD_PATH (e);
2260
2261 for (unsigned int i = 0, crossed_headers = 0;
2262 i < path->length ();
2263 i++)
2264 {
2265 basic_block dest = (*path)[i]->e->dest;
2266 crossed_headers += (dest == dest->loop_father->header);
2267 if (crossed_headers > 1)
2268 {
2269 /* Trim from entry I onwards. */
2270 for (unsigned int j = i; j < path->length (); j++)
2271 delete (*path)[j];
2272 path->truncate (i);
2273
2274 /* Now that we've truncated the path, make sure
2275 what's left is still valid. We need at least
2276 two edges on the path and the last edge cannot
2277 be a joiner. This should never happen, but let's
2278 be safe. */
2279 if (path->length () < 2
2280 || (path->last ()->type
2281 == EDGE_COPY_SRC_JOINER_BLOCK))
2282 {
2283 delete_jump_thread_path (path);
2284 e->aux = NULL;
2285 }
2286 break;
2287 }
2288 }
2289 }
2290 }
2291 }
2292
2293 /* If we have a joiner block (J) which has two successors S1 and S2 and
2294 we are threading through S1 and the final destination of the thread
2295 is S2, then we must verify that any PHI nodes in S2 have the same
2296 PHI arguments for the edge J->S2 and J->S1->...->S2.
2297
2298 We used to detect this prior to registering the jump thread, but
2299 that prohibits propagation of edge equivalences into non-dominated
2300 PHI nodes as the equivalency test might occur before propagation.
2301
2302 This must also occur after we truncate any jump threading paths
2303 as this scenario may only show up after truncation.
2304
2305 This works for now, but will need improvement as part of the FSA
2306 optimization.
2307
2308 Note since we've moved the thread request data to the edges,
2309 we have to iterate on those rather than the threaded_edges vector. */
2310 EXECUTE_IF_SET_IN_BITMAP (tmp, 0, i, bi)
2311 {
2312 bb = BASIC_BLOCK_FOR_FN (cfun, i);
2313 FOR_EACH_EDGE (e, ei, bb->preds)
2314 {
2315 if (e->aux)
2316 {
2317 vec<jump_thread_edge *> *path = THREAD_PATH (e);
2318 bool have_joiner = ((*path)[1]->type == EDGE_COPY_SRC_JOINER_BLOCK);
2319
2320 if (have_joiner)
2321 {
2322 basic_block joiner = e->dest;
2323 edge final_edge = path->last ()->e;
2324 basic_block final_dest = final_edge->dest;
2325 edge e2 = find_edge (joiner, final_dest);
2326
2327 if (e2 && !phi_args_equal_on_edges (e2, final_edge))
2328 {
2329 delete_jump_thread_path (path);
2330 e->aux = NULL;
2331 }
2332 }
2333 }
2334 }
2335 }
2336
2337 BITMAP_FREE (tmp);
2338 }
2339
2340
2341 /* Return TRUE if BB ends with a switch statement or a computed goto.
2342 Otherwise return FALSE. */
2343 static bool
2344 bb_ends_with_multiway_branch (basic_block bb ATTRIBUTE_UNUSED)
2345 {
2346 gimple *stmt = last_stmt (bb);
2347 if (stmt && gimple_code (stmt) == GIMPLE_SWITCH)
2348 return true;
2349 if (stmt && gimple_code (stmt) == GIMPLE_GOTO
2350 && TREE_CODE (gimple_goto_dest (stmt)) == SSA_NAME)
2351 return true;
2352 return false;
2353 }
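/* A hypothetical source-level illustration of both forms:

     switch (x) { case 0: f (); break; default: g (); }    (GIMPLE_SWITCH)

     static void *tbl[] = { &&l0, &&l1 };
     goto *tbl[x];                         (GIMPLE_GOTO on an SSA_NAME)
   l0: f ();
   l1: g ();  */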
2354
2355 /* Verify that the REGION is a valid jump thread. A jump thread is a special
2356 case of a SEME (Single Entry Multiple Exits) region in which all nodes in the
2357 REGION have exactly one incoming edge. The only exception is the first block
2358 that may not have been connected to the rest of the cfg yet. */
2359
2360 DEBUG_FUNCTION void
2361 verify_jump_thread (basic_block *region, unsigned n_region)
2362 {
2363 for (unsigned i = 0; i < n_region; i++)
2364 gcc_assert (EDGE_COUNT (region[i]->preds) <= 1);
2365 }
2366
2367 /* Return true when BB is one of the first N items in BBS. */
2368
2369 static inline bool
2370 bb_in_bbs (basic_block bb, basic_block *bbs, int n)
2371 {
2372 for (int i = 0; i < n; i++)
2373 if (bb == bbs[i])
2374 return true;
2375
2376 return false;
2377 }
2378
2379 /* Duplicates a jump-thread path of N_REGION basic blocks.
2380 The ENTRY edge is redirected to the duplicate of the region.
2381
2382 Remove the last conditional statement in the last basic block in the REGION,
2383 and create a single fallthru edge pointing to the same destination as the
2384 EXIT edge.
2385
2386 The new basic blocks are stored to REGION_COPY in the same order as they had
2387 in REGION, provided that REGION_COPY is not NULL.
2388
2389 Returns false if it is unable to copy the region, true otherwise. */
2390
2391 static bool
2392 duplicate_thread_path (edge entry, edge exit,
2393 basic_block *region, unsigned n_region,
2394 basic_block *region_copy)
2395 {
2396 unsigned i;
2397 bool free_region_copy = false;
2398 struct loop *loop = entry->dest->loop_father;
2399 edge exit_copy;
2400 edge redirected;
2401 int total_freq = 0, entry_freq = 0;
2402 gcov_type total_count = 0, entry_count = 0;
2403
2404 if (!can_copy_bbs_p (region, n_region))
2405 return false;
2406
2407 /* Some sanity checking. Note that we do not check for all possible
2408 misuses of the functions. I.e. if you ask to copy something weird,
2409 it will work, but the state of structures probably will not be
2410 correct. */
2411 for (i = 0; i < n_region; i++)
2412 {
2413 /* We do not handle subloops, i.e. all the blocks must belong to the
2414 same loop. */
2415 if (region[i]->loop_father != loop)
2416 return false;
2417 }
2418
2419 initialize_original_copy_tables ();
2420
2421 set_loop_copy (loop, loop);
2422
2423 if (!region_copy)
2424 {
2425 region_copy = XNEWVEC (basic_block, n_region);
2426 free_region_copy = true;
2427 }
2428
2429 if (entry->dest->count)
2430 {
2431 total_count = entry->dest->count;
2432 entry_count = entry->count;
2433 /* Fix up corner cases, to avoid division by zero or creation of negative
2434 frequencies. */
2435 if (entry_count > total_count)
2436 entry_count = total_count;
2437 }
2438 else
2439 {
2440 total_freq = entry->dest->frequency;
2441 entry_freq = EDGE_FREQUENCY (entry);
2442 /* Fix up corner cases, to avoid division by zero or creation of negative
2443 frequencies. */
2444 if (total_freq == 0)
2445 total_freq = 1;
2446 else if (entry_freq > total_freq)
2447 entry_freq = total_freq;
2448 }
2449
2450 copy_bbs (region, n_region, region_copy, &exit, 1, &exit_copy, loop,
2451 split_edge_bb_loc (entry), false);
2452
2453 /* Fix up: copy_bbs redirects all edges pointing to copied blocks. The
2454 following code ensures that all the edges exiting the jump-thread path are
2455 redirected back to the original code: these edges are exceptions
2456 invalidating the property that is propagated by executing all the blocks of
2457 the jump-thread path in order. */
2458
2459 for (i = 0; i < n_region; i++)
2460 {
2461 edge e;
2462 edge_iterator ei;
2463 basic_block bb = region_copy[i];
2464
2465 if (single_succ_p (bb))
2466 {
2467 /* Make sure the successor is the next node in the path. */
2468 gcc_assert (i + 1 == n_region
2469 || region_copy[i + 1] == single_succ_edge (bb)->dest);
2470 continue;
2471 }
2472
2473 /* Special case the last block on the path: make sure that it does not
2474 jump back on the copied path. */
2475 if (i + 1 == n_region)
2476 {
2477 FOR_EACH_EDGE (e, ei, bb->succs)
2478 if (bb_in_bbs (e->dest, region_copy, n_region - 1))
2479 {
2480 basic_block orig = get_bb_original (e->dest);
2481 if (orig)
2482 redirect_edge_and_branch_force (e, orig);
2483 }
2484 continue;
2485 }
2486
2487 /* Redirect all other edges jumping to non-adjacent blocks back to the
2488 original code. */
2489 FOR_EACH_EDGE (e, ei, bb->succs)
2490 if (region_copy[i + 1] != e->dest)
2491 {
2492 basic_block orig = get_bb_original (e->dest);
2493 if (orig)
2494 redirect_edge_and_branch_force (e, orig);
2495 }
2496 }
2497
2498 if (total_count)
2499 {
2500 scale_bbs_frequencies_gcov_type (region, n_region,
2501 total_count - entry_count,
2502 total_count);
2503 scale_bbs_frequencies_gcov_type (region_copy, n_region, entry_count,
2504 total_count);
2505 }
2506 else
2507 {
2508 scale_bbs_frequencies_int (region, n_region, total_freq - entry_freq,
2509 total_freq);
2510 scale_bbs_frequencies_int (region_copy, n_region, entry_freq, total_freq);
2511 }
2512
2513 if (flag_checking)
2514 verify_jump_thread (region_copy, n_region);
2515
2516 /* Remove the last branch in the jump thread path. */
2517 remove_ctrl_stmt_and_useless_edges (region_copy[n_region - 1], exit->dest);
2518
2519 /* And fixup the flags on the single remaining edge. */
2520 edge fix_e = find_edge (region_copy[n_region - 1], exit->dest);
2521 fix_e->flags &= ~(EDGE_TRUE_VALUE | EDGE_FALSE_VALUE | EDGE_ABNORMAL);
2522 fix_e->flags |= EDGE_FALLTHRU;
2523
2524 edge e = make_edge (region_copy[n_region - 1], exit->dest, EDGE_FALLTHRU);
2525
2526 if (e) {
2527 rescan_loop_exit (e, true, false);
2528 e->probability = REG_BR_PROB_BASE;
2529 e->count = region_copy[n_region - 1]->count;
2530 }
2531
2532 /* Redirect the entry and add the phi node arguments. */
2533 if (entry->dest == loop->header)
2534 mark_loop_for_removal (loop);
2535 redirected = redirect_edge_and_branch (entry, get_bb_copy (entry->dest));
2536 gcc_assert (redirected != NULL);
2537 flush_pending_stmts (entry);
2538
2539 /* Add the other PHI node arguments. */
2540 add_phi_args_after_copy (region_copy, n_region, NULL);
2541
2542 if (free_region_copy)
2543 free (region_copy);
2544
2545 free_original_copy_tables ();
2546 return true;
2547 }
2548
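/* A hypothetical sketch of the transformation above: duplicating the thread
   path B1 -> B2 -> B3, entered via ENTRY and leaving through EXIT into
   block D, yields

     ENTRY -> B1' -> B2' -> B3' -> D      (the final branch of B3' removed
                                           and replaced by a fallthru edge)
     ...   -> B1  -> B2  -> B3  -> ...    (original blocks kept for their
                                           other predecessors)  */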
2549 /* Return true when PATH is a valid jump-thread path. */
2550
2551 static bool
2552 valid_jump_thread_path (vec<jump_thread_edge *> *path)
2553 {
2554 unsigned len = path->length ();
2555 bool multiway_branch = false;
2556
2557 /* Check that the path is connected and see if there's a multi-way
2558 branch on the path. */
2559 for (unsigned int j = 0; j < len - 1; j++)
2560 {
2561 if ((*path)[j]->e->dest != (*path)[j+1]->e->src)
2562 return false;
2563 gimple *last = last_stmt ((*path)[j]->e->dest);
2564 multiway_branch |= (last && gimple_code (last) == GIMPLE_SWITCH);
2565 }
2566
2567 /* If we are trying to thread the loop latch to a block that does
2568 not dominate the loop latch, then that will create an irreducible
2569 loop. We avoid that unless the jump thread has a multi-way
2570 branch, in which case we have deemed it worth losing other
2571 loop optimizations later if we can eliminate the multi-way branch. */
2572 edge e = (*path)[0]->e;
2573 struct loop *loop = e->dest->loop_father;
2574 if (!multiway_branch
2575 && loop->latch
2576 && loop_latch_edge (loop) == e
2577 && (determine_bb_domination_status (loop, path->last ()->e->dest)
2578 == DOMST_NONDOMINATING))
2579 return false;
2580
2581 return true;
2582 }
2583
2584 /* Remove any queued jump threads that include edge E.
2585
2586 We don't actually remove them here, just record the edges into a
2587 hash table. That way we can do the search once per iteration of
2588 DOM/VRP rather than for every case where DOM optimizes away a COND_EXPR. */
2589
2590 void
2591 remove_jump_threads_including (edge_def *e)
2592 {
2593 if (!paths.exists ())
2594 return;
2595
2596 if (!removed_edges)
2597 removed_edges = new hash_table<struct removed_edges> (17);
2598
2599 edge *slot = removed_edges->find_slot (e, INSERT);
2600 *slot = e;
2601 }
2602
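/* A hypothetical caller: a pass that is about to remove edge E from the CFG
   would do

     remove_jump_threads_including (e);
     remove_edge (e);

   so that any queued path containing E is discarded the next time
   thread_through_all_blocks runs.  */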
2603 /* Walk through all blocks and thread incoming edges to the appropriate
2604 outgoing edge for each edge pair recorded in THREADED_EDGES.
2605
2606 It is the caller's responsibility to fix the dominance information
2607 and rewrite duplicated SSA_NAMEs back into SSA form.
2608
2609 If MAY_PEEL_LOOP_HEADERS is false, we avoid threading edges through
2610 loop headers if it does not simplify the loop.
2611
2612 Returns true if one or more edges were threaded, false otherwise. */
2613
2614 bool
2615 thread_through_all_blocks (bool may_peel_loop_headers)
2616 {
2617 bool retval = false;
2618 unsigned int i;
2619 bitmap_iterator bi;
2620 bitmap threaded_blocks;
2621 struct loop *loop;
2622
2623 if (!paths.exists ())
2624 {
2625 retval = false;
2626 goto out;
2627 }
2628
2629 threaded_blocks = BITMAP_ALLOC (NULL);
2630 memset (&thread_stats, 0, sizeof (thread_stats));
2631
2632 /* Remove any paths that referenced removed edges. */
2633 if (removed_edges)
2634 for (i = 0; i < paths.length (); )
2635 {
2636 unsigned int j;
2637 vec<jump_thread_edge *> *path = paths[i];
2638
2639 for (j = 0; j < path->length (); j++)
2640 {
2641 edge e = (*path)[j]->e;
2642 if (removed_edges->find_slot (e, NO_INSERT))
2643 break;
2644 }
2645
2646 if (j != path->length ())
2647 {
2648 delete_jump_thread_path (path);
2649 paths.unordered_remove (i);
2650 continue;
2651 }
2652 i++;
2653 }
2654
2655 /* Jump-thread all FSM threads before other jump-threads. */
2656 for (i = 0; i < paths.length ();)
2657 {
2658 vec<jump_thread_edge *> *path = paths[i];
2659 edge entry = (*path)[0]->e;
2660
2661 /* Only code-generate FSM jump-threads in this loop. */
2662 if ((*path)[0]->type != EDGE_FSM_THREAD)
2663 {
2664 i++;
2665 continue;
2666 }
2667
2668 /* Do not jump-thread twice from the same block. */
2669 if (bitmap_bit_p (threaded_blocks, entry->src->index)
2670 /* Verify that the jump thread path is still valid: a
2671 previous jump-thread may have changed the CFG, and
2672 invalidated the current path or the requested jump
2673 thread might create irreducible loops which should
2674 generally be avoided. */
2675 || !valid_jump_thread_path (path))
2676 {
2677 /* Remove invalid FSM jump-thread paths. */
2678 delete_jump_thread_path (path);
2679 paths.unordered_remove (i);
2680 continue;
2681 }
2682
2683 unsigned len = path->length ();
2684 edge exit = (*path)[len - 1]->e;
2685 basic_block *region = XNEWVEC (basic_block, len - 1);
2686
2687 for (unsigned int j = 0; j < len - 1; j++)
2688 region[j] = (*path)[j]->e->dest;
2689
2690 if (duplicate_thread_path (entry, exit, region, len - 1, NULL))
2691 {
2692 /* We do not update dominance info. */
2693 free_dominance_info (CDI_DOMINATORS);
2694 bitmap_set_bit (threaded_blocks, entry->src->index);
2695 retval = true;
2696 thread_stats.num_threaded_edges++;
2697 }
2698
2699 delete_jump_thread_path (path);
2700 paths.unordered_remove (i);
2701 }
2702
2703 /* Remove from PATHS all the jump-threads starting with an edge already
2704 jump-threaded. */
2705 for (i = 0; i < paths.length ();)
2706 {
2707 vec<jump_thread_edge *> *path = paths[i];
2708 edge entry = (*path)[0]->e;
2709
2710 /* Do not jump-thread twice from the same block. */
2711 if (bitmap_bit_p (threaded_blocks, entry->src->index))
2712 {
2713 delete_jump_thread_path (path);
2714 paths.unordered_remove (i);
2715 }
2716 else
2717 i++;
2718 }
2719
2720 bitmap_clear (threaded_blocks);
2721
2722 mark_threaded_blocks (threaded_blocks);
2723
2724 initialize_original_copy_tables ();
2725
2726 /* First perform the threading requests that do not affect
2727 loop structure. */
2728 EXECUTE_IF_SET_IN_BITMAP (threaded_blocks, 0, i, bi)
2729 {
2730 basic_block bb = BASIC_BLOCK_FOR_FN (cfun, i);
2731
2732 if (EDGE_COUNT (bb->preds) > 0)
2733 retval |= thread_block (bb, true);
2734 }
2735
2736 /* Then perform the threading through loop headers. We start with the
2737 innermost loop, so that the changes in cfg we perform won't affect
2738 further threading. */
2739 FOR_EACH_LOOP (loop, LI_FROM_INNERMOST)
2740 {
2741 if (!loop->header
2742 || !bitmap_bit_p (threaded_blocks, loop->header->index))
2743 continue;
2744
2745 retval |= thread_through_loop_header (loop, may_peel_loop_headers);
2746 }
2747
2748 /* Any jump threading paths that are still attached to edges at this
2749 point must be one of two cases.
2750
2751 First, we could have a jump threading path which went from outside
2752 a loop to inside a loop that was ignored because a prior jump thread
2753 across a backedge was realized (which indirectly causes the loop
2754 above to ignore the latter thread). We can detect these because the
2755 loop structures will be different and we do not currently try to
2756 optimize this case.
2757
2758 Second, we could be threading across a backedge to a point within the
2759 same loop. This occurs for the FSA/FSM optimization and we would
2760 like to optimize it. However, we have to be very careful as this
2761 may completely scramble the loop structures, with the result being
2762 irreducible loops causing us to throw away our loop structure.
2763
2764 As a compromise for the latter case, if the thread path ends in
2765 a block where the last statement is a multiway branch, then go
2766 ahead and thread it, else ignore it. */
2767 basic_block bb;
2768 edge e;
2769 FOR_EACH_BB_FN (bb, cfun)
2770 {
2771 /* If we do end up threading here, we can remove elements from
2772 BB->preds. Thus we cannot use the FOR_EACH_EDGE iterator. */
2773 for (edge_iterator ei = ei_start (bb->preds);
2774 (e = ei_safe_edge (ei));)
2775 if (e->aux)
2776 {
2777 vec<jump_thread_edge *> *path = THREAD_PATH (e);
2778
2779 /* Case 1, threading from outside to inside the loop
2780 after we'd already threaded through the header. */
2781 if ((*path)[0]->e->dest->loop_father
2782 != path->last ()->e->src->loop_father)
2783 {
2784 delete_jump_thread_path (path);
2785 e->aux = NULL;
2786 ei_next (&ei);
2787 }
2788 else if (bb_ends_with_multiway_branch (path->last ()->e->src))
2789 {
2790 /* The code to thread through loop headers may have
2791 split a block with jump threads attached to it.
2792
2793 We can identify this with a disjoint jump threading
2794 path. If found, just remove it. */
2795 for (unsigned int i = 0; i < path->length () - 1; i++)
2796 if ((*path)[i]->e->dest != (*path)[i + 1]->e->src)
2797 {
2798 delete_jump_thread_path (path);
2799 e->aux = NULL;
2800 ei_next (&ei);
2801 break;
2802 }
2803
2804 /* Our path is still valid, thread it. */
2805 if (e->aux)
2806 {
2807 if (thread_block ((*path)[0]->e->dest, false))
2808 e->aux = NULL;
2809 else
2810 {
2811 delete_jump_thread_path (path);
2812 e->aux = NULL;
2813 ei_next (&ei);
2814 }
2815 }
2816 }
2817 else
2818 {
2819 delete_jump_thread_path (path);
2820 e->aux = NULL;
2821 ei_next (&ei);
2822 }
2823 }
2824 else
2825 ei_next (&ei);
2826 }
2827
2828 statistics_counter_event (cfun, "Jumps threaded",
2829 thread_stats.num_threaded_edges);
2830
2831 free_original_copy_tables ();
2832
2833 BITMAP_FREE (threaded_blocks);
2834 threaded_blocks = NULL;
2835 paths.release ();
2836
2837 if (retval)
2838 loops_state_set (LOOPS_NEED_FIXUP);
2839
2840 out:
2841 delete removed_edges;
2842 removed_edges = NULL;
2843 return retval;
2844 }
2845
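/* A typical driver sequence (a sketch only; the actual call sites live in
   the client passes, e.g. DOM):

     ... the pass registers opportunities via register_jump_thread ...
     if (thread_through_all_blocks (false))
       ... rewrite duplicated SSA_NAMEs back into SSA form and fix
           dominance information, as documented above ...  */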
2846 /* Delete the jump threading path PATH. We have to explicitly delete
2847 each entry in the vector, then the container. */
2848
2849 void
2850 delete_jump_thread_path (vec<jump_thread_edge *> *path)
2851 {
2852 for (unsigned int i = 0; i < path->length (); i++)
2853 delete (*path)[i];
2854 path->release();
2855 delete path;
2856 }
2857
2858 /* Register a jump threading opportunity. We queue up all the jump
2859 threading opportunities discovered by a pass and update the CFG
2860 and SSA form all at once.
2861
2862 The first edge on PATH is the edge we can thread; the last edge on PATH
2863 identifies the final target, i.e., we are effectively recording that
2864 control may flow straight to that target after fixing the SSA graph. */
2865
2866 void
2867 register_jump_thread (vec<jump_thread_edge *> *path)
2868 {
2869 if (!dbg_cnt (registered_jump_thread))
2870 {
2871 delete_jump_thread_path (path);
2872 return;
2873 }
2874
2875 /* First make sure there are no NULL outgoing edges on the jump threading
2876 path. That can happen when jumping to a constant address. */
2877 for (unsigned int i = 0; i < path->length (); i++)
2878 if ((*path)[i]->e == NULL)
2879 {
2880 if (dump_file && (dump_flags & TDF_DETAILS))
2881 {
2882 fprintf (dump_file,
2883 "Found NULL edge in jump threading path. Cancelling jump thread:\n");
2884 dump_jump_thread_path (dump_file, *path, false);
2885 }
2886
2887 delete_jump_thread_path (path);
2888 return;
2889 }
2890
2891 if (dump_file && (dump_flags & TDF_DETAILS))
2892 dump_jump_thread_path (dump_file, *path, true);
2893
2894 if (!paths.exists ())
2895 paths.create (5);
2896
2897 paths.safe_push (path);
2898 }
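/* A minimal usage sketch (hypothetical client code, mirroring the way a
   path is built in thread_single_edge above):

     vec<jump_thread_edge *> *path = new vec<jump_thread_edge *> ();
     path->safe_push (new jump_thread_edge (e, EDGE_START_JUMP_THREAD));
     path->safe_push (new jump_thread_edge (e2, EDGE_COPY_SRC_BLOCK));
     register_jump_thread (path);

   Ownership of PATH passes to this file; it is eventually released with
   delete_jump_thread_path once the request is realized or cancelled.  */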