gcc/tree-ssa-threadupdate.c
a8046f60 1/* Thread edges through blocks and update the control flow and SSA graphs.
d353bf18 2 Copyright (C) 2004-2015 Free Software Foundation, Inc.
a8046f60 3
4This file is part of GCC.
5
6GCC is free software; you can redistribute it and/or modify
7it under the terms of the GNU General Public License as published by
8c4c00c1 8the Free Software Foundation; either version 3, or (at your option)
a8046f60 9any later version.
10
11GCC is distributed in the hope that it will be useful,
12but WITHOUT ANY WARRANTY; without even the implied warranty of
13MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14GNU General Public License for more details.
15
16You should have received a copy of the GNU General Public License
8c4c00c1 17along with GCC; see the file COPYING3. If not see
18<http://www.gnu.org/licenses/>. */
a8046f60 19
20#include "config.h"
21#include "system.h"
22#include "coretypes.h"
b20a8bb4 23#include "alias.h"
9ef16211 24#include "backend.h"
d040a5b0 25#include "cfghooks.h"
a8046f60 26#include "tree.h"
9ef16211 27#include "gimple.h"
28#include "hard-reg-set.h"
29#include "ssa.h"
30#include "options.h"
b20a8bb4 31#include "fold-const.h"
a8046f60 32#include "flags.h"
94ea8568 33#include "cfganal.h"
bc61cadb 34#include "internal-fn.h"
dcf1a1ec 35#include "gimple-iterator.h"
69ee5dbb 36#include "tree-ssa.h"
0c5b289a 37#include "tree-ssa-threadupdate.h"
b9ed1410 38#include "dumpfile.h"
388d1fc1 39#include "cfgloop.h"
a3724f9d 40#include "dbgcnt.h"
ab596744 41#include "tree-cfg.h"
42#include "tree-pass.h"
a8046f60 43
44/* Given a block B, update the CFG and SSA graph to reflect redirecting
45 one or more in-edges to B to instead reach the destination of an
46 out-edge from B while preserving any side effects in B.
47
0c6d8c36 48 i.e., given A->B and B->C, change A->B to be A->C yet still preserve the
a8046f60 49 side effects of executing B.
50
51 1. Make a copy of B (including its outgoing edges and statements). Call
52 the copy B'. Note B' has no incoming edges or PHIs at this time.
53
54 2. Remove the control statement at the end of B' and all outgoing edges
55 except B'->C.
56
57 3. Add a new argument to each PHI in C with the same value as the existing
58 argument associated with edge B->C. Associate the new PHI arguments
59 with the edge B'->C.
60
61 4. For each PHI in B, find or create a PHI in B' with an identical
7a635e9c 62 PHI_RESULT. Add an argument to the PHI in B' which has the same
a8046f60 63 value as the PHI in B associated with the edge A->B. Associate
64 the new argument in the PHI in B' with the edge A->B.
65
66 5. Change the edge A->B to A->B'.
67
68 5a. This automatically deletes any PHI arguments associated with the
69 edge A->B in B.
70
71 5b. This automatically associates each new argument added in step 4
72 with the edge A->B'.
73
74 6. Repeat for other incoming edges into B.
75
76 7. Put the duplicated resources in B and all the B' blocks into SSA form.
77
78 Note that block duplication can be minimized by first collecting the
f0b5f617 79 set of unique destination blocks that the incoming edges should
255a8494 80 be threaded to.
81
afe75331 82 We reduce the number of edges and statements we create by not copying all
83 the outgoing edges and the control statement in step #1. We instead create
84 a template block without the outgoing edges and duplicate the template.
a8046f60 85
afe75331 86 Another case this code handles is threading through a "joiner" block. In
87 this case, we do not know the destination of the joiner block, but one
88 of the outgoing edges from the joiner block leads to a threadable path. This
89 case largely works as outlined above, except the duplicate of the joiner
90 block still contains a full set of outgoing edges and its control statement.
91 We just redirect one of its outgoing edges to our jump threading path. */
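
/* As a purely illustrative sketch (the variables and calls below are
   made up, not taken from GCC), the source-level effect of the
   transformation is:

     if (x > 0)    // block A1
       y = 1;
     else          // block A2
       y = 0;
     if (y != 0)   // block B; its outcome is known for each predecessor
       foo ();     // block C
     else
       bar ();     // block D

   The value tested in B is already determined on each incoming edge, so
   the edges A1->B and A2->B can be redirected straight to C and D
   respectively, duplicating whatever side effects B contains.  */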
778182c1 92
93
94/* Steps #5 and #6 of the above algorithm are best implemented by walking
95 all the incoming edges which thread to the same destination edge at
96 the same time. That avoids lots of table lookups to get information
97 for the destination edge.
98
99 To realize that implementation we create a list of incoming edges
100 which thread to the same outgoing edge. Thus to implement steps
101 #5 and #6 we traverse our hash table of outgoing edge information.
102 For each entry we walk the list of incoming edges which thread to
103 the current outgoing edge. */
104
105struct el
106{
107 edge e;
108 struct el *next;
109};
a8046f60 110
111/* Main data structure recording information regarding B's duplicate
112 blocks. */
113
778182c1 114/* We need to efficiently record the unique thread destinations of this
115 block and specific information associated with those destinations. We
116 may have many incoming edges threaded to the same outgoing edge. This
c5d4a10b 117 can be naturally implemented with a hash table. */
778182c1 118
298e7f9a 119struct redirection_data : free_ptr_hash<redirection_data>
a8046f60 120{
11af02d8 121 /* We support wiring up two block duplicates in a jump threading path.
122
123 One is a normal block copy where we remove the control statement
124 and wire up its single remaining outgoing edge to the thread path.
125
126 The other is a joiner block where we leave the control statement
1b83778e 127 in place, but wire one of the outgoing edges to a thread path.
11af02d8 128
129 In theory we could have multiple block duplicates in a jump
130 threading path, but I haven't tried that.
131
132 The duplicate blocks appear in this array in the same order in
133 which they appear in the jump thread path. */
134 basic_block dup_blocks[2];
a8046f60 135
5fe6149c 136 /* The jump threading path. */
137 vec<jump_thread_edge *> *path;
778182c1 138
5fe6149c 139 /* A list of incoming edges which we want to thread to the
140 same path. */
778182c1 141 struct el *incoming_edges;
494bbaae 142
143 /* hash_table support. */
9969c043 144 static inline hashval_t hash (const redirection_data *);
145 static inline int equal (const redirection_data *, const redirection_data *);
a8046f60 146};
147
b93ba654 148/* Dump a jump threading path, including annotations about each
149 edge in the path. */
150
151static void
152dump_jump_thread_path (FILE *dump_file, vec<jump_thread_edge *> path,
153 bool registering)
154{
155 fprintf (dump_file,
ded1c768 156 " %s%s jump thread: (%d, %d) incoming edge; ",
b93ba654 157 (registering ? "Registering" : "Cancelling"),
ded1c768 158 (path[0]->type == EDGE_FSM_THREAD ? " FSM": ""),
b93ba654 159 path[0]->e->src->index, path[0]->e->dest->index);
160
161 for (unsigned int i = 1; i < path.length (); i++)
162 {
163 /* We can get paths with a NULL edge when the final destination
164 of a jump thread turns out to be a constant address. We dump
165 those paths when debugging, so we have to be prepared for that
166 possibility here. */
167 if (path[i]->e == NULL)
168 continue;
169
170 if (path[i]->type == EDGE_COPY_SRC_JOINER_BLOCK)
171 fprintf (dump_file, " (%d, %d) joiner; ",
172 path[i]->e->src->index, path[i]->e->dest->index);
173 if (path[i]->type == EDGE_COPY_SRC_BLOCK)
174 fprintf (dump_file, " (%d, %d) normal;",
175 path[i]->e->src->index, path[i]->e->dest->index);
176 if (path[i]->type == EDGE_NO_COPY_SRC_BLOCK)
177 fprintf (dump_file, " (%d, %d) nocopy;",
178 path[i]->e->src->index, path[i]->e->dest->index);
c5baf1e1 179 if (path[0]->type == EDGE_FSM_THREAD)
180 fprintf (dump_file, " (%d, %d) ",
181 path[i]->e->src->index, path[i]->e->dest->index);
b93ba654 182 }
183 fputc ('\n', dump_file);
184}
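
/* For example (the block indices are made up), registering a path that
   enters at edge (3, 5), threads through joiner block 5 and then through
   a normal copy of block 7, produces a dump line roughly like:

     Registering jump thread: (3, 5) incoming edge;  (5, 7) joiner;  (7, 9) normal;

   The same path being removed starts with "Cancelling" instead.  */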
185
5fe6149c 186/* Simple hashing function. For any given incoming edge E, we're going
187 to be most concerned with the final destination of its jump thread
 188 path. So hash on the index of the final edge's destination block. */
189
494bbaae 190inline hashval_t
9969c043 191redirection_data::hash (const redirection_data *p)
494bbaae 192{
5fe6149c 193 vec<jump_thread_edge *> *path = p->path;
194 return path->last ()->e->dest->index;
494bbaae 195}
196
5fe6149c 197/* Given two hash table entries, return true if they have the same
198 jump threading path. */
494bbaae 199inline int
9969c043 200redirection_data::equal (const redirection_data *p1, const redirection_data *p2)
494bbaae 201{
5fe6149c 202 vec<jump_thread_edge *> *path1 = p1->path;
203 vec<jump_thread_edge *> *path2 = p2->path;
204
205 if (path1->length () != path2->length ())
206 return false;
207
208 for (unsigned int i = 1; i < path1->length (); i++)
209 {
210 if ((*path1)[i]->type != (*path2)[i]->type
211 || (*path1)[i]->e != (*path2)[i]->e)
212 return false;
213 }
214
215 return true;
494bbaae 216}
217
778182c1 218/* Data structure of information to pass to hash table traversal routines. */
2b15d2ba 219struct ssa_local_info_t
778182c1 220{
221 /* The current block we are working on. */
222 basic_block bb;
223
11af02d8 224 /* We only create a template block for the first duplicated block in a
225 jump threading path as we may need many duplicates of that block.
226
227 The second duplicate block in a path is specific to that path. Creating
228 and sharing a template for that block is considerably more difficult. */
778182c1 229 basic_block template_block;
388d1fc1 230
231 /* TRUE if we thread one or more jumps, FALSE otherwise. */
232 bool jumps_threaded;
30e432bb 233
234 /* Blocks duplicated for the thread. */
235 bitmap duplicate_blocks;
778182c1 236};
a3d0fd80 237
3cebc9d2 238/* Passes which use the jump threading code register jump threading
239 opportunities as they are discovered. We keep the registered
 240 jump threading opportunities in this vector; each element is a jump
 241 thread path, i.e. a vector of jump_thread_edge pointers. */
f2981b08 242static vec<vec<jump_thread_edge *> *> paths;
3cebc9d2 243
eb31063a 244/* When we start updating the CFG for threading, data necessary for jump
 245 threading is attached to the AUX field for the incoming edge. Use this
 246 macro to access the underlying structure attached to the AUX field. */
f2981b08 247#define THREAD_PATH(E) ((vec<jump_thread_edge *> *)(E)->aux)
3cebc9d2 248
5236b8bb 249/* Jump threading statistics. */
250
251struct thread_stats_d
252{
253 unsigned long num_threaded_edges;
254};
255
256struct thread_stats_d thread_stats;
257
258
f582bb6c 259/* Remove the last statement in block BB if it is a control statement.
260 Also remove all outgoing edges except the edge which reaches DEST_BB.
261 If DEST_BB is NULL, then remove all outgoing edges. */
a8046f60 262
f1344f45 263void
f582bb6c 264remove_ctrl_stmt_and_useless_edges (basic_block bb, basic_block dest_bb)
a8046f60 265{
75a70cf9 266 gimple_stmt_iterator gsi;
cd665a06 267 edge e;
268 edge_iterator ei;
a8046f60 269
75a70cf9 270 gsi = gsi_last_bb (bb);
a8046f60 271
f582bb6c 272 /* If the duplicate ends with a control statement, then remove it.
a8046f60 273
f582bb6c 274 Note that if we are duplicating the template block rather than the
275 original basic block, then the duplicate might not have any real
276 statements in it. */
75a70cf9 277 if (!gsi_end_p (gsi)
278 && gsi_stmt (gsi)
279 && (gimple_code (gsi_stmt (gsi)) == GIMPLE_COND
280 || gimple_code (gsi_stmt (gsi)) == GIMPLE_GOTO
281 || gimple_code (gsi_stmt (gsi)) == GIMPLE_SWITCH))
282 gsi_remove (&gsi, true);
a8046f60 283
cd665a06 284 for (ei = ei_start (bb->succs); (e = ei_safe_edge (ei)); )
a8046f60 285 {
a8046f60 286 if (e->dest != dest_bb)
0891994d 287 remove_edge (e);
cd665a06 288 else
289 ei_next (&ei);
a8046f60 290 }
a8046f60 291}
292
11af02d8 293/* Create a duplicate of BB. Record the duplicate block at index COUNT
a7ee7309 294 in RD's dup_blocks array. */
a8046f60 295
296static void
11af02d8 297create_block_for_threading (basic_block bb,
298 struct redirection_data *rd,
30e432bb 299 unsigned int count,
a7ee7309 300 bitmap *duplicate_blocks)
a8046f60 301{
eb31063a 302 edge_iterator ei;
303 edge e;
304
a8046f60 305 /* We can use the generic block duplication code and simply remove
306 the stuff we do not need. */
11af02d8 307 rd->dup_blocks[count] = duplicate_block (bb, NULL, NULL);
a8046f60 308
11af02d8 309 FOR_EACH_EDGE (e, ei, rd->dup_blocks[count]->succs)
eb31063a 310 e->aux = NULL;
311
615dd397 312 /* Zero out the profile, since the block is unreachable for now. */
11af02d8 313 rd->dup_blocks[count]->frequency = 0;
314 rd->dup_blocks[count]->count = 0;
30e432bb 315 if (duplicate_blocks)
316 bitmap_set_bit (*duplicate_blocks, rd->dup_blocks[count]->index);
a8046f60 317}
318
2b15d2ba 319/* Main data structure to hold information for duplicates of BB. */
320
c1f445d2 321static hash_table<redirection_data> *redirection_data;
2b15d2ba 322
778182c1 323/* Given an outgoing edge E, look up and return its entry in our hash table.
324
325 If INSERT is true, then we insert the entry into the hash table if
326 it is not already present. INCOMING_EDGE is added to the list of incoming
327 edges associated with E in the hash table. */
328
329static struct redirection_data *
da81e0c5 330lookup_redirection_data (edge e, enum insert_option insert)
778182c1 331{
2b15d2ba 332 struct redirection_data **slot;
778182c1 333 struct redirection_data *elt;
f2981b08 334 vec<jump_thread_edge *> *path = THREAD_PATH (e);
778182c1 335
336 /* Build a hash table element so we can see if E is already
337 in the table. */
4c36ffe6 338 elt = XNEW (struct redirection_data);
5fe6149c 339 elt->path = path;
11af02d8 340 elt->dup_blocks[0] = NULL;
341 elt->dup_blocks[1] = NULL;
778182c1 342 elt->incoming_edges = NULL;
343
c1f445d2 344 slot = redirection_data->find_slot (elt, insert);
778182c1 345
346 /* This will only happen if INSERT is false and the entry is not
347 in the hash table. */
348 if (slot == NULL)
349 {
350 free (elt);
351 return NULL;
352 }
353
354 /* This will only happen if E was not in the hash table and
355 INSERT is true. */
356 if (*slot == NULL)
357 {
2b15d2ba 358 *slot = elt;
4c36ffe6 359 elt->incoming_edges = XNEW (struct el);
da81e0c5 360 elt->incoming_edges->e = e;
778182c1 361 elt->incoming_edges->next = NULL;
362 return elt;
363 }
364 /* E was in the hash table. */
365 else
366 {
367 /* Free ELT as we do not need it anymore, we will extract the
368 relevant entry from the hash table itself. */
369 free (elt);
370
371 /* Get the entry stored in the hash table. */
2b15d2ba 372 elt = *slot;
778182c1 373
374 /* If insertion was requested, then we need to add INCOMING_EDGE
375 to the list of incoming edges associated with E. */
376 if (insert)
377 {
559685be 378 struct el *el = XNEW (struct el);
778182c1 379 el->next = elt->incoming_edges;
da81e0c5 380 el->e = e;
778182c1 381 elt->incoming_edges = el;
382 }
383
384 return elt;
385 }
386}
387
fc54aba7 388/* Similar to copy_phi_args, except that the PHI arg already exists; it
 389 just does not have a value associated with it. */
390
391static void
392copy_phi_arg_into_existing_phi (edge src_e, edge tgt_e)
393{
394 int src_idx = src_e->dest_idx;
395 int tgt_idx = tgt_e->dest_idx;
396
397 /* Iterate over each PHI in e->dest. */
1a91d914 398 for (gphi_iterator gsi = gsi_start_phis (src_e->dest),
399 gsi2 = gsi_start_phis (tgt_e->dest);
fc54aba7 400 !gsi_end_p (gsi);
401 gsi_next (&gsi), gsi_next (&gsi2))
402 {
1a91d914 403 gphi *src_phi = gsi.phi ();
404 gphi *dest_phi = gsi2.phi ();
fc54aba7 405 tree val = gimple_phi_arg_def (src_phi, src_idx);
406 source_location locus = gimple_phi_arg_location (src_phi, src_idx);
407
408 SET_PHI_ARG_DEF (dest_phi, tgt_idx, val);
409 gimple_phi_arg_set_location (dest_phi, tgt_idx, locus);
410 }
411}
412
1b83c31b 413/* Given ssa_name DEF, backtrack along the jump threading PATH from node
 414 IDX to see if DEF has a constant value in a flow-sensitive manner. Set
 415 LOCUS to the location of the constant PHI arg and return the value.
 416 Return DEF directly if PATH is NULL or IDX is zero. */
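
/* For instance (a purely illustrative example), if DEF is y_3 defined by

     y_3 = PHI <0(5), x_2(6)>

   and the jump threading path under consideration entered the defining
   block via the edge from block 5, then along that path y_3 has the
   constant value 0, which is what we return together with the location
   of that PHI argument.  */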
417
418static tree
419get_value_locus_in_path (tree def, vec<jump_thread_edge *> *path,
420 basic_block bb, int idx, source_location *locus)
421{
422 tree arg;
1a91d914 423 gphi *def_phi;
1b83c31b 424 basic_block def_bb;
425
426 if (path == NULL || idx == 0)
427 return def;
428
1a91d914 429 def_phi = dyn_cast <gphi *> (SSA_NAME_DEF_STMT (def));
430 if (!def_phi)
1b83c31b 431 return def;
432
433 def_bb = gimple_bb (def_phi);
434 /* Don't propagate loop invariants into deeper loops. */
435 if (!def_bb || bb_loop_depth (def_bb) < bb_loop_depth (bb))
436 return def;
437
438 /* Backtrack jump threading path from IDX to see if def has constant
439 value. */
440 for (int j = idx - 1; j >= 0; j--)
441 {
442 edge e = (*path)[j]->e;
443 if (e->dest == def_bb)
444 {
445 arg = gimple_phi_arg_def (def_phi, e->dest_idx);
446 if (is_gimple_min_invariant (arg))
447 {
448 *locus = gimple_phi_arg_location (def_phi, e->dest_idx);
449 return arg;
450 }
451 break;
452 }
453 }
454
455 return def;
456}
457
 458/* For each PHI in BB, copy the argument associated with SRC_E to TGT_E.
 459 Try to backtrack along the jump threading PATH from node IDX to see if
 460 the arg has a constant value; if so, copy the constant value instead of
 461 the argument itself. */
da81e0c5 462
463static void
1b83c31b 464copy_phi_args (basic_block bb, edge src_e, edge tgt_e,
465 vec<jump_thread_edge *> *path, int idx)
da81e0c5 466{
1a91d914 467 gphi_iterator gsi;
da81e0c5 468 int src_indx = src_e->dest_idx;
469
470 for (gsi = gsi_start_phis (bb); !gsi_end_p (gsi); gsi_next (&gsi))
471 {
1a91d914 472 gphi *phi = gsi.phi ();
1b83c31b 473 tree def = gimple_phi_arg_def (phi, src_indx);
da81e0c5 474 source_location locus = gimple_phi_arg_location (phi, src_indx);
1b83c31b 475
476 if (TREE_CODE (def) == SSA_NAME
477 && !virtual_operand_p (gimple_phi_result (phi)))
478 def = get_value_locus_in_path (def, path, bb, idx, &locus);
479
480 add_phi_arg (phi, def, tgt_e, locus);
da81e0c5 481 }
482}
483
484/* We have recently made a copy of ORIG_BB, including its outgoing
485 edges. The copy is NEW_BB. Every PHI node in every direct successor of
486 ORIG_BB has a new argument associated with edge from NEW_BB to the
487 successor. Initialize the PHI argument so that it is equal to the PHI
1b83c31b 488 argument associated with the edge from ORIG_BB to the successor.
489 PATH and IDX are used to check if the new PHI argument has constant
490 value in a flow sensitive manner. */
da81e0c5 491
492static void
1b83c31b 493update_destination_phis (basic_block orig_bb, basic_block new_bb,
494 vec<jump_thread_edge *> *path, int idx)
da81e0c5 495{
496 edge_iterator ei;
497 edge e;
498
499 FOR_EACH_EDGE (e, ei, orig_bb->succs)
500 {
501 edge e2 = find_edge (new_bb, e->dest);
1b83c31b 502 copy_phi_args (e->dest, e, e2, path, idx);
da81e0c5 503 }
504}
505
778182c1 506/* Given a duplicate block and its single destination (both stored
 507 in RD), create an edge between the duplicate and its single
 508 destination.
 509
 510 Add an additional argument to any PHI nodes at the single
1b83c31b 511 destination. IDX is the node in the jump threading path at which
 512 we start checking whether the new PHI argument has a constant
 513 value along the jump threading path. */
778182c1 514
515static void
42b013bc 516create_edge_and_update_destination_phis (struct redirection_data *rd,
1b83c31b 517 basic_block bb, int idx)
778182c1 518{
5fe6149c 519 edge e = make_edge (bb, rd->path->last ()->e->dest, EDGE_FALLTHRU);
778182c1 520
f9614b84 521 rescan_loop_exit (e, true, false);
421e19dd 522 e->probability = REG_BR_PROB_BASE;
42b013bc 523 e->count = bb->count;
eb31063a 524
e63988cc 525 /* We used to copy the thread path here. That was added in 2007
526 and dutifully updated through the representation changes in 2013.
527
528 In 2013 we added code to thread from an interior node through
529 the backedge to another interior node. That runs after the code
530 to thread through loop headers from outside the loop.
531
532 The latter may delete edges in the CFG, including those
533 which appeared in the jump threading path we copied here. Thus
534 we'd end up using a dangling pointer.
535
536 After reviewing the 2007/2011 code, I can't see how anything
537 depended on copying the AUX field and clearly copying the jump
538 threading path is problematical due to embedded edge pointers.
539 It has been removed. */
540 e->aux = NULL;
421e19dd 541
778182c1 542 /* If there are any PHI nodes at the destination of the outgoing edge
543 from the duplicate block, then we will need to add a new argument
544 to them. The argument should have the same value as the argument
545 associated with the outgoing edge stored in RD. */
1b83c31b 546 copy_phi_args (e->dest, rd->path->last ()->e, e, rd->path, idx);
da81e0c5 547}
548
fc54aba7 549/* Look through PATH beginning at START and return TRUE if there are
550 any additional blocks that need to be duplicated. Otherwise,
551 return FALSE. */
552static bool
553any_remaining_duplicated_blocks (vec<jump_thread_edge *> *path,
554 unsigned int start)
555{
556 for (unsigned int i = start + 1; i < path->length (); i++)
557 {
558 if ((*path)[i]->type == EDGE_COPY_SRC_JOINER_BLOCK
559 || (*path)[i]->type == EDGE_COPY_SRC_BLOCK)
560 return true;
561 }
562 return false;
563}
564
30e432bb 565
566/* Compute the amount of profile count/frequency coming into the jump threading
567 path stored in RD that we are duplicating, returned in PATH_IN_COUNT_PTR and
568 PATH_IN_FREQ_PTR, as well as the amount of counts flowing out of the
569 duplicated path, returned in PATH_OUT_COUNT_PTR. LOCAL_INFO is used to
570 identify blocks duplicated for jump threading, which have duplicated
571 edges that need to be ignored in the analysis. Return true if path contains
572 a joiner, false otherwise.
573
574 In the non-joiner case, this is straightforward - all the counts/frequency
575 flowing into the jump threading path should flow through the duplicated
576 block and out of the duplicated path.
577
578 In the joiner case, it is very tricky. Some of the counts flowing into
579 the original path go offpath at the joiner. The problem is that while
580 we know how much total count goes off-path in the original control flow,
581 we don't know how many of the counts corresponding to just the jump
582 threading path go offpath at the joiner.
583
584 For example, assume we have the following control flow and identified
585 jump threading paths:
586
9943b198 587 A B C
588 \ | /
589 Ea \ |Eb / Ec
590 \ | /
591 v v v
592 J <-- Joiner
593 / \
594 Eoff/ \Eon
595 / \
596 v v
597 Soff Son <--- Normal
598 /\
599 Ed/ \ Ee
600 / \
601 v v
602 D E
603
604 Jump threading paths: A -> J -> Son -> D (path 1)
605 C -> J -> Son -> E (path 2)
30e432bb 606
607 Note that the control flow could be more complicated:
608 - Each jump threading path may have more than one incoming edge. I.e. A and
609 Ea could represent multiple incoming blocks/edges that are included in
610 path 1.
611 - There could be EDGE_NO_COPY_SRC_BLOCK edges after the joiner (either
612 before or after the "normal" copy block). These are not duplicated onto
613 the jump threading path, as they are single-successor.
614 - Any of the blocks along the path may have other incoming edges that
615 are not part of any jump threading path, but add profile counts along
616 the path.
617
 618 In the above example, after all jump threading is complete, we will
619 end up with the following control flow:
620
9943b198 621 A B C
622 | | |
623 Ea| |Eb |Ec
624 | | |
625 v v v
626 Ja J Jc
627 / \ / \Eon' / \
628 Eona/ \ ---/---\-------- \Eonc
629 / \ / / \ \
630 v v v v v
631 Sona Soff Son Sonc
632 \ /\ /
633 \___________ / \ _____/
634 \ / \/
635 vv v
636 D E
30e432bb 637
638 The main issue to notice here is that when we are processing path 1
639 (A->J->Son->D) we need to figure out the outgoing edge weights to
640 the duplicated edges Ja->Sona and Ja->Soff, while ensuring that the
641 sum of the incoming weights to D remain Ed. The problem with simply
642 assuming that Ja (and Jc when processing path 2) has the same outgoing
643 probabilities to its successors as the original block J, is that after
644 all paths are processed and other edges/counts removed (e.g. none
645 of Ec will reach D after processing path 2), we may end up with not
646 enough count flowing along duplicated edge Sona->D.
647
648 Therefore, in the case of a joiner, we keep track of all counts
649 coming in along the current path, as well as from predecessors not
650 on any jump threading path (Eb in the above example). While we
651 first assume that the duplicated Eona for Ja->Sona has the same
652 probability as the original, we later compensate for other jump
 653 threading paths that may eliminate edges. We do that by keeping track
654 of all counts coming into the original path that are not in a jump
655 thread (Eb in the above example, but as noted earlier, there could
656 be other predecessors incoming to the path at various points, such
657 as at Son). Call this cumulative non-path count coming into the path
658 before D as Enonpath. We then ensure that the count from Sona->D is as at
659 least as big as (Ed - Enonpath), but no bigger than the minimum
660 weight along the jump threading path. The probabilities of both the
661 original and duplicated joiner block J and Ja will be adjusted
662 accordingly after the updates. */
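
/* A worked example with made-up counts (purely illustrative), using the
   first diagram above: suppose Ea = 60, Eb = 30 and Ec = 10, so J's
   count is 100, and suppose J->Son has count 80 and Son->D (Ed) has
   count 50.  When processing path 1 (A->J->Son->D):

     path_in_count  = 60  (count on Ea)
     nonpath_count  = 30  (count on Eb, which is on no registered path)
     onpath_scale   = 60/100
     J->Son scaled  = 80 * 60/100 = 48, so path_out_count = min (60, 48) = 48
     min_path_count = min (60, 80, 50) = 50

   Since 48 >= Ed - nonpath_count = 50 - 30 = 20, no fixup is needed and
   48 is the count moved onto the duplicated path.  Had Ed been 90
   instead, Ed - nonpath_count = 60 would exceed 48 and path_out_count
   would be raised to 60 (but never above the minimum count seen along
   the path).  */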
663
664static bool
665compute_path_counts (struct redirection_data *rd,
9943b198 666 ssa_local_info_t *local_info,
667 gcov_type *path_in_count_ptr,
668 gcov_type *path_out_count_ptr,
669 int *path_in_freq_ptr)
30e432bb 670{
671 edge e = rd->incoming_edges->e;
672 vec<jump_thread_edge *> *path = THREAD_PATH (e);
673 edge elast = path->last ()->e;
674 gcov_type nonpath_count = 0;
675 bool has_joiner = false;
676 gcov_type path_in_count = 0;
677 int path_in_freq = 0;
678
679 /* Start by accumulating incoming edge counts to the path's first bb
 680 into a couple of buckets:
9943b198 681 path_in_count: total count of incoming edges that flow into the
682 current path.
683 nonpath_count: total count of incoming edges that are not
684 flowing along *any* path. These are the counts
685 that will still flow along the original path after
686 all path duplication is done by potentially multiple
687 calls to this routine.
30e432bb 688 (any other incoming edge counts are for a different jump threading
689 path that will be handled by a later call to this routine.)
690 To make this easier, start by recording all incoming edges that flow into
691 the current path in a bitmap. We could add up the path's incoming edge
692 counts here, but we still need to walk all the first bb's incoming edges
693 below to add up the counts of the other edges not included in this jump
694 threading path. */
695 struct el *next, *el;
696 bitmap in_edge_srcs = BITMAP_ALLOC (NULL);
697 for (el = rd->incoming_edges; el; el = next)
698 {
699 next = el->next;
700 bitmap_set_bit (in_edge_srcs, el->e->src->index);
701 }
702 edge ein;
703 edge_iterator ei;
704 FOR_EACH_EDGE (ein, ei, e->dest->preds)
705 {
706 vec<jump_thread_edge *> *ein_path = THREAD_PATH (ein);
707 /* Simply check the incoming edge src against the set captured above. */
708 if (ein_path
9943b198 709 && bitmap_bit_p (in_edge_srcs, (*ein_path)[0]->e->src->index))
710 {
711 /* It is necessary but not sufficient that the last path edges
712 are identical. There may be different paths that share the
713 same last path edge in the case where the last edge has a nocopy
714 source block. */
715 gcc_assert (ein_path->last ()->e == elast);
716 path_in_count += ein->count;
717 path_in_freq += EDGE_FREQUENCY (ein);
718 }
30e432bb 719 else if (!ein_path)
9943b198 720 {
721 /* Keep track of the incoming edges that are not on any jump-threading
 722 path. These counts will still flow out of the original path after all
723 jump threading is complete. */
724 nonpath_count += ein->count;
725 }
30e432bb 726 }
664dd751 727
728 /* This is needed due to insane incoming frequencies. */
729 if (path_in_freq > BB_FREQ_MAX)
730 path_in_freq = BB_FREQ_MAX;
731
30e432bb 732 BITMAP_FREE (in_edge_srcs);
733
734 /* Now compute the fraction of the total count coming into the first
735 path bb that is from the current threading path. */
736 gcov_type total_count = e->dest->count;
737 /* Handle incoming profile insanities. */
738 if (total_count < path_in_count)
739 path_in_count = total_count;
740 int onpath_scale = GCOV_COMPUTE_SCALE (path_in_count, total_count);
741
742 /* Walk the entire path to do some more computation in order to estimate
743 how much of the path_in_count will flow out of the duplicated threading
744 path. In the non-joiner case this is straightforward (it should be
745 the same as path_in_count, although we will handle incoming profile
746 insanities by setting it equal to the minimum count along the path).
747
748 In the joiner case, we need to estimate how much of the path_in_count
749 will stay on the threading path after the joiner's conditional branch.
750 We don't really know for sure how much of the counts
751 associated with this path go to each successor of the joiner, but we'll
752 estimate based on the fraction of the total count coming into the path
 753 bb that was from the threading paths (computed above in onpath_scale).
754 Afterwards, we will need to do some fixup to account for other threading
755 paths and possible profile insanities.
756
757 In order to estimate the joiner case's counts we also need to update
758 nonpath_count with any additional counts coming into the path. Other
759 blocks along the path may have additional predecessors from outside
760 the path. */
761 gcov_type path_out_count = path_in_count;
762 gcov_type min_path_count = path_in_count;
763 for (unsigned int i = 1; i < path->length (); i++)
764 {
765 edge epath = (*path)[i]->e;
766 gcov_type cur_count = epath->count;
767 if ((*path)[i]->type == EDGE_COPY_SRC_JOINER_BLOCK)
9943b198 768 {
769 has_joiner = true;
770 cur_count = apply_probability (cur_count, onpath_scale);
771 }
30e432bb 772 /* In the joiner case we need to update nonpath_count for any edges
9943b198 773 coming into the path that will contribute to the count flowing
774 into the path successor. */
30e432bb 775 if (has_joiner && epath != elast)
776 {
9943b198 777 /* Look for other incoming edges after joiner. */
778 FOR_EACH_EDGE (ein, ei, epath->dest->preds)
779 {
780 if (ein != epath
781 /* Ignore in edges from blocks we have duplicated for a
782 threading path, which have duplicated edge counts until
783 they are redirected by an invocation of this routine. */
784 && !bitmap_bit_p (local_info->duplicate_blocks,
785 ein->src->index))
786 nonpath_count += ein->count;
787 }
30e432bb 788 }
789 if (cur_count < path_out_count)
9943b198 790 path_out_count = cur_count;
30e432bb 791 if (epath->count < min_path_count)
9943b198 792 min_path_count = epath->count;
30e432bb 793 }
794
795 /* We computed path_out_count above assuming that this path targeted
796 the joiner's on-path successor with the same likelihood as it
797 reached the joiner. However, other thread paths through the joiner
798 may take a different path through the normal copy source block
799 (i.e. they have a different elast), meaning that they do not
800 contribute any counts to this path's elast. As a result, it may
801 turn out that this path must have more count flowing to the on-path
802 successor of the joiner. Essentially, all of this path's elast
803 count must be contributed by this path and any nonpath counts
804 (since any path through the joiner with a different elast will not
805 include a copy of this elast in its duplicated path).
806 So ensure that this path's path_out_count is at least the
807 difference between elast->count and nonpath_count. Otherwise the edge
808 counts after threading will not be sane. */
809 if (has_joiner && path_out_count < elast->count - nonpath_count)
810 {
811 path_out_count = elast->count - nonpath_count;
812 /* But neither can we go above the minimum count along the path
813 we are duplicating. This can be an issue due to profile
814 insanities coming in to this pass. */
815 if (path_out_count > min_path_count)
816 path_out_count = min_path_count;
817 }
818
819 *path_in_count_ptr = path_in_count;
820 *path_out_count_ptr = path_out_count;
821 *path_in_freq_ptr = path_in_freq;
822 return has_joiner;
823}
824
825
826/* Update the counts and frequencies for both an original path
827 edge EPATH and its duplicate EDUP. The duplicate source block
828 will get a count/frequency of PATH_IN_COUNT and PATH_IN_FREQ,
829 and the duplicate edge EDUP will have a count of PATH_OUT_COUNT. */
830static void
831update_profile (edge epath, edge edup, gcov_type path_in_count,
9943b198 832 gcov_type path_out_count, int path_in_freq)
30e432bb 833{
834
835 /* First update the duplicated block's count / frequency. */
836 if (edup)
837 {
838 basic_block dup_block = edup->src;
839 gcc_assert (dup_block->count == 0);
840 gcc_assert (dup_block->frequency == 0);
841 dup_block->count = path_in_count;
842 dup_block->frequency = path_in_freq;
843 }
844
845 /* Now update the original block's count and frequency in the
846 opposite manner - remove the counts/freq that will flow
847 into the duplicated block. Handle underflow due to precision/
848 rounding issues. */
849 epath->src->count -= path_in_count;
850 if (epath->src->count < 0)
851 epath->src->count = 0;
852 epath->src->frequency -= path_in_freq;
853 if (epath->src->frequency < 0)
854 epath->src->frequency = 0;
855
856 /* Next update this path edge's original and duplicated counts. We know
857 that the duplicated path will have path_out_count flowing
858 out of it (in the joiner case this is the count along the duplicated path
859 out of the duplicated joiner). This count can then be removed from the
860 original path edge. */
861 if (edup)
862 edup->count = path_out_count;
863 epath->count -= path_out_count;
864 gcc_assert (epath->count >= 0);
865}
866
867
868/* The duplicate and original joiner blocks may end up with different
869 probabilities (different from both the original and from each other).
870 Recompute the probabilities here once we have updated the edge
871 counts and frequencies. */
872
873static void
874recompute_probabilities (basic_block bb)
875{
876 edge esucc;
877 edge_iterator ei;
878 FOR_EACH_EDGE (esucc, ei, bb->succs)
879 {
bdd367a0 880 if (!bb->count)
9943b198 881 continue;
bdd367a0 882
883 /* Prevent overflow computation due to insane profiles. */
884 if (esucc->count < bb->count)
9943b198 885 esucc->probability = GCOV_COMPUTE_SCALE (esucc->count,
886 bb->count);
bdd367a0 887 else
9943b198 888 /* Can happen with missing/guessed probabilities, since we
 889 may determine that more is flowing along the duplicated
 890 path than the joiner succ probabilities allowed.
 891 Counts and freqs will be insane after jump threading;
 892 at least make sure the probability is sane or we will
893 get a flow verification error.
894 Not much we can do to make counts/freqs sane without
895 redoing the profile estimation. */
896 esucc->probability = REG_BR_PROB_BASE;
30e432bb 897 }
898}
899
900
901/* Update the counts of the original and duplicated edges from a joiner
902 that go off path, given that we have already determined that the
903 duplicate joiner DUP_BB has incoming count PATH_IN_COUNT and
904 outgoing count along the path PATH_OUT_COUNT. The original (on-)path
 905 edge from the joiner is EPATH. */
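
/* Continuing the illustrative numbers from the compute_path_counts
   example: if the duplicated joiner received PATH_IN_COUNT = 60 and
   PATH_OUT_COUNT = 48, then 60 - 48 = 12 must flow off path from the
   duplicate.  That 12 is apportioned among the duplicate's off-path
   edges in the same ratio as the original off-path edge counts out of
   EPATH->src, and whatever a duplicated off-path edge receives is
   subtracted from the corresponding original edge.  */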
906
907static void
908update_joiner_offpath_counts (edge epath, basic_block dup_bb,
9943b198 909 gcov_type path_in_count,
910 gcov_type path_out_count)
30e432bb 911{
912 /* Compute the count that currently flows off path from the joiner.
913 In other words, the total count of joiner's out edges other than
914 epath. Compute this by walking the successors instead of
915 subtracting epath's count from the joiner bb count, since there
916 are sometimes slight insanities where the total out edge count is
917 larger than the bb count (possibly due to rounding/truncation
918 errors). */
919 gcov_type total_orig_off_path_count = 0;
920 edge enonpath;
921 edge_iterator ei;
922 FOR_EACH_EDGE (enonpath, ei, epath->src->succs)
923 {
924 if (enonpath == epath)
9943b198 925 continue;
30e432bb 926 total_orig_off_path_count += enonpath->count;
927 }
928
929 /* For the path that we are duplicating, the amount that will flow
930 off path from the duplicated joiner is the delta between the
931 path's cumulative in count and the portion of that count we
932 estimated above as flowing from the joiner along the duplicated
933 path. */
934 gcov_type total_dup_off_path_count = path_in_count - path_out_count;
935
936 /* Now do the actual updates of the off-path edges. */
937 FOR_EACH_EDGE (enonpath, ei, epath->src->succs)
938 {
939 /* Look for edges going off of the threading path. */
940 if (enonpath == epath)
9943b198 941 continue;
30e432bb 942
943 /* Find the corresponding edge out of the duplicated joiner. */
944 edge enonpathdup = find_edge (dup_bb, enonpath->dest);
945 gcc_assert (enonpathdup);
946
947 /* We can't use the original probability of the joiner's out
9943b198 948 edges, since the probabilities of the original branch
949 and the duplicated branches may vary after all threading is
950 complete. But apportion the duplicated joiner's off-path
951 total edge count computed earlier (total_dup_off_path_count)
952 among the duplicated off-path edges based on their original
953 ratio to the full off-path count (total_orig_off_path_count).
954 */
30e432bb 955 int scale = GCOV_COMPUTE_SCALE (enonpath->count,
9943b198 956 total_orig_off_path_count);
30e432bb 957 /* Give the duplicated offpath edge a portion of the duplicated
9943b198 958 total. */
30e432bb 959 enonpathdup->count = apply_scale (scale,
9943b198 960 total_dup_off_path_count);
30e432bb 961 /* Now update the original offpath edge count, handling underflow
9943b198 962 due to rounding errors. */
30e432bb 963 enonpath->count -= enonpathdup->count;
964 if (enonpath->count < 0)
9943b198 965 enonpath->count = 0;
30e432bb 966 }
967}
968
969
f1ce4e72 970/* Check if the paths through RD all have estimated frequencies but zero
971 profile counts. This is more accurate than checking the entry block
972 for a zero profile count, since profile insanities sometimes creep in. */
973
974static bool
975estimated_freqs_path (struct redirection_data *rd)
976{
977 edge e = rd->incoming_edges->e;
978 vec<jump_thread_edge *> *path = THREAD_PATH (e);
979 edge ein;
980 edge_iterator ei;
981 bool non_zero_freq = false;
982 FOR_EACH_EDGE (ein, ei, e->dest->preds)
983 {
984 if (ein->count)
9943b198 985 return false;
f1ce4e72 986 non_zero_freq |= ein->src->frequency != 0;
987 }
988
989 for (unsigned int i = 1; i < path->length (); i++)
990 {
991 edge epath = (*path)[i]->e;
992 if (epath->src->count)
9943b198 993 return false;
f1ce4e72 994 non_zero_freq |= epath->src->frequency != 0;
995 edge esucc;
996 FOR_EACH_EDGE (esucc, ei, epath->src->succs)
9943b198 997 {
998 if (esucc->count)
999 return false;
1000 non_zero_freq |= esucc->src->frequency != 0;
1001 }
f1ce4e72 1002 }
1003 return non_zero_freq;
1004}
1005
1006
30e432bb 1007/* Invoked for routines that have guessed frequencies and no profile
1008 counts to record the block and edge frequencies for paths through RD
1009 in the profile count fields of those blocks and edges. This is because
1010 ssa_fix_duplicate_block_edges incrementally updates the block and
1011 edge counts as edges are redirected, and it is difficult to do that
1012 for edge frequencies which are computed on the fly from the source
1013 block frequency and probability. When a block frequency is updated
1014 its outgoing edge frequencies are affected and become difficult to
1015 adjust. */
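
/* A small illustrative example, assuming REG_BR_PROB_BASE is 10000 (its
   usual value): an incoming edge whose source block has frequency 100
   and whose probability is 4000 (i.e. 40%) is given

     count = apply_probability (100 * REG_BR_PROB_BASE, 4000)
           = 1000000 * 4000 / 10000 = 400000

   and the blocks and edges along the path are filled in the same way, so
   that the later incremental updates can work purely on counts.  */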
1016
1017static void
1018freqs_to_counts_path (struct redirection_data *rd)
1019{
1020 edge e = rd->incoming_edges->e;
1021 vec<jump_thread_edge *> *path = THREAD_PATH (e);
1022 edge ein;
1023 edge_iterator ei;
1024 FOR_EACH_EDGE (ein, ei, e->dest->preds)
e8038c32 1025 {
1026 /* Scale up the frequency by REG_BR_PROB_BASE, to avoid rounding
9943b198 1027 errors applying the probability when the frequencies are very
1028 small. */
e8038c32 1029 ein->count = apply_probability (ein->src->frequency * REG_BR_PROB_BASE,
9943b198 1030 ein->probability);
e8038c32 1031 }
30e432bb 1032
1033 for (unsigned int i = 1; i < path->length (); i++)
1034 {
1035 edge epath = (*path)[i]->e;
30e432bb 1036 edge esucc;
e8038c32 1037 /* Scale up the frequency by REG_BR_PROB_BASE, to avoid rounding
9943b198 1038 errors applying the edge probability when the frequencies are very
1039 small. */
e8038c32 1040 epath->src->count = epath->src->frequency * REG_BR_PROB_BASE;
30e432bb 1041 FOR_EACH_EDGE (esucc, ei, epath->src->succs)
9943b198 1042 esucc->count = apply_probability (esucc->src->count,
1043 esucc->probability);
30e432bb 1044 }
1045}
1046
1047
1048/* For routines that have guessed frequencies and no profile counts, where we
1049 used freqs_to_counts_path to record block and edge frequencies for paths
1050 through RD, we clear the counts after completing all updates for RD.
1051 The updates in ssa_fix_duplicate_block_edges are based off the count fields,
1052 but the block frequencies and edge probabilities were updated as well,
1053 so we can simply clear the count fields. */
1054
1055static void
1056clear_counts_path (struct redirection_data *rd)
1057{
1058 edge e = rd->incoming_edges->e;
1059 vec<jump_thread_edge *> *path = THREAD_PATH (e);
1060 edge ein, esucc;
1061 edge_iterator ei;
1062 FOR_EACH_EDGE (ein, ei, e->dest->preds)
1063 ein->count = 0;
1064
1065 /* First clear counts along original path. */
1066 for (unsigned int i = 1; i < path->length (); i++)
1067 {
1068 edge epath = (*path)[i]->e;
1069 FOR_EACH_EDGE (esucc, ei, epath->src->succs)
9943b198 1070 esucc->count = 0;
30e432bb 1071 epath->src->count = 0;
1072 }
1073 /* Also need to clear the counts along duplicated path. */
1074 for (unsigned int i = 0; i < 2; i++)
1075 {
1076 basic_block dup = rd->dup_blocks[i];
1077 if (!dup)
9943b198 1078 continue;
30e432bb 1079 FOR_EACH_EDGE (esucc, ei, dup->succs)
9943b198 1080 esucc->count = 0;
30e432bb 1081 dup->count = 0;
1082 }
1083}
1084
fc54aba7 1085/* Wire up the outgoing edges from the duplicate blocks and
30e432bb 1086 update any PHIs as needed. Also update the profile counts
1087 on the original and duplicate blocks and edges. */
2b15d2ba 1088void
1089ssa_fix_duplicate_block_edges (struct redirection_data *rd,
1090 ssa_local_info_t *local_info)
da81e0c5 1091{
1b83c31b 1092 bool multi_incomings = (rd->incoming_edges->next != NULL);
f2981b08 1093 edge e = rd->incoming_edges->e;
1094 vec<jump_thread_edge *> *path = THREAD_PATH (e);
30e432bb 1095 edge elast = path->last ()->e;
1096 gcov_type path_in_count = 0;
1097 gcov_type path_out_count = 0;
1098 int path_in_freq = 0;
1099
1100 /* This routine updates profile counts, frequencies, and probabilities
1101 incrementally. Since it is difficult to do the incremental updates
1102 using frequencies/probabilities alone, for routines without profile
1103 data we first take a snapshot of the existing block and edge frequencies
1104 by copying them into the empty profile count fields. These counts are
1105 then used to do the incremental updates, and cleared at the end of this
f1ce4e72 1106 routine. If the function is marked as having a profile, we still check
1107 to see if the paths through RD are using estimated frequencies because
1108 the routine had zero profile counts. */
30e432bb 1109 bool do_freqs_to_counts = (profile_status_for_fn (cfun) != PROFILE_READ
9943b198 1110 || estimated_freqs_path (rd));
30e432bb 1111 if (do_freqs_to_counts)
1112 freqs_to_counts_path (rd);
1113
1114 /* First determine how much profile count to move from original
1115 path to the duplicate path. This is tricky in the presence of
1116 a joiner (see comments for compute_path_counts), where some portion
1117 of the path's counts will flow off-path from the joiner. In the
1118 non-joiner case the path_in_count and path_out_count should be the
1119 same. */
1120 bool has_joiner = compute_path_counts (rd, local_info,
9943b198 1121 &path_in_count, &path_out_count,
1122 &path_in_freq);
30e432bb 1123
1124 int cur_path_freq = path_in_freq;
fc54aba7 1125 for (unsigned int count = 0, i = 1; i < path->length (); i++)
1b83778e 1126 {
30e432bb 1127 edge epath = (*path)[i]->e;
1128
fc54aba7 1129 /* If we were threading through a joiner block, then we want
1130 to keep its control statement and redirect an outgoing edge.
1131 Else we want to remove the control statement & edges, then create
1132 a new outgoing edge. In both cases we may need to update PHIs. */
1133 if ((*path)[i]->type == EDGE_COPY_SRC_JOINER_BLOCK)
1134 {
1135 edge victim;
1136 edge e2;
1137
9943b198 1138 gcc_assert (has_joiner);
30e432bb 1139
fc54aba7 1140 /* This updates the PHIs at the destination of the duplicate
1b83c31b 1141 block. Pass 0 instead of i if we are threading a path which
1142 has multiple incoming edges. */
1143 update_destination_phis (local_info->bb, rd->dup_blocks[count],
1144 path, multi_incomings ? 0 : i);
fc54aba7 1145
1146 /* Find the edge from the duplicate block to the block we're
1147 threading through. That's the edge we want to redirect. */
1148 victim = find_edge (rd->dup_blocks[count], (*path)[i]->e->dest);
1149
1150 /* If there are no remaining blocks on the path to duplicate,
1151 then redirect VICTIM to the final destination of the jump
1152 threading path. */
1153 if (!any_remaining_duplicated_blocks (path, i))
1154 {
30e432bb 1155 e2 = redirect_edge_and_branch (victim, elast->dest);
fc54aba7 1156 /* If we redirected the edge, then we need to copy PHI arguments
559685be 1157 at the target. If the edge already existed (e2 != victim
fc54aba7 1158 case), then the PHIs in the target already have the correct
1159 arguments. */
1160 if (e2 == victim)
30e432bb 1161 copy_phi_args (e2->dest, elast, e2,
1b83c31b 1162 path, multi_incomings ? 0 : i);
fc54aba7 1163 }
1164 else
1165 {
1166 /* Redirect VICTIM to the next duplicated block in the path. */
1167 e2 = redirect_edge_and_branch (victim, rd->dup_blocks[count + 1]);
1168
1169 /* We need to update the PHIs in the next duplicated block. We
1170 want the new PHI args to have the same value as they had
1171 in the source of the next duplicate block.
1172
1173 Thus, we need to know which edge we traversed into the
1174 source of the duplicate. Furthermore, we may have
1175 traversed many edges to reach the source of the duplicate.
1176
1177 Walk through the path starting at element I until we
1178 hit an edge marked with EDGE_COPY_SRC_BLOCK. We want
1179 the edge from the prior element. */
1180 for (unsigned int j = i + 1; j < path->length (); j++)
1181 {
1182 if ((*path)[j]->type == EDGE_COPY_SRC_BLOCK)
1183 {
1184 copy_phi_arg_into_existing_phi ((*path)[j - 1]->e, e2);
1185 break;
1186 }
1187 }
1188 }
30e432bb 1189
1190 /* Update the counts and frequency of both the original block
1191 and path edge, and the duplicates. The path duplicate's
1192 incoming count and frequency are the totals for all edges
1193 incoming to this jump threading path computed earlier.
1194 And we know that the duplicated path will have path_out_count
1195 flowing out of it (i.e. along the duplicated path out of the
1196 duplicated joiner). */
1197 update_profile (epath, e2, path_in_count, path_out_count,
1198 path_in_freq);
1199
1200 /* Next we need to update the counts of the original and duplicated
1201 edges from the joiner that go off path. */
1202 update_joiner_offpath_counts (epath, e2->src, path_in_count,
9943b198 1203 path_out_count);
30e432bb 1204
1205 /* Finally, we need to set the probabilities on the duplicated
1206 edges out of the duplicated joiner (e2->src). The probabilities
1207 along the original path will all be updated below after we finish
1208 processing the whole path. */
1209 recompute_probabilities (e2->src);
1210
1211 /* Record the frequency flowing to the downstream duplicated
1212 path blocks. */
1213 cur_path_freq = EDGE_FREQUENCY (e2);
fc54aba7 1214 }
1215 else if ((*path)[i]->type == EDGE_COPY_SRC_BLOCK)
1216 {
1217 remove_ctrl_stmt_and_useless_edges (rd->dup_blocks[count], NULL);
1b83c31b 1218 create_edge_and_update_destination_phis (rd, rd->dup_blocks[count],
1219 multi_incomings ? 0 : i);
fc54aba7 1220 if (count == 1)
1221 single_succ_edge (rd->dup_blocks[1])->aux = NULL;
30e432bb 1222
1223 /* Update the counts and frequency of both the original block
1224 and path edge, and the duplicates. Since we are now after
1225 any joiner that may have existed on the path, the count
1226 flowing along the duplicated threaded path is path_out_count.
1227 If we didn't have a joiner, then cur_path_freq was the sum
1228 of the total frequencies along all incoming edges to the
1229 thread path (path_in_freq). If we had a joiner, it would have
1230 been updated at the end of that handling to the edge frequency
1231 along the duplicated joiner path edge. */
1232 update_profile (epath, EDGE_SUCC (rd->dup_blocks[count], 0),
1233 path_out_count, path_out_count,
1234 cur_path_freq);
fc54aba7 1235 }
30e432bb 1236 else
9943b198 1237 {
30e432bb 1238 /* No copy case. In this case we don't have an equivalent block
1239 on the duplicated thread path to update, but we do need
1240 to remove the portion of the counts/freqs that were moved
1241 to the duplicated path from the counts/freqs flowing through
1242 this block on the original path. Since all the no-copy edges
1243 are after any joiner, the removed count is the same as
1244 path_out_count.
1245
1246 If we didn't have a joiner, then cur_path_freq was the sum
1247 of the total frequencies along all incoming edges to the
1248 thread path (path_in_freq). If we had a joiner, it would have
1249 been updated at the end of that handling to the edge frequency
1250 along the duplicated joiner path edge. */
1251 update_profile (epath, NULL, path_out_count, path_out_count,
1252 cur_path_freq);
1253 }
1254
1255 /* Increment the index into the duplicated path when we processed
9943b198 1256 a duplicated block. */
30e432bb 1257 if ((*path)[i]->type == EDGE_COPY_SRC_JOINER_BLOCK
9943b198 1258 || (*path)[i]->type == EDGE_COPY_SRC_BLOCK)
30e432bb 1259 {
1260 count++;
1261 }
1262 }
1263
1264 /* Now walk orig blocks and update their probabilities, since the
 1265 counts and freqs should be updated properly by the above loop. */
1266 for (unsigned int i = 1; i < path->length (); i++)
1267 {
1268 edge epath = (*path)[i]->e;
1269 recompute_probabilities (epath->src);
778182c1 1270 }
30e432bb 1271
1272 /* Done with all profile and frequency updates, clear counts if they
1273 were copied. */
1274 if (do_freqs_to_counts)
1275 clear_counts_path (rd);
778182c1 1276}
fc54aba7 1277
778182c1 1278/* Hash table traversal callback routine to create duplicate blocks. */
1279
2b15d2ba 1280int
1281ssa_create_duplicates (struct redirection_data **slot,
1282 ssa_local_info_t *local_info)
778182c1 1283{
2b15d2ba 1284 struct redirection_data *rd = *slot;
778182c1 1285
11af02d8 1286 /* The second duplicated block in a jump threading path is specific
1b83778e 1287 to the path. So it gets stored in RD rather than in LOCAL_DATA.
559685be 1288
11af02d8 1289 Each time we're called, we have to look through the path and see
1b83778e 1290 if a second block needs to be duplicated.
11af02d8 1291
1292 Note the search starts with the third edge on the path. The first
1293 edge is the incoming edge, the second edge always has its source
1294 duplicated. Thus we start our search with the third edge. */
a7ee7309 1295 vec<jump_thread_edge *> *path = rd->path;
11af02d8 1296 for (unsigned int i = 2; i < path->length (); i++)
1297 {
1298 if ((*path)[i]->type == EDGE_COPY_SRC_BLOCK
1299 || (*path)[i]->type == EDGE_COPY_SRC_JOINER_BLOCK)
1300 {
30e432bb 1301 create_block_for_threading ((*path)[i]->e->src, rd, 1,
a7ee7309 1302 &local_info->duplicate_blocks);
11af02d8 1303 break;
1304 }
1305 }
1b83778e 1306
778182c1 1307 /* Create a template block if we have not done so already. Otherwise
1308 use the template to create a new block. */
1309 if (local_info->template_block == NULL)
1310 {
30e432bb 1311 create_block_for_threading ((*path)[1]->e->src, rd, 0,
a7ee7309 1312 &local_info->duplicate_blocks);
11af02d8 1313 local_info->template_block = rd->dup_blocks[0];
778182c1 1314
1315 /* We do not create any outgoing edges for the template. We will
1316 take care of that in a later traversal. That way we do not
1317 create edges that are going to just be deleted. */
1318 }
1319 else
1320 {
30e432bb 1321 create_block_for_threading (local_info->template_block, rd, 0,
a7ee7309 1322 &local_info->duplicate_blocks);
778182c1 1323
1324 /* Go ahead and wire up outgoing edges and update PHIs for the duplicate
da81e0c5 1325 block. */
2b15d2ba 1326 ssa_fix_duplicate_block_edges (rd, local_info);
778182c1 1327 }
1328
1329 /* Keep walking the hash table. */
1330 return 1;
1331}
1332
1333/* We did not create any outgoing edges for the template block during
1334 block creation. This hash table traversal callback creates the
1335 outgoing edge for the template block. */
1336
2b15d2ba 1337inline int
1338ssa_fixup_template_block (struct redirection_data **slot,
1339 ssa_local_info_t *local_info)
778182c1 1340{
2b15d2ba 1341 struct redirection_data *rd = *slot;
778182c1 1342
da81e0c5 1343 /* If this is the template block, halt the traversal after updating
1344 it appropriately.
1345
 1346 If we were threading through a joiner block, then we want
1347 to keep its control statement and redirect an outgoing edge.
1348 Else we want to remove the control statement & edges, then create
1349 a new outgoing edge. In both cases we may need to update PHIs. */
11af02d8 1350 if (rd->dup_blocks[0] && rd->dup_blocks[0] == local_info->template_block)
778182c1 1351 {
2b15d2ba 1352 ssa_fix_duplicate_block_edges (rd, local_info);
778182c1 1353 return 0;
1354 }
1355
1356 return 1;
1357}
1358
1359/* Hash table traversal callback to redirect each incoming edge
1360 associated with this hash table element to its new destination. */
1361
2b15d2ba 1362int
1363ssa_redirect_edges (struct redirection_data **slot,
1364 ssa_local_info_t *local_info)
778182c1 1365{
2b15d2ba 1366 struct redirection_data *rd = *slot;
778182c1 1367 struct el *next, *el;
1368
47ae02b7 1369 /* Walk over all the incoming edges associated with this hash table
1370 entry. */
778182c1 1371 for (el = rd->incoming_edges; el; el = next)
1372 {
1373 edge e = el->e;
f2981b08 1374 vec<jump_thread_edge *> *path = THREAD_PATH (e);
778182c1 1375
1376 /* Go ahead and free this element from the list. Doing this now
1377 avoids the need for another list walk when we destroy the hash
1378 table. */
1379 next = el->next;
1380 free (el);
1381
5236b8bb 1382 thread_stats.num_threaded_edges++;
1383
11af02d8 1384 if (rd->dup_blocks[0])
778182c1 1385 {
1386 edge e2;
1387
1388 if (dump_file && (dump_flags & TDF_DETAILS))
1389 fprintf (dump_file, " Threaded jump %d --> %d to %d\n",
11af02d8 1390 e->src->index, e->dest->index, rd->dup_blocks[0]->index);
778182c1 1391
c08f3525 1392 /* If we redirect a loop latch edge cancel its loop. */
1393 if (e->src == e->src->loop_father->latch)
d25159cc 1394 mark_loop_for_removal (e->src->loop_father);
c08f3525 1395
353f9f16 1396 /* Redirect the incoming edge (possibly to the joiner block) to the
1397 appropriate duplicate block. */
11af02d8 1398 e2 = redirect_edge_and_branch (e, rd->dup_blocks[0]);
7e0311ae 1399 gcc_assert (e == e2);
778182c1 1400 flush_pending_stmts (e2);
778182c1 1401 }
eb31063a 1402
1403 /* Go ahead and clear E->aux. It's not needed anymore and failure
559685be 1404 to clear it will cause all kinds of unpleasant problems later. */
6d1fdbf9 1405 delete_jump_thread_path (path);
eb31063a 1406 e->aux = NULL;
1407
778182c1 1408 }
388d1fc1 1409
1410 /* Indicate that we actually threaded one or more jumps. */
1411 if (rd->incoming_edges)
1412 local_info->jumps_threaded = true;
1413
778182c1 1414 return 1;
1415}
1416
aed95130 1417/* Return true if this block has no executable statements other than
1418 a simple ctrl flow instruction. When the number of outgoing edges
1419 is one, this is equivalent to a "forwarder" block. */
1420
1421static bool
47aaf6e6 1422redirection_block_p (basic_block bb)
aed95130 1423{
75a70cf9 1424 gimple_stmt_iterator gsi;
aed95130 1425
1426 /* Advance to the first executable statement. */
75a70cf9 1427 gsi = gsi_start_bb (bb);
1428 while (!gsi_end_p (gsi)
559685be 1429 && (gimple_code (gsi_stmt (gsi)) == GIMPLE_LABEL
9845d120 1430 || is_gimple_debug (gsi_stmt (gsi))
c7566ddf 1431 || gimple_nop_p (gsi_stmt (gsi))
581c1a3c 1432 || gimple_clobber_p (gsi_stmt (gsi))))
75a70cf9 1433 gsi_next (&gsi);
48e1416a 1434
aed95130 1435 /* Check if this is an empty block. */
75a70cf9 1436 if (gsi_end_p (gsi))
aed95130 1437 return true;
1438
1439 /* Test that we've reached the terminating control statement. */
75a70cf9 1440 return gsi_stmt (gsi)
559685be 1441 && (gimple_code (gsi_stmt (gsi)) == GIMPLE_COND
1442 || gimple_code (gsi_stmt (gsi)) == GIMPLE_GOTO
1443 || gimple_code (gsi_stmt (gsi)) == GIMPLE_SWITCH);
aed95130 1444}
1445
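/* As an illustrative sketch (hypothetical GIMPLE, not taken from any
   testcase), a block containing only

       <L7>:
       if (i_4 > 9)
         goto <bb 5>;
       else
         goto <bb 6>;

   has no executable statements other than its terminating control
   statement, so redirection_block_p returns true for it.  Labels, debug
   statements, nops and clobbers are skipped when making that decision.  */
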
a8046f60 1446/* BB is a block which ends with a COND_EXPR or SWITCH_EXPR and when BB
1447 is reached via one or more specific incoming edges, we know which
1448 outgoing edge from BB will be traversed.
1449
778182c1 1450 We want to redirect those incoming edges to the target of the
a8046f60 1451 appropriate outgoing edge. Doing so avoids a conditional branch
1452 and may expose new optimization opportunities. Note that we have
1453 to update dominator tree and SSA graph after such changes.
1454
597ff315 1455 The key to keeping the SSA graph update manageable is to duplicate
91275768 1456 the side effects occurring in BB so that those side effects still
a8046f60 1457 occur on the paths which bypass BB after redirecting edges.
1458
1459 We accomplish this by creating duplicates of BB and arranging for
1460 the duplicates to unconditionally pass control to one specific
1461 successor of BB. We then revector the incoming edges into BB to
1462 the appropriate duplicate of BB.
1463
7e0311ae 1464 If NOLOOP_ONLY is true, we only perform the threading as long as it
1b83778e 1465 does not affect the structure of the loops in a nontrivial way.
ed4feca1 1466
1467 If JOINERS is true, then thread through joiner blocks as well. */
a8046f60 1468
388d1fc1 1469static bool
ed4feca1 1470thread_block_1 (basic_block bb, bool noloop_only, bool joiners)
a8046f60 1471{
1472 /* E is an incoming edge into BB that we may or may not want to
1473 redirect to a duplicate of BB. */
7e0311ae 1474 edge e, e2;
cd665a06 1475 edge_iterator ei;
2b15d2ba 1476 ssa_local_info_t local_info;
388d1fc1 1477
30e432bb 1478 local_info.duplicate_blocks = BITMAP_ALLOC (NULL);
1479
778182c1 1480 /* To avoid scanning a linear array for the element we need we instead
c5d4a10b 1481 use a hash table. For normal code there should be no noticeable
778182c1 1482 difference. However, if we have a block with a large number of
 1483	 incoming and outgoing edges, such linear searches can get expensive. */
c1f445d2 1484 redirection_data
1485 = new hash_table<struct redirection_data> (EDGE_COUNT (bb->succs));
778182c1 1486
1487 /* Record each unique threaded destination into a hash table for
1488 efficient lookups. */
cd665a06 1489 FOR_EACH_EDGE (e, ei, bb->preds)
a8046f60 1490 {
eb31063a 1491 if (e->aux == NULL)
1492 continue;
1493
f2981b08 1494 vec<jump_thread_edge *> *path = THREAD_PATH (e);
ed4feca1 1495
1496 if (((*path)[1]->type == EDGE_COPY_SRC_JOINER_BLOCK && !joiners)
1497 || ((*path)[1]->type == EDGE_COPY_SRC_BLOCK && joiners))
1498 continue;
1499
f2981b08 1500 e2 = path->last ()->e;
e2b72d6c 1501 if (!e2 || noloop_only)
1502 {
7e0311ae 1503 /* If NOLOOP_ONLY is true, we only allow threading through the
559685be 1504 header of a loop to exit edges. */
e2b72d6c 1505
559685be 1506	 /* One case occurs when there was a loop header buried in a jump
 1507	    threading path that crosses loop boundaries.  We do not try
 1508	    to thread this elsewhere, so just cancel the jump threading
1509 request by clearing the AUX field now. */
bb66e2d1 1510 if ((bb->loop_father != e2->src->loop_father
1511 && !loop_exit_edge_p (e2->src->loop_father, e2))
1512 || (e2->src->loop_father != e2->dest->loop_father
1513 && !loop_exit_edge_p (e2->src->loop_father, e2)))
e2b72d6c 1514 {
1515 /* Since this case is not handled by our special code
1516 to thread through a loop header, we must explicitly
1517 cancel the threading request here. */
6d1fdbf9 1518 delete_jump_thread_path (path);
e2b72d6c 1519 e->aux = NULL;
1520 continue;
1521 }
559685be 1522
1523 /* Another case occurs when trying to thread through our
ab596744 1524 own loop header, possibly from inside the loop. We will
1525 thread these later. */
559685be 1526 unsigned int i;
1527 for (i = 1; i < path->length (); i++)
1528 {
1529 if ((*path)[i]->e->src == bb->loop_father->header
1530 && (!loop_exit_edge_p (bb->loop_father, e2)
1531 || (*path)[1]->type == EDGE_COPY_SRC_JOINER_BLOCK))
ab596744 1532 break;
559685be 1533 }
1534
1535 if (i != path->length ())
1536 continue;
e2b72d6c 1537 }
778182c1 1538
7e0311ae 1539 /* Insert the outgoing edge into the hash table if it is not
1540 already in the hash table. */
da81e0c5 1541 lookup_redirection_data (e, INSERT);
a8046f60 1542 }
1543
3f9439d7 1544 /* We do not update dominance info. */
1545 free_dominance_info (CDI_DOMINATORS);
1546
d906930c 1547 /* We know we only thread through the loop header to loop exits.
1548 Let the basic block duplication hook know we are not creating
1549 a multiple entry loop. */
1550 if (noloop_only
1551 && bb == bb->loop_father->header)
1552 set_loop_copy (bb->loop_father, loop_outer (bb->loop_father));
1553
778182c1 1554 /* Now create duplicates of BB.
f582bb6c 1555
1556 Note that for a block with a high outgoing degree we can waste
1557 a lot of time and memory creating and destroying useless edges.
1558
1559 So we first duplicate BB and remove the control structure at the
1560 tail of the duplicate as well as all outgoing edges from the
1561 duplicate. We then use that duplicate block as a template for
1562 the rest of the duplicates. */
778182c1 1563 local_info.template_block = NULL;
1564 local_info.bb = bb;
388d1fc1 1565 local_info.jumps_threaded = false;
c1f445d2 1566 redirection_data->traverse <ssa_local_info_t *, ssa_create_duplicates>
2b15d2ba 1567 (&local_info);
f582bb6c 1568
778182c1 1569 /* The template does not have an outgoing edge. Create that outgoing
 1570	 edge and update PHI nodes at the edge's target as necessary.
f582bb6c 1571
778182c1 1572 We do this after creating all the duplicates to avoid creating
1573 unnecessary edges. */
c1f445d2 1574 redirection_data->traverse <ssa_local_info_t *, ssa_fixup_template_block>
2b15d2ba 1575 (&local_info);
f582bb6c 1576
778182c1 1577 /* The hash table traversals above created the duplicate blocks (and the
1578 statements within the duplicate blocks). This loop creates PHI nodes for
1579 the duplicated blocks and redirects the incoming edges into BB to reach
1580 the duplicates of BB. */
c1f445d2 1581 redirection_data->traverse <ssa_local_info_t *, ssa_redirect_edges>
2b15d2ba 1582 (&local_info);
a8046f60 1583
a3d0fd80 1584 /* Done with this block. Clear REDIRECTION_DATA. */
c1f445d2 1585 delete redirection_data;
1586 redirection_data = NULL;
388d1fc1 1587
d906930c 1588 if (noloop_only
1589 && bb == bb->loop_father->header)
1590 set_loop_copy (bb->loop_father, NULL);
1591
30e432bb 1592 BITMAP_FREE (local_info.duplicate_blocks);
1593 local_info.duplicate_blocks = NULL;
1594
388d1fc1 1595 /* Indicate to our caller whether or not any jumps were threaded. */
1596 return local_info.jumps_threaded;
a8046f60 1597}
1598
ed4feca1 1599/* Wrapper for thread_block_1 so that we can first handle jump
1600 thread paths which do not involve copying joiner blocks, then
1601 handle jump thread paths which have joiner blocks.
1602
1603 By doing things this way we can be as aggressive as possible and
1604 not worry that copying a joiner block will create a jump threading
1605 opportunity. */
1b83778e 1606
ed4feca1 1607static bool
1608thread_block (basic_block bb, bool noloop_only)
1609{
1610 bool retval;
1611 retval = thread_block_1 (bb, noloop_only, false);
1612 retval |= thread_block_1 (bb, noloop_only, true);
1613 return retval;
1614}
1615
1616
eb31063a 1617/* Threads edge E through E->dest to the edge THREAD_TARGET (E). Returns the
1618 copy of E->dest created during threading, or E->dest if it was not necessary
7e0311ae 1619 to copy it (E is its single predecessor). */
1620
1621static basic_block
1622thread_single_edge (edge e)
1623{
1624 basic_block bb = e->dest;
7e0311ae 1625 struct redirection_data rd;
f2981b08 1626 vec<jump_thread_edge *> *path = THREAD_PATH (e);
1627 edge eto = (*path)[1]->e;
7e0311ae 1628
32b97880 1629 delete_jump_thread_path (path);
7e0311ae 1630 e->aux = NULL;
1631
1632 thread_stats.num_threaded_edges++;
1633
1634 if (single_pred_p (bb))
1635 {
 1636	 /* If BB has just a single predecessor, we should only remove the
 1637	    control statement at its end and the successors other than ETO.  */
1638 remove_ctrl_stmt_and_useless_edges (bb, eto->dest);
ad330780 1639
1640 /* And fixup the flags on the single remaining edge. */
1641 eto->flags &= ~(EDGE_TRUE_VALUE | EDGE_FALSE_VALUE | EDGE_ABNORMAL);
1642 eto->flags |= EDGE_FALLTHRU;
1643
7e0311ae 1644 return bb;
1645 }
1646
1647 /* Otherwise, we need to create a copy. */
42b013bc 1648 if (e->dest == eto->src)
1649 update_bb_profile_for_threading (bb, EDGE_FREQUENCY (e), e->count, eto);
7e0311ae 1650
5fe6149c 1651 vec<jump_thread_edge *> *npath = new vec<jump_thread_edge *> ();
1652 jump_thread_edge *x = new jump_thread_edge (e, EDGE_START_JUMP_THREAD);
1653 npath->safe_push (x);
1654
1655 x = new jump_thread_edge (eto, EDGE_COPY_SRC_BLOCK);
1656 npath->safe_push (x);
1657 rd.path = npath;
7e0311ae 1658
a7ee7309 1659 create_block_for_threading (bb, &rd, 0, NULL);
11af02d8 1660 remove_ctrl_stmt_and_useless_edges (rd.dup_blocks[0], NULL);
1b83c31b 1661 create_edge_and_update_destination_phis (&rd, rd.dup_blocks[0], 0);
7e0311ae 1662
1663 if (dump_file && (dump_flags & TDF_DETAILS))
1664 fprintf (dump_file, " Threaded jump %d --> %d to %d\n",
11af02d8 1665 e->src->index, e->dest->index, rd.dup_blocks[0]->index);
7e0311ae 1666
11af02d8 1667 rd.dup_blocks[0]->count = e->count;
1668 rd.dup_blocks[0]->frequency = EDGE_FREQUENCY (e);
1669 single_succ_edge (rd.dup_blocks[0])->count = e->count;
1670 redirect_edge_and_branch (e, rd.dup_blocks[0]);
7e0311ae 1671 flush_pending_stmts (e);
1672
32b97880 1673 delete_jump_thread_path (npath);
11af02d8 1674 return rd.dup_blocks[0];
7e0311ae 1675}
1676
1677/* Callback for dfs_enumerate_from. Returns true if BB is different
1678 from STOP and DBDS_CE_STOP. */
1679
1680static basic_block dbds_ce_stop;
1681static bool
7ecb5bb2 1682dbds_continue_enumeration_p (const_basic_block bb, const void *stop)
7e0311ae 1683{
7ecb5bb2 1684 return (bb != (const_basic_block) stop
7e0311ae 1685 && bb != dbds_ce_stop);
1686}
1687
1688/* Evaluates the dominance relationship of latch of the LOOP and BB, and
1689 returns the state. */
1690
1691enum bb_dom_status
1692{
1693 /* BB does not dominate latch of the LOOP. */
1694 DOMST_NONDOMINATING,
 1695	 /* The LOOP is broken (there is no path from the header to its latch).  */
1696 DOMST_LOOP_BROKEN,
1697 /* BB dominates the latch of the LOOP. */
1698 DOMST_DOMINATING
1699};
1700
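/* Return the dominance status of BB with respect to the latch of LOOP,
   expressed as one of the bb_dom_status values above.  */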
1701static enum bb_dom_status
1702determine_bb_domination_status (struct loop *loop, basic_block bb)
1703{
1704 basic_block *bblocks;
1705 unsigned nblocks, i;
1706 bool bb_reachable = false;
1707 edge_iterator ei;
1708 edge e;
1709
42b013bc 1710 /* This function assumes BB is a successor of LOOP->header.
1711 If that is not the case return DOMST_NONDOMINATING which
1712 is always safe. */
7e0311ae 1713 {
1714 bool ok = false;
1715
1716 FOR_EACH_EDGE (e, ei, bb->preds)
1717 {
1718 if (e->src == loop->header)
1719 {
1720 ok = true;
1721 break;
1722 }
1723 }
1724
42b013bc 1725 if (!ok)
1726 return DOMST_NONDOMINATING;
7e0311ae 1727 }
7e0311ae 1728
1729 if (bb == loop->latch)
1730 return DOMST_DOMINATING;
1731
1732 /* Check that BB dominates LOOP->latch, and that it is back-reachable
1733 from it. */
1734
1735 bblocks = XCNEWVEC (basic_block, loop->num_nodes);
1736 dbds_ce_stop = loop->header;
1737 nblocks = dfs_enumerate_from (loop->latch, 1, dbds_continue_enumeration_p,
1738 bblocks, loop->num_nodes, bb);
1739 for (i = 0; i < nblocks; i++)
1740 FOR_EACH_EDGE (e, ei, bblocks[i]->preds)
1741 {
1742 if (e->src == loop->header)
1743 {
1744 free (bblocks);
1745 return DOMST_NONDOMINATING;
1746 }
1747 if (e->src == bb)
1748 bb_reachable = true;
1749 }
1750
1751 free (bblocks);
1752 return (bb_reachable ? DOMST_DOMINATING : DOMST_LOOP_BROKEN);
1753}
1754
6eb99d8a 1755/* Return true if BB is part of the new pre-header that is created
1756 when threading the latch to DATA. */
1757
1758static bool
1759def_split_header_continue_p (const_basic_block bb, const void *data)
1760{
1761 const_basic_block new_header = (const_basic_block) data;
a934d302 1762 const struct loop *l;
1763
1764 if (bb == new_header
1765 || loop_depth (bb->loop_father) < loop_depth (new_header->loop_father))
1766 return false;
1767 for (l = bb->loop_father; l; l = loop_outer (l))
1768 if (l == new_header->loop_father)
1769 return true;
1770 return false;
6eb99d8a 1771}
1772
7e0311ae 1773/* Thread jumps through the header of LOOP. Returns true if cfg changes.
1774 If MAY_PEEL_LOOP_HEADERS is false, we avoid threading from entry edges
1775 to the inside of the loop. */
1776
1777static bool
1778thread_through_loop_header (struct loop *loop, bool may_peel_loop_headers)
1779{
1780 basic_block header = loop->header;
1781 edge e, tgt_edge, latch = loop_latch_edge (loop);
1782 edge_iterator ei;
1783 basic_block tgt_bb, atgt_bb;
1784 enum bb_dom_status domst;
1785
1786 /* We have already threaded through headers to exits, so all the threading
1787 requests now are to the inside of the loop. We need to avoid creating
1788 irreducible regions (i.e., loops with more than one entry block), and
 1789	 also loops with several latch edges, or new subloops of the loop (although
1790 there are cases where it might be appropriate, it is difficult to decide,
1791 and doing it wrongly may confuse other optimizers).
1792
1793 We could handle more general cases here. However, the intention is to
1794 preserve some information about the loop, which is impossible if its
1795 structure changes significantly, in a way that is not well understood.
 1796	 Thus we only handle a few important special cases, in which updating
 1797	 the loop-carried information should also be feasible:
1798
1799 1) Propagation of latch edge to a block that dominates the latch block
1800 of a loop. This aims to handle the following idiom:
1801
1802 first = 1;
1803 while (1)
1804 {
1805 if (first)
1806 initialize;
1807 first = 0;
1808 body;
1809 }
1810
1811 After threading the latch edge, this becomes
1812
1813 first = 1;
1814 if (first)
1815 initialize;
1816 while (1)
1817 {
1818 first = 0;
1819 body;
1820 }
1821
1822 The original header of the loop is moved out of it, and we may thread
1823 the remaining edges through it without further constraints.
1824
1825 2) All entry edges are propagated to a single basic block that dominates
1826 the latch block of the loop. This aims to handle the following idiom
1827 (normally created for "for" loops):
1828
1829 i = 0;
1830 while (1)
1831 {
1832 if (i >= 100)
1833 break;
1834 body;
1835 i++;
1836 }
1837
1838 This becomes
1839
1840 i = 0;
1841 while (1)
1842 {
1843 body;
1844 i++;
1845 if (i >= 100)
1846 break;
1847 }
1848 */
1849
1850 /* Threading through the header won't improve the code if the header has just
1851 one successor. */
1852 if (single_succ_p (header))
1853 goto fail;
1854
98685018 1855 /* If we threaded the latch using a joiner block, we cancel the
1856 threading opportunity out of an abundance of caution. However,
1857 still allow threading from outside to inside the loop. */
7e0311ae 1858 if (latch->aux)
1859 {
f2981b08 1860 vec<jump_thread_edge *> *path = THREAD_PATH (latch);
1861 if ((*path)[1]->type == EDGE_COPY_SRC_JOINER_BLOCK)
98685018 1862 {
1863 delete_jump_thread_path (path);
1864 latch->aux = NULL;
1865 }
1866 }
1867
1868 if (latch->aux)
1869 {
1870 vec<jump_thread_edge *> *path = THREAD_PATH (latch);
f2981b08 1871 tgt_edge = (*path)[1]->e;
7e0311ae 1872 tgt_bb = tgt_edge->dest;
1873 }
1874 else if (!may_peel_loop_headers
1875 && !redirection_block_p (loop->header))
1876 goto fail;
1877 else
1878 {
1879 tgt_bb = NULL;
1880 tgt_edge = NULL;
1881 FOR_EACH_EDGE (e, ei, header->preds)
1882 {
1883 if (!e->aux)
1884 {
1885 if (e == latch)
1886 continue;
1887
 1888	      /* If the latch is not threaded and there is a header
 1889		 edge that is not threaded, we would create a loop
 1890		 with multiple entries.  */
1891 goto fail;
1892 }
1893
f2981b08 1894 vec<jump_thread_edge *> *path = THREAD_PATH (e);
1895
1896 if ((*path)[1]->type == EDGE_COPY_SRC_JOINER_BLOCK)
da81e0c5 1897 goto fail;
f2981b08 1898 tgt_edge = (*path)[1]->e;
7e0311ae 1899 atgt_bb = tgt_edge->dest;
1900 if (!tgt_bb)
1901 tgt_bb = atgt_bb;
 1902	      /* Two targets of threading would make us create a loop
1903 with multiple entries. */
1904 else if (tgt_bb != atgt_bb)
1905 goto fail;
1906 }
1907
1908 if (!tgt_bb)
1909 {
1910 /* There are no threading requests. */
1911 return false;
1912 }
1913
1914 /* Redirecting to empty loop latch is useless. */
1915 if (tgt_bb == loop->latch
1916 && empty_block_p (loop->latch))
1917 goto fail;
1918 }
1919
1920 /* The target block must dominate the loop latch, otherwise we would be
1921 creating a subloop. */
1922 domst = determine_bb_domination_status (loop, tgt_bb);
1923 if (domst == DOMST_NONDOMINATING)
1924 goto fail;
1925 if (domst == DOMST_LOOP_BROKEN)
1926 {
1927 /* If the loop ceased to exist, mark it as such, and thread through its
1928 original header. */
d25159cc 1929 mark_loop_for_removal (loop);
7e0311ae 1930 return thread_block (header, false);
1931 }
1932
1933 if (tgt_bb->loop_father->header == tgt_bb)
1934 {
1935 /* If the target of the threading is a header of a subloop, we need
1936 to create a preheader for it, so that the headers of the two loops
1937 do not merge. */
1938 if (EDGE_COUNT (tgt_bb->preds) > 2)
1939 {
1940 tgt_bb = create_preheader (tgt_bb->loop_father, 0);
1941 gcc_assert (tgt_bb != NULL);
1942 }
1943 else
1944 tgt_bb = split_edge (tgt_edge);
1945 }
48e1416a 1946
7e0311ae 1947 if (latch->aux)
1948 {
6eb99d8a 1949 basic_block *bblocks;
1950 unsigned nblocks, i;
1951
35c67c83 1952	      /* First handle the case where the latch edge is redirected.  We are copying
559685be 1953 the loop header but not creating a multiple entry loop. Make the
35c67c83 1954 cfg manipulation code aware of that fact. */
1955 set_loop_copy (loop, loop);
7e0311ae 1956 loop->latch = thread_single_edge (latch);
35c67c83 1957 set_loop_copy (loop, NULL);
7e0311ae 1958 gcc_assert (single_succ (loop->latch) == tgt_bb);
1959 loop->header = tgt_bb;
1960
6eb99d8a 1961 /* Remove the new pre-header blocks from our loop. */
1962 bblocks = XCNEWVEC (basic_block, loop->num_nodes);
1963 nblocks = dfs_enumerate_from (header, 0, def_split_header_continue_p,
1964 bblocks, loop->num_nodes, tgt_bb);
1965 for (i = 0; i < nblocks; i++)
c99897b6 1966 if (bblocks[i]->loop_father == loop)
1967 {
1968 remove_bb_from_loops (bblocks[i]);
1969 add_bb_to_loop (bblocks[i], loop_outer (loop));
1970 }
6eb99d8a 1971 free (bblocks);
1972
bb722af4 1973 /* If the new header has multiple latches mark it so. */
1974 FOR_EACH_EDGE (e, ei, loop->header->preds)
1975 if (e->src->loop_father == loop
1976 && e->src != loop->latch)
1977 {
1978 loop->latch = NULL;
1979 loops_state_set (LOOPS_MAY_HAVE_MULTIPLE_LATCHES);
1980 }
1981
6eb99d8a 1982 /* Cancel remaining threading requests that would make the
1983 loop a multiple entry loop. */
1984 FOR_EACH_EDGE (e, ei, header->preds)
1985 {
1986 edge e2;
bb722af4 1987
6eb99d8a 1988 if (e->aux == NULL)
1989 continue;
1990
f2981b08 1991 vec<jump_thread_edge *> *path = THREAD_PATH (e);
1992 e2 = path->last ()->e;
6eb99d8a 1993
1994 if (e->src->loop_father != e2->dest->loop_father
1995 && e2->dest != loop->header)
1996 {
6d1fdbf9 1997 delete_jump_thread_path (path);
6eb99d8a 1998 e->aux = NULL;
1999 }
2000 }
2001
7e0311ae 2002 /* Thread the remaining edges through the former header. */
2003 thread_block (header, false);
2004 }
2005 else
2006 {
2007 basic_block new_preheader;
2008
 2009	      /* Now consider the case where entry edges are redirected to the new entry
2010 block. Remember one entry edge, so that we can find the new
eb31063a 2011 preheader (its destination after threading). */
7e0311ae 2012 FOR_EACH_EDGE (e, ei, header->preds)
2013 {
2014 if (e->aux)
2015 break;
2016 }
2017
2018 /* The duplicate of the header is the new preheader of the loop. Ensure
2019 that it is placed correctly in the loop hierarchy. */
96c90e5e 2020 set_loop_copy (loop, loop_outer (loop));
7e0311ae 2021
2022 thread_block (header, false);
96c90e5e 2023 set_loop_copy (loop, NULL);
7e0311ae 2024 new_preheader = e->dest;
2025
2026 /* Create the new latch block. This is always necessary, as the latch
2027 must have only a single successor, but the original header had at
2028 least two successors. */
2029 loop->latch = NULL;
2030 mfb_kj_edge = single_succ_edge (new_preheader);
2031 loop->header = mfb_kj_edge->dest;
2032 latch = make_forwarder_block (tgt_bb, mfb_keep_just, NULL);
2033 loop->header = latch->dest;
2034 loop->latch = latch->src;
2035 }
48e1416a 2036
7e0311ae 2037 return true;
2038
2039fail:
2040 /* We failed to thread anything. Cancel the requests. */
2041 FOR_EACH_EDGE (e, ei, header->preds)
2042 {
f2981b08 2043 vec<jump_thread_edge *> *path = THREAD_PATH (e);
2044
2045 if (path)
2046 {
6d1fdbf9 2047 delete_jump_thread_path (path);
f2981b08 2048 e->aux = NULL;
2049 }
7e0311ae 2050 }
2051 return false;
2052}
2053
b99a7d6d 2054/* E1 and E2 are edges into the same basic block. Return TRUE if the
2055 PHI arguments associated with those edges are equal or there are no
2056 PHI arguments, otherwise return FALSE. */
2057
2058static bool
2059phi_args_equal_on_edges (edge e1, edge e2)
2060{
1a91d914 2061 gphi_iterator gsi;
b99a7d6d 2062 int indx1 = e1->dest_idx;
2063 int indx2 = e2->dest_idx;
2064
2065 for (gsi = gsi_start_phis (e1->dest); !gsi_end_p (gsi); gsi_next (&gsi))
2066 {
1a91d914 2067 gphi *phi = gsi.phi ();
b99a7d6d 2068
2069 if (!operand_equal_p (gimple_phi_arg_def (phi, indx1),
2070 gimple_phi_arg_def (phi, indx2), 0))
2071 return false;
2072 }
2073 return true;
2074}
2075
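/* For instance (a hypothetical example), given the single PHI

     x_5 = PHI <a_1(E1), a_1(E2), b_3(E4)>

   in the common destination block, the arguments for edges E1 and E2
   compare equal, so this PHI would not cause a call with those two edges
   to return false, whereas a call with E1 and E4 would return false.  */
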
3cebc9d2 2076/* Walk through the registered jump threads and convert them into a
334ec2d8 2077 form convenient for this pass.
3cebc9d2 2078
2079 Any block which has incoming edges threaded to outgoing edges
2080 will have its entry in THREADED_BLOCK set.
a8046f60 2081
3cebc9d2 2082 Any threaded edge will have its new outgoing edge stored in the
2083 original edge's AUX field.
a8046f60 2084
3cebc9d2 2085 This form avoids the need to walk all the edges in the CFG to
2086 discover blocks which need processing and avoids unnecessary
2087 hash table lookups to map from threaded edge to new target. */
a8046f60 2088
3cebc9d2 2089static void
2090mark_threaded_blocks (bitmap threaded_blocks)
2091{
2092 unsigned int i;
7e0311ae 2093 bitmap_iterator bi;
2094 bitmap tmp = BITMAP_ALLOC (NULL);
2095 basic_block bb;
2096 edge e;
2097 edge_iterator ei;
3cebc9d2 2098
b93ba654 2099 /* It is possible to have jump threads in which one is a subpath
 2100	 of the other.  For example, (A, B), (B, C), (C, D) where B is a joiner
2101 block and (B, C), (C, D) where no joiner block exists.
2102
2103 When this occurs ignore the jump thread request with the joiner
2104 block. It's totally subsumed by the simpler jump thread request.
2105
2106 This results in less block copying, simpler CFGs. More importantly,
 2107	 if we were to duplicate the joiner block B in this case, we would create
2108 a new threading opportunity that we wouldn't be able to optimize
2109 until the next jump threading iteration.
2110
2111 So first convert the jump thread requests which do not require a
2112 joiner block. */
f2981b08 2113 for (i = 0; i < paths.length (); i++)
3cebc9d2 2114 {
f2981b08 2115 vec<jump_thread_edge *> *path = paths[i];
b93ba654 2116
2117 if ((*path)[1]->type != EDGE_COPY_SRC_JOINER_BLOCK)
2118 {
2119 edge e = (*path)[0]->e;
2120 e->aux = (void *)path;
2121 bitmap_set_bit (tmp, e->dest->index);
2122 }
1f3976e7 2123 }
2124
b93ba654 2125 /* Now iterate again, converting cases where we want to thread
2126 through a joiner block, but only if no other edge on the path
f1ce4e72 2127 already has a jump thread attached to it. We do this in two passes,
2128 to avoid situations where the order in the paths vec can hide overlapping
2129 threads (the path is recorded on the incoming edge, so we would miss
2130 cases where the second path starts at a downstream edge on the same
2131 path). First record all joiner paths, deleting any in the unexpected
2132 case where there is already a path for that incoming edge. */
51ea8bc6 2133 for (i = 0; i < paths.length ();)
b93ba654 2134 {
2135 vec<jump_thread_edge *> *path = paths[i];
2136
2137 if ((*path)[1]->type == EDGE_COPY_SRC_JOINER_BLOCK)
9943b198 2138 {
f1ce4e72 2139 /* Attach the path to the starting edge if none is yet recorded. */
9943b198 2140 if ((*path)[0]->e->aux == NULL)
5d293e79 2141 {
9943b198 2142 (*path)[0]->e->aux = path;
51ea8bc6 2143 i++;
5d293e79 2144 }
2145 else
2146 {
2147 paths.unordered_remove (i);
2148 if (dump_file && (dump_flags & TDF_DETAILS))
9943b198 2149 dump_jump_thread_path (dump_file, *path, false);
5d293e79 2150 delete_jump_thread_path (path);
2151 }
9943b198 2152 }
51ea8bc6 2153 else
2154 {
2155 i++;
2156 }
f1ce4e72 2157 }
51ea8bc6 2158
f1ce4e72 2159 /* Second, look for paths that have any other jump thread attached to
2160 them, and either finish converting them or cancel them. */
51ea8bc6 2161 for (i = 0; i < paths.length ();)
f1ce4e72 2162 {
2163 vec<jump_thread_edge *> *path = paths[i];
2164 edge e = (*path)[0]->e;
2165
2166 if ((*path)[1]->type == EDGE_COPY_SRC_JOINER_BLOCK && e->aux == path)
b93ba654 2167 {
2168 unsigned int j;
f1ce4e72 2169 for (j = 1; j < path->length (); j++)
b93ba654 2170 if ((*path)[j]->e->aux != NULL)
2171 break;
2172
2173 /* If we iterated through the entire path without exiting the loop,
f1ce4e72 2174 then we are good to go, record it. */
b93ba654 2175 if (j == path->length ())
51ea8bc6 2176 {
2177 bitmap_set_bit (tmp, e->dest->index);
2178 i++;
2179 }
f1ce4e72 2180 else
b93ba654 2181 {
f1ce4e72 2182 e->aux = NULL;
5d293e79 2183 paths.unordered_remove (i);
f1ce4e72 2184 if (dump_file && (dump_flags & TDF_DETAILS))
9943b198 2185 dump_jump_thread_path (dump_file, *path, false);
5d293e79 2186 delete_jump_thread_path (path);
b93ba654 2187 }
2188 }
51ea8bc6 2189 else
2190 {
2191 i++;
2192 }
b93ba654 2193 }
b99a7d6d 2194
7e0311ae 2195  /* If optimizing for size, only thread through a block if we don't have
2196 to duplicate it or it's an otherwise empty redirection block. */
0bfd8d5c 2197 if (optimize_function_for_size_p (cfun))
7e0311ae 2198 {
2199 EXECUTE_IF_SET_IN_BITMAP (tmp, 0, i, bi)
2200 {
f5a6b05f 2201 bb = BASIC_BLOCK_FOR_FN (cfun, i);
7e0311ae 2202 if (EDGE_COUNT (bb->preds) > 1
2203 && !redirection_block_p (bb))
2204 {
2205 FOR_EACH_EDGE (e, ei, bb->preds)
eb31063a 2206 {
f2981b08 2207 if (e->aux)
2208 {
2209 vec<jump_thread_edge *> *path = THREAD_PATH (e);
6d1fdbf9 2210 delete_jump_thread_path (path);
f2981b08 2211 e->aux = NULL;
2212 }
eb31063a 2213 }
7e0311ae 2214 }
2215 else
2216 bitmap_set_bit (threaded_blocks, i);
2217 }
3cebc9d2 2218 }
7e0311ae 2219 else
2220 bitmap_copy (threaded_blocks, tmp);
2221
6328e25d 2222 /* Look for jump threading paths which cross multiple loop headers.
2223
2224 The code to thread through loop headers will change the CFG in ways
2225 that break assumptions made by the loop optimization code.
2226
2227 We don't want to blindly cancel the requests. We can instead do better
2228 by trimming off the end of the jump thread path. */
2229 EXECUTE_IF_SET_IN_BITMAP (tmp, 0, i, bi)
2230 {
f5a6b05f 2231 basic_block bb = BASIC_BLOCK_FOR_FN (cfun, i);
6328e25d 2232 FOR_EACH_EDGE (e, ei, bb->preds)
2233 {
2234 if (e->aux)
2235 {
2236 vec<jump_thread_edge *> *path = THREAD_PATH (e);
2237
de9b51e4 2238 for (unsigned int i = 0, crossed_headers = 0;
2239 i < path->length ();
2240 i++)
6328e25d 2241 {
de9b51e4 2242 basic_block dest = (*path)[i]->e->dest;
2243 crossed_headers += (dest == dest->loop_father->header);
2244 if (crossed_headers > 1)
6328e25d 2245 {
de9b51e4 2246 /* Trim from entry I onwards. */
2247 for (unsigned int j = i; j < path->length (); j++)
2248 delete (*path)[j];
2249 path->truncate (i);
2250
2251 /* Now that we've truncated the path, make sure
2252 what's left is still valid. We need at least
2253 two edges on the path and the last edge can not
2254 be a joiner. This should never happen, but let's
2255 be safe. */
2256 if (path->length () < 2
2257 || (path->last ()->type
2258 == EDGE_COPY_SRC_JOINER_BLOCK))
6328e25d 2259 {
de9b51e4 2260 delete_jump_thread_path (path);
2261 e->aux = NULL;
6328e25d 2262 }
de9b51e4 2263 break;
6328e25d 2264 }
2265 }
2266 }
2267 }
2268 }
2269
af6b6631 2270 /* If we have a joiner block (J) which has two successors S1 and S2 and
 2271	 we are threading through S1 and the final destination of the thread
2272 is S2, then we must verify that any PHI nodes in S2 have the same
2273 PHI arguments for the edge J->S2 and J->S1->...->S2.
2274
2275 We used to detect this prior to registering the jump thread, but
2276 that prohibits propagation of edge equivalences into non-dominated
2277 PHI nodes as the equivalency test might occur before propagation.
2278
2279 This must also occur after we truncate any jump threading paths
2280 as this scenario may only show up after truncation.
2281
2282 This works for now, but will need improvement as part of the FSA
2283 optimization.
2284
2285 Note since we've moved the thread request data to the edges,
2286 we have to iterate on those rather than the threaded_edges vector. */
2287 EXECUTE_IF_SET_IN_BITMAP (tmp, 0, i, bi)
2288 {
f5a6b05f 2289 bb = BASIC_BLOCK_FOR_FN (cfun, i);
af6b6631 2290 FOR_EACH_EDGE (e, ei, bb->preds)
2291 {
2292 if (e->aux)
2293 {
2294 vec<jump_thread_edge *> *path = THREAD_PATH (e);
2295 bool have_joiner = ((*path)[1]->type == EDGE_COPY_SRC_JOINER_BLOCK);
2296
2297 if (have_joiner)
2298 {
2299 basic_block joiner = e->dest;
2300 edge final_edge = path->last ()->e;
2301 basic_block final_dest = final_edge->dest;
2302 edge e2 = find_edge (joiner, final_dest);
2303
2304 if (e2 && !phi_args_equal_on_edges (e2, final_edge))
2305 {
2306 delete_jump_thread_path (path);
2307 e->aux = NULL;
2308 }
2309 }
2310 }
2311 }
2312 }
2313
9af5ce0c 2314 BITMAP_FREE (tmp);
3cebc9d2 2315}
2316
2317
ab596744 2318/* Return TRUE if BB ends with a switch statement or a computed goto.
2319 Otherwise return false. */
2320static bool
2321bb_ends_with_multiway_branch (basic_block bb ATTRIBUTE_UNUSED)
2322{
42acab1c 2323 gimple *stmt = last_stmt (bb);
ab596744 2324 if (stmt && gimple_code (stmt) == GIMPLE_SWITCH)
2325 return true;
2326 if (stmt && gimple_code (stmt) == GIMPLE_GOTO
2327 && TREE_CODE (gimple_goto_dest (stmt)) == SSA_NAME)
2328 return true;
2329 return false;
2330}
2331
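/* For example (hypothetical GIMPLE), a block terminated by a statement
   such as

     switch (i_2) <default: <L5>, case 0: <L6>, case 7: <L7>>

   or by a computed goto

     goto p_4;

   where p_4 is an SSA_NAME, counts as a multiway branch here.  */
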
9e0d85a7 2332/* Verify that the REGION is a valid jump thread. A jump thread is a special
 2333	 case of a SEME (Single Entry Multiple Exits) region in which all nodes in the
 2334	 REGION have exactly one incoming edge.  The only exception is the first block,
 2335	 which may not have been connected to the rest of the cfg yet.  */
ded1c768 2336
2337DEBUG_FUNCTION void
9e0d85a7 2338verify_jump_thread (basic_block *region, unsigned n_region)
ded1c768 2339{
ded1c768 2340 for (unsigned i = 0; i < n_region; i++)
9e0d85a7 2341 gcc_assert (EDGE_COUNT (region[i]->preds) <= 1);
2342}
ded1c768 2343
9e0d85a7 2344/* Return true when BB is one of the first N items in BBS. */
ded1c768 2345
9e0d85a7 2346static inline bool
2347bb_in_bbs (basic_block bb, basic_block *bbs, int n)
2348{
2349 for (int i = 0; i < n; i++)
2350 if (bb == bbs[i])
2351 return true;
ded1c768 2352
9e0d85a7 2353 return false;
ded1c768 2354}
2355
9e0d85a7 2356/* Duplicates a jump-thread path of N_REGION basic blocks.
2357 The ENTRY edge is redirected to the duplicate of the region.
ded1c768 2358
2359 Remove the last conditional statement in the last basic block in the REGION,
2360 and create a single fallthru edge pointing to the same destination as the
2361 EXIT edge.
2362
2363 The new basic blocks are stored to REGION_COPY in the same order as they had
2364 in REGION, provided that REGION_COPY is not NULL.
2365
2366 Returns false if it is unable to copy the region, true otherwise. */
2367
2368static bool
9e0d85a7 2369duplicate_thread_path (edge entry, edge exit,
ded1c768 2370 basic_block *region, unsigned n_region,
2371 basic_block *region_copy)
2372{
2373 unsigned i;
af5f6a93 2374 bool free_region_copy = false;
ded1c768 2375 struct loop *loop = entry->dest->loop_father;
2376 edge exit_copy;
2377 edge redirected;
2378 int total_freq = 0, entry_freq = 0;
2379 gcov_type total_count = 0, entry_count = 0;
2380
2381 if (!can_copy_bbs_p (region, n_region))
2382 return false;
2383
2384 /* Some sanity checking. Note that we do not check for all possible
 2385	     misuses of the functions.  I.e. if you ask to copy something weird,
2386 it will work, but the state of structures probably will not be
2387 correct. */
2388 for (i = 0; i < n_region; i++)
2389 {
2390 /* We do not handle subloops, i.e. all the blocks must belong to the
2391 same loop. */
2392 if (region[i]->loop_father != loop)
2393 return false;
2394 }
2395
2396 initialize_original_copy_tables ();
2397
af5f6a93 2398 set_loop_copy (loop, loop);
ded1c768 2399
2400 if (!region_copy)
2401 {
2402 region_copy = XNEWVEC (basic_block, n_region);
2403 free_region_copy = true;
2404 }
2405
2406 if (entry->dest->count)
2407 {
2408 total_count = entry->dest->count;
2409 entry_count = entry->count;
2410 /* Fix up corner cases, to avoid division by zero or creation of negative
2411 frequencies. */
2412 if (entry_count > total_count)
2413 entry_count = total_count;
2414 }
2415 else
2416 {
2417 total_freq = entry->dest->frequency;
2418 entry_freq = EDGE_FREQUENCY (entry);
2419 /* Fix up corner cases, to avoid division by zero or creation of negative
2420 frequencies. */
2421 if (total_freq == 0)
2422 total_freq = 1;
2423 else if (entry_freq > total_freq)
2424 entry_freq = total_freq;
2425 }
2426
2427 copy_bbs (region, n_region, region_copy, &exit, 1, &exit_copy, loop,
9e0d85a7 2428 split_edge_bb_loc (entry), false);
2429
2430 /* Fix up: copy_bbs redirects all edges pointing to copied blocks. The
2431 following code ensures that all the edges exiting the jump-thread path are
2432 redirected back to the original code: these edges are exceptions
2433 invalidating the property that is propagated by executing all the blocks of
2434 the jump-thread path in order. */
2435
2436 for (i = 0; i < n_region; i++)
2437 {
2438 edge e;
2439 edge_iterator ei;
2440 basic_block bb = region_copy[i];
2441
2442 if (single_succ_p (bb))
2443 {
2444 /* Make sure the successor is the next node in the path. */
2445 gcc_assert (i + 1 == n_region
2446 || region_copy[i + 1] == single_succ_edge (bb)->dest);
2447 continue;
2448 }
2449
2450 /* Special case the last block on the path: make sure that it does not
2451 jump back on the copied path. */
2452 if (i + 1 == n_region)
2453 {
2454 FOR_EACH_EDGE (e, ei, bb->succs)
2455 if (bb_in_bbs (e->dest, region_copy, n_region - 1))
2456 {
2457 basic_block orig = get_bb_original (e->dest);
2458 if (orig)
2459 redirect_edge_and_branch_force (e, orig);
2460 }
2461 continue;
2462 }
2463
2464 /* Redirect all other edges jumping to non-adjacent blocks back to the
2465 original code. */
2466 FOR_EACH_EDGE (e, ei, bb->succs)
2467 if (region_copy[i + 1] != e->dest)
2468 {
2469 basic_block orig = get_bb_original (e->dest);
2470 if (orig)
2471 redirect_edge_and_branch_force (e, orig);
2472 }
2473 }
2474
ded1c768 2475 if (total_count)
2476 {
2477 scale_bbs_frequencies_gcov_type (region, n_region,
2478 total_count - entry_count,
2479 total_count);
2480 scale_bbs_frequencies_gcov_type (region_copy, n_region, entry_count,
2481 total_count);
2482 }
2483 else
2484 {
2485 scale_bbs_frequencies_int (region, n_region, total_freq - entry_freq,
2486 total_freq);
2487 scale_bbs_frequencies_int (region_copy, n_region, entry_freq, total_freq);
2488 }
2489
2490#ifdef ENABLE_CHECKING
9e0d85a7 2491 verify_jump_thread (region_copy, n_region);
ded1c768 2492#endif
2493
2494 /* Remove the last branch in the jump thread path. */
2495 remove_ctrl_stmt_and_useless_edges (region_copy[n_region - 1], exit->dest);
7729459f 2496
2497 /* And fixup the flags on the single remaining edge. */
2498 edge fix_e = find_edge (region_copy[n_region - 1], exit->dest);
2499 fix_e->flags &= ~(EDGE_TRUE_VALUE | EDGE_FALSE_VALUE | EDGE_ABNORMAL);
2500 fix_e->flags |= EDGE_FALLTHRU;
2501
ded1c768 2502 edge e = make_edge (region_copy[n_region - 1], exit->dest, EDGE_FALLTHRU);
2503
2504 if (e) {
2505 rescan_loop_exit (e, true, false);
2506 e->probability = REG_BR_PROB_BASE;
2507 e->count = region_copy[n_region - 1]->count;
2508 }
2509
2510 /* Redirect the entry and add the phi node arguments. */
af5f6a93 2511 if (entry->dest == loop->header)
2512 mark_loop_for_removal (loop);
ded1c768 2513 redirected = redirect_edge_and_branch (entry, get_bb_copy (entry->dest));
2514 gcc_assert (redirected != NULL);
2515 flush_pending_stmts (entry);
2516
2517 /* Add the other PHI node arguments. */
2518 add_phi_args_after_copy (region_copy, n_region, NULL);
2519
2520 if (free_region_copy)
2521 free (region_copy);
2522
2523 free_original_copy_tables ();
2524 return true;
2525}
2526
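/* As an illustrative example (block names are hypothetical), for an FSM
   thread path made of the edges (A->B), (B->C), (C->D) the caller passes
   ENTRY = A->B, EXIT = C->D and REGION = {B, C}, i.e. the destinations of
   every edge except the last, which is how thread_through_all_blocks
   below sets up the call.  */
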
b9903eb3 2527/* Return true when PATH is a valid jump-thread path. */
2528
2529static bool
2530valid_jump_thread_path (vec<jump_thread_edge *> *path)
2531{
2532 unsigned len = path->length ();
2533
2534 /* Check that the path is connected. */
2535 for (unsigned int j = 0; j < len - 1; j++)
2536 if ((*path)[j]->e->dest != (*path)[j+1]->e->src)
2537 return false;
2538
2539 return true;
2540}
2541
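/* For example, a path of edges (A->B), (B->C), (C->D) is connected and
   therefore valid, while (A->B), (C->D) is rejected because B is not the
   source of the second edge.  */
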
f1344f45 2542/* Remove any queued jump threads that start at BB. */
2543
2544void
2545remove_jump_threads_starting_at (basic_block bb)
2546{
2547 if (!paths.exists ())
2548 return;
2549
2550 for (unsigned i = 0; i < paths.length ();)
2551 {
2552 vec<jump_thread_edge *> *path = paths[i];
2553
2554 /* Sadly, FSM jump threads have a slightly different
2555 representation than the rest of the jump threads. */
2556 if ((*path)[0]->type == EDGE_FSM_THREAD
2557 && (*path)[0]->e->src == bb)
2558 {
2559 delete_jump_thread_path (path);
2560 paths.unordered_remove (i);
2561 }
2562 else if ((*path)[0]->type != EDGE_FSM_THREAD
2563 && (*path)[0]->e->dest == bb)
2564 {
2565 delete_jump_thread_path (path);
2566 paths.unordered_remove (i);
2567 }
2568 else
2569 i++;
2570 }
2571}
2572
3cebc9d2 2573/* Walk through all blocks and thread incoming edges to the appropriate
2574 outgoing edge for each edge pair recorded in THREADED_EDGES.
a8046f60 2575
2576 It is the caller's responsibility to fix the dominance information
2577 and rewrite duplicated SSA_NAMEs back into SSA form.
2578
7e0311ae 2579 If MAY_PEEL_LOOP_HEADERS is false, we avoid threading edges through
2580 loop headers if it does not simplify the loop.
2581
dac49aa5 2582 Returns true if one or more edges were threaded, false otherwise. */
a8046f60 2583
2584bool
7e0311ae 2585thread_through_all_blocks (bool may_peel_loop_headers)
a8046f60 2586{
a8046f60 2587 bool retval = false;
7ea47fbd 2588 unsigned int i;
2589 bitmap_iterator bi;
3cebc9d2 2590 bitmap threaded_blocks;
7e0311ae 2591 struct loop *loop;
3cebc9d2 2592
f2981b08 2593 if (!paths.exists ())
3cebc9d2 2594 return false;
a8046f60 2595
3cebc9d2 2596 threaded_blocks = BITMAP_ALLOC (NULL);
5236b8bb 2597 memset (&thread_stats, 0, sizeof (thread_stats));
388d1fc1 2598
ded1c768 2599 /* Jump-thread all FSM threads before other jump-threads. */
2600 for (i = 0; i < paths.length ();)
2601 {
2602 vec<jump_thread_edge *> *path = paths[i];
2603 edge entry = (*path)[0]->e;
2604
b9903eb3 2605 /* Only code-generate FSM jump-threads in this loop. */
2606 if ((*path)[0]->type != EDGE_FSM_THREAD)
2607 {
2608 i++;
2609 continue;
2610 }
2611
2612 /* Do not jump-thread twice from the same block. */
2613 if (bitmap_bit_p (threaded_blocks, entry->src->index)
2614 /* Verify that the jump thread path is still valid: a
2615 previous jump-thread may have changed the CFG, and
2616 invalidated the current path. */
2617 || !valid_jump_thread_path (path))
2618 {
2619 /* Remove invalid FSM jump-thread paths. */
2620 delete_jump_thread_path (path);
2621 paths.unordered_remove (i);
2622 continue;
2623 }
ded1c768 2624
2625 unsigned len = path->length ();
2626 edge exit = (*path)[len - 1]->e;
2627 basic_block *region = XNEWVEC (basic_block, len - 1);
2628
2629 for (unsigned int j = 0; j < len - 1; j++)
2630 region[j] = (*path)[j]->e->dest;
2631
9e0d85a7 2632 if (duplicate_thread_path (entry, exit, region, len - 1, NULL))
ded1c768 2633 {
2634 /* We do not update dominance info. */
2635 free_dominance_info (CDI_DOMINATORS);
2636 bitmap_set_bit (threaded_blocks, entry->src->index);
2637 retval = true;
2638 }
2639
2640 delete_jump_thread_path (path);
2641 paths.unordered_remove (i);
2642 }
2643
2644 /* Remove from PATHS all the jump-threads starting with an edge already
2645 jump-threaded. */
2646 for (i = 0; i < paths.length ();)
2647 {
2648 vec<jump_thread_edge *> *path = paths[i];
2649 edge entry = (*path)[0]->e;
2650
2651 /* Do not jump-thread twice from the same block. */
2652 if (bitmap_bit_p (threaded_blocks, entry->src->index))
2653 {
2654 delete_jump_thread_path (path);
2655 paths.unordered_remove (i);
2656 }
2657 else
2658 i++;
2659 }
2660
2661 bitmap_clear (threaded_blocks);
2662
3cebc9d2 2663 mark_threaded_blocks (threaded_blocks);
2664
96c90e5e 2665 initialize_original_copy_tables ();
7e0311ae 2666
2667 /* First perform the threading requests that do not affect
2668 loop structure. */
7ea47fbd 2669 EXECUTE_IF_SET_IN_BITMAP (threaded_blocks, 0, i, bi)
a8046f60 2670 {
f5a6b05f 2671 basic_block bb = BASIC_BLOCK_FOR_FN (cfun, i);
7ea47fbd 2672
2673 if (EDGE_COUNT (bb->preds) > 0)
7e0311ae 2674 retval |= thread_block (bb, true);
2675 }
2676
2677 /* Then perform the threading through loop headers. We start with the
2678 innermost loop, so that the changes in cfg we perform won't affect
2679 further threading. */
f21d4d00 2680 FOR_EACH_LOOP (loop, LI_FROM_INNERMOST)
7e0311ae 2681 {
7a3bf727 2682 if (!loop->header
2683 || !bitmap_bit_p (threaded_blocks, loop->header->index))
2684 continue;
7e0311ae 2685
7a3bf727 2686 retval |= thread_through_loop_header (loop, may_peel_loop_headers);
a8046f60 2687 }
388d1fc1 2688
ab596744 2689 /* Any jump threading paths that are still attached to edges at this
2690 point must be one of two cases.
2691
2692 First, we could have a jump threading path which went from outside
2693 a loop to inside a loop that was ignored because a prior jump thread
2694 across a backedge was realized (which indirectly causes the loop
2695 above to ignore the latter thread). We can detect these because the
2696 loop structures will be different and we do not currently try to
2697 optimize this case.
2698
2699 Second, we could be threading across a backedge to a point within the
 2700	     same loop.  This occurs for the FSA/FSM optimization and we would
2701 like to optimize it. However, we have to be very careful as this
2702 may completely scramble the loop structures, with the result being
 2703	     irreducible loops that cause us to throw away our loop structure.
2704
2705 As a compromise for the latter case, if the thread path ends in
2706 a block where the last statement is a multiway branch, then go
2707 ahead and thread it, else ignore it. */
ed4feca1 2708 basic_block bb;
ed4feca1 2709 edge e;
fc00614f 2710 FOR_EACH_BB_FN (bb, cfun)
ed4feca1 2711 {
ab596744 2712 /* If we do end up threading here, we can remove elements from
2713 BB->preds. Thus we can not use the FOR_EACH_EDGE iterator. */
2714 for (edge_iterator ei = ei_start (bb->preds);
2715 (e = ei_safe_edge (ei));)
ed4feca1 2716 if (e->aux)
2717 {
2718 vec<jump_thread_edge *> *path = THREAD_PATH (e);
2719
ab596744 2720 /* Case 1, threading from outside to inside the loop
2721 after we'd already threaded through the header. */
2722 if ((*path)[0]->e->dest->loop_father
2723 != path->last ()->e->src->loop_father)
2724 {
2725 delete_jump_thread_path (path);
2726 e->aux = NULL;
2727 ei_next (&ei);
2728 }
2729 else if (bb_ends_with_multiway_branch (path->last ()->e->src))
2730 {
2731 /* The code to thread through loop headers may have
2732 split a block with jump threads attached to it.
2733
2734 We can identify this with a disjoint jump threading
2735 path. If found, just remove it. */
2736 for (unsigned int i = 0; i < path->length () - 1; i++)
2737 if ((*path)[i]->e->dest != (*path)[i + 1]->e->src)
2738 {
2739 delete_jump_thread_path (path);
2740 e->aux = NULL;
2741 ei_next (&ei);
2742 break;
2743 }
2744
2745 /* Our path is still valid, thread it. */
9943b198 2746 if (e->aux)
ab596744 2747 {
d2644aa0 2748 if (thread_block ((*path)[0]->e->dest, false))
addf6c7a 2749 e->aux = NULL;
d2644aa0 2750 else
2751 {
9943b198 2752 delete_jump_thread_path (path);
d2644aa0 2753 e->aux = NULL;
2754 ei_next (&ei);
2755 }
ab596744 2756 }
2757 }
2758 else
2759 {
2760 delete_jump_thread_path (path);
2761 e->aux = NULL;
2762 ei_next (&ei);
2763 }
ed4feca1 2764 }
ab596744 2765 else
2766 ei_next (&ei);
ed4feca1 2767 }
2768
581f8050 2769 statistics_counter_event (cfun, "Jumps threaded",
2770 thread_stats.num_threaded_edges);
5236b8bb 2771
96c90e5e 2772 free_original_copy_tables ();
2773
3cebc9d2 2774 BITMAP_FREE (threaded_blocks);
2775 threaded_blocks = NULL;
f2981b08 2776 paths.release ();
7e0311ae 2777
396c773e 2778 if (retval)
f24ec26f 2779 loops_state_set (LOOPS_NEED_FIXUP);
eb2a640e 2780
a8046f60 2781 return retval;
2782}
3cebc9d2 2783
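/* A minimal usage sketch (illustrative only; E and E2 stand for edges a
   client pass has proven to form a threadable pair, with E2 an outgoing
   edge of E->dest):

     vec<jump_thread_edge *> *path = new vec<jump_thread_edge *> ();
     path->safe_push (new jump_thread_edge (e, EDGE_START_JUMP_THREAD));
     path->safe_push (new jump_thread_edge (e2, EDGE_COPY_SRC_BLOCK));
     register_jump_thread (path);
     ...
     thread_through_all_blocks (false);

   The requests queued by register_jump_thread are realized, and the
   queue released, by the call to thread_through_all_blocks.  */
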
6d1fdbf9 2784 /* Delete the jump threading path PATH.  We have to explicitly delete
2785 each entry in the vector, then the container. */
2786
2787void
2788delete_jump_thread_path (vec<jump_thread_edge *> *path)
2789{
2790 for (unsigned int i = 0; i < path->length (); i++)
2791 delete (*path)[i];
2792 path->release();
9b5a88db 2793 delete path;
6d1fdbf9 2794}
2795
3cebc9d2 2796/* Register a jump threading opportunity. We queue up all the jump
2797 threading opportunities discovered by a pass and update the CFG
2798 and SSA form all at once.
2799
f0b5f617 2800    The first edge on PATH is the edge we can thread and the last edge is the
3cebc9d2 2801    new target edge, i.e., we are effectively recording that the first edge's
 2802	 destination can be changed to the last edge's destination after fixing the SSA graph.  */
2803
2804void
f2981b08 2805register_jump_thread (vec<jump_thread_edge *> *path)
3cebc9d2 2806{
a3724f9d 2807 if (!dbg_cnt (registered_jump_thread))
2808 {
6d1fdbf9 2809 delete_jump_thread_path (path);
a3724f9d 2810 return;
2811 }
2812
0c5b289a 2813 /* First make sure there are no NULL outgoing edges on the jump threading
2814 path. That can happen for jumping to a constant address. */
f2981b08 2815 for (unsigned int i = 0; i < path->length (); i++)
2816 if ((*path)[i]->e == NULL)
0c5b289a 2817 {
2818 if (dump_file && (dump_flags & TDF_DETAILS))
2819 {
2820 fprintf (dump_file,
2821 "Found NULL edge in jump threading path. Cancelling jump thread:\n");
b93ba654 2822 dump_jump_thread_path (dump_file, *path, false);
0c5b289a 2823 }
f2981b08 2824
6d1fdbf9 2825 delete_jump_thread_path (path);
0c5b289a 2826 return;
2827 }
5411af4e 2828
631d940c 2829 if (dump_file && (dump_flags & TDF_DETAILS))
b93ba654 2830 dump_jump_thread_path (dump_file, *path, true);
631d940c 2831
f2981b08 2832 if (!paths.exists ())
2833 paths.create (5);
631d940c 2834
f2981b08 2835 paths.safe_push (path);
3cebc9d2 2836}