/* Thread edges through blocks and update the control flow and SSA graphs.
   Copyright (C) 2004-2013 Free Software Foundation, Inc.

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 3, or (at your option)
any later version.

GCC is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
GNU General Public License for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3.  If not see
<http://www.gnu.org/licenses/>.  */

#include "config.h"
#include "system.h"
#include "coretypes.h"
#include "tree.h"
#include "flags.h"
#include "basic-block.h"
#include "function.h"
#include "gimple.h"
#include "gimple-iterator.h"
#include "gimple-ssa.h"
#include "tree-phinodes.h"
#include "tree-ssa.h"
#include "tree-ssa-threadupdate.h"
#include "dumpfile.h"
#include "cfgloop.h"
#include "hash-table.h"
#include "dbgcnt.h"

/* Given a block B, update the CFG and SSA graph to reflect redirecting
   one or more in-edges to B to instead reach the destination of an
   out-edge from B while preserving any side effects in B.

   i.e., given A->B and B->C, change A->B to be A->C yet still preserve the
   side effects of executing B.

     1. Make a copy of B (including its outgoing edges and statements).  Call
        the copy B'.  Note B' has no incoming edges or PHIs at this time.

     2. Remove the control statement at the end of B' and all outgoing edges
        except B'->C.

     3. Add a new argument to each PHI in C with the same value as the existing
        argument associated with edge B->C.  Associate the new PHI arguments
        with the edge B'->C.

     4. For each PHI in B, find or create a PHI in B' with an identical
        PHI_RESULT.  Add an argument to the PHI in B' which has the same
        value as the PHI in B associated with the edge A->B.  Associate
        the new argument in the PHI in B' with the edge A->B.

     5. Change the edge A->B to A->B'.

        5a. This automatically deletes any PHI arguments associated with the
            edge A->B in B.

        5b. This automatically associates each new argument added in step 4
            with the edge A->B'.

     6. Repeat for other incoming edges into B.

     7. Put the duplicated resources in B and all the B' blocks into SSA form.

   Note that block duplication can be minimized by first collecting the
   set of unique destination blocks that the incoming edges should
   be threaded to.

   Block duplication can be further minimized by using B instead of
   creating B' for one destination if all edges into B are going to be
   threaded to a successor of B.  We had code to do this at one time, but
   I'm not convinced it is correct with the changes to avoid mucking up
   the loop structure (which may cancel threading requests, thus a block
   which we thought was going to become unreachable may still be reachable).
   This code was also going to get ugly with the introduction of the ability
   for a single jump thread request to bypass multiple blocks.

   We further reduce the number of edges and statements we create by
   not copying all the outgoing edges and the control statement in
   step #1.  We instead create a template block without the outgoing
   edges and duplicate the template.  */

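/* As a small worked illustration of the steps above (block names
   invented): given A1->B, A2->B and a conditional in B known to take
   B->C on both incoming paths, we create one duplicate B' from the
   template, add the edge B'->C, copy the PHI arguments in C associated
   with B->C over to B'->C (step 3), extend the PHIs in B' from those in
   B (step 4), and finally redirect A1->B and A2->B to B' (steps 5 and
   6).  Both predecessors then reach C without executing B's
   conditional.  */
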
/* Steps #5 and #6 of the above algorithm are best implemented by walking
   all the incoming edges which thread to the same destination edge at
   the same time.  That avoids lots of table lookups to get information
   for the destination edge.

   To realize that implementation we create a list of incoming edges
   which thread to the same outgoing edge.  Thus to implement steps
   #5 and #6 we traverse our hash table of outgoing edge information.
   For each entry we walk the list of incoming edges which thread to
   the current outgoing edge.  */

struct el
{
  edge e;
  struct el *next;
};
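
/* The incoming edge lists built from this structure are singly linked;
   their entries are allocated in lookup_redirection_data and freed one
   at a time as ssa_redirect_edges walks them later in this file.  */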

/* Main data structure recording information regarding B's duplicate
   blocks.  */

/* We need to efficiently record the unique thread destinations of this
   block and specific information associated with those destinations.  We
   may have many incoming edges threaded to the same outgoing edge.  This
   can be naturally implemented with a hash table.  */

struct redirection_data : typed_free_remove<redirection_data>
{
  /* A duplicate of B with the trailing control statement removed and which
     targets a single successor of B.  */
  basic_block dup_block;

  /* The jump threading path.  */
  vec<jump_thread_edge *> *path;

  /* A list of incoming edges which we want to thread to the
     same path.  */
  struct el *incoming_edges;

  /* hash_table support.  */
  typedef redirection_data value_type;
  typedef redirection_data compare_type;
  static inline hashval_t hash (const value_type *);
  static inline int equal (const value_type *, const compare_type *);
};

/* Simple hashing function.  For any given incoming edge E, we're going
   to be most concerned with the final destination of its jump thread
   path.  So hash on the block index of the final edge in the path.  */

inline hashval_t
redirection_data::hash (const value_type *p)
{
  vec<jump_thread_edge *> *path = p->path;
  return path->last ()->e->dest->index;
}

/* Given two hash table entries, return true if they have the same
   jump threading path.  */
inline int
redirection_data::equal (const value_type *p1, const compare_type *p2)
{
  vec<jump_thread_edge *> *path1 = p1->path;
  vec<jump_thread_edge *> *path2 = p2->path;

  if (path1->length () != path2->length ())
    return false;

  for (unsigned int i = 1; i < path1->length (); i++)
    {
      if ((*path1)[i]->type != (*path2)[i]->type
          || (*path1)[i]->e != (*path2)[i]->e)
        return false;
    }

  return true;
}
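
/* Note that the comparison above deliberately starts at entry 1: entry
   0 of a path holds the incoming edge, which naturally differs between
   two requests that thread through the same blocks to the same final
   destination, and such requests should share one hash table entry (and
   thus one duplicate block).  */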

/* Data structure of information to pass to hash table traversal routines.  */
struct ssa_local_info_t
{
  /* The current block we are working on.  */
  basic_block bb;

  /* A template copy of BB with no outgoing edges or control statement that
     we use for creating copies.  */
  basic_block template_block;

  /* TRUE if we thread one or more jumps, FALSE otherwise.  */
  bool jumps_threaded;
};

/* Passes which use the jump threading code register jump threading
   opportunities as they are discovered.  We keep the registered
   jump threading opportunities in this vector as edge pairs
   (original_edge, target_edge).  */
static vec<vec<jump_thread_edge *> *> paths;

/* When we start updating the CFG for threading, data necessary for jump
   threading is attached to the AUX field for the incoming edge.  Use these
   macros to access the underlying structure attached to the AUX field.  */
#define THREAD_PATH(E) ((vec<jump_thread_edge *> *)(E)->aux)
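
/* As an illustrative sketch (the edges E and TAKEN here are
   hypothetical, supplied by a client pass such as DOM), a path is built
   from jump_thread_edge entries and handed to register_jump_thread at
   the end of this file; the vector later ends up in E->aux and is read
   back through THREAD_PATH:

     vec<jump_thread_edge *> *path = new vec<jump_thread_edge *> ();
     path->safe_push (new jump_thread_edge (e, EDGE_START_JUMP_THREAD));
     path->safe_push (new jump_thread_edge (taken, EDGE_COPY_SRC_BLOCK));
     register_jump_thread (path);  */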

/* Jump threading statistics.  */

struct thread_stats_d
{
  unsigned long num_threaded_edges;
};

struct thread_stats_d thread_stats;

/* Remove the last statement in block BB if it is a control statement.
   Also remove all outgoing edges except the edge which reaches DEST_BB.
   If DEST_BB is NULL, then remove all outgoing edges.  */

static void
remove_ctrl_stmt_and_useless_edges (basic_block bb, basic_block dest_bb)
{
  gimple_stmt_iterator gsi;
  edge e;
  edge_iterator ei;

  gsi = gsi_last_bb (bb);

  /* If the duplicate ends with a control statement, then remove it.

     Note that if we are duplicating the template block rather than the
     original basic block, then the duplicate might not have any real
     statements in it.  */
  if (!gsi_end_p (gsi)
      && gsi_stmt (gsi)
      && (gimple_code (gsi_stmt (gsi)) == GIMPLE_COND
          || gimple_code (gsi_stmt (gsi)) == GIMPLE_GOTO
          || gimple_code (gsi_stmt (gsi)) == GIMPLE_SWITCH))
    gsi_remove (&gsi, true);

  for (ei = ei_start (bb->succs); (e = ei_safe_edge (ei)); )
    {
      if (e->dest != dest_bb)
        remove_edge (e);
      else
        ei_next (&ei);
    }
}

/* Create a duplicate of BB.  Record the duplicate block in RD.  */

static void
create_block_for_threading (basic_block bb, struct redirection_data *rd)
{
  edge_iterator ei;
  edge e;

  /* We can use the generic block duplication code and simply remove
     the stuff we do not need.  */
  rd->dup_block = duplicate_block (bb, NULL, NULL);

  FOR_EACH_EDGE (e, ei, rd->dup_block->succs)
    e->aux = NULL;

  /* Zero out the profile, since the block is unreachable for now.  */
  rd->dup_block->frequency = 0;
  rd->dup_block->count = 0;
}

/* Main data structure to hold information for duplicates of BB.  */

static hash_table <redirection_data> redirection_data;

/* Given an outgoing edge E lookup and return its entry in our hash table.

   If INSERT is true, then we insert the entry into the hash table if
   it is not already present.  INCOMING_EDGE is added to the list of incoming
   edges associated with E in the hash table.  */

static struct redirection_data *
lookup_redirection_data (edge e, enum insert_option insert)
{
  struct redirection_data **slot;
  struct redirection_data *elt;
  vec<jump_thread_edge *> *path = THREAD_PATH (e);

  /* Build a hash table element so we can see if E is already
     in the table.  */
  elt = XNEW (struct redirection_data);
  elt->path = path;
  elt->dup_block = NULL;
  elt->incoming_edges = NULL;

  slot = redirection_data.find_slot (elt, insert);

  /* This will only happen if INSERT is false and the entry is not
     in the hash table.  */
  if (slot == NULL)
    {
      free (elt);
      return NULL;
    }

  /* This will only happen if E was not in the hash table and
     INSERT is true.  */
  if (*slot == NULL)
    {
      *slot = elt;
      elt->incoming_edges = XNEW (struct el);
      elt->incoming_edges->e = e;
      elt->incoming_edges->next = NULL;
      return elt;
    }
  /* E was in the hash table.  */
  else
    {
      /* Free ELT as we do not need it anymore, we will extract the
         relevant entry from the hash table itself.  */
      free (elt);

      /* Get the entry stored in the hash table.  */
      elt = *slot;

      /* If insertion was requested, then we need to add INCOMING_EDGE
         to the list of incoming edges associated with E.  */
      if (insert)
        {
          struct el *el = XNEW (struct el);
          el->next = elt->incoming_edges;
          el->e = e;
          elt->incoming_edges = el;
        }

      return elt;
    }
}

/* For each PHI in BB, copy the argument associated with SRC_E to TGT_E.  */

static void
copy_phi_args (basic_block bb, edge src_e, edge tgt_e)
{
  gimple_stmt_iterator gsi;
  int src_indx = src_e->dest_idx;

  for (gsi = gsi_start_phis (bb); !gsi_end_p (gsi); gsi_next (&gsi))
    {
      gimple phi = gsi_stmt (gsi);
      source_location locus = gimple_phi_arg_location (phi, src_indx);
      add_phi_arg (phi, gimple_phi_arg_def (phi, src_indx), tgt_e, locus);
    }
}

/* We have recently made a copy of ORIG_BB, including its outgoing
   edges.  The copy is NEW_BB.  Every PHI node in every direct successor of
   ORIG_BB has a new argument associated with edge from NEW_BB to the
   successor.  Initialize the PHI argument so that it is equal to the PHI
   argument associated with the edge from ORIG_BB to the successor.  */

static void
update_destination_phis (basic_block orig_bb, basic_block new_bb)
{
  edge_iterator ei;
  edge e;

  FOR_EACH_EDGE (e, ei, orig_bb->succs)
    {
      edge e2 = find_edge (new_bb, e->dest);
      copy_phi_args (e->dest, e, e2);
    }
}

/* Given a duplicate block and its single destination (both stored
   in RD).  Create an edge between the duplicate and its single
   destination.

   Add an additional argument to any PHI nodes at the single
   destination.  */

static void
create_edge_and_update_destination_phis (struct redirection_data *rd,
                                         basic_block bb)
{
  edge e = make_edge (bb, rd->path->last ()->e->dest, EDGE_FALLTHRU);

  rescan_loop_exit (e, true, false);
  e->probability = REG_BR_PROB_BASE;
  e->count = bb->count;

  /* We have to copy path -- which means creating a new vector as well
     as all the jump_thread_edge entries.  */
  if (rd->path->last ()->e->aux)
    {
      vec<jump_thread_edge *> *path = THREAD_PATH (rd->path->last ()->e);
      vec<jump_thread_edge *> *copy = new vec<jump_thread_edge *> ();

      /* Sadly, the elements of the vector are pointers and need to
         be copied as well.  */
      for (unsigned int i = 0; i < path->length (); i++)
        {
          jump_thread_edge *x
            = new jump_thread_edge ((*path)[i]->e, (*path)[i]->type);
          copy->safe_push (x);
        }
      e->aux = (void *)copy;
    }
  else
    {
      e->aux = NULL;
    }

  /* If there are any PHI nodes at the destination of the outgoing edge
     from the duplicate block, then we will need to add a new argument
     to them.  The argument should have the same value as the argument
     associated with the outgoing edge stored in RD.  */
  copy_phi_args (e->dest, rd->path->last ()->e, e);
}

/* Wire up the outgoing edges from the duplicate block and
   update any PHIs as needed.  */
void
ssa_fix_duplicate_block_edges (struct redirection_data *rd,
                               ssa_local_info_t *local_info)
{
  edge e = rd->incoming_edges->e;
  vec<jump_thread_edge *> *path = THREAD_PATH (e);

  /* If we were threading through a joiner block, then we want
     to keep its control statement and redirect an outgoing edge.
     Else we want to remove the control statement & edges, then create
     a new outgoing edge.  In both cases we may need to update PHIs.  */
  if ((*path)[1]->type == EDGE_COPY_SRC_JOINER_BLOCK)
    {
      edge victim;
      edge e2;

      /* This updates the PHIs at the destination of the duplicate
         block.  */
      update_destination_phis (local_info->bb, rd->dup_block);

      /* Find the edge from the duplicate block to the block we're
         threading through.  That's the edge we want to redirect.  */
      victim = find_edge (rd->dup_block, (*path)[1]->e->dest);
      e2 = redirect_edge_and_branch (victim, path->last ()->e->dest);
      e2->count = path->last ()->e->count;

      /* If we redirected the edge, then we need to copy PHI arguments
         at the target.  If the edge already existed (e2 != victim case),
         then the PHIs in the target already have the correct arguments.  */
      if (e2 == victim)
        copy_phi_args (e2->dest, path->last ()->e, e2);
    }
  else
    {
      remove_ctrl_stmt_and_useless_edges (rd->dup_block, NULL);
      create_edge_and_update_destination_phis (rd, rd->dup_block);
    }
}
/* Hash table traversal callback routine to create duplicate blocks.  */

int
ssa_create_duplicates (struct redirection_data **slot,
                       ssa_local_info_t *local_info)
{
  struct redirection_data *rd = *slot;

  /* Create a template block if we have not done so already.  Otherwise
     use the template to create a new block.  */
  if (local_info->template_block == NULL)
    {
      create_block_for_threading (local_info->bb, rd);
      local_info->template_block = rd->dup_block;

      /* We do not create any outgoing edges for the template.  We will
         take care of that in a later traversal.  That way we do not
         create edges that are going to just be deleted.  */
    }
  else
    {
      create_block_for_threading (local_info->template_block, rd);

      /* Go ahead and wire up outgoing edges and update PHIs for the duplicate
         block.  */
      ssa_fix_duplicate_block_edges (rd, local_info);
    }

  /* Keep walking the hash table.  */
  return 1;
}

/* We did not create any outgoing edges for the template block during
   block creation.  This hash table traversal callback creates the
   outgoing edge for the template block.  */

inline int
ssa_fixup_template_block (struct redirection_data **slot,
                          ssa_local_info_t *local_info)
{
  struct redirection_data *rd = *slot;

  /* If this is the template block halt the traversal after updating
     it appropriately.

     If we were threading through a joiner block, then we want
     to keep its control statement and redirect an outgoing edge.
     Else we want to remove the control statement & edges, then create
     a new outgoing edge.  In both cases we may need to update PHIs.  */
  if (rd->dup_block && rd->dup_block == local_info->template_block)
    {
      ssa_fix_duplicate_block_edges (rd, local_info);
      return 0;
    }

  return 1;
}

/* Hash table traversal callback to redirect each incoming edge
   associated with this hash table element to its new destination.  */

int
ssa_redirect_edges (struct redirection_data **slot,
                    ssa_local_info_t *local_info)
{
  struct redirection_data *rd = *slot;
  struct el *next, *el;

  /* Walk over all the incoming edges associated with this
     hash table entry.  */
  for (el = rd->incoming_edges; el; el = next)
    {
      edge e = el->e;
      vec<jump_thread_edge *> *path = THREAD_PATH (e);

      /* Go ahead and free this element from the list.  Doing this now
         avoids the need for another list walk when we destroy the hash
         table.  */
      next = el->next;
      free (el);

      thread_stats.num_threaded_edges++;

      if (rd->dup_block)
        {
          edge e2;

          if (dump_file && (dump_flags & TDF_DETAILS))
            fprintf (dump_file, "  Threaded jump %d --> %d to %d\n",
                     e->src->index, e->dest->index, rd->dup_block->index);

          rd->dup_block->count += e->count;

          /* Excessive jump threading may make frequencies large enough so
             the computation overflows.  */
          if (rd->dup_block->frequency < BB_FREQ_MAX * 2)
            rd->dup_block->frequency += EDGE_FREQUENCY (e);

          /* In the case of threading through a joiner block, the outgoing
             edges from the duplicate block were updated when they were
             redirected during ssa_fix_duplicate_block_edges.  */
          if ((*path)[1]->type != EDGE_COPY_SRC_JOINER_BLOCK)
            EDGE_SUCC (rd->dup_block, 0)->count += e->count;

          /* Redirect the incoming edge (possibly to the joiner block) to the
             appropriate duplicate block.  */
          e2 = redirect_edge_and_branch (e, rd->dup_block);
          gcc_assert (e == e2);
          flush_pending_stmts (e2);
        }

      /* Go ahead and clear E->aux.  It's not needed anymore and failure
         to clear it will cause all kinds of unpleasant problems later.  */
      delete_jump_thread_path (path);
      e->aux = NULL;
    }

  /* Indicate that we actually threaded one or more jumps.  */
  if (rd->incoming_edges)
    local_info->jumps_threaded = true;

  return 1;
}

/* Return true if this block has no executable statements other than
   a simple ctrl flow instruction.  When the number of outgoing edges
   is one, this is equivalent to a "forwarder" block.  */

static bool
redirection_block_p (basic_block bb)
{
  gimple_stmt_iterator gsi;

  /* Advance to the first executable statement.  */
  gsi = gsi_start_bb (bb);
  while (!gsi_end_p (gsi)
         && (gimple_code (gsi_stmt (gsi)) == GIMPLE_LABEL
             || is_gimple_debug (gsi_stmt (gsi))
             || gimple_nop_p (gsi_stmt (gsi))))
    gsi_next (&gsi);

  /* Check if this is an empty block.  */
  if (gsi_end_p (gsi))
    return true;

  /* Test that we've reached the terminating control statement.  */
  return gsi_stmt (gsi)
         && (gimple_code (gsi_stmt (gsi)) == GIMPLE_COND
             || gimple_code (gsi_stmt (gsi)) == GIMPLE_GOTO
             || gimple_code (gsi_stmt (gsi)) == GIMPLE_SWITCH);
}

/* BB is a block which ends with a COND_EXPR or SWITCH_EXPR and when BB
   is reached via one or more specific incoming edges, we know which
   outgoing edge from BB will be traversed.

   We want to redirect those incoming edges to the target of the
   appropriate outgoing edge.  Doing so avoids a conditional branch
   and may expose new optimization opportunities.  Note that we have
   to update dominator tree and SSA graph after such changes.

   The key to keeping the SSA graph update manageable is to duplicate
   the side effects occurring in BB so that those side effects still
   occur on the paths which bypass BB after redirecting edges.

   We accomplish this by creating duplicates of BB and arranging for
   the duplicates to unconditionally pass control to one specific
   successor of BB.  We then revector the incoming edges into BB to
   the appropriate duplicate of BB.

   If NOLOOP_ONLY is true, we only perform the threading as long as it
   does not affect the structure of the loops in a nontrivial way.

   If JOINERS is true, then thread through joiner blocks as well.  */

static bool
thread_block_1 (basic_block bb, bool noloop_only, bool joiners)
{
  /* E is an incoming edge into BB that we may or may not want to
     redirect to a duplicate of BB.  */
  edge e, e2;
  edge_iterator ei;
  ssa_local_info_t local_info;
  struct loop *loop = bb->loop_father;

  /* To avoid scanning a linear array for the element we need we instead
     use a hash table.  For normal code there should be no noticeable
     difference.  However, if we have a block with a large number of
     incoming and outgoing edges such linear searches can get expensive.  */
  redirection_data.create (EDGE_COUNT (bb->succs));

  /* If we thread the latch of the loop to its exit, the loop ceases to
     exist.  Make sure we do not restrict ourselves in order to preserve
     this loop.  */
  if (loop->header == bb)
    {
      e = loop_latch_edge (loop);
      vec<jump_thread_edge *> *path = THREAD_PATH (e);

      if (path
          && (((*path)[1]->type == EDGE_COPY_SRC_JOINER_BLOCK && joiners)
              || ((*path)[1]->type == EDGE_COPY_SRC_BLOCK && !joiners)))
        {
          for (unsigned int i = 1; i < path->length (); i++)
            {
              edge e2 = (*path)[i]->e;

              if (loop_exit_edge_p (loop, e2))
                {
                  loop->header = NULL;
                  loop->latch = NULL;
                  loops_state_set (LOOPS_NEED_FIXUP);
                }
            }
        }
    }

  /* Record each unique threaded destination into a hash table for
     efficient lookups.  */
  FOR_EACH_EDGE (e, ei, bb->preds)
    {
      if (e->aux == NULL)
        continue;

      vec<jump_thread_edge *> *path = THREAD_PATH (e);

      if (((*path)[1]->type == EDGE_COPY_SRC_JOINER_BLOCK && !joiners)
          || ((*path)[1]->type == EDGE_COPY_SRC_BLOCK && joiners))
        continue;

      e2 = path->last ()->e;
      if (!e2 || noloop_only)
        {
          /* If NOLOOP_ONLY is true, we only allow threading through the
             header of a loop to exit edges.

             There are two cases to consider.  The first when BB is the
             loop header.  We will attempt to thread this elsewhere, so
             we can just continue here.  */

          if (bb == bb->loop_father->header
              && (!loop_exit_edge_p (bb->loop_father, e2)
                  || (*path)[1]->type == EDGE_COPY_SRC_JOINER_BLOCK))
            continue;

          /* The second occurs when there was loop header buried in a jump
             threading path.  We do not try and thread this elsewhere, so
             just cancel the jump threading request by clearing the AUX
             field now.  */
          if ((bb->loop_father != e2->src->loop_father
               && !loop_exit_edge_p (e2->src->loop_father, e2))
              || (e2->src->loop_father != e2->dest->loop_father
                  && !loop_exit_edge_p (e2->src->loop_father, e2)))
            {
              /* Since this case is not handled by our special code
                 to thread through a loop header, we must explicitly
                 cancel the threading request here.  */
              delete_jump_thread_path (path);
              e->aux = NULL;
              continue;
            }
        }

      if (e->dest == e2->src)
        update_bb_profile_for_threading (e->dest, EDGE_FREQUENCY (e),
                                         e->count, (*THREAD_PATH (e))[1]->e);

      /* Insert the outgoing edge into the hash table if it is not
         already in the hash table.  */
      lookup_redirection_data (e, INSERT);
    }

  /* We do not update dominance info.  */
  free_dominance_info (CDI_DOMINATORS);

  /* We know we only thread through the loop header to loop exits.
     Let the basic block duplication hook know we are not creating
     a multiple entry loop.  */
  if (noloop_only
      && bb == bb->loop_father->header)
    set_loop_copy (bb->loop_father, loop_outer (bb->loop_father));

  /* Now create duplicates of BB.

     Note that for a block with a high outgoing degree we can waste
     a lot of time and memory creating and destroying useless edges.

     So we first duplicate BB and remove the control structure at the
     tail of the duplicate as well as all outgoing edges from the
     duplicate.  We then use that duplicate block as a template for
     the rest of the duplicates.  */
  local_info.template_block = NULL;
  local_info.bb = bb;
  local_info.jumps_threaded = false;
  redirection_data.traverse <ssa_local_info_t *, ssa_create_duplicates>
                    (&local_info);

  /* The template does not have an outgoing edge.  Create that outgoing
     edge and update PHI nodes as the edge's target as necessary.

     We do this after creating all the duplicates to avoid creating
     unnecessary edges.  */
  redirection_data.traverse <ssa_local_info_t *, ssa_fixup_template_block>
                    (&local_info);

  /* The hash table traversals above created the duplicate blocks (and the
     statements within the duplicate blocks).  This loop creates PHI nodes for
     the duplicated blocks and redirects the incoming edges into BB to reach
     the duplicates of BB.  */
  redirection_data.traverse <ssa_local_info_t *, ssa_redirect_edges>
                    (&local_info);

  /* Done with this block.  Clear REDIRECTION_DATA.  */
  redirection_data.dispose ();

  if (noloop_only
      && bb == bb->loop_father->header)
    set_loop_copy (bb->loop_father, NULL);

  /* Indicate to our caller whether or not any jumps were threaded.  */
  return local_info.jumps_threaded;
}

/* Wrapper for thread_block_1 so that we can first handle jump
   thread paths which do not involve copying joiner blocks, then
   handle jump thread paths which have joiner blocks.

   By doing things this way we can be as aggressive as possible and
   not worry that copying a joiner block will create a jump threading
   opportunity.  */

static bool
thread_block (basic_block bb, bool noloop_only)
{
  bool retval;
  retval = thread_block_1 (bb, noloop_only, false);
  retval |= thread_block_1 (bb, noloop_only, true);
  return retval;
}


/* Threads edge E through E->dest to the second edge on E's jump thread
   path.  Returns the copy of E->dest created during threading, or E->dest
   if it was not necessary to copy it (E is its single predecessor).  */

static basic_block
thread_single_edge (edge e)
{
  basic_block bb = e->dest;
  struct redirection_data rd;
  vec<jump_thread_edge *> *path = THREAD_PATH (e);
  edge eto = (*path)[1]->e;

  for (unsigned int i = 0; i < path->length (); i++)
    delete (*path)[i];
  delete path;
  e->aux = NULL;

  thread_stats.num_threaded_edges++;

  if (single_pred_p (bb))
    {
      /* If BB has just a single predecessor, we should only remove the
         control statements at its end, and successors except for ETO.  */
      remove_ctrl_stmt_and_useless_edges (bb, eto->dest);

      /* And fixup the flags on the single remaining edge.  */
      eto->flags &= ~(EDGE_TRUE_VALUE | EDGE_FALSE_VALUE | EDGE_ABNORMAL);
      eto->flags |= EDGE_FALLTHRU;

      return bb;
    }

  /* Otherwise, we need to create a copy.  */
  if (e->dest == eto->src)
    update_bb_profile_for_threading (bb, EDGE_FREQUENCY (e), e->count, eto);

  vec<jump_thread_edge *> *npath = new vec<jump_thread_edge *> ();
  jump_thread_edge *x = new jump_thread_edge (e, EDGE_START_JUMP_THREAD);
  npath->safe_push (x);

  x = new jump_thread_edge (eto, EDGE_COPY_SRC_BLOCK);
  npath->safe_push (x);
  rd.path = npath;

  create_block_for_threading (bb, &rd);
  remove_ctrl_stmt_and_useless_edges (rd.dup_block, NULL);
  create_edge_and_update_destination_phis (&rd, rd.dup_block);

  if (dump_file && (dump_flags & TDF_DETAILS))
    fprintf (dump_file, "  Threaded jump %d --> %d to %d\n",
             e->src->index, e->dest->index, rd.dup_block->index);

  rd.dup_block->count = e->count;
  rd.dup_block->frequency = EDGE_FREQUENCY (e);
  single_succ_edge (rd.dup_block)->count = e->count;
  redirect_edge_and_branch (e, rd.dup_block);
  flush_pending_stmts (e);

  return rd.dup_block;
}

/* Callback for dfs_enumerate_from.  Returns true if BB is different
   from STOP and DBDS_CE_STOP.  */

static basic_block dbds_ce_stop;
static bool
dbds_continue_enumeration_p (const_basic_block bb, const void *stop)
{
  return (bb != (const_basic_block) stop
          && bb != dbds_ce_stop);
}

/* Evaluates the dominance relationship of latch of the LOOP and BB, and
   returns the state.  */

enum bb_dom_status
{
  /* BB does not dominate latch of the LOOP.  */
  DOMST_NONDOMINATING,
  /* The LOOP is broken (there is no path from the header to its latch).  */
  DOMST_LOOP_BROKEN,
  /* BB dominates the latch of the LOOP.  */
  DOMST_DOMINATING
};

static enum bb_dom_status
determine_bb_domination_status (struct loop *loop, basic_block bb)
{
  basic_block *bblocks;
  unsigned nblocks, i;
  bool bb_reachable = false;
  edge_iterator ei;
  edge e;

  /* This function assumes BB is a successor of LOOP->header.
     If that is not the case return DOMST_NONDOMINATING which
     is always safe.  */
  {
    bool ok = false;

    FOR_EACH_EDGE (e, ei, bb->preds)
      {
        if (e->src == loop->header)
          {
            ok = true;
            break;
          }
      }

    if (!ok)
      return DOMST_NONDOMINATING;
  }

  if (bb == loop->latch)
    return DOMST_DOMINATING;

  /* Check that BB dominates LOOP->latch, and that it is back-reachable
     from it.  */

  bblocks = XCNEWVEC (basic_block, loop->num_nodes);
  dbds_ce_stop = loop->header;
  nblocks = dfs_enumerate_from (loop->latch, 1, dbds_continue_enumeration_p,
                                bblocks, loop->num_nodes, bb);
  for (i = 0; i < nblocks; i++)
    FOR_EACH_EDGE (e, ei, bblocks[i]->preds)
      {
        if (e->src == loop->header)
          {
            free (bblocks);
            return DOMST_NONDOMINATING;
          }
        if (e->src == bb)
          bb_reachable = true;
      }

  free (bblocks);
  return (bb_reachable ? DOMST_DOMINATING : DOMST_LOOP_BROKEN);
}

/* Return true if BB is part of the new pre-header that is created
   when threading the latch to DATA.  */

static bool
def_split_header_continue_p (const_basic_block bb, const void *data)
{
  const_basic_block new_header = (const_basic_block) data;
  const struct loop *l;

  if (bb == new_header
      || loop_depth (bb->loop_father) < loop_depth (new_header->loop_father))
    return false;
  for (l = bb->loop_father; l; l = loop_outer (l))
    if (l == new_header->loop_father)
      return true;
  return false;
}

/* Thread jumps through the header of LOOP.  Returns true if cfg changes.
   If MAY_PEEL_LOOP_HEADERS is false, we avoid threading from entry edges
   to the inside of the loop.  */

static bool
thread_through_loop_header (struct loop *loop, bool may_peel_loop_headers)
{
  basic_block header = loop->header;
  edge e, tgt_edge, latch = loop_latch_edge (loop);
  edge_iterator ei;
  basic_block tgt_bb, atgt_bb;
  enum bb_dom_status domst;

  /* We have already threaded through headers to exits, so all the threading
     requests now are to the inside of the loop.  We need to avoid creating
     irreducible regions (i.e., loops with more than one entry block), and
     also loops with several latch edges, or new subloops of the loop
     (although there are cases where it might be appropriate, it is difficult
     to decide, and doing it wrongly may confuse other optimizers).

     We could handle more general cases here.  However, the intention is to
     preserve some information about the loop, which is impossible if its
     structure changes significantly, in a way that is not well understood.
     Thus we only handle a few important special cases, in which also updating
     of the loop-carried information should be feasible:

     1) Propagation of latch edge to a block that dominates the latch block
        of a loop.  This aims to handle the following idiom:

        first = 1;
        while (1)
          {
            if (first)
              initialize;
            first = 0;
            body;
          }

        After threading the latch edge, this becomes

        first = 1;
        if (first)
          initialize;
        while (1)
          {
            first = 0;
            body;
          }

        The original header of the loop is moved out of it, and we may thread
        the remaining edges through it without further constraints.

     2) All entry edges are propagated to a single basic block that dominates
        the latch block of the loop.  This aims to handle the following idiom
        (normally created for "for" loops):

        i = 0;
        while (1)
          {
            if (i >= 100)
              break;
            body;
            i++;
          }

        This becomes

        i = 0;
        while (1)
          {
            body;
            i++;
            if (i >= 100)
              break;
          }
     */

  /* Threading through the header won't improve the code if the header has just
     one successor.  */
  if (single_succ_p (header))
    goto fail;

  if (latch->aux)
    {
      vec<jump_thread_edge *> *path = THREAD_PATH (latch);
      if ((*path)[1]->type == EDGE_COPY_SRC_JOINER_BLOCK)
        goto fail;
      tgt_edge = (*path)[1]->e;
      tgt_bb = tgt_edge->dest;
    }
  else if (!may_peel_loop_headers
           && !redirection_block_p (loop->header))
    goto fail;
  else
    {
      tgt_bb = NULL;
      tgt_edge = NULL;
      FOR_EACH_EDGE (e, ei, header->preds)
        {
          if (!e->aux)
            {
              if (e == latch)
                continue;

              /* If latch is not threaded, and there is a header
                 edge that is not threaded, we would create loop
                 with multiple entries.  */
              goto fail;
            }

          vec<jump_thread_edge *> *path = THREAD_PATH (e);

          if ((*path)[1]->type == EDGE_COPY_SRC_JOINER_BLOCK)
            goto fail;
          tgt_edge = (*path)[1]->e;
          atgt_bb = tgt_edge->dest;
          if (!tgt_bb)
            tgt_bb = atgt_bb;
          /* Two targets of threading would make us create loop
             with multiple entries.  */
          else if (tgt_bb != atgt_bb)
            goto fail;
        }

      if (!tgt_bb)
        {
          /* There are no threading requests.  */
          return false;
        }

      /* Redirecting to empty loop latch is useless.  */
      if (tgt_bb == loop->latch
          && empty_block_p (loop->latch))
        goto fail;
    }

  /* The target block must dominate the loop latch, otherwise we would be
     creating a subloop.  */
  domst = determine_bb_domination_status (loop, tgt_bb);
  if (domst == DOMST_NONDOMINATING)
    goto fail;
  if (domst == DOMST_LOOP_BROKEN)
    {
      /* If the loop ceased to exist, mark it as such, and thread through its
         original header.  */
      loop->header = NULL;
      loop->latch = NULL;
      loops_state_set (LOOPS_NEED_FIXUP);
      return thread_block (header, false);
    }

  if (tgt_bb->loop_father->header == tgt_bb)
    {
      /* If the target of the threading is a header of a subloop, we need
         to create a preheader for it, so that the headers of the two loops
         do not merge.  */
      if (EDGE_COUNT (tgt_bb->preds) > 2)
        {
          tgt_bb = create_preheader (tgt_bb->loop_father, 0);
          gcc_assert (tgt_bb != NULL);
        }
      else
        tgt_bb = split_edge (tgt_edge);
    }

  if (latch->aux)
    {
      basic_block *bblocks;
      unsigned nblocks, i;

      /* First handle the case latch edge is redirected.  We are copying
         the loop header but not creating a multiple entry loop.  Make the
         cfg manipulation code aware of that fact.  */
      set_loop_copy (loop, loop);
      loop->latch = thread_single_edge (latch);
      set_loop_copy (loop, NULL);
      gcc_assert (single_succ (loop->latch) == tgt_bb);
      loop->header = tgt_bb;

      /* Remove the new pre-header blocks from our loop.  */
      bblocks = XCNEWVEC (basic_block, loop->num_nodes);
      nblocks = dfs_enumerate_from (header, 0, def_split_header_continue_p,
                                    bblocks, loop->num_nodes, tgt_bb);
      for (i = 0; i < nblocks; i++)
        if (bblocks[i]->loop_father == loop)
          {
            remove_bb_from_loops (bblocks[i]);
            add_bb_to_loop (bblocks[i], loop_outer (loop));
          }
      free (bblocks);

      /* If the new header has multiple latches mark it so.  */
      FOR_EACH_EDGE (e, ei, loop->header->preds)
        if (e->src->loop_father == loop
            && e->src != loop->latch)
          {
            loop->latch = NULL;
            loops_state_set (LOOPS_MAY_HAVE_MULTIPLE_LATCHES);
          }

      /* Cancel remaining threading requests that would make the
         loop a multiple entry loop.  */
      FOR_EACH_EDGE (e, ei, header->preds)
        {
          edge e2;

          if (e->aux == NULL)
            continue;

          vec<jump_thread_edge *> *path = THREAD_PATH (e);
          e2 = path->last ()->e;

          if (e->src->loop_father != e2->dest->loop_father
              && e2->dest != loop->header)
            {
              delete_jump_thread_path (path);
              e->aux = NULL;
            }
        }

      /* Thread the remaining edges through the former header.  */
      thread_block (header, false);
    }
  else
    {
      basic_block new_preheader;

      /* Now consider the case entry edges are redirected to the new entry
         block.  Remember one entry edge, so that we can find the new
         preheader (its destination after threading).  */
      FOR_EACH_EDGE (e, ei, header->preds)
        {
          if (e->aux)
            break;
        }

      /* The duplicate of the header is the new preheader of the loop.  Ensure
         that it is placed correctly in the loop hierarchy.  */
      set_loop_copy (loop, loop_outer (loop));

      thread_block (header, false);
      set_loop_copy (loop, NULL);
      new_preheader = e->dest;

      /* Create the new latch block.  This is always necessary, as the latch
         must have only a single successor, but the original header had at
         least two successors.  */
      loop->latch = NULL;
      mfb_kj_edge = single_succ_edge (new_preheader);
      loop->header = mfb_kj_edge->dest;
      latch = make_forwarder_block (tgt_bb, mfb_keep_just, NULL);
      loop->header = latch->dest;
      loop->latch = latch->src;
    }

  return true;

fail:
  /* We failed to thread anything.  Cancel the requests.  */
  FOR_EACH_EDGE (e, ei, header->preds)
    {
      vec<jump_thread_edge *> *path = THREAD_PATH (e);

      if (path)
        {
          delete_jump_thread_path (path);
          e->aux = NULL;
        }
    }
  return false;
}

/* E1 and E2 are edges into the same basic block.  Return TRUE if the
   PHI arguments associated with those edges are equal or there are no
   PHI arguments, otherwise return FALSE.  */

static bool
phi_args_equal_on_edges (edge e1, edge e2)
{
  gimple_stmt_iterator gsi;
  int indx1 = e1->dest_idx;
  int indx2 = e2->dest_idx;

  for (gsi = gsi_start_phis (e1->dest); !gsi_end_p (gsi); gsi_next (&gsi))
    {
      gimple phi = gsi_stmt (gsi);

      if (!operand_equal_p (gimple_phi_arg_def (phi, indx1),
                            gimple_phi_arg_def (phi, indx2), 0))
        return false;
    }
  return true;
}

/* Walk through the registered jump threads and convert them into a
   form convenient for this pass.

   Any block which has incoming edges threaded to outgoing edges
   will have its entry in THREADED_BLOCK set.

   Any threaded edge will have its new outgoing edge stored in the
   original edge's AUX field.

   This form avoids the need to walk all the edges in the CFG to
   discover blocks which need processing and avoids unnecessary
   hash table lookups to map from threaded edge to new target.  */

static void
mark_threaded_blocks (bitmap threaded_blocks)
{
  unsigned int i;
  bitmap_iterator bi;
  bitmap tmp = BITMAP_ALLOC (NULL);
  basic_block bb;
  edge e;
  edge_iterator ei;

  /* Move the jump threading requests from PATHS to each edge
     which starts a jump thread path.  */
  for (i = 0; i < paths.length (); i++)
    {
      vec<jump_thread_edge *> *path = paths[i];
      edge e = (*path)[0]->e;
      e->aux = (void *)path;
      bitmap_set_bit (tmp, e->dest->index);
    }

  /* If optimizing for size, only thread through block if we don't have
     to duplicate it or it's an otherwise empty redirection block.  */
  if (optimize_function_for_size_p (cfun))
    {
      EXECUTE_IF_SET_IN_BITMAP (tmp, 0, i, bi)
        {
          bb = BASIC_BLOCK (i);
          if (EDGE_COUNT (bb->preds) > 1
              && !redirection_block_p (bb))
            {
              FOR_EACH_EDGE (e, ei, bb->preds)
                {
                  if (e->aux)
                    {
                      vec<jump_thread_edge *> *path = THREAD_PATH (e);
                      delete_jump_thread_path (path);
                      e->aux = NULL;
                    }
                }
            }
          else
            bitmap_set_bit (threaded_blocks, i);
        }
    }
  else
    bitmap_copy (threaded_blocks, tmp);

  /* Look for jump threading paths which cross multiple loop headers.

     The code to thread through loop headers will change the CFG in ways
     that break assumptions made by the loop optimization code.

     We don't want to blindly cancel the requests.  We can instead do better
     by trimming off the end of the jump thread path.  */
  EXECUTE_IF_SET_IN_BITMAP (tmp, 0, i, bi)
    {
      basic_block bb = BASIC_BLOCK (i);
      FOR_EACH_EDGE (e, ei, bb->preds)
        {
          if (e->aux)
            {
              vec<jump_thread_edge *> *path = THREAD_PATH (e);

              /* Basically we're looking for a situation where we can see
                 3 or more loop structures on a jump threading path.  */

              struct loop *first_father = (*path)[0]->e->src->loop_father;
              struct loop *second_father = NULL;
              for (unsigned int i = 0; i < path->length (); i++)
                {
                  /* See if this is a loop father we have not seen before.  */
                  if ((*path)[i]->e->dest->loop_father != first_father
                      && (*path)[i]->e->dest->loop_father != second_father)
                    {
                      /* We've already seen two loop fathers, so we
                         need to trim this jump threading path.  */
                      if (second_father != NULL)
                        {
                          /* Trim from entry I onwards.  */
                          for (unsigned int j = i; j < path->length (); j++)
                            delete (*path)[j];
                          path->truncate (i);

                          /* Now that we've truncated the path, make sure
                             what's left is still valid.  We need at least
                             two edges on the path and the last edge can not
                             be a joiner.  This should never happen, but let's
                             be safe.  */
                          if (path->length () < 2
                              || (path->last ()->type
                                  == EDGE_COPY_SRC_JOINER_BLOCK))
                            {
                              delete_jump_thread_path (path);
                              e->aux = NULL;
                            }
                          break;
                        }
                      else
                        {
                          second_father = (*path)[i]->e->dest->loop_father;
                        }
                    }
                }
            }
        }
    }

  /* If we have a joiner block (J) which has two successors S1 and S2 and
     we are threading through S1 and the final destination of the thread
     is S2, then we must verify that any PHI nodes in S2 have the same
     PHI arguments for the edge J->S2 and J->S1->...->S2.

     We used to detect this prior to registering the jump thread, but
     that prohibits propagation of edge equivalences into non-dominated
     PHI nodes as the equivalency test might occur before propagation.

     This must also occur after we truncate any jump threading paths
     as this scenario may only show up after truncation.

     This works for now, but will need improvement as part of the FSA
     optimization.

     Note since we've moved the thread request data to the edges,
     we have to iterate on those rather than the threaded_edges vector.  */
  EXECUTE_IF_SET_IN_BITMAP (tmp, 0, i, bi)
    {
      bb = BASIC_BLOCK (i);
      FOR_EACH_EDGE (e, ei, bb->preds)
        {
          if (e->aux)
            {
              vec<jump_thread_edge *> *path = THREAD_PATH (e);
              bool have_joiner = ((*path)[1]->type == EDGE_COPY_SRC_JOINER_BLOCK);

              if (have_joiner)
                {
                  basic_block joiner = e->dest;
                  edge final_edge = path->last ()->e;
                  basic_block final_dest = final_edge->dest;
                  edge e2 = find_edge (joiner, final_dest);

                  if (e2 && !phi_args_equal_on_edges (e2, final_edge))
                    {
                      delete_jump_thread_path (path);
                      e->aux = NULL;
                    }
                }
            }
        }
    }

  BITMAP_FREE (tmp);
}


/* Walk through all blocks and thread incoming edges to the appropriate
   outgoing edge for each edge pair recorded in THREADED_EDGES.

   It is the caller's responsibility to fix the dominance information
   and rewrite duplicated SSA_NAMEs back into SSA form.

   If MAY_PEEL_LOOP_HEADERS is false, we avoid threading edges through
   loop headers if it does not simplify the loop.

   Returns true if one or more edges were threaded, false otherwise.  */

bool
thread_through_all_blocks (bool may_peel_loop_headers)
{
  bool retval = false;
  unsigned int i;
  bitmap_iterator bi;
  bitmap threaded_blocks;
  struct loop *loop;
  loop_iterator li;

  /* We must know about loops in order to preserve them.  */
  gcc_assert (current_loops != NULL);

  if (!paths.exists ())
    return false;

  threaded_blocks = BITMAP_ALLOC (NULL);
  memset (&thread_stats, 0, sizeof (thread_stats));

  mark_threaded_blocks (threaded_blocks);

  initialize_original_copy_tables ();

  /* First perform the threading requests that do not affect
     loop structure.  */
  EXECUTE_IF_SET_IN_BITMAP (threaded_blocks, 0, i, bi)
    {
      basic_block bb = BASIC_BLOCK (i);

      if (EDGE_COUNT (bb->preds) > 0)
        retval |= thread_block (bb, true);
    }

  /* Then perform the threading through loop headers.  We start with the
     innermost loop, so that the changes in cfg we perform won't affect
     further threading.  */
  FOR_EACH_LOOP (li, loop, LI_FROM_INNERMOST)
    {
      if (!loop->header
          || !bitmap_bit_p (threaded_blocks, loop->header->index))
        continue;

      retval |= thread_through_loop_header (loop, may_peel_loop_headers);
    }

  /* Assume we had a jump thread path which went from the latch to the exit
     and a path which goes from outside to inside the same loop.

     If the latch to exit was handled first, we will thread it and clear
     loop->header.

     The second path will be ignored by thread_block because we're going
     through a loop header.  It will also be ignored by the loop above
     because loop->header is NULL.

     This results in the second path never being threaded.  The failure
     mode is a dangling AUX field.

     This is inherently a bit of a pain to fix, so we just walk all the
     blocks and all the incoming edges to those blocks and clear their
     AUX fields.  */
  basic_block bb;
  edge_iterator ei;
  edge e;
  FOR_EACH_BB (bb)
    {
      FOR_EACH_EDGE (e, ei, bb->preds)
        if (e->aux)
          {
            vec<jump_thread_edge *> *path = THREAD_PATH (e);

            delete_jump_thread_path (path);
            e->aux = NULL;
          }
    }

  statistics_counter_event (cfun, "Jumps threaded",
                            thread_stats.num_threaded_edges);

  free_original_copy_tables ();

  BITMAP_FREE (threaded_blocks);
  threaded_blocks = NULL;
  paths.release ();

  if (retval)
    loops_state_set (LOOPS_NEED_FIXUP);

  return retval;
}

/* Delete the jump threading path PATH.  We have to explicitly delete
   each entry in the vector, then the container.  */

void
delete_jump_thread_path (vec<jump_thread_edge *> *path)
{
  for (unsigned int i = 0; i < path->length (); i++)
    delete (*path)[i];
  path->release ();
}

/* Dump a jump threading path, including annotations about each
   edge in the path.  */

static void
dump_jump_thread_path (FILE *dump_file, vec<jump_thread_edge *> path)
{
  fprintf (dump_file,
           "  Registering jump thread: (%d, %d) incoming edge; ",
           path[0]->e->src->index, path[0]->e->dest->index);

  for (unsigned int i = 1; i < path.length (); i++)
    {
      /* We can get paths with a NULL edge when the final destination
         of a jump thread turns out to be a constant address.  We dump
         those paths when debugging, so we have to be prepared for that
         possibility here.  */
      if (path[i]->e == NULL)
        continue;

      if (path[i]->type == EDGE_COPY_SRC_JOINER_BLOCK)
        fprintf (dump_file, " (%d, %d) joiner; ",
                 path[i]->e->src->index, path[i]->e->dest->index);
      if (path[i]->type == EDGE_COPY_SRC_BLOCK)
        fprintf (dump_file, " (%d, %d) normal;",
                 path[i]->e->src->index, path[i]->e->dest->index);
      if (path[i]->type == EDGE_NO_COPY_SRC_BLOCK)
        fprintf (dump_file, " (%d, %d) nocopy;",
                 path[i]->e->src->index, path[i]->e->dest->index);
    }
  fputc ('\n', dump_file);
}
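
/* As an illustrative example derived from the format strings above
   (block numbers invented), a path through a joiner block dumps roughly
   as:

     Registering jump thread: (3, 5) incoming edge;  (5, 8) joiner;  (8, 11) normal;
   */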

/* Register a jump threading opportunity.  We queue up all the jump
   threading opportunities discovered by a pass and update the CFG
   and SSA form all at once.

   PATH describes the sequence of edges through which the jump will be
   threaded; its first entry is the edge we can thread and its last
   entry gives the new target, i.e., we are effectively recording that
   the first edge's destination can be changed to the last edge's
   destination after fixing the SSA graph.  */

void
register_jump_thread (vec<jump_thread_edge *> *path)
{
  if (!dbg_cnt (registered_jump_thread))
    {
      delete_jump_thread_path (path);
      return;
    }

  /* First make sure there are no NULL outgoing edges on the jump threading
     path.  That can happen for jumping to a constant address.  */
  for (unsigned int i = 0; i < path->length (); i++)
    if ((*path)[i]->e == NULL)
      {
        if (dump_file && (dump_flags & TDF_DETAILS))
          {
            fprintf (dump_file,
                     "Found NULL edge in jump threading path.  Cancelling jump thread:\n");
            dump_jump_thread_path (dump_file, *path);
          }

        delete_jump_thread_path (path);
        return;
      }

  if (dump_file && (dump_flags & TDF_DETAILS))
    dump_jump_thread_path (dump_file, *path);

  if (!paths.exists ())
    paths.create (5);

  paths.safe_push (path);
}