]> git.ipfire.org Git - thirdparty/gcc.git/blame - gcc/tree-ssa-tail-merge.c
tree-ssa.h: Remove all #include's
[thirdparty/gcc.git] / gcc / tree-ssa-tail-merge.c
CommitLineData
c9e93168 1/* Tail merging for gimple.
d1e082c2 2 Copyright (C) 2011-2013 Free Software Foundation, Inc.
c9e93168
TV
3 Contributed by Tom de Vries (tom@codesourcery.com)
4
5This file is part of GCC.
6
7GCC is free software; you can redistribute it and/or modify
8it under the terms of the GNU General Public License as published by
9the Free Software Foundation; either version 3, or (at your option)
10any later version.
11
12GCC is distributed in the hope that it will be useful,
13but WITHOUT ANY WARRANTY; without even the implied warranty of
14MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15GNU General Public License for more details.
16
17You should have received a copy of the GNU General Public License
18along with GCC; see the file COPYING3. If not see
19<http://www.gnu.org/licenses/>. */
20
21/* Pass overview.
22
23
24 MOTIVATIONAL EXAMPLE
25
26 gimple representation of gcc/testsuite/gcc.dg/pr43864.c at
27
28 hprofStartupp (charD.1 * outputFileNameD.2600, charD.1 * ctxD.2601)
29 {
30 struct FILED.1638 * fpD.2605;
31 charD.1 fileNameD.2604[1000];
32 intD.0 D.3915;
33 const charD.1 * restrict outputFileName.0D.3914;
34
35 # BLOCK 2 freq:10000
36 # PRED: ENTRY [100.0%] (fallthru,exec)
37 # PT = nonlocal { D.3926 } (restr)
38 outputFileName.0D.3914_3
39 = (const charD.1 * restrict) outputFileNameD.2600_2(D);
40 # .MEMD.3923_13 = VDEF <.MEMD.3923_12(D)>
41 # USE = nonlocal null { fileNameD.2604 D.3926 } (restr)
42 # CLB = nonlocal null { fileNameD.2604 D.3926 } (restr)
43 sprintfD.759 (&fileNameD.2604, outputFileName.0D.3914_3);
44 # .MEMD.3923_14 = VDEF <.MEMD.3923_13>
45 # USE = nonlocal null { fileNameD.2604 D.3926 } (restr)
46 # CLB = nonlocal null { fileNameD.2604 D.3926 } (restr)
47 D.3915_4 = accessD.2606 (&fileNameD.2604, 1);
48 if (D.3915_4 == 0)
49 goto <bb 3>;
50 else
51 goto <bb 4>;
52 # SUCC: 3 [10.0%] (true,exec) 4 [90.0%] (false,exec)
53
54 # BLOCK 3 freq:1000
55 # PRED: 2 [10.0%] (true,exec)
56 # .MEMD.3923_15 = VDEF <.MEMD.3923_14>
57 # USE = nonlocal null { fileNameD.2604 D.3926 } (restr)
58 # CLB = nonlocal null { fileNameD.2604 D.3926 } (restr)
59 freeD.898 (ctxD.2601_5(D));
60 goto <bb 7>;
61 # SUCC: 7 [100.0%] (fallthru,exec)
62
63 # BLOCK 4 freq:9000
64 # PRED: 2 [90.0%] (false,exec)
65 # .MEMD.3923_16 = VDEF <.MEMD.3923_14>
66 # PT = nonlocal escaped
67 # USE = nonlocal null { fileNameD.2604 D.3926 } (restr)
68 # CLB = nonlocal null { fileNameD.2604 D.3926 } (restr)
69 fpD.2605_8 = fopenD.1805 (&fileNameD.2604[0], 0B);
70 if (fpD.2605_8 == 0B)
71 goto <bb 5>;
72 else
73 goto <bb 6>;
74 # SUCC: 5 [1.9%] (true,exec) 6 [98.1%] (false,exec)
75
76 # BLOCK 5 freq:173
77 # PRED: 4 [1.9%] (true,exec)
78 # .MEMD.3923_17 = VDEF <.MEMD.3923_16>
79 # USE = nonlocal null { fileNameD.2604 D.3926 } (restr)
80 # CLB = nonlocal null { fileNameD.2604 D.3926 } (restr)
81 freeD.898 (ctxD.2601_5(D));
82 goto <bb 7>;
83 # SUCC: 7 [100.0%] (fallthru,exec)
84
85 # BLOCK 6 freq:8827
86 # PRED: 4 [98.1%] (false,exec)
87 # .MEMD.3923_18 = VDEF <.MEMD.3923_16>
88 # USE = nonlocal null { fileNameD.2604 D.3926 } (restr)
89 # CLB = nonlocal null { fileNameD.2604 D.3926 } (restr)
90 fooD.2599 (outputFileNameD.2600_2(D), fpD.2605_8);
91 # SUCC: 7 [100.0%] (fallthru,exec)
92
93 # BLOCK 7 freq:10000
94 # PRED: 3 [100.0%] (fallthru,exec) 5 [100.0%] (fallthru,exec)
95 6 [100.0%] (fallthru,exec)
96 # PT = nonlocal null
97
98 # ctxD.2601_1 = PHI <0B(3), 0B(5), ctxD.2601_5(D)(6)>
99 # .MEMD.3923_11 = PHI <.MEMD.3923_15(3), .MEMD.3923_17(5),
100 .MEMD.3923_18(6)>
101 # VUSE <.MEMD.3923_11>
102 return ctxD.2601_1;
103 # SUCC: EXIT [100.0%]
104 }
105
106 bb 3 and bb 5 can be merged. The blocks have different predecessors, but the
107 same successors, and the same operations.
108
109
110 CONTEXT
111
112 A technique called tail merging (or cross jumping) can fix the example
113 above. For a block, we look for common code at the end (the tail) of the
114 predecessor blocks, and insert jumps from one block to the other.
115 The example is a special case for tail merging, in that 2 whole blocks
116 can be merged, rather than just the end parts of it.
117 We currently only focus on whole block merging, so in that sense
118 calling this pass tail merge is a bit of a misnomer.
119
120 We distinguish 2 kinds of situations in which blocks can be merged:
121 - same operations, same predecessors. The successor edges coming from one
122 block are redirected to come from the other block.
123 - same operations, same successors. The predecessor edges entering one block
124 are redirected to enter the other block. Note that this operation might
125 involve introducing phi operations.
126
127 For efficient implementation, we would like to value numbers the blocks, and
128 have a comparison operator that tells us whether the blocks are equal.
129 Besides being runtime efficient, block value numbering should also abstract
130 from irrelevant differences in order of operations, much like normal value
131 numbering abstracts from irrelevant order of operations.
132
133 For the first situation (same_operations, same predecessors), normal value
134 numbering fits well. We can calculate a block value number based on the
135 value numbers of the defs and vdefs.
136
137 For the second situation (same operations, same successors), this approach
138 doesn't work so well. We can illustrate this using the example. The calls
139 to free use different vdefs: MEMD.3923_16 and MEMD.3923_14, and these will
140 remain different in value numbering, since they represent different memory
141 states. So the resulting vdefs of the frees will be different in value
142 numbering, so the block value numbers will be different.
143
144 The reason why we call the blocks equal is not because they define the same
145 values, but because uses in the blocks use (possibly different) defs in the
146 same way. To be able to detect this efficiently, we need to do some kind of
147 reverse value numbering, meaning number the uses rather than the defs, and
148 calculate a block value number based on the value number of the uses.
149 Ideally, a block comparison operator will also indicate which phis are needed
150 to merge the blocks.
151
152 For the moment, we don't do block value numbering, but we do insn-by-insn
153 matching, using scc value numbers to match operations with results, and
154 structural comparison otherwise, while ignoring vop mismatches.
155
156
157 IMPLEMENTATION
158
159 1. The pass first determines all groups of blocks with the same successor
160 blocks.
161 2. Within each group, it tries to determine clusters of equal basic blocks.
162 3. The clusters are applied.
163 4. The same successor groups are updated.
164 5. This process is repeated from 2 onwards, until no more changes.
165
166
167 LIMITATIONS/TODO
168
169 - block only
170 - handles only 'same operations, same successors'.
171 It handles same predecessors as a special subcase though.
172 - does not implement the reverse value numbering and block value numbering.
173 - improve memory allocation: use garbage collected memory, obstacks,
174 allocpools where appropriate.
175 - no insertion of gimple_reg phis, We only introduce vop-phis.
176 - handle blocks with gimple_reg phi_nodes.
177
178
8dee4479
TV
179 PASS PLACEMENT
180 This 'pass' is not a stand-alone gimple pass, but runs as part of
181 pass_pre, in order to share the value numbering.
182
183
c9e93168
TV
184 SWITCHES
185
186 - ftree-tail-merge. On at -O2. We may have to enable it only at -Os. */
187
188#include "config.h"
189#include "system.h"
190#include "coretypes.h"
191#include "tm.h"
192#include "tree.h"
193#include "tm_p.h"
194#include "basic-block.h"
c9e93168
TV
195#include "flags.h"
196#include "function.h"
442b4905
AM
197#include "gimple.h"
198#include "gimple-ssa.h"
199#include "tree-cfg.h"
200#include "tree-phinodes.h"
201#include "ssa-iterators.h"
202#include "tree-into-ssa.h"
c9e93168
TV
203#include "tree-ssa-alias.h"
204#include "params.h"
0823efed 205#include "hash-table.h"
c9e93168
TV
206#include "gimple-pretty-print.h"
207#include "tree-ssa-sccvn.h"
208#include "tree-dump.h"
a9e0d843 209#include "cfgloop.h"
7ee2468b
SB
210#include "tree-pass.h"
211
c9e93168
TV
212/* Describes a group of bbs with the same successors. The successor bbs are
213 cached in succs, and the successor edge flags are cached in succ_flags.
214 If a bb has the EDGE_TRUE/VALSE_VALUE flags swapped compared to succ_flags,
215 it's marked in inverse.
216 Additionally, the hash value for the struct is cached in hashval, and
217 in_worklist indicates whether it's currently part of worklist. */
218
219struct same_succ_def
220{
221 /* The bbs that have the same successor bbs. */
222 bitmap bbs;
223 /* The successor bbs. */
224 bitmap succs;
225 /* Indicates whether the EDGE_TRUE/FALSE_VALUEs of succ_flags are swapped for
226 bb. */
227 bitmap inverse;
228 /* The edge flags for each of the successor bbs. */
9771b263 229 vec<int> succ_flags;
c9e93168
TV
230 /* Indicates whether the struct is currently in the worklist. */
231 bool in_worklist;
232 /* The hash value of the struct. */
233 hashval_t hashval;
5deac340
RG
234
235 /* hash_table support. */
5831a5f0
LC
236 typedef same_succ_def value_type;
237 typedef same_succ_def compare_type;
238 static inline hashval_t hash (const value_type *);
239 static int equal (const value_type *, const compare_type *);
240 static void remove (value_type *);
c9e93168
TV
241};
242typedef struct same_succ_def *same_succ;
243typedef const struct same_succ_def *const_same_succ;
244
5deac340
RG
245/* hash routine for hash_table support, returns hashval of E. */
246
247inline hashval_t
5831a5f0 248same_succ_def::hash (const value_type *e)
5deac340
RG
249{
250 return e->hashval;
251}
252
c9e93168
TV
253/* A group of bbs where 1 bb from bbs can replace the other bbs. */
254
255struct bb_cluster_def
256{
257 /* The bbs in the cluster. */
258 bitmap bbs;
259 /* The preds of the bbs in the cluster. */
260 bitmap preds;
261 /* Index in all_clusters vector. */
262 int index;
263 /* The bb to replace the cluster with. */
264 basic_block rep_bb;
265};
266typedef struct bb_cluster_def *bb_cluster;
267typedef const struct bb_cluster_def *const_bb_cluster;
268
269/* Per bb-info. */
270
271struct aux_bb_info
272{
273 /* The number of non-debug statements in the bb. */
274 int size;
275 /* The same_succ that this bb is a member of. */
276 same_succ bb_same_succ;
277 /* The cluster that this bb is a member of. */
278 bb_cluster cluster;
279 /* The vop state at the exit of a bb. This is shortlived data, used to
280 communicate data between update_block_by and update_vuses. */
281 tree vop_at_exit;
282 /* The bb that either contains or is dominated by the dependencies of the
283 bb. */
284 basic_block dep_bb;
285};
286
287/* Macros to access the fields of struct aux_bb_info. */
288
289#define BB_SIZE(bb) (((struct aux_bb_info *)bb->aux)->size)
290#define BB_SAME_SUCC(bb) (((struct aux_bb_info *)bb->aux)->bb_same_succ)
291#define BB_CLUSTER(bb) (((struct aux_bb_info *)bb->aux)->cluster)
292#define BB_VOP_AT_EXIT(bb) (((struct aux_bb_info *)bb->aux)->vop_at_exit)
293#define BB_DEP_BB(bb) (((struct aux_bb_info *)bb->aux)->dep_bb)
294
b2b2f160
TV
295/* Returns true if the only effect a statement STMT has, is to define locally
296 used SSA_NAMEs. */
297
298static bool
299stmt_local_def (gimple stmt)
300{
301 basic_block bb, def_bb;
302 imm_use_iterator iter;
303 use_operand_p use_p;
304 tree val;
305 def_operand_p def_p;
306
307 if (gimple_has_side_effects (stmt))
308 return false;
309
310 def_p = SINGLE_SSA_DEF_OPERAND (stmt, SSA_OP_DEF);
311 if (def_p == NULL)
312 return false;
313
314 val = DEF_FROM_PTR (def_p);
315 if (val == NULL_TREE || TREE_CODE (val) != SSA_NAME)
316 return false;
317
318 def_bb = gimple_bb (stmt);
319
320 FOR_EACH_IMM_USE_FAST (use_p, iter, val)
321 {
322 if (is_gimple_debug (USE_STMT (use_p)))
323 continue;
324 bb = gimple_bb (USE_STMT (use_p));
325 if (bb == def_bb)
326 continue;
327
328 if (gimple_code (USE_STMT (use_p)) == GIMPLE_PHI
329 && EDGE_PRED (bb, PHI_ARG_INDEX_FROM_USE (use_p))->src == def_bb)
330 continue;
331
332 return false;
333 }
334
335 return true;
336}
337
338/* Let GSI skip forwards over local defs. */
339
340static void
341gsi_advance_fw_nondebug_nonlocal (gimple_stmt_iterator *gsi)
342{
343 gimple stmt;
344
345 while (true)
346 {
347 if (gsi_end_p (*gsi))
348 return;
349 stmt = gsi_stmt (*gsi);
350 if (!stmt_local_def (stmt))
351 return;
352 gsi_next_nondebug (gsi);
353 }
354}
355
c9e93168
TV
356/* VAL1 and VAL2 are either:
357 - uses in BB1 and BB2, or
358 - phi alternatives for BB1 and BB2.
359 Return true if the uses have the same gvn value. */
360
361static bool
362gvn_uses_equal (tree val1, tree val2)
363{
364 gcc_checking_assert (val1 != NULL_TREE && val2 != NULL_TREE);
365
366 if (val1 == val2)
367 return true;
368
369 if (vn_valueize (val1) != vn_valueize (val2))
370 return false;
371
372 return ((TREE_CODE (val1) == SSA_NAME || CONSTANT_CLASS_P (val1))
373 && (TREE_CODE (val2) == SSA_NAME || CONSTANT_CLASS_P (val2)));
374}
375
376/* Prints E to FILE. */
377
378static void
379same_succ_print (FILE *file, const same_succ e)
380{
381 unsigned int i;
382 bitmap_print (file, e->bbs, "bbs:", "\n");
383 bitmap_print (file, e->succs, "succs:", "\n");
384 bitmap_print (file, e->inverse, "inverse:", "\n");
385 fprintf (file, "flags:");
9771b263
DN
386 for (i = 0; i < e->succ_flags.length (); ++i)
387 fprintf (file, " %x", e->succ_flags[i]);
c9e93168
TV
388 fprintf (file, "\n");
389}
390
391/* Prints same_succ VE to VFILE. */
392
0823efed
DN
393inline int
394ssa_same_succ_print_traverse (same_succ *pe, FILE *file)
c9e93168 395{
0823efed 396 const same_succ e = *pe;
c9e93168
TV
397 same_succ_print (file, e);
398 return 1;
399}
400
401/* Update BB_DEP_BB (USE_BB), given a use of VAL in USE_BB. */
402
403static void
404update_dep_bb (basic_block use_bb, tree val)
405{
406 basic_block dep_bb;
407
408 /* Not a dep. */
409 if (TREE_CODE (val) != SSA_NAME)
410 return;
411
412 /* Skip use of global def. */
413 if (SSA_NAME_IS_DEFAULT_DEF (val))
414 return;
415
416 /* Skip use of local def. */
417 dep_bb = gimple_bb (SSA_NAME_DEF_STMT (val));
418 if (dep_bb == use_bb)
419 return;
420
421 if (BB_DEP_BB (use_bb) == NULL
422 || dominated_by_p (CDI_DOMINATORS, dep_bb, BB_DEP_BB (use_bb)))
423 BB_DEP_BB (use_bb) = dep_bb;
424}
425
426/* Update BB_DEP_BB, given the dependencies in STMT. */
427
428static void
429stmt_update_dep_bb (gimple stmt)
430{
431 ssa_op_iter iter;
432 use_operand_p use;
433
434 FOR_EACH_SSA_USE_OPERAND (use, stmt, iter, SSA_OP_USE)
435 update_dep_bb (gimple_bb (stmt), USE_FROM_PTR (use));
436}
437
c9e93168
TV
438/* Calculates hash value for same_succ VE. */
439
5deac340
RG
440static hashval_t
441same_succ_hash (const_same_succ e)
c9e93168 442{
c9e93168
TV
443 hashval_t hashval = bitmap_hash (e->succs);
444 int flags;
445 unsigned int i;
446 unsigned int first = bitmap_first_set_bit (e->bbs);
447 basic_block bb = BASIC_BLOCK (first);
448 int size = 0;
449 gimple_stmt_iterator gsi;
450 gimple stmt;
451 tree arg;
452 unsigned int s;
453 bitmap_iterator bs;
454
455 for (gsi = gsi_start_nondebug_bb (bb);
456 !gsi_end_p (gsi); gsi_next_nondebug (&gsi))
457 {
458 stmt = gsi_stmt (gsi);
459 stmt_update_dep_bb (stmt);
b2b2f160 460 if (stmt_local_def (stmt))
c9e93168
TV
461 continue;
462 size++;
463
464 hashval = iterative_hash_hashval_t (gimple_code (stmt), hashval);
465 if (is_gimple_assign (stmt))
466 hashval = iterative_hash_hashval_t (gimple_assign_rhs_code (stmt),
467 hashval);
468 if (!is_gimple_call (stmt))
469 continue;
470 if (gimple_call_internal_p (stmt))
471 hashval = iterative_hash_hashval_t
472 ((hashval_t) gimple_call_internal_fn (stmt), hashval);
473 else
474 hashval = iterative_hash_expr (gimple_call_fn (stmt), hashval);
475 for (i = 0; i < gimple_call_num_args (stmt); i++)
476 {
477 arg = gimple_call_arg (stmt, i);
478 arg = vn_valueize (arg);
479 hashval = iterative_hash_expr (arg, hashval);
480 }
481 }
482
483 hashval = iterative_hash_hashval_t (size, hashval);
484 BB_SIZE (bb) = size;
485
9771b263 486 for (i = 0; i < e->succ_flags.length (); ++i)
c9e93168 487 {
9771b263 488 flags = e->succ_flags[i];
c9e93168
TV
489 flags = flags & ~(EDGE_TRUE_VALUE | EDGE_FALSE_VALUE);
490 hashval = iterative_hash_hashval_t (flags, hashval);
491 }
492
493 EXECUTE_IF_SET_IN_BITMAP (e->succs, 0, s, bs)
494 {
495 int n = find_edge (bb, BASIC_BLOCK (s))->dest_idx;
496 for (gsi = gsi_start_phis (BASIC_BLOCK (s)); !gsi_end_p (gsi);
497 gsi_next (&gsi))
498 {
499 gimple phi = gsi_stmt (gsi);
500 tree lhs = gimple_phi_result (phi);
501 tree val = gimple_phi_arg_def (phi, n);
502
ea057359 503 if (virtual_operand_p (lhs))
c9e93168
TV
504 continue;
505 update_dep_bb (bb, val);
506 }
507 }
508
509 return hashval;
510}
511
512/* Returns true if E1 and E2 have 2 successors, and if the successor flags
513 are inverse for the EDGE_TRUE_VALUE and EDGE_FALSE_VALUE flags, and equal for
514 the other edge flags. */
515
516static bool
517inverse_flags (const_same_succ e1, const_same_succ e2)
518{
519 int f1a, f1b, f2a, f2b;
520 int mask = ~(EDGE_TRUE_VALUE | EDGE_FALSE_VALUE);
521
9771b263 522 if (e1->succ_flags.length () != 2)
c9e93168
TV
523 return false;
524
9771b263
DN
525 f1a = e1->succ_flags[0];
526 f1b = e1->succ_flags[1];
527 f2a = e2->succ_flags[0];
528 f2b = e2->succ_flags[1];
c9e93168
TV
529
530 if (f1a == f2a && f1b == f2b)
531 return false;
532
533 return (f1a & mask) == (f2a & mask) && (f1b & mask) == (f2b & mask);
534}
535
5deac340 536/* Compares SAME_SUCCs E1 and E2. */
c9e93168 537
0823efed 538int
5831a5f0 539same_succ_def::equal (const value_type *e1, const compare_type *e2)
c9e93168 540{
c9e93168
TV
541 unsigned int i, first1, first2;
542 gimple_stmt_iterator gsi1, gsi2;
543 gimple s1, s2;
544 basic_block bb1, bb2;
545
546 if (e1->hashval != e2->hashval)
547 return 0;
548
9771b263 549 if (e1->succ_flags.length () != e2->succ_flags.length ())
c9e93168
TV
550 return 0;
551
552 if (!bitmap_equal_p (e1->succs, e2->succs))
553 return 0;
554
555 if (!inverse_flags (e1, e2))
556 {
9771b263
DN
557 for (i = 0; i < e1->succ_flags.length (); ++i)
558 if (e1->succ_flags[i] != e1->succ_flags[i])
c9e93168
TV
559 return 0;
560 }
561
562 first1 = bitmap_first_set_bit (e1->bbs);
563 first2 = bitmap_first_set_bit (e2->bbs);
564
565 bb1 = BASIC_BLOCK (first1);
566 bb2 = BASIC_BLOCK (first2);
567
568 if (BB_SIZE (bb1) != BB_SIZE (bb2))
569 return 0;
570
571 gsi1 = gsi_start_nondebug_bb (bb1);
572 gsi2 = gsi_start_nondebug_bb (bb2);
b2b2f160
TV
573 gsi_advance_fw_nondebug_nonlocal (&gsi1);
574 gsi_advance_fw_nondebug_nonlocal (&gsi2);
c9e93168
TV
575 while (!(gsi_end_p (gsi1) || gsi_end_p (gsi2)))
576 {
577 s1 = gsi_stmt (gsi1);
578 s2 = gsi_stmt (gsi2);
579 if (gimple_code (s1) != gimple_code (s2))
580 return 0;
581 if (is_gimple_call (s1) && !gimple_call_same_target_p (s1, s2))
582 return 0;
583 gsi_next_nondebug (&gsi1);
584 gsi_next_nondebug (&gsi2);
b2b2f160
TV
585 gsi_advance_fw_nondebug_nonlocal (&gsi1);
586 gsi_advance_fw_nondebug_nonlocal (&gsi2);
c9e93168
TV
587 }
588
589 return 1;
590}
591
592/* Alloc and init a new SAME_SUCC. */
593
594static same_succ
595same_succ_alloc (void)
596{
597 same_succ same = XNEW (struct same_succ_def);
598
599 same->bbs = BITMAP_ALLOC (NULL);
600 same->succs = BITMAP_ALLOC (NULL);
601 same->inverse = BITMAP_ALLOC (NULL);
9771b263 602 same->succ_flags.create (10);
c9e93168
TV
603 same->in_worklist = false;
604
605 return same;
606}
607
5deac340 608/* Delete same_succ E. */
c9e93168 609
5deac340
RG
610void
611same_succ_def::remove (same_succ e)
c9e93168 612{
c9e93168
TV
613 BITMAP_FREE (e->bbs);
614 BITMAP_FREE (e->succs);
615 BITMAP_FREE (e->inverse);
9771b263 616 e->succ_flags.release ();
c9e93168 617
0823efed 618 XDELETE (e);
c9e93168
TV
619}
620
621/* Reset same_succ SAME. */
622
623static void
624same_succ_reset (same_succ same)
625{
626 bitmap_clear (same->bbs);
627 bitmap_clear (same->succs);
628 bitmap_clear (same->inverse);
9771b263 629 same->succ_flags.truncate (0);
c9e93168
TV
630}
631
5deac340 632static hash_table <same_succ_def> same_succ_htab;
c9e93168
TV
633
634/* Array that is used to store the edge flags for a successor. */
635
636static int *same_succ_edge_flags;
637
638/* Bitmap that is used to mark bbs that are recently deleted. */
639
640static bitmap deleted_bbs;
641
642/* Bitmap that is used to mark predecessors of bbs that are
643 deleted. */
644
645static bitmap deleted_bb_preds;
646
647/* Prints same_succ_htab to stderr. */
648
649extern void debug_same_succ (void);
650DEBUG_FUNCTION void
651debug_same_succ ( void)
652{
0823efed 653 same_succ_htab.traverse <FILE *, ssa_same_succ_print_traverse> (stderr);
c9e93168
TV
654}
655
c9e93168
TV
656
657/* Vector of bbs to process. */
658
9771b263 659static vec<same_succ> worklist;
c9e93168
TV
660
661/* Prints worklist to FILE. */
662
663static void
664print_worklist (FILE *file)
665{
666 unsigned int i;
9771b263
DN
667 for (i = 0; i < worklist.length (); ++i)
668 same_succ_print (file, worklist[i]);
c9e93168
TV
669}
670
671/* Adds SAME to worklist. */
672
673static void
674add_to_worklist (same_succ same)
675{
676 if (same->in_worklist)
677 return;
678
679 if (bitmap_count_bits (same->bbs) < 2)
680 return;
681
682 same->in_worklist = true;
9771b263 683 worklist.safe_push (same);
c9e93168
TV
684}
685
686/* Add BB to same_succ_htab. */
687
688static void
689find_same_succ_bb (basic_block bb, same_succ *same_p)
690{
691 unsigned int j;
692 bitmap_iterator bj;
693 same_succ same = *same_p;
694 same_succ *slot;
695 edge_iterator ei;
696 edge e;
697
315bbd2e
TV
698 if (bb == NULL
699 /* Be conservative with loop structure. It's not evident that this test
700 is sufficient. Before tail-merge, we've just called
701 loop_optimizer_finalize, and LOOPS_MAY_HAVE_MULTIPLE_LATCHES is now
702 set, so there's no guarantee that the loop->latch value is still valid.
703 But we assume that, since we've forced LOOPS_HAVE_SIMPLE_LATCHES at the
704 start of pre, we've kept that property intact throughout pre, and are
705 keeping it throughout tail-merge using this test. */
706 || bb->loop_father->latch == bb)
c9e93168
TV
707 return;
708 bitmap_set_bit (same->bbs, bb->index);
709 FOR_EACH_EDGE (e, ei, bb->succs)
710 {
711 int index = e->dest->index;
712 bitmap_set_bit (same->succs, index);
713 same_succ_edge_flags[index] = e->flags;
714 }
715 EXECUTE_IF_SET_IN_BITMAP (same->succs, 0, j, bj)
9771b263 716 same->succ_flags.safe_push (same_succ_edge_flags[j]);
c9e93168 717
5deac340 718 same->hashval = same_succ_hash (same);
c9e93168 719
0823efed 720 slot = same_succ_htab.find_slot_with_hash (same, same->hashval, INSERT);
c9e93168
TV
721 if (*slot == NULL)
722 {
723 *slot = same;
724 BB_SAME_SUCC (bb) = same;
725 add_to_worklist (same);
726 *same_p = NULL;
727 }
728 else
729 {
730 bitmap_set_bit ((*slot)->bbs, bb->index);
731 BB_SAME_SUCC (bb) = *slot;
732 add_to_worklist (*slot);
733 if (inverse_flags (same, *slot))
734 bitmap_set_bit ((*slot)->inverse, bb->index);
735 same_succ_reset (same);
736 }
737}
738
739/* Find bbs with same successors. */
740
741static void
742find_same_succ (void)
743{
744 same_succ same = same_succ_alloc ();
745 basic_block bb;
746
747 FOR_EACH_BB (bb)
748 {
749 find_same_succ_bb (bb, &same);
750 if (same == NULL)
751 same = same_succ_alloc ();
752 }
753
5deac340 754 same_succ_def::remove (same);
c9e93168
TV
755}
756
757/* Initializes worklist administration. */
758
759static void
760init_worklist (void)
761{
762 alloc_aux_for_blocks (sizeof (struct aux_bb_info));
0823efed 763 same_succ_htab.create (n_basic_blocks);
c9e93168
TV
764 same_succ_edge_flags = XCNEWVEC (int, last_basic_block);
765 deleted_bbs = BITMAP_ALLOC (NULL);
766 deleted_bb_preds = BITMAP_ALLOC (NULL);
9771b263 767 worklist.create (n_basic_blocks);
c9e93168
TV
768 find_same_succ ();
769
770 if (dump_file && (dump_flags & TDF_DETAILS))
771 {
772 fprintf (dump_file, "initial worklist:\n");
773 print_worklist (dump_file);
774 }
775}
776
777/* Deletes worklist administration. */
778
779static void
780delete_worklist (void)
781{
782 free_aux_for_blocks ();
0823efed 783 same_succ_htab.dispose ();
c9e93168
TV
784 XDELETEVEC (same_succ_edge_flags);
785 same_succ_edge_flags = NULL;
786 BITMAP_FREE (deleted_bbs);
787 BITMAP_FREE (deleted_bb_preds);
9771b263 788 worklist.release ();
c9e93168
TV
789}
790
791/* Mark BB as deleted, and mark its predecessors. */
792
793static void
643400b8 794mark_basic_block_deleted (basic_block bb)
c9e93168
TV
795{
796 edge e;
797 edge_iterator ei;
798
799 bitmap_set_bit (deleted_bbs, bb->index);
800
801 FOR_EACH_EDGE (e, ei, bb->preds)
802 bitmap_set_bit (deleted_bb_preds, e->src->index);
803}
804
4cbdcd40
TV
805/* Removes BB from its corresponding same_succ. */
806
807static void
808same_succ_flush_bb (basic_block bb)
809{
810 same_succ same = BB_SAME_SUCC (bb);
811 BB_SAME_SUCC (bb) = NULL;
812 if (bitmap_single_bit_set_p (same->bbs))
0823efed 813 same_succ_htab.remove_elt_with_hash (same, same->hashval);
4cbdcd40
TV
814 else
815 bitmap_clear_bit (same->bbs, bb->index);
816}
817
c9e93168
TV
818/* Removes all bbs in BBS from their corresponding same_succ. */
819
820static void
821same_succ_flush_bbs (bitmap bbs)
822{
823 unsigned int i;
824 bitmap_iterator bi;
825
826 EXECUTE_IF_SET_IN_BITMAP (bbs, 0, i, bi)
4cbdcd40 827 same_succ_flush_bb (BASIC_BLOCK (i));
c9e93168
TV
828}
829
fcddd80e
RG
830/* Release the last vdef in BB, either normal or phi result. */
831
832static void
833release_last_vdef (basic_block bb)
834{
835 gimple_stmt_iterator i;
836
837 for (i = gsi_last_bb (bb); !gsi_end_p (i); gsi_prev_nondebug (&i))
838 {
839 gimple stmt = gsi_stmt (i);
840 if (gimple_vdef (stmt) == NULL_TREE)
841 continue;
842
843 mark_virtual_operand_for_renaming (gimple_vdef (stmt));
844 return;
845 }
846
847 for (i = gsi_start_phis (bb); !gsi_end_p (i); gsi_next (&i))
848 {
849 gimple phi = gsi_stmt (i);
850 tree res = gimple_phi_result (phi);
851
ea057359 852 if (!virtual_operand_p (res))
fcddd80e
RG
853 continue;
854
855 mark_virtual_phi_result_for_renaming (phi);
856 return;
857 }
858
859}
860
c9e93168
TV
861/* For deleted_bb_preds, find bbs with same successors. */
862
863static void
864update_worklist (void)
865{
866 unsigned int i;
867 bitmap_iterator bi;
868 basic_block bb;
869 same_succ same;
870
643400b8
TV
871 bitmap_and_compl_into (deleted_bb_preds, deleted_bbs);
872 bitmap_clear (deleted_bbs);
873
c9e93168
TV
874 bitmap_clear_bit (deleted_bb_preds, ENTRY_BLOCK);
875 same_succ_flush_bbs (deleted_bb_preds);
876
877 same = same_succ_alloc ();
878 EXECUTE_IF_SET_IN_BITMAP (deleted_bb_preds, 0, i, bi)
879 {
880 bb = BASIC_BLOCK (i);
881 gcc_assert (bb != NULL);
882 find_same_succ_bb (bb, &same);
883 if (same == NULL)
884 same = same_succ_alloc ();
885 }
5deac340 886 same_succ_def::remove (same);
c9e93168
TV
887 bitmap_clear (deleted_bb_preds);
888}
889
890/* Prints cluster C to FILE. */
891
892static void
893print_cluster (FILE *file, bb_cluster c)
894{
895 if (c == NULL)
896 return;
897 bitmap_print (file, c->bbs, "bbs:", "\n");
898 bitmap_print (file, c->preds, "preds:", "\n");
899}
900
901/* Prints cluster C to stderr. */
902
903extern void debug_cluster (bb_cluster);
904DEBUG_FUNCTION void
905debug_cluster (bb_cluster c)
906{
907 print_cluster (stderr, c);
908}
909
910/* Update C->rep_bb, given that BB is added to the cluster. */
911
912static void
913update_rep_bb (bb_cluster c, basic_block bb)
914{
915 /* Initial. */
916 if (c->rep_bb == NULL)
917 {
918 c->rep_bb = bb;
919 return;
920 }
921
922 /* Current needs no deps, keep it. */
923 if (BB_DEP_BB (c->rep_bb) == NULL)
924 return;
925
926 /* Bb needs no deps, change rep_bb. */
927 if (BB_DEP_BB (bb) == NULL)
928 {
929 c->rep_bb = bb;
930 return;
931 }
932
933 /* Bb needs last deps earlier than current, change rep_bb. A potential
934 problem with this, is that the first deps might also be earlier, which
935 would mean we prefer longer lifetimes for the deps. To be able to check
936 for this, we would have to trace BB_FIRST_DEP_BB as well, besides
937 BB_DEP_BB, which is really BB_LAST_DEP_BB.
938 The benefit of choosing the bb with last deps earlier, is that it can
939 potentially be used as replacement for more bbs. */
940 if (dominated_by_p (CDI_DOMINATORS, BB_DEP_BB (c->rep_bb), BB_DEP_BB (bb)))
941 c->rep_bb = bb;
942}
943
944/* Add BB to cluster C. Sets BB in C->bbs, and preds of BB in C->preds. */
945
946static void
947add_bb_to_cluster (bb_cluster c, basic_block bb)
948{
949 edge e;
950 edge_iterator ei;
951
952 bitmap_set_bit (c->bbs, bb->index);
953
954 FOR_EACH_EDGE (e, ei, bb->preds)
955 bitmap_set_bit (c->preds, e->src->index);
956
957 update_rep_bb (c, bb);
958}
959
960/* Allocate and init new cluster. */
961
962static bb_cluster
963new_cluster (void)
964{
965 bb_cluster c;
966 c = XCNEW (struct bb_cluster_def);
967 c->bbs = BITMAP_ALLOC (NULL);
968 c->preds = BITMAP_ALLOC (NULL);
969 c->rep_bb = NULL;
970 return c;
971}
972
973/* Delete clusters. */
974
975static void
976delete_cluster (bb_cluster c)
977{
978 if (c == NULL)
979 return;
980 BITMAP_FREE (c->bbs);
981 BITMAP_FREE (c->preds);
982 XDELETE (c);
983}
984
c9e93168
TV
985
986/* Array that contains all clusters. */
987
9771b263 988static vec<bb_cluster> all_clusters;
c9e93168
TV
989
990/* Allocate all cluster vectors. */
991
992static void
993alloc_cluster_vectors (void)
994{
9771b263 995 all_clusters.create (n_basic_blocks);
c9e93168
TV
996}
997
998/* Reset all cluster vectors. */
999
1000static void
1001reset_cluster_vectors (void)
1002{
1003 unsigned int i;
1004 basic_block bb;
9771b263
DN
1005 for (i = 0; i < all_clusters.length (); ++i)
1006 delete_cluster (all_clusters[i]);
1007 all_clusters.truncate (0);
c9e93168
TV
1008 FOR_EACH_BB (bb)
1009 BB_CLUSTER (bb) = NULL;
1010}
1011
1012/* Delete all cluster vectors. */
1013
1014static void
1015delete_cluster_vectors (void)
1016{
1017 unsigned int i;
9771b263
DN
1018 for (i = 0; i < all_clusters.length (); ++i)
1019 delete_cluster (all_clusters[i]);
1020 all_clusters.release ();
c9e93168
TV
1021}
1022
1023/* Merge cluster C2 into C1. */
1024
1025static void
1026merge_clusters (bb_cluster c1, bb_cluster c2)
1027{
1028 bitmap_ior_into (c1->bbs, c2->bbs);
1029 bitmap_ior_into (c1->preds, c2->preds);
1030}
1031
1032/* Register equivalence of BB1 and BB2 (members of cluster C). Store c in
1033 all_clusters, or merge c with existing cluster. */
1034
1035static void
1036set_cluster (basic_block bb1, basic_block bb2)
1037{
1038 basic_block merge_bb, other_bb;
1039 bb_cluster merge, old, c;
1040
1041 if (BB_CLUSTER (bb1) == NULL && BB_CLUSTER (bb2) == NULL)
1042 {
1043 c = new_cluster ();
1044 add_bb_to_cluster (c, bb1);
1045 add_bb_to_cluster (c, bb2);
1046 BB_CLUSTER (bb1) = c;
1047 BB_CLUSTER (bb2) = c;
9771b263
DN
1048 c->index = all_clusters.length ();
1049 all_clusters.safe_push (c);
c9e93168
TV
1050 }
1051 else if (BB_CLUSTER (bb1) == NULL || BB_CLUSTER (bb2) == NULL)
1052 {
1053 merge_bb = BB_CLUSTER (bb1) == NULL ? bb2 : bb1;
1054 other_bb = BB_CLUSTER (bb1) == NULL ? bb1 : bb2;
1055 merge = BB_CLUSTER (merge_bb);
1056 add_bb_to_cluster (merge, other_bb);
1057 BB_CLUSTER (other_bb) = merge;
1058 }
1059 else if (BB_CLUSTER (bb1) != BB_CLUSTER (bb2))
1060 {
1061 unsigned int i;
1062 bitmap_iterator bi;
1063
1064 old = BB_CLUSTER (bb2);
1065 merge = BB_CLUSTER (bb1);
1066 merge_clusters (merge, old);
1067 EXECUTE_IF_SET_IN_BITMAP (old->bbs, 0, i, bi)
1068 BB_CLUSTER (BASIC_BLOCK (i)) = merge;
9771b263 1069 all_clusters[old->index] = NULL;
c9e93168
TV
1070 update_rep_bb (merge, old->rep_bb);
1071 delete_cluster (old);
1072 }
1073 else
1074 gcc_unreachable ();
1075}
1076
1077/* Return true if gimple statements S1 and S2 are equal. Gimple_bb (s1) and
1078 gimple_bb (s2) are members of SAME_SUCC. */
1079
1080static bool
1081gimple_equal_p (same_succ same_succ, gimple s1, gimple s2)
1082{
1083 unsigned int i;
1084 tree lhs1, lhs2;
1085 basic_block bb1 = gimple_bb (s1), bb2 = gimple_bb (s2);
1086 tree t1, t2;
1087 bool equal, inv_cond;
1088 enum tree_code code1, code2;
1089
1090 if (gimple_code (s1) != gimple_code (s2))
1091 return false;
1092
1093 switch (gimple_code (s1))
1094 {
1095 case GIMPLE_CALL:
1096 if (gimple_call_num_args (s1) != gimple_call_num_args (s2))
1097 return false;
1098 if (!gimple_call_same_target_p (s1, s2))
1099 return false;
1100
feca8f5a
TR
1101 /* Eventually, we'll significantly complicate the CFG by adding
1102 back edges to properly model the effects of transaction restart.
1103 For the bulk of optimization this does not matter, but what we
1104 cannot recover from is tail merging blocks between two separate
1105 transactions. Avoid that by making commit not match. */
1106 if (gimple_call_builtin_p (s1, BUILT_IN_TM_COMMIT))
1107 return false;
1108
c9e93168
TV
1109 equal = true;
1110 for (i = 0; i < gimple_call_num_args (s1); ++i)
1111 {
1112 t1 = gimple_call_arg (s1, i);
1113 t2 = gimple_call_arg (s2, i);
1114 if (operand_equal_p (t1, t2, 0))
1115 continue;
1116 if (gvn_uses_equal (t1, t2))
1117 continue;
1118 equal = false;
1119 break;
1120 }
e6fa9204
JJ
1121 if (!equal)
1122 return false;
c9e93168
TV
1123
1124 lhs1 = gimple_get_lhs (s1);
1125 lhs2 = gimple_get_lhs (s2);
e6fa9204
JJ
1126 if (lhs1 == NULL_TREE && lhs2 == NULL_TREE)
1127 return true;
1128 if (lhs1 == NULL_TREE || lhs2 == NULL_TREE)
1129 return false;
1130 if (TREE_CODE (lhs1) == SSA_NAME && TREE_CODE (lhs2) == SSA_NAME)
1131 return vn_valueize (lhs1) == vn_valueize (lhs2);
1132 return operand_equal_p (lhs1, lhs2, 0);
c9e93168
TV
1133
1134 case GIMPLE_ASSIGN:
1135 lhs1 = gimple_get_lhs (s1);
1136 lhs2 = gimple_get_lhs (s2);
fcfa87ac
RB
1137 if (TREE_CODE (lhs1) != SSA_NAME
1138 && TREE_CODE (lhs2) != SSA_NAME)
1139 return (vn_valueize (gimple_vdef (s1))
1140 == vn_valueize (gimple_vdef (s2)));
1141 else if (TREE_CODE (lhs1) == SSA_NAME
1142 && TREE_CODE (lhs2) == SSA_NAME)
1143 return vn_valueize (lhs1) == vn_valueize (lhs2);
1144 return false;
c9e93168
TV
1145
1146 case GIMPLE_COND:
1147 t1 = gimple_cond_lhs (s1);
1148 t2 = gimple_cond_lhs (s2);
1149 if (!operand_equal_p (t1, t2, 0)
1150 && !gvn_uses_equal (t1, t2))
1151 return false;
1152
1153 t1 = gimple_cond_rhs (s1);
1154 t2 = gimple_cond_rhs (s2);
1155 if (!operand_equal_p (t1, t2, 0)
1156 && !gvn_uses_equal (t1, t2))
1157 return false;
1158
1159 code1 = gimple_expr_code (s1);
1160 code2 = gimple_expr_code (s2);
1161 inv_cond = (bitmap_bit_p (same_succ->inverse, bb1->index)
1162 != bitmap_bit_p (same_succ->inverse, bb2->index));
1163 if (inv_cond)
1164 {
1165 bool honor_nans
1166 = HONOR_NANS (TYPE_MODE (TREE_TYPE (gimple_cond_lhs (s1))));
1167 code2 = invert_tree_comparison (code2, honor_nans);
1168 }
1169 return code1 == code2;
1170
1171 default:
1172 return false;
1173 }
1174}
1175
46301137
TV
1176/* Let GSI skip backwards over local defs. Return the earliest vuse in VUSE.
1177 Return true in VUSE_ESCAPED if the vuse influenced a SSA_OP_DEF of one of the
1178 processed statements. */
c9e93168
TV
1179
1180static void
46301137
TV
1181gsi_advance_bw_nondebug_nonlocal (gimple_stmt_iterator *gsi, tree *vuse,
1182 bool *vuse_escaped)
c9e93168
TV
1183{
1184 gimple stmt;
46301137 1185 tree lvuse;
c9e93168
TV
1186
1187 while (true)
1188 {
1189 if (gsi_end_p (*gsi))
1190 return;
1191 stmt = gsi_stmt (*gsi);
46301137
TV
1192
1193 lvuse = gimple_vuse (stmt);
1194 if (lvuse != NULL_TREE)
1195 {
1196 *vuse = lvuse;
1197 if (!ZERO_SSA_OPERANDS (stmt, SSA_OP_DEF))
1198 *vuse_escaped = true;
1199 }
1200
b2b2f160 1201 if (!stmt_local_def (stmt))
c9e93168
TV
1202 return;
1203 gsi_prev_nondebug (gsi);
1204 }
1205}
1206
1207/* Determines whether BB1 and BB2 (members of same_succ) are duplicates. If so,
1208 clusters them. */
1209
1210static void
1211find_duplicate (same_succ same_succ, basic_block bb1, basic_block bb2)
1212{
1213 gimple_stmt_iterator gsi1 = gsi_last_nondebug_bb (bb1);
1214 gimple_stmt_iterator gsi2 = gsi_last_nondebug_bb (bb2);
46301137
TV
1215 tree vuse1 = NULL_TREE, vuse2 = NULL_TREE;
1216 bool vuse_escaped = false;
c9e93168 1217
46301137
TV
1218 gsi_advance_bw_nondebug_nonlocal (&gsi1, &vuse1, &vuse_escaped);
1219 gsi_advance_bw_nondebug_nonlocal (&gsi2, &vuse2, &vuse_escaped);
c9e93168
TV
1220
1221 while (!gsi_end_p (gsi1) && !gsi_end_p (gsi2))
1222 {
7ec88701
RH
1223 gimple stmt1 = gsi_stmt (gsi1);
1224 gimple stmt2 = gsi_stmt (gsi2);
1225
1226 if (!gimple_equal_p (same_succ, stmt1, stmt2))
1227 return;
1228
1229 // We cannot tail-merge the builtins that end transactions.
1230 // ??? The alternative being unsharing of BBs in the tm_init pass.
1231 if (flag_tm
1232 && is_gimple_call (stmt1)
1233 && (gimple_call_flags (stmt1) & ECF_TM_BUILTIN)
1234 && is_tm_ending_fndecl (gimple_call_fndecl (stmt1)))
c9e93168
TV
1235 return;
1236
1237 gsi_prev_nondebug (&gsi1);
1238 gsi_prev_nondebug (&gsi2);
46301137
TV
1239 gsi_advance_bw_nondebug_nonlocal (&gsi1, &vuse1, &vuse_escaped);
1240 gsi_advance_bw_nondebug_nonlocal (&gsi2, &vuse2, &vuse_escaped);
c9e93168
TV
1241 }
1242
1243 if (!(gsi_end_p (gsi1) && gsi_end_p (gsi2)))
1244 return;
1245
46301137
TV
1246 /* If the incoming vuses are not the same, and the vuse escaped into an
1247 SSA_OP_DEF, then merging the 2 blocks will change the value of the def,
1248 which potentially means the semantics of one of the blocks will be changed.
1249 TODO: make this check more precise. */
1250 if (vuse_escaped && vuse1 != vuse2)
1251 return;
1252
c9e93168
TV
1253 if (dump_file)
1254 fprintf (dump_file, "find_duplicates: <bb %d> duplicate of <bb %d>\n",
1255 bb1->index, bb2->index);
1256
1257 set_cluster (bb1, bb2);
1258}
1259
1260/* Returns whether for all phis in DEST the phi alternatives for E1 and
1261 E2 are equal. */
1262
1263static bool
1264same_phi_alternatives_1 (basic_block dest, edge e1, edge e2)
1265{
1266 int n1 = e1->dest_idx, n2 = e2->dest_idx;
1267 gimple_stmt_iterator gsi;
1268
1269 for (gsi = gsi_start_phis (dest); !gsi_end_p (gsi); gsi_next (&gsi))
1270 {
1271 gimple phi = gsi_stmt (gsi);
1272 tree lhs = gimple_phi_result (phi);
1273 tree val1 = gimple_phi_arg_def (phi, n1);
1274 tree val2 = gimple_phi_arg_def (phi, n2);
1275
ea057359 1276 if (virtual_operand_p (lhs))
c9e93168
TV
1277 continue;
1278
1279 if (operand_equal_for_phi_arg_p (val1, val2))
1280 continue;
1281 if (gvn_uses_equal (val1, val2))
1282 continue;
1283
1284 return false;
1285 }
1286
1287 return true;
1288}
1289
1290/* Returns whether for all successors of BB1 and BB2 (members of SAME_SUCC), the
1291 phi alternatives for BB1 and BB2 are equal. */
1292
1293static bool
1294same_phi_alternatives (same_succ same_succ, basic_block bb1, basic_block bb2)
1295{
1296 unsigned int s;
1297 bitmap_iterator bs;
1298 edge e1, e2;
1299 basic_block succ;
1300
1301 EXECUTE_IF_SET_IN_BITMAP (same_succ->succs, 0, s, bs)
1302 {
1303 succ = BASIC_BLOCK (s);
1304 e1 = find_edge (bb1, succ);
1305 e2 = find_edge (bb2, succ);
1306 if (e1->flags & EDGE_COMPLEX
1307 || e2->flags & EDGE_COMPLEX)
1308 return false;
1309
1310 /* For all phis in bb, the phi alternatives for e1 and e2 need to have
1311 the same value. */
1312 if (!same_phi_alternatives_1 (succ, e1, e2))
1313 return false;
1314 }
1315
1316 return true;
1317}
1318
1319/* Return true if BB has non-vop phis. */
1320
1321static bool
1322bb_has_non_vop_phi (basic_block bb)
1323{
1324 gimple_seq phis = phi_nodes (bb);
1325 gimple phi;
1326
1327 if (phis == NULL)
1328 return false;
1329
1330 if (!gimple_seq_singleton_p (phis))
1331 return true;
1332
1333 phi = gimple_seq_first_stmt (phis);
ea057359 1334 return !virtual_operand_p (gimple_phi_result (phi));
c9e93168
TV
1335}
1336
1337/* Returns true if redirecting the incoming edges of FROM to TO maintains the
1338 invariant that uses in FROM are dominates by their defs. */
1339
1340static bool
1341deps_ok_for_redirect_from_bb_to_bb (basic_block from, basic_block to)
1342{
1343 basic_block cd, dep_bb = BB_DEP_BB (to);
1344 edge_iterator ei;
1345 edge e;
1346 bitmap from_preds = BITMAP_ALLOC (NULL);
1347
1348 if (dep_bb == NULL)
1349 return true;
1350
1351 FOR_EACH_EDGE (e, ei, from->preds)
1352 bitmap_set_bit (from_preds, e->src->index);
1353 cd = nearest_common_dominator_for_set (CDI_DOMINATORS, from_preds);
1354 BITMAP_FREE (from_preds);
1355
1356 return dominated_by_p (CDI_DOMINATORS, dep_bb, cd);
1357}
1358
1359/* Returns true if replacing BB1 (or its replacement bb) by BB2 (or its
1360 replacement bb) and vice versa maintains the invariant that uses in the
1361 replacement are dominates by their defs. */
1362
1363static bool
1364deps_ok_for_redirect (basic_block bb1, basic_block bb2)
1365{
1366 if (BB_CLUSTER (bb1) != NULL)
1367 bb1 = BB_CLUSTER (bb1)->rep_bb;
1368
1369 if (BB_CLUSTER (bb2) != NULL)
1370 bb2 = BB_CLUSTER (bb2)->rep_bb;
1371
1372 return (deps_ok_for_redirect_from_bb_to_bb (bb1, bb2)
1373 && deps_ok_for_redirect_from_bb_to_bb (bb2, bb1));
1374}
1375
1376/* Within SAME_SUCC->bbs, find clusters of bbs which can be merged. */
1377
1378static void
1379find_clusters_1 (same_succ same_succ)
1380{
1381 basic_block bb1, bb2;
1382 unsigned int i, j;
1383 bitmap_iterator bi, bj;
1384 int nr_comparisons;
1385 int max_comparisons = PARAM_VALUE (PARAM_MAX_TAIL_MERGE_COMPARISONS);
1386
1387 EXECUTE_IF_SET_IN_BITMAP (same_succ->bbs, 0, i, bi)
1388 {
1389 bb1 = BASIC_BLOCK (i);
1390
1391 /* TODO: handle blocks with phi-nodes. We'll have to find corresponding
1392 phi-nodes in bb1 and bb2, with the same alternatives for the same
1393 preds. */
1394 if (bb_has_non_vop_phi (bb1))
1395 continue;
1396
1397 nr_comparisons = 0;
1398 EXECUTE_IF_SET_IN_BITMAP (same_succ->bbs, i + 1, j, bj)
1399 {
1400 bb2 = BASIC_BLOCK (j);
1401
1402 if (bb_has_non_vop_phi (bb2))
1403 continue;
1404
1405 if (BB_CLUSTER (bb1) != NULL && BB_CLUSTER (bb1) == BB_CLUSTER (bb2))
1406 continue;
1407
1408 /* Limit quadratic behaviour. */
1409 nr_comparisons++;
1410 if (nr_comparisons > max_comparisons)
1411 break;
1412
1413 /* This is a conservative dependency check. We could test more
1414 precise for allowed replacement direction. */
1415 if (!deps_ok_for_redirect (bb1, bb2))
1416 continue;
1417
1418 if (!(same_phi_alternatives (same_succ, bb1, bb2)))
1419 continue;
1420
1421 find_duplicate (same_succ, bb1, bb2);
1422 }
1423 }
1424}
1425
1426/* Find clusters of bbs which can be merged. */
1427
1428static void
1429find_clusters (void)
1430{
1431 same_succ same;
1432
9771b263 1433 while (!worklist.is_empty ())
c9e93168 1434 {
9771b263 1435 same = worklist.pop ();
c9e93168
TV
1436 same->in_worklist = false;
1437 if (dump_file && (dump_flags & TDF_DETAILS))
1438 {
1439 fprintf (dump_file, "processing worklist entry\n");
1440 same_succ_print (dump_file, same);
1441 }
1442 find_clusters_1 (same);
1443 }
1444}
1445
c9e93168
TV
1446/* Returns the vop phi of BB, if any. */
1447
1448static gimple
1449vop_phi (basic_block bb)
1450{
1451 gimple stmt;
1452 gimple_stmt_iterator gsi;
1453 for (gsi = gsi_start_phis (bb); !gsi_end_p (gsi); gsi_next (&gsi))
1454 {
1455 stmt = gsi_stmt (gsi);
ea057359 1456 if (! virtual_operand_p (gimple_phi_result (stmt)))
c9e93168
TV
1457 continue;
1458 return stmt;
1459 }
1460 return NULL;
1461}
1462
643400b8 1463/* Redirect all edges from BB1 to BB2, removes BB1 and marks it as removed. */
c9e93168
TV
1464
1465static void
643400b8 1466replace_block_by (basic_block bb1, basic_block bb2)
c9e93168
TV
1467{
1468 edge pred_edge;
adc7a812
TJ
1469 edge e1;
1470 edge_iterator ei;
c9e93168 1471 unsigned int i;
643400b8 1472 gimple bb2_phi;
40f73edd 1473
643400b8 1474 bb2_phi = vop_phi (bb2);
c9e93168 1475
643400b8
TV
1476 /* Mark the basic block as deleted. */
1477 mark_basic_block_deleted (bb1);
c9e93168
TV
1478
1479 /* Redirect the incoming edges of bb1 to bb2. */
1480 for (i = EDGE_COUNT (bb1->preds); i > 0 ; --i)
1481 {
1482 pred_edge = EDGE_PRED (bb1, i - 1);
1483 pred_edge = redirect_edge_and_branch (pred_edge, bb2);
1484 gcc_assert (pred_edge != NULL);
643400b8
TV
1485
1486 if (bb2_phi == NULL)
1487 continue;
1488
1489 /* The phi might have run out of capacity when the redirect added an
1490 argument, which means it could have been replaced. Refresh it. */
1491 bb2_phi = vop_phi (bb2);
1492
1493 add_phi_arg (bb2_phi, SSA_NAME_VAR (gimple_phi_result (bb2_phi)),
9e227d60 1494 pred_edge, UNKNOWN_LOCATION);
c9e93168
TV
1495 }
1496
fa405d7b
TV
1497 bb2->frequency += bb1->frequency;
1498 if (bb2->frequency > BB_FREQ_MAX)
1499 bb2->frequency = BB_FREQ_MAX;
55fcb901
CB
1500
1501 bb2->count += bb1->count;
fa405d7b 1502
adc7a812
TJ
1503 /* Merge the outgoing edge counts from bb1 onto bb2. */
1504 FOR_EACH_EDGE (e1, ei, bb1->succs)
1505 {
1506 edge e2;
1507 e2 = find_edge (bb2, e1->dest);
1508 gcc_assert (e2);
1509 e2->count += e1->count;
1510 /* Recompute the probability from the new merged edge count (bb2->count
1511 was updated above). */
1512 e2->probability = GCOV_COMPUTE_SCALE (e2->count, bb2->count);
1513 }
1514
a31895d7 1515 /* Do updates that use bb1, before deleting bb1. */
fcddd80e 1516 release_last_vdef (bb1);
a31895d7
TV
1517 same_succ_flush_bb (bb1);
1518
643400b8 1519 delete_basic_block (bb1);
c9e93168
TV
1520}
1521
1522/* Bbs for which update_debug_stmt need to be called. */
1523
1524static bitmap update_bbs;
1525
1526/* For each cluster in all_clusters, merge all cluster->bbs. Returns
643400b8 1527 number of bbs removed. */
c9e93168
TV
1528
1529static int
643400b8 1530apply_clusters (void)
c9e93168
TV
1531{
1532 basic_block bb1, bb2;
1533 bb_cluster c;
1534 unsigned int i, j;
1535 bitmap_iterator bj;
1536 int nr_bbs_removed = 0;
1537
9771b263 1538 for (i = 0; i < all_clusters.length (); ++i)
c9e93168 1539 {
9771b263 1540 c = all_clusters[i];
c9e93168
TV
1541 if (c == NULL)
1542 continue;
1543
1544 bb2 = c->rep_bb;
1545 bitmap_set_bit (update_bbs, bb2->index);
1546
1547 bitmap_clear_bit (c->bbs, bb2->index);
1548 EXECUTE_IF_SET_IN_BITMAP (c->bbs, 0, j, bj)
1549 {
1550 bb1 = BASIC_BLOCK (j);
1551 bitmap_clear_bit (update_bbs, bb1->index);
1552
643400b8 1553 replace_block_by (bb1, bb2);
c9e93168
TV
1554 nr_bbs_removed++;
1555 }
1556 }
1557
1558 return nr_bbs_removed;
1559}
1560
1561/* Resets debug statement STMT if it has uses that are not dominated by their
1562 defs. */
1563
1564static void
1565update_debug_stmt (gimple stmt)
1566{
1567 use_operand_p use_p;
1568 ssa_op_iter oi;
1569 basic_block bbdef, bbuse;
1570 gimple def_stmt;
1571 tree name;
1572
1573 if (!gimple_debug_bind_p (stmt))
1574 return;
1575
1576 bbuse = gimple_bb (stmt);
1577 FOR_EACH_PHI_OR_STMT_USE (use_p, stmt, oi, SSA_OP_USE)
1578 {
1579 name = USE_FROM_PTR (use_p);
1580 gcc_assert (TREE_CODE (name) == SSA_NAME);
1581
1582 def_stmt = SSA_NAME_DEF_STMT (name);
1583 gcc_assert (def_stmt != NULL);
1584
1585 bbdef = gimple_bb (def_stmt);
1586 if (bbdef == NULL || bbuse == bbdef
1587 || dominated_by_p (CDI_DOMINATORS, bbuse, bbdef))
1588 continue;
1589
1590 gimple_debug_bind_reset_value (stmt);
1591 update_stmt (stmt);
1592 }
1593}
1594
1595/* Resets all debug statements that have uses that are not
1596 dominated by their defs. */
1597
1598static void
1599update_debug_stmts (void)
1600{
1601 basic_block bb;
1602 bitmap_iterator bi;
1603 unsigned int i;
1604
c9e93168
TV
1605 EXECUTE_IF_SET_IN_BITMAP (update_bbs, 0, i, bi)
1606 {
1607 gimple stmt;
1608 gimple_stmt_iterator gsi;
1609
1610 bb = BASIC_BLOCK (i);
1611 for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
1612 {
1613 stmt = gsi_stmt (gsi);
1614 if (!is_gimple_debug (stmt))
1615 continue;
1616 update_debug_stmt (stmt);
1617 }
1618 }
1619}
1620
1621/* Runs tail merge optimization. */
1622
1623unsigned int
1624tail_merge_optimize (unsigned int todo)
1625{
1626 int nr_bbs_removed_total = 0;
1627 int nr_bbs_removed;
1628 bool loop_entered = false;
1629 int iteration_nr = 0;
c9e93168
TV
1630 int max_iterations = PARAM_VALUE (PARAM_MAX_TAIL_MERGE_ITERATIONS);
1631
315bbd2e
TV
1632 if (!flag_tree_tail_merge
1633 || max_iterations == 0
1634 /* We try to be conservative with respect to loop structure, since:
1635 - the cases where tail-merging could both affect loop structure and be
c0d18c6c 1636 beneficial are rare,
315bbd2e
TV
1637 - it prevents us from having to fixup the loops using
1638 loops_state_set (LOOPS_NEED_FIXUP), and
1639 - keeping loop structure may allow us to simplify the pass.
1640 In order to be conservative, we need loop information. In rare cases
1641 (about 7 test-cases in the g++ testsuite) there is none (because
1642 loop_optimizer_finalize has been called before tail-merge, and
1643 PROP_loops is not set), so we bail out. */
1644 || current_loops == NULL)
c9e93168
TV
1645 return 0;
1646
1647 timevar_push (TV_TREE_TAIL_MERGE);
1648
9c370032
RB
1649 if (!dom_info_available_p (CDI_DOMINATORS))
1650 {
1651 /* PRE can leave us with unreachable blocks, remove them now. */
1652 delete_unreachable_blocks ();
1653 calculate_dominance_info (CDI_DOMINATORS);
1654 }
c9e93168
TV
1655 init_worklist ();
1656
9771b263 1657 while (!worklist.is_empty ())
c9e93168
TV
1658 {
1659 if (!loop_entered)
1660 {
1661 loop_entered = true;
1662 alloc_cluster_vectors ();
1663 update_bbs = BITMAP_ALLOC (NULL);
1664 }
1665 else
1666 reset_cluster_vectors ();
1667
1668 iteration_nr++;
1669 if (dump_file && (dump_flags & TDF_DETAILS))
1670 fprintf (dump_file, "worklist iteration #%d\n", iteration_nr);
1671
1672 find_clusters ();
9771b263
DN
1673 gcc_assert (worklist.is_empty ());
1674 if (all_clusters.is_empty ())
c9e93168
TV
1675 break;
1676
643400b8 1677 nr_bbs_removed = apply_clusters ();
c9e93168
TV
1678 nr_bbs_removed_total += nr_bbs_removed;
1679 if (nr_bbs_removed == 0)
1680 break;
1681
643400b8 1682 free_dominance_info (CDI_DOMINATORS);
c9e93168
TV
1683
1684 if (iteration_nr == max_iterations)
1685 break;
1686
643400b8 1687 calculate_dominance_info (CDI_DOMINATORS);
c9e93168
TV
1688 update_worklist ();
1689 }
1690
1691 if (dump_file && (dump_flags & TDF_DETAILS))
1692 fprintf (dump_file, "htab collision / search: %f\n",
0823efed 1693 same_succ_htab.collisions ());
c9e93168
TV
1694
1695 if (nr_bbs_removed_total > 0)
1696 {
643400b8
TV
1697 if (MAY_HAVE_DEBUG_STMTS)
1698 {
1699 calculate_dominance_info (CDI_DOMINATORS);
1700 update_debug_stmts ();
1701 }
c9e93168
TV
1702
1703 if (dump_file && (dump_flags & TDF_DETAILS))
1704 {
1705 fprintf (dump_file, "Before TODOs.\n");
1706 dump_function_to_file (current_function_decl, dump_file, dump_flags);
1707 }
1708
c634f4ba 1709 todo |= (TODO_verify_ssa | TODO_verify_stmts | TODO_verify_flow);
525174a2 1710 mark_virtual_operands_for_renaming (cfun);
c9e93168
TV
1711 }
1712
1713 delete_worklist ();
1714 if (loop_entered)
1715 {
1716 delete_cluster_vectors ();
1717 BITMAP_FREE (update_bbs);
1718 }
1719
1720 timevar_pop (TV_TREE_TAIL_MERGE);
1721
1722 return todo;
1723}