]> git.ipfire.org Git - thirdparty/gcc.git/blob - gcc/tree-ssa-tail-merge.c
revert: tree.h (phi_arg_d): New field.
[thirdparty/gcc.git] / gcc / tree-ssa-tail-merge.c
1 /* Tail merging for gimple.
2 Copyright (C) 2011, 2012 Free Software Foundation, Inc.
3 Contributed by Tom de Vries (tom@codesourcery.com)
4
5 This file is part of GCC.
6
7 GCC is free software; you can redistribute it and/or modify
8 it under the terms of the GNU General Public License as published by
9 the Free Software Foundation; either version 3, or (at your option)
10 any later version.
11
12 GCC is distributed in the hope that it will be useful,
13 but WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 GNU General Public License for more details.
16
17 You should have received a copy of the GNU General Public License
18 along with GCC; see the file COPYING3. If not see
19 <http://www.gnu.org/licenses/>. */
20
21 /* Pass overview.
22
23
24 MOTIVATIONAL EXAMPLE
25
26 gimple representation of gcc/testsuite/gcc.dg/pr43864.c at
27
28 hprofStartupp (charD.1 * outputFileNameD.2600, charD.1 * ctxD.2601)
29 {
30 struct FILED.1638 * fpD.2605;
31 charD.1 fileNameD.2604[1000];
32 intD.0 D.3915;
33 const charD.1 * restrict outputFileName.0D.3914;
34
35 # BLOCK 2 freq:10000
36 # PRED: ENTRY [100.0%] (fallthru,exec)
37 # PT = nonlocal { D.3926 } (restr)
38 outputFileName.0D.3914_3
39 = (const charD.1 * restrict) outputFileNameD.2600_2(D);
40 # .MEMD.3923_13 = VDEF <.MEMD.3923_12(D)>
41 # USE = nonlocal null { fileNameD.2604 D.3926 } (restr)
42 # CLB = nonlocal null { fileNameD.2604 D.3926 } (restr)
43 sprintfD.759 (&fileNameD.2604, outputFileName.0D.3914_3);
44 # .MEMD.3923_14 = VDEF <.MEMD.3923_13>
45 # USE = nonlocal null { fileNameD.2604 D.3926 } (restr)
46 # CLB = nonlocal null { fileNameD.2604 D.3926 } (restr)
47 D.3915_4 = accessD.2606 (&fileNameD.2604, 1);
48 if (D.3915_4 == 0)
49 goto <bb 3>;
50 else
51 goto <bb 4>;
52 # SUCC: 3 [10.0%] (true,exec) 4 [90.0%] (false,exec)
53
54 # BLOCK 3 freq:1000
55 # PRED: 2 [10.0%] (true,exec)
56 # .MEMD.3923_15 = VDEF <.MEMD.3923_14>
57 # USE = nonlocal null { fileNameD.2604 D.3926 } (restr)
58 # CLB = nonlocal null { fileNameD.2604 D.3926 } (restr)
59 freeD.898 (ctxD.2601_5(D));
60 goto <bb 7>;
61 # SUCC: 7 [100.0%] (fallthru,exec)
62
63 # BLOCK 4 freq:9000
64 # PRED: 2 [90.0%] (false,exec)
65 # .MEMD.3923_16 = VDEF <.MEMD.3923_14>
66 # PT = nonlocal escaped
67 # USE = nonlocal null { fileNameD.2604 D.3926 } (restr)
68 # CLB = nonlocal null { fileNameD.2604 D.3926 } (restr)
69 fpD.2605_8 = fopenD.1805 (&fileNameD.2604[0], 0B);
70 if (fpD.2605_8 == 0B)
71 goto <bb 5>;
72 else
73 goto <bb 6>;
74 # SUCC: 5 [1.9%] (true,exec) 6 [98.1%] (false,exec)
75
76 # BLOCK 5 freq:173
77 # PRED: 4 [1.9%] (true,exec)
78 # .MEMD.3923_17 = VDEF <.MEMD.3923_16>
79 # USE = nonlocal null { fileNameD.2604 D.3926 } (restr)
80 # CLB = nonlocal null { fileNameD.2604 D.3926 } (restr)
81 freeD.898 (ctxD.2601_5(D));
82 goto <bb 7>;
83 # SUCC: 7 [100.0%] (fallthru,exec)
84
85 # BLOCK 6 freq:8827
86 # PRED: 4 [98.1%] (false,exec)
87 # .MEMD.3923_18 = VDEF <.MEMD.3923_16>
88 # USE = nonlocal null { fileNameD.2604 D.3926 } (restr)
89 # CLB = nonlocal null { fileNameD.2604 D.3926 } (restr)
90 fooD.2599 (outputFileNameD.2600_2(D), fpD.2605_8);
91 # SUCC: 7 [100.0%] (fallthru,exec)
92
93 # BLOCK 7 freq:10000
94 # PRED: 3 [100.0%] (fallthru,exec) 5 [100.0%] (fallthru,exec)
95 6 [100.0%] (fallthru,exec)
96 # PT = nonlocal null
97
98 # ctxD.2601_1 = PHI <0B(3), 0B(5), ctxD.2601_5(D)(6)>
99 # .MEMD.3923_11 = PHI <.MEMD.3923_15(3), .MEMD.3923_17(5),
100 .MEMD.3923_18(6)>
101 # VUSE <.MEMD.3923_11>
102 return ctxD.2601_1;
103 # SUCC: EXIT [100.0%]
104 }
105
106 bb 3 and bb 5 can be merged. The blocks have different predecessors, but the
107 same successors, and the same operations.
108
109
110 CONTEXT
111
112 A technique called tail merging (or cross jumping) can fix the example
113 above. For a block, we look for common code at the end (the tail) of the
114 predecessor blocks, and insert jumps from one block to the other.
115 The example is a special case for tail merging, in that 2 whole blocks
116 can be merged, rather than just the end parts of it.
117 We currently only focus on whole block merging, so in that sense
118 calling this pass tail merge is a bit of a misnomer.
119
120 We distinguish 2 kinds of situations in which blocks can be merged:
121 - same operations, same predecessors. The successor edges coming from one
122 block are redirected to come from the other block.
123 - same operations, same successors. The predecessor edges entering one block
124 are redirected to enter the other block. Note that this operation might
125 involve introducing phi operations.
126
127 For efficient implementation, we would like to value numbers the blocks, and
128 have a comparison operator that tells us whether the blocks are equal.
129 Besides being runtime efficient, block value numbering should also abstract
130 from irrelevant differences in order of operations, much like normal value
131 numbering abstracts from irrelevant order of operations.
132
133 For the first situation (same_operations, same predecessors), normal value
134 numbering fits well. We can calculate a block value number based on the
135 value numbers of the defs and vdefs.
136
137 For the second situation (same operations, same successors), this approach
138 doesn't work so well. We can illustrate this using the example. The calls
139 to free use different vdefs: MEMD.3923_16 and MEMD.3923_14, and these will
140 remain different in value numbering, since they represent different memory
141 states. So the resulting vdefs of the frees will be different in value
142 numbering, so the block value numbers will be different.
143
144 The reason why we call the blocks equal is not because they define the same
145 values, but because uses in the blocks use (possibly different) defs in the
146 same way. To be able to detect this efficiently, we need to do some kind of
147 reverse value numbering, meaning number the uses rather than the defs, and
148 calculate a block value number based on the value number of the uses.
149 Ideally, a block comparison operator will also indicate which phis are needed
150 to merge the blocks.
151
152 For the moment, we don't do block value numbering, but we do insn-by-insn
153 matching, using scc value numbers to match operations with results, and
154 structural comparison otherwise, while ignoring vop mismatches.
155
156
157 IMPLEMENTATION
158
159 1. The pass first determines all groups of blocks with the same successor
160 blocks.
161 2. Within each group, it tries to determine clusters of equal basic blocks.
162 3. The clusters are applied.
163 4. The same successor groups are updated.
164 5. This process is repeated from 2 onwards, until no more changes.
165
166
167 LIMITATIONS/TODO
168
169 - block only
170 - handles only 'same operations, same successors'.
171 It handles same predecessors as a special subcase though.
172 - does not implement the reverse value numbering and block value numbering.
173 - improve memory allocation: use garbage collected memory, obstacks,
174 allocpools where appropriate.
175 - no insertion of gimple_reg phis, We only introduce vop-phis.
176 - handle blocks with gimple_reg phi_nodes.
177
178
179 SWITCHES
180
181 - ftree-tail-merge. On at -O2. We may have to enable it only at -Os. */
182
183 #include "config.h"
184 #include "system.h"
185 #include "coretypes.h"
186 #include "tm.h"
187 #include "tree.h"
188 #include "tm_p.h"
189 #include "basic-block.h"
190 #include "flags.h"
191 #include "function.h"
192 #include "tree-flow.h"
193 #include "timevar.h"
194 #include "bitmap.h"
195 #include "tree-ssa-alias.h"
196 #include "params.h"
197 #include "tree-pretty-print.h"
198 #include "hashtab.h"
199 #include "gimple-pretty-print.h"
200 #include "tree-ssa-sccvn.h"
201 #include "tree-dump.h"
202
203 /* Describes a group of bbs with the same successors. The successor bbs are
204 cached in succs, and the successor edge flags are cached in succ_flags.
205 If a bb has the EDGE_TRUE/VALSE_VALUE flags swapped compared to succ_flags,
206 it's marked in inverse.
207 Additionally, the hash value for the struct is cached in hashval, and
208 in_worklist indicates whether it's currently part of worklist. */
209
210 struct same_succ_def
211 {
212 /* The bbs that have the same successor bbs. */
213 bitmap bbs;
214 /* The successor bbs. */
215 bitmap succs;
216 /* Indicates whether the EDGE_TRUE/FALSE_VALUEs of succ_flags are swapped for
217 bb. */
218 bitmap inverse;
219 /* The edge flags for each of the successor bbs. */
220 VEC (int, heap) *succ_flags;
221 /* Indicates whether the struct is currently in the worklist. */
222 bool in_worklist;
223 /* The hash value of the struct. */
224 hashval_t hashval;
225 };
226 typedef struct same_succ_def *same_succ;
227 typedef const struct same_succ_def *const_same_succ;
228
229 /* A group of bbs where 1 bb from bbs can replace the other bbs. */
230
231 struct bb_cluster_def
232 {
233 /* The bbs in the cluster. */
234 bitmap bbs;
235 /* The preds of the bbs in the cluster. */
236 bitmap preds;
237 /* Index in all_clusters vector. */
238 int index;
239 /* The bb to replace the cluster with. */
240 basic_block rep_bb;
241 };
242 typedef struct bb_cluster_def *bb_cluster;
243 typedef const struct bb_cluster_def *const_bb_cluster;
244
245 /* Per bb-info. */
246
247 struct aux_bb_info
248 {
249 /* The number of non-debug statements in the bb. */
250 int size;
251 /* The same_succ that this bb is a member of. */
252 same_succ bb_same_succ;
253 /* The cluster that this bb is a member of. */
254 bb_cluster cluster;
255 /* The vop state at the exit of a bb. This is shortlived data, used to
256 communicate data between update_block_by and update_vuses. */
257 tree vop_at_exit;
258 /* The bb that either contains or is dominated by the dependencies of the
259 bb. */
260 basic_block dep_bb;
261 };
262
263 /* Macros to access the fields of struct aux_bb_info. */
264
265 #define BB_SIZE(bb) (((struct aux_bb_info *)bb->aux)->size)
266 #define BB_SAME_SUCC(bb) (((struct aux_bb_info *)bb->aux)->bb_same_succ)
267 #define BB_CLUSTER(bb) (((struct aux_bb_info *)bb->aux)->cluster)
268 #define BB_VOP_AT_EXIT(bb) (((struct aux_bb_info *)bb->aux)->vop_at_exit)
269 #define BB_DEP_BB(bb) (((struct aux_bb_info *)bb->aux)->dep_bb)
270
271 /* Returns true if the only effect a statement STMT has, is to define locally
272 used SSA_NAMEs. */
273
274 static bool
275 stmt_local_def (gimple stmt)
276 {
277 basic_block bb, def_bb;
278 imm_use_iterator iter;
279 use_operand_p use_p;
280 tree val;
281 def_operand_p def_p;
282
283 if (gimple_has_side_effects (stmt))
284 return false;
285
286 def_p = SINGLE_SSA_DEF_OPERAND (stmt, SSA_OP_DEF);
287 if (def_p == NULL)
288 return false;
289
290 val = DEF_FROM_PTR (def_p);
291 if (val == NULL_TREE || TREE_CODE (val) != SSA_NAME)
292 return false;
293
294 def_bb = gimple_bb (stmt);
295
296 FOR_EACH_IMM_USE_FAST (use_p, iter, val)
297 {
298 if (is_gimple_debug (USE_STMT (use_p)))
299 continue;
300 bb = gimple_bb (USE_STMT (use_p));
301 if (bb == def_bb)
302 continue;
303
304 if (gimple_code (USE_STMT (use_p)) == GIMPLE_PHI
305 && EDGE_PRED (bb, PHI_ARG_INDEX_FROM_USE (use_p))->src == def_bb)
306 continue;
307
308 return false;
309 }
310
311 return true;
312 }
313
314 /* Let GSI skip forwards over local defs. */
315
316 static void
317 gsi_advance_fw_nondebug_nonlocal (gimple_stmt_iterator *gsi)
318 {
319 gimple stmt;
320
321 while (true)
322 {
323 if (gsi_end_p (*gsi))
324 return;
325 stmt = gsi_stmt (*gsi);
326 if (!stmt_local_def (stmt))
327 return;
328 gsi_next_nondebug (gsi);
329 }
330 }
331
332 /* VAL1 and VAL2 are either:
333 - uses in BB1 and BB2, or
334 - phi alternatives for BB1 and BB2.
335 Return true if the uses have the same gvn value. */
336
337 static bool
338 gvn_uses_equal (tree val1, tree val2)
339 {
340 gcc_checking_assert (val1 != NULL_TREE && val2 != NULL_TREE);
341
342 if (val1 == val2)
343 return true;
344
345 if (vn_valueize (val1) != vn_valueize (val2))
346 return false;
347
348 return ((TREE_CODE (val1) == SSA_NAME || CONSTANT_CLASS_P (val1))
349 && (TREE_CODE (val2) == SSA_NAME || CONSTANT_CLASS_P (val2)));
350 }
351
352 /* Prints E to FILE. */
353
354 static void
355 same_succ_print (FILE *file, const same_succ e)
356 {
357 unsigned int i;
358 bitmap_print (file, e->bbs, "bbs:", "\n");
359 bitmap_print (file, e->succs, "succs:", "\n");
360 bitmap_print (file, e->inverse, "inverse:", "\n");
361 fprintf (file, "flags:");
362 for (i = 0; i < VEC_length (int, e->succ_flags); ++i)
363 fprintf (file, " %x", VEC_index (int, e->succ_flags, i));
364 fprintf (file, "\n");
365 }
366
367 /* Prints same_succ VE to VFILE. */
368
369 static int
370 same_succ_print_traverse (void **ve, void *vfile)
371 {
372 const same_succ e = *((const same_succ *)ve);
373 FILE *file = ((FILE*)vfile);
374 same_succ_print (file, e);
375 return 1;
376 }
377
378 /* Update BB_DEP_BB (USE_BB), given a use of VAL in USE_BB. */
379
380 static void
381 update_dep_bb (basic_block use_bb, tree val)
382 {
383 basic_block dep_bb;
384
385 /* Not a dep. */
386 if (TREE_CODE (val) != SSA_NAME)
387 return;
388
389 /* Skip use of global def. */
390 if (SSA_NAME_IS_DEFAULT_DEF (val))
391 return;
392
393 /* Skip use of local def. */
394 dep_bb = gimple_bb (SSA_NAME_DEF_STMT (val));
395 if (dep_bb == use_bb)
396 return;
397
398 if (BB_DEP_BB (use_bb) == NULL
399 || dominated_by_p (CDI_DOMINATORS, dep_bb, BB_DEP_BB (use_bb)))
400 BB_DEP_BB (use_bb) = dep_bb;
401 }
402
403 /* Update BB_DEP_BB, given the dependencies in STMT. */
404
405 static void
406 stmt_update_dep_bb (gimple stmt)
407 {
408 ssa_op_iter iter;
409 use_operand_p use;
410
411 FOR_EACH_SSA_USE_OPERAND (use, stmt, iter, SSA_OP_USE)
412 update_dep_bb (gimple_bb (stmt), USE_FROM_PTR (use));
413 }
414
415 /* Calculates hash value for same_succ VE. */
416
417 static hashval_t
418 same_succ_hash (const void *ve)
419 {
420 const_same_succ e = (const_same_succ)ve;
421 hashval_t hashval = bitmap_hash (e->succs);
422 int flags;
423 unsigned int i;
424 unsigned int first = bitmap_first_set_bit (e->bbs);
425 basic_block bb = BASIC_BLOCK (first);
426 int size = 0;
427 gimple_stmt_iterator gsi;
428 gimple stmt;
429 tree arg;
430 unsigned int s;
431 bitmap_iterator bs;
432
433 for (gsi = gsi_start_nondebug_bb (bb);
434 !gsi_end_p (gsi); gsi_next_nondebug (&gsi))
435 {
436 stmt = gsi_stmt (gsi);
437 stmt_update_dep_bb (stmt);
438 if (stmt_local_def (stmt))
439 continue;
440 size++;
441
442 hashval = iterative_hash_hashval_t (gimple_code (stmt), hashval);
443 if (is_gimple_assign (stmt))
444 hashval = iterative_hash_hashval_t (gimple_assign_rhs_code (stmt),
445 hashval);
446 if (!is_gimple_call (stmt))
447 continue;
448 if (gimple_call_internal_p (stmt))
449 hashval = iterative_hash_hashval_t
450 ((hashval_t) gimple_call_internal_fn (stmt), hashval);
451 else
452 hashval = iterative_hash_expr (gimple_call_fn (stmt), hashval);
453 for (i = 0; i < gimple_call_num_args (stmt); i++)
454 {
455 arg = gimple_call_arg (stmt, i);
456 arg = vn_valueize (arg);
457 hashval = iterative_hash_expr (arg, hashval);
458 }
459 }
460
461 hashval = iterative_hash_hashval_t (size, hashval);
462 BB_SIZE (bb) = size;
463
464 for (i = 0; i < VEC_length (int, e->succ_flags); ++i)
465 {
466 flags = VEC_index (int, e->succ_flags, i);
467 flags = flags & ~(EDGE_TRUE_VALUE | EDGE_FALSE_VALUE);
468 hashval = iterative_hash_hashval_t (flags, hashval);
469 }
470
471 EXECUTE_IF_SET_IN_BITMAP (e->succs, 0, s, bs)
472 {
473 int n = find_edge (bb, BASIC_BLOCK (s))->dest_idx;
474 for (gsi = gsi_start_phis (BASIC_BLOCK (s)); !gsi_end_p (gsi);
475 gsi_next (&gsi))
476 {
477 gimple phi = gsi_stmt (gsi);
478 tree lhs = gimple_phi_result (phi);
479 tree val = gimple_phi_arg_def (phi, n);
480
481 if (!is_gimple_reg (lhs))
482 continue;
483 update_dep_bb (bb, val);
484 }
485 }
486
487 return hashval;
488 }
489
490 /* Returns true if E1 and E2 have 2 successors, and if the successor flags
491 are inverse for the EDGE_TRUE_VALUE and EDGE_FALSE_VALUE flags, and equal for
492 the other edge flags. */
493
494 static bool
495 inverse_flags (const_same_succ e1, const_same_succ e2)
496 {
497 int f1a, f1b, f2a, f2b;
498 int mask = ~(EDGE_TRUE_VALUE | EDGE_FALSE_VALUE);
499
500 if (VEC_length (int, e1->succ_flags) != 2)
501 return false;
502
503 f1a = VEC_index (int, e1->succ_flags, 0);
504 f1b = VEC_index (int, e1->succ_flags, 1);
505 f2a = VEC_index (int, e2->succ_flags, 0);
506 f2b = VEC_index (int, e2->succ_flags, 1);
507
508 if (f1a == f2a && f1b == f2b)
509 return false;
510
511 return (f1a & mask) == (f2a & mask) && (f1b & mask) == (f2b & mask);
512 }
513
514 /* Compares SAME_SUCCs VE1 and VE2. */
515
516 static int
517 same_succ_equal (const void *ve1, const void *ve2)
518 {
519 const_same_succ e1 = (const_same_succ)ve1;
520 const_same_succ e2 = (const_same_succ)ve2;
521 unsigned int i, first1, first2;
522 gimple_stmt_iterator gsi1, gsi2;
523 gimple s1, s2;
524 basic_block bb1, bb2;
525
526 if (e1->hashval != e2->hashval)
527 return 0;
528
529 if (VEC_length (int, e1->succ_flags) != VEC_length (int, e2->succ_flags))
530 return 0;
531
532 if (!bitmap_equal_p (e1->succs, e2->succs))
533 return 0;
534
535 if (!inverse_flags (e1, e2))
536 {
537 for (i = 0; i < VEC_length (int, e1->succ_flags); ++i)
538 if (VEC_index (int, e1->succ_flags, i)
539 != VEC_index (int, e1->succ_flags, i))
540 return 0;
541 }
542
543 first1 = bitmap_first_set_bit (e1->bbs);
544 first2 = bitmap_first_set_bit (e2->bbs);
545
546 bb1 = BASIC_BLOCK (first1);
547 bb2 = BASIC_BLOCK (first2);
548
549 if (BB_SIZE (bb1) != BB_SIZE (bb2))
550 return 0;
551
552 gsi1 = gsi_start_nondebug_bb (bb1);
553 gsi2 = gsi_start_nondebug_bb (bb2);
554 gsi_advance_fw_nondebug_nonlocal (&gsi1);
555 gsi_advance_fw_nondebug_nonlocal (&gsi2);
556 while (!(gsi_end_p (gsi1) || gsi_end_p (gsi2)))
557 {
558 s1 = gsi_stmt (gsi1);
559 s2 = gsi_stmt (gsi2);
560 if (gimple_code (s1) != gimple_code (s2))
561 return 0;
562 if (is_gimple_call (s1) && !gimple_call_same_target_p (s1, s2))
563 return 0;
564 gsi_next_nondebug (&gsi1);
565 gsi_next_nondebug (&gsi2);
566 gsi_advance_fw_nondebug_nonlocal (&gsi1);
567 gsi_advance_fw_nondebug_nonlocal (&gsi2);
568 }
569
570 return 1;
571 }
572
573 /* Alloc and init a new SAME_SUCC. */
574
575 static same_succ
576 same_succ_alloc (void)
577 {
578 same_succ same = XNEW (struct same_succ_def);
579
580 same->bbs = BITMAP_ALLOC (NULL);
581 same->succs = BITMAP_ALLOC (NULL);
582 same->inverse = BITMAP_ALLOC (NULL);
583 same->succ_flags = VEC_alloc (int, heap, 10);
584 same->in_worklist = false;
585
586 return same;
587 }
588
589 /* Delete same_succ VE. */
590
591 static void
592 same_succ_delete (void *ve)
593 {
594 same_succ e = (same_succ)ve;
595
596 BITMAP_FREE (e->bbs);
597 BITMAP_FREE (e->succs);
598 BITMAP_FREE (e->inverse);
599 VEC_free (int, heap, e->succ_flags);
600
601 XDELETE (ve);
602 }
603
604 /* Reset same_succ SAME. */
605
606 static void
607 same_succ_reset (same_succ same)
608 {
609 bitmap_clear (same->bbs);
610 bitmap_clear (same->succs);
611 bitmap_clear (same->inverse);
612 VEC_truncate (int, same->succ_flags, 0);
613 }
614
615 /* Hash table with all same_succ entries. */
616
617 static htab_t same_succ_htab;
618
619 /* Array that is used to store the edge flags for a successor. */
620
621 static int *same_succ_edge_flags;
622
623 /* Bitmap that is used to mark bbs that are recently deleted. */
624
625 static bitmap deleted_bbs;
626
627 /* Bitmap that is used to mark predecessors of bbs that are
628 deleted. */
629
630 static bitmap deleted_bb_preds;
631
632 /* Prints same_succ_htab to stderr. */
633
634 extern void debug_same_succ (void);
635 DEBUG_FUNCTION void
636 debug_same_succ ( void)
637 {
638 htab_traverse (same_succ_htab, same_succ_print_traverse, stderr);
639 }
640
641 DEF_VEC_P (same_succ);
642 DEF_VEC_ALLOC_P (same_succ, heap);
643
644 /* Vector of bbs to process. */
645
646 static VEC (same_succ, heap) *worklist;
647
648 /* Prints worklist to FILE. */
649
650 static void
651 print_worklist (FILE *file)
652 {
653 unsigned int i;
654 for (i = 0; i < VEC_length (same_succ, worklist); ++i)
655 same_succ_print (file, VEC_index (same_succ, worklist, i));
656 }
657
658 /* Adds SAME to worklist. */
659
660 static void
661 add_to_worklist (same_succ same)
662 {
663 if (same->in_worklist)
664 return;
665
666 if (bitmap_count_bits (same->bbs) < 2)
667 return;
668
669 same->in_worklist = true;
670 VEC_safe_push (same_succ, heap, worklist, same);
671 }
672
673 /* Add BB to same_succ_htab. */
674
675 static void
676 find_same_succ_bb (basic_block bb, same_succ *same_p)
677 {
678 unsigned int j;
679 bitmap_iterator bj;
680 same_succ same = *same_p;
681 same_succ *slot;
682 edge_iterator ei;
683 edge e;
684
685 if (bb == NULL)
686 return;
687 bitmap_set_bit (same->bbs, bb->index);
688 FOR_EACH_EDGE (e, ei, bb->succs)
689 {
690 int index = e->dest->index;
691 bitmap_set_bit (same->succs, index);
692 same_succ_edge_flags[index] = e->flags;
693 }
694 EXECUTE_IF_SET_IN_BITMAP (same->succs, 0, j, bj)
695 VEC_safe_push (int, heap, same->succ_flags, same_succ_edge_flags[j]);
696
697 same->hashval = same_succ_hash (same);
698
699 slot = (same_succ *) htab_find_slot_with_hash (same_succ_htab, same,
700 same->hashval, INSERT);
701 if (*slot == NULL)
702 {
703 *slot = same;
704 BB_SAME_SUCC (bb) = same;
705 add_to_worklist (same);
706 *same_p = NULL;
707 }
708 else
709 {
710 bitmap_set_bit ((*slot)->bbs, bb->index);
711 BB_SAME_SUCC (bb) = *slot;
712 add_to_worklist (*slot);
713 if (inverse_flags (same, *slot))
714 bitmap_set_bit ((*slot)->inverse, bb->index);
715 same_succ_reset (same);
716 }
717 }
718
719 /* Find bbs with same successors. */
720
721 static void
722 find_same_succ (void)
723 {
724 same_succ same = same_succ_alloc ();
725 basic_block bb;
726
727 FOR_EACH_BB (bb)
728 {
729 find_same_succ_bb (bb, &same);
730 if (same == NULL)
731 same = same_succ_alloc ();
732 }
733
734 same_succ_delete (same);
735 }
736
737 /* Initializes worklist administration. */
738
739 static void
740 init_worklist (void)
741 {
742 alloc_aux_for_blocks (sizeof (struct aux_bb_info));
743 same_succ_htab
744 = htab_create (n_basic_blocks, same_succ_hash, same_succ_equal,
745 same_succ_delete);
746 same_succ_edge_flags = XCNEWVEC (int, last_basic_block);
747 deleted_bbs = BITMAP_ALLOC (NULL);
748 deleted_bb_preds = BITMAP_ALLOC (NULL);
749 worklist = VEC_alloc (same_succ, heap, n_basic_blocks);
750 find_same_succ ();
751
752 if (dump_file && (dump_flags & TDF_DETAILS))
753 {
754 fprintf (dump_file, "initial worklist:\n");
755 print_worklist (dump_file);
756 }
757 }
758
759 /* Deletes worklist administration. */
760
761 static void
762 delete_worklist (void)
763 {
764 free_aux_for_blocks ();
765 htab_delete (same_succ_htab);
766 same_succ_htab = NULL;
767 XDELETEVEC (same_succ_edge_flags);
768 same_succ_edge_flags = NULL;
769 BITMAP_FREE (deleted_bbs);
770 BITMAP_FREE (deleted_bb_preds);
771 VEC_free (same_succ, heap, worklist);
772 }
773
774 /* Mark BB as deleted, and mark its predecessors. */
775
776 static void
777 mark_basic_block_deleted (basic_block bb)
778 {
779 edge e;
780 edge_iterator ei;
781
782 bitmap_set_bit (deleted_bbs, bb->index);
783
784 FOR_EACH_EDGE (e, ei, bb->preds)
785 bitmap_set_bit (deleted_bb_preds, e->src->index);
786 }
787
788 /* Removes BB from its corresponding same_succ. */
789
790 static void
791 same_succ_flush_bb (basic_block bb)
792 {
793 same_succ same = BB_SAME_SUCC (bb);
794 BB_SAME_SUCC (bb) = NULL;
795 if (bitmap_single_bit_set_p (same->bbs))
796 htab_remove_elt_with_hash (same_succ_htab, same, same->hashval);
797 else
798 bitmap_clear_bit (same->bbs, bb->index);
799 }
800
801 /* Removes all bbs in BBS from their corresponding same_succ. */
802
803 static void
804 same_succ_flush_bbs (bitmap bbs)
805 {
806 unsigned int i;
807 bitmap_iterator bi;
808
809 EXECUTE_IF_SET_IN_BITMAP (bbs, 0, i, bi)
810 same_succ_flush_bb (BASIC_BLOCK (i));
811 }
812
813 /* Release the last vdef in BB, either normal or phi result. */
814
815 static void
816 release_last_vdef (basic_block bb)
817 {
818 gimple_stmt_iterator i;
819
820 for (i = gsi_last_bb (bb); !gsi_end_p (i); gsi_prev_nondebug (&i))
821 {
822 gimple stmt = gsi_stmt (i);
823 if (gimple_vdef (stmt) == NULL_TREE)
824 continue;
825
826 mark_virtual_operand_for_renaming (gimple_vdef (stmt));
827 return;
828 }
829
830 for (i = gsi_start_phis (bb); !gsi_end_p (i); gsi_next (&i))
831 {
832 gimple phi = gsi_stmt (i);
833 tree res = gimple_phi_result (phi);
834
835 if (is_gimple_reg (res))
836 continue;
837
838 mark_virtual_phi_result_for_renaming (phi);
839 return;
840 }
841
842 }
843
844 /* For deleted_bb_preds, find bbs with same successors. */
845
846 static void
847 update_worklist (void)
848 {
849 unsigned int i;
850 bitmap_iterator bi;
851 basic_block bb;
852 same_succ same;
853
854 bitmap_and_compl_into (deleted_bb_preds, deleted_bbs);
855 bitmap_clear (deleted_bbs);
856
857 bitmap_clear_bit (deleted_bb_preds, ENTRY_BLOCK);
858 same_succ_flush_bbs (deleted_bb_preds);
859
860 same = same_succ_alloc ();
861 EXECUTE_IF_SET_IN_BITMAP (deleted_bb_preds, 0, i, bi)
862 {
863 bb = BASIC_BLOCK (i);
864 gcc_assert (bb != NULL);
865 find_same_succ_bb (bb, &same);
866 if (same == NULL)
867 same = same_succ_alloc ();
868 }
869 same_succ_delete (same);
870 bitmap_clear (deleted_bb_preds);
871 }
872
873 /* Prints cluster C to FILE. */
874
875 static void
876 print_cluster (FILE *file, bb_cluster c)
877 {
878 if (c == NULL)
879 return;
880 bitmap_print (file, c->bbs, "bbs:", "\n");
881 bitmap_print (file, c->preds, "preds:", "\n");
882 }
883
884 /* Prints cluster C to stderr. */
885
886 extern void debug_cluster (bb_cluster);
887 DEBUG_FUNCTION void
888 debug_cluster (bb_cluster c)
889 {
890 print_cluster (stderr, c);
891 }
892
893 /* Update C->rep_bb, given that BB is added to the cluster. */
894
895 static void
896 update_rep_bb (bb_cluster c, basic_block bb)
897 {
898 /* Initial. */
899 if (c->rep_bb == NULL)
900 {
901 c->rep_bb = bb;
902 return;
903 }
904
905 /* Current needs no deps, keep it. */
906 if (BB_DEP_BB (c->rep_bb) == NULL)
907 return;
908
909 /* Bb needs no deps, change rep_bb. */
910 if (BB_DEP_BB (bb) == NULL)
911 {
912 c->rep_bb = bb;
913 return;
914 }
915
916 /* Bb needs last deps earlier than current, change rep_bb. A potential
917 problem with this, is that the first deps might also be earlier, which
918 would mean we prefer longer lifetimes for the deps. To be able to check
919 for this, we would have to trace BB_FIRST_DEP_BB as well, besides
920 BB_DEP_BB, which is really BB_LAST_DEP_BB.
921 The benefit of choosing the bb with last deps earlier, is that it can
922 potentially be used as replacement for more bbs. */
923 if (dominated_by_p (CDI_DOMINATORS, BB_DEP_BB (c->rep_bb), BB_DEP_BB (bb)))
924 c->rep_bb = bb;
925 }
926
927 /* Add BB to cluster C. Sets BB in C->bbs, and preds of BB in C->preds. */
928
929 static void
930 add_bb_to_cluster (bb_cluster c, basic_block bb)
931 {
932 edge e;
933 edge_iterator ei;
934
935 bitmap_set_bit (c->bbs, bb->index);
936
937 FOR_EACH_EDGE (e, ei, bb->preds)
938 bitmap_set_bit (c->preds, e->src->index);
939
940 update_rep_bb (c, bb);
941 }
942
943 /* Allocate and init new cluster. */
944
945 static bb_cluster
946 new_cluster (void)
947 {
948 bb_cluster c;
949 c = XCNEW (struct bb_cluster_def);
950 c->bbs = BITMAP_ALLOC (NULL);
951 c->preds = BITMAP_ALLOC (NULL);
952 c->rep_bb = NULL;
953 return c;
954 }
955
956 /* Delete clusters. */
957
958 static void
959 delete_cluster (bb_cluster c)
960 {
961 if (c == NULL)
962 return;
963 BITMAP_FREE (c->bbs);
964 BITMAP_FREE (c->preds);
965 XDELETE (c);
966 }
967
968 DEF_VEC_P (bb_cluster);
969 DEF_VEC_ALLOC_P (bb_cluster, heap);
970
971 /* Array that contains all clusters. */
972
973 static VEC (bb_cluster, heap) *all_clusters;
974
975 /* Allocate all cluster vectors. */
976
977 static void
978 alloc_cluster_vectors (void)
979 {
980 all_clusters = VEC_alloc (bb_cluster, heap, n_basic_blocks);
981 }
982
983 /* Reset all cluster vectors. */
984
985 static void
986 reset_cluster_vectors (void)
987 {
988 unsigned int i;
989 basic_block bb;
990 for (i = 0; i < VEC_length (bb_cluster, all_clusters); ++i)
991 delete_cluster (VEC_index (bb_cluster, all_clusters, i));
992 VEC_truncate (bb_cluster, all_clusters, 0);
993 FOR_EACH_BB (bb)
994 BB_CLUSTER (bb) = NULL;
995 }
996
997 /* Delete all cluster vectors. */
998
999 static void
1000 delete_cluster_vectors (void)
1001 {
1002 unsigned int i;
1003 for (i = 0; i < VEC_length (bb_cluster, all_clusters); ++i)
1004 delete_cluster (VEC_index (bb_cluster, all_clusters, i));
1005 VEC_free (bb_cluster, heap, all_clusters);
1006 }
1007
1008 /* Merge cluster C2 into C1. */
1009
1010 static void
1011 merge_clusters (bb_cluster c1, bb_cluster c2)
1012 {
1013 bitmap_ior_into (c1->bbs, c2->bbs);
1014 bitmap_ior_into (c1->preds, c2->preds);
1015 }
1016
1017 /* Register equivalence of BB1 and BB2 (members of cluster C). Store c in
1018 all_clusters, or merge c with existing cluster. */
1019
1020 static void
1021 set_cluster (basic_block bb1, basic_block bb2)
1022 {
1023 basic_block merge_bb, other_bb;
1024 bb_cluster merge, old, c;
1025
1026 if (BB_CLUSTER (bb1) == NULL && BB_CLUSTER (bb2) == NULL)
1027 {
1028 c = new_cluster ();
1029 add_bb_to_cluster (c, bb1);
1030 add_bb_to_cluster (c, bb2);
1031 BB_CLUSTER (bb1) = c;
1032 BB_CLUSTER (bb2) = c;
1033 c->index = VEC_length (bb_cluster, all_clusters);
1034 VEC_safe_push (bb_cluster, heap, all_clusters, c);
1035 }
1036 else if (BB_CLUSTER (bb1) == NULL || BB_CLUSTER (bb2) == NULL)
1037 {
1038 merge_bb = BB_CLUSTER (bb1) == NULL ? bb2 : bb1;
1039 other_bb = BB_CLUSTER (bb1) == NULL ? bb1 : bb2;
1040 merge = BB_CLUSTER (merge_bb);
1041 add_bb_to_cluster (merge, other_bb);
1042 BB_CLUSTER (other_bb) = merge;
1043 }
1044 else if (BB_CLUSTER (bb1) != BB_CLUSTER (bb2))
1045 {
1046 unsigned int i;
1047 bitmap_iterator bi;
1048
1049 old = BB_CLUSTER (bb2);
1050 merge = BB_CLUSTER (bb1);
1051 merge_clusters (merge, old);
1052 EXECUTE_IF_SET_IN_BITMAP (old->bbs, 0, i, bi)
1053 BB_CLUSTER (BASIC_BLOCK (i)) = merge;
1054 VEC_replace (bb_cluster, all_clusters, old->index, NULL);
1055 update_rep_bb (merge, old->rep_bb);
1056 delete_cluster (old);
1057 }
1058 else
1059 gcc_unreachable ();
1060 }
1061
1062 /* Return true if gimple statements S1 and S2 are equal. Gimple_bb (s1) and
1063 gimple_bb (s2) are members of SAME_SUCC. */
1064
1065 static bool
1066 gimple_equal_p (same_succ same_succ, gimple s1, gimple s2)
1067 {
1068 unsigned int i;
1069 tree lhs1, lhs2;
1070 basic_block bb1 = gimple_bb (s1), bb2 = gimple_bb (s2);
1071 tree t1, t2;
1072 bool equal, inv_cond;
1073 enum tree_code code1, code2;
1074
1075 if (gimple_code (s1) != gimple_code (s2))
1076 return false;
1077
1078 switch (gimple_code (s1))
1079 {
1080 case GIMPLE_CALL:
1081 if (gimple_call_num_args (s1) != gimple_call_num_args (s2))
1082 return false;
1083 if (!gimple_call_same_target_p (s1, s2))
1084 return false;
1085
1086 /* Eventually, we'll significantly complicate the CFG by adding
1087 back edges to properly model the effects of transaction restart.
1088 For the bulk of optimization this does not matter, but what we
1089 cannot recover from is tail merging blocks between two separate
1090 transactions. Avoid that by making commit not match. */
1091 if (gimple_call_builtin_p (s1, BUILT_IN_TM_COMMIT))
1092 return false;
1093
1094 equal = true;
1095 for (i = 0; i < gimple_call_num_args (s1); ++i)
1096 {
1097 t1 = gimple_call_arg (s1, i);
1098 t2 = gimple_call_arg (s2, i);
1099 if (operand_equal_p (t1, t2, 0))
1100 continue;
1101 if (gvn_uses_equal (t1, t2))
1102 continue;
1103 equal = false;
1104 break;
1105 }
1106 if (!equal)
1107 return false;
1108
1109 lhs1 = gimple_get_lhs (s1);
1110 lhs2 = gimple_get_lhs (s2);
1111 if (lhs1 == NULL_TREE && lhs2 == NULL_TREE)
1112 return true;
1113 if (lhs1 == NULL_TREE || lhs2 == NULL_TREE)
1114 return false;
1115 if (TREE_CODE (lhs1) == SSA_NAME && TREE_CODE (lhs2) == SSA_NAME)
1116 return vn_valueize (lhs1) == vn_valueize (lhs2);
1117 return operand_equal_p (lhs1, lhs2, 0);
1118
1119 case GIMPLE_ASSIGN:
1120 lhs1 = gimple_get_lhs (s1);
1121 lhs2 = gimple_get_lhs (s2);
1122 if (gimple_vdef (s1))
1123 {
1124 if (vn_valueize (gimple_vdef (s1)) != vn_valueize (gimple_vdef (s2)))
1125 return false;
1126 if (TREE_CODE (lhs1) != SSA_NAME
1127 && TREE_CODE (lhs2) != SSA_NAME)
1128 return true;
1129 }
1130 return (TREE_CODE (lhs1) == SSA_NAME
1131 && TREE_CODE (lhs2) == SSA_NAME
1132 && vn_valueize (lhs1) == vn_valueize (lhs2));
1133
1134 case GIMPLE_COND:
1135 t1 = gimple_cond_lhs (s1);
1136 t2 = gimple_cond_lhs (s2);
1137 if (!operand_equal_p (t1, t2, 0)
1138 && !gvn_uses_equal (t1, t2))
1139 return false;
1140
1141 t1 = gimple_cond_rhs (s1);
1142 t2 = gimple_cond_rhs (s2);
1143 if (!operand_equal_p (t1, t2, 0)
1144 && !gvn_uses_equal (t1, t2))
1145 return false;
1146
1147 code1 = gimple_expr_code (s1);
1148 code2 = gimple_expr_code (s2);
1149 inv_cond = (bitmap_bit_p (same_succ->inverse, bb1->index)
1150 != bitmap_bit_p (same_succ->inverse, bb2->index));
1151 if (inv_cond)
1152 {
1153 bool honor_nans
1154 = HONOR_NANS (TYPE_MODE (TREE_TYPE (gimple_cond_lhs (s1))));
1155 code2 = invert_tree_comparison (code2, honor_nans);
1156 }
1157 return code1 == code2;
1158
1159 default:
1160 return false;
1161 }
1162 }
1163
1164 /* Let GSI skip backwards over local defs. Return the earliest vuse in VUSE.
1165 Return true in VUSE_ESCAPED if the vuse influenced a SSA_OP_DEF of one of the
1166 processed statements. */
1167
1168 static void
1169 gsi_advance_bw_nondebug_nonlocal (gimple_stmt_iterator *gsi, tree *vuse,
1170 bool *vuse_escaped)
1171 {
1172 gimple stmt;
1173 tree lvuse;
1174
1175 while (true)
1176 {
1177 if (gsi_end_p (*gsi))
1178 return;
1179 stmt = gsi_stmt (*gsi);
1180
1181 lvuse = gimple_vuse (stmt);
1182 if (lvuse != NULL_TREE)
1183 {
1184 *vuse = lvuse;
1185 if (!ZERO_SSA_OPERANDS (stmt, SSA_OP_DEF))
1186 *vuse_escaped = true;
1187 }
1188
1189 if (!stmt_local_def (stmt))
1190 return;
1191 gsi_prev_nondebug (gsi);
1192 }
1193 }
1194
1195 /* Determines whether BB1 and BB2 (members of same_succ) are duplicates. If so,
1196 clusters them. */
1197
1198 static void
1199 find_duplicate (same_succ same_succ, basic_block bb1, basic_block bb2)
1200 {
1201 gimple_stmt_iterator gsi1 = gsi_last_nondebug_bb (bb1);
1202 gimple_stmt_iterator gsi2 = gsi_last_nondebug_bb (bb2);
1203 tree vuse1 = NULL_TREE, vuse2 = NULL_TREE;
1204 bool vuse_escaped = false;
1205
1206 gsi_advance_bw_nondebug_nonlocal (&gsi1, &vuse1, &vuse_escaped);
1207 gsi_advance_bw_nondebug_nonlocal (&gsi2, &vuse2, &vuse_escaped);
1208
1209 while (!gsi_end_p (gsi1) && !gsi_end_p (gsi2))
1210 {
1211 if (!gimple_equal_p (same_succ, gsi_stmt (gsi1), gsi_stmt (gsi2)))
1212 return;
1213
1214 gsi_prev_nondebug (&gsi1);
1215 gsi_prev_nondebug (&gsi2);
1216 gsi_advance_bw_nondebug_nonlocal (&gsi1, &vuse1, &vuse_escaped);
1217 gsi_advance_bw_nondebug_nonlocal (&gsi2, &vuse2, &vuse_escaped);
1218 }
1219
1220 if (!(gsi_end_p (gsi1) && gsi_end_p (gsi2)))
1221 return;
1222
1223 /* If the incoming vuses are not the same, and the vuse escaped into an
1224 SSA_OP_DEF, then merging the 2 blocks will change the value of the def,
1225 which potentially means the semantics of one of the blocks will be changed.
1226 TODO: make this check more precise. */
1227 if (vuse_escaped && vuse1 != vuse2)
1228 return;
1229
1230 if (dump_file)
1231 fprintf (dump_file, "find_duplicates: <bb %d> duplicate of <bb %d>\n",
1232 bb1->index, bb2->index);
1233
1234 set_cluster (bb1, bb2);
1235 }
1236
1237 /* Returns whether for all phis in DEST the phi alternatives for E1 and
1238 E2 are equal. */
1239
1240 static bool
1241 same_phi_alternatives_1 (basic_block dest, edge e1, edge e2)
1242 {
1243 int n1 = e1->dest_idx, n2 = e2->dest_idx;
1244 gimple_stmt_iterator gsi;
1245
1246 for (gsi = gsi_start_phis (dest); !gsi_end_p (gsi); gsi_next (&gsi))
1247 {
1248 gimple phi = gsi_stmt (gsi);
1249 tree lhs = gimple_phi_result (phi);
1250 tree val1 = gimple_phi_arg_def (phi, n1);
1251 tree val2 = gimple_phi_arg_def (phi, n2);
1252
1253 if (!is_gimple_reg (lhs))
1254 continue;
1255
1256 if (operand_equal_for_phi_arg_p (val1, val2))
1257 continue;
1258 if (gvn_uses_equal (val1, val2))
1259 continue;
1260
1261 return false;
1262 }
1263
1264 return true;
1265 }
1266
1267 /* Returns whether for all successors of BB1 and BB2 (members of SAME_SUCC), the
1268 phi alternatives for BB1 and BB2 are equal. */
1269
1270 static bool
1271 same_phi_alternatives (same_succ same_succ, basic_block bb1, basic_block bb2)
1272 {
1273 unsigned int s;
1274 bitmap_iterator bs;
1275 edge e1, e2;
1276 basic_block succ;
1277
1278 EXECUTE_IF_SET_IN_BITMAP (same_succ->succs, 0, s, bs)
1279 {
1280 succ = BASIC_BLOCK (s);
1281 e1 = find_edge (bb1, succ);
1282 e2 = find_edge (bb2, succ);
1283 if (e1->flags & EDGE_COMPLEX
1284 || e2->flags & EDGE_COMPLEX)
1285 return false;
1286
1287 /* For all phis in bb, the phi alternatives for e1 and e2 need to have
1288 the same value. */
1289 if (!same_phi_alternatives_1 (succ, e1, e2))
1290 return false;
1291 }
1292
1293 return true;
1294 }
1295
1296 /* Return true if BB has non-vop phis. */
1297
1298 static bool
1299 bb_has_non_vop_phi (basic_block bb)
1300 {
1301 gimple_seq phis = phi_nodes (bb);
1302 gimple phi;
1303
1304 if (phis == NULL)
1305 return false;
1306
1307 if (!gimple_seq_singleton_p (phis))
1308 return true;
1309
1310 phi = gimple_seq_first_stmt (phis);
1311 return is_gimple_reg (gimple_phi_result (phi));
1312 }
1313
1314 /* Returns true if redirecting the incoming edges of FROM to TO maintains the
1315 invariant that uses in FROM are dominates by their defs. */
1316
1317 static bool
1318 deps_ok_for_redirect_from_bb_to_bb (basic_block from, basic_block to)
1319 {
1320 basic_block cd, dep_bb = BB_DEP_BB (to);
1321 edge_iterator ei;
1322 edge e;
1323 bitmap from_preds = BITMAP_ALLOC (NULL);
1324
1325 if (dep_bb == NULL)
1326 return true;
1327
1328 FOR_EACH_EDGE (e, ei, from->preds)
1329 bitmap_set_bit (from_preds, e->src->index);
1330 cd = nearest_common_dominator_for_set (CDI_DOMINATORS, from_preds);
1331 BITMAP_FREE (from_preds);
1332
1333 return dominated_by_p (CDI_DOMINATORS, dep_bb, cd);
1334 }
1335
1336 /* Returns true if replacing BB1 (or its replacement bb) by BB2 (or its
1337 replacement bb) and vice versa maintains the invariant that uses in the
1338 replacement are dominates by their defs. */
1339
1340 static bool
1341 deps_ok_for_redirect (basic_block bb1, basic_block bb2)
1342 {
1343 if (BB_CLUSTER (bb1) != NULL)
1344 bb1 = BB_CLUSTER (bb1)->rep_bb;
1345
1346 if (BB_CLUSTER (bb2) != NULL)
1347 bb2 = BB_CLUSTER (bb2)->rep_bb;
1348
1349 return (deps_ok_for_redirect_from_bb_to_bb (bb1, bb2)
1350 && deps_ok_for_redirect_from_bb_to_bb (bb2, bb1));
1351 }
1352
1353 /* Within SAME_SUCC->bbs, find clusters of bbs which can be merged. */
1354
1355 static void
1356 find_clusters_1 (same_succ same_succ)
1357 {
1358 basic_block bb1, bb2;
1359 unsigned int i, j;
1360 bitmap_iterator bi, bj;
1361 int nr_comparisons;
1362 int max_comparisons = PARAM_VALUE (PARAM_MAX_TAIL_MERGE_COMPARISONS);
1363
1364 EXECUTE_IF_SET_IN_BITMAP (same_succ->bbs, 0, i, bi)
1365 {
1366 bb1 = BASIC_BLOCK (i);
1367
1368 /* TODO: handle blocks with phi-nodes. We'll have to find corresponding
1369 phi-nodes in bb1 and bb2, with the same alternatives for the same
1370 preds. */
1371 if (bb_has_non_vop_phi (bb1))
1372 continue;
1373
1374 nr_comparisons = 0;
1375 EXECUTE_IF_SET_IN_BITMAP (same_succ->bbs, i + 1, j, bj)
1376 {
1377 bb2 = BASIC_BLOCK (j);
1378
1379 if (bb_has_non_vop_phi (bb2))
1380 continue;
1381
1382 if (BB_CLUSTER (bb1) != NULL && BB_CLUSTER (bb1) == BB_CLUSTER (bb2))
1383 continue;
1384
1385 /* Limit quadratic behaviour. */
1386 nr_comparisons++;
1387 if (nr_comparisons > max_comparisons)
1388 break;
1389
1390 /* This is a conservative dependency check. We could test more
1391 precise for allowed replacement direction. */
1392 if (!deps_ok_for_redirect (bb1, bb2))
1393 continue;
1394
1395 if (!(same_phi_alternatives (same_succ, bb1, bb2)))
1396 continue;
1397
1398 find_duplicate (same_succ, bb1, bb2);
1399 }
1400 }
1401 }
1402
1403 /* Find clusters of bbs which can be merged. */
1404
1405 static void
1406 find_clusters (void)
1407 {
1408 same_succ same;
1409
1410 while (!VEC_empty (same_succ, worklist))
1411 {
1412 same = VEC_pop (same_succ, worklist);
1413 same->in_worklist = false;
1414 if (dump_file && (dump_flags & TDF_DETAILS))
1415 {
1416 fprintf (dump_file, "processing worklist entry\n");
1417 same_succ_print (dump_file, same);
1418 }
1419 find_clusters_1 (same);
1420 }
1421 }
1422
1423 /* Returns the vop phi of BB, if any. */
1424
1425 static gimple
1426 vop_phi (basic_block bb)
1427 {
1428 gimple stmt;
1429 gimple_stmt_iterator gsi;
1430 for (gsi = gsi_start_phis (bb); !gsi_end_p (gsi); gsi_next (&gsi))
1431 {
1432 stmt = gsi_stmt (gsi);
1433 if (is_gimple_reg (gimple_phi_result (stmt)))
1434 continue;
1435 return stmt;
1436 }
1437 return NULL;
1438 }
1439
1440 /* Redirect all edges from BB1 to BB2, removes BB1 and marks it as removed. */
1441
1442 static void
1443 replace_block_by (basic_block bb1, basic_block bb2)
1444 {
1445 edge pred_edge;
1446 unsigned int i;
1447 gimple bb2_phi;
1448
1449 bb2_phi = vop_phi (bb2);
1450
1451 /* Mark the basic block as deleted. */
1452 mark_basic_block_deleted (bb1);
1453
1454 /* Redirect the incoming edges of bb1 to bb2. */
1455 for (i = EDGE_COUNT (bb1->preds); i > 0 ; --i)
1456 {
1457 pred_edge = EDGE_PRED (bb1, i - 1);
1458 pred_edge = redirect_edge_and_branch (pred_edge, bb2);
1459 gcc_assert (pred_edge != NULL);
1460
1461 if (bb2_phi == NULL)
1462 continue;
1463
1464 /* The phi might have run out of capacity when the redirect added an
1465 argument, which means it could have been replaced. Refresh it. */
1466 bb2_phi = vop_phi (bb2);
1467
1468 add_phi_arg (bb2_phi, SSA_NAME_VAR (gimple_phi_result (bb2_phi)),
1469 pred_edge, UNKNOWN_LOCATION);
1470 }
1471
1472 bb2->frequency += bb1->frequency;
1473 if (bb2->frequency > BB_FREQ_MAX)
1474 bb2->frequency = BB_FREQ_MAX;
1475 bb1->frequency = 0;
1476
1477 /* Do updates that use bb1, before deleting bb1. */
1478 release_last_vdef (bb1);
1479 same_succ_flush_bb (bb1);
1480
1481 delete_basic_block (bb1);
1482 }
1483
1484 /* Bbs for which update_debug_stmt need to be called. */
1485
1486 static bitmap update_bbs;
1487
1488 /* For each cluster in all_clusters, merge all cluster->bbs. Returns
1489 number of bbs removed. */
1490
1491 static int
1492 apply_clusters (void)
1493 {
1494 basic_block bb1, bb2;
1495 bb_cluster c;
1496 unsigned int i, j;
1497 bitmap_iterator bj;
1498 int nr_bbs_removed = 0;
1499
1500 for (i = 0; i < VEC_length (bb_cluster, all_clusters); ++i)
1501 {
1502 c = VEC_index (bb_cluster, all_clusters, i);
1503 if (c == NULL)
1504 continue;
1505
1506 bb2 = c->rep_bb;
1507 bitmap_set_bit (update_bbs, bb2->index);
1508
1509 bitmap_clear_bit (c->bbs, bb2->index);
1510 EXECUTE_IF_SET_IN_BITMAP (c->bbs, 0, j, bj)
1511 {
1512 bb1 = BASIC_BLOCK (j);
1513 bitmap_clear_bit (update_bbs, bb1->index);
1514
1515 replace_block_by (bb1, bb2);
1516 nr_bbs_removed++;
1517 }
1518 }
1519
1520 return nr_bbs_removed;
1521 }
1522
1523 /* Resets debug statement STMT if it has uses that are not dominated by their
1524 defs. */
1525
1526 static void
1527 update_debug_stmt (gimple stmt)
1528 {
1529 use_operand_p use_p;
1530 ssa_op_iter oi;
1531 basic_block bbdef, bbuse;
1532 gimple def_stmt;
1533 tree name;
1534
1535 if (!gimple_debug_bind_p (stmt))
1536 return;
1537
1538 bbuse = gimple_bb (stmt);
1539 FOR_EACH_PHI_OR_STMT_USE (use_p, stmt, oi, SSA_OP_USE)
1540 {
1541 name = USE_FROM_PTR (use_p);
1542 gcc_assert (TREE_CODE (name) == SSA_NAME);
1543
1544 def_stmt = SSA_NAME_DEF_STMT (name);
1545 gcc_assert (def_stmt != NULL);
1546
1547 bbdef = gimple_bb (def_stmt);
1548 if (bbdef == NULL || bbuse == bbdef
1549 || dominated_by_p (CDI_DOMINATORS, bbuse, bbdef))
1550 continue;
1551
1552 gimple_debug_bind_reset_value (stmt);
1553 update_stmt (stmt);
1554 }
1555 }
1556
1557 /* Resets all debug statements that have uses that are not
1558 dominated by their defs. */
1559
1560 static void
1561 update_debug_stmts (void)
1562 {
1563 basic_block bb;
1564 bitmap_iterator bi;
1565 unsigned int i;
1566
1567 EXECUTE_IF_SET_IN_BITMAP (update_bbs, 0, i, bi)
1568 {
1569 gimple stmt;
1570 gimple_stmt_iterator gsi;
1571
1572 bb = BASIC_BLOCK (i);
1573 for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
1574 {
1575 stmt = gsi_stmt (gsi);
1576 if (!is_gimple_debug (stmt))
1577 continue;
1578 update_debug_stmt (stmt);
1579 }
1580 }
1581 }
1582
1583 /* Runs tail merge optimization. */
1584
1585 unsigned int
1586 tail_merge_optimize (unsigned int todo)
1587 {
1588 int nr_bbs_removed_total = 0;
1589 int nr_bbs_removed;
1590 bool loop_entered = false;
1591 int iteration_nr = 0;
1592 int max_iterations = PARAM_VALUE (PARAM_MAX_TAIL_MERGE_ITERATIONS);
1593
1594 if (!flag_tree_tail_merge || max_iterations == 0)
1595 return 0;
1596
1597 timevar_push (TV_TREE_TAIL_MERGE);
1598
1599 calculate_dominance_info (CDI_DOMINATORS);
1600 init_worklist ();
1601
1602 while (!VEC_empty (same_succ, worklist))
1603 {
1604 if (!loop_entered)
1605 {
1606 loop_entered = true;
1607 alloc_cluster_vectors ();
1608 update_bbs = BITMAP_ALLOC (NULL);
1609 }
1610 else
1611 reset_cluster_vectors ();
1612
1613 iteration_nr++;
1614 if (dump_file && (dump_flags & TDF_DETAILS))
1615 fprintf (dump_file, "worklist iteration #%d\n", iteration_nr);
1616
1617 find_clusters ();
1618 gcc_assert (VEC_empty (same_succ, worklist));
1619 if (VEC_empty (bb_cluster, all_clusters))
1620 break;
1621
1622 nr_bbs_removed = apply_clusters ();
1623 nr_bbs_removed_total += nr_bbs_removed;
1624 if (nr_bbs_removed == 0)
1625 break;
1626
1627 free_dominance_info (CDI_DOMINATORS);
1628
1629 if (iteration_nr == max_iterations)
1630 break;
1631
1632 calculate_dominance_info (CDI_DOMINATORS);
1633 update_worklist ();
1634 }
1635
1636 if (dump_file && (dump_flags & TDF_DETAILS))
1637 fprintf (dump_file, "htab collision / search: %f\n",
1638 htab_collisions (same_succ_htab));
1639
1640 if (nr_bbs_removed_total > 0)
1641 {
1642 if (MAY_HAVE_DEBUG_STMTS)
1643 {
1644 calculate_dominance_info (CDI_DOMINATORS);
1645 update_debug_stmts ();
1646 }
1647
1648 if (dump_file && (dump_flags & TDF_DETAILS))
1649 {
1650 fprintf (dump_file, "Before TODOs.\n");
1651 dump_function_to_file (current_function_decl, dump_file, dump_flags);
1652 }
1653
1654 todo |= (TODO_verify_ssa | TODO_verify_stmts | TODO_verify_flow);
1655 mark_sym_for_renaming (gimple_vop (cfun));
1656 }
1657
1658 delete_worklist ();
1659 if (loop_entered)
1660 {
1661 delete_cluster_vectors ();
1662 BITMAP_FREE (update_bbs);
1663 }
1664
1665 timevar_pop (TV_TREE_TAIL_MERGE);
1666
1667 return todo;
1668 }