/* Tail merging for gimple.
   Copyright (C) 2011-2017 Free Software Foundation, Inc.
   Contributed by Tom de Vries (tom@codesourcery.com)

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 3, or (at your option)
any later version.

GCC is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
GNU General Public License for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3.  If not see
<http://www.gnu.org/licenses/>.  */

/* Pass overview.


   MOTIVATIONAL EXAMPLE

   gimple representation of gcc/testsuite/gcc.dg/pr43864.c at

   hprofStartupp (charD.1 * outputFileNameD.2600, charD.1 * ctxD.2601)
   {
     struct FILED.1638 * fpD.2605;
     charD.1 fileNameD.2604[1000];
     intD.0 D.3915;
     const charD.1 * restrict outputFileName.0D.3914;

     # BLOCK 2 freq:10000
     # PRED: ENTRY [100.0%]  (fallthru,exec)
     # PT = nonlocal { D.3926 } (restr)
     outputFileName.0D.3914_3
       = (const charD.1 * restrict) outputFileNameD.2600_2(D);
     # .MEMD.3923_13 = VDEF <.MEMD.3923_12(D)>
     # USE = nonlocal null { fileNameD.2604 D.3926 } (restr)
     # CLB = nonlocal null { fileNameD.2604 D.3926 } (restr)
     sprintfD.759 (&fileNameD.2604, outputFileName.0D.3914_3);
     # .MEMD.3923_14 = VDEF <.MEMD.3923_13>
     # USE = nonlocal null { fileNameD.2604 D.3926 } (restr)
     # CLB = nonlocal null { fileNameD.2604 D.3926 } (restr)
     D.3915_4 = accessD.2606 (&fileNameD.2604, 1);
     if (D.3915_4 == 0)
       goto <bb 3>;
     else
       goto <bb 4>;
     # SUCC: 3 [10.0%]  (true,exec) 4 [90.0%]  (false,exec)

     # BLOCK 3 freq:1000
     # PRED: 2 [10.0%]  (true,exec)
     # .MEMD.3923_15 = VDEF <.MEMD.3923_14>
     # USE = nonlocal null { fileNameD.2604 D.3926 } (restr)
     # CLB = nonlocal null { fileNameD.2604 D.3926 } (restr)
     freeD.898 (ctxD.2601_5(D));
     goto <bb 7>;
     # SUCC: 7 [100.0%]  (fallthru,exec)

     # BLOCK 4 freq:9000
     # PRED: 2 [90.0%]  (false,exec)
     # .MEMD.3923_16 = VDEF <.MEMD.3923_14>
     # PT = nonlocal escaped
     # USE = nonlocal null { fileNameD.2604 D.3926 } (restr)
     # CLB = nonlocal null { fileNameD.2604 D.3926 } (restr)
     fpD.2605_8 = fopenD.1805 (&fileNameD.2604[0], 0B);
     if (fpD.2605_8 == 0B)
       goto <bb 5>;
     else
       goto <bb 6>;
     # SUCC: 5 [1.9%]  (true,exec) 6 [98.1%]  (false,exec)

     # BLOCK 5 freq:173
     # PRED: 4 [1.9%]  (true,exec)
     # .MEMD.3923_17 = VDEF <.MEMD.3923_16>
     # USE = nonlocal null { fileNameD.2604 D.3926 } (restr)
     # CLB = nonlocal null { fileNameD.2604 D.3926 } (restr)
     freeD.898 (ctxD.2601_5(D));
     goto <bb 7>;
     # SUCC: 7 [100.0%]  (fallthru,exec)

     # BLOCK 6 freq:8827
     # PRED: 4 [98.1%]  (false,exec)
     # .MEMD.3923_18 = VDEF <.MEMD.3923_16>
     # USE = nonlocal null { fileNameD.2604 D.3926 } (restr)
     # CLB = nonlocal null { fileNameD.2604 D.3926 } (restr)
     fooD.2599 (outputFileNameD.2600_2(D), fpD.2605_8);
     # SUCC: 7 [100.0%]  (fallthru,exec)

     # BLOCK 7 freq:10000
     # PRED: 3 [100.0%]  (fallthru,exec) 5 [100.0%]  (fallthru,exec)
             6 [100.0%]  (fallthru,exec)
     # PT = nonlocal null

     # ctxD.2601_1 = PHI <0B(3), 0B(5), ctxD.2601_5(D)(6)>
     # .MEMD.3923_11 = PHI <.MEMD.3923_15(3), .MEMD.3923_17(5),
                            .MEMD.3923_18(6)>
     # VUSE <.MEMD.3923_11>
     return ctxD.2601_1;
     # SUCC: EXIT [100.0%]
   }

   bb 3 and bb 5 can be merged.  The blocks have different predecessors, but
   the same successors, and the same operations.


   CONTEXT

   A technique called tail merging (or cross jumping) can fix the example
   above.  For a block, we look for common code at the end (the tail) of the
   predecessor blocks, and insert jumps from one block to the other.
   The example is a special case for tail merging, in that 2 whole blocks
   can be merged, rather than just the end parts of them.
   We currently only focus on whole block merging, so in that sense
   calling this pass tail merge is a bit of a misnomer.

   We distinguish 2 kinds of situations in which blocks can be merged:
   - same operations, same predecessors.  The successor edges coming from one
     block are redirected to come from the other block.
   - same operations, same successors.  The predecessor edges entering one
     block are redirected to enter the other block.  Note that this operation
     might involve introducing phi operations, as the illustration below
     shows.
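
   As a small source-level illustration (a hypothetical snippet, not taken
   from the testsuite), consider:

     if (c)
       x = foo (a);
     else
       x = foo (a);

   The two blocks containing the calls have the same predecessor (the block
   testing c) and the same successor (the join block), so either kind of
   merge applies.  Merging on same successors may require a vop phi in the
   surviving block, because the incoming virtual operands of the two calls
   differ.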

   For efficient implementation, we would like to value number the blocks, and
   have a comparison operator that tells us whether the blocks are equal.
   Besides being runtime efficient, block value numbering should also abstract
   from irrelevant differences in order of operations, much like normal value
   numbering abstracts from irrelevant order of operations.

   For the first situation (same operations, same predecessors), normal value
   numbering fits well.  We can calculate a block value number based on the
   value numbers of the defs and vdefs.

   For the second situation (same operations, same successors), this approach
   doesn't work so well.  We can illustrate this using the example.  The calls
   to free use different vuses: .MEMD.3923_14 and .MEMD.3923_16, and these
   will remain different in value numbering, since they represent different
   memory states.  So the resulting vdefs of the frees will be different in
   value numbering, so the block value numbers will be different.

   The reason why we call the blocks equal is not because they define the same
   values, but because uses in the blocks use (possibly different) defs in the
   same way.  To be able to detect this efficiently, we need to do some kind
   of reverse value numbering, meaning number the uses rather than the defs,
   and calculate a block value number based on the value numbers of the uses.
   Ideally, a block comparison operator will also indicate which phis are
   needed to merge the blocks.

   For the moment, we don't do block value numbering, but we do insn-by-insn
   matching, using SCC value numbers to match operations with results, and
   structural comparison otherwise, while ignoring vop mismatches; the example
   below spells this out for the calls to free.
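
   In the motivational example, insn-by-insn matching compares the call
   freeD.898 (ctxD.2601_5(D)) in bb 3 with the one in bb 5: the callee and
   the argument value match, and the vop mismatch (vuse .MEMD.3923_14
   vs. .MEMD.3923_16, vdef .MEMD.3923_15 vs. .MEMD.3923_17) is ignored, so
   the two blocks compare equal.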


   IMPLEMENTATION

   1. The pass first determines all groups of blocks with the same successor
      blocks.
   2. Within each group, it tries to determine clusters of equal basic blocks.
   3. The clusters are applied.
   4. The same successor groups are updated.
   5. This process is repeated from 2 onwards, until no more changes occur
      (see the sketch below).
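
   As a sketch only (the actual driver, tail_merge_optimize at the end of
   this file, additionally maintains dominance info and honors the
   PARAM_MAX_TAIL_MERGE_ITERATIONS limit):

     init_worklist ();                    <-- step 1
     while (!worklist.is_empty ())
       {
         find_clusters ();                <-- step 2
         if (apply_clusters () == 0)      <-- step 3
           break;
         update_worklist ();              <-- steps 4 and 5
       }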


   LIMITATIONS/TODO

   - whole-block merging only
   - handles only 'same operations, same successors'.
     It handles same predecessors as a special subcase though.
   - does not implement the reverse value numbering and block value numbering.
   - improve memory allocation: use garbage collected memory, obstacks,
     allocpools where appropriate.
   - no insertion of gimple_reg phis.  We only introduce vop-phis.
   - handle blocks with gimple_reg phi_nodes.


   PASS PLACEMENT
   This 'pass' is not a stand-alone gimple pass, but runs as part of
   pass_pre, in order to share the value numbering.


   SWITCHES

   - ftree-tail-merge.  On at -O2.  We may have to enable it only at -Os.  */

#include "config.h"
#include "system.h"
#include "coretypes.h"
#include "backend.h"
#include "tree.h"
#include "gimple.h"
#include "cfghooks.h"
#include "tree-pass.h"
#include "ssa.h"
#include "fold-const.h"
#include "trans-mem.h"
#include "cfganal.h"
#include "cfgcleanup.h"
#include "gimple-iterator.h"
#include "tree-cfg.h"
#include "tree-into-ssa.h"
#include "params.h"
#include "tree-ssa-sccvn.h"
#include "cfgloop.h"
#include "tree-eh.h"
#include "tree-cfgcleanup.h"

/* Describes a group of bbs with the same successors.  The successor bbs are
   cached in succs, and the successor edge flags are cached in succ_flags.
   If a bb has the EDGE_TRUE/FALSE_VALUE flags swapped compared to succ_flags,
   it's marked in inverse.
   Additionally, the hash value for the struct is cached in hashval, and
   in_worklist indicates whether it's currently part of the worklist.  */

struct same_succ : pointer_hash <same_succ>
{
  /* The bbs that have the same successor bbs.  */
  bitmap bbs;
  /* The successor bbs.  */
  bitmap succs;
  /* Indicates whether the EDGE_TRUE/FALSE_VALUEs of succ_flags are swapped
     for bb.  */
  bitmap inverse;
  /* The edge flags for each of the successor bbs.  */
  vec<int> succ_flags;
  /* Indicates whether the struct is currently in the worklist.  */
  bool in_worklist;
  /* The hash value of the struct.  */
  hashval_t hashval;

  /* hash_table support.  */
  static inline hashval_t hash (const same_succ *);
  static int equal (const same_succ *, const same_succ *);
  static void remove (same_succ *);
};

/* Hash routine for hash_table support; returns the cached hashval of E.  */

inline hashval_t
same_succ::hash (const same_succ *e)
{
  return e->hashval;
}

/* A group of bbs where 1 bb from bbs can replace the other bbs.  */

struct bb_cluster
{
  /* The bbs in the cluster.  */
  bitmap bbs;
  /* The preds of the bbs in the cluster.  */
  bitmap preds;
  /* Index in all_clusters vector.  */
  int index;
  /* The bb to replace the cluster with.  */
  basic_block rep_bb;
};

/* Per bb-info.  */

struct aux_bb_info
{
  /* The number of non-debug statements in the bb.  */
  int size;
  /* The same_succ that this bb is a member of.  */
  same_succ *bb_same_succ;
  /* The cluster that this bb is a member of.  */
  bb_cluster *cluster;
  /* The vop state at the exit of a bb.  This is short-lived data, used to
     communicate between replace_block_by and the vuse-updating code.  */
  tree vop_at_exit;
  /* The bb that either contains or is dominated by the dependencies of the
     bb.  */
  basic_block dep_bb;
};

/* Macros to access the fields of struct aux_bb_info.  */

#define BB_SIZE(bb) (((struct aux_bb_info *)bb->aux)->size)
#define BB_SAME_SUCC(bb) (((struct aux_bb_info *)bb->aux)->bb_same_succ)
#define BB_CLUSTER(bb) (((struct aux_bb_info *)bb->aux)->cluster)
#define BB_VOP_AT_EXIT(bb) (((struct aux_bb_info *)bb->aux)->vop_at_exit)
#define BB_DEP_BB(bb) (((struct aux_bb_info *)bb->aux)->dep_bb)

/* Returns true if the only effect a statement STMT has is to define locally
   used SSA_NAMEs.  */

static bool
stmt_local_def (gimple *stmt)
{
  basic_block bb, def_bb;
  imm_use_iterator iter;
  use_operand_p use_p;
  tree val;
  def_operand_p def_p;

  if (gimple_vdef (stmt) != NULL_TREE
      || gimple_has_side_effects (stmt)
      || gimple_could_trap_p_1 (stmt, false, false)
      || gimple_vuse (stmt) != NULL_TREE)
    return false;

  def_p = SINGLE_SSA_DEF_OPERAND (stmt, SSA_OP_DEF);
  if (def_p == NULL)
    return false;

  val = DEF_FROM_PTR (def_p);
  if (val == NULL_TREE || TREE_CODE (val) != SSA_NAME)
    return false;

  def_bb = gimple_bb (stmt);

  FOR_EACH_IMM_USE_FAST (use_p, iter, val)
    {
      if (is_gimple_debug (USE_STMT (use_p)))
        continue;
      bb = gimple_bb (USE_STMT (use_p));
      if (bb == def_bb)
        continue;

      if (gimple_code (USE_STMT (use_p)) == GIMPLE_PHI
          && EDGE_PRED (bb, PHI_ARG_INDEX_FROM_USE (use_p))->src == def_bb)
        continue;

      return false;
    }

  return true;
}

/* Let GSI skip forwards over local defs.  */

static void
gsi_advance_fw_nondebug_nonlocal (gimple_stmt_iterator *gsi)
{
  gimple *stmt;

  while (true)
    {
      if (gsi_end_p (*gsi))
        return;
      stmt = gsi_stmt (*gsi);
      if (!stmt_local_def (stmt))
        return;
      gsi_next_nondebug (gsi);
    }
}

/* VAL1 and VAL2 are either:
   - uses in BB1 and BB2, or
   - phi alternatives for BB1 and BB2.
   Return true if the uses have the same gvn value.  */

static bool
gvn_uses_equal (tree val1, tree val2)
{
  gcc_checking_assert (val1 != NULL_TREE && val2 != NULL_TREE);

  if (val1 == val2)
    return true;

  if (vn_valueize (val1) != vn_valueize (val2))
    return false;

  return ((TREE_CODE (val1) == SSA_NAME || CONSTANT_CLASS_P (val1))
          && (TREE_CODE (val2) == SSA_NAME || CONSTANT_CLASS_P (val2)));
}

/* Prints E to FILE.  */

static void
same_succ_print (FILE *file, const same_succ *e)
{
  unsigned int i;
  bitmap_print (file, e->bbs, "bbs:", "\n");
  bitmap_print (file, e->succs, "succs:", "\n");
  bitmap_print (file, e->inverse, "inverse:", "\n");
  fprintf (file, "flags:");
  for (i = 0; i < e->succ_flags.length (); ++i)
    fprintf (file, " %x", e->succ_flags[i]);
  fprintf (file, "\n");
}

/* Prints same_succ *PE to FILE.  */

inline int
ssa_same_succ_print_traverse (same_succ **pe, FILE *file)
{
  const same_succ *e = *pe;
  same_succ_print (file, e);
  return 1;
}

/* Update BB_DEP_BB (USE_BB), given a use of VAL in USE_BB.  */

static void
update_dep_bb (basic_block use_bb, tree val)
{
  basic_block dep_bb;

  /* Not a dep.  */
  if (TREE_CODE (val) != SSA_NAME)
    return;

  /* Skip use of global def.  */
  if (SSA_NAME_IS_DEFAULT_DEF (val))
    return;

  /* Skip use of local def.  */
  dep_bb = gimple_bb (SSA_NAME_DEF_STMT (val));
  if (dep_bb == use_bb)
    return;

  if (BB_DEP_BB (use_bb) == NULL
      || dominated_by_p (CDI_DOMINATORS, dep_bb, BB_DEP_BB (use_bb)))
    BB_DEP_BB (use_bb) = dep_bb;
}

/* Update BB_DEP_BB, given the dependencies in STMT.  */

static void
stmt_update_dep_bb (gimple *stmt)
{
  ssa_op_iter iter;
  use_operand_p use;

  FOR_EACH_SSA_USE_OPERAND (use, stmt, iter, SSA_OP_USE)
    update_dep_bb (gimple_bb (stmt), USE_FROM_PTR (use));
}

/* Calculates hash value for same_succ E.  */

static hashval_t
same_succ_hash (const same_succ *e)
{
  inchash::hash hstate (bitmap_hash (e->succs));
  int flags;
  unsigned int i;
  unsigned int first = bitmap_first_set_bit (e->bbs);
  basic_block bb = BASIC_BLOCK_FOR_FN (cfun, first);
  int size = 0;
  gimple *stmt;
  tree arg;
  unsigned int s;
  bitmap_iterator bs;

  for (gimple_stmt_iterator gsi = gsi_start_nondebug_bb (bb);
       !gsi_end_p (gsi); gsi_next_nondebug (&gsi))
    {
      stmt = gsi_stmt (gsi);
      stmt_update_dep_bb (stmt);
      if (stmt_local_def (stmt))
        continue;
      size++;

      hstate.add_int (gimple_code (stmt));
      if (is_gimple_assign (stmt))
        hstate.add_int (gimple_assign_rhs_code (stmt));
      if (!is_gimple_call (stmt))
        continue;
      if (gimple_call_internal_p (stmt))
        hstate.add_int (gimple_call_internal_fn (stmt));
      else
        {
          inchash::add_expr (gimple_call_fn (stmt), hstate);
          if (gimple_call_chain (stmt))
            inchash::add_expr (gimple_call_chain (stmt), hstate);
        }
      for (i = 0; i < gimple_call_num_args (stmt); i++)
        {
          arg = gimple_call_arg (stmt, i);
          arg = vn_valueize (arg);
          inchash::add_expr (arg, hstate);
        }
    }

  hstate.add_int (size);
  BB_SIZE (bb) = size;

  for (i = 0; i < e->succ_flags.length (); ++i)
    {
      flags = e->succ_flags[i];
      flags = flags & ~(EDGE_TRUE_VALUE | EDGE_FALSE_VALUE);
      hstate.add_int (flags);
    }

  EXECUTE_IF_SET_IN_BITMAP (e->succs, 0, s, bs)
    {
      int n = find_edge (bb, BASIC_BLOCK_FOR_FN (cfun, s))->dest_idx;
      for (gphi_iterator gsi = gsi_start_phis (BASIC_BLOCK_FOR_FN (cfun, s));
           !gsi_end_p (gsi);
           gsi_next (&gsi))
        {
          gphi *phi = gsi.phi ();
          tree lhs = gimple_phi_result (phi);
          tree val = gimple_phi_arg_def (phi, n);

          if (virtual_operand_p (lhs))
            continue;
          update_dep_bb (bb, val);
        }
    }

  return hstate.end ();
}

/* Returns true if E1 and E2 have 2 successors, and if the successor flags
   are inverse for the EDGE_TRUE_VALUE and EDGE_FALSE_VALUE flags, and equal
   for the other edge flags.  */

static bool
inverse_flags (const same_succ *e1, const same_succ *e2)
{
  int f1a, f1b, f2a, f2b;
  int mask = ~(EDGE_TRUE_VALUE | EDGE_FALSE_VALUE);

  if (e1->succ_flags.length () != 2)
    return false;

  f1a = e1->succ_flags[0];
  f1b = e1->succ_flags[1];
  f2a = e2->succ_flags[0];
  f2b = e2->succ_flags[1];

  if (f1a == f2a && f1b == f2b)
    return false;

  return (f1a & mask) == (f2a & mask) && (f1b & mask) == (f2b & mask);
}

/* Compares SAME_SUCCs E1 and E2.  */

int
same_succ::equal (const same_succ *e1, const same_succ *e2)
{
  unsigned int i, first1, first2;
  gimple_stmt_iterator gsi1, gsi2;
  gimple *s1, *s2;
  basic_block bb1, bb2;

  if (e1 == e2)
    return 1;

  if (e1->hashval != e2->hashval)
    return 0;

  if (e1->succ_flags.length () != e2->succ_flags.length ())
    return 0;

  if (!bitmap_equal_p (e1->succs, e2->succs))
    return 0;

  if (!inverse_flags (e1, e2))
    {
      for (i = 0; i < e1->succ_flags.length (); ++i)
        if (e1->succ_flags[i] != e2->succ_flags[i])
          return 0;
    }

  first1 = bitmap_first_set_bit (e1->bbs);
  first2 = bitmap_first_set_bit (e2->bbs);

  bb1 = BASIC_BLOCK_FOR_FN (cfun, first1);
  bb2 = BASIC_BLOCK_FOR_FN (cfun, first2);

  if (BB_SIZE (bb1) != BB_SIZE (bb2))
    return 0;

  gsi1 = gsi_start_nondebug_bb (bb1);
  gsi2 = gsi_start_nondebug_bb (bb2);
  gsi_advance_fw_nondebug_nonlocal (&gsi1);
  gsi_advance_fw_nondebug_nonlocal (&gsi2);
  while (!(gsi_end_p (gsi1) || gsi_end_p (gsi2)))
    {
      s1 = gsi_stmt (gsi1);
      s2 = gsi_stmt (gsi2);
      if (gimple_code (s1) != gimple_code (s2))
        return 0;
      if (is_gimple_call (s1) && !gimple_call_same_target_p (s1, s2))
        return 0;
      gsi_next_nondebug (&gsi1);
      gsi_next_nondebug (&gsi2);
      gsi_advance_fw_nondebug_nonlocal (&gsi1);
      gsi_advance_fw_nondebug_nonlocal (&gsi2);
    }

  return 1;
}

/* Alloc and init a new SAME_SUCC.  */

static same_succ *
same_succ_alloc (void)
{
  same_succ *same = XNEW (struct same_succ);

  same->bbs = BITMAP_ALLOC (NULL);
  same->succs = BITMAP_ALLOC (NULL);
  same->inverse = BITMAP_ALLOC (NULL);
  same->succ_flags.create (10);
  same->in_worklist = false;

  return same;
}

/* Delete same_succ E.  */

void
same_succ::remove (same_succ *e)
{
  BITMAP_FREE (e->bbs);
  BITMAP_FREE (e->succs);
  BITMAP_FREE (e->inverse);
  e->succ_flags.release ();

  XDELETE (e);
}

/* Reset same_succ SAME.  */

static void
same_succ_reset (same_succ *same)
{
  bitmap_clear (same->bbs);
  bitmap_clear (same->succs);
  bitmap_clear (same->inverse);
  same->succ_flags.truncate (0);
}

static hash_table<same_succ> *same_succ_htab;

/* Array that is used to store the edge flags for a successor.  */

static int *same_succ_edge_flags;

/* Bitmap that is used to mark bbs that are recently deleted.  */

static bitmap deleted_bbs;

/* Bitmap that is used to mark predecessors of bbs that are
   deleted.  */

static bitmap deleted_bb_preds;

/* Prints same_succ_htab to stderr.  */

extern void debug_same_succ (void);
DEBUG_FUNCTION void
debug_same_succ (void)
{
  same_succ_htab->traverse <FILE *, ssa_same_succ_print_traverse> (stderr);
}


/* Vector of same_succs to process.  */

static vec<same_succ *> worklist;

/* Prints worklist to FILE.  */

static void
print_worklist (FILE *file)
{
  unsigned int i;
  for (i = 0; i < worklist.length (); ++i)
    same_succ_print (file, worklist[i]);
}

/* Adds SAME to the worklist.  */

static void
add_to_worklist (same_succ *same)
{
  if (same->in_worklist)
    return;

  if (bitmap_count_bits (same->bbs) < 2)
    return;

  same->in_worklist = true;
  worklist.safe_push (same);
}

/* Add BB to same_succ_htab.  */

static void
find_same_succ_bb (basic_block bb, same_succ **same_p)
{
  unsigned int j;
  bitmap_iterator bj;
  same_succ *same = *same_p;
  same_succ **slot;
  edge_iterator ei;
  edge e;

  if (bb == NULL
      /* Be conservative with loop structure.  It's not evident that this test
         is sufficient.  Before tail-merge, we've just called
         loop_optimizer_finalize, and LOOPS_MAY_HAVE_MULTIPLE_LATCHES is now
         set, so there's no guarantee that the loop->latch value is still
         valid.  But we assume that, since we've forced LOOPS_HAVE_SIMPLE_LATCHES
         at the start of pre, we've kept that property intact throughout pre,
         and are keeping it throughout tail-merge using this test.  */
      || bb->loop_father->latch == bb)
    return;
  bitmap_set_bit (same->bbs, bb->index);
  FOR_EACH_EDGE (e, ei, bb->succs)
    {
      int index = e->dest->index;
      bitmap_set_bit (same->succs, index);
      same_succ_edge_flags[index] = e->flags;
    }
  EXECUTE_IF_SET_IN_BITMAP (same->succs, 0, j, bj)
    same->succ_flags.safe_push (same_succ_edge_flags[j]);

  same->hashval = same_succ_hash (same);

  slot = same_succ_htab->find_slot_with_hash (same, same->hashval, INSERT);
  if (*slot == NULL)
    {
      *slot = same;
      BB_SAME_SUCC (bb) = same;
      add_to_worklist (same);
      *same_p = NULL;
    }
  else
    {
      bitmap_set_bit ((*slot)->bbs, bb->index);
      BB_SAME_SUCC (bb) = *slot;
      add_to_worklist (*slot);
      if (inverse_flags (same, *slot))
        bitmap_set_bit ((*slot)->inverse, bb->index);
      same_succ_reset (same);
    }
}

/* Find bbs with same successors.  */

static void
find_same_succ (void)
{
  same_succ *same = same_succ_alloc ();
  basic_block bb;

  FOR_EACH_BB_FN (bb, cfun)
    {
      find_same_succ_bb (bb, &same);
      if (same == NULL)
        same = same_succ_alloc ();
    }

  same_succ::remove (same);
}

/* Initializes worklist administration.  */

static void
init_worklist (void)
{
  alloc_aux_for_blocks (sizeof (struct aux_bb_info));
  same_succ_htab = new hash_table<same_succ> (n_basic_blocks_for_fn (cfun));
  same_succ_edge_flags = XCNEWVEC (int, last_basic_block_for_fn (cfun));
  deleted_bbs = BITMAP_ALLOC (NULL);
  deleted_bb_preds = BITMAP_ALLOC (NULL);
  worklist.create (n_basic_blocks_for_fn (cfun));
  find_same_succ ();

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      fprintf (dump_file, "initial worklist:\n");
      print_worklist (dump_file);
    }
}

/* Deletes worklist administration.  */

static void
delete_worklist (void)
{
  free_aux_for_blocks ();
  delete same_succ_htab;
  same_succ_htab = NULL;
  XDELETEVEC (same_succ_edge_flags);
  same_succ_edge_flags = NULL;
  BITMAP_FREE (deleted_bbs);
  BITMAP_FREE (deleted_bb_preds);
  worklist.release ();
}

/* Mark BB as deleted, and mark its predecessors.  */

static void
mark_basic_block_deleted (basic_block bb)
{
  edge e;
  edge_iterator ei;

  bitmap_set_bit (deleted_bbs, bb->index);

  FOR_EACH_EDGE (e, ei, bb->preds)
    bitmap_set_bit (deleted_bb_preds, e->src->index);
}

/* Removes BB from its corresponding same_succ.  */

static void
same_succ_flush_bb (basic_block bb)
{
  same_succ *same = BB_SAME_SUCC (bb);
  BB_SAME_SUCC (bb) = NULL;
  if (bitmap_single_bit_set_p (same->bbs))
    same_succ_htab->remove_elt_with_hash (same, same->hashval);
  else
    bitmap_clear_bit (same->bbs, bb->index);
}

/* Removes all bbs in BBS from their corresponding same_succ.  */

static void
same_succ_flush_bbs (bitmap bbs)
{
  unsigned int i;
  bitmap_iterator bi;

  EXECUTE_IF_SET_IN_BITMAP (bbs, 0, i, bi)
    same_succ_flush_bb (BASIC_BLOCK_FOR_FN (cfun, i));
}

/* Release the last vdef in BB, either normal or phi result.  */

static void
release_last_vdef (basic_block bb)
{
  for (gimple_stmt_iterator i = gsi_last_bb (bb); !gsi_end_p (i);
       gsi_prev_nondebug (&i))
    {
      gimple *stmt = gsi_stmt (i);
      if (gimple_vdef (stmt) == NULL_TREE)
        continue;

      mark_virtual_operand_for_renaming (gimple_vdef (stmt));
      return;
    }

  for (gphi_iterator i = gsi_start_phis (bb); !gsi_end_p (i);
       gsi_next (&i))
    {
      gphi *phi = i.phi ();
      tree res = gimple_phi_result (phi);

      if (!virtual_operand_p (res))
        continue;

      mark_virtual_phi_result_for_renaming (phi);
      return;
    }
}

/* For deleted_bb_preds, find bbs with same successors.  */

static void
update_worklist (void)
{
  unsigned int i;
  bitmap_iterator bi;
  basic_block bb;
  same_succ *same;

  bitmap_and_compl_into (deleted_bb_preds, deleted_bbs);
  bitmap_clear (deleted_bbs);

  bitmap_clear_bit (deleted_bb_preds, ENTRY_BLOCK);
  same_succ_flush_bbs (deleted_bb_preds);

  same = same_succ_alloc ();
  EXECUTE_IF_SET_IN_BITMAP (deleted_bb_preds, 0, i, bi)
    {
      bb = BASIC_BLOCK_FOR_FN (cfun, i);
      gcc_assert (bb != NULL);
      find_same_succ_bb (bb, &same);
      if (same == NULL)
        same = same_succ_alloc ();
    }
  same_succ::remove (same);
  bitmap_clear (deleted_bb_preds);
}

/* Prints cluster C to FILE.  */

static void
print_cluster (FILE *file, bb_cluster *c)
{
  if (c == NULL)
    return;
  bitmap_print (file, c->bbs, "bbs:", "\n");
  bitmap_print (file, c->preds, "preds:", "\n");
}

/* Prints cluster C to stderr.  */

extern void debug_cluster (bb_cluster *);
DEBUG_FUNCTION void
debug_cluster (bb_cluster *c)
{
  print_cluster (stderr, c);
}

/* Update C->rep_bb, given that BB is added to the cluster.  */

static void
update_rep_bb (bb_cluster *c, basic_block bb)
{
  /* Initial.  */
  if (c->rep_bb == NULL)
    {
      c->rep_bb = bb;
      return;
    }

  /* Current needs no deps, keep it.  */
  if (BB_DEP_BB (c->rep_bb) == NULL)
    return;

  /* Bb needs no deps, change rep_bb.  */
  if (BB_DEP_BB (bb) == NULL)
    {
      c->rep_bb = bb;
      return;
    }

  /* Bb needs last deps earlier than current, change rep_bb.  A potential
     problem with this is that the first deps might also be earlier, which
     would mean we prefer longer lifetimes for the deps.  To be able to check
     for this, we would have to trace BB_FIRST_DEP_BB as well, besides
     BB_DEP_BB, which is really BB_LAST_DEP_BB.
     The benefit of choosing the bb with last deps earlier is that it can
     potentially be used as replacement for more bbs.  */
  if (dominated_by_p (CDI_DOMINATORS, BB_DEP_BB (c->rep_bb), BB_DEP_BB (bb)))
    c->rep_bb = bb;
}

/* Add BB to cluster C.  Sets BB in C->bbs, and preds of BB in C->preds.  */

static void
add_bb_to_cluster (bb_cluster *c, basic_block bb)
{
  edge e;
  edge_iterator ei;

  bitmap_set_bit (c->bbs, bb->index);

  FOR_EACH_EDGE (e, ei, bb->preds)
    bitmap_set_bit (c->preds, e->src->index);

  update_rep_bb (c, bb);
}

/* Allocate and init new cluster.  */

static bb_cluster *
new_cluster (void)
{
  bb_cluster *c;
  c = XCNEW (bb_cluster);
  c->bbs = BITMAP_ALLOC (NULL);
  c->preds = BITMAP_ALLOC (NULL);
  c->rep_bb = NULL;
  return c;
}

/* Delete cluster C.  */

static void
delete_cluster (bb_cluster *c)
{
  if (c == NULL)
    return;
  BITMAP_FREE (c->bbs);
  BITMAP_FREE (c->preds);
  XDELETE (c);
}


/* Array that contains all clusters.  */

static vec<bb_cluster *> all_clusters;

/* Allocate all cluster vectors.  */

static void
alloc_cluster_vectors (void)
{
  all_clusters.create (n_basic_blocks_for_fn (cfun));
}

/* Reset all cluster vectors.  */

static void
reset_cluster_vectors (void)
{
  unsigned int i;
  basic_block bb;
  for (i = 0; i < all_clusters.length (); ++i)
    delete_cluster (all_clusters[i]);
  all_clusters.truncate (0);
  FOR_EACH_BB_FN (bb, cfun)
    BB_CLUSTER (bb) = NULL;
}

/* Delete all cluster vectors.  */

static void
delete_cluster_vectors (void)
{
  unsigned int i;
  for (i = 0; i < all_clusters.length (); ++i)
    delete_cluster (all_clusters[i]);
  all_clusters.release ();
}

/* Merge cluster C2 into C1.  */

static void
merge_clusters (bb_cluster *c1, bb_cluster *c2)
{
  bitmap_ior_into (c1->bbs, c2->bbs);
  bitmap_ior_into (c1->preds, c2->preds);
}

/* Register equivalence of BB1 and BB2 (members of cluster C).  Store C in
   all_clusters, or merge C with an existing cluster.  */

static void
set_cluster (basic_block bb1, basic_block bb2)
{
  basic_block merge_bb, other_bb;
  bb_cluster *merge, *old, *c;

  if (BB_CLUSTER (bb1) == NULL && BB_CLUSTER (bb2) == NULL)
    {
      c = new_cluster ();
      add_bb_to_cluster (c, bb1);
      add_bb_to_cluster (c, bb2);
      BB_CLUSTER (bb1) = c;
      BB_CLUSTER (bb2) = c;
      c->index = all_clusters.length ();
      all_clusters.safe_push (c);
    }
  else if (BB_CLUSTER (bb1) == NULL || BB_CLUSTER (bb2) == NULL)
    {
      merge_bb = BB_CLUSTER (bb1) == NULL ? bb2 : bb1;
      other_bb = BB_CLUSTER (bb1) == NULL ? bb1 : bb2;
      merge = BB_CLUSTER (merge_bb);
      add_bb_to_cluster (merge, other_bb);
      BB_CLUSTER (other_bb) = merge;
    }
  else if (BB_CLUSTER (bb1) != BB_CLUSTER (bb2))
    {
      unsigned int i;
      bitmap_iterator bi;

      old = BB_CLUSTER (bb2);
      merge = BB_CLUSTER (bb1);
      merge_clusters (merge, old);
      EXECUTE_IF_SET_IN_BITMAP (old->bbs, 0, i, bi)
        BB_CLUSTER (BASIC_BLOCK_FOR_FN (cfun, i)) = merge;
      all_clusters[old->index] = NULL;
      update_rep_bb (merge, old->rep_bb);
      delete_cluster (old);
    }
  else
    gcc_unreachable ();
}

/* Return true if gimple operands T1 and T2 have the same value.  */

static bool
gimple_operand_equal_value_p (tree t1, tree t2)
{
  if (t1 == t2)
    return true;

  if (t1 == NULL_TREE
      || t2 == NULL_TREE)
    return false;

  if (operand_equal_p (t1, t2, OEP_MATCH_SIDE_EFFECTS))
    return true;

  return gvn_uses_equal (t1, t2);
}

/* Return true if gimple statements S1 and S2 are equal.  gimple_bb (s1) and
   gimple_bb (s2) are members of SAME_SUCC.  */

static bool
gimple_equal_p (same_succ *same_succ, gimple *s1, gimple *s2)
{
  unsigned int i;
  tree lhs1, lhs2;
  basic_block bb1 = gimple_bb (s1), bb2 = gimple_bb (s2);
  tree t1, t2;
  bool inv_cond;
  enum tree_code code1, code2;

  if (gimple_code (s1) != gimple_code (s2))
    return false;

  switch (gimple_code (s1))
    {
    case GIMPLE_CALL:
      if (!gimple_call_same_target_p (s1, s2))
        return false;

      t1 = gimple_call_chain (s1);
      t2 = gimple_call_chain (s2);
      if (!gimple_operand_equal_value_p (t1, t2))
        return false;

      if (gimple_call_num_args (s1) != gimple_call_num_args (s2))
        return false;

      for (i = 0; i < gimple_call_num_args (s1); ++i)
        {
          t1 = gimple_call_arg (s1, i);
          t2 = gimple_call_arg (s2, i);
          if (!gimple_operand_equal_value_p (t1, t2))
            return false;
        }

      lhs1 = gimple_get_lhs (s1);
      lhs2 = gimple_get_lhs (s2);
      if (lhs1 == NULL_TREE && lhs2 == NULL_TREE)
        return true;
      if (lhs1 == NULL_TREE || lhs2 == NULL_TREE)
        return false;
      if (TREE_CODE (lhs1) == SSA_NAME && TREE_CODE (lhs2) == SSA_NAME)
        return vn_valueize (lhs1) == vn_valueize (lhs2);
      return operand_equal_p (lhs1, lhs2, 0);

    case GIMPLE_ASSIGN:
      lhs1 = gimple_get_lhs (s1);
      lhs2 = gimple_get_lhs (s2);
      if (TREE_CODE (lhs1) != SSA_NAME
          && TREE_CODE (lhs2) != SSA_NAME)
        return (operand_equal_p (lhs1, lhs2, 0)
                && gimple_operand_equal_value_p (gimple_assign_rhs1 (s1),
                                                 gimple_assign_rhs1 (s2)));
      else if (TREE_CODE (lhs1) == SSA_NAME
               && TREE_CODE (lhs2) == SSA_NAME)
        return operand_equal_p (gimple_assign_rhs1 (s1),
                                gimple_assign_rhs1 (s2), 0);
      return false;

    case GIMPLE_COND:
      t1 = gimple_cond_lhs (s1);
      t2 = gimple_cond_lhs (s2);
      if (!gimple_operand_equal_value_p (t1, t2))
        return false;

      t1 = gimple_cond_rhs (s1);
      t2 = gimple_cond_rhs (s2);
      if (!gimple_operand_equal_value_p (t1, t2))
        return false;

      code1 = gimple_expr_code (s1);
      code2 = gimple_expr_code (s2);
      inv_cond = (bitmap_bit_p (same_succ->inverse, bb1->index)
                  != bitmap_bit_p (same_succ->inverse, bb2->index));
      if (inv_cond)
        {
          bool honor_nans = HONOR_NANS (t1);
          code2 = invert_tree_comparison (code2, honor_nans);
        }
      return code1 == code2;

    default:
      return false;
    }
}

/* Let GSI skip backwards over local defs.  Return the earliest vuse in VUSE.
   Return true in VUSE_ESCAPED if the vuse influenced an SSA_OP_DEF of one of
   the processed statements.  */

static void
gsi_advance_bw_nondebug_nonlocal (gimple_stmt_iterator *gsi, tree *vuse,
                                  bool *vuse_escaped)
{
  gimple *stmt;
  tree lvuse;

  while (true)
    {
      if (gsi_end_p (*gsi))
        return;
      stmt = gsi_stmt (*gsi);

      lvuse = gimple_vuse (stmt);
      if (lvuse != NULL_TREE)
        {
          *vuse = lvuse;
          if (!ZERO_SSA_OPERANDS (stmt, SSA_OP_DEF))
            *vuse_escaped = true;
        }

      if (!stmt_local_def (stmt))
        return;
      gsi_prev_nondebug (gsi);
    }
}

/* Return true if equal (in the sense of gimple_equal_p) statements STMT1 and
   STMT2 are allowed to be merged.  */

static bool
merge_stmts_p (gimple *stmt1, gimple *stmt2)
{
  /* What could be better than this here is to blacklist the bb
     containing the stmt, when encountering the stmt e.g. in
     same_succ_hash.  */
  if (is_tm_ending (stmt1))
    return false;

  /* Verify EH landing pads.  */
  if (lookup_stmt_eh_lp_fn (cfun, stmt1) != lookup_stmt_eh_lp_fn (cfun, stmt2))
    return false;

  if (is_gimple_call (stmt1)
      && gimple_call_internal_p (stmt1))
    switch (gimple_call_internal_fn (stmt1))
      {
      case IFN_UBSAN_NULL:
      case IFN_UBSAN_BOUNDS:
      case IFN_UBSAN_VPTR:
      case IFN_UBSAN_CHECK_ADD:
      case IFN_UBSAN_CHECK_SUB:
      case IFN_UBSAN_CHECK_MUL:
      case IFN_UBSAN_OBJECT_SIZE:
      case IFN_ASAN_CHECK:
        /* For these internal functions, gimple_location is an implicit
           parameter, which will be used explicitly after expansion.
           Merging these statements may cause confusing line numbers in
           sanitizer messages.  */
        return gimple_location (stmt1) == gimple_location (stmt2);
      default:
        break;
      }

  return true;
}

/* Determines whether BB1 and BB2 (members of same_succ) are duplicates.  If
   so, clusters them.  */

static void
find_duplicate (same_succ *same_succ, basic_block bb1, basic_block bb2)
{
  gimple_stmt_iterator gsi1 = gsi_last_nondebug_bb (bb1);
  gimple_stmt_iterator gsi2 = gsi_last_nondebug_bb (bb2);
  tree vuse1 = NULL_TREE, vuse2 = NULL_TREE;
  bool vuse_escaped = false;

  gsi_advance_bw_nondebug_nonlocal (&gsi1, &vuse1, &vuse_escaped);
  gsi_advance_bw_nondebug_nonlocal (&gsi2, &vuse2, &vuse_escaped);

  while (!gsi_end_p (gsi1) && !gsi_end_p (gsi2))
    {
      gimple *stmt1 = gsi_stmt (gsi1);
      gimple *stmt2 = gsi_stmt (gsi2);

      if (gimple_code (stmt1) == GIMPLE_LABEL
          && gimple_code (stmt2) == GIMPLE_LABEL)
        break;

      if (!gimple_equal_p (same_succ, stmt1, stmt2))
        return;

      if (!merge_stmts_p (stmt1, stmt2))
        return;

      gsi_prev_nondebug (&gsi1);
      gsi_prev_nondebug (&gsi2);
      gsi_advance_bw_nondebug_nonlocal (&gsi1, &vuse1, &vuse_escaped);
      gsi_advance_bw_nondebug_nonlocal (&gsi2, &vuse2, &vuse_escaped);
    }

  while (!gsi_end_p (gsi1) && gimple_code (gsi_stmt (gsi1)) == GIMPLE_LABEL)
    {
      tree label = gimple_label_label (as_a <glabel *> (gsi_stmt (gsi1)));
      if (DECL_NONLOCAL (label) || FORCED_LABEL (label))
        return;
      gsi_prev (&gsi1);
    }
  while (!gsi_end_p (gsi2) && gimple_code (gsi_stmt (gsi2)) == GIMPLE_LABEL)
    {
      tree label = gimple_label_label (as_a <glabel *> (gsi_stmt (gsi2)));
      if (DECL_NONLOCAL (label) || FORCED_LABEL (label))
        return;
      gsi_prev (&gsi2);
    }
  if (!(gsi_end_p (gsi1) && gsi_end_p (gsi2)))
    return;

  /* If the incoming vuses are not the same, and the vuse escaped into an
     SSA_OP_DEF, then merging the 2 blocks will change the value of the def,
     which potentially means the semantics of one of the blocks will be
     changed.  TODO: make this check more precise.  */
  if (vuse_escaped && vuse1 != vuse2)
    return;

  if (dump_file)
    fprintf (dump_file, "find_duplicates: <bb %d> duplicate of <bb %d>\n",
             bb1->index, bb2->index);

  set_cluster (bb1, bb2);
}

/* Returns whether for all phis in DEST the phi alternatives for E1 and
   E2 are equal.  */

static bool
same_phi_alternatives_1 (basic_block dest, edge e1, edge e2)
{
  int n1 = e1->dest_idx, n2 = e2->dest_idx;
  gphi_iterator gsi;

  for (gsi = gsi_start_phis (dest); !gsi_end_p (gsi); gsi_next (&gsi))
    {
      gphi *phi = gsi.phi ();
      tree lhs = gimple_phi_result (phi);
      tree val1 = gimple_phi_arg_def (phi, n1);
      tree val2 = gimple_phi_arg_def (phi, n2);

      if (virtual_operand_p (lhs))
        continue;

      if (operand_equal_for_phi_arg_p (val1, val2))
        continue;
      if (gvn_uses_equal (val1, val2))
        continue;

      return false;
    }

  return true;
}

/* Returns whether for all successors of BB1 and BB2 (members of SAME_SUCC),
   the phi alternatives for BB1 and BB2 are equal.  */

static bool
same_phi_alternatives (same_succ *same_succ, basic_block bb1, basic_block bb2)
{
  unsigned int s;
  bitmap_iterator bs;
  edge e1, e2;
  basic_block succ;

  EXECUTE_IF_SET_IN_BITMAP (same_succ->succs, 0, s, bs)
    {
      succ = BASIC_BLOCK_FOR_FN (cfun, s);
      e1 = find_edge (bb1, succ);
      e2 = find_edge (bb2, succ);
      if (e1->flags & EDGE_COMPLEX
          || e2->flags & EDGE_COMPLEX)
        return false;

      /* For all phis in bb, the phi alternatives for e1 and e2 need to have
         the same value.  */
      if (!same_phi_alternatives_1 (succ, e1, e2))
        return false;
    }

  return true;
}

/* Return true if BB has non-vop phis.  */

static bool
bb_has_non_vop_phi (basic_block bb)
{
  gimple_seq phis = phi_nodes (bb);
  gimple *phi;

  if (phis == NULL)
    return false;

  if (!gimple_seq_singleton_p (phis))
    return true;

  phi = gimple_seq_first_stmt (phis);
  return !virtual_operand_p (gimple_phi_result (phi));
}

/* Returns true if redirecting the incoming edges of FROM to TO maintains the
   invariant that uses in FROM are dominated by their defs.  */

static bool
deps_ok_for_redirect_from_bb_to_bb (basic_block from, basic_block to)
{
  basic_block cd, dep_bb = BB_DEP_BB (to);
  edge_iterator ei;
  edge e;

  if (dep_bb == NULL)
    return true;

  bitmap from_preds = BITMAP_ALLOC (NULL);
  FOR_EACH_EDGE (e, ei, from->preds)
    bitmap_set_bit (from_preds, e->src->index);
  cd = nearest_common_dominator_for_set (CDI_DOMINATORS, from_preds);
  BITMAP_FREE (from_preds);

  return dominated_by_p (CDI_DOMINATORS, dep_bb, cd);
}

/* Returns true if replacing BB1 (or its replacement bb) by BB2 (or its
   replacement bb) and vice versa maintains the invariant that uses in the
   replacement are dominated by their defs.  */

static bool
deps_ok_for_redirect (basic_block bb1, basic_block bb2)
{
  if (BB_CLUSTER (bb1) != NULL)
    bb1 = BB_CLUSTER (bb1)->rep_bb;

  if (BB_CLUSTER (bb2) != NULL)
    bb2 = BB_CLUSTER (bb2)->rep_bb;

  return (deps_ok_for_redirect_from_bb_to_bb (bb1, bb2)
          && deps_ok_for_redirect_from_bb_to_bb (bb2, bb1));
}

/* Within SAME_SUCC->bbs, find clusters of bbs which can be merged.  */

static void
find_clusters_1 (same_succ *same_succ)
{
  basic_block bb1, bb2;
  unsigned int i, j;
  bitmap_iterator bi, bj;
  int nr_comparisons;
  int max_comparisons = PARAM_VALUE (PARAM_MAX_TAIL_MERGE_COMPARISONS);

  EXECUTE_IF_SET_IN_BITMAP (same_succ->bbs, 0, i, bi)
    {
      bb1 = BASIC_BLOCK_FOR_FN (cfun, i);

      /* TODO: handle blocks with phi-nodes.  We'll have to find corresponding
         phi-nodes in bb1 and bb2, with the same alternatives for the same
         preds.  */
      if (bb_has_non_vop_phi (bb1) || bb_has_eh_pred (bb1))
        continue;

      nr_comparisons = 0;
      EXECUTE_IF_SET_IN_BITMAP (same_succ->bbs, i + 1, j, bj)
        {
          bb2 = BASIC_BLOCK_FOR_FN (cfun, j);

          if (bb_has_non_vop_phi (bb2) || bb_has_eh_pred (bb2))
            continue;

          if (BB_CLUSTER (bb1) != NULL && BB_CLUSTER (bb1) == BB_CLUSTER (bb2))
            continue;

          /* Limit quadratic behavior.  */
          nr_comparisons++;
          if (nr_comparisons > max_comparisons)
            break;

          /* This is a conservative dependency check.  We could test more
             precisely for the allowed replacement direction.  */
          if (!deps_ok_for_redirect (bb1, bb2))
            continue;

          if (!(same_phi_alternatives (same_succ, bb1, bb2)))
            continue;

          find_duplicate (same_succ, bb1, bb2);
        }
    }
}

/* Find clusters of bbs which can be merged.  */

static void
find_clusters (void)
{
  same_succ *same;

  while (!worklist.is_empty ())
    {
      same = worklist.pop ();
      same->in_worklist = false;
      if (dump_file && (dump_flags & TDF_DETAILS))
        {
          fprintf (dump_file, "processing worklist entry\n");
          same_succ_print (dump_file, same);
        }
      find_clusters_1 (same);
    }
}

/* Returns the vop phi of BB, if any.  */

static gphi *
vop_phi (basic_block bb)
{
  gphi *stmt;
  gphi_iterator gsi;
  for (gsi = gsi_start_phis (bb); !gsi_end_p (gsi); gsi_next (&gsi))
    {
      stmt = gsi.phi ();
      if (!virtual_operand_p (gimple_phi_result (stmt)))
        continue;
      return stmt;
    }
  return NULL;
}

/* Redirects all edges from BB1 to BB2, removes BB1, and marks it as
   deleted.  */

static void
replace_block_by (basic_block bb1, basic_block bb2)
{
  edge pred_edge;
  edge e1, e2;
  edge_iterator ei;
  unsigned int i;
  gphi *bb2_phi;

  bb2_phi = vop_phi (bb2);

  /* Mark the basic block as deleted.  */
  mark_basic_block_deleted (bb1);

  /* Redirect the incoming edges of bb1 to bb2.  */
  for (i = EDGE_COUNT (bb1->preds); i > 0; --i)
    {
      pred_edge = EDGE_PRED (bb1, i - 1);
      pred_edge = redirect_edge_and_branch (pred_edge, bb2);
      gcc_assert (pred_edge != NULL);

      if (bb2_phi == NULL)
        continue;

      /* The phi might have run out of capacity when the redirect added an
         argument, which means it could have been replaced.  Refresh it.  */
      bb2_phi = vop_phi (bb2);

      add_phi_arg (bb2_phi, SSA_NAME_VAR (gimple_phi_result (bb2_phi)),
                   pred_edge, UNKNOWN_LOCATION);
    }

  bb2->frequency += bb1->frequency;
  if (bb2->frequency > BB_FREQ_MAX)
    bb2->frequency = BB_FREQ_MAX;

  bb2->count += bb1->count;

  /* Merge the outgoing edge counts from bb1 onto bb2.  */
  profile_count out_sum = profile_count::zero ();
  FOR_EACH_EDGE (e1, ei, bb1->succs)
    {
      e2 = find_edge (bb2, e1->dest);
      gcc_assert (e2);
      e2->count += e1->count;
      out_sum += e2->count;
    }
  /* Recompute the edge probabilities from the new merged edge count.
     Use the sum of the new merged edge counts computed above instead
     of bb2's merged count, in case there are profile count insanities
     making the bb count inconsistent with the edge weights.  */
  FOR_EACH_EDGE (e2, ei, bb2->succs)
    {
      e2->probability = e2->count.probability_in (out_sum);
    }

  /* Move over any user labels from bb1 after the bb2 labels.  */
  gimple_stmt_iterator gsi1 = gsi_start_bb (bb1);
  if (!gsi_end_p (gsi1) && gimple_code (gsi_stmt (gsi1)) == GIMPLE_LABEL)
    {
      gimple_stmt_iterator gsi2 = gsi_after_labels (bb2);
      while (!gsi_end_p (gsi1)
             && gimple_code (gsi_stmt (gsi1)) == GIMPLE_LABEL)
        {
          tree label = gimple_label_label (as_a <glabel *> (gsi_stmt (gsi1)));
          gcc_assert (!DECL_NONLOCAL (label) && !FORCED_LABEL (label));
          if (DECL_ARTIFICIAL (label))
            gsi_next (&gsi1);
          else
            gsi_move_before (&gsi1, &gsi2);
        }
    }

  /* Clear range info from all stmts in BB2 -- this transformation
     could make them out of date.  */
  reset_flow_sensitive_info_in_bb (bb2);

  /* Do updates that use bb1, before deleting bb1.  */
  release_last_vdef (bb1);
  same_succ_flush_bb (bb1);

  delete_basic_block (bb1);
}

/* Bbs for which update_debug_stmt needs to be called.  */

static bitmap update_bbs;

/* For each cluster in all_clusters, merge all cluster->bbs.  Returns
   number of bbs removed.  */

static int
apply_clusters (void)
{
  basic_block bb1, bb2;
  bb_cluster *c;
  unsigned int i, j;
  bitmap_iterator bj;
  int nr_bbs_removed = 0;

  for (i = 0; i < all_clusters.length (); ++i)
    {
      c = all_clusters[i];
      if (c == NULL)
        continue;

      bb2 = c->rep_bb;
      bitmap_set_bit (update_bbs, bb2->index);

      bitmap_clear_bit (c->bbs, bb2->index);
      EXECUTE_IF_SET_IN_BITMAP (c->bbs, 0, j, bj)
        {
          bb1 = BASIC_BLOCK_FOR_FN (cfun, j);
          bitmap_clear_bit (update_bbs, bb1->index);

          replace_block_by (bb1, bb2);
          nr_bbs_removed++;
        }
    }

  return nr_bbs_removed;
}

/* Resets debug statement STMT if it has uses that are not dominated by their
   defs.  */

static void
update_debug_stmt (gimple *stmt)
{
  use_operand_p use_p;
  ssa_op_iter oi;
  basic_block bbuse;

  if (!gimple_debug_bind_p (stmt))
    return;

  bbuse = gimple_bb (stmt);
  FOR_EACH_PHI_OR_STMT_USE (use_p, stmt, oi, SSA_OP_USE)
    {
      tree name = USE_FROM_PTR (use_p);
      gimple *def_stmt = SSA_NAME_DEF_STMT (name);
      basic_block bbdef = gimple_bb (def_stmt);
      if (bbdef == NULL || bbuse == bbdef
          || dominated_by_p (CDI_DOMINATORS, bbuse, bbdef))
        continue;

      gimple_debug_bind_reset_value (stmt);
      update_stmt (stmt);
      break;
    }
}

/* Resets all debug statements that have uses that are not
   dominated by their defs.  */

static void
update_debug_stmts (void)
{
  basic_block bb;
  bitmap_iterator bi;
  unsigned int i;

  EXECUTE_IF_SET_IN_BITMAP (update_bbs, 0, i, bi)
    {
      gimple *stmt;
      gimple_stmt_iterator gsi;

      bb = BASIC_BLOCK_FOR_FN (cfun, i);
      for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
        {
          stmt = gsi_stmt (gsi);
          if (!is_gimple_debug (stmt))
            continue;
          update_debug_stmt (stmt);
        }
    }
}

/* Runs tail merge optimization.  */

unsigned int
tail_merge_optimize (unsigned int todo)
{
  int nr_bbs_removed_total = 0;
  int nr_bbs_removed;
  bool loop_entered = false;
  int iteration_nr = 0;
  int max_iterations = PARAM_VALUE (PARAM_MAX_TAIL_MERGE_ITERATIONS);

  if (!flag_tree_tail_merge
      || max_iterations == 0)
    return 0;

  timevar_push (TV_TREE_TAIL_MERGE);

  /* We enter from PRE which has critical edges split.  Elimination
     does not process trivially dead code, so clean up the CFG if we
     are told to, and then re-split critical edges.  */
  if (todo & TODO_cleanup_cfg)
    {
      cleanup_tree_cfg ();
      todo &= ~TODO_cleanup_cfg;
      split_critical_edges ();
    }

  if (!dom_info_available_p (CDI_DOMINATORS))
    {
      /* PRE can leave us with unreachable blocks, remove them now.  */
      delete_unreachable_blocks ();
      calculate_dominance_info (CDI_DOMINATORS);
    }
  init_worklist ();

  while (!worklist.is_empty ())
    {
      if (!loop_entered)
        {
          loop_entered = true;
          alloc_cluster_vectors ();
          update_bbs = BITMAP_ALLOC (NULL);
        }
      else
        reset_cluster_vectors ();

      iteration_nr++;
      if (dump_file && (dump_flags & TDF_DETAILS))
        fprintf (dump_file, "worklist iteration #%d\n", iteration_nr);

      find_clusters ();
      gcc_assert (worklist.is_empty ());
      if (all_clusters.is_empty ())
        break;

      nr_bbs_removed = apply_clusters ();
      nr_bbs_removed_total += nr_bbs_removed;
      if (nr_bbs_removed == 0)
        break;

      free_dominance_info (CDI_DOMINATORS);

      if (iteration_nr == max_iterations)
        break;

      calculate_dominance_info (CDI_DOMINATORS);
      update_worklist ();
    }

  if (dump_file && (dump_flags & TDF_DETAILS))
    fprintf (dump_file, "htab collision / search: %f\n",
             same_succ_htab->collisions ());

  if (nr_bbs_removed_total > 0)
    {
      if (MAY_HAVE_DEBUG_STMTS)
        {
          calculate_dominance_info (CDI_DOMINATORS);
          update_debug_stmts ();
        }

      if (dump_file && (dump_flags & TDF_DETAILS))
        {
          fprintf (dump_file, "Before TODOs.\n");
          dump_function_to_file (current_function_decl, dump_file, dump_flags);
        }

      mark_virtual_operands_for_renaming (cfun);
    }

  delete_worklist ();
  if (loop_entered)
    {
      delete_cluster_vectors ();
      BITMAP_FREE (update_bbs);
    }

  timevar_pop (TV_TREE_TAIL_MERGE);

  return todo;
}