gcc/shrink-wrap.c
1 /* Shrink-wrapping related optimizations.
2 Copyright (C) 1987-2015 Free Software Foundation, Inc.
3
4 This file is part of GCC.
5
6 GCC is free software; you can redistribute it and/or modify it under
7 the terms of the GNU General Public License as published by the Free
8 Software Foundation; either version 3, or (at your option) any later
9 version.
10
11 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
12 WARRANTY; without even the implied warranty of MERCHANTABILITY or
13 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
14 for more details.
15
16 You should have received a copy of the GNU General Public License
17 along with GCC; see the file COPYING3. If not see
18 <http://www.gnu.org/licenses/>. */
19
20 /* This file handles shrink-wrapping related optimizations. */
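/* A minimal illustration of the idea (an example made up for this
   comment, not taken from this file or the testsuite):

       int
       f (int *p)
       {
         if (p == NULL)
           return -1;
         return expensive (p);
       }

   Without shrink-wrapping, the prologue (stack adjustment, register
   saves) is emitted at the very start of f, so the early-return path
   pays for it too.  The code in this file tries to emit the prologue
   only on the path that reaches the call to expensive, and to use a
   bare simple_return on the other path.  */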
21
22 #include "config.h"
23 #include "system.h"
24 #include "coretypes.h"
25 #include "backend.h"
26 #include "cfghooks.h"
27 #include "tree.h"
28 #include "rtl.h"
29 #include "df.h"
30 #include "rtl-error.h"
31 #include "alias.h"
32 #include "fold-const.h"
33 #include "stor-layout.h"
34 #include "varasm.h"
35 #include "stringpool.h"
36 #include "flags.h"
37 #include "except.h"
38 #include "insn-config.h"
39 #include "expmed.h"
40 #include "dojump.h"
41 #include "explow.h"
42 #include "calls.h"
43 #include "emit-rtl.h"
44 #include "stmt.h"
45 #include "expr.h"
46 #include "insn-codes.h"
47 #include "optabs.h"
48 #include "libfuncs.h"
49 #include "regs.h"
50 #include "recog.h"
51 #include "output.h"
52 #include "tm_p.h"
53 #include "langhooks.h"
54 #include "target.h"
55 #include "common/common-target.h"
56 #include "gimple-expr.h"
57 #include "gimplify.h"
58 #include "tree-pass.h"
59 #include "cfgrtl.h"
60 #include "params.h"
61 #include "bb-reorder.h"
62 #include "shrink-wrap.h"
63 #include "regcprop.h"
64 #include "rtl-iter.h"
65
66
67 /* Return true if INSN requires the stack frame to be set up.
68 PROLOGUE_USED contains the hard registers used in the function
69 prologue. SET_UP_BY_PROLOGUE is the set of registers we expect the
70 prologue to set up for the function. */
71 bool
72 requires_stack_frame_p (rtx_insn *insn, HARD_REG_SET prologue_used,
73 HARD_REG_SET set_up_by_prologue)
74 {
75 df_ref def, use;
76 HARD_REG_SET hardregs;
77 unsigned regno;
78
79 if (CALL_P (insn))
80 return !SIBLING_CALL_P (insn);
81
82 /* We need a frame to get the unique CFA expected by the unwinder. */
83 if (cfun->can_throw_non_call_exceptions && can_throw_internal (insn))
84 return true;
85
86 CLEAR_HARD_REG_SET (hardregs);
87 FOR_EACH_INSN_DEF (def, insn)
88 {
89 rtx dreg = DF_REF_REG (def);
90
91 if (!REG_P (dreg))
92 continue;
93
94 add_to_hard_reg_set (&hardregs, GET_MODE (dreg),
95 REGNO (dreg));
96 }
97 if (hard_reg_set_intersect_p (hardregs, prologue_used))
98 return true;
99 AND_COMPL_HARD_REG_SET (hardregs, call_used_reg_set);
100 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
101 if (TEST_HARD_REG_BIT (hardregs, regno)
102 && df_regs_ever_live_p (regno))
103 return true;
104
105 FOR_EACH_INSN_USE (use, insn)
106 {
107 rtx reg = DF_REF_REG (use);
108
109 if (!REG_P (reg))
110 continue;
111
112 add_to_hard_reg_set (&hardregs, GET_MODE (reg),
113 REGNO (reg));
114 }
115 if (hard_reg_set_intersect_p (hardregs, set_up_by_prologue))
116 return true;
117
118 return false;
119 }
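/* For illustration only: per the test above, a non-sibling call always
   requires the frame, as does any insn that can throw internally when
   -fnon-call-exceptions is in effect.  A plain move such as
   (set (reg:SI 1) (reg:SI 5)) requires it only if it reads a register
   the prologue sets up, writes a register the prologue uses, or writes
   a call-saved register that is live anywhere in the function.  */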
120
121 /* See whether there is a single live edge from BB whose destination
122 uses [REGNO, END_REGNO).  Return the live edge if its destination
123 bb has one or two predecessors.  Otherwise return NULL. */
124
125 static edge
126 live_edge_for_reg (basic_block bb, int regno, int end_regno)
127 {
128 edge e, live_edge;
129 edge_iterator ei;
130 bitmap live;
131 int i;
132
133 live_edge = NULL;
134 FOR_EACH_EDGE (e, ei, bb->succs)
135 {
136 live = df_get_live_in (e->dest);
137 for (i = regno; i < end_regno; i++)
138 if (REGNO_REG_SET_P (live, i))
139 {
140 if (live_edge && live_edge != e)
141 return NULL;
142 live_edge = e;
143 }
144 }
145
146 /* We can sometimes encounter dead code. Don't try to move it
147 into the exit block. */
148 if (!live_edge || live_edge->dest == EXIT_BLOCK_PTR_FOR_FN (cfun))
149 return NULL;
150
151 /* Reject targets of abnormal edges. This is needed for correctness
152 on ports like Alpha and MIPS, whose pic_offset_table_rtx can die on
153 exception edges even though it is generally treated as call-saved
154 for the majority of the compilation. Moving across abnormal edges
155 isn't going to be interesting for shrink-wrap usage anyway. */
156 if (live_edge->flags & EDGE_ABNORMAL)
157 return NULL;
158
159 /* When live_edge->dest->preds == 2, we can create a new block on
160 the edge to make it meet the requirement. */
161 if (EDGE_COUNT (live_edge->dest->preds) > 2)
162 return NULL;
163
164 return live_edge;
165 }
166
167 /* Try to move INSN from BB to a successor.  Return true on success.
168 USES and DEFS are the sets of registers that are used and defined
169 after INSN in BB.  SPLIT_P indicates whether a live edge from BB
170 has been split. */
171
172 static bool
173 move_insn_for_shrink_wrap (basic_block bb, rtx_insn *insn,
174 const HARD_REG_SET uses,
175 const HARD_REG_SET defs,
176 bool *split_p)
177 {
178 rtx set, src, dest;
179 bitmap live_out, live_in, bb_uses, bb_defs;
180 unsigned int i, dregno, end_dregno;
181 unsigned int sregno = FIRST_PSEUDO_REGISTER;
182 unsigned int end_sregno = FIRST_PSEUDO_REGISTER;
183 basic_block next_block;
184 edge live_edge;
185
186 /* Look for a simple register assignment. We don't use single_set here
187 because we can't deal with any CLOBBERs, USEs, or REG_UNUSED secondary
188 destinations. */
189 if (!INSN_P (insn))
190 return false;
191 set = PATTERN (insn);
192 if (GET_CODE (set) != SET)
193 return false;
194 src = SET_SRC (set);
195 dest = SET_DEST (set);
196
197 /* For the destination, we want only a register. Also disallow STACK
198 or FRAME related adjustments. They are likely part of the prologue,
199 so keep them in the entry block. */
200 if (!REG_P (dest)
201 || dest == stack_pointer_rtx
202 || dest == frame_pointer_rtx
203 || dest == hard_frame_pointer_rtx)
204 return false;
205
206 /* For the source, we want one of:
207 (1) A (non-overlapping) register,
208 (2) A constant, or
209 (3) An expression involving no more than one register.
210
211 That last point comes from the code that follows, which was originally
212 written to handle only register move operations, and still only handles
213 a single source register when checking for overlaps.  Happily, the
214 same checks can be applied to expressions like (plus reg const). */
215
216 if (CONSTANT_P (src))
217 ;
218 else if (!REG_P (src))
219 {
220 rtx src_inner = NULL_RTX;
221
222 if (can_throw_internal (insn))
223 return false;
224
225 subrtx_var_iterator::array_type array;
226 FOR_EACH_SUBRTX_VAR (iter, array, src, ALL)
227 {
228 rtx x = *iter;
229 switch (GET_RTX_CLASS (GET_CODE (x)))
230 {
231 case RTX_CONST_OBJ:
232 case RTX_COMPARE:
233 case RTX_COMM_COMPARE:
234 case RTX_BIN_ARITH:
235 case RTX_COMM_ARITH:
236 case RTX_UNARY:
237 case RTX_TERNARY:
238 /* Constant or expression. Continue. */
239 break;
240
241 case RTX_OBJ:
242 case RTX_EXTRA:
243 switch (GET_CODE (x))
244 {
245 case UNSPEC:
246 case SUBREG:
247 case STRICT_LOW_PART:
248 case PC:
249 case LO_SUM:
250 /* Ok. Continue. */
251 break;
252
253 case REG:
254 /* Fail if we see a second inner register. */
255 if (src_inner != NULL)
256 return false;
257 src_inner = x;
258 break;
259
260 default:
261 return false;
262 }
263 break;
264
265 default:
266 return false;
267 }
268 }
269
270 if (src_inner != NULL)
271 src = src_inner;
272 }
273
274 /* Make sure that the source register isn't defined later in BB. */
275 if (REG_P (src))
276 {
277 sregno = REGNO (src);
278 end_sregno = END_REGNO (src);
279 if (overlaps_hard_reg_set_p (defs, GET_MODE (src), sregno))
280 return false;
281 }
282
283 /* Make sure that the destination register isn't referenced later in BB. */
284 dregno = REGNO (dest);
285 end_dregno = END_REGNO (dest);
286 if (overlaps_hard_reg_set_p (uses, GET_MODE (dest), dregno)
287 || overlaps_hard_reg_set_p (defs, GET_MODE (dest), dregno))
288 return false;
289
290 /* See whether there is a successor block to which we could move INSN. */
291 live_edge = live_edge_for_reg (bb, dregno, end_dregno);
292 if (!live_edge)
293 return false;
294
295 next_block = live_edge->dest;
296 /* Create a new basic block on the edge. */
297 if (EDGE_COUNT (next_block->preds) == 2)
298 {
299 /* split_edge for a block with only one successor is meaningless. */
300 if (EDGE_COUNT (bb->succs) == 1)
301 return false;
302
303 /* If DF_LIVE doesn't exist, i.e. at -O1, just give up. */
304 if (!df_live)
305 return false;
306
307 basic_block old_dest = live_edge->dest;
308 next_block = split_edge (live_edge);
309
310 /* We create a new basic block. Call df_grow_bb_info to make sure
311 all data structures are allocated. */
312 df_grow_bb_info (df_live);
313
314 bitmap_and (df_get_live_in (next_block), df_get_live_out (bb),
315 df_get_live_in (old_dest));
316 df_set_bb_dirty (next_block);
317
318 /* We should not split more than once per function. */
319 if (*split_p)
320 return false;
321
322 *split_p = true;
323 }
324
325 /* At this point we are committed to moving INSN, but let's try to
326 move it as far as we can. */
327 do
328 {
329 live_out = df_get_live_out (bb);
330 live_in = df_get_live_in (next_block);
331 bb = next_block;
332
333 /* Check whether BB uses DEST or clobbers DEST. We need to add
334 INSN to BB if so. Either way, DEST is no longer live on entry,
335 except for any part that overlaps SRC (next loop). */
336 bb_uses = &DF_LR_BB_INFO (bb)->use;
337 bb_defs = &DF_LR_BB_INFO (bb)->def;
338 if (df_live)
339 {
340 for (i = dregno; i < end_dregno; i++)
341 {
342 if (*split_p
343 || REGNO_REG_SET_P (bb_uses, i)
344 || REGNO_REG_SET_P (bb_defs, i)
345 || REGNO_REG_SET_P (&DF_LIVE_BB_INFO (bb)->gen, i))
346 next_block = NULL;
347 CLEAR_REGNO_REG_SET (live_out, i);
348 CLEAR_REGNO_REG_SET (live_in, i);
349 }
350
351 /* Check whether BB clobbers SRC. We need to add INSN to BB if so.
352 Either way, SRC is now live on entry. */
353 for (i = sregno; i < end_sregno; i++)
354 {
355 if (*split_p
356 || REGNO_REG_SET_P (bb_defs, i)
357 || REGNO_REG_SET_P (&DF_LIVE_BB_INFO (bb)->gen, i))
358 next_block = NULL;
359 SET_REGNO_REG_SET (live_out, i);
360 SET_REGNO_REG_SET (live_in, i);
361 }
362 }
363 else
364 {
365 /* DF_LR_BB_INFO (bb)->def does not include the DF_REF_PARTIAL and
366 DF_REF_CONDITIONAL defs.  So if DF_LIVE doesn't exist, i.e.
367 at -O1, just give up searching NEXT_BLOCK. */
368 next_block = NULL;
369 for (i = dregno; i < end_dregno; i++)
370 {
371 CLEAR_REGNO_REG_SET (live_out, i);
372 CLEAR_REGNO_REG_SET (live_in, i);
373 }
374
375 for (i = sregno; i < end_sregno; i++)
376 {
377 SET_REGNO_REG_SET (live_out, i);
378 SET_REGNO_REG_SET (live_in, i);
379 }
380 }
381
382 /* If we don't need to add the move to BB, look for a single
383 successor block. */
384 if (next_block)
385 {
386 live_edge = live_edge_for_reg (next_block, dregno, end_dregno);
387 if (!live_edge || EDGE_COUNT (live_edge->dest->preds) > 1)
388 break;
389 next_block = live_edge->dest;
390 }
391 }
392 while (next_block);
393
394 /* For the newly created basic block, there is no dataflow info at all,
395 so skip the following dataflow update and check. */
396 if (!(*split_p))
397 {
398 /* BB now defines DEST. It only uses the parts of DEST that overlap SRC
399 (next loop). */
400 for (i = dregno; i < end_dregno; i++)
401 {
402 CLEAR_REGNO_REG_SET (bb_uses, i);
403 SET_REGNO_REG_SET (bb_defs, i);
404 }
405
406 /* BB now uses SRC. */
407 for (i = sregno; i < end_sregno; i++)
408 SET_REGNO_REG_SET (bb_uses, i);
409 }
410
411 emit_insn_after (PATTERN (insn), bb_note (bb));
412 delete_insn (insn);
413 return true;
414 }
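/* A sketch of the effect of the function above (the block layout and
   register numbers are invented for this comment):

       before:                         after:
         entry:                          entry:
           r12 := r4    ;; arg copy        if (cond) goto slow
           if (cond) goto slow             simple path ...
           simple path ...               slow:
         slow:                             r12 := r4  ;; copy sunk here
           ... uses r12 ...                ... uses r12 ...

   Once the copy into the call-saved register r12 has been sunk out of
   the entry block, the entry block no longer mentions r12, so the
   prologue that saves r12 can later be placed on the slow path only.  */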
415
416 /* Look for register copies in the first block of the function, and move
417 them down into successor blocks if the register is used only on one
418 path. This exposes more opportunities for shrink-wrapping. These
419 kinds of sets often occur when incoming argument registers are moved
420 to call-saved registers because their values are live across one or
421 more calls during the function. */
422
423 void
424 prepare_shrink_wrap (basic_block entry_block)
425 {
426 rtx_insn *insn, *curr;
427 rtx x;
428 HARD_REG_SET uses, defs;
429 df_ref def, use;
430 bool split_p = false;
431
432 if (JUMP_P (BB_END (entry_block)))
433 {
434 /* To expose more shrink-wrapping opportunities, prepare_shrink_wrap tries
435 to sink copies from parameter registers to callee-saved registers out of
436 the entry block.  copyprop_hardreg_forward_bb_without_debug_insn is called
437 to remove some of the dependences that would otherwise block this. */
438 copyprop_hardreg_forward_bb_without_debug_insn (entry_block);
439 }
440
441 CLEAR_HARD_REG_SET (uses);
442 CLEAR_HARD_REG_SET (defs);
443 FOR_BB_INSNS_REVERSE_SAFE (entry_block, insn, curr)
444 if (NONDEBUG_INSN_P (insn)
445 && !move_insn_for_shrink_wrap (entry_block, insn, uses, defs,
446 &split_p))
447 {
448 /* Add all defined registers to DEFS. */
449 FOR_EACH_INSN_DEF (def, insn)
450 {
451 x = DF_REF_REG (def);
452 if (REG_P (x) && HARD_REGISTER_P (x))
453 SET_HARD_REG_BIT (defs, REGNO (x));
454 }
455
456 /* Add all used registers to USES. */
457 FOR_EACH_INSN_USE (use, insn)
458 {
459 x = DF_REF_REG (use);
460 if (REG_P (x) && HARD_REGISTER_P (x))
461 SET_HARD_REG_BIT (uses, REGNO (x));
462 }
463 }
464 }
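/* An example of the source pattern the function above targets (made up
   for this comment; the register allocation described is only the
   typical outcome):

       long
       g (long x)
       {
         if (x == 0)
           return 0;
         helper ();
         return x;
       }

   Because x must survive the call to helper, the incoming argument
   register is copied into a call-saved register in the first block.
   Sinking that copy below the x == 0 test keeps the entry block free of
   call-saved registers, so the register save (and with it the prologue)
   can be confined to the path containing the call.  */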
465
466 /* Create a copy of BB's instructions and insert it at BEFORE.  Redirect
467 preds of BB to COPY_BB if they don't appear in NEED_PROLOGUE. */
468 void
469 dup_block_and_redirect (basic_block bb, basic_block copy_bb, rtx_insn *before,
470 bitmap_head *need_prologue)
471 {
472 edge_iterator ei;
473 edge e;
474 rtx_insn *insn = BB_END (bb);
475
476 /* We know BB has a single successor, so there is no need to copy a
477 simple jump at the end of BB. */
478 if (simplejump_p (insn))
479 insn = PREV_INSN (insn);
480
481 start_sequence ();
482 duplicate_insn_chain (BB_HEAD (bb), insn);
483 if (dump_file)
484 {
485 unsigned count = 0;
486 for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
487 if (active_insn_p (insn))
488 ++count;
489 fprintf (dump_file, "Duplicating bb %d to bb %d, %u active insns.\n",
490 bb->index, copy_bb->index, count);
491 }
492 insn = get_insns ();
493 end_sequence ();
494 emit_insn_before (insn, before);
495
496 /* Redirect all the paths that need no prologue into copy_bb. */
497 for (ei = ei_start (bb->preds); (e = ei_safe_edge (ei));)
498 if (!bitmap_bit_p (need_prologue, e->src->index))
499 {
500 int freq = EDGE_FREQUENCY (e);
501 copy_bb->count += e->count;
502 copy_bb->frequency += EDGE_FREQUENCY (e);
503 e->dest->count -= e->count;
504 if (e->dest->count < 0)
505 e->dest->count = 0;
506 e->dest->frequency -= freq;
507 if (e->dest->frequency < 0)
508 e->dest->frequency = 0;
509 redirect_edge_and_branch_force (e, copy_bb);
510 continue;
511 }
512 else
513 ei_next (&ei);
514 }
515
516
517 /* Try to perform a kind of shrink-wrapping, making sure the
518 prologue/epilogue is emitted only around those parts of the
519 function that require it. */
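/* Outline of the algorithm below, for orientation (the numbering exists
   only in this comment):

     1. Record the registers used and clobbered by the prologue, and run
        prepare_shrink_wrap to sink argument copies out of the entry block.
     2. Mark every block containing an insn that requires the stack frame,
        and propagate the mark to everything reachable from it; also note
        blocks too large to duplicate.
     3. Collect candidate tail blocks that could be duplicated for the
        paths not needing a prologue.
     4. Walk backwards to find the blocks where a prologue is wanted on
        every outgoing path (bb_antic_flags), and look for exactly one
        edge entering that region; that edge becomes the new prologue
        insertion point.
     5. Give up if there is more than one such edge, or if the prologue
        clobbers a register live on the chosen edge; otherwise duplicate
        the shared tail blocks for the no-prologue paths.  */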
520
521 void
522 try_shrink_wrapping (edge *entry_edge, edge orig_entry_edge,
523 bitmap_head *bb_flags, rtx_insn *prologue_seq)
524 {
525 edge e;
526 edge_iterator ei;
527 bool nonempty_prologue = false;
528 unsigned max_grow_size;
529 rtx_insn *seq;
530
531 for (seq = prologue_seq; seq; seq = NEXT_INSN (seq))
532 if (!NOTE_P (seq) || NOTE_KIND (seq) != NOTE_INSN_PROLOGUE_END)
533 {
534 nonempty_prologue = true;
535 break;
536 }
537
538 if (SHRINK_WRAPPING_ENABLED
539 && (targetm.profile_before_prologue () || !crtl->profile)
540 && nonempty_prologue && !crtl->calls_eh_return)
541 {
542 HARD_REG_SET prologue_clobbered, prologue_used, live_on_edge;
543 struct hard_reg_set_container set_up_by_prologue;
544 rtx_insn *p_insn;
545 vec<basic_block> vec;
546 basic_block bb;
547 bitmap_head bb_antic_flags;
548 bitmap_head bb_on_list;
549 bitmap_head bb_tail;
550
551 if (dump_file)
552 fprintf (dump_file, "Attempting shrink-wrapping optimization.\n");
553
554 /* Compute the registers set and used in the prologue. */
555 CLEAR_HARD_REG_SET (prologue_clobbered);
556 CLEAR_HARD_REG_SET (prologue_used);
557 for (p_insn = prologue_seq; p_insn; p_insn = NEXT_INSN (p_insn))
558 {
559 HARD_REG_SET this_used;
560 if (!NONDEBUG_INSN_P (p_insn))
561 continue;
562
563 CLEAR_HARD_REG_SET (this_used);
564 note_uses (&PATTERN (p_insn), record_hard_reg_uses,
565 &this_used);
566 AND_COMPL_HARD_REG_SET (this_used, prologue_clobbered);
567 IOR_HARD_REG_SET (prologue_used, this_used);
568 note_stores (PATTERN (p_insn), record_hard_reg_sets,
569 &prologue_clobbered);
570 }
571
572 prepare_shrink_wrap ((*entry_edge)->dest);
573
574 bitmap_initialize (&bb_antic_flags, &bitmap_default_obstack);
575 bitmap_initialize (&bb_on_list, &bitmap_default_obstack);
576 bitmap_initialize (&bb_tail, &bitmap_default_obstack);
577
578 /* Find the set of basic blocks that require a stack frame,
579 and blocks that are too big to be duplicated. */
580
581 vec.create (n_basic_blocks_for_fn (cfun));
582
583 CLEAR_HARD_REG_SET (set_up_by_prologue.set);
584 add_to_hard_reg_set (&set_up_by_prologue.set, Pmode,
585 STACK_POINTER_REGNUM);
586 add_to_hard_reg_set (&set_up_by_prologue.set, Pmode, ARG_POINTER_REGNUM);
587 if (frame_pointer_needed)
588 add_to_hard_reg_set (&set_up_by_prologue.set, Pmode,
589 HARD_FRAME_POINTER_REGNUM);
590 if (pic_offset_table_rtx
591 && (unsigned) PIC_OFFSET_TABLE_REGNUM != INVALID_REGNUM)
592 add_to_hard_reg_set (&set_up_by_prologue.set, Pmode,
593 PIC_OFFSET_TABLE_REGNUM);
594 if (crtl->drap_reg)
595 add_to_hard_reg_set (&set_up_by_prologue.set,
596 GET_MODE (crtl->drap_reg),
597 REGNO (crtl->drap_reg));
598 if (targetm.set_up_by_prologue)
599 targetm.set_up_by_prologue (&set_up_by_prologue);
600
601 /* We don't use a different max size depending on
602 optimize_bb_for_speed_p because increasing shrink-wrapping
603 opportunities by duplicating tail blocks can actually result
604 in an overall decrease in code size. */
605 max_grow_size = get_uncond_jump_length ();
606 max_grow_size *= PARAM_VALUE (PARAM_MAX_GROW_COPY_BB_INSNS);
607
608 FOR_EACH_BB_FN (bb, cfun)
609 {
610 rtx_insn *insn;
611 unsigned size = 0;
612
613 FOR_BB_INSNS (bb, insn)
614 if (NONDEBUG_INSN_P (insn))
615 {
616 if (requires_stack_frame_p (insn, prologue_used,
617 set_up_by_prologue.set))
618 {
619 if (bb == (*entry_edge)->dest)
620 goto fail_shrinkwrap;
621 bitmap_set_bit (bb_flags, bb->index);
622 vec.quick_push (bb);
623 break;
624 }
625 else if (size <= max_grow_size)
626 {
627 size += get_attr_min_length (insn);
628 if (size > max_grow_size)
629 bitmap_set_bit (&bb_on_list, bb->index);
630 }
631 }
632 }
633
634 /* Blocks that really need a prologue, or are too big for tails. */
635 bitmap_ior_into (&bb_on_list, bb_flags);
636
637 /* For every basic block that needs a prologue, mark all blocks
638 reachable from it, so as to ensure they are also seen as
639 requiring a prologue. */
640 while (!vec.is_empty ())
641 {
642 basic_block tmp_bb = vec.pop ();
643
644 FOR_EACH_EDGE (e, ei, tmp_bb->succs)
645 if (e->dest != EXIT_BLOCK_PTR_FOR_FN (cfun)
646 && bitmap_set_bit (bb_flags, e->dest->index))
647 vec.quick_push (e->dest);
648 }
649
650 /* Find the set of basic blocks that need no prologue, have a
651 single successor, can be duplicated, meet a max size
652 requirement, and go to the exit via like blocks. */
653 vec.quick_push (EXIT_BLOCK_PTR_FOR_FN (cfun));
654 while (!vec.is_empty ())
655 {
656 basic_block tmp_bb = vec.pop ();
657
658 FOR_EACH_EDGE (e, ei, tmp_bb->preds)
659 if (single_succ_p (e->src)
660 && !bitmap_bit_p (&bb_on_list, e->src->index)
661 && can_duplicate_block_p (e->src))
662 {
663 edge pe;
664 edge_iterator pei;
665
666 /* If there is a predecessor of e->src which doesn't
667 need a prologue and the edge is complex,
668 we might not be able to redirect the branch
669 to a copy of e->src. */
670 FOR_EACH_EDGE (pe, pei, e->src->preds)
671 if ((pe->flags & EDGE_COMPLEX) != 0
672 && !bitmap_bit_p (bb_flags, pe->src->index))
673 break;
674 if (pe == NULL && bitmap_set_bit (&bb_tail, e->src->index))
675 vec.quick_push (e->src);
676 }
677 }
678
679 /* Now walk backwards from every block that is marked as needing
680 a prologue to compute the bb_antic_flags bitmap. Exclude
681 tail blocks; they can be duplicated to be used on paths not
682 needing a prologue. */
683 bitmap_clear (&bb_on_list);
684 bitmap_and_compl (&bb_antic_flags, bb_flags, &bb_tail);
685 FOR_EACH_BB_FN (bb, cfun)
686 {
687 if (!bitmap_bit_p (&bb_antic_flags, bb->index))
688 continue;
689 FOR_EACH_EDGE (e, ei, bb->preds)
690 if (!bitmap_bit_p (&bb_antic_flags, e->src->index)
691 && bitmap_set_bit (&bb_on_list, e->src->index))
692 vec.quick_push (e->src);
693 }
694 while (!vec.is_empty ())
695 {
696 basic_block tmp_bb = vec.pop ();
697 bool all_set = true;
698
699 bitmap_clear_bit (&bb_on_list, tmp_bb->index);
700 FOR_EACH_EDGE (e, ei, tmp_bb->succs)
701 if (!bitmap_bit_p (&bb_antic_flags, e->dest->index))
702 {
703 all_set = false;
704 break;
705 }
706
707 if (all_set)
708 {
709 bitmap_set_bit (&bb_antic_flags, tmp_bb->index);
710 FOR_EACH_EDGE (e, ei, tmp_bb->preds)
711 if (!bitmap_bit_p (&bb_antic_flags, e->src->index)
712 && bitmap_set_bit (&bb_on_list, e->src->index))
713 vec.quick_push (e->src);
714 }
715 }
716 /* Find exactly one edge that leads to a block in ANTIC from
717 a block that isn't. */
718 if (!bitmap_bit_p (&bb_antic_flags, (*entry_edge)->dest->index))
719 FOR_EACH_BB_FN (bb, cfun)
720 {
721 if (!bitmap_bit_p (&bb_antic_flags, bb->index))
722 continue;
723 FOR_EACH_EDGE (e, ei, bb->preds)
724 if (!bitmap_bit_p (&bb_antic_flags, e->src->index))
725 {
726 if (*entry_edge != orig_entry_edge)
727 {
728 *entry_edge = orig_entry_edge;
729 if (dump_file)
730 fprintf (dump_file, "More than one candidate edge.\n");
731 goto fail_shrinkwrap;
732 }
733 if (dump_file)
734 fprintf (dump_file, "Found candidate edge for "
735 "shrink-wrapping, %d->%d.\n", e->src->index,
736 e->dest->index);
737 *entry_edge = e;
738 }
739 }
740
741 if (*entry_edge != orig_entry_edge)
742 {
743 /* Test whether the prologue is known to clobber any register
744 (other than FP or SP) that is live on the edge. */
745 CLEAR_HARD_REG_BIT (prologue_clobbered, STACK_POINTER_REGNUM);
746 if (frame_pointer_needed)
747 CLEAR_HARD_REG_BIT (prologue_clobbered, HARD_FRAME_POINTER_REGNUM);
748 REG_SET_TO_HARD_REG_SET (live_on_edge,
749 df_get_live_in ((*entry_edge)->dest));
750 if (hard_reg_set_intersect_p (live_on_edge, prologue_clobbered))
751 {
752 *entry_edge = orig_entry_edge;
753 if (dump_file)
754 fprintf (dump_file,
755 "Shrink-wrapping aborted due to clobber.\n");
756 }
757 }
758 if (*entry_edge != orig_entry_edge)
759 {
760 crtl->shrink_wrapped = true;
761 if (dump_file)
762 fprintf (dump_file, "Performing shrink-wrapping.\n");
763
764 /* Find tail blocks reachable from both blocks needing a
765 prologue and blocks not needing a prologue. */
766 if (!bitmap_empty_p (&bb_tail))
767 FOR_EACH_BB_FN (bb, cfun)
768 {
769 bool some_pro, some_no_pro;
770 if (!bitmap_bit_p (&bb_tail, bb->index))
771 continue;
772 some_pro = some_no_pro = false;
773 FOR_EACH_EDGE (e, ei, bb->preds)
774 {
775 if (bitmap_bit_p (bb_flags, e->src->index))
776 some_pro = true;
777 else
778 some_no_pro = true;
779 }
780 if (some_pro && some_no_pro)
781 vec.quick_push (bb);
782 else
783 bitmap_clear_bit (&bb_tail, bb->index);
784 }
785 /* Find the head of each tail. */
786 while (!vec.is_empty ())
787 {
788 basic_block tbb = vec.pop ();
789
790 if (!bitmap_bit_p (&bb_tail, tbb->index))
791 continue;
792
793 while (single_succ_p (tbb))
794 {
795 tbb = single_succ (tbb);
796 bitmap_clear_bit (&bb_tail, tbb->index);
797 }
798 }
799 /* Now duplicate the tails. */
800 if (!bitmap_empty_p (&bb_tail))
801 FOR_EACH_BB_REVERSE_FN (bb, cfun)
802 {
803 basic_block copy_bb, tbb;
804 int eflags;
805
806 if (!bitmap_clear_bit (&bb_tail, bb->index))
807 continue;
808
809 /* Create a copy of BB, instructions and all, for
810 use on paths that don't need a prologue.
811 Ideal placement of the copy is on a fall-thru edge
812 or after a block that would jump to the copy. */
813 FOR_EACH_EDGE (e, ei, bb->preds)
814 if (!bitmap_bit_p (bb_flags, e->src->index)
815 && single_succ_p (e->src))
816 break;
817 if (e)
818 {
819 /* Make sure we insert after any barriers. */
820 rtx_insn *end = get_last_bb_insn (e->src);
821 copy_bb = create_basic_block (NEXT_INSN (end),
822 NULL_RTX, e->src);
823 BB_COPY_PARTITION (copy_bb, e->src);
824 }
825 else
826 {
827 /* Otherwise put the copy at the end of the function. */
828 copy_bb = create_basic_block (NULL_RTX, NULL_RTX,
829 EXIT_BLOCK_PTR_FOR_FN (cfun)->prev_bb);
830 BB_COPY_PARTITION (copy_bb, bb);
831 }
832
833 rtx_note *insert_point = emit_note_after (NOTE_INSN_DELETED,
834 BB_END (copy_bb));
835 emit_barrier_after (BB_END (copy_bb));
836
837 tbb = bb;
838 while (1)
839 {
840 dup_block_and_redirect (tbb, copy_bb, insert_point,
841 bb_flags);
842 tbb = single_succ (tbb);
843 if (tbb == EXIT_BLOCK_PTR_FOR_FN (cfun))
844 break;
845 e = split_block (copy_bb, PREV_INSN (insert_point));
846 copy_bb = e->dest;
847 }
848
849 /* Quiet verify_flow_info by (ab)using EDGE_FAKE.
850 We have yet to add a simple_return to the tails,
851 as we'd like to run convert_jumps_to_returns first, in
852 case the block is no longer used after that. */
853 eflags = EDGE_FAKE;
854 if (CALL_P (PREV_INSN (insert_point))
855 && SIBLING_CALL_P (PREV_INSN (insert_point)))
856 eflags = EDGE_SIBCALL | EDGE_ABNORMAL;
857 make_single_succ_edge (copy_bb, EXIT_BLOCK_PTR_FOR_FN (cfun),
858 eflags);
859
860 /* verify_flow_info doesn't like a note after a
861 sibling call. */
862 delete_insn (insert_point);
863 if (bitmap_empty_p (&bb_tail))
864 break;
865 }
866 }
867
868 fail_shrinkwrap:
869 bitmap_clear (&bb_tail);
870 bitmap_clear (&bb_antic_flags);
871 bitmap_clear (&bb_on_list);
872 vec.release ();
873 }
874 }
875
876 /* If we're allowed to generate a simple return instruction, then by
877 definition we don't need a full epilogue. If the last basic
878 block before the exit block does not contain active instructions,
879 examine its predecessors and try to emit (conditional) return
880 instructions. */
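/* For background (a summary for this comment, not a definition taken
   from this file): a simple_return assumes the prologue never executed
   on the path reaching it, so it restores nothing; paths that did run
   the prologue must instead go through the real epilogue.  That is why
   only blocks outside BB_FLAGS, i.e. blocks not needing a prologue,
   are considered below.  */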
881
882 edge
883 get_unconverted_simple_return (edge exit_fallthru_edge, bitmap_head bb_flags,
884 vec<edge> *unconverted_simple_returns,
885 rtx_insn **returnjump)
886 {
887 if (optimize)
888 {
889 unsigned i, last;
890
891 /* convert_jumps_to_returns may add to preds of the exit block
892 (but won't remove). Stop at end of current preds. */
893 last = EDGE_COUNT (EXIT_BLOCK_PTR_FOR_FN (cfun)->preds);
894 for (i = 0; i < last; i++)
895 {
896 edge e = EDGE_I (EXIT_BLOCK_PTR_FOR_FN (cfun)->preds, i);
897 if (LABEL_P (BB_HEAD (e->src))
898 && !bitmap_bit_p (&bb_flags, e->src->index)
899 && !active_insn_between (BB_HEAD (e->src), BB_END (e->src)))
900 *unconverted_simple_returns
901 = convert_jumps_to_returns (e->src, true,
902 *unconverted_simple_returns);
903 }
904 }
905
906 if (exit_fallthru_edge != NULL
907 && EDGE_COUNT (exit_fallthru_edge->src->preds) != 0
908 && !bitmap_bit_p (&bb_flags, exit_fallthru_edge->src->index))
909 {
910 basic_block last_bb;
911
912 last_bb = emit_return_for_exit (exit_fallthru_edge, true);
913 *returnjump = BB_END (last_bb);
914 exit_fallthru_edge = NULL;
915 }
916 return exit_fallthru_edge;
917 }
918
919 /* If there were branches to an empty LAST_BB which we tried to
920 convert to conditional simple_returns, but couldn't for some
921 reason, create a block to hold a simple_return insn and redirect
922 those remaining edges. */
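/* Roughly: for each leftover edge we either reuse a block in the right
   partition that already ends in a (simple) return, or create a fresh
   block at the end of the function holding just a simple_return, and
   then redirect the edge there.  */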
923
924 void
925 convert_to_simple_return (edge entry_edge, edge orig_entry_edge,
926 bitmap_head bb_flags, rtx_insn *returnjump,
927 vec<edge> unconverted_simple_returns)
928 {
929 edge e;
930 edge_iterator ei;
931
932 if (!unconverted_simple_returns.is_empty ())
933 {
934 basic_block simple_return_block_hot = NULL;
935 basic_block simple_return_block_cold = NULL;
936 edge pending_edge_hot = NULL;
937 edge pending_edge_cold = NULL;
938 basic_block exit_pred;
939 int i;
940
941 gcc_assert (entry_edge != orig_entry_edge);
942
943 /* See if we can reuse the last insn that was emitted for the
944 epilogue. */
945 if (returnjump != NULL_RTX
946 && JUMP_LABEL (returnjump) == simple_return_rtx)
947 {
948 e = split_block (BLOCK_FOR_INSN (returnjump), PREV_INSN (returnjump));
949 if (BB_PARTITION (e->src) == BB_HOT_PARTITION)
950 simple_return_block_hot = e->dest;
951 else
952 simple_return_block_cold = e->dest;
953 }
954
955 /* Also check returns we might need to add to tail blocks. */
956 FOR_EACH_EDGE (e, ei, EXIT_BLOCK_PTR_FOR_FN (cfun)->preds)
957 if (EDGE_COUNT (e->src->preds) != 0
958 && (e->flags & EDGE_FAKE) != 0
959 && !bitmap_bit_p (&bb_flags, e->src->index))
960 {
961 if (BB_PARTITION (e->src) == BB_HOT_PARTITION)
962 pending_edge_hot = e;
963 else
964 pending_edge_cold = e;
965 }
966
967 /* Save a pointer to the exit's predecessor BB for use in
968 inserting new BBs at the end of the function. Do this
969 after the call to split_block above which may split
970 the original exit pred. */
971 exit_pred = EXIT_BLOCK_PTR_FOR_FN (cfun)->prev_bb;
972
973 FOR_EACH_VEC_ELT (unconverted_simple_returns, i, e)
974 {
975 basic_block *pdest_bb;
976 edge pending;
977
978 if (BB_PARTITION (e->src) == BB_HOT_PARTITION)
979 {
980 pdest_bb = &simple_return_block_hot;
981 pending = pending_edge_hot;
982 }
983 else
984 {
985 pdest_bb = &simple_return_block_cold;
986 pending = pending_edge_cold;
987 }
988
989 if (*pdest_bb == NULL && pending != NULL)
990 {
991 emit_return_into_block (true, pending->src);
992 pending->flags &= ~(EDGE_FALLTHRU | EDGE_FAKE);
993 *pdest_bb = pending->src;
994 }
995 else if (*pdest_bb == NULL)
996 {
997 basic_block bb;
998
999 bb = create_basic_block (NULL, NULL, exit_pred);
1000 BB_COPY_PARTITION (bb, e->src);
1001 rtx_insn *ret = targetm.gen_simple_return ();
1002 rtx_jump_insn *start = emit_jump_insn_after (ret, BB_END (bb));
1003 JUMP_LABEL (start) = simple_return_rtx;
1004 emit_barrier_after (start);
1005
1006 *pdest_bb = bb;
1007 make_edge (bb, EXIT_BLOCK_PTR_FOR_FN (cfun), 0);
1008 }
1009 redirect_edge_and_branch_force (e, *pdest_bb);
1010 }
1011 unconverted_simple_returns.release ();
1012 }
1013
1014 if (entry_edge != orig_entry_edge)
1015 {
1016 FOR_EACH_EDGE (e, ei, EXIT_BLOCK_PTR_FOR_FN (cfun)->preds)
1017 if (EDGE_COUNT (e->src->preds) != 0
1018 && (e->flags & EDGE_FAKE) != 0
1019 && !bitmap_bit_p (&bb_flags, e->src->index))
1020 {
1021 emit_return_into_block (true, e->src);
1022 e->flags &= ~(EDGE_FALLTHRU | EDGE_FAKE);
1023 }
1024 }
1025 }