808bdd17 | 1 | /* Shrink-wrapping related optimizations. |
c562205f | 2 | Copyright (C) 1987-2014 Free Software Foundation, Inc. |
3 | ||
4 | This file is part of GCC. | |
5 | ||
6 | GCC is free software; you can redistribute it and/or modify it under | |
7 | the terms of the GNU General Public License as published by the Free | |
8 | Software Foundation; either version 3, or (at your option) any later | |
9 | version. | |
10 | ||
11 | GCC is distributed in the hope that it will be useful, but WITHOUT ANY | |
12 | WARRANTY; without even the implied warranty of MERCHANTABILITY or | |
13 | FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License | |
14 | for more details. | |
15 | ||
16 | You should have received a copy of the GNU General Public License | |
17 | along with GCC; see the file COPYING3. If not see | |
18 | <http://www.gnu.org/licenses/>. */ | |
19 | ||
20 | /* This file handles shrink-wrapping related optimizations. */ | |
21 | ||
22 | #include "config.h" | |
23 | #include "system.h" | |
24 | #include "coretypes.h" | |
25 | #include "tm.h" | |
26 | #include "rtl-error.h" | |
27 | #include "tree.h" | |
28 | #include "stor-layout.h" | |
29 | #include "varasm.h" | |
30 | #include "stringpool.h" | |
31 | #include "flags.h" | |
32 | #include "except.h" | |
a3020f2f | 33 | #include "hashtab.h" |
34 | #include "hash-set.h" | |
35 | #include "vec.h" | |
36 | #include "machmode.h" | |
37 | #include "hard-reg-set.h" | |
38 | #include "input.h" | |
c562205f | 39 | #include "function.h" |
40 | #include "expr.h" | |
41 | #include "optabs.h" | |
42 | #include "libfuncs.h" | |
43 | #include "regs.h" | |
c562205f | 44 | #include "insn-config.h" |
45 | #include "recog.h" | |
46 | #include "output.h" | |
c562205f | 47 | #include "tm_p.h" |
48 | #include "langhooks.h" | |
49 | #include "target.h" | |
50 | #include "common/common-target.h" | |
51 | #include "gimple-expr.h" | |
52 | #include "gimplify.h" | |
53 | #include "tree-pass.h" | |
54 | #include "predict.h" | |
55 | #include "df.h" | |
56 | #include "params.h" | |
57 | #include "bb-reorder.h" | |
58 | #include "shrink-wrap.h" | |
59483f68 | 59 | #include "regcprop.h" |
63b06162 | 60 | #include "rtl-iter.h" |
c562205f | 61 | |
62 | #ifdef HAVE_simple_return | |
63 | ||
/* Return true if INSN requires the stack frame to be set up.
   PROLOGUE_USED contains the hard registers used in the function
   prologue.  SET_UP_BY_PROLOGUE is the set of registers we expect the
   prologue to set up for the function.  */
bool
requires_stack_frame_p (rtx_insn *insn, HARD_REG_SET prologue_used,
			HARD_REG_SET set_up_by_prologue)
{
  df_ref def, use;
  HARD_REG_SET hardregs;
  unsigned regno;

  /* A non-sibling call always needs the frame set up; a sibling call
     runs on the caller's (already torn down) frame.  */
  if (CALL_P (insn))
    return !SIBLING_CALL_P (insn);

  /* We need a frame to get the unique CFA expected by the unwinder.  */
  if (cfun->can_throw_non_call_exceptions && can_throw_internal (insn))
    return true;

  /* Collect every hard register INSN defines.  */
  CLEAR_HARD_REG_SET (hardregs);
  FOR_EACH_INSN_DEF (def, insn)
    {
      rtx dreg = DF_REF_REG (def);

      if (!REG_P (dreg))
	continue;

      add_to_hard_reg_set (&hardregs, GET_MODE (dreg),
			   REGNO (dreg));
    }
  /* If INSN clobbers anything the prologue uses, INSN must run after
     the prologue.  */
  if (hard_reg_set_intersect_p (hardregs, prologue_used))
    return true;
  /* A call-saved register that is ever live must be saved/restored by
     the prologue/epilogue, so writing it requires the frame.  */
  AND_COMPL_HARD_REG_SET (hardregs, call_used_reg_set);
  for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
    if (TEST_HARD_REG_BIT (hardregs, regno)
	&& df_regs_ever_live_p (regno))
      return true;

  /* Likewise check the registers INSN uses against the set the
     prologue establishes (stack/frame/arg pointer, etc.).  */
  FOR_EACH_INSN_USE (use, insn)
    {
      rtx reg = DF_REF_REG (use);

      if (!REG_P (reg))
	continue;

      add_to_hard_reg_set (&hardregs, GET_MODE (reg),
			   REGNO (reg));
    }
  if (hard_reg_set_intersect_p (hardregs, set_up_by_prologue))
    return true;

  return false;
}
117 | ||
/* See whether there has a single live edge from BB, which dest uses
   [REGNO, END_REGNO).  Return the live edge if its dest bb has
   one or two predecessors.  Otherwise return NULL.  */

static edge
live_edge_for_reg (basic_block bb, int regno, int end_regno)
{
  edge e, live_edge;
  edge_iterator ei;
  bitmap live;
  int i;

  /* Find the unique successor on whose entry some part of
     [REGNO, END_REGNO) is live; fail if there is more than one.  */
  live_edge = NULL;
  FOR_EACH_EDGE (e, ei, bb->succs)
    {
      live = df_get_live_in (e->dest);
      for (i = regno; i < end_regno; i++)
	if (REGNO_REG_SET_P (live, i))
	  {
	    if (live_edge && live_edge != e)
	      return NULL;
	    live_edge = e;
	  }
    }

  /* We can sometimes encounter dead code.  Don't try to move it
     into the exit block.  */
  if (!live_edge || live_edge->dest == EXIT_BLOCK_PTR_FOR_FN (cfun))
    return NULL;

  /* Reject targets of abnormal edges.  This is needed for correctness
     on ports like Alpha and MIPS, whose pic_offset_table_rtx can die on
     exception edges even though it is generally treated as call-saved
     for the majority of the compilation.  Moving across abnormal edges
     isn't going to be interesting for shrink-wrap usage anyway.  */
  if (live_edge->flags & EDGE_ABNORMAL)
    return NULL;

  /* When live_edge->dest->preds == 2, we can create a new block on
     the edge to make it meet the requirement.  */
  if (EDGE_COUNT (live_edge->dest->preds) > 2)
    return NULL;

  return live_edge;
}
163 | ||
/* Try to move INSN from BB to a successor.  Return true on success.
   USES and DEFS are the set of registers that are used and defined
   after INSN in BB.  SPLIT_P indicates whether a live edge from BB
   is splitted or not.  */

static bool
move_insn_for_shrink_wrap (basic_block bb, rtx_insn *insn,
			   const HARD_REG_SET uses,
			   const HARD_REG_SET defs,
			   bool *split_p)
{
  rtx set, src, dest;
  bitmap live_out, live_in, bb_uses, bb_defs;
  unsigned int i, dregno, end_dregno;
  /* FIRST_PSEUDO_REGISTER as both bounds means "no source register":
     the register loops below then iterate zero times.  */
  unsigned int sregno = FIRST_PSEUDO_REGISTER;
  unsigned int end_sregno = FIRST_PSEUDO_REGISTER;
  basic_block next_block;
  edge live_edge;

  /* Look for a simple register assignment.  We don't use single_set here
     because we can't deal with any CLOBBERs, USEs, or REG_UNUSED secondary
     destinations.  */
  if (!INSN_P (insn))
    return false;
  set = PATTERN (insn);
  if (GET_CODE (set) != SET)
    return false;
  src = SET_SRC (set);
  dest = SET_DEST (set);

  /* For the destination, we want only a register.  Also disallow STACK
     or FRAME related adjustments.  They are likely part of the prologue,
     so keep them in the entry block.  */
  if (!REG_P (dest)
      || dest == stack_pointer_rtx
      || dest == frame_pointer_rtx
      || dest == hard_frame_pointer_rtx)
    return false;

  /* For the source, we want one of:
      (1) A (non-overlapping) register
      (2) A constant,
      (3) An expression involving no more than one register.

     That last point comes from the code following, which was originally
     written to handle only register move operations, and still only handles
     a single source register when checking for overlaps.  Happily, the
     same checks can be applied to expressions like (plus reg const).  */

  if (CONSTANT_P (src))
    ;
  else if (!REG_P (src))
    {
      rtx src_inner = NULL_RTX;

      if (can_throw_internal (insn))
	return false;

      /* Walk all sub-rtxes of SRC, accepting only expression codes plus
	 at most one inner REG; anything else makes SRC too complex.  */
      subrtx_var_iterator::array_type array;
      FOR_EACH_SUBRTX_VAR (iter, array, src, ALL)
	{
	  rtx x = *iter;
	  switch (GET_RTX_CLASS (GET_CODE (x)))
	    {
	    case RTX_CONST_OBJ:
	    case RTX_COMPARE:
	    case RTX_COMM_COMPARE:
	    case RTX_BIN_ARITH:
	    case RTX_COMM_ARITH:
	    case RTX_UNARY:
	    case RTX_TERNARY:
	      /* Constant or expression.  Continue.  */
	      break;

	    case RTX_OBJ:
	    case RTX_EXTRA:
	      switch (GET_CODE (x))
		{
		case UNSPEC:
		case SUBREG:
		case STRICT_LOW_PART:
		case PC:
		  /* Ok.  Continue.  */
		  break;

		case REG:
		  /* Fail if we see a second inner register.  */
		  if (src_inner != NULL)
		    return false;
		  src_inner = x;
		  break;

		default:
		  return false;
		}
	      break;

	    default:
	      return false;
	    }
	}

      /* From here on only the single inner register matters for the
	 overlap/liveness checks below.  */
      if (src_inner != NULL)
	src = src_inner;
    }

  /* Make sure that the source register isn't defined later in BB.  */
  if (REG_P (src))
    {
      sregno = REGNO (src);
      end_sregno = END_REGNO (src);
      if (overlaps_hard_reg_set_p (defs, GET_MODE (src), sregno))
	return false;
    }

  /* Make sure that the destination register isn't referenced later in BB.  */
  dregno = REGNO (dest);
  end_dregno = END_REGNO (dest);
  if (overlaps_hard_reg_set_p (uses, GET_MODE (dest), dregno)
      || overlaps_hard_reg_set_p (defs, GET_MODE (dest), dregno))
    return false;

  /* See whether there is a successor block to which we could move INSN.  */
  live_edge = live_edge_for_reg (bb, dregno, end_dregno);
  if (!live_edge)
    return false;

  next_block = live_edge->dest;
  /* Create a new basic block on the edge.  */
  if (EDGE_COUNT (next_block->preds) == 2)
    {
      /* split_edge for a block with only one successor is meaningless.  */
      if (EDGE_COUNT (bb->succs) == 1)
	return false;

      /* If DF_LIVE doesn't exist, i.e. at -O1, just give up.  */
      if (!df_live)
	return false;

      basic_block old_dest = live_edge->dest;
      next_block = split_edge (live_edge);

      /* We create a new basic block.  Call df_grow_bb_info to make sure
	 all data structures are allocated.  */
      df_grow_bb_info (df_live);

      /* Seed the new block's live-in from what flows along the split
	 edge: live-out of BB restricted to live-in of the old dest.  */
      bitmap_and (df_get_live_in (next_block), df_get_live_out (bb),
		  df_get_live_in (old_dest));
      df_set_bb_dirty (next_block);

      /* We should not split more than once for a function.  */
      if (*split_p)
	return false;

      *split_p = true;
    }

  /* At this point we are committed to moving INSN, but let's try to
     move it as far as we can.  */
  do
    {
      live_out = df_get_live_out (bb);
      live_in = df_get_live_in (next_block);
      bb = next_block;

      /* Check whether BB uses DEST or clobbers DEST.  We need to add
	 INSN to BB if so.  Either way, DEST is no longer live on entry,
	 except for any part that overlaps SRC (next loop).  */
      bb_uses = &DF_LR_BB_INFO (bb)->use;
      bb_defs = &DF_LR_BB_INFO (bb)->def;
      if (df_live)
	{
	  for (i = dregno; i < end_dregno; i++)
	    {
	      if (*split_p
		  || REGNO_REG_SET_P (bb_uses, i)
		  || REGNO_REG_SET_P (bb_defs, i)
		  || REGNO_REG_SET_P (&DF_LIVE_BB_INFO (bb)->gen, i))
		next_block = NULL;
	      CLEAR_REGNO_REG_SET (live_out, i);
	      CLEAR_REGNO_REG_SET (live_in, i);
	    }

	  /* Check whether BB clobbers SRC.  We need to add INSN to BB if so.
	     Either way, SRC is now live on entry.  */
	  for (i = sregno; i < end_sregno; i++)
	    {
	      if (*split_p
		  || REGNO_REG_SET_P (bb_defs, i)
		  || REGNO_REG_SET_P (&DF_LIVE_BB_INFO (bb)->gen, i))
		next_block = NULL;
	      SET_REGNO_REG_SET (live_out, i);
	      SET_REGNO_REG_SET (live_in, i);
	    }
	}
      else
	{
	  /* DF_LR_BB_INFO (bb)->def does not comprise the DF_REF_PARTIAL and
	     DF_REF_CONDITIONAL defs.  So if DF_LIVE doesn't exist, i.e.
	     at -O1, just give up searching NEXT_BLOCK.  */
	  next_block = NULL;
	  for (i = dregno; i < end_dregno; i++)
	    {
	      CLEAR_REGNO_REG_SET (live_out, i);
	      CLEAR_REGNO_REG_SET (live_in, i);
	    }

	  for (i = sregno; i < end_sregno; i++)
	    {
	      SET_REGNO_REG_SET (live_out, i);
	      SET_REGNO_REG_SET (live_in, i);
	    }
	}

      /* If we don't need to add the move to BB, look for a single
	 successor block.  */
      if (next_block)
	{
	  live_edge = live_edge_for_reg (next_block, dregno, end_dregno);
	  if (!live_edge || EDGE_COUNT (live_edge->dest->preds) > 1)
	    break;
	  next_block = live_edge->dest;
	}
    }
  while (next_block);

  /* For the new created basic block, there is no dataflow info at all.
     So skip the following dataflow update and check.  */
  if (!(*split_p))
    {
      /* BB now defines DEST.  It only uses the parts of DEST that overlap SRC
	 (next loop).  */
      for (i = dregno; i < end_dregno; i++)
	{
	  CLEAR_REGNO_REG_SET (bb_uses, i);
	  SET_REGNO_REG_SET (bb_defs, i);
	}

      /* BB now uses SRC.  */
      for (i = sregno; i < end_sregno; i++)
	SET_REGNO_REG_SET (bb_uses, i);
    }

  /* Physically move the pattern: re-emit it at the head of BB (after
     the bb note) and delete the original insn.  */
  emit_insn_after (PATTERN (insn), bb_note (bb));
  delete_insn (insn);
  return true;
}
411 | ||
/* Look for register copies in the first block of the function, and move
   them down into successor blocks if the register is used only on one
   path.  This exposes more opportunities for shrink-wrapping.  These
   kinds of sets often occur when incoming argument registers are moved
   to call-saved registers because their values are live across one or
   more calls during the function.  */

void
prepare_shrink_wrap (basic_block entry_block)
{
  rtx_insn *insn, *curr;
  rtx x;
  HARD_REG_SET uses, defs;
  df_ref def, use;
  bool split_p = false;

  if (JUMP_P (BB_END (entry_block)))
    {
      /* To have more shrink-wrapping opportunities, prepare_shrink_wrap tries
	 to sink the copies from parameter to callee saved register out of
	 entry block.  copyprop_hardreg_forward_bb_without_debug_insn is called
	 to release some dependences.  */
      copyprop_hardreg_forward_bb_without_debug_insn (entry_block);
    }

  /* Walk the entry block backwards so USES/DEFS always describe the
     registers referenced AFTER the insn currently being considered,
     which is what move_insn_for_shrink_wrap expects.  */
  CLEAR_HARD_REG_SET (uses);
  CLEAR_HARD_REG_SET (defs);
  FOR_BB_INSNS_REVERSE_SAFE (entry_block, insn, curr)
    if (NONDEBUG_INSN_P (insn)
	&& !move_insn_for_shrink_wrap (entry_block, insn, uses, defs,
				       &split_p))
      {
	/* The insn stays in the entry block; fold its references into
	   the running sets.  */
	/* Add all defined registers to DEFs.  */
	FOR_EACH_INSN_DEF (def, insn)
	  {
	    x = DF_REF_REG (def);
	    if (REG_P (x) && HARD_REGISTER_P (x))
	      SET_HARD_REG_BIT (defs, REGNO (x));
	  }

	/* Add all used registers to USESs.  */
	FOR_EACH_INSN_USE (use, insn)
	  {
	    x = DF_REF_REG (use);
	    if (REG_P (x) && HARD_REGISTER_P (x))
	      SET_HARD_REG_BIT (uses, REGNO (x));
	  }
      }
}
461 | ||
/* Create a copy of BB instructions and insert at BEFORE.  Redirect
   preds of BB to COPY_BB if they don't appear in NEED_PROLOGUE.  */
void
dup_block_and_redirect (basic_block bb, basic_block copy_bb, rtx_insn *before,
			bitmap_head *need_prologue)
{
  edge_iterator ei;
  edge e;
  rtx_insn *insn = BB_END (bb);

  /* We know BB has a single successor, so there is no need to copy a
     simple jump at the end of BB.  */
  if (simplejump_p (insn))
    insn = PREV_INSN (insn);

  start_sequence ();
  duplicate_insn_chain (BB_HEAD (bb), insn);
  if (dump_file)
    {
      unsigned count = 0;
      for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
	if (active_insn_p (insn))
	  ++count;
      fprintf (dump_file, "Duplicating bb %d to bb %d, %u active insns.\n",
	       bb->index, copy_bb->index, count);
    }
  insn = get_insns ();
  end_sequence ();
  emit_insn_before (insn, before);

  /* Redirect all the paths that need no prologue into copy_bb.  Move the
     execution count/frequency of each redirected edge from BB to COPY_BB,
     clamping at zero.  Note ei is only advanced when the edge is NOT
     redirected, since redirect_edge_and_branch_force removes E from
     BB's pred list.  */
  for (ei = ei_start (bb->preds); (e = ei_safe_edge (ei));)
    if (!bitmap_bit_p (need_prologue, e->src->index))
      {
	int freq = EDGE_FREQUENCY (e);
	copy_bb->count += e->count;
	copy_bb->frequency += EDGE_FREQUENCY (e);
	e->dest->count -= e->count;
	if (e->dest->count < 0)
	  e->dest->count = 0;
	e->dest->frequency -= freq;
	if (e->dest->frequency < 0)
	  e->dest->frequency = 0;
	redirect_edge_and_branch_force (e, copy_bb);
	continue;
      }
    else
      ei_next (&ei);
}
511 | ||
512 | ||
/* Try to perform a kind of shrink-wrapping, making sure the
   prologue/epilogue is emitted only around those parts of the
   function that require it.

   On success *ENTRY_EDGE is moved from ORIG_ENTRY_EDGE to the single
   edge where the prologue should be inserted, BB_FLAGS is filled with
   the blocks that need a prologue, and crtl->shrink_wrapped is set.
   PROLOGUE_SEQ is the (not yet emitted) prologue insn sequence.  */

void
try_shrink_wrapping (edge *entry_edge, edge orig_entry_edge,
		     bitmap_head *bb_flags, rtx_insn *prologue_seq)
{
  edge e;
  edge_iterator ei;
  bool nonempty_prologue = false;
  unsigned max_grow_size;
  rtx_insn *seq;

  /* An empty prologue (just the PROLOGUE_END note) leaves nothing to
     shrink-wrap.  */
  for (seq = prologue_seq; seq; seq = NEXT_INSN (seq))
    if (!NOTE_P (seq) || NOTE_KIND (seq) != NOTE_INSN_PROLOGUE_END)
      {
	nonempty_prologue = true;
	break;
      }

  if (flag_shrink_wrap && HAVE_simple_return
      && (targetm.profile_before_prologue () || !crtl->profile)
      && nonempty_prologue && !crtl->calls_eh_return)
    {
      HARD_REG_SET prologue_clobbered, prologue_used, live_on_edge;
      struct hard_reg_set_container set_up_by_prologue;
      rtx_insn *p_insn;
      vec<basic_block> vec;
      basic_block bb;
      bitmap_head bb_antic_flags;
      bitmap_head bb_on_list;
      bitmap_head bb_tail;

      if (dump_file)
	fprintf (dump_file, "Attempting shrink-wrapping optimization.\n");

      /* Compute the registers set and used in the prologue.  */
      CLEAR_HARD_REG_SET (prologue_clobbered);
      CLEAR_HARD_REG_SET (prologue_used);
      for (p_insn = prologue_seq; p_insn; p_insn = NEXT_INSN (p_insn))
	{
	  HARD_REG_SET this_used;
	  if (!NONDEBUG_INSN_P (p_insn))
	    continue;

	  /* A register both set earlier and used here counts only as
	     clobbered, hence the AND_COMPL before merging into
	     prologue_used.  */
	  CLEAR_HARD_REG_SET (this_used);
	  note_uses (&PATTERN (p_insn), record_hard_reg_uses,
		     &this_used);
	  AND_COMPL_HARD_REG_SET (this_used, prologue_clobbered);
	  IOR_HARD_REG_SET (prologue_used, this_used);
	  note_stores (PATTERN (p_insn), record_hard_reg_sets,
		       &prologue_clobbered);
	}

      prepare_shrink_wrap ((*entry_edge)->dest);

      bitmap_initialize (&bb_antic_flags, &bitmap_default_obstack);
      bitmap_initialize (&bb_on_list, &bitmap_default_obstack);
      bitmap_initialize (&bb_tail, &bitmap_default_obstack);

      /* Find the set of basic blocks that require a stack frame,
	 and blocks that are too big to be duplicated.  */

      vec.create (n_basic_blocks_for_fn (cfun));

      /* Registers the prologue is expected to establish for the body
	 of the function.  */
      CLEAR_HARD_REG_SET (set_up_by_prologue.set);
      add_to_hard_reg_set (&set_up_by_prologue.set, Pmode,
			   STACK_POINTER_REGNUM);
      add_to_hard_reg_set (&set_up_by_prologue.set, Pmode, ARG_POINTER_REGNUM);
      if (frame_pointer_needed)
	add_to_hard_reg_set (&set_up_by_prologue.set, Pmode,
			     HARD_FRAME_POINTER_REGNUM);
      if (pic_offset_table_rtx
	  && (unsigned) PIC_OFFSET_TABLE_REGNUM != INVALID_REGNUM)
	add_to_hard_reg_set (&set_up_by_prologue.set, Pmode,
			     PIC_OFFSET_TABLE_REGNUM);
      if (crtl->drap_reg)
	add_to_hard_reg_set (&set_up_by_prologue.set,
			     GET_MODE (crtl->drap_reg),
			     REGNO (crtl->drap_reg));
      if (targetm.set_up_by_prologue)
	targetm.set_up_by_prologue (&set_up_by_prologue);

      /* We don't use a different max size depending on
	 optimize_bb_for_speed_p because increasing shrink-wrapping
	 opportunities by duplicating tail blocks can actually result
	 in an overall decrease in code size.  */
      max_grow_size = get_uncond_jump_length ();
      max_grow_size *= PARAM_VALUE (PARAM_MAX_GROW_COPY_BB_INSNS);

      FOR_EACH_BB_FN (bb, cfun)
	{
	  rtx_insn *insn;
	  unsigned size = 0;

	  FOR_BB_INSNS (bb, insn)
	    if (NONDEBUG_INSN_P (insn))
	      {
		if (requires_stack_frame_p (insn, prologue_used,
					    set_up_by_prologue.set))
		  {
		    /* If the very first block needs a frame, there is
		       nothing to shrink-wrap.  */
		    if (bb == (*entry_edge)->dest)
		      goto fail_shrinkwrap;
		    bitmap_set_bit (bb_flags, bb->index);
		    vec.quick_push (bb);
		    break;
		  }
		else if (size <= max_grow_size)
		  {
		    size += get_attr_min_length (insn);
		    if (size > max_grow_size)
		      bitmap_set_bit (&bb_on_list, bb->index);
		  }
	      }
	}

      /* Blocks that really need a prologue, or are too big for tails.  */
      bitmap_ior_into (&bb_on_list, bb_flags);

      /* For every basic block that needs a prologue, mark all blocks
	 reachable from it, so as to ensure they are also seen as
	 requiring a prologue.  */
      while (!vec.is_empty ())
	{
	  basic_block tmp_bb = vec.pop ();

	  FOR_EACH_EDGE (e, ei, tmp_bb->succs)
	    if (e->dest != EXIT_BLOCK_PTR_FOR_FN (cfun)
		&& bitmap_set_bit (bb_flags, e->dest->index))
	      vec.quick_push (e->dest);
	}

      /* Find the set of basic blocks that need no prologue, have a
	 single successor, can be duplicated, meet a max size
	 requirement, and go to the exit via like blocks.  */
      vec.quick_push (EXIT_BLOCK_PTR_FOR_FN (cfun));
      while (!vec.is_empty ())
	{
	  basic_block tmp_bb = vec.pop ();

	  FOR_EACH_EDGE (e, ei, tmp_bb->preds)
	    if (single_succ_p (e->src)
		&& !bitmap_bit_p (&bb_on_list, e->src->index)
		&& can_duplicate_block_p (e->src))
	      {
		edge pe;
		edge_iterator pei;

		/* If there is predecessor of e->src which doesn't
		   need prologue and the edge is complex,
		   we might not be able to redirect the branch
		   to a copy of e->src.  */
		FOR_EACH_EDGE (pe, pei, e->src->preds)
		  if ((pe->flags & EDGE_COMPLEX) != 0
		      && !bitmap_bit_p (bb_flags, pe->src->index))
		    break;
		if (pe == NULL && bitmap_set_bit (&bb_tail, e->src->index))
		  vec.quick_push (e->src);
	      }
	}

      /* Now walk backwards from every block that is marked as needing
	 a prologue to compute the bb_antic_flags bitmap.  Exclude
	 tail blocks; They can be duplicated to be used on paths not
	 needing a prologue.  */
      bitmap_clear (&bb_on_list);
      bitmap_and_compl (&bb_antic_flags, bb_flags, &bb_tail);
      FOR_EACH_BB_FN (bb, cfun)
	{
	  if (!bitmap_bit_p (&bb_antic_flags, bb->index))
	    continue;
	  FOR_EACH_EDGE (e, ei, bb->preds)
	    if (!bitmap_bit_p (&bb_antic_flags, e->src->index)
		&& bitmap_set_bit (&bb_on_list, e->src->index))
	      vec.quick_push (e->src);
	}
      /* A block is "anticipatable" when every successor already is:
	 all paths from it lead into prologue-needing code.  */
      while (!vec.is_empty ())
	{
	  basic_block tmp_bb = vec.pop ();
	  bool all_set = true;

	  bitmap_clear_bit (&bb_on_list, tmp_bb->index);
	  FOR_EACH_EDGE (e, ei, tmp_bb->succs)
	    if (!bitmap_bit_p (&bb_antic_flags, e->dest->index))
	      {
		all_set = false;
		break;
	      }

	  if (all_set)
	    {
	      bitmap_set_bit (&bb_antic_flags, tmp_bb->index);
	      FOR_EACH_EDGE (e, ei, tmp_bb->preds)
		if (!bitmap_bit_p (&bb_antic_flags, e->src->index)
		    && bitmap_set_bit (&bb_on_list, e->src->index))
		  vec.quick_push (e->src);
	    }
	}
      /* Find exactly one edge that leads to a block in ANTIC from
	 a block that isn't.  */
      if (!bitmap_bit_p (&bb_antic_flags, (*entry_edge)->dest->index))
	FOR_EACH_BB_FN (bb, cfun)
	  {
	    if (!bitmap_bit_p (&bb_antic_flags, bb->index))
	      continue;
	    FOR_EACH_EDGE (e, ei, bb->preds)
	      if (!bitmap_bit_p (&bb_antic_flags, e->src->index))
		{
		  if (*entry_edge != orig_entry_edge)
		    {
		      *entry_edge = orig_entry_edge;
		      if (dump_file)
			fprintf (dump_file, "More than one candidate edge.\n");
		      goto fail_shrinkwrap;
		    }
		  if (dump_file)
		    fprintf (dump_file, "Found candidate edge for "
			     "shrink-wrapping, %d->%d.\n", e->src->index,
			     e->dest->index);
		  *entry_edge = e;
		}
	  }

      if (*entry_edge != orig_entry_edge)
	{
	  /* Test whether the prologue is known to clobber any register
	     (other than FP or SP) which are live on the edge.  */
	  CLEAR_HARD_REG_BIT (prologue_clobbered, STACK_POINTER_REGNUM);
	  if (frame_pointer_needed)
	    CLEAR_HARD_REG_BIT (prologue_clobbered, HARD_FRAME_POINTER_REGNUM);
	  REG_SET_TO_HARD_REG_SET (live_on_edge,
				   df_get_live_in ((*entry_edge)->dest));
	  if (hard_reg_set_intersect_p (live_on_edge, prologue_clobbered))
	    {
	      *entry_edge = orig_entry_edge;
	      if (dump_file)
		fprintf (dump_file,
			 "Shrink-wrapping aborted due to clobber.\n");
	    }
	}
      if (*entry_edge != orig_entry_edge)
	{
	  crtl->shrink_wrapped = true;
	  if (dump_file)
	    fprintf (dump_file, "Performing shrink-wrapping.\n");

	  /* Find tail blocks reachable from both blocks needing a
	     prologue and blocks not needing a prologue.  */
	  if (!bitmap_empty_p (&bb_tail))
	    FOR_EACH_BB_FN (bb, cfun)
	      {
		bool some_pro, some_no_pro;
		if (!bitmap_bit_p (&bb_tail, bb->index))
		  continue;
		some_pro = some_no_pro = false;
		FOR_EACH_EDGE (e, ei, bb->preds)
		  {
		    if (bitmap_bit_p (bb_flags, e->src->index))
		      some_pro = true;
		    else
		      some_no_pro = true;
		  }
		if (some_pro && some_no_pro)
		  vec.quick_push (bb);
		else
		  bitmap_clear_bit (&bb_tail, bb->index);
	      }
	  /* Find the head of each tail.  */
	  while (!vec.is_empty ())
	    {
	      basic_block tbb = vec.pop ();

	      if (!bitmap_bit_p (&bb_tail, tbb->index))
		continue;

	      /* Only the first block of each tail chain stays marked;
		 the rest are duplicated along with it below.  */
	      while (single_succ_p (tbb))
		{
		  tbb = single_succ (tbb);
		  bitmap_clear_bit (&bb_tail, tbb->index);
		}
	    }
	  /* Now duplicate the tails.  */
	  if (!bitmap_empty_p (&bb_tail))
	    FOR_EACH_BB_REVERSE_FN (bb, cfun)
	      {
		basic_block copy_bb, tbb;
		rtx_insn *insert_point;
		int eflags;

		if (!bitmap_clear_bit (&bb_tail, bb->index))
		  continue;

		/* Create a copy of BB, instructions and all, for
		   use on paths that don't need a prologue.
		   Ideal placement of the copy is on a fall-thru edge
		   or after a block that would jump to the copy.  */
		FOR_EACH_EDGE (e, ei, bb->preds)
		  if (!bitmap_bit_p (bb_flags, e->src->index)
		      && single_succ_p (e->src))
		    break;
		if (e)
		  {
		    /* Make sure we insert after any barriers.  */
		    rtx_insn *end = get_last_bb_insn (e->src);
		    copy_bb = create_basic_block (NEXT_INSN (end),
						  NULL_RTX, e->src);
		    BB_COPY_PARTITION (copy_bb, e->src);
		  }
		else
		  {
		    /* Otherwise put the copy at the end of the function.  */
		    copy_bb = create_basic_block (NULL_RTX, NULL_RTX,
						  EXIT_BLOCK_PTR_FOR_FN (cfun)->prev_bb);
		    BB_COPY_PARTITION (copy_bb, bb);
		  }

		/* Placeholder note so the duplicated chains below have a
		   stable insertion point; deleted once done.  */
		insert_point = emit_note_after (NOTE_INSN_DELETED,
						BB_END (copy_bb));
		emit_barrier_after (BB_END (copy_bb));

		/* Duplicate the whole single-successor chain from BB to
		   the exit, splitting COPY_BB after each copied block.  */
		tbb = bb;
		while (1)
		  {
		    dup_block_and_redirect (tbb, copy_bb, insert_point,
					    bb_flags);
		    tbb = single_succ (tbb);
		    if (tbb == EXIT_BLOCK_PTR_FOR_FN (cfun))
		      break;
		    e = split_block (copy_bb, PREV_INSN (insert_point));
		    copy_bb = e->dest;
		  }

		/* Quiet verify_flow_info by (ab)using EDGE_FAKE.
		   We have yet to add a simple_return to the tails,
		   as we'd like to first convert_jumps_to_returns in
		   case the block is no longer used after that.  */
		eflags = EDGE_FAKE;
		if (CALL_P (PREV_INSN (insert_point))
		    && SIBLING_CALL_P (PREV_INSN (insert_point)))
		  eflags = EDGE_SIBCALL | EDGE_ABNORMAL;
		make_single_succ_edge (copy_bb, EXIT_BLOCK_PTR_FOR_FN (cfun),
				       eflags);

		/* verify_flow_info doesn't like a note after a
		   sibling call.  */
		delete_insn (insert_point);
		if (bitmap_empty_p (&bb_tail))
		  break;
	      }
	}

    fail_shrinkwrap:
      bitmap_clear (&bb_tail);
      bitmap_clear (&bb_antic_flags);
      bitmap_clear (&bb_on_list);
      vec.release ();
    }
}
872 | ||
/* If we're allowed to generate a simple return instruction, then by
   definition we don't need a full epilogue.  If the last basic
   block before the exit block does not contain active instructions,
   examine its predecessors and try to emit (conditional) return
   instructions.

   EXIT_FALLTHRU_EDGE is the edge that falls through to the exit block,
   or NULL if there is none.  BB_FLAGS marks the blocks that need the
   full prologue/epilogue (those are never converted here).  Edges whose
   jumps could not be converted are appended to
   *UNCONVERTED_SIMPLE_RETURNS for later handling by
   convert_to_simple_return.  If a return jump is emitted for the
   fall-through block, it is stored in *RETURNJUMP.

   Returns EXIT_FALLTHRU_EDGE if it still needs an epilogue, or NULL
   once a (simple) return has been emitted for it.  */

edge
get_unconverted_simple_return (edge exit_fallthru_edge, bitmap_head bb_flags,
			       vec<edge> *unconverted_simple_returns,
			       rtx_insn **returnjump)
{
  if (optimize)
    {
      unsigned i, last;

      /* convert_jumps_to_returns may add to preds of the exit block
         (but won't remove).  Stop at end of current preds.  */
      last = EDGE_COUNT (EXIT_BLOCK_PTR_FOR_FN (cfun)->preds);
      for (i = 0; i < last; i++)
	{
	  edge e = EDGE_I (EXIT_BLOCK_PTR_FOR_FN (cfun)->preds, i);
	  /* Only consider blocks that start with a label, are not
	     marked as needing the epilogue, and contain no active
	     (non-note, non-label) insns — i.e. empty forwarder blocks
	     to the exit.  */
	  if (LABEL_P (BB_HEAD (e->src))
	      && !bitmap_bit_p (&bb_flags, e->src->index)
	      && !active_insn_between (BB_HEAD (e->src), BB_END (e->src)))
	    *unconverted_simple_returns
	      = convert_jumps_to_returns (e->src, true,
					  *unconverted_simple_returns);
	}
    }

  /* If the fall-through predecessor of the exit block is reachable and
     does not need a full epilogue, emit a simple return for it and
     remember the jump so callers can reuse its block.  */
  if (exit_fallthru_edge != NULL
      && EDGE_COUNT (exit_fallthru_edge->src->preds) != 0
      && !bitmap_bit_p (&bb_flags, exit_fallthru_edge->src->index))
    {
      basic_block last_bb;

      last_bb = emit_return_for_exit (exit_fallthru_edge, true);
      *returnjump = BB_END (last_bb);
      /* The fall-through edge has been dealt with; no epilogue is
	 needed for it any more.  */
      exit_fallthru_edge = NULL;
    }
  return exit_fallthru_edge;
}
915 | ||
/* If there were branches to an empty LAST_BB which we tried to
   convert to conditional simple_returns, but couldn't for some
   reason, create a block to hold a simple_return insn and redirect
   those remaining edges.

   ENTRY_EDGE / ORIG_ENTRY_EDGE describe where the prologue was placed;
   they differ iff the function was actually shrink-wrapped.  BB_FLAGS
   marks blocks needing the full prologue/epilogue.  RETURNJUMP is the
   return jump emitted by get_unconverted_simple_return (or NULL), and
   UNCONVERTED_SIMPLE_RETURNS is the edge list it accumulated; the
   vector is released here.  */

void
convert_to_simple_return (edge entry_edge, edge orig_entry_edge,
			  bitmap_head bb_flags, rtx_insn *returnjump,
			  vec<edge> unconverted_simple_returns)
{
  edge e;
  edge_iterator ei;

  if (!unconverted_simple_returns.is_empty ())
    {
      /* At most one shared simple_return block per partition, so that
	 hot edges are never redirected into the cold section and vice
	 versa.  */
      basic_block simple_return_block_hot = NULL;
      basic_block simple_return_block_cold = NULL;
      edge pending_edge_hot = NULL;
      edge pending_edge_cold = NULL;
      basic_block exit_pred;
      int i;

      /* Unconverted edges can only exist if shrink-wrapping moved the
	 prologue.  */
      gcc_assert (entry_edge != orig_entry_edge);

      /* See if we can reuse the last insn that was emitted for the
	 epilogue.  */
      if (returnjump != NULL_RTX
	  && JUMP_LABEL (returnjump) == simple_return_rtx)
	{
	  /* Split just before the return jump so the jump alone forms
	     a block that other edges can be redirected to.  */
	  e = split_block (BLOCK_FOR_INSN (returnjump), PREV_INSN (returnjump));
	  if (BB_PARTITION (e->src) == BB_HOT_PARTITION)
	    simple_return_block_hot = e->dest;
	  else
	    simple_return_block_cold = e->dest;
	}

      /* Also check returns we might need to add to tail blocks.  */
      FOR_EACH_EDGE (e, ei, EXIT_BLOCK_PTR_FOR_FN (cfun)->preds)
	if (EDGE_COUNT (e->src->preds) != 0
	    && (e->flags & EDGE_FAKE) != 0
	    && !bitmap_bit_p (&bb_flags, e->src->index))
	  {
	    /* EDGE_FAKE marks tails that still lack a simple_return
	       insn; remember one per partition as a candidate to hold
	       the shared return.  */
	    if (BB_PARTITION (e->src) == BB_HOT_PARTITION)
	      pending_edge_hot = e;
	    else
	      pending_edge_cold = e;
	  }

      /* Save a pointer to the exit's predecessor BB for use in
	 inserting new BBs at the end of the function.  Do this
	 after the call to split_block above which may split
	 the original exit pred.  */
      exit_pred = EXIT_BLOCK_PTR_FOR_FN (cfun)->prev_bb;

      FOR_EACH_VEC_ELT (unconverted_simple_returns, i, e)
	{
	  basic_block *pdest_bb;
	  edge pending;

	  /* Pick the shared return block (and pending tail) matching
	     this edge's partition.  */
	  if (BB_PARTITION (e->src) == BB_HOT_PARTITION)
	    {
	      pdest_bb = &simple_return_block_hot;
	      pending = pending_edge_hot;
	    }
	  else
	    {
	      pdest_bb = &simple_return_block_cold;
	      pending = pending_edge_cold;
	    }

	  if (*pdest_bb == NULL && pending != NULL)
	    {
	      /* Reuse a pending tail block: give it a real return insn
		 and drop the FAKE/FALLTHRU markers now that the edge
		 is a genuine return edge.  */
	      emit_return_into_block (true, pending->src);
	      pending->flags &= ~(EDGE_FALLTHRU | EDGE_FAKE);
	      *pdest_bb = pending->src;
	    }
	  else if (*pdest_bb == NULL)
	    {
	      /* No reusable block in this partition: create a fresh
		 block at the end of the function holding just a
		 simple_return.  */
	      basic_block bb;
	      rtx_insn *start;

	      bb = create_basic_block (NULL, NULL, exit_pred);
	      BB_COPY_PARTITION (bb, e->src);
	      start = emit_jump_insn_after (gen_simple_return (),
					    BB_END (bb));
	      JUMP_LABEL (start) = simple_return_rtx;
	      emit_barrier_after (start);

	      *pdest_bb = bb;
	      make_edge (bb, EXIT_BLOCK_PTR_FOR_FN (cfun), 0);
	    }
	  /* Retarget the unconverted edge at the shared return block.  */
	  redirect_edge_and_branch_force (e, *pdest_bb);
	}
      unconverted_simple_returns.release ();
    }

  /* If shrink-wrapping happened, any remaining FAKE exit edges from
     blocks outside the epilogue region still need an explicit
     simple_return emitted into their source block.  */
  if (entry_edge != orig_entry_edge)
    {
      FOR_EACH_EDGE (e, ei, EXIT_BLOCK_PTR_FOR_FN (cfun)->preds)
	if (EDGE_COUNT (e->src->preds) != 0
	    && (e->flags & EDGE_FAKE) != 0
	    && !bitmap_bit_p (&bb_flags, e->src->index))
	  {
	    emit_return_into_block (true, e->src);
	    e->flags &= ~(EDGE_FALLTHRU | EDGE_FAKE);
	  }
    }
}
1024 | ||
1025 | #endif |