]>
Commit | Line | Data |
---|---|---|
e974b93b | 1 | /* Shrink-wrapping related optimizations. |
8d9254fc | 2 | Copyright (C) 1987-2020 Free Software Foundation, Inc. |
f30e25a3 ZC |
3 | |
4 | This file is part of GCC. | |
5 | ||
6 | GCC is free software; you can redistribute it and/or modify it under | |
7 | the terms of the GNU General Public License as published by the Free | |
8 | Software Foundation; either version 3, or (at your option) any later | |
9 | version. | |
10 | ||
11 | GCC is distributed in the hope that it will be useful, but WITHOUT ANY | |
12 | WARRANTY; without even the implied warranty of MERCHANTABILITY or | |
13 | FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License | |
14 | for more details. | |
15 | ||
16 | You should have received a copy of the GNU General Public License | |
17 | along with GCC; see the file COPYING3. If not see | |
18 | <http://www.gnu.org/licenses/>. */ | |
19 | ||
20 | /* This file handles shrink-wrapping related optimizations. */ | |
21 | ||
22 | #include "config.h" | |
23 | #include "system.h" | |
24 | #include "coretypes.h" | |
c7131fb2 | 25 | #include "backend.h" |
957060b5 | 26 | #include "target.h" |
c7131fb2 | 27 | #include "rtl.h" |
957060b5 | 28 | #include "tree.h" |
957060b5 | 29 | #include "cfghooks.h" |
c7131fb2 | 30 | #include "df.h" |
4d0cdd0c | 31 | #include "memmodel.h" |
957060b5 | 32 | #include "tm_p.h" |
957060b5 | 33 | #include "regs.h" |
c997869f | 34 | #include "insn-config.h" |
957060b5 | 35 | #include "emit-rtl.h" |
f30e25a3 | 36 | #include "output.h" |
f30e25a3 | 37 | #include "tree-pass.h" |
60393bbc | 38 | #include "cfgrtl.h" |
c997869f | 39 | #include "cfgbuild.h" |
f30e25a3 ZC |
40 | #include "bb-reorder.h" |
41 | #include "shrink-wrap.h" | |
a2e6c10c | 42 | #include "regcprop.h" |
2b63a3ac | 43 | #include "rtl-iter.h" |
015337d3 | 44 | #include "valtrack.h" |
b21a62b6 | 45 | #include "function-abi.h" |
f30e25a3 ZC |
46 | |
47 | /* Return true if INSN requires the stack frame to be set up. | |
48 | PROLOGUE_USED contains the hard registers used in the function | |
49 | prologue. SET_UP_BY_PROLOGUE is the set of registers we expect the | |
50 | prologue to set up for the function. */ | |
2eba0ed5 | 51 | bool |
939d7575 | 52 | requires_stack_frame_p (rtx_insn *insn, HARD_REG_SET prologue_used, |
f30e25a3 ZC |
53 | HARD_REG_SET set_up_by_prologue) |
54 | { | |
bfac633a | 55 | df_ref def, use; |
f30e25a3 ZC |
56 | HARD_REG_SET hardregs; |
57 | unsigned regno; | |
58 | ||
59 | if (CALL_P (insn)) | |
60 | return !SIBLING_CALL_P (insn); | |
61 | ||
62 | /* We need a frame to get the unique CFA expected by the unwinder. */ | |
63 | if (cfun->can_throw_non_call_exceptions && can_throw_internal (insn)) | |
64 | return true; | |
65 | ||
66 | CLEAR_HARD_REG_SET (hardregs); | |
bfac633a | 67 | FOR_EACH_INSN_DEF (def, insn) |
f30e25a3 | 68 | { |
bfac633a | 69 | rtx dreg = DF_REF_REG (def); |
f30e25a3 ZC |
70 | |
71 | if (!REG_P (dreg)) | |
72 | continue; | |
73 | ||
23997c53 | 74 | add_to_hard_reg_set (&hardregs, GET_MODE (dreg), REGNO (dreg)); |
f30e25a3 ZC |
75 | } |
76 | if (hard_reg_set_intersect_p (hardregs, prologue_used)) | |
77 | return true; | |
b21a62b6 | 78 | hardregs &= ~crtl->abi->full_reg_clobbers (); |
f30e25a3 ZC |
79 | for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++) |
80 | if (TEST_HARD_REG_BIT (hardregs, regno) | |
81 | && df_regs_ever_live_p (regno)) | |
82 | return true; | |
83 | ||
bfac633a | 84 | FOR_EACH_INSN_USE (use, insn) |
f30e25a3 | 85 | { |
bfac633a | 86 | rtx reg = DF_REF_REG (use); |
f30e25a3 ZC |
87 | |
88 | if (!REG_P (reg)) | |
89 | continue; | |
90 | ||
91 | add_to_hard_reg_set (&hardregs, GET_MODE (reg), | |
92 | REGNO (reg)); | |
93 | } | |
94 | if (hard_reg_set_intersect_p (hardregs, set_up_by_prologue)) | |
95 | return true; | |
96 | ||
97 | return false; | |
98 | } | |
99 | ||
e974b93b ZC |
100 | /* See whether there has a single live edge from BB, which dest uses |
101 | [REGNO, END_REGNO). Return the live edge if its dest bb has | |
102 | one or two predecessors. Otherwise return NULL. */ | |
f30e25a3 | 103 | |
e974b93b ZC |
104 | static edge |
105 | live_edge_for_reg (basic_block bb, int regno, int end_regno) | |
f30e25a3 ZC |
106 | { |
107 | edge e, live_edge; | |
108 | edge_iterator ei; | |
109 | bitmap live; | |
110 | int i; | |
111 | ||
112 | live_edge = NULL; | |
113 | FOR_EACH_EDGE (e, ei, bb->succs) | |
114 | { | |
115 | live = df_get_live_in (e->dest); | |
116 | for (i = regno; i < end_regno; i++) | |
117 | if (REGNO_REG_SET_P (live, i)) | |
118 | { | |
119 | if (live_edge && live_edge != e) | |
120 | return NULL; | |
121 | live_edge = e; | |
122 | } | |
123 | } | |
124 | ||
125 | /* We can sometimes encounter dead code. Don't try to move it | |
126 | into the exit block. */ | |
127 | if (!live_edge || live_edge->dest == EXIT_BLOCK_PTR_FOR_FN (cfun)) | |
128 | return NULL; | |
129 | ||
130 | /* Reject targets of abnormal edges. This is needed for correctness | |
131 | on ports like Alpha and MIPS, whose pic_offset_table_rtx can die on | |
132 | exception edges even though it is generally treated as call-saved | |
133 | for the majority of the compilation. Moving across abnormal edges | |
134 | isn't going to be interesting for shrink-wrap usage anyway. */ | |
135 | if (live_edge->flags & EDGE_ABNORMAL) | |
136 | return NULL; | |
137 | ||
e974b93b ZC |
138 | /* When live_edge->dest->preds == 2, we can create a new block on |
139 | the edge to make it meet the requirement. */ | |
140 | if (EDGE_COUNT (live_edge->dest->preds) > 2) | |
f30e25a3 ZC |
141 | return NULL; |
142 | ||
e974b93b | 143 | return live_edge; |
f30e25a3 ZC |
144 | } |
145 | ||
146 | /* Try to move INSN from BB to a successor. Return true on success. | |
147 | USES and DEFS are the set of registers that are used and defined | |
e974b93b ZC |
148 | after INSN in BB. SPLIT_P indicates whether a live edge from BB |
149 | is splitted or not. */ | |
f30e25a3 ZC |
150 | |
151 | static bool | |
939d7575 | 152 | move_insn_for_shrink_wrap (basic_block bb, rtx_insn *insn, |
504279ae RS |
153 | const_hard_reg_set uses, |
154 | const_hard_reg_set defs, | |
015337d3 JJ |
155 | bool *split_p, |
156 | struct dead_debug_local *debug) | |
f30e25a3 ZC |
157 | { |
158 | rtx set, src, dest; | |
b6a7a294 | 159 | bitmap live_out, live_in, bb_uses = NULL, bb_defs = NULL; |
2b63a3ac JW |
160 | unsigned int i, dregno, end_dregno; |
161 | unsigned int sregno = FIRST_PSEUDO_REGISTER; | |
162 | unsigned int end_sregno = FIRST_PSEUDO_REGISTER; | |
f30e25a3 | 163 | basic_block next_block; |
e974b93b | 164 | edge live_edge; |
015337d3 JJ |
165 | rtx_insn *dinsn; |
166 | df_ref def; | |
f30e25a3 | 167 | |
069d7fc5 RH |
168 | /* Look for a simple register assignment. We don't use single_set here |
169 | because we can't deal with any CLOBBERs, USEs, or REG_UNUSED secondary | |
170 | destinations. */ | |
171 | if (!INSN_P (insn)) | |
172 | return false; | |
173 | set = PATTERN (insn); | |
174 | if (GET_CODE (set) != SET) | |
f30e25a3 ZC |
175 | return false; |
176 | src = SET_SRC (set); | |
177 | dest = SET_DEST (set); | |
2b63a3ac | 178 | |
069d7fc5 RH |
179 | /* For the destination, we want only a register. Also disallow STACK |
180 | or FRAME related adjustments. They are likely part of the prologue, | |
181 | so keep them in the entry block. */ | |
182 | if (!REG_P (dest) | |
183 | || dest == stack_pointer_rtx | |
184 | || dest == frame_pointer_rtx | |
185 | || dest == hard_frame_pointer_rtx) | |
186 | return false; | |
187 | ||
188 | /* For the source, we want one of: | |
189 | (1) A (non-overlapping) register | |
190 | (2) A constant, | |
191 | (3) An expression involving no more than one register. | |
192 | ||
193 | That last point comes from the code following, which was originally | |
194 | written to handle only register move operations, and still only handles | |
195 | a single source register when checking for overlaps. Happily, the | |
196 | same checks can be applied to expressions like (plus reg const). */ | |
197 | ||
198 | if (CONSTANT_P (src)) | |
199 | ; | |
200 | else if (!REG_P (src)) | |
2b63a3ac | 201 | { |
2b63a3ac JW |
202 | rtx src_inner = NULL_RTX; |
203 | ||
2cfc4ade JW |
204 | if (can_throw_internal (insn)) |
205 | return false; | |
206 | ||
2b63a3ac JW |
207 | subrtx_var_iterator::array_type array; |
208 | FOR_EACH_SUBRTX_VAR (iter, array, src, ALL) | |
209 | { | |
210 | rtx x = *iter; | |
069d7fc5 | 211 | switch (GET_RTX_CLASS (GET_CODE (x))) |
2b63a3ac | 212 | { |
069d7fc5 RH |
213 | case RTX_CONST_OBJ: |
214 | case RTX_COMPARE: | |
215 | case RTX_COMM_COMPARE: | |
216 | case RTX_BIN_ARITH: | |
217 | case RTX_COMM_ARITH: | |
218 | case RTX_UNARY: | |
219 | case RTX_TERNARY: | |
220 | /* Constant or expression. Continue. */ | |
221 | break; | |
222 | ||
223 | case RTX_OBJ: | |
224 | case RTX_EXTRA: | |
225 | switch (GET_CODE (x)) | |
226 | { | |
227 | case UNSPEC: | |
228 | case SUBREG: | |
229 | case STRICT_LOW_PART: | |
230 | case PC: | |
3f2012e4 | 231 | case LO_SUM: |
069d7fc5 RH |
232 | /* Ok. Continue. */ |
233 | break; | |
234 | ||
235 | case REG: | |
236 | /* Fail if we see a second inner register. */ | |
237 | if (src_inner != NULL) | |
238 | return false; | |
239 | src_inner = x; | |
240 | break; | |
241 | ||
242 | default: | |
243 | return false; | |
244 | } | |
245 | break; | |
246 | ||
247 | default: | |
248 | return false; | |
2b63a3ac | 249 | } |
2b63a3ac JW |
250 | } |
251 | ||
069d7fc5 | 252 | if (src_inner != NULL) |
2b63a3ac JW |
253 | src = src_inner; |
254 | } | |
255 | ||
f30e25a3 | 256 | /* Make sure that the source register isn't defined later in BB. */ |
2b63a3ac JW |
257 | if (REG_P (src)) |
258 | { | |
259 | sregno = REGNO (src); | |
260 | end_sregno = END_REGNO (src); | |
261 | if (overlaps_hard_reg_set_p (defs, GET_MODE (src), sregno)) | |
262 | return false; | |
263 | } | |
f30e25a3 ZC |
264 | |
265 | /* Make sure that the destination register isn't referenced later in BB. */ | |
266 | dregno = REGNO (dest); | |
267 | end_dregno = END_REGNO (dest); | |
268 | if (overlaps_hard_reg_set_p (uses, GET_MODE (dest), dregno) | |
269 | || overlaps_hard_reg_set_p (defs, GET_MODE (dest), dregno)) | |
270 | return false; | |
271 | ||
272 | /* See whether there is a successor block to which we could move INSN. */ | |
e974b93b ZC |
273 | live_edge = live_edge_for_reg (bb, dregno, end_dregno); |
274 | if (!live_edge) | |
f30e25a3 ZC |
275 | return false; |
276 | ||
e974b93b ZC |
277 | next_block = live_edge->dest; |
278 | /* Create a new basic block on the edge. */ | |
279 | if (EDGE_COUNT (next_block->preds) == 2) | |
280 | { | |
88f32f0f ZC |
281 | /* split_edge for a block with only one successor is meaningless. */ |
282 | if (EDGE_COUNT (bb->succs) == 1) | |
283 | return false; | |
284 | ||
d29d688a ZC |
285 | /* If DF_LIVE doesn't exist, i.e. at -O1, just give up. */ |
286 | if (!df_live) | |
287 | return false; | |
288 | ||
d0d9aad7 | 289 | basic_block old_dest = live_edge->dest; |
e974b93b ZC |
290 | next_block = split_edge (live_edge); |
291 | ||
d29d688a ZC |
292 | /* We create a new basic block. Call df_grow_bb_info to make sure |
293 | all data structures are allocated. */ | |
294 | df_grow_bb_info (df_live); | |
d0d9aad7 JW |
295 | |
296 | bitmap_and (df_get_live_in (next_block), df_get_live_out (bb), | |
297 | df_get_live_in (old_dest)); | |
e974b93b ZC |
298 | df_set_bb_dirty (next_block); |
299 | ||
300 | /* We should not split more than once for a function. */ | |
d0d9aad7 JW |
301 | if (*split_p) |
302 | return false; | |
303 | ||
e974b93b ZC |
304 | *split_p = true; |
305 | } | |
306 | ||
f30e25a3 ZC |
307 | /* At this point we are committed to moving INSN, but let's try to |
308 | move it as far as we can. */ | |
309 | do | |
310 | { | |
36f52e8f | 311 | if (MAY_HAVE_DEBUG_BIND_INSNS) |
015337d3 JJ |
312 | { |
313 | FOR_BB_INSNS_REVERSE (bb, dinsn) | |
36f52e8f | 314 | if (DEBUG_BIND_INSN_P (dinsn)) |
015337d3 JJ |
315 | { |
316 | df_ref use; | |
317 | FOR_EACH_INSN_USE (use, dinsn) | |
318 | if (refers_to_regno_p (dregno, end_dregno, | |
319 | DF_REF_REG (use), (rtx *) NULL)) | |
320 | dead_debug_add (debug, use, DF_REF_REGNO (use)); | |
321 | } | |
322 | else if (dinsn == insn) | |
323 | break; | |
324 | } | |
f30e25a3 ZC |
325 | live_out = df_get_live_out (bb); |
326 | live_in = df_get_live_in (next_block); | |
327 | bb = next_block; | |
328 | ||
329 | /* Check whether BB uses DEST or clobbers DEST. We need to add | |
330 | INSN to BB if so. Either way, DEST is no longer live on entry, | |
331 | except for any part that overlaps SRC (next loop). */ | |
b6a7a294 JJ |
332 | if (!*split_p) |
333 | { | |
334 | bb_uses = &DF_LR_BB_INFO (bb)->use; | |
335 | bb_defs = &DF_LR_BB_INFO (bb)->def; | |
336 | } | |
f30e25a3 ZC |
337 | if (df_live) |
338 | { | |
339 | for (i = dregno; i < end_dregno; i++) | |
340 | { | |
e974b93b ZC |
341 | if (*split_p |
342 | || REGNO_REG_SET_P (bb_uses, i) | |
343 | || REGNO_REG_SET_P (bb_defs, i) | |
f30e25a3 ZC |
344 | || REGNO_REG_SET_P (&DF_LIVE_BB_INFO (bb)->gen, i)) |
345 | next_block = NULL; | |
346 | CLEAR_REGNO_REG_SET (live_out, i); | |
347 | CLEAR_REGNO_REG_SET (live_in, i); | |
348 | } | |
349 | ||
350 | /* Check whether BB clobbers SRC. We need to add INSN to BB if so. | |
351 | Either way, SRC is now live on entry. */ | |
352 | for (i = sregno; i < end_sregno; i++) | |
353 | { | |
e974b93b ZC |
354 | if (*split_p |
355 | || REGNO_REG_SET_P (bb_defs, i) | |
f30e25a3 ZC |
356 | || REGNO_REG_SET_P (&DF_LIVE_BB_INFO (bb)->gen, i)) |
357 | next_block = NULL; | |
358 | SET_REGNO_REG_SET (live_out, i); | |
359 | SET_REGNO_REG_SET (live_in, i); | |
360 | } | |
361 | } | |
362 | else | |
363 | { | |
364 | /* DF_LR_BB_INFO (bb)->def does not comprise the DF_REF_PARTIAL and | |
365 | DF_REF_CONDITIONAL defs. So if DF_LIVE doesn't exist, i.e. | |
366 | at -O1, just give up searching NEXT_BLOCK. */ | |
367 | next_block = NULL; | |
368 | for (i = dregno; i < end_dregno; i++) | |
369 | { | |
370 | CLEAR_REGNO_REG_SET (live_out, i); | |
371 | CLEAR_REGNO_REG_SET (live_in, i); | |
372 | } | |
373 | ||
374 | for (i = sregno; i < end_sregno; i++) | |
375 | { | |
376 | SET_REGNO_REG_SET (live_out, i); | |
377 | SET_REGNO_REG_SET (live_in, i); | |
378 | } | |
379 | } | |
380 | ||
381 | /* If we don't need to add the move to BB, look for a single | |
382 | successor block. */ | |
383 | if (next_block) | |
e974b93b ZC |
384 | { |
385 | live_edge = live_edge_for_reg (next_block, dregno, end_dregno); | |
386 | if (!live_edge || EDGE_COUNT (live_edge->dest->preds) > 1) | |
387 | break; | |
388 | next_block = live_edge->dest; | |
389 | } | |
f30e25a3 ZC |
390 | } |
391 | while (next_block); | |
392 | ||
e974b93b ZC |
393 | /* For the new created basic block, there is no dataflow info at all. |
394 | So skip the following dataflow update and check. */ | |
395 | if (!(*split_p)) | |
f30e25a3 | 396 | { |
e974b93b ZC |
397 | /* BB now defines DEST. It only uses the parts of DEST that overlap SRC |
398 | (next loop). */ | |
399 | for (i = dregno; i < end_dregno; i++) | |
400 | { | |
401 | CLEAR_REGNO_REG_SET (bb_uses, i); | |
402 | SET_REGNO_REG_SET (bb_defs, i); | |
403 | } | |
f30e25a3 | 404 | |
e974b93b ZC |
405 | /* BB now uses SRC. */ |
406 | for (i = sregno; i < end_sregno; i++) | |
407 | SET_REGNO_REG_SET (bb_uses, i); | |
408 | } | |
f30e25a3 | 409 | |
015337d3 JJ |
410 | /* Insert debug temps for dead REGs used in subsequent debug insns. */ |
411 | if (debug->used && !bitmap_empty_p (debug->used)) | |
412 | FOR_EACH_INSN_DEF (def, insn) | |
413 | dead_debug_insert_temp (debug, DF_REF_REGNO (def), insn, | |
414 | DEBUG_TEMP_BEFORE_WITH_VALUE); | |
415 | ||
0a3d52dd AS |
416 | rtx_insn *insn_copy = emit_insn_after (PATTERN (insn), bb_note (bb)); |
417 | /* Update the LABEL_NUSES count on any referenced labels. The ideal | |
418 | solution here would be to actually move the instruction instead | |
419 | of copying/deleting it as this loses some notations on the | |
420 | insn. */ | |
421 | mark_jump_label (PATTERN (insn), insn_copy, 0); | |
f30e25a3 ZC |
422 | delete_insn (insn); |
423 | return true; | |
424 | } | |
425 | ||
426 | /* Look for register copies in the first block of the function, and move | |
427 | them down into successor blocks if the register is used only on one | |
428 | path. This exposes more opportunities for shrink-wrapping. These | |
429 | kinds of sets often occur when incoming argument registers are moved | |
430 | to call-saved registers because their values are live across one or | |
431 | more calls during the function. */ | |
432 | ||
8b661145 | 433 | static void |
f30e25a3 ZC |
434 | prepare_shrink_wrap (basic_block entry_block) |
435 | { | |
939d7575 DM |
436 | rtx_insn *insn, *curr; |
437 | rtx x; | |
f30e25a3 | 438 | HARD_REG_SET uses, defs; |
bfac633a | 439 | df_ref def, use; |
e974b93b | 440 | bool split_p = false; |
015337d3 JJ |
441 | unsigned int i; |
442 | struct dead_debug_local debug; | |
f30e25a3 | 443 | |
a2e6c10c ZC |
444 | if (JUMP_P (BB_END (entry_block))) |
445 | { | |
446 | /* To have more shrink-wrapping opportunities, prepare_shrink_wrap tries | |
447 | to sink the copies from parameter to callee saved register out of | |
448 | entry block. copyprop_hardreg_forward_bb_without_debug_insn is called | |
449 | to release some dependences. */ | |
450 | copyprop_hardreg_forward_bb_without_debug_insn (entry_block); | |
451 | } | |
452 | ||
015337d3 | 453 | dead_debug_local_init (&debug, NULL, NULL); |
f30e25a3 ZC |
454 | CLEAR_HARD_REG_SET (uses); |
455 | CLEAR_HARD_REG_SET (defs); | |
015337d3 | 456 | |
f30e25a3 ZC |
457 | FOR_BB_INSNS_REVERSE_SAFE (entry_block, insn, curr) |
458 | if (NONDEBUG_INSN_P (insn) | |
e974b93b | 459 | && !move_insn_for_shrink_wrap (entry_block, insn, uses, defs, |
015337d3 | 460 | &split_p, &debug)) |
f30e25a3 ZC |
461 | { |
462 | /* Add all defined registers to DEFs. */ | |
bfac633a | 463 | FOR_EACH_INSN_DEF (def, insn) |
f30e25a3 | 464 | { |
bfac633a | 465 | x = DF_REF_REG (def); |
f30e25a3 | 466 | if (REG_P (x) && HARD_REGISTER_P (x)) |
015337d3 JJ |
467 | for (i = REGNO (x); i < END_REGNO (x); i++) |
468 | SET_HARD_REG_BIT (defs, i); | |
f30e25a3 ZC |
469 | } |
470 | ||
471 | /* Add all used registers to USESs. */ | |
bfac633a | 472 | FOR_EACH_INSN_USE (use, insn) |
f30e25a3 | 473 | { |
bfac633a | 474 | x = DF_REF_REG (use); |
f30e25a3 | 475 | if (REG_P (x) && HARD_REGISTER_P (x)) |
015337d3 JJ |
476 | for (i = REGNO (x); i < END_REGNO (x); i++) |
477 | SET_HARD_REG_BIT (uses, i); | |
f30e25a3 ZC |
478 | } |
479 | } | |
015337d3 JJ |
480 | |
481 | dead_debug_local_finish (&debug, NULL); | |
f30e25a3 ZC |
482 | } |
483 | ||
67914693 | 484 | /* Return whether basic block PRO can get the prologue. It cannot if it |
6c98d499 SB |
485 | has incoming complex edges that need a prologue inserted (we make a new |
486 | block for the prologue, so those edges would need to be redirected, which | |
67914693 | 487 | does not work). It also cannot if there exist registers live on entry |
6c98d499 SB |
488 | to PRO that are clobbered by the prologue. */ |
489 | ||
490 | static bool | |
491 | can_get_prologue (basic_block pro, HARD_REG_SET prologue_clobbered) | |
492 | { | |
493 | edge e; | |
494 | edge_iterator ei; | |
495 | FOR_EACH_EDGE (e, ei, pro->preds) | |
496 | if (e->flags & (EDGE_COMPLEX | EDGE_CROSSING) | |
497 | && !dominated_by_p (CDI_DOMINATORS, e->src, pro)) | |
498 | return false; | |
499 | ||
500 | HARD_REG_SET live; | |
501 | REG_SET_TO_HARD_REG_SET (live, df_get_live_in (pro)); | |
502 | if (hard_reg_set_intersect_p (live, prologue_clobbered)) | |
503 | return false; | |
504 | ||
505 | return true; | |
506 | } | |
507 | ||
23997c53 SB |
508 | /* Return whether we can duplicate basic block BB for shrink wrapping. We |
509 | cannot if the block cannot be duplicated at all, or if any of its incoming | |
510 | edges are complex and come from a block that does not require a prologue | |
511 | (we cannot redirect such edges), or if the block is too big to copy. | |
512 | PRO is the basic block before which we would put the prologue, MAX_SIZE is | |
513 | the maximum size block we allow to be copied. */ | |
514 | ||
515 | static bool | |
516 | can_dup_for_shrink_wrapping (basic_block bb, basic_block pro, unsigned max_size) | |
f30e25a3 | 517 | { |
23997c53 SB |
518 | if (!can_duplicate_block_p (bb)) |
519 | return false; | |
f30e25a3 | 520 | |
23997c53 SB |
521 | edge e; |
522 | edge_iterator ei; | |
523 | FOR_EACH_EDGE (e, ei, bb->preds) | |
6c98d499 | 524 | if (e->flags & (EDGE_COMPLEX | EDGE_CROSSING) |
23997c53 SB |
525 | && !dominated_by_p (CDI_DOMINATORS, e->src, pro)) |
526 | return false; | |
f30e25a3 | 527 | |
23997c53 | 528 | unsigned size = 0; |
f30e25a3 | 529 | |
23997c53 SB |
530 | rtx_insn *insn; |
531 | FOR_BB_INSNS (bb, insn) | |
532 | if (NONDEBUG_INSN_P (insn)) | |
f30e25a3 | 533 | { |
23997c53 SB |
534 | size += get_attr_min_length (insn); |
535 | if (size > max_size) | |
536 | return false; | |
f30e25a3 | 537 | } |
23997c53 SB |
538 | |
539 | return true; | |
f30e25a3 ZC |
540 | } |
541 | ||
33fec8d5 SB |
542 | /* Do whatever needs to be done for exits that run without prologue. |
543 | Sibcalls need nothing done. Normal exits get a simple_return inserted. */ | |
23997c53 | 544 | |
33fec8d5 SB |
545 | static void |
546 | handle_simple_exit (edge e) | |
23997c53 | 547 | { |
23997c53 | 548 | |
33fec8d5 SB |
549 | if (e->flags & EDGE_SIBCALL) |
550 | { | |
551 | /* Tell function.c to take no further action on this edge. */ | |
552 | e->flags |= EDGE_IGNORE; | |
553 | ||
554 | e->flags &= ~EDGE_FALLTHRU; | |
555 | emit_barrier_after_bb (e->src); | |
556 | return; | |
557 | } | |
558 | ||
559 | /* If the basic block the edge comes from has multiple successors, | |
560 | split the edge. */ | |
561 | if (EDGE_COUNT (e->src->succs) > 1) | |
562 | { | |
563 | basic_block old_bb = e->src; | |
564 | rtx_insn *end = BB_END (old_bb); | |
565 | rtx_note *note = emit_note_after (NOTE_INSN_DELETED, end); | |
566 | basic_block new_bb = create_basic_block (note, note, old_bb); | |
567 | BB_COPY_PARTITION (new_bb, old_bb); | |
568 | BB_END (old_bb) = end; | |
569 | ||
570 | redirect_edge_succ (e, new_bb); | |
e7a74006 | 571 | new_bb->count = e->count (); |
33fec8d5 SB |
572 | e->flags |= EDGE_FALLTHRU; |
573 | ||
9c47c8ae | 574 | e = make_single_succ_edge (new_bb, EXIT_BLOCK_PTR_FOR_FN (cfun), 0); |
33fec8d5 | 575 | } |
23997c53 | 576 | |
33fec8d5 SB |
577 | e->flags &= ~EDGE_FALLTHRU; |
578 | rtx_jump_insn *ret = emit_jump_insn_after (targetm.gen_simple_return (), | |
579 | BB_END (e->src)); | |
580 | JUMP_LABEL (ret) = simple_return_rtx; | |
581 | emit_barrier_after_bb (e->src); | |
23997c53 | 582 | |
33fec8d5 SB |
583 | if (dump_file) |
584 | fprintf (dump_file, "Made simple_return with UID %d in bb %d\n", | |
585 | INSN_UID (ret), e->src->index); | |
23997c53 | 586 | } |
f30e25a3 ZC |
587 | |
588 | /* Try to perform a kind of shrink-wrapping, making sure the | |
589 | prologue/epilogue is emitted only around those parts of the | |
23997c53 SB |
590 | function that require it. |
591 | ||
592 | There will be exactly one prologue, and it will be executed either | |
593 | zero or one time, on any path. Depending on where the prologue is | |
594 | placed, some of the basic blocks can be reached via both paths with | |
595 | and without a prologue. Such blocks will be duplicated here, and the | |
596 | edges changed to match. | |
597 | ||
598 | Paths that go to the exit without going through the prologue will use | |
599 | a simple_return instead of the epilogue. We maximize the number of | |
600 | those, making sure to only duplicate blocks that can be duplicated. | |
601 | If the prologue can then still be placed in multiple locations, we | |
602 | place it as early as possible. | |
603 | ||
604 | An example, where we duplicate blocks with control flow (legend: | |
605 | _B_egin, _R_eturn and _S_imple_return; edges without arrowhead should | |
606 | be taken to point down or to the right, to simplify the diagram; here, | |
607 | block 3 needs a prologue, the rest does not): | |
608 | ||
609 | ||
610 | B B | |
611 | | | | |
612 | 2 2 | |
613 | |\ |\ | |
614 | | 3 becomes | 3 | |
615 | |/ | \ | |
616 | 4 7 4 | |
617 | |\ |\ |\ | |
618 | | 5 | 8 | 5 | |
619 | |/ |/ |/ | |
620 | 6 9 6 | |
621 | | | | | |
622 | R S R | |
623 | ||
624 | ||
625 | (bb 4 is duplicated to 7, and so on; the prologue is inserted on the | |
626 | edge 2->3). | |
627 | ||
628 | Another example, where part of a loop is duplicated (again, bb 3 is | |
629 | the only block that needs a prologue): | |
630 | ||
631 | ||
632 | B 3<-- B ->3<-- | |
633 | | | | | | | | | |
634 | | v | becomes | | v | | |
635 | 2---4--- 2---5-- 4--- | |
636 | | | | | |
637 | R S R | |
638 | ||
639 | ||
640 | (bb 4 is duplicated to 5; the prologue is inserted on the edge 5->3). | |
641 | ||
642 | ENTRY_EDGE is the edge where the prologue will be placed, possibly | |
33fec8d5 | 643 | changed by this function. PROLOGUE_SEQ is the prologue we will insert. */ |
f30e25a3 ZC |
644 | |
645 | void | |
33fec8d5 | 646 | try_shrink_wrapping (edge *entry_edge, rtx_insn *prologue_seq) |
f30e25a3 | 647 | { |
23997c53 SB |
648 | /* If we cannot shrink-wrap, are told not to shrink-wrap, or it makes |
649 | no sense to shrink-wrap: then do not shrink-wrap! */ | |
650 | ||
651 | if (!SHRINK_WRAPPING_ENABLED) | |
652 | return; | |
653 | ||
654 | if (crtl->profile && !targetm.profile_before_prologue ()) | |
655 | return; | |
f30e25a3 | 656 | |
23997c53 SB |
657 | if (crtl->calls_eh_return) |
658 | return; | |
659 | ||
660 | bool empty_prologue = true; | |
661 | for (rtx_insn *insn = prologue_seq; insn; insn = NEXT_INSN (insn)) | |
662 | if (!(NOTE_P (insn) && NOTE_KIND (insn) == NOTE_INSN_PROLOGUE_END)) | |
f30e25a3 | 663 | { |
23997c53 | 664 | empty_prologue = false; |
f30e25a3 ZC |
665 | break; |
666 | } | |
23997c53 SB |
667 | if (empty_prologue) |
668 | return; | |
669 | ||
670 | /* Move some code down to expose more shrink-wrapping opportunities. */ | |
671 | ||
672 | basic_block entry = (*entry_edge)->dest; | |
673 | prepare_shrink_wrap (entry); | |
674 | ||
675 | if (dump_file) | |
676 | fprintf (dump_file, "Attempting shrink-wrapping optimization.\n"); | |
677 | ||
678 | /* Compute the registers set and used in the prologue. */ | |
679 | ||
680 | HARD_REG_SET prologue_clobbered, prologue_used; | |
681 | CLEAR_HARD_REG_SET (prologue_clobbered); | |
682 | CLEAR_HARD_REG_SET (prologue_used); | |
683 | for (rtx_insn *insn = prologue_seq; insn; insn = NEXT_INSN (insn)) | |
684 | if (NONDEBUG_INSN_P (insn)) | |
685 | { | |
686 | HARD_REG_SET this_used; | |
687 | CLEAR_HARD_REG_SET (this_used); | |
688 | note_uses (&PATTERN (insn), record_hard_reg_uses, &this_used); | |
d15e5131 | 689 | this_used &= ~prologue_clobbered; |
44942965 | 690 | prologue_used |= this_used; |
e8448ba5 | 691 | note_stores (insn, record_hard_reg_sets, &prologue_clobbered); |
23997c53 | 692 | } |
6c98d499 SB |
693 | CLEAR_HARD_REG_BIT (prologue_clobbered, STACK_POINTER_REGNUM); |
694 | if (frame_pointer_needed) | |
695 | CLEAR_HARD_REG_BIT (prologue_clobbered, HARD_FRAME_POINTER_REGNUM); | |
f30e25a3 | 696 | |
23997c53 SB |
697 | /* Find out what registers are set up by the prologue; any use of these |
698 | cannot happen before the prologue. */ | |
699 | ||
700 | struct hard_reg_set_container set_up_by_prologue; | |
701 | CLEAR_HARD_REG_SET (set_up_by_prologue.set); | |
702 | add_to_hard_reg_set (&set_up_by_prologue.set, Pmode, STACK_POINTER_REGNUM); | |
703 | add_to_hard_reg_set (&set_up_by_prologue.set, Pmode, ARG_POINTER_REGNUM); | |
704 | if (frame_pointer_needed) | |
705 | add_to_hard_reg_set (&set_up_by_prologue.set, Pmode, | |
706 | HARD_FRAME_POINTER_REGNUM); | |
707 | if (pic_offset_table_rtx | |
708 | && (unsigned) PIC_OFFSET_TABLE_REGNUM != INVALID_REGNUM) | |
709 | add_to_hard_reg_set (&set_up_by_prologue.set, Pmode, | |
710 | PIC_OFFSET_TABLE_REGNUM); | |
711 | if (crtl->drap_reg) | |
712 | add_to_hard_reg_set (&set_up_by_prologue.set, | |
713 | GET_MODE (crtl->drap_reg), | |
714 | REGNO (crtl->drap_reg)); | |
715 | if (targetm.set_up_by_prologue) | |
716 | targetm.set_up_by_prologue (&set_up_by_prologue); | |
717 | ||
718 | /* We will insert the prologue before the basic block PRO. PRO should | |
719 | dominate all basic blocks that need the prologue to be executed | |
720 | before them. First, make PRO the "tightest wrap" possible. */ | |
721 | ||
722 | calculate_dominance_info (CDI_DOMINATORS); | |
723 | ||
724 | basic_block pro = 0; | |
725 | ||
726 | basic_block bb; | |
727 | edge e; | |
728 | edge_iterator ei; | |
729 | FOR_EACH_BB_FN (bb, cfun) | |
f30e25a3 | 730 | { |
23997c53 SB |
731 | rtx_insn *insn; |
732 | FOR_BB_INSNS (bb, insn) | |
733 | if (NONDEBUG_INSN_P (insn) | |
734 | && requires_stack_frame_p (insn, prologue_used, | |
735 | set_up_by_prologue.set)) | |
736 | { | |
737 | if (dump_file) | |
738 | fprintf (dump_file, "Block %d needs the prologue.\n", bb->index); | |
739 | pro = nearest_common_dominator (CDI_DOMINATORS, pro, bb); | |
740 | break; | |
741 | } | |
742 | } | |
f30e25a3 | 743 | |
23997c53 SB |
744 | /* If nothing needs a prologue, just put it at the start. This really |
745 | shouldn't happen, but we cannot fix it here. */ | |
746 | ||
747 | if (pro == 0) | |
748 | { | |
f30e25a3 | 749 | if (dump_file) |
23997c53 SB |
750 | fprintf(dump_file, "Nothing needs a prologue, but it isn't empty; " |
751 | "putting it at the start.\n"); | |
752 | pro = entry; | |
753 | } | |
f30e25a3 | 754 | |
23997c53 SB |
755 | if (dump_file) |
756 | fprintf (dump_file, "After wrapping required blocks, PRO is now %d\n", | |
757 | pro->index); | |
f30e25a3 | 758 | |
23997c53 | 759 | /* Now see if we can put the prologue at the start of PRO. Putting it |
6c98d499 SB |
760 | there might require duplicating a block that cannot be duplicated, |
761 | or in some cases we cannot insert the prologue there at all. If PRO | |
762 | wont't do, try again with the immediate dominator of PRO, and so on. | |
f30e25a3 | 763 | |
23997c53 SB |
764 | The blocks that need duplicating are those reachable from PRO but |
765 | not dominated by it. We keep in BB_WITH a bitmap of the blocks | |
766 | reachable from PRO that we already found, and in VEC a stack of | |
767 | those we still need to consider (to find successors). */ | |
f30e25a3 | 768 | |
0e3de1d4 | 769 | auto_bitmap bb_with; |
23997c53 | 770 | bitmap_set_bit (bb_with, pro->index); |
f30e25a3 | 771 | |
23997c53 SB |
772 | vec<basic_block> vec; |
773 | vec.create (n_basic_blocks_for_fn (cfun)); | |
774 | vec.quick_push (pro); | |
f30e25a3 | 775 | |
23997c53 | 776 | unsigned max_grow_size = get_uncond_jump_length (); |
028d4092 | 777 | max_grow_size *= param_max_grow_copy_bb_insns; |
f30e25a3 | 778 | |
23997c53 SB |
779 | while (!vec.is_empty () && pro != entry) |
780 | { | |
6c98d499 SB |
781 | while (pro != entry && !can_get_prologue (pro, prologue_clobbered)) |
782 | { | |
783 | pro = get_immediate_dominator (CDI_DOMINATORS, pro); | |
784 | ||
82ad5144 SB |
785 | if (bitmap_set_bit (bb_with, pro->index)) |
786 | vec.quick_push (pro); | |
6c98d499 SB |
787 | } |
788 | ||
23997c53 SB |
789 | basic_block bb = vec.pop (); |
790 | if (!can_dup_for_shrink_wrapping (bb, pro, max_grow_size)) | |
791 | while (!dominated_by_p (CDI_DOMINATORS, bb, pro)) | |
792 | { | |
793 | gcc_assert (pro != entry); | |
f30e25a3 | 794 | |
23997c53 SB |
795 | pro = get_immediate_dominator (CDI_DOMINATORS, pro); |
796 | ||
82ad5144 SB |
797 | if (bitmap_set_bit (bb_with, pro->index)) |
798 | vec.quick_push (pro); | |
23997c53 SB |
799 | } |
800 | ||
801 | FOR_EACH_EDGE (e, ei, bb->succs) | |
802 | if (e->dest != EXIT_BLOCK_PTR_FOR_FN (cfun) | |
803 | && bitmap_set_bit (bb_with, e->dest->index)) | |
804 | vec.quick_push (e->dest); | |
805 | } | |
806 | ||
23997c53 SB |
807 | if (dump_file) |
808 | fprintf (dump_file, "Avoiding non-duplicatable blocks, PRO is now %d\n", | |
809 | pro->index); | |
810 | ||
811 | /* If we can move PRO back without having to duplicate more blocks, do so. | |
5d59ed63 | 812 | We do this because putting the prologue earlier is better for scheduling. |
52ad5601 | 813 | |
23997c53 | 814 | We can move back to a block PRE if every path from PRE will eventually |
5d59ed63 | 815 | need a prologue, that is, PRO is a post-dominator of PRE. PRE needs |
52ad5601 SB |
816 | to dominate every block reachable from itself. We keep in BB_TMP a |
817 | bitmap of the blocks reachable from PRE that we already found, and in | |
818 | VEC a stack of those we still need to consider. | |
819 | ||
820 | Any block reachable from PRE is also reachable from all predecessors | |
821 | of PRE, so if we find we need to move PRE back further we can leave | |
822 | everything not considered so far on the stack. Any block dominated | |
823 | by PRE is also dominated by all other dominators of PRE, so anything | |
824 | found good for some PRE does not need to be reconsidered later. | |
825 | ||
826 | We don't need to update BB_WITH because none of the new blocks found | |
827 | can jump to a block that does not need the prologue. */ | |
23997c53 SB |
828 | |
829 | if (pro != entry) | |
830 | { | |
831 | calculate_dominance_info (CDI_POST_DOMINATORS); | |
f30e25a3 | 832 | |
0e3de1d4 | 833 | auto_bitmap bb_tmp; |
5d59ed63 | 834 | bitmap_copy (bb_tmp, bb_with); |
6c98d499 | 835 | basic_block last_ok = pro; |
5d59ed63 SB |
836 | vec.truncate (0); |
837 | ||
23997c53 | 838 | while (pro != entry) |
f30e25a3 | 839 | { |
23997c53 | 840 | basic_block pre = get_immediate_dominator (CDI_DOMINATORS, pro); |
6c98d499 | 841 | if (!dominated_by_p (CDI_POST_DOMINATORS, pre, pro)) |
23997c53 | 842 | break; |
6c98d499 | 843 | |
5d59ed63 SB |
844 | if (bitmap_set_bit (bb_tmp, pre->index)) |
845 | vec.quick_push (pre); | |
846 | ||
847 | bool ok = true; | |
848 | while (!vec.is_empty ()) | |
849 | { | |
52ad5601 | 850 | if (!dominated_by_p (CDI_DOMINATORS, vec.last (), pre)) |
5d59ed63 SB |
851 | { |
852 | ok = false; | |
853 | break; | |
854 | } | |
855 | ||
52ad5601 | 856 | basic_block bb = vec.pop (); |
5d59ed63 | 857 | FOR_EACH_EDGE (e, ei, bb->succs) |
52ad5601 | 858 | if (bitmap_set_bit (bb_tmp, e->dest->index)) |
5d59ed63 SB |
859 | vec.quick_push (e->dest); |
860 | } | |
861 | ||
862 | if (ok && can_get_prologue (pre, prologue_clobbered)) | |
863 | last_ok = pre; | |
864 | ||
6c98d499 | 865 | pro = pre; |
f30e25a3 | 866 | } |
5d59ed63 | 867 | |
6c98d499 | 868 | pro = last_ok; |
f30e25a3 | 869 | |
23997c53 SB |
870 | free_dominance_info (CDI_POST_DOMINATORS); |
871 | } | |
f30e25a3 | 872 | |
5d59ed63 SB |
873 | vec.release (); |
874 | ||
23997c53 SB |
875 | if (dump_file) |
876 | fprintf (dump_file, "Bumping back to anticipatable blocks, PRO is now %d\n", | |
877 | pro->index); | |
878 | ||
6c98d499 | 879 | if (pro == entry) |
23997c53 | 880 | { |
6c98d499 SB |
881 | free_dominance_info (CDI_DOMINATORS); |
882 | return; | |
23997c53 SB |
883 | } |
884 | ||
23997c53 SB |
885 | /* Compute what fraction of the frequency and count of the blocks that run |
886 | both with and without prologue are for running with prologue. This gives | |
887 | the correct answer for reducible flow graphs; for irreducible flow graphs | |
888 | our profile is messed up beyond repair anyway. */ | |
889 | ||
fee234f1 JH |
890 | profile_count num = profile_count::zero (); |
891 | profile_count den = profile_count::zero (); | |
23997c53 | 892 | |
6c98d499 SB |
893 | FOR_EACH_EDGE (e, ei, pro->preds) |
894 | if (!dominated_by_p (CDI_DOMINATORS, e->src, pro)) | |
895 | { | |
fee234f1 JH |
896 | if (e->count ().initialized_p ()) |
897 | num += e->count (); | |
898 | if (e->src->count.initialized_p ()) | |
899 | den += e->src->count; | |
6c98d499 | 900 | } |
23997c53 | 901 | |
23997c53 SB |
902 | /* All is okay, so do it. */ |
903 | ||
904 | crtl->shrink_wrapped = true; | |
905 | if (dump_file) | |
906 | fprintf (dump_file, "Performing shrink-wrapping.\n"); | |
907 | ||
908 | /* Copy the blocks that can run both with and without prologue. The | |
909 | originals run with prologue, the copies without. Store a pointer to | |
910 | the copy in the ->aux field of the original. */ | |
911 | ||
912 | FOR_EACH_BB_FN (bb, cfun) | |
913 | if (bitmap_bit_p (bb_with, bb->index) | |
914 | && !dominated_by_p (CDI_DOMINATORS, bb, pro)) | |
915 | { | |
916 | basic_block dup = duplicate_block (bb, 0, 0); | |
917 | ||
918 | bb->aux = dup; | |
919 | ||
920 | if (JUMP_P (BB_END (dup)) && !any_condjump_p (BB_END (dup))) | |
921 | emit_barrier_after_bb (dup); | |
922 | ||
923 | if (EDGE_COUNT (dup->succs) == 0) | |
924 | emit_barrier_after_bb (dup); | |
925 | ||
926 | if (dump_file) | |
927 | fprintf (dump_file, "Duplicated %d to %d\n", bb->index, dup->index); | |
fee234f1 JH |
928 | |
929 | if (num == profile_count::zero () || den.nonzero_p ()) | |
930 | bb->count = bb->count.apply_scale (num, den); | |
23997c53 SB |
931 | dup->count -= bb->count; |
932 | } | |
933 | ||
23997c53 SB |
934 | /* Now change the edges to point to the copies, where appropriate. */ |
935 | ||
936 | FOR_EACH_BB_FN (bb, cfun) | |
937 | if (!dominated_by_p (CDI_DOMINATORS, bb, pro)) | |
938 | { | |
939 | basic_block src = bb; | |
940 | if (bitmap_bit_p (bb_with, bb->index)) | |
941 | src = (basic_block) bb->aux; | |
942 | ||
943 | FOR_EACH_EDGE (e, ei, src->succs) | |
f30e25a3 | 944 | { |
23997c53 | 945 | if (e->dest == EXIT_BLOCK_PTR_FOR_FN (cfun)) |
f30e25a3 | 946 | continue; |
f30e25a3 | 947 | |
23997c53 SB |
948 | if (bitmap_bit_p (bb_with, e->dest->index) |
949 | && !dominated_by_p (CDI_DOMINATORS, e->dest, pro)) | |
f30e25a3 | 950 | { |
23997c53 SB |
951 | if (dump_file) |
952 | fprintf (dump_file, "Redirecting edge %d->%d to %d\n", | |
953 | e->src->index, e->dest->index, | |
954 | ((basic_block) e->dest->aux)->index); | |
955 | redirect_edge_and_branch_force (e, (basic_block) e->dest->aux); | |
f30e25a3 | 956 | } |
23997c53 SB |
957 | else if (e->flags & EDGE_FALLTHRU |
958 | && bitmap_bit_p (bb_with, bb->index)) | |
959 | force_nonfallthru (e); | |
960 | } | |
961 | } | |
f30e25a3 | 962 | |
23997c53 | 963 | /* Also redirect the function entry edge if necessary. */ |
f30e25a3 | 964 | |
23997c53 SB |
965 | FOR_EACH_EDGE (e, ei, ENTRY_BLOCK_PTR_FOR_FN (cfun)->succs) |
966 | if (bitmap_bit_p (bb_with, e->dest->index) | |
967 | && !dominated_by_p (CDI_DOMINATORS, e->dest, pro)) | |
968 | { | |
969 | basic_block split_bb = split_edge (e); | |
970 | e = single_succ_edge (split_bb); | |
971 | redirect_edge_and_branch_force (e, (basic_block) e->dest->aux); | |
972 | } | |
f30e25a3 | 973 | |
d07d2177 | 974 | /* Make a simple_return for those exits that run without prologue. */ |
23997c53 | 975 | |
d07d2177 | 976 | FOR_EACH_BB_REVERSE_FN (bb, cfun) |
23997c53 SB |
977 | if (!bitmap_bit_p (bb_with, bb->index)) |
978 | FOR_EACH_EDGE (e, ei, bb->succs) | |
979 | if (e->dest == EXIT_BLOCK_PTR_FOR_FN (cfun)) | |
33fec8d5 | 980 | handle_simple_exit (e); |
23997c53 | 981 | |
6c98d499 SB |
982 | /* Finally, we want a single edge to put the prologue on. Make a new |
983 | block before the PRO block; the edge beteen them is the edge we want. | |
984 | Then redirect those edges into PRO that come from blocks without the | |
985 | prologue, to point to the new block instead. The new prologue block | |
986 | is put at the end of the insn chain. */ | |
987 | ||
988 | basic_block new_bb = create_empty_bb (EXIT_BLOCK_PTR_FOR_FN (cfun)->prev_bb); | |
989 | BB_COPY_PARTITION (new_bb, pro); | |
9c47c8ae | 990 | new_bb->count = profile_count::zero (); |
6c98d499 SB |
991 | if (dump_file) |
992 | fprintf (dump_file, "Made prologue block %d\n", new_bb->index); | |
993 | ||
994 | for (ei = ei_start (pro->preds); (e = ei_safe_edge (ei)); ) | |
995 | { | |
996 | if (bitmap_bit_p (bb_with, e->src->index) | |
997 | || dominated_by_p (CDI_DOMINATORS, e->src, pro)) | |
998 | { | |
999 | ei_next (&ei); | |
1000 | continue; | |
1001 | } | |
1002 | ||
e7a74006 | 1003 | new_bb->count += e->count (); |
6c98d499 SB |
1004 | |
1005 | redirect_edge_and_branch_force (e, new_bb); | |
1006 | if (dump_file) | |
1007 | fprintf (dump_file, "Redirected edge from %d\n", e->src->index); | |
1008 | } | |
1009 | ||
1010 | *entry_edge = make_single_succ_edge (new_bb, pro, EDGE_FALLTHRU); | |
1011 | force_nonfallthru (*entry_edge); | |
1012 | ||
23997c53 | 1013 | free_dominance_info (CDI_DOMINATORS); |
f30e25a3 | 1014 | } |
c997869f SB |
1015 | \f |
1016 | /* Separate shrink-wrapping | |
1017 | ||
1018 | Instead of putting all of the prologue and epilogue in one spot, we | |
1019 | can put parts of it in places where those components are executed less | |
1020 | frequently. The following code does this, for prologue and epilogue | |
1021 | components that can be put in more than one location, and where those | |
1022 | components can be executed more than once (the epilogue component will | |
1023 | always be executed before the prologue component is executed a second | |
1024 | time). | |
1025 | ||
1026 | What exactly is a component is target-dependent. The more usual | |
1027 | components are simple saves/restores to/from the frame of callee-saved | |
1028 | registers. This code treats components abstractly (as an sbitmap), | |
1029 | letting the target handle all details. | |
1030 | ||
1031 | Prologue components are placed in such a way that for every component | |
1032 | the prologue is executed as infrequently as possible. We do this by | |
1033 | walking the dominator tree, comparing the cost of placing a prologue | |
1034 | component before a block to the sum of costs determined for all subtrees | |
1035 | of that block. | |
1036 | ||
1037 | From this placement, we then determine for each component all blocks | |
1038 | where at least one of this block's dominators (including itself) will | |
1039 | get a prologue inserted. That then is how the components are placed. | |
1040 | We could place the epilogue components a bit smarter (we can save a | |
1041 | bit of code size sometimes); this is a possible future improvement. | |
1042 | ||
1043 | Prologues and epilogues are preferably placed into a block, either at | |
1044 | the beginning or end of it, if it is needed for all predecessor resp. | |
1045 | successor edges; or placed on the edge otherwise. | |
1046 | ||
1047 | If the placement of any prologue/epilogue leads to a situation we cannot | |
1048 | handle (for example, an abnormal edge would need to be split, or some | |
1049 | targets want to use some specific registers that may not be available | |
1050 | where we want to put them), separate shrink-wrapping for the components | |
1051 | in that prologue/epilogue is aborted. */ | |
1052 | ||
1053 | ||
1054 | /* Print the sbitmap COMPONENTS to the DUMP_FILE if not empty, with the | |
1055 | label LABEL. */ | |
1056 | static void | |
1057 | dump_components (const char *label, sbitmap components) | |
1058 | { | |
1059 | if (bitmap_empty_p (components)) | |
1060 | return; | |
1061 | ||
1062 | fprintf (dump_file, " [%s", label); | |
1063 | ||
1064 | for (unsigned int j = 0; j < components->n_bits; j++) | |
1065 | if (bitmap_bit_p (components, j)) | |
1066 | fprintf (dump_file, " %u", j); | |
1067 | ||
1068 | fprintf (dump_file, "]"); | |
1069 | } | |
1070 | ||
1071 | /* The data we collect for each bb. */ | |
1072 | struct sw { | |
1073 | /* What components does this BB need? */ | |
1074 | sbitmap needs_components; | |
1075 | ||
1076 | /* What components does this BB have? This is the main decision this | |
1077 | pass makes. */ | |
1078 | sbitmap has_components; | |
1079 | ||
1080 | /* The components for which we placed code at the start of the BB (instead | |
1081 | of on all incoming edges). */ | |
1082 | sbitmap head_components; | |
1083 | ||
1084 | /* The components for which we placed code at the end of the BB (instead | |
1085 | of on all outgoing edges). */ | |
1086 | sbitmap tail_components; | |
1087 | ||
1088 | /* The frequency of executing the prologue for this BB, if a prologue is | |
1089 | placed on this BB. This is a pessimistic estimate (no prologue is | |
1090 | needed for edges from blocks that have the component under consideration | |
1091 | active already). */ | |
1092 | gcov_type own_cost; | |
1093 | ||
1094 | /* The frequency of executing the prologue for this BB and all BBs | |
1095 | dominated by it. */ | |
1096 | gcov_type total_cost; | |
1097 | }; | |
1098 | ||
1099 | /* A helper function for accessing the pass-specific info. */ | |
1100 | static inline struct sw * | |
1101 | SW (basic_block bb) | |
1102 | { | |
1103 | gcc_assert (bb->aux); | |
1104 | return (struct sw *) bb->aux; | |
1105 | } | |
1106 | ||
1107 | /* Create the pass-specific data structures for separately shrink-wrapping | |
1108 | with components COMPONENTS. */ | |
1109 | static void | |
1110 | init_separate_shrink_wrap (sbitmap components) | |
1111 | { | |
1112 | basic_block bb; | |
1113 | FOR_ALL_BB_FN (bb, cfun) | |
1114 | { | |
1115 | bb->aux = xcalloc (1, sizeof (struct sw)); | |
1116 | ||
1117 | SW (bb)->needs_components = targetm.shrink_wrap.components_for_bb (bb); | |
1118 | ||
1119 | /* Mark all basic blocks without successor as needing all components. | |
1120 | This avoids problems in at least cfgcleanup, sel-sched, and | |
1121 | regrename (largely to do with all paths to such a block still | |
1122 | needing the same dwarf CFI info). */ | |
1123 | if (EDGE_COUNT (bb->succs) == 0) | |
1124 | bitmap_copy (SW (bb)->needs_components, components); | |
1125 | ||
1126 | if (dump_file) | |
1127 | { | |
1128 | fprintf (dump_file, "bb %d components:", bb->index); | |
1129 | dump_components ("has", SW (bb)->needs_components); | |
1130 | fprintf (dump_file, "\n"); | |
1131 | } | |
1132 | ||
1133 | SW (bb)->has_components = sbitmap_alloc (SBITMAP_SIZE (components)); | |
1134 | SW (bb)->head_components = sbitmap_alloc (SBITMAP_SIZE (components)); | |
1135 | SW (bb)->tail_components = sbitmap_alloc (SBITMAP_SIZE (components)); | |
1136 | bitmap_clear (SW (bb)->has_components); | |
c997869f SB |
1137 | } |
1138 | } | |
1139 | ||
1140 | /* Destroy the pass-specific data. */ | |
1141 | static void | |
1142 | fini_separate_shrink_wrap (void) | |
1143 | { | |
1144 | basic_block bb; | |
1145 | FOR_ALL_BB_FN (bb, cfun) | |
1146 | if (bb->aux) | |
1147 | { | |
1148 | sbitmap_free (SW (bb)->needs_components); | |
1149 | sbitmap_free (SW (bb)->has_components); | |
1150 | sbitmap_free (SW (bb)->head_components); | |
1151 | sbitmap_free (SW (bb)->tail_components); | |
1152 | free (bb->aux); | |
1153 | bb->aux = 0; | |
1154 | } | |
1155 | } | |
1156 | ||
1157 | /* Place the prologue for component WHICH, in the basic blocks dominated | |
1158 | by HEAD. Do a DFS over the dominator tree, and set bit WHICH in the | |
1159 | HAS_COMPONENTS of a block if either the block has that bit set in | |
1160 | NEEDS_COMPONENTS, or it is cheaper to place the prologue here than in all | |
1161 | dominator subtrees separately. */ | |
1162 | static void | |
1163 | place_prologue_for_one_component (unsigned int which, basic_block head) | |
1164 | { | |
1165 | /* The block we are currently dealing with. */ | |
1166 | basic_block bb = head; | |
1167 | /* Is this the first time we visit this block, i.e. have we just gone | |
1168 | down the tree. */ | |
1169 | bool first_visit = true; | |
1170 | ||
1171 | /* Walk the dominator tree, visit one block per iteration of this loop. | |
1172 | Each basic block is visited twice: once before visiting any children | |
1173 | of the block, and once after visiting all of them (leaf nodes are | |
1174 | visited only once). As an optimization, we do not visit subtrees | |
1175 | that can no longer influence the prologue placement. */ | |
1176 | for (;;) | |
1177 | { | |
1178 | /* First visit of a block: set the (children) cost accumulator to zero; | |
1179 | if the block does not have the component itself, walk down. */ | |
1180 | if (first_visit) | |
1181 | { | |
1182 | /* Initialize the cost. The cost is the block execution frequency | |
1183 | that does not come from backedges. Calculating this by simply | |
1184 | adding the cost of all edges that aren't backedges does not | |
1185 | work: this does not always add up to the block frequency at | |
1186 | all, and even if it does, rounding error makes for bad | |
1187 | decisions. */ | |
e7a74006 | 1188 | SW (bb)->own_cost = bb->count.to_frequency (cfun); |
c997869f SB |
1189 | |
1190 | edge e; | |
1191 | edge_iterator ei; | |
1192 | FOR_EACH_EDGE (e, ei, bb->preds) | |
1193 | if (dominated_by_p (CDI_DOMINATORS, e->src, bb)) | |
1194 | { | |
1195 | if (SW (bb)->own_cost > EDGE_FREQUENCY (e)) | |
1196 | SW (bb)->own_cost -= EDGE_FREQUENCY (e); | |
1197 | else | |
1198 | SW (bb)->own_cost = 0; | |
1199 | } | |
1200 | ||
1201 | SW (bb)->total_cost = 0; | |
1202 | ||
1203 | if (!bitmap_bit_p (SW (bb)->needs_components, which) | |
1204 | && first_dom_son (CDI_DOMINATORS, bb)) | |
1205 | { | |
1206 | bb = first_dom_son (CDI_DOMINATORS, bb); | |
1207 | continue; | |
1208 | } | |
1209 | } | |
1210 | ||
1211 | /* If this block does need the component itself, or it is cheaper to | |
1212 | put the prologue here than in all the descendants that need it, | |
1213 | mark it so. If this block's immediate post-dominator is dominated | |
1214 | by this block, and that needs the prologue, we can put it on this | |
1215 | block as well (earlier is better). */ | |
1216 | if (bitmap_bit_p (SW (bb)->needs_components, which) | |
1217 | || SW (bb)->total_cost > SW (bb)->own_cost) | |
1218 | { | |
1219 | SW (bb)->total_cost = SW (bb)->own_cost; | |
1220 | bitmap_set_bit (SW (bb)->has_components, which); | |
1221 | } | |
1222 | else | |
1223 | { | |
1224 | basic_block kid = get_immediate_dominator (CDI_POST_DOMINATORS, bb); | |
1225 | if (dominated_by_p (CDI_DOMINATORS, kid, bb) | |
1226 | && bitmap_bit_p (SW (kid)->has_components, which)) | |
1227 | { | |
1228 | SW (bb)->total_cost = SW (bb)->own_cost; | |
1229 | bitmap_set_bit (SW (bb)->has_components, which); | |
1230 | } | |
1231 | } | |
1232 | ||
1233 | /* We are back where we started, so we are done now. */ | |
1234 | if (bb == head) | |
1235 | return; | |
1236 | ||
1237 | /* We now know the cost of the subtree rooted at the current block. | |
1238 | Accumulate this cost in the parent. */ | |
1239 | basic_block parent = get_immediate_dominator (CDI_DOMINATORS, bb); | |
1240 | SW (parent)->total_cost += SW (bb)->total_cost; | |
1241 | ||
1242 | /* Don't walk the tree down unless necessary. */ | |
1243 | if (next_dom_son (CDI_DOMINATORS, bb) | |
1244 | && SW (parent)->total_cost <= SW (parent)->own_cost) | |
1245 | { | |
1246 | bb = next_dom_son (CDI_DOMINATORS, bb); | |
1247 | first_visit = true; | |
1248 | } | |
1249 | else | |
1250 | { | |
1251 | bb = parent; | |
1252 | first_visit = false; | |
1253 | } | |
1254 | } | |
1255 | } | |
1256 | ||
08dd2b68 SB |
1257 | /* Set HAS_COMPONENTS in every block to the maximum it can be set to without |
1258 | setting it on any path from entry to exit where it was not already set | |
1259 | somewhere (or, for blocks that have no path to the exit, consider only | |
826f35d8 SB |
1260 | paths from the entry to the block itself). Return whether any changes |
1261 | were made to some HAS_COMPONENTS. */ | |
1262 | static bool | |
08dd2b68 | 1263 | spread_components (sbitmap components) |
c997869f | 1264 | { |
08dd2b68 SB |
1265 | basic_block entry_block = ENTRY_BLOCK_PTR_FOR_FN (cfun); |
1266 | basic_block exit_block = EXIT_BLOCK_PTR_FOR_FN (cfun); | |
c997869f | 1267 | |
08dd2b68 SB |
1268 | /* A stack of all blocks left to consider, and a bitmap of all blocks |
1269 | on that stack. */ | |
1270 | vec<basic_block> todo; | |
1271 | todo.create (n_basic_blocks_for_fn (cfun)); | |
0e3de1d4 | 1272 | auto_bitmap seen; |
08dd2b68 | 1273 | |
8f48c622 | 1274 | auto_sbitmap old (SBITMAP_SIZE (components)); |
08dd2b68 SB |
1275 | |
1276 | /* Find for every block the components that are *not* needed on some path | |
1277 | from the entry to that block. Do this with a flood fill from the entry | |
1278 | block. Every block can be visited at most as often as the number of | |
1279 | components (plus one), and usually much less often. */ | |
1280 | ||
1281 | if (dump_file) | |
1282 | fprintf (dump_file, "Spreading down...\n"); | |
1283 | ||
1284 | basic_block bb; | |
1285 | FOR_ALL_BB_FN (bb, cfun) | |
1286 | bitmap_clear (SW (bb)->head_components); | |
1287 | ||
1288 | bitmap_copy (SW (entry_block)->head_components, components); | |
1289 | ||
1290 | edge e; | |
1291 | edge_iterator ei; | |
1292 | ||
1293 | todo.quick_push (single_succ (entry_block)); | |
1294 | bitmap_set_bit (seen, single_succ (entry_block)->index); | |
1295 | while (!todo.is_empty ()) | |
c997869f | 1296 | { |
08dd2b68 | 1297 | bb = todo.pop (); |
c997869f | 1298 | |
08dd2b68 | 1299 | bitmap_copy (old, SW (bb)->head_components); |
c997869f | 1300 | |
08dd2b68 SB |
1301 | FOR_EACH_EDGE (e, ei, bb->preds) |
1302 | bitmap_ior (SW (bb)->head_components, SW (bb)->head_components, | |
1303 | SW (e->src)->head_components); | |
c997869f | 1304 | |
08dd2b68 SB |
1305 | bitmap_and_compl (SW (bb)->head_components, SW (bb)->head_components, |
1306 | SW (bb)->has_components); | |
1307 | ||
1308 | if (!bitmap_equal_p (old, SW (bb)->head_components)) | |
1309 | FOR_EACH_EDGE (e, ei, bb->succs) | |
1310 | if (bitmap_set_bit (seen, e->dest->index)) | |
1311 | todo.quick_push (e->dest); | |
1312 | ||
1313 | bitmap_clear_bit (seen, bb->index); | |
1314 | } | |
1315 | ||
1316 | /* Find for every block the components that are *not* needed on some reverse | |
1317 | path from the exit to that block. */ | |
1318 | ||
1319 | if (dump_file) | |
1320 | fprintf (dump_file, "Spreading up...\n"); | |
1321 | ||
1322 | /* First, mark all blocks not reachable from the exit block as not needing | |
1323 | any component on any path to the exit. Mark everything, and then clear | |
1324 | again by a flood fill. */ | |
1325 | ||
1326 | FOR_ALL_BB_FN (bb, cfun) | |
1327 | bitmap_copy (SW (bb)->tail_components, components); | |
1328 | ||
1329 | FOR_EACH_EDGE (e, ei, exit_block->preds) | |
1330 | { | |
1331 | todo.quick_push (e->src); | |
1332 | bitmap_set_bit (seen, e->src->index); | |
1333 | } | |
1334 | ||
1335 | while (!todo.is_empty ()) | |
1336 | { | |
1337 | bb = todo.pop (); | |
1338 | ||
1339 | if (!bitmap_empty_p (SW (bb)->tail_components)) | |
1340 | FOR_EACH_EDGE (e, ei, bb->preds) | |
1341 | if (bitmap_set_bit (seen, e->src->index)) | |
1342 | todo.quick_push (e->src); | |
1343 | ||
1344 | bitmap_clear (SW (bb)->tail_components); | |
1345 | ||
1346 | bitmap_clear_bit (seen, bb->index); | |
1347 | } | |
1348 | ||
1349 | /* And then, flood fill backwards to find for every block the components | |
1350 | not needed on some path to the exit. */ | |
1351 | ||
1352 | bitmap_copy (SW (exit_block)->tail_components, components); | |
1353 | ||
1354 | FOR_EACH_EDGE (e, ei, exit_block->preds) | |
1355 | { | |
1356 | todo.quick_push (e->src); | |
1357 | bitmap_set_bit (seen, e->src->index); | |
1358 | } | |
1359 | ||
1360 | while (!todo.is_empty ()) | |
1361 | { | |
1362 | bb = todo.pop (); | |
1363 | ||
1364 | bitmap_copy (old, SW (bb)->tail_components); | |
1365 | ||
1366 | FOR_EACH_EDGE (e, ei, bb->succs) | |
1367 | bitmap_ior (SW (bb)->tail_components, SW (bb)->tail_components, | |
1368 | SW (e->dest)->tail_components); | |
1369 | ||
1370 | bitmap_and_compl (SW (bb)->tail_components, SW (bb)->tail_components, | |
1371 | SW (bb)->has_components); | |
1372 | ||
1373 | if (!bitmap_equal_p (old, SW (bb)->tail_components)) | |
1374 | FOR_EACH_EDGE (e, ei, bb->preds) | |
1375 | if (bitmap_set_bit (seen, e->src->index)) | |
1376 | todo.quick_push (e->src); | |
1377 | ||
1378 | bitmap_clear_bit (seen, bb->index); | |
1379 | } | |
1380 | ||
5ca8e744 JJ |
1381 | todo.release (); |
1382 | ||
700d4cb0 | 1383 | /* Finally, mark everything not needed both forwards and backwards. */ |
08dd2b68 | 1384 | |
826f35d8 SB |
1385 | bool did_changes = false; |
1386 | ||
08dd2b68 SB |
1387 | FOR_EACH_BB_FN (bb, cfun) |
1388 | { | |
826f35d8 SB |
1389 | bitmap_copy (old, SW (bb)->has_components); |
1390 | ||
08dd2b68 SB |
1391 | bitmap_and (SW (bb)->head_components, SW (bb)->head_components, |
1392 | SW (bb)->tail_components); | |
1393 | bitmap_and_compl (SW (bb)->has_components, components, | |
1394 | SW (bb)->head_components); | |
826f35d8 SB |
1395 | |
1396 | if (!did_changes && !bitmap_equal_p (old, SW (bb)->has_components)) | |
1397 | did_changes = true; | |
08dd2b68 SB |
1398 | } |
1399 | ||
1400 | FOR_ALL_BB_FN (bb, cfun) | |
1401 | { | |
1402 | if (dump_file) | |
c997869f | 1403 | { |
08dd2b68 SB |
1404 | fprintf (dump_file, "bb %d components:", bb->index); |
1405 | dump_components ("has", SW (bb)->has_components); | |
1406 | fprintf (dump_file, "\n"); | |
c997869f SB |
1407 | } |
1408 | } | |
826f35d8 SB |
1409 | |
1410 | return did_changes; | |
c997869f SB |
1411 | } |
1412 | ||
1413 | /* If we cannot handle placing some component's prologues or epilogues where | |
1414 | we decided we should place them, unmark that component in COMPONENTS so | |
1415 | that it is not wrapped separately. */ | |
1416 | static void | |
1417 | disqualify_problematic_components (sbitmap components) | |
1418 | { | |
8f48c622 TS |
1419 | auto_sbitmap pro (SBITMAP_SIZE (components)); |
1420 | auto_sbitmap epi (SBITMAP_SIZE (components)); | |
c997869f SB |
1421 | |
1422 | basic_block bb; | |
1423 | FOR_EACH_BB_FN (bb, cfun) | |
1424 | { | |
1425 | edge e; | |
1426 | edge_iterator ei; | |
1427 | FOR_EACH_EDGE (e, ei, bb->succs) | |
1428 | { | |
1429 | /* Find which components we want pro/epilogues for here. */ | |
1430 | bitmap_and_compl (epi, SW (e->src)->has_components, | |
1431 | SW (e->dest)->has_components); | |
1432 | bitmap_and_compl (pro, SW (e->dest)->has_components, | |
1433 | SW (e->src)->has_components); | |
1434 | ||
1435 | /* Ask the target what it thinks about things. */ | |
1436 | if (!bitmap_empty_p (epi)) | |
1437 | targetm.shrink_wrap.disqualify_components (components, e, epi, | |
1438 | false); | |
1439 | if (!bitmap_empty_p (pro)) | |
1440 | targetm.shrink_wrap.disqualify_components (components, e, pro, | |
1441 | true); | |
1442 | ||
1443 | /* If this edge doesn't need splitting, we're fine. */ | |
1444 | if (single_pred_p (e->dest) | |
1445 | && e->dest != EXIT_BLOCK_PTR_FOR_FN (cfun)) | |
1446 | continue; | |
1447 | ||
1448 | /* If the edge can be split, that is fine too. */ | |
1449 | if ((e->flags & EDGE_ABNORMAL) == 0) | |
1450 | continue; | |
1451 | ||
1452 | /* We also can handle sibcalls. */ | |
1453 | if (e->dest == EXIT_BLOCK_PTR_FOR_FN (cfun)) | |
1454 | { | |
1455 | gcc_assert (e->flags & EDGE_SIBCALL); | |
1456 | continue; | |
1457 | } | |
1458 | ||
1459 | /* Remove from consideration those components we would need | |
1460 | pro/epilogues for on edges where we cannot insert them. */ | |
1461 | bitmap_and_compl (components, components, epi); | |
1462 | bitmap_and_compl (components, components, pro); | |
1463 | ||
1464 | if (dump_file && !bitmap_subset_p (epi, components)) | |
1465 | { | |
1466 | fprintf (dump_file, " BAD epi %d->%d", e->src->index, | |
1467 | e->dest->index); | |
1468 | if (e->flags & EDGE_EH) | |
1469 | fprintf (dump_file, " for EH"); | |
1470 | dump_components ("epi", epi); | |
1471 | fprintf (dump_file, "\n"); | |
1472 | } | |
1473 | ||
1474 | if (dump_file && !bitmap_subset_p (pro, components)) | |
1475 | { | |
1476 | fprintf (dump_file, " BAD pro %d->%d", e->src->index, | |
1477 | e->dest->index); | |
1478 | if (e->flags & EDGE_EH) | |
1479 | fprintf (dump_file, " for EH"); | |
1480 | dump_components ("pro", pro); | |
1481 | fprintf (dump_file, "\n"); | |
1482 | } | |
1483 | } | |
1484 | } | |
c997869f SB |
1485 | } |
1486 | ||
1487 | /* Place code for prologues and epilogues for COMPONENTS where we can put | |
1488 | that code at the start of basic blocks. */ | |
1489 | static void | |
1490 | emit_common_heads_for_components (sbitmap components) | |
1491 | { | |
8f48c622 TS |
1492 | auto_sbitmap pro (SBITMAP_SIZE (components)); |
1493 | auto_sbitmap epi (SBITMAP_SIZE (components)); | |
1494 | auto_sbitmap tmp (SBITMAP_SIZE (components)); | |
c997869f SB |
1495 | |
1496 | basic_block bb; | |
08dd2b68 SB |
1497 | FOR_ALL_BB_FN (bb, cfun) |
1498 | bitmap_clear (SW (bb)->head_components); | |
1499 | ||
c997869f SB |
1500 | FOR_EACH_BB_FN (bb, cfun) |
1501 | { | |
1502 | /* Find which prologue resp. epilogue components are needed for all | |
1503 | predecessor edges to this block. */ | |
1504 | ||
1505 | /* First, select all possible components. */ | |
1506 | bitmap_copy (epi, components); | |
1507 | bitmap_copy (pro, components); | |
1508 | ||
1509 | edge e; | |
1510 | edge_iterator ei; | |
1511 | FOR_EACH_EDGE (e, ei, bb->preds) | |
1512 | { | |
1513 | if (e->flags & EDGE_ABNORMAL) | |
1514 | { | |
1515 | bitmap_clear (epi); | |
1516 | bitmap_clear (pro); | |
1517 | break; | |
1518 | } | |
1519 | ||
1520 | /* Deselect those epilogue components that should not be inserted | |
1521 | for this edge. */ | |
1522 | bitmap_and_compl (tmp, SW (e->src)->has_components, | |
1523 | SW (e->dest)->has_components); | |
1524 | bitmap_and (epi, epi, tmp); | |
1525 | ||
1526 | /* Similar, for the prologue. */ | |
1527 | bitmap_and_compl (tmp, SW (e->dest)->has_components, | |
1528 | SW (e->src)->has_components); | |
1529 | bitmap_and (pro, pro, tmp); | |
1530 | } | |
1531 | ||
1532 | if (dump_file && !(bitmap_empty_p (epi) && bitmap_empty_p (pro))) | |
1533 | fprintf (dump_file, " bb %d", bb->index); | |
1534 | ||
1535 | if (dump_file && !bitmap_empty_p (epi)) | |
1536 | dump_components ("epi", epi); | |
1537 | if (dump_file && !bitmap_empty_p (pro)) | |
1538 | dump_components ("pro", pro); | |
1539 | ||
1540 | if (dump_file && !(bitmap_empty_p (epi) && bitmap_empty_p (pro))) | |
1541 | fprintf (dump_file, "\n"); | |
1542 | ||
1543 | /* Place code after the BB note. */ | |
1544 | if (!bitmap_empty_p (pro)) | |
1545 | { | |
1546 | start_sequence (); | |
1547 | targetm.shrink_wrap.emit_prologue_components (pro); | |
1548 | rtx_insn *seq = get_insns (); | |
1549 | end_sequence (); | |
64f6e1e1 | 1550 | record_prologue_seq (seq); |
c997869f SB |
1551 | |
1552 | emit_insn_after (seq, bb_note (bb)); | |
1553 | ||
1554 | bitmap_ior (SW (bb)->head_components, SW (bb)->head_components, pro); | |
1555 | } | |
1556 | ||
1557 | if (!bitmap_empty_p (epi)) | |
1558 | { | |
1559 | start_sequence (); | |
1560 | targetm.shrink_wrap.emit_epilogue_components (epi); | |
1561 | rtx_insn *seq = get_insns (); | |
1562 | end_sequence (); | |
64f6e1e1 | 1563 | record_epilogue_seq (seq); |
c997869f SB |
1564 | |
1565 | emit_insn_after (seq, bb_note (bb)); | |
1566 | ||
1567 | bitmap_ior (SW (bb)->head_components, SW (bb)->head_components, epi); | |
1568 | } | |
1569 | } | |
c997869f SB |
1570 | } |
1571 | ||
1572 | /* Place code for prologues and epilogues for COMPONENTS where we can put | |
1573 | that code at the end of basic blocks. */ | |
1574 | static void | |
1575 | emit_common_tails_for_components (sbitmap components) | |
1576 | { | |
8f48c622 TS |
1577 | auto_sbitmap pro (SBITMAP_SIZE (components)); |
1578 | auto_sbitmap epi (SBITMAP_SIZE (components)); | |
1579 | auto_sbitmap tmp (SBITMAP_SIZE (components)); | |
c997869f SB |
1580 | |
1581 | basic_block bb; | |
08dd2b68 SB |
1582 | FOR_ALL_BB_FN (bb, cfun) |
1583 | bitmap_clear (SW (bb)->tail_components); | |
1584 | ||
c997869f SB |
1585 | FOR_EACH_BB_FN (bb, cfun) |
1586 | { | |
1587 | /* Find which prologue resp. epilogue components are needed for all | |
1588 | successor edges from this block. */ | |
1589 | if (EDGE_COUNT (bb->succs) == 0) | |
1590 | continue; | |
1591 | ||
1592 | /* First, select all possible components. */ | |
1593 | bitmap_copy (epi, components); | |
1594 | bitmap_copy (pro, components); | |
1595 | ||
1596 | edge e; | |
1597 | edge_iterator ei; | |
1598 | FOR_EACH_EDGE (e, ei, bb->succs) | |
1599 | { | |
1600 | if (e->flags & EDGE_ABNORMAL) | |
1601 | { | |
1602 | bitmap_clear (epi); | |
1603 | bitmap_clear (pro); | |
1604 | break; | |
1605 | } | |
1606 | ||
1607 | /* Deselect those epilogue components that should not be inserted | |
1608 | for this edge, and also those that are already put at the head | |
1609 | of the successor block. */ | |
1610 | bitmap_and_compl (tmp, SW (e->src)->has_components, | |
1611 | SW (e->dest)->has_components); | |
1612 | bitmap_and_compl (tmp, tmp, SW (e->dest)->head_components); | |
1613 | bitmap_and (epi, epi, tmp); | |
1614 | ||
1615 | /* Similarly, for the prologue. */ | |
1616 | bitmap_and_compl (tmp, SW (e->dest)->has_components, | |
1617 | SW (e->src)->has_components); | |
1618 | bitmap_and_compl (tmp, tmp, SW (e->dest)->head_components); | |
1619 | bitmap_and (pro, pro, tmp); | |
1620 | } | |
1621 | ||
1622 | /* If the last insn of this block is a control flow insn we cannot | |
1623 | put anything after it. We can put our code before it instead, | |
1624 | but only if that jump insn is a simple jump. */ | |
1625 | rtx_insn *last_insn = BB_END (bb); | |
1626 | if (control_flow_insn_p (last_insn) && !simplejump_p (last_insn)) | |
1627 | { | |
1628 | bitmap_clear (epi); | |
1629 | bitmap_clear (pro); | |
1630 | } | |
1631 | ||
1632 | if (dump_file && !(bitmap_empty_p (epi) && bitmap_empty_p (pro))) | |
1633 | fprintf (dump_file, " bb %d", bb->index); | |
1634 | ||
1635 | if (dump_file && !bitmap_empty_p (epi)) | |
1636 | dump_components ("epi", epi); | |
1637 | if (dump_file && !bitmap_empty_p (pro)) | |
1638 | dump_components ("pro", pro); | |
1639 | ||
1640 | if (dump_file && !(bitmap_empty_p (epi) && bitmap_empty_p (pro))) | |
1641 | fprintf (dump_file, "\n"); | |
1642 | ||
1643 | /* Put the code at the end of the BB, but before any final jump. */ | |
1644 | if (!bitmap_empty_p (epi)) | |
1645 | { | |
1646 | start_sequence (); | |
1647 | targetm.shrink_wrap.emit_epilogue_components (epi); | |
1648 | rtx_insn *seq = get_insns (); | |
1649 | end_sequence (); | |
64f6e1e1 | 1650 | record_epilogue_seq (seq); |
c997869f SB |
1651 | |
1652 | if (control_flow_insn_p (last_insn)) | |
1653 | emit_insn_before (seq, last_insn); | |
1654 | else | |
1655 | emit_insn_after (seq, last_insn); | |
1656 | ||
1657 | bitmap_ior (SW (bb)->tail_components, SW (bb)->tail_components, epi); | |
1658 | } | |
1659 | ||
1660 | if (!bitmap_empty_p (pro)) | |
1661 | { | |
1662 | start_sequence (); | |
1663 | targetm.shrink_wrap.emit_prologue_components (pro); | |
1664 | rtx_insn *seq = get_insns (); | |
1665 | end_sequence (); | |
64f6e1e1 | 1666 | record_prologue_seq (seq); |
c997869f SB |
1667 | |
1668 | if (control_flow_insn_p (last_insn)) | |
1669 | emit_insn_before (seq, last_insn); | |
1670 | else | |
1671 | emit_insn_after (seq, last_insn); | |
1672 | ||
1673 | bitmap_ior (SW (bb)->tail_components, SW (bb)->tail_components, pro); | |
1674 | } | |
1675 | } | |
c997869f SB |
1676 | } |
1677 | ||
1678 | /* Place prologues and epilogues for COMPONENTS on edges, if we haven't already | |
1679 | placed them inside blocks directly. */ | |
1680 | static void | |
1681 | insert_prologue_epilogue_for_components (sbitmap components) | |
1682 | { | |
8f48c622 TS |
1683 | auto_sbitmap pro (SBITMAP_SIZE (components)); |
1684 | auto_sbitmap epi (SBITMAP_SIZE (components)); | |
c997869f SB |
1685 | |
1686 | basic_block bb; | |
1687 | FOR_EACH_BB_FN (bb, cfun) | |
1688 | { | |
1689 | if (!bb->aux) | |
1690 | continue; | |
1691 | ||
1692 | edge e; | |
1693 | edge_iterator ei; | |
1694 | FOR_EACH_EDGE (e, ei, bb->succs) | |
1695 | { | |
1696 | /* Find which pro/epilogue components are needed on this edge. */ | |
1697 | bitmap_and_compl (epi, SW (e->src)->has_components, | |
1698 | SW (e->dest)->has_components); | |
1699 | bitmap_and_compl (pro, SW (e->dest)->has_components, | |
1700 | SW (e->src)->has_components); | |
1701 | bitmap_and (epi, epi, components); | |
1702 | bitmap_and (pro, pro, components); | |
1703 | ||
1704 | /* Deselect those we already have put at the head or tail of the | |
1705 | edge's dest resp. src. */ | |
1706 | bitmap_and_compl (epi, epi, SW (e->dest)->head_components); | |
1707 | bitmap_and_compl (pro, pro, SW (e->dest)->head_components); | |
1708 | bitmap_and_compl (epi, epi, SW (e->src)->tail_components); | |
1709 | bitmap_and_compl (pro, pro, SW (e->src)->tail_components); | |
1710 | ||
1711 | if (!bitmap_empty_p (epi) || !bitmap_empty_p (pro)) | |
1712 | { | |
1713 | if (dump_file) | |
1714 | { | |
1715 | fprintf (dump_file, " %d->%d", e->src->index, | |
1716 | e->dest->index); | |
1717 | dump_components ("epi", epi); | |
1718 | dump_components ("pro", pro); | |
08dd2b68 SB |
1719 | if (e->flags & EDGE_SIBCALL) |
1720 | fprintf (dump_file, " (SIBCALL)"); | |
1721 | else if (e->flags & EDGE_ABNORMAL) | |
1722 | fprintf (dump_file, " (ABNORMAL)"); | |
c997869f SB |
1723 | fprintf (dump_file, "\n"); |
1724 | } | |
1725 | ||
1726 | /* Put the epilogue components in place. */ | |
1727 | start_sequence (); | |
1728 | targetm.shrink_wrap.emit_epilogue_components (epi); | |
1729 | rtx_insn *seq = get_insns (); | |
1730 | end_sequence (); | |
64f6e1e1 | 1731 | record_epilogue_seq (seq); |
c997869f SB |
1732 | |
1733 | if (e->flags & EDGE_SIBCALL) | |
1734 | { | |
1735 | gcc_assert (e->dest == EXIT_BLOCK_PTR_FOR_FN (cfun)); | |
1736 | ||
1737 | rtx_insn *insn = BB_END (e->src); | |
1738 | gcc_assert (CALL_P (insn) && SIBLING_CALL_P (insn)); | |
1739 | emit_insn_before (seq, insn); | |
1740 | } | |
1741 | else if (e->dest == EXIT_BLOCK_PTR_FOR_FN (cfun)) | |
1742 | { | |
1743 | gcc_assert (e->flags & EDGE_FALLTHRU); | |
1744 | basic_block new_bb = split_edge (e); | |
1745 | emit_insn_after (seq, BB_END (new_bb)); | |
1746 | } | |
1747 | else | |
1748 | insert_insn_on_edge (seq, e); | |
1749 | ||
1750 | /* Put the prologue components in place. */ | |
1751 | start_sequence (); | |
1752 | targetm.shrink_wrap.emit_prologue_components (pro); | |
1753 | seq = get_insns (); | |
1754 | end_sequence (); | |
64f6e1e1 | 1755 | record_prologue_seq (seq); |
c997869f SB |
1756 | |
1757 | insert_insn_on_edge (seq, e); | |
1758 | } | |
1759 | } | |
1760 | } | |
1761 | ||
c997869f SB |
1762 | commit_edge_insertions (); |
1763 | } | |
1764 | ||
1765 | /* The main entry point to this subpass. FIRST_BB is where the prologue | |
1766 | would be normally put. */ | |
1767 | void | |
1768 | try_shrink_wrapping_separate (basic_block first_bb) | |
1769 | { | |
1770 | if (HAVE_cc0) | |
1771 | return; | |
1772 | ||
1773 | if (!(SHRINK_WRAPPING_ENABLED | |
1774 | && flag_shrink_wrap_separate | |
1775 | && optimize_function_for_speed_p (cfun) | |
1776 | && targetm.shrink_wrap.get_separate_components)) | |
1777 | return; | |
1778 | ||
1779 | /* We don't handle "strange" functions. */ | |
1780 | if (cfun->calls_alloca | |
1781 | || cfun->calls_setjmp | |
1782 | || cfun->can_throw_non_call_exceptions | |
1783 | || crtl->calls_eh_return | |
1784 | || crtl->has_nonlocal_goto | |
1785 | || crtl->saves_all_registers) | |
1786 | return; | |
1787 | ||
1788 | /* Ask the target what components there are. If it returns NULL, don't | |
1789 | do anything. */ | |
1790 | sbitmap components = targetm.shrink_wrap.get_separate_components (); | |
1791 | if (!components) | |
1792 | return; | |
1793 | ||
7157aa85 SB |
1794 | /* We need LIVE info, not defining anything in the entry block and not |
1795 | using anything in the exit block. A block then needs a component if | |
1796 | the register for that component is in the IN or GEN or KILL set for | |
1797 | that block. */ | |
1798 | df_scan->local_flags |= DF_SCAN_EMPTY_ENTRY_EXIT; | |
1c7926f6 | 1799 | df_update_entry_exit_and_calls (); |
c997869f SB |
1800 | df_live_add_problem (); |
1801 | df_live_set_all_dirty (); | |
1802 | df_analyze (); | |
1803 | ||
1804 | calculate_dominance_info (CDI_DOMINATORS); | |
1805 | calculate_dominance_info (CDI_POST_DOMINATORS); | |
1806 | ||
1807 | init_separate_shrink_wrap (components); | |
1808 | ||
1809 | sbitmap_iterator sbi; | |
1810 | unsigned int j; | |
1811 | EXECUTE_IF_SET_IN_BITMAP (components, 0, j, sbi) | |
1812 | place_prologue_for_one_component (j, first_bb); | |
1813 | ||
826f35d8 SB |
1814 | /* Try to minimize the number of saves and restores. Do this as long as |
1815 | it changes anything. This does not iterate more than a few times. */ | |
1816 | int spread_times = 0; | |
1817 | while (spread_components (components)) | |
1818 | { | |
1819 | spread_times++; | |
1820 | ||
1821 | if (dump_file) | |
1822 | fprintf (dump_file, "Now spread %d times.\n", spread_times); | |
1823 | } | |
c997869f SB |
1824 | |
1825 | disqualify_problematic_components (components); | |
1826 | ||
1827 | /* Don't separately shrink-wrap anything where the "main" prologue will | |
1828 | go; the target code can often optimize things if it is presented with | |
1829 | all components together (say, if it generates store-multiple insns). */ | |
1830 | bitmap_and_compl (components, components, SW (first_bb)->has_components); | |
1831 | ||
1832 | if (bitmap_empty_p (components)) | |
1833 | { | |
1834 | if (dump_file) | |
1835 | fprintf (dump_file, "Not wrapping anything separately.\n"); | |
1836 | } | |
1837 | else | |
1838 | { | |
1839 | if (dump_file) | |
1840 | { | |
1841 | fprintf (dump_file, "The components we wrap separately are"); | |
1842 | dump_components ("sep", components); | |
1843 | fprintf (dump_file, "\n"); | |
1844 | ||
1845 | fprintf (dump_file, "... Inserting common heads...\n"); | |
1846 | } | |
1847 | ||
1848 | emit_common_heads_for_components (components); | |
1849 | ||
1850 | if (dump_file) | |
1851 | fprintf (dump_file, "... Inserting common tails...\n"); | |
1852 | ||
1853 | emit_common_tails_for_components (components); | |
1854 | ||
1855 | if (dump_file) | |
1856 | fprintf (dump_file, "... Inserting the more difficult ones...\n"); | |
1857 | ||
1858 | insert_prologue_epilogue_for_components (components); | |
1859 | ||
1860 | if (dump_file) | |
1861 | fprintf (dump_file, "... Done.\n"); | |
1862 | ||
1863 | targetm.shrink_wrap.set_handled_components (components); | |
1864 | ||
1865 | crtl->shrink_wrapped_separate = true; | |
1866 | } | |
1867 | ||
1868 | fini_separate_shrink_wrap (); | |
1869 | ||
1870 | sbitmap_free (components); | |
1871 | free_dominance_info (CDI_DOMINATORS); | |
1872 | free_dominance_info (CDI_POST_DOMINATORS); | |
1873 | ||
7157aa85 SB |
1874 | /* All done. */ |
1875 | df_scan->local_flags &= ~DF_SCAN_EMPTY_ENTRY_EXIT; | |
1c7926f6 | 1876 | df_update_entry_exit_and_calls (); |
7157aa85 SB |
1877 | df_live_set_all_dirty (); |
1878 | df_analyze (); | |
c997869f | 1879 | } |