]>
Commit | Line | Data |
---|---|---|
617b465c | 1 | /* Loop unswitching for GNU compiler. |
25d8d27d | 2 | Copyright (C) 2002, 2003, 2004, 2005 Free Software Foundation, Inc. |
617b465c ZD |
3 | |
4 | This file is part of GCC. | |
5 | ||
6 | GCC is free software; you can redistribute it and/or modify it under | |
7 | the terms of the GNU General Public License as published by the Free | |
8 | Software Foundation; either version 2, or (at your option) any later | |
9 | version. | |
10 | ||
11 | GCC is distributed in the hope that it will be useful, but WITHOUT ANY | |
12 | WARRANTY; without even the implied warranty of MERCHANTABILITY or | |
13 | FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License | |
14 | for more details. | |
15 | ||
16 | You should have received a copy of the GNU General Public License | |
17 | along with GCC; see the file COPYING. If not, write to the Free | |
18 | Software Foundation, 59 Temple Place - Suite 330, Boston, MA | |
19 | 02111-1307, USA. */ | |
20 | ||
21 | #include "config.h" | |
22 | #include "system.h" | |
23 | #include "coretypes.h" | |
24 | #include "tm.h" | |
25 | #include "rtl.h" | |
26 | #include "hard-reg-set.h" | |
7932a3db | 27 | #include "obstack.h" |
617b465c ZD |
28 | #include "basic-block.h" |
29 | #include "cfgloop.h" | |
30 | #include "cfglayout.h" | |
31 | #include "params.h" | |
32 | #include "output.h" | |
33 | #include "expr.h" | |
34 | ||
35 | /* This pass moves constant conditions out of loops, duplicating the loop | |
e0bb17a8 | 36 | in progress, i.e. this code: |
617b465c ZD |
37 | |
38 | while (loop_cond) | |
39 | { | |
40 | A; | |
41 | if (cond) | |
42 | branch1; | |
43 | else | |
44 | branch2; | |
45 | B; | |
46 | if (cond) | |
47 | branch3; | |
48 | C; | |
49 | } | |
50 | where nothing inside the loop alters cond is transformed | |
51 | into | |
52 | ||
53 | if (cond) | |
54 | { | |
55 | while (loop_cond) | |
56 | { | |
57 | A; | |
58 | branch1; | |
59 | B; | |
60 | branch3; | |
61 | C; | |
62 | } | |
63 | } | |
64 | else | |
65 | { | |
66 | while (loop_cond) | |
67 | { | |
68 | A; | |
69 | branch2; | |
70 | B; | |
71 | C; | |
72 | } | |
73 | } | |
74 | ||
75 | Duplicating the loop might lead to code growth exponential in number of | |
76 | branches inside loop, so we limit the number of unswitchings performed | |
77 | in a single loop to PARAM_MAX_UNSWITCH_LEVEL. We only perform the | |
78 | transformation on innermost loops, as the benefit of doing it on loops | |
79 | containing subloops would not be very large compared to complications | |
80 | with handling this case. */ | |
81 | ||
0c20a65f | 82 | static struct loop *unswitch_loop (struct loops *, struct loop *, |
50654f6c | 83 | basic_block, rtx, rtx); |
0c20a65f | 84 | static void unswitch_single_loop (struct loops *, struct loop *, rtx, int); |
50654f6c ZD |
85 | static rtx may_unswitch_on (basic_block, struct loop *, rtx *); |
86 | ||
87 | /* Prepare a sequence comparing OP0 with OP1 using COMP and jumping to LABEL if | |
88 | true, with probability PROB. If CINSN is not NULL, it is the insn to copy | |
89 | in order to create a jump. */ | |
90 | ||
91 | rtx | |
92 | compare_and_jump_seq (rtx op0, rtx op1, enum rtx_code comp, rtx label, int prob, | |
93 | rtx cinsn) | |
94 | { | |
95 | rtx seq, jump, cond; | |
96 | enum machine_mode mode; | |
97 | ||
98 | mode = GET_MODE (op0); | |
99 | if (mode == VOIDmode) | |
100 | mode = GET_MODE (op1); | |
101 | ||
102 | start_sequence (); | |
103 | if (GET_MODE_CLASS (mode) == MODE_CC) | |
104 | { | |
105 | /* A hack -- there seems to be no easy generic way how to make a | |
106 | conditional jump from a ccmode comparison. */ | |
b5e624c6 | 107 | gcc_assert (cinsn); |
50654f6c | 108 | cond = XEXP (SET_SRC (pc_set (cinsn)), 0); |
b5e624c6 NS |
109 | gcc_assert (GET_CODE (cond) == comp); |
110 | gcc_assert (rtx_equal_p (op0, XEXP (cond, 0))); | |
111 | gcc_assert (rtx_equal_p (op1, XEXP (cond, 1))); | |
50654f6c ZD |
112 | emit_jump_insn (copy_insn (PATTERN (cinsn))); |
113 | jump = get_last_insn (); | |
114 | JUMP_LABEL (jump) = JUMP_LABEL (cinsn); | |
115 | LABEL_NUSES (JUMP_LABEL (jump))++; | |
116 | redirect_jump (jump, label, 0); | |
117 | } | |
118 | else | |
119 | { | |
b5e624c6 | 120 | gcc_assert (!cinsn); |
50654f6c ZD |
121 | |
122 | op0 = force_operand (op0, NULL_RTX); | |
123 | op1 = force_operand (op1, NULL_RTX); | |
124 | do_compare_rtx_and_jump (op0, op1, comp, 0, | |
125 | mode, NULL_RTX, NULL_RTX, label); | |
126 | jump = get_last_insn (); | |
127 | JUMP_LABEL (jump) = label; | |
128 | LABEL_NUSES (label)++; | |
129 | } | |
130 | REG_NOTES (jump) = gen_rtx_EXPR_LIST (REG_BR_PROB, GEN_INT (prob), | |
131 | REG_NOTES (jump)); | |
132 | seq = get_insns (); | |
133 | end_sequence (); | |
134 | ||
135 | return seq; | |
136 | } | |
617b465c ZD |
137 | |
138 | /* Main entry point. Perform loop unswitching on all suitable LOOPS. */ | |
139 | void | |
0c20a65f | 140 | unswitch_loops (struct loops *loops) |
617b465c ZD |
141 | { |
142 | int i, num; | |
143 | struct loop *loop; | |
144 | ||
145 | /* Go through inner loops (only original ones). */ | |
146 | num = loops->num; | |
0c20a65f | 147 | |
617b465c ZD |
148 | for (i = 1; i < num; i++) |
149 | { | |
150 | /* Removed loop? */ | |
151 | loop = loops->parray[i]; | |
152 | if (!loop) | |
153 | continue; | |
154 | ||
155 | if (loop->inner) | |
156 | continue; | |
157 | ||
158 | unswitch_single_loop (loops, loop, NULL_RTX, 0); | |
159 | #ifdef ENABLE_CHECKING | |
d47cc544 | 160 | verify_dominators (CDI_DOMINATORS); |
617b465c ZD |
161 | verify_loop_structure (loops); |
162 | #endif | |
163 | } | |
50654f6c ZD |
164 | |
165 | iv_analysis_done (); | |
617b465c ZD |
166 | } |
167 | ||
168 | /* Checks whether we can unswitch LOOP on condition at end of BB -- one of its | |
50654f6c ZD |
169 | basic blocks (for what it means see comments below). In case condition |
170 | compares loop invariant cc mode register, return the jump in CINSN. */ | |
171 | ||
172 | static rtx | |
173 | may_unswitch_on (basic_block bb, struct loop *loop, rtx *cinsn) | |
617b465c | 174 | { |
0aea6467 | 175 | rtx test, at, insn, op[2], stest; |
50654f6c | 176 | struct rtx_iv iv; |
617b465c | 177 | unsigned i; |
50654f6c | 178 | enum machine_mode mode; |
617b465c ZD |
179 | |
180 | /* BB must end in a simple conditional jump. */ | |
628f6a4e | 181 | if (EDGE_COUNT (bb->succs) != 2) |
50654f6c | 182 | return NULL_RTX; |
a813c111 | 183 | if (!any_condjump_p (BB_END (bb))) |
50654f6c | 184 | return NULL_RTX; |
617b465c ZD |
185 | |
186 | /* With branches inside loop. */ | |
628f6a4e BE |
187 | if (!flow_bb_inside_loop_p (loop, EDGE_SUCC (bb, 0)->dest) |
188 | || !flow_bb_inside_loop_p (loop, EDGE_SUCC (bb, 1)->dest)) | |
50654f6c | 189 | return NULL_RTX; |
617b465c ZD |
190 | |
191 | /* It must be executed just once each iteration (because otherwise we | |
192 | are unable to update dominator/irreducible loop information correctly). */ | |
d47cc544 | 193 | if (!just_once_each_iteration_p (loop, bb)) |
50654f6c | 194 | return NULL_RTX; |
617b465c | 195 | |
50654f6c | 196 | /* Condition must be invariant. */ |
45d09c02 | 197 | test = get_condition (BB_END (bb), &at, true, false); |
617b465c | 198 | if (!test) |
50654f6c ZD |
199 | return NULL_RTX; |
200 | ||
201 | for (i = 0; i < 2; i++) | |
202 | { | |
203 | op[i] = XEXP (test, i); | |
204 | ||
205 | if (CONSTANT_P (op[i])) | |
206 | continue; | |
207 | ||
208 | insn = iv_get_reaching_def (at, op[i]); | |
6d4e0ecc | 209 | if (!iv_analyze (insn, op[i], &iv)) |
50654f6c ZD |
210 | return NULL_RTX; |
211 | if (iv.step != const0_rtx | |
212 | || iv.first_special) | |
213 | return NULL_RTX; | |
214 | ||
215 | op[i] = get_iv_value (&iv, const0_rtx); | |
216 | } | |
217 | ||
218 | mode = GET_MODE (op[0]); | |
219 | if (mode == VOIDmode) | |
220 | mode = GET_MODE (op[1]); | |
221 | if (GET_MODE_CLASS (mode) == MODE_CC) | |
222 | { | |
223 | if (at != BB_END (bb)) | |
224 | return NULL_RTX; | |
617b465c | 225 | |
50654f6c ZD |
226 | *cinsn = BB_END (bb); |
227 | if (!rtx_equal_p (op[0], XEXP (test, 0)) | |
228 | || !rtx_equal_p (op[1], XEXP (test, 1))) | |
229 | return NULL_RTX; | |
617b465c | 230 | |
50654f6c ZD |
231 | return test; |
232 | } | |
233 | ||
0aea6467 ZD |
234 | stest = simplify_gen_relational (GET_CODE (test), SImode, |
235 | mode, op[0], op[1]); | |
236 | if (stest == const0_rtx | |
237 | || stest == const_true_rtx) | |
238 | return stest; | |
239 | ||
50654f6c ZD |
240 | return canon_condition (gen_rtx_fmt_ee (GET_CODE (test), SImode, |
241 | op[0], op[1])); | |
617b465c ZD |
242 | } |
243 | ||
244 | /* Reverses CONDition; returns NULL if we cannot. */ | |
50654f6c | 245 | rtx |
0c20a65f | 246 | reversed_condition (rtx cond) |
617b465c ZD |
247 | { |
248 | enum rtx_code reversed; | |
249 | reversed = reversed_comparison_code (cond, NULL); | |
250 | if (reversed == UNKNOWN) | |
251 | return NULL_RTX; | |
252 | else | |
253 | return gen_rtx_fmt_ee (reversed, | |
254 | GET_MODE (cond), XEXP (cond, 0), | |
255 | XEXP (cond, 1)); | |
256 | } | |
257 | ||
258 | /* Unswitch single LOOP. COND_CHECKED holds list of conditions we already | |
259 | unswitched on and are therefore known to be true in this LOOP. NUM is | |
260 | number of unswitchings done; do not allow it to grow too much, it is too | |
261 | easy to create example on that the code would grow exponentially. */ | |
262 | static void | |
0c20a65f AJ |
263 | unswitch_single_loop (struct loops *loops, struct loop *loop, |
264 | rtx cond_checked, int num) | |
617b465c | 265 | { |
50654f6c | 266 | basic_block *bbs; |
617b465c ZD |
267 | struct loop *nloop; |
268 | unsigned i; | |
0aea6467 | 269 | rtx cond, rcond = NULL_RTX, conds, rconds, acond, cinsn = NULL_RTX; |
617b465c ZD |
270 | int repeat; |
271 | edge e; | |
272 | ||
273 | /* Do not unswitch too much. */ | |
274 | if (num > PARAM_VALUE (PARAM_MAX_UNSWITCH_LEVEL)) | |
275 | { | |
c263766c RH |
276 | if (dump_file) |
277 | fprintf (dump_file, ";; Not unswitching anymore, hit max level\n"); | |
617b465c ZD |
278 | return; |
279 | } | |
280 | ||
281 | /* Only unswitch innermost loops. */ | |
282 | if (loop->inner) | |
283 | { | |
c263766c RH |
284 | if (dump_file) |
285 | fprintf (dump_file, ";; Not unswitching, not innermost loop\n"); | |
617b465c ZD |
286 | return; |
287 | } | |
0c20a65f | 288 | |
617b465c ZD |
289 | /* We must be able to duplicate loop body. */ |
290 | if (!can_duplicate_loop_p (loop)) | |
291 | { | |
c263766c RH |
292 | if (dump_file) |
293 | fprintf (dump_file, ";; Not unswitching, can't duplicate loop\n"); | |
617b465c ZD |
294 | return; |
295 | } | |
296 | ||
297 | /* The loop should not be too large, to limit code growth. */ | |
298 | if (num_loop_insns (loop) > PARAM_VALUE (PARAM_MAX_UNSWITCH_INSNS)) | |
299 | { | |
c263766c RH |
300 | if (dump_file) |
301 | fprintf (dump_file, ";; Not unswitching, loop too big\n"); | |
617b465c ZD |
302 | return; |
303 | } | |
0c20a65f | 304 | |
617b465c ZD |
305 | /* Do not unswitch in cold areas. */ |
306 | if (!maybe_hot_bb_p (loop->header)) | |
307 | { | |
c263766c RH |
308 | if (dump_file) |
309 | fprintf (dump_file, ";; Not unswitching, not hot area\n"); | |
617b465c ZD |
310 | return; |
311 | } | |
0c20a65f | 312 | |
617b465c ZD |
313 | /* Nor if the loop usually does not roll. */ |
314 | if (expected_loop_iterations (loop) < 1) | |
315 | { | |
c263766c RH |
316 | if (dump_file) |
317 | fprintf (dump_file, ";; Not unswitching, loop iterations < 1\n"); | |
617b465c ZD |
318 | return; |
319 | } | |
320 | ||
321 | do | |
322 | { | |
323 | repeat = 0; | |
0c20a65f | 324 | |
617b465c ZD |
325 | /* Find a bb to unswitch on. */ |
326 | bbs = get_loop_body (loop); | |
50654f6c | 327 | iv_analysis_loop_init (loop); |
617b465c | 328 | for (i = 0; i < loop->num_nodes; i++) |
50654f6c | 329 | if ((cond = may_unswitch_on (bbs[i], loop, &cinsn))) |
617b465c ZD |
330 | break; |
331 | ||
332 | if (i == loop->num_nodes) | |
333 | { | |
334 | free (bbs); | |
335 | return; | |
336 | } | |
337 | ||
0aea6467 ZD |
338 | if (cond != const0_rtx |
339 | && cond != const_true_rtx) | |
340 | { | |
341 | rcond = reversed_condition (cond); | |
342 | if (rcond) | |
343 | rcond = canon_condition (rcond); | |
0c20a65f | 344 | |
0aea6467 ZD |
345 | /* Check whether the result can be predicted. */ |
346 | for (acond = cond_checked; acond; acond = XEXP (acond, 1)) | |
347 | simplify_using_condition (XEXP (acond, 0), &cond, NULL); | |
348 | } | |
617b465c | 349 | |
50654f6c | 350 | if (cond == const_true_rtx) |
617b465c ZD |
351 | { |
352 | /* Remove false path. */ | |
50654f6c | 353 | e = FALLTHRU_EDGE (bbs[i]); |
617b465c ZD |
354 | remove_path (loops, e); |
355 | free (bbs); | |
356 | repeat = 1; | |
357 | } | |
50654f6c | 358 | else if (cond == const0_rtx) |
617b465c ZD |
359 | { |
360 | /* Remove true path. */ | |
50654f6c | 361 | e = BRANCH_EDGE (bbs[i]); |
617b465c ZD |
362 | remove_path (loops, e); |
363 | free (bbs); | |
364 | repeat = 1; | |
365 | } | |
366 | } while (repeat); | |
0c20a65f | 367 | |
617b465c ZD |
368 | /* We found the condition we can unswitch on. */ |
369 | conds = alloc_EXPR_LIST (0, cond, cond_checked); | |
370 | if (rcond) | |
371 | rconds = alloc_EXPR_LIST (0, rcond, cond_checked); | |
372 | else | |
373 | rconds = cond_checked; | |
374 | ||
c263766c RH |
375 | if (dump_file) |
376 | fprintf (dump_file, ";; Unswitching loop\n"); | |
617b465c ZD |
377 | |
378 | /* Unswitch the loop on this condition. */ | |
50654f6c | 379 | nloop = unswitch_loop (loops, loop, bbs[i], cond, cinsn); |
b5e624c6 | 380 | gcc_assert (nloop); |
617b465c ZD |
381 | |
382 | /* Invoke itself on modified loops. */ | |
50654f6c ZD |
383 | unswitch_single_loop (loops, nloop, rconds, num + 1); |
384 | unswitch_single_loop (loops, loop, conds, num + 1); | |
617b465c ZD |
385 | |
386 | free_EXPR_LIST_node (conds); | |
387 | if (rcond) | |
388 | free_EXPR_LIST_node (rconds); | |
77e23325 AP |
389 | |
390 | free (bbs); | |
617b465c ZD |
391 | } |
392 | ||
393 | /* Unswitch a LOOP w.r. to given basic block UNSWITCH_ON. We only support | |
394 | unswitching of innermost loops. UNSWITCH_ON must be executed in every | |
50654f6c ZD |
395 | iteration, i.e. it must dominate LOOP latch. COND is the condition |
396 | determining which loop is entered. Returns NULL if impossible, new loop | |
397 | otherwise. The new loop is entered if COND is true. If CINSN is not | |
398 | NULL, it is the insn in that COND is compared. */ | |
399 | ||
617b465c | 400 | static struct loop * |
50654f6c ZD |
401 | unswitch_loop (struct loops *loops, struct loop *loop, basic_block unswitch_on, |
402 | rtx cond, rtx cinsn) | |
617b465c | 403 | { |
50654f6c | 404 | edge entry, latch_edge, true_edge, false_edge, e; |
0d48fcd1 | 405 | basic_block switch_bb, unswitch_on_alt; |
617b465c ZD |
406 | struct loop *nloop; |
407 | sbitmap zero_bitmap; | |
50654f6c ZD |
408 | int irred_flag, prob; |
409 | rtx seq; | |
617b465c ZD |
410 | |
411 | /* Some sanity checking. */ | |
b5e624c6 NS |
412 | gcc_assert (flow_bb_inside_loop_p (loop, unswitch_on)); |
413 | gcc_assert (EDGE_COUNT (unswitch_on->succs) == 2); | |
414 | gcc_assert (just_once_each_iteration_p (loop, unswitch_on)); | |
415 | gcc_assert (!loop->inner); | |
416 | gcc_assert (flow_bb_inside_loop_p (loop, EDGE_SUCC (unswitch_on, 0)->dest)); | |
417 | gcc_assert (flow_bb_inside_loop_p (loop, EDGE_SUCC (unswitch_on, 1)->dest)); | |
0c20a65f | 418 | |
617b465c | 419 | entry = loop_preheader_edge (loop); |
0c20a65f | 420 | |
617b465c | 421 | /* Make a copy. */ |
35b07080 ZD |
422 | irred_flag = entry->flags & EDGE_IRREDUCIBLE_LOOP; |
423 | entry->flags &= ~EDGE_IRREDUCIBLE_LOOP; | |
617b465c ZD |
424 | zero_bitmap = sbitmap_alloc (2); |
425 | sbitmap_zero (zero_bitmap); | |
426 | if (!duplicate_loop_to_header_edge (loop, entry, loops, 1, | |
427 | zero_bitmap, NULL, NULL, NULL, 0)) | |
428 | return NULL; | |
429 | free (zero_bitmap); | |
35b07080 | 430 | entry->flags |= irred_flag; |
617b465c ZD |
431 | |
432 | /* Record the block with condition we unswitch on. */ | |
bc35512f | 433 | unswitch_on_alt = unswitch_on->rbi->copy; |
50654f6c ZD |
434 | true_edge = BRANCH_EDGE (unswitch_on_alt); |
435 | false_edge = FALLTHRU_EDGE (unswitch_on); | |
c5cbcccf | 436 | latch_edge = single_succ_edge (loop->latch->rbi->copy); |
50654f6c ZD |
437 | |
438 | /* Create a block with the condition. */ | |
439 | prob = true_edge->probability; | |
440 | switch_bb = create_empty_bb (EXIT_BLOCK_PTR->prev_bb); | |
441 | seq = compare_and_jump_seq (XEXP (cond, 0), XEXP (cond, 1), GET_CODE (cond), | |
442 | block_label (true_edge->dest), | |
443 | prob, cinsn); | |
444 | emit_insn_after (seq, BB_END (switch_bb)); | |
445 | e = make_edge (switch_bb, true_edge->dest, 0); | |
446 | e->probability = prob; | |
447 | e->count = latch_edge->count * prob / REG_BR_PROB_BASE; | |
448 | e = make_edge (switch_bb, FALLTHRU_EDGE (unswitch_on)->dest, EDGE_FALLTHRU); | |
449 | e->probability = false_edge->probability; | |
450 | e->count = latch_edge->count * (false_edge->probability) / REG_BR_PROB_BASE; | |
617b465c | 451 | |
35b07080 ZD |
452 | if (irred_flag) |
453 | { | |
454 | switch_bb->flags |= BB_IRREDUCIBLE_LOOP; | |
628f6a4e BE |
455 | EDGE_SUCC (switch_bb, 0)->flags |= EDGE_IRREDUCIBLE_LOOP; |
456 | EDGE_SUCC (switch_bb, 1)->flags |= EDGE_IRREDUCIBLE_LOOP; | |
35b07080 ZD |
457 | } |
458 | else | |
459 | { | |
460 | switch_bb->flags &= ~BB_IRREDUCIBLE_LOOP; | |
628f6a4e BE |
461 | EDGE_SUCC (switch_bb, 0)->flags &= ~EDGE_IRREDUCIBLE_LOOP; |
462 | EDGE_SUCC (switch_bb, 1)->flags &= ~EDGE_IRREDUCIBLE_LOOP; | |
35b07080 | 463 | } |
617b465c ZD |
464 | |
465 | /* Loopify from the copy of LOOP body, constructing the new loop. */ | |
617b465c | 466 | nloop = loopify (loops, latch_edge, |
c5cbcccf | 467 | single_pred_edge (loop->header->rbi->copy), switch_bb, |
5132abc2 | 468 | BRANCH_EDGE (switch_bb), FALLTHRU_EDGE (switch_bb), true); |
617b465c | 469 | |
50654f6c ZD |
470 | /* Remove branches that are now unreachable in new loops. */ |
471 | remove_path (loops, true_edge); | |
472 | remove_path (loops, false_edge); | |
617b465c ZD |
473 | |
474 | /* One of created loops do not have to be subloop of the outer loop now, | |
4d6922ee | 475 | so fix its placement in loop data structure. */ |
617b465c ZD |
476 | fix_loop_placement (loop); |
477 | fix_loop_placement (nloop); | |
478 | ||
bf86d71e | 479 | /* Preserve the simple loop preheaders. */ |
d47cc544 SB |
480 | loop_split_edge_with (loop_preheader_edge (loop), NULL_RTX); |
481 | loop_split_edge_with (loop_preheader_edge (nloop), NULL_RTX); | |
bf86d71e | 482 | |
617b465c ZD |
483 | return nloop; |
484 | } |