]>
Commit | Line | Data |
---|---|---|
689ba89d ZD |
1 | /* Perform doloop optimizations |
2 | Copyright (C) 2004 Free Software Foundation, Inc. | |
3 | Based on code by Michael P. Hayes (m.hayes@elec.canterbury.ac.nz) | |
4 | ||
5 | This file is part of GCC. | |
6 | ||
7 | GCC is free software; you can redistribute it and/or modify it under | |
8 | the terms of the GNU General Public License as published by the Free | |
9 | Software Foundation; either version 2, or (at your option) any later | |
10 | version. | |
11 | ||
12 | GCC is distributed in the hope that it will be useful, but WITHOUT ANY | |
13 | WARRANTY; without even the implied warranty of MERCHANTABILITY or | |
14 | FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License | |
15 | for more details. | |
16 | ||
17 | You should have received a copy of the GNU General Public License | |
18 | along with GCC; see the file COPYING. If not, write to the Free | |
19 | Software Foundation, 59 Temple Place - Suite 330, Boston, MA | |
20 | 02111-1307, USA. */ | |
21 | ||
22 | #include "config.h" | |
23 | #include "system.h" | |
24 | #include "coretypes.h" | |
25 | #include "tm.h" | |
26 | #include "rtl.h" | |
27 | #include "flags.h" | |
28 | #include "expr.h" | |
29 | #include "hard-reg-set.h" | |
30 | #include "basic-block.h" | |
31 | #include "toplev.h" | |
32 | #include "tm_p.h" | |
33 | #include "cfgloop.h" | |
34 | #include "output.h" | |
35 | #include "params.h" | |
36 | ||
37 | /* This module is used to modify loops with a determinable number of | |
38 | iterations to use special low-overhead looping instructions. | |
39 | ||
40 | It first validates whether the loop is well behaved and has a | |
41 | determinable number of iterations (either at compile or run-time). | |
42 | It then modifies the loop to use a low-overhead looping pattern as | |
43 | follows: | |
44 | ||
45 | 1. A pseudo register is allocated as the loop iteration counter. | |
46 | ||
47 | 2. The number of loop iterations is calculated and is stored | |
48 | in the loop counter. | |
49 | ||
50 | 3. At the end of the loop, the jump insn is replaced by the | |
51 | doloop_end pattern. The compare must remain because it might be | |
52 | used elsewhere. If the loop-variable or condition register are | |
53 | used elsewhere, they will be eliminated by flow. | |
54 | ||
55 | 4. An optional doloop_begin pattern is inserted at the top of the | |
56 | loop. | |
57 | ||
58 | TODO The optimization should only performed when either the biv used for exit | |
59 | condition is unused at all except for the exit test, or if we do not have to | |
60 | change its value, since otherwise we have to add a new induction variable, | |
61 | which usually will not pay up (unless the cost of the doloop pattern is | |
62 | somehow extremely lower than the cost of compare & jump, or unless the bct | |
63 | register cannot be used for anything else but doloop -- ??? detect these | |
64 | cases). */ | |
65 | ||
66 | #ifdef HAVE_doloop_end | |
67 | ||
68 | /* Return the loop termination condition for PATTERN or zero | |
69 | if it is not a decrement and branch jump insn. */ | |
70 | ||
71 | static rtx | |
72 | doloop_condition_get (rtx pattern) | |
73 | { | |
74 | rtx cmp; | |
75 | rtx inc; | |
76 | rtx reg; | |
77 | rtx condition; | |
78 | ||
79 | /* The canonical doloop pattern we expect is: | |
80 | ||
81 | (parallel [(set (pc) (if_then_else (condition) | |
82 | (label_ref (label)) | |
83 | (pc))) | |
84 | (set (reg) (plus (reg) (const_int -1))) | |
85 | (additional clobbers and uses)]) | |
86 | ||
87 | Some machines (IA-64) make the decrement conditional on | |
88 | the condition as well, so we don't bother verifying the | |
89 | actual decrement. In summary, the branch must be the | |
90 | first entry of the parallel (also required by jump.c), | |
91 | and the second entry of the parallel must be a set of | |
92 | the loop counter register. */ | |
93 | ||
94 | if (GET_CODE (pattern) != PARALLEL) | |
95 | return 0; | |
96 | ||
97 | cmp = XVECEXP (pattern, 0, 0); | |
98 | inc = XVECEXP (pattern, 0, 1); | |
99 | ||
100 | /* Check for (set (reg) (something)). */ | |
101 | if (GET_CODE (inc) != SET || ! REG_P (SET_DEST (inc))) | |
102 | return 0; | |
103 | ||
104 | /* Extract loop counter register. */ | |
105 | reg = SET_DEST (inc); | |
106 | ||
107 | /* Check for (set (pc) (if_then_else (condition) | |
108 | (label_ref (label)) | |
109 | (pc))). */ | |
110 | if (GET_CODE (cmp) != SET | |
111 | || SET_DEST (cmp) != pc_rtx | |
112 | || GET_CODE (SET_SRC (cmp)) != IF_THEN_ELSE | |
113 | || GET_CODE (XEXP (SET_SRC (cmp), 1)) != LABEL_REF | |
114 | || XEXP (SET_SRC (cmp), 2) != pc_rtx) | |
115 | return 0; | |
116 | ||
117 | /* Extract loop termination condition. */ | |
118 | condition = XEXP (SET_SRC (cmp), 0); | |
119 | ||
120 | if ((GET_CODE (condition) != GE && GET_CODE (condition) != NE) | |
121 | || GET_CODE (XEXP (condition, 1)) != CONST_INT) | |
122 | return 0; | |
123 | ||
124 | if (XEXP (condition, 0) == reg) | |
125 | return condition; | |
126 | ||
127 | if (GET_CODE (XEXP (condition, 0)) == PLUS | |
128 | && XEXP (XEXP (condition, 0), 0) == reg) | |
129 | return condition; | |
130 | ||
131 | /* ??? If a machine uses a funny comparison, we could return a | |
1ae58c30 | 132 | canonicalized form here. */ |
689ba89d ZD |
133 | |
134 | return 0; | |
135 | } | |
136 | ||
137 | /* Return nonzero if the loop specified by LOOP is suitable for | |
138 | the use of special low-overhead looping instructions. DESC | |
139 | describes the number of iterations of the loop. */ | |
140 | ||
141 | static bool | |
142 | doloop_valid_p (struct loop *loop, struct niter_desc *desc) | |
143 | { | |
144 | basic_block *body = get_loop_body (loop), bb; | |
145 | rtx insn; | |
146 | unsigned i; | |
7600f094 | 147 | bool result = true; |
689ba89d ZD |
148 | |
149 | /* Check for loops that may not terminate under special conditions. */ | |
150 | if (!desc->simple_p | |
151 | || desc->assumptions | |
152 | || desc->infinite) | |
153 | { | |
154 | /* There are some cases that would require a special attention. | |
155 | For example if the comparison is LEU and the comparison value | |
156 | is UINT_MAX then the loop will not terminate. Similarly, if the | |
157 | comparison code is GEU and the comparison value is 0, the | |
158 | loop will not terminate. | |
159 | ||
160 | If the absolute increment is not 1, the loop can be infinite | |
161 | even with LTU/GTU, e.g. for (i = 3; i > 0; i -= 2) | |
162 | ||
163 | ??? We could compute these conditions at run-time and have a | |
164 | additional jump around the loop to ensure an infinite loop. | |
165 | However, it is very unlikely that this is the intended | |
166 | behavior of the loop and checking for these rare boundary | |
167 | conditions would pessimize all other code. | |
168 | ||
169 | If the loop is executed only a few times an extra check to | |
170 | restart the loop could use up most of the benefits of using a | |
171 | count register loop. Note however, that normally, this | |
172 | restart branch would never execute, so it could be predicted | |
173 | well by the CPU. We should generate the pessimistic code by | |
174 | default, and have an option, e.g. -funsafe-loops that would | |
175 | enable count-register loops in this case. */ | |
176 | if (dump_file) | |
177 | fprintf (dump_file, "Doloop: Possible infinite iteration case.\n"); | |
7600f094 AP |
178 | result = false; |
179 | goto cleanup; | |
689ba89d ZD |
180 | } |
181 | ||
182 | for (i = 0; i < loop->num_nodes; i++) | |
183 | { | |
184 | bb = body[i]; | |
185 | ||
186 | for (insn = BB_HEAD (bb); | |
187 | insn != NEXT_INSN (BB_END (bb)); | |
188 | insn = NEXT_INSN (insn)) | |
189 | { | |
190 | /* A called function may clobber any special registers required for | |
191 | low-overhead looping. */ | |
4b4bf941 | 192 | if (CALL_P (insn)) |
689ba89d ZD |
193 | { |
194 | if (dump_file) | |
195 | fprintf (dump_file, "Doloop: Function call in loop.\n"); | |
7600f094 AP |
196 | result = false; |
197 | goto cleanup; | |
689ba89d ZD |
198 | } |
199 | ||
200 | /* Some targets (eg, PPC) use the count register for branch on table | |
201 | instructions. ??? This should be a target specific check. */ | |
4b4bf941 | 202 | if (JUMP_P (insn) |
689ba89d ZD |
203 | && (GET_CODE (PATTERN (insn)) == ADDR_DIFF_VEC |
204 | || GET_CODE (PATTERN (insn)) == ADDR_VEC)) | |
205 | { | |
206 | if (dump_file) | |
207 | fprintf (dump_file, "Doloop: Computed branch in the loop.\n"); | |
7600f094 AP |
208 | result = false; |
209 | goto cleanup; | |
689ba89d ZD |
210 | } |
211 | } | |
212 | } | |
7600f094 AP |
213 | result = true; |
214 | ||
215 | cleanup: | |
689ba89d ZD |
216 | free (body); |
217 | ||
7600f094 | 218 | return result; |
689ba89d ZD |
219 | } |
220 | ||
221 | /* Adds test of COND jumping to DEST to the end of BB. */ | |
222 | ||
223 | static void | |
224 | add_test (rtx cond, basic_block bb, basic_block dest) | |
225 | { | |
226 | rtx seq, jump, label; | |
227 | enum machine_mode mode; | |
228 | rtx op0 = XEXP (cond, 0), op1 = XEXP (cond, 1); | |
229 | enum rtx_code code = GET_CODE (cond); | |
230 | ||
231 | mode = GET_MODE (XEXP (cond, 0)); | |
232 | if (mode == VOIDmode) | |
233 | mode = GET_MODE (XEXP (cond, 1)); | |
234 | ||
235 | start_sequence (); | |
236 | op0 = force_operand (op0, NULL_RTX); | |
237 | op1 = force_operand (op1, NULL_RTX); | |
238 | label = block_label (dest); | |
239 | do_compare_rtx_and_jump (op0, op1, code, 0, mode, NULL_RTX, NULL_RTX, label); | |
240 | ||
241 | jump = get_last_insn (); | |
242 | JUMP_LABEL (jump) = label; | |
243 | ||
244 | /* The jump is supposed to handle an unlikely special case. */ | |
245 | REG_NOTES (jump) | |
246 | = gen_rtx_EXPR_LIST (REG_BR_PROB, | |
147d77b6 | 247 | const0_rtx, REG_NOTES (jump)); |
689ba89d ZD |
248 | |
249 | LABEL_NUSES (label)++; | |
250 | ||
251 | seq = get_insns (); | |
252 | end_sequence (); | |
253 | emit_insn_after (seq, BB_END (bb)); | |
254 | } | |
255 | ||
256 | /* Modify the loop to use the low-overhead looping insn where LOOP | |
257 | describes the loop, DESC describes the number of iterations of the | |
258 | loop, and DOLOOP_INSN is the low-overhead looping insn to emit at the | |
259 | end of the loop. CONDITION is the condition separated from the | |
260 | DOLOOP_SEQ. */ | |
261 | ||
262 | static void | |
263 | doloop_modify (struct loop *loop, struct niter_desc *desc, | |
264 | rtx doloop_seq, rtx condition) | |
265 | { | |
266 | rtx counter_reg; | |
267 | rtx count, tmp, noloop = NULL_RTX; | |
268 | rtx sequence; | |
269 | rtx jump_insn; | |
270 | rtx jump_label; | |
271 | int nonneg = 0, irr; | |
272 | bool increment_count; | |
273 | basic_block loop_end = desc->out_edge->src; | |
274 | ||
275 | jump_insn = BB_END (loop_end); | |
276 | ||
277 | if (dump_file) | |
278 | { | |
279 | fprintf (dump_file, "Doloop: Inserting doloop pattern ("); | |
280 | if (desc->const_iter) | |
281 | fprintf (dump_file, HOST_WIDEST_INT_PRINT_DEC, desc->niter); | |
282 | else | |
283 | fputs ("runtime", dump_file); | |
284 | fputs (" iterations).\n", dump_file); | |
285 | } | |
286 | ||
287 | /* Discard original jump to continue loop. The original compare | |
288 | result may still be live, so it cannot be discarded explicitly. */ | |
289 | delete_insn (jump_insn); | |
290 | ||
291 | counter_reg = XEXP (condition, 0); | |
292 | if (GET_CODE (counter_reg) == PLUS) | |
293 | counter_reg = XEXP (counter_reg, 0); | |
294 | ||
295 | count = desc->niter_expr; | |
296 | increment_count = false; | |
297 | switch (GET_CODE (condition)) | |
298 | { | |
299 | case NE: | |
300 | /* Currently only NE tests against zero and one are supported. */ | |
301 | if (XEXP (condition, 1) == const1_rtx) | |
302 | { | |
303 | increment_count = true; | |
304 | noloop = const1_rtx; | |
305 | } | |
689ba89d | 306 | else |
1c43d3ca GB |
307 | { |
308 | gcc_assert (XEXP (condition, 1) == const0_rtx); | |
309 | noloop = const0_rtx; | |
310 | } | |
689ba89d ZD |
311 | break; |
312 | ||
313 | case GE: | |
314 | /* Currently only GE tests against zero are supported. */ | |
1c43d3ca | 315 | gcc_assert (XEXP (condition, 1) == const0_rtx); |
689ba89d ZD |
316 | |
317 | noloop = constm1_rtx; | |
318 | ||
319 | /* The iteration count does not need incrementing for a GE test. */ | |
320 | increment_count = false; | |
321 | ||
322 | /* Determine if the iteration counter will be non-negative. | |
323 | Note that the maximum value loaded is iterations_max - 1. */ | |
324 | if (desc->niter_max | |
325 | <= ((unsigned HOST_WIDEST_INT) 1 | |
326 | << (GET_MODE_BITSIZE (GET_MODE (counter_reg)) - 1))) | |
327 | nonneg = 1; | |
328 | break; | |
329 | ||
689ba89d | 330 | default: |
1c43d3ca GB |
331 | /* Abort if an invalid doloop pattern has been generated. */ |
332 | gcc_unreachable (); | |
689ba89d ZD |
333 | } |
334 | ||
335 | if (increment_count) | |
336 | count = simplify_gen_binary (PLUS, desc->mode, count, const1_rtx); | |
337 | ||
338 | /* Insert initialization of the count register into the loop header. */ | |
339 | start_sequence (); | |
340 | tmp = force_operand (count, counter_reg); | |
341 | convert_move (counter_reg, tmp, 1); | |
342 | sequence = get_insns (); | |
343 | end_sequence (); | |
344 | emit_insn_after (sequence, BB_END (loop_preheader_edge (loop)->src)); | |
345 | ||
346 | if (desc->noloop_assumptions) | |
347 | { | |
348 | rtx ass = desc->noloop_assumptions; | |
349 | basic_block preheader = loop_preheader_edge (loop)->src; | |
350 | basic_block set_zero | |
351 | = loop_split_edge_with (loop_preheader_edge (loop), NULL_RTX); | |
352 | basic_block new_preheader | |
353 | = loop_split_edge_with (loop_preheader_edge (loop), NULL_RTX); | |
354 | basic_block bb; | |
355 | edge te; | |
356 | gcov_type cnt; | |
357 | ||
358 | /* Expand the condition testing the assumptions and if it does not pass, | |
359 | reset the count register to 0. */ | |
360 | add_test (XEXP (ass, 0), preheader, set_zero); | |
361 | preheader->succ->flags &= ~EDGE_FALLTHRU; | |
362 | cnt = preheader->succ->count; | |
363 | preheader->succ->probability = 0; | |
364 | preheader->succ->count = 0; | |
365 | irr = preheader->succ->flags & EDGE_IRREDUCIBLE_LOOP; | |
366 | te = make_edge (preheader, new_preheader, EDGE_FALLTHRU | irr); | |
367 | te->probability = REG_BR_PROB_BASE; | |
368 | te->count = cnt; | |
369 | set_immediate_dominator (CDI_DOMINATORS, new_preheader, preheader); | |
370 | ||
371 | set_zero->count = 0; | |
372 | set_zero->frequency = 0; | |
373 | ||
374 | for (ass = XEXP (ass, 1); ass; ass = XEXP (ass, 1)) | |
375 | { | |
376 | bb = loop_split_edge_with (te, NULL_RTX); | |
377 | te = bb->succ; | |
378 | add_test (XEXP (ass, 0), bb, set_zero); | |
379 | make_edge (bb, set_zero, irr); | |
380 | } | |
381 | ||
382 | start_sequence (); | |
383 | convert_move (counter_reg, noloop, 0); | |
384 | sequence = get_insns (); | |
385 | end_sequence (); | |
386 | emit_insn_after (sequence, BB_END (set_zero)); | |
387 | } | |
388 | ||
389 | /* Some targets (eg, C4x) need to initialize special looping | |
390 | registers. */ | |
391 | #ifdef HAVE_doloop_begin | |
392 | { | |
393 | rtx init; | |
394 | unsigned level = get_loop_level (loop) + 1; | |
395 | init = gen_doloop_begin (counter_reg, | |
396 | desc->const_iter ? desc->niter_expr : const0_rtx, | |
397 | desc->niter_max, | |
398 | GEN_INT (level)); | |
399 | if (init) | |
400 | { | |
401 | start_sequence (); | |
402 | emit_insn (init); | |
403 | sequence = get_insns (); | |
404 | end_sequence (); | |
405 | emit_insn_after (sequence, BB_END (loop_preheader_edge (loop)->src)); | |
406 | } | |
407 | } | |
408 | #endif | |
409 | ||
410 | /* Insert the new low-overhead looping insn. */ | |
411 | emit_jump_insn_after (doloop_seq, BB_END (loop_end)); | |
412 | jump_insn = BB_END (loop_end); | |
413 | jump_label = block_label (desc->in_edge->dest); | |
414 | JUMP_LABEL (jump_insn) = jump_label; | |
415 | LABEL_NUSES (jump_label)++; | |
416 | ||
417 | /* Ensure the right fallthru edge is marked, for case we have reversed | |
418 | the condition. */ | |
419 | desc->in_edge->flags &= ~EDGE_FALLTHRU; | |
420 | desc->out_edge->flags |= EDGE_FALLTHRU; | |
421 | ||
422 | /* Add a REG_NONNEG note if the actual or estimated maximum number | |
423 | of iterations is non-negative. */ | |
424 | if (nonneg) | |
425 | { | |
426 | REG_NOTES (jump_insn) | |
427 | = gen_rtx_EXPR_LIST (REG_NONNEG, NULL_RTX, REG_NOTES (jump_insn)); | |
428 | } | |
429 | } | |
430 | ||
431 | /* Process loop described by LOOP validating that the loop is suitable for | |
432 | conversion to use a low overhead looping instruction, replacing the jump | |
433 | insn where suitable. Returns true if the loop was successfully | |
434 | modified. */ | |
435 | ||
436 | static bool | |
437 | doloop_optimize (struct loop *loop) | |
438 | { | |
439 | enum machine_mode mode; | |
440 | rtx doloop_seq, doloop_pat, doloop_reg; | |
441 | rtx iterations; | |
442 | rtx iterations_max; | |
443 | rtx start_label; | |
444 | rtx condition; | |
445 | unsigned level, est_niter; | |
446 | struct niter_desc *desc; | |
447 | ||
448 | if (dump_file) | |
449 | fprintf (dump_file, "Doloop: Processing loop %d.\n", loop->num); | |
450 | ||
451 | iv_analysis_loop_init (loop); | |
452 | ||
453 | /* Find the simple exit of a LOOP. */ | |
454 | desc = get_simple_loop_desc (loop); | |
455 | ||
456 | /* Check that loop is a candidate for a low-overhead looping insn. */ | |
457 | if (!doloop_valid_p (loop, desc)) | |
458 | { | |
459 | if (dump_file) | |
460 | fprintf (dump_file, | |
461 | "Doloop: The loop is not suitable.\n"); | |
462 | return false; | |
463 | } | |
464 | mode = desc->mode; | |
465 | ||
466 | est_niter = 3; | |
467 | if (desc->const_iter) | |
468 | est_niter = desc->niter; | |
469 | /* If the estimate on number of iterations is reliable (comes from profile | |
470 | feedback), use it. Do not use it normally, since the expected number | |
471 | of iterations of an unrolled loop is 2. */ | |
472 | if (loop->header->count) | |
473 | est_niter = expected_loop_iterations (loop); | |
474 | ||
475 | if (est_niter < 3) | |
476 | { | |
477 | if (dump_file) | |
478 | fprintf (dump_file, | |
479 | "Doloop: Too few iterations (%u) to be profitable.\n", | |
480 | est_niter); | |
481 | return false; | |
482 | } | |
483 | ||
484 | iterations = desc->const_iter ? desc->niter_expr : const0_rtx; | |
485 | iterations_max = GEN_INT (desc->niter_max); | |
486 | level = get_loop_level (loop) + 1; | |
487 | ||
488 | /* Generate looping insn. If the pattern FAILs then give up trying | |
489 | to modify the loop since there is some aspect the back-end does | |
490 | not like. */ | |
491 | start_label = block_label (desc->in_edge->dest); | |
492 | doloop_reg = gen_reg_rtx (mode); | |
493 | doloop_seq = gen_doloop_end (doloop_reg, iterations, iterations_max, | |
494 | GEN_INT (level), start_label); | |
495 | if (! doloop_seq && mode != word_mode) | |
496 | { | |
497 | PUT_MODE (doloop_reg, word_mode); | |
498 | doloop_seq = gen_doloop_end (doloop_reg, iterations, iterations_max, | |
499 | GEN_INT (level), start_label); | |
500 | } | |
501 | if (! doloop_seq) | |
502 | { | |
503 | if (dump_file) | |
504 | fprintf (dump_file, | |
505 | "Doloop: Target unwilling to use doloop pattern!\n"); | |
506 | return false; | |
507 | } | |
508 | ||
509 | /* If multiple instructions were created, the last must be the | |
510 | jump instruction. Also, a raw define_insn may yield a plain | |
511 | pattern. */ | |
512 | doloop_pat = doloop_seq; | |
513 | if (INSN_P (doloop_pat)) | |
514 | { | |
515 | while (NEXT_INSN (doloop_pat) != NULL_RTX) | |
516 | doloop_pat = NEXT_INSN (doloop_pat); | |
4b4bf941 | 517 | if (JUMP_P (doloop_pat)) |
689ba89d ZD |
518 | doloop_pat = PATTERN (doloop_pat); |
519 | else | |
520 | doloop_pat = NULL_RTX; | |
521 | } | |
522 | ||
523 | if (! doloop_pat | |
524 | || ! (condition = doloop_condition_get (doloop_pat))) | |
525 | { | |
526 | if (dump_file) | |
527 | fprintf (dump_file, "Doloop: Unrecognizable doloop pattern!\n"); | |
528 | return false; | |
529 | } | |
530 | ||
531 | doloop_modify (loop, desc, doloop_seq, condition); | |
532 | return true; | |
533 | } | |
534 | ||
535 | /* This is the main entry point. Process all LOOPS using doloop_optimize. */ | |
536 | ||
537 | void | |
538 | doloop_optimize_loops (struct loops *loops) | |
539 | { | |
540 | unsigned i; | |
541 | struct loop *loop; | |
542 | ||
543 | for (i = 1; i < loops->num; i++) | |
544 | { | |
545 | loop = loops->parray[i]; | |
546 | if (!loop) | |
547 | continue; | |
548 | ||
549 | doloop_optimize (loop); | |
550 | } | |
551 | ||
552 | iv_analysis_done (); | |
553 | ||
554 | #ifdef ENABLE_CHECKING | |
555 | verify_dominators (CDI_DOMINATORS); | |
556 | verify_loop_structure (loops); | |
557 | #endif | |
558 | } | |
559 | #endif /* HAVE_doloop_end */ | |
560 |