]> git.ipfire.org Git - thirdparty/gcc.git/blob - gcc/loop-doloop.c
Change copyright header to refer to version 3 of the GNU General Public License and...
[thirdparty/gcc.git] / gcc / loop-doloop.c
1 /* Perform doloop optimizations
2 Copyright (C) 2004, 2005, 2006, 2007 Free Software Foundation, Inc.
3 Based on code by Michael P. Hayes (m.hayes@elec.canterbury.ac.nz)
4
5 This file is part of GCC.
6
7 GCC is free software; you can redistribute it and/or modify it under
8 the terms of the GNU General Public License as published by the Free
9 Software Foundation; either version 3, or (at your option) any later
10 version.
11
12 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
13 WARRANTY; without even the implied warranty of MERCHANTABILITY or
14 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
15 for more details.
16
17 You should have received a copy of the GNU General Public License
18 along with GCC; see the file COPYING3. If not see
19 <http://www.gnu.org/licenses/>. */
20
21 #include "config.h"
22 #include "system.h"
23 #include "coretypes.h"
24 #include "tm.h"
25 #include "rtl.h"
26 #include "flags.h"
27 #include "expr.h"
28 #include "hard-reg-set.h"
29 #include "basic-block.h"
30 #include "toplev.h"
31 #include "tm_p.h"
32 #include "cfgloop.h"
33 #include "output.h"
34 #include "params.h"
35 #include "target.h"
36
37 /* This module is used to modify loops with a determinable number of
38 iterations to use special low-overhead looping instructions.
39
40 It first validates whether the loop is well behaved and has a
41 determinable number of iterations (either at compile or run-time).
42 It then modifies the loop to use a low-overhead looping pattern as
43 follows:
44
45 1. A pseudo register is allocated as the loop iteration counter.
46
47 2. The number of loop iterations is calculated and is stored
48 in the loop counter.
49
50 3. At the end of the loop, the jump insn is replaced by the
51 doloop_end pattern. The compare must remain because it might be
52 used elsewhere. If the loop-variable or condition register are
53 used elsewhere, they will be eliminated by flow.
54
55 4. An optional doloop_begin pattern is inserted at the top of the
56 loop.
57
58 TODO The optimization should only performed when either the biv used for exit
59 condition is unused at all except for the exit test, or if we do not have to
60 change its value, since otherwise we have to add a new induction variable,
61 which usually will not pay up (unless the cost of the doloop pattern is
62 somehow extremely lower than the cost of compare & jump, or unless the bct
63 register cannot be used for anything else but doloop -- ??? detect these
64 cases). */
65
66 #ifdef HAVE_doloop_end
67
68 /* Return the loop termination condition for PATTERN or zero
69 if it is not a decrement and branch jump insn. */
70
71 rtx
72 doloop_condition_get (rtx pattern)
73 {
74 rtx cmp;
75 rtx inc;
76 rtx reg;
77 rtx inc_src;
78 rtx condition;
79
80 /* The canonical doloop pattern we expect is:
81
82 (parallel [(set (pc) (if_then_else (condition)
83 (label_ref (label))
84 (pc)))
85 (set (reg) (plus (reg) (const_int -1)))
86 (additional clobbers and uses)])
87
88 Some targets (IA-64) wrap the set of the loop counter in
89 an if_then_else too.
90
91 In summary, the branch must be the first entry of the
92 parallel (also required by jump.c), and the second
93 entry of the parallel must be a set of the loop counter
94 register. */
95
96 if (GET_CODE (pattern) != PARALLEL)
97 return 0;
98
99 cmp = XVECEXP (pattern, 0, 0);
100 inc = XVECEXP (pattern, 0, 1);
101
102 /* Check for (set (reg) (something)). */
103 if (GET_CODE (inc) != SET)
104 return 0;
105 reg = SET_DEST (inc);
106 if (! REG_P (reg))
107 return 0;
108
109 /* Check if something = (plus (reg) (const_int -1)).
110 On IA-64, this decrement is wrapped in an if_then_else. */
111 inc_src = SET_SRC (inc);
112 if (GET_CODE (inc_src) == IF_THEN_ELSE)
113 inc_src = XEXP (inc_src, 1);
114 if (GET_CODE (inc_src) != PLUS
115 || XEXP (inc_src, 0) != reg
116 || XEXP (inc_src, 1) != constm1_rtx)
117 return 0;
118
119 /* Check for (set (pc) (if_then_else (condition)
120 (label_ref (label))
121 (pc))). */
122 if (GET_CODE (cmp) != SET
123 || SET_DEST (cmp) != pc_rtx
124 || GET_CODE (SET_SRC (cmp)) != IF_THEN_ELSE
125 || GET_CODE (XEXP (SET_SRC (cmp), 1)) != LABEL_REF
126 || XEXP (SET_SRC (cmp), 2) != pc_rtx)
127 return 0;
128
129 /* Extract loop termination condition. */
130 condition = XEXP (SET_SRC (cmp), 0);
131
132 /* We expect a GE or NE comparison with 0 or 1. */
133 if ((GET_CODE (condition) != GE
134 && GET_CODE (condition) != NE)
135 || (XEXP (condition, 1) != const0_rtx
136 && XEXP (condition, 1) != const1_rtx))
137 return 0;
138
139 if ((XEXP (condition, 0) == reg)
140 || (GET_CODE (XEXP (condition, 0)) == PLUS
141 && XEXP (XEXP (condition, 0), 0) == reg))
142 return condition;
143
144 /* ??? If a machine uses a funny comparison, we could return a
145 canonicalized form here. */
146
147 return 0;
148 }
149
150 /* Return nonzero if the loop specified by LOOP is suitable for
151 the use of special low-overhead looping instructions. DESC
152 describes the number of iterations of the loop. */
153
154 static bool
155 doloop_valid_p (struct loop *loop, struct niter_desc *desc)
156 {
157 basic_block *body = get_loop_body (loop), bb;
158 rtx insn;
159 unsigned i;
160 bool result = true;
161
162 /* Check for loops that may not terminate under special conditions. */
163 if (!desc->simple_p
164 || desc->assumptions
165 || desc->infinite)
166 {
167 /* There are some cases that would require a special attention.
168 For example if the comparison is LEU and the comparison value
169 is UINT_MAX then the loop will not terminate. Similarly, if the
170 comparison code is GEU and the comparison value is 0, the
171 loop will not terminate.
172
173 If the absolute increment is not 1, the loop can be infinite
174 even with LTU/GTU, e.g. for (i = 3; i > 0; i -= 2)
175
176 ??? We could compute these conditions at run-time and have a
177 additional jump around the loop to ensure an infinite loop.
178 However, it is very unlikely that this is the intended
179 behavior of the loop and checking for these rare boundary
180 conditions would pessimize all other code.
181
182 If the loop is executed only a few times an extra check to
183 restart the loop could use up most of the benefits of using a
184 count register loop. Note however, that normally, this
185 restart branch would never execute, so it could be predicted
186 well by the CPU. We should generate the pessimistic code by
187 default, and have an option, e.g. -funsafe-loops that would
188 enable count-register loops in this case. */
189 if (dump_file)
190 fprintf (dump_file, "Doloop: Possible infinite iteration case.\n");
191 result = false;
192 goto cleanup;
193 }
194
195 for (i = 0; i < loop->num_nodes; i++)
196 {
197 bb = body[i];
198
199 for (insn = BB_HEAD (bb);
200 insn != NEXT_INSN (BB_END (bb));
201 insn = NEXT_INSN (insn))
202 {
203 /* Different targets have different necessities for low-overhead
204 looping. Call the back end for each instruction within the loop
205 to let it decide whether the insn prohibits a low-overhead loop.
206 It will then return the cause for it to emit to the dump file. */
207 const char * invalid = targetm.invalid_within_doloop (insn);
208 if (invalid)
209 {
210 if (dump_file)
211 fprintf (dump_file, "Doloop: %s\n", invalid);
212 result = false;
213 goto cleanup;
214 }
215 }
216 }
217 result = true;
218
219 cleanup:
220 free (body);
221
222 return result;
223 }
224
225 /* Adds test of COND jumping to DEST on edge *E and set *E to the new fallthru
226 edge. If the condition is always false, do not do anything. If it is always
227 true, redirect E to DEST and return false. In all other cases, true is
228 returned. */
229
230 static bool
231 add_test (rtx cond, edge *e, basic_block dest)
232 {
233 rtx seq, jump, label;
234 enum machine_mode mode;
235 rtx op0 = XEXP (cond, 0), op1 = XEXP (cond, 1);
236 enum rtx_code code = GET_CODE (cond);
237 basic_block bb;
238
239 mode = GET_MODE (XEXP (cond, 0));
240 if (mode == VOIDmode)
241 mode = GET_MODE (XEXP (cond, 1));
242
243 start_sequence ();
244 op0 = force_operand (op0, NULL_RTX);
245 op1 = force_operand (op1, NULL_RTX);
246 label = block_label (dest);
247 do_compare_rtx_and_jump (op0, op1, code, 0, mode, NULL_RTX, NULL_RTX, label);
248
249 jump = get_last_insn ();
250 if (!jump || !JUMP_P (jump))
251 {
252 /* The condition is always false and the jump was optimized out. */
253 end_sequence ();
254 return true;
255 }
256
257 seq = get_insns ();
258 end_sequence ();
259
260 /* There always is at least the jump insn in the sequence. */
261 gcc_assert (seq != NULL_RTX);
262
263 bb = split_edge_and_insert (*e, seq);
264 *e = single_succ_edge (bb);
265
266 if (any_uncondjump_p (jump))
267 {
268 /* The condition is always true. */
269 delete_insn (jump);
270 redirect_edge_and_branch_force (*e, dest);
271 return false;
272 }
273
274 JUMP_LABEL (jump) = label;
275
276 /* The jump is supposed to handle an unlikely special case. */
277 REG_NOTES (jump)
278 = gen_rtx_EXPR_LIST (REG_BR_PROB,
279 const0_rtx, REG_NOTES (jump));
280 LABEL_NUSES (label)++;
281
282 make_edge (bb, dest, (*e)->flags & ~EDGE_FALLTHRU);
283 return true;
284 }
285
286 /* Modify the loop to use the low-overhead looping insn where LOOP
287 describes the loop, DESC describes the number of iterations of the
288 loop, and DOLOOP_INSN is the low-overhead looping insn to emit at the
289 end of the loop. CONDITION is the condition separated from the
290 DOLOOP_SEQ. COUNT is the number of iterations of the LOOP. */
291
292 static void
293 doloop_modify (struct loop *loop, struct niter_desc *desc,
294 rtx doloop_seq, rtx condition, rtx count)
295 {
296 rtx counter_reg;
297 rtx tmp, noloop = NULL_RTX;
298 rtx sequence;
299 rtx jump_insn;
300 rtx jump_label;
301 int nonneg = 0;
302 bool increment_count;
303 basic_block loop_end = desc->out_edge->src;
304 enum machine_mode mode;
305
306 jump_insn = BB_END (loop_end);
307
308 if (dump_file)
309 {
310 fprintf (dump_file, "Doloop: Inserting doloop pattern (");
311 if (desc->const_iter)
312 fprintf (dump_file, HOST_WIDEST_INT_PRINT_DEC, desc->niter);
313 else
314 fputs ("runtime", dump_file);
315 fputs (" iterations).\n", dump_file);
316 }
317
318 /* Discard original jump to continue loop. The original compare
319 result may still be live, so it cannot be discarded explicitly. */
320 delete_insn (jump_insn);
321
322 counter_reg = XEXP (condition, 0);
323 if (GET_CODE (counter_reg) == PLUS)
324 counter_reg = XEXP (counter_reg, 0);
325 mode = GET_MODE (counter_reg);
326
327 increment_count = false;
328 switch (GET_CODE (condition))
329 {
330 case NE:
331 /* Currently only NE tests against zero and one are supported. */
332 noloop = XEXP (condition, 1);
333 if (noloop != const0_rtx)
334 {
335 gcc_assert (noloop == const1_rtx);
336 increment_count = true;
337 }
338 break;
339
340 case GE:
341 /* Currently only GE tests against zero are supported. */
342 gcc_assert (XEXP (condition, 1) == const0_rtx);
343
344 noloop = constm1_rtx;
345
346 /* The iteration count does not need incrementing for a GE test. */
347 increment_count = false;
348
349 /* Determine if the iteration counter will be non-negative.
350 Note that the maximum value loaded is iterations_max - 1. */
351 if (desc->niter_max
352 <= ((unsigned HOST_WIDEST_INT) 1
353 << (GET_MODE_BITSIZE (mode) - 1)))
354 nonneg = 1;
355 break;
356
357 /* Abort if an invalid doloop pattern has been generated. */
358 default:
359 gcc_unreachable ();
360 }
361
362 if (increment_count)
363 count = simplify_gen_binary (PLUS, mode, count, const1_rtx);
364
365 /* Insert initialization of the count register into the loop header. */
366 start_sequence ();
367 tmp = force_operand (count, counter_reg);
368 convert_move (counter_reg, tmp, 1);
369 sequence = get_insns ();
370 end_sequence ();
371 emit_insn_after (sequence, BB_END (loop_preheader_edge (loop)->src));
372
373 if (desc->noloop_assumptions)
374 {
375 rtx ass = copy_rtx (desc->noloop_assumptions);
376 basic_block preheader = loop_preheader_edge (loop)->src;
377 basic_block set_zero
378 = split_edge (loop_preheader_edge (loop));
379 basic_block new_preheader
380 = split_edge (loop_preheader_edge (loop));
381 edge te;
382
383 /* Expand the condition testing the assumptions and if it does not pass,
384 reset the count register to 0. */
385 redirect_edge_and_branch_force (single_succ_edge (preheader), new_preheader);
386 set_immediate_dominator (CDI_DOMINATORS, new_preheader, preheader);
387
388 set_zero->count = 0;
389 set_zero->frequency = 0;
390
391 te = single_succ_edge (preheader);
392 for (; ass; ass = XEXP (ass, 1))
393 if (!add_test (XEXP (ass, 0), &te, set_zero))
394 break;
395
396 if (ass)
397 {
398 /* We reached a condition that is always true. This is very hard to
399 reproduce (such a loop does not roll, and thus it would most
400 likely get optimized out by some of the preceding optimizations).
401 In fact, I do not have any testcase for it. However, it would
402 also be very hard to show that it is impossible, so we must
403 handle this case. */
404 set_zero->count = preheader->count;
405 set_zero->frequency = preheader->frequency;
406 }
407
408 if (EDGE_COUNT (set_zero->preds) == 0)
409 {
410 /* All the conditions were simplified to false, remove the
411 unreachable set_zero block. */
412 delete_basic_block (set_zero);
413 }
414 else
415 {
416 /* Reset the counter to zero in the set_zero block. */
417 start_sequence ();
418 convert_move (counter_reg, noloop, 0);
419 sequence = get_insns ();
420 end_sequence ();
421 emit_insn_after (sequence, BB_END (set_zero));
422
423 set_immediate_dominator (CDI_DOMINATORS, set_zero,
424 recompute_dominator (CDI_DOMINATORS,
425 set_zero));
426 }
427
428 set_immediate_dominator (CDI_DOMINATORS, new_preheader,
429 recompute_dominator (CDI_DOMINATORS,
430 new_preheader));
431 }
432
433 /* Some targets (eg, C4x) need to initialize special looping
434 registers. */
435 #ifdef HAVE_doloop_begin
436 {
437 rtx init;
438 unsigned level = get_loop_level (loop) + 1;
439 init = gen_doloop_begin (counter_reg,
440 desc->const_iter ? desc->niter_expr : const0_rtx,
441 GEN_INT (desc->niter_max),
442 GEN_INT (level));
443 if (init)
444 {
445 start_sequence ();
446 emit_insn (init);
447 sequence = get_insns ();
448 end_sequence ();
449 emit_insn_after (sequence, BB_END (loop_preheader_edge (loop)->src));
450 }
451 }
452 #endif
453
454 /* Insert the new low-overhead looping insn. */
455 emit_jump_insn_after (doloop_seq, BB_END (loop_end));
456 jump_insn = BB_END (loop_end);
457 jump_label = block_label (desc->in_edge->dest);
458 JUMP_LABEL (jump_insn) = jump_label;
459 LABEL_NUSES (jump_label)++;
460
461 /* Ensure the right fallthru edge is marked, for case we have reversed
462 the condition. */
463 desc->in_edge->flags &= ~EDGE_FALLTHRU;
464 desc->out_edge->flags |= EDGE_FALLTHRU;
465
466 /* Add a REG_NONNEG note if the actual or estimated maximum number
467 of iterations is non-negative. */
468 if (nonneg)
469 {
470 REG_NOTES (jump_insn)
471 = gen_rtx_EXPR_LIST (REG_NONNEG, NULL_RTX, REG_NOTES (jump_insn));
472 }
473 }
474
475 /* Process loop described by LOOP validating that the loop is suitable for
476 conversion to use a low overhead looping instruction, replacing the jump
477 insn where suitable. Returns true if the loop was successfully
478 modified. */
479
480 static bool
481 doloop_optimize (struct loop *loop)
482 {
483 enum machine_mode mode;
484 rtx doloop_seq, doloop_pat, doloop_reg;
485 rtx iterations, count;
486 rtx iterations_max;
487 rtx start_label;
488 rtx condition;
489 unsigned level, est_niter;
490 int max_cost;
491 struct niter_desc *desc;
492 unsigned word_mode_size;
493 unsigned HOST_WIDE_INT word_mode_max;
494
495 if (dump_file)
496 fprintf (dump_file, "Doloop: Processing loop %d.\n", loop->num);
497
498 iv_analysis_loop_init (loop);
499
500 /* Find the simple exit of a LOOP. */
501 desc = get_simple_loop_desc (loop);
502
503 /* Check that loop is a candidate for a low-overhead looping insn. */
504 if (!doloop_valid_p (loop, desc))
505 {
506 if (dump_file)
507 fprintf (dump_file,
508 "Doloop: The loop is not suitable.\n");
509 return false;
510 }
511 mode = desc->mode;
512
513 est_niter = 3;
514 if (desc->const_iter)
515 est_niter = desc->niter;
516 /* If the estimate on number of iterations is reliable (comes from profile
517 feedback), use it. Do not use it normally, since the expected number
518 of iterations of an unrolled loop is 2. */
519 if (loop->header->count)
520 est_niter = expected_loop_iterations (loop);
521
522 if (est_niter < 3)
523 {
524 if (dump_file)
525 fprintf (dump_file,
526 "Doloop: Too few iterations (%u) to be profitable.\n",
527 est_niter);
528 return false;
529 }
530
531 max_cost
532 = COSTS_N_INSNS (PARAM_VALUE (PARAM_MAX_ITERATIONS_COMPUTATION_COST));
533 if (rtx_cost (desc->niter_expr, SET) > max_cost)
534 {
535 if (dump_file)
536 fprintf (dump_file,
537 "Doloop: number of iterations too costly to compute.\n");
538 return false;
539 }
540
541 count = copy_rtx (desc->niter_expr);
542 iterations = desc->const_iter ? desc->niter_expr : const0_rtx;
543 iterations_max = GEN_INT (desc->niter_max);
544 level = get_loop_level (loop) + 1;
545
546 /* Generate looping insn. If the pattern FAILs then give up trying
547 to modify the loop since there is some aspect the back-end does
548 not like. */
549 start_label = block_label (desc->in_edge->dest);
550 doloop_reg = gen_reg_rtx (mode);
551 doloop_seq = gen_doloop_end (doloop_reg, iterations, iterations_max,
552 GEN_INT (level), start_label);
553
554 word_mode_size = GET_MODE_BITSIZE (word_mode);
555 word_mode_max
556 = ((unsigned HOST_WIDE_INT) 1 << (word_mode_size - 1) << 1) - 1;
557 if (! doloop_seq
558 && mode != word_mode
559 /* Before trying mode different from the one in that # of iterations is
560 computed, we must be sure that the number of iterations fits into
561 the new mode. */
562 && (word_mode_size >= GET_MODE_BITSIZE (mode)
563 || desc->niter_max <= word_mode_max))
564 {
565 if (word_mode_size > GET_MODE_BITSIZE (mode))
566 {
567 count = simplify_gen_unary (ZERO_EXTEND, word_mode,
568 count, mode);
569 iterations = simplify_gen_unary (ZERO_EXTEND, word_mode,
570 iterations, mode);
571 iterations_max = simplify_gen_unary (ZERO_EXTEND, word_mode,
572 iterations_max, mode);
573 }
574 else
575 {
576 count = lowpart_subreg (word_mode, count, mode);
577 iterations = lowpart_subreg (word_mode, iterations, mode);
578 iterations_max = lowpart_subreg (word_mode, iterations_max, mode);
579 }
580 PUT_MODE (doloop_reg, word_mode);
581 doloop_seq = gen_doloop_end (doloop_reg, iterations, iterations_max,
582 GEN_INT (level), start_label);
583 }
584 if (! doloop_seq)
585 {
586 if (dump_file)
587 fprintf (dump_file,
588 "Doloop: Target unwilling to use doloop pattern!\n");
589 return false;
590 }
591
592 /* If multiple instructions were created, the last must be the
593 jump instruction. Also, a raw define_insn may yield a plain
594 pattern. */
595 doloop_pat = doloop_seq;
596 if (INSN_P (doloop_pat))
597 {
598 while (NEXT_INSN (doloop_pat) != NULL_RTX)
599 doloop_pat = NEXT_INSN (doloop_pat);
600 if (JUMP_P (doloop_pat))
601 doloop_pat = PATTERN (doloop_pat);
602 else
603 doloop_pat = NULL_RTX;
604 }
605
606 if (! doloop_pat
607 || ! (condition = doloop_condition_get (doloop_pat)))
608 {
609 if (dump_file)
610 fprintf (dump_file, "Doloop: Unrecognizable doloop pattern!\n");
611 return false;
612 }
613
614 doloop_modify (loop, desc, doloop_seq, condition, count);
615 return true;
616 }
617
618 /* This is the main entry point. Process all loops using doloop_optimize. */
619
620 void
621 doloop_optimize_loops (void)
622 {
623 loop_iterator li;
624 struct loop *loop;
625
626 FOR_EACH_LOOP (li, loop, 0)
627 {
628 doloop_optimize (loop);
629 }
630
631 iv_analysis_done ();
632
633 #ifdef ENABLE_CHECKING
634 verify_dominators (CDI_DOMINATORS);
635 verify_loop_structure ();
636 #endif
637 }
638 #endif /* HAVE_doloop_end */
639