]> git.ipfire.org Git - thirdparty/gcc.git/blame - gcc/loop-doloop.c
machmode.h (HWI_COMPUTABLE_MODE_P): New macro.
[thirdparty/gcc.git] / gcc / loop-doloop.c
CommitLineData
689ba89d 1/* Perform doloop optimizations
2bdd49f4
JJ
2 Copyright (C) 2004, 2005, 2006, 2007, 2008, 2010
3 Free Software Foundation, Inc.
689ba89d
ZD
4 Based on code by Michael P. Hayes (m.hayes@elec.canterbury.ac.nz)
5
6This file is part of GCC.
7
8GCC is free software; you can redistribute it and/or modify it under
9the terms of the GNU General Public License as published by the Free
9dcd6f09 10Software Foundation; either version 3, or (at your option) any later
689ba89d
ZD
11version.
12
13GCC is distributed in the hope that it will be useful, but WITHOUT ANY
14WARRANTY; without even the implied warranty of MERCHANTABILITY or
15FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
16for more details.
17
18You should have received a copy of the GNU General Public License
9dcd6f09
NC
19along with GCC; see the file COPYING3. If not see
20<http://www.gnu.org/licenses/>. */
689ba89d
ZD
21
22#include "config.h"
23#include "system.h"
24#include "coretypes.h"
25#include "tm.h"
26#include "rtl.h"
27#include "flags.h"
28#include "expr.h"
29#include "hard-reg-set.h"
30#include "basic-block.h"
718f9c0f 31#include "diagnostic-core.h"
689ba89d
ZD
32#include "tm_p.h"
33#include "cfgloop.h"
34#include "output.h"
35#include "params.h"
a71a498d 36#include "target.h"
689ba89d
ZD
37
38/* This module is used to modify loops with a determinable number of
39 iterations to use special low-overhead looping instructions.
40
41 It first validates whether the loop is well behaved and has a
42 determinable number of iterations (either at compile or run-time).
43 It then modifies the loop to use a low-overhead looping pattern as
44 follows:
45
46 1. A pseudo register is allocated as the loop iteration counter.
47
48 2. The number of loop iterations is calculated and is stored
49 in the loop counter.
50
51 3. At the end of the loop, the jump insn is replaced by the
52 doloop_end pattern. The compare must remain because it might be
53 used elsewhere. If the loop-variable or condition register are
54 used elsewhere, they will be eliminated by flow.
55
56 4. An optional doloop_begin pattern is inserted at the top of the
57 loop.
58
59 TODO The optimization should only performed when either the biv used for exit
60 condition is unused at all except for the exit test, or if we do not have to
61 change its value, since otherwise we have to add a new induction variable,
62 which usually will not pay up (unless the cost of the doloop pattern is
63 somehow extremely lower than the cost of compare & jump, or unless the bct
64 register cannot be used for anything else but doloop -- ??? detect these
65 cases). */
66
67#ifdef HAVE_doloop_end
68
69/* Return the loop termination condition for PATTERN or zero
70 if it is not a decrement and branch jump insn. */
71
75c70254 72rtx
46dc0789 73doloop_condition_get (rtx doloop_pat)
689ba89d
ZD
74{
75 rtx cmp;
76 rtx inc;
77 rtx reg;
75c70254 78 rtx inc_src;
689ba89d 79 rtx condition;
46dc0789 80 rtx pattern;
ce7b3761
RE
81 rtx cc_reg = NULL_RTX;
82 rtx reg_orig = NULL_RTX;
689ba89d 83
46dc0789
MN
84 /* The canonical doloop pattern we expect has one of the following
85 forms:
689ba89d 86
46dc0789
MN
87 1) (parallel [(set (pc) (if_then_else (condition)
88 (label_ref (label))
89 (pc)))
90 (set (reg) (plus (reg) (const_int -1)))
91 (additional clobbers and uses)])
689ba89d 92
46dc0789
MN
93 The branch must be the first entry of the parallel (also required
94 by jump.c), and the second entry of the parallel must be a set of
95 the loop counter register. Some targets (IA-64) wrap the set of
96 the loop counter in an if_then_else too.
75c70254 97
46dc0789
MN
98 2) (set (reg) (plus (reg) (const_int -1))
99 (set (pc) (if_then_else (reg != 0)
100 (label_ref (label))
ce7b3761
RE
101 (pc))).
102
103 Some targets (ARM) do the comparison before the branch, as in the
104 following form:
105
106 3) (parallel [(set (cc) (compare ((plus (reg) (const_int -1), 0)))
107 (set (reg) (plus (reg) (const_int -1)))])
108 (set (pc) (if_then_else (cc == NE)
109 (label_ref (label))
110 (pc))) */
46dc0789
MN
111
112 pattern = PATTERN (doloop_pat);
689ba89d
ZD
113
114 if (GET_CODE (pattern) != PARALLEL)
46dc0789
MN
115 {
116 rtx cond;
0be955e7 117 rtx prev_insn = prev_nondebug_insn (doloop_pat);
ce7b3761
RE
118 rtx cmp_arg1, cmp_arg2;
119 rtx cmp_orig;
46dc0789 120
ce7b3761
RE
121 /* In case the pattern is not PARALLEL we expect two forms
122 of doloop which are cases 2) and 3) above: in case 2) the
123 decrement immediately precedes the branch, while in case 3)
124 the compare and decrement instructions immediately precede
125 the branch. */
689ba89d 126
0be955e7 127 if (prev_insn == NULL_RTX || !INSN_P (prev_insn))
46dc0789
MN
128 return 0;
129
130 cmp = pattern;
ce7b3761
RE
131 if (GET_CODE (PATTERN (prev_insn)) == PARALLEL)
132 {
133 /* The third case: the compare and decrement instructions
134 immediately precede the branch. */
135 cmp_orig = XVECEXP (PATTERN (prev_insn), 0, 0);
136 if (GET_CODE (cmp_orig) != SET)
137 return 0;
138 if (GET_CODE (SET_SRC (cmp_orig)) != COMPARE)
139 return 0;
140 cmp_arg1 = XEXP (SET_SRC (cmp_orig), 0);
141 cmp_arg2 = XEXP (SET_SRC (cmp_orig), 1);
142 if (cmp_arg2 != const0_rtx
143 || GET_CODE (cmp_arg1) != PLUS)
144 return 0;
145 reg_orig = XEXP (cmp_arg1, 0);
146 if (XEXP (cmp_arg1, 1) != GEN_INT (-1)
147 || !REG_P (reg_orig))
148 return 0;
149 cc_reg = SET_DEST (cmp_orig);
150
151 inc = XVECEXP (PATTERN (prev_insn), 0, 1);
152 }
153 else
b8abece3 154 inc = PATTERN (prev_insn);
46dc0789
MN
155 /* We expect the condition to be of the form (reg != 0) */
156 cond = XEXP (SET_SRC (cmp), 0);
157 if (GET_CODE (cond) != NE || XEXP (cond, 1) != const0_rtx)
158 return 0;
46dc0789
MN
159 }
160 else
161 {
162 cmp = XVECEXP (pattern, 0, 0);
163 inc = XVECEXP (pattern, 0, 1);
164 }
689ba89d
ZD
165
166 /* Check for (set (reg) (something)). */
75c70254 167 if (GET_CODE (inc) != SET)
689ba89d 168 return 0;
689ba89d 169 reg = SET_DEST (inc);
75c70254
SB
170 if (! REG_P (reg))
171 return 0;
172
173 /* Check if something = (plus (reg) (const_int -1)).
174 On IA-64, this decrement is wrapped in an if_then_else. */
175 inc_src = SET_SRC (inc);
176 if (GET_CODE (inc_src) == IF_THEN_ELSE)
177 inc_src = XEXP (inc_src, 1);
178 if (GET_CODE (inc_src) != PLUS
179 || XEXP (inc_src, 0) != reg
180 || XEXP (inc_src, 1) != constm1_rtx)
181 return 0;
689ba89d
ZD
182
183 /* Check for (set (pc) (if_then_else (condition)
184 (label_ref (label))
185 (pc))). */
186 if (GET_CODE (cmp) != SET
187 || SET_DEST (cmp) != pc_rtx
188 || GET_CODE (SET_SRC (cmp)) != IF_THEN_ELSE
189 || GET_CODE (XEXP (SET_SRC (cmp), 1)) != LABEL_REF
190 || XEXP (SET_SRC (cmp), 2) != pc_rtx)
191 return 0;
192
193 /* Extract loop termination condition. */
194 condition = XEXP (SET_SRC (cmp), 0);
195
75c70254
SB
196 /* We expect a GE or NE comparison with 0 or 1. */
197 if ((GET_CODE (condition) != GE
198 && GET_CODE (condition) != NE)
199 || (XEXP (condition, 1) != const0_rtx
200 && XEXP (condition, 1) != const1_rtx))
689ba89d
ZD
201 return 0;
202
75c70254 203 if ((XEXP (condition, 0) == reg)
ce7b3761
RE
204 /* For the third case: */
205 || ((cc_reg != NULL_RTX)
206 && (XEXP (condition, 0) == cc_reg)
207 && (reg_orig == reg))
75c70254 208 || (GET_CODE (XEXP (condition, 0)) == PLUS
ce7b3761 209 && XEXP (XEXP (condition, 0), 0) == reg))
46dc0789
MN
210 {
211 if (GET_CODE (pattern) != PARALLEL)
ce7b3761 212 /* For the second form we expect:
46dc0789
MN
213
214 (set (reg) (plus (reg) (const_int -1))
215 (set (pc) (if_then_else (reg != 0)
216 (label_ref (label))
217 (pc))).
218
219 is equivalent to the following:
220
221 (parallel [(set (pc) (if_then_else (reg != 1)
222 (label_ref (label))
223 (pc)))
224 (set (reg) (plus (reg) (const_int -1)))
225 (additional clobbers and uses)])
226
ce7b3761
RE
227 For the third form we expect:
228
229 (parallel [(set (cc) (compare ((plus (reg) (const_int -1)), 0))
230 (set (reg) (plus (reg) (const_int -1)))])
231 (set (pc) (if_then_else (cc == NE)
232 (label_ref (label))
233 (pc)))
234
235 which is equivalent to the following:
236
237 (parallel [(set (cc) (compare (reg, 1))
238 (set (reg) (plus (reg) (const_int -1)))
239 (set (pc) (if_then_else (NE == cc)
240 (label_ref (label))
241 (pc))))])
242
243 So we return the second form instead for the two cases.
244
46dc0789
MN
245 */
246 condition = gen_rtx_fmt_ee (NE, VOIDmode, inc_src, const1_rtx);
247
689ba89d 248 return condition;
46dc0789 249 }
689ba89d
ZD
250
251 /* ??? If a machine uses a funny comparison, we could return a
1ae58c30 252 canonicalized form here. */
689ba89d
ZD
253
254 return 0;
255}
256
257/* Return nonzero if the loop specified by LOOP is suitable for
258 the use of special low-overhead looping instructions. DESC
259 describes the number of iterations of the loop. */
260
261static bool
262doloop_valid_p (struct loop *loop, struct niter_desc *desc)
263{
264 basic_block *body = get_loop_body (loop), bb;
265 rtx insn;
266 unsigned i;
7600f094 267 bool result = true;
689ba89d
ZD
268
269 /* Check for loops that may not terminate under special conditions. */
270 if (!desc->simple_p
271 || desc->assumptions
272 || desc->infinite)
273 {
274 /* There are some cases that would require a special attention.
275 For example if the comparison is LEU and the comparison value
276 is UINT_MAX then the loop will not terminate. Similarly, if the
277 comparison code is GEU and the comparison value is 0, the
278 loop will not terminate.
279
280 If the absolute increment is not 1, the loop can be infinite
281 even with LTU/GTU, e.g. for (i = 3; i > 0; i -= 2)
282
283 ??? We could compute these conditions at run-time and have a
284 additional jump around the loop to ensure an infinite loop.
285 However, it is very unlikely that this is the intended
286 behavior of the loop and checking for these rare boundary
287 conditions would pessimize all other code.
288
289 If the loop is executed only a few times an extra check to
290 restart the loop could use up most of the benefits of using a
291 count register loop. Note however, that normally, this
292 restart branch would never execute, so it could be predicted
293 well by the CPU. We should generate the pessimistic code by
294 default, and have an option, e.g. -funsafe-loops that would
295 enable count-register loops in this case. */
296 if (dump_file)
297 fprintf (dump_file, "Doloop: Possible infinite iteration case.\n");
7600f094
AP
298 result = false;
299 goto cleanup;
689ba89d
ZD
300 }
301
302 for (i = 0; i < loop->num_nodes; i++)
303 {
304 bb = body[i];
305
306 for (insn = BB_HEAD (bb);
307 insn != NEXT_INSN (BB_END (bb));
308 insn = NEXT_INSN (insn))
309 {
a71a498d
AS
310 /* Different targets have different necessities for low-overhead
311 looping. Call the back end for each instruction within the loop
e7e64a25
AS
312 to let it decide whether the insn prohibits a low-overhead loop.
313 It will then return the cause for it to emit to the dump file. */
314 const char * invalid = targetm.invalid_within_doloop (insn);
315 if (invalid)
316 {
317 if (dump_file)
318 fprintf (dump_file, "Doloop: %s\n", invalid);
7600f094
AP
319 result = false;
320 goto cleanup;
e7e64a25 321 }
689ba89d
ZD
322 }
323 }
7600f094
AP
324 result = true;
325
326cleanup:
689ba89d
ZD
327 free (body);
328
7600f094 329 return result;
689ba89d
ZD
330}
331
ed541ddb
ZD
332/* Adds test of COND jumping to DEST on edge *E and set *E to the new fallthru
333 edge. If the condition is always false, do not do anything. If it is always
334 true, redirect E to DEST and return false. In all other cases, true is
335 returned. */
689ba89d 336
ed541ddb
ZD
337static bool
338add_test (rtx cond, edge *e, basic_block dest)
689ba89d
ZD
339{
340 rtx seq, jump, label;
341 enum machine_mode mode;
342 rtx op0 = XEXP (cond, 0), op1 = XEXP (cond, 1);
343 enum rtx_code code = GET_CODE (cond);
ed541ddb 344 basic_block bb;
689ba89d
ZD
345
346 mode = GET_MODE (XEXP (cond, 0));
347 if (mode == VOIDmode)
348 mode = GET_MODE (XEXP (cond, 1));
349
350 start_sequence ();
351 op0 = force_operand (op0, NULL_RTX);
352 op1 = force_operand (op1, NULL_RTX);
353 label = block_label (dest);
2bdd49f4
JJ
354 do_compare_rtx_and_jump (op0, op1, code, 0, mode, NULL_RTX,
355 NULL_RTX, label, -1);
689ba89d
ZD
356
357 jump = get_last_insn ();
7d93d987 358 if (!jump || !JUMP_P (jump))
f937df35 359 {
ed541ddb
ZD
360 /* The condition is always false and the jump was optimized out. */
361 end_sequence ();
362 return true;
f937df35 363 }
689ba89d
ZD
364
365 seq = get_insns ();
366 end_sequence ();
7d93d987
ZD
367
368 /* There always is at least the jump insn in the sequence. */
369 gcc_assert (seq != NULL_RTX);
370
598ec7bd 371 bb = split_edge_and_insert (*e, seq);
ed541ddb
ZD
372 *e = single_succ_edge (bb);
373
374 if (any_uncondjump_p (jump))
375 {
376 /* The condition is always true. */
377 delete_insn (jump);
378 redirect_edge_and_branch_force (*e, dest);
379 return false;
380 }
b8698a0f 381
ed541ddb
ZD
382 JUMP_LABEL (jump) = label;
383
384 /* The jump is supposed to handle an unlikely special case. */
65c5f2a6
ILT
385 add_reg_note (jump, REG_BR_PROB, const0_rtx);
386
ed541ddb
ZD
387 LABEL_NUSES (label)++;
388
389 make_edge (bb, dest, (*e)->flags & ~EDGE_FALLTHRU);
390 return true;
689ba89d
ZD
391}
392
393/* Modify the loop to use the low-overhead looping insn where LOOP
394 describes the loop, DESC describes the number of iterations of the
395 loop, and DOLOOP_INSN is the low-overhead looping insn to emit at the
396 end of the loop. CONDITION is the condition separated from the
b2a38b1d
AP
397 DOLOOP_SEQ. COUNT is the number of iterations of the LOOP.
398 ZERO_EXTEND_P says to zero extend COUNT after the increment of it to
5b31bbab 399 word_mode from FROM_MODE. */
689ba89d
ZD
400
401static void
402doloop_modify (struct loop *loop, struct niter_desc *desc,
5b31bbab
AP
403 rtx doloop_seq, rtx condition, rtx count,
404 bool zero_extend_p, enum machine_mode from_mode)
689ba89d
ZD
405{
406 rtx counter_reg;
a82bbcbb 407 rtx tmp, noloop = NULL_RTX;
689ba89d
ZD
408 rtx sequence;
409 rtx jump_insn;
410 rtx jump_label;
ed541ddb 411 int nonneg = 0;
689ba89d
ZD
412 bool increment_count;
413 basic_block loop_end = desc->out_edge->src;
a82bbcbb 414 enum machine_mode mode;
80663107 415 rtx true_prob_val;
689ba89d
ZD
416
417 jump_insn = BB_END (loop_end);
418
419 if (dump_file)
420 {
421 fprintf (dump_file, "Doloop: Inserting doloop pattern (");
422 if (desc->const_iter)
423 fprintf (dump_file, HOST_WIDEST_INT_PRINT_DEC, desc->niter);
424 else
425 fputs ("runtime", dump_file);
426 fputs (" iterations).\n", dump_file);
427 }
428
fa10beec 429 /* Get the probability of the original branch. If it exists we would
80663107
MN
430 need to update REG_BR_PROB of the new jump_insn. */
431 true_prob_val = find_reg_note (jump_insn, REG_BR_PROB, NULL_RTX);
432
689ba89d
ZD
433 /* Discard original jump to continue loop. The original compare
434 result may still be live, so it cannot be discarded explicitly. */
435 delete_insn (jump_insn);
436
437 counter_reg = XEXP (condition, 0);
438 if (GET_CODE (counter_reg) == PLUS)
439 counter_reg = XEXP (counter_reg, 0);
a82bbcbb 440 mode = GET_MODE (counter_reg);
689ba89d 441
689ba89d
ZD
442 increment_count = false;
443 switch (GET_CODE (condition))
444 {
445 case NE:
446 /* Currently only NE tests against zero and one are supported. */
b5e624c6
NS
447 noloop = XEXP (condition, 1);
448 if (noloop != const0_rtx)
689ba89d 449 {
b5e624c6 450 gcc_assert (noloop == const1_rtx);
689ba89d 451 increment_count = true;
689ba89d 452 }
689ba89d
ZD
453 break;
454
455 case GE:
456 /* Currently only GE tests against zero are supported. */
b5e624c6 457 gcc_assert (XEXP (condition, 1) == const0_rtx);
689ba89d
ZD
458
459 noloop = constm1_rtx;
460
461 /* The iteration count does not need incrementing for a GE test. */
462 increment_count = false;
463
464 /* Determine if the iteration counter will be non-negative.
465 Note that the maximum value loaded is iterations_max - 1. */
466 if (desc->niter_max
467 <= ((unsigned HOST_WIDEST_INT) 1
a82bbcbb 468 << (GET_MODE_BITSIZE (mode) - 1)))
689ba89d
ZD
469 nonneg = 1;
470 break;
471
1c43d3ca 472 /* Abort if an invalid doloop pattern has been generated. */
8127d0e0 473 default:
b5e624c6 474 gcc_unreachable ();
689ba89d
ZD
475 }
476
477 if (increment_count)
5b31bbab 478 count = simplify_gen_binary (PLUS, from_mode, count, const1_rtx);
689ba89d 479
b2a38b1d
AP
480 if (zero_extend_p)
481 count = simplify_gen_unary (ZERO_EXTEND, word_mode,
5b31bbab 482 count, from_mode);
b2a38b1d 483
689ba89d
ZD
484 /* Insert initialization of the count register into the loop header. */
485 start_sequence ();
486 tmp = force_operand (count, counter_reg);
487 convert_move (counter_reg, tmp, 1);
488 sequence = get_insns ();
489 end_sequence ();
490 emit_insn_after (sequence, BB_END (loop_preheader_edge (loop)->src));
491
492 if (desc->noloop_assumptions)
493 {
30d3fc60 494 rtx ass = copy_rtx (desc->noloop_assumptions);
689ba89d
ZD
495 basic_block preheader = loop_preheader_edge (loop)->src;
496 basic_block set_zero
598ec7bd 497 = split_edge (loop_preheader_edge (loop));
689ba89d 498 basic_block new_preheader
598ec7bd 499 = split_edge (loop_preheader_edge (loop));
689ba89d 500 edge te;
689ba89d
ZD
501
502 /* Expand the condition testing the assumptions and if it does not pass,
503 reset the count register to 0. */
ed541ddb 504 redirect_edge_and_branch_force (single_succ_edge (preheader), new_preheader);
689ba89d
ZD
505 set_immediate_dominator (CDI_DOMINATORS, new_preheader, preheader);
506
507 set_zero->count = 0;
508 set_zero->frequency = 0;
509
ed541ddb
ZD
510 te = single_succ_edge (preheader);
511 for (; ass; ass = XEXP (ass, 1))
512 if (!add_test (XEXP (ass, 0), &te, set_zero))
513 break;
514
515 if (ass)
689ba89d 516 {
ed541ddb
ZD
517 /* We reached a condition that is always true. This is very hard to
518 reproduce (such a loop does not roll, and thus it would most
519 likely get optimized out by some of the preceding optimizations).
520 In fact, I do not have any testcase for it. However, it would
521 also be very hard to show that it is impossible, so we must
522 handle this case. */
523 set_zero->count = preheader->count;
524 set_zero->frequency = preheader->frequency;
689ba89d 525 }
b8698a0f 526
ed541ddb
ZD
527 if (EDGE_COUNT (set_zero->preds) == 0)
528 {
529 /* All the conditions were simplified to false, remove the
530 unreachable set_zero block. */
ed541ddb
ZD
531 delete_basic_block (set_zero);
532 }
533 else
534 {
535 /* Reset the counter to zero in the set_zero block. */
536 start_sequence ();
537 convert_move (counter_reg, noloop, 0);
538 sequence = get_insns ();
539 end_sequence ();
540 emit_insn_after (sequence, BB_END (set_zero));
b8698a0f 541
ed541ddb 542 set_immediate_dominator (CDI_DOMINATORS, set_zero,
66f97d31
ZD
543 recompute_dominator (CDI_DOMINATORS,
544 set_zero));
ed541ddb
ZD
545 }
546
547 set_immediate_dominator (CDI_DOMINATORS, new_preheader,
66f97d31
ZD
548 recompute_dominator (CDI_DOMINATORS,
549 new_preheader));
689ba89d
ZD
550 }
551
552 /* Some targets (eg, C4x) need to initialize special looping
553 registers. */
554#ifdef HAVE_doloop_begin
555 {
556 rtx init;
557 unsigned level = get_loop_level (loop) + 1;
558 init = gen_doloop_begin (counter_reg,
559 desc->const_iter ? desc->niter_expr : const0_rtx,
1984d2e8 560 GEN_INT (desc->niter_max),
689ba89d
ZD
561 GEN_INT (level));
562 if (init)
563 {
564 start_sequence ();
565 emit_insn (init);
566 sequence = get_insns ();
567 end_sequence ();
568 emit_insn_after (sequence, BB_END (loop_preheader_edge (loop)->src));
569 }
570 }
571#endif
572
573 /* Insert the new low-overhead looping insn. */
574 emit_jump_insn_after (doloop_seq, BB_END (loop_end));
575 jump_insn = BB_END (loop_end);
576 jump_label = block_label (desc->in_edge->dest);
577 JUMP_LABEL (jump_insn) = jump_label;
578 LABEL_NUSES (jump_label)++;
579
580 /* Ensure the right fallthru edge is marked, for case we have reversed
581 the condition. */
582 desc->in_edge->flags &= ~EDGE_FALLTHRU;
583 desc->out_edge->flags |= EDGE_FALLTHRU;
584
585 /* Add a REG_NONNEG note if the actual or estimated maximum number
586 of iterations is non-negative. */
587 if (nonneg)
65c5f2a6
ILT
588 add_reg_note (jump_insn, REG_NONNEG, NULL_RTX);
589
80663107
MN
590 /* Update the REG_BR_PROB note. */
591 if (true_prob_val)
592 {
593 /* Seems safer to use the branch probability. */
b8698a0f 594 add_reg_note (jump_insn, REG_BR_PROB,
65c5f2a6 595 GEN_INT (desc->in_edge->probability));
80663107 596 }
689ba89d
ZD
597}
598
599/* Process loop described by LOOP validating that the loop is suitable for
600 conversion to use a low overhead looping instruction, replacing the jump
601 insn where suitable. Returns true if the loop was successfully
602 modified. */
603
604static bool
605doloop_optimize (struct loop *loop)
606{
607 enum machine_mode mode;
608 rtx doloop_seq, doloop_pat, doloop_reg;
a82bbcbb 609 rtx iterations, count;
689ba89d
ZD
610 rtx iterations_max;
611 rtx start_label;
612 rtx condition;
7607bdda
AT
613 unsigned level, est_niter;
614 int max_cost;
689ba89d 615 struct niter_desc *desc;
a82bbcbb
ZD
616 unsigned word_mode_size;
617 unsigned HOST_WIDE_INT word_mode_max;
b2a38b1d 618 bool zero_extend_p = false;
689ba89d
ZD
619
620 if (dump_file)
621 fprintf (dump_file, "Doloop: Processing loop %d.\n", loop->num);
622
623 iv_analysis_loop_init (loop);
624
625 /* Find the simple exit of a LOOP. */
626 desc = get_simple_loop_desc (loop);
627
628 /* Check that loop is a candidate for a low-overhead looping insn. */
629 if (!doloop_valid_p (loop, desc))
630 {
631 if (dump_file)
632 fprintf (dump_file,
633 "Doloop: The loop is not suitable.\n");
634 return false;
635 }
636 mode = desc->mode;
637
638 est_niter = 3;
639 if (desc->const_iter)
640 est_niter = desc->niter;
641 /* If the estimate on number of iterations is reliable (comes from profile
642 feedback), use it. Do not use it normally, since the expected number
643 of iterations of an unrolled loop is 2. */
644 if (loop->header->count)
645 est_niter = expected_loop_iterations (loop);
646
647 if (est_niter < 3)
648 {
649 if (dump_file)
650 fprintf (dump_file,
651 "Doloop: Too few iterations (%u) to be profitable.\n",
652 est_niter);
653 return false;
654 }
655
45b9a14b
BS
656 max_cost
657 = COSTS_N_INSNS (PARAM_VALUE (PARAM_MAX_ITERATIONS_COMPUTATION_COST));
f40751dd
JH
658 if (rtx_cost (desc->niter_expr, SET, optimize_loop_for_speed_p (loop))
659 > max_cost)
45b9a14b
BS
660 {
661 if (dump_file)
662 fprintf (dump_file,
405f0587 663 "Doloop: number of iterations too costly to compute.\n");
45b9a14b
BS
664 return false;
665 }
666
a82bbcbb 667 count = copy_rtx (desc->niter_expr);
689ba89d
ZD
668 iterations = desc->const_iter ? desc->niter_expr : const0_rtx;
669 iterations_max = GEN_INT (desc->niter_max);
670 level = get_loop_level (loop) + 1;
671
672 /* Generate looping insn. If the pattern FAILs then give up trying
673 to modify the loop since there is some aspect the back-end does
674 not like. */
675 start_label = block_label (desc->in_edge->dest);
676 doloop_reg = gen_reg_rtx (mode);
677 doloop_seq = gen_doloop_end (doloop_reg, iterations, iterations_max,
678 GEN_INT (level), start_label);
a82bbcbb
ZD
679
680 word_mode_size = GET_MODE_BITSIZE (word_mode);
681 word_mode_max
682 = ((unsigned HOST_WIDE_INT) 1 << (word_mode_size - 1) << 1) - 1;
683 if (! doloop_seq
684 && mode != word_mode
685 /* Before trying mode different from the one in that # of iterations is
686 computed, we must be sure that the number of iterations fits into
687 the new mode. */
688 && (word_mode_size >= GET_MODE_BITSIZE (mode)
689 || desc->niter_max <= word_mode_max))
689ba89d 690 {
a82bbcbb
ZD
691 if (word_mode_size > GET_MODE_BITSIZE (mode))
692 {
b2a38b1d 693 zero_extend_p = true;
a82bbcbb
ZD
694 iterations = simplify_gen_unary (ZERO_EXTEND, word_mode,
695 iterations, mode);
696 iterations_max = simplify_gen_unary (ZERO_EXTEND, word_mode,
697 iterations_max, mode);
698 }
699 else
700 {
701 count = lowpart_subreg (word_mode, count, mode);
702 iterations = lowpart_subreg (word_mode, iterations, mode);
703 iterations_max = lowpart_subreg (word_mode, iterations_max, mode);
704 }
689ba89d
ZD
705 PUT_MODE (doloop_reg, word_mode);
706 doloop_seq = gen_doloop_end (doloop_reg, iterations, iterations_max,
707 GEN_INT (level), start_label);
708 }
709 if (! doloop_seq)
710 {
711 if (dump_file)
712 fprintf (dump_file,
713 "Doloop: Target unwilling to use doloop pattern!\n");
714 return false;
715 }
716
717 /* If multiple instructions were created, the last must be the
718 jump instruction. Also, a raw define_insn may yield a plain
719 pattern. */
720 doloop_pat = doloop_seq;
721 if (INSN_P (doloop_pat))
722 {
723 while (NEXT_INSN (doloop_pat) != NULL_RTX)
724 doloop_pat = NEXT_INSN (doloop_pat);
46dc0789 725 if (!JUMP_P (doloop_pat))
689ba89d
ZD
726 doloop_pat = NULL_RTX;
727 }
728
729 if (! doloop_pat
730 || ! (condition = doloop_condition_get (doloop_pat)))
731 {
732 if (dump_file)
733 fprintf (dump_file, "Doloop: Unrecognizable doloop pattern!\n");
734 return false;
735 }
736
5b31bbab
AP
737 doloop_modify (loop, desc, doloop_seq, condition, count,
738 zero_extend_p, mode);
689ba89d
ZD
739 return true;
740}
741
d73be268 742/* This is the main entry point. Process all loops using doloop_optimize. */
689ba89d
ZD
743
744void
d73be268 745doloop_optimize_loops (void)
689ba89d 746{
42fd6772 747 loop_iterator li;
689ba89d
ZD
748 struct loop *loop;
749
42fd6772 750 FOR_EACH_LOOP (li, loop, 0)
689ba89d 751 {
689ba89d
ZD
752 doloop_optimize (loop);
753 }
754
755 iv_analysis_done ();
756
757#ifdef ENABLE_CHECKING
758 verify_dominators (CDI_DOMINATORS);
d73be268 759 verify_loop_structure ();
689ba89d
ZD
760#endif
761}
762#endif /* HAVE_doloop_end */
763