]>
Commit | Line | Data |
---|---|---|
5527bf14 RH |
1 | /* Perform doloop optimizations |
2 | Copyright (C) 1999, 2000 Free Software Foundation, Inc. | |
3 | Contributed by Michael P. Hayes (m.hayes@elec.canterbury.ac.nz) | |
4 | ||
5 | This file is part of GNU CC. | |
6 | ||
7 | GNU CC is free software; you can redistribute it and/or modify | |
8 | it under the terms of the GNU General Public License as published by | |
9 | the Free Software Foundation; either version 2, or (at your option) | |
10 | any later version. | |
11 | ||
12 | GNU CC is distributed in the hope that it will be useful, | |
13 | but WITHOUT ANY WARRANTY; without even the implied warranty of | |
14 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | |
15 | GNU General Public License for more details. | |
16 | ||
17 | You should have received a copy of the GNU General Public License | |
18 | along with GNU CC; see the file COPYING. If not, write to | |
19 | the Free Software Foundation, 59 Temple Place - Suite 330, | |
20 | Boston, MA 02111-1307, USA. */ | |
21 | ||
22 | #include "config.h" | |
23 | #include "system.h" | |
24 | #include "rtl.h" | |
5527bf14 RH |
25 | #include "flags.h" |
26 | #include "expr.h" | |
27 | #include "loop.h" | |
28 | #include "hard-reg-set.h" | |
29 | #include "basic-block.h" | |
04f378ce | 30 | #include "toplev.h" |
5527bf14 RH |
31 | #include "tm_p.h" |
32 | ||
33 | ||
34 | /* This module is used to modify loops with a determinable number of | |
35 | iterations to use special low-overhead looping instructions. | |
36 | ||
37 | It first validates whether the loop is well behaved and has a | |
38 | determinable number of iterations (either at compile or run-time). | |
39 | It then modifies the loop to use a low-overhead looping pattern as | |
40 | follows: | |
41 | ||
42 | 1. A pseudo register is allocated as the loop iteration counter. | |
43 | ||
44 | 2. The number of loop iterations is calculated and is stored | |
45 | in the loop counter. | |
46 | ||
47 | 3. At the end of the loop, the jump insn is replaced by the | |
48 | doloop_end pattern. The compare must remain because it might be | |
49 | used elsewhere. If the loop-variable or condition register are | |
50 | used elsewhere, they will be eliminated by flow. | |
51 | ||
52 | 4. An optional doloop_begin pattern is inserted at the top of the | |
53 | loop. | |
54 | */ | |
55 | ||
56 | ||
57 | #ifdef HAVE_doloop_end | |
58 | ||
59 | static rtx doloop_condition_get | |
60 | PARAMS ((rtx)); | |
61 | static unsigned HOST_WIDE_INT doloop_iterations_max | |
62 | PARAMS ((const struct loop_info *, enum machine_mode, int)); | |
63 | static int doloop_valid_p | |
64 | PARAMS ((const struct loop *, rtx)); | |
65 | static int doloop_modify | |
66 | PARAMS ((const struct loop *, rtx, rtx, rtx, rtx, rtx)); | |
67 | static int doloop_modify_runtime | |
68 | PARAMS ((const struct loop *, rtx, rtx, rtx, enum machine_mode, rtx)); | |
69 | ||
70 | ||
71 | /* Return the loop termination condition for PATTERN or zero | |
72 | if it is not a decrement and branch jump insn. */ | |
73 | static rtx | |
74 | doloop_condition_get (pattern) | |
75 | rtx pattern; | |
76 | { | |
77 | rtx cmp; | |
78 | rtx inc; | |
79 | rtx reg; | |
80 | rtx condition; | |
81 | ||
82 | /* The canonical doloop pattern we expect is: | |
83 | ||
84 | (parallel [(set (pc) (if_then_else (condition) | |
85 | (label_ref (label)) | |
86 | (pc))) | |
87 | (set (reg) (plus (reg) (const_int -1))) | |
88 | (additional clobbers and uses)]) | |
89 | ||
90 | Some machines (IA-64) make the decrement conditional on | |
91 | the condition as well, so we don't bother verifying the | |
92 | actual decrement. In summary, the branch must be the | |
93 | first entry of the parallel (also required by jump.c), | |
94 | and the second entry of the parallel must be a set of | |
95 | the loop counter register. */ | |
96 | ||
97 | if (GET_CODE (pattern) != PARALLEL) | |
98 | return 0; | |
99 | ||
100 | cmp = XVECEXP (pattern, 0, 0); | |
101 | inc = XVECEXP (pattern, 0, 1); | |
102 | ||
103 | /* Check for (set (reg) (something)). */ | |
104 | if (GET_CODE (inc) != SET || ! REG_P (SET_DEST (inc))) | |
105 | return 0; | |
106 | ||
107 | /* Extract loop counter register. */ | |
108 | reg = SET_DEST (inc); | |
109 | ||
110 | /* Check for (set (pc) (if_then_else (condition) | |
111 | (label_ref (label)) | |
112 | (pc))). */ | |
113 | if (GET_CODE (cmp) != SET | |
114 | || SET_DEST (cmp) != pc_rtx | |
115 | || GET_CODE (SET_SRC (cmp)) != IF_THEN_ELSE | |
116 | || GET_CODE (XEXP (SET_SRC (cmp), 1)) != LABEL_REF | |
117 | || XEXP (SET_SRC (cmp), 2) != pc_rtx) | |
118 | return 0; | |
119 | ||
120 | /* Extract loop termination condition. */ | |
121 | condition = XEXP (SET_SRC (cmp), 0); | |
122 | ||
123 | if ((GET_CODE (condition) != GE && GET_CODE (condition) != NE) | |
124 | || GET_CODE (XEXP (condition, 1)) != CONST_INT) | |
125 | return 0; | |
126 | ||
127 | if (XEXP (condition, 0) == reg) | |
128 | return condition; | |
129 | ||
130 | if (GET_CODE (XEXP (condition, 0)) == PLUS | |
131 | && XEXP (XEXP (condition, 0), 0) == reg) | |
132 | return condition; | |
133 | ||
134 | /* ??? If a machine uses a funny comparison, we could return a | |
135 | canonicalised form here. */ | |
136 | ||
137 | return 0; | |
138 | } | |
139 | ||
140 | ||
141 | /* Return an estimate of the maximum number of loop iterations for the | |
142 | loop specified by LOOP or zero if the loop is not normal. | |
143 | MODE is the mode of the iteration count and NONNEG is non-zero if | |
144 | the the iteration count has been proved to be non-negative. */ | |
145 | static unsigned HOST_WIDE_INT | |
146 | doloop_iterations_max (loop_info, mode, nonneg) | |
147 | const struct loop_info *loop_info; | |
148 | enum machine_mode mode; | |
149 | int nonneg; | |
150 | { | |
151 | unsigned HOST_WIDE_INT n_iterations_max; | |
152 | enum rtx_code code; | |
153 | rtx min_value; | |
154 | rtx max_value; | |
155 | HOST_WIDE_INT abs_inc; | |
156 | int neg_inc; | |
157 | ||
158 | neg_inc = 0; | |
159 | abs_inc = INTVAL (loop_info->increment); | |
160 | if (abs_inc < 0) | |
161 | { | |
162 | abs_inc = -abs_inc; | |
163 | neg_inc = 1; | |
164 | } | |
165 | ||
166 | if (neg_inc) | |
167 | { | |
168 | code = swap_condition (loop_info->comparison_code); | |
169 | min_value = loop_info->final_equiv_value; | |
170 | max_value = loop_info->initial_equiv_value; | |
171 | } | |
172 | else | |
173 | { | |
174 | code = loop_info->comparison_code; | |
175 | min_value = loop_info->initial_equiv_value; | |
176 | max_value = loop_info->final_equiv_value; | |
177 | } | |
178 | ||
179 | /* Since the loop has a VTOP, we know that the initial test will be | |
180 | true and thus the value of max_value should be greater than the | |
181 | value of min_value. Thus the difference should always be positive | |
182 | and the code must be LT, LE, LTU, LEU, or NE. Otherwise the loop is | |
183 | not normal, e.g., `for (i = 0; i < 10; i--)'. */ | |
184 | switch (code) | |
185 | { | |
186 | case LTU: | |
187 | case LEU: | |
188 | { | |
189 | unsigned HOST_WIDE_INT umax; | |
190 | unsigned HOST_WIDE_INT umin; | |
191 | ||
192 | if (GET_CODE (min_value) == CONST_INT) | |
193 | umin = INTVAL (min_value); | |
194 | else | |
195 | umin = 0; | |
196 | ||
197 | if (GET_CODE (max_value) == CONST_INT) | |
198 | umax = INTVAL (max_value); | |
199 | else | |
e49a1d2e | 200 | umax = ((unsigned)2 << (GET_MODE_BITSIZE (mode) - 1)) - 1; |
5527bf14 RH |
201 | |
202 | n_iterations_max = umax - umin; | |
203 | break; | |
204 | } | |
205 | ||
206 | case LT: | |
207 | case LE: | |
208 | { | |
209 | HOST_WIDE_INT smax; | |
210 | HOST_WIDE_INT smin; | |
211 | ||
212 | if (GET_CODE (min_value) == CONST_INT) | |
213 | smin = INTVAL (min_value); | |
214 | else | |
e49a1d2e | 215 | smin = -((unsigned)1 << (GET_MODE_BITSIZE (mode) - 1)); |
5527bf14 RH |
216 | |
217 | if (GET_CODE (max_value) == CONST_INT) | |
218 | smax = INTVAL (max_value); | |
219 | else | |
e49a1d2e | 220 | smax = ((unsigned)1 << (GET_MODE_BITSIZE (mode) - 1)) - 1; |
5527bf14 RH |
221 | |
222 | n_iterations_max = smax - smin; | |
223 | break; | |
224 | } | |
225 | ||
226 | case NE: | |
227 | if (GET_CODE (min_value) == CONST_INT | |
228 | && GET_CODE (max_value) == CONST_INT) | |
229 | n_iterations_max = INTVAL (max_value) - INTVAL (min_value); | |
230 | else | |
231 | /* We need to conservatively assume that we might have the maximum | |
232 | number of iterations without any additional knowledge. */ | |
e49a1d2e | 233 | n_iterations_max = ((unsigned)2 << (GET_MODE_BITSIZE (mode) - 1)) - 1; |
5527bf14 RH |
234 | break; |
235 | ||
236 | default: | |
237 | return 0; | |
238 | } | |
239 | ||
240 | n_iterations_max /= abs_inc; | |
241 | ||
242 | /* If we know that the iteration count is non-negative then adjust | |
243 | n_iterations_max if it is so large that it appears negative. */ | |
e49a1d2e KG |
244 | if (nonneg |
245 | && n_iterations_max > ((unsigned)1 << (GET_MODE_BITSIZE (mode) - 1))) | |
246 | n_iterations_max = ((unsigned)1 << (GET_MODE_BITSIZE (mode) - 1)) - 1; | |
5527bf14 RH |
247 | |
248 | return n_iterations_max; | |
249 | } | |
250 | ||
251 | ||
252 | /* Return non-zero if the loop specified by LOOP is suitable for | |
253 | the use of special low-overhead looping instructions. */ | |
254 | static int | |
255 | doloop_valid_p (loop, jump_insn) | |
256 | const struct loop *loop; | |
257 | rtx jump_insn; | |
258 | { | |
259 | const struct loop_info *loop_info = LOOP_INFO (loop); | |
260 | ||
261 | /* The loop must have a conditional jump at the end. */ | |
262 | if (! any_condjump_p (jump_insn) | |
263 | || ! onlyjump_p (jump_insn)) | |
264 | { | |
265 | if (loop_dump_stream) | |
266 | fprintf (loop_dump_stream, | |
267 | "Doloop: Invalid jump at loop end.\n"); | |
268 | return 0; | |
269 | } | |
270 | ||
271 | /* Give up if a loop has been completely unrolled. */ | |
272 | if (loop_info->n_iterations == loop_info->unroll_number) | |
273 | { | |
274 | if (loop_dump_stream) | |
275 | fprintf (loop_dump_stream, | |
276 | "Doloop: Loop completely unrolled.\n"); | |
277 | return 0; | |
278 | } | |
279 | ||
280 | /* The loop must have a single exit target. A break or return | |
281 | statement within a loop will generate multiple loop exits. | |
282 | Another example of a loop that currently generates multiple exit | |
283 | targets is for (i = 0; i < (foo ? 8 : 4); i++) { }. */ | |
1db88ef9 | 284 | if (loop_info->has_multiple_exit_targets || loop->exit_count) |
5527bf14 RH |
285 | { |
286 | if (loop_dump_stream) | |
287 | fprintf (loop_dump_stream, | |
288 | "Doloop: Loop has multiple exit targets.\n"); | |
289 | return 0; | |
290 | } | |
291 | ||
292 | /* An indirect jump may jump out of the loop. */ | |
293 | if (loop_info->has_indirect_jump) | |
294 | { | |
295 | if (loop_dump_stream) | |
296 | fprintf (loop_dump_stream, | |
297 | "Doloop: Indirect jump in function.\n"); | |
298 | return 0; | |
299 | } | |
300 | ||
301 | /* A called function may clobber any special registers required for | |
302 | low-overhead looping. */ | |
303 | if (loop_info->has_call) | |
304 | { | |
305 | if (loop_dump_stream) | |
306 | fprintf (loop_dump_stream, | |
307 | "Doloop: Function call in loop.\n"); | |
308 | return 0; | |
309 | } | |
310 | ||
311 | /* Some targets (eg, PPC) use the count register for branch on table | |
312 | instructions. ??? This should be a target specific check. */ | |
313 | if (loop_info->has_tablejump) | |
314 | { | |
315 | if (loop_dump_stream) | |
316 | fprintf (loop_dump_stream, | |
317 | "Doloop: Computed branch in the loop.\n"); | |
318 | return 0; | |
319 | } | |
320 | ||
321 | if (! loop_info->increment) | |
322 | { | |
323 | if (loop_dump_stream) | |
324 | fprintf (loop_dump_stream, | |
325 | "Doloop: Could not determine iteration info.\n"); | |
326 | return 0; | |
327 | } | |
328 | ||
329 | if (GET_CODE (loop_info->increment) != CONST_INT) | |
330 | { | |
331 | if (loop_dump_stream) | |
332 | fprintf (loop_dump_stream, | |
333 | "Doloop: Increment not an integer constant.\n"); | |
334 | return 0; | |
335 | } | |
336 | ||
337 | /* There is no guarantee that a NE loop will terminate if the | |
338 | absolute increment is not unity. ??? We could compute this | |
339 | condition at run-time and have a additional jump around the loop | |
340 | to ensure an infinite loop. */ | |
341 | if (loop_info->comparison_code == NE | |
342 | && INTVAL (loop_info->increment) != -1 | |
343 | && INTVAL (loop_info->increment) != 1) | |
344 | { | |
345 | if (loop_dump_stream) | |
346 | fprintf (loop_dump_stream, | |
347 | "Doloop: NE loop with non-unity increment.\n"); | |
348 | return 0; | |
349 | } | |
350 | ||
351 | /* Check for loops that may not terminate under special conditions. */ | |
352 | if (! loop_info->n_iterations | |
353 | && ((loop_info->comparison_code == LEU | |
354 | && INTVAL (loop_info->increment) > 0) | |
355 | || (loop_info->comparison_code == GEU | |
356 | && INTVAL (loop_info->increment) < 0))) | |
357 | { | |
358 | /* If the comparison is LEU and the comparison value is UINT_MAX | |
359 | then the loop will not terminate. Similarly, if the | |
360 | comparison code is GEU and the initial value is 0, the loop | |
361 | will not terminate. | |
362 | ||
363 | Note that with LE and GE, the loop behaviour can be | |
364 | implementation dependent if an overflow occurs, say between | |
365 | INT_MAX and INT_MAX + 1. We thus don't have to worry about | |
366 | these two cases. | |
367 | ||
368 | ??? We could compute these conditions at run-time and have a | |
369 | additional jump around the loop to ensure an infinite loop. | |
370 | However, it is very unlikely that this is the intended | |
371 | behaviour of the loop and checking for these rare boundary | |
372 | conditions would pessimize all other code. */ | |
373 | if (loop_dump_stream) | |
374 | fprintf (loop_dump_stream, | |
375 | "Doloop: Possible infinite iteration case ignored.\n"); | |
376 | } | |
377 | ||
378 | return 1; | |
379 | } | |
380 | ||
381 | ||
382 | /* Modify the loop to use the low-overhead looping insn where LOOP | |
383 | describes the loop, ITERATIONS is an RTX containing the desired | |
384 | number of loop iterations, ITERATIONS_MAX is a CONST_INT specifying | |
385 | the maximum number of loop iterations, and DOLOOP_INSN is the | |
386 | low-overhead looping insn to emit at the end of the loop. This | |
387 | returns non-zero if it was successful. */ | |
388 | static int | |
389 | doloop_modify (loop, iterations, iterations_max, | |
390 | doloop_seq, start_label, condition) | |
391 | const struct loop *loop; | |
392 | rtx iterations; | |
393 | rtx iterations_max; | |
394 | rtx doloop_seq; | |
395 | rtx start_label; | |
396 | rtx condition; | |
397 | { | |
398 | rtx counter_reg; | |
399 | rtx count; | |
400 | rtx sequence; | |
401 | rtx jump_insn; | |
402 | int nonneg = 0; | |
403 | int decrement_count; | |
404 | ||
405 | jump_insn = prev_nonnote_insn (loop->end); | |
406 | ||
407 | if (loop_dump_stream) | |
408 | { | |
409 | fprintf (loop_dump_stream, "Doloop: Inserting doloop pattern ("); | |
410 | if (GET_CODE (iterations) == CONST_INT) | |
411 | fprintf (loop_dump_stream, HOST_WIDE_INT_PRINT_DEC, | |
412 | INTVAL (iterations)); | |
413 | else | |
414 | fputs ("runtime", loop_dump_stream); | |
415 | fputs (" iterations).", loop_dump_stream); | |
416 | } | |
417 | ||
1db88ef9 FS |
418 | /* Emit the label that will delimit the top of the loop. |
419 | This has to be done before the delete_insn call below, to prevent | |
420 | delete_insn from deleting too much. */ | |
421 | emit_label_after (start_label, loop->top ? loop->top : loop->start); | |
422 | LABEL_NUSES (start_label)++; | |
423 | ||
5527bf14 RH |
424 | /* Discard original jump to continue loop. The original compare |
425 | result may still be live, so it cannot be discarded explicitly. */ | |
426 | delete_insn (jump_insn); | |
427 | ||
5527bf14 RH |
428 | counter_reg = XEXP (condition, 0); |
429 | if (GET_CODE (counter_reg) == PLUS) | |
430 | counter_reg = XEXP (counter_reg, 0); | |
431 | ||
432 | start_sequence (); | |
433 | ||
434 | count = iterations; | |
435 | decrement_count = 0; | |
436 | switch (GET_CODE (condition)) | |
437 | { | |
438 | case NE: | |
439 | /* Currently only NE tests against zero and one are supported. */ | |
440 | if (XEXP (condition, 1) == const0_rtx) | |
441 | decrement_count = 1; | |
442 | else if (XEXP (condition, 1) != const1_rtx) | |
443 | abort (); | |
444 | break; | |
445 | ||
446 | case GE: | |
447 | /* Currently only GE tests against zero are supported. */ | |
448 | if (XEXP (condition, 1) != const0_rtx) | |
449 | abort (); | |
450 | ||
451 | /* The iteration count needs decrementing for a GE test. */ | |
452 | decrement_count = 1; | |
453 | ||
454 | /* Determine if the iteration counter will be non-negative. | |
455 | Note that the maximum value loaded is iterations_max - 1. */ | |
456 | if ((unsigned HOST_WIDE_INT) INTVAL (iterations_max) | |
e49a1d2e | 457 | <= ((unsigned)1 << (GET_MODE_BITSIZE (GET_MODE (counter_reg)) - 1))) |
5527bf14 RH |
458 | nonneg = 1; |
459 | break; | |
460 | ||
461 | /* Abort if an invalid doloop pattern has been generated. */ | |
462 | default: | |
463 | abort(); | |
464 | } | |
465 | ||
466 | if (decrement_count) | |
467 | { | |
468 | if (GET_CODE (count) == CONST_INT) | |
469 | count = GEN_INT (INTVAL (count) - 1); | |
470 | else | |
ef89d648 ZW |
471 | count = expand_simple_binop (GET_MODE (counter_reg), MINUS, |
472 | count, GEN_INT (1), | |
473 | 0, 0, OPTAB_LIB_WIDEN); | |
5527bf14 RH |
474 | } |
475 | ||
476 | /* Insert initialization of the count register into the loop header. */ | |
477 | convert_move (counter_reg, count, 1); | |
478 | sequence = gen_sequence (); | |
479 | end_sequence (); | |
480 | emit_insn_before (sequence, loop->start); | |
481 | ||
482 | /* Some targets (eg, C4x) need to initialize special looping | |
483 | registers. */ | |
484 | #ifdef HAVE_doloop_begin | |
485 | { | |
486 | rtx init; | |
487 | ||
488 | init = gen_doloop_begin (counter_reg, | |
489 | GET_CODE (iterations) == CONST_INT | |
490 | ? iterations : const0_rtx, iterations_max, | |
491 | GEN_INT (loop->level)); | |
492 | if (init) | |
493 | { | |
494 | start_sequence (); | |
495 | emit_insn (init); | |
496 | sequence = gen_sequence (); | |
497 | end_sequence (); | |
498 | emit_insn_after (sequence, loop->start); | |
499 | } | |
500 | } | |
501 | #endif | |
502 | ||
503 | /* Insert the new low-overhead looping insn. */ | |
504 | emit_jump_insn_before (doloop_seq, loop->end); | |
505 | jump_insn = prev_nonnote_insn (loop->end); | |
506 | JUMP_LABEL (jump_insn) = start_label; | |
507 | ||
508 | /* Add a REG_NONNEG note if the actual or estimated maximum number | |
509 | of iterations is non-negative. */ | |
510 | if (nonneg) | |
511 | { | |
512 | REG_NOTES (jump_insn) | |
513 | = gen_rtx_EXPR_LIST (REG_NONNEG, NULL_RTX, REG_NOTES (jump_insn)); | |
514 | } | |
515 | return 1; | |
516 | } | |
517 | ||
518 | ||
519 | /* Handle the more complex case, where the bounds are not known at | |
520 | compile time. In this case we generate a run_time calculation of | |
521 | the number of iterations. We rely on the existence of a run-time | |
522 | guard to ensure that the loop executes at least once, i.e., | |
523 | initial_value obeys the loop comparison condition. If a guard is | |
524 | not present, we emit one. The loop to modify is described by LOOP. | |
525 | ITERATIONS_MAX is a CONST_INT specifying the estimated maximum | |
526 | number of loop iterations. DOLOOP_INSN is the low-overhead looping | |
527 | insn to insert. Returns non-zero if loop successfully modified. */ | |
528 | static int | |
529 | doloop_modify_runtime (loop, iterations_max, | |
530 | doloop_seq, start_label, mode, condition) | |
531 | const struct loop *loop; | |
532 | rtx iterations_max; | |
533 | rtx doloop_seq; | |
534 | rtx start_label; | |
535 | enum machine_mode mode; | |
536 | rtx condition; | |
537 | { | |
538 | const struct loop_info *loop_info = LOOP_INFO (loop); | |
539 | HOST_WIDE_INT abs_inc; | |
540 | int neg_inc; | |
541 | rtx diff; | |
542 | rtx sequence; | |
543 | rtx iterations; | |
544 | rtx initial_value; | |
545 | rtx final_value; | |
546 | rtx increment; | |
547 | int unsigned_p; | |
548 | enum rtx_code comparison_code; | |
549 | ||
550 | increment = loop_info->increment; | |
551 | initial_value = loop_info->initial_value; | |
552 | final_value = loop_info->final_value; | |
553 | ||
554 | neg_inc = 0; | |
555 | abs_inc = INTVAL (increment); | |
556 | if (abs_inc < 0) | |
557 | { | |
558 | abs_inc = -abs_inc; | |
559 | neg_inc = 1; | |
560 | } | |
561 | ||
562 | comparison_code = loop_info->comparison_code; | |
563 | unsigned_p = (comparison_code == LTU | |
564 | || comparison_code == LEU | |
565 | || comparison_code == GTU | |
566 | || comparison_code == GEU | |
567 | || comparison_code == NE); | |
568 | ||
569 | /* The number of iterations (prior to any loop unrolling) is given by: | |
b05ecb16 BS |
570 | |
571 | n = (abs (final - initial) + abs_inc - 1) / abs_inc. | |
5527bf14 RH |
572 | |
573 | However, it is possible for the summation to overflow, and a | |
574 | safer method is: | |
575 | ||
b05ecb16 BS |
576 | n = abs (final - initial) / abs_inc; |
577 | n += (abs (final - initial) % abs_inc) != 0; | |
5527bf14 RH |
578 | |
579 | If the loop has been unrolled, then the loop body has been | |
b05ecb16 BS |
580 | preconditioned to iterate a multiple of unroll_number times. If |
581 | abs_inc is != 1, the full calculation is | |
582 | ||
583 | t1 = abs_inc * unroll_number; | |
584 | n = abs (final - initial) / t1; | |
585 | n += (abs (final - initial) % t1) > t1 - abs_inc; | |
5527bf14 RH |
586 | |
587 | The division and modulo operations can be avoided by requiring | |
588 | that the increment is a power of 2 (precondition_loop_p enforces | |
589 | this requirement). Nevertheless, the RTX_COSTS should be checked | |
590 | to see if a fast divmod is available. */ | |
591 | ||
592 | start_sequence (); | |
593 | /* abs (final - initial) */ | |
ef89d648 ZW |
594 | diff = expand_simple_binop (mode, MINUS, |
595 | copy_rtx (neg_inc ? initial_value : final_value), | |
596 | copy_rtx (neg_inc ? final_value : initial_value), | |
597 | NULL_RTX, unsigned_p, OPTAB_LIB_WIDEN); | |
5527bf14 | 598 | |
b05ecb16 | 599 | if (abs_inc * loop_info->unroll_number != 1) |
5527bf14 | 600 | { |
b05ecb16 BS |
601 | int shift_count; |
602 | rtx extra; | |
603 | rtx label; | |
604 | unsigned HOST_WIDE_INT limit; | |
605 | ||
606 | shift_count = exact_log2 (abs_inc * loop_info->unroll_number); | |
607 | if (shift_count < 0) | |
608 | abort (); | |
609 | ||
610 | /* abs (final - initial) / (abs_inc * unroll_number) */ | |
ef89d648 ZW |
611 | iterations = expand_simple_binop (GET_MODE (diff), LSHIFTRT, |
612 | diff, GEN_INT (shift_count), | |
613 | NULL_RTX, 1, | |
614 | OPTAB_LIB_WIDEN); | |
b05ecb16 | 615 | |
5527bf14 RH |
616 | if (abs_inc != 1) |
617 | { | |
b05ecb16 | 618 | /* abs (final - initial) % (abs_inc * unroll_number) */ |
ef89d648 ZW |
619 | rtx count = GEN_INT (abs_inc * loop_info->unroll_number - 1); |
620 | extra = expand_simple_binop (GET_MODE (iterations), AND, | |
621 | diff, count, NULL_RTX, 1, | |
622 | OPTAB_LIB_WIDEN); | |
5527bf14 | 623 | |
b05ecb16 BS |
624 | /* If (abs (final - initial) % (abs_inc * unroll_number) |
625 | <= abs_inc * (unroll - 1)), | |
626 | jump past following increment instruction. */ | |
5527bf14 | 627 | label = gen_label_rtx(); |
b05ecb16 BS |
628 | limit = abs_inc * (loop_info->unroll_number - 1); |
629 | emit_cmp_and_jump_insns (extra, GEN_INT (limit), | |
630 | limit == 0 ? EQ : LEU, NULL_RTX, | |
5527bf14 RH |
631 | GET_MODE (extra), 0, 0, label); |
632 | JUMP_LABEL (get_last_insn ()) = label; | |
633 | LABEL_NUSES (label)++; | |
634 | ||
635 | /* Increment the iteration count by one. */ | |
ef89d648 ZW |
636 | iterations = expand_simple_binop (GET_MODE (iterations), PLUS, |
637 | iterations, GEN_INT (1), | |
638 | iterations, 1, | |
639 | OPTAB_LIB_WIDEN); | |
5527bf14 RH |
640 | |
641 | emit_label (label); | |
642 | } | |
5527bf14 RH |
643 | } |
644 | else | |
b05ecb16 | 645 | iterations = diff; |
5527bf14 RH |
646 | |
647 | /* If there is a NOTE_INSN_LOOP_VTOP, we have a `for' or `while' | |
648 | style loop, with a loop exit test at the start. Thus, we can | |
649 | assume that the loop condition was true when the loop was | |
650 | entered. | |
651 | ||
652 | `do-while' loops require special treatment since the exit test is | |
653 | not executed before the start of the loop. We need to determine | |
654 | if the loop will terminate after the first pass and to limit the | |
655 | iteration count to one if necessary. */ | |
656 | if (! loop->vtop) | |
657 | { | |
658 | rtx label; | |
659 | ||
660 | if (loop_dump_stream) | |
661 | fprintf (loop_dump_stream, "Doloop: Do-while loop.\n"); | |
662 | ||
663 | /* A `do-while' loop must iterate at least once. If the | |
664 | iteration count is bogus, we set the iteration count to 1. | |
665 | Note that if the loop has been unrolled, then the loop body | |
666 | is guaranteed to execute at least once. */ | |
667 | if (loop_info->unroll_number == 1) | |
668 | { | |
669 | /* Emit insns to test if the loop will immediately | |
670 | terminate and to set the iteration count to 1 if true. */ | |
671 | label = gen_label_rtx(); | |
672 | emit_cmp_and_jump_insns (copy_rtx (initial_value), | |
673 | copy_rtx (loop_info->comparison_value), | |
674 | comparison_code, NULL_RTX, mode, 0, 0, | |
675 | label); | |
676 | JUMP_LABEL (get_last_insn ()) = label; | |
677 | LABEL_NUSES (label)++; | |
678 | emit_move_insn (iterations, const1_rtx); | |
679 | emit_label (label); | |
680 | } | |
681 | } | |
682 | ||
683 | sequence = gen_sequence (); | |
684 | end_sequence (); | |
685 | emit_insn_before (sequence, loop->start); | |
686 | ||
687 | return doloop_modify (loop, iterations, iterations_max, doloop_seq, | |
688 | start_label, condition); | |
689 | } | |
690 | ||
691 | ||
692 | /* This is the main entry point. Process loop described by LOOP | |
693 | validating that the loop is suitable for conversion to use a low | |
694 | overhead looping instruction, replacing the jump insn where | |
695 | suitable. We distinguish between loops with compile-time bounds | |
696 | and those with run-time bounds. Information from LOOP is used to | |
697 | compute the number of iterations and to determine whether the loop | |
698 | is a candidate for this optimization. Returns non-zero if loop | |
699 | successfully modified. */ | |
700 | int | |
701 | doloop_optimize (loop) | |
702 | const struct loop *loop; | |
703 | { | |
704 | struct loop_info *loop_info = LOOP_INFO (loop); | |
705 | rtx initial_value; | |
706 | rtx final_value; | |
707 | rtx increment; | |
708 | rtx jump_insn; | |
709 | enum machine_mode mode; | |
710 | unsigned HOST_WIDE_INT n_iterations; | |
711 | unsigned HOST_WIDE_INT n_iterations_max; | |
712 | rtx doloop_seq, doloop_pat, doloop_reg; | |
713 | rtx iterations; | |
714 | rtx iterations_max; | |
715 | rtx start_label; | |
716 | rtx condition; | |
717 | ||
718 | if (loop_dump_stream) | |
719 | fprintf (loop_dump_stream, | |
720 | "Doloop: Processing loop %d, enclosed levels %d.\n", | |
721 | loop->num, loop->level); | |
722 | ||
723 | jump_insn = prev_nonnote_insn (loop->end); | |
724 | ||
725 | /* Check that loop is a candidate for a low-overhead looping insn. */ | |
726 | if (! doloop_valid_p (loop, jump_insn)) | |
727 | return 0; | |
728 | ||
729 | /* Determine if the loop can be safely, and profitably, | |
730 | preconditioned. While we don't precondition the loop in a loop | |
731 | unrolling sense, this test ensures that the loop is well behaved | |
732 | and that the increment is a constant integer. */ | |
733 | if (! precondition_loop_p (loop, &initial_value, &final_value, | |
734 | &increment, &mode)) | |
735 | { | |
736 | if (loop_dump_stream) | |
737 | fprintf (loop_dump_stream, | |
738 | "Doloop: Cannot precondition loop.\n"); | |
739 | return 0; | |
740 | } | |
741 | ||
742 | /* Determine or estimate the maximum number of loop iterations. */ | |
743 | n_iterations = loop_info->n_iterations; | |
744 | if (n_iterations) | |
745 | { | |
746 | /* This is the simple case where the initial and final loop | |
747 | values are constants. */ | |
748 | n_iterations_max = n_iterations; | |
749 | } | |
750 | else | |
751 | { | |
752 | int nonneg = find_reg_note (jump_insn, REG_NONNEG, 0) != 0; | |
753 | ||
754 | /* This is the harder case where the initial and final loop | |
755 | values may not be constants. */ | |
756 | n_iterations_max = doloop_iterations_max (loop_info, mode, nonneg); | |
757 | ||
758 | if (! n_iterations_max) | |
759 | { | |
760 | /* We have something like `for (i = 0; i < 10; i--)'. */ | |
761 | if (loop_dump_stream) | |
762 | fprintf (loop_dump_stream, | |
763 | "Doloop: Not normal loop.\n"); | |
764 | return 0; | |
765 | } | |
766 | } | |
767 | ||
768 | /* Account for loop unrolling in the iteration count. This will | |
769 | have no effect if loop_iterations could not determine the number | |
770 | of iterations. */ | |
771 | n_iterations /= loop_info->unroll_number; | |
772 | n_iterations_max /= loop_info->unroll_number; | |
773 | ||
774 | if (n_iterations && n_iterations < 3) | |
775 | { | |
776 | if (loop_dump_stream) | |
777 | fprintf (loop_dump_stream, | |
778 | "Doloop: Too few iterations (%ld) to be profitable.\n", | |
779 | (long int) n_iterations); | |
780 | return 0; | |
781 | } | |
782 | ||
783 | iterations = GEN_INT (n_iterations); | |
784 | iterations_max = GEN_INT (n_iterations_max); | |
785 | ||
786 | /* Generate looping insn. If the pattern FAILs then give up trying | |
787 | to modify the loop since there is some aspect the back-end does | |
788 | not like. */ | |
789 | start_label = gen_label_rtx (); | |
790 | doloop_reg = gen_reg_rtx (mode); | |
791 | doloop_seq = gen_doloop_end (doloop_reg, iterations, iterations_max, | |
792 | GEN_INT (loop->level), start_label); | |
793 | if (! doloop_seq && mode != word_mode) | |
794 | { | |
795 | PUT_MODE (doloop_reg, word_mode); | |
796 | doloop_seq = gen_doloop_end (doloop_reg, iterations, iterations_max, | |
797 | GEN_INT (loop->level), start_label); | |
798 | } | |
799 | if (! doloop_seq) | |
800 | { | |
801 | if (loop_dump_stream) | |
802 | fprintf (loop_dump_stream, | |
803 | "Doloop: Target unwilling to use doloop pattern!\n"); | |
804 | return 0; | |
805 | } | |
806 | ||
807 | /* A raw define_insn may yield a plain pattern. If a sequence | |
808 | was involved, the last must be the jump instruction. */ | |
809 | if (GET_CODE (doloop_seq) == SEQUENCE) | |
810 | { | |
811 | doloop_pat = XVECEXP (doloop_seq, 0, XVECLEN (doloop_seq, 0) - 1); | |
812 | if (GET_CODE (doloop_pat) == JUMP_INSN) | |
813 | doloop_pat = PATTERN (doloop_pat); | |
814 | else | |
815 | doloop_pat = NULL_RTX; | |
816 | } | |
817 | else | |
818 | doloop_pat = doloop_seq; | |
819 | ||
820 | if (! doloop_pat | |
821 | || ! (condition = doloop_condition_get (doloop_pat))) | |
822 | { | |
823 | if (loop_dump_stream) | |
824 | fprintf (loop_dump_stream, | |
825 | "Doloop: Unrecognizable doloop pattern!\n"); | |
826 | return 0; | |
827 | } | |
828 | ||
829 | if (n_iterations != 0) | |
830 | /* Handle the simpler case, where we know the iteration count at | |
831 | compile time. */ | |
832 | return doloop_modify (loop, iterations, iterations_max, doloop_seq, | |
833 | start_label, condition); | |
834 | else | |
835 | /* Handle the harder case, where we must add additional runtime tests. */ | |
836 | return doloop_modify_runtime (loop, iterations_max, doloop_seq, | |
837 | start_label, mode, condition); | |
838 | } | |
839 | ||
840 | #endif /* HAVE_doloop_end */ |