]>
Commit | Line | Data |
---|---|---|
689ba89d | 1 | /* Perform doloop optimizations |
7607bdda | 2 | Copyright (C) 2004, 2005, 2006 Free Software Foundation, Inc. |
689ba89d ZD |
3 | Based on code by Michael P. Hayes (m.hayes@elec.canterbury.ac.nz) |
4 | ||
5 | This file is part of GCC. | |
6 | ||
7 | GCC is free software; you can redistribute it and/or modify it under | |
8 | the terms of the GNU General Public License as published by the Free | |
9 | Software Foundation; either version 2, or (at your option) any later | |
10 | version. | |
11 | ||
12 | GCC is distributed in the hope that it will be useful, but WITHOUT ANY | |
13 | WARRANTY; without even the implied warranty of MERCHANTABILITY or | |
14 | FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License | |
15 | for more details. | |
16 | ||
17 | You should have received a copy of the GNU General Public License | |
18 | along with GCC; see the file COPYING. If not, write to the Free | |
366ccddb KC |
19 | Software Foundation, 51 Franklin Street, Fifth Floor, Boston, MA |
20 | 02110-1301, USA. */ | |
689ba89d ZD |
21 | |
22 | #include "config.h" | |
23 | #include "system.h" | |
24 | #include "coretypes.h" | |
25 | #include "tm.h" | |
26 | #include "rtl.h" | |
27 | #include "flags.h" | |
28 | #include "expr.h" | |
29 | #include "hard-reg-set.h" | |
30 | #include "basic-block.h" | |
31 | #include "toplev.h" | |
32 | #include "tm_p.h" | |
33 | #include "cfgloop.h" | |
34 | #include "output.h" | |
35 | #include "params.h" | |
a71a498d | 36 | #include "target.h" |
689ba89d ZD |
37 | |
38 | /* This module is used to modify loops with a determinable number of | |
39 | iterations to use special low-overhead looping instructions. | |
40 | ||
41 | It first validates whether the loop is well behaved and has a | |
42 | determinable number of iterations (either at compile or run-time). | |
43 | It then modifies the loop to use a low-overhead looping pattern as | |
44 | follows: | |
45 | ||
46 | 1. A pseudo register is allocated as the loop iteration counter. | |
47 | ||
48 | 2. The number of loop iterations is calculated and is stored | |
49 | in the loop counter. | |
50 | ||
51 | 3. At the end of the loop, the jump insn is replaced by the | |
52 | doloop_end pattern. The compare must remain because it might be | |
53 | used elsewhere. If the loop-variable or condition register are | |
54 | used elsewhere, they will be eliminated by flow. | |
55 | ||
56 | 4. An optional doloop_begin pattern is inserted at the top of the | |
57 | loop. | |
58 | ||
59 | TODO The optimization should only performed when either the biv used for exit | |
60 | condition is unused at all except for the exit test, or if we do not have to | |
61 | change its value, since otherwise we have to add a new induction variable, | |
62 | which usually will not pay up (unless the cost of the doloop pattern is | |
63 | somehow extremely lower than the cost of compare & jump, or unless the bct | |
64 | register cannot be used for anything else but doloop -- ??? detect these | |
65 | cases). */ | |
66 | ||
67 | #ifdef HAVE_doloop_end | |
68 | ||
69 | /* Return the loop termination condition for PATTERN or zero | |
70 | if it is not a decrement and branch jump insn. */ | |
71 | ||
75c70254 | 72 | rtx |
689ba89d ZD |
73 | doloop_condition_get (rtx pattern) |
74 | { | |
75 | rtx cmp; | |
76 | rtx inc; | |
77 | rtx reg; | |
75c70254 | 78 | rtx inc_src; |
689ba89d ZD |
79 | rtx condition; |
80 | ||
81 | /* The canonical doloop pattern we expect is: | |
82 | ||
83 | (parallel [(set (pc) (if_then_else (condition) | |
84 | (label_ref (label)) | |
85 | (pc))) | |
86 | (set (reg) (plus (reg) (const_int -1))) | |
87 | (additional clobbers and uses)]) | |
88 | ||
75c70254 SB |
89 | Some targets (IA-64) wrap the set of the loop counter in |
90 | an if_then_else too. | |
91 | ||
92 | In summary, the branch must be the first entry of the | |
93 | parallel (also required by jump.c), and the second | |
94 | entry of the parallel must be a set of the loop counter | |
95 | register. */ | |
689ba89d ZD |
96 | |
97 | if (GET_CODE (pattern) != PARALLEL) | |
98 | return 0; | |
99 | ||
100 | cmp = XVECEXP (pattern, 0, 0); | |
101 | inc = XVECEXP (pattern, 0, 1); | |
102 | ||
103 | /* Check for (set (reg) (something)). */ | |
75c70254 | 104 | if (GET_CODE (inc) != SET) |
689ba89d | 105 | return 0; |
689ba89d | 106 | reg = SET_DEST (inc); |
75c70254 SB |
107 | if (! REG_P (reg)) |
108 | return 0; | |
109 | ||
110 | /* Check if something = (plus (reg) (const_int -1)). | |
111 | On IA-64, this decrement is wrapped in an if_then_else. */ | |
112 | inc_src = SET_SRC (inc); | |
113 | if (GET_CODE (inc_src) == IF_THEN_ELSE) | |
114 | inc_src = XEXP (inc_src, 1); | |
115 | if (GET_CODE (inc_src) != PLUS | |
116 | || XEXP (inc_src, 0) != reg | |
117 | || XEXP (inc_src, 1) != constm1_rtx) | |
118 | return 0; | |
689ba89d ZD |
119 | |
120 | /* Check for (set (pc) (if_then_else (condition) | |
121 | (label_ref (label)) | |
122 | (pc))). */ | |
123 | if (GET_CODE (cmp) != SET | |
124 | || SET_DEST (cmp) != pc_rtx | |
125 | || GET_CODE (SET_SRC (cmp)) != IF_THEN_ELSE | |
126 | || GET_CODE (XEXP (SET_SRC (cmp), 1)) != LABEL_REF | |
127 | || XEXP (SET_SRC (cmp), 2) != pc_rtx) | |
128 | return 0; | |
129 | ||
130 | /* Extract loop termination condition. */ | |
131 | condition = XEXP (SET_SRC (cmp), 0); | |
132 | ||
75c70254 SB |
133 | /* We expect a GE or NE comparison with 0 or 1. */ |
134 | if ((GET_CODE (condition) != GE | |
135 | && GET_CODE (condition) != NE) | |
136 | || (XEXP (condition, 1) != const0_rtx | |
137 | && XEXP (condition, 1) != const1_rtx)) | |
689ba89d ZD |
138 | return 0; |
139 | ||
75c70254 SB |
140 | if ((XEXP (condition, 0) == reg) |
141 | || (GET_CODE (XEXP (condition, 0)) == PLUS | |
142 | && XEXP (XEXP (condition, 0), 0) == reg)) | |
689ba89d ZD |
143 | return condition; |
144 | ||
145 | /* ??? If a machine uses a funny comparison, we could return a | |
1ae58c30 | 146 | canonicalized form here. */ |
689ba89d ZD |
147 | |
148 | return 0; | |
149 | } | |
150 | ||
151 | /* Return nonzero if the loop specified by LOOP is suitable for | |
152 | the use of special low-overhead looping instructions. DESC | |
153 | describes the number of iterations of the loop. */ | |
154 | ||
155 | static bool | |
156 | doloop_valid_p (struct loop *loop, struct niter_desc *desc) | |
157 | { | |
158 | basic_block *body = get_loop_body (loop), bb; | |
159 | rtx insn; | |
160 | unsigned i; | |
7600f094 | 161 | bool result = true; |
689ba89d ZD |
162 | |
163 | /* Check for loops that may not terminate under special conditions. */ | |
164 | if (!desc->simple_p | |
165 | || desc->assumptions | |
166 | || desc->infinite) | |
167 | { | |
168 | /* There are some cases that would require a special attention. | |
169 | For example if the comparison is LEU and the comparison value | |
170 | is UINT_MAX then the loop will not terminate. Similarly, if the | |
171 | comparison code is GEU and the comparison value is 0, the | |
172 | loop will not terminate. | |
173 | ||
174 | If the absolute increment is not 1, the loop can be infinite | |
175 | even with LTU/GTU, e.g. for (i = 3; i > 0; i -= 2) | |
176 | ||
177 | ??? We could compute these conditions at run-time and have a | |
178 | additional jump around the loop to ensure an infinite loop. | |
179 | However, it is very unlikely that this is the intended | |
180 | behavior of the loop and checking for these rare boundary | |
181 | conditions would pessimize all other code. | |
182 | ||
183 | If the loop is executed only a few times an extra check to | |
184 | restart the loop could use up most of the benefits of using a | |
185 | count register loop. Note however, that normally, this | |
186 | restart branch would never execute, so it could be predicted | |
187 | well by the CPU. We should generate the pessimistic code by | |
188 | default, and have an option, e.g. -funsafe-loops that would | |
189 | enable count-register loops in this case. */ | |
190 | if (dump_file) | |
191 | fprintf (dump_file, "Doloop: Possible infinite iteration case.\n"); | |
7600f094 AP |
192 | result = false; |
193 | goto cleanup; | |
689ba89d ZD |
194 | } |
195 | ||
196 | for (i = 0; i < loop->num_nodes; i++) | |
197 | { | |
198 | bb = body[i]; | |
199 | ||
200 | for (insn = BB_HEAD (bb); | |
201 | insn != NEXT_INSN (BB_END (bb)); | |
202 | insn = NEXT_INSN (insn)) | |
203 | { | |
a71a498d AS |
204 | /* Different targets have different necessities for low-overhead |
205 | looping. Call the back end for each instruction within the loop | |
e7e64a25 AS |
206 | to let it decide whether the insn prohibits a low-overhead loop. |
207 | It will then return the cause for it to emit to the dump file. */ | |
208 | const char * invalid = targetm.invalid_within_doloop (insn); | |
209 | if (invalid) | |
210 | { | |
211 | if (dump_file) | |
212 | fprintf (dump_file, "Doloop: %s\n", invalid); | |
7600f094 AP |
213 | result = false; |
214 | goto cleanup; | |
e7e64a25 | 215 | } |
689ba89d ZD |
216 | } |
217 | } | |
7600f094 AP |
218 | result = true; |
219 | ||
220 | cleanup: | |
689ba89d ZD |
221 | free (body); |
222 | ||
7600f094 | 223 | return result; |
689ba89d ZD |
224 | } |
225 | ||
ed541ddb ZD |
226 | /* Adds test of COND jumping to DEST on edge *E and set *E to the new fallthru |
227 | edge. If the condition is always false, do not do anything. If it is always | |
228 | true, redirect E to DEST and return false. In all other cases, true is | |
229 | returned. */ | |
689ba89d | 230 | |
ed541ddb ZD |
231 | static bool |
232 | add_test (rtx cond, edge *e, basic_block dest) | |
689ba89d ZD |
233 | { |
234 | rtx seq, jump, label; | |
235 | enum machine_mode mode; | |
236 | rtx op0 = XEXP (cond, 0), op1 = XEXP (cond, 1); | |
237 | enum rtx_code code = GET_CODE (cond); | |
ed541ddb | 238 | basic_block bb; |
689ba89d ZD |
239 | |
240 | mode = GET_MODE (XEXP (cond, 0)); | |
241 | if (mode == VOIDmode) | |
242 | mode = GET_MODE (XEXP (cond, 1)); | |
243 | ||
244 | start_sequence (); | |
245 | op0 = force_operand (op0, NULL_RTX); | |
246 | op1 = force_operand (op1, NULL_RTX); | |
247 | label = block_label (dest); | |
248 | do_compare_rtx_and_jump (op0, op1, code, 0, mode, NULL_RTX, NULL_RTX, label); | |
249 | ||
250 | jump = get_last_insn (); | |
7d93d987 | 251 | if (!jump || !JUMP_P (jump)) |
f937df35 | 252 | { |
ed541ddb ZD |
253 | /* The condition is always false and the jump was optimized out. */ |
254 | end_sequence (); | |
255 | return true; | |
f937df35 | 256 | } |
689ba89d ZD |
257 | |
258 | seq = get_insns (); | |
259 | end_sequence (); | |
7d93d987 ZD |
260 | |
261 | /* There always is at least the jump insn in the sequence. */ | |
262 | gcc_assert (seq != NULL_RTX); | |
263 | ||
598ec7bd | 264 | bb = split_edge_and_insert (*e, seq); |
ed541ddb ZD |
265 | *e = single_succ_edge (bb); |
266 | ||
267 | if (any_uncondjump_p (jump)) | |
268 | { | |
269 | /* The condition is always true. */ | |
270 | delete_insn (jump); | |
271 | redirect_edge_and_branch_force (*e, dest); | |
272 | return false; | |
273 | } | |
274 | ||
275 | JUMP_LABEL (jump) = label; | |
276 | ||
277 | /* The jump is supposed to handle an unlikely special case. */ | |
278 | REG_NOTES (jump) | |
279 | = gen_rtx_EXPR_LIST (REG_BR_PROB, | |
280 | const0_rtx, REG_NOTES (jump)); | |
281 | LABEL_NUSES (label)++; | |
282 | ||
283 | make_edge (bb, dest, (*e)->flags & ~EDGE_FALLTHRU); | |
284 | return true; | |
689ba89d ZD |
285 | } |
286 | ||
287 | /* Modify the loop to use the low-overhead looping insn where LOOP | |
288 | describes the loop, DESC describes the number of iterations of the | |
289 | loop, and DOLOOP_INSN is the low-overhead looping insn to emit at the | |
290 | end of the loop. CONDITION is the condition separated from the | |
a82bbcbb | 291 | DOLOOP_SEQ. COUNT is the number of iterations of the LOOP. */ |
689ba89d ZD |
292 | |
293 | static void | |
294 | doloop_modify (struct loop *loop, struct niter_desc *desc, | |
a82bbcbb | 295 | rtx doloop_seq, rtx condition, rtx count) |
689ba89d ZD |
296 | { |
297 | rtx counter_reg; | |
a82bbcbb | 298 | rtx tmp, noloop = NULL_RTX; |
689ba89d ZD |
299 | rtx sequence; |
300 | rtx jump_insn; | |
301 | rtx jump_label; | |
ed541ddb | 302 | int nonneg = 0; |
689ba89d ZD |
303 | bool increment_count; |
304 | basic_block loop_end = desc->out_edge->src; | |
a82bbcbb | 305 | enum machine_mode mode; |
689ba89d ZD |
306 | |
307 | jump_insn = BB_END (loop_end); | |
308 | ||
309 | if (dump_file) | |
310 | { | |
311 | fprintf (dump_file, "Doloop: Inserting doloop pattern ("); | |
312 | if (desc->const_iter) | |
313 | fprintf (dump_file, HOST_WIDEST_INT_PRINT_DEC, desc->niter); | |
314 | else | |
315 | fputs ("runtime", dump_file); | |
316 | fputs (" iterations).\n", dump_file); | |
317 | } | |
318 | ||
319 | /* Discard original jump to continue loop. The original compare | |
320 | result may still be live, so it cannot be discarded explicitly. */ | |
321 | delete_insn (jump_insn); | |
322 | ||
323 | counter_reg = XEXP (condition, 0); | |
324 | if (GET_CODE (counter_reg) == PLUS) | |
325 | counter_reg = XEXP (counter_reg, 0); | |
a82bbcbb | 326 | mode = GET_MODE (counter_reg); |
689ba89d | 327 | |
689ba89d ZD |
328 | increment_count = false; |
329 | switch (GET_CODE (condition)) | |
330 | { | |
331 | case NE: | |
332 | /* Currently only NE tests against zero and one are supported. */ | |
b5e624c6 NS |
333 | noloop = XEXP (condition, 1); |
334 | if (noloop != const0_rtx) | |
689ba89d | 335 | { |
b5e624c6 | 336 | gcc_assert (noloop == const1_rtx); |
689ba89d | 337 | increment_count = true; |
689ba89d | 338 | } |
689ba89d ZD |
339 | break; |
340 | ||
341 | case GE: | |
342 | /* Currently only GE tests against zero are supported. */ | |
b5e624c6 | 343 | gcc_assert (XEXP (condition, 1) == const0_rtx); |
689ba89d ZD |
344 | |
345 | noloop = constm1_rtx; | |
346 | ||
347 | /* The iteration count does not need incrementing for a GE test. */ | |
348 | increment_count = false; | |
349 | ||
350 | /* Determine if the iteration counter will be non-negative. | |
351 | Note that the maximum value loaded is iterations_max - 1. */ | |
352 | if (desc->niter_max | |
353 | <= ((unsigned HOST_WIDEST_INT) 1 | |
a82bbcbb | 354 | << (GET_MODE_BITSIZE (mode) - 1))) |
689ba89d ZD |
355 | nonneg = 1; |
356 | break; | |
357 | ||
1c43d3ca | 358 | /* Abort if an invalid doloop pattern has been generated. */ |
8127d0e0 | 359 | default: |
b5e624c6 | 360 | gcc_unreachable (); |
689ba89d ZD |
361 | } |
362 | ||
363 | if (increment_count) | |
a82bbcbb | 364 | count = simplify_gen_binary (PLUS, mode, count, const1_rtx); |
689ba89d ZD |
365 | |
366 | /* Insert initialization of the count register into the loop header. */ | |
367 | start_sequence (); | |
368 | tmp = force_operand (count, counter_reg); | |
369 | convert_move (counter_reg, tmp, 1); | |
370 | sequence = get_insns (); | |
371 | end_sequence (); | |
372 | emit_insn_after (sequence, BB_END (loop_preheader_edge (loop)->src)); | |
373 | ||
374 | if (desc->noloop_assumptions) | |
375 | { | |
30d3fc60 | 376 | rtx ass = copy_rtx (desc->noloop_assumptions); |
689ba89d ZD |
377 | basic_block preheader = loop_preheader_edge (loop)->src; |
378 | basic_block set_zero | |
598ec7bd | 379 | = split_edge (loop_preheader_edge (loop)); |
689ba89d | 380 | basic_block new_preheader |
598ec7bd | 381 | = split_edge (loop_preheader_edge (loop)); |
689ba89d | 382 | edge te; |
689ba89d ZD |
383 | |
384 | /* Expand the condition testing the assumptions and if it does not pass, | |
385 | reset the count register to 0. */ | |
ed541ddb | 386 | redirect_edge_and_branch_force (single_succ_edge (preheader), new_preheader); |
689ba89d ZD |
387 | set_immediate_dominator (CDI_DOMINATORS, new_preheader, preheader); |
388 | ||
389 | set_zero->count = 0; | |
390 | set_zero->frequency = 0; | |
391 | ||
ed541ddb ZD |
392 | te = single_succ_edge (preheader); |
393 | for (; ass; ass = XEXP (ass, 1)) | |
394 | if (!add_test (XEXP (ass, 0), &te, set_zero)) | |
395 | break; | |
396 | ||
397 | if (ass) | |
689ba89d | 398 | { |
ed541ddb ZD |
399 | /* We reached a condition that is always true. This is very hard to |
400 | reproduce (such a loop does not roll, and thus it would most | |
401 | likely get optimized out by some of the preceding optimizations). | |
402 | In fact, I do not have any testcase for it. However, it would | |
403 | also be very hard to show that it is impossible, so we must | |
404 | handle this case. */ | |
405 | set_zero->count = preheader->count; | |
406 | set_zero->frequency = preheader->frequency; | |
689ba89d | 407 | } |
ed541ddb ZD |
408 | |
409 | if (EDGE_COUNT (set_zero->preds) == 0) | |
410 | { | |
411 | /* All the conditions were simplified to false, remove the | |
412 | unreachable set_zero block. */ | |
ed541ddb ZD |
413 | delete_basic_block (set_zero); |
414 | } | |
415 | else | |
416 | { | |
417 | /* Reset the counter to zero in the set_zero block. */ | |
418 | start_sequence (); | |
419 | convert_move (counter_reg, noloop, 0); | |
420 | sequence = get_insns (); | |
421 | end_sequence (); | |
422 | emit_insn_after (sequence, BB_END (set_zero)); | |
423 | ||
424 | set_immediate_dominator (CDI_DOMINATORS, set_zero, | |
66f97d31 ZD |
425 | recompute_dominator (CDI_DOMINATORS, |
426 | set_zero)); | |
ed541ddb ZD |
427 | } |
428 | ||
429 | set_immediate_dominator (CDI_DOMINATORS, new_preheader, | |
66f97d31 ZD |
430 | recompute_dominator (CDI_DOMINATORS, |
431 | new_preheader)); | |
689ba89d ZD |
432 | } |
433 | ||
434 | /* Some targets (eg, C4x) need to initialize special looping | |
435 | registers. */ | |
436 | #ifdef HAVE_doloop_begin | |
437 | { | |
438 | rtx init; | |
439 | unsigned level = get_loop_level (loop) + 1; | |
440 | init = gen_doloop_begin (counter_reg, | |
441 | desc->const_iter ? desc->niter_expr : const0_rtx, | |
1984d2e8 | 442 | GEN_INT (desc->niter_max), |
689ba89d ZD |
443 | GEN_INT (level)); |
444 | if (init) | |
445 | { | |
446 | start_sequence (); | |
447 | emit_insn (init); | |
448 | sequence = get_insns (); | |
449 | end_sequence (); | |
450 | emit_insn_after (sequence, BB_END (loop_preheader_edge (loop)->src)); | |
451 | } | |
452 | } | |
453 | #endif | |
454 | ||
455 | /* Insert the new low-overhead looping insn. */ | |
456 | emit_jump_insn_after (doloop_seq, BB_END (loop_end)); | |
457 | jump_insn = BB_END (loop_end); | |
458 | jump_label = block_label (desc->in_edge->dest); | |
459 | JUMP_LABEL (jump_insn) = jump_label; | |
460 | LABEL_NUSES (jump_label)++; | |
461 | ||
462 | /* Ensure the right fallthru edge is marked, for case we have reversed | |
463 | the condition. */ | |
464 | desc->in_edge->flags &= ~EDGE_FALLTHRU; | |
465 | desc->out_edge->flags |= EDGE_FALLTHRU; | |
466 | ||
467 | /* Add a REG_NONNEG note if the actual or estimated maximum number | |
468 | of iterations is non-negative. */ | |
469 | if (nonneg) | |
470 | { | |
471 | REG_NOTES (jump_insn) | |
472 | = gen_rtx_EXPR_LIST (REG_NONNEG, NULL_RTX, REG_NOTES (jump_insn)); | |
473 | } | |
474 | } | |
475 | ||
476 | /* Process loop described by LOOP validating that the loop is suitable for | |
477 | conversion to use a low overhead looping instruction, replacing the jump | |
478 | insn where suitable. Returns true if the loop was successfully | |
479 | modified. */ | |
480 | ||
481 | static bool | |
482 | doloop_optimize (struct loop *loop) | |
483 | { | |
484 | enum machine_mode mode; | |
485 | rtx doloop_seq, doloop_pat, doloop_reg; | |
a82bbcbb | 486 | rtx iterations, count; |
689ba89d ZD |
487 | rtx iterations_max; |
488 | rtx start_label; | |
489 | rtx condition; | |
7607bdda AT |
490 | unsigned level, est_niter; |
491 | int max_cost; | |
689ba89d | 492 | struct niter_desc *desc; |
a82bbcbb ZD |
493 | unsigned word_mode_size; |
494 | unsigned HOST_WIDE_INT word_mode_max; | |
689ba89d ZD |
495 | |
496 | if (dump_file) | |
497 | fprintf (dump_file, "Doloop: Processing loop %d.\n", loop->num); | |
498 | ||
499 | iv_analysis_loop_init (loop); | |
500 | ||
501 | /* Find the simple exit of a LOOP. */ | |
502 | desc = get_simple_loop_desc (loop); | |
503 | ||
504 | /* Check that loop is a candidate for a low-overhead looping insn. */ | |
505 | if (!doloop_valid_p (loop, desc)) | |
506 | { | |
507 | if (dump_file) | |
508 | fprintf (dump_file, | |
509 | "Doloop: The loop is not suitable.\n"); | |
510 | return false; | |
511 | } | |
512 | mode = desc->mode; | |
513 | ||
514 | est_niter = 3; | |
515 | if (desc->const_iter) | |
516 | est_niter = desc->niter; | |
517 | /* If the estimate on number of iterations is reliable (comes from profile | |
518 | feedback), use it. Do not use it normally, since the expected number | |
519 | of iterations of an unrolled loop is 2. */ | |
520 | if (loop->header->count) | |
521 | est_niter = expected_loop_iterations (loop); | |
522 | ||
523 | if (est_niter < 3) | |
524 | { | |
525 | if (dump_file) | |
526 | fprintf (dump_file, | |
527 | "Doloop: Too few iterations (%u) to be profitable.\n", | |
528 | est_niter); | |
529 | return false; | |
530 | } | |
531 | ||
45b9a14b BS |
532 | max_cost |
533 | = COSTS_N_INSNS (PARAM_VALUE (PARAM_MAX_ITERATIONS_COMPUTATION_COST)); | |
534 | if (rtx_cost (desc->niter_expr, SET) > max_cost) | |
535 | { | |
536 | if (dump_file) | |
537 | fprintf (dump_file, | |
405f0587 | 538 | "Doloop: number of iterations too costly to compute.\n"); |
45b9a14b BS |
539 | return false; |
540 | } | |
541 | ||
a82bbcbb | 542 | count = copy_rtx (desc->niter_expr); |
689ba89d ZD |
543 | iterations = desc->const_iter ? desc->niter_expr : const0_rtx; |
544 | iterations_max = GEN_INT (desc->niter_max); | |
545 | level = get_loop_level (loop) + 1; | |
546 | ||
547 | /* Generate looping insn. If the pattern FAILs then give up trying | |
548 | to modify the loop since there is some aspect the back-end does | |
549 | not like. */ | |
550 | start_label = block_label (desc->in_edge->dest); | |
551 | doloop_reg = gen_reg_rtx (mode); | |
552 | doloop_seq = gen_doloop_end (doloop_reg, iterations, iterations_max, | |
553 | GEN_INT (level), start_label); | |
a82bbcbb ZD |
554 | |
555 | word_mode_size = GET_MODE_BITSIZE (word_mode); | |
556 | word_mode_max | |
557 | = ((unsigned HOST_WIDE_INT) 1 << (word_mode_size - 1) << 1) - 1; | |
558 | if (! doloop_seq | |
559 | && mode != word_mode | |
560 | /* Before trying mode different from the one in that # of iterations is | |
561 | computed, we must be sure that the number of iterations fits into | |
562 | the new mode. */ | |
563 | && (word_mode_size >= GET_MODE_BITSIZE (mode) | |
564 | || desc->niter_max <= word_mode_max)) | |
689ba89d | 565 | { |
a82bbcbb ZD |
566 | if (word_mode_size > GET_MODE_BITSIZE (mode)) |
567 | { | |
568 | count = simplify_gen_unary (ZERO_EXTEND, word_mode, | |
a252f3e8 | 569 | count, mode); |
a82bbcbb ZD |
570 | iterations = simplify_gen_unary (ZERO_EXTEND, word_mode, |
571 | iterations, mode); | |
572 | iterations_max = simplify_gen_unary (ZERO_EXTEND, word_mode, | |
573 | iterations_max, mode); | |
574 | } | |
575 | else | |
576 | { | |
577 | count = lowpart_subreg (word_mode, count, mode); | |
578 | iterations = lowpart_subreg (word_mode, iterations, mode); | |
579 | iterations_max = lowpart_subreg (word_mode, iterations_max, mode); | |
580 | } | |
689ba89d ZD |
581 | PUT_MODE (doloop_reg, word_mode); |
582 | doloop_seq = gen_doloop_end (doloop_reg, iterations, iterations_max, | |
583 | GEN_INT (level), start_label); | |
584 | } | |
585 | if (! doloop_seq) | |
586 | { | |
587 | if (dump_file) | |
588 | fprintf (dump_file, | |
589 | "Doloop: Target unwilling to use doloop pattern!\n"); | |
590 | return false; | |
591 | } | |
592 | ||
593 | /* If multiple instructions were created, the last must be the | |
594 | jump instruction. Also, a raw define_insn may yield a plain | |
595 | pattern. */ | |
596 | doloop_pat = doloop_seq; | |
597 | if (INSN_P (doloop_pat)) | |
598 | { | |
599 | while (NEXT_INSN (doloop_pat) != NULL_RTX) | |
600 | doloop_pat = NEXT_INSN (doloop_pat); | |
4b4bf941 | 601 | if (JUMP_P (doloop_pat)) |
689ba89d ZD |
602 | doloop_pat = PATTERN (doloop_pat); |
603 | else | |
604 | doloop_pat = NULL_RTX; | |
605 | } | |
606 | ||
607 | if (! doloop_pat | |
608 | || ! (condition = doloop_condition_get (doloop_pat))) | |
609 | { | |
610 | if (dump_file) | |
611 | fprintf (dump_file, "Doloop: Unrecognizable doloop pattern!\n"); | |
612 | return false; | |
613 | } | |
614 | ||
a82bbcbb | 615 | doloop_modify (loop, desc, doloop_seq, condition, count); |
689ba89d ZD |
616 | return true; |
617 | } | |
618 | ||
d73be268 | 619 | /* This is the main entry point. Process all loops using doloop_optimize. */ |
689ba89d ZD |
620 | |
621 | void | |
d73be268 | 622 | doloop_optimize_loops (void) |
689ba89d | 623 | { |
42fd6772 | 624 | loop_iterator li; |
689ba89d ZD |
625 | struct loop *loop; |
626 | ||
42fd6772 | 627 | FOR_EACH_LOOP (li, loop, 0) |
689ba89d | 628 | { |
689ba89d ZD |
629 | doloop_optimize (loop); |
630 | } | |
631 | ||
632 | iv_analysis_done (); | |
633 | ||
634 | #ifdef ENABLE_CHECKING | |
635 | verify_dominators (CDI_DOMINATORS); | |
d73be268 | 636 | verify_loop_structure (); |
689ba89d ZD |
637 | #endif |
638 | } | |
639 | #endif /* HAVE_doloop_end */ | |
640 |