]>
Commit | Line | Data |
---|---|---|
2d49f824 | 1 | /* Perform doloop optimizations |
c368b03d | 2 | Copyright (C) 2004, 2005, 2006, 2007, 2008, 2010 |
3 | Free Software Foundation, Inc. | |
2d49f824 | 4 | Based on code by Michael P. Hayes (m.hayes@elec.canterbury.ac.nz) |
5 | ||
6 | This file is part of GCC. | |
7 | ||
8 | GCC is free software; you can redistribute it and/or modify it under | |
9 | the terms of the GNU General Public License as published by the Free | |
8c4c00c1 | 10 | Software Foundation; either version 3, or (at your option) any later |
2d49f824 | 11 | version. |
12 | ||
13 | GCC is distributed in the hope that it will be useful, but WITHOUT ANY | |
14 | WARRANTY; without even the implied warranty of MERCHANTABILITY or | |
15 | FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License | |
16 | for more details. | |
17 | ||
18 | You should have received a copy of the GNU General Public License | |
8c4c00c1 | 19 | along with GCC; see the file COPYING3. If not see |
20 | <http://www.gnu.org/licenses/>. */ | |
2d49f824 | 21 | |
22 | #include "config.h" | |
23 | #include "system.h" | |
24 | #include "coretypes.h" | |
25 | #include "tm.h" | |
26 | #include "rtl.h" | |
27 | #include "flags.h" | |
28 | #include "expr.h" | |
29 | #include "hard-reg-set.h" | |
30 | #include "basic-block.h" | |
0b205f4c | 31 | #include "diagnostic-core.h" |
2d49f824 | 32 | #include "tm_p.h" |
33 | #include "cfgloop.h" | |
2d49f824 | 34 | #include "params.h" |
7dfa5ce3 | 35 | #include "target.h" |
2d49f824 | 36 | |
37 | /* This module is used to modify loops with a determinable number of | |
38 | iterations to use special low-overhead looping instructions. | |
39 | ||
40 | It first validates whether the loop is well behaved and has a | |
41 | determinable number of iterations (either at compile or run-time). | |
42 | It then modifies the loop to use a low-overhead looping pattern as | |
43 | follows: | |
44 | ||
45 | 1. A pseudo register is allocated as the loop iteration counter. | |
46 | ||
47 | 2. The number of loop iterations is calculated and is stored | |
48 | in the loop counter. | |
49 | ||
50 | 3. At the end of the loop, the jump insn is replaced by the | |
51 | doloop_end pattern. The compare must remain because it might be | |
52 | used elsewhere. If the loop-variable or condition register are | |
53 | used elsewhere, they will be eliminated by flow. | |
54 | ||
55 | 4. An optional doloop_begin pattern is inserted at the top of the | |
56 | loop. | |
57 | ||
58 | TODO The optimization should only performed when either the biv used for exit | |
59 | condition is unused at all except for the exit test, or if we do not have to | |
60 | change its value, since otherwise we have to add a new induction variable, | |
61 | which usually will not pay up (unless the cost of the doloop pattern is | |
62 | somehow extremely lower than the cost of compare & jump, or unless the bct | |
63 | register cannot be used for anything else but doloop -- ??? detect these | |
64 | cases). */ | |
65 | ||
66 | #ifdef HAVE_doloop_end | |
67 | ||
68 | /* Return the loop termination condition for PATTERN or zero | |
69 | if it is not a decrement and branch jump insn. */ | |
70 | ||
fc2abfd3 | 71 | rtx |
7bfc9b7c | 72 | doloop_condition_get (rtx doloop_pat) |
2d49f824 | 73 | { |
74 | rtx cmp; | |
75 | rtx inc; | |
76 | rtx reg; | |
fc2abfd3 | 77 | rtx inc_src; |
2d49f824 | 78 | rtx condition; |
7bfc9b7c | 79 | rtx pattern; |
90c2bcf0 | 80 | rtx cc_reg = NULL_RTX; |
81 | rtx reg_orig = NULL_RTX; | |
2d49f824 | 82 | |
7bfc9b7c | 83 | /* The canonical doloop pattern we expect has one of the following |
84 | forms: | |
2d49f824 | 85 | |
7bfc9b7c | 86 | 1) (parallel [(set (pc) (if_then_else (condition) |
87 | (label_ref (label)) | |
88 | (pc))) | |
89 | (set (reg) (plus (reg) (const_int -1))) | |
90 | (additional clobbers and uses)]) | |
2d49f824 | 91 | |
7bfc9b7c | 92 | The branch must be the first entry of the parallel (also required |
93 | by jump.c), and the second entry of the parallel must be a set of | |
94 | the loop counter register. Some targets (IA-64) wrap the set of | |
95 | the loop counter in an if_then_else too. | |
fc2abfd3 | 96 | |
7bfc9b7c | 97 | 2) (set (reg) (plus (reg) (const_int -1)) |
98 | (set (pc) (if_then_else (reg != 0) | |
99 | (label_ref (label)) | |
90c2bcf0 | 100 | (pc))). |
101 | ||
102 | Some targets (ARM) do the comparison before the branch, as in the | |
103 | following form: | |
104 | ||
105 | 3) (parallel [(set (cc) (compare ((plus (reg) (const_int -1), 0))) | |
106 | (set (reg) (plus (reg) (const_int -1)))]) | |
107 | (set (pc) (if_then_else (cc == NE) | |
108 | (label_ref (label)) | |
109 | (pc))) */ | |
7bfc9b7c | 110 | |
111 | pattern = PATTERN (doloop_pat); | |
2d49f824 | 112 | |
113 | if (GET_CODE (pattern) != PARALLEL) | |
7bfc9b7c | 114 | { |
115 | rtx cond; | |
9c868118 | 116 | rtx prev_insn = prev_nondebug_insn (doloop_pat); |
90c2bcf0 | 117 | rtx cmp_arg1, cmp_arg2; |
118 | rtx cmp_orig; | |
7bfc9b7c | 119 | |
90c2bcf0 | 120 | /* In case the pattern is not PARALLEL we expect two forms |
121 | of doloop which are cases 2) and 3) above: in case 2) the | |
122 | decrement immediately precedes the branch, while in case 3) | |
123 | the compare and decrement instructions immediately precede | |
124 | the branch. */ | |
2d49f824 | 125 | |
9c868118 | 126 | if (prev_insn == NULL_RTX || !INSN_P (prev_insn)) |
7bfc9b7c | 127 | return 0; |
128 | ||
129 | cmp = pattern; | |
90c2bcf0 | 130 | if (GET_CODE (PATTERN (prev_insn)) == PARALLEL) |
131 | { | |
132 | /* The third case: the compare and decrement instructions | |
133 | immediately precede the branch. */ | |
134 | cmp_orig = XVECEXP (PATTERN (prev_insn), 0, 0); | |
135 | if (GET_CODE (cmp_orig) != SET) | |
136 | return 0; | |
137 | if (GET_CODE (SET_SRC (cmp_orig)) != COMPARE) | |
138 | return 0; | |
139 | cmp_arg1 = XEXP (SET_SRC (cmp_orig), 0); | |
140 | cmp_arg2 = XEXP (SET_SRC (cmp_orig), 1); | |
141 | if (cmp_arg2 != const0_rtx | |
142 | || GET_CODE (cmp_arg1) != PLUS) | |
143 | return 0; | |
144 | reg_orig = XEXP (cmp_arg1, 0); | |
145 | if (XEXP (cmp_arg1, 1) != GEN_INT (-1) | |
146 | || !REG_P (reg_orig)) | |
147 | return 0; | |
148 | cc_reg = SET_DEST (cmp_orig); | |
149 | ||
150 | inc = XVECEXP (PATTERN (prev_insn), 0, 1); | |
151 | } | |
152 | else | |
29dc3c19 | 153 | inc = PATTERN (prev_insn); |
7bfc9b7c | 154 | /* We expect the condition to be of the form (reg != 0) */ |
155 | cond = XEXP (SET_SRC (cmp), 0); | |
156 | if (GET_CODE (cond) != NE || XEXP (cond, 1) != const0_rtx) | |
157 | return 0; | |
7bfc9b7c | 158 | } |
159 | else | |
160 | { | |
161 | cmp = XVECEXP (pattern, 0, 0); | |
162 | inc = XVECEXP (pattern, 0, 1); | |
163 | } | |
2d49f824 | 164 | |
165 | /* Check for (set (reg) (something)). */ | |
fc2abfd3 | 166 | if (GET_CODE (inc) != SET) |
2d49f824 | 167 | return 0; |
2d49f824 | 168 | reg = SET_DEST (inc); |
fc2abfd3 | 169 | if (! REG_P (reg)) |
170 | return 0; | |
171 | ||
172 | /* Check if something = (plus (reg) (const_int -1)). | |
173 | On IA-64, this decrement is wrapped in an if_then_else. */ | |
174 | inc_src = SET_SRC (inc); | |
175 | if (GET_CODE (inc_src) == IF_THEN_ELSE) | |
176 | inc_src = XEXP (inc_src, 1); | |
177 | if (GET_CODE (inc_src) != PLUS | |
178 | || XEXP (inc_src, 0) != reg | |
179 | || XEXP (inc_src, 1) != constm1_rtx) | |
180 | return 0; | |
2d49f824 | 181 | |
182 | /* Check for (set (pc) (if_then_else (condition) | |
183 | (label_ref (label)) | |
184 | (pc))). */ | |
185 | if (GET_CODE (cmp) != SET | |
186 | || SET_DEST (cmp) != pc_rtx | |
187 | || GET_CODE (SET_SRC (cmp)) != IF_THEN_ELSE | |
188 | || GET_CODE (XEXP (SET_SRC (cmp), 1)) != LABEL_REF | |
189 | || XEXP (SET_SRC (cmp), 2) != pc_rtx) | |
190 | return 0; | |
191 | ||
192 | /* Extract loop termination condition. */ | |
193 | condition = XEXP (SET_SRC (cmp), 0); | |
194 | ||
fc2abfd3 | 195 | /* We expect a GE or NE comparison with 0 or 1. */ |
196 | if ((GET_CODE (condition) != GE | |
197 | && GET_CODE (condition) != NE) | |
198 | || (XEXP (condition, 1) != const0_rtx | |
199 | && XEXP (condition, 1) != const1_rtx)) | |
2d49f824 | 200 | return 0; |
201 | ||
fc2abfd3 | 202 | if ((XEXP (condition, 0) == reg) |
90c2bcf0 | 203 | /* For the third case: */ |
204 | || ((cc_reg != NULL_RTX) | |
205 | && (XEXP (condition, 0) == cc_reg) | |
206 | && (reg_orig == reg)) | |
fc2abfd3 | 207 | || (GET_CODE (XEXP (condition, 0)) == PLUS |
90c2bcf0 | 208 | && XEXP (XEXP (condition, 0), 0) == reg)) |
7bfc9b7c | 209 | { |
210 | if (GET_CODE (pattern) != PARALLEL) | |
90c2bcf0 | 211 | /* For the second form we expect: |
7bfc9b7c | 212 | |
213 | (set (reg) (plus (reg) (const_int -1)) | |
214 | (set (pc) (if_then_else (reg != 0) | |
215 | (label_ref (label)) | |
216 | (pc))). | |
217 | ||
218 | is equivalent to the following: | |
219 | ||
220 | (parallel [(set (pc) (if_then_else (reg != 1) | |
221 | (label_ref (label)) | |
222 | (pc))) | |
223 | (set (reg) (plus (reg) (const_int -1))) | |
224 | (additional clobbers and uses)]) | |
225 | ||
90c2bcf0 | 226 | For the third form we expect: |
227 | ||
228 | (parallel [(set (cc) (compare ((plus (reg) (const_int -1)), 0)) | |
229 | (set (reg) (plus (reg) (const_int -1)))]) | |
230 | (set (pc) (if_then_else (cc == NE) | |
231 | (label_ref (label)) | |
232 | (pc))) | |
233 | ||
234 | which is equivalent to the following: | |
235 | ||
236 | (parallel [(set (cc) (compare (reg, 1)) | |
237 | (set (reg) (plus (reg) (const_int -1))) | |
238 | (set (pc) (if_then_else (NE == cc) | |
239 | (label_ref (label)) | |
240 | (pc))))]) | |
241 | ||
242 | So we return the second form instead for the two cases. | |
243 | ||
7bfc9b7c | 244 | */ |
245 | condition = gen_rtx_fmt_ee (NE, VOIDmode, inc_src, const1_rtx); | |
246 | ||
2d49f824 | 247 | return condition; |
7bfc9b7c | 248 | } |
2d49f824 | 249 | |
250 | /* ??? If a machine uses a funny comparison, we could return a | |
7bd28bba | 251 | canonicalized form here. */ |
2d49f824 | 252 | |
253 | return 0; | |
254 | } | |
255 | ||
256 | /* Return nonzero if the loop specified by LOOP is suitable for | |
257 | the use of special low-overhead looping instructions. DESC | |
258 | describes the number of iterations of the loop. */ | |
259 | ||
260 | static bool | |
261 | doloop_valid_p (struct loop *loop, struct niter_desc *desc) | |
262 | { | |
263 | basic_block *body = get_loop_body (loop), bb; | |
264 | rtx insn; | |
265 | unsigned i; | |
7caa482e | 266 | bool result = true; |
2d49f824 | 267 | |
268 | /* Check for loops that may not terminate under special conditions. */ | |
269 | if (!desc->simple_p | |
270 | || desc->assumptions | |
271 | || desc->infinite) | |
272 | { | |
273 | /* There are some cases that would require a special attention. | |
274 | For example if the comparison is LEU and the comparison value | |
275 | is UINT_MAX then the loop will not terminate. Similarly, if the | |
276 | comparison code is GEU and the comparison value is 0, the | |
277 | loop will not terminate. | |
278 | ||
279 | If the absolute increment is not 1, the loop can be infinite | |
280 | even with LTU/GTU, e.g. for (i = 3; i > 0; i -= 2) | |
281 | ||
282 | ??? We could compute these conditions at run-time and have a | |
283 | additional jump around the loop to ensure an infinite loop. | |
284 | However, it is very unlikely that this is the intended | |
285 | behavior of the loop and checking for these rare boundary | |
286 | conditions would pessimize all other code. | |
287 | ||
288 | If the loop is executed only a few times an extra check to | |
289 | restart the loop could use up most of the benefits of using a | |
290 | count register loop. Note however, that normally, this | |
291 | restart branch would never execute, so it could be predicted | |
292 | well by the CPU. We should generate the pessimistic code by | |
293 | default, and have an option, e.g. -funsafe-loops that would | |
294 | enable count-register loops in this case. */ | |
295 | if (dump_file) | |
296 | fprintf (dump_file, "Doloop: Possible infinite iteration case.\n"); | |
7caa482e | 297 | result = false; |
298 | goto cleanup; | |
2d49f824 | 299 | } |
300 | ||
301 | for (i = 0; i < loop->num_nodes; i++) | |
302 | { | |
303 | bb = body[i]; | |
304 | ||
305 | for (insn = BB_HEAD (bb); | |
306 | insn != NEXT_INSN (BB_END (bb)); | |
307 | insn = NEXT_INSN (insn)) | |
308 | { | |
7dfa5ce3 | 309 | /* Different targets have different necessities for low-overhead |
310 | looping. Call the back end for each instruction within the loop | |
1606e68a | 311 | to let it decide whether the insn prohibits a low-overhead loop. |
312 | It will then return the cause for it to emit to the dump file. */ | |
313 | const char * invalid = targetm.invalid_within_doloop (insn); | |
314 | if (invalid) | |
315 | { | |
316 | if (dump_file) | |
317 | fprintf (dump_file, "Doloop: %s\n", invalid); | |
7caa482e | 318 | result = false; |
319 | goto cleanup; | |
1606e68a | 320 | } |
2d49f824 | 321 | } |
322 | } | |
7caa482e | 323 | result = true; |
324 | ||
325 | cleanup: | |
2d49f824 | 326 | free (body); |
327 | ||
7caa482e | 328 | return result; |
2d49f824 | 329 | } |
330 | ||
a4c7895e | 331 | /* Adds test of COND jumping to DEST on edge *E and set *E to the new fallthru |
332 | edge. If the condition is always false, do not do anything. If it is always | |
333 | true, redirect E to DEST and return false. In all other cases, true is | |
334 | returned. */ | |
2d49f824 | 335 | |
a4c7895e | 336 | static bool |
337 | add_test (rtx cond, edge *e, basic_block dest) | |
2d49f824 | 338 | { |
339 | rtx seq, jump, label; | |
340 | enum machine_mode mode; | |
341 | rtx op0 = XEXP (cond, 0), op1 = XEXP (cond, 1); | |
342 | enum rtx_code code = GET_CODE (cond); | |
a4c7895e | 343 | basic_block bb; |
2d49f824 | 344 | |
345 | mode = GET_MODE (XEXP (cond, 0)); | |
346 | if (mode == VOIDmode) | |
347 | mode = GET_MODE (XEXP (cond, 1)); | |
348 | ||
349 | start_sequence (); | |
350 | op0 = force_operand (op0, NULL_RTX); | |
351 | op1 = force_operand (op1, NULL_RTX); | |
352 | label = block_label (dest); | |
c368b03d | 353 | do_compare_rtx_and_jump (op0, op1, code, 0, mode, NULL_RTX, |
354 | NULL_RTX, label, -1); | |
2d49f824 | 355 | |
356 | jump = get_last_insn (); | |
d6a1bbdb | 357 | if (!jump || !JUMP_P (jump)) |
3c4d0a06 | 358 | { |
a4c7895e | 359 | /* The condition is always false and the jump was optimized out. */ |
360 | end_sequence (); | |
361 | return true; | |
3c4d0a06 | 362 | } |
2d49f824 | 363 | |
364 | seq = get_insns (); | |
365 | end_sequence (); | |
d6a1bbdb | 366 | |
367 | /* There always is at least the jump insn in the sequence. */ | |
368 | gcc_assert (seq != NULL_RTX); | |
369 | ||
88e6f696 | 370 | bb = split_edge_and_insert (*e, seq); |
a4c7895e | 371 | *e = single_succ_edge (bb); |
372 | ||
373 | if (any_uncondjump_p (jump)) | |
374 | { | |
375 | /* The condition is always true. */ | |
376 | delete_insn (jump); | |
377 | redirect_edge_and_branch_force (*e, dest); | |
378 | return false; | |
379 | } | |
48e1416a | 380 | |
a4c7895e | 381 | JUMP_LABEL (jump) = label; |
382 | ||
383 | /* The jump is supposed to handle an unlikely special case. */ | |
a1ddb869 | 384 | add_reg_note (jump, REG_BR_PROB, const0_rtx); |
385 | ||
a4c7895e | 386 | LABEL_NUSES (label)++; |
387 | ||
388 | make_edge (bb, dest, (*e)->flags & ~EDGE_FALLTHRU); | |
389 | return true; | |
2d49f824 | 390 | } |
391 | ||
392 | /* Modify the loop to use the low-overhead looping insn where LOOP | |
393 | describes the loop, DESC describes the number of iterations of the | |
394 | loop, and DOLOOP_INSN is the low-overhead looping insn to emit at the | |
395 | end of the loop. CONDITION is the condition separated from the | |
941f640b | 396 | DOLOOP_SEQ. COUNT is the number of iterations of the LOOP. */ |
2d49f824 | 397 | |
398 | static void | |
399 | doloop_modify (struct loop *loop, struct niter_desc *desc, | |
941f640b | 400 | rtx doloop_seq, rtx condition, rtx count) |
2d49f824 | 401 | { |
402 | rtx counter_reg; | |
cde5a3a0 | 403 | rtx tmp, noloop = NULL_RTX; |
2d49f824 | 404 | rtx sequence; |
405 | rtx jump_insn; | |
406 | rtx jump_label; | |
a4c7895e | 407 | int nonneg = 0; |
2d49f824 | 408 | bool increment_count; |
409 | basic_block loop_end = desc->out_edge->src; | |
cde5a3a0 | 410 | enum machine_mode mode; |
fa7f4c0c | 411 | rtx true_prob_val; |
2d49f824 | 412 | |
413 | jump_insn = BB_END (loop_end); | |
414 | ||
415 | if (dump_file) | |
416 | { | |
417 | fprintf (dump_file, "Doloop: Inserting doloop pattern ("); | |
418 | if (desc->const_iter) | |
419 | fprintf (dump_file, HOST_WIDEST_INT_PRINT_DEC, desc->niter); | |
420 | else | |
421 | fputs ("runtime", dump_file); | |
422 | fputs (" iterations).\n", dump_file); | |
423 | } | |
424 | ||
f0b5f617 | 425 | /* Get the probability of the original branch. If it exists we would |
fa7f4c0c | 426 | need to update REG_BR_PROB of the new jump_insn. */ |
427 | true_prob_val = find_reg_note (jump_insn, REG_BR_PROB, NULL_RTX); | |
428 | ||
2d49f824 | 429 | /* Discard original jump to continue loop. The original compare |
430 | result may still be live, so it cannot be discarded explicitly. */ | |
431 | delete_insn (jump_insn); | |
432 | ||
433 | counter_reg = XEXP (condition, 0); | |
434 | if (GET_CODE (counter_reg) == PLUS) | |
435 | counter_reg = XEXP (counter_reg, 0); | |
cde5a3a0 | 436 | mode = GET_MODE (counter_reg); |
2d49f824 | 437 | |
2d49f824 | 438 | increment_count = false; |
439 | switch (GET_CODE (condition)) | |
440 | { | |
441 | case NE: | |
442 | /* Currently only NE tests against zero and one are supported. */ | |
972e95af | 443 | noloop = XEXP (condition, 1); |
444 | if (noloop != const0_rtx) | |
2d49f824 | 445 | { |
972e95af | 446 | gcc_assert (noloop == const1_rtx); |
2d49f824 | 447 | increment_count = true; |
2d49f824 | 448 | } |
2d49f824 | 449 | break; |
450 | ||
451 | case GE: | |
452 | /* Currently only GE tests against zero are supported. */ | |
972e95af | 453 | gcc_assert (XEXP (condition, 1) == const0_rtx); |
2d49f824 | 454 | |
455 | noloop = constm1_rtx; | |
456 | ||
457 | /* The iteration count does not need incrementing for a GE test. */ | |
458 | increment_count = false; | |
459 | ||
460 | /* Determine if the iteration counter will be non-negative. | |
461 | Note that the maximum value loaded is iterations_max - 1. */ | |
462 | if (desc->niter_max | |
463 | <= ((unsigned HOST_WIDEST_INT) 1 | |
ded805e6 | 464 | << (GET_MODE_PRECISION (mode) - 1))) |
2d49f824 | 465 | nonneg = 1; |
466 | break; | |
467 | ||
b690c0a3 | 468 | /* Abort if an invalid doloop pattern has been generated. */ |
2045cdd4 | 469 | default: |
972e95af | 470 | gcc_unreachable (); |
2d49f824 | 471 | } |
472 | ||
473 | if (increment_count) | |
941f640b | 474 | count = simplify_gen_binary (PLUS, mode, count, const1_rtx); |
560a2fa6 | 475 | |
2d49f824 | 476 | /* Insert initialization of the count register into the loop header. */ |
477 | start_sequence (); | |
478 | tmp = force_operand (count, counter_reg); | |
479 | convert_move (counter_reg, tmp, 1); | |
480 | sequence = get_insns (); | |
481 | end_sequence (); | |
482 | emit_insn_after (sequence, BB_END (loop_preheader_edge (loop)->src)); | |
483 | ||
484 | if (desc->noloop_assumptions) | |
485 | { | |
c30738d0 | 486 | rtx ass = copy_rtx (desc->noloop_assumptions); |
2d49f824 | 487 | basic_block preheader = loop_preheader_edge (loop)->src; |
488 | basic_block set_zero | |
88e6f696 | 489 | = split_edge (loop_preheader_edge (loop)); |
2d49f824 | 490 | basic_block new_preheader |
88e6f696 | 491 | = split_edge (loop_preheader_edge (loop)); |
2d49f824 | 492 | edge te; |
2d49f824 | 493 | |
494 | /* Expand the condition testing the assumptions and if it does not pass, | |
495 | reset the count register to 0. */ | |
a4c7895e | 496 | redirect_edge_and_branch_force (single_succ_edge (preheader), new_preheader); |
2d49f824 | 497 | set_immediate_dominator (CDI_DOMINATORS, new_preheader, preheader); |
498 | ||
499 | set_zero->count = 0; | |
500 | set_zero->frequency = 0; | |
501 | ||
a4c7895e | 502 | te = single_succ_edge (preheader); |
503 | for (; ass; ass = XEXP (ass, 1)) | |
504 | if (!add_test (XEXP (ass, 0), &te, set_zero)) | |
505 | break; | |
506 | ||
507 | if (ass) | |
2d49f824 | 508 | { |
a4c7895e | 509 | /* We reached a condition that is always true. This is very hard to |
510 | reproduce (such a loop does not roll, and thus it would most | |
511 | likely get optimized out by some of the preceding optimizations). | |
512 | In fact, I do not have any testcase for it. However, it would | |
513 | also be very hard to show that it is impossible, so we must | |
514 | handle this case. */ | |
515 | set_zero->count = preheader->count; | |
516 | set_zero->frequency = preheader->frequency; | |
2d49f824 | 517 | } |
48e1416a | 518 | |
a4c7895e | 519 | if (EDGE_COUNT (set_zero->preds) == 0) |
520 | { | |
521 | /* All the conditions were simplified to false, remove the | |
522 | unreachable set_zero block. */ | |
a4c7895e | 523 | delete_basic_block (set_zero); |
524 | } | |
525 | else | |
526 | { | |
527 | /* Reset the counter to zero in the set_zero block. */ | |
528 | start_sequence (); | |
529 | convert_move (counter_reg, noloop, 0); | |
530 | sequence = get_insns (); | |
531 | end_sequence (); | |
532 | emit_insn_after (sequence, BB_END (set_zero)); | |
48e1416a | 533 | |
a4c7895e | 534 | set_immediate_dominator (CDI_DOMINATORS, set_zero, |
3f9439d7 | 535 | recompute_dominator (CDI_DOMINATORS, |
536 | set_zero)); | |
a4c7895e | 537 | } |
538 | ||
539 | set_immediate_dominator (CDI_DOMINATORS, new_preheader, | |
3f9439d7 | 540 | recompute_dominator (CDI_DOMINATORS, |
541 | new_preheader)); | |
2d49f824 | 542 | } |
543 | ||
544 | /* Some targets (eg, C4x) need to initialize special looping | |
545 | registers. */ | |
546 | #ifdef HAVE_doloop_begin | |
547 | { | |
548 | rtx init; | |
549 | unsigned level = get_loop_level (loop) + 1; | |
550 | init = gen_doloop_begin (counter_reg, | |
551 | desc->const_iter ? desc->niter_expr : const0_rtx, | |
f79c3176 | 552 | GEN_INT (desc->niter_max), |
2d49f824 | 553 | GEN_INT (level)); |
554 | if (init) | |
555 | { | |
556 | start_sequence (); | |
557 | emit_insn (init); | |
558 | sequence = get_insns (); | |
559 | end_sequence (); | |
560 | emit_insn_after (sequence, BB_END (loop_preheader_edge (loop)->src)); | |
561 | } | |
562 | } | |
563 | #endif | |
564 | ||
565 | /* Insert the new low-overhead looping insn. */ | |
566 | emit_jump_insn_after (doloop_seq, BB_END (loop_end)); | |
567 | jump_insn = BB_END (loop_end); | |
568 | jump_label = block_label (desc->in_edge->dest); | |
569 | JUMP_LABEL (jump_insn) = jump_label; | |
570 | LABEL_NUSES (jump_label)++; | |
571 | ||
572 | /* Ensure the right fallthru edge is marked, for case we have reversed | |
573 | the condition. */ | |
574 | desc->in_edge->flags &= ~EDGE_FALLTHRU; | |
575 | desc->out_edge->flags |= EDGE_FALLTHRU; | |
576 | ||
577 | /* Add a REG_NONNEG note if the actual or estimated maximum number | |
578 | of iterations is non-negative. */ | |
579 | if (nonneg) | |
a1ddb869 | 580 | add_reg_note (jump_insn, REG_NONNEG, NULL_RTX); |
581 | ||
fa7f4c0c | 582 | /* Update the REG_BR_PROB note. */ |
583 | if (true_prob_val) | |
584 | { | |
585 | /* Seems safer to use the branch probability. */ | |
48e1416a | 586 | add_reg_note (jump_insn, REG_BR_PROB, |
a1ddb869 | 587 | GEN_INT (desc->in_edge->probability)); |
fa7f4c0c | 588 | } |
2d49f824 | 589 | } |
590 | ||
591 | /* Process loop described by LOOP validating that the loop is suitable for | |
592 | conversion to use a low overhead looping instruction, replacing the jump | |
593 | insn where suitable. Returns true if the loop was successfully | |
594 | modified. */ | |
595 | ||
596 | static bool | |
597 | doloop_optimize (struct loop *loop) | |
598 | { | |
599 | enum machine_mode mode; | |
600 | rtx doloop_seq, doloop_pat, doloop_reg; | |
cde5a3a0 | 601 | rtx iterations, count; |
2d49f824 | 602 | rtx iterations_max; |
603 | rtx start_label; | |
604 | rtx condition; | |
9cab63d0 | 605 | unsigned level, est_niter; |
606 | int max_cost; | |
2d49f824 | 607 | struct niter_desc *desc; |
cde5a3a0 | 608 | unsigned word_mode_size; |
609 | unsigned HOST_WIDE_INT word_mode_max; | |
2d49f824 | 610 | |
611 | if (dump_file) | |
612 | fprintf (dump_file, "Doloop: Processing loop %d.\n", loop->num); | |
613 | ||
614 | iv_analysis_loop_init (loop); | |
615 | ||
616 | /* Find the simple exit of a LOOP. */ | |
617 | desc = get_simple_loop_desc (loop); | |
618 | ||
619 | /* Check that loop is a candidate for a low-overhead looping insn. */ | |
620 | if (!doloop_valid_p (loop, desc)) | |
621 | { | |
622 | if (dump_file) | |
623 | fprintf (dump_file, | |
624 | "Doloop: The loop is not suitable.\n"); | |
625 | return false; | |
626 | } | |
627 | mode = desc->mode; | |
628 | ||
629 | est_niter = 3; | |
630 | if (desc->const_iter) | |
631 | est_niter = desc->niter; | |
632 | /* If the estimate on number of iterations is reliable (comes from profile | |
633 | feedback), use it. Do not use it normally, since the expected number | |
634 | of iterations of an unrolled loop is 2. */ | |
635 | if (loop->header->count) | |
636 | est_niter = expected_loop_iterations (loop); | |
637 | ||
638 | if (est_niter < 3) | |
639 | { | |
640 | if (dump_file) | |
641 | fprintf (dump_file, | |
642 | "Doloop: Too few iterations (%u) to be profitable.\n", | |
643 | est_niter); | |
644 | return false; | |
645 | } | |
646 | ||
883bb2bb | 647 | max_cost |
648 | = COSTS_N_INSNS (PARAM_VALUE (PARAM_MAX_ITERATIONS_COMPUTATION_COST)); | |
7013e87c | 649 | if (set_src_cost (desc->niter_expr, optimize_loop_for_speed_p (loop)) |
f529eb25 | 650 | > max_cost) |
883bb2bb | 651 | { |
652 | if (dump_file) | |
653 | fprintf (dump_file, | |
2ce55c0b | 654 | "Doloop: number of iterations too costly to compute.\n"); |
883bb2bb | 655 | return false; |
656 | } | |
657 | ||
cde5a3a0 | 658 | count = copy_rtx (desc->niter_expr); |
2d49f824 | 659 | iterations = desc->const_iter ? desc->niter_expr : const0_rtx; |
660 | iterations_max = GEN_INT (desc->niter_max); | |
661 | level = get_loop_level (loop) + 1; | |
662 | ||
663 | /* Generate looping insn. If the pattern FAILs then give up trying | |
664 | to modify the loop since there is some aspect the back-end does | |
665 | not like. */ | |
666 | start_label = block_label (desc->in_edge->dest); | |
667 | doloop_reg = gen_reg_rtx (mode); | |
668 | doloop_seq = gen_doloop_end (doloop_reg, iterations, iterations_max, | |
669 | GEN_INT (level), start_label); | |
cde5a3a0 | 670 | |
ded805e6 | 671 | word_mode_size = GET_MODE_PRECISION (word_mode); |
cde5a3a0 | 672 | word_mode_max |
673 | = ((unsigned HOST_WIDE_INT) 1 << (word_mode_size - 1) << 1) - 1; | |
674 | if (! doloop_seq | |
675 | && mode != word_mode | |
676 | /* Before trying mode different from the one in that # of iterations is | |
677 | computed, we must be sure that the number of iterations fits into | |
678 | the new mode. */ | |
ded805e6 | 679 | && (word_mode_size >= GET_MODE_PRECISION (mode) |
cde5a3a0 | 680 | || desc->niter_max <= word_mode_max)) |
2d49f824 | 681 | { |
ded805e6 | 682 | if (word_mode_size > GET_MODE_PRECISION (mode)) |
cde5a3a0 | 683 | { |
941f640b | 684 | count = simplify_gen_unary (ZERO_EXTEND, word_mode, |
685 | count, mode); | |
cde5a3a0 | 686 | iterations = simplify_gen_unary (ZERO_EXTEND, word_mode, |
687 | iterations, mode); | |
688 | iterations_max = simplify_gen_unary (ZERO_EXTEND, word_mode, | |
689 | iterations_max, mode); | |
690 | } | |
691 | else | |
692 | { | |
693 | count = lowpart_subreg (word_mode, count, mode); | |
694 | iterations = lowpart_subreg (word_mode, iterations, mode); | |
695 | iterations_max = lowpart_subreg (word_mode, iterations_max, mode); | |
696 | } | |
2d49f824 | 697 | PUT_MODE (doloop_reg, word_mode); |
698 | doloop_seq = gen_doloop_end (doloop_reg, iterations, iterations_max, | |
699 | GEN_INT (level), start_label); | |
700 | } | |
701 | if (! doloop_seq) | |
702 | { | |
703 | if (dump_file) | |
704 | fprintf (dump_file, | |
705 | "Doloop: Target unwilling to use doloop pattern!\n"); | |
706 | return false; | |
707 | } | |
708 | ||
709 | /* If multiple instructions were created, the last must be the | |
710 | jump instruction. Also, a raw define_insn may yield a plain | |
711 | pattern. */ | |
712 | doloop_pat = doloop_seq; | |
713 | if (INSN_P (doloop_pat)) | |
714 | { | |
715 | while (NEXT_INSN (doloop_pat) != NULL_RTX) | |
716 | doloop_pat = NEXT_INSN (doloop_pat); | |
7bfc9b7c | 717 | if (!JUMP_P (doloop_pat)) |
2d49f824 | 718 | doloop_pat = NULL_RTX; |
719 | } | |
720 | ||
721 | if (! doloop_pat | |
722 | || ! (condition = doloop_condition_get (doloop_pat))) | |
723 | { | |
724 | if (dump_file) | |
725 | fprintf (dump_file, "Doloop: Unrecognizable doloop pattern!\n"); | |
726 | return false; | |
727 | } | |
728 | ||
941f640b | 729 | doloop_modify (loop, desc, doloop_seq, condition, count); |
2d49f824 | 730 | return true; |
731 | } | |
732 | ||
7194de72 | 733 | /* This is the main entry point. Process all loops using doloop_optimize. */ |
2d49f824 | 734 | |
735 | void | |
7194de72 | 736 | doloop_optimize_loops (void) |
2d49f824 | 737 | { |
17519ba0 | 738 | loop_iterator li; |
2d49f824 | 739 | struct loop *loop; |
740 | ||
17519ba0 | 741 | FOR_EACH_LOOP (li, loop, 0) |
2d49f824 | 742 | { |
2d49f824 | 743 | doloop_optimize (loop); |
744 | } | |
745 | ||
746 | iv_analysis_done (); | |
747 | ||
748 | #ifdef ENABLE_CHECKING | |
7194de72 | 749 | verify_loop_structure (); |
2d49f824 | 750 | #endif |
751 | } | |
752 | #endif /* HAVE_doloop_end */ | |
753 |