]>
Commit | Line | Data |
---|---|---|
2d49f824 | 1 | /* Perform doloop optimizations |
aad93da1 | 2 | Copyright (C) 2004-2017 Free Software Foundation, Inc. |
2d49f824 | 3 | Based on code by Michael P. Hayes (m.hayes@elec.canterbury.ac.nz) |
4 | ||
5 | This file is part of GCC. | |
6 | ||
7 | GCC is free software; you can redistribute it and/or modify it under | |
8 | the terms of the GNU General Public License as published by the Free | |
8c4c00c1 | 9 | Software Foundation; either version 3, or (at your option) any later |
2d49f824 | 10 | version. |
11 | ||
12 | GCC is distributed in the hope that it will be useful, but WITHOUT ANY | |
13 | WARRANTY; without even the implied warranty of MERCHANTABILITY or | |
14 | FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License | |
15 | for more details. | |
16 | ||
17 | You should have received a copy of the GNU General Public License | |
8c4c00c1 | 18 | along with GCC; see the file COPYING3. If not see |
19 | <http://www.gnu.org/licenses/>. */ | |
2d49f824 | 20 | |
21 | #include "config.h" | |
22 | #include "system.h" | |
23 | #include "coretypes.h" | |
9ef16211 | 24 | #include "backend.h" |
7c29e30e | 25 | #include "target.h" |
2d49f824 | 26 | #include "rtl.h" |
d53441c8 | 27 | #include "tree.h" |
7c29e30e | 28 | #include "cfghooks.h" |
ad7b10a2 | 29 | #include "memmodel.h" |
7c29e30e | 30 | #include "emit-rtl.h" |
d53441c8 | 31 | #include "dojump.h" |
d53441c8 | 32 | #include "expr.h" |
2d49f824 | 33 | #include "cfgloop.h" |
94ea8568 | 34 | #include "cfgrtl.h" |
2d49f824 | 35 | #include "params.h" |
b9ed1410 | 36 | #include "dumpfile.h" |
d87e6a03 | 37 | #include "loop-unroll.h" |
a3fb1e28 | 38 | #include "regs.h" |
39 | #include "df.h" | |
2d49f824 | 40 | |
41 | /* This module is used to modify loops with a determinable number of | |
42 | iterations to use special low-overhead looping instructions. | |
43 | ||
44 | It first validates whether the loop is well behaved and has a | |
45 | determinable number of iterations (either at compile or run-time). | |
46 | It then modifies the loop to use a low-overhead looping pattern as | |
47 | follows: | |
48 | ||
49 | 1. A pseudo register is allocated as the loop iteration counter. | |
50 | ||
51 | 2. The number of loop iterations is calculated and is stored | |
52 | in the loop counter. | |
53 | ||
54 | 3. At the end of the loop, the jump insn is replaced by the | |
55 | doloop_end pattern. The compare must remain because it might be | |
56 | used elsewhere. If the loop-variable or condition register are | |
57 | used elsewhere, they will be eliminated by flow. | |
58 | ||
59 | 4. An optional doloop_begin pattern is inserted at the top of the | |
60 | loop. | |
61 | ||
62 | TODO The optimization should only performed when either the biv used for exit | |
63 | condition is unused at all except for the exit test, or if we do not have to | |
64 | change its value, since otherwise we have to add a new induction variable, | |
65 | which usually will not pay up (unless the cost of the doloop pattern is | |
66 | somehow extremely lower than the cost of compare & jump, or unless the bct | |
67 | register cannot be used for anything else but doloop -- ??? detect these | |
68 | cases). */ | |
69 | ||
2d49f824 | 70 | /* Return the loop termination condition for PATTERN or zero |
71 | if it is not a decrement and branch jump insn. */ | |
72 | ||
fc2abfd3 | 73 | rtx |
3a8bb9ce | 74 | doloop_condition_get (rtx_insn *doloop_pat) |
2d49f824 | 75 | { |
76 | rtx cmp; | |
77 | rtx inc; | |
78 | rtx reg; | |
fc2abfd3 | 79 | rtx inc_src; |
2d49f824 | 80 | rtx condition; |
7bfc9b7c | 81 | rtx pattern; |
90c2bcf0 | 82 | rtx cc_reg = NULL_RTX; |
83 | rtx reg_orig = NULL_RTX; | |
2d49f824 | 84 | |
7bfc9b7c | 85 | /* The canonical doloop pattern we expect has one of the following |
86 | forms: | |
2d49f824 | 87 | |
7bfc9b7c | 88 | 1) (parallel [(set (pc) (if_then_else (condition) |
89 | (label_ref (label)) | |
90 | (pc))) | |
91 | (set (reg) (plus (reg) (const_int -1))) | |
92 | (additional clobbers and uses)]) | |
2d49f824 | 93 | |
7bfc9b7c | 94 | The branch must be the first entry of the parallel (also required |
95 | by jump.c), and the second entry of the parallel must be a set of | |
96 | the loop counter register. Some targets (IA-64) wrap the set of | |
97 | the loop counter in an if_then_else too. | |
fc2abfd3 | 98 | |
7bfc9b7c | 99 | 2) (set (reg) (plus (reg) (const_int -1)) |
100 | (set (pc) (if_then_else (reg != 0) | |
101 | (label_ref (label)) | |
90c2bcf0 | 102 | (pc))). |
103 | ||
104 | Some targets (ARM) do the comparison before the branch, as in the | |
105 | following form: | |
106 | ||
107 | 3) (parallel [(set (cc) (compare ((plus (reg) (const_int -1), 0))) | |
108 | (set (reg) (plus (reg) (const_int -1)))]) | |
109 | (set (pc) (if_then_else (cc == NE) | |
110 | (label_ref (label)) | |
111 | (pc))) */ | |
7bfc9b7c | 112 | |
113 | pattern = PATTERN (doloop_pat); | |
2d49f824 | 114 | |
115 | if (GET_CODE (pattern) != PARALLEL) | |
7bfc9b7c | 116 | { |
117 | rtx cond; | |
bf79ca12 | 118 | rtx_insn *prev_insn = prev_nondebug_insn (doloop_pat); |
90c2bcf0 | 119 | rtx cmp_arg1, cmp_arg2; |
120 | rtx cmp_orig; | |
7bfc9b7c | 121 | |
90c2bcf0 | 122 | /* In case the pattern is not PARALLEL we expect two forms |
123 | of doloop which are cases 2) and 3) above: in case 2) the | |
124 | decrement immediately precedes the branch, while in case 3) | |
125 | the compare and decrement instructions immediately precede | |
126 | the branch. */ | |
2d49f824 | 127 | |
9c868118 | 128 | if (prev_insn == NULL_RTX || !INSN_P (prev_insn)) |
7bfc9b7c | 129 | return 0; |
130 | ||
131 | cmp = pattern; | |
90c2bcf0 | 132 | if (GET_CODE (PATTERN (prev_insn)) == PARALLEL) |
133 | { | |
134 | /* The third case: the compare and decrement instructions | |
135 | immediately precede the branch. */ | |
136 | cmp_orig = XVECEXP (PATTERN (prev_insn), 0, 0); | |
137 | if (GET_CODE (cmp_orig) != SET) | |
138 | return 0; | |
139 | if (GET_CODE (SET_SRC (cmp_orig)) != COMPARE) | |
140 | return 0; | |
141 | cmp_arg1 = XEXP (SET_SRC (cmp_orig), 0); | |
142 | cmp_arg2 = XEXP (SET_SRC (cmp_orig), 1); | |
143 | if (cmp_arg2 != const0_rtx | |
144 | || GET_CODE (cmp_arg1) != PLUS) | |
145 | return 0; | |
146 | reg_orig = XEXP (cmp_arg1, 0); | |
147 | if (XEXP (cmp_arg1, 1) != GEN_INT (-1) | |
148 | || !REG_P (reg_orig)) | |
149 | return 0; | |
150 | cc_reg = SET_DEST (cmp_orig); | |
151 | ||
152 | inc = XVECEXP (PATTERN (prev_insn), 0, 1); | |
153 | } | |
154 | else | |
29dc3c19 | 155 | inc = PATTERN (prev_insn); |
f91625f4 | 156 | if (GET_CODE (cmp) == SET && GET_CODE (SET_SRC (cmp)) == IF_THEN_ELSE) |
157 | { | |
158 | /* We expect the condition to be of the form (reg != 0) */ | |
159 | cond = XEXP (SET_SRC (cmp), 0); | |
160 | if (GET_CODE (cond) != NE || XEXP (cond, 1) != const0_rtx) | |
161 | return 0; | |
162 | } | |
7bfc9b7c | 163 | } |
164 | else | |
165 | { | |
166 | cmp = XVECEXP (pattern, 0, 0); | |
167 | inc = XVECEXP (pattern, 0, 1); | |
168 | } | |
2d49f824 | 169 | |
170 | /* Check for (set (reg) (something)). */ | |
fc2abfd3 | 171 | if (GET_CODE (inc) != SET) |
2d49f824 | 172 | return 0; |
2d49f824 | 173 | reg = SET_DEST (inc); |
fc2abfd3 | 174 | if (! REG_P (reg)) |
175 | return 0; | |
176 | ||
177 | /* Check if something = (plus (reg) (const_int -1)). | |
178 | On IA-64, this decrement is wrapped in an if_then_else. */ | |
179 | inc_src = SET_SRC (inc); | |
180 | if (GET_CODE (inc_src) == IF_THEN_ELSE) | |
181 | inc_src = XEXP (inc_src, 1); | |
182 | if (GET_CODE (inc_src) != PLUS | |
183 | || XEXP (inc_src, 0) != reg | |
184 | || XEXP (inc_src, 1) != constm1_rtx) | |
185 | return 0; | |
2d49f824 | 186 | |
187 | /* Check for (set (pc) (if_then_else (condition) | |
188 | (label_ref (label)) | |
189 | (pc))). */ | |
190 | if (GET_CODE (cmp) != SET | |
191 | || SET_DEST (cmp) != pc_rtx | |
192 | || GET_CODE (SET_SRC (cmp)) != IF_THEN_ELSE | |
193 | || GET_CODE (XEXP (SET_SRC (cmp), 1)) != LABEL_REF | |
194 | || XEXP (SET_SRC (cmp), 2) != pc_rtx) | |
195 | return 0; | |
196 | ||
197 | /* Extract loop termination condition. */ | |
198 | condition = XEXP (SET_SRC (cmp), 0); | |
199 | ||
fc2abfd3 | 200 | /* We expect a GE or NE comparison with 0 or 1. */ |
201 | if ((GET_CODE (condition) != GE | |
202 | && GET_CODE (condition) != NE) | |
203 | || (XEXP (condition, 1) != const0_rtx | |
204 | && XEXP (condition, 1) != const1_rtx)) | |
2d49f824 | 205 | return 0; |
206 | ||
fc2abfd3 | 207 | if ((XEXP (condition, 0) == reg) |
90c2bcf0 | 208 | /* For the third case: */ |
209 | || ((cc_reg != NULL_RTX) | |
210 | && (XEXP (condition, 0) == cc_reg) | |
211 | && (reg_orig == reg)) | |
fc2abfd3 | 212 | || (GET_CODE (XEXP (condition, 0)) == PLUS |
90c2bcf0 | 213 | && XEXP (XEXP (condition, 0), 0) == reg)) |
7bfc9b7c | 214 | { |
215 | if (GET_CODE (pattern) != PARALLEL) | |
90c2bcf0 | 216 | /* For the second form we expect: |
7bfc9b7c | 217 | |
218 | (set (reg) (plus (reg) (const_int -1)) | |
219 | (set (pc) (if_then_else (reg != 0) | |
220 | (label_ref (label)) | |
221 | (pc))). | |
222 | ||
223 | is equivalent to the following: | |
224 | ||
225 | (parallel [(set (pc) (if_then_else (reg != 1) | |
226 | (label_ref (label)) | |
227 | (pc))) | |
228 | (set (reg) (plus (reg) (const_int -1))) | |
229 | (additional clobbers and uses)]) | |
230 | ||
90c2bcf0 | 231 | For the third form we expect: |
232 | ||
233 | (parallel [(set (cc) (compare ((plus (reg) (const_int -1)), 0)) | |
234 | (set (reg) (plus (reg) (const_int -1)))]) | |
235 | (set (pc) (if_then_else (cc == NE) | |
236 | (label_ref (label)) | |
237 | (pc))) | |
238 | ||
239 | which is equivalent to the following: | |
240 | ||
241 | (parallel [(set (cc) (compare (reg, 1)) | |
242 | (set (reg) (plus (reg) (const_int -1))) | |
243 | (set (pc) (if_then_else (NE == cc) | |
244 | (label_ref (label)) | |
245 | (pc))))]) | |
246 | ||
247 | So we return the second form instead for the two cases. | |
248 | ||
7bfc9b7c | 249 | */ |
250 | condition = gen_rtx_fmt_ee (NE, VOIDmode, inc_src, const1_rtx); | |
251 | ||
2d49f824 | 252 | return condition; |
7bfc9b7c | 253 | } |
2d49f824 | 254 | |
255 | /* ??? If a machine uses a funny comparison, we could return a | |
7bd28bba | 256 | canonicalized form here. */ |
2d49f824 | 257 | |
258 | return 0; | |
259 | } | |
260 | ||
261 | /* Return nonzero if the loop specified by LOOP is suitable for | |
262 | the use of special low-overhead looping instructions. DESC | |
263 | describes the number of iterations of the loop. */ | |
264 | ||
265 | static bool | |
266 | doloop_valid_p (struct loop *loop, struct niter_desc *desc) | |
267 | { | |
268 | basic_block *body = get_loop_body (loop), bb; | |
3d414d5d | 269 | rtx_insn *insn; |
2d49f824 | 270 | unsigned i; |
7caa482e | 271 | bool result = true; |
2d49f824 | 272 | |
273 | /* Check for loops that may not terminate under special conditions. */ | |
274 | if (!desc->simple_p | |
275 | || desc->assumptions | |
276 | || desc->infinite) | |
277 | { | |
278 | /* There are some cases that would require a special attention. | |
279 | For example if the comparison is LEU and the comparison value | |
280 | is UINT_MAX then the loop will not terminate. Similarly, if the | |
281 | comparison code is GEU and the comparison value is 0, the | |
282 | loop will not terminate. | |
283 | ||
284 | If the absolute increment is not 1, the loop can be infinite | |
285 | even with LTU/GTU, e.g. for (i = 3; i > 0; i -= 2) | |
286 | ||
287 | ??? We could compute these conditions at run-time and have a | |
288 | additional jump around the loop to ensure an infinite loop. | |
289 | However, it is very unlikely that this is the intended | |
290 | behavior of the loop and checking for these rare boundary | |
291 | conditions would pessimize all other code. | |
292 | ||
293 | If the loop is executed only a few times an extra check to | |
294 | restart the loop could use up most of the benefits of using a | |
295 | count register loop. Note however, that normally, this | |
296 | restart branch would never execute, so it could be predicted | |
297 | well by the CPU. We should generate the pessimistic code by | |
298 | default, and have an option, e.g. -funsafe-loops that would | |
299 | enable count-register loops in this case. */ | |
300 | if (dump_file) | |
301 | fprintf (dump_file, "Doloop: Possible infinite iteration case.\n"); | |
7caa482e | 302 | result = false; |
303 | goto cleanup; | |
2d49f824 | 304 | } |
305 | ||
306 | for (i = 0; i < loop->num_nodes; i++) | |
307 | { | |
308 | bb = body[i]; | |
309 | ||
310 | for (insn = BB_HEAD (bb); | |
311 | insn != NEXT_INSN (BB_END (bb)); | |
312 | insn = NEXT_INSN (insn)) | |
313 | { | |
7dfa5ce3 | 314 | /* Different targets have different necessities for low-overhead |
315 | looping. Call the back end for each instruction within the loop | |
1606e68a | 316 | to let it decide whether the insn prohibits a low-overhead loop. |
317 | It will then return the cause for it to emit to the dump file. */ | |
318 | const char * invalid = targetm.invalid_within_doloop (insn); | |
319 | if (invalid) | |
320 | { | |
321 | if (dump_file) | |
322 | fprintf (dump_file, "Doloop: %s\n", invalid); | |
7caa482e | 323 | result = false; |
324 | goto cleanup; | |
1606e68a | 325 | } |
2d49f824 | 326 | } |
327 | } | |
7caa482e | 328 | result = true; |
329 | ||
330 | cleanup: | |
2d49f824 | 331 | free (body); |
332 | ||
7caa482e | 333 | return result; |
2d49f824 | 334 | } |
335 | ||
a4c7895e | 336 | /* Adds test of COND jumping to DEST on edge *E and set *E to the new fallthru |
337 | edge. If the condition is always false, do not do anything. If it is always | |
338 | true, redirect E to DEST and return false. In all other cases, true is | |
339 | returned. */ | |
2d49f824 | 340 | |
a4c7895e | 341 | static bool |
342 | add_test (rtx cond, edge *e, basic_block dest) | |
2d49f824 | 343 | { |
3d414d5d | 344 | rtx_insn *seq, *jump; |
f9a00e9e | 345 | rtx_code_label *label; |
3754d046 | 346 | machine_mode mode; |
2d49f824 | 347 | rtx op0 = XEXP (cond, 0), op1 = XEXP (cond, 1); |
348 | enum rtx_code code = GET_CODE (cond); | |
a4c7895e | 349 | basic_block bb; |
2d49f824 | 350 | |
351 | mode = GET_MODE (XEXP (cond, 0)); | |
352 | if (mode == VOIDmode) | |
353 | mode = GET_MODE (XEXP (cond, 1)); | |
354 | ||
355 | start_sequence (); | |
356 | op0 = force_operand (op0, NULL_RTX); | |
357 | op1 = force_operand (op1, NULL_RTX); | |
358 | label = block_label (dest); | |
f9a00e9e | 359 | do_compare_rtx_and_jump (op0, op1, code, 0, mode, NULL_RTX, NULL, label, -1); |
2d49f824 | 360 | |
361 | jump = get_last_insn (); | |
d6a1bbdb | 362 | if (!jump || !JUMP_P (jump)) |
3c4d0a06 | 363 | { |
a4c7895e | 364 | /* The condition is always false and the jump was optimized out. */ |
365 | end_sequence (); | |
366 | return true; | |
3c4d0a06 | 367 | } |
2d49f824 | 368 | |
369 | seq = get_insns (); | |
43e9cea5 | 370 | unshare_all_rtl_in_chain (seq); |
2d49f824 | 371 | end_sequence (); |
d6a1bbdb | 372 | |
373 | /* There always is at least the jump insn in the sequence. */ | |
374 | gcc_assert (seq != NULL_RTX); | |
375 | ||
88e6f696 | 376 | bb = split_edge_and_insert (*e, seq); |
a4c7895e | 377 | *e = single_succ_edge (bb); |
378 | ||
379 | if (any_uncondjump_p (jump)) | |
380 | { | |
381 | /* The condition is always true. */ | |
382 | delete_insn (jump); | |
383 | redirect_edge_and_branch_force (*e, dest); | |
384 | return false; | |
385 | } | |
48e1416a | 386 | |
a4c7895e | 387 | JUMP_LABEL (jump) = label; |
388 | ||
389 | /* The jump is supposed to handle an unlikely special case. */ | |
9eb946de | 390 | add_int_reg_note (jump, REG_BR_PROB, 0); |
a1ddb869 | 391 | |
a4c7895e | 392 | LABEL_NUSES (label)++; |
393 | ||
394 | make_edge (bb, dest, (*e)->flags & ~EDGE_FALLTHRU); | |
395 | return true; | |
2d49f824 | 396 | } |
397 | ||
398 | /* Modify the loop to use the low-overhead looping insn where LOOP | |
399 | describes the loop, DESC describes the number of iterations of the | |
400 | loop, and DOLOOP_INSN is the low-overhead looping insn to emit at the | |
401 | end of the loop. CONDITION is the condition separated from the | |
941f640b | 402 | DOLOOP_SEQ. COUNT is the number of iterations of the LOOP. */ |
2d49f824 | 403 | |
404 | static void | |
405 | doloop_modify (struct loop *loop, struct niter_desc *desc, | |
4177c695 | 406 | rtx_insn *doloop_seq, rtx condition, rtx count) |
2d49f824 | 407 | { |
408 | rtx counter_reg; | |
cde5a3a0 | 409 | rtx tmp, noloop = NULL_RTX; |
3d414d5d | 410 | rtx_insn *sequence; |
411 | rtx_insn *jump_insn; | |
f9a00e9e | 412 | rtx_code_label *jump_label; |
a4c7895e | 413 | int nonneg = 0; |
2d49f824 | 414 | bool increment_count; |
415 | basic_block loop_end = desc->out_edge->src; | |
3754d046 | 416 | machine_mode mode; |
fa7f4c0c | 417 | rtx true_prob_val; |
5de9d3ed | 418 | widest_int iterations; |
2d49f824 | 419 | |
420 | jump_insn = BB_END (loop_end); | |
421 | ||
422 | if (dump_file) | |
423 | { | |
424 | fprintf (dump_file, "Doloop: Inserting doloop pattern ("); | |
425 | if (desc->const_iter) | |
f03df321 | 426 | fprintf (dump_file, "%" PRId64, desc->niter); |
2d49f824 | 427 | else |
428 | fputs ("runtime", dump_file); | |
429 | fputs (" iterations).\n", dump_file); | |
430 | } | |
431 | ||
f0b5f617 | 432 | /* Get the probability of the original branch. If it exists we would |
fa7f4c0c | 433 | need to update REG_BR_PROB of the new jump_insn. */ |
434 | true_prob_val = find_reg_note (jump_insn, REG_BR_PROB, NULL_RTX); | |
435 | ||
2d49f824 | 436 | /* Discard original jump to continue loop. The original compare |
437 | result may still be live, so it cannot be discarded explicitly. */ | |
438 | delete_insn (jump_insn); | |
439 | ||
440 | counter_reg = XEXP (condition, 0); | |
441 | if (GET_CODE (counter_reg) == PLUS) | |
442 | counter_reg = XEXP (counter_reg, 0); | |
cde5a3a0 | 443 | mode = GET_MODE (counter_reg); |
2d49f824 | 444 | |
2d49f824 | 445 | increment_count = false; |
446 | switch (GET_CODE (condition)) | |
447 | { | |
448 | case NE: | |
449 | /* Currently only NE tests against zero and one are supported. */ | |
972e95af | 450 | noloop = XEXP (condition, 1); |
451 | if (noloop != const0_rtx) | |
2d49f824 | 452 | { |
972e95af | 453 | gcc_assert (noloop == const1_rtx); |
2d49f824 | 454 | increment_count = true; |
2d49f824 | 455 | } |
2d49f824 | 456 | break; |
457 | ||
458 | case GE: | |
459 | /* Currently only GE tests against zero are supported. */ | |
972e95af | 460 | gcc_assert (XEXP (condition, 1) == const0_rtx); |
2d49f824 | 461 | |
462 | noloop = constm1_rtx; | |
463 | ||
464 | /* The iteration count does not need incrementing for a GE test. */ | |
465 | increment_count = false; | |
466 | ||
467 | /* Determine if the iteration counter will be non-negative. | |
468 | Note that the maximum value loaded is iterations_max - 1. */ | |
23b937aa | 469 | if (get_max_loop_iterations (loop, &iterations) |
796b6678 | 470 | && wi::leu_p (iterations, |
cd9b5516 | 471 | wi::set_bit_in_zero <widest_int> |
472 | (GET_MODE_PRECISION (mode) - 1))) | |
2d49f824 | 473 | nonneg = 1; |
474 | break; | |
475 | ||
b690c0a3 | 476 | /* Abort if an invalid doloop pattern has been generated. */ |
2045cdd4 | 477 | default: |
972e95af | 478 | gcc_unreachable (); |
2d49f824 | 479 | } |
480 | ||
481 | if (increment_count) | |
941f640b | 482 | count = simplify_gen_binary (PLUS, mode, count, const1_rtx); |
560a2fa6 | 483 | |
2d49f824 | 484 | /* Insert initialization of the count register into the loop header. */ |
485 | start_sequence (); | |
244e9d34 | 486 | /* count has been already copied through copy_rtx. */ |
487 | reset_used_flags (count); | |
488 | set_used_flags (condition); | |
2d49f824 | 489 | tmp = force_operand (count, counter_reg); |
490 | convert_move (counter_reg, tmp, 1); | |
491 | sequence = get_insns (); | |
244e9d34 | 492 | unshare_all_rtl_in_chain (sequence); |
2d49f824 | 493 | end_sequence (); |
494 | emit_insn_after (sequence, BB_END (loop_preheader_edge (loop)->src)); | |
495 | ||
496 | if (desc->noloop_assumptions) | |
497 | { | |
c30738d0 | 498 | rtx ass = copy_rtx (desc->noloop_assumptions); |
2d49f824 | 499 | basic_block preheader = loop_preheader_edge (loop)->src; |
244e9d34 | 500 | basic_block set_zero = split_edge (loop_preheader_edge (loop)); |
501 | basic_block new_preheader = split_edge (loop_preheader_edge (loop)); | |
2d49f824 | 502 | edge te; |
2d49f824 | 503 | |
504 | /* Expand the condition testing the assumptions and if it does not pass, | |
505 | reset the count register to 0. */ | |
a4c7895e | 506 | redirect_edge_and_branch_force (single_succ_edge (preheader), new_preheader); |
2d49f824 | 507 | set_immediate_dominator (CDI_DOMINATORS, new_preheader, preheader); |
508 | ||
db9cef39 | 509 | set_zero->count = profile_count::uninitialized (); |
2d49f824 | 510 | set_zero->frequency = 0; |
511 | ||
a4c7895e | 512 | te = single_succ_edge (preheader); |
513 | for (; ass; ass = XEXP (ass, 1)) | |
514 | if (!add_test (XEXP (ass, 0), &te, set_zero)) | |
515 | break; | |
516 | ||
517 | if (ass) | |
2d49f824 | 518 | { |
a4c7895e | 519 | /* We reached a condition that is always true. This is very hard to |
520 | reproduce (such a loop does not roll, and thus it would most | |
521 | likely get optimized out by some of the preceding optimizations). | |
522 | In fact, I do not have any testcase for it. However, it would | |
523 | also be very hard to show that it is impossible, so we must | |
524 | handle this case. */ | |
525 | set_zero->count = preheader->count; | |
526 | set_zero->frequency = preheader->frequency; | |
2d49f824 | 527 | } |
48e1416a | 528 | |
a4c7895e | 529 | if (EDGE_COUNT (set_zero->preds) == 0) |
530 | { | |
531 | /* All the conditions were simplified to false, remove the | |
532 | unreachable set_zero block. */ | |
a4c7895e | 533 | delete_basic_block (set_zero); |
534 | } | |
535 | else | |
536 | { | |
537 | /* Reset the counter to zero in the set_zero block. */ | |
538 | start_sequence (); | |
539 | convert_move (counter_reg, noloop, 0); | |
540 | sequence = get_insns (); | |
541 | end_sequence (); | |
542 | emit_insn_after (sequence, BB_END (set_zero)); | |
48e1416a | 543 | |
a4c7895e | 544 | set_immediate_dominator (CDI_DOMINATORS, set_zero, |
3f9439d7 | 545 | recompute_dominator (CDI_DOMINATORS, |
546 | set_zero)); | |
a4c7895e | 547 | } |
548 | ||
549 | set_immediate_dominator (CDI_DOMINATORS, new_preheader, | |
3f9439d7 | 550 | recompute_dominator (CDI_DOMINATORS, |
551 | new_preheader)); | |
2d49f824 | 552 | } |
553 | ||
554 | /* Some targets (eg, C4x) need to initialize special looping | |
555 | registers. */ | |
4177c695 | 556 | if (targetm.have_doloop_begin ()) |
557 | if (rtx_insn *seq = targetm.gen_doloop_begin (counter_reg, doloop_seq)) | |
558 | emit_insn_after (seq, BB_END (loop_preheader_edge (loop)->src)); | |
2d49f824 | 559 | |
560 | /* Insert the new low-overhead looping insn. */ | |
561 | emit_jump_insn_after (doloop_seq, BB_END (loop_end)); | |
562 | jump_insn = BB_END (loop_end); | |
563 | jump_label = block_label (desc->in_edge->dest); | |
564 | JUMP_LABEL (jump_insn) = jump_label; | |
565 | LABEL_NUSES (jump_label)++; | |
566 | ||
567 | /* Ensure the right fallthru edge is marked, for case we have reversed | |
568 | the condition. */ | |
569 | desc->in_edge->flags &= ~EDGE_FALLTHRU; | |
570 | desc->out_edge->flags |= EDGE_FALLTHRU; | |
571 | ||
572 | /* Add a REG_NONNEG note if the actual or estimated maximum number | |
573 | of iterations is non-negative. */ | |
574 | if (nonneg) | |
a1ddb869 | 575 | add_reg_note (jump_insn, REG_NONNEG, NULL_RTX); |
576 | ||
fa7f4c0c | 577 | /* Update the REG_BR_PROB note. */ |
578 | if (true_prob_val) | |
579 | { | |
580 | /* Seems safer to use the branch probability. */ | |
9eb946de | 581 | add_int_reg_note (jump_insn, REG_BR_PROB, desc->in_edge->probability); |
fa7f4c0c | 582 | } |
2d49f824 | 583 | } |
584 | ||
a3fb1e28 | 585 | /* Called through note_stores. */ |
586 | ||
587 | static void | |
588 | record_reg_sets (rtx x, const_rtx pat ATTRIBUTE_UNUSED, void *data) | |
589 | { | |
590 | bitmap mod = (bitmap)data; | |
591 | if (REG_P (x)) | |
592 | { | |
593 | unsigned int regno = REGNO (x); | |
594 | if (HARD_REGISTER_P (x)) | |
595 | { | |
596 | unsigned int end_regno = end_hard_regno (GET_MODE (x), regno); | |
597 | do | |
598 | bitmap_set_bit (mod, regno); | |
599 | while (++regno < end_regno); | |
600 | } | |
601 | else | |
602 | bitmap_set_bit (mod, regno); | |
603 | } | |
604 | } | |
605 | ||
2d49f824 | 606 | /* Process loop described by LOOP validating that the loop is suitable for |
607 | conversion to use a low overhead looping instruction, replacing the jump | |
608 | insn where suitable. Returns true if the loop was successfully | |
609 | modified. */ | |
610 | ||
611 | static bool | |
612 | doloop_optimize (struct loop *loop) | |
613 | { | |
3754d046 | 614 | machine_mode mode; |
4177c695 | 615 | rtx doloop_reg; |
a04a7bec | 616 | rtx count; |
617 | widest_int iterations, iterations_max; | |
f9a00e9e | 618 | rtx_code_label *start_label; |
2d49f824 | 619 | rtx condition; |
22e0b7be | 620 | unsigned level; |
621 | HOST_WIDE_INT est_niter; | |
9cab63d0 | 622 | int max_cost; |
2d49f824 | 623 | struct niter_desc *desc; |
cde5a3a0 | 624 | unsigned word_mode_size; |
625 | unsigned HOST_WIDE_INT word_mode_max; | |
bbdfcf34 | 626 | int entered_at_top; |
2d49f824 | 627 | |
628 | if (dump_file) | |
629 | fprintf (dump_file, "Doloop: Processing loop %d.\n", loop->num); | |
630 | ||
631 | iv_analysis_loop_init (loop); | |
632 | ||
633 | /* Find the simple exit of a LOOP. */ | |
634 | desc = get_simple_loop_desc (loop); | |
635 | ||
636 | /* Check that loop is a candidate for a low-overhead looping insn. */ | |
637 | if (!doloop_valid_p (loop, desc)) | |
638 | { | |
639 | if (dump_file) | |
640 | fprintf (dump_file, | |
641 | "Doloop: The loop is not suitable.\n"); | |
642 | return false; | |
643 | } | |
644 | mode = desc->mode; | |
645 | ||
22e0b7be | 646 | est_niter = get_estimated_loop_iterations_int (loop); |
647 | if (est_niter == -1) | |
2630bb74 | 648 | est_niter = get_likely_max_loop_iterations_int (loop); |
22e0b7be | 649 | |
650 | if (est_niter >= 0 && est_niter < 3) | |
2d49f824 | 651 | { |
652 | if (dump_file) | |
653 | fprintf (dump_file, | |
654 | "Doloop: Too few iterations (%u) to be profitable.\n", | |
22e0b7be | 655 | (unsigned int)est_niter); |
2d49f824 | 656 | return false; |
657 | } | |
658 | ||
883bb2bb | 659 | max_cost |
660 | = COSTS_N_INSNS (PARAM_VALUE (PARAM_MAX_ITERATIONS_COMPUTATION_COST)); | |
5ae4887d | 661 | if (set_src_cost (desc->niter_expr, mode, optimize_loop_for_speed_p (loop)) |
f529eb25 | 662 | > max_cost) |
883bb2bb | 663 | { |
664 | if (dump_file) | |
665 | fprintf (dump_file, | |
2ce55c0b | 666 | "Doloop: number of iterations too costly to compute.\n"); |
883bb2bb | 667 | return false; |
668 | } | |
669 | ||
a04a7bec | 670 | if (desc->const_iter) |
c67875ad | 671 | iterations = widest_int::from (rtx_mode_t (desc->niter_expr, mode), |
a04a7bec | 672 | UNSIGNED); |
a9ef9877 | 673 | else |
a04a7bec | 674 | iterations = 0; |
675 | if (!get_max_loop_iterations (loop, &iterations_max)) | |
676 | iterations_max = 0; | |
2d49f824 | 677 | level = get_loop_level (loop) + 1; |
a04a7bec | 678 | entered_at_top = (loop->latch == desc->in_edge->dest |
679 | && contains_no_active_insn_p (loop->latch)); | |
680 | if (!targetm.can_use_doloop_p (iterations, iterations_max, level, | |
681 | entered_at_top)) | |
682 | { | |
683 | if (dump_file) | |
684 | fprintf (dump_file, "Loop rejected by can_use_doloop_p.\n"); | |
685 | return false; | |
686 | } | |
2d49f824 | 687 | |
688 | /* Generate looping insn. If the pattern FAILs then give up trying | |
689 | to modify the loop since there is some aspect the back-end does | |
690 | not like. */ | |
a04a7bec | 691 | count = copy_rtx (desc->niter_expr); |
2d49f824 | 692 | start_label = block_label (desc->in_edge->dest); |
693 | doloop_reg = gen_reg_rtx (mode); | |
4177c695 | 694 | rtx_insn *doloop_seq = targetm.gen_doloop_end (doloop_reg, start_label); |
cde5a3a0 | 695 | |
ded805e6 | 696 | word_mode_size = GET_MODE_PRECISION (word_mode); |
244e9d34 | 697 | word_mode_max = (HOST_WIDE_INT_1U << (word_mode_size - 1) << 1) - 1; |
cde5a3a0 | 698 | if (! doloop_seq |
699 | && mode != word_mode | |
700 | /* Before trying mode different from the one in that # of iterations is | |
701 | computed, we must be sure that the number of iterations fits into | |
702 | the new mode. */ | |
ded805e6 | 703 | && (word_mode_size >= GET_MODE_PRECISION (mode) |
a04a7bec | 704 | || wi::leu_p (iterations_max, word_mode_max))) |
2d49f824 | 705 | { |
ded805e6 | 706 | if (word_mode_size > GET_MODE_PRECISION (mode)) |
a04a7bec | 707 | count = simplify_gen_unary (ZERO_EXTEND, word_mode, count, mode); |
cde5a3a0 | 708 | else |
a04a7bec | 709 | count = lowpart_subreg (word_mode, count, mode); |
2d49f824 | 710 | PUT_MODE (doloop_reg, word_mode); |
4177c695 | 711 | doloop_seq = targetm.gen_doloop_end (doloop_reg, start_label); |
2d49f824 | 712 | } |
713 | if (! doloop_seq) | |
714 | { | |
715 | if (dump_file) | |
716 | fprintf (dump_file, | |
717 | "Doloop: Target unwilling to use doloop pattern!\n"); | |
718 | return false; | |
719 | } | |
720 | ||
721 | /* If multiple instructions were created, the last must be the | |
4177c695 | 722 | jump instruction. */ |
723 | rtx_insn *doloop_insn = doloop_seq; | |
724 | while (NEXT_INSN (doloop_insn) != NULL_RTX) | |
725 | doloop_insn = NEXT_INSN (doloop_insn); | |
726 | if (!JUMP_P (doloop_insn) | |
727 | || !(condition = doloop_condition_get (doloop_insn))) | |
2d49f824 | 728 | { |
729 | if (dump_file) | |
730 | fprintf (dump_file, "Doloop: Unrecognizable doloop pattern!\n"); | |
731 | return false; | |
732 | } | |
733 | ||
a3fb1e28 | 734 | /* Ensure that the new sequence doesn't clobber a register that |
735 | is live at the end of the block. */ | |
736 | { | |
737 | bitmap modified = BITMAP_ALLOC (NULL); | |
738 | ||
739 | for (rtx_insn *i = doloop_seq; i != NULL; i = NEXT_INSN (i)) | |
740 | note_stores (PATTERN (i), record_reg_sets, modified); | |
741 | ||
742 | basic_block loop_end = desc->out_edge->src; | |
743 | bool fail = bitmap_intersect_p (df_get_live_out (loop_end), modified); | |
744 | BITMAP_FREE (modified); | |
745 | ||
746 | if (fail) | |
747 | { | |
748 | if (dump_file) | |
749 | fprintf (dump_file, "Doloop: doloop pattern clobbers live out\n"); | |
750 | return false; | |
751 | } | |
752 | } | |
753 | ||
941f640b | 754 | doloop_modify (loop, desc, doloop_seq, condition, count); |
2d49f824 | 755 | return true; |
756 | } | |
757 | ||
7194de72 | 758 | /* This is the main entry point. Process all loops using doloop_optimize. */ |
2d49f824 | 759 | |
760 | void | |
7194de72 | 761 | doloop_optimize_loops (void) |
2d49f824 | 762 | { |
2d49f824 | 763 | struct loop *loop; |
764 | ||
a3fb1e28 | 765 | if (optimize == 1) |
766 | { | |
767 | df_live_add_problem (); | |
768 | df_live_set_all_dirty (); | |
769 | } | |
770 | ||
e9c7354c | 771 | FOR_EACH_LOOP (loop, 0) |
2d49f824 | 772 | { |
2d49f824 | 773 | doloop_optimize (loop); |
774 | } | |
775 | ||
a3fb1e28 | 776 | if (optimize == 1) |
777 | df_remove_problem (df_live); | |
778 | ||
2d49f824 | 779 | iv_analysis_done (); |
780 | ||
382ecba7 | 781 | checking_verify_loop_structure (); |
2d49f824 | 782 | } |