]>
Commit | Line | Data |
---|---|---|
689ba89d | 1 | /* Perform doloop optimizations |
8d9254fc | 2 | Copyright (C) 2004-2020 Free Software Foundation, Inc. |
689ba89d ZD |
3 | Based on code by Michael P. Hayes (m.hayes@elec.canterbury.ac.nz) |
4 | ||
5 | This file is part of GCC. | |
6 | ||
7 | GCC is free software; you can redistribute it and/or modify it under | |
8 | the terms of the GNU General Public License as published by the Free | |
9dcd6f09 | 9 | Software Foundation; either version 3, or (at your option) any later |
689ba89d ZD |
10 | version. |
11 | ||
12 | GCC is distributed in the hope that it will be useful, but WITHOUT ANY | |
13 | WARRANTY; without even the implied warranty of MERCHANTABILITY or | |
14 | FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License | |
15 | for more details. | |
16 | ||
17 | You should have received a copy of the GNU General Public License | |
9dcd6f09 NC |
18 | along with GCC; see the file COPYING3. If not see |
19 | <http://www.gnu.org/licenses/>. */ | |
689ba89d ZD |
20 | |
21 | #include "config.h" | |
22 | #include "system.h" | |
23 | #include "coretypes.h" | |
c7131fb2 | 24 | #include "backend.h" |
957060b5 | 25 | #include "target.h" |
689ba89d | 26 | #include "rtl.h" |
36566b39 | 27 | #include "tree.h" |
957060b5 | 28 | #include "cfghooks.h" |
4d0cdd0c | 29 | #include "memmodel.h" |
957060b5 | 30 | #include "emit-rtl.h" |
36566b39 | 31 | #include "dojump.h" |
36566b39 | 32 | #include "expr.h" |
689ba89d | 33 | #include "cfgloop.h" |
60393bbc | 34 | #include "cfgrtl.h" |
7ee2468b | 35 | #include "dumpfile.h" |
50684f95 | 36 | #include "loop-unroll.h" |
2a8f3223 RH |
37 | #include "regs.h" |
38 | #include "df.h" | |
689ba89d ZD |
39 | |
40 | /* This module is used to modify loops with a determinable number of | |
41 | iterations to use special low-overhead looping instructions. | |
42 | ||
43 | It first validates whether the loop is well behaved and has a | |
44 | determinable number of iterations (either at compile or run-time). | |
45 | It then modifies the loop to use a low-overhead looping pattern as | |
46 | follows: | |
47 | ||
48 | 1. A pseudo register is allocated as the loop iteration counter. | |
49 | ||
50 | 2. The number of loop iterations is calculated and is stored | |
51 | in the loop counter. | |
52 | ||
53 | 3. At the end of the loop, the jump insn is replaced by the | |
54 | doloop_end pattern. The compare must remain because it might be | |
55 | used elsewhere. If the loop-variable or condition register are | |
56 | used elsewhere, they will be eliminated by flow. | |
57 | ||
58 | 4. An optional doloop_begin pattern is inserted at the top of the | |
59 | loop. | |
60 | ||
61 | TODO The optimization should only performed when either the biv used for exit | |
62 | condition is unused at all except for the exit test, or if we do not have to | |
63 | change its value, since otherwise we have to add a new induction variable, | |
64 | which usually will not pay up (unless the cost of the doloop pattern is | |
65 | somehow extremely lower than the cost of compare & jump, or unless the bct | |
66 | register cannot be used for anything else but doloop -- ??? detect these | |
67 | cases). */ | |
68 | ||
689ba89d ZD |
69 | /* Return the loop termination condition for PATTERN or zero |
70 | if it is not a decrement and branch jump insn. */ | |
71 | ||
75c70254 | 72 | rtx |
30d2ef86 | 73 | doloop_condition_get (rtx_insn *doloop_pat) |
689ba89d ZD |
74 | { |
75 | rtx cmp; | |
76 | rtx inc; | |
77 | rtx reg; | |
75c70254 | 78 | rtx inc_src; |
689ba89d | 79 | rtx condition; |
46dc0789 | 80 | rtx pattern; |
ce7b3761 RE |
81 | rtx cc_reg = NULL_RTX; |
82 | rtx reg_orig = NULL_RTX; | |
689ba89d | 83 | |
46dc0789 MN |
84 | /* The canonical doloop pattern we expect has one of the following |
85 | forms: | |
689ba89d | 86 | |
46dc0789 MN |
87 | 1) (parallel [(set (pc) (if_then_else (condition) |
88 | (label_ref (label)) | |
89 | (pc))) | |
90 | (set (reg) (plus (reg) (const_int -1))) | |
91 | (additional clobbers and uses)]) | |
689ba89d | 92 | |
46dc0789 MN |
93 | The branch must be the first entry of the parallel (also required |
94 | by jump.c), and the second entry of the parallel must be a set of | |
95 | the loop counter register. Some targets (IA-64) wrap the set of | |
96 | the loop counter in an if_then_else too. | |
75c70254 | 97 | |
46dc0789 MN |
98 | 2) (set (reg) (plus (reg) (const_int -1)) |
99 | (set (pc) (if_then_else (reg != 0) | |
100 | (label_ref (label)) | |
ce7b3761 RE |
101 | (pc))). |
102 | ||
103 | Some targets (ARM) do the comparison before the branch, as in the | |
104 | following form: | |
105 | ||
106 | 3) (parallel [(set (cc) (compare ((plus (reg) (const_int -1), 0))) | |
107 | (set (reg) (plus (reg) (const_int -1)))]) | |
108 | (set (pc) (if_then_else (cc == NE) | |
109 | (label_ref (label)) | |
110 | (pc))) */ | |
46dc0789 MN |
111 | |
112 | pattern = PATTERN (doloop_pat); | |
689ba89d ZD |
113 | |
114 | if (GET_CODE (pattern) != PARALLEL) | |
46dc0789 MN |
115 | { |
116 | rtx cond; | |
21afc57d | 117 | rtx_insn *prev_insn = prev_nondebug_insn (doloop_pat); |
ce7b3761 RE |
118 | rtx cmp_arg1, cmp_arg2; |
119 | rtx cmp_orig; | |
46dc0789 | 120 | |
ce7b3761 RE |
121 | /* In case the pattern is not PARALLEL we expect two forms |
122 | of doloop which are cases 2) and 3) above: in case 2) the | |
123 | decrement immediately precedes the branch, while in case 3) | |
124 | the compare and decrement instructions immediately precede | |
125 | the branch. */ | |
689ba89d | 126 | |
0be955e7 | 127 | if (prev_insn == NULL_RTX || !INSN_P (prev_insn)) |
46dc0789 MN |
128 | return 0; |
129 | ||
130 | cmp = pattern; | |
ce7b3761 RE |
131 | if (GET_CODE (PATTERN (prev_insn)) == PARALLEL) |
132 | { | |
133 | /* The third case: the compare and decrement instructions | |
134 | immediately precede the branch. */ | |
135 | cmp_orig = XVECEXP (PATTERN (prev_insn), 0, 0); | |
136 | if (GET_CODE (cmp_orig) != SET) | |
137 | return 0; | |
138 | if (GET_CODE (SET_SRC (cmp_orig)) != COMPARE) | |
139 | return 0; | |
140 | cmp_arg1 = XEXP (SET_SRC (cmp_orig), 0); | |
141 | cmp_arg2 = XEXP (SET_SRC (cmp_orig), 1); | |
142 | if (cmp_arg2 != const0_rtx | |
143 | || GET_CODE (cmp_arg1) != PLUS) | |
144 | return 0; | |
145 | reg_orig = XEXP (cmp_arg1, 0); | |
146 | if (XEXP (cmp_arg1, 1) != GEN_INT (-1) | |
147 | || !REG_P (reg_orig)) | |
148 | return 0; | |
149 | cc_reg = SET_DEST (cmp_orig); | |
150 | ||
151 | inc = XVECEXP (PATTERN (prev_insn), 0, 1); | |
152 | } | |
153 | else | |
b8abece3 | 154 | inc = PATTERN (prev_insn); |
56010684 JJ |
155 | if (GET_CODE (cmp) == SET && GET_CODE (SET_SRC (cmp)) == IF_THEN_ELSE) |
156 | { | |
157 | /* We expect the condition to be of the form (reg != 0) */ | |
158 | cond = XEXP (SET_SRC (cmp), 0); | |
159 | if (GET_CODE (cond) != NE || XEXP (cond, 1) != const0_rtx) | |
160 | return 0; | |
161 | } | |
46dc0789 MN |
162 | } |
163 | else | |
164 | { | |
165 | cmp = XVECEXP (pattern, 0, 0); | |
166 | inc = XVECEXP (pattern, 0, 1); | |
167 | } | |
689ba89d ZD |
168 | |
169 | /* Check for (set (reg) (something)). */ | |
75c70254 | 170 | if (GET_CODE (inc) != SET) |
689ba89d | 171 | return 0; |
689ba89d | 172 | reg = SET_DEST (inc); |
75c70254 SB |
173 | if (! REG_P (reg)) |
174 | return 0; | |
175 | ||
176 | /* Check if something = (plus (reg) (const_int -1)). | |
177 | On IA-64, this decrement is wrapped in an if_then_else. */ | |
178 | inc_src = SET_SRC (inc); | |
179 | if (GET_CODE (inc_src) == IF_THEN_ELSE) | |
180 | inc_src = XEXP (inc_src, 1); | |
181 | if (GET_CODE (inc_src) != PLUS | |
182 | || XEXP (inc_src, 0) != reg | |
183 | || XEXP (inc_src, 1) != constm1_rtx) | |
184 | return 0; | |
689ba89d ZD |
185 | |
186 | /* Check for (set (pc) (if_then_else (condition) | |
187 | (label_ref (label)) | |
188 | (pc))). */ | |
189 | if (GET_CODE (cmp) != SET | |
190 | || SET_DEST (cmp) != pc_rtx | |
191 | || GET_CODE (SET_SRC (cmp)) != IF_THEN_ELSE | |
192 | || GET_CODE (XEXP (SET_SRC (cmp), 1)) != LABEL_REF | |
193 | || XEXP (SET_SRC (cmp), 2) != pc_rtx) | |
194 | return 0; | |
195 | ||
196 | /* Extract loop termination condition. */ | |
197 | condition = XEXP (SET_SRC (cmp), 0); | |
198 | ||
75c70254 SB |
199 | /* We expect a GE or NE comparison with 0 or 1. */ |
200 | if ((GET_CODE (condition) != GE | |
201 | && GET_CODE (condition) != NE) | |
202 | || (XEXP (condition, 1) != const0_rtx | |
203 | && XEXP (condition, 1) != const1_rtx)) | |
689ba89d ZD |
204 | return 0; |
205 | ||
75c70254 | 206 | if ((XEXP (condition, 0) == reg) |
ce7b3761 RE |
207 | /* For the third case: */ |
208 | || ((cc_reg != NULL_RTX) | |
209 | && (XEXP (condition, 0) == cc_reg) | |
210 | && (reg_orig == reg)) | |
75c70254 | 211 | || (GET_CODE (XEXP (condition, 0)) == PLUS |
ce7b3761 | 212 | && XEXP (XEXP (condition, 0), 0) == reg)) |
46dc0789 MN |
213 | { |
214 | if (GET_CODE (pattern) != PARALLEL) | |
ce7b3761 | 215 | /* For the second form we expect: |
46dc0789 MN |
216 | |
217 | (set (reg) (plus (reg) (const_int -1)) | |
218 | (set (pc) (if_then_else (reg != 0) | |
219 | (label_ref (label)) | |
220 | (pc))). | |
221 | ||
222 | is equivalent to the following: | |
223 | ||
224 | (parallel [(set (pc) (if_then_else (reg != 1) | |
225 | (label_ref (label)) | |
226 | (pc))) | |
227 | (set (reg) (plus (reg) (const_int -1))) | |
228 | (additional clobbers and uses)]) | |
229 | ||
ce7b3761 RE |
230 | For the third form we expect: |
231 | ||
232 | (parallel [(set (cc) (compare ((plus (reg) (const_int -1)), 0)) | |
233 | (set (reg) (plus (reg) (const_int -1)))]) | |
234 | (set (pc) (if_then_else (cc == NE) | |
235 | (label_ref (label)) | |
236 | (pc))) | |
237 | ||
238 | which is equivalent to the following: | |
239 | ||
240 | (parallel [(set (cc) (compare (reg, 1)) | |
241 | (set (reg) (plus (reg) (const_int -1))) | |
242 | (set (pc) (if_then_else (NE == cc) | |
243 | (label_ref (label)) | |
244 | (pc))))]) | |
245 | ||
246 | So we return the second form instead for the two cases. | |
247 | ||
46dc0789 MN |
248 | */ |
249 | condition = gen_rtx_fmt_ee (NE, VOIDmode, inc_src, const1_rtx); | |
250 | ||
689ba89d | 251 | return condition; |
46dc0789 | 252 | } |
689ba89d ZD |
253 | |
254 | /* ??? If a machine uses a funny comparison, we could return a | |
1ae58c30 | 255 | canonicalized form here. */ |
689ba89d ZD |
256 | |
257 | return 0; | |
258 | } | |
259 | ||
260 | /* Return nonzero if the loop specified by LOOP is suitable for | |
261 | the use of special low-overhead looping instructions. DESC | |
262 | describes the number of iterations of the loop. */ | |
263 | ||
264 | static bool | |
99b1c316 | 265 | doloop_valid_p (class loop *loop, class niter_desc *desc) |
689ba89d ZD |
266 | { |
267 | basic_block *body = get_loop_body (loop), bb; | |
871eb193 | 268 | rtx_insn *insn; |
689ba89d | 269 | unsigned i; |
7600f094 | 270 | bool result = true; |
689ba89d ZD |
271 | |
272 | /* Check for loops that may not terminate under special conditions. */ | |
273 | if (!desc->simple_p | |
274 | || desc->assumptions | |
275 | || desc->infinite) | |
276 | { | |
277 | /* There are some cases that would require a special attention. | |
278 | For example if the comparison is LEU and the comparison value | |
279 | is UINT_MAX then the loop will not terminate. Similarly, if the | |
280 | comparison code is GEU and the comparison value is 0, the | |
281 | loop will not terminate. | |
282 | ||
283 | If the absolute increment is not 1, the loop can be infinite | |
284 | even with LTU/GTU, e.g. for (i = 3; i > 0; i -= 2) | |
285 | ||
286 | ??? We could compute these conditions at run-time and have a | |
287 | additional jump around the loop to ensure an infinite loop. | |
288 | However, it is very unlikely that this is the intended | |
289 | behavior of the loop and checking for these rare boundary | |
290 | conditions would pessimize all other code. | |
291 | ||
292 | If the loop is executed only a few times an extra check to | |
293 | restart the loop could use up most of the benefits of using a | |
294 | count register loop. Note however, that normally, this | |
295 | restart branch would never execute, so it could be predicted | |
296 | well by the CPU. We should generate the pessimistic code by | |
297 | default, and have an option, e.g. -funsafe-loops that would | |
298 | enable count-register loops in this case. */ | |
299 | if (dump_file) | |
300 | fprintf (dump_file, "Doloop: Possible infinite iteration case.\n"); | |
7600f094 AP |
301 | result = false; |
302 | goto cleanup; | |
689ba89d ZD |
303 | } |
304 | ||
305 | for (i = 0; i < loop->num_nodes; i++) | |
306 | { | |
307 | bb = body[i]; | |
308 | ||
309 | for (insn = BB_HEAD (bb); | |
310 | insn != NEXT_INSN (BB_END (bb)); | |
311 | insn = NEXT_INSN (insn)) | |
312 | { | |
a71a498d AS |
313 | /* Different targets have different necessities for low-overhead |
314 | looping. Call the back end for each instruction within the loop | |
e7e64a25 AS |
315 | to let it decide whether the insn prohibits a low-overhead loop. |
316 | It will then return the cause for it to emit to the dump file. */ | |
317 | const char * invalid = targetm.invalid_within_doloop (insn); | |
318 | if (invalid) | |
319 | { | |
320 | if (dump_file) | |
321 | fprintf (dump_file, "Doloop: %s\n", invalid); | |
7600f094 AP |
322 | result = false; |
323 | goto cleanup; | |
e7e64a25 | 324 | } |
689ba89d ZD |
325 | } |
326 | } | |
7600f094 AP |
327 | result = true; |
328 | ||
329 | cleanup: | |
689ba89d ZD |
330 | free (body); |
331 | ||
7600f094 | 332 | return result; |
689ba89d ZD |
333 | } |
334 | ||
ed541ddb ZD |
335 | /* Adds test of COND jumping to DEST on edge *E and set *E to the new fallthru |
336 | edge. If the condition is always false, do not do anything. If it is always | |
337 | true, redirect E to DEST and return false. In all other cases, true is | |
338 | returned. */ | |
689ba89d | 339 | |
ed541ddb ZD |
340 | static bool |
341 | add_test (rtx cond, edge *e, basic_block dest) | |
689ba89d | 342 | { |
871eb193 | 343 | rtx_insn *seq, *jump; |
1476d1bd | 344 | rtx_code_label *label; |
ef4bddc2 | 345 | machine_mode mode; |
689ba89d ZD |
346 | rtx op0 = XEXP (cond, 0), op1 = XEXP (cond, 1); |
347 | enum rtx_code code = GET_CODE (cond); | |
ed541ddb | 348 | basic_block bb; |
edfe99a4 JH |
349 | /* The jump is supposed to handle an unlikely special case. */ |
350 | profile_probability prob = profile_probability::guessed_never (); | |
689ba89d ZD |
351 | |
352 | mode = GET_MODE (XEXP (cond, 0)); | |
353 | if (mode == VOIDmode) | |
354 | mode = GET_MODE (XEXP (cond, 1)); | |
355 | ||
356 | start_sequence (); | |
357 | op0 = force_operand (op0, NULL_RTX); | |
358 | op1 = force_operand (op1, NULL_RTX); | |
359 | label = block_label (dest); | |
357067f2 | 360 | do_compare_rtx_and_jump (op0, op1, code, 0, mode, NULL_RTX, NULL, label, |
edfe99a4 | 361 | prob); |
689ba89d ZD |
362 | |
363 | jump = get_last_insn (); | |
7d93d987 | 364 | if (!jump || !JUMP_P (jump)) |
f937df35 | 365 | { |
ed541ddb ZD |
366 | /* The condition is always false and the jump was optimized out. */ |
367 | end_sequence (); | |
368 | return true; | |
f937df35 | 369 | } |
689ba89d ZD |
370 | |
371 | seq = get_insns (); | |
74b4885d | 372 | unshare_all_rtl_in_chain (seq); |
689ba89d | 373 | end_sequence (); |
7d93d987 ZD |
374 | |
375 | /* There always is at least the jump insn in the sequence. */ | |
376 | gcc_assert (seq != NULL_RTX); | |
377 | ||
598ec7bd | 378 | bb = split_edge_and_insert (*e, seq); |
ed541ddb ZD |
379 | *e = single_succ_edge (bb); |
380 | ||
381 | if (any_uncondjump_p (jump)) | |
382 | { | |
383 | /* The condition is always true. */ | |
384 | delete_insn (jump); | |
385 | redirect_edge_and_branch_force (*e, dest); | |
386 | return false; | |
387 | } | |
b8698a0f | 388 | |
ed541ddb ZD |
389 | JUMP_LABEL (jump) = label; |
390 | ||
ed541ddb ZD |
391 | LABEL_NUSES (label)++; |
392 | ||
edfe99a4 JH |
393 | edge e2 = make_edge (bb, dest, (*e)->flags & ~EDGE_FALLTHRU); |
394 | e2->probability = prob; | |
edfe99a4 | 395 | (*e)->probability = prob.invert (); |
edfe99a4 | 396 | update_br_prob_note (e2->src); |
ed541ddb | 397 | return true; |
689ba89d ZD |
398 | } |
399 | ||
8a15faa7 XL |
400 | /* Fold (add -1; zero_ext; add +1) operations to zero_ext if not wrapping. i.e: |
401 | ||
402 | 73: r145:SI=r123:DI#0-0x1 | |
403 | 74: r144:DI=zero_extend (r145:SI) | |
404 | 75: r143:DI=r144:DI+0x1 | |
405 | ... | |
406 | 31: r135:CC=cmp (r123:DI,0) | |
407 | 72: {pc={(r143:DI!=0x1)?L70:pc};r143:DI=r143:DI-0x1;...} | |
408 | ||
409 | r123:DI#0-0x1 is param count derived from loop->niter_expr equal to number of | |
410 | loop iterations, if loop iterations expression doesn't overflow, then | |
411 | (zero_extend (r123:DI#0-1))+1 can be simplified to zero_extend. */ | |
412 | ||
413 | static rtx | |
414 | doloop_simplify_count (class loop *loop, scalar_int_mode mode, rtx count) | |
415 | { | |
416 | widest_int iterations; | |
417 | if (GET_CODE (count) == ZERO_EXTEND) | |
418 | { | |
419 | rtx extop0 = XEXP (count, 0); | |
420 | if (GET_CODE (extop0) == PLUS) | |
421 | { | |
422 | rtx addop0 = XEXP (extop0, 0); | |
423 | rtx addop1 = XEXP (extop0, 1); | |
424 | ||
425 | if (get_max_loop_iterations (loop, &iterations) | |
426 | && wi::ltu_p (iterations, GET_MODE_MASK (GET_MODE (addop0))) | |
427 | && addop1 == constm1_rtx) | |
428 | return simplify_gen_unary (ZERO_EXTEND, mode, addop0, | |
429 | GET_MODE (addop0)); | |
430 | } | |
431 | } | |
432 | ||
433 | return simplify_gen_binary (PLUS, mode, count, const1_rtx); | |
434 | } | |
435 | ||
689ba89d ZD |
436 | /* Modify the loop to use the low-overhead looping insn where LOOP |
437 | describes the loop, DESC describes the number of iterations of the | |
438 | loop, and DOLOOP_INSN is the low-overhead looping insn to emit at the | |
439 | end of the loop. CONDITION is the condition separated from the | |
5f2e30ef | 440 | DOLOOP_SEQ. COUNT is the number of iterations of the LOOP. */ |
689ba89d ZD |
441 | |
442 | static void | |
99b1c316 | 443 | doloop_modify (class loop *loop, class niter_desc *desc, |
89f7b21f | 444 | rtx_insn *doloop_seq, rtx condition, rtx count) |
689ba89d ZD |
445 | { |
446 | rtx counter_reg; | |
a82bbcbb | 447 | rtx tmp, noloop = NULL_RTX; |
871eb193 DM |
448 | rtx_insn *sequence; |
449 | rtx_insn *jump_insn; | |
1476d1bd | 450 | rtx_code_label *jump_label; |
ed541ddb | 451 | int nonneg = 0; |
689ba89d ZD |
452 | bool increment_count; |
453 | basic_block loop_end = desc->out_edge->src; | |
095a2d76 | 454 | scalar_int_mode mode; |
807e902e | 455 | widest_int iterations; |
689ba89d ZD |
456 | |
457 | jump_insn = BB_END (loop_end); | |
458 | ||
459 | if (dump_file) | |
460 | { | |
461 | fprintf (dump_file, "Doloop: Inserting doloop pattern ("); | |
462 | if (desc->const_iter) | |
16998094 | 463 | fprintf (dump_file, "%" PRId64, desc->niter); |
689ba89d ZD |
464 | else |
465 | fputs ("runtime", dump_file); | |
466 | fputs (" iterations).\n", dump_file); | |
467 | } | |
468 | ||
469 | /* Discard original jump to continue loop. The original compare | |
470 | result may still be live, so it cannot be discarded explicitly. */ | |
471 | delete_insn (jump_insn); | |
472 | ||
473 | counter_reg = XEXP (condition, 0); | |
474 | if (GET_CODE (counter_reg) == PLUS) | |
475 | counter_reg = XEXP (counter_reg, 0); | |
c7ad039d RS |
476 | /* These patterns must operate on integer counters. */ |
477 | mode = as_a <scalar_int_mode> (GET_MODE (counter_reg)); | |
689ba89d | 478 | |
689ba89d ZD |
479 | increment_count = false; |
480 | switch (GET_CODE (condition)) | |
481 | { | |
482 | case NE: | |
483 | /* Currently only NE tests against zero and one are supported. */ | |
b5e624c6 NS |
484 | noloop = XEXP (condition, 1); |
485 | if (noloop != const0_rtx) | |
689ba89d | 486 | { |
b5e624c6 | 487 | gcc_assert (noloop == const1_rtx); |
689ba89d | 488 | increment_count = true; |
689ba89d | 489 | } |
689ba89d ZD |
490 | break; |
491 | ||
492 | case GE: | |
493 | /* Currently only GE tests against zero are supported. */ | |
b5e624c6 | 494 | gcc_assert (XEXP (condition, 1) == const0_rtx); |
689ba89d ZD |
495 | |
496 | noloop = constm1_rtx; | |
497 | ||
498 | /* The iteration count does not need incrementing for a GE test. */ | |
499 | increment_count = false; | |
500 | ||
501 | /* Determine if the iteration counter will be non-negative. | |
502 | Note that the maximum value loaded is iterations_max - 1. */ | |
2431114f | 503 | if (get_max_loop_iterations (loop, &iterations) |
807e902e KZ |
504 | && wi::leu_p (iterations, |
505 | wi::set_bit_in_zero <widest_int> | |
506 | (GET_MODE_PRECISION (mode) - 1))) | |
689ba89d ZD |
507 | nonneg = 1; |
508 | break; | |
509 | ||
1c43d3ca | 510 | /* Abort if an invalid doloop pattern has been generated. */ |
8127d0e0 | 511 | default: |
b5e624c6 | 512 | gcc_unreachable (); |
689ba89d ZD |
513 | } |
514 | ||
515 | if (increment_count) | |
8a15faa7 | 516 | count = doloop_simplify_count (loop, mode, count); |
b2a38b1d | 517 | |
689ba89d ZD |
518 | /* Insert initialization of the count register into the loop header. */ |
519 | start_sequence (); | |
e1bcfb92 JJ |
520 | /* count has been already copied through copy_rtx. */ |
521 | reset_used_flags (count); | |
522 | set_used_flags (condition); | |
689ba89d ZD |
523 | tmp = force_operand (count, counter_reg); |
524 | convert_move (counter_reg, tmp, 1); | |
525 | sequence = get_insns (); | |
e1bcfb92 | 526 | unshare_all_rtl_in_chain (sequence); |
689ba89d ZD |
527 | end_sequence (); |
528 | emit_insn_after (sequence, BB_END (loop_preheader_edge (loop)->src)); | |
529 | ||
530 | if (desc->noloop_assumptions) | |
531 | { | |
30d3fc60 | 532 | rtx ass = copy_rtx (desc->noloop_assumptions); |
689ba89d | 533 | basic_block preheader = loop_preheader_edge (loop)->src; |
e1bcfb92 JJ |
534 | basic_block set_zero = split_edge (loop_preheader_edge (loop)); |
535 | basic_block new_preheader = split_edge (loop_preheader_edge (loop)); | |
689ba89d | 536 | edge te; |
689ba89d ZD |
537 | |
538 | /* Expand the condition testing the assumptions and if it does not pass, | |
539 | reset the count register to 0. */ | |
ed541ddb | 540 | redirect_edge_and_branch_force (single_succ_edge (preheader), new_preheader); |
689ba89d ZD |
541 | set_immediate_dominator (CDI_DOMINATORS, new_preheader, preheader); |
542 | ||
3995f3a2 | 543 | set_zero->count = profile_count::uninitialized (); |
689ba89d | 544 | |
ed541ddb ZD |
545 | te = single_succ_edge (preheader); |
546 | for (; ass; ass = XEXP (ass, 1)) | |
547 | if (!add_test (XEXP (ass, 0), &te, set_zero)) | |
548 | break; | |
549 | ||
550 | if (ass) | |
689ba89d | 551 | { |
ed541ddb ZD |
552 | /* We reached a condition that is always true. This is very hard to |
553 | reproduce (such a loop does not roll, and thus it would most | |
554 | likely get optimized out by some of the preceding optimizations). | |
555 | In fact, I do not have any testcase for it. However, it would | |
556 | also be very hard to show that it is impossible, so we must | |
557 | handle this case. */ | |
558 | set_zero->count = preheader->count; | |
689ba89d | 559 | } |
b8698a0f | 560 | |
ed541ddb ZD |
561 | if (EDGE_COUNT (set_zero->preds) == 0) |
562 | { | |
563 | /* All the conditions were simplified to false, remove the | |
564 | unreachable set_zero block. */ | |
ed541ddb ZD |
565 | delete_basic_block (set_zero); |
566 | } | |
567 | else | |
568 | { | |
569 | /* Reset the counter to zero in the set_zero block. */ | |
570 | start_sequence (); | |
571 | convert_move (counter_reg, noloop, 0); | |
572 | sequence = get_insns (); | |
573 | end_sequence (); | |
574 | emit_insn_after (sequence, BB_END (set_zero)); | |
b8698a0f | 575 | |
ed541ddb | 576 | set_immediate_dominator (CDI_DOMINATORS, set_zero, |
66f97d31 ZD |
577 | recompute_dominator (CDI_DOMINATORS, |
578 | set_zero)); | |
ed541ddb ZD |
579 | } |
580 | ||
581 | set_immediate_dominator (CDI_DOMINATORS, new_preheader, | |
66f97d31 ZD |
582 | recompute_dominator (CDI_DOMINATORS, |
583 | new_preheader)); | |
689ba89d ZD |
584 | } |
585 | ||
586 | /* Some targets (eg, C4x) need to initialize special looping | |
587 | registers. */ | |
89f7b21f RS |
588 | if (targetm.have_doloop_begin ()) |
589 | if (rtx_insn *seq = targetm.gen_doloop_begin (counter_reg, doloop_seq)) | |
590 | emit_insn_after (seq, BB_END (loop_preheader_edge (loop)->src)); | |
689ba89d ZD |
591 | |
592 | /* Insert the new low-overhead looping insn. */ | |
593 | emit_jump_insn_after (doloop_seq, BB_END (loop_end)); | |
594 | jump_insn = BB_END (loop_end); | |
595 | jump_label = block_label (desc->in_edge->dest); | |
596 | JUMP_LABEL (jump_insn) = jump_label; | |
597 | LABEL_NUSES (jump_label)++; | |
598 | ||
599 | /* Ensure the right fallthru edge is marked, for case we have reversed | |
600 | the condition. */ | |
601 | desc->in_edge->flags &= ~EDGE_FALLTHRU; | |
602 | desc->out_edge->flags |= EDGE_FALLTHRU; | |
603 | ||
604 | /* Add a REG_NONNEG note if the actual or estimated maximum number | |
605 | of iterations is non-negative. */ | |
606 | if (nonneg) | |
65c5f2a6 ILT |
607 | add_reg_note (jump_insn, REG_NONNEG, NULL_RTX); |
608 | ||
80663107 | 609 | /* Update the REG_BR_PROB note. */ |
5fa396ad JH |
610 | if (desc->in_edge->probability.initialized_p ()) |
611 | add_reg_br_prob_note (jump_insn, desc->in_edge->probability); | |
689ba89d ZD |
612 | } |
613 | ||
2a8f3223 RH |
614 | /* Called through note_stores. */ |
615 | ||
616 | static void | |
617 | record_reg_sets (rtx x, const_rtx pat ATTRIBUTE_UNUSED, void *data) | |
618 | { | |
619 | bitmap mod = (bitmap)data; | |
620 | if (REG_P (x)) | |
621 | { | |
622 | unsigned int regno = REGNO (x); | |
623 | if (HARD_REGISTER_P (x)) | |
624 | { | |
625 | unsigned int end_regno = end_hard_regno (GET_MODE (x), regno); | |
626 | do | |
627 | bitmap_set_bit (mod, regno); | |
628 | while (++regno < end_regno); | |
629 | } | |
630 | else | |
631 | bitmap_set_bit (mod, regno); | |
632 | } | |
633 | } | |
634 | ||
689ba89d ZD |
635 | /* Process loop described by LOOP validating that the loop is suitable for |
636 | conversion to use a low overhead looping instruction, replacing the jump | |
637 | insn where suitable. Returns true if the loop was successfully | |
638 | modified. */ | |
639 | ||
640 | static bool | |
99b1c316 | 641 | doloop_optimize (class loop *loop) |
689ba89d | 642 | { |
095a2d76 | 643 | scalar_int_mode mode; |
89f7b21f | 644 | rtx doloop_reg; |
1d0216c8 | 645 | rtx count; |
807e902e | 646 | widest_int iterations, iterations_max; |
1476d1bd | 647 | rtx_code_label *start_label; |
689ba89d | 648 | rtx condition; |
216e8374 JH |
649 | unsigned level; |
650 | HOST_WIDE_INT est_niter; | |
7607bdda | 651 | int max_cost; |
99b1c316 | 652 | class niter_desc *desc; |
a82bbcbb ZD |
653 | unsigned word_mode_size; |
654 | unsigned HOST_WIDE_INT word_mode_max; | |
2407343c | 655 | int entered_at_top; |
689ba89d ZD |
656 | |
657 | if (dump_file) | |
658 | fprintf (dump_file, "Doloop: Processing loop %d.\n", loop->num); | |
659 | ||
660 | iv_analysis_loop_init (loop); | |
661 | ||
662 | /* Find the simple exit of a LOOP. */ | |
663 | desc = get_simple_loop_desc (loop); | |
664 | ||
665 | /* Check that loop is a candidate for a low-overhead looping insn. */ | |
666 | if (!doloop_valid_p (loop, desc)) | |
667 | { | |
668 | if (dump_file) | |
669 | fprintf (dump_file, | |
670 | "Doloop: The loop is not suitable.\n"); | |
671 | return false; | |
672 | } | |
673 | mode = desc->mode; | |
674 | ||
216e8374 JH |
675 | est_niter = get_estimated_loop_iterations_int (loop); |
676 | if (est_niter == -1) | |
ae7a7472 | 677 | est_niter = get_likely_max_loop_iterations_int (loop); |
216e8374 JH |
678 | |
679 | if (est_niter >= 0 && est_niter < 3) | |
689ba89d ZD |
680 | { |
681 | if (dump_file) | |
682 | fprintf (dump_file, | |
683 | "Doloop: Too few iterations (%u) to be profitable.\n", | |
216e8374 | 684 | (unsigned int)est_niter); |
689ba89d ZD |
685 | return false; |
686 | } | |
687 | ||
45b9a14b | 688 | max_cost |
028d4092 | 689 | = COSTS_N_INSNS (param_max_iterations_computation_cost); |
e548c9df | 690 | if (set_src_cost (desc->niter_expr, mode, optimize_loop_for_speed_p (loop)) |
f40751dd | 691 | > max_cost) |
45b9a14b BS |
692 | { |
693 | if (dump_file) | |
694 | fprintf (dump_file, | |
405f0587 | 695 | "Doloop: number of iterations too costly to compute.\n"); |
45b9a14b BS |
696 | return false; |
697 | } | |
698 | ||
1d0216c8 | 699 | if (desc->const_iter) |
f079167a | 700 | iterations = widest_int::from (rtx_mode_t (desc->niter_expr, mode), |
807e902e | 701 | UNSIGNED); |
e3a8f1fa | 702 | else |
807e902e | 703 | iterations = 0; |
1d0216c8 | 704 | if (!get_max_loop_iterations (loop, &iterations_max)) |
807e902e | 705 | iterations_max = 0; |
689ba89d | 706 | level = get_loop_level (loop) + 1; |
1d0216c8 RS |
707 | entered_at_top = (loop->latch == desc->in_edge->dest |
708 | && contains_no_active_insn_p (loop->latch)); | |
709 | if (!targetm.can_use_doloop_p (iterations, iterations_max, level, | |
710 | entered_at_top)) | |
711 | { | |
712 | if (dump_file) | |
713 | fprintf (dump_file, "Loop rejected by can_use_doloop_p.\n"); | |
714 | return false; | |
715 | } | |
689ba89d ZD |
716 | |
717 | /* Generate looping insn. If the pattern FAILs then give up trying | |
718 | to modify the loop since there is some aspect the back-end does | |
719 | not like. */ | |
1d0216c8 | 720 | count = copy_rtx (desc->niter_expr); |
689ba89d ZD |
721 | start_label = block_label (desc->in_edge->dest); |
722 | doloop_reg = gen_reg_rtx (mode); | |
89f7b21f | 723 | rtx_insn *doloop_seq = targetm.gen_doloop_end (doloop_reg, start_label); |
a82bbcbb | 724 | |
5511bc5a | 725 | word_mode_size = GET_MODE_PRECISION (word_mode); |
e1bcfb92 | 726 | word_mode_max = (HOST_WIDE_INT_1U << (word_mode_size - 1) << 1) - 1; |
a82bbcbb ZD |
727 | if (! doloop_seq |
728 | && mode != word_mode | |
729 | /* Before trying mode different from the one in that # of iterations is | |
730 | computed, we must be sure that the number of iterations fits into | |
731 | the new mode. */ | |
5511bc5a | 732 | && (word_mode_size >= GET_MODE_PRECISION (mode) |
807e902e | 733 | || wi::leu_p (iterations_max, word_mode_max))) |
689ba89d | 734 | { |
5511bc5a | 735 | if (word_mode_size > GET_MODE_PRECISION (mode)) |
1d0216c8 | 736 | count = simplify_gen_unary (ZERO_EXTEND, word_mode, count, mode); |
a82bbcbb | 737 | else |
1d0216c8 | 738 | count = lowpart_subreg (word_mode, count, mode); |
689ba89d | 739 | PUT_MODE (doloop_reg, word_mode); |
89f7b21f | 740 | doloop_seq = targetm.gen_doloop_end (doloop_reg, start_label); |
689ba89d ZD |
741 | } |
742 | if (! doloop_seq) | |
743 | { | |
744 | if (dump_file) | |
745 | fprintf (dump_file, | |
746 | "Doloop: Target unwilling to use doloop pattern!\n"); | |
747 | return false; | |
748 | } | |
749 | ||
750 | /* If multiple instructions were created, the last must be the | |
89f7b21f RS |
751 | jump instruction. */ |
752 | rtx_insn *doloop_insn = doloop_seq; | |
753 | while (NEXT_INSN (doloop_insn) != NULL_RTX) | |
754 | doloop_insn = NEXT_INSN (doloop_insn); | |
755 | if (!JUMP_P (doloop_insn) | |
756 | || !(condition = doloop_condition_get (doloop_insn))) | |
689ba89d ZD |
757 | { |
758 | if (dump_file) | |
759 | fprintf (dump_file, "Doloop: Unrecognizable doloop pattern!\n"); | |
760 | return false; | |
761 | } | |
762 | ||
2a8f3223 RH |
763 | /* Ensure that the new sequence doesn't clobber a register that |
764 | is live at the end of the block. */ | |
765 | { | |
766 | bitmap modified = BITMAP_ALLOC (NULL); | |
767 | ||
768 | for (rtx_insn *i = doloop_seq; i != NULL; i = NEXT_INSN (i)) | |
e8448ba5 | 769 | note_stores (i, record_reg_sets, modified); |
2a8f3223 RH |
770 | |
771 | basic_block loop_end = desc->out_edge->src; | |
772 | bool fail = bitmap_intersect_p (df_get_live_out (loop_end), modified); | |
773 | BITMAP_FREE (modified); | |
774 | ||
775 | if (fail) | |
776 | { | |
777 | if (dump_file) | |
778 | fprintf (dump_file, "Doloop: doloop pattern clobbers live out\n"); | |
779 | return false; | |
780 | } | |
781 | } | |
782 | ||
5f2e30ef | 783 | doloop_modify (loop, desc, doloop_seq, condition, count); |
689ba89d ZD |
784 | return true; |
785 | } | |
786 | ||
d73be268 | 787 | /* This is the main entry point. Process all loops using doloop_optimize. */ |
689ba89d ZD |
788 | |
789 | void | |
d73be268 | 790 | doloop_optimize_loops (void) |
689ba89d | 791 | { |
99b1c316 | 792 | class loop *loop; |
689ba89d | 793 | |
2a8f3223 RH |
794 | if (optimize == 1) |
795 | { | |
796 | df_live_add_problem (); | |
797 | df_live_set_all_dirty (); | |
798 | } | |
799 | ||
fdbbed34 | 800 | FOR_EACH_LOOP (loop, 0) |
689ba89d | 801 | { |
689ba89d ZD |
802 | doloop_optimize (loop); |
803 | } | |
804 | ||
2a8f3223 RH |
805 | if (optimize == 1) |
806 | df_remove_problem (df_live); | |
807 | ||
689ba89d ZD |
808 | iv_analysis_done (); |
809 | ||
b2b29377 | 810 | checking_verify_loop_structure (); |
689ba89d | 811 | } |