]>
Commit | Line | Data |
---|---|---|
cdbd2a92 | 1 | /* Try to unroll loops, and split induction variables. |
ac0c7fb1 | 2 | Copyright (C) 1992, 1993, 1994, 1995, 1997, 1998, 1999, 2000, 2001, 2002 |
6da6f625 | 3 | Free Software Foundation, Inc. |
cdbd2a92 | 4 | Contributed by James E. Wilson, Cygnus Support/UC Berkeley. |
5 | ||
f12b58b3 | 6 | This file is part of GCC. |
cdbd2a92 | 7 | |
f12b58b3 | 8 | GCC is free software; you can redistribute it and/or modify it under |
9 | the terms of the GNU General Public License as published by the Free | |
10 | Software Foundation; either version 2, or (at your option) any later | |
11 | version. | |
cdbd2a92 | 12 | |
f12b58b3 | 13 | GCC is distributed in the hope that it will be useful, but WITHOUT ANY |
14 | WARRANTY; without even the implied warranty of MERCHANTABILITY or | |
15 | FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License | |
16 | for more details. | |
cdbd2a92 | 17 | |
18 | You should have received a copy of the GNU General Public License | |
f12b58b3 | 19 | along with GCC; see the file COPYING. If not, write to the Free |
20 | Software Foundation, 59 Temple Place - Suite 330, Boston, MA | |
21 | 02111-1307, USA. */ | |
cdbd2a92 | 22 | |
23 | /* Try to unroll a loop, and split induction variables. | |
24 | ||
25 | Loops for which the number of iterations can be calculated exactly are | |
26 | handled specially. If the number of iterations times the insn_count is | |
27 | less than MAX_UNROLLED_INSNS, then the loop is unrolled completely. | |
28 | Otherwise, we try to unroll the loop a number of times modulo the number | |
29 | of iterations, so that only one exit test will be needed. It is unrolled | |
30 | a number of times approximately equal to MAX_UNROLLED_INSNS divided by | |
31 | the insn count. | |
32 | ||
33 | Otherwise, if the number of iterations can be calculated exactly at | |
34 | run time, and the loop is always entered at the top, then we try to | |
35 | precondition the loop. That is, at run time, calculate how many times | |
36 | the loop will execute, and then execute the loop body a few times so | |
37 | that the remaining iterations will be some multiple of 4 (or 2 if the | |
38 | loop is large). Then fall through to a loop unrolled 4 (or 2) times, | |
39 | with only one exit test needed at the end of the loop. | |
40 | ||
41 | Otherwise, if the number of iterations can not be calculated exactly, | |
42 | not even at run time, then we still unroll the loop a number of times | |
43 | approximately equal to MAX_UNROLLED_INSNS divided by the insn count, | |
44 | but there must be an exit test after each copy of the loop body. | |
45 | ||
46 | For each induction variable, which is dead outside the loop (replaceable) | |
47 | or for which we can easily calculate the final value, if we can easily | |
48 | calculate its value at each place where it is set as a function of the | |
49 | current loop unroll count and the variable's value at loop entry, then | |
50 | the induction variable is split into `N' different variables, one for | |
51 | each copy of the loop body. One variable is live across the backward | |
52 | branch, and the others are all calculated as a function of this variable. | |
53 | This helps eliminate data dependencies, and leads to further opportunities | |
54 | for cse. */ | |
55 | ||
56 | /* Possible improvements follow: */ | |
57 | ||
58 | /* ??? Add an extra pass somewhere to determine whether unrolling will | |
59 | give any benefit. E.g. after generating all unrolled insns, compute the | |
60 | cost of all insns and compare against cost of insns in rolled loop. | |
61 | ||
62 | - On traditional architectures, unrolling a non-constant bound loop | |
63 | is a win if there is a giv whose only use is in memory addresses, the | |
4bbea254 | 64 | memory addresses can be split, and hence giv increments can be |
cdbd2a92 | 65 | eliminated. |
66 | - It is also a win if the loop is executed many times, and preconditioning | |
67 | can be performed for the loop. | |
68 | Add code to check for these and similar cases. */ | |
69 | ||
70 | /* ??? Improve control of which loops get unrolled. Could use profiling | |
71 | info to only unroll the most commonly executed loops. Perhaps have | |
72 | a user specifyable option to control the amount of code expansion, | |
73 | or the percent of loops to consider for unrolling. Etc. */ | |
74 | ||
75 | /* ??? Look at the register copies inside the loop to see if they form a | |
76 | simple permutation. If so, iterate the permutation until it gets back to | |
77 | the start state. This is how many times we should unroll the loop, for | |
78 | best results, because then all register copies can be eliminated. | |
79 | For example, the lisp nreverse function should be unrolled 3 times | |
80 | while (this) | |
81 | { | |
82 | next = this->cdr; | |
83 | this->cdr = prev; | |
84 | prev = this; | |
85 | this = next; | |
86 | } | |
87 | ||
88 | ??? The number of times to unroll the loop may also be based on data | |
89 | references in the loop. For example, if we have a loop that references | |
90 | x[i-1], x[i], and x[i+1], we should unroll it a multiple of 3 times. */ | |
91 | ||
92 | /* ??? Add some simple linear equation solving capability so that we can | |
93 | determine the number of loop iterations for more complex loops. | |
94 | For example, consider this loop from gdb | |
95 | #define SWAP_TARGET_AND_HOST(buffer,len) | |
96 | { | |
97 | char tmp; | |
98 | char *p = (char *) buffer; | |
99 | char *q = ((char *) buffer) + len - 1; | |
100 | int iterations = (len + 1) >> 1; | |
101 | int i; | |
102 | for (p; p < q; p++, q--;) | |
9058b363 | 103 | { |
104 | tmp = *q; | |
105 | *q = *p; | |
106 | *p = tmp; | |
107 | } | |
cdbd2a92 | 108 | } |
109 | Note that: | |
110 | start value = p = &buffer + current_iteration | |
111 | end value = q = &buffer + len - 1 - current_iteration | |
112 | Given the loop exit test of "p < q", then there must be "q - p" iterations, | |
113 | set equal to zero and solve for number of iterations: | |
114 | q - p = len - 1 - 2*current_iteration = 0 | |
115 | current_iteration = (len - 1) / 2 | |
116 | Hence, there are (len - 1) / 2 (rounded up to the nearest integer) | |
117 | iterations of this loop. */ | |
118 | ||
119 | /* ??? Currently, no labels are marked as loop invariant when doing loop | |
120 | unrolling. This is because an insn inside the loop, that loads the address | |
121 | of a label inside the loop into a register, could be moved outside the loop | |
122 | by the invariant code motion pass if labels were invariant. If the loop | |
123 | is subsequently unrolled, the code will be wrong because each unrolled | |
124 | body of the loop will use the same address, whereas each actually needs a | |
125 | different address. A case where this happens is when a loop containing | |
126 | a switch statement is unrolled. | |
127 | ||
128 | It would be better to let labels be considered invariant. When we | |
129 | unroll loops here, check to see if any insns using a label local to the | |
130 | loop were moved before the loop. If so, then correct the problem, by | |
131 | moving the insn back into the loop, or perhaps replicate the insn before | |
132 | the loop, one copy for each time the loop is unrolled. */ | |
133 | ||
45a72df8 | 134 | #include "config.h" |
135 | #include "system.h" | |
805e22b2 | 136 | #include "coretypes.h" |
137 | #include "tm.h" | |
45a72df8 | 138 | #include "rtl.h" |
139 | #include "tm_p.h" | |
140 | #include "insn-config.h" | |
141 | #include "integrate.h" | |
142 | #include "regs.h" | |
143 | #include "recog.h" | |
144 | #include "flags.h" | |
145 | #include "function.h" | |
146 | #include "expr.h" | |
147 | #include "loop.h" | |
148 | #include "toplev.h" | |
149 | #include "hard-reg-set.h" | |
150 | #include "basic-block.h" | |
151 | #include "predict.h" | |
0b11ae2e | 152 | #include "params.h" |
45a72df8 | 153 | |
cdbd2a92 | 154 | /* The prime factors looked for when trying to unroll a loop by some |
155 | number which is modulo the total number of iterations. Just checking | |
156 | for these 4 prime factors will find at least one factor for 75% of | |
157 | all numbers theoretically. Practically speaking, this will succeed | |
158 | almost all of the time since loops are generally a multiple of 2 | |
159 | and/or 5. */ | |
160 | ||
161 | #define NUM_FACTORS 4 | |
162 | ||
33b57a7b | 163 | static struct _factor { const int factor; int count; } |
6da6f625 | 164 | factors[NUM_FACTORS] = { {2, 0}, {3, 0}, {5, 0}, {7, 0}}; |
9058b363 | 165 | |
cdbd2a92 | 166 | /* Describes the different types of loop unrolling performed. */ |
167 | ||
6da6f625 | 168 | enum unroll_types |
169 | { | |
170 | UNROLL_COMPLETELY, | |
171 | UNROLL_MODULO, | |
172 | UNROLL_NAIVE | |
173 | }; | |
cdbd2a92 | 174 | |
f712a0dc | 175 | /* Indexed by register number, if nonzero, then it contains a pointer |
cdbd2a92 | 176 | to a struct induction for a DEST_REG giv which has been combined with |
177 | one of more address givs. This is needed because whenever such a DEST_REG | |
178 | giv is modified, we must modify the value of all split address givs | |
179 | that were combined with this DEST_REG giv. */ | |
180 | ||
181 | static struct induction **addr_combined_regs; | |
182 | ||
183 | /* Indexed by register number, if this is a splittable induction variable, | |
184 | then this will hold the current value of the register, which depends on the | |
185 | iteration number. */ | |
186 | ||
187 | static rtx *splittable_regs; | |
188 | ||
189 | /* Indexed by register number, if this is a splittable induction variable, | |
190 | then this will hold the number of instructions in the loop that modify | |
191 | the induction variable. Used to ensure that only the last insn modifying | |
192 | a split iv will update the original iv of the dest. */ | |
193 | ||
194 | static int *splittable_regs_updates; | |
195 | ||
cdbd2a92 | 196 | /* Forward declarations. */ |
197 | ||
fff101de | 198 | static rtx simplify_cmp_and_jump_insns PARAMS ((enum rtx_code, |
199 | enum machine_mode, | |
200 | rtx, rtx, rtx)); | |
e0909be9 | 201 | static void init_reg_map PARAMS ((struct inline_remap *, int)); |
02e7a332 | 202 | static rtx calculate_giv_inc PARAMS ((rtx, rtx, unsigned int)); |
e0909be9 | 203 | static rtx initial_reg_note_copy PARAMS ((rtx, struct inline_remap *)); |
59e339e9 | 204 | static void final_reg_note_copy PARAMS ((rtx *, struct inline_remap *)); |
8ec5f078 | 205 | static void copy_loop_body PARAMS ((struct loop *, rtx, rtx, |
206 | struct inline_remap *, rtx, int, | |
207 | enum unroll_types, rtx, rtx, rtx, rtx)); | |
15fc3eb7 | 208 | static int find_splittable_regs PARAMS ((const struct loop *, |
89e8d34f | 209 | enum unroll_types, int)); |
6da6f625 | 210 | static int find_splittable_givs PARAMS ((const struct loop *, |
15fc3eb7 | 211 | struct iv_class *, enum unroll_types, |
212 | rtx, int)); | |
213 | static int reg_dead_after_loop PARAMS ((const struct loop *, rtx)); | |
e0909be9 | 214 | static rtx fold_rtx_mult_add PARAMS ((rtx, rtx, rtx, enum machine_mode)); |
8ec5f078 | 215 | static rtx remap_split_bivs PARAMS ((struct loop *, rtx)); |
e0909be9 | 216 | static rtx find_common_reg_term PARAMS ((rtx, rtx)); |
217 | static rtx subtract_reg_term PARAMS ((rtx, rtx)); | |
15fc3eb7 | 218 | static rtx loop_find_equiv_value PARAMS ((const struct loop *, rtx)); |
3ff5e1f7 | 219 | static rtx ujump_to_loop_cont PARAMS ((rtx, rtx)); |
cdbd2a92 | 220 | |
221 | /* Try to unroll one loop and split induction variables in the loop. | |
222 | ||
15fc3eb7 | 223 | The loop is described by the arguments LOOP and INSN_COUNT. |
89e8d34f | 224 | STRENGTH_REDUCTION_P indicates whether information generated in the |
225 | strength reduction pass is available. | |
cdbd2a92 | 226 | |
227 | This function is intended to be called from within `strength_reduce' | |
228 | in loop.c. */ | |
229 | ||
230 | void | |
89e8d34f | 231 | unroll_loop (loop, insn_count, strength_reduce_p) |
ec7d7ef9 | 232 | struct loop *loop; |
cdbd2a92 | 233 | int insn_count; |
cdbd2a92 | 234 | int strength_reduce_p; |
235 | { | |
8ec5f078 | 236 | struct loop_info *loop_info = LOOP_INFO (loop); |
237 | struct loop_ivs *ivs = LOOP_IVS (loop); | |
abdd0549 | 238 | int i, j; |
02e7a332 | 239 | unsigned int r; |
abdd0549 | 240 | unsigned HOST_WIDE_INT temp; |
29768226 | 241 | int unroll_number = 1; |
cdbd2a92 | 242 | rtx copy_start, copy_end; |
7bd36830 | 243 | rtx insn, sequence, pattern, tem; |
cdbd2a92 | 244 | int max_labelno, max_insnno; |
245 | rtx insert_before; | |
246 | struct inline_remap *map; | |
df9f2bb6 | 247 | char *local_label = NULL; |
d14771e1 | 248 | char *local_regno; |
02e7a332 | 249 | unsigned int max_local_regnum; |
250 | unsigned int maxregnum; | |
cdbd2a92 | 251 | rtx exit_label = 0; |
252 | rtx start_label; | |
253 | struct iv_class *bl; | |
cdbd2a92 | 254 | int splitting_not_safe = 0; |
641b112b | 255 | enum unroll_types unroll_type = UNROLL_NAIVE; |
cdbd2a92 | 256 | int loop_preconditioned = 0; |
257 | rtx safety_label; | |
258 | /* This points to the last real insn in the loop, which should be either | |
259 | a JUMP_INSN (for conditional jumps) or a BARRIER (for unconditional | |
260 | jumps). */ | |
261 | rtx last_loop_insn; | |
ec7d7ef9 | 262 | rtx loop_start = loop->start; |
263 | rtx loop_end = loop->end; | |
cdbd2a92 | 264 | |
265 | /* Don't bother unrolling huge loops. Since the minimum factor is | |
266 | two, loops greater than one half of MAX_UNROLLED_INSNS will never | |
267 | be unrolled. */ | |
268 | if (insn_count > MAX_UNROLLED_INSNS / 2) | |
269 | { | |
270 | if (loop_dump_stream) | |
271 | fprintf (loop_dump_stream, "Unrolling failure: Loop too big.\n"); | |
272 | return; | |
273 | } | |
274 | ||
cdbd2a92 | 275 | /* Determine type of unroll to perform. Depends on the number of iterations |
276 | and the size of the loop. */ | |
277 | ||
ec5d911a | 278 | /* If there is no strength reduce info, then set |
279 | loop_info->n_iterations to zero. This can happen if | |
280 | strength_reduce can't find any bivs in the loop. A value of zero | |
281 | indicates that the number of iterations could not be calculated. */ | |
cdbd2a92 | 282 | |
283 | if (! strength_reduce_p) | |
ec5d911a | 284 | loop_info->n_iterations = 0; |
cdbd2a92 | 285 | |
ec5d911a | 286 | if (loop_dump_stream && loop_info->n_iterations > 0) |
77d71bdb | 287 | { |
288 | fputs ("Loop unrolling: ", loop_dump_stream); | |
9058b363 | 289 | fprintf (loop_dump_stream, HOST_WIDE_INT_PRINT_DEC, |
ec5d911a | 290 | loop_info->n_iterations); |
77d71bdb | 291 | fputs (" iterations.\n", loop_dump_stream); |
292 | } | |
cdbd2a92 | 293 | |
294 | /* Find and save a pointer to the last nonnote insn in the loop. */ | |
295 | ||
296 | last_loop_insn = prev_nonnote_insn (loop_end); | |
297 | ||
298 | /* Calculate how many times to unroll the loop. Indicate whether or | |
299 | not the loop is being completely unrolled. */ | |
300 | ||
ec5d911a | 301 | if (loop_info->n_iterations == 1) |
cdbd2a92 | 302 | { |
3ff5e1f7 | 303 | /* Handle the case where the loop begins with an unconditional |
304 | jump to the loop condition. Make sure to delete the jump | |
305 | insn, otherwise the loop body will never execute. */ | |
306 | ||
307 | rtx ujump = ujump_to_loop_cont (loop->start, loop->cont); | |
308 | if (ujump) | |
e4bf866d | 309 | delete_related_insns (ujump); |
6da6f625 | 310 | |
cdbd2a92 | 311 | /* If number of iterations is exactly 1, then eliminate the compare and |
312 | branch at the end of the loop since they will never be taken. | |
313 | Then return, since no other action is needed here. */ | |
314 | ||
315 | /* If the last instruction is not a BARRIER or a JUMP_INSN, then | |
316 | don't do anything. */ | |
317 | ||
318 | if (GET_CODE (last_loop_insn) == BARRIER) | |
319 | { | |
320 | /* Delete the jump insn. This will delete the barrier also. */ | |
e4bf866d | 321 | delete_related_insns (PREV_INSN (last_loop_insn)); |
cdbd2a92 | 322 | } |
323 | else if (GET_CODE (last_loop_insn) == JUMP_INSN) | |
324 | { | |
9b7006ea | 325 | #ifdef HAVE_cc0 |
1a29b174 | 326 | rtx prev = PREV_INSN (last_loop_insn); |
9b7006ea | 327 | #endif |
e4bf866d | 328 | delete_related_insns (last_loop_insn); |
cdbd2a92 | 329 | #ifdef HAVE_cc0 |
1a29b174 | 330 | /* The immediately preceding insn may be a compare which must be |
cdbd2a92 | 331 | deleted. */ |
2dcd83ba | 332 | if (only_sets_cc0_p (prev)) |
e4bf866d | 333 | delete_related_insns (prev); |
cdbd2a92 | 334 | #endif |
335 | } | |
b21ba441 | 336 | |
337 | /* Remove the loop notes since this is no longer a loop. */ | |
ec7d7ef9 | 338 | if (loop->vtop) |
e4bf866d | 339 | delete_related_insns (loop->vtop); |
ec7d7ef9 | 340 | if (loop->cont) |
e4bf866d | 341 | delete_related_insns (loop->cont); |
b21ba441 | 342 | if (loop_start) |
e4bf866d | 343 | delete_related_insns (loop_start); |
b21ba441 | 344 | if (loop_end) |
e4bf866d | 345 | delete_related_insns (loop_end); |
b21ba441 | 346 | |
cdbd2a92 | 347 | return; |
348 | } | |
ec5d911a | 349 | else if (loop_info->n_iterations > 0 |
addcf1fa | 350 | /* Avoid overflow in the next expression. */ |
be58ed1d | 351 | && loop_info->n_iterations < (unsigned) MAX_UNROLLED_INSNS |
352 | && loop_info->n_iterations * insn_count < (unsigned) MAX_UNROLLED_INSNS) | |
cdbd2a92 | 353 | { |
ec5d911a | 354 | unroll_number = loop_info->n_iterations; |
cdbd2a92 | 355 | unroll_type = UNROLL_COMPLETELY; |
356 | } | |
ec5d911a | 357 | else if (loop_info->n_iterations > 0) |
cdbd2a92 | 358 | { |
359 | /* Try to factor the number of iterations. Don't bother with the | |
360 | general case, only using 2, 3, 5, and 7 will get 75% of all | |
361 | numbers theoretically, and almost all in practice. */ | |
362 | ||
363 | for (i = 0; i < NUM_FACTORS; i++) | |
364 | factors[i].count = 0; | |
365 | ||
ec5d911a | 366 | temp = loop_info->n_iterations; |
cdbd2a92 | 367 | for (i = NUM_FACTORS - 1; i >= 0; i--) |
368 | while (temp % factors[i].factor == 0) | |
369 | { | |
370 | factors[i].count++; | |
371 | temp = temp / factors[i].factor; | |
372 | } | |
373 | ||
374 | /* Start with the larger factors first so that we generally | |
375 | get lots of unrolling. */ | |
376 | ||
377 | unroll_number = 1; | |
378 | temp = insn_count; | |
379 | for (i = 3; i >= 0; i--) | |
380 | while (factors[i].count--) | |
381 | { | |
be58ed1d | 382 | if (temp * factors[i].factor < (unsigned) MAX_UNROLLED_INSNS) |
cdbd2a92 | 383 | { |
384 | unroll_number *= factors[i].factor; | |
385 | temp *= factors[i].factor; | |
386 | } | |
387 | else | |
388 | break; | |
389 | } | |
390 | ||
391 | /* If we couldn't find any factors, then unroll as in the normal | |
392 | case. */ | |
393 | if (unroll_number == 1) | |
394 | { | |
395 | if (loop_dump_stream) | |
6da6f625 | 396 | fprintf (loop_dump_stream, "Loop unrolling: No factors found.\n"); |
cdbd2a92 | 397 | } |
398 | else | |
399 | unroll_type = UNROLL_MODULO; | |
400 | } | |
401 | ||
cdbd2a92 | 402 | /* Default case, calculate number of times to unroll loop based on its |
403 | size. */ | |
641b112b | 404 | if (unroll_type == UNROLL_NAIVE) |
cdbd2a92 | 405 | { |
406 | if (8 * insn_count < MAX_UNROLLED_INSNS) | |
407 | unroll_number = 8; | |
408 | else if (4 * insn_count < MAX_UNROLLED_INSNS) | |
409 | unroll_number = 4; | |
410 | else | |
411 | unroll_number = 2; | |
cdbd2a92 | 412 | } |
413 | ||
414 | /* Now we know how many times to unroll the loop. */ | |
415 | ||
416 | if (loop_dump_stream) | |
6da6f625 | 417 | fprintf (loop_dump_stream, "Unrolling loop %d times.\n", unroll_number); |
cdbd2a92 | 418 | |
419 | if (unroll_type == UNROLL_COMPLETELY || unroll_type == UNROLL_MODULO) | |
420 | { | |
03cb2aac | 421 | /* Loops of these types can start with jump down to the exit condition |
422 | in rare circumstances. | |
423 | ||
424 | Consider a pair of nested loops where the inner loop is part | |
425 | of the exit code for the outer loop. | |
426 | ||
427 | In this case jump.c will not duplicate the exit test for the outer | |
428 | loop, so it will start with a jump to the exit code. | |
429 | ||
430 | Then consider if the inner loop turns out to iterate once and | |
431 | only once. We will end up deleting the jumps associated with | |
432 | the inner loop. However, the loop notes are not removed from | |
433 | the instruction stream. | |
434 | ||
435 | And finally assume that we can compute the number of iterations | |
436 | for the outer loop. | |
437 | ||
438 | In this case unroll may want to unroll the outer loop even though | |
439 | it starts with a jump to the outer loop's exit code. | |
440 | ||
441 | We could try to optimize this case, but it hardly seems worth it. | |
442 | Just return without unrolling the loop in such cases. */ | |
443 | ||
cdbd2a92 | 444 | insn = loop_start; |
445 | while (GET_CODE (insn) != CODE_LABEL && GET_CODE (insn) != JUMP_INSN) | |
446 | insn = NEXT_INSN (insn); | |
447 | if (GET_CODE (insn) == JUMP_INSN) | |
03cb2aac | 448 | return; |
cdbd2a92 | 449 | } |
450 | ||
451 | if (unroll_type == UNROLL_COMPLETELY) | |
452 | { | |
453 | /* Completely unrolling the loop: Delete the compare and branch at | |
454 | the end (the last two instructions). This delete must done at the | |
455 | very end of loop unrolling, to avoid problems with calls to | |
456 | back_branch_in_range_p, which is called by find_splittable_regs. | |
457 | All increments of splittable bivs/givs are changed to load constant | |
458 | instructions. */ | |
459 | ||
460 | copy_start = loop_start; | |
461 | ||
462 | /* Set insert_before to the instruction immediately after the JUMP_INSN | |
463 | (or BARRIER), so that any NOTEs between the JUMP_INSN and the end of | |
464 | the loop will be correctly handled by copy_loop_body. */ | |
465 | insert_before = NEXT_INSN (last_loop_insn); | |
466 | ||
467 | /* Set copy_end to the insn before the jump at the end of the loop. */ | |
468 | if (GET_CODE (last_loop_insn) == BARRIER) | |
469 | copy_end = PREV_INSN (PREV_INSN (last_loop_insn)); | |
470 | else if (GET_CODE (last_loop_insn) == JUMP_INSN) | |
471 | { | |
1a29b174 | 472 | copy_end = PREV_INSN (last_loop_insn); |
cdbd2a92 | 473 | #ifdef HAVE_cc0 |
1a29b174 | 474 | /* The instruction immediately before the JUMP_INSN may be a compare |
cdbd2a92 | 475 | instruction which we do not want to copy. */ |
1a29b174 | 476 | if (sets_cc0_p (PREV_INSN (copy_end))) |
477 | copy_end = PREV_INSN (copy_end); | |
cdbd2a92 | 478 | #endif |
479 | } | |
480 | else | |
481 | { | |
482 | /* We currently can't unroll a loop if it doesn't end with a | |
483 | JUMP_INSN. There would need to be a mechanism that recognizes | |
484 | this case, and then inserts a jump after each loop body, which | |
485 | jumps to after the last loop body. */ | |
486 | if (loop_dump_stream) | |
487 | fprintf (loop_dump_stream, | |
488 | "Unrolling failure: loop does not end with a JUMP_INSN.\n"); | |
489 | return; | |
490 | } | |
491 | } | |
492 | else if (unroll_type == UNROLL_MODULO) | |
493 | { | |
494 | /* Partially unrolling the loop: The compare and branch at the end | |
495 | (the last two instructions) must remain. Don't copy the compare | |
496 | and branch instructions at the end of the loop. Insert the unrolled | |
497 | code immediately before the compare/branch at the end so that the | |
498 | code will fall through to them as before. */ | |
499 | ||
500 | copy_start = loop_start; | |
501 | ||
502 | /* Set insert_before to the jump insn at the end of the loop. | |
503 | Set copy_end to before the jump insn at the end of the loop. */ | |
504 | if (GET_CODE (last_loop_insn) == BARRIER) | |
505 | { | |
506 | insert_before = PREV_INSN (last_loop_insn); | |
507 | copy_end = PREV_INSN (insert_before); | |
508 | } | |
509 | else if (GET_CODE (last_loop_insn) == JUMP_INSN) | |
510 | { | |
1a29b174 | 511 | insert_before = last_loop_insn; |
cdbd2a92 | 512 | #ifdef HAVE_cc0 |
1a29b174 | 513 | /* The instruction immediately before the JUMP_INSN may be a compare |
cdbd2a92 | 514 | instruction which we do not want to copy or delete. */ |
1a29b174 | 515 | if (sets_cc0_p (PREV_INSN (insert_before))) |
516 | insert_before = PREV_INSN (insert_before); | |
cdbd2a92 | 517 | #endif |
1a29b174 | 518 | copy_end = PREV_INSN (insert_before); |
cdbd2a92 | 519 | } |
520 | else | |
521 | { | |
522 | /* We currently can't unroll a loop if it doesn't end with a | |
523 | JUMP_INSN. There would need to be a mechanism that recognizes | |
524 | this case, and then inserts a jump after each loop body, which | |
525 | jumps to after the last loop body. */ | |
526 | if (loop_dump_stream) | |
527 | fprintf (loop_dump_stream, | |
528 | "Unrolling failure: loop does not end with a JUMP_INSN.\n"); | |
529 | return; | |
530 | } | |
531 | } | |
532 | else | |
533 | { | |
534 | /* Normal case: Must copy the compare and branch instructions at the | |
535 | end of the loop. */ | |
536 | ||
537 | if (GET_CODE (last_loop_insn) == BARRIER) | |
538 | { | |
539 | /* Loop ends with an unconditional jump and a barrier. | |
540 | Handle this like above, don't copy jump and barrier. | |
541 | This is not strictly necessary, but doing so prevents generating | |
542 | unconditional jumps to an immediately following label. | |
543 | ||
544 | This will be corrected below if the target of this jump is | |
545 | not the start_label. */ | |
546 | ||
547 | insert_before = PREV_INSN (last_loop_insn); | |
548 | copy_end = PREV_INSN (insert_before); | |
549 | } | |
550 | else if (GET_CODE (last_loop_insn) == JUMP_INSN) | |
551 | { | |
552 | /* Set insert_before to immediately after the JUMP_INSN, so that | |
553 | NOTEs at the end of the loop will be correctly handled by | |
554 | copy_loop_body. */ | |
555 | insert_before = NEXT_INSN (last_loop_insn); | |
556 | copy_end = last_loop_insn; | |
557 | } | |
558 | else | |
559 | { | |
560 | /* We currently can't unroll a loop if it doesn't end with a | |
561 | JUMP_INSN. There would need to be a mechanism that recognizes | |
562 | this case, and then inserts a jump after each loop body, which | |
563 | jumps to after the last loop body. */ | |
564 | if (loop_dump_stream) | |
565 | fprintf (loop_dump_stream, | |
566 | "Unrolling failure: loop does not end with a JUMP_INSN.\n"); | |
567 | return; | |
568 | } | |
569 | ||
570 | /* If copying exit test branches because they can not be eliminated, | |
571 | then must convert the fall through case of the branch to a jump past | |
572 | the end of the loop. Create a label to emit after the loop and save | |
573 | it for later use. Do not use the label after the loop, if any, since | |
574 | it might be used by insns outside the loop, or there might be insns | |
575 | added before it later by final_[bg]iv_value which must be after | |
576 | the real exit label. */ | |
577 | exit_label = gen_label_rtx (); | |
578 | ||
579 | insn = loop_start; | |
580 | while (GET_CODE (insn) != CODE_LABEL && GET_CODE (insn) != JUMP_INSN) | |
581 | insn = NEXT_INSN (insn); | |
582 | ||
583 | if (GET_CODE (insn) == JUMP_INSN) | |
584 | { | |
585 | /* The loop starts with a jump down to the exit condition test. | |
586 | Start copying the loop after the barrier following this | |
587 | jump insn. */ | |
588 | copy_start = NEXT_INSN (insn); | |
589 | ||
590 | /* Splitting induction variables doesn't work when the loop is | |
591 | entered via a jump to the bottom, because then we end up doing | |
592 | a comparison against a new register for a split variable, but | |
593 | we did not execute the set insn for the new register because | |
594 | it was skipped over. */ | |
595 | splitting_not_safe = 1; | |
596 | if (loop_dump_stream) | |
597 | fprintf (loop_dump_stream, | |
598 | "Splitting not safe, because loop not entered at top.\n"); | |
599 | } | |
600 | else | |
601 | copy_start = loop_start; | |
602 | } | |
603 | ||
604 | /* This should always be the first label in the loop. */ | |
605 | start_label = NEXT_INSN (copy_start); | |
606 | /* There may be a line number note and/or a loop continue note here. */ | |
607 | while (GET_CODE (start_label) == NOTE) | |
608 | start_label = NEXT_INSN (start_label); | |
609 | if (GET_CODE (start_label) != CODE_LABEL) | |
610 | { | |
611 | /* This can happen as a result of jump threading. If the first insns in | |
612 | the loop test the same condition as the loop's backward jump, or the | |
613 | opposite condition, then the backward jump will be modified to point | |
614 | to elsewhere, and the loop's start label is deleted. | |
615 | ||
616 | This case currently can not be handled by the loop unrolling code. */ | |
617 | ||
618 | if (loop_dump_stream) | |
619 | fprintf (loop_dump_stream, | |
620 | "Unrolling failure: unknown insns between BEG note and loop label.\n"); | |
621 | return; | |
622 | } | |
4529e7a1 | 623 | if (LABEL_NAME (start_label)) |
624 | { | |
625 | /* The jump optimization pass must have combined the original start label | |
626 | with a named label for a goto. We can't unroll this case because | |
627 | jumps which go to the named label must be handled differently than | |
628 | jumps to the loop start, and it is impossible to differentiate them | |
629 | in this case. */ | |
630 | if (loop_dump_stream) | |
631 | fprintf (loop_dump_stream, | |
632 | "Unrolling failure: loop start label is gone\n"); | |
633 | return; | |
634 | } | |
cdbd2a92 | 635 | |
636 | if (unroll_type == UNROLL_NAIVE | |
637 | && GET_CODE (last_loop_insn) == BARRIER | |
c2e94b25 | 638 | && GET_CODE (PREV_INSN (last_loop_insn)) == JUMP_INSN |
cdbd2a92 | 639 | && start_label != JUMP_LABEL (PREV_INSN (last_loop_insn))) |
640 | { | |
641 | /* In this case, we must copy the jump and barrier, because they will | |
642 | not be converted to jumps to an immediately following label. */ | |
643 | ||
644 | insert_before = NEXT_INSN (last_loop_insn); | |
645 | copy_end = last_loop_insn; | |
646 | } | |
647 | ||
3cd052ae | 648 | if (unroll_type == UNROLL_NAIVE |
649 | && GET_CODE (last_loop_insn) == JUMP_INSN | |
650 | && start_label != JUMP_LABEL (last_loop_insn)) | |
651 | { | |
652 | /* ??? The loop ends with a conditional branch that does not branch back | |
653 | to the loop start label. In this case, we must emit an unconditional | |
654 | branch to the loop exit after emitting the final branch. | |
655 | copy_loop_body does not have support for this currently, so we | |
656 | give up. It doesn't seem worthwhile to unroll anyways since | |
657 | unrolling would increase the number of branch instructions | |
658 | executed. */ | |
659 | if (loop_dump_stream) | |
660 | fprintf (loop_dump_stream, | |
661 | "Unrolling failure: final conditional branch not to loop start\n"); | |
662 | return; | |
663 | } | |
664 | ||
cdbd2a92 | 665 | /* Allocate a translation table for the labels and insn numbers. |
666 | They will be filled in as we copy the insns in the loop. */ | |
667 | ||
668 | max_labelno = max_label_num (); | |
669 | max_insnno = get_max_uid (); | |
670 | ||
84a259b4 | 671 | /* Various paths through the unroll code may reach the "egress" label |
672 | without initializing fields within the map structure. | |
cdbd2a92 | 673 | |
84a259b4 | 674 | To be safe, we use xcalloc to zero the memory. */ |
675 | map = (struct inline_remap *) xcalloc (1, sizeof (struct inline_remap)); | |
b9d0ebd3 | 676 | |
cdbd2a92 | 677 | /* Allocate the label map. */ |
678 | ||
679 | if (max_labelno > 0) | |
680 | { | |
0458bddc | 681 | map->label_map = (rtx *) xcalloc (max_labelno, sizeof (rtx)); |
8b861be4 | 682 | local_label = (char *) xcalloc (max_labelno, sizeof (char)); |
cdbd2a92 | 683 | } |
cdbd2a92 | 684 | |
685 | /* Search the loop and mark all local labels, i.e. the ones which have to | |
686 | be distinct labels when copied. For all labels which might be | |
687 | non-local, set their label_map entries to point to themselves. | |
688 | If they happen to be local their label_map entries will be overwritten | |
689 | before the loop body is copied. The label_map entries for local labels | |
690 | will be set to a different value each time the loop body is copied. */ | |
691 | ||
692 | for (insn = copy_start; insn != loop_end; insn = NEXT_INSN (insn)) | |
693 | { | |
322b2436 | 694 | rtx note; |
695 | ||
cdbd2a92 | 696 | if (GET_CODE (insn) == CODE_LABEL) |
697 | local_label[CODE_LABEL_NUMBER (insn)] = 1; | |
698 | else if (GET_CODE (insn) == JUMP_INSN) | |
699 | { | |
700 | if (JUMP_LABEL (insn)) | |
c3dfa4e8 | 701 | set_label_in_map (map, |
702 | CODE_LABEL_NUMBER (JUMP_LABEL (insn)), | |
703 | JUMP_LABEL (insn)); | |
cdbd2a92 | 704 | else if (GET_CODE (PATTERN (insn)) == ADDR_VEC |
705 | || GET_CODE (PATTERN (insn)) == ADDR_DIFF_VEC) | |
706 | { | |
707 | rtx pat = PATTERN (insn); | |
708 | int diff_vec_p = GET_CODE (PATTERN (insn)) == ADDR_DIFF_VEC; | |
709 | int len = XVECLEN (pat, diff_vec_p); | |
710 | rtx label; | |
711 | ||
712 | for (i = 0; i < len; i++) | |
713 | { | |
714 | label = XEXP (XVECEXP (pat, diff_vec_p, i), 0); | |
6da6f625 | 715 | set_label_in_map (map, CODE_LABEL_NUMBER (label), label); |
cdbd2a92 | 716 | } |
717 | } | |
718 | } | |
d3ff0f75 | 719 | if ((note = find_reg_note (insn, REG_LABEL, NULL_RTX))) |
322b2436 | 720 | set_label_in_map (map, CODE_LABEL_NUMBER (XEXP (note, 0)), |
721 | XEXP (note, 0)); | |
cdbd2a92 | 722 | } |
723 | ||
724 | /* Allocate space for the insn map. */ | |
725 | ||
8b861be4 | 726 | map->insn_map = (rtx *) xmalloc (max_insnno * sizeof (rtx)); |
cdbd2a92 | 727 | |
728 | /* Set this to zero, to indicate that we are doing loop unrolling, | |
729 | not function inlining. */ | |
730 | map->inline_target = 0; | |
731 | ||
732 | /* The register and constant maps depend on the number of registers | |
733 | present, so the final maps can't be created until after | |
734 | find_splittable_regs is called. However, they are needed for | |
735 | preconditioning, so we create temporary maps when preconditioning | |
736 | is performed. */ | |
737 | ||
738 | /* The preconditioning code may allocate two new pseudo registers. */ | |
739 | maxregnum = max_reg_num (); | |
740 | ||
90006a38 | 741 | /* local_regno is only valid for regnos < max_local_regnum. */ |
742 | max_local_regnum = maxregnum; | |
743 | ||
cdbd2a92 | 744 | /* Allocate and zero out the splittable_regs and addr_combined_regs |
745 | arrays. These must be zeroed here because they will be used if | |
746 | loop preconditioning is performed, and must be zero for that case. | |
747 | ||
748 | It is safe to do this here, since the extra registers created by the | |
749 | preconditioning code and find_splittable_regs will never be used | |
4bbea254 | 750 | to access the splittable_regs[] and addr_combined_regs[] arrays. */ |
cdbd2a92 | 751 | |
8b861be4 | 752 | splittable_regs = (rtx *) xcalloc (maxregnum, sizeof (rtx)); |
8b861be4 | 753 | splittable_regs_updates = (int *) xcalloc (maxregnum, sizeof (int)); |
cdbd2a92 | 754 | addr_combined_regs |
8b861be4 | 755 | = (struct induction **) xcalloc (maxregnum, sizeof (struct induction *)); |
756 | local_regno = (char *) xcalloc (maxregnum, sizeof (char)); | |
d14771e1 | 757 | |
758 | /* Mark all local registers, i.e. the ones which are referenced only | |
bbef98ff | 759 | inside the loop. */ |
760 | if (INSN_UID (copy_end) < max_uid_for_loop) | |
c7c7572a | 761 | { |
762 | int copy_start_luid = INSN_LUID (copy_start); | |
763 | int copy_end_luid = INSN_LUID (copy_end); | |
d14771e1 | 764 | |
c7c7572a | 765 | /* If a register is used in the jump insn, we must not duplicate it |
766 | since it will also be used outside the loop. */ | |
767 | if (GET_CODE (copy_end) == JUMP_INSN) | |
768 | copy_end_luid--; | |
8a040e41 | 769 | |
c7c7572a | 770 | /* If we have a target that uses cc0, then we also must not duplicate |
1a29b174 | 771 | the insn that sets cc0 before the jump insn, if one is present. */ |
8a040e41 | 772 | #ifdef HAVE_cc0 |
6da6f625 | 773 | if (GET_CODE (copy_end) == JUMP_INSN |
774 | && sets_cc0_p (PREV_INSN (copy_end))) | |
c7c7572a | 775 | copy_end_luid--; |
8a040e41 | 776 | #endif |
777 | ||
c7c7572a | 778 | /* If copy_start points to the NOTE that starts the loop, then we must |
779 | use the next luid, because invariant pseudo-regs moved out of the loop | |
780 | have their lifetimes modified to start here, but they are not safe | |
781 | to duplicate. */ | |
782 | if (copy_start == loop_start) | |
783 | copy_start_luid++; | |
784 | ||
785 | /* If a pseudo's lifetime is entirely contained within this loop, then we | |
786 | can use a different pseudo in each unrolled copy of the loop. This | |
787 | results in better code. */ | |
788 | /* We must limit the generic test to max_reg_before_loop, because only | |
789 | these pseudo registers have valid regno_first_uid info. */ | |
02e7a332 | 790 | for (r = FIRST_PSEUDO_REGISTER; r < max_reg_before_loop; ++r) |
791 | if (REGNO_FIRST_UID (r) > 0 && REGNO_FIRST_UID (r) <= max_uid_for_loop | |
23e52523 | 792 | && REGNO_FIRST_LUID (r) >= copy_start_luid |
02e7a332 | 793 | && REGNO_LAST_UID (r) > 0 && REGNO_LAST_UID (r) <= max_uid_for_loop |
23e52523 | 794 | && REGNO_LAST_LUID (r) <= copy_end_luid) |
c7c7572a | 795 | { |
796 | /* However, we must also check for loop-carried dependencies. | |
797 | If the value the pseudo has at the end of iteration X is | |
798 | used by iteration X+1, then we can not use a different pseudo | |
799 | for each unrolled copy of the loop. */ | |
800 | /* A pseudo is safe if regno_first_uid is a set, and this | |
801 | set dominates all instructions from regno_first_uid to | |
802 | regno_last_uid. */ | |
803 | /* ??? This check is simplistic. We would get better code if | |
804 | this check was more sophisticated. */ | |
02e7a332 | 805 | if (set_dominates_use (r, REGNO_FIRST_UID (r), REGNO_LAST_UID (r), |
c7c7572a | 806 | copy_start, copy_end)) |
02e7a332 | 807 | local_regno[r] = 1; |
c7c7572a | 808 | |
809 | if (loop_dump_stream) | |
810 | { | |
02e7a332 | 811 | if (local_regno[r]) |
812 | fprintf (loop_dump_stream, "Marked reg %d as local\n", r); | |
c7c7572a | 813 | else |
814 | fprintf (loop_dump_stream, "Did not mark reg %d as local\n", | |
02e7a332 | 815 | r); |
c7c7572a | 816 | } |
817 | } | |
c7c7572a | 818 | } |
cdbd2a92 | 819 | |
820 | /* If this loop requires exit tests when unrolled, check to see if we | |
821 | can precondition the loop so as to make the exit tests unnecessary. | |
822 | Just like variable splitting, this is not safe if the loop is entered | |
823 | via a jump to the bottom. Also, can not do this if no strength | |
824 | reduce info, because precondition_loop_p uses this info. */ | |
825 | ||
826 | /* Must copy the loop body for preconditioning before the following | |
827 | find_splittable_regs call since that will emit insns which need to | |
828 | be after the preconditioned loop copies, but immediately before the | |
829 | unrolled loop copies. */ | |
830 | ||
831 | /* Also, it is not safe to split induction variables for the preconditioned | |
832 | copies of the loop body. If we split induction variables, then the code | |
833 | assumes that each induction variable can be represented as a function | |
834 | of its initial value and the loop iteration number. This is not true | |
835 | in this case, because the last preconditioned copy of the loop body | |
836 | could be any iteration from the first up to the `unroll_number-1'th, | |
837 | depending on the initial value of the iteration variable. Therefore | |
838 | we can not split induction variables here, because we can not calculate | |
839 | their value. Hence, this code must occur before find_splittable_regs | |
840 | is called. */ | |
841 | ||
842 | if (unroll_type == UNROLL_NAIVE && ! splitting_not_safe && strength_reduce_p) | |
843 | { | |
844 | rtx initial_value, final_value, increment; | |
7bb29472 | 845 | enum machine_mode mode; |
cdbd2a92 | 846 | |
15fc3eb7 | 847 | if (precondition_loop_p (loop, |
7bb29472 | 848 | &initial_value, &final_value, &increment, |
849 | &mode)) | |
cdbd2a92 | 850 | { |
fff101de | 851 | rtx diff, insn; |
cdbd2a92 | 852 | rtx *labels; |
853 | int abs_inc, neg_inc; | |
0cb3e18b | 854 | enum rtx_code cc = loop_info->comparison_code; |
855 | int less_p = (cc == LE || cc == LEU || cc == LT || cc == LTU); | |
856 | int unsigned_p = (cc == LEU || cc == GEU || cc == LTU || cc == GTU); | |
cdbd2a92 | 857 | |
8b861be4 | 858 | map->reg_map = (rtx *) xmalloc (maxregnum * sizeof (rtx)); |
cdbd2a92 | 859 | |
433c8fce | 860 | VARRAY_CONST_EQUIV_INIT (map->const_equiv_varray, maxregnum, |
addcf1fa | 861 | "unroll_loop_precondition"); |
433c8fce | 862 | global_const_equiv_varray = map->const_equiv_varray; |
cdbd2a92 | 863 | |
864 | init_reg_map (map, maxregnum); | |
865 | ||
866 | /* Limit loop unrolling to 4, since this will make 7 copies of | |
867 | the loop body. */ | |
868 | if (unroll_number > 4) | |
869 | unroll_number = 4; | |
870 | ||
871 | /* Save the absolute value of the increment, and also whether or | |
872 | not it is negative. */ | |
873 | neg_inc = 0; | |
874 | abs_inc = INTVAL (increment); | |
875 | if (abs_inc < 0) | |
876 | { | |
6da6f625 | 877 | abs_inc = -abs_inc; |
cdbd2a92 | 878 | neg_inc = 1; |
879 | } | |
880 | ||
881 | start_sequence (); | |
882 | ||
fff101de | 883 | /* We must copy the final and initial values here to avoid |
884 | improperly shared rtl. */ | |
885 | final_value = copy_rtx (final_value); | |
886 | initial_value = copy_rtx (initial_value); | |
887 | ||
4aa863f8 | 888 | /* Final value may have form of (PLUS val1 const1_rtx). We need |
889 | to convert it into general operand, so compute the real value. */ | |
890 | ||
fff101de | 891 | final_value = force_operand (final_value, NULL_RTX); |
4aa863f8 | 892 | if (!nonmemory_operand (final_value, VOIDmode)) |
fff101de | 893 | final_value = force_reg (mode, final_value); |
4aa863f8 | 894 | |
cdbd2a92 | 895 | /* Calculate the difference between the final and initial values. |
896 | Final value may be a (plus (reg x) (const_int 1)) rtx. | |
0cb3e18b | 897 | |
898 | We have to deal with for (i = 0; --i < 6;) type loops. | |
899 | For such loops the real final value is the first time the | |
900 | loop variable overflows, so the diff we calculate is the | |
901 | distance from the overflow value. This is 0 or ~0 for | |
902 | unsigned loops depending on the direction, or INT_MAX, | |
903 | INT_MAX+1 for signed loops. We really do not need the | |
904 | exact value, since we are only interested in the diff | |
905 | modulo the increment, and the increment is a power of 2, | |
906 | so we can pretend that the overflow value is 0/~0. */ | |
907 | ||
908 | if (cc == NE || less_p != neg_inc) | |
fff101de | 909 | diff = simplify_gen_binary (MINUS, mode, final_value, |
910 | initial_value); | |
0cb3e18b | 911 | else |
fff101de | 912 | diff = simplify_gen_unary (neg_inc ? NOT : NEG, mode, |
913 | initial_value, mode); | |
914 | diff = force_operand (diff, NULL_RTX); | |
cdbd2a92 | 915 | |
916 | /* Now calculate (diff % (unroll * abs (increment))) by using an | |
917 | and instruction. */ | |
fff101de | 918 | diff = simplify_gen_binary (AND, mode, diff, |
919 | GEN_INT (unroll_number*abs_inc - 1)); | |
920 | diff = force_operand (diff, NULL_RTX); | |
cdbd2a92 | 921 | |
922 | /* Now emit a sequence of branches to jump to the proper precond | |
923 | loop entry point. */ | |
924 | ||
8b861be4 | 925 | labels = (rtx *) xmalloc (sizeof (rtx) * unroll_number); |
cdbd2a92 | 926 | for (i = 0; i < unroll_number; i++) |
927 | labels[i] = gen_label_rtx (); | |
928 | ||
22a93b8e | 929 | /* Check for the case where the initial value is greater than or |
930 | equal to the final value. In that case, we want to execute | |
931 | exactly one loop iteration. The code below will fail for this | |
932 | case. This check does not apply if the loop has a NE | |
933 | comparison at the end. */ | |
4e66cb23 | 934 | |
0cb3e18b | 935 | if (cc != NE) |
22a93b8e | 936 | { |
0cb3e18b | 937 | rtx incremented_initval; |
fff101de | 938 | enum rtx_code cmp_code; |
939 | ||
940 | incremented_initval | |
941 | = simplify_gen_binary (PLUS, mode, initial_value, increment); | |
942 | incremented_initval | |
943 | = force_operand (incremented_initval, NULL_RTX); | |
944 | ||
945 | cmp_code = (less_p | |
946 | ? (unsigned_p ? GEU : GE) | |
947 | : (unsigned_p ? LEU : LE)); | |
948 | ||
949 | insn = simplify_cmp_and_jump_insns (cmp_code, mode, | |
950 | incremented_initval, | |
951 | final_value, labels[1]); | |
952 | if (insn) | |
953 | predict_insn_def (insn, PRED_LOOP_CONDITION, TAKEN); | |
22a93b8e | 954 | } |
4e66cb23 | 955 | |
cdbd2a92 | 956 | /* Assuming the unroll_number is 4, and the increment is 2, then |
957 | for a negative increment: for a positive increment: | |
958 | diff = 0,1 precond 0 diff = 0,7 precond 0 | |
959 | diff = 2,3 precond 3 diff = 1,2 precond 1 | |
960 | diff = 4,5 precond 2 diff = 3,4 precond 2 | |
961 | diff = 6,7 precond 1 diff = 5,6 precond 3 */ | |
962 | ||
963 | /* We only need to emit (unroll_number - 1) branches here, the | |
964 | last case just falls through to the following code. */ | |
965 | ||
966 | /* ??? This would give better code if we emitted a tree of branches | |
967 | instead of the current linear list of branches. */ | |
968 | ||
969 | for (i = 0; i < unroll_number - 1; i++) | |
970 | { | |
971 | int cmp_const; | |
4e66cb23 | 972 | enum rtx_code cmp_code; |
cdbd2a92 | 973 | |
974 | /* For negative increments, must invert the constant compared | |
975 | against, except when comparing against zero. */ | |
976 | if (i == 0) | |
4e66cb23 | 977 | { |
978 | cmp_const = 0; | |
979 | cmp_code = EQ; | |
980 | } | |
cdbd2a92 | 981 | else if (neg_inc) |
4e66cb23 | 982 | { |
983 | cmp_const = unroll_number - i; | |
984 | cmp_code = GE; | |
985 | } | |
cdbd2a92 | 986 | else |
4e66cb23 | 987 | { |
988 | cmp_const = i; | |
989 | cmp_code = LE; | |
990 | } | |
cdbd2a92 | 991 | |
fff101de | 992 | insn = simplify_cmp_and_jump_insns (cmp_code, mode, diff, |
993 | GEN_INT (abs_inc*cmp_const), | |
994 | labels[i]); | |
995 | if (insn) | |
996 | predict_insn (insn, PRED_LOOP_PRECONDITIONING, | |
997 | REG_BR_PROB_BASE / (unroll_number - i)); | |
cdbd2a92 | 998 | } |
999 | ||
1000 | /* If the increment is greater than one, then we need another branch, | |
1001 | to handle other cases equivalent to 0. */ | |
1002 | ||
1003 | /* ??? This should be merged into the code above somehow to help | |
1004 | simplify the code here, and reduce the number of branches emitted. | |
1005 | For the negative increment case, the branch here could easily | |
1006 | be merged with the `0' case branch above. For the positive | |
1007 | increment case, it is not clear how this can be simplified. */ | |
9058b363 | 1008 | |
cdbd2a92 | 1009 | if (abs_inc != 1) |
1010 | { | |
1011 | int cmp_const; | |
4e66cb23 | 1012 | enum rtx_code cmp_code; |
cdbd2a92 | 1013 | |
1014 | if (neg_inc) | |
4e66cb23 | 1015 | { |
1016 | cmp_const = abs_inc - 1; | |
1017 | cmp_code = LE; | |
1018 | } | |
cdbd2a92 | 1019 | else |
4e66cb23 | 1020 | { |
1021 | cmp_const = abs_inc * (unroll_number - 1) + 1; | |
1022 | cmp_code = GE; | |
1023 | } | |
cdbd2a92 | 1024 | |
fff101de | 1025 | simplify_cmp_and_jump_insns (cmp_code, mode, diff, |
1026 | GEN_INT (cmp_const), labels[0]); | |
cdbd2a92 | 1027 | } |
1028 | ||
31d3e01c | 1029 | sequence = get_insns (); |
cdbd2a92 | 1030 | end_sequence (); |
89e8d34f | 1031 | loop_insn_hoist (loop, sequence); |
9058b363 | 1032 | |
cdbd2a92 | 1033 | /* Only the last copy of the loop body here needs the exit |
1034 | test, so set copy_end to exclude the compare/branch here, | |
1035 | and then reset it inside the loop when get to the last | |
1036 | copy. */ | |
1037 | ||
1038 | if (GET_CODE (last_loop_insn) == BARRIER) | |
1039 | copy_end = PREV_INSN (PREV_INSN (last_loop_insn)); | |
1040 | else if (GET_CODE (last_loop_insn) == JUMP_INSN) | |
1041 | { | |
1a29b174 | 1042 | copy_end = PREV_INSN (last_loop_insn); |
cdbd2a92 | 1043 | #ifdef HAVE_cc0 |
6da6f625 | 1044 | /* The immediately preceding insn may be a compare which |
1045 | we do not want to copy. */ | |
1a29b174 | 1046 | if (sets_cc0_p (PREV_INSN (copy_end))) |
1047 | copy_end = PREV_INSN (copy_end); | |
cdbd2a92 | 1048 | #endif |
1049 | } | |
1050 | else | |
1051 | abort (); | |
1052 | ||
1053 | for (i = 1; i < unroll_number; i++) | |
1054 | { | |
1055 | emit_label_after (labels[unroll_number - i], | |
1056 | PREV_INSN (loop_start)); | |
1057 | ||
93d3b7de | 1058 | memset ((char *) map->insn_map, 0, max_insnno * sizeof (rtx)); |
1059 | memset ((char *) &VARRAY_CONST_EQUIV (map->const_equiv_varray, 0), | |
1060 | 0, (VARRAY_SIZE (map->const_equiv_varray) | |
1061 | * sizeof (struct const_equiv_data))); | |
cdbd2a92 | 1062 | map->const_age = 0; |
1063 | ||
1064 | for (j = 0; j < max_labelno; j++) | |
1065 | if (local_label[j]) | |
c3dfa4e8 | 1066 | set_label_in_map (map, j, gen_label_rtx ()); |
cdbd2a92 | 1067 | |
02e7a332 | 1068 | for (r = FIRST_PSEUDO_REGISTER; r < max_local_regnum; r++) |
1069 | if (local_regno[r]) | |
ea0cb7ae | 1070 | { |
02e7a332 | 1071 | map->reg_map[r] |
1072 | = gen_reg_rtx (GET_MODE (regno_reg_rtx[r])); | |
1073 | record_base_value (REGNO (map->reg_map[r]), | |
1074 | regno_reg_rtx[r], 0); | |
ea0cb7ae | 1075 | } |
cdbd2a92 | 1076 | /* The last copy needs the compare/branch insns at the end, |
1077 | so reset copy_end here if the loop ends with a conditional | |
1078 | branch. */ | |
1079 | ||
1080 | if (i == unroll_number - 1) | |
1081 | { | |
1082 | if (GET_CODE (last_loop_insn) == BARRIER) | |
1083 | copy_end = PREV_INSN (PREV_INSN (last_loop_insn)); | |
1084 | else | |
1085 | copy_end = last_loop_insn; | |
1086 | } | |
1087 | ||
1088 | /* None of the copies are the `last_iteration', so just | |
1089 | pass zero for that parameter. */ | |
8ec5f078 | 1090 | copy_loop_body (loop, copy_start, copy_end, map, exit_label, 0, |
cdbd2a92 | 1091 | unroll_type, start_label, loop_end, |
1092 | loop_start, copy_end); | |
1093 | } | |
1094 | emit_label_after (labels[0], PREV_INSN (loop_start)); | |
1095 | ||
1096 | if (GET_CODE (last_loop_insn) == BARRIER) | |
1097 | { | |
1098 | insert_before = PREV_INSN (last_loop_insn); | |
1099 | copy_end = PREV_INSN (insert_before); | |
1100 | } | |
1101 | else | |
1102 | { | |
cdbd2a92 | 1103 | insert_before = last_loop_insn; |
1a29b174 | 1104 | #ifdef HAVE_cc0 |
6da6f625 | 1105 | /* The instruction immediately before the JUMP_INSN may |
1106 | be a compare instruction which we do not want to copy | |
1107 | or delete. */ | |
1a29b174 | 1108 | if (sets_cc0_p (PREV_INSN (insert_before))) |
1109 | insert_before = PREV_INSN (insert_before); | |
cdbd2a92 | 1110 | #endif |
1a29b174 | 1111 | copy_end = PREV_INSN (insert_before); |
cdbd2a92 | 1112 | } |
1113 | ||
1114 | /* Set unroll type to MODULO now. */ | |
1115 | unroll_type = UNROLL_MODULO; | |
1116 | loop_preconditioned = 1; | |
8b861be4 | 1117 | |
1118 | /* Clean up. */ | |
1119 | free (labels); | |
cdbd2a92 | 1120 | } |
1121 | } | |
1122 | ||
1123 | /* If reach here, and the loop type is UNROLL_NAIVE, then don't unroll | |
1124 | the loop unless all loops are being unrolled. */ | |
1125 | if (unroll_type == UNROLL_NAIVE && ! flag_unroll_all_loops) | |
1126 | { | |
1127 | if (loop_dump_stream) | |
6da6f625 | 1128 | fprintf (loop_dump_stream, |
1129 | "Unrolling failure: Naive unrolling not being done.\n"); | |
433c8fce | 1130 | goto egress; |
cdbd2a92 | 1131 | } |
1132 | ||
1133 | /* At this point, we are guaranteed to unroll the loop. */ | |
1134 | ||
ec5d911a | 1135 | /* Keep track of the unroll factor for the loop. */ |
1f8922d4 | 1136 | loop_info->unroll_number = unroll_number; |
3eb9a99d | 1137 | |
4db1469f | 1138 | /* And whether the loop has been preconditioned. */ |
1139 | loop_info->preconditioned = loop_preconditioned; | |
1140 | ||
45498ea1 | 1141 | /* Remember whether it was preconditioned for the second loop pass. */ |
d090276e | 1142 | NOTE_PRECONDITIONED (loop->end) = loop_preconditioned; |
1143 | ||
cdbd2a92 | 1144 | /* For each biv and giv, determine whether it can be safely split into |
1145 | a different variable for each unrolled copy of the loop body. | |
1146 | We precalculate and save this info here, since computing it is | |
1147 | expensive. | |
1148 | ||
1149 | Do this before deleting any instructions from the loop, so that | |
1150 | back_branch_in_range_p will work correctly. */ | |
1151 | ||
1152 | if (splitting_not_safe) | |
1153 | temp = 0; | |
1154 | else | |
89e8d34f | 1155 | temp = find_splittable_regs (loop, unroll_type, unroll_number); |
cdbd2a92 | 1156 | |
1157 | /* find_splittable_regs may have created some new registers, so must | |
1158 | reallocate the reg_map with the new larger size, and must realloc | |
1159 | the constant maps also. */ | |
1160 | ||
1161 | maxregnum = max_reg_num (); | |
8b861be4 | 1162 | map->reg_map = (rtx *) xmalloc (maxregnum * sizeof (rtx)); |
cdbd2a92 | 1163 | |
1164 | init_reg_map (map, maxregnum); | |
1165 | ||
433c8fce | 1166 | if (map->const_equiv_varray == 0) |
1167 | VARRAY_CONST_EQUIV_INIT (map->const_equiv_varray, | |
1168 | maxregnum + temp * unroll_number * 2, | |
1169 | "unroll_loop"); | |
1170 | global_const_equiv_varray = map->const_equiv_varray; | |
cdbd2a92 | 1171 | |
1172 | /* Search the list of bivs and givs to find ones which need to be remapped | |
1173 | when split, and set their reg_map entry appropriately. */ | |
1174 | ||
6f812a51 | 1175 | for (bl = ivs->list; bl; bl = bl->next) |
cdbd2a92 | 1176 | { |
1177 | if (REGNO (bl->biv->src_reg) != bl->regno) | |
1178 | map->reg_map[bl->regno] = bl->biv->src_reg; | |
1179 | #if 0 | |
1180 | /* Currently, non-reduced/final-value givs are never split. */ | |
1181 | for (v = bl->giv; v; v = v->next_iv) | |
1182 | if (REGNO (v->src_reg) != bl->regno) | |
1183 | map->reg_map[REGNO (v->dest_reg)] = v->src_reg; | |
1184 | #endif | |
1185 | } | |
1186 | ||
130e4d41 | 1187 | /* Use our current register alignment and pointer flags. */ |
08513b52 | 1188 | map->regno_pointer_align = cfun->emit->regno_pointer_align; |
e61a0a7f | 1189 | map->x_regno_reg_rtx = cfun->emit->x_regno_reg_rtx; |
130e4d41 | 1190 | |
cdbd2a92 | 1191 | /* If the loop is being partially unrolled, and the iteration variables |
1192 | are being split, and are being renamed for the split, then must fix up | |
ee2ce5c0 | 1193 | the compare/jump instruction at the end of the loop to refer to the new |
cdbd2a92 | 1194 | registers. This compare isn't copied, so the registers used in it |
1195 | will never be replaced if it isn't done here. */ | |
1196 | ||
1197 | if (unroll_type == UNROLL_MODULO) | |
1198 | { | |
1199 | insn = NEXT_INSN (copy_end); | |
ee2ce5c0 | 1200 | if (GET_CODE (insn) == INSN || GET_CODE (insn) == JUMP_INSN) |
8ec5f078 | 1201 | PATTERN (insn) = remap_split_bivs (loop, PATTERN (insn)); |
cdbd2a92 | 1202 | } |
1203 | ||
a490489b | 1204 | /* For unroll_number times, make a copy of each instruction |
cdbd2a92 | 1205 | between copy_start and copy_end, and insert these new instructions |
1206 | before the end of the loop. */ | |
1207 | ||
1208 | for (i = 0; i < unroll_number; i++) | |
1209 | { | |
93d3b7de | 1210 | memset ((char *) map->insn_map, 0, max_insnno * sizeof (rtx)); |
1211 | memset ((char *) &VARRAY_CONST_EQUIV (map->const_equiv_varray, 0), 0, | |
1212 | VARRAY_SIZE (map->const_equiv_varray) * sizeof (struct const_equiv_data)); | |
cdbd2a92 | 1213 | map->const_age = 0; |
1214 | ||
1215 | for (j = 0; j < max_labelno; j++) | |
1216 | if (local_label[j]) | |
c3dfa4e8 | 1217 | set_label_in_map (map, j, gen_label_rtx ()); |
cdbd2a92 | 1218 | |
02e7a332 | 1219 | for (r = FIRST_PSEUDO_REGISTER; r < max_local_regnum; r++) |
1220 | if (local_regno[r]) | |
ea0cb7ae | 1221 | { |
02e7a332 | 1222 | map->reg_map[r] = gen_reg_rtx (GET_MODE (regno_reg_rtx[r])); |
1223 | record_base_value (REGNO (map->reg_map[r]), | |
1224 | regno_reg_rtx[r], 0); | |
ea0cb7ae | 1225 | } |
d14771e1 | 1226 | |
cdbd2a92 | 1227 | /* If loop starts with a branch to the test, then fix it so that |
1228 | it points to the test of the first unrolled copy of the loop. */ | |
1229 | if (i == 0 && loop_start != copy_start) | |
1230 | { | |
1231 | insn = PREV_INSN (copy_start); | |
1232 | pattern = PATTERN (insn); | |
9058b363 | 1233 | |
c3dfa4e8 | 1234 | tem = get_label_from_map (map, |
1235 | CODE_LABEL_NUMBER | |
1236 | (XEXP (SET_SRC (pattern), 0))); | |
941522d6 | 1237 | SET_SRC (pattern) = gen_rtx_LABEL_REF (VOIDmode, tem); |
cdbd2a92 | 1238 | |
1239 | /* Set the jump label so that it can be used by later loop unrolling | |
1240 | passes. */ | |
1241 | JUMP_LABEL (insn) = tem; | |
1242 | LABEL_NUSES (tem)++; | |
1243 | } | |
1244 | ||
8ec5f078 | 1245 | copy_loop_body (loop, copy_start, copy_end, map, exit_label, |
cdbd2a92 | 1246 | i == unroll_number - 1, unroll_type, start_label, |
1247 | loop_end, insert_before, insert_before); | |
1248 | } | |
1249 | ||
1250 | /* Before deleting any insns, emit a CODE_LABEL immediately after the last | |
1251 | insn to be deleted. This prevents any runaway delete_insn call from | |
1252 | more insns that it should, as it always stops at a CODE_LABEL. */ | |
1253 | ||
1254 | /* Delete the compare and branch at the end of the loop if completely | |
1255 | unrolling the loop. Deleting the backward branch at the end also | |
1256 | deletes the code label at the start of the loop. This is done at | |
1257 | the very end to avoid problems with back_branch_in_range_p. */ | |
1258 | ||
1259 | if (unroll_type == UNROLL_COMPLETELY) | |
1260 | safety_label = emit_label_after (gen_label_rtx (), last_loop_insn); | |
1261 | else | |
1262 | safety_label = emit_label_after (gen_label_rtx (), copy_end); | |
1263 | ||
9058b363 | 1264 | /* Delete all of the original loop instructions. Don't delete the |
cdbd2a92 | 1265 | LOOP_BEG note, or the first code label in the loop. */ |
1266 | ||
1267 | insn = NEXT_INSN (copy_start); | |
1268 | while (insn != safety_label) | |
1269 | { | |
2922db0c | 1270 | /* ??? Don't delete named code labels. They will be deleted when the |
1271 | jump that references them is deleted. Otherwise, we end up deleting | |
1272 | them twice, which causes them to completely disappear instead of turn | |
1273 | into NOTE_INSN_DELETED_LABEL notes. This in turn causes aborts in | |
1274 | dwarfout.c/dwarf2out.c. We could perhaps fix the dwarf*out.c files | |
1275 | to handle deleted labels instead. Or perhaps fix DECL_RTL of the | |
1276 | associated LABEL_DECL to point to one of the new label instances. */ | |
1277 | /* ??? Likewise, we can't delete a NOTE_INSN_DELETED_LABEL note. */ | |
45ae8273 | 1278 | if (insn != start_label |
2922db0c | 1279 | && ! (GET_CODE (insn) == CODE_LABEL && LABEL_NAME (insn)) |
45ae8273 | 1280 | && ! (GET_CODE (insn) == NOTE |
1281 | && NOTE_LINE_NUMBER (insn) == NOTE_INSN_DELETED_LABEL)) | |
e4bf866d | 1282 | insn = delete_related_insns (insn); |
cdbd2a92 | 1283 | else |
1284 | insn = NEXT_INSN (insn); | |
1285 | } | |
1286 | ||
1287 | /* Can now delete the 'safety' label emitted to protect us from runaway | |
e4bf866d | 1288 | delete_related_insns calls. */ |
cdbd2a92 | 1289 | if (INSN_DELETED_P (safety_label)) |
1290 | abort (); | |
e4bf866d | 1291 | delete_related_insns (safety_label); |
cdbd2a92 | 1292 | |
1293 | /* If exit_label exists, emit it after the loop. Doing the emit here | |
1294 | forces it to have a higher INSN_UID than any insn in the unrolled loop. | |
1295 | This is needed so that mostly_true_jump in reorg.c will treat jumps | |
1296 | to this loop end label correctly, i.e. predict that they are usually | |
1297 | not taken. */ | |
1298 | if (exit_label) | |
1299 | emit_label_after (exit_label, loop_end); | |
433c8fce | 1300 | |
1301 | egress: | |
b21ba441 | 1302 | if (unroll_type == UNROLL_COMPLETELY) |
1303 | { | |
1304 | /* Remove the loop notes since this is no longer a loop. */ | |
ec7d7ef9 | 1305 | if (loop->vtop) |
e4bf866d | 1306 | delete_related_insns (loop->vtop); |
ec7d7ef9 | 1307 | if (loop->cont) |
e4bf866d | 1308 | delete_related_insns (loop->cont); |
b21ba441 | 1309 | if (loop_start) |
e4bf866d | 1310 | delete_related_insns (loop_start); |
b21ba441 | 1311 | if (loop_end) |
e4bf866d | 1312 | delete_related_insns (loop_end); |
b21ba441 | 1313 | } |
1314 | ||
8b861be4 | 1315 | if (map->const_equiv_varray) |
433c8fce | 1316 | VARRAY_FREE (map->const_equiv_varray); |
8b861be4 | 1317 | if (map->label_map) |
1318 | { | |
1319 | free (map->label_map); | |
1320 | free (local_label); | |
1321 | } | |
1322 | free (map->insn_map); | |
1323 | free (splittable_regs); | |
8b861be4 | 1324 | free (splittable_regs_updates); |
1325 | free (addr_combined_regs); | |
1326 | free (local_regno); | |
1327 | if (map->reg_map) | |
1328 | free (map->reg_map); | |
1329 | free (map); | |
cdbd2a92 | 1330 | } |
fff101de | 1331 | |
1332 | /* A helper function for unroll_loop. Emit a compare and branch to | |
1333 | satisfy (CMP OP1 OP2), but pass this through the simplifier first. | |
1334 | If the branch turned out to be conditional, return it, otherwise | |
1335 | return NULL. */ | |
1336 | ||
1337 | static rtx | |
1338 | simplify_cmp_and_jump_insns (code, mode, op0, op1, label) | |
1339 | enum rtx_code code; | |
1340 | enum machine_mode mode; | |
1341 | rtx op0, op1, label; | |
1342 | { | |
1343 | rtx t, insn; | |
1344 | ||
1345 | t = simplify_relational_operation (code, mode, op0, op1); | |
1346 | if (!t) | |
1347 | { | |
1348 | enum rtx_code scode = signed_condition (code); | |
1349 | emit_cmp_and_jump_insns (op0, op1, scode, NULL_RTX, mode, | |
1350 | code != scode, label); | |
1351 | insn = get_last_insn (); | |
1352 | ||
1353 | JUMP_LABEL (insn) = label; | |
1354 | LABEL_NUSES (label) += 1; | |
1355 | ||
1356 | return insn; | |
1357 | } | |
1358 | else if (t == const_true_rtx) | |
1359 | { | |
1360 | insn = emit_jump_insn (gen_jump (label)); | |
1361 | emit_barrier (); | |
1362 | JUMP_LABEL (insn) = label; | |
1363 | LABEL_NUSES (label) += 1; | |
1364 | } | |
1365 | ||
1366 | return NULL_RTX; | |
1367 | } | |
cdbd2a92 | 1368 | \f |
1369 | /* Return true if the loop can be safely, and profitably, preconditioned | |
1370 | so that the unrolled copies of the loop body don't need exit tests. | |
1371 | ||
1372 | This only works if final_value, initial_value and increment can be | |
1373 | determined, and if increment is a constant power of 2. | |
1374 | If increment is not a power of 2, then the preconditioning modulo | |
1375 | operation would require a real modulo instead of a boolean AND, and this | |
1376 | is not considered `profitable'. */ | |
1377 | ||
1378 | /* ??? If the loop is known to be executed very many times, or the machine | |
1379 | has a very cheap divide instruction, then preconditioning is a win even | |
1380 | when the increment is not a power of 2. Use RTX_COST to compute | |
a490489b | 1381 | whether divide is cheap. |
1382 | ??? A divide by constant doesn't actually need a divide, look at | |
1383 | expand_divmod. The reduced cost of this optimized modulo is not | |
1384 | reflected in RTX_COST. */ | |
cdbd2a92 | 1385 | |
ec5d911a | 1386 | int |
15fc3eb7 | 1387 | precondition_loop_p (loop, initial_value, final_value, increment, mode) |
1388 | const struct loop *loop; | |
ec5d911a | 1389 | rtx *initial_value, *final_value, *increment; |
7bb29472 | 1390 | enum machine_mode *mode; |
cdbd2a92 | 1391 | { |
15fc3eb7 | 1392 | rtx loop_start = loop->start; |
1393 | struct loop_info *loop_info = LOOP_INFO (loop); | |
cdbd2a92 | 1394 | |
ec5d911a | 1395 | if (loop_info->n_iterations > 0) |
cdbd2a92 | 1396 | { |
ba26e2a7 | 1397 | if (INTVAL (loop_info->increment) > 0) |
1398 | { | |
1399 | *initial_value = const0_rtx; | |
1400 | *increment = const1_rtx; | |
1401 | *final_value = GEN_INT (loop_info->n_iterations); | |
1402 | } | |
1403 | else | |
1404 | { | |
1405 | *initial_value = GEN_INT (loop_info->n_iterations); | |
1406 | *increment = constm1_rtx; | |
1407 | *final_value = const0_rtx; | |
1408 | } | |
7bb29472 | 1409 | *mode = word_mode; |
cdbd2a92 | 1410 | |
1411 | if (loop_dump_stream) | |
77d71bdb | 1412 | { |
1413 | fputs ("Preconditioning: Success, number of iterations known, ", | |
1414 | loop_dump_stream); | |
1415 | fprintf (loop_dump_stream, HOST_WIDE_INT_PRINT_DEC, | |
ec5d911a | 1416 | loop_info->n_iterations); |
77d71bdb | 1417 | fputs (".\n", loop_dump_stream); |
1418 | } | |
cdbd2a92 | 1419 | return 1; |
1420 | } | |
1421 | ||
794f30af | 1422 | if (loop_info->iteration_var == 0) |
1423 | { | |
1424 | if (loop_dump_stream) | |
1425 | fprintf (loop_dump_stream, | |
1426 | "Preconditioning: Could not find iteration variable.\n"); | |
1427 | return 0; | |
1428 | } | |
1429 | else if (loop_info->initial_value == 0) | |
cdbd2a92 | 1430 | { |
1431 | if (loop_dump_stream) | |
1432 | fprintf (loop_dump_stream, | |
1433 | "Preconditioning: Could not find initial value.\n"); | |
1434 | return 0; | |
1435 | } | |
ec5d911a | 1436 | else if (loop_info->increment == 0) |
cdbd2a92 | 1437 | { |
1438 | if (loop_dump_stream) | |
1439 | fprintf (loop_dump_stream, | |
1440 | "Preconditioning: Could not find increment value.\n"); | |
1441 | return 0; | |
1442 | } | |
ec5d911a | 1443 | else if (GET_CODE (loop_info->increment) != CONST_INT) |
cdbd2a92 | 1444 | { |
1445 | if (loop_dump_stream) | |
1446 | fprintf (loop_dump_stream, | |
1447 | "Preconditioning: Increment not a constant.\n"); | |
1448 | return 0; | |
1449 | } | |
ec5d911a | 1450 | else if ((exact_log2 (INTVAL (loop_info->increment)) < 0) |
6da6f625 | 1451 | && (exact_log2 (-INTVAL (loop_info->increment)) < 0)) |
cdbd2a92 | 1452 | { |
1453 | if (loop_dump_stream) | |
1454 | fprintf (loop_dump_stream, | |
1455 | "Preconditioning: Increment not a constant power of 2.\n"); | |
1456 | return 0; | |
1457 | } | |
1458 | ||
1459 | /* Unsigned_compare and compare_dir can be ignored here, since they do | |
1460 | not matter for preconditioning. */ | |
1461 | ||
ec5d911a | 1462 | if (loop_info->final_value == 0) |
cdbd2a92 | 1463 | { |
1464 | if (loop_dump_stream) | |
1465 | fprintf (loop_dump_stream, | |
1466 | "Preconditioning: EQ comparison loop.\n"); | |
1467 | return 0; | |
1468 | } | |
1469 | ||
15fc3eb7 | 1470 | /* Must ensure that final_value is invariant, so call |
1471 | loop_invariant_p to check. Before doing so, must check regno | |
1472 | against max_reg_before_loop to make sure that the register is in | |
1473 | the range covered by loop_invariant_p. If it isn't, then it is | |
1474 | most likely a biv/giv which by definition are not invariant. */ | |
ec5d911a | 1475 | if ((GET_CODE (loop_info->final_value) == REG |
1476 | && REGNO (loop_info->final_value) >= max_reg_before_loop) | |
1477 | || (GET_CODE (loop_info->final_value) == PLUS | |
1478 | && REGNO (XEXP (loop_info->final_value, 0)) >= max_reg_before_loop) | |
15fc3eb7 | 1479 | || ! loop_invariant_p (loop, loop_info->final_value)) |
cdbd2a92 | 1480 | { |
1481 | if (loop_dump_stream) | |
1482 | fprintf (loop_dump_stream, | |
1483 | "Preconditioning: Final value not invariant.\n"); | |
1484 | return 0; | |
1485 | } | |
1486 | ||
1487 | /* Fail for floating point values, since the caller of this function | |
1488 | does not have code to deal with them. */ | |
ec5d911a | 1489 | if (GET_MODE_CLASS (GET_MODE (loop_info->final_value)) == MODE_FLOAT |
1490 | || GET_MODE_CLASS (GET_MODE (loop_info->initial_value)) == MODE_FLOAT) | |
cdbd2a92 | 1491 | { |
1492 | if (loop_dump_stream) | |
1493 | fprintf (loop_dump_stream, | |
1494 | "Preconditioning: Floating point final or initial value.\n"); | |
1495 | return 0; | |
1496 | } | |
1497 | ||
ec5d911a | 1498 | /* Fail if loop_info->iteration_var is not live before loop_start, |
1499 | since we need to test its value in the preconditioning code. */ | |
cdbd2a92 | 1500 | |
23e52523 | 1501 | if (REGNO_FIRST_LUID (REGNO (loop_info->iteration_var)) |
cdbd2a92 | 1502 | > INSN_LUID (loop_start)) |
1503 | { | |
1504 | if (loop_dump_stream) | |
1505 | fprintf (loop_dump_stream, | |
1506 | "Preconditioning: Iteration var not live before loop start.\n"); | |
1507 | return 0; | |
1508 | } | |
1509 | ||
b109aca9 | 1510 | /* Note that loop_iterations biases the initial value for GIV iterators |
37d52343 | 1511 | such as "while (i-- > 0)" so that we can calculate the number of |
1512 | iterations just like for BIV iterators. | |
9058b363 | 1513 | |
46134d72 | 1514 | Also note that the absolute values of initial_value and |
1515 | final_value are unimportant as only their difference is used for | |
1516 | calculating the number of loop iterations. */ | |
ec5d911a | 1517 | *initial_value = loop_info->initial_value; |
1518 | *increment = loop_info->increment; | |
1519 | *final_value = loop_info->final_value; | |
cdbd2a92 | 1520 | |
7bb29472 | 1521 | /* Decide what mode to do these calculations in. Choose the larger |
1522 | of final_value's mode and initial_value's mode, or a full-word if | |
1523 | both are constants. */ | |
1524 | *mode = GET_MODE (*final_value); | |
1525 | if (*mode == VOIDmode) | |
1526 | { | |
1527 | *mode = GET_MODE (*initial_value); | |
1528 | if (*mode == VOIDmode) | |
1529 | *mode = word_mode; | |
1530 | } | |
1531 | else if (*mode != GET_MODE (*initial_value) | |
1532 | && (GET_MODE_SIZE (*mode) | |
1533 | < GET_MODE_SIZE (GET_MODE (*initial_value)))) | |
1534 | *mode = GET_MODE (*initial_value); | |
1535 | ||
ec7d7ef9 | 1536 | /* Success! */ |
cdbd2a92 | 1537 | if (loop_dump_stream) |
1538 | fprintf (loop_dump_stream, "Preconditioning: Successful.\n"); | |
1539 | return 1; | |
1540 | } | |
1541 | ||
cdbd2a92 | 1542 | /* All pseudo-registers must be mapped to themselves. Two hard registers |
1543 | must be mapped, VIRTUAL_STACK_VARS_REGNUM and VIRTUAL_INCOMING_ARGS_ | |
1544 | REGNUM, to avoid function-inlining specific conversions of these | |
1545 | registers. All other hard regs can not be mapped because they may be | |
1546 | used with different | |
1547 | modes. */ | |
1548 | ||
1549 | static void | |
1550 | init_reg_map (map, maxregnum) | |
1551 | struct inline_remap *map; | |
1552 | int maxregnum; | |
1553 | { | |
1554 | int i; | |
1555 | ||
1556 | for (i = maxregnum - 1; i > LAST_VIRTUAL_REGISTER; i--) | |
1557 | map->reg_map[i] = regno_reg_rtx[i]; | |
1558 | /* Just clear the rest of the entries. */ | |
1559 | for (i = LAST_VIRTUAL_REGISTER; i >= 0; i--) | |
1560 | map->reg_map[i] = 0; | |
1561 | ||
1562 | map->reg_map[VIRTUAL_STACK_VARS_REGNUM] | |
1563 | = regno_reg_rtx[VIRTUAL_STACK_VARS_REGNUM]; | |
1564 | map->reg_map[VIRTUAL_INCOMING_ARGS_REGNUM] | |
1565 | = regno_reg_rtx[VIRTUAL_INCOMING_ARGS_REGNUM]; | |
1566 | } | |
1567 | \f | |
1568 | /* Strength-reduction will often emit code for optimized biv/givs which | |
1569 | calculates their value in a temporary register, and then copies the result | |
1570 | to the iv. This procedure reconstructs the pattern computing the iv; | |
1571 | verifying that all operands are of the proper form. | |
1572 | ||
997d68fe | 1573 | PATTERN must be the result of single_set. |
cdbd2a92 | 1574 | The return value is the amount that the giv is incremented by. */ |
1575 | ||
1576 | static rtx | |
1577 | calculate_giv_inc (pattern, src_insn, regno) | |
1578 | rtx pattern, src_insn; | |
02e7a332 | 1579 | unsigned int regno; |
cdbd2a92 | 1580 | { |
1581 | rtx increment; | |
9fdb4293 | 1582 | rtx increment_total = 0; |
1583 | int tries = 0; | |
cdbd2a92 | 1584 | |
9fdb4293 | 1585 | retry: |
cdbd2a92 | 1586 | /* Verify that we have an increment insn here. First check for a plus |
1587 | as the set source. */ | |
1588 | if (GET_CODE (SET_SRC (pattern)) != PLUS) | |
1589 | { | |
1590 | /* SR sometimes computes the new giv value in a temp, then copies it | |
1591 | to the new_reg. */ | |
1592 | src_insn = PREV_INSN (src_insn); | |
78494636 | 1593 | pattern = single_set (src_insn); |
cdbd2a92 | 1594 | if (GET_CODE (SET_SRC (pattern)) != PLUS) |
1595 | abort (); | |
9058b363 | 1596 | |
cdbd2a92 | 1597 | /* The last insn emitted is not needed, so delete it to avoid confusing |
1598 | the second cse pass. This insn sets the giv unnecessarily. */ | |
e4bf866d | 1599 | delete_related_insns (get_last_insn ()); |
cdbd2a92 | 1600 | } |
1601 | ||
1602 | /* Verify that we have a constant as the second operand of the plus. */ | |
1603 | increment = XEXP (SET_SRC (pattern), 1); | |
1604 | if (GET_CODE (increment) != CONST_INT) | |
1605 | { | |
1606 | /* SR sometimes puts the constant in a register, especially if it is | |
1607 | too big to be an add immed operand. */ | |
78494636 | 1608 | increment = find_last_value (increment, &src_insn, NULL_RTX, 0); |
cdbd2a92 | 1609 | |
1610 | /* SR may have used LO_SUM to compute the constant if it is too large | |
1611 | for a load immed operand. In this case, the constant is in operand | |
1612 | one of the LO_SUM rtx. */ | |
1613 | if (GET_CODE (increment) == LO_SUM) | |
1614 | increment = XEXP (increment, 1); | |
9342f8f0 | 1615 | |
1616 | /* Some ports store large constants in memory and add a REG_EQUAL | |
1617 | note to the store insn. */ | |
1618 | else if (GET_CODE (increment) == MEM) | |
9058b363 | 1619 | { |
1620 | rtx note = find_reg_note (src_insn, REG_EQUAL, 0); | |
1621 | if (note) | |
1622 | increment = XEXP (note, 0); | |
1623 | } | |
9342f8f0 | 1624 | |
d5ce3443 | 1625 | else if (GET_CODE (increment) == IOR |
c1c664b2 | 1626 | || GET_CODE (increment) == ASHIFT |
1627 | || GET_CODE (increment) == PLUS) | |
bd18220b | 1628 | { |
d5ce3443 | 1629 | /* The rs6000 port loads some constants with IOR. |
c1c664b2 | 1630 | The alpha port loads some constants with ASHIFT and PLUS. */ |
bd18220b | 1631 | rtx second_part = XEXP (increment, 1); |
d5ce3443 | 1632 | enum rtx_code code = GET_CODE (increment); |
bd18220b | 1633 | |
6c34d0c2 | 1634 | increment = find_last_value (XEXP (increment, 0), |
78494636 | 1635 | &src_insn, NULL_RTX, 0); |
bd18220b | 1636 | /* Don't need the last insn anymore. */ |
e4bf866d | 1637 | delete_related_insns (get_last_insn ()); |
bd18220b | 1638 | |
1639 | if (GET_CODE (second_part) != CONST_INT | |
1640 | || GET_CODE (increment) != CONST_INT) | |
1641 | abort (); | |
1642 | ||
d5ce3443 | 1643 | if (code == IOR) |
1644 | increment = GEN_INT (INTVAL (increment) | INTVAL (second_part)); | |
c1c664b2 | 1645 | else if (code == PLUS) |
1646 | increment = GEN_INT (INTVAL (increment) + INTVAL (second_part)); | |
d5ce3443 | 1647 | else |
1648 | increment = GEN_INT (INTVAL (increment) << INTVAL (second_part)); | |
bd18220b | 1649 | } |
cdbd2a92 | 1650 | |
1651 | if (GET_CODE (increment) != CONST_INT) | |
1652 | abort (); | |
9058b363 | 1653 | |
bd18220b | 1654 | /* The insn loading the constant into a register is no longer needed, |
cdbd2a92 | 1655 | so delete it. */ |
e4bf866d | 1656 | delete_related_insns (get_last_insn ()); |
cdbd2a92 | 1657 | } |
1658 | ||
9fdb4293 | 1659 | if (increment_total) |
1660 | increment_total = GEN_INT (INTVAL (increment_total) + INTVAL (increment)); | |
1661 | else | |
1662 | increment_total = increment; | |
1663 | ||
1664 | /* Check that the source register is the same as the register we expected | |
1665 | to see as the source. If not, something is seriously wrong. */ | |
cdbd2a92 | 1666 | if (GET_CODE (XEXP (SET_SRC (pattern), 0)) != REG |
1667 | || REGNO (XEXP (SET_SRC (pattern), 0)) != regno) | |
9fdb4293 | 1668 | { |
1669 | /* Some machines (e.g. the romp), may emit two add instructions for | |
1670 | certain constants, so lets try looking for another add immediately | |
1671 | before this one if we have only seen one add insn so far. */ | |
1672 | ||
1673 | if (tries == 0) | |
1674 | { | |
1675 | tries++; | |
1676 | ||
1677 | src_insn = PREV_INSN (src_insn); | |
78494636 | 1678 | pattern = single_set (src_insn); |
9fdb4293 | 1679 | |
e4bf866d | 1680 | delete_related_insns (get_last_insn ()); |
9fdb4293 | 1681 | |
1682 | goto retry; | |
1683 | } | |
1684 | ||
1685 | abort (); | |
1686 | } | |
cdbd2a92 | 1687 | |
9fdb4293 | 1688 | return increment_total; |
cdbd2a92 | 1689 | } |
1690 | ||
375e70d1 | 1691 | /* Copy REG_NOTES, except for insn references, because not all insn_map |
1692 | entries are valid yet. We do need to copy registers now though, because | |
1693 | the reg_map entries can change during copying. */ | |
1694 | ||
1695 | static rtx | |
1696 | initial_reg_note_copy (notes, map) | |
1697 | rtx notes; | |
1698 | struct inline_remap *map; | |
1699 | { | |
1700 | rtx copy; | |
1701 | ||
1702 | if (notes == 0) | |
1703 | return 0; | |
1704 | ||
1705 | copy = rtx_alloc (GET_CODE (notes)); | |
59e339e9 | 1706 | PUT_REG_NOTE_KIND (copy, REG_NOTE_KIND (notes)); |
375e70d1 | 1707 | |
1708 | if (GET_CODE (notes) == EXPR_LIST) | |
155b05dc | 1709 | XEXP (copy, 0) = copy_rtx_and_substitute (XEXP (notes, 0), map, 0); |
375e70d1 | 1710 | else if (GET_CODE (notes) == INSN_LIST) |
1711 | /* Don't substitute for these yet. */ | |
e9107d97 | 1712 | XEXP (copy, 0) = copy_rtx (XEXP (notes, 0)); |
375e70d1 | 1713 | else |
1714 | abort (); | |
1715 | ||
1716 | XEXP (copy, 1) = initial_reg_note_copy (XEXP (notes, 1), map); | |
1717 | ||
1718 | return copy; | |
1719 | } | |
1720 | ||
1721 | /* Fixup insn references in copied REG_NOTES. */ | |
1722 | ||
1723 | static void | |
59e339e9 | 1724 | final_reg_note_copy (notesp, map) |
1725 | rtx *notesp; | |
375e70d1 | 1726 | struct inline_remap *map; |
1727 | { | |
59e339e9 | 1728 | while (*notesp) |
1729 | { | |
1730 | rtx note = *notesp; | |
6c34d0c2 | 1731 | |
59e339e9 | 1732 | if (GET_CODE (note) == INSN_LIST) |
1733 | { | |
1734 | /* Sometimes, we have a REG_WAS_0 note that points to a | |
1735 | deleted instruction. In that case, we can just delete the | |
1736 | note. */ | |
1737 | if (REG_NOTE_KIND (note) == REG_WAS_0) | |
1738 | { | |
1739 | *notesp = XEXP (note, 1); | |
1740 | continue; | |
1741 | } | |
1742 | else | |
1743 | { | |
1744 | rtx insn = map->insn_map[INSN_UID (XEXP (note, 0))]; | |
1745 | ||
d1d13567 | 1746 | /* If we failed to remap the note, something is awry. |
1747 | Allow REG_LABEL as it may reference label outside | |
1748 | the unrolled loop. */ | |
59e339e9 | 1749 | if (!insn) |
d1d13567 | 1750 | { |
1751 | if (REG_NOTE_KIND (note) != REG_LABEL) | |
1752 | abort (); | |
1753 | } | |
1754 | else | |
1755 | XEXP (note, 0) = insn; | |
59e339e9 | 1756 | } |
1757 | } | |
375e70d1 | 1758 | |
59e339e9 | 1759 | notesp = &XEXP (note, 1); |
1760 | } | |
375e70d1 | 1761 | } |
cdbd2a92 | 1762 | |
1763 | /* Copy each instruction in the loop, substituting from map as appropriate. | |
1764 | This is very similar to a loop in expand_inline_function. */ | |
9058b363 | 1765 | |
cdbd2a92 | 1766 | static void |
8ec5f078 | 1767 | copy_loop_body (loop, copy_start, copy_end, map, exit_label, last_iteration, |
cdbd2a92 | 1768 | unroll_type, start_label, loop_end, insert_before, |
1769 | copy_notes_from) | |
8ec5f078 | 1770 | struct loop *loop; |
cdbd2a92 | 1771 | rtx copy_start, copy_end; |
1772 | struct inline_remap *map; | |
dc24bb51 | 1773 | rtx exit_label; |
cdbd2a92 | 1774 | int last_iteration; |
1775 | enum unroll_types unroll_type; | |
1776 | rtx start_label, loop_end, insert_before, copy_notes_from; | |
1777 | { | |
8ec5f078 | 1778 | struct loop_ivs *ivs = LOOP_IVS (loop); |
cdbd2a92 | 1779 | rtx insn, pattern; |
ef2c4a29 | 1780 | rtx set, tem, copy = NULL_RTX; |
cdbd2a92 | 1781 | int dest_reg_was_split, i; |
7d27e4c9 | 1782 | #ifdef HAVE_cc0 |
cdbd2a92 | 1783 | rtx cc0_insn = 0; |
7d27e4c9 | 1784 | #endif |
cdbd2a92 | 1785 | rtx final_label = 0; |
1786 | rtx giv_inc, giv_dest_reg, giv_src_reg; | |
1787 | ||
1788 | /* If this isn't the last iteration, then map any references to the | |
1789 | start_label to final_label. Final label will then be emitted immediately | |
1790 | after the end of this loop body if it was ever used. | |
1791 | ||
1792 | If this is the last iteration, then map references to the start_label | |
1793 | to itself. */ | |
1794 | if (! last_iteration) | |
1795 | { | |
1796 | final_label = gen_label_rtx (); | |
6da6f625 | 1797 | set_label_in_map (map, CODE_LABEL_NUMBER (start_label), final_label); |
cdbd2a92 | 1798 | } |
1799 | else | |
c3dfa4e8 | 1800 | set_label_in_map (map, CODE_LABEL_NUMBER (start_label), start_label); |
cdbd2a92 | 1801 | |
1802 | start_sequence (); | |
9058b363 | 1803 | |
cdbd2a92 | 1804 | insn = copy_start; |
1805 | do | |
1806 | { | |
1807 | insn = NEXT_INSN (insn); | |
9058b363 | 1808 | |
cdbd2a92 | 1809 | map->orig_asm_operands_vector = 0; |
9058b363 | 1810 | |
cdbd2a92 | 1811 | switch (GET_CODE (insn)) |
1812 | { | |
1813 | case INSN: | |
1814 | pattern = PATTERN (insn); | |
1815 | copy = 0; | |
1816 | giv_inc = 0; | |
9058b363 | 1817 | |
cdbd2a92 | 1818 | /* Check to see if this is a giv that has been combined with |
9058b363 | 1819 | some split address givs. (Combined in the sense that |
cdbd2a92 | 1820 | `combine_givs' in loop.c has put two givs in the same register.) |
1821 | In this case, we must search all givs based on the same biv to | |
1822 | find the address givs. Then split the address givs. | |
1823 | Do this before splitting the giv, since that may map the | |
1824 | SET_DEST to a new register. */ | |
9058b363 | 1825 | |
997d68fe | 1826 | if ((set = single_set (insn)) |
1827 | && GET_CODE (SET_DEST (set)) == REG | |
1828 | && addr_combined_regs[REGNO (SET_DEST (set))]) | |
cdbd2a92 | 1829 | { |
1830 | struct iv_class *bl; | |
1831 | struct induction *v, *tv; | |
02e7a332 | 1832 | unsigned int regno = REGNO (SET_DEST (set)); |
9058b363 | 1833 | |
997d68fe | 1834 | v = addr_combined_regs[REGNO (SET_DEST (set))]; |
b995c337 | 1835 | bl = REG_IV_CLASS (ivs, REGNO (v->src_reg)); |
9058b363 | 1836 | |
cdbd2a92 | 1837 | /* Although the giv_inc amount is not needed here, we must call |
1838 | calculate_giv_inc here since it might try to delete the | |
1839 | last insn emitted. If we wait until later to call it, | |
1840 | we might accidentally delete insns generated immediately | |
1841 | below by emit_unrolled_add. */ | |
1842 | ||
7912da71 | 1843 | giv_inc = calculate_giv_inc (set, insn, regno); |
cdbd2a92 | 1844 | |
1845 | /* Now find all address giv's that were combined with this | |
1846 | giv 'v'. */ | |
1847 | for (tv = bl->giv; tv; tv = tv->next_iv) | |
1848 | if (tv->giv_type == DEST_ADDR && tv->same == v) | |
1849 | { | |
c5a80968 | 1850 | int this_giv_inc; |
1851 | ||
1852 | /* If this DEST_ADDR giv was not split, then ignore it. */ | |
1853 | if (*tv->location != tv->dest_reg) | |
1854 | continue; | |
9e562be2 | 1855 | |
1856 | /* Scale this_giv_inc if the multiplicative factors of | |
1857 | the two givs are different. */ | |
c5a80968 | 1858 | this_giv_inc = INTVAL (giv_inc); |
9e562be2 | 1859 | if (tv->mult_val != v->mult_val) |
1860 | this_giv_inc = (this_giv_inc / INTVAL (v->mult_val) | |
1861 | * INTVAL (tv->mult_val)); | |
9058b363 | 1862 | |
9e562be2 | 1863 | tv->dest_reg = plus_constant (tv->dest_reg, this_giv_inc); |
cdbd2a92 | 1864 | *tv->location = tv->dest_reg; |
9058b363 | 1865 | |
cdbd2a92 | 1866 | if (last_iteration && unroll_type != UNROLL_COMPLETELY) |
1867 | { | |
1868 | /* Must emit an insn to increment the split address | |
1869 | giv. Add in the const_adjust field in case there | |
1870 | was a constant eliminated from the address. */ | |
1871 | rtx value, dest_reg; | |
9058b363 | 1872 | |
cdbd2a92 | 1873 | /* tv->dest_reg will be either a bare register, |
1874 | or else a register plus a constant. */ | |
1875 | if (GET_CODE (tv->dest_reg) == REG) | |
1876 | dest_reg = tv->dest_reg; | |
1877 | else | |
1878 | dest_reg = XEXP (tv->dest_reg, 0); | |
9058b363 | 1879 | |
84f97156 | 1880 | /* Check for shared address givs, and avoid |
c3418f42 | 1881 | incrementing the shared pseudo reg more than |
84f97156 | 1882 | once. */ |
ea0cb7ae | 1883 | if (! tv->same_insn && ! tv->shared) |
84f97156 | 1884 | { |
1885 | /* tv->dest_reg may actually be a (PLUS (REG) | |
1886 | (CONST)) here, so we must call plus_constant | |
1887 | to add the const_adjust amount before calling | |
1888 | emit_unrolled_add below. */ | |
1889 | value = plus_constant (tv->dest_reg, | |
1890 | tv->const_adjust); | |
1891 | ||
34f6bd81 | 1892 | if (GET_CODE (value) == PLUS) |
1893 | { | |
1894 | /* The constant could be too large for an add | |
1895 | immediate, so can't directly emit an insn | |
1896 | here. */ | |
1897 | emit_unrolled_add (dest_reg, XEXP (value, 0), | |
1898 | XEXP (value, 1)); | |
1899 | } | |
84f97156 | 1900 | } |
9058b363 | 1901 | |
cdbd2a92 | 1902 | /* Reset the giv to be just the register again, in case |
e45ce1bf | 1903 | it is used after the set we have just emitted. |
1904 | We must subtract the const_adjust factor added in | |
1905 | above. */ | |
1906 | tv->dest_reg = plus_constant (dest_reg, | |
6da6f625 | 1907 | -tv->const_adjust); |
cdbd2a92 | 1908 | *tv->location = tv->dest_reg; |
1909 | } | |
1910 | } | |
1911 | } | |
9058b363 | 1912 | |
cdbd2a92 | 1913 | /* If this is a setting of a splittable variable, then determine |
1914 | how to split the variable, create a new set based on this split, | |
1915 | and set up the reg_map so that later uses of the variable will | |
1916 | use the new split variable. */ | |
9058b363 | 1917 | |
cdbd2a92 | 1918 | dest_reg_was_split = 0; |
9058b363 | 1919 | |
997d68fe | 1920 | if ((set = single_set (insn)) |
1921 | && GET_CODE (SET_DEST (set)) == REG | |
1922 | && splittable_regs[REGNO (SET_DEST (set))]) | |
cdbd2a92 | 1923 | { |
02e7a332 | 1924 | unsigned int regno = REGNO (SET_DEST (set)); |
1925 | unsigned int src_regno; | |
9058b363 | 1926 | |
cdbd2a92 | 1927 | dest_reg_was_split = 1; |
9058b363 | 1928 | |
997d68fe | 1929 | giv_dest_reg = SET_DEST (set); |
7912da71 | 1930 | giv_src_reg = giv_dest_reg; |
1931 | /* Compute the increment value for the giv, if it wasn't | |
1932 | already computed above. */ | |
1933 | if (giv_inc == 0) | |
1934 | giv_inc = calculate_giv_inc (set, insn, regno); | |
1935 | ||
8bd88b68 | 1936 | src_regno = REGNO (giv_src_reg); |
cdbd2a92 | 1937 | |
1938 | if (unroll_type == UNROLL_COMPLETELY) | |
1939 | { | |
1940 | /* Completely unrolling the loop. Set the induction | |
1941 | variable to a known constant value. */ | |
9058b363 | 1942 | |
cdbd2a92 | 1943 | /* The value in splittable_regs may be an invariant |
1944 | value, so we must use plus_constant here. */ | |
1945 | splittable_regs[regno] | |
8bd88b68 | 1946 | = plus_constant (splittable_regs[src_regno], |
1947 | INTVAL (giv_inc)); | |
cdbd2a92 | 1948 | |
1949 | if (GET_CODE (splittable_regs[regno]) == PLUS) | |
1950 | { | |
1951 | giv_src_reg = XEXP (splittable_regs[regno], 0); | |
1952 | giv_inc = XEXP (splittable_regs[regno], 1); | |
1953 | } | |
1954 | else | |
1955 | { | |
1956 | /* The splittable_regs value must be a REG or a | |
1957 | CONST_INT, so put the entire value in the giv_src_reg | |
1958 | variable. */ | |
1959 | giv_src_reg = splittable_regs[regno]; | |
1960 | giv_inc = const0_rtx; | |
1961 | } | |
1962 | } | |
1963 | else | |
1964 | { | |
1965 | /* Partially unrolling loop. Create a new pseudo | |
1966 | register for the iteration variable, and set it to | |
1967 | be a constant plus the original register. Except | |
1968 | on the last iteration, when the result has to | |
1969 | go back into the original iteration var register. */ | |
9058b363 | 1970 | |
cdbd2a92 | 1971 | /* Handle bivs which must be mapped to a new register |
1972 | when split. This happens for bivs which need their | |
1973 | final value set before loop entry. The new register | |
1974 | for the biv was stored in the biv's first struct | |
1975 | induction entry by find_splittable_regs. */ | |
1976 | ||
d1908618 | 1977 | if (regno < ivs->n_regs |
8ec5f078 | 1978 | && REG_IV_TYPE (ivs, regno) == BASIC_INDUCT) |
cdbd2a92 | 1979 | { |
b995c337 | 1980 | giv_src_reg = REG_IV_CLASS (ivs, regno)->biv->src_reg; |
cdbd2a92 | 1981 | giv_dest_reg = giv_src_reg; |
1982 | } | |
9058b363 | 1983 | |
cdbd2a92 | 1984 | #if 0 |
1985 | /* If non-reduced/final-value givs were split, then | |
1986 | this would have to remap those givs also. See | |
1987 | find_splittable_regs. */ | |
1988 | #endif | |
9058b363 | 1989 | |
cdbd2a92 | 1990 | splittable_regs[regno] |
4ff73812 | 1991 | = simplify_gen_binary (PLUS, GET_MODE (giv_src_reg), |
1992 | giv_inc, | |
1993 | splittable_regs[src_regno]); | |
cdbd2a92 | 1994 | giv_inc = splittable_regs[regno]; |
9058b363 | 1995 | |
cdbd2a92 | 1996 | /* Now split the induction variable by changing the dest |
1997 | of this insn to a new register, and setting its | |
1998 | reg_map entry to point to this new register. | |
1999 | ||
2000 | If this is the last iteration, and this is the last insn | |
2001 | that will update the iv, then reuse the original dest, | |
2002 | to ensure that the iv will have the proper value when | |
2003 | the loop exits or repeats. | |
2004 | ||
2005 | Using splittable_regs_updates here like this is safe, | |
2006 | because it can only be greater than one if all | |
2007 | instructions modifying the iv are always executed in | |
2008 | order. */ | |
2009 | ||
2010 | if (! last_iteration | |
2011 | || (splittable_regs_updates[regno]-- != 1)) | |
2012 | { | |
2013 | tem = gen_reg_rtx (GET_MODE (giv_src_reg)); | |
2014 | giv_dest_reg = tem; | |
2015 | map->reg_map[regno] = tem; | |
1a6c3add | 2016 | record_base_value (REGNO (tem), |
2017 | giv_inc == const0_rtx | |
2018 | ? giv_src_reg | |
2019 | : gen_rtx_PLUS (GET_MODE (giv_src_reg), | |
2020 | giv_src_reg, giv_inc), | |
2021 | 1); | |
cdbd2a92 | 2022 | } |
2023 | else | |
2024 | map->reg_map[regno] = giv_src_reg; | |
2025 | } | |
2026 | ||
2027 | /* The constant being added could be too large for an add | |
2028 | immediate, so can't directly emit an insn here. */ | |
2029 | emit_unrolled_add (giv_dest_reg, giv_src_reg, giv_inc); | |
2030 | copy = get_last_insn (); | |
2031 | pattern = PATTERN (copy); | |
2032 | } | |
2033 | else | |
2034 | { | |
155b05dc | 2035 | pattern = copy_rtx_and_substitute (pattern, map, 0); |
cdbd2a92 | 2036 | copy = emit_insn (pattern); |
2037 | } | |
375e70d1 | 2038 | REG_NOTES (copy) = initial_reg_note_copy (REG_NOTES (insn), map); |
ab87d1bc | 2039 | INSN_SCOPE (copy) = INSN_SCOPE (insn); |
9058b363 | 2040 | |
6d470b77 | 2041 | /* If there is a REG_EQUAL note present whose value |
2042 | is not loop invariant, then delete it, since it | |
2043 | may cause problems with later optimization passes. */ | |
2044 | if ((tem = find_reg_note (copy, REG_EQUAL, NULL_RTX)) | |
2045 | && !loop_invariant_p (loop, XEXP (tem, 0))) | |
2046 | remove_note (copy, tem); | |
2047 | ||
cdbd2a92 | 2048 | #ifdef HAVE_cc0 |
2049 | /* If this insn is setting CC0, it may need to look at | |
2050 | the insn that uses CC0 to see what type of insn it is. | |
2051 | In that case, the call to recog via validate_change will | |
2052 | fail. So don't substitute constants here. Instead, | |
2053 | do it when we emit the following insn. | |
2054 | ||
2055 | For example, see the pyr.md file. That machine has signed and | |
2056 | unsigned compares. The compare patterns must check the | |
2057 | following branch insn to see which what kind of compare to | |
2058 | emit. | |
2059 | ||
2060 | If the previous insn set CC0, substitute constants on it as | |
2061 | well. */ | |
5f96731e | 2062 | if (sets_cc0_p (PATTERN (copy)) != 0) |
cdbd2a92 | 2063 | cc0_insn = copy; |
2064 | else | |
2065 | { | |
2066 | if (cc0_insn) | |
2067 | try_constants (cc0_insn, map); | |
2068 | cc0_insn = 0; | |
2069 | try_constants (copy, map); | |
2070 | } | |
2071 | #else | |
2072 | try_constants (copy, map); | |
2073 | #endif | |
2074 | ||
2075 | /* Make split induction variable constants `permanent' since we | |
2076 | know there are no backward branches across iteration variable | |
2077 | settings which would invalidate this. */ | |
36a561f2 | 2078 | if (dest_reg_was_split) |
cdbd2a92 | 2079 | { |
32397bcd | 2080 | int regno = REGNO (SET_DEST (set)); |
cdbd2a92 | 2081 | |
df9f2bb6 | 2082 | if ((size_t) regno < VARRAY_SIZE (map->const_equiv_varray) |
433c8fce | 2083 | && (VARRAY_CONST_EQUIV (map->const_equiv_varray, regno).age |
2084 | == map->const_age)) | |
2085 | VARRAY_CONST_EQUIV (map->const_equiv_varray, regno).age = -1; | |
cdbd2a92 | 2086 | } |
2087 | break; | |
9058b363 | 2088 | |
cdbd2a92 | 2089 | case JUMP_INSN: |
155b05dc | 2090 | pattern = copy_rtx_and_substitute (PATTERN (insn), map, 0); |
c5bbc9bd | 2091 | copy = emit_jump_insn (pattern); |
375e70d1 | 2092 | REG_NOTES (copy) = initial_reg_note_copy (REG_NOTES (insn), map); |
ab87d1bc | 2093 | INSN_SCOPE (copy) = INSN_SCOPE (insn); |
c5bbc9bd | 2094 | |
111f982d | 2095 | if (JUMP_LABEL (insn)) |
cdbd2a92 | 2096 | { |
ba08b7e7 | 2097 | JUMP_LABEL (copy) = get_label_from_map (map, |
2098 | CODE_LABEL_NUMBER | |
2099 | (JUMP_LABEL (insn))); | |
cc89632f | 2100 | LABEL_NUSES (JUMP_LABEL (copy))++; |
111f982d | 2101 | } |
2102 | if (JUMP_LABEL (insn) == start_label && insn == copy_end | |
2103 | && ! last_iteration) | |
2104 | { | |
cc89632f | 2105 | |
cdbd2a92 | 2106 | /* This is a branch to the beginning of the loop; this is the |
2107 | last insn being copied; and this is not the last iteration. | |
2108 | In this case, we want to change the original fall through | |
2109 | case to be a branch past the end of the loop, and the | |
2110 | original jump label case to fall_through. */ | |
f07354fb | 2111 | |
ba08b7e7 | 2112 | if (!invert_jump (copy, exit_label, 0)) |
c8ff8daa | 2113 | { |
2114 | rtx jmp; | |
2115 | rtx lab = gen_label_rtx (); | |
c3418f42 | 2116 | /* Can't do it by reversing the jump (probably because we |
2117 | couldn't reverse the conditions), so emit a new | |
c8ff8daa | 2118 | jump_insn after COPY, and redirect the jump around |
2119 | that. */ | |
2120 | jmp = emit_jump_insn_after (gen_jump (exit_label), copy); | |
870d0994 | 2121 | JUMP_LABEL (jmp) = exit_label; |
0b97e93b | 2122 | LABEL_NUSES (exit_label)++; |
c8ff8daa | 2123 | jmp = emit_barrier_after (jmp); |
2124 | emit_label_after (lab, jmp); | |
2125 | LABEL_NUSES (lab) = 0; | |
ba08b7e7 | 2126 | if (!redirect_jump (copy, lab, 0)) |
6da6f625 | 2127 | abort (); |
c8ff8daa | 2128 | } |
cdbd2a92 | 2129 | } |
9058b363 | 2130 | |
cdbd2a92 | 2131 | #ifdef HAVE_cc0 |
2132 | if (cc0_insn) | |
2133 | try_constants (cc0_insn, map); | |
2134 | cc0_insn = 0; | |
2135 | #endif | |
2136 | try_constants (copy, map); | |
2137 | ||
2138 | /* Set the jump label of COPY correctly to avoid problems with | |
2139 | later passes of unroll_loop, if INSN had jump label set. */ | |
2140 | if (JUMP_LABEL (insn)) | |
2141 | { | |
7c2cd568 | 2142 | rtx label = 0; |
2143 | ||
cdbd2a92 | 2144 | /* Can't use the label_map for every insn, since this may be |
2145 | the backward branch, and hence the label was not mapped. */ | |
997d68fe | 2146 | if ((set = single_set (copy))) |
cdbd2a92 | 2147 | { |
997d68fe | 2148 | tem = SET_SRC (set); |
cdbd2a92 | 2149 | if (GET_CODE (tem) == LABEL_REF) |
7c2cd568 | 2150 | label = XEXP (tem, 0); |
cdbd2a92 | 2151 | else if (GET_CODE (tem) == IF_THEN_ELSE) |
2152 | { | |
2153 | if (XEXP (tem, 1) != pc_rtx) | |
7c2cd568 | 2154 | label = XEXP (XEXP (tem, 1), 0); |
cdbd2a92 | 2155 | else |
7c2cd568 | 2156 | label = XEXP (XEXP (tem, 2), 0); |
cdbd2a92 | 2157 | } |
cdbd2a92 | 2158 | } |
7c2cd568 | 2159 | |
2160 | if (label && GET_CODE (label) == CODE_LABEL) | |
2161 | JUMP_LABEL (copy) = label; | |
cdbd2a92 | 2162 | else |
2163 | { | |
2164 | /* An unrecognizable jump insn, probably the entry jump | |
2165 | for a switch statement. This label must have been mapped, | |
2166 | so just use the label_map to get the new jump label. */ | |
8ef7054a | 2167 | JUMP_LABEL (copy) |
9058b363 | 2168 | = get_label_from_map (map, |
2169 | CODE_LABEL_NUMBER (JUMP_LABEL (insn))); | |
cdbd2a92 | 2170 | } |
9058b363 | 2171 | |
cdbd2a92 | 2172 | /* If this is a non-local jump, then must increase the label |
2173 | use count so that the label will not be deleted when the | |
2174 | original jump is deleted. */ | |
2175 | LABEL_NUSES (JUMP_LABEL (copy))++; | |
2176 | } | |
2177 | else if (GET_CODE (PATTERN (copy)) == ADDR_VEC | |
2178 | || GET_CODE (PATTERN (copy)) == ADDR_DIFF_VEC) | |
2179 | { | |
2180 | rtx pat = PATTERN (copy); | |
2181 | int diff_vec_p = GET_CODE (pat) == ADDR_DIFF_VEC; | |
2182 | int len = XVECLEN (pat, diff_vec_p); | |
2183 | int i; | |
2184 | ||
2185 | for (i = 0; i < len; i++) | |
2186 | LABEL_NUSES (XEXP (XVECEXP (pat, diff_vec_p, i), 0))++; | |
2187 | } | |
2188 | ||
2189 | /* If this used to be a conditional jump insn but whose branch | |
2190 | direction is now known, we must do something special. */ | |
b2816317 | 2191 | if (any_condjump_p (insn) && onlyjump_p (insn) && map->last_pc_value) |
cdbd2a92 | 2192 | { |
2193 | #ifdef HAVE_cc0 | |
1a29b174 | 2194 | /* If the previous insn set cc0 for us, delete it. */ |
2dcd83ba | 2195 | if (only_sets_cc0_p (PREV_INSN (copy))) |
e4bf866d | 2196 | delete_related_insns (PREV_INSN (copy)); |
cdbd2a92 | 2197 | #endif |
2198 | ||
2199 | /* If this is now a no-op, delete it. */ | |
2200 | if (map->last_pc_value == pc_rtx) | |
2201 | { | |
b36d64df | 2202 | delete_insn (copy); |
cdbd2a92 | 2203 | copy = 0; |
2204 | } | |
2205 | else | |
2206 | /* Otherwise, this is unconditional jump so we must put a | |
2207 | BARRIER after it. We could do some dead code elimination | |
2208 | here, but jump.c will do it just as well. */ | |
2209 | emit_barrier (); | |
2210 | } | |
2211 | break; | |
9058b363 | 2212 | |
cdbd2a92 | 2213 | case CALL_INSN: |
155b05dc | 2214 | pattern = copy_rtx_and_substitute (PATTERN (insn), map, 0); |
cdbd2a92 | 2215 | copy = emit_call_insn (pattern); |
375e70d1 | 2216 | REG_NOTES (copy) = initial_reg_note_copy (REG_NOTES (insn), map); |
ab87d1bc | 2217 | INSN_SCOPE (copy) = INSN_SCOPE (insn); |
f456a8bd | 2218 | SIBLING_CALL_P (copy) = SIBLING_CALL_P (insn); |
2936db7d | 2219 | CONST_OR_PURE_CALL_P (copy) = CONST_OR_PURE_CALL_P (insn); |
cdbd2a92 | 2220 | |
819e2036 | 2221 | /* Because the USAGE information potentially contains objects other |
2222 | than hard registers, we need to copy it. */ | |
0bc644e0 | 2223 | CALL_INSN_FUNCTION_USAGE (copy) |
155b05dc | 2224 | = copy_rtx_and_substitute (CALL_INSN_FUNCTION_USAGE (insn), |
2225 | map, 0); | |
819e2036 | 2226 | |
cdbd2a92 | 2227 | #ifdef HAVE_cc0 |
2228 | if (cc0_insn) | |
2229 | try_constants (cc0_insn, map); | |
2230 | cc0_insn = 0; | |
2231 | #endif | |
2232 | try_constants (copy, map); | |
2233 | ||
2234 | /* Be lazy and assume CALL_INSNs clobber all hard registers. */ | |
2235 | for (i = 0; i < FIRST_PSEUDO_REGISTER; i++) | |
433c8fce | 2236 | VARRAY_CONST_EQUIV (map->const_equiv_varray, i).rtx = 0; |
cdbd2a92 | 2237 | break; |
9058b363 | 2238 | |
cdbd2a92 | 2239 | case CODE_LABEL: |
2240 | /* If this is the loop start label, then we don't need to emit a | |
2241 | copy of this label since no one will use it. */ | |
2242 | ||
2243 | if (insn != start_label) | |
2244 | { | |
c3dfa4e8 | 2245 | copy = emit_label (get_label_from_map (map, |
2246 | CODE_LABEL_NUMBER (insn))); | |
cdbd2a92 | 2247 | map->const_age++; |
2248 | } | |
2249 | break; | |
9058b363 | 2250 | |
cdbd2a92 | 2251 | case BARRIER: |
2252 | copy = emit_barrier (); | |
2253 | break; | |
9058b363 | 2254 | |
cdbd2a92 | 2255 | case NOTE: |
5d28e08b | 2256 | /* VTOP and CONT notes are valid only before the loop exit test. |
2257 | If placed anywhere else, loop may generate bad code. */ | |
71caadc0 | 2258 | /* BASIC_BLOCK notes exist to stabilize basic block structures with |
6da6f625 | 2259 | the associated rtl. We do not want to share the structure in |
71caadc0 | 2260 | this new block. */ |
9058b363 | 2261 | |
6e79311f | 2262 | if (NOTE_LINE_NUMBER (insn) != NOTE_INSN_DELETED |
74b0991d | 2263 | && NOTE_LINE_NUMBER (insn) != NOTE_INSN_DELETED_LABEL |
71caadc0 | 2264 | && NOTE_LINE_NUMBER (insn) != NOTE_INSN_BASIC_BLOCK |
5d28e08b | 2265 | && ((NOTE_LINE_NUMBER (insn) != NOTE_INSN_LOOP_VTOP |
2266 | && NOTE_LINE_NUMBER (insn) != NOTE_INSN_LOOP_CONT) | |
6e79311f | 2267 | || (last_iteration && unroll_type != UNROLL_COMPLETELY))) |
cdbd2a92 | 2268 | copy = emit_note (NOTE_SOURCE_FILE (insn), |
2269 | NOTE_LINE_NUMBER (insn)); | |
2270 | else | |
2271 | copy = 0; | |
2272 | break; | |
9058b363 | 2273 | |
cdbd2a92 | 2274 | default: |
2275 | abort (); | |
cdbd2a92 | 2276 | } |
9058b363 | 2277 | |
cdbd2a92 | 2278 | map->insn_map[INSN_UID (insn)] = copy; |
2279 | } | |
2280 | while (insn != copy_end); | |
9058b363 | 2281 | |
375e70d1 | 2282 | /* Now finish coping the REG_NOTES. */ |
76ab4c9e | 2283 | insn = copy_start; |
2284 | do | |
2285 | { | |
2286 | insn = NEXT_INSN (insn); | |
2287 | if ((GET_CODE (insn) == INSN || GET_CODE (insn) == JUMP_INSN | |
2288 | || GET_CODE (insn) == CALL_INSN) | |
2289 | && map->insn_map[INSN_UID (insn)]) | |
59e339e9 | 2290 | final_reg_note_copy (®_NOTES (map->insn_map[INSN_UID (insn)]), map); |
76ab4c9e | 2291 | } |
2292 | while (insn != copy_end); | |
2293 | ||
cdbd2a92 | 2294 | /* There may be notes between copy_notes_from and loop_end. Emit a copy of |
2295 | each of these notes here, since there may be some important ones, such as | |
2296 | NOTE_INSN_BLOCK_END notes, in this group. We don't do this on the last | |
2297 | iteration, because the original notes won't be deleted. | |
2298 | ||
2299 | We can't use insert_before here, because when from preconditioning, | |
2300 | insert_before points before the loop. We can't use copy_end, because | |
2301 | there may be insns already inserted after it (which we don't want to | |
2302 | copy) when not from preconditioning code. */ | |
2303 | ||
2304 | if (! last_iteration) | |
2305 | { | |
2306 | for (insn = copy_notes_from; insn != loop_end; insn = NEXT_INSN (insn)) | |
2307 | { | |
dffae35b | 2308 | /* VTOP notes are valid only before the loop exit test. |
2309 | If placed anywhere else, loop may generate bad code. | |
349e0af1 | 2310 | Although COPY_NOTES_FROM will be at most one or two (for cc0) |
2311 | instructions before the last insn in the loop, COPY_NOTES_FROM | |
2312 | can be a NOTE_INSN_LOOP_CONT note if there is no VTOP note, | |
2313 | as in a do .. while loop. */ | |
cdbd2a92 | 2314 | if (GET_CODE (insn) == NOTE |
71caadc0 | 2315 | && NOTE_LINE_NUMBER (insn) != NOTE_INSN_DELETED |
dffae35b | 2316 | && NOTE_LINE_NUMBER (insn) != NOTE_INSN_BASIC_BLOCK |
349e0af1 | 2317 | && NOTE_LINE_NUMBER (insn) != NOTE_INSN_LOOP_VTOP |
2318 | && NOTE_LINE_NUMBER (insn) != NOTE_INSN_LOOP_CONT) | |
cdbd2a92 | 2319 | emit_note (NOTE_SOURCE_FILE (insn), NOTE_LINE_NUMBER (insn)); |
2320 | } | |
2321 | } | |
2322 | ||
2323 | if (final_label && LABEL_NUSES (final_label) > 0) | |
2324 | emit_label (final_label); | |
2325 | ||
31d3e01c | 2326 | tem = get_insns (); |
cdbd2a92 | 2327 | end_sequence (); |
0ab94f0c | 2328 | loop_insn_emit_before (loop, 0, insert_before, tem); |
cdbd2a92 | 2329 | } |
2330 | \f | |
2331 | /* Emit an insn, using the expand_binop to ensure that a valid insn is | |
2332 | emitted. This will correctly handle the case where the increment value | |
2333 | won't fit in the immediate field of a PLUS insns. */ | |
2334 | ||
2335 | void | |
2336 | emit_unrolled_add (dest_reg, src_reg, increment) | |
2337 | rtx dest_reg, src_reg, increment; | |
2338 | { | |
2339 | rtx result; | |
2340 | ||
ad99e708 | 2341 | result = expand_simple_binop (GET_MODE (dest_reg), PLUS, src_reg, increment, |
2342 | dest_reg, 0, OPTAB_LIB_WIDEN); | |
cdbd2a92 | 2343 | |
2344 | if (dest_reg != result) | |
2345 | emit_move_insn (dest_reg, result); | |
2346 | } | |
2347 | \f | |
15fc3eb7 | 2348 | /* Searches the insns between INSN and LOOP->END. Returns 1 if there |
cdbd2a92 | 2349 | is a backward branch in that range that branches to somewhere between |
15fc3eb7 | 2350 | LOOP->START and INSN. Returns 0 otherwise. */ |
cdbd2a92 | 2351 | |
4bbea254 | 2352 | /* ??? This is quadratic algorithm. Could be rewritten to be linear. |
cdbd2a92 | 2353 | In practice, this is not a problem, because this function is seldom called, |
2354 | and uses a negligible amount of CPU time on average. */ | |
2355 | ||
06f19437 | 2356 | int |
15fc3eb7 | 2357 | back_branch_in_range_p (loop, insn) |
2358 | const struct loop *loop; | |
cdbd2a92 | 2359 | rtx insn; |
cdbd2a92 | 2360 | { |
2361 | rtx p, q, target_insn; | |
15fc3eb7 | 2362 | rtx loop_start = loop->start; |
2363 | rtx loop_end = loop->end; | |
2364 | rtx orig_loop_end = loop->end; | |
cdbd2a92 | 2365 | |
2366 | /* Stop before we get to the backward branch at the end of the loop. */ | |
2367 | loop_end = prev_nonnote_insn (loop_end); | |
2368 | if (GET_CODE (loop_end) == BARRIER) | |
2369 | loop_end = PREV_INSN (loop_end); | |
2370 | ||
2371 | /* Check in case insn has been deleted, search forward for first non | |
2372 | deleted insn following it. */ | |
2373 | while (INSN_DELETED_P (insn)) | |
2374 | insn = NEXT_INSN (insn); | |
2375 | ||
ad87de1e | 2376 | /* Check for the case where insn is the last insn in the loop. Deal |
2377 | with the case where INSN was a deleted loop test insn, in which case | |
2378 | it will now be the NOTE_LOOP_END. */ | |
2379 | if (insn == loop_end || insn == orig_loop_end) | |
cdbd2a92 | 2380 | return 0; |
2381 | ||
2382 | for (p = NEXT_INSN (insn); p != loop_end; p = NEXT_INSN (p)) | |
2383 | { | |
2384 | if (GET_CODE (p) == JUMP_INSN) | |
2385 | { | |
2386 | target_insn = JUMP_LABEL (p); | |
9058b363 | 2387 | |
cdbd2a92 | 2388 | /* Search from loop_start to insn, to see if one of them is |
2389 | the target_insn. We can't use INSN_LUID comparisons here, | |
2390 | since insn may not have an LUID entry. */ | |
2391 | for (q = loop_start; q != insn; q = NEXT_INSN (q)) | |
2392 | if (q == target_insn) | |
2393 | return 1; | |
2394 | } | |
2395 | } | |
2396 | ||
2397 | return 0; | |
2398 | } | |
2399 | ||
2400 | /* Try to generate the simplest rtx for the expression | |
2401 | (PLUS (MULT mult1 mult2) add1). This is used to calculate the initial | |
2402 | value of giv's. */ | |
2403 | ||
2404 | static rtx | |
2405 | fold_rtx_mult_add (mult1, mult2, add1, mode) | |
2406 | rtx mult1, mult2, add1; | |
2407 | enum machine_mode mode; | |
2408 | { | |
2409 | rtx temp, mult_res; | |
2410 | rtx result; | |
2411 | ||
2412 | /* The modes must all be the same. This should always be true. For now, | |
2413 | check to make sure. */ | |
2414 | if ((GET_MODE (mult1) != mode && GET_MODE (mult1) != VOIDmode) | |
2415 | || (GET_MODE (mult2) != mode && GET_MODE (mult2) != VOIDmode) | |
2416 | || (GET_MODE (add1) != mode && GET_MODE (add1) != VOIDmode)) | |
2417 | abort (); | |
2418 | ||
2419 | /* Ensure that if at least one of mult1/mult2 are constant, then mult2 | |
2420 | will be a constant. */ | |
2421 | if (GET_CODE (mult1) == CONST_INT) | |
2422 | { | |
2423 | temp = mult2; | |
2424 | mult2 = mult1; | |
2425 | mult1 = temp; | |
2426 | } | |
2427 | ||
2428 | mult_res = simplify_binary_operation (MULT, mode, mult1, mult2); | |
2429 | if (! mult_res) | |
941522d6 | 2430 | mult_res = gen_rtx_MULT (mode, mult1, mult2); |
cdbd2a92 | 2431 | |
2432 | /* Again, put the constant second. */ | |
2433 | if (GET_CODE (add1) == CONST_INT) | |
2434 | { | |
2435 | temp = add1; | |
2436 | add1 = mult_res; | |
2437 | mult_res = temp; | |
2438 | } | |
2439 | ||
2440 | result = simplify_binary_operation (PLUS, mode, add1, mult_res); | |
2441 | if (! result) | |
941522d6 | 2442 | result = gen_rtx_PLUS (mode, add1, mult_res); |
cdbd2a92 | 2443 | |
2444 | return result; | |
2445 | } | |
2446 | ||
2447 | /* Searches the list of induction struct's for the biv BL, to try to calculate | |
2448 | the total increment value for one iteration of the loop as a constant. | |
2449 | ||
2450 | Returns the increment value as an rtx, simplified as much as possible, | |
2451 | if it can be calculated. Otherwise, returns 0. */ | |
2452 | ||
9058b363 | 2453 | rtx |
15fc3eb7 | 2454 | biv_total_increment (bl) |
3b853ab8 | 2455 | const struct iv_class *bl; |
cdbd2a92 | 2456 | { |
2457 | struct induction *v; | |
2458 | rtx result; | |
2459 | ||
2460 | /* For increment, must check every instruction that sets it. Each | |
2461 | instruction must be executed only once each time through the loop. | |
3398e91d | 2462 | To verify this, we check that the insn is always executed, and that |
cdbd2a92 | 2463 | there are no backward branches after the insn that branch to before it. |
2464 | Also, the insn must have a mult_val of one (to make sure it really is | |
2465 | an increment). */ | |
2466 | ||
2467 | result = const0_rtx; | |
2468 | for (v = bl->biv; v; v = v->next_iv) | |
2469 | { | |
2470 | if (v->always_computable && v->mult_val == const1_rtx | |
0da71ef8 | 2471 | && ! v->maybe_multiple |
2472 | && SCALAR_INT_MODE_P (v->mode)) | |
cdbd2a92 | 2473 | result = fold_rtx_mult_add (result, const1_rtx, v->add_val, v->mode); |
2474 | else | |
2475 | return 0; | |
2476 | } | |
2477 | ||
2478 | return result; | |
2479 | } | |
2480 | ||
cdbd2a92 | 2481 | /* For each biv and giv, determine whether it can be safely split into |
2482 | a different variable for each unrolled copy of the loop body. If it | |
2483 | is safe to split, then indicate that by saving some useful info | |
2484 | in the splittable_regs array. | |
2485 | ||
2486 | If the loop is being completely unrolled, then splittable_regs will hold | |
2487 | the current value of the induction variable while the loop is unrolled. | |
2488 | It must be set to the initial value of the induction variable here. | |
2489 | Otherwise, splittable_regs will hold the difference between the current | |
2490 | value of the induction variable and the value the induction variable had | |
a433411e | 2491 | at the top of the loop. It must be set to the value 0 here. |
2492 | ||
2493 | Returns the total number of instructions that set registers that are | |
2494 | splittable. */ | |
cdbd2a92 | 2495 | |
2496 | /* ?? If the loop is only unrolled twice, then most of the restrictions to | |
2497 | constant values are unnecessary, since we can easily calculate increment | |
2498 | values in this case even if nothing is constant. The increment value | |
2499 | should not involve a multiply however. */ | |
2500 | ||
2501 | /* ?? Even if the biv/giv increment values aren't constant, it may still | |
2502 | be beneficial to split the variable if the loop is only unrolled a few | |
2503 | times, since multiplies by small integers (1,2,3,4) are very cheap. */ | |
2504 | ||
2505 | static int | |
89e8d34f | 2506 | find_splittable_regs (loop, unroll_type, unroll_number) |
15fc3eb7 | 2507 | const struct loop *loop; |
cdbd2a92 | 2508 | enum unroll_types unroll_type; |
cdbd2a92 | 2509 | int unroll_number; |
2510 | { | |
8ec5f078 | 2511 | struct loop_ivs *ivs = LOOP_IVS (loop); |
cdbd2a92 | 2512 | struct iv_class *bl; |
625294a0 | 2513 | struct induction *v; |
cdbd2a92 | 2514 | rtx increment, tem; |
2515 | rtx biv_final_value; | |
2516 | int biv_splittable; | |
2517 | int result = 0; | |
2518 | ||
6f812a51 | 2519 | for (bl = ivs->list; bl; bl = bl->next) |
cdbd2a92 | 2520 | { |
2521 | /* Biv_total_increment must return a constant value, | |
2522 | otherwise we can not calculate the split values. */ | |
2523 | ||
15fc3eb7 | 2524 | increment = biv_total_increment (bl); |
cdbd2a92 | 2525 | if (! increment || GET_CODE (increment) != CONST_INT) |
2526 | continue; | |
2527 | ||
2528 | /* The loop must be unrolled completely, or else have a known number | |
2529 | of iterations and only one exit, or else the biv must be dead | |
2530 | outside the loop, or else the final value must be known. Otherwise, | |
2531 | it is unsafe to split the biv since it may not have the proper | |
2532 | value on loop exit. */ | |
2533 | ||
f712a0dc | 2534 | /* loop_number_exit_count is nonzero if the loop has an exit other than |
cdbd2a92 | 2535 | a fall through at the end. */ |
2536 | ||
2537 | biv_splittable = 1; | |
2538 | biv_final_value = 0; | |
2539 | if (unroll_type != UNROLL_COMPLETELY | |
15fc3eb7 | 2540 | && (loop->exit_count || unroll_type == UNROLL_NAIVE) |
89e8d34f | 2541 | && (REGNO_LAST_LUID (bl->regno) >= INSN_LUID (loop->end) |
cdbd2a92 | 2542 | || ! bl->init_insn |
2543 | || INSN_UID (bl->init_insn) >= max_uid_for_loop | |
23e52523 | 2544 | || (REGNO_FIRST_LUID (bl->regno) |
cdbd2a92 | 2545 | < INSN_LUID (bl->init_insn)) |
2546 | || reg_mentioned_p (bl->biv->dest_reg, SET_SRC (bl->init_set))) | |
15fc3eb7 | 2547 | && ! (biv_final_value = final_biv_value (loop, bl))) |
cdbd2a92 | 2548 | biv_splittable = 0; |
2549 | ||
625294a0 | 2550 | /* If any of the insns setting the BIV don't do so with a simple |
2551 | PLUS, we don't know how to split it. */ | |
2552 | for (v = bl->biv; biv_splittable && v; v = v->next_iv) | |
2553 | if ((tem = single_set (v->insn)) == 0 | |
2554 | || GET_CODE (SET_DEST (tem)) != REG | |
2555 | || REGNO (SET_DEST (tem)) != bl->regno | |
2556 | || GET_CODE (SET_SRC (tem)) != PLUS) | |
2557 | biv_splittable = 0; | |
2558 | ||
f712a0dc | 2559 | /* If final value is nonzero, then must emit an instruction which sets |
cdbd2a92 | 2560 | the value of the biv to the proper value. This is done after |
2561 | handling all of the givs, since some of them may need to use the | |
2562 | biv's value in their initialization code. */ | |
2563 | ||
2564 | /* This biv is splittable. If completely unrolling the loop, save | |
2565 | the biv's initial value. Otherwise, save the constant zero. */ | |
2566 | ||
2567 | if (biv_splittable == 1) | |
2568 | { | |
2569 | if (unroll_type == UNROLL_COMPLETELY) | |
2570 | { | |
2571 | /* If the initial value of the biv is itself (i.e. it is too | |
2572 | complicated for strength_reduce to compute), or is a hard | |
8909da5f | 2573 | register, or it isn't invariant, then we must create a new |
2574 | pseudo reg to hold the initial value of the biv. */ | |
cdbd2a92 | 2575 | |
2576 | if (GET_CODE (bl->initial_value) == REG | |
2577 | && (REGNO (bl->initial_value) == bl->regno | |
8909da5f | 2578 | || REGNO (bl->initial_value) < FIRST_PSEUDO_REGISTER |
15fc3eb7 | 2579 | || ! loop_invariant_p (loop, bl->initial_value))) |
cdbd2a92 | 2580 | { |
2581 | rtx tem = gen_reg_rtx (bl->biv->mode); | |
ea0cb7ae | 2582 | |
1a6c3add | 2583 | record_base_value (REGNO (tem), bl->biv->add_val, 0); |
6c34d0c2 | 2584 | loop_insn_hoist (loop, |
89e8d34f | 2585 | gen_move_insn (tem, bl->biv->src_reg)); |
cdbd2a92 | 2586 | |
2587 | if (loop_dump_stream) | |
6da6f625 | 2588 | fprintf (loop_dump_stream, |
2589 | "Biv %d initial value remapped to %d.\n", | |
cdbd2a92 | 2590 | bl->regno, REGNO (tem)); |
2591 | ||
2592 | splittable_regs[bl->regno] = tem; | |
2593 | } | |
2594 | else | |
2595 | splittable_regs[bl->regno] = bl->initial_value; | |
2596 | } | |
2597 | else | |
2598 | splittable_regs[bl->regno] = const0_rtx; | |
2599 | ||
2600 | /* Save the number of instructions that modify the biv, so that | |
2601 | we can treat the last one specially. */ | |
2602 | ||
2603 | splittable_regs_updates[bl->regno] = bl->biv_count; | |
a433411e | 2604 | result += bl->biv_count; |
cdbd2a92 | 2605 | |
2606 | if (loop_dump_stream) | |
2607 | fprintf (loop_dump_stream, | |
2608 | "Biv %d safe to split.\n", bl->regno); | |
2609 | } | |
2610 | ||
2611 | /* Check every giv that depends on this biv to see whether it is | |
2612 | splittable also. Even if the biv isn't splittable, givs which | |
2613 | depend on it may be splittable if the biv is live outside the | |
2614 | loop, and the givs aren't. */ | |
2615 | ||
6da6f625 | 2616 | result += find_splittable_givs (loop, bl, unroll_type, increment, |
15fc3eb7 | 2617 | unroll_number); |
cdbd2a92 | 2618 | |
f712a0dc | 2619 | /* If final value is nonzero, then must emit an instruction which sets |
cdbd2a92 | 2620 | the value of the biv to the proper value. This is done after |
2621 | handling all of the givs, since some of them may need to use the | |
2622 | biv's value in their initialization code. */ | |
2623 | if (biv_final_value) | |
2624 | { | |
2625 | /* If the loop has multiple exits, emit the insns before the | |
2626 | loop to ensure that it will always be executed no matter | |
2627 | how the loop exits. Otherwise emit the insn after the loop, | |
2628 | since this is slightly more efficient. */ | |
15fc3eb7 | 2629 | if (! loop->exit_count) |
89e8d34f | 2630 | loop_insn_sink (loop, gen_move_insn (bl->biv->src_reg, |
2631 | biv_final_value)); | |
cdbd2a92 | 2632 | else |
2633 | { | |
2634 | /* Create a new register to hold the value of the biv, and then | |
2635 | set the biv to its final value before the loop start. The biv | |
2636 | is set to its final value before loop start to ensure that | |
2637 | this insn will always be executed, no matter how the loop | |
2638 | exits. */ | |
2639 | rtx tem = gen_reg_rtx (bl->biv->mode); | |
1a6c3add | 2640 | record_base_value (REGNO (tem), bl->biv->add_val, 0); |
ea0cb7ae | 2641 | |
89e8d34f | 2642 | loop_insn_hoist (loop, gen_move_insn (tem, bl->biv->src_reg)); |
2643 | loop_insn_hoist (loop, gen_move_insn (bl->biv->src_reg, | |
2644 | biv_final_value)); | |
cdbd2a92 | 2645 | |
2646 | if (loop_dump_stream) | |
2647 | fprintf (loop_dump_stream, "Biv %d mapped to %d for split.\n", | |
2648 | REGNO (bl->biv->src_reg), REGNO (tem)); | |
2649 | ||
2650 | /* Set up the mapping from the original biv register to the new | |
2651 | register. */ | |
2652 | bl->biv->src_reg = tem; | |
2653 | } | |
2654 | } | |
2655 | } | |
2656 | return result; | |
2657 | } | |
2658 | ||
2659 | /* For every giv based on the biv BL, check to determine whether it is | |
a433411e | 2660 | splittable. This is a subroutine to find_splittable_regs (). |
2661 | ||
2662 | Return the number of instructions that set splittable registers. */ | |
cdbd2a92 | 2663 | |
2664 | static int | |
15fc3eb7 | 2665 | find_splittable_givs (loop, bl, unroll_type, increment, unroll_number) |
2666 | const struct loop *loop; | |
cdbd2a92 | 2667 | struct iv_class *bl; |
2668 | enum unroll_types unroll_type; | |
cdbd2a92 | 2669 | rtx increment; |
d6484866 | 2670 | int unroll_number ATTRIBUTE_UNUSED; |
cdbd2a92 | 2671 | { |
8ec5f078 | 2672 | struct loop_ivs *ivs = LOOP_IVS (loop); |
b5b239db | 2673 | struct induction *v, *v2; |
cdbd2a92 | 2674 | rtx final_value; |
2675 | rtx tem; | |
a433411e | 2676 | int result = 0; |
cdbd2a92 | 2677 | |
b5b239db | 2678 | /* Scan the list of givs, and set the same_insn field when there are |
2679 | multiple identical givs in the same insn. */ | |
2680 | for (v = bl->giv; v; v = v->next_iv) | |
2681 | for (v2 = v->next_iv; v2; v2 = v2->next_iv) | |
2682 | if (v->insn == v2->insn && rtx_equal_p (v->new_reg, v2->new_reg) | |
2683 | && ! v2->same_insn) | |
2684 | v2->same_insn = v; | |
2685 | ||
cdbd2a92 | 2686 | for (v = bl->giv; v; v = v->next_iv) |
2687 | { | |
2688 | rtx giv_inc, value; | |
2689 | ||
2690 | /* Only split the giv if it has already been reduced, or if the loop is | |
2691 | being completely unrolled. */ | |
2692 | if (unroll_type != UNROLL_COMPLETELY && v->ignore) | |
2693 | continue; | |
2694 | ||
2695 | /* The giv can be split if the insn that sets the giv is executed once | |
2696 | and only once on every iteration of the loop. */ | |
2697 | /* An address giv can always be split. v->insn is just a use not a set, | |
2698 | and hence it does not matter whether it is always executed. All that | |
2699 | matters is that all the biv increments are always executed, and we | |
2700 | won't reach here if they aren't. */ | |
2701 | if (v->giv_type != DEST_ADDR | |
2702 | && (! v->always_computable | |
15fc3eb7 | 2703 | || back_branch_in_range_p (loop, v->insn))) |
cdbd2a92 | 2704 | continue; |
9058b363 | 2705 | |
cdbd2a92 | 2706 | /* The giv increment value must be a constant. */ |
2707 | giv_inc = fold_rtx_mult_add (v->mult_val, increment, const0_rtx, | |
2708 | v->mode); | |
2709 | if (! giv_inc || GET_CODE (giv_inc) != CONST_INT) | |
2710 | continue; | |
2711 | ||
2712 | /* The loop must be unrolled completely, or else have a known number of | |
2713 | iterations and only one exit, or else the giv must be dead outside | |
2714 | the loop, or else the final value of the giv must be known. | |
2715 | Otherwise, it is not safe to split the giv since it may not have the | |
2716 | proper value on loop exit. */ | |
9058b363 | 2717 | |
cdbd2a92 | 2718 | /* The used outside loop test will fail for DEST_ADDR givs. They are |
2719 | never used outside the loop anyways, so it is always safe to split a | |
2720 | DEST_ADDR giv. */ | |
2721 | ||
2722 | final_value = 0; | |
2723 | if (unroll_type != UNROLL_COMPLETELY | |
15fc3eb7 | 2724 | && (loop->exit_count || unroll_type == UNROLL_NAIVE) |
cdbd2a92 | 2725 | && v->giv_type != DEST_ADDR |
91f09a1b | 2726 | /* The next part is true if the pseudo is used outside the loop. |
2727 | We assume that this is true for any pseudo created after loop | |
2728 | starts, because we don't have a reg_n_info entry for them. */ | |
2729 | && (REGNO (v->dest_reg) >= max_reg_before_loop | |
2730 | || (REGNO_FIRST_UID (REGNO (v->dest_reg)) != INSN_UID (v->insn) | |
2731 | /* Check for the case where the pseudo is set by a shift/add | |
2732 | sequence, in which case the first insn setting the pseudo | |
2733 | is the first insn of the shift/add sequence. */ | |
2734 | && (! (tem = find_reg_note (v->insn, REG_RETVAL, NULL_RTX)) | |
2735 | || (REGNO_FIRST_UID (REGNO (v->dest_reg)) | |
2736 | != INSN_UID (XEXP (tem, 0))))) | |
cdbd2a92 | 2737 | /* Line above always fails if INSN was moved by loop opt. */ |
23e52523 | 2738 | || (REGNO_LAST_LUID (REGNO (v->dest_reg)) |
15fc3eb7 | 2739 | >= INSN_LUID (loop->end))) |
cdbd2a92 | 2740 | && ! (final_value = v->final_value)) |
2741 | continue; | |
2742 | ||
2743 | #if 0 | |
2744 | /* Currently, non-reduced/final-value givs are never split. */ | |
2745 | /* Should emit insns after the loop if possible, as the biv final value | |
2746 | code below does. */ | |
2747 | ||
f712a0dc | 2748 | /* If the final value is nonzero, and the giv has not been reduced, |
cdbd2a92 | 2749 | then must emit an instruction to set the final value. */ |
2750 | if (final_value && !v->new_reg) | |
2751 | { | |
2752 | /* Create a new register to hold the value of the giv, and then set | |
2753 | the giv to its final value before the loop start. The giv is set | |
2754 | to its final value before loop start to ensure that this insn | |
2755 | will always be executed, no matter how we exit. */ | |
2756 | tem = gen_reg_rtx (v->mode); | |
89e8d34f | 2757 | loop_insn_hoist (loop, gen_move_insn (tem, v->dest_reg)); |
2758 | loop_insn_hoist (loop, gen_move_insn (v->dest_reg, final_value)); | |
9058b363 | 2759 | |
cdbd2a92 | 2760 | if (loop_dump_stream) |
2761 | fprintf (loop_dump_stream, "Giv %d mapped to %d for split.\n", | |
2762 | REGNO (v->dest_reg), REGNO (tem)); | |
9058b363 | 2763 | |
cdbd2a92 | 2764 | v->src_reg = tem; |
2765 | } | |
2766 | #endif | |
2767 | ||
2768 | /* This giv is splittable. If completely unrolling the loop, save the | |
2769 | giv's initial value. Otherwise, save the constant zero for it. */ | |
2770 | ||
2771 | if (unroll_type == UNROLL_COMPLETELY) | |
625294a0 | 2772 | { |
2773 | /* It is not safe to use bl->initial_value here, because it may not | |
2774 | be invariant. It is safe to use the initial value stored in | |
2775 | the splittable_regs array if it is set. In rare cases, it won't | |
2776 | be set, so then we do exactly the same thing as | |
2777 | find_splittable_regs does to get a safe value. */ | |
2778 | rtx biv_initial_value; | |
2779 | ||
2780 | if (splittable_regs[bl->regno]) | |
2781 | biv_initial_value = splittable_regs[bl->regno]; | |
2782 | else if (GET_CODE (bl->initial_value) != REG | |
2783 | || (REGNO (bl->initial_value) != bl->regno | |
2784 | && REGNO (bl->initial_value) >= FIRST_PSEUDO_REGISTER)) | |
2785 | biv_initial_value = bl->initial_value; | |
2786 | else | |
2787 | { | |
2788 | rtx tem = gen_reg_rtx (bl->biv->mode); | |
2789 | ||
1a6c3add | 2790 | record_base_value (REGNO (tem), bl->biv->add_val, 0); |
89e8d34f | 2791 | loop_insn_hoist (loop, gen_move_insn (tem, bl->biv->src_reg)); |
625294a0 | 2792 | biv_initial_value = tem; |
2793 | } | |
ff57e249 | 2794 | biv_initial_value = extend_value_for_giv (v, biv_initial_value); |
625294a0 | 2795 | value = fold_rtx_mult_add (v->mult_val, biv_initial_value, |
2796 | v->add_val, v->mode); | |
2797 | } | |
cdbd2a92 | 2798 | else |
2799 | value = const0_rtx; | |
2800 | ||
2801 | if (v->new_reg) | |
2802 | { | |
0f0b4dbc | 2803 | /* If a giv was combined with another giv, then we can only split |
2804 | this giv if the giv it was combined with was reduced. This | |
2805 | is because the value of v->new_reg is meaningless in this | |
2806 | case. */ | |
2807 | if (v->same && ! v->same->new_reg) | |
cdbd2a92 | 2808 | { |
2809 | if (loop_dump_stream) | |
2810 | fprintf (loop_dump_stream, | |
0f0b4dbc | 2811 | "giv combined with unreduced giv not split.\n"); |
2812 | continue; | |
cdbd2a92 | 2813 | } |
0f0b4dbc | 2814 | /* If the giv is an address destination, it could be something other |
2815 | than a simple register, these have to be treated differently. */ | |
2816 | else if (v->giv_type == DEST_REG) | |
03cdbc29 | 2817 | { |
2818 | /* If value is not a constant, register, or register plus | |
2819 | constant, then compute its value into a register before | |
67c92c26 | 2820 | loop start. This prevents invalid rtx sharing, and should |
03cdbc29 | 2821 | generate better code. We can use bl->initial_value here |
2822 | instead of splittable_regs[bl->regno] because this code | |
2823 | is going before the loop start. */ | |
2824 | if (unroll_type == UNROLL_COMPLETELY | |
2825 | && GET_CODE (value) != CONST_INT | |
2826 | && GET_CODE (value) != REG | |
2827 | && (GET_CODE (value) != PLUS | |
2828 | || GET_CODE (XEXP (value, 0)) != REG | |
2829 | || GET_CODE (XEXP (value, 1)) != CONST_INT)) | |
2830 | { | |
2831 | rtx tem = gen_reg_rtx (v->mode); | |
1a6c3add | 2832 | record_base_value (REGNO (tem), v->add_val, 0); |
89e8d34f | 2833 | loop_iv_add_mult_hoist (loop, bl->initial_value, v->mult_val, |
2834 | v->add_val, tem); | |
03cdbc29 | 2835 | value = tem; |
2836 | } | |
9058b363 | 2837 | |
b627bae7 | 2838 | splittable_regs[reg_or_subregno (v->new_reg)] = value; |
03cdbc29 | 2839 | } |
cdbd2a92 | 2840 | else |
d6484866 | 2841 | continue; |
cdbd2a92 | 2842 | } |
2843 | else | |
2844 | { | |
2845 | #if 0 | |
2846 | /* Currently, unreduced giv's can't be split. This is not too much | |
2847 | of a problem since unreduced giv's are not live across loop | |
2848 | iterations anyways. When unrolling a loop completely though, | |
2849 | it makes sense to reduce&split givs when possible, as this will | |
2850 | result in simpler instructions, and will not require that a reg | |
2851 | be live across loop iterations. */ | |
9058b363 | 2852 | |
cdbd2a92 | 2853 | splittable_regs[REGNO (v->dest_reg)] = value; |
2854 | fprintf (stderr, "Giv %d at insn %d not reduced\n", | |
2855 | REGNO (v->dest_reg), INSN_UID (v->insn)); | |
2856 | #else | |
2857 | continue; | |
2858 | #endif | |
2859 | } | |
9058b363 | 2860 | |
6232e173 | 2861 | /* Unreduced givs are only updated once by definition. Reduced givs |
2862 | are updated as many times as their biv is. Mark it so if this is | |
cdbd2a92 | 2863 | a splittable register. Don't need to do anything for address givs |
2864 | where this may not be a register. */ | |
2865 | ||
2866 | if (GET_CODE (v->new_reg) == REG) | |
6232e173 | 2867 | { |
2868 | int count = 1; | |
2869 | if (! v->ignore) | |
b995c337 | 2870 | count = REG_IV_CLASS (ivs, REGNO (v->src_reg))->biv_count; |
6232e173 | 2871 | |
b627bae7 | 2872 | splittable_regs_updates[reg_or_subregno (v->new_reg)] = count; |
6232e173 | 2873 | } |
cdbd2a92 | 2874 | |
2875 | result++; | |
9058b363 | 2876 | |
cdbd2a92 | 2877 | if (loop_dump_stream) |
2878 | { | |
2879 | int regnum; | |
9058b363 | 2880 | |
cdbd2a92 | 2881 | if (GET_CODE (v->dest_reg) == CONST_INT) |
2882 | regnum = -1; | |
2883 | else if (GET_CODE (v->dest_reg) != REG) | |
2884 | regnum = REGNO (XEXP (v->dest_reg, 0)); | |
2885 | else | |
2886 | regnum = REGNO (v->dest_reg); | |
2887 | fprintf (loop_dump_stream, "Giv %d at insn %d safe to split.\n", | |
2888 | regnum, INSN_UID (v->insn)); | |
2889 | } | |
2890 | } | |
2891 | ||
2892 | return result; | |
2893 | } | |
2894 | \f | |
2895 | /* Try to prove that the register is dead after the loop exits. Trace every | |
2896 | loop exit looking for an insn that will always be executed, which sets | |
2897 | the register to some value, and appears before the first use of the register | |
2898 | is found. If successful, then return 1, otherwise return 0. */ | |
2899 | ||
2900 | /* ?? Could be made more intelligent in the handling of jumps, so that | |
2901 | it can search past if statements and other similar structures. */ | |
2902 | ||
2903 | static int | |
15fc3eb7 | 2904 | reg_dead_after_loop (loop, reg) |
2905 | const struct loop *loop; | |
2906 | rtx reg; | |
cdbd2a92 | 2907 | { |
2908 | rtx insn, label; | |
2909 | enum rtx_code code; | |
e45ce1bf | 2910 | int jump_count = 0; |
cdb56c74 | 2911 | int label_count = 0; |
cdb56c74 | 2912 | |
2913 | /* In addition to checking all exits of this loop, we must also check | |
2914 | all exits of inner nested loops that would exit this loop. We don't | |
2915 | have any way to identify those, so we just give up if there are any | |
2916 | such inner loop exits. */ | |
9058b363 | 2917 | |
ec7d7ef9 | 2918 | for (label = loop->exit_labels; label; label = LABEL_NEXTREF (label)) |
cdb56c74 | 2919 | label_count++; |
2920 | ||
ec7d7ef9 | 2921 | if (label_count != loop->exit_count) |
cdb56c74 | 2922 | return 0; |
cdbd2a92 | 2923 | |
2924 | /* HACK: Must also search the loop fall through exit, create a label_ref | |
15fc3eb7 | 2925 | here which points to the loop->end, and append the loop_number_exit_labels |
cdbd2a92 | 2926 | list to it. */ |
15fc3eb7 | 2927 | label = gen_rtx_LABEL_REF (VOIDmode, loop->end); |
ec7d7ef9 | 2928 | LABEL_NEXTREF (label) = loop->exit_labels; |
cdbd2a92 | 2929 | |
6da6f625 | 2930 | for (; label; label = LABEL_NEXTREF (label)) |
cdbd2a92 | 2931 | { |
2932 | /* Succeed if find an insn which sets the biv or if reach end of | |
2933 | function. Fail if find an insn that uses the biv, or if come to | |
2934 | a conditional jump. */ | |
2935 | ||
2936 | insn = NEXT_INSN (XEXP (label, 0)); | |
e45ce1bf | 2937 | while (insn) |
cdbd2a92 | 2938 | { |
e45ce1bf | 2939 | code = GET_CODE (insn); |
2940 | if (GET_RTX_CLASS (code) == 'i') | |
cdbd2a92 | 2941 | { |
e45ce1bf | 2942 | rtx set; |
2943 | ||
2944 | if (reg_referenced_p (reg, PATTERN (insn))) | |
cdbd2a92 | 2945 | return 0; |
e45ce1bf | 2946 | |
2947 | set = single_set (insn); | |
2948 | if (set && rtx_equal_p (SET_DEST (set), reg)) | |
2949 | break; | |
cdbd2a92 | 2950 | } |
e45ce1bf | 2951 | |
cdbd2a92 | 2952 | if (code == JUMP_INSN) |
2953 | { | |
2954 | if (GET_CODE (PATTERN (insn)) == RETURN) | |
2955 | break; | |
b2816317 | 2956 | else if (!any_uncondjump_p (insn) |
a92771b8 | 2957 | /* Prevent infinite loop following infinite loops. */ |
e45ce1bf | 2958 | || jump_count++ > 20) |
cdbd2a92 | 2959 | return 0; |
2960 | else | |
e45ce1bf | 2961 | insn = JUMP_LABEL (insn); |
cdbd2a92 | 2962 | } |
e45ce1bf | 2963 | |
cdbd2a92 | 2964 | insn = NEXT_INSN (insn); |
2965 | } | |
2966 | } | |
2967 | ||
2968 | /* Success, the register is dead on all loop exits. */ | |
2969 | return 1; | |
2970 | } | |
2971 | ||
2972 | /* Try to calculate the final value of the biv, the value it will have at | |
2973 | the end of the loop. If we can do it, return that value. */ | |
9058b363 | 2974 | |
cdbd2a92 | 2975 | rtx |
15fc3eb7 | 2976 | final_biv_value (loop, bl) |
2977 | const struct loop *loop; | |
cdbd2a92 | 2978 | struct iv_class *bl; |
cdbd2a92 | 2979 | { |
15fc3eb7 | 2980 | unsigned HOST_WIDE_INT n_iterations = LOOP_INFO (loop)->n_iterations; |
cdbd2a92 | 2981 | rtx increment, tem; |
2982 | ||
bdf74c8a | 2983 | /* ??? This only works for MODE_INT biv's. Reject all others for now. */ |
2984 | ||
2985 | if (GET_MODE_CLASS (bl->biv->mode) != MODE_INT) | |
2986 | return 0; | |
2987 | ||
cdbd2a92 | 2988 | /* The final value for reversed bivs must be calculated differently than |
6da6f625 | 2989 | for ordinary bivs. In this case, there is already an insn after the |
cdbd2a92 | 2990 | loop which sets this biv's final value (if necessary), and there are |
2991 | no other loop exits, so we can return any value. */ | |
2992 | if (bl->reversed) | |
2993 | { | |
2994 | if (loop_dump_stream) | |
2995 | fprintf (loop_dump_stream, | |
2996 | "Final biv value for %d, reversed biv.\n", bl->regno); | |
9058b363 | 2997 | |
cdbd2a92 | 2998 | return const0_rtx; |
2999 | } | |
3000 | ||
3001 | /* Try to calculate the final value as initial value + (number of iterations | |
3002 | * increment). For this to work, increment must be invariant, the only | |
3003 | exit from the loop must be the fall through at the bottom (otherwise | |
3004 | it may not have its final value when the loop exits), and the initial | |
3005 | value of the biv must be invariant. */ | |
3006 | ||
ec5d911a | 3007 | if (n_iterations != 0 |
15fc3eb7 | 3008 | && ! loop->exit_count |
3009 | && loop_invariant_p (loop, bl->initial_value)) | |
cdbd2a92 | 3010 | { |
15fc3eb7 | 3011 | increment = biv_total_increment (bl); |
9058b363 | 3012 | |
15fc3eb7 | 3013 | if (increment && loop_invariant_p (loop, increment)) |
cdbd2a92 | 3014 | { |
3015 | /* Can calculate the loop exit value, emit insns after loop | |
3016 | end to calculate this value into a temporary register in | |
3017 | case it is needed later. */ | |
3018 | ||
3019 | tem = gen_reg_rtx (bl->biv->mode); | |
1a6c3add | 3020 | record_base_value (REGNO (tem), bl->biv->add_val, 0); |
89e8d34f | 3021 | loop_iv_add_mult_sink (loop, increment, GEN_INT (n_iterations), |
3022 | bl->initial_value, tem); | |
cdbd2a92 | 3023 | |
3024 | if (loop_dump_stream) | |
3025 | fprintf (loop_dump_stream, | |
3026 | "Final biv value for %d, calculated.\n", bl->regno); | |
9058b363 | 3027 | |
cdbd2a92 | 3028 | return tem; |
3029 | } | |
3030 | } | |
3031 | ||
3032 | /* Check to see if the biv is dead at all loop exits. */ | |
15fc3eb7 | 3033 | if (reg_dead_after_loop (loop, bl->biv->src_reg)) |
cdbd2a92 | 3034 | { |
3035 | if (loop_dump_stream) | |
3036 | fprintf (loop_dump_stream, | |
3037 | "Final biv value for %d, biv dead after loop exit.\n", | |
3038 | bl->regno); | |
3039 | ||
3040 | return const0_rtx; | |
3041 | } | |
3042 | ||
3043 | return 0; | |
3044 | } | |
3045 | ||
3046 | /* Try to calculate the final value of the giv, the value it will have at | |
3047 | the end of the loop. If we can do it, return that value. */ | |
3048 | ||
3049 | rtx | |
15fc3eb7 | 3050 | final_giv_value (loop, v) |
3051 | const struct loop *loop; | |
cdbd2a92 | 3052 | struct induction *v; |
cdbd2a92 | 3053 | { |
8ec5f078 | 3054 | struct loop_ivs *ivs = LOOP_IVS (loop); |
cdbd2a92 | 3055 | struct iv_class *bl; |
625294a0 | 3056 | rtx insn; |
cdbd2a92 | 3057 | rtx increment, tem; |
89e8d34f | 3058 | rtx seq; |
15fc3eb7 | 3059 | rtx loop_end = loop->end; |
3060 | unsigned HOST_WIDE_INT n_iterations = LOOP_INFO (loop)->n_iterations; | |
cdbd2a92 | 3061 | |
b995c337 | 3062 | bl = REG_IV_CLASS (ivs, REGNO (v->src_reg)); |
cdbd2a92 | 3063 | |
3064 | /* The final value for givs which depend on reversed bivs must be calculated | |
3065 | differently than for ordinary givs. In this case, there is already an | |
3066 | insn after the loop which sets this giv's final value (if necessary), | |
3067 | and there are no other loop exits, so we can return any value. */ | |
3068 | if (bl->reversed) | |
3069 | { | |
3070 | if (loop_dump_stream) | |
3071 | fprintf (loop_dump_stream, | |
3072 | "Final giv value for %d, depends on reversed biv\n", | |
3073 | REGNO (v->dest_reg)); | |
3074 | return const0_rtx; | |
3075 | } | |
3076 | ||
3077 | /* Try to calculate the final value as a function of the biv it depends | |
3078 | upon. The only exit from the loop must be the fall through at the bottom | |
a22e3dfa | 3079 | and the insn that sets the giv must be executed on every iteration |
3080 | (otherwise the giv may not have its final value when the loop exits). */ | |
9058b363 | 3081 | |
cdbd2a92 | 3082 | /* ??? Can calculate the final giv value by subtracting off the |
3083 | extra biv increments times the giv's mult_val. The loop must have | |
3084 | only one exit for this to work, but the loop iterations does not need | |
3085 | to be known. */ | |
3086 | ||
ec5d911a | 3087 | if (n_iterations != 0 |
a22e3dfa | 3088 | && ! loop->exit_count |
3089 | && v->always_executed) | |
cdbd2a92 | 3090 | { |
3091 | /* ?? It is tempting to use the biv's value here since these insns will | |
3092 | be put after the loop, and hence the biv will have its final value | |
3093 | then. However, this fails if the biv is subsequently eliminated. | |
3094 | Perhaps determine whether biv's are eliminable before trying to | |
3095 | determine whether giv's are replaceable so that we can use the | |
3096 | biv value here if it is not eliminable. */ | |
3097 | ||
95b81401 | 3098 | /* We are emitting code after the end of the loop, so we must make |
3099 | sure that bl->initial_value is still valid then. It will still | |
3100 | be valid if it is invariant. */ | |
3101 | ||
15fc3eb7 | 3102 | increment = biv_total_increment (bl); |
cdbd2a92 | 3103 | |
15fc3eb7 | 3104 | if (increment && loop_invariant_p (loop, increment) |
3105 | && loop_invariant_p (loop, bl->initial_value)) | |
cdbd2a92 | 3106 | { |
3107 | /* Can calculate the loop exit value of its biv as | |
ec5d911a | 3108 | (n_iterations * increment) + initial_value */ |
9058b363 | 3109 | |
cdbd2a92 | 3110 | /* The loop exit value of the giv is then |
3111 | (final_biv_value - extra increments) * mult_val + add_val. | |
3112 | The extra increments are any increments to the biv which | |
3113 | occur in the loop after the giv's value is calculated. | |
3114 | We must search from the insn that sets the giv to the end | |
3115 | of the loop to calculate this value. */ | |
3116 | ||
cdbd2a92 | 3117 | /* Put the final biv value in tem. */ |
ff57e249 | 3118 | tem = gen_reg_rtx (v->mode); |
1a6c3add | 3119 | record_base_value (REGNO (tem), bl->biv->add_val, 0); |
89e8d34f | 3120 | loop_iv_add_mult_sink (loop, extend_value_for_giv (v, increment), |
3121 | GEN_INT (n_iterations), | |
3122 | extend_value_for_giv (v, bl->initial_value), | |
3123 | tem); | |
cdbd2a92 | 3124 | |
3125 | /* Subtract off extra increments as we find them. */ | |
3126 | for (insn = NEXT_INSN (v->insn); insn != loop_end; | |
3127 | insn = NEXT_INSN (insn)) | |
3128 | { | |
625294a0 | 3129 | struct induction *biv; |
3130 | ||
3131 | for (biv = bl->biv; biv; biv = biv->next_iv) | |
3132 | if (biv->insn == insn) | |
3133 | { | |
3134 | start_sequence (); | |
ad99e708 | 3135 | tem = expand_simple_binop (GET_MODE (tem), MINUS, tem, |
3136 | biv->add_val, NULL_RTX, 0, | |
3137 | OPTAB_LIB_WIDEN); | |
31d3e01c | 3138 | seq = get_insns (); |
625294a0 | 3139 | end_sequence (); |
89e8d34f | 3140 | loop_insn_sink (loop, seq); |
625294a0 | 3141 | } |
cdbd2a92 | 3142 | } |
9058b363 | 3143 | |
cdbd2a92 | 3144 | /* Now calculate the giv's final value. */ |
89e8d34f | 3145 | loop_iv_add_mult_sink (loop, tem, v->mult_val, v->add_val, tem); |
9058b363 | 3146 | |
cdbd2a92 | 3147 | if (loop_dump_stream) |
3148 | fprintf (loop_dump_stream, | |
3149 | "Final giv value for %d, calc from biv's value.\n", | |
3150 | REGNO (v->dest_reg)); | |
3151 | ||
3152 | return tem; | |
3153 | } | |
3154 | } | |
3155 | ||
3156 | /* Replaceable giv's should never reach here. */ | |
3157 | if (v->replaceable) | |
3158 | abort (); | |
3159 | ||
3160 | /* Check to see if the biv is dead at all loop exits. */ | |
15fc3eb7 | 3161 | if (reg_dead_after_loop (loop, v->dest_reg)) |
cdbd2a92 | 3162 | { |
3163 | if (loop_dump_stream) | |
3164 | fprintf (loop_dump_stream, | |
3165 | "Final giv value for %d, giv dead after loop exit.\n", | |
3166 | REGNO (v->dest_reg)); | |
3167 | ||
3168 | return const0_rtx; | |
3169 | } | |
3170 | ||
3171 | return 0; | |
3172 | } | |
3173 | ||
dae39efc | 3174 | /* Look back before LOOP->START for the insn that sets REG and return |
7bb29472 | 3175 | the equivalent constant if there is a REG_EQUAL note otherwise just |
3176 | the SET_SRC of REG. */ | |
3177 | ||
3178 | static rtx | |
15fc3eb7 | 3179 | loop_find_equiv_value (loop, reg) |
3180 | const struct loop *loop; | |
7bb29472 | 3181 | rtx reg; |
3182 | { | |
15fc3eb7 | 3183 | rtx loop_start = loop->start; |
7bb29472 | 3184 | rtx insn, set; |
3185 | rtx ret; | |
9058b363 | 3186 | |
7bb29472 | 3187 | ret = reg; |
6da6f625 | 3188 | for (insn = PREV_INSN (loop_start); insn; insn = PREV_INSN (insn)) |
7bb29472 | 3189 | { |
3190 | if (GET_CODE (insn) == CODE_LABEL) | |
3191 | break; | |
9058b363 | 3192 | |
9204e736 | 3193 | else if (INSN_P (insn) && reg_set_p (reg, insn)) |
7bb29472 | 3194 | { |
3195 | /* We found the last insn before the loop that sets the register. | |
3196 | If it sets the entire register, and has a REG_EQUAL note, | |
3197 | then use the value of the REG_EQUAL note. */ | |
3198 | if ((set = single_set (insn)) | |
6da6f625 | 3199 | && (SET_DEST (set) == reg)) |
7bb29472 | 3200 | { |
3201 | rtx note = find_reg_note (insn, REG_EQUAL, NULL_RTX); | |
9058b363 | 3202 | |
7bb29472 | 3203 | /* Only use the REG_EQUAL note if it is a constant. |
3204 | Other things, divide in particular, will cause | |
3205 | problems later if we use them. */ | |
3206 | if (note && GET_CODE (XEXP (note, 0)) != EXPR_LIST | |
3207 | && CONSTANT_P (XEXP (note, 0))) | |
3208 | ret = XEXP (note, 0); | |
3209 | else | |
3210 | ret = SET_SRC (set); | |
6bc21dca | 3211 | |
3212 | /* We cannot do this if it changes between the | |
3213 | assignment and loop start though. */ | |
3214 | if (modified_between_p (ret, insn, loop_start)) | |
3215 | ret = reg; | |
7bb29472 | 3216 | } |
3217 | break; | |
3218 | } | |
3219 | } | |
3220 | return ret; | |
3221 | } | |
3222 | ||
d888299d | 3223 | /* Return a simplified rtx for the expression OP - REG. |
3224 | ||
3225 | REG must appear in OP, and OP must be a register or the sum of a register | |
3226 | and a second term. | |
3227 | ||
3228 | Thus, the return value must be const0_rtx or the second term. | |
3229 | ||
3230 | The caller is responsible for verifying that REG appears in OP and OP has | |
3231 | the proper form. */ | |
3232 | ||
3233 | static rtx | |
3234 | subtract_reg_term (op, reg) | |
3235 | rtx op, reg; | |
3236 | { | |
3237 | if (op == reg) | |
3238 | return const0_rtx; | |
3239 | if (GET_CODE (op) == PLUS) | |
3240 | { | |
3241 | if (XEXP (op, 0) == reg) | |
3242 | return XEXP (op, 1); | |
3243 | else if (XEXP (op, 1) == reg) | |
3244 | return XEXP (op, 0); | |
3245 | } | |
3246 | /* OP does not contain REG as a term. */ | |
3247 | abort (); | |
3248 | } | |
3249 | ||
d888299d | 3250 | /* Find and return register term common to both expressions OP0 and |
3251 | OP1 or NULL_RTX if no such term exists. Each expression must be a | |
3252 | REG or a PLUS of a REG. */ | |
3253 | ||
3254 | static rtx | |
3255 | find_common_reg_term (op0, op1) | |
3256 | rtx op0, op1; | |
3257 | { | |
3258 | if ((GET_CODE (op0) == REG || GET_CODE (op0) == PLUS) | |
3259 | && (GET_CODE (op1) == REG || GET_CODE (op1) == PLUS)) | |
3260 | { | |
3261 | rtx op00; | |
3262 | rtx op01; | |
3263 | rtx op10; | |
3264 | rtx op11; | |
3265 | ||
3266 | if (GET_CODE (op0) == PLUS) | |
3267 | op01 = XEXP (op0, 1), op00 = XEXP (op0, 0); | |
3268 | else | |
3269 | op01 = const0_rtx, op00 = op0; | |
3270 | ||
3271 | if (GET_CODE (op1) == PLUS) | |
3272 | op11 = XEXP (op1, 1), op10 = XEXP (op1, 0); | |
3273 | else | |
3274 | op11 = const0_rtx, op10 = op1; | |
3275 | ||
3276 | /* Find and return common register term if present. */ | |
3277 | if (REG_P (op00) && (op00 == op10 || op00 == op11)) | |
3278 | return op00; | |
3279 | else if (REG_P (op01) && (op01 == op10 || op01 == op11)) | |
3280 | return op01; | |
3281 | } | |
3282 | ||
3283 | /* No common register term found. */ | |
3284 | return NULL_RTX; | |
3285 | } | |
3286 | ||
b109aca9 | 3287 | /* Determine the loop iterator and calculate the number of loop |
3288 | iterations. Returns the exact number of loop iterations if it can | |
3289 | be calculated, otherwise returns zero. */ | |
cdbd2a92 | 3290 | |
d3115c90 | 3291 | unsigned HOST_WIDE_INT |
ec7d7ef9 | 3292 | loop_iterations (loop) |
3293 | struct loop *loop; | |
cdbd2a92 | 3294 | { |
8ec5f078 | 3295 | struct loop_info *loop_info = LOOP_INFO (loop); |
3296 | struct loop_ivs *ivs = LOOP_IVS (loop); | |
cdbd2a92 | 3297 | rtx comparison, comparison_value; |
3298 | rtx iteration_var, initial_value, increment, final_value; | |
3299 | enum rtx_code comparison_code; | |
9c3da436 | 3300 | HOST_WIDE_INT inc; |
3301 | unsigned HOST_WIDE_INT abs_inc; | |
7bb29472 | 3302 | unsigned HOST_WIDE_INT abs_diff; |
3303 | int off_by_one; | |
d3115c90 | 3304 | int increment_dir; |
7bb29472 | 3305 | int unsigned_p, compare_dir, final_larger; |
cdbd2a92 | 3306 | rtx last_loop_insn; |
d888299d | 3307 | rtx reg_term; |
b109aca9 | 3308 | struct iv_class *bl; |
cdbd2a92 | 3309 | |
7bb29472 | 3310 | loop_info->n_iterations = 0; |
ec5d911a | 3311 | loop_info->initial_value = 0; |
7bb29472 | 3312 | loop_info->initial_equiv_value = 0; |
3313 | loop_info->comparison_value = 0; | |
ec5d911a | 3314 | loop_info->final_value = 0; |
7bb29472 | 3315 | loop_info->final_equiv_value = 0; |
3316 | loop_info->increment = 0; | |
ec5d911a | 3317 | loop_info->iteration_var = 0; |
7bb29472 | 3318 | loop_info->unroll_number = 1; |
b109aca9 | 3319 | loop_info->iv = 0; |
cdbd2a92 | 3320 | |
7bb29472 | 3321 | /* We used to use prev_nonnote_insn here, but that fails because it might |
d3361f22 | 3322 | accidentally get the branch for a contained loop if the branch for this |
3323 | loop was deleted. We can only trust branches immediately before the | |
3324 | loop_end. */ | |
603590db | 3325 | last_loop_insn = PREV_INSN (loop->end); |
cdbd2a92 | 3326 | |
8758222c | 3327 | /* ??? We should probably try harder to find the jump insn |
9058b363 | 3328 | at the end of the loop. The following code assumes that |
8758222c | 3329 | the last loop insn is a jump to the top of the loop. */ |
3330 | if (GET_CODE (last_loop_insn) != JUMP_INSN) | |
3331 | { | |
3332 | if (loop_dump_stream) | |
3333 | fprintf (loop_dump_stream, | |
3334 | "Loop iterations: No final conditional branch found.\n"); | |
3335 | return 0; | |
3336 | } | |
3337 | ||
3338 | /* If there is a more than a single jump to the top of the loop | |
3339 | we cannot (easily) determine the iteration count. */ | |
3340 | if (LABEL_NUSES (JUMP_LABEL (last_loop_insn)) > 1) | |
3341 | { | |
3342 | if (loop_dump_stream) | |
3343 | fprintf (loop_dump_stream, | |
3344 | "Loop iterations: Loop has multiple back edges.\n"); | |
3345 | return 0; | |
3346 | } | |
3347 | ||
ef40dec5 | 3348 | /* If there are multiple conditionalized loop exit tests, they may jump |
3349 | back to differing CODE_LABELs. */ | |
3350 | if (loop->top && loop->cont) | |
3351 | { | |
3352 | rtx temp = PREV_INSN (last_loop_insn); | |
3353 | ||
3354 | do | |
3355 | { | |
a3ed7887 | 3356 | if (GET_CODE (temp) == JUMP_INSN) |
ef40dec5 | 3357 | { |
a3ed7887 | 3358 | /* There are some kinds of jumps we can't deal with easily. */ |
3359 | if (JUMP_LABEL (temp) == 0) | |
3360 | { | |
3361 | if (loop_dump_stream) | |
3362 | fprintf | |
3363 | (loop_dump_stream, | |
3364 | "Loop iterations: Jump insn has null JUMP_LABEL.\n"); | |
3365 | return 0; | |
3366 | } | |
3367 | ||
3368 | if (/* Previous unrolling may have generated new insns not | |
3369 | covered by the uid_luid array. */ | |
3370 | INSN_UID (JUMP_LABEL (temp)) < max_uid_for_loop | |
3371 | /* Check if we jump back into the loop body. */ | |
3372 | && INSN_LUID (JUMP_LABEL (temp)) > INSN_LUID (loop->top) | |
3373 | && INSN_LUID (JUMP_LABEL (temp)) < INSN_LUID (loop->cont)) | |
3374 | { | |
3375 | if (loop_dump_stream) | |
6c34d0c2 | 3376 | fprintf |
a3ed7887 | 3377 | (loop_dump_stream, |
3378 | "Loop iterations: Loop has multiple back edges.\n"); | |
3379 | return 0; | |
3380 | } | |
ef40dec5 | 3381 | } |
3382 | } | |
3383 | while ((temp = PREV_INSN (temp)) != loop->cont); | |
3384 | } | |
3385 | ||
8758222c | 3386 | /* Find the iteration variable. If the last insn is a conditional |
3387 | branch, and the insn before tests a register value, make that the | |
3388 | iteration variable. */ | |
9058b363 | 3389 | |
15fc3eb7 | 3390 | comparison = get_condition_for_loop (loop, last_loop_insn); |
cdbd2a92 | 3391 | if (comparison == 0) |
3392 | { | |
3393 | if (loop_dump_stream) | |
3394 | fprintf (loop_dump_stream, | |
8758222c | 3395 | "Loop iterations: No final comparison found.\n"); |
cdbd2a92 | 3396 | return 0; |
3397 | } | |
3398 | ||
3399 | /* ??? Get_condition may switch position of induction variable and | |
3400 | invariant register when it canonicalizes the comparison. */ | |
3401 | ||
3402 | comparison_code = GET_CODE (comparison); | |
3403 | iteration_var = XEXP (comparison, 0); | |
3404 | comparison_value = XEXP (comparison, 1); | |
9058b363 | 3405 | |
cdbd2a92 | 3406 | if (GET_CODE (iteration_var) != REG) |
3407 | { | |
3408 | if (loop_dump_stream) | |
3409 | fprintf (loop_dump_stream, | |
7bb29472 | 3410 | "Loop iterations: Comparison not against register.\n"); |
cdbd2a92 | 3411 | return 0; |
3412 | } | |
3413 | ||
c5aaf45f | 3414 | /* The only new registers that are created before loop iterations |
3415 | are givs made from biv increments or registers created by | |
3416 | load_mems. In the latter case, it is possible that try_copy_prop | |
3417 | will propagate a new pseudo into the old iteration register but | |
3418 | this will be marked by having the REG_USERVAR_P bit set. */ | |
3419 | ||
6f812a51 | 3420 | if ((unsigned) REGNO (iteration_var) >= ivs->n_regs |
c5aaf45f | 3421 | && ! REG_USERVAR_P (iteration_var)) |
ec7d7ef9 | 3422 | abort (); |
cdbd2a92 | 3423 | |
b109aca9 | 3424 | /* Determine the initial value of the iteration variable, and the amount |
3425 | that it is incremented each loop. Use the tables constructed by | |
3426 | the strength reduction pass to calculate these values. */ | |
3427 | ||
3428 | /* Clear the result values, in case no answer can be found. */ | |
3429 | initial_value = 0; | |
3430 | increment = 0; | |
3431 | ||
3432 | /* The iteration variable can be either a giv or a biv. Check to see | |
3433 | which it is, and compute the variable's initial value, and increment | |
3434 | value if possible. */ | |
3435 | ||
3436 | /* If this is a new register, can't handle it since we don't have any | |
3437 | reg_iv_type entry for it. */ | |
6f812a51 | 3438 | if ((unsigned) REGNO (iteration_var) >= ivs->n_regs) |
b109aca9 | 3439 | { |
3440 | if (loop_dump_stream) | |
3441 | fprintf (loop_dump_stream, | |
3442 | "Loop iterations: No reg_iv_type entry for iteration var.\n"); | |
3443 | return 0; | |
3444 | } | |
3445 | ||
3446 | /* Reject iteration variables larger than the host wide int size, since they | |
3447 | could result in a number of iterations greater than the range of our | |
3448 | `unsigned HOST_WIDE_INT' variable loop_info->n_iterations. */ | |
3449 | else if ((GET_MODE_BITSIZE (GET_MODE (iteration_var)) | |
3450 | > HOST_BITS_PER_WIDE_INT)) | |
3451 | { | |
3452 | if (loop_dump_stream) | |
3453 | fprintf (loop_dump_stream, | |
3454 | "Loop iterations: Iteration var rejected because mode too large.\n"); | |
3455 | return 0; | |
3456 | } | |
3457 | else if (GET_MODE_CLASS (GET_MODE (iteration_var)) != MODE_INT) | |
3458 | { | |
3459 | if (loop_dump_stream) | |
3460 | fprintf (loop_dump_stream, | |
3461 | "Loop iterations: Iteration var not an integer.\n"); | |
3462 | return 0; | |
3463 | } | |
8ec5f078 | 3464 | else if (REG_IV_TYPE (ivs, REGNO (iteration_var)) == BASIC_INDUCT) |
b109aca9 | 3465 | { |
d1908618 | 3466 | if (REGNO (iteration_var) >= ivs->n_regs) |
b109aca9 | 3467 | abort (); |
3468 | ||
3469 | /* Grab initial value, only useful if it is a constant. */ | |
b995c337 | 3470 | bl = REG_IV_CLASS (ivs, REGNO (iteration_var)); |
b109aca9 | 3471 | initial_value = bl->initial_value; |
e7d93d2e | 3472 | if (!bl->biv->always_executed || bl->biv->maybe_multiple) |
3473 | { | |
3474 | if (loop_dump_stream) | |
3475 | fprintf (loop_dump_stream, | |
3476 | "Loop iterations: Basic induction var not set once in each iteration.\n"); | |
3477 | return 0; | |
3478 | } | |
b109aca9 | 3479 | |
3480 | increment = biv_total_increment (bl); | |
3481 | } | |
8ec5f078 | 3482 | else if (REG_IV_TYPE (ivs, REGNO (iteration_var)) == GENERAL_INDUCT) |
b109aca9 | 3483 | { |
3484 | HOST_WIDE_INT offset = 0; | |
8ec5f078 | 3485 | struct induction *v = REG_IV_INFO (ivs, REGNO (iteration_var)); |
b109aca9 | 3486 | rtx biv_initial_value; |
3487 | ||
d1908618 | 3488 | if (REGNO (v->src_reg) >= ivs->n_regs) |
b109aca9 | 3489 | abort (); |
3490 | ||
e7d93d2e | 3491 | if (!v->always_executed || v->maybe_multiple) |
3492 | { | |
3493 | if (loop_dump_stream) | |
3494 | fprintf (loop_dump_stream, | |
3495 | "Loop iterations: General induction var not set once in each iteration.\n"); | |
3496 | return 0; | |
3497 | } | |
3498 | ||
b995c337 | 3499 | bl = REG_IV_CLASS (ivs, REGNO (v->src_reg)); |
b109aca9 | 3500 | |
3501 | /* Increment value is mult_val times the increment value of the biv. */ | |
3502 | ||
3503 | increment = biv_total_increment (bl); | |
3504 | if (increment) | |
3505 | { | |
3506 | struct induction *biv_inc; | |
3507 | ||
deb2362f | 3508 | increment = fold_rtx_mult_add (v->mult_val, |
3509 | extend_value_for_giv (v, increment), | |
3510 | const0_rtx, v->mode); | |
424da949 | 3511 | /* The caller assumes that one full increment has occurred at the |
b109aca9 | 3512 | first loop test. But that's not true when the biv is incremented |
3513 | after the giv is set (which is the usual case), e.g.: | |
3514 | i = 6; do {;} while (i++ < 9) . | |
3515 | Therefore, we bias the initial value by subtracting the amount of | |
3516 | the increment that occurs between the giv set and the giv test. */ | |
3517 | for (biv_inc = bl->biv; biv_inc; biv_inc = biv_inc->next_iv) | |
3518 | { | |
3519 | if (loop_insn_first_p (v->insn, biv_inc->insn)) | |
3a64bedc | 3520 | { |
3521 | if (REG_P (biv_inc->add_val)) | |
3522 | { | |
3523 | if (loop_dump_stream) | |
3524 | fprintf (loop_dump_stream, | |
3525 | "Loop iterations: Basic induction var add_val is REG %d.\n", | |
3526 | REGNO (biv_inc->add_val)); | |
3527 | return 0; | |
3528 | } | |
3529 | ||
3530 | offset -= INTVAL (biv_inc->add_val); | |
3531 | } | |
b109aca9 | 3532 | } |
b109aca9 | 3533 | } |
3534 | if (loop_dump_stream) | |
3535 | fprintf (loop_dump_stream, | |
3536 | "Loop iterations: Giv iterator, initial value bias %ld.\n", | |
3537 | (long) offset); | |
3538 | ||
3539 | /* Initial value is mult_val times the biv's initial value plus | |
3540 | add_val. Only useful if it is a constant. */ | |
3541 | biv_initial_value = extend_value_for_giv (v, bl->initial_value); | |
3542 | initial_value | |
3543 | = fold_rtx_mult_add (v->mult_val, | |
3544 | plus_constant (biv_initial_value, offset), | |
3545 | v->add_val, v->mode); | |
3546 | } | |
3547 | else | |
3548 | { | |
3549 | if (loop_dump_stream) | |
3550 | fprintf (loop_dump_stream, | |
3551 | "Loop iterations: Not basic or general induction var.\n"); | |
3552 | return 0; | |
3553 | } | |
15fc3eb7 | 3554 | |
cdbd2a92 | 3555 | if (initial_value == 0) |
cdbd2a92 | 3556 | return 0; |
3557 | ||
7bb29472 | 3558 | unsigned_p = 0; |
3559 | off_by_one = 0; | |
3560 | switch (comparison_code) | |
3561 | { | |
3562 | case LEU: | |
3563 | unsigned_p = 1; | |
3564 | case LE: | |
3565 | compare_dir = 1; | |
3566 | off_by_one = 1; | |
3567 | break; | |
3568 | case GEU: | |
3569 | unsigned_p = 1; | |
3570 | case GE: | |
3571 | compare_dir = -1; | |
3572 | off_by_one = -1; | |
3573 | break; | |
3574 | case EQ: | |
3575 | /* Cannot determine loop iterations with this case. */ | |
3576 | compare_dir = 0; | |
3577 | break; | |
3578 | case LTU: | |
3579 | unsigned_p = 1; | |
3580 | case LT: | |
3581 | compare_dir = 1; | |
3582 | break; | |
3583 | case GTU: | |
3584 | unsigned_p = 1; | |
3585 | case GT: | |
3586 | compare_dir = -1; | |
3587 | case NE: | |
3588 | compare_dir = 0; | |
3589 | break; | |
3590 | default: | |
3591 | abort (); | |
3592 | } | |
3593 | ||
cdbd2a92 | 3594 | /* If the comparison value is an invariant register, then try to find |
3595 | its value from the insns before the start of the loop. */ | |
3596 | ||
7bb29472 | 3597 | final_value = comparison_value; |
6da6f625 | 3598 | if (GET_CODE (comparison_value) == REG |
15fc3eb7 | 3599 | && loop_invariant_p (loop, comparison_value)) |
cdbd2a92 | 3600 | { |
15fc3eb7 | 3601 | final_value = loop_find_equiv_value (loop, comparison_value); |
3602 | ||
7bb29472 | 3603 | /* If we don't get an invariant final value, we are better |
3604 | off with the original register. */ | |
15fc3eb7 | 3605 | if (! loop_invariant_p (loop, final_value)) |
7bb29472 | 3606 | final_value = comparison_value; |
3607 | } | |
3608 | ||
3609 | /* Calculate the approximate final value of the induction variable | |
3610 | (on the last successful iteration). The exact final value | |
3611 | depends on the branch operator, and increment sign. It will be | |
3612 | wrong if the iteration variable is not incremented by one each | |
3613 | time through the loop and (comparison_value + off_by_one - | |
3614 | initial_value) % increment != 0. | |
3615 | ??? Note that the final_value may overflow and thus final_larger | |
3616 | will be bogus. A potentially infinite loop will be classified | |
3617 | as immediate, e.g. for (i = 0x7ffffff0; i <= 0x7fffffff; i++) */ | |
3618 | if (off_by_one) | |
3619 | final_value = plus_constant (final_value, off_by_one); | |
cdbd2a92 | 3620 | |
3621 | /* Save the calculated values describing this loop's bounds, in case | |
3622 | precondition_loop_p will need them later. These values can not be | |
3623 | recalculated inside precondition_loop_p because strength reduction | |
9058b363 | 3624 | optimizations may obscure the loop's structure. |
cdbd2a92 | 3625 | |
7bb29472 | 3626 | These values are only required by precondition_loop_p and insert_bct |
3627 | whenever the number of iterations cannot be computed at compile time. | |
3628 | Only the difference between final_value and initial_value is | |
3629 | important. Note that final_value is only approximate. */ | |
ec5d911a | 3630 | loop_info->initial_value = initial_value; |
7bb29472 | 3631 | loop_info->comparison_value = comparison_value; |
3632 | loop_info->final_value = plus_constant (comparison_value, off_by_one); | |
ec5d911a | 3633 | loop_info->increment = increment; |
7bb29472 | 3634 | loop_info->iteration_var = iteration_var; |
ec5d911a | 3635 | loop_info->comparison_code = comparison_code; |
b109aca9 | 3636 | loop_info->iv = bl; |
cdbd2a92 | 3637 | |
d888299d | 3638 | /* Try to determine the iteration count for loops such |
3639 | as (for i = init; i < init + const; i++). When running the | |
3640 | loop optimization twice, the first pass often converts simple | |
3641 | loops into this form. */ | |
7bb29472 | 3642 | |
d888299d | 3643 | if (REG_P (initial_value)) |
7bb29472 | 3644 | { |
d888299d | 3645 | rtx reg1; |
3646 | rtx reg2; | |
3647 | rtx const2; | |
7bb29472 | 3648 | |
d888299d | 3649 | reg1 = initial_value; |
7bb29472 | 3650 | if (GET_CODE (final_value) == PLUS) |
d888299d | 3651 | reg2 = XEXP (final_value, 0), const2 = XEXP (final_value, 1); |
7bb29472 | 3652 | else |
d888299d | 3653 | reg2 = final_value, const2 = const0_rtx; |
7bb29472 | 3654 | |
d888299d | 3655 | /* Check for initial_value = reg1, final_value = reg2 + const2, |
3656 | where reg1 != reg2. */ | |
3657 | if (REG_P (reg2) && reg2 != reg1) | |
7bb29472 | 3658 | { |
d888299d | 3659 | rtx temp; |
3660 | ||
3661 | /* Find what reg1 is equivalent to. Hopefully it will | |
3662 | either be reg2 or reg2 plus a constant. */ | |
15fc3eb7 | 3663 | temp = loop_find_equiv_value (loop, reg1); |
3664 | ||
d888299d | 3665 | if (find_common_reg_term (temp, reg2)) |
3666 | initial_value = temp; | |
3667 | else | |
3668 | { | |
3669 | /* Find what reg2 is equivalent to. Hopefully it will | |
3670 | either be reg1 or reg1 plus a constant. Let's ignore | |
3671 | the latter case for now since it is not so common. */ | |
15fc3eb7 | 3672 | temp = loop_find_equiv_value (loop, reg2); |
3673 | ||
d888299d | 3674 | if (temp == loop_info->iteration_var) |
3675 | temp = initial_value; | |
3676 | if (temp == reg1) | |
3677 | final_value = (const2 == const0_rtx) | |
3678 | ? reg1 : gen_rtx_PLUS (GET_MODE (reg1), reg1, const2); | |
3679 | } | |
7bb29472 | 3680 | } |
ec7d7ef9 | 3681 | else if (loop->vtop && GET_CODE (reg2) == CONST_INT) |
7bb29472 | 3682 | { |
d888299d | 3683 | rtx temp; |
3684 | ||
6da6f625 | 3685 | /* When running the loop optimizer twice, check_dbra_loop |
3686 | further obfuscates reversible loops of the form: | |
3687 | for (i = init; i < init + const; i++). We often end up with | |
3688 | final_value = 0, initial_value = temp, temp = temp2 - init, | |
3689 | where temp2 = init + const. If the loop has a vtop we | |
3690 | can replace initial_value with const. */ | |
d888299d | 3691 | |
15fc3eb7 | 3692 | temp = loop_find_equiv_value (loop, reg1); |
3693 | ||
d888299d | 3694 | if (GET_CODE (temp) == MINUS && REG_P (XEXP (temp, 0))) |
3695 | { | |
15fc3eb7 | 3696 | rtx temp2 = loop_find_equiv_value (loop, XEXP (temp, 0)); |
3697 | ||
d888299d | 3698 | if (GET_CODE (temp2) == PLUS |
3699 | && XEXP (temp2, 0) == XEXP (temp, 1)) | |
3700 | initial_value = XEXP (temp2, 1); | |
3701 | } | |
7bb29472 | 3702 | } |
3703 | } | |
9058b363 | 3704 | |
d888299d | 3705 | /* If have initial_value = reg + const1 and final_value = reg + |
3706 | const2, then replace initial_value with const1 and final_value | |
3707 | with const2. This should be safe since we are protected by the | |
3708 | initial comparison before entering the loop if we have a vtop. | |
3709 | For example, a + b < a + c is not equivalent to b < c for all a | |
3710 | when using modulo arithmetic. | |
3711 | ||
3712 | ??? Without a vtop we could still perform the optimization if we check | |
3713 | the initial and final values carefully. */ | |
ec7d7ef9 | 3714 | if (loop->vtop |
d888299d | 3715 | && (reg_term = find_common_reg_term (initial_value, final_value))) |
3716 | { | |
3717 | initial_value = subtract_reg_term (initial_value, reg_term); | |
3718 | final_value = subtract_reg_term (final_value, reg_term); | |
3719 | } | |
3720 | ||
7bb29472 | 3721 | loop_info->initial_equiv_value = initial_value; |
3722 | loop_info->final_equiv_value = final_value; | |
9058b363 | 3723 | |
94836c40 | 3724 | /* For EQ comparison loops, we don't have a valid final value. |
3725 | Check this now so that we won't leave an invalid value if we | |
3726 | return early for any other reason. */ | |
3727 | if (comparison_code == EQ) | |
6da6f625 | 3728 | loop_info->final_equiv_value = loop_info->final_value = 0; |
94836c40 | 3729 | |
a3254dd2 | 3730 | if (increment == 0) |
3731 | { | |
3732 | if (loop_dump_stream) | |
3733 | fprintf (loop_dump_stream, | |
7bb29472 | 3734 | "Loop iterations: Increment value can't be calculated.\n"); |
a3254dd2 | 3735 | return 0; |
3736 | } | |
7bb29472 | 3737 | |
3738 | if (GET_CODE (increment) != CONST_INT) | |
a3254dd2 | 3739 | { |
02f1afa5 | 3740 | /* If we have a REG, check to see if REG holds a constant value. */ |
3741 | /* ??? Other RTL, such as (neg (reg)) is possible here, but it isn't | |
3742 | clear if it is worthwhile to try to handle such RTL. */ | |
3743 | if (GET_CODE (increment) == REG || GET_CODE (increment) == SUBREG) | |
15fc3eb7 | 3744 | increment = loop_find_equiv_value (loop, increment); |
7bb29472 | 3745 | |
3746 | if (GET_CODE (increment) != CONST_INT) | |
3747 | { | |
3748 | if (loop_dump_stream) | |
3749 | { | |
3750 | fprintf (loop_dump_stream, | |
3751 | "Loop iterations: Increment value not constant "); | |
bb6f5cc9 | 3752 | print_simple_rtl (loop_dump_stream, increment); |
7bb29472 | 3753 | fprintf (loop_dump_stream, ".\n"); |
3754 | } | |
3755 | return 0; | |
3756 | } | |
3757 | loop_info->increment = increment; | |
a3254dd2 | 3758 | } |
7bb29472 | 3759 | |
3760 | if (GET_CODE (initial_value) != CONST_INT) | |
a3254dd2 | 3761 | { |
3762 | if (loop_dump_stream) | |
7bb29472 | 3763 | { |
3764 | fprintf (loop_dump_stream, | |
3765 | "Loop iterations: Initial value not constant "); | |
bb6f5cc9 | 3766 | print_simple_rtl (loop_dump_stream, initial_value); |
7bb29472 | 3767 | fprintf (loop_dump_stream, ".\n"); |
3768 | } | |
a3254dd2 | 3769 | return 0; |
3770 | } | |
cdbd2a92 | 3771 | else if (GET_CODE (final_value) != CONST_INT) |
3772 | { | |
3773 | if (loop_dump_stream) | |
7bb29472 | 3774 | { |
3775 | fprintf (loop_dump_stream, | |
3776 | "Loop iterations: Final value not constant "); | |
bb6f5cc9 | 3777 | print_simple_rtl (loop_dump_stream, final_value); |
7bb29472 | 3778 | fprintf (loop_dump_stream, ".\n"); |
3779 | } | |
cdbd2a92 | 3780 | return 0; |
3781 | } | |
e10235cc | 3782 | else if (comparison_code == EQ) |
3783 | { | |
3784 | rtx inc_once; | |
3785 | ||
3786 | if (loop_dump_stream) | |
3787 | fprintf (loop_dump_stream, "Loop iterations: EQ comparison loop.\n"); | |
3788 | ||
3789 | inc_once = gen_int_mode (INTVAL (initial_value) + INTVAL (increment), | |
3790 | GET_MODE (iteration_var)); | |
3791 | ||
3792 | if (inc_once == final_value) | |
3793 | { | |
3794 | /* The iterator value once through the loop is equal to the | |
3795 | comparision value. Either we have an infinite loop, or | |
3796 | we'll loop twice. */ | |
3797 | if (increment == const0_rtx) | |
3798 | return 0; | |
3799 | loop_info->n_iterations = 2; | |
3800 | } | |
3801 | else | |
3802 | loop_info->n_iterations = 1; | |
3803 | ||
3804 | if (GET_CODE (loop_info->initial_value) == CONST_INT) | |
3805 | loop_info->final_value | |
3806 | = gen_int_mode ((INTVAL (loop_info->initial_value) | |
3807 | + loop_info->n_iterations * INTVAL (increment)), | |
3808 | GET_MODE (iteration_var)); | |
3809 | else | |
3810 | loop_info->final_value | |
3811 | = plus_constant (loop_info->initial_value, | |
3812 | loop_info->n_iterations * INTVAL (increment)); | |
3813 | loop_info->final_equiv_value | |
3814 | = gen_int_mode ((INTVAL (initial_value) | |
3815 | + loop_info->n_iterations * INTVAL (increment)), | |
3816 | GET_MODE (iteration_var)); | |
3817 | return loop_info->n_iterations; | |
3818 | } | |
cdbd2a92 | 3819 | |
cdbd2a92 | 3820 | /* Final_larger is 1 if final larger, 0 if they are equal, otherwise -1. */ |
7bb29472 | 3821 | if (unsigned_p) |
cdbd2a92 | 3822 | final_larger |
d3115c90 | 3823 | = ((unsigned HOST_WIDE_INT) INTVAL (final_value) |
3824 | > (unsigned HOST_WIDE_INT) INTVAL (initial_value)) | |
3825 | - ((unsigned HOST_WIDE_INT) INTVAL (final_value) | |
3826 | < (unsigned HOST_WIDE_INT) INTVAL (initial_value)); | |
cdbd2a92 | 3827 | else |
d3115c90 | 3828 | final_larger = (INTVAL (final_value) > INTVAL (initial_value)) |
3829 | - (INTVAL (final_value) < INTVAL (initial_value)); | |
cdbd2a92 | 3830 | |
3831 | if (INTVAL (increment) > 0) | |
3832 | increment_dir = 1; | |
3833 | else if (INTVAL (increment) == 0) | |
3834 | increment_dir = 0; | |
3835 | else | |
3836 | increment_dir = -1; | |
3837 | ||
3838 | /* There are 27 different cases: compare_dir = -1, 0, 1; | |
3839 | final_larger = -1, 0, 1; increment_dir = -1, 0, 1. | |
3840 | There are 4 normal cases, 4 reverse cases (where the iteration variable | |
3841 | will overflow before the loop exits), 4 infinite loop cases, and 15 | |
3842 | immediate exit (0 or 1 iteration depending on loop type) cases. | |
3843 | Only try to optimize the normal cases. */ | |
9058b363 | 3844 | |
cdbd2a92 | 3845 | /* (compare_dir/final_larger/increment_dir) |
3846 | Normal cases: (0/-1/-1), (0/1/1), (-1/-1/-1), (1/1/1) | |
3847 | Reverse cases: (0/-1/1), (0/1/-1), (-1/-1/1), (1/1/-1) | |
3848 | Infinite loops: (0/-1/0), (0/1/0), (-1/-1/0), (1/1/0) | |
3849 | Immediate exit: (0/0/X), (-1/0/X), (-1/1/X), (1/0/X), (1/-1/X) */ | |
3850 | ||
3851 | /* ?? If the meaning of reverse loops (where the iteration variable | |
3852 | will overflow before the loop exits) is undefined, then could | |
3853 | eliminate all of these special checks, and just always assume | |
3854 | the loops are normal/immediate/infinite. Note that this means | |
3855 | the sign of increment_dir does not have to be known. Also, | |
3856 | since it does not really hurt if immediate exit loops or infinite loops | |
3857 | are optimized, then that case could be ignored also, and hence all | |
3858 | loops can be optimized. | |
3859 | ||
3860 | According to ANSI Spec, the reverse loop case result is undefined, | |
3861 | because the action on overflow is undefined. | |
3862 | ||
3863 | See also the special test for NE loops below. */ | |
3864 | ||
3865 | if (final_larger == increment_dir && final_larger != 0 | |
3866 | && (final_larger == compare_dir || compare_dir == 0)) | |
3867 | /* Normal case. */ | |
3868 | ; | |
3869 | else | |
3870 | { | |
3871 | if (loop_dump_stream) | |
6da6f625 | 3872 | fprintf (loop_dump_stream, "Loop iterations: Not normal loop.\n"); |
cdbd2a92 | 3873 | return 0; |
3874 | } | |
3875 | ||
3876 | /* Calculate the number of iterations, final_value is only an approximation, | |
7bb29472 | 3877 | so correct for that. Note that abs_diff and n_iterations are |
cdbd2a92 | 3878 | unsigned, because they can be as large as 2^n - 1. */ |
3879 | ||
9c3da436 | 3880 | inc = INTVAL (increment); |
3881 | if (inc > 0) | |
3882 | { | |
3883 | abs_diff = INTVAL (final_value) - INTVAL (initial_value); | |
3884 | abs_inc = inc; | |
3885 | } | |
3886 | else if (inc < 0) | |
cdbd2a92 | 3887 | { |
7bb29472 | 3888 | abs_diff = INTVAL (initial_value) - INTVAL (final_value); |
9c3da436 | 3889 | abs_inc = -inc; |
cdbd2a92 | 3890 | } |
3891 | else | |
3892 | abort (); | |
3893 | ||
9c3da436 | 3894 | /* Given that iteration_var is going to iterate over its own mode, |
3895 | not HOST_WIDE_INT, disregard higher bits that might have come | |
3896 | into the picture due to sign extension of initial and final | |
3897 | values. */ | |
ac0c7fb1 | 3898 | abs_diff &= ((unsigned HOST_WIDE_INT) 1 |
9c3da436 | 3899 | << (GET_MODE_BITSIZE (GET_MODE (iteration_var)) - 1) |
3900 | << 1) - 1; | |
213b27c9 | 3901 | |
7bb29472 | 3902 | /* For NE tests, make sure that the iteration variable won't miss |
3903 | the final value. If abs_diff mod abs_incr is not zero, then the | |
3904 | iteration variable will overflow before the loop exits, and we | |
3905 | can not calculate the number of iterations. */ | |
3906 | if (compare_dir == 0 && (abs_diff % abs_inc) != 0) | |
cdbd2a92 | 3907 | return 0; |
3908 | ||
7bb29472 | 3909 | /* Note that the number of iterations could be calculated using |
3910 | (abs_diff + abs_inc - 1) / abs_inc, provided care was taken to | |
3911 | handle potential overflow of the summation. */ | |
3912 | loop_info->n_iterations = abs_diff / abs_inc + ((abs_diff % abs_inc) != 0); | |
3913 | return loop_info->n_iterations; | |
cdbd2a92 | 3914 | } |
ee2ce5c0 | 3915 | |
c3418f42 | 3916 | /* Replace uses of split bivs with their split pseudo register. This is |
ee2ce5c0 | 3917 | for original instructions which remain after loop unrolling without |
3918 | copying. */ | |
3919 | ||
3920 | static rtx | |
8ec5f078 | 3921 | remap_split_bivs (loop, x) |
3922 | struct loop *loop; | |
ee2ce5c0 | 3923 | rtx x; |
3924 | { | |
8ec5f078 | 3925 | struct loop_ivs *ivs = LOOP_IVS (loop); |
19cb6b50 | 3926 | enum rtx_code code; |
3927 | int i; | |
3928 | const char *fmt; | |
ee2ce5c0 | 3929 | |
3930 | if (x == 0) | |
3931 | return x; | |
3932 | ||
3933 | code = GET_CODE (x); | |
3934 | switch (code) | |
3935 | { | |
3936 | case SCRATCH: | |
3937 | case PC: | |
3938 | case CC0: | |
3939 | case CONST_INT: | |
3940 | case CONST_DOUBLE: | |
3941 | case CONST: | |
3942 | case SYMBOL_REF: | |
3943 | case LABEL_REF: | |
3944 | return x; | |
3945 | ||
3946 | case REG: | |
3947 | #if 0 | |
3948 | /* If non-reduced/final-value givs were split, then this would also | |
3949 | have to remap those givs also. */ | |
3950 | #endif | |
d1908618 | 3951 | if (REGNO (x) < ivs->n_regs |
8ec5f078 | 3952 | && REG_IV_TYPE (ivs, REGNO (x)) == BASIC_INDUCT) |
b995c337 | 3953 | return REG_IV_CLASS (ivs, REGNO (x))->biv->src_reg; |
0dbd1c74 | 3954 | break; |
9058b363 | 3955 | |
0dbd1c74 | 3956 | default: |
3957 | break; | |
ee2ce5c0 | 3958 | } |
3959 | ||
3960 | fmt = GET_RTX_FORMAT (code); | |
3961 | for (i = GET_RTX_LENGTH (code) - 1; i >= 0; i--) | |
3962 | { | |
3963 | if (fmt[i] == 'e') | |
8ec5f078 | 3964 | XEXP (x, i) = remap_split_bivs (loop, XEXP (x, i)); |
1bd8ca86 | 3965 | else if (fmt[i] == 'E') |
ee2ce5c0 | 3966 | { |
19cb6b50 | 3967 | int j; |
ee2ce5c0 | 3968 | for (j = 0; j < XVECLEN (x, i); j++) |
8ec5f078 | 3969 | XVECEXP (x, i, j) = remap_split_bivs (loop, XVECEXP (x, i, j)); |
ee2ce5c0 | 3970 | } |
3971 | } | |
3972 | return x; | |
3973 | } | |
e5530f5b | 3974 | |
3975 | /* If FIRST_UID is a set of REGNO, and FIRST_UID dominates LAST_UID (e.g. | |
3976 | FIST_UID is always executed if LAST_UID is), then return 1. Otherwise | |
3977 | return 0. COPY_START is where we can start looking for the insns | |
3978 | FIRST_UID and LAST_UID. COPY_END is where we stop looking for these | |
3979 | insns. | |
3980 | ||
3981 | If there is no JUMP_INSN between LOOP_START and FIRST_UID, then FIRST_UID | |
3982 | must dominate LAST_UID. | |
3983 | ||
3984 | If there is a CODE_LABEL between FIRST_UID and LAST_UID, then FIRST_UID | |
3985 | may not dominate LAST_UID. | |
3986 | ||
3987 | If there is no CODE_LABEL between FIRST_UID and LAST_UID, then FIRST_UID | |
3988 | must dominate LAST_UID. */ | |
3989 | ||
3990 | int | |
3991 | set_dominates_use (regno, first_uid, last_uid, copy_start, copy_end) | |
3992 | int regno; | |
3993 | int first_uid; | |
3994 | int last_uid; | |
3995 | rtx copy_start; | |
3996 | rtx copy_end; | |
3997 | { | |
3998 | int passed_jump = 0; | |
3999 | rtx p = NEXT_INSN (copy_start); | |
4000 | ||
4001 | while (INSN_UID (p) != first_uid) | |
4002 | { | |
4003 | if (GET_CODE (p) == JUMP_INSN) | |
6da6f625 | 4004 | passed_jump = 1; |
e5530f5b | 4005 | /* Could not find FIRST_UID. */ |
4006 | if (p == copy_end) | |
4007 | return 0; | |
4008 | p = NEXT_INSN (p); | |
4009 | } | |
4010 | ||
4011 | /* Verify that FIRST_UID is an insn that entirely sets REGNO. */ | |
9204e736 | 4012 | if (! INSN_P (p) || ! dead_or_set_regno_p (p, regno)) |
e5530f5b | 4013 | return 0; |
4014 | ||
4015 | /* FIRST_UID is always executed. */ | |
4016 | if (passed_jump == 0) | |
4017 | return 1; | |
4018 | ||
4019 | while (INSN_UID (p) != last_uid) | |
4020 | { | |
4021 | /* If we see a CODE_LABEL between FIRST_UID and LAST_UID, then we | |
4022 | can not be sure that FIRST_UID dominates LAST_UID. */ | |
4023 | if (GET_CODE (p) == CODE_LABEL) | |
4024 | return 0; | |
51d2c1df | 4025 | /* Could not find LAST_UID, but we reached the end of the loop, so |
4026 | it must be safe. */ | |
4027 | else if (p == copy_end) | |
4028 | return 1; | |
e5530f5b | 4029 | p = NEXT_INSN (p); |
4030 | } | |
4031 | ||
4032 | /* FIRST_UID is always executed if LAST_UID is executed. */ | |
4033 | return 1; | |
4034 | } | |
3ff5e1f7 | 4035 | |
4036 | /* This routine is called when the number of iterations for the unrolled | |
4037 | loop is one. The goal is to identify a loop that begins with an | |
4038 | unconditional branch to the loop continuation note (or a label just after). | |
4039 | In this case, the unconditional branch that starts the loop needs to be | |
4040 | deleted so that we execute the single iteration. */ | |
6da6f625 | 4041 | |
3ff5e1f7 | 4042 | static rtx |
4043 | ujump_to_loop_cont (loop_start, loop_cont) | |
6da6f625 | 4044 | rtx loop_start; |
4045 | rtx loop_cont; | |
3ff5e1f7 | 4046 | { |
4047 | rtx x, label, label_ref; | |
4048 | ||
4049 | /* See if loop start, or the next insn is an unconditional jump. */ | |
4050 | loop_start = next_nonnote_insn (loop_start); | |
4051 | ||
4052 | x = pc_set (loop_start); | |
4053 | if (!x) | |
4054 | return NULL_RTX; | |
4055 | ||
4056 | label_ref = SET_SRC (x); | |
4057 | if (!label_ref) | |
4058 | return NULL_RTX; | |
4059 | ||
4060 | /* Examine insn after loop continuation note. Return if not a label. */ | |
4061 | label = next_nonnote_insn (loop_cont); | |
4062 | if (label == 0 || GET_CODE (label) != CODE_LABEL) | |
4063 | return NULL_RTX; | |
4064 | ||
4065 | /* Return the loop start if the branch label matches the code label. */ | |
6da6f625 | 4066 | if (CODE_LABEL_NUMBER (label) == CODE_LABEL_NUMBER (XEXP (label_ref, 0))) |
3ff5e1f7 | 4067 | return loop_start; |
4068 | else | |
4069 | return NULL_RTX; | |
3ff5e1f7 | 4070 | } |