]>
Commit | Line | Data |
---|---|---|
67f2de41 | 1 | /* Try to unroll loops, and split induction variables. |
2e1eedd6 | 2 | Copyright (C) 1992, 1993, 1994, 1995, 1997, 1998, 1999, 2000, 2001, |
ec8e098d | 3 | 2002, 2003, 2004 |
a86dc4a3 | 4 | Free Software Foundation, Inc. |
67f2de41 RK |
5 | Contributed by James E. Wilson, Cygnus Support/UC Berkeley. |
6 | ||
1322177d | 7 | This file is part of GCC. |
67f2de41 | 8 | |
1322177d LB |
9 | GCC is free software; you can redistribute it and/or modify it under |
10 | the terms of the GNU General Public License as published by the Free | |
11 | Software Foundation; either version 2, or (at your option) any later | |
12 | version. | |
67f2de41 | 13 | |
1322177d LB |
14 | GCC is distributed in the hope that it will be useful, but WITHOUT ANY |
15 | WARRANTY; without even the implied warranty of MERCHANTABILITY or | |
16 | FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License | |
17 | for more details. | |
67f2de41 RK |
18 | |
19 | You should have received a copy of the GNU General Public License | |
1322177d LB |
20 | along with GCC; see the file COPYING. If not, write to the Free |
21 | Software Foundation, 59 Temple Place - Suite 330, Boston, MA | |
22 | 02111-1307, USA. */ | |
67f2de41 RK |
23 | |
24 | /* Try to unroll a loop, and split induction variables. | |
25 | ||
26 | Loops for which the number of iterations can be calculated exactly are | |
27 | handled specially. If the number of iterations times the insn_count is | |
28 | less than MAX_UNROLLED_INSNS, then the loop is unrolled completely. | |
29 | Otherwise, we try to unroll the loop a number of times modulo the number | |
30 | of iterations, so that only one exit test will be needed. It is unrolled | |
31 | a number of times approximately equal to MAX_UNROLLED_INSNS divided by | |
32 | the insn count. | |
33 | ||
34 | Otherwise, if the number of iterations can be calculated exactly at | |
35 | run time, and the loop is always entered at the top, then we try to | |
36 | precondition the loop. That is, at run time, calculate how many times | |
37 | the loop will execute, and then execute the loop body a few times so | |
38 | that the remaining iterations will be some multiple of 4 (or 2 if the | |
39 | loop is large). Then fall through to a loop unrolled 4 (or 2) times, | |
40 | with only one exit test needed at the end of the loop. | |
41 | ||
 42 | Otherwise, if the number of iterations cannot be calculated exactly, | 
43 | not even at run time, then we still unroll the loop a number of times | |
44 | approximately equal to MAX_UNROLLED_INSNS divided by the insn count, | |
45 | but there must be an exit test after each copy of the loop body. | |
46 | ||
47 | For each induction variable, which is dead outside the loop (replaceable) | |
48 | or for which we can easily calculate the final value, if we can easily | |
49 | calculate its value at each place where it is set as a function of the | |
50 | current loop unroll count and the variable's value at loop entry, then | |
51 | the induction variable is split into `N' different variables, one for | |
52 | each copy of the loop body. One variable is live across the backward | |
53 | branch, and the others are all calculated as a function of this variable. | |
54 | This helps eliminate data dependencies, and leads to further opportunities | |
55 | for cse. */ | |
56 | ||
57 | /* Possible improvements follow: */ | |
58 | ||
59 | /* ??? Add an extra pass somewhere to determine whether unrolling will | |
60 | give any benefit. E.g. after generating all unrolled insns, compute the | |
61 | cost of all insns and compare against cost of insns in rolled loop. | |
62 | ||
63 | - On traditional architectures, unrolling a non-constant bound loop | |
64 | is a win if there is a giv whose only use is in memory addresses, the | |
6dc42e49 | 65 | memory addresses can be split, and hence giv increments can be |
67f2de41 RK |
66 | eliminated. |
67 | - It is also a win if the loop is executed many times, and preconditioning | |
68 | can be performed for the loop. | |
69 | Add code to check for these and similar cases. */ | |
70 | ||
71 | /* ??? Improve control of which loops get unrolled. Could use profiling | |
72 | info to only unroll the most commonly executed loops. Perhaps have | |
14b493d6 | 73 | a user specifiable option to control the amount of code expansion, |
67f2de41 RK |
74 | or the percent of loops to consider for unrolling. Etc. */ |
75 | ||
76 | /* ??? Look at the register copies inside the loop to see if they form a | |
77 | simple permutation. If so, iterate the permutation until it gets back to | |
78 | the start state. This is how many times we should unroll the loop, for | |
79 | best results, because then all register copies can be eliminated. | |
80 | For example, the lisp nreverse function should be unrolled 3 times | |
81 | while (this) | |
82 | { | |
83 | next = this->cdr; | |
84 | this->cdr = prev; | |
85 | prev = this; | |
86 | this = next; | |
87 | } | |
88 | ||
89 | ??? The number of times to unroll the loop may also be based on data | |
90 | references in the loop. For example, if we have a loop that references | |
91 | x[i-1], x[i], and x[i+1], we should unroll it a multiple of 3 times. */ | |
92 | ||
93 | /* ??? Add some simple linear equation solving capability so that we can | |
94 | determine the number of loop iterations for more complex loops. | |
95 | For example, consider this loop from gdb | |
96 | #define SWAP_TARGET_AND_HOST(buffer,len) | |
97 | { | |
98 | char tmp; | |
99 | char *p = (char *) buffer; | |
100 | char *q = ((char *) buffer) + len - 1; | |
101 | int iterations = (len + 1) >> 1; | |
102 | int i; | |
 103 | for (p; p < q; p++, q--) | 
a5af23fe R |
104 | { |
105 | tmp = *q; | |
106 | *q = *p; | |
107 | *p = tmp; | |
108 | } | |
67f2de41 RK |
109 | } |
110 | Note that: | |
111 | start value = p = &buffer + current_iteration | |
112 | end value = q = &buffer + len - 1 - current_iteration | |
113 | Given the loop exit test of "p < q", then there must be "q - p" iterations, | |
114 | set equal to zero and solve for number of iterations: | |
115 | q - p = len - 1 - 2*current_iteration = 0 | |
116 | current_iteration = (len - 1) / 2 | |
117 | Hence, there are (len - 1) / 2 (rounded up to the nearest integer) | |
118 | iterations of this loop. */ | |
119 | ||
120 | /* ??? Currently, no labels are marked as loop invariant when doing loop | |
121 | unrolling. This is because an insn inside the loop, that loads the address | |
122 | of a label inside the loop into a register, could be moved outside the loop | |
123 | by the invariant code motion pass if labels were invariant. If the loop | |
124 | is subsequently unrolled, the code will be wrong because each unrolled | |
125 | body of the loop will use the same address, whereas each actually needs a | |
126 | different address. A case where this happens is when a loop containing | |
127 | a switch statement is unrolled. | |
128 | ||
129 | It would be better to let labels be considered invariant. When we | |
130 | unroll loops here, check to see if any insns using a label local to the | |
131 | loop were moved before the loop. If so, then correct the problem, by | |
132 | moving the insn back into the loop, or perhaps replicate the insn before | |
133 | the loop, one copy for each time the loop is unrolled. */ | |
134 | ||
f6e67fa5 KG |
135 | #include "config.h" |
136 | #include "system.h" | |
4977bab6 ZW |
137 | #include "coretypes.h" |
138 | #include "tm.h" | |
f6e67fa5 KG |
139 | #include "rtl.h" |
140 | #include "tm_p.h" | |
141 | #include "insn-config.h" | |
142 | #include "integrate.h" | |
143 | #include "regs.h" | |
144 | #include "recog.h" | |
145 | #include "flags.h" | |
146 | #include "function.h" | |
147 | #include "expr.h" | |
148 | #include "loop.h" | |
149 | #include "toplev.h" | |
150 | #include "hard-reg-set.h" | |
151 | #include "basic-block.h" | |
152 | #include "predict.h" | |
03e9dbc9 | 153 | #include "params.h" |
3d436d2a | 154 | #include "cfgloop.h" |
f6e67fa5 | 155 | |
67f2de41 RK |
156 | /* The prime factors looked for when trying to unroll a loop by some |
157 | number which is modulo the total number of iterations. Just checking | |
158 | for these 4 prime factors will find at least one factor for 75% of | |
159 | all numbers theoretically. Practically speaking, this will succeed | |
160 | almost all of the time since loops are generally a multiple of 2 | |
161 | and/or 5. */ | |
162 | ||
163 | #define NUM_FACTORS 4 | |
164 | ||
c083a819 | 165 | static struct _factor { const int factor; int count; } |
a86dc4a3 | 166 | factors[NUM_FACTORS] = { {2, 0}, {3, 0}, {5, 0}, {7, 0}}; |
a5af23fe | 167 | |
67f2de41 RK |
168 | /* Describes the different types of loop unrolling performed. */ |
169 | ||
a86dc4a3 KH |
170 | enum unroll_types |
171 | { | |
172 | UNROLL_COMPLETELY, | |
173 | UNROLL_MODULO, | |
174 | UNROLL_NAIVE | |
175 | }; | |
67f2de41 | 176 | |
0e9e1e0a | 177 | /* Indexed by register number, if nonzero, then it contains a pointer |
67f2de41 RK |
178 | to a struct induction for a DEST_REG giv which has been combined with |
179 | one of more address givs. This is needed because whenever such a DEST_REG | |
180 | giv is modified, we must modify the value of all split address givs | |
181 | that were combined with this DEST_REG giv. */ | |
182 | ||
183 | static struct induction **addr_combined_regs; | |
184 | ||
185 | /* Indexed by register number, if this is a splittable induction variable, | |
186 | then this will hold the current value of the register, which depends on the | |
187 | iteration number. */ | |
188 | ||
189 | static rtx *splittable_regs; | |
190 | ||
191 | /* Indexed by register number, if this is a splittable induction variable, | |
192 | then this will hold the number of instructions in the loop that modify | |
193 | the induction variable. Used to ensure that only the last insn modifying | |
194 | a split iv will update the original iv of the dest. */ | |
195 | ||
196 | static int *splittable_regs_updates; | |
197 | ||
67f2de41 RK |
198 | /* Forward declarations. */ |
199 | ||
2e1eedd6 AJ |
200 | static rtx simplify_cmp_and_jump_insns (enum rtx_code, enum machine_mode, |
201 | rtx, rtx, rtx); | |
202 | static void init_reg_map (struct inline_remap *, int); | |
203 | static rtx calculate_giv_inc (rtx, rtx, unsigned int); | |
204 | static rtx initial_reg_note_copy (rtx, struct inline_remap *); | |
205 | static void final_reg_note_copy (rtx *, struct inline_remap *); | |
206 | static void copy_loop_body (struct loop *, rtx, rtx, | |
207 | struct inline_remap *, rtx, int, | |
208 | enum unroll_types, rtx, rtx, rtx, rtx); | |
209 | static int find_splittable_regs (const struct loop *, enum unroll_types, | |
210 | int); | |
211 | static int find_splittable_givs (const struct loop *, struct iv_class *, | |
212 | enum unroll_types, rtx, int); | |
213 | static int reg_dead_after_loop (const struct loop *, rtx); | |
214 | static rtx fold_rtx_mult_add (rtx, rtx, rtx, enum machine_mode); | |
215 | static rtx remap_split_bivs (struct loop *, rtx); | |
216 | static rtx find_common_reg_term (rtx, rtx); | |
217 | static rtx subtract_reg_term (rtx, rtx); | |
218 | static rtx loop_find_equiv_value (const struct loop *, rtx); | |
219 | static rtx ujump_to_loop_cont (rtx, rtx); | |
67f2de41 RK |
220 | |
221 | /* Try to unroll one loop and split induction variables in the loop. | |
222 | ||
0534b804 | 223 | The loop is described by the arguments LOOP and INSN_COUNT. |
96a45535 MH |
224 | STRENGTH_REDUCTION_P indicates whether information generated in the |
225 | strength reduction pass is available. | |
67f2de41 RK |
226 | |
227 | This function is intended to be called from within `strength_reduce' | |
228 | in loop.c. */ | |
229 | ||
230 | void | |
2e1eedd6 | 231 | unroll_loop (struct loop *loop, int insn_count, int strength_reduce_p) |
67f2de41 | 232 | { |
ed5bb68d MH |
233 | struct loop_info *loop_info = LOOP_INFO (loop); |
234 | struct loop_ivs *ivs = LOOP_IVS (loop); | |
1953b2a3 | 235 | int i, j; |
770ae6cc | 236 | unsigned int r; |
1953b2a3 | 237 | unsigned HOST_WIDE_INT temp; |
c8d8ed65 | 238 | int unroll_number = 1; |
67f2de41 | 239 | rtx copy_start, copy_end; |
29a82058 | 240 | rtx insn, sequence, pattern, tem; |
67f2de41 RK |
241 | int max_labelno, max_insnno; |
242 | rtx insert_before; | |
243 | struct inline_remap *map; | |
6a651371 | 244 | char *local_label = NULL; |
7f5b8ca7 | 245 | char *local_regno; |
770ae6cc RK |
246 | unsigned int max_local_regnum; |
247 | unsigned int maxregnum; | |
67f2de41 RK |
248 | rtx exit_label = 0; |
249 | rtx start_label; | |
250 | struct iv_class *bl; | |
67f2de41 | 251 | int splitting_not_safe = 0; |
78a0d70c | 252 | enum unroll_types unroll_type = UNROLL_NAIVE; |
67f2de41 RK |
253 | int loop_preconditioned = 0; |
254 | rtx safety_label; | |
255 | /* This points to the last real insn in the loop, which should be either | |
256 | a JUMP_INSN (for conditional jumps) or a BARRIER (for unconditional | |
257 | jumps). */ | |
258 | rtx last_loop_insn; | |
a2be868f MH |
259 | rtx loop_start = loop->start; |
260 | rtx loop_end = loop->end; | |
67f2de41 RK |
261 | |
262 | /* Don't bother unrolling huge loops. Since the minimum factor is | |
263 | two, loops greater than one half of MAX_UNROLLED_INSNS will never | |
264 | be unrolled. */ | |
265 | if (insn_count > MAX_UNROLLED_INSNS / 2) | |
266 | { | |
267 | if (loop_dump_stream) | |
268 | fprintf (loop_dump_stream, "Unrolling failure: Loop too big.\n"); | |
269 | return; | |
270 | } | |
271 | ||
67f2de41 RK |
272 | /* Determine type of unroll to perform. Depends on the number of iterations |
273 | and the size of the loop. */ | |
274 | ||
302670f3 MH |
275 | /* If there is no strength reduce info, then set |
276 | loop_info->n_iterations to zero. This can happen if | |
277 | strength_reduce can't find any bivs in the loop. A value of zero | |
278 | indicates that the number of iterations could not be calculated. */ | |
67f2de41 RK |
279 | |
280 | if (! strength_reduce_p) | |
302670f3 | 281 | loop_info->n_iterations = 0; |
67f2de41 | 282 | |
302670f3 | 283 | if (loop_dump_stream && loop_info->n_iterations > 0) |
90ff44cf KG |
284 | fprintf (loop_dump_stream, "Loop unrolling: " HOST_WIDE_INT_PRINT_DEC |
285 | " iterations.\n", loop_info->n_iterations); | |
67f2de41 RK |
286 | |
287 | /* Find and save a pointer to the last nonnote insn in the loop. */ | |
288 | ||
289 | last_loop_insn = prev_nonnote_insn (loop_end); | |
290 | ||
291 | /* Calculate how many times to unroll the loop. Indicate whether or | |
292 | not the loop is being completely unrolled. */ | |
293 | ||
302670f3 | 294 | if (loop_info->n_iterations == 1) |
67f2de41 | 295 | { |
4598ffe9 CM |
296 | /* Handle the case where the loop begins with an unconditional |
297 | jump to the loop condition. Make sure to delete the jump | |
298 | insn, otherwise the loop body will never execute. */ | |
299 | ||
2e2255ff JM |
300 | /* FIXME this actually checks for a jump to the continue point, which |
301 | is not the same as the condition in a for loop. As a result, this | |
302 | optimization fails for most for loops. We should really use flow | |
303 | information rather than instruction pattern matching. */ | |
4598ffe9 | 304 | rtx ujump = ujump_to_loop_cont (loop->start, loop->cont); |
a86dc4a3 | 305 | |
67f2de41 RK |
306 | /* If number of iterations is exactly 1, then eliminate the compare and |
307 | branch at the end of the loop since they will never be taken. | |
308 | Then return, since no other action is needed here. */ | |
309 | ||
310 | /* If the last instruction is not a BARRIER or a JUMP_INSN, then | |
311 | don't do anything. */ | |
312 | ||
4b4bf941 | 313 | if (BARRIER_P (last_loop_insn)) |
67f2de41 RK |
314 | { |
315 | /* Delete the jump insn. This will delete the barrier also. */ | |
2e2255ff | 316 | last_loop_insn = PREV_INSN (last_loop_insn); |
67f2de41 | 317 | } |
2e2255ff | 318 | |
4b4bf941 | 319 | if (ujump && JUMP_P (last_loop_insn)) |
67f2de41 | 320 | { |
da09e317 | 321 | #ifdef HAVE_cc0 |
b30f05db | 322 | rtx prev = PREV_INSN (last_loop_insn); |
da09e317 | 323 | #endif |
53c17031 | 324 | delete_related_insns (last_loop_insn); |
67f2de41 | 325 | #ifdef HAVE_cc0 |
b30f05db | 326 | /* The immediately preceding insn may be a compare which must be |
67f2de41 | 327 | deleted. */ |
44ce0063 | 328 | if (only_sets_cc0_p (prev)) |
53c17031 | 329 | delete_related_insns (prev); |
67f2de41 | 330 | #endif |
f5da5c87 | 331 | |
2e2255ff | 332 | delete_related_insns (ujump); |
f5da5c87 | 333 | |
2e2255ff JM |
334 | /* Remove the loop notes since this is no longer a loop. */ |
335 | if (loop->vtop) | |
336 | delete_related_insns (loop->vtop); | |
337 | if (loop->cont) | |
338 | delete_related_insns (loop->cont); | |
339 | if (loop_start) | |
340 | delete_related_insns (loop_start); | |
341 | if (loop_end) | |
342 | delete_related_insns (loop_end); | |
343 | ||
344 | return; | |
345 | } | |
67f2de41 | 346 | } |
2e2255ff JM |
347 | |
348 | if (loop_info->n_iterations > 0 | |
349 | /* Avoid overflow in the next expression. */ | |
350 | && loop_info->n_iterations < (unsigned) MAX_UNROLLED_INSNS | |
351 | && loop_info->n_iterations * insn_count < (unsigned) MAX_UNROLLED_INSNS) | |
67f2de41 | 352 | { |
302670f3 | 353 | unroll_number = loop_info->n_iterations; |
67f2de41 RK |
354 | unroll_type = UNROLL_COMPLETELY; |
355 | } | |
302670f3 | 356 | else if (loop_info->n_iterations > 0) |
67f2de41 RK |
357 | { |
358 | /* Try to factor the number of iterations. Don't bother with the | |
359 | general case, only using 2, 3, 5, and 7 will get 75% of all | |
360 | numbers theoretically, and almost all in practice. */ | |
361 | ||
362 | for (i = 0; i < NUM_FACTORS; i++) | |
363 | factors[i].count = 0; | |
364 | ||
302670f3 | 365 | temp = loop_info->n_iterations; |
67f2de41 RK |
366 | for (i = NUM_FACTORS - 1; i >= 0; i--) |
367 | while (temp % factors[i].factor == 0) | |
368 | { | |
369 | factors[i].count++; | |
370 | temp = temp / factors[i].factor; | |
371 | } | |
372 | ||
373 | /* Start with the larger factors first so that we generally | |
374 | get lots of unrolling. */ | |
375 | ||
376 | unroll_number = 1; | |
377 | temp = insn_count; | |
378 | for (i = 3; i >= 0; i--) | |
379 | while (factors[i].count--) | |
380 | { | |
a4b5414c | 381 | if (temp * factors[i].factor < (unsigned) MAX_UNROLLED_INSNS) |
67f2de41 RK |
382 | { |
383 | unroll_number *= factors[i].factor; | |
384 | temp *= factors[i].factor; | |
385 | } | |
386 | else | |
387 | break; | |
388 | } | |
389 | ||
390 | /* If we couldn't find any factors, then unroll as in the normal | |
391 | case. */ | |
392 | if (unroll_number == 1) | |
393 | { | |
394 | if (loop_dump_stream) | |
a86dc4a3 | 395 | fprintf (loop_dump_stream, "Loop unrolling: No factors found.\n"); |
67f2de41 RK |
396 | } |
397 | else | |
398 | unroll_type = UNROLL_MODULO; | |
399 | } | |
400 | ||
67f2de41 RK |
401 | /* Default case, calculate number of times to unroll loop based on its |
402 | size. */ | |
78a0d70c | 403 | if (unroll_type == UNROLL_NAIVE) |
67f2de41 RK |
404 | { |
405 | if (8 * insn_count < MAX_UNROLLED_INSNS) | |
406 | unroll_number = 8; | |
407 | else if (4 * insn_count < MAX_UNROLLED_INSNS) | |
408 | unroll_number = 4; | |
409 | else | |
410 | unroll_number = 2; | |
67f2de41 RK |
411 | } |
412 | ||
413 | /* Now we know how many times to unroll the loop. */ | |
414 | ||
415 | if (loop_dump_stream) | |
a86dc4a3 | 416 | fprintf (loop_dump_stream, "Unrolling loop %d times.\n", unroll_number); |
67f2de41 RK |
417 | |
418 | if (unroll_type == UNROLL_COMPLETELY || unroll_type == UNROLL_MODULO) | |
419 | { | |
15fec413 JL |
420 | /* Loops of these types can start with jump down to the exit condition |
421 | in rare circumstances. | |
422 | ||
423 | Consider a pair of nested loops where the inner loop is part | |
424 | of the exit code for the outer loop. | |
425 | ||
426 | In this case jump.c will not duplicate the exit test for the outer | |
427 | loop, so it will start with a jump to the exit code. | |
428 | ||
429 | Then consider if the inner loop turns out to iterate once and | |
430 | only once. We will end up deleting the jumps associated with | |
431 | the inner loop. However, the loop notes are not removed from | |
432 | the instruction stream. | |
433 | ||
434 | And finally assume that we can compute the number of iterations | |
435 | for the outer loop. | |
436 | ||
437 | In this case unroll may want to unroll the outer loop even though | |
438 | it starts with a jump to the outer loop's exit code. | |
439 | ||
440 | We could try to optimize this case, but it hardly seems worth it. | |
441 | Just return without unrolling the loop in such cases. */ | |
442 | ||
67f2de41 | 443 | insn = loop_start; |
4b4bf941 | 444 | while (!LABEL_P (insn) && !JUMP_P (insn)) |
67f2de41 | 445 | insn = NEXT_INSN (insn); |
4b4bf941 | 446 | if (JUMP_P (insn)) |
15fec413 | 447 | return; |
67f2de41 RK |
448 | } |
449 | ||
450 | if (unroll_type == UNROLL_COMPLETELY) | |
451 | { | |
452 | /* Completely unrolling the loop: Delete the compare and branch at | |
453 | the end (the last two instructions). This delete must done at the | |
454 | very end of loop unrolling, to avoid problems with calls to | |
455 | back_branch_in_range_p, which is called by find_splittable_regs. | |
456 | All increments of splittable bivs/givs are changed to load constant | |
457 | instructions. */ | |
458 | ||
459 | copy_start = loop_start; | |
460 | ||
461 | /* Set insert_before to the instruction immediately after the JUMP_INSN | |
462 | (or BARRIER), so that any NOTEs between the JUMP_INSN and the end of | |
463 | the loop will be correctly handled by copy_loop_body. */ | |
464 | insert_before = NEXT_INSN (last_loop_insn); | |
465 | ||
466 | /* Set copy_end to the insn before the jump at the end of the loop. */ | |
4b4bf941 | 467 | if (BARRIER_P (last_loop_insn)) |
67f2de41 | 468 | copy_end = PREV_INSN (PREV_INSN (last_loop_insn)); |
4b4bf941 | 469 | else if (JUMP_P (last_loop_insn)) |
67f2de41 | 470 | { |
b30f05db | 471 | copy_end = PREV_INSN (last_loop_insn); |
67f2de41 | 472 | #ifdef HAVE_cc0 |
b30f05db | 473 | /* The instruction immediately before the JUMP_INSN may be a compare |
67f2de41 | 474 | instruction which we do not want to copy. */ |
b30f05db BS |
475 | if (sets_cc0_p (PREV_INSN (copy_end))) |
476 | copy_end = PREV_INSN (copy_end); | |
67f2de41 RK |
477 | #endif |
478 | } | |
479 | else | |
480 | { | |
481 | /* We currently can't unroll a loop if it doesn't end with a | |
482 | JUMP_INSN. There would need to be a mechanism that recognizes | |
483 | this case, and then inserts a jump after each loop body, which | |
484 | jumps to after the last loop body. */ | |
485 | if (loop_dump_stream) | |
486 | fprintf (loop_dump_stream, | |
487 | "Unrolling failure: loop does not end with a JUMP_INSN.\n"); | |
488 | return; | |
489 | } | |
490 | } | |
491 | else if (unroll_type == UNROLL_MODULO) | |
492 | { | |
493 | /* Partially unrolling the loop: The compare and branch at the end | |
494 | (the last two instructions) must remain. Don't copy the compare | |
495 | and branch instructions at the end of the loop. Insert the unrolled | |
496 | code immediately before the compare/branch at the end so that the | |
497 | code will fall through to them as before. */ | |
498 | ||
499 | copy_start = loop_start; | |
500 | ||
501 | /* Set insert_before to the jump insn at the end of the loop. | |
502 | Set copy_end to before the jump insn at the end of the loop. */ | |
4b4bf941 | 503 | if (BARRIER_P (last_loop_insn)) |
67f2de41 RK |
504 | { |
505 | insert_before = PREV_INSN (last_loop_insn); | |
506 | copy_end = PREV_INSN (insert_before); | |
507 | } | |
4b4bf941 | 508 | else if (JUMP_P (last_loop_insn)) |
67f2de41 | 509 | { |
b30f05db | 510 | insert_before = last_loop_insn; |
67f2de41 | 511 | #ifdef HAVE_cc0 |
b30f05db | 512 | /* The instruction immediately before the JUMP_INSN may be a compare |
67f2de41 | 513 | instruction which we do not want to copy or delete. */ |
b30f05db BS |
514 | if (sets_cc0_p (PREV_INSN (insert_before))) |
515 | insert_before = PREV_INSN (insert_before); | |
67f2de41 | 516 | #endif |
b30f05db | 517 | copy_end = PREV_INSN (insert_before); |
67f2de41 RK |
518 | } |
519 | else | |
520 | { | |
521 | /* We currently can't unroll a loop if it doesn't end with a | |
522 | JUMP_INSN. There would need to be a mechanism that recognizes | |
523 | this case, and then inserts a jump after each loop body, which | |
524 | jumps to after the last loop body. */ | |
525 | if (loop_dump_stream) | |
526 | fprintf (loop_dump_stream, | |
527 | "Unrolling failure: loop does not end with a JUMP_INSN.\n"); | |
528 | return; | |
529 | } | |
530 | } | |
531 | else | |
532 | { | |
533 | /* Normal case: Must copy the compare and branch instructions at the | |
534 | end of the loop. */ | |
535 | ||
4b4bf941 | 536 | if (BARRIER_P (last_loop_insn)) |
67f2de41 RK |
537 | { |
538 | /* Loop ends with an unconditional jump and a barrier. | |
539 | Handle this like above, don't copy jump and barrier. | |
540 | This is not strictly necessary, but doing so prevents generating | |
541 | unconditional jumps to an immediately following label. | |
542 | ||
543 | This will be corrected below if the target of this jump is | |
544 | not the start_label. */ | |
545 | ||
546 | insert_before = PREV_INSN (last_loop_insn); | |
547 | copy_end = PREV_INSN (insert_before); | |
548 | } | |
4b4bf941 | 549 | else if (JUMP_P (last_loop_insn)) |
67f2de41 RK |
550 | { |
551 | /* Set insert_before to immediately after the JUMP_INSN, so that | |
552 | NOTEs at the end of the loop will be correctly handled by | |
553 | copy_loop_body. */ | |
554 | insert_before = NEXT_INSN (last_loop_insn); | |
555 | copy_end = last_loop_insn; | |
556 | } | |
557 | else | |
558 | { | |
559 | /* We currently can't unroll a loop if it doesn't end with a | |
560 | JUMP_INSN. There would need to be a mechanism that recognizes | |
561 | this case, and then inserts a jump after each loop body, which | |
562 | jumps to after the last loop body. */ | |
563 | if (loop_dump_stream) | |
564 | fprintf (loop_dump_stream, | |
565 | "Unrolling failure: loop does not end with a JUMP_INSN.\n"); | |
566 | return; | |
567 | } | |
568 | ||
569 | /* If copying exit test branches because they can not be eliminated, | |
570 | then must convert the fall through case of the branch to a jump past | |
571 | the end of the loop. Create a label to emit after the loop and save | |
572 | it for later use. Do not use the label after the loop, if any, since | |
573 | it might be used by insns outside the loop, or there might be insns | |
574 | added before it later by final_[bg]iv_value which must be after | |
575 | the real exit label. */ | |
576 | exit_label = gen_label_rtx (); | |
577 | ||
578 | insn = loop_start; | |
4b4bf941 | 579 | while (!LABEL_P (insn) && !JUMP_P (insn)) |
67f2de41 RK |
580 | insn = NEXT_INSN (insn); |
581 | ||
4b4bf941 | 582 | if (JUMP_P (insn)) |
67f2de41 RK |
583 | { |
584 | /* The loop starts with a jump down to the exit condition test. | |
585 | Start copying the loop after the barrier following this | |
586 | jump insn. */ | |
587 | copy_start = NEXT_INSN (insn); | |
588 | ||
589 | /* Splitting induction variables doesn't work when the loop is | |
590 | entered via a jump to the bottom, because then we end up doing | |
591 | a comparison against a new register for a split variable, but | |
592 | we did not execute the set insn for the new register because | |
593 | it was skipped over. */ | |
594 | splitting_not_safe = 1; | |
595 | if (loop_dump_stream) | |
596 | fprintf (loop_dump_stream, | |
597 | "Splitting not safe, because loop not entered at top.\n"); | |
598 | } | |
599 | else | |
600 | copy_start = loop_start; | |
601 | } | |
602 | ||
603 | /* This should always be the first label in the loop. */ | |
604 | start_label = NEXT_INSN (copy_start); | |
605 | /* There may be a line number note and/or a loop continue note here. */ | |
4b4bf941 | 606 | while (NOTE_P (start_label)) |
67f2de41 | 607 | start_label = NEXT_INSN (start_label); |
4b4bf941 | 608 | if (!LABEL_P (start_label)) |
67f2de41 RK |
609 | { |
610 | /* This can happen as a result of jump threading. If the first insns in | |
611 | the loop test the same condition as the loop's backward jump, or the | |
612 | opposite condition, then the backward jump will be modified to point | |
613 | to elsewhere, and the loop's start label is deleted. | |
614 | ||
615 | This case currently can not be handled by the loop unrolling code. */ | |
616 | ||
617 | if (loop_dump_stream) | |
618 | fprintf (loop_dump_stream, | |
619 | "Unrolling failure: unknown insns between BEG note and loop label.\n"); | |
620 | return; | |
621 | } | |
b9b817f0 RS |
622 | if (LABEL_NAME (start_label)) |
623 | { | |
624 | /* The jump optimization pass must have combined the original start label | |
625 | with a named label for a goto. We can't unroll this case because | |
626 | jumps which go to the named label must be handled differently than | |
627 | jumps to the loop start, and it is impossible to differentiate them | |
628 | in this case. */ | |
629 | if (loop_dump_stream) | |
630 | fprintf (loop_dump_stream, | |
631 | "Unrolling failure: loop start label is gone\n"); | |
632 | return; | |
633 | } | |
67f2de41 RK |
634 | |
635 | if (unroll_type == UNROLL_NAIVE | |
4b4bf941 JQ |
636 | && BARRIER_P (last_loop_insn) |
637 | && JUMP_P (PREV_INSN (last_loop_insn)) | |
67f2de41 RK |
638 | && start_label != JUMP_LABEL (PREV_INSN (last_loop_insn))) |
639 | { | |
640 | /* In this case, we must copy the jump and barrier, because they will | |
641 | not be converted to jumps to an immediately following label. */ | |
642 | ||
643 | insert_before = NEXT_INSN (last_loop_insn); | |
644 | copy_end = last_loop_insn; | |
645 | } | |
646 | ||
92905582 | 647 | if (unroll_type == UNROLL_NAIVE |
4b4bf941 | 648 | && JUMP_P (last_loop_insn) |
92905582 JW |
649 | && start_label != JUMP_LABEL (last_loop_insn)) |
650 | { | |
651 | /* ??? The loop ends with a conditional branch that does not branch back | |
652 | to the loop start label. In this case, we must emit an unconditional | |
653 | branch to the loop exit after emitting the final branch. | |
654 | copy_loop_body does not have support for this currently, so we | |
655 | give up. It doesn't seem worthwhile to unroll anyways since | |
656 | unrolling would increase the number of branch instructions | |
657 | executed. */ | |
658 | if (loop_dump_stream) | |
659 | fprintf (loop_dump_stream, | |
660 | "Unrolling failure: final conditional branch not to loop start\n"); | |
661 | return; | |
662 | } | |
663 | ||
67f2de41 RK |
664 | /* Allocate a translation table for the labels and insn numbers. |
665 | They will be filled in as we copy the insns in the loop. */ | |
666 | ||
667 | max_labelno = max_label_num (); | |
668 | max_insnno = get_max_uid (); | |
669 | ||
d269eb53 JL |
670 | /* Various paths through the unroll code may reach the "egress" label |
671 | without initializing fields within the map structure. | |
67f2de41 | 672 | |
d269eb53 | 673 | To be safe, we use xcalloc to zero the memory. */ |
703ad42b | 674 | map = xcalloc (1, sizeof (struct inline_remap)); |
bae12186 | 675 | |
67f2de41 RK |
676 | /* Allocate the label map. */ |
677 | ||
678 | if (max_labelno > 0) | |
679 | { | |
703ad42b KG |
680 | map->label_map = xcalloc (max_labelno, sizeof (rtx)); |
681 | local_label = xcalloc (max_labelno, sizeof (char)); | |
67f2de41 | 682 | } |
67f2de41 RK |
683 | |
684 | /* Search the loop and mark all local labels, i.e. the ones which have to | |
685 | be distinct labels when copied. For all labels which might be | |
686 | non-local, set their label_map entries to point to themselves. | |
687 | If they happen to be local their label_map entries will be overwritten | |
688 | before the loop body is copied. The label_map entries for local labels | |
689 | will be set to a different value each time the loop body is copied. */ | |
690 | ||
691 | for (insn = copy_start; insn != loop_end; insn = NEXT_INSN (insn)) | |
692 | { | |
aeb2f500 JW |
693 | rtx note; |
694 | ||
4b4bf941 | 695 | if (LABEL_P (insn)) |
67f2de41 | 696 | local_label[CODE_LABEL_NUMBER (insn)] = 1; |
4b4bf941 | 697 | else if (JUMP_P (insn)) |
67f2de41 RK |
698 | { |
699 | if (JUMP_LABEL (insn)) | |
1f3d3a31 JL |
700 | set_label_in_map (map, |
701 | CODE_LABEL_NUMBER (JUMP_LABEL (insn)), | |
702 | JUMP_LABEL (insn)); | |
67f2de41 RK |
703 | else if (GET_CODE (PATTERN (insn)) == ADDR_VEC |
704 | || GET_CODE (PATTERN (insn)) == ADDR_DIFF_VEC) | |
705 | { | |
706 | rtx pat = PATTERN (insn); | |
707 | int diff_vec_p = GET_CODE (PATTERN (insn)) == ADDR_DIFF_VEC; | |
708 | int len = XVECLEN (pat, diff_vec_p); | |
709 | rtx label; | |
710 | ||
711 | for (i = 0; i < len; i++) | |
712 | { | |
713 | label = XEXP (XVECEXP (pat, diff_vec_p, i), 0); | |
a86dc4a3 | 714 | set_label_in_map (map, CODE_LABEL_NUMBER (label), label); |
67f2de41 RK |
715 | } |
716 | } | |
717 | } | |
f759eb8b | 718 | if ((note = find_reg_note (insn, REG_LABEL, NULL_RTX))) |
aeb2f500 JW |
719 | set_label_in_map (map, CODE_LABEL_NUMBER (XEXP (note, 0)), |
720 | XEXP (note, 0)); | |
67f2de41 RK |
721 | } |
722 | ||
723 | /* Allocate space for the insn map. */ | |
724 | ||
703ad42b | 725 | map->insn_map = xmalloc (max_insnno * sizeof (rtx)); |
67f2de41 | 726 | |
67f2de41 RK |
727 | /* The register and constant maps depend on the number of registers |
728 | present, so the final maps can't be created until after | |
729 | find_splittable_regs is called. However, they are needed for | |
730 | preconditioning, so we create temporary maps when preconditioning | |
731 | is performed. */ | |
732 | ||
733 | /* The preconditioning code may allocate two new pseudo registers. */ | |
734 | maxregnum = max_reg_num (); | |
735 | ||
69688f1e R |
736 | /* local_regno is only valid for regnos < max_local_regnum. */ |
737 | max_local_regnum = maxregnum; | |
738 | ||
67f2de41 RK |
739 | /* Allocate and zero out the splittable_regs and addr_combined_regs |
740 | arrays. These must be zeroed here because they will be used if | |
741 | loop preconditioning is performed, and must be zero for that case. | |
742 | ||
743 | It is safe to do this here, since the extra registers created by the | |
744 | preconditioning code and find_splittable_regs will never be used | |
6dc42e49 | 745 | to access the splittable_regs[] and addr_combined_regs[] arrays. */ |
67f2de41 | 746 | |
703ad42b KG |
747 | splittable_regs = xcalloc (maxregnum, sizeof (rtx)); |
748 | splittable_regs_updates = xcalloc (maxregnum, sizeof (int)); | |
749 | addr_combined_regs = xcalloc (maxregnum, sizeof (struct induction *)); | |
750 | local_regno = xcalloc (maxregnum, sizeof (char)); | |
7f5b8ca7 RK |
751 | |
752 | /* Mark all local registers, i.e. the ones which are referenced only | |
ba68fc32 JW |
753 | inside the loop. */ |
754 | if (INSN_UID (copy_end) < max_uid_for_loop) | |
9bb21998 BS |
755 | { |
756 | int copy_start_luid = INSN_LUID (copy_start); | |
757 | int copy_end_luid = INSN_LUID (copy_end); | |
7f5b8ca7 | 758 | |
9bb21998 BS |
759 | /* If a register is used in the jump insn, we must not duplicate it |
760 | since it will also be used outside the loop. */ | |
4b4bf941 | 761 | if (JUMP_P (copy_end)) |
9bb21998 | 762 | copy_end_luid--; |
47cf37f9 | 763 | |
9bb21998 | 764 | /* If we have a target that uses cc0, then we also must not duplicate |
b30f05db | 765 | the insn that sets cc0 before the jump insn, if one is present. */ |
47cf37f9 | 766 | #ifdef HAVE_cc0 |
4b4bf941 | 767 | if (JUMP_P (copy_end) |
a86dc4a3 | 768 | && sets_cc0_p (PREV_INSN (copy_end))) |
9bb21998 | 769 | copy_end_luid--; |
47cf37f9 JL |
770 | #endif |
771 | ||
9bb21998 BS |
772 | /* If copy_start points to the NOTE that starts the loop, then we must |
773 | use the next luid, because invariant pseudo-regs moved out of the loop | |
774 | have their lifetimes modified to start here, but they are not safe | |
775 | to duplicate. */ | |
776 | if (copy_start == loop_start) | |
777 | copy_start_luid++; | |
778 | ||
779 | /* If a pseudo's lifetime is entirely contained within this loop, then we | |
780 | can use a different pseudo in each unrolled copy of the loop. This | |
781 | results in better code. */ | |
782 | /* We must limit the generic test to max_reg_before_loop, because only | |
783 | these pseudo registers have valid regno_first_uid info. */ | |
770ae6cc | 784 | for (r = FIRST_PSEUDO_REGISTER; r < max_reg_before_loop; ++r) |
5adf448c | 785 | if (REGNO_FIRST_UID (r) > 0 && REGNO_FIRST_UID (r) < max_uid_for_loop |
8529a489 | 786 | && REGNO_FIRST_LUID (r) >= copy_start_luid |
5adf448c | 787 | && REGNO_LAST_UID (r) > 0 && REGNO_LAST_UID (r) < max_uid_for_loop |
8529a489 | 788 | && REGNO_LAST_LUID (r) <= copy_end_luid) |
9bb21998 BS |
789 | { |
790 | /* However, we must also check for loop-carried dependencies. | |
791 | If the value the pseudo has at the end of iteration X is | |
792 | used by iteration X+1, then we can not use a different pseudo | |
793 | for each unrolled copy of the loop. */ | |
794 | /* A pseudo is safe if regno_first_uid is a set, and this | |
795 | set dominates all instructions from regno_first_uid to | |
796 | regno_last_uid. */ | |
797 | /* ??? This check is simplistic. We would get better code if | |
798 | this check was more sophisticated. */ | |
770ae6cc | 799 | if (set_dominates_use (r, REGNO_FIRST_UID (r), REGNO_LAST_UID (r), |
9bb21998 | 800 | copy_start, copy_end)) |
770ae6cc | 801 | local_regno[r] = 1; |
9bb21998 BS |
802 | |
803 | if (loop_dump_stream) | |
804 | { | |
770ae6cc RK |
805 | if (local_regno[r]) |
806 | fprintf (loop_dump_stream, "Marked reg %d as local\n", r); | |
9bb21998 BS |
807 | else |
808 | fprintf (loop_dump_stream, "Did not mark reg %d as local\n", | |
770ae6cc | 809 | r); |
9bb21998 BS |
810 | } |
811 | } | |
9bb21998 | 812 | } |
67f2de41 RK |
813 | |
814 | /* If this loop requires exit tests when unrolled, check to see if we | |
815 | can precondition the loop so as to make the exit tests unnecessary. | |
816 | Just like variable splitting, this is not safe if the loop is entered | |
817 | via a jump to the bottom. Also, can not do this if no strength | |
818 | reduce info, because precondition_loop_p uses this info. */ | |
819 | ||
820 | /* Must copy the loop body for preconditioning before the following | |
821 | find_splittable_regs call since that will emit insns which need to | |
822 | be after the preconditioned loop copies, but immediately before the | |
823 | unrolled loop copies. */ | |
824 | ||
825 | /* Also, it is not safe to split induction variables for the preconditioned | |
826 | copies of the loop body. If we split induction variables, then the code | |
827 | assumes that each induction variable can be represented as a function | |
828 | of its initial value and the loop iteration number. This is not true | |
829 | in this case, because the last preconditioned copy of the loop body | |
830 | could be any iteration from the first up to the `unroll_number-1'th, | |
831 | depending on the initial value of the iteration variable. Therefore | |
832 | we can not split induction variables here, because we can not calculate | |
833 | their value. Hence, this code must occur before find_splittable_regs | |
834 | is called. */ | |
835 | ||
836 | if (unroll_type == UNROLL_NAIVE && ! splitting_not_safe && strength_reduce_p) | |
837 | { | |
838 | rtx initial_value, final_value, increment; | |
e96b4d7a | 839 | enum machine_mode mode; |
67f2de41 | 840 | |
0534b804 | 841 | if (precondition_loop_p (loop, |
e96b4d7a MH |
842 | &initial_value, &final_value, &increment, |
843 | &mode)) | |
67f2de41 | 844 | { |
5d0f3df7 | 845 | rtx diff, insn; |
67f2de41 RK |
846 | rtx *labels; |
847 | int abs_inc, neg_inc; | |
d2384b42 ZH |
848 | enum rtx_code cc = loop_info->comparison_code; |
849 | int less_p = (cc == LE || cc == LEU || cc == LT || cc == LTU); | |
850 | int unsigned_p = (cc == LEU || cc == GEU || cc == LTU || cc == GTU); | |
67f2de41 | 851 | |
703ad42b | 852 | map->reg_map = xmalloc (maxregnum * sizeof (rtx)); |
67f2de41 | 853 | |
c68da89c | 854 | VARRAY_CONST_EQUIV_INIT (map->const_equiv_varray, maxregnum, |
28b6b9b2 | 855 | "unroll_loop_precondition"); |
c68da89c | 856 | global_const_equiv_varray = map->const_equiv_varray; |
67f2de41 RK |
857 | |
858 | init_reg_map (map, maxregnum); | |
859 | ||
860 | /* Limit loop unrolling to 4, since this will make 7 copies of | |
861 | the loop body. */ | |
862 | if (unroll_number > 4) | |
863 | unroll_number = 4; | |
864 | ||
865 | /* Save the absolute value of the increment, and also whether or | |
866 | not it is negative. */ | |
867 | neg_inc = 0; | |
868 | abs_inc = INTVAL (increment); | |
869 | if (abs_inc < 0) | |
870 | { | |
a86dc4a3 | 871 | abs_inc = -abs_inc; |
67f2de41 RK |
872 | neg_inc = 1; |
873 | } | |
874 | ||
875 | start_sequence (); | |
876 | ||
5d0f3df7 RH |
877 | /* We must copy the final and initial values here to avoid |
878 | improperly shared rtl. */ | |
879 | final_value = copy_rtx (final_value); | |
880 | initial_value = copy_rtx (initial_value); | |
881 | ||
48d79c43 JH |
882 | /* Final value may have form of (PLUS val1 const1_rtx). We need |
883 | to convert it into general operand, so compute the real value. */ | |
884 | ||
5d0f3df7 | 885 | final_value = force_operand (final_value, NULL_RTX); |
48d79c43 | 886 | if (!nonmemory_operand (final_value, VOIDmode)) |
5d0f3df7 | 887 | final_value = force_reg (mode, final_value); |
48d79c43 | 888 | |
67f2de41 RK |
889 | /* Calculate the difference between the final and initial values. |
890 | Final value may be a (plus (reg x) (const_int 1)) rtx. | |
d2384b42 ZH |
891 | |
892 | We have to deal with for (i = 0; --i < 6;) type loops. | |
893 | For such loops the real final value is the first time the | |
894 | loop variable overflows, so the diff we calculate is the | |
895 | distance from the overflow value. This is 0 or ~0 for | |
896 | unsigned loops depending on the direction, or INT_MAX, | |
897 | INT_MAX+1 for signed loops. We really do not need the | |
898 | exact value, since we are only interested in the diff | |
899 | modulo the increment, and the increment is a power of 2, | |
900 | so we can pretend that the overflow value is 0/~0. */ | |
901 | ||
902 | if (cc == NE || less_p != neg_inc) | |
5d0f3df7 RH |
903 | diff = simplify_gen_binary (MINUS, mode, final_value, |
904 | initial_value); | |
d2384b42 | 905 | else |
5d0f3df7 RH |
906 | diff = simplify_gen_unary (neg_inc ? NOT : NEG, mode, |
907 | initial_value, mode); | |
908 | diff = force_operand (diff, NULL_RTX); | |
67f2de41 RK |
909 | |
910 | /* Now calculate (diff % (unroll * abs (increment))) by using an | |
911 | and instruction. */ | |
5d0f3df7 RH |
912 | diff = simplify_gen_binary (AND, mode, diff, |
913 | GEN_INT (unroll_number*abs_inc - 1)); | |
914 | diff = force_operand (diff, NULL_RTX); | |
67f2de41 RK |
915 | |
916 | /* Now emit a sequence of branches to jump to the proper precond | |
917 | loop entry point. */ | |
918 | ||
703ad42b | 919 | labels = xmalloc (sizeof (rtx) * unroll_number); |
67f2de41 RK |
920 | for (i = 0; i < unroll_number; i++) |
921 | labels[i] = gen_label_rtx (); | |
922 | ||
1dcfa896 JW |
923 | /* Check for the case where the initial value is greater than or |
924 | equal to the final value. In that case, we want to execute | |
925 | exactly one loop iteration. The code below will fail for this | |
926 | case. This check does not apply if the loop has a NE | |
927 | comparison at the end. */ | |
7edd39eb | 928 | |
d2384b42 | 929 | if (cc != NE) |
1dcfa896 | 930 | { |
d2384b42 | 931 | rtx incremented_initval; |
5d0f3df7 RH |
932 | enum rtx_code cmp_code; |
933 | ||
934 | incremented_initval | |
935 | = simplify_gen_binary (PLUS, mode, initial_value, increment); | |
936 | incremented_initval | |
937 | = force_operand (incremented_initval, NULL_RTX); | |
938 | ||
939 | cmp_code = (less_p | |
940 | ? (unsigned_p ? GEU : GE) | |
941 | : (unsigned_p ? LEU : LE)); | |
942 | ||
943 | insn = simplify_cmp_and_jump_insns (cmp_code, mode, | |
944 | incremented_initval, | |
945 | final_value, labels[1]); | |
946 | if (insn) | |
947 | predict_insn_def (insn, PRED_LOOP_CONDITION, TAKEN); | |
1dcfa896 | 948 | } |
7edd39eb | 949 | |
67f2de41 RK |
950 | /* Assuming the unroll_number is 4, and the increment is 2, then |
951 | for a negative increment: for a positive increment: | |
952 | diff = 0,1 precond 0 diff = 0,7 precond 0 | |
953 | diff = 2,3 precond 3 diff = 1,2 precond 1 | |
954 | diff = 4,5 precond 2 diff = 3,4 precond 2 | |
955 | diff = 6,7 precond 1 diff = 5,6 precond 3 */ | |
956 | ||
957 | /* We only need to emit (unroll_number - 1) branches here, the | |
958 | last case just falls through to the following code. */ | |
959 | ||
960 | /* ??? This would give better code if we emitted a tree of branches | |
961 | instead of the current linear list of branches. */ | |
962 | ||
963 | for (i = 0; i < unroll_number - 1; i++) | |
964 | { | |
965 | int cmp_const; | |
7edd39eb | 966 | enum rtx_code cmp_code; |
67f2de41 RK |
967 | |
968 | /* For negative increments, must invert the constant compared | |
969 | against, except when comparing against zero. */ | |
970 | if (i == 0) | |
7edd39eb RK |
971 | { |
972 | cmp_const = 0; | |
973 | cmp_code = EQ; | |
974 | } | |
67f2de41 | 975 | else if (neg_inc) |
7edd39eb RK |
976 | { |
977 | cmp_const = unroll_number - i; | |
978 | cmp_code = GE; | |
979 | } | |
67f2de41 | 980 | else |
7edd39eb RK |
981 | { |
982 | cmp_const = i; | |
983 | cmp_code = LE; | |
984 | } | |
67f2de41 | 985 | |
5d0f3df7 RH |
986 | insn = simplify_cmp_and_jump_insns (cmp_code, mode, diff, |
987 | GEN_INT (abs_inc*cmp_const), | |
988 | labels[i]); | |
989 | if (insn) | |
990 | predict_insn (insn, PRED_LOOP_PRECONDITIONING, | |
991 | REG_BR_PROB_BASE / (unroll_number - i)); | |
67f2de41 RK |
992 | } |
993 | ||
994 | /* If the increment is greater than one, then we need another branch, | |
995 | to handle other cases equivalent to 0. */ | |
996 | ||
997 | /* ??? This should be merged into the code above somehow to help | |
998 | simplify the code here, and reduce the number of branches emitted. | |
999 | For the negative increment case, the branch here could easily | |
1000 | be merged with the `0' case branch above. For the positive | |
1001 | increment case, it is not clear how this can be simplified. */ | |
a5af23fe | 1002 | |
67f2de41 RK |
1003 | if (abs_inc != 1) |
1004 | { | |
1005 | int cmp_const; | |
7edd39eb | 1006 | enum rtx_code cmp_code; |
67f2de41 RK |
1007 | |
1008 | if (neg_inc) | |
7edd39eb RK |
1009 | { |
1010 | cmp_const = abs_inc - 1; | |
1011 | cmp_code = LE; | |
1012 | } | |
67f2de41 | 1013 | else |
7edd39eb RK |
1014 | { |
1015 | cmp_const = abs_inc * (unroll_number - 1) + 1; | |
1016 | cmp_code = GE; | |
1017 | } | |
67f2de41 | 1018 | |
5d0f3df7 RH |
1019 | simplify_cmp_and_jump_insns (cmp_code, mode, diff, |
1020 | GEN_INT (cmp_const), labels[0]); | |
67f2de41 RK |
1021 | } |
1022 | ||
2f937369 | 1023 | sequence = get_insns (); |
67f2de41 | 1024 | end_sequence (); |
96a45535 | 1025 | loop_insn_hoist (loop, sequence); |
a5af23fe | 1026 | |
67f2de41 RK |
1027 | /* Only the last copy of the loop body here needs the exit |
1028 | test, so set copy_end to exclude the compare/branch here, | |
1029 | and then reset it inside the loop when get to the last | |
1030 | copy. */ | |
1031 | ||
4b4bf941 | 1032 | if (BARRIER_P (last_loop_insn)) |
67f2de41 | 1033 | copy_end = PREV_INSN (PREV_INSN (last_loop_insn)); |
4b4bf941 | 1034 | else if (JUMP_P (last_loop_insn)) |
67f2de41 | 1035 | { |
b30f05db | 1036 | copy_end = PREV_INSN (last_loop_insn); |
67f2de41 | 1037 | #ifdef HAVE_cc0 |
a86dc4a3 KH |
1038 | /* The immediately preceding insn may be a compare which |
1039 | we do not want to copy. */ | |
b30f05db BS |
1040 | if (sets_cc0_p (PREV_INSN (copy_end))) |
1041 | copy_end = PREV_INSN (copy_end); | |
67f2de41 RK |
1042 | #endif |
1043 | } | |
1044 | else | |
1045 | abort (); | |
1046 | ||
1047 | for (i = 1; i < unroll_number; i++) | |
1048 | { | |
1049 | emit_label_after (labels[unroll_number - i], | |
1050 | PREV_INSN (loop_start)); | |
1051 | ||
703ad42b KG |
1052 | memset (map->insn_map, 0, max_insnno * sizeof (rtx)); |
1053 | memset (&VARRAY_CONST_EQUIV (map->const_equiv_varray, 0), | |
961192e1 JM |
1054 | 0, (VARRAY_SIZE (map->const_equiv_varray) |
1055 | * sizeof (struct const_equiv_data))); | |
67f2de41 RK |
1056 | map->const_age = 0; |
1057 | ||
1058 | for (j = 0; j < max_labelno; j++) | |
1059 | if (local_label[j]) | |
1f3d3a31 | 1060 | set_label_in_map (map, j, gen_label_rtx ()); |
67f2de41 | 1061 | |
770ae6cc RK |
1062 | for (r = FIRST_PSEUDO_REGISTER; r < max_local_regnum; r++) |
1063 | if (local_regno[r]) | |
9ae8ffe7 | 1064 | { |
770ae6cc RK |
1065 | map->reg_map[r] |
1066 | = gen_reg_rtx (GET_MODE (regno_reg_rtx[r])); | |
1067 | record_base_value (REGNO (map->reg_map[r]), | |
1068 | regno_reg_rtx[r], 0); | |
9ae8ffe7 | 1069 | } |
67f2de41 RK |
1070 | /* The last copy needs the compare/branch insns at the end, |
1071 | so reset copy_end here if the loop ends with a conditional | |
1072 | branch. */ | |
1073 | ||
1074 | if (i == unroll_number - 1) | |
1075 | { | |
4b4bf941 | 1076 | if (BARRIER_P (last_loop_insn)) |
67f2de41 RK |
1077 | copy_end = PREV_INSN (PREV_INSN (last_loop_insn)); |
1078 | else | |
1079 | copy_end = last_loop_insn; | |
1080 | } | |
1081 | ||
1082 | /* None of the copies are the `last_iteration', so just | |
1083 | pass zero for that parameter. */ | |
ed5bb68d | 1084 | copy_loop_body (loop, copy_start, copy_end, map, exit_label, 0, |
67f2de41 RK |
1085 | unroll_type, start_label, loop_end, |
1086 | loop_start, copy_end); | |
1087 | } | |
1088 | emit_label_after (labels[0], PREV_INSN (loop_start)); | |
1089 | ||
4b4bf941 | 1090 | if (BARRIER_P (last_loop_insn)) |
67f2de41 RK |
1091 | { |
1092 | insert_before = PREV_INSN (last_loop_insn); | |
1093 | copy_end = PREV_INSN (insert_before); | |
1094 | } | |
1095 | else | |
1096 | { | |
67f2de41 | 1097 | insert_before = last_loop_insn; |
b30f05db | 1098 | #ifdef HAVE_cc0 |
a86dc4a3 KH |
1099 | /* The instruction immediately before the JUMP_INSN may |
1100 | be a compare instruction which we do not want to copy | |
1101 | or delete. */ | |
b30f05db BS |
1102 | if (sets_cc0_p (PREV_INSN (insert_before))) |
1103 | insert_before = PREV_INSN (insert_before); | |
67f2de41 | 1104 | #endif |
b30f05db | 1105 | copy_end = PREV_INSN (insert_before); |
67f2de41 RK |
1106 | } |
1107 | ||
1108 | /* Set unroll type to MODULO now. */ | |
1109 | unroll_type = UNROLL_MODULO; | |
1110 | loop_preconditioned = 1; | |
67289ea6 MM |
1111 | |
1112 | /* Clean up. */ | |
1113 | free (labels); | |
67f2de41 RK |
1114 | } |
1115 | } | |
1116 | ||
1117 | /* If reach here, and the loop type is UNROLL_NAIVE, then don't unroll | |
1118 | the loop unless all loops are being unrolled. */ | |
b17d5d7c | 1119 | if (unroll_type == UNROLL_NAIVE && ! flag_old_unroll_all_loops) |
67f2de41 RK |
1120 | { |
1121 | if (loop_dump_stream) | |
a86dc4a3 KH |
1122 | fprintf (loop_dump_stream, |
1123 | "Unrolling failure: Naive unrolling not being done.\n"); | |
c68da89c | 1124 | goto egress; |
67f2de41 RK |
1125 | } |
1126 | ||
1127 | /* At this point, we are guaranteed to unroll the loop. */ | |
1128 | ||
302670f3 | 1129 | /* Keep track of the unroll factor for the loop. */ |
3c748bb6 | 1130 | loop_info->unroll_number = unroll_number; |
8c660648 | 1131 | |
8b583747 AM |
1132 | /* And whether the loop has been preconditioned. */ |
1133 | loop_info->preconditioned = loop_preconditioned; | |
1134 | ||
3eae4643 | 1135 | /* Remember whether it was preconditioned for the second loop pass. */ |
dad482e6 DJ |
1136 | NOTE_PRECONDITIONED (loop->end) = loop_preconditioned; |
1137 | ||
67f2de41 RK |
1138 | /* For each biv and giv, determine whether it can be safely split into |
1139 | a different variable for each unrolled copy of the loop body. | |
1140 | We precalculate and save this info here, since computing it is | |
1141 | expensive. | |
1142 | ||
1143 | Do this before deleting any instructions from the loop, so that | |
1144 | back_branch_in_range_p will work correctly. */ | |
1145 | ||
1146 | if (splitting_not_safe) | |
1147 | temp = 0; | |
1148 | else | |
96a45535 | 1149 | temp = find_splittable_regs (loop, unroll_type, unroll_number); |
67f2de41 RK |
1150 | |
1151 | /* find_splittable_regs may have created some new registers, so must | |
1152 | reallocate the reg_map with the new larger size, and must realloc | |
1153 | the constant maps also. */ | |
1154 | ||
1155 | maxregnum = max_reg_num (); | |
703ad42b | 1156 | map->reg_map = xmalloc (maxregnum * sizeof (rtx)); |
67f2de41 RK |
1157 | |
1158 | init_reg_map (map, maxregnum); | |
1159 | ||
c68da89c KR |
1160 | if (map->const_equiv_varray == 0) |
1161 | VARRAY_CONST_EQUIV_INIT (map->const_equiv_varray, | |
1162 | maxregnum + temp * unroll_number * 2, | |
1163 | "unroll_loop"); | |
1164 | global_const_equiv_varray = map->const_equiv_varray; | |
67f2de41 RK |
1165 | |
1166 | /* Search the list of bivs and givs to find ones which need to be remapped | |
1167 | when split, and set their reg_map entry appropriately. */ | |
1168 | ||
14be28e5 | 1169 | for (bl = ivs->list; bl; bl = bl->next) |
67f2de41 RK |
1170 | { |
1171 | if (REGNO (bl->biv->src_reg) != bl->regno) | |
1172 | map->reg_map[bl->regno] = bl->biv->src_reg; | |
1173 | #if 0 | |
1174 | /* Currently, non-reduced/final-value givs are never split. */ | |
1175 | for (v = bl->giv; v; v = v->next_iv) | |
1176 | if (REGNO (v->src_reg) != bl->regno) | |
1177 | map->reg_map[REGNO (v->dest_reg)] = v->src_reg; | |
1178 | #endif | |
1179 | } | |
1180 | ||
8b4904a3 | 1181 | /* Use our current register alignment and pointer flags. */ |
01d939e8 | 1182 | map->regno_pointer_align = cfun->emit->regno_pointer_align; |
3502dc9c | 1183 | map->x_regno_reg_rtx = cfun->emit->x_regno_reg_rtx; |
8b4904a3 | 1184 | |
67f2de41 RK |
1185 | /* If the loop is being partially unrolled, and the iteration variables |
1186 | are being split, and are being renamed for the split, then must fix up | |
2b59419a | 1187 | the compare/jump instruction at the end of the loop to refer to the new |
67f2de41 RK |
1188 | registers. This compare isn't copied, so the registers used in it |
1189 | will never be replaced if it isn't done here. */ | |
1190 | ||
1191 | if (unroll_type == UNROLL_MODULO) | |
1192 | { | |
1193 | insn = NEXT_INSN (copy_end); | |
4b4bf941 | 1194 | if (NONJUMP_INSN_P (insn) || JUMP_P (insn)) |
ed5bb68d | 1195 | PATTERN (insn) = remap_split_bivs (loop, PATTERN (insn)); |
67f2de41 RK |
1196 | } |
1197 | ||
5353610b | 1198 | /* For unroll_number times, make a copy of each instruction |
67f2de41 RK |
1199 | between copy_start and copy_end, and insert these new instructions |
1200 | before the end of the loop. */ | |
1201 | ||
1202 | for (i = 0; i < unroll_number; i++) | |
1203 | { | |
703ad42b KG |
1204 | memset (map->insn_map, 0, max_insnno * sizeof (rtx)); |
1205 | memset (&VARRAY_CONST_EQUIV (map->const_equiv_varray, 0), 0, | |
961192e1 | 1206 | VARRAY_SIZE (map->const_equiv_varray) * sizeof (struct const_equiv_data)); |
67f2de41 RK |
1207 | map->const_age = 0; |
1208 | ||
1209 | for (j = 0; j < max_labelno; j++) | |
1210 | if (local_label[j]) | |
1f3d3a31 | 1211 | set_label_in_map (map, j, gen_label_rtx ()); |
67f2de41 | 1212 | |
770ae6cc RK |
1213 | for (r = FIRST_PSEUDO_REGISTER; r < max_local_regnum; r++) |
1214 | if (local_regno[r]) | |
9ae8ffe7 | 1215 | { |
770ae6cc RK |
1216 | map->reg_map[r] = gen_reg_rtx (GET_MODE (regno_reg_rtx[r])); |
1217 | record_base_value (REGNO (map->reg_map[r]), | |
1218 | regno_reg_rtx[r], 0); | |
9ae8ffe7 | 1219 | } |
7f5b8ca7 | 1220 | |
67f2de41 RK |
1221 | /* If loop starts with a branch to the test, then fix it so that |
1222 | it points to the test of the first unrolled copy of the loop. */ | |
1223 | if (i == 0 && loop_start != copy_start) | |
1224 | { | |
1225 | insn = PREV_INSN (copy_start); | |
1226 | pattern = PATTERN (insn); | |
a5af23fe | 1227 | |
1f3d3a31 JL |
1228 | tem = get_label_from_map (map, |
1229 | CODE_LABEL_NUMBER | |
1230 | (XEXP (SET_SRC (pattern), 0))); | |
38a448ca | 1231 | SET_SRC (pattern) = gen_rtx_LABEL_REF (VOIDmode, tem); |
67f2de41 RK |
1232 | |
1233 | /* Set the jump label so that it can be used by later loop unrolling | |
1234 | passes. */ | |
1235 | JUMP_LABEL (insn) = tem; | |
1236 | LABEL_NUSES (tem)++; | |
1237 | } | |
1238 | ||
ed5bb68d | 1239 | copy_loop_body (loop, copy_start, copy_end, map, exit_label, |
67f2de41 RK |
1240 | i == unroll_number - 1, unroll_type, start_label, |
1241 | loop_end, insert_before, insert_before); | |
1242 | } | |
1243 | ||
1244 | /* Before deleting any insns, emit a CODE_LABEL immediately after the last | |
1245 | insn to be deleted. This prevents any runaway delete_insn call from | |
1246 | more insns that it should, as it always stops at a CODE_LABEL. */ | |
1247 | ||
1248 | /* Delete the compare and branch at the end of the loop if completely | |
1249 | unrolling the loop. Deleting the backward branch at the end also | |
1250 | deletes the code label at the start of the loop. This is done at | |
1251 | the very end to avoid problems with back_branch_in_range_p. */ | |
1252 | ||
1253 | if (unroll_type == UNROLL_COMPLETELY) | |
1254 | safety_label = emit_label_after (gen_label_rtx (), last_loop_insn); | |
1255 | else | |
1256 | safety_label = emit_label_after (gen_label_rtx (), copy_end); | |
1257 | ||
a5af23fe | 1258 | /* Delete all of the original loop instructions. Don't delete the |
67f2de41 RK |
1259 | LOOP_BEG note, or the first code label in the loop. */ |
1260 | ||
1261 | insn = NEXT_INSN (copy_start); | |
1262 | while (insn != safety_label) | |
1263 | { | |
570621d5 JW |
1264 | /* ??? Don't delete named code labels. They will be deleted when the |
1265 | jump that references them is deleted. Otherwise, we end up deleting | |
1266 | them twice, which causes them to completely disappear instead of turn | |
1267 | into NOTE_INSN_DELETED_LABEL notes. This in turn causes aborts in | |
1268 | dwarfout.c/dwarf2out.c. We could perhaps fix the dwarf*out.c files | |
1269 | to handle deleted labels instead. Or perhaps fix DECL_RTL of the | |
1270 | associated LABEL_DECL to point to one of the new label instances. */ | |
1271 | /* ??? Likewise, we can't delete a NOTE_INSN_DELETED_LABEL note. */ | |
22f8dffc | 1272 | if (insn != start_label |
4b4bf941 JQ |
1273 | && ! (LABEL_P (insn) && LABEL_NAME (insn)) |
1274 | && ! (NOTE_P (insn) | |
22f8dffc | 1275 | && NOTE_LINE_NUMBER (insn) == NOTE_INSN_DELETED_LABEL)) |
53c17031 | 1276 | insn = delete_related_insns (insn); |
67f2de41 RK |
1277 | else |
1278 | insn = NEXT_INSN (insn); | |
1279 | } | |
1280 | ||
1281 | /* Can now delete the 'safety' label emitted to protect us from runaway | |
53c17031 | 1282 | delete_related_insns calls. */ |
67f2de41 RK |
1283 | if (INSN_DELETED_P (safety_label)) |
1284 | abort (); | |
53c17031 | 1285 | delete_related_insns (safety_label); |
67f2de41 RK |
1286 | |
1287 | /* If exit_label exists, emit it after the loop. Doing the emit here | |
1288 | forces it to have a higher INSN_UID than any insn in the unrolled loop. | |
1289 | This is needed so that mostly_true_jump in reorg.c will treat jumps | |
1290 | to this loop end label correctly, i.e. predict that they are usually | |
1291 | not taken. */ | |
1292 | if (exit_label) | |
1293 | emit_label_after (exit_label, loop_end); | |
c68da89c KR |
1294 | |
1295 | egress: | |
f5da5c87 JH |
1296 | if (unroll_type == UNROLL_COMPLETELY) |
1297 | { | |
1298 | /* Remove the loop notes since this is no longer a loop. */ | |
a2be868f | 1299 | if (loop->vtop) |
53c17031 | 1300 | delete_related_insns (loop->vtop); |
a2be868f | 1301 | if (loop->cont) |
53c17031 | 1302 | delete_related_insns (loop->cont); |
f5da5c87 | 1303 | if (loop_start) |
53c17031 | 1304 | delete_related_insns (loop_start); |
f5da5c87 | 1305 | if (loop_end) |
53c17031 | 1306 | delete_related_insns (loop_end); |
f5da5c87 JH |
1307 | } |
1308 | ||
67289ea6 | 1309 | if (map->const_equiv_varray) |
c68da89c | 1310 | VARRAY_FREE (map->const_equiv_varray); |
67289ea6 MM |
1311 | if (map->label_map) |
1312 | { | |
1313 | free (map->label_map); | |
1314 | free (local_label); | |
1315 | } | |
1316 | free (map->insn_map); | |
1317 | free (splittable_regs); | |
67289ea6 MM |
1318 | free (splittable_regs_updates); |
1319 | free (addr_combined_regs); | |
1320 | free (local_regno); | |
1321 | if (map->reg_map) | |
1322 | free (map->reg_map); | |
1323 | free (map); | |
67f2de41 | 1324 | } |
5d0f3df7 | 1325 | |
2e1eedd6 | 1326 | /* A helper function for unroll_loop. Emit a compare and branch to |
5d0f3df7 RH |
1327 | satisfy (CMP OP1 OP2), but pass this through the simplifier first. |
1328 | If the branch turned out to be conditional, return it, otherwise | |
1329 | return NULL. */ | |
1330 | ||
1331 | static rtx | |
2e1eedd6 AJ |
1332 | simplify_cmp_and_jump_insns (enum rtx_code code, enum machine_mode mode, |
1333 | rtx op0, rtx op1, rtx label) | |
5d0f3df7 RH |
1334 | { |
1335 | rtx t, insn; | |
1336 | ||
7ce3e360 | 1337 | t = simplify_const_relational_operation (code, mode, op0, op1); |
5d0f3df7 RH |
1338 | if (!t) |
1339 | { | |
1340 | enum rtx_code scode = signed_condition (code); | |
1341 | emit_cmp_and_jump_insns (op0, op1, scode, NULL_RTX, mode, | |
1342 | code != scode, label); | |
1343 | insn = get_last_insn (); | |
1344 | ||
1345 | JUMP_LABEL (insn) = label; | |
1346 | LABEL_NUSES (label) += 1; | |
1347 | ||
1348 | return insn; | |
1349 | } | |
1350 | else if (t == const_true_rtx) | |
1351 | { | |
1352 | insn = emit_jump_insn (gen_jump (label)); | |
1353 | emit_barrier (); | |
1354 | JUMP_LABEL (insn) = label; | |
1355 | LABEL_NUSES (label) += 1; | |
1356 | } | |
1357 | ||
1358 | return NULL_RTX; | |
1359 | } | |
67f2de41 RK |
1360 | \f |
/* Return true if the loop can be safely, and profitably, preconditioned
   so that the unrolled copies of the loop body don't need exit tests.

   This only works if final_value, initial_value and increment can be
   determined, and if increment is a constant power of 2.
   If increment is not a power of 2, then the preconditioning modulo
   operation would require a real modulo instead of a boolean AND, and this
   is not considered `profitable'.  */

/* ??? If the loop is known to be executed very many times, or the machine
   has a very cheap divide instruction, then preconditioning is a win even
   when the increment is not a power of 2.  Use RTX_COST to compute
   whether divide is cheap.
   ??? A divide by constant doesn't actually need a divide, look at
   expand_divmod.  The reduced cost of this optimized modulo is not
   reflected in RTX_COST.  */

int
precondition_loop_p (const struct loop *loop, rtx *initial_value,
		     rtx *final_value, rtx *increment,
		     enum machine_mode *mode)
{
  rtx loop_start = loop->start;
  struct loop_info *loop_info = LOOP_INFO (loop);

  /* Easy case: the iteration count was computed at compile time.
     Normalize so the preconditioning code counts from 0 up (positive
     increment) or from n_iterations down to 0 (negative increment).  */
  if (loop_info->n_iterations > 0)
    {
      if (INTVAL (loop_info->increment) > 0)
	{
	  *initial_value = const0_rtx;
	  *increment = const1_rtx;
	  *final_value = GEN_INT (loop_info->n_iterations);
	}
      else
	{
	  *initial_value = GEN_INT (loop_info->n_iterations);
	  *increment = constm1_rtx;
	  *final_value = const0_rtx;
	}
      *mode = word_mode;

      if (loop_dump_stream)
	fprintf (loop_dump_stream,
		 "Preconditioning: Success, number of iterations known, "
		 HOST_WIDE_INT_PRINT_DEC ".\n",
		 loop_info->n_iterations);
      return 1;
    }

  /* From here on, the iteration count must be computed at run time;
     verify that every piece of the computation is available and usable.  */
  if (loop_info->iteration_var == 0)
    {
      if (loop_dump_stream)
	fprintf (loop_dump_stream,
		 "Preconditioning: Could not find iteration variable.\n");
      return 0;
    }
  else if (loop_info->initial_value == 0)
    {
      if (loop_dump_stream)
	fprintf (loop_dump_stream,
		 "Preconditioning: Could not find initial value.\n");
      return 0;
    }
  else if (loop_info->increment == 0)
    {
      if (loop_dump_stream)
	fprintf (loop_dump_stream,
		 "Preconditioning: Could not find increment value.\n");
      return 0;
    }
  else if (GET_CODE (loop_info->increment) != CONST_INT)
    {
      if (loop_dump_stream)
	fprintf (loop_dump_stream,
		 "Preconditioning: Increment not a constant.\n");
      return 0;
    }
  /* Accept either a positive or a negative power-of-2 increment; anything
     else would need a true modulo operation (see the header comment).  */
  else if ((exact_log2 (INTVAL (loop_info->increment)) < 0)
	   && (exact_log2 (-INTVAL (loop_info->increment)) < 0))
    {
      if (loop_dump_stream)
	fprintf (loop_dump_stream,
		 "Preconditioning: Increment not a constant power of 2.\n");
      return 0;
    }

  /* Unsigned_compare and compare_dir can be ignored here, since they do
     not matter for preconditioning.  */

  if (loop_info->final_value == 0)
    {
      if (loop_dump_stream)
	fprintf (loop_dump_stream,
		 "Preconditioning: EQ comparison loop.\n");
      return 0;
    }

  /* Must ensure that final_value is invariant, so call
     loop_invariant_p to check.  Before doing so, must check regno
     against max_reg_before_loop to make sure that the register is in
     the range covered by loop_invariant_p.  If it isn't, then it is
     most likely a biv/giv which by definition are not invariant.  */
  if ((REG_P (loop_info->final_value)
       && REGNO (loop_info->final_value) >= max_reg_before_loop)
      || (GET_CODE (loop_info->final_value) == PLUS
	  && REGNO (XEXP (loop_info->final_value, 0)) >= max_reg_before_loop)
      || ! loop_invariant_p (loop, loop_info->final_value))
    {
      if (loop_dump_stream)
	fprintf (loop_dump_stream,
		 "Preconditioning: Final value not invariant.\n");
      return 0;
    }

  /* Fail for floating point values, since the caller of this function
     does not have code to deal with them.  */
  if (GET_MODE_CLASS (GET_MODE (loop_info->final_value)) == MODE_FLOAT
      || GET_MODE_CLASS (GET_MODE (loop_info->initial_value)) == MODE_FLOAT)
    {
      if (loop_dump_stream)
	fprintf (loop_dump_stream,
		 "Preconditioning: Floating point final or initial value.\n");
      return 0;
    }

  /* Fail if loop_info->iteration_var is not live before loop_start,
     since we need to test its value in the preconditioning code.  */

  if (REGNO_FIRST_LUID (REGNO (loop_info->iteration_var))
      > INSN_LUID (loop_start))
    {
      if (loop_dump_stream)
	fprintf (loop_dump_stream,
		 "Preconditioning: Iteration var not live before loop start.\n");
      return 0;
    }

  /* Note that loop_iterations biases the initial value for GIV iterators
     such as "while (i-- > 0)" so that we can calculate the number of
     iterations just like for BIV iterators.

     Also note that the absolute values of initial_value and
     final_value are unimportant as only their difference is used for
     calculating the number of loop iterations.  */
  *initial_value = loop_info->initial_value;
  *increment = loop_info->increment;
  *final_value = loop_info->final_value;

  /* Decide what mode to do these calculations in.  Choose the larger
     of final_value's mode and initial_value's mode, or a full-word if
     both are constants.  */
  *mode = GET_MODE (*final_value);
  if (*mode == VOIDmode)
    {
      *mode = GET_MODE (*initial_value);
      if (*mode == VOIDmode)
	*mode = word_mode;
    }
  else if (*mode != GET_MODE (*initial_value)
	   && (GET_MODE_SIZE (*mode)
	       < GET_MODE_SIZE (GET_MODE (*initial_value))))
    *mode = GET_MODE (*initial_value);

  /* Success!  */
  if (loop_dump_stream)
    fprintf (loop_dump_stream, "Preconditioning: Successful.\n");
  return 1;
}
1529 | ||
67f2de41 RK |
1530 | /* All pseudo-registers must be mapped to themselves. Two hard registers |
1531 | must be mapped, VIRTUAL_STACK_VARS_REGNUM and VIRTUAL_INCOMING_ARGS_ | |
1532 | REGNUM, to avoid function-inlining specific conversions of these | |
1533 | registers. All other hard regs can not be mapped because they may be | |
1534 | used with different | |
1535 | modes. */ | |
1536 | ||
1537 | static void | |
2e1eedd6 | 1538 | init_reg_map (struct inline_remap *map, int maxregnum) |
67f2de41 RK |
1539 | { |
1540 | int i; | |
1541 | ||
1542 | for (i = maxregnum - 1; i > LAST_VIRTUAL_REGISTER; i--) | |
1543 | map->reg_map[i] = regno_reg_rtx[i]; | |
1544 | /* Just clear the rest of the entries. */ | |
1545 | for (i = LAST_VIRTUAL_REGISTER; i >= 0; i--) | |
1546 | map->reg_map[i] = 0; | |
1547 | ||
1548 | map->reg_map[VIRTUAL_STACK_VARS_REGNUM] | |
1549 | = regno_reg_rtx[VIRTUAL_STACK_VARS_REGNUM]; | |
1550 | map->reg_map[VIRTUAL_INCOMING_ARGS_REGNUM] | |
1551 | = regno_reg_rtx[VIRTUAL_INCOMING_ARGS_REGNUM]; | |
1552 | } | |
1553 | \f | |
/* Strength-reduction will often emit code for optimized biv/givs which
   calculates their value in a temporary register, and then copies the result
   to the iv.  This procedure reconstructs the pattern computing the iv;
   verifying that all operands are of the proper form.

   PATTERN must be the result of single_set.
   SRC_INSN is the insn containing PATTERN; REGNO is the register number
   the giv is expected to be incremented from.
   The return value is the (CONST_INT) amount that the giv is incremented by.

   NOTE: as a side effect this may delete the most recently emitted insns
   (via delete_related_insns) when they merely load a constant that has
   been folded into the returned increment.  */

static rtx
calculate_giv_inc (rtx pattern, rtx src_insn, unsigned int regno)
{
  rtx increment;
  rtx increment_total = 0;
  int tries = 0;

 retry:
  /* Verify that we have an increment insn here.  First check for a plus
     as the set source.  */
  if (GET_CODE (SET_SRC (pattern)) != PLUS)
    {
      /* SR sometimes computes the new giv value in a temp, then copies it
	 to the new_reg.  */
      src_insn = PREV_INSN (src_insn);
      pattern = single_set (src_insn);
      if (GET_CODE (SET_SRC (pattern)) != PLUS)
	abort ();

      /* The last insn emitted is not needed, so delete it to avoid confusing
	 the second cse pass.  This insn sets the giv unnecessarily.  */
      delete_related_insns (get_last_insn ());
    }

  /* Verify that we have a constant as the second operand of the plus.  */
  increment = XEXP (SET_SRC (pattern), 1);
  if (GET_CODE (increment) != CONST_INT)
    {
      /* SR sometimes puts the constant in a register, especially if it is
	 too big to be an add immed operand.  */
      increment = find_last_value (increment, &src_insn, NULL_RTX, 0);

      /* SR may have used LO_SUM to compute the constant if it is too large
	 for a load immed operand.  In this case, the constant is in operand
	 one of the LO_SUM rtx.  */
      if (GET_CODE (increment) == LO_SUM)
	increment = XEXP (increment, 1);

      /* Some ports store large constants in memory and add a REG_EQUAL
	 note to the store insn.  */
      else if (MEM_P (increment))
	{
	  rtx note = find_reg_note (src_insn, REG_EQUAL, 0);
	  if (note)
	    increment = XEXP (note, 0);
	}

      else if (GET_CODE (increment) == IOR
	       || GET_CODE (increment) == PLUS
	       || GET_CODE (increment) == ASHIFT
	       || GET_CODE (increment) == LSHIFTRT)
	{
	  /* The rs6000 port loads some constants with IOR.
	     The alpha port loads some constants with ASHIFT and PLUS.
	     The sparc64 port loads some constants with LSHIFTRT.
	     Fold the two halves back into a single constant.  */
	  rtx second_part = XEXP (increment, 1);
	  enum rtx_code code = GET_CODE (increment);

	  increment = find_last_value (XEXP (increment, 0),
				       &src_insn, NULL_RTX, 0);
	  /* Don't need the last insn anymore.  */
	  delete_related_insns (get_last_insn ());

	  if (GET_CODE (second_part) != CONST_INT
	      || GET_CODE (increment) != CONST_INT)
	    abort ();

	  if (code == IOR)
	    increment = GEN_INT (INTVAL (increment) | INTVAL (second_part));
	  else if (code == PLUS)
	    increment = GEN_INT (INTVAL (increment) + INTVAL (second_part));
	  else if (code == ASHIFT)
	    increment = GEN_INT (INTVAL (increment) << INTVAL (second_part));
	  else
	    /* LSHIFTRT: use an unsigned shift to match the rtl semantics.  */
	    increment = GEN_INT ((unsigned HOST_WIDE_INT) INTVAL (increment) >> INTVAL (second_part));
	}

      if (GET_CODE (increment) != CONST_INT)
	abort ();

      /* The insn loading the constant into a register is no longer needed,
	 so delete it.  */
      delete_related_insns (get_last_insn ());
    }

  /* Accumulate, in case the increment is split over two add insns
     (see the "tries" retry below).  */
  if (increment_total)
    increment_total = GEN_INT (INTVAL (increment_total) + INTVAL (increment));
  else
    increment_total = increment;

  /* Check that the source register is the same as the register we expected
     to see as the source.  If not, something is seriously wrong.  */
  if (!REG_P (XEXP (SET_SRC (pattern), 0))
      || REGNO (XEXP (SET_SRC (pattern), 0)) != regno)
    {
      /* Some machines (e.g. the romp), may emit two add instructions for
	 certain constants, so lets try looking for another add immediately
	 before this one if we have only seen one add insn so far.  */

      if (tries == 0)
	{
	  tries++;

	  src_insn = PREV_INSN (src_insn);
	  pattern = single_set (src_insn);

	  delete_related_insns (get_last_insn ());

	  goto retry;
	}

      abort ();
    }

  return increment_total;
}
1678 | ||
6bd43524 RS |
1679 | /* Copy REG_NOTES, except for insn references, because not all insn_map |
1680 | entries are valid yet. We do need to copy registers now though, because | |
1681 | the reg_map entries can change during copying. */ | |
1682 | ||
1683 | static rtx | |
2e1eedd6 | 1684 | initial_reg_note_copy (rtx notes, struct inline_remap *map) |
6bd43524 RS |
1685 | { |
1686 | rtx copy; | |
1687 | ||
1688 | if (notes == 0) | |
1689 | return 0; | |
1690 | ||
1691 | copy = rtx_alloc (GET_CODE (notes)); | |
b59a386b | 1692 | PUT_REG_NOTE_KIND (copy, REG_NOTE_KIND (notes)); |
6bd43524 RS |
1693 | |
1694 | if (GET_CODE (notes) == EXPR_LIST) | |
14a774a9 | 1695 | XEXP (copy, 0) = copy_rtx_and_substitute (XEXP (notes, 0), map, 0); |
6bd43524 RS |
1696 | else if (GET_CODE (notes) == INSN_LIST) |
1697 | /* Don't substitute for these yet. */ | |
e8c8470b | 1698 | XEXP (copy, 0) = copy_rtx (XEXP (notes, 0)); |
6bd43524 RS |
1699 | else |
1700 | abort (); | |
1701 | ||
1702 | XEXP (copy, 1) = initial_reg_note_copy (XEXP (notes, 1), map); | |
1703 | ||
1704 | return copy; | |
1705 | } | |
1706 | ||
1707 | /* Fixup insn references in copied REG_NOTES. */ | |
1708 | ||
1709 | static void | |
2e1eedd6 | 1710 | final_reg_note_copy (rtx *notesp, struct inline_remap *map) |
6bd43524 | 1711 | { |
b59a386b MM |
1712 | while (*notesp) |
1713 | { | |
1714 | rtx note = *notesp; | |
41077ce4 | 1715 | |
b59a386b MM |
1716 | if (GET_CODE (note) == INSN_LIST) |
1717 | { | |
6001794d KH |
1718 | rtx insn = map->insn_map[INSN_UID (XEXP (note, 0))]; |
1719 | ||
1720 | /* If we failed to remap the note, something is awry. | |
1721 | Allow REG_LABEL as it may reference label outside | |
1722 | the unrolled loop. */ | |
1723 | if (!insn) | |
b59a386b | 1724 | { |
6001794d KH |
1725 | if (REG_NOTE_KIND (note) != REG_LABEL) |
1726 | abort (); | |
b59a386b MM |
1727 | } |
1728 | else | |
6001794d | 1729 | XEXP (note, 0) = insn; |
b59a386b | 1730 | } |
6bd43524 | 1731 | |
b59a386b MM |
1732 | notesp = &XEXP (note, 1); |
1733 | } | |
6bd43524 | 1734 | } |
67f2de41 RK |
1735 | |
1736 | /* Copy each instruction in the loop, substituting from map as appropriate. | |
1737 | This is very similar to a loop in expand_inline_function. */ | |
a5af23fe | 1738 | |
67f2de41 | 1739 | static void |
2e1eedd6 AJ |
1740 | copy_loop_body (struct loop *loop, rtx copy_start, rtx copy_end, |
1741 | struct inline_remap *map, rtx exit_label, | |
1742 | int last_iteration, enum unroll_types unroll_type, | |
1743 | rtx start_label, rtx loop_end, rtx insert_before, | |
1744 | rtx copy_notes_from) | |
67f2de41 | 1745 | { |
ed5bb68d | 1746 | struct loop_ivs *ivs = LOOP_IVS (loop); |
67f2de41 | 1747 | rtx insn, pattern; |
a544cfd2 | 1748 | rtx set, tem, copy = NULL_RTX; |
67f2de41 | 1749 | int dest_reg_was_split, i; |
51723711 | 1750 | #ifdef HAVE_cc0 |
67f2de41 | 1751 | rtx cc0_insn = 0; |
51723711 | 1752 | #endif |
67f2de41 RK |
1753 | rtx final_label = 0; |
1754 | rtx giv_inc, giv_dest_reg, giv_src_reg; | |
1755 | ||
1756 | /* If this isn't the last iteration, then map any references to the | |
1757 | start_label to final_label. Final label will then be emitted immediately | |
1758 | after the end of this loop body if it was ever used. | |
1759 | ||
1760 | If this is the last iteration, then map references to the start_label | |
1761 | to itself. */ | |
1762 | if (! last_iteration) | |
1763 | { | |
1764 | final_label = gen_label_rtx (); | |
a86dc4a3 | 1765 | set_label_in_map (map, CODE_LABEL_NUMBER (start_label), final_label); |
67f2de41 RK |
1766 | } |
1767 | else | |
1f3d3a31 | 1768 | set_label_in_map (map, CODE_LABEL_NUMBER (start_label), start_label); |
67f2de41 RK |
1769 | |
1770 | start_sequence (); | |
a5af23fe | 1771 | |
67f2de41 RK |
1772 | insn = copy_start; |
1773 | do | |
1774 | { | |
1775 | insn = NEXT_INSN (insn); | |
a5af23fe | 1776 | |
67f2de41 | 1777 | map->orig_asm_operands_vector = 0; |
a5af23fe | 1778 | |
67f2de41 RK |
1779 | switch (GET_CODE (insn)) |
1780 | { | |
1781 | case INSN: | |
1782 | pattern = PATTERN (insn); | |
1783 | copy = 0; | |
1784 | giv_inc = 0; | |
a5af23fe | 1785 | |
67f2de41 | 1786 | /* Check to see if this is a giv that has been combined with |
a5af23fe | 1787 | some split address givs. (Combined in the sense that |
67f2de41 RK |
1788 | `combine_givs' in loop.c has put two givs in the same register.) |
1789 | In this case, we must search all givs based on the same biv to | |
1790 | find the address givs. Then split the address givs. | |
1791 | Do this before splitting the giv, since that may map the | |
1792 | SET_DEST to a new register. */ | |
a5af23fe | 1793 | |
e5e809f4 | 1794 | if ((set = single_set (insn)) |
f8cfc6aa | 1795 | && REG_P (SET_DEST (set)) |
e5e809f4 | 1796 | && addr_combined_regs[REGNO (SET_DEST (set))]) |
67f2de41 RK |
1797 | { |
1798 | struct iv_class *bl; | |
1799 | struct induction *v, *tv; | |
770ae6cc | 1800 | unsigned int regno = REGNO (SET_DEST (set)); |
a5af23fe | 1801 | |
e5e809f4 | 1802 | v = addr_combined_regs[REGNO (SET_DEST (set))]; |
8b634749 | 1803 | bl = REG_IV_CLASS (ivs, REGNO (v->src_reg)); |
a5af23fe | 1804 | |
67f2de41 RK |
1805 | /* Although the giv_inc amount is not needed here, we must call |
1806 | calculate_giv_inc here since it might try to delete the | |
1807 | last insn emitted. If we wait until later to call it, | |
1808 | we might accidentally delete insns generated immediately | |
1809 | below by emit_unrolled_add. */ | |
1810 | ||
b4f75276 | 1811 | giv_inc = calculate_giv_inc (set, insn, regno); |
67f2de41 RK |
1812 | |
1813 | /* Now find all address giv's that were combined with this | |
1814 | giv 'v'. */ | |
1815 | for (tv = bl->giv; tv; tv = tv->next_iv) | |
1816 | if (tv->giv_type == DEST_ADDR && tv->same == v) | |
1817 | { | |
7085bad3 JW |
1818 | int this_giv_inc; |
1819 | ||
1820 | /* If this DEST_ADDR giv was not split, then ignore it. */ | |
1821 | if (*tv->location != tv->dest_reg) | |
1822 | continue; | |
2b4bd1bc JW |
1823 | |
1824 | /* Scale this_giv_inc if the multiplicative factors of | |
1825 | the two givs are different. */ | |
7085bad3 | 1826 | this_giv_inc = INTVAL (giv_inc); |
2b4bd1bc JW |
1827 | if (tv->mult_val != v->mult_val) |
1828 | this_giv_inc = (this_giv_inc / INTVAL (v->mult_val) | |
1829 | * INTVAL (tv->mult_val)); | |
a5af23fe | 1830 | |
2b4bd1bc | 1831 | tv->dest_reg = plus_constant (tv->dest_reg, this_giv_inc); |
67f2de41 | 1832 | *tv->location = tv->dest_reg; |
a5af23fe | 1833 | |
67f2de41 RK |
1834 | if (last_iteration && unroll_type != UNROLL_COMPLETELY) |
1835 | { | |
1836 | /* Must emit an insn to increment the split address | |
1837 | giv. Add in the const_adjust field in case there | |
1838 | was a constant eliminated from the address. */ | |
1839 | rtx value, dest_reg; | |
a5af23fe | 1840 | |
67f2de41 RK |
1841 | /* tv->dest_reg will be either a bare register, |
1842 | or else a register plus a constant. */ | |
f8cfc6aa | 1843 | if (REG_P (tv->dest_reg)) |
67f2de41 RK |
1844 | dest_reg = tv->dest_reg; |
1845 | else | |
1846 | dest_reg = XEXP (tv->dest_reg, 0); | |
a5af23fe | 1847 | |
8d092274 | 1848 | /* Check for shared address givs, and avoid |
9faa82d8 | 1849 | incrementing the shared pseudo reg more than |
8d092274 | 1850 | once. */ |
9ae8ffe7 | 1851 | if (! tv->same_insn && ! tv->shared) |
8d092274 JW |
1852 | { |
1853 | /* tv->dest_reg may actually be a (PLUS (REG) | |
1854 | (CONST)) here, so we must call plus_constant | |
1855 | to add the const_adjust amount before calling | |
1856 | emit_unrolled_add below. */ | |
1857 | value = plus_constant (tv->dest_reg, | |
1858 | tv->const_adjust); | |
1859 | ||
3f0aabf2 R |
1860 | if (GET_CODE (value) == PLUS) |
1861 | { | |
1862 | /* The constant could be too large for an add | |
1863 | immediate, so can't directly emit an insn | |
1864 | here. */ | |
1865 | emit_unrolled_add (dest_reg, XEXP (value, 0), | |
1866 | XEXP (value, 1)); | |
1867 | } | |
8d092274 | 1868 | } |
a5af23fe | 1869 | |
67f2de41 | 1870 | /* Reset the giv to be just the register again, in case |
412dc348 RK |
1871 | it is used after the set we have just emitted. |
1872 | We must subtract the const_adjust factor added in | |
1873 | above. */ | |
1874 | tv->dest_reg = plus_constant (dest_reg, | |
a86dc4a3 | 1875 | -tv->const_adjust); |
67f2de41 RK |
1876 | *tv->location = tv->dest_reg; |
1877 | } | |
1878 | } | |
1879 | } | |
a5af23fe | 1880 | |
67f2de41 RK |
1881 | /* If this is a setting of a splittable variable, then determine |
1882 | how to split the variable, create a new set based on this split, | |
1883 | and set up the reg_map so that later uses of the variable will | |
1884 | use the new split variable. */ | |
a5af23fe | 1885 | |
67f2de41 | 1886 | dest_reg_was_split = 0; |
a5af23fe | 1887 | |
e5e809f4 | 1888 | if ((set = single_set (insn)) |
f8cfc6aa | 1889 | && REG_P (SET_DEST (set)) |
e5e809f4 | 1890 | && splittable_regs[REGNO (SET_DEST (set))]) |
67f2de41 | 1891 | { |
770ae6cc RK |
1892 | unsigned int regno = REGNO (SET_DEST (set)); |
1893 | unsigned int src_regno; | |
a5af23fe | 1894 | |
67f2de41 | 1895 | dest_reg_was_split = 1; |
a5af23fe | 1896 | |
e5e809f4 | 1897 | giv_dest_reg = SET_DEST (set); |
b4f75276 BS |
1898 | giv_src_reg = giv_dest_reg; |
1899 | /* Compute the increment value for the giv, if it wasn't | |
1900 | already computed above. */ | |
1901 | if (giv_inc == 0) | |
1902 | giv_inc = calculate_giv_inc (set, insn, regno); | |
1903 | ||
3ec2b590 | 1904 | src_regno = REGNO (giv_src_reg); |
67f2de41 RK |
1905 | |
1906 | if (unroll_type == UNROLL_COMPLETELY) | |
1907 | { | |
1908 | /* Completely unrolling the loop. Set the induction | |
1909 | variable to a known constant value. */ | |
a5af23fe | 1910 | |
67f2de41 RK |
1911 | /* The value in splittable_regs may be an invariant |
1912 | value, so we must use plus_constant here. */ | |
1913 | splittable_regs[regno] | |
3ec2b590 R |
1914 | = plus_constant (splittable_regs[src_regno], |
1915 | INTVAL (giv_inc)); | |
67f2de41 RK |
1916 | |
1917 | if (GET_CODE (splittable_regs[regno]) == PLUS) | |
1918 | { | |
1919 | giv_src_reg = XEXP (splittable_regs[regno], 0); | |
1920 | giv_inc = XEXP (splittable_regs[regno], 1); | |
1921 | } | |
1922 | else | |
1923 | { | |
1924 | /* The splittable_regs value must be a REG or a | |
1925 | CONST_INT, so put the entire value in the giv_src_reg | |
1926 | variable. */ | |
1927 | giv_src_reg = splittable_regs[regno]; | |
1928 | giv_inc = const0_rtx; | |
1929 | } | |
1930 | } | |
1931 | else | |
1932 | { | |
1933 | /* Partially unrolling loop. Create a new pseudo | |
1934 | register for the iteration variable, and set it to | |
1935 | be a constant plus the original register. Except | |
1936 | on the last iteration, when the result has to | |
1937 | go back into the original iteration var register. */ | |
a5af23fe | 1938 | |
67f2de41 RK |
1939 | /* Handle bivs which must be mapped to a new register |
1940 | when split. This happens for bivs which need their | |
1941 | final value set before loop entry. The new register | |
1942 | for the biv was stored in the biv's first struct | |
1943 | induction entry by find_splittable_regs. */ | |
1944 | ||
86fee241 | 1945 | if (regno < ivs->n_regs |
ed5bb68d | 1946 | && REG_IV_TYPE (ivs, regno) == BASIC_INDUCT) |
67f2de41 | 1947 | { |
8b634749 | 1948 | giv_src_reg = REG_IV_CLASS (ivs, regno)->biv->src_reg; |
67f2de41 RK |
1949 | giv_dest_reg = giv_src_reg; |
1950 | } | |
a5af23fe | 1951 | |
67f2de41 RK |
1952 | #if 0 |
1953 | /* If non-reduced/final-value givs were split, then | |
1954 | this would have to remap those givs also. See | |
1955 | find_splittable_regs. */ | |
1956 | #endif | |
a5af23fe | 1957 | |
67f2de41 | 1958 | splittable_regs[regno] |
5b1a6de4 GK |
1959 | = simplify_gen_binary (PLUS, GET_MODE (giv_src_reg), |
1960 | giv_inc, | |
1961 | splittable_regs[src_regno]); | |
67f2de41 | 1962 | giv_inc = splittable_regs[regno]; |
a5af23fe | 1963 | |
67f2de41 RK |
1964 | /* Now split the induction variable by changing the dest |
1965 | of this insn to a new register, and setting its | |
1966 | reg_map entry to point to this new register. | |
1967 | ||
1968 | If this is the last iteration, and this is the last insn | |
1969 | that will update the iv, then reuse the original dest, | |
1970 | to ensure that the iv will have the proper value when | |
1971 | the loop exits or repeats. | |
1972 | ||
1973 | Using splittable_regs_updates here like this is safe, | |
1974 | because it can only be greater than one if all | |
1975 | instructions modifying the iv are always executed in | |
1976 | order. */ | |
1977 | ||
1978 | if (! last_iteration | |
1979 | || (splittable_regs_updates[regno]-- != 1)) | |
1980 | { | |
1981 | tem = gen_reg_rtx (GET_MODE (giv_src_reg)); | |
1982 | giv_dest_reg = tem; | |
1983 | map->reg_map[regno] = tem; | |
de12be17 JC |
1984 | record_base_value (REGNO (tem), |
1985 | giv_inc == const0_rtx | |
1986 | ? giv_src_reg | |
1987 | : gen_rtx_PLUS (GET_MODE (giv_src_reg), | |
1988 | giv_src_reg, giv_inc), | |
1989 | 1); | |
67f2de41 RK |
1990 | } |
1991 | else | |
1992 | map->reg_map[regno] = giv_src_reg; | |
1993 | } | |
1994 | ||
1995 | /* The constant being added could be too large for an add | |
1996 | immediate, so can't directly emit an insn here. */ | |
1997 | emit_unrolled_add (giv_dest_reg, giv_src_reg, giv_inc); | |
1998 | copy = get_last_insn (); | |
1999 | pattern = PATTERN (copy); | |
2000 | } | |
2001 | else | |
2002 | { | |
14a774a9 | 2003 | pattern = copy_rtx_and_substitute (pattern, map, 0); |
67f2de41 RK |
2004 | copy = emit_insn (pattern); |
2005 | } | |
6bd43524 | 2006 | REG_NOTES (copy) = initial_reg_note_copy (REG_NOTES (insn), map); |
0435312e | 2007 | INSN_LOCATOR (copy) = INSN_LOCATOR (insn); |
a5af23fe | 2008 | |
f7a21c70 DE |
2009 | /* If there is a REG_EQUAL note present whose value |
2010 | is not loop invariant, then delete it, since it | |
2011 | may cause problems with later optimization passes. */ | |
2012 | if ((tem = find_reg_note (copy, REG_EQUAL, NULL_RTX)) | |
2013 | && !loop_invariant_p (loop, XEXP (tem, 0))) | |
2014 | remove_note (copy, tem); | |
2015 | ||
67f2de41 RK |
2016 | #ifdef HAVE_cc0 |
2017 | /* If this insn is setting CC0, it may need to look at | |
2018 | the insn that uses CC0 to see what type of insn it is. | |
2019 | In that case, the call to recog via validate_change will | |
2020 | fail. So don't substitute constants here. Instead, | |
2021 | do it when we emit the following insn. | |
2022 | ||
2023 | For example, see the pyr.md file. That machine has signed and | |
2024 | unsigned compares. The compare patterns must check the | |
2025 | following branch insn to see which what kind of compare to | |
2026 | emit. | |
2027 | ||
2028 | If the previous insn set CC0, substitute constants on it as | |
2029 | well. */ | |
e65886db | 2030 | if (sets_cc0_p (PATTERN (copy)) != 0) |
67f2de41 RK |
2031 | cc0_insn = copy; |
2032 | else | |
2033 | { | |
2034 | if (cc0_insn) | |
2035 | try_constants (cc0_insn, map); | |
2036 | cc0_insn = 0; | |
2037 | try_constants (copy, map); | |
2038 | } | |
2039 | #else | |
2040 | try_constants (copy, map); | |
2041 | #endif | |
2042 | ||
2043 | /* Make split induction variable constants `permanent' since we | |
2044 | know there are no backward branches across iteration variable | |
2045 | settings which would invalidate this. */ | |
0bae0184 | 2046 | if (dest_reg_was_split) |
67f2de41 | 2047 | { |
65c8a03d | 2048 | int regno = REGNO (SET_DEST (set)); |
67f2de41 | 2049 | |
6a651371 | 2050 | if ((size_t) regno < VARRAY_SIZE (map->const_equiv_varray) |
c68da89c KR |
2051 | && (VARRAY_CONST_EQUIV (map->const_equiv_varray, regno).age |
2052 | == map->const_age)) | |
2053 | VARRAY_CONST_EQUIV (map->const_equiv_varray, regno).age = -1; | |
67f2de41 RK |
2054 | } |
2055 | break; | |
a5af23fe | 2056 | |
67f2de41 | 2057 | case JUMP_INSN: |
14a774a9 | 2058 | pattern = copy_rtx_and_substitute (PATTERN (insn), map, 0); |
483404b6 | 2059 | copy = emit_jump_insn (pattern); |
6bd43524 | 2060 | REG_NOTES (copy) = initial_reg_note_copy (REG_NOTES (insn), map); |
0435312e | 2061 | INSN_LOCATOR (copy) = INSN_LOCATOR (insn); |
483404b6 | 2062 | |
6ba4d630 | 2063 | if (JUMP_LABEL (insn)) |
67f2de41 | 2064 | { |
742dff15 JH |
2065 | JUMP_LABEL (copy) = get_label_from_map (map, |
2066 | CODE_LABEL_NUMBER | |
2067 | (JUMP_LABEL (insn))); | |
8aa0e194 | 2068 | LABEL_NUSES (JUMP_LABEL (copy))++; |
6ba4d630 JH |
2069 | } |
2070 | if (JUMP_LABEL (insn) == start_label && insn == copy_end | |
2071 | && ! last_iteration) | |
2072 | { | |
8aa0e194 | 2073 | |
67f2de41 RK |
2074 | /* This is a branch to the beginning of the loop; this is the |
2075 | last insn being copied; and this is not the last iteration. | |
2076 | In this case, we want to change the original fall through | |
2077 | case to be a branch past the end of the loop, and the | |
2078 | original jump label case to fall_through. */ | |
a9d27cb2 | 2079 | |
742dff15 | 2080 | if (!invert_jump (copy, exit_label, 0)) |
ca72f752 RE |
2081 | { |
2082 | rtx jmp; | |
2083 | rtx lab = gen_label_rtx (); | |
9faa82d8 RK |
2084 | /* Can't do it by reversing the jump (probably because we |
2085 | couldn't reverse the conditions), so emit a new | |
ca72f752 RE |
2086 | jump_insn after COPY, and redirect the jump around |
2087 | that. */ | |
2088 | jmp = emit_jump_insn_after (gen_jump (exit_label), copy); | |
bcb07710 | 2089 | JUMP_LABEL (jmp) = exit_label; |
1b735a57 | 2090 | LABEL_NUSES (exit_label)++; |
ca72f752 RE |
2091 | jmp = emit_barrier_after (jmp); |
2092 | emit_label_after (lab, jmp); | |
2093 | LABEL_NUSES (lab) = 0; | |
742dff15 | 2094 | if (!redirect_jump (copy, lab, 0)) |
a86dc4a3 | 2095 | abort (); |
ca72f752 | 2096 | } |
67f2de41 | 2097 | } |
a5af23fe | 2098 | |
67f2de41 RK |
2099 | #ifdef HAVE_cc0 |
2100 | if (cc0_insn) | |
2101 | try_constants (cc0_insn, map); | |
2102 | cc0_insn = 0; | |
2103 | #endif | |
2104 | try_constants (copy, map); | |
2105 | ||
2106 | /* Set the jump label of COPY correctly to avoid problems with | |
2107 | later passes of unroll_loop, if INSN had jump label set. */ | |
2108 | if (JUMP_LABEL (insn)) | |
2109 | { | |
57467646 JW |
2110 | rtx label = 0; |
2111 | ||
67f2de41 RK |
2112 | /* Can't use the label_map for every insn, since this may be |
2113 | the backward branch, and hence the label was not mapped. */ | |
e5e809f4 | 2114 | if ((set = single_set (copy))) |
67f2de41 | 2115 | { |
e5e809f4 | 2116 | tem = SET_SRC (set); |
67f2de41 | 2117 | if (GET_CODE (tem) == LABEL_REF) |
57467646 | 2118 | label = XEXP (tem, 0); |
67f2de41 RK |
2119 | else if (GET_CODE (tem) == IF_THEN_ELSE) |
2120 | { | |
2121 | if (XEXP (tem, 1) != pc_rtx) | |
57467646 | 2122 | label = XEXP (XEXP (tem, 1), 0); |
67f2de41 | 2123 | else |
57467646 | 2124 | label = XEXP (XEXP (tem, 2), 0); |
67f2de41 | 2125 | } |
67f2de41 | 2126 | } |
57467646 | 2127 | |
4b4bf941 | 2128 | if (label && LABEL_P (label)) |
57467646 | 2129 | JUMP_LABEL (copy) = label; |
67f2de41 RK |
2130 | else |
2131 | { | |
2132 | /* An unrecognizable jump insn, probably the entry jump | |
2133 | for a switch statement. This label must have been mapped, | |
2134 | so just use the label_map to get the new jump label. */ | |
4ac75a4e | 2135 | JUMP_LABEL (copy) |
a5af23fe R |
2136 | = get_label_from_map (map, |
2137 | CODE_LABEL_NUMBER (JUMP_LABEL (insn))); | |
67f2de41 | 2138 | } |
a5af23fe | 2139 | |
67f2de41 RK |
2140 | /* If this is a non-local jump, then must increase the label |
2141 | use count so that the label will not be deleted when the | |
2142 | original jump is deleted. */ | |
2143 | LABEL_NUSES (JUMP_LABEL (copy))++; | |
2144 | } | |
2145 | else if (GET_CODE (PATTERN (copy)) == ADDR_VEC | |
2146 | || GET_CODE (PATTERN (copy)) == ADDR_DIFF_VEC) | |
2147 | { | |
2148 | rtx pat = PATTERN (copy); | |
2149 | int diff_vec_p = GET_CODE (pat) == ADDR_DIFF_VEC; | |
2150 | int len = XVECLEN (pat, diff_vec_p); | |
2151 | int i; | |
2152 | ||
2153 | for (i = 0; i < len; i++) | |
2154 | LABEL_NUSES (XEXP (XVECEXP (pat, diff_vec_p, i), 0))++; | |
2155 | } | |
2156 | ||
2157 | /* If this used to be a conditional jump insn but whose branch | |
2158 | direction is now known, we must do something special. */ | |
7f1c097d | 2159 | if (any_condjump_p (insn) && onlyjump_p (insn) && map->last_pc_value) |
67f2de41 RK |
2160 | { |
2161 | #ifdef HAVE_cc0 | |
b30f05db | 2162 | /* If the previous insn set cc0 for us, delete it. */ |
44ce0063 | 2163 | if (only_sets_cc0_p (PREV_INSN (copy))) |
53c17031 | 2164 | delete_related_insns (PREV_INSN (copy)); |
67f2de41 RK |
2165 | #endif |
2166 | ||
2167 | /* If this is now a no-op, delete it. */ | |
2168 | if (map->last_pc_value == pc_rtx) | |
2169 | { | |
ca6c03ca | 2170 | delete_insn (copy); |
67f2de41 RK |
2171 | copy = 0; |
2172 | } | |
2173 | else | |
2174 | /* Otherwise, this is unconditional jump so we must put a | |
2175 | BARRIER after it. We could do some dead code elimination | |
2176 | here, but jump.c will do it just as well. */ | |
2177 | emit_barrier (); | |
2178 | } | |
2179 | break; | |
a5af23fe | 2180 | |
67f2de41 | 2181 | case CALL_INSN: |
14a774a9 | 2182 | pattern = copy_rtx_and_substitute (PATTERN (insn), map, 0); |
67f2de41 | 2183 | copy = emit_call_insn (pattern); |
6bd43524 | 2184 | REG_NOTES (copy) = initial_reg_note_copy (REG_NOTES (insn), map); |
0435312e | 2185 | INSN_LOCATOR (copy) = INSN_LOCATOR (insn); |
f2f4927e | 2186 | SIBLING_CALL_P (copy) = SIBLING_CALL_P (insn); |
4ff38cd5 | 2187 | CONST_OR_PURE_CALL_P (copy) = CONST_OR_PURE_CALL_P (insn); |
67f2de41 | 2188 | |
2d98fe23 JW |
2189 | /* Because the USAGE information potentially contains objects other |
2190 | than hard registers, we need to copy it. */ | |
db3cf6fb | 2191 | CALL_INSN_FUNCTION_USAGE (copy) |
14a774a9 RK |
2192 | = copy_rtx_and_substitute (CALL_INSN_FUNCTION_USAGE (insn), |
2193 | map, 0); | |
2d98fe23 | 2194 | |
67f2de41 RK |
2195 | #ifdef HAVE_cc0 |
2196 | if (cc0_insn) | |
2197 | try_constants (cc0_insn, map); | |
2198 | cc0_insn = 0; | |
2199 | #endif | |
2200 | try_constants (copy, map); | |
2201 | ||
2202 | /* Be lazy and assume CALL_INSNs clobber all hard registers. */ | |
2203 | for (i = 0; i < FIRST_PSEUDO_REGISTER; i++) | |
c68da89c | 2204 | VARRAY_CONST_EQUIV (map->const_equiv_varray, i).rtx = 0; |
67f2de41 | 2205 | break; |
a5af23fe | 2206 | |
67f2de41 RK |
2207 | case CODE_LABEL: |
2208 | /* If this is the loop start label, then we don't need to emit a | |
2209 | copy of this label since no one will use it. */ | |
2210 | ||
2211 | if (insn != start_label) | |
2212 | { | |
1f3d3a31 JL |
2213 | copy = emit_label (get_label_from_map (map, |
2214 | CODE_LABEL_NUMBER (insn))); | |
67f2de41 RK |
2215 | map->const_age++; |
2216 | } | |
2217 | break; | |
a5af23fe | 2218 | |
67f2de41 RK |
2219 | case BARRIER: |
2220 | copy = emit_barrier (); | |
2221 | break; | |
a5af23fe | 2222 | |
67f2de41 | 2223 | case NOTE: |
c956720a R |
2224 | /* VTOP and CONT notes are valid only before the loop exit test. |
2225 | If placed anywhere else, loop may generate bad code. */ | |
e881bb1b | 2226 | /* BASIC_BLOCK notes exist to stabilize basic block structures with |
a86dc4a3 | 2227 | the associated rtl. We do not want to share the structure in |
e881bb1b | 2228 | this new block. */ |
a5af23fe | 2229 | |
5f2fc772 | 2230 | if (NOTE_LINE_NUMBER (insn) != NOTE_INSN_DELETED |
fd3acbb3 NS |
2231 | && NOTE_LINE_NUMBER (insn) != NOTE_INSN_DELETED_LABEL |
2232 | && NOTE_LINE_NUMBER (insn) != NOTE_INSN_BASIC_BLOCK | |
2233 | && ((NOTE_LINE_NUMBER (insn) != NOTE_INSN_LOOP_VTOP | |
2234 | && NOTE_LINE_NUMBER (insn) != NOTE_INSN_LOOP_CONT) | |
2235 | || (last_iteration | |
2236 | && unroll_type != UNROLL_COMPLETELY))) | |
5f2fc772 | 2237 | copy = emit_note_copy (insn); |
67f2de41 RK |
2238 | else |
2239 | copy = 0; | |
2240 | break; | |
a5af23fe | 2241 | |
67f2de41 RK |
2242 | default: |
2243 | abort (); | |
67f2de41 | 2244 | } |
a5af23fe | 2245 | |
67f2de41 RK |
2246 | map->insn_map[INSN_UID (insn)] = copy; |
2247 | } | |
2248 | while (insn != copy_end); | |
a5af23fe | 2249 | |
6bd43524 | 2250 | /* Now finish copying the REG_NOTES. */ | |
ef39bb95 JW |
2251 | insn = copy_start; |
2252 | do | |
2253 | { | |
2254 | insn = NEXT_INSN (insn); | |
4b4bf941 | 2255 | if (INSN_P (insn) |
ef39bb95 | 2256 | && map->insn_map[INSN_UID (insn)]) |
b59a386b | 2257 | final_reg_note_copy (®_NOTES (map->insn_map[INSN_UID (insn)]), map); |
ef39bb95 JW |
2258 | } |
2259 | while (insn != copy_end); | |
2260 | ||
67f2de41 RK |
2261 | /* There may be notes between copy_notes_from and loop_end. Emit a copy of |
2262 | each of these notes here, since there may be some important ones, such as | |
2263 | NOTE_INSN_BLOCK_END notes, in this group. We don't do this on the last | |
2264 | iteration, because the original notes won't be deleted. | |
2265 | ||
2266 | We can't use insert_before here, because when from preconditioning, | |
2267 | insert_before points before the loop. We can't use copy_end, because | |
2268 | there may be insns already inserted after it (which we don't want to | |
2269 | copy) when not from preconditioning code. */ | |
2270 | ||
2271 | if (! last_iteration) | |
2272 | { | |
2273 | for (insn = copy_notes_from; insn != loop_end; insn = NEXT_INSN (insn)) | |
2274 | { | |
36e9ee91 R |
2275 | /* VTOP notes are valid only before the loop exit test. |
2276 | If placed anywhere else, loop may generate bad code. | |
0a756401 R |
2277 | Although COPY_NOTES_FROM will be at most one or two (for cc0) |
2278 | instructions before the last insn in the loop, COPY_NOTES_FROM | |
2279 | can be a NOTE_INSN_LOOP_CONT note if there is no VTOP note, | |
2280 | as in a do .. while loop. */ | |
4b4bf941 | 2281 | if (NOTE_P (insn) |
5f2fc772 | 2282 | && ((NOTE_LINE_NUMBER (insn) != NOTE_INSN_DELETED |
fd3acbb3 NS |
2283 | && NOTE_LINE_NUMBER (insn) != NOTE_INSN_BASIC_BLOCK |
2284 | && NOTE_LINE_NUMBER (insn) != NOTE_INSN_LOOP_VTOP | |
5f2fc772 NS |
2285 | && NOTE_LINE_NUMBER (insn) != NOTE_INSN_LOOP_CONT))) |
2286 | emit_note_copy (insn); | |
67f2de41 RK |
2287 | } |
2288 | } | |
2289 | ||
2290 | if (final_label && LABEL_NUSES (final_label) > 0) | |
2291 | emit_label (final_label); | |
2292 | ||
2f937369 | 2293 | tem = get_insns (); |
67f2de41 | 2294 | end_sequence (); |
86e21212 | 2295 | loop_insn_emit_before (loop, 0, insert_before, tem); |
67f2de41 RK |
2296 | } |
2297 | \f | |
2298 | /* Emit an insn, using the expand_binop to ensure that a valid insn is | |
2299 | emitted. This will correctly handle the case where the increment value | |
2300 | won't fit in the immediate field of a PLUS insns. */ | |
2301 | ||
2302 | void | |
2e1eedd6 | 2303 | emit_unrolled_add (rtx dest_reg, rtx src_reg, rtx increment) |
67f2de41 RK |
2304 | { |
2305 | rtx result; | |
2306 | ||
ef89d648 ZW |
2307 | result = expand_simple_binop (GET_MODE (dest_reg), PLUS, src_reg, increment, |
2308 | dest_reg, 0, OPTAB_LIB_WIDEN); | |
67f2de41 RK |
2309 | |
2310 | if (dest_reg != result) | |
2311 | emit_move_insn (dest_reg, result); | |
2312 | } | |
2313 | \f | |
/* Searches the insns between INSN and LOOP->END.  Returns 1 if there
   is a backward branch in that range that branches to somewhere between
   LOOP->START and INSN.  Returns 0 otherwise.  */

/* ??? This is a quadratic algorithm.  Could be rewritten to be linear.
   In practice, this is not a problem, because this function is seldom called,
   and uses a negligible amount of CPU time on average.  */

int
back_branch_in_range_p (const struct loop *loop, rtx insn)
{
  rtx p, q, target_insn;
  rtx loop_start = loop->start;
  rtx loop_end = loop->end;
  rtx orig_loop_end = loop->end;

  /* Stop before we get to the backward branch at the end of the loop.
     prev_nonnote_insn may land on the BARRIER that follows the branch,
     in which case step back one more insn.  */
  loop_end = prev_nonnote_insn (loop_end);
  if (BARRIER_P (loop_end))
    loop_end = PREV_INSN (loop_end);

  /* Check in case insn has been deleted, search forward for first non
     deleted insn following it.  */
  while (INSN_DELETED_P (insn))
    insn = NEXT_INSN (insn);

  /* Check for the case where insn is the last insn in the loop.  Deal
     with the case where INSN was a deleted loop test insn, in which case
     it will now be the NOTE_LOOP_END.  */
  if (insn == loop_end || insn == orig_loop_end)
    return 0;

  /* Walk every insn strictly between INSN and the end-of-loop branch;
     for each jump, scan [loop_start, insn) for its target.  */
  for (p = NEXT_INSN (insn); p != loop_end; p = NEXT_INSN (p))
    {
      if (JUMP_P (p))
        {
          target_insn = JUMP_LABEL (p);

          /* Search from loop_start to insn, to see if one of them is
             the target_insn.  We can't use INSN_LUID comparisons here,
             since insn may not have an LUID entry.  */
          for (q = loop_start; q != insn; q = NEXT_INSN (q))
            if (q == target_insn)
              return 1;
        }
    }

  return 0;
}
2363 | ||
2364 | /* Try to generate the simplest rtx for the expression | |
2365 | (PLUS (MULT mult1 mult2) add1). This is used to calculate the initial | |
2366 | value of giv's. */ | |
2367 | ||
2368 | static rtx | |
2e1eedd6 | 2369 | fold_rtx_mult_add (rtx mult1, rtx mult2, rtx add1, enum machine_mode mode) |
67f2de41 RK |
2370 | { |
2371 | rtx temp, mult_res; | |
2372 | rtx result; | |
2373 | ||
2374 | /* The modes must all be the same. This should always be true. For now, | |
2375 | check to make sure. */ | |
2376 | if ((GET_MODE (mult1) != mode && GET_MODE (mult1) != VOIDmode) | |
2377 | || (GET_MODE (mult2) != mode && GET_MODE (mult2) != VOIDmode) | |
2378 | || (GET_MODE (add1) != mode && GET_MODE (add1) != VOIDmode)) | |
2379 | abort (); | |
2380 | ||
2381 | /* Ensure that if at least one of mult1/mult2 are constant, then mult2 | |
2382 | will be a constant. */ | |
2383 | if (GET_CODE (mult1) == CONST_INT) | |
2384 | { | |
2385 | temp = mult2; | |
2386 | mult2 = mult1; | |
2387 | mult1 = temp; | |
2388 | } | |
2389 | ||
2390 | mult_res = simplify_binary_operation (MULT, mode, mult1, mult2); | |
2391 | if (! mult_res) | |
38a448ca | 2392 | mult_res = gen_rtx_MULT (mode, mult1, mult2); |
67f2de41 RK |
2393 | |
2394 | /* Again, put the constant second. */ | |
2395 | if (GET_CODE (add1) == CONST_INT) | |
2396 | { | |
2397 | temp = add1; | |
2398 | add1 = mult_res; | |
2399 | mult_res = temp; | |
2400 | } | |
2401 | ||
2402 | result = simplify_binary_operation (PLUS, mode, add1, mult_res); | |
2403 | if (! result) | |
38a448ca | 2404 | result = gen_rtx_PLUS (mode, add1, mult_res); |
67f2de41 RK |
2405 | |
2406 | return result; | |
2407 | } | |
2408 | ||
2409 | /* Searches the list of induction struct's for the biv BL, to try to calculate | |
2410 | the total increment value for one iteration of the loop as a constant. | |
2411 | ||
2412 | Returns the increment value as an rtx, simplified as much as possible, | |
2413 | if it can be calculated. Otherwise, returns 0. */ | |
2414 | ||
a5af23fe | 2415 | rtx |
2e1eedd6 | 2416 | biv_total_increment (const struct iv_class *bl) |
67f2de41 RK |
2417 | { |
2418 | struct induction *v; | |
2419 | rtx result; | |
2420 | ||
2421 | /* For increment, must check every instruction that sets it. Each | |
2422 | instruction must be executed only once each time through the loop. | |
38e01259 | 2423 | To verify this, we check that the insn is always executed, and that |
67f2de41 RK |
2424 | there are no backward branches after the insn that branch to before it. |
2425 | Also, the insn must have a mult_val of one (to make sure it really is | |
2426 | an increment). */ | |
2427 | ||
2428 | result = const0_rtx; | |
2429 | for (v = bl->biv; v; v = v->next_iv) | |
2430 | { | |
2431 | if (v->always_computable && v->mult_val == const1_rtx | |
3823f0b2 GK |
2432 | && ! v->maybe_multiple |
2433 | && SCALAR_INT_MODE_P (v->mode)) | |
e8226879 EB |
2434 | { |
2435 | /* If we have already counted it, skip it. */ | |
2436 | if (v->same) | |
2437 | continue; | |
2438 | ||
2439 | result = fold_rtx_mult_add (result, const1_rtx, v->add_val, v->mode); | |
2440 | } | |
67f2de41 RK |
2441 | else |
2442 | return 0; | |
2443 | } | |
2444 | ||
2445 | return result; | |
2446 | } | |
2447 | ||
67f2de41 RK |
/* For each biv and giv, determine whether it can be safely split into
   a different variable for each unrolled copy of the loop body.  If it
   is safe to split, then indicate that by saving some useful info
   in the splittable_regs array (indexed by register number).

   If the loop is being completely unrolled, then splittable_regs will hold
   the current value of the induction variable while the loop is unrolled.
   It must be set to the initial value of the induction variable here.
   Otherwise, splittable_regs will hold the difference between the current
   value of the induction variable and the value the induction variable had
   at the top of the loop.  It must be set to the value 0 here.

   Returns the total number of instructions that set registers that are
   splittable.  */

/* ?? If the loop is only unrolled twice, then most of the restrictions to
   constant values are unnecessary, since we can easily calculate increment
   values in this case even if nothing is constant.  The increment value
   should not involve a multiply however.  */

/* ?? Even if the biv/giv increment values aren't constant, it may still
   be beneficial to split the variable if the loop is only unrolled a few
   times, since multiplies by small integers (1,2,3,4) are very cheap.  */

static int
find_splittable_regs (const struct loop *loop,
                      enum unroll_types unroll_type, int unroll_number)
{
  struct loop_ivs *ivs = LOOP_IVS (loop);
  struct iv_class *bl;
  struct induction *v;
  rtx increment, tem;
  rtx biv_final_value;
  int biv_splittable;
  int result = 0;

  for (bl = ivs->list; bl; bl = bl->next)
    {
      /* Biv_total_increment must return a constant value,
         otherwise we cannot calculate the split values.  */

      increment = biv_total_increment (bl);
      if (! increment || GET_CODE (increment) != CONST_INT)
        continue;

      /* The loop must be unrolled completely, or else have a known number
         of iterations and only one exit, or else the biv must be dead
         outside the loop, or else the final value must be known.  Otherwise,
         it is unsafe to split the biv since it may not have the proper
         value on loop exit.  */

      /* loop->exit_count is nonzero if the loop has an exit other than
         a fall through at the end.  */

      biv_splittable = 1;
      biv_final_value = 0;
      if (unroll_type != UNROLL_COMPLETELY
          && (loop->exit_count || unroll_type == UNROLL_NAIVE)
          && (REGNO_LAST_LUID (bl->regno) >= INSN_LUID (loop->end)
              || ! bl->init_insn
              || INSN_UID (bl->init_insn) >= max_uid_for_loop
              || (REGNO_FIRST_LUID (bl->regno)
                  < INSN_LUID (bl->init_insn))
              || reg_mentioned_p (bl->biv->dest_reg, SET_SRC (bl->init_set)))
          /* Note: final_biv_value is evaluated last so BIV_FINAL_VALUE is
             set as a side effect only when the other checks failed.  */
          && ! (biv_final_value = final_biv_value (loop, bl)))
        biv_splittable = 0;

      /* If any of the insns setting the BIV don't do so with a simple
         PLUS, we don't know how to split it.  */
      for (v = bl->biv; biv_splittable && v; v = v->next_iv)
        if ((tem = single_set (v->insn)) == 0
            || !REG_P (SET_DEST (tem))
            || REGNO (SET_DEST (tem)) != bl->regno
            || GET_CODE (SET_SRC (tem)) != PLUS)
          biv_splittable = 0;

      /* If final value is nonzero, then must emit an instruction which sets
         the value of the biv to the proper value.  This is done after
         handling all of the givs, since some of them may need to use the
         biv's value in their initialization code.  */

      /* This biv is splittable.  If completely unrolling the loop, save
         the biv's initial value.  Otherwise, save the constant zero.  */

      if (biv_splittable == 1)
        {
          if (unroll_type == UNROLL_COMPLETELY)
            {
              /* If the initial value of the biv is itself (i.e. it is too
                 complicated for strength_reduce to compute), or is a hard
                 register, or it isn't invariant, then we must create a new
                 pseudo reg to hold the initial value of the biv.  */

              if (REG_P (bl->initial_value)
                  && (REGNO (bl->initial_value) == bl->regno
                      || REGNO (bl->initial_value) < FIRST_PSEUDO_REGISTER
                      || ! loop_invariant_p (loop, bl->initial_value)))
                {
                  rtx tem = gen_reg_rtx (bl->biv->mode);

                  record_base_value (REGNO (tem), bl->biv->add_val, 0);
                  loop_insn_hoist (loop,
                                   gen_move_insn (tem, bl->biv->src_reg));

                  if (loop_dump_stream)
                    fprintf (loop_dump_stream,
                             "Biv %d initial value remapped to %d.\n",
                             bl->regno, REGNO (tem));

                  splittable_regs[bl->regno] = tem;
                }
              else
                splittable_regs[bl->regno] = bl->initial_value;
            }
          else
            splittable_regs[bl->regno] = const0_rtx;

          /* Save the number of instructions that modify the biv, so that
             we can treat the last one specially.  */

          splittable_regs_updates[bl->regno] = bl->biv_count;
          result += bl->biv_count;

          if (loop_dump_stream)
            fprintf (loop_dump_stream,
                     "Biv %d safe to split.\n", bl->regno);
        }

      /* Check every giv that depends on this biv to see whether it is
         splittable also.  Even if the biv isn't splittable, givs which
         depend on it may be splittable if the biv is live outside the
         loop, and the givs aren't.  */

      result += find_splittable_givs (loop, bl, unroll_type, increment,
                                      unroll_number);

      /* If final value is nonzero, then must emit an instruction which sets
         the value of the biv to the proper value.  This is done after
         handling all of the givs, since some of them may need to use the
         biv's value in their initialization code.  */
      if (biv_final_value)
        {
          /* If the loop has multiple exits, emit the insns before the
             loop to ensure that it will always be executed no matter
             how the loop exits.  Otherwise emit the insn after the loop,
             since this is slightly more efficient.  */
          if (! loop->exit_count)
            loop_insn_sink (loop, gen_move_insn (bl->biv->src_reg,
                                                 biv_final_value));
          else
            {
              /* Create a new register to hold the value of the biv, and then
                 set the biv to its final value before the loop start.  The biv
                 is set to its final value before loop start to ensure that
                 this insn will always be executed, no matter how the loop
                 exits.  */
              rtx tem = gen_reg_rtx (bl->biv->mode);
              record_base_value (REGNO (tem), bl->biv->add_val, 0);

              loop_insn_hoist (loop, gen_move_insn (tem, bl->biv->src_reg));
              loop_insn_hoist (loop, gen_move_insn (bl->biv->src_reg,
                                                    biv_final_value));

              if (loop_dump_stream)
                fprintf (loop_dump_stream, "Biv %d mapped to %d for split.\n",
                         REGNO (bl->biv->src_reg), REGNO (tem));

              /* Set up the mapping from the original biv register to the new
                 register.  */
              bl->biv->src_reg = tem;
            }
        }
    }
  return result;
}
2623 | ||
/* For every giv based on the biv BL, check to determine whether it is
   splittable.  This is a subroutine to find_splittable_regs ().

   Return the number of instructions that set splittable registers.  */

static int
find_splittable_givs (const struct loop *loop, struct iv_class *bl,
                      enum unroll_types unroll_type, rtx increment,
                      int unroll_number ATTRIBUTE_UNUSED)
{
  struct loop_ivs *ivs = LOOP_IVS (loop);
  struct induction *v, *v2;
  rtx final_value;
  rtx tem;
  int result = 0;

  /* Scan the list of givs, and set the same_insn field when there are
     multiple identical givs in the same insn.  */
  for (v = bl->giv; v; v = v->next_iv)
    for (v2 = v->next_iv; v2; v2 = v2->next_iv)
      if (v->insn == v2->insn && rtx_equal_p (v->new_reg, v2->new_reg)
          && ! v2->same_insn)
        v2->same_insn = v;

  for (v = bl->giv; v; v = v->next_iv)
    {
      rtx giv_inc, value;

      /* Only split the giv if it has already been reduced, or if the loop is
         being completely unrolled.  */
      if (unroll_type != UNROLL_COMPLETELY && v->ignore)
        continue;

      /* The giv can be split if the insn that sets the giv is executed once
         and only once on every iteration of the loop.  */
      /* An address giv can always be split.  v->insn is just a use not a set,
         and hence it does not matter whether it is always executed.  All that
         matters is that all the biv increments are always executed, and we
         won't reach here if they aren't.  */
      if (v->giv_type != DEST_ADDR
          && (! v->always_computable
              || back_branch_in_range_p (loop, v->insn)))
        continue;

      /* The giv increment value must be a constant.  */
      giv_inc = fold_rtx_mult_add (v->mult_val, increment, const0_rtx,
                                   v->mode);
      if (! giv_inc || GET_CODE (giv_inc) != CONST_INT)
        continue;

      /* The loop must be unrolled completely, or else have a known number of
         iterations and only one exit, or else the giv must be dead outside
         the loop, or else the final value of the giv must be known.
         Otherwise, it is not safe to split the giv since it may not have the
         proper value on loop exit.  */

      /* The used outside loop test will fail for DEST_ADDR givs.  They are
         never used outside the loop anyways, so it is always safe to split a
         DEST_ADDR giv.  */

      final_value = 0;
      if (unroll_type != UNROLL_COMPLETELY
          && (loop->exit_count || unroll_type == UNROLL_NAIVE)
          && v->giv_type != DEST_ADDR
          /* The next part is true if the pseudo is used outside the loop.
             We assume that this is true for any pseudo created after loop
             starts, because we don't have a reg_n_info entry for them.  */
          && (REGNO (v->dest_reg) >= max_reg_before_loop
              || (REGNO_FIRST_UID (REGNO (v->dest_reg)) != INSN_UID (v->insn)
                  /* Check for the case where the pseudo is set by a shift/add
                     sequence, in which case the first insn setting the pseudo
                     is the first insn of the shift/add sequence.  */
                  && (! (tem = find_reg_note (v->insn, REG_RETVAL, NULL_RTX))
                      || (REGNO_FIRST_UID (REGNO (v->dest_reg))
                          != INSN_UID (XEXP (tem, 0)))))
              /* Line above always fails if INSN was moved by loop opt.  */
              || (REGNO_LAST_LUID (REGNO (v->dest_reg))
                  >= INSN_LUID (loop->end)))
          && ! (final_value = v->final_value))
        continue;

#if 0
      /* Currently, non-reduced/final-value givs are never split.  */
      /* Should emit insns after the loop if possible, as the biv final value
         code below does.  */

      /* If the final value is nonzero, and the giv has not been reduced,
         then must emit an instruction to set the final value.  */
      if (final_value && !v->new_reg)
        {
          /* Create a new register to hold the value of the giv, and then set
             the giv to its final value before the loop start.  The giv is set
             to its final value before loop start to ensure that this insn
             will always be executed, no matter how we exit.  */
          tem = gen_reg_rtx (v->mode);
          loop_insn_hoist (loop, gen_move_insn (tem, v->dest_reg));
          loop_insn_hoist (loop, gen_move_insn (v->dest_reg, final_value));

          if (loop_dump_stream)
            fprintf (loop_dump_stream, "Giv %d mapped to %d for split.\n",
                     REGNO (v->dest_reg), REGNO (tem));

          v->src_reg = tem;
        }
#endif

      /* This giv is splittable.  If completely unrolling the loop, save the
         giv's initial value.  Otherwise, save the constant zero for it.  */

      if (unroll_type == UNROLL_COMPLETELY)
        {
          /* It is not safe to use bl->initial_value here, because it may not
             be invariant.  It is safe to use the initial value stored in
             the splittable_regs array if it is set.  In rare cases, it won't
             be set, so then we do exactly the same thing as
             find_splittable_regs does to get a safe value.  */
          rtx biv_initial_value;

          if (splittable_regs[bl->regno])
            biv_initial_value = splittable_regs[bl->regno];
          else if (!REG_P (bl->initial_value)
                   || (REGNO (bl->initial_value) != bl->regno
                       && REGNO (bl->initial_value) >= FIRST_PSEUDO_REGISTER))
            biv_initial_value = bl->initial_value;
          else
            {
              rtx tem = gen_reg_rtx (bl->biv->mode);

              record_base_value (REGNO (tem), bl->biv->add_val, 0);
              loop_insn_hoist (loop, gen_move_insn (tem, bl->biv->src_reg));
              biv_initial_value = tem;
            }
          biv_initial_value = extend_value_for_giv (v, biv_initial_value);
          value = fold_rtx_mult_add (v->mult_val, biv_initial_value,
                                     v->add_val, v->mode);
        }
      else
        value = const0_rtx;

      if (v->new_reg)
        {
          /* If a giv was combined with another giv, then we can only split
             this giv if the giv it was combined with was reduced.  This
             is because the value of v->new_reg is meaningless in this
             case.  */
          if (v->same && ! v->same->new_reg)
            {
              if (loop_dump_stream)
                fprintf (loop_dump_stream,
                         "giv combined with unreduced giv not split.\n");
              continue;
            }
          /* If the giv is an address destination, it could be something other
             than a simple register, these have to be treated differently.  */
          else if (v->giv_type == DEST_REG)
            {
              /* If value is not a constant, register, or register plus
                 constant, then compute its value into a register before
                 loop start.  This prevents invalid rtx sharing, and should
                 generate better code.  We can use bl->initial_value here
                 instead of splittable_regs[bl->regno] because this code
                 is going before the loop start.  */
              if (unroll_type == UNROLL_COMPLETELY
                  && GET_CODE (value) != CONST_INT
                  && !REG_P (value)
                  && (GET_CODE (value) != PLUS
                      || !REG_P (XEXP (value, 0))
                      || GET_CODE (XEXP (value, 1)) != CONST_INT))
                {
                  rtx tem = gen_reg_rtx (v->mode);
                  record_base_value (REGNO (tem), v->add_val, 0);
                  loop_iv_add_mult_hoist (loop,
                                          extend_value_for_giv (v, bl->initial_value),
                                          v->mult_val, v->add_val, tem);
                  value = tem;
                }

              splittable_regs[reg_or_subregno (v->new_reg)] = value;
            }
          else
            continue;
        }
      else
        {
#if 0
          /* Currently, unreduced giv's can't be split.  This is not too much
             of a problem since unreduced giv's are not live across loop
             iterations anyways.  When unrolling a loop completely though,
             it makes sense to reduce&split givs when possible, as this will
             result in simpler instructions, and will not require that a reg
             be live across loop iterations.  */

          splittable_regs[REGNO (v->dest_reg)] = value;
          fprintf (stderr, "Giv %d at insn %d not reduced\n",
                   REGNO (v->dest_reg), INSN_UID (v->insn));
#else
          continue;
#endif
        }

      /* Unreduced givs are only updated once by definition.  Reduced givs
         are updated as many times as their biv is.  Mark it so if this is
         a splittable register.  Don't need to do anything for address givs
         where this may not be a register.  */

      if (REG_P (v->new_reg))
        {
          int count = 1;
          if (! v->ignore)
            count = REG_IV_CLASS (ivs, REGNO (v->src_reg))->biv_count;

          splittable_regs_updates[reg_or_subregno (v->new_reg)] = count;
        }

      result++;

      if (loop_dump_stream)
        {
          int regnum;

          /* v->dest_reg may be a constant or a MEM-like address giv, so
             pick a representative register number for the dump.  */
          if (GET_CODE (v->dest_reg) == CONST_INT)
            regnum = -1;
          else if (!REG_P (v->dest_reg))
            regnum = REGNO (XEXP (v->dest_reg, 0));
          else
            regnum = REGNO (v->dest_reg);
          fprintf (loop_dump_stream, "Giv %d at insn %d safe to split.\n",
                   regnum, INSN_UID (v->insn));
        }
    }

  return result;
}
2857 | \f | |
/* Try to prove that the register REG is dead after LOOP exits.  Trace every
   loop exit looking for an insn that will always be executed, which sets
   the register to some value, and appears before the first use of the register
   is found.  If successful, then return 1, otherwise return 0.  */

/* ?? Could be made more intelligent in the handling of jumps, so that
   it can search past if statements and other similar structures.  */

static int
reg_dead_after_loop (const struct loop *loop, rtx reg)
{
  rtx insn, label;
  int jump_count = 0;
  int label_count = 0;

  /* In addition to checking all exits of this loop, we must also check
     all exits of inner nested loops that would exit this loop.  We don't
     have any way to identify those, so we just give up if there are any
     such inner loop exits.  */

  for (label = loop->exit_labels; label; label = LABEL_NEXTREF (label))
    label_count++;

  /* A mismatch means some exit labels belong to inner loops that we
     cannot enumerate here, so be conservative.  */
  if (label_count != loop->exit_count)
    return 0;

  /* HACK: Must also search the loop fall through exit, create a label_ref
     here which points to the loop->end, and append the loop_number_exit_labels
     list to it.  */
  label = gen_rtx_LABEL_REF (VOIDmode, loop->end);
  LABEL_NEXTREF (label) = loop->exit_labels;

  for (; label; label = LABEL_NEXTREF (label))
    {
      /* Succeed if find an insn which sets the biv or if reach end of
	 function.  Fail if find an insn that uses the biv, or if come to
	 a conditional jump.  */

      insn = NEXT_INSN (XEXP (label, 0));
      while (insn)
	{
	  if (INSN_P (insn))
	    {
	      rtx set, note;

	      /* Any use of REG before a full set means it is live here.  */
	      if (reg_referenced_p (reg, PATTERN (insn)))
		return 0;

	      /* A REG_EQUAL/REG_EQUIV note mentioning REG also counts
		 as a use.  */
	      note = find_reg_equal_equiv_note (insn);
	      if (note && reg_overlap_mentioned_p (reg, XEXP (note, 0)))
		return 0;

	      /* A full set of REG kills the old value: this exit is safe.  */
	      set = single_set (insn);
	      if (set && rtx_equal_p (SET_DEST (set), reg))
		break;

	      if (JUMP_P (insn))
		{
		  if (GET_CODE (PATTERN (insn)) == RETURN)
		    break;
		  else if (!any_uncondjump_p (insn)
			   /* Prevent infinite loop following infinite loops.  */
			   || jump_count++ > 20)
		    return 0;
		  else
		    /* Follow the unconditional jump and keep scanning
		       at its target.  */
		    insn = JUMP_LABEL (insn);
		}
	    }

	  insn = NEXT_INSN (insn);
	}
    }

  /* Success, the register is dead on all loop exits.  */
  return 1;
}
2934 | ||
/* Try to calculate the final value of the biv described by BL, the value
   it will have at the end of LOOP.  If we can do it, return that value
   (emitting any needed computation into a new pseudo after the loop end),
   otherwise return 0.  */

rtx
final_biv_value (const struct loop *loop, struct iv_class *bl)
{
  unsigned HOST_WIDE_INT n_iterations = LOOP_INFO (loop)->n_iterations;
  rtx increment, tem;

  /* ??? This only works for MODE_INT biv's.  Reject all others for now.  */

  if (GET_MODE_CLASS (bl->biv->mode) != MODE_INT)
    return 0;

  /* The final value for reversed bivs must be calculated differently than
     for ordinary bivs.  In this case, there is already an insn after the
     loop which sets this biv's final value (if necessary), and there are
     no other loop exits, so we can return any value.  */
  if (bl->reversed)
    {
      if (loop_dump_stream)
	fprintf (loop_dump_stream,
		 "Final biv value for %d, reversed biv.\n", bl->regno);

      return const0_rtx;
    }

  /* Try to calculate the final value as initial value + (number of iterations
     * increment).  For this to work, increment must be invariant, the only
     exit from the loop must be the fall through at the bottom (otherwise
     it may not have its final value when the loop exits), and the initial
     value of the biv must be invariant.  */

  if (n_iterations != 0
      && ! loop->exit_count
      && loop_invariant_p (loop, bl->initial_value))
    {
      increment = biv_total_increment (bl);

      if (increment && loop_invariant_p (loop, increment))
	{
	  /* Can calculate the loop exit value, emit insns after loop
	     end to calculate this value into a temporary register in
	     case it is needed later.  */

	  tem = gen_reg_rtx (bl->biv->mode);
	  record_base_value (REGNO (tem), bl->biv->add_val, 0);
	  /* tem = increment * n_iterations + initial_value, sunk after
	     the loop end.  */
	  loop_iv_add_mult_sink (loop, increment, GEN_INT (n_iterations),
				 bl->initial_value, tem);

	  if (loop_dump_stream)
	    fprintf (loop_dump_stream,
		     "Final biv value for %d, calculated.\n", bl->regno);

	  return tem;
	}
    }

  /* Check to see if the biv is dead at all loop exits.  */
  if (reg_dead_after_loop (loop, bl->biv->src_reg))
    {
      if (loop_dump_stream)
	fprintf (loop_dump_stream,
		 "Final biv value for %d, biv dead after loop exit.\n",
		 bl->regno);

      /* Dead after the loop, so any value will do.  */
      return const0_rtx;
    }

  return 0;
}
3006 | ||
/* Try to calculate the final value of the giv V, the value it will have at
   the end of LOOP.  If we can do it, return that value (emitting any needed
   computation after the loop end), otherwise return 0.  */

rtx
final_giv_value (const struct loop *loop, struct induction *v)
{
  struct loop_ivs *ivs = LOOP_IVS (loop);
  struct iv_class *bl;
  rtx insn;
  rtx increment, tem;
  rtx seq;
  rtx loop_end = loop->end;
  unsigned HOST_WIDE_INT n_iterations = LOOP_INFO (loop)->n_iterations;

  /* The biv class this giv is derived from.  */
  bl = REG_IV_CLASS (ivs, REGNO (v->src_reg));

  /* The final value for givs which depend on reversed bivs must be calculated
     differently than for ordinary givs.  In this case, there is already an
     insn after the loop which sets this giv's final value (if necessary),
     and there are no other loop exits, so we can return any value.  */
  if (bl->reversed)
    {
      if (loop_dump_stream)
	fprintf (loop_dump_stream,
		 "Final giv value for %d, depends on reversed biv\n",
		 REGNO (v->dest_reg));
      return const0_rtx;
    }

  /* Try to calculate the final value as a function of the biv it depends
     upon.  The only exit from the loop must be the fall through at the bottom
     and the insn that sets the giv must be executed on every iteration
     (otherwise the giv may not have its final value when the loop exits).  */

  /* ??? Can calculate the final giv value by subtracting off the
     extra biv increments times the giv's mult_val.  The loop must have
     only one exit for this to work, but the loop iterations does not need
     to be known.  */

  if (n_iterations != 0
      && ! loop->exit_count
      && v->always_executed)
    {
      /* ?? It is tempting to use the biv's value here since these insns will
	 be put after the loop, and hence the biv will have its final value
	 then.  However, this fails if the biv is subsequently eliminated.
	 Perhaps determine whether biv's are eliminable before trying to
	 determine whether giv's are replaceable so that we can use the
	 biv value here if it is not eliminable.  */

      /* We are emitting code after the end of the loop, so we must make
	 sure that bl->initial_value is still valid then.  It will still
	 be valid if it is invariant.  */

      increment = biv_total_increment (bl);

      if (increment && loop_invariant_p (loop, increment)
	  && loop_invariant_p (loop, bl->initial_value))
	{
	  /* Can calculate the loop exit value of its biv as
	     (n_iterations * increment) + initial_value */

	  /* The loop exit value of the giv is then
	     (final_biv_value - extra increments) * mult_val + add_val.
	     The extra increments are any increments to the biv which
	     occur in the loop after the giv's value is calculated.
	     We must search from the insn that sets the giv to the end
	     of the loop to calculate this value.  */

	  /* Put the final biv value in tem.  */
	  tem = gen_reg_rtx (v->mode);
	  record_base_value (REGNO (tem), bl->biv->add_val, 0);
	  loop_iv_add_mult_sink (loop, extend_value_for_giv (v, increment),
				 GEN_INT (n_iterations),
				 extend_value_for_giv (v, bl->initial_value),
				 tem);

	  /* Subtract off extra increments as we find them.  */
	  for (insn = NEXT_INSN (v->insn); insn != loop_end;
	       insn = NEXT_INSN (insn))
	    {
	      struct induction *biv;

	      for (biv = bl->biv; biv; biv = biv->next_iv)
		if (biv->insn == insn)
		  {
		    /* This biv increment happens after the giv is set;
		       emit tem = tem - add_val after the loop.  */
		    start_sequence ();
		    tem = expand_simple_binop (GET_MODE (tem), MINUS, tem,
					       biv->add_val, NULL_RTX, 0,
					       OPTAB_LIB_WIDEN);
		    seq = get_insns ();
		    end_sequence ();
		    loop_insn_sink (loop, seq);
		  }
	    }

	  /* Now calculate the giv's final value.  */
	  loop_iv_add_mult_sink (loop, tem, v->mult_val, v->add_val, tem);

	  if (loop_dump_stream)
	    fprintf (loop_dump_stream,
		     "Final giv value for %d, calc from biv's value.\n",
		     REGNO (v->dest_reg));

	  return tem;
	}
    }

  /* Replaceable giv's should never reach here.  */
  if (v->replaceable)
    abort ();

  /* Check to see if the biv is dead at all loop exits.  */
  if (reg_dead_after_loop (loop, v->dest_reg))
    {
      if (loop_dump_stream)
	fprintf (loop_dump_stream,
		 "Final giv value for %d, giv dead after loop exit.\n",
		 REGNO (v->dest_reg));

      /* Dead after the loop, so any value will do.  */
      return const0_rtx;
    }

  return 0;
}
3132 | ||
fb530c07 | 3133 | /* Look back before LOOP->START for the insn that sets REG and return |
e96b4d7a MH |
3134 | the equivalent constant if there is a REG_EQUAL note otherwise just |
3135 | the SET_SRC of REG. */ | |
3136 | ||
3137 | static rtx | |
2e1eedd6 | 3138 | loop_find_equiv_value (const struct loop *loop, rtx reg) |
e96b4d7a | 3139 | { |
0534b804 | 3140 | rtx loop_start = loop->start; |
e96b4d7a MH |
3141 | rtx insn, set; |
3142 | rtx ret; | |
a5af23fe | 3143 | |
e96b4d7a | 3144 | ret = reg; |
a86dc4a3 | 3145 | for (insn = PREV_INSN (loop_start); insn; insn = PREV_INSN (insn)) |
e96b4d7a | 3146 | { |
4b4bf941 | 3147 | if (LABEL_P (insn)) |
e96b4d7a | 3148 | break; |
a5af23fe | 3149 | |
2c3c49de | 3150 | else if (INSN_P (insn) && reg_set_p (reg, insn)) |
e96b4d7a MH |
3151 | { |
3152 | /* We found the last insn before the loop that sets the register. | |
3153 | If it sets the entire register, and has a REG_EQUAL note, | |
3154 | then use the value of the REG_EQUAL note. */ | |
3155 | if ((set = single_set (insn)) | |
a86dc4a3 | 3156 | && (SET_DEST (set) == reg)) |
e96b4d7a MH |
3157 | { |
3158 | rtx note = find_reg_note (insn, REG_EQUAL, NULL_RTX); | |
a5af23fe | 3159 | |
e96b4d7a MH |
3160 | /* Only use the REG_EQUAL note if it is a constant. |
3161 | Other things, divide in particular, will cause | |
3162 | problems later if we use them. */ | |
3163 | if (note && GET_CODE (XEXP (note, 0)) != EXPR_LIST | |
3164 | && CONSTANT_P (XEXP (note, 0))) | |
3165 | ret = XEXP (note, 0); | |
3166 | else | |
3167 | ret = SET_SRC (set); | |
6315f068 JJ |
3168 | |
3169 | /* We cannot do this if it changes between the | |
3170 | assignment and loop start though. */ | |
3171 | if (modified_between_p (ret, insn, loop_start)) | |
3172 | ret = reg; | |
e96b4d7a MH |
3173 | } |
3174 | break; | |
3175 | } | |
3176 | } | |
3177 | return ret; | |
3178 | } | |
3179 | ||
35704c46 MH |
3180 | /* Return a simplified rtx for the expression OP - REG. |
3181 | ||
3182 | REG must appear in OP, and OP must be a register or the sum of a register | |
3183 | and a second term. | |
3184 | ||
3185 | Thus, the return value must be const0_rtx or the second term. | |
3186 | ||
3187 | The caller is responsible for verifying that REG appears in OP and OP has | |
3188 | the proper form. */ | |
3189 | ||
3190 | static rtx | |
2e1eedd6 | 3191 | subtract_reg_term (rtx op, rtx reg) |
35704c46 MH |
3192 | { |
3193 | if (op == reg) | |
3194 | return const0_rtx; | |
3195 | if (GET_CODE (op) == PLUS) | |
3196 | { | |
3197 | if (XEXP (op, 0) == reg) | |
3198 | return XEXP (op, 1); | |
3199 | else if (XEXP (op, 1) == reg) | |
3200 | return XEXP (op, 0); | |
3201 | } | |
3202 | /* OP does not contain REG as a term. */ | |
3203 | abort (); | |
3204 | } | |
3205 | ||
35704c46 MH |
3206 | /* Find and return register term common to both expressions OP0 and |
3207 | OP1 or NULL_RTX if no such term exists. Each expression must be a | |
3208 | REG or a PLUS of a REG. */ | |
3209 | ||
3210 | static rtx | |
2e1eedd6 | 3211 | find_common_reg_term (rtx op0, rtx op1) |
35704c46 | 3212 | { |
f8cfc6aa JQ |
3213 | if ((REG_P (op0) || GET_CODE (op0) == PLUS) |
3214 | && (REG_P (op1) || GET_CODE (op1) == PLUS)) | |
35704c46 MH |
3215 | { |
3216 | rtx op00; | |
3217 | rtx op01; | |
3218 | rtx op10; | |
3219 | rtx op11; | |
3220 | ||
3221 | if (GET_CODE (op0) == PLUS) | |
3222 | op01 = XEXP (op0, 1), op00 = XEXP (op0, 0); | |
3223 | else | |
3224 | op01 = const0_rtx, op00 = op0; | |
3225 | ||
3226 | if (GET_CODE (op1) == PLUS) | |
3227 | op11 = XEXP (op1, 1), op10 = XEXP (op1, 0); | |
3228 | else | |
3229 | op11 = const0_rtx, op10 = op1; | |
3230 | ||
3231 | /* Find and return common register term if present. */ | |
3232 | if (REG_P (op00) && (op00 == op10 || op00 == op11)) | |
3233 | return op00; | |
3234 | else if (REG_P (op01) && (op01 == op10 || op01 == op11)) | |
3235 | return op01; | |
3236 | } | |
3237 | ||
3238 | /* No common register term found. */ | |
3239 | return NULL_RTX; | |
3240 | } | |
3241 | ||
0a5b41f2 MH |
3242 | /* Determine the loop iterator and calculate the number of loop |
3243 | iterations. Returns the exact number of loop iterations if it can | |
3244 | be calculated, otherwise returns zero. */ | |
67f2de41 | 3245 | |
c166a311 | 3246 | unsigned HOST_WIDE_INT |
2e1eedd6 | 3247 | loop_iterations (struct loop *loop) |
67f2de41 | 3248 | { |
ed5bb68d MH |
3249 | struct loop_info *loop_info = LOOP_INFO (loop); |
3250 | struct loop_ivs *ivs = LOOP_IVS (loop); | |
67f2de41 RK |
3251 | rtx comparison, comparison_value; |
3252 | rtx iteration_var, initial_value, increment, final_value; | |
3253 | enum rtx_code comparison_code; | |
b8ebd779 AO |
3254 | HOST_WIDE_INT inc; |
3255 | unsigned HOST_WIDE_INT abs_inc; | |
e96b4d7a MH |
3256 | unsigned HOST_WIDE_INT abs_diff; |
3257 | int off_by_one; | |
c166a311 | 3258 | int increment_dir; |
e96b4d7a | 3259 | int unsigned_p, compare_dir, final_larger; |
67f2de41 | 3260 | rtx last_loop_insn; |
35704c46 | 3261 | rtx reg_term; |
0a5b41f2 | 3262 | struct iv_class *bl; |
67f2de41 | 3263 | |
e96b4d7a | 3264 | loop_info->n_iterations = 0; |
302670f3 | 3265 | loop_info->initial_value = 0; |
e96b4d7a MH |
3266 | loop_info->initial_equiv_value = 0; |
3267 | loop_info->comparison_value = 0; | |
302670f3 | 3268 | loop_info->final_value = 0; |
e96b4d7a MH |
3269 | loop_info->final_equiv_value = 0; |
3270 | loop_info->increment = 0; | |
302670f3 | 3271 | loop_info->iteration_var = 0; |
e96b4d7a | 3272 | loop_info->unroll_number = 1; |
0a5b41f2 | 3273 | loop_info->iv = 0; |
67f2de41 | 3274 | |
e96b4d7a | 3275 | /* We used to use prev_nonnote_insn here, but that fails because it might |
40e81af5 JW |
3276 | accidentally get the branch for a contained loop if the branch for this |
3277 | loop was deleted. We can only trust branches immediately before the | |
3278 | loop_end. */ | |
f10aac29 | 3279 | last_loop_insn = PREV_INSN (loop->end); |
67f2de41 | 3280 | |
98dcbc07 | 3281 | /* ??? We should probably try harder to find the jump insn |
a5af23fe | 3282 | at the end of the loop. The following code assumes that |
98dcbc07 | 3283 | the last loop insn is a jump to the top of the loop. */ |
4b4bf941 | 3284 | if (!JUMP_P (last_loop_insn)) |
98dcbc07 MH |
3285 | { |
3286 | if (loop_dump_stream) | |
3287 | fprintf (loop_dump_stream, | |
3288 | "Loop iterations: No final conditional branch found.\n"); | |
3289 | return 0; | |
3290 | } | |
3291 | ||
3292 | /* If there is a more than a single jump to the top of the loop | |
3293 | we cannot (easily) determine the iteration count. */ | |
3294 | if (LABEL_NUSES (JUMP_LABEL (last_loop_insn)) > 1) | |
3295 | { | |
3296 | if (loop_dump_stream) | |
3297 | fprintf (loop_dump_stream, | |
3298 | "Loop iterations: Loop has multiple back edges.\n"); | |
3299 | return 0; | |
3300 | } | |
3301 | ||
9d1e9f93 FS |
3302 | /* If there are multiple conditionalized loop exit tests, they may jump |
3303 | back to differing CODE_LABELs. */ | |
3304 | if (loop->top && loop->cont) | |
3305 | { | |
3306 | rtx temp = PREV_INSN (last_loop_insn); | |
3307 | ||
3308 | do | |
3309 | { | |
4b4bf941 | 3310 | if (JUMP_P (temp)) |
9d1e9f93 | 3311 | { |
a22455df OH |
3312 | /* There are some kinds of jumps we can't deal with easily. */ |
3313 | if (JUMP_LABEL (temp) == 0) | |
3314 | { | |
3315 | if (loop_dump_stream) | |
3316 | fprintf | |
3317 | (loop_dump_stream, | |
3318 | "Loop iterations: Jump insn has null JUMP_LABEL.\n"); | |
3319 | return 0; | |
3320 | } | |
3321 | ||
3322 | if (/* Previous unrolling may have generated new insns not | |
3323 | covered by the uid_luid array. */ | |
3324 | INSN_UID (JUMP_LABEL (temp)) < max_uid_for_loop | |
3325 | /* Check if we jump back into the loop body. */ | |
3326 | && INSN_LUID (JUMP_LABEL (temp)) > INSN_LUID (loop->top) | |
3327 | && INSN_LUID (JUMP_LABEL (temp)) < INSN_LUID (loop->cont)) | |
3328 | { | |
3329 | if (loop_dump_stream) | |
41077ce4 | 3330 | fprintf |
a22455df OH |
3331 | (loop_dump_stream, |
3332 | "Loop iterations: Loop has multiple back edges.\n"); | |
3333 | return 0; | |
3334 | } | |
9d1e9f93 FS |
3335 | } |
3336 | } | |
3337 | while ((temp = PREV_INSN (temp)) != loop->cont); | |
3338 | } | |
3339 | ||
98dcbc07 MH |
3340 | /* Find the iteration variable. If the last insn is a conditional |
3341 | branch, and the insn before tests a register value, make that the | |
3342 | iteration variable. */ | |
a5af23fe | 3343 | |
0534b804 | 3344 | comparison = get_condition_for_loop (loop, last_loop_insn); |
67f2de41 RK |
3345 | if (comparison == 0) |
3346 | { | |
3347 | if (loop_dump_stream) | |
3348 | fprintf (loop_dump_stream, | |
98dcbc07 | 3349 | "Loop iterations: No final comparison found.\n"); |
67f2de41 RK |
3350 | return 0; |
3351 | } | |
3352 | ||
3353 | /* ??? Get_condition may switch position of induction variable and | |
3354 | invariant register when it canonicalizes the comparison. */ | |
3355 | ||
3356 | comparison_code = GET_CODE (comparison); | |
3357 | iteration_var = XEXP (comparison, 0); | |
3358 | comparison_value = XEXP (comparison, 1); | |
a5af23fe | 3359 | |
f8cfc6aa | 3360 | if (!REG_P (iteration_var)) |
67f2de41 RK |
3361 | { |
3362 | if (loop_dump_stream) | |
3363 | fprintf (loop_dump_stream, | |
e96b4d7a | 3364 | "Loop iterations: Comparison not against register.\n"); |
67f2de41 RK |
3365 | return 0; |
3366 | } | |
3367 | ||
e4b68ced MH |
3368 | /* The only new registers that are created before loop iterations |
3369 | are givs made from biv increments or registers created by | |
3370 | load_mems. In the latter case, it is possible that try_copy_prop | |
3371 | will propagate a new pseudo into the old iteration register but | |
3372 | this will be marked by having the REG_USERVAR_P bit set. */ | |
3373 | ||
14be28e5 | 3374 | if ((unsigned) REGNO (iteration_var) >= ivs->n_regs |
e4b68ced | 3375 | && ! REG_USERVAR_P (iteration_var)) |
a2be868f | 3376 | abort (); |
67f2de41 | 3377 | |
0a5b41f2 MH |
3378 | /* Determine the initial value of the iteration variable, and the amount |
3379 | that it is incremented each loop. Use the tables constructed by | |
3380 | the strength reduction pass to calculate these values. */ | |
3381 | ||
3382 | /* Clear the result values, in case no answer can be found. */ | |
3383 | initial_value = 0; | |
3384 | increment = 0; | |
3385 | ||
3386 | /* The iteration variable can be either a giv or a biv. Check to see | |
3387 | which it is, and compute the variable's initial value, and increment | |
3388 | value if possible. */ | |
3389 | ||
3390 | /* If this is a new register, can't handle it since we don't have any | |
3391 | reg_iv_type entry for it. */ | |
14be28e5 | 3392 | if ((unsigned) REGNO (iteration_var) >= ivs->n_regs) |
0a5b41f2 MH |
3393 | { |
3394 | if (loop_dump_stream) | |
3395 | fprintf (loop_dump_stream, | |
3396 | "Loop iterations: No reg_iv_type entry for iteration var.\n"); | |
3397 | return 0; | |
3398 | } | |
3399 | ||
3400 | /* Reject iteration variables larger than the host wide int size, since they | |
3401 | could result in a number of iterations greater than the range of our | |
3402 | `unsigned HOST_WIDE_INT' variable loop_info->n_iterations. */ | |
3403 | else if ((GET_MODE_BITSIZE (GET_MODE (iteration_var)) | |
3404 | > HOST_BITS_PER_WIDE_INT)) | |
3405 | { | |
3406 | if (loop_dump_stream) | |
3407 | fprintf (loop_dump_stream, | |
3408 | "Loop iterations: Iteration var rejected because mode too large.\n"); | |
3409 | return 0; | |
3410 | } | |
3411 | else if (GET_MODE_CLASS (GET_MODE (iteration_var)) != MODE_INT) | |
3412 | { | |
3413 | if (loop_dump_stream) | |
3414 | fprintf (loop_dump_stream, | |
3415 | "Loop iterations: Iteration var not an integer.\n"); | |
3416 | return 0; | |
3417 | } | |
ce4191ee RS |
3418 | |
3419 | /* Try swapping the comparison to identify a suitable iv. */ | |
3420 | if (REG_IV_TYPE (ivs, REGNO (iteration_var)) != BASIC_INDUCT | |
3421 | && REG_IV_TYPE (ivs, REGNO (iteration_var)) != GENERAL_INDUCT | |
f8cfc6aa | 3422 | && REG_P (comparison_value) |
ce4191ee RS |
3423 | && REGNO (comparison_value) < ivs->n_regs) |
3424 | { | |
3425 | rtx temp = comparison_value; | |
3426 | comparison_code = swap_condition (comparison_code); | |
3427 | comparison_value = iteration_var; | |
3428 | iteration_var = temp; | |
3429 | } | |
3430 | ||
3431 | if (REG_IV_TYPE (ivs, REGNO (iteration_var)) == BASIC_INDUCT) | |
0a5b41f2 | 3432 | { |
86fee241 | 3433 | if (REGNO (iteration_var) >= ivs->n_regs) |
0a5b41f2 MH |
3434 | abort (); |
3435 | ||
3436 | /* Grab initial value, only useful if it is a constant. */ | |
8b634749 | 3437 | bl = REG_IV_CLASS (ivs, REGNO (iteration_var)); |
0a5b41f2 | 3438 | initial_value = bl->initial_value; |
9e2adb2a JJ |
3439 | if (!bl->biv->always_executed || bl->biv->maybe_multiple) |
3440 | { | |
3441 | if (loop_dump_stream) | |
3442 | fprintf (loop_dump_stream, | |
3443 | "Loop iterations: Basic induction var not set once in each iteration.\n"); | |
3444 | return 0; | |
3445 | } | |
0a5b41f2 MH |
3446 | |
3447 | increment = biv_total_increment (bl); | |
3448 | } | |
ed5bb68d | 3449 | else if (REG_IV_TYPE (ivs, REGNO (iteration_var)) == GENERAL_INDUCT) |
0a5b41f2 MH |
3450 | { |
3451 | HOST_WIDE_INT offset = 0; | |
ed5bb68d | 3452 | struct induction *v = REG_IV_INFO (ivs, REGNO (iteration_var)); |
0a5b41f2 MH |
3453 | rtx biv_initial_value; |
3454 | ||
86fee241 | 3455 | if (REGNO (v->src_reg) >= ivs->n_regs) |
0a5b41f2 MH |
3456 | abort (); |
3457 | ||
9e2adb2a JJ |
3458 | if (!v->always_executed || v->maybe_multiple) |
3459 | { | |
3460 | if (loop_dump_stream) | |
3461 | fprintf (loop_dump_stream, | |
3462 | "Loop iterations: General induction var not set once in each iteration.\n"); | |
3463 | return 0; | |
3464 | } | |
3465 | ||
8b634749 | 3466 | bl = REG_IV_CLASS (ivs, REGNO (v->src_reg)); |
0a5b41f2 MH |
3467 | |
3468 | /* Increment value is mult_val times the increment value of the biv. */ | |
3469 | ||
3470 | increment = biv_total_increment (bl); | |
3471 | if (increment) | |
3472 | { | |
3473 | struct induction *biv_inc; | |
3474 | ||
5c3c320e JW |
3475 | increment = fold_rtx_mult_add (v->mult_val, |
3476 | extend_value_for_giv (v, increment), | |
3477 | const0_rtx, v->mode); | |
ff7cc307 | 3478 | /* The caller assumes that one full increment has occurred at the |
0a5b41f2 MH |
3479 | first loop test. But that's not true when the biv is incremented |
3480 | after the giv is set (which is the usual case), e.g.: | |
3481 | i = 6; do {;} while (i++ < 9) . | |
3482 | Therefore, we bias the initial value by subtracting the amount of | |
3483 | the increment that occurs between the giv set and the giv test. */ | |
3484 | for (biv_inc = bl->biv; biv_inc; biv_inc = biv_inc->next_iv) | |
3485 | { | |
3486 | if (loop_insn_first_p (v->insn, biv_inc->insn)) | |
8ed805d2 GS |
3487 | { |
3488 | if (REG_P (biv_inc->add_val)) | |
3489 | { | |
3490 | if (loop_dump_stream) | |
3491 | fprintf (loop_dump_stream, | |
3492 | "Loop iterations: Basic induction var add_val is REG %d.\n", | |
3493 | REGNO (biv_inc->add_val)); | |
3494 | return 0; | |
3495 | } | |
3496 | ||
e8226879 EB |
3497 | /* If we have already counted it, skip it. */ |
3498 | if (biv_inc->same) | |
3499 | continue; | |
3500 | ||
8ed805d2 GS |
3501 | offset -= INTVAL (biv_inc->add_val); |
3502 | } | |
0a5b41f2 | 3503 | } |
0a5b41f2 MH |
3504 | } |
3505 | if (loop_dump_stream) | |
3506 | fprintf (loop_dump_stream, | |
3507 | "Loop iterations: Giv iterator, initial value bias %ld.\n", | |
3508 | (long) offset); | |
3509 | ||
3510 | /* Initial value is mult_val times the biv's initial value plus | |
3511 | add_val. Only useful if it is a constant. */ | |
3512 | biv_initial_value = extend_value_for_giv (v, bl->initial_value); | |
3513 | initial_value | |
3514 | = fold_rtx_mult_add (v->mult_val, | |
3515 | plus_constant (biv_initial_value, offset), | |
3516 | v->add_val, v->mode); | |
3517 | } | |
3518 | else | |
3519 | { | |
3520 | if (loop_dump_stream) | |
3521 | fprintf (loop_dump_stream, | |
3522 | "Loop iterations: Not basic or general induction var.\n"); | |
3523 | return 0; | |
3524 | } | |
0534b804 | 3525 | |
67f2de41 | 3526 | if (initial_value == 0) |
67f2de41 RK |
3527 | return 0; |
3528 | ||
e96b4d7a MH |
3529 | unsigned_p = 0; |
3530 | off_by_one = 0; | |
3531 | switch (comparison_code) | |
3532 | { | |
3533 | case LEU: | |
3534 | unsigned_p = 1; | |
3535 | case LE: | |
3536 | compare_dir = 1; | |
3537 | off_by_one = 1; | |
3538 | break; | |
3539 | case GEU: | |
3540 | unsigned_p = 1; | |
3541 | case GE: | |
3542 | compare_dir = -1; | |
3543 | off_by_one = -1; | |
3544 | break; | |
3545 | case EQ: | |
3546 | /* Cannot determine loop iterations with this case. */ | |
3547 | compare_dir = 0; | |
3548 | break; | |
3549 | case LTU: | |
3550 | unsigned_p = 1; | |
3551 | case LT: | |
3552 | compare_dir = 1; | |
3553 | break; | |
3554 | case GTU: | |
3555 | unsigned_p = 1; | |
3556 | case GT: | |
3557 | compare_dir = -1; | |
337f35bb | 3558 | break; |
e96b4d7a MH |
3559 | case NE: |
3560 | compare_dir = 0; | |
3561 | break; | |
3562 | default: | |
3563 | abort (); | |
3564 | } | |
3565 | ||
67f2de41 RK |
3566 | /* If the comparison value is an invariant register, then try to find |
3567 | its value from the insns before the start of the loop. */ | |
3568 | ||
e96b4d7a | 3569 | final_value = comparison_value; |
f8cfc6aa | 3570 | if (REG_P (comparison_value) |
0534b804 | 3571 | && loop_invariant_p (loop, comparison_value)) |
67f2de41 | 3572 | { |
0534b804 MH |
3573 | final_value = loop_find_equiv_value (loop, comparison_value); |
3574 | ||
e96b4d7a MH |
3575 | /* If we don't get an invariant final value, we are better |
3576 | off with the original register. */ | |
0534b804 | 3577 | if (! loop_invariant_p (loop, final_value)) |
e96b4d7a MH |
3578 | final_value = comparison_value; |
3579 | } | |
3580 | ||
3581 | /* Calculate the approximate final value of the induction variable | |
3582 | (on the last successful iteration). The exact final value | |
3583 | depends on the branch operator, and increment sign. It will be | |
3584 | wrong if the iteration variable is not incremented by one each | |
3585 | time through the loop and (comparison_value + off_by_one - | |
3586 | initial_value) % increment != 0. | |
3587 | ??? Note that the final_value may overflow and thus final_larger | |
3588 | will be bogus. A potentially infinite loop will be classified | |
3589 | as immediate, e.g. for (i = 0x7ffffff0; i <= 0x7fffffff; i++) */ | |
3590 | if (off_by_one) | |
3591 | final_value = plus_constant (final_value, off_by_one); | |
67f2de41 RK |
3592 | |
3593 | /* Save the calculated values describing this loop's bounds, in case | |
3594 | precondition_loop_p will need them later. These values can not be | |
3595 | recalculated inside precondition_loop_p because strength reduction | |
a5af23fe | 3596 | optimizations may obscure the loop's structure. |
67f2de41 | 3597 | |
e96b4d7a MH |
3598 | These values are only required by precondition_loop_p and insert_bct |
3599 | whenever the number of iterations cannot be computed at compile time. | |
3600 | Only the difference between final_value and initial_value is | |
3601 | important. Note that final_value is only approximate. */ | |
302670f3 | 3602 | loop_info->initial_value = initial_value; |
e96b4d7a MH |
3603 | loop_info->comparison_value = comparison_value; |
3604 | loop_info->final_value = plus_constant (comparison_value, off_by_one); | |
302670f3 | 3605 | loop_info->increment = increment; |
e96b4d7a | 3606 | loop_info->iteration_var = iteration_var; |
302670f3 | 3607 | loop_info->comparison_code = comparison_code; |
0a5b41f2 | 3608 | loop_info->iv = bl; |
67f2de41 | 3609 | |
35704c46 MH |
3610 | /* Try to determine the iteration count for loops such |
3611 | as (for i = init; i < init + const; i++). When running the | |
3612 | loop optimization twice, the first pass often converts simple | |
3613 | loops into this form. */ | |
e96b4d7a | 3614 | |
35704c46 | 3615 | if (REG_P (initial_value)) |
e96b4d7a | 3616 | { |
35704c46 MH |
3617 | rtx reg1; |
3618 | rtx reg2; | |
3619 | rtx const2; | |
e96b4d7a | 3620 | |
35704c46 | 3621 | reg1 = initial_value; |
e96b4d7a | 3622 | if (GET_CODE (final_value) == PLUS) |
35704c46 | 3623 | reg2 = XEXP (final_value, 0), const2 = XEXP (final_value, 1); |
e96b4d7a | 3624 | else |
35704c46 | 3625 | reg2 = final_value, const2 = const0_rtx; |
e96b4d7a | 3626 | |
35704c46 MH |
3627 | /* Check for initial_value = reg1, final_value = reg2 + const2, |
3628 | where reg1 != reg2. */ | |
3629 | if (REG_P (reg2) && reg2 != reg1) | |
e96b4d7a | 3630 | { |
35704c46 MH |
3631 | rtx temp; |
3632 | ||
3633 | /* Find what reg1 is equivalent to. Hopefully it will | |
3634 | either be reg2 or reg2 plus a constant. */ | |
0534b804 MH |
3635 | temp = loop_find_equiv_value (loop, reg1); |
3636 | ||
35704c46 MH |
3637 | if (find_common_reg_term (temp, reg2)) |
3638 | initial_value = temp; | |
bbda30a4 | 3639 | else if (loop_invariant_p (loop, reg2)) |
35704c46 MH |
3640 | { |
3641 | /* Find what reg2 is equivalent to. Hopefully it will | |
3642 | either be reg1 or reg1 plus a constant. Let's ignore | |
3643 | the latter case for now since it is not so common. */ | |
0534b804 MH |
3644 | temp = loop_find_equiv_value (loop, reg2); |
3645 | ||
35704c46 MH |
3646 | if (temp == loop_info->iteration_var) |
3647 | temp = initial_value; | |
3648 | if (temp == reg1) | |
3649 | final_value = (const2 == const0_rtx) | |
3650 | ? reg1 : gen_rtx_PLUS (GET_MODE (reg1), reg1, const2); | |
3651 | } | |
e96b4d7a | 3652 | } |
a2be868f | 3653 | else if (loop->vtop && GET_CODE (reg2) == CONST_INT) |
e96b4d7a | 3654 | { |
35704c46 MH |
3655 | rtx temp; |
3656 | ||
a86dc4a3 KH |
3657 | /* When running the loop optimizer twice, check_dbra_loop |
3658 | further obfuscates reversible loops of the form: | |
3659 | for (i = init; i < init + const; i++). We often end up with | |
3660 | final_value = 0, initial_value = temp, temp = temp2 - init, | |
3661 | where temp2 = init + const. If the loop has a vtop we | |
3662 | can replace initial_value with const. */ | |
35704c46 | 3663 | |
0534b804 MH |
3664 | temp = loop_find_equiv_value (loop, reg1); |
3665 | ||
35704c46 MH |
3666 | if (GET_CODE (temp) == MINUS && REG_P (XEXP (temp, 0))) |
3667 | { | |
0534b804 MH |
3668 | rtx temp2 = loop_find_equiv_value (loop, XEXP (temp, 0)); |
3669 | ||
35704c46 MH |
3670 | if (GET_CODE (temp2) == PLUS |
3671 | && XEXP (temp2, 0) == XEXP (temp, 1)) | |
3672 | initial_value = XEXP (temp2, 1); | |
3673 | } | |
e96b4d7a MH |
3674 | } |
3675 | } | |
a5af23fe | 3676 | |
35704c46 MH |
3677 | /* If have initial_value = reg + const1 and final_value = reg + |
3678 | const2, then replace initial_value with const1 and final_value | |
3679 | with const2. This should be safe since we are protected by the | |
3680 | initial comparison before entering the loop if we have a vtop. | |
3681 | For example, a + b < a + c is not equivalent to b < c for all a | |
3682 | when using modulo arithmetic. | |
3683 | ||
3684 | ??? Without a vtop we could still perform the optimization if we check | |
3685 | the initial and final values carefully. */ | |
a2be868f | 3686 | if (loop->vtop |
35704c46 MH |
3687 | && (reg_term = find_common_reg_term (initial_value, final_value))) |
3688 | { | |
3689 | initial_value = subtract_reg_term (initial_value, reg_term); | |
3690 | final_value = subtract_reg_term (final_value, reg_term); | |
3691 | } | |
3692 | ||
e96b4d7a MH |
3693 | loop_info->initial_equiv_value = initial_value; |
3694 | loop_info->final_equiv_value = final_value; | |
a5af23fe | 3695 | |
d24de7d1 R |
3696 | /* For EQ comparison loops, we don't have a valid final value. |
3697 | Check this now so that we won't leave an invalid value if we | |
3698 | return early for any other reason. */ | |
3699 | if (comparison_code == EQ) | |
a86dc4a3 | 3700 | loop_info->final_equiv_value = loop_info->final_value = 0; |
d24de7d1 | 3701 | |
00c0c63c JW |
3702 | if (increment == 0) |
3703 | { | |
3704 | if (loop_dump_stream) | |
3705 | fprintf (loop_dump_stream, | |
e96b4d7a | 3706 | "Loop iterations: Increment value can't be calculated.\n"); |
00c0c63c JW |
3707 | return 0; |
3708 | } | |
e96b4d7a MH |
3709 | |
3710 | if (GET_CODE (increment) != CONST_INT) | |
00c0c63c | 3711 | { |
66830675 JW |
3712 | /* If we have a REG, check to see if REG holds a constant value. */ |
3713 | /* ??? Other RTL, such as (neg (reg)) is possible here, but it isn't | |
3714 | clear if it is worthwhile to try to handle such RTL. */ | |
f8cfc6aa | 3715 | if (REG_P (increment) || GET_CODE (increment) == SUBREG) |
0534b804 | 3716 | increment = loop_find_equiv_value (loop, increment); |
e96b4d7a MH |
3717 | |
3718 | if (GET_CODE (increment) != CONST_INT) | |
3719 | { | |
3720 | if (loop_dump_stream) | |
3721 | { | |
3722 | fprintf (loop_dump_stream, | |
3723 | "Loop iterations: Increment value not constant "); | |
c804f3f8 | 3724 | print_simple_rtl (loop_dump_stream, increment); |
e96b4d7a MH |
3725 | fprintf (loop_dump_stream, ".\n"); |
3726 | } | |
3727 | return 0; | |
3728 | } | |
3729 | loop_info->increment = increment; | |
00c0c63c | 3730 | } |
e96b4d7a MH |
3731 | |
3732 | if (GET_CODE (initial_value) != CONST_INT) | |
00c0c63c JW |
3733 | { |
3734 | if (loop_dump_stream) | |
e96b4d7a MH |
3735 | { |
3736 | fprintf (loop_dump_stream, | |
3737 | "Loop iterations: Initial value not constant "); | |
c804f3f8 | 3738 | print_simple_rtl (loop_dump_stream, initial_value); |
e96b4d7a MH |
3739 | fprintf (loop_dump_stream, ".\n"); |
3740 | } | |
00c0c63c JW |
3741 | return 0; |
3742 | } | |
67f2de41 RK |
3743 | else if (GET_CODE (final_value) != CONST_INT) |
3744 | { | |
3745 | if (loop_dump_stream) | |
e96b4d7a MH |
3746 | { |
3747 | fprintf (loop_dump_stream, | |
3748 | "Loop iterations: Final value not constant "); | |
c804f3f8 | 3749 | print_simple_rtl (loop_dump_stream, final_value); |
e96b4d7a MH |
3750 | fprintf (loop_dump_stream, ".\n"); |
3751 | } | |
67f2de41 RK |
3752 | return 0; |
3753 | } | |
c8b64bf2 AM |
3754 | else if (comparison_code == EQ) |
3755 | { | |
3756 | rtx inc_once; | |
3757 | ||
3758 | if (loop_dump_stream) | |
3759 | fprintf (loop_dump_stream, "Loop iterations: EQ comparison loop.\n"); | |
3760 | ||
3761 | inc_once = gen_int_mode (INTVAL (initial_value) + INTVAL (increment), | |
3762 | GET_MODE (iteration_var)); | |
3763 | ||
3764 | if (inc_once == final_value) | |
3765 | { | |
3766 | /* The iterator value once through the loop is equal to the | |
14b493d6 | 3767 | comparison value. Either we have an infinite loop, or |
c8b64bf2 AM |
3768 | we'll loop twice. */ |
3769 | if (increment == const0_rtx) | |
3770 | return 0; | |
3771 | loop_info->n_iterations = 2; | |
3772 | } | |
3773 | else | |
3774 | loop_info->n_iterations = 1; | |
3775 | ||
3776 | if (GET_CODE (loop_info->initial_value) == CONST_INT) | |
3777 | loop_info->final_value | |
3778 | = gen_int_mode ((INTVAL (loop_info->initial_value) | |
3779 | + loop_info->n_iterations * INTVAL (increment)), | |
3780 | GET_MODE (iteration_var)); | |
3781 | else | |
3782 | loop_info->final_value | |
3783 | = plus_constant (loop_info->initial_value, | |
3784 | loop_info->n_iterations * INTVAL (increment)); | |
3785 | loop_info->final_equiv_value | |
3786 | = gen_int_mode ((INTVAL (initial_value) | |
3787 | + loop_info->n_iterations * INTVAL (increment)), | |
3788 | GET_MODE (iteration_var)); | |
3789 | return loop_info->n_iterations; | |
3790 | } | |
67f2de41 | 3791 | |
67f2de41 | 3792 | /* Final_larger is 1 if final larger, 0 if they are equal, otherwise -1. */ |
e96b4d7a | 3793 | if (unsigned_p) |
67f2de41 | 3794 | final_larger |
c166a311 CH |
3795 | = ((unsigned HOST_WIDE_INT) INTVAL (final_value) |
3796 | > (unsigned HOST_WIDE_INT) INTVAL (initial_value)) | |
3797 | - ((unsigned HOST_WIDE_INT) INTVAL (final_value) | |
3798 | < (unsigned HOST_WIDE_INT) INTVAL (initial_value)); | |
67f2de41 | 3799 | else |
c166a311 CH |
3800 | final_larger = (INTVAL (final_value) > INTVAL (initial_value)) |
3801 | - (INTVAL (final_value) < INTVAL (initial_value)); | |
67f2de41 RK |
3802 | |
3803 | if (INTVAL (increment) > 0) | |
3804 | increment_dir = 1; | |
3805 | else if (INTVAL (increment) == 0) | |
3806 | increment_dir = 0; | |
3807 | else | |
3808 | increment_dir = -1; | |
3809 | ||
3810 | /* There are 27 different cases: compare_dir = -1, 0, 1; | |
3811 | final_larger = -1, 0, 1; increment_dir = -1, 0, 1. | |
3812 | There are 4 normal cases, 4 reverse cases (where the iteration variable | |
3813 | will overflow before the loop exits), 4 infinite loop cases, and 15 | |
3814 | immediate exit (0 or 1 iteration depending on loop type) cases. | |
3815 | Only try to optimize the normal cases. */ | |
a5af23fe | 3816 | |
67f2de41 RK |
3817 | /* (compare_dir/final_larger/increment_dir) |
3818 | Normal cases: (0/-1/-1), (0/1/1), (-1/-1/-1), (1/1/1) | |
3819 | Reverse cases: (0/-1/1), (0/1/-1), (-1/-1/1), (1/1/-1) | |
3820 | Infinite loops: (0/-1/0), (0/1/0), (-1/-1/0), (1/1/0) | |
3821 | Immediate exit: (0/0/X), (-1/0/X), (-1/1/X), (1/0/X), (1/-1/X) */ | |
3822 | ||
3823 | /* ?? If the meaning of reverse loops (where the iteration variable | |
3824 | will overflow before the loop exits) is undefined, then could | |
3825 | eliminate all of these special checks, and just always assume | |
3826 | the loops are normal/immediate/infinite. Note that this means | |
3827 | the sign of increment_dir does not have to be known. Also, | |
3828 | since it does not really hurt if immediate exit loops or infinite loops | |
3829 | are optimized, then that case could be ignored also, and hence all | |
3830 | loops can be optimized. | |
3831 | ||
3832 | According to ANSI Spec, the reverse loop case result is undefined, | |
3833 | because the action on overflow is undefined. | |
3834 | ||
3835 | See also the special test for NE loops below. */ | |
3836 | ||
3837 | if (final_larger == increment_dir && final_larger != 0 | |
3838 | && (final_larger == compare_dir || compare_dir == 0)) | |
3839 | /* Normal case. */ | |
3840 | ; | |
3841 | else | |
3842 | { | |
3843 | if (loop_dump_stream) | |
a86dc4a3 | 3844 | fprintf (loop_dump_stream, "Loop iterations: Not normal loop.\n"); |
67f2de41 RK |
3845 | return 0; |
3846 | } | |
3847 | ||
3848 | /* Calculate the number of iterations, final_value is only an approximation, | |
e96b4d7a | 3849 | so correct for that. Note that abs_diff and n_iterations are |
67f2de41 RK |
3850 | unsigned, because they can be as large as 2^n - 1. */ |
3851 | ||
b8ebd779 AO |
3852 | inc = INTVAL (increment); |
3853 | if (inc > 0) | |
3854 | { | |
3855 | abs_diff = INTVAL (final_value) - INTVAL (initial_value); | |
3856 | abs_inc = inc; | |
3857 | } | |
3858 | else if (inc < 0) | |
67f2de41 | 3859 | { |
e96b4d7a | 3860 | abs_diff = INTVAL (initial_value) - INTVAL (final_value); |
b8ebd779 | 3861 | abs_inc = -inc; |
67f2de41 RK |
3862 | } |
3863 | else | |
3864 | abort (); | |
3865 | ||
b8ebd779 AO |
3866 | /* Given that iteration_var is going to iterate over its own mode, |
3867 | not HOST_WIDE_INT, disregard higher bits that might have come | |
3868 | into the picture due to sign extension of initial and final | |
3869 | values. */ | |
a01da83b | 3870 | abs_diff &= ((unsigned HOST_WIDE_INT) 1 |
b8ebd779 AO |
3871 | << (GET_MODE_BITSIZE (GET_MODE (iteration_var)) - 1) |
3872 | << 1) - 1; | |
69107307 | 3873 | |
e96b4d7a MH |
3874 | /* For NE tests, make sure that the iteration variable won't miss |
3875 | the final value. If abs_diff mod abs_incr is not zero, then the | |
3876 | iteration variable will overflow before the loop exits, and we | |
3877 | can not calculate the number of iterations. */ | |
3878 | if (compare_dir == 0 && (abs_diff % abs_inc) != 0) | |
67f2de41 RK |
3879 | return 0; |
3880 | ||
e96b4d7a MH |
3881 | /* Note that the number of iterations could be calculated using |
3882 | (abs_diff + abs_inc - 1) / abs_inc, provided care was taken to | |
3883 | handle potential overflow of the summation. */ | |
3884 | loop_info->n_iterations = abs_diff / abs_inc + ((abs_diff % abs_inc) != 0); | |
3885 | return loop_info->n_iterations; | |
67f2de41 | 3886 | } |
2b59419a | 3887 | |
9faa82d8 | 3888 | /* Replace uses of split bivs with their split pseudo register. This is |
2b59419a JW |
3889 | for original instructions which remain after loop unrolling without |
3890 | copying. */ | |
3891 | ||
3892 | static rtx | |
2e1eedd6 | 3893 | remap_split_bivs (struct loop *loop, rtx x) |
2b59419a | 3894 | { |
ed5bb68d | 3895 | struct loop_ivs *ivs = LOOP_IVS (loop); |
b3694847 SS |
3896 | enum rtx_code code; |
3897 | int i; | |
3898 | const char *fmt; | |
2b59419a JW |
3899 | |
3900 | if (x == 0) | |
3901 | return x; | |
3902 | ||
3903 | code = GET_CODE (x); | |
3904 | switch (code) | |
3905 | { | |
3906 | case SCRATCH: | |
3907 | case PC: | |
3908 | case CC0: | |
3909 | case CONST_INT: | |
3910 | case CONST_DOUBLE: | |
3911 | case CONST: | |
3912 | case SYMBOL_REF: | |
3913 | case LABEL_REF: | |
3914 | return x; | |
3915 | ||
3916 | case REG: | |
3917 | #if 0 | |
3918 | /* If non-reduced/final-value givs were split, then this would also | |
3919 | have to remap those givs also. */ | |
3920 | #endif | |
86fee241 | 3921 | if (REGNO (x) < ivs->n_regs |
ed5bb68d | 3922 | && REG_IV_TYPE (ivs, REGNO (x)) == BASIC_INDUCT) |
8b634749 | 3923 | return REG_IV_CLASS (ivs, REGNO (x))->biv->src_reg; |
e9a25f70 | 3924 | break; |
a5af23fe | 3925 | |
e9a25f70 JL |
3926 | default: |
3927 | break; | |
2b59419a JW |
3928 | } |
3929 | ||
3930 | fmt = GET_RTX_FORMAT (code); | |
3931 | for (i = GET_RTX_LENGTH (code) - 1; i >= 0; i--) | |
3932 | { | |
3933 | if (fmt[i] == 'e') | |
ed5bb68d | 3934 | XEXP (x, i) = remap_split_bivs (loop, XEXP (x, i)); |
d4757e6a | 3935 | else if (fmt[i] == 'E') |
2b59419a | 3936 | { |
b3694847 | 3937 | int j; |
2b59419a | 3938 | for (j = 0; j < XVECLEN (x, i); j++) |
ed5bb68d | 3939 | XVECEXP (x, i, j) = remap_split_bivs (loop, XVECEXP (x, i, j)); |
2b59419a JW |
3940 | } |
3941 | } | |
3942 | return x; | |
3943 | } | |
1fe33d17 JW |
3944 | |
3945 | /* If FIRST_UID is a set of REGNO, and FIRST_UID dominates LAST_UID (e.g. | |
3946 | FIST_UID is always executed if LAST_UID is), then return 1. Otherwise | |
3947 | return 0. COPY_START is where we can start looking for the insns | |
3948 | FIRST_UID and LAST_UID. COPY_END is where we stop looking for these | |
3949 | insns. | |
3950 | ||
3951 | If there is no JUMP_INSN between LOOP_START and FIRST_UID, then FIRST_UID | |
3952 | must dominate LAST_UID. | |
3953 | ||
3954 | If there is a CODE_LABEL between FIRST_UID and LAST_UID, then FIRST_UID | |
3955 | may not dominate LAST_UID. | |
3956 | ||
3957 | If there is no CODE_LABEL between FIRST_UID and LAST_UID, then FIRST_UID | |
3958 | must dominate LAST_UID. */ | |
3959 | ||
3960 | int | |
2e1eedd6 AJ |
3961 | set_dominates_use (int regno, int first_uid, int last_uid, rtx copy_start, |
3962 | rtx copy_end) | |
1fe33d17 JW |
3963 | { |
3964 | int passed_jump = 0; | |
3965 | rtx p = NEXT_INSN (copy_start); | |
3966 | ||
3967 | while (INSN_UID (p) != first_uid) | |
3968 | { | |
4b4bf941 | 3969 | if (JUMP_P (p)) |
a86dc4a3 | 3970 | passed_jump = 1; |
1fe33d17 JW |
3971 | /* Could not find FIRST_UID. */ |
3972 | if (p == copy_end) | |
3973 | return 0; | |
3974 | p = NEXT_INSN (p); | |
3975 | } | |
3976 | ||
3977 | /* Verify that FIRST_UID is an insn that entirely sets REGNO. */ | |
2c3c49de | 3978 | if (! INSN_P (p) || ! dead_or_set_regno_p (p, regno)) |
1fe33d17 JW |
3979 | return 0; |
3980 | ||
3981 | /* FIRST_UID is always executed. */ | |
3982 | if (passed_jump == 0) | |
3983 | return 1; | |
3984 | ||
3985 | while (INSN_UID (p) != last_uid) | |
3986 | { | |
3987 | /* If we see a CODE_LABEL between FIRST_UID and LAST_UID, then we | |
3988 | can not be sure that FIRST_UID dominates LAST_UID. */ | |
4b4bf941 | 3989 | if (LABEL_P (p)) |
1fe33d17 | 3990 | return 0; |
6b857f04 JW |
3991 | /* Could not find LAST_UID, but we reached the end of the loop, so |
3992 | it must be safe. */ | |
3993 | else if (p == copy_end) | |
3994 | return 1; | |
1fe33d17 JW |
3995 | p = NEXT_INSN (p); |
3996 | } | |
3997 | ||
3998 | /* FIRST_UID is always executed if LAST_UID is executed. */ | |
3999 | return 1; | |
4000 | } | |
4598ffe9 CM |
4001 | |
4002 | /* This routine is called when the number of iterations for the unrolled | |
4003 | loop is one. The goal is to identify a loop that begins with an | |
4004 | unconditional branch to the loop continuation note (or a label just after). | |
4005 | In this case, the unconditional branch that starts the loop needs to be | |
4006 | deleted so that we execute the single iteration. */ | |
a86dc4a3 | 4007 | |
4598ffe9 | 4008 | static rtx |
2e1eedd6 | 4009 | ujump_to_loop_cont (rtx loop_start, rtx loop_cont) |
4598ffe9 CM |
4010 | { |
4011 | rtx x, label, label_ref; | |
4012 | ||
4013 | /* See if loop start, or the next insn is an unconditional jump. */ | |
4014 | loop_start = next_nonnote_insn (loop_start); | |
4015 | ||
4016 | x = pc_set (loop_start); | |
4017 | if (!x) | |
4018 | return NULL_RTX; | |
4019 | ||
4020 | label_ref = SET_SRC (x); | |
4021 | if (!label_ref) | |
4022 | return NULL_RTX; | |
4023 | ||
4024 | /* Examine insn after loop continuation note. Return if not a label. */ | |
4025 | label = next_nonnote_insn (loop_cont); | |
4b4bf941 | 4026 | if (label == 0 || !LABEL_P (label)) |
4598ffe9 CM |
4027 | return NULL_RTX; |
4028 | ||
4029 | /* Return the loop start if the branch label matches the code label. */ | |
a86dc4a3 | 4030 | if (CODE_LABEL_NUMBER (label) == CODE_LABEL_NUMBER (XEXP (label_ref, 0))) |
4598ffe9 CM |
4031 | return loop_start; |
4032 | else | |
4033 | return NULL_RTX; | |
4598ffe9 | 4034 | } |