]>
Commit | Line | Data |
---|---|---|
42a3a38b | 1 | /* RTL-based forward propagation pass for GNU compiler. |
3072d30e | 2 | Copyright (C) 2005, 2006, 2007 Free Software Foundation, Inc. |
42a3a38b | 3 | Contributed by Paolo Bonzini and Steven Bosscher. |
4 | ||
5 | This file is part of GCC. | |
6 | ||
7 | GCC is free software; you can redistribute it and/or modify it under | |
8 | the terms of the GNU General Public License as published by the Free | |
8c4c00c1 | 9 | Software Foundation; either version 3, or (at your option) any later |
42a3a38b | 10 | version. |
11 | ||
12 | GCC is distributed in the hope that it will be useful, but WITHOUT ANY | |
13 | WARRANTY; without even the implied warranty of MERCHANTABILITY or | |
14 | FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License | |
15 | for more details. | |
16 | ||
17 | You should have received a copy of the GNU General Public License | |
8c4c00c1 | 18 | along with GCC; see the file COPYING3. If not see |
19 | <http://www.gnu.org/licenses/>. */ | |
42a3a38b | 20 | |
21 | #include "config.h" | |
22 | #include "system.h" | |
23 | #include "coretypes.h" | |
24 | #include "tm.h" | |
25 | #include "toplev.h" | |
26 | ||
27 | #include "timevar.h" | |
28 | #include "rtl.h" | |
29 | #include "tm_p.h" | |
30 | #include "emit-rtl.h" | |
31 | #include "insn-config.h" | |
32 | #include "recog.h" | |
33 | #include "flags.h" | |
34 | #include "obstack.h" | |
35 | #include "basic-block.h" | |
36 | #include "output.h" | |
37 | #include "df.h" | |
38 | #include "target.h" | |
39 | #include "cfgloop.h" | |
40 | #include "tree-pass.h" | |
41 | ||
42 | ||
43 | /* This pass does simple forward propagation and simplification when an | |
44 | operand of an insn can only come from a single def. This pass uses | |
45 | df.c, so it is global. However, we only do limited analysis of | |
46 | available expressions. | |
47 | ||
48 | 1) The pass tries to propagate the source of the def into the use, | |
49 | and checks if the result is independent of the substituted value. | |
50 | For example, the high word of a (zero_extend:DI (reg:SI M)) is always | |
51 | zero, independent of the source register. | |
52 | ||
53 | In particular, we propagate constants into the use site. Sometimes | |
54 | RTL expansion did not put the constant in the same insn on purpose, | |
55 | to satisfy a predicate, and the result will fail to be recognized; | |
56 | but this happens rarely and in this case we can still create a | |
57 | REG_EQUAL note. For multi-word operations, this | |
58 | ||
59 | (set (subreg:SI (reg:DI 120) 0) (const_int 0)) | |
60 | (set (subreg:SI (reg:DI 120) 4) (const_int -1)) | |
61 | (set (subreg:SI (reg:DI 122) 0) | |
62 | (ior:SI (subreg:SI (reg:DI 119) 0) (subreg:SI (reg:DI 120) 0))) | |
63 | (set (subreg:SI (reg:DI 122) 4) | |
64 | (ior:SI (subreg:SI (reg:DI 119) 4) (subreg:SI (reg:DI 120) 4))) | |
65 | ||
66 | can be simplified to the much simpler | |
67 | ||
68 | (set (subreg:SI (reg:DI 122) 0) (subreg:SI (reg:DI 119) 0)) | |
69 | (set (subreg:SI (reg:DI 122) 4) (const_int -1)) | |
70 | ||
71 | This particular propagation is also effective at putting together | |
72 | complex addressing modes. We are more aggressive inside MEMs, in | |
73 | that all definitions are propagated if the use is in a MEM; if the | |
74 | result is a valid memory address we check address_cost to decide | |
75 | whether the substitution is worthwhile. | |
76 | ||
77 | 2) The pass propagates register copies. This is not as effective as | |
78 | the copy propagation done by CSE's canon_reg, which works by walking | |
79 | the instruction chain, but it can help the other transformations. | |
80 | ||
81 | We should consider removing this optimization, and instead reorder the | |
82 | RTL passes, because GCSE does this transformation too. With some luck, | |
83 | the CSE pass at the end of rest_of_handle_gcse could also go away. | |
84 | ||
85 | 3) The pass looks for paradoxical subregs that are actually unnecessary. | |
86 | Things like this: | |
87 | ||
88 | (set (reg:QI 120) (subreg:QI (reg:SI 118) 0)) | |
89 | (set (reg:QI 121) (subreg:QI (reg:SI 119) 0)) | |
90 | (set (reg:SI 122) (plus:SI (subreg:SI (reg:QI 120) 0) | |
91 | (subreg:SI (reg:QI 121) 0))) | |
92 | ||
93 | are very common on machines that can only do word-sized operations. | |
94 | For each use of a paradoxical subreg (subreg:WIDER (reg:NARROW N) 0), | |
95 | if it has a single def and it is (subreg:NARROW (reg:WIDE M) 0), | |
96 | we can replace the paradoxical subreg with simply (reg:WIDE M). The | |
97 | above will simplify this to | |
98 | ||
99 | (set (reg:QI 120) (subreg:QI (reg:SI 118) 0)) | |
100 | (set (reg:QI 121) (subreg:QI (reg:SI 119) 0)) | |
101 | (set (reg:SI 122) (plus:SI (reg:SI 118) (reg:SI 119))) | |
102 | ||
103 | where the first two insns are now dead. */ | |
104 | ||
105 | ||
42a3a38b | 106 | static int num_changes; /* Incremented by try_fwprop_subst each time validate_unshare_change accepts a substitution.  */ |
107 | ||
108 | \f | |
109 | /* Do not try to replace constant addresses or addresses of local and | |
110 | argument slots. These MEM expressions are made only once and inserted | |
111 | in many instructions, as well as being used to control symbol table | |
112 | output. It is not safe to clobber them. | |
113 | ||
114 | There are some uncommon cases where the address is already in a register | |
115 | for some reason, but we cannot take advantage of that because we have | |
116 | no easy way to unshare the MEM. In addition, looking up all stack | |
117 | addresses is costly. */ | |
118 | ||
119 | static bool | |
120 | can_simplify_addr (rtx addr) | |
121 | { | |
122 | rtx reg; | |
123 | ||
124 | if (CONSTANT_ADDRESS_P (addr)) | |
125 | return false; | |
126 | ||
127 | if (GET_CODE (addr) == PLUS) | |
128 | reg = XEXP (addr, 0); | |
129 | else | |
130 | reg = addr; | |
131 | ||
132 | return (!REG_P (reg) | |
133 | || (REGNO (reg) != FRAME_POINTER_REGNUM | |
134 | && REGNO (reg) != HARD_FRAME_POINTER_REGNUM | |
135 | && REGNO (reg) != ARG_POINTER_REGNUM)); | |
136 | } | |
137 | ||
138 | /* Returns a canonical version of X for the address, from the point of view, | |
139 | that all multiplications are represented as MULT instead of the multiply | |
140 | by a power of 2 being represented as ASHIFT. | |
141 | ||
142 | Every ASHIFT we find has been made by simplify_gen_binary and was not | |
143 | there before, so it is not shared. So we can do this in place. */ | |
144 | ||
145 | static void | |
146 | canonicalize_address (rtx x) | |
147 | { | |
148 | for (;;) | |
149 | switch (GET_CODE (x)) | |
150 | { | |
151 | case ASHIFT: | |
152 | if (GET_CODE (XEXP (x, 1)) == CONST_INT | |
153 | && INTVAL (XEXP (x, 1)) < GET_MODE_BITSIZE (GET_MODE (x)) | |
154 | && INTVAL (XEXP (x, 1)) >= 0) | |
155 | { | |
156 | HOST_WIDE_INT shift = INTVAL (XEXP (x, 1)); | |
157 | PUT_CODE (x, MULT); | |
158 | XEXP (x, 1) = gen_int_mode ((HOST_WIDE_INT) 1 << shift, | |
159 | GET_MODE (x)); | |
160 | } | |
161 | ||
162 | x = XEXP (x, 0); | |
163 | break; | |
164 | ||
165 | case PLUS: | |
166 | if (GET_CODE (XEXP (x, 0)) == PLUS | |
167 | || GET_CODE (XEXP (x, 0)) == ASHIFT | |
168 | || GET_CODE (XEXP (x, 0)) == CONST) | |
169 | canonicalize_address (XEXP (x, 0)); | |
170 | ||
171 | x = XEXP (x, 1); | |
172 | break; | |
173 | ||
174 | case CONST: | |
175 | x = XEXP (x, 0); | |
176 | break; | |
177 | ||
178 | default: | |
179 | return; | |
180 | } | |
181 | } | |
182 | ||
183 | /* OLD is a memory address. Return whether it is good to use NEW instead, | |
184 | for a memory access in the given MODE. */ | |
185 | ||
186 | static bool | |
187 | should_replace_address (rtx old, rtx new, enum machine_mode mode) | |
188 | { | |
189 | int gain; | |
190 | ||
191 | if (rtx_equal_p (old, new) || !memory_address_p (mode, new)) | |
192 | return false; | |
193 | ||
194 | /* Copy propagation is always ok. */ | |
195 | if (REG_P (old) && REG_P (new)) | |
196 | return true; | |
197 | ||
198 | /* Prefer the new address if it is less expensive. */ | |
199 | gain = address_cost (old, mode) - address_cost (new, mode); | |
200 | ||
201 | /* If the addresses have equivalent cost, prefer the new address | |
202 | if it has the highest `rtx_cost'. That has the potential of | |
203 | eliminating the most insns without additional costs, and it | |
204 | is the same that cse.c used to do. */ | |
205 | if (gain == 0) | |
206 | gain = rtx_cost (new, SET) - rtx_cost (old, SET); | |
207 | ||
208 | return (gain > 0); | |
209 | } | |
210 | ||
211 | /* Replace all occurrences of OLD in *PX with NEW and try to simplify the | |
212 | resulting expression. Replace *PX with a new RTL expression if an | |
213 | occurrence of OLD was found. | |
214 | ||
215 | If CAN_APPEAR is true, we always return true; if it is false, we | |
216 | can return false if, for at least one occurrence OLD, we failed to | |
217 | collapse the result to a constant. For example, (mult:M (reg:M A) | |
218 | (minus:M (reg:M B) (reg:M A))) may collapse to zero if replacing | |
219 | (reg:M B) with (reg:M A). | |
220 | ||
221 | CAN_APPEAR is disregarded inside MEMs: in that case, we always return | |
222 | true if the simplification is a cheaper and valid memory address. | |
223 | ||
224 | This is only a wrapper around simplify-rtx.c: do not add any pattern | |
225 | matching code here. (The sole exception is the handling of LO_SUM, but | |
226 | that is because there is no simplify_gen_* function for LO_SUM). */ | |
227 | ||
228 | static bool | |
229 | propagate_rtx_1 (rtx *px, rtx old, rtx new, bool can_appear) | |
230 | { | |
231 | rtx x = *px, tem = NULL_RTX, op0, op1, op2; | |
232 | enum rtx_code code = GET_CODE (x); | |
233 | enum machine_mode mode = GET_MODE (x); | |
234 | enum machine_mode op_mode; | |
235 | bool valid_ops = true; | |
236 | ||
237 | /* If X is OLD_RTX, return NEW_RTX. Otherwise, if this is an expression, | |
238 | try to build a new expression from recursive substitution. */ | |
239 | ||
240 | if (x == old) | |
241 | { | |
242 | *px = new; | |
243 | return can_appear; | |
244 | } | |
245 | ||
246 | switch (GET_RTX_CLASS (code)) | |
247 | { | |
248 | case RTX_UNARY: | |
249 | op0 = XEXP (x, 0); | |
250 | op_mode = GET_MODE (op0); | |
251 | valid_ops &= propagate_rtx_1 (&op0, old, new, can_appear); | |
252 | if (op0 == XEXP (x, 0)) | |
253 | return true; | |
254 | tem = simplify_gen_unary (code, mode, op0, op_mode); | |
255 | break; | |
256 | ||
257 | case RTX_BIN_ARITH: | |
258 | case RTX_COMM_ARITH: | |
259 | op0 = XEXP (x, 0); | |
260 | op1 = XEXP (x, 1); | |
261 | valid_ops &= propagate_rtx_1 (&op0, old, new, can_appear); | |
262 | valid_ops &= propagate_rtx_1 (&op1, old, new, can_appear); | |
263 | if (op0 == XEXP (x, 0) && op1 == XEXP (x, 1)) | |
264 | return true; | |
265 | tem = simplify_gen_binary (code, mode, op0, op1); | |
266 | break; | |
267 | ||
268 | case RTX_COMPARE: | |
269 | case RTX_COMM_COMPARE: | |
270 | op0 = XEXP (x, 0); | |
271 | op1 = XEXP (x, 1); | |
272 | op_mode = GET_MODE (op0) != VOIDmode ? GET_MODE (op0) : GET_MODE (op1); | |
273 | valid_ops &= propagate_rtx_1 (&op0, old, new, can_appear); | |
274 | valid_ops &= propagate_rtx_1 (&op1, old, new, can_appear); | |
275 | if (op0 == XEXP (x, 0) && op1 == XEXP (x, 1)) | |
276 | return true; | |
277 | tem = simplify_gen_relational (code, mode, op_mode, op0, op1); | |
278 | break; | |
279 | ||
280 | case RTX_TERNARY: | |
281 | case RTX_BITFIELD_OPS: | |
282 | op0 = XEXP (x, 0); | |
283 | op1 = XEXP (x, 1); | |
284 | op2 = XEXP (x, 2); | |
285 | op_mode = GET_MODE (op0); | |
286 | valid_ops &= propagate_rtx_1 (&op0, old, new, can_appear); | |
287 | valid_ops &= propagate_rtx_1 (&op1, old, new, can_appear); | |
288 | valid_ops &= propagate_rtx_1 (&op2, old, new, can_appear); | |
289 | if (op0 == XEXP (x, 0) && op1 == XEXP (x, 1) && op2 == XEXP (x, 2)) | |
290 | return true; | |
291 | if (op_mode == VOIDmode) | |
292 | op_mode = GET_MODE (op0); | |
293 | tem = simplify_gen_ternary (code, mode, op_mode, op0, op1, op2); | |
294 | break; | |
295 | ||
296 | case RTX_EXTRA: | |
297 | /* The only case we try to handle is a SUBREG. */ | |
298 | if (code == SUBREG) | |
299 | { | |
300 | op0 = XEXP (x, 0); | |
301 | valid_ops &= propagate_rtx_1 (&op0, old, new, can_appear); | |
302 | if (op0 == XEXP (x, 0)) | |
303 | return true; | |
304 | tem = simplify_gen_subreg (mode, op0, GET_MODE (SUBREG_REG (x)), | |
305 | SUBREG_BYTE (x)); | |
306 | } | |
307 | break; | |
308 | ||
309 | case RTX_OBJ: | |
310 | if (code == MEM && x != new) | |
311 | { | |
312 | rtx new_op0; | |
313 | op0 = XEXP (x, 0); | |
314 | ||
315 | /* There are some addresses that we cannot work on. */ | |
316 | if (!can_simplify_addr (op0)) | |
317 | return true; | |
318 | ||
319 | op0 = new_op0 = targetm.delegitimize_address (op0); | |
320 | valid_ops &= propagate_rtx_1 (&new_op0, old, new, true); | |
321 | ||
322 | /* Dismiss transformation that we do not want to carry on. */ | |
323 | if (!valid_ops | |
324 | || new_op0 == op0 | |
deb3d513 | 325 | || !(GET_MODE (new_op0) == GET_MODE (op0) |
326 | || GET_MODE (new_op0) == VOIDmode)) | |
42a3a38b | 327 | return true; |
328 | ||
329 | canonicalize_address (new_op0); | |
330 | ||
331 | /* Copy propagations are always ok. Otherwise check the costs. */ | |
332 | if (!(REG_P (old) && REG_P (new)) | |
333 | && !should_replace_address (op0, new_op0, GET_MODE (x))) | |
334 | return true; | |
335 | ||
336 | tem = replace_equiv_address_nv (x, new_op0); | |
337 | } | |
338 | ||
339 | else if (code == LO_SUM) | |
340 | { | |
341 | op0 = XEXP (x, 0); | |
342 | op1 = XEXP (x, 1); | |
343 | ||
344 | /* The only simplification we do attempts to remove references to op0 | |
345 | or make it constant -- in both cases, op0's invalidity will not | |
346 | make the result invalid. */ | |
347 | propagate_rtx_1 (&op0, old, new, true); | |
348 | valid_ops &= propagate_rtx_1 (&op1, old, new, can_appear); | |
349 | if (op0 == XEXP (x, 0) && op1 == XEXP (x, 1)) | |
350 | return true; | |
351 | ||
352 | /* (lo_sum (high x) x) -> x */ | |
353 | if (GET_CODE (op0) == HIGH && rtx_equal_p (XEXP (op0, 0), op1)) | |
354 | tem = op1; | |
355 | else | |
356 | tem = gen_rtx_LO_SUM (mode, op0, op1); | |
357 | ||
358 | /* OP1 is likely not a legitimate address, otherwise there would have | |
359 | been no LO_SUM. We want it to disappear if it is invalid, return | |
360 | false in that case. */ | |
361 | return memory_address_p (mode, tem); | |
362 | } | |
363 | ||
364 | else if (code == REG) | |
365 | { | |
366 | if (rtx_equal_p (x, old)) | |
367 | { | |
368 | *px = new; | |
369 | return can_appear; | |
370 | } | |
371 | } | |
372 | break; | |
373 | ||
374 | default: | |
375 | break; | |
376 | } | |
377 | ||
378 | /* No change, no trouble. */ | |
379 | if (tem == NULL_RTX) | |
380 | return true; | |
381 | ||
382 | *px = tem; | |
383 | ||
384 | /* The replacement we made so far is valid, if all of the recursive | |
385 | replacements were valid, or we could simplify everything to | |
386 | a constant. */ | |
387 | return valid_ops || can_appear || CONSTANT_P (tem); | |
388 | } | |
389 | ||
390 | /* Replace all occurrences of OLD in X with NEW and try to simplify the | |
fa7637bd | 391 | resulting expression (in mode MODE). Return a new expression if it is |
42a3a38b | 392 | a constant, otherwise X. |
393 | ||
394 | Simplifications where occurrences of NEW collapse to a constant are always | |
395 | accepted. All simplifications are accepted if NEW is a pseudo too. | |
396 | Otherwise, we accept simplifications that have a lower or equal cost. */ | |
397 | ||
398 | static rtx | |
399 | propagate_rtx (rtx x, enum machine_mode mode, rtx old, rtx new) | |
400 | { | |
401 | rtx tem; | |
402 | bool collapsed; | |
403 | ||
404 | if (REG_P (new) && REGNO (new) < FIRST_PSEUDO_REGISTER) | |
405 | return NULL_RTX; | |
406 | ||
407 | new = copy_rtx (new); | |
408 | ||
409 | tem = x; | |
410 | collapsed = propagate_rtx_1 (&tem, old, new, REG_P (new) || CONSTANT_P (new)); | |
411 | if (tem == x || !collapsed) | |
412 | return NULL_RTX; | |
413 | ||
414 | /* gen_lowpart_common will not be able to process VOIDmode entities other | |
415 | than CONST_INTs. */ | |
416 | if (GET_MODE (tem) == VOIDmode && GET_CODE (tem) != CONST_INT) | |
417 | return NULL_RTX; | |
418 | ||
419 | if (GET_MODE (tem) == VOIDmode) | |
420 | tem = rtl_hooks.gen_lowpart_no_emit (mode, tem); | |
421 | else | |
422 | gcc_assert (GET_MODE (tem) == mode); | |
423 | ||
424 | return tem; | |
425 | } | |
426 | ||
427 | ||
428 | \f | |
429 | ||
430 | /* Return true if the register from reference REF is killed | |
431 | between FROM to (but not including) TO. */ | |
432 | ||
3072d30e | 433 | static bool |
42a3a38b | 434 | local_ref_killed_between_p (struct df_ref * ref, rtx from, rtx to) |
435 | { | |
436 | rtx insn; | |
42a3a38b | 437 | |
438 | for (insn = from; insn != to; insn = NEXT_INSN (insn)) | |
439 | { | |
3072d30e | 440 | struct df_ref **def_rec; |
42a3a38b | 441 | if (!INSN_P (insn)) |
442 | continue; | |
443 | ||
3072d30e | 444 | for (def_rec = DF_INSN_DEFS (insn); *def_rec; def_rec++) |
42a3a38b | 445 | { |
3072d30e | 446 | struct df_ref *def = *def_rec; |
42a3a38b | 447 | if (DF_REF_REGNO (ref) == DF_REF_REGNO (def)) |
448 | return true; | |
42a3a38b | 449 | } |
450 | } | |
451 | return false; | |
452 | } | |
453 | ||
454 | ||
455 | /* Check if the given DEF is available in INSN. This would require full | |
456 | computation of available expressions; we check only restricted conditions: | |
457 | - if DEF is the sole definition of its register, go ahead; | |
458 | - in the same basic block, we check for no definitions killing the | |
459 | definition of DEF_INSN; | |
460 | - if USE's basic block has DEF's basic block as the sole predecessor, | |
461 | we check if the definition is killed after DEF_INSN or before | |
462 | TARGET_INSN insn, in their respective basic blocks. */ | |
463 | static bool | |
464 | use_killed_between (struct df_ref *use, rtx def_insn, rtx target_insn) | |
465 | { | |
a39fe687 | 466 | basic_block def_bb = BLOCK_FOR_INSN (def_insn); |
467 | basic_block target_bb = BLOCK_FOR_INSN (target_insn); | |
42a3a38b | 468 | int regno; |
469 | struct df_ref * def; | |
470 | ||
a39fe687 | 471 | /* In some obscure situations we can have a def reaching a use |
472 | that is _before_ the def. In other words the def does not | |
473 | dominate the use even though the use and def are in the same | |
474 | basic block. This can happen when a register may be used | |
475 | uninitialized in a loop. In such cases, we must assume that | |
476 | DEF is not available. */ | |
477 | if (def_bb == target_bb | |
3072d30e | 478 | ? DF_INSN_LUID (def_insn) >= DF_INSN_LUID (target_insn) |
a39fe687 | 479 | : !dominated_by_p (CDI_DOMINATORS, target_bb, def_bb)) |
480 | return true; | |
481 | ||
42a3a38b | 482 | /* Check if the reg in USE has only one definition. We already |
483 | know that this definition reaches use, or we wouldn't be here. */ | |
484 | regno = DF_REF_REGNO (use); | |
3072d30e | 485 | def = DF_REG_DEF_CHAIN (regno); |
42a3a38b | 486 | if (def && (def->next_reg == NULL)) |
487 | return false; | |
488 | ||
a39fe687 | 489 | /* Check locally if we are in the same basic block. */ |
42a3a38b | 490 | if (def_bb == target_bb) |
a39fe687 | 491 | return local_ref_killed_between_p (use, def_insn, target_insn); |
42a3a38b | 492 | |
493 | /* Finally, if DEF_BB is the sole predecessor of TARGET_BB. */ | |
494 | if (single_pred_p (target_bb) | |
495 | && single_pred (target_bb) == def_bb) | |
496 | { | |
497 | struct df_ref *x; | |
498 | ||
499 | /* See if USE is killed between DEF_INSN and the last insn in the | |
500 | basic block containing DEF_INSN. */ | |
3072d30e | 501 | x = df_bb_regno_last_def_find (def_bb, regno); |
502 | if (x && DF_INSN_LUID (x->insn) >= DF_INSN_LUID (def_insn)) | |
42a3a38b | 503 | return true; |
504 | ||
505 | /* See if USE is killed between TARGET_INSN and the first insn in the | |
506 | basic block containing TARGET_INSN. */ | |
3072d30e | 507 | x = df_bb_regno_first_def_find (target_bb, regno); |
508 | if (x && DF_INSN_LUID (x->insn) < DF_INSN_LUID (target_insn)) | |
42a3a38b | 509 | return true; |
510 | ||
511 | return false; | |
512 | } | |
513 | ||
514 | /* Otherwise assume the worst case. */ | |
515 | return true; | |
516 | } | |
517 | ||
518 | ||
519 | /* for_each_rtx traversal function that returns 1 if BODY points to | |
520 | a non-constant mem. */ | |
521 | ||
522 | static int | |
523 | varying_mem_p (rtx *body, void *data ATTRIBUTE_UNUSED) | |
524 | { | |
525 | rtx x = *body; | |
526 | return MEM_P (x) && !MEM_READONLY_P (x); | |
527 | } | |
3072d30e | 528 | |
42a3a38b | 529 | /* Check if all uses in DEF_INSN can be used in TARGET_INSN. This |
530 | would require full computation of available expressions; | |
531 | we check only restricted conditions, see use_killed_between. */ | |
532 | static bool | |
533 | all_uses_available_at (rtx def_insn, rtx target_insn) | |
534 | { | |
3072d30e | 535 | struct df_ref **use_rec; |
42a3a38b | 536 | rtx def_set = single_set (def_insn); |
537 | ||
538 | gcc_assert (def_set); | |
539 | ||
540 | /* If target_insn comes right after def_insn, which is very common | |
541 | for addresses, we can use a quicker test. */ | |
542 | if (NEXT_INSN (def_insn) == target_insn | |
543 | && REG_P (SET_DEST (def_set))) | |
544 | { | |
545 | rtx def_reg = SET_DEST (def_set); | |
546 | ||
547 | /* If the insn uses the reg that it defines, the substitution is | |
548 | invalid. */ | |
3072d30e | 549 | for (use_rec = DF_INSN_USES (def_insn); *use_rec; use_rec++) |
550 | { | |
551 | struct df_ref *use = *use_rec; | |
552 | if (rtx_equal_p (DF_REF_REG (use), def_reg)) | |
553 | return false; | |
554 | } | |
555 | for (use_rec = DF_INSN_EQ_USES (def_insn); *use_rec; use_rec++) | |
556 | { | |
557 | struct df_ref *use = *use_rec; | |
558 | if (rtx_equal_p (use->reg, def_reg)) | |
559 | return false; | |
560 | } | |
42a3a38b | 561 | } |
562 | else | |
563 | { | |
564 | /* Look at all the uses of DEF_INSN, and see if they are not | |
565 | killed between DEF_INSN and TARGET_INSN. */ | |
3072d30e | 566 | for (use_rec = DF_INSN_USES (def_insn); *use_rec; use_rec++) |
567 | { | |
568 | struct df_ref *use = *use_rec; | |
569 | if (use_killed_between (use, def_insn, target_insn)) | |
570 | return false; | |
571 | } | |
572 | for (use_rec = DF_INSN_EQ_USES (def_insn); *use_rec; use_rec++) | |
573 | { | |
574 | struct df_ref *use = *use_rec; | |
575 | if (use_killed_between (use, def_insn, target_insn)) | |
576 | return false; | |
577 | } | |
42a3a38b | 578 | } |
579 | ||
580 | /* We don't do any analysis of memories or aliasing. Reject any | |
581 | instruction that involves references to non-constant memory. */ | |
582 | return !for_each_rtx (&SET_SRC (def_set), varying_mem_p, NULL); | |
583 | } | |
584 | ||
585 | \f | |
586 | struct find_occurrence_data /* Search state passed to find_occurrence_callback through for_each_rtx.  */ | |
587 | { | |
588 | rtx find; /* The rtx being searched for; the callback compares it by pointer.  */ | |
589 | rtx *retval; /* Location where FIND was seen; find_occurrence presets it to NULL.  */ | |
590 | }; | |
591 | ||
592 | /* Callback for for_each_rtx, used in find_occurrence. | |
593 | See if PX is the rtx we have to find. Return 1 to stop for_each_rtx | |
594 | if successful, or 0 to continue traversing otherwise. */ | |
595 | ||
596 | static int | |
597 | find_occurrence_callback (rtx *px, void *data) | |
598 | { | |
599 | struct find_occurrence_data *fod = (struct find_occurrence_data *) data; | |
600 | rtx x = *px; | |
601 | rtx find = fod->find; | |
602 | ||
603 | if (x == find) | |
604 | { | |
605 | fod->retval = px; | |
606 | return 1; | |
607 | } | |
608 | ||
609 | return 0; | |
610 | } | |
611 | ||
612 | /* Return a pointer to one of the occurrences of register FIND in *PX. */ | |
613 | ||
614 | static rtx * | |
615 | find_occurrence (rtx *px, rtx find) | |
616 | { | |
617 | struct find_occurrence_data data; | |
618 | ||
619 | gcc_assert (REG_P (find) | |
620 | || (GET_CODE (find) == SUBREG | |
621 | && REG_P (SUBREG_REG (find)))); | |
622 | ||
623 | data.find = find; | |
624 | data.retval = NULL; | |
625 | for_each_rtx (px, find_occurrence_callback, &data); | |
626 | return data.retval; | |
627 | } | |
628 | ||
629 | \f | |
630 | /* Inside INSN, the expression rooted at *LOC has been changed, moving some | |
3072d30e | 631 | uses from USE_VEC. Find those that are present, and create new items |
42a3a38b | 632 | in the data flow object of the pass. Mark any new uses as having the |
633 | given TYPE. */ | |
634 | static void | |
3072d30e | 635 | update_df (rtx insn, rtx *loc, struct df_ref **use_rec, enum df_ref_type type, |
42a3a38b | 636 | int new_flags) |
637 | { | |
3072d30e | 638 | bool changed = false; |
42a3a38b | 639 | |
640 | /* Add a use for the registers that were propagated. */ | |
3072d30e | 641 | while (*use_rec) |
42a3a38b | 642 | { |
3072d30e | 643 | struct df_ref *use = *use_rec; |
42a3a38b | 644 | struct df_ref *orig_use = use, *new_use; |
645 | rtx *new_loc = find_occurrence (loc, DF_REF_REG (orig_use)); | |
3072d30e | 646 | use_rec++; |
42a3a38b | 647 | |
648 | if (!new_loc) | |
649 | continue; | |
650 | ||
651 | /* Add a new insn use. Use the original type, because it says if the | |
652 | use was within a MEM. */ | |
3072d30e | 653 | new_use = df_ref_create (DF_REF_REG (orig_use), new_loc, |
42a3a38b | 654 | insn, BLOCK_FOR_INSN (insn), |
655 | type, DF_REF_FLAGS (orig_use) | new_flags); | |
656 | ||
657 | /* Set up the use-def chain. */ | |
3072d30e | 658 | df_chain_copy (new_use, DF_REF_CHAIN (orig_use)); |
659 | changed = true; | |
42a3a38b | 660 | } |
3072d30e | 661 | if (changed) |
662 | df_insn_rescan (insn); | |
42a3a38b | 663 | } |
664 | ||
665 | ||
666 | /* Try substituting NEW into LOC, which originated from forward propagation | |
667 | of USE's value from DEF_INSN. SET_REG_EQUAL says whether we are | |
668 | substituting the whole SET_SRC, so we can set a REG_EQUAL note if the | |
669 | new insn is not recognized. Return whether the substitution was | |
670 | performed. */ | |
671 | ||
672 | static bool | |
673 | try_fwprop_subst (struct df_ref *use, rtx *loc, rtx new, rtx def_insn, bool set_reg_equal) | |
674 | { | |
675 | rtx insn = DF_REF_INSN (use); | |
676 | enum df_ref_type type = DF_REF_TYPE (use); | |
677 | int flags = DF_REF_FLAGS (use); | |
678 | ||
679 | if (dump_file) | |
680 | { | |
681 | fprintf (dump_file, "\nIn insn %d, replacing\n ", INSN_UID (insn)); | |
682 | print_inline_rtx (dump_file, *loc, 2); | |
683 | fprintf (dump_file, "\n with "); | |
684 | print_inline_rtx (dump_file, new, 2); | |
685 | fprintf (dump_file, "\n"); | |
686 | } | |
687 | ||
11d686e2 | 688 | if (validate_unshare_change (insn, loc, new, false)) |
42a3a38b | 689 | { |
690 | num_changes++; | |
691 | if (dump_file) | |
692 | fprintf (dump_file, "Changed insn %d\n", INSN_UID (insn)); | |
693 | ||
3072d30e | 694 | df_ref_remove (use); |
42a3a38b | 695 | if (!CONSTANT_P (new)) |
3072d30e | 696 | { |
697 | update_df (insn, loc, DF_INSN_USES (def_insn), type, flags); | |
698 | update_df (insn, loc, DF_INSN_EQ_USES (def_insn), type, flags); | |
699 | } | |
42a3a38b | 700 | return true; |
701 | } | |
702 | else | |
703 | { | |
704 | if (dump_file) | |
705 | fprintf (dump_file, "Changes to insn %d not recognized\n", | |
706 | INSN_UID (insn)); | |
707 | ||
708 | /* Can also record a simplified value in a REG_EQUAL note, making a | |
709 | new one if one does not already exist. */ | |
710 | if (set_reg_equal) | |
711 | { | |
712 | if (dump_file) | |
713 | fprintf (dump_file, " Setting REG_EQUAL note\n"); | |
714 | ||
750a330e | 715 | set_unique_reg_note (insn, REG_EQUAL, copy_rtx (new)); |
42a3a38b | 716 | |
750a330e | 717 | /* ??? Is this still necessary if we add the note through |
718 | set_unique_reg_note? */ | |
42a3a38b | 719 | if (!CONSTANT_P (new)) |
3072d30e | 720 | { |
721 | update_df (insn, loc, DF_INSN_USES (def_insn), | |
722 | type, DF_REF_IN_NOTE); | |
723 | update_df (insn, loc, DF_INSN_EQ_USES (def_insn), | |
724 | type, DF_REF_IN_NOTE); | |
725 | } | |
42a3a38b | 726 | } |
727 | ||
728 | return false; | |
729 | } | |
730 | } | |
731 | ||
732 | ||
733 | /* If USE is a paradoxical subreg, see if it can be replaced by a pseudo. */ | |
734 | ||
735 | static bool | |
736 | forward_propagate_subreg (struct df_ref *use, rtx def_insn, rtx def_set) | |
737 | { | |
738 | rtx use_reg = DF_REF_REG (use); | |
739 | rtx use_insn, src; | |
740 | ||
741 | /* Only consider paradoxical subregs... */ | |
742 | enum machine_mode use_mode = GET_MODE (use_reg); | |
743 | if (GET_CODE (use_reg) != SUBREG | |
744 | || !REG_P (SET_DEST (def_set)) | |
745 | || GET_MODE_SIZE (use_mode) | |
746 | <= GET_MODE_SIZE (GET_MODE (SUBREG_REG (use_reg)))) | |
747 | return false; | |
748 | ||
749 | /* If this is a paradoxical SUBREG, we have no idea what value the | |
750 | extra bits would have. However, if the operand is equivalent to | |
751 | a SUBREG whose operand is the same as our mode, and all the modes | |
752 | are within a word, we can just use the inner operand because | |
753 | these SUBREGs just say how to treat the register. */ | |
754 | use_insn = DF_REF_INSN (use); | |
755 | src = SET_SRC (def_set); | |
756 | if (GET_CODE (src) == SUBREG | |
757 | && REG_P (SUBREG_REG (src)) | |
758 | && GET_MODE (SUBREG_REG (src)) == use_mode | |
759 | && subreg_lowpart_p (src) | |
760 | && all_uses_available_at (def_insn, use_insn)) | |
761 | return try_fwprop_subst (use, DF_REF_LOC (use), SUBREG_REG (src), | |
762 | def_insn, false); | |
763 | else | |
764 | return false; | |
765 | } | |
766 | ||
767 | /* Try to replace USE with SRC (defined in DEF_INSN) and simplify the | |
768 | result. */ | |
769 | ||
770 | static bool | |
771 | forward_propagate_and_simplify (struct df_ref *use, rtx def_insn, rtx def_set) | |
772 | { | |
773 | rtx use_insn = DF_REF_INSN (use); | |
774 | rtx use_set = single_set (use_insn); | |
775 | rtx src, reg, new, *loc; | |
776 | bool set_reg_equal; | |
777 | enum machine_mode mode; | |
778 | ||
779 | if (!use_set) | |
780 | return false; | |
781 | ||
782 | /* Do not propagate into PC, CC0, etc. */ | |
783 | if (GET_MODE (SET_DEST (use_set)) == VOIDmode) | |
784 | return false; | |
785 | ||
786 | /* If def and use are subreg, check if they match. */ | |
787 | reg = DF_REF_REG (use); | |
788 | if (GET_CODE (reg) == SUBREG | |
789 | && GET_CODE (SET_DEST (def_set)) == SUBREG | |
790 | && (SUBREG_BYTE (SET_DEST (def_set)) != SUBREG_BYTE (reg) | |
791 | || GET_MODE (SET_DEST (def_set)) != GET_MODE (reg))) | |
792 | return false; | |
793 | ||
794 | /* Check if the def had a subreg, but the use has the whole reg. */ | |
795 | if (REG_P (reg) && GET_CODE (SET_DEST (def_set)) == SUBREG) | |
796 | return false; | |
797 | ||
798 | /* Check if the use has a subreg, but the def had the whole reg. Unlike the | |
799 | previous case, the optimization is possible and often useful indeed. */ | |
800 | if (GET_CODE (reg) == SUBREG && REG_P (SET_DEST (def_set))) | |
801 | reg = SUBREG_REG (reg); | |
802 | ||
803 | /* Check if the substitution is valid (last, because it's the most | |
804 | expensive check!). */ | |
805 | src = SET_SRC (def_set); | |
806 | if (!CONSTANT_P (src) && !all_uses_available_at (def_insn, use_insn)) | |
807 | return false; | |
808 | ||
809 | /* Check if the def is loading something from the constant pool; in this | |
810 | case we would undo optimization such as compress_float_constant. | |
811 | Still, we can set a REG_EQUAL note. */ | |
812 | if (MEM_P (src) && MEM_READONLY_P (src)) | |
813 | { | |
814 | rtx x = avoid_constant_pool_reference (src); | |
815 | if (x != src) | |
816 | { | |
817 | rtx note = find_reg_note (use_insn, REG_EQUAL, NULL_RTX); | |
818 | rtx old = note ? XEXP (note, 0) : SET_SRC (use_set); | |
819 | rtx new = simplify_replace_rtx (old, src, x); | |
3072d30e | 820 | if (old != new) |
42a3a38b | 821 | set_unique_reg_note (use_insn, REG_EQUAL, copy_rtx (new)); |
822 | } | |
823 | return false; | |
824 | } | |
825 | ||
826 | /* Else try simplifying. */ | |
827 | ||
828 | if (DF_REF_TYPE (use) == DF_REF_REG_MEM_STORE) | |
829 | { | |
830 | loc = &SET_DEST (use_set); | |
831 | set_reg_equal = false; | |
832 | } | |
833 | else | |
834 | { | |
835 | rtx note = find_reg_note (use_insn, REG_EQUAL, NULL_RTX); | |
836 | if (DF_REF_FLAGS (use) & DF_REF_IN_NOTE) | |
837 | loc = &XEXP (note, 0); | |
838 | else | |
839 | loc = &SET_SRC (use_set); | |
3072d30e | 840 | |
42a3a38b | 841 | /* Do not replace an existing REG_EQUAL note if the insn is not |
842 | recognized. Either we're already replacing in the note, or | |
843 | we'll separately try plugging the definition in the note and | |
844 | simplifying. */ | |
845 | set_reg_equal = (note == NULL_RTX); | |
846 | } | |
847 | ||
848 | if (GET_MODE (*loc) == VOIDmode) | |
849 | mode = GET_MODE (SET_DEST (use_set)); | |
850 | else | |
851 | mode = GET_MODE (*loc); | |
852 | ||
853 | new = propagate_rtx (*loc, mode, reg, src); | |
3072d30e | 854 | |
42a3a38b | 855 | if (!new) |
856 | return false; | |
857 | ||
858 | return try_fwprop_subst (use, loc, new, def_insn, set_reg_equal); | |
859 | } | |
860 | ||
861 | ||
862 | /* Given a use USE of an insn, if it has a single reaching | |
863 | definition, try to forward propagate it into that insn. */ | |
864 | ||
865 | static void | |
866 | forward_propagate_into (struct df_ref *use) | |
867 | { | |
868 | struct df_link *defs; | |
869 | struct df_ref *def; | |
870 | rtx def_insn, def_set, use_insn; | |
3072d30e | 871 | rtx parent; |
42a3a38b | 872 | |
873 | if (DF_REF_FLAGS (use) & DF_REF_READ_WRITE) | |
874 | return; | |
3072d30e | 875 | if (DF_REF_IS_ARTIFICIAL (use)) |
225c0ccb | 876 | return; |
42a3a38b | 877 | |
878 | /* Only consider uses that have a single definition. */ | |
879 | defs = DF_REF_CHAIN (use); | |
880 | if (!defs || defs->next) | |
881 | return; | |
882 | ||
883 | def = defs->ref; | |
884 | if (DF_REF_FLAGS (def) & DF_REF_READ_WRITE) | |
885 | return; | |
3072d30e | 886 | if (DF_REF_IS_ARTIFICIAL (def)) |
225c0ccb | 887 | return; |
42a3a38b | 888 | |
243f24c5 | 889 | /* Do not propagate loop invariant definitions inside the loop. */ |
890 | if (DF_REF_BB (def)->loop_father != DF_REF_BB (use)->loop_father) | |
42a3a38b | 891 | return; |
892 | ||
893 | /* Check if the use is still present in the insn! */ | |
894 | use_insn = DF_REF_INSN (use); | |
895 | if (DF_REF_FLAGS (use) & DF_REF_IN_NOTE) | |
896 | parent = find_reg_note (use_insn, REG_EQUAL, NULL_RTX); | |
897 | else | |
898 | parent = PATTERN (use_insn); | |
899 | ||
900 | if (!loc_mentioned_in_p (DF_REF_LOC (use), parent)) | |
901 | return; | |
902 | ||
903 | def_insn = DF_REF_INSN (def); | |
3072d30e | 904 | if (multiple_sets (def_insn)) |
905 | return; | |
42a3a38b | 906 | def_set = single_set (def_insn); |
907 | if (!def_set) | |
908 | return; | |
909 | ||
910 | /* Only try one kind of propagation. If two are possible, we'll | |
911 | do it on the following iterations. */ | |
912 | if (!forward_propagate_and_simplify (use, def_insn, def_set)) | |
913 | forward_propagate_subreg (use, def_insn, def_set); | |
914 | } | |
915 | ||
916 | \f | |
917 | static void | |
918 | fwprop_init (void) | |
919 | { | |
920 | num_changes = 0; | |
a39fe687 | 921 | calculate_dominance_info (CDI_DOMINATORS); |
42a3a38b | 922 | |
923 | /* We do not always want to propagate into loops, so we have to find | |
924 | loops and be careful about them. But we have to call flow_loops_find | |
925 | before df_analyze, because flow_loops_find may introduce new jump | |
926 | insns (sadly) if we are not working in cfglayout mode. */ | |
243f24c5 | 927 | loop_optimizer_init (0); |
42a3a38b | 928 | |
929 | /* Now set up the dataflow problem (we only want use-def chains) and | |
930 | put the dataflow solver to work. */ | |
3072d30e | 931 | df_set_flags (DF_EQ_NOTES); |
932 | df_chain_add_problem (DF_UD_CHAIN); | |
933 | df_analyze (); | |
934 | df_maybe_reorganize_use_refs (DF_REF_ORDER_BY_INSN_WITH_NOTES); | |
935 | df_set_flags (DF_DEFER_INSN_RESCAN); | |
42a3a38b | 936 | } |
937 | ||
938 | static void | |
939 | fwprop_done (void) | |
940 | { | |
243f24c5 | 941 | loop_optimizer_finalize (); |
3072d30e | 942 | |
a39fe687 | 943 | free_dominance_info (CDI_DOMINATORS); |
42a3a38b | 944 | cleanup_cfg (0); |
945 | delete_trivially_dead_insns (get_insns (), max_reg_num ()); | |
946 | ||
947 | if (dump_file) | |
948 | fprintf (dump_file, | |
949 | "\nNumber of successful forward propagations: %d\n\n", | |
950 | num_changes); | |
951 | } | |
952 | ||
953 | ||
954 | ||
955 | /* Main entry point. */ | |
956 | ||
957 | static bool | |
958 | gate_fwprop (void) | |
959 | { | |
960 | return optimize > 0 && flag_forward_propagate; | |
961 | } | |
962 | ||
963 | static unsigned int | |
964 | fwprop (void) | |
965 | { | |
966 | unsigned i; | |
967 | ||
968 | fwprop_init (); | |
969 | ||
970 | /* Go through all the uses. update_df will create new ones at the | |
971 | end, and we'll go through them as well. | |
972 | ||
973 | Do not forward propagate addresses into loops until after unrolling. | |
974 | CSE did so because it was able to fix its own mess, but we are not. */ | |
975 | ||
3072d30e | 976 | for (i = 0; i < DF_USES_TABLE_SIZE (); i++) |
42a3a38b | 977 | { |
3072d30e | 978 | struct df_ref *use = DF_USES_GET (i); |
42a3a38b | 979 | if (use) |
243f24c5 | 980 | if (DF_REF_TYPE (use) == DF_REF_REG_USE |
42a3a38b | 981 | || DF_REF_BB (use)->loop_father == NULL) |
982 | forward_propagate_into (use); | |
983 | } | |
984 | ||
985 | fwprop_done (); | |
42a3a38b | 986 | return 0; |
987 | } | |
988 | ||
989 | struct tree_opt_pass pass_rtl_fwprop = | |
990 | { | |
991 | "fwprop1", /* name */ | |
3072d30e | 992 | gate_fwprop, /* gate */ |
993 | fwprop, /* execute */ | |
42a3a38b | 994 | NULL, /* sub */ |
995 | NULL, /* next */ | |
996 | 0, /* static_pass_number */ | |
997 | TV_FWPROP, /* tv_id */ | |
998 | 0, /* properties_required */ | |
999 | 0, /* properties_provided */ | |
1000 | 0, /* properties_destroyed */ | |
1001 | 0, /* todo_flags_start */ | |
3072d30e | 1002 | TODO_df_finish | |
42a3a38b | 1003 | TODO_dump_func, /* todo_flags_finish */ |
1004 | 0 /* letter */ | |
1005 | }; | |
1006 | ||
42a3a38b | 1007 | static unsigned int |
1008 | fwprop_addr (void) | |
1009 | { | |
1010 | unsigned i; | |
1011 | fwprop_init (); | |
1012 | ||
1013 | /* Go through all the uses. update_df will create new ones at the | |
1014 | end, and we'll go through them as well. */ | |
3072d30e | 1015 | df_set_flags (DF_DEFER_INSN_RESCAN); |
1016 | ||
1017 | for (i = 0; i < DF_USES_TABLE_SIZE (); i++) | |
42a3a38b | 1018 | { |
3072d30e | 1019 | struct df_ref *use = DF_USES_GET (i); |
42a3a38b | 1020 | if (use) |
1021 | if (DF_REF_TYPE (use) != DF_REF_REG_USE | |
1022 | && DF_REF_BB (use)->loop_father != NULL) | |
1023 | forward_propagate_into (use); | |
1024 | } | |
1025 | ||
1026 | fwprop_done (); | |
1027 | ||
1028 | return 0; | |
1029 | } | |
1030 | ||
1031 | struct tree_opt_pass pass_rtl_fwprop_addr = | |
1032 | { | |
1033 | "fwprop2", /* name */ | |
3072d30e | 1034 | gate_fwprop, /* gate */ |
1035 | fwprop_addr, /* execute */ | |
42a3a38b | 1036 | NULL, /* sub */ |
1037 | NULL, /* next */ | |
1038 | 0, /* static_pass_number */ | |
1039 | TV_FWPROP, /* tv_id */ | |
1040 | 0, /* properties_required */ | |
1041 | 0, /* properties_provided */ | |
1042 | 0, /* properties_destroyed */ | |
1043 | 0, /* todo_flags_start */ | |
3072d30e | 1044 | TODO_df_finish | |
42a3a38b | 1045 | TODO_dump_func, /* todo_flags_finish */ |
1046 | 0 /* letter */ | |
1047 | }; |