]>
Commit | Line | Data |
---|---|---|
a52b023a | 1 | /* RTL-based forward propagation pass for GNU compiler. |
99dee823 | 2 | Copyright (C) 2005-2021 Free Software Foundation, Inc. |
a52b023a PB |
3 | Contributed by Paolo Bonzini and Steven Bosscher. |
4 | ||
5 | This file is part of GCC. | |
6 | ||
7 | GCC is free software; you can redistribute it and/or modify it under | |
8 | the terms of the GNU General Public License as published by the Free | |
9dcd6f09 | 9 | Software Foundation; either version 3, or (at your option) any later |
a52b023a PB |
10 | version. |
11 | ||
12 | GCC is distributed in the hope that it will be useful, but WITHOUT ANY | |
13 | WARRANTY; without even the implied warranty of MERCHANTABILITY or | |
14 | FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License | |
15 | for more details. | |
16 | ||
17 | You should have received a copy of the GNU General Public License | |
9dcd6f09 NC |
18 | along with GCC; see the file COPYING3. If not see |
19 | <http://www.gnu.org/licenses/>. */ | |
a52b023a | 20 | |
0b76990a RS |
21 | #define INCLUDE_ALGORITHM |
22 | #define INCLUDE_FUNCTIONAL | |
a52b023a PB |
23 | #include "config.h" |
24 | #include "system.h" | |
25 | #include "coretypes.h" | |
c7131fb2 AM |
26 | #include "backend.h" |
27 | #include "rtl.h" | |
28 | #include "df.h" | |
0b76990a | 29 | #include "rtl-ssa.h" |
957060b5 | 30 | |
0b76990a | 31 | #include "predict.h" |
60393bbc AM |
32 | #include "cfgrtl.h" |
33 | #include "cfgcleanup.h" | |
a52b023a PB |
34 | #include "cfgloop.h" |
35 | #include "tree-pass.h" | |
aa4e2d7e | 36 | #include "rtl-iter.h" |
0b76990a | 37 | #include "target.h" |
a52b023a PB |
38 | |
39 | /* This pass does simple forward propagation and simplification when an | |
40 | operand of an insn can only come from a single def. This pass uses | |
0b76990a | 41 | RTL SSA, so it is global. However, we only do limited analysis of |
a52b023a PB |
42 | available expressions. |
43 | ||
44 | 1) The pass tries to propagate the source of the def into the use, | |
45 | and checks if the result is independent of the substituted value. | |
46 | For example, the high word of a (zero_extend:DI (reg:SI M)) is always | |
47 | zero, independent of the source register. | |
48 | ||
49 | In particular, we propagate constants into the use site. Sometimes | |
50 | RTL expansion did not put the constant in the same insn on purpose, | |
51 | to satisfy a predicate, and the result will fail to be recognized; | |
52 | but this happens rarely and in this case we can still create a | |
53 | REG_EQUAL note. For multi-word operations, this | |
54 | ||
55 | (set (subreg:SI (reg:DI 120) 0) (const_int 0)) | |
56 | (set (subreg:SI (reg:DI 120) 4) (const_int -1)) | |
57 | (set (subreg:SI (reg:DI 122) 0) | |
0b76990a | 58 | (ior:SI (subreg:SI (reg:DI 119) 0) (subreg:SI (reg:DI 120) 0))) |
a52b023a | 59 | (set (subreg:SI (reg:DI 122) 4) |
0b76990a | 60 | (ior:SI (subreg:SI (reg:DI 119) 4) (subreg:SI (reg:DI 120) 4))) |
a52b023a PB |
61 | |
62 | can be simplified to the much simpler | |
63 | ||
64 | (set (subreg:SI (reg:DI 122) 0) (subreg:SI (reg:DI 119))) | |
65 | (set (subreg:SI (reg:DI 122) 4) (const_int -1)) | |
66 | ||
67 | This particular propagation is also effective at putting together | |
68 | complex addressing modes. We are more aggressive inside MEMs, in | |
69 | that all definitions are propagated if the use is in a MEM; if the | |
70 | result is a valid memory address we check address_cost to decide | |
71 | whether the substitution is worthwhile. | |
72 | ||
73 | 2) The pass propagates register copies. This is not as effective as | |
74 | the copy propagation done by CSE's canon_reg, which works by walking | |
75 | the instruction chain, it can help the other transformations. | |
76 | ||
77 | We should consider removing this optimization, and instead reorder the | |
78 | RTL passes, because GCSE does this transformation too. With some luck, | |
79 | the CSE pass at the end of rest_of_handle_gcse could also go away. | |
80 | ||
81 | 3) The pass looks for paradoxical subregs that are actually unnecessary. | |
82 | Things like this: | |
83 | ||
84 | (set (reg:QI 120) (subreg:QI (reg:SI 118) 0)) | |
85 | (set (reg:QI 121) (subreg:QI (reg:SI 119) 0)) | |
86 | (set (reg:SI 122) (plus:SI (subreg:SI (reg:QI 120) 0) | |
0b76990a | 87 | (subreg:SI (reg:QI 121) 0))) |
a52b023a PB |
88 | |
89 | are very common on machines that can only do word-sized operations. | |
90 | For each use of a paradoxical subreg (subreg:WIDER (reg:NARROW N) 0), | |
91 | if it has a single def and it is (subreg:NARROW (reg:WIDE M) 0), | |
92 | we can replace the paradoxical subreg with simply (reg:WIDE M). The | |
93 | above will simplify this to | |
94 | ||
95 | (set (reg:QI 120) (subreg:QI (reg:SI 118) 0)) | |
96 | (set (reg:QI 121) (subreg:QI (reg:SI 119) 0)) | |
97 | (set (reg:SI 122) (plus:SI (reg:SI 118) (reg:SI 119))) | |
98 | ||
0b76990a | 99 | where the first two insns are now dead. */ |
a52b023a | 100 | |
0b76990a | 101 | using namespace rtl_ssa; |
a52b023a | 102 | |
a52b023a PB |
103 | static int num_changes; |
104 | ||
a52b023a PB |
105 | /* Do not try to replace constant addresses or addresses of local and |
106 | argument slots. These MEM expressions are made only once and inserted | |
107 | in many instructions, as well as being used to control symbol table | |
108 | output. It is not safe to clobber them. | |
109 | ||
110 | There are some uncommon cases where the address is already in a register | |
111 | for some reason, but we cannot take advantage of that because we have | |
112 | no easy way to unshare the MEM. In addition, looking up all stack | |
113 | addresses is costly. */ | |
114 | ||
115 | static bool | |
116 | can_simplify_addr (rtx addr) | |
117 | { | |
118 | rtx reg; | |
119 | ||
120 | if (CONSTANT_ADDRESS_P (addr)) | |
121 | return false; | |
122 | ||
123 | if (GET_CODE (addr) == PLUS) | |
124 | reg = XEXP (addr, 0); | |
125 | else | |
126 | reg = addr; | |
127 | ||
128 | return (!REG_P (reg) | |
129 | || (REGNO (reg) != FRAME_POINTER_REGNUM | |
130 | && REGNO (reg) != HARD_FRAME_POINTER_REGNUM | |
131 | && REGNO (reg) != ARG_POINTER_REGNUM)); | |
132 | } | |
133 | ||
0b76990a RS |
134 | /* MEM is the result of an address simplification, and temporarily |
135 | undoing changes OLD_NUM_CHANGES onwards restores the original address. | |
136 | Return whether it is good to use the new address instead of the | |
137 | old one. INSN is the containing instruction. */ | |
a52b023a PB |
138 | |
139 | static bool | |
0b76990a | 140 | should_replace_address (int old_num_changes, rtx mem, rtx_insn *insn) |
a52b023a PB |
141 | { |
142 | int gain; | |
143 | ||
a52b023a | 144 | /* Prefer the new address if it is less expensive. */ |
0b76990a RS |
145 | bool speed = optimize_bb_for_speed_p (BLOCK_FOR_INSN (insn)); |
146 | temporarily_undo_changes (old_num_changes); | |
147 | gain = address_cost (XEXP (mem, 0), GET_MODE (mem), | |
148 | MEM_ADDR_SPACE (mem), speed); | |
149 | redo_changes (old_num_changes); | |
150 | gain -= address_cost (XEXP (mem, 0), GET_MODE (mem), | |
151 | MEM_ADDR_SPACE (mem), speed); | |
a52b023a PB |
152 | |
153 | /* If the addresses have equivalent cost, prefer the new address | |
5e8f01f4 | 154 | if it has the highest `set_src_cost'. That has the potential of |
a52b023a PB |
155 | eliminating the most insns without additional costs, and it |
156 | is the same that cse.c used to do. */ | |
157 | if (gain == 0) | |
0b76990a RS |
158 | { |
159 | gain = set_src_cost (XEXP (mem, 0), VOIDmode, speed); | |
160 | temporarily_undo_changes (old_num_changes); | |
161 | gain -= set_src_cost (XEXP (mem, 0), VOIDmode, speed); | |
162 | redo_changes (old_num_changes); | |
163 | } | |
a52b023a PB |
164 | |
165 | return (gain > 0); | |
166 | } | |
167 | ||
460d667d | 168 | |
0b76990a RS |
169 | namespace |
170 | { | |
171 | class fwprop_propagation : public insn_propagation | |
172 | { | |
173 | public: | |
174 | static const uint16_t CHANGED_MEM = FIRST_SPARE_RESULT; | |
175 | static const uint16_t CONSTANT = FIRST_SPARE_RESULT << 1; | |
176 | static const uint16_t PROFITABLE = FIRST_SPARE_RESULT << 2; | |
460d667d | 177 | |
b61461ac | 178 | fwprop_propagation (insn_info *, set_info *, rtx, rtx); |
460d667d | 179 | |
0b76990a RS |
180 | bool changed_mem_p () const { return result_flags & CHANGED_MEM; } |
181 | bool folded_to_constants_p () const; | |
182 | bool profitable_p () const; | |
f40751dd | 183 | |
0b76990a RS |
184 | bool check_mem (int, rtx) final override; |
185 | void note_simplification (int, uint16_t, rtx, rtx) final override; | |
186 | uint16_t classify_result (rtx, rtx); | |
efb6bc55 IL |
187 | |
188 | private: | |
189 | const bool single_use_p; | |
190 | const bool single_ebb_p; | |
0b76990a RS |
191 | }; |
192 | } | |
460d667d | 193 | |
b61461ac | 194 | /* Prepare to replace FROM with TO in USE_INSN. */ |
75da268e | 195 | |
efb6bc55 | 196 | fwprop_propagation::fwprop_propagation (insn_info *use_insn, |
b61461ac | 197 | set_info *def, rtx from, rtx to) |
efb6bc55 | 198 | : insn_propagation (use_insn->rtl (), from, to), |
b61461ac IL |
199 | single_use_p (def->single_nondebug_use ()), |
200 | single_ebb_p (use_insn->ebb () == def->ebb ()) | |
75da268e | 201 | { |
0b76990a RS |
202 | should_check_mems = true; |
203 | should_note_simplifications = true; | |
75da268e | 204 | } |
460d667d | 205 | |
0b76990a RS |
206 | /* MEM is the result of an address simplification, and temporarily |
207 | undoing changes OLD_NUM_CHANGES onwards restores the original address. | |
208 | Return true if the propagation should continue, false if it has failed. */ | |
a52b023a | 209 | |
0b76990a RS |
210 | bool |
211 | fwprop_propagation::check_mem (int old_num_changes, rtx mem) | |
a52b023a | 212 | { |
0b76990a RS |
213 | if (!memory_address_addr_space_p (GET_MODE (mem), XEXP (mem, 0), |
214 | MEM_ADDR_SPACE (mem))) | |
460d667d | 215 | { |
0b76990a | 216 | failure_reason = "would create an invalid MEM"; |
460d667d PB |
217 | return false; |
218 | } | |
a52b023a | 219 | |
0b76990a RS |
220 | temporarily_undo_changes (old_num_changes); |
221 | bool can_simplify = can_simplify_addr (XEXP (mem, 0)); | |
222 | redo_changes (old_num_changes); | |
223 | if (!can_simplify) | |
a52b023a | 224 | { |
0b76990a RS |
225 | failure_reason = "would replace a frame address"; |
226 | return false; | |
a52b023a PB |
227 | } |
228 | ||
0b76990a RS |
229 | /* Copy propagations are always ok. Otherwise check the costs. */ |
230 | if (!(REG_P (from) && REG_P (to)) | |
231 | && !should_replace_address (old_num_changes, mem, insn)) | |
a52b023a | 232 | { |
0b76990a RS |
233 | failure_reason = "would increase the cost of a MEM"; |
234 | return false; | |
235 | } | |
a52b023a | 236 | |
0b76990a RS |
237 | result_flags |= CHANGED_MEM; |
238 | return true; | |
239 | } | |
a52b023a | 240 | |
0b76990a RS |
241 | /* OLDX has been simplified to NEWX. Describe the change in terms of |
242 | result_flags. */ | |
a52b023a | 243 | |
0b76990a RS |
244 | uint16_t |
245 | fwprop_propagation::classify_result (rtx old_rtx, rtx new_rtx) | |
246 | { | |
247 | if (CONSTANT_P (new_rtx)) | |
248 | { | |
249 | /* If OLD_RTX is a LO_SUM, then it presumably exists for a reason, | |
250 | and NEW_RTX is likely not a legitimate address. We want it to | |
251 | disappear if it is invalid. | |
252 | ||
253 | ??? Using the mode of the LO_SUM as the mode of the address | |
254 | seems odd, but it was what the pre-SSA code did. */ | |
255 | if (GET_CODE (old_rtx) == LO_SUM | |
256 | && !memory_address_p (GET_MODE (old_rtx), new_rtx)) | |
257 | return CONSTANT; | |
258 | return CONSTANT | PROFITABLE; | |
a52b023a PB |
259 | } |
260 | ||
712a93d6 RB |
261 | /* Allow replacements that simplify operations on a vector or complex |
262 | value to a component. The most prominent case is | |
263 | (subreg ([vec_]concat ...)). */ | |
0b76990a RS |
264 | if (REG_P (new_rtx) |
265 | && !HARD_REGISTER_P (new_rtx) | |
266 | && (VECTOR_MODE_P (GET_MODE (from)) | |
267 | || COMPLEX_MODE_P (GET_MODE (from))) | |
268 | && GET_MODE (new_rtx) == GET_MODE_INNER (GET_MODE (from))) | |
269 | return PROFITABLE; | |
712a93d6 | 270 | |
efb6bc55 IL |
271 | /* Allow (subreg (mem)) -> (mem) simplifications with the following |
272 | exceptions: | |
273 | 1) Propagating (mem)s into multiple uses is not profitable. | |
274 | 2) Propagating (mem)s across EBBs may not be profitable if the source EBB | |
275 | runs less frequently. | |
276 | 3) Propagating (mem)s into paradoxical (subreg)s is not profitable. | |
277 | 4) Creating new (mem/v)s is not correct, since DCE will not remove the old | |
278 | ones. */ | |
279 | if (single_use_p | |
280 | && single_ebb_p | |
281 | && SUBREG_P (old_rtx) | |
282 | && !paradoxical_subreg_p (old_rtx) | |
283 | && MEM_P (new_rtx) | |
284 | && !MEM_VOLATILE_P (new_rtx)) | |
285 | return PROFITABLE; | |
286 | ||
0b76990a | 287 | return 0; |
a52b023a PB |
288 | } |
289 | ||
0b76990a RS |
290 | /* Record that OLD_RTX has been simplified to NEW_RTX. OLD_NUM_CHANGES |
291 | is the number of unrelated changes that had been made before processing | |
292 | OLD_RTX and its subrtxes. OLD_RESULT_FLAGS is the value that result_flags | |
293 | had at that point. */ | |
460d667d | 294 | |
0b76990a RS |
295 | void |
296 | fwprop_propagation::note_simplification (int old_num_changes, | |
297 | uint16_t old_result_flags, | |
298 | rtx old_rtx, rtx new_rtx) | |
299 | { | |
300 | result_flags &= ~(CONSTANT | PROFITABLE); | |
301 | uint16_t new_flags = classify_result (old_rtx, new_rtx); | |
302 | if (old_num_changes) | |
303 | new_flags &= old_result_flags; | |
304 | result_flags |= new_flags; | |
305 | } | |
460d667d | 306 | |
0b76990a RS |
307 | /* Return true if all substitutions eventually folded to constants. */ |
308 | ||
309 | bool | |
310 | fwprop_propagation::folded_to_constants_p () const | |
460d667d | 311 | { |
0b76990a RS |
312 | /* If we're propagating a HIGH, require it to be folded with a |
313 | partnering LO_SUM. For example, a REG_EQUAL note with a register | |
314 | replaced by an unfolded HIGH is not useful. */ | |
315 | if (CONSTANT_P (to) && GET_CODE (to) != HIGH) | |
316 | return true; | |
317 | return !(result_flags & UNSIMPLIFIED) && (result_flags & CONSTANT); | |
460d667d PB |
318 | } |
319 | ||
320 | ||
0b76990a RS |
321 | /* Return true if it is worth keeping the result of the propagation, |
322 | false if it would increase the complexity of the pattern too much. */ | |
a52b023a | 323 | |
0b76990a RS |
324 | bool |
325 | fwprop_propagation::profitable_p () const | |
a52b023a | 326 | { |
0b76990a RS |
327 | if (changed_mem_p ()) |
328 | return true; | |
a52b023a | 329 | |
0b76990a RS |
330 | if (!(result_flags & UNSIMPLIFIED) |
331 | && (result_flags & PROFITABLE)) | |
332 | return true; | |
a52b023a | 333 | |
0b76990a RS |
334 | if (REG_P (to)) |
335 | return true; | |
a52b023a | 336 | |
0b76990a RS |
337 | if (GET_CODE (to) == SUBREG |
338 | && REG_P (SUBREG_REG (to)) | |
339 | && !paradoxical_subreg_p (to)) | |
340 | return true; | |
a52b023a | 341 | |
0b76990a RS |
342 | if (CONSTANT_P (to)) |
343 | return true; | |
a52b023a | 344 | |
0b76990a RS |
345 | return false; |
346 | } | |
a52b023a | 347 | |
0b76990a | 348 | /* Check that X has a single def. */ |
a52b023a | 349 | |
6fb5fa3c | 350 | static bool |
0b76990a | 351 | reg_single_def_p (rtx x) |
a52b023a | 352 | { |
0b76990a RS |
353 | return REG_P (x) && crtl->ssa->single_dominating_def (REGNO (x)); |
354 | } | |
a52b023a | 355 | |
0b76990a | 356 | /* Return true if X contains a paradoxical subreg. */ |
a52b023a | 357 | |
0b76990a RS |
358 | static bool |
359 | contains_paradoxical_subreg_p (rtx x) | |
360 | { | |
361 | subrtx_var_iterator::array_type array; | |
362 | FOR_EACH_SUBRTX_VAR (iter, array, x, NONCONST) | |
363 | { | |
364 | x = *iter; | |
365 | if (SUBREG_P (x) && paradoxical_subreg_p (x)) | |
366 | return true; | |
a52b023a PB |
367 | } |
368 | return false; | |
369 | } | |
370 | ||
b61461ac IL |
371 | /* Try to substitute (set DEST SRC), which defines DEF, into note NOTE of |
372 | USE_INSN. Return the number of substitutions on success, otherwise return | |
373 | -1 and leave USE_INSN unchanged. | |
a52b023a | 374 | |
b61461ac | 375 | If REQUIRE_CONSTANT is true, require all substituted occurrences of SRC |
0b76990a RS |
376 | to fold to a constant, so that the note does not use any more registers |
377 | than it did previously. If REQUIRE_CONSTANT is false, also allow the | |
378 | substitution if it's something we'd normally allow for the main | |
379 | instruction pattern. */ | |
692e7e54 | 380 | |
0b76990a | 381 | static int |
b61461ac | 382 | try_fwprop_subst_note (insn_info *use_insn, set_info *def, |
0b76990a | 383 | rtx note, rtx dest, rtx src, bool require_constant) |
a52b023a | 384 | { |
0b76990a | 385 | rtx_insn *use_rtl = use_insn->rtl (); |
b61461ac | 386 | insn_info *def_insn = def->insn (); |
a52b023a | 387 | |
0b76990a | 388 | insn_change_watermark watermark; |
b61461ac | 389 | fwprop_propagation prop (use_insn, def, dest, src); |
0b76990a | 390 | if (!prop.apply_to_rvalue (&XEXP (note, 0))) |
a52b023a | 391 | { |
0b76990a RS |
392 | if (dump_file && (dump_flags & TDF_DETAILS)) |
393 | fprintf (dump_file, "cannot propagate from insn %d into" | |
394 | " notes of insn %d: %s\n", def_insn->uid (), | |
395 | use_insn->uid (), prop.failure_reason); | |
396 | return -1; | |
a52b023a PB |
397 | } |
398 | ||
0b76990a RS |
399 | if (prop.num_replacements == 0) |
400 | return 0; | |
a52b023a | 401 | |
0b76990a | 402 | if (require_constant) |
a52b023a | 403 | { |
0b76990a RS |
404 | if (!prop.folded_to_constants_p ()) |
405 | { | |
406 | if (dump_file && (dump_flags & TDF_DETAILS)) | |
407 | fprintf (dump_file, "cannot propagate from insn %d into" | |
408 | " notes of insn %d: %s\n", def_insn->uid (), | |
409 | use_insn->uid (), "wouldn't fold to constants"); | |
410 | return -1; | |
411 | } | |
a52b023a PB |
412 | } |
413 | else | |
414 | { | |
0b76990a | 415 | if (!prop.folded_to_constants_p () && !prop.profitable_p ()) |
6fb5fa3c | 416 | { |
0b76990a RS |
417 | if (dump_file && (dump_flags & TDF_DETAILS)) |
418 | fprintf (dump_file, "cannot propagate from insn %d into" | |
419 | " notes of insn %d: %s\n", def_insn->uid (), | |
420 | use_insn->uid (), "would increase complexity of node"); | |
421 | return -1; | |
6fb5fa3c | 422 | } |
a52b023a PB |
423 | } |
424 | ||
0b76990a RS |
425 | if (dump_file && (dump_flags & TDF_DETAILS)) |
426 | { | |
427 | fprintf (dump_file, "\nin notes of insn %d, replacing:\n ", | |
428 | INSN_UID (use_rtl)); | |
429 | temporarily_undo_changes (0); | |
430 | print_inline_rtx (dump_file, note, 2); | |
431 | redo_changes (0); | |
432 | fprintf (dump_file, "\n with:\n "); | |
433 | print_inline_rtx (dump_file, note, 2); | |
434 | fprintf (dump_file, "\n"); | |
435 | } | |
436 | watermark.keep (); | |
437 | return prop.num_replacements; | |
a52b023a PB |
438 | } |
439 | ||
b61461ac | 440 | /* Try to substitute (set DEST SRC), which defines DEF, into location LOC of |
0b76990a RS |
441 | USE_INSN's pattern. Return true on success, otherwise leave USE_INSN |
442 | unchanged. */ | |
a52b023a | 443 | |
0b76990a RS |
444 | static bool |
445 | try_fwprop_subst_pattern (obstack_watermark &attempt, insn_change &use_change, | |
b61461ac | 446 | set_info *def, rtx *loc, rtx dest, rtx src) |
a52b023a | 447 | { |
0b76990a RS |
448 | insn_info *use_insn = use_change.insn (); |
449 | rtx_insn *use_rtl = use_insn->rtl (); | |
b61461ac | 450 | insn_info *def_insn = def->insn (); |
a52b023a | 451 | |
0b76990a | 452 | insn_change_watermark watermark; |
b61461ac | 453 | fwprop_propagation prop (use_insn, def, dest, src); |
0b76990a RS |
454 | if (!prop.apply_to_pattern (loc)) |
455 | { | |
456 | if (dump_file && (dump_flags & TDF_DETAILS)) | |
457 | fprintf (dump_file, "cannot propagate from insn %d into" | |
458 | " insn %d: %s\n", def_insn->uid (), use_insn->uid (), | |
459 | prop.failure_reason); | |
460 | return false; | |
dc007c1f | 461 | } |
a52b023a | 462 | |
0b76990a RS |
463 | if (prop.num_replacements == 0) |
464 | return false; | |
a52b023a | 465 | |
0b76990a RS |
466 | if (!prop.profitable_p ()) |
467 | { | |
468 | if (dump_file && (dump_flags & TDF_DETAILS)) | |
469 | fprintf (dump_file, "cannot propagate from insn %d into" | |
470 | " insn %d: %s\n", def_insn->uid (), use_insn->uid (), | |
471 | "would increase complexity of pattern"); | |
472 | return false; | |
473 | } | |
a52b023a | 474 | |
0b76990a RS |
475 | if (dump_file && (dump_flags & TDF_DETAILS)) |
476 | { | |
477 | fprintf (dump_file, "\npropagating insn %d into insn %d, replacing:\n", | |
478 | def_insn->uid (), use_insn->uid ()); | |
479 | temporarily_undo_changes (0); | |
480 | print_rtl_single (dump_file, PATTERN (use_rtl)); | |
481 | redo_changes (0); | |
482 | } | |
a52b023a | 483 | |
0b76990a RS |
484 | /* ??? In theory, it should be better to use insn costs rather than |
485 | set_src_costs here. That would involve replacing this code with | |
486 | change_is_worthwhile. */ | |
487 | bool ok = recog (attempt, use_change); | |
488 | if (ok && !prop.changed_mem_p () && !use_insn->is_asm ()) | |
489 | if (rtx use_set = single_set (use_rtl)) | |
490 | { | |
491 | bool speed = optimize_bb_for_speed_p (BLOCK_FOR_INSN (use_rtl)); | |
492 | temporarily_undo_changes (0); | |
493 | auto old_cost = set_src_cost (SET_SRC (use_set), | |
494 | GET_MODE (SET_DEST (use_set)), speed); | |
495 | redo_changes (0); | |
496 | auto new_cost = set_src_cost (SET_SRC (use_set), | |
497 | GET_MODE (SET_DEST (use_set)), speed); | |
498 | if (new_cost > old_cost) | |
499 | { | |
500 | if (dump_file) | |
501 | fprintf (dump_file, "change not profitable" | |
502 | " (cost %d -> cost %d)\n", old_cost, new_cost); | |
503 | ok = false; | |
504 | } | |
505 | } | |
a52b023a | 506 | |
0b76990a | 507 | if (!ok) |
a52b023a | 508 | { |
0b76990a RS |
509 | /* The pattern didn't match, but if all uses of SRC folded to |
510 | constants, we can add a REG_EQUAL note for the result, if there | |
511 | isn't one already. */ | |
512 | if (!prop.folded_to_constants_p ()) | |
513 | return false; | |
a52b023a | 514 | |
0b76990a RS |
515 | /* Test this first to avoid creating an unnecessary copy of SRC. */ |
516 | if (find_reg_note (use_rtl, REG_EQUAL, NULL_RTX)) | |
517 | return false; | |
a52b023a | 518 | |
0b76990a RS |
519 | rtx set = set_for_reg_notes (use_rtl); |
520 | if (!set || !REG_P (SET_DEST (set))) | |
521 | return false; | |
a52b023a | 522 | |
0b76990a RS |
523 | rtx value = copy_rtx (SET_SRC (set)); |
524 | cancel_changes (0); | |
dc007c1f | 525 | |
0b76990a RS |
526 | /* If there are any paradoxical SUBREGs, drop the REG_EQUAL note, |
527 | because the bits in there can be anything and so might not | |
528 | match the REG_EQUAL note content. See PR70574. */ | |
529 | if (contains_paradoxical_subreg_p (SET_SRC (set))) | |
530 | return false; | |
dc007c1f | 531 | |
0b76990a RS |
532 | if (dump_file && (dump_flags & TDF_DETAILS)) |
533 | fprintf (dump_file, " Setting REG_EQUAL note\n"); | |
dc007c1f | 534 | |
0b76990a | 535 | return set_unique_reg_note (use_rtl, REG_EQUAL, value); |
dc007c1f | 536 | } |
0b76990a RS |
537 | |
538 | rtx *note_ptr = ®_NOTES (use_rtl); | |
539 | while (rtx note = *note_ptr) | |
dc007c1f | 540 | { |
0b76990a RS |
541 | if ((REG_NOTE_KIND (note) == REG_EQUAL |
542 | || REG_NOTE_KIND (note) == REG_EQUIV) | |
b61461ac | 543 | && try_fwprop_subst_note (use_insn, def, note, dest, src, false) < 0) |
0b76990a RS |
544 | { |
545 | *note_ptr = XEXP (note, 1); | |
546 | free_EXPR_LIST_node (note); | |
547 | } | |
548 | else | |
549 | note_ptr = &XEXP (note, 1); | |
a52b023a | 550 | } |
dc007c1f | 551 | |
0b76990a RS |
552 | confirm_change_group (); |
553 | crtl->ssa->change_insn (use_change); | |
554 | num_changes++; | |
555 | return true; | |
a52b023a PB |
556 | } |
557 | ||
b61461ac | 558 | /* Try to substitute (set DEST SRC), which defines DEF, into USE_INSN's notes, |
0b76990a RS |
559 | given that it was not possible to do this for USE_INSN's main pattern. |
560 | Return true on success, otherwise leave USE_INSN unchanged. */ | |
a52b023a PB |
561 | |
562 | static bool | |
b61461ac | 563 | try_fwprop_subst_notes (insn_info *use_insn, set_info *def, |
0b76990a RS |
564 | rtx dest, rtx src) |
565 | { | |
566 | rtx_insn *use_rtl = use_insn->rtl (); | |
567 | for (rtx note = REG_NOTES (use_rtl); note; note = XEXP (note, 1)) | |
568 | if ((REG_NOTE_KIND (note) == REG_EQUAL | |
569 | || REG_NOTE_KIND (note) == REG_EQUIV) | |
b61461ac | 570 | && try_fwprop_subst_note (use_insn, def, note, dest, src, true) > 0) |
0b76990a RS |
571 | { |
572 | confirm_change_group (); | |
573 | return true; | |
574 | } | |
a52b023a | 575 | |
0b76990a RS |
576 | return false; |
577 | } | |
dc007c1f | 578 | |
b61461ac | 579 | /* Check whether we could validly substitute (set DEST SRC), which defines DEF, |
0b76990a RS |
580 | into USE. If so, first try performing the substitution in location LOC |
581 | of USE->insn ()'s pattern. If that fails, try instead to substitute | |
582 | into the notes. | |
a52b023a | 583 | |
0b76990a | 584 | Return true on success, otherwise leave USE_INSN unchanged. */ |
de266950 | 585 | |
0b76990a | 586 | static bool |
b61461ac | 587 | try_fwprop_subst (use_info *use, set_info *def, |
0b76990a RS |
588 | rtx *loc, rtx dest, rtx src) |
589 | { | |
590 | insn_info *use_insn = use->insn (); | |
b61461ac | 591 | insn_info *def_insn = def->insn (); |
de266950 | 592 | |
0b76990a RS |
593 | auto attempt = crtl->ssa->new_change_attempt (); |
594 | use_array src_uses = remove_note_accesses (attempt, def_insn->uses ()); | |
595 | ||
596 | /* ??? Not really a meaningful test: it means we can propagate arithmetic | |
597 | involving hard registers but not bare references to them. A better | |
598 | test would be to iterate over src_uses looking for hard registers | |
599 | that are not fixed. */ | |
600 | if (REG_P (src) && HARD_REGISTER_P (src)) | |
601 | return false; | |
de266950 | 602 | |
0b76990a RS |
603 | /* ??? It would be better to make this EBB-based instead. That would |
604 | involve checking for equal EBBs rather than equal BBs and trying | |
605 | to make the uses available at use_insn->ebb ()->first_bb (). */ | |
606 | if (def_insn->bb () != use_insn->bb ()) | |
de266950 | 607 | { |
0b76990a | 608 | src_uses = crtl->ssa->make_uses_available (attempt, src_uses, |
c97351c0 RS |
609 | use_insn->bb (), |
610 | use_insn->is_debug_insn ()); | |
0b76990a RS |
611 | if (!src_uses.is_valid ()) |
612 | return false; | |
a52b023a | 613 | } |
a52b023a | 614 | |
0b76990a RS |
615 | insn_change use_change (use_insn); |
616 | use_change.new_uses = merge_access_arrays (attempt, use_change.new_uses, | |
617 | src_uses); | |
618 | if (!use_change.new_uses.is_valid ()) | |
619 | return false; | |
1a13c0a2 | 620 | |
0b76990a | 621 | /* ??? We could allow movement within the EBB by adding: |
de266950 | 622 | |
0b76990a RS |
623 | use_change.move_range = use_insn->ebb ()->insn_range (); */ |
624 | if (!restrict_movement (use_change)) | |
625 | return false; | |
dc007c1f | 626 | |
b61461ac IL |
627 | return (try_fwprop_subst_pattern (attempt, use_change, def, loc, dest, src) |
628 | || try_fwprop_subst_notes (use_insn, def, dest, src)); | |
a52b023a PB |
629 | } |
630 | ||
e85122be AM |
631 | /* For the given single_set INSN, containing SRC known to be a |
632 | ZERO_EXTEND or SIGN_EXTEND of a register, return true if INSN | |
633 | is redundant due to the register being set by a LOAD_EXTEND_OP | |
634 | load from memory. */ | |
635 | ||
636 | static bool | |
0b76990a | 637 | free_load_extend (rtx src, insn_info *insn) |
e25b7843 | 638 | { |
0b76990a | 639 | rtx reg = XEXP (src, 0); |
3712c7a3 | 640 | if (load_extend_op (GET_MODE (reg)) != GET_CODE (src)) |
e85122be | 641 | return false; |
e25b7843 | 642 | |
0b76990a RS |
643 | def_info *def = nullptr; |
644 | for (use_info *use : insn->uses ()) | |
645 | if (use->regno () == REGNO (reg)) | |
646 | { | |
647 | def = use->def (); | |
648 | break; | |
649 | } | |
e85122be | 650 | |
e85122be AM |
651 | if (!def) |
652 | return false; | |
653 | ||
0b76990a RS |
654 | insn_info *def_insn = def->insn (); |
655 | if (def_insn->is_artificial ()) | |
e85122be AM |
656 | return false; |
657 | ||
0b76990a RS |
658 | rtx_insn *def_rtl = def_insn->rtl (); |
659 | if (NONJUMP_INSN_P (def_rtl)) | |
e85122be | 660 | { |
0b76990a | 661 | rtx patt = PATTERN (def_rtl); |
e85122be AM |
662 | |
663 | if (GET_CODE (patt) == SET | |
664 | && GET_CODE (SET_SRC (patt)) == MEM | |
665 | && rtx_equal_p (SET_DEST (patt), reg)) | |
666 | return true; | |
e25b7843 | 667 | } |
e85122be | 668 | return false; |
e25b7843 | 669 | } |
e25b7843 | 670 | |
0b76990a RS |
671 | /* Subroutine of forward_propagate_subreg that handles a use of DEST |
672 | in REF. The other parameters are the same. */ | |
a52b023a PB |
673 | |
674 | static bool | |
b61461ac | 675 | forward_propagate_subreg (use_info *use, set_info *def, |
0b76990a | 676 | rtx dest, rtx src, df_ref ref) |
a52b023a | 677 | { |
6b9c3dec | 678 | scalar_int_mode int_use_mode, src_mode; |
a52b023a | 679 | |
e25b7843 | 680 | /* Only consider subregs... */ |
0b76990a | 681 | rtx use_reg = DF_REF_REG (ref); |
ef4bddc2 | 682 | machine_mode use_mode = GET_MODE (use_reg); |
a52b023a | 683 | if (GET_CODE (use_reg) != SUBREG |
0b76990a | 684 | || GET_MODE (SUBREG_REG (use_reg)) != GET_MODE (dest)) |
a52b023a PB |
685 | return false; |
686 | ||
0b76990a RS |
687 | /* ??? Replacing throughout the pattern would help for match_dups. */ |
688 | rtx *loc = DF_REF_LOC (ref); | |
03a95621 | 689 | if (paradoxical_subreg_p (use_reg)) |
e25b7843 AM |
690 | { |
691 | /* If this is a paradoxical SUBREG, we have no idea what value the | |
692 | extra bits would have. However, if the operand is equivalent to | |
693 | a SUBREG whose operand is the same as our mode, and all the modes | |
694 | are within a word, we can just use the inner operand because | |
695 | these SUBREGs just say how to treat the register. */ | |
e25b7843 AM |
696 | if (GET_CODE (src) == SUBREG |
697 | && REG_P (SUBREG_REG (src)) | |
509a31f8 | 698 | && REGNO (SUBREG_REG (src)) >= FIRST_PSEUDO_REGISTER |
e25b7843 | 699 | && GET_MODE (SUBREG_REG (src)) == use_mode |
0b76990a | 700 | && subreg_lowpart_p (src)) |
b61461ac | 701 | return try_fwprop_subst (use, def, loc, use_reg, SUBREG_REG (src)); |
e25b7843 AM |
702 | } |
703 | ||
704 | /* If this is a SUBREG of a ZERO_EXTEND or SIGN_EXTEND, and the SUBREG | |
705 | is the low part of the reg being extended then just use the inner | |
706 | operand. Don't do this if the ZERO_EXTEND or SIGN_EXTEND insn will | |
7a613929 RS |
707 | be removed due to it matching a LOAD_EXTEND_OP load from memory, |
708 | or due to the operation being a no-op when applied to registers. | |
709 | For example, if we have: | |
710 | ||
711 | A: (set (reg:DI X) (sign_extend:DI (reg:SI Y))) | |
712 | B: (... (subreg:SI (reg:DI X)) ...) | |
713 | ||
714 | and mode_rep_extended says that Y is already sign-extended, | |
715 | the backend will typically allow A to be combined with the | |
716 | definition of Y or, failing that, allow A to be deleted after | |
717 | reload through register tying. Introducing more uses of Y | |
718 | prevents both optimisations. */ | |
6b9c3dec RS |
719 | else if (is_a <scalar_int_mode> (use_mode, &int_use_mode) |
720 | && subreg_lowpart_p (use_reg)) | |
e25b7843 | 721 | { |
e25b7843 AM |
722 | if ((GET_CODE (src) == ZERO_EXTEND |
723 | || GET_CODE (src) == SIGN_EXTEND) | |
6b9c3dec | 724 | && is_a <scalar_int_mode> (GET_MODE (src), &src_mode) |
e25b7843 | 725 | && REG_P (XEXP (src, 0)) |
509a31f8 | 726 | && REGNO (XEXP (src, 0)) >= FIRST_PSEUDO_REGISTER |
e25b7843 | 727 | && GET_MODE (XEXP (src, 0)) == use_mode |
b61461ac | 728 | && !free_load_extend (src, def->insn ()) |
6b9c3dec | 729 | && (targetm.mode_rep_extended (int_use_mode, src_mode) |
0b76990a | 730 | != (int) GET_CODE (src))) |
b61461ac | 731 | return try_fwprop_subst (use, def, loc, use_reg, XEXP (src, 0)); |
e25b7843 AM |
732 | } |
733 | ||
734 | return false; | |
a52b023a PB |
735 | } |
736 | ||
b61461ac | 737 | /* Try to substitute (set DEST SRC), which defines DEF, into USE and simplify |
0b76990a RS |
738 | the result, handling cases where DEST is used in a subreg and where |
739 | applying that subreg to SRC results in a useful simplification. */ | |
ce372372 JJ |
740 | |
741 | static bool | |
b61461ac | 742 | forward_propagate_subreg (use_info *use, set_info *def, rtx dest, rtx src) |
ce372372 | 743 | { |
0b76990a RS |
744 | if (!use->includes_subregs () || !REG_P (dest)) |
745 | return false; | |
ce372372 | 746 | |
0b76990a RS |
747 | if (GET_CODE (src) != SUBREG |
748 | && GET_CODE (src) != ZERO_EXTEND | |
749 | && GET_CODE (src) != SIGN_EXTEND) | |
ce372372 JJ |
750 | return false; |
751 | ||
0b76990a RS |
752 | rtx_insn *use_rtl = use->insn ()->rtl (); |
753 | df_ref ref; | |
ce372372 | 754 | |
0b76990a RS |
755 | FOR_EACH_INSN_USE (ref, use_rtl) |
756 | if (DF_REF_REGNO (ref) == use->regno () | |
b61461ac | 757 | && forward_propagate_subreg (use, def, dest, src, ref)) |
0b76990a | 758 | return true; |
ce372372 | 759 | |
0b76990a RS |
760 | FOR_EACH_INSN_EQ_USE (ref, use_rtl) |
761 | if (DF_REF_REGNO (ref) == use->regno () | |
b61461ac | 762 | && forward_propagate_subreg (use, def, dest, src, ref)) |
0b76990a | 763 | return true; |
ce372372 | 764 | |
0b76990a | 765 | return false; |
ce372372 JJ |
766 | } |
767 | ||
b61461ac | 768 | /* Try to substitute (set DEST SRC), which defines DEF, into USE and |
0b76990a | 769 | simplify the result. */ |
a52b023a PB |
770 | |
771 | static bool | |
b61461ac | 772 | forward_propagate_and_simplify (use_info *use, set_info *def, |
0b76990a RS |
773 | rtx dest, rtx src) |
774 | { | |
775 | insn_info *use_insn = use->insn (); | |
776 | rtx_insn *use_rtl = use_insn->rtl (); | |
b61461ac | 777 | insn_info *def_insn = def->insn (); |
0b76990a RS |
778 | |
779 | /* ??? This check seems unnecessary. We should be able to propagate | |
780 | into any kind of instruction, regardless of whether it's a single set. | |
781 | It seems odd to be more permissive with asms than normal instructions. */ | |
782 | bool need_single_set = (!use_insn->is_asm () && !use_insn->is_debug_insn ()); | |
783 | rtx use_set = single_set (use_rtl); | |
784 | if (need_single_set && !use_set) | |
a52b023a PB |
785 | return false; |
786 | ||
bd1cd0d0 | 787 | /* Do not propagate into PC etc. |
a52b023a | 788 | |
0b76990a RS |
789 | ??? This too seems unnecessary. The current code should work correctly |
790 | without it, including cases where jumps become unconditional. */ | |
791 | if (use_set && GET_MODE (SET_DEST (use_set)) == VOIDmode) | |
8e8af9b7 RS |
792 | return false; |
793 | ||
0b76990a RS |
794 | /* In __asm don't replace if src might need more registers than |
795 | reg, as that could increase register pressure on the __asm. */ | |
796 | if (use_insn->is_asm () && def_insn->uses ().size () > 1) | |
a52b023a PB |
797 | return false; |
798 | ||
799 | /* Check if the def is loading something from the constant pool; in this | |
800 | case we would undo optimization such as compress_float_constant. | |
801 | Still, we can set a REG_EQUAL note. */ | |
802 | if (MEM_P (src) && MEM_READONLY_P (src)) | |
803 | { | |
804 | rtx x = avoid_constant_pool_reference (src); | |
0b76990a RS |
805 | rtx note_set; |
806 | if (x != src | |
807 | && (note_set = set_for_reg_notes (use_rtl)) | |
808 | && REG_P (SET_DEST (note_set)) | |
809 | && !contains_paradoxical_subreg_p (SET_SRC (note_set))) | |
a52b023a | 810 | { |
0b76990a RS |
811 | rtx note = find_reg_note (use_rtl, REG_EQUAL, NULL_RTX); |
812 | rtx old_rtx = note ? XEXP (note, 0) : SET_SRC (note_set); | |
60564289 KG |
813 | rtx new_rtx = simplify_replace_rtx (old_rtx, src, x); |
814 | if (old_rtx != new_rtx) | |
0b76990a | 815 | set_unique_reg_note (use_rtl, REG_EQUAL, copy_rtx (new_rtx)); |
a52b023a PB |
816 | } |
817 | return false; | |
818 | } | |
819 | ||
0b76990a RS |
820 | /* ??? Unconditionally propagating into PATTERN would work better |
821 | for instructions that have match_dups. */ | |
822 | rtx *loc = need_single_set ? &use_set : &PATTERN (use_rtl); | |
b61461ac | 823 | return try_fwprop_subst (use, def, loc, dest, src); |
a52b023a PB |
824 | } |
825 | ||
a52b023a | 826 | /* Given a use USE of an insn, if it has a single reaching |
7360d2ac | 827 | definition, try to forward propagate it into that insn. |
0b76990a RS |
828 | Return true if something changed. |
829 | ||
75da268e | 830 | REG_PROP_ONLY is true if we should only propagate register copies. */ |
a52b023a | 831 | |
7360d2ac | 832 | static bool |
0b76990a | 833 | forward_propagate_into (use_info *use, bool reg_prop_only = false) |
a52b023a | 834 | { |
0b76990a | 835 | if (use->includes_read_writes ()) |
7360d2ac | 836 | return false; |
a52b023a | 837 | |
0b76990a | 838 | /* Disregard uninitialized uses. */ |
b61461ac | 839 | set_info *def = use->def (); |
00952e97 | 840 | if (!def) |
7360d2ac | 841 | return false; |
a52b023a | 842 | |
0b76990a RS |
843 | /* Only consider single-register definitions. This could be relaxed, |
844 | but it should rarely be needed before RA. */ | |
845 | def = look_through_degenerate_phi (def); | |
846 | if (def->includes_multiregs ()) | |
847 | return false; | |
a52b023a | 848 | |
0b76990a RS |
849 | /* Only consider uses whose definition comes from a real instruction. */ |
850 | insn_info *def_insn = def->insn (); | |
851 | if (def_insn->is_artificial ()) | |
7360d2ac | 852 | return false; |
a52b023a | 853 | |
0b76990a RS |
854 | rtx_insn *def_rtl = def_insn->rtl (); |
855 | if (!NONJUMP_INSN_P (def_rtl)) | |
856 | return false; | |
857 | /* ??? This seems an unnecessary restriction. We can easily tell | |
858 | which set the definition comes from. */ | |
859 | if (multiple_sets (def_rtl)) | |
7360d2ac | 860 | return false; |
0b76990a | 861 | rtx def_set = simple_regno_set (PATTERN (def_rtl), def->regno ()); |
a52b023a | 862 | if (!def_set) |
7360d2ac | 863 | return false; |
a52b023a | 864 | |
0b76990a RS |
865 | rtx dest = SET_DEST (def_set); |
866 | rtx src = SET_SRC (def_set); | |
75da268e PK |
867 | |
868 | /* Allow propagations into a loop only for reg-to-reg copies, since | |
869 | replacing one register by another shouldn't increase the cost. */ | |
0b76990a RS |
870 | struct loop *def_loop = def_insn->bb ()->cfg_bb ()->loop_father; |
871 | struct loop *use_loop = use->bb ()->cfg_bb ()->loop_father; | |
872 | if ((reg_prop_only || def_loop != use_loop) | |
873 | && (!reg_single_def_p (dest) || !reg_single_def_p (src))) | |
874 | return false; | |
75da268e | 875 | |
0b76990a RS |
876 | /* Don't substitute into a non-local goto, this confuses CFG. */ |
877 | insn_info *use_insn = use->insn (); | |
878 | rtx_insn *use_rtl = use_insn->rtl (); | |
879 | if (JUMP_P (use_rtl) | |
880 | && find_reg_note (use_rtl, REG_NON_LOCAL_GOTO, NULL_RTX)) | |
75da268e PK |
881 | return false; |
882 | ||
b61461ac IL |
883 | if (forward_propagate_and_simplify (use, def, dest, src) |
884 | || forward_propagate_subreg (use, def, dest, src)) | |
0b76990a | 885 | return true; |
a5a9046d | 886 | |
7360d2ac | 887 | return false; |
a52b023a | 888 | } |
a52b023a PB |
889 | \f |
890 | static void | |
891 | fwprop_init (void) | |
892 | { | |
893 | num_changes = 0; | |
6e0b633f | 894 | calculate_dominance_info (CDI_DOMINATORS); |
a52b023a PB |
895 | |
896 | /* We do not always want to propagate into loops, so we have to find | |
c5f4be84 SB |
897 | loops and be careful about them. Avoid CFG modifications so that |
898 | we don't have to update dominance information afterwards for | |
899 | build_single_def_use_links. */ | |
900 | loop_optimizer_init (AVOID_CFG_MODIFICATIONS); | |
a52b023a | 901 | |
0b76990a RS |
902 | df_analyze (); |
903 | crtl->ssa = new rtl_ssa::function_info (cfun); | |
a52b023a PB |
904 | } |
905 | ||
906 | static void | |
907 | fwprop_done (void) | |
908 | { | |
fb406162 | 909 | loop_optimizer_finalize (); |
6fb5fa3c | 910 | |
0b76990a | 911 | crtl->ssa->perform_pending_updates (); |
6e0b633f | 912 | free_dominance_info (CDI_DOMINATORS); |
a52b023a | 913 | cleanup_cfg (0); |
0b76990a RS |
914 | |
915 | delete crtl->ssa; | |
916 | crtl->ssa = nullptr; | |
917 | ||
a52b023a PB |
918 | delete_trivially_dead_insns (get_insns (), max_reg_num ()); |
919 | ||
920 | if (dump_file) | |
921 | fprintf (dump_file, | |
922 | "\nNumber of successful forward propagations: %d\n\n", | |
923 | num_changes); | |
924 | } | |
925 | ||
0b76990a RS |
926 | /* Try to optimize INSN, returning true if something changes. |
927 | FWPROP_ADDR_P is true if we are running fwprop_addr rather than | |
928 | the full fwprop. */ | |
929 | ||
930 | static bool | |
931 | fwprop_insn (insn_info *insn, bool fwprop_addr_p) | |
932 | { | |
933 | for (use_info *use : insn->uses ()) | |
934 | { | |
935 | if (use->is_mem ()) | |
936 | continue; | |
937 | /* ??? The choices here follow those in the pre-SSA code. */ | |
938 | if (!use->includes_address_uses ()) | |
939 | { | |
940 | if (forward_propagate_into (use, fwprop_addr_p)) | |
941 | return true; | |
942 | } | |
943 | else | |
944 | { | |
945 | struct loop *loop = insn->bb ()->cfg_bb ()->loop_father; | |
946 | /* The outermost loop is not really a loop. */ | |
947 | if (loop == NULL || loop_outer (loop) == NULL) | |
948 | { | |
949 | if (forward_propagate_into (use, fwprop_addr_p)) | |
950 | return true; | |
951 | } | |
952 | else if (fwprop_addr_p) | |
953 | { | |
954 | if (forward_propagate_into (use, false)) | |
955 | return true; | |
956 | } | |
957 | } | |
958 | } | |
959 | return false; | |
960 | } | |
a52b023a | 961 | |
a52b023a PB |
962 | /* Main entry point. */ |
963 | ||
964 | static bool | |
965 | gate_fwprop (void) | |
966 | { | |
967 | return optimize > 0 && flag_forward_propagate; | |
968 | } | |
969 | ||
970 | static unsigned int | |
75da268e | 971 | fwprop (bool fwprop_addr_p) |
a52b023a | 972 | { |
a52b023a PB |
973 | fwprop_init (); |
974 | ||
0b76990a RS |
975 | /* Go through all the instructions (including debug instructions) looking |
976 | for uses that we could propagate into. | |
a52b023a PB |
977 | |
978 | Do not forward propagate addresses into loops until after unrolling. | |
979 | CSE did so because it was able to fix its own mess, but we are not. */ | |
980 | ||
0b76990a | 981 | insn_info *next; |
75da268e | 982 | |
0b76990a RS |
983 | /* ??? This code uses a worklist in order to preserve the behavior |
984 | of the pre-SSA implementation. It would be better to instead | |
985 | iterate on each instruction until no more propagations are | |
986 | possible, then move on to the next. */ | |
987 | auto_vec<insn_info *> worklist; | |
988 | for (insn_info *insn = crtl->ssa->first_insn (); insn; insn = next) | |
989 | { | |
990 | next = insn->next_any_insn (); | |
991 | if (insn->can_be_optimized () || insn->is_debug_insn ()) | |
992 | if (fwprop_insn (insn, fwprop_addr_p)) | |
993 | worklist.safe_push (insn); | |
994 | } | |
995 | for (unsigned int i = 0; i < worklist.length (); ++i) | |
996 | { | |
997 | insn_info *insn = worklist[i]; | |
998 | if (fwprop_insn (insn, fwprop_addr_p)) | |
999 | worklist.safe_push (insn); | |
a52b023a PB |
1000 | } |
1001 | ||
1002 | fwprop_done (); | |
a52b023a PB |
1003 | return 0; |
1004 | } | |
1005 | ||
27a4cd48 DM |
1006 | namespace { |
1007 | ||
1008 | const pass_data pass_data_rtl_fwprop = | |
a52b023a | 1009 | { |
27a4cd48 DM |
1010 | RTL_PASS, /* type */ |
1011 | "fwprop1", /* name */ | |
1012 | OPTGROUP_NONE, /* optinfo_flags */ | |
27a4cd48 DM |
1013 | TV_FWPROP, /* tv_id */ |
1014 | 0, /* properties_required */ | |
1015 | 0, /* properties_provided */ | |
1016 | 0, /* properties_destroyed */ | |
1017 | 0, /* todo_flags_start */ | |
3bea341f | 1018 | TODO_df_finish, /* todo_flags_finish */ |
a52b023a PB |
1019 | }; |
1020 | ||
27a4cd48 DM |
1021 | class pass_rtl_fwprop : public rtl_opt_pass |
1022 | { | |
1023 | public: | |
c3284718 RS |
1024 | pass_rtl_fwprop (gcc::context *ctxt) |
1025 | : rtl_opt_pass (pass_data_rtl_fwprop, ctxt) | |
27a4cd48 DM |
1026 | {} |
1027 | ||
1028 | /* opt_pass methods: */ | |
1a3d085c | 1029 | virtual bool gate (function *) { return gate_fwprop (); } |
75da268e | 1030 | virtual unsigned int execute (function *) { return fwprop (false); } |
27a4cd48 DM |
1031 | |
1032 | }; // class pass_rtl_fwprop | |
1033 | ||
1034 | } // anon namespace | |
1035 | ||
1036 | rtl_opt_pass * | |
1037 | make_pass_rtl_fwprop (gcc::context *ctxt) | |
1038 | { | |
1039 | return new pass_rtl_fwprop (ctxt); | |
1040 | } | |
1041 | ||
27a4cd48 DM |
1042 | namespace { |
1043 | ||
1044 | const pass_data pass_data_rtl_fwprop_addr = | |
a52b023a | 1045 | { |
27a4cd48 DM |
1046 | RTL_PASS, /* type */ |
1047 | "fwprop2", /* name */ | |
1048 | OPTGROUP_NONE, /* optinfo_flags */ | |
27a4cd48 DM |
1049 | TV_FWPROP, /* tv_id */ |
1050 | 0, /* properties_required */ | |
1051 | 0, /* properties_provided */ | |
1052 | 0, /* properties_destroyed */ | |
1053 | 0, /* todo_flags_start */ | |
3bea341f | 1054 | TODO_df_finish, /* todo_flags_finish */ |
a52b023a | 1055 | }; |
27a4cd48 DM |
1056 | |
1057 | class pass_rtl_fwprop_addr : public rtl_opt_pass | |
1058 | { | |
1059 | public: | |
c3284718 RS |
1060 | pass_rtl_fwprop_addr (gcc::context *ctxt) |
1061 | : rtl_opt_pass (pass_data_rtl_fwprop_addr, ctxt) | |
27a4cd48 DM |
1062 | {} |
1063 | ||
1064 | /* opt_pass methods: */ | |
1a3d085c | 1065 | virtual bool gate (function *) { return gate_fwprop (); } |
75da268e | 1066 | virtual unsigned int execute (function *) { return fwprop (true); } |
27a4cd48 DM |
1067 | |
1068 | }; // class pass_rtl_fwprop_addr | |
1069 | ||
1070 | } // anon namespace | |
1071 | ||
1072 | rtl_opt_pass * | |
1073 | make_pass_rtl_fwprop_addr (gcc::context *ctxt) | |
1074 | { | |
1075 | return new pass_rtl_fwprop_addr (ctxt); | |
1076 | } |