]> git.ipfire.org Git - thirdparty/gcc.git/blame - gcc/fwprop.c
x86: Remove "%!" before ret
[thirdparty/gcc.git] / gcc / fwprop.c
CommitLineData
a52b023a 1/* RTL-based forward propagation pass for GNU compiler.
99dee823 2 Copyright (C) 2005-2021 Free Software Foundation, Inc.
a52b023a
PB
3 Contributed by Paolo Bonzini and Steven Bosscher.
4
5This file is part of GCC.
6
7GCC is free software; you can redistribute it and/or modify it under
8the terms of the GNU General Public License as published by the Free
9dcd6f09 9Software Foundation; either version 3, or (at your option) any later
a52b023a
PB
10version.
11
12GCC is distributed in the hope that it will be useful, but WITHOUT ANY
13WARRANTY; without even the implied warranty of MERCHANTABILITY or
14FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
15for more details.
16
17You should have received a copy of the GNU General Public License
9dcd6f09
NC
18along with GCC; see the file COPYING3. If not see
19<http://www.gnu.org/licenses/>. */
a52b023a 20
0b76990a
RS
21#define INCLUDE_ALGORITHM
22#define INCLUDE_FUNCTIONAL
a52b023a
PB
23#include "config.h"
24#include "system.h"
25#include "coretypes.h"
c7131fb2
AM
26#include "backend.h"
27#include "rtl.h"
28#include "df.h"
0b76990a 29#include "rtl-ssa.h"
957060b5 30
0b76990a 31#include "predict.h"
60393bbc
AM
32#include "cfgrtl.h"
33#include "cfgcleanup.h"
a52b023a
PB
34#include "cfgloop.h"
35#include "tree-pass.h"
aa4e2d7e 36#include "rtl-iter.h"
0b76990a 37#include "target.h"
a52b023a
PB
38
39/* This pass does simple forward propagation and simplification when an
40 operand of an insn can only come from a single def. This pass uses
0b76990a 41 RTL SSA, so it is global. However, we only do limited analysis of
a52b023a
PB
42 available expressions.
43
44 1) The pass tries to propagate the source of the def into the use,
45 and checks if the result is independent of the substituted value.
46 For example, the high word of a (zero_extend:DI (reg:SI M)) is always
47 zero, independent of the source register.
48
49 In particular, we propagate constants into the use site. Sometimes
50 RTL expansion did not put the constant in the same insn on purpose,
51 to satisfy a predicate, and the result will fail to be recognized;
52 but this happens rarely and in this case we can still create a
53 REG_EQUAL note. For multi-word operations, this
54
55 (set (subreg:SI (reg:DI 120) 0) (const_int 0))
56 (set (subreg:SI (reg:DI 120) 4) (const_int -1))
57 (set (subreg:SI (reg:DI 122) 0)
0b76990a 58 (ior:SI (subreg:SI (reg:DI 119) 0) (subreg:SI (reg:DI 120) 0)))
a52b023a 59 (set (subreg:SI (reg:DI 122) 4)
0b76990a 60 (ior:SI (subreg:SI (reg:DI 119) 4) (subreg:SI (reg:DI 120) 4)))
a52b023a
PB
61
62 can be simplified to the much simpler
63
64 (set (subreg:SI (reg:DI 122) 0) (subreg:SI (reg:DI 119)))
65 (set (subreg:SI (reg:DI 122) 4) (const_int -1))
66
67 This particular propagation is also effective at putting together
68 complex addressing modes. We are more aggressive inside MEMs, in
69 that all definitions are propagated if the use is in a MEM; if the
70 result is a valid memory address we check address_cost to decide
71 whether the substitution is worthwhile.
72
73 2) The pass propagates register copies. This is not as effective as
74 the copy propagation done by CSE's canon_reg, which works by walking
75 the instruction chain, it can help the other transformations.
76
77 We should consider removing this optimization, and instead reorder the
78 RTL passes, because GCSE does this transformation too. With some luck,
79 the CSE pass at the end of rest_of_handle_gcse could also go away.
80
81 3) The pass looks for paradoxical subregs that are actually unnecessary.
82 Things like this:
83
84 (set (reg:QI 120) (subreg:QI (reg:SI 118) 0))
85 (set (reg:QI 121) (subreg:QI (reg:SI 119) 0))
86 (set (reg:SI 122) (plus:SI (subreg:SI (reg:QI 120) 0)
0b76990a 87 (subreg:SI (reg:QI 121) 0)))
a52b023a
PB
88
89 are very common on machines that can only do word-sized operations.
90 For each use of a paradoxical subreg (subreg:WIDER (reg:NARROW N) 0),
91 if it has a single def and it is (subreg:NARROW (reg:WIDE M) 0),
92 we can replace the paradoxical subreg with simply (reg:WIDE M). The
93 above will simplify this to
94
95 (set (reg:QI 120) (subreg:QI (reg:SI 118) 0))
96 (set (reg:QI 121) (subreg:QI (reg:SI 119) 0))
97 (set (reg:SI 122) (plus:SI (reg:SI 118) (reg:SI 119)))
98
0b76990a 99 where the first two insns are now dead. */
a52b023a 100
0b76990a 101using namespace rtl_ssa;
a52b023a 102
a52b023a
PB
103static int num_changes;
104
a52b023a
PB
105/* Do not try to replace constant addresses or addresses of local and
106 argument slots. These MEM expressions are made only once and inserted
107 in many instructions, as well as being used to control symbol table
108 output. It is not safe to clobber them.
109
110 There are some uncommon cases where the address is already in a register
111 for some reason, but we cannot take advantage of that because we have
112 no easy way to unshare the MEM. In addition, looking up all stack
113 addresses is costly. */
114
115static bool
116can_simplify_addr (rtx addr)
117{
118 rtx reg;
119
120 if (CONSTANT_ADDRESS_P (addr))
121 return false;
122
123 if (GET_CODE (addr) == PLUS)
124 reg = XEXP (addr, 0);
125 else
126 reg = addr;
127
128 return (!REG_P (reg)
129 || (REGNO (reg) != FRAME_POINTER_REGNUM
130 && REGNO (reg) != HARD_FRAME_POINTER_REGNUM
131 && REGNO (reg) != ARG_POINTER_REGNUM));
132}
133
0b76990a
RS
134/* MEM is the result of an address simplification, and temporarily
135 undoing changes OLD_NUM_CHANGES onwards restores the original address.
136 Return whether it is good to use the new address instead of the
137 old one. INSN is the containing instruction. */
a52b023a
PB
138
139static bool
0b76990a 140should_replace_address (int old_num_changes, rtx mem, rtx_insn *insn)
a52b023a
PB
141{
142 int gain;
143
a52b023a 144 /* Prefer the new address if it is less expensive. */
0b76990a
RS
145 bool speed = optimize_bb_for_speed_p (BLOCK_FOR_INSN (insn));
146 temporarily_undo_changes (old_num_changes);
147 gain = address_cost (XEXP (mem, 0), GET_MODE (mem),
148 MEM_ADDR_SPACE (mem), speed);
149 redo_changes (old_num_changes);
150 gain -= address_cost (XEXP (mem, 0), GET_MODE (mem),
151 MEM_ADDR_SPACE (mem), speed);
a52b023a
PB
152
153 /* If the addresses have equivalent cost, prefer the new address
5e8f01f4 154 if it has the highest `set_src_cost'. That has the potential of
a52b023a
PB
155 eliminating the most insns without additional costs, and it
156 is the same that cse.c used to do. */
157 if (gain == 0)
0b76990a
RS
158 {
159 gain = set_src_cost (XEXP (mem, 0), VOIDmode, speed);
160 temporarily_undo_changes (old_num_changes);
161 gain -= set_src_cost (XEXP (mem, 0), VOIDmode, speed);
162 redo_changes (old_num_changes);
163 }
a52b023a
PB
164
165 return (gain > 0);
166}
167
460d667d 168
0b76990a
RS
169namespace
170{
171 class fwprop_propagation : public insn_propagation
172 {
173 public:
174 static const uint16_t CHANGED_MEM = FIRST_SPARE_RESULT;
175 static const uint16_t CONSTANT = FIRST_SPARE_RESULT << 1;
176 static const uint16_t PROFITABLE = FIRST_SPARE_RESULT << 2;
460d667d 177
b61461ac 178 fwprop_propagation (insn_info *, set_info *, rtx, rtx);
460d667d 179
0b76990a
RS
180 bool changed_mem_p () const { return result_flags & CHANGED_MEM; }
181 bool folded_to_constants_p () const;
182 bool profitable_p () const;
f40751dd 183
0b76990a
RS
184 bool check_mem (int, rtx) final override;
185 void note_simplification (int, uint16_t, rtx, rtx) final override;
186 uint16_t classify_result (rtx, rtx);
efb6bc55
IL
187
188 private:
189 const bool single_use_p;
190 const bool single_ebb_p;
0b76990a
RS
191 };
192}
460d667d 193
b61461ac 194/* Prepare to replace FROM with TO in USE_INSN. */
75da268e 195
efb6bc55 196fwprop_propagation::fwprop_propagation (insn_info *use_insn,
b61461ac 197 set_info *def, rtx from, rtx to)
efb6bc55 198 : insn_propagation (use_insn->rtl (), from, to),
b61461ac
IL
199 single_use_p (def->single_nondebug_use ()),
200 single_ebb_p (use_insn->ebb () == def->ebb ())
75da268e 201{
0b76990a
RS
202 should_check_mems = true;
203 should_note_simplifications = true;
75da268e 204}
460d667d 205
0b76990a
RS
206/* MEM is the result of an address simplification, and temporarily
207 undoing changes OLD_NUM_CHANGES onwards restores the original address.
208 Return true if the propagation should continue, false if it has failed. */
a52b023a 209
0b76990a
RS
210bool
211fwprop_propagation::check_mem (int old_num_changes, rtx mem)
a52b023a 212{
0b76990a
RS
213 if (!memory_address_addr_space_p (GET_MODE (mem), XEXP (mem, 0),
214 MEM_ADDR_SPACE (mem)))
460d667d 215 {
0b76990a 216 failure_reason = "would create an invalid MEM";
460d667d
PB
217 return false;
218 }
a52b023a 219
0b76990a
RS
220 temporarily_undo_changes (old_num_changes);
221 bool can_simplify = can_simplify_addr (XEXP (mem, 0));
222 redo_changes (old_num_changes);
223 if (!can_simplify)
a52b023a 224 {
0b76990a
RS
225 failure_reason = "would replace a frame address";
226 return false;
a52b023a
PB
227 }
228
0b76990a
RS
229 /* Copy propagations are always ok. Otherwise check the costs. */
230 if (!(REG_P (from) && REG_P (to))
231 && !should_replace_address (old_num_changes, mem, insn))
a52b023a 232 {
0b76990a
RS
233 failure_reason = "would increase the cost of a MEM";
234 return false;
235 }
a52b023a 236
0b76990a
RS
237 result_flags |= CHANGED_MEM;
238 return true;
239}
a52b023a 240
0b76990a
RS
241/* OLDX has been simplified to NEWX. Describe the change in terms of
242 result_flags. */
a52b023a 243
0b76990a
RS
244uint16_t
245fwprop_propagation::classify_result (rtx old_rtx, rtx new_rtx)
246{
247 if (CONSTANT_P (new_rtx))
248 {
249 /* If OLD_RTX is a LO_SUM, then it presumably exists for a reason,
250 and NEW_RTX is likely not a legitimate address. We want it to
251 disappear if it is invalid.
252
253 ??? Using the mode of the LO_SUM as the mode of the address
254 seems odd, but it was what the pre-SSA code did. */
255 if (GET_CODE (old_rtx) == LO_SUM
256 && !memory_address_p (GET_MODE (old_rtx), new_rtx))
257 return CONSTANT;
258 return CONSTANT | PROFITABLE;
a52b023a
PB
259 }
260
712a93d6
RB
261 /* Allow replacements that simplify operations on a vector or complex
262 value to a component. The most prominent case is
263 (subreg ([vec_]concat ...)). */
0b76990a
RS
264 if (REG_P (new_rtx)
265 && !HARD_REGISTER_P (new_rtx)
266 && (VECTOR_MODE_P (GET_MODE (from))
267 || COMPLEX_MODE_P (GET_MODE (from)))
268 && GET_MODE (new_rtx) == GET_MODE_INNER (GET_MODE (from)))
269 return PROFITABLE;
712a93d6 270
efb6bc55
IL
271 /* Allow (subreg (mem)) -> (mem) simplifications with the following
272 exceptions:
273 1) Propagating (mem)s into multiple uses is not profitable.
274 2) Propagating (mem)s across EBBs may not be profitable if the source EBB
275 runs less frequently.
276 3) Propagating (mem)s into paradoxical (subreg)s is not profitable.
277 4) Creating new (mem/v)s is not correct, since DCE will not remove the old
278 ones. */
279 if (single_use_p
280 && single_ebb_p
281 && SUBREG_P (old_rtx)
282 && !paradoxical_subreg_p (old_rtx)
283 && MEM_P (new_rtx)
284 && !MEM_VOLATILE_P (new_rtx))
285 return PROFITABLE;
286
0b76990a 287 return 0;
a52b023a
PB
288}
289
0b76990a
RS
290/* Record that OLD_RTX has been simplified to NEW_RTX. OLD_NUM_CHANGES
291 is the number of unrelated changes that had been made before processing
292 OLD_RTX and its subrtxes. OLD_RESULT_FLAGS is the value that result_flags
293 had at that point. */
460d667d 294
0b76990a
RS
295void
296fwprop_propagation::note_simplification (int old_num_changes,
297 uint16_t old_result_flags,
298 rtx old_rtx, rtx new_rtx)
299{
300 result_flags &= ~(CONSTANT | PROFITABLE);
301 uint16_t new_flags = classify_result (old_rtx, new_rtx);
302 if (old_num_changes)
303 new_flags &= old_result_flags;
304 result_flags |= new_flags;
305}
460d667d 306
0b76990a
RS
307/* Return true if all substitutions eventually folded to constants. */
308
309bool
310fwprop_propagation::folded_to_constants_p () const
460d667d 311{
0b76990a
RS
312 /* If we're propagating a HIGH, require it to be folded with a
313 partnering LO_SUM. For example, a REG_EQUAL note with a register
314 replaced by an unfolded HIGH is not useful. */
315 if (CONSTANT_P (to) && GET_CODE (to) != HIGH)
316 return true;
317 return !(result_flags & UNSIMPLIFIED) && (result_flags & CONSTANT);
460d667d
PB
318}
319
320
0b76990a
RS
321/* Return true if it is worth keeping the result of the propagation,
322 false if it would increase the complexity of the pattern too much. */
a52b023a 323
0b76990a
RS
324bool
325fwprop_propagation::profitable_p () const
a52b023a 326{
0b76990a
RS
327 if (changed_mem_p ())
328 return true;
a52b023a 329
0b76990a
RS
330 if (!(result_flags & UNSIMPLIFIED)
331 && (result_flags & PROFITABLE))
332 return true;
a52b023a 333
0b76990a
RS
334 if (REG_P (to))
335 return true;
a52b023a 336
0b76990a
RS
337 if (GET_CODE (to) == SUBREG
338 && REG_P (SUBREG_REG (to))
339 && !paradoxical_subreg_p (to))
340 return true;
a52b023a 341
0b76990a
RS
342 if (CONSTANT_P (to))
343 return true;
a52b023a 344
0b76990a
RS
345 return false;
346}
a52b023a 347
0b76990a 348/* Check that X has a single def. */
a52b023a 349
6fb5fa3c 350static bool
0b76990a 351reg_single_def_p (rtx x)
a52b023a 352{
0b76990a
RS
353 return REG_P (x) && crtl->ssa->single_dominating_def (REGNO (x));
354}
a52b023a 355
0b76990a 356/* Return true if X contains a paradoxical subreg. */
a52b023a 357
0b76990a
RS
358static bool
359contains_paradoxical_subreg_p (rtx x)
360{
361 subrtx_var_iterator::array_type array;
362 FOR_EACH_SUBRTX_VAR (iter, array, x, NONCONST)
363 {
364 x = *iter;
365 if (SUBREG_P (x) && paradoxical_subreg_p (x))
366 return true;
a52b023a
PB
367 }
368 return false;
369}
370
b61461ac
IL
371/* Try to substitute (set DEST SRC), which defines DEF, into note NOTE of
372 USE_INSN. Return the number of substitutions on success, otherwise return
373 -1 and leave USE_INSN unchanged.
a52b023a 374
b61461ac 375 If REQUIRE_CONSTANT is true, require all substituted occurrences of SRC
0b76990a
RS
376 to fold to a constant, so that the note does not use any more registers
377 than it did previously. If REQUIRE_CONSTANT is false, also allow the
378 substitution if it's something we'd normally allow for the main
379 instruction pattern. */
692e7e54 380
0b76990a 381static int
b61461ac 382try_fwprop_subst_note (insn_info *use_insn, set_info *def,
0b76990a 383 rtx note, rtx dest, rtx src, bool require_constant)
a52b023a 384{
0b76990a 385 rtx_insn *use_rtl = use_insn->rtl ();
b61461ac 386 insn_info *def_insn = def->insn ();
a52b023a 387
0b76990a 388 insn_change_watermark watermark;
b61461ac 389 fwprop_propagation prop (use_insn, def, dest, src);
0b76990a 390 if (!prop.apply_to_rvalue (&XEXP (note, 0)))
a52b023a 391 {
0b76990a
RS
392 if (dump_file && (dump_flags & TDF_DETAILS))
393 fprintf (dump_file, "cannot propagate from insn %d into"
394 " notes of insn %d: %s\n", def_insn->uid (),
395 use_insn->uid (), prop.failure_reason);
396 return -1;
a52b023a
PB
397 }
398
0b76990a
RS
399 if (prop.num_replacements == 0)
400 return 0;
a52b023a 401
0b76990a 402 if (require_constant)
a52b023a 403 {
0b76990a
RS
404 if (!prop.folded_to_constants_p ())
405 {
406 if (dump_file && (dump_flags & TDF_DETAILS))
407 fprintf (dump_file, "cannot propagate from insn %d into"
408 " notes of insn %d: %s\n", def_insn->uid (),
409 use_insn->uid (), "wouldn't fold to constants");
410 return -1;
411 }
a52b023a
PB
412 }
413 else
414 {
0b76990a 415 if (!prop.folded_to_constants_p () && !prop.profitable_p ())
6fb5fa3c 416 {
0b76990a
RS
417 if (dump_file && (dump_flags & TDF_DETAILS))
418 fprintf (dump_file, "cannot propagate from insn %d into"
419 " notes of insn %d: %s\n", def_insn->uid (),
420 use_insn->uid (), "would increase complexity of node");
421 return -1;
6fb5fa3c 422 }
a52b023a
PB
423 }
424
0b76990a
RS
425 if (dump_file && (dump_flags & TDF_DETAILS))
426 {
427 fprintf (dump_file, "\nin notes of insn %d, replacing:\n ",
428 INSN_UID (use_rtl));
429 temporarily_undo_changes (0);
430 print_inline_rtx (dump_file, note, 2);
431 redo_changes (0);
432 fprintf (dump_file, "\n with:\n ");
433 print_inline_rtx (dump_file, note, 2);
434 fprintf (dump_file, "\n");
435 }
436 watermark.keep ();
437 return prop.num_replacements;
a52b023a
PB
438}
439
b61461ac 440/* Try to substitute (set DEST SRC), which defines DEF, into location LOC of
0b76990a
RS
441 USE_INSN's pattern. Return true on success, otherwise leave USE_INSN
442 unchanged. */
a52b023a 443
0b76990a
RS
444static bool
445try_fwprop_subst_pattern (obstack_watermark &attempt, insn_change &use_change,
b61461ac 446 set_info *def, rtx *loc, rtx dest, rtx src)
a52b023a 447{
0b76990a
RS
448 insn_info *use_insn = use_change.insn ();
449 rtx_insn *use_rtl = use_insn->rtl ();
b61461ac 450 insn_info *def_insn = def->insn ();
a52b023a 451
0b76990a 452 insn_change_watermark watermark;
b61461ac 453 fwprop_propagation prop (use_insn, def, dest, src);
0b76990a
RS
454 if (!prop.apply_to_pattern (loc))
455 {
456 if (dump_file && (dump_flags & TDF_DETAILS))
457 fprintf (dump_file, "cannot propagate from insn %d into"
458 " insn %d: %s\n", def_insn->uid (), use_insn->uid (),
459 prop.failure_reason);
460 return false;
dc007c1f 461 }
a52b023a 462
0b76990a
RS
463 if (prop.num_replacements == 0)
464 return false;
a52b023a 465
0b76990a
RS
466 if (!prop.profitable_p ())
467 {
468 if (dump_file && (dump_flags & TDF_DETAILS))
469 fprintf (dump_file, "cannot propagate from insn %d into"
470 " insn %d: %s\n", def_insn->uid (), use_insn->uid (),
471 "would increase complexity of pattern");
472 return false;
473 }
a52b023a 474
0b76990a
RS
475 if (dump_file && (dump_flags & TDF_DETAILS))
476 {
477 fprintf (dump_file, "\npropagating insn %d into insn %d, replacing:\n",
478 def_insn->uid (), use_insn->uid ());
479 temporarily_undo_changes (0);
480 print_rtl_single (dump_file, PATTERN (use_rtl));
481 redo_changes (0);
482 }
a52b023a 483
0b76990a
RS
484 /* ??? In theory, it should be better to use insn costs rather than
485 set_src_costs here. That would involve replacing this code with
486 change_is_worthwhile. */
487 bool ok = recog (attempt, use_change);
488 if (ok && !prop.changed_mem_p () && !use_insn->is_asm ())
489 if (rtx use_set = single_set (use_rtl))
490 {
491 bool speed = optimize_bb_for_speed_p (BLOCK_FOR_INSN (use_rtl));
492 temporarily_undo_changes (0);
493 auto old_cost = set_src_cost (SET_SRC (use_set),
494 GET_MODE (SET_DEST (use_set)), speed);
495 redo_changes (0);
496 auto new_cost = set_src_cost (SET_SRC (use_set),
497 GET_MODE (SET_DEST (use_set)), speed);
498 if (new_cost > old_cost)
499 {
500 if (dump_file)
501 fprintf (dump_file, "change not profitable"
502 " (cost %d -> cost %d)\n", old_cost, new_cost);
503 ok = false;
504 }
505 }
a52b023a 506
0b76990a 507 if (!ok)
a52b023a 508 {
0b76990a
RS
509 /* The pattern didn't match, but if all uses of SRC folded to
510 constants, we can add a REG_EQUAL note for the result, if there
511 isn't one already. */
512 if (!prop.folded_to_constants_p ())
513 return false;
a52b023a 514
0b76990a
RS
515 /* Test this first to avoid creating an unnecessary copy of SRC. */
516 if (find_reg_note (use_rtl, REG_EQUAL, NULL_RTX))
517 return false;
a52b023a 518
0b76990a
RS
519 rtx set = set_for_reg_notes (use_rtl);
520 if (!set || !REG_P (SET_DEST (set)))
521 return false;
a52b023a 522
0b76990a
RS
523 rtx value = copy_rtx (SET_SRC (set));
524 cancel_changes (0);
dc007c1f 525
0b76990a
RS
526 /* If there are any paradoxical SUBREGs, drop the REG_EQUAL note,
527 because the bits in there can be anything and so might not
528 match the REG_EQUAL note content. See PR70574. */
529 if (contains_paradoxical_subreg_p (SET_SRC (set)))
530 return false;
dc007c1f 531
0b76990a
RS
532 if (dump_file && (dump_flags & TDF_DETAILS))
533 fprintf (dump_file, " Setting REG_EQUAL note\n");
dc007c1f 534
0b76990a 535 return set_unique_reg_note (use_rtl, REG_EQUAL, value);
dc007c1f 536 }
0b76990a
RS
537
538 rtx *note_ptr = &REG_NOTES (use_rtl);
539 while (rtx note = *note_ptr)
dc007c1f 540 {
0b76990a
RS
541 if ((REG_NOTE_KIND (note) == REG_EQUAL
542 || REG_NOTE_KIND (note) == REG_EQUIV)
b61461ac 543 && try_fwprop_subst_note (use_insn, def, note, dest, src, false) < 0)
0b76990a
RS
544 {
545 *note_ptr = XEXP (note, 1);
546 free_EXPR_LIST_node (note);
547 }
548 else
549 note_ptr = &XEXP (note, 1);
a52b023a 550 }
dc007c1f 551
0b76990a
RS
552 confirm_change_group ();
553 crtl->ssa->change_insn (use_change);
554 num_changes++;
555 return true;
a52b023a
PB
556}
557
b61461ac 558/* Try to substitute (set DEST SRC), which defines DEF, into USE_INSN's notes,
0b76990a
RS
559 given that it was not possible to do this for USE_INSN's main pattern.
560 Return true on success, otherwise leave USE_INSN unchanged. */
a52b023a
PB
561
562static bool
b61461ac 563try_fwprop_subst_notes (insn_info *use_insn, set_info *def,
0b76990a
RS
564 rtx dest, rtx src)
565{
566 rtx_insn *use_rtl = use_insn->rtl ();
567 for (rtx note = REG_NOTES (use_rtl); note; note = XEXP (note, 1))
568 if ((REG_NOTE_KIND (note) == REG_EQUAL
569 || REG_NOTE_KIND (note) == REG_EQUIV)
b61461ac 570 && try_fwprop_subst_note (use_insn, def, note, dest, src, true) > 0)
0b76990a
RS
571 {
572 confirm_change_group ();
573 return true;
574 }
a52b023a 575
0b76990a
RS
576 return false;
577}
dc007c1f 578
b61461ac 579/* Check whether we could validly substitute (set DEST SRC), which defines DEF,
0b76990a
RS
580 into USE. If so, first try performing the substitution in location LOC
581 of USE->insn ()'s pattern. If that fails, try instead to substitute
582 into the notes.
a52b023a 583
0b76990a 584 Return true on success, otherwise leave USE_INSN unchanged. */
de266950 585
0b76990a 586static bool
b61461ac 587try_fwprop_subst (use_info *use, set_info *def,
0b76990a
RS
588 rtx *loc, rtx dest, rtx src)
589{
590 insn_info *use_insn = use->insn ();
b61461ac 591 insn_info *def_insn = def->insn ();
de266950 592
0b76990a
RS
593 auto attempt = crtl->ssa->new_change_attempt ();
594 use_array src_uses = remove_note_accesses (attempt, def_insn->uses ());
595
596 /* ??? Not really a meaningful test: it means we can propagate arithmetic
597 involving hard registers but not bare references to them. A better
598 test would be to iterate over src_uses looking for hard registers
599 that are not fixed. */
600 if (REG_P (src) && HARD_REGISTER_P (src))
601 return false;
de266950 602
0b76990a
RS
603 /* ??? It would be better to make this EBB-based instead. That would
604 involve checking for equal EBBs rather than equal BBs and trying
605 to make the uses available at use_insn->ebb ()->first_bb (). */
606 if (def_insn->bb () != use_insn->bb ())
de266950 607 {
0b76990a 608 src_uses = crtl->ssa->make_uses_available (attempt, src_uses,
c97351c0
RS
609 use_insn->bb (),
610 use_insn->is_debug_insn ());
0b76990a
RS
611 if (!src_uses.is_valid ())
612 return false;
a52b023a 613 }
a52b023a 614
0b76990a
RS
615 insn_change use_change (use_insn);
616 use_change.new_uses = merge_access_arrays (attempt, use_change.new_uses,
617 src_uses);
618 if (!use_change.new_uses.is_valid ())
619 return false;
1a13c0a2 620
0b76990a 621 /* ??? We could allow movement within the EBB by adding:
de266950 622
0b76990a
RS
623 use_change.move_range = use_insn->ebb ()->insn_range (); */
624 if (!restrict_movement (use_change))
625 return false;
dc007c1f 626
b61461ac
IL
627 return (try_fwprop_subst_pattern (attempt, use_change, def, loc, dest, src)
628 || try_fwprop_subst_notes (use_insn, def, dest, src));
a52b023a
PB
629}
630
e85122be
AM
631/* For the given single_set INSN, containing SRC known to be a
632 ZERO_EXTEND or SIGN_EXTEND of a register, return true if INSN
633 is redundant due to the register being set by a LOAD_EXTEND_OP
634 load from memory. */
635
636static bool
0b76990a 637free_load_extend (rtx src, insn_info *insn)
e25b7843 638{
0b76990a 639 rtx reg = XEXP (src, 0);
3712c7a3 640 if (load_extend_op (GET_MODE (reg)) != GET_CODE (src))
e85122be 641 return false;
e25b7843 642
0b76990a
RS
643 def_info *def = nullptr;
644 for (use_info *use : insn->uses ())
645 if (use->regno () == REGNO (reg))
646 {
647 def = use->def ();
648 break;
649 }
e85122be 650
e85122be
AM
651 if (!def)
652 return false;
653
0b76990a
RS
654 insn_info *def_insn = def->insn ();
655 if (def_insn->is_artificial ())
e85122be
AM
656 return false;
657
0b76990a
RS
658 rtx_insn *def_rtl = def_insn->rtl ();
659 if (NONJUMP_INSN_P (def_rtl))
e85122be 660 {
0b76990a 661 rtx patt = PATTERN (def_rtl);
e85122be
AM
662
663 if (GET_CODE (patt) == SET
664 && GET_CODE (SET_SRC (patt)) == MEM
665 && rtx_equal_p (SET_DEST (patt), reg))
666 return true;
e25b7843 667 }
e85122be 668 return false;
e25b7843 669}
e25b7843 670
0b76990a
RS
671/* Subroutine of forward_propagate_subreg that handles a use of DEST
672 in REF. The other parameters are the same. */
a52b023a
PB
673
674static bool
b61461ac 675forward_propagate_subreg (use_info *use, set_info *def,
0b76990a 676 rtx dest, rtx src, df_ref ref)
a52b023a 677{
6b9c3dec 678 scalar_int_mode int_use_mode, src_mode;
a52b023a 679
e25b7843 680 /* Only consider subregs... */
0b76990a 681 rtx use_reg = DF_REF_REG (ref);
ef4bddc2 682 machine_mode use_mode = GET_MODE (use_reg);
a52b023a 683 if (GET_CODE (use_reg) != SUBREG
0b76990a 684 || GET_MODE (SUBREG_REG (use_reg)) != GET_MODE (dest))
a52b023a
PB
685 return false;
686
0b76990a
RS
687 /* ??? Replacing throughout the pattern would help for match_dups. */
688 rtx *loc = DF_REF_LOC (ref);
03a95621 689 if (paradoxical_subreg_p (use_reg))
e25b7843
AM
690 {
691 /* If this is a paradoxical SUBREG, we have no idea what value the
692 extra bits would have. However, if the operand is equivalent to
693 a SUBREG whose operand is the same as our mode, and all the modes
694 are within a word, we can just use the inner operand because
695 these SUBREGs just say how to treat the register. */
e25b7843
AM
696 if (GET_CODE (src) == SUBREG
697 && REG_P (SUBREG_REG (src))
509a31f8 698 && REGNO (SUBREG_REG (src)) >= FIRST_PSEUDO_REGISTER
e25b7843 699 && GET_MODE (SUBREG_REG (src)) == use_mode
0b76990a 700 && subreg_lowpart_p (src))
b61461ac 701 return try_fwprop_subst (use, def, loc, use_reg, SUBREG_REG (src));
e25b7843
AM
702 }
703
704 /* If this is a SUBREG of a ZERO_EXTEND or SIGN_EXTEND, and the SUBREG
705 is the low part of the reg being extended then just use the inner
706 operand. Don't do this if the ZERO_EXTEND or SIGN_EXTEND insn will
7a613929
RS
707 be removed due to it matching a LOAD_EXTEND_OP load from memory,
708 or due to the operation being a no-op when applied to registers.
709 For example, if we have:
710
711 A: (set (reg:DI X) (sign_extend:DI (reg:SI Y)))
712 B: (... (subreg:SI (reg:DI X)) ...)
713
714 and mode_rep_extended says that Y is already sign-extended,
715 the backend will typically allow A to be combined with the
716 definition of Y or, failing that, allow A to be deleted after
717 reload through register tying. Introducing more uses of Y
718 prevents both optimisations. */
6b9c3dec
RS
719 else if (is_a <scalar_int_mode> (use_mode, &int_use_mode)
720 && subreg_lowpart_p (use_reg))
e25b7843 721 {
e25b7843
AM
722 if ((GET_CODE (src) == ZERO_EXTEND
723 || GET_CODE (src) == SIGN_EXTEND)
6b9c3dec 724 && is_a <scalar_int_mode> (GET_MODE (src), &src_mode)
e25b7843 725 && REG_P (XEXP (src, 0))
509a31f8 726 && REGNO (XEXP (src, 0)) >= FIRST_PSEUDO_REGISTER
e25b7843 727 && GET_MODE (XEXP (src, 0)) == use_mode
b61461ac 728 && !free_load_extend (src, def->insn ())
6b9c3dec 729 && (targetm.mode_rep_extended (int_use_mode, src_mode)
0b76990a 730 != (int) GET_CODE (src)))
b61461ac 731 return try_fwprop_subst (use, def, loc, use_reg, XEXP (src, 0));
e25b7843
AM
732 }
733
734 return false;
a52b023a
PB
735}
736
b61461ac 737/* Try to substitute (set DEST SRC), which defines DEF, into USE and simplify
0b76990a
RS
738 the result, handling cases where DEST is used in a subreg and where
739 applying that subreg to SRC results in a useful simplification. */
ce372372
JJ
740
741static bool
b61461ac 742forward_propagate_subreg (use_info *use, set_info *def, rtx dest, rtx src)
ce372372 743{
0b76990a
RS
744 if (!use->includes_subregs () || !REG_P (dest))
745 return false;
ce372372 746
0b76990a
RS
747 if (GET_CODE (src) != SUBREG
748 && GET_CODE (src) != ZERO_EXTEND
749 && GET_CODE (src) != SIGN_EXTEND)
ce372372
JJ
750 return false;
751
0b76990a
RS
752 rtx_insn *use_rtl = use->insn ()->rtl ();
753 df_ref ref;
ce372372 754
0b76990a
RS
755 FOR_EACH_INSN_USE (ref, use_rtl)
756 if (DF_REF_REGNO (ref) == use->regno ()
b61461ac 757 && forward_propagate_subreg (use, def, dest, src, ref))
0b76990a 758 return true;
ce372372 759
0b76990a
RS
760 FOR_EACH_INSN_EQ_USE (ref, use_rtl)
761 if (DF_REF_REGNO (ref) == use->regno ()
b61461ac 762 && forward_propagate_subreg (use, def, dest, src, ref))
0b76990a 763 return true;
ce372372 764
0b76990a 765 return false;
ce372372
JJ
766}
767
b61461ac 768/* Try to substitute (set DEST SRC), which defines DEF, into USE and
0b76990a 769 simplify the result. */
a52b023a
PB
770
771static bool
b61461ac 772forward_propagate_and_simplify (use_info *use, set_info *def,
0b76990a
RS
773 rtx dest, rtx src)
774{
775 insn_info *use_insn = use->insn ();
776 rtx_insn *use_rtl = use_insn->rtl ();
b61461ac 777 insn_info *def_insn = def->insn ();
0b76990a
RS
778
779 /* ??? This check seems unnecessary. We should be able to propagate
780 into any kind of instruction, regardless of whether it's a single set.
781 It seems odd to be more permissive with asms than normal instructions. */
782 bool need_single_set = (!use_insn->is_asm () && !use_insn->is_debug_insn ());
783 rtx use_set = single_set (use_rtl);
784 if (need_single_set && !use_set)
a52b023a
PB
785 return false;
786
bd1cd0d0 787 /* Do not propagate into PC etc.
a52b023a 788
0b76990a
RS
789 ??? This too seems unnecessary. The current code should work correctly
790 without it, including cases where jumps become unconditional. */
791 if (use_set && GET_MODE (SET_DEST (use_set)) == VOIDmode)
8e8af9b7
RS
792 return false;
793
0b76990a
RS
794 /* In __asm don't replace if src might need more registers than
795 reg, as that could increase register pressure on the __asm. */
796 if (use_insn->is_asm () && def_insn->uses ().size () > 1)
a52b023a
PB
797 return false;
798
799 /* Check if the def is loading something from the constant pool; in this
800 case we would undo optimization such as compress_float_constant.
801 Still, we can set a REG_EQUAL note. */
802 if (MEM_P (src) && MEM_READONLY_P (src))
803 {
804 rtx x = avoid_constant_pool_reference (src);
0b76990a
RS
805 rtx note_set;
806 if (x != src
807 && (note_set = set_for_reg_notes (use_rtl))
808 && REG_P (SET_DEST (note_set))
809 && !contains_paradoxical_subreg_p (SET_SRC (note_set)))
a52b023a 810 {
0b76990a
RS
811 rtx note = find_reg_note (use_rtl, REG_EQUAL, NULL_RTX);
812 rtx old_rtx = note ? XEXP (note, 0) : SET_SRC (note_set);
60564289
KG
813 rtx new_rtx = simplify_replace_rtx (old_rtx, src, x);
814 if (old_rtx != new_rtx)
0b76990a 815 set_unique_reg_note (use_rtl, REG_EQUAL, copy_rtx (new_rtx));
a52b023a
PB
816 }
817 return false;
818 }
819
0b76990a
RS
820 /* ??? Unconditionally propagating into PATTERN would work better
821 for instructions that have match_dups. */
822 rtx *loc = need_single_set ? &use_set : &PATTERN (use_rtl);
b61461ac 823 return try_fwprop_subst (use, def, loc, dest, src);
a52b023a
PB
824}
825
a52b023a 826/* Given a use USE of an insn, if it has a single reaching
7360d2ac 827 definition, try to forward propagate it into that insn.
0b76990a
RS
828 Return true if something changed.
829
75da268e 830 REG_PROP_ONLY is true if we should only propagate register copies. */
a52b023a 831
7360d2ac 832static bool
0b76990a 833forward_propagate_into (use_info *use, bool reg_prop_only = false)
a52b023a 834{
0b76990a 835 if (use->includes_read_writes ())
7360d2ac 836 return false;
a52b023a 837
0b76990a 838 /* Disregard uninitialized uses. */
b61461ac 839 set_info *def = use->def ();
00952e97 840 if (!def)
7360d2ac 841 return false;
a52b023a 842
0b76990a
RS
843 /* Only consider single-register definitions. This could be relaxed,
844 but it should rarely be needed before RA. */
845 def = look_through_degenerate_phi (def);
846 if (def->includes_multiregs ())
847 return false;
a52b023a 848
0b76990a
RS
849 /* Only consider uses whose definition comes from a real instruction. */
850 insn_info *def_insn = def->insn ();
851 if (def_insn->is_artificial ())
7360d2ac 852 return false;
a52b023a 853
0b76990a
RS
854 rtx_insn *def_rtl = def_insn->rtl ();
855 if (!NONJUMP_INSN_P (def_rtl))
856 return false;
857 /* ??? This seems an unnecessary restriction. We can easily tell
858 which set the definition comes from. */
859 if (multiple_sets (def_rtl))
7360d2ac 860 return false;
0b76990a 861 rtx def_set = simple_regno_set (PATTERN (def_rtl), def->regno ());
a52b023a 862 if (!def_set)
7360d2ac 863 return false;
a52b023a 864
0b76990a
RS
865 rtx dest = SET_DEST (def_set);
866 rtx src = SET_SRC (def_set);
75da268e
PK
867
868 /* Allow propagations into a loop only for reg-to-reg copies, since
869 replacing one register by another shouldn't increase the cost. */
0b76990a
RS
870 struct loop *def_loop = def_insn->bb ()->cfg_bb ()->loop_father;
871 struct loop *use_loop = use->bb ()->cfg_bb ()->loop_father;
872 if ((reg_prop_only || def_loop != use_loop)
873 && (!reg_single_def_p (dest) || !reg_single_def_p (src)))
874 return false;
75da268e 875
0b76990a
RS
876 /* Don't substitute into a non-local goto, this confuses CFG. */
877 insn_info *use_insn = use->insn ();
878 rtx_insn *use_rtl = use_insn->rtl ();
879 if (JUMP_P (use_rtl)
880 && find_reg_note (use_rtl, REG_NON_LOCAL_GOTO, NULL_RTX))
75da268e
PK
881 return false;
882
b61461ac
IL
883 if (forward_propagate_and_simplify (use, def, dest, src)
884 || forward_propagate_subreg (use, def, dest, src))
0b76990a 885 return true;
a5a9046d 886
7360d2ac 887 return false;
a52b023a 888}
a52b023a
PB
889\f
890static void
891fwprop_init (void)
892{
893 num_changes = 0;
6e0b633f 894 calculate_dominance_info (CDI_DOMINATORS);
a52b023a
PB
895
896 /* We do not always want to propagate into loops, so we have to find
c5f4be84
SB
897 loops and be careful about them. Avoid CFG modifications so that
898 we don't have to update dominance information afterwards for
899 build_single_def_use_links. */
900 loop_optimizer_init (AVOID_CFG_MODIFICATIONS);
a52b023a 901
0b76990a
RS
902 df_analyze ();
903 crtl->ssa = new rtl_ssa::function_info (cfun);
a52b023a
PB
904}
905
906static void
907fwprop_done (void)
908{
fb406162 909 loop_optimizer_finalize ();
6fb5fa3c 910
0b76990a 911 crtl->ssa->perform_pending_updates ();
6e0b633f 912 free_dominance_info (CDI_DOMINATORS);
a52b023a 913 cleanup_cfg (0);
0b76990a
RS
914
915 delete crtl->ssa;
916 crtl->ssa = nullptr;
917
a52b023a
PB
918 delete_trivially_dead_insns (get_insns (), max_reg_num ());
919
920 if (dump_file)
921 fprintf (dump_file,
922 "\nNumber of successful forward propagations: %d\n\n",
923 num_changes);
924}
925
0b76990a
RS
926/* Try to optimize INSN, returning true if something changes.
927 FWPROP_ADDR_P is true if we are running fwprop_addr rather than
928 the full fwprop. */
929
930static bool
931fwprop_insn (insn_info *insn, bool fwprop_addr_p)
932{
933 for (use_info *use : insn->uses ())
934 {
935 if (use->is_mem ())
936 continue;
937 /* ??? The choices here follow those in the pre-SSA code. */
938 if (!use->includes_address_uses ())
939 {
940 if (forward_propagate_into (use, fwprop_addr_p))
941 return true;
942 }
943 else
944 {
945 struct loop *loop = insn->bb ()->cfg_bb ()->loop_father;
946 /* The outermost loop is not really a loop. */
947 if (loop == NULL || loop_outer (loop) == NULL)
948 {
949 if (forward_propagate_into (use, fwprop_addr_p))
950 return true;
951 }
952 else if (fwprop_addr_p)
953 {
954 if (forward_propagate_into (use, false))
955 return true;
956 }
957 }
958 }
959 return false;
960}
a52b023a 961
a52b023a
PB
962/* Main entry point. */
963
964static bool
965gate_fwprop (void)
966{
967 return optimize > 0 && flag_forward_propagate;
968}
969
970static unsigned int
75da268e 971fwprop (bool fwprop_addr_p)
a52b023a 972{
a52b023a
PB
973 fwprop_init ();
974
0b76990a
RS
975 /* Go through all the instructions (including debug instructions) looking
976 for uses that we could propagate into.
a52b023a
PB
977
978 Do not forward propagate addresses into loops until after unrolling.
979 CSE did so because it was able to fix its own mess, but we are not. */
980
0b76990a 981 insn_info *next;
75da268e 982
0b76990a
RS
983 /* ??? This code uses a worklist in order to preserve the behavior
984 of the pre-SSA implementation. It would be better to instead
985 iterate on each instruction until no more propagations are
986 possible, then move on to the next. */
987 auto_vec<insn_info *> worklist;
988 for (insn_info *insn = crtl->ssa->first_insn (); insn; insn = next)
989 {
990 next = insn->next_any_insn ();
991 if (insn->can_be_optimized () || insn->is_debug_insn ())
992 if (fwprop_insn (insn, fwprop_addr_p))
993 worklist.safe_push (insn);
994 }
995 for (unsigned int i = 0; i < worklist.length (); ++i)
996 {
997 insn_info *insn = worklist[i];
998 if (fwprop_insn (insn, fwprop_addr_p))
999 worklist.safe_push (insn);
a52b023a
PB
1000 }
1001
1002 fwprop_done ();
a52b023a
PB
1003 return 0;
1004}
1005
27a4cd48
DM
1006namespace {
1007
1008const pass_data pass_data_rtl_fwprop =
a52b023a 1009{
27a4cd48
DM
1010 RTL_PASS, /* type */
1011 "fwprop1", /* name */
1012 OPTGROUP_NONE, /* optinfo_flags */
27a4cd48
DM
1013 TV_FWPROP, /* tv_id */
1014 0, /* properties_required */
1015 0, /* properties_provided */
1016 0, /* properties_destroyed */
1017 0, /* todo_flags_start */
3bea341f 1018 TODO_df_finish, /* todo_flags_finish */
a52b023a
PB
1019};
1020
27a4cd48
DM
1021class pass_rtl_fwprop : public rtl_opt_pass
1022{
1023public:
c3284718
RS
1024 pass_rtl_fwprop (gcc::context *ctxt)
1025 : rtl_opt_pass (pass_data_rtl_fwprop, ctxt)
27a4cd48
DM
1026 {}
1027
1028 /* opt_pass methods: */
1a3d085c 1029 virtual bool gate (function *) { return gate_fwprop (); }
75da268e 1030 virtual unsigned int execute (function *) { return fwprop (false); }
27a4cd48
DM
1031
1032}; // class pass_rtl_fwprop
1033
1034} // anon namespace
1035
1036rtl_opt_pass *
1037make_pass_rtl_fwprop (gcc::context *ctxt)
1038{
1039 return new pass_rtl_fwprop (ctxt);
1040}
1041
27a4cd48
DM
1042namespace {
1043
1044const pass_data pass_data_rtl_fwprop_addr =
a52b023a 1045{
27a4cd48
DM
1046 RTL_PASS, /* type */
1047 "fwprop2", /* name */
1048 OPTGROUP_NONE, /* optinfo_flags */
27a4cd48
DM
1049 TV_FWPROP, /* tv_id */
1050 0, /* properties_required */
1051 0, /* properties_provided */
1052 0, /* properties_destroyed */
1053 0, /* todo_flags_start */
3bea341f 1054 TODO_df_finish, /* todo_flags_finish */
a52b023a 1055};
27a4cd48
DM
1056
1057class pass_rtl_fwprop_addr : public rtl_opt_pass
1058{
1059public:
c3284718
RS
1060 pass_rtl_fwprop_addr (gcc::context *ctxt)
1061 : rtl_opt_pass (pass_data_rtl_fwprop_addr, ctxt)
27a4cd48
DM
1062 {}
1063
1064 /* opt_pass methods: */
1a3d085c 1065 virtual bool gate (function *) { return gate_fwprop (); }
75da268e 1066 virtual unsigned int execute (function *) { return fwprop (true); }
27a4cd48
DM
1067
1068}; // class pass_rtl_fwprop_addr
1069
1070} // anon namespace
1071
1072rtl_opt_pass *
1073make_pass_rtl_fwprop_addr (gcc::context *ctxt)
1074{
1075 return new pass_rtl_fwprop_addr (ctxt);
1076}