gcc/fwprop.c

   1 /* RTL-based forward propagation pass for GNU compiler.
   2    Copyright (C) 2005-2021 Free Software Foundation, Inc.
   3    Contributed by Paolo Bonzini and Steven Bosscher.
   4
   5 This file is part of GCC.
   6
   7 GCC is free software; you can redistribute it and/or modify it under
   8 the terms of the GNU General Public License as published by the Free
   9 Software Foundation; either version 3, or (at your option) any later
  10 version.
  11
  12 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
  13 WARRANTY; without even the implied warranty of MERCHANTABILITY or
  14 FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  15 for more details.
  16
  17 You should have received a copy of the GNU General Public License
  18 along with GCC; see the file COPYING3.  If not see
  19 <http://www.gnu.org/licenses/>.  */
  20
  21 #define INCLUDE_ALGORITHM
  22 #define INCLUDE_FUNCTIONAL
  23 #include "config.h"
  24 #include "system.h"
  25 #include "coretypes.h"
  26 #include "backend.h"
  27 #include "rtl.h"
  28 #include "df.h"
  29 #include "rtl-ssa.h"
  30
  31 #include "predict.h"
  32 #include "cfgrtl.h"
  33 #include "cfgcleanup.h"
  34 #include "cfgloop.h"
  35 #include "tree-pass.h"
  36 #include "rtl-iter.h"
  37 #include "target.h"
  38
  39 /* This pass does simple forward propagation and simplification when an
  40    operand of an insn can only come from a single def.  This pass uses
  41    RTL SSA, so it is global.  However, we only do limited analysis of
  42    available expressions.
  43
  44    1) The pass tries to propagate the source of the def into the use,
  45    and checks if the result is independent of the substituted value.
  46    For example, the high word of a (zero_extend:DI (reg:SI M)) is always
  47    zero, independent of the source register.
  48
  49    In particular, we propagate constants into the use site.  Sometimes
  50    RTL expansion did not put the constant in the same insn on purpose,
  51    to satisfy a predicate, and the result will fail to be recognized;
  52    but this happens rarely and in this case we can still create a
  53    REG_EQUAL note.  For multi-word operations, this
  54
  55       (set (subreg:SI (reg:DI 120) 0) (const_int 0))
  56       (set (subreg:SI (reg:DI 120) 4) (const_int -1))
  57       (set (subreg:SI (reg:DI 122) 0)
  58          (ior:SI (subreg:SI (reg:DI 119) 0) (subreg:SI (reg:DI 120) 0)))
  59       (set (subreg:SI (reg:DI 122) 4)
  60          (ior:SI (subreg:SI (reg:DI 119) 4) (subreg:SI (reg:DI 120) 4)))
  61
  62    can be simplified to the much simpler
  63
  64       (set (subreg:SI (reg:DI 122) 0) (subreg:SI (reg:DI 119)))
  65       (set (subreg:SI (reg:DI 122) 4) (const_int -1))
  66
  67    This particular propagation is also effective at putting together
  68    complex addressing modes.  We are more aggressive inside MEMs, in
  69    that all definitions are propagated if the use is in a MEM; if the
  70    result is a valid memory address we check address_cost to decide
  71    whether the substitution is worthwhile.
  72
  73    2) The pass propagates register copies.  This is not as effective as
  74    the copy propagation done by CSE's canon_reg, which works by walking
  75    the instruction chain, it can help the other transformations.
  76
  77    We should consider removing this optimization, and instead reorder the
  78    RTL passes, because GCSE does this transformation too.  With some luck,
  79    the CSE pass at the end of rest_of_handle_gcse could also go away.
  80
  81    3) The pass looks for paradoxical subregs that are actually unnecessary.
  82    Things like this:
  83
  84      (set (reg:QI 120) (subreg:QI (reg:SI 118) 0))
  85      (set (reg:QI 121) (subreg:QI (reg:SI 119) 0))
  86      (set (reg:SI 122) (plus:SI (subreg:SI (reg:QI 120) 0)
  87                                 (subreg:SI (reg:QI 121) 0)))
  88
  89    are very common on machines that can only do word-sized operations.
  90    For each use of a paradoxical subreg (subreg:WIDER (reg:NARROW N) 0),
  91    if it has a single def and it is (subreg:NARROW (reg:WIDE M) 0),
  92    we can replace the paradoxical subreg with simply (reg:WIDE M).  The
  93    above will simplify this to
  94
  95      (set (reg:QI 120) (subreg:QI (reg:SI 118) 0))
  96      (set (reg:QI 121) (subreg:QI (reg:SI 119) 0))
  97      (set (reg:SI 122) (plus:SI (reg:SI 118) (reg:SI 119)))
  98
  99    where the first two insns are now dead.  */
 100
 101 using namespace rtl_ssa;
 102
 103 static int num_changes;
 104
 105 /* Do not try to replace constant addresses or addresses of local and
 106    argument slots.  These MEM expressions are made only once and inserted
 107    in many instructions, as well as being used to control symbol table
 108    output.  It is not safe to clobber them.
 109
 110    There are some uncommon cases where the address is already in a register
 111    for some reason, but we cannot take advantage of that because we have
 112    no easy way to unshare the MEM.  In addition, looking up all stack
 113    addresses is costly.  */
 114
 115 static bool
 116 can_simplify_addr (rtx addr)
 117 {
 118   rtx reg;
 119
 120   if (CONSTANT_ADDRESS_P (addr))
 121     return false;
 122
 123   if (GET_CODE (addr) == PLUS)
 124     reg = XEXP (addr, 0);
 125   else
 126     reg = addr;
 127
 128   return (!REG_P (reg)
 129           || (REGNO (reg) != FRAME_POINTER_REGNUM
 130               && REGNO (reg) != HARD_FRAME_POINTER_REGNUM
 131               && REGNO (reg) != ARG_POINTER_REGNUM));
 132 }
 133
 134 /* MEM is the result of an address simplification, and temporarily
 135    undoing changes OLD_NUM_CHANGES onwards restores the original address.
 136    Return whether it is good to use the new address instead of the
 137    old one.  INSN is the containing instruction.  */
 138
 139 static bool
 140 should_replace_address (int old_num_changes, rtx mem, rtx_insn *insn)
 141 {
 142   int gain;
 143
 144   /* Prefer the new address if it is less expensive.  */
 145   bool speed = optimize_bb_for_speed_p (BLOCK_FOR_INSN (insn));
 146   temporarily_undo_changes (old_num_changes);
 147   gain = address_cost (XEXP (mem, 0), GET_MODE (mem),
 148                        MEM_ADDR_SPACE (mem), speed);
 149   redo_changes (old_num_changes);
 150   gain -= address_cost (XEXP (mem, 0), GET_MODE (mem),
 151                         MEM_ADDR_SPACE (mem), speed);
 152
 153   /* If the addresses have equivalent cost, prefer the new address
 154      if it has the highest `set_src_cost'.  That has the potential of
 155      eliminating the most insns without additional costs, and it
 156      is the same that cse.c used to do.  */
 157   if (gain == 0)
 158     {
 159       gain = set_src_cost (XEXP (mem, 0), VOIDmode, speed);
 160       temporarily_undo_changes (old_num_changes);
 161       gain -= set_src_cost (XEXP (mem, 0), VOIDmode, speed);
 162       redo_changes (old_num_changes);
 163     }
 164
 165   return (gain > 0);
 166 }
 167
 168
 169 namespace
 170 {
 171   class fwprop_propagation : public insn_propagation
 172   {
 173   public:
 174     static const uint16_t CHANGED_MEM = FIRST_SPARE_RESULT;
 175     static const uint16_t CONSTANT = FIRST_SPARE_RESULT << 1;
 176     static const uint16_t PROFITABLE = FIRST_SPARE_RESULT << 2;
 177
 178     fwprop_propagation (insn_info *, set_info *, rtx, rtx);
 179
 180     bool changed_mem_p () const { return result_flags & CHANGED_MEM; }
 181     bool folded_to_constants_p () const;
 182     bool profitable_p () const;
 183
 184     bool check_mem (int, rtx) final override;
 185     void note_simplification (int, uint16_t, rtx, rtx) final override;
 186     uint16_t classify_result (rtx, rtx);
 187
 188   private:
 189     const bool single_use_p;
 190     const bool single_ebb_p;
 191   };
 192 }
 193
 194 /* Prepare to replace FROM with TO in USE_INSN.  */
 195
 196 fwprop_propagation::fwprop_propagation (insn_info *use_insn,
 197                                         set_info *def, rtx from, rtx to)
 198   : insn_propagation (use_insn->rtl (), from, to),
 199     single_use_p (def->single_nondebug_use ()),
 200     single_ebb_p (use_insn->ebb () == def->ebb ())
 201 {
 202   should_check_mems = true;
 203   should_note_simplifications = true;
 204 }
 205
 206 /* MEM is the result of an address simplification, and temporarily
 207    undoing changes OLD_NUM_CHANGES onwards restores the original address.
 208    Return true if the propagation should continue, false if it has failed.  */
 209
 210 bool
 211 fwprop_propagation::check_mem (int old_num_changes, rtx mem)
 212 {
 213   if (!memory_address_addr_space_p (GET_MODE (mem), XEXP (mem, 0),
 214                                     MEM_ADDR_SPACE (mem)))
 215     {
 216       failure_reason = "would create an invalid MEM";
 217       return false;
 218     }
 219
 220   temporarily_undo_changes (old_num_changes);
 221   bool can_simplify = can_simplify_addr (XEXP (mem, 0));
 222   redo_changes (old_num_changes);
 223   if (!can_simplify)
 224     {
 225       failure_reason = "would replace a frame address";
 226       return false;
 227     }
 228
 229   /* Copy propagations are always ok.  Otherwise check the costs.  */
 230   if (!(REG_P (from) && REG_P (to))
 231       && !should_replace_address (old_num_changes, mem, insn))
 232     {
 233       failure_reason = "would increase the cost of a MEM";
 234       return false;
 235     }
 236
 237   result_flags |= CHANGED_MEM;
 238   return true;
 239 }
 240
 241 /* OLDX has been simplified to NEWX.  Describe the change in terms of
 242    result_flags.  */
 243
 244 uint16_t
 245 fwprop_propagation::classify_result (rtx old_rtx, rtx new_rtx)
 246 {
 247   if (CONSTANT_P (new_rtx))
 248     {
 249       /* If OLD_RTX is a LO_SUM, then it presumably exists for a reason,
 250          and NEW_RTX is likely not a legitimate address.  We want it to
 251          disappear if it is invalid.
 252
 253          ??? Using the mode of the LO_SUM as the mode of the address
 254          seems odd, but it was what the pre-SSA code did.  */
 255       if (GET_CODE (old_rtx) == LO_SUM
 256           && !memory_address_p (GET_MODE (old_rtx), new_rtx))
 257         return CONSTANT;
 258       return CONSTANT | PROFITABLE;
 259     }
 260
 261   /* Allow replacements that simplify operations on a vector or complex
 262      value to a component.  The most prominent case is
 263      (subreg ([vec_]concat ...)).   */
 264   if (REG_P (new_rtx)
 265       && !HARD_REGISTER_P (new_rtx)
 266       && (VECTOR_MODE_P (GET_MODE (from))
 267           || COMPLEX_MODE_P (GET_MODE (from)))
 268       && GET_MODE (new_rtx) == GET_MODE_INNER (GET_MODE (from)))
 269     return PROFITABLE;
 270
 271   /* Allow (subreg (mem)) -> (mem) simplifications with the following
 272      exceptions:
 273      1) Propagating (mem)s into multiple uses is not profitable.
 274      2) Propagating (mem)s across EBBs may not be profitable if the source EBB
 275         runs less frequently.
 276      3) Propagating (mem)s into paradoxical (subreg)s is not profitable.
 277      4) Creating new (mem/v)s is not correct, since DCE will not remove the old
 278         ones.  */
 279   if (single_use_p
 280       && single_ebb_p
 281       && SUBREG_P (old_rtx)
 282       && !paradoxical_subreg_p (old_rtx)
 283       && MEM_P (new_rtx)
 284       && !MEM_VOLATILE_P (new_rtx))
 285     return PROFITABLE;
 286
 287   return 0;
 288 }
 289
 290 /* Record that OLD_RTX has been simplified to NEW_RTX.  OLD_NUM_CHANGES
 291    is the number of unrelated changes that had been made before processing
 292    OLD_RTX and its subrtxes.  OLD_RESULT_FLAGS is the value that result_flags
 293    had at that point.  */
 294
 295 void
 296 fwprop_propagation::note_simplification (int old_num_changes,
 297                                          uint16_t old_result_flags,
 298                                          rtx old_rtx, rtx new_rtx)
 299 {
 300   result_flags &= ~(CONSTANT | PROFITABLE);
 301   uint16_t new_flags = classify_result (old_rtx, new_rtx);
 302   if (old_num_changes)
 303     new_flags &= old_result_flags;
 304   result_flags |= new_flags;
 305 }
 306
 307 /* Return true if all substitutions eventually folded to constants.  */
 308
 309 bool
 310 fwprop_propagation::folded_to_constants_p () const
 311 {
 312   /* If we're propagating a HIGH, require it to be folded with a
 313      partnering LO_SUM.  For example, a REG_EQUAL note with a register
 314      replaced by an unfolded HIGH is not useful.  */
 315   if (CONSTANT_P (to) && GET_CODE (to) != HIGH)
 316     return true;
 317   return !(result_flags & UNSIMPLIFIED) && (result_flags & CONSTANT);
 318 }
 319
 320
 321 /* Return true if it is worth keeping the result of the propagation,
 322    false if it would increase the complexity of the pattern too much.  */
 323
 324 bool
 325 fwprop_propagation::profitable_p () const
 326 {
 327   if (changed_mem_p ())
 328     return true;
 329
 330   if (!(result_flags & UNSIMPLIFIED)
 331       && (result_flags & PROFITABLE))
 332     return true;
 333
 334   if (REG_P (to))
 335     return true;
 336
 337   if (GET_CODE (to) == SUBREG
 338       && REG_P (SUBREG_REG (to))
 339       && !paradoxical_subreg_p (to))
 340     return true;
 341
 342   if (CONSTANT_P (to))
 343     return true;
 344
 345   return false;
 346 }
 347
 348 /* Check that X has a single def.  */
 349
 350 static bool
 351 reg_single_def_p (rtx x)
 352 {
 353   return REG_P (x) && crtl->ssa->single_dominating_def (REGNO (x));
 354 }
 355
 356 /* Return true if X contains a paradoxical subreg.  */
 357
 358 static bool
 359 contains_paradoxical_subreg_p (rtx x)
 360 {
 361   subrtx_var_iterator::array_type array;
 362   FOR_EACH_SUBRTX_VAR (iter, array, x, NONCONST)
 363     {
 364       x = *iter;
 365       if (SUBREG_P (x) && paradoxical_subreg_p (x))
 366         return true;
 367     }
 368   return false;
 369 }
 370
 371 /* Try to substitute (set DEST SRC), which defines DEF, into note NOTE of
 372    USE_INSN.  Return the number of substitutions on success, otherwise return
 373    -1 and leave USE_INSN unchanged.
 374
 375    If REQUIRE_CONSTANT is true, require all substituted occurrences of SRC
 376    to fold to a constant, so that the note does not use any more registers
 377    than it did previously.  If REQUIRE_CONSTANT is false, also allow the
 378    substitution if it's something we'd normally allow for the main
 379    instruction pattern.  */
 380
 381 static int
 382 try_fwprop_subst_note (insn_info *use_insn, set_info *def,
 383                        rtx note, rtx dest, rtx src, bool require_constant)
 384 {
 385   rtx_insn *use_rtl = use_insn->rtl ();
 386   insn_info *def_insn = def->insn ();
 387
 388   insn_change_watermark watermark;
 389   fwprop_propagation prop (use_insn, def, dest, src);
 390   if (!prop.apply_to_rvalue (&XEXP (note, 0)))
 391     {
 392       if (dump_file && (dump_flags & TDF_DETAILS))
 393         fprintf (dump_file, "cannot propagate from insn %d into"
 394                  " notes of insn %d: %s\n", def_insn->uid (),
 395                  use_insn->uid (), prop.failure_reason);
 396       return -1;
 397     }
 398
 399   if (prop.num_replacements == 0)
 400     return 0;
 401
 402   if (require_constant)
 403     {
 404       if (!prop.folded_to_constants_p ())
 405         {
 406           if (dump_file && (dump_flags & TDF_DETAILS))
 407             fprintf (dump_file, "cannot propagate from insn %d into"
 408                      " notes of insn %d: %s\n", def_insn->uid (),
 409                      use_insn->uid (), "wouldn't fold to constants");
 410           return -1;
 411         }
 412     }
 413   else
 414     {
 415       if (!prop.folded_to_constants_p () && !prop.profitable_p ())
 416         {
 417           if (dump_file && (dump_flags & TDF_DETAILS))
 418             fprintf (dump_file, "cannot propagate from insn %d into"
 419                      " notes of insn %d: %s\n", def_insn->uid (),
 420                      use_insn->uid (), "would increase complexity of node");
 421           return -1;
 422         }
 423     }
 424
 425   if (dump_file && (dump_flags & TDF_DETAILS))
 426     {
 427       fprintf (dump_file, "\nin notes of insn %d, replacing:\n  ",
 428                INSN_UID (use_rtl));
 429       temporarily_undo_changes (0);
 430       print_inline_rtx (dump_file, note, 2);
 431       redo_changes (0);
 432       fprintf (dump_file, "\n with:\n  ");
 433       print_inline_rtx (dump_file, note, 2);
 434       fprintf (dump_file, "\n");
 435     }
 436   watermark.keep ();
 437   return prop.num_replacements;
 438 }
 439
 440 /* Try to substitute (set DEST SRC), which defines DEF, into location LOC of
 441    USE_INSN's pattern.  Return true on success, otherwise leave USE_INSN
 442    unchanged.  */
 443
 444 static bool
 445 try_fwprop_subst_pattern (obstack_watermark &attempt, insn_change &use_change,
 446                           set_info *def, rtx *loc, rtx dest, rtx src)
 447 {
 448   insn_info *use_insn = use_change.insn ();
 449   rtx_insn *use_rtl = use_insn->rtl ();
 450   insn_info *def_insn = def->insn ();
 451
 452   insn_change_watermark watermark;
 453   fwprop_propagation prop (use_insn, def, dest, src);
 454   if (!prop.apply_to_pattern (loc))
 455     {
 456       if (dump_file && (dump_flags & TDF_DETAILS))
 457         fprintf (dump_file, "cannot propagate from insn %d into"
 458                  " insn %d: %s\n", def_insn->uid (), use_insn->uid (),
 459                  prop.failure_reason);
 460       return false;
 461     }
 462
 463   if (prop.num_replacements == 0)
 464     return false;
 465
 466   if (!prop.profitable_p ())
 467     {
 468       if (dump_file && (dump_flags & TDF_DETAILS))
 469         fprintf (dump_file, "cannot propagate from insn %d into"
 470                  " insn %d: %s\n", def_insn->uid (), use_insn->uid (),
 471                  "would increase complexity of pattern");
 472       return false;
 473     }
 474
 475   if (dump_file && (dump_flags & TDF_DETAILS))
 476     {
 477       fprintf (dump_file, "\npropagating insn %d into insn %d, replacing:\n",
 478                def_insn->uid (), use_insn->uid ());
 479       temporarily_undo_changes (0);
 480       print_rtl_single (dump_file, PATTERN (use_rtl));
 481       redo_changes (0);
 482     }
 483
 484   /* ??? In theory, it should be better to use insn costs rather than
 485      set_src_costs here.  That would involve replacing this code with
 486      change_is_worthwhile.  */
 487   bool ok = recog (attempt, use_change);
 488   if (ok && !prop.changed_mem_p () && !use_insn->is_asm ())
 489     if (rtx use_set = single_set (use_rtl))
 490       {
 491         bool speed = optimize_bb_for_speed_p (BLOCK_FOR_INSN (use_rtl));
 492         temporarily_undo_changes (0);
 493         auto old_cost = set_src_cost (SET_SRC (use_set),
 494                                       GET_MODE (SET_DEST (use_set)), speed);
 495         redo_changes (0);
 496         auto new_cost = set_src_cost (SET_SRC (use_set),
 497                                       GET_MODE (SET_DEST (use_set)), speed);
 498         if (new_cost > old_cost)
 499           {
 500             if (dump_file)
 501               fprintf (dump_file, "change not profitable"
 502                        " (cost %d -> cost %d)\n", old_cost, new_cost);
 503             ok = false;
 504           }
 505       }
 506
 507   if (!ok)
 508     {
 509       /* The pattern didn't match, but if all uses of SRC folded to
 510          constants, we can add a REG_EQUAL note for the result, if there
 511          isn't one already.  */
 512       if (!prop.folded_to_constants_p ())
 513         return false;
 514
 515       /* Test this first to avoid creating an unnecessary copy of SRC.  */
 516       if (find_reg_note (use_rtl, REG_EQUAL, NULL_RTX))
 517         return false;
 518
 519       rtx set = set_for_reg_notes (use_rtl);
 520       if (!set || !REG_P (SET_DEST (set)))
 521         return false;
 522
 523       rtx value = copy_rtx (SET_SRC (set));
 524       cancel_changes (0);
 525
 526       /* If there are any paradoxical SUBREGs, drop the REG_EQUAL note,
 527          because the bits in there can be anything and so might not
 528          match the REG_EQUAL note content.  See PR70574.  */
 529       if (contains_paradoxical_subreg_p (SET_SRC (set)))
 530         return false;
 531
 532       if (dump_file && (dump_flags & TDF_DETAILS))
 533         fprintf (dump_file, " Setting REG_EQUAL note\n");
 534
 535       return set_unique_reg_note (use_rtl, REG_EQUAL, value);
 536     }
 537
 538   rtx *note_ptr = &REG_NOTES (use_rtl);
 539   while (rtx note = *note_ptr)
 540     {
 541       if ((REG_NOTE_KIND (note) == REG_EQUAL
 542            || REG_NOTE_KIND (note) == REG_EQUIV)
 543           && try_fwprop_subst_note (use_insn, def, note, dest, src, false) < 0)
 544         {
 545           *note_ptr = XEXP (note, 1);
 546           free_EXPR_LIST_node (note);
 547         }
 548       else
 549         note_ptr = &XEXP (note, 1);
 550     }
 551
 552   confirm_change_group ();
 553   crtl->ssa->change_insn (use_change);
 554   num_changes++;
 555   return true;
 556 }
 557
 558 /* Try to substitute (set DEST SRC), which defines DEF, into USE_INSN's notes,
 559    given that it was not possible to do this for USE_INSN's main pattern.
 560    Return true on success, otherwise leave USE_INSN unchanged.  */
 561
 562 static bool
 563 try_fwprop_subst_notes (insn_info *use_insn, set_info *def,
 564                         rtx dest, rtx src)
 565 {
 566   rtx_insn *use_rtl = use_insn->rtl ();
 567   for (rtx note = REG_NOTES (use_rtl); note; note = XEXP (note, 1))
 568     if ((REG_NOTE_KIND (note) == REG_EQUAL
 569          || REG_NOTE_KIND (note) == REG_EQUIV)
 570         && try_fwprop_subst_note (use_insn, def, note, dest, src, true) > 0)
 571       {
 572         confirm_change_group ();
 573         return true;
 574       }
 575
 576   return false;
 577 }
 578
 579 /* Check whether we could validly substitute (set DEST SRC), which defines DEF,
 580    into USE.  If so, first try performing the substitution in location LOC
 581    of USE->insn ()'s pattern.  If that fails, try instead to substitute
 582    into the notes.
 583
 584    Return true on success, otherwise leave USE_INSN unchanged.  */
 585
 586 static bool
 587 try_fwprop_subst (use_info *use, set_info *def,
 588                   rtx *loc, rtx dest, rtx src)
 589 {
 590   insn_info *use_insn = use->insn ();
 591   insn_info *def_insn = def->insn ();
 592
 593   auto attempt = crtl->ssa->new_change_attempt ();
 594   use_array src_uses = remove_note_accesses (attempt, def_insn->uses ());
 595
 596   /* ??? Not really a meaningful test: it means we can propagate arithmetic
 597      involving hard registers but not bare references to them.  A better
 598      test would be to iterate over src_uses looking for hard registers
 599      that are not fixed.  */
 600   if (REG_P (src) && HARD_REGISTER_P (src))
 601     return false;
 602
 603   /* ??? It would be better to make this EBB-based instead.  That would
 604      involve checking for equal EBBs rather than equal BBs and trying
 605      to make the uses available at use_insn->ebb ()->first_bb ().  */
 606   if (def_insn->bb () != use_insn->bb ())
 607     {
 608       src_uses = crtl->ssa->make_uses_available (attempt, src_uses,
 609                                                  use_insn->bb (),
 610                                                  use_insn->is_debug_insn ());
 611       if (!src_uses.is_valid ())
 612         return false;
 613     }
 614
 615   insn_change use_change (use_insn);
 616   use_change.new_uses = merge_access_arrays (attempt, use_change.new_uses,
 617                                              src_uses);
 618   if (!use_change.new_uses.is_valid ())
 619     return false;
 620
 621   /* ??? We could allow movement within the EBB by adding:
 622
 623      use_change.move_range = use_insn->ebb ()->insn_range ();  */
 624   if (!restrict_movement (use_change))
 625     return false;
 626
 627   return (try_fwprop_subst_pattern (attempt, use_change, def, loc, dest, src)
 628           || try_fwprop_subst_notes (use_insn, def, dest, src));
 629 }
 630
 631 /* For the given single_set INSN, containing SRC known to be a
 632    ZERO_EXTEND or SIGN_EXTEND of a register, return true if INSN
 633    is redundant due to the register being set by a LOAD_EXTEND_OP
 634    load from memory.  */
 635
 636 static bool
 637 free_load_extend (rtx src, insn_info *insn)
 638 {
 639   rtx reg = XEXP (src, 0);
 640   if (load_extend_op (GET_MODE (reg)) != GET_CODE (src))
 641     return false;
 642
 643   def_info *def = nullptr;
 644   for (use_info *use : insn->uses ())
 645     if (use->regno () == REGNO (reg))
 646       {
 647         def = use->def ();
 648         break;
 649       }
 650
 651   if (!def)
 652     return false;
 653
 654   insn_info *def_insn = def->insn ();
 655   if (def_insn->is_artificial ())
 656     return false;
 657
 658   rtx_insn *def_rtl = def_insn->rtl ();
 659   if (NONJUMP_INSN_P (def_rtl))
 660     {
 661       rtx patt = PATTERN (def_rtl);
 662
 663       if (GET_CODE (patt) == SET
 664           && GET_CODE (SET_SRC (patt)) == MEM
 665           && rtx_equal_p (SET_DEST (patt), reg))
 666         return true;
 667     }
 668   return false;
 669 }
 670
 671 /* Subroutine of forward_propagate_subreg that handles a use of DEST
 672    in REF.  The other parameters are the same.  */
 673
 674 static bool
 675 forward_propagate_subreg (use_info *use, set_info *def,
 676                           rtx dest, rtx src, df_ref ref)
 677 {
 678   scalar_int_mode int_use_mode, src_mode;
 679
 680   /* Only consider subregs... */
 681   rtx use_reg = DF_REF_REG (ref);
 682   machine_mode use_mode = GET_MODE (use_reg);
 683   if (GET_CODE (use_reg) != SUBREG
 684       || GET_MODE (SUBREG_REG (use_reg)) != GET_MODE (dest))
 685     return false;
 686
 687   /* ??? Replacing throughout the pattern would help for match_dups.  */
 688   rtx *loc = DF_REF_LOC (ref);
 689   if (paradoxical_subreg_p (use_reg))
 690     {
 691       /* If this is a paradoxical SUBREG, we have no idea what value the
 692          extra bits would have.  However, if the operand is equivalent to
 693          a SUBREG whose operand is the same as our mode, and all the modes
 694          are within a word, we can just use the inner operand because
 695          these SUBREGs just say how to treat the register.  */
 696       if (GET_CODE (src) == SUBREG
 697           && REG_P (SUBREG_REG (src))
 698           && REGNO (SUBREG_REG (src)) >= FIRST_PSEUDO_REGISTER
 699           && GET_MODE (SUBREG_REG (src)) == use_mode
 700           && subreg_lowpart_p (src))
 701         return try_fwprop_subst (use, def, loc, use_reg, SUBREG_REG (src));
 702     }
 703
 704   /* If this is a SUBREG of a ZERO_EXTEND or SIGN_EXTEND, and the SUBREG
 705      is the low part of the reg being extended then just use the inner
 706      operand.  Don't do this if the ZERO_EXTEND or SIGN_EXTEND insn will
 707      be removed due to it matching a LOAD_EXTEND_OP load from memory,
 708      or due to the operation being a no-op when applied to registers.
 709      For example, if we have:
 710
 711          A: (set (reg:DI X) (sign_extend:DI (reg:SI Y)))
 712          B: (... (subreg:SI (reg:DI X)) ...)
 713
 714      and mode_rep_extended says that Y is already sign-extended,
 715      the backend will typically allow A to be combined with the
 716      definition of Y or, failing that, allow A to be deleted after
 717      reload through register tying.  Introducing more uses of Y
 718      prevents both optimisations.  */
 719   else if (is_a <scalar_int_mode> (use_mode, &int_use_mode)
 720            && subreg_lowpart_p (use_reg))
 721     {
 722       if ((GET_CODE (src) == ZERO_EXTEND
 723            || GET_CODE (src) == SIGN_EXTEND)
 724           && is_a <scalar_int_mode> (GET_MODE (src), &src_mode)
 725           && REG_P (XEXP (src, 0))
 726           && REGNO (XEXP (src, 0)) >= FIRST_PSEUDO_REGISTER
 727           && GET_MODE (XEXP (src, 0)) == use_mode
 728           && !free_load_extend (src, def->insn ())
 729           && (targetm.mode_rep_extended (int_use_mode, src_mode)
 730               != (int) GET_CODE (src)))
 731         return try_fwprop_subst (use, def, loc, use_reg, XEXP (src, 0));
 732     }
 733
 734   return false;
 735 }
 736
 737 /* Try to substitute (set DEST SRC), which defines DEF, into USE and simplify
 738    the result, handling cases where DEST is used in a subreg and where
 739    applying that subreg to SRC results in a useful simplification.  */
 740
 741 static bool
 742 forward_propagate_subreg (use_info *use, set_info *def, rtx dest, rtx src)
 743 {
 744   if (!use->includes_subregs () || !REG_P (dest))
 745     return false;
 746
 747   if (GET_CODE (src) != SUBREG
 748       && GET_CODE (src) != ZERO_EXTEND
 749       && GET_CODE (src) != SIGN_EXTEND)
 750     return false;
 751
 752   rtx_insn *use_rtl = use->insn ()->rtl ();
 753   df_ref ref;
 754
 755   FOR_EACH_INSN_USE (ref, use_rtl)
 756     if (DF_REF_REGNO (ref) == use->regno ()
 757         && forward_propagate_subreg (use, def, dest, src, ref))
 758       return true;
 759
 760   FOR_EACH_INSN_EQ_USE (ref, use_rtl)
 761     if (DF_REF_REGNO (ref) == use->regno ()
 762         && forward_propagate_subreg (use, def, dest, src, ref))
 763       return true;
 764
 765   return false;
 766 }
 767
 768 /* Try to substitute (set DEST SRC), which defines DEF, into USE and
 769    simplify the result.  */
 770
 771 static bool
 772 forward_propagate_and_simplify (use_info *use, set_info *def,
 773                                 rtx dest, rtx src)
 774 {
 775   insn_info *use_insn = use->insn ();
 776   rtx_insn *use_rtl = use_insn->rtl ();
 777   insn_info *def_insn = def->insn ();
 778
 779   /* ??? This check seems unnecessary.  We should be able to propagate
 780      into any kind of instruction, regardless of whether it's a single set.
 781      It seems odd to be more permissive with asms than normal instructions.  */
 782   bool need_single_set = (!use_insn->is_asm () && !use_insn->is_debug_insn ());
 783   rtx use_set = single_set (use_rtl);
 784   if (need_single_set && !use_set)
 785     return false;
 786
 787   /* Do not propagate into PC etc.
 788
 789      ??? This too seems unnecessary.  The current code should work correctly
 790      without it, including cases where jumps become unconditional.  */
 791   if (use_set && GET_MODE (SET_DEST (use_set)) == VOIDmode)
 792     return false;
 793
 794   /* In __asm don't replace if src might need more registers than
 795      reg, as that could increase register pressure on the __asm.  */
 796   if (use_insn->is_asm () && def_insn->uses ().size () > 1)
 797     return false;
 798
 799   /* Check if the def is loading something from the constant pool; in this
 800      case we would undo optimization such as compress_float_constant.
 801      Still, we can set a REG_EQUAL note.  */
 802   if (MEM_P (src) && MEM_READONLY_P (src))
 803     {
 804       rtx x = avoid_constant_pool_reference (src);
 805       rtx note_set;
 806       if (x != src
 807           && (note_set = set_for_reg_notes (use_rtl))
 808           && REG_P (SET_DEST (note_set))
 809           && !contains_paradoxical_subreg_p (SET_SRC (note_set)))
 810         {
 811           rtx note = find_reg_note (use_rtl, REG_EQUAL, NULL_RTX);
 812           rtx old_rtx = note ? XEXP (note, 0) : SET_SRC (note_set);
 813           rtx new_rtx = simplify_replace_rtx (old_rtx, src, x);
 814           if (old_rtx != new_rtx)
 815             set_unique_reg_note (use_rtl, REG_EQUAL, copy_rtx (new_rtx));
 816         }
 817       return false;
 818     }
 819
 820   /* ??? Unconditionally propagating into PATTERN would work better
 821      for instructions that have match_dups.  */
 822   rtx *loc = need_single_set ? &use_set : &PATTERN (use_rtl);
 823   return try_fwprop_subst (use, def, loc, dest, src);
 824 }
 825
 826 /* Given a use USE of an insn, if it has a single reaching
 827    definition, try to forward propagate it into that insn.
 828    Return true if something changed.
 829
 830    REG_PROP_ONLY is true if we should only propagate register copies.  */
 831
 832 static bool
 833 forward_propagate_into (use_info *use, bool reg_prop_only = false)
 834 {
 835   if (use->includes_read_writes ())
 836     return false;
 837
 838   /* Disregard uninitialized uses.  */
 839   set_info *def = use->def ();
 840   if (!def)
 841     return false;
 842
 843   /* Only consider single-register definitions.  This could be relaxed,
 844      but it should rarely be needed before RA.  */
 845   def = look_through_degenerate_phi (def);
 846   if (def->includes_multiregs ())
 847     return false;
 848
 849   /* Only consider uses whose definition comes from a real instruction.  */
 850   insn_info *def_insn = def->insn ();
 851   if (def_insn->is_artificial ())
 852     return false;
 853
 854   rtx_insn *def_rtl = def_insn->rtl ();
 855   if (!NONJUMP_INSN_P (def_rtl))
 856     return false;
 857   /* ??? This seems an unnecessary restriction.  We can easily tell
 858      which set the definition comes from.  */
 859   if (multiple_sets (def_rtl))
 860     return false;
 861   rtx def_set = simple_regno_set (PATTERN (def_rtl), def->regno ());
 862   if (!def_set)
 863     return false;
 864
 865   rtx dest = SET_DEST (def_set);
 866   rtx src = SET_SRC (def_set);
 867
 868   /* Allow propagations into a loop only for reg-to-reg copies, since
 869      replacing one register by another shouldn't increase the cost.  */
 870   struct loop *def_loop = def_insn->bb ()->cfg_bb ()->loop_father;
 871   struct loop *use_loop = use->bb ()->cfg_bb ()->loop_father;
 872   if ((reg_prop_only || def_loop != use_loop)
 873       && (!reg_single_def_p (dest) || !reg_single_def_p (src)))
 874     return false;
 875
 876   /* Don't substitute into a non-local goto, this confuses CFG.  */
 877   insn_info *use_insn = use->insn ();
 878   rtx_insn *use_rtl = use_insn->rtl ();
 879   if (JUMP_P (use_rtl)
 880       && find_reg_note (use_rtl, REG_NON_LOCAL_GOTO, NULL_RTX))
 881     return false;
 882
 883   if (forward_propagate_and_simplify (use, def, dest, src)
 884       || forward_propagate_subreg (use, def, dest, src))
 885     return true;
 886
 887   return false;
 888 }
 889 \f
 890 static void
 891 fwprop_init (void)
 892 {
 893   num_changes = 0;
 894   calculate_dominance_info (CDI_DOMINATORS);
 895
 896   /* We do not always want to propagate into loops, so we have to find
 897      loops and be careful about them.  Avoid CFG modifications so that
 898      we don't have to update dominance information afterwards for
 899      build_single_def_use_links.  */
 900   loop_optimizer_init (AVOID_CFG_MODIFICATIONS);
 901
 902   df_analyze ();
 903   crtl->ssa = new rtl_ssa::function_info (cfun);
 904 }
 905
 906 static void
 907 fwprop_done (void)
 908 {
 909   loop_optimizer_finalize ();
 910
 911   crtl->ssa->perform_pending_updates ();
 912   free_dominance_info (CDI_DOMINATORS);
 913   cleanup_cfg (0);
 914
 915   delete crtl->ssa;
 916   crtl->ssa = nullptr;
 917
 918   delete_trivially_dead_insns (get_insns (), max_reg_num ());
 919
 920   if (dump_file)
 921     fprintf (dump_file,
 922              "\nNumber of successful forward propagations: %d\n\n",
 923              num_changes);
 924 }
 925
 926 /* Try to optimize INSN, returning true if something changes.
 927    FWPROP_ADDR_P is true if we are running fwprop_addr rather than
 928    the full fwprop.  */
 929
 930 static bool
 931 fwprop_insn (insn_info *insn, bool fwprop_addr_p)
 932 {
 933   for (use_info *use : insn->uses ())
 934     {
 935       if (use->is_mem ())
 936         continue;
 937       /* ??? The choices here follow those in the pre-SSA code.  */
 938       if (!use->includes_address_uses ())
 939         {
 940           if (forward_propagate_into (use, fwprop_addr_p))
 941             return true;
 942         }
 943       else
 944         {
 945           struct loop *loop = insn->bb ()->cfg_bb ()->loop_father;
 946           /* The outermost loop is not really a loop.  */
 947           if (loop == NULL || loop_outer (loop) == NULL)
 948             {
 949               if (forward_propagate_into (use, fwprop_addr_p))
 950                 return true;
 951             }
 952           else if (fwprop_addr_p)
 953             {
 954               if (forward_propagate_into (use, false))
 955                 return true;
 956             }
 957         }
 958     }
 959   return false;
 960 }
 961
 962 /* Main entry point.  */
 963
 964 static bool
 965 gate_fwprop (void)
 966 {
 967   return optimize > 0 && flag_forward_propagate;
 968 }
 969
 970 static unsigned int
 971 fwprop (bool fwprop_addr_p)
 972 {
 973   fwprop_init ();
 974
 975   /* Go through all the instructions (including debug instructions) looking
 976      for uses that we could propagate into.
 977
 978      Do not forward propagate addresses into loops until after unrolling.
 979      CSE did so because it was able to fix its own mess, but we are not.  */
 980
 981   insn_info *next;
 982
 983   /* ??? This code uses a worklist in order to preserve the behavior
 984      of the pre-SSA implementation.  It would be better to instead
 985      iterate on each instruction until no more propagations are
 986      possible, then move on to the next.  */
 987   auto_vec<insn_info *> worklist;
 988   for (insn_info *insn = crtl->ssa->first_insn (); insn; insn = next)
 989     {
 990       next = insn->next_any_insn ();
 991       if (insn->can_be_optimized () || insn->is_debug_insn ())
 992         if (fwprop_insn (insn, fwprop_addr_p))
 993           worklist.safe_push (insn);
 994     }
 995   for (unsigned int i = 0; i < worklist.length (); ++i)
 996     {
 997       insn_info *insn = worklist[i];
 998       if (fwprop_insn (insn, fwprop_addr_p))
 999         worklist.safe_push (insn);
1000     }
1001
1002   fwprop_done ();
1003   return 0;
1004 }
1005
1006 namespace {
1007
1008 const pass_data pass_data_rtl_fwprop =
1009 {
1010   RTL_PASS, /* type */
1011   "fwprop1", /* name */
1012   OPTGROUP_NONE, /* optinfo_flags */
1013   TV_FWPROP, /* tv_id */
1014   0, /* properties_required */
1015   0, /* properties_provided */
1016   0, /* properties_destroyed */
1017   0, /* todo_flags_start */
1018   TODO_df_finish, /* todo_flags_finish */
1019 };
1020
1021 class pass_rtl_fwprop : public rtl_opt_pass
1022 {
1023 public:
1024   pass_rtl_fwprop (gcc::context *ctxt)
1025     : rtl_opt_pass (pass_data_rtl_fwprop, ctxt)
1026   {}
1027
1028   /* opt_pass methods: */
1029   virtual bool gate (function *) { return gate_fwprop (); }
1030   virtual unsigned int execute (function *) { return fwprop (false); }
1031
1032 }; // class pass_rtl_fwprop
1033
1034 } // anon namespace
1035
1036 rtl_opt_pass *
1037 make_pass_rtl_fwprop (gcc::context *ctxt)
1038 {
1039   return new pass_rtl_fwprop (ctxt);
1040 }
1041
1042 namespace {
1043
1044 const pass_data pass_data_rtl_fwprop_addr =
1045 {
1046   RTL_PASS, /* type */
1047   "fwprop2", /* name */
1048   OPTGROUP_NONE, /* optinfo_flags */
1049   TV_FWPROP, /* tv_id */
1050   0, /* properties_required */
1051   0, /* properties_provided */
1052   0, /* properties_destroyed */
1053   0, /* todo_flags_start */
1054   TODO_df_finish, /* todo_flags_finish */
1055 };
1056
1057 class pass_rtl_fwprop_addr : public rtl_opt_pass
1058 {
1059 public:
1060   pass_rtl_fwprop_addr (gcc::context *ctxt)
1061     : rtl_opt_pass (pass_data_rtl_fwprop_addr, ctxt)
1062   {}
1063
1064   /* opt_pass methods: */
1065   virtual bool gate (function *) { return gate_fwprop (); }
1066   virtual unsigned int execute (function *) { return fwprop (true); }
1067
1068 }; // class pass_rtl_fwprop_addr
1069
1070 } // anon namespace
1071
1072 rtl_opt_pass *
1073 make_pass_rtl_fwprop_addr (gcc::context *ctxt)
1074 {
1075   return new pass_rtl_fwprop_addr (ctxt);
1076 }