gcc/fwprop.c

   1 /* RTL-based forward propagation pass for GNU compiler.
   2    Copyright (C) 2005, 2006, 2007 Free Software Foundation, Inc.
   3    Contributed by Paolo Bonzini and Steven Bosscher.
   4
   5 This file is part of GCC.
   6
   7 GCC is free software; you can redistribute it and/or modify it under
   8 the terms of the GNU General Public License as published by the Free
   9 Software Foundation; either version 2, or (at your option) any later
  10 version.
  11
  12 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
  13 WARRANTY; without even the implied warranty of MERCHANTABILITY or
  14 FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  15 for more details.
  16
  17 You should have received a copy of the GNU General Public License
  18 along with GCC; see the file COPYING.  If not, write to the Free
  19 Software Foundation, 51 Franklin Street, Fifth Floor, Boston, MA
  20 02110-1301, USA.  */
  21
  22 #include "config.h"
  23 #include "system.h"
  24 #include "coretypes.h"
  25 #include "tm.h"
  26 #include "toplev.h"
  27
  28 #include "timevar.h"
  29 #include "rtl.h"
  30 #include "tm_p.h"
  31 #include "emit-rtl.h"
  32 #include "insn-config.h"
  33 #include "recog.h"
  34 #include "flags.h"
  35 #include "obstack.h"
  36 #include "basic-block.h"
  37 #include "output.h"
  38 #include "df.h"
  39 #include "target.h"
  40 #include "cfgloop.h"
  41 #include "tree-pass.h"
  42
  43
  44 /* This pass does simple forward propagation and simplification when an
  45    operand of an insn can only come from a single def.  This pass uses
  46    df.c, so it is global.  However, we only do limited analysis of
  47    available expressions.
  48
  49    1) The pass tries to propagate the source of the def into the use,
  50    and checks if the result is independent of the substituted value.
  51    For example, the high word of a (zero_extend:DI (reg:SI M)) is always
  52    zero, independent of the source register.
  53
  54    In particular, we propagate constants into the use site.  Sometimes
  55    RTL expansion did not put the constant in the same insn on purpose,
  56    to satisfy a predicate, and the result will fail to be recognized;
  57    but this happens rarely and in this case we can still create a
  58    REG_EQUAL note.  For multi-word operations, this
  59
  60       (set (subreg:SI (reg:DI 120) 0) (const_int 0))
  61       (set (subreg:SI (reg:DI 120) 4) (const_int -1))
  62       (set (subreg:SI (reg:DI 122) 0)
  63          (ior:SI (subreg:SI (reg:DI 119) 0) (subreg:SI (reg:DI 120) 0)))
  64       (set (subreg:SI (reg:DI 122) 4)
  65          (ior:SI (subreg:SI (reg:DI 119) 4) (subreg:SI (reg:DI 120) 4)))
  66
  67    can be simplified to the much simpler
  68
  69       (set (subreg:SI (reg:DI 122) 0) (subreg:SI (reg:DI 119)))
  70       (set (subreg:SI (reg:DI 122) 4) (const_int -1))
  71
  72    This particular propagation is also effective at putting together
  73    complex addressing modes.  We are more aggressive inside MEMs, in
  74    that all definitions are propagated if the use is in a MEM; if the
  75    result is a valid memory address we check address_cost to decide
  76    whether the substitution is worthwhile.
  77
  78    2) The pass propagates register copies.  This is not as effective as
  79    the copy propagation done by CSE's canon_reg, which works by walking
  80    the instruction chain, it can help the other transformations.
  81
  82    We should consider removing this optimization, and instead reorder the
  83    RTL passes, because GCSE does this transformation too.  With some luck,
  84    the CSE pass at the end of rest_of_handle_gcse could also go away.
  85
  86    3) The pass looks for paradoxical subregs that are actually unnecessary.
  87    Things like this:
  88
  89      (set (reg:QI 120) (subreg:QI (reg:SI 118) 0))
  90      (set (reg:QI 121) (subreg:QI (reg:SI 119) 0))
  91      (set (reg:SI 122) (plus:SI (subreg:SI (reg:QI 120) 0)
  92                                 (subreg:SI (reg:QI 121) 0)))
  93
  94    are very common on machines that can only do word-sized operations.
  95    For each use of a paradoxical subreg (subreg:WIDER (reg:NARROW N) 0),
  96    if it has a single def and it is (subreg:NARROW (reg:WIDE M) 0),
  97    we can replace the paradoxical subreg with simply (reg:WIDE M).  The
  98    above will simplify this to
  99
 100      (set (reg:QI 120) (subreg:QI (reg:SI 118) 0))
 101      (set (reg:QI 121) (subreg:QI (reg:SI 119) 0))
 102      (set (reg:SI 122) (plus:SI (reg:SI 118) (reg:SI 119)))
 103
 104    where the first two insns are now dead.  */
 105
 106
 107 static int num_changes;
 108
 109 \f
 110 /* Do not try to replace constant addresses or addresses of local and
 111    argument slots.  These MEM expressions are made only once and inserted
 112    in many instructions, as well as being used to control symbol table
 113    output.  It is not safe to clobber them.
 114
 115    There are some uncommon cases where the address is already in a register
 116    for some reason, but we cannot take advantage of that because we have
 117    no easy way to unshare the MEM.  In addition, looking up all stack
 118    addresses is costly.  */
 119
 120 static bool
 121 can_simplify_addr (rtx addr)
 122 {
 123   rtx reg;
 124
 125   if (CONSTANT_ADDRESS_P (addr))
 126     return false;
 127
 128   if (GET_CODE (addr) == PLUS)
 129     reg = XEXP (addr, 0);
 130   else
 131     reg = addr;
 132
 133   return (!REG_P (reg)
 134           || (REGNO (reg) != FRAME_POINTER_REGNUM
 135               && REGNO (reg) != HARD_FRAME_POINTER_REGNUM
 136               && REGNO (reg) != ARG_POINTER_REGNUM));
 137 }
 138
 139 /* Returns a canonical version of X for the address, from the point of view,
 140    that all multiplications are represented as MULT instead of the multiply
 141    by a power of 2 being represented as ASHIFT.
 142
 143    Every ASHIFT we find has been made by simplify_gen_binary and was not
 144    there before, so it is not shared.  So we can do this in place.  */
 145
 146 static void
 147 canonicalize_address (rtx x)
 148 {
 149   for (;;)
 150     switch (GET_CODE (x))
 151       {
 152       case ASHIFT:
 153         if (GET_CODE (XEXP (x, 1)) == CONST_INT
 154             && INTVAL (XEXP (x, 1)) < GET_MODE_BITSIZE (GET_MODE (x))
 155             && INTVAL (XEXP (x, 1)) >= 0)
 156           {
 157             HOST_WIDE_INT shift = INTVAL (XEXP (x, 1));
 158             PUT_CODE (x, MULT);
 159             XEXP (x, 1) = gen_int_mode ((HOST_WIDE_INT) 1 << shift,
 160                                         GET_MODE (x));
 161           }
 162
 163         x = XEXP (x, 0);
 164         break;
 165
 166       case PLUS:
 167         if (GET_CODE (XEXP (x, 0)) == PLUS
 168             || GET_CODE (XEXP (x, 0)) == ASHIFT
 169             || GET_CODE (XEXP (x, 0)) == CONST)
 170           canonicalize_address (XEXP (x, 0));
 171
 172         x = XEXP (x, 1);
 173         break;
 174
 175       case CONST:
 176         x = XEXP (x, 0);
 177         break;
 178
 179       default:
 180         return;
 181       }
 182 }
 183
 184 /* OLD is a memory address.  Return whether it is good to use NEW instead,
 185    for a memory access in the given MODE.  */
 186
 187 static bool
 188 should_replace_address (rtx old, rtx new, enum machine_mode mode)
 189 {
 190   int gain;
 191
 192   if (rtx_equal_p (old, new) || !memory_address_p (mode, new))
 193     return false;
 194
 195   /* Copy propagation is always ok.  */
 196   if (REG_P (old) && REG_P (new))
 197     return true;
 198
 199   /* Prefer the new address if it is less expensive.  */
 200   gain = address_cost (old, mode) - address_cost (new, mode);
 201
 202   /* If the addresses have equivalent cost, prefer the new address
 203      if it has the highest `rtx_cost'.  That has the potential of
 204      eliminating the most insns without additional costs, and it
 205      is the same that cse.c used to do.  */
 206   if (gain == 0)
 207     gain = rtx_cost (new, SET) - rtx_cost (old, SET);
 208
 209   return (gain > 0);
 210 }
 211
 212 /* Replace all occurrences of OLD in *PX with NEW and try to simplify the
 213    resulting expression.  Replace *PX with a new RTL expression if an
 214    occurrence of OLD was found.
 215
 216    If CAN_APPEAR is true, we always return true; if it is false, we
 217    can return false if, for at least one occurrence OLD, we failed to
 218    collapse the result to a constant.  For example, (mult:M (reg:M A)
 219    (minus:M (reg:M B) (reg:M A))) may collapse to zero if replacing
 220    (reg:M B) with (reg:M A).
 221
 222    CAN_APPEAR is disregarded inside MEMs: in that case, we always return
 223    true if the simplification is a cheaper and valid memory address.
 224
 225    This is only a wrapper around simplify-rtx.c: do not add any pattern
 226    matching code here.  (The sole exception is the handling of LO_SUM, but
 227    that is because there is no simplify_gen_* function for LO_SUM).  */
 228
 229 static bool
 230 propagate_rtx_1 (rtx *px, rtx old, rtx new, bool can_appear)
 231 {
 232   rtx x = *px, tem = NULL_RTX, op0, op1, op2;
 233   enum rtx_code code = GET_CODE (x);
 234   enum machine_mode mode = GET_MODE (x);
 235   enum machine_mode op_mode;
 236   bool valid_ops = true;
 237
 238   /* If X is OLD_RTX, return NEW_RTX.  Otherwise, if this is an expression,
 239      try to build a new expression from recursive substitution.  */
 240
 241   if (x == old)
 242     {
 243       *px = new;
 244       return can_appear;
 245     }
 246
 247   switch (GET_RTX_CLASS (code))
 248     {
 249     case RTX_UNARY:
 250       op0 = XEXP (x, 0);
 251       op_mode = GET_MODE (op0);
 252       valid_ops &= propagate_rtx_1 (&op0, old, new, can_appear);
 253       if (op0 == XEXP (x, 0))
 254         return true;
 255       tem = simplify_gen_unary (code, mode, op0, op_mode);
 256       break;
 257
 258     case RTX_BIN_ARITH:
 259     case RTX_COMM_ARITH:
 260       op0 = XEXP (x, 0);
 261       op1 = XEXP (x, 1);
 262       valid_ops &= propagate_rtx_1 (&op0, old, new, can_appear);
 263       valid_ops &= propagate_rtx_1 (&op1, old, new, can_appear);
 264       if (op0 == XEXP (x, 0) && op1 == XEXP (x, 1))
 265         return true;
 266       tem = simplify_gen_binary (code, mode, op0, op1);
 267       break;
 268
 269     case RTX_COMPARE:
 270     case RTX_COMM_COMPARE:
 271       op0 = XEXP (x, 0);
 272       op1 = XEXP (x, 1);
 273       op_mode = GET_MODE (op0) != VOIDmode ? GET_MODE (op0) : GET_MODE (op1);
 274       valid_ops &= propagate_rtx_1 (&op0, old, new, can_appear);
 275       valid_ops &= propagate_rtx_1 (&op1, old, new, can_appear);
 276       if (op0 == XEXP (x, 0) && op1 == XEXP (x, 1))
 277         return true;
 278       tem = simplify_gen_relational (code, mode, op_mode, op0, op1);
 279       break;
 280
 281     case RTX_TERNARY:
 282     case RTX_BITFIELD_OPS:
 283       op0 = XEXP (x, 0);
 284       op1 = XEXP (x, 1);
 285       op2 = XEXP (x, 2);
 286       op_mode = GET_MODE (op0);
 287       valid_ops &= propagate_rtx_1 (&op0, old, new, can_appear);
 288       valid_ops &= propagate_rtx_1 (&op1, old, new, can_appear);
 289       valid_ops &= propagate_rtx_1 (&op2, old, new, can_appear);
 290       if (op0 == XEXP (x, 0) && op1 == XEXP (x, 1) && op2 == XEXP (x, 2))
 291         return true;
 292       if (op_mode == VOIDmode)
 293         op_mode = GET_MODE (op0);
 294       tem = simplify_gen_ternary (code, mode, op_mode, op0, op1, op2);
 295       break;
 296
 297     case RTX_EXTRA:
 298       /* The only case we try to handle is a SUBREG.  */
 299       if (code == SUBREG)
 300         {
 301           op0 = XEXP (x, 0);
 302           valid_ops &= propagate_rtx_1 (&op0, old, new, can_appear);
 303           if (op0 == XEXP (x, 0))
 304             return true;
 305           tem = simplify_gen_subreg (mode, op0, GET_MODE (SUBREG_REG (x)),
 306                                      SUBREG_BYTE (x));
 307         }
 308       break;
 309
 310     case RTX_OBJ:
 311       if (code == MEM && x != new)
 312         {
 313           rtx new_op0;
 314           op0 = XEXP (x, 0);
 315
 316           /* There are some addresses that we cannot work on.  */
 317           if (!can_simplify_addr (op0))
 318             return true;
 319
 320           op0 = new_op0 = targetm.delegitimize_address (op0);
 321           valid_ops &= propagate_rtx_1 (&new_op0, old, new, true);
 322
 323           /* Dismiss transformation that we do not want to carry on.  */
 324           if (!valid_ops
 325               || new_op0 == op0
 326               || !(GET_MODE (new_op0) == GET_MODE (op0)
 327                    || GET_MODE (new_op0) == VOIDmode))
 328             return true;
 329
 330           canonicalize_address (new_op0);
 331
 332           /* Copy propagations are always ok.  Otherwise check the costs.  */
 333           if (!(REG_P (old) && REG_P (new))
 334               && !should_replace_address (op0, new_op0, GET_MODE (x)))
 335             return true;
 336
 337           tem = replace_equiv_address_nv (x, new_op0);
 338         }
 339
 340       else if (code == LO_SUM)
 341         {
 342           op0 = XEXP (x, 0);
 343           op1 = XEXP (x, 1);
 344
 345           /* The only simplification we do attempts to remove references to op0
 346              or make it constant -- in both cases, op0's invalidity will not
 347              make the result invalid.  */
 348           propagate_rtx_1 (&op0, old, new, true);
 349           valid_ops &= propagate_rtx_1 (&op1, old, new, can_appear);
 350           if (op0 == XEXP (x, 0) && op1 == XEXP (x, 1))
 351             return true;
 352
 353           /* (lo_sum (high x) x) -> x  */
 354           if (GET_CODE (op0) == HIGH && rtx_equal_p (XEXP (op0, 0), op1))
 355             tem = op1;
 356           else
 357             tem = gen_rtx_LO_SUM (mode, op0, op1);
 358
 359           /* OP1 is likely not a legitimate address, otherwise there would have
 360              been no LO_SUM.  We want it to disappear if it is invalid, return
 361              false in that case.  */
 362           return memory_address_p (mode, tem);
 363         }
 364
 365       else if (code == REG)
 366         {
 367           if (rtx_equal_p (x, old))
 368             {
 369               *px = new;
 370               return can_appear;
 371             }
 372         }
 373       break;
 374
 375     default:
 376       break;
 377     }
 378
 379   /* No change, no trouble.  */
 380   if (tem == NULL_RTX)
 381     return true;
 382
 383   *px = tem;
 384
 385   /* The replacement we made so far is valid, if all of the recursive
 386      replacements were valid, or we could simplify everything to
 387      a constant.  */
 388   return valid_ops || can_appear || CONSTANT_P (tem);
 389 }
 390
 391 /* Replace all occurrences of OLD in X with NEW and try to simplify the
 392    resulting expression (in mode MODE).  Return a new expression if it is
 393    a constant, otherwise X.
 394
 395    Simplifications where occurrences of NEW collapse to a constant are always
 396    accepted.  All simplifications are accepted if NEW is a pseudo too.
 397    Otherwise, we accept simplifications that have a lower or equal cost.  */
 398
 399 static rtx
 400 propagate_rtx (rtx x, enum machine_mode mode, rtx old, rtx new)
 401 {
 402   rtx tem;
 403   bool collapsed;
 404
 405   if (REG_P (new) && REGNO (new) < FIRST_PSEUDO_REGISTER)
 406     return NULL_RTX;
 407
 408   new = copy_rtx (new);
 409
 410   tem = x;
 411   collapsed = propagate_rtx_1 (&tem, old, new, REG_P (new) || CONSTANT_P (new));
 412   if (tem == x || !collapsed)
 413     return NULL_RTX;
 414
 415   /* gen_lowpart_common will not be able to process VOIDmode entities other
 416      than CONST_INTs.  */
 417   if (GET_MODE (tem) == VOIDmode && GET_CODE (tem) != CONST_INT)
 418     return NULL_RTX;
 419
 420   if (GET_MODE (tem) == VOIDmode)
 421     tem = rtl_hooks.gen_lowpart_no_emit (mode, tem);
 422   else
 423     gcc_assert (GET_MODE (tem) == mode);
 424
 425   return tem;
 426 }
 427
 428
 429 \f
 430
 431 /* Return true if the register from reference REF is killed
 432    between FROM to (but not including) TO.  */
 433
 434 static bool
 435 local_ref_killed_between_p (struct df_ref * ref, rtx from, rtx to)
 436 {
 437   rtx insn;
 438
 439   for (insn = from; insn != to; insn = NEXT_INSN (insn))
 440     {
 441       struct df_ref **def_rec;
 442       if (!INSN_P (insn))
 443         continue;
 444
 445       for (def_rec = DF_INSN_DEFS (insn); *def_rec; def_rec++)
 446         {
 447           struct df_ref *def = *def_rec;
 448           if (DF_REF_REGNO (ref) == DF_REF_REGNO (def))
 449             return true;
 450         }
 451     }
 452   return false;
 453 }
 454
 455
 456 /* Check if the given DEF is available in INSN.  This would require full
 457    computation of available expressions; we check only restricted conditions:
 458    - if DEF is the sole definition of its register, go ahead;
 459    - in the same basic block, we check for no definitions killing the
 460      definition of DEF_INSN;
 461    - if USE's basic block has DEF's basic block as the sole predecessor,
 462      we check if the definition is killed after DEF_INSN or before
 463      TARGET_INSN insn, in their respective basic blocks.  */
 464 static bool
 465 use_killed_between (struct df_ref *use, rtx def_insn, rtx target_insn)
 466 {
 467   basic_block def_bb = BLOCK_FOR_INSN (def_insn);
 468   basic_block target_bb = BLOCK_FOR_INSN (target_insn);
 469   int regno;
 470   struct df_ref * def;
 471
 472   /* In some obscure situations we can have a def reaching a use
 473      that is _before_ the def.  In other words the def does not
 474      dominate the use even though the use and def are in the same
 475      basic block.  This can happen when a register may be used
 476      uninitialized in a loop.  In such cases, we must assume that
 477      DEF is not available.  */
 478   if (def_bb == target_bb
 479       ? DF_INSN_LUID (def_insn) >= DF_INSN_LUID (target_insn)
 480       : !dominated_by_p (CDI_DOMINATORS, target_bb, def_bb))
 481     return true;
 482
 483   /* Check if the reg in USE has only one definition.  We already
 484      know that this definition reaches use, or we wouldn't be here.  */
 485   regno = DF_REF_REGNO (use);
 486   def = DF_REG_DEF_CHAIN (regno);
 487   if (def && (def->next_reg == NULL))
 488     return false;
 489
 490   /* Check locally if we are in the same basic block.  */
 491   if (def_bb == target_bb)
 492     return local_ref_killed_between_p (use, def_insn, target_insn);
 493
 494   /* Finally, if DEF_BB is the sole predecessor of TARGET_BB.  */
 495   if (single_pred_p (target_bb)
 496       && single_pred (target_bb) == def_bb)
 497     {
 498       struct df_ref *x;
 499
 500       /* See if USE is killed between DEF_INSN and the last insn in the
 501          basic block containing DEF_INSN.  */
 502       x = df_bb_regno_last_def_find (def_bb, regno);
 503       if (x && DF_INSN_LUID (x->insn) >= DF_INSN_LUID (def_insn))
 504         return true;
 505
 506       /* See if USE is killed between TARGET_INSN and the first insn in the
 507          basic block containing TARGET_INSN.  */
 508       x = df_bb_regno_first_def_find (target_bb, regno);
 509       if (x && DF_INSN_LUID (x->insn) < DF_INSN_LUID (target_insn))
 510         return true;
 511
 512       return false;
 513     }
 514
 515   /* Otherwise assume the worst case.  */
 516   return true;
 517 }
 518
 519
 520 /* for_each_rtx traversal function that returns 1 if BODY points to
 521    a non-constant mem.  */
 522
 523 static int
 524 varying_mem_p (rtx *body, void *data ATTRIBUTE_UNUSED)
 525 {
 526   rtx x = *body;
 527   return MEM_P (x) && !MEM_READONLY_P (x);
 528 }
 529
 530 /* Check if all uses in DEF_INSN can be used in TARGET_INSN.  This
 531    would require full computation of available expressions;
 532    we check only restricted conditions, see use_killed_between.  */
 533 static bool
 534 all_uses_available_at (rtx def_insn, rtx target_insn)
 535 {
 536   struct df_ref **use_rec;
 537   rtx def_set = single_set (def_insn);
 538
 539   gcc_assert (def_set);
 540
 541   /* If target_insn comes right after def_insn, which is very common
 542      for addresses, we can use a quicker test.  */
 543   if (NEXT_INSN (def_insn) == target_insn
 544       && REG_P (SET_DEST (def_set)))
 545     {
 546       rtx def_reg = SET_DEST (def_set);
 547
 548       /* If the insn uses the reg that it defines, the substitution is
 549          invalid.  */
 550       for (use_rec = DF_INSN_USES (def_insn); *use_rec; use_rec++)
 551         {
 552           struct df_ref *use = *use_rec;
 553           if (rtx_equal_p (DF_REF_REG (use), def_reg))
 554             return false;
 555         }
 556       for (use_rec = DF_INSN_EQ_USES (def_insn); *use_rec; use_rec++)
 557         {
 558           struct df_ref *use = *use_rec;
 559           if (rtx_equal_p (use->reg, def_reg))
 560             return false;
 561         }
 562     }
 563   else
 564     {
 565       /* Look at all the uses of DEF_INSN, and see if they are not
 566          killed between DEF_INSN and TARGET_INSN.  */
 567       for (use_rec = DF_INSN_USES (def_insn); *use_rec; use_rec++)
 568         {
 569           struct df_ref *use = *use_rec;
 570           if (use_killed_between (use, def_insn, target_insn))
 571             return false;
 572         }
 573       for (use_rec = DF_INSN_EQ_USES (def_insn); *use_rec; use_rec++)
 574         {
 575           struct df_ref *use = *use_rec;
 576           if (use_killed_between (use, def_insn, target_insn))
 577             return false;
 578         }
 579     }
 580
 581   /* We don't do any analysis of memories or aliasing.  Reject any
 582      instruction that involves references to non-constant memory.  */
 583   return !for_each_rtx (&SET_SRC (def_set), varying_mem_p, NULL);
 584 }
 585
 586 \f
 587 struct find_occurrence_data
 588 {
 589   rtx find;
 590   rtx *retval;
 591 };
 592
 593 /* Callback for for_each_rtx, used in find_occurrence.
 594    See if PX is the rtx we have to find.  Return 1 to stop for_each_rtx
 595    if successful, or 0 to continue traversing otherwise.  */
 596
 597 static int
 598 find_occurrence_callback (rtx *px, void *data)
 599 {
 600   struct find_occurrence_data *fod = (struct find_occurrence_data *) data;
 601   rtx x = *px;
 602   rtx find = fod->find;
 603
 604   if (x == find)
 605     {
 606       fod->retval = px;
 607       return 1;
 608     }
 609
 610   return 0;
 611 }
 612
 613 /* Return a pointer to one of the occurrences of register FIND in *PX.  */
 614
 615 static rtx *
 616 find_occurrence (rtx *px, rtx find)
 617 {
 618   struct find_occurrence_data data;
 619
 620   gcc_assert (REG_P (find)
 621               || (GET_CODE (find) == SUBREG
 622                   && REG_P (SUBREG_REG (find))));
 623
 624   data.find = find;
 625   data.retval = NULL;
 626   for_each_rtx (px, find_occurrence_callback, &data);
 627   return data.retval;
 628 }
 629
 630 \f
 631 /* Inside INSN, the expression rooted at *LOC has been changed, moving some
 632    uses from USE_VEC.  Find those that are present, and create new items
 633    in the data flow object of the pass.  Mark any new uses as having the
 634    given TYPE.  */
 635 static void
 636 update_df (rtx insn, rtx *loc, struct df_ref **use_rec, enum df_ref_type type,
 637            int new_flags)
 638 {
 639   bool changed = false;
 640
 641   /* Add a use for the registers that were propagated.  */
 642   while (*use_rec)
 643     {
 644       struct df_ref *use = *use_rec;
 645       struct df_ref *orig_use = use, *new_use;
 646       rtx *new_loc = find_occurrence (loc, DF_REF_REG (orig_use));
 647       use_rec++;
 648
 649       if (!new_loc)
 650         continue;
 651
 652       /* Add a new insn use.  Use the original type, because it says if the
 653          use was within a MEM.  */
 654       new_use = df_ref_create (DF_REF_REG (orig_use), new_loc,
 655                                insn, BLOCK_FOR_INSN (insn),
 656                                type, DF_REF_FLAGS (orig_use) | new_flags);
 657
 658       /* Set up the use-def chain.  */
 659       df_chain_copy (new_use, DF_REF_CHAIN (orig_use));
 660       changed = true;
 661     }
 662   if (changed)
 663     df_insn_rescan (insn);
 664 }
 665
 666
 667 /* Try substituting NEW into LOC, which originated from forward propagation
 668    of USE's value from DEF_INSN.  SET_REG_EQUAL says whether we are
 669    substituting the whole SET_SRC, so we can set a REG_EQUAL note if the
 670    new insn is not recognized.  Return whether the substitution was
 671    performed.  */
 672
 673 static bool
 674 try_fwprop_subst (struct df_ref *use, rtx *loc, rtx new, rtx def_insn, bool set_reg_equal)
 675 {
 676   rtx insn = DF_REF_INSN (use);
 677   enum df_ref_type type = DF_REF_TYPE (use);
 678   int flags = DF_REF_FLAGS (use);
 679
 680   if (dump_file)
 681     {
 682       fprintf (dump_file, "\nIn insn %d, replacing\n ", INSN_UID (insn));
 683       print_inline_rtx (dump_file, *loc, 2);
 684       fprintf (dump_file, "\n with ");
 685       print_inline_rtx (dump_file, new, 2);
 686       fprintf (dump_file, "\n");
 687     }
 688
 689   if (validate_change (insn, loc, new, false))
 690     {
 691       num_changes++;
 692       if (dump_file)
 693         fprintf (dump_file, "Changed insn %d\n", INSN_UID (insn));
 694
 695       df_ref_remove (use);
 696       if (!CONSTANT_P (new))
 697         {
 698           update_df (insn, loc, DF_INSN_USES (def_insn), type, flags);
 699           update_df (insn, loc, DF_INSN_EQ_USES (def_insn), type, flags);
 700         }
 701       return true;
 702     }
 703   else
 704     {
 705       if (dump_file)
 706         fprintf (dump_file, "Changes to insn %d not recognized\n",
 707                  INSN_UID (insn));
 708
 709       /* Can also record a simplified value in a REG_EQUAL note, making a
 710          new one if one does not already exist.  */
 711       if (set_reg_equal)
 712         {
 713           if (dump_file)
 714             fprintf (dump_file, " Setting REG_EQUAL note\n");
 715
 716           set_unique_reg_note (insn, REG_EQUAL, copy_rtx (new));
 717
 718           /* ??? Is this still necessary if we add the note through
 719              set_unique_reg_note?  */
 720           if (!CONSTANT_P (new))
 721             {
 722               update_df (insn, loc, DF_INSN_USES (def_insn),
 723                          type, DF_REF_IN_NOTE);
 724               update_df (insn, loc, DF_INSN_EQ_USES (def_insn),
 725                          type, DF_REF_IN_NOTE);
 726             }
 727         }
 728
 729       return false;
 730     }
 731 }
 732
 733
 734 /* If USE is a paradoxical subreg, see if it can be replaced by a pseudo.  */
 735
 736 static bool
 737 forward_propagate_subreg (struct df_ref *use, rtx def_insn, rtx def_set)
 738 {
 739   rtx use_reg = DF_REF_REG (use);
 740   rtx use_insn, src;
 741
 742   /* Only consider paradoxical subregs... */
 743   enum machine_mode use_mode = GET_MODE (use_reg);
 744   if (GET_CODE (use_reg) != SUBREG
 745       || !REG_P (SET_DEST (def_set))
 746       || GET_MODE_SIZE (use_mode)
 747          <= GET_MODE_SIZE (GET_MODE (SUBREG_REG (use_reg))))
 748     return false;
 749
 750   /* If this is a paradoxical SUBREG, we have no idea what value the
 751      extra bits would have.  However, if the operand is equivalent to
 752      a SUBREG whose operand is the same as our mode, and all the modes
 753      are within a word, we can just use the inner operand because
 754      these SUBREGs just say how to treat the register.  */
 755   use_insn = DF_REF_INSN (use);
 756   src = SET_SRC (def_set);
 757   if (GET_CODE (src) == SUBREG
 758       && REG_P (SUBREG_REG (src))
 759       && GET_MODE (SUBREG_REG (src)) == use_mode
 760       && subreg_lowpart_p (src)
 761       && all_uses_available_at (def_insn, use_insn))
 762     return try_fwprop_subst (use, DF_REF_LOC (use), SUBREG_REG (src),
 763                              def_insn, false);
 764   else
 765     return false;
 766 }
 767
 768 /* Try to replace USE with SRC (defined in DEF_INSN) and simplify the
 769    result.  */
 770
 771 static bool
 772 forward_propagate_and_simplify (struct df_ref *use, rtx def_insn, rtx def_set)
 773 {
 774   rtx use_insn = DF_REF_INSN (use);
 775   rtx use_set = single_set (use_insn);
 776   rtx src, reg, new, *loc;
 777   bool set_reg_equal;
 778   enum machine_mode mode;
 779
 780   if (!use_set)
 781     return false;
 782
 783   /* Do not propagate into PC, CC0, etc.  */
 784   if (GET_MODE (SET_DEST (use_set)) == VOIDmode)
 785     return false;
 786
 787   /* If def and use are subreg, check if they match.  */
 788   reg = DF_REF_REG (use);
 789   if (GET_CODE (reg) == SUBREG
 790       && GET_CODE (SET_DEST (def_set)) == SUBREG
 791       && (SUBREG_BYTE (SET_DEST (def_set)) != SUBREG_BYTE (reg)
 792           || GET_MODE (SET_DEST (def_set)) != GET_MODE (reg)))
 793     return false;
 794
 795   /* Check if the def had a subreg, but the use has the whole reg.  */
 796   if (REG_P (reg) && GET_CODE (SET_DEST (def_set)) == SUBREG)
 797     return false;
 798
 799   /* Check if the use has a subreg, but the def had the whole reg.  Unlike the
 800      previous case, the optimization is possible and often useful indeed.  */
 801   if (GET_CODE (reg) == SUBREG && REG_P (SET_DEST (def_set)))
 802     reg = SUBREG_REG (reg);
 803
 804   /* Check if the substitution is valid (last, because it's the most
 805      expensive check!).  */
 806   src = SET_SRC (def_set);
 807   if (!CONSTANT_P (src) && !all_uses_available_at (def_insn, use_insn))
 808     return false;
 809
 810   /* Check if the def is loading something from the constant pool; in this
 811      case we would undo optimization such as compress_float_constant.
 812      Still, we can set a REG_EQUAL note.  */
 813   if (MEM_P (src) && MEM_READONLY_P (src))
 814     {
 815       rtx x = avoid_constant_pool_reference (src);
 816       if (x != src)
 817         {
 818           rtx note = find_reg_note (use_insn, REG_EQUAL, NULL_RTX);
 819           rtx old = note ? XEXP (note, 0) : SET_SRC (use_set);
 820           rtx new = simplify_replace_rtx (old, src, x);
 821           if (old != new)
 822             set_unique_reg_note (use_insn, REG_EQUAL, copy_rtx (new));
 823         }
 824       return false;
 825     }
 826
 827   /* Else try simplifying.  */
 828
 829   if (DF_REF_TYPE (use) == DF_REF_REG_MEM_STORE)
 830     {
 831       loc = &SET_DEST (use_set);
 832       set_reg_equal = false;
 833     }
 834   else
 835     {
 836       rtx note = find_reg_note (use_insn, REG_EQUAL, NULL_RTX);
 837       if (DF_REF_FLAGS (use) & DF_REF_IN_NOTE)
 838         loc = &XEXP (note, 0);
 839       else
 840         loc = &SET_SRC (use_set);
 841
 842       /* Do not replace an existing REG_EQUAL note if the insn is not
 843          recognized.  Either we're already replacing in the note, or
 844          we'll separately try plugging the definition in the note and
 845          simplifying.  */
 846       set_reg_equal = (note == NULL_RTX);
 847     }
 848
 849   if (GET_MODE (*loc) == VOIDmode)
 850     mode = GET_MODE (SET_DEST (use_set));
 851   else
 852     mode = GET_MODE (*loc);
 853
 854   new = propagate_rtx (*loc, mode, reg, src);
 855
 856   if (!new)
 857     return false;
 858
 859   return try_fwprop_subst (use, loc, new, def_insn, set_reg_equal);
 860 }
 861
 862
 863 /* Given a use USE of an insn, if it has a single reaching
 864    definition, try to forward propagate it into that insn.  */
 865
 866 static void
 867 forward_propagate_into (struct df_ref *use)
 868 {
 869   struct df_link *defs;
 870   struct df_ref *def;
 871   rtx def_insn, def_set, use_insn;
 872   rtx parent;
 873
 874   if (DF_REF_FLAGS (use) & DF_REF_READ_WRITE)
 875     return;
 876   if (DF_REF_IS_ARTIFICIAL (use))
 877     return;
 878
 879   /* Only consider uses that have a single definition.  */
 880   defs = DF_REF_CHAIN (use);
 881   if (!defs || defs->next)
 882     return;
 883
 884   def = defs->ref;
 885   if (DF_REF_FLAGS (def) & DF_REF_READ_WRITE)
 886     return;
 887   if (DF_REF_IS_ARTIFICIAL (def))
 888     return;
 889
 890   /* Do not propagate loop invariant definitions inside the loop.  */
 891   if (DF_REF_BB (def)->loop_father != DF_REF_BB (use)->loop_father)
 892     return;
 893
 894   /* Check if the use is still present in the insn!  */
 895   use_insn = DF_REF_INSN (use);
 896   if (DF_REF_FLAGS (use) & DF_REF_IN_NOTE)
 897     parent = find_reg_note (use_insn, REG_EQUAL, NULL_RTX);
 898   else
 899     parent = PATTERN (use_insn);
 900
 901   if (!loc_mentioned_in_p (DF_REF_LOC (use), parent))
 902     return;
 903
 904   def_insn = DF_REF_INSN (def);
 905   if (multiple_sets (def_insn))
 906     return;
 907   def_set = single_set (def_insn);
 908   if (!def_set)
 909     return;
 910
 911   /* Only try one kind of propagation.  If two are possible, we'll
 912      do it on the following iterations.  */
 913   if (!forward_propagate_and_simplify (use, def_insn, def_set))
 914     forward_propagate_subreg (use, def_insn, def_set);
 915 }
 916
 917 \f
 918 static void
 919 fwprop_init (void)
 920 {
 921   num_changes = 0;
 922   calculate_dominance_info (CDI_DOMINATORS);
 923
 924   /* We do not always want to propagate into loops, so we have to find
 925      loops and be careful about them.  But we have to call flow_loops_find
 926      before df_analyze, because flow_loops_find may introduce new jump
 927      insns (sadly) if we are not working in cfglayout mode.  */
 928   loop_optimizer_init (0);
 929
 930   /* Now set up the dataflow problem (we only want use-def chains) and
 931      put the dataflow solver to work.  */
 932   df_set_flags (DF_EQ_NOTES);
 933   df_chain_add_problem (DF_UD_CHAIN);
 934   df_analyze ();
 935   df_maybe_reorganize_use_refs (DF_REF_ORDER_BY_INSN_WITH_NOTES);
 936   df_set_flags (DF_DEFER_INSN_RESCAN);
 937 }
 938
 939 static void
 940 fwprop_done (void)
 941 {
 942   loop_optimizer_finalize ();
 943
 944   free_dominance_info (CDI_DOMINATORS);
 945   cleanup_cfg (0);
 946   delete_trivially_dead_insns (get_insns (), max_reg_num ());
 947
 948   if (dump_file)
 949     fprintf (dump_file,
 950              "\nNumber of successful forward propagations: %d\n\n",
 951              num_changes);
 952 }
 953
 954
 955
 956 /* Main entry point.  */
 957
 958 static bool
 959 gate_fwprop (void)
 960 {
 961   return optimize > 0 && flag_forward_propagate;
 962 }
 963
 964 static unsigned int
 965 fwprop (void)
 966 {
 967   unsigned i;
 968
 969   fwprop_init ();
 970
 971   /* Go through all the uses.  update_df will create new ones at the
 972      end, and we'll go through them as well.
 973
 974      Do not forward propagate addresses into loops until after unrolling.
 975      CSE did so because it was able to fix its own mess, but we are not.  */
 976
 977   for (i = 0; i < DF_USES_TABLE_SIZE (); i++)
 978     {
 979       struct df_ref *use = DF_USES_GET (i);
 980       if (use)
 981         if (DF_REF_TYPE (use) == DF_REF_REG_USE
 982             || DF_REF_BB (use)->loop_father == NULL)
 983           forward_propagate_into (use);
 984     }
 985
 986   fwprop_done ();
 987   return 0;
 988 }
 989
 990 struct tree_opt_pass pass_rtl_fwprop =
 991 {
 992   "fwprop1",                            /* name */
 993   gate_fwprop,                          /* gate */
 994   fwprop,                               /* execute */
 995   NULL,                                 /* sub */
 996   NULL,                                 /* next */
 997   0,                                    /* static_pass_number */
 998   TV_FWPROP,                            /* tv_id */
 999   0,                                    /* properties_required */
1000   0,                                    /* properties_provided */
1001   0,                                    /* properties_destroyed */
1002   0,                                    /* todo_flags_start */
1003   TODO_df_finish |
1004   TODO_dump_func,                       /* todo_flags_finish */
1005   0                                     /* letter */
1006 };
1007
1008 static unsigned int
1009 fwprop_addr (void)
1010 {
1011   unsigned i;
1012   fwprop_init ();
1013
1014   /* Go through all the uses.  update_df will create new ones at the
1015      end, and we'll go through them as well.  */
1016   df_set_flags (DF_DEFER_INSN_RESCAN);
1017
1018   for (i = 0; i < DF_USES_TABLE_SIZE (); i++)
1019     {
1020       struct df_ref *use = DF_USES_GET (i);
1021       if (use)
1022         if (DF_REF_TYPE (use) != DF_REF_REG_USE
1023             && DF_REF_BB (use)->loop_father != NULL)
1024           forward_propagate_into (use);
1025     }
1026
1027   fwprop_done ();
1028
1029   return 0;
1030 }
1031
1032 struct tree_opt_pass pass_rtl_fwprop_addr =
1033 {
1034   "fwprop2",                            /* name */
1035   gate_fwprop,                          /* gate */
1036   fwprop_addr,                          /* execute */
1037   NULL,                                 /* sub */
1038   NULL,                                 /* next */
1039   0,                                    /* static_pass_number */
1040   TV_FWPROP,                            /* tv_id */
1041   0,                                    /* properties_required */
1042   0,                                    /* properties_provided */
1043   0,                                    /* properties_destroyed */
1044   0,                                    /* todo_flags_start */
1045   TODO_df_finish |
1046   TODO_dump_func,                       /* todo_flags_finish */
1047   0                                     /* letter */
1048 };