gcc/gcse.c

   1 /* Partial redundancy elimination / Hoisting for RTL.
   2    Copyright (C) 1997-2021 Free Software Foundation, Inc.
   3
   4 This file is part of GCC.
   5
   6 GCC is free software; you can redistribute it and/or modify it under
   7 the terms of the GNU General Public License as published by the Free
   8 Software Foundation; either version 3, or (at your option) any later
   9 version.
  10
  11 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
  12 WARRANTY; without even the implied warranty of MERCHANTABILITY or
  13 FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  14 for more details.
  15
  16 You should have received a copy of the GNU General Public License
  17 along with GCC; see the file COPYING3.  If not see
  18 <http://www.gnu.org/licenses/>.  */
  19
  20 /* TODO
  21    - reordering of memory allocation and freeing to be more space efficient
  22    - calc rough register pressure information and use the info to drive all
  23      kinds of code motion (including code hoisting) in a unified way.
  24 */
  25
  26 /* References searched while implementing this.
  27
  28    Compilers Principles, Techniques and Tools
  29    Aho, Sethi, Ullman
  30    Addison-Wesley, 1988
  31
  32    Global Optimization by Suppression of Partial Redundancies
  33    E. Morel, C. Renvoise
   34    Communications of the ACM, Vol. 22, Num. 2, Feb. 1979
  35
  36    A Portable Machine-Independent Global Optimizer - Design and Measurements
  37    Frederick Chow
  38    Stanford Ph.D. thesis, Dec. 1983
  39
  40    A Fast Algorithm for Code Movement Optimization
  41    D.M. Dhamdhere
  42    SIGPLAN Notices, Vol. 23, Num. 10, Oct. 1988
  43
  44    A Solution to a Problem with Morel and Renvoise's
  45    Global Optimization by Suppression of Partial Redundancies
  46    K-H Drechsler, M.P. Stadel
  47    ACM TOPLAS, Vol. 10, Num. 4, Oct. 1988
  48
  49    Practical Adaptation of the Global Optimization
  50    Algorithm of Morel and Renvoise
  51    D.M. Dhamdhere
   52    ACM TOPLAS, Vol. 13, Num. 2, Apr. 1991
  53
  54    Efficiently Computing Static Single Assignment Form and the Control
  55    Dependence Graph
  56    R. Cytron, J. Ferrante, B.K. Rosen, M.N. Wegman, and F.K. Zadeck
  57    ACM TOPLAS, Vol. 13, Num. 4, Oct. 1991
  58
  59    Lazy Code Motion
  60    J. Knoop, O. Ruthing, B. Steffen
  61    ACM SIGPLAN Notices Vol. 27, Num. 7, Jul. 1992, '92 Conference on PLDI
  62
  63    What's In a Region?  Or Computing Control Dependence Regions in Near-Linear
  64    Time for Reducible Flow Control
  65    Thomas Ball
  66    ACM Letters on Programming Languages and Systems,
  67    Vol. 2, Num. 1-4, Mar-Dec 1993
  68
  69    An Efficient Representation for Sparse Sets
  70    Preston Briggs, Linda Torczon
  71    ACM Letters on Programming Languages and Systems,
  72    Vol. 2, Num. 1-4, Mar-Dec 1993
  73
  74    A Variation of Knoop, Ruthing, and Steffen's Lazy Code Motion
  75    K-H Drechsler, M.P. Stadel
  76    ACM SIGPLAN Notices, Vol. 28, Num. 5, May 1993
  77
  78    Partial Dead Code Elimination
  79    J. Knoop, O. Ruthing, B. Steffen
  80    ACM SIGPLAN Notices, Vol. 29, Num. 6, Jun. 1994
  81
  82    Effective Partial Redundancy Elimination
  83    P. Briggs, K.D. Cooper
  84    ACM SIGPLAN Notices, Vol. 29, Num. 6, Jun. 1994
  85
  86    The Program Structure Tree: Computing Control Regions in Linear Time
  87    R. Johnson, D. Pearson, K. Pingali
  88    ACM SIGPLAN Notices, Vol. 29, Num. 6, Jun. 1994
  89
  90    Optimal Code Motion: Theory and Practice
  91    J. Knoop, O. Ruthing, B. Steffen
  92    ACM TOPLAS, Vol. 16, Num. 4, Jul. 1994
  93
  94    The power of assignment motion
  95    J. Knoop, O. Ruthing, B. Steffen
  96    ACM SIGPLAN Notices Vol. 30, Num. 6, Jun. 1995, '95 Conference on PLDI
  97
  98    Global code motion / global value numbering
  99    C. Click
 100    ACM SIGPLAN Notices Vol. 30, Num. 6, Jun. 1995, '95 Conference on PLDI
 101
 102    Value Driven Redundancy Elimination
 103    L.T. Simpson
 104    Rice University Ph.D. thesis, Apr. 1996
 105
 106    Value Numbering
 107    L.T. Simpson
 108    Massively Scalar Compiler Project, Rice University, Sep. 1996
 109
 110    High Performance Compilers for Parallel Computing
 111    Michael Wolfe
 112    Addison-Wesley, 1996
 113
 114    Advanced Compiler Design and Implementation
 115    Steven Muchnick
 116    Morgan Kaufmann, 1997
 117
 118    Building an Optimizing Compiler
 119    Robert Morgan
 120    Digital Press, 1998
 121
 122    People wishing to speed up the code here should read:
 123      Elimination Algorithms for Data Flow Analysis
 124      B.G. Ryder, M.C. Paull
 125      ACM Computing Surveys, Vol. 18, Num. 3, Sep. 1986
 126
 127      How to Analyze Large Programs Efficiently and Informatively
 128      D.M. Dhamdhere, B.K. Rosen, F.K. Zadeck
 129      ACM SIGPLAN Notices Vol. 27, Num. 7, Jul. 1992, '92 Conference on PLDI
 130
 131    People wishing to do something different can find various possibilities
 132    in the above papers and elsewhere.
 133 */
 134
 135 #include "config.h"
 136 #include "system.h"
 137 #include "coretypes.h"
 138 #include "backend.h"
 139 #include "target.h"
 140 #include "rtl.h"
 141 #include "tree.h"
 142 #include "predict.h"
 143 #include "df.h"
 144 #include "memmodel.h"
 145 #include "tm_p.h"
 146 #include "insn-config.h"
 147 #include "print-rtl.h"
 148 #include "regs.h"
 149 #include "ira.h"
 150 #include "recog.h"
 151 #include "diagnostic-core.h"
 152 #include "cfgrtl.h"
 153 #include "cfganal.h"
 154 #include "lcm.h"
 155 #include "cfgcleanup.h"
 156 #include "expr.h"
 157 #include "intl.h"
 158 #include "tree-pass.h"
 159 #include "dbgcnt.h"
 160 #include "gcse.h"
 161 #include "gcse-common.h"
 162 #include "function-abi.h"
 163
 164 /* We support GCSE via Partial Redundancy Elimination.  PRE optimizations
 165    are a superset of those done by classic GCSE.
 166
 167    Two passes of copy/constant propagation are done around PRE or hoisting
 168    because the first one enables more GCSE and the second one helps to clean
 169    up the copies that PRE and HOIST create.  This is needed more for PRE than
 170    for HOIST because code hoisting will try to use an existing register
 171    containing the common subexpression rather than create a new one.  This is
 172    harder to do for PRE because of the code motion (which HOIST doesn't do).
 173
 174    Expressions we are interested in GCSE-ing are of the form
 175    (set (pseudo-reg) (expression)).
 176    Function want_to_gcse_p says what these are.
 177
 178    In addition, expressions in REG_EQUAL notes are candidates for GCSE-ing.
 179    This allows PRE to hoist expressions that are expressed in multiple insns,
 180    such as complex address calculations (e.g. for PIC code, or loads with a
 181    high part and a low part).
 182
 183    PRE handles moving invariant expressions out of loops (by treating them as
 184    partially redundant).
 185
 186    **********************
 187
 188    We used to support multiple passes but there are diminishing returns in
 189    doing so.  The first pass usually makes 90% of the changes that are doable.
 190    A second pass can make a few more changes made possible by the first pass.
 191    Experiments show any further passes don't make enough changes to justify
 192    the expense.
 193
 194    A study of spec92 using an unlimited number of passes:
 195    [1 pass] = 1208 substitutions, [2] = 577, [3] = 202, [4] = 192, [5] = 83,
 196    [6] = 34, [7] = 17, [8] = 9, [9] = 4, [10] = 4, [11] = 2,
 197    [12] = 2, [13] = 1, [15] = 1, [16] = 2, [41] = 1
 198
 199    It was found doing copy propagation between each pass enables further
 200    substitutions.
 201
 202    This study was done before expressions in REG_EQUAL notes were added as
 203    candidate expressions for optimization, and before the GIMPLE optimizers
  204    were added.  Probably, multiple passes are even less efficient now than
 205    at the time when the study was conducted.
 206
 207    PRE is quite expensive in complicated functions because the DFA can take
 208    a while to converge.  Hence we only perform one pass.
 209
 210    **********************
 211
 212    The steps for PRE are:
 213
 214    1) Build the hash table of expressions we wish to GCSE (expr_hash_table).
 215
 216    2) Perform the data flow analysis for PRE.
 217
 218    3) Delete the redundant instructions
 219
 220    4) Insert the required copies [if any] that make the partially
 221       redundant instructions fully redundant.
 222
 223    5) For other reaching expressions, insert an instruction to copy the value
 224       to a newly created pseudo that will reach the redundant instruction.
 225
 226    The deletion is done first so that when we do insertions we
 227    know which pseudo reg to use.
 228
 229    Various papers have argued that PRE DFA is expensive (O(n^2)) and others
 230    argue it is not.  The number of iterations for the algorithm to converge
 231    is typically 2-4 so I don't view it as that expensive (relatively speaking).
 232
 233    PRE GCSE depends heavily on the second CPROP pass to clean up the copies
 234    we create.  To make an expression reach the place where it's redundant,
 235    the result of the expression is copied to a new register, and the redundant
 236    expression is deleted by replacing it with this new register.  Classic GCSE
 237    doesn't have this problem as much as it computes the reaching defs of
 238    each register in each block and thus can try to use an existing
 239    register.  */
 240 \f
 241 /* GCSE global vars.  */
 242
 243 struct target_gcse default_target_gcse;
 244 #if SWITCHABLE_TARGET
 245 struct target_gcse *this_target_gcse = &default_target_gcse;
 246 #endif
 247
 248 /* Set to non-zero if CSE should run after all GCSE optimizations are done.  */
 249 int flag_rerun_cse_after_global_opts;
 250
 251 /* An obstack for our working variables.  */
 252 static struct obstack gcse_obstack;
 253
 254 /* Hash table of expressions.  */
 255
 256 struct gcse_expr
 257 {
 258   /* The expression.  */
 259   rtx expr;
 260   /* Index in the available expression bitmaps.  */
 261   int bitmap_index;
 262   /* Next entry with the same hash.  */
 263   struct gcse_expr *next_same_hash;
 264   /* List of anticipatable occurrences in basic blocks in the function.
 265      An "anticipatable occurrence" is one that is the first occurrence in the
 266      basic block, the operands are not modified in the basic block prior
 267      to the occurrence and the output is not used between the start of
 268      the block and the occurrence.  */
 269   struct gcse_occr *antic_occr;
 270   /* List of available occurrence in basic blocks in the function.
 271      An "available occurrence" is one that is the last occurrence in the
 272      basic block and the operands are not modified by following statements in
 273      the basic block [including this insn].  */
 274   struct gcse_occr *avail_occr;
 275   /* Non-null if the computation is PRE redundant.
 276      The value is the newly created pseudo-reg to record a copy of the
 277      expression in all the places that reach the redundant copy.  */
 278   rtx reaching_reg;
 279   /* Maximum distance in instructions this expression can travel.
 280      We avoid moving simple expressions for more than a few instructions
 281      to keep register pressure under control.
 282      A value of "0" removes restrictions on how far the expression can
 283      travel.  */
 284   HOST_WIDE_INT max_distance;
 285 };
 286
 287 /* Occurrence of an expression.
 288    There is one per basic block.  If a pattern appears more than once the
 289    last appearance is used [or first for anticipatable expressions].  */
 290
 291 struct gcse_occr
 292 {
 293   /* Next occurrence of this expression.  */
 294   struct gcse_occr *next;
 295   /* The insn that computes the expression.  */
 296   rtx_insn *insn;
 297   /* Nonzero if this [anticipatable] occurrence has been deleted.  */
 298   char deleted_p;
 299   /* Nonzero if this [available] occurrence has been copied to
 300      reaching_reg.  */
 301   /* ??? This is mutually exclusive with deleted_p, so they could share
 302      the same byte.  */
 303   char copied_p;
 304 };
 305
 306 typedef struct gcse_occr *occr_t;
 307
 308 /* Expression hash tables.
 309    Each hash table is an array of buckets.
 310    ??? It is known that if it were an array of entries, structure elements
 311    `next_same_hash' and `bitmap_index' wouldn't be necessary.  However, it is
 312    not clear whether in the final analysis a sufficient amount of memory would
 313    be saved as the size of the available expression bitmaps would be larger
 314    [one could build a mapping table without holes afterwards though].
 315    Someday I'll perform the computation and figure it out.  */
 316
 317 struct gcse_hash_table_d
 318 {
 319   /* The table itself.
 320      This is an array of `expr_hash_table_size' elements.  */
 321   struct gcse_expr **table;
 322
 323   /* Size of the hash table, in elements.  */
 324   unsigned int size;
 325
 326   /* Number of hash table elements.  */
 327   unsigned int n_elems;
 328 };
 329
 330 /* Expression hash table.  */
 331 static struct gcse_hash_table_d expr_hash_table;
 332
 333 /* This is a list of expressions which are MEMs and will be used by load
 334    or store motion.
 335    Load motion tracks MEMs which aren't killed by anything except itself,
 336    i.e. loads and stores to a single location.
 337    We can then allow movement of these MEM refs with a little special
 338    allowance. (all stores copy the same value to the reaching reg used
 339    for the loads).  This means all values used to store into memory must have
 340    no side effects so we can re-issue the setter value.  */
 341
 342 struct ls_expr
 343 {
 344   struct gcse_expr * expr;      /* Gcse expression reference for LM.  */
 345   rtx pattern;                  /* Pattern of this mem.  */
 346   rtx pattern_regs;             /* List of registers mentioned by the mem.  */
 347   vec<rtx_insn *> stores;       /* INSN list of stores seen.  */
 348   struct ls_expr * next;        /* Next in the list.  */
 349   int invalid;                  /* Invalid for some reason.  */
 350   int index;                    /* If it maps to a bitmap index.  */
 351   unsigned int hash_index;      /* Index when in a hash table.  */
 352   rtx reaching_reg;             /* Register to use when re-writing.  */
 353 };
 354
 355 /* Head of the list of load/store memory refs.  */
 356 static struct ls_expr * pre_ldst_mems = NULL;
 357
 358 struct pre_ldst_expr_hasher : nofree_ptr_hash <ls_expr>
 359 {
 360   typedef value_type compare_type;
 361   static inline hashval_t hash (const ls_expr *);
 362   static inline bool equal (const ls_expr *, const ls_expr *);
 363 };
 364
 365 /* Hashtable helpers.  */
 366 inline hashval_t
 367 pre_ldst_expr_hasher::hash (const ls_expr *x)
 368 {
 369   int do_not_record_p = 0;
 370   return
 371     hash_rtx (x->pattern, GET_MODE (x->pattern), &do_not_record_p, NULL, false);
 372 }
 373
 374 static int expr_equiv_p (const_rtx, const_rtx);
 375
 376 inline bool
 377 pre_ldst_expr_hasher::equal (const ls_expr *ptr1,
 378                              const ls_expr *ptr2)
 379 {
 380   return expr_equiv_p (ptr1->pattern, ptr2->pattern);
 381 }
 382
 383 /* Hashtable for the load/store memory refs.  */
 384 static hash_table<pre_ldst_expr_hasher> *pre_ldst_table;
 385
 386 /* Bitmap containing one bit for each register in the program.
 387    Used when performing GCSE to track which registers have been set since
 388    the start of the basic block.  */
 389 static regset reg_set_bitmap;
 390
 391 /* Array, indexed by basic block number for a list of insns which modify
 392    memory within that block.  */
 393 static vec<rtx_insn *> *modify_mem_list;
 394 static bitmap modify_mem_list_set;
 395
 396 /* This array parallels modify_mem_list, except that it stores MEMs
 397    being set and their canonicalized memory addresses.  */
 398 static vec<modify_pair> *canon_modify_mem_list;
 399
 400 /* Bitmap indexed by block numbers to record which blocks contain
 401    function calls.  */
 402 static bitmap blocks_with_calls;
 403
 404 /* Various variables for statistics gathering.  */
 405
 406 /* Memory used in a pass.
 407    This isn't intended to be absolutely precise.  Its intent is only
 408    to keep an eye on memory usage.  */
 409 static int bytes_used;
 410
 411 /* GCSE substitutions made.  */
 412 static int gcse_subst_count;
 413 /* Number of copy instructions created.  */
 414 static int gcse_create_count;
 415 \f
 416 /* Doing code hoisting.  */
 417 static bool doing_code_hoisting_p = false;
 418 \f
 419 /* For available exprs */
 420 static sbitmap *ae_kill;
 421 \f
 422 /* Data stored for each basic block.  */
 423 struct bb_data
 424 {
 425   /* Maximal register pressure inside basic block for given register class
 426      (defined only for the pressure classes).  */
 427   int max_reg_pressure[N_REG_CLASSES];
 428   /* Recorded register pressure of basic block before trying to hoist
 429      an expression.  Will be used to restore the register pressure
 430      if the expression should not be hoisted.  */
 431   int old_pressure;
 432   /* Recorded register live_in info of basic block during code hoisting
 433      process.  BACKUP is used to record live_in info before trying to
 434      hoist an expression, and will be used to restore LIVE_IN if the
 435      expression should not be hoisted.  */
 436   bitmap live_in, backup;
 437 };
 438
 439 #define BB_DATA(bb) ((struct bb_data *) (bb)->aux)
 440
 441 static basic_block curr_bb;
 442
 443 /* Current register pressure for each pressure class.  */
 444 static int curr_reg_pressure[N_REG_CLASSES];
 445 \f
 446
 447 static void compute_can_copy (void);
 448 static void *gmalloc (size_t) ATTRIBUTE_MALLOC;
 449 static void *gcalloc (size_t, size_t) ATTRIBUTE_MALLOC;
 450 static void *gcse_alloc (unsigned long);
 451 static void alloc_gcse_mem (void);
 452 static void free_gcse_mem (void);
 453 static void hash_scan_insn (rtx_insn *, struct gcse_hash_table_d *);
 454 static void hash_scan_set (rtx, rtx_insn *, struct gcse_hash_table_d *);
 455 static void hash_scan_clobber (rtx, rtx_insn *, struct gcse_hash_table_d *);
 456 static void hash_scan_call (rtx, rtx_insn *, struct gcse_hash_table_d *);
 457 static int oprs_unchanged_p (const_rtx, const rtx_insn *, int);
 458 static int oprs_anticipatable_p (const_rtx, const rtx_insn *);
 459 static int oprs_available_p (const_rtx, const rtx_insn *);
 460 static void insert_expr_in_table (rtx, machine_mode, rtx_insn *, int, int,
 461                                   HOST_WIDE_INT, struct gcse_hash_table_d *);
 462 static unsigned int hash_expr (const_rtx, machine_mode, int *, int);
 463 static void record_last_reg_set_info (rtx_insn *, int);
 464 static void record_last_mem_set_info (rtx_insn *);
 465 static void record_last_set_info (rtx, const_rtx, void *);
 466 static void compute_hash_table (struct gcse_hash_table_d *);
 467 static void alloc_hash_table (struct gcse_hash_table_d *);
 468 static void free_hash_table (struct gcse_hash_table_d *);
 469 static void compute_hash_table_work (struct gcse_hash_table_d *);
 470 static void dump_hash_table (FILE *, const char *, struct gcse_hash_table_d *);
 471 static void compute_local_properties (sbitmap *, sbitmap *, sbitmap *,
 472                                       struct gcse_hash_table_d *);
 473 static void mems_conflict_for_gcse_p (rtx, const_rtx, void *);
 474 static int load_killed_in_block_p (const_basic_block, int, const_rtx, int);
 475 static void alloc_pre_mem (int, int);
 476 static void free_pre_mem (void);
 477 static struct edge_list *compute_pre_data (void);
 478 static int pre_expr_reaches_here_p (basic_block, struct gcse_expr *,
 479                                     basic_block);
 480 static void insert_insn_end_basic_block (struct gcse_expr *, basic_block);
 481 static void pre_insert_copy_insn (struct gcse_expr *, rtx_insn *);
 482 static void pre_insert_copies (void);
 483 static int pre_delete (void);
 484 static int pre_gcse (struct edge_list *);
 485 static int one_pre_gcse_pass (void);
 486 static void add_label_notes (rtx, rtx_insn *);
 487 static void alloc_code_hoist_mem (int, int);
 488 static void free_code_hoist_mem (void);
 489 static void compute_code_hoist_vbeinout (void);
 490 static void compute_code_hoist_data (void);
 491 static int should_hoist_expr_to_dom (basic_block, struct gcse_expr *,
 492                                      basic_block,
 493                                      sbitmap, HOST_WIDE_INT, int *,
 494                                      enum reg_class,
 495                                      int *, bitmap, rtx_insn *);
 496 static int hoist_code (void);
 497 static enum reg_class get_regno_pressure_class (int regno, int *nregs);
 498 static enum reg_class get_pressure_class_and_nregs (rtx_insn *insn, int *nregs);
 499 static int one_code_hoisting_pass (void);
 500 static rtx_insn *process_insert_insn (struct gcse_expr *);
 501 static int pre_edge_insert (struct edge_list *, struct gcse_expr **);
 502 static int pre_expr_reaches_here_p_work (basic_block, struct gcse_expr *,
 503                                          basic_block, char *);
 504 static struct ls_expr * ldst_entry (rtx);
 505 static void free_ldst_entry (struct ls_expr *);
 506 static void free_ld_motion_mems (void);
 507 static void print_ldst_list (FILE *);
 508 static struct ls_expr * find_rtx_in_ldst (rtx);
 509 static int simple_mem (const_rtx);
 510 static void invalidate_any_buried_refs (rtx);
 511 static void compute_ld_motion_mems (void);
 512 static void trim_ld_motion_mems (void);
 513 static void update_ld_motion_stores (struct gcse_expr *);
 514 static void clear_modify_mem_tables (void);
 515 static void free_modify_mem_tables (void);
 516
 517 #define GNEW(T)                 ((T *) gmalloc (sizeof (T)))
 518 #define GCNEW(T)                ((T *) gcalloc (1, sizeof (T)))
 519
 520 #define GNEWVEC(T, N)           ((T *) gmalloc (sizeof (T) * (N)))
 521 #define GCNEWVEC(T, N)          ((T *) gcalloc ((N), sizeof (T)))
 522
 523 #define GNEWVAR(T, S)           ((T *) gmalloc ((S)))
 524 #define GCNEWVAR(T, S)          ((T *) gcalloc (1, (S)))
 525
 526 #define GOBNEW(T)               ((T *) gcse_alloc (sizeof (T)))
 527 #define GOBNEWVAR(T, S)         ((T *) gcse_alloc ((S)))
 528 \f
 529 /* Misc. utilities.  */
 530
 531 #define can_copy \
 532   (this_target_gcse->x_can_copy)
 533 #define can_copy_init_p \
 534   (this_target_gcse->x_can_copy_init_p)
 535
 536 /* Compute which modes support reg/reg copy operations.  */
 537
 538 static void
 539 compute_can_copy (void)
 540 {
 541   int i;
 542 #ifndef AVOID_CCMODE_COPIES
 543   rtx reg;
 544  rtx_insn *insn;
 545 #endif
 546   memset (can_copy, 0, NUM_MACHINE_MODES);
 547
 548   start_sequence ();
 549   for (i = 0; i < NUM_MACHINE_MODES; i++)
 550     if (GET_MODE_CLASS (i) == MODE_CC)
 551       {
 552 #ifdef AVOID_CCMODE_COPIES
 553         can_copy[i] = 0;
 554 #else
 555         reg = gen_rtx_REG ((machine_mode) i, LAST_VIRTUAL_REGISTER + 1);
 556         insn = emit_insn (gen_rtx_SET (reg, reg));
 557         if (recog (PATTERN (insn), insn, NULL) >= 0)
 558           can_copy[i] = 1;
 559 #endif
 560       }
 561     else
 562       can_copy[i] = 1;
 563
 564   end_sequence ();
 565 }
 566
 567 /* Returns whether the mode supports reg/reg copy operations.  */
 568
 569 bool
 570 can_copy_p (machine_mode mode)
 571 {
 572   if (! can_copy_init_p)
 573     {
 574       compute_can_copy ();
 575       can_copy_init_p = true;
 576     }
 577
 578   return can_copy[mode] != 0;
 579 }
 580 \f
 581 /* Cover function to xmalloc to record bytes allocated.  */
 582
 583 static void *
 584 gmalloc (size_t size)
 585 {
 586   bytes_used += size;
 587   return xmalloc (size);
 588 }
 589
 590 /* Cover function to xcalloc to record bytes allocated.  */
 591
 592 static void *
 593 gcalloc (size_t nelem, size_t elsize)
 594 {
 595   bytes_used += nelem * elsize;
 596   return xcalloc (nelem, elsize);
 597 }
 598
 599 /* Cover function to obstack_alloc.  */
 600
 601 static void *
 602 gcse_alloc (unsigned long size)
 603 {
 604   bytes_used += size;
 605   return obstack_alloc (&gcse_obstack, size);
 606 }
 607
 608 /* Allocate memory for the reg/memory set tracking tables.
 609    This is called at the start of each pass.  */
 610
 611 static void
 612 alloc_gcse_mem (void)
 613 {
 614   /* Allocate vars to track sets of regs.  */
 615   reg_set_bitmap = ALLOC_REG_SET (NULL);
 616
 617   /* Allocate array to keep a list of insns which modify memory in each
 618      basic block.  The two typedefs are needed to work around the
 619      pre-processor limitation with template types in macro arguments.  */
 620   typedef vec<rtx_insn *> vec_rtx_heap;
 621   typedef vec<modify_pair> vec_modify_pair_heap;
 622   modify_mem_list = GCNEWVEC (vec_rtx_heap, last_basic_block_for_fn (cfun));
 623   canon_modify_mem_list = GCNEWVEC (vec_modify_pair_heap,
 624                                     last_basic_block_for_fn (cfun));
 625   modify_mem_list_set = BITMAP_ALLOC (NULL);
 626   blocks_with_calls = BITMAP_ALLOC (NULL);
 627 }
 628
 629 /* Free memory allocated by alloc_gcse_mem.  */
 630
 631 static void
 632 free_gcse_mem (void)
 633 {
 634   FREE_REG_SET (reg_set_bitmap);
 635
 636   free_modify_mem_tables ();
 637   BITMAP_FREE (modify_mem_list_set);
 638   BITMAP_FREE (blocks_with_calls);
 639 }
 640 \f
 641 /* Compute the local properties of each recorded expression.
 642
 643    Local properties are those that are defined by the block, irrespective of
 644    other blocks.
 645
 646    An expression is transparent in a block if its operands are not modified
 647    in the block.
 648
 649    An expression is computed (locally available) in a block if it is computed
 650    at least once and expression would contain the same value if the
 651    computation was moved to the end of the block.
 652
 653    An expression is locally anticipatable in a block if it is computed at
 654    least once and expression would contain the same value if the computation
 655    was moved to the beginning of the block.
 656
 657    We call this routine for pre and code hoisting.  They all compute
 658    basically the same information and thus can easily share this code.
 659
 660    TRANSP, COMP, and ANTLOC are destination sbitmaps for recording local
 661    properties.  If NULL, then it is not necessary to compute or record that
 662    particular property.
 663
 664    TABLE controls which hash table to look at.  */
 665
 666 static void
 667 compute_local_properties (sbitmap *transp, sbitmap *comp, sbitmap *antloc,
 668                           struct gcse_hash_table_d *table)
 669 {
 670   unsigned int i;
 671
 672   /* Initialize any bitmaps that were passed in.  */
 673   if (transp)
 674     {
 675       bitmap_vector_ones (transp, last_basic_block_for_fn (cfun));
 676     }
 677
 678   if (comp)
 679     bitmap_vector_clear (comp, last_basic_block_for_fn (cfun));
 680   if (antloc)
 681     bitmap_vector_clear (antloc, last_basic_block_for_fn (cfun));
 682
 683   for (i = 0; i < table->size; i++)
 684     {
 685       struct gcse_expr *expr;
 686
 687       for (expr = table->table[i]; expr != NULL; expr = expr->next_same_hash)
 688         {
 689           int indx = expr->bitmap_index;
 690           struct gcse_occr *occr;
 691
 692           /* The expression is transparent in this block if it is not killed.
 693              We start by assuming all are transparent [none are killed], and
 694              then reset the bits for those that are.  */
 695           if (transp)
 696             compute_transp (expr->expr, indx, transp,
 697                             blocks_with_calls,
 698                             modify_mem_list_set,
 699                             canon_modify_mem_list);
 700
 701           /* The occurrences recorded in antic_occr are exactly those that
 702              we want to set to nonzero in ANTLOC.  */
 703           if (antloc)
 704             for (occr = expr->antic_occr; occr != NULL; occr = occr->next)
 705               {
 706                 bitmap_set_bit (antloc[BLOCK_FOR_INSN (occr->insn)->index], indx);
 707
 708                 /* While we're scanning the table, this is a good place to
 709                    initialize this.  */
 710                 occr->deleted_p = 0;
 711               }
 712
 713           /* The occurrences recorded in avail_occr are exactly those that
 714              we want to set to nonzero in COMP.  */
 715           if (comp)
 716             for (occr = expr->avail_occr; occr != NULL; occr = occr->next)
 717               {
 718                 bitmap_set_bit (comp[BLOCK_FOR_INSN (occr->insn)->index], indx);
 719
 720                 /* While we're scanning the table, this is a good place to
 721                    initialize this.  */
 722                 occr->copied_p = 0;
 723               }
 724
 725           /* While we're scanning the table, this is a good place to
 726              initialize this.  */
 727           expr->reaching_reg = 0;
 728         }
 729     }
 730 }
 731 \f
 732 /* Hash table support.  */
 733
 734 struct reg_avail_info
 735 {
 736   basic_block last_bb;
 737   int first_set;
 738   int last_set;
 739 };
 740
 741 static struct reg_avail_info *reg_avail_info;
 742 static basic_block current_bb;
 743
 744 /* See whether X, the source of a set, is something we want to consider for
 745    GCSE.  */
 746
 747 static int
 748 want_to_gcse_p (rtx x, machine_mode mode, HOST_WIDE_INT *max_distance_ptr)
 749 {
 750 #ifdef STACK_REGS
 751   /* On register stack architectures, don't GCSE constants from the
 752      constant pool, as the benefits are often swamped by the overhead
 753      of shuffling the register stack between basic blocks.  */
 754   if (IS_STACK_MODE (GET_MODE (x)))
 755     x = avoid_constant_pool_reference (x);
 756 #endif
 757
 758   /* GCSE'ing constants:
 759
 760      We do not specifically distinguish between constant and non-constant
 761      expressions in PRE and Hoist.  We use set_src_cost below to limit
 762      the maximum distance simple expressions can travel.
 763
 764      Nevertheless, constants are much easier to GCSE, and, hence,
 765      it is easy to overdo the optimizations.  Usually, excessive PRE and
 766      Hoisting of constant leads to increased register pressure.
 767
 768      RA can deal with this by rematerialing some of the constants.
 769      Therefore, it is important that the back-end generates sets of constants
 770      in a way that allows reload rematerialize them under high register
 771      pressure, i.e., a pseudo register with REG_EQUAL to constant
 772      is set only once.  Failing to do so will result in IRA/reload
 773      spilling such constants under high register pressure instead of
 774      rematerializing them.  */
 775
 776   switch (GET_CODE (x))
 777     {
 778     case REG:
 779     case SUBREG:
 780     case CALL:
 781       return 0;
 782
 783     CASE_CONST_ANY:
 784       if (!doing_code_hoisting_p)
 785         /* Do not PRE constants.  */
 786         return 0;
 787
 788       /* FALLTHRU */
 789
 790     default:
 791       if (doing_code_hoisting_p)
 792         /* PRE doesn't implement max_distance restriction.  */
 793         {
 794           int cost;
 795           HOST_WIDE_INT max_distance;
 796
 797           gcc_assert (!optimize_function_for_speed_p (cfun)
 798                       && optimize_function_for_size_p (cfun));
 799           cost = set_src_cost (x, mode, 0);
 800
 801           if (cost < COSTS_N_INSNS (param_gcse_unrestricted_cost))
 802             {
 803               max_distance
 804                 = ((HOST_WIDE_INT)param_gcse_cost_distance_ratio * cost) / 10;
 805               if (max_distance == 0)
 806                 return 0;
 807
 808               gcc_assert (max_distance > 0);
 809             }
 810           else
 811             max_distance = 0;
 812
 813           if (max_distance_ptr)
 814             *max_distance_ptr = max_distance;
 815         }
 816
 817       return can_assign_to_reg_without_clobbers_p (x, mode);
 818     }
 819 }
 820
/* Scratch insn used internally by can_assign_to_reg_without_clobbers_p.
   It is created there on first use and then reused for every query; its
   SET_SRC is reset to NULL_RTX at the end of each query so that it does
   not keep pointers into GC space.  */

static GTY(()) rtx_insn *test_insn;
 824
 825 /* Return true if we can assign X to a pseudo register of mode MODE
 826    such that the resulting insn does not result in clobbering a hard
 827    register as a side-effect.
 828
 829    Additionally, if the target requires it, check that the resulting insn
 830    can be copied.  If it cannot, this means that X is special and probably
 831    has hidden side-effects we don't want to mess with.
 832
 833    This function is typically used by code motion passes, to verify
 834    that it is safe to insert an insn without worrying about clobbering
 835    maybe live hard regs.  */
 836
 837 bool
 838 can_assign_to_reg_without_clobbers_p (rtx x, machine_mode mode)
 839 {
 840   int num_clobbers = 0;
 841   int icode;
 842   bool can_assign = false;
 843
 844   /* If this is a valid operand, we are OK.  If it's VOIDmode, we aren't.  */
 845   if (general_operand (x, mode))
 846     return 1;
 847   else if (GET_MODE (x) == VOIDmode)
 848     return 0;
 849
 850   /* Otherwise, check if we can make a valid insn from it.  First initialize
 851      our test insn if we haven't already.  */
 852   if (test_insn == 0)
 853     {
 854       test_insn
 855         = make_insn_raw (gen_rtx_SET (gen_rtx_REG (word_mode,
 856                                                    FIRST_PSEUDO_REGISTER * 2),
 857                                       const0_rtx));
 858       SET_NEXT_INSN (test_insn) = SET_PREV_INSN (test_insn) = 0;
 859       INSN_LOCATION (test_insn) = UNKNOWN_LOCATION;
 860     }
 861
 862   /* Now make an insn like the one we would make when GCSE'ing and see if
 863      valid.  */
 864   PUT_MODE (SET_DEST (PATTERN (test_insn)), mode);
 865   SET_SRC (PATTERN (test_insn)) = x;
 866
 867   icode = recog (PATTERN (test_insn), test_insn, &num_clobbers);
 868
 869   /* If the test insn is valid and doesn't need clobbers, and the target also
 870      has no objections, we're good.  */
 871   if (icode >= 0
 872       && (num_clobbers == 0 || !added_clobbers_hard_reg_p (icode))
 873       && ! (targetm.cannot_copy_insn_p
 874             && targetm.cannot_copy_insn_p (test_insn)))
 875     can_assign = true;
 876
 877   /* Make sure test_insn doesn't have any pointers into GC space.  */
 878   SET_SRC (PATTERN (test_insn)) = NULL_RTX;
 879
 880   return can_assign;
 881 }
 882
 883 /* Return nonzero if the operands of expression X are unchanged from the
 884    start of INSN's basic block up to but not including INSN (if AVAIL_P == 0),
 885    or from INSN to the end of INSN's basic block (if AVAIL_P != 0).  */
 886
 887 static int
 888 oprs_unchanged_p (const_rtx x, const rtx_insn *insn, int avail_p)
 889 {
 890   int i, j;
 891   enum rtx_code code;
 892   const char *fmt;
 893
 894   if (x == 0)
 895     return 1;
 896
 897   code = GET_CODE (x);
 898   switch (code)
 899     {
 900     case REG:
 901       {
 902         struct reg_avail_info *info = &reg_avail_info[REGNO (x)];
 903
 904         if (info->last_bb != current_bb)
 905           return 1;
 906         if (avail_p)
 907           return info->last_set < DF_INSN_LUID (insn);
 908         else
 909           return info->first_set >= DF_INSN_LUID (insn);
 910       }
 911
 912     case MEM:
 913       if (! flag_gcse_lm
 914           || load_killed_in_block_p (current_bb, DF_INSN_LUID (insn),
 915                                      x, avail_p))
 916         return 0;
 917       else
 918         return oprs_unchanged_p (XEXP (x, 0), insn, avail_p);
 919
 920     case PRE_DEC:
 921     case PRE_INC:
 922     case POST_DEC:
 923     case POST_INC:
 924     case PRE_MODIFY:
 925     case POST_MODIFY:
 926       return 0;
 927
 928     case PC:
 929     case CONST:
 930     CASE_CONST_ANY:
 931     case SYMBOL_REF:
 932     case LABEL_REF:
 933     case ADDR_VEC:
 934     case ADDR_DIFF_VEC:
 935       return 1;
 936
 937     default:
 938       break;
 939     }
 940
 941   for (i = GET_RTX_LENGTH (code) - 1, fmt = GET_RTX_FORMAT (code); i >= 0; i--)
 942     {
 943       if (fmt[i] == 'e')
 944         {
 945           /* If we are about to do the last recursive call needed at this
 946              level, change it into iteration.  This function is called enough
 947              to be worth it.  */
 948           if (i == 0)
 949             return oprs_unchanged_p (XEXP (x, i), insn, avail_p);
 950
 951           else if (! oprs_unchanged_p (XEXP (x, i), insn, avail_p))
 952             return 0;
 953         }
 954       else if (fmt[i] == 'E')
 955         for (j = 0; j < XVECLEN (x, i); j++)
 956           if (! oprs_unchanged_p (XVECEXP (x, i, j), insn, avail_p))
 957             return 0;
 958     }
 959
 960   return 1;
 961 }
 962
/* Info passed from load_killed_in_block_p to mems_conflict_for_gcse_p
   through the DATA argument of the note_stores callback.  */

struct mem_conflict_info
{
  /* Input: the memory reference of the load being checked.
     mems_conflict_for_gcse_p will see if a memory store conflicts with
     this memory load.  */
  const_rtx mem;

  /* Output: set to true by mems_conflict_for_gcse_p when it finds a
     conflict between the two memory references.  load_killed_in_block_p
     clears it before each note_stores walk.  */
  bool conflict;
};
 975
 976 /* DEST is the output of an instruction.  If it is a memory reference and
 977    possibly conflicts with the load found in DATA, then communicate this
 978    information back through DATA.  */
 979
 980 static void
 981 mems_conflict_for_gcse_p (rtx dest, const_rtx setter ATTRIBUTE_UNUSED,
 982                           void *data)
 983 {
 984   struct mem_conflict_info *mci = (struct mem_conflict_info *) data;
 985
 986   while (GET_CODE (dest) == SUBREG
 987          || GET_CODE (dest) == ZERO_EXTRACT
 988          || GET_CODE (dest) == STRICT_LOW_PART)
 989     dest = XEXP (dest, 0);
 990
 991   /* If DEST is not a MEM, then it will not conflict with the load.  Note
 992      that function calls are assumed to clobber memory, but are handled
 993      elsewhere.  */
 994   if (! MEM_P (dest))
 995     return;
 996
 997   /* If we are setting a MEM in our list of specially recognized MEMs,
 998      don't mark as killed this time.  */
 999   if (pre_ldst_mems != NULL && expr_equiv_p (dest, mci->mem))
1000     {
1001       if (!find_rtx_in_ldst (dest))
1002         mci->conflict = true;
1003       return;
1004     }
1005
1006   if (true_dependence (dest, GET_MODE (dest), mci->mem))
1007     mci->conflict = true;
1008 }
1009
1010 /* Return nonzero if the expression in X (a memory reference) is killed
1011    in block BB before or after the insn with the LUID in UID_LIMIT.
1012    AVAIL_P is nonzero for kills after UID_LIMIT, and zero for kills
1013    before UID_LIMIT.
1014
1015    To check the entire block, set UID_LIMIT to max_uid + 1 and
1016    AVAIL_P to 0.  */
1017
1018 static int
1019 load_killed_in_block_p (const_basic_block bb, int uid_limit, const_rtx x,
1020                         int avail_p)
1021 {
1022   vec<rtx_insn *> list = modify_mem_list[bb->index];
1023   rtx_insn *setter;
1024   unsigned ix;
1025
1026   /* If this is a readonly then we aren't going to be changing it.  */
1027   if (MEM_READONLY_P (x))
1028     return 0;
1029
1030   FOR_EACH_VEC_ELT_REVERSE (list, ix, setter)
1031     {
1032       struct mem_conflict_info mci;
1033
1034       /* Ignore entries in the list that do not apply.  */
1035       if ((avail_p
1036            && DF_INSN_LUID (setter) < uid_limit)
1037           || (! avail_p
1038               && DF_INSN_LUID (setter) > uid_limit))
1039         continue;
1040
1041       /* If SETTER is a call everything is clobbered.  Note that calls
1042          to pure functions are never put on the list, so we need not
1043          worry about them.  */
1044       if (CALL_P (setter))
1045         return 1;
1046
1047       /* SETTER must be an INSN of some kind that sets memory.  Call
1048          note_stores to examine each hunk of memory that is modified.  */
1049       mci.mem = x;
1050       mci.conflict = false;
1051       note_stores (setter, mems_conflict_for_gcse_p, &mci);
1052       if (mci.conflict)
1053         return 1;
1054     }
1055   return 0;
1056 }
1057
1058 /* Return nonzero if the operands of expression X are unchanged from
1059    the start of INSN's basic block up to but not including INSN.  */
1060
1061 static int
1062 oprs_anticipatable_p (const_rtx x, const rtx_insn *insn)
1063 {
1064   return oprs_unchanged_p (x, insn, 0);
1065 }
1066
1067 /* Return nonzero if the operands of expression X are unchanged from
1068    INSN to the end of INSN's basic block.  */
1069
1070 static int
1071 oprs_available_p (const_rtx x, const rtx_insn *insn)
1072 {
1073   return oprs_unchanged_p (x, insn, 1);
1074 }
1075
1076 /* Hash expression X.
1077
1078    MODE is only used if X is a CONST_INT.  DO_NOT_RECORD_P is a boolean
1079    indicating if a volatile operand is found or if the expression contains
1080    something we don't want to insert in the table.  HASH_TABLE_SIZE is
1081    the current size of the hash table to be probed.  */
1082
1083 static unsigned int
1084 hash_expr (const_rtx x, machine_mode mode, int *do_not_record_p,
1085            int hash_table_size)
1086 {
1087   unsigned int hash;
1088
1089   *do_not_record_p = 0;
1090
1091   hash = hash_rtx (x, mode, do_not_record_p, NULL, /*have_reg_qty=*/false);
1092   return hash % hash_table_size;
1093 }
1094
1095 /* Return nonzero if exp1 is equivalent to exp2.  */
1096
1097 static int
1098 expr_equiv_p (const_rtx x, const_rtx y)
1099 {
1100   return exp_equiv_p (x, y, 0, true);
1101 }
1102
1103 /* Insert expression X in INSN in the hash TABLE.
1104    If it is already present, record it as the last occurrence in INSN's
1105    basic block.
1106
1107    MODE is the mode of the value X is being stored into.
1108    It is only used if X is a CONST_INT.
1109
1110    ANTIC_P is nonzero if X is an anticipatable expression.
1111    AVAIL_P is nonzero if X is an available expression.
1112
1113    MAX_DISTANCE is the maximum distance in instructions this expression can
1114    be moved.  */
1115
1116 static void
1117 insert_expr_in_table (rtx x, machine_mode mode, rtx_insn *insn,
1118                       int antic_p,
1119                       int avail_p, HOST_WIDE_INT max_distance,
1120                       struct gcse_hash_table_d *table)
1121 {
1122   int found, do_not_record_p;
1123   unsigned int hash;
1124   struct gcse_expr *cur_expr, *last_expr = NULL;
1125   struct gcse_occr *antic_occr, *avail_occr;
1126
1127   hash = hash_expr (x, mode, &do_not_record_p, table->size);
1128
1129   /* Do not insert expression in table if it contains volatile operands,
1130      or if hash_expr determines the expression is something we don't want
1131      to or can't handle.  */
1132   if (do_not_record_p)
1133     return;
1134
1135   cur_expr = table->table[hash];
1136   found = 0;
1137
1138   while (cur_expr && (found = expr_equiv_p (cur_expr->expr, x)) == 0)
1139     {
1140       /* If the expression isn't found, save a pointer to the end of
1141          the list.  */
1142       last_expr = cur_expr;
1143       cur_expr = cur_expr->next_same_hash;
1144     }
1145
1146   if (! found)
1147     {
1148       cur_expr = GOBNEW (struct gcse_expr);
1149       bytes_used += sizeof (struct gcse_expr);
1150       if (table->table[hash] == NULL)
1151         /* This is the first pattern that hashed to this index.  */
1152         table->table[hash] = cur_expr;
1153       else
1154         /* Add EXPR to end of this hash chain.  */
1155         last_expr->next_same_hash = cur_expr;
1156
1157       /* Set the fields of the expr element.  */
1158       cur_expr->expr = x;
1159       cur_expr->bitmap_index = table->n_elems++;
1160       cur_expr->next_same_hash = NULL;
1161       cur_expr->antic_occr = NULL;
1162       cur_expr->avail_occr = NULL;
1163       gcc_assert (max_distance >= 0);
1164       cur_expr->max_distance = max_distance;
1165     }
1166   else
1167     gcc_assert (cur_expr->max_distance == max_distance);
1168
1169   /* Now record the occurrence(s).  */
1170   if (antic_p)
1171     {
1172       antic_occr = cur_expr->antic_occr;
1173
1174       if (antic_occr
1175           && BLOCK_FOR_INSN (antic_occr->insn) != BLOCK_FOR_INSN (insn))
1176         antic_occr = NULL;
1177
1178       if (antic_occr)
1179         /* Found another instance of the expression in the same basic block.
1180            Prefer the currently recorded one.  We want the first one in the
1181            block and the block is scanned from start to end.  */
1182         ; /* nothing to do */
1183       else
1184         {
1185           /* First occurrence of this expression in this basic block.  */
1186           antic_occr = GOBNEW (struct gcse_occr);
1187           bytes_used += sizeof (struct gcse_occr);
1188           antic_occr->insn = insn;
1189           antic_occr->next = cur_expr->antic_occr;
1190           antic_occr->deleted_p = 0;
1191           cur_expr->antic_occr = antic_occr;
1192         }
1193     }
1194
1195   if (avail_p)
1196     {
1197       avail_occr = cur_expr->avail_occr;
1198
1199       if (avail_occr
1200           && BLOCK_FOR_INSN (avail_occr->insn) == BLOCK_FOR_INSN (insn))
1201         {
1202           /* Found another instance of the expression in the same basic block.
1203              Prefer this occurrence to the currently recorded one.  We want
1204              the last one in the block and the block is scanned from start
1205              to end.  */
1206           avail_occr->insn = insn;
1207         }
1208       else
1209         {
1210           /* First occurrence of this expression in this basic block.  */
1211           avail_occr = GOBNEW (struct gcse_occr);
1212           bytes_used += sizeof (struct gcse_occr);
1213           avail_occr->insn = insn;
1214           avail_occr->next = cur_expr->avail_occr;
1215           avail_occr->deleted_p = 0;
1216           cur_expr->avail_occr = avail_occr;
1217         }
1218     }
1219 }
1220
1221 /* Scan SET present in INSN and add an entry to the hash TABLE.  */
1222
1223 static void
1224 hash_scan_set (rtx set, rtx_insn *insn, struct gcse_hash_table_d *table)
1225 {
1226   rtx src = SET_SRC (set);
1227   rtx dest = SET_DEST (set);
1228   rtx note;
1229
1230   if (GET_CODE (src) == CALL)
1231     hash_scan_call (src, insn, table);
1232
1233   else if (REG_P (dest))
1234     {
1235       unsigned int regno = REGNO (dest);
1236       HOST_WIDE_INT max_distance = 0;
1237
1238       /* See if a REG_EQUAL note shows this equivalent to a simpler expression.
1239
1240          This allows us to do a single GCSE pass and still eliminate
1241          redundant constants, addresses or other expressions that are
1242          constructed with multiple instructions.
1243
1244          However, keep the original SRC if INSN is a simple reg-reg move.
1245          In this case, there will almost always be a REG_EQUAL note on the
1246          insn that sets SRC.  By recording the REG_EQUAL value here as SRC
1247          for INSN, we miss copy propagation opportunities and we perform the
1248          same PRE GCSE operation repeatedly on the same REG_EQUAL value if we
1249          do more than one PRE GCSE pass.
1250
1251          Note that this does not impede profitable constant propagations.  We
1252          "look through" reg-reg sets in lookup_avail_set.  */
1253       note = find_reg_equal_equiv_note (insn);
1254       if (note != 0
1255           && REG_NOTE_KIND (note) == REG_EQUAL
1256           && !REG_P (src)
1257           && want_to_gcse_p (XEXP (note, 0), GET_MODE (dest), NULL))
1258         src = XEXP (note, 0), set = gen_rtx_SET (dest, src);
1259
1260       /* Only record sets of pseudo-regs in the hash table.  */
1261       if (regno >= FIRST_PSEUDO_REGISTER
1262           /* Don't GCSE something if we can't do a reg/reg copy.  */
1263           && can_copy_p (GET_MODE (dest))
1264           /* GCSE commonly inserts instruction after the insn.  We can't
1265              do that easily for EH edges so disable GCSE on these for now.  */
1266           /* ??? We can now easily create new EH landing pads at the
1267              gimple level, for splitting edges; there's no reason we
1268              can't do the same thing at the rtl level.  */
1269           && !can_throw_internal (insn)
1270           /* Is SET_SRC something we want to gcse?  */
1271           && want_to_gcse_p (src, GET_MODE (dest), &max_distance)
1272           /* Don't CSE a nop.  */
1273           && ! set_noop_p (set)
1274           /* Don't GCSE if it has attached REG_EQUIV note.
1275              At this point this only function parameters should have
1276              REG_EQUIV notes and if the argument slot is used somewhere
1277              explicitly, it means address of parameter has been taken,
1278              so we should not extend the lifetime of the pseudo.  */
1279           && (note == NULL_RTX || ! MEM_P (XEXP (note, 0))))
1280         {
1281           /* An expression is not anticipatable if its operands are
1282              modified before this insn or if this is not the only SET in
1283              this insn.  The latter condition does not have to mean that
1284              SRC itself is not anticipatable, but we just will not be
1285              able to handle code motion of insns with multiple sets.  */
1286           int antic_p = oprs_anticipatable_p (src, insn)
1287                         && !multiple_sets (insn);
1288           /* An expression is not available if its operands are
1289              subsequently modified, including this insn.  It's also not
1290              available if this is a branch, because we can't insert
1291              a set after the branch.  */
1292           int avail_p = (oprs_available_p (src, insn)
1293                          && ! JUMP_P (insn));
1294
1295           insert_expr_in_table (src, GET_MODE (dest), insn, antic_p, avail_p,
1296                                 max_distance, table);
1297         }
1298     }
1299   /* In case of store we want to consider the memory value as available in
1300      the REG stored in that memory. This makes it possible to remove
1301      redundant loads from due to stores to the same location.  */
1302   else if (flag_gcse_las && REG_P (src) && MEM_P (dest))
1303     {
1304       unsigned int regno = REGNO (src);
1305       HOST_WIDE_INT max_distance = 0;
1306
1307       /* Only record sets of pseudo-regs in the hash table.  */
1308       if (regno >= FIRST_PSEUDO_REGISTER
1309           /* Don't GCSE something if we can't do a reg/reg copy.  */
1310           && can_copy_p (GET_MODE (src))
1311           /* GCSE commonly inserts instruction after the insn.  We can't
1312              do that easily for EH edges so disable GCSE on these for now.  */
1313           && !can_throw_internal (insn)
1314           /* Is SET_DEST something we want to gcse?  */
1315           && want_to_gcse_p (dest, GET_MODE (dest), &max_distance)
1316           /* Don't CSE a nop.  */
1317           && ! set_noop_p (set)
1318           /* Don't GCSE if it has attached REG_EQUIV note.
1319              At this point this only function parameters should have
1320              REG_EQUIV notes and if the argument slot is used somewhere
1321              explicitly, it means address of parameter has been taken,
1322              so we should not extend the lifetime of the pseudo.  */
1323           && ((note = find_reg_note (insn, REG_EQUIV, NULL_RTX)) == 0
1324               || ! MEM_P (XEXP (note, 0))))
1325         {
1326           /* Stores are never anticipatable.  */
1327           int antic_p = 0;
1328           /* An expression is not available if its operands are
1329              subsequently modified, including this insn.  It's also not
1330              available if this is a branch, because we can't insert
1331              a set after the branch.  */
1332           int avail_p = oprs_available_p (dest, insn) && ! JUMP_P (insn);
1333
1334           /* Record the memory expression (DEST) in the hash table.  */
1335           insert_expr_in_table (dest, GET_MODE (dest), insn,
1336                                 antic_p, avail_p, max_distance, table);
1337         }
1338     }
1339 }
1340
/* Scan CLOBBER X present in INSN.  Called from hash_scan_insn for each
   CLOBBER it finds; clobbers currently add nothing to the hash TABLE, so
   all three arguments are unused.  */

static void
hash_scan_clobber (rtx x ATTRIBUTE_UNUSED, rtx_insn *insn ATTRIBUTE_UNUSED,
                   struct gcse_hash_table_d *table ATTRIBUTE_UNUSED)
{
  /* Currently nothing to do.  */
}
1347
/* Scan CALL X present in INSN.  Called from hash_scan_insn for each CALL
   it finds and from hash_scan_set when the source of a set is a CALL;
   calls currently add nothing to the hash TABLE, so all three arguments
   are unused.  */

static void
hash_scan_call (rtx x ATTRIBUTE_UNUSED, rtx_insn *insn ATTRIBUTE_UNUSED,
                struct gcse_hash_table_d *table ATTRIBUTE_UNUSED)
{
  /* Currently nothing to do.  */
}
1354
1355 /* Process INSN and add hash table entries as appropriate.  */
1356
1357 static void
1358 hash_scan_insn (rtx_insn *insn, struct gcse_hash_table_d *table)
1359 {
1360   rtx pat = PATTERN (insn);
1361   int i;
1362
1363   /* Pick out the sets of INSN and for other forms of instructions record
1364      what's been modified.  */
1365
1366   if (GET_CODE (pat) == SET)
1367     hash_scan_set (pat, insn, table);
1368
1369   else if (GET_CODE (pat) == CLOBBER)
1370     hash_scan_clobber (pat, insn, table);
1371
1372   else if (GET_CODE (pat) == CALL)
1373     hash_scan_call (pat, insn, table);
1374
1375   else if (GET_CODE (pat) == PARALLEL)
1376     for (i = 0; i < XVECLEN (pat, 0); i++)
1377       {
1378         rtx x = XVECEXP (pat, 0, i);
1379
1380         if (GET_CODE (x) == SET)
1381           hash_scan_set (x, insn, table);
1382         else if (GET_CODE (x) == CLOBBER)
1383           hash_scan_clobber (x, insn, table);
1384         else if (GET_CODE (x) == CALL)
1385           hash_scan_call (x, insn, table);
1386       }
1387 }
1388
1389 /* Dump the hash table TABLE to file FILE under the name NAME.  */
1390
1391 static void
1392 dump_hash_table (FILE *file, const char *name, struct gcse_hash_table_d *table)
1393 {
1394   int i;
1395   /* Flattened out table, so it's printed in proper order.  */
1396   struct gcse_expr **flat_table;
1397   unsigned int *hash_val;
1398   struct gcse_expr *expr;
1399
1400   flat_table = XCNEWVEC (struct gcse_expr *, table->n_elems);
1401   hash_val = XNEWVEC (unsigned int, table->n_elems);
1402
1403   for (i = 0; i < (int) table->size; i++)
1404     for (expr = table->table[i]; expr != NULL; expr = expr->next_same_hash)
1405       {
1406         flat_table[expr->bitmap_index] = expr;
1407         hash_val[expr->bitmap_index] = i;
1408       }
1409
1410   fprintf (file, "%s hash table (%d buckets, %d entries)\n",
1411            name, table->size, table->n_elems);
1412
1413   for (i = 0; i < (int) table->n_elems; i++)
1414     if (flat_table[i] != 0)
1415       {
1416         expr = flat_table[i];
1417         fprintf (file, "Index %d (hash value %d; max distance "
1418                  HOST_WIDE_INT_PRINT_DEC ")\n  ",
1419                  expr->bitmap_index, hash_val[i], expr->max_distance);
1420         print_rtl (file, expr->expr);
1421         fprintf (file, "\n");
1422       }
1423
1424   fprintf (file, "\n");
1425
1426   free (flat_table);
1427   free (hash_val);
1428 }
1429
1430 /* Record register first/last/block set information for REGNO in INSN.
1431
1432    first_set records the first place in the block where the register
1433    is set and is used to compute "anticipatability".
1434
1435    last_set records the last place in the block where the register
1436    is set and is used to compute "availability".
1437
1438    last_bb records the block for which first_set and last_set are
1439    valid, as a quick test to invalidate them.  */
1440
1441 static void
1442 record_last_reg_set_info (rtx_insn *insn, int regno)
1443 {
1444   struct reg_avail_info *info = &reg_avail_info[regno];
1445   int luid = DF_INSN_LUID (insn);
1446
1447   info->last_set = luid;
1448   if (info->last_bb != current_bb)
1449     {
1450       info->last_bb = current_bb;
1451       info->first_set = luid;
1452     }
1453 }
1454
1455 /* Record memory modification information for INSN.  We do not actually care
1456    about the memory location(s) that are set, or even how they are set (consider
1457    a CALL_INSN).  We merely need to record which insns modify memory.  */
1458
1459 static void
1460 record_last_mem_set_info (rtx_insn *insn)
1461 {
1462   if (! flag_gcse_lm)
1463     return;
1464
1465   record_last_mem_set_info_common (insn, modify_mem_list,
1466                                    canon_modify_mem_list,
1467                                    modify_mem_list_set,
1468                                    blocks_with_calls);
1469 }
1470
1471 /* Called from compute_hash_table via note_stores to handle one
1472    SET or CLOBBER in an insn.  DATA is really the instruction in which
1473    the SET is taking place.  */
1474
1475 static void
1476 record_last_set_info (rtx dest, const_rtx setter ATTRIBUTE_UNUSED, void *data)
1477 {
1478   rtx_insn *last_set_insn = (rtx_insn *) data;
1479
1480   if (GET_CODE (dest) == SUBREG)
1481     dest = SUBREG_REG (dest);
1482
1483   if (REG_P (dest))
1484     record_last_reg_set_info (last_set_insn, REGNO (dest));
1485   else if (MEM_P (dest)
1486            /* Ignore pushes, they clobber nothing.  */
1487            && ! push_operand (dest, GET_MODE (dest)))
1488     record_last_mem_set_info (last_set_insn);
1489 }
1490
1491 /* Top level function to create an expression hash table.
1492
1493    Expression entries are placed in the hash table if
1494    - they are of the form (set (pseudo-reg) src),
1495    - src is something we want to perform GCSE on,
1496    - none of the operands are subsequently modified in the block
1497
1498    Currently src must be a pseudo-reg or a const_int.
1499
1500    TABLE is the table computed.  */
1501
1502 static void
1503 compute_hash_table_work (struct gcse_hash_table_d *table)
1504 {
1505   int i;
1506
1507   /* re-Cache any INSN_LIST nodes we have allocated.  */
1508   clear_modify_mem_tables ();
1509   /* Some working arrays used to track first and last set in each block.  */
1510   reg_avail_info = GNEWVEC (struct reg_avail_info, max_reg_num ());
1511
1512   for (i = 0; i < max_reg_num (); ++i)
1513     reg_avail_info[i].last_bb = NULL;
1514
1515   FOR_EACH_BB_FN (current_bb, cfun)
1516     {
1517       rtx_insn *insn;
1518       unsigned int regno;
1519
1520       /* First pass over the instructions records information used to
1521          determine when registers and memory are first and last set.  */
1522       FOR_BB_INSNS (current_bb, insn)
1523         {
1524           if (!NONDEBUG_INSN_P (insn))
1525             continue;
1526
1527           if (CALL_P (insn))
1528             {
1529               hard_reg_set_iterator hrsi;
1530
1531               /* We don't track modes of hard registers, so we need
1532                  to be conservative and assume that partial kills
1533                  are full kills.  */
1534               HARD_REG_SET callee_clobbers
1535                 = insn_callee_abi (insn).full_and_partial_reg_clobbers ();
1536               EXECUTE_IF_SET_IN_HARD_REG_SET (callee_clobbers, 0, regno, hrsi)
1537                 record_last_reg_set_info (insn, regno);
1538
1539               if (! RTL_CONST_OR_PURE_CALL_P (insn)
1540                   || RTL_LOOPING_CONST_OR_PURE_CALL_P (insn))
1541                 record_last_mem_set_info (insn);
1542             }
1543
1544           note_stores (insn, record_last_set_info, insn);
1545         }
1546
1547       /* The next pass builds the hash table.  */
1548       FOR_BB_INSNS (current_bb, insn)
1549         if (NONDEBUG_INSN_P (insn))
1550           hash_scan_insn (insn, table);
1551     }
1552
1553   free (reg_avail_info);
1554   reg_avail_info = NULL;
1555 }
1556
1557 /* Allocate space for the set/expr hash TABLE.
1558    It is used to determine the number of buckets to use.  */
1559
1560 static void
1561 alloc_hash_table (struct gcse_hash_table_d *table)
1562 {
1563   int n;
1564
1565   n = get_max_insn_count ();
1566
1567   table->size = n / 4;
1568   if (table->size < 11)
1569     table->size = 11;
1570
1571   /* Attempt to maintain efficient use of hash table.
1572      Making it an odd number is simplest for now.
1573      ??? Later take some measurements.  */
1574   table->size |= 1;
1575   n = table->size * sizeof (struct gcse_expr *);
1576   table->table = GNEWVAR (struct gcse_expr *, n);
1577 }
1578
1579 /* Free things allocated by alloc_hash_table.  */
1580
1581 static void
1582 free_hash_table (struct gcse_hash_table_d *table)
1583 {
1584   free (table->table);
1585 }
1586
1587 /* Compute the expression hash table TABLE.  */
1588
1589 static void
1590 compute_hash_table (struct gcse_hash_table_d *table)
1591 {
1592   /* Initialize count of number of entries in hash table.  */
1593   table->n_elems = 0;
1594   memset (table->table, 0, table->size * sizeof (struct gcse_expr *));
1595
1596   compute_hash_table_work (table);
1597 }
1598 \f
1599 /* Expression tracking support.  */
1600
1601 /* Clear canon_modify_mem_list and modify_mem_list tables.  */
1602 static void
1603 clear_modify_mem_tables (void)
1604 {
1605   unsigned i;
1606   bitmap_iterator bi;
1607
1608   EXECUTE_IF_SET_IN_BITMAP (modify_mem_list_set, 0, i, bi)
1609     {
1610       modify_mem_list[i].release ();
1611       canon_modify_mem_list[i].release ();
1612     }
1613   bitmap_clear (modify_mem_list_set);
1614   bitmap_clear (blocks_with_calls);
1615 }
1616
1617 /* Release memory used by modify_mem_list_set.  */
1618
1619 static void
1620 free_modify_mem_tables (void)
1621 {
1622   clear_modify_mem_tables ();
1623   free (modify_mem_list);
1624   free (canon_modify_mem_list);
1625   modify_mem_list = 0;
1626   canon_modify_mem_list = 0;
1627 }
1628 \f
1629 /* Compute PRE+LCM working variables.  */
1630
/* Local properties of expressions.  transp, comp and antloc are
   allocated by alloc_pre_mem as sbitmap vectors sized by the number of
   blocks and the number of expressions; the pre_* vectors are only reset
   to null there.  free_pre_mem releases transp, comp and whichever pre_*
   vectors are non-null.  */

/* Nonzero for expressions that are transparent in the block.  */
static sbitmap *transp;

/* Nonzero for expressions that are computed (available) in the block.  */
static sbitmap *comp;

/* Nonzero for expressions that are locally anticipatable in the block.
   Not freed by free_pre_mem.  */
static sbitmap *antloc;

/* Nonzero for expressions where this block is an optimal computation
   point.  */
static sbitmap *pre_optimal;

/* Nonzero for expressions which are redundant in a particular block.  */
static sbitmap *pre_redundant;

/* Nonzero for expressions which should be inserted on a specific edge.  */
static sbitmap *pre_insert_map;

/* Nonzero for expressions which should be deleted in a specific block.  */
static sbitmap *pre_delete_map;
1654
1655 /* Allocate vars used for PRE analysis.  */
1656
1657 static void
1658 alloc_pre_mem (int n_blocks, int n_exprs)
1659 {
1660   transp = sbitmap_vector_alloc (n_blocks, n_exprs);
1661   comp = sbitmap_vector_alloc (n_blocks, n_exprs);
1662   antloc = sbitmap_vector_alloc (n_blocks, n_exprs);
1663
1664   pre_optimal = NULL;
1665   pre_redundant = NULL;
1666   pre_insert_map = NULL;
1667   pre_delete_map = NULL;
1668   ae_kill = sbitmap_vector_alloc (n_blocks, n_exprs);
1669
1670   /* pre_insert and pre_delete are allocated later.  */
1671 }
1672
1673 /* Free vars used for PRE analysis.  */
1674
1675 static void
1676 free_pre_mem (void)
1677 {
1678   sbitmap_vector_free (transp);
1679   sbitmap_vector_free (comp);
1680
1681   /* ANTLOC and AE_KILL are freed just after pre_lcm finishes.  */
1682
1683   if (pre_optimal)
1684     sbitmap_vector_free (pre_optimal);
1685   if (pre_redundant)
1686     sbitmap_vector_free (pre_redundant);
1687   if (pre_insert_map)
1688     sbitmap_vector_free (pre_insert_map);
1689   if (pre_delete_map)
1690     sbitmap_vector_free (pre_delete_map);
1691
1692   transp = comp = NULL;
1693   pre_optimal = pre_redundant = pre_insert_map = pre_delete_map = NULL;
1694 }
1695
1696 /* Remove certain expressions from anticipatable and transparent
1697    sets of basic blocks that have incoming abnormal edge.
1698    For PRE remove potentially trapping expressions to avoid placing
1699    them on abnormal edges.  For hoisting remove memory references that
1700    can be clobbered by calls.  */
1701
1702 static void
1703 prune_expressions (bool pre_p)
1704 {
1705   struct gcse_expr *expr;
1706   unsigned int ui;
1707   basic_block bb;
1708
1709   auto_sbitmap prune_exprs (expr_hash_table.n_elems);
1710   bitmap_clear (prune_exprs);
1711   for (ui = 0; ui < expr_hash_table.size; ui++)
1712     {
1713       for (expr = expr_hash_table.table[ui]; expr; expr = expr->next_same_hash)
1714         {
1715           /* Note potentially trapping expressions.  */
1716           if (may_trap_p (expr->expr))
1717             {
1718               bitmap_set_bit (prune_exprs, expr->bitmap_index);
1719               continue;
1720             }
1721
1722           if (!pre_p && contains_mem_rtx_p (expr->expr))
1723             /* Note memory references that can be clobbered by a call.
1724                We do not split abnormal edges in hoisting, so would
1725                a memory reference get hoisted along an abnormal edge,
1726                it would be placed /before/ the call.  Therefore, only
1727                constant memory references can be hoisted along abnormal
1728                edges.  */
1729             {
1730               rtx x = expr->expr;
1731
1732               /* Common cases where we might find the MEM which may allow us
1733                  to avoid pruning the expression.  */
1734               while (GET_CODE (x) == ZERO_EXTEND || GET_CODE (x) == SIGN_EXTEND)
1735                 x = XEXP (x, 0);
1736
1737               /* If we found the MEM, go ahead and look at it to see if it has
1738                  properties that allow us to avoid pruning its expression out
1739                  of the tables.  */
1740               if (MEM_P (x))
1741                 {
1742                   if (GET_CODE (XEXP (x, 0)) == SYMBOL_REF
1743                       && CONSTANT_POOL_ADDRESS_P (XEXP (x, 0)))
1744                     continue;
1745
1746                   if (MEM_READONLY_P (x)
1747                       && !MEM_VOLATILE_P (x)
1748                       && MEM_NOTRAP_P (x))
1749                     /* Constant memory reference, e.g., a PIC address.  */
1750                     continue;
1751                 }
1752
1753               /* ??? Optimally, we would use interprocedural alias
1754                  analysis to determine if this mem is actually killed
1755                  by this call.  */
1756
1757               bitmap_set_bit (prune_exprs, expr->bitmap_index);
1758             }
1759         }
1760     }
1761
1762   FOR_EACH_BB_FN (bb, cfun)
1763     {
1764       edge e;
1765       edge_iterator ei;
1766
1767       /* If the current block is the destination of an abnormal edge, we
1768          kill all trapping (for PRE) and memory (for hoist) expressions
1769          because we won't be able to properly place the instruction on
1770          the edge.  So make them neither anticipatable nor transparent.
1771          This is fairly conservative.
1772
1773          ??? For hoisting it may be necessary to check for set-and-jump
1774          instructions here, not just for abnormal edges.  The general problem
1775          is that when an expression cannot not be placed right at the end of
1776          a basic block we should account for any side-effects of a subsequent
1777          jump instructions that could clobber the expression.  It would
1778          be best to implement this check along the lines of
1779          should_hoist_expr_to_dom where the target block is already known
1780          and, hence, there's no need to conservatively prune expressions on
1781          "intermediate" set-and-jump instructions.  */
1782       FOR_EACH_EDGE (e, ei, bb->preds)
1783         if ((e->flags & EDGE_ABNORMAL)
1784             && (pre_p || CALL_P (BB_END (e->src))))
1785           {
1786             bitmap_and_compl (antloc[bb->index],
1787                                 antloc[bb->index], prune_exprs);
1788             bitmap_and_compl (transp[bb->index],
1789                                 transp[bb->index], prune_exprs);
1790             break;
1791           }
1792     }
1793 }
1794
1795 /* It may be necessary to insert a large number of insns on edges to
1796    make the existing occurrences of expressions fully redundant.  This
1797    routine examines the set of insertions and deletions and if the ratio
1798    of insertions to deletions is too high for a particular expression, then
1799    the expression is removed from the insertion/deletion sets.
1800
1801    N_ELEMS is the number of elements in the hash table.  */
1802
1803 static void
1804 prune_insertions_deletions (int n_elems)
1805 {
1806   sbitmap_iterator sbi;
1807
1808   /* We always use I to iterate over blocks/edges and J to iterate over
1809      expressions.  */
1810   unsigned int i, j;
1811
1812   /* Counts for the number of times an expression needs to be inserted and
1813      number of times an expression can be removed as a result.  */
1814   int *insertions = GCNEWVEC (int, n_elems);
1815   int *deletions = GCNEWVEC (int, n_elems);
1816
1817   /* Set of expressions which require too many insertions relative to
1818      the number of deletions achieved.  We will prune these out of the
1819      insertion/deletion sets.  */
1820   auto_sbitmap prune_exprs (n_elems);
1821   bitmap_clear (prune_exprs);
1822
1823   /* Iterate over the edges counting the number of times each expression
1824      needs to be inserted.  */
1825   for (i = 0; i < (unsigned) n_edges_for_fn (cfun); i++)
1826     {
1827       EXECUTE_IF_SET_IN_BITMAP (pre_insert_map[i], 0, j, sbi)
1828         insertions[j]++;
1829     }
1830
1831   /* Similarly for deletions, but those occur in blocks rather than on
1832      edges.  */
1833   for (i = 0; i < (unsigned) last_basic_block_for_fn (cfun); i++)
1834     {
1835       EXECUTE_IF_SET_IN_BITMAP (pre_delete_map[i], 0, j, sbi)
1836         deletions[j]++;
1837     }
1838
1839   /* Now that we have accurate counts, iterate over the elements in the
1840      hash table and see if any need too many insertions relative to the
1841      number of evaluations that can be removed.  If so, mark them in
1842      PRUNE_EXPRS.  */
1843   for (j = 0; j < (unsigned) n_elems; j++)
1844     if (deletions[j]
1845         && (insertions[j] / deletions[j]) > param_max_gcse_insertion_ratio)
1846       bitmap_set_bit (prune_exprs, j);
1847
1848   /* Now prune PRE_INSERT_MAP and PRE_DELETE_MAP based on PRUNE_EXPRS.  */
1849   EXECUTE_IF_SET_IN_BITMAP (prune_exprs, 0, j, sbi)
1850     {
1851       for (i = 0; i < (unsigned) n_edges_for_fn (cfun); i++)
1852         bitmap_clear_bit (pre_insert_map[i], j);
1853
1854       for (i = 0; i < (unsigned) last_basic_block_for_fn (cfun); i++)
1855         bitmap_clear_bit (pre_delete_map[i], j);
1856     }
1857
1858   free (insertions);
1859   free (deletions);
1860 }
1861
1862 /* Top level routine to do the dataflow analysis needed by PRE.  */
1863
1864 static struct edge_list *
1865 compute_pre_data (void)
1866 {
1867   struct edge_list *edge_list;
1868   basic_block bb;
1869
1870   compute_local_properties (transp, comp, antloc, &expr_hash_table);
1871   prune_expressions (true);
1872   bitmap_vector_clear (ae_kill, last_basic_block_for_fn (cfun));
1873
1874   /* Compute ae_kill for each basic block using:
1875
1876      ~(TRANSP | COMP)
1877   */
1878
1879   FOR_EACH_BB_FN (bb, cfun)
1880     {
1881       bitmap_ior (ae_kill[bb->index], transp[bb->index], comp[bb->index]);
1882       bitmap_not (ae_kill[bb->index], ae_kill[bb->index]);
1883     }
1884
1885   edge_list = pre_edge_lcm (expr_hash_table.n_elems, transp, comp, antloc,
1886                             ae_kill, &pre_insert_map, &pre_delete_map);
1887   sbitmap_vector_free (antloc);
1888   antloc = NULL;
1889   sbitmap_vector_free (ae_kill);
1890   ae_kill = NULL;
1891
1892   prune_insertions_deletions (expr_hash_table.n_elems);
1893
1894   return edge_list;
1895 }
1896 \f
1897 /* PRE utilities */
1898
1899 /* Return nonzero if an occurrence of expression EXPR in OCCR_BB would reach
1900    block BB.
1901
1902    VISITED is a pointer to a working buffer for tracking which BB's have
1903    been visited.  It is NULL for the top-level call.
1904
1905    We treat reaching expressions that go through blocks containing the same
1906    reaching expression as "not reaching".  E.g. if EXPR is generated in blocks
1907    2 and 3, INSN is in block 4, and 2->3->4, we treat the expression in block
1908    2 as not reaching.  The intent is to improve the probability of finding
1909    only one reaching expression and to reduce register lifetimes by picking
1910    the closest such expression.  */
1911
1912 static int
1913 pre_expr_reaches_here_p_work (basic_block occr_bb, struct gcse_expr *expr,
1914                               basic_block bb, char *visited)
1915 {
1916   edge pred;
1917   edge_iterator ei;
1918
1919   FOR_EACH_EDGE (pred, ei, bb->preds)
1920     {
1921       basic_block pred_bb = pred->src;
1922
1923       if (pred->src == ENTRY_BLOCK_PTR_FOR_FN (cfun)
1924           /* Has predecessor has already been visited?  */
1925           || visited[pred_bb->index])
1926         ;/* Nothing to do.  */
1927
1928       /* Does this predecessor generate this expression?  */
1929       else if (bitmap_bit_p (comp[pred_bb->index], expr->bitmap_index))
1930         {
1931           /* Is this the occurrence we're looking for?
1932              Note that there's only one generating occurrence per block
1933              so we just need to check the block number.  */
1934           if (occr_bb == pred_bb)
1935             return 1;
1936
1937           visited[pred_bb->index] = 1;
1938         }
1939       /* Ignore this predecessor if it kills the expression.  */
1940       else if (! bitmap_bit_p (transp[pred_bb->index], expr->bitmap_index))
1941         visited[pred_bb->index] = 1;
1942
1943       /* Neither gen nor kill.  */
1944       else
1945         {
1946           visited[pred_bb->index] = 1;
1947           if (pre_expr_reaches_here_p_work (occr_bb, expr, pred_bb, visited))
1948             return 1;
1949         }
1950     }
1951
1952   /* All paths have been checked.  */
1953   return 0;
1954 }
1955
1956 /* The wrapper for pre_expr_reaches_here_work that ensures that any
1957    memory allocated for that function is returned.  */
1958
1959 static int
1960 pre_expr_reaches_here_p (basic_block occr_bb, struct gcse_expr *expr, basic_block bb)
1961 {
1962   int rval;
1963   char *visited = XCNEWVEC (char, last_basic_block_for_fn (cfun));
1964
1965   rval = pre_expr_reaches_here_p_work (occr_bb, expr, bb, visited);
1966
1967   free (visited);
1968   return rval;
1969 }
1970 \f
1971 /* Generate RTL to copy an EXP to REG and return it.  */
1972
1973 rtx_insn *
1974 prepare_copy_insn (rtx reg, rtx exp)
1975 {
1976   rtx_insn *pat;
1977
1978   start_sequence ();
1979
1980   /* If the expression is something that's an operand, like a constant,
1981      just copy it to a register.  */
1982   if (general_operand (exp, GET_MODE (reg)))
1983     emit_move_insn (reg, exp);
1984
1985   /* Otherwise, make a new insn to compute this expression and make sure the
1986      insn will be recognized (this also adds any needed CLOBBERs).  */
1987   else
1988     {
1989       rtx_insn *insn = emit_insn (gen_rtx_SET (reg, exp));
1990
1991       if (insn_invalid_p (insn, false))
1992         gcc_unreachable ();
1993     }
1994
1995   pat = get_insns ();
1996   end_sequence ();
1997
1998   return pat;
1999 }
2000
2001 /* Generate RTL to copy an EXPR to its `reaching_reg' and return it.  */
2002
2003 static rtx_insn *
2004 process_insert_insn (struct gcse_expr *expr)
2005 {
2006   rtx reg = expr->reaching_reg;
2007   /* Copy the expression to make sure we don't have any sharing issues.  */
2008   rtx exp = copy_rtx (expr->expr);
2009
2010   return prepare_copy_insn (reg, exp);
2011 }
2012
2013 /* Add EXPR to the end of basic block BB.
2014
2015    This is used by both the PRE and code hoisting.  */
2016
2017 static void
2018 insert_insn_end_basic_block (struct gcse_expr *expr, basic_block bb)
2019 {
2020   rtx_insn *insn = BB_END (bb);
2021   rtx_insn *new_insn;
2022   rtx reg = expr->reaching_reg;
2023   int regno = REGNO (reg);
2024   rtx_insn *pat, *pat_end;
2025
2026   pat = process_insert_insn (expr);
2027   gcc_assert (pat && INSN_P (pat));
2028
2029   pat_end = pat;
2030   while (NEXT_INSN (pat_end) != NULL_RTX)
2031     pat_end = NEXT_INSN (pat_end);
2032
2033   /* If the last insn is a jump, insert EXPR in front.  Similarly we need to
2034      take care of trapping instructions in presence of non-call exceptions.  */
2035
2036   if (JUMP_P (insn)
2037       || (NONJUMP_INSN_P (insn)
2038           && (!single_succ_p (bb)
2039               || single_succ_edge (bb)->flags & EDGE_ABNORMAL)))
2040     {
2041       /* FIXME: What if something in jump uses value set in new insn?  */
2042       new_insn = emit_insn_before_noloc (pat, insn, bb);
2043     }
2044
2045   /* Likewise if the last insn is a call, as will happen in the presence
2046      of exception handling.  */
2047   else if (CALL_P (insn)
2048            && (!single_succ_p (bb)
2049                || single_succ_edge (bb)->flags & EDGE_ABNORMAL))
2050     {
2051       /* Keeping in mind targets with small register classes and parameters
2052          in registers, we search backward and place the instructions before
2053          the first parameter is loaded.  Do this for everyone for consistency
2054          and a presumption that we'll get better code elsewhere as well.  */
2055
2056       /* Since different machines initialize their parameter registers
2057          in different orders, assume nothing.  Collect the set of all
2058          parameter registers.  */
2059       insn = find_first_parameter_load (insn, BB_HEAD (bb));
2060
2061       /* If we found all the parameter loads, then we want to insert
2062          before the first parameter load.
2063
2064          If we did not find all the parameter loads, then we might have
2065          stopped on the head of the block, which could be a CODE_LABEL.
2066          If we inserted before the CODE_LABEL, then we would be putting
2067          the insn in the wrong basic block.  In that case, put the insn
2068          after the CODE_LABEL.  Also, respect NOTE_INSN_BASIC_BLOCK.  */
2069       while (LABEL_P (insn)
2070              || NOTE_INSN_BASIC_BLOCK_P (insn))
2071         insn = NEXT_INSN (insn);
2072
2073       new_insn = emit_insn_before_noloc (pat, insn, bb);
2074     }
2075   else
2076     new_insn = emit_insn_after_noloc (pat, insn, bb);
2077
2078   while (1)
2079     {
2080       if (INSN_P (pat))
2081         add_label_notes (PATTERN (pat), new_insn);
2082       if (pat == pat_end)
2083         break;
2084       pat = NEXT_INSN (pat);
2085     }
2086
2087   gcse_create_count++;
2088
2089   if (dump_file)
2090     {
2091       fprintf (dump_file, "PRE/HOIST: end of bb %d, insn %d, ",
2092                bb->index, INSN_UID (new_insn));
2093       fprintf (dump_file, "copying expression %d to reg %d\n",
2094                expr->bitmap_index, regno);
2095     }
2096 }
2097
2098 /* Insert partially redundant expressions on edges in the CFG to make
2099    the expressions fully redundant.  */
2100
2101 static int
2102 pre_edge_insert (struct edge_list *edge_list, struct gcse_expr **index_map)
2103 {
2104   int e, i, j, num_edges, set_size, did_insert = 0;
2105   sbitmap *inserted;
2106
2107   /* Where PRE_INSERT_MAP is nonzero, we add the expression on that edge
2108      if it reaches any of the deleted expressions.  */
2109
2110   set_size = pre_insert_map[0]->size;
2111   num_edges = NUM_EDGES (edge_list);
2112   inserted = sbitmap_vector_alloc (num_edges, expr_hash_table.n_elems);
2113   bitmap_vector_clear (inserted, num_edges);
2114
2115   for (e = 0; e < num_edges; e++)
2116     {
2117       int indx;
2118       basic_block bb = INDEX_EDGE_PRED_BB (edge_list, e);
2119
2120       for (i = indx = 0; i < set_size; i++, indx += SBITMAP_ELT_BITS)
2121         {
2122           SBITMAP_ELT_TYPE insert = pre_insert_map[e]->elms[i];
2123
2124           for (j = indx;
2125                insert && j < (int) expr_hash_table.n_elems;
2126                j++, insert >>= 1)
2127             if ((insert & 1) != 0 && index_map[j]->reaching_reg != NULL_RTX)
2128               {
2129                 struct gcse_expr *expr = index_map[j];
2130                 struct gcse_occr *occr;
2131
2132                 /* Now look at each deleted occurrence of this expression.  */
2133                 for (occr = expr->antic_occr; occr != NULL; occr = occr->next)
2134                   {
2135                     if (! occr->deleted_p)
2136                       continue;
2137
2138                     /* Insert this expression on this edge if it would
2139                        reach the deleted occurrence in BB.  */
2140                     if (!bitmap_bit_p (inserted[e], j))
2141                       {
2142                         rtx_insn *insn;
2143                         edge eg = INDEX_EDGE (edge_list, e);
2144
2145                         /* We can't insert anything on an abnormal and
2146                            critical edge, so we insert the insn at the end of
2147                            the previous block. There are several alternatives
2148                            detailed in Morgans book P277 (sec 10.5) for
2149                            handling this situation.  This one is easiest for
2150                            now.  */
2151
2152                         if (eg->flags & EDGE_ABNORMAL)
2153                           insert_insn_end_basic_block (index_map[j], bb);
2154                         else
2155                           {
2156                             insn = process_insert_insn (index_map[j]);
2157                             insert_insn_on_edge (insn, eg);
2158                           }
2159
2160                         if (dump_file)
2161                           {
2162                             fprintf (dump_file, "PRE: edge (%d,%d), ",
2163                                      bb->index,
2164                                      INDEX_EDGE_SUCC_BB (edge_list, e)->index);
2165                             fprintf (dump_file, "copy expression %d\n",
2166                                      expr->bitmap_index);
2167                           }
2168
2169                         update_ld_motion_stores (expr);
2170                         bitmap_set_bit (inserted[e], j);
2171                         did_insert = 1;
2172                         gcse_create_count++;
2173                       }
2174                   }
2175               }
2176         }
2177     }
2178
2179   sbitmap_vector_free (inserted);
2180   return did_insert;
2181 }
2182
2183 /* Copy the result of EXPR->EXPR generated by INSN to EXPR->REACHING_REG.
2184    Given "old_reg <- expr" (INSN), instead of adding after it
2185      reaching_reg <- old_reg
2186    it's better to do the following:
2187      reaching_reg <- expr
2188      old_reg      <- reaching_reg
2189    because this way copy propagation can discover additional PRE
2190    opportunities.  But if this fails, we try the old way.
2191    When "expr" is a store, i.e.
2192    given "MEM <- old_reg", instead of adding after it
2193      reaching_reg <- old_reg
2194    it's better to add it before as follows:
2195      reaching_reg <- old_reg
2196      MEM          <- reaching_reg.  */
2197
2198 static void
2199 pre_insert_copy_insn (struct gcse_expr *expr, rtx_insn *insn)
2200 {
2201   rtx reg = expr->reaching_reg;
2202   int regno = REGNO (reg);
2203   int indx = expr->bitmap_index;
2204   rtx pat = PATTERN (insn);
2205   rtx set, first_set;
2206   rtx_insn *new_insn;
2207   rtx old_reg;
2208   int i;
2209
2210   /* This block matches the logic in hash_scan_insn.  */
2211   switch (GET_CODE (pat))
2212     {
2213     case SET:
2214       set = pat;
2215       break;
2216
2217     case PARALLEL:
2218       /* Search through the parallel looking for the set whose
2219          source was the expression that we're interested in.  */
2220       first_set = NULL_RTX;
2221       set = NULL_RTX;
2222       for (i = 0; i < XVECLEN (pat, 0); i++)
2223         {
2224           rtx x = XVECEXP (pat, 0, i);
2225           if (GET_CODE (x) == SET)
2226             {
2227               /* If the source was a REG_EQUAL or REG_EQUIV note, we
2228                  may not find an equivalent expression, but in this
2229                  case the PARALLEL will have a single set.  */
2230               if (first_set == NULL_RTX)
2231                 first_set = x;
2232               if (expr_equiv_p (SET_SRC (x), expr->expr))
2233                 {
2234                   set = x;
2235                   break;
2236                 }
2237             }
2238         }
2239
2240       gcc_assert (first_set);
2241       if (set == NULL_RTX)
2242         set = first_set;
2243       break;
2244
2245     default:
2246       gcc_unreachable ();
2247     }
2248
2249   if (REG_P (SET_DEST (set)))
2250     {
2251       old_reg = SET_DEST (set);
2252       /* Check if we can modify the set destination in the original insn.  */
2253       if (validate_change (insn, &SET_DEST (set), reg, 0))
2254         {
2255           new_insn = gen_move_insn (old_reg, reg);
2256           new_insn = emit_insn_after (new_insn, insn);
2257         }
2258       else
2259         {
2260           new_insn = gen_move_insn (reg, old_reg);
2261           new_insn = emit_insn_after (new_insn, insn);
2262         }
2263     }
2264   else /* This is possible only in case of a store to memory.  */
2265     {
2266       old_reg = SET_SRC (set);
2267       new_insn = gen_move_insn (reg, old_reg);
2268
2269       /* Check if we can modify the set source in the original insn.  */
2270       if (validate_change (insn, &SET_SRC (set), reg, 0))
2271         new_insn = emit_insn_before (new_insn, insn);
2272       else
2273         new_insn = emit_insn_after (new_insn, insn);
2274     }
2275
2276   gcse_create_count++;
2277
2278   if (dump_file)
2279     fprintf (dump_file,
2280              "PRE: bb %d, insn %d, copy expression %d in insn %d to reg %d\n",
2281               BLOCK_FOR_INSN (insn)->index, INSN_UID (new_insn), indx,
2282               INSN_UID (insn), regno);
2283 }
2284
2285 /* Copy available expressions that reach the redundant expression
2286    to `reaching_reg'.  */
2287
2288 static void
2289 pre_insert_copies (void)
2290 {
2291   unsigned int i, added_copy;
2292   struct gcse_expr *expr;
2293   struct gcse_occr *occr;
2294   struct gcse_occr *avail;
2295
2296   /* For each available expression in the table, copy the result to
2297      `reaching_reg' if the expression reaches a deleted one.
2298
2299      ??? The current algorithm is rather brute force.
2300      Need to do some profiling.  */
2301
2302   for (i = 0; i < expr_hash_table.size; i++)
2303     for (expr = expr_hash_table.table[i]; expr; expr = expr->next_same_hash)
2304       {
2305         /* If the basic block isn't reachable, PPOUT will be TRUE.  However,
2306            we don't want to insert a copy here because the expression may not
2307            really be redundant.  So only insert an insn if the expression was
2308            deleted.  This test also avoids further processing if the
2309            expression wasn't deleted anywhere.  */
2310         if (expr->reaching_reg == NULL)
2311           continue;
2312
2313         /* Set when we add a copy for that expression.  */
2314         added_copy = 0;
2315
2316         for (occr = expr->antic_occr; occr != NULL; occr = occr->next)
2317           {
2318             if (! occr->deleted_p)
2319               continue;
2320
2321             for (avail = expr->avail_occr; avail != NULL; avail = avail->next)
2322               {
2323                 rtx_insn *insn = avail->insn;
2324
2325                 /* No need to handle this one if handled already.  */
2326                 if (avail->copied_p)
2327                   continue;
2328
2329                 /* Don't handle this one if it's a redundant one.  */
2330                 if (insn->deleted ())
2331                   continue;
2332
2333                 /* Or if the expression doesn't reach the deleted one.  */
2334                 if (! pre_expr_reaches_here_p (BLOCK_FOR_INSN (avail->insn),
2335                                                expr,
2336                                                BLOCK_FOR_INSN (occr->insn)))
2337                   continue;
2338
2339                 added_copy = 1;
2340
2341                 /* Copy the result of avail to reaching_reg.  */
2342                 pre_insert_copy_insn (expr, insn);
2343                 avail->copied_p = 1;
2344               }
2345           }
2346
2347           if (added_copy)
2348             update_ld_motion_stores (expr);
2349       }
2350 }
2351
/* State passed through note_pattern_stores to record_set_data while
   single_set_gcse looks for the one live SET of an insn.  */
struct set_data
{
  /* The insn whose pattern is being walked; its REG_UNUSED notes are
     consulted.  */
  rtx_insn *insn;
  /* The SET recorded so far.  Only meaningful once NSETS is nonzero.  */
  const_rtx set;
  /* Number of SETs counted so far.  */
  int nsets;
};
2358
2359 /* Increment number of sets and record set in DATA.  */
2360
2361 static void
2362 record_set_data (rtx dest, const_rtx set, void *data)
2363 {
2364   struct set_data *s = (struct set_data *)data;
2365
2366   if (GET_CODE (set) == SET)
2367     {
2368       /* We allow insns having multiple sets, where all but one are
2369          dead as single set insns.  In the common case only a single
2370          set is present, so we want to avoid checking for REG_UNUSED
2371          notes unless necessary.  */
2372       if (s->nsets == 1
2373           && find_reg_note (s->insn, REG_UNUSED, SET_DEST (s->set))
2374           && !side_effects_p (s->set))
2375         s->nsets = 0;
2376
2377       if (!s->nsets)
2378         {
2379           /* Record this set.  */
2380           s->nsets += 1;
2381           s->set = set;
2382         }
2383       else if (!find_reg_note (s->insn, REG_UNUSED, dest)
2384                || side_effects_p (set))
2385         s->nsets += 1;
2386     }
2387 }
2388
2389 static const_rtx
2390 single_set_gcse (rtx_insn *insn)
2391 {
2392   struct set_data s;
2393   rtx pattern;
2394
2395   gcc_assert (INSN_P (insn));
2396
2397   /* Optimize common case.  */
2398   pattern = PATTERN (insn);
2399   if (GET_CODE (pattern) == SET)
2400     return pattern;
2401
2402   s.insn = insn;
2403   s.nsets = 0;
2404   note_pattern_stores (pattern, record_set_data, &s);
2405
2406   /* Considered invariant insns have exactly one set.  */
2407   gcc_assert (s.nsets == 1);
2408   return s.set;
2409 }
2410
2411 /* Emit move from SRC to DEST noting the equivalence with expression computed
2412    in INSN.  */
2413
2414 static rtx_insn *
2415 gcse_emit_move_after (rtx dest, rtx src, rtx_insn *insn)
2416 {
2417   rtx_insn *new_rtx;
2418   const_rtx set = single_set_gcse (insn);
2419   rtx set2;
2420   rtx note;
2421   rtx eqv = NULL_RTX;
2422
2423   /* This should never fail since we're creating a reg->reg copy
2424      we've verified to be valid.  */
2425
2426   new_rtx = emit_insn_after (gen_move_insn (dest, src), insn);
2427
2428   /* Note the equivalence for local CSE pass.  Take the note from the old
2429      set if there was one.  Otherwise record the SET_SRC from the old set
2430      unless DEST is also an operand of the SET_SRC.  */
2431   set2 = single_set (new_rtx);
2432   if (!set2 || !rtx_equal_p (SET_DEST (set2), dest))
2433     return new_rtx;
2434   if ((note = find_reg_equal_equiv_note (insn)))
2435     eqv = XEXP (note, 0);
2436   else if (! REG_P (dest)
2437            || ! reg_mentioned_p (dest, SET_SRC (set)))
2438     eqv = SET_SRC (set);
2439
2440   if (eqv != NULL_RTX)
2441     set_unique_reg_note (new_rtx, REG_EQUAL, copy_insn_1 (eqv));
2442
2443   return new_rtx;
2444 }
2445
2446 /* Delete redundant computations.
2447    Deletion is done by changing the insn to copy the `reaching_reg' of
2448    the expression into the result of the SET.  It is left to later passes
2449    to propagate the copy or eliminate it.
2450
2451    Return nonzero if a change is made.  */
2452
2453 static int
2454 pre_delete (void)
2455 {
2456   unsigned int i;
2457   int changed;
2458   struct gcse_expr *expr;
2459   struct gcse_occr *occr;
2460
2461   changed = 0;
2462   for (i = 0; i < expr_hash_table.size; i++)
2463     for (expr = expr_hash_table.table[i]; expr; expr = expr->next_same_hash)
2464       {
2465         int indx = expr->bitmap_index;
2466
2467         /* We only need to search antic_occr since we require ANTLOC != 0.  */
2468         for (occr = expr->antic_occr; occr != NULL; occr = occr->next)
2469           {
2470             rtx_insn *insn = occr->insn;
2471             rtx set;
2472             basic_block bb = BLOCK_FOR_INSN (insn);
2473
2474             /* We only delete insns that have a single_set.  */
2475             if (bitmap_bit_p (pre_delete_map[bb->index], indx)
2476                 && (set = single_set (insn)) != 0
2477                 && dbg_cnt (pre_insn))
2478               {
2479                 /* Create a pseudo-reg to store the result of reaching
2480                    expressions into.  Get the mode for the new pseudo from
2481                    the mode of the original destination pseudo.  */
2482                 if (expr->reaching_reg == NULL)
2483                   expr->reaching_reg = gen_reg_rtx_and_attrs (SET_DEST (set));
2484
2485                 gcse_emit_move_after (SET_DEST (set), expr->reaching_reg, insn);
2486                 delete_insn (insn);
2487                 occr->deleted_p = 1;
2488                 changed = 1;
2489                 gcse_subst_count++;
2490
2491                 if (dump_file)
2492                   {
2493                     fprintf (dump_file,
2494                              "PRE: redundant insn %d (expression %d) in ",
2495                                INSN_UID (insn), indx);
2496                     fprintf (dump_file, "bb %d, reaching reg is %d\n",
2497                              bb->index, REGNO (expr->reaching_reg));
2498                   }
2499               }
2500           }
2501       }
2502
2503   return changed;
2504 }
2505
2506 /* Perform GCSE optimizations using PRE.
2507    This is called by one_pre_gcse_pass after all the dataflow analysis
2508    has been done.
2509
2510    This is based on the original Morel-Renvoise paper Fred Chow's thesis, and
2511    lazy code motion from Knoop, Ruthing and Steffen as described in Advanced
2512    Compiler Design and Implementation.
2513
2514    ??? A new pseudo reg is created to hold the reaching expression.  The nice
2515    thing about the classical approach is that it would try to use an existing
2516    reg.  If the register can't be adequately optimized [i.e. we introduce
2517    reload problems], one could add a pass here to propagate the new register
2518    through the block.
2519
2520    ??? We don't handle single sets in PARALLELs because we're [currently] not
2521    able to copy the rest of the parallel when we insert copies to create full
2522    redundancies from partial redundancies.  However, there's no reason why we
2523    can't handle PARALLELs in the cases where there are no partial
2524    redundancies.  */
2525
2526 static int
2527 pre_gcse (struct edge_list *edge_list)
2528 {
2529   unsigned int i;
2530   int did_insert, changed;
2531   struct gcse_expr **index_map;
2532   struct gcse_expr *expr;
2533
2534   /* Compute a mapping from expression number (`bitmap_index') to
2535      hash table entry.  */
2536
2537   index_map = XCNEWVEC (struct gcse_expr *, expr_hash_table.n_elems);
2538   for (i = 0; i < expr_hash_table.size; i++)
2539     for (expr = expr_hash_table.table[i]; expr; expr = expr->next_same_hash)
2540       index_map[expr->bitmap_index] = expr;
2541
2542   /* Delete the redundant insns first so that
2543      - we know what register to use for the new insns and for the other
2544        ones with reaching expressions
2545      - we know which insns are redundant when we go to create copies  */
2546
2547   changed = pre_delete ();
2548   did_insert = pre_edge_insert (edge_list, index_map);
2549
2550   /* In other places with reaching expressions, copy the expression to the
2551      specially allocated pseudo-reg that reaches the redundant expr.  */
2552   pre_insert_copies ();
2553   if (did_insert)
2554     {
2555       commit_edge_insertions ();
2556       changed = 1;
2557     }
2558
2559   free (index_map);
2560   return changed;
2561 }
2562
2563 /* Top level routine to perform one PRE GCSE pass.
2564
2565    Return nonzero if a change was made.  */
2566
2567 static int
2568 one_pre_gcse_pass (void)
2569 {
2570   int changed = 0;
2571
2572   gcse_subst_count = 0;
2573   gcse_create_count = 0;
2574
2575   /* Return if there's nothing to do, or it is too expensive.  */
2576   if (n_basic_blocks_for_fn (cfun) <= NUM_FIXED_BLOCKS + 1
2577       || gcse_or_cprop_is_too_expensive (_("PRE disabled")))
2578     return 0;
2579
2580   /* We need alias.  */
2581   init_alias_analysis ();
2582
2583   bytes_used = 0;
2584   gcc_obstack_init (&gcse_obstack);
2585   alloc_gcse_mem ();
2586
2587   alloc_hash_table (&expr_hash_table);
2588   add_noreturn_fake_exit_edges ();
2589   if (flag_gcse_lm)
2590     compute_ld_motion_mems ();
2591
2592   compute_hash_table (&expr_hash_table);
2593   if (flag_gcse_lm)
2594     trim_ld_motion_mems ();
2595   if (dump_file)
2596     dump_hash_table (dump_file, "Expression", &expr_hash_table);
2597
2598   if (expr_hash_table.n_elems > 0)
2599     {
2600       struct edge_list *edge_list;
2601       alloc_pre_mem (last_basic_block_for_fn (cfun), expr_hash_table.n_elems);
2602       edge_list = compute_pre_data ();
2603       changed |= pre_gcse (edge_list);
2604       free_edge_list (edge_list);
2605       free_pre_mem ();
2606     }
2607
2608   if (flag_gcse_lm)
2609     free_ld_motion_mems ();
2610   remove_fake_exit_edges ();
2611   free_hash_table (&expr_hash_table);
2612
2613   free_gcse_mem ();
2614   obstack_free (&gcse_obstack, NULL);
2615
2616   /* We are finished with alias.  */
2617   end_alias_analysis ();
2618
2619   if (dump_file)
2620     {
2621       fprintf (dump_file, "PRE GCSE of %s, %d basic blocks, %d bytes needed, ",
2622                current_function_name (), n_basic_blocks_for_fn (cfun),
2623                bytes_used);
2624       fprintf (dump_file, "%d substs, %d insns created\n",
2625                gcse_subst_count, gcse_create_count);
2626     }
2627
2628   return changed;
2629 }
2630 \f
2631 /* If X contains any LABEL_REF's, add REG_LABEL_OPERAND notes for them
2632    to INSN.  If such notes are added to an insn which references a
2633    CODE_LABEL, the LABEL_NUSES count is incremented.  We have to add
2634    that note, because the following loop optimization pass requires
2635    them.  */
2636
2637 /* ??? If there was a jump optimization pass after gcse and before loop,
2638    then we would not need to do this here, because jump would add the
2639    necessary REG_LABEL_OPERAND and REG_LABEL_TARGET notes.  */
2640
2641 static void
2642 add_label_notes (rtx x, rtx_insn *insn)
2643 {
2644   enum rtx_code code = GET_CODE (x);
2645   int i, j;
2646   const char *fmt;
2647
2648   if (code == LABEL_REF && !LABEL_REF_NONLOCAL_P (x))
2649     {
2650       /* This code used to ignore labels that referred to dispatch tables to
2651          avoid flow generating (slightly) worse code.
2652
2653          We no longer ignore such label references (see LABEL_REF handling in
2654          mark_jump_label for additional information).  */
2655
2656       /* There's no reason for current users to emit jump-insns with
2657          such a LABEL_REF, so we don't have to handle REG_LABEL_TARGET
2658          notes.  */
2659       gcc_assert (!JUMP_P (insn));
2660       add_reg_note (insn, REG_LABEL_OPERAND, label_ref_label (x));
2661
2662       if (LABEL_P (label_ref_label (x)))
2663         LABEL_NUSES (label_ref_label (x))++;
2664
2665       return;
2666     }
2667
2668   for (i = GET_RTX_LENGTH (code) - 1, fmt = GET_RTX_FORMAT (code); i >= 0; i--)
2669     {
2670       if (fmt[i] == 'e')
2671         add_label_notes (XEXP (x, i), insn);
2672       else if (fmt[i] == 'E')
2673         for (j = XVECLEN (x, i) - 1; j >= 0; j--)
2674           add_label_notes (XVECEXP (x, i, j), insn);
2675     }
2676 }
2677
2678 /* Code Hoisting variables and subroutines.  */
2679
2680 /* Very busy expressions.  */
2681 static sbitmap *hoist_vbein;
2682 static sbitmap *hoist_vbeout;
2683
2684 /* ??? We could compute post dominators and run this algorithm in
2685    reverse to perform tail merging, doing so would probably be
2686    more effective than the tail merging code in jump.c.
2687
2688    It's unclear if tail merging could be run in parallel with
2689    code hoisting.  It would be nice.  */
2690
2691 /* Allocate vars used for code hoisting analysis.  */
2692
2693 static void
2694 alloc_code_hoist_mem (int n_blocks, int n_exprs)
2695 {
2696   antloc = sbitmap_vector_alloc (n_blocks, n_exprs);
2697   transp = sbitmap_vector_alloc (n_blocks, n_exprs);
2698   comp = sbitmap_vector_alloc (n_blocks, n_exprs);
2699
2700   hoist_vbein = sbitmap_vector_alloc (n_blocks, n_exprs);
2701   hoist_vbeout = sbitmap_vector_alloc (n_blocks, n_exprs);
2702 }
2703
2704 /* Free vars used for code hoisting analysis.  */
2705
2706 static void
2707 free_code_hoist_mem (void)
2708 {
2709   sbitmap_vector_free (antloc);
2710   sbitmap_vector_free (transp);
2711   sbitmap_vector_free (comp);
2712
2713   sbitmap_vector_free (hoist_vbein);
2714   sbitmap_vector_free (hoist_vbeout);
2715
2716   free_dominance_info (CDI_DOMINATORS);
2717 }
2718
2719 /* Compute the very busy expressions at entry/exit from each block.
2720
2721    An expression is very busy if all paths from a given point
2722    compute the expression.  */
2723
2724 static void
2725 compute_code_hoist_vbeinout (void)
2726 {
2727   int changed, passes;
2728   basic_block bb;
2729
2730   bitmap_vector_clear (hoist_vbeout, last_basic_block_for_fn (cfun));
2731   bitmap_vector_clear (hoist_vbein, last_basic_block_for_fn (cfun));
2732
2733   passes = 0;
2734   changed = 1;
2735
2736   while (changed)
2737     {
2738       changed = 0;
2739
2740       /* We scan the blocks in the reverse order to speed up
2741          the convergence.  */
2742       FOR_EACH_BB_REVERSE_FN (bb, cfun)
2743         {
2744           if (bb->next_bb != EXIT_BLOCK_PTR_FOR_FN (cfun))
2745             {
2746               bitmap_intersection_of_succs (hoist_vbeout[bb->index],
2747                                             hoist_vbein, bb);
2748
2749               /* Include expressions in VBEout that are calculated
2750                  in BB and available at its end.  */
2751               bitmap_ior (hoist_vbeout[bb->index],
2752                               hoist_vbeout[bb->index], comp[bb->index]);
2753             }
2754
2755           changed |= bitmap_or_and (hoist_vbein[bb->index],
2756                                               antloc[bb->index],
2757                                               hoist_vbeout[bb->index],
2758                                               transp[bb->index]);
2759         }
2760
2761       passes++;
2762     }
2763
2764   if (dump_file)
2765     {
2766       fprintf (dump_file, "hoisting vbeinout computation: %d passes\n", passes);
2767
2768       FOR_EACH_BB_FN (bb, cfun)
2769         {
2770           fprintf (dump_file, "vbein (%d): ", bb->index);
2771           dump_bitmap_file (dump_file, hoist_vbein[bb->index]);
2772           fprintf (dump_file, "vbeout(%d): ", bb->index);
2773           dump_bitmap_file (dump_file, hoist_vbeout[bb->index]);
2774         }
2775     }
2776 }
2777
2778 /* Top level routine to do the dataflow analysis needed by code hoisting.  */
2779
2780 static void
2781 compute_code_hoist_data (void)
2782 {
2783   compute_local_properties (transp, comp, antloc, &expr_hash_table);
2784   prune_expressions (false);
2785   compute_code_hoist_vbeinout ();
2786   calculate_dominance_info (CDI_DOMINATORS);
2787   if (dump_file)
2788     fprintf (dump_file, "\n");
2789 }
2790
2791 /* Update register pressure for BB when hoisting an expression from
2792    instruction FROM, if live ranges of inputs are shrunk.  Also
2793    maintain live_in information if live range of register referred
2794    in FROM is shrunk.
2795
2796    Return 0 if register pressure doesn't change, otherwise return
2797    the number by which register pressure is decreased.
2798
2799    NOTE: Register pressure won't be increased in this function.  */
2800
2801 static int
2802 update_bb_reg_pressure (basic_block bb, rtx_insn *from)
2803 {
2804   rtx dreg;
2805   rtx_insn *insn;
2806   basic_block succ_bb;
2807   df_ref use, op_ref;
2808   edge succ;
2809   edge_iterator ei;
2810   int decreased_pressure = 0;
2811   int nregs;
2812   enum reg_class pressure_class;
2813
2814   FOR_EACH_INSN_USE (use, from)
2815     {
2816       dreg = DF_REF_REAL_REG (use);
2817       /* The live range of register is shrunk only if it isn't:
2818          1. referred on any path from the end of this block to EXIT, or
2819          2. referred by insns other than FROM in this block.  */
2820       FOR_EACH_EDGE (succ, ei, bb->succs)
2821         {
2822           succ_bb = succ->dest;
2823           if (succ_bb == EXIT_BLOCK_PTR_FOR_FN (cfun))
2824             continue;
2825
2826           if (bitmap_bit_p (BB_DATA (succ_bb)->live_in, REGNO (dreg)))
2827             break;
2828         }
2829       if (succ != NULL)
2830         continue;
2831
2832       op_ref = DF_REG_USE_CHAIN (REGNO (dreg));
2833       for (; op_ref; op_ref = DF_REF_NEXT_REG (op_ref))
2834         {
2835           if (!DF_REF_INSN_INFO (op_ref))
2836             continue;
2837
2838           insn = DF_REF_INSN (op_ref);
2839           if (BLOCK_FOR_INSN (insn) == bb
2840               && NONDEBUG_INSN_P (insn) && insn != from)
2841             break;
2842         }
2843
2844       pressure_class = get_regno_pressure_class (REGNO (dreg), &nregs);
2845       /* Decrease register pressure and update live_in information for
2846          this block.  */
2847       if (!op_ref && pressure_class != NO_REGS)
2848         {
2849           decreased_pressure += nregs;
2850           BB_DATA (bb)->max_reg_pressure[pressure_class] -= nregs;
2851           bitmap_clear_bit (BB_DATA (bb)->live_in, REGNO (dreg));
2852         }
2853     }
2854   return decreased_pressure;
2855 }
2856
2857 /* Determine if the expression EXPR should be hoisted to EXPR_BB up in
2858    flow graph, if it can reach BB unimpared.  Stop the search if the
2859    expression would need to be moved more than DISTANCE instructions.
2860
2861    DISTANCE is the number of instructions through which EXPR can be
2862    hoisted up in flow graph.
2863
2864    BB_SIZE points to an array which contains the number of instructions
2865    for each basic block.
2866
2867    PRESSURE_CLASS and NREGS are register class and number of hard registers
2868    for storing EXPR.
2869
2870    HOISTED_BBS points to a bitmap indicating basic blocks through which
2871    EXPR is hoisted.
2872
2873    FROM is the instruction from which EXPR is hoisted.
2874
2875    It's unclear exactly what Muchnick meant by "unimpared".  It seems
2876    to me that the expression must either be computed or transparent in
2877    *every* block in the path(s) from EXPR_BB to BB.  Any other definition
2878    would allow the expression to be hoisted out of loops, even if
2879    the expression wasn't a loop invariant.
2880
2881    Contrast this to reachability for PRE where an expression is
2882    considered reachable if *any* path reaches instead of *all*
2883    paths.  */
2884
2885 static int
2886 should_hoist_expr_to_dom (basic_block expr_bb, struct gcse_expr *expr,
2887                           basic_block bb, sbitmap visited,
2888                           HOST_WIDE_INT distance,
2889                           int *bb_size, enum reg_class pressure_class,
2890                           int *nregs, bitmap hoisted_bbs, rtx_insn *from)
2891 {
2892   unsigned int i;
2893   edge pred;
2894   edge_iterator ei;
2895   sbitmap_iterator sbi;
2896   int visited_allocated_locally = 0;
2897   int decreased_pressure = 0;
2898
2899   if (flag_ira_hoist_pressure)
2900     {
2901       /* Record old information of basic block BB when it is visited
2902          at the first time.  */
2903       if (!bitmap_bit_p (hoisted_bbs, bb->index))
2904         {
2905           struct bb_data *data = BB_DATA (bb);
2906           bitmap_copy (data->backup, data->live_in);
2907           data->old_pressure = data->max_reg_pressure[pressure_class];
2908         }
2909       decreased_pressure = update_bb_reg_pressure (bb, from);
2910     }
2911   /* Terminate the search if distance, for which EXPR is allowed to move,
2912      is exhausted.  */
2913   if (distance > 0)
2914     {
2915       if (flag_ira_hoist_pressure)
2916         {
2917           /* Prefer to hoist EXPR if register pressure is decreased.  */
2918           if (decreased_pressure > *nregs)
2919             distance += bb_size[bb->index];
2920           /* Let EXPR be hoisted through basic block at no cost if one
2921              of following conditions is satisfied:
2922
2923              1. The basic block has low register pressure.
2924              2. Register pressure won't be increases after hoisting EXPR.
2925
2926              Constant expressions is handled conservatively, because
2927              hoisting constant expression aggressively results in worse
2928              code.  This decision is made by the observation of CSiBE
2929              on ARM target, while it has no obvious effect on other
2930              targets like x86, x86_64, mips and powerpc.  */
2931           else if (CONST_INT_P (expr->expr)
2932                    || (BB_DATA (bb)->max_reg_pressure[pressure_class]
2933                          >= ira_class_hard_regs_num[pressure_class]
2934                        && decreased_pressure < *nregs))
2935             distance -= bb_size[bb->index];
2936         }
2937       else
2938         distance -= bb_size[bb->index];
2939
2940       if (distance <= 0)
2941         return 0;
2942     }
2943   else
2944     gcc_assert (distance == 0);
2945
2946   if (visited == NULL)
2947     {
2948       visited_allocated_locally = 1;
2949       visited = sbitmap_alloc (last_basic_block_for_fn (cfun));
2950       bitmap_clear (visited);
2951     }
2952
2953   FOR_EACH_EDGE (pred, ei, bb->preds)
2954     {
2955       basic_block pred_bb = pred->src;
2956
2957       if (pred->src == ENTRY_BLOCK_PTR_FOR_FN (cfun))
2958         break;
2959       else if (pred_bb == expr_bb)
2960         continue;
2961       else if (bitmap_bit_p (visited, pred_bb->index))
2962         continue;
2963       else if (! bitmap_bit_p (transp[pred_bb->index], expr->bitmap_index))
2964         break;
2965       /* Not killed.  */
2966       else
2967         {
2968           bitmap_set_bit (visited, pred_bb->index);
2969           if (! should_hoist_expr_to_dom (expr_bb, expr, pred_bb,
2970                                           visited, distance, bb_size,
2971                                           pressure_class, nregs,
2972                                           hoisted_bbs, from))
2973             break;
2974         }
2975     }
2976   if (visited_allocated_locally)
2977     {
2978       /* If EXPR can be hoisted to expr_bb, record basic blocks through
2979          which EXPR is hoisted in hoisted_bbs.  */
2980       if (flag_ira_hoist_pressure && !pred)
2981         {
2982           /* Record the basic block from which EXPR is hoisted.  */
2983           bitmap_set_bit (visited, bb->index);
2984           EXECUTE_IF_SET_IN_BITMAP (visited, 0, i, sbi)
2985             bitmap_set_bit (hoisted_bbs, i);
2986         }
2987       sbitmap_free (visited);
2988     }
2989
2990   return (pred == NULL);
2991 }
2992 \f
2993 /* Find occurrence in BB.  */
2994
2995 static struct gcse_occr *
2996 find_occr_in_bb (struct gcse_occr *occr, basic_block bb)
2997 {
2998   /* Find the right occurrence of this expression.  */
2999   while (occr && BLOCK_FOR_INSN (occr->insn) != bb)
3000     occr = occr->next;
3001
3002   return occr;
3003 }
3004
3005 /* Actually perform code hoisting.
3006
3007    The code hoisting pass can hoist multiple computations of the same
3008    expression along dominated path to a dominating basic block, like
3009    from b2/b3 to b1 as depicted below:
3010
3011           b1      ------
3012           /\         |
3013          /  \        |
3014         bx   by   distance
3015        /      \      |
3016       /        \     |
3017      b2        b3 ------
3018
3019    Unfortunately code hoisting generally extends the live range of an
3020    output pseudo register, which increases register pressure and hurts
3021    register allocation.  To address this issue, an attribute MAX_DISTANCE
3022    is computed and attached to each expression.  The attribute is computed
3023    from rtx cost of the corresponding expression and it's used to control
3024    how long the expression can be hoisted up in flow graph.  As the
3025    expression is hoisted up in flow graph, GCC decreases its DISTANCE
3026    and stops the hoist if DISTANCE reaches 0.  Code hoisting can decrease
3027    register pressure if live ranges of inputs are shrunk.
3028
3029    Option "-fira-hoist-pressure" implements register pressure directed
3030    hoist based on upper method.  The rationale is:
3031      1. Calculate register pressure for each basic block by reusing IRA
3032         facility.
3033      2. When expression is hoisted through one basic block, GCC checks
3034         the change of live ranges for inputs/output.  The basic block's
3035         register pressure will be increased because of extended live
3036         range of output.  However, register pressure will be decreased
3037         if the live ranges of inputs are shrunk.
3038      3. After knowing how hoisting affects register pressure, GCC prefers
3039         to hoist the expression if it can decrease register pressure, by
3040         increasing DISTANCE of the corresponding expression.
3041      4. If hoisting the expression increases register pressure, GCC checks
3042         register pressure of the basic block and decrease DISTANCE only if
3043         the register pressure is high.  In other words, expression will be
3044         hoisted through at no cost if the basic block has low register
3045         pressure.
3046      5. Update register pressure information for basic blocks through
3047         which expression is hoisted.  */
3048
3049 static int
3050 hoist_code (void)
3051 {
3052   basic_block bb, dominated;
3053   unsigned int dom_tree_walk_index;
3054   unsigned int i, j, k;
3055   struct gcse_expr **index_map;
3056   struct gcse_expr *expr;
3057   int *to_bb_head;
3058   int *bb_size;
3059   int changed = 0;
3060   struct bb_data *data;
3061   /* Basic blocks that have occurrences reachable from BB.  */
3062   bitmap from_bbs;
3063   /* Basic blocks through which expr is hoisted.  */
3064   bitmap hoisted_bbs = NULL;
3065   bitmap_iterator bi;
3066
3067   /* Compute a mapping from expression number (`bitmap_index') to
3068      hash table entry.  */
3069
3070   index_map = XCNEWVEC (struct gcse_expr *, expr_hash_table.n_elems);
3071   for (i = 0; i < expr_hash_table.size; i++)
3072     for (expr = expr_hash_table.table[i]; expr; expr = expr->next_same_hash)
3073       index_map[expr->bitmap_index] = expr;
3074
3075   /* Calculate sizes of basic blocks and note how far
3076      each instruction is from the start of its block.  We then use this
3077      data to restrict distance an expression can travel.  */
3078
3079   to_bb_head = XCNEWVEC (int, get_max_uid ());
3080   bb_size = XCNEWVEC (int, last_basic_block_for_fn (cfun));
3081
3082   FOR_EACH_BB_FN (bb, cfun)
3083     {
3084       rtx_insn *insn;
3085       int to_head;
3086
3087       to_head = 0;
3088       FOR_BB_INSNS (bb, insn)
3089         {
3090           /* Don't count debug instructions to avoid them affecting
3091              decision choices.  */
3092           if (NONDEBUG_INSN_P (insn))
3093             to_bb_head[INSN_UID (insn)] = to_head++;
3094         }
3095
3096       bb_size[bb->index] = to_head;
3097     }
3098
3099   gcc_assert (EDGE_COUNT (ENTRY_BLOCK_PTR_FOR_FN (cfun)->succs) == 1
3100               && (EDGE_SUCC (ENTRY_BLOCK_PTR_FOR_FN (cfun), 0)->dest
3101                   == ENTRY_BLOCK_PTR_FOR_FN (cfun)->next_bb));
3102
3103   from_bbs = BITMAP_ALLOC (NULL);
3104   if (flag_ira_hoist_pressure)
3105     hoisted_bbs = BITMAP_ALLOC (NULL);
3106
3107   auto_vec<basic_block> dom_tree_walk
3108   = get_all_dominated_blocks (CDI_DOMINATORS,
3109                               ENTRY_BLOCK_PTR_FOR_FN (cfun)->next_bb);
3110
3111   /* Walk over each basic block looking for potentially hoistable
3112      expressions, nothing gets hoisted from the entry block.  */
3113   FOR_EACH_VEC_ELT (dom_tree_walk, dom_tree_walk_index, bb)
3114     {
3115       auto_vec<basic_block> domby
3116         = get_dominated_to_depth (CDI_DOMINATORS, bb, param_max_hoist_depth);
3117
3118       if (domby.length () == 0)
3119         continue;
3120
3121       /* Examine each expression that is very busy at the exit of this
3122          block.  These are the potentially hoistable expressions.  */
3123       for (i = 0; i < SBITMAP_SIZE (hoist_vbeout[bb->index]); i++)
3124         {
3125           if (bitmap_bit_p (hoist_vbeout[bb->index], i))
3126             {
3127               int nregs = 0;
3128               enum reg_class pressure_class = NO_REGS;
3129               /* Current expression.  */
3130               struct gcse_expr *expr = index_map[i];
3131               /* Number of occurrences of EXPR that can be hoisted to BB.  */
3132               int hoistable = 0;
3133               /* Occurrences reachable from BB.  */
3134               vec<occr_t> occrs_to_hoist = vNULL;
3135               /* We want to insert the expression into BB only once, so
3136                  note when we've inserted it.  */
3137               int insn_inserted_p;
3138               occr_t occr;
3139
3140               /* If an expression is computed in BB and is available at end of
3141                  BB, hoist all occurrences dominated by BB to BB.  */
3142               if (bitmap_bit_p (comp[bb->index], i))
3143                 {
3144                   occr = find_occr_in_bb (expr->antic_occr, bb);
3145
3146                   if (occr)
3147                     {
3148                       /* An occurrence might've been already deleted
3149                          while processing a dominator of BB.  */
3150                       if (!occr->deleted_p)
3151                         {
3152                           gcc_assert (NONDEBUG_INSN_P (occr->insn));
3153                           hoistable++;
3154                         }
3155                     }
3156                   else
3157                     hoistable++;
3158                 }
3159
3160               /* We've found a potentially hoistable expression, now
3161                  we look at every block BB dominates to see if it
3162                  computes the expression.  */
3163               FOR_EACH_VEC_ELT (domby, j, dominated)
3164                 {
3165                   HOST_WIDE_INT max_distance;
3166
3167                   /* Ignore self dominance.  */
3168                   if (bb == dominated)
3169                     continue;
3170                   /* We've found a dominated block, now see if it computes
3171                      the busy expression and whether or not moving that
3172                      expression to the "beginning" of that block is safe.  */
3173                   if (!bitmap_bit_p (antloc[dominated->index], i))
3174                     continue;
3175
3176                   occr = find_occr_in_bb (expr->antic_occr, dominated);
3177                   gcc_assert (occr);
3178
3179                   /* An occurrence might've been already deleted
3180                      while processing a dominator of BB.  */
3181                   if (occr->deleted_p)
3182                     continue;
3183                   gcc_assert (NONDEBUG_INSN_P (occr->insn));
3184
3185                   max_distance = expr->max_distance;
3186                   if (max_distance > 0)
3187                     /* Adjust MAX_DISTANCE to account for the fact that
3188                        OCCR won't have to travel all of DOMINATED, but
3189                        only part of it.  */
3190                     max_distance += (bb_size[dominated->index]
3191                                      - to_bb_head[INSN_UID (occr->insn)]);
3192
3193                   pressure_class = get_pressure_class_and_nregs (occr->insn,
3194                                                                  &nregs);
3195
3196                   /* Note if the expression should be hoisted from the dominated
3197                      block to BB if it can reach DOMINATED unimpared.
3198
3199                      Keep track of how many times this expression is hoistable
3200                      from a dominated block into BB.  */
3201                   if (should_hoist_expr_to_dom (bb, expr, dominated, NULL,
3202                                                 max_distance, bb_size,
3203                                                 pressure_class, &nregs,
3204                                                 hoisted_bbs, occr->insn))
3205                     {
3206                       hoistable++;
3207                       occrs_to_hoist.safe_push (occr);
3208                       bitmap_set_bit (from_bbs, dominated->index);
3209                     }
3210                 }
3211
3212               /* If we found more than one hoistable occurrence of this
3213                  expression, then note it in the vector of expressions to
3214                  hoist.  It makes no sense to hoist things which are computed
3215                  in only one BB, and doing so tends to pessimize register
3216                  allocation.  One could increase this value to try harder
3217                  to avoid any possible code expansion due to register
3218                  allocation issues; however experiments have shown that
3219                  the vast majority of hoistable expressions are only movable
3220                  from two successors, so raising this threshold is likely
3221                  to nullify any benefit we get from code hoisting.  */
3222               if (hoistable > 1 && dbg_cnt (hoist_insn))
3223                 {
3224                   /* If (hoistable != vec::length), then there is
3225                      an occurrence of EXPR in BB itself.  Don't waste
3226                      time looking for LCA in this case.  */
3227                   if ((unsigned) hoistable == occrs_to_hoist.length ())
3228                     {
3229                       basic_block lca;
3230
3231                       lca = nearest_common_dominator_for_set (CDI_DOMINATORS,
3232                                                               from_bbs);
3233                       if (lca != bb)
3234                         /* Punt, it's better to hoist these occurrences to
3235                            LCA.  */
3236                         occrs_to_hoist.release ();
3237                     }
3238                 }
3239               else
3240                 /* Punt, no point hoisting a single occurrence.  */
3241                 occrs_to_hoist.release ();
3242
3243               if (flag_ira_hoist_pressure
3244                   && !occrs_to_hoist.is_empty ())
3245                 {
3246                   /* Increase register pressure of basic blocks to which
3247                      expr is hoisted because of extended live range of
3248                      output.  */
3249                   data = BB_DATA (bb);
3250                   data->max_reg_pressure[pressure_class] += nregs;
3251                   EXECUTE_IF_SET_IN_BITMAP (hoisted_bbs, 0, k, bi)
3252                     {
3253                       data = BB_DATA (BASIC_BLOCK_FOR_FN (cfun, k));
3254                       data->max_reg_pressure[pressure_class] += nregs;
3255                     }
3256                 }
3257               else if (flag_ira_hoist_pressure)
3258                 {
3259                   /* Restore register pressure and live_in info for basic
3260                      blocks recorded in hoisted_bbs when expr will not be
3261                      hoisted.  */
3262                   EXECUTE_IF_SET_IN_BITMAP (hoisted_bbs, 0, k, bi)
3263                     {
3264                       data = BB_DATA (BASIC_BLOCK_FOR_FN (cfun, k));
3265                       bitmap_copy (data->live_in, data->backup);
3266                       data->max_reg_pressure[pressure_class]
3267                           = data->old_pressure;
3268                     }
3269                 }
3270
3271               if (flag_ira_hoist_pressure)
3272                 bitmap_clear (hoisted_bbs);
3273
3274               insn_inserted_p = 0;
3275
3276               /* Walk through occurrences of I'th expressions we want
3277                  to hoist to BB and make the transformations.  */
3278               FOR_EACH_VEC_ELT (occrs_to_hoist, j, occr)
3279                 {
3280                   rtx_insn *insn;
3281                   const_rtx set;
3282
3283                   gcc_assert (!occr->deleted_p);
3284
3285                   insn = occr->insn;
3286                   set = single_set_gcse (insn);
3287
3288                   /* Create a pseudo-reg to store the result of reaching
3289                      expressions into.  Get the mode for the new pseudo
3290                      from the mode of the original destination pseudo.
3291
3292                      It is important to use new pseudos whenever we
3293                      emit a set.  This will allow reload to use
3294                      rematerialization for such registers.  */
3295                   if (!insn_inserted_p)
3296                     expr->reaching_reg
3297                       = gen_reg_rtx_and_attrs (SET_DEST (set));
3298
3299                   gcse_emit_move_after (SET_DEST (set), expr->reaching_reg,
3300                                         insn);
3301                   delete_insn (insn);
3302                   occr->deleted_p = 1;
3303                   changed = 1;
3304                   gcse_subst_count++;
3305
3306                   if (!insn_inserted_p)
3307                     {
3308                       insert_insn_end_basic_block (expr, bb);
3309                       insn_inserted_p = 1;
3310                     }
3311                 }
3312
3313               occrs_to_hoist.release ();
3314               bitmap_clear (from_bbs);
3315             }
3316         }
3317     }
3318
3319   BITMAP_FREE (from_bbs);
3320   if (flag_ira_hoist_pressure)
3321     BITMAP_FREE (hoisted_bbs);
3322
3323   free (bb_size);
3324   free (to_bb_head);
3325   free (index_map);
3326
3327   return changed;
3328 }
3329
3330 /* Return pressure class and number of needed hard registers (through
3331    *NREGS) of register REGNO.  */
3332 static enum reg_class
3333 get_regno_pressure_class (int regno, int *nregs)
3334 {
3335   if (regno >= FIRST_PSEUDO_REGISTER)
3336     {
3337       enum reg_class pressure_class;
3338
3339       pressure_class = reg_allocno_class (regno);
3340       pressure_class = ira_pressure_class_translate[pressure_class];
3341       *nregs
3342         = ira_reg_class_max_nregs[pressure_class][PSEUDO_REGNO_MODE (regno)];
3343       return pressure_class;
3344     }
3345   else if (! TEST_HARD_REG_BIT (ira_no_alloc_regs, regno)
3346            && ! TEST_HARD_REG_BIT (eliminable_regset, regno))
3347     {
3348       *nregs = 1;
3349       return ira_pressure_class_translate[REGNO_REG_CLASS (regno)];
3350     }
3351   else
3352     {
3353       *nregs = 0;
3354       return NO_REGS;
3355     }
3356 }
3357
3358 /* Return pressure class and number of hard registers (through *NREGS)
3359    for destination of INSN. */
3360 static enum reg_class
3361 get_pressure_class_and_nregs (rtx_insn *insn, int *nregs)
3362 {
3363   rtx reg;
3364   enum reg_class pressure_class;
3365   const_rtx set = single_set_gcse (insn);
3366
3367   reg = SET_DEST (set);
3368   if (GET_CODE (reg) == SUBREG)
3369     reg = SUBREG_REG (reg);
3370   if (MEM_P (reg))
3371     {
3372       *nregs = 0;
3373       pressure_class = NO_REGS;
3374     }
3375   else
3376     {
3377       gcc_assert (REG_P (reg));
3378       pressure_class = reg_allocno_class (REGNO (reg));
3379       pressure_class = ira_pressure_class_translate[pressure_class];
3380       *nregs
3381         = ira_reg_class_max_nregs[pressure_class][GET_MODE (SET_SRC (set))];
3382     }
3383   return pressure_class;
3384 }
3385
3386 /* Increase (if INCR_P) or decrease current register pressure for
3387    register REGNO.  */
3388 static void
3389 change_pressure (int regno, bool incr_p)
3390 {
3391   int nregs;
3392   enum reg_class pressure_class;
3393
3394   pressure_class = get_regno_pressure_class (regno, &nregs);
3395   if (! incr_p)
3396     curr_reg_pressure[pressure_class] -= nregs;
3397   else
3398     {
3399       curr_reg_pressure[pressure_class] += nregs;
3400       if (BB_DATA (curr_bb)->max_reg_pressure[pressure_class]
3401           < curr_reg_pressure[pressure_class])
3402         BB_DATA (curr_bb)->max_reg_pressure[pressure_class]
3403           = curr_reg_pressure[pressure_class];
3404     }
3405 }
3406
3407 /* Calculate register pressure for each basic block by walking insns
3408    from last to first.  */
3409 static void
3410 calculate_bb_reg_pressure (void)
3411 {
3412   int i;
3413   unsigned int j;
3414   rtx_insn *insn;
3415   basic_block bb;
3416   bitmap curr_regs_live;
3417   bitmap_iterator bi;
3418
3419
3420   ira_setup_eliminable_regset ();
3421   curr_regs_live = BITMAP_ALLOC (&reg_obstack);
3422   FOR_EACH_BB_FN (bb, cfun)
3423     {
3424       curr_bb = bb;
3425       BB_DATA (bb)->live_in = BITMAP_ALLOC (NULL);
3426       BB_DATA (bb)->backup = BITMAP_ALLOC (NULL);
3427       bitmap_copy (BB_DATA (bb)->live_in, df_get_live_in (bb));
3428       bitmap_copy (curr_regs_live, df_get_live_out (bb));
3429       for (i = 0; i < ira_pressure_classes_num; i++)
3430         curr_reg_pressure[ira_pressure_classes[i]] = 0;
3431       EXECUTE_IF_SET_IN_BITMAP (curr_regs_live, 0, j, bi)
3432         change_pressure (j, true);
3433
3434       FOR_BB_INSNS_REVERSE (bb, insn)
3435         {
3436           rtx dreg;
3437           int regno;
3438           df_ref def, use;
3439
3440           if (! NONDEBUG_INSN_P (insn))
3441             continue;
3442
3443           FOR_EACH_INSN_DEF (def, insn)
3444             {
3445               dreg = DF_REF_REAL_REG (def);
3446               gcc_assert (REG_P (dreg));
3447               regno = REGNO (dreg);
3448               if (!(DF_REF_FLAGS (def)
3449                     & (DF_REF_PARTIAL | DF_REF_CONDITIONAL)))
3450                 {
3451                   if (bitmap_clear_bit (curr_regs_live, regno))
3452                     change_pressure (regno, false);
3453                 }
3454             }
3455
3456           FOR_EACH_INSN_USE (use, insn)
3457             {
3458               dreg = DF_REF_REAL_REG (use);
3459               gcc_assert (REG_P (dreg));
3460               regno = REGNO (dreg);
3461               if (bitmap_set_bit (curr_regs_live, regno))
3462                 change_pressure (regno, true);
3463             }
3464         }
3465     }
3466   BITMAP_FREE (curr_regs_live);
3467
3468   if (dump_file == NULL)
3469     return;
3470
3471   fprintf (dump_file, "\nRegister Pressure: \n");
3472   FOR_EACH_BB_FN (bb, cfun)
3473     {
3474       fprintf (dump_file, "  Basic block %d: \n", bb->index);
3475       for (i = 0; (int) i < ira_pressure_classes_num; i++)
3476         {
3477           enum reg_class pressure_class;
3478
3479           pressure_class = ira_pressure_classes[i];
3480           if (BB_DATA (bb)->max_reg_pressure[pressure_class] == 0)
3481             continue;
3482
3483           fprintf (dump_file, "    %s=%d\n", reg_class_names[pressure_class],
3484                    BB_DATA (bb)->max_reg_pressure[pressure_class]);
3485         }
3486     }
3487   fprintf (dump_file, "\n");
3488 }
3489
3490 /* Top level routine to perform one code hoisting (aka unification) pass
3491
3492    Return nonzero if a change was made.  */
3493
3494 static int
3495 one_code_hoisting_pass (void)
3496 {
3497   int changed = 0;
3498
3499   gcse_subst_count = 0;
3500   gcse_create_count = 0;
3501
3502   /* Return if there's nothing to do, or it is too expensive.  */
3503   if (n_basic_blocks_for_fn (cfun) <= NUM_FIXED_BLOCKS + 1
3504       || gcse_or_cprop_is_too_expensive (_("GCSE disabled")))
3505     return 0;
3506
3507   doing_code_hoisting_p = true;
3508
3509   /* Calculate register pressure for each basic block.  */
3510   if (flag_ira_hoist_pressure)
3511     {
3512       regstat_init_n_sets_and_refs ();
3513       ira_set_pseudo_classes (false, dump_file);
3514       alloc_aux_for_blocks (sizeof (struct bb_data));
3515       calculate_bb_reg_pressure ();
3516       regstat_free_n_sets_and_refs ();
3517     }
3518
3519   /* We need alias.  */
3520   init_alias_analysis ();
3521
3522   bytes_used = 0;
3523   gcc_obstack_init (&gcse_obstack);
3524   alloc_gcse_mem ();
3525
3526   alloc_hash_table (&expr_hash_table);
3527   compute_hash_table (&expr_hash_table);
3528   if (dump_file)
3529     dump_hash_table (dump_file, "Code Hosting Expressions", &expr_hash_table);
3530
3531   if (expr_hash_table.n_elems > 0)
3532     {
3533       alloc_code_hoist_mem (last_basic_block_for_fn (cfun),
3534                             expr_hash_table.n_elems);
3535       compute_code_hoist_data ();
3536       changed = hoist_code ();
3537       free_code_hoist_mem ();
3538     }
3539
3540   if (flag_ira_hoist_pressure)
3541     {
3542       free_aux_for_blocks ();
3543       free_reg_info ();
3544     }
3545   free_hash_table (&expr_hash_table);
3546   free_gcse_mem ();
3547   obstack_free (&gcse_obstack, NULL);
3548
3549   /* We are finished with alias.  */
3550   end_alias_analysis ();
3551
3552   if (dump_file)
3553     {
3554       fprintf (dump_file, "HOIST of %s, %d basic blocks, %d bytes needed, ",
3555                current_function_name (), n_basic_blocks_for_fn (cfun),
3556                bytes_used);
3557       fprintf (dump_file, "%d substs, %d insns created\n",
3558                gcse_subst_count, gcse_create_count);
3559     }
3560
3561   doing_code_hoisting_p = false;
3562
3563   return changed;
3564 }
3565 \f
3566 /*  Here we provide the things required to do store motion towards the exit.
3567     In order for this to be effective, gcse also needed to be taught how to
3568     move a load when it is killed only by a store to itself.
3569
3570             int i;
3571             float a[10];
3572
3573             void foo(float scale)
3574             {
3575               for (i=0; i<10; i++)
3576                 a[i] *= scale;
3577             }
3578
    'i' is both loaded and stored to in the loop.  Normally, gcse cannot move
    the load out since it's live around the loop, and stored at the bottom
    of the loop.
3582
3583       The 'Load Motion' referred to and implemented in this file is
3584     an enhancement to gcse which when using edge based LCM, recognizes
3585     this situation and allows gcse to move the load out of the loop.
3586
3587       Once gcse has hoisted the load, store motion can then push this
3588     load towards the exit, and we end up with no loads or stores of 'i'
3589     in the loop.  */
3590
3591 /* This will search the ldst list for a matching expression. If it
3592    doesn't find one, we create one and initialize it.  */
3593
3594 static struct ls_expr *
3595 ldst_entry (rtx x)
3596 {
3597   int do_not_record_p = 0;
3598   struct ls_expr * ptr;
3599   unsigned int hash;
3600   ls_expr **slot;
3601   struct ls_expr e;
3602
3603   hash = hash_rtx (x, GET_MODE (x), &do_not_record_p,
3604                    NULL,  /*have_reg_qty=*/false);
3605
3606   e.pattern = x;
3607   slot = pre_ldst_table->find_slot_with_hash (&e, hash, INSERT);
3608   if (*slot)
3609     return *slot;
3610
3611   ptr = XNEW (struct ls_expr);
3612
3613   ptr->next         = pre_ldst_mems;
3614   ptr->expr         = NULL;
3615   ptr->pattern      = x;
3616   ptr->pattern_regs = NULL_RTX;
3617   ptr->stores.create (0);
3618   ptr->reaching_reg = NULL_RTX;
3619   ptr->invalid      = 0;
3620   ptr->index        = 0;
3621   ptr->hash_index   = hash;
3622   pre_ldst_mems     = ptr;
3623   *slot = ptr;
3624
3625   return ptr;
3626 }
3627
3628 /* Free up an individual ldst entry.  */
3629
3630 static void
3631 free_ldst_entry (struct ls_expr * ptr)
3632 {
3633   ptr->stores.release ();
3634
3635   free (ptr);
3636 }
3637
3638 /* Free up all memory associated with the ldst list.  */
3639
3640 static void
3641 free_ld_motion_mems (void)
3642 {
3643   delete pre_ldst_table;
3644   pre_ldst_table = NULL;
3645
3646   while (pre_ldst_mems)
3647     {
3648       struct ls_expr * tmp = pre_ldst_mems;
3649
3650       pre_ldst_mems = pre_ldst_mems->next;
3651
3652       free_ldst_entry (tmp);
3653     }
3654
3655   pre_ldst_mems = NULL;
3656 }
3657
3658 /* Dump debugging info about the ldst list.  */
3659
3660 static void
3661 print_ldst_list (FILE * file)
3662 {
3663   struct ls_expr * ptr;
3664
3665   fprintf (file, "LDST list: \n");
3666
3667   for (ptr = pre_ldst_mems; ptr != NULL; ptr = ptr->next)
3668     {
3669       fprintf (file, "  Pattern (%3d): ", ptr->index);
3670
3671       print_rtl (file, ptr->pattern);
3672
3673       fprintf (file, "\n        Stores : ");
3674       print_rtx_insn_vec (file, ptr->stores);
3675
3676       fprintf (file, "\n\n");
3677     }
3678
3679   fprintf (file, "\n");
3680 }
3681
3682 /* Returns 1 if X is in the list of ldst only expressions.  */
3683
3684 static struct ls_expr *
3685 find_rtx_in_ldst (rtx x)
3686 {
3687   struct ls_expr e;
3688   ls_expr **slot;
3689   if (!pre_ldst_table)
3690     return NULL;
3691   e.pattern = x;
3692   slot = pre_ldst_table->find_slot (&e, NO_INSERT);
3693   if (!slot || (*slot)->invalid)
3694     return NULL;
3695   return *slot;
3696 }
3697 \f
3698 /* Load Motion for loads which only kill themselves.  */
3699
3700 /* Return true if x, a MEM, is a simple access with no side effects.
3701    These are the types of loads we consider for the ld_motion list,
3702    otherwise we let the usual aliasing take care of it.  */
3703
3704 static int
3705 simple_mem (const_rtx x)
3706 {
3707   if (MEM_VOLATILE_P (x))
3708     return 0;
3709
3710   if (GET_MODE (x) == BLKmode)
3711     return 0;
3712
3713   /* If we are handling exceptions, we must be careful with memory references
3714      that may trap.  If we are not, the behavior is undefined, so we may just
3715      continue.  */
3716   if (cfun->can_throw_non_call_exceptions && may_trap_p (x))
3717     return 0;
3718
3719   if (side_effects_p (x))
3720     return 0;
3721
3722   /* Do not consider function arguments passed on stack.  */
3723   if (reg_mentioned_p (stack_pointer_rtx, x))
3724     return 0;
3725
3726   if (flag_float_store && FLOAT_MODE_P (GET_MODE (x)))
3727     return 0;
3728
3729   return 1;
3730 }
3731
3732 /* Make sure there isn't a buried reference in this pattern anywhere.
3733    If there is, invalidate the entry for it since we're not capable
3734    of fixing it up just yet.. We have to be sure we know about ALL
3735    loads since the aliasing code will allow all entries in the
3736    ld_motion list to not-alias itself.  If we miss a load, we will get
3737    the wrong value since gcse might common it and we won't know to
3738    fix it up.  */
3739
3740 static void
3741 invalidate_any_buried_refs (rtx x)
3742 {
3743   const char * fmt;
3744   int i, j;
3745   struct ls_expr * ptr;
3746
3747   /* Invalidate it in the list.  */
3748   if (MEM_P (x) && simple_mem (x))
3749     {
3750       ptr = ldst_entry (x);
3751       ptr->invalid = 1;
3752     }
3753
3754   /* Recursively process the insn.  */
3755   fmt = GET_RTX_FORMAT (GET_CODE (x));
3756
3757   for (i = GET_RTX_LENGTH (GET_CODE (x)) - 1; i >= 0; i--)
3758     {
3759       if (fmt[i] == 'e')
3760         invalidate_any_buried_refs (XEXP (x, i));
3761       else if (fmt[i] == 'E')
3762         for (j = XVECLEN (x, i) - 1; j >= 0; j--)
3763           invalidate_any_buried_refs (XVECEXP (x, i, j));
3764     }
3765 }
3766
3767 /* Find all the 'simple' MEMs which are used in LOADs and STORES.  Simple
3768    being defined as MEM loads and stores to symbols, with no side effects
3769    and no registers in the expression.  For a MEM destination, we also
3770    check that the insn is still valid if we replace the destination with a
3771    REG, as is done in update_ld_motion_stores.  If there are any uses/defs
3772    which don't match this criteria, they are invalidated and trimmed out
3773    later.  */
3774
3775 static void
3776 compute_ld_motion_mems (void)
3777 {
3778   struct ls_expr * ptr;
3779   basic_block bb;
3780   rtx_insn *insn;
3781
3782   pre_ldst_mems = NULL;
3783   pre_ldst_table = new hash_table<pre_ldst_expr_hasher> (13);
3784
3785   FOR_EACH_BB_FN (bb, cfun)
3786     {
3787       FOR_BB_INSNS (bb, insn)
3788         {
3789           if (NONDEBUG_INSN_P (insn))
3790             {
3791               if (GET_CODE (PATTERN (insn)) == SET)
3792                 {
3793                   rtx src = SET_SRC (PATTERN (insn));
3794                   rtx dest = SET_DEST (PATTERN (insn));
3795
3796                   /* Check for a simple load.  */
3797                   if (MEM_P (src) && simple_mem (src))
3798                     {
3799                       ptr = ldst_entry (src);
3800                       if (!REG_P (dest))
3801                         ptr->invalid = 1;
3802                     }
3803                   else
3804                     {
3805                       /* Make sure there isn't a buried load somewhere.  */
3806                       invalidate_any_buried_refs (src);
3807                     }
3808
3809                   /* Check for a simple load through a REG_EQUAL note.  */
3810                   rtx note = find_reg_equal_equiv_note (insn), src_eq;
3811                   if (note
3812                       && REG_NOTE_KIND (note) == REG_EQUAL
3813                       && (src_eq = XEXP (note, 0))
3814                       && !(MEM_P (src_eq) && simple_mem (src_eq)))
3815                     invalidate_any_buried_refs (src_eq);
3816
3817                   /* Check for stores. Don't worry about aliased ones, they
3818                      will block any movement we might do later. We only care
3819                      about this exact pattern since those are the only
3820                      circumstance that we will ignore the aliasing info.  */
3821                   if (MEM_P (dest) && simple_mem (dest))
3822                     {
3823                       ptr = ldst_entry (dest);
3824                       machine_mode src_mode = GET_MODE (src);
3825                       if (! MEM_P (src)
3826                           && GET_CODE (src) != ASM_OPERANDS
3827                           /* Check for REG manually since want_to_gcse_p
3828                              returns 0 for all REGs.  */
3829                           && can_assign_to_reg_without_clobbers_p (src,
3830                                                                     src_mode))
3831                         ptr->stores.safe_push (insn);
3832                       else
3833                         ptr->invalid = 1;
3834                     }
3835                 }
3836               else
3837                 {
3838                   /* Invalidate all MEMs in the pattern and...  */
3839                   invalidate_any_buried_refs (PATTERN (insn));
3840
3841                   /* ...in REG_EQUAL notes for PARALLELs with single SET.  */
3842                   rtx note = find_reg_equal_equiv_note (insn), src_eq;
3843                   if (note
3844                       && REG_NOTE_KIND (note) == REG_EQUAL
3845                       && (src_eq = XEXP (note, 0)))
3846                     invalidate_any_buried_refs (src_eq);
3847                 }
3848             }
3849         }
3850     }
3851 }
3852
3853 /* Remove any references that have been either invalidated or are not in the
3854    expression list for pre gcse.  */
3855
3856 static void
3857 trim_ld_motion_mems (void)
3858 {
3859   struct ls_expr * * last = & pre_ldst_mems;
3860   struct ls_expr * ptr = pre_ldst_mems;
3861
3862   while (ptr != NULL)
3863     {
3864       struct gcse_expr * expr;
3865
3866       /* Delete if entry has been made invalid.  */
3867       if (! ptr->invalid)
3868         {
3869           /* Delete if we cannot find this mem in the expression list.  */
3870           unsigned int hash = ptr->hash_index % expr_hash_table.size;
3871
3872           for (expr = expr_hash_table.table[hash];
3873                expr != NULL;
3874                expr = expr->next_same_hash)
3875             if (expr_equiv_p (expr->expr, ptr->pattern))
3876               break;
3877         }
3878       else
3879         expr = (struct gcse_expr *) 0;
3880
3881       if (expr)
3882         {
3883           /* Set the expression field if we are keeping it.  */
3884           ptr->expr = expr;
3885           last = & ptr->next;
3886           ptr = ptr->next;
3887         }
3888       else
3889         {
3890           *last = ptr->next;
3891           pre_ldst_table->remove_elt_with_hash (ptr, ptr->hash_index);
3892           free_ldst_entry (ptr);
3893           ptr = * last;
3894         }
3895     }
3896
3897   /* Show the world what we've found.  */
3898   if (dump_file && pre_ldst_mems != NULL)
3899     print_ldst_list (dump_file);
3900 }
3901
3902 /* This routine will take an expression which we are replacing with
3903    a reaching register, and update any stores that are needed if
3904    that expression is in the ld_motion list.  Stores are updated by
3905    copying their SRC to the reaching register, and then storing
3906    the reaching register into the store location. These keeps the
3907    correct value in the reaching register for the loads.  */
3908
3909 static void
3910 update_ld_motion_stores (struct gcse_expr * expr)
3911 {
3912   struct ls_expr * mem_ptr;
3913
3914   if ((mem_ptr = find_rtx_in_ldst (expr->expr)))
3915     {
3916       /* We can try to find just the REACHED stores, but is shouldn't
3917          matter to set the reaching reg everywhere...  some might be
3918          dead and should be eliminated later.  */
3919
3920       /* We replace (set mem expr) with (set reg expr) (set mem reg)
3921          where reg is the reaching reg used in the load.  We checked in
3922          compute_ld_motion_mems that we can replace (set mem expr) with
3923          (set reg expr) in that insn.  */
3924       rtx_insn *insn;
3925       unsigned int i;
3926       FOR_EACH_VEC_ELT_REVERSE (mem_ptr->stores, i, insn)
3927         {
3928           rtx pat = PATTERN (insn);
3929           rtx src = SET_SRC (pat);
3930           rtx reg = expr->reaching_reg;
3931
3932           /* If we've already copied it, continue.  */
3933           if (expr->reaching_reg == src)
3934             continue;
3935
3936           if (dump_file)
3937             {
3938               fprintf (dump_file, "PRE:  store updated with reaching reg ");
3939               print_rtl (dump_file, reg);
3940               fprintf (dump_file, ":\n  ");
3941               print_inline_rtx (dump_file, insn, 8);
3942               fprintf (dump_file, "\n");
3943             }
3944
3945           rtx_insn *copy = gen_move_insn (reg, copy_rtx (SET_SRC (pat)));
3946           emit_insn_before (copy, insn);
3947           SET_SRC (pat) = reg;
3948           df_insn_rescan (insn);
3949
3950           /* un-recognize this pattern since it's probably different now.  */
3951           INSN_CODE (insn) = -1;
3952           gcse_create_count++;
3953         }
3954     }
3955 }
3956 \f
3957 /* Return true if the graph is too expensive to optimize. PASS is the
3958    optimization about to be performed.  */
3959
3960 bool
3961 gcse_or_cprop_is_too_expensive (const char *pass)
3962 {
3963   unsigned HOST_WIDE_INT memory_request
3964     = ((unsigned HOST_WIDE_INT)n_basic_blocks_for_fn (cfun)
3965        * SBITMAP_SET_SIZE (max_reg_num ()) * sizeof (SBITMAP_ELT_TYPE));
3966
3967   /* Trying to perform global optimizations on flow graphs which have
3968      a high connectivity will take a long time and is unlikely to be
3969      particularly useful.
3970
3971      In normal circumstances a cfg should have about twice as many
3972      edges as blocks.  But we do not want to punish small functions
3973      which have a couple switch statements.  Rather than simply
3974      threshold the number of blocks, uses something with a more
3975      graceful degradation.  */
3976   if (n_edges_for_fn (cfun) > 20000 + n_basic_blocks_for_fn (cfun) * 4)
3977     {
3978       warning (OPT_Wdisabled_optimization,
3979                "%s: %d basic blocks and %d edges/basic block",
3980                pass, n_basic_blocks_for_fn (cfun),
3981                n_edges_for_fn (cfun) / n_basic_blocks_for_fn (cfun));
3982
3983       return true;
3984     }
3985
3986   /* If allocating memory for the dataflow bitmaps would take up too much
3987      storage it's better just to disable the optimization.  */
3988   if (memory_request / 1024 > (unsigned HOST_WIDE_INT)param_max_gcse_memory)
3989     {
3990       warning (OPT_Wdisabled_optimization,
3991                "%s: %d basic blocks and %d registers; "
3992                "increase %<--param max-gcse-memory%> above %wu",
3993                pass, n_basic_blocks_for_fn (cfun), max_reg_num (),
3994                memory_request / 1024);
3995
3996       return true;
3997     }
3998
3999   return false;
4000 }
4001 \f
4002 static unsigned int
4003 execute_rtl_pre (void)
4004 {
4005   int changed;
4006   delete_unreachable_blocks ();
4007   df_analyze ();
4008   changed = one_pre_gcse_pass ();
4009   flag_rerun_cse_after_global_opts |= changed;
4010   if (changed)
4011     cleanup_cfg (0);
4012   return 0;
4013 }
4014
4015 static unsigned int
4016 execute_rtl_hoist (void)
4017 {
4018   int changed;
4019   delete_unreachable_blocks ();
4020   df_analyze ();
4021   changed = one_code_hoisting_pass ();
4022   flag_rerun_cse_after_global_opts |= changed;
4023   if (changed)
4024     cleanup_cfg (0);
4025   return 0;
4026 }
4027
4028 namespace {
4029
4030 const pass_data pass_data_rtl_pre =
4031 {
4032   RTL_PASS, /* type */
4033   "rtl pre", /* name */
4034   OPTGROUP_NONE, /* optinfo_flags */
4035   TV_PRE, /* tv_id */
4036   PROP_cfglayout, /* properties_required */
4037   0, /* properties_provided */
4038   0, /* properties_destroyed */
4039   0, /* todo_flags_start */
4040   TODO_df_finish, /* todo_flags_finish */
4041 };
4042
4043 class pass_rtl_pre : public rtl_opt_pass
4044 {
4045 public:
4046   pass_rtl_pre (gcc::context *ctxt)
4047     : rtl_opt_pass (pass_data_rtl_pre, ctxt)
4048   {}
4049
4050   /* opt_pass methods: */
4051   virtual bool gate (function *);
4052   virtual unsigned int execute (function *) { return execute_rtl_pre (); }
4053
4054 }; // class pass_rtl_pre
4055
4056 /* We do not construct an accurate cfg in functions which call
4057    setjmp, so none of these passes runs if the function calls
4058    setjmp.
4059    FIXME: Should just handle setjmp via REG_SETJMP notes.  */
4060
4061 bool
4062 pass_rtl_pre::gate (function *fun)
4063 {
4064   return optimize > 0 && flag_gcse
4065     && !fun->calls_setjmp
4066     && optimize_function_for_speed_p (fun)
4067     && dbg_cnt (pre);
4068 }
4069
4070 } // anon namespace
4071
4072 rtl_opt_pass *
4073 make_pass_rtl_pre (gcc::context *ctxt)
4074 {
4075   return new pass_rtl_pre (ctxt);
4076 }
4077
4078 namespace {
4079
4080 const pass_data pass_data_rtl_hoist =
4081 {
4082   RTL_PASS, /* type */
4083   "hoist", /* name */
4084   OPTGROUP_NONE, /* optinfo_flags */
4085   TV_HOIST, /* tv_id */
4086   PROP_cfglayout, /* properties_required */
4087   0, /* properties_provided */
4088   0, /* properties_destroyed */
4089   0, /* todo_flags_start */
4090   TODO_df_finish, /* todo_flags_finish */
4091 };
4092
4093 class pass_rtl_hoist : public rtl_opt_pass
4094 {
4095 public:
4096   pass_rtl_hoist (gcc::context *ctxt)
4097     : rtl_opt_pass (pass_data_rtl_hoist, ctxt)
4098   {}
4099
4100   /* opt_pass methods: */
4101   virtual bool gate (function *);
4102   virtual unsigned int execute (function *) { return execute_rtl_hoist (); }
4103
4104 }; // class pass_rtl_hoist
4105
4106 bool
4107 pass_rtl_hoist::gate (function *)
4108 {
4109   return optimize > 0 && flag_gcse
4110     && !cfun->calls_setjmp
4111     /* It does not make sense to run code hoisting unless we are optimizing
4112        for code size -- it rarely makes programs faster, and can make then
4113        bigger if we did PRE (when optimizing for space, we don't run PRE).  */
4114     && optimize_function_for_size_p (cfun)
4115     && dbg_cnt (hoist);
4116 }
4117
4118 } // anon namespace
4119
4120 rtl_opt_pass *
4121 make_pass_rtl_hoist (gcc::context *ctxt)
4122 {
4123   return new pass_rtl_hoist (ctxt);
4124 }
4125
4126 /* Reset all state within gcse.c so that we can rerun the compiler
4127    within the same process.  For use by toplev::finalize.  */
4128
4129 void
4130 gcse_c_finalize (void)
4131 {
4132   test_insn = NULL;
4133 }
4134
4135 #include "gt-gcse.h"