1 /* Partial redundancy elimination / Hoisting for RTL.
2 Copyright (C) 1997, 1998, 1999, 2000, 2001, 2002, 2003, 2004, 2005,
3 2006, 2007, 2008, 2009, 2010, 2011 Free Software Foundation, Inc.
4
5 This file is part of GCC.
6
7 GCC is free software; you can redistribute it and/or modify it under
8 the terms of the GNU General Public License as published by the Free
9 Software Foundation; either version 3, or (at your option) any later
10 version.
11
12 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
13 WARRANTY; without even the implied warranty of MERCHANTABILITY or
14 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
15 for more details.
16
17 You should have received a copy of the GNU General Public License
18 along with GCC; see the file COPYING3. If not see
19 <http://www.gnu.org/licenses/>. */
20
21 /* TODO
22 - reordering of memory allocation and freeing to be more space efficient
23 - do rough calc of how many regs are needed in each block, and a rough
24 calc of how many regs are available in each class and use that to
25 throttle back the code in cases where RTX_COST is minimal.
26 */
27
28 /* References searched while implementing this.
29
30 Compilers Principles, Techniques and Tools
31 Aho, Sethi, Ullman
32 Addison-Wesley, 1988
33
34 Global Optimization by Suppression of Partial Redundancies
35 E. Morel, C. Renvoise
36       Communications of the ACM, Vol. 22, Num. 2, Feb. 1979
37
38 A Portable Machine-Independent Global Optimizer - Design and Measurements
39 Frederick Chow
40 Stanford Ph.D. thesis, Dec. 1983
41
42 A Fast Algorithm for Code Movement Optimization
43 D.M. Dhamdhere
44 SIGPLAN Notices, Vol. 23, Num. 10, Oct. 1988
45
46 A Solution to a Problem with Morel and Renvoise's
47 Global Optimization by Suppression of Partial Redundancies
48 K-H Drechsler, M.P. Stadel
49 ACM TOPLAS, Vol. 10, Num. 4, Oct. 1988
50
51 Practical Adaptation of the Global Optimization
52 Algorithm of Morel and Renvoise
53 D.M. Dhamdhere
54 ACM TOPLAS, Vol. 13, Num. 2. Apr. 1991
55
56 Efficiently Computing Static Single Assignment Form and the Control
57 Dependence Graph
58 R. Cytron, J. Ferrante, B.K. Rosen, M.N. Wegman, and F.K. Zadeck
59 ACM TOPLAS, Vol. 13, Num. 4, Oct. 1991
60
61 Lazy Code Motion
62 J. Knoop, O. Ruthing, B. Steffen
63 ACM SIGPLAN Notices Vol. 27, Num. 7, Jul. 1992, '92 Conference on PLDI
64
65 What's In a Region? Or Computing Control Dependence Regions in Near-Linear
66 Time for Reducible Flow Control
67 Thomas Ball
68 ACM Letters on Programming Languages and Systems,
69 Vol. 2, Num. 1-4, Mar-Dec 1993
70
71 An Efficient Representation for Sparse Sets
72 Preston Briggs, Linda Torczon
73 ACM Letters on Programming Languages and Systems,
74 Vol. 2, Num. 1-4, Mar-Dec 1993
75
76 A Variation of Knoop, Ruthing, and Steffen's Lazy Code Motion
77 K-H Drechsler, M.P. Stadel
78 ACM SIGPLAN Notices, Vol. 28, Num. 5, May 1993
79
80 Partial Dead Code Elimination
81 J. Knoop, O. Ruthing, B. Steffen
82 ACM SIGPLAN Notices, Vol. 29, Num. 6, Jun. 1994
83
84 Effective Partial Redundancy Elimination
85 P. Briggs, K.D. Cooper
86 ACM SIGPLAN Notices, Vol. 29, Num. 6, Jun. 1994
87
88 The Program Structure Tree: Computing Control Regions in Linear Time
89 R. Johnson, D. Pearson, K. Pingali
90 ACM SIGPLAN Notices, Vol. 29, Num. 6, Jun. 1994
91
92 Optimal Code Motion: Theory and Practice
93 J. Knoop, O. Ruthing, B. Steffen
94 ACM TOPLAS, Vol. 16, Num. 4, Jul. 1994
95
96 The power of assignment motion
97 J. Knoop, O. Ruthing, B. Steffen
98 ACM SIGPLAN Notices Vol. 30, Num. 6, Jun. 1995, '95 Conference on PLDI
99
100 Global code motion / global value numbering
101 C. Click
102 ACM SIGPLAN Notices Vol. 30, Num. 6, Jun. 1995, '95 Conference on PLDI
103
104 Value Driven Redundancy Elimination
105 L.T. Simpson
106 Rice University Ph.D. thesis, Apr. 1996
107
108 Value Numbering
109 L.T. Simpson
110 Massively Scalar Compiler Project, Rice University, Sep. 1996
111
112 High Performance Compilers for Parallel Computing
113 Michael Wolfe
114 Addison-Wesley, 1996
115
116 Advanced Compiler Design and Implementation
117 Steven Muchnick
118 Morgan Kaufmann, 1997
119
120 Building an Optimizing Compiler
121 Robert Morgan
122 Digital Press, 1998
123
124 People wishing to speed up the code here should read:
125 Elimination Algorithms for Data Flow Analysis
126 B.G. Ryder, M.C. Paull
127 ACM Computing Surveys, Vol. 18, Num. 3, Sep. 1986
128
129 How to Analyze Large Programs Efficiently and Informatively
130 D.M. Dhamdhere, B.K. Rosen, F.K. Zadeck
131 ACM SIGPLAN Notices Vol. 27, Num. 7, Jul. 1992, '92 Conference on PLDI
132
133 People wishing to do something different can find various possibilities
134 in the above papers and elsewhere.
135 */
136
137 #include "config.h"
138 #include "system.h"
139 #include "coretypes.h"
140 #include "tm.h"
141 #include "diagnostic-core.h"
142 #include "toplev.h"
143
144 #include "rtl.h"
145 #include "tree.h"
146 #include "tm_p.h"
147 #include "regs.h"
148 #include "hard-reg-set.h"
149 #include "flags.h"
150 #include "insn-config.h"
151 #include "recog.h"
152 #include "basic-block.h"
153 #include "function.h"
154 #include "expr.h"
155 #include "except.h"
156 #include "ggc.h"
157 #include "params.h"
158 #include "cselib.h"
159 #include "intl.h"
160 #include "obstack.h"
161 #include "tree-pass.h"
162 #include "hashtab.h"
163 #include "df.h"
164 #include "dbgcnt.h"
165 #include "target.h"
166 #include "gcse.h"
167
168 /* We support GCSE via Partial Redundancy Elimination. PRE optimizations
169 are a superset of those done by classic GCSE.
170
171 Two passes of copy/constant propagation are done around PRE or hoisting
172 because the first one enables more GCSE and the second one helps to clean
173 up the copies that PRE and HOIST create. This is needed more for PRE than
174 for HOIST because code hoisting will try to use an existing register
175 containing the common subexpression rather than create a new one. This is
176 harder to do for PRE because of the code motion (which HOIST doesn't do).
177
178 Expressions we are interested in GCSE-ing are of the form
179 (set (pseudo-reg) (expression)).
180 Function want_to_gcse_p says what these are.
181
182 In addition, expressions in REG_EQUAL notes are candidates for GCSE-ing.
183 This allows PRE to hoist expressions that are expressed in multiple insns,
184 such as complex address calculations (e.g. for PIC code, or loads with a
185 high part and a low part).
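   A rough, hypothetical sketch of such a case (PIC-style addressing on a
   RISC-like target):

       r1 = high (sym)
       r2 = r1 + low (sym)      ; REG_EQUAL note: sym

   The REG_EQUAL note on the second insn lets PRE treat the whole address
   SYM as a single candidate expression even though it is built by two
   insns.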
186
187 PRE handles moving invariant expressions out of loops (by treating them as
188 partially redundant).
189
190 **********************
191
192 We used to support multiple passes but there are diminishing returns in
193 doing so. The first pass usually makes 90% of the changes that are doable.
194 A second pass can make a few more changes made possible by the first pass.
195 Experiments show any further passes don't make enough changes to justify
196 the expense.
197
198 A study of spec92 using an unlimited number of passes:
199 [1 pass] = 1208 substitutions, [2] = 577, [3] = 202, [4] = 192, [5] = 83,
200 [6] = 34, [7] = 17, [8] = 9, [9] = 4, [10] = 4, [11] = 2,
201 [12] = 2, [13] = 1, [15] = 1, [16] = 2, [41] = 1
202
203    It was found that doing copy propagation between each pass enables
204    further substitutions.
205
206 This study was done before expressions in REG_EQUAL notes were added as
207 candidate expressions for optimization, and before the GIMPLE optimizers
208    were added.  Multiple passes are probably even less efficient now than
209    they were at the time the study was conducted.
210
211 PRE is quite expensive in complicated functions because the DFA can take
212 a while to converge. Hence we only perform one pass.
213
214 **********************
215
216 The steps for PRE are:
217
218 1) Build the hash table of expressions we wish to GCSE (expr_hash_table).
219
220 2) Perform the data flow analysis for PRE.
221
222 3) Delete the redundant instructions
223
224 4) Insert the required copies [if any] that make the partially
225 redundant instructions fully redundant.
226
227 5) For other reaching expressions, insert an instruction to copy the value
228 to a newly created pseudo that will reach the redundant instruction.
229
230 The deletion is done first so that when we do insertions we
231 know which pseudo reg to use.
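
   As a rough, hypothetical source-level illustration of steps 3)-5), for an
   expression a + b that is computed on only one path into a join point:

       Before PRE:                   After PRE:
         if (cond)                     if (cond)
           x = a + b;                    t = a + b, x = t;
         else                          else
           other ();                     other (), t = a + b;
         y = a + b;                    y = t;

   The partially redundant computation of Y is deleted and replaced by a
   copy from the new pseudo T; a copy of a + b into T is inserted on the
   path where the expression was not available.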
232
233 Various papers have argued that PRE DFA is expensive (O(n^2)) and others
234 argue it is not. The number of iterations for the algorithm to converge
235 is typically 2-4 so I don't view it as that expensive (relatively speaking).
236
237 PRE GCSE depends heavily on the second CPROP pass to clean up the copies
238 we create. To make an expression reach the place where it's redundant,
239 the result of the expression is copied to a new register, and the redundant
240 expression is deleted by replacing it with this new register. Classic GCSE
241    doesn't have this problem as much because it computes the reaching defs of
242 each register in each block and thus can try to use an existing
243 register. */
244 \f
245 /* GCSE global vars. */
246
247 struct target_gcse default_target_gcse;
248 #if SWITCHABLE_TARGET
249 struct target_gcse *this_target_gcse = &default_target_gcse;
250 #endif
251
252 /* Set to non-zero if CSE should run after all GCSE optimizations are done. */
253 int flag_rerun_cse_after_global_opts;
254
255 /* An obstack for our working variables. */
256 static struct obstack gcse_obstack;
257
258 struct reg_use {rtx reg_rtx; };
259
260 /* Hash table of expressions. */
261
262 struct expr
263 {
264 /* The expression. */
265 rtx expr;
266 /* Index in the available expression bitmaps. */
267 int bitmap_index;
268 /* Next entry with the same hash. */
269 struct expr *next_same_hash;
270 /* List of anticipatable occurrences in basic blocks in the function.
271 An "anticipatable occurrence" is one that is the first occurrence in the
272 basic block, the operands are not modified in the basic block prior
273 to the occurrence and the output is not used between the start of
274 the block and the occurrence. */
275 struct occr *antic_occr;
276   /* List of available occurrences in basic blocks in the function.
277 An "available occurrence" is one that is the last occurrence in the
278 basic block and the operands are not modified by following statements in
279 the basic block [including this insn]. */
280 struct occr *avail_occr;
281 /* Non-null if the computation is PRE redundant.
282 The value is the newly created pseudo-reg to record a copy of the
283 expression in all the places that reach the redundant copy. */
284 rtx reaching_reg;
285 /* Maximum distance in instructions this expression can travel.
286 We avoid moving simple expressions for more than a few instructions
287 to keep register pressure under control.
288 A value of "0" removes restrictions on how far the expression can
289 travel. */
290 int max_distance;
291 };
292
293 /* Occurrence of an expression.
294 There is one per basic block. If a pattern appears more than once the
295 last appearance is used [or first for anticipatable expressions]. */
296
297 struct occr
298 {
299 /* Next occurrence of this expression. */
300 struct occr *next;
301 /* The insn that computes the expression. */
302 rtx insn;
303 /* Nonzero if this [anticipatable] occurrence has been deleted. */
304 char deleted_p;
305 /* Nonzero if this [available] occurrence has been copied to
306 reaching_reg. */
307 /* ??? This is mutually exclusive with deleted_p, so they could share
308 the same byte. */
309 char copied_p;
310 };
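
/* A hypothetical example of the two occurrence kinds for an expression
   a + b within a single basic block:

       x = a + b;    <- anticipatable occurrence: first in the block,
                        operands unmodified before it
       a = c;
       y = a + b;    <- available occurrence: last in the block,
                        operands unmodified after it

   The first computation would be recorded on antic_occr, the second on
   avail_occr.  */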
311
312 typedef struct occr *occr_t;
313 DEF_VEC_P (occr_t);
314 DEF_VEC_ALLOC_P (occr_t, heap);
315
316 /* Expression hash tables.
317 Each hash table is an array of buckets.
318 ??? It is known that if it were an array of entries, structure elements
319 `next_same_hash' and `bitmap_index' wouldn't be necessary. However, it is
320 not clear whether in the final analysis a sufficient amount of memory would
321 be saved as the size of the available expression bitmaps would be larger
322 [one could build a mapping table without holes afterwards though].
323 Someday I'll perform the computation and figure it out. */
324
325 struct hash_table_d
326 {
327 /* The table itself.
328 This is an array of `expr_hash_table_size' elements. */
329 struct expr **table;
330
331 /* Size of the hash table, in elements. */
332 unsigned int size;
333
334 /* Number of hash table elements. */
335 unsigned int n_elems;
336 };
337
338 /* Expression hash table. */
339 static struct hash_table_d expr_hash_table;
340
341 /* This is a list of expressions which are MEMs and will be used by load
342 or store motion.
343    Load motion tracks MEMs which aren't killed by anything except themselves,
344    i.e. loads and stores to a single location.
345    We can then allow movement of these MEM refs with a little special
346    allowance.  (All stores copy the same value to the reaching reg used
347    for the loads.)  This means all values used to store into memory must have
348 no side effects so we can re-issue the setter value. */
349
350 struct ls_expr
351 {
352 struct expr * expr; /* Gcse expression reference for LM. */
353 rtx pattern; /* Pattern of this mem. */
354 rtx pattern_regs; /* List of registers mentioned by the mem. */
355 rtx loads; /* INSN list of loads seen. */
356 rtx stores; /* INSN list of stores seen. */
357 struct ls_expr * next; /* Next in the list. */
358 int invalid; /* Invalid for some reason. */
359   int index;                    /* Bitmap index this expression maps to, if any.  */
360 unsigned int hash_index; /* Index when in a hash table. */
361 rtx reaching_reg; /* Register to use when re-writing. */
362 };
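
/* A hypothetical sketch of the situation an ls_expr describes.  If the
   only references to *P in the function are

       r1 = *p;      <- recorded on LOADS
       *p = r2;      <- recorded on STORES

   then the loads can be rewritten to use a single reaching register, and
   each store also copies its value into that register, so the setter can
   be re-issued without side effects.  */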
363
364 /* Head of the list of load/store memory refs. */
365 static struct ls_expr * pre_ldst_mems = NULL;
366
367 /* Hashtable for the load/store memory refs. */
368 static htab_t pre_ldst_table = NULL;
369
370 /* Bitmap containing one bit for each register in the program.
371 Used when performing GCSE to track which registers have been set since
372 the start of the basic block. */
373 static regset reg_set_bitmap;
374
375 /* Array, indexed by basic block number for a list of insns which modify
376 memory within that block. */
377 static VEC (rtx,heap) **modify_mem_list;
378 static bitmap modify_mem_list_set;
379
380 typedef struct modify_pair_s
381 {
382 rtx dest; /* A MEM. */
383 rtx dest_addr; /* The canonical address of `dest'. */
384 } modify_pair;
385
386 DEF_VEC_O(modify_pair);
387 DEF_VEC_ALLOC_O(modify_pair,heap);
388
389 /* This array parallels modify_mem_list, except that it stores MEMs
390 being set and their canonicalized memory addresses. */
391 static VEC (modify_pair,heap) **canon_modify_mem_list;
392
393 /* Bitmap indexed by block numbers to record which blocks contain
394 function calls. */
395 static bitmap blocks_with_calls;
396
397 /* Various variables for statistics gathering. */
398
399 /* Memory used in a pass.
400 This isn't intended to be absolutely precise. Its intent is only
401 to keep an eye on memory usage. */
402 static int bytes_used;
403
404 /* GCSE substitutions made. */
405 static int gcse_subst_count;
406 /* Number of copy instructions created. */
407 static int gcse_create_count;
408 \f
409 /* Doing code hoisting. */
410 static bool doing_code_hoisting_p = false;
411 \f
412 /* For available exprs */
413 static sbitmap *ae_kill;
414 \f
415 static void compute_can_copy (void);
416 static void *gmalloc (size_t) ATTRIBUTE_MALLOC;
417 static void *gcalloc (size_t, size_t) ATTRIBUTE_MALLOC;
418 static void *gcse_alloc (unsigned long);
419 static void alloc_gcse_mem (void);
420 static void free_gcse_mem (void);
421 static void hash_scan_insn (rtx, struct hash_table_d *);
422 static void hash_scan_set (rtx, rtx, struct hash_table_d *);
423 static void hash_scan_clobber (rtx, rtx, struct hash_table_d *);
424 static void hash_scan_call (rtx, rtx, struct hash_table_d *);
425 static int want_to_gcse_p (rtx, int *);
426 static int oprs_unchanged_p (const_rtx, const_rtx, int);
427 static int oprs_anticipatable_p (const_rtx, const_rtx);
428 static int oprs_available_p (const_rtx, const_rtx);
429 static void insert_expr_in_table (rtx, enum machine_mode, rtx, int, int, int,
430 struct hash_table_d *);
431 static unsigned int hash_expr (const_rtx, enum machine_mode, int *, int);
432 static int expr_equiv_p (const_rtx, const_rtx);
433 static void record_last_reg_set_info (rtx, int);
434 static void record_last_mem_set_info (rtx);
435 static void record_last_set_info (rtx, const_rtx, void *);
436 static void compute_hash_table (struct hash_table_d *);
437 static void alloc_hash_table (struct hash_table_d *);
438 static void free_hash_table (struct hash_table_d *);
439 static void compute_hash_table_work (struct hash_table_d *);
440 static void dump_hash_table (FILE *, const char *, struct hash_table_d *);
441 static void compute_transp (const_rtx, int, sbitmap *);
442 static void compute_local_properties (sbitmap *, sbitmap *, sbitmap *,
443 struct hash_table_d *);
444 static void mems_conflict_for_gcse_p (rtx, const_rtx, void *);
445 static int load_killed_in_block_p (const_basic_block, int, const_rtx, int);
446 static void canon_list_insert (rtx, const_rtx, void *);
447 static void alloc_pre_mem (int, int);
448 static void free_pre_mem (void);
449 static struct edge_list *compute_pre_data (void);
450 static int pre_expr_reaches_here_p (basic_block, struct expr *,
451 basic_block);
452 static void insert_insn_end_basic_block (struct expr *, basic_block);
453 static void pre_insert_copy_insn (struct expr *, rtx);
454 static void pre_insert_copies (void);
455 static int pre_delete (void);
456 static int pre_gcse (struct edge_list *);
457 static int one_pre_gcse_pass (void);
458 static void add_label_notes (rtx, rtx);
459 static void alloc_code_hoist_mem (int, int);
460 static void free_code_hoist_mem (void);
461 static void compute_code_hoist_vbeinout (void);
462 static void compute_code_hoist_data (void);
463 static int hoist_expr_reaches_here_p (basic_block, int, basic_block, char *,
464 int, int *);
465 static int hoist_code (void);
466 static int one_code_hoisting_pass (void);
467 static rtx process_insert_insn (struct expr *);
468 static int pre_edge_insert (struct edge_list *, struct expr **);
469 static int pre_expr_reaches_here_p_work (basic_block, struct expr *,
470 basic_block, char *);
471 static struct ls_expr * ldst_entry (rtx);
472 static void free_ldst_entry (struct ls_expr *);
473 static void free_ld_motion_mems (void);
474 static void print_ldst_list (FILE *);
475 static struct ls_expr * find_rtx_in_ldst (rtx);
476 static int simple_mem (const_rtx);
477 static void invalidate_any_buried_refs (rtx);
478 static void compute_ld_motion_mems (void);
479 static void trim_ld_motion_mems (void);
480 static void update_ld_motion_stores (struct expr *);
481 static void clear_modify_mem_tables (void);
482 static void free_modify_mem_tables (void);
483 static rtx gcse_emit_move_after (rtx, rtx, rtx);
484 static bool is_too_expensive (const char *);
485
486 #define GNEW(T) ((T *) gmalloc (sizeof (T)))
487 #define GCNEW(T) ((T *) gcalloc (1, sizeof (T)))
488
489 #define GNEWVEC(T, N) ((T *) gmalloc (sizeof (T) * (N)))
490 #define GCNEWVEC(T, N) ((T *) gcalloc ((N), sizeof (T)))
491
492 #define GNEWVAR(T, S) ((T *) gmalloc ((S)))
493 #define GCNEWVAR(T, S) ((T *) gcalloc (1, (S)))
494
495 #define GOBNEW(T) ((T *) gcse_alloc (sizeof (T)))
496 #define GOBNEWVAR(T, S) ((T *) gcse_alloc ((S)))
497 \f
498 /* Misc. utilities. */
499
500 #define can_copy \
501 (this_target_gcse->x_can_copy)
502 #define can_copy_init_p \
503 (this_target_gcse->x_can_copy_init_p)
504
505 /* Compute which modes support reg/reg copy operations. */
506
507 static void
508 compute_can_copy (void)
509 {
510 int i;
511 #ifndef AVOID_CCMODE_COPIES
512 rtx reg, insn;
513 #endif
514 memset (can_copy, 0, NUM_MACHINE_MODES);
515
516 start_sequence ();
517 for (i = 0; i < NUM_MACHINE_MODES; i++)
518 if (GET_MODE_CLASS (i) == MODE_CC)
519 {
520 #ifdef AVOID_CCMODE_COPIES
521 can_copy[i] = 0;
522 #else
523 reg = gen_rtx_REG ((enum machine_mode) i, LAST_VIRTUAL_REGISTER + 1);
524 insn = emit_insn (gen_rtx_SET (VOIDmode, reg, reg));
525 if (recog (PATTERN (insn), insn, NULL) >= 0)
526 can_copy[i] = 1;
527 #endif
528 }
529 else
530 can_copy[i] = 1;
531
532 end_sequence ();
533 }
534
535 /* Returns whether the mode supports reg/reg copy operations. */
536
537 bool
538 can_copy_p (enum machine_mode mode)
539 {
540 if (! can_copy_init_p)
541 {
542 compute_can_copy ();
543 can_copy_init_p = true;
544 }
545
546 return can_copy[mode] != 0;
547 }
548 \f
549 /* Cover function to xmalloc to record bytes allocated. */
550
551 static void *
552 gmalloc (size_t size)
553 {
554 bytes_used += size;
555 return xmalloc (size);
556 }
557
558 /* Cover function to xcalloc to record bytes allocated. */
559
560 static void *
561 gcalloc (size_t nelem, size_t elsize)
562 {
563 bytes_used += nelem * elsize;
564 return xcalloc (nelem, elsize);
565 }
566
567 /* Cover function to obstack_alloc. */
568
569 static void *
570 gcse_alloc (unsigned long size)
571 {
572 bytes_used += size;
573 return obstack_alloc (&gcse_obstack, size);
574 }
575
576 /* Allocate memory for the reg/memory set tracking tables.
577 This is called at the start of each pass. */
578
579 static void
580 alloc_gcse_mem (void)
581 {
582 /* Allocate vars to track sets of regs. */
583 reg_set_bitmap = ALLOC_REG_SET (NULL);
584
585 /* Allocate array to keep a list of insns which modify memory in each
586 basic block. */
587 modify_mem_list = GCNEWVEC (VEC (rtx,heap) *, last_basic_block);
588 canon_modify_mem_list = GCNEWVEC (VEC (modify_pair,heap) *,
589 last_basic_block);
590 modify_mem_list_set = BITMAP_ALLOC (NULL);
591 blocks_with_calls = BITMAP_ALLOC (NULL);
592 }
593
594 /* Free memory allocated by alloc_gcse_mem. */
595
596 static void
597 free_gcse_mem (void)
598 {
599 FREE_REG_SET (reg_set_bitmap);
600
601 free_modify_mem_tables ();
602 BITMAP_FREE (modify_mem_list_set);
603 BITMAP_FREE (blocks_with_calls);
604 }
605 \f
606 /* Compute the local properties of each recorded expression.
607
608 Local properties are those that are defined by the block, irrespective of
609 other blocks.
610
611 An expression is transparent in a block if its operands are not modified
612 in the block.
613
614 An expression is computed (locally available) in a block if it is computed
615    at least once and the expression would contain the same value if the
616    computation were moved to the end of the block.
617
618 An expression is locally anticipatable in a block if it is computed at
619    least once and the expression would contain the same value if the
620    computation were moved to the beginning of the block.
621
622    We call this routine for PRE and code hoisting.  Both compute
623 basically the same information and thus can easily share this code.
624
625 TRANSP, COMP, and ANTLOC are destination sbitmaps for recording local
626 properties. If NULL, then it is not necessary to compute or record that
627 particular property.
628
629 TABLE controls which hash table to look at. */
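
/* As a hypothetical example, for an expression a + b and a block containing

       a = c;
       x = a + b;

   the expression is not transparent (an operand is modified in the block),
   it is computed / locally available (the computation would yield the same
   value at the end of the block), and it is not locally anticipatable (an
   operand is modified before the first computation).  */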
630
631 static void
632 compute_local_properties (sbitmap *transp, sbitmap *comp, sbitmap *antloc,
633 struct hash_table_d *table)
634 {
635 unsigned int i;
636
637 /* Initialize any bitmaps that were passed in. */
638 if (transp)
639 {
640 sbitmap_vector_ones (transp, last_basic_block);
641 }
642
643 if (comp)
644 sbitmap_vector_zero (comp, last_basic_block);
645 if (antloc)
646 sbitmap_vector_zero (antloc, last_basic_block);
647
648 for (i = 0; i < table->size; i++)
649 {
650 struct expr *expr;
651
652 for (expr = table->table[i]; expr != NULL; expr = expr->next_same_hash)
653 {
654 int indx = expr->bitmap_index;
655 struct occr *occr;
656
657 /* The expression is transparent in this block if it is not killed.
658 We start by assuming all are transparent [none are killed], and
659 then reset the bits for those that are. */
660 if (transp)
661 compute_transp (expr->expr, indx, transp);
662
663 /* The occurrences recorded in antic_occr are exactly those that
664 we want to set to nonzero in ANTLOC. */
665 if (antloc)
666 for (occr = expr->antic_occr; occr != NULL; occr = occr->next)
667 {
668 SET_BIT (antloc[BLOCK_FOR_INSN (occr->insn)->index], indx);
669
670 /* While we're scanning the table, this is a good place to
671 initialize this. */
672 occr->deleted_p = 0;
673 }
674
675 /* The occurrences recorded in avail_occr are exactly those that
676 we want to set to nonzero in COMP. */
677 if (comp)
678 for (occr = expr->avail_occr; occr != NULL; occr = occr->next)
679 {
680 SET_BIT (comp[BLOCK_FOR_INSN (occr->insn)->index], indx);
681
682 /* While we're scanning the table, this is a good place to
683 initialize this. */
684 occr->copied_p = 0;
685 }
686
687 /* While we're scanning the table, this is a good place to
688 initialize this. */
689 expr->reaching_reg = 0;
690 }
691 }
692 }
693 \f
694 /* Hash table support. */
695
696 struct reg_avail_info
697 {
698 basic_block last_bb;
699 int first_set;
700 int last_set;
701 };
702
703 static struct reg_avail_info *reg_avail_info;
704 static basic_block current_bb;
705
706 /* See whether X, the source of a set, is something we want to consider for
707 GCSE. */
708
709 static int
710 want_to_gcse_p (rtx x, int *max_distance_ptr)
711 {
712 #ifdef STACK_REGS
713 /* On register stack architectures, don't GCSE constants from the
714 constant pool, as the benefits are often swamped by the overhead
715 of shuffling the register stack between basic blocks. */
716 if (IS_STACK_MODE (GET_MODE (x)))
717 x = avoid_constant_pool_reference (x);
718 #endif
719
720 /* GCSE'ing constants:
721
722 We do not specifically distinguish between constant and non-constant
723 expressions in PRE and Hoist. We use set_src_cost below to limit
724 the maximum distance simple expressions can travel.
725
726 Nevertheless, constants are much easier to GCSE, and, hence,
727 it is easy to overdo the optimizations. Usually, excessive PRE and
728    hoisting of constants leads to increased register pressure.
729
730    RA can deal with this by rematerializing some of the constants.
731    Therefore, it is important that the back-end generates sets of constants
732    in a way that allows reload to rematerialize them under high register
733    pressure, i.e., a pseudo register with a REG_EQUAL note for a constant
734 is set only once. Failing to do so will result in IRA/reload
735 spilling such constants under high register pressure instead of
736 rematerializing them. */
737
738 switch (GET_CODE (x))
739 {
740 case REG:
741 case SUBREG:
742 case CALL:
743 return 0;
744
745 case CONST_INT:
746 case CONST_DOUBLE:
747 case CONST_FIXED:
748 case CONST_VECTOR:
749 if (!doing_code_hoisting_p)
750 /* Do not PRE constants. */
751 return 0;
752
753 /* FALLTHRU */
754
755 default:
756 if (doing_code_hoisting_p)
757 /* PRE doesn't implement max_distance restriction. */
758 {
759 int cost;
760 int max_distance;
761
762 gcc_assert (!optimize_function_for_speed_p (cfun)
763 && optimize_function_for_size_p (cfun));
764 cost = set_src_cost (x, 0);
765
766 if (cost < COSTS_N_INSNS (GCSE_UNRESTRICTED_COST))
767 {
768 max_distance = (GCSE_COST_DISTANCE_RATIO * cost) / 10;
769 if (max_distance == 0)
770 return 0;
771
772 gcc_assert (max_distance > 0);
773 }
774 else
775 max_distance = 0;
776
777 if (max_distance_ptr)
778 *max_distance_ptr = max_distance;
779 }
780
781 return can_assign_to_reg_without_clobbers_p (x);
782 }
783 }
784
785 /* Used internally by can_assign_to_reg_without_clobbers_p. */
786
787 static GTY(()) rtx test_insn;
788
789 /* Return true if we can assign X to a pseudo register such that the
790 resulting insn does not result in clobbering a hard register as a
791 side-effect.
792
793 Additionally, if the target requires it, check that the resulting insn
794 can be copied. If it cannot, this means that X is special and probably
795 has hidden side-effects we don't want to mess with.
796
797 This function is typically used by code motion passes, to verify
798 that it is safe to insert an insn without worrying about clobbering
799 maybe live hard regs. */
800
801 bool
802 can_assign_to_reg_without_clobbers_p (rtx x)
803 {
804 int num_clobbers = 0;
805 int icode;
806
807 /* If this is a valid operand, we are OK. If it's VOIDmode, we aren't. */
808 if (general_operand (x, GET_MODE (x)))
809 return 1;
810 else if (GET_MODE (x) == VOIDmode)
811 return 0;
812
813 /* Otherwise, check if we can make a valid insn from it. First initialize
814 our test insn if we haven't already. */
815 if (test_insn == 0)
816 {
817 test_insn
818 = make_insn_raw (gen_rtx_SET (VOIDmode,
819 gen_rtx_REG (word_mode,
820 FIRST_PSEUDO_REGISTER * 2),
821 const0_rtx));
822 NEXT_INSN (test_insn) = PREV_INSN (test_insn) = 0;
823 }
824
825 /* Now make an insn like the one we would make when GCSE'ing and see if
826 valid. */
827 PUT_MODE (SET_DEST (PATTERN (test_insn)), GET_MODE (x));
828 SET_SRC (PATTERN (test_insn)) = x;
829
830 icode = recog (PATTERN (test_insn), test_insn, &num_clobbers);
831 if (icode < 0)
832 return false;
833
834 if (num_clobbers > 0 && added_clobbers_hard_reg_p (icode))
835 return false;
836
837 if (targetm.cannot_copy_insn_p && targetm.cannot_copy_insn_p (test_insn))
838 return false;
839
840 return true;
841 }
842
843 /* Return nonzero if the operands of expression X are unchanged from the
844 start of INSN's basic block up to but not including INSN (if AVAIL_P == 0),
845 or from INSN to the end of INSN's basic block (if AVAIL_P != 0). */
846
847 static int
848 oprs_unchanged_p (const_rtx x, const_rtx insn, int avail_p)
849 {
850 int i, j;
851 enum rtx_code code;
852 const char *fmt;
853
854 if (x == 0)
855 return 1;
856
857 code = GET_CODE (x);
858 switch (code)
859 {
860 case REG:
861 {
862 struct reg_avail_info *info = &reg_avail_info[REGNO (x)];
863
864 if (info->last_bb != current_bb)
865 return 1;
866 if (avail_p)
867 return info->last_set < DF_INSN_LUID (insn);
868 else
869 return info->first_set >= DF_INSN_LUID (insn);
870 }
871
872 case MEM:
873 if (load_killed_in_block_p (current_bb, DF_INSN_LUID (insn),
874 x, avail_p))
875 return 0;
876 else
877 return oprs_unchanged_p (XEXP (x, 0), insn, avail_p);
878
879 case PRE_DEC:
880 case PRE_INC:
881 case POST_DEC:
882 case POST_INC:
883 case PRE_MODIFY:
884 case POST_MODIFY:
885 return 0;
886
887 case PC:
888 case CC0: /*FIXME*/
889 case CONST:
890 case CONST_INT:
891 case CONST_DOUBLE:
892 case CONST_FIXED:
893 case CONST_VECTOR:
894 case SYMBOL_REF:
895 case LABEL_REF:
896 case ADDR_VEC:
897 case ADDR_DIFF_VEC:
898 return 1;
899
900 default:
901 break;
902 }
903
904 for (i = GET_RTX_LENGTH (code) - 1, fmt = GET_RTX_FORMAT (code); i >= 0; i--)
905 {
906 if (fmt[i] == 'e')
907 {
908 /* If we are about to do the last recursive call needed at this
909 level, change it into iteration. This function is called enough
910 to be worth it. */
911 if (i == 0)
912 return oprs_unchanged_p (XEXP (x, i), insn, avail_p);
913
914 else if (! oprs_unchanged_p (XEXP (x, i), insn, avail_p))
915 return 0;
916 }
917 else if (fmt[i] == 'E')
918 for (j = 0; j < XVECLEN (x, i); j++)
919 if (! oprs_unchanged_p (XVECEXP (x, i, j), insn, avail_p))
920 return 0;
921 }
922
923 return 1;
924 }
925
926 /* Info passed from load_killed_in_block_p to mems_conflict_for_gcse_p. */
927
928 struct mem_conflict_info
929 {
930   /* A memory reference for a load instruction; mems_conflict_for_gcse_p will
931 see if a memory store conflicts with this memory load. */
932 const_rtx mem;
933
934 /* True if mems_conflict_for_gcse_p finds a conflict between two memory
935 references. */
936 bool conflict;
937 };
938
939 /* DEST is the output of an instruction. If it is a memory reference and
940 possibly conflicts with the load found in DATA, then communicate this
941 information back through DATA. */
942
943 static void
944 mems_conflict_for_gcse_p (rtx dest, const_rtx setter ATTRIBUTE_UNUSED,
945 void *data)
946 {
947 struct mem_conflict_info *mci = (struct mem_conflict_info *) data;
948
949 while (GET_CODE (dest) == SUBREG
950 || GET_CODE (dest) == ZERO_EXTRACT
951 || GET_CODE (dest) == STRICT_LOW_PART)
952 dest = XEXP (dest, 0);
953
954 /* If DEST is not a MEM, then it will not conflict with the load. Note
955 that function calls are assumed to clobber memory, but are handled
956 elsewhere. */
957 if (! MEM_P (dest))
958 return;
959
960 /* If we are setting a MEM in our list of specially recognized MEMs,
961 don't mark as killed this time. */
962 if (pre_ldst_mems != NULL && expr_equiv_p (dest, mci->mem))
963 {
964 if (!find_rtx_in_ldst (dest))
965 mci->conflict = true;
966 return;
967 }
968
969 if (true_dependence (dest, GET_MODE (dest), mci->mem))
970 mci->conflict = true;
971 }
972
973 /* Return nonzero if the expression in X (a memory reference) is killed
974 in block BB before or after the insn with the LUID in UID_LIMIT.
975 AVAIL_P is nonzero for kills after UID_LIMIT, and zero for kills
976 before UID_LIMIT.
977
978 To check the entire block, set UID_LIMIT to max_uid + 1 and
979 AVAIL_P to 0. */
980
981 static int
982 load_killed_in_block_p (const_basic_block bb, int uid_limit, const_rtx x,
983 int avail_p)
984 {
985 VEC (rtx,heap) *list = modify_mem_list[bb->index];
986 rtx setter;
987 unsigned ix;
988
989   /* If this is a read-only MEM then we aren't going to be changing it.  */
990 if (MEM_READONLY_P (x))
991 return 0;
992
993 FOR_EACH_VEC_ELT_REVERSE (rtx, list, ix, setter)
994 {
995 struct mem_conflict_info mci;
996
997 /* Ignore entries in the list that do not apply. */
998 if ((avail_p
999 && DF_INSN_LUID (setter) < uid_limit)
1000 || (! avail_p
1001 && DF_INSN_LUID (setter) > uid_limit))
1002 continue;
1003
1004 /* If SETTER is a call everything is clobbered. Note that calls
1005 to pure functions are never put on the list, so we need not
1006 worry about them. */
1007 if (CALL_P (setter))
1008 return 1;
1009
1010 /* SETTER must be an INSN of some kind that sets memory. Call
1011 note_stores to examine each hunk of memory that is modified. */
1012 mci.mem = x;
1013 mci.conflict = false;
1014 note_stores (PATTERN (setter), mems_conflict_for_gcse_p, &mci);
1015 if (mci.conflict)
1016 return 1;
1017 }
1018 return 0;
1019 }
1020
1021 /* Return nonzero if the operands of expression X are unchanged from
1022 the start of INSN's basic block up to but not including INSN. */
1023
1024 static int
1025 oprs_anticipatable_p (const_rtx x, const_rtx insn)
1026 {
1027 return oprs_unchanged_p (x, insn, 0);
1028 }
1029
1030 /* Return nonzero if the operands of expression X are unchanged from
1031 INSN to the end of INSN's basic block. */
1032
1033 static int
1034 oprs_available_p (const_rtx x, const_rtx insn)
1035 {
1036 return oprs_unchanged_p (x, insn, 1);
1037 }
1038
1039 /* Hash expression X.
1040
1041 MODE is only used if X is a CONST_INT. DO_NOT_RECORD_P is a boolean
1042 indicating if a volatile operand is found or if the expression contains
1043 something we don't want to insert in the table. HASH_TABLE_SIZE is
1044 the current size of the hash table to be probed. */
1045
1046 static unsigned int
1047 hash_expr (const_rtx x, enum machine_mode mode, int *do_not_record_p,
1048 int hash_table_size)
1049 {
1050 unsigned int hash;
1051
1052 *do_not_record_p = 0;
1053
1054 hash = hash_rtx (x, mode, do_not_record_p, NULL, /*have_reg_qty=*/false);
1055 return hash % hash_table_size;
1056 }
1057
1058 /* Return nonzero if exp1 is equivalent to exp2. */
1059
1060 static int
1061 expr_equiv_p (const_rtx x, const_rtx y)
1062 {
1063 return exp_equiv_p (x, y, 0, true);
1064 }
1065
1066 /* Insert expression X in INSN in the hash TABLE.
1067 If it is already present, record it as the last occurrence in INSN's
1068 basic block.
1069
1070 MODE is the mode of the value X is being stored into.
1071 It is only used if X is a CONST_INT.
1072
1073 ANTIC_P is nonzero if X is an anticipatable expression.
1074 AVAIL_P is nonzero if X is an available expression.
1075
1076 MAX_DISTANCE is the maximum distance in instructions this expression can
1077 be moved. */
1078
1079 static void
1080 insert_expr_in_table (rtx x, enum machine_mode mode, rtx insn, int antic_p,
1081 int avail_p, int max_distance, struct hash_table_d *table)
1082 {
1083 int found, do_not_record_p;
1084 unsigned int hash;
1085 struct expr *cur_expr, *last_expr = NULL;
1086 struct occr *antic_occr, *avail_occr;
1087
1088 hash = hash_expr (x, mode, &do_not_record_p, table->size);
1089
1090 /* Do not insert expression in table if it contains volatile operands,
1091 or if hash_expr determines the expression is something we don't want
1092 to or can't handle. */
1093 if (do_not_record_p)
1094 return;
1095
1096 cur_expr = table->table[hash];
1097 found = 0;
1098
1099 while (cur_expr && 0 == (found = expr_equiv_p (cur_expr->expr, x)))
1100 {
1101 /* If the expression isn't found, save a pointer to the end of
1102 the list. */
1103 last_expr = cur_expr;
1104 cur_expr = cur_expr->next_same_hash;
1105 }
1106
1107 if (! found)
1108 {
1109 cur_expr = GOBNEW (struct expr);
1110 bytes_used += sizeof (struct expr);
1111 if (table->table[hash] == NULL)
1112 /* This is the first pattern that hashed to this index. */
1113 table->table[hash] = cur_expr;
1114 else
1115 /* Add EXPR to end of this hash chain. */
1116 last_expr->next_same_hash = cur_expr;
1117
1118 /* Set the fields of the expr element. */
1119 cur_expr->expr = x;
1120 cur_expr->bitmap_index = table->n_elems++;
1121 cur_expr->next_same_hash = NULL;
1122 cur_expr->antic_occr = NULL;
1123 cur_expr->avail_occr = NULL;
1124 gcc_assert (max_distance >= 0);
1125 cur_expr->max_distance = max_distance;
1126 }
1127 else
1128 gcc_assert (cur_expr->max_distance == max_distance);
1129
1130 /* Now record the occurrence(s). */
1131 if (antic_p)
1132 {
1133 antic_occr = cur_expr->antic_occr;
1134
1135 if (antic_occr
1136 && BLOCK_FOR_INSN (antic_occr->insn) != BLOCK_FOR_INSN (insn))
1137 antic_occr = NULL;
1138
1139 if (antic_occr)
1140 /* Found another instance of the expression in the same basic block.
1141 Prefer the currently recorded one. We want the first one in the
1142 block and the block is scanned from start to end. */
1143 ; /* nothing to do */
1144 else
1145 {
1146 /* First occurrence of this expression in this basic block. */
1147 antic_occr = GOBNEW (struct occr);
1148 bytes_used += sizeof (struct occr);
1149 antic_occr->insn = insn;
1150 antic_occr->next = cur_expr->antic_occr;
1151 antic_occr->deleted_p = 0;
1152 cur_expr->antic_occr = antic_occr;
1153 }
1154 }
1155
1156 if (avail_p)
1157 {
1158 avail_occr = cur_expr->avail_occr;
1159
1160 if (avail_occr
1161 && BLOCK_FOR_INSN (avail_occr->insn) == BLOCK_FOR_INSN (insn))
1162 {
1163 /* Found another instance of the expression in the same basic block.
1164 Prefer this occurrence to the currently recorded one. We want
1165 the last one in the block and the block is scanned from start
1166 to end. */
1167 avail_occr->insn = insn;
1168 }
1169 else
1170 {
1171 /* First occurrence of this expression in this basic block. */
1172 avail_occr = GOBNEW (struct occr);
1173 bytes_used += sizeof (struct occr);
1174 avail_occr->insn = insn;
1175 avail_occr->next = cur_expr->avail_occr;
1176 avail_occr->deleted_p = 0;
1177 cur_expr->avail_occr = avail_occr;
1178 }
1179 }
1180 }
1181
1182 /* Scan SET present in INSN and add an entry to the hash TABLE. */
1183
1184 static void
1185 hash_scan_set (rtx set, rtx insn, struct hash_table_d *table)
1186 {
1187 rtx src = SET_SRC (set);
1188 rtx dest = SET_DEST (set);
1189 rtx note;
1190
1191 if (GET_CODE (src) == CALL)
1192 hash_scan_call (src, insn, table);
1193
1194 else if (REG_P (dest))
1195 {
1196 unsigned int regno = REGNO (dest);
1197 int max_distance = 0;
1198
1199 /* See if a REG_EQUAL note shows this equivalent to a simpler expression.
1200
1201 This allows us to do a single GCSE pass and still eliminate
1202 redundant constants, addresses or other expressions that are
1203 constructed with multiple instructions.
1204
1205 However, keep the original SRC if INSN is a simple reg-reg move.
1206 In this case, there will almost always be a REG_EQUAL note on the
1207 insn that sets SRC. By recording the REG_EQUAL value here as SRC
1208 for INSN, we miss copy propagation opportunities and we perform the
1209 same PRE GCSE operation repeatedly on the same REG_EQUAL value if we
1210 do more than one PRE GCSE pass.
1211
1212 Note that this does not impede profitable constant propagations. We
1213 "look through" reg-reg sets in lookup_avail_set. */
1214 note = find_reg_equal_equiv_note (insn);
1215 if (note != 0
1216 && REG_NOTE_KIND (note) == REG_EQUAL
1217 && !REG_P (src)
1218 && want_to_gcse_p (XEXP (note, 0), NULL))
1219 src = XEXP (note, 0), set = gen_rtx_SET (VOIDmode, dest, src);
1220
1221 /* Only record sets of pseudo-regs in the hash table. */
1222 if (regno >= FIRST_PSEUDO_REGISTER
1223 /* Don't GCSE something if we can't do a reg/reg copy. */
1224 && can_copy_p (GET_MODE (dest))
1225 /* GCSE commonly inserts instruction after the insn. We can't
1226 do that easily for EH edges so disable GCSE on these for now. */
1227 /* ??? We can now easily create new EH landing pads at the
1228 gimple level, for splitting edges; there's no reason we
1229 can't do the same thing at the rtl level. */
1230 && !can_throw_internal (insn)
1231 /* Is SET_SRC something we want to gcse? */
1232 && want_to_gcse_p (src, &max_distance)
1233 /* Don't CSE a nop. */
1234 && ! set_noop_p (set)
1235 /* Don't GCSE if it has attached REG_EQUIV note.
1236             At this point only function parameters should have
1237             REG_EQUIV notes, and if the argument slot is used somewhere
1238             explicitly, it means the address of the parameter has been taken,
1239 so we should not extend the lifetime of the pseudo. */
1240 && (note == NULL_RTX || ! MEM_P (XEXP (note, 0))))
1241 {
1242 /* An expression is not anticipatable if its operands are
1243 modified before this insn or if this is not the only SET in
1244 this insn. The latter condition does not have to mean that
1245 SRC itself is not anticipatable, but we just will not be
1246 able to handle code motion of insns with multiple sets. */
1247 int antic_p = oprs_anticipatable_p (src, insn)
1248 && !multiple_sets (insn);
1249 /* An expression is not available if its operands are
1250 subsequently modified, including this insn. It's also not
1251 available if this is a branch, because we can't insert
1252 a set after the branch. */
1253 int avail_p = (oprs_available_p (src, insn)
1254 && ! JUMP_P (insn));
1255
1256 insert_expr_in_table (src, GET_MODE (dest), insn, antic_p, avail_p,
1257 max_distance, table);
1258 }
1259 }
1260   /* In the case of a store, we want to consider the memory value as available
1261      in the REG stored in that memory location.  This makes it possible to
1262      remove redundant loads due to stores to the same location.  */
1263 else if (flag_gcse_las && REG_P (src) && MEM_P (dest))
1264 {
1265 unsigned int regno = REGNO (src);
1266 int max_distance = 0;
1267
1268 /* Only record sets of pseudo-regs in the hash table. */
1269 if (regno >= FIRST_PSEUDO_REGISTER
1270 /* Don't GCSE something if we can't do a reg/reg copy. */
1271 && can_copy_p (GET_MODE (src))
1272 /* GCSE commonly inserts instruction after the insn. We can't
1273 do that easily for EH edges so disable GCSE on these for now. */
1274 && !can_throw_internal (insn)
1275 /* Is SET_DEST something we want to gcse? */
1276 && want_to_gcse_p (dest, &max_distance)
1277 /* Don't CSE a nop. */
1278 && ! set_noop_p (set)
1279 /* Don't GCSE if it has attached REG_EQUIV note.
1280             At this point only function parameters should have
1281             REG_EQUIV notes, and if the argument slot is used somewhere
1282             explicitly, it means the address of the parameter has been taken,
1283 so we should not extend the lifetime of the pseudo. */
1284 && ((note = find_reg_note (insn, REG_EQUIV, NULL_RTX)) == 0
1285 || ! MEM_P (XEXP (note, 0))))
1286 {
1287 /* Stores are never anticipatable. */
1288 int antic_p = 0;
1289 /* An expression is not available if its operands are
1290 subsequently modified, including this insn. It's also not
1291 available if this is a branch, because we can't insert
1292 a set after the branch. */
1293 int avail_p = oprs_available_p (dest, insn)
1294 && ! JUMP_P (insn);
1295
1296 /* Record the memory expression (DEST) in the hash table. */
1297 insert_expr_in_table (dest, GET_MODE (dest), insn,
1298 antic_p, avail_p, max_distance, table);
1299 }
1300 }
1301 }
1302
1303 static void
1304 hash_scan_clobber (rtx x ATTRIBUTE_UNUSED, rtx insn ATTRIBUTE_UNUSED,
1305 struct hash_table_d *table ATTRIBUTE_UNUSED)
1306 {
1307 /* Currently nothing to do. */
1308 }
1309
1310 static void
1311 hash_scan_call (rtx x ATTRIBUTE_UNUSED, rtx insn ATTRIBUTE_UNUSED,
1312 struct hash_table_d *table ATTRIBUTE_UNUSED)
1313 {
1314 /* Currently nothing to do. */
1315 }
1316
1317 /* Process INSN and add hash table entries as appropriate. */
1318
1319 static void
1320 hash_scan_insn (rtx insn, struct hash_table_d *table)
1321 {
1322 rtx pat = PATTERN (insn);
1323 int i;
1324
1325 /* Pick out the sets of INSN and for other forms of instructions record
1326 what's been modified. */
1327
1328 if (GET_CODE (pat) == SET)
1329 hash_scan_set (pat, insn, table);
1330
1331 else if (GET_CODE (pat) == CLOBBER)
1332 hash_scan_clobber (pat, insn, table);
1333
1334 else if (GET_CODE (pat) == CALL)
1335 hash_scan_call (pat, insn, table);
1336
1337 else if (GET_CODE (pat) == PARALLEL)
1338 for (i = 0; i < XVECLEN (pat, 0); i++)
1339 {
1340 rtx x = XVECEXP (pat, 0, i);
1341
1342 if (GET_CODE (x) == SET)
1343 hash_scan_set (x, insn, table);
1344 else if (GET_CODE (x) == CLOBBER)
1345 hash_scan_clobber (x, insn, table);
1346 else if (GET_CODE (x) == CALL)
1347 hash_scan_call (x, insn, table);
1348 }
1349 }
1350
1351 /* Dump the hash table TABLE to file FILE under the name NAME. */
1352
1353 static void
1354 dump_hash_table (FILE *file, const char *name, struct hash_table_d *table)
1355 {
1356 int i;
1357 /* Flattened out table, so it's printed in proper order. */
1358 struct expr **flat_table;
1359 unsigned int *hash_val;
1360 struct expr *expr;
1361
1362 flat_table = XCNEWVEC (struct expr *, table->n_elems);
1363 hash_val = XNEWVEC (unsigned int, table->n_elems);
1364
1365 for (i = 0; i < (int) table->size; i++)
1366 for (expr = table->table[i]; expr != NULL; expr = expr->next_same_hash)
1367 {
1368 flat_table[expr->bitmap_index] = expr;
1369 hash_val[expr->bitmap_index] = i;
1370 }
1371
1372 fprintf (file, "%s hash table (%d buckets, %d entries)\n",
1373 name, table->size, table->n_elems);
1374
1375 for (i = 0; i < (int) table->n_elems; i++)
1376 if (flat_table[i] != 0)
1377 {
1378 expr = flat_table[i];
1379 fprintf (file, "Index %d (hash value %d; max distance %d)\n ",
1380 expr->bitmap_index, hash_val[i], expr->max_distance);
1381 print_rtl (file, expr->expr);
1382 fprintf (file, "\n");
1383 }
1384
1385 fprintf (file, "\n");
1386
1387 free (flat_table);
1388 free (hash_val);
1389 }
1390
1391 /* Record register first/last/block set information for REGNO in INSN.
1392
1393 first_set records the first place in the block where the register
1394 is set and is used to compute "anticipatability".
1395
1396 last_set records the last place in the block where the register
1397 is set and is used to compute "availability".
1398
1399 last_bb records the block for which first_set and last_set are
1400 valid, as a quick test to invalidate them. */
1401
1402 static void
1403 record_last_reg_set_info (rtx insn, int regno)
1404 {
1405 struct reg_avail_info *info = &reg_avail_info[regno];
1406 int luid = DF_INSN_LUID (insn);
1407
1408 info->last_set = luid;
1409 if (info->last_bb != current_bb)
1410 {
1411 info->last_bb = current_bb;
1412 info->first_set = luid;
1413 }
1414 }
1415
1416 /* Record all of the canonicalized MEMs of record_last_mem_set_info's insn.
1417    Note that each list element records the MEM together with its
1418    canonicalized address as a modify_pair.  */
1419
1420 static void
1421 canon_list_insert (rtx dest ATTRIBUTE_UNUSED, const_rtx x ATTRIBUTE_UNUSED,
1422 void * v_insn)
1423 {
1424 rtx dest_addr, insn;
1425 int bb;
1426 modify_pair *pair;
1427
1428 while (GET_CODE (dest) == SUBREG
1429 || GET_CODE (dest) == ZERO_EXTRACT
1430 || GET_CODE (dest) == STRICT_LOW_PART)
1431 dest = XEXP (dest, 0);
1432
1433 /* If DEST is not a MEM, then it will not conflict with a load. Note
1434 that function calls are assumed to clobber memory, but are handled
1435 elsewhere. */
1436
1437 if (! MEM_P (dest))
1438 return;
1439
1440 dest_addr = get_addr (XEXP (dest, 0));
1441 dest_addr = canon_rtx (dest_addr);
1442 insn = (rtx) v_insn;
1443 bb = BLOCK_FOR_INSN (insn)->index;
1444
1445 pair = VEC_safe_push (modify_pair, heap, canon_modify_mem_list[bb], NULL);
1446 pair->dest = dest;
1447 pair->dest_addr = dest_addr;
1448 }
1449
1450 /* Record memory modification information for INSN. We do not actually care
1451 about the memory location(s) that are set, or even how they are set (consider
1452 a CALL_INSN). We merely need to record which insns modify memory. */
1453
1454 static void
1455 record_last_mem_set_info (rtx insn)
1456 {
1457 int bb = BLOCK_FOR_INSN (insn)->index;
1458
1459 /* load_killed_in_block_p will handle the case of calls clobbering
1460 everything. */
1461 VEC_safe_push (rtx, heap, modify_mem_list[bb], insn);
1462 bitmap_set_bit (modify_mem_list_set, bb);
1463
1464 if (CALL_P (insn))
1465 bitmap_set_bit (blocks_with_calls, bb);
1466 else
1467 note_stores (PATTERN (insn), canon_list_insert, (void*) insn);
1468 }
1469
1470 /* Called from compute_hash_table via note_stores to handle one
1471 SET or CLOBBER in an insn. DATA is really the instruction in which
1472 the SET is taking place. */
1473
1474 static void
1475 record_last_set_info (rtx dest, const_rtx setter ATTRIBUTE_UNUSED, void *data)
1476 {
1477 rtx last_set_insn = (rtx) data;
1478
1479 if (GET_CODE (dest) == SUBREG)
1480 dest = SUBREG_REG (dest);
1481
1482 if (REG_P (dest))
1483 record_last_reg_set_info (last_set_insn, REGNO (dest));
1484 else if (MEM_P (dest)
1485 /* Ignore pushes, they clobber nothing. */
1486 && ! push_operand (dest, GET_MODE (dest)))
1487 record_last_mem_set_info (last_set_insn);
1488 }
1489
1490 /* Top level function to create an expression hash table.
1491
1492 Expression entries are placed in the hash table if
1493 - they are of the form (set (pseudo-reg) src),
1494 - src is something we want to perform GCSE on,
1495 - none of the operands are subsequently modified in the block
1496
1497 Currently src must be a pseudo-reg or a const_int.
1498
1499 TABLE is the table computed. */
1500
1501 static void
1502 compute_hash_table_work (struct hash_table_d *table)
1503 {
1504 int i;
1505
1506   /* Re-cache any INSN_LIST nodes we have allocated.  */
1507 clear_modify_mem_tables ();
1508 /* Some working arrays used to track first and last set in each block. */
1509 reg_avail_info = GNEWVEC (struct reg_avail_info, max_reg_num ());
1510
1511 for (i = 0; i < max_reg_num (); ++i)
1512 reg_avail_info[i].last_bb = NULL;
1513
1514 FOR_EACH_BB (current_bb)
1515 {
1516 rtx insn;
1517 unsigned int regno;
1518
1519 /* First pass over the instructions records information used to
1520 determine when registers and memory are first and last set. */
1521 FOR_BB_INSNS (current_bb, insn)
1522 {
1523 if (!NONDEBUG_INSN_P (insn))
1524 continue;
1525
1526 if (CALL_P (insn))
1527 {
1528 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
1529 if (TEST_HARD_REG_BIT (regs_invalidated_by_call, regno))
1530 record_last_reg_set_info (insn, regno);
1531
1532 if (! RTL_CONST_OR_PURE_CALL_P (insn))
1533 record_last_mem_set_info (insn);
1534 }
1535
1536 note_stores (PATTERN (insn), record_last_set_info, insn);
1537 }
1538
1539 /* The next pass builds the hash table. */
1540 FOR_BB_INSNS (current_bb, insn)
1541 if (NONDEBUG_INSN_P (insn))
1542 hash_scan_insn (insn, table);
1543 }
1544
1545 free (reg_avail_info);
1546 reg_avail_info = NULL;
1547 }
1548
1549 /* Allocate space for the set/expr hash TABLE.
1550    The maximum insn count is used to determine the number of buckets to use.  */
1551
1552 static void
1553 alloc_hash_table (struct hash_table_d *table)
1554 {
1555 int n;
1556
1557 n = get_max_insn_count ();
1558
1559 table->size = n / 4;
1560 if (table->size < 11)
1561 table->size = 11;
1562
1563 /* Attempt to maintain efficient use of hash table.
1564 Making it an odd number is simplest for now.
1565 ??? Later take some measurements. */
1566 table->size |= 1;
1567 n = table->size * sizeof (struct expr *);
1568 table->table = GNEWVAR (struct expr *, n);
1569 }
1570
1571 /* Free things allocated by alloc_hash_table. */
1572
1573 static void
1574 free_hash_table (struct hash_table_d *table)
1575 {
1576 free (table->table);
1577 }
1578
1579 /* Compute the expression hash table TABLE. */
1580
1581 static void
1582 compute_hash_table (struct hash_table_d *table)
1583 {
1584 /* Initialize count of number of entries in hash table. */
1585 table->n_elems = 0;
1586 memset (table->table, 0, table->size * sizeof (struct expr *));
1587
1588 compute_hash_table_work (table);
1589 }
1590 \f
1591 /* Expression tracking support. */
1592
1593 /* Clear canon_modify_mem_list and modify_mem_list tables. */
1594 static void
1595 clear_modify_mem_tables (void)
1596 {
1597 unsigned i;
1598 bitmap_iterator bi;
1599
1600 EXECUTE_IF_SET_IN_BITMAP (modify_mem_list_set, 0, i, bi)
1601 {
1602 VEC_free (rtx, heap, modify_mem_list[i]);
1603 VEC_free (modify_pair, heap, canon_modify_mem_list[i]);
1604 }
1605 bitmap_clear (modify_mem_list_set);
1606 bitmap_clear (blocks_with_calls);
1607 }
1608
1609 /* Release memory used by modify_mem_list_set. */
1610
1611 static void
1612 free_modify_mem_tables (void)
1613 {
1614 clear_modify_mem_tables ();
1615 free (modify_mem_list);
1616 free (canon_modify_mem_list);
1617 modify_mem_list = 0;
1618 canon_modify_mem_list = 0;
1619 }
1620 \f
1621 /* For each block, compute whether X is transparent. X is either an
1622 expression or an assignment [though we don't care which, for this context
1623 an assignment is treated as an expression]. For each block where an
1624 element of X is modified, reset the INDX bit in BMAP. */
1625
1626 static void
1627 compute_transp (const_rtx x, int indx, sbitmap *bmap)
1628 {
1629 int i, j;
1630 enum rtx_code code;
1631 const char *fmt;
1632
1633 /* repeat is used to turn tail-recursion into iteration since GCC
1634 can't do it when there's no return value. */
1635 repeat:
1636
1637 if (x == 0)
1638 return;
1639
1640 code = GET_CODE (x);
1641 switch (code)
1642 {
1643 case REG:
1644 {
1645 df_ref def;
1646 for (def = DF_REG_DEF_CHAIN (REGNO (x));
1647 def;
1648 def = DF_REF_NEXT_REG (def))
1649 RESET_BIT (bmap[DF_REF_BB (def)->index], indx);
1650 }
1651
1652 return;
1653
1654 case MEM:
1655 if (! MEM_READONLY_P (x))
1656 {
1657 bitmap_iterator bi;
1658 unsigned bb_index;
1659
1660 /* First handle all the blocks with calls. We don't need to
1661 do any list walking for them. */
1662 EXECUTE_IF_SET_IN_BITMAP (blocks_with_calls, 0, bb_index, bi)
1663 {
1664 RESET_BIT (bmap[bb_index], indx);
1665 }
1666
1667 /* Now iterate over the blocks which have memory modifications
1668 but which do not have any calls. */
1669 EXECUTE_IF_AND_COMPL_IN_BITMAP (modify_mem_list_set,
1670 blocks_with_calls,
1671 0, bb_index, bi)
1672 {
1673 VEC (modify_pair,heap) *list
1674 = canon_modify_mem_list[bb_index];
1675 modify_pair *pair;
1676 unsigned ix;
1677
1678 FOR_EACH_VEC_ELT_REVERSE (modify_pair, list, ix, pair)
1679 {
1680 rtx dest = pair->dest;
1681 rtx dest_addr = pair->dest_addr;
1682
1683 if (canon_true_dependence (dest, GET_MODE (dest),
1684 dest_addr, x, NULL_RTX))
1685 RESET_BIT (bmap[bb_index], indx);
1686 }
1687 }
1688 }
1689
1690 x = XEXP (x, 0);
1691 goto repeat;
1692
1693 case PC:
1694 case CC0: /*FIXME*/
1695 case CONST:
1696 case CONST_INT:
1697 case CONST_DOUBLE:
1698 case CONST_FIXED:
1699 case CONST_VECTOR:
1700 case SYMBOL_REF:
1701 case LABEL_REF:
1702 case ADDR_VEC:
1703 case ADDR_DIFF_VEC:
1704 return;
1705
1706 default:
1707 break;
1708 }
1709
1710 for (i = GET_RTX_LENGTH (code) - 1, fmt = GET_RTX_FORMAT (code); i >= 0; i--)
1711 {
1712 if (fmt[i] == 'e')
1713 {
1714 /* If we are about to do the last recursive call
1715 needed at this level, change it into iteration.
1716 This function is called enough to be worth it. */
1717 if (i == 0)
1718 {
1719 x = XEXP (x, i);
1720 goto repeat;
1721 }
1722
1723 compute_transp (XEXP (x, i), indx, bmap);
1724 }
1725 else if (fmt[i] == 'E')
1726 for (j = 0; j < XVECLEN (x, i); j++)
1727 compute_transp (XVECEXP (x, i, j), indx, bmap);
1728 }
1729 }
1730 \f
1731 /* Compute PRE+LCM working variables. */
1732
1733 /* Local properties of expressions. */
1734
1735 /* Nonzero for expressions that are transparent in the block. */
1736 static sbitmap *transp;
1737
1738 /* Nonzero for expressions that are computed (available) in the block. */
1739 static sbitmap *comp;
1740
1741 /* Nonzero for expressions that are locally anticipatable in the block. */
1742 static sbitmap *antloc;
1743
1744 /* Nonzero for expressions where this block is an optimal computation
1745 point. */
1746 static sbitmap *pre_optimal;
1747
1748 /* Nonzero for expressions which are redundant in a particular block. */
1749 static sbitmap *pre_redundant;
1750
1751 /* Nonzero for expressions which should be inserted on a specific edge. */
1752 static sbitmap *pre_insert_map;
1753
1754 /* Nonzero for expressions which should be deleted in a specific block. */
1755 static sbitmap *pre_delete_map;
1756
1757 /* Allocate vars used for PRE analysis. */
1758
1759 static void
1760 alloc_pre_mem (int n_blocks, int n_exprs)
1761 {
1762 transp = sbitmap_vector_alloc (n_blocks, n_exprs);
1763 comp = sbitmap_vector_alloc (n_blocks, n_exprs);
1764 antloc = sbitmap_vector_alloc (n_blocks, n_exprs);
1765
1766 pre_optimal = NULL;
1767 pre_redundant = NULL;
1768 pre_insert_map = NULL;
1769 pre_delete_map = NULL;
1770 ae_kill = sbitmap_vector_alloc (n_blocks, n_exprs);
1771
1772 /* pre_insert and pre_delete are allocated later. */
1773 }
1774
1775 /* Free vars used for PRE analysis. */
1776
1777 static void
1778 free_pre_mem (void)
1779 {
1780 sbitmap_vector_free (transp);
1781 sbitmap_vector_free (comp);
1782
1783 /* ANTLOC and AE_KILL are freed just after pre_lcm finishes. */
1784
1785 if (pre_optimal)
1786 sbitmap_vector_free (pre_optimal);
1787 if (pre_redundant)
1788 sbitmap_vector_free (pre_redundant);
1789 if (pre_insert_map)
1790 sbitmap_vector_free (pre_insert_map);
1791 if (pre_delete_map)
1792 sbitmap_vector_free (pre_delete_map);
1793
1794 transp = comp = NULL;
1795 pre_optimal = pre_redundant = pre_insert_map = pre_delete_map = NULL;
1796 }
1797
1798 /* Remove certain expressions from anticipatable and transparent
1799 sets of basic blocks that have an incoming abnormal edge.
1800 For PRE remove potentially trapping expressions to avoid placing
1801 them on abnormal edges. For hoisting remove memory references that
1802 can be clobbered by calls. */
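/* A sketch of the idea (the RTL here is made up): a possibly-trapping
   expression such as (div:SI (reg:SI 65) (reg:SI 66)) is pruned so that
   PRE never has to materialize it on an abnormal edge, and for hoisting a
   non-constant MEM is pruned because it could be clobbered by the call
   that ends the predecessor block of such an edge.  */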
1803
1804 static void
1805 prune_expressions (bool pre_p)
1806 {
1807 sbitmap prune_exprs;
1808 struct expr *expr;
1809 unsigned int ui;
1810 basic_block bb;
1811
1812 prune_exprs = sbitmap_alloc (expr_hash_table.n_elems);
1813 sbitmap_zero (prune_exprs);
1814 for (ui = 0; ui < expr_hash_table.size; ui++)
1815 {
1816 for (expr = expr_hash_table.table[ui]; expr; expr = expr->next_same_hash)
1817 {
1818 /* Note potentially trapping expressions. */
1819 if (may_trap_p (expr->expr))
1820 {
1821 SET_BIT (prune_exprs, expr->bitmap_index);
1822 continue;
1823 }
1824
1825 if (!pre_p && MEM_P (expr->expr))
1826 /* Note memory references that can be clobbered by a call.
1827 We do not split abnormal edges in hoisting, so if
1828 a memory reference were hoisted along an abnormal edge,
1829 it would be placed /before/ the call. Therefore, only
1830 constant memory references can be hoisted along abnormal
1831 edges. */
1832 {
1833 if (GET_CODE (XEXP (expr->expr, 0)) == SYMBOL_REF
1834 && CONSTANT_POOL_ADDRESS_P (XEXP (expr->expr, 0)))
1835 continue;
1836
1837 if (MEM_READONLY_P (expr->expr)
1838 && !MEM_VOLATILE_P (expr->expr)
1839 && MEM_NOTRAP_P (expr->expr))
1840 /* Constant memory reference, e.g., a PIC address. */
1841 continue;
1842
1843 /* ??? Optimally, we would use interprocedural alias
1844 analysis to determine if this mem is actually killed
1845 by this call. */
1846
1847 SET_BIT (prune_exprs, expr->bitmap_index);
1848 }
1849 }
1850 }
1851
1852 FOR_EACH_BB (bb)
1853 {
1854 edge e;
1855 edge_iterator ei;
1856
1857 /* If the current block is the destination of an abnormal edge, we
1858 kill all trapping (for PRE) and memory (for hoist) expressions
1859 because we won't be able to properly place the instruction on
1860 the edge. So make them neither anticipatable nor transparent.
1861 This is fairly conservative.
1862
1863 ??? For hoisting it may be necessary to check for set-and-jump
1864 instructions here, not just for abnormal edges. The general problem
1865 is that when an expression cannot be placed right at the end of
1866 a basic block we should account for any side-effects of a subsequent
1867 jump instruction that could clobber the expression. It would
1868 be best to implement this check along the lines of
1869 hoist_expr_reaches_here_p where the target block is already known
1870 and, hence, there's no need to conservatively prune expressions on
1871 "intermediate" set-and-jump instructions. */
1872 FOR_EACH_EDGE (e, ei, bb->preds)
1873 if ((e->flags & EDGE_ABNORMAL)
1874 && (pre_p || CALL_P (BB_END (e->src))))
1875 {
1876 sbitmap_difference (antloc[bb->index],
1877 antloc[bb->index], prune_exprs);
1878 sbitmap_difference (transp[bb->index],
1879 transp[bb->index], prune_exprs);
1880 break;
1881 }
1882 }
1883
1884 sbitmap_free (prune_exprs);
1885 }
1886
1887 /* It may be necessary to insert a large number of insns on edges to
1888 make the existing occurrences of expressions fully redundant. This
1889 routine examines the set of insertions and deletions and if the ratio
1890 of insertions to deletions is too high for a particular expression, then
1891 the expression is removed from the insertion/deletion sets.
1892
1893 N_ELEMS is the number of elements in the hash table. */
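/* A worked example with made-up numbers: if an expression would have to be
   inserted on 50 edges to make a single computation redundant, and 50/1
   exceeds MAX_GCSE_INSERTION_RATIO, the expression is dropped from both
   PRE_INSERT_MAP and PRE_DELETE_MAP and the partial redundancy is simply
   left in place.  */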
1894
1895 static void
1896 prune_insertions_deletions (int n_elems)
1897 {
1898 sbitmap_iterator sbi;
1899 sbitmap prune_exprs;
1900
1901 /* We always use I to iterate over blocks/edges and J to iterate over
1902 expressions. */
1903 unsigned int i, j;
1904
1905 /* Counts for the number of times an expression needs to be inserted and
1906 number of times an expression can be removed as a result. */
1907 int *insertions = GCNEWVEC (int, n_elems);
1908 int *deletions = GCNEWVEC (int, n_elems);
1909
1910 /* Set of expressions which require too many insertions relative to
1911 the number of deletions achieved. We will prune these out of the
1912 insertion/deletion sets. */
1913 prune_exprs = sbitmap_alloc (n_elems);
1914 sbitmap_zero (prune_exprs);
1915
1916 /* Iterate over the edges counting the number of times each expression
1917 needs to be inserted. */
1918 for (i = 0; i < (unsigned) n_edges; i++)
1919 {
1920 EXECUTE_IF_SET_IN_SBITMAP (pre_insert_map[i], 0, j, sbi)
1921 insertions[j]++;
1922 }
1923
1924 /* Similarly for deletions, but those occur in blocks rather than on
1925 edges. */
1926 for (i = 0; i < (unsigned) last_basic_block; i++)
1927 {
1928 EXECUTE_IF_SET_IN_SBITMAP (pre_delete_map[i], 0, j, sbi)
1929 deletions[j]++;
1930 }
1931
1932 /* Now that we have accurate counts, iterate over the elements in the
1933 hash table and see if any need too many insertions relative to the
1934 number of evaluations that can be removed. If so, mark them in
1935 PRUNE_EXPRS. */
1936 for (j = 0; j < (unsigned) n_elems; j++)
1937 if (deletions[j]
1938 && ((unsigned) insertions[j] / deletions[j]) > MAX_GCSE_INSERTION_RATIO)
1939 SET_BIT (prune_exprs, j);
1940
1941 /* Now prune PRE_INSERT_MAP and PRE_DELETE_MAP based on PRUNE_EXPRS. */
1942 EXECUTE_IF_SET_IN_SBITMAP (prune_exprs, 0, j, sbi)
1943 {
1944 for (i = 0; i < (unsigned) n_edges; i++)
1945 RESET_BIT (pre_insert_map[i], j);
1946
1947 for (i = 0; i < (unsigned) last_basic_block; i++)
1948 RESET_BIT (pre_delete_map[i], j);
1949 }
1950
1951 sbitmap_free (prune_exprs);
1952 free (insertions);
1953 free (deletions);
1954 }
1955
1956 /* Top level routine to do the dataflow analysis needed by PRE. */
1957
1958 static struct edge_list *
1959 compute_pre_data (void)
1960 {
1961 struct edge_list *edge_list;
1962 basic_block bb;
1963
1964 compute_local_properties (transp, comp, antloc, &expr_hash_table);
1965 prune_expressions (true);
1966 sbitmap_vector_zero (ae_kill, last_basic_block);
1967
1968 /* Compute ae_kill for each basic block using:
1969
1970 ~(TRANSP | COMP)
1971 */
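/* That is, an expression is killed in BB exactly when BB neither
   preserves it (TRANSP) nor computes it (COMP).  */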
1972
1973 FOR_EACH_BB (bb)
1974 {
1975 sbitmap_a_or_b (ae_kill[bb->index], transp[bb->index], comp[bb->index]);
1976 sbitmap_not (ae_kill[bb->index], ae_kill[bb->index]);
1977 }
1978
1979 edge_list = pre_edge_lcm (expr_hash_table.n_elems, transp, comp, antloc,
1980 ae_kill, &pre_insert_map, &pre_delete_map);
1981 sbitmap_vector_free (antloc);
1982 antloc = NULL;
1983 sbitmap_vector_free (ae_kill);
1984 ae_kill = NULL;
1985
1986 prune_insertions_deletions (expr_hash_table.n_elems);
1987
1988 return edge_list;
1989 }
1990 \f
1991 /* PRE utilities */
1992
1993 /* Return nonzero if an occurrence of expression EXPR in OCCR_BB would reach
1994 block BB.
1995
1996 VISITED is a pointer to a working buffer for tracking which BB's have
1997 been visited. It is NULL for the top-level call.
1998
1999 We treat reaching expressions that go through blocks containing the same
2000 reaching expression as "not reaching". E.g. if EXPR is generated in blocks
2001 2 and 3, INSN is in block 4, and 2->3->4, we treat the expression in block
2002 2 as not reaching. The intent is to improve the probability of finding
2003 only one reaching expression and to reduce register lifetimes by picking
2004 the closest such expression. */
2005
2006 static int
2007 pre_expr_reaches_here_p_work (basic_block occr_bb, struct expr *expr,
2008 basic_block bb, char *visited)
2009 {
2010 edge pred;
2011 edge_iterator ei;
2012
2013 FOR_EACH_EDGE (pred, ei, bb->preds)
2014 {
2015 basic_block pred_bb = pred->src;
2016
2017 if (pred->src == ENTRY_BLOCK_PTR
2018 /* Has this predecessor already been visited? */
2019 || visited[pred_bb->index])
2020 ;/* Nothing to do. */
2021
2022 /* Does this predecessor generate this expression? */
2023 else if (TEST_BIT (comp[pred_bb->index], expr->bitmap_index))
2024 {
2025 /* Is this the occurrence we're looking for?
2026 Note that there's only one generating occurrence per block
2027 so we just need to check the block number. */
2028 if (occr_bb == pred_bb)
2029 return 1;
2030
2031 visited[pred_bb->index] = 1;
2032 }
2033 /* Ignore this predecessor if it kills the expression. */
2034 else if (! TEST_BIT (transp[pred_bb->index], expr->bitmap_index))
2035 visited[pred_bb->index] = 1;
2036
2037 /* Neither gen nor kill. */
2038 else
2039 {
2040 visited[pred_bb->index] = 1;
2041 if (pre_expr_reaches_here_p_work (occr_bb, expr, pred_bb, visited))
2042 return 1;
2043 }
2044 }
2045
2046 /* All paths have been checked. */
2047 return 0;
2048 }
2049
2050 /* The wrapper for pre_expr_reaches_here_p_work that ensures that any
2051 memory allocated for that function is freed. */
2052
2053 static int
2054 pre_expr_reaches_here_p (basic_block occr_bb, struct expr *expr, basic_block bb)
2055 {
2056 int rval;
2057 char *visited = XCNEWVEC (char, last_basic_block);
2058
2059 rval = pre_expr_reaches_here_p_work (occr_bb, expr, bb, visited);
2060
2061 free (visited);
2062 return rval;
2063 }
2064 \f
2065 /* Generate RTL to copy an EXPR to its `reaching_reg' and return it. */
2066
2067 static rtx
2068 process_insert_insn (struct expr *expr)
2069 {
2070 rtx reg = expr->reaching_reg;
2071 /* Copy the expression to make sure we don't have any sharing issues. */
2072 rtx exp = copy_rtx (expr->expr);
2073 rtx pat;
2074
2075 start_sequence ();
2076
2077 /* If the expression is something that's an operand, like a constant,
2078 just copy it to a register. */
2079 if (general_operand (exp, GET_MODE (reg)))
2080 emit_move_insn (reg, exp);
2081
2082 /* Otherwise, make a new insn to compute this expression and make sure the
2083 insn will be recognized (this also adds any needed CLOBBERs). */
2084 else
2085 {
2086 rtx insn = emit_insn (gen_rtx_SET (VOIDmode, reg, exp));
2087
2088 if (insn_invalid_p (insn, false))
2089 gcc_unreachable ();
2090 }
2091
2092 pat = get_insns ();
2093 end_sequence ();
2094
2095 return pat;
2096 }
2097
2098 /* Add EXPR to the end of basic block BB.
2099
2100 This is used by both PRE and code hoisting. */
2101
2102 static void
2103 insert_insn_end_basic_block (struct expr *expr, basic_block bb)
2104 {
2105 rtx insn = BB_END (bb);
2106 rtx new_insn;
2107 rtx reg = expr->reaching_reg;
2108 int regno = REGNO (reg);
2109 rtx pat, pat_end;
2110
2111 pat = process_insert_insn (expr);
2112 gcc_assert (pat && INSN_P (pat));
2113
2114 pat_end = pat;
2115 while (NEXT_INSN (pat_end) != NULL_RTX)
2116 pat_end = NEXT_INSN (pat_end);
2117
2118 /* If the last insn is a jump, insert EXPR in front [taking care to
2119 handle cc0, etc. properly]. Similarly we need to take care of
2120 trapping instructions in the presence of non-call exceptions. */
2121
2122 if (JUMP_P (insn)
2123 || (NONJUMP_INSN_P (insn)
2124 && (!single_succ_p (bb)
2125 || single_succ_edge (bb)->flags & EDGE_ABNORMAL)))
2126 {
2127 #ifdef HAVE_cc0
2128 rtx note;
2129 #endif
2130
2131 /* If this is a jump table, then we can't insert stuff here. Since
2132 we know the previous real insn must be the tablejump, we insert
2133 the new instruction just before the tablejump. */
2134 if (GET_CODE (PATTERN (insn)) == ADDR_VEC
2135 || GET_CODE (PATTERN (insn)) == ADDR_DIFF_VEC)
2136 insn = prev_active_insn (insn);
2137
2138 #ifdef HAVE_cc0
2139 /* FIXME: 'twould be nice to call prev_cc0_setter here but it aborts
2140 if cc0 isn't set. */
2141 note = find_reg_note (insn, REG_CC_SETTER, NULL_RTX);
2142 if (note)
2143 insn = XEXP (note, 0);
2144 else
2145 {
2146 rtx maybe_cc0_setter = prev_nonnote_insn (insn);
2147 if (maybe_cc0_setter
2148 && INSN_P (maybe_cc0_setter)
2149 && sets_cc0_p (PATTERN (maybe_cc0_setter)))
2150 insn = maybe_cc0_setter;
2151 }
2152 #endif
2153 /* FIXME: What if something in cc0/jump uses value set in new insn? */
2154 new_insn = emit_insn_before_noloc (pat, insn, bb);
2155 }
2156
2157 /* Likewise if the last insn is a call, as will happen in the presence
2158 of exception handling. */
2159 else if (CALL_P (insn)
2160 && (!single_succ_p (bb)
2161 || single_succ_edge (bb)->flags & EDGE_ABNORMAL))
2162 {
2163 /* Keeping in mind targets with small register classes and parameters
2164 in registers, we search backward and place the instructions before
2165 the first parameter is loaded. Do this for all targets for consistency
2166 and on the presumption that we'll get better code elsewhere as well. */
2167
2168 /* Since different machines initialize their parameter registers
2169 in different orders, assume nothing. Collect the set of all
2170 parameter registers. */
2171 insn = find_first_parameter_load (insn, BB_HEAD (bb));
2172
2173 /* If we found all the parameter loads, then we want to insert
2174 before the first parameter load.
2175
2176 If we did not find all the parameter loads, then we might have
2177 stopped on the head of the block, which could be a CODE_LABEL.
2178 If we inserted before the CODE_LABEL, then we would be putting
2179 the insn in the wrong basic block. In that case, put the insn
2180 after the CODE_LABEL. Also, respect NOTE_INSN_BASIC_BLOCK. */
2181 while (LABEL_P (insn)
2182 || NOTE_INSN_BASIC_BLOCK_P (insn))
2183 insn = NEXT_INSN (insn);
2184
2185 new_insn = emit_insn_before_noloc (pat, insn, bb);
2186 }
2187 else
2188 new_insn = emit_insn_after_noloc (pat, insn, bb);
2189
2190 while (1)
2191 {
2192 if (INSN_P (pat))
2193 add_label_notes (PATTERN (pat), new_insn);
2194 if (pat == pat_end)
2195 break;
2196 pat = NEXT_INSN (pat);
2197 }
2198
2199 gcse_create_count++;
2200
2201 if (dump_file)
2202 {
2203 fprintf (dump_file, "PRE/HOIST: end of bb %d, insn %d, ",
2204 bb->index, INSN_UID (new_insn));
2205 fprintf (dump_file, "copying expression %d to reg %d\n",
2206 expr->bitmap_index, regno);
2207 }
2208 }
2209
2210 /* Insert partially redundant expressions on edges in the CFG to make
2211 the expressions fully redundant. */
2212
2213 static int
2214 pre_edge_insert (struct edge_list *edge_list, struct expr **index_map)
2215 {
2216 int e, i, j, num_edges, set_size, did_insert = 0;
2217 sbitmap *inserted;
2218
2219 /* Where PRE_INSERT_MAP is nonzero, we add the expression on that edge
2220 if it reaches any of the deleted expressions. */
2221
2222 set_size = pre_insert_map[0]->size;
2223 num_edges = NUM_EDGES (edge_list);
2224 inserted = sbitmap_vector_alloc (num_edges, expr_hash_table.n_elems);
2225 sbitmap_vector_zero (inserted, num_edges);
2226
2227 for (e = 0; e < num_edges; e++)
2228 {
2229 int indx;
2230 basic_block bb = INDEX_EDGE_PRED_BB (edge_list, e);
2231
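/* PRE_INSERT_MAP[e] is walked one SBITMAP_ELT_TYPE word at a time; INDX
   is the expression index of bit 0 of the current word, and the inner
   loop peels bits off INSERT from the low end.  */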
2232 for (i = indx = 0; i < set_size; i++, indx += SBITMAP_ELT_BITS)
2233 {
2234 SBITMAP_ELT_TYPE insert = pre_insert_map[e]->elms[i];
2235
2236 for (j = indx;
2237 insert && j < (int) expr_hash_table.n_elems;
2238 j++, insert >>= 1)
2239 if ((insert & 1) != 0 && index_map[j]->reaching_reg != NULL_RTX)
2240 {
2241 struct expr *expr = index_map[j];
2242 struct occr *occr;
2243
2244 /* Now look at each deleted occurrence of this expression. */
2245 for (occr = expr->antic_occr; occr != NULL; occr = occr->next)
2246 {
2247 if (! occr->deleted_p)
2248 continue;
2249
2250 /* Insert this expression on this edge if it would
2251 reach the deleted occurrence in BB. */
2252 if (!TEST_BIT (inserted[e], j))
2253 {
2254 rtx insn;
2255 edge eg = INDEX_EDGE (edge_list, e);
2256
2257 /* We can't insert anything on an abnormal and
2258 critical edge, so we insert the insn at the end of
2259 the previous block. There are several alternatives
2260 detailed in Morgan's book P277 (sec 10.5) for
2261 handling this situation. This one is easiest for
2262 now. */
2263
2264 if (eg->flags & EDGE_ABNORMAL)
2265 insert_insn_end_basic_block (index_map[j], bb);
2266 else
2267 {
2268 insn = process_insert_insn (index_map[j]);
2269 insert_insn_on_edge (insn, eg);
2270 }
2271
2272 if (dump_file)
2273 {
2274 fprintf (dump_file, "PRE: edge (%d,%d), ",
2275 bb->index,
2276 INDEX_EDGE_SUCC_BB (edge_list, e)->index);
2277 fprintf (dump_file, "copy expression %d\n",
2278 expr->bitmap_index);
2279 }
2280
2281 update_ld_motion_stores (expr);
2282 SET_BIT (inserted[e], j);
2283 did_insert = 1;
2284 gcse_create_count++;
2285 }
2286 }
2287 }
2288 }
2289 }
2290
2291 sbitmap_vector_free (inserted);
2292 return did_insert;
2293 }
2294
2295 /* Copy the result of EXPR->EXPR generated by INSN to EXPR->REACHING_REG.
2296 Given "old_reg <- expr" (INSN), instead of adding after it
2297 reaching_reg <- old_reg
2298 it's better to do the following:
2299 reaching_reg <- expr
2300 old_reg <- reaching_reg
2301 because this way copy propagation can discover additional PRE
2302 opportunities. But if this fails, we try the old way.
2303 When "expr" is a store, i.e.
2304 given "MEM <- old_reg", instead of adding after it
2305 reaching_reg <- old_reg
2306 it's better to add it before as follows:
2307 reaching_reg <- old_reg
2308 MEM <- reaching_reg. */
2309
2310 static void
2311 pre_insert_copy_insn (struct expr *expr, rtx insn)
2312 {
2313 rtx reg = expr->reaching_reg;
2314 int regno = REGNO (reg);
2315 int indx = expr->bitmap_index;
2316 rtx pat = PATTERN (insn);
2317 rtx set, first_set, new_insn;
2318 rtx old_reg;
2319 int i;
2320
2321 /* This block matches the logic in hash_scan_insn. */
2322 switch (GET_CODE (pat))
2323 {
2324 case SET:
2325 set = pat;
2326 break;
2327
2328 case PARALLEL:
2329 /* Search through the parallel looking for the set whose
2330 source was the expression that we're interested in. */
2331 first_set = NULL_RTX;
2332 set = NULL_RTX;
2333 for (i = 0; i < XVECLEN (pat, 0); i++)
2334 {
2335 rtx x = XVECEXP (pat, 0, i);
2336 if (GET_CODE (x) == SET)
2337 {
2338 /* If the source was a REG_EQUAL or REG_EQUIV note, we
2339 may not find an equivalent expression, but in this
2340 case the PARALLEL will have a single set. */
2341 if (first_set == NULL_RTX)
2342 first_set = x;
2343 if (expr_equiv_p (SET_SRC (x), expr->expr))
2344 {
2345 set = x;
2346 break;
2347 }
2348 }
2349 }
2350
2351 gcc_assert (first_set);
2352 if (set == NULL_RTX)
2353 set = first_set;
2354 break;
2355
2356 default:
2357 gcc_unreachable ();
2358 }
2359
2360 if (REG_P (SET_DEST (set)))
2361 {
2362 old_reg = SET_DEST (set);
2363 /* Check if we can modify the set destination in the original insn. */
2364 if (validate_change (insn, &SET_DEST (set), reg, 0))
2365 {
2366 new_insn = gen_move_insn (old_reg, reg);
2367 new_insn = emit_insn_after (new_insn, insn);
2368 }
2369 else
2370 {
2371 new_insn = gen_move_insn (reg, old_reg);
2372 new_insn = emit_insn_after (new_insn, insn);
2373 }
2374 }
2375 else /* This is possible only in case of a store to memory. */
2376 {
2377 old_reg = SET_SRC (set);
2378 new_insn = gen_move_insn (reg, old_reg);
2379
2380 /* Check if we can modify the set source in the original insn. */
2381 if (validate_change (insn, &SET_SRC (set), reg, 0))
2382 new_insn = emit_insn_before (new_insn, insn);
2383 else
2384 new_insn = emit_insn_after (new_insn, insn);
2385 }
2386
2387 gcse_create_count++;
2388
2389 if (dump_file)
2390 fprintf (dump_file,
2391 "PRE: bb %d, insn %d, copy expression %d in insn %d to reg %d\n",
2392 BLOCK_FOR_INSN (insn)->index, INSN_UID (new_insn), indx,
2393 INSN_UID (insn), regno);
2394 }
2395
2396 /* Copy available expressions that reach the redundant expression
2397 to `reaching_reg'. */
2398
2399 static void
2400 pre_insert_copies (void)
2401 {
2402 unsigned int i, added_copy;
2403 struct expr *expr;
2404 struct occr *occr;
2405 struct occr *avail;
2406
2407 /* For each available expression in the table, copy the result to
2408 `reaching_reg' if the expression reaches a deleted one.
2409
2410 ??? The current algorithm is rather brute force.
2411 Need to do some profiling. */
2412
2413 for (i = 0; i < expr_hash_table.size; i++)
2414 for (expr = expr_hash_table.table[i]; expr; expr = expr->next_same_hash)
2415 {
2416 /* If the basic block isn't reachable, PPOUT will be TRUE. However,
2417 we don't want to insert a copy here because the expression may not
2418 really be redundant. So only insert an insn if the expression was
2419 deleted. This test also avoids further processing if the
2420 expression wasn't deleted anywhere. */
2421 if (expr->reaching_reg == NULL)
2422 continue;
2423
2424 /* Set when we add a copy for that expression. */
2425 added_copy = 0;
2426
2427 for (occr = expr->antic_occr; occr != NULL; occr = occr->next)
2428 {
2429 if (! occr->deleted_p)
2430 continue;
2431
2432 for (avail = expr->avail_occr; avail != NULL; avail = avail->next)
2433 {
2434 rtx insn = avail->insn;
2435
2436 /* No need to handle this one if handled already. */
2437 if (avail->copied_p)
2438 continue;
2439
2440 /* Don't handle this one if it's a redundant one. */
2441 if (INSN_DELETED_P (insn))
2442 continue;
2443
2444 /* Or if the expression doesn't reach the deleted one. */
2445 if (! pre_expr_reaches_here_p (BLOCK_FOR_INSN (avail->insn),
2446 expr,
2447 BLOCK_FOR_INSN (occr->insn)))
2448 continue;
2449
2450 added_copy = 1;
2451
2452 /* Copy the result of avail to reaching_reg. */
2453 pre_insert_copy_insn (expr, insn);
2454 avail->copied_p = 1;
2455 }
2456 }
2457
2458 if (added_copy)
2459 update_ld_motion_stores (expr);
2460 }
2461 }
2462
2463 /* Emit move from SRC to DEST noting the equivalence with expression computed
2464 in INSN. */
2465
2466 static rtx
2467 gcse_emit_move_after (rtx dest, rtx src, rtx insn)
2468 {
2469 rtx new_rtx;
2470 rtx set = single_set (insn), set2;
2471 rtx note;
2472 rtx eqv;
2473
2474 /* This should never fail since we're creating a reg->reg copy
2475 we've verified to be valid. */
2476
2477 new_rtx = emit_insn_after (gen_move_insn (dest, src), insn);
2478
2479 /* Note the equivalence for local CSE pass. */
2480 set2 = single_set (new_rtx);
2481 if (!set2 || !rtx_equal_p (SET_DEST (set2), dest))
2482 return new_rtx;
2483 if ((note = find_reg_equal_equiv_note (insn)))
2484 eqv = XEXP (note, 0);
2485 else
2486 eqv = SET_SRC (set);
2487
2488 set_unique_reg_note (new_rtx, REG_EQUAL, copy_insn_1 (eqv));
2489
2490 return new_rtx;
2491 }
2492
2493 /* Delete redundant computations.
2494 Deletion is done by changing the insn to copy the `reaching_reg' of
2495 the expression into the result of the SET. It is left to later passes
2496 (cprop, cse2, flow, combine, regmove) to propagate the copy or eliminate it.
2497
2498 Return nonzero if a change is made. */
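/* For instance (a sketch with made-up register numbers): a redundant
   (set (reg 70) (plus (reg 65) (const_int 4))) is deleted and replaced by
   (set (reg 70) (reg 130)), where reg 130 is EXPR->reaching_reg; later
   passes can then propagate or eliminate the copy.  */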
2499
2500 static int
2501 pre_delete (void)
2502 {
2503 unsigned int i;
2504 int changed;
2505 struct expr *expr;
2506 struct occr *occr;
2507
2508 changed = 0;
2509 for (i = 0; i < expr_hash_table.size; i++)
2510 for (expr = expr_hash_table.table[i]; expr; expr = expr->next_same_hash)
2511 {
2512 int indx = expr->bitmap_index;
2513
2514 /* We only need to search antic_occr since we require ANTLOC != 0. */
2515 for (occr = expr->antic_occr; occr != NULL; occr = occr->next)
2516 {
2517 rtx insn = occr->insn;
2518 rtx set;
2519 basic_block bb = BLOCK_FOR_INSN (insn);
2520
2521 /* We only delete insns that have a single_set. */
2522 if (TEST_BIT (pre_delete_map[bb->index], indx)
2523 && (set = single_set (insn)) != 0
2524 && dbg_cnt (pre_insn))
2525 {
2526 /* Create a pseudo-reg to store the result of reaching
2527 expressions into. Get the mode for the new pseudo from
2528 the mode of the original destination pseudo. */
2529 if (expr->reaching_reg == NULL)
2530 expr->reaching_reg = gen_reg_rtx_and_attrs (SET_DEST (set));
2531
2532 gcse_emit_move_after (SET_DEST (set), expr->reaching_reg, insn);
2533 delete_insn (insn);
2534 occr->deleted_p = 1;
2535 changed = 1;
2536 gcse_subst_count++;
2537
2538 if (dump_file)
2539 {
2540 fprintf (dump_file,
2541 "PRE: redundant insn %d (expression %d) in ",
2542 INSN_UID (insn), indx);
2543 fprintf (dump_file, "bb %d, reaching reg is %d\n",
2544 bb->index, REGNO (expr->reaching_reg));
2545 }
2546 }
2547 }
2548 }
2549
2550 return changed;
2551 }
2552
2553 /* Perform GCSE optimizations using PRE.
2554 This is called by one_pre_gcse_pass after all the dataflow analysis
2555 has been done.
2556
2557 This is based on the original Morel-Renvoise paper, Fred Chow's thesis, and
2558 lazy code motion from Knoop, Ruthing and Steffen as described in Advanced
2559 Compiler Design and Implementation.
2560
2561 ??? A new pseudo reg is created to hold the reaching expression. The nice
2562 thing about the classical approach is that it would try to use an existing
2563 reg. If the register can't be adequately optimized [i.e. we introduce
2564 reload problems], one could add a pass here to propagate the new register
2565 through the block.
2566
2567 ??? We don't handle single sets in PARALLELs because we're [currently] not
2568 able to copy the rest of the parallel when we insert copies to create full
2569 redundancies from partial redundancies. However, there's no reason why we
2570 can't handle PARALLELs in the cases where there are no partial
2571 redundancies. */
2572
2573 static int
2574 pre_gcse (struct edge_list *edge_list)
2575 {
2576 unsigned int i;
2577 int did_insert, changed;
2578 struct expr **index_map;
2579 struct expr *expr;
2580
2581 /* Compute a mapping from expression number (`bitmap_index') to
2582 hash table entry. */
2583
2584 index_map = XCNEWVEC (struct expr *, expr_hash_table.n_elems);
2585 for (i = 0; i < expr_hash_table.size; i++)
2586 for (expr = expr_hash_table.table[i]; expr; expr = expr->next_same_hash)
2587 index_map[expr->bitmap_index] = expr;
2588
2589 /* Delete the redundant insns first so that
2590 - we know what register to use for the new insns and for the other
2591 ones with reaching expressions
2592 - we know which insns are redundant when we go to create copies */
2593
2594 changed = pre_delete ();
2595 did_insert = pre_edge_insert (edge_list, index_map);
2596
2597 /* In other places with reaching expressions, copy the expression to the
2598 specially allocated pseudo-reg that reaches the redundant expr. */
2599 pre_insert_copies ();
2600 if (did_insert)
2601 {
2602 commit_edge_insertions ();
2603 changed = 1;
2604 }
2605
2606 free (index_map);
2607 return changed;
2608 }
2609
2610 /* Top level routine to perform one PRE GCSE pass.
2611
2612 Return nonzero if a change was made. */
2613
2614 static int
2615 one_pre_gcse_pass (void)
2616 {
2617 int changed = 0;
2618
2619 gcse_subst_count = 0;
2620 gcse_create_count = 0;
2621
2622 /* Return if there's nothing to do, or it is too expensive. */
2623 if (n_basic_blocks <= NUM_FIXED_BLOCKS + 1
2624 || is_too_expensive (_("PRE disabled")))
2625 return 0;
2626
2627 /* We need alias. */
2628 init_alias_analysis ();
2629
2630 bytes_used = 0;
2631 gcc_obstack_init (&gcse_obstack);
2632 alloc_gcse_mem ();
2633
2634 alloc_hash_table (&expr_hash_table);
2635 add_noreturn_fake_exit_edges ();
2636 if (flag_gcse_lm)
2637 compute_ld_motion_mems ();
2638
2639 compute_hash_table (&expr_hash_table);
2640 if (flag_gcse_lm)
2641 trim_ld_motion_mems ();
2642 if (dump_file)
2643 dump_hash_table (dump_file, "Expression", &expr_hash_table);
2644
2645 if (expr_hash_table.n_elems > 0)
2646 {
2647 struct edge_list *edge_list;
2648 alloc_pre_mem (last_basic_block, expr_hash_table.n_elems);
2649 edge_list = compute_pre_data ();
2650 changed |= pre_gcse (edge_list);
2651 free_edge_list (edge_list);
2652 free_pre_mem ();
2653 }
2654
2655 if (flag_gcse_lm)
2656 free_ld_motion_mems ();
2657 remove_fake_exit_edges ();
2658 free_hash_table (&expr_hash_table);
2659
2660 free_gcse_mem ();
2661 obstack_free (&gcse_obstack, NULL);
2662
2663 /* We are finished with alias. */
2664 end_alias_analysis ();
2665
2666 if (dump_file)
2667 {
2668 fprintf (dump_file, "PRE GCSE of %s, %d basic blocks, %d bytes needed, ",
2669 current_function_name (), n_basic_blocks, bytes_used);
2670 fprintf (dump_file, "%d substs, %d insns created\n",
2671 gcse_subst_count, gcse_create_count);
2672 }
2673
2674 return changed;
2675 }
2676 \f
2677 /* If X contains any LABEL_REF's, add REG_LABEL_OPERAND notes for them
2678 to INSN. If such notes are added to an insn which references a
2679 CODE_LABEL, the LABEL_NUSES count is incremented. We have to add
2680 that note, because the following loop optimization pass requires
2681 them. */
2682
2683 /* ??? If there was a jump optimization pass after gcse and before loop,
2684 then we would not need to do this here, because jump would add the
2685 necessary REG_LABEL_OPERAND and REG_LABEL_TARGET notes. */
2686
2687 static void
2688 add_label_notes (rtx x, rtx insn)
2689 {
2690 enum rtx_code code = GET_CODE (x);
2691 int i, j;
2692 const char *fmt;
2693
2694 if (code == LABEL_REF && !LABEL_REF_NONLOCAL_P (x))
2695 {
2696 /* This code used to ignore labels that referred to dispatch tables to
2697 avoid flow generating (slightly) worse code.
2698
2699 We no longer ignore such label references (see LABEL_REF handling in
2700 mark_jump_label for additional information). */
2701
2702 /* There's no reason for current users to emit jump-insns with
2703 such a LABEL_REF, so we don't have to handle REG_LABEL_TARGET
2704 notes. */
2705 gcc_assert (!JUMP_P (insn));
2706 add_reg_note (insn, REG_LABEL_OPERAND, XEXP (x, 0));
2707
2708 if (LABEL_P (XEXP (x, 0)))
2709 LABEL_NUSES (XEXP (x, 0))++;
2710
2711 return;
2712 }
2713
2714 for (i = GET_RTX_LENGTH (code) - 1, fmt = GET_RTX_FORMAT (code); i >= 0; i--)
2715 {
2716 if (fmt[i] == 'e')
2717 add_label_notes (XEXP (x, i), insn);
2718 else if (fmt[i] == 'E')
2719 for (j = XVECLEN (x, i) - 1; j >= 0; j--)
2720 add_label_notes (XVECEXP (x, i, j), insn);
2721 }
2722 }
2723
2724 /* Code Hoisting variables and subroutines. */
2725
2726 /* Very busy expressions. */
2727 static sbitmap *hoist_vbein;
2728 static sbitmap *hoist_vbeout;
2729
2730 /* ??? We could compute post dominators and run this algorithm in
2731 reverse to perform tail merging; doing so would probably be
2732 more effective than the tail merging code in jump.c.
2733
2734 It's unclear if tail merging could be run in parallel with
2735 code hoisting. It would be nice. */
2736
2737 /* Allocate vars used for code hoisting analysis. */
2738
2739 static void
2740 alloc_code_hoist_mem (int n_blocks, int n_exprs)
2741 {
2742 antloc = sbitmap_vector_alloc (n_blocks, n_exprs);
2743 transp = sbitmap_vector_alloc (n_blocks, n_exprs);
2744 comp = sbitmap_vector_alloc (n_blocks, n_exprs);
2745
2746 hoist_vbein = sbitmap_vector_alloc (n_blocks, n_exprs);
2747 hoist_vbeout = sbitmap_vector_alloc (n_blocks, n_exprs);
2748 }
2749
2750 /* Free vars used for code hoisting analysis. */
2751
2752 static void
2753 free_code_hoist_mem (void)
2754 {
2755 sbitmap_vector_free (antloc);
2756 sbitmap_vector_free (transp);
2757 sbitmap_vector_free (comp);
2758
2759 sbitmap_vector_free (hoist_vbein);
2760 sbitmap_vector_free (hoist_vbeout);
2761
2762 free_dominance_info (CDI_DOMINATORS);
2763 }
2764
2765 /* Compute the very busy expressions at entry/exit from each block.
2766
2767 An expression is very busy if all paths from a given point
2768 compute the expression. */
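/* In the notation of the local properties above, the fixed point computed
   below is

      VBEOUT(bb) = COMP(bb) | (intersection of VBEIN over bb's successors)
      VBEIN(bb)  = ANTLOC(bb) | (VBEOUT(bb) & TRANSP(bb))

   with VBEOUT left empty for the block laid out just before the exit
   block.  */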
2769
2770 static void
2771 compute_code_hoist_vbeinout (void)
2772 {
2773 int changed, passes;
2774 basic_block bb;
2775
2776 sbitmap_vector_zero (hoist_vbeout, last_basic_block);
2777 sbitmap_vector_zero (hoist_vbein, last_basic_block);
2778
2779 passes = 0;
2780 changed = 1;
2781
2782 while (changed)
2783 {
2784 changed = 0;
2785
2786 /* We scan the blocks in reverse order to speed up
2787 the convergence. */
2788 FOR_EACH_BB_REVERSE (bb)
2789 {
2790 if (bb->next_bb != EXIT_BLOCK_PTR)
2791 {
2792 sbitmap_intersection_of_succs (hoist_vbeout[bb->index],
2793 hoist_vbein, bb);
2794
2795 /* Include expressions in VBEout that are calculated
2796 in BB and available at its end. */
2797 sbitmap_a_or_b (hoist_vbeout[bb->index],
2798 hoist_vbeout[bb->index], comp[bb->index]);
2799 }
2800
2801 changed |= sbitmap_a_or_b_and_c_cg (hoist_vbein[bb->index],
2802 antloc[bb->index],
2803 hoist_vbeout[bb->index],
2804 transp[bb->index]);
2805 }
2806
2807 passes++;
2808 }
2809
2810 if (dump_file)
2811 {
2812 fprintf (dump_file, "hoisting vbeinout computation: %d passes\n", passes);
2813
2814 FOR_EACH_BB (bb)
2815 {
2816 fprintf (dump_file, "vbein (%d): ", bb->index);
2817 dump_sbitmap_file (dump_file, hoist_vbein[bb->index]);
2818 fprintf (dump_file, "vbeout(%d): ", bb->index);
2819 dump_sbitmap_file (dump_file, hoist_vbeout[bb->index]);
2820 }
2821 }
2822 }
2823
2824 /* Top level routine to do the dataflow analysis needed by code hoisting. */
2825
2826 static void
2827 compute_code_hoist_data (void)
2828 {
2829 compute_local_properties (transp, comp, antloc, &expr_hash_table);
2830 prune_expressions (false);
2831 compute_code_hoist_vbeinout ();
2832 calculate_dominance_info (CDI_DOMINATORS);
2833 if (dump_file)
2834 fprintf (dump_file, "\n");
2835 }
2836
2837 /* Determine if the expression identified by EXPR_INDEX would
2838 reach BB unimpaired if it was placed at the end of EXPR_BB.
2839 Stop the search if the expression would need to be moved more
2840 than DISTANCE instructions.
2841
2842 It's unclear exactly what Muchnick meant by "unimpaired". It seems
2843 to me that the expression must either be computed or transparent in
2844 *every* block in the path(s) from EXPR_BB to BB. Any other definition
2845 would allow the expression to be hoisted out of loops, even if
2846 the expression wasn't a loop invariant.
2847
2848 Contrast this to reachability for PRE where an expression is
2849 considered reachable if *any* path reaches instead of *all*
2850 paths. */
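/* Note on DISTANCE: a positive value is decremented by the size of each
   block the expression would travel through and terminates the search
   when exhausted; a value of zero is asserted below and never limits the
   walk.  */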
2851
2852 static int
2853 hoist_expr_reaches_here_p (basic_block expr_bb, int expr_index, basic_block bb,
2854 char *visited, int distance, int *bb_size)
2855 {
2856 edge pred;
2857 edge_iterator ei;
2858 int visited_allocated_locally = 0;
2859
2860 /* Terminate the search if the distance EXPR is allowed to move
2861 has been exhausted. */
2862 if (distance > 0)
2863 {
2864 distance -= bb_size[bb->index];
2865
2866 if (distance <= 0)
2867 return 0;
2868 }
2869 else
2870 gcc_assert (distance == 0);
2871
2872 if (visited == NULL)
2873 {
2874 visited_allocated_locally = 1;
2875 visited = XCNEWVEC (char, last_basic_block);
2876 }
2877
2878 FOR_EACH_EDGE (pred, ei, bb->preds)
2879 {
2880 basic_block pred_bb = pred->src;
2881
2882 if (pred->src == ENTRY_BLOCK_PTR)
2883 break;
2884 else if (pred_bb == expr_bb)
2885 continue;
2886 else if (visited[pred_bb->index])
2887 continue;
2888
2889 else if (! TEST_BIT (transp[pred_bb->index], expr_index))
2890 break;
2891
2892 /* Not killed. */
2893 else
2894 {
2895 visited[pred_bb->index] = 1;
2896 if (! hoist_expr_reaches_here_p (expr_bb, expr_index, pred_bb,
2897 visited, distance, bb_size))
2898 break;
2899 }
2900 }
2901 if (visited_allocated_locally)
2902 free (visited);
2903
2904 return (pred == NULL);
2905 }
2906 \f
2907 /* Find occurrence in BB. */
2908
2909 static struct occr *
2910 find_occr_in_bb (struct occr *occr, basic_block bb)
2911 {
2912 /* Find the right occurrence of this expression. */
2913 while (occr && BLOCK_FOR_INSN (occr->insn) != bb)
2914 occr = occr->next;
2915
2916 return occr;
2917 }
2918
2919 /* Actually perform code hoisting. */
2920
2921 static int
2922 hoist_code (void)
2923 {
2924 basic_block bb, dominated;
2925 VEC (basic_block, heap) *dom_tree_walk;
2926 unsigned int dom_tree_walk_index;
2927 VEC (basic_block, heap) *domby;
2928 unsigned int i,j;
2929 struct expr **index_map;
2930 struct expr *expr;
2931 int *to_bb_head;
2932 int *bb_size;
2933 int changed = 0;
2934
2935 /* Compute a mapping from expression number (`bitmap_index') to
2936 hash table entry. */
2937
2938 index_map = XCNEWVEC (struct expr *, expr_hash_table.n_elems);
2939 for (i = 0; i < expr_hash_table.size; i++)
2940 for (expr = expr_hash_table.table[i]; expr; expr = expr->next_same_hash)
2941 index_map[expr->bitmap_index] = expr;
2942
2943 /* Calculate sizes of basic blocks and note how far
2944 each instruction is from the start of its block. We then use this
2945 data to restrict the distance an expression can travel. */
2946
2947 to_bb_head = XCNEWVEC (int, get_max_uid ());
2948 bb_size = XCNEWVEC (int, last_basic_block);
2949
2950 FOR_EACH_BB (bb)
2951 {
2952 rtx insn;
2953 int to_head;
2954
2955 to_head = 0;
2956 FOR_BB_INSNS (bb, insn)
2957 {
2958 /* Don't count debug instructions so that they do not affect
2959 the hoisting decisions. */
2960 if (NONDEBUG_INSN_P (insn))
2961 to_bb_head[INSN_UID (insn)] = to_head++;
2962 }
2963
2964 bb_size[bb->index] = to_head;
2965 }
2966
2967 gcc_assert (EDGE_COUNT (ENTRY_BLOCK_PTR->succs) == 1
2968 && (EDGE_SUCC (ENTRY_BLOCK_PTR, 0)->dest
2969 == ENTRY_BLOCK_PTR->next_bb));
2970
2971 dom_tree_walk = get_all_dominated_blocks (CDI_DOMINATORS,
2972 ENTRY_BLOCK_PTR->next_bb);
2973
2974 /* Walk over each basic block looking for potentially hoistable
2975 expressions; nothing gets hoisted from the entry block. */
2976 FOR_EACH_VEC_ELT (basic_block, dom_tree_walk, dom_tree_walk_index, bb)
2977 {
2978 domby = get_dominated_to_depth (CDI_DOMINATORS, bb, MAX_HOIST_DEPTH);
2979
2980 if (VEC_length (basic_block, domby) == 0)
2981 continue;
2982
2983 /* Examine each expression that is very busy at the exit of this
2984 block. These are the potentially hoistable expressions. */
2985 for (i = 0; i < SBITMAP_SIZE (hoist_vbeout[bb->index]); i++)
2986 {
2987 if (TEST_BIT (hoist_vbeout[bb->index], i))
2988 {
2989 /* Current expression. */
2990 struct expr *expr = index_map[i];
2991 /* Number of occurrences of EXPR that can be hoisted to BB. */
2992 int hoistable = 0;
2993 /* Basic blocks that have occurrences reachable from BB. */
2994 bitmap_head _from_bbs, *from_bbs = &_from_bbs;
2995 /* Occurrences reachable from BB. */
2996 VEC (occr_t, heap) *occrs_to_hoist = NULL;
2997 /* We want to insert the expression into BB only once, so
2998 note when we've inserted it. */
2999 int insn_inserted_p;
3000 occr_t occr;
3001
3002 bitmap_initialize (from_bbs, 0);
3003
3004 /* If an expression is computed in BB and is available at end of
3005 BB, hoist all occurrences dominated by BB to BB. */
3006 if (TEST_BIT (comp[bb->index], i))
3007 {
3008 occr = find_occr_in_bb (expr->antic_occr, bb);
3009
3010 if (occr)
3011 {
3012 /* An occurrence might've been already deleted
3013 while processing a dominator of BB. */
3014 if (!occr->deleted_p)
3015 {
3016 gcc_assert (NONDEBUG_INSN_P (occr->insn));
3017 hoistable++;
3018 }
3019 }
3020 else
3021 hoistable++;
3022 }
3023
3024 /* We've found a potentially hoistable expression, now
3025 we look at every block BB dominates to see if it
3026 computes the expression. */
3027 FOR_EACH_VEC_ELT (basic_block, domby, j, dominated)
3028 {
3029 int max_distance;
3030
3031 /* Ignore self dominance. */
3032 if (bb == dominated)
3033 continue;
3034 /* We've found a dominated block, now see if it computes
3035 the busy expression and whether or not moving that
3036 expression to the "beginning" of that block is safe. */
3037 if (!TEST_BIT (antloc[dominated->index], i))
3038 continue;
3039
3040 occr = find_occr_in_bb (expr->antic_occr, dominated);
3041 gcc_assert (occr);
3042
3043 /* An occurrence might've been already deleted
3044 while processing a dominator of BB. */
3045 if (occr->deleted_p)
3046 continue;
3047 gcc_assert (NONDEBUG_INSN_P (occr->insn));
3048
3049 max_distance = expr->max_distance;
3050 if (max_distance > 0)
3051 /* Adjust MAX_DISTANCE to account for the fact that
3052 OCCR won't have to travel all of DOMINATED, but
3053 only part of it. */
3054 max_distance += (bb_size[dominated->index]
3055 - to_bb_head[INSN_UID (occr->insn)]);
3056
3057 /* Note if the expression would reach the dominated block
3058 unimpaired if it was placed at the end of BB.
3059
3060 Keep track of how many times this expression is hoistable
3061 from a dominated block into BB. */
3062 if (hoist_expr_reaches_here_p (bb, i, dominated, NULL,
3063 max_distance, bb_size))
3064 {
3065 hoistable++;
3066 VEC_safe_push (occr_t, heap,
3067 occrs_to_hoist, occr);
3068 bitmap_set_bit (from_bbs, dominated->index);
3069 }
3070 }
3071
3072 /* If we found more than one hoistable occurrence of this
3073 expression, then note it in the vector of expressions to
3074 hoist. It makes no sense to hoist things which are computed
3075 in only one BB, and doing so tends to pessimize register
3076 allocation. One could increase this value to try harder
3077 to avoid any possible code expansion due to register
3078 allocation issues; however experiments have shown that
3079 the vast majority of hoistable expressions are only movable
3080 from two successors, so raising this threshold is likely
3081 to nullify any benefit we get from code hoisting. */
3082 if (hoistable > 1 && dbg_cnt (hoist_insn))
3083 {
3084 /* If (hoistable != VEC_length), then there is
3085 an occurrence of EXPR in BB itself. Don't waste
3086 time looking for LCA in this case. */
3087 if ((unsigned) hoistable
3088 == VEC_length (occr_t, occrs_to_hoist))
3089 {
3090 basic_block lca;
3091
3092 lca = nearest_common_dominator_for_set (CDI_DOMINATORS,
3093 from_bbs);
3094 if (lca != bb)
3095 /* Punt, it's better to hoist these occurrences to
3096 LCA. */
3097 VEC_free (occr_t, heap, occrs_to_hoist);
3098 }
3099 }
3100 else
3101 /* Punt, no point hoisting a single occurrence. */
3102 VEC_free (occr_t, heap, occrs_to_hoist);
3103
3104 insn_inserted_p = 0;
3105
3106 /* Walk through occurrences of the I'th expression we want
3107 to hoist to BB and make the transformations. */
3108 FOR_EACH_VEC_ELT (occr_t, occrs_to_hoist, j, occr)
3109 {
3110 rtx insn;
3111 rtx set;
3112
3113 gcc_assert (!occr->deleted_p);
3114
3115 insn = occr->insn;
3116 set = single_set (insn);
3117 gcc_assert (set);
3118
3119 /* Create a pseudo-reg to store the result of reaching
3120 expressions into. Get the mode for the new pseudo
3121 from the mode of the original destination pseudo.
3122
3123 It is important to use new pseudos whenever we
3124 emit a set. This will allow reload to use
3125 rematerialization for such registers. */
3126 if (!insn_inserted_p)
3127 expr->reaching_reg
3128 = gen_reg_rtx_and_attrs (SET_DEST (set));
3129
3130 gcse_emit_move_after (SET_DEST (set), expr->reaching_reg,
3131 insn);
3132 delete_insn (insn);
3133 occr->deleted_p = 1;
3134 changed = 1;
3135 gcse_subst_count++;
3136
3137 if (!insn_inserted_p)
3138 {
3139 insert_insn_end_basic_block (expr, bb);
3140 insn_inserted_p = 1;
3141 }
3142 }
3143
3144 VEC_free (occr_t, heap, occrs_to_hoist);
3145 bitmap_clear (from_bbs);
3146 }
3147 }
3148 VEC_free (basic_block, heap, domby);
3149 }
3150
3151 VEC_free (basic_block, heap, dom_tree_walk);
3152 free (bb_size);
3153 free (to_bb_head);
3154 free (index_map);
3155
3156 return changed;
3157 }
3158
3159 /* Top level routine to perform one code hoisting (aka unification) pass.
3160
3161 Return nonzero if a change was made. */
3162
3163 static int
3164 one_code_hoisting_pass (void)
3165 {
3166 int changed = 0;
3167
3168 gcse_subst_count = 0;
3169 gcse_create_count = 0;
3170
3171 /* Return if there's nothing to do, or it is too expensive. */
3172 if (n_basic_blocks <= NUM_FIXED_BLOCKS + 1
3173 || is_too_expensive (_("GCSE disabled")))
3174 return 0;
3175
3176 doing_code_hoisting_p = true;
3177
3178 /* We need alias. */
3179 init_alias_analysis ();
3180
3181 bytes_used = 0;
3182 gcc_obstack_init (&gcse_obstack);
3183 alloc_gcse_mem ();
3184
3185 alloc_hash_table (&expr_hash_table);
3186 compute_hash_table (&expr_hash_table);
3187 if (dump_file)
3188 dump_hash_table (dump_file, "Code Hoisting Expressions", &expr_hash_table);
3189
3190 if (expr_hash_table.n_elems > 0)
3191 {
3192 alloc_code_hoist_mem (last_basic_block, expr_hash_table.n_elems);
3193 compute_code_hoist_data ();
3194 changed = hoist_code ();
3195 free_code_hoist_mem ();
3196 }
3197
3198 free_hash_table (&expr_hash_table);
3199 free_gcse_mem ();
3200 obstack_free (&gcse_obstack, NULL);
3201
3202 /* We are finished with alias. */
3203 end_alias_analysis ();
3204
3205 if (dump_file)
3206 {
3207 fprintf (dump_file, "HOIST of %s, %d basic blocks, %d bytes needed, ",
3208 current_function_name (), n_basic_blocks, bytes_used);
3209 fprintf (dump_file, "%d substs, %d insns created\n",
3210 gcse_subst_count, gcse_create_count);
3211 }
3212
3213 doing_code_hoisting_p = false;
3214
3215 return changed;
3216 }
3217 \f
3218 /* Here we provide the things required to do store motion towards the exit.
3219 In order for this to be effective, gcse also needed to be taught how to
3220 move a load when it is killed only by a store to itself.
3221
3222 int i;
3223 float a[10];
3224
3225 void foo(float scale)
3226 {
3227 for (i=0; i<10; i++)
3228 a[i] *= scale;
3229 }
3230
3231 'i' is both loaded and stored to in the loop. Normally, gcse cannot move
3232 the load out since it's live around the loop, and stored at the bottom
3233 of the loop.
3234
3235 The 'Load Motion' referred to and implemented in this file is
3236 an enhancement to gcse which when using edge based LCM, recognizes
3237 this situation and allows gcse to move the load out of the loop.
3238
3239 Once gcse has hoisted the load, store motion can then push this
3240 load towards the exit, and we end up with no loads or stores of 'i'
3241 in the loop. */
3242
3243 static hashval_t
3244 pre_ldst_expr_hash (const void *p)
3245 {
3246 int do_not_record_p = 0;
3247 const struct ls_expr *const x = (const struct ls_expr *) p;
3248 return
3249 hash_rtx (x->pattern, GET_MODE (x->pattern), &do_not_record_p, NULL, false);
3250 }
3251
3252 static int
3253 pre_ldst_expr_eq (const void *p1, const void *p2)
3254 {
3255 const struct ls_expr *const ptr1 = (const struct ls_expr *) p1,
3256 *const ptr2 = (const struct ls_expr *) p2;
3257 return expr_equiv_p (ptr1->pattern, ptr2->pattern);
3258 }
3259
3260 /* This will search the ldst list for a matching expression. If it
3261 doesn't find one, we create one and initialize it. */
3262
3263 static struct ls_expr *
3264 ldst_entry (rtx x)
3265 {
3266 int do_not_record_p = 0;
3267 struct ls_expr * ptr;
3268 unsigned int hash;
3269 void **slot;
3270 struct ls_expr e;
3271
3272 hash = hash_rtx (x, GET_MODE (x), &do_not_record_p,
3273 NULL, /*have_reg_qty=*/false);
3274
3275 e.pattern = x;
3276 slot = htab_find_slot_with_hash (pre_ldst_table, &e, hash, INSERT);
3277 if (*slot)
3278 return (struct ls_expr *)*slot;
3279
3280 ptr = XNEW (struct ls_expr);
3281
3282 ptr->next = pre_ldst_mems;
3283 ptr->expr = NULL;
3284 ptr->pattern = x;
3285 ptr->pattern_regs = NULL_RTX;
3286 ptr->loads = NULL_RTX;
3287 ptr->stores = NULL_RTX;
3288 ptr->reaching_reg = NULL_RTX;
3289 ptr->invalid = 0;
3290 ptr->index = 0;
3291 ptr->hash_index = hash;
3292 pre_ldst_mems = ptr;
3293 *slot = ptr;
3294
3295 return ptr;
3296 }
3297
3298 /* Free up an individual ldst entry. */
3299
3300 static void
3301 free_ldst_entry (struct ls_expr * ptr)
3302 {
3303 free_INSN_LIST_list (& ptr->loads);
3304 free_INSN_LIST_list (& ptr->stores);
3305
3306 free (ptr);
3307 }
3308
3309 /* Free up all memory associated with the ldst list. */
3310
3311 static void
3312 free_ld_motion_mems (void)
3313 {
3314 if (pre_ldst_table)
3315 htab_delete (pre_ldst_table);
3316 pre_ldst_table = NULL;
3317
3318 while (pre_ldst_mems)
3319 {
3320 struct ls_expr * tmp = pre_ldst_mems;
3321
3322 pre_ldst_mems = pre_ldst_mems->next;
3323
3324 free_ldst_entry (tmp);
3325 }
3326
3327 pre_ldst_mems = NULL;
3328 }
3329
3330 /* Dump debugging info about the ldst list. */
3331
3332 static void
3333 print_ldst_list (FILE * file)
3334 {
3335 struct ls_expr * ptr;
3336
3337 fprintf (file, "LDST list: \n");
3338
3339 for (ptr = pre_ldst_mems; ptr != NULL; ptr = ptr->next)
3340 {
3341 fprintf (file, " Pattern (%3d): ", ptr->index);
3342
3343 print_rtl (file, ptr->pattern);
3344
3345 fprintf (file, "\n Loads : ");
3346
3347 if (ptr->loads)
3348 print_rtl (file, ptr->loads);
3349 else
3350 fprintf (file, "(nil)");
3351
3352 fprintf (file, "\n Stores : ");
3353
3354 if (ptr->stores)
3355 print_rtl (file, ptr->stores);
3356 else
3357 fprintf (file, "(nil)");
3358
3359 fprintf (file, "\n\n");
3360 }
3361
3362 fprintf (file, "\n");
3363 }
3364
3365 /* Return the ldst list entry for X if one exists and is valid, else NULL. */
3366
3367 static struct ls_expr *
3368 find_rtx_in_ldst (rtx x)
3369 {
3370 struct ls_expr e;
3371 void **slot;
3372 if (!pre_ldst_table)
3373 return NULL;
3374 e.pattern = x;
3375 slot = htab_find_slot (pre_ldst_table, &e, NO_INSERT);
3376 if (!slot || ((struct ls_expr *)*slot)->invalid)
3377 return NULL;
3378 return (struct ls_expr *) *slot;
3379 }
3380 \f
3381 /* Load Motion for loads which only kill themselves. */
3382
3383 /* Return true if x, a MEM, is a simple access with no side effects.
3384 These are the types of loads we consider for the ld_motion list,
3385 otherwise we let the usual aliasing take care of it. */
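/* For instance (the symbol name is made up), a plain
   (mem:SI (symbol_ref "x")) qualifies, whereas a volatile or BLKmode MEM,
   one mentioning the stack pointer, or one that may trap when non-call
   exceptions are enabled does not.  */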
3386
3387 static int
3388 simple_mem (const_rtx x)
3389 {
3390 if (MEM_VOLATILE_P (x))
3391 return 0;
3392
3393 if (GET_MODE (x) == BLKmode)
3394 return 0;
3395
3396 /* If we are handling exceptions, we must be careful with memory references
3397 that may trap. If we are not, the behavior is undefined, so we may just
3398 continue. */
3399 if (cfun->can_throw_non_call_exceptions && may_trap_p (x))
3400 return 0;
3401
3402 if (side_effects_p (x))
3403 return 0;
3404
3405 /* Do not consider function arguments passed on stack. */
3406 if (reg_mentioned_p (stack_pointer_rtx, x))
3407 return 0;
3408
3409 if (flag_float_store && FLOAT_MODE_P (GET_MODE (x)))
3410 return 0;
3411
3412 return 1;
3413 }
3414
3415 /* Make sure there isn't a buried reference in this pattern anywhere.
3416 If there is, invalidate the entry for it since we're not capable
3417 of fixing it up just yet. We have to be sure we know about ALL
3418 loads since the aliasing code will allow each entry in the
3419 ld_motion list to not-alias itself. If we miss a load, we will get
3420 the wrong value since gcse might common it and we won't know to
3421 fix it up. */
3422
3423 static void
3424 invalidate_any_buried_refs (rtx x)
3425 {
3426 const char * fmt;
3427 int i, j;
3428 struct ls_expr * ptr;
3429
3430 /* Invalidate it in the list. */
3431 if (MEM_P (x) && simple_mem (x))
3432 {
3433 ptr = ldst_entry (x);
3434 ptr->invalid = 1;
3435 }
3436
3437 /* Recursively process the insn. */
3438 fmt = GET_RTX_FORMAT (GET_CODE (x));
3439
3440 for (i = GET_RTX_LENGTH (GET_CODE (x)) - 1; i >= 0; i--)
3441 {
3442 if (fmt[i] == 'e')
3443 invalidate_any_buried_refs (XEXP (x, i));
3444 else if (fmt[i] == 'E')
3445 for (j = XVECLEN (x, i) - 1; j >= 0; j--)
3446 invalidate_any_buried_refs (XVECEXP (x, i, j));
3447 }
3448 }
3449
3450 /* Find all the 'simple' MEMs which are used in LOADs and STORES. Simple
3451 being defined as MEM loads and stores to symbols, with no side effects
3452 and no registers in the expression. For a MEM destination, we also
3453 check that the insn is still valid if we replace the destination with a
3454 REG, as is done in update_ld_motion_stores. If there are any uses/defs
3455 which don't match these criteria, they are invalidated and trimmed out
3456 later. */
3457
3458 static void
3459 compute_ld_motion_mems (void)
3460 {
3461 struct ls_expr * ptr;
3462 basic_block bb;
3463 rtx insn;
3464
3465 pre_ldst_mems = NULL;
3466 pre_ldst_table
3467 = htab_create (13, pre_ldst_expr_hash, pre_ldst_expr_eq, NULL);
3468
3469 FOR_EACH_BB (bb)
3470 {
3471 FOR_BB_INSNS (bb, insn)
3472 {
3473 if (NONDEBUG_INSN_P (insn))
3474 {
3475 if (GET_CODE (PATTERN (insn)) == SET)
3476 {
3477 rtx src = SET_SRC (PATTERN (insn));
3478 rtx dest = SET_DEST (PATTERN (insn));
3479
3480 /* Check for a simple LOAD... */
3481 if (MEM_P (src) && simple_mem (src))
3482 {
3483 ptr = ldst_entry (src);
3484 if (REG_P (dest))
3485 ptr->loads = alloc_INSN_LIST (insn, ptr->loads);
3486 else
3487 ptr->invalid = 1;
3488 }
3489 else
3490 {
3491 /* Make sure there isn't a buried load somewhere. */
3492 invalidate_any_buried_refs (src);
3493 }
3494
3495 /* Check for stores. Don't worry about aliased ones, they
3496 will block any movement we might do later. We only care
3497 about this exact pattern since those are the only
3498 circumstances in which we will ignore the aliasing info. */
3499 if (MEM_P (dest) && simple_mem (dest))
3500 {
3501 ptr = ldst_entry (dest);
3502
3503 if (! MEM_P (src)
3504 && GET_CODE (src) != ASM_OPERANDS
3505 /* Check for REG manually since want_to_gcse_p
3506 returns 0 for all REGs. */
3507 && can_assign_to_reg_without_clobbers_p (src))
3508 ptr->stores = alloc_INSN_LIST (insn, ptr->stores);
3509 else
3510 ptr->invalid = 1;
3511 }
3512 }
3513 else
3514 invalidate_any_buried_refs (PATTERN (insn));
3515 }
3516 }
3517 }
3518 }
3519
3520 /* Remove any references that have been either invalidated or are not in the
3521 expression list for pre gcse. */
3522
3523 static void
3524 trim_ld_motion_mems (void)
3525 {
3526 struct ls_expr * * last = & pre_ldst_mems;
3527 struct ls_expr * ptr = pre_ldst_mems;
3528
3529 while (ptr != NULL)
3530 {
3531 struct expr * expr;
3532
3533 /* Delete if entry has been made invalid. */
3534 if (! ptr->invalid)
3535 {
3536 /* Delete if we cannot find this mem in the expression list. */
3537 unsigned int hash = ptr->hash_index % expr_hash_table.size;
3538
3539 for (expr = expr_hash_table.table[hash];
3540 expr != NULL;
3541 expr = expr->next_same_hash)
3542 if (expr_equiv_p (expr->expr, ptr->pattern))
3543 break;
3544 }
3545 else
3546 expr = (struct expr *) 0;
3547
3548 if (expr)
3549 {
3550 /* Set the expression field if we are keeping it. */
3551 ptr->expr = expr;
3552 last = & ptr->next;
3553 ptr = ptr->next;
3554 }
3555 else
3556 {
3557 *last = ptr->next;
3558 htab_remove_elt_with_hash (pre_ldst_table, ptr, ptr->hash_index);
3559 free_ldst_entry (ptr);
3560 ptr = * last;
3561 }
3562 }
3563
3564 /* Show the world what we've found. */
3565 if (dump_file && pre_ldst_mems != NULL)
3566 print_ldst_list (dump_file);
3567 }
3568
3569 /* This routine will take an expression which we are replacing with
3570 a reaching register, and update any stores that are needed if
3571 that expression is in the ld_motion list. Stores are updated by
3572 copying their SRC to the reaching register, and then storing
3573 the reaching register into the store location. This keeps the
3574 correct value in the reaching register for the loads. */
3575
3576 static void
3577 update_ld_motion_stores (struct expr * expr)
3578 {
3579 struct ls_expr * mem_ptr;
3580
3581 if ((mem_ptr = find_rtx_in_ldst (expr->expr)))
3582 {
3583 /* We can try to find just the REACHED stores, but it shouldn't
3584 matter to set the reaching reg everywhere... some might be
3585 dead and should be eliminated later. */
3586
3587 /* We replace (set mem expr) with (set reg expr) (set mem reg)
3588 where reg is the reaching reg used in the load. We checked in
3589 compute_ld_motion_mems that we can replace (set mem expr) with
3590 (set reg expr) in that insn. */
3591 rtx list = mem_ptr->stores;
3592
3593 for ( ; list != NULL_RTX; list = XEXP (list, 1))
3594 {
3595 rtx insn = XEXP (list, 0);
3596 rtx pat = PATTERN (insn);
3597 rtx src = SET_SRC (pat);
3598 rtx reg = expr->reaching_reg;
3599 rtx copy;
3600
3601 /* If we've already copied it, continue. */
3602 if (expr->reaching_reg == src)
3603 continue;
3604
3605 if (dump_file)
3606 {
3607 fprintf (dump_file, "PRE: store updated with reaching reg ");
3608 print_rtl (dump_file, reg);
3609 fprintf (dump_file, ":\n ");
3610 print_inline_rtx (dump_file, insn, 8);
3611 fprintf (dump_file, "\n");
3612 }
3613
3614 copy = gen_move_insn (reg, copy_rtx (SET_SRC (pat)));
3615 emit_insn_before (copy, insn);
3616 SET_SRC (pat) = reg;
3617 df_insn_rescan (insn);
3618
3619 /* Un-recognize this pattern since it's probably different now. */
3620 INSN_CODE (insn) = -1;
3621 gcse_create_count++;
3622 }
3623 }
3624 }
3625 \f
3626 /* Return true if the graph is too expensive to optimize. PASS is the
3627 optimization about to be performed. */
3628
3629 static bool
3630 is_too_expensive (const char *pass)
3631 {
3632 /* Trying to perform global optimizations on flow graphs which have
3633 a high connectivity will take a long time and is unlikely to be
3634 particularly useful.
3635
3636 In normal circumstances a cfg should have about twice as many
3637 edges as blocks. But we do not want to punish small functions
3638 which have a couple of switch statements. Rather than simply
3639 thresholding the number of blocks, use something with a more
3640 graceful degradation. */
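/* Rough worked example (numbers are illustrative only): with the threshold
   below, a function with 1000 basic blocks is rejected only once it has
   more than 20000 + 4 * 1000 = 24000 edges, i.e. about 24 edges per block,
   far above the ~2 edges per block of a normal cfg. */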
3641 if (n_edges > 20000 + n_basic_blocks * 4)
3642 {
3643 warning (OPT_Wdisabled_optimization,
3644 "%s: %d basic blocks and %d edges/basic block",
3645 pass, n_basic_blocks, n_edges / n_basic_blocks);
3646
3647 return true;
3648 }
3649
3650 /* If allocating memory for the dataflow bitmaps would take up too much
3651 storage it's better just to disable the optimization. */
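/* Back-of-the-envelope estimate (assumption, not exact accounting): each
   block needs a bitmap with one bit per register, rounded up to whole
   SBITMAP_ELT_TYPE words, so the product below is roughly
   n_basic_blocks * max_reg_num () / 8 bytes. For, say, 10000 blocks and
   80000 registers that is on the order of 100 MB. */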
3652 if ((n_basic_blocks
3653 * SBITMAP_SET_SIZE (max_reg_num ())
3654 * sizeof (SBITMAP_ELT_TYPE)) > MAX_GCSE_MEMORY)
3655 {
3656 warning (OPT_Wdisabled_optimization,
3657 "%s: %d basic blocks and %d registers",
3658 pass, n_basic_blocks, max_reg_num ());
3659
3660 return true;
3661 }
3662
3663 return false;
3664 }
3665 \f
3666 /* All the passes implemented in this file. Each pass has its
3667 own gate and execute function, and at the end of the file the
3668 pass definitions for passes.c.
3669
3670 We do not construct an accurate cfg in functions which call
3671 setjmp, so none of these passes runs if the function calls
3672 setjmp.
3673 FIXME: Should just handle setjmp via REG_SETJMP notes. */
3674
3675 static bool
3676 gate_rtl_pre (void)
3677 {
3678 return optimize > 0 && flag_gcse
3679 && !cfun->calls_setjmp
3680 && optimize_function_for_speed_p (cfun)
3681 && dbg_cnt (pre);
3682 }
3683
3684 static unsigned int
3685 execute_rtl_pre (void)
3686 {
3687 int changed;
3688 delete_unreachable_blocks ();
3689 df_analyze ();
3690 changed = one_pre_gcse_pass ();
3691 flag_rerun_cse_after_global_opts |= changed;
3692 if (changed)
3693 cleanup_cfg (0);
3694 return 0;
3695 }
3696
3697 static bool
3698 gate_rtl_hoist (void)
3699 {
3700 return optimize > 0 && flag_gcse
3701 && !cfun->calls_setjmp
3702 /* It does not make sense to run code hoisting unless we are optimizing
3703 for code size -- it rarely makes programs faster, and can make them
3704 bigger if we did PRE (when optimizing for space, we don't run PRE). */
3705 && optimize_function_for_size_p (cfun)
3706 && dbg_cnt (hoist);
3707 }
3708
3709 static unsigned int
3710 execute_rtl_hoist (void)
3711 {
3712 int changed;
3713 delete_unreachable_blocks ();
3714 df_analyze ();
3715 changed = one_code_hoisting_pass ();
3716 flag_rerun_cse_after_global_opts |= changed;
3717 if (changed)
3718 cleanup_cfg (0);
3719 return 0;
3720 }
3721
3722 struct rtl_opt_pass pass_rtl_pre =
3723 {
3724 {
3725 RTL_PASS,
3726 "rtl pre", /* name */
3727 gate_rtl_pre, /* gate */
3728 execute_rtl_pre, /* execute */
3729 NULL, /* sub */
3730 NULL, /* next */
3731 0, /* static_pass_number */
3732 TV_PRE, /* tv_id */
3733 PROP_cfglayout, /* properties_required */
3734 0, /* properties_provided */
3735 0, /* properties_destroyed */
3736 0, /* todo_flags_start */
3737 TODO_df_finish | TODO_verify_rtl_sharing |
3738 TODO_verify_flow | TODO_ggc_collect /* todo_flags_finish */
3739 }
3740 };
3741
3742 struct rtl_opt_pass pass_rtl_hoist =
3743 {
3744 {
3745 RTL_PASS,
3746 "hoist", /* name */
3747 gate_rtl_hoist, /* gate */
3748 execute_rtl_hoist, /* execute */
3749 NULL, /* sub */
3750 NULL, /* next */
3751 0, /* static_pass_number */
3752 TV_HOIST, /* tv_id */
3753 PROP_cfglayout, /* properties_required */
3754 0, /* properties_provided */
3755 0, /* properties_destroyed */
3756 0, /* todo_flags_start */
3757 TODO_df_finish | TODO_verify_rtl_sharing |
3758 TODO_verify_flow | TODO_ggc_collect /* todo_flags_finish */
3759 }
3760 };
3761
3762 #include "gt-gcse.h"