gcc/gcse.c

   1 /* Global common subexpression elimination/Partial redundancy elimination
   2    and global constant/copy propagation for GNU compiler.
   3    Copyright (C) 1997, 1998, 1999 Free Software Foundation, Inc.
   4
   5 This file is part of GNU CC.
   6
   7 GNU CC is free software; you can redistribute it and/or modify
   8 it under the terms of the GNU General Public License as published by
   9 the Free Software Foundation; either version 2, or (at your option)
  10 any later version.
  11
  12 GNU CC is distributed in the hope that it will be useful,
  13 but WITHOUT ANY WARRANTY; without even the implied warranty of
  14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  15 GNU General Public License for more details.
  16
  17 You should have received a copy of the GNU General Public License
  18 along with GNU CC; see the file COPYING.  If not, write to
  19 the Free Software Foundation, 59 Temple Place - Suite 330,
  20 Boston, MA 02111-1307, USA.  */
  21
  22 /* TODO
  23    - reordering of memory allocation and freeing to be more space efficient
  24    - do rough calc of how many regs are needed in each block, and a rough
  25      calc of how many regs are available in each class and use that to
  26      throttle back the code in cases where RTX_COST is minimal.
  27    - dead store elimination
  28    - a store to the same address as a load does not kill the load if the
  29      source of the store is also the destination of the load.  Handling this
  30      allows more load motion, particularly out of loops.
  31    - ability to realloc sbitmap vectors would allow one initial computation
  32      of reg_set_in_block with only subsequent additions, rather than
  33      recomputing it for each pass
  34
  35 */
  36
  37 /* References searched while implementing this.
  38
  39    Compilers Principles, Techniques and Tools
  40    Aho, Sethi, Ullman
  41    Addison-Wesley, 1988
  42
  43    Global Optimization by Suppression of Partial Redundancies
  44    E. Morel, C. Renvoise
  45    Communications of the ACM, Vol. 22, Num. 2, Feb. 1979
  46
  47    A Portable Machine-Independent Global Optimizer - Design and Measurements
  48    Frederick Chow
  49    Stanford Ph.D. thesis, Dec. 1983
  50
  51    A Fast Algorithm for Code Movement Optimization
  52    D.M. Dhamdhere
  53    SIGPLAN Notices, Vol. 23, Num. 10, Oct. 1988
  54
  55    A Solution to a Problem with Morel and Renvoise's
  56    Global Optimization by Suppression of Partial Redundancies
  57    K-H Drechsler, M.P. Stadel
  58    ACM TOPLAS, Vol. 10, Num. 4, Oct. 1988
  59
  60    Practical Adaptation of the Global Optimization
  61    Algorithm of Morel and Renvoise
  62    D.M. Dhamdhere
  63    ACM TOPLAS, Vol. 13, Num. 2, Apr. 1991
  64
  65    Efficiently Computing Static Single Assignment Form and the Control
  66    Dependence Graph
  67    R. Cytron, J. Ferrante, B.K. Rosen, M.N. Wegman, and F.K. Zadeck
  68    ACM TOPLAS, Vol. 13, Num. 4, Oct. 1991
  69
  70    Lazy Code Motion
  71    J. Knoop, O. Ruthing, B. Steffen
  72    ACM SIGPLAN Notices Vol. 27, Num. 7, Jul. 1992, '92 Conference on PLDI
  73
  74    What's In a Region?  Or Computing Control Dependence Regions in Near-Linear
  75    Time for Reducible Flow Control
  76    Thomas Ball
  77    ACM Letters on Programming Languages and Systems,
  78    Vol. 2, Num. 1-4, Mar-Dec 1993
  79
  80    An Efficient Representation for Sparse Sets
  81    Preston Briggs, Linda Torczon
  82    ACM Letters on Programming Languages and Systems,
  83    Vol. 2, Num. 1-4, Mar-Dec 1993
  84
  85    A Variation of Knoop, Ruthing, and Steffen's Lazy Code Motion
  86    K-H Drechsler, M.P. Stadel
  87    ACM SIGPLAN Notices, Vol. 28, Num. 5, May 1993
  88
  89    Partial Dead Code Elimination
  90    J. Knoop, O. Ruthing, B. Steffen
  91    ACM SIGPLAN Notices, Vol. 29, Num. 6, Jun. 1994
  92
  93    Effective Partial Redundancy Elimination
  94    P. Briggs, K.D. Cooper
  95    ACM SIGPLAN Notices, Vol. 29, Num. 6, Jun. 1994
  96
  97    The Program Structure Tree: Computing Control Regions in Linear Time
  98    R. Johnson, D. Pearson, K. Pingali
  99    ACM SIGPLAN Notices, Vol. 29, Num. 6, Jun. 1994
 100
 101    Optimal Code Motion: Theory and Practice
 102    J. Knoop, O. Ruthing, B. Steffen
 103    ACM TOPLAS, Vol. 16, Num. 4, Jul. 1994
 104
 105    The power of assignment motion
 106    J. Knoop, O. Ruthing, B. Steffen
 107    ACM SIGPLAN Notices Vol. 30, Num. 6, Jun. 1995, '95 Conference on PLDI
 108
 109    Global code motion / global value numbering
 110    C. Click
 111    ACM SIGPLAN Notices Vol. 30, Num. 6, Jun. 1995, '95 Conference on PLDI
 112
 113    Value Driven Redundancy Elimination
 114    L.T. Simpson
 115    Rice University Ph.D. thesis, Apr. 1996
 116
 117    Value Numbering
 118    L.T. Simpson
 119    Massively Scalar Compiler Project, Rice University, Sep. 1996
 120
 121    High Performance Compilers for Parallel Computing
 122    Michael Wolfe
 123    Addison-Wesley, 1996
 124
 125    Advanced Compiler Design and Implementation
 126    Steven Muchnick
 127    Morgan Kaufmann, 1997
 128
 129    Building an Optimizing Compiler
 130    Robert Morgan
 131    Digital Press, 1998
 132
 133    People wishing to speed up the code here should read:
 134      Elimination Algorithms for Data Flow Analysis
 135      B.G. Ryder, M.C. Paull
 136      ACM Computing Surveys, Vol. 18, Num. 3, Sep. 1986
 137
 138      How to Analyze Large Programs Efficiently and Informatively
 139      D.M. Dhamdhere, B.K. Rosen, F.K. Zadeck
 140      ACM SIGPLAN Notices Vol. 27, Num. 7, Jul. 1992, '92 Conference on PLDI
 141
 142    People wishing to do something different can find various possibilities
 143    in the above papers and elsewhere.
 144 */
 145
 146 #include "config.h"
 147 #include "system.h"
 148 #include "toplev.h"
 149
 150 #include "rtl.h"
 151 #include "tm_p.h"
 152 #include "regs.h"
 153 #include "hard-reg-set.h"
 154 #include "flags.h"
 155 #include "real.h"
 156 #include "insn-config.h"
 157 #include "recog.h"
 158 #include "basic-block.h"
 159 #include "output.h"
 160 #include "function.h"
 161 #include "expr.h"
 162
 163 #include "obstack.h"
 164 #define obstack_chunk_alloc gmalloc
 165 #define obstack_chunk_free free
 166
 167 /* Maximum number of passes to perform.  */
 168 #define MAX_PASSES 1
 169
 170 /* Propagate flow information through back edges and thus enable PRE's
 171    moving loop invariant calculations out of loops.
 172
 173    Originally this tended to create worse overall code, but several
 174    improvements during the development of PRE seem to have made following
 175    back edges generally a win.
 176
 177    Note much of the loop invariant code motion done here would normally
 178    be done by loop.c, which has more heuristics for when to move invariants
 179    out of loops.  At some point we might need to move some of those
 180    heuristics into gcse.c.  */
 181 #define FOLLOW_BACK_EDGES 1
 182
 183 /* We support GCSE via Partial Redundancy Elimination.  PRE optimizations
 184    are a superset of those done by GCSE.
 185
 186    We perform the following steps:
 187
 188    1) Compute basic block information.
 189
 190    2) Compute table of places where registers are set.
 191
 192    3) Perform copy/constant propagation.
 193
 194    4) Perform global cse.
 195
 196    5) Perform another pass of copy/constant propagation.
 197
 198    Two passes of copy/constant propagation are done because the first one
 199    enables more GCSE and the second one helps to clean up the copies that
 200    GCSE creates.  This is needed more for PRE than for Classic because Classic
 201    GCSE will try to use an existing register containing the common
 202    subexpression rather than create a new one.  This is harder to do for PRE
 203    because of the code motion (which Classic GCSE doesn't do).
 204
 205    Expressions we are interested in GCSE-ing are of the form
 206    (set (pseudo-reg) (expression)).
 207    Function want_to_gcse_p says what these are.
 208
 209    PRE handles moving invariant expressions out of loops (by treating them as
 210    partially redundant).
 211
 212    Eventually it would be nice to replace cse.c/gcse.c with SSA (static single
 213    assignment) based GVN (global value numbering).  L. T. Simpson's paper
 214    (Rice University) on value numbering is a useful reference for this.
 215
 216    **********************
 217
 218    We used to support multiple passes but there are diminishing returns in
 219    doing so.  The first pass usually makes 90% of the changes that are doable.
 220    A second pass can make a few more changes made possible by the first pass.
 221    Experiments show any further passes don't make enough changes to justify
 222    the expense.
 223
 224    A study of spec92 using an unlimited number of passes:
 225    [1 pass] = 1208 substitutions, [2] = 577, [3] = 202, [4] = 192, [5] = 83,
 226    [6] = 34, [7] = 17, [8] = 9, [9] = 4, [10] = 4, [11] = 2,
 227    [12] = 2, [13] = 1, [15] = 1, [16] = 2, [41] = 1
 228
 229    It was found doing copy propagation between each pass enables further
 230    substitutions.
 231
 232    PRE is quite expensive in complicated functions because the DFA can take
 233    a while to converge.  Hence we only perform one pass.  Macro MAX_PASSES can
 234    be modified if one wants to experiment.
 235
 236    **********************
 237
 238    The steps for PRE are:
 239
 240    1) Build the hash table of expressions we wish to GCSE (expr_hash_table).
 241
 242    2) Perform the data flow analysis for PRE.
 243
 244    3) Delete the redundant instructions
 245
 246    4) Insert the required copies [if any] that make the partially
 247       redundant instructions fully redundant.
 248
 249    5) For other reaching expressions, insert an instruction to copy the value
 250       to a newly created pseudo that will reach the redundant instruction.
 251
 252    The deletion is done first so that when we do insertions we
 253    know which pseudo reg to use.
 254
 255    Various papers have argued that PRE DFA is expensive (O(n^2)) and others
 256    argue it is not.  The number of iterations for the algorithm to converge
 257    is typically 2-4 so I don't view it as that expensive (relatively speaking).
 258
 259    PRE GCSE depends heavily on the second CSE pass to clean up the copies
 260    we create.  To make an expression reach the place where it's redundant,
 261    the result of the expression is copied to a new register, and the redundant
 262    expression is deleted by replacing it with this new register.  Classic GCSE
 263    doesn't have this problem as much as it computes the reaching defs of
 264    each register in each block and thus can try to use an existing register.
 265
 266    **********************
 267
 268    A fair bit of simplicity is created by creating small functions for simple
 269    tasks, even when the function is only called in one place.  This may
 270    measurably slow things down [or may not] by creating more function call
 271    overhead than is necessary.  The source is laid out so that it's trivial
 272    to make the affected functions inline so that one can measure what speed
 273    up, if any, can be achieved, and maybe later when things settle things can
 274    be rearranged.
 275
 276    Help stamp out big monolithic functions!  */
 277 \f
 278 /* GCSE global vars.  */
 279
 280 /* -dG dump file.  */
 281 static FILE *gcse_file;
 282
 283 /* Note whether or not we should run jump optimization after gcse.  We
 284    want to do this for two cases.
 285
 286     * If we changed any jumps via cprop.
 287
 288     * If we added any labels via edge splitting.  */
 289
 290 static int run_jump_opt_after_gcse;
 291
 292 /* Bitmaps are normally not included in debugging dumps.
 293    However it's useful to be able to print them from GDB.
 294    We could create special functions for this, but it's simpler to
 295    just allow passing stderr to the dump_foo fns.  Since stderr can
 296    be a macro, we store a copy here.  */
 297 static FILE *debug_stderr;
 298
 299 /* An obstack for our working variables.  */
 300 static struct obstack gcse_obstack;
 301
 302 /* Non-zero for each mode that supports (set (reg) (reg)).
 303    This is trivially true for integer and floating point values.
 304    It may or may not be true for condition codes.  */
 305 static char can_copy_p[(int) NUM_MACHINE_MODES];
 306
 307 /* Non-zero if can_copy_p has been initialized.  */
 308 static int can_copy_init_p;
 309
 310 struct reg_use {
       /* The rtx of a register that is used.  Objects of this type are handed
          by pointer to cprop_jump and cprop_cc0_jump (see their prototypes).
          NOTE(review): presumably one entry per register use collected by
          find_used_regs for copy/constant propagation -- confirm against the
          code that fills these in.  */
 311   rtx reg_rtx;
 312 };
 313
 314 /* One entry in a hash table of expressions.  Both expr_hash_table and
        set_hash_table are arrays of pointers to these entries; entries that
        hash alike are chained through `next_same_hash'.  */
 315
 316 struct expr
 317 {
 318   /* The expression (SET_SRC for expressions, PATTERN for assignments).  */
 319   rtx expr;
 320   /* Index of this entry in the available expression bitmaps.  */
 321   int bitmap_index;
 322   /* Next entry with the same hash, or null at the end of the chain.  */
 323   struct expr *next_same_hash;
 324   /* List of anticipatable occurrences in basic blocks in the function.
 325      An "anticipatable occurrence" is one that is the first occurrence in the
 326      basic block, the operands are not modified in the basic block prior
 327      to the occurrence and the output is not used between the start of
 328      the block and the occurrence.  */
 329   struct occr *antic_occr;
 330   /* List of available occurrences in basic blocks in the function.
 331      An "available occurrence" is one that is the last occurrence in the
 332      basic block and the operands are not modified by following statements in
 333      the basic block [including this insn].  */
 334   struct occr *avail_occr;
 335   /* Non-null if the computation is PRE redundant.
 336      The value is the newly created pseudo-reg to record a copy of the
 337      expression in all the places that reach the redundant copy.  */
 338   rtx reaching_reg;
 339 };
 340
 341 /* Occurrence of an expression; these are the nodes of the `antic_occr'
        and `avail_occr' lists hanging off a struct expr.
 342    There is one per basic block.  If a pattern appears more than once the
 343    last appearance is used [or first for anticipatable expressions].  */
 344
 345 struct occr
 346 {
 347   /* Next occurrence of this expression.  */
 348   struct occr *next;
 349   /* The insn that computes the expression.  */
 350   rtx insn;
 351   /* Non-zero if this [anticipatable] occurrence has been deleted.  */
 352   char deleted_p;
 353   /* Non-zero if this [available] occurrence has been copied to
 354      the `reaching_reg' of its struct expr.  */
 355   /* ??? This is mutually exclusive with deleted_p, so they could share
 356      the same byte.  */
 357   char copied_p;
 358 };
 359
 360 /* Expression and copy propagation hash tables.
 361    Each hash table is an array of buckets.
 362    ??? It is known that if it were an array of entries, structure elements
 363    `next_same_hash' and `bitmap_index' wouldn't be necessary.  However, it is
 364    not clear whether in the final analysis a sufficient amount of memory would
 365    be saved as the size of the available expression bitmaps would be larger
 366    [one could build a mapping table without holes afterwards though].
 367    Someday I'll perform the computation and figure it out.
 368 */
 369
 370 /* Total size of the expression hash table, in elements.  */
 371 static int expr_hash_table_size;
 372 /* The table itself.
 373    This is an array of `expr_hash_table_size' elements.  */
 374 static struct expr **expr_hash_table;
 375
 376 /* Total size of the copy propagation hash table, in elements.  */
 377 static int set_hash_table_size;
 378 /* The table itself.
 379    This is an array of `set_hash_table_size' elements.  */
 380 static struct expr **set_hash_table;
 381
 382 /* Mapping of uids to cuids.
 383    Only real insns get cuids.  */
 384 static int *uid_cuid;
 385
 386 /* Highest UID in UID_CUID.  */
 387 static int max_uid;
 388
 389 /* Get the cuid of an insn.  */
 390 #define INSN_CUID(INSN) (uid_cuid[INSN_UID (INSN)])
 391
 392 /* Number of cuids.  */
 393 static int max_cuid;
 394
 395 /* Mapping of cuids to insns.  */
 396 static rtx *cuid_insn;
 397
 398 /* Get insn from cuid.  */
 399 #define CUID_INSN(CUID) (cuid_insn[CUID])
 400
 401 /* Maximum register number in function prior to doing gcse + 1.
 402    Registers created during this pass have regno >= max_gcse_regno.
 403    This is named with "gcse" to not collide with global of same name.  */
 404 static int max_gcse_regno;
 405
 406 /* Maximum number of cse-able expressions found.  */
 407 static int n_exprs;
 408 /* Maximum number of assignments for copy propagation found.  */
 409 static int n_sets;
 410
 411 /* Table of registers that are modified.
 412    For each register, each element is a list of places where the pseudo-reg
 413    is set.
 414
 415    For simplicity, GCSE is done on sets of pseudo-regs only.  PRE GCSE only
 416    requires knowledge of which blocks kill which regs [and thus could use
 417    a bitmap instead of the lists `reg_set_table' uses].
 418
 419    `reg_set_table' could be turned into an array of bitmaps
 420    (num-bbs x num-regs)
 421    [however perhaps it may be useful to keep the data as is].
 422    One advantage of recording things this way is that `reg_set_table' is
 423    fairly sparse with respect to pseudo regs but for hard regs could be
 424    fairly dense [relatively speaking].
 425    And recording sets of pseudo-regs in lists speeds
 426    up functions like compute_transp since in the case of pseudo-regs we only
 427    need to iterate over the number of times a pseudo-reg is set, not over the
 428    number of basic blocks [clearly there is a bit of a slow down in the cases
 429    where a pseudo is set more than once in a block, however it is believed
 430    that the net effect is to speed things up].  This isn't done for hard-regs
 431    because recording call-clobbered hard-regs in `reg_set_table' at each
 432    function call can consume a fair bit of memory, and iterating over hard-regs
 433    stored this way in compute_transp will be more expensive.  */
 434
 435 typedef struct reg_set {
       /* One node in a per-register list of places where that register is
          set; `reg_set_table' holds the head of each register's list.  */
 436   /* The next setting of this register.  */
 437   struct reg_set *next;
 438   /* The insn where it was set.  */
 439   rtx insn;
 440 } reg_set;
 441 static reg_set **reg_set_table;
 442 /* Size of `reg_set_table'.
 443    The table starts out at max_gcse_regno + slop, and is enlarged as
 444    necessary.  */
 445 static int reg_set_table_size;
 446 /* Amount to grow `reg_set_table' by when it's full.  */
 447 #define REG_SET_TABLE_SLOP 100
 448
 449 /* Bitmap containing one bit for each register in the program.
 450    Used when performing GCSE to track which registers have been set since
 451    the start of the basic block.  */
 452 static sbitmap reg_set_bitmap;
 453
 454 /* For each block, a bitmap of registers set in the block.
 455    This is used by expr_killed_p and compute_transp.
 456    It is computed during hash table computation and not by compute_sets
 457    as it includes registers added since the last pass (or between cprop and
 458    gcse) and it's currently not easy to realloc sbitmap vectors.  */
 459 static sbitmap *reg_set_in_block;
 460
 461 /* For each block, non-zero if memory is set in that block.
 462    This is computed during hash table computation and is used by
 463    expr_killed_p and compute_transp.
 464    ??? Handling of memory is very simple, we don't make any attempt
 465    to optimize things (later).
 466    ??? This can be computed by compute_sets since the information
 467    doesn't change.  */
 468 static char *mem_set_in_block;
 469
 470 /* Various variables for statistics gathering.  */
 471
 472 /* Memory used in a pass.
 473    This isn't intended to be absolutely precise.  Its intent is only
 474    to keep an eye on memory usage.  */
 475 static int bytes_used;
 476 /* GCSE substitutions made.  */
 477 static int gcse_subst_count;
 478 /* Number of copy instructions created.  */
 479 static int gcse_create_count;
 480 /* Number of constants propagated.  */
 481 static int const_prop_count;
 482 /* Number of copies propagated.  */
 483 static int copy_prop_count;
 484 \f
 485 /* These variables are used by classic GCSE.
 486    Normally they'd be defined a bit later, but `rd_gen' needs to
 487    be declared sooner.  */
 488
 489 /* A bitmap of all ones for implementing the algorithm for available
 490    expressions and reaching definitions.  */
 491 /* ??? Available expression bitmaps have a different size than reaching
 492    definition bitmaps.  This should be the larger of the two, however, it
 493    is not currently used for reaching definitions.  */
 494 static sbitmap u_bitmap;
 495
 496 /* Each block has a bitmap of each type.
 497    The length of each block's bitmap is:
 498
 499        max_cuid  - for reaching definitions
 500        n_exprs - for available expressions
 501
 502    Thus we view the bitmaps as 2 dimensional arrays.  i.e.
 503    rd_kill[block_num][cuid_num]
 504    ae_kill[block_num][expr_num]
 505 */
 506
 507 /* For reaching defs */
 508 static sbitmap *rd_kill, *rd_gen, *reaching_defs, *rd_out;
 509
 510 /* for available exprs */
 511 static sbitmap *ae_kill, *ae_gen, *ae_in, *ae_out;
 512
 513 /* Objects of this type are passed around by the null-pointer check
 514    removal routines.  The registers handled in one pass form the half-open
        range [min_reg, max_reg).  */
 515 struct null_pointer_info {
 516   /* The basic block being processed.  */
 517   int current_block;
 518   /* The first register to be handled in this pass.  */
 519   int min_reg;
 520   /* One greater than the last register to be handled in this pass.  */
 521   int max_reg;
       /* Bitmap vectors shared with the null-pointer check routines.
          NOTE(review): presumably one bitmap per basic block, with
          `nonnull_local' marking registers known to be non-null in the block
          and `nonnull_killed' marking registers whose non-null status is
          invalidated there -- confirm against invalidate_nonnull_info and
          delete_null_pointer_checks_1.  */
 522   sbitmap *nonnull_local;
 523   sbitmap *nonnull_killed;
 524 };
 525 \f
 526 static void compute_can_copy      PROTO ((void));
 527
 528 static char *gmalloc              PROTO ((unsigned int));
 529 static char *grealloc            PROTO ((char *, unsigned int));
 530 static char *gcse_alloc        PROTO ((unsigned long));
 531 static void alloc_gcse_mem          PROTO ((rtx));
 532 static void free_gcse_mem            PROTO ((void));
 533 static void alloc_reg_set_mem    PROTO ((int));
 534 static void free_reg_set_mem      PROTO ((void));
 535 static int get_bitmap_width           PROTO ((int, int, int));
 536 static void record_one_set          PROTO ((int, rtx));
 537 static void record_set_info        PROTO ((rtx, rtx, void *));
 538 static void compute_sets              PROTO ((rtx));
 539
 540 static void hash_scan_insn          PROTO ((rtx, int, int));
 541 static void hash_scan_set            PROTO ((rtx, rtx, int));
 542 static void hash_scan_clobber    PROTO ((rtx, rtx));
 543 static void hash_scan_call          PROTO ((rtx, rtx));
 544 static int want_to_gcse_p            PROTO ((rtx));
 545 static int oprs_unchanged_p        PROTO ((rtx, rtx, int));
 546 static int oprs_anticipatable_p       PROTO ((rtx, rtx));
 547 static int oprs_available_p        PROTO ((rtx, rtx));
 548 static void insert_expr_in_table      PROTO ((rtx, enum machine_mode,
 549                                               rtx, int, int));
 550 static void insert_set_in_table       PROTO ((rtx, rtx));
 551 static unsigned int hash_expr    PROTO ((rtx, enum machine_mode,
 552                                          int *, int));
 553 static unsigned int hash_expr_1       PROTO ((rtx, enum machine_mode, int *));
 554 static unsigned int hash_set      PROTO ((int, int));
 555 static int expr_equiv_p        PROTO ((rtx, rtx));
 556 static void record_last_reg_set_info  PROTO ((rtx, int));
 557 static void record_last_mem_set_info  PROTO ((rtx));
 558 static void record_last_set_info      PROTO ((rtx, rtx, void *));
 559 static void compute_hash_table  PROTO ((int));
 560 static void alloc_set_hash_table      PROTO ((int));
 561 static void free_set_hash_table       PROTO ((void));
 562 static void compute_set_hash_table    PROTO ((void));
 563 static void alloc_expr_hash_table     PROTO ((int));
 564 static void free_expr_hash_table      PROTO ((void));
 565 static void compute_expr_hash_table   PROTO ((void));
 566 static void dump_hash_table        PROTO ((FILE *, const char *, struct expr **,
 567                                            int, int));
 568 static struct expr *lookup_expr       PROTO ((rtx));
 569 static struct expr *lookup_set  PROTO ((int, rtx));
 570 static struct expr *next_set      PROTO ((int, struct expr *));
 571 static void reset_opr_set_tables      PROTO ((void));
 572 static int oprs_not_set_p            PROTO ((rtx, rtx));
 573 static void mark_call            PROTO ((rtx));
 574 static void mark_set              PROTO ((rtx, rtx));
 575 static void mark_clobber              PROTO ((rtx, rtx));
 576 static void mark_oprs_set            PROTO ((rtx));
 577
 578 static void alloc_cprop_mem        PROTO ((int, int));
 579 static void free_cprop_mem          PROTO ((void));
 580 static void compute_transp          PROTO ((rtx, int, sbitmap *, int));
 581 static void compute_transpout       PROTO ((void));
 582 static void compute_local_properties  PROTO ((sbitmap *, sbitmap *,
 583                                               sbitmap *, int));
 584 static void compute_cprop_avinout     PROTO ((void));
 585 static void compute_cprop_data  PROTO ((void));
 586 static void find_used_regs          PROTO ((rtx));
 587 static int try_replace_reg          PROTO ((rtx, rtx, rtx));
 588 static struct expr *find_avail_set    PROTO ((int, rtx));
 589 static int cprop_jump                   PROTO((rtx, rtx, struct reg_use *, rtx));
 590 #ifdef HAVE_cc0
 591 static int cprop_cc0_jump               PROTO((rtx, struct reg_use *, rtx));
 592 #endif
 593 static int cprop_insn            PROTO ((rtx, int));
 594 static int cprop                      PROTO ((int));
 595 static int one_cprop_pass            PROTO ((int, int));
 596
 597 static void alloc_pre_mem            PROTO ((int, int));
 598 static void free_pre_mem              PROTO ((void));
 599 static void compute_pre_data      PROTO ((void));
 600 static int pre_expr_reaches_here_p    PROTO ((int, struct expr *,
 601                                               int, int));
 602 static void insert_insn_end_bb  PROTO ((struct expr *, int, int));
 603 static void pre_insert_copy_insn      PROTO ((struct expr *, rtx));
 604 static void pre_insert_copies    PROTO ((void));
 605 static int pre_delete            PROTO ((void));
 606 static int pre_gcse                PROTO ((void));
 607 static int one_pre_gcse_pass      PROTO ((int));
 608
 609 static void add_label_notes           PROTO ((rtx, rtx));
 610
 611 static void alloc_code_hoist_mem        PROTO ((int, int));
 612 static void free_code_hoist_mem         PROTO ((void));
 613 static void compute_code_hoist_vbeinout PROTO ((void));
 614 static void compute_code_hoist_data     PROTO ((void));
 615 static int hoist_expr_reaches_here_p    PROTO ((int, int, int, char *));
 616 static void hoist_code                  PROTO ((void));
 617 static int one_code_hoisting_pass       PROTO ((void));
 618
 619 static void alloc_rd_mem              PROTO ((int, int));
 620 static void free_rd_mem        PROTO ((void));
 621 static void handle_rd_kill_set  PROTO ((rtx, int, int));
 622 static void compute_kill_rd        PROTO ((void));
 623 static void compute_rd          PROTO ((void));
 624 static void alloc_avail_expr_mem      PROTO ((int, int));
 625 static void free_avail_expr_mem       PROTO ((void));
 626 static void compute_ae_gen          PROTO ((void));
 627 static int expr_killed_p              PROTO ((rtx, int));
 628 static void compute_ae_kill        PROTO ((sbitmap *, sbitmap *));
 629 static int expr_reaches_here_p  PROTO ((struct occr *, struct expr *,
 630                                               int, int));
 631 static rtx computing_insn            PROTO ((struct expr *, rtx));
 632 static int def_reaches_here_p    PROTO ((rtx, rtx));
 633 static int can_disregard_other_sets   PROTO ((struct reg_set **, rtx, int));
 634 static int handle_avail_expr      PROTO ((rtx, struct expr *));
 635 static int classic_gcse        PROTO ((void));
 636 static int one_classic_gcse_pass      PROTO ((int));
 637 static void invalidate_nonnull_info     PROTO ((rtx, rtx, void *));
 638 static void delete_null_pointer_checks_1 PROTO ((int_list_ptr *, int *,
 639                                                  sbitmap *, sbitmap *,
 640                                                  struct null_pointer_info *));
 641 static rtx process_insert_insn  PROTO ((struct expr *));
 642 static int pre_edge_insert      PROTO ((struct edge_list *, struct expr **));
 643 static int expr_reaches_here_p_work     PROTO ((struct occr *, struct expr *, int, int, char *));
 644 static int pre_expr_reaches_here_p_work PROTO ((int, struct expr *, int, int, char *));
 645 \f
 646 /* Entry point for global common subexpression elimination.
 647    F is the first instruction in the function.  */
 648
 649 int
 650 gcse_main (f, file)
 651      rtx f;
 652      FILE *file;
 653 {
 654   int changed, pass;
 655   /* Bytes used at start of pass.  */
 656   int initial_bytes_used;
 657   /* Maximum number of bytes used by a pass.  */
 658   int max_pass_bytes;
 659   /* Point to release obstack data from for each pass.  */
 660   char *gcse_obstack_bottom;
 661
 662   /* We do not construct an accurate cfg in functions which call
 663      setjmp, so just punt to be safe.  */
 664   if (current_function_calls_setjmp)
 665     return 0;
 666
 667   /* Assume that we do not need to run jump optimizations after gcse.  */
 668   run_jump_opt_after_gcse = 0;
 669
 670   /* For calling dump_foo fns from gdb.  */
 671   debug_stderr = stderr;
 672   gcse_file = file;
 673
 674   /* Identify the basic block information for this function, including
 675      successors and predecessors.  */
 676   max_gcse_regno = max_reg_num ();
 677   find_basic_blocks (f, max_gcse_regno, file, 1);
 678
 679   if (file)
 680     dump_flow_info (file);
 681
 682   /* Return if there's nothing to do.  */
 683   if (n_basic_blocks <= 1)
 684     {
 685       /* Free storage allocated by find_basic_blocks.  */
 686       free_basic_block_vars (0);
 687       return 0;
 688     }
 689
 690   /* Trying to perform global optimizations on flow graphs which have
 691      a high connectivity will take a long time and is unlikely to be
 692      particularly useful.
 693
 694      In normal circumstances a cfg should have about twice as many edges
 695      as blocks.  But we do not want to punish small functions which have
 696      a couple switch statements.  So we require a relatively large number
 697      of basic blocks and the ratio of edges to blocks to be high.  */
 698   if (n_basic_blocks > 1000 && n_edges / n_basic_blocks >= 20)
 699     {
 700       /* Free storage allocated by find_basic_blocks.  */
 701       free_basic_block_vars (0);
 702       return 0;
 703     }
 704
 705   /* See what modes support reg/reg copy operations.  */
 706   if (! can_copy_init_p)
 707     {
 708       compute_can_copy ();
 709       can_copy_init_p = 1;
 710     }
 711
 712   gcc_obstack_init (&gcse_obstack);
 713   bytes_used = 0;
 714
 715   /* Record where pseudo-registers are set.
 716      This data is kept accurate during each pass.
 717      ??? We could also record hard-reg information here
 718      [since it's unchanging], however it is currently done during
 719      hash table computation.
 720
 721      It may be tempting to compute MEM set information here too, but MEM
 722      sets will be subject to code motion one day and thus we need to compute
 723      information about memory sets when we build the hash tables.  */
 724
 725   alloc_reg_set_mem (max_gcse_regno);
 726   compute_sets (f);
 727
 728   pass = 0;
 729   initial_bytes_used = bytes_used;
 730   max_pass_bytes = 0;
 731   gcse_obstack_bottom = gcse_alloc (1);
 732   changed = 1;
 733   while (changed && pass < MAX_PASSES)
 734     {
 735       changed = 0;
 736       if (file)
 737         fprintf (file, "GCSE pass %d\n\n", pass + 1);
 738
 739       /* Initialize bytes_used to the space for the pred/succ lists,
 740          and the reg_set_table data.  */
 741       bytes_used = initial_bytes_used;
 742
 743       /* Each pass may create new registers, so recalculate each time.  */
 744       max_gcse_regno = max_reg_num ();
 745
 746       alloc_gcse_mem (f);
 747
 748       /* Don't allow constant propagation to modify jumps
 749          during this pass.  */
 750       changed = one_cprop_pass (pass + 1, 0);
 751
 752       if (optimize_size)
 753         changed |= one_classic_gcse_pass (pass + 1);
 754       else
 755         {
 756           changed |= one_pre_gcse_pass (pass + 1);
 757           free_reg_set_mem ();
 758           alloc_reg_set_mem (max_reg_num ());
 759           compute_sets (f);
 760           run_jump_opt_after_gcse = 1;
 761         }
 762
 763       if (max_pass_bytes < bytes_used)
 764         max_pass_bytes = bytes_used;
 765
 766       /* Free up memory, then reallocate for code hoisting.  We can
 767          not re-use the existing allocated memory because the tables
 768          will not have info for the insns or registers created by
 769          partial redundancy elimination.  */
 770       free_gcse_mem ();
 771
 772       /* It does not make sense to run code hoisting unless we are optimizing
 773          for code size -- it rarely makes programs faster, and can make
 774          them bigger if we did partial redundancy elimination (when optimizing
 775          for space, we use a classic gcse algorithm instead of partial
 776          redundancy algorithms).  */
 777       if (optimize_size)
 778         {
 779           max_gcse_regno = max_reg_num ();
 780           alloc_gcse_mem (f);
 781           changed |= one_code_hoisting_pass ();
 782           free_gcse_mem ();
 783
 784           if (max_pass_bytes < bytes_used)
 785             max_pass_bytes = bytes_used;
 786         }
 787
 788       if (file)
 789         {
 790           fprintf (file, "\n");
 791           fflush (file);
 792         }
 793       obstack_free (&gcse_obstack, gcse_obstack_bottom);
 794       pass++;
 795     }
 796
 797   /* Do one last pass of copy propagation, including cprop into
 798      conditional jumps.  */
 799
 800   max_gcse_regno = max_reg_num ();
 801   alloc_gcse_mem (f);
 802   /* This time, go ahead and allow cprop to alter jumps.  */
 803   one_cprop_pass (pass + 1, 1);
 804   free_gcse_mem ();
 805
 806   if (file)
 807     {
 808       fprintf (file, "GCSE of %s: %d basic blocks, ",
 809                current_function_name, n_basic_blocks);
 810       fprintf (file, "%d pass%s, %d bytes\n\n",
 811                pass, pass > 1 ? "es" : "", max_pass_bytes);
 812     }
 813
 814   /* Free our obstack.  */
 815   obstack_free (&gcse_obstack, NULL_PTR);
 816   /* Free reg_set_table.  */
 817   free_reg_set_mem ();
 818   /* Free storage used to record predecessor/successor data.  */
 819   free_bb_mem ();
 820   /* Free storage allocated by find_basic_blocks.  */
 821   free_basic_block_vars (0);
 822   return run_jump_opt_after_gcse;
 823 }
 824 \f
 825 /* Misc. utilities.  */
 826
 827 /* Compute which modes support reg/reg copy operations.  */
 828
 829 static void
 830 compute_can_copy ()
 831 {
 832   int i;
 833 #ifndef AVOID_CCMODE_COPIES
 834   rtx reg,insn;
 835 #endif
 836   char *free_point = (char *) oballoc (1);
 837
 838   bzero (can_copy_p, NUM_MACHINE_MODES);
 839
 840   start_sequence ();
 841   for (i = 0; i < NUM_MACHINE_MODES; i++)
 842     {
 843       switch (GET_MODE_CLASS (i))
 844         {
 845         case MODE_CC :
 846 #ifdef AVOID_CCMODE_COPIES
 847           can_copy_p[i] = 0;
 848 #else
 849           reg = gen_rtx_REG ((enum machine_mode) i, LAST_VIRTUAL_REGISTER + 1);
 850           insn = emit_insn (gen_rtx_SET (VOIDmode, reg, reg));
 851           if (recog (PATTERN (insn), insn, NULL_PTR) >= 0)
 852             can_copy_p[i] = 1;
 853 #endif
 854           break;
 855         default :
 856           can_copy_p[i] = 1;
 857           break;
 858         }
 859     }
 860   end_sequence ();
 861
 862   /* Free the objects we just allocated.  */
 863   obfree (free_point);
 864 }
 865 \f
 866 /* Cover function to xmalloc to record bytes allocated.  */
 867
 868 static char *
 869 gmalloc (size)
 870      unsigned int size;
 871 {
 872   bytes_used += size;
 873   return xmalloc (size);
 874 }
 875
 876 /* Cover function to xrealloc.
 877    We don't record the additional size since we don't know it.
 878    It won't affect memory usage stats much anyway.  */
 879
 880 static char *
 881 grealloc (ptr, size)
 882      char *ptr;
 883      unsigned int size;
 884 {
 885   return xrealloc (ptr, size);
 886 }
 887
 888 /* Cover function to obstack_alloc.
 889    We don't need to record the bytes allocated here since
 890    obstack_chunk_alloc is set to gmalloc.  */
 891
 892 static char *
 893 gcse_alloc (size)
 894      unsigned long size;
 895 {
 896   return (char *) obstack_alloc (&gcse_obstack, size);
 897 }
 898
 899 /* Allocate memory for the cuid mapping array,
 900    and reg/memory set tracking tables.
 901
 902    This is called at the start of each pass.  */
 903
 904 static void
 905 alloc_gcse_mem (f)
 906      rtx f;
 907 {
 908   int i,n;
 909   rtx insn;
 910
 911   /* Find the largest UID and create a mapping from UIDs to CUIDs.
 912      CUIDs are like UIDs except they increase monotonically, have no gaps,
 913      and only apply to real insns.  */
 914
 915   max_uid = get_max_uid ();
 916   n = (max_uid + 1) * sizeof (int);
 917   uid_cuid = (int *) gmalloc (n);
 918   bzero ((char *) uid_cuid, n);
 919   for (insn = f, i = 0; insn; insn = NEXT_INSN (insn))
 920     {
 921       if (GET_RTX_CLASS (GET_CODE (insn)) == 'i')
 922         INSN_CUID (insn) = i++;
 923       else
 924         INSN_CUID (insn) = i;
 925     }
 926
 927   /* Create a table mapping cuids to insns.  */
 928
 929   max_cuid = i;
 930   n = (max_cuid + 1) * sizeof (rtx);
 931   cuid_insn = (rtx *) gmalloc (n);
 932   bzero ((char *) cuid_insn, n);
 933   for (insn = f, i = 0; insn; insn = NEXT_INSN (insn))
 934     {
 935       if (GET_RTX_CLASS (GET_CODE (insn)) == 'i')
 936         {
 937           CUID_INSN (i) = insn;
 938           i++;
 939         }
 940     }
 941
 942   /* Allocate vars to track sets of regs.  */
 943
 944   reg_set_bitmap = (sbitmap) sbitmap_alloc (max_gcse_regno);
 945
 946   /* Allocate vars to track sets of regs, memory per block.  */
 947
 948   reg_set_in_block = (sbitmap *) sbitmap_vector_alloc (n_basic_blocks,
 949                                                        max_gcse_regno);
 950   mem_set_in_block = (char *) gmalloc (n_basic_blocks);
 951 }
 952
 953 /* Free memory allocated by alloc_gcse_mem.  */
 954
 955 static void
 956 free_gcse_mem ()
 957 {
 958   free (uid_cuid);
 959   free (cuid_insn);
 960
 961   free (reg_set_bitmap);
 962
 963   free (reg_set_in_block);
 964   free (mem_set_in_block);
 965 }
 966
 967 /* Many of the global optimization algorithms work by solving dataflow
 968    equations for various expressions.  Initially, some local value is
 969    computed for each expression in each block.  Then, the values
 970    across the various blocks are combined (by following flow graph
 971    edges) to arrive at global values.  Conceptually, each set of
 972    equations is independent.  We may therefore solve all the equations
 973    in parallel, solve them one at a time, or pick any intermediate
 974    approach.
 975
 976    When you're going to need N two-dimensional bitmaps, each X (say,
 977    the number of blocks) by Y (say, the number of expressions), call
 978    this function.  It's not important what X and Y represent; only
 979    that Y correspond to the things that can be done in parallel.  This
 980    function will return an appropriate chunking factor C; you should
 981    solve C sets of equations in parallel.  By going through this
 982    function, we can easily trade space against time; by solving fewer
 983    equations in parallel we use less space.  */
 984
 985 static int
 986 get_bitmap_width (n, x, y)
 987      int n;
 988      int x;
 989      int y;
 990 {
 991   /* It's not really worth figuring out *exactly* how much memory will
 992      be used by a particular choice.  The important thing is to get
 993      something approximately right.  */
 994   size_t max_bitmap_memory = 10 * 1024 * 1024;
 995
 996   /* The number of bytes we'd use for a single column of minimum
 997      width.  */
 998   size_t column_size = n * x * sizeof (SBITMAP_ELT_TYPE);
 999
1000   /* Often, it's reasonable just to solve all the equations in
1001      parallel.  */
1002   if (column_size * SBITMAP_SET_SIZE (y) <= max_bitmap_memory)
1003     return y;
1004
1005   /* Otherwise, pick the largest width we can, without going over the
1006      limit.  */
1007   return SBITMAP_ELT_BITS * ((max_bitmap_memory + column_size - 1)
1008                              / column_size);
1009 }
1010
1011 \f
1012 /* Compute the local properties of each recorded expression.
1013    Local properties are those that are defined by the block, irrespective
1014    of other blocks.
1015
1016    An expression is transparent in a block if its operands are not modified
1017    in the block.
1018
1019    An expression is computed (locally available) in a block if it is computed
1020    at least once and expression would contain the same value if the
1021    computation was moved to the end of the block.
1022
1023    An expression is locally anticipatable in a block if it is computed at
1024    least once and expression would contain the same value if the computation
1025    was moved to the beginning of the block.
1026
1027    We call this routine for cprop, pre and code hoisting.  They all
1028    compute basically the same information and thus can easily share
1029    this code.
1030
1031    TRANSP, COMP, and ANTLOC are destination sbitmaps for recording
1032    local properties.  If NULL, then it is not necessary to compute
1033    or record that particular property.
1034
1035    SETP controls which hash table to look at.  If zero, this routine
1036    looks at the expr hash table; if nonzero this routine looks at
1037    the set hash table.  Additionally, TRANSP is computed as ~TRANSP,
1038    since this is really cprop's ABSALTERED.  */
1039
1040 static void
1041 compute_local_properties (transp, comp, antloc, setp)
1042      sbitmap *transp;
1043      sbitmap *comp;
1044      sbitmap *antloc;
1045      int setp;
1046 {
1047   int i, hash_table_size;
1048   struct expr **hash_table;
1049
1050   /* Initialize any bitmaps that were passed in.  */
1051   if (transp)
1052     {
1053       if (setp)
1054         sbitmap_vector_zero (transp, n_basic_blocks);
1055       else
1056         sbitmap_vector_ones (transp, n_basic_blocks);
1057     }
1058   if (comp)
1059     sbitmap_vector_zero (comp, n_basic_blocks);
1060   if (antloc)
1061     sbitmap_vector_zero (antloc, n_basic_blocks);
1062
1063   /* We use the same code for cprop, pre and hoisting.  For cprop
1064      we care about the set hash table, for pre and hoisting we
1065      care about the expr hash table.  */
1066   hash_table_size = setp ? set_hash_table_size : expr_hash_table_size;
1067   hash_table = setp ? set_hash_table : expr_hash_table;
1068
1069   for (i = 0; i < hash_table_size; i++)
1070     {
1071       struct expr *expr;
1072
1073       for (expr = hash_table[i]; expr != NULL; expr = expr->next_same_hash)
1074         {
1075           struct occr *occr;
1076           int indx = expr->bitmap_index;
1077
1078           /* The expression is transparent in this block if it is not killed.
1079              We start by assuming all are transparent [none are killed], and
1080              then reset the bits for those that are.  */
1081
1082           if (transp)
1083             compute_transp (expr->expr, indx, transp, setp);
1084
1085           /* The occurrences recorded in antic_occr are exactly those that
1086              we want to set to non-zero in ANTLOC.  */
1087
1088           if (antloc)
1089             {
1090               for (occr = expr->antic_occr; occr != NULL; occr = occr->next)
1091                 {
1092                   int bb = BLOCK_NUM (occr->insn);
1093                   SET_BIT (antloc[bb], indx);
1094
1095                   /* While we're scanning the table, this is a good place to
1096                      initialize this.  */
1097                   occr->deleted_p = 0;
1098                 }
1099             }
1100
1101           /* The occurrences recorded in avail_occr are exactly those that
1102              we want to set to non-zero in COMP.  */
1103           if (comp)
1104             {
1105
1106               for (occr = expr->avail_occr; occr != NULL; occr = occr->next)
1107                 {
1108                   int bb = BLOCK_NUM (occr->insn);
1109                   SET_BIT (comp[bb], indx);
1110
1111                   /* While we're scanning the table, this is a good place to
1112                      initialize this.  */
1113                   occr->copied_p = 0;
1114                 }
1115             }
1116
1117           /* While we're scanning the table, this is a good place to
1118              initialize this.  */
1119           expr->reaching_reg = 0;
1120         }
1121     }
1122 }
1123
1124 \f
1125 /* Register set information.
1126
1127    `reg_set_table' records where each register is set or otherwise
1128    modified.  */
1129
1130 static struct obstack reg_set_obstack;
1131
1132 static void
1133 alloc_reg_set_mem (n_regs)
1134      int n_regs;
1135 {
1136   int n;
1137
1138   reg_set_table_size = n_regs + REG_SET_TABLE_SLOP;
1139   n = reg_set_table_size * sizeof (struct reg_set *);
1140   reg_set_table = (struct reg_set **) gmalloc (n);
1141   bzero ((char *) reg_set_table, n);
1142
1143   gcc_obstack_init (&reg_set_obstack);
1144 }
1145
1146 static void
1147 free_reg_set_mem ()
1148 {
1149   free (reg_set_table);
1150   obstack_free (&reg_set_obstack, NULL_PTR);
1151 }
1152
1153 /* Record REGNO in the reg_set table.  */
1154
1155 static void
1156 record_one_set (regno, insn)
1157      int regno;
1158      rtx insn;
1159 {
1160   /* allocate a new reg_set element and link it onto the list */
1161   struct reg_set *new_reg_info, *reg_info_ptr1, *reg_info_ptr2;
1162
1163   /* If the table isn't big enough, enlarge it.  */
1164   if (regno >= reg_set_table_size)
1165     {
1166       int new_size = regno + REG_SET_TABLE_SLOP;
1167       reg_set_table = (struct reg_set **)
1168         grealloc ((char *) reg_set_table,
1169                   new_size * sizeof (struct reg_set *));
1170       bzero ((char *) (reg_set_table + reg_set_table_size),
1171              (new_size - reg_set_table_size) * sizeof (struct reg_set *));
1172       reg_set_table_size = new_size;
1173     }
1174
1175   new_reg_info = (struct reg_set *) obstack_alloc (&reg_set_obstack,
1176                                                    sizeof (struct reg_set));
1177   bytes_used += sizeof (struct reg_set);
1178   new_reg_info->insn = insn;
1179   new_reg_info->next = NULL;
1180   if (reg_set_table[regno] == NULL)
1181     reg_set_table[regno] = new_reg_info;
1182   else
1183     {
1184       reg_info_ptr1 = reg_info_ptr2 = reg_set_table[regno];
1185       /* ??? One could keep a "last" pointer to speed this up.  */
1186       while (reg_info_ptr1 != NULL)
1187         {
1188           reg_info_ptr2 = reg_info_ptr1;
1189           reg_info_ptr1 = reg_info_ptr1->next;
1190         }
1191       reg_info_ptr2->next = new_reg_info;
1192     }
1193 }
1194
1195 /* Called from compute_sets via note_stores to handle one
1196    SET or CLOBBER in an insn.  The DATA is really the instruction
1197    in which the SET is occurring.  */
1198
1199 static void
1200 record_set_info (dest, setter, data)
1201      rtx dest, setter ATTRIBUTE_UNUSED;
1202      void *data;
1203 {
1204   rtx record_set_insn = (rtx) data;
1205
1206   if (GET_CODE (dest) == SUBREG)
1207     dest = SUBREG_REG (dest);
1208
1209   if (GET_CODE (dest) == REG)
1210     {
1211       if (REGNO (dest) >= FIRST_PSEUDO_REGISTER)
1212         record_one_set (REGNO (dest), record_set_insn);
1213     }
1214 }
1215
1216 /* Scan the function and record each set of each pseudo-register.
1217
1218    This is called once, at the start of the gcse pass.
1219    See the comments for `reg_set_table' for further docs.  */
1220
1221 static void
1222 compute_sets (f)
1223      rtx f;
1224 {
1225   rtx insn = f;
1226
1227   while (insn)
1228     {
1229       if (GET_RTX_CLASS (GET_CODE (insn)) == 'i')
1230         note_stores (PATTERN (insn), record_set_info, insn);
1231       insn = NEXT_INSN (insn);
1232     }
1233 }
1234 \f
1235 /* Hash table support.  */
1236
/* Marker stored in the first/last-set records below for "not set".  */
#define NEVER_SET -1

/* For each register (indexed by register number), the cuid of the
   first/last insn in the block to set it, or NEVER_SET if not set.  */
static int *reg_first_set;
static int *reg_last_set;

/* While computing "first/last set" info, this is the CUID of first/last insn
   to set memory or NEVER_SET if not set.  `mem_last_set' is also used when
   performing GCSE to record whether memory has been set since the beginning
   of the block.
   Note that handling of memory is very simple: all of memory is treated
   as a single object, and we don't make any attempt to optimize things
   (later).  */
static int mem_first_set;
static int mem_last_set;
1252
1253 /* Perform a quick check whether X, the source of a set, is something
1254    we want to consider for GCSE.  */
1255
1256 static int
1257 want_to_gcse_p (x)
1258      rtx x;
1259 {
1260   enum rtx_code code = GET_CODE (x);
1261
1262   switch (code)
1263     {
1264     case REG:
1265     case SUBREG:
1266     case CONST_INT:
1267     case CONST_DOUBLE:
1268     case CALL:
1269       return 0;
1270
1271     default:
1272       break;
1273     }
1274
1275   return 1;
1276 }
1277
1278 /* Return non-zero if the operands of expression X are unchanged from the
1279    start of INSN's basic block up to but not including INSN (if AVAIL_P == 0),
1280    or from INSN to the end of INSN's basic block (if AVAIL_P != 0).  */
1281
1282 static int
1283 oprs_unchanged_p (x, insn, avail_p)
1284      rtx x, insn;
1285      int avail_p;
1286 {
1287   int i;
1288   enum rtx_code code;
1289   const char *fmt;
1290
1291   /* repeat is used to turn tail-recursion into iteration.  */
1292  repeat:
1293
1294   if (x == 0)
1295     return 1;
1296
1297   code = GET_CODE (x);
1298   switch (code)
1299     {
1300     case REG:
1301       if (avail_p)
1302         return (reg_last_set[REGNO (x)] == NEVER_SET
1303                 || reg_last_set[REGNO (x)] < INSN_CUID (insn));
1304       else
1305         return (reg_first_set[REGNO (x)] == NEVER_SET
1306                 || reg_first_set[REGNO (x)] >= INSN_CUID (insn));
1307
1308     case MEM:
1309       if (avail_p)
1310         {
1311           if (mem_last_set != NEVER_SET
1312               && mem_last_set >= INSN_CUID (insn))
1313             return 0;
1314         }
1315       else
1316         {
1317           if (mem_first_set != NEVER_SET
1318               && mem_first_set < INSN_CUID (insn))
1319             return 0;
1320         }
1321       x = XEXP (x, 0);
1322       goto repeat;
1323
1324     case PRE_DEC:
1325     case PRE_INC:
1326     case POST_DEC:
1327     case POST_INC:
1328       return 0;
1329
1330     case PC:
1331     case CC0: /*FIXME*/
1332     case CONST:
1333     case CONST_INT:
1334     case CONST_DOUBLE:
1335     case SYMBOL_REF:
1336     case LABEL_REF:
1337     case ADDR_VEC:
1338     case ADDR_DIFF_VEC:
1339       return 1;
1340
1341     default:
1342       break;
1343     }
1344
1345   i = GET_RTX_LENGTH (code) - 1;
1346   fmt = GET_RTX_FORMAT (code);
1347   for (; i >= 0; i--)
1348     {
1349       if (fmt[i] == 'e')
1350         {
1351           rtx tem = XEXP (x, i);
1352
1353           /* If we are about to do the last recursive call
1354              needed at this level, change it into iteration.
1355              This function is called enough to be worth it.  */
1356           if (i == 0)
1357             {
1358               x = tem;
1359               goto repeat;
1360             }
1361           if (! oprs_unchanged_p (tem, insn, avail_p))
1362             return 0;
1363         }
1364       else if (fmt[i] == 'E')
1365         {
1366           int j;
1367           for (j = 0; j < XVECLEN (x, i); j++)
1368             {
1369               if (! oprs_unchanged_p (XVECEXP (x, i, j), insn, avail_p))
1370                 return 0;
1371             }
1372         }
1373     }
1374
1375   return 1;
1376 }
1377
1378 /* Return non-zero if the operands of expression X are unchanged from
1379    the start of INSN's basic block up to but not including INSN.  */
1380
1381 static int
1382 oprs_anticipatable_p (x, insn)
1383      rtx x, insn;
1384 {
1385   return oprs_unchanged_p (x, insn, 0);
1386 }
1387
1388 /* Return non-zero if the operands of expression X are unchanged from
1389    INSN to the end of INSN's basic block.  */
1390
1391 static int
1392 oprs_available_p (x, insn)
1393      rtx x, insn;
1394 {
1395   return oprs_unchanged_p (x, insn, 1);
1396 }
1397
1398 /* Hash expression X.
1399    MODE is only used if X is a CONST_INT.
1400    A boolean indicating if a volatile operand is found or if the expression
1401    contains something we don't want to insert in the table is stored in
1402    DO_NOT_RECORD_P.
1403
1404    ??? One might want to merge this with canon_hash.  Later.  */
1405
1406 static unsigned int
1407 hash_expr (x, mode, do_not_record_p, hash_table_size)
1408      rtx x;
1409      enum machine_mode mode;
1410      int *do_not_record_p;
1411      int hash_table_size;
1412 {
1413   unsigned int hash;
1414
1415   *do_not_record_p = 0;
1416
1417   hash = hash_expr_1 (x, mode, do_not_record_p);
1418   return hash % hash_table_size;
1419 }
1420
1421 /* Subroutine of hash_expr to do the actual work.  */
1422
1423 static unsigned int
1424 hash_expr_1 (x, mode, do_not_record_p)
1425      rtx x;
1426      enum machine_mode mode;
1427      int *do_not_record_p;
1428 {
1429   int i, j;
1430   unsigned hash = 0;
1431   enum rtx_code code;
1432   const char *fmt;
1433
1434   /* repeat is used to turn tail-recursion into iteration.  */
1435  repeat:
1436
1437   if (x == 0)
1438     return hash;
1439
1440   code = GET_CODE (x);
1441   switch (code)
1442     {
1443     case REG:
1444       {
1445         register int regno = REGNO (x);
1446         hash += ((unsigned) REG << 7) + regno;
1447         return hash;
1448       }
1449
1450     case CONST_INT:
1451       {
1452         unsigned HOST_WIDE_INT tem = INTVAL (x);
1453         hash += ((unsigned) CONST_INT << 7) + (unsigned) mode + tem;
1454         return hash;
1455       }
1456
1457     case CONST_DOUBLE:
1458       /* This is like the general case, except that it only counts
1459          the integers representing the constant.  */
1460       hash += (unsigned) code + (unsigned) GET_MODE (x);
1461       if (GET_MODE (x) != VOIDmode)
1462         for (i = 2; i < GET_RTX_LENGTH (CONST_DOUBLE); i++)
1463           {
1464             unsigned tem = XWINT (x, i);
1465             hash += tem;
1466           }
1467       else
1468         hash += ((unsigned) CONST_DOUBLE_LOW (x)
1469                  + (unsigned) CONST_DOUBLE_HIGH (x));
1470       return hash;
1471
1472       /* Assume there is only one rtx object for any given label.  */
1473     case LABEL_REF:
1474       /* We don't hash on the address of the CODE_LABEL to avoid bootstrap
1475          differences and differences between each stage's debugging dumps.  */
1476       hash += ((unsigned) LABEL_REF << 7) + CODE_LABEL_NUMBER (XEXP (x, 0));
1477       return hash;
1478
1479     case SYMBOL_REF:
1480       {
1481         /* Don't hash on the symbol's address to avoid bootstrap differences.
1482            Different hash values may cause expressions to be recorded in
1483            different orders and thus different registers to be used in the
1484            final assembler.  This also avoids differences in the dump files
1485            between various stages.  */
1486         unsigned int h = 0;
1487         unsigned char *p = (unsigned char *) XSTR (x, 0);
1488         while (*p)
1489           h += (h << 7) + *p++; /* ??? revisit */
1490         hash += ((unsigned) SYMBOL_REF << 7) + h;
1491         return hash;
1492       }
1493
1494     case MEM:
1495       if (MEM_VOLATILE_P (x))
1496         {
1497           *do_not_record_p = 1;
1498           return 0;
1499         }
1500       hash += (unsigned) MEM;
1501       hash += MEM_ALIAS_SET (x);
1502       x = XEXP (x, 0);
1503       goto repeat;
1504
1505     case PRE_DEC:
1506     case PRE_INC:
1507     case POST_DEC:
1508     case POST_INC:
1509     case PC:
1510     case CC0:
1511     case CALL:
1512     case UNSPEC_VOLATILE:
1513       *do_not_record_p = 1;
1514       return 0;
1515
1516     case ASM_OPERANDS:
1517       if (MEM_VOLATILE_P (x))
1518         {
1519           *do_not_record_p = 1;
1520           return 0;
1521         }
1522
1523     default:
1524       break;
1525     }
1526
1527   i = GET_RTX_LENGTH (code) - 1;
1528   hash += (unsigned) code + (unsigned) GET_MODE (x);
1529   fmt = GET_RTX_FORMAT (code);
1530   for (; i >= 0; i--)
1531     {
1532       if (fmt[i] == 'e')
1533         {
1534           rtx tem = XEXP (x, i);
1535
1536           /* If we are about to do the last recursive call
1537              needed at this level, change it into iteration.
1538              This function is called enough to be worth it.  */
1539           if (i == 0)
1540             {
1541               x = tem;
1542               goto repeat;
1543             }
1544           hash += hash_expr_1 (tem, 0, do_not_record_p);
1545           if (*do_not_record_p)
1546             return 0;
1547         }
1548       else if (fmt[i] == 'E')
1549         for (j = 0; j < XVECLEN (x, i); j++)
1550           {
1551             hash += hash_expr_1 (XVECEXP (x, i, j), 0, do_not_record_p);
1552             if (*do_not_record_p)
1553               return 0;
1554           }
1555       else if (fmt[i] == 's')
1556         {
1557           register unsigned char *p = (unsigned char *) XSTR (x, i);
1558           if (p)
1559             while (*p)
1560               hash += *p++;
1561         }
1562       else if (fmt[i] == 'i')
1563         {
1564           register unsigned tem = XINT (x, i);
1565           hash += tem;
1566         }
1567       else
1568         abort ();
1569     }
1570
1571   return hash;
1572 }
1573
/* Hash a set of register REGNO into a bucket index in
   [0, HASH_TABLE_SIZE).

   Sets are hashed on the register that is set: the result is REGNO,
   taken as unsigned, modulo HASH_TABLE_SIZE.
   This simplifies the PRE copy propagation code.

   ??? May need to make things more elaborate.  Later, as necessary.  */

static unsigned int
hash_set (regno, hash_table_size)
     int regno;
     int hash_table_size;
{
  return (unsigned int) regno % hash_table_size;
}
1591
1592 /* Return non-zero if exp1 is equivalent to exp2.
1593    ??? Borrowed from cse.c.  Might want to remerge with cse.c.  Later.  */
1594
1595 static int
1596 expr_equiv_p (x, y)
1597      rtx x, y;
1598 {
1599   register int i, j;
1600   register enum rtx_code code;
1601   register const char *fmt;
1602
1603   if (x == y)
1604     return 1;
1605   if (x == 0 || y == 0)
1606     return x == y;
1607
1608   code = GET_CODE (x);
1609   if (code != GET_CODE (y))
1610     return 0;
1611
1612   /* (MULT:SI x y) and (MULT:HI x y) are NOT equivalent.  */
1613   if (GET_MODE (x) != GET_MODE (y))
1614     return 0;
1615
1616   switch (code)
1617     {
1618     case PC:
1619     case CC0:
1620       return x == y;
1621
1622     case CONST_INT:
1623       return INTVAL (x) == INTVAL (y);
1624
1625     case LABEL_REF:
1626       return XEXP (x, 0) == XEXP (y, 0);
1627
1628     case SYMBOL_REF:
1629       return XSTR (x, 0) == XSTR (y, 0);
1630
1631     case REG:
1632       return REGNO (x) == REGNO (y);
1633
1634     case MEM:
1635       /* Can't merge two expressions in different alias sets, since we can
1636          decide that the expression is transparent in a block when it isn't,
1637          due to it being set with the different alias set.  */
1638       if (MEM_ALIAS_SET (x) != MEM_ALIAS_SET (y))
1639         return 0;
1640       break;
1641
1642     /*  For commutative operations, check both orders.  */
1643     case PLUS:
1644     case MULT:
1645     case AND:
1646     case IOR:
1647     case XOR:
1648     case NE:
1649     case EQ:
1650       return ((expr_equiv_p (XEXP (x, 0), XEXP (y, 0))
1651                && expr_equiv_p (XEXP (x, 1), XEXP (y, 1)))
1652               || (expr_equiv_p (XEXP (x, 0), XEXP (y, 1))
1653                   && expr_equiv_p (XEXP (x, 1), XEXP (y, 0))));
1654
1655     default:
1656       break;
1657     }
1658
1659   /* Compare the elements.  If any pair of corresponding elements
1660      fail to match, return 0 for the whole thing.  */
1661
1662   fmt = GET_RTX_FORMAT (code);
1663   for (i = GET_RTX_LENGTH (code) - 1; i >= 0; i--)
1664     {
1665       switch (fmt[i])
1666         {
1667         case 'e':
1668           if (! expr_equiv_p (XEXP (x, i), XEXP (y, i)))
1669             return 0;
1670           break;
1671
1672         case 'E':
1673           if (XVECLEN (x, i) != XVECLEN (y, i))
1674             return 0;
1675           for (j = 0; j < XVECLEN (x, i); j++)
1676             if (! expr_equiv_p (XVECEXP (x, i, j), XVECEXP (y, i, j)))
1677               return 0;
1678           break;
1679
1680         case 's':
1681           if (strcmp (XSTR (x, i), XSTR (y, i)))
1682             return 0;
1683           break;
1684
1685         case 'i':
1686           if (XINT (x, i) != XINT (y, i))
1687             return 0;
1688           break;
1689
1690         case 'w':
1691           if (XWINT (x, i) != XWINT (y, i))
1692             return 0;
1693         break;
1694
1695         case '0':
1696           break;
1697
1698         default:
1699           abort ();
1700         }
1701       }
1702
1703   return 1;
1704 }
1705
1706 /* Insert expression X in INSN in the hash table.
1707    If it is already present, record it as the last occurrence in INSN's
1708    basic block.
1709
1710    MODE is the mode of the value X is being stored into.
1711    It is only used if X is a CONST_INT.
1712
1713    ANTIC_P is non-zero if X is an anticipatable expression.
1714    AVAIL_P is non-zero if X is an available expression.  */
1715
1716 static void
1717 insert_expr_in_table (x, mode, insn, antic_p, avail_p)
1718      rtx x;
1719      enum machine_mode mode;
1720      rtx insn;
1721      int antic_p, avail_p;
1722 {
1723   int found, do_not_record_p;
1724   unsigned int hash;
1725   struct expr *cur_expr, *last_expr = NULL;
1726   struct occr *antic_occr, *avail_occr;
1727   struct occr *last_occr = NULL;
1728
1729   hash = hash_expr (x, mode, &do_not_record_p, expr_hash_table_size);
1730
1731   /* Do not insert expression in table if it contains volatile operands,
1732      or if hash_expr determines the expression is something we don't want
1733      to or can't handle.  */
1734   if (do_not_record_p)
1735     return;
1736
1737   cur_expr = expr_hash_table[hash];
1738   found = 0;
1739
1740   while (cur_expr && ! (found = expr_equiv_p (cur_expr->expr, x)))
1741     {
1742       /* If the expression isn't found, save a pointer to the end of
1743          the list.  */
1744       last_expr = cur_expr;
1745       cur_expr = cur_expr->next_same_hash;
1746     }
1747
1748   if (! found)
1749     {
1750       cur_expr = (struct expr *) gcse_alloc (sizeof (struct expr));
1751       bytes_used += sizeof (struct expr);
1752       if (expr_hash_table[hash] == NULL)
1753         {
1754           /* This is the first pattern that hashed to this index.  */
1755           expr_hash_table[hash] = cur_expr;
1756         }
1757       else
1758         {
1759           /* Add EXPR to end of this hash chain.  */
1760           last_expr->next_same_hash = cur_expr;
1761         }
1762       /* Set the fields of the expr element.  */
1763       cur_expr->expr = x;
1764       cur_expr->bitmap_index = n_exprs++;
1765       cur_expr->next_same_hash = NULL;
1766       cur_expr->antic_occr = NULL;
1767       cur_expr->avail_occr = NULL;
1768     }
1769
1770   /* Now record the occurrence(s).  */
1771
1772   if (antic_p)
1773     {
1774       antic_occr = cur_expr->antic_occr;
1775
1776       /* Search for another occurrence in the same basic block.  */
1777       while (antic_occr && BLOCK_NUM (antic_occr->insn) != BLOCK_NUM (insn))
1778         {
1779           /* If an occurrence isn't found, save a pointer to the end of
1780              the list.  */
1781           last_occr = antic_occr;
1782           antic_occr = antic_occr->next;
1783         }
1784
1785       if (antic_occr)
1786         {
1787           /* Found another instance of the expression in the same basic block.
1788              Prefer the currently recorded one.  We want the first one in the
1789              block and the block is scanned from start to end.  */
1790           ; /* nothing to do */
1791         }
1792       else
1793         {
1794           /* First occurrence of this expression in this basic block.  */
1795           antic_occr = (struct occr *) gcse_alloc (sizeof (struct occr));
1796           bytes_used += sizeof (struct occr);
1797           /* First occurrence of this expression in any block?  */
1798           if (cur_expr->antic_occr == NULL)
1799             cur_expr->antic_occr = antic_occr;
1800           else
1801             last_occr->next = antic_occr;
1802           antic_occr->insn = insn;
1803           antic_occr->next = NULL;
1804         }
1805     }
1806
1807   if (avail_p)
1808     {
1809       avail_occr = cur_expr->avail_occr;
1810
1811       /* Search for another occurrence in the same basic block.  */
1812       while (avail_occr && BLOCK_NUM (avail_occr->insn) != BLOCK_NUM (insn))
1813         {
1814           /* If an occurrence isn't found, save a pointer to the end of
1815              the list.  */
1816           last_occr = avail_occr;
1817           avail_occr = avail_occr->next;
1818         }
1819
1820       if (avail_occr)
1821         {
1822           /* Found another instance of the expression in the same basic block.
1823              Prefer this occurrence to the currently recorded one.  We want
1824              the last one in the block and the block is scanned from start
1825              to end.  */
1826           avail_occr->insn = insn;
1827         }
1828       else
1829         {
1830           /* First occurrence of this expression in this basic block.  */
1831           avail_occr = (struct occr *) gcse_alloc (sizeof (struct occr));
1832           bytes_used += sizeof (struct occr);
1833           /* First occurrence of this expression in any block?  */
1834           if (cur_expr->avail_occr == NULL)
1835             cur_expr->avail_occr = avail_occr;
1836           else
1837             last_occr->next = avail_occr;
1838           avail_occr->insn = insn;
1839           avail_occr->next = NULL;
1840         }
1841     }
1842 }
1843
1844 /* Insert pattern X in INSN in the hash table.
1845    X is a SET of a reg to either another reg or a constant.
1846    If it is already present, record it as the last occurrence in INSN's
1847    basic block.  */
1848
1849 static void
1850 insert_set_in_table (x, insn)
1851      rtx x;
1852      rtx insn;
1853 {
1854   int found;
1855   unsigned int hash;
1856   struct expr *cur_expr, *last_expr = NULL;
1857   struct occr *cur_occr, *last_occr = NULL;
1858
1859   if (GET_CODE (x) != SET
1860       || GET_CODE (SET_DEST (x)) != REG)
1861     abort ();
1862
1863   hash = hash_set (REGNO (SET_DEST (x)), set_hash_table_size);
1864
1865   cur_expr = set_hash_table[hash];
1866   found = 0;
1867
1868   while (cur_expr && ! (found = expr_equiv_p (cur_expr->expr, x)))
1869     {
1870       /* If the expression isn't found, save a pointer to the end of
1871          the list.  */
1872       last_expr = cur_expr;
1873       cur_expr = cur_expr->next_same_hash;
1874     }
1875
1876   if (! found)
1877     {
1878       cur_expr = (struct expr *) gcse_alloc (sizeof (struct expr));
1879       bytes_used += sizeof (struct expr);
1880       if (set_hash_table[hash] == NULL)
1881         {
1882           /* This is the first pattern that hashed to this index.  */
1883           set_hash_table[hash] = cur_expr;
1884         }
1885       else
1886         {
1887           /* Add EXPR to end of this hash chain.  */
1888           last_expr->next_same_hash = cur_expr;
1889         }
1890       /* Set the fields of the expr element.
1891          We must copy X because it can be modified when copy propagation is
1892          performed on its operands.  */
1893       /* ??? Should this go in a different obstack?  */
1894       cur_expr->expr = copy_rtx (x);
1895       cur_expr->bitmap_index = n_sets++;
1896       cur_expr->next_same_hash = NULL;
1897       cur_expr->antic_occr = NULL;
1898       cur_expr->avail_occr = NULL;
1899     }
1900
1901   /* Now record the occurrence.  */
1902
1903   cur_occr = cur_expr->avail_occr;
1904
1905   /* Search for another occurrence in the same basic block.  */
1906   while (cur_occr && BLOCK_NUM (cur_occr->insn) != BLOCK_NUM (insn))
1907     {
1908       /* If an occurrence isn't found, save a pointer to the end of
1909          the list.  */
1910       last_occr = cur_occr;
1911       cur_occr = cur_occr->next;
1912     }
1913
1914   if (cur_occr)
1915     {
1916       /* Found another instance of the expression in the same basic block.
1917          Prefer this occurrence to the currently recorded one.  We want
1918          the last one in the block and the block is scanned from start
1919          to end.  */
1920       cur_occr->insn = insn;
1921     }
1922   else
1923     {
1924       /* First occurrence of this expression in this basic block.  */
1925       cur_occr = (struct occr *) gcse_alloc (sizeof (struct occr));
1926       bytes_used += sizeof (struct occr);
1927       /* First occurrence of this expression in any block?  */
1928       if (cur_expr->avail_occr == NULL)
1929         cur_expr->avail_occr = cur_occr;
1930       else
1931         last_occr->next = cur_occr;
1932       cur_occr->insn = insn;
1933       cur_occr->next = NULL;
1934     }
1935 }
1936
1937 /* Scan pattern PAT of INSN and add an entry to the hash table.
1938    If SET_P is non-zero, this is for the assignment hash table,
1939    otherwise it is for the expression hash table.  */
1940
1941 static void
1942 hash_scan_set (pat, insn, set_p)
1943      rtx pat, insn;
1944      int set_p;
1945 {
1946   rtx src = SET_SRC (pat);
1947   rtx dest = SET_DEST (pat);
1948
1949   if (GET_CODE (src) == CALL)
1950     hash_scan_call (src, insn);
1951
1952   if (GET_CODE (dest) == REG)
1953     {
1954       int regno = REGNO (dest);
1955       rtx tmp;
1956
1957       /* Only record sets of pseudo-regs in the hash table.  */
1958       if (! set_p
1959           && regno >= FIRST_PSEUDO_REGISTER
1960           /* Don't GCSE something if we can't do a reg/reg copy.  */
1961           && can_copy_p [GET_MODE (dest)]
1962           /* Is SET_SRC something we want to gcse?  */
1963           && want_to_gcse_p (src))
1964         {
1965           /* An expression is not anticipatable if its operands are
1966              modified before this insn.  */
1967           int antic_p = oprs_anticipatable_p (src, insn);
1968           /* An expression is not available if its operands are
1969              subsequently modified, including this insn.  */
1970           int avail_p = oprs_available_p (src, insn);
1971           insert_expr_in_table (src, GET_MODE (dest), insn, antic_p, avail_p);
1972         }
1973       /* Record sets for constant/copy propagation.  */
1974       else if (set_p
1975                && regno >= FIRST_PSEUDO_REGISTER
1976                && ((GET_CODE (src) == REG
1977                     && REGNO (src) >= FIRST_PSEUDO_REGISTER
1978                     && can_copy_p [GET_MODE (dest)])
1979                    || GET_CODE (src) == CONST_INT
1980                    || GET_CODE (src) == SYMBOL_REF
1981                    || GET_CODE (src) == CONST_DOUBLE)
1982                /* A copy is not available if its src or dest is subsequently
1983                   modified.  Here we want to search from INSN+1 on, but
1984                   oprs_available_p searches from INSN on.  */
1985                && (insn == BLOCK_END (BLOCK_NUM (insn))
1986                    || ((tmp = next_nonnote_insn (insn)) != NULL_RTX
1987                        && oprs_available_p (pat, tmp))))
1988         insert_set_in_table (pat, insn);
1989     }
1990 }
1991
1992 static void
1993 hash_scan_clobber (x, insn)
1994      rtx x ATTRIBUTE_UNUSED, insn ATTRIBUTE_UNUSED;
1995 {
1996   /* Currently nothing to do.  */
1997 }
1998
1999 static void
2000 hash_scan_call (x, insn)
2001      rtx x ATTRIBUTE_UNUSED, insn ATTRIBUTE_UNUSED;
2002 {
2003   /* Currently nothing to do.  */
2004 }
2005
2006 /* Process INSN and add hash table entries as appropriate.
2007
2008    Only available expressions that set a single pseudo-reg are recorded.
2009
2010    Single sets in a PARALLEL could be handled, but it's an extra complication
2011    that isn't dealt with right now.  The trick is handling the CLOBBERs that
2012    are also in the PARALLEL.  Later.
2013
2014    If SET_P is non-zero, this is for the assignment hash table,
2015    otherwise it is for the expression hash table.
2016    If IN_LIBCALL_BLOCK nonzero, we are in a libcall block, and should
2017    not record any expressions.  */
2018
2019 static void
2020 hash_scan_insn (insn, set_p, in_libcall_block)
2021      rtx insn;
2022      int set_p;
2023      int in_libcall_block;
2024 {
2025   rtx pat = PATTERN (insn);
2026
2027   /* Pick out the sets of INSN and for other forms of instructions record
2028      what's been modified.  */
2029
2030   if (GET_CODE (pat) == SET && ! in_libcall_block)
2031     {
2032       /* Ignore obvious no-ops.  */
2033       if (SET_SRC (pat) != SET_DEST (pat))
2034         hash_scan_set (pat, insn, set_p);
2035     }
2036   else if (GET_CODE (pat) == PARALLEL)
2037     {
2038       int i;
2039
2040       for (i = 0; i < XVECLEN (pat, 0); i++)
2041         {
2042           rtx x = XVECEXP (pat, 0, i);
2043
2044           if (GET_CODE (x) == SET)
2045             {
2046               if (GET_CODE (SET_SRC (x)) == CALL)
2047                 hash_scan_call (SET_SRC (x), insn);
2048             }
2049           else if (GET_CODE (x) == CLOBBER)
2050             hash_scan_clobber (x, insn);
2051           else if (GET_CODE (x) == CALL)
2052             hash_scan_call (x, insn);
2053         }
2054     }
2055   else if (GET_CODE (pat) == CLOBBER)
2056     hash_scan_clobber (pat, insn);
2057   else if (GET_CODE (pat) == CALL)
2058     hash_scan_call (pat, insn);
2059 }
2060
2061 static void
2062 dump_hash_table (file, name, table, table_size, total_size)
2063      FILE *file;
2064      const char *name;
2065      struct expr **table;
2066      int table_size, total_size;
2067 {
2068   int i;
2069   /* Flattened out table, so it's printed in proper order.  */
2070   struct expr **flat_table;
2071   unsigned int *hash_val;
2072
2073   flat_table
2074     = (struct expr **) xcalloc (total_size, sizeof (struct expr *));
2075   hash_val = (unsigned int *) xmalloc (total_size * sizeof (unsigned int));
2076
2077   for (i = 0; i < table_size; i++)
2078     {
2079       struct expr *expr;
2080
2081       for (expr = table[i]; expr != NULL; expr = expr->next_same_hash)
2082         {
2083           flat_table[expr->bitmap_index] = expr;
2084           hash_val[expr->bitmap_index] = i;
2085         }
2086     }
2087
2088   fprintf (file, "%s hash table (%d buckets, %d entries)\n",
2089            name, table_size, total_size);
2090
2091   for (i = 0; i < total_size; i++)
2092     {
2093       struct expr *expr = flat_table[i];
2094
2095       fprintf (file, "Index %d (hash value %d)\n  ",
2096                expr->bitmap_index, hash_val[i]);
2097       print_rtl (file, expr->expr);
2098       fprintf (file, "\n");
2099     }
2100
2101   fprintf (file, "\n");
2102
2103   /* Clean up.  */
2104   free (flat_table);
2105   free (hash_val);
2106 }
2107
2108 /* Record register first/last/block set information for REGNO in INSN.
2109    reg_first_set records the first place in the block where the register
2110    is set and is used to compute "anticipatability".
2111    reg_last_set records the last place in the block where the register
2112    is set and is used to compute "availability".
2113    reg_set_in_block records whether the register is set in the block
2114    and is used to compute "transparency".  */
2115
2116 static void
2117 record_last_reg_set_info (insn, regno)
2118      rtx insn;
2119      int regno;
2120 {
2121   if (reg_first_set[regno] == NEVER_SET)
2122     reg_first_set[regno] = INSN_CUID (insn);
2123   reg_last_set[regno] = INSN_CUID (insn);
2124   SET_BIT (reg_set_in_block[BLOCK_NUM (insn)], regno);
2125 }
2126
2127 /* Record memory first/last/block set information for INSN.  */
2128
2129 static void
2130 record_last_mem_set_info (insn)
2131      rtx insn;
2132 {
2133   if (mem_first_set == NEVER_SET)
2134     mem_first_set = INSN_CUID (insn);
2135   mem_last_set = INSN_CUID (insn);
2136   mem_set_in_block[BLOCK_NUM (insn)] = 1;
2137 }
2138
2139 /* Called from compute_hash_table via note_stores to handle one
2140    SET or CLOBBER in an insn.  DATA is really the instruction in which
2141    the SET is taking place.  */
2142
2143 static void
2144 record_last_set_info (dest, setter, data)
2145      rtx dest, setter ATTRIBUTE_UNUSED;
2146      void *data;
2147 {
2148   rtx last_set_insn = (rtx) data;
2149
2150   if (GET_CODE (dest) == SUBREG)
2151     dest = SUBREG_REG (dest);
2152
2153   if (GET_CODE (dest) == REG)
2154     record_last_reg_set_info (last_set_insn, REGNO (dest));
2155   else if (GET_CODE (dest) == MEM
2156            /* Ignore pushes, they clobber nothing.  */
2157            && ! push_operand (dest, GET_MODE (dest)))
2158     record_last_mem_set_info (last_set_insn);
2159 }
2160
2161 /* Top level function to create an expression or assignment hash table.
2162
2163    Expression entries are placed in the hash table if
2164    - they are of the form (set (pseudo-reg) src),
2165    - src is something we want to perform GCSE on,
2166    - none of the operands are subsequently modified in the block
2167
2168    Assignment entries are placed in the hash table if
2169    - they are of the form (set (pseudo-reg) src),
2170    - src is something we want to perform const/copy propagation on,
2171    - none of the operands or target are subsequently modified in the block
2172    Currently src must be a pseudo-reg or a const_int.
2173
2174    F is the first insn.
2175    SET_P is non-zero for computing the assignment hash table.  */
2176
2177 static void
2178 compute_hash_table (set_p)
2179      int set_p;
2180 {
2181   int bb;
2182
2183   /* While we compute the hash table we also compute a bit array of which
2184      registers are set in which blocks.
2185      We also compute which blocks set memory, in the absence of aliasing
2186      support [which is TODO].
2187      ??? This isn't needed during const/copy propagation, but it's cheap to
2188      compute.  Later.  */
2189   sbitmap_vector_zero (reg_set_in_block, n_basic_blocks);
2190   bzero ((char *) mem_set_in_block, n_basic_blocks);
2191
2192   /* Some working arrays used to track first and last set in each block.  */
2193   /* ??? One could use alloca here, but at some size a threshold is crossed
2194      beyond which one should use malloc.  Are we at that threshold here?  */
2195   reg_first_set = (int *) gmalloc (max_gcse_regno * sizeof (int));
2196   reg_last_set = (int *) gmalloc (max_gcse_regno * sizeof (int));
2197
2198   for (bb = 0; bb < n_basic_blocks; bb++)
2199     {
2200       rtx insn;
2201       int regno;
2202       int in_libcall_block;
2203       int i;
2204
2205       /* First pass over the instructions records information used to
2206          determine when registers and memory are first and last set.
2207          ??? The mem_set_in_block and hard-reg reg_set_in_block computation
2208          could be moved to compute_sets since they currently don't change.  */
2209
2210       for (i = 0; i < max_gcse_regno; i++)
2211         reg_first_set[i] = reg_last_set[i] = NEVER_SET;
2212       mem_first_set = NEVER_SET;
2213       mem_last_set = NEVER_SET;
2214
2215       for (insn = BLOCK_HEAD (bb);
2216            insn && insn != NEXT_INSN (BLOCK_END (bb));
2217            insn = NEXT_INSN (insn))
2218         {
2219 #ifdef NON_SAVING_SETJMP
2220           if (NON_SAVING_SETJMP && GET_CODE (insn) == NOTE
2221               && NOTE_LINE_NUMBER (insn) == NOTE_INSN_SETJMP)
2222             {
2223               for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
2224                 record_last_reg_set_info (insn, regno);
2225               continue;
2226             }
2227 #endif
2228
2229           if (GET_RTX_CLASS (GET_CODE (insn)) != 'i')
2230             continue;
2231
2232           if (GET_CODE (insn) == CALL_INSN)
2233             {
2234               for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
2235                 if ((call_used_regs[regno]
2236                      && regno != STACK_POINTER_REGNUM
2237 #if HARD_FRAME_POINTER_REGNUM != FRAME_POINTER_REGNUM
2238                      && regno != HARD_FRAME_POINTER_REGNUM
2239 #endif
2240 #if ARG_POINTER_REGNUM != FRAME_POINTER_REGNUM
2241                      && ! (regno == ARG_POINTER_REGNUM && fixed_regs[regno])
2242 #endif
2243 #if defined (PIC_OFFSET_TABLE_REGNUM) && !defined (PIC_OFFSET_TABLE_REG_CALL_CLOBBERED)
2244                      && ! (regno == PIC_OFFSET_TABLE_REGNUM && flag_pic)
2245 #endif
2246
2247                      && regno != FRAME_POINTER_REGNUM)
2248                     || global_regs[regno])
2249                   record_last_reg_set_info (insn, regno);
2250               if (! CONST_CALL_P (insn))
2251                 record_last_mem_set_info (insn);
2252             }
2253
2254           note_stores (PATTERN (insn), record_last_set_info, insn);
2255         }
2256
2257       /* The next pass builds the hash table.  */
2258
2259       for (insn = BLOCK_HEAD (bb), in_libcall_block = 0;
2260            insn && insn != NEXT_INSN (BLOCK_END (bb));
2261            insn = NEXT_INSN (insn))
2262         {
2263           if (GET_RTX_CLASS (GET_CODE (insn)) == 'i')
2264             {
2265               if (find_reg_note (insn, REG_LIBCALL, NULL_RTX))
2266                 in_libcall_block = 1;
2267               else if (find_reg_note (insn, REG_RETVAL, NULL_RTX))
2268                 in_libcall_block = 0;
2269               hash_scan_insn (insn, set_p, in_libcall_block);
2270             }
2271         }
2272     }
2273
2274   free (reg_first_set);
2275   free (reg_last_set);
2276   /* Catch bugs early.  */
2277   reg_first_set = reg_last_set = 0;
2278 }
2279
2280 /* Allocate space for the set hash table.
2281    N_INSNS is the number of instructions in the function.
2282    It is used to determine the number of buckets to use.  */
2283
2284 static void
2285 alloc_set_hash_table (n_insns)
2286      int n_insns;
2287 {
2288   int n;
2289
2290   set_hash_table_size = n_insns / 4;
2291   if (set_hash_table_size < 11)
2292     set_hash_table_size = 11;
2293   /* Attempt to maintain efficient use of hash table.
2294      Making it an odd number is simplest for now.
2295      ??? Later take some measurements.  */
2296   set_hash_table_size |= 1;
2297   n = set_hash_table_size * sizeof (struct expr *);
2298   set_hash_table = (struct expr **) gmalloc (n);
2299 }
2300
2301 /* Free things allocated by alloc_set_hash_table.  */
2302
2303 static void
2304 free_set_hash_table ()
2305 {
2306   free (set_hash_table);
2307 }
2308
2309 /* Compute the hash table for doing copy/const propagation.  */
2310
2311 static void
2312 compute_set_hash_table ()
2313 {
2314   /* Initialize count of number of entries in hash table.  */
2315   n_sets = 0;
2316   bzero ((char *) set_hash_table, set_hash_table_size * sizeof (struct expr *));
2317
2318   compute_hash_table (1);
2319 }
2320
2321 /* Allocate space for the expression hash table.
2322    N_INSNS is the number of instructions in the function.
2323    It is used to determine the number of buckets to use.  */
2324
2325 static void
2326 alloc_expr_hash_table (n_insns)
2327      int n_insns;
2328 {
2329   int n;
2330
2331   expr_hash_table_size = n_insns / 2;
2332   /* Make sure the amount is usable.  */
2333   if (expr_hash_table_size < 11)
2334     expr_hash_table_size = 11;
2335   /* Attempt to maintain efficient use of hash table.
2336      Making it an odd number is simplest for now.
2337      ??? Later take some measurements.  */
2338   expr_hash_table_size |= 1;
2339   n = expr_hash_table_size * sizeof (struct expr *);
2340   expr_hash_table = (struct expr **) gmalloc (n);
2341 }
2342
2343 /* Free things allocated by alloc_expr_hash_table.  */
2344
2345 static void
2346 free_expr_hash_table ()
2347 {
2348   free (expr_hash_table);
2349 }
2350
2351 /* Compute the hash table for doing GCSE.  */
2352
2353 static void
2354 compute_expr_hash_table ()
2355 {
2356   /* Initialize count of number of entries in hash table.  */
2357   n_exprs = 0;
2358   bzero ((char *) expr_hash_table, expr_hash_table_size * sizeof (struct expr *));
2359
2360   compute_hash_table (0);
2361 }
2362 \f
2363 /* Expression tracking support.  */
2364
2365 /* Lookup pattern PAT in the expression table.
2366    The result is a pointer to the table entry, or NULL if not found.  */
2367
2368 static struct expr *
2369 lookup_expr (pat)
2370      rtx pat;
2371 {
2372   int do_not_record_p;
2373   unsigned int hash = hash_expr (pat, GET_MODE (pat), &do_not_record_p,
2374                                  expr_hash_table_size);
2375   struct expr *expr;
2376
2377   if (do_not_record_p)
2378     return NULL;
2379
2380   expr = expr_hash_table[hash];
2381
2382   while (expr && ! expr_equiv_p (expr->expr, pat))
2383     expr = expr->next_same_hash;
2384
2385   return expr;
2386 }
2387
2388 /* Lookup REGNO in the set table.
2389    If PAT is non-NULL look for the entry that matches it, otherwise return
2390    the first entry for REGNO.
2391    The result is a pointer to the table entry, or NULL if not found.  */
2392
2393 static struct expr *
2394 lookup_set (regno, pat)
2395      int regno;
2396      rtx pat;
2397 {
2398   unsigned int hash = hash_set (regno, set_hash_table_size);
2399   struct expr *expr;
2400
2401   expr = set_hash_table[hash];
2402
2403   if (pat)
2404     {
2405       while (expr && ! expr_equiv_p (expr->expr, pat))
2406         expr = expr->next_same_hash;
2407     }
2408   else
2409     {
2410       while (expr && REGNO (SET_DEST (expr->expr)) != regno)
2411         expr = expr->next_same_hash;
2412     }
2413
2414   return expr;
2415 }
2416
2417 /* Return the next entry for REGNO in list EXPR.  */
2418
2419 static struct expr *
2420 next_set (regno, expr)
2421      int regno;
2422      struct expr *expr;
2423 {
2424   do
2425     expr = expr->next_same_hash;
2426   while (expr && REGNO (SET_DEST (expr->expr)) != regno);
2427   return expr;
2428 }
2429
2430 /* Reset tables used to keep track of what's still available [since the
2431    start of the block].  */
2432
2433 static void
2434 reset_opr_set_tables ()
2435 {
2436   /* Maintain a bitmap of which regs have been set since beginning of
2437      the block.  */
2438   sbitmap_zero (reg_set_bitmap);
2439   /* Also keep a record of the last instruction to modify memory.
2440      For now this is very trivial, we only record whether any memory
2441      location has been modified.  */
2442   mem_last_set = 0;
2443 }
2444
2445 /* Return non-zero if the operands of X are not set before INSN in
2446    INSN's basic block.  */
2447
2448 static int
2449 oprs_not_set_p (x, insn)
2450      rtx x, insn;
2451 {
2452   int i;
2453   enum rtx_code code;
2454   const char *fmt;
2455
2456   /* repeat is used to turn tail-recursion into iteration.  */
2457 repeat:
2458
2459   if (x == 0)
2460     return 1;
2461
2462   code = GET_CODE (x);
2463   switch (code)
2464     {
2465     case PC:
2466     case CC0:
2467     case CONST:
2468     case CONST_INT:
2469     case CONST_DOUBLE:
2470     case SYMBOL_REF:
2471     case LABEL_REF:
2472     case ADDR_VEC:
2473     case ADDR_DIFF_VEC:
2474       return 1;
2475
2476     case MEM:
2477       if (mem_last_set != 0)
2478         return 0;
2479       x = XEXP (x, 0);
2480       goto repeat;
2481
2482     case REG:
2483       return ! TEST_BIT (reg_set_bitmap, REGNO (x));
2484
2485     default:
2486       break;
2487     }
2488
2489   fmt = GET_RTX_FORMAT (code);
2490   for (i = GET_RTX_LENGTH (code) - 1; i >= 0; i--)
2491     {
2492       if (fmt[i] == 'e')
2493         {
2494           int not_set_p;
2495           /* If we are about to do the last recursive call
2496              needed at this level, change it into iteration.
2497              This function is called enough to be worth it.  */
2498           if (i == 0)
2499             {
2500               x = XEXP (x, 0);
2501               goto repeat;
2502             }
2503           not_set_p = oprs_not_set_p (XEXP (x, i), insn);
2504           if (! not_set_p)
2505             return 0;
2506         }
2507       else if (fmt[i] == 'E')
2508         {
2509           int j;
2510           for (j = 0; j < XVECLEN (x, i); j++)
2511             {
2512               int not_set_p = oprs_not_set_p (XVECEXP (x, i, j), insn);
2513               if (! not_set_p)
2514                 return 0;
2515             }
2516         }
2517     }
2518
2519   return 1;
2520 }
2521
2522 /* Mark things set by a CALL.  */
2523
2524 static void
2525 mark_call (insn)
2526      rtx insn;
2527 {
2528   mem_last_set = INSN_CUID (insn);
2529 }
2530
2531 /* Mark things set by a SET.  */
2532
2533 static void
2534 mark_set (pat, insn)
2535      rtx pat, insn;
2536 {
2537   rtx dest = SET_DEST (pat);
2538
2539   while (GET_CODE (dest) == SUBREG
2540          || GET_CODE (dest) == ZERO_EXTRACT
2541          || GET_CODE (dest) == SIGN_EXTRACT
2542          || GET_CODE (dest) == STRICT_LOW_PART)
2543     dest = XEXP (dest, 0);
2544
2545   if (GET_CODE (dest) == REG)
2546     SET_BIT (reg_set_bitmap, REGNO (dest));
2547   else if (GET_CODE (dest) == MEM)
2548     mem_last_set = INSN_CUID (insn);
2549
2550   if (GET_CODE (SET_SRC (pat)) == CALL)
2551     mark_call (insn);
2552 }
2553
2554 /* Record things set by a CLOBBER.  */
2555
2556 static void
2557 mark_clobber (pat, insn)
2558      rtx pat, insn;
2559 {
2560   rtx clob = XEXP (pat, 0);
2561
2562   while (GET_CODE (clob) == SUBREG || GET_CODE (clob) == STRICT_LOW_PART)
2563     clob = XEXP (clob, 0);
2564
2565   if (GET_CODE (clob) == REG)
2566     SET_BIT (reg_set_bitmap, REGNO (clob));
2567   else
2568     mem_last_set = INSN_CUID (insn);
2569 }
2570
2571 /* Record things set by INSN.
2572    This data is used by oprs_not_set_p.  */
2573
2574 static void
2575 mark_oprs_set (insn)
2576      rtx insn;
2577 {
2578   rtx pat = PATTERN (insn);
2579
2580   if (GET_CODE (pat) == SET)
2581     mark_set (pat, insn);
2582   else if (GET_CODE (pat) == PARALLEL)
2583     {
2584       int i;
2585
2586       for (i = 0; i < XVECLEN (pat, 0); i++)
2587         {
2588           rtx x = XVECEXP (pat, 0, i);
2589
2590           if (GET_CODE (x) == SET)
2591             mark_set (x, insn);
2592           else if (GET_CODE (x) == CLOBBER)
2593             mark_clobber (x, insn);
2594           else if (GET_CODE (x) == CALL)
2595             mark_call (insn);
2596         }
2597     }
2598   else if (GET_CODE (pat) == CLOBBER)
2599     mark_clobber (pat, insn);
2600   else if (GET_CODE (pat) == CALL)
2601     mark_call (insn);
2602 }
2603
2604 \f
2605 /* Classic GCSE reaching definition support.  */
2606
2607 /* Allocate reaching def variables.  */
2608
2609 static void
2610 alloc_rd_mem (n_blocks, n_insns)
2611      int n_blocks, n_insns;
2612 {
2613   rd_kill = (sbitmap *) sbitmap_vector_alloc (n_blocks, n_insns);
2614   sbitmap_vector_zero (rd_kill, n_basic_blocks);
2615
2616   rd_gen = (sbitmap *) sbitmap_vector_alloc (n_blocks, n_insns);
2617   sbitmap_vector_zero (rd_gen, n_basic_blocks);
2618
2619   reaching_defs = (sbitmap *) sbitmap_vector_alloc (n_blocks, n_insns);
2620   sbitmap_vector_zero (reaching_defs, n_basic_blocks);
2621
2622   rd_out = (sbitmap *) sbitmap_vector_alloc (n_blocks, n_insns);
2623   sbitmap_vector_zero (rd_out, n_basic_blocks);
2624 }
2625
2626 /* Free reaching def variables.  */
2627
2628 static void
2629 free_rd_mem ()
2630 {
2631   free (rd_kill);
2632   free (rd_gen);
2633   free (reaching_defs);
2634   free (rd_out);
2635 }
2636
2637 /* Add INSN to the kills of BB.
2638    REGNO, set in BB, is killed by INSN.  */
2639
2640 static void
2641 handle_rd_kill_set (insn, regno, bb)
2642      rtx insn;
2643      int regno, bb;
2644 {
2645   struct reg_set *this_reg = reg_set_table[regno];
2646
2647   while (this_reg)
2648     {
2649       if (BLOCK_NUM (this_reg->insn) != BLOCK_NUM (insn))
2650         SET_BIT (rd_kill[bb], INSN_CUID (this_reg->insn));
2651       this_reg = this_reg->next;
2652     }
2653 }
2654
2655 /* Compute the set of kill's for reaching definitions.  */
2656
2657 static void
2658 compute_kill_rd ()
2659 {
2660   int bb,cuid;
2661
2662   /* For each block
2663        For each set bit in `gen' of the block (i.e each insn which
2664            generates a definition in the block)
2665          Call the reg set by the insn corresponding to that bit regx
2666          Look at the linked list starting at reg_set_table[regx]
2667          For each setting of regx in the linked list, which is not in
2668              this block
2669            Set the bit in `kill' corresponding to that insn
2670     */
2671
2672   for (bb = 0; bb < n_basic_blocks; bb++)
2673     {
2674       for (cuid = 0; cuid < max_cuid; cuid++)
2675         {
2676           if (TEST_BIT (rd_gen[bb], cuid))
2677             {
2678               rtx insn = CUID_INSN (cuid);
2679               rtx pat = PATTERN (insn);
2680
2681               if (GET_CODE (insn) == CALL_INSN)
2682                 {
2683                   int regno;
2684
2685                   for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
2686                     {
2687                       if ((call_used_regs[regno]
2688                            && regno != STACK_POINTER_REGNUM
2689 #if HARD_FRAME_POINTER_REGNUM != FRAME_POINTER_REGNUM
2690                            && regno != HARD_FRAME_POINTER_REGNUM
2691 #endif
2692 #if ARG_POINTER_REGNUM != FRAME_POINTER_REGNUM
2693                            && ! (regno == ARG_POINTER_REGNUM
2694                                  && fixed_regs[regno])
2695 #endif
2696 #if defined (PIC_OFFSET_TABLE_REGNUM) && !defined (PIC_OFFSET_TABLE_REG_CALL_CLOBBERED)
2697                            && ! (regno == PIC_OFFSET_TABLE_REGNUM && flag_pic)
2698 #endif
2699                            && regno != FRAME_POINTER_REGNUM)
2700                           || global_regs[regno])
2701                         handle_rd_kill_set (insn, regno, bb);
2702                     }
2703                 }
2704
2705               if (GET_CODE (pat) == PARALLEL)
2706                 {
2707                   int i;
2708
2709                   /* We work backwards because ... */
2710                   for (i = XVECLEN (pat, 0) - 1; i >= 0; i--)
2711                     {
2712                       enum rtx_code code = GET_CODE (XVECEXP (pat, 0, i));
2713                       if ((code == SET || code == CLOBBER)
2714                           && GET_CODE (XEXP (XVECEXP (pat, 0, i), 0)) == REG)
2715                         handle_rd_kill_set (insn,
2716                                             REGNO (XEXP (XVECEXP (pat, 0, i), 0)),
2717                                             bb);
2718                     }
2719                 }
2720               else if (GET_CODE (pat) == SET)
2721                 {
2722                   if (GET_CODE (SET_DEST (pat)) == REG)
2723                     {
2724                       /* Each setting of this register outside of this block
2725                          must be marked in the set of kills in this block.  */
2726                       handle_rd_kill_set (insn, REGNO (SET_DEST (pat)), bb);
2727                     }
2728                 }
2729               /* FIXME: CLOBBER? */
2730             }
2731         }
2732     }
2733 }
2734
2735 /* Compute the reaching definitions as in
2736    Compilers Principles, Techniques, and Tools. Aho, Sethi, Ullman,
2737    Chapter 10.  It is the same algorithm as used for computing available
2738    expressions but applied to the gens and kills of reaching definitions.  */
2739
2740 static void
2741 compute_rd ()
2742 {
2743   int bb, changed, passes;
2744
2745   for (bb = 0; bb < n_basic_blocks; bb++)
2746     sbitmap_copy (rd_out[bb] /*dst*/, rd_gen[bb] /*src*/);
2747
2748   passes = 0;
2749   changed = 1;
2750   while (changed)
2751     {
2752       changed = 0;
2753       for (bb = 0; bb < n_basic_blocks; bb++)
2754         {
2755           sbitmap_union_of_preds (reaching_defs[bb], rd_out, bb);
2756           changed |= sbitmap_union_of_diff (rd_out[bb], rd_gen[bb],
2757                                             reaching_defs[bb], rd_kill[bb]);
2758         }
2759       passes++;
2760     }
2761
2762   if (gcse_file)
2763     fprintf (gcse_file, "reaching def computation: %d passes\n", passes);
2764 }
2765 \f
2766 /* Classic GCSE available expression support.  */
2767
2768 /* Allocate memory for available expression computation.  */
2769
2770 static void
2771 alloc_avail_expr_mem (n_blocks, n_exprs)
2772      int n_blocks, n_exprs;
2773 {
2774   ae_kill = (sbitmap *) sbitmap_vector_alloc (n_blocks, n_exprs);
2775   sbitmap_vector_zero (ae_kill, n_basic_blocks);
2776
2777   ae_gen = (sbitmap *) sbitmap_vector_alloc (n_blocks, n_exprs);
2778   sbitmap_vector_zero (ae_gen, n_basic_blocks);
2779
2780   ae_in = (sbitmap *) sbitmap_vector_alloc (n_blocks, n_exprs);
2781   sbitmap_vector_zero (ae_in, n_basic_blocks);
2782
2783   ae_out = (sbitmap *) sbitmap_vector_alloc (n_blocks, n_exprs);
2784   sbitmap_vector_zero (ae_out, n_basic_blocks);
2785
2786   u_bitmap = (sbitmap) sbitmap_alloc (n_exprs);
2787   sbitmap_ones (u_bitmap);
2788 }
2789
2790 static void
2791 free_avail_expr_mem ()
2792 {
2793   free (ae_kill);
2794   free (ae_gen);
2795   free (ae_in);
2796   free (ae_out);
2797   free (u_bitmap);
2798 }
2799
2800 /* Compute the set of available expressions generated in each basic block.  */
2801
2802 static void
2803 compute_ae_gen ()
2804 {
2805   int i;
2806
2807   /* For each recorded occurrence of each expression, set ae_gen[bb][expr].
2808      This is all we have to do because an expression is not recorded if it
2809      is not available, and the only expressions we want to work with are the
2810      ones that are recorded.  */
2811
2812   for (i = 0; i < expr_hash_table_size; i++)
2813     {
2814       struct expr *expr = expr_hash_table[i];
2815       while (expr != NULL)
2816         {
2817           struct occr *occr = expr->avail_occr;
2818           while (occr != NULL)
2819             {
2820               SET_BIT (ae_gen[BLOCK_NUM (occr->insn)], expr->bitmap_index);
2821               occr = occr->next;
2822             }
2823           expr = expr->next_same_hash;
2824         }
2825     }
2826 }
2827
2828 /* Return non-zero if expression X is killed in BB.  */
2829
2830 static int
2831 expr_killed_p (x, bb)
2832      rtx x;
2833      int bb;
2834 {
2835   int i;
2836   enum rtx_code code;
2837   const char *fmt;
2838
2839   /* repeat is used to turn tail-recursion into iteration.  */
2840  repeat:
2841
2842   if (x == 0)
2843     return 1;
2844
2845   code = GET_CODE (x);
2846   switch (code)
2847     {
2848     case REG:
2849       return TEST_BIT (reg_set_in_block[bb], REGNO (x));
2850
2851     case MEM:
2852       if (mem_set_in_block[bb])
2853         return 1;
2854       x = XEXP (x, 0);
2855       goto repeat;
2856
2857     case PC:
2858     case CC0: /*FIXME*/
2859     case CONST:
2860     case CONST_INT:
2861     case CONST_DOUBLE:
2862     case SYMBOL_REF:
2863     case LABEL_REF:
2864     case ADDR_VEC:
2865     case ADDR_DIFF_VEC:
2866       return 0;
2867
2868     default:
2869       break;
2870     }
2871
2872   i = GET_RTX_LENGTH (code) - 1;
2873   fmt = GET_RTX_FORMAT (code);
2874   for (; i >= 0; i--)
2875     {
2876       if (fmt[i] == 'e')
2877         {
2878           rtx tem = XEXP (x, i);
2879
2880           /* If we are about to do the last recursive call
2881              needed at this level, change it into iteration.
2882              This function is called enough to be worth it.  */
2883           if (i == 0)
2884             {
2885               x = tem;
2886               goto repeat;
2887             }
2888           if (expr_killed_p (tem, bb))
2889             return 1;
2890         }
2891       else if (fmt[i] == 'E')
2892         {
2893           int j;
2894           for (j = 0; j < XVECLEN (x, i); j++)
2895             {
2896               if (expr_killed_p (XVECEXP (x, i, j), bb))
2897                 return 1;
2898             }
2899         }
2900     }
2901
2902   return 0;
2903 }
2904
2905 /* Compute the set of available expressions killed in each basic block.  */
2906
2907 static void
2908 compute_ae_kill (ae_gen, ae_kill)
2909      sbitmap *ae_gen, *ae_kill;
2910 {
2911   int bb,i;
2912
2913   for (bb = 0; bb < n_basic_blocks; bb++)
2914     {
2915       for (i = 0; i < expr_hash_table_size; i++)
2916         {
2917           struct expr *expr = expr_hash_table[i];
2918
2919           for ( ; expr != NULL; expr = expr->next_same_hash)
2920             {
2921               /* Skip EXPR if generated in this block.  */
2922               if (TEST_BIT (ae_gen[bb], expr->bitmap_index))
2923                 continue;
2924
2925               if (expr_killed_p (expr->expr, bb))
2926                 SET_BIT (ae_kill[bb], expr->bitmap_index);
2927             }
2928         }
2929     }
2930 }
2931 \f
2932 /* Actually perform the Classic GCSE optimizations.  */
2933
2934 /* Return non-zero if occurrence OCCR of expression EXPR reaches block BB.
2935
2936    CHECK_SELF_LOOP is non-zero if we should consider a block reaching itself
2937    as a positive reach.  We want to do this when there are two computations
2938    of the expression in the block.
2939
2940    VISITED is a pointer to a working buffer for tracking which BB's have
2941    been visited.  It is NULL for the top-level call.
2942
2943    We treat reaching expressions that go through blocks containing the same
2944    reaching expression as "not reaching".  E.g. if EXPR is generated in blocks
2945    2 and 3, INSN is in block 4, and 2->3->4, we treat the expression in block
2946    2 as not reaching.  The intent is to improve the probability of finding
2947    only one reaching expression and to reduce register lifetimes by picking
2948    the closest such expression.  */
2949
2950 static int
2951 expr_reaches_here_p_work (occr, expr, bb, check_self_loop, visited)
2952      struct occr *occr;
2953      struct expr *expr;
2954      int bb;
2955      int check_self_loop;
2956      char *visited;
2957 {
2958   edge pred;
2959
2960   for (pred = BASIC_BLOCK(bb)->pred; pred != NULL; pred = pred->pred_next)
2961     {
2962       int pred_bb = pred->src->index;
2963
2964       if (visited[pred_bb])
2965         {
2966           /* This predecessor has already been visited.
2967              Nothing to do.  */
2968           ;
2969         }
2970       else if (pred_bb == bb)
2971         {
2972           /* BB loops on itself.  */
2973           if (check_self_loop
2974               && TEST_BIT (ae_gen[pred_bb], expr->bitmap_index)
2975               && BLOCK_NUM (occr->insn) == pred_bb)
2976             return 1;
2977           visited[pred_bb] = 1;
2978         }
2979       /* Ignore this predecessor if it kills the expression.  */
2980       else if (TEST_BIT (ae_kill[pred_bb], expr->bitmap_index))
2981         visited[pred_bb] = 1;
2982       /* Does this predecessor generate this expression?  */
2983       else if (TEST_BIT (ae_gen[pred_bb], expr->bitmap_index))
2984         {
2985           /* Is this the occurrence we're looking for?
2986              Note that there's only one generating occurrence per block
2987              so we just need to check the block number.  */
2988           if (BLOCK_NUM (occr->insn) == pred_bb)
2989             return 1;
2990           visited[pred_bb] = 1;
2991         }
2992       /* Neither gen nor kill.  */
2993       else
2994         {
2995           visited[pred_bb] = 1;
2996           if (expr_reaches_here_p_work (occr, expr, pred_bb, check_self_loop,
2997               visited))
2998             return 1;
2999         }
3000     }
3001
3002   /* All paths have been checked.  */
3003   return 0;
3004 }
3005
3006 /* This wrapper for expr_reaches_here_p_work() is to ensure that any
3007    memory allocated for that function is returned. */
3008
3009 static int
3010 expr_reaches_here_p (occr, expr, bb, check_self_loop)
3011      struct occr *occr;
3012      struct expr *expr;
3013      int bb;
3014      int check_self_loop;
3015 {
3016   int rval;
3017   char * visited = (char *) xcalloc (n_basic_blocks, 1);
3018
3019   rval = expr_reaches_here_p_work(occr, expr, bb, check_self_loop, visited);
3020
3021   free (visited);
3022
3023   return (rval);
3024 }
3025
3026 /* Return the instruction that computes EXPR that reaches INSN's basic block.
3027    If there is more than one such instruction, return NULL.
3028
3029    Called only by handle_avail_expr.  */
3030
3031 static rtx
3032 computing_insn (expr, insn)
3033      struct expr *expr;
3034      rtx insn;
3035 {
3036   int bb = BLOCK_NUM (insn);
3037
3038   if (expr->avail_occr->next == NULL)
3039     {
3040       if (BLOCK_NUM (expr->avail_occr->insn) == bb)
3041         {
3042           /* The available expression is actually itself
3043              (i.e. a loop in the flow graph) so do nothing.  */
3044           return NULL;
3045         }
3046       /* (FIXME) Case that we found a pattern that was created by
3047          a substitution that took place.  */
3048       return expr->avail_occr->insn;
3049     }
3050   else
3051     {
3052       /* Pattern is computed more than once.
3053          Search backwards from this insn to see how many of these
3054          computations actually reach this insn.  */
3055       struct occr *occr;
3056       rtx insn_computes_expr = NULL;
3057       int can_reach = 0;
3058
3059       for (occr = expr->avail_occr; occr != NULL; occr = occr->next)
3060         {
3061           if (BLOCK_NUM (occr->insn) == bb)
3062             {
3063               /* The expression is generated in this block.
3064                  The only time we care about this is when the expression
3065                  is generated later in the block [and thus there's a loop].
3066                  We let the normal cse pass handle the other cases.  */
3067               if (INSN_CUID (insn) < INSN_CUID (occr->insn))
3068                 {
3069                   if (expr_reaches_here_p (occr, expr, bb, 1))
3070                     {
3071                       can_reach++;
3072                       if (can_reach > 1)
3073                         return NULL;
3074                       insn_computes_expr = occr->insn;
3075                     }
3076                 }
3077             }
3078           else /* Computation of the pattern outside this block.  */
3079             {
3080               if (expr_reaches_here_p (occr, expr, bb, 0))
3081                 {
3082                   can_reach++;
3083                   if (can_reach > 1)
3084                     return NULL;
3085                   insn_computes_expr = occr->insn;
3086                 }
3087             }
3088         }
3089
3090       if (insn_computes_expr == NULL)
3091         abort ();
3092       return insn_computes_expr;
3093     }
3094 }
3095
3096 /* Return non-zero if the definition in DEF_INSN can reach INSN.
3097    Only called by can_disregard_other_sets.  */
3098
3099 static int
3100 def_reaches_here_p (insn, def_insn)
3101      rtx insn, def_insn;
3102 {
3103   rtx reg;
3104
3105   if (TEST_BIT (reaching_defs[BLOCK_NUM (insn)], INSN_CUID (def_insn)))
3106     return 1;
3107
3108   if (BLOCK_NUM (insn) == BLOCK_NUM (def_insn))
3109     {
3110       if (INSN_CUID (def_insn) < INSN_CUID (insn))
3111         {
3112           if (GET_CODE (PATTERN (def_insn)) == PARALLEL)
3113             return 1;
3114           if (GET_CODE (PATTERN (def_insn)) == CLOBBER)
3115             reg = XEXP (PATTERN (def_insn), 0);
3116           else if (GET_CODE (PATTERN (def_insn)) == SET)
3117             reg = SET_DEST (PATTERN (def_insn));
3118           else
3119             abort ();
3120           return ! reg_set_between_p (reg, NEXT_INSN (def_insn), insn);
3121         }
3122       else
3123         return 0;
3124     }
3125
3126   return 0;
3127 }
3128
3129 /* Return non-zero if *ADDR_THIS_REG can only have one value at INSN.
3130    The value returned is the number of definitions that reach INSN.
3131    Returning a value of zero means that [maybe] more than one definition
3132    reaches INSN and the caller can't perform whatever optimization it is
3133    trying.  i.e. it is always safe to return zero.  */
3134
3135 static int
3136 can_disregard_other_sets (addr_this_reg, insn, for_combine)
3137      struct reg_set **addr_this_reg;
3138      rtx insn;
3139      int for_combine;
3140 {
3141   int number_of_reaching_defs = 0;
3142   struct reg_set *this_reg = *addr_this_reg;
3143
3144   while (this_reg)
3145     {
3146       if (def_reaches_here_p (insn, this_reg->insn))
3147         {
3148           number_of_reaching_defs++;
3149           /* Ignore parallels for now.  */
3150           if (GET_CODE (PATTERN (this_reg->insn)) == PARALLEL)
3151             return 0;
3152           if (!for_combine
3153               && (GET_CODE (PATTERN (this_reg->insn)) == CLOBBER
3154                   || ! rtx_equal_p (SET_SRC (PATTERN (this_reg->insn)),
3155                                     SET_SRC (PATTERN (insn)))))
3156             {
3157               /* A setting of the reg to a different value reaches INSN.  */
3158               return 0;
3159             }
3160           if (number_of_reaching_defs > 1)
3161             {
3162               /* If in this setting the value the register is being
3163                  set to is equal to the previous value the register
3164                  was set to and this setting reaches the insn we are
3165                  trying to do the substitution on then we are ok.  */
3166
3167               if (GET_CODE (PATTERN (this_reg->insn)) == CLOBBER)
3168                 return 0;
3169               if (! rtx_equal_p (SET_SRC (PATTERN (this_reg->insn)),
3170                                  SET_SRC (PATTERN (insn))))
3171                 return 0;
3172             }
3173           *addr_this_reg = this_reg;
3174         }
3175
3176       /* prev_this_reg = this_reg; */
3177       this_reg = this_reg->next;
3178     }
3179
3180   return number_of_reaching_defs;
3181 }
3182
3183 /* Expression computed by insn is available and the substitution is legal,
3184    so try to perform the substitution.
3185
3186    The result is non-zero if any changes were made.  */
3187
3188 static int
3189 handle_avail_expr (insn, expr)
3190      rtx insn;
3191      struct expr *expr;
3192 {
3193   rtx pat, insn_computes_expr;
3194   rtx to;
3195   struct reg_set *this_reg;
3196   int found_setting, use_src;
3197   int changed = 0;
3198
3199   /* We only handle the case where one computation of the expression
3200      reaches this instruction.  */
3201   insn_computes_expr = computing_insn (expr, insn);
3202   if (insn_computes_expr == NULL)
3203     return 0;
3204
3205   found_setting = 0;
3206   use_src = 0;
3207
3208   /* At this point we know only one computation of EXPR outside of this
3209      block reaches this insn.  Now try to find a register that the
3210      expression is computed into.  */
3211
3212   if (GET_CODE (SET_SRC (PATTERN (insn_computes_expr))) == REG)
3213     {
3214       /* This is the case when the available expression that reaches
3215          here has already been handled as an available expression.  */
3216       int regnum_for_replacing = REGNO (SET_SRC (PATTERN (insn_computes_expr)));
3217       /* If the register was created by GCSE we can't use `reg_set_table',
3218          however we know it's set only once.  */
3219       if (regnum_for_replacing >= max_gcse_regno
3220           /* If the register the expression is computed into is set only once,
3221              or only one set reaches this insn, we can use it.  */
3222           || (((this_reg = reg_set_table[regnum_for_replacing]),
3223                this_reg->next == NULL)
3224               || can_disregard_other_sets (&this_reg, insn, 0)))
3225        {
3226          use_src = 1;
3227          found_setting = 1;
3228        }
3229     }
3230
3231   if (!found_setting)
3232     {
3233       int regnum_for_replacing = REGNO (SET_DEST (PATTERN (insn_computes_expr)));
3234       /* This shouldn't happen.  */
3235       if (regnum_for_replacing >= max_gcse_regno)
3236         abort ();
3237       this_reg = reg_set_table[regnum_for_replacing];
3238       /* If the register the expression is computed into is set only once,
3239          or only one set reaches this insn, use it.  */
3240       if (this_reg->next == NULL
3241           || can_disregard_other_sets (&this_reg, insn, 0))
3242         found_setting = 1;
3243     }
3244
3245   if (found_setting)
3246     {
3247       pat = PATTERN (insn);
3248       if (use_src)
3249         to = SET_SRC (PATTERN (insn_computes_expr));
3250       else
3251         to = SET_DEST (PATTERN (insn_computes_expr));
3252       changed = validate_change (insn, &SET_SRC (pat), to, 0);
3253
3254       /* We should be able to ignore the return code from validate_change but
3255          to play it safe we check.  */
3256       if (changed)
3257         {
3258           gcse_subst_count++;
3259           if (gcse_file != NULL)
3260             {
3261               fprintf (gcse_file, "GCSE: Replacing the source in insn %d with reg %d %s insn %d\n",
3262                        INSN_UID (insn), REGNO (to),
3263                        use_src ? "from" : "set in",
3264                        INSN_UID (insn_computes_expr));
3265             }
3266
3267         }
3268     }
3269   /* The register that the expr is computed into is set more than once.  */
3270   else if (1 /*expensive_op(this_pattrn->op) && do_expensive_gcse)*/)
3271     {
3272       /* Insert an insn after insnx that copies the reg set in insnx
3273          into a new pseudo register call this new register REGN.
3274          From insnb until end of basic block or until REGB is set
3275          replace all uses of REGB with REGN.  */
3276       rtx new_insn;
3277
3278       to = gen_reg_rtx (GET_MODE (SET_DEST (PATTERN (insn_computes_expr))));
3279
3280       /* Generate the new insn.  */
3281       /* ??? If the change fails, we return 0, even though we created
3282          an insn.  I think this is ok.  */
3283       new_insn
3284         = emit_insn_after (gen_rtx_SET (VOIDmode, to,
3285                                         SET_DEST (PATTERN (insn_computes_expr))),
3286                                   insn_computes_expr);
3287       /* Keep block number table up to date.  */
3288       set_block_num (new_insn, BLOCK_NUM (insn_computes_expr));
3289       /* Keep register set table up to date.  */
3290       record_one_set (REGNO (to), new_insn);
3291
3292       gcse_create_count++;
3293       if (gcse_file != NULL)
3294         {
3295           fprintf (gcse_file, "GCSE: Creating insn %d to copy value of reg %d, computed in insn %d,\n",
3296                    INSN_UID (NEXT_INSN (insn_computes_expr)),
3297                    REGNO (SET_SRC (PATTERN (NEXT_INSN (insn_computes_expr)))),
3298                    INSN_UID (insn_computes_expr));
3299           fprintf (gcse_file, "      into newly allocated reg %d\n", REGNO (to));
3300         }
3301
3302       pat = PATTERN (insn);
3303
3304       /* Do register replacement for INSN.  */
3305       changed = validate_change (insn, &SET_SRC (pat),
3306                                  SET_DEST (PATTERN (NEXT_INSN (insn_computes_expr))),
3307                                  0);
3308
3309       /* We should be able to ignore the return code from validate_change but
3310          to play it safe we check.  */
3311       if (changed)
3312         {
3313           gcse_subst_count++;
3314           if (gcse_file != NULL)
3315             {
3316               fprintf (gcse_file, "GCSE: Replacing the source in insn %d with reg %d set in insn %d\n",
3317                        INSN_UID (insn),
3318                        REGNO (SET_DEST (PATTERN (NEXT_INSN (insn_computes_expr)))),
3319                        INSN_UID (insn_computes_expr));
3320             }
3321
3322         }
3323     }
3324
3325   return changed;
3326 }
3327
3328 /* Perform classic GCSE.
3329    This is called by one_classic_gcse_pass after all the dataflow analysis
3330    has been done.
3331
3332    The result is non-zero if a change was made.  */
3333
3334 static int
3335 classic_gcse ()
3336 {
3337   int bb, changed;
3338   rtx insn;
3339
3340   /* Note we start at block 1.  */
3341
3342   changed = 0;
3343   for (bb = 1; bb < n_basic_blocks; bb++)
3344     {
3345       /* Reset tables used to keep track of what's still valid [since the
3346          start of the block].  */
3347       reset_opr_set_tables ();
3348
3349       for (insn = BLOCK_HEAD (bb);
3350            insn != NULL && insn != NEXT_INSN (BLOCK_END (bb));
3351            insn = NEXT_INSN (insn))
3352         {
3353           /* Is insn of form (set (pseudo-reg) ...)?  */
3354
3355           if (GET_CODE (insn) == INSN
3356               && GET_CODE (PATTERN (insn)) == SET
3357               && GET_CODE (SET_DEST (PATTERN (insn))) == REG
3358               && REGNO (SET_DEST (PATTERN (insn))) >= FIRST_PSEUDO_REGISTER)
3359             {
3360               rtx pat = PATTERN (insn);
3361               rtx src = SET_SRC (pat);
3362               struct expr *expr;
3363
3364               if (want_to_gcse_p (src)
3365                   /* Is the expression recorded?  */
3366                   && ((expr = lookup_expr (src)) != NULL)
3367                   /* Is the expression available [at the start of the
3368                      block]?  */
3369                   && TEST_BIT (ae_in[bb], expr->bitmap_index)
3370                   /* Are the operands unchanged since the start of the
3371                      block?  */
3372                   && oprs_not_set_p (src, insn))
3373                 changed |= handle_avail_expr (insn, expr);
3374             }
3375
3376           /* Keep track of everything modified by this insn.  */
3377           /* ??? Need to be careful w.r.t. mods done to INSN.  */
3378           if (GET_RTX_CLASS (GET_CODE (insn)) == 'i')
3379             mark_oprs_set (insn);
3380         }
3381     }
3382
3383   return changed;
3384 }
3385
3386 /* Top level routine to perform one classic GCSE pass.
3387
3388    Return non-zero if a change was made.  */
3389
3390 static int
3391 one_classic_gcse_pass (pass)
3392      int pass;
3393 {
3394   int changed = 0;
3395
3396   gcse_subst_count = 0;
3397   gcse_create_count = 0;
3398
3399   alloc_expr_hash_table (max_cuid);
3400   alloc_rd_mem (n_basic_blocks, max_cuid);
3401   compute_expr_hash_table ();
3402   if (gcse_file)
3403     dump_hash_table (gcse_file, "Expression", expr_hash_table,
3404                      expr_hash_table_size, n_exprs);
3405   if (n_exprs > 0)
3406     {
3407       compute_kill_rd ();
3408       compute_rd ();
3409       alloc_avail_expr_mem (n_basic_blocks, n_exprs);
3410       compute_ae_gen ();
3411       compute_ae_kill (ae_gen, ae_kill);
3412       compute_available (ae_gen, ae_kill, ae_out, ae_in);
3413       changed = classic_gcse ();
3414       free_avail_expr_mem ();
3415     }
3416   free_rd_mem ();
3417   free_expr_hash_table ();
3418
3419   if (gcse_file)
3420     {
3421       fprintf (gcse_file, "\n");
3422       fprintf (gcse_file, "GCSE of %s, pass %d: %d bytes needed, %d substs, %d insns created\n",
3423                current_function_name, pass,
3424                bytes_used, gcse_subst_count, gcse_create_count);
3425     }
3426
3427   return changed;
3428 }
3429 \f
3430 /* Compute copy/constant propagation working variables.  */
3431
3432 /* Local properties of assignments.  Both vectors hold one bitmap per
   basic block and are allocated by alloc_cprop_mem.  compute_cprop_data
   has compute_local_properties fill them in; compute_cprop_avinout then
   uses cprop_pavloc as what a block adds to its `out' set and
   cprop_absaltered as what it removes from its `in' set.  */
3433
3434 static sbitmap *cprop_pavloc;
3435 static sbitmap *cprop_absaltered;
3436
3437 /* Global properties of assignments (computed from the local properties).
   One bitmap per basic block: what is available on entry to the block
   (cprop_avin) and on exit from it (cprop_avout).  Both are computed by
   compute_cprop_avinout.  */
3438
3439 static sbitmap *cprop_avin;
3440 static sbitmap *cprop_avout;
3441
3442 /* Allocate vars used for copy/const propagation.
3443    N_BLOCKS is the number of basic blocks.
3444    N_SETS is the number of sets.  */
3445
3446 static void
3447 alloc_cprop_mem (n_blocks, n_sets)
3448      int n_blocks, n_sets;
3449 {
3450   cprop_pavloc = sbitmap_vector_alloc (n_blocks, n_sets);
3451   cprop_absaltered = sbitmap_vector_alloc (n_blocks, n_sets);
3452
3453   cprop_avin = sbitmap_vector_alloc (n_blocks, n_sets);
3454   cprop_avout = sbitmap_vector_alloc (n_blocks, n_sets);
3455 }
3456
3457 /* Free vars used by copy/const propagation.  */
3458
3459 static void
3460 free_cprop_mem ()
3461 {
3462   free (cprop_pavloc);
3463   free (cprop_absaltered);
3464   free (cprop_avin);
3465   free (cprop_avout);
3466 }
3467
3468 /* For each block, compute whether X is transparent.
3469    X is either an expression or an assignment [though we don't care which,
3470    for this context an assignment is treated as an expression].
3471    For each block where an element of X is modified, set (SET_P == 1) or reset
3472    (SET_P == 0) the INDX bit in BMAP.  */
3473
3474 static void
3475 compute_transp (x, indx, bmap, set_p)
3476      rtx x;
3477      int indx;
3478      sbitmap *bmap;
3479      int set_p;
3480 {
3481   int bb,i;
3482   enum rtx_code code;
3483   const char *fmt;
3484
3485   /* repeat is used to turn tail-recursion into iteration.  */
3486  repeat:
3487
3488   if (x == 0)
3489     return;
3490
3491   code = GET_CODE (x);
3492   switch (code)
3493     {
3494     case REG:
3495       {
3496         reg_set *r;
3497         int regno = REGNO (x);
3498
3499         if (set_p)
3500           {
3501             if (regno < FIRST_PSEUDO_REGISTER)
3502               {
3503                 for (bb = 0; bb < n_basic_blocks; bb++)
3504                   if (TEST_BIT (reg_set_in_block[bb], regno))
3505                     SET_BIT (bmap[bb], indx);
3506               }
3507             else
3508               {
3509                 for (r = reg_set_table[regno]; r != NULL; r = r->next)
3510                   {
3511                     bb = BLOCK_NUM (r->insn);
3512                     SET_BIT (bmap[bb], indx);
3513                   }
3514               }
3515           }
3516         else
3517           {
3518             if (regno < FIRST_PSEUDO_REGISTER)
3519               {
3520                 for (bb = 0; bb < n_basic_blocks; bb++)
3521                   if (TEST_BIT (reg_set_in_block[bb], regno))
3522                     RESET_BIT (bmap[bb], indx);
3523               }
3524             else
3525               {
3526                 for (r = reg_set_table[regno]; r != NULL; r = r->next)
3527                   {
3528                     bb = BLOCK_NUM (r->insn);
3529                     RESET_BIT (bmap[bb], indx);
3530                   }
3531               }
3532           }
3533         return;
3534       }
3535
3536     case MEM:
3537       if (set_p)
3538         {
3539           for (bb = 0; bb < n_basic_blocks; bb++)
3540             if (mem_set_in_block[bb])
3541               SET_BIT (bmap[bb], indx);
3542         }
3543       else
3544         {
3545           for (bb = 0; bb < n_basic_blocks; bb++)
3546             if (mem_set_in_block[bb])
3547               RESET_BIT (bmap[bb], indx);
3548         }
3549       x = XEXP (x, 0);
3550       goto repeat;
3551
3552     case PC:
3553     case CC0: /*FIXME*/
3554     case CONST:
3555     case CONST_INT:
3556     case CONST_DOUBLE:
3557     case SYMBOL_REF:
3558     case LABEL_REF:
3559     case ADDR_VEC:
3560     case ADDR_DIFF_VEC:
3561       return;
3562
3563     default:
3564       break;
3565     }
3566
3567   i = GET_RTX_LENGTH (code) - 1;
3568   fmt = GET_RTX_FORMAT (code);
3569   for (; i >= 0; i--)
3570     {
3571       if (fmt[i] == 'e')
3572         {
3573           rtx tem = XEXP (x, i);
3574
3575           /* If we are about to do the last recursive call
3576              needed at this level, change it into iteration.
3577              This function is called enough to be worth it.  */
3578           if (i == 0)
3579             {
3580               x = tem;
3581               goto repeat;
3582             }
3583           compute_transp (tem, indx, bmap, set_p);
3584         }
3585       else if (fmt[i] == 'E')
3586         {
3587           int j;
3588           for (j = 0; j < XVECLEN (x, i); j++)
3589             compute_transp (XVECEXP (x, i, j), indx, bmap, set_p);
3590         }
3591     }
3592 }
3593
3594 /* Compute the available expressions at the start and end of each
3595    basic block for cprop.  This particular dataflow equation is
3596    used often enough that we might want to generalize it and make
3597    as a subroutine for other global optimizations that need available
3598    in/out information.  */
3599 static void
3600 compute_cprop_avinout ()
3601 {
3602   int bb, changed, passes;
3603
3604   sbitmap_zero (cprop_avin[0]);
3605   sbitmap_vector_ones (cprop_avout, n_basic_blocks);
3606
3607   passes = 0;
3608   changed = 1;
3609   while (changed)
3610     {
3611       changed = 0;
3612       for (bb = 0; bb < n_basic_blocks; bb++)
3613         {
3614           if (bb != 0)
3615             sbitmap_intersection_of_preds (cprop_avin[bb], cprop_avout, bb);
3616           changed |= sbitmap_union_of_diff (cprop_avout[bb],
3617                                             cprop_pavloc[bb],
3618                                             cprop_avin[bb],
3619                                             cprop_absaltered[bb]);
3620         }
3621       passes++;
3622     }
3623
3624   if (gcse_file)
3625     fprintf (gcse_file, "cprop avail expr computation: %d passes\n", passes);
3626 }
3627
3628 /* Top level routine to do the dataflow analysis needed by copy/const
3629    propagation.  */
3630
3631 static void
3632 compute_cprop_data ()
3633 {
3634   compute_local_properties (cprop_absaltered, cprop_pavloc, NULL, 1);
3635   compute_cprop_avinout ();
3636 }
3637 \f
3638 /* Copy/constant propagation.  */
3639
3640 /* Maximum number of register uses in an insn that we handle.
   Once this many uses have been recorded, find_used_regs records no
   more and does not report the overflow.  */
3641 #define MAX_USES 8
3642
3643 /* Table of uses found in an insn.
3644    Allocated statically to avoid alloc/free complexity and overhead.
   Filled in by find_used_regs; entries 0 .. reg_use_count - 1 are
   valid.  */
3645 static struct reg_use reg_use_table[MAX_USES];
3646
3647 /* Index into `reg_use_table' while building it, i.e. the number of
   entries recorded so far.  It never exceeds MAX_USES.  */
3648 static int reg_use_count;
3649
3650 /* Set up a list of the REG rtxs used in X (an insn pattern or part of one).
3651    The found uses are stored in `reg_use_table'.
3652    The caller sets `reg_use_count' to zero before entry; it
3653    contains the number of uses in the table upon exit (at most MAX_USES).
3654
3655    ??? If a register appears multiple times we will record it multiple
3656    times.  This doesn't hurt anything but it will slow things down.  */
3657
3658 static void
3659 find_used_regs (x)
3660      rtx x;
3661 {
3662   int i;
3663   enum rtx_code code;
3664   const char *fmt;
3665
3666   /* repeat is used to turn tail-recursion into iteration.  */
3667  repeat:
3668
3669   if (x == 0)
3670     return;
3671
3672   code = GET_CODE (x);
3673   switch (code)
3674     {
3675     case REG:
3676       if (reg_use_count == MAX_USES)  /* Table full: this use is silently dropped.  */
3677         return;
3678       reg_use_table[reg_use_count].reg_rtx = x;
3679       reg_use_count++;
3680       return;
3681
3682     case MEM:
3683       x = XEXP (x, 0);  /* Scan the address expression for register uses.  */
3684       goto repeat;
3685
3686     case PC:
3687     case CC0:
3688     case CONST:
3689     case CONST_INT:
3690     case CONST_DOUBLE:
3691     case SYMBOL_REF:
3692     case LABEL_REF:
3693     case CLOBBER:
3694     case ADDR_VEC:
3695     case ADDR_DIFF_VEC:
3696     case ASM_INPUT: /*FIXME*/
3697       return;  /* None of these codes is scanned any further.  */
3698
3699     case SET:
3700       if (GET_CODE (SET_DEST (x)) == MEM)  /* Only a MEM destination is scanned; any other destination is skipped.  */
3701         find_used_regs (SET_DEST (x));
3702       x = SET_SRC (x);
3703       goto repeat;
3704
3705     default:
3706       break;
3707     }
3708
3709   /* Recursively scan the operands of this expression.  */
3710
3711   fmt = GET_RTX_FORMAT (code);
3712   for (i = GET_RTX_LENGTH (code) - 1; i >= 0; i--)
3713     {
3714       if (fmt[i] == 'e')
3715         {
3716           /* If we are about to do the last recursive call
3717              needed at this level, change it into iteration.
3718              This function is called enough to be worth it.  */
3719           if (i == 0)
3720             {
3721               x = XEXP (x, 0);
3722               goto repeat;
3723             }
3724           find_used_regs (XEXP (x, i));
3725         }
3726       else if (fmt[i] == 'E')
3727         {
3728           int j;
3729           for (j = 0; j < XVECLEN (x, i); j++)  /* Scan every element of the vector operand.  */
3730             find_used_regs (XVECEXP (x, i, j));
3731         }
3732     }
3733 }
3734
3735 /* Try to replace all non-SET_DEST occurrences of FROM in INSN with TO.
3736    Returns non-zero if successful.  */
3737
3738 static int
3739 try_replace_reg (from, to, insn)
3740      rtx from, to, insn;
3741 {
3742   /* If this fails we could try to simplify the result of the
3743      replacement and attempt to recognize the simplified insn.
3744
3745      But we need a general simplify_rtx that doesn't have pass
3746      specific state variables.  I'm not aware of one at the moment.  */
3747   return validate_replace_src (from, to, insn);  /* The whole job is delegated to this helper.  */
3748 }
3749
3750 /* Find a set of REGNO that is available on entry to INSN's block, following
3751    chains of register copies.  Returns NULL if no usable set is found.  */
3752
3753 static struct expr *
3754 find_avail_set (regno, insn)
3755      int regno;
3756      rtx insn;
3757 {
3758   /* SET1 contains the last set found that can be returned to the caller for
3759      use in a substitution.  */
3760   struct expr *set1 = 0;
3761
3762   /* Loops are not possible here.  To get a loop we would need two sets
3763      available at the start of the block containing INSN.  i.e. we would
3764      need two sets like this available at the start of the block:
3765
3766        (set (reg X) (reg Y))
3767        (set (reg Y) (reg X))
3768
3769      This can not happen since the set of (reg Y) would have killed the
3770      set of (reg X) making it unavailable at the start of this block.  */
3771   while (1)
3772      {
3773       rtx src;
3774       struct expr *set = lookup_set (regno, NULL_RTX);
3775
3776       /* Find a set that is available at the start of the block
3777          which contains INSN.  */
3778       while (set)
3779         {
3780           if (TEST_BIT (cprop_avin[BLOCK_NUM (insn)], set->bitmap_index))
3781             break;
3782           set = next_set (regno, set);
3783         }
3784
3785       /* If no available set was found we've reached the end of the
3786          (possibly empty) copy chain.  */
3787       if (set == 0)
3788         break;
3789
3790       if (GET_CODE (set->expr) != SET)  /* Entries in the set table are expected to be SETs.  */
3791         abort ();
3792
3793       src = SET_SRC (set->expr);
3794
3795       /* We know the set is available.
3796          Now check that SRC is ANTLOC (i.e. none of the source operands
3797          have changed since the start of the block).
3798
3799          If the source operand changed, we may still use it for the next
3800          iteration of this loop, but we may not use it for substitutions.  */
3801       if (CONSTANT_P (src) || oprs_not_set_p (src, insn))
3802         set1 = set;
3803
3804       /* If the source of the set is anything except a register, then
3805          we have reached the end of the copy chain.  */
3806       if (GET_CODE (src) != REG)
3807         break;
3808
3809       /* Follow the copy chain, i.e. start another iteration of the loop
3810          and see if we have an available copy into SRC.  */
3811       regno = REGNO (src);
3812      }
3813
3814   /* SET1 holds the last set that was available and anticipatable at
3815      INSN, or is still 0 if there was none.  */
3816   return set1;
3817 }
3818
3819 /* Subroutine of cprop_insn that tries to propagate constants into
3820    JUMP_INSNS.  INSN must be a conditional jump; COPY is a copy of it
3821    that we can use for substitutions (INSN is modified only on success).
3822    REG_USED is the use we will try to replace, SRC is the constant we
3823    will try to substitute for it.
3824    Returns nonzero if a change was made.  */
3825 static int
3826 cprop_jump (insn, copy, reg_used, src)
3827      rtx insn, copy;
3828      struct reg_use *reg_used;
3829      rtx src;
3830 {
3831   rtx set = PATTERN (copy);
3832   rtx temp;
3833
3834   /* Replace the register with the appropriate constant (in COPY's pattern).  */
3835   replace_rtx (SET_SRC (set), reg_used->reg_rtx, src);
3836
3837   temp = simplify_ternary_operation (GET_CODE (SET_SRC (set)),
3838                                      GET_MODE (SET_SRC (set)),
3839                                      GET_MODE (XEXP (SET_SRC (set), 0)),
3840                                      XEXP (SET_SRC (set), 0),
3841                                      XEXP (SET_SRC (set), 1),
3842                                      XEXP (SET_SRC (set), 2));  /* NOTE(review): assumes SET_SRC has three operands (e.g. IF_THEN_ELSE) -- confirm callers guarantee this.  */
3843
3844   /* If no simplification can be made, report no change so that the
3845      caller can try the next register.  */
3846   if (temp == 0)
3847     return 0;
3848
3849   SET_SRC (set) = temp;
3850
3851   /* That may have changed the structure of TEMP, so
3852      force it to be rerecognized if it has not turned
3853      into a nop or unconditional jump.  */
3854
3855   INSN_CODE (copy) = -1;
3856   if ((SET_DEST (set) == pc_rtx
3857        && (SET_SRC (set) == pc_rtx
3858            || GET_CODE (SET_SRC (set)) == LABEL_REF))
3859       || recog (PATTERN (copy), copy, NULL) >= 0)
3860     {
3861       /* This has either become an unconditional jump
3862          or a nop-jump.  We'd like to delete nop jumps
3863          here, but doing so confuses gcse.  So we just
3864          make the replacement and let later passes
3865          sort things out.  */
3866       PATTERN (insn) = set;
3867       INSN_CODE (insn) = -1;
3868
3869       /* One less use of the label this insn used to jump to
3870          if we turned this into a NOP jump.  */
3871       if (SET_SRC (set) == pc_rtx && JUMP_LABEL (insn) != 0)
3872         --LABEL_NUSES (JUMP_LABEL (insn));
3873
3874       /* If this has turned into an unconditional jump,
3875          then put a barrier after it so that the unreachable
3876          code will be deleted.  */
3877       if (GET_CODE (SET_SRC (set)) == LABEL_REF)
3878         emit_barrier_after (insn);
3879
3880       run_jump_opt_after_gcse = 1;
3881
3882       const_prop_count++;
3883       if (gcse_file != NULL)
3884         {
3885           int regno = REGNO (reg_used->reg_rtx);
3886           fprintf (gcse_file, "CONST-PROP: Replacing reg %d in insn %d with constant ",
3887                    regno, INSN_UID (insn));
3888           print_rtl (gcse_file, src);
3889           fprintf (gcse_file, "\n");
3890         }
3891       return 1;
3892     }
3893   return 0;  /* The simplified copy was not recognized; INSN is left untouched.  */
3894 }
3895
3896 #ifdef HAVE_cc0
3897 /* Subroutine of cprop_insn that tries to propagate constants into
3898    JUMP_INSNS for machines that have CC0.  INSN is a single set that
3899    stores into CC0; the insn following it is a conditional jump.
3900    REG_USED is the use we will try to replace, SRC is the constant we
3901    will try to substitute for it.
3902    Returns nonzero if a change was made.  */
3903 static int
3904 cprop_cc0_jump (insn, reg_used, src)
3905      rtx insn;
3906      struct reg_use *reg_used;
3907      rtx src;
3908 {
3909   rtx jump = NEXT_INSN (insn);
3910   rtx copy = copy_rtx (jump);
3911   rtx set = PATTERN (copy);
3912
3913   /* We need to copy the source of the cc0 setter, as cprop_jump is going to
3914      substitute into it.  The copy replaces cc0 in the copied jump.  */
3915   replace_rtx (SET_SRC (set), cc0_rtx, copy_rtx (SET_SRC (PATTERN (insn))));
3916   if (! cprop_jump (jump, copy, reg_used, src))
3917     return 0;
3918
3919   /* If we succeeded, delete the cc0 setter by turning it into a deleted-insn NOTE.  */
3920   PUT_CODE (insn, NOTE);
3921   NOTE_LINE_NUMBER (insn) = NOTE_INSN_DELETED;
3922   NOTE_SOURCE_FILE (insn) = 0;
3923   return 1;
3924  }
3925 #endif
3926
3927 /* Perform constant and copy propagation on INSN.  If ALTER_JUMPS is nonzero,
3928    constants may also be propagated into conditional jumps.  The result is non-zero if a change was made.  */
3929
3930 static int
3931 cprop_insn (insn, alter_jumps)
3932      rtx insn;
3933      int alter_jumps;
3934 {
3935   struct reg_use *reg_used;
3936   int changed = 0;
3937
3938   /* Only propagate into SETs.  Note that a conditional jump is a
3939      SET with pc_rtx as the destination.  */
3940   if ((GET_CODE (insn) != INSN
3941        && GET_CODE (insn) != JUMP_INSN)
3942       || GET_CODE (PATTERN (insn)) != SET)
3943     return 0;
3944
3945   reg_use_count = 0;
3946   find_used_regs (PATTERN (insn));
3947
3948   reg_used = &reg_use_table[0];
3949   for ( ; reg_use_count > 0; reg_used++, reg_use_count--)  /* Walk the table, counting reg_use_count back down to zero.  */
3950     {
3951       rtx pat, src;
3952       struct expr *set;
3953       int regno = REGNO (reg_used->reg_rtx);
3954
3955       /* Ignore registers created by GCSE.
3956          We do this because ... (??? the rationale was never written down here).  */
3957       if (regno >= max_gcse_regno)
3958         continue;
3959
3960       /* If the register has already been set in this block, there's
3961          nothing we can do.  */
3962       if (! oprs_not_set_p (reg_used->reg_rtx, insn))
3963         continue;
3964
3965       /* Find an assignment that sets reg_used and is available
3966          at the start of the block.  */
3967       set = find_avail_set (regno, insn);
3968       if (! set)
3969         continue;
3970
3971       pat = set->expr;
3972       /* ??? We might be able to handle PARALLELs.  Later.  */
3973       if (GET_CODE (pat) != SET)
3974         abort ();
3975       src = SET_SRC (pat);
3976
3977       /* Constant propagation (CONST_INT, CONST_DOUBLE or SYMBOL_REF sources).  */
3978       if (GET_CODE (src) == CONST_INT || GET_CODE (src) == CONST_DOUBLE
3979           || GET_CODE (src) == SYMBOL_REF)
3980         {
3981           /* Handle normal insns first.  */
3982           if (GET_CODE (insn) == INSN
3983               && try_replace_reg (reg_used->reg_rtx, src, insn))
3984             {
3985               changed = 1;
3986               const_prop_count++;
3987               if (gcse_file != NULL)
3988                 {
3989                   fprintf (gcse_file, "CONST-PROP: Replacing reg %d in insn %d with constant ",
3990                            regno, INSN_UID (insn));
3991                   print_rtl (gcse_file, src);
3992                   fprintf (gcse_file, "\n");
3993                 }
3994
3995               /* The original insn setting reg_used may or may not now be
3996                  deletable.  We leave the deletion to flow.  */
3997             }
3998
3999           /* Try to propagate the constant into a conditional jump.
4000              We're pretty specific about what we will handle in this
4001              code, we can extend this as necessary over time.
4002
4003              Right now the insn in question must look like
4004              (set (pc) (if_then_else ...))  */
4005           else if (alter_jumps
4006                    && GET_CODE (insn) == JUMP_INSN
4007                    && condjump_p (insn)
4008                    && ! simplejump_p (insn))
4009             changed |= cprop_jump (insn, copy_rtx (insn), reg_used, src);
4010 #ifdef HAVE_cc0
4011           /* Similar code for machines that use a pair of CC0 setter and
4012              conditional jump insn.  */
4013           else if (alter_jumps
4014                    && GET_CODE (PATTERN (insn)) == SET
4015                    && SET_DEST (PATTERN (insn)) == cc0_rtx
4016                    && GET_CODE (NEXT_INSN (insn)) == JUMP_INSN
4017                    && condjump_p (NEXT_INSN (insn))
4018                    && ! simplejump_p (NEXT_INSN (insn)))
4019             changed |= cprop_cc0_jump (insn, reg_used, src);
4020 #endif
4021         }
4022       else if (GET_CODE (src) == REG  /* Copy propagation: only pseudo registers other than REGNO itself.  */
4023                && REGNO (src) >= FIRST_PSEUDO_REGISTER
4024                && REGNO (src) != regno)
4025         {
4026           if (try_replace_reg (reg_used->reg_rtx, src, insn))
4027             {
4028               changed = 1;
4029               copy_prop_count++;
4030               if (gcse_file != NULL)
4031                 {
4032                   fprintf (gcse_file, "COPY-PROP: Replacing reg %d in insn %d with reg %d\n",
4033                            regno, INSN_UID (insn), REGNO (src));
4034                 }
4035
4036               /* The original insn setting reg_used may or may not now be
4037                  deletable.  We leave the deletion to flow.  */
4038               /* FIXME: If it turns out that the insn isn't deletable,
4039                  then we may have unnecessarily extended register lifetimes
4040                  and made things worse.  */
4041             }
4042         }
4043     }
4044
4045   return changed;
4046 }
4047
4048 /* Forward propagate copies.
4049    This includes copies and constants.  ALTER_JUMPS is passed to cprop_insn.
4050    Return non-zero if a change was made.  */
4051
4052 static int
4053 cprop (alter_jumps)
4054      int alter_jumps;
4055 {
4056   int bb, changed;
4057   rtx insn;
4058
4059   /* Note we start at block 1: AVIN of block 0 is always empty, so nothing could be propagated there.  */
4060
4061   changed = 0;
4062   for (bb = 1; bb < n_basic_blocks; bb++)
4063     {
4064       /* Reset tables used to keep track of what's still valid [since the
4065          start of the block].  */
4066       reset_opr_set_tables ();
4067
4068       for (insn = BLOCK_HEAD (bb);
4069            insn != NULL && insn != NEXT_INSN (BLOCK_END (bb));
4070            insn = NEXT_INSN (insn))
4071         {
4072           if (GET_RTX_CLASS (GET_CODE (insn)) == 'i')
4073             {
4074               changed |= cprop_insn (insn, alter_jumps);
4075
4076               /* Keep track of everything modified by this insn.  */
4077               /* ??? Need to be careful w.r.t. mods done to INSN.  Don't
4078                  call mark_oprs_set if we turned the insn into a NOTE.  */
4079               if (GET_CODE (insn) != NOTE)
4080                 mark_oprs_set (insn);
4081             }
4082         }
4083     }
4084
4085   if (gcse_file != NULL)
4086     fprintf (gcse_file, "\n");
4087
4088   return changed;
4089 }
4090
4091 /* Perform one copy/constant propagation pass.
4092    PASS is the pass count (used only in the dump output); ALTER_JUMPS is
4093    passed on to cprop.  Returns non-zero if a change was made.  */
4094
4095 static int
4096 one_cprop_pass (pass, alter_jumps)
4097      int pass;
4098      int alter_jumps;
4099 {
4100   int changed = 0;
4101
4102   const_prop_count = 0;
4103   copy_prop_count = 0;
4104
4105   alloc_set_hash_table (max_cuid);
4106   compute_set_hash_table ();
4107   if (gcse_file)
4108     dump_hash_table (gcse_file, "SET", set_hash_table, set_hash_table_size,
4109                      n_sets);
4110   if (n_sets > 0)  /* With no sets recorded there is nothing to propagate.  */
4111     {
4112       alloc_cprop_mem (n_basic_blocks, n_sets);
4113       compute_cprop_data ();
4114       changed = cprop (alter_jumps);
4115       free_cprop_mem ();
4116     }
4117   free_set_hash_table ();
4118
4119   if (gcse_file)
4120     {
4121       fprintf (gcse_file, "CPROP of %s, pass %d: %d bytes needed, %d const props, %d copy props\n",
4122                current_function_name, pass,
4123                bytes_used, const_prop_count, copy_prop_count);
4124       fprintf (gcse_file, "\n");
4125     }
4126
4127   return changed;
4128 }
4129 \f
4130 /* Compute PRE+LCM working variables.  */
4131
4132 /* Local properties of expressions.  */
4133 /* Nonzero for expressions that are transparent in the block.  */
4134 static sbitmap *transp;
4135
4136 /* Nonzero for expressions that are transparent at the end of the block.
4137    This is only zero for expressions killed by an abnormal critical edge
4138    created by a call.  */
4139 static sbitmap *transpout;
4140
4141 /* Nonzero for expressions that are computed (available) in the block.  */
4142 static sbitmap *comp;
4143
4144 /* Nonzero for expressions that are locally anticipatable in the block.  */
4145 static sbitmap *antloc;
4146
4147 /* Nonzero for expressions where this block is an optimal computation
4148    point.  */
4149 static sbitmap *pre_optimal;
4150
4151 /* Nonzero for expressions which are redundant in a particular block.  */
4152 static sbitmap *pre_redundant;
4153
4154 /* Nonzero for expressions which should be inserted on a specific edge.  */
4155 static sbitmap *pre_insert_map;
4156
4157 /* Nonzero for expressions which should be deleted in a specific block.  */
4158 static sbitmap *pre_delete_map;
4159
4160 /* Contains the edge_list returned by pre_edge_lcm.  */
4161 static struct edge_list *edge_list;
4162
4163 static sbitmap *temp_bitmap;  /* Bitmap vector allocated in alloc_pre_mem and freed in free_pre_mem.  */
4164
4165 /* Redundant insns.  */
4166 static sbitmap pre_redundant_insns;
4167
4168 /* Allocate vars used for PRE analysis; each vector is created with sbitmap_vector_alloc (N_BLOCKS, N_EXPRS).  */
4169
4170 static void
4171 alloc_pre_mem (n_blocks, n_exprs)
4172      int n_blocks, n_exprs;
4173 {
4174   transp = sbitmap_vector_alloc (n_blocks, n_exprs);
4175   comp = sbitmap_vector_alloc (n_blocks, n_exprs);
4176   antloc = sbitmap_vector_alloc (n_blocks, n_exprs);
4177   temp_bitmap = sbitmap_vector_alloc (n_blocks, n_exprs);
4178
4179   pre_optimal = NULL;  /* The pointers below start out NULL so free_pre_mem can tell whether they were ever allocated.  */
4180   pre_redundant = NULL;
4181   pre_insert_map = NULL;
4182   pre_delete_map = NULL;
4183   ae_in = NULL;
4184   ae_out = NULL;
4185   u_bitmap = NULL;
4186   transpout = sbitmap_vector_alloc (n_blocks, n_exprs);
4187   ae_kill = sbitmap_vector_alloc (n_blocks, n_exprs);
4188   /* pre_insert_map and pre_delete_map are allocated later (presumably by pre_edge_lcm, which is handed their addresses).  */
4189 }
4190
4191 /* Free vars used for PRE analysis, and reset every freed pointer to NULL.  */
4192
4193 static void
4194 free_pre_mem ()
4195 {
4196   free (transp);  /* These four are always allocated by alloc_pre_mem.  */
4197   free (comp);
4198   free (antloc);
4199   free (temp_bitmap);
4200
4201   if (pre_optimal)  /* The remaining vectors may never have been allocated, so each free is guarded.  */
4202     free (pre_optimal);
4203   if (pre_redundant)
4204     free (pre_redundant);
4205   if (pre_insert_map)
4206     free (pre_insert_map);
4207   if (pre_delete_map)
4208     free (pre_delete_map);
4209   if (transpout)
4210     free (transpout);
4211
4212   if (ae_in)
4213     free (ae_in);
4214   if (ae_out)
4215     free (ae_out);
4216   if (ae_kill)
4217     free (ae_kill);
4218   if (u_bitmap)
4219     free (u_bitmap);
4220
4221   transp = comp = antloc = temp_bitmap = NULL;  /* temp_bitmap included so it does not dangle after the free above.  */
4222   pre_optimal = pre_redundant = pre_insert_map = pre_delete_map = NULL;
4223   transpout = ae_in = ae_out = ae_kill = NULL;
4224   u_bitmap = NULL;
4225
4226 }
4227
4228 /* Top level routine to do the dataflow analysis needed by PRE.  */
4229
4230 static void
4231 compute_pre_data ()
4232 {
4233   compute_local_properties (transp, comp, antloc, 0);
4234   compute_transpout ();
4235   sbitmap_vector_zero (ae_kill, n_basic_blocks);  /* Start from an all-zero kill vector before compute_ae_kill fills it in.  */
4236   compute_ae_kill (comp, ae_kill);
4237   edge_list = pre_edge_lcm (gcse_file, n_exprs, transp, comp, antloc,
4238                             ae_kill, &pre_insert_map, &pre_delete_map);  /* The insert/delete maps are handed over by address.  */
4239 }
4240
4241 \f
4242 /* PRE utilities */
4243
4244 /* Return non-zero if an occurrence of expression EXPR in OCCR_BB would reach
4245    block BB.
4246
4247    VISITED is a pointer to a working buffer for tracking which BB's have
4248    been visited.  It must be non-NULL; pre_expr_reaches_here_p passes a zeroed one.
4249
4250    CHECK_PRE_COMP controls whether or not we check for a computation of
4251    EXPR in OCCR_BB (when zero, OCCR_BB is treated as generating EXPR).
4252
4253    We treat reaching expressions that go through blocks containing the same
4254    reaching expression as "not reaching".  E.g. if EXPR is generated in blocks
4255    2 and 3, INSN is in block 4, and 2->3->4, we treat the expression in block
4256    2 as not reaching.  The intent is to improve the probability of finding
4257    only one reaching expression and to reduce register lifetimes by picking
4258    the closest such expression.  */
4259
4260 static int
4261 pre_expr_reaches_here_p_work (occr_bb, expr, bb, check_pre_comp, visited)
4262      int occr_bb;
4263      struct expr *expr;
4264      int bb;
4265      int check_pre_comp;
4266      char *visited;
4267 {
4268   edge pred;
4269
4270   for (pred = BASIC_BLOCK (bb)->pred; pred != NULL; pred = pred->pred_next)
4271     {
4272       int pred_bb = pred->src->index;
4273
4274       if (pred->src == ENTRY_BLOCK_PTR
4275           /* Has this predecessor already been visited?  */
4276           || visited[pred_bb])
4277         {
4278           /* Nothing to do.  */
4279         }
4280       /* Does this predecessor generate this expression?  */
4281       else if ((!check_pre_comp && occr_bb == pred_bb)
4282                || TEST_BIT (comp[pred_bb], expr->bitmap_index))
4283         {
4284           /* Is this the occurrence we're looking for?
4285              Note that there's only one generating occurrence per block
4286              so we just need to check the block number.  */
4287           if (occr_bb == pred_bb)
4288             return 1;
4289           visited[pred_bb] = 1;
4290         }
4291       /* Ignore this predecessor if it kills the expression.  */
4292       else if (! TEST_BIT (transp[pred_bb], expr->bitmap_index))
4293         visited[pred_bb] = 1;
4294       /* Neither gen nor kill.  */
4295       else
4296         {
4297           visited[pred_bb] = 1;  /* Mark before recursing so this block is not walked again.  */
4298           if (pre_expr_reaches_here_p_work (occr_bb, expr, pred_bb,
4299                                             check_pre_comp, visited))
4300             return 1;
4301         }
4302     }
4303
4304   /* All paths have been checked.  */
4305   return 0;
4306 }
4307
4308 /* The wrapper for pre_expr_reaches_here_p_work that ensures that any
4309    memory allocated for that function (the VISITED buffer) is returned. */
4310
4311 static int
4312 pre_expr_reaches_here_p (occr_bb, expr, bb, check_pre_comp)
4313      int occr_bb;
4314      struct expr *expr;
4315      int bb;
4316      int check_pre_comp;
4317 {
4318   int rval;
4319   char * visited = (char *) xcalloc (n_basic_blocks, 1);  /* One byte per basic block.  */
4320
4321   rval = pre_expr_reaches_here_p_work(occr_bb, expr, bb, check_pre_comp,
4322                                       visited);
4323
4324   free (visited);
4325
4326   return (rval);
4327 }
4328 \f
4329
4330 /* Given an expr, generate RTL which we can insert at the end of a BB,
4331    or on an edge.  The RTL moves a copy of EXPR's expression into its
4332    reaching_reg.  Block numbers are not set here.  */
4333
4334 static rtx
4335 process_insert_insn (expr)
4336      struct expr *expr;
4337 {
4338   rtx reg = expr->reaching_reg;
4339   rtx pat, copied_expr;
4340   rtx first_new_insn;
4341
4342   start_sequence ();
4343   copied_expr = copy_rtx (expr->expr);
4344   emit_move_insn (reg, copied_expr);
4345   first_new_insn = get_insns ();  /* NOTE(review): assigned but never read in this function.  */
4346   pat = gen_sequence ();
4347   end_sequence ();
4348
4349   return pat;
4350 }
4351
4352 /* Add EXPR to the end of basic block BB.
4353
4354    This is used by both the PRE and code hoisting.
4355
4356    For PRE, we want to verify that the expr is either transparent
4357    or locally anticipatable in the target block.  This check makes
4358    no sense for code hoisting, so it is made only when PRE is nonzero.  */
4359
4360 static void
4361 insert_insn_end_bb (expr, bb, pre)
4362      struct expr *expr;
4363      int bb;
4364      int pre;
4365 {
4366   rtx insn = BLOCK_END (bb);
4367   rtx new_insn;
4368   rtx reg = expr->reaching_reg;
4369   int regno = REGNO (reg);
4370   rtx pat;
4371
4372   pat = process_insert_insn (expr);
4373
4374   /* If the last insn is a jump, insert EXPR in front [taking care to
4375      handle cc0, etc. properly].  */
4376
4377   if (GET_CODE (insn) == JUMP_INSN)
4378     {
4379 #ifdef HAVE_cc0
4380       rtx note;
4381 #endif
4382
4383       /* If this is a jump table, then we can't insert stuff here.  Since
4384          we know the previous real insn must be the tablejump, we insert
4385          the new instruction just before the tablejump.  */
4386       if (GET_CODE (PATTERN (insn)) == ADDR_VEC
4387           || GET_CODE (PATTERN (insn)) == ADDR_DIFF_VEC)
4388         insn = prev_real_insn (insn);
4389
4390 #ifdef HAVE_cc0
4391       /* FIXME: 'twould be nice to call prev_cc0_setter here but it aborts
4392          if cc0 isn't set.  */
4393       note = find_reg_note (insn, REG_CC_SETTER, NULL_RTX);
4394       if (note)
4395         insn = XEXP (note, 0);
4396       else
4397         {
4398           rtx maybe_cc0_setter = prev_nonnote_insn (insn);
4399           if (maybe_cc0_setter
4400               && GET_RTX_CLASS (GET_CODE (maybe_cc0_setter)) == 'i'
4401               && sets_cc0_p (PATTERN (maybe_cc0_setter)))
4402             insn = maybe_cc0_setter;
4403         }
4404 #endif
4405       /* FIXME: What if something in cc0/jump uses value set in new insn?  */
4406       new_insn = emit_insn_before (pat, insn);
4407       if (BLOCK_HEAD (bb) == insn)  /* Inserting before the old head makes the new insn the head.  */
4408         BLOCK_HEAD (bb) = new_insn;
4409     }
4410   /* Likewise if the last insn is a call, as will happen in the presence
4411      of exception handling.  */
4412   else if (GET_CODE (insn) == CALL_INSN)
4413     {
4414       HARD_REG_SET parm_regs;
4415       int nparm_regs;
4416       rtx p;
4417
4418       /* Keeping in mind SMALL_REGISTER_CLASSES and parameters in registers,
4419          we search backward and place the instructions before the first
4420          parameter is loaded.  Do this for everyone for consistency and a
4421          presumption that we'll get better code elsewhere as well.  */
4422
4423       /* It should always be the case that we can put these instructions
4424          anywhere in the basic block when performing PRE optimizations.
4425          Check this.  */
4426       if (pre
4427           && !TEST_BIT (antloc[bb], expr->bitmap_index)
4428           && !TEST_BIT (transp[bb], expr->bitmap_index))
4429         abort ();
4430
4431       /* Since different machines initialize their parameter registers
4432          in different orders, assume nothing.  Collect the set of all
4433          parameter registers.  */
4434       CLEAR_HARD_REG_SET (parm_regs);
4435       nparm_regs = 0;
4436       for (p = CALL_INSN_FUNCTION_USAGE (insn); p ; p = XEXP (p, 1))
4437         if (GET_CODE (XEXP (p, 0)) == USE
4438             && GET_CODE (XEXP (XEXP (p, 0), 0)) == REG)
4439           {
4440             int regno = REGNO (XEXP (XEXP (p, 0), 0));  /* Shadows the outer REGNO inside this block only.  */
4441             if (regno >= FIRST_PSEUDO_REGISTER)
4442               abort ();
4443             SET_HARD_REG_BIT (parm_regs, regno);
4444             nparm_regs++;
4445           }
4446
4447       /* Search backward for the first set of a register in this set.  */
4448       while (nparm_regs && BLOCK_HEAD (bb) != insn)
4449         {
4450           insn = PREV_INSN (insn);
4451           p = single_set (insn);
4452           if (p && GET_CODE (SET_DEST (p)) == REG
4453               && REGNO (SET_DEST (p)) < FIRST_PSEUDO_REGISTER
4454               && TEST_HARD_REG_BIT (parm_regs, REGNO (SET_DEST (p))))
4455             {
4456               CLEAR_HARD_REG_BIT (parm_regs, REGNO (SET_DEST (p)));
4457               nparm_regs--;
4458             }
4459         }
4460
4461       /* If we found all the parameter loads, then we want to insert
4462          before the first parameter load.
4463
4464          If we did not find all the parameter loads, then we might have
4465          stopped on the head of the block, which could be a CODE_LABEL.
4466          If we inserted before the CODE_LABEL, then we would be putting
4467          the insn in the wrong basic block.  In that case, put the insn
4468          after the CODE_LABEL.
4469
4470          ?!? Do we need to account for NOTE_INSN_BASIC_BLOCK here?  */
4471       if (GET_CODE (insn) != CODE_LABEL)
4472         {
4473           new_insn = emit_insn_before (pat, insn);
4474           if (BLOCK_HEAD (bb) == insn)
4475             BLOCK_HEAD (bb) = new_insn;
4476         }
4477       else
4478         {
4479           new_insn = emit_insn_after (pat, insn);
4480         }
4481     }
4482   else
4483     {
4484       new_insn = emit_insn_after (pat, insn);  /* Ordinary last insn: append, and the new insn becomes the block end.  */
4485       BLOCK_END (bb) = new_insn;
4486     }
4487
4488   /* Keep block number table up to date.
4489      Note, PAT could be a multiple insn sequence, we have to make
4490      sure that each insn in the sequence is handled.  */
4491   if (GET_CODE (pat) == SEQUENCE)
4492     {
4493       int i;
4494
4495       for (i = 0; i < XVECLEN (pat, 0); i++)
4496         {
4497           rtx insn = XVECEXP (pat, 0, i);  /* Shadows the outer INSN inside this loop body only.  */
4498           set_block_num (insn, bb);
4499           if (GET_RTX_CLASS (GET_CODE (insn)) == 'i')
4500             add_label_notes (PATTERN (insn), new_insn);
4501           note_stores (PATTERN (insn), record_set_info, insn);  /* NOTE(review): runs for every element, not only when the class test above holds.  */
4502         }
4503     }
4504   else
4505     {
4506       add_label_notes (SET_SRC (pat), new_insn);
4507       set_block_num (new_insn, bb);
4508       /* Keep register set table up to date.  */
4509       record_one_set (regno, new_insn);
4510     }
4511
4512   gcse_create_count++;
4513
4514   if (gcse_file)
4515     {
4516       fprintf (gcse_file, "PRE/HOIST: end of bb %d, insn %d, copying expression %d to reg %d\n",
4517                bb, INSN_UID (new_insn), expr->bitmap_index, regno);
4518     }
4519 }
4520
4521 /* Insert partially redundant expressions on edges in the CFG to make
4522    the expressions fully redundant.  */
4523
4524 static int
4525 pre_edge_insert (edge_list, index_map)
4526      struct edge_list *edge_list;
4527      struct expr **index_map;
4528 {
4529   int e, i, num_edges, set_size, did_insert = 0;
4530   sbitmap *inserted;
4531
4532   /* Where PRE_INSERT_MAP is nonzero, we add the expression on that edge
4533      if it reaches any of the deleted expressions.  */
4534
4535   set_size = pre_insert_map[0]->size;
4536   num_edges = NUM_EDGES (edge_list);
4537   inserted = sbitmap_vector_alloc (num_edges, n_exprs);
4538   sbitmap_vector_zero (inserted, num_edges);
4539
4540   for (e = 0; e < num_edges; e++)
4541     {
4542       int indx;
4543       basic_block pred = INDEX_EDGE_PRED_BB (edge_list, e);
4544       int bb = pred->index;
4545
4546       for (i = indx = 0; i < set_size; i++, indx += SBITMAP_ELT_BITS)
4547         {
4548           SBITMAP_ELT_TYPE insert = pre_insert_map[e]->elms[i];
4549           int j;
4550
4551           for (j = indx; insert && j < n_exprs; j++, insert >>= 1)
4552             {
4553               if ((insert & 1) != 0 && index_map[j]->reaching_reg != NULL_RTX)
4554                 {
4555                   struct expr *expr = index_map[j];
4556                   struct occr *occr;
4557
4558                   /* Now look at each deleted occurence of this expression.  */
4559                   for (occr = expr->antic_occr; occr != NULL; occr = occr->next)
4560                     {
4561                       if (! occr->deleted_p)
4562                         continue;
4563
4564                       /* Insert this expression on this edge if if it would
4565                          reach the deleted occurence in BB.  */
4566                       if (!TEST_BIT (inserted[e], j)
4567                           && (bb == ENTRY_BLOCK
4568                               || pre_expr_reaches_here_p (bb, expr,
4569                                                    BLOCK_NUM (occr->insn), 0)))
4570                         {
4571                           rtx insn;
4572                           edge eg = INDEX_EDGE (edge_list, e);
4573                           /* We can't insert anything on an abnormal
4574                              and critical edge, so we insert the
4575                              insn at the end of the previous block. There
4576                              are several alternatives detailed in
4577                              Morgans book P277 (sec 10.5) for handling
4578                              this situation.  This one is easiest for now.  */
4579
4580                           if ((eg->flags & EDGE_ABNORMAL) == EDGE_ABNORMAL)
4581                             {
4582                               insert_insn_end_bb (index_map[j], bb, 0);
4583                             }
4584                           else
4585                             {
4586                               insn = process_insert_insn (index_map[j]);
4587                               insert_insn_on_edge (insn, eg);
4588                             }
4589                           if (gcse_file)
4590                             {
4591                               fprintf (gcse_file,
4592                                        "PRE/HOIST: edge (%d,%d), copy expression %d\n",
4593                                         bb,
4594                                         INDEX_EDGE_SUCC_BB (edge_list, e)->index, expr->bitmap_index);
4595                             }
4596                           SET_BIT (inserted[e], j);
4597                           did_insert = 1;
4598                           gcse_create_count++;
4599                         }
4600                     }
4601                 }
4602             }
4603         }
4604     }
4605
4606   /* Clean up.  */
4607   free (inserted);
4608
4609   return did_insert;
4610 }
4611
4612 /* Copy the result of INSN to REG.
4613    INDX is the expression number.  */
4614
4615 static void
4616 pre_insert_copy_insn (expr, insn)
4617      struct expr *expr;
4618      rtx insn;
4619 {
4620   rtx reg = expr->reaching_reg;
4621   int regno = REGNO (reg);
4622   int indx = expr->bitmap_index;
4623   rtx set = single_set (insn);
4624   rtx new_insn;
4625   int bb = BLOCK_NUM (insn);
4626
4627   if (!set)
4628     abort ();
4629   new_insn = emit_insn_after (gen_rtx_SET (VOIDmode, reg, SET_DEST (set)),
4630                               insn);
4631   /* Keep block number table up to date.  */
4632   set_block_num (new_insn, bb);
4633   /* Keep register set table up to date.  */
4634   record_one_set (regno, new_insn);
4635   if (insn == BLOCK_END (bb))
4636     BLOCK_END (bb) = new_insn;
4637
4638   gcse_create_count++;
4639
4640   if (gcse_file)
4641     fprintf (gcse_file,
4642              "PRE: bb %d, insn %d, copy expression %d in insn %d to reg %d\n",
4643               BLOCK_NUM (insn), INSN_UID (new_insn), indx,
4644               INSN_UID (insn), regno);
4645 }
4646
4647 /* Copy available expressions that reach the redundant expression
4648    to `reaching_reg'.  */
4649
4650 static void
4651 pre_insert_copies ()
4652 {
4653   int i;
4654
4655   /* For each available expression in the table, copy the result to
4656      `reaching_reg' if the expression reaches a deleted one.
4657
4658      ??? The current algorithm is rather brute force.
4659      Need to do some profiling.  */
4660
4661   for (i = 0; i < expr_hash_table_size; i++)
4662     {
4663       struct expr *expr;
4664
4665       for (expr = expr_hash_table[i]; expr != NULL; expr = expr->next_same_hash)
4666         {
4667           struct occr *occr;
4668
4669           /* If the basic block isn't reachable, PPOUT will be TRUE.
4670              However, we don't want to insert a copy here because the
4671              expression may not really be redundant.  So only insert
4672              an insn if the expression was deleted.
4673              This test also avoids further processing if the expression
4674              wasn't deleted anywhere.  */
4675           if (expr->reaching_reg == NULL)
4676             continue;
4677
4678           for (occr = expr->antic_occr; occr != NULL; occr = occr->next)
4679             {
4680               struct occr *avail;
4681
4682               if (! occr->deleted_p)
4683                 continue;
4684
4685               for (avail = expr->avail_occr; avail != NULL; avail = avail->next)
4686                 {
4687                   rtx insn = avail->insn;
4688
4689                   /* No need to handle this one if handled already.  */
4690                   if (avail->copied_p)
4691                     continue;
4692                   /* Don't handle this one if it's a redundant one.  */
4693                   if (TEST_BIT (pre_redundant_insns, INSN_CUID (insn)))
4694                     continue;
4695                   /* Or if the expression doesn't reach the deleted one.  */
4696                   if (! pre_expr_reaches_here_p (BLOCK_NUM (avail->insn), expr,
4697                                                  BLOCK_NUM (occr->insn),1))
4698                     continue;
4699
4700                   /* Copy the result of avail to reaching_reg.  */
4701                   pre_insert_copy_insn (expr, insn);
4702                   avail->copied_p = 1;
4703                 }
4704             }
4705         }
4706     }
4707 }
4708
4709 /* Delete redundant computations.
4710    Deletion is done by changing the insn to copy the `reaching_reg' of
4711    the expression into the result of the SET.  It is left to later passes
4712    (cprop, cse2, flow, combine, regmove) to propagate the copy or eliminate it.
4713
4714    Returns non-zero if a change is made.  */
4715
4716 static int
4717 pre_delete ()
4718 {
4719   int i, bb, changed;
4720
4721   /* Compute the expressions which are redundant and need to be replaced by
4722      copies from the reaching reg to the target reg.  */
4723   for (bb = 0; bb < n_basic_blocks; bb++)
4724     sbitmap_copy (temp_bitmap[bb], pre_delete_map[bb]);
4725
4726   changed = 0;
4727   for (i = 0; i < expr_hash_table_size; i++)
4728     {
4729       struct expr *expr;
4730
4731       for (expr = expr_hash_table[i]; expr != NULL; expr = expr->next_same_hash)
4732         {
4733           struct occr *occr;
4734           int indx = expr->bitmap_index;
4735
4736           /* We only need to search antic_occr since we require
4737              ANTLOC != 0.  */
4738
4739           for (occr = expr->antic_occr; occr != NULL; occr = occr->next)
4740             {
4741               rtx insn = occr->insn;
4742               rtx set;
4743               int bb = BLOCK_NUM (insn);
4744
4745               if (TEST_BIT (temp_bitmap[bb], indx))
4746                 {
4747                   set = single_set (insn);
4748                   if (! set)
4749                     abort ();
4750
4751                   /* Create a pseudo-reg to store the result of reaching
4752                      expressions into.  Get the mode for the new pseudo
4753                      from the mode of the original destination pseudo.  */
4754                   if (expr->reaching_reg == NULL)
4755                     expr->reaching_reg
4756                       = gen_reg_rtx (GET_MODE (SET_DEST (set)));
4757
4758                   /* In theory this should never fail since we're creating
4759                      a reg->reg copy.
4760
4761                      However, on the x86 some of the movXX patterns actually
4762                      contain clobbers of scratch regs.  This may cause the
4763                      insn created by validate_change to not match any pattern
4764                      and thus cause validate_change to fail.   */
4765                   if (validate_change (insn, &SET_SRC (set),
4766                                        expr->reaching_reg, 0))
4767                     {
4768                       occr->deleted_p = 1;
4769                       SET_BIT (pre_redundant_insns, INSN_CUID (insn));
4770                       changed = 1;
4771                       gcse_subst_count++;
4772                     }
4773
4774                   if (gcse_file)
4775                     {
4776                       fprintf (gcse_file,
4777                                "PRE: redundant insn %d (expression %d) in bb %d, reaching reg is %d\n",
4778                                INSN_UID (insn), indx, bb, REGNO (expr->reaching_reg));
4779                     }
4780                 }
4781             }
4782         }
4783     }
4784
4785   return changed;
4786 }
4787
4788 /* Perform GCSE optimizations using PRE.
4789    This is called by one_pre_gcse_pass after all the dataflow analysis
4790    has been done.
4791
4792    This is based on the original Morel-Renvoise paper Fred Chow's thesis,
4793    and lazy code motion from Knoop, Ruthing and Steffen as described in
4794    Advanced Compiler Design and Implementation.
4795
4796    ??? A new pseudo reg is created to hold the reaching expression.
4797    The nice thing about the classical approach is that it would try to
4798    use an existing reg.  If the register can't be adequately optimized
4799    [i.e. we introduce reload problems], one could add a pass here to
4800    propagate the new register through the block.
4801
4802    ??? We don't handle single sets in PARALLELs because we're [currently]
4803    not able to copy the rest of the parallel when we insert copies to create
4804    full redundancies from partial redundancies.  However, there's no reason
4805    why we can't handle PARALLELs in the cases where there are no partial
4806    redundancies.  */
4807
4808 static int
4809 pre_gcse ()
4810 {
4811   int i, did_insert;
4812   int changed;
4813   struct expr **index_map;
4814
4815   /* Compute a mapping from expression number (`bitmap_index') to
4816      hash table entry.  */
4817
4818   index_map = xcalloc (n_exprs, sizeof (struct expr *));
4819   for (i = 0; i < expr_hash_table_size; i++)
4820     {
4821       struct expr *expr;
4822
4823       for (expr = expr_hash_table[i]; expr != NULL; expr = expr->next_same_hash)
4824         index_map[expr->bitmap_index] = expr;
4825     }
4826
4827   /* Reset bitmap used to track which insns are redundant.  */
4828   pre_redundant_insns = sbitmap_alloc (max_cuid);
4829   sbitmap_zero (pre_redundant_insns);
4830
4831   /* Delete the redundant insns first so that
4832      - we know what register to use for the new insns and for the other
4833        ones with reaching expressions
4834      - we know which insns are redundant when we go to create copies  */
4835   changed = pre_delete ();
4836
4837   did_insert = pre_edge_insert (edge_list, index_map);
4838   /* In other places with reaching expressions, copy the expression to the
4839      specially allocated pseudo-reg that reaches the redundant expr.  */
4840   pre_insert_copies ();
4841   if (did_insert)
4842     {
4843       commit_edge_insertions ();
4844       changed = 1;
4845     }
4846
4847   free (index_map);
4848   free (pre_redundant_insns);
4849
4850   return changed;
4851 }
4852
4853 /* Top level routine to perform one PRE GCSE pass.
4854
4855    Return non-zero if a change was made.  */
4856
4857 static int
4858 one_pre_gcse_pass (pass)
4859      int pass;
4860 {
4861   int changed = 0;
4862
4863   gcse_subst_count = 0;
4864   gcse_create_count = 0;
4865
4866   alloc_expr_hash_table (max_cuid);
4867   add_noreturn_fake_exit_edges ();
4868   compute_expr_hash_table ();
4869   if (gcse_file)
4870     dump_hash_table (gcse_file, "Expression", expr_hash_table,
4871                      expr_hash_table_size, n_exprs);
4872   if (n_exprs > 0)
4873     {
4874       alloc_pre_mem (n_basic_blocks, n_exprs);
4875       compute_pre_data ();
4876       changed |= pre_gcse ();
4877       free_edge_list (edge_list);
4878       free_pre_mem ();
4879     }
4880   remove_fake_edges ();
4881   free_expr_hash_table ();
4882
4883   if (gcse_file)
4884     {
4885       fprintf (gcse_file, "\n");
4886       fprintf (gcse_file, "PRE GCSE of %s, pass %d: %d bytes needed, %d substs, %d insns created\n",
4887                current_function_name, pass,
4888                bytes_used, gcse_subst_count, gcse_create_count);
4889     }
4890
4891   return changed;
4892 }
4893 \f
4894 /* If X contains any LABEL_REF's, add REG_LABEL notes for them to INSN.
4895    We have to add REG_LABEL notes, because the following loop optimization
4896    pass requires them.  */
4897
4898 /* ??? This is very similar to the loop.c add_label_notes function.  We
4899    could probably share code here.  */
4900
4901 /* ??? If there was a jump optimization pass after gcse and before loop,
4902    then we would not need to do this here, because jump would add the
4903    necessary REG_LABEL notes.  */
4904
4905 static void
4906 add_label_notes (x, insn)
4907      rtx x;
4908      rtx insn;
4909 {
4910   enum rtx_code code = GET_CODE (x);
4911   int i, j;
4912   const char *fmt;
4913
4914   if (code == LABEL_REF && !LABEL_REF_NONLOCAL_P (x))
4915     {
4916       /* This code used to ignore labels that referred to dispatch tables to
4917          avoid flow generating (slighly) worse code.
4918
4919          We no longer ignore such label references (see LABEL_REF handling in
4920          mark_jump_label for additional information).  */
4921       REG_NOTES (insn) = gen_rtx_EXPR_LIST (REG_LABEL, XEXP (x, 0),
4922                                             REG_NOTES (insn));
4923       return;
4924     }
4925
4926   fmt = GET_RTX_FORMAT (code);
4927   for (i = GET_RTX_LENGTH (code) - 1; i >= 0; i--)
4928     {
4929       if (fmt[i] == 'e')
4930         add_label_notes (XEXP (x, i), insn);
4931       else if (fmt[i] == 'E')
4932         for (j = XVECLEN (x, i) - 1; j >= 0; j--)
4933           add_label_notes (XVECEXP (x, i, j), insn);
4934     }
4935 }
4936
4937 /* Compute transparent outgoing information for each block.
4938
4939    An expression is transparent to an edge unless it is killed by
4940    the edge itself.  This can only happen with abnormal control flow,
4941    when the edge is traversed through a call.  This happens with
4942    non-local labels and exceptions.
4943
4944    This would not be necessary if we split the edge.  While this is
4945    normally impossible for abnormal critical edges, with some effort
4946    it should be possible with exception handling, since we still have
4947    control over which handler should be invoked.  But due to increased
4948    EH table sizes, this may not be worthwhile.  */
4949
4950 static void
4951 compute_transpout ()
4952 {
4953   int bb;
4954
4955   sbitmap_vector_ones (transpout, n_basic_blocks);
4956
4957   for (bb = 0; bb < n_basic_blocks; ++bb)
4958     {
4959       int i;
4960
4961       /* Note that flow inserted a nop a the end of basic blocks that
4962          end in call instructions for reasons other than abnormal
4963          control flow.  */
4964       if (GET_CODE (BLOCK_END (bb)) != CALL_INSN)
4965         continue;
4966
4967       for (i = 0; i < expr_hash_table_size; i++)
4968         {
4969           struct expr *expr;
4970           for (expr = expr_hash_table[i]; expr ; expr = expr->next_same_hash)
4971             if (GET_CODE (expr->expr) == MEM)
4972               {
4973                 rtx addr = XEXP (expr->expr, 0);
4974
4975                 if (GET_CODE (addr) == SYMBOL_REF
4976                     && CONSTANT_POOL_ADDRESS_P (addr))
4977                   continue;
4978
4979                 /* ??? Optimally, we would use interprocedural alias
4980                    analysis to determine if this mem is actually killed
4981                    by this call.  */
4982                 RESET_BIT (transpout[bb], expr->bitmap_index);
4983               }
4984         }
4985     }
4986 }
4987
4988 /* Removal of useless null pointer checks */
4989
4990 /* Called via note_stores.  X is set by SETTER.  If X is a register we must
4991    invalidate nonnull_local and set nonnull_killed.  DATA is really a
4992    `null_pointer_info *'.
4993
4994    We ignore hard registers.  */
4995 static void
4996 invalidate_nonnull_info (x, setter, data)
4997      rtx x;
4998      rtx setter ATTRIBUTE_UNUSED;
4999      void *data;
5000 {
5001   int offset, regno;
5002   struct null_pointer_info* npi = (struct null_pointer_info *) data;
5003
5004   offset = 0;
5005   while (GET_CODE (x) == SUBREG)
5006     x = SUBREG_REG (x);
5007
5008   /* Ignore anything that is not a register or is a hard register.  */
5009   if (GET_CODE (x) != REG
5010       || REGNO (x) < npi->min_reg
5011       || REGNO (x) >= npi->max_reg)
5012     return;
5013
5014   regno = REGNO (x) - npi->min_reg;
5015
5016   RESET_BIT (npi->nonnull_local[npi->current_block], regno);
5017   SET_BIT (npi->nonnull_killed[npi->current_block], regno);
5018 }
5019
5020 /* Do null-pointer check elimination for the registers indicated in
5021    NPI.  NONNULL_AVIN and NONNULL_AVOUT are pre-allocated sbitmaps;
5022    they are not our responsibility to free.  */
5023
5024 static void
5025 delete_null_pointer_checks_1 (s_preds, block_reg, nonnull_avin,
5026                               nonnull_avout, npi)
5027      int_list_ptr *s_preds;
5028      int *block_reg;
5029      sbitmap *nonnull_avin;
5030      sbitmap *nonnull_avout;
5031      struct null_pointer_info *npi;
5032 {
5033   int changed, bb;
5034   int current_block;
5035   sbitmap *nonnull_local = npi->nonnull_local;
5036   sbitmap *nonnull_killed = npi->nonnull_killed;
5037
5038   /* Compute local properties, nonnull and killed.  A register will have
5039      the nonnull property if at the end of the current block its value is
5040      known to be nonnull.  The killed property indicates that somewhere in
5041      the block any information we had about the register is killed.
5042
5043      Note that a register can have both properties in a single block.  That
5044      indicates that it's killed, then later in the block a new value is
5045      computed.  */
5046   sbitmap_vector_zero (nonnull_local, n_basic_blocks);
5047   sbitmap_vector_zero (nonnull_killed, n_basic_blocks);
5048   for (current_block = 0; current_block < n_basic_blocks; current_block++)
5049     {
5050       rtx insn, stop_insn;
5051
5052       /* Set the current block for invalidate_nonnull_info.  */
5053       npi->current_block = current_block;
5054
5055       /* Scan each insn in the basic block looking for memory references and
5056          register sets.  */
5057       stop_insn = NEXT_INSN (BLOCK_END (current_block));
5058       for (insn = BLOCK_HEAD (current_block);
5059            insn != stop_insn;
5060            insn = NEXT_INSN (insn))
5061         {
5062           rtx set;
5063           rtx reg;
5064
5065           /* Ignore anything that is not a normal insn.  */
5066           if (GET_RTX_CLASS (GET_CODE (insn)) != 'i')
5067             continue;
5068
5069           /* Basically ignore anything that is not a simple SET.  We do have
5070              to make sure to invalidate nonnull_local and set nonnull_killed
5071              for such insns though.  */
5072           set = single_set (insn);
5073           if (!set)
5074             {
5075               note_stores (PATTERN (insn), invalidate_nonnull_info, npi);
5076               continue;
5077             }
5078
5079           /* See if we've got a useable memory load.  We handle it first
5080              in case it uses its address register as a dest (which kills
5081              the nonnull property).  */
5082           if (GET_CODE (SET_SRC (set)) == MEM
5083               && GET_CODE ((reg = XEXP (SET_SRC (set), 0))) == REG
5084               && REGNO (reg) >= npi->min_reg
5085               && REGNO (reg) < npi->max_reg)
5086             SET_BIT (nonnull_local[current_block],
5087                      REGNO (reg) - npi->min_reg);
5088
5089           /* Now invalidate stuff clobbered by this insn.  */
5090           note_stores (PATTERN (insn), invalidate_nonnull_info, npi);
5091
5092           /* And handle stores, we do these last since any sets in INSN can
5093              not kill the nonnull property if it is derived from a MEM
5094              appearing in a SET_DEST.  */
5095           if (GET_CODE (SET_DEST (set)) == MEM
5096               && GET_CODE ((reg = XEXP (SET_DEST (set), 0))) == REG
5097               && REGNO (reg) >= npi->min_reg
5098               && REGNO (reg) < npi->max_reg)
5099             SET_BIT (nonnull_local[current_block],
5100                      REGNO (reg) - npi->min_reg);
5101         }
5102     }
5103
5104   /* Now compute global properties based on the local properties.   This
5105      is a classic global availablity algorithm.  */
5106   sbitmap_zero (nonnull_avin[0]);
5107   sbitmap_vector_ones (nonnull_avout, n_basic_blocks);
5108   changed = 1;
5109   while (changed)
5110     {
5111       changed = 0;
5112
5113       for (bb = 0; bb < n_basic_blocks; bb++)
5114         {
5115           if (bb != 0)
5116             sbitmap_intersect_of_predecessors (nonnull_avin[bb],
5117                                                nonnull_avout, bb, s_preds);
5118
5119           changed |= sbitmap_union_of_diff (nonnull_avout[bb],
5120                                             nonnull_local[bb],
5121                                             nonnull_avin[bb],
5122                                             nonnull_killed[bb]);
5123         }
5124     }
5125
5126   /* Now look at each bb and see if it ends with a compare of a value
5127      against zero.  */
5128   for (bb = 0; bb < n_basic_blocks; bb++)
5129     {
5130       rtx last_insn = BLOCK_END (bb);
5131       rtx condition, earliest;
5132       int compare_and_branch;
5133
5134       /* Since MIN_REG is always at least FIRST_PSEUDO_REGISTER, and
5135          since BLOCK_REG[BB] is zero if this block did not end with a
5136          comparison against zero, this condition works.  */
5137       if (block_reg[bb] < npi->min_reg
5138           || block_reg[bb] >= npi->max_reg)
5139         continue;
5140
5141       /* LAST_INSN is a conditional jump.  Get its condition.  */
5142       condition = get_condition (last_insn, &earliest);
5143
5144       /* Is the register known to have a nonzero value?  */
5145       if (!TEST_BIT (nonnull_avout[bb], block_reg[bb] - npi->min_reg))
5146         continue;
5147
5148       /* Try to compute whether the compare/branch at the loop end is one or
5149          two instructions.  */
5150       if (earliest == last_insn)
5151         compare_and_branch = 1;
5152       else if (earliest == prev_nonnote_insn (last_insn))
5153         compare_and_branch = 2;
5154       else
5155         continue;
5156
5157       /* We know the register in this comparison is nonnull at exit from
5158          this block.  We can optimize this comparison.  */
5159       if (GET_CODE (condition) == NE)
5160         {
5161           rtx new_jump;
5162
5163           new_jump = emit_jump_insn_before (gen_jump (JUMP_LABEL (last_insn)),
5164                                             last_insn);
5165           JUMP_LABEL (new_jump) = JUMP_LABEL (last_insn);
5166           LABEL_NUSES (JUMP_LABEL (new_jump))++;
5167           emit_barrier_after (new_jump);
5168         }
5169       delete_insn (last_insn);
5170       if (compare_and_branch == 2)
5171         delete_insn (earliest);
5172
5173       /* Don't check this block again.  (Note that BLOCK_END is
5174          invalid here; we deleted the last instruction in the
5175          block.)  */
5176       block_reg[bb] = 0;
5177     }
5178 }
5179
5180 /* Find EQ/NE comparisons against zero which can be (indirectly) evaluated
5181    at compile time.
5182
5183    This is conceptually similar to global constant/copy propagation and
5184    classic global CSE (it even uses the same dataflow equations as cprop).
5185
5186    If a register is used as memory address with the form (mem (reg)), then we
5187    know that REG can not be zero at that point in the program.  Any instruction
5188    which sets REG "kills" this property.
5189
5190    So, if every path leading to a conditional branch has an available memory
5191    reference of that form, then we know the register can not have the value
5192    zero at the conditional branch.
5193
5194    So we merely need to compute the local properies and propagate that data
5195    around the cfg, then optimize where possible.
5196
5197    We run this pass two times.  Once before CSE, then again after CSE.  This
5198    has proven to be the most profitable approach.  It is rare for new
5199    optimization opportunities of this nature to appear after the first CSE
5200    pass.
5201
5202    This could probably be integrated with global cprop with a little work.  */
5203
5204 void
5205 delete_null_pointer_checks (f)
5206      rtx f;
5207 {
5208   int_list_ptr *s_preds, *s_succs;
5209   int *num_preds, *num_succs;
5210   sbitmap *nonnull_avin, *nonnull_avout;
5211   int *block_reg;
5212   int bb;
5213   int reg;
5214   int regs_per_pass;
5215   int max_reg;
5216   struct null_pointer_info npi;
5217
5218   /* First break the program into basic blocks.  */
5219   find_basic_blocks (f, max_reg_num (), NULL, 1);
5220
5221   /* If we have only a single block, then there's nothing to do.  */
5222   if (n_basic_blocks <= 1)
5223     {
5224       /* Free storage allocated by find_basic_blocks.  */
5225       free_basic_block_vars (0);
5226       return;
5227     }
5228
5229   /* Trying to perform global optimizations on flow graphs which have
5230      a high connectivity will take a long time and is unlikely to be
5231      particularly useful.
5232
5233      In normal circumstances a cfg should have about twice has many edges
5234      as blocks.  But we do not want to punish small functions which have
5235      a couple switch statements.  So we require a relatively large number
5236      of basic blocks and the ratio of edges to blocks to be high.  */
5237   if (n_basic_blocks > 1000 && n_edges / n_basic_blocks >= 20)
5238     {
5239       /* Free storage allocated by find_basic_blocks.  */
5240       free_basic_block_vars (0);
5241       return;
5242     }
5243
5244   /* We need predecessor/successor lists as well as pred/succ counts for
5245      each basic block.  */
5246   s_preds = (int_list_ptr *) gmalloc (n_basic_blocks * sizeof (int_list_ptr));
5247   s_succs = (int_list_ptr *) gmalloc (n_basic_blocks * sizeof (int_list_ptr));
5248   num_preds = (int *) gmalloc (n_basic_blocks * sizeof (int));
5249   num_succs = (int *) gmalloc (n_basic_blocks * sizeof (int));
5250   compute_preds_succs (s_preds, s_succs, num_preds, num_succs);
5251
5252   /* We need four bitmaps, each with a bit for each register in each
5253      basic block.  */
5254   max_reg = max_reg_num ();
5255   regs_per_pass = get_bitmap_width (4, n_basic_blocks, max_reg);
5256
5257   /* Allocate bitmaps to hold local and global properties.  */
5258   npi.nonnull_local = sbitmap_vector_alloc (n_basic_blocks, regs_per_pass);
5259   npi.nonnull_killed = sbitmap_vector_alloc (n_basic_blocks, regs_per_pass);
5260   nonnull_avin = sbitmap_vector_alloc (n_basic_blocks, regs_per_pass);
5261   nonnull_avout = sbitmap_vector_alloc (n_basic_blocks, regs_per_pass);
5262
5263   /* Go through the basic blocks, seeing whether or not each block
5264      ends with a conditional branch whose condition is a comparison
5265      against zero.  Record the register compared in BLOCK_REG.  */
5266   block_reg = (int *) xcalloc (n_basic_blocks, sizeof (int));
5267   for (bb = 0; bb < n_basic_blocks; bb++)
5268     {
5269       rtx last_insn = BLOCK_END (bb);
5270       rtx condition, earliest, reg;
5271
5272       /* We only want conditional branches.  */
5273       if (GET_CODE (last_insn) != JUMP_INSN
5274           || !condjump_p (last_insn)
5275           || simplejump_p (last_insn))
5276         continue;
5277
5278       /* LAST_INSN is a conditional jump.  Get its condition.  */
5279       condition = get_condition (last_insn, &earliest);
5280
5281       /* If we were unable to get the condition, or it is not a equality
5282          comparison against zero then there's nothing we can do.  */
5283       if (!condition
5284           || (GET_CODE (condition) != NE && GET_CODE (condition) != EQ)
5285           || GET_CODE (XEXP (condition, 1)) != CONST_INT
5286           || (XEXP (condition, 1)
5287               != CONST0_RTX (GET_MODE (XEXP (condition, 0)))))
5288         continue;
5289
5290       /* We must be checking a register against zero.  */
5291       reg = XEXP (condition, 0);
5292       if (GET_CODE (reg) != REG)
5293         continue;
5294
5295       block_reg[bb] = REGNO (reg);
5296     }
5297
5298   /* Go through the algorithm for each block of registers.  */
5299   for (reg = FIRST_PSEUDO_REGISTER; reg < max_reg; reg += regs_per_pass)
5300     {
5301       npi.min_reg = reg;
5302       npi.max_reg = MIN (reg + regs_per_pass, max_reg);
5303       delete_null_pointer_checks_1 (s_preds, block_reg, nonnull_avin,
5304                                     nonnull_avout, &npi);
5305     }
5306
5307   /* Free storage allocated by find_basic_blocks.  */
5308   free_basic_block_vars (0);
5309
5310   /* Free our local predecessor/successor lists. */
5311   free (s_preds);
5312   free (s_succs);
5313   free (num_preds);
5314   free (num_succs);
5315
5316   /* Free the table of registers compared at the end of every block.  */
5317   free (block_reg);
5318
5319   /* Free bitmaps.  */
5320   free (npi.nonnull_local);
5321   free (npi.nonnull_killed);
5322   free (nonnull_avin);
5323   free (nonnull_avout);
5324 }
5325
5326 /* Code Hoisting variables and subroutines.  */
5327
5328 /* Very busy expressions.  */
5329 static sbitmap *hoist_vbein;
5330 static sbitmap *hoist_vbeout;
5331
5332 /* Hoistable expressions.  */
5333 static sbitmap *hoist_exprs;
5334
5335 /* Dominator bitmaps.  */
5336 static sbitmap *dominators;
5337
/* ??? We could compute post dominators and run this algorithm in
   reverse to perform tail merging.  Doing so would probably be
   more effective than the tail merging code in jump.c.

   It's unclear if tail merging could be run in parallel with
   code hoisting.  It would be nice.  */
5344
5345 /* Allocate vars used for code hoisting analysis.  */
5346
5347 static void
5348 alloc_code_hoist_mem (n_blocks, n_exprs)
5349      int n_blocks, n_exprs;
5350 {
5351   antloc = sbitmap_vector_alloc (n_blocks, n_exprs);
5352   transp = sbitmap_vector_alloc (n_blocks, n_exprs);
5353   comp = sbitmap_vector_alloc (n_blocks, n_exprs);
5354
5355   hoist_vbein = sbitmap_vector_alloc (n_blocks, n_exprs);
5356   hoist_vbeout = sbitmap_vector_alloc (n_blocks, n_exprs);
5357   hoist_exprs = sbitmap_vector_alloc (n_blocks, n_exprs);
5358   transpout = sbitmap_vector_alloc (n_blocks, n_exprs);
5359
5360   dominators = sbitmap_vector_alloc (n_blocks, n_blocks);
5361 }
5362
5363 /* Free vars used for code hoisting analysis.  */
5364
5365 static void
5366 free_code_hoist_mem ()
5367 {
5368   free (antloc);
5369   free (transp);
5370   free (comp);
5371
5372   free (hoist_vbein);
5373   free (hoist_vbeout);
5374   free (hoist_exprs);
5375   free (transpout);
5376
5377   free (dominators);
5378 }
5379
5380 /* Compute the very busy expressions at entry/exit from each block.
5381
5382    An expression is very busy if all paths from a given point
5383    compute the expression.  */
5384
5385 static void
5386 compute_code_hoist_vbeinout ()
5387 {
5388   int bb, changed, passes;
5389
5390   sbitmap_vector_zero (hoist_vbeout, n_basic_blocks);
5391   sbitmap_vector_zero (hoist_vbein, n_basic_blocks);
5392
5393   passes = 0;
5394   changed = 1;
5395   while (changed)
5396     {
5397       changed = 0;
5398       /* We scan the blocks in the reverse order to speed up
5399          the convergence.  */
5400       for (bb = n_basic_blocks - 1; bb >= 0; bb--)
5401         {
5402           changed |= sbitmap_a_or_b_and_c (hoist_vbein[bb], antloc[bb],
5403                                            hoist_vbeout[bb], transp[bb]);
5404           if (bb != n_basic_blocks - 1)
5405             sbitmap_intersection_of_succs (hoist_vbeout[bb], hoist_vbein, bb);
5406         }
5407       passes++;
5408     }
5409
5410   if (gcse_file)
5411     fprintf (gcse_file, "hoisting vbeinout computation: %d passes\n", passes);
5412 }
5413
5414 /* Top level routine to do the dataflow analysis needed by code hoisting.  */
5415
5416 static void
5417 compute_code_hoist_data ()
5418 {
5419   compute_local_properties (transp, comp, antloc, 0);
5420   compute_transpout ();
5421   compute_code_hoist_vbeinout ();
5422   compute_flow_dominators (dominators, NULL);
5423   if (gcse_file)
5424     fprintf (gcse_file, "\n");
5425 }
5426
5427 /* Determine if the expression identified by EXPR_INDEX would
5428    reach BB unimpared if it was placed at the end of EXPR_BB.
5429
5430    It's unclear exactly what Muchnick meant by "unimpared".  It seems
5431    to me that the expression must either be computed or transparent in
5432    *every* block in the path(s) from EXPR_BB to BB.  Any other definition
5433    would allow the expression to be hoisted out of loops, even if
5434    the expression wasn't a loop invariant.
5435
5436    Contrast this to reachability for PRE where an expression is
5437    considered reachable if *any* path reaches instead of *all*
5438    paths.  */
5439
5440 static int
5441 hoist_expr_reaches_here_p (expr_bb, expr_index, bb, visited)
5442      int expr_bb;
5443      int expr_index;
5444      int bb;
5445      char *visited;
5446 {
5447   edge pred;
5448   int visited_allocated_locally = 0;
5449
5450
5451   if (visited == NULL)
5452     {
5453        visited_allocated_locally = 1;
5454        visited = xcalloc (n_basic_blocks, 1);
5455     }
5456
5457   visited[expr_bb] = 1;
5458   for (pred = BASIC_BLOCK (bb)->pred; pred != NULL; pred = pred->pred_next)
5459     {
5460       int pred_bb = pred->src->index;
5461
5462       if (pred->src == ENTRY_BLOCK_PTR)
5463         break;
5464       else if (visited[pred_bb])
5465         continue;
5466       /* Does this predecessor generate this expression?  */
5467       else if (TEST_BIT (comp[pred_bb], expr_index))
5468         break;
5469       else if (! TEST_BIT (transp[pred_bb], expr_index))
5470         break;
5471       /* Not killed.  */
5472       else
5473         {
5474           visited[pred_bb] = 1;
5475           if (! hoist_expr_reaches_here_p (expr_bb, expr_index,
5476                                            pred_bb, visited))
5477             break;
5478         }
5479     }
5480   if (visited_allocated_locally)
5481     free (visited);
5482   return (pred == NULL);
5483 }
5484 \f
5485 /* Actually perform code hoisting.  */
5486 static void
5487 hoist_code ()
5488 {
5489   int bb, dominated, i;
5490   struct expr **index_map;
5491
5492   sbitmap_vector_zero (hoist_exprs, n_basic_blocks);
5493
5494   /* Compute a mapping from expression number (`bitmap_index') to
5495      hash table entry.  */
5496
5497   index_map = xcalloc (n_exprs, sizeof (struct expr *));
5498   for (i = 0; i < expr_hash_table_size; i++)
5499     {
5500       struct expr *expr;
5501
5502       for (expr = expr_hash_table[i]; expr != NULL; expr = expr->next_same_hash)
5503         index_map[expr->bitmap_index] = expr;
5504     }
5505
5506   /* Walk over each basic block looking for potentially hoistable
5507      expressions, nothing gets hoisted from the entry block.  */
5508   for (bb = 0; bb < n_basic_blocks; bb++)
5509     {
5510       int found = 0;
5511       int insn_inserted_p;
5512
5513       /* Examine each expression that is very busy at the exit of this
5514          block.  These are the potentially hoistable expressions.  */
5515       for (i = 0; i < hoist_vbeout[bb]->n_bits; i++)
5516         {
5517           int hoistable = 0;
5518           if (TEST_BIT (hoist_vbeout[bb], i)
5519               && TEST_BIT (transpout[bb], i))
5520             {
5521               /* We've found a potentially hoistable expression, now
5522                  we look at every block BB dominates to see if it
5523                  computes the expression.  */
5524               for (dominated = 0; dominated < n_basic_blocks; dominated++)
5525                 {
5526                   /* Ignore self dominance.  */
5527                   if (bb == dominated
5528                       || ! TEST_BIT (dominators[dominated], bb))
5529                     continue;
5530
5531                   /* We've found a dominated block, now see if it computes
5532                      the busy expression and whether or not moving that
5533                      expression to the "beginning" of that block is safe.  */
5534                   if (!TEST_BIT (antloc[dominated], i))
5535                     continue;
5536
5537                   /* Note if the expression would reach the dominated block
5538                      unimpared if it was placed at the end of BB.
5539
5540                      Keep track of how many times this expression is hoistable
5541                      from a dominated block into BB.  */
5542                   if (hoist_expr_reaches_here_p (bb, i, dominated, NULL))
5543                     hoistable++;
5544                 }
5545
5546               /* If we found more than one hoistable occurence of this
5547                  expression, then note it in the bitmap of expressions to
5548                  hoist.  It makes no sense to hoist things which are computed
5549                  in only one BB, and doing so tends to pessimize register
5550                  allocation.  One could increase this value to try harder
5551                  to avoid any possible code expansion due to register
5552                  allocation issues; however experiments have shown that
5553                  the vast majority of hoistable expressions are only movable
5554                  from two successors, so raising this threshhold is likely
5555                  to nullify any benefit we get from code hoisting.  */
5556               if (hoistable > 1)
5557                 {
5558                   SET_BIT (hoist_exprs[bb], i);
5559                   found = 1;
5560                 }
5561             }
5562         }
5563
5564       /* If we found nothing to hoist, then quit now.  */
5565       if (! found)
5566         continue;
5567
5568       /* Loop over all the hoistable expressions.  */
5569       for (i = 0; i < hoist_exprs[bb]->n_bits; i++)
5570         {
5571           /* We want to insert the expression into BB only once, so
5572              note when we've inserted it.  */
5573           insn_inserted_p = 0;
5574
5575           /* These tests should be the same as the tests above.  */
5576           if (TEST_BIT (hoist_vbeout[bb], i))
5577             {
5578               /* We've found a potentially hoistable expression, now
5579                  we look at every block BB dominates to see if it
5580                  computes the expression.  */
5581               for (dominated = 0; dominated < n_basic_blocks; dominated++)
5582                 {
5583                   /* Ignore self dominance.  */
5584                   if (bb == dominated
5585                       || ! TEST_BIT (dominators[dominated], bb))
5586                     continue;
5587
5588                   /* We've found a dominated block, now see if it computes
5589                      the busy expression and whether or not moving that
5590                      expression to the "beginning" of that block is safe.  */
5591                   if (!TEST_BIT (antloc[dominated], i))
5592                     continue;
5593
5594                   /* The expression is computed in the dominated block and
5595                      it would be safe to compute it at the start of the
5596                      dominated block.  Now we have to determine if the
5597                      expresion would reach the dominated block if it was
5598                      placed at the end of BB.  */
5599                   if (hoist_expr_reaches_here_p (bb, i, dominated, NULL))
5600                     {
5601                       struct expr *expr = index_map[i];
5602                       struct occr *occr = expr->antic_occr;
5603                       rtx insn;
5604                       rtx set;
5605
5606
5607                       /* Find the right occurence of this expression.  */
5608                       while (BLOCK_NUM (occr->insn) != dominated && occr)
5609                         occr = occr->next;
5610
5611                       /* Should never happen.  */
5612                       if (!occr)
5613                         abort ();
5614
5615                       insn = occr->insn;
5616
5617                       set = single_set (insn);
5618                       if (! set)
5619                         abort ();
5620
5621                       /* Create a pseudo-reg to store the result of reaching
5622                          expressions into.  Get the mode for the new pseudo
5623                          from the mode of the original destination pseudo.  */
5624                       if (expr->reaching_reg == NULL)
5625                         expr->reaching_reg
5626                           = gen_reg_rtx (GET_MODE (SET_DEST (set)));
5627
5628                       /* In theory this should never fail since we're creating
5629                          a reg->reg copy.
5630
5631                          However, on the x86 some of the movXX patterns actually
5632                          contain clobbers of scratch regs.  This may cause the
5633                          insn created by validate_change to not match any
5634                          pattern and thus cause validate_change to fail.   */
5635                       if (validate_change (insn, &SET_SRC (set),
5636                                            expr->reaching_reg, 0))
5637                         {
5638                           occr->deleted_p = 1;
5639                           if (!insn_inserted_p)
5640                             {
5641                               insert_insn_end_bb (index_map[i], bb, 0);
5642                               insn_inserted_p = 1;
5643                             }
5644                         }
5645                     }
5646                 }
5647             }
5648         }
5649     }
5650     free (index_map);
5651 }
5652
5653 /* Top level routine to perform one code hoisting (aka unification) pass
5654
5655    Return non-zero if a change was made.  */
5656
5657 static int
5658 one_code_hoisting_pass ()
5659 {
5660   int changed = 0;
5661
5662   alloc_expr_hash_table (max_cuid);
5663   compute_expr_hash_table ();
5664   if (gcse_file)
5665     dump_hash_table (gcse_file, "Code Hosting Expressions", expr_hash_table,
5666                      expr_hash_table_size, n_exprs);
5667   if (n_exprs > 0)
5668     {
5669       alloc_code_hoist_mem (n_basic_blocks, n_exprs);
5670       compute_code_hoist_data ();
5671       hoist_code ();
5672       free_code_hoist_mem ();
5673     }
5674   free_expr_hash_table ();
5675
5676   return changed;
5677 }